富文本转 Word
#coding=utf-8
from docx import Document
from docx.shared import Pt, RGBColor
from docx.oxml.ns import qn
from lxml import etree
# from docx.enum.text import WD_ALIGN_PARAGRAPH
# Sample input: each record carries a title and a body, both given as
# rich-text (HTML) fragments. Both records share the same body markup.
_SAMPLE_CONTENT = u"""<p>你好</p>
<p>你好中国</p>
<p><strong>hello </strong>world</p>
"""

data = [
    {
        # Title wrapped in markup.
        'document_title': u'<h1>写一个题目阿斯蒂芬按时</h1>',
        'document_content': _SAMPLE_CONTENT,
    },
    {
        # Title given as plain text.
        'document_title': u'写一个题目阿斯蒂芬按时',
        'document_content': _SAMPLE_CONTENT,
    },
]
<!-- more -->
def clean_richtext(richtext):
    """Strip the markup from a rich-text (HTML) fragment.

    The fragment is parsed with lxml's HTML parser and the text content of
    the resulting tree is returned via the XPath expression ``string(.)``.

    Args:
        richtext: HTML fragment as a string. May be empty or ``None``.

    Returns:
        The text content with all tags removed, or an empty string when
        there is nothing to parse.
    """
    # An empty or whitespace-only fragment gives the parser no document to
    # build; short-circuit instead of failing on it.
    if richtext is None or not richtext.strip():
        return u''
    root = etree.HTML(text=richtext)
    # The parser may still produce no root element (input without any
    # element or text nodes); calling .xpath on None would raise.
    if root is None:
        return u''
    return root.xpath('string(.)')
def _apply_font(run, font_name, size, color=None):
    """Set size, optional color and typeface (incl. East Asian) on a run."""
    font = run.font
    font.size = size
    if color is not None:
        font.color.rgb = color
    font.name = font_name
    # CJK glyphs are selected through the w:eastAsia attribute of rFonts,
    # so it is set directly on the underlying XML element. This must come
    # after font.name so that the rPr/rFonts elements already exist.
    run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)


def render_data(data, output_path='test.docx', font_name=u'宋体'):
    """Render rich-text records into a Word document and save it.

    For every record a heading and a body paragraph are appended to a new
    document. Markup is stripped from both fields with ``clean_richtext``
    before the text is inserted. Headings are written at 22pt in black,
    bodies at 10.5pt.

    Args:
        data: Iterable of dicts with the keys ``'document_title'`` and
            ``'document_content'``, each holding a rich-text string.
        output_path: Where the resulting .docx file is written. Defaults to
            ``'test.docx'``.
        font_name: Typeface applied to both headings and bodies.

    Returns:
        None. The document is written to ``output_path``.
    """
    document = Document()
    for item in data:
        title_text = clean_richtext(item['document_title'])
        content_text = clean_richtext(item['document_content'])

        title = document.add_heading()
        paragraph = document.add_paragraph()
        # To center the heading, set
        # title.alignment = WD_ALIGN_PARAGRAPH.CENTER (from docx.enum.text).

        _apply_font(title.add_run(title_text), font_name, Pt(22),
                    RGBColor(0, 0, 0))
        _apply_font(paragraph.add_run(content_text), font_name, Pt(10.5))

    # Write the file once, after every record has been appended.
    document.save(output_path)
# Script entry: render the sample records defined above into a .docx file.
render_data(data)