富文本转word

  1. #coding=utf-8
  2. from docx import Document
  3. from docx.shared import Pt, RGBColor
  4. from docx.oxml.ns import qn
  5. from lxml import etree
  6. # from docx.enum.text import WD_ALIGN_PARAGRAPH
  # Sample input for render_data: each entry carries a rich-text (HTML) title
  # and body. The first title is wrapped in <h1>, the second is plain text, so
  # both shapes are exercised. Content strings are spelled out line by line so
  # that no listing artefacts or indentation leak into the HTML.
  data = [
      {
          'document_title': u'<h1>写一个题目阿斯蒂芬按时</h1>',
          'document_content': (
              u'<p>你好</p>\n'
              u'<p>你好中国</p>\n'
              u'<p><strong>hello </strong>world</p>\n'
          ),
      },
      {
          'document_title': u'写一个题目阿斯蒂芬按时',
          'document_content': (
              u'<p>你好</p>\n'
              u'<p>你好中国</p>\n'
              u'<p><strong>hello </strong>world</p>\n'
          ),
      },
  ]
  20. <!-- more -->
  # Strip the markup from a rich-text fragment.
  def clean_richtext(richtext):
      """Return the plain text contained in an HTML/rich-text string.

      The fragment is parsed with lxml's HTML parser and the concatenated
      text of the whole tree is returned, so tags such as <p> or <strong>
      are dropped while their text is kept.

      Args:
          richtext: HTML fragment (or plain text) as a string; may be None.

      Returns:
          The text content, or an empty string when there is nothing to parse.
      """
      # Empty or whitespace-only input has no element tree to build, so
      # short-circuit instead of handing it to the parser.
      if richtext is None or not richtext.strip():
          return u''
      root = etree.HTML(text=richtext)
      # Defensive: without a root element there is no text to extract.
      if root is None:
          return u''
      return root.xpath('string(.)')
  def _apply_font(run, font_name, size):
      """Set the point size and typeface of *run*, including its East Asian face."""
      run.font.size = size
      run.font.name = font_name
      # Setting font.name first ensures the run has rPr/rFonts elements; the
      # w:eastAsia attribute is then written on the raw XML so that CJK text
      # uses the requested typeface as well.
      run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)


  def render_data(data, output_path='test.docx', font_name=u'宋体'):
      """Render rich-text items into a Word document and save it.

      For every item a heading holding the cleaned title and one paragraph
      holding the cleaned content are appended to a new document, which is
      saved once after all items have been added.

      Args:
          data: iterable of dicts with 'document_title' and
              'document_content' keys, each a rich-text (HTML) string.
          output_path: destination of the .docx file; defaults to the
              previously hard-coded 'test.docx'.
          font_name: typeface applied to every run; defaults to the
              previously hard-coded value.
      """
      document = Document()
      for item in data:
          title_text = clean_richtext(item['document_title'])
          content_text = clean_richtext(item['document_content'])

          title = document.add_heading()
          paragraph = document.add_paragraph()
          # Heading centring is left disabled, as before; enabling it needs
          # WD_ALIGN_PARAGRAPH from docx.enum.text:
          #   title.alignment = WD_ALIGN_PARAGRAPH.CENTER

          title_run = title.add_run(title_text)
          _apply_font(title_run, font_name, Pt(22))
          # Force black so the heading style's own colour is overridden.
          title_run.font.color.rgb = RGBColor(0, 0, 0)

          para_run = paragraph.add_run(content_text)
          _apply_font(para_run, font_name, Pt(10.5))

      document.save(output_path)
  # Script entry: render the sample data into the default output file.
  render_data(data)