format_manual_v3.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 格式化系统操作手册 - 第3版
  5. - 添加自动目录
  6. - 添加页眉页脚(页码)
  7. """
  8. from docx import Document
  9. from docx.shared import Pt, Inches
  10. from docx.enum.text import WD_ALIGN_PARAGRAPH
  11. from docx.oxml.ns import qn
  12. from docx.oxml import OxmlElement
  13. import re
  14. from io import BytesIO
  15. def set_font(run, font_name='宋体', font_size=12, bold=False):
  16. """设置字体"""
  17. run.font.name = font_name
  18. run.font.size = Pt(font_size)
  19. run.font.bold = bold
  20. run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
  21. def is_heading_level_1(text):
  22. """判断是否为一级标题"""
  23. if not text:
  24. return False
  25. return bool(re.match(r'^\d+、', text)) or text == '系统登录'
  26. def is_heading_level_2(text):
  27. """判断是否为二级标题"""
  28. if not text or len(text) < 3:
  29. return False
  30. match = re.match(r'^\d+\.\d+\s', text)
  31. return bool(match)
  32. def is_heading_level_3(text):
  33. """判断是否为三级标题"""
  34. if not text or len(text) < 5:
  35. return False
  36. match = re.match(r'^\d+\.\d+\.\d+\s', text)
  37. return bool(match)
  38. def add_toc(doc):
  39. """添加自动目录"""
  40. paragraph = doc.add_paragraph()
  41. run = paragraph.add_run()
  42. # 创建 TOC 域代码
  43. fldChar1 = OxmlElement('w:fldChar')
  44. fldChar1.set(qn('w:fldCharType'), 'begin')
  45. instrText = OxmlElement('w:instrText')
  46. instrText.set(qn('xml:space'), 'preserve')
  47. instrText.text = 'TOC \\o "1-3" \\h \\z \\u'
  48. fldChar2 = OxmlElement('w:fldChar')
  49. fldChar2.set(qn('w:fldCharType'), 'end')
  50. run._r.append(fldChar1)
  51. run._r.append(instrText)
  52. run._r.append(fldChar2)
  53. return paragraph
  54. def add_page_number(section):
  55. """添加页眉页脚,显示页码"""
  56. # 添加页脚
  57. footer = section.footer
  58. footer.is_linked_to_previous = False
  59. # 清空现有内容
  60. for para in footer.paragraphs:
  61. para.clear()
  62. # 创建页脚段落
  63. footer_para = footer.paragraphs[0] if footer.paragraphs else footer.add_paragraph()
  64. footer_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
  65. # 添加 "第X页/共Y页" 格式
  66. run = footer_para.add_run('第 ')
  67. set_font(run, font_name='宋体', font_size=10)
  68. # 添加当前页码域
  69. fldChar1 = OxmlElement('w:fldChar')
  70. fldChar1.set(qn('w:fldCharType'), 'begin')
  71. instrText1 = OxmlElement('w:instrText')
  72. instrText1.set(qn('xml:space'), 'preserve')
  73. instrText1.text = 'PAGE'
  74. fldChar2 = OxmlElement('w:fldChar')
  75. fldChar2.set(qn('w:fldCharType'), 'end')
  76. run._r.append(fldChar1)
  77. run._r.append(instrText1)
  78. run._r.append(fldChar2)
  79. run = footer_para.add_run(' 页/共 ')
  80. set_font(run, font_name='宋体', font_size=10)
  81. # 添加总页数域
  82. run = footer_para.add_run()
  83. fldChar3 = OxmlElement('w:fldChar')
  84. fldChar3.set(qn('w:fldCharType'), 'begin')
  85. instrText2 = OxmlElement('w:instrText')
  86. instrText2.set(qn('xml:space'), 'preserve')
  87. instrText2.text = 'NUMPAGES'
  88. fldChar4 = OxmlElement('w:fldChar')
  89. fldChar4.set(qn('w:fldCharType'), 'end')
  90. run._r.append(fldChar3)
  91. run._r.append(instrText2)
  92. run._r.append(fldChar4)
  93. run = footer_para.add_run(' 页')
  94. set_font(run, font_name='宋体', font_size=10)
  95. def copy_paragraph_with_images(source_para, target_doc, apply_heading_style=False):
  96. """复制段落,包括文本和图片"""
  97. text = source_para.text.strip()
  98. # 创建新段落
  99. new_para = target_doc.add_paragraph()
  100. # 根据文本类型设置格式和样式
  101. if is_heading_level_1(text):
  102. if apply_heading_style:
  103. new_para.style = 'Heading 1'
  104. run = new_para.add_run(text)
  105. set_font(run, font_name='黑体', font_size=16, bold=True)
  106. new_para.paragraph_format.space_before = Pt(12)
  107. new_para.paragraph_format.space_after = Pt(6)
  108. elif is_heading_level_2(text):
  109. if apply_heading_style:
  110. new_para.style = 'Heading 2'
  111. run = new_para.add_run(text)
  112. set_font(run, font_name='黑体', font_size=14, bold=True)
  113. new_para.paragraph_format.space_before = Pt(10)
  114. new_para.paragraph_format.space_after = Pt(5)
  115. elif is_heading_level_3(text):
  116. if apply_heading_style:
  117. new_para.style = 'Heading 3'
  118. run = new_para.add_run(text)
  119. set_font(run, font_name='黑体', font_size=13, bold=True)
  120. new_para.paragraph_format.space_before = Pt(8)
  121. new_para.paragraph_format.space_after = Pt(4)
  122. else:
  123. # 普通段落
  124. if text:
  125. run = new_para.add_run(text)
  126. set_font(run, font_name='宋体', font_size=11)
  127. new_para.paragraph_format.space_after = Pt(3)
  128. new_para.paragraph_format.line_spacing = 1.15
  129. # 检查并复制图片
  130. if source_para._element.xpath('.//w:drawing'):
  131. img_para = target_doc.add_paragraph()
  132. img_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
  133. for run in source_para.runs:
  134. if hasattr(run, '_element') and run._element.xpath('.//a:blip'):
  135. blip = run._element.xpath('.//a:blip')[0]
  136. rId = blip.get(qn('r:embed'))
  137. image_part = source_para.part.related_parts[rId]
  138. image_bytes = image_part.blob
  139. try:
  140. inline = run._element.xpath('.//wp:inline')[0]
  141. extent = inline.xpath('.//wp:extent')[0]
  142. cx = int(extent.get('cx'))
  143. cy = int(extent.get('cy'))
  144. width = Inches(cx / 914400)
  145. height = Inches(cy / 914400)
  146. img_run = img_para.add_run()
  147. image_stream = BytesIO(image_bytes)
  148. img_run.add_picture(image_stream, width=width, height=height)
  149. except Exception as e:
  150. try:
  151. img_run = img_para.add_run()
  152. image_stream = BytesIO(image_bytes)
  153. img_run.add_picture(image_stream)
  154. except:
  155. pass
  156. def format_document(input_path, output_path):
  157. """格式化文档"""
  158. print("正在读取文档...")
  159. doc = Document(input_path)
  160. print("创建新文档...")
  161. new_doc = Document()
  162. # 设置页边距
  163. for section in new_doc.sections:
  164. section.top_margin = Inches(1)
  165. section.bottom_margin = Inches(1)
  166. section.left_margin = Inches(1.25)
  167. section.right_margin = Inches(1.25)
  168. # ===== 第一页:标题页 =====
  169. print("创建标题页...")
  170. for _ in range(8):
  171. new_doc.add_paragraph()
  172. title_para = new_doc.add_paragraph()
  173. title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
  174. title_run = title_para.add_run('系统操作手册')
  175. set_font(title_run, font_name='黑体', font_size=28, bold=True)
  176. new_doc.add_page_break()
  177. # ===== 第二页:自动目录 =====
  178. print("创建自动目录...")
  179. toc_title = new_doc.add_paragraph()
  180. toc_title.alignment = WD_ALIGN_PARAGRAPH.CENTER
  181. toc_run = toc_title.add_run('目 录')
  182. set_font(toc_run, font_name='黑体', font_size=18, bold=True)
  183. new_doc.add_paragraph()
  184. # 添加自动目录
  185. add_toc(new_doc)
  186. new_doc.add_paragraph()
  187. new_doc.add_paragraph()
  188. new_doc.add_page_break()
  189. # ===== 第三页开始:正文内容 =====
  190. print("复制正文内容...")
  191. content_started = False
  192. para_count = 0
  193. for para in doc.paragraphs:
  194. text = para.text.strip()
  195. if not content_started and not text:
  196. continue
  197. if text:
  198. content_started = True
  199. if content_started:
  200. # 应用标题样式,以便自动目录可以识别
  201. copy_paragraph_with_images(para, new_doc, apply_heading_style=True)
  202. para_count += 1
  203. if para_count % 10 == 0:
  204. print(f"已处理 {para_count} 个段落...")
  205. # ===== 添加页眉页脚 =====
  206. print("添加页眉页脚...")
  207. for section in new_doc.sections:
  208. add_page_number(section)
  209. # 保存新文档
  210. print("保存文档...")
  211. new_doc.save(output_path)
  212. print(f"\n✓ 文档格式化完成!")
  213. print(f"✓ 已保存到: {output_path}")
  214. print(f"✓ 共处理 {para_count} 个段落")
  215. print("\n提示:打开文档后,请右键点击目录,选择'更新域'来更新目录内容。")
  216. if __name__ == '__main__':
  217. input_file = '/Users/geng/Desktop/系统操作手册.docx'
  218. output_file = '/Users/geng/Desktop/系统操作手册_格式化.docx'
  219. try:
  220. format_document(input_file, output_file)
  221. except Exception as e:
  222. print(f"\n✗ 错误: {e}")
  223. import traceback
  224. traceback.print_exc()