format_manual.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 格式化系统操作手册
  5. """
  6. from docx import Document
  7. from docx.shared import Pt, Inches, RGBColor
  8. from docx.enum.text import WD_ALIGN_PARAGRAPH
  9. from docx.oxml.ns import qn
  10. from docx.oxml import OxmlElement
  11. def add_page_break(doc):
  12. """添加分页符"""
  13. doc.add_page_break()
  14. def set_font(run, font_name='宋体', font_size=12, bold=False):
  15. """设置字体"""
  16. run.font.name = font_name
  17. run.font.size = Pt(font_size)
  18. run.font.bold = bold
  19. # 设置中文字体
  20. run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
  21. def format_document(input_path, output_path):
  22. """格式化文档"""
  23. # 读取原文档
  24. doc = Document(input_path)
  25. # 创建新文档
  26. new_doc = Document()
  27. # 设置页边距
  28. for section in new_doc.sections:
  29. section.top_margin = Inches(1)
  30. section.bottom_margin = Inches(1)
  31. section.left_margin = Inches(1.25)
  32. section.right_margin = Inches(1.25)
  33. # ===== 第一页:标题页 =====
  34. title_para = new_doc.add_paragraph()
  35. title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
  36. # 添加一些空行使标题居中
  37. for _ in range(8):
  38. new_doc.add_paragraph()
  39. title_para = new_doc.add_paragraph()
  40. title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
  41. title_run = title_para.add_run('系统操作手册')
  42. set_font(title_run, font_name='黑体', font_size=28, bold=True)
  43. # 添加分页符
  44. add_page_break(new_doc)
  45. # ===== 第二页:目录 =====
  46. toc_title = new_doc.add_paragraph()
  47. toc_title.alignment = WD_ALIGN_PARAGRAPH.CENTER
  48. toc_run = toc_title.add_run('目 录')
  49. set_font(toc_run, font_name='黑体', font_size=18, bold=True)
  50. new_doc.add_paragraph() # 空行
  51. # 目录内容
  52. toc_items = [
  53. ('1、系统登录', 2),
  54. ('1.1 系统登录', 3),
  55. ('1.2 系统首页', 3),
  56. ('2、数据管理', 2),
  57. ('2.1 检查列表', 3),
  58. ('2.1.1 影像上传', 4),
  59. ('2.1.2 查看影像数据详情', 4),
  60. ('2.2 部位列表', 3),
  61. ('2.3 患者信息', 3),
  62. ('3、质控管理', 2),
  63. ('3.1 全量质控', 3),
  64. ('3.2 质控任务', 3),
  65. ('3.3 质控结果', 3),
  66. ('3.4 部位结果', 3),
  67. ('4、字典管理', 2),
  68. ('4.1 质控标准', 3),
  69. ('4.2 质控因子', 3),
  70. ('4.3 检查项目', 3),
  71. ('5、系统管理', 2),
  72. ('5.1 用户管理', 3),
  73. ('5.2 对接设置', 3),
  74. ]
  75. for item_text, level in toc_items:
  76. toc_para = new_doc.add_paragraph()
  77. indent = (level - 2) * 0.5 # 缩进
  78. toc_para.paragraph_format.left_indent = Inches(indent)
  79. toc_run = toc_para.add_run(item_text)
  80. set_font(toc_run, font_name='宋体', font_size=12)
  81. # 添加分页符
  82. add_page_break(new_doc)
  83. # ===== 第三页开始:正文内容 =====
  84. # 收集原文档中的所有段落和图片
  85. content_started = False
  86. for para in doc.paragraphs:
  87. text = para.text.strip()
  88. # 跳过前面的空段落,直到找到第一个有内容的段落
  89. if not content_started and not text:
  90. continue
  91. if text:
  92. content_started = True
  93. # 判断段落类型并应用相应格式
  94. new_para = new_doc.add_paragraph()
  95. # 一级标题 (如: 1、系统登录, 2、数据管理)
  96. if text and (text.startswith('1、') or text.startswith('2、') or
  97. text.startswith('3、') or text.startswith('4、') or
  98. text.startswith('5、') or text == '系统登录'):
  99. new_para.style = 'Heading 1'
  100. run = new_para.add_run(text)
  101. set_font(run, font_name='黑体', font_size=16, bold=True)
  102. new_para.paragraph_format.space_before = Pt(12)
  103. new_para.paragraph_format.space_after = Pt(6)
  104. # 二级标题 (如: 1.1, 1.2, 2.1)
  105. elif text and len(text) > 2 and text[0].isdigit() and '.' in text[:4] and text.split()[0].count('.') == 1:
  106. new_para.style = 'Heading 2'
  107. run = new_para.add_run(text)
  108. set_font(run, font_name='黑体', font_size=14, bold=True)
  109. new_para.paragraph_format.space_before = Pt(10)
  110. new_para.paragraph_format.space_after = Pt(5)
  111. new_para.paragraph_format.left_indent = Inches(0)
  112. # 三级标题 (如: 2.1.1, 2.1.2)
  113. elif text and len(text) > 4 and text[0].isdigit() and text.split()[0].count('.') == 2:
  114. new_para.style = 'Heading 3'
  115. run = new_para.add_run(text)
  116. set_font(run, font_name='黑体', font_size=13, bold=True)
  117. new_para.paragraph_format.space_before = Pt(8)
  118. new_para.paragraph_format.space_after = Pt(4)
  119. new_para.paragraph_format.left_indent = Inches(0)
  120. # 普通段落
  121. else:
  122. run = new_para.add_run(text)
  123. set_font(run, font_name='宋体', font_size=11)
  124. new_para.paragraph_format.space_after = Pt(3)
  125. new_para.paragraph_format.line_spacing = 1.15
  126. # 复制图片
  127. if para._element.xpath('.//w:drawing'):
  128. # 如果段落包含图片,复制图片
  129. for run in para.runs:
  130. if 'graphicData' in run._element.xml:
  131. try:
  132. # 获取图片
  133. inline = run._element.xpath('.//w:drawing//wp:inline')
  134. if inline:
  135. # 复制图片到新文档
  136. new_para_img = new_doc.add_paragraph()
  137. new_para_img.alignment = WD_ALIGN_PARAGRAPH.CENTER
  138. # 注意:这里需要从原文档中提取图片数据
  139. # 由于复杂性,我们先保留图片位置的空段落
  140. except:
  141. pass
  142. # 保存新文档
  143. new_doc.save(output_path)
  144. print(f"文档格式化完成!已保存到: {output_path}")
  145. if __name__ == '__main__':
  146. input_file = '/Users/geng/Desktop/系统操作手册.docx'
  147. output_file = '/Users/geng/Desktop/系统操作手册_格式化.docx'
  148. format_document(input_file, output_file)