| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- 格式化系统操作手册
- """
- from docx import Document
- from docx.shared import Pt, Inches, RGBColor
- from docx.enum.text import WD_ALIGN_PARAGRAPH
- from docx.oxml.ns import qn
- from docx.oxml import OxmlElement
def add_page_break(doc):
    """Append a page break to the end of *doc*.

    Thin wrapper around ``doc.add_page_break()``; whatever that call
    returns is discarded, so this function always returns ``None``.
    """
    _ = doc.add_page_break()
def set_font(run, font_name='宋体', font_size=12, bold=False):
    """Apply a typeface, point size and weight to *run*.

    Args:
        run: The run whose font is modified in place.
        font_name: Typeface name, also written to the East Asian font slot.
        font_size: Size in points (wrapped in ``Pt``).
        bold: Whether the run is rendered bold.
    """
    font = run.font
    font.name = font_name
    font.size = Pt(font_size)
    font.bold = bold

    # Also register the typeface for East Asian (Chinese) text by setting
    # the w:eastAsia attribute on the run's rFonts element.
    east_asia_attr = qn('w:eastAsia')
    r_fonts = run._element.rPr.rFonts
    r_fonts.set(east_asia_attr, font_name)
# Per heading level: (style name, font size pt, space before pt,
# space after pt, reset left indent to zero?).
_HEADING_FORMATS = {
    1: ('Heading 1', 16, 12, 6, False),
    2: ('Heading 2', 14, 10, 5, True),
    3: ('Heading 3', 13, 8, 4, True),
}


def _heading_level(text):
    """Return the heading level (1-3) that *text* looks like, or 0 for body text."""
    if not text:
        return 0
    # Level 1: one of the five known chapter prefixes, or the bare chapter title.
    if text.startswith(('1、', '2、', '3、', '4、', '5、')) or text == '系统登录':
        return 1
    if not text[0].isdigit():
        return 0
    # Count the dots in the first whitespace-delimited token ("2.1" -> 1, "2.1.1" -> 2).
    dots_in_number = text.split()[0].count('.')
    if len(text) > 2 and '.' in text[:4] and dots_in_number == 1:
        return 2
    if len(text) > 4 and dots_in_number == 2:
        return 3
    return 0


def _add_image_placeholders(new_doc, para):
    """Append one centred empty paragraph per run of *para* holding an inline drawing."""
    for run in para.runs:
        if run._element.xpath('.//w:drawing//wp:inline'):
            placeholder = new_doc.add_paragraph()
            placeholder.alignment = WD_ALIGN_PARAGRAPH.CENTER


def format_document(input_path, output_path):
    """Rebuild the manual at *input_path* as a newly formatted document.

    The output is a brand-new document made of a title page, a table of
    contents page built from a hard-coded list, and then the text of every
    paragraph of the source document, re-styled as a level 1-3 heading or
    as body text depending on its numbering prefix.

    NOTE: pictures are NOT copied. For each source run that contains an
    inline drawing, an empty centred paragraph is inserted as a placeholder.

    Args:
        input_path: Path of the source document, passed to ``Document``.
        output_path: Path the new document is saved to.
    """
    doc = Document(input_path)
    new_doc = Document()

    # Page margins.
    for section in new_doc.sections:
        section.top_margin = Inches(1)
        section.bottom_margin = Inches(1)
        section.left_margin = Inches(1.25)
        section.right_margin = Inches(1.25)

    # ===== Page 1: title page =====
    # Nine empty paragraphs push the title down the page. This is the same
    # number the earlier code produced (one stray empty paragraph plus a
    # loop of eight), so the title keeps its vertical position.
    for _ in range(9):
        new_doc.add_paragraph()
    title_para = new_doc.add_paragraph()
    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    title_run = title_para.add_run('系统操作手册')
    set_font(title_run, font_name='黑体', font_size=28, bold=True)
    add_page_break(new_doc)

    # ===== Page 2: table of contents =====
    toc_title = new_doc.add_paragraph()
    toc_title.alignment = WD_ALIGN_PARAGRAPH.CENTER
    toc_run = toc_title.add_run('目 录')
    set_font(toc_run, font_name='黑体', font_size=18, bold=True)
    new_doc.add_paragraph()  # blank line under the TOC title

    # Each entry is (text, level); the level only drives the indent below.
    toc_items = [
        ('1、系统登录', 2),
        ('1.1 系统登录', 3),
        ('1.2 系统首页', 3),
        ('2、数据管理', 2),
        ('2.1 检查列表', 3),
        ('2.1.1 影像上传', 4),
        ('2.1.2 查看影像数据详情', 4),
        ('2.2 部位列表', 3),
        ('2.3 患者信息', 3),
        ('3、质控管理', 2),
        ('3.1 全量质控', 3),
        ('3.2 质控任务', 3),
        ('3.3 质控结果', 3),
        ('3.4 部位结果', 3),
        ('4、字典管理', 2),
        ('4.1 质控标准', 3),
        ('4.2 质控因子', 3),
        ('4.3 检查项目', 3),
        ('5、系统管理', 2),
        ('5.1 用户管理', 3),
        ('5.2 对接设置', 3),
    ]
    for item_text, level in toc_items:
        toc_para = new_doc.add_paragraph()
        # Level 2 is flush left; each deeper level adds half an inch.
        toc_para.paragraph_format.left_indent = Inches((level - 2) * 0.5)
        toc_run = toc_para.add_run(item_text)
        set_font(toc_run, font_name='宋体', font_size=12)
    add_page_break(new_doc)

    # ===== Page 3 onwards: body =====
    content_started = False
    for para in doc.paragraphs:
        text = para.text.strip()
        if text:
            content_started = True
        elif not content_started:
            # Skip blank paragraphs that precede the first real content.
            continue

        new_para = new_doc.add_paragraph()
        level = _heading_level(text)
        if level:
            style_name, size, before, after, reset_indent = _HEADING_FORMATS[level]
            new_para.style = style_name
            run = new_para.add_run(text)
            set_font(run, font_name='黑体', font_size=size, bold=True)
            new_para.paragraph_format.space_before = Pt(before)
            new_para.paragraph_format.space_after = Pt(after)
            if reset_indent:
                new_para.paragraph_format.left_indent = Inches(0)
        else:
            # Body text (this also covers blank paragraphs inside the content).
            run = new_para.add_run(text)
            set_font(run, font_name='宋体', font_size=11)
            new_para.paragraph_format.space_after = Pt(3)
            new_para.paragraph_format.line_spacing = 1.15

        # Reserve a spot for each picture; the image data itself is not copied.
        _add_image_placeholders(new_doc, para)

    new_doc.save(output_path)
    print(f"文档格式化完成!已保存到: {output_path}")
if __name__ == '__main__':
    import sys

    # Usage: script.py [input.docx [output.docx]]
    # With no arguments the original hard-coded desktop paths are used, so
    # existing invocations keep working unchanged.
    default_input = '/Users/geng/Desktop/系统操作手册.docx'
    default_output = '/Users/geng/Desktop/系统操作手册_格式化.docx'

    cli_args = sys.argv[1:]
    if len(cli_args) > 2:
        sys.exit(f"usage: {sys.argv[0]} [input.docx [output.docx]]")

    input_file = cli_args[0] if len(cli_args) >= 1 else default_input
    output_file = cli_args[1] if len(cli_args) >= 2 else default_output
    format_document(input_file, output_file)
|