#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Reformat the system operation manual (improved version with image copying).

Reads a source ``.docx`` file and writes a new document consisting of a
title page, a hard-coded table-of-contents page, and the body paragraphs of
the source re-styled by heading level, with embedded pictures copied over.

Usage::

    python script.py [INPUT_DOCX [OUTPUT_DOCX]]

When arguments are omitted the built-in default paths are used.
"""

import re
import sys
import traceback
from io import BytesIO

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
from docx.shared import Emu, Inches, Pt

# Default locations used when no command-line arguments are supplied.
DEFAULT_INPUT_FILE = '/Users/geng/Desktop/系统操作手册.docx'
DEFAULT_OUTPUT_FILE = '/Users/geng/Desktop/系统操作手册_格式化.docx'

# Heading patterns, compiled once because they are evaluated per paragraph.
_LEVEL_1_RE = re.compile(r'^\d+、')             # e.g. "1、系统登录"
_LEVEL_2_RE = re.compile(r'^\d+\.\d+\s')        # e.g. "1.1 ..."
_LEVEL_3_RE = re.compile(r'^\d+\.\d+\.\d+\s')   # e.g. "2.1.1 ..."

# Table-of-contents entries as (text, nesting level).
# NOTE(review): the nesting level is recorded but not applied to the layout;
# the leading whitespace inside the strings may have been wider originally --
# confirm against the intended output.
TOC_ITEMS = [
    ('1、系统登录', 0),
    (' 1.1 系统登录', 1),
    (' 1.2 系统首页', 1),
    ('2、数据管理', 0),
    (' 2.1 检查列表', 1),
    (' 2.1.1 影像上传', 2),
    (' 2.1.2 查看影像数据详情', 2),
    (' 2.2 部位列表', 1),
    (' 2.3 患者信息', 1),
    ('3、质控管理', 0),
    (' 3.1 全量质控', 1),
    (' 3.2 质控任务', 1),
    (' 3.3 质控结果', 1),
    (' 3.4 部位结果', 1),
    ('4、字典管理', 0),
    (' 4.1 质控标准', 1),
    (' 4.2 质控因子', 1),
    (' 4.3 检查项目', 1),
    ('5、系统管理', 0),
    (' 5.1 用户管理', 1),
    (' 5.2 对接设置', 1),
]


def set_font(run, font_name='宋体', font_size=12, bold=False):
    """Apply a font name, point size and bold flag to a run.

    The East Asian font slot is set explicitly as well; assigning
    ``run.font.name`` alone only covers the Latin slots, so CJK text would
    otherwise keep the document default font.
    """
    run.font.name = font_name
    run.font.size = Pt(font_size)
    run.font.bold = bold
    # rPr/rFonts exist at this point because font.name was just assigned.
    run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)


def is_heading_level_1(text):
    """Return True for level-1 headings such as ``1、系统登录``.

    The bare title ``系统登录`` is also treated as a level-1 heading.
    """
    if not text:
        return False
    return bool(_LEVEL_1_RE.match(text)) or text == '系统登录'


def is_heading_level_2(text):
    """Return True for level-2 headings such as ``1.1 xxx``.

    The number must be followed by whitespace, so the shortest match is
    four characters long.
    """
    if not text:
        return False
    return bool(_LEVEL_2_RE.match(text))


def is_heading_level_3(text):
    """Return True for level-3 headings such as ``2.1.1 xxx``.

    The number must be followed by whitespace, so the shortest match is
    six characters long.
    """
    if not text:
        return False
    return bool(_LEVEL_3_RE.match(text))


# (predicate, font size, space before, space after) for each heading level,
# checked in this order. All headings use bold 黑体.
_HEADING_STYLES = (
    (is_heading_level_1, 16, 12, 6),
    (is_heading_level_2, 14, 10, 5),
    (is_heading_level_3, 13, 8, 4),
)


def _has_drawing(para):
    """Return True when the paragraph contains at least one ``w:drawing``."""
    return bool(para._element.xpath('.//w:drawing'))


def _copy_run_image(run, source_part, img_para):
    """Copy the first picture referenced by *run* into *img_para*.

    Copying is best-effort: a picture that cannot be resolved or inserted is
    reported on stdout and skipped so the rest of the document is still
    produced.
    """
    blips = run._element.xpath('.//a:blip')
    if not blips:
        return

    r_id = blips[0].get(qn('r:embed'))
    try:
        # r_id is None for linked (non-embedded) pictures; that and a
        # dangling relationship id both surface here as KeyError.
        image_bytes = source_part.related_parts[r_id].blob
    except KeyError:
        print(f"跳过无法解析的图片引用: {r_id}")
        return

    # wp:extent is present for both inline and anchored drawings and
    # already stores the size in EMU, so no unit conversion is needed.
    size = {}
    extents = run._element.xpath('.//wp:extent')
    if extents:
        try:
            size = {
                'width': Emu(int(extents[0].get('cx'))),
                'height': Emu(int(extents[0].get('cy'))),
            }
        except (TypeError, ValueError):
            size = {}

    img_run = img_para.add_run()
    if size:
        try:
            img_run.add_picture(BytesIO(image_bytes), **size)
            return
        except Exception as e:  # best-effort: retry below without a size
            print(f"复制图片时出错: {e}")

    try:
        img_run.add_picture(BytesIO(image_bytes))
    except Exception as e:  # best-effort: report and keep going
        print(f"复制图片失败,已跳过: {e}")


def copy_paragraph_with_images(source_para, target_doc):
    """Append a re-styled copy of *source_para* to *target_doc*.

    The stripped paragraph text is written as a single run styled by heading
    level (bold 黑体) or as body text (宋体 11pt, 1.15 line spacing). When the
    source paragraph contains drawings, an extra centred paragraph is added
    and the first picture of each run is copied into it.
    """
    text = source_para.text.strip()
    new_para = target_doc.add_paragraph()
    fmt = new_para.paragraph_format

    for is_heading, font_size, space_before, space_after in _HEADING_STYLES:
        if is_heading(text):
            set_font(new_para.add_run(text), font_name='黑体',
                     font_size=font_size, bold=True)
            fmt.space_before = Pt(space_before)
            fmt.space_after = Pt(space_after)
            break
    else:
        # Plain body paragraph; empty paragraphs are kept as spacing.
        if text:
            set_font(new_para.add_run(text), font_name='宋体', font_size=11)
        fmt.space_after = Pt(3)
        fmt.line_spacing = 1.15

    if _has_drawing(source_para):
        img_para = target_doc.add_paragraph()
        img_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        for run in source_para.runs:
            _copy_run_image(run, source_para.part, img_para)


def _add_title_page(new_doc):
    """Write the title page followed by a page break."""
    # Blank paragraphs push the title down towards the middle of the page.
    for _ in range(8):
        new_doc.add_paragraph()

    title_para = new_doc.add_paragraph()
    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    set_font(title_para.add_run('系统操作手册'),
             font_name='黑体', font_size=28, bold=True)
    new_doc.add_page_break()


def _add_toc_page(new_doc):
    """Write the static table-of-contents page followed by a page break."""
    toc_title = new_doc.add_paragraph()
    toc_title.alignment = WD_ALIGN_PARAGRAPH.CENTER
    set_font(toc_title.add_run('目 录'),
             font_name='黑体', font_size=18, bold=True)
    new_doc.add_paragraph()

    for item_text, _level in TOC_ITEMS:
        toc_para = new_doc.add_paragraph()
        set_font(toc_para.add_run(item_text), font_name='宋体', font_size=12)
        toc_para.paragraph_format.space_after = Pt(2)

    new_doc.add_page_break()


def _copy_body(source_doc, new_doc):
    """Copy body paragraphs into *new_doc* and return how many were copied.

    Leading paragraphs with neither text nor a drawing are skipped; once
    content has started every paragraph, including empty ones, is copied.
    """
    content_started = False
    para_count = 0

    for para in source_doc.paragraphs:
        if not content_started:
            # An image-only paragraph counts as content and must not be
            # dropped just because it has no text.
            if not para.text.strip() and not _has_drawing(para):
                continue
            content_started = True

        copy_paragraph_with_images(para, new_doc)
        para_count += 1
        if para_count % 10 == 0:
            print(f"已处理 {para_count} 个段落...")

    return para_count


def format_document(input_path, output_path):
    """Build the formatted manual from *input_path* and save it to *output_path*.

    The output consists of a title page, a table-of-contents page and the
    re-styled body of the source document. Progress is printed to stdout.
    Errors from reading or saving the document propagate to the caller.
    """
    print("正在读取文档...")
    doc = Document(input_path)

    print("创建新文档...")
    new_doc = Document()

    # Page margins.
    for section in new_doc.sections:
        section.top_margin = Inches(1)
        section.bottom_margin = Inches(1)
        section.left_margin = Inches(1.25)
        section.right_margin = Inches(1.25)

    # Page 1: title page.
    print("创建标题页...")
    _add_title_page(new_doc)

    # Page 2: table of contents.
    print("创建目录...")
    _add_toc_page(new_doc)

    # Page 3 onwards: body content.
    print("复制正文内容...")
    para_count = _copy_body(doc, new_doc)

    print("保存文档...")
    new_doc.save(output_path)

    print("\n✓ 文档格式化完成!")
    print(f"✓ 已保存到: {output_path}")
    print(f"✓ 共处理 {para_count} 个段落")


def main(argv=None):
    """Run the formatter and return a process exit status.

    *argv* holds the optional positional arguments ``[input [output]]`` and
    defaults to ``sys.argv[1:]``; missing values fall back to the default
    paths. Returns 0 on success and 1 when formatting raised an exception.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    input_file = args[0] if len(args) > 0 else DEFAULT_INPUT_FILE
    output_file = args[1] if len(args) > 1 else DEFAULT_OUTPUT_FILE

    try:
        format_document(input_file, output_file)
    except Exception as e:  # top-level boundary: report, then signal failure
        print(f"\n✗ 错误: {e}")
        traceback.print_exc()
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())