#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
格式化系统操作手册 - 第3版
- 添加自动目录
- 添加页眉页脚（页码）
"""

from docx import Document
from docx.shared import Pt, Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
from docx.oxml import OxmlElement
import re
from io import BytesIO

def set_font(run, font_name='宋体', font_size=12, bold=False):
    """Apply a typeface, point size and bold flag to a run.

    Args:
        run: The run to format; its ``font`` proxy and underlying XML
            element are both modified.
        font_name: Typeface name, written to ``run.font.name`` and to the
            ``w:eastAsia`` attribute of the run's ``w:rFonts`` element.
        font_size: Size in points.
        bold: Whether the run is bold.
    """
    font = run.font
    font.name = font_name
    font.size = Pt(font_size)
    font.bold = bold

    # Assigning font.name above is what makes rPr/rFonts exist, so this must
    # come after it.
    # NOTE(review): presumably the explicit w:eastAsia attribute is needed so
    # CJK text picks up the same typeface - confirm against python-docx docs.
    r_fonts = run._element.rPr.rFonts
    r_fonts.set(qn('w:eastAsia'), font_name)

def is_heading_level_1(text):
    """Return True when *text* looks like a level-1 heading.

    A level-1 heading is either exactly the string "系统登录" or starts with
    one or more digits immediately followed by the enumeration comma "、".
    Falsy input (empty string, None) yields False.
    """
    if not text:
        return False
    if text == '系统登录':
        return True
    return re.match(r'^\d+、', text) is not None

def is_heading_level_2(text):
    """Return True when *text* looks like a level-2 heading.

    A level-2 heading starts with ``<digits>.<digits>`` followed by a
    whitespace character (for example "1.2 标题"). Falsy input and input
    shorter than three characters yield False.
    """
    too_short = not text or len(text) < 3
    if too_short:
        return False
    # The trailing \s keeps "1.2.3 ..." (a level-3 heading) from matching.
    return re.match(r'^\d+\.\d+\s', text) is not None

def is_heading_level_3(text):
    """Return True when *text* looks like a level-3 heading.

    A level-3 heading starts with ``<digits>.<digits>.<digits>`` followed by
    a whitespace character (for example "1.2.3 标题"). Falsy input and input
    shorter than five characters yield False.
    """
    too_short = not text or len(text) < 5
    if too_short:
        return False
    return re.match(r'^\d+\.\d+\.\d+\s', text) is not None

def add_toc(doc):
    """Append a paragraph containing a table-of-contents field to *doc*.

    The field is written as raw XML into a single empty run: a ``begin``
    field character, the instruction text, and an ``end`` field character.
    No field result is stored, so the entries only appear once the field is
    updated in the word processor.

    Args:
        doc: The document to append the paragraph to.

    Returns:
        The newly added paragraph.
    """
    toc_para = doc.add_paragraph()
    r = toc_para.add_run()._r

    begin = OxmlElement('w:fldChar')
    begin.set(qn('w:fldCharType'), 'begin')

    # Field instruction; xml:space="preserve" keeps the embedded spaces.
    instruction = OxmlElement('w:instrText')
    instruction.set(qn('xml:space'), 'preserve')
    instruction.text = 'TOC \\o "1-3" \\h \\z \\u'

    end = OxmlElement('w:fldChar')
    end.set(qn('w:fldCharType'), 'end')

    # Order matters: begin, instruction, end.
    for child in (begin, instruction, end):
        r.append(child)

    return toc_para

def _append_field(run, instruction):
    """Append a begin / instruction / end complex-field triple to *run*."""
    begin = OxmlElement('w:fldChar')
    begin.set(qn('w:fldCharType'), 'begin')

    instr = OxmlElement('w:instrText')
    instr.set(qn('xml:space'), 'preserve')
    instr.text = instruction

    end = OxmlElement('w:fldChar')
    end.set(qn('w:fldCharType'), 'end')

    for element in (begin, instr, end):
        run._r.append(element)


def add_page_number(section):
    """Write a centred "第 X 页/共 Y 页" page-number footer into *section*.

    The section's footer is unlinked from the previous section, its existing
    paragraphs are cleared, and the first footer paragraph (or a new one if
    none exists) is filled with literal text plus PAGE and NUMPAGES fields.
    Every run, including the one holding the NUMPAGES field, is formatted as
    10pt 宋体 so the whole footer renders in one typeface.

    Args:
        section: The document section whose footer is rewritten.
    """
    footer = section.footer
    footer.is_linked_to_previous = False

    # Clear existing footer content.
    for para in footer.paragraphs:
        para.clear()

    footer_para = footer.paragraphs[0] if footer.paragraphs else footer.add_paragraph()
    footer_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # "第 " followed by the current page number field in the same run.
    # set_font runs before the field elements are appended, as it did before.
    run = footer_para.add_run('第 ')
    set_font(run, font_name='宋体', font_size=10)
    _append_field(run, 'PAGE')

    run = footer_para.add_run(' 页/共 ')
    set_font(run, font_name='宋体', font_size=10)

    # Total page count. This run previously received no font settings, so
    # the number fell back to the document default font and size.
    run = footer_para.add_run()
    set_font(run, font_name='宋体', font_size=10)
    _append_field(run, 'NUMPAGES')

    run = footer_para.add_run(' 页')
    set_font(run, font_name='宋体', font_size=10)

def copy_paragraph_with_images(source_para, target_doc, apply_heading_style=False):
    """Copy one paragraph's text and pictures into *target_doc*.

    The stripped text is appended as a new paragraph and formatted according
    to what it looks like: level-1/2/3 headings get 黑体 bold at 16/14/13pt
    with matching spacing, anything else non-empty becomes an 11pt 宋体 body
    paragraph. An empty source paragraph still produces an empty paragraph.

    If the source paragraph contains any drawing, a second, centred paragraph
    is appended and the first picture of each run is copied into it. Inline
    pictures keep their original extent; when the extent cannot be read or
    inserting at that size fails, the picture is inserted at its native size.
    Pictures that cannot be copied are skipped with a printed warning rather
    than aborting the whole document.

    Args:
        source_para: Paragraph to copy from.
        target_doc: Document that receives the new paragraph(s).
        apply_heading_style: When True, heading paragraphs are also given the
            built-in 'Heading 1' / 'Heading 2' / 'Heading 3' style.
    """
    text = source_para.text.strip()
    new_para = target_doc.add_paragraph()

    # (predicate, style name, font size, space before, space after), all in
    # points. Checked in order; the first matching rule wins.
    heading_rules = (
        (is_heading_level_1, 'Heading 1', 16, 12, 6),
        (is_heading_level_2, 'Heading 2', 14, 10, 5),
        (is_heading_level_3, 'Heading 3', 13, 8, 4),
    )
    for matches, style_name, size, before, after in heading_rules:
        if not matches(text):
            continue
        if apply_heading_style:
            new_para.style = style_name
        run = new_para.add_run(text)
        set_font(run, font_name='黑体', font_size=size, bold=True)
        new_para.paragraph_format.space_before = Pt(before)
        new_para.paragraph_format.space_after = Pt(after)
        break
    else:
        # No heading rule matched: plain body paragraph.
        if text:
            run = new_para.add_run(text)
            set_font(run, font_name='宋体', font_size=11)
            new_para.paragraph_format.space_after = Pt(3)
            new_para.paragraph_format.line_spacing = 1.15

    if not source_para._element.xpath('.//w:drawing'):
        return

    img_para = target_doc.add_paragraph()
    img_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    for run in source_para.runs:
        blips = run._element.xpath('.//a:blip')
        if not blips:
            continue

        # Only the first picture reference of a run is copied.
        rId = blips[0].get(qn('r:embed'))
        try:
            image_bytes = source_para.part.related_parts[rId].blob
        except KeyError:
            # No embedded part behind this reference (e.g. r:embed missing);
            # skip the picture instead of aborting the whole conversion.
            print(f"⚠ 跳过图片：未找到嵌入的图片数据 (rId={rId})")
            continue

        try:
            # Extent is stored in EMU; 914400 EMU per inch.
            inline = run._element.xpath('.//wp:inline')[0]
            extent = inline.xpath('.//wp:extent')[0]
            width = Inches(int(extent.get('cx')) / 914400)
            height = Inches(int(extent.get('cy')) / 914400)
            img_para.add_run().add_picture(
                BytesIO(image_bytes), width=width, height=height
            )
        except Exception:
            # Best-effort fallback: no inline extent (or sized insert failed),
            # so try again at the picture's native size.
            try:
                img_para.add_run().add_picture(BytesIO(image_bytes))
            except Exception as exc:
                # Report the loss instead of dropping the picture silently.
                print(f"⚠ 图片复制失败，已跳过: {exc}")

def format_document(input_path, output_path):
    """Rebuild the document at *input_path* as a formatted manual.

    The output document consists of a title page, a page with a
    table-of-contents field, the body copied paragraph by paragraph from the
    input (leading empty paragraphs are dropped), and a page-number footer on
    every section. Progress is reported with ``print``.

    Args:
        input_path: Path of the source .docx file.
        output_path: Path the formatted .docx file is saved to.
    """
    print("正在读取文档...")
    source_doc = Document(input_path)

    print("创建新文档...")
    new_doc = Document()

    # Page margins: 1 inch top/bottom, 1.25 inches left/right.
    for section in new_doc.sections:
        section.top_margin = Inches(1)
        section.bottom_margin = Inches(1)
        section.left_margin = Inches(1.25)
        section.right_margin = Inches(1.25)

    # ===== Page 1: title page =====
    print("创建标题页...")
    blank_lines_before_title = 8
    for _ in range(blank_lines_before_title):
        new_doc.add_paragraph()

    title_para = new_doc.add_paragraph()
    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    set_font(title_para.add_run('系统操作手册'), font_name='黑体', font_size=28, bold=True)

    new_doc.add_page_break()

    # ===== Page 2: table of contents =====
    print("创建自动目录...")
    toc_heading = new_doc.add_paragraph()
    toc_heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    set_font(toc_heading.add_run('目  录'), font_name='黑体', font_size=18, bold=True)
    new_doc.add_paragraph()

    add_toc(new_doc)
    for _ in range(2):
        new_doc.add_paragraph()

    new_doc.add_page_break()

    # ===== Page 3 onwards: body =====
    print("复制正文内容...")
    para_count = 0
    seen_text = False

    for para in source_doc.paragraphs:
        if not seen_text:
            # Skip empty paragraphs until the first one that has text.
            if not para.text.strip():
                continue
            seen_text = True

        # Heading styles are applied so the TOC field can pick them up.
        copy_paragraph_with_images(para, new_doc, apply_heading_style=True)
        para_count += 1

        if para_count % 10 == 0:
            print(f"已处理 {para_count} 个段落...")

    # ===== Footer with page numbers =====
    print("添加页眉页脚...")
    for section in new_doc.sections:
        add_page_number(section)

    print("保存文档...")
    new_doc.save(output_path)
    print(f"\n✓ 文档格式化完成！")
    print(f"✓ 已保存到: {output_path}")
    print(f"✓ 共处理 {para_count} 个段落")
    print("\n提示：打开文档后，请右键点击目录，选择'更新域'来更新目录内容。")


if __name__ == '__main__':
    import sys

    # Usage: script.py [input.docx [output.docx]]
    # Without arguments the original hard-coded paths are used.
    cli_args = sys.argv[1:]
    input_file = cli_args[0] if len(cli_args) >= 1 else '/Users/geng/Desktop/系统操作手册.docx'
    output_file = cli_args[1] if len(cli_args) >= 2 else '/Users/geng/Desktop/系统操作手册_格式化.docx'

    try:
        format_document(input_file, output_file)
    except Exception as e:
        print(f"\n✗ 错误: {e}")
        import traceback
        traceback.print_exc()
        # Signal failure to the calling shell; previously the script exited
        # with status 0 even when formatting failed.
        sys.exit(1)