"""把 sections/*.md 渲染成符合中国基金申报书排版规范的 .docx。 字体规范 (来自 typography.md): - 标题黑体 / 正文中文宋体 / 英文 Times New Roman - 行距 1.5 倍 / 首行缩进 2 字符 - A4 纸 / 上下 2.5cm / 左 3.0cm / 右 2.0cm 用法: python render_docx.py --fund-type key_rd -o 支持的基金类型: key_rd / major_project / nsfc_joint_fund / nsfc_general / nsfc_youth / provincial / enterprise """ from __future__ import annotations import argparse import re import sys from pathlib import Path from docx import Document from docx.enum.text import WD_ALIGN_PARAGRAPH from docx.oxml.ns import qn from docx.shared import Cm, Pt def _set_east_asia_font(run, font_name: str = "宋体") -> None: """让 run 的中文字体生效 (python-docx 不直接支持东亚字体)。""" rPr = run._element.get_or_add_rPr() rFonts = rPr.find(qn("w:rFonts")) if rFonts is None: from docx.oxml import OxmlElement rFonts = OxmlElement("w:rFonts") rPr.append(rFonts) rFonts.set(qn("w:eastAsia"), font_name) rFonts.set(qn("w:ascii"), "Times New Roman") rFonts.set(qn("w:hAnsi"), "Times New Roman") def init_doc() -> Document: doc = Document() section = doc.sections[0] section.page_height = Cm(29.7) section.page_width = Cm(21) section.top_margin = Cm(2.5) section.bottom_margin = Cm(2.5) section.left_margin = Cm(3.0) section.right_margin = Cm(2.0) # 不在这里改 default style — 直接每段自己设字体最稳 return doc def add_paragraph(doc: Document, text: str, *, level: int = 0) -> None: """level: 0 正文 / 1 一级标题 / 2 二级标题 / 3 三级标题""" if level == 0: p = doc.add_paragraph() p.paragraph_format.line_spacing = 1.5 p.paragraph_format.first_line_indent = Pt(24) # 2 字符 p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY run = p.add_run(text) run.font.name = "Times New Roman" run.font.size = Pt(12) # 小四 _set_east_asia_font(run, "宋体") else: sizes = {1: Pt(14), 2: Pt(12), 3: Pt(12)} bolds = {1: False, 2: True, 3: True} font_cn = {1: "黑体", 2: "黑体", 3: "宋体"} p = doc.add_paragraph() p.paragraph_format.line_spacing = 1.5 p.paragraph_format.space_before = Pt(6) p.paragraph_format.space_after = Pt(3) run = p.add_run(text) run.font.name = "Times New Roman" run.font.size = sizes[level] run.bold = bolds[level] _set_east_asia_font(run, font_cn[level]) _HEADING_RE = re.compile(r"^(#+)\s+(.+)$") _TABLE_LINE_RE = re.compile(r"^\s*\|.+\|\s*$") def render_md_block(doc: Document, md_text: str) -> None: lines = md_text.splitlines() i = 0 while i < len(lines): line = lines[i].rstrip() # markdown table if _TABLE_LINE_RE.match(line): tbl_lines = [] while i < len(lines) and _TABLE_LINE_RE.match(lines[i]): tbl_lines.append(lines[i]) i += 1 _render_md_table(doc, tbl_lines) continue if not line.strip(): i += 1 continue m = _HEADING_RE.match(line) if m: hashes, title = m.group(1), m.group(2) level = min(len(hashes), 3) add_paragraph(doc, title.strip(), level=level) elif line.startswith(">"): # blockquote — 申报书里通常是写作提示, 渲染时跳过 (撰稿提示不入正稿) pass else: # 正文段落: 把连续非空行合并成一段 buf = [line] j = i + 1 while j < len(lines) and lines[j].strip() and not _HEADING_RE.match(lines[j]) and not lines[j].startswith(">") and not _TABLE_LINE_RE.match(lines[j]): buf.append(lines[j].rstrip()) j += 1 text = " ".join(s.strip() for s in buf) add_paragraph(doc, text, level=0) i = j continue i += 1 def _render_md_table(doc: Document, table_lines: list[str]) -> None: """把一段 markdown 表格转换成 docx 表格。第二行如果是分隔符 (---) 跳过。""" rows: list[list[str]] = [] for ln in table_lines: cells = [c.strip() for c in ln.strip().strip("|").split("|")] # skip pure separator row if all(re.match(r"^[-: ]+$", c) for c in cells): continue rows.append(cells) if not rows: return n_cols = max(len(r) for r in rows) for r in rows: while len(r) < n_cols: r.append("") table = doc.add_table(rows=len(rows), cols=n_cols) table.style = "Light Grid Accent 1" for ri, row in enumerate(rows): for ci, val in enumerate(row): cell = table.rows[ri].cells[ci] cell.text = "" # clear default p = cell.paragraphs[0] run = p.add_run(val) run.font.name = "Times New Roman" run.font.size = Pt(10.5) # 五号 _set_east_asia_font(run, "宋体") if ri == 0: run.bold = True def render_sections(sections_dir: Path, out: Path, fund_type: str) -> None: if not sections_dir.is_dir(): print(f"[ERR] sections dir not found: {sections_dir}", file=sys.stderr) sys.exit(2) md_files = sorted(sections_dir.glob("*.md")) if not md_files: print(f"[ERR] no .md found in {sections_dir}", file=sys.stderr) sys.exit(2) doc = init_doc() for f in md_files: text = f.read_text(encoding="utf-8") render_md_block(doc, text) # 每个 section 后加分页, 让结构更清晰 doc.add_page_break() out.parent.mkdir(parents=True, exist_ok=True) doc.save(str(out)) # 统计 paras = sum(1 for p in doc.paragraphs) chars = sum(len(p.text) for p in doc.paragraphs) print(f"[OK] rendered {len(md_files)} sections → {out}") print(f" paragraphs: {paras} | total chars: {chars}") print(f" fund_type: {fund_type}") print(f" font: 中文宋体小四 / 英文 Times New Roman 小四 / 行距 1.5 / 首行缩进 2 字符") def main() -> None: ap = argparse.ArgumentParser(description="渲染章节 md → 申报书 docx") ap.add_argument("sections_dir", type=Path, help="sections/*.md 目录") ap.add_argument("--fund-type", required=True, choices=["key_rd", "major_project", "nsfc_joint_fund", "nsfc_general", "nsfc_youth", "provincial", "enterprise"]) ap.add_argument("-o", "--output", type=Path, required=True, help="输出 .docx 路径") args = ap.parse_args() render_sections(args.sections_dir, args.output, args.fund_type) if __name__ == "__main__": main()