proposal/render_docx: 加目录 + 内联 md 解析 + 列表分行
修复实测三个问题: 1. 加目录页 (Word TOC 域 + 改用 builtin Heading 1/2/3 样式才能被索引) 2. 内联解析 **加粗** / *斜体* / `等宽`,不再以字面量进入正文与表格 3. [N] 引文 / 1./2. / 一、 / (一) 等列表项各自独立成段, 不再被散文合并器吃掉换行 顺带: - 直接修改 Normal/Heading style 的 rFonts 让样式继承字体 - 标题 Heading 1/2 强制黑体, 字色覆盖 builtin 蓝色 - 表格 cells 也走内联解析, 解决单元格 ** 残留
This commit is contained in:
parent
80f9934f8f
commit
8995a898ba
|
|
@ -1,14 +1,18 @@
|
||||||
"""把 sections/*.md 渲染成符合中国基金申报书排版规范的 .docx。
|
"""把 sections/*.md 渲染成符合中国基金申报书排版规范的 .docx。
|
||||||
|
|
||||||
字体规范 (来自 typography.md):
|
字体规范 (来自 typography.md):
|
||||||
- 标题黑体 / 正文中文宋体 / 英文 Times New Roman
|
- 标题黑体 (一二级) / 三级标题宋体 / 正文中文宋体 / 英文 Times New Roman
|
||||||
- 行距 1.5 倍 / 首行缩进 2 字符
|
- 行距 1.5 倍 / 首行缩进 2 字符
|
||||||
- A4 纸 / 上下 2.5cm / 左 3.0cm / 右 2.0cm
|
- A4 纸 / 上下 2.5cm / 左 3.0cm / 右 2.0cm
|
||||||
|
|
||||||
|
特性:
|
||||||
|
- 自动插入"目录"页 (Word 内右键更新域 / F9 即生成 TOC)
|
||||||
|
- 内联 markdown 解析: **加粗** / *斜体* / `等宽`
|
||||||
|
- 列表/引用文献项 ([N], 1., (1), 一、, -, *) 各自独立成段
|
||||||
|
- markdown 表格自动识别, 包含分隔行 |---|---|
|
||||||
|
|
||||||
用法:
|
用法:
|
||||||
python render_docx.py <sections_dir> --fund-type key_rd -o <out.docx>
|
python render_docx.py <sections_dir> --fund-type key_rd -o <out.docx>
|
||||||
|
|
||||||
支持的基金类型: key_rd / major_project / nsfc_joint_fund / nsfc_general / nsfc_youth / provincial / enterprise
|
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
import argparse
|
import argparse
|
||||||
|
|
@ -18,25 +22,47 @@ from pathlib import Path
|
||||||
|
|
||||||
from docx import Document
|
from docx import Document
|
||||||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||||||
|
from docx.oxml import OxmlElement
|
||||||
from docx.oxml.ns import qn
|
from docx.oxml.ns import qn
|
||||||
from docx.shared import Cm, Pt
|
from docx.shared import Cm, Pt, RGBColor
|
||||||
|
|
||||||
|
|
||||||
def _set_east_asia_font(run, font_name: str = "宋体") -> None:
|
# ───────────────────────── 字体辅助 ─────────────────────────
|
||||||
"""让 run 的中文字体生效 (python-docx 不直接支持东亚字体)。"""
|
|
||||||
|
def _set_run_fonts(run, *, cn_font: str = "宋体", en_font: str = "Times New Roman") -> None:
    """Set both the East Asian and the Western typeface of a run.

    The East Asian font has to be written directly onto ``w:rFonts``
    (the original author notes python-docx offers no direct API for it).

    Args:
        run: python-docx run whose ``w:rPr/w:rFonts`` element is updated.
        cn_font: Typeface written to ``w:eastAsia``.
        en_font: Typeface written to ``w:ascii`` and ``w:hAnsi``.
    """
    rPr = run._element.get_or_add_rPr()
    # get_or_add_rFonts() creates <w:rFonts> at its schema-defined position.
    # A plain append() would place it after siblings such as <w:b>/<w:sz>
    # when those were set before this call, leaving <w:rPr> out of order.
    rFonts = rPr.get_or_add_rFonts()
    rFonts.set(qn("w:eastAsia"), cn_font)
    rFonts.set(qn("w:ascii"), en_font)
    rFonts.set(qn("w:hAnsi"), en_font)
|
||||||
|
|
||||||
|
|
||||||
|
def _set_style_fonts(style, *, cn_font: str = "宋体", en_font: str = "Times New Roman") -> None:
    """Write ``w:rFonts`` onto a style so paragraphs based on it inherit the fonts.

    Args:
        style: python-docx style object; its underlying ``w:style`` element
            is modified in place.
        cn_font: Typeface written to ``w:eastAsia``.
        en_font: Typeface written to ``w:ascii`` and ``w:hAnsi``.
    """
    # Use the generated get_or_add_* helpers instead of insert(0)/append():
    # they place <w:rPr> after <w:name>/<w:pPr> inside <w:style>, and
    # <w:rFonts> at its proper slot inside <w:rPr>, keeping the XML in
    # schema order.
    rPr = style.element.get_or_add_rPr()
    rFonts = rPr.get_or_add_rFonts()
    rFonts.set(qn("w:eastAsia"), cn_font)
    rFonts.set(qn("w:ascii"), en_font)
    rFonts.set(qn("w:hAnsi"), en_font)
|
||||||
|
|
||||||
|
|
||||||
|
# ───────────────────────── 文档初始化 ─────────────────────────
|
||||||
|
|
||||||
def init_doc() -> Document:
|
def init_doc() -> Document:
|
||||||
doc = Document()
|
doc = Document()
|
||||||
|
|
||||||
|
# 页面
|
||||||
section = doc.sections[0]
|
section = doc.sections[0]
|
||||||
section.page_height = Cm(29.7)
|
section.page_height = Cm(29.7)
|
||||||
section.page_width = Cm(21)
|
section.page_width = Cm(21)
|
||||||
|
|
@ -44,88 +70,210 @@ def init_doc() -> Document:
|
||||||
section.bottom_margin = Cm(2.5)
|
section.bottom_margin = Cm(2.5)
|
||||||
section.left_margin = Cm(3.0)
|
section.left_margin = Cm(3.0)
|
||||||
section.right_margin = Cm(2.0)
|
section.right_margin = Cm(2.0)
|
||||||
# 不在这里改 default style — 直接每段自己设字体最稳
|
|
||||||
|
# Normal 样式 (正文)
|
||||||
|
normal = doc.styles["Normal"]
|
||||||
|
normal.font.name = "Times New Roman"
|
||||||
|
normal.font.size = Pt(12) # 小四
|
||||||
|
_set_style_fonts(normal, cn_font="宋体")
|
||||||
|
pf = normal.paragraph_format
|
||||||
|
pf.line_spacing = 1.5
|
||||||
|
pf.space_before = Pt(0)
|
||||||
|
pf.space_after = Pt(0)
|
||||||
|
|
||||||
|
# Heading 样式 — 让 Word TOC 域识别
|
||||||
|
for lvl, sz, cn in [(1, Pt(14), "黑体"), (2, Pt(12), "黑体"), (3, Pt(12), "宋体")]:
|
||||||
|
h = doc.styles[f"Heading {lvl}"]
|
||||||
|
h.font.name = "Times New Roman"
|
||||||
|
h.font.size = sz
|
||||||
|
h.font.bold = True
|
||||||
|
h.font.color.rgb = RGBColor(0, 0, 0) # 覆盖 builtin 蓝色
|
||||||
|
_set_style_fonts(h, cn_font=cn)
|
||||||
|
h.paragraph_format.line_spacing = 1.5
|
||||||
|
h.paragraph_format.space_before = Pt(6)
|
||||||
|
h.paragraph_format.space_after = Pt(3)
|
||||||
|
h.paragraph_format.first_line_indent = None
|
||||||
|
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
|
||||||
def add_paragraph(doc: Document, text: str, *, level: int = 0) -> None:
|
# ───────────────────────── TOC ─────────────────────────
|
||||||
"""level: 0 正文 / 1 一级标题 / 2 二级标题 / 3 三级标题"""
|
|
||||||
if level == 0:
|
def add_toc(doc: Document, depth: int = 3) -> None:
    """Append a centred "目 录" title, a Word TOC field and a page break.

    The field instruction covers heading levels 1 through ``depth``. Per the
    original author's note, Word does not expand the field on open (the user
    presses F9 or chooses "update field"), while LibreOffice shows it more
    directly.

    Args:
        doc: Document that receives the title, field paragraph and page break.
        depth: Deepest heading level listed in the table of contents.
    """

    def _field_char(kind: str):
        # Build one <w:fldChar w:fldCharType="kind"/> marker.
        node = OxmlElement("w:fldChar")
        node.set(qn("w:fldCharType"), kind)
        return node

    # Title paragraph: a plain paragraph (not a Heading style) so that the
    # title itself is not picked up by the TOC.
    title_par = doc.add_paragraph()
    title_par.alignment = WD_ALIGN_PARAGRAPH.CENTER
    title_fmt = title_par.paragraph_format
    title_fmt.first_line_indent = None
    title_fmt.space_before = Pt(12)
    title_fmt.space_after = Pt(6)
    title_run = title_par.add_run("目 录")
    title_run.font.size = Pt(16)
    title_run.font.bold = True
    _set_run_fonts(title_run, cn_font="黑体")

    # Field paragraph: begin / instruction / separate / placeholder / end,
    # all appended to a single run in that order.
    field_par = doc.add_paragraph()
    field_par.paragraph_format.first_line_indent = None
    field_run = field_par.add_run()

    instruction = OxmlElement("w:instrText")
    instruction.set(qn("xml:space"), "preserve")
    instruction.text = f' TOC \\o "1-{depth}" \\h \\z \\u '

    # Placeholder text shown until the field is updated.
    hint = OxmlElement("w:t")
    hint.set(qn("xml:space"), "preserve")
    hint.text = "[在 Word 中按 F9 或右键此处选择 “更新域” 即可生成完整目录]"

    field_nodes = (
        _field_char("begin"),
        instruction,
        _field_char("separate"),
        hint,
        _field_char("end"),
    )
    for node in field_nodes:
        field_run._element.append(node)

    doc.add_page_break()
|
||||||
|
|
||||||
|
doc.add_page_break()
|
||||||
|
|
||||||
|
|
||||||
_HEADING_RE = re.compile(r"^(#+)\s+(.+)$")
|
# ───────────────────────── 内联 markdown ─────────────────────────
|
||||||
_TABLE_LINE_RE = re.compile(r"^\s*\|.+\|\s*$")
|
|
||||||
|
# Order matters: **bold** must be tried before *italic*, otherwise the italic
# branch would consume the first asterisk of a bold marker.
#
# Delimiter rules:
# - An opening marker must be followed by a non-space character and a closing
#   marker must be preceded by one, so "2 * 3 * 4" and a leading "* " list
#   bullet are not mistaken for emphasis.
# - The italic opener must not follow an ASCII word character or "*"
#   (keeps "a*b*c" literal). The check is ASCII-only on purpose: a Unicode
#   "\w" would also reject CJK characters, and Chinese text has no spaces
#   before "*斜体*".
_INLINE_RE = re.compile(
    r"(?P<bold>\*\*(?=\S)(?P<bold_t>[^*\n]+?)(?<=\S)\*\*)"
    r"|(?P<italic>(?<![*A-Za-z0-9_])\*(?=\S)(?P<italic_t>[^*\n]+?)(?<=\S)\*(?!\*))"
    r"|(?P<code>`(?P<code_t>[^`\n]+?)`)"
)


def parse_inline(text: str) -> list[tuple[str, str]]:
    """Split ``text`` into ``(style, segment)`` pairs.

    ``style`` is one of ``"plain"``, ``"bold"``, ``"italic"`` or ``"code"``;
    ``segment`` is the text with its markdown delimiters removed.

    Args:
        text: A single line or paragraph of inline markdown.

    Returns:
        The segments in source order. Text without any markup (including the
        empty string) yields a single ``("plain", text)`` entry.
    """
    out: list[tuple[str, str]] = []
    pos = 0
    for m in _INLINE_RE.finditer(text):
        if m.start() > pos:
            # Literal text between the previous match and this one.
            out.append(("plain", text[pos:m.start()]))
        for style in ("bold", "italic", "code"):
            if m.group(style):
                out.append((style, m.group(f"{style}_t")))
                break
        pos = m.end()
    if pos < len(text):
        out.append(("plain", text[pos:]))
    return out or [("plain", text)]
|
||||||
|
|
||||||
# markdown table
|
|
||||||
if _TABLE_LINE_RE.match(line):
|
|
||||||
tbl_lines = []
|
|
||||||
while i < len(lines) and _TABLE_LINE_RE.match(lines[i]):
|
|
||||||
tbl_lines.append(lines[i])
|
|
||||||
i += 1
|
|
||||||
_render_md_table(doc, tbl_lines)
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not line.strip():
|
def add_inline(paragraph, text: str, *, size: Pt = Pt(12), cn_font: str = "宋体") -> None:
    """Append ``text`` to ``paragraph`` as one run per inline-markdown segment.

    Args:
        paragraph: python-docx paragraph that receives the runs.
        text: Inline markdown; split into segments by ``parse_inline``.
        size: Font size applied to every run.
        cn_font: East Asian typeface applied to every run.
    """
    for kind, fragment in parse_inline(text):
        piece = paragraph.add_run(fragment)
        piece.font.size = size
        if kind == "bold":
            piece.bold = True
        elif kind == "italic":
            piece.italic = True
        # Code spans use a monospace Latin font; everything else uses the
        # body Latin font.
        latin_font = "Consolas" if kind == "code" else "Times New Roman"
        _set_run_fonts(piece, cn_font=cn_font, en_font=latin_font)
|
||||||
buf = [line]
|
|
||||||
j = i + 1
|
|
||||||
while j < len(lines) and lines[j].strip() and not _HEADING_RE.match(lines[j]) and not lines[j].startswith(">") and not _TABLE_LINE_RE.match(lines[j]):
|
|
||||||
buf.append(lines[j].rstrip())
|
|
||||||
j += 1
|
|
||||||
text = " ".join(s.strip() for s in buf)
|
|
||||||
add_paragraph(doc, text, level=0)
|
|
||||||
i = j
|
|
||||||
continue
|
|
||||||
i += 1
|
|
||||||
|
|
||||||
|
|
||||||
def _render_md_table(doc: Document, table_lines: list[str]) -> None:
|
# ───────────────────────── 段落 / 标题 / 列表 ─────────────────────────
|
||||||
"""把一段 markdown 表格转换成 docx 表格。第二行如果是分隔符 (---) 跳过。"""
|
|
||||||
|
def add_heading(doc: Document, text: str, level: int) -> None:
    """Append a heading paragraph using the built-in ``Heading <level>`` style.

    Args:
        doc: Target document.
        text: Heading text; inline markdown is parsed like body text.
        level: Heading level; only 1, 2 and 3 have a size/font entry here.

    Raises:
        KeyError: If ``level`` has no entry in the size/font table.
    """
    heading = doc.add_paragraph(style=f"Heading {level}")
    heading.paragraph_format.first_line_indent = None

    # (font size, East Asian typeface) for each supported level.
    run_spec = {
        1: (Pt(14), "黑体"),
        2: (Pt(12), "黑体"),
        3: (Pt(12), "宋体"),
    }
    font_size, cjk_font = run_spec[level]
    add_inline(heading, text, size=font_size, cn_font=cjk_font)

    # Every run in a heading is bold, whatever its inline style.
    for piece in heading.runs:
        piece.bold = True
|
||||||
|
|
||||||
|
|
||||||
|
def add_body_paragraph(doc: Document, text: str, *, indent: bool = True) -> None:
    """Append a justified body paragraph with 1.5 line spacing.

    Args:
        doc: Target document.
        text: Paragraph text; inline markdown is rendered via ``add_inline``.
        indent: When true, indent the first line by 24pt (two characters at
            the 12pt body size); otherwise clear the first-line indent.
    """
    para = doc.add_paragraph()
    fmt = para.paragraph_format
    fmt.line_spacing = 1.5
    fmt.first_line_indent = Pt(24) if indent else None
    para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    add_inline(para, text)
|
||||||
|
|
||||||
|
|
||||||
|
# ───────────────────────── 行类型识别 ─────────────────────────
|
||||||
|
|
||||||
|
# ATX heading: 1-6 "#" characters, whitespace, then the title.
# group(1) is the run of hashes, group(2) the title text.
_HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$")
# Table row: ignoring surrounding whitespace, the line starts and ends with "|".
_TABLE_LINE_RE = re.compile(r"^\s*\|.*\|\s*$")
# Blockquote: optional leading whitespace followed by ">".
_BLOCKQUOTE_RE = re.compile(r"^\s*>\s?")
# Horizontal rule: a line made only of three or more "-", "=" or "_".
_HR_RE = re.compile(r"^\s*-{3,}\s*$|^\s*={3,}\s*$|^\s*_{3,}\s*$")
|
||||||
|
|
||||||
|
# List items: each one becomes its own paragraph, is never merged with the
# neighbouring lines, and gets no first-line indent.
#
# Full-width punctuation is written as \uXXXX escapes so it cannot be folded
# into its ASCII look-alike by tooling: U+FF08/U+FF09 are full-width
# parentheses and U+FF0E is the full-width full stop. An unescaped ASCII
# "(\d+)" would be a capture group matching every line that starts with a
# digit.
_LIST_PATTERNS = [
    re.compile(r"^\[\d+\]\s"),                                     # [1] citation entry
    re.compile(r"^[-*+]\s"),                                       # - / * / + bullet
    re.compile(r"^\d+[\.、\uFF0E]\s*"),                             # 1. / 1、 / 1 + full-width stop
    re.compile(r"^\(\d+\)\s*"),                                    # (1) ASCII parentheses
    re.compile(r"^\uFF08\d+\uFF09\s*"),                            # (1) full-width parentheses
    re.compile(r"^[一二三四五六七八九十百千]+[、\uFF0E\.]"),          # 一、
    re.compile(r"^[(\uFF08][一二三四五六七八九十百千]+[)\uFF09]"),   # (一) in either width
    re.compile(r"^[①②③④⑤⑥⑦⑧⑨⑩⑪⑫⑬⑭⑮]"),                          # ① circled number
    re.compile(r"^第[一二三四五六七八九十百]+[条章节]"),              # 第一条 / 第一章 / 第一节
]


def is_list_item(line: str) -> bool:
    """Return True when ``line`` starts with a recognised list/numbering marker.

    Leading whitespace is ignored so indented items are recognised too.

    Args:
        line: One line of markdown source.
    """
    candidate = line.lstrip()
    return any(p.match(candidate) for p in _LIST_PATTERNS)
|
||||||
|
|
||||||
|
|
||||||
|
def is_table_line(line: str) -> bool:
    """Return True when ``line`` matches the markdown table-row pattern."""
    return _TABLE_LINE_RE.match(line) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def is_heading(line: str) -> bool:
    """Return True when ``line`` matches the ATX heading pattern."""
    return _HEADING_RE.match(line) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def is_blockquote(line: str) -> bool:
    """Return True when ``line`` matches the blockquote pattern."""
    return _BLOCKQUOTE_RE.match(line) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def is_hr(line: str) -> bool:
    """Return True when ``line`` matches the horizontal-rule pattern."""
    return _HR_RE.match(line) is not None
|
||||||
|
|
||||||
|
|
||||||
|
# ───────────────────────── 表格 ─────────────────────────
|
||||||
|
|
||||||
|
def _split_md_row(line: str) -> list[str]:
|
||||||
|
return [c.strip() for c in line.strip().strip("|").split("|")]
|
||||||
|
|
||||||
|
|
||||||
|
def _is_separator_row(cells: list[str]) -> bool:
|
||||||
|
return all(re.match(r"^[-:\s]+$", c) for c in cells if c != "")
|
||||||
|
|
||||||
|
|
||||||
|
def render_table(doc: Document, table_lines: list[str]) -> None:
|
||||||
rows: list[list[str]] = []
|
rows: list[list[str]] = []
|
||||||
for ln in table_lines:
|
for ln in table_lines:
|
||||||
cells = [c.strip() for c in ln.strip().strip("|").split("|")]
|
cells = _split_md_row(ln)
|
||||||
# skip pure separator row
|
if not cells or _is_separator_row(cells):
|
||||||
if all(re.match(r"^[-: ]+$", c) for c in cells):
|
|
||||||
continue
|
continue
|
||||||
rows.append(cells)
|
rows.append(cells)
|
||||||
if not rows:
|
if not rows:
|
||||||
|
|
@ -134,21 +282,91 @@ def _render_md_table(doc: Document, table_lines: list[str]) -> None:
|
||||||
for r in rows:
|
for r in rows:
|
||||||
while len(r) < n_cols:
|
while len(r) < n_cols:
|
||||||
r.append("")
|
r.append("")
|
||||||
|
|
||||||
table = doc.add_table(rows=len(rows), cols=n_cols)
|
table = doc.add_table(rows=len(rows), cols=n_cols)
|
||||||
table.style = "Light Grid Accent 1"
|
try:
|
||||||
|
table.style = "Light Grid Accent 1"
|
||||||
|
except KeyError:
|
||||||
|
pass # style 不存在就用默认
|
||||||
|
|
||||||
for ri, row in enumerate(rows):
|
for ri, row in enumerate(rows):
|
||||||
for ci, val in enumerate(row):
|
for ci, val in enumerate(row):
|
||||||
cell = table.rows[ri].cells[ci]
|
cell = table.rows[ri].cells[ci]
|
||||||
cell.text = "" # clear default
|
# 清掉 cell 默认空段落
|
||||||
|
cell.text = ""
|
||||||
p = cell.paragraphs[0]
|
p = cell.paragraphs[0]
|
||||||
run = p.add_run(val)
|
p.paragraph_format.first_line_indent = None
|
||||||
run.font.name = "Times New Roman"
|
p.paragraph_format.line_spacing = 1.2
|
||||||
run.font.size = Pt(10.5) # 五号
|
add_inline(p, val, size=Pt(10.5), cn_font="宋体")
|
||||||
_set_east_asia_font(run, "宋体")
|
|
||||||
if ri == 0:
|
if ri == 0:
|
||||||
run.bold = True
|
for run in p.runs:
|
||||||
|
run.bold = True
|
||||||
|
|
||||||
|
|
||||||
|
# ───────────────────────── 主渲染 ─────────────────────────
|
||||||
|
|
||||||
|
def render_md_block(doc: Document, md_text: str) -> None:
    """Render one markdown text into ``doc``, line by line.

    Line handling, in priority order:
      * blank lines and horizontal rules are dropped;
      * consecutive table lines are rendered as a single table;
      * headings are clamped to level 3 at most;
      * blockquote lines are skipped (the templates use them for writing
        hints that must not appear in the final document);
      * list items each become their own unindented paragraph;
      * anything else is prose: consecutive lines are joined with a space
        into one indented paragraph, until a blank or special line.

    Args:
        doc: Target document.
        md_text: Markdown source of one section.
    """

    def _ends_prose(candidate: str) -> bool:
        # Any structural line terminates the prose paragraph being collected.
        return (
            is_heading(candidate)
            or is_blockquote(candidate)
            or is_table_line(candidate)
            or is_list_item(candidate)
            or is_hr(candidate)
        )

    rows = md_text.splitlines()
    total = len(rows)
    idx = 0
    while idx < total:
        current = rows[idx].rstrip()

        # Blank line or horizontal rule: nothing to emit.
        if not current.strip() or is_hr(current):
            idx += 1
            continue

        # Table: gather the whole run of "| ... |" lines.
        if is_table_line(current):
            table_rows: list[str] = []
            while idx < total and is_table_line(rows[idx]):
                table_rows.append(rows[idx])
                idx += 1
            render_table(doc, table_rows)
            continue

        # Heading.
        heading = _HEADING_RE.match(current)
        if heading is not None:
            depth = min(len(heading.group(1)), 3)
            add_heading(doc, heading.group(2).strip(), depth)
            idx += 1
            continue

        # Blockquote: writing hint, not part of the final text.
        if is_blockquote(current):
            idx += 1
            continue

        # List item (including [N] citations): its own paragraph, no indent.
        if is_list_item(current):
            add_body_paragraph(doc, current.strip(), indent=False)
            idx += 1
            continue

        # Prose: merge following lines until a blank or structural line.
        pieces = [current.strip()]
        idx += 1
        while idx < total:
            follower = rows[idx].rstrip()
            if not follower.strip() or _ends_prose(follower):
                break
            pieces.append(follower.strip())
            idx += 1
        add_body_paragraph(doc, " ".join(pieces), indent=True)
|
||||||
|
|
||||||
|
|
||||||
|
# ───────────────────────── 入口 ─────────────────────────
|
||||||
|
|
||||||
def render_sections(sections_dir: Path, out: Path, fund_type: str) -> None:
|
def render_sections(sections_dir: Path, out: Path, fund_type: str) -> None:
|
||||||
if not sections_dir.is_dir():
|
if not sections_dir.is_dir():
|
||||||
print(f"[ERR] sections dir not found: {sections_dir}", file=sys.stderr)
|
print(f"[ERR] sections dir not found: {sections_dir}", file=sys.stderr)
|
||||||
|
|
@ -159,30 +377,34 @@ def render_sections(sections_dir: Path, out: Path, fund_type: str) -> None:
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
doc = init_doc()
|
doc = init_doc()
|
||||||
|
add_toc(doc)
|
||||||
for f in md_files:
|
for f in md_files:
|
||||||
text = f.read_text(encoding="utf-8")
|
text = f.read_text(encoding="utf-8")
|
||||||
render_md_block(doc, text)
|
render_md_block(doc, text)
|
||||||
# 每个 section 后加分页, 让结构更清晰
|
|
||||||
doc.add_page_break()
|
doc.add_page_break()
|
||||||
|
|
||||||
out.parent.mkdir(parents=True, exist_ok=True)
|
out.parent.mkdir(parents=True, exist_ok=True)
|
||||||
doc.save(str(out))
|
doc.save(str(out))
|
||||||
|
|
||||||
# 统计
|
paras = sum(1 for _ in doc.paragraphs)
|
||||||
paras = sum(1 for p in doc.paragraphs)
|
|
||||||
chars = sum(len(p.text) for p in doc.paragraphs)
|
chars = sum(len(p.text) for p in doc.paragraphs)
|
||||||
print(f"[OK] rendered {len(md_files)} sections → {out}")
|
tbls = len(doc.tables)
|
||||||
print(f" paragraphs: {paras} | total chars: {chars}")
|
print(f"[OK] rendered {len(md_files)} sections -> {out}")
|
||||||
|
print(f" paragraphs: {paras} | tables: {tbls} | total chars: {chars}")
|
||||||
print(f" fund_type: {fund_type}")
|
print(f" fund_type: {fund_type}")
|
||||||
print(f" font: 中文宋体小四 / 英文 Times New Roman 小四 / 行距 1.5 / 首行缩进 2 字符")
|
print(f" font: 中文宋体小四 / 英文 Times New Roman 小四 / 行距 1.5 / 首行缩进 2 字符")
|
||||||
|
print(f" 提示: 在 Word 中打开后按 F9 (或右键目录 -> 更新域) 生成实际目录。")
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Parse the command-line arguments and render the sections to a .docx."""
    fund_types = [
        "key_rd",
        "major_project",
        "nsfc_joint_fund",
        "nsfc_general",
        "nsfc_youth",
        "provincial",
        "enterprise",
    ]
    parser = argparse.ArgumentParser(description="渲染章节 md → 申报书 docx")
    parser.add_argument("sections_dir", type=Path, help="sections/*.md 目录")
    parser.add_argument("--fund-type", required=True, choices=fund_types)
    parser.add_argument("-o", "--output", type=Path, required=True, help="输出 .docx 路径")
    opts = parser.parse_args()
    render_sections(opts.sections_dir, opts.output, opts.fund_type)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue