zcbot/skills/brief/scripts/render_docx.py

714 lines
26 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""把 sections/*.md 渲染成科研方向简报 .docx(简报体例,区别于 paper 的投稿稿)。
相对 paper/render_docx.py 的简报专属增强:
- **商务红配色**(主色 #C00000):标题分级染色 + 标题下细色条;TL;DR / 「判断」行做浅红底纹 callout
- **引文上标 + 内部超链接**:正文 [1] / [W3] → 上标红色,点击锚到文末参考文献对应条目
- **参考文献可点击**:DOI → https://doi.org/... 蓝色超链接;web 条目里的域名/路径 → https:// 超链接
- **化学式下标(白名单)**:CO2 / C3S2 / Na2O / SO4 ... → 真实下标,**白名单精确匹配**,不误伤 LC3 / EN 197-5 / 8.5 Mt / 2026
字体规范同院内其它渲染:中文宋体小四 / 英文 Times New Roman 小四 / 行距 1.5 / 首行缩进 2 字符。
支持 **加粗** / *斜体* / `等宽` / 列表 / 表格 / ![caption](png) 居中插图。
用法:
python render_docx.py <sections_dir> -o <out.docx>
python render_docx.py <sections_dir> --no-color -o <out.docx> # 关配色出纯黑白
"""
from __future__ import annotations
import argparse
import re
import sys
from pathlib import Path
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.opc.constants import RELATIONSHIP_TYPE as RT
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm, Pt, RGBColor
# ───────────────────────── Theme colours ─────────────────────────
# Hex values are RRGGBB without a leading '#', the form written into w:fill / w:color.
PRIMARY = "C00000" # primary "business red"
PRIMARY_RGB = RGBColor(0xC0, 0x00, 0x00) # same red as an RGBColor for python-docx font setters
TLDR_FILL = "FBE9E9" # light-red shading for TL;DR cards (also passed for blockquote callouts)
CALLOUT_FILL = "F7DDDD" # shading for "**判断**" callout lines
LINK_BLUE = "1155CC" # external hyperlink blue
TABLE_HEAD_FILL = "C00000" # header-row shading used by render_table
# ───────────────────────── 字体 / 低层 OOXML 辅助 ─────────────────────────
def _set_run_fonts(run, *, cn_font="宋体", en_font="Times New Roman") -> None:
    """Set the East-Asian and Latin typefaces of a run.

    Args:
        run: python-docx ``Run`` whose ``<w:rPr>`` is created on demand.
        cn_font: Typeface written to ``w:eastAsia`` (used for CJK text).
        en_font: Typeface written to ``w:ascii`` and ``w:hAnsi``.
    """
    rPr = run._element.get_or_add_rPr()
    # get_or_add_rFonts() inserts <w:rFonts> at its schema position. A plain
    # append() would put it after <w:sz>/<w:b>/<w:color> whenever the caller
    # set size, bold or colour first, breaking the CT_RPr child order.
    rFonts = rPr.get_or_add_rFonts()
    rFonts.set(qn("w:eastAsia"), cn_font)
    rFonts.set(qn("w:ascii"), en_font)
    rFonts.set(qn("w:hAnsi"), en_font)
def _set_style_fonts(style, *, cn_font="宋体", en_font="Times New Roman") -> None:
    """Set the East-Asian and Latin typefaces of a document style.

    Args:
        style: python-docx style object; its ``<w:rPr>`` is created on demand.
        cn_font: Typeface written to ``w:eastAsia`` (used for CJK text).
        en_font: Typeface written to ``w:ascii`` and ``w:hAnsi``.
    """
    # Use python-docx's ordered accessors: inserting <w:rPr> at index 0 would
    # place it ahead of <w:name>, and appending <w:rFonts> would place it after
    # <w:sz>/<w:color>; both violate the WordprocessingML child order.
    rPr = style.element.get_or_add_rPr()
    rFonts = rPr.get_or_add_rFonts()
    rFonts.set(qn("w:eastAsia"), cn_font)
    rFonts.set(qn("w:ascii"), en_font)
    rFonts.set(qn("w:hAnsi"), en_font)
def _set_subscript(run) -> None:
    """Render *run* as subscript.

    Delegates to python-docx's ``Font.subscript`` so that a single
    ``<w:vertAlign>`` is kept at its schema position, instead of appending a
    new element on every call.
    """
    run.font.subscript = True
# Children of <w:pPr> that the WordprocessingML schema orders before <w:shd>.
_PPR_BEFORE_SHD = (
    "w:pStyle", "w:keepNext", "w:keepLines", "w:pageBreakBefore", "w:framePr",
    "w:widowControl", "w:numPr", "w:suppressLineNumbers", "w:pBdr",
)


def _para_shading(paragraph, fill: str) -> None:
    """Give *paragraph* a solid background fill.

    Args:
        paragraph: python-docx ``Paragraph``.
        fill: Background colour as hex ``RRGGBB`` (no ``#``).

    Any existing ``<w:shd>`` is replaced, so repeated calls do not stack
    duplicates, and the element is inserted at its schema position rather than
    appended after ``<w:spacing>`` / ``<w:ind>`` / ``<w:jc>``.
    """
    pPr = paragraph._p.get_or_add_pPr()
    for stale in pPr.findall(qn("w:shd")):
        pPr.remove(stale)

    shd = OxmlElement("w:shd")
    shd.set(qn("w:val"), "clear")
    shd.set(qn("w:color"), "auto")
    shd.set(qn("w:fill"), fill)

    # Insert right after the last child that must precede <w:shd>.
    predecessors = {qn(tag) for tag in _PPR_BEFORE_SHD}
    anchor = None
    for child in pPr:
        if child.tag in predecessors:
            anchor = child
    if anchor is None:
        pPr.insert(0, shd)
    else:
        anchor.addnext(shd)
# Children of <w:pPr> that the WordprocessingML schema orders before <w:pBdr>.
_PPR_BEFORE_PBDR = (
    "w:pStyle", "w:keepNext", "w:keepLines", "w:pageBreakBefore", "w:framePr",
    "w:widowControl", "w:numPr", "w:suppressLineNumbers",
)
# Child order inside <w:pBdr>.
_PBDR_SIDE_ORDER = ("top", "left", "bottom", "right", "between", "bar")


def _para_border(paragraph, *, sides=("bottom",), color=PRIMARY, size=8, space=3) -> None:
    """Draw single-line borders on the given sides of *paragraph*.

    Args:
        paragraph: python-docx ``Paragraph``.
        sides: Border element names (e.g. ``"top"``, ``"left"``, ``"bottom"``).
        color: Line colour as hex ``RRGGBB``.
        size: Written to ``w:sz``.
        space: Written to ``w:space``.

    An existing border on a requested side is replaced rather than duplicated.
    ``<w:pBdr>`` and its children are kept in schema order.
    """
    pPr = paragraph._p.get_or_add_pPr()
    pBdr = pPr.find(qn("w:pBdr"))
    if pBdr is None:
        pBdr = OxmlElement("w:pBdr")
        # Insert right after the last child that must precede <w:pBdr>.
        predecessors = {qn(tag) for tag in _PPR_BEFORE_PBDR}
        anchor = None
        for child in pPr:
            if child.tag in predecessors:
                anchor = child
        if anchor is None:
            pPr.insert(0, pBdr)
        else:
            anchor.addnext(pBdr)

    for side in sides:
        for stale in pBdr.findall(qn(f"w:{side}")):
            pBdr.remove(stale)
        el = OxmlElement(f"w:{side}")
        el.set(qn("w:val"), "single")
        el.set(qn("w:sz"), str(size))
        el.set(qn("w:space"), str(space))
        el.set(qn("w:color"), color)
        pBdr.append(el)

    # Stable sort keeps unknown elements at the end, in their original order.
    rank = {qn(f"w:{name}"): idx for idx, name in enumerate(_PBDR_SIDE_ORDER)}
    pBdr[:] = sorted(pBdr, key=lambda node: rank.get(node.tag, len(rank)))
def _add_bookmark(paragraph, name: str, bm_id: int) -> None:
    """Add a bookmark to *paragraph* so internal hyperlinks can target it.

    Args:
        paragraph: python-docx ``Paragraph``.
        name: Bookmark name (the ``w:anchor`` value used by links).
        bm_id: Id shared by the start/end pair; the caller supplies a distinct
            value per bookmark.

    ``<w:bookmarkEnd>`` is appended at call time, so runs added afterwards sit
    outside the bookmark range; the bookmark marks the paragraph position.
    """
    start = OxmlElement("w:bookmarkStart")
    start.set(qn("w:id"), str(bm_id))
    start.set(qn("w:name"), name)
    end = OxmlElement("w:bookmarkEnd")
    end.set(qn("w:id"), str(bm_id))

    p = paragraph._p
    pPr = p.find(qn("w:pPr"))
    if pPr is None:
        p.insert(0, start)
    else:
        # <w:pPr> must stay the first child of <w:p>; put the bookmark after it.
        pPr.addnext(start)
    p.append(end)
def _mk_run_xml(text: str, *, size_pt: float, color=None, superscript=False,
                underline=False, bold=False, cn_font="宋体", en_font="Times New Roman"):
    """Build a standalone ``<w:r>`` element (for use inside ``<w:hyperlink>``).

    Args:
        text: Run text; whitespace is preserved via ``xml:space="preserve"``.
        size_pt: Font size in points (written to ``w:sz`` in half-points,
            truncated to an integer).
        color: Optional hex ``RRGGBB`` text colour.
        superscript: Emit ``w:vertAlign = superscript`` when true.
        underline: Emit a single underline when true.
        bold: Emit ``w:b`` when true.
        cn_font: East-Asian typeface.
        en_font: Latin typeface.

    Returns:
        The new ``<w:r>`` element.

    Run properties are appended in CT_RPr schema order:
    rFonts, b, color, sz, u, vertAlign.
    """
    r = OxmlElement("w:r")
    rPr = OxmlElement("w:rPr")

    rFonts = OxmlElement("w:rFonts")
    rFonts.set(qn("w:eastAsia"), cn_font)
    rFonts.set(qn("w:ascii"), en_font)
    rFonts.set(qn("w:hAnsi"), en_font)
    rPr.append(rFonts)

    if bold:
        rPr.append(OxmlElement("w:b"))
    if color:
        c = OxmlElement("w:color")
        c.set(qn("w:val"), color)
        rPr.append(c)

    # <w:sz> precedes <w:u> and <w:vertAlign> in the schema sequence.
    sz = OxmlElement("w:sz")
    sz.set(qn("w:val"), str(int(size_pt * 2)))
    rPr.append(sz)

    if underline:
        u = OxmlElement("w:u")
        u.set(qn("w:val"), "single")
        rPr.append(u)
    if superscript:
        va = OxmlElement("w:vertAlign")
        va.set(qn("w:val"), "superscript")
        rPr.append(va)

    r.append(rPr)
    t = OxmlElement("w:t")
    t.set(qn("xml:space"), "preserve")
    t.text = text
    r.append(t)
    return r
def add_internal_link(paragraph, anchor: str, text: str, *, size_pt: float,
                      color=PRIMARY, superscript=False) -> None:
    """Append a hyperlink that jumps to the bookmark named *anchor*.

    The link holds a single run built by ``_mk_run_xml`` with the given size,
    colour and superscript setting.
    """
    link = OxmlElement("w:hyperlink")
    link.set(qn("w:anchor"), anchor)
    link_run = _mk_run_xml(text, size_pt=size_pt, color=color, superscript=superscript)
    link.append(link_run)
    paragraph._p.append(link)
def add_external_link(paragraph, url: str, text: str, *, size_pt: float) -> None:
    """Append a blue, underlined hyperlink to *url* showing *text*.

    The URL is registered as an external relationship on the paragraph's part,
    and the hyperlink element refers to it by relationship id.
    """
    rel_id = paragraph.part.relate_to(url, RT.HYPERLINK, is_external=True)
    link = OxmlElement("w:hyperlink")
    link.set(qn("r:id"), rel_id)
    link_run = _mk_run_xml(text, size_pt=size_pt, color=LINK_BLUE, underline=True)
    link.append(link_run)
    paragraph._p.append(link)
# ───────────────────────── 文档初始化 ─────────────────────────
def init_doc(color: bool) -> Document:
    """Create an empty document with page geometry and base styles configured.

    Page: 21 cm x 29.7 cm with 2.5 cm margins. ``Normal``: 12 pt, line spacing
    1.5, no paragraph spacing. ``Heading 1..3``: bold 18/14/12 pt with 黑体 as
    the East-Asian font, coloured with the theme red when *color* is true and
    black otherwise.
    """
    document = Document()

    page = document.sections[0]
    page.page_height = Cm(29.7)
    page.page_width = Cm(21)
    for edge in ("top", "bottom", "left", "right"):
        setattr(page, f"{edge}_margin", Cm(2.5))

    body = document.styles["Normal"]
    body.font.name = "Times New Roman"
    body.font.size = Pt(12)
    _set_style_fonts(body, cn_font="宋体")
    body_fmt = body.paragraph_format
    body_fmt.line_spacing = 1.5
    body_fmt.space_before = Pt(0)
    body_fmt.space_after = Pt(0)

    heading_rgb = PRIMARY_RGB if color else RGBColor(0, 0, 0)
    heading_points = {1: 18, 2: 14, 3: 12}
    for level, points in heading_points.items():
        style = document.styles[f"Heading {level}"]
        style.font.name = "Times New Roman"
        style.font.size = Pt(points)
        style.font.bold = True
        style.font.color.rgb = heading_rgb
        _set_style_fonts(style, cn_font="黑体")
        fmt = style.paragraph_format
        fmt.line_spacing = 1.3
        # Levels 1-2 get more space above than level 3.
        fmt.space_before = Pt(6) if level > 2 else Pt(10)
        fmt.space_after = Pt(4)
        fmt.first_line_indent = None
    return document
# ───────────────────────── 内联:bold/italic/code 切分 ─────────────────────────
# Inline Markdown spans, tried in this order: **bold**, *italic*, `code`.
# Each alternative has an outer named group (delimiters included) and a
# "*_t" group holding only the text between the delimiters. Spans cannot
# cross a newline or contain their own delimiter character.
# NOTE(review): for str patterns `\w` is Unicode-aware, so the italic
# lookbehind also rejects a `*` that directly follows a CJK character —
# confirm that is intended.
_INLINE_RE = re.compile(
    r"(?P<bold>\*\*(?P<bold_t>[^*\n]+?)\*\*)"
    r"|(?P<italic>(?<![\*\w])\*(?P<italic_t>[^*\n]+?)\*(?!\*))"
    r"|(?P<code>`(?P<code_t>[^`\n]+?)`)"
)
# Citation markers such as [12] or [W3]; group 1 is the id without brackets.
_CITE_RE = re.compile(r"\[(W?\d+)\]")
# 化学式下标白名单(统一三处渲染器共用同一份;长的在前,\b 防误伤 LC3 / C595 / 2026;
# 不含 Ca2+ 这类带电荷的——它是上标不是下标,白名单不收即天然避开)
_CHEM_RE = re.compile(
r"Ca\(OH\)2|Mg\(OH\)2"
r"|\b(?:Al2O3|Fe2O3|Fe3O4|Mn2O3|Cr2O3|P2O5|Na2SO4|K2SO4|CaSO4|CaCO3|MgCO3|"
r"CaCl2|MgCl2|Na2O|K2O|SiO2|TiO2|ZrO2|SO4|SO3|SO2|CO3|CO2|NO3|NO2|PO4|"
r"H2O|NH3|CH4|C4AF|C3S2|C2AS|C3S|C2S|C3A|O2|N2|H2)\b"
)
def _emit_chem(paragraph, text: str, *, size_pt: float, cn_font: str) -> None:
    """Append *text* as runs, subscripting digits of whitelisted formulas.

    Text outside ``_CHEM_RE`` matches is emitted unchanged. Inside a match,
    each digit becomes its own subscript run and the letters between digits
    are emitted as plain runs.
    """
    def plain(chunk: str) -> None:
        # _emit_plain_run ignores empty strings, so no guard is needed here.
        _emit_plain_run(paragraph, chunk, size_pt=size_pt, cn_font=cn_font)

    cursor = 0
    for hit in _CHEM_RE.finditer(text):
        plain(text[cursor:hit.start()])
        # Splitting on a captured single digit yields
        # [letters, digit, letters, digit, ..., tail]: odd slots are digits.
        for slot, piece in enumerate(re.split(r"(\d)", hit.group(0))):
            if slot % 2 == 0:
                plain(piece)
                continue
            digit_run = paragraph.add_run(piece)
            digit_run.font.size = Pt(size_pt)
            _set_run_fonts(digit_run, cn_font=cn_font, en_font="Times New Roman")
            _set_subscript(digit_run)
        cursor = hit.end()
    plain(text[cursor:])
def _emit_plain_run(paragraph, text: str, *, size_pt: float, cn_font: str) -> None:
    """Append *text* as one unstyled run; do nothing for an empty string."""
    if text:
        plain = paragraph.add_run(text)
        plain.font.size = Pt(size_pt)
        _set_run_fonts(plain, cn_font=cn_font, en_font="Times New Roman")
def _emit_plain_with_cites(paragraph, text: str, *, size_pt: float, cn_font: str,
                           make_citations: bool) -> None:
    """Emit a plain-text segment with citation links and formula subscripts.

    With *make_citations* off, the text goes straight to ``_emit_chem``.
    Otherwise every ``[n]`` / ``[Wn]`` marker becomes a superscript internal
    link to bookmark ``ref_<id>`` at 85 % of the body size. Directly adjacent
    markers are separated by a superscript comma.
    """
    def chem(chunk: str) -> None:
        _emit_chem(paragraph, chunk, size_pt=size_pt, cn_font=cn_font)

    if not make_citations:
        chem(text)
        return

    cite_pt = size_pt * 0.85
    cursor = 0
    seen_cite = False
    for cite in _CITE_RE.finditer(text):
        begin, finish = cite.span()
        if begin > cursor:
            chem(text[cursor:begin])
        elif seen_cite:
            # This marker starts exactly where the previous one ended.
            sep = paragraph.add_run(",")
            sep.font.size = Pt(cite_pt)
            sep.font.color.rgb = PRIMARY_RGB
            _set_subscript_super(sep)
        ref_id = cite.group(1)
        add_internal_link(paragraph, f"ref_{ref_id}", ref_id, size_pt=cite_pt,
                          color=PRIMARY, superscript=True)
        seen_cite = True
        cursor = finish
    if cursor < len(text):
        chem(text[cursor:])
def _set_subscript_super(run) -> None:
    """Render *run* as superscript (despite the name, not subscript).

    Delegates to python-docx's ``Font.superscript`` so that a single
    ``<w:vertAlign>`` is kept at its schema position, instead of appending a
    new element on every call.
    """
    run.font.superscript = True
def add_inline_rich(paragraph, text: str, *, size_pt=12.0, cn_font="宋体",
                    make_citations=True) -> None:
    """Append *text* to *paragraph*, honouring inline Markdown markup.

    ``**bold**``, ``*italic*`` and `` `code` `` spans become styled runs (code
    uses Consolas as its Latin font). Text between spans is passed to
    ``_emit_plain_with_cites``. The text inside a styled span is emitted
    verbatim, without citation or formula processing.
    """
    def plain(chunk: str) -> None:
        _emit_plain_with_cites(paragraph, chunk, size_pt=size_pt,
                               cn_font=cn_font, make_citations=make_citations)

    # (regex group, Run attribute to switch on, Latin font)
    span_styles = (
        ("bold", "bold", "Times New Roman"),
        ("italic", "italic", "Times New Roman"),
        ("code", None, "Consolas"),
    )

    cursor = 0
    for hit in _INLINE_RE.finditer(text):
        if hit.start() > cursor:
            plain(text[cursor:hit.start()])
        for group, flag, latin in span_styles:
            if not hit.group(group):
                continue
            styled = paragraph.add_run(hit.group(f"{group}_t"))
            if flag is not None:
                setattr(styled, flag, True)
            styled.font.size = Pt(size_pt)
            _set_run_fonts(styled, cn_font=cn_font, en_font=latin)
            break
        cursor = hit.end()
    if cursor < len(text):
        plain(text[cursor:])
# ───────────────────────── 标题 / 段落 ─────────────────────────
def add_heading(doc: Document, text: str, level: int, color: bool) -> None:
    """Append a heading paragraph using style ``Heading <level>``.

    Level 1 is centred. All runs are forced bold. With *color* on, levels 1-2
    get a bottom rule (thicker for level 1) and level 3 gets a left bar with a
    small left indent. Sizes are defined for levels 1-3 only.
    """
    heading = doc.add_paragraph(style=f"Heading {level}")
    heading.paragraph_format.first_line_indent = None
    point_sizes = {1: 18.0, 2: 14.0, 3: 12.0}
    if level == 1:
        heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    add_inline_rich(heading, text, size_pt=point_sizes[level], cn_font="黑体",
                    make_citations=False)
    for piece in heading.runs:
        piece.bold = True

    if not color:
        return
    if level <= 2:
        rule_size = 12 if level == 1 else 6
        _para_border(heading, sides=("bottom",), color=PRIMARY, size=rule_size)
    elif level == 3:
        heading.paragraph_format.left_indent = Pt(8)
        _para_border(heading, sides=("left",), color=PRIMARY, size=20, space=6)
def add_body_paragraph(doc: Document, text: str, *, indent=True) -> None:
    """Append a justified body paragraph with 1.5 line spacing.

    When *indent* is true the first line is indented by 24 pt; otherwise the
    first-line indent is cleared. *text* is rendered with inline markup.
    """
    para = doc.add_paragraph()
    fmt = para.paragraph_format
    fmt.line_spacing = 1.5
    if indent:
        fmt.first_line_indent = Pt(24)
    else:
        fmt.first_line_indent = None
    para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    add_inline_rich(para, text)
def add_callout(doc: Document, text: str, fill: str, color: bool) -> None:
    """Append an emphasis box for "判断" lines and blockquotes.

    The paragraph has no first-line indent, a small left indent and 3 pt
    spacing above and below. With *color* on it gets the *fill* shading and a
    red bar on the left.
    """
    box = doc.add_paragraph()
    fmt = box.paragraph_format
    fmt.line_spacing = 1.4
    fmt.first_line_indent = None
    fmt.left_indent = Pt(8)
    fmt.space_before = Pt(3)
    fmt.space_after = Pt(3)
    if color:
        _para_shading(box, fill)
        _para_border(box, sides=("left",), color=PRIMARY, size=22, space=5)
    add_inline_rich(box, text)
def add_meta_band(doc: Document, text: str, color: bool) -> None:
    """Append the centred info band shown under the title.

    The band is rendered at 10.5 pt without citation processing. With *color*
    on it gets a light-red shading and thin rules above and below.
    """
    band = doc.add_paragraph()
    fmt = band.paragraph_format
    fmt.first_line_indent = None
    fmt.space_before = Pt(2)
    fmt.space_after = Pt(12)
    fmt.line_spacing = 1.35
    band.alignment = WD_ALIGN_PARAGRAPH.CENTER
    if color:
        _para_shading(band, "F3DADA")
        _para_border(band, sides=("top", "bottom"), color=PRIMARY, size=6, space=3)
    add_inline_rich(band, text, size_pt=10.5, make_citations=False)
def add_tldr_card(doc: Document, text: str, color: bool) -> None:
    """Append one TL;DR bullet as a card; consecutive cards stack in a column.

    The card is rendered at 11 pt with a 10 pt left indent. With *color* on it
    gets the TL;DR shading and a thick red bar on the left.
    """
    card = doc.add_paragraph()
    fmt = card.paragraph_format
    fmt.first_line_indent = None
    fmt.left_indent = Pt(10)
    fmt.space_before = Pt(1)
    fmt.space_after = Pt(3)
    fmt.line_spacing = 1.3
    if color:
        _para_shading(card, TLDR_FILL)
        _para_border(card, sides=("left",), color=PRIMARY, size=26, space=6)
    add_inline_rich(card, text, size_pt=11.0)
def _add_field(paragraph, instr: str) -> None:
    """Append a field with instruction *instr* (e.g. ``" PAGE "``).

    All parts go into one new run, in this order: fldChar *begin*, the
    instruction text, fldChar *separate*, fldChar *end*.
    """
    def fld_char(kind: str):
        node = OxmlElement("w:fldChar")
        node.set(qn("w:fldCharType"), kind)
        return node

    code = OxmlElement("w:instrText")
    code.set(qn("xml:space"), "preserve")
    code.text = instr

    holder = paragraph.add_run()
    for node in (fld_char("begin"), code, fld_char("separate"), fld_char("end")):
        holder._r.append(node)
def add_page_footer(doc: Document, color: bool) -> None:
    """Put a centred page-number field in the first section's footer.

    The PAGE field sits between two empty runs. Every footer run is set to
    9 pt with the standard fonts, and to the theme red when *color* is on.
    """
    footer_para = doc.sections[0].footer.paragraphs[0]
    footer_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    footer_para.add_run("")
    _add_field(footer_para, " PAGE ")
    footer_para.add_run("")
    for piece in footer_para.runs:
        piece.font.size = Pt(9)
        if color:
            piece.font.color.rgb = PRIMARY_RGB
        _set_run_fonts(piece, cn_font="宋体")
# ───────────────────────── 参考文献条目(可点击)─────────────────────────
# One reference-list line, "[12] text" or "[W3] text": group 1 is the id,
# group 2 the entry text.
_REF_RE = re.compile(r"^\[(W?\d+)\]\s+(.+)$")
# An entry that consists solely of a bare DOI ("10.<registrant>/<suffix>").
_DOI_RE = re.compile(r"^10\.\d{4,9}/\S+$")
# A domain-like token ("host.tld", optionally followed by "/path"), matched
# case-insensitively. The scheme ("https://") is not part of the match.
_URL_TOKEN_RE = re.compile(r"([a-z0-9][\w.\-]*\.[a-z]{2,}(?:/[^\s]+)?)", re.IGNORECASE)
# Sentence punctuation that may be glued to the end of a URL in running text.
_URL_TAIL_PUNCT = ".,;:!?。，；：、"
# Closing brackets are trimmed only when the token has no matching opener.
_URL_TAIL_CLOSERS = {")": "(", "]": "[", "）": "（", "】": "【"}


def _trim_url_tail(token: str) -> str:
    """Drop trailing punctuation / unbalanced closers from a URL-like token."""
    while token:
        last = token[-1]
        if last in _URL_TAIL_PUNCT:
            token = token[:-1]
        elif last in _URL_TAIL_CLOSERS and token.count(last) > token.count(_URL_TAIL_CLOSERS[last]):
            token = token[:-1]
        else:
            break
    return token


def add_reference_item(doc: Document, cid: str, value: str, bm_id: int, color: bool) -> None:
    """Append one reference-list entry with a bookmark and a clickable target.

    Args:
        doc: Target document.
        cid: Citation id (e.g. ``"12"`` or ``"W3"``); the bookmark is named
            ``ref_<cid>`` so in-text citation links can jump here.
        value: Entry text after the ``[id]`` label.
        bm_id: Bookmark id, supplied by the caller.
        color: Colour the ``[id]`` label with the theme red when true.

    Rendering rules:
        * An entry that is only a bare DOI links to ``https://doi.org/<doi>``.
        * Otherwise the first domain-like token becomes an external link. An
          explicit ``http://`` / ``https://`` prefix is kept as written and is
          part of the link. A bare host gets ``https://``. Trailing sentence
          punctuation stays outside the link.
        * With no such token the entry is emitted as plain text.
    """
    p = doc.add_paragraph()
    pf = p.paragraph_format
    pf.first_line_indent = None
    pf.left_indent = Pt(18)
    pf.line_spacing = 1.3
    _add_bookmark(p, f"ref_{cid}", bm_id)

    # Bold "[n] " label.
    lab = p.add_run(f"[{cid}] ")
    lab.bold = True
    lab.font.size = Pt(10.5)
    if color:
        lab.font.color.rgb = PRIMARY_RGB
    _set_run_fonts(lab, cn_font="宋体")

    value = value.strip()
    if _DOI_RE.match(value):
        add_external_link(p, f"https://doi.org/{value}", value, size_pt=10.5)
        return

    m = _URL_TOKEN_RE.search(value)
    if m is None:
        _emit_plain_run(p, value, size_pt=10.5, cn_font="宋体")
        return

    token = _trim_url_tail(m.group(1))
    link_start = m.start(1)
    link_end = link_start + len(token)

    # The token regex cannot match "://", so an explicit scheme ends up just
    # before the match; fold it into the link instead of guessing one.
    lead = value[:link_start].lower()
    has_scheme = False
    for scheme in ("https://", "http://"):
        if lead.endswith(scheme):
            link_start -= len(scheme)
            has_scheme = True
            break

    shown = value[link_start:link_end]
    url = shown if has_scheme else f"https://{token}"

    # _emit_plain_run ignores empty strings.
    _emit_plain_run(p, value[:link_start], size_pt=10.5, cn_font="宋体")
    add_external_link(p, url, shown, size_pt=10.5)
    _emit_plain_run(p, value[link_end:], size_pt=10.5, cn_font="宋体")
# ───────────────────────── 行类型识别 ─────────────────────────
# ATX heading: 1-6 '#' characters, whitespace, then the title (group 2).
_HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$")
# A table row: the line starts and ends with '|' (surrounding whitespace allowed).
_TABLE_LINE_RE = re.compile(r"^\s*\|.*\|\s*$")
# Blockquote marker '>' with at most one following whitespace character.
_BLOCKQUOTE_RE = re.compile(r"^\s*>\s?")
# Horizontal rule: a line made only of three or more '-', '=' or '_'.
_HR_RE = re.compile(r"^\s*-{3,}\s*$|^\s*={3,}\s*$|^\s*_{3,}\s*$")
# Code fence: three or more backticks or tildes (group 1) plus an optional info word (group 2).
_FENCE_RE = re.compile(r"^\s*(`{3,}|~{3,})\s*(\S*)\s*$")
# A line holding only an image: ![cap](src); src may not contain whitespace or ')'.
_IMAGE_LINE_RE = re.compile(r"^\s*!\[(?P<cap>[^\]]*)\]\((?P<src>[^)\s]+)\)\s*$")
# Width passed to add_picture for every inserted image.
_MAX_IMG_WIDTH = Cm(15)
# Prefixes that mark a line as a list item. All are anchored at column 0, so
# an indented line never matches.
_LIST_PATTERNS = [
    re.compile(r"^[-*+]\s"),  # bullet followed by whitespace
    re.compile(r"^\d+[\.、.]\s*"),  # "1." / "1、"
    re.compile(r"^\(\d+\)\s*"),  # "(1)"
    # NOTE(review): this matches ANY line that starts with a digit (e.g. a
    # paragraph beginning "2026 ...") and makes the "1." pattern above
    # redundant — confirm whether a delimiter was intended here.
    re.compile(r"^\d+\s*"),
    re.compile(r"^[①②③④⑤⑥⑦⑧⑨⑩⑪⑫⑬⑭⑮]"),  # circled numbers
]


def is_list_item(line: str) -> bool:
    """Return True when *line* starts with one of the list-marker prefixes."""
    for pattern in _LIST_PATTERNS:
        if pattern.match(line) is not None:
            return True
    return False
# ───────────────────────── 表格 ─────────────────────────
def _split_md_row(line: str) -> list[str]:
return [c.strip() for c in line.strip().strip("|").split("|")]
def _is_sep_row(cells: list[str]) -> bool:
return all(re.match(r"^[-:\s]+$", c) for c in cells if c != "")
def render_table(doc: Document, table_lines: list[str], color: bool) -> None:
    """Render a block of Markdown table lines as a Word table.

    Delimiter rows are dropped wherever they occur, and short rows are padded
    with empty cells. Cells are rendered at 10.5 pt with inline markup but no
    citation links. The first row is bold; with *color* on it is shaded and
    its text is white.
    """
    parsed = (_split_md_row(raw) for raw in table_lines)
    rows = [cells for cells in parsed if cells and not _is_sep_row(cells)]
    if not rows:
        return

    width = max(map(len, rows))
    rows = [cells + [""] * (width - len(cells)) for cells in rows]

    table = doc.add_table(rows=len(rows), cols=width)
    try:
        table.style = "Table Grid"
    except KeyError:
        # The template has no such style; keep the default table look.
        pass

    white = RGBColor(0xFF, 0xFF, 0xFF)
    for row_idx, cells in enumerate(rows):
        is_header = row_idx == 0
        for col_idx, content in enumerate(cells):
            cell = table.rows[row_idx].cells[col_idx]
            cell.text = ""
            para = cell.paragraphs[0]
            para.paragraph_format.first_line_indent = None
            para.paragraph_format.line_spacing = 1.2
            add_inline_rich(para, content, size_pt=10.5, cn_font="宋体", make_citations=False)
            if not is_header:
                continue
            if color:
                _para_shading(para, TABLE_HEAD_FILL)
            for piece in para.runs:
                piece.bold = True
                if color:
                    piece.font.color.rgb = white
# ───────────────────────── 图片 ─────────────────────────
def _resolve_image_path(src: str, base_dir: Path) -> Path | None:
p = Path(src)
if not p.is_absolute():
p = (base_dir / p).resolve()
return p if p.is_file() else None
def add_image(doc: Document, png_path: Path, caption: str | None, ctx: dict) -> None:
    """Insert a centred picture followed by a numbered, bold caption line.

    The picture is added at ``_MAX_IMG_WIDTH``. If it cannot be added, an
    ``[image failed: ...]`` note is written in its place and no caption or
    figure number is produced. On success ``ctx["fig_no"]`` is incremented and
    the caption reads ``"<n> <caption>"``, or just ``"<n>"`` without a caption.
    """
    pic_para = doc.add_paragraph()
    pic_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    pic_fmt = pic_para.paragraph_format
    pic_fmt.first_line_indent = None
    pic_fmt.space_before = Pt(6)
    pic_fmt.space_after = Pt(3)
    pic_run = pic_para.add_run()
    try:
        pic_run.add_picture(str(png_path), width=_MAX_IMG_WIDTH)
    except Exception as e:
        # Best effort: keep rendering and leave a visible marker in the document.
        pic_run.add_text(f"[image failed: {png_path.name}: {e}]")
        return

    number = ctx.get("fig_no", 0) + 1
    ctx["fig_no"] = number

    caption_para = doc.add_paragraph()
    caption_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    caption_para.paragraph_format.first_line_indent = None
    caption_para.paragraph_format.space_after = Pt(6)
    if caption:
        label = f"{number} {caption}"
    else:
        label = f"{number}"
    label_run = caption_para.add_run(label)
    label_run.font.size = Pt(10.5)
    label_run.bold = True
    _set_run_fonts(label_run, cn_font="宋体")
# ───────────────────────── 主渲染 ─────────────────────────
def _interrupts_paragraph(raw: str, in_refs: bool) -> bool:
    """Return True when *raw* must start its own block instead of being soft-wrapped."""
    text = raw.strip()
    if not text:
        return True
    return bool(
        _HEADING_RE.match(raw)
        or _BLOCKQUOTE_RE.match(raw)
        or _TABLE_LINE_RE.match(raw)
        or is_list_item(raw)
        or _HR_RE.match(raw)
        # Lines the dispatcher renders as blocks of their own. Without these
        # checks a fence, image, callout or reference entry that follows
        # paragraph text with no blank line would be merged into that text.
        or _FENCE_RE.match(raw)
        or _IMAGE_LINE_RE.match(raw)
        or text.startswith("**判断**")
        or (in_refs and _REF_RE.match(text))
    )


def render_md_block(doc: Document, md_text: str, ctx: dict) -> None:
    """Render the Markdown text of one section file into *doc*.

    Args:
        doc: Target document.
        md_text: Markdown source of one section.
        ctx: Shared render state. Reads ``"color"`` and ``"sections_dir"``,
            increments ``"bm_id"`` for each reference entry, and is passed on
            to ``add_image``.

    Lines are dispatched top-down; the first matching rule wins:
    blank line / horizontal rule (skipped), image line, fenced code, table,
    heading, blockquote, reference entry (only under a "参考文献" heading),
    "**判断**" callout, list item, title info band, and finally a paragraph
    that absorbs the following soft-wrapped lines.
    """
    color = ctx["color"]
    lines = md_text.splitlines()
    i, n = 0, len(lines)
    in_refs = False      # under a "参考文献" heading: "[n] ..." lines are reference entries
    expect_meta = False  # set by an H1; cleared by the info band or the next heading
    in_tldr = False      # under a "要点" / "TL;DR" heading: list items become cards

    while i < n:
        line = lines[i].rstrip()
        if not line.strip():
            i += 1
            continue
        if _HR_RE.match(line):
            i += 1
            continue

        m_img = _IMAGE_LINE_RE.match(line)
        if m_img:
            png = _resolve_image_path(m_img.group("src"), ctx["sections_dir"])
            if png is not None:
                add_image(doc, png, m_img.group("cap").strip() or None, ctx)
            else:
                add_body_paragraph(doc, f"[image missing: {m_img.group('src')}]", indent=False)
            i += 1
            continue

        m_fence = _FENCE_RE.match(line)
        if m_fence:
            fence = m_fence.group(1)
            code = []
            i += 1
            while i < n:
                mc = _FENCE_RE.match(lines[i])
                # Closing fence: same character, at least as long as the opener.
                if mc and mc.group(1)[0] == fence[0] and len(mc.group(1)) >= len(fence):
                    i += 1
                    break
                code.append(lines[i])
                i += 1
            for ln in code:
                p = doc.add_paragraph()
                p.paragraph_format.first_line_indent = None
                p.paragraph_format.line_spacing = 1.0
                # A single space keeps blank code lines as paragraphs with a run.
                run = p.add_run(ln if ln else " ")
                run.font.size = Pt(10.5)
                _set_run_fonts(run, cn_font="新宋体", en_font="Consolas")
            continue

        if _TABLE_LINE_RE.match(line):
            block = []
            while i < n and _TABLE_LINE_RE.match(lines[i]):
                block.append(lines[i])
                i += 1
            render_table(doc, block, color)
            continue

        m = _HEADING_RE.match(line)
        if m:
            title = m.group(2).strip()
            level = min(len(m.group(1)), 3)  # levels 4-6 render as level 3
            in_refs = "参考文献" in title
            expect_meta = (level == 1)
            if level <= 2:
                in_tldr = ("要点" in title) or ("TL;DR" in title.upper())
            add_heading(doc, title, level, color)
            i += 1
            continue

        if _BLOCKQUOTE_RE.match(line):
            # Merge consecutive '>' lines into one light-red callout.
            buf = [_BLOCKQUOTE_RE.sub("", line).strip()]
            i += 1
            while i < n and _BLOCKQUOTE_RE.match(lines[i]):
                buf.append(_BLOCKQUOTE_RE.sub("", lines[i]).strip())
                i += 1
            add_callout(doc, " ".join(buf), TLDR_FILL, color)
            continue

        # Reference entry.
        if in_refs:
            m_ref = _REF_RE.match(line.strip())
            if m_ref:
                ctx["bm_id"] += 1
                add_reference_item(doc, m_ref.group(1), m_ref.group(2), ctx["bm_id"], color)
                i += 1
                continue

        # "**判断**" emphasis line -> callout.
        if line.strip().startswith("**判断**"):
            add_callout(doc, line.strip(), CALLOUT_FILL, color)
            i += 1
            continue

        if is_list_item(line):
            if in_tldr:
                add_tldr_card(doc, line.strip(), color)
            else:
                add_body_paragraph(doc, line.strip(), indent=False)
            i += 1
            continue

        # Info band after the title.
        if expect_meta and ("时间窗" in line):
            add_meta_band(doc, line.strip(), color)
            expect_meta = False
            i += 1
            continue

        # Ordinary paragraph: join soft-wrapped lines until a new block starts.
        buf = [line.strip()]
        j = i + 1
        while j < n:
            nxt = lines[j].rstrip()
            if _interrupts_paragraph(nxt, in_refs):
                break
            buf.append(nxt.strip())
            j += 1
        add_body_paragraph(doc, " ".join(buf), indent=True)
        i = j
def render_sections(sections_dir: Path, out: Path, color: bool) -> None:
    """Render every ``*.md`` file in *sections_dir*, in name order, into *out*.

    A page break is inserted between files. The output directory is created
    if needed, and a short summary is printed after saving. The process exits
    with status 2 when the directory is missing or holds no ``.md`` files.
    """
    if not sections_dir.is_dir():
        print(f"[ERR] sections dir not found: {sections_dir}", file=sys.stderr)
        sys.exit(2)
    md_files = sorted(sections_dir.glob("*.md"))
    if not md_files:
        print(f"[ERR] no .md found in {sections_dir}", file=sys.stderr)
        sys.exit(2)

    # State shared across files: figure and bookmark counters keep running.
    ctx = {
        "sections_dir": sections_dir,
        "figures_dir": sections_dir.parent / "figures",
        "fig_no": 0,
        "bm_id": 0,
        "color": color,
    }
    doc = init_doc(color)
    add_page_footer(doc, color)

    last_index = len(md_files) - 1
    for position, md_path in enumerate(md_files):
        render_md_block(doc, md_path.read_text(encoding="utf-8"), ctx)
        if position != last_index:
            doc.add_page_break()

    out.parent.mkdir(parents=True, exist_ok=True)
    doc.save(str(out))

    paras = len(doc.paragraphs)
    chars = sum(len(para.text) for para in doc.paragraphs)
    theme = '商务红 #C00000' if color else '黑白'
    print(f"[OK] rendered {len(md_files)} sections -> {out}")
    print(f" paragraphs: {paras} | tables: {len(doc.tables)} | figures: {ctx['fig_no']} | chars: {chars}")
    print(f" theme: {theme} | 引文上标+超链接 | 化学式下标白名单")
def main() -> None:
    """Parse the command line and render the sections directory to a .docx."""
    parser = argparse.ArgumentParser(description="渲染章节 md → 科研方向简报 docx")
    parser.add_argument("sections_dir", type=Path, help="sections/*.md 目录")
    parser.add_argument(
        "--no-color",
        dest="color",
        action="store_false",
        help="关配色,出纯黑白(默认商务红主题)",
    )
    parser.add_argument("-o", "--output", type=Path, required=True, help="输出 .docx 路径")
    opts = parser.parse_args()
    render_sections(opts.sections_dir, opts.output, opts.color)


if __name__ == "__main__":
    main()