"""brief 简报体例 docx 渲染器(商务红主题 + 引文上标超链 + callout/底纹边框)。 brief 是三 profile 里最富的一支:书签锚点、内部/外部超链接、引文 [n]/[Wn] 上标回链、 参考条目 DOI 超链、概览信息带 / TL;DR 卡片 / 判断 callout、页脚页码域。这些 paper/proposal 都没有,故 brief 保留自己的渲染层,只从 rendering.common 复用叶子原语(字体/化学式/块级正则/ 表格行切分/图片路径)。函数体逐字移植自旧 skills/brief/scripts/render_docx.py。 """ from __future__ import annotations import re import sys from pathlib import Path from docx import Document from docx.enum.text import WD_ALIGN_PARAGRAPH from docx.opc.constants import RELATIONSHIP_TYPE as RT from docx.oxml import OxmlElement from docx.oxml.ns import qn from docx.shared import Cm, Pt, RGBColor from .common import ( set_run_fonts as _set_run_fonts, set_style_fonts as _set_style_fonts, set_subscript as _set_subscript, CHEM_RE as _CHEM_RE, INLINE_RE as _INLINE_RE, HEADING_RE as _HEADING_RE, TABLE_LINE_RE as _TABLE_LINE_RE, BLOCKQUOTE_RE as _BLOCKQUOTE_RE, HR_RE as _HR_RE, FENCE_RE as _FENCE_RE, IMAGE_LINE_RE as _IMAGE_LINE_RE, split_md_row as _split_md_row, is_separator_row as _is_sep_row, resolve_image_path as _resolve_image_path, MAX_IMG_WIDTH as _MAX_IMG_WIDTH, ) # ───────────────────────── 主题色 ───────────────────────── PRIMARY = "C00000" # 商务红主色 PRIMARY_RGB = RGBColor(0xC0, 0x00, 0x00) TLDR_FILL = "FBE9E9" # TL;DR 浅红底纹 CALLOUT_FILL = "F7DDDD" # 「判断」callout 底纹 LINK_BLUE = "1155CC" # 超链接蓝 TABLE_HEAD_FILL = "C00000" # ───────────────────────── 低层 OOXML 辅助 ───────────────────────── def _para_shading(paragraph, fill: str) -> None: pPr = paragraph._p.get_or_add_pPr() shd = OxmlElement("w:shd") shd.set(qn("w:val"), "clear") shd.set(qn("w:color"), "auto") shd.set(qn("w:fill"), fill) pPr.append(shd) def _para_border(paragraph, *, sides=("bottom",), color=PRIMARY, size=8, space=3) -> None: pPr = paragraph._p.get_or_add_pPr() pBdr = pPr.find(qn("w:pBdr")) if pBdr is None: pBdr = OxmlElement("w:pBdr") pPr.append(pBdr) for side in sides: el = OxmlElement(f"w:{side}") el.set(qn("w:val"), "single") el.set(qn("w:sz"), str(size)) el.set(qn("w:space"), str(space)) el.set(qn("w:color"), color) pBdr.append(el) def _add_bookmark(paragraph, name: str, bm_id: int) -> None: start = OxmlElement("w:bookmarkStart") start.set(qn("w:id"), str(bm_id)) start.set(qn("w:name"), name) end = OxmlElement("w:bookmarkEnd") end.set(qn("w:id"), str(bm_id)) paragraph._p.insert(0, start) paragraph._p.append(end) def _mk_run_xml(text: str, *, size_pt: float, color=None, superscript=False, underline=False, bold=False, cn_font="宋体", en_font="Times New Roman"): r = OxmlElement("w:r") rPr = OxmlElement("w:rPr") rFonts = OxmlElement("w:rFonts") rFonts.set(qn("w:eastAsia"), cn_font) rFonts.set(qn("w:ascii"), en_font) rFonts.set(qn("w:hAnsi"), en_font) rPr.append(rFonts) if bold: rPr.append(OxmlElement("w:b")) if color: c = OxmlElement("w:color") c.set(qn("w:val"), color) rPr.append(c) if underline: u = OxmlElement("w:u") u.set(qn("w:val"), "single") rPr.append(u) if superscript: va = OxmlElement("w:vertAlign") va.set(qn("w:val"), "superscript") rPr.append(va) sz = OxmlElement("w:sz") sz.set(qn("w:val"), str(int(size_pt * 2))) rPr.append(sz) r.append(rPr) t = OxmlElement("w:t") t.set(qn("xml:space"), "preserve") t.text = text r.append(t) return r def add_internal_link(paragraph, anchor: str, text: str, *, size_pt: float, color=PRIMARY, superscript=False) -> None: h = OxmlElement("w:hyperlink") h.set(qn("w:anchor"), anchor) h.append(_mk_run_xml(text, size_pt=size_pt, color=color, superscript=superscript)) paragraph._p.append(h) def add_external_link(paragraph, url: str, text: str, *, size_pt: float) -> None: part = paragraph.part r_id = part.relate_to(url, RT.HYPERLINK, is_external=True) h = OxmlElement("w:hyperlink") h.set(qn("r:id"), r_id) h.append(_mk_run_xml(text, size_pt=size_pt, color=LINK_BLUE, underline=True)) paragraph._p.append(h) # ───────────────────────── 文档初始化 ───────────────────────── def init_doc(color: bool) -> Document: doc = Document() section = doc.sections[0] section.page_height = Cm(29.7) section.page_width = Cm(21) for m in ("top_margin", "bottom_margin", "left_margin", "right_margin"): setattr(section, m, Cm(2.5)) normal = doc.styles["Normal"] normal.font.name = "Times New Roman" normal.font.size = Pt(12) _set_style_fonts(normal, cn_font="宋体") pf = normal.paragraph_format pf.line_spacing = 1.5 pf.space_before = Pt(0) pf.space_after = Pt(0) head_color = PRIMARY_RGB if color else RGBColor(0, 0, 0) for lvl, sz, cn in [(1, Pt(18), "黑体"), (2, Pt(14), "黑体"), (3, Pt(12), "黑体")]: h = doc.styles[f"Heading {lvl}"] h.font.name = "Times New Roman" h.font.size = sz h.font.bold = True h.font.color.rgb = head_color _set_style_fonts(h, cn_font=cn) h.paragraph_format.line_spacing = 1.3 h.paragraph_format.space_before = Pt(10 if lvl <= 2 else 6) h.paragraph_format.space_after = Pt(4) h.paragraph_format.first_line_indent = None return doc # ───────────────────────── 内联:bold/italic/code + 引文 + 化学式 ───────────────────────── # 引文标记 [12] / [W3] _CITE_RE = re.compile(r"\[(W?\d+)\]") def _emit_chem(paragraph, text: str, *, size_pt: float, cn_font: str) -> None: """把白名单化学式里的数字渲成下标,其余正常。""" pos = 0 for m in _CHEM_RE.finditer(text): if m.start() > pos: _emit_plain_run(paragraph, text[pos:m.start()], size_pt=size_pt, cn_font=cn_font) formula = m.group(0) buf = "" for ch in formula: if ch.isdigit(): if buf: _emit_plain_run(paragraph, buf, size_pt=size_pt, cn_font=cn_font) buf = "" sub = paragraph.add_run(ch) sub.font.size = Pt(size_pt) _set_run_fonts(sub, cn_font=cn_font, en_font="Times New Roman") _set_subscript(sub) else: buf += ch if buf: _emit_plain_run(paragraph, buf, size_pt=size_pt, cn_font=cn_font) pos = m.end() if pos < len(text): _emit_plain_run(paragraph, text[pos:], size_pt=size_pt, cn_font=cn_font) def _emit_plain_run(paragraph, text: str, *, size_pt: float, cn_font: str) -> None: if not text: return run = paragraph.add_run(text) run.font.size = Pt(size_pt) _set_run_fonts(run, cn_font=cn_font, en_font="Times New Roman") def _emit_plain_with_cites(paragraph, text: str, *, size_pt: float, cn_font: str, make_citations: bool) -> None: """plain 段:处理引文上标超链接 + 化学式下标。""" if not make_citations: _emit_chem(paragraph, text, size_pt=size_pt, cn_font=cn_font) return pos = 0 prev_end = None for m in _CITE_RE.finditer(text): if m.start() > pos: _emit_chem(paragraph, text[pos:m.start()], size_pt=size_pt, cn_font=cn_font) # 连续 [1][3] 之间补一个上标逗号 if prev_end is not None and m.start() == prev_end: comma = paragraph.add_run(",") comma.font.size = Pt(size_pt * 0.85) comma.font.color.rgb = PRIMARY_RGB _set_subscript_super(comma) cid = m.group(1) add_internal_link(paragraph, f"ref_{cid}", cid, size_pt=size_pt * 0.85, color=PRIMARY, superscript=True) prev_end = m.end() pos = m.end() if pos < len(text): _emit_chem(paragraph, text[pos:], size_pt=size_pt, cn_font=cn_font) def _set_subscript_super(run) -> None: rPr = run._element.get_or_add_rPr() va = OxmlElement("w:vertAlign") va.set(qn("w:val"), "superscript") rPr.append(va) def add_inline_rich(paragraph, text: str, *, size_pt=12.0, cn_font="宋体", make_citations=True) -> None: pos = 0 for m in _INLINE_RE.finditer(text): if m.start() > pos: _emit_plain_with_cites(paragraph, text[pos:m.start()], size_pt=size_pt, cn_font=cn_font, make_citations=make_citations) if m.group("bold"): run = paragraph.add_run(m.group("bold_t")) run.bold = True run.font.size = Pt(size_pt) _set_run_fonts(run, cn_font=cn_font) elif m.group("italic"): run = paragraph.add_run(m.group("italic_t")) run.italic = True run.font.size = Pt(size_pt) _set_run_fonts(run, cn_font=cn_font) elif m.group("code"): run = paragraph.add_run(m.group("code_t")) run.font.size = Pt(size_pt) _set_run_fonts(run, cn_font=cn_font, en_font="Consolas") pos = m.end() if pos < len(text): _emit_plain_with_cites(paragraph, text[pos:], size_pt=size_pt, cn_font=cn_font, make_citations=make_citations) # ───────────────────────── 标题 / 段落 ───────────────────────── def add_heading(doc: Document, text: str, level: int, color: bool) -> None: p = doc.add_paragraph(style=f"Heading {level}") p.paragraph_format.first_line_indent = None sizes = {1: 18.0, 2: 14.0, 3: 12.0} if level == 1: p.alignment = WD_ALIGN_PARAGRAPH.CENTER add_inline_rich(p, text, size_pt=sizes[level], cn_font="黑体", make_citations=False) for run in p.runs: run.bold = True if color and level <= 2: _para_border(p, sides=("bottom",), color=PRIMARY, size=(12 if level == 1 else 6)) elif color and level == 3: p.paragraph_format.left_indent = Pt(8) _para_border(p, sides=("left",), color=PRIMARY, size=20, space=6) def add_body_paragraph(doc: Document, text: str, *, indent=True) -> None: p = doc.add_paragraph() pf = p.paragraph_format pf.line_spacing = 1.5 pf.first_line_indent = Pt(24) if indent else None p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY add_inline_rich(p, text) def add_callout(doc: Document, text: str, fill: str, color: bool) -> None: """判断 / 引用块类强调框:底纹 + 左红条。""" p = doc.add_paragraph() pf = p.paragraph_format pf.line_spacing = 1.4 pf.first_line_indent = None pf.left_indent = Pt(8) pf.space_before = Pt(3) pf.space_after = Pt(3) if color: _para_shading(p, fill) _para_border(p, sides=("left",), color=PRIMARY, size=22, space=5) add_inline_rich(p, text) def add_meta_band(doc: Document, text: str, color: bool) -> None: """标题下方的信息带(方向/时间窗/深度/数据源/受众):居中浅红底纹 + 上下细线。""" p = doc.add_paragraph() pf = p.paragraph_format pf.first_line_indent = None pf.space_before = Pt(2) pf.space_after = Pt(12) pf.line_spacing = 1.35 p.alignment = WD_ALIGN_PARAGRAPH.CENTER if color: _para_shading(p, "F3DADA") _para_border(p, sides=("top", "bottom"), color=PRIMARY, size=6, space=3) add_inline_rich(p, text, size_pt=10.5, make_citations=False) def add_tldr_card(doc: Document, text: str, color: bool) -> None: """TL;DR 要点:每条做成浅红左条卡片,堆叠成卡片列。""" p = doc.add_paragraph() pf = p.paragraph_format pf.first_line_indent = None pf.left_indent = Pt(10) pf.space_before = Pt(1) pf.space_after = Pt(3) pf.line_spacing = 1.3 if color: _para_shading(p, TLDR_FILL) _para_border(p, sides=("left",), color=PRIMARY, size=26, space=6) add_inline_rich(p, text, size_pt=11.0) def _add_field(paragraph, instr: str) -> None: run = paragraph.add_run() for typ, payload in (("begin", None), ("instr", instr), ("separate", None), ("end", None)): if typ == "instr": el = OxmlElement("w:instrText") el.set(qn("xml:space"), "preserve") el.text = payload else: el = OxmlElement("w:fldChar") el.set(qn("w:fldCharType"), typ) run._r.append(el) def add_page_footer(doc: Document, color: bool) -> None: p = doc.sections[0].footer.paragraphs[0] p.alignment = WD_ALIGN_PARAGRAPH.CENTER pre = p.add_run("第 ") _add_field(p, " PAGE ") post = p.add_run(" 页") for r in p.runs: r.font.size = Pt(9) if color: r.font.color.rgb = PRIMARY_RGB _set_run_fonts(r, cn_font="宋体") # ───────────────────────── 参考文献条目(可点击)───────────────────────── _REF_RE = re.compile(r"^\[(W?\d+)\]\s+(.+)$") _DOI_RE = re.compile(r"^10\.\d{4,9}/\S+$") _DOI_INLINE_RE = re.compile(r"10\.\d{4,9}/\S+") # 条目内 DOI 子串(论文列表条目末尾常带 "DOI: 10.xxx") _URL_TOKEN_RE = re.compile(r"([a-z0-9][\w.\-]*\.[a-z]{2,}(?:/[^\s]+)?)", re.IGNORECASE) def add_reference_item(doc: Document, cid: str, value: str, bm_id: int, color: bool) -> None: p = doc.add_paragraph() pf = p.paragraph_format pf.first_line_indent = None pf.left_indent = Pt(18) pf.line_spacing = 1.3 _add_bookmark(p, f"ref_{cid}", bm_id) # 编号标签 [n] lab = p.add_run(f"[{cid}] ") lab.bold = True lab.font.size = Pt(10.5) if color: lab.font.color.rgb = PRIMARY_RGB _set_run_fonts(lab, cn_font="宋体") value = value.strip() if _DOI_RE.match(value): add_external_link(p, f"https://doi.org/{value}", value, size_pt=10.5) return # 论文列表条目:行内含 DOI(如 "<标题>. <作者>, <刊>, 2026-03. DOI: 10.1016/...") # → 把 DOI 子串做成超链接,前后文正常 m_doi = _DOI_INLINE_RE.search(value) if m_doi: doi = m_doi.group(0).rstrip(".,;)") pre, post = value[:m_doi.start()], value[m_doi.start() + len(doi):] if pre: _emit_plain_run(p, pre, size_pt=10.5, cn_font="宋体") add_external_link(p, f"https://doi.org/{doi}", doi, size_pt=10.5) if post: _emit_plain_run(p, post, size_pt=10.5, cn_font="宋体") return # web 条目:把第一个像 URL 的 token 变成超链接 m = _URL_TOKEN_RE.search(value) if m and ("/" in m.group(1) or m.group(1).count(".") >= 1) and " " not in m.group(1): pre, mid, post = value[:m.start()], m.group(1), value[m.end():] _emit_plain_run(p, pre, size_pt=10.5, cn_font="宋体") url = mid if mid.startswith("http") else f"https://{mid}" add_external_link(p, url, mid, size_pt=10.5) if post: _emit_plain_run(p, post, size_pt=10.5, cn_font="宋体") else: _emit_plain_run(p, value, size_pt=10.5, cn_font="宋体") # ───────────────────────── 行类型识别(brief 专属列表模式)───────────────────────── _LIST_PATTERNS = [ re.compile(r"^[-*+]\s"), re.compile(r"^\d+[\.、.]\s*"), re.compile(r"^\(\d+\)\s*"), re.compile(r"^(\d+)\s*"), re.compile(r"^[①②③④⑤⑥⑦⑧⑨⑩⑪⑫⑬⑭⑮]"), ] def is_list_item(line: str) -> bool: return any(p.match(line) for p in _LIST_PATTERNS) # ───────────────────────── 表格 ───────────────────────── def render_table(doc: Document, table_lines: list[str], color: bool) -> None: rows = [] for ln in table_lines: cells = _split_md_row(ln) if not cells or _is_sep_row(cells): continue rows.append(cells) if not rows: return n_cols = max(len(r) for r in rows) for r in rows: while len(r) < n_cols: r.append("") table = doc.add_table(rows=len(rows), cols=n_cols) try: table.style = "Table Grid" except KeyError: pass for ri, row in enumerate(rows): for ci, val in enumerate(row): cell = table.rows[ri].cells[ci] cell.text = "" p = cell.paragraphs[0] p.paragraph_format.first_line_indent = None p.paragraph_format.line_spacing = 1.2 add_inline_rich(p, val, size_pt=10.5, cn_font="宋体", make_citations=False) if ri == 0: if color: _para_shading(p, TABLE_HEAD_FILL) for run in p.runs: run.bold = True if color: run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF) # ───────────────────────── 图片 ───────────────────────── def add_image(doc: Document, png_path: Path, caption: str | None, ctx: dict) -> None: p = doc.add_paragraph() p.alignment = WD_ALIGN_PARAGRAPH.CENTER p.paragraph_format.first_line_indent = None p.paragraph_format.space_before = Pt(6) p.paragraph_format.space_after = Pt(3) run = p.add_run() try: run.add_picture(str(png_path), width=_MAX_IMG_WIDTH) except Exception as e: run.add_text(f"[image failed: {png_path.name}: {e}]") return ctx["fig_no"] = ctx.get("fig_no", 0) + 1 cap_p = doc.add_paragraph() cap_p.alignment = WD_ALIGN_PARAGRAPH.CENTER cap_p.paragraph_format.first_line_indent = None cap_p.paragraph_format.space_after = Pt(6) cap_text = f"图 {ctx['fig_no']} {caption}" if caption else f"图 {ctx['fig_no']}" cap_run = cap_p.add_run(cap_text) cap_run.font.size = Pt(10.5) cap_run.bold = True _set_run_fonts(cap_run, cn_font="宋体") # ───────────────────────── 主渲染 ───────────────────────── def render_md_block(doc: Document, md_text: str, ctx: dict) -> None: color = ctx["color"] lines = md_text.splitlines() i, n = 0, len(lines) in_refs = False # 进入「参考文献」段后,[n] 行按引文条目渲染 expect_meta = False # 紧跟 H1 标题的信息带(方向/时间窗...) in_tldr = False # 「一句话要点」段:列表项做卡片 while i < n: line = lines[i].rstrip() if not line.strip(): i += 1 continue if _HR_RE.match(line): i += 1 continue m_img = _IMAGE_LINE_RE.match(line) if m_img: png = _resolve_image_path(m_img.group("src"), ctx["sections_dir"]) if png is not None: add_image(doc, png, m_img.group("cap").strip() or None, ctx) else: add_body_paragraph(doc, f"[image missing: {m_img.group('src')}]", indent=False) i += 1 continue m_fence = _FENCE_RE.match(line) if m_fence: fence = m_fence.group(1) code = [] i += 1 while i < n: mc = _FENCE_RE.match(lines[i]) if mc and mc.group(1)[0] == fence[0] and len(mc.group(1)) >= len(fence): i += 1 break code.append(lines[i]) i += 1 for ln in code: p = doc.add_paragraph() p.paragraph_format.first_line_indent = None p.paragraph_format.line_spacing = 1.0 run = p.add_run(ln if ln else " ") run.font.size = Pt(10.5) _set_run_fonts(run, cn_font="新宋体", en_font="Consolas") continue if _TABLE_LINE_RE.match(line): block = [] while i < n and _TABLE_LINE_RE.match(lines[i]): block.append(lines[i]) i += 1 render_table(doc, block, color) continue m = _HEADING_RE.match(line) if m: title = m.group(2).strip() level = min(len(m.group(1)), 3) # 只在 H1/H2 重判段类型 —— 让「重要论文列表」段下的 ### 期刊子标题不重置 in_refs, # 子标题下的 [n] 条目才能继续按参考锚点渲染(带 DOI 超链接) if level <= 2: in_refs = ("参考文献" in title) or ("论文列表" in title) or ("文献列表" in title) expect_meta = (level == 1) if level <= 2: in_tldr = ("要点" in title) or ("TL;DR" in title.upper()) add_heading(doc, title, level, color) i += 1 continue if _BLOCKQUOTE_RE.match(line): # 引用块:并合连续 > 行,做浅红 callout(说明 / 取舍纪律等) buf = [_BLOCKQUOTE_RE.sub("", line).strip()] i += 1 while i < n and _BLOCKQUOTE_RE.match(lines[i]): buf.append(_BLOCKQUOTE_RE.sub("", lines[i]).strip()) i += 1 add_callout(doc, " ".join(buf), TLDR_FILL, color) continue # 参考文献条目 if in_refs: m_ref = _REF_RE.match(line.strip()) if m_ref: ctx["bm_id"] += 1 add_reference_item(doc, m_ref.group(1), m_ref.group(2), ctx["bm_id"], color) i += 1 continue # 「判断」强调行 → callout if line.strip().startswith("**判断**"): add_callout(doc, line.strip(), CALLOUT_FILL, color) i += 1 continue if is_list_item(line): if in_tldr: add_tldr_card(doc, line.strip(), color) else: add_body_paragraph(doc, line.strip(), indent=False) i += 1 continue # 紧跟标题的信息带 if expect_meta and ("时间窗" in line): add_meta_band(doc, line.strip(), color) expect_meta = False i += 1 continue # 普通段落:并合软换行 buf = [line.strip()] j = i + 1 while j < n: nxt = lines[j].rstrip() if not nxt.strip() or _HEADING_RE.match(nxt) or _BLOCKQUOTE_RE.match(nxt) \ or _TABLE_LINE_RE.match(nxt) or is_list_item(nxt) or _HR_RE.match(nxt): break buf.append(nxt.strip()) j += 1 add_body_paragraph(doc, " ".join(buf), indent=True) i = j # ───────────────────────── 入口 ───────────────────────── def render_sections(sections_dir: Path, out: Path, color: bool) -> None: if not sections_dir.is_dir(): print(f"[ERR] sections dir not found: {sections_dir}", file=sys.stderr) sys.exit(2) md_files = sorted(sections_dir.glob("*.md")) if not md_files: print(f"[ERR] no .md found in {sections_dir}", file=sys.stderr) sys.exit(2) ctx = { "sections_dir": sections_dir, "figures_dir": sections_dir.parent / "figures", "fig_no": 0, "bm_id": 0, "color": color, } doc = init_doc(color) add_page_footer(doc, color) for idx, f in enumerate(md_files): render_md_block(doc, f.read_text(encoding="utf-8"), ctx) if idx != len(md_files) - 1: doc.add_page_break() out.parent.mkdir(parents=True, exist_ok=True) doc.save(str(out)) paras = sum(1 for _ in doc.paragraphs) chars = sum(len(p.text) for p in doc.paragraphs) print(f"[OK] rendered {len(md_files)} sections -> {out}") print(f" profile: brief | paragraphs: {paras} | tables: {len(doc.tables)} | " f"figures: {ctx['fig_no']} | chars: {chars} | theme: {'商务红' if color else '黑白'}")