"""Smoke: render_docx.py 图片 + mermaid 缓存路径。 构造一个临时 sections/, figures/ 结构, 跑 render_docx, 验证: - mermaid 块 hash 在 figures/ 有对应 png → 走插图路径 - mermaid 块 hash 在 figures/ 没 png → 走 ASCII fallback (不崩, 文本保留) - ![](path) 直接图片 → 走插图路径 - 图编号自增 - inline_shapes 数 = 命中插图的次数 """ from __future__ import annotations import hashlib import os import shutil import subprocess import sys import tempfile from pathlib import Path ROOT = Path(__file__).resolve().parent.parent RENDER_DOCX = ROOT / "skills" / "proposal" / "scripts" / "render_docx.py" PYTHON = ROOT / ".venv" / "Scripts" / "python.exe" if not PYTHON.exists(): PYTHON = Path(sys.executable) # CI / unix fallback def _run_render(sections: Path, out: Path) -> subprocess.CompletedProcess: """跑 render_docx.py, 子进程强制 utf-8 输出 (Windows GBK stdout 兜底)。""" env = os.environ.copy() env["PYTHONIOENCODING"] = "utf-8" return subprocess.run( [str(PYTHON), str(RENDER_DOCX), str(sections), "--fund-type", "key_rd", "-o", str(out)], capture_output=True, text=True, encoding="utf-8", env=env, ) def mermaid_hash(source: str) -> str: return hashlib.sha1(source.strip().encode("utf-8")).hexdigest()[:10] def make_tiny_png(out: Path) -> None: """用 matplotlib 生成一张 1-bar 的真 png(确保 python-docx 能 add_picture)。""" import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt fig, ax = plt.subplots(figsize=(4, 2), dpi=100) ax.bar(["A", "B", "C"], [1, 3, 2], color="#c00000") ax.set_title("smoke") fig.savefig(str(out), bbox_inches="tight") plt.close(fig) def case(name: str): def deco(fn): def wrapped(*a, **kw): print(f"[case] {name} ...", end=" ") try: fn(*a, **kw) print("OK") except Exception as e: print(f"FAIL: {e}") raise return wrapped return deco @case("happy: cached mermaid + direct image + ASCII fallback") def smoke_happy(tmp: Path) -> None: sections = tmp / "sections" figures = tmp / "figures" sections.mkdir(parents=True) figures.mkdir(parents=True) # mermaid block #1 — 命中缓存 m1 = ( "%% caption: 关键问题与技术映射\n" "flowchart LR\n" " A --> B\n" ) h1 = mermaid_hash(m1) png1 = figures / f"fig_{h1}.png" make_tiny_png(png1) # mermaid block #2 — 缺缓存, 走 ASCII fallback m2 = ( "%% caption: 缺缓存的图\n" "flowchart TB\n" " X --> Y\n" ) # direct image — 自己造 png direct_png = figures / "direct.png" make_tiny_png(direct_png) # 写 .md (sections / "01_test.md").write_text( f"""# 测试章节 这是一段散文。**加粗** 与 *斜体* 应当正确解析。 ```mermaid {m1.rstrip()} ``` 正文继续。下面是一张缺缓存的 mermaid: ```mermaid {m2.rstrip()} ``` 再看一张直接引用的图: ![已有 PNG: 一柱形示例](../figures/direct.png) 末尾段。 """, encoding="utf-8", ) out = tmp / "test.docx" proc = _run_render(sections, out) assert proc.returncode == 0, f"render_docx exited {proc.returncode}\nSTDERR: {proc.stderr}\nSTDOUT: {proc.stdout}" assert out.is_file() and out.stat().st_size > 1000, f"output docx not produced: {out}" # 报告里应明确 figures: 2 (mermaid#1 + direct) assert "figures: 2" in proc.stdout, f"expected 'figures: 2' in stdout, got:\n{proc.stdout}" # 打开 docx 验内容 from docx import Document doc = Document(str(out)) # 真插图数(inline_shapes 计 add_picture)= 2 assert len(doc.inline_shapes) == 2, f"expected 2 inline shapes, got {len(doc.inline_shapes)}" all_text = "\n".join(p.text for p in doc.paragraphs) # 命中缓存的 mermaid 走图 + 题 assert "图 1" in all_text and "关键问题与技术映射" in all_text, "missing fig 1 caption" # direct 图也有题 assert "图 2" in all_text and "已有 PNG" in all_text, "missing fig 2 caption" # 缺缓存的 mermaid 走 ASCII fallback,源码保留 assert "flowchart TB" in all_text and "X --> Y" in all_text, "ASCII fallback didn't preserve mermaid source" # 缺缓存的不应该有 "图 3"(没插入图就不计数) assert "图 3" not in all_text, "ghost figure number for missed cache" @case("happy: no diagrams at all (regression: existing flows unchanged)") def smoke_no_diagrams(tmp: Path) -> None: sections = tmp / "sections" sections.mkdir(parents=True) (sections / "01.md").write_text( "# 标题\n\n散文段落。**加粗**。\n\n| 列 1 | 列 2 |\n|---|---|\n| a | b |\n", encoding="utf-8", ) out = tmp / "test.docx" proc = _run_render(sections, out) assert proc.returncode == 0, f"render_docx exited {proc.returncode}\nSTDERR: {proc.stderr}" assert "figures: 0" in proc.stdout, f"expected 'figures: 0' in stdout, got:\n{proc.stdout}" from docx import Document doc = Document(str(out)) assert len(doc.inline_shapes) == 0 assert len(doc.tables) == 1 # markdown table @case("render_diagrams: scans + hashes mermaid blocks, cache hit short-circuit") def smoke_render_diagrams(tmp: Path) -> None: """不依赖 mmdc / mermaid.ink:预先放 cache png, 期望 render_diagrams 全部 'cache' 命中。""" sys.path.insert(0, str(ROOT / "skills" / "proposal" / "scripts")) try: import render_diagrams as rd finally: sys.path.pop(0) sections = tmp / "sections" figures = tmp / "figures" sections.mkdir(parents=True) figures.mkdir(parents=True) m1 = "%% caption: 图甲\nflowchart LR\n A --> B\n" m2 = "flowchart TB\n X --> Y\n Y --> Z\n" (sections / "a.md").write_text( f"# A\n\n```mermaid\n{m1.rstrip()}\n```\n\n散文。\n\n```mermaid\n{m2.rstrip()}\n```\n", encoding="utf-8", ) (sections / "b.md").write_text("# B\n\n仅文本,无 mermaid。\n", encoding="utf-8") # 预填两个 png 让 render_one 走 cache 分支(避开网络) for src in (m1, m2): h = rd.mermaid_hash(src) (figures / f"fig_{h}.png").write_bytes(b"\x89PNG\r\n\x1a\nfake") # API 调用(不走 subprocess, 避免 stdout 编码再次干扰) rc = rd.render_sections(sections) assert rc == 0, f"render_sections rc={rc}" # caption 抽取 assert rd.extract_caption(m1) == "图甲" assert rd.extract_caption(m2) is None # find_mermaid_blocks 行为 text = (sections / "a.md").read_text(encoding="utf-8") blocks = rd.find_mermaid_blocks(text) assert len(blocks) == 2, f"expected 2 blocks, got {len(blocks)}" @case("missing image src → 占位文字, 不崩") def smoke_missing_image(tmp: Path) -> None: sections = tmp / "sections" sections.mkdir(parents=True) (sections / "01.md").write_text( "# 测试\n\n![不存在](figures/ghost.png)\n\n后面一段。\n", encoding="utf-8", ) out = tmp / "test.docx" proc = _run_render(sections, out) assert proc.returncode == 0, f"render_docx exited {proc.returncode}\nSTDERR: {proc.stderr}" from docx import Document doc = Document(str(out)) assert len(doc.inline_shapes) == 0 all_text = "\n".join(p.text for p in doc.paragraphs) assert "图片缺失" in all_text, "missing-image placeholder not rendered" assert "后面一段" in all_text, "後续段落丢了" def main() -> None: if not RENDER_DOCX.exists(): print(f"[ERR] render_docx.py not found: {RENDER_DOCX}", file=sys.stderr) sys.exit(2) with tempfile.TemporaryDirectory(prefix="zcbot_smoke_") as td: base = Path(td) smoke_happy(base / "happy") smoke_no_diagrams(base / "nodia") smoke_render_diagrams(base / "diag") smoke_missing_image(base / "ghost") print() print("[OK] all smoke cases passed") if __name__ == "__main__": main()