zcbot/scripts/_smoke_proposal_diagrams.py

245 lines
7.9 KiB
Python

"""Smoke: render_docx.py 图片 + mermaid 缓存路径。
构造一个临时 sections/, figures/ 结构, 跑 render_docx, 验证:
- mermaid 块 hash 在 figures/ 有对应 png → 走插图路径
- mermaid 块 hash 在 figures/ 没 png → 走 ASCII fallback (不崩, 文本保留)
- ![](path) 直接图片 → 走插图路径
- 图编号自增
- inline_shapes 数 = 命中插图的次数
"""
from __future__ import annotations

import functools
import hashlib
import os
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
# Project root: two levels above this script file.
ROOT = Path(__file__).resolve().parent.parent
# The renderer script exercised by the subprocess-based smoke cases.
RENDER_DOCX = ROOT.joinpath("skills", "proposal", "scripts", "render_docx.py")
# Prefer the project's Windows-layout virtualenv interpreter when present;
# otherwise (CI / unix) reuse the interpreter running this script.
PYTHON = ROOT.joinpath(".venv", "Scripts", "python.exe")
if not PYTHON.exists():
    PYTHON = Path(sys.executable)
def _run_render(sections: Path, out: Path) -> subprocess.CompletedProcess:
    """Run render_docx.py on ``sections`` in a child process, writing to ``out``.

    The child is started with ``PYTHONIOENCODING=utf-8`` and its captured
    output is decoded as UTF-8, as a safeguard against a Windows GBK stdout.

    Returns:
        The completed process (stdout/stderr captured as text). The exit code
        is not checked here; callers assert on it themselves.
    """
    child_env = dict(os.environ, PYTHONIOENCODING="utf-8")
    command = [
        str(PYTHON),
        str(RENDER_DOCX),
        str(sections),
        "--fund-type", "key_rd",
        "-o", str(out),
    ]
    return subprocess.run(
        command,
        env=child_env,
        capture_output=True,
        text=True,
        encoding="utf-8",
    )
def mermaid_hash(source: str) -> str:
    """Return the 10-character hex key used to name a mermaid block's PNG.

    The key is the first 10 characters of the SHA-1 hex digest of ``source``
    after stripping surrounding whitespace and encoding it as UTF-8.
    """
    normalized = source.strip().encode("utf-8")
    digest = hashlib.sha1(normalized).hexdigest()
    return digest[:10]
def make_tiny_png(out: Path) -> None:
    """Write a small, genuine PNG (a three-bar chart) to ``out`` with matplotlib.

    A real image is produced so that python-docx can ``add_picture`` it.
    """
    import matplotlib

    # Select the Agg backend before pyplot is imported.
    matplotlib.use("Agg")
    from matplotlib import pyplot

    labels = ["A", "B", "C"]
    heights = [1, 3, 2]
    figure, axes = pyplot.subplots(figsize=(4, 2), dpi=100)
    axes.bar(labels, heights, color="#c00000")
    axes.set_title("smoke")
    figure.savefig(str(out), bbox_inches="tight")
    pyplot.close(figure)
def case(name: str):
    """Decorator factory that labels a smoke case and reports its outcome.

    The wrapped callable prints ``[case] <name> ...`` before running, then
    ``OK`` on success or ``FAIL: <exception>`` on failure. A failure is
    re-raised so the run still aborts with the original traceback.

    Args:
        name: Human-readable label printed for the case.

    Returns:
        A decorator. The decorated function keeps its own ``__name__`` and
        ``__doc__`` (via ``functools.wraps``) and its return value is passed
        through to the caller.
    """
    def deco(fn):
        @functools.wraps(fn)
        def wrapped(*a, **kw):
            print(f"[case] {name} ...", end=" ")
            try:
                result = fn(*a, **kw)
            except Exception as e:
                # Report on the same line as the label, then let it propagate.
                print(f"FAIL: {e}")
                raise
            print("OK")
            return result
        return wrapped
    return deco
@case("happy: cached mermaid + direct image + ASCII fallback")
def smoke_happy(tmp: Path) -> None:
    """Render a section mixing a cached mermaid block, an uncached one and a direct image.

    Asserts that the cached mermaid block and the direct image end up as two
    inline pictures captioned as figures 1 and 2, and that the uncached
    mermaid block keeps its source text without consuming a figure number.
    """
    sections_dir = tmp / "sections"
    figures_dir = tmp / "figures"
    for directory in (sections_dir, figures_dir):
        directory.mkdir(parents=True)

    # Mermaid block #1: a PNG named after its hash is created -> cache hit.
    cached_src = (
        "%% caption: 关键问题与技术映射\n"
        "flowchart LR\n"
        " A --> B\n"
    )
    cached_png = figures_dir / f"fig_{mermaid_hash(cached_src)}.png"
    make_tiny_png(cached_png)

    # Mermaid block #2: no PNG is created for it -> expected ASCII fallback.
    uncached_src = (
        "%% caption: 缺缓存的图\n"
        "flowchart TB\n"
        " X --> Y\n"
    )

    # Direct image reference: a PNG we generate ourselves.
    make_tiny_png(figures_dir / "direct.png")

    # Write the markdown section.
    markdown = f"""# 测试章节
这是一段散文。**加粗** 与 *斜体* 应当正确解析。
```mermaid
{cached_src.rstrip()}
```
正文继续。下面是一张缺缓存的 mermaid:
```mermaid
{uncached_src.rstrip()}
```
再看一张直接引用的图:
![已有 PNG: 一柱形示例](../figures/direct.png)
末尾段。
"""
    (sections_dir / "01_test.md").write_text(markdown, encoding="utf-8")

    docx_path = tmp / "test.docx"
    result = _run_render(sections_dir, docx_path)
    assert result.returncode == 0, f"render_docx exited {result.returncode}\nSTDERR: {result.stderr}\nSTDOUT: {result.stdout}"
    assert docx_path.is_file() and docx_path.stat().st_size > 1000, f"output docx not produced: {docx_path}"
    # The report must state exactly two figures (mermaid #1 + direct image).
    assert "figures: 2" in result.stdout, f"expected 'figures: 2' in stdout, got:\n{result.stdout}"

    # Open the docx and inspect its content.
    from docx import Document

    document = Document(str(docx_path))
    # inline_shapes counts pictures that were really inserted: expect 2.
    shape_count = len(document.inline_shapes)
    assert shape_count == 2, f"expected 2 inline shapes, got {shape_count}"

    all_text = "\n".join(paragraph.text for paragraph in document.paragraphs)
    # The cached mermaid block becomes a picture plus caption.
    assert "图 1" in all_text and "关键问题与技术映射" in all_text, "missing fig 1 caption"
    # The direct image gets a caption as well.
    assert "图 2" in all_text and "已有 PNG" in all_text, "missing fig 2 caption"
    # The uncached mermaid block falls back to text; its source is preserved.
    assert "flowchart TB" in all_text and "X --> Y" in all_text, "ASCII fallback didn't preserve mermaid source"
    # No picture inserted means no figure number consumed, so no "图 3".
    assert "图 3" not in all_text, "ghost figure number for missed cache"
@case("happy: no diagrams at all (regression: existing flows unchanged)")
def smoke_no_diagrams(tmp: Path) -> None:
    """Render a section containing prose and one markdown table but no diagrams.

    Regression guard: the renderer must report zero figures, insert no
    pictures, and produce exactly one table.
    """
    sections_dir = tmp / "sections"
    sections_dir.mkdir(parents=True)
    markdown = (
        "# 标题\n\n"
        "散文段落。**加粗**。\n\n"
        "| 列 1 | 列 2 |\n"
        "|---|---|\n"
        "| a | b |\n"
    )
    (sections_dir / "01.md").write_text(markdown, encoding="utf-8")

    docx_path = tmp / "test.docx"
    result = _run_render(sections_dir, docx_path)
    assert result.returncode == 0, f"render_docx exited {result.returncode}\nSTDERR: {result.stderr}"
    assert "figures: 0" in result.stdout, f"expected 'figures: 0' in stdout, got:\n{result.stdout}"

    from docx import Document

    document = Document(str(docx_path))
    assert len(document.inline_shapes) == 0
    # The markdown table must come out as a single docx table.
    assert len(document.tables) == 1
@case("render_diagrams: scans + hashes mermaid blocks, cache hit short-circuit")
def smoke_render_diagrams(tmp: Path) -> None:
    """Exercise the render_diagrams module API with a fully pre-populated cache.

    Needs neither mmdc nor mermaid.ink: a placeholder PNG is created up front
    for every mermaid block, so render_sections is expected to be served
    entirely from the cache. Also checks caption extraction and block scanning.
    """
    scripts_dir = str(ROOT / "skills" / "proposal" / "scripts")
    sys.path.insert(0, scripts_dir)
    try:
        import render_diagrams as rd
    finally:
        # Remove our own entry by value rather than popping index 0: the
        # imported module may itself have prepended to sys.path, in which
        # case index 0 is no longer the entry added above.
        try:
            sys.path.remove(scripts_dir)
        except ValueError:
            pass  # already removed by someone else; nothing to clean up

    sections = tmp / "sections"
    figures = tmp / "figures"
    sections.mkdir(parents=True)
    figures.mkdir(parents=True)

    m1 = "%% caption: 图甲\nflowchart LR\n A --> B\n"
    m2 = "flowchart TB\n X --> Y\n Y --> Z\n"
    (sections / "a.md").write_text(
        f"# A\n\n```mermaid\n{m1.rstrip()}\n```\n\n散文。\n\n```mermaid\n{m2.rstrip()}\n```\n",
        encoding="utf-8",
    )
    (sections / "b.md").write_text("# B\n\n仅文本,无 mermaid。\n", encoding="utf-8")

    # Pre-fill one PNG per block so rendering takes the cache branch and never
    # touches the network. The bytes are only a PNG signature plus filler.
    # NOTE(review): presumably the cache check only tests file existence - confirm.
    for src in (m1, m2):
        h = rd.mermaid_hash(src)
        (figures / f"fig_{h}.png").write_bytes(b"\x89PNG\r\n\x1a\nfake")

    # Call the API directly (no subprocess) to avoid stdout-encoding interference.
    rc = rd.render_sections(sections)
    assert rc == 0, f"render_sections rc={rc}"

    # Caption extraction.
    assert rd.extract_caption(m1) == "图甲"
    assert rd.extract_caption(m2) is None

    # find_mermaid_blocks behavior.
    text = (sections / "a.md").read_text(encoding="utf-8")
    blocks = rd.find_mermaid_blocks(text)
    assert len(blocks) == 2, f"expected 2 blocks, got {len(blocks)}"
@case("missing image src → 占位文字, 不崩")
def smoke_missing_image(tmp: Path) -> None:
    """Render a section whose image reference points at a file that does not exist.

    The renderer must exit cleanly, insert no picture, emit the
    missing-image placeholder text, and keep the paragraph after the image.
    """
    sections_dir = tmp / "sections"
    sections_dir.mkdir(parents=True)
    markdown = "# 测试\n\n![不存在](figures/ghost.png)\n\n后面一段。\n"
    (sections_dir / "01.md").write_text(markdown, encoding="utf-8")

    docx_path = tmp / "test.docx"
    result = _run_render(sections_dir, docx_path)
    assert result.returncode == 0, f"render_docx exited {result.returncode}\nSTDERR: {result.stderr}"

    from docx import Document

    document = Document(str(docx_path))
    assert len(document.inline_shapes) == 0
    all_text = "\n".join(paragraph.text for paragraph in document.paragraphs)
    assert "图片缺失" in all_text, "missing-image placeholder not rendered"
    assert "后面一段" in all_text, "後续段落丢了"
def main() -> None:
    """Run every smoke case, each in its own subdirectory of one temp directory.

    Exits with status 2 (after a message on stderr) when render_docx.py is
    missing. A failing case propagates its exception to the caller.
    """
    if not RENDER_DOCX.exists():
        print(f"[ERR] render_docx.py not found: {RENDER_DOCX}", file=sys.stderr)
        sys.exit(2)

    # (case function, subdirectory name) pairs, executed in this order.
    plan = (
        (smoke_happy, "happy"),
        (smoke_no_diagrams, "nodia"),
        (smoke_render_diagrams, "diag"),
        (smoke_missing_image, "ghost"),
    )
    with tempfile.TemporaryDirectory(prefix="zcbot_smoke_") as td:
        base = Path(td)
        for run_case, subdir in plan:
            run_case(base / subdir)
    print()
    print("[OK] all smoke cases passed")


if __name__ == "__main__":
    main()