245 lines
7.9 KiB
Python
245 lines
7.9 KiB
Python
"""Smoke: render_docx.py 图片 + mermaid 缓存路径。
|
|
|
|
构造一个临时 sections/, figures/ 结构, 跑 render_docx, 验证:
|
|
- mermaid 块 hash 在 figures/ 有对应 png → 走插图路径
|
|
- mermaid 块 hash 在 figures/ 没 png → 走 ASCII fallback (不崩, 文本保留)
|
|
- ![alt](path) 直接图片 → 走插图路径
|
|
- 图编号自增
|
|
- inline_shapes 数 = 命中插图的次数
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
# Repository root: two directory levels above this file.
ROOT = Path(__file__).resolve().parent.parent

# The renderer script exercised by the subprocess-based smoke cases.
RENDER_DOCX = ROOT.joinpath("skills", "proposal", "scripts", "render_docx.py")

# Interpreter used to launch the renderer: the project's Windows virtualenv
# python when it exists, otherwise the interpreter running this script
# (CI / unix fallback).
PYTHON = ROOT.joinpath(".venv", "Scripts", "python.exe")
PYTHON = PYTHON if PYTHON.exists() else Path(sys.executable)
|
|
|
|
|
|
def _run_render(sections: Path, out: Path) -> subprocess.CompletedProcess:
    """Run render_docx.py on *sections* in a child process, writing to *out*.

    The child inherits the current environment plus ``PYTHONIOENCODING=utf-8``
    so its output is UTF-8 even where stdout would otherwise default to GBK
    (Windows). stdout/stderr are captured and decoded as UTF-8 text.

    Returns:
        The completed process. The exit code is not checked here; callers
        inspect ``returncode`` themselves.
    """
    command = [
        str(PYTHON),
        str(RENDER_DOCX),
        str(sections),
        "--fund-type",
        "key_rd",
        "-o",
        str(out),
    ]
    child_env = {**os.environ, "PYTHONIOENCODING": "utf-8"}
    return subprocess.run(
        command,
        env=child_env,
        capture_output=True,
        text=True,
        encoding="utf-8",
    )
|
|
|
|
|
|
def mermaid_hash(source: str) -> str:
    """Return the 10-character cache key for a mermaid block.

    Leading and trailing whitespace is stripped before hashing. The key is
    the first 10 hex digits of the SHA-1 of the UTF-8 encoded text; this file
    uses it to build ``fig_<key>.png`` cache file names.
    """
    normalized = source.strip()
    digest = hashlib.sha1(normalized.encode("utf-8"))
    return digest.hexdigest()[:10]
|
|
|
|
|
|
def make_tiny_png(out: Path) -> None:
    """Write a small but real PNG (a three-bar chart) to *out*.

    A genuine image is generated with matplotlib so that python-docx can
    insert it via ``add_picture``.

    Args:
        out: Destination path of the PNG file.
    """
    import matplotlib

    # Non-interactive backend: no display is needed to render to a file.
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(figsize=(4, 2), dpi=100)
    try:
        ax.bar(["A", "B", "C"], [1, 3, 2], color="#c00000")
        ax.set_title("smoke")
        fig.savefig(str(out), bbox_inches="tight")
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
|
|
|
|
|
|
def case(name: str):
    """Build a decorator that reports a smoke case's progress on stdout.

    The decorated function prints ``[case] <name> ...`` before running, then
    ``OK`` on success or ``FAIL: <error>`` on failure. Failures are re-raised
    so the caller still sees the original exception.

    Args:
        name: Human-readable label printed for the case.

    Returns:
        A decorator that preserves the wrapped function's metadata and
        return value.
    """
    import functools

    def deco(fn):
        @functools.wraps(fn)
        def wrapped(*a, **kw):
            # Flush so the label is visible even if the case hangs or the
            # process dies before the line is completed.
            print(f"[case] {name} ...", end=" ", flush=True)
            try:
                result = fn(*a, **kw)
            except Exception as e:
                print(f"FAIL: {e}")
                raise
            print("OK")
            return result

        return wrapped

    return deco
|
|
|
|
|
|
@case("happy: cached mermaid + direct image + ASCII fallback")
def smoke_happy(tmp: Path) -> None:
    """Render a section mixing a cached mermaid block, an uncached one and a direct image.

    Builds ``sections/`` and ``figures/`` under *tmp*, runs render_docx.py on
    them and asserts that:

    * the mermaid block with a matching ``fig_<hash>.png`` becomes figure 1,
    * the directly referenced PNG becomes figure 2,
    * the mermaid block without a cached PNG keeps its source text and does
      not consume a figure number.

    Args:
        tmp: Not-yet-existing scratch directory for this case.
    """
    sections = tmp / "sections"
    figures = tmp / "figures"
    sections.mkdir(parents=True)
    figures.mkdir(parents=True)

    # Mermaid block #1 - cache hit: a PNG named after its hash is pre-created.
    m1 = (
        "%% caption: 关键问题与技术映射\n"
        "flowchart LR\n"
        " A --> B\n"
    )
    make_tiny_png(figures / f"fig_{mermaid_hash(m1)}.png")

    # Mermaid block #2 - no cached PNG, expected to take the ASCII fallback.
    m2 = (
        "%% caption: 缺缓存的图\n"
        "flowchart TB\n"
        " X --> Y\n"
    )

    # Direct image - a PNG generated here and referenced from the markdown.
    direct_png = figures / "direct.png"
    make_tiny_png(direct_png)

    # The fixture must actually reference the direct image; without this line
    # the "figures: 2" / "已有 PNG" assertions below cannot hold.
    # NOTE(review): the path is written relative to the .md file - confirm
    # this matches how render_docx.py resolves image sources.
    image_line = f"![已有 PNG](../figures/{direct_png.name})"

    markdown = "\n".join(
        [
            "# 测试章节",
            "",
            "这是一段散文。**加粗** 与 *斜体* 应当正确解析。",
            "",
            "```mermaid",
            m1.rstrip(),
            "```",
            "",
            "正文继续。下面是一张缺缓存的 mermaid:",
            "",
            "```mermaid",
            m2.rstrip(),
            "```",
            "",
            "再看一张直接引用的图:",
            "",
            image_line,
            "",
            "末尾段。",
            "",
        ]
    )
    (sections / "01_test.md").write_text(markdown, encoding="utf-8")

    out = tmp / "test.docx"
    proc = _run_render(sections, out)
    assert proc.returncode == 0, f"render_docx exited {proc.returncode}\nSTDERR: {proc.stderr}\nSTDOUT: {proc.stdout}"
    assert out.is_file() and out.stat().st_size > 1000, f"output docx not produced: {out}"

    # The report should state figures: 2 (mermaid #1 + direct image).
    assert "figures: 2" in proc.stdout, f"expected 'figures: 2' in stdout, got:\n{proc.stdout}"

    # Open the docx and verify its content.
    from docx import Document

    doc = Document(str(out))
    # Pictures actually inserted (inline_shapes counts add_picture calls) = 2.
    assert len(doc.inline_shapes) == 2, f"expected 2 inline shapes, got {len(doc.inline_shapes)}"

    all_text = "\n".join(p.text for p in doc.paragraphs)
    # The cached mermaid block is rendered as a picture plus caption.
    assert "图 1" in all_text and "关键问题与技术映射" in all_text, "missing fig 1 caption"
    # The direct image gets a caption too.
    assert "图 2" in all_text and "已有 PNG" in all_text, "missing fig 2 caption"
    # The uncached mermaid block falls back to text with its source preserved.
    assert "flowchart TB" in all_text and "X --> Y" in all_text, "ASCII fallback didn't preserve mermaid source"
    # No picture inserted means no figure number consumed, so no "图 3".
    assert "图 3" not in all_text, "ghost figure number for missed cache"
|
|
|
|
|
|
@case("happy: no diagrams at all (regression: existing flows unchanged)")
def smoke_no_diagrams(tmp: Path) -> None:
    """Render a section with prose and a markdown table but no diagrams.

    Regression guard: render_docx.py must report ``figures: 0``, insert no
    pictures, and produce exactly one table.

    Args:
        tmp: Not-yet-existing scratch directory for this case.
    """
    sections = tmp / "sections"
    sections.mkdir(parents=True)

    markdown = (
        "# 标题\n"
        "\n"
        "散文段落。**加粗**。\n"
        "\n"
        "| 列 1 | 列 2 |\n"
        "|---|---|\n"
        "| a | b |\n"
    )
    section_file = sections / "01.md"
    section_file.write_text(markdown, encoding="utf-8")

    out = tmp / "test.docx"
    proc = _run_render(sections, out)
    assert proc.returncode == 0, f"render_docx exited {proc.returncode}\nSTDERR: {proc.stderr}"
    assert "figures: 0" in proc.stdout, f"expected 'figures: 0' in stdout, got:\n{proc.stdout}"

    from docx import Document

    rendered = Document(str(out))
    assert len(rendered.inline_shapes) == 0
    # The single markdown table must come through as one docx table.
    assert len(rendered.tables) == 1
|
|
|
|
|
|
@case("render_diagrams: scans + hashes mermaid blocks, cache hit short-circuit")
def smoke_render_diagrams(tmp: Path) -> None:
    """Exercise the render_diagrams module through its Python API.

    PNG files named after each mermaid block's hash are placed in
    ``figures/`` beforehand, with the intent that every block is served from
    the cache and neither mmdc nor mermaid.ink is needed.

    Args:
        tmp: Not-yet-existing scratch directory for this case.
    """
    # Make the scripts directory importable only for the duration of the import.
    scripts_dir = ROOT / "skills" / "proposal" / "scripts"
    sys.path.insert(0, str(scripts_dir))
    try:
        import render_diagrams as rd
    finally:
        sys.path.pop(0)

    sections = tmp / "sections"
    figures = tmp / "figures"
    for directory in (sections, figures):
        directory.mkdir(parents=True)

    captioned = "%% caption: 图甲\nflowchart LR\n A --> B\n"
    uncaptioned = "flowchart TB\n X --> Y\n Y --> Z\n"

    doc_a = sections / "a.md"
    doc_a.write_text(
        f"# A\n\n```mermaid\n{captioned.rstrip()}\n```\n\n散文。\n\n```mermaid\n{uncaptioned.rstrip()}\n```\n",
        encoding="utf-8",
    )
    doc_b = sections / "b.md"
    doc_b.write_text("# B\n\n仅文本,无 mermaid。\n", encoding="utf-8")

    # Pre-populate one png per block so rendering takes the cache branch
    # and stays off the network.
    placeholder_png = b"\x89PNG\r\n\x1a\nfake"
    for block_source in (captioned, uncaptioned):
        cache_file = figures / f"fig_{rd.mermaid_hash(block_source)}.png"
        cache_file.write_bytes(placeholder_png)

    # Direct API call (no subprocess, so stdout encoding cannot interfere).
    rc = rd.render_sections(sections)
    assert rc == 0, f"render_sections rc={rc}"

    # Caption extraction.
    assert rd.extract_caption(captioned) == "图甲"
    assert rd.extract_caption(uncaptioned) is None

    # find_mermaid_blocks behaviour.
    blocks = rd.find_mermaid_blocks(doc_a.read_text(encoding="utf-8"))
    assert len(blocks) == 2, f"expected 2 blocks, got {len(blocks)}"
|
|
|
|
|
|
@case("missing image src → 占位文字, 不崩")
def smoke_missing_image(tmp: Path) -> None:
    """Render a section whose image reference points at a nonexistent file.

    Asserts that render_docx.py exits 0, inserts no picture, emits the
    "图片缺失" placeholder text, and keeps the paragraph after the image.

    Args:
        tmp: Not-yet-existing scratch directory for this case.
    """
    sections = tmp / "sections"
    sections.mkdir(parents=True)

    # The fixture must contain an image reference whose target does not
    # exist; without it there is nothing for the placeholder path to act on.
    # NOTE(review): the alt text and path are illustrative - any nonexistent
    # target should do; confirm against render_docx.py's image handling.
    markdown = "# 测试\n\n![ghost](../figures/ghost.png)\n\n后面一段。\n"
    (sections / "01.md").write_text(markdown, encoding="utf-8")

    out = tmp / "test.docx"
    proc = _run_render(sections, out)
    assert proc.returncode == 0, f"render_docx exited {proc.returncode}\nSTDERR: {proc.stderr}"

    from docx import Document

    doc = Document(str(out))
    assert len(doc.inline_shapes) == 0
    all_text = "\n".join(p.text for p in doc.paragraphs)
    assert "图片缺失" in all_text, "missing-image placeholder not rendered"
    assert "后面一段" in all_text, "後续段落丢了"
|
|
|
|
|
|
def main() -> None:
    """Run every smoke case in order, each in its own scratch subdirectory.

    Exits with status 2 when render_docx.py is missing. Otherwise all cases
    share one temporary directory that is removed afterwards; the first
    failing case propagates its exception and stops the run.
    """
    if not RENDER_DOCX.exists():
        print(f"[ERR] render_docx.py not found: {RENDER_DOCX}", file=sys.stderr)
        sys.exit(2)

    # (case function, scratch subdirectory name), executed in this order.
    scenarios = (
        (smoke_happy, "happy"),
        (smoke_no_diagrams, "nodia"),
        (smoke_render_diagrams, "diag"),
        (smoke_missing_image, "ghost"),
    )
    with tempfile.TemporaryDirectory(prefix="zcbot_smoke_") as td:
        workspace = Path(td)
        for run_case, subdir in scenarios:
            run_case(workspace / subdir)

    print()
    print("[OK] all smoke cases passed")
|
|
|
|
|
|
# Run the smoke suite only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|