"""预处理 sections/*.md 里的 mermaid 块 → 缓存为 figures/fig_.png。不改动 sections/*.md(mermaid 源是真相,留着方便迭代),只往 /../figures/ 下落 PNG 缓存,以 mermaid 源的 sha1 前缀为文件名。render_docx.py 在遇到 ```mermaid 块时按相同 hash 查 figures/,有就插图 + 图题,没有就 ASCII 兜底。渲染后端选择 (按优先级): 1. 本地 mmdc (mermaid-cli) —— 最高质量,要 Node.js + npm i -g @mermaid-js/mermaid-cli 2. mermaid.ink 公网 API —— 不装东西,要联网两种都没,留警告退出 0(让流水线继续),render_docx.py 走 ASCII fallback。题注约定:mermaid 块第一行可写 %% caption: 关键技术关系架构 caption 不写也能渲染,题号自动在 render_docx.py 里递增。用法: python render_diagrams.py /sections/ """ from __future__ import annotations import argparse import base64 import hashlib import re import shutil import subprocess import sys import tempfile import urllib.error import urllib.request from pathlib import Path _FENCE_OPEN_RE = re.compile(r"^\s*```\s*mermaid\s*$") _FENCE_CLOSE_RE = re.compile(r"^\s*```\s*$") _CAPTION_RE = re.compile(r"^\s*%%\s*caption\s*:\s*(.+?)\s*$", re.IGNORECASE) MERMAID_INK_URL = "https://mermaid.ink/img/{payload}?type=png&bgColor=FFFFFF" def mermaid_hash(source: str) -> str: """对 mermaid 源算 sha1, 取前 10 位作为文件名稳定 id。""" return hashlib.sha1(source.strip().encode("utf-8")).hexdigest()[:10] def extract_caption(source: str) -> str | None: for ln in source.splitlines(): m = _CAPTION_RE.match(ln) if m: return m.group(1).strip() return None def find_mermaid_blocks(md_text: str) -> list[str]: """返回 .md 里所有 mermaid 块的源码(不含 ``` fence)。""" blocks: list[str] = [] lines = md_text.splitlines() i = 0 n = len(lines) while i < n: if _FENCE_OPEN_RE.match(lines[i]): buf: list[str] = [] i += 1 while i < n and not _FENCE_CLOSE_RE.match(lines[i]): buf.append(lines[i]) i += 1 blocks.append("\n".join(buf)) i += 1 else: i += 1 return blocks def render_via_mmdc(source: str, out_png: Path) -> bool: """有 mmdc 就用 mmdc, 输出 png 到 out_png。成功 True, 失败 False。""" mmdc = shutil.which("mmdc") if not mmdc: return False with tempfile.NamedTemporaryFile("w", suffix=".mmd", delete=False, encoding="utf-8") as tf: tf.write(source) tmp_path = Path(tf.name) try: proc = subprocess.run( [mmdc, "-i", str(tmp_path), "-o", str(out_png), "-b", "white", "--quiet"], capture_output=True, text=True, timeout=60, ) if proc.returncode != 0: print(f" [mmdc] returncode={proc.returncode}: {proc.stderr.strip()[:200]}", file=sys.stderr) return False return out_png.exists() except (subprocess.TimeoutExpired, OSError) as e: print(f" [mmdc] error: {e}", file=sys.stderr) return False finally: try: tmp_path.unlink() except OSError: pass def render_via_mermaid_ink(source: str, out_png: Path) -> bool: """通过 mermaid.ink 公网 API 渲染。要联网。成功 True, 失败 False。""" payload = base64.urlsafe_b64encode(source.strip().encode("utf-8")).decode("ascii").rstrip("=") url = MERMAID_INK_URL.format(payload=payload) try: req = urllib.request.Request(url, headers={"User-Agent": "zcbot-proposal/1.0"}) with urllib.request.urlopen(req, timeout=30) as resp: if resp.status != 200: print(f" [mermaid.ink] HTTP {resp.status}", file=sys.stderr) return False data = resp.read() if not data or len(data) < 100: print(f" [mermaid.ink] payload too small ({len(data)} bytes)", file=sys.stderr) return False out_png.write_bytes(data) return True except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError, OSError) as e: print(f" [mermaid.ink] error: {e}", file=sys.stderr) return False def render_one(source: str, out_png: Path) -> str: """渲染一块 mermaid → png。返回使用的后端名 / "skip" / "fail"。""" if out_png.exists(): return "cache" if render_via_mmdc(source, out_png): return "mmdc" if render_via_mermaid_ink(source, out_png): return "mermaid.ink" return "fail" def render_sections(sections_dir: Path) -> int: if not sections_dir.is_dir(): print(f"[ERR] sections dir not found: {sections_dir}", file=sys.stderr) return 2 figures_dir = sections_dir.parent / "figures" figures_dir.mkdir(parents=True, exist_ok=True) md_files = sorted(sections_dir.glob("*.md")) if not md_files: print(f"[ERR] no .md found in {sections_dir}", file=sys.stderr) return 2 total = 0 by_backend: dict[str, int] = {} fail_blocks: list[tuple[Path, str]] = [] for md in md_files: text = md.read_text(encoding="utf-8") blocks = find_mermaid_blocks(text) if not blocks: continue for src in blocks: total += 1 h = mermaid_hash(src) png = figures_dir / f"fig_{h}.png" backend = render_one(src, png) by_backend[backend] = by_backend.get(backend, 0) + 1 cap = extract_caption(src) or "(no caption)" mark = {"cache": "·", "mmdc": "+", "mermaid.ink": "+", "fail": "x"}[backend] print(f" {mark} [{backend:11s}] {md.name} :: {h} :: {cap}") if backend == "fail": fail_blocks.append((md, cap)) print() print(f"[OK] processed {total} mermaid block(s) -> {figures_dir}") for b, c in sorted(by_backend.items()): print(f" {b}: {c}") if fail_blocks: print() print(f"[WARN] {len(fail_blocks)} block(s) failed to render. render_docx.py 会走 ASCII fallback.") print(f" 要画真图: 装 mmdc (npm i -g @mermaid-js/mermaid-cli) 或保证联网走 mermaid.ink。") for md, cap in fail_blocks: print(f" - {md.name} :: {cap}") return 0 def main() -> None: ap = argparse.ArgumentParser(description="预处理 sections/*.md 的 mermaid 块 → figures/*.png") ap.add_argument("sections_dir", type=Path, help="sections/*.md 目录") args = ap.parse_args() rc = render_sections(args.sections_dir) sys.exit(rc) if __name__ == "__main__": main()