# zcbot/skills/ppt/scripts/svg_preview.py
# (source-viewer header: 376 lines, 16 KiB, Python)
"""svg_preview.py: 用无头 Chrome/Edge 把 SVG 页渲成 PNG,供交付前肉眼/vision 验收。
SVG-first 管线里 SVG 就是视觉真相(导出的 pptx 与之 1:1),所以验收直接渲 SVG 最忠实 ——
比渲最终 pptx 更早、更准地暴露"标题层级 / 卡片过挤过空 / 文字掉色 / 节奏单调"这类观感问题。
用法:
python svg_preview.py <project_dir> # 渲 <project_dir>/svg_output 全部页
python svg_preview.py <project_dir> --pages 1,3,5 # 只渲第 1/3/5 页(按文件排序)
python svg_preview.py <project_dir> -o <out_dir> # 指定 PNG 输出目录(默认 <project_dir>/preview)
python svg_preview.py <svg_dir_or_file> # 直接渲某个目录/单文件
约定:优先渲 <project_dir>/.build/svg_final(finalize 后的成品);没有则渲 <project_dir>/svg_output;再没有则退而渲 <project_dir> 本身。
依赖:本机装了 Chrome / Edge / Chromium(无需 pip 包);找不到浏览器时回退 cairosvg,两者都没有则报错退出。
产物默认 2x 超采样,够清晰看版面。
渲 project 目录时同步维护 `.build/acceptance.json` 验收记录(每页:源 sha1 +
渲染时间 + verdict)。看完 PNG 后用 accept_pages.py 标 pass/fail;svg_to_pptx
的导出 gate 要求每页 verdict=pass 且源文件此后未改动 —— "从没渲过就交付"
"改了页不复看"都会被导出边界挡下。
"""
from __future__ import annotations
import argparse
import hashlib
import json
import re
import subprocess
import sys
from datetime import datetime
try:
    # zcbot: Windows GBK console compatibility. Switch both standard streams
    # to UTF-8 and replace unencodable characters, so printing emoji / "©"
    # etc. does not raise UnicodeEncodeError.
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
    sys.stderr.reconfigure(encoding="utf-8", errors="replace")
except Exception:
    # Best effort only: if the streams cannot be reconfigured, keep whatever
    # encoding they already have and carry on.
    pass
import tempfile
import os
import shutil
from pathlib import Path
_CHROME_CANDIDATES = [
r"C:\Program Files\Google\Chrome\Application\chrome.exe",
r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
r"C:\Program Files\Microsoft\Edge\Application\msedge.exe",
r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",
]
# Linux / zcbot sandbox: the image ships chromium at /usr/bin/chromium (installed
# for mermaid-cli). The original ppt-master discovery only knew Windows
# chrome/edge, so SVG preview always failed inside the container. Mirror the
# discovery zcbot already uses in rendering/pdf.py so it resolves the bundled
# chromium instead.
_LINUX_CANDIDATES = [
"/usr/bin/chromium", "/usr/bin/chromium-browser",
"/usr/bin/google-chrome", "/usr/bin/google-chrome-stable",
]
_WHICH_NAMES = (
"chrome", "chrome.exe", "msedge", "msedge.exe",
"chromium", "chromium-browser", "google-chrome", "google-chrome-stable",
)
def find_browser() -> str | None:
"""Return a chromium/chrome/edge executable path, or None if none found.
Returns None (not raise) so the caller can fall back to the cairosvg
renderer when no browser exists (e.g. a sandbox image built without the
chromium layer).
"""
# Explicit override first (matches rendering/pdf.py's CHROMIUM/CHROME env).
env = os.environ.get("CHROMIUM") or os.environ.get("CHROME")
if env and (shutil.which(env) or Path(env).exists()):
return shutil.which(env) or env
for c in _CHROME_CANDIDATES + _LINUX_CANDIDATES:
if Path(c).exists():
return c
for name in _WHICH_NAMES:
p = shutil.which(name)
if p:
return p
return None
_VIEWBOX_RE = re.compile(r'viewBox\s*=\s*["\']\s*([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s*["\']')
_WH_RE = re.compile(r'\b(width|height)\s*=\s*["\']\s*([\d.]+)')
def _dims(svg_text: str) -> tuple[float, float]:
m = _VIEWBOX_RE.search(svg_text)
if m:
return float(m.group(3)), float(m.group(4))
w = h = None
for name, val in _WH_RE.findall(svg_text):
if name == "width":
w = float(val)
elif name == "height":
h = float(val)
return (w or 1280.0), (h or 720.0)
def _wrap_html(svg_path: Path, w: float, h: float) -> str:
# 内联引用本地 svg,固定画布尺寸、去边距,Chrome 截图即得整页
uri = svg_path.resolve().as_uri()
return (
"<!doctype html><html><head><meta charset='utf-8'><style>"
"html,body{margin:0;padding:0;background:#fff}"
f"img{{display:block;width:{w}px;height:{h}px}}"
"</style></head><body>"
f"<img src='{uri}'>"
"</body></html>"
)
def render(browser: str, svg_path: Path, out_png: Path, scale: float = 2.0) -> bool:
    """Render one SVG to PNG via headless chromium.

    Failures never raise for the expected cases (browser crash, timeout,
    browser not launchable): a concise warning is printed and False is
    returned, so one bad page does not abort the whole batch. The chromium
    stderr tail is included in the warning because a silent failure is
    otherwise indistinguishable from a missing browser.

    Args:
        browser: path of the chromium/chrome/edge executable.
        svg_path: SVG file to render.
        out_png: destination PNG; parent directories are created.
        scale: device scale factor (supersampling), 2.0 by default.

    Returns:
        True when this call produced a non-empty PNG, False otherwise.
    """
    svg_text = svg_path.read_text(encoding="utf-8", errors="ignore")
    w, h = _dims(svg_text)
    html = _wrap_html(svg_path, w, h)
    # chromium resolves --screenshot against its own cwd, not ours; a relative
    # path silently fails to write. Always pass absolute.
    out_png = out_png.resolve()
    out_png.parent.mkdir(parents=True, exist_ok=True)
    # Remove a PNG left by an earlier run: success is detected by the file
    # existing afterwards, so a stale file would make a failed render look
    # successful (and pair an old image with the new source).
    try:
        out_png.unlink()
    except FileNotFoundError:
        pass
    except OSError as exc:
        print(f" [warn] 无法清理旧 PNG {out_png.name}: {exc}")
        return False

    timeout_s = 120
    # TemporaryDirectory holds both the wrapper HTML and a throwaway
    # --user-data-dir; auto-cleaned on exit.
    with tempfile.TemporaryDirectory(prefix="svgprev-") as tmp:
        html_path = Path(tmp) / "page.html"
        html_path.write_text(html, encoding="utf-8")
        cmd = [
            browser, "--headless", "--disable-gpu", "--no-sandbox",
            # Required in the cap-dropped sandbox: chromium's own setuid
            # sandbox can't start (--no-sandbox), and the container's 64MB
            # /dev/shm is too small (--disable-dev-shm-usage), else chromium
            # crashes mid-render. Same flags rendering/pdf.py uses.
            "--disable-dev-shm-usage", "--user-data-dir=%s" % (Path(tmp) / "cr"),
            "--hide-scrollbars", "--force-device-scale-factor=%s" % scale,
            # Never ask for a zero-sized window, even for a degenerate canvas.
            "--window-size=%d,%d" % (max(1, round(w)), max(1, round(h))),
            "--default-background-color=FFFFFFFF",
            "--screenshot=%s" % str(out_png),
            html_path.as_uri(),
        ]
        try:
            proc = subprocess.run(
                cmd,
                stdout=subprocess.DEVNULL, stderr=subprocess.PIPE,
                timeout=timeout_s, check=False,
            )
        except subprocess.TimeoutExpired:
            # subprocess.run has already killed the child at this point.
            print(f" [warn] 渲染超时 {svg_path.name}(>{timeout_s}s)")
            return False
        except OSError as exc:
            # Executable missing / not runnable.
            print(f" [warn] 无法启动浏览器 {browser}: {exc}")
            return False
        if out_png.exists() and out_png.stat().st_size > 0:
            return True

    tail = (proc.stderr or b"").decode("utf-8", "replace").strip()[-400:]
    print(f" [warn] 渲染失败 {svg_path.name}(rc={proc.returncode})"
          + (f": {tail}" if tail else ""))
    return False
def _collect(target: Path) -> tuple[list[Path], Path, Path | None, str]:
    """Resolve what to render for ``target``.

    Returns:
        ``(svg files, default output dir, project_root or None, source label)``.
        ``project_root`` is non-None only when ``target`` is a standard project
        directory (label ``"svg_final"`` or ``"svg_output"``); in that case the
        caller records every rendered page in ``.build/acceptance.json``, which
        the export gate depends on. The label is ``"file"`` for a single SVG
        and ``"dir"`` for a plain directory.
    """
    # Project-local helpers; what directories they return is defined there.
    from project_utils import svg_final_dir, preview_dir

    if target.is_file() and target.suffix.lower() == ".svg":
        return [target], preview_dir(target.parent), None, "file"

    # Directory: prefer the finalized SVGs (.build/svg_final per the original
    # note) -- after finalize, icons and images are embedded, so the render is
    # the most faithful. Otherwise fall back to svg_output (acceptance while
    # still generating: the cairosvg fallback expands icons on the fly, while
    # chromium renders them as invisible placeholders). Otherwise treat the
    # target as a plain SVG directory.
    finalized = svg_final_dir(target)
    raw = target / "svg_output"
    if finalized.is_dir() and any(finalized.glob("*.svg")):
        svg_dir, source, root = finalized, "svg_final", target
    elif raw.is_dir():
        svg_dir, source, root = raw, "svg_output", target
    else:
        svg_dir, source, root = target, "dir", None

    pages = sorted(svg_dir.glob("*.svg"))
    return pages, preview_dir(target), root, source
def _sha1(path: Path) -> str:
return hashlib.sha1(path.read_bytes()).hexdigest()
def acceptance_path(project_root: Path) -> Path:
    """Return the location of the acceptance record inside a project."""
    return project_root / ".build" / "acceptance.json"


def load_acceptance(project_root: Path) -> dict:
    """Load the acceptance record, or a fresh empty one.

    The record is regenerable, so anything unusable is discarded and rebuilt
    rather than reported: a missing or unreadable file, bytes that are not
    valid UTF-8, malformed JSON, or JSON whose top level is not a dict with a
    dict under ``"pages"``.

    Returns:
        The parsed record, or ``{"version": 1, "pages": {}}``.
    """
    try:
        # utf-8-sig also accepts a file saved with a BOM by an external editor.
        text = acceptance_path(project_root).read_text(encoding="utf-8-sig")
        data = json.loads(text)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        data = None
    if isinstance(data, dict) and isinstance(data.get("pages"), dict):
        return data
    return {"version": 1, "pages": {}}


def save_acceptance(project_root: Path, data: dict) -> Path:
    """Write the acceptance record as UTF-8 JSON and return its path.

    The ``.build`` directory is created when missing. Non-ASCII text is
    written verbatim (``ensure_ascii=False``).
    """
    p = acceptance_path(project_root)
    p.parent.mkdir(parents=True, exist_ok=True)
    p.write_text(json.dumps(data, ensure_ascii=False, indent=1), encoding="utf-8")
    return p
def record_renders(project_root: Path, source: str,
                   rendered: list[tuple[Path, Path]]) -> None:
    """Merge this run's rendered pages into the acceptance record.

    Each entry is keyed by the SVG stem and anchored to the SHA-1 of the page's
    source file (``<project_root>/svg_output/<name>`` when it exists, otherwise
    the rendered SVG itself). A previous verdict (and its ``verdict_at``) is
    kept only when that SHA-1 is unchanged; otherwise the verdict becomes
    ``"pending"``, so an edited page has to be rendered and reviewed again.

    When rendering from ``svg_final``, a page whose ``svg_output`` source is
    more than one second newer than the rendered file is skipped (not
    recorded) and reported: the render shows an outdated version.

    Args:
        project_root: project directory holding ``.build/acceptance.json``.
        source: label of what was rendered (``"svg_final"`` / ``"svg_output"``).
        rendered: ``(svg, png)`` pairs that were rendered in this run.
    """
    data = load_acceptance(project_root)
    pages = data["pages"]
    now = datetime.now().isoformat(timespec="seconds")
    root_abs = project_root.resolve()  # resolved once, reused for every path
    stale: list[str] = []

    def rel(p: Path) -> str:
        # Store paths relative to the project root so the record survives the
        # project being moved; fall back to absolute (e.g. another drive).
        absolute = p.resolve()
        try:
            return str(absolute.relative_to(root_abs))
        except ValueError:
            return str(absolute)

    for svg, png in rendered:
        src = project_root / "svg_output" / svg.name
        if not src.exists():
            src = svg
        if source == "svg_final" and src.stat().st_mtime > svg.stat().st_mtime + 1:
            stale.append(svg.stem)
            continue
        sha = _sha1(src)
        old = pages.get(svg.stem)
        if not isinstance(old, dict):
            # Tolerate a hand-edited / malformed entry instead of crashing.
            old = {}
        keep_verdict = old.get("verdict") if old.get("source_sha1") == sha else None
        entry = {
            "png": rel(png),
            "source": rel(src),
            "source_sha1": sha,
            "rendered_from": source,
            "rendered_at": now,
            "verdict": keep_verdict or "pending",
        }
        if keep_verdict and old.get("verdict_at"):
            entry["verdict_at"] = old["verdict_at"]
        pages[svg.stem] = entry

    p = save_acceptance(project_root, data)
    if stale:
        shown = ", ".join(stale[:8])
        # Mark the list as truncated when more than 8 pages are stale.
        more = " …" if len(stale) > 8 else ""
        print(f" [warn] {len(stale)} 页的 svg_output 比 .build/svg_final 新,"
              f"渲的是旧版、未记入验收:{shown}{more} —— 先重跑 finalize_svg 再渲。")
    # Everything that is not "pass" still blocks the export gate.
    pending = sum(
        1 for v in pages.values()
        if not (isinstance(v, dict) and v.get("verdict") == "pass")
    )
    print(f"[svg_preview] 验收记录已更新:{p}(pending {pending} 页)")
    print(" 下一步:read/look_at_image 逐页过目,再用 accept_pages.py 标记 "
          "--pass <页名…> / --pass-all;导出 gate 只认 pass 且源未改动的页。")
def _select(files: list[Path], pages: str | None) -> list[Path]:
if not pages:
return files
idxs = []
for tok in pages.split(","):
tok = tok.strip()
if tok.isdigit():
idxs.append(int(tok) - 1)
return [files[i] for i in idxs if 0 <= i < len(files)]
def _expand_icons_for_cairo(svg_text: str) -> str:
"""Expand `<use data-icon>` placeholders to real `<g><path>` in-memory.
cairosvg does not understand the `data-icon` placeholder and errors on the
href-less `<use>` (observed: CAIRO_STATUS_INVALID_MATRIX on every icon-bearing
page). Reuse finalize's exact regex embedder so the fallback rasterizes the
same icons the native exporter draws. No-op when the SVG carries no
placeholders (e.g. already-finalized svg_final/).
"""
if 'data-icon' not in svg_text:
return svg_text
try:
from svg_finalize.embed_icons import process_svg_file, DEFAULT_ICONS_DIR
except Exception:
return svg_text # can't expand — let cairosvg try as-is
try:
import contextlib
import io as _io
with tempfile.TemporaryDirectory(prefix="svgprev-ico-") as tmp:
f = Path(tmp) / "x.svg"
f.write_text(svg_text, encoding="utf-8")
# Silence the embedder's per-file "[OK] x.svg (N icons)" chatter —
# the temp name is meaningless in preview output.
with contextlib.redirect_stdout(_io.StringIO()):
process_svg_file(f, DEFAULT_ICONS_DIR, dry_run=False, verbose=False,
fallback_dir=DEFAULT_ICONS_DIR)
return f.read_text(encoding="utf-8")
except Exception:
return svg_text
def render_cairosvg(cairosvg, svg_path: Path, out_png: Path, scale: float = 2.0) -> bool:
    """Rasterize one SVG via cairosvg (browser-free fallback).

    Icons are pre-expanded so cairosvg doesn't choke on `<use data-icon>`.
    Fidelity is slightly below chromium (no JS, limited filter support), but it
    lets visual acceptance run in a sandbox image built without chromium.

    Args:
        cairosvg: the imported ``cairosvg`` module (passed in by the caller).
        svg_path: SVG file to render.
        out_png: destination PNG; parent directories are created.
        scale: output scale factor, 2.0 by default.

    Returns:
        True when this call produced a non-empty PNG. On failure a warning is
        printed and False is returned.
    """
    svg_text = _expand_icons_for_cairo(
        svg_path.read_text(encoding="utf-8", errors="ignore"))
    out_png = out_png.resolve()
    out_png.parent.mkdir(parents=True, exist_ok=True)
    try:
        # Drop a PNG left by an earlier run first, so a failed rasterization
        # cannot be mistaken for a fresh render by an existence check.
        if out_png.exists():
            out_png.unlink()
        cairosvg.svg2png(bytestring=svg_text.encode("utf-8"),
                         write_to=str(out_png), scale=scale)
    except Exception as e:
        print(f" [warn] cairosvg 渲染失败 {svg_path.name}: {e}")
        return False
    return out_png.exists() and out_png.stat().st_size > 0
def main() -> None:
    """Command-line entry point: render the selected SVG pages to PNG.

    Steps: collect the SVG files for the target, apply ``--pages``, pick a
    renderer (a browser when one is found, otherwise cairosvg), render each
    page, then -- for a project directory -- record the pages rendered in this
    run in the acceptance record.

    Raises:
        SystemExit: when no SVG is found, ``--pages`` selects nothing, or
            neither a browser nor cairosvg is available.
    """
    ap = argparse.ArgumentParser(description="把 SVG 页渲成 PNG 供肉眼/vision 验收")
    ap.add_argument("target", type=Path, help="project_dir / svg 目录 / 单个 .svg 文件")
    ap.add_argument("--pages", default=None, help="只渲指定页,如 1,3,5(按文件排序)")
    ap.add_argument("-o", "--out", type=Path, default=None, help="PNG 输出目录")
    ap.add_argument("--scale", type=float, default=2.0, help="超采样倍数,默认 2")
    args = ap.parse_args()

    files, default_out, project_root, source = _collect(args.target)
    if not files:
        raise SystemExit(f"[fatal] 没找到 SVG:{args.target}")
    all_count = len(files)
    files = _select(files, args.pages)
    if not files:
        raise SystemExit(f"[fatal] --pages {args.pages} 没选中任何页(共 {all_count} 页)")
    out_dir = args.out or default_out

    # Renderer: prefer a real browser (chromium -- highest fidelity, matches the
    # native pptx). Fall back to cairosvg when no browser exists (sandbox image
    # built without the chromium layer) so visual acceptance still runs.
    browser = find_browser()
    cairo = None
    if browser:
        print(f"[svg_preview] browser={browser}")
    else:
        try:
            # Broad catch on purpose: importing cairosvg can fail with more
            # than ImportError (e.g. the native cairo library is missing).
            import cairosvg
        except Exception:
            raise SystemExit(
                "[fatal] 未找到 Chrome / Edge / Chromium,也无 cairosvg 兜底。"
                "沙箱镜像应自带 /usr/bin/chromium(rebuild sandbox 镜像),"
                "或 `pip install cairosvg`,或设 CHROMIUM 环境变量。")
        cairo = cairosvg
        print("[svg_preview] 未找到 chromium → 回退 cairosvg(图标已预展开;"
              "保真度略低于 chromium。装 chromium 或设 CHROMIUM 可用浏览器渲染)")

    done: list[tuple[Path, Path]] = []
    for svg in files:
        png = out_dir / (svg.stem + ".png")
        if browser:
            ok = render(browser, svg, png, scale=args.scale)
        else:
            ok = render_cairosvg(cairo, svg, png, scale=args.scale)
        # Trust the renderer's verdict, not mere file existence: a PNG left by
        # an earlier run (or an empty file) must not count as a fresh render,
        # otherwise it would be recorded against the new source.
        if ok:
            done.append((svg, png))
            print(f" [ok] {svg.name} -> {png}")
        else:
            print(f" [FAIL] {svg.name} 未生成 PNG")
    print(f"[svg_preview] {len(done)}/{len(files)} 页渲好,输出目录:{out_dir}")

    if project_root is not None and done:
        record_renders(project_root, source, done)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()