zcbot/skills/ppt/scripts/quality_check.py

259 lines
8.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""quality_check.py: 验收 .pptx,产出问题清单。
用法:
python quality_check.py <output.pptx> [--spec spec_lock.md]
检查项:
- 文件存在且 > 10KB
- 总页数与 spec 一致 (如提供 spec_lock.md)
- 每页有标题
- 每页 bullet ≤ 5 条
- 文字字号 ≥ 14pt (除页脚)
- 颜色集合 ≤ 5 种 (粗略统计)
- 没有 untitled / output / placeholder 等占位文件名
- **形状不越出画布边界** (left+width / top+height 超界即报)
- **textbox 文本估算行数 > 框高度** —— 推断溢出
退出码:
0 = 全通过
1 = 有 warning
2 = 致命问题 (文件缺失等)
"""
from __future__ import annotations
import argparse
import re
import sys
from pathlib import Path
try:
from pptx import Presentation
from pptx.util import Pt
except ImportError:
print("[fatal] pip install python-pptx", file=sys.stderr)
sys.exit(2)
# ---- spec 解析 (松散 markdown 解析,够用就行) ----
def parse_spec(spec_path: Path) -> dict:
    """Loosely parse a spec_lock markdown file into a dict of expectations.

    The parser is intentionally forgiving: it scans the raw text with
    regular expressions instead of parsing markdown structure.

    Args:
        spec_path: Path to the spec file. A falsy value, a missing path or
            a non-file path (e.g. a directory) yields an empty dict.

    Returns:
        A dict holding any of the following keys that could be extracted:
        ``page_count`` (int), ``canvas`` (one of "16:9", "4:3", "9:16",
        "1:1", "3:4") and ``colors`` (up to five distinct upper-case
        six-digit hex strings without the leading ``#``, in order of first
        appearance).
    """
    if not spec_path or not spec_path.is_file():
        return {}

    # Specs are commonly written with full-width colons ("页数:12",
    # "16:9"); fold them to ASCII so the patterns below match both forms.
    text = spec_path.read_text(encoding="utf-8").replace("\uff1a", ":")

    spec: dict = {}

    m = re.search(r"页数[:\s]*(\d+)", text)
    if m:
        spec["page_count"] = int(m.group(1))

    m = re.search(r"画布[:\s]*(16:9|4:3|9:16|1:1|3:4)", text)
    if m:
        spec["canvas"] = m.group(1)

    hexes = re.findall(r"#([0-9A-Fa-f]{6})", text)
    if hexes:
        # De-duplicate case-insensitively (order preserved) so a colour that
        # is mentioned several times does not use up several palette slots.
        distinct = list(dict.fromkeys(h.upper() for h in hexes))
        spec["colors"] = distinct[:5]

    return spec
# ---- 检查 ----
_EMU_PER_INCH = 914400  # shape/slide lengths are divided by this to get inches
_EDGE_TOLERANCE_IN = 0.02  # slack (about 0.5 mm) before a shape counts as out of bounds


def _shape_box_in(shape) -> tuple:
    """Return (left, top, width, height) of *shape* in inches.

    A dimension that is ``None`` counts as 0; if the geometry cannot be read
    at all, every value is 0.
    """
    try:
        return tuple(
            value / _EMU_PER_INCH if value is not None else 0
            for value in (shape.left, shape.top, shape.width, shape.height)
        )
    except (AttributeError, TypeError):
        return (0, 0, 0, 0)


def _bounds_warnings(where: str, box: tuple, slide_w_in: float,
                     slide_h_in: float) -> list:
    """Return warnings for a shape box that starts negative or leaves the canvas."""
    left_in, top_in, w_in, h_in = box
    tol = _EDGE_TOLERANCE_IN
    found = []
    if left_in < -tol or top_in < -tol:
        found.append(
            f"{where} 起点为负: "
            f"({left_in:.2f}, {top_in:.2f})"
        )
    if left_in + w_in > slide_w_in + tol:
        overflow = left_in + w_in - slide_w_in
        found.append(
            f"{where} 右越界 {overflow:.2f}in "
            f"(画布 {slide_w_in:.2f},shape 右 {left_in + w_in:.2f})"
        )
    if top_in + h_in > slide_h_in + tol:
        overflow = top_in + h_in - slide_h_in
        found.append(
            f"{where} 下越界 {overflow:.2f}in "
            f"(画布 {slide_h_in:.2f},shape 底 {top_in + h_in:.2f})"
        )
    return found


def _overflow_warnings(where: str, tf, text: str, w_in: float,
                       h_in: float) -> list:
    """Return a one-item list if the text is estimated not to fit its frame.

    Heuristic: at font size N pt a line W inches wide holds about W*72/N
    full-width characters; each non-empty paragraph takes at least one line
    plus its wraps; each line is 1.4 * N / 72 inches tall. The first explicit
    run font size found in the frame is used for every paragraph. Frames
    without an explicit size, or narrower than 0.5 in / shorter than 0.2 in,
    are skipped.
    """
    try:
        first_size_pt = next(
            (
                run.font.size.pt
                for para in tf.paragraphs
                for run in para.runs
                if run.font.size
            ),
            None,
        )
        if not (first_size_pt and w_in > 0.5 and h_in > 0.2):
            return []
        chars_per_line = max(1, int(w_in * 72 / first_size_pt))
        est_lines = 0
        for para in tf.paragraphs:
            ptxt = (para.text or "").strip()
            if not ptxt:
                continue
            est_lines += max(
                1,
                (len(ptxt) + chars_per_line - 1) // chars_per_line
            )
        line_height_in = first_size_pt * 1.4 / 72
        needed_h = est_lines * line_height_in
        if needed_h > h_in + 0.1:
            return [
                f"{where} 文本可能溢出 "
                f"(估 {est_lines} 行,需 {needed_h:.2f}in,"
                f"框高 {h_in:.2f}in): {text[:25]}..."
            ]
    except (AttributeError, TypeError, ValueError):
        # Best-effort estimate only; unreadable font data just skips the check.
        pass
    return []


def check_pptx(path: Path, spec: dict) -> tuple[list, list]:
    """Inspect a .pptx file and collect problems.

    Checks performed: file exists and is at least 10 KB, file name is not a
    generic placeholder, slide count matches ``spec["page_count"]`` (when
    given), every shape stays inside the canvas, text is estimated to fit its
    frame, every slide has a short single-line text treated as its title,
    at most 5 bullet paragraphs per slide, no explicit run font size below
    14 pt, and the number of explicit RGB font colours stays small.

    A one-line ``[info]`` summary is printed to stdout once the file loads.

    Args:
        path: Location of the .pptx file.
        spec: Expectations dict; the keys read here are ``page_count`` and
            ``colors`` (upper-case hex strings without ``#``).

    Returns:
        ``(errors, warnings)`` as two lists of message strings. A missing or
        unreadable file is reported in ``errors`` instead of raising.
    """
    errors, warnings = [], []
    if not path.exists():
        errors.append(f"文件不存在: {path}")
        return errors, warnings

    size_kb = path.stat().st_size / 1024
    if size_kb < 10:
        errors.append(f"文件太小 ({size_kb:.1f}KB),python-pptx 可能没写完")

    name = path.stem.lower()
    if name in ("untitled", "output", "presentation", "untitled1", "new", "test"):
        warnings.append(
            f"文件名 '{path.name}' 太通用,建议按主题命名"
        )

    # A truncated or corrupt file must surface as a reported error, not as a
    # traceback. The loader can fail with several unrelated exception types,
    # so this boundary deliberately catches broadly and records the cause.
    try:
        prs = Presentation(str(path))
    except Exception as exc:
        errors.append(f"无法打开 pptx ({type(exc).__name__}): {exc}")
        return errors, warnings

    n_slides = len(prs.slides)
    slide_w_in = prs.slide_width / _EMU_PER_INCH
    slide_h_in = prs.slide_height / _EMU_PER_INCH
    print(
        f"[info] 文件: {path.name} 大小: {size_kb:.1f}KB "
        f"页数: {n_slides} 画布: {slide_w_in:.2f}×{slide_h_in:.2f} in"
    )

    expected = spec.get("page_count")
    if expected and n_slides != expected:
        warnings.append(f"页数 {n_slides} 与 spec 期望 {expected} 不符")

    spec_colors = set(spec.get("colors", []))
    seen_colors: set[str] = set()

    for idx, slide in enumerate(prs.slides, 1):
        title_text = None
        bullet_count = 0
        small_font_count = 0

        for s_i, shape in enumerate(slide.shapes):
            box = _shape_box_in(shape)
            shape_label = getattr(shape, "name", None) or f"shape#{s_i}"
            # Keep slide number and shape name apart so "3" + "Title 1"
            # cannot be misread, matching the other "{idx} 页 ..." messages.
            where = f"{idx} 页 {shape_label}"

            # Bounds are checked for every shape, with or without text.
            warnings.extend(
                _bounds_warnings(where, box, slide_w_in, slide_h_in)
            )

            if not shape.has_text_frame:
                continue
            tf = shape.text_frame
            text = (tf.text or "").strip()
            if not text:
                continue

            # The first short single-line text on the slide is taken as its title.
            if title_text is None and len(text) <= 40 and "\n" not in text:
                title_text = text

            warnings.extend(
                _overflow_warnings(where, tf, text, box[2], box[3])
            )

            for para in tf.paragraphs:
                ptxt = (para.text or "").strip()
                if not ptxt:
                    continue
                if len(ptxt) > 1 and ptxt != title_text:
                    bullet_count += 1
                for run in para.runs:
                    # NOTE(review): the module docstring says footers are
                    # exempt from the font-size check, but no exemption is
                    # applied here; confirm the intended behaviour.
                    if run.font.size:
                        if run.font.size < Pt(14):
                            small_font_count += 1
                    if run.font.color and run.font.color.type:
                        try:
                            rgb = run.font.color.rgb
                            if rgb is not None:
                                seen_colors.add(str(rgb))
                        except (AttributeError, KeyError, ValueError):
                            # Colours without a readable RGB value are not counted.
                            pass

        if title_text is None:
            warnings.append(f"{idx} 页缺标题")
        elif len(title_text) > 30:
            warnings.append(
                f"{idx} 页标题过长 ({len(title_text)} 字): {title_text[:20]}..."
            )
        if bullet_count > 5:
            warnings.append(
                f"{idx} 页 bullet {bullet_count} 条 (上限 5),建议拆页或转图表"
            )
        if small_font_count > 0:
            warnings.append(
                f"{idx} 页有 {small_font_count} 处字号 < 14pt,投影看不清"
            )

    # NOTE(review): the warning fires above 6 colours although the message
    # (and the module docstring) name 5 as the ideal; presumably slack for
    # the rough count, confirm.
    if len(seen_colors) > 6:
        warnings.append(
            f"颜色 {len(seen_colors)} 种 (含不同灰阶),理想 ≤ 5;考虑收敛到三色制"
        )
    if spec_colors and seen_colors:
        unmatched = seen_colors - spec_colors
        if len(unmatched) > 3:
            warnings.append(
                f"出现 {len(unmatched)} 个 spec_lock 之外的颜色,可能用了 matplotlib 默认色板"
            )
    return errors, warnings
def main():
    """Command-line entry point: validate one .pptx and report findings.

    Arguments are the positional ``pptx`` path and an optional ``--spec``
    path. Errors and warnings are printed to stdout.

    Exit status: 0 when there are neither errors nor warnings, 2 when there
    is at least one error, 1 when there are only warnings.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("pptx", type=Path)
    ap.add_argument("--spec", type=Path, default=None,
                    help="spec_lock.md 路径")
    args = ap.parse_args()

    spec = parse_spec(args.spec) if args.spec else {}
    if args.spec and not args.spec.exists():
        # An explicitly requested spec that cannot be found would otherwise
        # be skipped without any signal; say so, but keep exit codes as-is.
        print(
            f"[warn] spec 文件不存在,已跳过 spec 检查: {args.spec}",
            file=sys.stderr,
        )
    if spec:
        print(f"[info] spec 已加载: {spec}")

    errors, warnings = check_pptx(args.pptx, spec)

    if errors:
        print("\n[errors]")
        for e in errors:
            print(f"{e}")
    if warnings:
        print("\n[warnings]")
        for w in warnings:
            print(f" ! {w}")

    if not errors and not warnings:
        print("\n[ok] 全部通过")
        sys.exit(0)
    sys.exit(2 if errors else 1)


if __name__ == "__main__":
    main()