# zcbot/skills/ppt/scripts/quality_check.py

"""quality_check.py: 验收 .pptx,产出问题清单。

用法:
    python quality_check.py <output.pptx> [--spec spec_lock.md]

检查项:
  - 文件存在且 > 10KB
  - 总页数与 spec 一致 (如提供 spec_lock.md)
  - 每页有标题
  - 每页 bullet ≤ 5 条
  - 文字字号 ≥ 14pt (除页脚)
  - 颜色集合理想 ≤ 5 种 (粗略统计,实际超过 6 种才告警)
  - 没有 untitled / output / placeholder 等占位文件名
  - **形状不越出画布边界** (left+width / top+height 超界即报)
  - **textbox 文本估算所需高度 (估算行数 × 行高) > 框高度** —— 推断溢出

退出码:
  0 = 全通过
  1 = 有 warning
  2 = 致命问题 (文件缺失等)
"""
from __future__ import annotations

import argparse
import re
import sys
from pathlib import Path

try:
    from pptx import Presentation
    from pptx.util import Pt
except ImportError:
    print("[fatal] pip install python-pptx", file=sys.stderr)
    sys.exit(2)


# ---- spec 解析 (松散 markdown 解析,够用就行) ----

def parse_spec(spec_path: Path) -> dict:
    """Extract the checkable constraints from a loosely formatted spec_lock.md.

    The file is scanned with regular expressions rather than parsed as
    Markdown, so only the first occurrence of each key is used.

    Args:
        spec_path: Path to the spec file. ``None``, a missing path, or a
            path that is not a regular file all yield an empty result.

    Returns:
        A dict holding any subset of:
          - ``"page_count"``: int following the ``页数`` key.
          - ``"canvas"``: one of ``16:9``, ``4:3``, ``9:16``, ``1:1``, ``3:4``
            following the ``画布`` key.
          - ``"colors"``: up to five distinct ``RRGGBB`` strings (upper case,
            no ``#``) in order of first appearance.
    """
    # is_file() rather than exists(): a directory would pass exists() and then
    # make read_text() raise.
    if not spec_path or not spec_path.is_file():
        return {}
    text = spec_path.read_text(encoding="utf-8")
    spec: dict = {}

    # Between a key and its value allow whitespace, an ASCII or fullwidth
    # (U+FF1A) colon, and Markdown emphasis asterisks, so "页数: 12",
    # "页数\uFF1A12" and "**页数**: 12" are all recognised.
    sep = r"[\s:\uFF1A*]*"

    m = re.search(r"页数" + sep + r"(\d+)", text)
    if m:
        spec["page_count"] = int(m.group(1))

    m = re.search(r"画布" + sep + r"(16:9|4:3|9:16|1:1|3:4)", text)
    if m:
        spec["canvas"] = m.group(1)

    hexes = re.findall(r"#([0-9A-Fa-f]{6})", text)
    if hexes:
        # De-duplicate case-insensitively before truncating, so a colour that
        # is mentioned twice does not crowd out the rest of the palette.
        distinct = dict.fromkeys(h.upper() for h in hexes)
        spec["colors"] = list(distinct)[:5]

    return spec


# ---- 检查 ----

_EMU_PER_INCH = 914400  # slide and shape lengths are converted EMU -> inch
_EDGE_TOL_IN = 0.02  # slack (about 0.5 mm) before a shape counts as off-canvas


def _shape_box_in(shape) -> tuple:
    """Return ``(left, top, width, height)`` of *shape* in inches.

    A coordinate that is ``None`` becomes 0; if reading or converting any
    coordinate fails, the whole box falls back to zeros.
    """
    try:
        return tuple(
            v / _EMU_PER_INCH if v is not None else 0
            for v in (shape.left, shape.top, shape.width, shape.height)
        )
    except (AttributeError, TypeError):
        return (0, 0, 0, 0)


def _bounds_warnings(idx, shape_label, box, slide_w_in, slide_h_in) -> list:
    """Return warnings for a shape whose box leaves the slide canvas."""
    left_in, top_in, w_in, h_in = box
    found = []
    if left_in < -_EDGE_TOL_IN or top_in < -_EDGE_TOL_IN:
        found.append(
            f"第 {idx} 页 {shape_label} 起点为负: "
            f"({left_in:.2f}, {top_in:.2f})"
        )
    if left_in + w_in > slide_w_in + _EDGE_TOL_IN:
        overflow = left_in + w_in - slide_w_in
        found.append(
            f"第 {idx} 页 {shape_label} 右越界 {overflow:.2f}in "
            f"(画布 {slide_w_in:.2f},shape 右 {left_in + w_in:.2f})"
        )
    if top_in + h_in > slide_h_in + _EDGE_TOL_IN:
        overflow = top_in + h_in - slide_h_in
        found.append(
            f"第 {idx} 页 {shape_label} 下越界 {overflow:.2f}in "
            f"(画布 {slide_h_in:.2f},shape 底 {top_in + h_in:.2f})"
        )
    return found


def _overflow_warnings(idx, shape_label, tf, text, w_in, h_in) -> list:
    """Return a one-item list if the text probably does not fit its box.

    Estimate: at N pt a line of width W inches holds about ``W * 72 / N``
    characters (every character is treated as one font-size wide). Each
    non-empty paragraph takes at least one line plus its wraps, and each line
    is ``N * 1.4 / 72`` inches tall. The size of the first run that sets an
    explicit size is used for the whole frame; frames without one, and very
    small boxes, are skipped.
    """
    try:
        size_pt = None
        for para in tf.paragraphs:
            for run in para.runs:
                if run.font.size:
                    size_pt = run.font.size.pt
                    break
            if size_pt:
                break
        if not (size_pt and w_in > 0.5 and h_in > 0.2):
            return []

        chars_per_line = max(1, int(w_in * 72 / size_pt))
        est_lines = 0
        for para in tf.paragraphs:
            ptxt = (para.text or "").strip()
            if not ptxt:
                continue
            est_lines += max(
                1, (len(ptxt) + chars_per_line - 1) // chars_per_line
            )
        line_height_in = size_pt * 1.4 / 72
        needed_h = est_lines * line_height_in
        if needed_h > h_in + 0.1:
            return [
                f"第 {idx} 页 {shape_label} 文本可能溢出 "
                f"(估 {est_lines} 行,需 {needed_h:.2f}in,"
                f"框高 {h_in:.2f}in): {text[:25]}..."
            ]
    except (AttributeError, TypeError, ValueError):
        # The estimate is best-effort; an oddly formed run must not abort
        # the remaining checks.
        pass
    return []


def check_pptx(path: Path, spec: dict) -> tuple[list, list]:
    """Run every quality check on one .pptx file.

    Checks: file presence and size, generic file name, page count against
    the spec, shapes leaving the canvas, estimated text overflow, a title on
    every slide, bullet count per slide, runs smaller than 14 pt, and the
    number of distinct explicit RGB text colours (also compared with the spec
    palette when one is given).

    Args:
        path: The .pptx file to inspect.
        spec: Result of ``parse_spec``; ``page_count`` and ``colors`` are
            read, and an empty dict disables the spec-dependent checks.

    Returns:
        ``(errors, warnings)`` as two lists of message strings. A missing or
        unparseable file is reported in ``errors`` and ends the run early
        instead of raising.
    """
    errors, warnings = [], []

    if not path.exists():
        errors.append(f"文件不存在: {path}")
        return errors, warnings

    size_kb = path.stat().st_size / 1024
    if size_kb < 10:
        errors.append(f"文件太小 ({size_kb:.1f}KB),python-pptx 可能没写完")

    name = path.stem.lower()
    if name in ("untitled", "output", "presentation", "untitled1", "new", "test"):
        warnings.append(
            f"文件名 '{path.name}' 太通用,建议按主题命名"
        )

    # A truncated or corrupt package makes the parser raise (bad zip, missing
    # parts, malformed XML, ...). Report that as a fatal error so the caller
    # exits with the documented code instead of dying on a traceback. The
    # catch is deliberately broad: this is the boundary to a third-party
    # parser and the failure is reported, not hidden.
    try:
        prs = Presentation(str(path))
    except Exception as exc:
        errors.append(f"无法解析 pptx ({type(exc).__name__}): {exc}")
        return errors, warnings

    n_slides = len(prs.slides)
    slide_w_in = prs.slide_width / _EMU_PER_INCH
    slide_h_in = prs.slide_height / _EMU_PER_INCH
    print(
        f"[info] 文件: {path.name}  大小: {size_kb:.1f}KB  "
        f"页数: {n_slides}  画布: {slide_w_in:.2f}×{slide_h_in:.2f} in"
    )

    expected = spec.get("page_count")
    if expected and n_slides != expected:
        warnings.append(f"页数 {n_slides} 与 spec 期望 {expected} 不符")

    spec_colors = set(spec.get("colors", []))
    seen_colors: set[str] = set()
    min_font = Pt(14)  # loop-invariant threshold for the small-font check

    for idx, slide in enumerate(prs.slides, 1):
        title_text = None
        bullet_count = 0
        small_font_count = 0

        for s_i, shape in enumerate(slide.shapes):
            # ---- canvas bounds (applies to every shape) ----
            box = _shape_box_in(shape)
            shape_label = getattr(shape, "name", None) or f"shape#{s_i}"
            warnings.extend(
                _bounds_warnings(idx, shape_label, box, slide_w_in, slide_h_in)
            )

            if not shape.has_text_frame:
                continue
            tf = shape.text_frame
            text = (tf.text or "").strip()
            if not text:
                continue

            # Heuristic: the first short single-line text on a slide is
            # taken to be its title.
            if title_text is None and len(text) <= 40 and "\n" not in text:
                title_text = text

            # ---- text overflow estimate ----
            _, _, w_in, h_in = box
            warnings.extend(
                _overflow_warnings(idx, shape_label, tf, text, w_in, h_in)
            )

            # ---- bullets, font sizes, colours ----
            for para in tf.paragraphs:
                ptxt = (para.text or "").strip()
                if not ptxt:
                    continue
                # Every non-title paragraph longer than one character
                # counts as a bullet.
                if len(ptxt) > 1 and ptxt != title_text:
                    bullet_count += 1
                for run in para.runs:
                    if run.font.size and run.font.size < min_font:
                        small_font_count += 1
                    color = run.font.color
                    if color and color.type:
                        try:
                            rgb = color.rgb
                            if rgb is not None:
                                seen_colors.add(str(rgb))
                        except (AttributeError, KeyError, ValueError):
                            # Colours without a readable RGB value are
                            # left out of the tally.
                            pass

        if title_text is None:
            warnings.append(f"第 {idx} 页缺标题")
        elif len(title_text) > 30:
            warnings.append(
                f"第 {idx} 页标题过长 ({len(title_text)} 字): {title_text[:20]}..."
            )

        if bullet_count > 5:
            warnings.append(
                f"第 {idx} 页 bullet {bullet_count} 条 (上限 5),建议拆页或转图表"
            )

        if small_font_count > 0:
            warnings.append(
                f"第 {idx} 页有 {small_font_count} 处字号 < 14pt,投影看不清"
            )

    if len(seen_colors) > 6:
        warnings.append(
            f"颜色 {len(seen_colors)} 种 (含不同灰阶),理想 ≤ 5;考虑收敛到三色制"
        )

    if spec_colors and seen_colors:
        unmatched = seen_colors - spec_colors
        if len(unmatched) > 3:
            warnings.append(
                f"出现 {len(unmatched)} 个 spec_lock 之外的颜色,可能用了 matplotlib 默认色板"
            )

    return errors, warnings


def main():
    """Command-line entry point: check one .pptx and exit with a status code.

    Exit status: 0 when there are no findings, 1 when there are only
    warnings, 2 when at least one error was reported.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("pptx", type=Path)
    ap.add_argument("--spec", type=Path, default=None,
                    help="spec_lock.md 路径")
    args = ap.parse_args()

    spec = {}
    if args.spec is not None:
        if args.spec.is_file():
            spec = parse_spec(args.spec)
        else:
            # Say so explicitly: otherwise the run looks as if the spec
            # checks had been applied when they were skipped.
            print(
                f"[warn] spec 文件不存在,已跳过 spec 检查: {args.spec}",
                file=sys.stderr,
            )
    if spec:
        print(f"[info] spec 已加载: {spec}")

    errors, warnings = check_pptx(args.pptx, spec)

    if errors:
        print("\n[errors]")
        for e in errors:
            print(f"  ✗ {e}")
    if warnings:
        print("\n[warnings]")
        for w in warnings:
            print(f"  ! {w}")

    if not errors and not warnings:
        print("\n[ok] 全部通过")
        sys.exit(0)
    sys.exit(2 if errors else 1)


if __name__ == "__main__":
    main()