# zcbot/skills/ppt/scripts/quality_check.py
#
# (297 lines, 10 KiB, Python)
#
# NOTE: this file contains Unicode characters that might be confused with
# other characters (it appears to be the full-width punctuation used in the
# Chinese messages). If that is intentional, the warning can be ignored.

"""quality_check.py: 验收 .pptx,产出问题清单。
用法:
python quality_check.py <output.pptx> [--spec spec.md]
检查项:
- 文件存在且 > 10KB
- 总页数与 spec 一致 (如提供 spec.md)
- 每页有标题
- 每页 bullet ≤ 5 条
- 文字字号 ≥ 14pt (注: 当前实现未豁免页脚,页脚文字同样计入)
- 非灰阶(彩色)≤ 3 种 (三色制;文字色 + 形状填充色都计,灰阶/白不计)
- 出现 spec 之外的非灰阶色 (擅自换色 / 非主题色)
- 没有 untitled / output / presentation / new / test 等占位文件名
- **形状不越出画布边界** (left+width / top+height 超界即报)
- **textbox 文本估算行数 > 框高度** —— 推断溢出
退出码:
0 = 全通过
1 = 有 warning
2 = 致命问题 (文件缺失等)
"""
from __future__ import annotations
import argparse
import re
import sys
from pathlib import Path
try:
from pptx import Presentation
from pptx.util import Pt
from pptx.enum.dml import MSO_FILL, MSO_COLOR_TYPE
except ImportError:
print("[fatal] pip install python-pptx", file=sys.stderr)
sys.exit(2)
# ---- 颜色辅助 ----
def _is_neutral(hex6: str) -> bool:
    """Tell whether a 6-digit hex colour is neutral (grey, black or white).

    A colour counts as neutral when the spread between its largest and
    smallest RGB channel is at most 12; neutral colours are not counted as
    chromatic by the three-colour rule.  Text that cannot be parsed as hex
    is reported as not neutral.
    """
    try:
        channels = [int(hex6[start:start + 2], 16) for start in (0, 2, 4)]
    except (ValueError, IndexError):
        return False
    spread = max(channels) - min(channels)
    return spread <= 12
def _shape_fill_hex(shape) -> str | None:
    """Return the shape's solid-fill colour as upper-case hex without ``#``.

    ``None`` is returned when the fill is not solid, when the fill colour is
    not a plain RGB value (e.g. a theme colour), or when any lookup fails
    with one of the handled exceptions.
    """
    try:
        shape_fill = shape.fill
        if shape_fill.type == MSO_FILL.SOLID:
            colour = shape_fill.fore_color
            # Reading ``.rgb`` on a theme colour raises, so filter on the
            # colour type before touching it.
            if colour.type == MSO_COLOR_TYPE.RGB:
                return str(colour.rgb).upper()
        return None
    except (TypeError, AttributeError, KeyError, ValueError):
        return None
# ---- spec 解析 (松散 markdown 解析,够用就行) ----
def parse_spec(spec_path: Path | None) -> dict:
    """Loosely extract the checkable fields from a markdown spec file.

    Args:
        spec_path: Path of the spec file; ``None`` or a non-existent path
            yields an empty result.

    Returns:
        A dict holding only the keys that were found:

        * ``"page_count"`` (int) - the number following ``页数``.
        * ``"canvas"`` (str) - the aspect ratio following ``画布``.
        * ``"colors"`` (list[str]) - up to five distinct ``#RRGGBB`` values
          in order of first appearance, upper-cased and without ``#``.
    """
    if not spec_path or not spec_path.exists():
        return {}
    text = spec_path.read_text(encoding="utf-8")
    spec: dict = {}

    # Chinese markdown normally writes the full-width colon ":" (U+FF1A),
    # so accept it next to the ASCII colon and whitespace.
    m = re.search(r"页数[::\s]*(\d+)", text)
    if m:
        spec["page_count"] = int(m.group(1))

    m = re.search(r"画布[::\s]*(16:9|4:3|9:16|1:1|3:4)", text)
    if m:
        spec["canvas"] = m.group(1)

    hexes = re.findall(r"#([0-9A-Fa-f]{6})", text)
    if hexes:
        # De-duplicate before truncating: a colour mentioned several times
        # must not use up the five slots and push later palette colours out.
        distinct = list(dict.fromkeys(h.upper() for h in hexes))
        spec["colors"] = distinct[:5]
    return spec
# ---- 检查 ----
_EMU_PER_INCH = 914400  # python-pptx reports lengths in EMU; 914400 EMU == 1 inch

# File stems considered too generic to be a deliverable name.
_GENERIC_STEMS = ("untitled", "output", "presentation", "untitled1", "new", "test")


def _shape_box_inches(shape) -> tuple:
    """Return ``(left, top, width, height)`` of *shape* in inches.

    A missing (``None``) value becomes 0; if the geometry cannot be read at
    all, the whole box is reported as zeros.
    """
    try:
        return tuple(
            value / _EMU_PER_INCH if value is not None else 0
            for value in (shape.left, shape.top, shape.width, shape.height)
        )
    except (AttributeError, TypeError):
        return (0, 0, 0, 0)


def _bounds_warnings(prefix: str, box: tuple, slide_w_in: float,
                     slide_h_in: float, tol: float = 0.02) -> list:
    """Return the warnings for a shape box that leaves the slide canvas.

    *tol* is the slack in inches (0.02 in is about 0.5 mm).
    """
    left_in, top_in, w_in, h_in = box
    found = []
    if left_in < -tol or top_in < -tol:
        found.append(
            f"{prefix} 起点为负: "
            f"({left_in:.2f}, {top_in:.2f})"
        )
    if left_in + w_in > slide_w_in + tol:
        overflow = left_in + w_in - slide_w_in
        found.append(
            f"{prefix} 右越界 {overflow:.2f}in "
            f"(画布 {slide_w_in:.2f},shape 右 {left_in + w_in:.2f})"
        )
    if top_in + h_in > slide_h_in + tol:
        overflow = top_in + h_in - slide_h_in
        found.append(
            f"{prefix} 下越界 {overflow:.2f}in "
            f"(画布 {slide_h_in:.2f},shape 底 {top_in + h_in:.2f})"
        )
    return found


def _overflow_warning(prefix: str, tf, text: str, w_in: float, h_in: float):
    """Return a warning when the estimated text height exceeds the box, else ``None``.

    Estimate: at N pt a box W inches wide holds about ``W * 72 / N``
    full-width characters per line; every non-empty paragraph contributes
    its wrapped line count, and each line is ``N * 1.4 / 72`` inches tall.
    Only the first explicit run size found in the frame is used.
    """
    try:
        first_size_pt = next(
            (
                run.font.size.pt
                for para in tf.paragraphs
                for run in para.runs
                if run.font.size
            ),
            None,
        )
        # Without an explicit size, or for tiny boxes, the estimate is skipped.
        if not (first_size_pt and w_in > 0.5 and h_in > 0.2):
            return None
        chars_per_line = max(1, int(w_in * 72 / first_size_pt))
        est_lines = 0
        for para in tf.paragraphs:
            ptxt = (para.text or "").strip()
            if not ptxt:
                continue
            est_lines += max(
                1,
                (len(ptxt) + chars_per_line - 1) // chars_per_line
            )
        line_height_in = first_size_pt * 1.4 / 72
        needed_h = est_lines * line_height_in
        if needed_h > h_in + 0.1:
            return (
                f"{prefix} 文本可能溢出 "
                f"(估 {est_lines} 行,需 {needed_h:.2f}in,"
                f"框高 {h_in:.2f}in): {text[:25]}..."
            )
    except (AttributeError, TypeError, ValueError):
        # Best-effort heuristic: a shape whose text cannot be measured is
        # simply not reported.
        return None
    return None


def check_pptx(path: Path, spec: dict) -> tuple[list, list]:
    """Run every quality check on one .pptx file.

    Checks performed: file exists and is at least 10 KB, generic file name,
    slide count against ``spec["page_count"]``, shapes leaving the canvas,
    estimated text overflow, a title per slide (the first non-empty text of
    at most 40 characters on a single line), at most 5 bullets per slide,
    no run below 14 pt, at most 3 non-neutral colours, and no non-neutral
    colour outside ``spec["colors"]``.

    NOTE: the font-size check covers every text run; footers are not
    exempted.

    Args:
        path: The .pptx file to inspect.
        spec: Parsed spec (may be empty); the keys read are ``"page_count"``
            and ``"colors"``.

    Returns:
        ``(errors, warnings)`` as two lists of message strings.  A file that
        is missing or cannot be opened is reported in ``errors`` and stops
        the run early instead of raising.
    """
    errors: list = []
    warnings: list = []
    if not path.exists():
        errors.append(f"文件不存在: {path}")
        return errors, warnings

    size_kb = path.stat().st_size / 1024
    if size_kb < 10:
        errors.append(f"文件太小 ({size_kb:.1f}KB),python-pptx 可能没写完")
    if path.stem.lower() in _GENERIC_STEMS:
        warnings.append(
            f"文件名 '{path.name}' 太通用,建议按主题命名"
        )

    # A truncated or corrupt file must surface as a fatal finding rather
    # than as an uncaught traceback.
    try:
        prs = Presentation(path)
    except Exception as exc:  # noqa: BLE001 - any load failure is fatal here
        errors.append(f"无法打开 pptx 文件 ({type(exc).__name__}): {exc}")
        return errors, warnings

    n_slides = len(prs.slides)
    slide_w_in = prs.slide_width / _EMU_PER_INCH
    slide_h_in = prs.slide_height / _EMU_PER_INCH
    print(
        f"[info] 文件: {path.name} 大小: {size_kb:.1f}KB "
        f"页数: {n_slides} 画布: {slide_w_in:.2f}×{slide_h_in:.2f} in"
    )

    expected = spec.get("page_count")
    if expected and n_slides != expected:
        warnings.append(f"页数 {n_slides} 与 spec 期望 {expected} 不符")

    spec_colors = set(spec.get("colors", []))
    seen_colors: set[str] = set()
    min_font = Pt(14)

    for idx, slide in enumerate(prs.slides, 1):
        title_text = None
        bullet_count = 0
        small_font_count = 0

        for s_i, shape in enumerate(slide.shapes):
            box = _shape_box_inches(shape)
            shape_label = getattr(shape, "name", None) or f"shape#{s_i}"
            # Keep the page number and the shape name apart in the message.
            prefix = f"{idx} 页 {shape_label}"

            # Canvas bounds apply to every shape, with or without text.
            warnings.extend(
                _bounds_warnings(prefix, box, slide_w_in, slide_h_in)
            )

            # Shape fills count towards the colour budget as well.
            fill_hex = _shape_fill_hex(shape)
            if fill_hex:
                seen_colors.add(fill_hex)

            if not shape.has_text_frame:
                continue
            tf = shape.text_frame
            text = (tf.text or "").strip()
            if not text:
                continue

            if title_text is None and len(text) <= 40 and "\n" not in text:
                title_text = text

            overflow_msg = _overflow_warning(prefix, tf, text, box[2], box[3])
            if overflow_msg:
                warnings.append(overflow_msg)

            for para in tf.paragraphs:
                ptxt = (para.text or "").strip()
                if not ptxt:
                    continue
                if len(ptxt) > 1 and ptxt != title_text:
                    bullet_count += 1
                for run in para.runs:
                    size = run.font.size
                    if size and size < min_font:
                        small_font_count += 1
                    color = run.font.color
                    if color and color.type:
                        try:
                            rgb = color.rgb
                            if rgb is not None:
                                seen_colors.add(str(rgb).upper())
                        except (AttributeError, KeyError, ValueError):
                            # Colours with no readable RGB value are
                            # left out of the colour budget.
                            pass

        if title_text is None:
            warnings.append(f"{idx} 页缺标题")
        elif len(title_text) > 30:
            warnings.append(
                f"{idx} 页标题过长 ({len(title_text)} 字): {title_text[:20]}..."
            )
        if bullet_count > 5:
            warnings.append(
                f"{idx} 页 bullet {bullet_count} 条 (上限 5),建议拆页或转图表"
            )
        if small_font_count > 0:
            warnings.append(
                f"{idx} 页有 {small_font_count} 处字号 < 14pt,投影看不清"
            )

    # The three-colour rule counts only non-neutral colours; grey, black
    # and white are free.
    chromatic = {c for c in seen_colors if not _is_neutral(c)}
    if len(chromatic) > 3:
        warnings.append(
            f"非灰阶色 {len(chromatic)} 种 (三色制上限 3): "
            f"{', '.join('#' + c for c in sorted(chromatic))};收敛到主/辅/强调三色"
        )
    if spec_colors:
        spec_chromatic = {c for c in spec_colors if not _is_neutral(c)}
        extra = chromatic - spec_chromatic
        if extra:
            warnings.append(
                f"出现 spec 之外的非灰阶色 {', '.join('#' + c for c in sorted(extra))};"
                f"擅自换色 / 非主题色 (spec 定的是 "
                f"{', '.join('#' + c for c in sorted(spec_chromatic))})"
            )
    return errors, warnings
def main():
    """Command-line entry point: parse arguments, run the checks, print findings.

    Exits with status 0 when nothing was found, 1 when there are only
    warnings, and 2 when there is at least one error.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("pptx", type=Path)
    ap.add_argument("--spec", type=Path, default=None,
                    help="spec.md 路径")
    args = ap.parse_args()

    spec = parse_spec(args.spec) if args.spec else {}
    if spec:
        print(f"[info] spec 已加载: {spec}")
    elif args.spec:
        # A requested spec that yields nothing would otherwise silently
        # disable the page-count and colour checks.
        print(
            f"[warn] spec 未加载 (文件不存在或未解析出任何字段): {args.spec}",
            file=sys.stderr,
        )

    errors, warnings = check_pptx(args.pptx, spec)
    if errors:
        print("\n[errors]")
        for e in errors:
            print(e)
    if warnings:
        print("\n[warnings]")
        for w in warnings:
            print(f" ! {w}")
    if not errors and not warnings:
        print("\n[ok] 全部通过")
        sys.exit(0)
    sys.exit(2 if errors else 1)


if __name__ == "__main__":
    main()