"""quality_check.py: 验收 .pptx,产出问题清单。 用法: python quality_check.py [--spec spec.md] 检查项: - 文件存在且 > 10KB - 总页数与 spec 一致 (如提供 spec.md) - 每页有标题 - 每页 bullet ≤ 5 条 - 文字字号 ≥ 14pt (除页脚) - 非灰阶(彩色)≤ 3 种 (三色制;文字色 + 形状填充色都计,灰阶/白不计) - 出现 spec 之外的非灰阶色 (擅自换色 / 非主题色) - 没有 untitled / output / placeholder 等占位文件名 - **形状不越出画布边界** (left+width / top+height 超界即报) - **textbox 文本估算行数 > 框高度** —— 推断溢出 - **内容形状互相重叠** (文字压文字 / 文字压图标 / 图标压图标;装饰填充不计) 退出码: 0 = 全通过 1 = 有 warning 2 = 致命问题 (文件缺失等) """ from __future__ import annotations import argparse import re import sys from pathlib import Path try: from pptx import Presentation from pptx.util import Pt from pptx.enum.dml import MSO_FILL, MSO_COLOR_TYPE from pptx.enum.shapes import MSO_SHAPE_TYPE except ImportError: print("[fatal] pip install python-pptx", file=sys.stderr) sys.exit(2) # ---- 重叠检测参数 ---- # 只检"内容形状"(有文字 / 图片)两两重叠 —— 装饰形状(无文字纯色填充:品牌条/分隔线/ # 圆点/色块标签/装饰星箭头)天然不算内容,不参与;"文字叠在色块上"也不会误报(色块无 # 文字)。要抓的是文字压文字 / 文字压图标 / 图标压图标这类真缺陷。 _OVERLAP_MIN_DIM = 0.08 # in:交叠的宽和高都需超过此值(滤掉边缘贴合/发丝线) _OVERLAP_MIN_RATIO = 0.25 # 交叠面积 / 较小形状面积 超过此比例才算"压住" # ---- 颜色辅助 ---- def _is_neutral(hex6: str) -> bool: """灰阶/黑/白判定:R/G/B 极差 ≤ 12 即视为中性色(三色制里不计入彩色)。""" try: r, g, b = int(hex6[0:2], 16), int(hex6[2:4], 16), int(hex6[4:6], 16) except (ValueError, IndexError): return False return max(r, g, b) - min(r, g, b) <= 12 def _shape_fill_hex(shape) -> str | None: """取形状的纯色填充 hex(大写,无 #)。非实心 / 主题色 / 取不到 → None。""" try: fill = shape.fill if fill.type != MSO_FILL.SOLID: return None fc = fill.fore_color if fc.type != MSO_COLOR_TYPE.RGB: # 主题色访问 .rgb 会抛,先挡掉 return None return str(fc.rgb).upper() except (TypeError, AttributeError, KeyError, ValueError): return None # ---- spec 解析 (松散 markdown 解析,够用就行) ---- def parse_spec(spec_path: Path) -> dict: if not spec_path or not spec_path.exists(): return {} text = spec_path.read_text(encoding="utf-8") spec: dict = {} m = re.search(r"页数[:\s]*(\d+)", text) if m: spec["page_count"] = int(m.group(1)) m = re.search(r"画布[:\s]*(16:9|4:3|9:16|1:1|3:4)", text) if m: spec["canvas"] = m.group(1) hexes = re.findall(r"#([0-9A-Fa-f]{6})", text) if hexes: spec["colors"] = [h.upper() for h in hexes[:5]] return spec # ---- 检查 ---- def check_pptx(path: Path, spec: dict) -> tuple[list, list]: """returns (errors, warnings)""" errors, warnings = [], [] if not path.exists(): errors.append(f"文件不存在: {path}") return errors, warnings size_kb = path.stat().st_size / 1024 if size_kb < 10: errors.append(f"文件太小 ({size_kb:.1f}KB),python-pptx 可能没写完") name = path.stem.lower() if name in ("untitled", "output", "presentation", "untitled1", "new", "test"): warnings.append( f"文件名 '{path.name}' 太通用,建议按主题命名" ) prs = Presentation(path) n_slides = len(prs.slides) slide_w_in = prs.slide_width / 914400 # EMU → inch slide_h_in = prs.slide_height / 914400 print( f"[info] 文件: {path.name} 大小: {size_kb:.1f}KB " f"页数: {n_slides} 画布: {slide_w_in:.2f}×{slide_h_in:.2f} in" ) expected = spec.get("page_count") if expected and n_slides != expected: warnings.append(f"页数 {n_slides} 与 spec 期望 {expected} 不符") spec_colors = set(spec.get("colors", [])) seen_colors: set[str] = set() for idx, slide in enumerate(prs.slides, 1): title_text = None bullet_count = 0 small_font_count = 0 content_shapes: list = [] # (l, t, w, h, label, head) — 有文字 / 图片的形状 for s_i, shape in enumerate(slide.shapes): # ---- 形状越界检查 (任何 shape) ---- try: left_in = shape.left / 914400 if shape.left is not None else 0 top_in = shape.top / 914400 if shape.top is not None else 0 w_in = shape.width / 914400 if shape.width is not None else 0 h_in = shape.height / 914400 if shape.height is not None else 0 except (AttributeError, TypeError): left_in = top_in = w_in = h_in = 0 tol = 0.02 # 0.02 in 容忍 (约 0.5mm) shape_label = ( shape.name if hasattr(shape, "name") and shape.name else f"shape#{s_i}" ) if left_in < -tol or top_in < -tol: warnings.append( f"第 {idx} 页 {shape_label} 起点为负: " f"({left_in:.2f}, {top_in:.2f})" ) if left_in + w_in > slide_w_in + tol: overflow = left_in + w_in - slide_w_in warnings.append( f"第 {idx} 页 {shape_label} 右越界 {overflow:.2f}in " f"(画布 {slide_w_in:.2f},shape 右 {left_in + w_in:.2f})" ) if top_in + h_in > slide_h_in + tol: overflow = top_in + h_in - slide_h_in warnings.append( f"第 {idx} 页 {shape_label} 下越界 {overflow:.2f}in " f"(画布 {slide_h_in:.2f},shape 底 {top_in + h_in:.2f})" ) # ---- 形状填充色 (品牌条/徽章/圆点/标签/底块) ---- fill_hex = _shape_fill_hex(shape) if fill_hex: seen_colors.add(fill_hex) # ---- 收集"内容形状"供重叠检测 (有文字 / 图片) ---- try: is_pic = shape.shape_type == MSO_SHAPE_TYPE.PICTURE except (AttributeError, ValueError): is_pic = False head = "" if shape.has_text_frame: head = (shape.text_frame.text or "").strip() if (head or is_pic) and w_in > 0.05 and h_in > 0.05: content_shapes.append( (left_in, top_in, w_in, h_in, shape_label, head[:18] if head else "[图片]") ) if not shape.has_text_frame: continue tf = shape.text_frame text = (tf.text or "").strip() if not text: continue if title_text is None and len(text) <= 40 and "\n" not in text: title_text = text # ---- 文本溢出估算 ---- # 估算:中文字号 N pt 在框宽 W in 下,每行约 W*72/N 个中文字 # 非空段落数 + 长段落折行数 ≈ 实际行数 # 行数 × (size_pt * 1.4 / 72) > 框高 → 溢出 try: first_size_pt = None for para in tf.paragraphs: for run in para.runs: if run.font.size: first_size_pt = run.font.size.pt break if first_size_pt: break if first_size_pt and w_in > 0.5 and h_in > 0.2: chars_per_line = max(1, int(w_in * 72 / first_size_pt)) est_lines = 0 for para in tf.paragraphs: ptxt = (para.text or "").strip() if not ptxt: continue est_lines += max( 1, (len(ptxt) + chars_per_line - 1) // chars_per_line ) line_height_in = first_size_pt * 1.4 / 72 needed_h = est_lines * line_height_in if needed_h > h_in + 0.1: warnings.append( f"第 {idx} 页 {shape_label} 文本可能溢出 " f"(估 {est_lines} 行,需 {needed_h:.2f}in," f"框高 {h_in:.2f}in): {text[:25]}..." ) except (AttributeError, TypeError, ValueError): pass for para in tf.paragraphs: ptxt = (para.text or "").strip() if not ptxt: continue if len(ptxt) > 1 and ptxt != title_text: bullet_count += 1 for run in para.runs: if run.font.size: if run.font.size < Pt(14): small_font_count += 1 if run.font.color and run.font.color.type: try: rgb = run.font.color.rgb if rgb is not None: seen_colors.add(str(rgb)) except (AttributeError, KeyError, ValueError): pass if title_text is None: warnings.append(f"第 {idx} 页缺标题") elif len(title_text) > 30: warnings.append( f"第 {idx} 页标题过长 ({len(title_text)} 字): {title_text[:20]}..." ) if bullet_count > 5: warnings.append( f"第 {idx} 页 bullet {bullet_count} 条 (上限 5),建议拆页或转图表" ) if small_font_count > 0: warnings.append( f"第 {idx} 页有 {small_font_count} 处字号 < 14pt,投影看不清" ) # ---- 内容形状两两重叠 (文字压文字 / 文字压图标 / 图标压图标) ---- for i in range(len(content_shapes)): ax, ay, aw, ah, alab, ahead = content_shapes[i] for j in range(i + 1, len(content_shapes)): bx, by, bw, bh, blab, bhead = content_shapes[j] ix = min(ax + aw, bx + bw) - max(ax, bx) iy = min(ay + ah, by + bh) - max(ay, by) if ix <= _OVERLAP_MIN_DIM or iy <= _OVERLAP_MIN_DIM: continue min_area = min(aw * ah, bw * bh) if min_area <= 0: continue ratio = (ix * iy) / min_area if ratio >= _OVERLAP_MIN_RATIO: warnings.append( f"第 {idx} 页 内容重叠 {ratio * 100:.0f}%: " f'{alab}("{ahead}") × {blab}("{bhead}")' ) # 三色制按"非灰阶色"判定:灰/黑/白不计 (design_principles §2「其他全部用灰阶」) chromatic = {c for c in seen_colors if not _is_neutral(c)} if len(chromatic) > 3: warnings.append( f"非灰阶色 {len(chromatic)} 种 (三色制上限 3): " f"{', '.join('#' + c for c in sorted(chromatic))};收敛到主/辅/强调三色" ) if spec_colors: spec_chromatic = {c for c in spec_colors if not _is_neutral(c)} extra = chromatic - spec_chromatic if extra: warnings.append( f"出现 spec 之外的非灰阶色 {', '.join('#' + c for c in sorted(extra))};" f"擅自换色 / 非主题色 (spec 定的是 " f"{', '.join('#' + c for c in sorted(spec_chromatic))})" ) return errors, warnings def main(): ap = argparse.ArgumentParser() ap.add_argument("pptx", type=Path) ap.add_argument("--spec", type=Path, default=None, help="spec.md 路径") args = ap.parse_args() spec = parse_spec(args.spec) if args.spec else {} if spec: print(f"[info] spec 已加载: {spec}") errors, warnings = check_pptx(args.pptx, spec) if errors: print("\n[errors]") for e in errors: print(f" ✗ {e}") if warnings: print("\n[warnings]") for w in warnings: print(f" ! {w}") if not errors and not warnings: print("\n[ok] 全部通过") sys.exit(0) sys.exit(2 if errors else 1) if __name__ == "__main__": main()