399 lines
16 KiB
Python
399 lines
16 KiB
Python
"""quality_check.py: 验收 .pptx,产出问题清单。
|
||
|
||
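Usage:
    python quality_check.py <output.pptx> [--spec spec.md]

Checks:
    - the file exists and is larger than 10 KB
    - the slide count matches the spec (when a spec.md is provided)
    - every slide has a title
    - at most 5 bullets per column on every slide
    - text font size is at least 14 pt (label-like shapes such as footers are exempt)
    - at most 3 chromatic hue families (three-color rule; text colors and shape
      fill colors both count, grayscale / white / low-saturation tints do not)
    - chromatic colors whose hue family is not in the spec (unauthorized color
      change / non-theme color)
    - no generic file names such as untitled / output / presentation
    - **shapes must not extend past the canvas** (reported when left+width or
      top+height exceeds the slide)
    - **estimated text lines in a textbox exceed the box height** -- inferred overflow
    - **content shapes overlapping each other** (text over text / text over icon /
      icon over icon; decorative fills are ignored)

Exit codes:
    0 = all checks passed
    1 = warnings present
    2 = fatal problem (missing file, etc.)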
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import colorsys
|
||
import re
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
try:
|
||
from pptx import Presentation
|
||
from pptx.util import Pt
|
||
from pptx.enum.dml import MSO_FILL, MSO_COLOR_TYPE
|
||
from pptx.enum.shapes import MSO_SHAPE_TYPE
|
||
except ImportError:
|
||
print("[fatal] pip install python-pptx", file=sys.stderr)
|
||
sys.exit(2)
|
||
|
||
|
||
# ---- 重叠检测参数 ----
|
||
# 只检"内容形状"(有文字 / 图片)两两重叠 —— 装饰形状(无文字纯色填充:品牌条/分隔线/
|
||
# 圆点/色块标签/装饰星箭头)天然不算内容,不参与;"文字叠在色块上"也不会误报(色块无
|
||
# 文字)。要抓的是文字压文字 / 文字压图标 / 图标压图标这类真缺陷。
|
||
_OVERLAP_MIN_DIM = 0.08 # in:交叠的宽和高都需超过此值(滤掉边缘贴合/发丝线)
|
||
_OVERLAP_MIN_RATIO = 0.25 # 交叠面积 / 较小形状面积 超过此比例才算"压住"
|
||
|
||
|
||
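# ---- Color helpers ----
# The three-color rule is judged by the number of hue families, not the number
# of distinct hex values: primary and secondary colors often share a hue, and
# the darker steps of the primary color (deep red #8A0000) or its light washes
# (soft tint #F2CCCC) are derived from those three colors and must not be
# counted as "new" colors. Therefore:
#   - low-saturation light colors / grayscale -> neutral (card and wash
#     backgrounds), not counted as chromatic
#   - high-saturation colors count as chromatic but are bucketed by hue, so
#     light and dark shades of one hue collapse into a single family
# As a result "white background + red cards + deep-red gradient + gold accent"
# is 2 hue families and is not misreported as exceeding 3 colors.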
|
||
|
||
def _hsv(hex6: str):
|
||
r, g, b = (int(hex6[0:2], 16) / 255, int(hex6[2:4], 16) / 255,
|
||
int(hex6[4:6], 16) / 255)
|
||
return colorsys.rgb_to_hsv(r, g, b) # h,s,v ∈ [0,1]
|
||
|
||
|
||
def _is_chromatic(hex6: str) -> bool:
    """Report whether a hex color counts as chromatic for the three-color rule.

    A color is chromatic when its HSV saturation is at least 0.30 and its HSV
    value is at least 0.18. Anything below either threshold (light washes,
    grays, near-black) is neutral. A string that cannot be parsed as hex is
    also treated as not chromatic.
    """
    try:
        _hue, saturation, value = _hsv(hex6)
    except (ValueError, IndexError):
        return False
    if saturation < 0.30:
        return False
    return value >= 0.18
|
||
|
||
|
||
def _hue_family(hex6: str) -> int:
    """Return the 30-degree hue bucket of a hex color.

    Light and dark shades of one hue share a bucket, so they collapse into a
    single color family.

    NOTE(review): bucket 0 starts exactly at 0 degrees, so hues in
    [330, 360) fall into bucket 11 rather than joining pure red in bucket 0.
    """
    hue = _hsv(hex6)[0]
    degrees = hue * 360
    return int(degrees // 30)
|
||
|
||
|
||
def _is_neutral(hex6: str) -> bool:
    """Legacy-named inverse of ``_is_chromatic``.

    A neutral color is one that is not counted by the three-color rule.
    """
    if _is_chromatic(hex6):
        return False
    return True
|
||
|
||
|
||
# 标签类形状名:这些天然用小字号(eyebrow/胶囊/页脚/数据来源/KPI 小注),
|
||
# 不参与"字号 < 14pt"与"bullet ≤ 5"的统计 —— 它们不是正文 bullet。
|
||
_LABEL_NAME_RE = re.compile(
|
||
r"(pill|eyebrow|footer|page_num|source|meta|_sub|kpi_sub|badge|tag|label)",
|
||
re.IGNORECASE,
|
||
)
|
||
# bullet 类形状名:真正的要点列表才计入 bullet 数。
|
||
_BULLET_NAME_RE = re.compile(r"(bullet|_pt_|agenda|list|item)", re.IGNORECASE)
|
||
|
||
|
||
def _shape_fill_hex(shape) -> str | None:
    """Return a shape's solid fill color as upper-case hex without ``#``.

    Returns ``None`` when the fill is not solid, when the fore color is not an
    explicit RGB color, or when any of the lookups raises one of the handled
    exceptions.
    """
    try:
        fill = shape.fill
        if fill.type == MSO_FILL.SOLID:
            color = fill.fore_color
            # Non-RGB colors are filtered out here so that ``.rgb`` is only
            # read on an explicit RGB color.
            if color.type == MSO_COLOR_TYPE.RGB:
                return str(color.rgb).upper()
    except (TypeError, AttributeError, KeyError, ValueError):
        pass
    return None
|
||
|
||
|
||
# ---- spec 解析 (松散 markdown 解析,够用就行) ----
|
||
|
||
def parse_spec(spec_path: Path) -> dict:
    """Loosely extract the checkable fields from a markdown spec file.

    The parser is a set of regex searches over the whole text, not a real
    markdown parser. Both the ASCII colon ``:`` and the full-width colon
    ``:`` are accepted between a key and its value.

    Args:
        spec_path: Path of the spec file. A falsy value or a non-existent
            path yields an empty result.

    Returns:
        A dict containing only the keys that were found:
        ``page_count`` (int, first number after ``页数``),
        ``canvas`` (one of ``16:9``, ``4:3``, ``9:16``, ``1:1``, ``3:4``
        after ``画布``), and ``colors`` (the first five ``#RRGGBB`` values
        in the text, upper-cased without ``#``).
    """
    if not spec_path or not spec_path.exists():
        return {}
    text = spec_path.read_text(encoding="utf-8")
    spec: dict = {}

    # The separator class includes the full-width colon so that
    # "页数:12" is recognized just like "页数: 12".
    page_match = re.search(r"页数[::\s]*(\d+)", text)
    if page_match:
        spec["page_count"] = int(page_match.group(1))

    canvas_match = re.search(r"画布[::\s]*(16:9|4:3|9:16|1:1|3:4)", text)
    if canvas_match:
        spec["canvas"] = canvas_match.group(1)

    hexes = re.findall(r"#([0-9A-Fa-f]{6})", text)
    if hexes:
        # Only the first five mentions are kept.
        spec["colors"] = [h.upper() for h in hexes[:5]]

    return spec
|
||
|
||
|
||
# ---- 检查 ----
|
||
|
||
def check_pptx(path: Path, spec: dict) -> tuple[list, list]:
    """Inspect one .pptx file and collect the problems found.

    Also prints a one-line ``[info]`` summary to stdout once the file has
    been opened.

    Args:
        path: Path of the presentation to check.
        spec: Parsed spec dict. Only the optional ``page_count`` and
            ``colors`` keys are read; an empty dict disables the spec checks.

    Returns:
        ``(errors, warnings)``, two lists of message strings. Errors cover a
        missing, too-small or unreadable file; every other finding is a
        warning.
    """
    emu_per_inch = 914400  # EMU -> inch
    tol = 0.02  # inches of slack (about 0.5 mm) for the canvas-bounds checks
    errors, warnings = [], []

    if not path.exists():
        errors.append(f"文件不存在: {path}")
        return errors, warnings

    size_kb = path.stat().st_size / 1024
    if size_kb < 10:
        errors.append(f"文件太小 ({size_kb:.1f}KB),python-pptx 可能没写完")

    name = path.stem.lower()
    if name in ("untitled", "output", "presentation", "untitled1", "new", "test"):
        warnings.append(
            f"文件名 '{path.name}' 太通用,建议按主题命名"
        )

    # A truncated or non-zip file makes python-pptx raise. Report that as a
    # fatal error instead of letting the traceback escape, which would end
    # the script with status 1 (the "warnings only" exit code).
    try:
        prs = Presentation(path)
    except Exception as exc:  # boundary with the third-party parser
        errors.append(f"无法打开 pptx ({type(exc).__name__}): {exc}")
        return errors, warnings

    n_slides = len(prs.slides)
    slide_w_in = prs.slide_width / emu_per_inch
    slide_h_in = prs.slide_height / emu_per_inch
    print(
        f"[info] 文件: {path.name} 大小: {size_kb:.1f}KB "
        f"页数: {n_slides} 画布: {slide_w_in:.2f}×{slide_h_in:.2f} in"
    )

    expected = spec.get("page_count")
    if expected and n_slides != expected:
        warnings.append(f"页数 {n_slides} 与 spec 期望 {expected} 不符")

    spec_colors = set(spec.get("colors", []))
    seen_colors: set[str] = set()

    for idx, slide in enumerate(prs.slides, 1):
        title_text = None
        small_font_count = 0
        # x-center of every bullet item; grouped into columns after the shape loop.
        bullet_xs: list = []
        # (left, top, width, height, label, head) of shapes with text or a picture.
        content_shapes: list = []

        for s_i, shape in enumerate(slide.shapes):
            # ---- Canvas-bounds check (every shape) ----
            try:
                left_in = shape.left / emu_per_inch if shape.left is not None else 0
                top_in = shape.top / emu_per_inch if shape.top is not None else 0
                w_in = shape.width / emu_per_inch if shape.width is not None else 0
                h_in = shape.height / emu_per_inch if shape.height is not None else 0
            except (AttributeError, TypeError):
                left_in = top_in = w_in = h_in = 0

            shape_label = getattr(shape, "name", None) or f"shape#{s_i}"
            right_in = left_in + w_in
            bottom_in = top_in + h_in

            if left_in < -tol or top_in < -tol:
                warnings.append(
                    f"第 {idx} 页 {shape_label} 起点为负: "
                    f"({left_in:.2f}, {top_in:.2f})"
                )
            if right_in > slide_w_in + tol:
                overflow = right_in - slide_w_in
                warnings.append(
                    f"第 {idx} 页 {shape_label} 右越界 {overflow:.2f}in "
                    f"(画布 {slide_w_in:.2f},shape 右 {right_in:.2f})"
                )
            if bottom_in > slide_h_in + tol:
                overflow = bottom_in - slide_h_in
                warnings.append(
                    f"第 {idx} 页 {shape_label} 下越界 {overflow:.2f}in "
                    f"(画布 {slide_h_in:.2f},shape 底 {bottom_in:.2f})"
                )

            # ---- Shape fill color (brand bars, badges, dots, labels, blocks) ----
            fill_hex = _shape_fill_hex(shape)
            if fill_hex:
                seen_colors.add(fill_hex)

            # ---- Collect content shapes (text or picture) for overlap detection ----
            try:
                is_pic = shape.shape_type == MSO_SHAPE_TYPE.PICTURE
            except (AttributeError, ValueError):
                is_pic = False
            head = ""
            if shape.has_text_frame:
                head = (shape.text_frame.text or "").strip()
            if (head or is_pic) and w_in > 0.05 and h_in > 0.05:
                content_shapes.append(
                    (left_in, top_in, w_in, h_in, shape_label,
                     head[:18] if head else "[图片]")
                )

            if not shape.has_text_frame:
                continue
            tf = shape.text_frame
            text = (tf.text or "").strip()
            if not text:
                continue

            # The first short single-line text on the slide is taken as its title.
            if title_text is None and len(text) <= 40 and "\n" not in text:
                title_text = text

            # ---- Text-overflow estimate ----
            # At N pt in a box W inches wide, a line holds about W*72/N CJK
            # characters. Non-empty paragraphs plus their wrapped lines give
            # the estimated line count; lines * (N * 1.4 / 72) greater than
            # the box height means probable overflow.
            try:
                first_size_pt = next(
                    (
                        run.font.size.pt
                        for para in tf.paragraphs
                        for run in para.runs
                        if run.font.size
                    ),
                    None,
                )
                # Large display type (titles, KPI numbers, section numbers at
                # >= 40pt) is skipped: estimating wraps by characters per line
                # gives false positives there, and title length has its own
                # check below.
                if (first_size_pt and first_size_pt < 40
                        and w_in > 0.5 and h_in > 0.2):
                    chars_per_line = max(1, int(w_in * 72 / first_size_pt))
                    est_lines = 0
                    for para in tf.paragraphs:
                        ptxt = (para.text or "").strip()
                        if not ptxt:
                            continue
                        est_lines += max(
                            1,
                            (len(ptxt) + chars_per_line - 1) // chars_per_line,
                        )
                    line_height_in = first_size_pt * 1.4 / 72
                    needed_h = est_lines * line_height_in
                    if needed_h > h_in + 0.1:
                        warnings.append(
                            f"第 {idx} 页 {shape_label} 文本可能溢出 "
                            f"(估 {est_lines} 行,需 {needed_h:.2f}in,"
                            f"框高 {h_in:.2f}in): {text[:25]}..."
                        )
            except (AttributeError, TypeError, ValueError):
                # Best-effort estimate: a shape that cannot be measured is skipped.
                pass

            is_label = bool(_LABEL_NAME_RE.search(shape_label))
            is_bullet_shape = bool(_BULLET_NAME_RE.search(shape_label))
            nonempty_paras = [
                p for p in tf.paragraphs
                if (p.text or "").strip()
                and (p.text or "").strip() != title_text
            ]
            # Only genuine bullet lists are counted: shapes named like a
            # bullet, or a frame holding two or more paragraphs. Structured
            # short text (KPI cards, card titles, pills: single paragraph,
            # non-bullet name) is not a bullet; otherwise a slide with four
            # KPI cards would be reported as "12 bullets".
            if not is_label and (is_bullet_shape or len(nonempty_paras) >= 2):
                cx = left_in + w_in / 2  # x-center, used for column grouping
                bullet_xs.extend([cx] * len(nonempty_paras))

            for para in tf.paragraphs:
                if not (para.text or "").strip():
                    continue
                for run in para.runs:
                    # Label-like shapes use small type by design and are not
                    # counted as unreadable.
                    if run.font.size and not is_label:
                        if run.font.size < Pt(14):
                            small_font_count += 1
                    if run.font.color and run.font.color.type:
                        try:
                            rgb = run.font.color.rgb
                            if rgb is not None:
                                # Upper-cased to match the fill and spec colors.
                                seen_colors.add(str(rgb).upper())
                        except (AttributeError, KeyError, ValueError):
                            pass

        if title_text is None:
            warnings.append(f"第 {idx} 页缺标题")
        elif len(title_text) > 30:
            warnings.append(
                f"第 {idx} 页标题过长 ({len(title_text)} 字): {title_text[:20]}..."
            )

        # The "<= 5 bullets" rule is applied per column: a two-column
        # comparison naturally has 3 + 3 and must not be reported as 6.
        # Bullets are split at the slide's vertical midline; a warning is
        # raised only when either side exceeds 5 (single-column lists too).
        mid = slide_w_in / 2
        left_n = sum(1 for x in bullet_xs if x < mid)
        right_n = len(bullet_xs) - left_n
        max_col = max(left_n, right_n)
        if max_col > 5:
            warnings.append(
                f"第 {idx} 页单列 bullet {max_col} 条 (上限 5),建议拆页或转图表"
            )

        if small_font_count > 0:
            warnings.append(
                f"第 {idx} 页有 {small_font_count} 处字号 < 14pt,投影看不清"
            )

        # ---- Pairwise overlap of content shapes ----
        for i, (ax, ay, aw, ah, alab, ahead) in enumerate(content_shapes):
            for bx, by, bw, bh, blab, bhead in content_shapes[i + 1:]:
                ix = min(ax + aw, bx + bw) - max(ax, bx)
                iy = min(ay + ah, by + bh) - max(ay, by)
                if ix <= _OVERLAP_MIN_DIM or iy <= _OVERLAP_MIN_DIM:
                    continue
                min_area = min(aw * ah, bw * bh)
                if min_area <= 0:
                    continue
                ratio = (ix * iy) / min_area
                if ratio >= _OVERLAP_MIN_RATIO:
                    warnings.append(
                        f"第 {idx} 页 内容重叠 {ratio * 100:.0f}%: "
                        f'{alab}("{ahead}") × {blab}("{bhead}")'
                    )

    # The three-color rule counts hue families: shades of one hue collapse
    # into one bucket and low-saturation tints / grayscale are ignored, so the
    # derived color steps of a card-based design are not reported as > 3 colors.
    chromatic = {c for c in seen_colors if _is_chromatic(c)}
    families = {_hue_family(c) for c in chromatic}
    if len(families) > 3:
        warnings.append(
            f"彩色色系 {len(families)} 个 (三色制上限 3): "
            f"{', '.join('#' + c for c in sorted(chromatic))};收敛到主/辅/强调三色系"
        )

    if spec_colors:
        spec_chromatic = {c for c in spec_colors if _is_chromatic(c)}
        spec_families = {_hue_family(c) for c in spec_chromatic}
        extra = {c for c in chromatic if _hue_family(c) not in spec_families}
        if extra:
            warnings.append(
                f"出现 spec 之外的色系 {', '.join('#' + c for c in sorted(extra))};"
                f"擅自换色 / 非主题色 (spec 定的是 "
                f"{', '.join('#' + c for c in sorted(spec_chromatic))})"
            )

    return errors, warnings
|
||
|
||
|
||
def main():
    """Command-line entry point.

    Parses the arguments, loads the optional spec, runs ``check_pptx``,
    prints the findings and exits with 0 (clean), 1 (warnings only) or
    2 (at least one error).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("pptx", type=Path)
    parser.add_argument("--spec", type=Path, default=None, help="spec.md 路径")
    args = parser.parse_args()

    spec = {}
    if args.spec:
        spec = parse_spec(args.spec)
    if spec:
        print(f"[info] spec 已加载: {spec}")

    errors, warnings = check_pptx(args.pptx, spec)

    # Errors are listed before warnings; an empty group prints nothing.
    report = (
        ("\n[errors]", "✗", errors),
        ("\n[warnings]", "!", warnings),
    )
    for heading, mark, messages in report:
        if not messages:
            continue
        print(heading)
        for message in messages:
            print(f" {mark} {message}")

    if errors:
        sys.exit(2)
    if warnings:
        sys.exit(1)
    print("\n[ok] 全部通过")
    sys.exit(0)


if __name__ == "__main__":
    main()
|