zcbot/skills/ppt/scripts/svg_quality_checker.py

1777 lines
76 KiB
Python

#!/usr/bin/env python3
"""
PPT Master - SVG Quality Check Tool
Checks whether SVG files comply with project technical specifications.
Usage:
python3 scripts/svg_quality_checker.py <svg_file>
python3 scripts/svg_quality_checker.py <directory>
python3 scripts/svg_quality_checker.py --all examples
"""
import sys
# zcbot: Windows GBK console compatibility — switch stdout/stderr to UTF-8 with
# replacement so emoji / © etc. do not trigger UnicodeEncodeError.
try:
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
    sys.stderr.reconfigure(encoding="utf-8", errors="replace")
except Exception:
    # Best effort: if reconfigure fails for any reason, keep the streams as-is.
    pass
import re
import json
import html
from pathlib import Path
from typing import List, Dict, Tuple
from collections import Counter, defaultdict
from xml.etree import ElementTree as ET
try:
from project_utils import CANVAS_FORMATS
from error_helper import ErrorHelper
except ImportError:
print("Warning: Unable to import dependency modules")
CANVAS_FORMATS = {}
ErrorHelper = None
try:
from update_spec import parse_lock as _parse_spec_lock
except ImportError:
_parse_spec_lock = None # spec_lock drift check will be skipped
try:
from svg_to_pptx.animation_config import (
load_animation_config as _load_animation_config,
validate_animation_config as _validate_animation_config,
)
except ImportError:
_load_animation_config = None
_validate_animation_config = None
# Matches a hex color literal: '#' followed by 3 to 8 hexadecimal digits.
HEX_VALUE_RE = re.compile(r"#[0-9A-Fa-f]{3,8}")
# SVG namespace URI, used to build namespace-qualified ElementTree tag names.
SVG_NS = "http://www.w3.org/2000/svg"
# Ramp envelope for font-size drift detection.
# From design_spec_reference.md §IV — Font Size Hierarchy: the ramp spans
# from page-number floor (0.5x body) to cover-title ceiling (5.0x body).
# Intermediate px values within this envelope are permitted per
# executor-base.md §2.1 ("Executor may use an intermediate size ... provided
# the size's ratio to body falls within the corresponding role's band"); only
# values outside every band — i.e. outside this envelope — are drift.
RAMP_MIN_RATIO = 0.5
RAMP_MAX_RATIO = 5.0
# Modes / visual styles that legitimately use unbounded hero / poster type
# (huge cover numerals, act dividers, single-number reveals). For these the
# size-drift upper bound is dropped — the oversize is the design, not Executor
# drift. The lower bound still applies.
# Both sets are compared against lower-cased spec_lock values.
POSTER_SIZE_MODES = {'showcase'}
POSTER_SIZE_STYLES = {'zine'}
def _design_spec_is_brand(spec_path: Path) -> bool:
    """Return True when a design_spec.md frontmatter declares ``kind: brand``.

    Lightweight detector that does not require PyYAML. Only the frontmatter
    block is scanned: the file must open with a ``---`` line and the block
    ends at the next ``---`` line. The first ``kind:`` line decides the
    result; its value, with surrounding whitespace and quotes removed, must
    be exactly ``brand``. Used by ``check_directory`` to skip SVG validation
    on brand-only template directories.

    Args:
        spec_path: Path of the design_spec.md file to inspect.

    Returns:
        True only when the frontmatter's first ``kind:`` value is ``brand``.
        False in every other case, including a missing, unreadable or
        non-UTF-8 file and a file without a complete frontmatter block.
    """
    try:
        # utf-8-sig reads plain UTF-8 unchanged but drops a leading BOM, which
        # would otherwise hide the opening '---' delimiter.
        text = spec_path.read_text(encoding='utf-8-sig')
    except (OSError, UnicodeDecodeError):
        # A detector must not crash the run on an undecodable spec file.
        return False
    if not text.startswith('---\n'):
        return False
    # Let a closing '---' on the very last line (no trailing newline) match.
    if not text.endswith('\n'):
        text += '\n'
    end = text.find('\n---\n', 4)
    if end == -1:
        return False
    for line in text[4:end].splitlines():
        stripped = line.strip()
        if stripped.startswith('kind:'):
            value = stripped.split(':', 1)[1].strip().strip('"\'')
            return value == 'brand'
    return False
def _parse_placeholders_fallback(block: str) -> Dict[str, Tuple[str, ...]]:
    """Read the documented ``placeholders:`` mapping without PyYAML.

    Fallback used only when PyYAML is unavailable. The section starts at a
    line beginning with ``placeholders:`` and ends at the first non-empty
    line that does not start with a space. Inside it, three entry shapes are
    understood (two-space key indent, four-space item indent):

    .. code-block:: yaml

        placeholders:
          01_cover: ["{{TITLE}}", "{{LOGO}}"]
          03_content: []
          03a_content_two_col:
            - "{{LEFT_TITLE}}"
            - "{{RIGHT_TITLE}}"

    Lines that fit none of these shapes are skipped without complaint —
    designers who need richer YAML should install pyyaml.

    Args:
        block: Text to scan (for example a frontmatter block).

    Returns:
        Mapping of page key to a tuple of placeholder strings, in the order
        the keys were encountered.
    """
    inline_list_re = re.compile(
        r"^\s{2}([A-Za-z0-9_]+)\s*:\s*\[(.*)\]\s*$"
    )
    empty_list_re = re.compile(r"^\s{2}([A-Za-z0-9_]+)\s*:\s*\[\s*\]\s*$")
    list_header_re = re.compile(r"^\s{2}([A-Za-z0-9_]+)\s*:\s*$")
    list_item_re = re.compile(r'^\s{4}-\s*"?([^"]+)"?\s*$')

    def _unquote(token: str) -> str:
        # Trim whitespace, then any double quotes, then any single quotes.
        return token.strip().strip('"').strip("'")

    parsed: Dict[str, Tuple[str, ...]] = {}
    active = False
    open_key = None
    open_values: List[str] = []

    def _close_open_list() -> None:
        # Commit the multi-line list being collected, if there is one.
        nonlocal open_key, open_values
        if open_key is not None:
            parsed[open_key] = tuple(open_values)
        open_key, open_values = None, []

    for raw_line in block.splitlines():
        if raw_line.startswith("placeholders:"):
            active = True
        elif not active:
            pass
        elif raw_line and not raw_line.startswith(" "):
            # A dedented, non-empty line closes the section.
            _close_open_list()
            active = False
        else:
            if open_key is not None:
                item = list_item_re.match(raw_line)
                if item is not None:
                    entry = _unquote(item.group(1))
                    if entry:
                        open_values.append(entry)
                    continue
                # Anything that is not a list item ends the open list.
                _close_open_list()
            if (hit := empty_list_re.match(raw_line)) is not None:
                parsed[hit.group(1)] = ()
            elif (hit := inline_list_re.match(raw_line)) is not None:
                tokens = map(_unquote, hit.group(2).split(","))
                parsed[hit.group(1)] = tuple(tok for tok in tokens if tok)
            elif (hit := list_header_re.match(raw_line)) is not None:
                open_key, open_values = hit.group(1), []
    _close_open_list()
    return parsed
class SVGQualityChecker:
"""SVG quality checker"""
# Default placeholder convention per page-type prefix. This is a *hint*,
# not a hard contract: templates may define their own placeholder vocabulary
# via `placeholders:` in design_spec.md frontmatter (see
# references/template-designer.md §4). Missing default placeholders surface
# as warnings, never errors — designers may legitimately swap
# `{{THANK_YOU}}` for `{{CLOSING_MESSAGE}}`, omit `{{DATE}}` when irrelevant,
# or build content variants with bespoke slot vocabularies.
#
# Variants reuse the parent type's expectation (`03a_content_two_col.svg`
# is matched by the same `03_content` rules as `03_content.svg`).
#
# Keys are page-type prefixes; each value is the tuple of placeholder tokens
# expected by default for that page type (an empty tuple asserts nothing).
DEFAULT_PLACEHOLDER_CONVENTION = {
    "01_cover": ("{{TITLE}}",), # only the title is universally expected
    "02_chapter": ("{{CHAPTER_TITLE}}",),
    "02_toc": (), # TOC layouts vary too widely to assert anything
    "03_content": ("{{PAGE_TITLE}}",),
    "04_ending": (), # ending pages legitimately use varied vocabularies
}
def __init__(self, *, template_mode: bool = False):
    """Create a checker with empty per-run state.

    Args:
        template_mode: Stored on the instance. When True, ``check_file``
            skips the spec_lock drift, sourced-image attribution, icon-usage
            and visual-richness checks.
    """
    self.template_mode = template_mode

    # One result dict per checked file, plus run-wide tallies.
    self.results = []
    self.summary = dict.fromkeys(('total', 'passed', 'warnings', 'errors'), 0)
    self.issue_types = defaultdict(int)

    # spec_lock drift state. Only filled when _parse_spec_lock is available
    # and a spec_lock.md is found near the SVG being checked.
    self._lock_cache: Dict[Path, Dict] = {}
    self._drift_summary: Dict[str, Dict[str, set]] = {
        bucket: defaultdict(set) for bucket in ('colors', 'fonts', 'sizes')
    }
    # Flips to True once at least one spec_lock.md has been located.
    self._lock_seen = False
    self._source_manifest_cache: Dict[Path, Dict] = {}

    # Template-mode aggregation, filled by check_directory when
    # template_mode=True and printed in print_summary. Entries are
    # (severity, kind, message) with severity 'error' or 'warning'.
    self._template_issues: List[Tuple[str, str, str]] = []
    self._animation_issues: List[Tuple[str, str]] = []

    # Icon-usage aggregation (non-template mode). A spec_lock that declares
    # an icon library + inventory means the strategist intends icons. The
    # native exporter and finalize both expand <use data-icon> from the
    # library, so an authored placeholder reliably becomes a real icon — but
    # only if the executor writes one. A deck that locks an inventory yet
    # authors ZERO placeholders ships flat/icon-less; these counters are the
    # feedback loop that catches the executor silently skipping icons.
    self._icon_inventory_declared = False  # any page's spec_lock locked icons
    self._deck_icon_total = 0  # total <use data-icon> across the deck
    self._pages_missing_icons: List[str] = []  # declared-but-icon-less pages

    # Visual-richness aggregation (non-template mode). The most common
    # AI-deck regression is a "wall of text boxes": every page is <text> on
    # <rect> with no diagrams, charts, figures or imagery.
    # <path>/<polyline>/<polygon>/<image> are the unambiguous "this page
    # draws something" primitives; rect/line are layout/divider scaffolding
    # and do not count. A content-rich deck (>=6 pages, text-heavy) with
    # zero such primitives deck-wide is the flat-deck pathology.
    self._deck_page_count = 0  # non-template SVG pages checked
    self._deck_graphic_total = 0  # path+polyline+polygon+image across deck
    self._deck_text_total = 0  # <text> across deck (density signal)
    self._pages_no_graphic: List[str] = []  # pages with zero graphic primitives
def check_file(self, svg_file: str, expected_format: str = None) -> Dict:
    """
    Run every per-file check on one SVG and record the outcome.

    Args:
        svg_file: SVG file path
        expected_format: Expected canvas format (e.g., 'ppt169')

    Returns:
        Check result dictionary. For a path that does not exist a reduced
        dictionary is returned immediately and nothing is added to the
        run statistics or to ``self.results``.
    """
    svg_path = Path(svg_file)
    if not svg_path.exists():
        return {
            'file': str(svg_file),
            'exists': False,
            'errors': ['File does not exist'],
            'warnings': [],
            'passed': False
        }

    result = {
        'file': svg_path.name,
        'path': str(svg_path),
        'exists': True,
        'errors': [],
        'warnings': [],
        'info': {},
        'passed': True
    }
    try:
        content = svg_path.read_text(encoding='utf-8')
        # Well-formedness gates everything else: the regex-based checks would
        # only produce misleading errors on a document that is not valid XML.
        if self._check_xml_well_formed(content, result):
            self._check_viewbox(content, result, expected_format)
            self._check_forbidden_elements(content, result)
            self._check_font_size_values(content, result)
            self._check_fonts(content, result)
            # width/height consistency with viewBox
            self._check_dimensions(content, result)
            # text wrapping methods
            self._check_text_elements(content, result)
            # image file existence and resolution
            self._check_image_references(content, svg_path, result)
            # object-level animation anchor quality
            self._check_animation_group_ids(content, result)
            # <pattern> elements must declare a PPTX preset
            self._check_pattern_fills(content, result)
            if not self.template_mode:
                # Templates ship neither spec_lock.md nor image_sources.json,
                # so these deck-level checks would only add noise for them.
                self._check_spec_lock_drift(content, svg_path, result)
                self._check_sourced_image_attribution(content, svg_path, result)
                self._check_icon_usage(content, svg_path, result)
                self._check_graphic_richness(content, result)
        result['passed'] = not result['errors']
    except Exception as e:
        result['errors'].append(f"Failed to read file: {e}")
        result['passed'] = False

    # Tally the file under exactly one outcome.
    self.summary['total'] += 1
    if not result['passed']:
        outcome = 'errors'
    elif result['warnings']:
        outcome = 'warnings'
    else:
        outcome = 'passed'
    self.summary[outcome] += 1
    # Only errors are categorized; the list is empty for passing files.
    for message in result['errors']:
        self.issue_types[self._categorize_issue(message)] += 1
    self.results.append(result)
    return result
def _check_xml_well_formed(self, content: str, result: Dict) -> bool:
    """Report whether the SVG markup parses as well-formed XML.

    SVG is strict XML. AI-generated decks often look fine in HTML5-tolerant
    previews yet fail strict parsing — typically because of HTML named
    entities (&nbsp; &mdash; &copy;…) or bare reserved characters in text
    (R&D, error < 5%). Such pages cannot be exported to PPTX, so the failure
    is recorded as a hard error before any other check runs.

    Args:
        content: Raw SVG markup.
        result: Per-file result dict; one message is appended to
            ``result['errors']`` when parsing fails.

    Returns:
        True when the document is well-formed; False otherwise.
    """
    try:
        ET.fromstring(content)
    except ET.ParseError as parse_error:
        message = (
            f"Invalid XML: {parse_error} — SVG must be well-formed XML. "
            "Use raw Unicode for typography (—, ©, →, NBSP); "
            "escape XML reserved chars as &amp; &lt; &gt; &quot; &apos; "
            "(see references/shared-standards.md §1)."
        )
        result['errors'].append(message)
        return False
    return True
def _check_viewbox(self, content: str, result: Dict, expected_format: str = None):
    """Validate the first ``viewBox`` attribute found in the markup.

    The raw attribute value is stored in ``result['info']['viewbox']``.

    Args:
        content: Raw SVG markup.
        result: Per-file result dict. Gets an error when no viewBox is
            present or when it differs from the canvas registered for
            ``expected_format``; gets a warning when the value is not
            exactly ``0 0 <int> <int>``.
        expected_format: Optional key into ``CANVAS_FORMATS``; the comparison
            is skipped when it is empty or unknown.
    """
    # XML permits either quote style and whitespace around '=', so accept
    # both instead of reporting a valid single-quoted viewBox as missing.
    viewbox_match = re.search(r'\bviewBox\s*=\s*(["\'])([^"\']+)\1', content)
    if not viewbox_match:
        result['errors'].append("Missing viewBox attribute")
        return
    viewbox = viewbox_match.group(2)
    result['info']['viewbox'] = viewbox
    # fullmatch, not match: a prefix test would let values with trailing
    # junk such as "0 0 1280 720px" pass as the usual format.
    if not re.fullmatch(r'0 0 \d+ \d+', viewbox):
        result['warnings'].append(f"Unusual viewBox format: {viewbox}")
    # Check if it matches expected format
    if expected_format and expected_format in CANVAS_FORMATS:
        expected_viewbox = CANVAS_FORMATS[expected_format]['viewbox']
        if viewbox != expected_viewbox:
            result['errors'].append(
                f"viewBox mismatch: expected '{expected_viewbox}', got '{viewbox}'"
            )
def _check_forbidden_elements(self, content: str, result: Dict):
    """Flag SVG constructs on the PPT-incompatible blocklist.

    Detection is regex / substring based on the raw markup (mostly on a
    lower-cased copy), so it is a heuristic scan rather than a structural
    XML analysis. Every finding is reported as an error.

    Args:
        content: Raw SVG markup.
        result: Per-file result dict; findings are appended to
            ``result['errors']``.
    """
    content_lower = content.lower()
    errors = result['errors']

    # ============================================================
    # Forbidden elements blocklist - PPT incompatible
    # ============================================================
    # Clipping / masking
    # clip-path is allowed on <image> elements and on pptx_to_svg-generated
    # nested crop <svg data-pptx-crop="1"> wrappers. Both map back to
    # DrawingML picture geometry in the native converter.
    #
    # Both clip checks key off the clip-path *attribute*, not off the
    # presence of a <clipPath> element: a file that uses clip-path without
    # defining any <clipPath> is the most broken case and must not slip
    # through unchecked.
    clip_on_non_image = re.search(
        r'<(?!image\b)(?!svg\b[^>]*\bdata-pptx-crop\s*=\s*["\']1["\'])\w+[^>]*\bclip-path\s*=',
        content,
        re.IGNORECASE,
    )
    if clip_on_non_image:
        errors.append(
            "clip-path is only allowed on <image> elements or "
            "pptx_to_svg crop wrappers — for shapes, draw the target "
            "shape directly instead of clipping")
    # Every clip-path reference needs an element carrying the referenced id.
    clip_refs = re.findall(r'clip-path\s*=\s*["\']url\(#([^)]+)\)', content)
    for ref_id in clip_refs:
        if f'id="{ref_id}"' not in content and f"id='{ref_id}'" not in content:
            errors.append(
                f"clip-path references #{ref_id} but no matching "
                f"<clipPath id=\"{ref_id}\"> definition found")
    if '<mask' in content_lower:
        errors.append("Detected forbidden <mask> element (PPT does not support SVG masks)")

    # Style system
    if '<style' in content_lower:
        errors.append("Detected forbidden <style> element (use inline attributes instead)")
    if re.search(r'\bclass\s*=', content):
        errors.append("Detected forbidden class attribute (use inline styles instead)")
    # id attribute: only report error when <style> also exists (id is harmful only with CSS selectors)
    # id inside <defs> for linearGradient/filter etc. is required, Inkscape also auto-adds id to elements,
    # standalone id attributes have no impact on PPT export
    if '<style' in content_lower and re.search(r'\bid\s*=', content):
        errors.append(
            "Detected id attribute used with <style> (CSS selectors forbidden, use inline styles instead)"
        )
    if re.search(r'<\?xml-stylesheet\b', content_lower):
        errors.append("Detected forbidden xml-stylesheet (external CSS references forbidden)")
    if re.search(r'<link[^>]*rel\s*=\s*["\']stylesheet["\']', content_lower):
        errors.append("Detected forbidden <link rel=\"stylesheet\"> (external CSS references forbidden)")
    if re.search(r'@import\s+', content_lower):
        errors.append("Detected forbidden @import (external CSS references forbidden)")

    # Structure / nesting
    if '<foreignobject' in content_lower:
        errors.append(
            "Detected forbidden <foreignObject> element (use <tspan> for manual line breaks)")
    has_symbol = '<symbol' in content_lower
    has_use = re.search(r'<use\b', content_lower) is not None
    if has_symbol and has_use:
        errors.append("Detected forbidden <symbol> + <use> complex usage (use basic shapes or simple <use> instead)")
    # marker-start / marker-end are conditionally allowed (see shared-standards.md §1.1).
    # The converter maps qualifying <marker> defs to native DrawingML <a:headEnd>/<a:tailEnd>.
    # The only case reported here (as an error) is a marker reference in a
    # file that contains no <marker> element at all.
    if re.search(r'\bmarker-(?:start|end)\s*=\s*["\']url\(#([^)]+)\)', content_lower):
        if '<marker' not in content_lower:
            errors.append(
                "Detected marker-start/marker-end referencing a marker id, "
                "but no <marker> element found in the file")

    # Text / fonts
    if '<textpath' in content_lower:
        errors.append("Detected forbidden <textPath> element (path text is incompatible with PPT)")
    if '@font-face' in content_lower:
        errors.append("Detected forbidden @font-face (use system font stack)")

    # Animation / interaction
    if re.search(r'<animate', content_lower):
        errors.append("Detected forbidden SMIL animation element <animate*> (SVG animations are not exported)")
    if re.search(r'<set\b', content_lower):
        errors.append("Detected forbidden SMIL animation element <set> (SVG animations are not exported)")
    if '<script' in content_lower:
        errors.append("Detected forbidden <script> element (scripts and event handlers forbidden)")
    if re.search(r'\bon\w+\s*=', content):  # onclick, onload etc.
        errors.append("Detected forbidden event attributes (e.g., onclick, onload)")

    # Other discouraged elements
    if '<iframe' in content_lower:
        errors.append("Detected <iframe> element (should not appear in SVG)")
    if re.search(r'rgba\s*\(', content_lower):
        errors.append("Detected forbidden rgba() color (use fill-opacity/stroke-opacity instead)")
    # \b keeps the group check from matching other tags that start with "g".
    if re.search(r'<g\b[^>]*\sopacity\s*=', content_lower):
        errors.append("Detected forbidden <g opacity> (set opacity on each child element individually)")
    if re.search(r'<image[^>]*\sopacity\s*=', content_lower):
        errors.append("Detected forbidden <image opacity> (use overlay mask approach)")
def _check_font_size_values(self, content: str, result: Dict):
    """Reject font-size values that are not plain unitless numbers.

    Both spellings are scanned: the ``font-size="…"`` attribute and the
    ``font-size: …`` declaration form. All distinct offending values are
    reported in one error, sorted, with at most five shown.

    Args:
        content: Raw SVG markup.
        result: Per-file result dict; at most one message is appended to
            ``result['errors']``.
    """
    is_plain_number = re.compile(r'^(?:\d+(?:\.\d+)?|\.\d+)$').fullmatch
    from_attributes = {
        found.group(2).strip()
        for found in re.finditer(
            r'\bfont-size\s*=\s*(["\'])(.*?)\1', content, re.IGNORECASE)
    }
    from_declarations = {
        found.group(1).strip()
        for found in re.finditer(
            r'\bfont-size\s*:\s*([^;"\']+)', content, re.IGNORECASE)
    }
    offenders = sorted(
        value for value in from_attributes | from_declarations
        if not is_plain_number(value)
    )
    if not offenders:
        return
    hidden = len(offenders) - 5
    tail = f" (+{hidden} more)" if hidden > 0 else ""
    listing = ', '.join(offenders[:5])
    result['errors'].append(
        f"font-size must be a unitless numeric px value; found {listing}{tail}. "
        "Write e.g. font-size=\"28\", never font-size=\"28px\" or \"21pt\"."
    )
def _check_fonts(self, content: str, result: Dict):
    """Check that font stacks end on a PPT-safe family.

    PPTX stores a single `typeface` per run with no runtime fallback, so every
    stack must END with a cross-platform pre-installed family. See
    strategist.md §g "PPT-safe font discipline".

    Font stacks are collected from ``font-family="…"`` attributes and from
    ``font-family: …`` declarations inside ``style`` attributes or
    ``<style>`` elements. Distinct stacks are stored (sorted) in
    ``result['info']['fonts']``. At most one warning is emitted: for the
    first stack, in document order per source, whose last concrete family
    is not PPT-safe.

    Args:
        content: Raw SVG markup.
        result: Per-file result dict; ``info['fonts']`` and ``warnings`` may
            be updated.
    """
    flags = re.IGNORECASE | re.DOTALL
    # Attribute form. The value is captured up to the *matching* delimiter so
    # a double-quoted stack containing single-quoted family names stays whole.
    # (A pattern without '=' can never match this form at all.)
    stacks = [m.group(2)
              for m in re.finditer(r'\bfont-family\s*=\s*(["\'])(.*?)\1', content, flags)]
    # Declaration form, looked up only inside CSS text so the capture cannot
    # run past the end of the enclosing attribute or element.
    css_chunks = [m.group(2)
                  for m in re.finditer(r'\bstyle\s*=\s*(["\'])(.*?)\1', content, flags)]
    css_chunks += re.findall(r'<style\b[^>]*>(.*?)</style>', content, flags)
    for css in css_chunks:
        stacks += re.findall(r'\bfont-family\s*:\s*([^;}]+)', css, re.IGNORECASE)
    # Attribute values may spell inner quotes as &quot; / &apos;.
    stacks = [html.unescape(stack).strip() for stack in stacks]
    stacks = [stack for stack in stacks if stack]
    if not stacks:
        return
    result['info']['fonts'] = sorted(set(stacks))

    # Pre-installed on Windows + macOS out of the box (plus their direct
    # FONT_FALLBACK_WIN mappings). A stack whose last concrete family is in
    # this set survives the PPTX round-trip on any viewer machine.
    ppt_safe_tail = {
        'microsoft yahei', 'simhei', 'simsun', 'kaiti', 'fangsong',
        'dengxian', 'microsoft jhenghei',
        'pingfang sc', 'heiti sc', 'songti sc', 'stsong',
        'arial', 'arial black', 'calibri', 'segoe ui', 'verdana',
        'helvetica', 'helvetica neue', 'tahoma', 'trebuchet ms',
        'times new roman', 'times', 'georgia', 'cambria', 'palatino',
        'consolas', 'courier new', 'menlo', 'monaco',
        'impact',
    }
    generic_families = ('sans-serif', 'serif', 'monospace',
                        'cursive', 'fantasy', 'system-ui')
    for font_family in stacks:
        # Drop the generic CSS fallback (sans-serif / serif / monospace)
        # and inspect the last concrete family.
        parts = [p.strip().strip('"').strip("'").lower()
                 for p in font_family.split(',')]
        parts = [p for p in parts if p and p not in generic_families]
        if not parts:
            continue
        if parts[-1] not in ppt_safe_tail:
            result['warnings'].append(
                f"Font stack does not end on a PPT-safe family "
                f"(expected e.g. Microsoft YaHei / SimSun / Arial / "
                f"Times New Roman / Consolas): {font_family}"
            )
            # One warning per file is enough; info['fonts'] lists every stack.
            break
def _check_dimensions(self, content: str, result: Dict):
    """Check the root <svg> width/height against the recorded viewBox.

    Only the opening tag of the first ``<svg>`` element is inspected, so
    ``width`` / ``height`` / ``stroke-width`` attributes of child elements
    are never mistaken for the canvas size. When both attributes are plain
    numbers they are stored as ``result['info']['dimensions']`` and compared
    with the last two fields of ``result['info']['viewbox']`` (set by the
    viewBox check); a mismatch yields a warning.

    Args:
        content: Raw SVG markup.
        result: Per-file result dict; ``info['dimensions']`` and ``warnings``
            may be updated.
    """
    root_tag = re.search(r'<svg\b[^>]*>', content, re.IGNORECASE)
    if not root_tag:
        return
    root_attrs = root_tag.group(0)
    # The lookbehind rejects suffix matches such as stroke-width / data-width.
    number = r'\s*=\s*["\'](\d+(?:\.\d+)?)["\']'
    width_match = re.search(r'(?<![\w:-])width' + number, root_attrs)
    height_match = re.search(r'(?<![\w:-])height' + number, root_attrs)
    if not (width_match and height_match):
        return
    width = width_match.group(1)
    height = height_match.group(1)
    result['info']['dimensions'] = f"{width}x{height}"

    # Check consistency with viewBox
    viewbox_parts = result['info'].get('viewbox', '').split()
    if len(viewbox_parts) != 4:
        return
    vb_width, vb_height = viewbox_parts[2], viewbox_parts[3]

    def _same(left: str, right: str) -> bool:
        # Compare numerically so "1280" and "1280.0" agree; fall back to the
        # raw text when the viewBox field is not a number.
        try:
            return float(left) == float(right)
        except ValueError:
            return left == right

    if not (_same(width, vb_width) and _same(height, vb_height)):
        result['warnings'].append(
            f"width/height ({width}x{height}) does not match viewBox "
            f"({vb_width}x{vb_height})"
        )
def _check_text_elements(self, content: str, result: Dict):
    """Count text markup and flag long unwrapped runs.

    Stores the number of ``<text`` and ``<tspan`` occurrences in
    ``result['info']`` and warns when any ``<text>`` element directly wraps
    100 or more characters with no child markup.

    Args:
        content: Raw SVG markup.
        result: Per-file result dict; ``info`` and ``warnings`` are updated.
    """
    info = result['info']
    info['text_elements'] = content.count('<text')
    info['tspan_elements'] = content.count('<tspan')

    # A long run of plain characters directly inside <text> probably needs
    # manual wrapping.
    long_runs = re.findall(r'<text[^>]*>([^<]{100,})</text>', content)
    if not long_runs:
        return
    result['warnings'].append(
        f"Detected {len(long_runs)} potentially overly long single-line text(s) (consider using tspan for wrapping)"
    )
def _check_image_references(self, content: str, svg_path: Path, result: Dict):
    """Check image file existence and resolution vs display size.

    Each distinct non-``data:`` href found on an ``<image>`` element is
    resolved relative to the SVG's directory. A missing file is an error.
    When the element also has numeric ``width`` / ``height`` and Pillow is
    importable, the actual pixel size is compared with the display size and
    a warning is added for images that are smaller than displayed or more
    than 4x larger in both dimensions.

    Args:
        content: Raw SVG markup.
        svg_path: Path of the SVG file; relative hrefs resolve against its
            parent directory.
        result: Per-file result dict; ``errors`` and ``warnings`` may grow.
    """
    # Find all <image ...> elements (capture the attribute text)
    img_tag_pattern = re.compile(r'<image\b([^>]*)/?>', re.IGNORECASE)
    # Either quote style is valid XML. \bhref also matches the tail of
    # xlink:href, so one pattern covers both spellings; data: URIs are skipped.
    href_pattern = re.compile(r'\bhref\s*=\s*(["\'])(?!data:)(.+?)\1')
    # The lookbehind keeps e.g. stroke-width from being read as width.
    width_pattern = re.compile(r'(?<![\w-])width\s*=\s*(["\'])(.*?)\1')
    height_pattern = re.compile(r'(?<![\w-])height\s*=\s*(["\'])(.*?)\1')
    svg_dir = svg_path.parent
    checked = set()
    for tag_match in img_tag_pattern.finditer(content):
        attrs = tag_match.group(1)
        href_match = href_pattern.search(attrs)
        if not href_match:
            continue
        href = href_match.group(2)
        if href in checked:
            continue
        checked.add(href)

        # Resolve path relative to SVG file directory
        img_path = (svg_dir / href).resolve()
        if not img_path.exists():
            result['errors'].append(
                f"Image file not found: {href} (resolved to {img_path})")
            continue

        # Check resolution vs display size
        w_match = width_pattern.search(attrs)
        h_match = height_pattern.search(attrs)
        display_w_str = w_match.group(2) if w_match else None
        display_h_str = h_match.group(2) if h_match else None
        if not display_w_str or not display_h_str:
            continue
        try:
            display_w = float(display_w_str)
            display_h = float(display_h_str)
        except (ValueError, TypeError):
            continue
        try:
            from PIL import Image as PILImage
            with PILImage.open(img_path) as img:
                actual_w, actual_h = img.size
            if actual_w < display_w or actual_h < display_h:
                result['warnings'].append(
                    f"Image {href} is {actual_w}x{actual_h} but displayed at "
                    f"{int(display_w)}x{int(display_h)} — may appear blurry")
            elif actual_w > display_w * 4 and actual_h > display_h * 4:
                result['warnings'].append(
                    f"Image {href} is {actual_w}x{actual_h} but displayed at "
                    f"{int(display_w)}x{int(display_h)} — consider downsizing "
                    f"to reduce file size")
        except ImportError:
            pass  # PIL not available, skip resolution check
        except Exception:
            pass  # Image unreadable, skip resolution check
def _check_animation_group_ids(self, content: str, result: Dict):
    """Warn about top-level <g> elements that lack an id.

    Only direct children of the root element are examined. The number in
    the warning is the child's 1-based position among *all* root children,
    not just the groups. Markup that fails to parse is ignored here.

    Args:
        content: Raw SVG markup.
        result: Per-file result dict; one warning is appended per
            id-less top-level group.
    """
    try:
        root = ET.fromstring(content)
    except ET.ParseError:
        return
    for position, node in enumerate(root, start=1):
        # Drop the "{namespace}" prefix ElementTree puts on qualified tags.
        local_name = node.tag.split('}', 1)[-1]
        if local_name != 'g' or node.get('id'):
            continue
        result['warnings'].append(
            f"Top-level visible <g> #{position} has no id; "
            "object-level animation config cannot reference it"
        )
# OOXML ST_PresetPatternVal enum — anything outside this set produces a
# PPTX schema violation ("PowerPoint found a problem with the content").
# _check_pattern_fills tests each <pattern>'s data-pptx-pattern value against
# this set; the comparison is case-sensitive.
_OOXML_PATTERN_PRESETS = frozenset({
    'pct5', 'pct10', 'pct20', 'pct25', 'pct30', 'pct40', 'pct50', 'pct60',
    'pct70', 'pct75', 'pct80', 'pct90',
    'horz', 'vert', 'ltHorz', 'ltVert', 'dkHorz', 'dkVert',
    'narHorz', 'narVert', 'dashHorz', 'dashVert',
    'cross', 'dnDiag', 'upDiag', 'ltDnDiag', 'ltUpDiag', 'dkDnDiag',
    'dkUpDiag', 'wdDnDiag', 'wdUpDiag',
    'dashDnDiag', 'dashUpDiag', 'diagCross',
    'smCheck', 'lgCheck', 'smGrid', 'lgGrid', 'dotGrid', 'smConfetti',
    'lgConfetti', 'horzBrick', 'diagBrick', 'solidDmnd', 'openDmnd',
    'dotDmnd', 'plaid', 'sphere', 'weave', 'wave', 'trellis', 'zigZag',
    'divot', 'shingle',
})
def _check_pattern_fills(self, content: str, result: Dict):
    """Audit <pattern> defs that drive PPTX <a:pattFill> output.

    svg_to_pptx maps <pattern fill> to native <a:pattFill prst="...">, taking
    the preset name from `data-pptx-pattern` (e.g. `lgGrid` / `smGrid` /
    `dkUpDiag`). Two problems are worth catching before export:

    1. No annotation (warning) → the converter silently falls back to
       `ltUpDiag` (diagonal stripes) and picks `bg = #FFFFFF` when the
       pattern has no child <rect>, turning a hand-authored grid into
       white-on-stripes in PPTX.
    2. Unknown preset name (error) → the PPTX schema rejects the file and
       PowerPoint opens it with "needs to be repaired". OOXML
       `ST_PresetPatternVal` is a closed enum — only the names in
       `_OOXML_PATTERN_PRESETS` are legal. Inventing `ltGrid` (no such
       value) is the canonical mistake; the only grids are `smGrid` /
       `lgGrid` / `dotGrid`.

    Only SVG-namespaced <pattern> elements are visited; markup that fails to
    parse is ignored here.

    Args:
        content: Raw SVG markup.
        result: Per-file result dict; ``warnings`` and ``errors`` may grow.
    """
    try:
        root = ET.fromstring(content)
    except ET.ParseError:
        return
    pattern_tag = '{' + SVG_NS + '}pattern'
    for node in root.iter(pattern_tag):
        label = node.get('id', '<unnamed>')
        preset = node.get('data-pptx-pattern')
        if preset and preset in self._OOXML_PATTERN_PRESETS:
            continue  # annotated with a legal preset: nothing to report
        if not preset:
            result['warnings'].append(
                f"<pattern id=\"{label}\"> has no data-pptx-pattern attribute — "
                "PPTX export will fall back to `ltUpDiag` (diagonal stripes), "
                "not your custom geometry. Add data-pptx-pattern=\"lgGrid\" / "
                "\"smGrid\" / etc. plus a <rect fill=\"<bg>\"/> child so the "
                "preset and bg color match your design."
            )
        else:
            result['errors'].append(
                f"<pattern id=\"{label}\"> uses data-pptx-pattern=\"{preset}\" "
                "which is not in OOXML ST_PresetPatternVal — exported PPTX "
                "will fail schema validation ('needs to be repaired'). "
                "Use one of: smGrid / lgGrid / dotGrid (grids), "
                "ltUpDiag / dkUpDiag / cross / diagCross / weave / plaid / "
                "horzBrick (others); full enum in svg_quality_checker.py "
                "_OOXML_PATTERN_PRESETS."
            )
def _get_spec_lock(self, svg_path: Path):
"""Locate and parse spec_lock.md near the SVG. Returns dict or None.
Looks in svg_path.parent and svg_path.parent.parent (covers the two
common layouts: SVG directly under <project>/ or under
<project>/svg_output/). Results are cached per lock path in
``self._lock_cache``; a parse that raises is cached as None.
``self._lock_seen`` is set once a lock parses to a non-None value.
Returns None right away when the optional lock parser could not be
imported (``_parse_spec_lock is None``).
NOTE(review): indentation was lost in this copy of the file, so whether the
final ``return data`` sits inside ``if data is not None:`` cannot be told
from here — confirm against the canonical source before restructuring.
The statements below are left exactly as found.
"""
# The optional update_spec.parse_lock import failed: nothing to parse with.
if _parse_spec_lock is None:
return None
# Nearest directory first, then its parent.
for candidate in (svg_path.parent / 'spec_lock.md',
svg_path.parent.parent / 'spec_lock.md'):
# A cached entry is returned as stored, including a cached None.
if candidate in self._lock_cache:
return self._lock_cache[candidate]
if candidate.exists():
try:
data = _parse_spec_lock(candidate)
except Exception:
# Any parser failure is recorded as "no usable lock" for this path.
data = None
self._lock_cache[candidate] = data
if data is not None:
self._lock_seen = True
return data
return None
def _check_spec_lock_drift(self, content: str, svg_path: Path, result: Dict):
"""Detect values used in the SVG that fall outside spec_lock.md.
Covers colors (fill / stroke / stop-color), font-family, and font-size.
Emits per-file warnings summarising the drift counts; exact drifting
values are accumulated in self._drift_summary for the end-of-run
aggregation. When spec_lock.md is missing, silently skip (consistent
with executor-base.md §2.1's 'missing lock → warn and proceed' policy).
Non-numeric typography sizes in the lock itself are reported as an error
on this file's result.
Args:
content: Raw SVG markup.
svg_path: Path of the SVG; used to locate the lock and to label drift
entries by file name.
result: Per-file result dict; ``errors`` and ``warnings`` may grow.
NOTE(review): indentation was lost in this copy of the file; the statements
are left exactly as found. In particular, confirm against the canonical
source that the final ``if template_size_drift:`` is not nested under
``if parts:``.
"""
lock = self._get_spec_lock(svg_path)
if lock is None:
return
# Build allow-sets from the lock
# Only lock values that are entirely a hex literal are kept; they are
# upper-cased so the comparison below is case-insensitive.
allowed_colors = set()
for v in lock.get('colors', {}).values():
if HEX_VALUE_RE.fullmatch(v):
allowed_colors.add(v.upper())
typo = lock.get('typography', {})
# Validate the lock's own size slots: every typography entry that is not
# a family key must be a plain number.
numeric_size_re = re.compile(r'^(?:\d+(?:\.\d+)?|\.\d+)$')
invalid_lock_sizes = []
for k, v in typo.items():
if k == 'font_family' or k.endswith('_family'):
continue
if not numeric_size_re.fullmatch(v.strip()):
invalid_lock_sizes.append(f"{k}: {v}")
if invalid_lock_sizes:
# Show at most five offending entries, then a "+N more" suffix.
shown = ', '.join(invalid_lock_sizes[:5])
more = len(invalid_lock_sizes) - 5
suffix = f" (+{more} more)" if more > 0 else ""
result['errors'].append(
f"spec_lock typography sizes must be unitless numeric px values; "
f"found {shown}{suffix}."
)
# Font families: default `font_family` plus any per-role `*_family`
# override (title_family / body_family / emphasis_family / code_family,
# per spec_lock_reference.md). Any of these is a legitimate declared
# value; an SVG that uses any one of them is not drifting.
allowed_fonts = set()
if typo:
default_font = typo.get('font_family', '').strip()
if default_font:
allowed_fonts.add(self._normalize_font_stack(default_font))
for k, v in typo.items():
if k == 'font_family' or not k.endswith('_family'):
continue
v_clean = v.strip()
# Skip placeholder text like "same as body (omit if identical)"
if not v_clean or v_clean.lower().startswith('same as'):
continue
allowed_fonts.add(self._normalize_font_stack(v_clean))
# Sizes: declared slots are anchors; body is the ramp baseline.
allowed_sizes = set()
body_px = None
for k, v in typo.items():
if k == 'font_family' or k.endswith('_family'):
continue
allowed_sizes.add(self._normalize_size(v))
if k == 'body':
try:
body_px = float(self._normalize_size(v))
except (ValueError, TypeError):
# Unusable body size: the ramp-envelope test below is skipped.
body_px = None
# Scan SVG for used values
# Only hex literals in these three attributes are compared; other color
# notations are not matched by the pattern.
color_drifts = set()
for attr in ('fill', 'stroke', 'stop-color'):
pattern = re.compile(rf'\b{attr}\s*=\s*["\'](#[0-9A-Fa-f]{{3,8}})["\']')
for m in pattern.finditer(content):
val = m.group(1).upper()
if val not in allowed_colors:
color_drifts.add(val)
font_drifts = set()
# Capture to the matching delimiter (group 1) so a double-quoted stack
# containing single-quoted family names is not truncated at the inner quote.
for m in re.finditer(r'font-family\s*=\s*(["\'])(.*?)\1', content):
val = m.group(2).strip()
# With no declared families there is nothing to drift from.
if allowed_fonts and self._normalize_font_stack(val) not in allowed_fonts:
font_drifts.add(val)
# Poster / showcase contexts use unbounded hero type — drop the ceiling.
mode = (lock.get('mode', {}).get('mode') or '').strip().lower()
vstyle = (lock.get('visual_style', {}).get('visual_style') or '').strip().lower()
max_ratio = (float('inf') if mode in POSTER_SIZE_MODES or vstyle in POSTER_SIZE_STYLES
else RAMP_MAX_RATIO)
size_drifts = set()
# Every used size (drifting or not) is kept for the template-size check.
used_sizes = []
for m in re.finditer(r'font-size\s*=\s*["\']([^"\']+)["\']', content):
val = self._normalize_size(m.group(1))
used_sizes.append(val)
if not allowed_sizes or val in allowed_sizes:
continue
# Intermediate values are allowed when they sit inside the ramp
# envelope (ratio to body within [RAMP_MIN_RATIO, max_ratio]).
if body_px and body_px > 0:
try:
ratio = float(val) / body_px
if RAMP_MIN_RATIO <= ratio <= max_ratio:
continue
except ValueError:
# A size that is not a number cannot be in the envelope: it drifts.
pass
size_drifts.add(val)
template_size_drift = self._detect_template_size_drift(
used_sizes, allowed_sizes, body_px
)
# Record in run-wide aggregation
# Each drifting value maps to the set of file names that used it.
fname = svg_path.name
for v in color_drifts:
self._drift_summary['colors'][v].add(fname)
for v in font_drifts:
self._drift_summary['fonts'][v].add(fname)
for v in size_drifts:
self._drift_summary['sizes'][v].add(fname)
# Per-file warning (one condensed line; details live in summary)
parts = []
if color_drifts:
parts.append(f"{len(color_drifts)} color(s)")
if font_drifts:
parts.append(f"{len(font_drifts)} font-family value(s)")
if size_drifts:
parts.append(f"{len(size_drifts)} font-size value(s)")
if parts:
result['warnings'].append(
f"spec_lock drift: {', '.join(parts)} not in spec_lock.md "
"(see drift summary for details)"
)
# _detect_template_size_drift returns a ready-made warning string or None.
if template_size_drift:
result['warnings'].append(template_size_drift)
def _detect_template_size_drift(self, used_sizes, allowed_sizes, body_px):
"""Warn when template-like small sizes bypass the locked type ramp.
The normal drift check deliberately permits in-ramp feature sizes, so
it should not hard-fail valid hero numbers or one-off labels. This
warning targets the common executor failure mode: copying a template's
compact 12/15/16px text stack instead of mapping content roles to
spec_lock typography, then reflowing from those locked px values.
"""
if not allowed_sizes or not body_px or body_px <= 0:
return None
try:
declared_min = min(float(v) for v in allowed_sizes)
except ValueError:
declared_min = None
# Stay narrow on purpose: real decks carry legitimate undeclared
# sub-body sizes (intermediate levels, labels, emphasis) just below the
# locked body, so "any size < body" floods the warning and destroys its
# credibility. Only flag values that read as genuine template leftovers
# — at or below `body * 0.75`, or below the smallest declared slot. This
# under-warns (a stray 15/16 against a body of 18 can slip through) in
# exchange for not crying wolf on valid intermediate type.
template_like_limit = body_px * 0.75
template_like_sub_body = []
for raw in used_sizes:
if raw in allowed_sizes:
continue
try:
size = float(raw)
except (TypeError, ValueError):
continue
below_declared_floor = declared_min is not None and size < declared_min
if size <= template_like_limit or below_declared_floor:
template_like_sub_body.append(raw)
if not template_like_sub_body:
return None
counts = Counter(template_like_sub_body)
distinct = sorted(counts, key=lambda v: float(v))
repeated_total = sum(counts.values())
below_declared_floor = []
if declared_min is not None:
below_declared_floor = [v for v in distinct if float(v) < declared_min]
if len(distinct) < 2 and repeated_total < 4 and not below_declared_floor:
return None
sample = ', '.join(
f"{v}x{counts[v]}" if counts[v] > 1 else v
for v in distinct[:5]
)
more = len(distinct) - 5
suffix = f" (+{more} more)" if more > 0 else ""
return (
"possible template font-size drift: undeclared sub-body size(s) "
f"{sample}{suffix}. Map each text item to a spec_lock typography "
"role first, then reflow card height / y / dy / line-height from "
"the locked px values."
)
def _find_image_sources_manifest(self, svg_path: Path) -> Path | None:
"""Locate image_sources.json for a project SVG.
Quality checks run primarily on <project>/svg_output/*.svg, but this
also supports SVGs checked from project root or svg_final.
"""
bases = (svg_path.parent, svg_path.parent.parent, svg_path.parent.parent.parent)
for base in bases:
candidate = base / 'images' / 'image_sources.json'
if candidate.exists():
return candidate
return None
def _load_image_sources_manifest(self, svg_path: Path) -> Dict:
manifest_path = self._find_image_sources_manifest(svg_path)
if manifest_path is None:
return {}
if manifest_path in self._source_manifest_cache:
return self._source_manifest_cache[manifest_path]
try:
payload = json.loads(manifest_path.read_text(encoding='utf-8'))
except (OSError, json.JSONDecodeError):
payload = {}
self._source_manifest_cache[manifest_path] = payload
return payload
def _check_sourced_image_attribution(self, content: str, svg_path: Path, result: Dict):
"""Require visible credit text for attribution-required web images.
image_search.py records the legal tier in images/image_sources.json;
Executor must render compact credit text into the SVG. This check
prevents a quality-first CC BY / CC BY-SA image from silently reaching
export without attribution.
"""
manifest = self._load_image_sources_manifest(svg_path)
items = manifest.get('items') or []
if not items:
return
text_content = html.unescape(re.sub(r'<[^>]+>', ' ', content))
text_content = re.sub(r'\s+', ' ', text_content)
svg_stem = svg_path.stem
for item in items:
if not item.get('attribution_required') and item.get('license_tier') != 'attribution-required':
continue
filename = Path(str(item.get('filename') or '')).name
slide = str(item.get('slide') or '').strip()
referenced = bool(filename and filename in content)
same_slide = bool(slide and slide == svg_stem)
if not referenced and not same_slide:
continue
license_name = str(item.get('license_name') or '').upper()
license_token = 'CC BY-SA' if 'BY-SA' in license_name else 'CC BY'
has_credit = license_token in text_content.upper()
if not has_credit:
result['errors'].append(
f"Missing inline attribution for sourced image {filename or '(unknown)'} "
f"({license_token}). Add compact credit text per "
f"references/image-searcher.md §7."
)
@staticmethod
def _normalize_size(value: str) -> str:
"""Normalize a font-size value for drift comparison.
Unit-bearing SVG values are reported as errors before drift checking.
The legacy `px` strip remains to avoid a duplicate drift warning after
the hard error has already identified the unit problem.
"""
v = value.strip().lower()
if v.endswith('px'):
v = v[:-2].strip()
return v
@staticmethod
def _normalize_font_stack(stack: str) -> str:
"""Normalize a font-family stack for comparison: split on commas, strip
quotes / whitespace, lowercase, rejoin. Collapses cosmetic differences
(comma spacing, single vs double quotes, case) so that
`Consolas,'Courier New',monospace` matches `Consolas, "Courier New", monospace`."""
parts = [p.strip().strip('"\'').lower() for p in stack.split(',')]
return ','.join(p for p in parts if p)
def _categorize_issue(self, error_msg: str) -> str:
"""Categorize issue type"""
if 'Invalid XML' in error_msg:
return 'XML well-formedness'
elif 'viewBox' in error_msg:
return 'viewBox issues'
elif 'foreignObject' in error_msg:
return 'foreignObject'
elif 'font' in error_msg.lower():
return 'Font issues'
else:
return 'Other'
def check_directory(self, directory: str, expected_format: str = None) -> List[Dict]:
    """Check every SVG under a directory (or a single SVG file).

    Args:
        directory: Path of a project directory, an SVG directory, a
            template directory (in template mode), or one SVG file.
        expected_format: Expected canvas format, forwarded to
            ``check_file``.

    Returns:
        ``self.results`` after the run, or an empty list when the target
        does not exist or contains no SVG files.
    """
    target = Path(directory)
    if not target.exists():
        print(f"[ERROR] Directory does not exist: {directory}")
        return []

    # Brand-only template directories (templates/brands/<id>/) have no SVG
    # roster — design_spec.md frontmatter declares `kind: brand`. Skip SVG
    # checks entirely; brand validation lives in register_template.py.
    if self.template_mode and target.is_dir():
        brand_spec = target / 'design_spec.md'
        if brand_spec.exists() and _design_spec_is_brand(brand_spec):
            print(
                "[INFO] Brand directory detected (kind: brand) — "
                "SVG checks skipped."
            )
            print(
                "[INFO] Validate brand specs via: "
                "python3 scripts/register_template.py "
                "--kind brand <brand_id> --dry-run"
            )
            return self.results

    # Collect the SVG files to check.
    if target.is_file():
        svg_files = [target]
    elif self.template_mode:
        # Template directories live at templates/{layouts,decks}/<id>/.
        svg_files = sorted(target.glob('*.svg'))
    else:
        # Project directories keep their pages in svg_output/ when present.
        nested = target / 'svg_output'
        scan_root = nested if nested.exists() else target
        svg_files = sorted(scan_root.glob('*.svg'))

    if not svg_files:
        print("[WARN] No SVG files found")
        return []

    print(f"\n[SCAN] Checking {len(svg_files)} SVG file(s)...\n")
    for svg_file in svg_files:
        self._print_result(self.check_file(str(svg_file), expected_format))

    # Directory-level contracts only apply when a directory was given.
    if target.is_dir():
        if self.template_mode:
            self._check_template_contract(target, svg_files)
        else:
            self._check_animation_config_contract(target)
    return self.results
def _check_animation_config_contract(self, dir_path: Path) -> None:
    """Run the project-level animations.json reference checks.

    Does nothing when the optional animation-config helpers failed to
    import. A config that fails to load is recorded as one error; each
    message returned by the validator is recorded as a warning. Findings
    go to ``self._animation_issues`` as ``(severity, message)`` pairs.
    """
    helpers_available = (
        _load_animation_config is not None
        and _validate_animation_config is not None
    )
    if not helpers_available:
        return

    # The project root is the directory that owns svg_output/; when that
    # folder is not directly below dir_path, use dir_path's parent.
    has_output_dir = (dir_path / 'svg_output').exists()
    project_path = dir_path if has_output_dir else dir_path.parent

    try:
        config = _load_animation_config(project_path)
    except Exception as exc:
        self._animation_issues.append(('error', f"animations.json is invalid: {exc}"))
        return

    if config:
        for note in _validate_animation_config(project_path, config):
            self._animation_issues.append(('warning', note))
def _check_template_contract(self, dir_path: Path,
svg_files: List[Path]) -> None:
"""Template-mode-only checks: roster ↔ design_spec consistency and
per-page placeholder hints.
- **Roster mismatch (orphan / missing)** is reported as an *error*: a
stale roster will produce a wrong ``layouts_index.json`` entry.
- **Placeholder gaps** are reported as *warnings*. Templates may
legitimately omit conventional placeholders or swap them out (e.g.
``{{CLOSING_MESSAGE}}`` instead of ``{{THANK_YOU}}``), and a content
variant may use a bespoke slot vocabulary. Designers can declare
their own per-stem expectations via ``placeholders:`` frontmatter
in ``design_spec.md`` to suppress these warnings explicitly.
Issues are aggregated and printed in :py:meth:`print_summary` so the
per-file report stays focused on intrinsic SVG validity.
"""
spec_path = dir_path / 'design_spec.md'
spec_text = spec_path.read_text(encoding='utf-8') if spec_path.exists() else ""
spec_pages = self._extract_spec_roster(spec_text) if spec_text else []
custom_contract = self._extract_frontmatter_placeholders(spec_text) if spec_text else {}
on_disk = {p.stem for p in svg_files}
if spec_pages:
spec_set = set(spec_pages)
orphan = sorted(on_disk - spec_set)
missing = sorted(spec_set - on_disk)
for page in orphan:
self._template_issues.append((
'error',
'roster_orphan',
f"{page}.svg exists on disk but is not listed in design_spec.md Page Roster",
))
for page in missing:
self._template_issues.append((
'error',
'roster_missing',
f"design_spec.md Page Roster lists {page} but {page}.svg is missing on disk",
))
elif spec_path.exists():
# design_spec.md is present but the roster parser found nothing —
# surface as a warning. Legacy specs may lack an explicit roster.
self._template_issues.append((
'warning',
'roster_unknown',
f"could not extract page roster from {spec_path.name}; "
"skipping orphan/missing checks",
))
else:
self._template_issues.append((
'error',
'spec_missing',
f"{spec_path.name} not found — required for every library template",
))
# Per-file placeholder coverage. Variants reuse the parent type's set
# (e.g. 03a_content_two_col.svg ↔ 03_content rules) unless the spec
# frontmatter overrides that page (custom_contract takes precedence).
for svg_file in svg_files:
expected = self._lookup_template_contract(
svg_file.stem, overrides=custom_contract,
)
if expected is None:
continue # extension pages or stems with no convention
try:
content = svg_file.read_text(encoding='utf-8')
except OSError:
continue
for placeholder in expected:
if placeholder not in content:
self._template_issues.append((
'warning',
'placeholder_hint',
f"{svg_file.name}: missing conventional placeholder {placeholder} "
"(declare 'placeholders:' frontmatter in design_spec.md to silence)",
))
@staticmethod
def _extract_frontmatter_placeholders(spec_text: str) -> Dict[str, Tuple[str, ...]]:
"""Read the optional ``placeholders:`` map from design_spec.md frontmatter.
Shape:
.. code-block:: yaml
placeholders:
01_cover: ["{{TITLE}}", "{{BRAND_LOGO}}"]
03_content: [] # explicitly assert "no expectation"
03a_content_two_col: # variant-specific override
- "{{LEFT_TITLE}}"
- "{{RIGHT_TITLE}}"
Each key is a stem (full filename without ``.svg``) or page-type prefix
(``01_cover``). An empty list silences the default convention for that
stem; a populated list replaces the default. Stems / prefixes not
listed fall back to ``DEFAULT_PLACEHOLDER_CONVENTION``.
We parse with PyYAML when available; otherwise we fall back to a
minimal regex that handles the documented shape.
"""
if not spec_text.startswith("---\n"):
return {}
end = spec_text.find("\n---\n", 4)
if end == -1:
return {}
block = spec_text[4:end]
try:
import yaml # type: ignore
except ImportError:
return _parse_placeholders_fallback(block)
try:
data = yaml.safe_load(block) or {}
except yaml.YAMLError:
return {}
if not isinstance(data, dict):
return {}
raw = data.get("placeholders")
if not isinstance(raw, dict):
return {}
out: Dict[str, Tuple[str, ...]] = {}
for stem, value in raw.items():
if not isinstance(stem, str):
continue
if isinstance(value, list):
out[stem] = tuple(str(v) for v in value)
elif value is None:
out[stem] = ()
return out
@staticmethod
def _extract_spec_roster(spec_text: str) -> List[str]:
"""Best-effort: extract the page roster from design_spec.md.
Templates do not share a uniform section index for the roster — the
personality-only skeleton puts it at §V "Page Roster"; legacy specs use
§VI "Page Roster" or bury filenames under §VII "Page Types" as
``### N. Cover Page (01_cover.svg)``. We match by title (any roman
index), then fall back to scanning the whole document for any
backtick-wrapped ``<stem>.svg`` reference.
Returns the deduplicated stem list in document order. Empty result
means we can't determine the roster confidently — caller should treat
that as "skip orphan/missing checks", not as "no pages declared".
"""
# Pass 1: explicit roster section, any roman numeral.
section = re.search(
r"^##\s+[IVX]+\.\s+(?:Page Roster|Page Structure|Pages|Page Types)\b.*?(?=^##\s+|\Z)",
spec_text,
re.MULTILINE | re.DOTALL | re.IGNORECASE,
)
scope = section.group(0) if section else None
# Pass 2: full document. We *only* trust this scan when the explicit
# roster scan came up empty (no `<stem>.svg` references inside it) —
# otherwise the explicit section's deliberate roster wins over loose
# mentions elsewhere.
if scope and re.search(r"[`\(][0-9A-Za-z_]+\.svg[`\)]", scope):
text = scope
else:
text = spec_text
stems: List[str] = []
seen: set = set()
# Accept backtick-quoted (`01_cover.svg`) and parenthesized
# (01_cover.svg) forms — existing specs use either.
svg_ref_re = re.compile(r"[`\(]([0-9A-Za-z_]+\.svg)[`\)]")
for match in svg_ref_re.finditer(text):
stem = match.group(1)[:-4]
if stem in seen or not re.match(r"^\d", stem):
continue
seen.add(stem)
stems.append(stem)
# If the explicit §VI scan listed bare stems (without .svg), accept
# those as fallback — but only when they were inside that section.
if not stems and scope:
for match in re.finditer(r"`([0-9]{2}[a-z]?_[A-Za-z0-9_]+)`", scope):
stem = match.group(1)
if stem in seen:
continue
seen.add(stem)
stems.append(stem)
return stems
@classmethod
def _lookup_template_contract(
cls, stem: str, *,
overrides: Dict[str, Tuple[str, ...]] | None = None,
) -> Tuple[str, ...] | None:
"""Resolve a SVG stem to its expected placeholder set.
Resolution order, first hit wins:
1. ``overrides[stem]`` — frontmatter entry for the exact filename
2. ``overrides[<page_type_prefix>]`` — frontmatter entry for the
variant's parent type (e.g. ``03_content`` for
``03a_content_two_col``)
3. ``DEFAULT_PLACEHOLDER_CONVENTION[<page_type_prefix>]``
Returns ``None`` for stems with no matching convention or override —
e.g. extension pages like ``05_section_break``. ``()`` (empty tuple)
is a valid value meaning "no expected placeholders" — used to
explicitly silence the default convention.
"""
overrides = overrides or {}
if stem in overrides:
return overrides[stem]
# Variant convention: <NN><letter>?_<rest>; strip the letter to find
# the parent type prefix, e.g. "03a_content_two_col" -> "03_content".
match = re.match(r"^(\d{2})([a-z])?_([a-z]+)", stem)
if not match:
return None
num, _letter, kind = match.groups()
key = f"{num}_{kind}"
if key in overrides:
return overrides[key]
return cls.DEFAULT_PLACEHOLDER_CONVENTION.get(key)
def _print_result(self, result: Dict):
"""Print check result for a single file"""
if result['passed']:
if result['warnings']:
icon = "[WARN]"
status = "Passed (with warnings)"
else:
icon = "[OK]"
status = "Passed"
else:
icon = "[ERROR]"
status = "Failed"
print(f"{icon} {result['file']} - {status}")
# Display basic info
if result['info']:
info_items = []
if 'viewbox' in result['info']:
info_items.append(f"viewBox: {result['info']['viewbox']}")
if info_items:
print(f" {' | '.join(info_items)}")
# Display errors
if result['errors']:
for error in result['errors']:
print(f" [ERROR] {error}")
# Display warnings
if result['warnings']:
for warning in result['warnings'][:2]: # Only show first 2 warnings
print(f" [WARN] {warning}")
if len(result['warnings']) > 2:
print(f" ... and {len(result['warnings']) - 2} more warning(s)")
print()
def _check_icon_usage(self, content: str, svg_path: Path, result: Dict) -> None:
"""Warn when a page references no icons despite spec_lock locking an
inventory, and feed the deck-level zero-icons gate.
Section / cover / closing pages legitimately ship without icons, so a
single icon-less page is only a soft per-page warning. The hard failure
is deck-wide (every page icon-less while an inventory is locked) and is
emitted in :py:meth:`_print_icon_summary`.
"""
lock = self._get_spec_lock(svg_path)
if not lock:
return
icons = lock.get('icons') or {}
library = (icons.get('library') or '').strip().lower()
inventory = (icons.get('inventory') or '').strip().lower()
_empty = ('', 'none', '(none)', '-', 'n/a')
declared = library not in _empty and inventory not in _empty
if not declared:
return
self._icon_inventory_declared = True
count = len(re.findall(r'<use\b[^>]*\bdata-icon\s*=', content))
result.setdefault('info', {})['icon_count'] = count
self._deck_icon_total += count
if count == 0:
self._pages_missing_icons.append(svg_path.name)
result['warnings'].append(
f"spec_lock locks an icon library ({icons.get('library')}) + inventory "
f"but this page references no <use data-icon> — content pages should place "
f"1-3 inventory icons (cover / section / closing pages may omit)"
)
def _print_icon_summary(self):
"""Deck-level icon-usage gate.
Declared inventory + zero icons deck-wide is a hard error (the locked
icons are unused and the deck renders flat). Bumps ``summary['errors']``
so the process exits non-zero, mirroring ``_print_animation_summary``.
"""
if not self._icon_inventory_declared:
return
if self._deck_icon_total == 0:
self.summary['errors'] += 1
print("\n[ERROR] Icon usage: spec_lock locks an icon library + inventory, "
"but the deck authors ZERO <use data-icon> across all pages.")
print(" The locked icons are unused — the deck renders flat / icon-less.")
print(" Fix: in the executor, place inventory icons on content pages "
"(KPI / list / process / comparison layouts especially), then re-run.")
elif self._pages_missing_icons:
print(f"\n[INFO] Icon usage: {self._deck_icon_total} icon(s) deck-wide; "
f"{len(self._pages_missing_icons)} page(s) reference none "
f"({', '.join(self._pages_missing_icons)}).")
print(" Cover / section / closing pages may legitimately omit icons; "
"verify dense content pages aren't missing them.")
def _check_graphic_richness(self, content: str, result: Dict) -> None:
"""Tally graphic primitives per page for the deck-level flat-deck gate.
Counts <path>/<polyline>/<polygon>/<image> — the elements that actually
draw a diagram, chart, figure, or photo. <rect> and <line> are excluded
on purpose: they are layout cards, backgrounds, and dividers, and a deck
built entirely from them is exactly the "text on rectangles" look this
catches. Per-page nudges stay soft; the hard gate is deck-wide.
"""
g = (len(re.findall(r'<path\b', content))
+ len(re.findall(r'<polyline\b', content))
+ len(re.findall(r'<polygon\b', content))
+ len(re.findall(r'<image\b', content)))
result.setdefault('info', {})['graphic_count'] = g
self._deck_page_count += 1
self._deck_graphic_total += g
self._deck_text_total += len(re.findall(r'<text\b', content))
if g == 0:
self._pages_no_graphic.append(result.get('file', '?'))
def _print_graphic_summary(self):
"""Deck-level flat-deck gate.
A content-rich deck (>=6 pages, text-heavy) with zero path/polygon/
polyline/image deck-wide is the wall-of-text-boxes pathology — hard
error so it can't ship. Below that bar, surface a soft note when most
pages carry no figure. Short or sparse decks (<6 pages) are exempt to
avoid false-failing minimalist / teaser decks.
"""
pages = self._deck_page_count
if pages < 6:
return
avg_text = self._deck_text_total / pages
if self._deck_graphic_total == 0 and avg_text >= 10:
self.summary['errors'] += 1
print(f"\n[ERROR] Visual richness: {pages} text-heavy pages but ZERO "
"diagram/figure primitives (<path>/<polygon>/<polyline>/<image>) "
"deck-wide — the deck is text on rectangles.")
print(" Map content shape -> a visual: comparison->columns/quadrant, "
"timeline->process, share->donut, trend->line, ≥3 data points->chart "
"(adapt a templates/charts/ template or draw it), add diagrams/imagery, "
"then re-run.")
elif len(self._pages_no_graphic) >= max(6, int(pages * 0.7)):
print(f"\n[INFO] Visual richness: {len(self._pages_no_graphic)}/{pages} pages "
"have no diagram/figure (<path>/<polygon>/<polyline>/<image>) — "
"verify dense content pages aren't just text + boxes.")
def print_summary(self):
    """Print the run-wide check summary.

    Order of output: per-status file counts with percentages, issue
    categories sorted by count (descending), then the aggregated
    spec_lock drift, template, animation, icon and graphic sections, and
    finally a list of common fixes when any error or warning is recorded.
    The aggregated sections may raise the summary counters, so the
    "common fixes" test runs after them.
    """
    rule = "=" * 80
    print(rule)
    print("[SUMMARY] Check Summary")
    print(rule)

    totals = self.summary
    print(f"\nTotal files: {totals['total']}")
    status_rows = (
        ("[OK] Fully passed", 'passed'),
        ("[WARN] With warnings", 'warnings'),
        ("[ERROR] With errors", 'errors'),
    )
    for label, key in status_rows:
        print(
            f" {label}: {totals[key]} ({self._percentage(totals[key])}%)")

    if self.issue_types:
        print("\nIssue categories:")
        ranked = sorted(self.issue_types.items(), key=lambda pair: pair[1], reverse=True)
        for issue_type, count in ranked:
            print(f" {issue_type}: {count}")

    # spec_lock drift aggregation (only printed when a lock was found)
    self._print_drift_summary()
    # Template-mode aggregation (orphan/missing roster + placeholder hints)
    self._print_template_summary()
    # Animation config aggregation.
    self._print_animation_summary()
    # Deck-level icon-usage gate (declared inventory but icon-less deck).
    self._print_icon_summary()
    # Deck-level flat-deck gate (text-on-rectangles, no diagrams/figures).
    self._print_graphic_summary()

    # Fix suggestions
    anything_flagged = self.summary['errors'] > 0 or self.summary['warnings'] > 0
    if anything_flagged:
        print("\n[TIP] Common fixes:")
        print(" 1. XML well-formedness: write typography as raw Unicode (—, ©, →, NBSP); escape XML reserved chars as &amp; &lt; &gt; &quot; &apos; — never use HTML named entities like &nbsp; &mdash; &copy;")
        print(" 2. viewBox issues: Ensure consistency with canvas format (see references/canvas-formats.md)")
        print(" 3. foreignObject: Use <text> + <tspan> for manual line breaks")
        print(" 4. Font issues: end every font-family stack with a PPT-safe family (e.g. Microsoft YaHei / Arial / Consolas)")
def _print_animation_summary(self):
"""Print animations.json validation issues if present."""
if not self._animation_issues:
return
errors = [item for item in self._animation_issues if item[0] == 'error']
warnings = [item for item in self._animation_issues if item[0] == 'warning']
self.summary['errors'] += len(errors)
self.summary['warnings'] += len(warnings)
for severity, _msg in self._animation_issues:
self.issue_types[f'animation_config_{severity}'] += 1
print("\n[ANIMATION] animations.json checks")
for _severity, msg in errors:
print(f" [ERROR] {msg}")
for _severity, msg in warnings:
print(f" [WARN] {msg}")
def _print_template_summary(self):
"""Aggregate template-mode roster / placeholder issues at the bottom.
Errors land under the ``errors`` summary count (so the exit signal
from ``main`` agrees), warnings under ``warnings``. Both are listed
per file so the user can act on them directly.
"""
if not self._template_issues:
return
errors = [item for item in self._template_issues if item[0] == 'error']
warnings = [item for item in self._template_issues if item[0] == 'warning']
# Mirror into the global summary so downstream "0 errors" gates honor
# template-mode issues.
self.summary['errors'] += len(errors)
self.summary['warnings'] += len(warnings)
for severity, kind, _msg in self._template_issues:
self.issue_types[f"template_{kind}"] += 1
print("\n[TEMPLATE] Template mode checks")
if errors:
print(f" Errors ({len(errors)}):")
for _sev, kind, msg in errors:
print(f" [{kind}] {msg}")
if warnings:
print(f" Warnings ({len(warnings)}):")
for _sev, kind, msg in warnings:
print(f" [{kind}] {msg}")
if not errors:
print(" No structural roster issues. Placeholder hints above are advisory only;")
print(" declare 'placeholders:' frontmatter in design_spec.md to silence them.")
def _print_drift_summary(self):
"""Print spec_lock drift aggregation if any was observed.
Values are sorted by file-count descending so frequent drift surfaces
first. Frequent drift usually means spec_lock.md is missing entries
the Strategist should have included; rare drift is more likely actual
Executor drift and warrants SVG review.
"""
if not self._lock_seen:
return
has_drift = any(self._drift_summary[cat] for cat in self._drift_summary)
if not has_drift:
print("\n[OK] spec_lock drift: none — all colors, fonts, and sizes are anchored to spec_lock.md")
return
print("\nspec_lock drift — values used outside spec_lock.md:")
labels = [('colors', 'Colors'),
('fonts', 'Font families'),
('sizes', 'Font sizes')]
for category, label in labels:
items = self._drift_summary.get(category, {})
if not items:
continue
entries = sorted(items.items(), key=lambda x: (-len(x[1]), x[0]))
print(f" {label}:")
for val, files in entries:
n = len(files)
suffix = "file" if n == 1 else "files"
print(f" {val} ({n} {suffix})")
print(
"Tip: frequent out-of-lock values usually mean spec_lock.md is missing\n"
" entries — extend the lock (scripts/update_spec.py or manual edit).\n"
" Rare ones are likely Executor drift — review the affected SVGs."
)
def _percentage(self, count: int) -> int:
"""Calculate percentage"""
if self.summary['total'] == 0:
return 0
return int(count / self.summary['total'] * 100)
def export_report(self, output_file: str = 'svg_quality_report.txt'):
    """Write the full check report to a UTF-8 text file.

    The report lists every result (status, path, info, all errors and all
    warnings), followed by the summary counters. A confirmation line is
    printed to stdout once the file has been written.

    Args:
        output_file: Destination path of the report.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80
    with open(output_file, 'w', encoding='utf-8') as f:
        print("PPT Master SVG Quality Check Report", file=f)
        print(heavy_rule + "\n", file=f)

        for result in self.results:
            status = "[OK] Passed" if result['passed'] else "[ERROR] Failed"
            print(f"{status} - {result['file']}", file=f)
            print(f"Path: {result.get('path', 'N/A')}", file=f)
            if result['info']:
                print(f"Info: {result['info']}", file=f)
            for heading, key in (("Errors", 'errors'), ("Warnings", 'warnings')):
                entries = result[key]
                if entries:
                    print(f"\n{heading}:", file=f)
                    for entry in entries:
                        print(f" - {entry}", file=f)
            print("\n" + light_rule + "\n", file=f)

        # Write summary
        print("\n" + heavy_rule, file=f)
        print("Check Summary", file=f)
        print(heavy_rule + "\n", file=f)
        print(f"Total files: {self.summary['total']}", file=f)
        print(f"Fully passed: {self.summary['passed']}", file=f)
        print(f"With warnings: {self.summary['warnings']}", file=f)
        print(f"With errors: {self.summary['errors']}", file=f)
    print(f"\n[REPORT] Check report exported: {output_file}")
def print_usage() -> None:
    """Print CLI usage information: invocation forms, examples, options."""
    usage_lines = (
        "PPT Master - SVG Quality Check Tool\n",
        "Usage:",
        " python3 scripts/svg_quality_checker.py <svg_file>",
        " python3 scripts/svg_quality_checker.py <directory>",
        " python3 scripts/svg_quality_checker.py <template_dir> --template-mode",
        " python3 scripts/svg_quality_checker.py --all examples",
        "\nExamples:",
        " python3 scripts/svg_quality_checker.py examples/project/svg_output/slide_01.svg",
        " python3 scripts/svg_quality_checker.py examples/project/svg_output",
        " python3 scripts/svg_quality_checker.py examples/project",
        " python3 scripts/svg_quality_checker.py templates/layouts/academic_defense --template-mode",
        " python3 scripts/svg_quality_checker.py templates/decks/招商银行 --template-mode",
        "\nOptions:",
        " --format <ppt169|ppt43|...> Expected canvas format",
        " --template-mode Validate a templates/{layouts,decks}/<id> directory:",
        " glob *.svg directly, skip spec_lock checks,",
        " enforce roster ↔ design_spec.md Page Roster consistency,",
        " and emit advisory placeholder-convention warnings.",
    )
    for line in usage_lines:
        print(line)
def main() -> None:
    """Run the CLI entry point.

    Reads ``sys.argv`` directly. The first argument is the target: an SVG
    file, a directory, or ``--all`` (optionally followed by a base
    directory, default ``examples``). ``--format <name>``,
    ``--template-mode``, ``--export`` and ``--output <file>`` are looked
    up anywhere in the argument list.

    Always terminates through ``sys.exit``: status 0 when usage is printed
    or the summary records no errors, status 1 on a usage error, when
    ``--all`` cannot import its project-discovery helper, or when the
    summary records at least one error.
    """
    argv = sys.argv
    if len(argv) < 2:
        print_usage()
        sys.exit(0)
    if argv[1] in {"-h", "--help", "help"}:
        print_usage()
        sys.exit(0)
    if argv[1].startswith("--") and argv[1] not in {"--all"}:
        print(f"[ERROR] Missing target before option: {argv[1]}")
        print_usage()
        sys.exit(1)

    template_mode = '--template-mode' in argv
    checker = SVGQualityChecker(template_mode=template_mode)

    # Parse arguments
    target = argv[1]
    expected_format = None
    if '--format' in argv:
        idx = argv.index('--format')
        if idx + 1 < len(argv):
            expected_format = argv[idx + 1]

    # Execute check
    if target == '--all':
        # The base directory is optional: a flag directly after --all
        # (e.g. `--all --export`) is an option, not a directory name.
        base_dir = 'examples'
        if len(argv) > 2 and not argv[2].startswith('--'):
            base_dir = argv[2]
        try:
            from project_utils import find_all_projects
        except ImportError:
            # project_utils is optional elsewhere in this file; fail with a
            # readable message instead of a traceback.
            print("[ERROR] --all requires project_utils.find_all_projects, "
                  "which could not be imported")
            sys.exit(1)
        projects = find_all_projects(base_dir)
        for project in projects:
            print(f"\n{'=' * 80}")
            print(f"Checking project: {project.name}")
            print('=' * 80)
            checker.check_directory(str(project))
    else:
        checker.check_directory(target, expected_format)

    # Print summary
    checker.print_summary()

    # Export report (if specified)
    if '--export' in argv:
        output_file = 'svg_quality_report.txt'
        if '--output' in argv:
            idx = argv.index('--output')
            if idx + 1 < len(argv):
                output_file = argv[idx + 1]
        checker.export_report(output_file)

    # Return exit code
    sys.exit(1 if checker.summary['errors'] > 0 else 0)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()