#!/usr/bin/env python3 """ PPT Master - SVG Quality Check Tool Checks whether SVG files comply with project technical specifications. Usage: python3 scripts/svg_quality_checker.py python3 scripts/svg_quality_checker.py python3 scripts/svg_quality_checker.py --all examples """ import sys try: # zcbot: Windows GBK 控制台兼容，避免 emoji/© 等触发 UnicodeEncodeError sys.stdout.reconfigure(encoding="utf-8", errors="replace") sys.stderr.reconfigure(encoding="utf-8", errors="replace") except Exception: pass import re import json import html from pathlib import Path from typing import List, Dict, Tuple from collections import Counter, defaultdict from xml.etree import ElementTree as ET try: from project_utils import CANVAS_FORMATS from error_helper import ErrorHelper except ImportError: print("Warning: Unable to import dependency modules") CANVAS_FORMATS = {} ErrorHelper = None try: from update_spec import parse_lock as _parse_spec_lock except ImportError: _parse_spec_lock = None # spec_lock drift check will be skipped try: from svg_to_pptx.animation_config import ( load_animation_config as _load_animation_config, validate_animation_config as _validate_animation_config, ) except ImportError: _load_animation_config = None _validate_animation_config = None HEX_VALUE_RE = re.compile(r"#[0-9A-Fa-f]{3,8}") SVG_NS = "http://www.w3.org/2000/svg" # Ramp envelope for font-size drift detection. # From design_spec_reference.md §IV — Font Size Hierarchy: the ramp spans # from page-number floor (0.5x body) to cover-title ceiling (5.0x body). # Intermediate px values within this envelope are permitted per # executor-base.md §2.1 ("Executor may use an intermediate size ... provided # the size's ratio to body falls within the corresponding role's band"); only # values outside every band — i.e. outside this envelope — are drift. RAMP_MIN_RATIO = 0.5 RAMP_MAX_RATIO = 5.0 # Modes / visual styles that legitimately use unbounded hero / poster type # (huge cover numerals, act dividers, single-number reveals). For these the # size-drift upper bound is dropped — the oversize is the design, not Executor # drift. The lower bound still applies. POSTER_SIZE_MODES = {'showcase'} POSTER_SIZE_STYLES = {'zine'} def _design_spec_is_brand(spec_path: Path) -> bool: """Return True when a design_spec.md frontmatter declares ``kind: brand``. Lightweight detector that does not require PyYAML — scans only the frontmatter block (``---`` delimited) for a ``kind:`` line whose value contains ``brand``. Used by ``check_directory`` to skip SVG validation on brand-only template directories. """ try: text = spec_path.read_text(encoding='utf-8') except OSError: return False if not text.startswith('---\n'): return False end = text.find('\n---\n', 4) if end == -1: return False fm_block = text[4:end] for line in fm_block.splitlines(): stripped = line.strip() if stripped.startswith('kind:'): value = stripped.split(':', 1)[1].strip().strip('"\'') return value == 'brand' return False def _parse_placeholders_fallback(block: str) -> Dict[str, Tuple[str, ...]]: """Tiny YAML-free reader for the documented ``placeholders:`` shape. Used only when PyYAML is unavailable. Recognized lines (indentation-aware, two-space indent assumed): .. code-block:: yaml placeholders: 01_cover: ["{{TITLE}}", "{{LOGO}}"] 03_content: [] 03a_content_two_col: - "{{LEFT_TITLE}}" - "{{RIGHT_TITLE}}" Anything outside this minimal grammar is silently skipped — designers who rely on advanced YAML should install pyyaml. """ out: Dict[str, Tuple[str, ...]] = {} inline_re = re.compile( r"^\s{2}([A-Za-z0-9_]+)\s*:\s*\[(.*)\]\s*$" ) empty_re = re.compile(r"^\s{2}([A-Za-z0-9_]+)\s*:\s*\[\s*\]\s*$") block_header_re = re.compile(r"^\s{2}([A-Za-z0-9_]+)\s*:\s*$") item_re = re.compile(r'^\s{4}-\s*"?([^"]+)"?\s*$') in_section = False current_block_key: str | None = None current_items: List[str] = [] def _flush_block() -> None: nonlocal current_block_key, current_items if current_block_key is not None: out[current_block_key] = tuple(current_items) current_block_key = None current_items = [] for line in block.splitlines(): if line.startswith("placeholders:"): in_section = True continue if not in_section: continue # End of section: dedent to a non-key line. if line and not line.startswith(" "): _flush_block() in_section = False continue if current_block_key is not None: m = item_re.match(line) if m: value = m.group(1).strip().strip('"').strip("'") if value: current_items.append(value) continue # Block ended. _flush_block() if empty_re.match(line): key = empty_re.match(line).group(1) out[key] = () continue m = inline_re.match(line) if m: key, raw = m.group(1), m.group(2) items = [p.strip().strip('"').strip("'") for p in raw.split(",")] out[key] = tuple(item for item in items if item) continue m = block_header_re.match(line) if m: current_block_key = m.group(1) current_items = [] continue _flush_block() return out class SVGQualityChecker: """SVG quality checker""" # Default placeholder convention per page-type prefix. This is a *hint*, # not a hard contract: templates may define their own placeholder vocabulary # via `placeholders:` in design_spec.md frontmatter (see # references/template-designer.md §4). Missing default placeholders surface # as warnings, never errors — designers may legitimately swap # `{{THANK_YOU}}` for `{{CLOSING_MESSAGE}}`, omit `{{DATE}}` when irrelevant, # or build content variants with bespoke slot vocabularies. # # Variants reuse the parent type's expectation (`03a_content_two_col.svg` # is matched by the same `03_content` rules as `03_content.svg`). DEFAULT_PLACEHOLDER_CONVENTION = { "01_cover": ("{{TITLE}}",), # only the title is universally expected "02_chapter": ("{{CHAPTER_TITLE}}",), "02_toc": (), # TOC layouts vary too widely to assert anything "03_content": ("{{PAGE_TITLE}}",), "04_ending": (), # ending pages legitimately use varied vocabularies } def __init__(self, *, template_mode: bool = False): self.template_mode = template_mode self.results = [] self.summary = { 'total': 0, 'passed': 0, 'warnings': 0, 'errors': 0 } self.issue_types = defaultdict(int) # spec_lock drift state (populated only when _parse_spec_lock is available # and a spec_lock.md is found near the SVG) self._lock_cache: Dict[Path, Dict] = {} self._drift_summary: Dict[str, Dict[str, set]] = { 'colors': defaultdict(set), 'fonts': defaultdict(set), 'sizes': defaultdict(set), } self._lock_seen = False # True once we locate at least one spec_lock.md self._source_manifest_cache: Dict[Path, Dict] = {} # Template-mode aggregation (populated by check_directory when # template_mode=True). Each entry is (severity, kind, message) where # severity is 'error' or 'warning'. Printed in print_summary. self._template_issues: List[Tuple[str, str, str]] = [] self._animation_issues: List[Tuple[str, str]] = [] # Icon-usage aggregation (non-template mode). When spec_lock declares an # icon library + inventory, the strategist intends the deck to use icons. # The native exporter and finalize both expand from the # library, so an authored placeholder reliably becomes a real icon — but # only if the executor writes one. A deck that locks an inventory yet # authors ZERO placeholders ships flat/icon-less; this is the missing # feedback loop that catches the executor silently skipping icons. self._icon_inventory_declared = False # any page's spec_lock locked icons self._deck_icon_total = 0 # total across the deck self._pages_missing_icons: List[str] = [] # declared-but-icon-less pages # Visual-richness aggregation (non-template mode). The most common AI-deck # regression is "wall of text boxes": every page is on with # zero diagrams, charts, figures, or imagery. /// # are the unambiguous "this page draws something" primitives — # rect/line are layout/divider scaffolding and don't count. A content-rich # deck (>=6 pages, text-heavy) with zero such primitives deck-wide is the # flat-deck pathology; catch it so it can't ship silently. self._deck_page_count = 0 # non-template SVG pages checked self._deck_graphic_total = 0 # path+polyline+polygon+image across deck self._deck_text_total = 0 # across deck (density signal) self._pages_no_graphic: List[str] = [] # pages with zero graphic primitives def check_file(self, svg_file: str, expected_format: str = None) -> Dict: """ Check a single SVG file Args: svg_file: SVG file path expected_format: Expected canvas format (e.g., 'ppt169') Returns: Check result dictionary """ svg_path = Path(svg_file) if not svg_path.exists(): return { 'file': str(svg_file), 'exists': False, 'errors': ['File does not exist'], 'warnings': [], 'passed': False } result = { 'file': svg_path.name, 'path': str(svg_path), 'exists': True, 'errors': [], 'warnings': [], 'info': {}, 'passed': True } try: with open(svg_path, 'r', encoding='utf-8') as f: content = f.read() # 0. Check XML well-formedness — every other check assumes the file # is valid XML. Bail early on failure so the regex-based checks # below don't produce misleading errors on a broken document. if self._check_xml_well_formed(content, result): # 1. Check viewBox self._check_viewbox(content, result, expected_format) # 2. Check forbidden elements self._check_forbidden_elements(content, result) # 3. Check font-size values self._check_font_size_values(content, result) # 4. Check fonts self._check_fonts(content, result) # 5. Check width/height consistency with viewBox self._check_dimensions(content, result) # 6. Check text wrapping methods self._check_text_elements(content, result) # 7. Check image references (file existence and resolution) self._check_image_references(content, svg_path, result) # 8. Check object-level animation anchor quality. self._check_animation_group_ids(content, result) # 8b. Check elements declare a PPTX preset. self._check_pattern_fills(content, result) # 9. Check spec_lock drift (colors / font-family / font-size). # Templates do not ship a spec_lock.md, so skip in template # mode to avoid noise. if not self.template_mode: self._check_spec_lock_drift(content, svg_path, result) # 10. Check web-sourced image attribution. Templates don't carry # image_sources.json; skip in template mode. if not self.template_mode: self._check_sourced_image_attribution(content, svg_path, result) # 11. Check declared-vs-used icons. Templates don't ship a # spec_lock.md; skip in template mode. if not self.template_mode: self._check_icon_usage(content, svg_path, result) # 12. Check visual richness (flat text-on-rectangles deck). if not self.template_mode: self._check_graphic_richness(content, result) # 13. Geometry lint: estimated text/icon bounding boxes → # text-on-text / icon-on-text overlap + off-canvas elements. # Templates carry {{PLACEHOLDER}} text whose rendered width # is unrepresentative, so skip in template mode. if not self.template_mode: self._check_geometry(content, result) # Determine pass/fail result['passed'] = len(result['errors']) == 0 except Exception as e: result['errors'].append(f"Failed to read file: {e}") result['passed'] = False # Update statistics self.summary['total'] += 1 if result['passed']: if result['warnings']: self.summary['warnings'] += 1 else: self.summary['passed'] += 1 else: self.summary['errors'] += 1 # Categorize issue types for error in result['errors']: self.issue_types[self._categorize_issue(error)] += 1 self.results.append(result) return result def _check_xml_well_formed(self, content: str, result: Dict) -> bool: """Check that the SVG content parses as well-formed XML. SVG is strict XML. AI-generated decks frequently produce content that looks fine in HTML5-tolerant previews but fails strict XML parsing — common causes are HTML named entities ( — ©…) and bare XML reserved characters in text (R&D, error < 5%). Such pages cannot be exported to PPTX, so we surface them here as a hard error before any downstream check looks at them. Returns True when the document is well-formed; False otherwise. """ try: ET.fromstring(content) return True except ET.ParseError as e: result['errors'].append( f"Invalid XML: {e} — SVG must be well-formed XML. " f"Use raw Unicode for typography (—, ©, →, NBSP); " f"escape XML reserved chars as & < > " ' " f"(see references/shared-standards.md §1)." ) return False def _check_viewbox(self, content: str, result: Dict, expected_format: str = None): """Check viewBox attribute""" viewbox_match = re.search(r'viewBox="([^"]+)"', content) if not viewbox_match: result['errors'].append("Missing viewBox attribute") return viewbox = viewbox_match.group(1) result['info']['viewbox'] = viewbox # Check format if not re.match(r'0 0 \d+ \d+', viewbox): result['warnings'].append(f"Unusual viewBox format: {viewbox}") # Check if it matches expected format if expected_format and expected_format in CANVAS_FORMATS: expected_viewbox = CANVAS_FORMATS[expected_format]['viewbox'] if viewbox != expected_viewbox: result['errors'].append( f"viewBox mismatch: expected '{expected_viewbox}', got '{viewbox}'" ) def _check_forbidden_elements(self, content: str, result: Dict): """Check forbidden elements (blocklist)""" content_lower = content.lower() # ============================================================ # Forbidden elements blocklist - PPT incompatible # ============================================================ # Clipping / masking # clipPath is allowed on elements and on pptx_to_svg-generated # nested crop ', content) if text_matches: result['warnings'].append( f"Detected {len(text_matches)} potentially overly long single-line text(s) (consider using tspan for wrapping)" ) def _check_image_references(self, content: str, svg_path: Path, result: Dict): """Check image file existence and resolution vs display size.""" # Find all elements (capture the full tag) img_tag_pattern = re.compile(r']*)/?>', re.IGNORECASE) svg_dir = svg_path.parent checked = set() for tag_match in img_tag_pattern.finditer(content): attrs = tag_match.group(1) # Extract href (prefer href over xlink:href) href_match = ( re.search(r'\bhref="(?!data:)([^"]+)"', attrs) or re.search(r'\bxlink:href="(?!data:)([^"]+)"', attrs) ) if not href_match: continue href = href_match.group(1) if href in checked: continue checked.add(href) # Resolve path relative to SVG file directory img_path = (svg_dir / href).resolve() if not img_path.exists(): result['errors'].append( f"Image file not found: {href} (resolved to {img_path})") continue # Check resolution vs display size w_match = re.search(r'\bwidth="([^"]+)"', attrs) h_match = re.search(r'\bheight="([^"]+)"', attrs) display_w_str = w_match.group(1) if w_match else None display_h_str = h_match.group(1) if h_match else None if not display_w_str or not display_h_str: continue try: display_w = float(display_w_str) display_h = float(display_h_str) except (ValueError, TypeError): continue try: from PIL import Image as PILImage with PILImage.open(img_path) as img: actual_w, actual_h = img.size if actual_w < display_w or actual_h < display_h: result['warnings'].append( f"Image {href} is {actual_w}x{actual_h} but displayed at " f"{int(display_w)}x{int(display_h)} — may appear blurry") elif actual_w > display_w * 4 and actual_h > display_h * 4: result['warnings'].append( f"Image {href} is {actual_w}x{actual_h} but displayed at " f"{int(display_w)}x{int(display_h)} — consider downsizing " f"to reduce file size") except ImportError: pass # PIL not available, skip resolution check except Exception: pass # Image unreadable, skip resolution check def _check_animation_group_ids(self, content: str, result: Dict): """Warn when visible top-level groups cannot be customized.""" try: root = ET.fromstring(content) except ET.ParseError: return non_visual = {'defs', 'title', 'desc', 'metadata', 'style'} for index, child in enumerate(list(root), start=1): tag = child.tag.split('}', 1)[-1] if tag in non_visual: continue if tag == 'g' and not child.get('id'): result['warnings'].append( f"Top-level visible #{index} has no id; " "object-level animation config cannot reference it" ) # OOXML ST_PresetPatternVal enum — anything outside this set produces a # PPTX schema violation ("PowerPoint found a problem with the content"). _OOXML_PATTERN_PRESETS = frozenset({ 'pct5', 'pct10', 'pct20', 'pct25', 'pct30', 'pct40', 'pct50', 'pct60', 'pct70', 'pct75', 'pct80', 'pct90', 'horz', 'vert', 'ltHorz', 'ltVert', 'dkHorz', 'dkVert', 'narHorz', 'narVert', 'dashHorz', 'dashVert', 'cross', 'dnDiag', 'upDiag', 'ltDnDiag', 'ltUpDiag', 'dkDnDiag', 'dkUpDiag', 'wdDnDiag', 'wdUpDiag', 'dashDnDiag', 'dashUpDiag', 'diagCross', 'smCheck', 'lgCheck', 'smGrid', 'lgGrid', 'dotGrid', 'smConfetti', 'lgConfetti', 'horzBrick', 'diagBrick', 'solidDmnd', 'openDmnd', 'dotDmnd', 'plaid', 'sphere', 'weave', 'wave', 'trellis', 'zigZag', 'divot', 'shingle', }) def _check_pattern_fills(self, content: str, result: Dict): """Audit defs that drive PPTX output. svg_to_pptx maps to native . The preset name comes from `data-pptx-pattern` (e.g. `lgGrid` / `smGrid` / `dkUpDiag`). Two failure modes worth catching pre-export: 1. Missing annotation → converter silently falls back to `ltUpDiag` (diagonal stripes) and picks `bg = #FFFFFF` when the pattern has no child , turning a hand-authored grid into white-on-stripes in PPTX. 2. Invalid preset name → PPTX schema rejects the file; PowerPoint opens it with "needs to be repaired". OOXML `ST_PresetPatternVal` is a closed enum — only the names in `_OOXML_PATTERN_PRESETS` are legal. Inventing `ltGrid` (no such value) is the canonical mistake; the only grids are `smGrid` / `lgGrid` / `dotGrid`. """ try: root = ET.fromstring(content) except ET.ParseError: return for pattern in root.iter(f'{{{SVG_NS}}}pattern'): pat_id = pattern.get('id', '') prst = pattern.get('data-pptx-pattern') if not prst: result['warnings'].append( f" has no data-pptx-pattern attribute — " "PPTX export will fall back to `ltUpDiag` (diagonal stripes), " "not your custom geometry. Add data-pptx-pattern=\"lgGrid\" / " "\"smGrid\" / etc. plus a \"/> child so the " "preset and bg color match your design." ) continue if prst not in self._OOXML_PATTERN_PRESETS: result['errors'].append( f" uses data-pptx-pattern=\"{prst}\" " "which is not in OOXML ST_PresetPatternVal — exported PPTX " "will fail schema validation ('needs to be repaired'). " "Use one of: smGrid / lgGrid / dotGrid (grids), " "ltUpDiag / dkUpDiag / cross / diagCross / weave / plaid / " "horzBrick (others); full enum in svg_quality_checker.py " "_OOXML_PATTERN_PRESETS." ) def _get_spec_lock(self, svg_path: Path): """Locate and parse spec_lock.md near the SVG. Returns dict or None. Looks in svg_path.parent and svg_path.parent.parent (covers the two common layouts: SVG directly under / or under /svg_output/). Results are cached per lock path. """ if _parse_spec_lock is None: return None for candidate in (svg_path.parent / 'spec_lock.md', svg_path.parent.parent / 'spec_lock.md'): if candidate in self._lock_cache: return self._lock_cache[candidate] if candidate.exists(): try: data = _parse_spec_lock(candidate) except Exception: data = None self._lock_cache[candidate] = data if data is not None: self._lock_seen = True return data return None def _check_spec_lock_drift(self, content: str, svg_path: Path, result: Dict): """Detect values used in the SVG that fall outside spec_lock.md. Covers colors (fill / stroke / stop-color), font-family, and font-size. Emits per-file warnings summarising the drift counts; exact drifting values are accumulated in self._drift_summary for the end-of-run aggregation. When spec_lock.md is missing, silently skip (consistent with executor-base.md §2.1's 'missing lock → warn and proceed' policy). """ lock = self._get_spec_lock(svg_path) if lock is None: return # Build allow-sets from the lock allowed_colors = set() for v in lock.get('colors', {}).values(): if HEX_VALUE_RE.fullmatch(v): allowed_colors.add(v.upper()) typo = lock.get('typography', {}) numeric_size_re = re.compile(r'^(?:\d+(?:\.\d+)?|\.\d+)$') invalid_lock_sizes = [] for k, v in typo.items(): if k == 'font_family' or k.endswith('_family'): continue if not numeric_size_re.fullmatch(v.strip()): invalid_lock_sizes.append(f"{k}: {v}") if invalid_lock_sizes: shown = ', '.join(invalid_lock_sizes[:5]) more = len(invalid_lock_sizes) - 5 suffix = f" (+{more} more)" if more > 0 else "" result['errors'].append( f"spec_lock typography sizes must be unitless numeric px values; " f"found {shown}{suffix}." ) # Font families: default `font_family` plus any per-role `*_family` # override (title_family / body_family / emphasis_family / code_family, # per spec_lock_reference.md). Any of these is a legitimate declared # value; an SVG that uses any one of them is not drifting. allowed_fonts = set() if typo: default_font = typo.get('font_family', '').strip() if default_font: allowed_fonts.add(self._normalize_font_stack(default_font)) for k, v in typo.items(): if k == 'font_family' or not k.endswith('_family'): continue v_clean = v.strip() # Skip placeholder text like "same as body (omit if identical)" if not v_clean or v_clean.lower().startswith('same as'): continue allowed_fonts.add(self._normalize_font_stack(v_clean)) # Sizes: declared slots are anchors; body is the ramp baseline. allowed_sizes = set() body_px = None for k, v in typo.items(): if k == 'font_family' or k.endswith('_family'): continue allowed_sizes.add(self._normalize_size(v)) if k == 'body': try: body_px = float(self._normalize_size(v)) except (ValueError, TypeError): body_px = None # Scan SVG for used values color_drifts = set() for attr in ('fill', 'stroke', 'stop-color'): pattern = re.compile(rf'\b{attr}\s*=\s*["\'](#[0-9A-Fa-f]{{3,8}})["\']') for m in pattern.finditer(content): val = m.group(1).upper() if val not in allowed_colors: color_drifts.add(val) font_drifts = set() # Capture to the matching delimiter (group 1) so a double-quoted stack # containing single-quoted family names is not truncated at the inner quote. for m in re.finditer(r'font-family\s*=\s*(["\'])(.*?)\1', content): val = m.group(2).strip() if allowed_fonts and self._normalize_font_stack(val) not in allowed_fonts: font_drifts.add(val) # Poster / showcase contexts use unbounded hero type — drop the ceiling. mode = (lock.get('mode', {}).get('mode') or '').strip().lower() vstyle = (lock.get('visual_style', {}).get('visual_style') or '').strip().lower() max_ratio = (float('inf') if mode in POSTER_SIZE_MODES or vstyle in POSTER_SIZE_STYLES else RAMP_MAX_RATIO) size_drifts = set() used_sizes = [] for m in re.finditer(r'font-size\s*=\s*["\']([^"\']+)["\']', content): val = self._normalize_size(m.group(1)) used_sizes.append(val) if not allowed_sizes or val in allowed_sizes: continue # Intermediate values are allowed when they sit inside the ramp # envelope (ratio to body within [RAMP_MIN_RATIO, max_ratio]). if body_px and body_px > 0: try: ratio = float(val) / body_px if RAMP_MIN_RATIO <= ratio <= max_ratio: continue except ValueError: pass size_drifts.add(val) template_size_drift = self._detect_template_size_drift( used_sizes, allowed_sizes, body_px ) # Record in run-wide aggregation fname = svg_path.name for v in color_drifts: self._drift_summary['colors'][v].add(fname) for v in font_drifts: self._drift_summary['fonts'][v].add(fname) for v in size_drifts: self._drift_summary['sizes'][v].add(fname) # Per-file warning (one condensed line; details live in summary) parts = [] if color_drifts: parts.append(f"{len(color_drifts)} color(s)") if font_drifts: parts.append(f"{len(font_drifts)} font-family value(s)") if size_drifts: parts.append(f"{len(size_drifts)} font-size value(s)") if parts: result['warnings'].append( f"spec_lock drift: {', '.join(parts)} not in spec_lock.md " "(see drift summary for details)" ) if template_size_drift: result['warnings'].append(template_size_drift) def _detect_template_size_drift(self, used_sizes, allowed_sizes, body_px): """Warn when template-like small sizes bypass the locked type ramp. The normal drift check deliberately permits in-ramp feature sizes, so it should not hard-fail valid hero numbers or one-off labels. This warning targets the common executor failure mode: copying a template's compact 12/15/16px text stack instead of mapping content roles to spec_lock typography, then reflowing from those locked px values. """ if not allowed_sizes or not body_px or body_px <= 0: return None try: declared_min = min(float(v) for v in allowed_sizes) except ValueError: declared_min = None # Stay narrow on purpose: real decks carry legitimate undeclared # sub-body sizes (intermediate levels, labels, emphasis) just below the # locked body, so "any size < body" floods the warning and destroys its # credibility. Only flag values that read as genuine template leftovers # — at or below `body * 0.75`, or below the smallest declared slot. This # under-warns (a stray 15/16 against a body of 18 can slip through) in # exchange for not crying wolf on valid intermediate type. template_like_limit = body_px * 0.75 template_like_sub_body = [] for raw in used_sizes: if raw in allowed_sizes: continue try: size = float(raw) except (TypeError, ValueError): continue below_declared_floor = declared_min is not None and size < declared_min if size <= template_like_limit or below_declared_floor: template_like_sub_body.append(raw) if not template_like_sub_body: return None counts = Counter(template_like_sub_body) distinct = sorted(counts, key=lambda v: float(v)) repeated_total = sum(counts.values()) below_declared_floor = [] if declared_min is not None: below_declared_floor = [v for v in distinct if float(v) < declared_min] if len(distinct) < 2 and repeated_total < 4 and not below_declared_floor: return None sample = ', '.join( f"{v}x{counts[v]}" if counts[v] > 1 else v for v in distinct[:5] ) more = len(distinct) - 5 suffix = f" (+{more} more)" if more > 0 else "" return ( "possible template font-size drift: undeclared sub-body size(s) " f"{sample}{suffix}. Map each text item to a spec_lock typography " "role first, then reflow card height / y / dy / line-height from " "the locked px values." ) def _find_image_sources_manifest(self, svg_path: Path) -> Path | None: """Locate image_sources.json for a project SVG. Quality checks run primarily on /svg_output/*.svg, but this also supports SVGs checked from project root or svg_final. """ bases = (svg_path.parent, svg_path.parent.parent, svg_path.parent.parent.parent) for base in bases: candidate = base / 'images' / 'image_sources.json' if candidate.exists(): return candidate return None def _load_image_sources_manifest(self, svg_path: Path) -> Dict: manifest_path = self._find_image_sources_manifest(svg_path) if manifest_path is None: return {} if manifest_path in self._source_manifest_cache: return self._source_manifest_cache[manifest_path] try: payload = json.loads(manifest_path.read_text(encoding='utf-8')) except (OSError, json.JSONDecodeError): payload = {} self._source_manifest_cache[manifest_path] = payload return payload def _check_sourced_image_attribution(self, content: str, svg_path: Path, result: Dict): """Require visible credit text for attribution-required web images. image_search.py records the legal tier in images/image_sources.json; Executor must render compact credit text into the SVG. This check prevents a quality-first CC BY / CC BY-SA image from silently reaching export without attribution. """ manifest = self._load_image_sources_manifest(svg_path) items = manifest.get('items') or [] if not items: return text_content = html.unescape(re.sub(r'<[^>]+>', ' ', content)) text_content = re.sub(r'\s+', ' ', text_content) svg_stem = svg_path.stem for item in items: if not item.get('attribution_required') and item.get('license_tier') != 'attribution-required': continue filename = Path(str(item.get('filename') or '')).name slide = str(item.get('slide') or '').strip() referenced = bool(filename and filename in content) same_slide = bool(slide and slide == svg_stem) if not referenced and not same_slide: continue license_name = str(item.get('license_name') or '').upper() license_token = 'CC BY-SA' if 'BY-SA' in license_name else 'CC BY' has_credit = license_token in text_content.upper() if not has_credit: result['errors'].append( f"Missing inline attribution for sourced image {filename or '(unknown)'} " f"({license_token}). Add compact credit text per " f"references/image-searcher.md §7." ) @staticmethod def _normalize_size(value: str) -> str: """Normalize a font-size value for drift comparison. Unit-bearing SVG values are reported as errors before drift checking. The legacy `px` strip remains to avoid a duplicate drift warning after the hard error has already identified the unit problem. """ v = value.strip().lower() if v.endswith('px'): v = v[:-2].strip() return v @staticmethod def _normalize_font_stack(stack: str) -> str: """Normalize a font-family stack for comparison: split on commas, strip quotes / whitespace, lowercase, rejoin. Collapses cosmetic differences (comma spacing, single vs double quotes, case) so that `Consolas,'Courier New',monospace` matches `Consolas, "Courier New", monospace`.""" parts = [p.strip().strip('"\'').lower() for p in stack.split(',')] return ','.join(p for p in parts if p) def _categorize_issue(self, error_msg: str) -> str: """Categorize issue type""" if 'Invalid XML' in error_msg: return 'XML well-formedness' elif 'viewBox' in error_msg: return 'viewBox issues' elif 'foreignObject' in error_msg: return 'foreignObject' elif 'font' in error_msg.lower(): return 'Font issues' else: return 'Other' def check_directory(self, directory: str, expected_format: str = None) -> List[Dict]: """ Check all SVG files in a directory Args: directory: Directory path expected_format: Expected canvas format Returns: List of check results """ dir_path = Path(directory) if not dir_path.exists(): print(f"[ERROR] Directory does not exist: {directory}") return [] # Brand-only template directories (templates/brands//) have no SVG # roster — design_spec.md frontmatter declares `kind: brand`. Skip SVG # checks entirely; brand validation lives in register_template.py. if self.template_mode and dir_path.is_dir(): spec = dir_path / 'design_spec.md' if spec.exists() and _design_spec_is_brand(spec): print( f"[INFO] Brand directory detected (kind: brand) — " f"SVG checks skipped." ) print( f"[INFO] Validate brand specs via: " f"python3 scripts/register_template.py " f"--kind brand --dry-run" ) return self.results # Find all SVG files if dir_path.is_file(): svg_files = [dir_path] else: if self.template_mode: # Template directories live at templates/{layouts,decks}//. svg_files = sorted(dir_path.glob('*.svg')) else: svg_output = dir_path / \ 'svg_output' if ( dir_path / 'svg_output').exists() else dir_path svg_files = sorted(svg_output.glob('*.svg')) if not svg_files: print(f"[WARN] No SVG files found") return [] print(f"\n[SCAN] Checking {len(svg_files)} SVG file(s)...\n") for svg_file in svg_files: result = self.check_file(str(svg_file), expected_format) self._print_result(result) if self.template_mode and dir_path.is_dir(): self._check_template_contract(dir_path, svg_files) elif dir_path.is_dir(): self._check_animation_config_contract(dir_path) return self.results def _check_animation_config_contract(self, dir_path: Path) -> None: """Project-level animations.json reference checks.""" if _load_animation_config is None or _validate_animation_config is None: return project_path = dir_path if (dir_path / 'svg_output').exists() else dir_path.parent try: config = _load_animation_config(project_path) except Exception as exc: self._animation_issues.append(('error', f"animations.json is invalid: {exc}")) return if not config: return for warning in _validate_animation_config(project_path, config): self._animation_issues.append(('warning', warning)) def _check_template_contract(self, dir_path: Path, svg_files: List[Path]) -> None: """Template-mode-only checks: roster ↔ design_spec consistency and per-page placeholder hints. - **Roster mismatch (orphan / missing)** is reported as an *error*: a stale roster will produce a wrong ``layouts_index.json`` entry. - **Placeholder gaps** are reported as *warnings*. Templates may legitimately omit conventional placeholders or swap them out (e.g. ``{{CLOSING_MESSAGE}}`` instead of ``{{THANK_YOU}}``), and a content variant may use a bespoke slot vocabulary. Designers can declare their own per-stem expectations via ``placeholders:`` frontmatter in ``design_spec.md`` to suppress these warnings explicitly. Issues are aggregated and printed in :py:meth:`print_summary` so the per-file report stays focused on intrinsic SVG validity. """ spec_path = dir_path / 'design_spec.md' spec_text = spec_path.read_text(encoding='utf-8') if spec_path.exists() else "" spec_pages = self._extract_spec_roster(spec_text) if spec_text else [] custom_contract = self._extract_frontmatter_placeholders(spec_text) if spec_text else {} on_disk = {p.stem for p in svg_files} if spec_pages: spec_set = set(spec_pages) orphan = sorted(on_disk - spec_set) missing = sorted(spec_set - on_disk) for page in orphan: self._template_issues.append(( 'error', 'roster_orphan', f"{page}.svg exists on disk but is not listed in design_spec.md Page Roster", )) for page in missing: self._template_issues.append(( 'error', 'roster_missing', f"design_spec.md Page Roster lists {page} but {page}.svg is missing on disk", )) elif spec_path.exists(): # design_spec.md is present but the roster parser found nothing — # surface as a warning. Legacy specs may lack an explicit roster. self._template_issues.append(( 'warning', 'roster_unknown', f"could not extract page roster from {spec_path.name}; " "skipping orphan/missing checks", )) else: self._template_issues.append(( 'error', 'spec_missing', f"{spec_path.name} not found — required for every library template", )) # Per-file placeholder coverage. Variants reuse the parent type's set # (e.g. 03a_content_two_col.svg ↔ 03_content rules) unless the spec # frontmatter overrides that page (custom_contract takes precedence). for svg_file in svg_files: expected = self._lookup_template_contract( svg_file.stem, overrides=custom_contract, ) if expected is None: continue # extension pages or stems with no convention try: content = svg_file.read_text(encoding='utf-8') except OSError: continue for placeholder in expected: if placeholder not in content: self._template_issues.append(( 'warning', 'placeholder_hint', f"{svg_file.name}: missing conventional placeholder {placeholder} " "(declare 'placeholders:' frontmatter in design_spec.md to silence)", )) @staticmethod def _extract_frontmatter_placeholders(spec_text: str) -> Dict[str, Tuple[str, ...]]: """Read the optional ``placeholders:`` map from design_spec.md frontmatter. Shape: .. code-block:: yaml placeholders: 01_cover: ["{{TITLE}}", "{{BRAND_LOGO}}"] 03_content: [] # explicitly assert "no expectation" 03a_content_two_col: # variant-specific override - "{{LEFT_TITLE}}" - "{{RIGHT_TITLE}}" Each key is a stem (full filename without ``.svg``) or page-type prefix (``01_cover``). An empty list silences the default convention for that stem; a populated list replaces the default. Stems / prefixes not listed fall back to ``DEFAULT_PLACEHOLDER_CONVENTION``. We parse with PyYAML when available; otherwise we fall back to a minimal regex that handles the documented shape. """ if not spec_text.startswith("---\n"): return {} end = spec_text.find("\n---\n", 4) if end == -1: return {} block = spec_text[4:end] try: import yaml # type: ignore except ImportError: return _parse_placeholders_fallback(block) try: data = yaml.safe_load(block) or {} except yaml.YAMLError: return {} if not isinstance(data, dict): return {} raw = data.get("placeholders") if not isinstance(raw, dict): return {} out: Dict[str, Tuple[str, ...]] = {} for stem, value in raw.items(): if not isinstance(stem, str): continue if isinstance(value, list): out[stem] = tuple(str(v) for v in value) elif value is None: out[stem] = () return out @staticmethod def _extract_spec_roster(spec_text: str) -> List[str]: """Best-effort: extract the page roster from design_spec.md. Templates do not share a uniform section index for the roster — the personality-only skeleton puts it at §V "Page Roster"; legacy specs use §VI "Page Roster" or bury filenames under §VII "Page Types" as ``### N. Cover Page (01_cover.svg)``. We match by title (any roman index), then fall back to scanning the whole document for any backtick-wrapped ``.svg`` reference. Returns the deduplicated stem list in document order. Empty result means we can't determine the roster confidently — caller should treat that as "skip orphan/missing checks", not as "no pages declared". """ # Pass 1: explicit roster section, any roman numeral. section = re.search( r"^##\s+[IVX]+\.\s+(?:Page Roster|Page Structure|Pages|Page Types)\b.*?(?=^##\s+|\Z)", spec_text, re.MULTILINE | re.DOTALL | re.IGNORECASE, ) scope = section.group(0) if section else None # Pass 2: full document. We *only* trust this scan when the explicit # roster scan came up empty (no `.svg` references inside it) — # otherwise the explicit section's deliberate roster wins over loose # mentions elsewhere. if scope and re.search(r"[`$][0-9A-Za-z_]+\.svg[`$]", scope): text = scope else: text = spec_text stems: List[str] = [] seen: set = set() # Accept backtick-quoted (`01_cover.svg`) and parenthesized # (01_cover.svg) forms — existing specs use either. svg_ref_re = re.compile(r"[`$]([0-9A-Za-z_]+\.svg)[`$]") for match in svg_ref_re.finditer(text): stem = match.group(1)[:-4] if stem in seen or not re.match(r"^\d", stem): continue seen.add(stem) stems.append(stem) # If the explicit §VI scan listed bare stems (without .svg), accept # those as fallback — but only when they were inside that section. if not stems and scope: for match in re.finditer(r"`([0-9]{2}[a-z]?_[A-Za-z0-9_]+)`", scope): stem = match.group(1) if stem in seen: continue seen.add(stem) stems.append(stem) return stems @classmethod def _lookup_template_contract( cls, stem: str, *, overrides: Dict[str, Tuple[str, ...]] | None = None, ) -> Tuple[str, ...] | None: """Resolve a SVG stem to its expected placeholder set. Resolution order, first hit wins: 1. ``overrides[stem]`` — frontmatter entry for the exact filename 2. ``overrides[]`` — frontmatter entry for the variant's parent type (e.g. ``03_content`` for ``03a_content_two_col``) 3. ``DEFAULT_PLACEHOLDER_CONVENTION[]`` Returns ``None`` for stems with no matching convention or override — e.g. extension pages like ``05_section_break``. ``()`` (empty tuple) is a valid value meaning "no expected placeholders" — used to explicitly silence the default convention. """ overrides = overrides or {} if stem in overrides: return overrides[stem] # Variant convention: ?_; strip the letter to find # the parent type prefix, e.g. "03a_content_two_col" -> "03_content". match = re.match(r"^(\d{2})([a-z])?_([a-z]+)", stem) if not match: return None num, _letter, kind = match.groups() key = f"{num}_{kind}" if key in overrides: return overrides[key] return cls.DEFAULT_PLACEHOLDER_CONVENTION.get(key) def _print_result(self, result: Dict): """Print check result for a single file""" if result['passed']: if result['warnings']: icon = "[WARN]" status = "Passed (with warnings)" else: icon = "[OK]" status = "Passed" else: icon = "[ERROR]" status = "Failed" print(f"{icon} {result['file']} - {status}") # Display basic info if result['info']: info_items = [] if 'viewbox' in result['info']: info_items.append(f"viewBox: {result['info']['viewbox']}") if info_items: print(f" {' | '.join(info_items)}") # Display errors if result['errors']: for error in result['errors']: print(f" [ERROR] {error}") # Display warnings if result['warnings']: for warning in result['warnings'][:2]: # Only show first 2 warnings print(f" [WARN] {warning}") if len(result['warnings']) > 2: print(f" ... and {len(result['warnings']) - 2} more warning(s)") print() def _check_icon_usage(self, content: str, svg_path: Path, result: Dict) -> None: """Warn when a page references no icons despite spec_lock locking an inventory, and feed the deck-level zero-icons gate. Section / cover / closing pages legitimately ship without icons, so a single icon-less page is only a soft per-page warning. The hard failure is deck-wide (every page icon-less while an inventory is locked) and is emitted in :py:meth:`_print_icon_summary`. """ lock = self._get_spec_lock(svg_path) if not lock: return icons = lock.get('icons') or {} library = (icons.get('library') or '').strip().lower() inventory = (icons.get('inventory') or '').strip().lower() _empty = ('', 'none', '(none)', '-', 'n/a') declared = library not in _empty and inventory not in _empty if not declared: return self._icon_inventory_declared = True count = len(re.findall(r']*\bdata-icon\s*=', content)) result.setdefault('info', {})['icon_count'] = count self._deck_icon_total += count if count == 0: self._pages_missing_icons.append(svg_path.name) result['warnings'].append( f"spec_lock locks an icon library ({icons.get('library')}) + inventory " f"but this page references no — content pages should place " f"1-3 inventory icons (cover / section / closing pages may omit)" ) def _print_icon_summary(self): """Deck-level icon-usage gate. Declared inventory + zero icons deck-wide is a hard error (the locked icons are unused and the deck renders flat). Bumps ``summary['errors']`` so the process exits non-zero, mirroring ``_print_animation_summary``. """ if not self._icon_inventory_declared: return if self._deck_icon_total == 0: self.summary['errors'] += 1 print("\n[ERROR] Icon usage: spec_lock locks an icon library + inventory, " "but the deck authors ZERO across all pages.") print(" The locked icons are unused — the deck renders flat / icon-less.") print(" Fix: in the executor, place inventory icons on content pages " "(KPI / list / process / comparison layouts especially), then re-run.") elif self._pages_missing_icons: print(f"\n[INFO] Icon usage: {self._deck_icon_total} icon(s) deck-wide; " f"{len(self._pages_missing_icons)} page(s) reference none " f"({', '.join(self._pages_missing_icons)}).") print(" Cover / section / closing pages may legitimately omit icons; " "verify dense content pages aren't missing them.") def _check_graphic_richness(self, content: str, result: Dict) -> None: """Tally graphic primitives per page for the deck-level flat-deck gate. Counts //// — the elements that actually draw a diagram, chart, figure, or photo. is included because node / bubble / venn / timeline diagrams are built from circles (excluding it false-flagged a 21-circle roadmap as "no figure"). and stay excluded: they are layout cards, backgrounds, and dividers, and a deck built only from them is the "text on rectangles" look this catches. Icons don't count here — they have their own gate. Per-page nudges stay soft; the hard gate is deck-wide. """ g = (len(re.findall(r' float: """Approximate advance width of one char, in em (× font-size).""" o = ord(ch) if (0x2E80 <= o <= 0x9FFF or 0xF900 <= o <= 0xFAFF or 0xFF00 <= o <= 0xFF60 or 0x3000 <= o <= 0x303F): return 1.0 # CJK ideographs, kana, fullwidth forms, CJK punct if ch == ' ': return 0.30 if ch.isdigit(): return 0.60 if ch.isupper(): return 0.72 if o < 0x2E80: return 0.52 # latin lowercase / halfwidth punctuation return 0.70 @classmethod def _est_text_w(cls, s: str, fs: float) -> float: return sum(cls._est_char_w(c) for c in s) * fs @staticmethod def _f(value, default=None): try: return float(str(value).strip()) except (TypeError, ValueError): return default def _collect_geometry(self, root) -> Tuple[list, list]: """Walk the tree collecting estimated text boxes and exact icon boxes. Only translate() transforms are followed; any other transform makes coordinates unknowable without a full matrix engine, so that subtree is skipped (better silent than wrong). Boxes are dicts: {x0, y0, x1, y1, fs, label, exact_left} — exact_left marks a start-anchored text whose left edge is exact (only the right edge is estimated). """ texts: list = [] icons: list = [] translate_re = re.compile( r'^\s*translate$\s*(-?[\d.]+)(?:[\s,]+(-?[\d.]+))?\s*$\s*$') skip_tags = {'defs', 'clipPath', 'marker', 'symbol', 'pattern', 'mask', 'linearGradient', 'radialGradient', 'filter'} def local(tag): return tag.split('}')[-1] def effective_opacity(el, inherited: float) -> float: op = self._f(el.get('opacity'), 1.0) fop = self._f(el.get('fill-opacity'), 1.0) return inherited * min(op if op is not None else 1.0, fop if fop is not None else 1.0) def line_box(runs: list, x: float, y: float, anchor: str, tx: float, ty: float): """One box per visual line. runs = [(text, fs), ...] flowed left-to-right; the anchor positions the line's TOTAL width, which is how SVG actually lays out a with styled inline tspans (e.g. $4.2B (35%) renders as one right-aligned line, not two stacked runs).""" w = sum(self._est_text_w(t, f) for t, f in runs) fs = max(f for _, f in runs) if anchor == 'middle': x0 = x - w / 2 elif anchor == 'end': x0 = x - w else: x0 = x joined = ''.join(t for t, _ in runs) label = joined if len(joined) <= 12 else joined[:12] + '…' return { 'x0': x0 + tx, 'y0': y - 0.76 * fs + ty, 'x1': x0 + w + tx, 'y1': y + 0.22 * fs + ty, 'fs': fs, 'label': label, 'baseline': y + ty, 'anchor_x': x + tx, 'exact_left': anchor not in ('middle', 'end'), } def collect_text(el, tx, ty, inh_fs, inh_anchor, inh_op): fs = self._f(el.get('font-size'), inh_fs) or 16.0 anchor = el.get('text-anchor') or inh_anchor if effective_opacity(el, inh_op) < 0.35 or el.get('fill') == 'none': return x = self._f(el.get('x')) y = self._f(el.get('y')) if x is None or y is None: return # Group content into visual lines: a tspan with explicit x/y or a # non-zero dy starts a new line; anything else (leading text, # styled inline tspans, tspan tails) flows onto the current line. cur_x, cur_y = x, y cur_runs: list = [] def flush(): nonlocal cur_runs if any(t for t, _ in cur_runs): texts.append(line_box(cur_runs, cur_x, cur_y, anchor, tx, ty)) cur_runs = [] own = (el.text or '').strip() if own: cur_runs.append((own, fs)) for ts in el: if local(ts.tag) != 'tspan': continue tfs = self._f(ts.get('font-size'), fs) or fs tsx = self._f(ts.get('x')) tsy = self._f(ts.get('y')) dy_raw = (ts.get('dy') or '').strip() if dy_raw.endswith('em'): dy = (self._f(dy_raw[:-2], 0.0) or 0.0) * tfs else: dy = self._f(dy_raw, 0.0) or 0.0 if tsx is not None or tsy is not None or dy: flush() if tsx is not None: cur_x = tsx if tsy is not None: cur_y = tsy else: cur_y += dy t = ''.join(ts.itertext()).strip() if t: cur_runs.append((t, tfs)) tail = (ts.tail or '').strip() if tail: cur_runs.append((tail, fs)) flush() def walk(el, tx, ty, inh_fs, inh_anchor, inh_op): tag = local(el.tag) if tag in skip_tags: return tr = el.get('transform') if tr: m = translate_re.match(tr) if not m: return # rotate/scale/matrix — coords unknown, skip subtree tx += float(m.group(1)) ty += float(m.group(2) or 0) if tag == 'text': collect_text(el, tx, ty, inh_fs, inh_anchor, inh_op) return if tag == 'use' and el.get('data-icon'): x, y = self._f(el.get('x')), self._f(el.get('y')) w, h = self._f(el.get('width')), self._f(el.get('height')) if None not in (x, y, w, h): icons.append({ 'x0': x + tx, 'y0': y + ty, 'x1': x + w + tx, 'y1': y + h + ty, 'label': el.get('data-icon'), }) return inh_fs = self._f(el.get('font-size'), inh_fs) inh_anchor = el.get('text-anchor') or inh_anchor inh_op = effective_opacity(el, inh_op) if inh_op < 0.35: return for c in el: walk(c, tx, ty, inh_fs, inh_anchor, inh_op) walk(root, 0.0, 0.0, None, 'start', 1.0) return texts, icons @staticmethod def _box_intersection(a: Dict, b: Dict) -> Tuple[float, float]: iw = min(a['x1'], b['x1']) - max(a['x0'], b['x0']) ih = min(a['y1'], b['y1']) - max(a['y0'], b['y0']) return max(iw, 0.0), max(ih, 0.0) def _check_geometry(self, content: str, result: Dict) -> None: """Detect text/icon overlaps and off-canvas elements (estimated boxes).""" try: root = ET.fromstring(content) except ET.ParseError: return # already reported by the well-formedness check vb = re.search(r'viewBox="([^"]+)"', content) if not vb: return parts = vb.group(1).split() if len(parts) != 4: return canvas_w, canvas_h = float(parts[2]), float(parts[3]) texts, icons = self._collect_geometry(root) errors: List[str] = [] warnings: List[str] = [] # 1. Off-canvas: baseline / anchor coordinates are exact → error; # right-edge overflow relies on the width estimate → warning. for t in texts: if t['baseline'] > canvas_h + 1: errors.append( f"text \"{t['label']}\" baseline y={t['baseline']:.0f} is below " f"the canvas (height {canvas_h:.0f}) — it will be clipped") elif t['exact_left'] and t['anchor_x'] > canvas_w + 1: errors.append( f"text \"{t['label']}\" starts at x={t['anchor_x']:.0f}, beyond " f"the canvas (width {canvas_w:.0f})") elif t['x1'] > canvas_w + 0.6 * t['fs']: warnings.append( f"text \"{t['label']}\" likely overflows the right canvas edge " f"(estimated right {t['x1']:.0f} > {canvas_w:.0f})") elif t['x0'] < -0.6 * t['fs']: warnings.append( f"text \"{t['label']}\" likely overflows the left canvas edge " f"(estimated left {t['x0']:.0f} < 0)") for ic in icons: if ic['x0'] >= canvas_w or ic['y0'] >= canvas_h or ic['x1'] <= 0 or ic['y1'] <= 0: errors.append( f"icon {ic['label']} at ({ic['x0']:.0f},{ic['y0']:.0f}) is entirely " f"outside the canvas") elif (ic['x0'] < -2 or ic['y0'] < -2 or ic['x1'] > canvas_w + 2 or ic['y1'] > canvas_h + 2): warnings.append( f"icon {ic['label']} extends beyond the canvas edge " f"({ic['x0']:.0f},{ic['y0']:.0f})-({ic['x1']:.0f},{ic['y1']:.0f})") # 2. Text-on-text collisions. Adjacent lines at normal line-height never # intersect (box height ≈ 0.98×fs, line gap ≥ 1.15×fs), so any real # intersection means two runs share the same space. for i in range(len(texts)): for j in range(i + 1, len(texts)): a, b = texts[i], texts[j] iw, ih = self._box_intersection(a, b) if iw <= 0 or ih <= 0: continue min_fs = min(a['fs'], b['fs']) min_h = min(a['y1'] - a['y0'], b['y1'] - b['y0']) # Same-baseline runs are horizontally sequenced by design — # any real horizontal overlap means the left run's width was # under-budgeted (the classic big-numeral-plus-caption bug), # regardless of how small the overlap area ratio is. same_line = abs(a['baseline'] - b['baseline']) < 0.5 * min_fs if iw < 0.6 * min_fs or ih < 0.45 * min_h: continue # graze from estimation noise — ignore min_area = min((a['x1'] - a['x0']) * (a['y1'] - a['y0']), (b['x1'] - b['x0']) * (b['y1'] - b['y0'])) ratio = (iw * ih) / min_area if min_area > 0 else 0 # Text-text overlaps cap at WARNING: the width estimate can't # tell a crash from a deliberate graze (quadrant captions, # word clouds, tightly-kerned numeral+suffix pairs all overlap # estimated boxes legitimately). The warning carries exact # coordinates so the render-acceptance pass knows which spot # to eyeball; icon-on-text and off-canvas below stay errors # because their geometry is exact. if ratio >= 0.15 or same_line: warnings.append( f"text \"{a['label']}\" and \"{b['label']}\" overlap " f"(~{ratio * 100:.0f}% of the smaller run, around " f"({max(a['x0'], b['x0']):.0f},{max(a['y0'], b['y0']):.0f})) " f"— eyeball this spot at render acceptance") # 3. Icon-on-text collisions. Icon geometry is exact; the text estimate # only inflates the right edge, so a large covered fraction of the # icon is a reliable signal. for ic in icons: icon_area = (ic['x1'] - ic['x0']) * (ic['y1'] - ic['y0']) if icon_area <= 0: continue for t in texts: iw, ih = self._box_intersection(ic, t) if iw <= 0 or ih <= 0: continue ratio = (iw * ih) / icon_area msg = (f"icon {ic['label']} overlaps text \"{t['label']}\" " f"(~{ratio * 100:.0f}% of the icon covered, at " f"({ic['x0']:.0f},{ic['y0']:.0f}))") if ratio >= 0.55: errors.append(msg) elif ratio >= 0.25: warnings.append(msg) # 4. Icon-on-icon collisions (both exact) — always at least a warning. for i in range(len(icons)): for j in range(i + 1, len(icons)): a, b = icons[i], icons[j] iw, ih = self._box_intersection(a, b) if iw <= 0 or ih <= 0: continue min_area = min((a['x1'] - a['x0']) * (a['y1'] - a['y0']), (b['x1'] - b['x0']) * (b['y1'] - b['y0'])) if min_area > 0 and (iw * ih) / min_area >= 0.3: warnings.append( f"icons {a['label']} and {b['label']} overlap at " f"({max(a['x0'], b['x0']):.0f},{max(a['y0'], b['y0']):.0f})") for bucket, dest in ((errors, result['errors']), (warnings, result['warnings'])): shown = bucket[:self._GEOM_MAX_REPORTS] dest.extend(f"Geometry: {m}" for m in shown) if len(bucket) > len(shown): dest.append( f"Geometry: ... and {len(bucket) - len(shown)} more " f"similar issue(s) on this page") def _print_graphic_summary(self): """Deck-level flat-deck gate. Two hard-error bars for a text-heavy content deck (>=6 pages): - ZERO figure primitives deck-wide → the wall-of-text-boxes pathology. - >=60% of pages carry no figure → mostly-flat (a few token diagrams don't rescue a deck that is otherwise text + boxes + icons). Between 40% and 60% is a soft INFO nudge. Short decks (<6 pages) are exempt to avoid false-failing minimalist / teaser decks. Figure = path/polygon/polyline/circle/image (see _check_graphic_richness). """ pages = self._deck_page_count if pages < 6: return avg_text = self._deck_text_total / pages no_g = len(self._pages_no_graphic) frac = no_g / pages _fix = ("Map content shape -> a visual: comparison->columns/quadrant, " "timeline->process, share->donut, trend->line, ≥3 data points->chart " "(adapt a templates/charts/ template or draw it), add diagrams/imagery, " "then re-run.") if self._deck_graphic_total == 0 and avg_text >= 10: self.summary['errors'] += 1 print(f"\n[ERROR] Visual richness: {pages} text-heavy pages but ZERO " "diagram/figure primitives (////" ") deck-wide — the deck is text on rectangles.") print(" " + _fix) elif frac >= 0.6 and avg_text >= 10: self.summary['errors'] += 1 print(f"\n[ERROR] Visual richness: {no_g}/{pages} pages carry no diagram/" "figure (////) — the deck is " "mostly text + boxes; a few token diagrams don't cover a data/analysis " "deck.") print(" " + _fix) elif frac >= 0.4: print(f"\n[INFO] Visual richness: {no_g}/{pages} pages have no diagram/figure " "(////) — verify dense content " "pages aren't just text + boxes.") def print_summary(self): """Print check summary""" print("=" * 80) print("[SUMMARY] Check Summary") print("=" * 80) print(f"\nTotal files: {self.summary['total']}") print( f" [OK] Fully passed: {self.summary['passed']} ({self._percentage(self.summary['passed'])}%)") print( f" [WARN] With warnings: {self.summary['warnings']} ({self._percentage(self.summary['warnings'])}%)") print( f" [ERROR] With errors: {self.summary['errors']} ({self._percentage(self.summary['errors'])}%)") if self.issue_types: print(f"\nIssue categories:") for issue_type, count in sorted(self.issue_types.items(), key=lambda x: x[1], reverse=True): print(f" {issue_type}: {count}") # spec_lock drift aggregation (only printed when a lock was found) self._print_drift_summary() # Template-mode aggregation (orphan/missing roster + placeholder hints) self._print_template_summary() # Animation config aggregation. self._print_animation_summary() # Deck-level icon-usage gate (declared inventory but icon-less deck). self._print_icon_summary() # Deck-level flat-deck gate (text-on-rectangles, no diagrams/figures). self._print_graphic_summary() # Fix suggestions if self.summary['errors'] > 0 or self.summary['warnings'] > 0: print(f"\n[TIP] Common fixes:") print(f" 1. XML well-formedness: write typography as raw Unicode (—, ©, →, NBSP); escape XML reserved chars as & < > " ' — never use HTML named entities like — ©") print(f" 2. viewBox issues: Ensure consistency with canvas format (see references/canvas-formats.md)") print(f" 3. foreignObject: Use + for manual line breaks") print(f" 4. Font issues: end every font-family stack with a PPT-safe family (e.g. Microsoft YaHei / Arial / Consolas)") def _print_animation_summary(self): """Print animations.json validation issues if present.""" if not self._animation_issues: return errors = [item for item in self._animation_issues if item[0] == 'error'] warnings = [item for item in self._animation_issues if item[0] == 'warning'] self.summary['errors'] += len(errors) self.summary['warnings'] += len(warnings) for severity, _msg in self._animation_issues: self.issue_types[f'animation_config_{severity}'] += 1 print("\n[ANIMATION] animations.json checks") for _severity, msg in errors: print(f" [ERROR] {msg}") for _severity, msg in warnings: print(f" [WARN] {msg}") def _print_template_summary(self): """Aggregate template-mode roster / placeholder issues at the bottom. Errors land under the ``errors`` summary count (so the exit signal from ``main`` agrees), warnings under ``warnings``. Both are listed per file so the user can act on them directly. """ if not self._template_issues: return errors = [item for item in self._template_issues if item[0] == 'error'] warnings = [item for item in self._template_issues if item[0] == 'warning'] # Mirror into the global summary so downstream "0 errors" gates honor # template-mode issues. self.summary['errors'] += len(errors) self.summary['warnings'] += len(warnings) for severity, kind, _msg in self._template_issues: self.issue_types[f"template_{kind}"] += 1 print("\n[TEMPLATE] Template mode checks") if errors: print(f" Errors ({len(errors)}):") for _sev, kind, msg in errors: print(f" [{kind}] {msg}") if warnings: print(f" Warnings ({len(warnings)}):") for _sev, kind, msg in warnings: print(f" [{kind}] {msg}") if not errors: print(" No structural roster issues. Placeholder hints above are advisory only;") print(" declare 'placeholders:' frontmatter in design_spec.md to silence them.") def _print_drift_summary(self): """Print spec_lock drift aggregation if any was observed. Values are sorted by file-count descending so frequent drift surfaces first. Frequent drift usually means spec_lock.md is missing entries the Strategist should have included; rare drift is more likely actual Executor drift and warrants SVG review. """ if not self._lock_seen: return has_drift = any(self._drift_summary[cat] for cat in self._drift_summary) if not has_drift: print("\n[OK] spec_lock drift: none — all colors, fonts, and sizes are anchored to spec_lock.md") return print("\nspec_lock drift — values used outside spec_lock.md:") labels = [('colors', 'Colors'), ('fonts', 'Font families'), ('sizes', 'Font sizes')] for category, label in labels: items = self._drift_summary.get(category, {}) if not items: continue entries = sorted(items.items(), key=lambda x: (-len(x[1]), x[0])) print(f" {label}:") for val, files in entries: n = len(files) suffix = "file" if n == 1 else "files" print(f" {val} ({n} {suffix})") print( "Tip: frequent out-of-lock values usually mean spec_lock.md is missing\n" " entries — extend the lock (scripts/update_spec.py or manual edit).\n" " Rare ones are likely Executor drift — review the affected SVGs." ) def _percentage(self, count: int) -> int: """Calculate percentage""" if self.summary['total'] == 0: return 0 return int(count / self.summary['total'] * 100) def export_report(self, output_file: str = 'svg_quality_report.txt'): """Export check report""" with open(output_file, 'w', encoding='utf-8') as f: f.write("PPT Master SVG Quality Check Report\n") f.write("=" * 80 + "\n\n") for result in self.results: status = "[OK] Passed" if result['passed'] else "[ERROR] Failed" f.write(f"{status} - {result['file']}\n") f.write(f"Path: {result.get('path', 'N/A')}\n") if result['info']: f.write(f"Info: {result['info']}\n") if result['errors']: f.write(f"\nErrors:\n") for error in result['errors']: f.write(f" - {error}\n") if result['warnings']: f.write(f"\nWarnings:\n") for warning in result['warnings']: f.write(f" - {warning}\n") f.write("\n" + "-" * 80 + "\n\n") # Write summary f.write("\n" + "=" * 80 + "\n") f.write("Check Summary\n") f.write("=" * 80 + "\n\n") f.write(f"Total files: {self.summary['total']}\n") f.write(f"Fully passed: {self.summary['passed']}\n") f.write(f"With warnings: {self.summary['warnings']}\n") f.write(f"With errors: {self.summary['errors']}\n") print(f"\n[REPORT] Check report exported: {output_file}") def print_usage() -> None: """Print CLI usage information.""" print("PPT Master - SVG Quality Check Tool\n") print("Usage:") print(" python3 scripts/svg_quality_checker.py ") print(" python3 scripts/svg_quality_checker.py ") print(" python3 scripts/svg_quality_checker.py --template-mode") print(" python3 scripts/svg_quality_checker.py --all examples") print("\nExamples:") print(" python3 scripts/svg_quality_checker.py examples/project/svg_output/slide_01.svg") print(" python3 scripts/svg_quality_checker.py examples/project/svg_output") print(" python3 scripts/svg_quality_checker.py examples/project") print(" python3 scripts/svg_quality_checker.py templates/layouts/academic_defense --template-mode") print(" python3 scripts/svg_quality_checker.py templates/decks/招商银行 --template-mode") print("\nOptions:") print(" --format Expected canvas format") print(" --template-mode Validate a templates/{layouts,decks}/ directory:") print(" glob *.svg directly, skip spec_lock checks,") print(" enforce roster ↔ design_spec.md Page Roster consistency,") print(" and emit advisory placeholder-convention warnings.") def main() -> None: """Run the CLI entry point.""" if len(sys.argv) < 2: print_usage() sys.exit(0) if sys.argv[1] in {"-h", "--help", "help"}: print_usage() sys.exit(0) if sys.argv[1].startswith("--") and sys.argv[1] not in {"--all"}: print(f"[ERROR] Missing target before option: {sys.argv[1]}") print_usage() sys.exit(1) template_mode = '--template-mode' in sys.argv checker = SVGQualityChecker(template_mode=template_mode) # Parse arguments target = sys.argv[1] expected_format = None if '--format' in sys.argv: idx = sys.argv.index('--format') if idx + 1 < len(sys.argv): expected_format = sys.argv[idx + 1] # Execute check if target == '--all': # Check all example projects base_dir = sys.argv[2] if len(sys.argv) > 2 else 'examples' from project_utils import find_all_projects projects = find_all_projects(base_dir) for project in projects: print(f"\n{'=' * 80}") print(f"Checking project: {project.name}") print('=' * 80) checker.check_directory(str(project)) else: checker.check_directory(target, expected_format) # Print summary checker.print_summary() # Export report (if specified) if '--export' in sys.argv: output_file = 'svg_quality_report.txt' if '--output' in sys.argv: idx = sys.argv.index('--output') if idx + 1 < len(sys.argv): output_file = sys.argv[idx + 1] checker.export_report(output_file) # Return exit code if checker.summary['errors'] > 0: sys.exit(1) else: sys.exit(0) if __name__ == '__main__': main()