#!/usr/bin/env python3 """ PPT Master - SVG Quality Check Tool Checks whether SVG files comply with project technical specifications. Usage: python3 scripts/svg_quality_checker.py python3 scripts/svg_quality_checker.py python3 scripts/svg_quality_checker.py --all examples """ import sys try: # zcbot: Windows GBK 控制台兼容,避免 emoji/© 等触发 UnicodeEncodeError sys.stdout.reconfigure(encoding="utf-8", errors="replace") sys.stderr.reconfigure(encoding="utf-8", errors="replace") except Exception: pass import re import json import html from pathlib import Path from typing import List, Dict, Tuple from collections import Counter, defaultdict from xml.etree import ElementTree as ET try: from project_utils import CANVAS_FORMATS from error_helper import ErrorHelper except ImportError: print("Warning: Unable to import dependency modules") CANVAS_FORMATS = {} ErrorHelper = None try: from update_spec import parse_lock as _parse_spec_lock except ImportError: _parse_spec_lock = None # spec_lock drift check will be skipped try: from svg_to_pptx.animation_config import ( load_animation_config as _load_animation_config, validate_animation_config as _validate_animation_config, ) except ImportError: _load_animation_config = None _validate_animation_config = None HEX_VALUE_RE = re.compile(r"#[0-9A-Fa-f]{3,8}") SVG_NS = "http://www.w3.org/2000/svg" # Ramp envelope for font-size drift detection. # From design_spec_reference.md §IV — Font Size Hierarchy: the ramp spans # from page-number floor (0.5x body) to cover-title ceiling (5.0x body). # Intermediate px values within this envelope are permitted per # executor-base.md §2.1 ("Executor may use an intermediate size ... provided # the size's ratio to body falls within the corresponding role's band"); only # values outside every band — i.e. outside this envelope — are drift. 
RAMP_MIN_RATIO = 0.5
RAMP_MAX_RATIO = 5.0

# Modes / visual styles that legitimately use unbounded hero / poster type
# (huge cover numerals, act dividers, single-number reveals). For these the
# size-drift upper bound is dropped — the oversize is the design, not Executor
# drift. The lower bound still applies.
POSTER_SIZE_MODES = {'showcase'}
POSTER_SIZE_STYLES = {'zine'}


def _design_spec_is_brand(spec_path: Path) -> bool:
    """Return True when a design_spec.md frontmatter declares ``kind: brand``.

    Lightweight detector that does not require PyYAML — scans only the
    frontmatter block (delimited by ``---`` lines, the first of which must be
    the first line of the file) for the first ``kind:`` line and reports
    whether its value, with surrounding quotes removed, equals ``brand``.
    Used by ``check_directory`` to skip SVG validation on brand-only template
    directories.

    Args:
        spec_path: Path to the design_spec.md file to inspect.

    Returns:
        True only when the frontmatter exists, is properly closed, and its
        first ``kind:`` entry is exactly ``brand``. Unreadable or undecodable
        files yield False rather than raising.
    """
    try:
        # utf-8-sig transparently drops a leading BOM, which would otherwise
        # hide the opening '---' delimiter.
        text = spec_path.read_text(encoding='utf-8-sig')
    except (OSError, UnicodeDecodeError):
        return False

    lines = text.split('\n')
    if lines[0] != '---':
        return False

    # Locate the closing delimiter line; it may be the very last line of the
    # file (no trailing newline).
    try:
        closing = lines.index('---', 1)
    except ValueError:
        return False

    for line in lines[1:closing]:
        stripped = line.strip()
        if stripped.startswith('kind:'):
            value = stripped.split(':', 1)[1].strip().strip('"\'')
            return value == 'brand'
    return False


def _parse_placeholders_fallback(block: str) -> Dict[str, Tuple[str, ...]]:
    """Tiny YAML-free reader for the documented ``placeholders:`` shape.

    Used only when PyYAML is unavailable. Recognized lines
    (indentation-aware, two-space indent assumed):

    .. code-block:: yaml

        placeholders:
          01_cover: ["{{TITLE}}", "{{LOGO}}"]
          03_content: []
          03a_content_two_col:
            - "{{LEFT_TITLE}}"
            - "{{RIGHT_TITLE}}"

    Blank lines and comment-only lines are ignored wherever they appear in
    the section, so they never terminate a block list early. Anything else
    outside this minimal grammar is silently skipped — designers who rely on
    advanced YAML should install pyyaml.

    Args:
        block: Text containing a top-level ``placeholders:`` mapping.

    Returns:
        Mapping of page key to a tuple of placeholder strings (possibly
        empty). Empty dict when no ``placeholders:`` section is present.
    """
    out: Dict[str, Tuple[str, ...]] = {}
    # ``key: [a, b]`` — also covers ``key: []`` (the captured group is blank
    # and every blank item is filtered out, giving an empty tuple).
    inline_re = re.compile(
        r"^\s{2}([A-Za-z0-9_]+)\s*:\s*\[(.*)\]\s*$"
    )
    block_header_re = re.compile(r"^\s{2}([A-Za-z0-9_]+)\s*:\s*$")
    item_re = re.compile(r'^\s{4}-\s*"?([^"]+)"?\s*$')

    in_section = False
    current_block_key: str | None = None
    current_items: List[str] = []

    def _flush_block() -> None:
        """Store the block list being collected (if any) and reset state."""
        nonlocal current_block_key, current_items
        if current_block_key is not None:
            out[current_block_key] = tuple(current_items)
        current_block_key = None
        current_items = []

    for line in block.splitlines():
        if line.startswith("placeholders:"):
            in_section = True
            continue
        if not in_section:
            continue

        # Blank and comment-only lines carry no data in YAML; skipping them
        # keeps a multi-line list intact instead of truncating it.
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue

        # End of section: dedent to a non-key line.
        if not line.startswith(" "):
            _flush_block()
            in_section = False
            continue

        if current_block_key is not None:
            item_match = item_re.match(line)
            if item_match:
                value = item_match.group(1).strip().strip('"').strip("'")
                if value:
                    current_items.append(value)
                continue
            # Block ended; fall through so this line can open a new key.
            _flush_block()

        inline_match = inline_re.match(line)
        if inline_match:
            key, raw = inline_match.group(1), inline_match.group(2)
            items = [p.strip().strip('"').strip("'") for p in raw.split(",")]
            out[key] = tuple(item for item in items if item)
            continue

        header_match = block_header_re.match(line)
        if header_match:
            current_block_key = header_match.group(1)
            current_items = []
            continue

    _flush_block()
    return out


class SVGQualityChecker:
    """SVG quality checker"""

    # Default placeholder convention per page-type prefix. This is a *hint*,
    # not a hard contract: templates may define their own placeholder vocabulary
    # via `placeholders:` in design_spec.md frontmatter (see
    # references/template-designer.md §4). Missing default placeholders surface
    # as warnings, never errors — designers may legitimately swap
    # `{{THANK_YOU}}` for `{{CLOSING_MESSAGE}}`, omit `{{DATE}}` when irrelevant,
    # or build content variants with bespoke slot vocabularies.
    #
    # Variants reuse the parent type's expectation (`03a_content_two_col.svg`
    # is matched by the same `03_content` rules as `03_content.svg`).
    DEFAULT_PLACEHOLDER_CONVENTION = {
        "01_cover": ("{{TITLE}}",),  # only the title is universally expected
        "02_chapter": ("{{CHAPTER_TITLE}}",),
        "02_toc": (),  # TOC layouts vary too widely to assert anything
        "03_content": ("{{PAGE_TITLE}}",),
        "04_ending": (),  # ending pages legitimately use varied vocabularies
    }

    def __init__(self, *, template_mode: bool = False):
        """Create a checker with empty result, summary and aggregation state.

        Args:
            template_mode: When True, checks that rely on per-project files
                (spec_lock drift, image attribution, icon usage) are skipped
                by ``check_file``.
        """
        self.template_mode = template_mode
        self.results = []
        self.summary = {
            'total': 0,
            'passed': 0,
            'warnings': 0,
            'errors': 0
        }
        self.issue_types = defaultdict(int)
        # spec_lock drift state (populated only when _parse_spec_lock is available
        # and a spec_lock.md is found near the SVG)
        self._lock_cache: Dict[Path, Dict] = {}
        self._drift_summary: Dict[str, Dict[str, set]] = {
            'colors': defaultdict(set),
            'fonts': defaultdict(set),
            'sizes': defaultdict(set),
        }
        self._lock_seen = False  # True once we locate at least one spec_lock.md
        self._source_manifest_cache: Dict[Path, Dict] = {}
        # Template-mode aggregation (populated by check_directory when
        # template_mode=True). Each entry is (severity, kind, message) where
        # severity is 'error' or 'warning'. Printed in print_summary.
        self._template_issues: List[Tuple[str, str, str]] = []
        self._animation_issues: List[Tuple[str, str]] = []
        # Icon-usage aggregation (non-template mode). When spec_lock declares an
        # icon library + inventory, the strategist intends the deck to use icons.
        # The native exporter and finalize both expand icon placeholders from
        # the library, so an authored placeholder reliably becomes a real icon —
        # but only if the executor writes one. A deck that locks an inventory yet
        # authors ZERO placeholders ships flat/icon-less; this is the missing
        # feedback loop that catches the executor silently skipping icons.
        self._icon_inventory_declared = False  # any page's spec_lock locked icons
        self._deck_icon_total = 0  # total icon placeholders across the deck
        self._pages_missing_icons: List[str] = []  # declared-but-icon-less pages

    def check_file(self, svg_file: str, expected_format: str = None) -> Dict:
        """
        Check a single SVG file

        Args:
            svg_file: SVG file path
            expected_format: Expected canvas format (e.g., 'ppt169')

        Returns:
            Check result dictionary
        """
        svg_path = Path(svg_file)

        if not svg_path.exists():
            # A missing file returns early: it is neither appended to
            # self.results nor counted in self.summary.
            return {
                'file': str(svg_file),
                'exists': False,
                'errors': ['File does not exist'],
                'warnings': [],
                'passed': False
            }

        result = {
            'file': svg_path.name,
            'path': str(svg_path),
            'exists': True,
            'errors': [],
            'warnings': [],
            'info': {},
            'passed': True
        }

        try:
            with open(svg_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # 0. Check XML well-formedness — every other check assumes the file
            #    is valid XML. Bail early on failure so the regex-based checks
            #    below don't produce misleading errors on a broken document.
            if self._check_xml_well_formed(content, result):
                # 1. Check viewBox
                self._check_viewbox(content, result, expected_format)

                # 2. Check forbidden elements
                self._check_forbidden_elements(content, result)

                # 3. Check font-size values
                self._check_font_size_values(content, result)

                # 4. Check fonts
                self._check_fonts(content, result)

                # 5. Check width/height consistency with viewBox
                self._check_dimensions(content, result)

                # 6. Check text wrapping methods
                self._check_text_elements(content, result)

                # 7. Check image references (file existence and resolution)
                self._check_image_references(content, svg_path, result)

                # 8. Check object-level animation anchor quality.
                self._check_animation_group_ids(content, result)

                # 8b. Check that pattern elements declare a PPTX preset.
                self._check_pattern_fills(content, result)

                # Templates ship neither a spec_lock.md nor an
                # image_sources.json, so the project-level checks below are
                # skipped in template mode to avoid noise.
                if not self.template_mode:
                    # 9. Check spec_lock drift (colors / font-family / font-size).
                    self._check_spec_lock_drift(content, svg_path, result)

                    # 10. Check web-sourced image attribution.
                    self._check_sourced_image_attribution(content, svg_path, result)

                    # 11. Check declared-vs-used icons.
                    self._check_icon_usage(content, svg_path, result)

            # Determine pass/fail
            result['passed'] = len(result['errors']) == 0

        except Exception as e:
            # NOTE: this boundary also catches exceptions raised by any of the
            # checks above, not only I/O failures; the message is kept as-is
            # because issue categorization is string-based.
            result['errors'].append(f"Failed to read file: {e}")
            result['passed'] = False

        # Update statistics
        self.summary['total'] += 1
        if result['passed']:
            if result['warnings']:
                self.summary['warnings'] += 1
            else:
                self.summary['passed'] += 1
        else:
            self.summary['errors'] += 1

        # Categorize issue types
        for error in result['errors']:
            self.issue_types[self._categorize_issue(error)] += 1

        self.results.append(result)
        return result

    def _check_xml_well_formed(self, content: str, result: Dict) -> bool:
        """Check that the SVG content parses as well-formed XML.

        SVG is strict XML. AI-generated decks frequently produce content that
        looks fine in HTML5-tolerant previews but fails strict XML parsing —
        common causes are HTML named entities (&nbsp; &mdash; &copy;…) and
        bare XML reserved characters in text (R&D, error < 5%). Such pages
        cannot be exported to PPTX, so we surface them here as a hard error
        before any downstream check looks at them.

        Args:
            content: Full text of the SVG document.
            result: Check result dictionary; one entry is appended to
                ``result['errors']`` when parsing fails.

        Returns True when the document is well-formed; False otherwise.
        """
        try:
            ET.fromstring(content)
            return True
        except ET.ParseError as e:
            result['errors'].append(
                f"Invalid XML: {e} — SVG must be well-formed XML. "
                f"Use raw Unicode for typography (—, ©, →, NBSP); "
                f"escape XML reserved chars as &amp; &lt; &gt; &quot; &apos; "
                f"(see references/shared-standards.md §1)."
            )
            return False

    def _check_viewbox(self, content: str, result: Dict, expected_format: str = None):
        """Check viewBox attribute

        Records the first viewBox value found under ``result['info']``,
        warns when it is not exactly ``0 0 <int> <int>``, and reports an
        error when it differs from the viewBox registered for
        ``expected_format`` in ``CANVAS_FORMATS``.

        Args:
            content: Full text of the SVG document.
            result: Check result dictionary, updated in place.
            expected_format: Optional canvas format key; ignored when falsy
                or not present in ``CANVAS_FORMATS``.
        """
        # XML permits either quote style and whitespace around '='.
        viewbox_match = re.search(
            r'viewBox\s*=\s*(?:"([^"]+)"|\'([^\']+)\')', content
        )
        if not viewbox_match:
            result['errors'].append("Missing viewBox attribute")
            return

        viewbox = viewbox_match.group(1) or viewbox_match.group(2)
        result['info']['viewbox'] = viewbox

        # Check format — anchored at both ends so trailing junk is reported.
        if not re.fullmatch(r'0 0 \d+ \d+', viewbox):
            result['warnings'].append(f"Unusual viewBox format: {viewbox}")

        # Check if it matches expected format
        if expected_format and expected_format in CANVAS_FORMATS:
            expected_viewbox = CANVAS_FORMATS[expected_format]['viewbox']
            if viewbox != expected_viewbox:
                result['errors'].append(
                    f"viewBox mismatch: expected '{expected_viewbox}', got '{viewbox}'"
                )

    def _check_forbidden_elements(self, content: str, result: Dict):
        """Check forbidden elements (blocklist)"""
        content_lower = content.lower()

        # ============================================================
        # Forbidden elements blocklist - PPT incompatible
        # ============================================================

        # Clipping / masking
        # clipPath is allowed on image elements and on pptx_to_svg-generated
        # nested crop wrappers. Both map back to
        # DrawingML picture geometry in the native converter.
if ']*\bdata-pptx-crop\s*=\s*["\']1["\'])\w+[^>]*\bclip-path\s*=', content, re.IGNORECASE, ) if clip_on_non_image: result['errors'].append( "clip-path is only allowed on elements or " "pptx_to_svg crop wrappers — for shapes, draw the target " "shape directly instead of clipping") # Check that every clip-path reference has a matching def clip_refs = re.findall(r'clip-path\s*=\s*["\']url\(#([^)]+)\)', content) for ref_id in clip_refs: if f'id="{ref_id}"' not in content and f"id='{ref_id}'" not in content: result['errors'].append( f"clip-path references #{ref_id} but no matching " f" definition found") if ' element (PPT does not support SVG masks)") # Style system if ' element (use inline attributes instead)") if re.search(r'\bclass\s*=', content): result['errors'].append("Detected forbidden class attribute (use inline styles instead)") # id attribute: only report error when