2519 lines
113 KiB
Python
2519 lines
113 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
PPT Master - SVG Quality Check Tool
|
||
|
||
Checks whether SVG files comply with project technical specifications.
|
||
|
||
Usage:
|
||
python3 scripts/svg_quality_checker.py <svg_file>
|
||
python3 scripts/svg_quality_checker.py <directory>
|
||
python3 scripts/svg_quality_checker.py --all examples
|
||
"""
|
||
|
||
import sys
|
||
# zcbot: Windows consoles frequently default to GBK. Force UTF-8 with
# replacement on both standard streams so printing emoji / © and similar
# characters never raises UnicodeEncodeError. Each stream is handled on its
# own so that a failure on stdout does not leave stderr unconfigured.
for _stream in (sys.stdout, sys.stderr):
    try:
        _stream.reconfigure(encoding="utf-8", errors="replace")
    except Exception:
        # Deliberately best-effort: the stream may have been replaced by an
        # object without reconfigure(), or may refuse the change.
        pass
|
||
import re
|
||
import json
|
||
import html
|
||
from pathlib import Path
|
||
from typing import List, Dict, Tuple
|
||
from collections import Counter, defaultdict
|
||
from xml.etree import ElementTree as ET
|
||
|
||
try:
|
||
from project_utils import CANVAS_FORMATS
|
||
from error_helper import ErrorHelper
|
||
except ImportError:
|
||
print("Warning: Unable to import dependency modules")
|
||
CANVAS_FORMATS = {}
|
||
ErrorHelper = None
|
||
|
||
try:
|
||
from update_spec import parse_lock as _parse_spec_lock
|
||
except ImportError:
|
||
_parse_spec_lock = None # spec_lock drift check will be skipped
|
||
|
||
try:
|
||
from svg_to_pptx.animation_config import (
|
||
load_animation_config as _load_animation_config,
|
||
validate_animation_config as _validate_animation_config,
|
||
)
|
||
except ImportError:
|
||
_load_animation_config = None
|
||
_validate_animation_config = None
|
||
|
||
|
||
# A "#" followed by three to eight hexadecimal digits.
HEX_VALUE_RE = re.compile(r"#[0-9A-Fa-f]{3,8}")
# Namespace URI of SVG elements.
SVG_NS = "http://www.w3.org/2000/svg"

# Envelope used by font-size drift detection.
# design_spec_reference.md §IV (Font Size Hierarchy) defines a ramp running
# from the page-number floor (0.5x body) up to the cover-title ceiling
# (5.0x body). executor-base.md §2.1 allows intermediate px values as long as
# their ratio to body stays inside a role's band, so only sizes outside this
# whole envelope count as drift.
RAMP_MIN_RATIO = 0.5
RAMP_MAX_RATIO = 5.0

# Modes / visual styles whose design legitimately relies on unbounded hero or
# poster type (huge cover numerals, act dividers, single-number reveals). For
# these the upper size bound is dropped because the oversize is intentional;
# the lower bound is still enforced.
POSTER_SIZE_MODES = {'showcase'}
POSTER_SIZE_STYLES = {'zine'}
|
||
|
||
|
||
def _design_spec_is_brand(spec_path: Path) -> bool:
|
||
"""Return True when a design_spec.md frontmatter declares ``kind: brand``.
|
||
|
||
Lightweight detector that does not require PyYAML — scans only the
|
||
frontmatter block (``---`` delimited) for a ``kind:`` line whose value
|
||
contains ``brand``. Used by ``check_directory`` to skip SVG validation
|
||
on brand-only template directories.
|
||
"""
|
||
try:
|
||
text = spec_path.read_text(encoding='utf-8')
|
||
except OSError:
|
||
return False
|
||
if not text.startswith('---\n'):
|
||
return False
|
||
end = text.find('\n---\n', 4)
|
||
if end == -1:
|
||
return False
|
||
fm_block = text[4:end]
|
||
for line in fm_block.splitlines():
|
||
stripped = line.strip()
|
||
if stripped.startswith('kind:'):
|
||
value = stripped.split(':', 1)[1].strip().strip('"\'')
|
||
return value == 'brand'
|
||
return False
|
||
|
||
|
||
def _parse_placeholders_fallback(block: str) -> Dict[str, Tuple[str, ...]]:
|
||
"""Tiny YAML-free reader for the documented ``placeholders:`` shape.
|
||
|
||
Used only when PyYAML is unavailable. Recognized lines (indentation-aware,
|
||
two-space indent assumed):
|
||
|
||
.. code-block:: yaml
|
||
|
||
placeholders:
|
||
01_cover: ["{{TITLE}}", "{{LOGO}}"]
|
||
03_content: []
|
||
03a_content_two_col:
|
||
- "{{LEFT_TITLE}}"
|
||
- "{{RIGHT_TITLE}}"
|
||
|
||
Anything outside this minimal grammar is silently skipped — designers who
|
||
rely on advanced YAML should install pyyaml.
|
||
"""
|
||
out: Dict[str, Tuple[str, ...]] = {}
|
||
inline_re = re.compile(
|
||
r"^\s{2}([A-Za-z0-9_]+)\s*:\s*\[(.*)\]\s*$"
|
||
)
|
||
empty_re = re.compile(r"^\s{2}([A-Za-z0-9_]+)\s*:\s*\[\s*\]\s*$")
|
||
block_header_re = re.compile(r"^\s{2}([A-Za-z0-9_]+)\s*:\s*$")
|
||
item_re = re.compile(r'^\s{4}-\s*"?([^"]+)"?\s*$')
|
||
|
||
in_section = False
|
||
current_block_key: str | None = None
|
||
current_items: List[str] = []
|
||
|
||
def _flush_block() -> None:
|
||
nonlocal current_block_key, current_items
|
||
if current_block_key is not None:
|
||
out[current_block_key] = tuple(current_items)
|
||
current_block_key = None
|
||
current_items = []
|
||
|
||
for line in block.splitlines():
|
||
if line.startswith("placeholders:"):
|
||
in_section = True
|
||
continue
|
||
if not in_section:
|
||
continue
|
||
|
||
# End of section: dedent to a non-key line.
|
||
if line and not line.startswith(" "):
|
||
_flush_block()
|
||
in_section = False
|
||
continue
|
||
|
||
if current_block_key is not None:
|
||
m = item_re.match(line)
|
||
if m:
|
||
value = m.group(1).strip().strip('"').strip("'")
|
||
if value:
|
||
current_items.append(value)
|
||
continue
|
||
# Block ended.
|
||
_flush_block()
|
||
|
||
if empty_re.match(line):
|
||
key = empty_re.match(line).group(1)
|
||
out[key] = ()
|
||
continue
|
||
|
||
m = inline_re.match(line)
|
||
if m:
|
||
key, raw = m.group(1), m.group(2)
|
||
items = [p.strip().strip('"').strip("'") for p in raw.split(",")]
|
||
out[key] = tuple(item for item in items if item)
|
||
continue
|
||
|
||
m = block_header_re.match(line)
|
||
if m:
|
||
current_block_key = m.group(1)
|
||
current_items = []
|
||
continue
|
||
|
||
_flush_block()
|
||
return out
|
||
|
||
|
||
class SVGQualityChecker:
    """SVG quality checker.

    Runs a series of checks against SVG files and accumulates per-file
    results together with summary counters.
    """

    # Default placeholder expected for each page-type prefix. These are hints
    # rather than a hard contract: a template may declare its own placeholder
    # vocabulary through `placeholders:` in the design_spec.md frontmatter
    # (see references/template-designer.md §4). A missing default placeholder
    # is reported as a warning, never an error — designers may legitimately
    # swap `{{THANK_YOU}}` for `{{CLOSING_MESSAGE}}`, leave out `{{DATE}}`
    # when it is irrelevant, or build content variants with bespoke slots.
    #
    # Variants inherit the parent type's expectation: `03a_content_two_col.svg`
    # is matched by the same `03_content` rules as `03_content.svg`.
    DEFAULT_PLACEHOLDER_CONVENTION = {
        # Only the title is universally expected on a cover.
        "01_cover": ("{{TITLE}}",),
        "02_chapter": ("{{CHAPTER_TITLE}}",),
        # TOC layouts vary too widely to assert anything.
        "02_toc": (),
        "03_content": ("{{PAGE_TITLE}}",),
        # Ending pages legitimately use varied vocabularies.
        "04_ending": (),
    }
|
||
|
||
def __init__(self, *, template_mode: bool = False):
|
||
self.template_mode = template_mode
|
||
self.results = []
|
||
self.summary = {
|
||
'total': 0,
|
||
'passed': 0,
|
||
'warnings': 0,
|
||
'errors': 0
|
||
}
|
||
self.issue_types = defaultdict(int)
|
||
# spec_lock drift state (populated only when _parse_spec_lock is available
|
||
# and a spec_lock.md is found near the SVG)
|
||
self._lock_cache: Dict[Path, Dict] = {}
|
||
self._drift_summary: Dict[str, Dict[str, set]] = {
|
||
'colors': defaultdict(set),
|
||
'fonts': defaultdict(set),
|
||
'sizes': defaultdict(set),
|
||
}
|
||
self._lock_seen = False # True once we locate at least one spec_lock.md
|
||
self._source_manifest_cache: Dict[Path, Dict] = {}
|
||
# Template-mode aggregation (populated by check_directory when
|
||
# template_mode=True). Each entry is (severity, kind, message) where
|
||
# severity is 'error' or 'warning'. Printed in print_summary.
|
||
self._template_issues: List[Tuple[str, str, str]] = []
|
||
self._animation_issues: List[Tuple[str, str]] = []
|
||
# Icon-usage aggregation (non-template mode). When spec_lock declares an
|
||
# icon library + inventory, the strategist intends the deck to use icons.
|
||
# The native exporter and finalize both expand <use data-icon> from the
|
||
# library, so an authored placeholder reliably becomes a real icon — but
|
||
# only if the executor writes one. A deck that locks an inventory yet
|
||
# authors ZERO placeholders ships flat/icon-less; this is the missing
|
||
# feedback loop that catches the executor silently skipping icons.
|
||
self._icon_inventory_declared = False # any page's spec_lock locked icons
|
||
self._deck_icon_total = 0 # total <use data-icon> across the deck
|
||
self._pages_missing_icons: List[str] = [] # declared-but-icon-less pages
|
||
# Visual-richness aggregation (non-template mode). The most common AI-deck
|
||
# regression is "wall of text boxes": every page is <text> on <rect> with
|
||
# zero diagrams, charts, figures, or imagery. <path>/<polyline>/<polygon>/
|
||
# <image> are the unambiguous "this page draws something" primitives —
|
||
# rect/line are layout/divider scaffolding and don't count. A content-rich
|
||
# deck (>=6 pages, text-heavy) with zero such primitives deck-wide is the
|
||
# flat-deck pathology; catch it so it can't ship silently.
|
||
self._deck_page_count = 0 # non-template SVG pages checked
|
||
self._deck_graphic_total = 0 # path+polyline+polygon+image across deck
|
||
self._deck_text_total = 0 # <text> across deck (density signal)
|
||
self._pages_no_graphic: List[str] = [] # pages with zero graphic primitives
|
||
# Alignment / grid / monotony aggregation (check 14). Cross-page margin
|
||
# drift and repeated layout archetypes are only visible deck-wide, so
|
||
# per-page passes record into these and print_summary aggregates.
|
||
self._grid_locked = False # any spec_lock carried layout_grid
|
||
self._page_left_edges: Dict[str, float] = {} # page -> primary content left edge
|
||
self._page_fingerprints: Dict[str, tuple] = {} # page -> layout archetype fingerprint
|
||
|
||
def check_file(self, svg_file: str, expected_format: str = None) -> Dict:
    """Run every applicable check on one SVG file and record the outcome.

    Args:
        svg_file: SVG file path
        expected_format: Expected canvas format (e.g., 'ppt169')

    Returns:
        Check result dictionary. For a path that does not exist a minimal
        dictionary is returned immediately and nothing is recorded on the
        checker.
    """
    svg_path = Path(svg_file)

    if not svg_path.exists():
        return {
            'file': str(svg_file),
            'exists': False,
            'errors': ['File does not exist'],
            'warnings': [],
            'passed': False
        }

    result = {
        'file': svg_path.name,
        'path': str(svg_path),
        'exists': True,
        'errors': [],
        'warnings': [],
        'info': {},
        'passed': True
    }

    try:
        content = svg_path.read_text(encoding='utf-8')

        # 0. XML well-formedness comes first: every later check assumes valid
        #    XML, so on failure the regex-based checks are skipped rather than
        #    allowed to emit misleading errors about a broken document.
        if self._check_xml_well_formed(content, result):
            # 1. viewBox
            self._check_viewbox(content, result, expected_format)
            # 2. forbidden elements
            self._check_forbidden_elements(content, result)
            # 3. font-size values
            self._check_font_size_values(content, result)
            # 4. fonts
            self._check_fonts(content, result)
            # 5. width/height consistency with viewBox
            self._check_dimensions(content, result)
            # 6. text wrapping methods
            self._check_text_elements(content, result)
            # 7. image references (file existence and resolution)
            self._check_image_references(content, svg_path, result)
            # 8. object-level animation anchor quality
            self._check_animation_group_ids(content, result)
            # 8b. <pattern> elements declare a PPTX preset
            self._check_pattern_fills(content, result)

            # Checks 9-14 are skipped in template mode: templates ship
            # neither spec_lock.md nor image_sources.json, and their
            # {{PLACEHOLDER}} text has an unrepresentative rendered width.
            if not self.template_mode:
                # 9. spec_lock drift (colors / font-family / font-size)
                self._check_spec_lock_drift(content, svg_path, result)
                # 10. web-sourced image attribution
                self._check_sourced_image_attribution(content, svg_path, result)
                # 11. declared-vs-used icons
                self._check_icon_usage(content, svg_path, result)
                # 12. visual richness (flat text-on-rectangles deck)
                self._check_graphic_richness(content, result)
                # 13. geometry lint: estimated text/icon bounding boxes →
                #     text-on-text / icon-on-text overlap + off-canvas elements
                self._check_geometry(content, result)
                # 14. alignment lint: sibling-card near-miss misalignment,
                #     layout_grid lock enforcement, uneven row gaps, plus
                #     deck-level margin-drift / layout-monotony aggregation
                self._check_alignment(content, svg_path, result)

        # A file passes exactly when no check recorded an error.
        result['passed'] = not result['errors']

    except Exception as e:
        result['errors'].append(f"Failed to read file: {e}")
        result['passed'] = False

    # Update statistics: each file lands in exactly one bucket.
    self.summary['total'] += 1
    if not result['passed']:
        bucket = 'errors'
    elif result['warnings']:
        bucket = 'warnings'
    else:
        bucket = 'passed'
    self.summary[bucket] += 1

    # Tally error categories.
    for message in result['errors']:
        self.issue_types[self._categorize_issue(message)] += 1

    self.results.append(result)
    return result
|
||
|
||
def _check_xml_well_formed(self, content: str, result: Dict) -> bool:
|
||
"""Check that the SVG content parses as well-formed XML.
|
||
|
||
SVG is strict XML. AI-generated decks frequently produce content that
|
||
looks fine in HTML5-tolerant previews but fails strict XML parsing —
|
||
common causes are HTML named entities ( — ©…) and
|
||
bare XML reserved characters in text (R&D, error < 5%). Such pages
|
||
cannot be exported to PPTX, so we surface them here as a hard error
|
||
before any downstream check looks at them.
|
||
|
||
Returns True when the document is well-formed; False otherwise.
|
||
"""
|
||
try:
|
||
ET.fromstring(content)
|
||
return True
|
||
except ET.ParseError as e:
|
||
result['errors'].append(
|
||
f"Invalid XML: {e} — SVG must be well-formed XML. "
|
||
f"Use raw Unicode for typography (—, ©, →, NBSP); "
|
||
f"escape XML reserved chars as & < > " ' "
|
||
f"(see references/shared-standards.md §1)."
|
||
)
|
||
return False
|
||
|
||
def _check_viewbox(self, content: str, result: Dict, expected_format: str = None):
|
||
"""Check viewBox attribute"""
|
||
viewbox_match = re.search(r'viewBox="([^"]+)"', content)
|
||
|
||
if not viewbox_match:
|
||
result['errors'].append("Missing viewBox attribute")
|
||
return
|
||
|
||
viewbox = viewbox_match.group(1)
|
||
result['info']['viewbox'] = viewbox
|
||
|
||
# Check format
|
||
if not re.match(r'0 0 \d+ \d+', viewbox):
|
||
result['warnings'].append(f"Unusual viewBox format: {viewbox}")
|
||
|
||
# Check if it matches expected format
|
||
if expected_format and expected_format in CANVAS_FORMATS:
|
||
expected_viewbox = CANVAS_FORMATS[expected_format]['viewbox']
|
||
if viewbox != expected_viewbox:
|
||
result['errors'].append(
|
||
f"viewBox mismatch: expected '{expected_viewbox}', got '{viewbox}'"
|
||
)
|
||
|
||
def _check_forbidden_elements(self, content: str, result: Dict):
|
||
"""Check forbidden elements (blocklist)"""
|
||
content_lower = content.lower()
|
||
|
||
# ============================================================
|
||
# Forbidden elements blocklist - PPT incompatible
|
||
# ============================================================
|
||
|
||
# Clipping / masking
|
||
# clipPath is allowed on <image> elements and on pptx_to_svg-generated
|
||
# nested crop <svg data-pptx-crop="1"> wrappers. Both map back to
|
||
# DrawingML picture geometry in the native converter.
|
||
if '<clippath' in content_lower:
|
||
# clip-path on non-image elements → error
|
||
clip_on_non_image = re.search(
|
||
r'<(?!image\b)(?!svg\b[^>]*\bdata-pptx-crop\s*=\s*["\']1["\'])\w+[^>]*\bclip-path\s*=',
|
||
content,
|
||
re.IGNORECASE,
|
||
)
|
||
if clip_on_non_image:
|
||
result['errors'].append(
|
||
"clip-path is only allowed on <image> elements or "
|
||
"pptx_to_svg crop wrappers — for shapes, draw the target "
|
||
"shape directly instead of clipping")
|
||
# Check that every clip-path reference has a matching <clipPath> def
|
||
clip_refs = re.findall(r'clip-path\s*=\s*["\']url\(#([^)]+)\)', content)
|
||
for ref_id in clip_refs:
|
||
if f'id="{ref_id}"' not in content and f"id='{ref_id}'" not in content:
|
||
result['errors'].append(
|
||
f"clip-path references #{ref_id} but no matching "
|
||
f"<clipPath id=\"{ref_id}\"> definition found")
|
||
if '<mask' in content_lower:
|
||
result['errors'].append("Detected forbidden <mask> element (PPT does not support SVG masks)")
|
||
|
||
# Style system
|
||
if '<style' in content_lower:
|
||
result['errors'].append("Detected forbidden <style> element (use inline attributes instead)")
|
||
if re.search(r'\bclass\s*=', content):
|
||
result['errors'].append("Detected forbidden class attribute (use inline styles instead)")
|
||
# id attribute: only report error when <style> also exists (id is harmful only with CSS selectors)
|
||
# id inside <defs> for linearGradient/filter etc. is required, Inkscape also auto-adds id to elements,
|
||
# standalone id attributes have no impact on PPT export
|
||
if '<style' in content_lower and re.search(r'\bid\s*=', content):
|
||
result['errors'].append(
|
||
"Detected id attribute used with <style> (CSS selectors forbidden, use inline styles instead)"
|
||
)
|
||
if re.search(r'<\?xml-stylesheet\b', content_lower):
|
||
result['errors'].append("Detected forbidden xml-stylesheet (external CSS references forbidden)")
|
||
if re.search(r'<link[^>]*rel\s*=\s*["\']stylesheet["\']', content_lower):
|
||
result['errors'].append("Detected forbidden <link rel=\"stylesheet\"> (external CSS references forbidden)")
|
||
if re.search(r'@import\s+', content_lower):
|
||
result['errors'].append("Detected forbidden @import (external CSS references forbidden)")
|
||
|
||
# Structure / nesting
|
||
if '<foreignobject' in content_lower:
|
||
result['errors'].append(
|
||
"Detected forbidden <foreignObject> element (use <tspan> for manual line breaks)")
|
||
has_symbol = '<symbol' in content_lower
|
||
has_use = re.search(r'<use\b', content_lower) is not None
|
||
if has_symbol and has_use:
|
||
result['errors'].append("Detected forbidden <symbol> + <use> complex usage (use basic shapes or simple <use> instead)")
|
||
# marker-start / marker-end are conditionally allowed (see shared-standards.md §1.1).
|
||
# The converter maps qualifying <marker> defs to native DrawingML <a:headEnd>/<a:tailEnd>.
|
||
# We only warn when a marker is used without an obvious <defs> definition in the same file.
|
||
if re.search(r'\bmarker-(?:start|end)\s*=\s*["\']url\(#([^)]+)\)', content_lower):
|
||
if '<marker' not in content_lower:
|
||
result['errors'].append(
|
||
"Detected marker-start/marker-end referencing a marker id, "
|
||
"but no <marker> element found in the file")
|
||
|
||
# Text / fonts
|
||
if '<textpath' in content_lower:
|
||
result['errors'].append("Detected forbidden <textPath> element (path text is incompatible with PPT)")
|
||
if '@font-face' in content_lower:
|
||
result['errors'].append("Detected forbidden @font-face (use system font stack)")
|
||
|
||
# Animation / interaction
|
||
if re.search(r'<animate', content_lower):
|
||
result['errors'].append("Detected forbidden SMIL animation element <animate*> (SVG animations are not exported)")
|
||
if re.search(r'<set\b', content_lower):
|
||
result['errors'].append("Detected forbidden SMIL animation element <set> (SVG animations are not exported)")
|
||
if '<script' in content_lower:
|
||
result['errors'].append("Detected forbidden <script> element (scripts and event handlers forbidden)")
|
||
if re.search(r'\bon\w+\s*=', content): # onclick, onload etc.
|
||
result['errors'].append("Detected forbidden event attributes (e.g., onclick, onload)")
|
||
|
||
# Other discouraged elements
|
||
if '<iframe' in content_lower:
|
||
result['errors'].append("Detected <iframe> element (should not appear in SVG)")
|
||
if re.search(r'rgba\s*\(', content_lower):
|
||
result['errors'].append("Detected forbidden rgba() color (use fill-opacity/stroke-opacity instead)")
|
||
if re.search(r'<g[^>]*\sopacity\s*=', content_lower):
|
||
result['errors'].append("Detected forbidden <g opacity> (set opacity on each child element individually)")
|
||
if re.search(r'<image[^>]*\sopacity\s*=', content_lower):
|
||
result['errors'].append("Detected forbidden <image opacity> (use overlay mask approach)")
|
||
|
||
def _check_font_size_values(self, content: str, result: Dict):
|
||
"""Require font-size values to be unitless numeric SVG px values."""
|
||
numeric_re = re.compile(r'^(?:\d+(?:\.\d+)?|\.\d+)$')
|
||
bad_values = set()
|
||
|
||
for match in re.finditer(r'\bfont-size\s*=\s*(["\'])(.*?)\1', content, re.IGNORECASE):
|
||
raw = match.group(2).strip()
|
||
if not numeric_re.fullmatch(raw):
|
||
bad_values.add(raw)
|
||
|
||
for match in re.finditer(r'\bfont-size\s*:\s*([^;"\']+)', content, re.IGNORECASE):
|
||
raw = match.group(1).strip()
|
||
if not numeric_re.fullmatch(raw):
|
||
bad_values.add(raw)
|
||
|
||
if bad_values:
|
||
shown_values = sorted(bad_values)
|
||
shown = ', '.join(shown_values[:5])
|
||
more = len(shown_values) - 5
|
||
suffix = f" (+{more} more)" if more > 0 else ""
|
||
result['errors'].append(
|
||
f"font-size must be a unitless numeric px value; found {shown}{suffix}. "
|
||
"Write e.g. font-size=\"28\", never font-size=\"28px\" or \"21pt\"."
|
||
)
|
||
|
||
def _check_fonts(self, content: str, result: Dict):
|
||
"""Check font usage.
|
||
|
||
PPTX stores a single `typeface` per run with no runtime fallback, so every
|
||
stack must END with a cross-platform pre-installed family. See
|
||
strategist.md §g "PPT-safe font discipline".
|
||
"""
|
||
font_matches = re.findall(
|
||
r'font-family[:\s]*["\']([^"\']+)["\']', content, re.IGNORECASE)
|
||
|
||
if not font_matches:
|
||
return
|
||
|
||
result['info']['fonts'] = list(set(font_matches))
|
||
|
||
# Pre-installed on Windows + macOS out of the box (plus their direct
|
||
# FONT_FALLBACK_WIN mappings). A stack whose last concrete family is in
|
||
# this set survives the PPTX round-trip on any viewer machine.
|
||
ppt_safe_tail = {
|
||
'microsoft yahei', 'simhei', 'simsun', 'kaiti', 'fangsong',
|
||
'dengxian', 'microsoft jhenghei',
|
||
'pingfang sc', 'heiti sc', 'songti sc', 'stsong',
|
||
'arial', 'arial black', 'calibri', 'segoe ui', 'verdana',
|
||
'helvetica', 'helvetica neue', 'tahoma', 'trebuchet ms',
|
||
'times new roman', 'times', 'georgia', 'cambria', 'palatino',
|
||
'consolas', 'courier new', 'menlo', 'monaco',
|
||
'impact',
|
||
}
|
||
|
||
for font_family in font_matches:
|
||
# Drop the generic CSS fallback (sans-serif / serif / monospace)
|
||
# and inspect the last concrete family.
|
||
parts = [p.strip().strip('"').strip("'").lower()
|
||
for p in font_family.split(',')]
|
||
parts = [p for p in parts
|
||
if p and p not in ('sans-serif', 'serif', 'monospace',
|
||
'cursive', 'fantasy', 'system-ui')]
|
||
if not parts:
|
||
continue
|
||
tail = parts[-1]
|
||
if tail not in ppt_safe_tail:
|
||
result['warnings'].append(
|
||
f"Font stack does not end on a PPT-safe family "
|
||
f"(expected e.g. Microsoft YaHei / SimSun / Arial / "
|
||
f"Times New Roman / Consolas): {font_family}"
|
||
)
|
||
break
|
||
|
||
def _check_dimensions(self, content: str, result: Dict):
|
||
"""Check width/height consistency with viewBox"""
|
||
width_match = re.search(r'width="(\d+)"', content)
|
||
height_match = re.search(r'height="(\d+)"', content)
|
||
|
||
if width_match and height_match:
|
||
width = width_match.group(1)
|
||
height = height_match.group(1)
|
||
result['info']['dimensions'] = f"{width}x{height}"
|
||
|
||
# Check consistency with viewBox
|
||
if 'viewbox' in result['info']:
|
||
viewbox_parts = result['info']['viewbox'].split()
|
||
if len(viewbox_parts) == 4:
|
||
vb_width, vb_height = viewbox_parts[2], viewbox_parts[3]
|
||
if width != vb_width or height != vb_height:
|
||
result['warnings'].append(
|
||
f"width/height ({width}x{height}) does not match viewBox "
|
||
f"({vb_width}x{vb_height})"
|
||
)
|
||
|
||
def _check_text_elements(self, content: str, result: Dict):
|
||
"""Check text elements and wrapping methods"""
|
||
# Count text and tspan elements
|
||
text_count = content.count('<text')
|
||
tspan_count = content.count('<tspan')
|
||
|
||
result['info']['text_elements'] = text_count
|
||
result['info']['tspan_elements'] = tspan_count
|
||
|
||
# Check for overly long single-line text (may need wrapping)
|
||
text_matches = re.findall(r'<text[^>]*>([^<]{100,})</text>', content)
|
||
if text_matches:
|
||
result['warnings'].append(
|
||
f"Detected {len(text_matches)} potentially overly long single-line text(s) (consider using tspan for wrapping)"
|
||
)
|
||
|
||
def _check_image_references(self, content: str, svg_path: Path, result: Dict):
|
||
"""Check image file existence and resolution vs display size."""
|
||
# Find all <image ...> elements (capture the full tag)
|
||
img_tag_pattern = re.compile(r'<image\b([^>]*)/?>', re.IGNORECASE)
|
||
|
||
svg_dir = svg_path.parent
|
||
checked = set()
|
||
|
||
for tag_match in img_tag_pattern.finditer(content):
|
||
attrs = tag_match.group(1)
|
||
|
||
# Extract href (prefer href over xlink:href)
|
||
href_match = (
|
||
re.search(r'\bhref="(?!data:)([^"]+)"', attrs) or
|
||
re.search(r'\bxlink:href="(?!data:)([^"]+)"', attrs)
|
||
)
|
||
if not href_match:
|
||
continue
|
||
|
||
href = href_match.group(1)
|
||
if href in checked:
|
||
continue
|
||
checked.add(href)
|
||
|
||
# Resolve path relative to SVG file directory
|
||
img_path = (svg_dir / href).resolve()
|
||
|
||
if not img_path.exists():
|
||
result['errors'].append(
|
||
f"Image file not found: {href} (resolved to {img_path})")
|
||
continue
|
||
|
||
# Check resolution vs display size
|
||
w_match = re.search(r'\bwidth="([^"]+)"', attrs)
|
||
h_match = re.search(r'\bheight="([^"]+)"', attrs)
|
||
display_w_str = w_match.group(1) if w_match else None
|
||
display_h_str = h_match.group(1) if h_match else None
|
||
if not display_w_str or not display_h_str:
|
||
continue
|
||
|
||
try:
|
||
display_w = float(display_w_str)
|
||
display_h = float(display_h_str)
|
||
except (ValueError, TypeError):
|
||
continue
|
||
|
||
try:
|
||
from PIL import Image as PILImage
|
||
with PILImage.open(img_path) as img:
|
||
actual_w, actual_h = img.size
|
||
|
||
if actual_w < display_w or actual_h < display_h:
|
||
result['warnings'].append(
|
||
f"Image {href} is {actual_w}x{actual_h} but displayed at "
|
||
f"{int(display_w)}x{int(display_h)} — may appear blurry")
|
||
elif actual_w > display_w * 4 and actual_h > display_h * 4:
|
||
result['warnings'].append(
|
||
f"Image {href} is {actual_w}x{actual_h} but displayed at "
|
||
f"{int(display_w)}x{int(display_h)} — consider downsizing "
|
||
f"to reduce file size")
|
||
except ImportError:
|
||
pass # PIL not available, skip resolution check
|
||
except Exception:
|
||
pass # Image unreadable, skip resolution check
|
||
|
||
def _check_animation_group_ids(self, content: str, result: Dict):
|
||
"""Warn when visible top-level groups cannot be customized."""
|
||
try:
|
||
root = ET.fromstring(content)
|
||
except ET.ParseError:
|
||
return
|
||
|
||
non_visual = {'defs', 'title', 'desc', 'metadata', 'style'}
|
||
for index, child in enumerate(list(root), start=1):
|
||
tag = child.tag.split('}', 1)[-1]
|
||
if tag in non_visual:
|
||
continue
|
||
if tag == 'g' and not child.get('id'):
|
||
result['warnings'].append(
|
||
f"Top-level visible <g> #{index} has no id; "
|
||
"object-level animation config cannot reference it"
|
||
)
|
||
|
||
# OOXML ST_PresetPatternVal enum — anything outside this set produces a
|
||
# PPTX schema violation ("PowerPoint found a problem with the content").
|
||
_OOXML_PATTERN_PRESETS = frozenset({
|
||
'pct5', 'pct10', 'pct20', 'pct25', 'pct30', 'pct40', 'pct50', 'pct60',
|
||
'pct70', 'pct75', 'pct80', 'pct90',
|
||
'horz', 'vert', 'ltHorz', 'ltVert', 'dkHorz', 'dkVert',
|
||
'narHorz', 'narVert', 'dashHorz', 'dashVert',
|
||
'cross', 'dnDiag', 'upDiag', 'ltDnDiag', 'ltUpDiag', 'dkDnDiag',
|
||
'dkUpDiag', 'wdDnDiag', 'wdUpDiag',
|
||
'dashDnDiag', 'dashUpDiag', 'diagCross',
|
||
'smCheck', 'lgCheck', 'smGrid', 'lgGrid', 'dotGrid', 'smConfetti',
|
||
'lgConfetti', 'horzBrick', 'diagBrick', 'solidDmnd', 'openDmnd',
|
||
'dotDmnd', 'plaid', 'sphere', 'weave', 'wave', 'trellis', 'zigZag',
|
||
'divot', 'shingle',
|
||
})
|
||
|
||
def _check_pattern_fills(self, content: str, result: Dict):
|
||
"""Audit <pattern> defs that drive PPTX <a:pattFill> output.
|
||
|
||
svg_to_pptx maps <pattern fill> to native <a:pattFill prst="...">. The
|
||
preset name comes from `data-pptx-pattern` (e.g. `lgGrid` / `smGrid` /
|
||
`dkUpDiag`). Two failure modes worth catching pre-export:
|
||
|
||
1. Missing annotation → converter silently falls back to `ltUpDiag`
|
||
(diagonal stripes) and picks `bg = #FFFFFF` when the pattern has
|
||
no child <rect>, turning a hand-authored grid into white-on-stripes
|
||
in PPTX.
|
||
2. Invalid preset name → PPTX schema rejects the file; PowerPoint
|
||
opens it with "needs to be repaired". OOXML
|
||
`ST_PresetPatternVal` is a closed enum — only the names in
|
||
`_OOXML_PATTERN_PRESETS` are legal. Inventing `ltGrid` (no such
|
||
value) is the canonical mistake; the only grids are `smGrid` /
|
||
`lgGrid` / `dotGrid`.
|
||
"""
|
||
try:
|
||
root = ET.fromstring(content)
|
||
except ET.ParseError:
|
||
return
|
||
|
||
for pattern in root.iter(f'{{{SVG_NS}}}pattern'):
|
||
pat_id = pattern.get('id', '<unnamed>')
|
||
prst = pattern.get('data-pptx-pattern')
|
||
if not prst:
|
||
result['warnings'].append(
|
||
f"<pattern id=\"{pat_id}\"> has no data-pptx-pattern attribute — "
|
||
"PPTX export will fall back to `ltUpDiag` (diagonal stripes), "
|
||
"not your custom geometry. Add data-pptx-pattern=\"lgGrid\" / "
|
||
"\"smGrid\" / etc. plus a <rect fill=\"<bg>\"/> child so the "
|
||
"preset and bg color match your design."
|
||
)
|
||
continue
|
||
if prst not in self._OOXML_PATTERN_PRESETS:
|
||
result['errors'].append(
|
||
f"<pattern id=\"{pat_id}\"> uses data-pptx-pattern=\"{prst}\" "
|
||
"which is not in OOXML ST_PresetPatternVal — exported PPTX "
|
||
"will fail schema validation ('needs to be repaired'). "
|
||
"Use one of: smGrid / lgGrid / dotGrid (grids), "
|
||
"ltUpDiag / dkUpDiag / cross / diagCross / weave / plaid / "
|
||
"horzBrick (others); full enum in svg_quality_checker.py "
|
||
"_OOXML_PATTERN_PRESETS."
|
||
)
|
||
|
||
def _get_spec_lock(self, svg_path: Path):
    """Find the spec_lock.md that governs ``svg_path`` and return it parsed.

    Two locations are probed in order: the SVG's own directory, then that
    directory's parent (SVG directly under <project>/ or under
    <project>/svg_output/). The first location that is already cached or
    present on disk decides the outcome. The parsed value, or ``None`` when
    parsing raised, is memoised in ``self._lock_cache`` keyed by lock path,
    and ``self._lock_seen`` is switched on once a lock parses successfully.

    Returns:
        The parser's result, or ``None`` when the parser could not be
        imported, no lock file exists, or parsing failed.
    """
    if _parse_spec_lock is None:
        return None

    svg_dir = svg_path.parent
    search_paths = (svg_dir / 'spec_lock.md', svg_dir.parent / 'spec_lock.md')
    for lock_path in search_paths:
        if lock_path in self._lock_cache:
            return self._lock_cache[lock_path]
        if not lock_path.exists():
            continue
        try:
            parsed = _parse_spec_lock(lock_path)
        except Exception:
            # Best effort: an unparsable lock is cached as "no lock".
            parsed = None
        self._lock_cache[lock_path] = parsed
        if parsed is not None:
            self._lock_seen = True
        return parsed
    return None
|
||
|
||
def _check_spec_lock_drift(self, content: str, svg_path: Path, result: Dict):
    """Detect values used in the SVG that fall outside spec_lock.md.

    Covers colors (fill / stroke / stop-color), font-family, and font-size.
    Emits per-file warnings summarising the drift counts; exact drifting
    values are accumulated in self._drift_summary for the end-of-run
    aggregation. When spec_lock.md is missing, silently skip (consistent
    with executor-base.md §2.1's 'missing lock → warn and proceed' policy).

    Args:
        content: Raw SVG text; scanned with regexes, not parsed as XML.
        svg_path: Path of the SVG, used to locate the lock and to label
            drift entries by file name.
        result: Per-file result dict; ``result['errors']`` and
            ``result['warnings']`` are appended to in place.
    """
    lock = self._get_spec_lock(svg_path)
    if lock is None:
        return

    # Build allow-sets from the lock
    allowed_colors = set()
    for v in lock.get('colors', {}).values():
        # Only literal hex values take part; compared upper-cased.
        if HEX_VALUE_RE.fullmatch(v):
            allowed_colors.add(v.upper())

    typo = lock.get('typography', {})
    # Every non-family typography entry must be a bare number (no unit).
    numeric_size_re = re.compile(r'^(?:\d+(?:\.\d+)?|\.\d+)$')
    invalid_lock_sizes = []
    for k, v in typo.items():
        if k == 'font_family' or k.endswith('_family'):
            continue
        if not numeric_size_re.fullmatch(v.strip()):
            invalid_lock_sizes.append(f"{k}: {v}")
    if invalid_lock_sizes:
        # Show at most five offenders, then a "(+N more)" tail.
        shown = ', '.join(invalid_lock_sizes[:5])
        more = len(invalid_lock_sizes) - 5
        suffix = f" (+{more} more)" if more > 0 else ""
        result['errors'].append(
            f"spec_lock typography sizes must be unitless numeric px values; "
            f"found {shown}{suffix}."
        )

    # Font families: default `font_family` plus any per-role `*_family`
    # override (title_family / body_family / emphasis_family / code_family,
    # per spec_lock_reference.md). Any of these is a legitimate declared
    # value; an SVG that uses any one of them is not drifting.
    allowed_fonts = set()
    if typo:
        default_font = typo.get('font_family', '').strip()
        if default_font:
            allowed_fonts.add(self._normalize_font_stack(default_font))
        for k, v in typo.items():
            if k == 'font_family' or not k.endswith('_family'):
                continue
            v_clean = v.strip()
            # Skip placeholder text like "same as body (omit if identical)"
            if not v_clean or v_clean.lower().startswith('same as'):
                continue
            allowed_fonts.add(self._normalize_font_stack(v_clean))

    # Sizes: declared slots are anchors; body is the ramp baseline.
    allowed_sizes = set()
    body_px = None
    for k, v in typo.items():
        if k == 'font_family' or k.endswith('_family'):
            continue
        allowed_sizes.add(self._normalize_size(v))
        if k == 'body':
            try:
                body_px = float(self._normalize_size(v))
            except (ValueError, TypeError):
                # Non-numeric body: no ramp baseline, so the envelope
                # exemption below is never applied.
                body_px = None

    # Scan SVG for used values
    color_drifts = set()
    for attr in ('fill', 'stroke', 'stop-color'):
        # Only quoted attribute values holding a hex literal are matched.
        pattern = re.compile(rf'\b{attr}\s*=\s*["\'](#[0-9A-Fa-f]{{3,8}})["\']')
        for m in pattern.finditer(content):
            val = m.group(1).upper()
            if val not in allowed_colors:
                color_drifts.add(val)

    font_drifts = set()
    # Capture to the matching delimiter (group 1) so a double-quoted stack
    # containing single-quoted family names is not truncated at the inner quote.
    for m in re.finditer(r'font-family\s*=\s*(["\'])(.*?)\1', content):
        val = m.group(2).strip()
        # With no declared fonts at all, nothing is reported as font drift.
        if allowed_fonts and self._normalize_font_stack(val) not in allowed_fonts:
            font_drifts.add(val)

    # Poster / showcase contexts use unbounded hero type — drop the ceiling.
    mode = (lock.get('mode', {}).get('mode') or '').strip().lower()
    vstyle = (lock.get('visual_style', {}).get('visual_style') or '').strip().lower()
    max_ratio = (float('inf') if mode in POSTER_SIZE_MODES or vstyle in POSTER_SIZE_STYLES
                 else RAMP_MAX_RATIO)

    size_drifts = set()
    used_sizes = []  # every occurrence, duplicates kept, for the template check
    for m in re.finditer(r'font-size\s*=\s*["\']([^"\']+)["\']', content):
        val = self._normalize_size(m.group(1))
        used_sizes.append(val)
        if not allowed_sizes or val in allowed_sizes:
            continue
        # Intermediate values are allowed when they sit inside the ramp
        # envelope (ratio to body within [RAMP_MIN_RATIO, max_ratio]).
        if body_px and body_px > 0:
            try:
                ratio = float(val) / body_px
                if RAMP_MIN_RATIO <= ratio <= max_ratio:
                    continue
            except ValueError:
                # Non-numeric size (e.g. still carries a unit): treat as drift.
                pass
        size_drifts.add(val)

    template_size_drift = self._detect_template_size_drift(
        used_sizes, allowed_sizes, body_px
    )

    # Record in run-wide aggregation
    fname = svg_path.name
    for v in color_drifts:
        self._drift_summary['colors'][v].add(fname)
    for v in font_drifts:
        self._drift_summary['fonts'][v].add(fname)
    for v in size_drifts:
        self._drift_summary['sizes'][v].add(fname)

    # Per-file warning (one condensed line; details live in summary)
    parts = []
    if color_drifts:
        parts.append(f"{len(color_drifts)} color(s)")
    if font_drifts:
        parts.append(f"{len(font_drifts)} font-family value(s)")
    if size_drifts:
        parts.append(f"{len(size_drifts)} font-size value(s)")
    if parts:
        result['warnings'].append(
            f"spec_lock drift: {', '.join(parts)} not in spec_lock.md "
            "(see drift summary for details)"
        )
    # The template-size message is a separate warning, appended after the
    # condensed drift line.
    if template_size_drift:
        result['warnings'].append(template_size_drift)
|
||
|
||
def _detect_template_size_drift(self, used_sizes, allowed_sizes, body_px):
|
||
"""Warn when template-like small sizes bypass the locked type ramp.
|
||
|
||
The normal drift check deliberately permits in-ramp feature sizes, so
|
||
it should not hard-fail valid hero numbers or one-off labels. This
|
||
warning targets the common executor failure mode: copying a template's
|
||
compact 12/15/16px text stack instead of mapping content roles to
|
||
spec_lock typography, then reflowing from those locked px values.
|
||
"""
|
||
if not allowed_sizes or not body_px or body_px <= 0:
|
||
return None
|
||
|
||
try:
|
||
declared_min = min(float(v) for v in allowed_sizes)
|
||
except ValueError:
|
||
declared_min = None
|
||
|
||
# Stay narrow on purpose: real decks carry legitimate undeclared
|
||
# sub-body sizes (intermediate levels, labels, emphasis) just below the
|
||
# locked body, so "any size < body" floods the warning and destroys its
|
||
# credibility. Only flag values that read as genuine template leftovers
|
||
# — at or below `body * 0.75`, or below the smallest declared slot. This
|
||
# under-warns (a stray 15/16 against a body of 18 can slip through) in
|
||
# exchange for not crying wolf on valid intermediate type.
|
||
template_like_limit = body_px * 0.75
|
||
template_like_sub_body = []
|
||
for raw in used_sizes:
|
||
if raw in allowed_sizes:
|
||
continue
|
||
try:
|
||
size = float(raw)
|
||
except (TypeError, ValueError):
|
||
continue
|
||
below_declared_floor = declared_min is not None and size < declared_min
|
||
if size <= template_like_limit or below_declared_floor:
|
||
template_like_sub_body.append(raw)
|
||
|
||
if not template_like_sub_body:
|
||
return None
|
||
|
||
counts = Counter(template_like_sub_body)
|
||
distinct = sorted(counts, key=lambda v: float(v))
|
||
repeated_total = sum(counts.values())
|
||
|
||
below_declared_floor = []
|
||
if declared_min is not None:
|
||
below_declared_floor = [v for v in distinct if float(v) < declared_min]
|
||
|
||
if len(distinct) < 2 and repeated_total < 4 and not below_declared_floor:
|
||
return None
|
||
|
||
sample = ', '.join(
|
||
f"{v}x{counts[v]}" if counts[v] > 1 else v
|
||
for v in distinct[:5]
|
||
)
|
||
more = len(distinct) - 5
|
||
suffix = f" (+{more} more)" if more > 0 else ""
|
||
return (
|
||
"possible template font-size drift: undeclared sub-body size(s) "
|
||
f"{sample}{suffix}. Map each text item to a spec_lock typography "
|
||
"role first, then reflow card height / y / dy / line-height from "
|
||
"the locked px values."
|
||
)
|
||
|
||
def _find_image_sources_manifest(self, svg_path: Path) -> Path | None:
|
||
"""Locate image_sources.json for a project SVG.
|
||
|
||
Quality checks run primarily on <project>/svg_output/*.svg, but this
|
||
also supports SVGs checked from project root or svg_final.
|
||
"""
|
||
bases = (svg_path.parent, svg_path.parent.parent, svg_path.parent.parent.parent)
|
||
for base in bases:
|
||
candidate = base / 'images' / 'image_sources.json'
|
||
if candidate.exists():
|
||
return candidate
|
||
return None
|
||
|
||
def _load_image_sources_manifest(self, svg_path: Path) -> Dict:
|
||
manifest_path = self._find_image_sources_manifest(svg_path)
|
||
if manifest_path is None:
|
||
return {}
|
||
if manifest_path in self._source_manifest_cache:
|
||
return self._source_manifest_cache[manifest_path]
|
||
try:
|
||
payload = json.loads(manifest_path.read_text(encoding='utf-8'))
|
||
except (OSError, json.JSONDecodeError):
|
||
payload = {}
|
||
self._source_manifest_cache[manifest_path] = payload
|
||
return payload
|
||
|
||
def _check_sourced_image_attribution(self, content: str, svg_path: Path, result: Dict):
|
||
"""Require visible credit text for attribution-required web images.
|
||
|
||
image_search.py records the legal tier in images/image_sources.json;
|
||
Executor must render compact credit text into the SVG. This check
|
||
prevents a quality-first CC BY / CC BY-SA image from silently reaching
|
||
export without attribution.
|
||
"""
|
||
manifest = self._load_image_sources_manifest(svg_path)
|
||
items = manifest.get('items') or []
|
||
if not items:
|
||
return
|
||
|
||
text_content = html.unescape(re.sub(r'<[^>]+>', ' ', content))
|
||
text_content = re.sub(r'\s+', ' ', text_content)
|
||
svg_stem = svg_path.stem
|
||
|
||
for item in items:
|
||
if not item.get('attribution_required') and item.get('license_tier') != 'attribution-required':
|
||
continue
|
||
|
||
filename = Path(str(item.get('filename') or '')).name
|
||
slide = str(item.get('slide') or '').strip()
|
||
referenced = bool(filename and filename in content)
|
||
same_slide = bool(slide and slide == svg_stem)
|
||
if not referenced and not same_slide:
|
||
continue
|
||
|
||
license_name = str(item.get('license_name') or '').upper()
|
||
license_token = 'CC BY-SA' if 'BY-SA' in license_name else 'CC BY'
|
||
has_credit = license_token in text_content.upper()
|
||
if not has_credit:
|
||
result['errors'].append(
|
||
f"Missing inline attribution for sourced image {filename or '(unknown)'} "
|
||
f"({license_token}). Add compact credit text per "
|
||
f"references/image-searcher.md §7."
|
||
)
|
||
|
||
@staticmethod
|
||
def _normalize_size(value: str) -> str:
|
||
"""Normalize a font-size value for drift comparison.
|
||
|
||
Unit-bearing SVG values are reported as errors before drift checking.
|
||
The legacy `px` strip remains to avoid a duplicate drift warning after
|
||
the hard error has already identified the unit problem.
|
||
"""
|
||
v = value.strip().lower()
|
||
if v.endswith('px'):
|
||
v = v[:-2].strip()
|
||
return v
|
||
|
||
@staticmethod
|
||
def _normalize_font_stack(stack: str) -> str:
|
||
"""Normalize a font-family stack for comparison: split on commas, strip
|
||
quotes / whitespace, lowercase, rejoin. Collapses cosmetic differences
|
||
(comma spacing, single vs double quotes, case) so that
|
||
`Consolas,'Courier New',monospace` matches `Consolas, "Courier New", monospace`."""
|
||
parts = [p.strip().strip('"\'').lower() for p in stack.split(',')]
|
||
return ','.join(p for p in parts if p)
|
||
|
||
def _categorize_issue(self, error_msg: str) -> str:
|
||
"""Categorize issue type"""
|
||
if 'Invalid XML' in error_msg:
|
||
return 'XML well-formedness'
|
||
elif 'viewBox' in error_msg:
|
||
return 'viewBox issues'
|
||
elif 'foreignObject' in error_msg:
|
||
return 'foreignObject'
|
||
elif error_msg.startswith('Alignment:'):
|
||
return 'Alignment/grid'
|
||
elif error_msg.startswith('Geometry:'):
|
||
return 'Geometry'
|
||
elif 'font' in error_msg.lower():
|
||
return 'Font issues'
|
||
else:
|
||
return 'Other'
|
||
|
||
def check_directory(self, directory: str, expected_format: str = None) -> List[Dict]:
    """Check every SVG found under ``directory`` and print per-file results.

    Args:
        directory: Path to a project/template directory, or to one SVG file.
        expected_format: Expected canvas format, forwarded to ``check_file``.

    Returns:
        ``self.results`` after all files were checked. An empty list is
        returned when the path does not exist or holds no SVG files. In
        template mode a brand-only directory is skipped and the current
        ``self.results`` is returned unchanged.
    """
    target = Path(directory)
    if not target.exists():
        print(f"[ERROR] Directory does not exist: {directory}")
        return []

    is_template_dir = self.template_mode and target.is_dir()

    # Brand-only template directories (templates/brands/<id>/) carry no SVG
    # roster; their design_spec.md declares `kind: brand`. They are validated
    # by register_template.py, not here.
    if is_template_dir:
        spec = target / 'design_spec.md'
        if spec.exists() and _design_spec_is_brand(spec):
            print(
                "[INFO] Brand directory detected (kind: brand) — "
                "SVG checks skipped."
            )
            print(
                "[INFO] Validate brand specs via: "
                "python3 scripts/register_template.py "
                "--kind brand <brand_id> --dry-run"
            )
            return self.results

    # Collect the SVG files to check.
    if target.is_file():
        svg_files = [target]
    elif self.template_mode:
        # Template directories live at templates/{layouts,decks}/<id>/.
        svg_files = sorted(target.glob('*.svg'))
    else:
        # Projects keep pages in svg_output/ when that folder exists.
        nested = target / 'svg_output'
        scan_root = nested if nested.exists() else target
        svg_files = sorted(scan_root.glob('*.svg'))

    if not svg_files:
        print("[WARN] No SVG files found")
        return []

    print(f"\n[SCAN] Checking {len(svg_files)} SVG file(s)...\n")

    for svg_file in svg_files:
        self._print_result(self.check_file(str(svg_file), expected_format))

    # Directory-level contracts run once, after the per-file pass.
    if target.is_dir():
        if self.template_mode:
            self._check_template_contract(target, svg_files)
        else:
            self._check_animation_config_contract(target)

    return self.results
|
||
|
||
def _check_animation_config_contract(self, dir_path: Path) -> None:
    """Validate the project's animations.json and queue any findings.

    Does nothing when the animation-config helpers could not be imported.
    The project root is ``dir_path`` itself when it contains ``svg_output``,
    otherwise its parent. A config that fails to load is queued as one
    ``'error'`` entry on ``self._animation_issues``; every message returned
    by the validator is queued as a ``'warning'``. A falsy (absent or empty)
    config produces nothing.
    """
    loader, validator = _load_animation_config, _validate_animation_config
    if loader is None or validator is None:
        return

    has_output_dir = (dir_path / 'svg_output').exists()
    project_path = dir_path if has_output_dir else dir_path.parent

    try:
        config = loader(project_path)
    except Exception as exc:
        # Any loader failure is reported to the user, never raised.
        self._animation_issues.append(('error', f"animations.json is invalid: {exc}"))
        return

    if config:
        for message in validator(project_path, config):
            self._animation_issues.append(('warning', message))
|
||
|
||
def _check_template_contract(self, dir_path: Path,
|
||
svg_files: List[Path]) -> None:
|
||
"""Template-mode-only checks: roster ↔ design_spec consistency and
|
||
per-page placeholder hints.
|
||
|
||
- **Roster mismatch (orphan / missing)** is reported as an *error*: a
|
||
stale roster will produce a wrong ``layouts_index.json`` entry.
|
||
- **Placeholder gaps** are reported as *warnings*. Templates may
|
||
legitimately omit conventional placeholders or swap them out (e.g.
|
||
``{{CLOSING_MESSAGE}}`` instead of ``{{THANK_YOU}}``), and a content
|
||
variant may use a bespoke slot vocabulary. Designers can declare
|
||
their own per-stem expectations via ``placeholders:`` frontmatter
|
||
in ``design_spec.md`` to suppress these warnings explicitly.
|
||
|
||
Issues are aggregated and printed in :py:meth:`print_summary` so the
|
||
per-file report stays focused on intrinsic SVG validity.
|
||
"""
|
||
spec_path = dir_path / 'design_spec.md'
|
||
spec_text = spec_path.read_text(encoding='utf-8') if spec_path.exists() else ""
|
||
spec_pages = self._extract_spec_roster(spec_text) if spec_text else []
|
||
custom_contract = self._extract_frontmatter_placeholders(spec_text) if spec_text else {}
|
||
|
||
on_disk = {p.stem for p in svg_files}
|
||
|
||
if spec_pages:
|
||
spec_set = set(spec_pages)
|
||
orphan = sorted(on_disk - spec_set)
|
||
missing = sorted(spec_set - on_disk)
|
||
for page in orphan:
|
||
self._template_issues.append((
|
||
'error',
|
||
'roster_orphan',
|
||
f"{page}.svg exists on disk but is not listed in design_spec.md Page Roster",
|
||
))
|
||
for page in missing:
|
||
self._template_issues.append((
|
||
'error',
|
||
'roster_missing',
|
||
f"design_spec.md Page Roster lists {page} but {page}.svg is missing on disk",
|
||
))
|
||
elif spec_path.exists():
|
||
# design_spec.md is present but the roster parser found nothing —
|
||
# surface as a warning. Legacy specs may lack an explicit roster.
|
||
self._template_issues.append((
|
||
'warning',
|
||
'roster_unknown',
|
||
f"could not extract page roster from {spec_path.name}; "
|
||
"skipping orphan/missing checks",
|
||
))
|
||
else:
|
||
self._template_issues.append((
|
||
'error',
|
||
'spec_missing',
|
||
f"{spec_path.name} not found — required for every library template",
|
||
))
|
||
|
||
# Per-file placeholder coverage. Variants reuse the parent type's set
|
||
# (e.g. 03a_content_two_col.svg ↔ 03_content rules) unless the spec
|
||
# frontmatter overrides that page (custom_contract takes precedence).
|
||
for svg_file in svg_files:
|
||
expected = self._lookup_template_contract(
|
||
svg_file.stem, overrides=custom_contract,
|
||
)
|
||
if expected is None:
|
||
continue # extension pages or stems with no convention
|
||
try:
|
||
content = svg_file.read_text(encoding='utf-8')
|
||
except OSError:
|
||
continue
|
||
for placeholder in expected:
|
||
if placeholder not in content:
|
||
self._template_issues.append((
|
||
'warning',
|
||
'placeholder_hint',
|
||
f"{svg_file.name}: missing conventional placeholder {placeholder} "
|
||
"(declare 'placeholders:' frontmatter in design_spec.md to silence)",
|
||
))
|
||
|
||
@staticmethod
|
||
def _extract_frontmatter_placeholders(spec_text: str) -> Dict[str, Tuple[str, ...]]:
|
||
"""Read the optional ``placeholders:`` map from design_spec.md frontmatter.
|
||
|
||
Shape:
|
||
|
||
.. code-block:: yaml
|
||
|
||
placeholders:
|
||
01_cover: ["{{TITLE}}", "{{BRAND_LOGO}}"]
|
||
03_content: [] # explicitly assert "no expectation"
|
||
03a_content_two_col: # variant-specific override
|
||
- "{{LEFT_TITLE}}"
|
||
- "{{RIGHT_TITLE}}"
|
||
|
||
Each key is a stem (full filename without ``.svg``) or page-type prefix
|
||
(``01_cover``). An empty list silences the default convention for that
|
||
stem; a populated list replaces the default. Stems / prefixes not
|
||
listed fall back to ``DEFAULT_PLACEHOLDER_CONVENTION``.
|
||
|
||
We parse with PyYAML when available; otherwise we fall back to a
|
||
minimal regex that handles the documented shape.
|
||
"""
|
||
if not spec_text.startswith("---\n"):
|
||
return {}
|
||
end = spec_text.find("\n---\n", 4)
|
||
if end == -1:
|
||
return {}
|
||
block = spec_text[4:end]
|
||
|
||
try:
|
||
import yaml # type: ignore
|
||
except ImportError:
|
||
return _parse_placeholders_fallback(block)
|
||
|
||
try:
|
||
data = yaml.safe_load(block) or {}
|
||
except yaml.YAMLError:
|
||
return {}
|
||
if not isinstance(data, dict):
|
||
return {}
|
||
raw = data.get("placeholders")
|
||
if not isinstance(raw, dict):
|
||
return {}
|
||
|
||
out: Dict[str, Tuple[str, ...]] = {}
|
||
for stem, value in raw.items():
|
||
if not isinstance(stem, str):
|
||
continue
|
||
if isinstance(value, list):
|
||
out[stem] = tuple(str(v) for v in value)
|
||
elif value is None:
|
||
out[stem] = ()
|
||
return out
|
||
|
||
@staticmethod
|
||
def _extract_spec_roster(spec_text: str) -> List[str]:
|
||
"""Best-effort: extract the page roster from design_spec.md.
|
||
|
||
Templates do not share a uniform section index for the roster — the
|
||
personality-only skeleton puts it at §V "Page Roster"; legacy specs use
|
||
§VI "Page Roster" or bury filenames under §VII "Page Types" as
|
||
``### N. Cover Page (01_cover.svg)``. We match by title (any roman
|
||
index), then fall back to scanning the whole document for any
|
||
backtick-wrapped ``<stem>.svg`` reference.
|
||
|
||
Returns the deduplicated stem list in document order. Empty result
|
||
means we can't determine the roster confidently — caller should treat
|
||
that as "skip orphan/missing checks", not as "no pages declared".
|
||
"""
|
||
# Pass 1: explicit roster section, any roman numeral.
|
||
section = re.search(
|
||
r"^##\s+[IVX]+\.\s+(?:Page Roster|Page Structure|Pages|Page Types)\b.*?(?=^##\s+|\Z)",
|
||
spec_text,
|
||
re.MULTILINE | re.DOTALL | re.IGNORECASE,
|
||
)
|
||
scope = section.group(0) if section else None
|
||
|
||
# Pass 2: full document. We *only* trust this scan when the explicit
|
||
# roster scan came up empty (no `<stem>.svg` references inside it) —
|
||
# otherwise the explicit section's deliberate roster wins over loose
|
||
# mentions elsewhere.
|
||
if scope and re.search(r"[`\(][0-9A-Za-z_]+\.svg[`\)]", scope):
|
||
text = scope
|
||
else:
|
||
text = spec_text
|
||
|
||
stems: List[str] = []
|
||
seen: set = set()
|
||
# Accept backtick-quoted (`01_cover.svg`) and parenthesized
|
||
# (01_cover.svg) forms — existing specs use either.
|
||
svg_ref_re = re.compile(r"[`\(]([0-9A-Za-z_]+\.svg)[`\)]")
|
||
for match in svg_ref_re.finditer(text):
|
||
stem = match.group(1)[:-4]
|
||
if stem in seen or not re.match(r"^\d", stem):
|
||
continue
|
||
seen.add(stem)
|
||
stems.append(stem)
|
||
|
||
# If the explicit §VI scan listed bare stems (without .svg), accept
|
||
# those as fallback — but only when they were inside that section.
|
||
if not stems and scope:
|
||
for match in re.finditer(r"`([0-9]{2}[a-z]?_[A-Za-z0-9_]+)`", scope):
|
||
stem = match.group(1)
|
||
if stem in seen:
|
||
continue
|
||
seen.add(stem)
|
||
stems.append(stem)
|
||
|
||
return stems
|
||
|
||
@classmethod
|
||
def _lookup_template_contract(
|
||
cls, stem: str, *,
|
||
overrides: Dict[str, Tuple[str, ...]] | None = None,
|
||
) -> Tuple[str, ...] | None:
|
||
"""Resolve a SVG stem to its expected placeholder set.
|
||
|
||
Resolution order, first hit wins:
|
||
1. ``overrides[stem]`` — frontmatter entry for the exact filename
|
||
2. ``overrides[<page_type_prefix>]`` — frontmatter entry for the
|
||
variant's parent type (e.g. ``03_content`` for
|
||
``03a_content_two_col``)
|
||
3. ``DEFAULT_PLACEHOLDER_CONVENTION[<page_type_prefix>]``
|
||
|
||
Returns ``None`` for stems with no matching convention or override —
|
||
e.g. extension pages like ``05_section_break``. ``()`` (empty tuple)
|
||
is a valid value meaning "no expected placeholders" — used to
|
||
explicitly silence the default convention.
|
||
"""
|
||
overrides = overrides or {}
|
||
if stem in overrides:
|
||
return overrides[stem]
|
||
|
||
# Variant convention: <NN><letter>?_<rest>; strip the letter to find
|
||
# the parent type prefix, e.g. "03a_content_two_col" -> "03_content".
|
||
match = re.match(r"^(\d{2})([a-z])?_([a-z]+)", stem)
|
||
if not match:
|
||
return None
|
||
num, _letter, kind = match.groups()
|
||
key = f"{num}_{kind}"
|
||
if key in overrides:
|
||
return overrides[key]
|
||
return cls.DEFAULT_PLACEHOLDER_CONVENTION.get(key)
|
||
|
||
def _print_result(self, result: Dict):
|
||
"""Print check result for a single file"""
|
||
if result['passed']:
|
||
if result['warnings']:
|
||
icon = "[WARN]"
|
||
status = "Passed (with warnings)"
|
||
else:
|
||
icon = "[OK]"
|
||
status = "Passed"
|
||
else:
|
||
icon = "[ERROR]"
|
||
status = "Failed"
|
||
|
||
print(f"{icon} {result['file']} - {status}")
|
||
|
||
# Display basic info
|
||
if result['info']:
|
||
info_items = []
|
||
if 'viewbox' in result['info']:
|
||
info_items.append(f"viewBox: {result['info']['viewbox']}")
|
||
if info_items:
|
||
print(f" {' | '.join(info_items)}")
|
||
|
||
# Display errors
|
||
if result['errors']:
|
||
for error in result['errors']:
|
||
print(f" [ERROR] {error}")
|
||
|
||
# Display warnings
|
||
if result['warnings']:
|
||
for warning in result['warnings'][:2]: # Only show first 2 warnings
|
||
print(f" [WARN] {warning}")
|
||
if len(result['warnings']) > 2:
|
||
print(f" ... and {len(result['warnings']) - 2} more warning(s)")
|
||
|
||
print()
|
||
|
||
def _check_icon_usage(self, content: str, svg_path: Path, result: Dict) -> None:
|
||
"""Warn when a page references no icons despite spec_lock locking an
|
||
inventory, and feed the deck-level zero-icons gate.
|
||
|
||
Section / cover / closing pages legitimately ship without icons, so a
|
||
single icon-less page is only a soft per-page warning. The hard failure
|
||
is deck-wide (every page icon-less while an inventory is locked) and is
|
||
emitted in :py:meth:`_print_icon_summary`.
|
||
"""
|
||
lock = self._get_spec_lock(svg_path)
|
||
if not lock:
|
||
return
|
||
icons = lock.get('icons') or {}
|
||
library = (icons.get('library') or '').strip().lower()
|
||
inventory = (icons.get('inventory') or '').strip().lower()
|
||
_empty = ('', 'none', '(none)', '-', 'n/a')
|
||
declared = library not in _empty and inventory not in _empty
|
||
if not declared:
|
||
return
|
||
self._icon_inventory_declared = True
|
||
count = len(re.findall(r'<use\b[^>]*\bdata-icon\s*=', content))
|
||
result.setdefault('info', {})['icon_count'] = count
|
||
self._deck_icon_total += count
|
||
if count == 0:
|
||
self._pages_missing_icons.append(svg_path.name)
|
||
result['warnings'].append(
|
||
f"spec_lock locks an icon library ({icons.get('library')}) + inventory "
|
||
f"but this page references no <use data-icon> — content pages should place "
|
||
f"1-3 inventory icons (cover / section / closing pages may omit)"
|
||
)
|
||
|
||
def _print_icon_summary(self):
|
||
"""Deck-level icon-usage gate.
|
||
|
||
Declared inventory + zero icons deck-wide is a hard error (the locked
|
||
icons are unused and the deck renders flat). Bumps ``summary['errors']``
|
||
so the process exits non-zero, mirroring ``_print_animation_summary``.
|
||
"""
|
||
if not self._icon_inventory_declared:
|
||
return
|
||
if self._deck_icon_total == 0:
|
||
self.summary['errors'] += 1
|
||
print("\n[ERROR] Icon usage: spec_lock locks an icon library + inventory, "
|
||
"but the deck authors ZERO <use data-icon> across all pages.")
|
||
print(" The locked icons are unused — the deck renders flat / icon-less.")
|
||
print(" Fix: in the executor, place inventory icons on content pages "
|
||
"(KPI / list / process / comparison layouts especially), then re-run.")
|
||
elif self._pages_missing_icons:
|
||
print(f"\n[INFO] Icon usage: {self._deck_icon_total} icon(s) deck-wide; "
|
||
f"{len(self._pages_missing_icons)} page(s) reference none "
|
||
f"({', '.join(self._pages_missing_icons)}).")
|
||
print(" Cover / section / closing pages may legitimately omit icons; "
|
||
"verify dense content pages aren't missing them.")
|
||
|
||
def _check_graphic_richness(self, content: str, result: Dict) -> None:
|
||
"""Tally graphic primitives per page for the deck-level flat-deck gate.
|
||
|
||
Counts <path>/<polyline>/<polygon>/<circle>/<image> — the elements that
|
||
actually draw a diagram, chart, figure, or photo. <circle> is included
|
||
because node / bubble / venn / timeline diagrams are built from circles
|
||
(excluding it false-flagged a 21-circle roadmap as "no figure"). <rect>
|
||
and <line> stay excluded: they are layout cards, backgrounds, and
|
||
dividers, and a deck built only from them is the "text on rectangles"
|
||
look this catches. Icons don't count here — they have their own gate.
|
||
Per-page nudges stay soft; the hard gate is deck-wide.
|
||
"""
|
||
g = (len(re.findall(r'<path\b', content))
|
||
+ len(re.findall(r'<polyline\b', content))
|
||
+ len(re.findall(r'<polygon\b', content))
|
||
+ len(re.findall(r'<circle\b', content))
|
||
+ len(re.findall(r'<image\b', content)))
|
||
result.setdefault('info', {})['graphic_count'] = g
|
||
self._deck_page_count += 1
|
||
self._deck_graphic_total += g
|
||
self._deck_text_total += len(re.findall(r'<text\b', content))
|
||
if g == 0:
|
||
self._pages_no_graphic.append(result.get('file', '?'))
|
||
|
||
# ── Geometry lint (check 13) ─────────────────────────────────────────
|
||
# Hand-written absolute coordinates fail in ways no string-level check
|
||
# sees: a display-size numeral overrunning its label, an icon patched in
|
||
# on top of a title, a TOC row past the canvas bottom. Text width can't
|
||
# be measured without rendering, but a per-char estimate (CJK ≈ 1em,
|
||
# Latin ≈ 0.5–0.7em) is accurate enough to flag hard collisions.
|
||
# Thresholds are deliberately loose — a near-total overlap is an error,
|
||
# a partial one only a warning — so estimation noise doesn't hard-fail
|
||
# legitimate tight layouts.
|
||
|
||
_GEOM_MAX_REPORTS = 6 # per file, per category — avoid drowning the report
|
||
|
||
@staticmethod
|
||
def _est_char_w(ch: str) -> float:
|
||
"""Approximate advance width of one char, in em (× font-size)."""
|
||
o = ord(ch)
|
||
if (0x2E80 <= o <= 0x9FFF or 0xF900 <= o <= 0xFAFF
|
||
or 0xFF00 <= o <= 0xFF60 or 0x3000 <= o <= 0x303F):
|
||
return 1.0 # CJK ideographs, kana, fullwidth forms, CJK punct
|
||
if ch == ' ':
|
||
return 0.30
|
||
if ch.isdigit():
|
||
return 0.60
|
||
if ch.isupper():
|
||
return 0.72
|
||
if o < 0x2E80:
|
||
return 0.52 # latin lowercase / halfwidth punctuation
|
||
return 0.70
|
||
|
||
@classmethod
|
||
def _est_text_w(cls, s: str, fs: float) -> float:
|
||
return sum(cls._est_char_w(c) for c in s) * fs
|
||
|
||
@staticmethod
|
||
def _f(value, default=None):
|
||
try:
|
||
return float(str(value).strip())
|
||
except (TypeError, ValueError):
|
||
return default
|
||
|
||
def _collect_geometry(self, root) -> Tuple[list, list, list]:
|
||
"""Walk the tree collecting text boxes, icon boxes and visible rects.
|
||
|
||
Only translate() transforms are followed; any other transform makes
|
||
coordinates unknowable without a full matrix engine, so that subtree
|
||
is skipped (better silent than wrong). Boxes are dicts:
|
||
{x0, y0, x1, y1, fs, label, exact_left} — exact_left marks a
|
||
start-anchored text whose left edge is exact (only the right edge is
|
||
estimated). Rects (cards / bands, ≥12px on both axes, visible fill or
|
||
stroke) feed the text-straddles-edge check.
|
||
"""
|
||
texts: list = []
|
||
icons: list = []
|
||
rects: list = []
|
||
translate_re = re.compile(
|
||
r'^\s*translate\(\s*(-?[\d.]+)(?:[\s,]+(-?[\d.]+))?\s*\)\s*$')
|
||
skip_tags = {'defs', 'clipPath', 'marker', 'symbol', 'pattern',
|
||
'mask', 'linearGradient', 'radialGradient', 'filter'}
|
||
|
||
def local(tag):
|
||
return tag.split('}')[-1]
|
||
|
||
def effective_opacity(el, inherited: float) -> float:
|
||
op = self._f(el.get('opacity'), 1.0)
|
||
fop = self._f(el.get('fill-opacity'), 1.0)
|
||
return inherited * min(op if op is not None else 1.0,
|
||
fop if fop is not None else 1.0)
|
||
|
||
def line_box(runs: list, x: float, y: float, anchor: str,
|
||
tx: float, ty: float):
|
||
"""One box per visual line. runs = [(text, fs), ...] flowed
|
||
left-to-right; the anchor positions the line's TOTAL width, which
|
||
is how SVG actually lays out a <text> with styled inline tspans
|
||
(e.g. <text text-anchor="end">$4.2B <tspan>(35%)</tspan></text>
|
||
renders as one right-aligned line, not two stacked runs)."""
|
||
w = sum(self._est_text_w(t, f) for t, f in runs)
|
||
fs = max(f for _, f in runs)
|
||
if anchor == 'middle':
|
||
x0 = x - w / 2
|
||
elif anchor == 'end':
|
||
x0 = x - w
|
||
else:
|
||
x0 = x
|
||
joined = ''.join(t for t, _ in runs)
|
||
label = joined if len(joined) <= 12 else joined[:12] + '…'
|
||
return {
|
||
'x0': x0 + tx, 'y0': y - 0.76 * fs + ty,
|
||
'x1': x0 + w + tx, 'y1': y + 0.22 * fs + ty,
|
||
'fs': fs, 'label': label,
|
||
'baseline': y + ty, 'anchor_x': x + tx,
|
||
'exact_left': anchor not in ('middle', 'end'),
|
||
}
|
||
|
||
def collect_text(el, tx, ty, inh_fs, inh_anchor, inh_op):
|
||
fs = self._f(el.get('font-size'), inh_fs) or 16.0
|
||
anchor = el.get('text-anchor') or inh_anchor
|
||
if effective_opacity(el, inh_op) < 0.35 or el.get('fill') == 'none':
|
||
return
|
||
x = self._f(el.get('x'))
|
||
y = self._f(el.get('y'))
|
||
if x is None or y is None:
|
||
return
|
||
# Group content into visual lines: a tspan with explicit x/y or a
|
||
# non-zero dy starts a new line; anything else (leading text,
|
||
# styled inline tspans, tspan tails) flows onto the current line.
|
||
cur_x, cur_y = x, y
|
||
cur_runs: list = []
|
||
|
||
def flush():
|
||
nonlocal cur_runs
|
||
if any(t for t, _ in cur_runs):
|
||
texts.append(line_box(cur_runs, cur_x, cur_y, anchor, tx, ty))
|
||
cur_runs = []
|
||
|
||
own = (el.text or '').strip()
|
||
if own:
|
||
cur_runs.append((own, fs))
|
||
for ts in el:
|
||
if local(ts.tag) != 'tspan':
|
||
continue
|
||
tfs = self._f(ts.get('font-size'), fs) or fs
|
||
tsx = self._f(ts.get('x'))
|
||
tsy = self._f(ts.get('y'))
|
||
dy_raw = (ts.get('dy') or '').strip()
|
||
if dy_raw.endswith('em'):
|
||
dy = (self._f(dy_raw[:-2], 0.0) or 0.0) * tfs
|
||
else:
|
||
dy = self._f(dy_raw, 0.0) or 0.0
|
||
if tsx is not None or tsy is not None or dy:
|
||
flush()
|
||
if tsx is not None:
|
||
cur_x = tsx
|
||
if tsy is not None:
|
||
cur_y = tsy
|
||
else:
|
||
cur_y += dy
|
||
t = ''.join(ts.itertext()).strip()
|
||
if t:
|
||
cur_runs.append((t, tfs))
|
||
tail = (ts.tail or '').strip()
|
||
if tail:
|
||
cur_runs.append((tail, fs))
|
||
flush()
|
||
|
||
def walk(el, tx, ty, inh_fs, inh_anchor, inh_op):
|
||
tag = local(el.tag)
|
||
if tag in skip_tags:
|
||
return
|
||
tr = el.get('transform')
|
||
if tr:
|
||
m = translate_re.match(tr)
|
||
if not m:
|
||
return # rotate/scale/matrix — coords unknown, skip subtree
|
||
tx += float(m.group(1))
|
||
ty += float(m.group(2) or 0)
|
||
if tag == 'text':
|
||
collect_text(el, tx, ty, inh_fs, inh_anchor, inh_op)
|
||
return
|
||
if tag == 'use' and el.get('data-icon'):
|
||
x, y = self._f(el.get('x')), self._f(el.get('y'))
|
||
w, h = self._f(el.get('width')), self._f(el.get('height'))
|
||
if None not in (x, y, w, h):
|
||
icons.append({
|
||
'x0': x + tx, 'y0': y + ty,
|
||
'x1': x + w + tx, 'y1': y + h + ty,
|
||
'label': el.get('data-icon'),
|
||
})
|
||
return
|
||
if tag == 'rect':
|
||
x, y = self._f(el.get('x'), 0.0), self._f(el.get('y'), 0.0)
|
||
w, h = self._f(el.get('width')), self._f(el.get('height'))
|
||
visible = (el.get('fill') or '').strip().lower() != 'none' \
|
||
or el.get('stroke') not in (None, 'none')
|
||
# ≥12px both axes: hairline rules / accent bars / top bands are
|
||
# legitimate text neighbors, only card/band-sized boxes matter.
|
||
if visible and w and h and w >= 12 and h >= 12:
|
||
rects.append({
|
||
'x0': x + tx, 'y0': y + ty,
|
||
'x1': x + w + tx, 'y1': y + h + ty,
|
||
})
|
||
return
|
||
inh_fs = self._f(el.get('font-size'), inh_fs)
|
||
inh_anchor = el.get('text-anchor') or inh_anchor
|
||
inh_op = effective_opacity(el, inh_op)
|
||
if inh_op < 0.35:
|
||
return
|
||
for c in el:
|
||
walk(c, tx, ty, inh_fs, inh_anchor, inh_op)
|
||
|
||
walk(root, 0.0, 0.0, None, 'start', 1.0)
|
||
return texts, icons, rects
|
||
|
||
@staticmethod
|
||
def _box_intersection(a: Dict, b: Dict) -> Tuple[float, float]:
|
||
iw = min(a['x1'], b['x1']) - max(a['x0'], b['x0'])
|
||
ih = min(a['y1'], b['y1']) - max(a['y0'], b['y0'])
|
||
return max(iw, 0.0), max(ih, 0.0)
|
||
|
||
def _check_geometry(self, content: str, result: Dict) -> None:
|
||
"""Detect text/icon overlaps and off-canvas elements (estimated boxes)."""
|
||
try:
|
||
root = ET.fromstring(content)
|
||
except ET.ParseError:
|
||
return # already reported by the well-formedness check
|
||
vb = re.search(r'viewBox="([^"]+)"', content)
|
||
if not vb:
|
||
return
|
||
parts = vb.group(1).split()
|
||
if len(parts) != 4:
|
||
return
|
||
canvas_w, canvas_h = float(parts[2]), float(parts[3])
|
||
|
||
texts, icons, rects = self._collect_geometry(root)
|
||
|
||
errors: List[str] = []
|
||
warnings: List[str] = []
|
||
|
||
# 1. Off-canvas: baseline / anchor coordinates are exact → error;
|
||
# right-edge overflow relies on the width estimate → warning.
|
||
for t in texts:
|
||
if t['baseline'] > canvas_h + 1:
|
||
errors.append(
|
||
f"text \"{t['label']}\" baseline y={t['baseline']:.0f} is below "
|
||
f"the canvas (height {canvas_h:.0f}) — it will be clipped")
|
||
elif t['exact_left'] and t['anchor_x'] > canvas_w + 1:
|
||
errors.append(
|
||
f"text \"{t['label']}\" starts at x={t['anchor_x']:.0f}, beyond "
|
||
f"the canvas (width {canvas_w:.0f})")
|
||
elif t['x1'] > canvas_w + 0.6 * t['fs']:
|
||
warnings.append(
|
||
f"text \"{t['label']}\" likely overflows the right canvas edge "
|
||
f"(estimated right {t['x1']:.0f} > {canvas_w:.0f})")
|
||
elif t['x0'] < -0.6 * t['fs']:
|
||
warnings.append(
|
||
f"text \"{t['label']}\" likely overflows the left canvas edge "
|
||
f"(estimated left {t['x0']:.0f} < 0)")
|
||
for ic in icons:
|
||
if ic['x0'] >= canvas_w or ic['y0'] >= canvas_h or ic['x1'] <= 0 or ic['y1'] <= 0:
|
||
errors.append(
|
||
f"icon {ic['label']} at ({ic['x0']:.0f},{ic['y0']:.0f}) is entirely "
|
||
f"outside the canvas")
|
||
elif (ic['x0'] < -2 or ic['y0'] < -2
|
||
or ic['x1'] > canvas_w + 2 or ic['y1'] > canvas_h + 2):
|
||
warnings.append(
|
||
f"icon {ic['label']} extends beyond the canvas edge "
|
||
f"({ic['x0']:.0f},{ic['y0']:.0f})-({ic['x1']:.0f},{ic['y1']:.0f})")
|
||
|
||
# 2. Text-on-text collisions. Adjacent lines at normal line-height never
|
||
# intersect (box height ≈ 0.98×fs, line gap ≥ 1.15×fs), so any real
|
||
# intersection means two runs share the same space.
|
||
for i in range(len(texts)):
|
||
for j in range(i + 1, len(texts)):
|
||
a, b = texts[i], texts[j]
|
||
iw, ih = self._box_intersection(a, b)
|
||
if iw <= 0 or ih <= 0:
|
||
continue
|
||
min_fs = min(a['fs'], b['fs'])
|
||
min_h = min(a['y1'] - a['y0'], b['y1'] - b['y0'])
|
||
# Same-baseline runs are horizontally sequenced by design —
|
||
# any real horizontal overlap means the left run's width was
|
||
# under-budgeted (the classic big-numeral-plus-caption bug),
|
||
# regardless of how small the overlap area ratio is.
|
||
same_line = abs(a['baseline'] - b['baseline']) < 0.5 * min_fs
|
||
if iw < 0.6 * min_fs or ih < 0.45 * min_h:
|
||
continue # graze from estimation noise — ignore
|
||
min_area = min((a['x1'] - a['x0']) * (a['y1'] - a['y0']),
|
||
(b['x1'] - b['x0']) * (b['y1'] - b['y0']))
|
||
ratio = (iw * ih) / min_area if min_area > 0 else 0
|
||
# Text-text overlaps cap at WARNING: the width estimate can't
|
||
# tell a crash from a deliberate graze (quadrant captions,
|
||
# word clouds, tightly-kerned numeral+suffix pairs all overlap
|
||
# estimated boxes legitimately). The warning carries exact
|
||
# coordinates so the render-acceptance pass knows which spot
|
||
# to eyeball; icon-on-text and off-canvas below stay errors
|
||
# because their geometry is exact.
|
||
if ratio >= 0.15 or same_line:
|
||
warnings.append(
|
||
f"text \"{a['label']}\" and \"{b['label']}\" overlap "
|
||
f"(~{ratio * 100:.0f}% of the smaller run, around "
|
||
f"({max(a['x0'], b['x0']):.0f},{max(a['y0'], b['y0']):.0f})) "
|
||
f"— eyeball this spot at render acceptance")
|
||
|
||
# 3. Icon-on-text collisions. Icon geometry is exact; the text estimate
|
||
# only inflates the right edge, so a large covered fraction of the
|
||
# icon is a reliable signal.
|
||
for ic in icons:
|
||
icon_area = (ic['x1'] - ic['x0']) * (ic['y1'] - ic['y0'])
|
||
if icon_area <= 0:
|
||
continue
|
||
for t in texts:
|
||
iw, ih = self._box_intersection(ic, t)
|
||
if iw <= 0 or ih <= 0:
|
||
continue
|
||
ratio = (iw * ih) / icon_area
|
||
msg = (f"icon {ic['label']} overlaps text \"{t['label']}\" "
|
||
f"(~{ratio * 100:.0f}% of the icon covered, at "
|
||
f"({ic['x0']:.0f},{ic['y0']:.0f}))")
|
||
if ratio >= 0.55:
|
||
errors.append(msg)
|
||
elif ratio >= 0.25:
|
||
warnings.append(msg)
|
||
|
||
# 4. Icon-on-icon collisions (both exact) — always at least a warning.
|
||
for i in range(len(icons)):
|
||
for j in range(i + 1, len(icons)):
|
||
a, b = icons[i], icons[j]
|
||
iw, ih = self._box_intersection(a, b)
|
||
if iw <= 0 or ih <= 0:
|
||
continue
|
||
min_area = min((a['x1'] - a['x0']) * (a['y1'] - a['y0']),
|
||
(b['x1'] - b['x0']) * (b['y1'] - b['y0']))
|
||
if min_area > 0 and (iw * ih) / min_area >= 0.3:
|
||
warnings.append(
|
||
f"icons {a['label']} and {b['label']} overlap at "
|
||
f"({max(a['x0'], b['x0']):.0f},{max(a['y0'], b['y0']):.0f})")
|
||
|
||
# 5. Text straddling a card/band edge (warning). A run that is partly
|
||
# inside and partly outside a visible rect is either clipped by the
|
||
# band (header text taller than its band) or poking out of its card
|
||
# (mis-centered captions) — both shipped in a real deck. Ratio
|
||
# bounds leave room for the width estimate; a deliberate badge
|
||
# overlapping a card edge is the FP case, hence warning-tier.
|
||
for t in texts:
|
||
t_area = (t['x1'] - t['x0']) * (t['y1'] - t['y0'])
|
||
if t_area <= 0:
|
||
continue
|
||
for r in rects:
|
||
iw, ih = self._box_intersection(t, r)
|
||
if iw <= 0 or ih <= 0:
|
||
continue
|
||
ratio = (iw * ih) / t_area
|
||
if 0.2 <= ratio <= 0.85:
|
||
warnings.append(
|
||
f"text \"{t['label']}\" straddles a card/band edge at "
|
||
f"({r['x0']:.0f},{r['y0']:.0f})-({r['x1']:.0f},{r['y1']:.0f}) "
|
||
f"(~{ratio * 100:.0f}% inside) — likely clipped by the band "
|
||
f"or poking out of the card; eyeball at render acceptance")
|
||
break # one report per text run is enough
|
||
|
||
for bucket, dest in ((errors, result['errors']), (warnings, result['warnings'])):
|
||
shown = bucket[:self._GEOM_MAX_REPORTS]
|
||
dest.extend(f"Geometry: {m}" for m in shown)
|
||
if len(bucket) > len(shown):
|
||
dest.append(
|
||
f"Geometry: ... and {len(bucket) - len(shown)} more "
|
||
f"similar issue(s) on this page")
|
||
|
||
# ── Alignment / grid / monotony lint (check 14) ──────────────────────
# The shipped failures no overlap check sees: sibling cards a few px out
# of line ("meant to align, didn't"), content blocks drifting off the
# deck's margin line page by page, and the same card/icon-grid archetype
# repeated until the deck reads monotone. Rect / icon coordinates are
# exact (no width estimation), so near-miss offsets are reliable: a
# 2-12px offset between row-mates is virtually never design intent —
# deliberate stagger clears 16px — which is why those land error-tier,
# unlike the estimated text boxes above.
#
# All thresholds below are in SVG user units (px).

_ALIGN_TOL = 2.0 # <= : aligned (authoring rounding slack)
_ALIGN_ERR = 12.0 # (tol, err] : hard misalignment → error
_ALIGN_INTENT = 16.0 # (err, intent): borderline → warning; >= intent: deliberate
_CARD_MIN_W = 60.0 # smaller rects are chips / accent bars, not cards
_CARD_MIN_H = 36.0 # height counterpart of _CARD_MIN_W
_CLUSTER_TOL = 14.0 # row/col clustering tolerance (fingerprints, gaps)
# Leading 1-3 digit page number followed by "_" or "-" in a deck page filename.
_PAGE_NUM_RE = re.compile(r'^(\d{1,3})[_\-]')
|
||
|
||
def _page_rhythm(self, svg_path: Path, lock) -> str:
|
||
"""Return this page's page_rhythm tag ('' when unknown)."""
|
||
rhythm = (lock or {}).get('page_rhythm') or {}
|
||
m = self._PAGE_NUM_RE.match(svg_path.name)
|
||
if not m:
|
||
return ''
|
||
return (rhythm.get(f'P{int(m.group(1)):02d}') or '').strip().lower()
|
||
|
||
def _cards_from_rects(self, rects, canvas_w, canvas_h):
|
||
"""Card-sized visible rects, excluding full-bleed backgrounds."""
|
||
cards = []
|
||
for r in rects:
|
||
w, h = r['x1'] - r['x0'], r['y1'] - r['y0']
|
||
if w < self._CARD_MIN_W or h < self._CARD_MIN_H:
|
||
continue
|
||
if w * h >= 0.85 * canvas_w * canvas_h:
|
||
continue
|
||
cards.append(r)
|
||
return cards
|
||
|
||
@staticmethod
|
||
def _similar_size(a, b, rel=0.2, floor=8.0):
|
||
return abs(a - b) <= max(floor, rel * max(a, b))
|
||
|
||
@staticmethod
|
||
def _cluster_values(values, tol):
|
||
"""Cluster sorted scalars; returns list of cluster-center floats."""
|
||
centers = []
|
||
for v in sorted(values):
|
||
if centers and v - centers[-1][-1] <= tol:
|
||
centers[-1].append(v)
|
||
else:
|
||
centers.append([v])
|
||
return [sum(c) / len(c) for c in centers]
|
||
|
||
def _layout_fingerprint(self, cards, icons):
|
||
"""Classify the page's dominant grid archetype, or None.
|
||
|
||
icon-grid: >=4 icons arranged in a >=2x2 grid (the icon+title+text
|
||
card pattern — usually no visible card rect, so icons carry the
|
||
structure). card-grid: >=4 similar-sized card rects in a grid, or a
|
||
>=4-row single-column stack (full-width list rows). Pages without a
|
||
dominant grid (covers, chapters, diagrams, timelines) get None and
|
||
never count toward monotony.
|
||
"""
|
||
if len(icons) >= 4:
|
||
rows = self._cluster_values([i['y0'] for i in icons], self._CLUSTER_TOL)
|
||
cols = self._cluster_values([i['x0'] for i in icons], self._CLUSTER_TOL)
|
||
if len(rows) >= 2 and len(cols) >= 2 \
|
||
and len(rows) * len(cols) <= len(icons) + 2:
|
||
return ('icon-grid', len(rows), len(cols))
|
||
# Consider only the largest similar-size card family on the page.
|
||
if len(cards) >= 4:
|
||
fam = max(
|
||
([c for c in cards
|
||
if self._similar_size(c['x1'] - c['x0'], k['x1'] - k['x0'])
|
||
and self._similar_size(c['y1'] - c['y0'], k['y1'] - k['y0'])]
|
||
for k in cards),
|
||
key=len,
|
||
)
|
||
if len(fam) >= 4:
|
||
rows = self._cluster_values([c['y0'] for c in fam], self._CLUSTER_TOL)
|
||
cols = self._cluster_values([c['x0'] for c in fam], self._CLUSTER_TOL)
|
||
if len(rows) * len(cols) <= len(fam) + 2 \
|
||
and (min(len(rows), len(cols)) >= 2
|
||
or (len(cols) == 1 and len(rows) >= 4)):
|
||
return ('card-grid', len(rows), len(cols))
|
||
return None
|
||
|
||
def _check_alignment(self, content: str, svg_path: Path, result: Dict) -> None:
|
||
"""Sibling alignment + layout_grid enforcement + deck aggregation."""
|
||
try:
|
||
root = ET.fromstring(content)
|
||
except ET.ParseError:
|
||
return
|
||
vb = re.search(r'viewBox="([^"]+)"', content)
|
||
if not vb:
|
||
return
|
||
parts = vb.group(1).split()
|
||
if len(parts) != 4:
|
||
return
|
||
canvas_w, canvas_h = float(parts[2]), float(parts[3])
|
||
|
||
texts, icons, rects = self._collect_geometry(root)
|
||
cards = self._cards_from_rects(rects, canvas_w, canvas_h)
|
||
|
||
# Chart plot areas (the mandatory §3.1 marker): rects inside them are
|
||
# data marks (bars / boxes) whose offsets encode values, not layout —
|
||
# exclude them from every alignment check.
|
||
plot_areas = [
|
||
tuple(map(float, m.groups()))
|
||
for m in re.finditer(
|
||
r'chart-plot-area:\s*([\d.]+)\s*,\s*([\d.]+)\s*,\s*([\d.]+)\s*,\s*([\d.]+)',
|
||
content)
|
||
]
|
||
if plot_areas:
|
||
cards = [
|
||
c for c in cards
|
||
if not any(px0 <= (c['x0'] + c['x1']) / 2 <= px1
|
||
and py0 <= (c['y0'] + c['y1']) / 2 <= py1
|
||
for px0, py0, px1, py1 in plot_areas)
|
||
]
|
||
|
||
errors: List[str] = []
|
||
warnings: List[str] = []
|
||
|
||
def near_equal(u, v, tol=None):
|
||
return abs(u - v) <= (self._ALIGN_TOL if tol is None else tol)
|
||
|
||
def cross_match(u, v):
|
||
# Cross-axis dimension near-equality: true sibling cards share the
|
||
# dimension perpendicular to their run (heights in a row, widths in
|
||
# a column). Data bars / featured-emphasis cards differ more.
|
||
return abs(u - v) <= max(6.0, 0.04 * max(u, v))
|
||
|
||
# 1. Sibling near-miss misalignment (exact geometry → error tier).
|
||
# A pair only errs when NO alignment scheme fits: leading edges,
|
||
# centers, and trailing edges all disagree. Shared centers (tree
|
||
# nodes on an axis, symmetric emphasis growth) or shared trailing
|
||
# edges (baseline-anchored elements) mean the offset is a scheme,
|
||
# not an accident.
|
||
for i in range(len(cards)):
|
||
for j in range(i + 1, len(cards)):
|
||
a, b = cards[i], cards[j]
|
||
aw, ah = a['x1'] - a['x0'], a['y1'] - a['y0']
|
||
bw, bh = b['x1'] - b['x0'], b['y1'] - b['y0']
|
||
v_overlap = min(a['y1'], b['y1']) - max(a['y0'], b['y0'])
|
||
h_overlap = min(a['x1'], b['x1']) - max(a['x0'], b['x0'])
|
||
# Row-mates: share a horizontal band, horizontally disjoint.
|
||
if (cross_match(ah, bh) and v_overlap >= 0.6 * min(ah, bh)
|
||
and h_overlap <= self._ALIGN_TOL):
|
||
dy = abs(a['y0'] - b['y0'])
|
||
aligned_otherwise = (
|
||
near_equal(a['y1'], b['y1'])
|
||
or near_equal((a['y0'] + a['y1']) / 2,
|
||
(b['y0'] + b['y1']) / 2))
|
||
if dy <= self._ALIGN_TOL or aligned_otherwise:
|
||
if dy <= self._ALIGN_TOL and self._similar_size(aw, bw):
|
||
dh = abs(ah - bh)
|
||
if self._ALIGN_TOL < dh:
|
||
warnings.append(
|
||
f"row-mate cards at x={a['x0']:.0f} and "
|
||
f"x={b['x0']:.0f} share a top but differ "
|
||
f"{dh:.0f}px in height — equalize or make the "
|
||
f"difference deliberate (>=16px)")
|
||
elif dy <= self._ALIGN_ERR:
|
||
errors.append(
|
||
f"row-mate cards at x={a['x0']:.0f} and x={b['x0']:.0f} have "
|
||
f"tops {dy:.0f}px apart (y={a['y0']:.0f} vs {b['y0']:.0f}) — "
|
||
f"meant to align; snap to one y")
|
||
elif dy < self._ALIGN_INTENT:
|
||
warnings.append(
|
||
f"row-mate cards at x={a['x0']:.0f} and x={b['x0']:.0f} have "
|
||
f"tops {dy:.0f}px apart — deliberate stagger should clear "
|
||
f"{self._ALIGN_INTENT:.0f}px")
|
||
# Column-mates: share a vertical band, vertically disjoint.
|
||
elif (cross_match(aw, bw) and h_overlap >= 0.6 * min(aw, bw)
|
||
and v_overlap <= self._ALIGN_TOL):
|
||
dx = abs(a['x0'] - b['x0'])
|
||
aligned_otherwise = (
|
||
near_equal(a['x1'], b['x1'])
|
||
or near_equal((a['x0'] + a['x1']) / 2,
|
||
(b['x0'] + b['x1']) / 2))
|
||
if dx <= self._ALIGN_TOL or aligned_otherwise:
|
||
pass
|
||
elif dx <= self._ALIGN_ERR:
|
||
errors.append(
|
||
f"column-mate cards at y={a['y0']:.0f} and y={b['y0']:.0f} have "
|
||
f"left edges {dx:.0f}px apart (x={a['x0']:.0f} vs {b['x0']:.0f}) "
|
||
f"— meant to align; snap to one x")
|
||
elif dx < self._ALIGN_INTENT:
|
||
warnings.append(
|
||
f"column-mate cards at y={a['y0']:.0f} and y={b['y0']:.0f} have "
|
||
f"left edges {dx:.0f}px apart — deliberate indent should clear "
|
||
f"{self._ALIGN_INTENT:.0f}px")
|
||
|
||
# 2. Uneven gaps in a >=3-card row. Only flag near-equal-but-not-equal
|
||
# spreads; a 2+1 grouping (gap spread comparable to the gap itself)
|
||
# is design intent, not drift.
|
||
top_groups: Dict[float, list] = {}
|
||
for c in cards:
|
||
for k in top_groups:
|
||
if abs(c['y0'] - k) <= self._CLUSTER_TOL:
|
||
top_groups[k].append(c)
|
||
break
|
||
else:
|
||
top_groups[c['y0']] = [c]
|
||
for row in top_groups.values():
|
||
if len(row) < 3:
|
||
continue
|
||
row.sort(key=lambda r: r['x0'])
|
||
gaps = [row[k + 1]['x0'] - row[k]['x1'] for k in range(len(row) - 1)]
|
||
if any(g < -self._ALIGN_TOL for g in gaps):
|
||
continue # overlapping/nested — not a simple row
|
||
spread = max(gaps) - min(gaps)
|
||
if 4.0 < spread and spread < 0.35 * max(gaps):
|
||
warnings.append(
|
||
f"{len(row)}-card row at y={row[0]['y0']:.0f} has uneven gaps "
|
||
f"({', '.join(f'{g:.0f}' for g in gaps)}px) — equalize to one gutter")
|
||
|
||
# 3. layout_grid lock enforcement (spec-declared baselines → error on
|
||
# near-miss deviation; clean break >=16px is allowed by contract).
|
||
lock = self._get_spec_lock(svg_path)
|
||
grid = (lock or {}).get('layout_grid') or {}
|
||
margin_x = self._f(grid.get('margin_x'))
|
||
content_top = self._f(grid.get('content_top'))
|
||
footer_y = self._f(grid.get('footer_y'))
|
||
rhythm = self._page_rhythm(svg_path, lock)
|
||
structural = rhythm == 'anchor'
|
||
if margin_x is not None:
|
||
self._grid_locked = True
|
||
if margin_x is not None and not structural:
|
||
seen = set()
|
||
for el, x0 in ([(f"card at y={c['y0']:.0f}", c['x0']) for c in cards]
|
||
+ [(f"text \"{t['label']}\"", t['x0'])
|
||
for t in texts if t['exact_left']]):
|
||
dev = abs(x0 - margin_x)
|
||
key = round(x0)
|
||
if self._ALIGN_TOL < dev < self._ALIGN_INTENT and key not in seen:
|
||
seen.add(key)
|
||
errors.append(
|
||
f"{el} sits at x={x0:.0f}, {dev:.0f}px off the locked "
|
||
f"margin_x={margin_x:.0f} — snap to the grid or clear it by >=16px")
|
||
if content_top is not None and not structural:
|
||
seen = set()
|
||
for el, y0 in ([(f"card at x={c['x0']:.0f}", c['y0']) for c in cards]
|
||
+ [(f"icon {i['label']}", i['y0']) for i in icons]):
|
||
dev = abs(y0 - content_top)
|
||
key = round(y0)
|
||
if self._ALIGN_TOL < dev < self._ALIGN_INTENT and key not in seen:
|
||
seen.add(key)
|
||
errors.append(
|
||
f"{el} starts at y={y0:.0f}, {dev:.0f}px off the locked "
|
||
f"content_top={content_top:.0f} — snap to the grid or clear it "
|
||
f"by >=16px")
|
||
if footer_y is not None and not structural:
|
||
for t in texts:
|
||
dev = abs(t['baseline'] - footer_y)
|
||
if self._ALIGN_TOL < dev < self._ALIGN_INTENT:
|
||
errors.append(
|
||
f"text \"{t['label']}\" baseline y={t['baseline']:.0f} is "
|
||
f"{dev:.0f}px off the locked footer_y={footer_y:.0f}")
|
||
break
|
||
|
||
# 4. Deck aggregation: primary left edge (margin-drift fallback when no
|
||
# layout_grid is locked) + layout-archetype fingerprint (monotony).
|
||
# Only numbered deck pages (NN_*.svg) participate — a directory of
|
||
# standalone template/chart SVGs is not a deck, and aggregating
|
||
# across unrelated files produces meaningless drift/monotony noise.
|
||
if not structural and self._PAGE_NUM_RE.match(svg_path.name):
|
||
edge_candidates = ([c['x0'] for c in cards]
|
||
+ [t['x0'] for t in texts if t['exact_left']])
|
||
edge_candidates = [x for x in edge_candidates if 0 < x < 0.25 * canvas_w]
|
||
if len(texts) >= 4 and edge_candidates:
|
||
self._page_left_edges[svg_path.name] = min(edge_candidates)
|
||
fp = self._layout_fingerprint(cards, icons)
|
||
if fp:
|
||
self._page_fingerprints[svg_path.name] = fp
|
||
|
||
for bucket, dest in ((errors, result['errors']), (warnings, result['warnings'])):
|
||
shown = bucket[:self._GEOM_MAX_REPORTS]
|
||
dest.extend(f"Alignment: {m}" for m in shown)
|
||
if len(bucket) > len(shown):
|
||
dest.append(
|
||
f"Alignment: ... and {len(bucket) - len(shown)} more "
|
||
f"similar issue(s) on this page")
|
||
|
||
def _print_alignment_summary(self):
|
||
"""Deck-level margin-drift fallback + layout-monotony gate.
|
||
|
||
Margin drift: without a layout_grid lock there is no declared baseline,
|
||
so cluster each content page's primary left edge — several distinct
|
||
values within 16px of each other is the drift signature ("meant to be
|
||
one margin line"), warning-tier only (legacy decks must not hard-fail).
|
||
Monotony: >=3 content pages sharing one grid fingerprint → warning;
|
||
>=4 or over half the deck → error (the user-visible "every page is
|
||
the same card wall" pathology). Same-family (any dims) repetition is
|
||
an advisory nudge. Short decks (<6 pages) exempt, like the flat gate.
|
||
"""
|
||
pages = self._deck_page_count
|
||
# Margin-drift fallback (only when no layout_grid was declared).
|
||
if not self._grid_locked and len(self._page_left_edges) >= 4:
|
||
centers = self._cluster_values(
|
||
self._page_left_edges.values(), self._ALIGN_TOL)
|
||
drifting = [
|
||
(a, b) for i, a in enumerate(centers) for b in centers[i + 1:]
|
||
if self._ALIGN_TOL < b - a < self._ALIGN_INTENT
|
||
]
|
||
if drifting:
|
||
self.summary['warnings'] += 1
|
||
vals = sorted({round(v) for pair in drifting for v in pair})
|
||
pages_by_edge = ', '.join(
|
||
f"{name}@{edge:.0f}" for name, edge
|
||
in sorted(self._page_left_edges.items())
|
||
if any(abs(edge - v) <= self._ALIGN_TOL for v in vals))
|
||
print(f"\n[WARN] Margin drift: content left edges cluster at "
|
||
f"{vals}px across pages — meant to be one margin line. "
|
||
f"Lock layout_grid in spec_lock.md and snap pages to it.")
|
||
print(f" ({pages_by_edge})")
|
||
# Layout monotony gate.
|
||
if pages < 6 or not self._page_fingerprints:
|
||
return
|
||
by_fp: Dict[tuple, list] = defaultdict(list)
|
||
by_family: Dict[str, list] = defaultdict(list)
|
||
for name, fp in self._page_fingerprints.items():
|
||
by_fp[fp].append(name)
|
||
by_family[fp[0]].append(name)
|
||
fp, members = max(by_fp.items(), key=lambda kv: len(kv[1]))
|
||
n = len(members)
|
||
label = f"{fp[1]}x{fp[2]} {fp[0]}"
|
||
_fix = ("Rework all but 1-2 of them into a different visual form — "
|
||
"timeline / layered architecture / quadrant / process flow / "
|
||
"hub-spoke / chart (templates/charts/) — per the content->layout "
|
||
"mapping in strategist.md; also check spec_lock page_layouts.")
|
||
if n >= 4 or (n >= 3 and n > 0.5 * pages):
|
||
self.summary['errors'] += 1
|
||
print(f"\n[ERROR] Layout monotony: {n} pages share the same {label} "
|
||
f"archetype ({', '.join(sorted(members))}) — the deck reads as "
|
||
f"the same card wall repeated.")
|
||
print(" " + _fix)
|
||
elif n >= 3:
|
||
self.summary['warnings'] += 1
|
||
print(f"\n[WARN] Layout monotony: {n} pages share the same {label} "
|
||
f"archetype ({', '.join(sorted(members))}) — consider reworking "
|
||
f"at least {n - 2} of them.")
|
||
print(" " + _fix)
|
||
else:
|
||
fam, fam_members = max(by_family.items(), key=lambda kv: len(kv[1]))
|
||
if len(fam_members) >= 4:
|
||
self.summary['warnings'] += 1
|
||
print(f"\n[WARN] Layout monotony: {len(fam_members)} pages are all "
|
||
f"{fam} layouts ({', '.join(sorted(fam_members))}) — vary the "
|
||
f"visual form even if the grid dims differ.")
|
||
print(" " + _fix)
|
||
|
||
def _print_graphic_summary(self):
|
||
"""Deck-level flat-deck gate.
|
||
|
||
Two hard-error bars for a text-heavy content deck (>=6 pages):
|
||
- ZERO figure primitives deck-wide → the wall-of-text-boxes pathology.
|
||
- >=60% of pages carry no figure → mostly-flat (a few token diagrams
|
||
don't rescue a deck that is otherwise text + boxes + icons).
|
||
Between 40% and 60% is a soft INFO nudge. Short decks (<6 pages) are
|
||
exempt to avoid false-failing minimalist / teaser decks. Figure =
|
||
path/polygon/polyline/circle/image (see _check_graphic_richness).
|
||
"""
|
||
pages = self._deck_page_count
|
||
if pages < 6:
|
||
return
|
||
avg_text = self._deck_text_total / pages
|
||
no_g = len(self._pages_no_graphic)
|
||
frac = no_g / pages
|
||
_fix = ("Map content shape -> a visual: comparison->columns/quadrant, "
|
||
"timeline->process, share->donut, trend->line, ≥3 data points->chart "
|
||
"(adapt a templates/charts/ template or draw it), add diagrams/imagery, "
|
||
"then re-run.")
|
||
if self._deck_graphic_total == 0 and avg_text >= 10:
|
||
self.summary['errors'] += 1
|
||
print(f"\n[ERROR] Visual richness: {pages} text-heavy pages but ZERO "
|
||
"diagram/figure primitives (<path>/<polygon>/<polyline>/<circle>/"
|
||
"<image>) deck-wide — the deck is text on rectangles.")
|
||
print(" " + _fix)
|
||
elif frac >= 0.6 and avg_text >= 10:
|
||
self.summary['errors'] += 1
|
||
print(f"\n[ERROR] Visual richness: {no_g}/{pages} pages carry no diagram/"
|
||
"figure (<path>/<polygon>/<polyline>/<circle>/<image>) — the deck is "
|
||
"mostly text + boxes; a few token diagrams don't cover a data/analysis "
|
||
"deck.")
|
||
print(" " + _fix)
|
||
elif frac >= 0.4:
|
||
print(f"\n[INFO] Visual richness: {no_g}/{pages} pages have no diagram/figure "
|
||
"(<path>/<polygon>/<polyline>/<circle>/<image>) — verify dense content "
|
||
"pages aren't just text + boxes.")
|
||
|
||
def print_summary(self):
    """Print the run summary: per-file totals, issue categories, the
    deck-level aggregations, and a short list of common fixes.

    The deck-level helpers called here may add to
    ``self.summary['errors']`` / ``self.summary['warnings']``; the fix
    tips at the end are printed when either counter is non-zero after
    they ran.
    """
    print("=" * 80)
    print("[SUMMARY] Check Summary")
    print("=" * 80)

    print(f"\nTotal files: {self.summary['total']}")
    print(
        f" [OK] Fully passed: {self.summary['passed']} ({self._percentage(self.summary['passed'])}%)")
    print(
        f" [WARN] With warnings: {self.summary['warnings']} ({self._percentage(self.summary['warnings'])}%)")
    print(
        f" [ERROR] With errors: {self.summary['errors']} ({self._percentage(self.summary['errors'])}%)")

    if self.issue_types:
        print("\nIssue categories:")
        # Most frequent category first.
        for issue_type, count in sorted(self.issue_types.items(), key=lambda x: x[1], reverse=True):
            print(f" {issue_type}: {count}")

    # spec_lock drift aggregation (only printed when a lock was found)
    self._print_drift_summary()

    # Template-mode aggregation (orphan/missing roster + placeholder hints)
    self._print_template_summary()

    # Animation config aggregation.
    self._print_animation_summary()

    # Deck-level icon-usage gate (declared inventory but icon-less deck).
    self._print_icon_summary()

    # Deck-level flat-deck gate (text-on-rectangles, no diagrams/figures).
    self._print_graphic_summary()

    # Deck-level margin-drift fallback + layout-monotony gate.
    self._print_alignment_summary()

    # Fix suggestions
    if self.summary['errors'] > 0 or self.summary['warnings'] > 0:
        # Entity names are assembled at runtime so the advice survives any
        # tool that decodes HTML entities in source text: decoded, the
        # literal would contain bare quote characters and end early.
        xml_escapes = " ".join(
            f"&{name};" for name in ("amp", "lt", "gt", "quot", "apos"))
        html_entities = " ".join(f"&{name};" for name in ("mdash", "copy"))
        print("\n[TIP] Common fixes:")
        print(" 1. XML well-formedness: write typography as raw Unicode (—, ©, →, NBSP); "
              f"escape XML reserved chars as {xml_escapes} — never use HTML named "
              f"entities like {html_entities}")
        print(" 2. viewBox issues: Ensure consistency with canvas format (see references/canvas-formats.md)")
        print(" 3. foreignObject: Use <text> + <tspan> for manual line breaks")
        print(" 4. Font issues: end every font-family stack with a PPT-safe family (e.g. Microsoft YaHei / Arial / Consolas)")
|
||
|
||
def _print_animation_summary(self):
|
||
"""Print animations.json validation issues if present."""
|
||
if not self._animation_issues:
|
||
return
|
||
|
||
errors = [item for item in self._animation_issues if item[0] == 'error']
|
||
warnings = [item for item in self._animation_issues if item[0] == 'warning']
|
||
self.summary['errors'] += len(errors)
|
||
self.summary['warnings'] += len(warnings)
|
||
for severity, _msg in self._animation_issues:
|
||
self.issue_types[f'animation_config_{severity}'] += 1
|
||
|
||
print("\n[ANIMATION] animations.json checks")
|
||
for _severity, msg in errors:
|
||
print(f" [ERROR] {msg}")
|
||
for _severity, msg in warnings:
|
||
print(f" [WARN] {msg}")
|
||
|
||
def _print_template_summary(self):
|
||
"""Aggregate template-mode roster / placeholder issues at the bottom.
|
||
|
||
Errors land under the ``errors`` summary count (so the exit signal
|
||
from ``main`` agrees), warnings under ``warnings``. Both are listed
|
||
per file so the user can act on them directly.
|
||
"""
|
||
if not self._template_issues:
|
||
return
|
||
|
||
errors = [item for item in self._template_issues if item[0] == 'error']
|
||
warnings = [item for item in self._template_issues if item[0] == 'warning']
|
||
|
||
# Mirror into the global summary so downstream "0 errors" gates honor
|
||
# template-mode issues.
|
||
self.summary['errors'] += len(errors)
|
||
self.summary['warnings'] += len(warnings)
|
||
for severity, kind, _msg in self._template_issues:
|
||
self.issue_types[f"template_{kind}"] += 1
|
||
|
||
print("\n[TEMPLATE] Template mode checks")
|
||
if errors:
|
||
print(f" Errors ({len(errors)}):")
|
||
for _sev, kind, msg in errors:
|
||
print(f" [{kind}] {msg}")
|
||
if warnings:
|
||
print(f" Warnings ({len(warnings)}):")
|
||
for _sev, kind, msg in warnings:
|
||
print(f" [{kind}] {msg}")
|
||
if not errors:
|
||
print(" No structural roster issues. Placeholder hints above are advisory only;")
|
||
print(" declare 'placeholders:' frontmatter in design_spec.md to silence them.")
|
||
|
||
def _print_drift_summary(self):
|
||
"""Print spec_lock drift aggregation if any was observed.
|
||
|
||
Values are sorted by file-count descending so frequent drift surfaces
|
||
first. Frequent drift usually means spec_lock.md is missing entries
|
||
the Strategist should have included; rare drift is more likely actual
|
||
Executor drift and warrants SVG review.
|
||
"""
|
||
if not self._lock_seen:
|
||
return
|
||
has_drift = any(self._drift_summary[cat] for cat in self._drift_summary)
|
||
if not has_drift:
|
||
print("\n[OK] spec_lock drift: none — all colors, fonts, and sizes are anchored to spec_lock.md")
|
||
return
|
||
|
||
print("\nspec_lock drift — values used outside spec_lock.md:")
|
||
labels = [('colors', 'Colors'),
|
||
('fonts', 'Font families'),
|
||
('sizes', 'Font sizes')]
|
||
for category, label in labels:
|
||
items = self._drift_summary.get(category, {})
|
||
if not items:
|
||
continue
|
||
entries = sorted(items.items(), key=lambda x: (-len(x[1]), x[0]))
|
||
print(f" {label}:")
|
||
for val, files in entries:
|
||
n = len(files)
|
||
suffix = "file" if n == 1 else "files"
|
||
print(f" {val} ({n} {suffix})")
|
||
print(
|
||
"Tip: frequent out-of-lock values usually mean spec_lock.md is missing\n"
|
||
" entries — extend the lock (scripts/update_spec.py or manual edit).\n"
|
||
" Rare ones are likely Executor drift — review the affected SVGs."
|
||
)
|
||
|
||
def _percentage(self, count: int) -> int:
|
||
"""Calculate percentage"""
|
||
if self.summary['total'] == 0:
|
||
return 0
|
||
return int(count / self.summary['total'] * 100)
|
||
|
||
def export_report(self, output_file: str = 'svg_quality_report.txt'):
|
||
"""Export check report"""
|
||
with open(output_file, 'w', encoding='utf-8') as f:
|
||
f.write("PPT Master SVG Quality Check Report\n")
|
||
f.write("=" * 80 + "\n\n")
|
||
|
||
for result in self.results:
|
||
status = "[OK] Passed" if result['passed'] else "[ERROR] Failed"
|
||
f.write(f"{status} - {result['file']}\n")
|
||
f.write(f"Path: {result.get('path', 'N/A')}\n")
|
||
|
||
if result['info']:
|
||
f.write(f"Info: {result['info']}\n")
|
||
|
||
if result['errors']:
|
||
f.write(f"\nErrors:\n")
|
||
for error in result['errors']:
|
||
f.write(f" - {error}\n")
|
||
|
||
if result['warnings']:
|
||
f.write(f"\nWarnings:\n")
|
||
for warning in result['warnings']:
|
||
f.write(f" - {warning}\n")
|
||
|
||
f.write("\n" + "-" * 80 + "\n\n")
|
||
|
||
# Write summary
|
||
f.write("\n" + "=" * 80 + "\n")
|
||
f.write("Check Summary\n")
|
||
f.write("=" * 80 + "\n\n")
|
||
f.write(f"Total files: {self.summary['total']}\n")
|
||
f.write(f"Fully passed: {self.summary['passed']}\n")
|
||
f.write(f"With warnings: {self.summary['warnings']}\n")
|
||
f.write(f"With errors: {self.summary['errors']}\n")
|
||
|
||
print(f"\n[REPORT] Check report exported: {output_file}")
|
||
|
||
|
||
def print_usage() -> None:
    """Print CLI usage information.

    Covers the invocation forms, worked examples, and every option the
    entry point reads from the command line: ``--format``,
    ``--template-mode``, ``--export`` and ``--output``.
    """
    usage_lines = (
        "PPT Master - SVG Quality Check Tool\n",
        "Usage:",
        " python3 scripts/svg_quality_checker.py <svg_file>",
        " python3 scripts/svg_quality_checker.py <directory>",
        " python3 scripts/svg_quality_checker.py <template_dir> --template-mode",
        " python3 scripts/svg_quality_checker.py --all examples",
        "\nExamples:",
        " python3 scripts/svg_quality_checker.py examples/project/svg_output/slide_01.svg",
        " python3 scripts/svg_quality_checker.py examples/project/svg_output",
        " python3 scripts/svg_quality_checker.py examples/project",
        " python3 scripts/svg_quality_checker.py templates/layouts/academic_defense --template-mode",
        " python3 scripts/svg_quality_checker.py templates/decks/招商银行 --template-mode",
        "\nOptions:",
        " --format <ppt169|ppt43|...> Expected canvas format",
        " --template-mode Validate a templates/{layouts,decks}/<id> directory:",
        " glob *.svg directly, skip spec_lock checks,",
        " enforce roster ↔ design_spec.md Page Roster consistency,",
        " and emit advisory placeholder-convention warnings.",
        # These two flags are parsed by main() but were missing from the
        # help text, so they could not be discovered from the CLI.
        " --export Write a plain-text report after the check",
        " --output <file> Report path used with --export (default: svg_quality_report.txt)",
    )
    for line in usage_lines:
        print(line)
|
||
|
||
|
||
def main() -> None:
    """Run the CLI entry point.

    Reads ``sys.argv`` directly. The first argument is the check target
    (an SVG file or directory) or ``--all``, optionally followed by a base
    directory that defaults to ``examples``. Recognised options are
    ``--format <name>``, ``--template-mode``, ``--export`` and
    ``--output <file>``.

    Exits with status 0 after printing usage (no arguments or a help
    flag), 1 when the first argument is an option other than ``--all``,
    and otherwise 1 if the checker's summary counted any errors, else 0.
    """
    argv = sys.argv

    if len(argv) < 2:
        print_usage()
        sys.exit(0)

    first = argv[1]
    if first in {"-h", "--help", "help"}:
        print_usage()
        sys.exit(0)

    if first.startswith("--") and first != "--all":
        print(f"[ERROR] Missing target before option: {first}")
        print_usage()
        sys.exit(1)

    def option_value(flag: str):
        """Return the token after *flag*, or None if absent or last."""
        if flag not in argv:
            return None
        position = argv.index(flag) + 1
        return argv[position] if position < len(argv) else None

    template_mode = '--template-mode' in argv
    checker = SVGQualityChecker(template_mode=template_mode)

    target = first
    expected_format = option_value('--format')

    if target == '--all':
        # Only a non-option token counts as the base directory. Previously
        # ``--all --export`` scanned a directory literally named
        # ``--export`` instead of falling back to ``examples``.
        base_dir = 'examples'
        if len(argv) > 2 and not argv[2].startswith('--'):
            base_dir = argv[2]

        from project_utils import find_all_projects
        projects = find_all_projects(base_dir)

        for project in projects:
            print(f"\n{'=' * 80}")
            print(f"Checking project: {project.name}")
            print('=' * 80)
            checker.check_directory(str(project))
    else:
        checker.check_directory(target, expected_format)

    checker.print_summary()

    # The report is optional; --output only matters together with --export.
    if '--export' in argv:
        output_file = option_value('--output')
        if output_file is None:
            output_file = 'svg_quality_report.txt'
        checker.export_report(output_file)

    # A non-zero exit lets callers gate on the error count.
    sys.exit(1 if checker.summary['errors'] > 0 else 0)
|
||
|
||
|
||
# Script entry point: run the CLI only on direct execution, not on import.
if __name__ == "__main__":
    main()
|