zcbot/skills/ppt/scripts/svg_finalize/align_embed_images.py

464 lines
16 KiB
Python

#!/usr/bin/env python3
"""PPT Master — single-pass image alignment + Base64 embedding.
Replaces the previous three independent finalize_svg steps:

    crop-images   → for each <image preserveAspectRatio="… slice"/>, crop the
                    source bitmap to the target aspect ratio at the given
                    anchor and write to ``images/cropped/`` so the SVG
                    reference points to a pre-cropped asset.
    fix-aspect    → for each <image>, read the source bitmap dimensions and
                    adjust x/y/width/height so the rendered box matches the
                    image aspect ratio (PowerPoint's "Convert to Shape"
                    ignores preserveAspectRatio and stretches otherwise).
    embed-images  → Base64-inline every external image reference so the
                    legacy/preview pptx (which packages the SVG verbatim)
                    can resolve them — pptx-internal SVG cannot follow
                    ``../images/…`` relative URIs.

Why merge: each step independently parsed + serialized the SVG, each step
re-read the same bitmap from disk, and the two spatial transforms (crop and
fit-box) are mutually exclusive yet were sequenced one after the other.
The fix-aspect default ``preserveAspectRatio = "xMidYMid meet"`` could
also kick in on rects already cropped by crop-images (whose par was
already removed), with the only thing keeping it from corrupting the
geometry being that crop and fix-aspect happened to produce numerically
equal box dimensions — a brittle accident.
The merged pipeline:

    for image in svg:
        if href starts with data:            → skip (already inline)
        if href is unresolvable / external URL → leave untouched, count as error
        if href points to EMF/WMF            → skip (native PPTX passthrough only)
        if missing preserveAspectRatio       → just embed (do not assume meet)
        if align == none                     → just embed (no spatial transform)
        if mode == slice                     → crop in memory, embed cropped bytes
        if mode == meet                      → adjust x/y/w/h, embed original bytes
    write SVG once

Bonus: the cropped bitmap is base64-inlined directly without going through
``images/cropped/``, so that intermediate directory disappears and stale
crops can no longer accumulate across re-runs.
"""
from __future__ import annotations

import base64
import io
import math
import os
import re
import sys
from pathlib import Path
from typing import TYPE_CHECKING
from urllib.parse import unquote
from xml.etree import ElementTree as ET
# When this file is executed directly (no package context), the relative
# imports below would fail. Register a synthetic ``svg_finalize`` package
# whose ``__path__`` is this file's directory and adopt it as ``__package__``
# so ``from .crop_images import …`` resolves against the sibling modules.
if __package__ in {None, ''}:
    import types
    package = types.ModuleType('svg_finalize')
    package.__path__ = [str(Path(__file__).resolve().parent)]  # type: ignore[attr-defined]
    # setdefault: never replace a real ``svg_finalize`` package that is
    # already registered in sys.modules.
    sys.modules.setdefault('svg_finalize', package)
    __package__ = 'svg_finalize'
# Reuse helpers from the previous standalone modules.
from .crop_images import crop_image_to_size, get_crop_anchor, parse_preserve_aspect_ratio
from .embed_images import _optimize_image_bytes, get_mime_type
from .fix_image_aspect import calculate_fitted_dimensions
if TYPE_CHECKING:  # pragma: no cover
    # Bind the image *class*, not the ``PIL.Image`` module: the name is used
    # as a type in the ``'PILImage'`` annotations throughout this file, and a
    # module is not a valid type for static checkers. The import never runs
    # at runtime, so PIL stays an optional dependency.
    from PIL.Image import Image as PILImage  # noqa: F401
# XML namespace URIs, used to build the "{uri}local" tag and attribute names
# that ElementTree uses for namespaced lookups.
SVG_NS = 'http://www.w3.org/2000/svg'
XLINK_NS = 'http://www.w3.org/1999/xlink'
# PIL save format is named slightly differently from the file extension /
# MIME type set we expose elsewhere; this map covers the formats we accept.
# MIME types missing from the map are saved as PNG by
# ``_encode_pil_to_data_uri``.
_PIL_FORMAT_BY_MIME = {
    'image/png': 'PNG',
    'image/jpeg': 'JPEG',
    'image/gif': 'GIF',
    'image/webp': 'WEBP',
}
# File extensions (lower-case, with dot) of Office vector images. References
# to these are neither decoded nor embedded; they stay external.
_OFFICE_VECTOR_EXTENSIONS = {'.emf', '.wmf'}
def _parse_float(val: str | None, default: float = 0.0) -> float:
"""Best-effort float parse, tolerating trailing ``px`` etc."""
if val is None or val == '':
return default
try:
return float(re.sub(r'(px|pt|em|%|rem)$', '', val.strip()))
except (ValueError, AttributeError):
return default
def _format_number(n: float) -> str:
"""Format a float for compact SVG attribute output."""
if abs(n - round(n)) < 1e-6:
return str(int(round(n)))
s = f"{n:.2f}".rstrip('0').rstrip('.')
return s or '0'
def _resolve_image_path(href: str, svg_dir: Path) -> Path | None:
"""Resolve an <image> href to a local filesystem path.
Returns None for unresolvable references (http/https/etc.) so callers
can leave those refs untouched.
"""
if not href:
return None
decoded = unquote(href)
if decoded.startswith(('http://', 'https://', 'file://')):
return None
if os.path.isabs(decoded):
candidate = Path(decoded)
else:
candidate = (svg_dir / decoded).resolve()
return candidate if candidate.exists() else None
def _load_pil_image(img_path: Path) -> 'PILImage' | None:
"""Open an image with PIL, returning None on any failure."""
try:
from PIL import Image
except ImportError:
return None
try:
return Image.open(img_path)
except (OSError, ValueError):
return None
def _normalize_for_save(img: 'PILImage', mime_type: str) -> 'PILImage':
"""Coerce a PIL image into a mode that the target format can save.
JPEG cannot store alpha — flatten to white background. Other formats
keep alpha when present.
"""
if mime_type == 'image/jpeg':
if img.mode in ('RGBA', 'LA'):
from PIL import Image
background = Image.new('RGB', img.size, (255, 255, 255))
alpha = img.getchannel('A') if img.mode == 'RGBA' else None
background.paste(img.convert('RGB'), mask=alpha)
return background
if img.mode != 'RGB':
return img.convert('RGB')
return img
# PNG / GIF / WEBP — preserve alpha if present
if img.mode == 'P':
return img.convert('RGBA' if 'A' in img.getbands() else 'RGB')
return img
def _encode_pil_to_data_uri(
    img: 'PILImage',
    src_path: Path,
    *,
    compress: bool,
    max_dimension: int | None,
    fallback_bytes: bytes | None,
) -> tuple[str, int] | None:
    """Serialize *img* to a base64 data URI.

    The image is re-encoded with PIL in the format matching the source MIME
    type (PNG when the type has no entry in ``_PIL_FORMAT_BY_MIME``). When
    *fallback_bytes* is given, the raw on-disk bytes are embedded instead if
    they are smaller than the re-encoded result or if re-encoding fails.
    The chosen bytes are then passed through ``_optimize_image_bytes``.

    Args:
        img: Image to embed (possibly cropped).
        src_path: Original file path; its name feeds MIME detection.
        compress: Forwarded to ``_optimize_image_bytes``.
        max_dimension: Forwarded to ``_optimize_image_bytes``.
        fallback_bytes: Raw file bytes for untransformed images, or None
            when the bitmap content was changed and the original bytes no
            longer represent it.

    Returns:
        ``(data_uri, payload_size_in_bytes)``, or None when PIL encoding
        failed and no fallback bytes were supplied.
    """
    source_mime = get_mime_type(src_path.name, fallback_bytes)
    pil_format = _PIL_FORMAT_BY_MIME.get(source_mime)
    if pil_format is None:
        # Unsupported source type: the bitmap is written as PNG, so the
        # data URI must say so instead of repeating the source MIME type.
        pil_format = 'PNG'
        encoded_mime = 'image/png'
    else:
        encoded_mime = source_mime

    # Encode current PIL image
    encoded_bytes: bytes | None
    try:
        prepared = _normalize_for_save(img, encoded_mime)
        buf = io.BytesIO()
        save_kwargs: dict = {'format': pil_format}
        if pil_format == 'JPEG':
            save_kwargs['quality'] = 95
            save_kwargs['optimize'] = True
        elif pil_format == 'PNG':
            save_kwargs['optimize'] = True
        prepared.save(buf, **save_kwargs)
        encoded_bytes = buf.getvalue()
    except (OSError, ValueError):
        encoded_bytes = None

    if encoded_bytes is None:
        # Re-encoding failed; the untouched file bytes are still embeddable.
        if not fallback_bytes:
            return None
        chosen, chosen_mime = fallback_bytes, source_mime
    elif fallback_bytes and len(fallback_bytes) < len(encoded_bytes):
        # PIL round-tripping an already well-compressed asset inflates it;
        # keep the original bytes (and their original MIME type).
        chosen, chosen_mime = fallback_bytes, source_mime
    else:
        chosen, chosen_mime = encoded_bytes, encoded_mime

    chosen = _optimize_image_bytes(
        chosen, chosen_mime, compress=compress, max_dimension=max_dimension,
    )
    b64 = base64.b64encode(chosen).decode('ascii')
    return f'data:{chosen_mime};base64,{b64}', len(chosen)
def _iter_image_elements(root: ET.Element):
    """Generate all <image> descendants of *root*, namespaced or not.

    Elements in the SVG namespace come first, followed by elements whose
    tag is the bare ``image`` (trees parsed without a namespace).
    """
    qualified_tag = f'{{{SVG_NS}}}image'
    for tag_name in (qualified_tag, 'image'):
        yield from root.iter(tag_name)
def _get_href(image: ET.Element) -> str | None:
"""Return the image href, supporting both ``href`` and ``xlink:href``."""
return image.get('href') or image.get(f'{{{XLINK_NS}}}href')
def _set_href(image: ET.Element, value: str) -> None:
    """Write the data URI back to whichever href attribute the image used.

    ``xlink:href`` is updated when present, otherwise plain ``href`` is set.
    When the element carries both, the plain ``href`` is removed so the
    element ends up with exactly one reference.

    Args:
        image: The <image> element to update in place.
        value: New href value (the data URI).
    """
    xlink_key = f'{{{XLINK_NS}}}href'
    if image.get(xlink_key) is None:
        image.set('href', value)
        return
    image.set(xlink_key, value)
    # SVG 2 gives a plain ``href`` precedence over ``xlink:href``. Leaving a
    # stale (or empty) plain href next to the freshly embedded xlink:href
    # would keep renderers pointing at the external file.
    image.attrib.pop('href', None)
def _meet_alignment_fractions(align: str) -> tuple[float, float]:
    """Map a preserveAspectRatio align keyword to (x, y) slack fractions.

    0.0 places the fitted image at the min edge of the box, 0.5 centres it
    and 1.0 places it at the max edge. Unrecognised input centres the image.
    """
    token = str(align).lower()
    if 'xmin' in token:
        frac_x = 0.0
    elif 'xmax' in token:
        frac_x = 1.0
    else:
        frac_x = 0.5
    if 'ymin' in token:
        frac_y = 0.0
    elif 'ymax' in token:
        frac_y = 1.0
    else:
        frac_y = 0.5
    return frac_x, frac_y


def _process_one_image(
    image: ET.Element,
    svg_dir: Path,
    *,
    compress: bool,
    max_dimension: int | None,
    verbose: bool,
) -> tuple[bool, str | None]:
    """Align (slice/meet) and embed a single <image>.

    Behaviour by ``preserveAspectRatio``:

    * absent, or align ``none`` — embed only; geometry and the attribute
      are left exactly as authored.
    * ``slice`` — crop the bitmap in memory to the box, embed the cropped
      bytes, drop the attribute.
    * ``meet`` (or any other mode) — shrink x/y/width/height to the fitted
      size, positioned according to the align keyword, embed the original
      bitmap, drop the attribute.

    Args:
        image: The <image> element, modified in place on success.
        svg_dir: Directory of the SVG, used to resolve relative hrefs.
        compress: Forwarded to the encoder.
        max_dimension: Forwarded to the encoder.
        verbose: Print a status line per image.

    Returns:
        ``(processed, error)`` where *processed* is True iff the image
        was rewritten and *error* is a short message when something went
        wrong (the image is left untouched in that case).
    """
    href = _get_href(image)
    if not href or href.startswith('data:'):
        return False, None  # nothing to do / already inline

    img_path = _resolve_image_path(href, svg_dir)
    if img_path is None:
        return False, f'unresolved href: {href[:60]}'

    # Decide on the extension before touching the file: EMF/WMF are never
    # embedded, so reading them would be wasted I/O.
    if img_path.suffix.lower() in _OFFICE_VECTOR_EXTENSIONS:
        if verbose:
            print(f' [INFO] {img_path.name}: Office vector left external for native PPTX passthrough')
        return False, None

    try:
        with open(img_path, 'rb') as fh:
            raw_bytes = fh.read()
    except OSError as exc:
        return False, f'read failed: {exc}'

    img = _load_pil_image(img_path)
    if img is None:
        return False, 'PIL open failed'

    try:
        box_x = _parse_float(image.get('x'))
        box_y = _parse_float(image.get('y'))
        box_w = _parse_float(image.get('width'))
        box_h = _parse_float(image.get('height'))
        if box_w <= 0 or box_h <= 0:
            return False, 'zero-sized box'

        par_attr = (image.get('preserveAspectRatio') or '').strip()

        # --------------------------------------------------------------
        # Decide the spatial transform
        # --------------------------------------------------------------
        final_img: 'PILImage' = img
        transformed = False  # True iff bitmap content changed (crop happened)
        fitted_box: tuple[float, float, float, float] | None = None
        drop_par = False  # True once the transform has been baked in

        # No preserveAspectRatio at all: the previous pipeline's fix-aspect
        # step assumed "xMidYMid meet" here, which silently re-fit images
        # that crop-images had already shaped. Absence means "leave it
        # alone": embed bytes, keep box.
        if par_attr:
            align, mode = parse_preserve_aspect_ratio(par_attr)
            if align == 'none':
                # Author wants stretch-to-box. Keep geometry AND the
                # attribute: without it an SVG renderer falls back to the
                # default "xMidYMid meet" and stops stretching.
                pass
            elif mode == 'slice':
                x_anchor, y_anchor = get_crop_anchor(align)
                # Round instead of truncating, and never ask for a
                # zero-pixel crop when the box is smaller than one unit.
                target_w = max(1, int(round(box_w)))
                target_h = max(1, int(round(box_h)))
                try:
                    final_img = crop_image_to_size(
                        img, target_w, target_h, x_anchor, y_anchor,
                    )
                except (OSError, ValueError) as exc:
                    return False, f'crop failed: {exc}'
                transformed = True
                drop_par = True
            else:  # meet (or any other mode → treat as meet)
                fit_w, fit_h, _, _ = calculate_fitted_dimensions(
                    img.size[0], img.size[1], box_w, box_h, mode='meet',
                )
                # Distribute the leftover space according to the align
                # keyword (xMin/xMid/xMax, yMin/yMid/yMax) rather than
                # always centring.
                frac_x, frac_y = _meet_alignment_fractions(align)
                fitted_box = (
                    box_x + (box_w - fit_w) * frac_x,
                    box_y + (box_h - fit_h) * frac_y,
                    fit_w,
                    fit_h,
                )
                drop_par = True

        # --------------------------------------------------------------
        # Encode and rewrite
        # --------------------------------------------------------------
        encoded = _encode_pil_to_data_uri(
            final_img,
            img_path,
            compress=compress,
            max_dimension=max_dimension,
            fallback_bytes=None if transformed else raw_bytes,
        )
        if encoded is None:
            return False, 'encode failed'
        data_uri, _ = encoded

        _set_href(image, data_uri)
        # Geometry is rewritten only when meet actually changed it, so
        # authored values (units, omitted x/y) survive untouched otherwise.
        if fitted_box is not None:
            for attr, value in zip(('x', 'y', 'width', 'height'), fitted_box):
                image.set(attr, _format_number(value))
        if drop_par:
            image.attrib.pop('preserveAspectRatio', None)

        if verbose:
            suffix = ' (cropped)' if transformed else ''
            print(f' [OK] {img_path.name}{suffix}')
        return True, None
    finally:
        # Release the decoder's file handle and pixel memory.
        img.close()
def count_office_vector_refs_in_svg(svg_path: str | Path) -> int:
"""Count local EMF/WMF image refs that the embed pass intentionally skips."""
svg_path = Path(svg_path)
svg_dir = svg_path.parent.resolve()
try:
tree = ET.parse(svg_path)
except ET.ParseError:
return 0
count = 0
seen: set[int] = set()
for image in _iter_image_elements(tree.getroot()):
ident = id(image)
if ident in seen:
continue
seen.add(ident)
href = _get_href(image)
if not href or href.startswith('data:'):
continue
img_path = _resolve_image_path(href, svg_dir)
if img_path and img_path.suffix.lower() in _OFFICE_VECTOR_EXTENSIONS:
count += 1
return count
def _preview_one_image(
    image: ET.Element,
    svg_dir: Path,
) -> tuple[bool, str | None]:
    """Dry-run counterpart of ``_process_one_image``: classify, change nothing.

    Applies the same href checks (missing/inline, unresolved, EMF/WMF) but
    does not read or decode the file, so decode failures are not predicted.
    """
    href = _get_href(image)
    if not href or href.startswith('data:'):
        return False, None
    img_path = _resolve_image_path(href, svg_dir)
    if img_path is None:
        return False, f'unresolved href: {href[:60]}'
    if img_path.suffix.lower() in _OFFICE_VECTOR_EXTENSIONS:
        return False, None
    return True, None


def align_and_embed_images_in_svg(
    svg_path: str | Path,
    *,
    dry_run: bool = False,
    verbose: bool = False,
    compress: bool = False,
    max_dimension: int | None = None,
) -> tuple[int, int]:
    """Run the merged align + embed pass on a single SVG file.

    The file is rewritten in place, once, and only when at least one image
    was processed.

    Args:
        svg_path: SVG file to process.
        dry_run: Report what would be processed without decoding images or
            writing the file. Images that a real run would skip (inline
            data URIs, EMF/WMF) are not counted; unresolved hrefs are
            counted as errors, as in a real run.
        verbose: Print per-image status and warning lines.
        compress: Forwarded to the per-image encoder.
        max_dimension: Forwarded to the per-image encoder.

    Returns:
        ``(processed_count, error_count)``. A file that fails to parse
        yields ``(0, 1)``.
    """
    svg_path = Path(svg_path)
    svg_dir = svg_path.parent.resolve()

    # Register namespaces for clean serialization
    ET.register_namespace('', SVG_NS)
    ET.register_namespace('xlink', XLINK_NS)

    try:
        tree = ET.parse(svg_path)
    except ET.ParseError as exc:
        if verbose:
            print(f' [ERROR] {svg_path.name}: parse failed ({exc})')
        return (0, 1)
    root = tree.getroot()

    # Avoid double-iteration if an element matches both namespaced and
    # bare-tag iteration paths.
    seen: set[int] = set()
    processed = 0
    errors = 0
    for image in _iter_image_elements(root):
        ident = id(image)
        if ident in seen:
            continue
        seen.add(ident)

        if dry_run:
            ok, err = _preview_one_image(image, svg_dir)
        else:
            ok, err = _process_one_image(
                image, svg_dir,
                compress=compress, max_dimension=max_dimension, verbose=verbose,
            )
        if ok:
            processed += 1
        elif err:
            errors += 1
            if verbose:
                print(f' [WARN] {svg_path.name}: {err}')

    if processed > 0 and not dry_run:
        tree.write(svg_path, encoding='utf-8', xml_declaration=False)
    return (processed, errors)
# ---------------------------------------------------------------------------
# Standalone CLI (rare; the main entry point is finalize_svg.py)
# ---------------------------------------------------------------------------
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser used by the standalone diagnostic CLI."""
    # Imported lazily: only the rarely used standalone entry point needs it.
    import argparse

    cli = argparse.ArgumentParser(
        description='Align (slice/meet) and Base64-embed all <image> refs in an SVG.',
    )
    cli.add_argument('svg', type=Path, help='SVG file to process in place')
    # Plain on/off switches without help text.
    for short_flag, long_flag in (('-n', '--dry-run'), ('-v', '--verbose')):
        cli.add_argument(short_flag, long_flag, action='store_true')
    cli.add_argument(
        '--compress',
        action='store_true',
        help='Compress images before embedding',
    )
    cli.add_argument(
        '--max-dimension',
        type=int,
        default=None,
        help='Downscale images larger than this on either axis',
    )
    return cli
def main(argv: list[str] | None = None) -> int:
    """Entry point of the standalone diagnostic CLI.

    Returns the process exit status: 1 when the SVG path does not exist or
    any image produced an error, otherwise 0.
    """
    options = build_parser().parse_args(argv)
    target = options.svg

    if not target.exists():
        print(f'Error: file not found: {target}', file=sys.stderr)
        return 1

    processed_count, error_count = align_and_embed_images_in_svg(
        target,
        dry_run=options.dry_run,
        verbose=options.verbose,
        compress=options.compress,
        max_dimension=options.max_dimension,
    )
    print(f'Processed {processed_count} image(s), {error_count} error(s)')
    return int(bool(error_count))
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())