475 lines
16 KiB
Python
475 lines
16 KiB
Python
#!/usr/bin/env python3
|
|
"""PPT Master — single-pass image alignment + Base64 embedding.
|
|
|
|
Replaces the previous three independent finalize_svg steps:
|
|
|
|
crop-images → for each <image preserveAspectRatio="… slice"/>, crop the
|
|
source bitmap to the target aspect ratio at the given
|
|
anchor and write to ``images/cropped/`` so the SVG
|
|
reference points to a pre-cropped asset.
|
|
fix-aspect → for each <image>, read the source bitmap dimensions and
|
|
adjust x/y/width/height so the rendered box matches the
|
|
image aspect ratio (PowerPoint's "Convert to Shape"
|
|
ignores preserveAspectRatio and stretches otherwise).
|
|
embed-images → Base64-inline every external image reference so the
|
|
legacy/preview pptx (which packages the SVG verbatim)
|
|
can resolve them — pptx-internal SVG cannot follow
|
|
``../images/…`` relative URIs.
|
|
|
|
Why merge: each step independently parsed + serialized the SVG, each step
|
|
re-read the same bitmap from disk, and the two spatial transforms (crop and
|
|
fit-box) are mutually exclusive yet were sequenced one after the other.
|
|
The fix-aspect default ``preserveAspectRatio = "xMidYMid meet"`` could
|
|
also kick in on rects already cropped by crop-images (whose par was
|
|
already removed), with the only thing keeping it from corrupting the
|
|
geometry being that crop and fix-aspect happened to produce numerically
|
|
equal box dimensions — a brittle accident.
|
|
|
|
The merged pipeline:
|
|
|
|
for image in svg:
|
|
if href starts with data: → skip (already inline)
|
|
if href is unresolvable / external URL → skip
|
|
if href points to EMF/WMF → skip (native PPTX passthrough only)
|
|
if missing preserveAspectRatio → just embed (do not assume meet)
|
|
if align == none → just embed (no spatial transform)
|
|
if mode == slice → crop in memory, embed cropped bytes
|
|
if mode == meet → adjust x/y/w/h, embed original bytes
|
|
write SVG once
|
|
|
|
Bonus: the cropped bitmap is base64-inlined directly without going through
|
|
``images/cropped/``, so that intermediate directory disappears and stale
|
|
crops can no longer accumulate across re-runs.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
import io
|
|
import os
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING
|
|
from urllib.parse import unquote
|
|
from xml.etree import ElementTree as ET
|
|
|
|
if __package__ in {None, ''}:
|
|
import types
|
|
|
|
package = types.ModuleType('svg_finalize')
|
|
package.__path__ = [str(Path(__file__).resolve().parent)] # type: ignore[attr-defined]
|
|
sys.modules.setdefault('svg_finalize', package)
|
|
__package__ = 'svg_finalize'
|
|
|
|
# Reuse helpers from the previous standalone modules.
|
|
from .crop_images import crop_image_to_size, get_crop_anchor, parse_preserve_aspect_ratio
|
|
from .embed_images import _optimize_image_bytes, get_mime_type
|
|
from .fix_image_aspect import calculate_fitted_dimensions
|
|
|
|
if TYPE_CHECKING: # pragma: no cover
|
|
from PIL import Image as PILImage # noqa: F401
|
|
|
|
|
|
SVG_NS = 'http://www.w3.org/2000/svg'
|
|
XLINK_NS = 'http://www.w3.org/1999/xlink'
|
|
|
|
# PIL save format is named slightly differently from the file extension /
|
|
# MIME type set we expose elsewhere; this map covers the formats we accept.
|
|
_PIL_FORMAT_BY_MIME = {
|
|
'image/png': 'PNG',
|
|
'image/jpeg': 'JPEG',
|
|
'image/gif': 'GIF',
|
|
'image/webp': 'WEBP',
|
|
}
|
|
_OFFICE_VECTOR_EXTENSIONS = {'.emf', '.wmf'}
|
|
|
|
|
|
def _parse_float(val: str | None, default: float = 0.0) -> float:
|
|
"""Best-effort float parse, tolerating trailing ``px`` etc."""
|
|
if val is None or val == '':
|
|
return default
|
|
try:
|
|
return float(re.sub(r'(px|pt|em|%|rem)$', '', val.strip()))
|
|
except (ValueError, AttributeError):
|
|
return default
|
|
|
|
|
|
def _format_number(n: float) -> str:
|
|
"""Format a float for compact SVG attribute output."""
|
|
if abs(n - round(n)) < 1e-6:
|
|
return str(int(round(n)))
|
|
s = f"{n:.2f}".rstrip('0').rstrip('.')
|
|
return s or '0'
|
|
|
|
|
|
def _resolve_image_path(href: str, svg_dir: Path) -> Path | None:
|
|
"""Resolve an <image> href to a local filesystem path.
|
|
|
|
Returns None for unresolvable references (http/https/etc.) so callers
|
|
can leave those refs untouched.
|
|
"""
|
|
if not href:
|
|
return None
|
|
decoded = unquote(href)
|
|
if decoded.startswith(('http://', 'https://', 'file://')):
|
|
return None
|
|
if os.path.isabs(decoded):
|
|
candidate = Path(decoded)
|
|
return candidate if candidate.exists() else None
|
|
candidate = (svg_dir / decoded).resolve()
|
|
if candidate.exists():
|
|
return candidate
|
|
# svg_final is a copytree of svg_output living two levels deeper
|
|
# (<project>/.build/svg_final), but authored hrefs like ../images/x.png
|
|
# are relative to svg_output — from the copy they land on the
|
|
# nonexistent .build/images and every external image silently stayed
|
|
# un-embedded. Rebase the same relative path onto svg_output.
|
|
if svg_dir.name == 'svg_final' and svg_dir.parent.name == '.build':
|
|
authored = (svg_dir.parent.parent / 'svg_output' / decoded).resolve()
|
|
if authored.exists():
|
|
return authored
|
|
return None
|
|
|
|
|
|
def _load_pil_image(img_path: Path) -> 'PILImage' | None:
|
|
"""Open an image with PIL, returning None on any failure."""
|
|
try:
|
|
from PIL import Image
|
|
except ImportError:
|
|
return None
|
|
try:
|
|
return Image.open(img_path)
|
|
except (OSError, ValueError):
|
|
return None
|
|
|
|
|
|
def _normalize_for_save(img: 'PILImage', mime_type: str) -> 'PILImage':
|
|
"""Coerce a PIL image into a mode that the target format can save.
|
|
|
|
JPEG cannot store alpha — flatten to white background. Other formats
|
|
keep alpha when present.
|
|
"""
|
|
if mime_type == 'image/jpeg':
|
|
if img.mode in ('RGBA', 'LA'):
|
|
from PIL import Image
|
|
background = Image.new('RGB', img.size, (255, 255, 255))
|
|
alpha = img.getchannel('A') if img.mode == 'RGBA' else None
|
|
background.paste(img.convert('RGB'), mask=alpha)
|
|
return background
|
|
if img.mode != 'RGB':
|
|
return img.convert('RGB')
|
|
return img
|
|
# PNG / GIF / WEBP — preserve alpha if present
|
|
if img.mode == 'P':
|
|
return img.convert('RGBA' if 'A' in img.getbands() else 'RGB')
|
|
return img
|
|
|
|
|
|
def _encode_pil_to_data_uri(
|
|
img: 'PILImage',
|
|
src_path: Path,
|
|
*,
|
|
compress: bool,
|
|
max_dimension: int | None,
|
|
fallback_bytes: bytes | None,
|
|
) -> tuple[str, int] | None:
|
|
"""Serialize *img* to a base64 data URI.
|
|
|
|
If the image hasn't been transformed (slice crop or meet fit), prefer
|
|
re-encoding the original file bytes so we don't risk mutating an
|
|
already-optimized asset. *fallback_bytes* carries the raw on-disk
|
|
bytes for that path.
|
|
"""
|
|
mime_type = get_mime_type(src_path.name, fallback_bytes)
|
|
pil_format = _PIL_FORMAT_BY_MIME.get(mime_type, 'PNG')
|
|
|
|
# Encode current PIL image
|
|
try:
|
|
prepared = _normalize_for_save(img, mime_type)
|
|
buf = io.BytesIO()
|
|
save_kwargs: dict = {'format': pil_format}
|
|
if pil_format == 'JPEG':
|
|
save_kwargs['quality'] = 95
|
|
save_kwargs['optimize'] = True
|
|
elif pil_format == 'PNG':
|
|
save_kwargs['optimize'] = True
|
|
prepared.save(buf, **save_kwargs)
|
|
encoded_bytes = buf.getvalue()
|
|
except (OSError, ValueError):
|
|
return None
|
|
|
|
# If caller passed the original bytes and they're smaller (because PIL
|
|
# round-tripping an asset that was already well-compressed inflates it),
|
|
# fall back to those.
|
|
chosen = encoded_bytes
|
|
if fallback_bytes and len(fallback_bytes) < len(encoded_bytes):
|
|
chosen = fallback_bytes
|
|
|
|
chosen = _optimize_image_bytes(
|
|
chosen, mime_type, compress=compress, max_dimension=max_dimension,
|
|
)
|
|
|
|
b64 = base64.b64encode(chosen).decode('ascii')
|
|
return f'data:{mime_type};base64,{b64}', len(chosen)
|
|
|
|
|
|
def _iter_image_elements(root: ET.Element):
|
|
"""Yield every <image> in the tree regardless of namespace prefix."""
|
|
for image in root.iter(f'{{{SVG_NS}}}image'):
|
|
yield image
|
|
# Also catch namespace-stripped trees just in case
|
|
for image in root.iter('image'):
|
|
yield image
|
|
|
|
|
|
def _get_href(image: ET.Element) -> str | None:
|
|
"""Return the image href, supporting both ``href`` and ``xlink:href``."""
|
|
return image.get('href') or image.get(f'{{{XLINK_NS}}}href')
|
|
|
|
|
|
def _set_href(image: ET.Element, value: str) -> None:
|
|
"""Write the data URI back to whichever href attribute the image used."""
|
|
if image.get(f'{{{XLINK_NS}}}href') is not None:
|
|
image.set(f'{{{XLINK_NS}}}href', value)
|
|
else:
|
|
image.set('href', value)
|
|
|
|
|
|
def _process_one_image(
|
|
image: ET.Element,
|
|
svg_dir: Path,
|
|
*,
|
|
compress: bool,
|
|
max_dimension: int | None,
|
|
verbose: bool,
|
|
) -> tuple[bool, str | None]:
|
|
"""Align (slice/meet) and embed a single <image>.
|
|
|
|
Returns ``(processed, error)`` where *processed* is True iff the image
|
|
was rewritten and *error* is a short message when something went wrong
|
|
(the image is left untouched in that case).
|
|
"""
|
|
href = _get_href(image)
|
|
if not href:
|
|
return False, None
|
|
if href.startswith('data:'):
|
|
return False, None # already inline
|
|
|
|
img_path = _resolve_image_path(href, svg_dir)
|
|
if img_path is None:
|
|
return False, f'unresolved href: {href[:60]}'
|
|
|
|
try:
|
|
with open(img_path, 'rb') as fh:
|
|
raw_bytes = fh.read()
|
|
except OSError as exc:
|
|
return False, f'read failed: {exc}'
|
|
|
|
if img_path.suffix.lower() in _OFFICE_VECTOR_EXTENSIONS:
|
|
if verbose:
|
|
print(f' [INFO] {img_path.name}: Office vector left external for native PPTX passthrough')
|
|
return False, None
|
|
|
|
img = _load_pil_image(img_path)
|
|
if img is None:
|
|
return False, 'PIL open failed'
|
|
|
|
box_x = _parse_float(image.get('x'))
|
|
box_y = _parse_float(image.get('y'))
|
|
box_w = _parse_float(image.get('width'))
|
|
box_h = _parse_float(image.get('height'))
|
|
if box_w <= 0 or box_h <= 0:
|
|
return False, 'zero-sized box'
|
|
|
|
par_attr = image.get('preserveAspectRatio') or ''
|
|
par_attr = par_attr.strip()
|
|
|
|
# ------------------------------------------------------------------
|
|
# Decide the spatial transform
|
|
# ------------------------------------------------------------------
|
|
final_img: 'PILImage' = img
|
|
new_x, new_y, new_w, new_h = box_x, box_y, box_w, box_h
|
|
transformed = False # True iff bitmap content changed (crop happened)
|
|
|
|
if not par_attr:
|
|
# No preserveAspectRatio at all. The previous pipeline's fix-aspect
|
|
# step assumed "xMidYMid meet" here, which silently re-fit images
|
|
# that crop-images had already shaped. Treat absence as "leave it
|
|
# alone": embed bytes, keep box.
|
|
pass
|
|
else:
|
|
align, mode = parse_preserve_aspect_ratio(par_attr)
|
|
if align == 'none':
|
|
# Author wants stretch-to-box; preserve geometry, embed bytes.
|
|
pass
|
|
elif mode == 'slice':
|
|
x_anchor, y_anchor = get_crop_anchor(align)
|
|
cropped = crop_image_to_size(img, int(box_w), int(box_h),
|
|
x_anchor, y_anchor)
|
|
final_img = cropped
|
|
transformed = True
|
|
else: # meet (or any other mode → treat as meet)
|
|
new_w_calc, new_h_calc, off_x, off_y = calculate_fitted_dimensions(
|
|
img.size[0], img.size[1], box_w, box_h, mode='meet',
|
|
)
|
|
new_x = box_x + off_x
|
|
new_y = box_y + off_y
|
|
new_w = new_w_calc
|
|
new_h = new_h_calc
|
|
|
|
# ------------------------------------------------------------------
|
|
# Encode and rewrite
|
|
# ------------------------------------------------------------------
|
|
encoded = _encode_pil_to_data_uri(
|
|
final_img,
|
|
img_path,
|
|
compress=compress,
|
|
max_dimension=max_dimension,
|
|
fallback_bytes=raw_bytes if not transformed else None,
|
|
)
|
|
if encoded is None:
|
|
return False, 'encode failed'
|
|
data_uri, _ = encoded
|
|
|
|
_set_href(image, data_uri)
|
|
image.set('x', _format_number(new_x))
|
|
image.set('y', _format_number(new_y))
|
|
image.set('width', _format_number(new_w))
|
|
image.set('height', _format_number(new_h))
|
|
if 'preserveAspectRatio' in image.attrib:
|
|
del image.attrib['preserveAspectRatio']
|
|
|
|
if verbose:
|
|
suffix = ' (cropped)' if transformed else ''
|
|
print(f' [OK] {img_path.name}{suffix}')
|
|
return True, None
|
|
|
|
|
|
def count_office_vector_refs_in_svg(svg_path: str | Path) -> int:
|
|
"""Count local EMF/WMF image refs that the embed pass intentionally skips."""
|
|
svg_path = Path(svg_path)
|
|
svg_dir = svg_path.parent.resolve()
|
|
try:
|
|
tree = ET.parse(svg_path)
|
|
except ET.ParseError:
|
|
return 0
|
|
count = 0
|
|
seen: set[int] = set()
|
|
for image in _iter_image_elements(tree.getroot()):
|
|
ident = id(image)
|
|
if ident in seen:
|
|
continue
|
|
seen.add(ident)
|
|
href = _get_href(image)
|
|
if not href or href.startswith('data:'):
|
|
continue
|
|
img_path = _resolve_image_path(href, svg_dir)
|
|
if img_path and img_path.suffix.lower() in _OFFICE_VECTOR_EXTENSIONS:
|
|
count += 1
|
|
return count
|
|
|
|
|
|
def align_and_embed_images_in_svg(
|
|
svg_path: str | Path,
|
|
*,
|
|
dry_run: bool = False,
|
|
verbose: bool = False,
|
|
compress: bool = False,
|
|
max_dimension: int | None = None,
|
|
) -> tuple[int, int]:
|
|
"""Run the merged align + embed pass on a single SVG file.
|
|
|
|
Returns ``(processed_count, error_count)``.
|
|
"""
|
|
svg_path = Path(svg_path)
|
|
svg_dir = svg_path.parent.resolve()
|
|
|
|
# Register namespaces for clean serialization
|
|
ET.register_namespace('', SVG_NS)
|
|
ET.register_namespace('xlink', XLINK_NS)
|
|
|
|
try:
|
|
tree = ET.parse(svg_path)
|
|
except ET.ParseError as exc:
|
|
if verbose:
|
|
print(f' [ERROR] {svg_path.name}: parse failed ({exc})')
|
|
return (0, 1)
|
|
root = tree.getroot()
|
|
|
|
# Avoid double-iteration if an element matches both namespaced and
|
|
# bare-tag iteration paths.
|
|
seen: set[int] = set()
|
|
processed = 0
|
|
errors = 0
|
|
|
|
for image in _iter_image_elements(root):
|
|
ident = id(image)
|
|
if ident in seen:
|
|
continue
|
|
seen.add(ident)
|
|
|
|
if dry_run:
|
|
processed += 1
|
|
continue
|
|
|
|
ok, err = _process_one_image(
|
|
image, svg_dir,
|
|
compress=compress, max_dimension=max_dimension, verbose=verbose,
|
|
)
|
|
if ok:
|
|
processed += 1
|
|
elif err:
|
|
errors += 1
|
|
if verbose:
|
|
print(f' [WARN] {svg_path.name}: {err}')
|
|
|
|
if processed > 0 and not dry_run:
|
|
tree.write(svg_path, encoding='utf-8', xml_declaration=False)
|
|
|
|
return (processed, errors)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Standalone CLI (rare; the main entry point is finalize_svg.py)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def build_parser() -> argparse.ArgumentParser:
|
|
"""Build the standalone diagnostic parser."""
|
|
import argparse
|
|
parser = argparse.ArgumentParser(
|
|
description='Align (slice/meet) and Base64-embed all <image> refs in an SVG.',
|
|
)
|
|
parser.add_argument('svg', type=Path, help='SVG file to process in place')
|
|
parser.add_argument('-n', '--dry-run', action='store_true')
|
|
parser.add_argument('-v', '--verbose', action='store_true')
|
|
parser.add_argument('--compress', action='store_true',
|
|
help='Compress images before embedding')
|
|
parser.add_argument('--max-dimension', type=int, default=None,
|
|
help='Downscale images larger than this on either axis')
|
|
return parser
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
|
|
"""Run the standalone diagnostic CLI."""
|
|
parser = build_parser()
|
|
args = parser.parse_args(argv)
|
|
|
|
if not args.svg.exists():
|
|
print(f'Error: file not found: {args.svg}', file=sys.stderr)
|
|
return 1
|
|
|
|
proc, err = align_and_embed_images_in_svg(
|
|
args.svg,
|
|
dry_run=args.dry_run,
|
|
verbose=args.verbose,
|
|
compress=args.compress,
|
|
max_dimension=args.max_dimension,
|
|
)
|
|
print(f'Processed {proc} image(s), {err} error(s)')
|
|
return 1 if err else 0
|
|
|
|
|
|
if __name__ == '__main__':
|
|
raise SystemExit(main())
|