266 lines
9.4 KiB
Python
266 lines
9.4 KiB
Python
#!/usr/bin/env python3
"""
SVG Image Embedding Tool

Converts externally referenced images in SVG files to Base64 inline format.

Usage:
    python3 scripts/svg_finalize/embed_images.py <svg_file> [svg_file2] ...
    python3 scripts/svg_finalize/embed_images.py *.svg

Examples:
    python3 scripts/svg_finalize/embed_images.py examples/ppt169_demo/svg_output/01_cover.svg
    python3 scripts/svg_finalize/embed_images.py examples/ppt169_demo/svg_output/*.svg
"""
|
|
|
|
import os
|
|
import base64
|
|
import re
|
|
import sys
|
|
import argparse
|
|
|
|
|
|
def get_mime_type(filename: str, file_bytes: bytes | None = None) -> str:
|
|
"""Return the MIME type based on file bytes first, then extension."""
|
|
if file_bytes:
|
|
if file_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
|
|
return 'image/png'
|
|
if file_bytes.startswith(b"\xff\xd8\xff"):
|
|
return 'image/jpeg'
|
|
if file_bytes.startswith((b"GIF87a", b"GIF89a")):
|
|
return 'image/gif'
|
|
if file_bytes.startswith(b"RIFF") and file_bytes[8:12] == b"WEBP":
|
|
return 'image/webp'
|
|
if file_bytes.lstrip().startswith(b"<svg"):
|
|
return 'image/svg+xml'
|
|
|
|
ext = filename.lower().split('.')[-1]
|
|
mime_map = {
|
|
'png': 'image/png',
|
|
'jpg': 'image/jpeg',
|
|
'jpeg': 'image/jpeg',
|
|
'gif': 'image/gif',
|
|
'webp': 'image/webp',
|
|
'svg': 'image/svg+xml',
|
|
}
|
|
return mime_map.get(ext, 'application/octet-stream')
|
|
|
|
def get_file_size_str(size_bytes: int) -> str:
    """Format a byte count as a short human-readable size.

    Values below 1024 are shown as whole bytes (``"512 B"``); larger values
    are shown with one decimal in KB (below 1024 * 1024) or MB, using binary
    multiples of 1024.
    """
    kib = 1024
    mib = kib * kib

    # Largest unit first so each guard can return immediately.
    if size_bytes >= mib:
        return f"{size_bytes / mib:.1f} MB"
    if size_bytes >= kib:
        return f"{size_bytes / kib:.1f} KB"
    return f"{size_bytes} B"
|
|
|
|
def _optimize_image_bytes(img_bytes: bytes, mime_type: str,
|
|
compress: bool = False,
|
|
max_dimension: int | None = None) -> bytes:
|
|
"""Optionally compress and/or downscale image bytes.
|
|
|
|
Returns the (possibly optimized) image bytes. Falls back to the
|
|
original bytes if PIL is not available or optimization fails.
|
|
"""
|
|
if not compress and not max_dimension:
|
|
return img_bytes
|
|
|
|
try:
|
|
from PIL import Image as PILImage
|
|
import io
|
|
except ImportError:
|
|
return img_bytes
|
|
|
|
try:
|
|
img = PILImage.open(io.BytesIO(img_bytes))
|
|
except Exception:
|
|
return img_bytes
|
|
|
|
changed = False
|
|
|
|
# Downscale if exceeding max_dimension
|
|
if max_dimension:
|
|
w, h = img.size
|
|
if w > max_dimension or h > max_dimension:
|
|
ratio = min(max_dimension / w, max_dimension / h)
|
|
new_w, new_h = int(w * ratio), int(h * ratio)
|
|
img = img.resize((new_w, new_h), PILImage.LANCZOS)
|
|
changed = True
|
|
|
|
# Compress
|
|
if compress or changed:
|
|
buf = io.BytesIO()
|
|
if mime_type == 'image/jpeg':
|
|
if img.mode in ('RGBA', 'P'):
|
|
img = img.convert('RGB')
|
|
img.save(buf, format='JPEG', quality=85, optimize=True)
|
|
elif mime_type == 'image/png':
|
|
img.save(buf, format='PNG', optimize=True)
|
|
else:
|
|
# For other formats, just re-save
|
|
fmt = img.format or 'PNG'
|
|
img.save(buf, format=fmt)
|
|
|
|
optimized = buf.getvalue()
|
|
# Only use optimized version if it's actually smaller
|
|
if len(optimized) < len(img_bytes):
|
|
return optimized
|
|
|
|
return img_bytes
|
|
|
|
|
|
def embed_images_in_svg(svg_path: str, dry_run: bool = False,
|
|
compress: bool = False,
|
|
max_dimension: int | None = None) -> tuple[int, int]:
|
|
"""
|
|
Convert externally referenced images in an SVG file to Base64 inline format.
|
|
|
|
Args:
|
|
svg_path: SVG file path
|
|
dry_run: If True, only show which images would be processed without modifying the file
|
|
compress: If True, compress images before embedding (JPEG quality=85, PNG optimize)
|
|
max_dimension: If set, downscale images exceeding this dimension on either axis
|
|
|
|
Returns:
|
|
tuple: (number of images processed, file size after embedding)
|
|
"""
|
|
svg_dir = os.path.dirname(os.path.abspath(svg_path))
|
|
|
|
with open(svg_path, 'r', encoding='utf-8') as f:
|
|
content = f.read()
|
|
|
|
original_size = len(content.encode('utf-8'))
|
|
|
|
# Match href="xxx.png" or href="xxx.jpg" etc. (exclude those already using data:)
|
|
pattern = r'href="(?!data:)([^"]+\.(png|jpg|jpeg|gif|webp))"'
|
|
|
|
images_found = []
|
|
images_embedded = 0
|
|
|
|
def replace_with_base64(match):
|
|
nonlocal images_embedded
|
|
img_path = match.group(1)
|
|
|
|
# Decode XML/HTML entities (e.g., & -> &)
|
|
import html
|
|
img_path_decoded = html.unescape(img_path)
|
|
|
|
# Handle relative paths
|
|
if not os.path.isabs(img_path_decoded):
|
|
full_path = os.path.join(svg_dir, img_path_decoded)
|
|
else:
|
|
full_path = img_path_decoded
|
|
|
|
if not os.path.exists(full_path):
|
|
print(f" [WARN] Image not found: {img_path}")
|
|
images_found.append((img_path, "NOT FOUND", 0, None))
|
|
return match.group(0)
|
|
|
|
img_size = os.path.getsize(full_path)
|
|
|
|
if dry_run:
|
|
images_found.append((img_path, "WILL EMBED", img_size, None))
|
|
return match.group(0)
|
|
|
|
with open(full_path, 'rb') as img_file:
|
|
img_bytes = img_file.read()
|
|
|
|
mime_type = get_mime_type(img_path, img_bytes)
|
|
optimized_bytes = _optimize_image_bytes(
|
|
img_bytes, mime_type, compress=compress, max_dimension=max_dimension)
|
|
b64_data = base64.b64encode(optimized_bytes).decode('utf-8')
|
|
|
|
images_embedded += 1
|
|
saved = len(img_bytes) - len(optimized_bytes)
|
|
if saved > 0 and (compress or max_dimension):
|
|
pct = saved / len(img_bytes) * 100
|
|
images_found.append((img_path, "EMBEDDED", img_size,
|
|
f"{get_file_size_str(len(img_bytes))} → {get_file_size_str(len(optimized_bytes))}, saved {pct:.0f}%"))
|
|
else:
|
|
images_found.append((img_path, "EMBEDDED", img_size, None))
|
|
|
|
return f'href="data:{mime_type};base64,{b64_data}"'
|
|
|
|
new_content = re.sub(pattern, replace_with_base64, content)
|
|
|
|
new_size = len(new_content.encode('utf-8'))
|
|
|
|
# Print processed images
|
|
if images_found:
|
|
print(f"\n[FILE] {os.path.basename(svg_path)}")
|
|
for img_path, status, size, opt_info in images_found:
|
|
size_str = get_file_size_str(size) if size > 0 else ""
|
|
if status == "EMBEDDED":
|
|
if opt_info:
|
|
print(f" [OK] {img_path} ({opt_info})")
|
|
else:
|
|
print(f" [OK] {img_path} ({size_str})")
|
|
elif status == "WILL EMBED":
|
|
print(f" [PREVIEW] {img_path} ({size_str}) [dry-run]")
|
|
else:
|
|
print(f" [FAIL] {img_path} ({status})")
|
|
|
|
print(f" [SIZE] {get_file_size_str(original_size)} -> {get_file_size_str(new_size)}")
|
|
|
|
if not dry_run and images_embedded > 0:
|
|
with open(svg_path, 'w', encoding='utf-8') as f:
|
|
f.write(new_content)
|
|
|
|
return (images_embedded, new_size)
|
|
|
|
def main() -> None:
    """Run the CLI entry point.

    Parses the command line, then embeds images into each given SVG file in
    turn and prints a summary. A missing path, a path without an ``.svg``
    suffix (any letter case), a path that is not a regular file, or a file
    that cannot be read or written is reported and skipped, so one bad
    argument does not abort the batch. An invalid ``--max-dimension`` (zero
    or negative) is rejected through argparse, which exits with status 2.
    """
    parser = argparse.ArgumentParser(
        description='Convert externally referenced images in SVG files to Base64 inline format',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
%(prog)s 01_cover.svg # Process a single file
%(prog)s *.svg # Process all SVGs in current directory
%(prog)s --dry-run *.svg # Preview files to be processed
'''
    )
    parser.add_argument('files', nargs='+', help='SVG files to process')
    parser.add_argument('--dry-run', '-n', action='store_true',
                        help='Only show which images would be processed, without modifying files')
    parser.add_argument('--compress', action='store_true',
                        help='Compress images before embedding (JPEG quality=85, PNG optimize)')
    parser.add_argument('--max-dimension', type=int, default=None,
                        help='Downscale images exceeding this dimension on either axis (e.g., 2560)')

    args = parser.parse_args()

    # A zero or negative limit has no meaning; fail fast with a usage error
    # instead of silently ignoring it or producing invalid image sizes.
    if args.max_dimension is not None and args.max_dimension <= 0:
        parser.error('--max-dimension must be a positive integer')

    if args.dry_run:
        print("[INFO] Dry-run mode: only preview, no modification\n")
    if args.compress:
        print("[INFO] Compression enabled: JPEG quality=85, PNG optimize")
    if args.max_dimension:
        print(f"[INFO] Max dimension: {args.max_dimension}px")

    total_images = 0
    total_files = 0

    for svg_file in args.files:
        if not os.path.exists(svg_file):
            print(f"[ERROR] File not found: {svg_file}")
            continue

        # Compare the suffix case-insensitively so "SLIDE.SVG" is accepted.
        if not svg_file.lower().endswith('.svg'):
            print(f"[SKIP] Skipping non-SVG file: {svg_file}")
            continue

        # A directory (or other special file) named "*.svg" cannot be opened.
        if not os.path.isfile(svg_file):
            print(f"[ERROR] Not a regular file: {svg_file}")
            continue

        try:
            images, _ = embed_images_in_svg(svg_file, dry_run=args.dry_run,
                                            compress=args.compress,
                                            max_dimension=args.max_dimension)
        except (OSError, UnicodeDecodeError) as exc:
            # Unreadable, unwritable or non-UTF-8 file: report and move on.
            print(f"[ERROR] Failed to process {svg_file}: {exc}")
            continue

        if images > 0:
            total_images += images
            total_files += 1

    print(f"\n{'=' * 50}")
    if args.dry_run:
        print(f"[PREVIEW] Will process {total_images} images in {total_files} files")
    else:
        print(f"[DONE] Embedded {total_images} images in {total_files} files")
|
|
|
|
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|