# zcbot/skills/ppt/scripts/svg_finalize/flatten_tspan.py
#
# Flatten <tspan> lines into multiple <text> nodes for better compatibility.

import os
import sys
import re
import argparse
from xml.etree import ElementTree as ET
# Namespace URI of SVG elements. Tag checks in this module compare element
# tags against f"{{{SVG_NS}}}<localname>".
SVG_NS = "http://www.w3.org/2000/svg"
# Prefix -> namespace URI mapping.
# NOTE(review): no use of NSMAP is visible in this file - confirm before removing.
NSMAP = {"svg": SVG_NS}
# Ensure pretty element names without ns0 prefix on write
ET.register_namespace("", SVG_NS)
# Attribute names that copy_text_attrs() carries over from a source element,
# and that _create_text_element_from_line() lets a lone tspan override on the
# <text> it builds.
TEXT_STYLE_ATTRS = {
    # common text styling
    "font-family",
    "font-size",
    "font-weight",
    "font-style",
    "font-variant",
    "font-stretch",
    "letter-spacing",
    "word-spacing",
    "kerning",
    "text-anchor",
    "text-decoration",
    "dominant-baseline",
    "writing-mode",
    "direction",
    # color/paint
    "fill",
    "fill-opacity",
    "stroke",
    "stroke-width",
    "stroke-opacity",
    "opacity",
    "paint-order",
    # transforms/filters
    "transform",
    "clip-path",
    "filter",
}
num_re = re.compile(r"^[\s,]*([+-]?(?:\d+\.?\d*|\d*\.\d+))")
def parse_first_number(val: str | None) -> float | None:
"""Parse the first numeric token from an SVG attribute value."""
if val is None:
return None
m = num_re.match(val)
if not m:
return None
try:
return float(m.group(1))
except ValueError:
return None
def format_number(n: float | None) -> str | None:
"""Format a float for compact SVG attribute output."""
if n is None:
return None
if abs(n - round(n)) < 1e-6:
return str(int(round(n)))
# Trim trailing zeros
s = f"{n:.6f}".rstrip("0").rstrip(".")
return s
def parse_style(style_str: str | None) -> dict[str, str]:
"""Parse an inline SVG style string into a mapping."""
out: dict[str, str] = {}
if not style_str:
return out
# split by ; and then :
for chunk in style_str.split(";"):
if not chunk.strip():
continue
if ":" in chunk:
k, v = chunk.split(":", 1)
out[k.strip()] = v.strip()
return out
def style_to_string(style_map: dict[str, str]) -> str:
    """Build an inline ``style`` string from a property -> value mapping.

    Args:
        style_map: Mapping to serialize; a falsy value yields ``""``.

    Returns:
        ``name:value`` pairs joined by ``;`` in mapping order.
    """
    declarations = [f"{name}:{value}" for name, value in (style_map or {}).items()]
    return ";".join(declarations)
def merge_styles(parent_style: str | None, child_style: str | None) -> str:
    """Combine two inline style strings, letting the child win on conflicts.

    Args:
        parent_style: ``style`` value of the outer element, or ``None``.
        child_style: ``style`` value of the inner element, or ``None``.

    Returns:
        The merged style string; ``""`` when both inputs are empty.
    """
    merged = parse_style(parent_style)
    # Overwriting in place keeps a parent property at its original position.
    for name, value in parse_style(child_style).items():
        merged[name] = value
    return style_to_string(merged)
def get_attr(elem: ET.Element | None, name: str, default: str | None = None) -> str | None:
"""Read an attribute from an element with a default fallback."""
return elem.get(name) if elem is not None and name in elem.attrib else default
def compute_line_positions(
    text_el: ET.Element,
    tspan_el: ET.Element,
    cur_x: float | None,
    cur_y: float | None,
) -> tuple[float | None, float | None]:
    """Compute the absolute x, y at which a line-starting tspan is drawn.

    Only the first number of each attribute is considered and units are
    ignored (see parse_first_number).

    Rules, per axis:
      * x: an explicit ``x`` wins; otherwise ``dx`` is added to ``cur_x``;
        otherwise ``cur_x`` is kept. ``dx`` next to an explicit ``x`` is not
        folded in here because _copy_inline_tspan keeps ``dx`` on the copied
        run.
      * y: an explicit ``y`` is the base and ``dy`` (if any) is added on top,
        since SVG applies ``dy`` after ``y``; without ``y``, ``dy`` is added to
        ``cur_y``; otherwise ``cur_y`` is kept. ``dy`` has to be resolved here
        because _copy_inline_tspan strips it from the copied tspan.
      * An ``x`` / ``y`` attribute that holds no number is treated as absent
        instead of turning the position into ``None``.

    Args:
        text_el: Parent <text>; unused, kept for call-site compatibility.
        tspan_el: The tspan that starts the line.
        cur_x: Current x baseline (``None`` counts as 0 when an offset is added).
        cur_y: Current y baseline (``None`` counts as 0 when an offset is added).

    Returns:
        ``(new_x, new_y)``. A component is ``None`` only if the matching
        ``cur_*`` was ``None`` and the tspan did not position that axis.
    """
    del text_el  # unused: the running baseline arrives through cur_x / cur_y

    x_raw = get_attr(tspan_el, "x")
    y_raw = get_attr(tspan_el, "y")
    dx_raw = get_attr(tspan_el, "dx")
    dy_raw = get_attr(tspan_el, "dy")

    abs_x = parse_first_number(x_raw) if x_raw is not None else None
    if abs_x is not None:
        nx = abs_x
    elif x_raw is None and dx_raw is not None:
        nx = (cur_x or 0.0) + (parse_first_number(dx_raw) or 0.0)
    else:
        # No positioning on this axis, or an x value without a number.
        nx = cur_x

    abs_y = parse_first_number(y_raw) if y_raw is not None else None
    rel_y = (parse_first_number(dy_raw) or 0.0) if dy_raw is not None else None
    if abs_y is not None:
        # y and dy may appear together; the shift applies on top of y.
        ny = abs_y + (rel_y or 0.0)
    elif rel_y is not None:
        ny = (cur_y or 0.0) + rel_y
    else:
        ny = cur_y
    return nx, ny
def collect_text_content(el: ET.Element) -> str:
    """Concatenate every piece of text found inside ``el``.

    Text of nested elements and the tail text that follows them inside ``el``
    are included in document order; ``el``'s own tail is not.

    Args:
        el: Root of the subtree to read.

    Returns:
        The joined text, ``""`` when the subtree holds none.
    """
    return "".join(fragment for fragment in el.itertext() if fragment)
def copy_text_attrs(
    src_el: ET.Element,
    dst_el: ET.Element,
    exclude: set[str] | None = None,
) -> None:
    """Copy text styling from ``src_el`` onto ``dst_el`` in place.

    Copies, when present on the source and not excluded: the ``style``
    attribute, every attribute named in TEXT_STYLE_ATTRS, and ``xml:space``.
    Existing attributes of the same name on ``dst_el`` are overwritten.

    Args:
        src_el: Element to read attributes from.
        dst_el: Element that receives the attributes.
        exclude: Attribute names to skip (``xml:space`` is matched by its
            ``{namespace}space`` form). ``None`` means nothing is skipped.
    """
    skipped = exclude or set()
    xml_space_key = "{http://www.w3.org/XML/1998/namespace}space"

    # Copy style string first
    if "style" in src_el.attrib and "style" not in skipped:
        dst_el.set("style", src_el.attrib["style"])

    # Sorted on purpose: ElementTree writes attributes in insertion order and
    # the iteration order of a set of strings differs between interpreter
    # runs, which would make the written SVG differ from run to run.
    for name in sorted(TEXT_STYLE_ATTRS):
        if name in skipped:
            continue
        value = src_el.get(name)
        if value is not None:
            dst_el.set(name, value)

    # xml:space preservation
    xml_space = src_el.get(xml_space_key)
    if xml_space is not None and xml_space_key not in skipped:
        dst_el.set(xml_space_key, xml_space)
# Set on the parent <text> of a merged paragraph block; its value is the
# block's base line-height (written by _emit_mergeable_paragraph).
PARAGRAPH_MARK_ATTR = "data-paragraph-line-height"
# Set on a line tspan that opens a new paragraph with extra space above the
# base line-height; omitted when that extra space is 0.
PARAGRAPH_SPACE_BEFORE_ATTR = "data-paragraph-space-before"
# Marks a line-break tspan as a SOFT break inside the current paragraph
# (SVG used dy to simulate text wrapping; the downstream converter should
# merge its runs into the previous <a:p> rather than start a new one).
PARAGRAPH_SOFT_BREAK_ATTR = "data-paragraph-soft-break"
# Tolerance for detecting "base line-height" vs "paragraph gap": dy values
# within ±DY_TOLERANCE_PX of each other are considered the same line-height.
DY_TOLERANCE_PX = 0.5
# Cap on dy / base ratio. Anything beyond this (e.g. a 5x gap) is rejected
# as a real section break that shouldn't merge into one text frame.
MAX_DY_MULTIPLIER = 3.0
def _tspan_has_positional_descendant(tspan: ET.Element) -> bool:
    """Tell whether any tspan nested below ``tspan`` sets x, y or dy.

    Only chains of tspan elements are followed: children with another tag are
    skipped together with everything beneath them. ``tspan`` itself is not
    inspected.
    """
    tspan_tag = f"{{{SVG_NS}}}tspan"
    pending = [tspan]
    while pending:
        current = pending.pop()
        for nested in current:
            if nested.tag != tspan_tag:
                continue
            if any(nested.get(attr) is not None for attr in ("x", "y", "dy")):
                return True
            pending.append(nested)
    return False
def _classify_paragraph_block(
    text_el: ET.Element,
    is_svg_tag,
    is_new_line_tspan,
) -> tuple[float, list[float], list[bool], list[list[ET.Element]]] | None:
    """Decide whether a <text> can stay one element holding several lines.

    Args:
        text_el: The <text> element to inspect (not modified).
        is_svg_tag: Callable ``(element, local_name) -> bool``.
        is_new_line_tspan: Callable ``(tspan) -> bool`` telling whether a
            tspan starts a new visual line.

    Returns:
        ``None`` when the element does not qualify. Otherwise the tuple
        ``(base_line_height, extras, soft_breaks, line_groups)``:

        * ``line_groups[i]`` - the tspans of line ``i``: the line-starting
          tspan followed by the direct-child inline tspans that belong to it.
        * ``extras[i]`` - space above line ``i`` beyond the base line-height
          (0 for the first line and for soft breaks).
        * ``soft_breaks[i]`` - True when line ``i`` continues the previous
          paragraph (its dy equals the base within DY_TOLERANCE_PX); always
          False for the first line.

    An element qualifies only if all of the following hold:
      - <text> has no text of its own before the first child.
      - Every direct child is a tspan and there are at least two lines.
      - The first direct child starts a line; later direct-child tspans that
        do not start a line are inline runs of the preceding line and carry
        no nested tspan with x/y/dy.
      - No line-starting tspan has y, and none contains a nested tspan with
        x/y/dy.
      - A line-starting tspan that sets x repeats the x of <text>.
      - The first line has no dy or a dy of 0; every later line has dy > 0.
      - With base = smallest dy, every dy is at most
        MAX_DY_MULTIPLIER * base (+ DY_TOLERANCE_PX).
    """
    if (text_el.text or "").strip():
        return None

    children = list(text_el)
    tspans = [child for child in children if is_svg_tag(child, "tspan")]
    if len(tspans) < 2 or len(tspans) != len(children):
        return None

    # Group direct children into visual lines.
    line_groups: list[list[ET.Element]] = []
    for tspan in tspans:
        if is_new_line_tspan(tspan):
            line_groups.append([tspan])
            continue
        # An inline run needs a line to attach to and must not reposition.
        if not line_groups or _tspan_has_positional_descendant(tspan):
            return None
        line_groups[-1].append(tspan)
    if len(line_groups) < 2:
        return None

    # Validate each line head and record its dy (0 for the first line).
    anchor_x = parse_first_number(get_attr(text_el, "x"))
    line_dys: list[float] = []
    for position, group in enumerate(line_groups):
        head = group[0]
        if get_attr(head, "y") is not None:
            return None
        head_x_raw = get_attr(head, "x")
        if head_x_raw is not None:
            head_x = parse_first_number(head_x_raw)
            if anchor_x is None or head_x is None or abs(head_x - anchor_x) > 1e-6:
                return None
        dy_raw = get_attr(head, "dy")
        head_dy = None if dy_raw is None else parse_first_number(dy_raw)
        if position == 0:
            if head_dy is not None and abs(head_dy) > 1e-6:
                return None
            line_dys.append(0.0)
        elif head_dy is None or head_dy <= 0:
            return None
        else:
            line_dys.append(head_dy)
        if _tspan_has_positional_descendant(head):
            return None

    # The smallest gap is the base line-height; larger gaps become
    # "space before" on the line that follows them.
    gaps = line_dys[1:]
    positive_gaps = [gap for gap in gaps if gap > 0]
    if not positive_gaps:
        return None
    base = min(positive_gaps)
    largest_allowed = base * MAX_DY_MULTIPLIER + DY_TOLERANCE_PX

    extras: list[float] = [0.0]  # the first line never has space before it
    soft_breaks: list[bool] = [False]  # the first line opens a paragraph
    for gap in gaps:
        if gap + DY_TOLERANCE_PX < base:
            return None  # tighter than the base: overlapping lines
        if gap > largest_allowed:
            return None  # too far apart: a section break
        extra = max(gap - base, 0.0)
        # A gap equal to the base is a wrap inside the paragraph; anything
        # larger starts a new paragraph.
        is_soft = abs(extra) <= DY_TOLERANCE_PX
        extras.append(0.0 if is_soft else extra)
        soft_breaks.append(is_soft)
    return base, extras, soft_breaks, line_groups
def _emit_mergeable_paragraph(
    text_el: ET.Element,
    base_dy: float,
    extras: list[float],
    soft_breaks: list[bool],
    line_groups: list[list[ET.Element]],
) -> None:
    """Rewrite ``text_el`` in place as one <text> whose children are lines.

    Effects:
      * PARAGRAPH_MARK_ATTR on ``text_el`` is set to the base line-height.
      * Direct-child inline tspans (``group[1:]``) are moved under the
        line-starting tspan of their group, so every direct child of
        ``text_el`` is one visual line. A whitespace-only tail on a moved
        run is dropped.
      * Visible text between a line head and its first inline run is moved
        inside the head first, so it stays ahead of the runs after the move.
      * x / y / dy are removed from each line tspan; other attributes and
        nested tspans are kept.
      * Per line tspan: PARAGRAPH_SOFT_BREAK_ATTR="1" for a soft break, else
        PARAGRAPH_SPACE_BEFORE_ATTR when the extra gap is greater than 0.

    Args:
        text_el: The <text> element to rewrite.
        base_dy: Base line-height.
        extras: Extra space before each line, one entry per line.
        soft_breaks: Soft-break flag for each line, one entry per line.
        line_groups: Per line, the line-starting tspan followed by its
            direct-child inline tspans.
    """
    text_el.set(PARAGRAPH_MARK_ATTR, format_number(base_dy))

    normalized_lines: list[ET.Element] = []
    for group in line_groups:
        line = group[0]
        inline_runs = group[1:]
        if inline_runs and line.tail and line.tail.strip():
            # The tail is text that follows the line head in document order.
            # Appending the runs below would put them in front of it, so fold
            # it into the head's content before re-parenting.
            if len(line):
                last_nested = line[-1]
                last_nested.tail = (last_nested.tail or "") + line.tail
            else:
                line.text = (line.text or "") + line.tail
            line.tail = None
        for inline_tspan in inline_runs:
            try:
                text_el.remove(inline_tspan)
            except ValueError:
                pass  # not a direct child (any more); nothing to detach
            if inline_tspan.tail and not inline_tspan.tail.strip():
                inline_tspan.tail = None
            line.append(inline_tspan)
        normalized_lines.append(line)

    # Anything left under <text> that is not a line head is dropped.
    kept_ids = {id(line) for line in normalized_lines}
    for child in list(text_el):
        if id(child) not in kept_ids:
            text_el.remove(child)

    extras_iter = iter(extras)
    soft_iter = iter(soft_breaks)
    for tspan in normalized_lines:
        for key in ("x", "y", "dy"):
            tspan.attrib.pop(key, None)
        try:
            extra = next(extras_iter)
            soft = next(soft_iter)
        except StopIteration:
            # Fewer annotations than lines: treat the line as a plain
            # paragraph start without extra spacing.
            extra = 0.0
            soft = False
        if soft:
            tspan.set(PARAGRAPH_SOFT_BREAK_ATTR, "1")
        elif extra > 1e-6:
            tspan.set(PARAGRAPH_SPACE_BEFORE_ATTR, format_number(extra))
def flatten_text_with_tspans(
    tree: ET.ElementTree,
    merge_paragraphs: bool = False,
) -> bool:
    """Split multi-line <text> elements of ``tree`` into one <text> per line.

    A <text> is processed when at least one of its direct tspan children
    starts a new line (it has ``y``, a non-zero ``dy``, or ``x``). Each line
    becomes a new <text> placed at the position of the original element,
    which is then removed.

    Args:
        tree: Parsed SVG document; modified in place.
        merge_paragraphs: When True, a <text> accepted by
            _classify_paragraph_block is kept as a single annotated element
            (see _emit_mergeable_paragraph) instead of being split.

    Returns:
        True if at least one <text> element was rewritten.
    """
    svg_root = tree.getroot()

    # ElementTree keeps no parent pointers, so record them before editing.
    parent_of = {}
    for container in svg_root.iter():
        for member in container:
            parent_of[member] = container

    def is_svg_tag(el: ET.Element, name: str) -> bool:
        return el.tag == "{" + SVG_NS + "}" + name

    def is_new_line_tspan(tspan: ET.Element) -> bool:
        """True when the tspan has y, a non-zero dy, or x."""
        if get_attr(tspan, "y") is not None:
            return True
        dy_raw = get_attr(tspan, "dy")
        if dy_raw is not None:
            dy = parse_first_number(dy_raw)
            if dy is not None and dy != 0:
                return True
        return get_attr(tspan, "x") is not None

    def split_into_lines(text_el: ET.Element) -> list[ET.Element]:
        """Build one new <text> per visual line of ``text_el``."""
        pos_x = parse_first_number(get_attr(text_el, "x")) or 0.0
        pos_y = parse_first_number(get_attr(text_el, "y")) or 0.0
        produced: list[ET.Element] = []
        pending_runs: list[ET.Element] = []
        # Text directly under <text>, ahead of the first child.
        pending_lead = (text_el.text or "").strip() or None

        for child in list(text_el):
            if not is_svg_tag(child, "tspan"):
                continue
            visible = collect_text_content(child).strip()
            if is_new_line_tspan(child):
                # Close the line collected so far at the previous position.
                if pending_runs or pending_lead:
                    produced.append(
                        _create_text_element_from_line(
                            text_el, pending_lead, pending_runs, pos_x, pos_y
                        )
                    )
                    pending_runs = []
                    pending_lead = None
                pos_x, pos_y = compute_line_positions(text_el, child, pos_x, pos_y)
            # Empty tspans still move the position but add no run.
            if visible:
                pending_runs.append(child)

        if pending_runs or pending_lead:
            produced.append(
                _create_text_element_from_line(
                    text_el, pending_lead, pending_runs, pos_x, pos_y
                )
            )
        return produced

    # Pick the targets up front so the tree is not edited while it is walked.
    targets = [
        el
        for el in svg_root.iter()
        if is_svg_tag(el, "text") and any(is_svg_tag(c, "tspan") for c in el)
    ]

    changed = False
    for text_el in targets:
        parent = parent_of.get(text_el)
        if parent is None:
            continue
        starts_line = any(
            is_svg_tag(child, "tspan") and is_new_line_tspan(child)
            for child in text_el
        )
        if not starts_line:
            continue

        # Opt-in: keep a qualifying paragraph as one annotated <text>.
        if merge_paragraphs:
            paragraph = _classify_paragraph_block(text_el, is_svg_tag, is_new_line_tspan)
            if paragraph is not None:
                _emit_mergeable_paragraph(text_el, *paragraph)
                changed = True
                continue

        replacements = split_into_lines(text_el)
        if not replacements:
            continue

        try:
            slot = list(parent).index(text_el)
        except ValueError:
            slot = None
        # Insert at the original slot so drawing order is unchanged.
        for offset, node in enumerate(replacements):
            if slot is None:
                parent.append(node)
            else:
                parent.insert(slot + offset, node)
        parent.remove(text_el)
        changed = True
    return changed
def _has_tspan_children(elem: ET.Element) -> bool:
    """Tell whether ``elem`` has at least one direct <tspan> child."""
    tspan_tag = f"{{{SVG_NS}}}tspan"
    for child in elem:
        if child.tag == tspan_tag:
            return True
    return False
def _copy_inline_tspan(src: ET.Element, strip_line_attrs: bool) -> ET.Element:
    """Return a detached copy of ``src`` for use as an inline run.

    The copy keeps ``src``'s attributes, leading text, tail text and nested
    tspans (copied recursively, attributes untouched). Nested children that
    are not tspans are not copied.

    Args:
        src: The tspan to copy; left unmodified.
        strip_line_attrs: When True, ``x``, ``y`` and ``dy`` are left off the
            top-level copy; ``dx`` is kept.

    Returns:
        The new tspan element.
    """
    tspan_tag = f"{{{SVG_NS}}}tspan"
    dropped = ("x", "y", "dy") if strip_line_attrs else ()
    kept_attrs = {key: value for key, value in src.attrib.items() if key not in dropped}

    clone = ET.Element(tspan_tag, kept_attrs)
    clone.text = src.text
    clone.tail = src.tail
    nested_copies = [
        _copy_inline_tspan(nested, strip_line_attrs=False)
        for nested in src
        if nested.tag == tspan_tag
    ]
    clone.extend(nested_copies)
    return clone
def _create_text_element_from_line(
    text_el: ET.Element,
    lead_text: str | None,
    tspans: list[ET.Element],
    x: float | None,
    y: float | None,
) -> ET.Element:
    """Build the <text> element for one visual line.

    The new element starts with the styling of ``text_el`` (via
    copy_text_attrs) and the given position.

    Compact form: when the line is a single tspan with no nested tspans, no
    leading text and no visible text after it, the result is a plain
    ``<text>content</text>`` whose style is the merge of parent and tspan
    styles and whose TEXT_STYLE_ATTRS are overridden by the tspan's. A tspan
    transform is appended to the parent's.

    Structured form: otherwise the leading text and copies of the tspans
    (including nested tspans and tail text) are kept, so per-run formatting
    and any text following a tspan survive.

    Args:
        text_el: The original <text> the line came from.
        lead_text: Text that precedes the first tspan, if any.
        tspans: The tspans that make up the line.
        x: Absolute x of the line; the attribute is omitted when ``None``.
        y: Absolute y of the line; the attribute is omitted when ``None``.

    Returns:
        The new, detached <text> element.
    """
    ne = ET.Element(f"{{{SVG_NS}}}text")
    # Copy attrs from parent <text>
    copy_text_attrs(text_el, ne, exclude={"x", "y"})
    # A None attribute value cannot be serialized, so leave the attribute off
    # rather than storing None.
    for axis, coord in (("x", x), ("y", y)):
        if coord is not None:
            ne.set(axis, format_number(coord))
    # Transform
    p_tf = text_el.get("transform")
    if p_tf:
        ne.set("transform", p_tf)

    sole = tspans[0] if len(tspans) == 1 else None
    compact = (
        not lead_text
        and sole is not None
        and not _has_tspan_children(sole)
        # Visible text after the tspan would be lost in the compact form,
        # because only the tspan's own content is read there.
        and not (sole.tail or "").strip()
    )

    if compact:
        content = collect_text_content(sole)
        # Merge style
        merged_style = merge_styles(text_el.get("style"), sole.get("style"))
        if merged_style:
            ne.set("style", merged_style)
        # Override specific attributes from tspan. Sorted so attributes that
        # are new on the element are added in a stable order.
        for attr in sorted(TEXT_STYLE_ATTRS):
            cv = sole.get(attr)
            if cv is not None:
                ne.set(attr, cv)
        # Combine transform
        c_tf = sole.get("transform")
        if p_tf and c_tf:
            ne.set("transform", f"{p_tf} {c_tf}")
        elif c_tf:
            ne.set("transform", c_tf)
        ne.text = content
    else:
        # Preserve tspan structure, including nested inline tspans and tail text
        if lead_text:
            ne.text = lead_text
        for tspan in tspans:
            ne.append(_copy_inline_tspan(tspan, strip_line_attrs=True))
    return ne
def process_svg_file(
    src_path: str,
    dst_path: str,
    merge_paragraphs: bool = False,
) -> bool:
    """Flatten eligible tspan lines of one SVG file and write the result.

    The output file is written whenever the source parses, whether or not
    anything was changed. When the source is not well-formed XML a warning is
    printed and nothing is written.

    Args:
        src_path: SVG file to read.
        dst_path: File to write; missing parent directories are created.
        merge_paragraphs: Passed through to flatten_text_with_tspans.

    Returns:
        True if at least one <text> element was rewritten, False otherwise
        (including the parse-failure case).
    """
    try:
        tree = ET.parse(src_path)
    except ET.ParseError as e:
        print(f"[WARN] Failed to parse {src_path}: {e}")
        return False
    changed = flatten_text_with_tspans(tree, merge_paragraphs=merge_paragraphs)
    # Ensure destination directory exists. A bare file name has an empty
    # dirname, and os.makedirs("") raises, so only create a real directory.
    dst_dir = os.path.dirname(dst_path)
    if dst_dir:
        os.makedirs(dst_dir, exist_ok=True)
    # Write out XML without XML declaration to mimic input style
    tree.write(dst_path, encoding="utf-8", xml_declaration=False, method="xml")
    return changed
def _compute_default_out_base(inp: str) -> str:
"""Compute default output path for directory or file input."""
if os.path.isdir(inp):
# Default: if input ends with svg_output, use sibling svg_output_flattext;
# otherwise append _flattext to the directory name at the same level.
head, tail = os.path.split(os.path.normpath(inp))
if tail == "svg_output":
return os.path.join(head, "svg_output_flattext")
return inp.rstrip("/\\") + "_flattext"
else:
base, ext = os.path.splitext(inp)
return base + "_flattext" + ext
def _interactive_get_paths() -> tuple[str | None, str | None]:
    """Prompt for the input path and an optional output path.

    The input prompt repeats until an existing path is entered. Entering
    ``q`` / ``quit`` / ``exit``, an empty line, or reaching end of input
    (closed or non-interactive stdin) cancels.

    Returns:
        ``(inp, out_base)`` with ``~`` expanded, or ``(None, None)`` when
        cancelled. ``out_base`` falls back to _compute_default_out_base(inp)
        when the answer is empty or input ends at the output prompt.
    """
    print("[Interactive mode] No arguments provided; running interactively.")
    print("Please enter the path to process (SVG file or directory containing SVGs).")
    print("Enter q to quit.\n")
    while True:
        try:
            raw = input("Input path (file/dir): ").strip()
        except EOFError:
            # No more input can arrive; treat it like an explicit quit
            # instead of surfacing a traceback.
            return None, None
        if raw.lower() in {"q", "quit", "exit"} or raw == "":
            return None, None
        inp = os.path.expanduser(raw)
        if os.path.exists(inp):
            break
        print("Path does not exist. Please re-enter or enter q to quit.")
    default_out = _compute_default_out_base(inp)
    if os.path.isdir(inp):
        prompt = f"Output directory [default: {default_out}]: "
    else:
        prompt = f"Output file [default: {default_out}]: "
    try:
        raw_out = input(prompt).strip()
    except EOFError:
        raw_out = ""  # same as pressing Enter: accept the default
    out_base = os.path.expanduser(raw_out) if raw_out else default_out
    return inp, out_base
def main() -> None:
    """Run the CLI entry point.

    Without an input argument, or with ``-i``, paths are asked for
    interactively. A directory input is walked recursively and every
    ``.svg`` file is written to the same relative location under the output
    directory; a file input produces a single output file. When no output is
    given, _compute_default_out_base picks one.

    Exits with status 0 when the interactive prompt is cancelled, and with
    an argparse usage error when the given input path does not exist.
    """
    # CLI parsing with optional interactive mode
    parser = argparse.ArgumentParser(
        description="Flatten <tspan> lines into multiple <text> nodes for better compatibility.",
        add_help=True,
    )
    parser.add_argument("input", nargs="?", help="Input path: SVG file or directory")
    parser.add_argument("output", nargs="?", help="Optional output file/dir")
    parser.add_argument(
        "-i",
        "--interactive",
        action="store_true",
        help="Run in interactive prompt mode to input paths",
    )
    parser.add_argument(
        "--merge-paragraphs",
        action="store_true",
        default=False,
        help=(
            "Opt-in: merge mergeable paragraph blocks (same x, dy clustered "
            "around one base line-height) into a single <text> annotated for "
            "downstream multi-<a:p> conversion. Default off — every line-break "
            "tspan becomes its own <text>, preserving SVG pixel fidelity."
        ),
    )
    args = parser.parse_args()

    if args.interactive or not args.input:
        inp, out_base = _interactive_get_paths()
        if not inp:
            print("Cancelled. Usage: python3 scripts/svg_finalize/flatten_tspan.py <input_dir_or_svg> [output_dir]")
            sys.exit(0)
    else:
        inp = args.input
        out_base = args.output
        # The interactive prompt already insists on an existing path; do the
        # same here so a typo ends in a usage error, not a traceback from the
        # XML parser.
        if not os.path.exists(inp):
            parser.error(f"Input path does not exist: {inp}")

    if os.path.isdir(inp):
        # If output base not provided, create a sibling folder named svg_output_flattext for svg_output
        if out_base is None:
            out_base = _compute_default_out_base(inp)
        total = 0
        changed_count = 0
        out_base_abs = os.path.abspath(out_base)
        for root, dirs, files in os.walk(inp):
            # Avoid recursing into the output directory when it lives under input
            dirs[:] = [d for d in dirs if os.path.abspath(os.path.join(root, d)) != out_base_abs]
            rel_root = os.path.relpath(root, inp)
            for f in files:
                if not f.lower().endswith(".svg"):
                    continue
                src = os.path.join(root, f)
                dst = os.path.join(out_base, rel_root, f) if rel_root != "." else os.path.join(out_base, f)
                total += 1
                changed = process_svg_file(src, dst, merge_paragraphs=args.merge_paragraphs)
                if changed:
                    changed_count += 1
        print(f"Processed {total} SVG(s). With <tspan> flattened: {changed_count}.")
        print(f"Output written to: {out_base}")
    else:
        src = inp
        if out_base is None:
            out_base = _compute_default_out_base(src)
        changed = process_svg_file(src, out_base, merge_paragraphs=args.merge_paragraphs)
        print(f"Written: {out_base} (flattened: {changed})")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()