zcbot/skills/ppt/scripts/svg_finalize/flatten_tspan.py

import os
import sys
import re
import argparse
from xml.etree import ElementTree as ET


SVG_NS = "http://www.w3.org/2000/svg"
NSMAP = {"svg": SVG_NS}

# Ensure pretty element names without ns0 prefix on write
ET.register_namespace("", SVG_NS)


TEXT_STYLE_ATTRS = {
    # common text styling
    "font-family",
    "font-size",
    "font-weight",
    "font-style",
    "font-variant",
    "font-stretch",
    "letter-spacing",
    "word-spacing",
    "kerning",
    "text-anchor",
    "text-decoration",
    "dominant-baseline",
    "writing-mode",
    "direction",
    # color/paint
    "fill",
    "fill-opacity",
    "stroke",
    "stroke-width",
    "stroke-opacity",
    "opacity",
    "paint-order",
    # transforms/filters
    "transform",
    "clip-path",
    "filter",
}


num_re = re.compile(r"^[\s,]*([+-]?(?:\d+\.?\d*|\d*\.\d+))")


def parse_first_number(val: str | None) -> float | None:
    """Parse the first numeric token from an SVG attribute value."""
    if val is None:
        return None
    m = num_re.match(val)
    if not m:
        return None
    try:
        return float(m.group(1))
    except ValueError:
        return None


def format_number(n: float | None) -> str | None:
    """Format a float for compact SVG attribute output."""
    if n is None:
        return None
    if abs(n - round(n)) < 1e-6:
        return str(int(round(n)))
    # Trim trailing zeros
    s = f"{n:.6f}".rstrip("0").rstrip(".")
    return s


def parse_style(style_str: str | None) -> dict[str, str]:
    """Parse an inline SVG style string into a mapping."""
    out: dict[str, str] = {}
    if not style_str:
        return out
    # split by ; and then :
    for chunk in style_str.split(";"):
        if not chunk.strip():
            continue
        if ":" in chunk:
            k, v = chunk.split(":", 1)
            out[k.strip()] = v.strip()
    return out


def style_to_string(style_map: dict[str, str]) -> str:
    """Serialize a style mapping back into an inline SVG style string."""
    if not style_map:
        return ""
    return ";".join(f"{k}:{v}" for k, v in style_map.items())


def merge_styles(parent_style: str | None, child_style: str | None) -> str:
    """Merge parent and child inline styles, preferring child values."""
    p = parse_style(parent_style)
    c = parse_style(child_style)
    p.update(c)  # child overrides
    return style_to_string(p)


def get_attr(elem: ET.Element | None, name: str, default: str | None = None) -> str | None:
    """Read an attribute from an element with a default fallback."""
    return elem.get(name) if elem is not None and name in elem.attrib else default


def compute_line_positions(
    text_el: ET.Element,
    tspan_el: ET.Element,
    cur_x: float | None,
    cur_y: float | None,
) -> tuple[float | None, float | None]:
    """
    Compute absolute x,y for a tspan based on parent <text> current baseline and tspan's x/y/dx/dy.
    Returns (new_x, new_y).
    """
    del text_el
    # Prefer explicit x/y on tspan
    t_x_attr = get_attr(tspan_el, "x")
    t_y_attr = get_attr(tspan_el, "y")
    t_dx_attr = get_attr(tspan_el, "dx")
    t_dy_attr = get_attr(tspan_el, "dy")

    if t_x_attr is not None:
        nx = parse_first_number(t_x_attr)
    elif t_dx_attr is not None:
        dx = parse_first_number(t_dx_attr) or 0.0
        nx = (cur_x or 0.0) + dx
    else:
        nx = cur_x

    if t_y_attr is not None:
        ny = parse_first_number(t_y_attr)
    elif t_dy_attr is not None:
        dy = parse_first_number(t_dy_attr) or 0.0
        ny = (cur_y or 0.0) + dy
    else:
        ny = cur_y

    return nx, ny


def collect_text_content(el: ET.Element) -> str:
    """Collect all text content from an element subtree."""
    # Gather all text within the element (flatten nested tspans if any)
    parts = []
    for s in el.itertext():
        if s:
            parts.append(s)
    return "".join(parts)


def copy_text_attrs(
    src_el: ET.Element,
    dst_el: ET.Element,
    exclude: set[str] | None = None,
) -> None:
    """Copy shared text styling attributes between SVG text elements."""
    exclude = exclude or set()
    # Copy style string first
    if "style" in src_el.attrib and "style" not in exclude:
        dst_el.set("style", src_el.attrib["style"])
    for k in TEXT_STYLE_ATTRS:
        if k in exclude:
            continue
        v = src_el.get(k)
        if v is not None:
            dst_el.set(k, v)
    # xml:space preservation
    xml_space = src_el.get("{http://www.w3.org/XML/1998/namespace}space")
    if xml_space is not None and "{http://www.w3.org/XML/1998/namespace}space" not in exclude:
        dst_el.set("{http://www.w3.org/XML/1998/namespace}space", xml_space)


PARAGRAPH_MARK_ATTR = "data-paragraph-line-height"
PARAGRAPH_SPACE_BEFORE_ATTR = "data-paragraph-space-before"
# Marks a line-break tspan as a SOFT break inside the current paragraph
# (SVG used dy to simulate text wrapping; the downstream converter should
# merge its runs into the previous <a:p> rather than start a new one).
PARAGRAPH_SOFT_BREAK_ATTR = "data-paragraph-soft-break"

# Tolerance for detecting "base line-height" vs "paragraph gap": dy values
# within ±DY_TOLERANCE_PX of each other are considered the same line-height.
DY_TOLERANCE_PX = 0.5
# Cap on dy / base ratio. Anything beyond this (e.g. a 5x gap) is rejected
# as a real section break that shouldn't merge into one text frame.
MAX_DY_MULTIPLIER = 3.0


def _tspan_has_positional_descendant(tspan: ET.Element) -> bool:
    """Return True if any nested tspan inside this one carries x/y/dy."""
    for child in list(tspan):
        if child.tag != f"{{{SVG_NS}}}tspan":
            continue
        for k in ("x", "y", "dy"):
            if child.get(k) is not None:
                return True
        if _tspan_has_positional_descendant(child):
            return True
    return False


def _classify_paragraph_block(
    text_el: ET.Element,
    is_svg_tag,
    is_new_line_tspan,
) -> tuple[float, list[float], list[bool], list[list[ET.Element]]] | None:
    """Detect a mergeable paragraph block.

    Returns ``(base_line_height_px, extra_space_before_px_per_line,
    is_soft_break_per_line, line_groups)`` if the children form a mergeable paragraph.
    Each list has one entry per direct-child tspan (line):

      - extra_space_before_px_per_line[i]: extra px above base line-height,
        used as <a:spcBef> on the downstream <a:p>. First entry is 0.
      - is_soft_break_per_line[i]: True if this line should merge into the
        previous <a:p> (SVG dy was simulating word-wrap); False if it starts
        a fresh <a:p>. First entry is always False (paragraph head).

    Conditions (all must hold):
      - No leading text directly under <text>.
      - Every direct child is a <tspan>.
      - Every logical line starts with a new-line tspan.
      - Direct-child inline formatting tspans without x/y/dy are allowed only
        after a line starts; they are normalized into the previous line.
      - First line-break tspan has dy == 0 (or no dy).
      - All subsequent line-break tspans use positive dy (no <y>).
      - dy values cluster around a single minimum "base line-height";
        any larger dy must be ≤ MAX_DY_MULTIPLIER × base. Anything larger
        is treated as a section break and rejected.
      - Every line-break tspan that sets x repeats the parent <text>'s x.
      - No nested tspan inside any line carries x/y/dy.
    """
    base_x = parse_first_number(get_attr(text_el, "x"))
    if (text_el.text or "").strip():
        return None

    direct_tspans = [c for c in list(text_el) if is_svg_tag(c, "tspan")]
    direct_children_all = [c for c in list(text_el)]
    if len(direct_tspans) < 2:
        return None
    if len(direct_tspans) != len(direct_children_all):
        return None

    line_groups: list[list[ET.Element]] = []
    for tspan in direct_tspans:
        if is_new_line_tspan(tspan):
            line_groups.append([tspan])
        else:
            if not line_groups:
                return None
            if _tspan_has_positional_descendant(tspan):
                return None
            line_groups[-1].append(tspan)

    if len(line_groups) < 2:
        return None

    # First pass: validate per-line structural rules and collect dy values.
    dy_values: list[float] = []  # one per line (0 for first)
    for idx, group in enumerate(line_groups):
        tspan = group[0]

        t_y = get_attr(tspan, "y")
        if t_y is not None:
            return None

        t_x_raw = get_attr(tspan, "x")
        if t_x_raw is not None:
            t_x = parse_first_number(t_x_raw)
            if base_x is None or t_x is None or abs(t_x - base_x) > 1e-6:
                return None

        t_dy_raw = get_attr(tspan, "dy")
        t_dy = parse_first_number(t_dy_raw) if t_dy_raw is not None else None

        if idx == 0:
            if t_dy is not None and abs(t_dy) > 1e-6:
                return None
            dy_values.append(0.0)
        else:
            if t_dy is None or t_dy <= 0:
                return None
            dy_values.append(t_dy)

        if _tspan_has_positional_descendant(tspan):
            return None

    # Second pass: pick the base line-height as the minimum positive dy and
    # express each line's dy as base + extra space-before.
    positive_dys = [d for d in dy_values[1:] if d > 0]
    if not positive_dys:
        return None
    base = min(positive_dys)

    extras: list[float] = [0.0]  # first line never has space-before
    soft_breaks: list[bool] = [False]  # first line starts a paragraph
    for d in dy_values[1:]:
        if d + DY_TOLERANCE_PX < base:
            return None  # below base — line overlap, not a paragraph
        if d > base * MAX_DY_MULTIPLIER + DY_TOLERANCE_PX:
            return None  # gap too large — treat as section break
        extra = d - base
        if extra < 0:
            extra = 0.0
        # dy at the base line-height = soft break (SVG was simulating wrap);
        # dy strictly greater than base = hard paragraph break.
        is_soft = abs(extra) <= DY_TOLERANCE_PX
        extras.append(0.0 if is_soft else extra)
        soft_breaks.append(is_soft)

    return base, extras, soft_breaks, line_groups


def _emit_mergeable_paragraph(
    text_el: ET.Element,
    base_dy: float,
    extras: list[float],
    soft_breaks: list[bool],
    line_groups: list[list[ET.Element]],
) -> None:
    """Rewrite text_el in place so it stays a single <text> with paragraph rows.

    The base line-height goes on the parent <text> via PARAGRAPH_MARK_ATTR.
    Each direct-child tspan is normalized: x/y/dy stripped; inline-run
    styling and nested tspans are preserved. Per-tspan attrs:
      - PARAGRAPH_SOFT_BREAK_ATTR="1" on tspans that should be appended to
        the previous <a:p> downstream (SVG used dy to simulate wrap)
      - PARAGRAPH_SPACE_BEFORE_ATTR on tspans that open a new paragraph
        with an extra gap (omitted when 0)
    """
    text_el.set(PARAGRAPH_MARK_ATTR, format_number(base_dy))

    # Normalize authoring variants before the downstream converter reads the
    # paragraph: a line-break tspan may be followed by direct-child inline
    # formatting tspans. Move those inline runs under the line-break tspan so
    # every direct child of <text> is one logical visual line.
    normalized_lines: list[ET.Element] = []
    for group in line_groups:
        line = group[0]
        for inline_tspan in group[1:]:
            try:
                text_el.remove(inline_tspan)
            except ValueError:
                pass
            if inline_tspan.tail and not inline_tspan.tail.strip():
                inline_tspan.tail = None
            line.append(inline_tspan)
        normalized_lines.append(line)

    for child in list(text_el):
        if child not in normalized_lines:
            text_el.remove(child)

    extras_iter = iter(extras)
    soft_iter = iter(soft_breaks)
    for tspan in normalized_lines:
        for k in ("x", "y", "dy"):
            if k in tspan.attrib:
                del tspan.attrib[k]
        try:
            extra = next(extras_iter)
            soft = next(soft_iter)
        except StopIteration:
            extra = 0.0
            soft = False
        if soft:
            tspan.set(PARAGRAPH_SOFT_BREAK_ATTR, "1")
        elif extra > 1e-6:
            tspan.set(PARAGRAPH_SPACE_BEFORE_ATTR, format_number(extra))


def flatten_text_with_tspans(
    tree: ET.ElementTree,
    merge_paragraphs: bool = False,
) -> bool:
    """Flatten multi-line tspan text into independent text nodes when needed.

    When ``merge_paragraphs`` is True, mergeable paragraph blocks (same x,
    dy clustered around one base line-height) are kept as a single <text>
    so downstream conversion emits one editable PowerPoint text frame
    with multiple <a:p>. Default False preserves the original behavior:
    every line-break tspan becomes its own <text>, matching the SVG's
    pixel-fidelity contract.
    """
    root = tree.getroot()
    parent_map = {c: p for p in root.iter() for c in p}
    changed = False

    def is_svg_tag(el: ET.Element, name: str) -> bool:
        return el.tag == f"{{{SVG_NS}}}{name}"

    def is_new_line_tspan(tspan: ET.Element) -> bool:
        """Determine whether a tspan represents a new line (has its own y or non-zero dy)."""
        t_dy_attr = get_attr(tspan, "dy")
        t_y_attr = get_attr(tspan, "y")
        t_x_attr = get_attr(tspan, "x")
        dy_val = parse_first_number(t_dy_attr) if t_dy_attr is not None else None
        # Has its own y attribute, or has non-zero dy, or has its own x attribute (indicating a new line)
        if t_y_attr is not None:
            return True
        if dy_val is not None and dy_val != 0:
            return True
        # If tspan has an x attribute and there are preceding sibling tspans, treat it as a new line
        if t_x_attr is not None:
            return True
        return False

    # Collect candidates first to avoid modifying while iterating
    candidates = []
    for el in root.iter():
        if is_svg_tag(el, "text"):
            has_tspan_child = any(is_svg_tag(c, "tspan") for c in list(el))
            if has_tspan_child:
                candidates.append(el)

    for text_el in candidates:
        parent = parent_map.get(text_el)
        if parent is None:
            continue

        # First check whether any tspan needs flattening (dy != 0 or has its own y attribute)
        needs_flatten = False
        for child in list(text_el):
            if not is_svg_tag(child, "tspan"):
                continue
            if is_new_line_tspan(child):
                needs_flatten = True
                break

        # If no tspan needs a line break, skip the entire text element
        if not needs_flatten:
            continue

        # Paragraph fast-path (opt-in via merge_paragraphs=True): if the
        # children form a mergeable paragraph (same x, dy clustered around
        # one base line-height with optional paragraph gaps, no nested
        # positional tspans), keep as one <text> and let the downstream
        # converter emit multiple <a:p> runs. When disabled, every tspan
        # gets its own independent <text> so the SVG's exact line layout
        # is preserved in PowerPoint.
        if merge_paragraphs:
            paragraph = _classify_paragraph_block(text_el, is_svg_tag, is_new_line_tspan)
            if paragraph is not None:
                base_dy, extras, soft_breaks, line_groups = paragraph
                _emit_mergeable_paragraph(text_el, base_dy, extras, soft_breaks, line_groups)
                changed = True
                continue

        base_x = parse_first_number(get_attr(text_el, "x")) or 0.0
        base_y = parse_first_number(get_attr(text_el, "y")) or 0.0
        cur_x, cur_y = base_x, base_y

        new_texts = []

        # Collect tspan elements belonging to the same line
        current_line_tspans = []
        current_line_lead_text = None

        # Leading text directly under <text>
        lead_text = (text_el.text or "").strip()
        if lead_text:
            current_line_lead_text = lead_text

        for idx, child in enumerate(list(text_el)):
            if not is_svg_tag(child, "tspan"):
                continue

            content = collect_text_content(child)

            # Check whether this tspan starts a new line
            if is_new_line_tspan(child):
                # Save previously accumulated same-line tspans first
                if current_line_tspans or current_line_lead_text:
                    ne = _create_text_element_from_line(
                        text_el, current_line_lead_text, current_line_tspans, cur_x, cur_y
                    )
                    new_texts.append(ne)
                    current_line_tspans = []
                    current_line_lead_text = None

                # Update position
                nx, ny = compute_line_positions(text_el, child, cur_x, cur_y)
                cur_x, cur_y = nx, ny

            # If content is not empty, add to the current line
            if content.strip():
                current_line_tspans.append(child)

        # Process the last line
        if current_line_tspans or current_line_lead_text:
            ne = _create_text_element_from_line(
                text_el, current_line_lead_text, current_line_tspans, cur_x, cur_y
            )
            new_texts.append(ne)

        if new_texts:
            # Replace original <text> with the list of new <text> nodes
            try:
                idx = list(parent).index(text_el)
            except ValueError:
                idx = None

            # Insert in place to preserve drawing order
            for i, ne in enumerate(new_texts):
                if idx is not None:
                    parent.insert(idx + i, ne)
                else:
                    parent.append(ne)

            # Remove the original <text>
            parent.remove(text_el)
            changed = True

    return changed


def _has_tspan_children(elem: ET.Element) -> bool:
    """Return True if elem contains any nested <tspan> children (inline runs)."""
    return any(c.tag == f"{{{SVG_NS}}}tspan" for c in list(elem))


def _copy_inline_tspan(src: ET.Element, strip_line_attrs: bool) -> ET.Element:
    """Deep-copy a tspan as an inline run, preserving nested tspan structure, head text, and tail text.

    When strip_line_attrs is True, x/y/dy on the copied tspan are dropped because the
    enclosing <text> now positions the line. dx is preserved (safe inline kerning).
    Nested tspans are copied recursively without stripping (they are already inline-only).
    """
    new = ET.Element(f"{{{SVG_NS}}}tspan")
    for k, v in src.attrib.items():
        if strip_line_attrs and k in ("x", "y", "dy"):
            continue
        new.set(k, v)
    new.text = src.text
    for child in list(src):
        if child.tag == f"{{{SVG_NS}}}tspan":
            new.append(_copy_inline_tspan(child, strip_line_attrs=False))
    new.tail = src.tail
    return new


def _create_text_element_from_line(
    text_el: ET.Element,
    lead_text: str | None,
    tspans: list[ET.Element],
    x: float | None,
    y: float | None,
) -> ET.Element:
    """
    Create a text element from a line's content (may contain leading text and multiple tspans).
    If there is only one tspan with no nested tspan children and no leading text, the line
    collapses to a plain <text>...</text>. Otherwise the tspan structure (including any
    nested inline tspans) is preserved so per-run formatting survives the flatten step.
    """
    ne = ET.Element(f"{{{SVG_NS}}}text")

    # Copy attrs from parent <text>
    copy_text_attrs(text_el, ne, exclude={"x", "y"})
    ne.set("x", format_number(x))
    ne.set("y", format_number(y))

    # Transform
    p_tf = text_el.get("transform")
    if p_tf:
        ne.set("transform", p_tf)

    # Compact path: a single tspan with no nested inline runs collapses to <text>text</text>
    if not lead_text and len(tspans) == 1 and not _has_tspan_children(tspans[0]):
        tspan = tspans[0]
        content = collect_text_content(tspan)

        # Merge style
        merged_style = merge_styles(text_el.get("style"), tspan.get("style"))
        if merged_style:
            ne.set("style", merged_style)

        # Override specific attributes from tspan
        for attr in TEXT_STYLE_ATTRS:
            cv = tspan.get(attr)
            if cv is not None:
                ne.set(attr, cv)

        # Combine transform
        c_tf = tspan.get("transform")
        if p_tf and c_tf:
            ne.set("transform", f"{p_tf} {c_tf}")
        elif c_tf:
            ne.set("transform", c_tf)

        ne.text = content
    else:
        # Preserve tspan structure, including nested inline tspans and tail text
        if lead_text:
            ne.text = lead_text

        for tspan in tspans:
            ne.append(_copy_inline_tspan(tspan, strip_line_attrs=True))

    return ne


def process_svg_file(
    src_path: str,
    dst_path: str,
    merge_paragraphs: bool = False,
) -> bool:
    """Flatten eligible tspan lines in one SVG file."""
    try:
        tree = ET.parse(src_path)
    except ET.ParseError as e:
        print(f"[WARN] Failed to parse {src_path}: {e}")
        return False

    changed = flatten_text_with_tspans(tree, merge_paragraphs=merge_paragraphs)

    # Ensure destination directory exists
    os.makedirs(os.path.dirname(dst_path), exist_ok=True)

    # Write out XML without XML declaration to mimic input style
    tree.write(dst_path, encoding="utf-8", xml_declaration=False, method="xml")
    return changed


def _compute_default_out_base(inp: str) -> str:
    """Compute default output path for directory or file input."""
    if os.path.isdir(inp):
        # Default: if input ends with svg_output, use sibling svg_output_flattext;
        # otherwise append _flattext to the directory name at the same level.
        head, tail = os.path.split(os.path.normpath(inp))
        if tail == "svg_output":
            return os.path.join(head, "svg_output_flattext")
        return inp.rstrip("/\\") + "_flattext"
    else:
        base, ext = os.path.splitext(inp)
        return base + "_flattext" + ext


def _interactive_get_paths() -> tuple[str | None, str | None]:
    """
    Interactive mode: prompt the user for input path (SVG file or directory)
    and optional output path. Returns (inp, out_base) or (None, None) if cancelled.
    """
    print("[Interactive mode] No arguments provided; running interactively.")
    print("Please enter the path to process (SVG file or directory containing SVGs).")
    print("Enter q to quit.\n")

    while True:
        raw = input("Input path (file/dir): ").strip()
        if raw.lower() in {"q", "quit", "exit"} or raw == "":
            return None, None
        inp = os.path.expanduser(raw)
        if os.path.exists(inp):
            break
        print("Path does not exist. Please re-enter or enter q to quit.")

    default_out = _compute_default_out_base(inp)
    if os.path.isdir(inp):
        prompt = f"Output directory [default: {default_out}]: "
    else:
        prompt = f"Output file [default: {default_out}]: "

    raw_out = input(prompt).strip()
    out_base = os.path.expanduser(raw_out) if raw_out else default_out

    return inp, out_base


def main() -> None:
    """Run the CLI entry point."""
    # CLI parsing with optional interactive mode
    parser = argparse.ArgumentParser(
        description="Flatten <tspan> lines into multiple <text> nodes for better compatibility.",
        add_help=True,
    )
    parser.add_argument("input", nargs="?", help="Input path: SVG file or directory")
    parser.add_argument("output", nargs="?", help="Optional output file/dir")
    parser.add_argument(
        "-i",
        "--interactive",
        action="store_true",
        help="Run in interactive prompt mode to input paths",
    )
    parser.add_argument(
        "--merge-paragraphs",
        action="store_true",
        default=False,
        help=(
            "Opt-in: merge mergeable paragraph blocks (same x, dy clustered "
            "around one base line-height) into a single <text> annotated for "
            "downstream multi-<a:p> conversion. Default off — every line-break "
            "tspan becomes its own <text>, preserving SVG pixel fidelity."
        ),
    )

    args = parser.parse_args()

    if args.interactive or not args.input:
        inp, out_base = _interactive_get_paths()
        if not inp:
            print("Cancelled. Usage: python3 scripts/svg_finalize/flatten_tspan.py <input_dir_or_svg> [output_dir]")
            sys.exit(0)
    else:
        inp = args.input
        out_base = args.output

    if os.path.isdir(inp):
        # If output base not provided, create a sibling folder named svg_output_flattext for svg_output
        if out_base is None:
            out_base = _compute_default_out_base(inp)

        total = 0
        changed_count = 0
        out_base_abs = os.path.abspath(out_base)
        for root, dirs, files in os.walk(inp):
            # Avoid recursing into the output directory when it lives under input
            dirs[:] = [d for d in dirs if os.path.abspath(os.path.join(root, d)) != out_base_abs]
            rel_root = os.path.relpath(root, inp)
            for f in files:
                if not f.lower().endswith(".svg"):
                    continue
                src = os.path.join(root, f)
                dst = os.path.join(out_base, rel_root, f) if rel_root != "." else os.path.join(out_base, f)
                total += 1
                changed = process_svg_file(src, dst, merge_paragraphs=args.merge_paragraphs)
                if changed:
                    changed_count += 1
        print(f"Processed {total} SVG(s). With <tspan> flattened: {changed_count}.")
        print(f"Output written to: {out_base}")
    else:
        src = inp
        if out_base is None:
            out_base = _compute_default_out_base(src)
        changed = process_svg_file(src, out_base, merge_paragraphs=args.merge_paragraphs)
        print(f"Written: {out_base} (flattened: {changed})")


if __name__ == "__main__":
    main()