"""Coordinate helpers, color parsing, and font utilities for DrawingML conversion.""" from __future__ import annotations import re import math from xml.etree import ElementTree as ET from .drawingml_context import AffineMatrix, ConvertContext, IDENTITY_MATRIX # --------------------------------------------------------------------------- # Constants # --------------------------------------------------------------------------- SVG_NS = 'http://www.w3.org/2000/svg' XLINK_NS = 'http://www.w3.org/1999/xlink' EMU_PER_PX = 9525 # 1 SVG px = 9525 EMU (96 DPI) FONT_PX_TO_HUNDREDTHS_PT = 75 # 1px = 0.75pt -> 75 hundredths-of-a-point ANGLE_UNIT = 60000 # DrawingML angle: 60000ths of a degree # SVG attributes inheritable from parent INHERITABLE_ATTRS = [ 'fill', 'stroke', 'stroke-width', 'stroke-dasharray', 'stroke-linecap', 'stroke-linejoin', 'opacity', 'fill-opacity', 'stroke-opacity', 'font-family', 'font-size', 'font-weight', 'font-style', 'text-anchor', 'letter-spacing', 'text-decoration', ] # Known East Asian fonts EA_FONTS = { 'PingFang SC', 'PingFang TC', 'PingFang HK', 'Microsoft YaHei', 'Microsoft JhengHei', 'SimSun', 'SimHei', 'FangSong', 'KaiTi', 'STKaiti', 'STHeiti', 'STSong', 'STFangsong', 'STXihei', 'STZhongsong', 'Hiragino Sans', 'Hiragino Sans GB', 'Hiragino Mincho ProN', 'Hiragino Kaku Gothic ProN', 'Hiragino Kaku Gothic Pro', 'Hiragino Mincho Pro', 'Noto Sans SC', 'Noto Sans TC', 'Noto Serif SC', 'Noto Serif TC', 'Noto Sans JP', 'Noto Serif JP', 'Noto Sans CJK JP', 'Source Han Sans SC', 'Source Han Sans TC', 'Source Han Serif SC', 'Source Han Serif TC', 'Source Han Sans JP', 'Source Han Serif JP', 'WenQuanYi Micro Hei', 'WenQuanYi Zen Hei', 'YouYuan', 'LiSu', 'HuaWenKaiTi', 'Songti SC', 'Songti TC', # Windows 10/11 + Office default / common Simplified Chinese 'DengXian', 'DengXian Light', 'DengXian Bold', 'Microsoft YaHei UI', # Office display Chinese (华文 / 方正) — usually title-only, not on every client 'STXingkai', 'STLiti', 'STXinwei', 'STHupo', 'STCaiyun', 'FZShuTi', 'FZYaoti', # Common Traditional Chinese (Office) 'DFKai-SB', 'MingLiU', 'PMingLiU', 'MingLiU-ExtB', 'PMingLiU-ExtB', 'Microsoft JhengHei UI', # Japanese fonts (Windows-available) 'Yu Gothic', 'Yu Gothic UI', 'Yu Mincho', 'Meiryo', 'Meiryo UI', 'メイリオ', 'MS Gothic', 'MS Mincho', 'MS PGothic', 'MS PMincho', 'MS UI Gothic', # Korean 'Malgun Gothic', 'Gulim', 'Dotum', 'Batang', 'Noto Sans KR', 'Noto Serif KR', } SYSTEM_FONTS = {'system-ui', '-apple-system', 'BlinkMacSystemFont'} # macOS/Linux-only fonts -> Windows equivalents FONT_FALLBACK_WIN = { 'PingFang SC': 'Microsoft YaHei', 'PingFang TC': 'Microsoft JhengHei', 'PingFang HK': 'Microsoft JhengHei', 'Hiragino Sans': 'Microsoft YaHei', 'Hiragino Sans GB': 'Microsoft YaHei', 'Hiragino Mincho ProN': 'SimSun', 'STHeiti': 'SimHei', 'STSong': 'SimSun', 'STKaiti': 'KaiTi', 'STFangsong': 'FangSong', 'STXihei': 'Microsoft YaHei', 'STZhongsong': 'SimSun', 'Songti SC': 'SimSun', 'Songti TC': 'SimSun', 'Noto Sans SC': 'Microsoft YaHei', 'Noto Sans TC': 'Microsoft JhengHei', 'Noto Serif SC': 'SimSun', 'Noto Serif TC': 'SimSun', # Japanese: keep as-is if user specified (PowerPoint will fallback if uninstalled) # 'Noto Sans JP': → keep as 'Noto Sans JP' (do not map) # 'メイリオ': → keep as 'メイリオ' (Meiryo alias) 'メイリオ': 'Meiryo', 'Source Han Sans SC': 'Microsoft YaHei', 'Source Han Sans TC': 'Microsoft JhengHei', 'Source Han Serif SC': 'SimSun', 'Source Han Serif TC': 'SimSun', 'Source Han Sans JP': 'Noto Sans JP', 'Source Han Serif JP': 'Noto Serif JP', 'WenQuanYi Micro Hei': 'Microsoft YaHei', 'WenQuanYi Zen Hei': 'Microsoft YaHei', # Latin fonts (macOS / Linux / Web -> Windows) 'SF Pro': 'Segoe UI', 'SF Pro Display': 'Segoe UI', 'SF Pro Text': 'Segoe UI', 'SF Mono': 'Consolas', 'Menlo': 'Consolas', 'Monaco': 'Consolas', 'Helvetica Neue': 'Arial', 'Helvetica': 'Arial', 'Roboto': 'Segoe UI', 'Ubuntu': 'Segoe UI', 'Liberation Sans': 'Arial', 'Liberation Serif': 'Times New Roman', 'Liberation Mono': 'Consolas', 'DejaVu Sans': 'Segoe UI', 'DejaVu Serif': 'Times New Roman', 'DejaVu Sans Mono': 'Consolas', } GENERIC_FONT_MAP = { 'monospace': 'Consolas', 'sans-serif': 'Segoe UI', 'serif': 'Times New Roman', } # When the latin font is serif and no EA font is specified, # prefer SimSun (serif CJK) over Microsoft YaHei (sans-serif CJK). _SERIF_LATIN = { 'Times New Roman', 'Georgia', 'Garamond', 'Palatino', 'Palatino Linotype', 'Book Antiqua', 'Cambria', 'SimSun', 'Liberation Serif', 'DejaVu Serif', } # SVG stroke-dasharray -> DrawingML prstDash DASH_PRESETS = { '4,4': 'dash', '4 4': 'dash', '6,3': 'dash', '6 3': 'dash', '2,2': 'sysDot', '2 2': 'sysDot', '8,4': 'lgDash', '8 4': 'lgDash', '8,4,2,4': 'lgDashDot', '8 4 2 4': 'lgDashDot', } # --------------------------------------------------------------------------- # Coordinate helpers # --------------------------------------------------------------------------- def px_to_emu(px: float) -> int: """Convert SVG pixels to EMU.""" return round(px * EMU_PER_PX) def _f(val: str | None, default: float = 0.0) -> float: """Parse a float attribute value, returning default if missing.""" if val is None: return default try: return float(val) except (ValueError, TypeError): return default # --------------------------------------------------------------------------- # SVG transform matrix helpers # --------------------------------------------------------------------------- _TRANSFORM_RE = re.compile(r'([a-zA-Z]+)\(([^)]*)\)') _NUMBER_RE = re.compile(r'[-+]?(?:\d*\.\d+|\d+\.?)(?:[eE][-+]?\d+)?') def matrix_multiply(left: AffineMatrix, right: AffineMatrix) -> AffineMatrix: """Compose two SVG affine matrices, applying ``right`` before ``left``.""" a1, b1, c1, d1, e1, f1 = left a2, b2, c2, d2, e2, f2 = right return ( a1 * a2 + c1 * b2, b1 * a2 + d1 * b2, a1 * c2 + c1 * d2, b1 * c2 + d1 * d2, a1 * e2 + c1 * f2 + e1, b1 * e2 + d1 * f2 + f1, ) def _translate_matrix(tx: float, ty: float = 0.0) -> AffineMatrix: return (1.0, 0.0, 0.0, 1.0, tx, ty) def _scale_matrix(sx: float, sy: float | None = None) -> AffineMatrix: return (sx, 0.0, 0.0, sx if sy is None else sy, 0.0, 0.0) def _rotate_matrix(angle_deg: float, cx: float | None = None, cy: float | None = None) -> AffineMatrix: rad = math.radians(angle_deg) cos_a = math.cos(rad) sin_a = math.sin(rad) rot = (cos_a, sin_a, -sin_a, cos_a, 0.0, 0.0) if cx is None or cy is None: return rot return matrix_multiply( matrix_multiply(_translate_matrix(cx, cy), rot), _translate_matrix(-cx, -cy), ) def parse_transform_matrix(transform_str: str) -> AffineMatrix: """Parse an SVG transform list into one affine matrix.""" if not transform_str: return IDENTITY_MATRIX matrix = IDENTITY_MATRIX for name, raw_args in _TRANSFORM_RE.findall(transform_str): args = [float(n) for n in _NUMBER_RE.findall(raw_args)] name = name.lower() local = IDENTITY_MATRIX if name == 'matrix' and len(args) >= 6: local = (args[0], args[1], args[2], args[3], args[4], args[5]) elif name == 'translate' and args: local = _translate_matrix(args[0], args[1] if len(args) > 1 else 0.0) elif name == 'scale' and args: local = _scale_matrix(args[0], args[1] if len(args) > 1 else None) elif name == 'rotate' and args: local = _rotate_matrix( args[0], args[1] if len(args) > 2 else None, args[2] if len(args) > 2 else None, ) matrix = matrix_multiply(matrix, local) return matrix def transform_point(matrix: AffineMatrix, x: float, y: float) -> tuple[float, float]: """Apply an SVG affine matrix to a point.""" a, b, c, d, e, f = matrix return a * x + c * y + e, b * x + d * y + f def rect_to_dml_xfrm( x: float, y: float, w: float, h: float, matrix: AffineMatrix, ) -> tuple[str, int, int, int, int, tuple[int, int, int, int]]: """Map a transformed SVG rectangle to DrawingML xfrm attributes. DrawingML can represent rotated/flipped rectangles, but not arbitrary shear. Template-import picture wrappers only use translate/rotate/scale, so decomposing the transformed local X/Y axes is sufficient here. """ p0 = transform_point(matrix, x, y) p1 = transform_point(matrix, x + w, y) p2 = transform_point(matrix, x + w, y + h) p3 = transform_point(matrix, x, y + h) ux = p1[0] - p0[0] uy = p1[1] - p0[1] vx = p3[0] - p0[0] vy = p3[1] - p0[1] rect_w = max(math.hypot(ux, uy), 0.001) rect_h = max(math.hypot(vx, vy), 0.001) cross = ux * vy - uy * vx if cross < 0: angle_deg = math.degrees(math.atan2(-uy, -ux)) flip_attr = ' flipH="1"' else: angle_deg = math.degrees(math.atan2(uy, ux)) flip_attr = '' rot = round(angle_deg * ANGLE_UNIT) rot_attr = f' rot="{rot}"' if rot else '' center_x = (p0[0] + p2[0]) / 2 center_y = (p0[1] + p2[1]) / 2 off_x = px_to_emu(center_x - rect_w / 2) off_y = px_to_emu(center_y - rect_h / 2) ext_cx = px_to_emu(rect_w) ext_cy = px_to_emu(rect_h) xs = [p0[0], p1[0], p2[0], p3[0]] ys = [p0[1], p1[1], p2[1], p3[1]] bounds = ( px_to_emu(min(xs)), px_to_emu(min(ys)), px_to_emu(max(xs)), px_to_emu(max(ys)), ) return f'{flip_attr}{rot_attr}', off_x, off_y, ext_cx, ext_cy, bounds def _extract_inheritable_styles(elem: ET.Element) -> dict[str, str]: """Extract all SVG-inheritable presentation attributes from an element.""" styles: dict[str, str] = {} for attr in INHERITABLE_ATTRS: val = elem.get(attr) if val is not None: styles[attr] = val return styles def _get_attr(elem: ET.Element, attr: str, ctx: ConvertContext) -> str | None: """Get effective attribute: element's own value first, then inherited.""" val = elem.get(attr) if val is not None: return val return ctx.inherited_styles.get(attr) def ctx_x(val: float, ctx: ConvertContext) -> float: """Apply context scale + translate to an X coordinate.""" return val * ctx.scale_x + ctx.translate_x def ctx_y(val: float, ctx: ConvertContext) -> float: """Apply context scale + translate to a Y coordinate.""" return val * ctx.scale_y + ctx.translate_y def ctx_w(val: float, ctx: ConvertContext) -> float: """Apply context scale to a width value.""" return val * ctx.scale_x def ctx_h(val: float, ctx: ConvertContext) -> float: """Apply context scale to a height value.""" return val * ctx.scale_y # --------------------------------------------------------------------------- # Color / style parsing # --------------------------------------------------------------------------- def parse_hex_color(color_str: str) -> str | None: """Parse '#RRGGBB' or '#RGB' to 'RRGGBB'. Returns None on failure.""" if not color_str: return None color_str = color_str.strip() if color_str.startswith('#'): color_str = color_str[1:] if len(color_str) == 3: color_str = ''.join(c * 2 for c in color_str) if len(color_str) == 6 and all(c in '0123456789abcdefABCDEF' for c in color_str): return color_str.upper() return None def parse_stop_style(style_str: str) -> tuple[str | None, float]: """Parse a gradient stop's style attribute. Args: style_str: Style string like 'stop-color:#XXX;stop-opacity:N'. Returns: (color, opacity) tuple. """ color = None opacity = 1.0 if not style_str: return color, opacity for part in style_str.split(';'): part = part.strip() if part.startswith('stop-color:'): color = parse_hex_color(part.split(':', 1)[1].strip()) elif part.startswith('stop-opacity:'): try: opacity = float(part.split(':', 1)[1].strip()) except ValueError: pass return color, opacity def resolve_url_id(url_str: str) -> str | None: """Extract ID from 'url(#someId)' reference.""" if not url_str: return None m = re.match(r'url\(#([^)]+)\)', url_str.strip()) return m.group(1) if m else None def get_effective_filter_id(elem: ET.Element, ctx: ConvertContext) -> str | None: """Get the effective filter ID for an element, including inherited context.""" filt = elem.get('filter') if filt: return resolve_url_id(filt) return ctx.filter_id # --------------------------------------------------------------------------- # Font parsing # --------------------------------------------------------------------------- def parse_font_family(font_family_str: str) -> dict[str, str]: """Parse CSS font-family into latin/ea typeface names. Prioritizes Windows-available fonts since PPTX is primarily opened on Windows. macOS/Linux-only fonts are mapped via FONT_FALLBACK_WIN. """ if not font_family_str: return {'latin': 'Segoe UI', 'ea': 'Microsoft YaHei'} fonts = [f.strip().strip("'\"") for f in font_family_str.split(',')] latin_font = None ea_font = None for font in fonts: if font in SYSTEM_FONTS: continue if font in GENERIC_FONT_MAP: resolved = GENERIC_FONT_MAP[font] latin_font = latin_font or resolved continue win_font = FONT_FALLBACK_WIN.get(font, font) if font in EA_FONTS: ea_font = ea_font or win_font else: latin_font = latin_font or win_font # PPT renders CJK text via latin typeface when ea doesn't match if not latin_font and ea_font: latin_font = ea_font final_latin = latin_font or 'Segoe UI' # EA must always be a CJK-capable font if not ea_font: ea_font = 'SimSun' if final_latin in _SERIF_LATIN else 'Microsoft YaHei' return {'latin': final_latin, 'ea': ea_font} def is_cjk_char(ch: str) -> bool: """Check if a character is CJK (Chinese/Japanese/Korean).""" cp = ord(ch) return (0x4E00 <= cp <= 0x9FFF or 0x3400 <= cp <= 0x4DBF or 0x2E80 <= cp <= 0x2EFF or 0x3000 <= cp <= 0x303F or 0xFF00 <= cp <= 0xFFEF or 0xF900 <= cp <= 0xFAFF or 0x20000 <= cp <= 0x2A6DF) def detect_text_lang(text: str) -> str: """Return a DrawingML language tag for a text run.""" return 'zh-CN' if any(is_cjk_char(ch) for ch in text) else 'en-US' def resolve_text_run_fonts(text: str, fonts: dict[str, str]) -> dict[str, str]: """Return DrawingML latin/ea/cs typefaces for one text run.""" latin = fonts['latin'] if detect_text_lang(text) == 'zh-CN': ea = fonts['ea'] else: ea = latin return {'latin': latin, 'ea': ea, 'cs': latin} def estimate_text_width(text: str, font_size: float, font_weight: str = '400') -> float: """Estimate text width in SVG pixels.""" width = 0.0 for ch in text: if is_cjk_char(ch): width += font_size elif ch == ' ': width += font_size * 0.3 elif ch in 'mMwWOQ%': width += font_size * 0.75 elif ch in 'iIlj!|': width += font_size * 0.3 elif ch.isdigit(): # digits are tabular (uniform ~0.55em) in most UI fonts, including # '1' — classing it with 'il|' under-sizes the box and makes # renderers that ignore wrap="none" (LibreOffice) wrap the line width += font_size * 0.55 else: width += font_size * 0.55 if font_weight in ('bold', '600', '700', '800', '900'): width *= 1.05 return width def _xml_escape(text: str) -> str: """Escape XML special characters.""" return (text.replace('&', '&') .replace('<', '<') .replace('>', '>') .replace('"', '"'))