"""Markdown to plain text conversion and notes slide XML generation.""" from __future__ import annotations import re from .drawingml_utils import detect_text_lang def markdown_to_plain_text(md_content: str) -> str: """Convert Markdown notes to plain text for PPTX notes. Args: md_content: Markdown formatted notes content. Returns: Plain text content. """ def strip_inline_bold(text: str) -> str: text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) text = re.sub(r'__(.+?)__', r'\1', text) return text lines: list[str] = [] for line in md_content.split('\n'): if line.startswith('#'): text = re.sub(r'^#+\s*', '', line).strip() text = strip_inline_bold(text) if text: lines.append(text) lines.append('') elif line.strip().startswith('- '): item_text = line.strip()[2:] item_text = strip_inline_bold(item_text) lines.append('• ' + item_text) elif line.strip(): text = strip_inline_bold(line.strip()) lines.append(text) else: lines.append('') # Merge consecutive empty lines result: list[str] = [] is_prev_empty = False for line in lines: if line == '': if not is_prev_empty: result.append(line) is_prev_empty = True else: result.append(line) is_prev_empty = False return '\n'.join(result).strip() def create_notes_slide_xml(slide_num: int, notes_text: str) -> str: """Create notes slide XML. Args: slide_num: Slide number. notes_text: Notes text in plain text format. Returns: Notes slide XML string. """ notes_text = (notes_text .replace('&', '&') .replace('<', '<') .replace('>', '>')) paragraphs: list[str] = [] for para in notes_text.split('\n'): if para.strip(): lang = detect_text_lang(para) paragraphs.append(f''' {para} ''') else: paragraphs.append('') paragraphs_xml = ( '\n '.join(paragraphs) if paragraphs else '' ) return f''' {paragraphs_xml} ''' def create_notes_slide_rels_xml(slide_num: int) -> str: """Create notes slide relationship file XML. Args: slide_num: Slide number. Returns: Relationship file XML string. """ # No notesMaster relationship: the base PPTX produced by python-pptx does # not ship a notesMaster part, so referencing one here would create a # dangling rels Target and PowerPoint reports the file as corrupt. return f''' '''