"""把 task 的 PG messages 表 + state.json 渲染为 .docx 对话稿。 布局: - 文档开头 meta 表(task_id / 模式 / 描述 / 模型 / 创建时间 / 消息数 / tokens / 导出时间) - 主体每条消息一组段落,全部左排,小字号,角色用不同颜色加粗区分 - assistant 的 reasoning_content 默认带,灰色斜体 - tool 结果保留前 head + 中间省略 + 后 tail 三段 - tool_calls 把 function 名 + 参数 JSON 单列展示 调用入口: - 顶层函数 export_chat_to_docx(task_dir, out_path=None, ...) - CLI 子命令 `python cli.py export ` 与 REPL `/export []` 都走它 §7 B Step 2 后:messages 从 PG 读(按 task_id);state.json 还在 task_dir 下(Step 3 删)。 """ from __future__ import annotations import json from datetime import datetime from pathlib import Path from typing import Optional from uuid import UUID from docx import Document from docx.enum.text import WD_ALIGN_PARAGRAPH from docx.oxml import OxmlElement from docx.oxml.ns import qn from docx.shared import Cm, Pt, RGBColor # ───────────────────────── 配色 ───────────────────────── # 选 Word 浅底高对比度的 GitHub-ish 色板,不刺眼也能区分 COLOR_USER = RGBColor(0x6F, 0x42, 0xC1) # 紫 COLOR_ASSISTANT = RGBColor(0x1F, 0x6F, 0xEB) # 蓝 COLOR_TOOL_CALL = RGBColor(0xBF, 0x63, 0x10) # 橙(深一点保证可读) COLOR_TOOL_RESULT = RGBColor(0x1A, 0x7F, 0x37) # 绿 COLOR_REASONING = RGBColor(0x6E, 0x76, 0x81) # 中灰,斜体 COLOR_SYSTEM = RGBColor(0x57, 0x60, 0x6A) # 暗灰 COLOR_META_LABEL = RGBColor(0x57, 0x60, 0x6A) # ───────────────────────── 字体辅助 ───────────────────────── def _set_run_fonts(run, *, cn_font: str = "宋体", en_font: str = "Times New Roman") -> None: rPr = run._element.get_or_add_rPr() rFonts = rPr.find(qn("w:rFonts")) if rFonts is None: rFonts = OxmlElement("w:rFonts") rPr.append(rFonts) rFonts.set(qn("w:eastAsia"), cn_font) rFonts.set(qn("w:ascii"), en_font) rFonts.set(qn("w:hAnsi"), en_font) def _preserve_spaces(run) -> None: """让 docx 不压缩连续空格 — 代码块/JSON 缩进必须留住。""" for t in run._element.iter(qn("w:t")): t.set(qn("xml:space"), "preserve") # ───────────────────────── 文档骨架 ───────────────────────── def _init_doc() -> Document: doc = Document() section = doc.sections[0] section.page_height = Cm(29.7) section.page_width = Cm(21) section.top_margin = Cm(2.0) section.bottom_margin = Cm(2.0) section.left_margin = Cm(2.5) section.right_margin = Cm(2.0) normal = doc.styles["Normal"] normal.font.name = "Times New Roman" normal.font.size = Pt(9.5) pf = normal.paragraph_format pf.line_spacing = 1.3 pf.space_before = Pt(0) pf.space_after = Pt(0) pf.first_line_indent = None return doc # ───────────────────────── 段落原语 ───────────────────────── def _add_role_header(doc: Document, label: str, color: RGBColor) -> None: p = doc.add_paragraph() pf = p.paragraph_format pf.first_line_indent = None pf.space_before = Pt(8) pf.space_after = Pt(2) p.alignment = WD_ALIGN_PARAGRAPH.LEFT run = p.add_run(label) run.font.size = Pt(10.5) run.font.bold = True run.font.color.rgb = color _set_run_fonts(run, cn_font="黑体", en_font="Consolas") def _add_text( doc: Document, text: str, *, color: Optional[RGBColor] = None, italic: bool = False, mono: bool = False, size: Pt = Pt(9.5), indent_left: Optional[Pt] = None, ) -> None: """整段文本输出。保留 \n 换行;mono 用等宽中文(新宋体)+ Consolas。""" if not text: return p = doc.add_paragraph() pf = p.paragraph_format pf.first_line_indent = None pf.line_spacing = 1.25 pf.space_before = Pt(0) pf.space_after = Pt(2) if indent_left is not None: pf.left_indent = indent_left p.alignment = WD_ALIGN_PARAGRAPH.LEFT cn_font = "新宋体" if mono else "宋体" en_font = "Consolas" if mono else "Times New Roman" lines = text.split("\n") for i, line in enumerate(lines): if i > 0: br = p.add_run() br.add_break() run = p.add_run(line) run.font.size = size if color is not None: run.font.color.rgb = color if italic: run.italic = True _set_run_fonts(run, cn_font=cn_font, en_font=en_font) if mono: _preserve_spaces(run) # ───────────────────────── 工具结果裁剪 ───────────────────────── def _truncate_with_ellipsis(text: str, head: int, tail: int) -> str: """前 head + 省略 + 后 tail。整体短于阈值则原样返回。""" if text is None: return "" if len(text) <= head + tail + 80: return text omitted = len(text) - head - tail return f"{text[:head]}\n\n... [omitted {omitted} chars] ...\n\n{text[-tail:]}" def _format_args(args_str: str) -> str: """tool_call 参数若是合法 JSON 就 pretty,否则原样返回。""" if not args_str: return "" try: parsed = json.loads(args_str) return json.dumps(parsed, ensure_ascii=False, indent=2) except Exception: return args_str # ───────────────────────── Meta 区块 ───────────────────────── def _add_meta_block( doc: Document, meta: dict, task_state: dict, n_msgs: int, task_dir: Path ) -> None: p = doc.add_paragraph() p.alignment = WD_ALIGN_PARAGRAPH.LEFT p.paragraph_format.first_line_indent = None p.paragraph_format.space_before = Pt(0) p.paragraph_format.space_after = Pt(4) title = f"Task 对话记录 - {meta.get('id') or task_state.get('task_id') or '?'}" run = p.add_run(title) run.font.size = Pt(14) run.font.bold = True _set_run_fonts(run, cn_font="黑体", en_font="Consolas") desc = task_state.get("description") or "" mode = task_state.get("mode") or "" status = task_state.get("status") or "" model = meta.get("model") or task_state.get("model") or "" profile = meta.get("model_profile") or task_state.get("model_profile") or "" cwd = meta.get("cwd") or task_state.get("cwd") or "" created = meta.get("created_at") or task_state.get("created_at") or "" updated = task_state.get("updated_at") or "" tp = task_state.get("tokens_prompt", 0) tc = task_state.get("tokens_completion", 0) rows = [ ("Task ID", meta.get("id") or task_state.get("task_id") or "?"), ("模式", mode), ("描述", desc), ("状态", status), ("模型", model), ("Profile", profile), ("CWD", cwd), ("创建时间", created), ("更新时间", updated), ("消息数", str(n_msgs)), ("Tokens", f"{tp} prompt / {tc} completion / {tp + tc} total"), ("Task dir", str(task_dir)), ("导出时间", datetime.now().isoformat(timespec="seconds")), ] table = doc.add_table(rows=len(rows), cols=2) try: table.style = "Light Grid Accent 1" except KeyError: pass for ri, (k, v) in enumerate(rows): c1 = table.rows[ri].cells[0] c1.text = "" p1 = c1.paragraphs[0] p1.paragraph_format.first_line_indent = None p1.paragraph_format.line_spacing = 1.15 run = p1.add_run(k) run.font.size = Pt(9) run.font.bold = True run.font.color.rgb = COLOR_META_LABEL _set_run_fonts(run, cn_font="宋体", en_font="Times New Roman") c2 = table.rows[ri].cells[1] c2.text = "" p2 = c2.paragraphs[0] p2.paragraph_format.first_line_indent = None p2.paragraph_format.line_spacing = 1.15 run = p2.add_run(str(v) if v else "-") run.font.size = Pt(9) _set_run_fonts(run, cn_font="宋体", en_font="Times New Roman") # ───────────────────────── 单条消息渲染 ───────────────────────── def _render_message( doc: Document, msg: dict, *, include_reasoning: bool, tool_head: int, tool_tail: int, ) -> None: role = msg.get("role") if role == "system": _add_role_header(doc, "[system]", COLOR_SYSTEM) content = msg.get("content") or "" # system prompt 通常 2-5KB,导出时也压一下 content = _truncate_with_ellipsis(content, 1500, 500) _add_text(doc, content, color=COLOR_SYSTEM, size=Pt(8.5), mono=True) return if role == "user": _add_role_header(doc, "[user]", COLOR_USER) _add_text(doc, msg.get("content") or "", size=Pt(10)) return if role == "assistant": _add_role_header(doc, "[assistant]", COLOR_ASSISTANT) if include_reasoning: rc = msg.get("reasoning_content") or "" if not rc: psf = msg.get("provider_specific_fields") or {} rc = psf.get("reasoning_content") or "" if rc: _add_text( doc, "▎reasoning", color=COLOR_REASONING, size=Pt(8.5), italic=True, ) _add_text( doc, rc, color=COLOR_REASONING, size=Pt(9), italic=True, indent_left=Pt(12), ) content = msg.get("content") or "" if content: _add_text(doc, content, size=Pt(10)) for call in msg.get("tool_calls") or []: fn_obj = call.get("function") or {} fn = fn_obj.get("name", "?") args = fn_obj.get("arguments", "") cid = call.get("id", "") _add_text( doc, f"▎tool_call -> {fn} ({cid})", color=COLOR_TOOL_CALL, size=Pt(9), italic=True, ) _add_text( doc, _format_args(args), color=COLOR_TOOL_CALL, size=Pt(8.5), mono=True, indent_left=Pt(12), ) return if role == "tool": cid = msg.get("tool_call_id", "") _add_role_header(doc, f"[tool result] ({cid})", COLOR_TOOL_RESULT) content = msg.get("content") or "" truncated = _truncate_with_ellipsis(content, tool_head, tool_tail) _add_text( doc, truncated, color=COLOR_TOOL_RESULT, size=Pt(8.5), mono=True, indent_left=Pt(12), ) return # 兜底:未知 role _add_role_header(doc, f"[{role or 'unknown'}]", COLOR_SYSTEM) _add_text(doc, msg.get("content") or "", size=Pt(9.5)) # ───────────────────────── 顶层入口 ───────────────────────── def export_chat_to_docx( task_dir: Path, out_path: Optional[Path] = None, *, include_system: bool = False, include_reasoning: bool = True, tool_head: int = 1000, tool_tail: int = 500, ) -> Path: """渲染 task 对话为 .docx,返回写入路径。 task_dir 名字必须是 UUID(messages 从 PG 按 task_id 读)。state.json 仍在 task_dir 下(Step 3 前)提供 mode/desc/tokens 等 meta。 """ try: tid = UUID(task_dir.name) except ValueError: raise ValueError(f"task_dir name 不是有效 UUID: {task_dir.name}") # 从 PG 读 messages,按 idx 排序 from sqlalchemy import select from core.storage import session_scope from core.storage.models import Message as MessageRow with session_scope() as s: rows = s.execute( select(MessageRow).where(MessageRow.task_id == tid).order_by(MessageRow.idx) ).scalars().all() messages = [dict(r.payload) for r in rows] state_path = task_dir / "state.json" task_state: dict = {} if state_path.exists(): try: task_state = json.loads(state_path.read_text(encoding="utf-8")) or {} except Exception: task_state = {} if out_path is None: out_path = task_dir / f"chat_{tid}.docx" meta = {"id": str(tid), "model": task_state.get("model", ""), "model_profile": task_state.get("model_profile", ""), "cwd": task_state.get("cwd", ""), "created_at": task_state.get("created_at", "")} doc = _init_doc() _add_meta_block(doc, meta, task_state, len(messages), task_dir) doc.add_paragraph() # 与 meta 表保持一行间距 for msg in messages: if msg.get("role") == "system" and not include_system: continue _render_message( doc, msg, include_reasoning=include_reasoning, tool_head=tool_head, tool_tail=tool_tail, ) out_path.parent.mkdir(parents=True, exist_ok=True) doc.save(str(out_path)) return out_path