From b86c051290e13de143ae90e7b1979ab0f0b8af87 Mon Sep 17 00:00:00 2001
From: caoqianming
Date: Fri, 8 May 2026 14:42:45 +0800
Subject: [PATCH] =?UTF-8?q?cli+core:=20=E5=8A=A0=20task=20=E5=AF=B9?=
 =?UTF-8?q?=E8=AF=9D=E5=AF=BC=E5=87=BA=E4=B8=BA=20.docx?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- core/export_docx.py: 渲染 messages.json 为对话稿,左排小字 + 角色配色 (user/assistant/tool/tool_call/reasoning),meta 信息表置文档开头,tool 结果默认前 1000 + 中间省略 + 后 500
- cli.py: 加 `export <task_id>` 子命令(支持 last / -o / --include-system / --no-reasoning / --tool-head / --tool-tail)与 REPL `/export [<task_id>]`
- 默认跳过 system prompt(信息密度低),默认带 reasoning_content(观察价值高)

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 cli.py              |  78 +++++++++-
 core/export_docx.py | 438 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 515 insertions(+), 1 deletion(-)
 create mode 100644 core/export_docx.py

diff --git a/cli.py b/cli.py
index 1ba6cfb..30ce2c3 100644
--- a/cli.py
+++ b/cli.py
@@ -14,6 +14,7 @@ from __future__ import annotations
 import json
 import shutil
 import sys
+from pathlib import Path
 
 import click
 from rich.prompt import Prompt
@@ -125,7 +126,8 @@ def chat(model: str, workspace: str, resume: str, mode: str, desc: str) -> None:
     console.print(
         "[info]/exit 退出 /reset 清空对话(保留 task) /new 开新 task "
         "/resume [last|] 切到已有 task /id /status 查看 "
-        "/done /abandon 改状态 /desc <文本> 设描述[/info]\n"
+        "/done /abandon 改状态 /desc <文本> 设描述 "
+        "/export [<task_id>] 导出对话为 .docx[/info]\n"
     )
 
     while True:
@@ -249,6 +251,31 @@ def chat(model: str, workspace: str, resume: str, mode: str, desc: str) -> None:
             task_state.save(task_dir)
             console.print(f"[info]description set: {new_desc!r}[/info]")
             continue
+        if cmd.startswith("/export"):
+            arg = cmd[len("/export"):].strip()
+            target_dir = task_dir
+            if arg:
+                ws_dir = resolve_workspace(workspace)
+                if arg == "last":
+                    rs = _list_task_rows(ws_dir, limit=1)
+                    if not rs:
+                        console.print("[warn]没有 task 可导出[/warn]")
+                        continue
+                    arg = rs[0][1]
+                target_dir = tasks_dir(ws_dir) / arg
+            if not (target_dir / "messages.json").exists():
+                console.print(
+                    f"[warn]无可导出内容: {target_dir.name} 还没有消息[/warn]"
+                )
+                continue
+            try:
+                from core.export_docx import export_chat_to_docx
+                out = export_chat_to_docx(target_dir)
+            except Exception as e:
+                console.print(f"[err]导出失败:[/err] {type(e).__name__}: {e}")
+                continue
+            console.print(f"[ok]已导出[/ok] -> {out}")
+            continue
         if not cmd:
             continue
 
@@ -291,6 +318,55 @@ def tasks(workspace: str, limit: int, status: str) -> None:
     make_console().print(tbl)
 
 
+@cli.command()
+@click.argument("task_id")
+@click.option("--workspace", default=None, help="工作目录")
+@click.option("-o", "--output", default=None,
+              help="输出 .docx 路径,默认 <task_dir>/chat_<task_id>.docx")
+@click.option("--include-system", is_flag=True,
+              help="包含 system prompt(默认跳过,信息密度低)")
+@click.option("--no-reasoning", is_flag=True,
+              help="不包含 reasoning_content(默认带)")
+@click.option("--tool-head", default=1000, type=click.IntRange(min=0),
+              help="tool 结果保留前 N 字符(默认 1000)")
+@click.option("--tool-tail", default=500, type=click.IntRange(min=0),
+              help="tool 结果保留后 N 字符(默认 500)")
+def export(task_id: str, workspace: str, output: str, include_system: bool,
+           no_reasoning: bool, tool_head: int, tool_tail: int) -> None:
+    """把指定 task 的对话导出为 .docx。task_id 用 'last' 取最近一个。"""
+    console = make_console()
+    cfg = load_config()
+    ws = resolve_workspace(workspace, cfg)
+
+    if task_id == "last":
+        rs = _list_task_rows(ws, limit=1)
+        if not rs:
+            console.print("[err]没有 task 可导出[/err]")
+            sys.exit(1)
+        task_id = rs[0][1]
+
+    td = tasks_dir(ws) / task_id
+    if not (td / "messages.json").exists():
+        console.print(f"[err]task 不存在或无 messages.json:[/err] {td}")
+        sys.exit(1)
+
+    out = Path(output).resolve() if output else None
+    try:
+        # Lazy import inside the try: a missing python-docx is reported, not raised.
+        from core.export_docx import export_chat_to_docx
+        path = export_chat_to_docx(
+            td, out,
+            include_system=include_system,
+            include_reasoning=not no_reasoning,
+            tool_head=tool_head,
+            tool_tail=tool_tail,
+        )
+    except Exception as e:
+        console.print(f"[err]导出失败:[/err] {type(e).__name__}: {e}")
+        sys.exit(1)
+    console.print(f"[ok]导出完成[/ok] -> {path}")
+
+
 @cli.command()
 @click.option("--model", default=None, help="模型档案,如 deepseek_v4.flash 或 deepseek_v4.pro")
 @click.option("--long-context", is_flag=True, help="加跑 needle-in-haystack(费 token,默认关)")
diff --git a/core/export_docx.py b/core/export_docx.py
new file mode 100644
index 0000000..a64e4e7
--- /dev/null
+++ b/core/export_docx.py
@@ -0,0 +1,438 @@
+"""Render a task's messages.json as a .docx conversation transcript.
+
+Layout:
+- Meta table at the top (task_id / mode / description / model / created at /
+  message count / tokens / exported at).
+- Body: one group of paragraphs per message, all left-aligned in a small font,
+  with each role set apart by its own bold colour.
+  - assistant reasoning_content is included by default, in grey italics
+  - tool results keep the first head chars + an omission marker + the last tail chars
+  - tool_calls show the function name and its JSON arguments separately
+
+Entry points:
+- top-level function export_chat_to_docx(task_dir, out_path=None, ...)
+- the CLI subcommand `python cli.py export <task_id>` and the REPL command
+  `/export [<task_id>]` both go through it
+"""
+from __future__ import annotations
+
+import json
+import re
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+from docx import Document
+from docx.enum.text import WD_ALIGN_PARAGRAPH
+from docx.oxml import OxmlElement
+from docx.oxml.ns import qn
+from docx.shared import Cm, Pt, RGBColor
+
+
+# ───────────────────────── Colours ─────────────────────────
+# A GitHub-ish palette with high contrast on Word's light background:
+# easy on the eyes yet still distinguishable.
+
+COLOR_USER = RGBColor(0x6F, 0x42, 0xC1)  # purple
+COLOR_ASSISTANT = RGBColor(0x1F, 0x6F, 0xEB)  # blue
+COLOR_TOOL_CALL = RGBColor(0xBF, 0x63, 0x10)  # orange (darkened for readability)
+COLOR_TOOL_RESULT = RGBColor(0x1A, 0x7F, 0x37)  # green
+COLOR_REASONING = RGBColor(0x6E, 0x76, 0x81)  # mid grey, rendered in italics
+COLOR_SYSTEM = RGBColor(0x57, 0x60, 0x6A)  # dark grey
+COLOR_META_LABEL = RGBColor(0x57, 0x60, 0x6A)
+
+
+# ───────────────────────── Font helpers ─────────────────────────
+
+def _set_run_fonts(run, *, cn_font: str = "宋体", en_font: str = "Times New Roman") -> None:
+    """Set the East-Asian and Latin typefaces of *run* through its w:rFonts element."""
+    rPr = run._element.get_or_add_rPr()
+    rFonts = rPr.find(qn("w:rFonts"))
+    if rFonts is None:
+        rFonts = OxmlElement("w:rFonts")
+        rPr.append(rFonts)
+    rFonts.set(qn("w:eastAsia"), cn_font)
+    rFonts.set(qn("w:ascii"), en_font)
+    rFonts.set(qn("w:hAnsi"), en_font)
+
+
+def _preserve_spaces(run) -> None:
+    """Stop docx from collapsing runs of spaces - code / JSON indentation needs them."""
+    for t in run._element.iter(qn("w:t")):
+        t.set(qn("xml:space"), "preserve")
+
+
+# ───────────────────────── Document skeleton ─────────────────────────
+
+def _init_doc() -> Document:
+    """Create an A4 document with the margins and Normal style used by the export."""
+    doc = Document()
+    section = doc.sections[0]
+    section.page_height = Cm(29.7)
+    section.page_width = Cm(21)
+    section.top_margin = Cm(2.0)
+    section.bottom_margin = Cm(2.0)
+    section.left_margin = Cm(2.5)
+    section.right_margin = Cm(2.0)
+
+    normal = doc.styles["Normal"]
+    normal.font.name = "Times New Roman"
+    normal.font.size = Pt(9.5)
+    pf = normal.paragraph_format
+    pf.line_spacing = 1.3
+    pf.space_before = Pt(0)
+    pf.space_after = Pt(0)
+    pf.first_line_indent = None
+    return doc
+
+
+# ───────────────────────── Paragraph primitives ─────────────────────────
+
+# Characters XML 1.0 forbids. lxml rejects them with ValueError, and tool output
+# can carry some (e.g. the ESC byte of ANSI colour codes).
+_XML_ILLEGAL_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\ud800-\udfff\ufffe\uffff]")
+
+
+def _as_text(value) -> str:
+    """Coerce a message field to text that is safe to put in a run.
+
+    None becomes "", non-string values (e.g. a list of content parts) are dumped
+    as JSON, CR / CRLF line endings become LF, and XML-illegal characters are
+    dropped.
+    """
+    if value is None:
+        return ""
+    if not isinstance(value, str):
+        try:
+            value = json.dumps(value, ensure_ascii=False, indent=2)
+        except (TypeError, ValueError):
+            value = str(value)
+    value = value.replace("\r\n", "\n").replace("\r", "\n")
+    return _XML_ILLEGAL_RE.sub("", value)
+
+
+def _add_role_header(doc: Document, label: str, color: RGBColor) -> None:
+    """Append the bold, coloured heading line that opens a message."""
+    p = doc.add_paragraph()
+    pf = p.paragraph_format
+    pf.first_line_indent = None
+    pf.space_before = Pt(8)
+    pf.space_after = Pt(2)
+    p.alignment = WD_ALIGN_PARAGRAPH.LEFT
+    run = p.add_run(_as_text(label))
+    run.font.size = Pt(10.5)
+    run.font.bold = True
+    run.font.color.rgb = color
+    _set_run_fonts(run, cn_font="黑体", en_font="Consolas")
+
+
+def _add_text(
+    doc: Document,
+    text: str,
+    *,
+    color: Optional[RGBColor] = None,
+    italic: bool = False,
+    mono: bool = False,
+    size: Pt = Pt(9.5),
+    indent_left: Optional[Pt] = None,
+) -> None:
+    """Append *text* as a single paragraph; do nothing when it is empty.
+
+    Newlines become line breaks inside the paragraph. With mono=True the run uses
+    a monospaced CJK face (新宋体) plus Consolas and keeps consecutive spaces.
+    """
+    text = _as_text(text)
+    if not text:
+        return
+    p = doc.add_paragraph()
+    pf = p.paragraph_format
+    pf.first_line_indent = None
+    pf.line_spacing = 1.25
+    pf.space_before = Pt(0)
+    pf.space_after = Pt(2)
+    if indent_left is not None:
+        pf.left_indent = indent_left
+    p.alignment = WD_ALIGN_PARAGRAPH.LEFT
+
+    cn_font = "新宋体" if mono else "宋体"
+    en_font = "Consolas" if mono else "Times New Roman"
+
+    for i, line in enumerate(text.split("\n")):
+        if i > 0:
+            p.add_run().add_break()
+        run = p.add_run(line)
+        run.font.size = size
+        if color is not None:
+            run.font.color.rgb = color
+        if italic:
+            run.italic = True
+        _set_run_fonts(run, cn_font=cn_font, en_font=en_font)
+        if mono:
+            _preserve_spaces(run)
+
+
+# ───────────────────────── Tool-result truncation ─────────────────────────
+
+def _truncate_with_ellipsis(text: str, head: int, tail: int) -> str:
+    """Return the first *head* chars + an omission marker + the last *tail* chars.
+
+    Text no longer than head + tail + 80 chars is returned unchanged, None yields
+    "", and negative *head* / *tail* are treated as 0.
+    """
+    if text is None:
+        return ""
+    head, tail = max(head, 0), max(tail, 0)
+    if len(text) <= head + tail + 80:
+        return text
+    omitted = len(text) - head - tail
+    # Slice from an explicit start: with tail == 0, text[-0:] would be the whole text.
+    suffix = text[len(text) - tail:]
+    return f"{text[:head]}\n\n... [omitted {omitted} chars] ...\n\n{suffix}"
+
+
+def _format_args(args_str: str) -> str:
+    """Pretty-print tool_call arguments when they are valid JSON, else return them as is.
+
+    Arguments that arrive already decoded (dict / list) are pretty-printed too.
+    """
+    if not args_str:
+        return ""
+    try:
+        parsed = json.loads(args_str) if isinstance(args_str, str) else args_str
+        return json.dumps(parsed, ensure_ascii=False, indent=2)
+    except (TypeError, ValueError):
+        return str(args_str)
+
+
+# ───────────────────────── Meta block ─────────────────────────
+
+def _add_meta_block(
+    doc: Document, meta: dict, task_state: dict, n_msgs: int, source_path: Path
+) -> None:
+    """Write the title line and the two-column meta table that open the document.
+
+    Fields found in both *meta* and *task_state* are taken from *meta* first;
+    empty values are shown as "-".
+    """
+    p = doc.add_paragraph()
+    p.alignment = WD_ALIGN_PARAGRAPH.LEFT
+    p.paragraph_format.first_line_indent = None
+    p.paragraph_format.space_before = Pt(0)
+    p.paragraph_format.space_after = Pt(4)
+    task_id = meta.get("id") or task_state.get("task_id") or "?"
+    run = p.add_run(_as_text(f"Task 对话记录 - {task_id}"))
+    run.font.size = Pt(14)
+    run.font.bold = True
+    _set_run_fonts(run, cn_font="黑体", en_font="Consolas")
+
+    desc = task_state.get("description") or ""
+    mode = task_state.get("mode") or ""
+    status = task_state.get("status") or ""
+    model = meta.get("model") or task_state.get("model") or ""
+    profile = meta.get("model_profile") or task_state.get("model_profile") or ""
+    cwd = meta.get("cwd") or task_state.get("cwd") or ""
+    created = meta.get("created_at") or task_state.get("created_at") or ""
+    updated = task_state.get("updated_at") or ""
+    # "or 0": a null token count in state.json must not break the sum below.
+    tp = task_state.get("tokens_prompt") or 0
+    tc = task_state.get("tokens_completion") or 0
+
+    rows = [
+        ("Task ID", task_id),
+        ("模式", mode),
+        ("描述", desc),
+        ("状态", status),
+        ("模型", model),
+        ("Profile", profile),
+        ("CWD", cwd),
+        ("创建时间", created),
+        ("更新时间", updated),
+        ("消息数", str(n_msgs)),
+        ("Tokens", f"{tp} prompt / {tc} completion / {tp + tc} total"),
+        ("源文件", str(source_path)),
+        ("导出时间", datetime.now().isoformat(timespec="seconds")),
+    ]
+
+    table = doc.add_table(rows=len(rows), cols=2)
+    try:
+        table.style = "Light Grid Accent 1"
+    except KeyError:
+        pass  # style lookup failed: keep the default table look
+
+    for ri, (k, v) in enumerate(rows):
+        c1 = table.rows[ri].cells[0]
+        c1.text = ""
+        p1 = c1.paragraphs[0]
+        p1.paragraph_format.first_line_indent = None
+        p1.paragraph_format.line_spacing = 1.15
+        run = p1.add_run(k)
+        run.font.size = Pt(9)
+        run.font.bold = True
+        run.font.color.rgb = COLOR_META_LABEL
+        _set_run_fonts(run, cn_font="宋体", en_font="Times New Roman")
+
+        c2 = table.rows[ri].cells[1]
+        c2.text = ""
+        p2 = c2.paragraphs[0]
+        p2.paragraph_format.first_line_indent = None
+        p2.paragraph_format.line_spacing = 1.15
+        run = p2.add_run(_as_text(str(v)) if v else "-")
+        run.font.size = Pt(9)
+        _set_run_fonts(run, cn_font="宋体", en_font="Times New Roman")
+
+
+# ───────────────────────── Single-message rendering ─────────────────────────
+
+def _render_message(
+    doc: Document,
+    msg: dict,
+    *,
+    include_reasoning: bool,
+    tool_head: int,
+    tool_tail: int,
+) -> None:
+    """Append one message to *doc*, laid out according to its "role"."""
+    role = msg.get("role")
+
+    if role == "system":
+        _add_role_header(doc, "[system]", COLOR_SYSTEM)
+        content = _as_text(msg.get("content"))
+        # System prompts usually run 2-5 KB, so trim them for the export too.
+        content = _truncate_with_ellipsis(content, 1500, 500)
+        _add_text(doc, content, color=COLOR_SYSTEM, size=Pt(8.5), mono=True)
+        return
+
+    if role == "user":
+        _add_role_header(doc, "[user]", COLOR_USER)
+        _add_text(doc, msg.get("content") or "", size=Pt(10))
+        return
+
+    if role == "assistant":
+        _add_role_header(doc, "[assistant]", COLOR_ASSISTANT)
+        if include_reasoning:
+            rc = msg.get("reasoning_content") or ""
+            if not rc:
+                psf = msg.get("provider_specific_fields") or {}
+                rc = psf.get("reasoning_content") or ""
+            if rc:
+                _add_text(
+                    doc, "▎reasoning",
+                    color=COLOR_REASONING, size=Pt(8.5), italic=True,
+                )
+                _add_text(
+                    doc, rc,
+                    color=COLOR_REASONING, size=Pt(9), italic=True,
+                    indent_left=Pt(12),
+                )
+        content = msg.get("content") or ""
+        if content:
+            _add_text(doc, content, size=Pt(10))
+        for call in msg.get("tool_calls") or []:
+            fn_obj = call.get("function") or {}
+            fn = fn_obj.get("name", "?")
+            args = fn_obj.get("arguments", "")
+            cid = call.get("id", "")
+            _add_text(
+                doc, f"▎tool_call -> {fn} ({cid})",
+                color=COLOR_TOOL_CALL, size=Pt(9), italic=True,
+            )
+            _add_text(
+                doc, _format_args(args),
+                color=COLOR_TOOL_CALL, size=Pt(8.5), mono=True,
+                indent_left=Pt(12),
+            )
+        return
+
+    if role == "tool":
+        cid = msg.get("tool_call_id", "")
+        _add_role_header(doc, f"[tool result] ({cid})", COLOR_TOOL_RESULT)
+        content = _as_text(msg.get("content"))
+        truncated = _truncate_with_ellipsis(content, tool_head, tool_tail)
+        _add_text(
+            doc, truncated,
+            color=COLOR_TOOL_RESULT, size=Pt(8.5), mono=True,
+            indent_left=Pt(12),
+        )
+        return
+
+    # Fallback: unknown role.
+    _add_role_header(doc, f"[{role or 'unknown'}]", COLOR_SYSTEM)
+    _add_text(doc, msg.get("content") or "", size=Pt(9.5))
+
+
+# ───────────────────────── Top-level entry point ─────────────────────────
+
+def export_chat_to_docx(
+    task_dir: Path,
+    out_path: Optional[Path] = None,
+    *,
+    include_system: bool = False,
+    include_reasoning: bool = True,
+    tool_head: int = 1000,
+    tool_tail: int = 500,
+) -> Path:
+    """Render task_dir/messages.json as a .docx file and return the path written.
+
+    out_path defaults to task_dir/chat_<task_id>.docx.
+    include_system defaults to False, so system messages are skipped.
+    include_reasoning defaults to True, so the model's reasoning is rendered.
+    Tool results keep tool_head leading and tool_tail trailing characters
+    (1000 / 500 by default) with the middle omitted.
+
+    Raises FileNotFoundError when messages.json is missing and ValueError when
+    its structure is not recognised.
+    """
+    task_dir = Path(task_dir)
+    msg_path = task_dir / "messages.json"
+    if not msg_path.exists():
+        raise FileNotFoundError(f"messages.json 不存在: {msg_path}")
+
+    data = json.loads(msg_path.read_text(encoding="utf-8"))
+    if isinstance(data, list):
+        meta = {}
+        messages = data
+    elif isinstance(data, dict):
+        meta = data.get("meta") or {}
+        messages = data.get("messages") or []
+    else:
+        raise ValueError(f"messages.json 格式不识别: {type(data).__name__}")
+    if not isinstance(meta, dict):
+        meta = {}
+    if not isinstance(messages, list):
+        raise ValueError(f"messages.json 格式不识别: {type(messages).__name__}")
+
+    # state.json only adds metadata: when it is unreadable, export without it.
+    state_path = task_dir / "state.json"
+    task_state: dict = {}
+    if state_path.exists():
+        try:
+            loaded = json.loads(state_path.read_text(encoding="utf-8"))
+        except (OSError, ValueError):
+            loaded = None
+        if isinstance(loaded, dict):
+            task_state = loaded
+
+    if out_path is None:
+        tid = meta.get("id") or task_state.get("task_id") or task_dir.name
+        out_path = task_dir / f"chat_{tid}.docx"
+    out_path = Path(out_path)
+
+    doc = _init_doc()
+    _add_meta_block(doc, meta, task_state, len(messages), msg_path)
+    doc.add_paragraph()  # one blank line between the meta table and the body
+
+    for msg in messages:
+        if not isinstance(msg, dict):
+            continue  # malformed entry: nothing to render
+        if msg.get("role") == "system" and not include_system:
+            continue
+        _render_message(
+            doc, msg,
+            include_reasoning=include_reasoning,
+            tool_head=tool_head,
+            tool_tail=tool_tail,
+        )
+
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    doc.save(str(out_path))
+    return out_path