cli+core: 加 task 对话导出为 .docx

- core/export_docx.py: 渲染 messages.json 为对话稿,左排小字 + 角色配色
  (user/assistant/tool/tool_call/reasoning),meta 信息表置文档开头,tool
  结果默认前 1000 + 中间省略 + 后 500
- cli.py: 加 `export <task_id>` 子命令(支持 last / -o / --include-system /
  --no-reasoning / --tool-head / --tool-tail)与 REPL `/export [<id>]`
- 默认跳过 system prompt(信息密度低),默认带 reasoning_content(观察价值高)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
caoqianming 2026-05-08 14:42:45 +08:00
parent ae93016442
commit b86c051290
2 changed files with 449 additions and 1 deletions

78
cli.py
View File

@ -14,6 +14,7 @@ from __future__ import annotations
import json import json
import shutil import shutil
import sys import sys
from pathlib import Path
import click import click
from rich.prompt import Prompt from rich.prompt import Prompt
@ -125,7 +126,8 @@ def chat(model: str, workspace: str, resume: str, mode: str, desc: str) -> None:
console.print( console.print(
"[info]/exit 退出 /reset 清空对话(保留 task) /new 开新 task " "[info]/exit 退出 /reset 清空对话(保留 task) /new 开新 task "
"/resume [last|<id>] 切到已有 task /id /status 查看 " "/resume [last|<id>] 切到已有 task /id /status 查看 "
"/done /abandon 改状态 /desc <文本> 设描述[/info]\n" "/done /abandon 改状态 /desc <文本> 设描述 "
"/export [<id>] 导出对话为 .docx[/info]\n"
) )
while True: while True:
@ -249,6 +251,31 @@ def chat(model: str, workspace: str, resume: str, mode: str, desc: str) -> None:
task_state.save(task_dir) task_state.save(task_dir)
console.print(f"[info]description set: {new_desc!r}[/info]") console.print(f"[info]description set: {new_desc!r}[/info]")
continue continue
if cmd.startswith("/export"):
arg = cmd[len("/export"):].strip()
target_dir = task_dir
if arg:
ws_dir = resolve_workspace(workspace)
if arg == "last":
rs = _list_task_rows(ws_dir, limit=1)
if not rs:
console.print("[warn]没有 task 可导出[/warn]")
continue
arg = rs[0][1]
target_dir = tasks_dir(ws_dir) / arg
if not (target_dir / "messages.json").exists():
console.print(
f"[warn]无可导出内容: {target_dir.name} 还没有消息[/warn]"
)
continue
try:
from core.export_docx import export_chat_to_docx
out = export_chat_to_docx(target_dir)
except Exception as e:
console.print(f"[err]导出失败:[/err] {type(e).__name__}: {e}")
continue
console.print(f"[ok]已导出[/ok] -> {out}")
continue
if not cmd: if not cmd:
continue continue
@ -291,6 +318,55 @@ def tasks(workspace: str, limit: int, status: str) -> None:
make_console().print(tbl) make_console().print(tbl)
@cli.command()
@click.argument("task_id")
@click.option("--workspace", default=None, help="工作目录")
@click.option("-o", "--output", default=None,
              help="输出 .docx 路径,默认 <task_dir>/chat_<task_id>.docx")
@click.option("--include-system", is_flag=True,
              help="包含 system prompt(默认跳过,信息密度低)")
@click.option("--no-reasoning", is_flag=True,
              help="不包含 reasoning_content(默认带)")
# IntRange(min=0): a negative count would reach string slicing in the
# exporter and produce a garbled excerpt, so reject it at the CLI boundary.
@click.option("--tool-head", default=1000, type=click.IntRange(min=0),
              help="tool 结果保留前 N 字符(默认 1000)")
@click.option("--tool-tail", default=500, type=click.IntRange(min=0),
              help="tool 结果保留后 N 字符(默认 500)")
def export(task_id: str, workspace: str | None, output: str | None,
           include_system: bool, no_reasoning: bool,
           tool_head: int, tool_tail: int) -> None:
    """把指定 task 的对话导出为 .docx。task_id 用 'last' 取最近一个。"""
    # NOTE: the docstring above is click's user-facing help text, so it is
    # kept verbatim. The command resolves the task directory, checks that
    # messages.json exists and delegates rendering to export_chat_to_docx;
    # every failure path prints a message and exits with status 1.

    # Imported inside the command rather than at module level (presumably so
    # python-docx is only loaded when an export is actually requested).
    from core.export_docx import export_chat_to_docx

    console = make_console()
    cfg = load_config()
    ws = resolve_workspace(workspace, cfg)

    # "last" is an alias for the first row returned by _list_task_rows.
    if task_id == "last":
        rows = _list_task_rows(ws, limit=1)
        if not rows:
            console.print("[err]没有 task 可导出[/err]")
            sys.exit(1)
        task_id = rows[0][1]

    task_dir = tasks_dir(ws) / task_id
    if not (task_dir / "messages.json").exists():
        console.print(f"[err]task 不存在或无 messages.json:[/err] {task_dir}")
        sys.exit(1)

    # None lets the exporter pick its default location inside the task dir.
    out = Path(output).resolve() if output else None
    try:
        path = export_chat_to_docx(
            task_dir, out,
            include_system=include_system,
            include_reasoning=not no_reasoning,
            tool_head=tool_head,
            tool_tail=tool_tail,
        )
    except Exception as e:
        # Top-level CLI boundary: report any failure instead of a traceback.
        console.print(f"[err]导出失败:[/err] {type(e).__name__}: {e}")
        sys.exit(1)
    console.print(f"[ok]导出完成[/ok] -> {path}")
@cli.command() @cli.command()
@click.option("--model", default=None, help="模型档案,如 deepseek_v4.flash 或 deepseek_v4.pro") @click.option("--model", default=None, help="模型档案,如 deepseek_v4.flash 或 deepseek_v4.pro")
@click.option("--long-context", is_flag=True, help="加跑 needle-in-haystack(费 token,默认关)") @click.option("--long-context", is_flag=True, help="加跑 needle-in-haystack(费 token,默认关)")

372
core/export_docx.py Normal file
View File

@ -0,0 +1,372 @@
"""把 task 的 messages.json 渲染为 .docx 对话稿。
布局:
- 文档开头 meta (task_id / 模式 / 描述 / 模型 / 创建时间 / 消息数 / tokens / 导出时间)
- 主体每条消息一组段落,全部左排,小字号,角色用不同颜色加粗区分
- assistant reasoning_content 默认带,灰色斜体
- tool 结果保留前 head + 中间省略 + tail 三段
- tool_calls function + 参数 JSON 单列展示
调用入口:
- 顶层函数 export_chat_to_docx(task_dir, out_path=None, ...)
- CLI 子命令 `python cli.py export <task_id>` REPL `/export [<task_id>]` 都走它
"""
from __future__ import annotations
import json
from datetime import datetime
from pathlib import Path
from typing import Optional
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm, Pt, RGBColor
# ───────────────────────── Colour palette ─────────────────────────
# GitHub-ish palette picked for high contrast on Word's light background: easy on the eyes yet still distinguishable
COLOR_USER = RGBColor(0x6F, 0x42, 0xC1) # purple
COLOR_ASSISTANT = RGBColor(0x1F, 0x6F, 0xEB) # blue
COLOR_TOOL_CALL = RGBColor(0xBF, 0x63, 0x10) # orange (slightly darkened to stay readable)
COLOR_TOOL_RESULT = RGBColor(0x1A, 0x7F, 0x37) # green
COLOR_REASONING = RGBColor(0x6E, 0x76, 0x81) # mid grey, rendered in italics
COLOR_SYSTEM = RGBColor(0x57, 0x60, 0x6A) # dark grey
COLOR_META_LABEL = RGBColor(0x57, 0x60, 0x6A)
# ───────────────────────── 字体辅助 ─────────────────────────
def _set_run_fonts(run, *, cn_font: str = "宋体", en_font: str = "Times New Roman") -> None:
    """Assign separate East-Asian and Latin typefaces to a run.

    Writes the ``w:eastAsia``, ``w:ascii`` and ``w:hAnsi`` attributes of the
    run's ``w:rFonts`` element, creating that element when it is missing.

    Args:
        run: The python-docx run to modify in place.
        cn_font: Typeface written to ``w:eastAsia``.
        en_font: Typeface written to ``w:ascii`` and ``w:hAnsi``.
    """
    run_props = run._element.get_or_add_rPr()
    fonts_el = run_props.find(qn("w:rFonts"))
    if fonts_el is None:
        fonts_el = OxmlElement("w:rFonts")
        run_props.append(fonts_el)
    for attr_name, typeface in (
        ("w:eastAsia", cn_font),
        ("w:ascii", en_font),
        ("w:hAnsi", en_font),
    ):
        fonts_el.set(qn(attr_name), typeface)
def _preserve_spaces(run) -> None:
    """Set ``xml:space="preserve"`` on every ``w:t`` element of the run.

    Keeps consecutive spaces from being collapsed, which matters for code
    blocks and indented JSON.
    """
    space_attr = qn("xml:space")
    for text_el in run._element.iter(qn("w:t")):
        text_el.set(space_attr, "preserve")
# ───────────────────────── 文档骨架 ─────────────────────────
def _init_doc() -> Document:
    """Create an empty document with the page geometry and base style used by the export.

    Returns:
        A new ``Document`` whose first section is 21 cm x 29.7 cm with
        2.0 / 2.0 / 2.5 / 2.0 cm top / bottom / left / right margins, and
        whose ``Normal`` style is 9.5 pt Times New Roman with 1.3 line
        spacing, no paragraph spacing and no first-line indent.
    """
    document = Document()

    # Page size and margins of the first (only) section.
    page = document.sections[0]
    for attr_name, length in (
        ("page_height", Cm(29.7)),
        ("page_width", Cm(21)),
        ("top_margin", Cm(2.0)),
        ("bottom_margin", Cm(2.0)),
        ("left_margin", Cm(2.5)),
        ("right_margin", Cm(2.0)),
    ):
        setattr(page, attr_name, length)

    # Defaults configured on the Normal style.
    base_style = document.styles["Normal"]
    base_style.font.name = "Times New Roman"
    base_style.font.size = Pt(9.5)

    base_fmt = base_style.paragraph_format
    base_fmt.line_spacing = 1.3
    base_fmt.space_before = Pt(0)
    base_fmt.space_after = Pt(0)
    base_fmt.first_line_indent = None
    return document
# ───────────────────────── 段落原语 ─────────────────────────
def _add_role_header(doc: Document, label: str, color: RGBColor) -> None:
    """Append a bold, coloured, left-aligned heading line for one message.

    Args:
        doc: Document to append the paragraph to.
        label: Heading text, e.g. ``"[user]"``.
        color: Font colour applied to the heading run.
    """
    header = doc.add_paragraph()
    header.alignment = WD_ALIGN_PARAGRAPH.LEFT

    header_fmt = header.paragraph_format
    header_fmt.first_line_indent = None
    header_fmt.space_before = Pt(8)  # visual gap before each new message
    header_fmt.space_after = Pt(2)

    title_run = header.add_run(label)
    title_font = title_run.font
    title_font.size = Pt(10.5)
    title_font.bold = True
    title_font.color.rgb = color
    _set_run_fonts(title_run, cn_font="黑体", en_font="Consolas")
# C0 control characters other than TAB / LF / CR are not legal XML 1.0
# character data; python-docx (via lxml) raises ValueError when asked to
# store them, which would abort the whole export. Tool output frequently
# contains such bytes (ANSI colour escapes, NULs), so they are dropped.
_XML_ILLEGAL_CONTROLS = dict.fromkeys(
    cp for cp in range(0x20) if cp not in (0x09, 0x0A, 0x0D)
)


def _add_text(
    doc: Document,
    text: str,
    *,
    color: Optional[RGBColor] = None,
    italic: bool = False,
    mono: bool = False,
    size: Pt = Pt(9.5),
    indent_left: Optional[Pt] = None,
) -> None:
    """Append ``text`` as a single left-aligned paragraph, keeping line breaks.

    Each line of the text becomes its own run, separated by explicit line
    breaks inside one paragraph. Nothing is added when the text is empty
    (or contains only characters that cannot be stored in the document).

    Args:
        doc: Document to append to.
        text: Text to render; may span several lines.
        color: Optional font colour for every run.
        italic: Render the text in italics.
        mono: Use the monospace pair (新宋体 + Consolas) and preserve
            consecutive spaces; otherwise 宋体 + Times New Roman.
        size: Font size of the text runs.
        indent_left: Optional left indent for the paragraph.
    """
    if not text:
        return

    # Normalise CRLF / bare CR to LF before splitting: python-docx renders a
    # carriage return left at the end of a line as an extra line break, so
    # Windows-style text would otherwise come out double-spaced.
    cleaned = (
        text.replace("\r\n", "\n")
        .replace("\r", "\n")
        .translate(_XML_ILLEGAL_CONTROLS)
    )
    if not cleaned:
        return

    p = doc.add_paragraph()
    pf = p.paragraph_format
    pf.first_line_indent = None
    pf.line_spacing = 1.25
    pf.space_before = Pt(0)
    pf.space_after = Pt(2)
    if indent_left is not None:
        pf.left_indent = indent_left
    p.alignment = WD_ALIGN_PARAGRAPH.LEFT

    cn_font = "新宋体" if mono else "宋体"
    en_font = "Consolas" if mono else "Times New Roman"

    for i, line in enumerate(cleaned.split("\n")):
        if i > 0:
            # A dedicated empty run carries the break between two lines.
            p.add_run().add_break()
        run = p.add_run(line)
        run.font.size = size
        if color is not None:
            run.font.color.rgb = color
        if italic:
            run.italic = True
        _set_run_fonts(run, cn_font=cn_font, en_font=en_font)
        if mono:
            _preserve_spaces(run)
# ───────────────────────── 工具结果裁剪 ─────────────────────────
def _truncate_with_ellipsis(text: str, head: int, tail: int) -> str:
    """Shorten long text to its first ``head`` and last ``tail`` characters.

    Text that is at most ``head + tail + 80`` characters long is returned
    unchanged (the 80-character slack avoids "truncating" text when the
    omission marker would save almost nothing). Otherwise the kept parts are
    joined by blank lines around a ``... [omitted N chars] ...`` marker.

    Args:
        text: Text to shorten; ``None`` is treated as empty.
        head: Number of leading characters to keep; negatives count as 0.
        tail: Number of trailing characters to keep; negatives count as 0.

    Returns:
        The original or shortened text, ``""`` when ``text`` is ``None``.
    """
    if text is None:
        return ""
    head = max(head, 0)
    tail = max(tail, 0)
    if len(text) <= head + tail + 80:
        return text

    omitted = len(text) - head - tail
    marker = f"... [omitted {omitted} chars] ..."
    # Slice the tail from an explicit start index: ``text[-tail:]`` would
    # return the *entire* string when tail == 0.
    segments = (text[:head], marker, text[len(text) - tail:])
    # Empty segments (head == 0 or tail == 0) are dropped so the result has
    # no leading or trailing blank lines.
    return "\n\n".join(segment for segment in segments if segment)
def _format_args(args_str: str) -> str:
    """Pretty-print tool-call arguments, always returning a string.

    A JSON string is re-serialised with two-space indentation and non-ASCII
    characters kept as-is; a string that is not valid JSON is returned
    unchanged. Arguments that are already decoded (e.g. a dict) are
    serialised directly, so the caller never receives a non-string value.

    Args:
        args_str: Raw ``arguments`` value of a tool call.

    Returns:
        The formatted text, or ``""`` for an empty / missing value.
    """
    if not args_str:
        return ""

    if isinstance(args_str, (str, bytes, bytearray)):
        try:
            parsed = json.loads(args_str)
        except (ValueError, RecursionError):
            # Not JSON: show the raw text as it was stored.
            if isinstance(args_str, str):
                return args_str
            return bytes(args_str).decode("utf-8", "replace")
    else:
        # Already a decoded object; pretty-print it directly.
        parsed = args_str

    try:
        return json.dumps(parsed, ensure_ascii=False, indent=2)
    except (TypeError, ValueError, RecursionError):
        # Not JSON-serialisable: fall back to the plain text form.
        return str(args_str)
# ───────────────────────── Meta 区块 ─────────────────────────
def _add_meta_block(
    doc: Document, meta: dict, task_state: dict, n_msgs: int, source_path: Path
) -> None:
    """Write the title line and the two-column metadata table at the top of the document.

    Values are taken from ``meta`` first and fall back to ``task_state``;
    empty values are shown as ``-``.

    Args:
        doc: Document to append to.
        meta: The ``meta`` mapping read from messages.json (may be empty).
        task_state: Mapping read from state.json (may be empty).
        n_msgs: Number of messages, shown in the table.
        source_path: Path of the messages.json file, shown in the table.
    """
    task_id = meta.get("id") or task_state.get("task_id") or "?"

    def either(key: str):
        # Prefer the value from meta, then task_state, then an empty string.
        return meta.get(key) or task_state.get(key) or ""

    # Title line.
    heading = doc.add_paragraph()
    heading.alignment = WD_ALIGN_PARAGRAPH.LEFT
    heading_fmt = heading.paragraph_format
    heading_fmt.first_line_indent = None
    heading_fmt.space_before = Pt(0)
    heading_fmt.space_after = Pt(4)
    title_run = heading.add_run(f"Task 对话记录 - {task_id}")
    title_run.font.size = Pt(14)
    title_run.font.bold = True
    _set_run_fonts(title_run, cn_font="黑体", en_font="Consolas")

    prompt_tokens = task_state.get("tokens_prompt", 0)
    completion_tokens = task_state.get("tokens_completion", 0)
    token_summary = (
        f"{prompt_tokens} prompt / {completion_tokens} completion / "
        f"{prompt_tokens + completion_tokens} total"
    )

    rows = [
        ("Task ID", task_id),
        ("模式", task_state.get("mode") or ""),
        ("描述", task_state.get("description") or ""),
        ("状态", task_state.get("status") or ""),
        ("模型", either("model")),
        ("Profile", either("model_profile")),
        ("CWD", either("cwd")),
        ("创建时间", either("created_at")),
        ("更新时间", task_state.get("updated_at") or ""),
        ("消息数", str(n_msgs)),
        ("Tokens", token_summary),
        ("源文件", str(source_path)),
        ("导出时间", datetime.now().isoformat(timespec="seconds")),
    ]

    table = doc.add_table(rows=len(rows), cols=2)
    try:
        table.style = "Light Grid Accent 1"
    except KeyError:
        # The style is not available in this template; keep the default.
        pass

    def fill_cell(cell, content: str, *, is_label: bool) -> None:
        # Replace the cell content with one small run; labels are bold and tinted.
        cell.text = ""
        para = cell.paragraphs[0]
        para.paragraph_format.first_line_indent = None
        para.paragraph_format.line_spacing = 1.15
        cell_run = para.add_run(content)
        cell_run.font.size = Pt(9)
        if is_label:
            cell_run.font.bold = True
            cell_run.font.color.rgb = COLOR_META_LABEL
        _set_run_fonts(cell_run, cn_font="宋体", en_font="Times New Roman")

    for index, (label, value) in enumerate(rows):
        fill_cell(table.rows[index].cells[0], label, is_label=True)
        fill_cell(
            table.rows[index].cells[1],
            str(value) if value else "-",
            is_label=False,
        )
# ───────────────────────── 单条消息渲染 ─────────────────────────
def _render_message(
    doc: Document,
    msg: dict,
    *,
    include_reasoning: bool,
    tool_head: int,
    tool_tail: int,
) -> None:
    """Render one chat message as a role header followed by its body paragraphs.

    Args:
        doc: Document to append to.
        msg: Message mapping; ``role`` selects the layout.
        include_reasoning: Whether an assistant message's reasoning text is
            rendered before its content.
        tool_head: Leading characters kept from a tool result.
        tool_tail: Trailing characters kept from a tool result.
    """
    role = msg.get("role")
    body = msg.get("content") or ""

    if role == "system":
        _add_role_header(doc, "[system]", COLOR_SYSTEM)
        # System prompts are shortened with fixed limits (1500 head / 500 tail).
        shortened = _truncate_with_ellipsis(body, 1500, 500)
        _add_text(doc, shortened, color=COLOR_SYSTEM, size=Pt(8.5), mono=True)

    elif role == "user":
        _add_role_header(doc, "[user]", COLOR_USER)
        _add_text(doc, body, size=Pt(10))

    elif role == "assistant":
        _add_role_header(doc, "[assistant]", COLOR_ASSISTANT)

        if include_reasoning:
            # Reasoning is read from the message itself, falling back to
            # the provider_specific_fields mapping.
            thoughts = (
                msg.get("reasoning_content")
                or (msg.get("provider_specific_fields") or {}).get("reasoning_content")
                or ""
            )
            if thoughts:
                _add_text(
                    doc, "▎reasoning",
                    color=COLOR_REASONING, size=Pt(8.5), italic=True,
                )
                _add_text(
                    doc, thoughts,
                    color=COLOR_REASONING, size=Pt(9), italic=True,
                    indent_left=Pt(12),
                )

        if body:
            _add_text(doc, body, size=Pt(10))

        # One caption line plus an indented argument block per tool call.
        for tool_call in msg.get("tool_calls") or []:
            function = tool_call.get("function") or {}
            fn_name = function.get("name", "?")
            raw_args = function.get("arguments", "")
            call_id = tool_call.get("id", "")
            _add_text(
                doc, f"▎tool_call -> {fn_name} ({call_id})",
                color=COLOR_TOOL_CALL, size=Pt(9), italic=True,
            )
            _add_text(
                doc, _format_args(raw_args),
                color=COLOR_TOOL_CALL, size=Pt(8.5), mono=True,
                indent_left=Pt(12),
            )

    elif role == "tool":
        call_id = msg.get("tool_call_id", "")
        _add_role_header(doc, f"[tool result] ({call_id})", COLOR_TOOL_RESULT)
        _add_text(
            doc, _truncate_with_ellipsis(body, tool_head, tool_tail),
            color=COLOR_TOOL_RESULT, size=Pt(8.5), mono=True,
            indent_left=Pt(12),
        )

    else:
        # Fallback for roles this exporter does not know about.
        _add_role_header(doc, f"[{role or 'unknown'}]", COLOR_SYSTEM)
        _add_text(doc, body, size=Pt(9.5))
# ───────────────────────── 顶层入口 ─────────────────────────
def export_chat_to_docx(
    task_dir: Path,
    out_path: Optional[Path] = None,
    *,
    include_system: bool = False,
    include_reasoning: bool = True,
    tool_head: int = 1000,
    tool_tail: int = 500,
) -> Path:
    """Render ``task_dir/messages.json`` as a .docx transcript and return its path.

    messages.json may be either a bare list of messages or an object with
    ``meta`` and ``messages`` keys. ``state.json`` in the same directory is
    read on a best-effort basis to enrich the metadata table.

    Args:
        task_dir: Task directory containing messages.json (a plain string
            path is accepted as well).
        out_path: Destination file; defaults to
            ``task_dir/chat_<task_id>.docx``. A plain string path is
            accepted as well. Missing parent directories are created.
        include_system: Render system messages (skipped by default).
        include_reasoning: Render assistant reasoning text (on by default).
        tool_head: Leading characters kept from each tool result.
        tool_tail: Trailing characters kept from each tool result.

    Returns:
        The path the document was written to.

    Raises:
        FileNotFoundError: messages.json does not exist.
        ValueError: messages.json is neither a list nor an object
            (``json.JSONDecodeError`` for invalid JSON is a subclass).
    """
    # Coerce so callers passing plain strings do not fail on "/" or ".parent".
    task_dir = Path(task_dir)

    msg_path = task_dir / "messages.json"
    if not msg_path.exists():
        raise FileNotFoundError(f"messages.json 不存在: {msg_path}")

    data = json.loads(msg_path.read_text(encoding="utf-8"))
    if isinstance(data, list):
        meta = {}
        messages = data
    elif isinstance(data, dict):
        meta = data.get("meta") or {}
        messages = data.get("messages") or []
    else:
        raise ValueError(f"messages.json 格式不识别: {type(data).__name__}")

    # state.json is optional: an unreadable or malformed file must not block
    # the export, it only means fewer rows are filled in the metadata table.
    task_state: dict = {}
    state_path = task_dir / "state.json"
    if state_path.exists():
        try:
            loaded = json.loads(state_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            loaded = None
        # Valid JSON that is not an object (e.g. a list) cannot serve as the
        # state mapping and would break the later ``.get`` lookups.
        if isinstance(loaded, dict):
            task_state = loaded

    if out_path is None:
        tid = meta.get("id") or task_state.get("task_id") or task_dir.name
        out_path = task_dir / f"chat_{tid}.docx"
    else:
        out_path = Path(out_path)

    doc = _init_doc()
    _add_meta_block(doc, meta, task_state, len(messages), msg_path)
    doc.add_paragraph()  # blank line between the metadata table and the transcript

    for msg in messages:
        if msg.get("role") == "system" and not include_system:
            continue
        _render_message(
            doc, msg,
            include_reasoning=include_reasoning,
            tool_head=tool_head,
            tool_tail=tool_tail,
        )

    out_path.parent.mkdir(parents=True, exist_ok=True)
    doc.save(str(out_path))
    return out_path