diff --git a/PROGRESS.md b/PROGRESS.md index 94969b8..866700f 100644 --- a/PROGRESS.md +++ b/PROGRESS.md @@ -21,6 +21,9 @@ ## 已完成关键能力 +### 2026-07-03 / 进度条自愈:回放层强制单调完成(d1285247 复盘,bump 0.36.2) +用户报 task d1285247(ppt生成3)进度条反常:后面步(质检/导出)打绿勾、前面步(摄取素材/配图)却卡红圈"…",顶部"4/6"。诊断脚本 `scripts/diag_progress_d1285247.py` 拉出 `task_progress` 调用序列定位**非渲染 bug**——`progress.js` 忠实回放了模型发的调用:模型每次推进是"标下一步 completed + 再下一步 in_progress"的跳步,**每次都漏给上一次留在 in_progress 的那步补 completed**(s1、s3 被漏),回放到最后就是 `s1=in_progress,s2=completed,s3=in_progress,s4/s5/s6=completed`。根因是模型用工具收尾不稳,纯提示拦不住(与门体系教训同构)。修在**回放层加确定性单调不变量**:`enforceMonotonicProgress`——checklist 线性推进,只要某步 completed,其之前所有步自动视为 completed;`applyProgressAction` 的 set_plan / update_step 两条出口都过一遍,漏发自愈。前端单测加 3 条(含复刻 d1285247 跳步序列 → 6/6)。已知边界:假设步骤线性顺序(现有所有 skill 成立);若将来出现真·并行/乱序 checklist 会被抹平。 + ### 2026-07-03 / ppt 门体系二轮硬化:逃生口收紧 + 导出自动质检 + svg_final 嵌图修复(139a59c5 重跑复盘,bump 0.36.1) 0.36.0 上线后同 task 重跑(仍 deepseek-v4-flash):产物整体大幅好转,但仍有 4/25 页错位(P12 色带裁两行标题+正文跑出卡外 / P14·P18 文字骑卡片边框 / P21 手画饼图弧线劈叉)。轨迹显示**两道新门都触发了、都被模型 8 秒内用逃生口按过去**:质检+渲图验收 0 调用,`--allow-iconless` + `--allow-unreviewed` 连按直接导出——门有了,逃生口对弱模型等于"报错时该加的参数"。且 `--allow-iconless` 的"正当理由"是我们自己给的:wrapper docstring 老示例教它 `-s final`,而图标门检查的是 svg_final(data-icon 已展开)→ 误报零图标;`-s final` 还连锁出图片路径连环坑(见 F)。二轮修五处:**A 验收门分层**——"从没渲过/渲后又改/finalize 前渲的"为硬问题,**任何 CLI flag 不豁免**(渲图便宜且机器可验,没理由交付没人能看过的页);`--allow-unreviewed` 只豁免"渲过但没标 pass";运维兜底走 `ZCBOT_PPT_FORCE_EXPORT=1` 环境变量(不进 --help/SKILL)。**B 拔 `-s final` 雷**——图标门永远对 svg_output 源检测(误报根除);wrapper docstring 示例去掉 `-s final` 并注明勿用。**C 导出自动质检门**——svg_to_pptx 导出前内嵌复跑 quality checker 逐页硬错误(坏 XML/禁用特性/图片缺失/几何 error),error 拒绝导出、无豁免参数(fail-open 于 import 失败)——"忘跑/不跑质检"从此无效。**D** 验收门报错计数措辞修正。**E 几何质检加"文字骑卡片边缘"检测**(warning 带坐标:文字与可见矩形交叠面积占比 0.2–0.85 即骑边,P12/P14/P18 三类当场可命中;P21 饼图弧线错误静态无解,只能渲图过目)。**F 修 svg_final 嵌图失效 bug**——finalize 先 copytree 到 `.build/svg_final` 再就地嵌图,`../images/` 从 svg_final 解析必落空 → **所有 deck 的 svg_final 一直嵌不进外链图**(渲图验收 PNG 
里图片也是空的);`_resolve_image_path` 加"rebase 回 svg_output 同相对路径"兜底,实测 data:URI 落位。本机全链路回归:未渲→硬拒(带 flag 也拒)/ pending→拒、flag 放 / pass→放行 / 质检 error→拒 / env 强制→放;71 charts 模板几何 0 error。已知边界:P21 类"图形画错但不重叠不越界"仍只有渲图过目能拦——"看没看"无法机器验证,治本要平台层 vision 验收(待做,同 0.35.1 备注)。
diff --git a/core/__init__.py b/core/__init__.py
index 3cccbb5..b698ffa 100644
--- a/core/__init__.py
+++ b/core/__init__.py
@@ -1,3 +1,3 @@
 # zcbot 版本号单一事实源:web/app.py 的 FastAPI version、/healthz 返回、前端展示都引这里。
 # 改版本只动这一行。
-__version__ = "0.36.1"
+__version__ = "0.36.2"
diff --git a/scripts/diag_progress_d1285247.py b/scripts/diag_progress_d1285247.py
new file mode 100644
index 0000000..605b039
--- /dev/null
+++ b/scripts/diag_progress_d1285247.py
@@ -0,0 +1,70 @@
+"""Dump the task_progress tool-call sequence for a task (by id prefix). ASCII-only."""
+import json
+import os
+import sys
+from pathlib import Path
+
+# Seed ZCBOT_DB_URL from the repo-root .env when that file exists; a missing
+# .env is fine as long as the variable is already exported.
+env = Path(__file__).resolve().parent.parent / ".env"
+if env.is_file():
+    for line in env.read_text(encoding="utf-8").splitlines():
+        if line.strip().startswith("ZCBOT_DB_URL="):
+            os.environ["ZCBOT_DB_URL"] = line.split("=", 1)[1].strip()
+if not os.environ.get("ZCBOT_DB_URL"):
+    sys.exit("[NO DB URL] set ZCBOT_DB_URL in the environment or in .env")
+from sqlalchemy import create_engine, text  # noqa: E402
+
+engine = create_engine(os.environ["ZCBOT_DB_URL"])
+prefix = sys.argv[1] if len(sys.argv) > 1 else "d1285247"
+
+with engine.connect() as conn:
+    # Resolve the id prefix to a task row; the first row returned wins.
+    row = conn.execute(
+        text("select task_id,name,status,run_status from tasks where task_id::text like :p"),
+        {"p": prefix + "%"},
+    ).fetchone()
+    if not row:
+        print("[NO TASK]", prefix)
+        sys.exit(1)
+    tid = row[0]
+    print(f"[TASK] {tid} name={row[1]!r} status={row[2]} run={row[3]}")
+
+    msgs = conn.execute(
+        text("select idx,payload from messages where task_id=:t order by idx"),
+        {"t": tid},
+    ).fetchall()
+    print(f"[MESSAGES] {len(msgs)}")
+
+    n = 0
+    for idx, p in msgs:
+        for tc in p.get("tool_calls") or []:
+            fn = tc.get("function") or {}
+            if fn.get("name") != "task_progress":
+                continue
+            n += 1
+            try:
+                args = json.loads(fn.get("arguments") or "{}")
+            except (TypeError, ValueError) as e:
+                print(f" [{idx}] PARSE-ERR: {e} raw={fn.get('arguments')!r}")
+                continue
+            if not isinstance(args, dict):
+                # Valid JSON but not an object (e.g. a list): nothing to .get() from.
+                print(f" [{idx}] PARSE-ERR: arguments is not an object raw={fn.get('arguments')!r}")
+                continue
+            act = args.get("action")
+            if act == "set_plan":
+                steps = args.get("steps") or []
+                print(f" [{idx}] set_plan ({len(steps)} steps):")
+                for st in steps:
+                    if not isinstance(st, dict):
+                        print(f" [malformed step] {st!r}")
+                        continue
+                    # str() first: a width spec applied to None raises TypeError.
+                    print(f" {st.get('id')!r:8} {str(st.get('status')):11} {st.get('title')!r}")
+            elif act == "update_step":
+                st = args.get("step") or {}
+                print(f" [{idx}] update_step id={st.get('id')!r} status={st.get('status')!r} title={st.get('title')!r}")
+            else:
+                print(f" [{idx}] {act} {json.dumps(args, ensure_ascii=False)}")
+    print(f"[task_progress calls] {n}")
diff --git a/tests/frontend_task_progress.test.mjs b/tests/frontend_task_progress.test.mjs
index bd69082..a4ffd24 100644
--- a/tests/frontend_task_progress.test.mjs
+++ b/tests/frontend_task_progress.test.mjs
@@ -3,6 +3,7 @@ import test from "node:test";
 
 import {
   applyProgressAction,
+  enforceMonotonicProgress,
   progressActionsFromToolCalls,
 } from "../web/static/js/progress.js";
 
@@ -49,3 +50,70 @@ test("tool calls can apply progress updates on top of previous task progress", (
     { id: "s2", title: "实现功能", status: "pending" },
   ]);
 });
+
+test("a completed step force-completes earlier dangling steps (monotonic heal)", () => {
+  const steps = [
+    { id: "s1", title: "摄取素材", status: "in_progress" },
+    { id: "s2", title: "策略", status: "completed" },
+    { id: "s3", title: "配图", status: "pending" },
+  ];
+  assert.deepEqual(enforceMonotonicProgress(steps), [
+    { id: "s1", title: "摄取素材", status: "completed" },
+    { id: "s2", title: "策略", status: "completed" },
+    { id: "s3", title: "配图", status: "pending" },
+  ]);
+});
+
+test("update_step marking a later step completed heals the earlier in_progress step", () => {
+  const initial = applyProgressAction([], {
+    action: "set_plan",
+    steps: [
+      { id: "s1", title: "摄取素材", status: "in_progress" },
+      { id: "s2", title: "策略", status: "pending" },
+      { id: "s3", title: "配图", status: "pending" },
+    ],
+  });
+
+  const updated = applyProgressAction(initial, {
+    action: "update_step",
+    step: { id: "s2", status: "completed" },
+  });
+
+  assert.deepEqual(updated, [
+    { id: "s1", title: "摄取素材", status: "completed" },
+    { id: "s2", title: "策略", status: "completed" },
+    { id: "s3", title: "配图", status: "pending" },
+  ]);
+});
+
+// Replay the exact d1285247 task_progress sequence that produced the reported
+// "green check below, red dot above" bug; the heal must yield a clean 6/6.
+test("replays the d1285247 skip-ahead sequence to a fully completed plan", () => {
+  const call = (args) => ({
+    function: { name: "task_progress", arguments: JSON.stringify(args) },
+  });
+  const seq = [
+    call({
+      action: "set_plan",
+      steps: [
+        { id: "s1", title: "摄取素材", status: "in_progress" },
+        { id: "s2", title: "策略", status: "pending" },
+        { id: "s3", title: "配图", status: "pending" },
+        { id: "s4", title: "执行", status: "pending" },
+        { id: "s5", title: "质检", status: "pending" },
+        { id: "s6", title: "导出", status: "pending" },
+      ],
+    }),
+    call({ action: "update_step", step: { id: "s2", status: "completed" } }),
+    call({ action: "update_step", step: { id: "s3", status: "in_progress" } }),
+    call({ action: "update_step", step: { id: "s4", status: "completed" } }),
+    call({ action: "update_step", step: { id: "s5", status: "in_progress" } }),
+    call({ action: "update_step", step: { id: "s5", status: "completed" } }),
+    call({ action: "update_step", step: { id: "s6", status: "completed" } }),
+  ];
+
+  const { steps } = progressActionsFromToolCalls(seq, []);
+  assert.deepEqual(steps.map(s => s.status), [
+    "completed", "completed", "completed", "completed", "completed", "completed",
+  ]);
+});
diff --git a/web/static/js/progress.js b/web/static/js/progress.js
index 12a5c34..6831f17 100644
--- a/web/static/js/progress.js
+++ b/web/static/js/progress.js
@@ -18,13 +18,31 @@ export function normalizeProgressStep(step) {
   return { id, title, status };
 }
 
+// The checklist is a linear progress bar: work advances top-to-bottom. Models
+// don't always send a `completed` update before moving the in_progress marker
+// on (observed: later steps marked done while earlier ones dangle at
+// in_progress), which renders as "green check below, red dot above". Enforce
+// monotonic completion — any step before the last completed one is completed
+// too — so a missed update self-heals instead of stranding earlier steps.
+//
+// Returns a new array of shallow copies; the input is never mutated. Entries
+// that are not objects are dropped rather than spread into phantom steps
+// (`{}` or `{ status: "completed" }`) that carry no id or title.
+export function enforceMonotonicProgress(steps) {
+  if (!Array.isArray(steps)) return [];
+  const valid = steps.filter((s) => s !== null && typeof s === "object");
+  const lastCompleted = valid.map((s) => s.status).lastIndexOf("completed");
+  return valid.map((s, i) => (i < lastCompleted ? { ...s, status: "completed" } : { ...s }));
+}
+
 export function applyProgressAction(progress, args) {
   const current = cloneProgressSteps(progress);
   if (!args || typeof args !== "object") return current;
   const action = args.action || "";
   if (action === "clear") return [];
   if (action === "set_plan") {
-    return Array.isArray(args.steps) ? args.steps.map(normalizeProgressStep).filter(Boolean) : [];
+    const planned = Array.isArray(args.steps) ? args.steps.map(normalizeProgressStep).filter(Boolean) : [];
+    return enforceMonotonicProgress(planned);
   }
   if (action === "update_step") {
     const raw = args.step;
@@ -48,7 +66,7 @@ export function applyProgressAction(progress, args) {
         status: normalizeProgressStatus(raw.status),
       });
     }
-    return next;
+    return enforceMonotonicProgress(next);
   }
   return current;
 }