diff --git a/PROGRESS.md b/PROGRESS.md
index ebe1190..d32c152 100644
--- a/PROGRESS.md
+++ b/PROGRESS.md
@@ -23,7 +23,7 @@
 
 ### 2026-06-05
 
-- **记账给 DeepSeek 前缀缓存命中折价(修虚高 ~2-3x)+ 前端体现缓存命中/真实成本**:排查"rust 优势→PPT"那 task(flash,34 轮)发现 `tokens_in` 累计 69.9 万里 **88.6% 是缓存命中**,但 `usage.py::_fallback_chat_cost_cny` 把命中段也按 `input` 全价(1.0)算 → 记 ¥0.84,真实(命中按 0.1x)只 ~¥0.28,**越大的 task 虚高越多**(文献采集 53% 命中:¥33→~¥16)。修:① `ModelCapabilities` 加 `cache_hit_cny_per_mtoken`(deepseek flash 0.1 / pro 0.2;0=不区分按全价兜底,绝不少记);② 成本公式拆三段「命中×缓存价 + (input−命中)×input价 + output×output价」,`loop.py` 把 `cache_hit_tokens` + 缓存单价透传进 `record_chat_usage`;③ 前端不加 DB 列——`web/app.py` 加 `_usage_aggregates`(单查询 GROUP BY `usage_events`,复用列表 `msg_counts` 同款批量范式,无 N+1)on-the-fly 算每 task 真实成本 + 缓存命中 token,`_task_dict` 带出,列表行显 `¥`、顶栏 `formatTaskUsage` 显「总 tok · 缓存命中 N% · ¥真实花费」。**折价只对新 chat 事件生效**,历史 events 保留原记账(不回填,不删数据)。**注**:真正压低 token 体量的杠杆是减少轮数(高成本 task 全是 100+ 轮的逐步 write/run_python 循环),非本次范围。
+- **记账给 DeepSeek 前缀缓存命中折价(修虚高 ~2-3x)+ 前端体现缓存命中/真实成本**:排查"rust 优势→PPT"那 task(flash,34 轮)发现 `tokens_in` 累计 69.9 万里 **88.6% 是缓存命中**,但 `usage.py::_fallback_chat_cost_cny` 把命中段也按 `input` 全价(1.0)算 → 记 ¥0.84,真实(命中按 0.1x)只 ~¥0.28,**越大的 task 虚高越多**(文献采集 53% 命中:¥33→~¥16)。修:① `ModelCapabilities` 加 `cache_hit_cny_per_mtoken`(deepseek flash 0.1 / pro 0.2;0=不区分按全价兜底,绝不少记);② 成本公式拆三段「命中×缓存价 + (input−命中)×input价 + output×output价」,`loop.py` 把 `cache_hit_tokens` + 缓存单价透传进 `record_chat_usage`;③ 前端不加 DB 列——`web/app.py` 加 `_usage_aggregates`(单查询 GROUP BY `usage_events`,复用列表 `msg_counts` 同款批量范式,无 N+1)on-the-fly 算每 task 真实成本 + chat token + 缓存命中,`_task_dict` 带出;列表行**不内联花费**、只显 tok 数,花费/缓存命中率藏 hover tooltip(`taskUsageTooltip`,多行:输入/输出拆分 · 命中 + 命中率 · ¥真实花费),顶栏额外内联简版。**折价只对新 chat 事件生效**,历史走 backfill 脚本(`scripts/backfill_chat_cost_cache_discount.py`,默认 dry-run,`--apply` 落库;`--assume-cache-hit-rate RATE` 给无 `cache_hit_tokens` 字段的老事件按估算命中率折价——DeepSeek 当时缓存了只是没记,全价偏高;实测过的事件用真实值不受影响)。**坑修**:命中率分母原误用 `tasks.tokens_prompt`,但该列会被「清空对话」重置而 `usage_events` 不重置 → 跨源相除算出 822% 怪值;改为 `_task_dict` 的 token 总量也优先取 usage_events 聚合(与 cache_hit 同源,命中率恒 ≤100%)。**注**:真正压低 token 体量的杠杆是减少轮数(高成本 task 全是 100+ 轮的逐步 write/run_python 循环),非本次范围。
 
 ### 2026-06-04
 
diff --git a/scripts/backfill_chat_cost_cache_discount.py b/scripts/backfill_chat_cost_cache_discount.py
index d067715..53040c3 100644
--- a/scripts/backfill_chat_cost_cache_discount.py
+++ b/scripts/backfill_chat_cost_cache_discount.py
@@ -48,8 +48,21 @@ from core.storage.usage import _fallback_chat_cost_cny
 def main() -> int:
     ap = argparse.ArgumentParser()
     ap.add_argument("--apply", action="store_true", help="真写;默认 dry-run 只打印")
+    ap.add_argument(
+        "--assume-cache-hit-rate",
+        type=float,
+        default=None,
+        metavar="RATE",
+        help="对 units 没记 cache_hit_tokens 的老事件,假定命中率 RATE(0~1)折价。"
+             "DeepSeek 当时其实缓存了前缀只是没记 → 全价偏高;给个保守估算(如 0.7)更贴近真实。"
+             "已记 cache_hit_tokens 的事件用真实值,不受影响。缺省=不假定(无字段按 0 命中/全价)。",
+    )
     args = ap.parse_args()
 
+    assume_rate = args.assume_cache_hit_rate
+    if assume_rate is not None and not (0.0 <= assume_rate <= 1.0):
+        ap.error("--assume-cache-hit-rate 必须在 0~1 之间")
+
     cfg = load_config()
     models_dir = AB_ROOT / cfg["models_dir"]
 
@@ -64,10 +77,11 @@ def main() -> int:
                 caps_cache[profile] = None
         return caps_cache[profile]
 
-    # per-profile 统计:事件数 / 改动数 / 跳过数 / 旧总额 / 新总额
+    # per-profile 统计:事件数 / 改动数 / 跳过数 / 假定命中数 / 旧总额 / 新总额
     stat_n: dict[str, int] = defaultdict(int)
     stat_changed: dict[str, int] = defaultdict(int)
     stat_skipped: dict[str, int] = defaultdict(int)
+    stat_assumed: dict[str, int] = defaultdict(int)
     old_sum: dict[str, Decimal] = defaultdict(lambda: Decimal("0"))
     new_sum: dict[str, Decimal] = defaultdict(lambda: Decimal("0"))
 
@@ -97,12 +111,23 @@ def main() -> int:
                 new_sum[profile] += Decimal(str(e.cost_cny))  # 无价不变
                 continue
 
+            tin = int(u.get("tokens_in") or 0)
+            # cache_hit:优先用真实记录;没记(key 缺失)且开了 --assume-cache-hit-rate
+            # 时按估算命中率折(DeepSeek 当时缓存了只是没记)。key 在(含 =0)= 真实值,不假定。
+            if "cache_hit_tokens" in u:
+                cache_hit = int(u.get("cache_hit_tokens") or 0)
+            elif assume_rate is not None:
+                cache_hit = int(round(tin * assume_rate))
+                stat_assumed[profile] += 1
+            else:
+                cache_hit = 0
+
             new_cost = _fallback_chat_cost_cny(
-                prompt_tokens=int(u.get("tokens_in") or 0),
+                prompt_tokens=tin,
                 completion_tokens=int(u.get("tokens_out") or 0),
                 input_cny_per_mtoken=inp,
                 output_cny_per_mtoken=outp,
-                cache_hit_tokens=int(u.get("cache_hit_tokens") or 0),
+                cache_hit_tokens=cache_hit,
                 cache_hit_cny_per_mtoken=chp,
             )
             old_cost = Decimal(str(e.cost_cny))
@@ -118,8 +143,10 @@ def main() -> int:
             s.rollback()
 
     print()
+    if assume_rate is not None:
+        print(f"[assume] 无 cache_hit 字段的老事件按命中率 {assume_rate:.0%} 估算折价")
     print(f"{'model_profile':<22}{'events':>8}{'changed':>9}{'skipped':>9}"
-          f"{'old_¥':>12}{'new_¥':>12}")
+          f"{'assumed':>9}{'old_¥':>12}{'new_¥':>12}")
     tot_old = Decimal("0")
     tot_new = Decimal("0")
     for profile in sorted(stat_n):
@@ -127,10 +154,11 @@ def main() -> int:
         tot_old += o
         tot_new += n
         print(f"{profile:<22}{stat_n[profile]:>8}{stat_changed[profile]:>9}"
-              f"{stat_skipped[profile]:>9}{float(o):>12.4f}{float(n):>12.4f}")
+              f"{stat_skipped[profile]:>9}{stat_assumed[profile]:>9}"
+              f"{float(o):>12.4f}{float(n):>12.4f}")
     print(f"{'TOTAL':<22}{sum(stat_n.values()):>8}"
           f"{sum(stat_changed.values()):>9}{sum(stat_skipped.values()):>9}"
-          f"{float(tot_old):>12.4f}{float(tot_new):>12.4f}")
+          f"{sum(stat_assumed.values()):>9}{float(tot_old):>12.4f}{float(tot_new):>12.4f}")
     print()
     print(f"[mode] {'APPLIED (committed)' if args.apply else 'DRY-RUN (no commit, rerun with --apply)'}")
     return 0
diff --git a/web/app.py b/web/app.py
index 0d1a6ff..b292da8 100644
--- a/web/app.py
+++ b/web/app.py
@@ -94,31 +94,42 @@ def _parse_ordering(s: Optional[str]) -> list:
 
 
 def _usage_aggregates(s: Any, tids: list) -> dict:
-    """按 task_id 批量聚合 usage_events:真实成本 + 缓存命中 token。
+    """Batch-aggregate usage_events per task_id: real cost + chat tokens + cache hits.
 
     单查询 GROUP BY(复用列表接口 msg_counts 同款批量范式,无 N+1)。on-the-fly 现算,
     不落 tasks 列 —— 对所有历史 task 即时准确,免回填。
     - cost_cny:全 kind(chat+image+video)合计 = task 真实花费
-    - cache_hit:仅 chat,units.cache_hit_tokens 之和(DeepSeek 等前缀缓存命中部分)
-    返回 {task_id: {"cost_cny": float, "tokens_cache_hit": int}}。
+    - tokens_in/out + cache_hit: chat events only. **All three come from usage_events**, so the hit
+      rate `cache_hit / tokens_in` never exceeds 100%. Do not divide by `tasks.tokens_prompt`: that
+      column is reset by "clear conversation" while usage_events is not, which can yield >100%.
+    Returns {task_id: {"cost_cny": float, "tokens_in": int, "tokens_out": int,
+                       "tokens_cache_hit": int}}.
     """
     if not tids:
         return {}
-    cache_hit_col = cast(UsageEvent.units["cache_hit_tokens"].astext, BigInteger)
+    chat = UsageEvent.kind == "chat"  # shared FILTER predicate for the three token sums below
+    tin_col = cast(UsageEvent.units["tokens_in"].astext, BigInteger)  # units value read as text, cast for SUM
+    tout_col = cast(UsageEvent.units["tokens_out"].astext, BigInteger)
+    hit_col = cast(UsageEvent.units["cache_hit_tokens"].astext, BigInteger)
     rows = s.execute(
         select(
             UsageEvent.task_id,
             func.coalesce(func.sum(UsageEvent.cost_cny), 0),
-            func.coalesce(
-                func.sum(cache_hit_col).filter(UsageEvent.kind == "chat"), 0
-            ),
+            func.coalesce(func.sum(tin_col).filter(chat), 0),  # coalesce: no matching chat rows -> 0, not NULL
+            func.coalesce(func.sum(tout_col).filter(chat), 0),
+            func.coalesce(func.sum(hit_col).filter(chat), 0),
         )
         .where(UsageEvent.task_id.in_(tids))
         .group_by(UsageEvent.task_id)
     ).all()
     return {
-        tid: {"cost_cny": float(cost or 0), "tokens_cache_hit": int(hit or 0)}
-        for tid, cost, hit in rows
+        tid: {
+            "cost_cny": float(cost or 0),
+            "tokens_in": int(tin or 0),
+            "tokens_out": int(tout or 0),
+            "tokens_cache_hit": int(hit or 0),
+        }
+        for tid, cost, tin, tout, hit in rows  # unpack order matches the select() column order
     }
 
 
@@ -134,6 +145,11 @@ def _task_dict(
     缺省回退到 tasks.cost_cny 列(多为 0)与 0 命中,前端据此显 ¥ / 缓存命中率。
     """
     u = usage or {}
+    # token 总量优先取 usage_events 聚合(用量 source-of-truth,且与 cache_hit 同源 →
+    # 命中率分母一致、恒 ≤100%);无 usage 时回退 tasks 概览列。tasks.tokens_prompt 会被
+    # 「清空对话」重置,不能与 usage_events 的 cache_hit 跨源相除。
+    tokens_prompt = int(u["tokens_in"]) if "tokens_in" in u else (row.tokens_prompt or 0)
+    tokens_completion = int(u["tokens_out"]) if "tokens_out" in u else (row.tokens_completion or 0)
     d = {
         "task_id": str(row.task_id),
         "name": row.name or "",
@@ -143,9 +159,9 @@ def _task_dict(
         "skill": row.skill or "",
         "model": row.model or "",
         "model_profile": row.model_profile or "",
-        "tokens_prompt": row.tokens_prompt or 0,
-        "tokens_completion": row.tokens_completion or 0,
-        "tokens": (row.tokens_prompt or 0) + (row.tokens_completion or 0),
+        "tokens_prompt": tokens_prompt,
+        "tokens_completion": tokens_completion,
+        "tokens": tokens_prompt + tokens_completion,
         # 缓存命中 token(chat 前缀缓存)+ 真实成本(已按缓存折价,见 usage.py)。
         # on-the-fly 聚合;未传 usage 时回退列/0。
         "tokens_cache_hit": int(u.get("tokens_cache_hit", 0)),
diff --git a/web/static/dev.html b/web/static/dev.html
index 7f9149a..e118bd3 100644
--- a/web/static/dev.html
+++ b/web/static/dev.html
@@ -1226,7 +1226,23 @@ function fmtCost(n) {
   return "¥" + n.toFixed(2);
 }
 
-// 任务级累计用量(顶栏):总 token · 缓存命中率 · 真实花费。
+// Hover details (multi-line) for a task's cumulative usage: input/output split · cache hits + hit rate · real cost.
+// Shared by list rows and the top bar (rows show only the token count and keep cost/cache in the tooltip; the top bar also inlines a short summary).
+function taskUsageTooltip(t) {
+  const pin = t.tokens_prompt || 0;
+  const pout = t.tokens_completion || 0;
+  const hit = t.tokens_cache_hit || 0;
+  const lines = [`输入 ${pin.toLocaleString()} / 输出 ${pout.toLocaleString()} tok（合计 ${(pin + pout).toLocaleString()}）`];
+  if (pin > 0 && hit > 0) {  // pin > 0 also guards the division below
+    lines.push(`前缀缓存命中 ${hit.toLocaleString()} tok（命中率 ${Math.round(hit / pin * 100)}%，命中部分按低价计费）`);
+  }
+  if (t.cost_cny > 0) {  // skipped when the cost is 0 or missing
+    lines.push(`真实花费 ¥${(t.cost_cny).toFixed(4)}（已按缓存命中折价）`);
+  }
+  return lines.join("\n");  // one item per line; callers escape it before placing it in a title attribute
+}
+
+// 任务级累计用量(顶栏):总 token · 缓存命中率 · 真实花费;详情走 taskUsageTooltip。
 // 缓存命中率 = cache_hit / 总输入(tokens_prompt);命中越高说明前缀复用越好、越省钱。
 function formatTaskUsage(t) {
   const tok = t.tokens || 0;
@@ -1239,10 +1255,7 @@ function formatTaskUsage(t) {
   }
   const cost = fmtCost(t.cost_cny);
   if (cost) bits.push(cost);
-  const title = `累计:输入 ${pin.toLocaleString()} / 输出 ${(t.tokens_completion || 0).toLocaleString()} tok`
-    + (hit > 0 ? `\n前缀缓存命中 ${hit.toLocaleString()} tok（命中部分按低价计费）` : "")
-    + (t.cost_cny > 0 ? `\n真实花费 ¥${(t.cost_cny).toFixed(4)}（已按缓存命中折价）` : "");
-  return `<span class="muted" title="${escapeHtml(title)}" style="white-space:nowrap;">${bits.join(" · ")}</span>`;
+  return `<span class="muted" title="${escapeHtml(taskUsageTooltip(t))}" style="white-space:nowrap;">${bits.join(" · ")}</span>`;
 }
 
 function formatContextStats(d) {
@@ -1756,8 +1769,7 @@ function renderTaskList(tasks, append = false) {
             <span class="badge ${t.status}">${statusLabel}</span>
             ${t.skill ? `<span class="muted" title="${escapeHtml(t.skill)}">${escapeHtml(t.skill)}</span>` : ""}
             <span class="num right-group">${t.n_messages || 0} 条</span>
-            <span class="num" title="${(t.tokens || 0).toLocaleString()} tokens${t.tokens_cache_hit ? `（其中缓存命中 ${(t.tokens_cache_hit).toLocaleString()}）` : ""}">${fmtTokens(t.tokens)} tok</span>
-            ${t.cost_cny > 0 ? `<span class="num" title="真实花费(已按缓存命中折价)">${fmtCost(t.cost_cny)}</span>` : ""}
+            <span class="num" title="${escapeHtml(taskUsageTooltip(t))}">${fmtTokens(t.tokens)} tok</span>
             <span class="muted time-ago" title="${escapeHtml(fmtTime(t.updated_at))}">${escapeHtml(fmtTimeAgo(t.updated_at))}</span>
           </div>
         </div>