loop: thinking spinner 显示耗时 + 累计 token,每轮回打成本一行

spinner 文本由后台 daemon 线程每 100ms 调用 status.update() 刷新,显示形如 'thinking... 1.3s  ctx 12,345 tok'。每轮 LLM 返回后追加一行 dim 样式的 '[in N  out N  t Xs]',留痕以便回看本轮成本。输出全为 ASCII。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 14:05:44 +08:00 · 2026-05-07 14:05:44 +08:00 · 61ff98bb79
parent 8995a898ba
commit 61ff98bb79
1 changed files with 58 additions and 2 deletions
--- a/core/loop.py
+++ b/core/loop.py
@ -2,7 +2,10 @@
 from __future__ import annotations

 import json
-from typing import Any, Dict, Optional
+import threading
+import time
+from contextlib import contextmanager
+from typing import Any, Dict, Optional, Tuple

 from rich.console import Console
 from rich.markdown import Markdown
@ -12,6 +15,19 @@ from .llm import LLM
 from .session import Session


+def _extract_usage(usage: Any) -> Tuple[int, int]:
+    """Return ``(prompt_tokens, completion_tokens)`` from a litellm ``response.usage``.
+
+    Accepts a plain dict, an object exposing ``model_dump()`` or ``dict()``
+    (tried in that order), or an object that carries the two counts as plain
+    attributes. Missing or ``None`` counts are reported as 0.
+
+    Args:
+        usage: The ``usage`` value of an LLM response; may be ``None``.
+
+    Returns:
+        A ``(prompt_tokens, completion_tokens)`` tuple of ints; ``(0, 0)``
+        when ``usage`` is falsy or exposes neither count.
+    """
+    if not usage:
+        return 0, 0
+    if hasattr(usage, "model_dump"):
+        usage = usage.model_dump()
+    elif hasattr(usage, "dict"):
+        usage = usage.dict()
+    if isinstance(usage, dict):
+        return int(usage.get("prompt_tokens") or 0), int(usage.get("completion_tokens") or 0)
+    # Attribute-style usage objects (no model_dump()/dict()) used to be
+    # reported as (0, 0). Read the counts directly, but only trust real ints
+    # so an unexpected attribute value degrades to 0 instead of raising.
+    prompt, completion = (
+        int(value) if isinstance(value, int) else 0
+        for value in (
+            getattr(usage, "prompt_tokens", None),
+            getattr(usage, "completion_tokens", None),
+        )
+    )
+    return prompt, completion
+
+
 class AgentLoop:
    def __init__(
        self,
@ -29,11 +45,46 @@ class AgentLoop:
        self.max_iterations = max_iterations or capabilities.max_iterations
        self.console = console or Console()

+    @contextmanager
+    def _thinking(self):
+        """Show a spinner that live-refreshes elapsed time and a token total.
+
+        Yields a small holder object. Its ``elapsed`` attribute reads 0.0
+        while the ``with`` body runs and is set to the total seconds
+        (monotonic clock) once the body exits, whether normally or by
+        exception.
+        """
+        start = time.monotonic()
+        # Set on exit to tell the refresh thread to stop.
+        stop = threading.Event()
+
+        # Build the spinner text: seconds since ``start``, plus the token
+        # total from ``self.llm.token_counter.total`` when it is non-zero.
+        def fmt() -> str:
+            elapsed = time.monotonic() - start
+            total = self.llm.token_counter.total
+            tail = f"  ctx {total:,} tok" if total else ""
+            return f"[dim]thinking... {elapsed:.1f}s{tail}[/dim]"
+
+        # Holder handed to the caller; ``elapsed`` is overwritten on exit.
+        class Ctx:
+            elapsed: float = 0.0
+
+        ctx = Ctx()
+        status = self.console.status(fmt(), spinner="dots")
+
+        # Refresh loop: ``stop.wait(0.1)`` returns False on timeout, so the
+        # text is updated about every 100 ms until ``stop`` is set.
+        def tick() -> None:
+            while not stop.wait(0.1):
+                try:
+                    status.update(fmt())
+                except Exception:
+                    # Best effort: a failed refresh ends the loop quietly
+                    # rather than propagating out of the background thread.
+                    return
+
+        with status:
+            # Daemon thread, so it cannot keep the process alive on its own.
+            th = threading.Thread(target=tick, daemon=True)
+            th.start()
+            try:
+                yield ctx
+            finally:
+                # Stop the refresher, wait up to 0.5 s for it to finish, then
+                # record the elapsed time (this includes the join wait).
+                stop.set()
+                th.join(timeout=0.5)
+                ctx.elapsed = time.monotonic() - start
+
+
    def run(self, user_message: str) -> str:
        self.session.append({"role": "user", "content": user_message})

        for _ in range(self.max_iterations):
-            with self.console.status("[dim]thinking...[/dim]", spinner="dots"):
+            with self._thinking() as t:
                response = self.llm.chat(
                    messages=self.session.messages,
                    tools=[t.schema for t in self.tools.values()],
@ -42,6 +93,11 @@ class AgentLoop:
            msg = response.choices[0].message
            self.session.append(msg)

+            pt, ct = _extract_usage(getattr(response, "usage", None))
+            self.console.print(
+                f"[dim][in {pt:,}  out {ct:,}  t {t.elapsed:.1f}s][/dim]"
+            )
+
            tool_calls = getattr(msg, "tool_calls", None) or []
            content = getattr(msg, "content", None)
            if content: