loop: thinking spinner 显示耗时 + 累计 token,每轮回打成本一行

spinner 文本由后台 daemon 线程每 100ms 用 status.update() 刷新,显示
'thinking... 1.3s  ctx 12,345 tok'。每轮 LLM 返回后追加一行 dim
'[in N  out N  t Xs]',留痕便于回看本轮成本。全 ASCII。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
caoqianming 2026-05-07 14:05:44 +08:00
parent 8995a898ba
commit 61ff98bb79
1 changed file with 58 additions and 2 deletions

View File

@ -2,7 +2,10 @@
from __future__ import annotations
import json
from typing import Any, Dict, Optional
import threading
import time
from contextlib import contextmanager
from typing import Any, Dict, Optional, Tuple
from rich.console import Console
from rich.markdown import Markdown
@ -12,6 +15,19 @@ from .llm import LLM
from .session import Session
def _extract_usage(usage: Any) -> Tuple[int, int]:
"""从 litellm response.usage 提 (prompt_tokens, completion_tokens)。"""
if not usage:
return 0, 0
if hasattr(usage, "model_dump"):
usage = usage.model_dump()
elif hasattr(usage, "dict"):
usage = usage.dict()
if isinstance(usage, dict):
return int(usage.get("prompt_tokens") or 0), int(usage.get("completion_tokens") or 0)
return 0, 0
class AgentLoop:
def __init__(
self,
@ -29,11 +45,46 @@ class AgentLoop:
self.max_iterations = max_iterations or capabilities.max_iterations
self.console = console or Console()
@contextmanager
def _thinking(self):
"""spinner 实时刷耗时 + 上下文 token 数。yield 出的 ctx 退出后填 elapsed。"""
start = time.monotonic()
stop = threading.Event()
def fmt() -> str:
elapsed = time.monotonic() - start
total = self.llm.token_counter.total
tail = f" ctx {total:,} tok" if total else ""
return f"[dim]thinking... {elapsed:.1f}s{tail}[/dim]"
class Ctx:
elapsed: float = 0.0
ctx = Ctx()
status = self.console.status(fmt(), spinner="dots")
def tick() -> None:
while not stop.wait(0.1):
try:
status.update(fmt())
except Exception:
return
with status:
th = threading.Thread(target=tick, daemon=True)
th.start()
try:
yield ctx
finally:
stop.set()
th.join(timeout=0.5)
ctx.elapsed = time.monotonic() - start
def run(self, user_message: str) -> str:
self.session.append({"role": "user", "content": user_message})
for _ in range(self.max_iterations):
with self.console.status("[dim]thinking...[/dim]", spinner="dots"):
with self._thinking() as t:
response = self.llm.chat(
messages=self.session.messages,
tools=[t.schema for t in self.tools.values()],
@ -42,6 +93,11 @@ class AgentLoop:
msg = response.choices[0].message
self.session.append(msg)
pt, ct = _extract_usage(getattr(response, "usage", None))
self.console.print(
f"[dim][in {pt:,} out {ct:,} t {t.elapsed:.1f}s][/dim]"
)
tool_calls = getattr(msg, "tool_calls", None) or []
content = getattr(msg, "content", None)
if content: