def _extract_usage(usage: Any) -> Tuple[int, int]:
    """Return ``(prompt_tokens, completion_tokens)`` read from a usage payload.

    Args:
        usage: The ``usage`` value of an LLM response. May be falsy, a ``dict``,
            an object exposing ``model_dump()`` or ``dict()``, or an object that
            carries the two counters as plain attributes.

    Returns:
        A pair of ints. A counter that is missing, ``None`` or not convertible
        to ``int`` is reported as ``0``; this helper only feeds a status line
        and therefore never raises on malformed usage data.
    """
    if not usage:
        return 0, 0

    # Normalise model-style objects to a plain mapping when they offer a dump.
    if hasattr(usage, "model_dump"):
        data = usage.model_dump()
    elif hasattr(usage, "dict"):
        data = usage.dict()
    else:
        data = usage

    def read(key: str) -> int:
        # Mapping lookup for dicts, attribute lookup for everything else.
        raw = data.get(key) if isinstance(data, dict) else getattr(data, key, None)
        try:
            return int(raw or 0)
        except (TypeError, ValueError):
            return 0

    return read("prompt_tokens"), read("completion_tokens")


class AgentLoop:
    # NOTE(review): only `_thinking` is reproduced here. `__init__` and `run`
    # were not fully visible in the patch this block was reconstructed from and
    # must be carried over from core/loop.py unchanged.

    @contextmanager
    def _thinking(self):
        """Show a spinner whose label is refreshed with elapsed time and token count.

        Yields:
            A small holder object. Its ``elapsed`` attribute is ``0.0`` while the
            ``with`` body runs and is set to the wall-clock duration (seconds,
            measured with ``time.monotonic``) when the body finishes, whether it
            returns normally or raises.
        """
        start = time.monotonic()
        stop = threading.Event()

        def fmt() -> str:
            elapsed = time.monotonic() - start
            # The label is purely cosmetic, so a missing `llm`, `token_counter`
            # or `total` must not prevent the wrapped call from running.
            counter = getattr(getattr(self, "llm", None), "token_counter", None)
            total = getattr(counter, "total", 0) or 0
            tail = f" ctx {total:,} tok" if total else ""
            return f"[dim]thinking... {elapsed:.1f}s{tail}[/dim]"

        class Ctx:
            elapsed: float = 0.0

        ctx = Ctx()
        status = self.console.status(fmt(), spinner="dots")

        def tick() -> None:
            # `Event.wait` doubles as the refresh interval and the stop signal.
            while not stop.wait(0.1):
                try:
                    status.update(fmt())
                except Exception:
                    # Best effort: a rendering failure only stops the refresher.
                    return

        with status:
            th = threading.Thread(target=tick, daemon=True)
            th.start()
            try:
                yield ctx
            finally:
                stop.set()
                # Bounded join so a stuck refresher cannot hang the caller.
                th.join(timeout=0.5)
                ctx.elapsed = time.monotonic() - start