"""看某 task 里 document_search / document_download 的真实参数序列, 判断是「同 query 反复」(病A) 还是「不同 query 地毯式」(病B)。""" import json import os import sys from pathlib import Path env = Path(__file__).resolve().parent.parent / ".env" for line in env.read_text(encoding="utf-8").splitlines(): if line.strip().startswith("ZCBOT_DB_URL="): os.environ["ZCBOT_DB_URL"] = line.split("=", 1)[1].strip() from sqlalchemy import create_engine, text # noqa: E402 engine = create_engine(os.environ["ZCBOT_DB_URL"]) prefix = sys.argv[1] if len(sys.argv) > 1 else "ff1686b7" watch = sys.argv[2] if len(sys.argv) > 2 else "document_search" with engine.connect() as conn: tid = conn.execute( text("select task_id from tasks where task_id::text like :p"), {"p": prefix + "%"}, ).fetchone()[0] msgs = conn.execute( text("select idx, payload from messages where task_id=:t order by idx"), {"t": tid}, ).fetchall() seq = [] for idx, payload in msgs: if payload.get("role") != "assistant": continue for tc in payload.get("tool_calls") or []: fn = tc.get("function") or {} if fn.get("name") != watch: continue try: args = json.loads(fn.get("arguments") or "{}") except Exception: args = {"": fn.get("arguments")} seq.append((idx, args)) print(f"task {tid} — {watch}: {len(seq)} 次\n") from collections import Counter # noqa: E402 # 用 query/关键字段做 key 看重复 keys = [] for _, args in seq: k = args.get("query") or args.get("keyword") or args.get("q") or json.dumps(args, ensure_ascii=False) keys.append(k) c = Counter(keys) dup = [(k, n) for k, n in c.most_common() if n > 1] print(f"unique query: {len(c)} / total {len(keys)}") print(f"被重复的 query 数: {len(dup)}\n") print("=== 重复最多的 query TOP 15 ===") for k, n in c.most_common(15): mark = " <<<同一query重复" if n > 1 else "" print(f" {n:>3}x {str(k)[:80]}{mark}") print("\n=== 前 40 次调用的 query 顺序(看是不是连着搜同一个) ===") for i, (idx, args) in enumerate(seq[:40]): k = args.get("query") or args.get("keyword") or json.dumps(args, ensure_ascii=False) print(f" [{idx:>4}] {str(k)[:80]}")