zcbot/scripts/diag_search_args.py

63 lines
2.3 KiB
Python

"""看某 task 里 document_search / document_download 的真实参数序列,
判断是「同 query 反复」(病A) 还是「不同 query 地毯式」(病B)。"""
import json
import os
import sys
from pathlib import Path
from collections import Counter

DB_URL_VAR = "ZCBOT_DB_URL"
DEFAULT_TASK_PREFIX = "ff1686b7"
DEFAULT_TOOL_NAME = "document_search"
# Argument fields tried, in order, when deriving the "query" of a tool call.
QUERY_FIELDS = ("query", "keyword", "q")
TOP_N = 15      # rows shown in the "most repeated" table
HEAD_N = 40     # leading calls shown in chronological order
KEY_WIDTH = 80  # max characters of a key printed per row


def read_env_value(env_path, name):
    """Return the value assigned to ``name`` in a dotenv-style file.

    Only lines that (after stripping) start with ``name=`` are considered;
    when several match, the last one wins. One pair of matching surrounding
    quotes is removed from the value.

    Returns ``None`` when the file does not exist or has no such line.
    """
    try:
        content = Path(env_path).read_text(encoding="utf-8")
    except FileNotFoundError:
        return None
    marker = name + "="
    found = None
    for line in content.splitlines():
        if not line.strip().startswith(marker):
            continue
        value = line.split("=", 1)[1].strip()
        # KEY="value" is common in .env files; the quotes are not part of the value.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "'\"":
            value = value[1:-1]
        found = value
    return found


def decode_arguments(raw):
    """Decode a tool call's ``arguments`` field into a dict.

    ``raw`` is expected to be a JSON string; a missing/empty value decodes to
    ``{}``. Anything that cannot be parsed, or that parses to something other
    than a JSON object, is returned as ``{"<bad>": raw}`` so callers can
    always use ``.get`` on the result.
    """
    try:
        decoded = json.loads(raw or "{}")
    except (TypeError, ValueError):  # json.JSONDecodeError is a ValueError
        return {"<bad>": raw}
    if not isinstance(decoded, dict):
        return {"<bad>": raw}
    return decoded


def extract_tool_calls(msgs, watch):
    """Collect ``(idx, args)`` for every assistant tool call named ``watch``.

    ``msgs`` is an iterable of ``(idx, payload)`` pairs in message order.
    Each payload is used as a dict with ``role`` and optional ``tool_calls``
    (assumes the driver returns the payload column already decoded - TODO
    confirm against the schema).
    """
    seq = []
    for idx, payload in msgs:
        if payload.get("role") != "assistant":
            continue
        for call in payload.get("tool_calls") or []:
            fn = call.get("function") or {}
            if fn.get("name") != watch:
                continue
            seq.append((idx, decode_arguments(fn.get("arguments"))))
    return seq


def query_key(args):
    """Return the string used to decide whether two calls searched the same thing.

    The first truthy field among ``QUERY_FIELDS`` is used; if none is present
    the whole argument dict is serialized. Non-string values are serialized
    to JSON so the key is always a hashable ``str``.
    """
    for field in QUERY_FIELDS:
        value = args.get(field)
        if value:
            if isinstance(value, str):
                return value
            return json.dumps(value, ensure_ascii=False)
    return json.dumps(args, ensure_ascii=False)


def render_report(tid, watch, seq):
    """Build the report lines for the calls in ``seq``.

    Returns a list of strings, each meant to be passed to ``print`` as-is
    (some deliberately end in a newline to leave a blank line after them).
    The report has a summary, the ``TOP_N`` most frequent keys, and the keys
    of the first ``HEAD_N`` calls in order.
    """
    keys = [query_key(args) for _, args in seq]
    counts = Counter(keys)
    repeated = sum(1 for n in counts.values() if n > 1)

    lines = [
        f"task {tid} {watch}: {len(seq)}\n",
        f"unique query: {len(counts)} / total {len(keys)}",
        f"被重复的 query 数: {repeated}\n",
        "=== 重复最多的 query TOP 15 ===",
    ]
    for key, n in counts.most_common(TOP_N):
        mark = " <<<同一query重复" if n > 1 else ""
        lines.append(f" {n:>3}x {key[:KEY_WIDTH]}{mark}")

    lines.append("\n=== 前 40 次调用的 query 顺序(看是不是连着搜同一个) ===")
    # Same key as the table above, so both sections agree on what "a query" is.
    for (idx, _), key in zip(seq[:HEAD_N], keys):
        lines.append(f" [{idx:>4}] {key[:KEY_WIDTH]}")
    return lines


def fetch_task_messages(db_url, prefix):
    """Return ``(task_id, rows)`` for the task whose id starts with ``prefix``.

    ``rows`` are the ``(idx, payload)`` rows of that task's messages ordered
    by ``idx``. Exits with a message when no task matches; when several match,
    the first row returned by the database is used and a warning is written
    to stderr.
    """
    # Imported here so the pure helpers above stay importable without sqlalchemy.
    from sqlalchemy import create_engine, text

    engine = create_engine(db_url)
    with engine.connect() as conn:
        # ``::text`` is a PostgreSQL cast; it lets LIKE match on the id's text form.
        matches = conn.execute(
            text("select task_id from tasks where task_id::text like :p"),
            {"p": prefix + "%"},
        ).fetchall()
        if not matches:
            raise SystemExit(f"no task found with task_id prefix {prefix!r}")
        tid = matches[0][0]
        if len(matches) > 1:
            print(
                f"warning: {len(matches)} tasks match prefix {prefix!r}; using {tid}",
                file=sys.stderr,
            )
        msgs = conn.execute(
            text("select idx, payload from messages where task_id=:t order by idx"),
            {"t": tid},
        ).fetchall()
    return tid, msgs


def main(argv=None):
    """Entry point: ``diag_search_args.py [task_id_prefix] [tool_name]``.

    The database URL is taken from ``ZCBOT_DB_URL`` in the ``.env`` file one
    directory above this script (which overrides the process environment, as
    before), falling back to the already-exported variable when the file or
    the entry is absent.
    """
    argv = sys.argv if argv is None else argv
    prefix = argv[1] if len(argv) > 1 else DEFAULT_TASK_PREFIX
    watch = argv[2] if len(argv) > 2 else DEFAULT_TOOL_NAME

    env_path = Path(__file__).resolve().parent.parent / ".env"
    from_file = read_env_value(env_path, DB_URL_VAR)
    if from_file is not None:
        os.environ[DB_URL_VAR] = from_file
    db_url = os.environ.get(DB_URL_VAR)
    if not db_url:
        raise SystemExit(f"{DB_URL_VAR} is not set in {env_path} or the environment")

    tid, msgs = fetch_task_messages(db_url, prefix)
    for line in render_report(tid, watch, extract_tool_calls(msgs, watch)):
        print(line)


if __name__ == "__main__":
    main()