"""Inspect the real argument sequence of one tool's calls within a task.

Reads the assistant messages of a task, extracts every call to the watched
tool (``document_search`` by default; ``document_download`` is the other
intended target) and prints a summary that helps decide whether the same
query is being repeated over and over ("disease A") or many different
queries are being swept carpet-style ("disease B").

Usage: python <this script> [task_id_prefix] [tool_name]
"""
import json
import os
import sys
from collections import Counter
from pathlib import Path

ENV_VAR = "ZCBOT_DB_URL"
DEFAULT_PREFIX = "ff1686b7"
DEFAULT_TOOL = "document_search"
TOP_N = 15          # rows shown in the "most repeated" table
SEQUENCE_N = 40     # leading calls shown in call order
MAX_KEY_WIDTH = 80  # keys are truncated to this many characters when printed


def load_db_url(env_file: Path) -> str:
    """Return the database URL, exporting it to ``os.environ`` on the way.

    A ``ZCBOT_DB_URL=...`` line in *env_file* overrides whatever is already in
    the environment (the last such line wins).  When the file does not exist
    the value already present in the environment is used instead.

    Raises:
        SystemExit: if the variable cannot be found in either place.
    """
    if env_file.is_file():
        for line in env_file.read_text(encoding="utf-8").splitlines():
            if not line.strip().startswith(ENV_VAR + "="):
                continue
            value = line.split("=", 1)[1].strip()
            # .env files frequently quote values; a URL never legitimately
            # starts and ends with the same quote character.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]
            os.environ[ENV_VAR] = value

    url = os.environ.get(ENV_VAR)
    if not url:
        raise SystemExit(f"{ENV_VAR} is not set in {env_file} or the environment")
    return url


def parse_arguments(raw) -> dict:
    """Decode a tool call's ``arguments`` field into a dict.

    *raw* is normally a JSON string.  An already-decoded dict is returned
    unchanged, an empty/missing value becomes ``{}``, and anything that cannot
    be decoded into a JSON object is wrapped as ``{"<bad>": raw}`` so it still
    shows up in the report instead of aborting the run.
    """
    if isinstance(raw, dict):
        return raw
    try:
        parsed = json.loads(raw or "{}")
    except (TypeError, ValueError):  # non-string input / malformed JSON
        return {"<bad>": raw}
    # Valid JSON that is not an object (list, null, number) has no fields to
    # look up, so treat it the same way as undecodable input.
    return parsed if isinstance(parsed, dict) else {"<bad>": raw}


def query_key(args: dict) -> str:
    """Return the string used to decide whether two calls are "the same".

    The first truthy value among ``query``, ``keyword`` and ``q`` is used;
    when none is present the whole argument dict is JSON-encoded.  Non-string
    values are JSON-encoded too so the key is always hashable and printable.
    """
    for field in ("query", "keyword", "q"):
        value = args.get(field)
        if value:
            if isinstance(value, str):
                return value
            return json.dumps(value, ensure_ascii=False)
    return json.dumps(args, ensure_ascii=False)


def collect_calls(messages, watch: str) -> list:
    """Collect ``(idx, args)`` for every call to *watch* made by the assistant.

    *messages* is an iterable of ``(idx, payload)`` pairs in message order.
    Each payload is assumed to be a dict shaped like a chat message with
    ``role`` and an optional ``tool_calls`` list -- TODO confirm against the
    ``messages.payload`` column type.
    """
    calls = []
    for idx, payload in messages:
        if payload.get("role") != "assistant":
            continue
        for tool_call in payload.get("tool_calls") or []:
            function = tool_call.get("function") or {}
            if function.get("name") == watch:
                calls.append((idx, parse_arguments(function.get("arguments"))))
    return calls


def format_report(task_id, watch: str, calls: list) -> list:
    """Build the report as a list of strings, one per ``print`` call.

    Some entries end with (or start with) a newline on purpose: printing them
    as-is reproduces the blank lines that separate the report sections.
    """
    # One key per call, computed once so both sections agree on it.
    keys = [query_key(args) for _, args in calls]
    counts = Counter(keys)
    repeated = sum(1 for n in counts.values() if n > 1)

    lines = [
        f"task {task_id} — {watch}: {len(calls)} 次\n",
        f"unique query: {len(counts)} / total {len(keys)}",
        f"被重复的 query 数: {repeated}\n",
        f"=== 重复最多的 query TOP {TOP_N} ===",
    ]
    for key, n in counts.most_common(TOP_N):
        mark = " <<<同一query重复" if n > 1 else ""
        lines.append(f" {n:>3}x {key[:MAX_KEY_WIDTH]}{mark}")

    # Call order reveals whether identical searches happen back to back.
    lines.append(f"\n=== 前 {SEQUENCE_N} 次调用的 query 顺序(看是不是连着搜同一个) ===")
    for (idx, _), key in zip(calls[:SEQUENCE_N], keys):
        lines.append(f" [{idx:>4}] {key[:MAX_KEY_WIDTH]}")
    return lines


def main(argv=None) -> int:
    """Run the report for the task/tool given on the command line.

    Args:
        argv: ``[task_id_prefix, tool_name]``; both optional.  Defaults to
            ``sys.argv[1:]``.

    Raises:
        SystemExit: if the DB URL is missing or no task matches the prefix.
    """
    cli = sys.argv[1:] if argv is None else list(argv)
    prefix = cli[0] if len(cli) > 0 else DEFAULT_PREFIX
    watch = cli[1] if len(cli) > 1 else DEFAULT_TOOL

    db_url = load_db_url(Path(__file__).resolve().parent.parent / ".env")

    # Imported only after the environment has been populated, mirroring the
    # original import order; it also keeps the helpers above importable on
    # machines without SQLAlchemy.
    from sqlalchemy import create_engine, text

    engine = create_engine(db_url)
    with engine.connect() as conn:
        row = conn.execute(
            text("select task_id from tasks where task_id::text like :p"),
            {"p": prefix + "%"},
        ).fetchone()
        if row is None:  # fetchone() yields None when nothing matched
            raise SystemExit(f"no task whose task_id starts with {prefix!r}")
        task_id = row[0]
        messages = conn.execute(
            text("select idx, payload from messages where task_id=:t order by idx"),
            {"t": task_id},
        ).fetchall()

    for line in format_report(task_id, watch, collect_calls(messages, watch)):
        print(line)
    return 0


if __name__ == "__main__":
    sys.exit(main())