68 lines
2.6 KiB
Python
68 lines
2.6 KiB
Python
"""判断 task 里:① tool 结果有多少带 [Error] / 失败;② 重复调用是否跟在错误后面。"""
|
|
import json
|
|
import os
|
|
import sys
|
|
from collections import Counter
|
|
from pathlib import Path
|
|
|
|
# Load ZCBOT_DB_URL from the ".env" file that sits one directory above this
# script's directory, so the script can run without exporting the variable.
env = Path(__file__).resolve().parent.parent / ".env"
# A missing .env is not an error by itself: the variable may already be set in
# the process environment, so only read the file when it actually exists.
if env.is_file():
    for line in env.read_text(encoding="utf-8").splitlines():
        line = line.strip()
        if line.startswith("ZCBOT_DB_URL="):
            value = line.split("=", 1)[1].strip()
            # .env values are often wrapped in quotes; drop one pair of
            # matching quotes so they do not end up inside the database URL.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]
            # No break: if the key appears several times, the last one wins.
            os.environ["ZCBOT_DB_URL"] = value
|
|
from sqlalchemy import create_engine, text # noqa: E402
|
|
|
|
# Connect with the URL found in ZCBOT_DB_URL (raises KeyError if it is unset).
engine = create_engine(os.environ["ZCBOT_DB_URL"])
# Task-id prefix to inspect: first CLI argument, otherwise a hard-coded default.
prefix = sys.argv[1] if len(sys.argv) > 1 else "ab063233"

with engine.connect() as conn:
    # Resolve the prefix to a full task id; the first matching row is used.
    row = conn.execute(
        text("select task_id from tasks where task_id::text like :p"), {"p": prefix + "%"}
    ).fetchone()
    if row is None:
        # fetchone() returns None when nothing matched; fail with a readable
        # message instead of a TypeError from indexing None.
        sys.exit(f"no task found with task_id prefix {prefix!r}")
    tid = row[0]
    # All messages of that task, in conversation order.
    msgs = conn.execute(
        text("select idx, payload from messages where task_id=:t order by idx"),
        {"t": tid},
    ).fetchall()
|
|
|
|
# Collect the text of every tool result (role == "tool") together with its
# name, and a fingerprint for every tool call issued by the assistant.
results = {}  # idx -> (name, text)
calls = []  # (idx, name, args_fingerprint)
for idx, payload in msgs:
    # NOTE(review): assumes the driver returns payload already decoded as a
    # dict (e.g. a JSON/JSONB column) -- TODO confirm.
    role = payload.get("role")
    if role == "tool":
        results[idx] = (payload.get("name"), str(payload.get("content") or ""))
    elif role == "assistant":
        for tc in payload.get("tool_calls") or []:
            fn = tc.get("function") or {}
            # A call without a name would otherwise crash the string
            # concatenation below; use a visible placeholder instead.
            name = str(fn.get("name") or "<unknown>")
            raw_args = fn.get("arguments")
            if isinstance(raw_args, str):
                try:
                    args = json.loads(raw_args) if raw_args.strip() else {}
                except ValueError:
                    # Unparsable JSON: keep the raw text so that different
                    # malformed payloads are not merged into one fingerprint.
                    args = raw_args
            else:
                # Already-decoded arguments (or None) are used as they are.
                args = raw_args or {}
            # sort_keys makes the fingerprint independent of key order.
            fp = name + "|" + json.dumps(args, ensure_ascii=False, sort_keys=True)
            calls.append((idx, name, fp))
|
|
|
|
# Headline numbers: how many tool results exist and how many of them contain
# at least one failure marker in their text.
n_tool = len(results)
n_err = sum(
    any(marker in body for marker in ("[Error", "Traceback", "exit 1", "[stderr]"))
    for _name, body in results.values()
)
print(f"task {tid}")
# max(n_tool, 1) keeps the percentage defined when there are no tool results.
print(f"tool 结果总数: {n_tool} 含错误/stderr/exit1: {n_err} ({100*n_err/max(n_tool,1):.0f}%)\n")
|
|
|
|
# Exact duplicates: calls whose tool name and full argument set are identical.
c = Counter(fingerprint for _idx, _name, fingerprint in calls)
exactdup = [item for item in c.most_common() if item[1] > 1]
print(f"完全同名同参(含全部参数)的调用指纹: 重复 {len(exactdup)} 种")
print("=== 同名同参重复 TOP 10(连参数都一字不差) ===")
# most_common() is ordered by descending count, so the repeated fingerprints
# form a prefix of it; the first ten entries of exactdup are therefore the
# same rows as filtering most_common(10) for counts above one.
for fingerprint, times in exactdup[:10]:
    tool, _, tail = fingerprint.partition("|")
    print(f" {times:>3}x {tool}: {tail[:70]}")
|
|
|
|
# Error samples: print the first five failing tool results in idx order.
print("\n=== 前 5 条错误结果样本 ===")
markers = ("[Error", "Traceback", "exit 1", "[stderr]")
failing = [i for i in sorted(results) if any(m in results[i][1] for m in markers)]
for idx in failing[:5]:
    name, t = results[idx]
    # Only the first 160 characters are shown to keep the report compact.
    print(f" [{idx}] {name}: {t[:160].strip()}")
|