fix(tools): MP 工具改服务端限量查询,止血 IP 被封根因
mp_search_summary 之前不传分页参数,mp-api 默认 chunk_size=1000 且 list(docs) 自动翻完所有页,limit 只做客户端切片 —— 每次搜索都整库级下载,被 MP 判 abusive traffic 封 host IP/ASN。改为 search(num_chunks=1, chunk_size=limit, ...) 服务端单页限量。mp_get_entries 的 limit 同为"只裁剪保存、不减流量"的假参数,但 get_entries_in_chemsys 天然全量(相图用途)无法限量,只在 description 点明开销。测试加断言锁定 num_chunks/chunk_size 已传。 Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f66d55cc3b
commit
e62d4fce4a
|
|
@ -75,9 +75,11 @@ class TestMaterialsProjectHostTools(unittest.TestCase):
|
|||
formula_pretty = "Ca3SiO5"
|
||||
energy_above_hull = 0.0123
|
||||
|
||||
captured = {}
|
||||
|
||||
class FakeSummary:
|
||||
def search(self, **kwargs):
|
||||
self.kwargs = kwargs
|
||||
captured.update(kwargs)
|
||||
return [FakeDoc()]
|
||||
|
||||
class FakeMaterials:
|
||||
|
|
@ -110,6 +112,10 @@ class TestMaterialsProjectHostTools(unittest.TestCase):
|
|||
self.assertEqual(data[0]["formula_pretty"], "Ca3SiO5")
|
||||
self.assertEqual(data[0]["energy_above_hull"], 0.0123)
|
||||
self.assertNotIn("host-secret", out)
|
||||
# Server-side limiting: a single page of `limit` records, never the
|
||||
# default chunk_size=1000 full-database pagination that gets the IP banned.
|
||||
self.assertEqual(captured["num_chunks"], 1)
|
||||
self.assertEqual(captured["chunk_size"], 2)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -70,8 +70,8 @@ class MaterialsProjectSearchSummaryTool(Tool):
|
|||
"formula": {"type": "string", "description": "Optional formula such as Ca3SiO5."},
|
||||
"material_ids": {"type": "array", "items": {"type": "string"}, "description": "Optional Materials Project ids such as mp-123."},
|
||||
"elements": {"type": "array", "items": {"type": "string"}, "description": "Optional element symbols for a chemical system search."},
|
||||
"fields": {"type": "array", "items": {"type": "string"}, "description": "Fields to return; defaults to material_id/formula/symmetry/energy_above_hull."},
|
||||
"limit": {"type": "integer", "default": 10, "description": "Maximum records returned, 1-50."},
|
||||
"fields": {"type": "array", "items": {"type": "string"}, "description": "Fields to return; defaults to material_id/formula_pretty/symmetry/energy_above_hull. Do NOT put the search formula here — use formula_pretty (not 'formula'). Common fields: formula_pretty, symmetry, energy_above_hull, band_gap, density, volume, is_stable, structure."},
|
||||
"limit": {"type": "integer", "default": 10, "description": "Maximum records returned, 1-50. Server-side bounded — keep small to stay within MP fair-use."},
|
||||
},
|
||||
}
|
||||
|
||||
|
|
@ -96,7 +96,12 @@ class MaterialsProjectSearchSummaryTool(Tool):
|
|||
return "[Error] formula / material_ids / elements 至少传一个"
|
||||
try:
|
||||
with _mpr() as mpr:
|
||||
docs = mpr.materials.summary.search(**kwargs)
|
||||
# num_chunks=1 + chunk_size=limit 让服务端单次只回 limit 条、不翻页。
|
||||
# 否则 mp-api 默认 chunk_size=1000 且自动拉完所有页 —— 整库级下载,
|
||||
# 会被 MP 判定为 abusive traffic 并封 IP/ASN。
|
||||
docs = mpr.materials.summary.search(
|
||||
num_chunks=1, chunk_size=limit, **kwargs
|
||||
)
|
||||
except Exception as e:
|
||||
return f"[Error] mp_search_summary failed: {type(e).__name__}: {e}"
|
||||
plain = [_to_plain(d) for d in list(docs)[:limit]]
|
||||
|
|
@ -149,13 +154,14 @@ class MaterialsProjectGetEntriesTool(Tool):
|
|||
name = "mp_get_entries"
|
||||
description = (
|
||||
"Fetch Materials Project computed entries for a chemical system and save trimmed JSON to task_dir/materials/. "
|
||||
"Downloads the FULL chemical system (all sub-systems) — volume grows fast with element count; call sparingly and reuse the saved file rather than re-querying."
|
||||
)
|
||||
parameters = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"elements": {"type": "array", "items": {"type": "string"}, "description": "Chemical system elements, e.g. ['Ca','Si','O','H']."},
|
||||
"elements": {"type": "array", "items": {"type": "string"}, "description": "Chemical system elements, e.g. ['Ca','Si','O','H']. More elements = much heavier download."},
|
||||
"filename": {"type": "string", "description": "Optional JSON filename. Defaults to mp_entries_<chemsys>.json."},
|
||||
"limit": {"type": "integer", "default": 200, "description": "Maximum entries saved, 1-1000."},
|
||||
"limit": {"type": "integer", "default": 200, "description": "Max entries SAVED to disk, 1-1000. NOTE: this only trims the saved JSON — the full chemsys is still fetched from MP. It does not reduce network traffic."},
|
||||
},
|
||||
"required": ["elements"],
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue