fix(tools): MP 工具改为服务端限量查询,消除 IP 被封的根因

此前 mp_search_summary 不传分页参数:mp-api 默认 chunk_size=1000,且
list(docs) 会自动翻完所有页,limit 只在客户端做切片 —— 每次搜索都相当于整库级下载,
因此被 MP 判定为 abusive traffic 并封禁 host IP/ASN。现改为 search(num_chunks=1,
chunk_size=limit, ...),由服务端单页限量返回。mp_get_entries 的 limit 同样是"只裁剪
保存结果、不减少流量"的假参数,但 get_entries_in_chemsys 天然需要全量拉取(相图用途),
无法限量,因此只在 description 中点明其开销。测试新增断言,锁定 num_chunks/chunk_size 确实已传入。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
caoqianming 2026-06-01 15:11:16 +08:00
parent f66d55cc3b
commit e62d4fce4a
2 changed files with 19 additions and 7 deletions

View File

@ -75,9 +75,11 @@ class TestMaterialsProjectHostTools(unittest.TestCase):
formula_pretty = "Ca3SiO5" formula_pretty = "Ca3SiO5"
energy_above_hull = 0.0123 energy_above_hull = 0.0123
captured = {}
class FakeSummary: class FakeSummary:
def search(self, **kwargs): def search(self, **kwargs):
self.kwargs = kwargs captured.update(kwargs)
return [FakeDoc()] return [FakeDoc()]
class FakeMaterials: class FakeMaterials:
@ -110,6 +112,10 @@ class TestMaterialsProjectHostTools(unittest.TestCase):
self.assertEqual(data[0]["formula_pretty"], "Ca3SiO5") self.assertEqual(data[0]["formula_pretty"], "Ca3SiO5")
self.assertEqual(data[0]["energy_above_hull"], 0.0123) self.assertEqual(data[0]["energy_above_hull"], 0.0123)
self.assertNotIn("host-secret", out) self.assertNotIn("host-secret", out)
# Server-side limiting: a single page of `limit` records, never the
# default chunk_size=1000 full-database pagination that gets the IP banned.
self.assertEqual(captured["num_chunks"], 1)
self.assertEqual(captured["chunk_size"], 2)
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -70,8 +70,8 @@ class MaterialsProjectSearchSummaryTool(Tool):
"formula": {"type": "string", "description": "Optional formula such as Ca3SiO5."}, "formula": {"type": "string", "description": "Optional formula such as Ca3SiO5."},
"material_ids": {"type": "array", "items": {"type": "string"}, "description": "Optional Materials Project ids such as mp-123."}, "material_ids": {"type": "array", "items": {"type": "string"}, "description": "Optional Materials Project ids such as mp-123."},
"elements": {"type": "array", "items": {"type": "string"}, "description": "Optional element symbols for a chemical system search."}, "elements": {"type": "array", "items": {"type": "string"}, "description": "Optional element symbols for a chemical system search."},
"fields": {"type": "array", "items": {"type": "string"}, "description": "Fields to return; defaults to material_id/formula/symmetry/energy_above_hull."}, "fields": {"type": "array", "items": {"type": "string"}, "description": "Fields to return; defaults to material_id/formula_pretty/symmetry/energy_above_hull. Do NOT put the search formula here — use formula_pretty (not 'formula'). Common fields: formula_pretty, symmetry, energy_above_hull, band_gap, density, volume, is_stable, structure."},
"limit": {"type": "integer", "default": 10, "description": "Maximum records returned, 1-50."}, "limit": {"type": "integer", "default": 10, "description": "Maximum records returned, 1-50. Server-side bounded — keep small to stay within MP fair-use."},
}, },
} }
@ -96,7 +96,12 @@ class MaterialsProjectSearchSummaryTool(Tool):
return "[Error] formula / material_ids / elements 至少传一个" return "[Error] formula / material_ids / elements 至少传一个"
try: try:
with _mpr() as mpr: with _mpr() as mpr:
docs = mpr.materials.summary.search(**kwargs) # num_chunks=1 + chunk_size=limit 让服务端单次只回 limit 条、不翻页。
# 否则 mp-api 默认 chunk_size=1000 且自动拉完所有页 —— 整库级下载,
# 会被 MP 判定为 abusive traffic 并封 IP/ASN。
docs = mpr.materials.summary.search(
num_chunks=1, chunk_size=limit, **kwargs
)
except Exception as e: except Exception as e:
return f"[Error] mp_search_summary failed: {type(e).__name__}: {e}" return f"[Error] mp_search_summary failed: {type(e).__name__}: {e}"
plain = [_to_plain(d) for d in list(docs)[:limit]] plain = [_to_plain(d) for d in list(docs)[:limit]]
@ -148,14 +153,15 @@ class MaterialsProjectGetStructureTool(Tool):
class MaterialsProjectGetEntriesTool(Tool): class MaterialsProjectGetEntriesTool(Tool):
name = "mp_get_entries" name = "mp_get_entries"
description = ( description = (
"Fetch Materials Project computed entries for a chemical system and save trimmed JSON to task_dir/materials/." "Fetch Materials Project computed entries for a chemical system and save trimmed JSON to task_dir/materials/. "
"Downloads the FULL chemical system (all sub-systems) — volume grows fast with element count; call sparingly and reuse the saved file rather than re-querying."
) )
parameters = { parameters = {
"type": "object", "type": "object",
"properties": { "properties": {
"elements": {"type": "array", "items": {"type": "string"}, "description": "Chemical system elements, e.g. ['Ca','Si','O','H']."}, "elements": {"type": "array", "items": {"type": "string"}, "description": "Chemical system elements, e.g. ['Ca','Si','O','H']. More elements = much heavier download."},
"filename": {"type": "string", "description": "Optional JSON filename. Defaults to mp_entries_<chemsys>.json."}, "filename": {"type": "string", "description": "Optional JSON filename. Defaults to mp_entries_<chemsys>.json."},
"limit": {"type": "integer", "default": 200, "description": "Maximum entries saved, 1-1000."}, "limit": {"type": "integer", "default": 200, "description": "Max entries SAVED to disk, 1-1000. NOTE: this only trims the saved JSON — the full chemsys is still fetched from MP. It does not reduce network traffic."},
}, },
"required": ["elements"], "required": ["elements"],
} }