From e62d4fce4ad3b5a9e2ba49685fd71ab3c64b5ea5 Mon Sep 17 00:00:00 2001 From: caoqianming Date: Mon, 1 Jun 2026 15:11:16 +0800 Subject: [PATCH] =?UTF-8?q?fix(tools):=20MP=20=E5=B7=A5=E5=85=B7=E6=94=B9?= =?UTF-8?q?=E6=9C=8D=E5=8A=A1=E7=AB=AF=E9=99=90=E9=87=8F=E6=9F=A5=E8=AF=A2?= =?UTF-8?q?=EF=BC=8C=E6=AD=A2=E8=A1=80=20IP=20=E8=A2=AB=E5=B0=81=E6=A0=B9?= =?UTF-8?q?=E5=9B=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mp_search_summary 之前不传分页参数,mp-api 默认 chunk_size=1000 且 list(docs) 自动翻完所有页,limit 只做客户端切片 —— 每次搜索都整库级下载, 被 MP 判 abusive traffic 封 host IP/ASN。改为 search(num_chunks=1, chunk_size=limit, ...) 服务端单页限量。mp_get_entries 的 limit 同为"只裁剪 保存、不减流量"的假参数,但 get_entries_in_chemsys 天然全量(相图用途)无法 限量,只在 description 点明开销。测试加断言锁定 num_chunks/chunk_size 已传。 Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/test_secret_host_tools.py | 8 +++++++- tools/materials_project.py | 18 ++++++++++++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/tests/test_secret_host_tools.py b/tests/test_secret_host_tools.py index e218f67..f73e95e 100644 --- a/tests/test_secret_host_tools.py +++ b/tests/test_secret_host_tools.py @@ -75,9 +75,11 @@ class TestMaterialsProjectHostTools(unittest.TestCase): formula_pretty = "Ca3SiO5" energy_above_hull = 0.0123 + captured = {} + class FakeSummary: def search(self, **kwargs): - self.kwargs = kwargs + captured.update(kwargs) return [FakeDoc()] class FakeMaterials: @@ -110,6 +112,10 @@ class TestMaterialsProjectHostTools(unittest.TestCase): self.assertEqual(data[0]["formula_pretty"], "Ca3SiO5") self.assertEqual(data[0]["energy_above_hull"], 0.0123) self.assertNotIn("host-secret", out) + # Server-side limiting: a single page of `limit` records, never the + # default chunk_size=1000 full-database pagination that gets the IP banned. + self.assertEqual(captured["num_chunks"], 1) + self.assertEqual(captured["chunk_size"], 2) if __name__ == "__main__": diff --git a/tools/materials_project.py b/tools/materials_project.py index cfd61ff..2b3a105 100644 --- a/tools/materials_project.py +++ b/tools/materials_project.py @@ -70,8 +70,8 @@ class MaterialsProjectSearchSummaryTool(Tool): "formula": {"type": "string", "description": "Optional formula such as Ca3SiO5."}, "material_ids": {"type": "array", "items": {"type": "string"}, "description": "Optional Materials Project ids such as mp-123."}, "elements": {"type": "array", "items": {"type": "string"}, "description": "Optional element symbols for a chemical system search."}, - "fields": {"type": "array", "items": {"type": "string"}, "description": "Fields to return; defaults to material_id/formula/symmetry/energy_above_hull."}, - "limit": {"type": "integer", "default": 10, "description": "Maximum records returned, 1-50."}, + "fields": {"type": "array", "items": {"type": "string"}, "description": "Fields to return; defaults to material_id/formula_pretty/symmetry/energy_above_hull. Do NOT put the search formula here — use formula_pretty (not 'formula'). Common fields: formula_pretty, symmetry, energy_above_hull, band_gap, density, volume, is_stable, structure."}, + "limit": {"type": "integer", "default": 10, "description": "Maximum records returned, 1-50. Server-side bounded — keep small to stay within MP fair-use."}, }, } @@ -96,7 +96,12 @@ class MaterialsProjectSearchSummaryTool(Tool): return "[Error] formula / material_ids / elements 至少传一个" try: with _mpr() as mpr: - docs = mpr.materials.summary.search(**kwargs) + # num_chunks=1 + chunk_size=limit 让服务端单次只回 limit 条、不翻页。 + # 否则 mp-api 默认 chunk_size=1000 且自动拉完所有页 —— 整库级下载, + # 会被 MP 判定为 abusive traffic 并封 IP/ASN。 + docs = mpr.materials.summary.search( + num_chunks=1, chunk_size=limit, **kwargs + ) except Exception as e: return f"[Error] mp_search_summary failed: {type(e).__name__}: {e}" plain = [_to_plain(d) for d in list(docs)[:limit]] @@ -148,14 +153,15 @@ class MaterialsProjectGetStructureTool(Tool): class MaterialsProjectGetEntriesTool(Tool): name = "mp_get_entries" description = ( - "Fetch Materials Project computed entries for a chemical system and save trimmed JSON to task_dir/materials/." + "Fetch Materials Project computed entries for a chemical system and save trimmed JSON to task_dir/materials/. " + "Downloads the FULL chemical system (all sub-systems) — volume grows fast with element count; call sparingly and reuse the saved file rather than re-querying." ) parameters = { "type": "object", "properties": { - "elements": {"type": "array", "items": {"type": "string"}, "description": "Chemical system elements, e.g. ['Ca','Si','O','H']."}, + "elements": {"type": "array", "items": {"type": "string"}, "description": "Chemical system elements, e.g. ['Ca','Si','O','H']. More elements = much heavier download."}, "filename": {"type": "string", "description": "Optional JSON filename. Defaults to mp_entries_.json."}, - "limit": {"type": "integer", "default": 200, "description": "Maximum entries saved, 1-1000."}, + "limit": {"type": "integer", "default": 200, "description": "Max entries SAVED to disk, 1-1000. NOTE: this only trims the saved JSON — the full chemsys is still fetched from MP. It does not reduce network traffic."}, }, "required": ["elements"], }