diff --git a/tests/test_secret_host_tools.py b/tests/test_secret_host_tools.py index e218f67..f73e95e 100644 --- a/tests/test_secret_host_tools.py +++ b/tests/test_secret_host_tools.py @@ -75,9 +75,11 @@ class TestMaterialsProjectHostTools(unittest.TestCase): formula_pretty = "Ca3SiO5" energy_above_hull = 0.0123 + captured = {} + class FakeSummary: def search(self, **kwargs): - self.kwargs = kwargs + captured.update(kwargs) return [FakeDoc()] class FakeMaterials: @@ -110,6 +112,10 @@ class TestMaterialsProjectHostTools(unittest.TestCase): self.assertEqual(data[0]["formula_pretty"], "Ca3SiO5") self.assertEqual(data[0]["energy_above_hull"], 0.0123) self.assertNotIn("host-secret", out) + # Server-side limiting: a single page of `limit` records, never the + # default chunk_size=1000 full-database pagination that gets the IP banned. + self.assertEqual(captured["num_chunks"], 1) + self.assertEqual(captured["chunk_size"], 2) if __name__ == "__main__": diff --git a/tools/materials_project.py b/tools/materials_project.py index cfd61ff..2b3a105 100644 --- a/tools/materials_project.py +++ b/tools/materials_project.py @@ -70,8 +70,8 @@ class MaterialsProjectSearchSummaryTool(Tool): "formula": {"type": "string", "description": "Optional formula such as Ca3SiO5."}, "material_ids": {"type": "array", "items": {"type": "string"}, "description": "Optional Materials Project ids such as mp-123."}, "elements": {"type": "array", "items": {"type": "string"}, "description": "Optional element symbols for a chemical system search."}, - "fields": {"type": "array", "items": {"type": "string"}, "description": "Fields to return; defaults to material_id/formula/symmetry/energy_above_hull."}, - "limit": {"type": "integer", "default": 10, "description": "Maximum records returned, 1-50."}, + "fields": {"type": "array", "items": {"type": "string"}, "description": "Fields to return; defaults to material_id/formula_pretty/symmetry/energy_above_hull. 
Do NOT put the search formula here — use formula_pretty (not 'formula'). Common fields: formula_pretty, symmetry, energy_above_hull, band_gap, density, volume, is_stable, structure."}, + "limit": {"type": "integer", "default": 10, "description": "Maximum records returned, 1-50. Server-side bounded — keep small to stay within MP fair-use."}, }, } @@ -96,7 +96,12 @@ class MaterialsProjectSearchSummaryTool(Tool): return "[Error] formula / material_ids / elements 至少传一个" try: with _mpr() as mpr: - docs = mpr.materials.summary.search(**kwargs) + # num_chunks=1 + chunk_size=limit makes the server return only `limit` records in a single request, with no pagination. + # Otherwise mp-api defaults to chunk_size=1000 and automatically fetches every page — a whole-database download, + # which MP flags as abusive traffic and answers with an IP/ASN ban. + docs = mpr.materials.summary.search( + num_chunks=1, chunk_size=limit, **kwargs + ) except Exception as e: return f"[Error] mp_search_summary failed: {type(e).__name__}: {e}" plain = [_to_plain(d) for d in list(docs)[:limit]] @@ -148,14 +153,15 @@ class MaterialsProjectGetEntriesTool(Tool): name = "mp_get_entries" description = ( - "Fetch Materials Project computed entries for a chemical system and save trimmed JSON to task_dir/materials/." + "Fetch Materials Project computed entries for a chemical system and save trimmed JSON to task_dir/materials/. " + "Downloads the FULL chemical system (all sub-systems) — volume grows fast with element count; call sparingly and reuse the saved file rather than re-querying." ) parameters = { "type": "object", "properties": { - "elements": {"type": "array", "items": {"type": "string"}, "description": "Chemical system elements, e.g. ['Ca','Si','O','H']."}, + "elements": {"type": "array", "items": {"type": "string"}, "description": "Chemical system elements, e.g. ['Ca','Si','O','H']. More elements = much heavier download."}, "filename": {"type": "string", "description": "Optional JSON filename. 
Defaults to mp_entries_.json."}, - "limit": {"type": "integer", "default": 200, "description": "Maximum entries saved, 1-1000."}, + "limit": {"type": "integer", "default": 200, "description": "Max entries SAVED to disk, 1-1000. NOTE: this only trims the saved JSON — the full chemsys is still fetched from MP. It does not reduce network traffic."}, }, "required": ["elements"], }