diff --git a/apps/resm/d_oaurl.py b/apps/resm/d_oaurl.py index acc81a8..c12ee41 100644 --- a/apps/resm/d_oaurl.py +++ b/apps/resm/d_oaurl.py @@ -45,7 +45,12 @@ async def download_from_url_playwright(url: str, save_path: str) -> tuple[bool, async def on_response(response): nonlocal pdf_content if "application/pdf" in response.headers.get("content-type", ""): - pdf_content = await response.body() + try: + # 确保完全读取响应体 + pdf_content = await response.body() + print(f"✓ 成功捕获 PDF,大小: {len(pdf_content)} bytes") + except Exception as e: + print(f"⚠ 读取 PDF 响应体失败: {e}") page.on("response", on_response) @@ -92,16 +97,26 @@ async def download_from_url_playwright(url: str, save_path: str) -> tuple[bool, if not pdf_content: print("等待 PDF 响应...") try: - await page.wait_for_response( + response = await page.wait_for_response( lambda response: "application/pdf" in response.headers.get("content-type", ""), timeout=15000 ) + # 确保响应体完全加载 + pdf_content = await response.body() + print(f"✓ 通过 wait_for_response 获取 PDF,大小: {len(pdf_content)} bytes") except Exception as e: print(f"⚠ 等待 PDF 响应超时: {e}") if pdf_content: + # 验证文件大小(PDF 通常大于 10KB) + pdf_size = len(pdf_content) + if pdf_size < 10240: + await browser.close() + return False, f"PDF 文件过小: {pdf_size} bytes,可能下载不完整" + with open(save_path, "wb") as f: f.write(pdf_content) + print(f"✓ PDF 已保存到: {save_path},大小: {pdf_size} bytes") await browser.close() return True, "" else: