From 352966946ee1e91703ab1f004232e8b6383b2c95 Mon Sep 17 00:00:00 2001 From: caoqianming Date: Tue, 10 Feb 2026 11:14:30 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E7=A1=AE=E4=BF=9Dpdf=E4=B8=8B=E8=BD=BD?= =?UTF-8?q?=E5=AE=8C=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/resm/d_oaurl.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/apps/resm/d_oaurl.py b/apps/resm/d_oaurl.py index acc81a8..c12ee41 100644 --- a/apps/resm/d_oaurl.py +++ b/apps/resm/d_oaurl.py @@ -45,7 +45,12 @@ async def download_from_url_playwright(url: str, save_path: str) -> tuple[bool, async def on_response(response): nonlocal pdf_content if "application/pdf" in response.headers.get("content-type", ""): - pdf_content = await response.body() + try: + # 确保完全读取响应体 + pdf_content = await response.body() + print(f"✓ 成功捕获 PDF,大小: {len(pdf_content)} bytes") + except Exception as e: + print(f"⚠ 读取 PDF 响应体失败: {e}") page.on("response", on_response) @@ -92,16 +97,26 @@ async def download_from_url_playwright(url: str, save_path: str) -> tuple[bool, if not pdf_content: print("等待 PDF 响应...") try: - await page.wait_for_response( + response = await page.wait_for_response( lambda response: "application/pdf" in response.headers.get("content-type", ""), timeout=15000 ) + # 确保响应体完全加载 + pdf_content = await response.body() + print(f"✓ 通过 wait_for_response 获取 PDF,大小: {len(pdf_content)} bytes") except Exception as e: print(f"⚠ 等待 PDF 响应超时: {e}") if pdf_content: + # 验证文件大小(PDF 通常大于 10KB) + pdf_size = len(pdf_content) + if pdf_size < 10240: + await browser.close() + return False, f"PDF 文件过小: {pdf_size} bytes,可能下载不完整" + with open(save_path, "wb") as f: f.write(pdf_content) + print(f"✓ PDF 已保存到: {save_path},大小: {pdf_size} bytes") await browser.close() return True, "" else: