feat: 确保 PDF 下载完整
This commit is contained in:
parent
b9b469f917
commit
352966946e
|
|
@ -45,7 +45,12 @@ async def download_from_url_playwright(url: str, save_path: str) -> tuple[bool,
|
||||||
async def on_response(response):
|
async def on_response(response):
|
||||||
nonlocal pdf_content
|
nonlocal pdf_content
|
||||||
if "application/pdf" in response.headers.get("content-type", ""):
|
if "application/pdf" in response.headers.get("content-type", ""):
|
||||||
pdf_content = await response.body()
|
try:
|
||||||
|
# 确保完全读取响应体
|
||||||
|
pdf_content = await response.body()
|
||||||
|
print(f"✓ 成功捕获 PDF,大小: {len(pdf_content)} bytes")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"⚠ 读取 PDF 响应体失败: {e}")
|
||||||
|
|
||||||
page.on("response", on_response)
|
page.on("response", on_response)
|
||||||
|
|
||||||
|
|
@ -92,16 +97,26 @@ async def download_from_url_playwright(url: str, save_path: str) -> tuple[bool,
|
||||||
if not pdf_content:
|
if not pdf_content:
|
||||||
print("等待 PDF 响应...")
|
print("等待 PDF 响应...")
|
||||||
try:
|
try:
|
||||||
await page.wait_for_response(
|
response = await page.wait_for_response(
|
||||||
lambda response: "application/pdf" in response.headers.get("content-type", ""),
|
lambda response: "application/pdf" in response.headers.get("content-type", ""),
|
||||||
timeout=15000
|
timeout=15000
|
||||||
)
|
)
|
||||||
|
# 确保响应体完全加载
|
||||||
|
pdf_content = await response.body()
|
||||||
|
print(f"✓ 通过 wait_for_response 获取 PDF,大小: {len(pdf_content)} bytes")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"⚠ 等待 PDF 响应超时: {e}")
|
print(f"⚠ 等待 PDF 响应超时: {e}")
|
||||||
|
|
||||||
if pdf_content:
|
if pdf_content:
|
||||||
|
# 验证文件大小(PDF 通常大于 10KB)
|
||||||
|
pdf_size = len(pdf_content)
|
||||||
|
if pdf_size < 10240:
|
||||||
|
await browser.close()
|
||||||
|
return False, f"PDF 文件过小: {pdf_size} bytes,可能下载不完整"
|
||||||
|
|
||||||
with open(save_path, "wb") as f:
|
with open(save_path, "wb") as f:
|
||||||
f.write(pdf_content)
|
f.write(pdf_content)
|
||||||
|
print(f"✓ PDF 已保存到: {save_path},大小: {pdf_size} bytes")
|
||||||
await browser.close()
|
await browser.close()
|
||||||
return True, ""
|
return True, ""
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue