feat: download_pdf尝试从openalex下载

This commit is contained in:
caoqianming 2026-01-28 15:18:52 +08:00
parent adfa84d323
commit 67b3a4d8c3
1 changed files with 25 additions and 22 deletions

View File

@ -322,7 +322,7 @@ def download_pdf(paper_id):
res = requests.get(paper.oa_url, headers=headers, timeout=(3, 15))
except requests.RequestException as e:
paper.save_fail_reason("oa_url_request_error")
return f"request_error_final: {str(e)}"
return save_pdf_from_openalex(paper)
if res.status_code == 200:
# 检查是否是PDF文件检查魔数 %PDF 或 content-type
@ -339,25 +339,28 @@ def download_pdf(paper_id):
paper.save(update_fields=["has_fulltext", "has_fulltext_pdf", "fetch_status", "update_time"])
return "success"
else:
paper.save_fail_reason("oa_url_not_pdf")
return "not_pdf"
return save_pdf_from_openalex(paper)
else:
# 尝试openalex下载
try:
res = requests.get(url=f"https://content.openalex.org/works/{paper.openalex_id}.pdf",
params={
"api_key": OPENALEX_KEY
})
except requests.RequestException as e:
paper.save_fail_reason("oa_url_not_pdf;openalex_pdf_error")
return f"openalex_pdf_error: {str(e)}"
if res.status_code == 200:
paper.save_file_pdf(res.content)
paper.has_fulltext = True
paper.has_fulltext_pdf = True
paper.fetch_status = "fulltext_ready"
paper.save(update_fields=["has_fulltext", "has_fulltext_pdf", "fetch_status", "update_time"])
return "success"
else:
paper.save_fail_reason("oa_url_not_pdf;openalex_pdf_error")
return f"openalex_pdf_error: {res.status_code}"
return save_pdf_from_openalex(paper)
def save_pdf_from_openalex(paper: Paper, timeout=(3, 15)):
    """Try to download the paper's PDF from the OpenAlex content endpoint.

    Sends a GET to ``https://content.openalex.org/works/<openalex_id>.pdf``
    with ``OPENALEX_KEY`` as the ``api_key`` query parameter. On HTTP 200 the
    response body is handed to ``paper.save_file_pdf`` and the full-text
    flags and ``fetch_status`` are updated and saved. On any other outcome
    the failure reason is recorded through ``paper.save_fail_reason``.

    Args:
        paper: Paper whose ``openalex_id`` identifies the work to fetch.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.get``. The default matches the ``oa_url`` request in
            ``download_pdf``. Without a timeout a stalled connection would
            block the caller indefinitely.

    Returns:
        ``"success"`` when the PDF was stored, otherwise a string starting
        with ``"openalex_pdf_error: "`` followed by the exception text or
        the HTTP status code.
    """
    # NOTE(review): the recorded reason always says "oa_url_not_pdf", even
    # when the caller fell back here after an oa_url request error or a
    # non-200 status -- confirm whether that is intended.
    fail_reason = "oa_url_not_pdf;openalex_pdf_error"
    pdf_url = f"https://content.openalex.org/works/{paper.openalex_id}.pdf"

    try:
        res = requests.get(
            url=pdf_url,
            params={"api_key": OPENALEX_KEY},
            timeout=timeout,
        )
    except requests.RequestException as e:
        # Timeouts are RequestException subclasses, so they land here too.
        paper.save_fail_reason(fail_reason)
        return f"openalex_pdf_error: {str(e)}"

    if res.status_code != 200:
        paper.save_fail_reason(fail_reason)
        return f"openalex_pdf_error: {res.status_code}"

    paper.save_file_pdf(res.content)
    paper.has_fulltext = True
    paper.has_fulltext_pdf = True
    paper.fetch_status = "fulltext_ready"
    paper.save(update_fields=["has_fulltext", "has_fulltext_pdf", "fetch_status", "update_time"])
    return "success"