feat: download_pdf尝试从openalex下载
This commit is contained in:
parent
adfa84d323
commit
67b3a4d8c3
|
|
@@ -322,7 +322,7 @@ def download_pdf(paper_id):
|
||||||
res = requests.get(paper.oa_url, headers=headers, timeout=(3, 15))
|
res = requests.get(paper.oa_url, headers=headers, timeout=(3, 15))
|
||||||
except requests.RequestException as e:
|
except requests.RequestException as e:
|
||||||
paper.save_fail_reason("oa_url_request_error")
|
paper.save_fail_reason("oa_url_request_error")
|
||||||
return f"request_error_final: {str(e)}"
|
return save_pdf_from_openalex(paper)
|
||||||
|
|
||||||
if res.status_code == 200:
|
if res.status_code == 200:
|
||||||
# 检查是否是PDF文件:检查魔数 %PDF 或 content-type
|
# 检查是否是PDF文件:检查魔数 %PDF 或 content-type
|
||||||
|
|
@@ -339,9 +339,12 @@ def download_pdf(paper_id):
|
||||||
paper.save(update_fields=["has_fulltext", "has_fulltext_pdf", "fetch_status", "update_time"])
|
paper.save(update_fields=["has_fulltext", "has_fulltext_pdf", "fetch_status", "update_time"])
|
||||||
return "success"
|
return "success"
|
||||||
else:
|
else:
|
||||||
paper.save_fail_reason("oa_url_not_pdf")
|
return save_pdf_from_openalex(paper)
|
||||||
return "not_pdf"
|
|
||||||
else:
|
else:
|
||||||
|
return save_pdf_from_openalex(paper)
|
||||||
|
|
||||||
|
|
||||||
|
def save_pdf_from_openalex(paper:Paper):
|
||||||
# 尝试openalex下载
|
# 尝试openalex下载
|
||||||
try:
|
try:
|
||||||
res = requests.get(url=f"https://content.openalex.org/works/{paper.openalex_id}.pdf",
|
res = requests.get(url=f"https://content.openalex.org/works/{paper.openalex_id}.pdf",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue