From 67b3a4d8c3d8d2db5e12a7ad0addea44a7d2a4c4 Mon Sep 17 00:00:00 2001 From: caoqianming Date: Wed, 28 Jan 2026 15:18:52 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20download=5Fpdf=E5=B0=9D=E8=AF=95?= =?UTF-8?q?=E4=BB=8Eopenalex=E4=B8=8B=E8=BD=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/resm/tasks.py | 47 ++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/apps/resm/tasks.py b/apps/resm/tasks.py index e31f6e8..bc1437b 100644 --- a/apps/resm/tasks.py +++ b/apps/resm/tasks.py @@ -322,7 +322,7 @@ def download_pdf(paper_id): res = requests.get(paper.oa_url, headers=headers, timeout=(3, 15)) except requests.RequestException as e: paper.save_fail_reason("oa_url_request_error") - return f"request_error_final: {str(e)}" + return save_pdf_from_openalex(paper) if res.status_code == 200: # 检查是否是PDF文件:检查魔数 %PDF 或 content-type @@ -339,25 +339,28 @@ def download_pdf(paper_id): paper.save(update_fields=["has_fulltext", "has_fulltext_pdf", "fetch_status", "update_time"]) return "success" else: - paper.save_fail_reason("oa_url_not_pdf") - return "not_pdf" + return save_pdf_from_openalex(paper) else: - # 尝试openalex下载 - try: - res = requests.get(url=f"https://content.openalex.org/works/{paper.openalex_id}.pdf", - params={ - "api_key": OPENALEX_KEY - }) - except requests.RequestException as e: - paper.save_fail_reason("oa_url_not_pdf;openalex_pdf_error") - return f"openalex_pdf_error: {str(e)}" - if res.status_code == 200: - paper.save_file_pdf(res.content) - paper.has_fulltext = True - paper.has_fulltext_pdf = True - paper.fetch_status = "fulltext_ready" - paper.save(update_fields=["has_fulltext", "has_fulltext_pdf", "fetch_status", "update_time"]) - return "success" - else: - paper.save_fail_reason("oa_url_not_pdf;openalex_pdf_error") - return f"openalex_pdf_error: {res.status_code}" \ No newline at end of file + return save_pdf_from_openalex(paper) + + +def save_pdf_from_openalex(paper:Paper): + # 尝试openalex下载 + try: + res = requests.get(url=f"https://content.openalex.org/works/{paper.openalex_id}.pdf", + params={ + "api_key": OPENALEX_KEY + }) + except requests.RequestException as e: + paper.save_fail_reason("oa_url_not_pdf;openalex_pdf_error") + return f"openalex_pdf_error: {str(e)}" + if res.status_code == 200: + paper.save_file_pdf(res.content) + paper.has_fulltext = True + paper.has_fulltext_pdf = True + paper.fetch_status = "fulltext_ready" + paper.save(update_fields=["has_fulltext", "has_fulltext_pdf", "fetch_status", "update_time"]) + return "success" + else: + paper.save_fail_reason("oa_url_not_pdf;openalex_pdf_error") + return f"openalex_pdf_error: {res.status_code}" \ No newline at end of file