diff --git a/apps/resm/tasks.py b/apps/resm/tasks.py
index e31f6e8..bc1437b 100644
--- a/apps/resm/tasks.py
+++ b/apps/resm/tasks.py
@@ -322,7 +322,7 @@ def download_pdf(paper_id):
         res = requests.get(paper.oa_url, headers=headers, timeout=(3, 15))
     except requests.RequestException as e:
         paper.save_fail_reason("oa_url_request_error")
-        return f"request_error_final: {str(e)}"
+        return save_pdf_from_openalex(paper)
 
     if res.status_code == 200:
         # 检查是否是PDF文件:检查魔数 %PDF 或 content-type
@@ -339,25 +339,45 @@ def download_pdf(paper_id):
             paper.save(update_fields=["has_fulltext", "has_fulltext_pdf", "fetch_status", "update_time"])
             return "success"
         else:
-            paper.save_fail_reason("oa_url_not_pdf")
-            return "not_pdf"
+            return save_pdf_from_openalex(paper)
     else:
-        # 尝试openalex下载
-        try:
-            res = requests.get(url=f"https://content.openalex.org/works/{paper.openalex_id}.pdf",
-                               params={
-                                   "api_key": OPENALEX_KEY
-                               })
-        except requests.RequestException as e:
-            paper.save_fail_reason("oa_url_not_pdf;openalex_pdf_error")
-            return f"openalex_pdf_error: {str(e)}"
-        if res.status_code == 200:
-            paper.save_file_pdf(res.content)
-            paper.has_fulltext = True
-            paper.has_fulltext_pdf = True
-            paper.fetch_status = "fulltext_ready"
-            paper.save(update_fields=["has_fulltext", "has_fulltext_pdf", "fetch_status", "update_time"])
-            return "success"
-        else:
-            paper.save_fail_reason("oa_url_not_pdf;openalex_pdf_error")
-            return f"openalex_pdf_error: {res.status_code}"
\ No newline at end of file
+        return save_pdf_from_openalex(paper)
+
+
+def save_pdf_from_openalex(paper: Paper) -> str:
+    """Fall back to downloading the paper's PDF from the OpenAlex content API.
+
+    Requests ``https://content.openalex.org/works/<openalex_id>.pdf`` with
+    ``OPENALEX_KEY`` as the ``api_key`` query parameter. On HTTP 200 the body
+    is stored via ``paper.save_file_pdf`` and the paper is saved with
+    ``fetch_status = "fulltext_ready"``; on a request exception or any other
+    status code a fail reason is recorded via ``paper.save_fail_reason``.
+
+    NOTE(review): the recorded reason always starts with ``oa_url_not_pdf``,
+    even when the caller fell back here after an oa_url request error.
+
+    Returns:
+        ``"success"`` when the PDF was stored, otherwise a string starting
+        with ``"openalex_pdf_error: "`` followed by the exception text or the
+        HTTP status code.
+    """
+    try:
+        res = requests.get(
+            url=f"https://content.openalex.org/works/{paper.openalex_id}.pdf",
+            params={"api_key": OPENALEX_KEY},
+            # requests has no default timeout; reuse the (connect, read) limits
+            # of the oa_url request so a stalled server cannot hang the task.
+            timeout=(3, 15),
+        )
+    except requests.RequestException as e:
+        paper.save_fail_reason("oa_url_not_pdf;openalex_pdf_error")
+        return f"openalex_pdf_error: {str(e)}"
+    if res.status_code != 200:
+        paper.save_fail_reason("oa_url_not_pdf;openalex_pdf_error")
+        return f"openalex_pdf_error: {res.status_code}"
+    paper.save_file_pdf(res.content)
+    paper.has_fulltext = True
+    paper.has_fulltext_pdf = True
+    paper.fetch_status = "fulltext_ready"
+    paper.save(update_fields=["has_fulltext", "has_fulltext_pdf", "fetch_status", "update_time"])
+    return "success"