feat: 优化 get_pdf_from_openalex

This commit is contained in:
caoqianming 2026-02-12 10:25:13 +08:00
parent d5f8e43751
commit b3ea39757e
1 changed file with 5 additions and 2 deletions

View File

@@ -156,14 +156,17 @@ def get_pdf_from_openalex(number_of_task: int =10):
fetch_status="downloading").exclude(fail_reason__contains="openalex_pdf_not_found")[:number_of_task] fetch_status="downloading").exclude(fail_reason__contains="openalex_pdf_not_found")[:number_of_task]
if not qs.exists(): if not qs.exists():
return "done" return "done"
msg = ""
for paper in qs: for paper in qs:
if not show_task_run(def_name): if not show_task_run(def_name):
break break
paper.fetch("downloading") paper.fetch("downloading")
save_pdf_from_openalex(paper) msg = save_pdf_from_openalex(paper)
paper.fetch_end() paper.fetch_end()
if paper.has_fulltext_pdf: if paper.has_fulltext_pdf:
count += 1 count += 1
if cache.get("openalex_api_exceed"):
break
countdown = 2 countdown = 2
if cache.get("openalex_api_exceed"): if cache.get("openalex_api_exceed"):
countdown = 5 * 60 # 5分钟后重试 countdown = 5 * 60 # 5分钟后重试
@@ -175,7 +178,7 @@ def get_pdf_from_openalex(number_of_task: int =10):
}, },
countdown=countdown, countdown=countdown,
) )
return count return msg, count
@shared_task(base=CustomTask) @shared_task(base=CustomTask)
def get_abstract_from_elsevier(number_of_task:int = 20, exclude_failed:bool=True): def get_abstract_from_elsevier(number_of_task:int = 20, exclude_failed:bool=True):