feat: 可配置get_max_running

This commit is contained in:
caoqianming 2026-01-29 15:37:14 +08:00
parent b9cbc44d4a
commit 434659ba09
1 changed file with 11 additions and 6 deletions

View File

@ -257,13 +257,17 @@ def get_pdf_from_elsevier(number_of_task=100):
return f'{def_name}, {err_msg}, remaining {qs_count} papers' return f'{def_name}, {err_msg}, remaining {qs_count} papers'
# Cache key read via cache.get(RUNNING_KEY, 0) as the current running-task count
# (presumably maintained by incr_running()/decr_running() — their bodies are not shown here).
RUNNING_KEY = "download_pdf:running" RUNNING_KEY = "download_pdf:running"
# Appears in the pre-commit (left) column only: the fixed cap of 50 that the
# post-commit column no longer references.
MAX_RUNNING = 50
def get_max_running():
    """Return the cap on concurrently running download tasks.

    The cap is looked up in the cache under ``download_pdf:max_running``;
    100 is passed to ``cache.get`` as the default for a missing key.
    """
    configured_limit = cache.get("download_pdf:max_running", 100)
    return configured_limit
# Reports whether the value cached under RUNNING_KEY (0 when the key is absent)
# is still strictly below the cap.
# Side-by-side diff: the left column compares against the constant MAX_RUNNING,
# the right column against the value returned by get_max_running().
def can_send_more(): def can_send_more():
return cache.get(RUNNING_KEY, 0) < MAX_RUNNING return cache.get(RUNNING_KEY, 0) < get_max_running()
# Task that selects papers with is_oa=True, has_fulltext=False and fail_reason=None,
# returns "done" when none match, and otherwise returns a summary string of how many
# download_pdf tasks were sent and the running count versus the cap.
# NOTE(review): this is a side-by-side diff rendering; the two "@ -…" hunk headers below
# mark stretches of the function body that are not shown, so the loop and the
# dispatch call are only partially visible.
@shared_task(base=CustomTask) @shared_task(base=CustomTask)
def send_download_fulltext_task(number_of_task=MAX_RUNNING): def send_download_fulltext_task(number_of_task=100):
# When the requested cap differs from the one get_max_running() reports, store it
# under the same cache key so later get_max_running() calls see the new value.
# NOTE(review): no timeout is passed to cache.set. If `cache` is Django's cache
# (presumably — confirm), the backend's default TIMEOUT applies (300 s unless
# configured), after which get_max_running() would fall back to its default of 100.
if number_of_task != get_max_running():
cache.set("download_pdf:max_running", number_of_task)
qs = Paper.objects.filter(is_oa=True, has_fulltext=False, fail_reason=None) qs = Paper.objects.filter(is_oa=True, has_fulltext=False, fail_reason=None)
if not qs.exists(): if not qs.exists():
return "done" return "done"
@ -277,7 +281,7 @@ def send_download_fulltext_task(number_of_task=MAX_RUNNING):
if paper.oa_url: if paper.oa_url:
# Increment the counter before sending the task so the count stays accurate # Increment the counter before sending the task so the count stays accurate
current_count = incr_running() current_count = incr_running()
if current_count > MAX_RUNNING: if current_count > number_of_task:
# Over the limit: roll the counter back and stop # Over the limit: roll the counter back and stop
decr_running() decr_running()
break break
@ -292,7 +296,7 @@ def send_download_fulltext_task(number_of_task=MAX_RUNNING):
countdown=countdown, countdown=countdown,
) )
task_count += 1 task_count += 1
return f"sent {task_count} download_pdf tasks, running {cache.get(RUNNING_KEY, 0)}/{MAX_RUNNING}" return f"sent {task_count} download_pdf tasks, running {cache.get(RUNNING_KEY, 0)}/{number_of_task}"
def incr_running(): def incr_running():
@ -391,5 +395,6 @@ def save_pdf_from_openalex(paper:Paper):
cache.set("openalex_api_exceed", True, timeout=3600) cache.set("openalex_api_exceed", True, timeout=3600)
return "Insufficient credits" return "Insufficient credits"
def save_pdf_from_scihub(paper:Paper):
    """Stub for a Sci-Hub based PDF save step; currently a no-op.

    Accepts a ``Paper``, does nothing with it, and returns ``None``.
    """
    return None
# https://sci.bban.top/pdf/10.1016/j.conbuildmat.2020.121016.pdf?download=true # https://sci.bban.top/pdf/10.1016/j.conbuildmat.2020.121016.pdf?download=true