diff --git a/apps/resm/tasks.py b/apps/resm/tasks.py index 1507f80..06fe7a3 100644 --- a/apps/resm/tasks.py +++ b/apps/resm/tasks.py @@ -257,13 +257,17 @@ def get_pdf_from_elsevier(number_of_task=100): return f'{def_name}, {err_msg}, remaining {qs_count} papers' RUNNING_KEY = "download_pdf:running" -MAX_RUNNING = 50 + +def get_max_running(): + return cache.get("download_pdf:max_running", 100) def can_send_more(): - return cache.get(RUNNING_KEY, 0) < MAX_RUNNING + return cache.get(RUNNING_KEY, 0) < get_max_running() @shared_task(base=CustomTask) -def send_download_fulltext_task(number_of_task=MAX_RUNNING): +def send_download_fulltext_task(number_of_task=100): + if number_of_task != get_max_running(): + cache.set("download_pdf:max_running", number_of_task) qs = Paper.objects.filter(is_oa=True, has_fulltext=False, fail_reason=None) if not qs.exists(): return "done" @@ -277,7 +281,7 @@ def send_download_fulltext_task(number_of_task=MAX_RUNNING): if paper.oa_url: # 发送任务前先增加计数,确保计数准确 current_count = incr_running() - if current_count > MAX_RUNNING: + if current_count > number_of_task: # 超过限制,回滚计数并停止 decr_running() break @@ -292,7 +296,7 @@ def send_download_fulltext_task(number_of_task=MAX_RUNNING): countdown=countdown, ) task_count += 1 - return f"sent {task_count} download_pdf tasks, running {cache.get(RUNNING_KEY, 0)}/{MAX_RUNNING}" + return f"sent {task_count} download_pdf tasks, running {cache.get(RUNNING_KEY, 0)}/{number_of_task}" def incr_running(): @@ -391,5 +395,6 @@ def save_pdf_from_openalex(paper:Paper): cache.set("openalex_api_exceed", True, timeout=3600) return "Insufficient credits" - +def save_pdf_from_scihub(paper:Paper): + pass # https://sci.bban.top/pdf/10.1016/j.conbuildmat.2020.121016.pdf?download=true \ No newline at end of file