diff --git a/apps/resm/tasks.py b/apps/resm/tasks.py
index 1ebfe10..53a89de 100644
--- a/apps/resm/tasks.py
+++ b/apps/resm/tasks.py
@@ -25,6 +25,7 @@ def get_paper_meta_from_openalex(publication_year:int, search_key:str, end_year:
     cache_cursor = cache.get(cache_key, "*")
     pager = Works().filter(
         publication_year=publication_year,
+        has_doi=True,
         type="article" # 将 type 移到 filter 中
     ).search(search_key).select([
         "id", "doi", "title", "publication_date",
@@ -63,8 +64,13 @@ def get_paper_meta_from_openalex(publication_year:int, search_key:str, end_year:
 
 ELSEVIER_APIKEY = 'aa8868cac9e27d6153ab0a0acd7b50bf'
 
+def is_elsevier_abstract_task_enabled():
+    return cache.get("elsevier_abstract_task_enabled", True)
+
 @shared_task(base=CustomTask)
 def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int = 100):
+    if not is_elsevier_abstract_task_enabled():
+        return "stoped"
     qs = Paper.objects.filter(has_abstract=False)
     if publication_year is not None:
         qs = qs.filter(publication_year=publication_year)
@@ -82,6 +88,8 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int
     err_msg = ""
     with requests.Session() as req:
         for paper in qs[:number_of_task]:
+            if not is_elsevier_abstract_task_enabled():
+                break
             try:
                 res = req.get(
                     f"https://api.elsevier.com/content/article/doi/{paper.doi}",
@@ -143,13 +151,13 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int
                 elif res.status_code == 404:
                     paper.fail_reason = "elsevier_doi_not_found"
                     paper.save(update_fields=["fail_reason"])
-
-    current_app.send_task(
-        "apps.resm.tasks.get_abstract_from_elsevier",
-        kwargs={
-            "publication_year": publication_year,
-            "number_of_task": number_of_task,
-        },
-        countdown=5,
-    )
+    if is_elsevier_abstract_task_enabled():
+        current_app.send_task(
+            "apps.resm.tasks.get_abstract_from_elsevier",
+            kwargs={
+                "publication_year": publication_year,
+                "number_of_task": number_of_task,
+            },
+            countdown=5,
+        )
     return f'{err_msg}, remaining {qs.count()} papers'
\ No newline at end of file