From b24bb644856cd007a736ca93d36467fc4ba84b9f Mon Sep 17 00:00:00 2001 From: caoqianming Date: Mon, 2 Feb 2026 09:23:36 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20get=5Fabstract=5Ffrom=5Felsevier=20?= =?UTF-8?q?=E4=BD=BF=E7=94=A8instoken?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/resm/tasks.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/apps/resm/tasks.py b/apps/resm/tasks.py index da1ac8e..cdc960d 100644 --- a/apps/resm/tasks.py +++ b/apps/resm/tasks.py @@ -21,6 +21,12 @@ config.retry_http_codes = [429, 500, 503] OPENALEX_KEY = "NPimoE2ecdWmfdhH8abxEp" config.api_key = OPENALEX_KEY +ELSEVIER_APIKEY = 'aa8868cac9e27d6153ab0a0acd7b50bf' +ELSEVIER_HEADERS = { + "X-ELS-Insttoken": "135fa874aea9f0de11cad187ccb4878c", + "X-ELS-APIKey": ELSEVIER_APIKEY, +} + @shared_task(base=CustomTask) def get_paper_meta_from_openalex(publication_year:int, keywords:str="", search:str="", end_year:int=None): cache_key = f"openalex_cursor_{publication_year}_{keywords}{search}" @@ -94,9 +100,6 @@ def get_paper_meta_from_openalex(publication_year:int, keywords:str="", search:s countdown=5 ) - -ELSEVIER_APIKEY = 'aa8868cac9e27d6153ab0a0acd7b50bf' - # 常用的 User-Agent 列表 USER_AGENTS = [ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", @@ -126,14 +129,15 @@ def get_abstract_from_elsevier(number_of_task:int = 20): qs = qs.exclude( fail_reason__contains="elsevier_doi_not_found" ).exclude(fail_reason__contains="elsevier_abstract_not_found" - ).exclude(fetch_status="downloading").order_by("publication_date") + ).exclude(fetch_status="downloading" + ).filter(doi__startswith="10.1016").order_by("publication_date") if not qs.exists(): return "done" params = { - "apiKey": ELSEVIER_APIKEY, - "httpAccept": "text/xml" + "httpAccept": "text/xml", + "view": "FULL" } err_msg = "" count_abs = 0 @@ -150,6 +154,7 @@ def get_abstract_from_elsevier(number_of_task:int = 20): res = req.get( f"https://api.elsevier.com/content/article/doi/{paper.doi}", params=params, + headers = ELSEVIER_HEADERS, timeout=(3, 15) ) except requests.RequestException: @@ -191,7 +196,6 @@ def get_abstract_from_elsevier(number_of_task:int = 20): if has_fulltext: paper.has_fulltext = True paper.has_fulltext_xml = True - save_pdf_from_elsevier(paper) count_fulltext += 1 paper.save_file_xml(xml_str) @@ -351,6 +355,9 @@ def save_pdf_from_oa_url(paper:Paper): if is_pdf and len(res.content) > 1024: # 至少1KB paper.save_file_pdf(res.content, save_obj=True) return "success" + else: + paper.save_fail_reason("oa_url_not_pdf") + return "oa_url_not_pdf" return f"oa_url_pdf_error: {res.status_code}" def save_pdf_from_openalex(paper:Paper): @@ -375,8 +382,7 @@ def save_pdf_from_openalex(paper:Paper): def save_pdf_from_elsevier(paper:Paper): params = { - "apiKey": ELSEVIER_APIKEY, - "httpAccept": "application/pdf" + "httpAccept": "application/pdf" } try: res = requests.get(