From a09369ab179f6ebd518f3ed80971fd59b93f580b Mon Sep 17 00:00:00 2001 From: caoqianming Date: Tue, 27 Jan 2026 14:22:01 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20get=5Fabstract=5Ffrom=5Felsevier=20?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0elsevier=5Fabstract=5Fnot=5Ffound?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/resm/tasks.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/apps/resm/tasks.py b/apps/resm/tasks.py index 53a89de..3ae2705 100644 --- a/apps/resm/tasks.py +++ b/apps/resm/tasks.py @@ -76,7 +76,7 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int qs = qs.filter(publication_year=publication_year) qs = qs.exclude( fail_reason="elsevier_doi_not_found" - ).order_by("publication_date") + ).exclude(fail_reason="elsevier_abstract_not_found").order_by("publication_date") if not qs.exists(): return "done" @@ -99,14 +99,13 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int except requests.RequestException: err_msg = "elsevier_request_error" break - if res.status_code == 200: xml_str = res.text try: root = etree.fromstring(xml_str.encode("utf-8")) except etree.XMLSyntaxError: paper.fail_reason = "elsevier_xml_error" - paper.save(update_fields=["fail_reason"]) + paper.save(update_fields=["fail_reason", "update_time"]) continue ns = {"dc": "http://purl.org/dc/elements/1.1/", @@ -123,6 +122,10 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int paper.has_abstract = True paper.has_abstract_xml = True paper.fetch_status = "abstract_ready" + else: + paper.fail_reason = "elsevier_abstract_not_found" + paper.save(update_fields=["fail_reason", "update_time"]) + continue paras = root.xpath("//ce:para", namespaces=ns) has_fulltext = len(paras) > 0 @@ -145,12 +148,11 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int paper_file = os.path.join(paper_dir, f"{safe_doi}.xml") with open(paper_file, "wb") as f: f.write(xml_str.encode("utf-8")) - paper.save(update_fields=["has_abstract", "has_abstract_xml", "has_fulltext", "has_fulltext_xml", "update_time"]) elif res.status_code == 404: paper.fail_reason = "elsevier_doi_not_found" - paper.save(update_fields=["fail_reason"]) + paper.save(update_fields=["fail_reason", "update_time"]) if is_elsevier_abstract_task_enabled(): current_app.send_task( "apps.resm.tasks.get_abstract_from_elsevier",