diff --git a/apps/resm/tasks.py b/apps/resm/tasks.py index 53a89de..3ae2705 100644 --- a/apps/resm/tasks.py +++ b/apps/resm/tasks.py @@ -76,7 +76,7 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int qs = qs.filter(publication_year=publication_year) qs = qs.exclude( fail_reason="elsevier_doi_not_found" - ).order_by("publication_date") + ).exclude(fail_reason="elsevier_abstract_not_found").order_by("publication_date") if not qs.exists(): return "done" @@ -99,14 +99,13 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int except requests.RequestException: err_msg = "elsevier_request_error" break - if res.status_code == 200: xml_str = res.text try: root = etree.fromstring(xml_str.encode("utf-8")) except etree.XMLSyntaxError: paper.fail_reason = "elsevier_xml_error" - paper.save(update_fields=["fail_reason"]) + paper.save(update_fields=["fail_reason", "update_time"]) continue ns = {"dc": "http://purl.org/dc/elements/1.1/", @@ -123,6 +122,10 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int paper.has_abstract = True paper.has_abstract_xml = True paper.fetch_status = "abstract_ready" + else: + paper.fail_reason = "elsevier_abstract_not_found" + paper.save(update_fields=["fail_reason", "update_time"]) + continue paras = root.xpath("//ce:para", namespaces=ns) has_fulltext = len(paras) > 0 @@ -145,12 +148,11 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int paper_file = os.path.join(paper_dir, f"{safe_doi}.xml") with open(paper_file, "wb") as f: f.write(xml_str.encode("utf-8")) - paper.save(update_fields=["has_abstract", "has_abstract_xml", "has_fulltext", "has_fulltext_xml", "update_time"]) elif res.status_code == 404: paper.fail_reason = "elsevier_doi_not_found" - paper.save(update_fields=["fail_reason"]) + paper.save(update_fields=["fail_reason", "update_time"]) if is_elsevier_abstract_task_enabled(): current_app.send_task( "apps.resm.tasks.get_abstract_from_elsevier",