feat: get_abstract_from_elsevier 增加elsevier_abstract_not_found

This commit is contained in:
caoqianming 2026-01-27 14:22:01 +08:00
parent 58780125aa
commit a09369ab17
1 changed file with 7 additions and 5 deletions

View File

@@ -76,7 +76,7 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int
qs = qs.filter(publication_year=publication_year) qs = qs.filter(publication_year=publication_year)
qs = qs.exclude( qs = qs.exclude(
fail_reason="elsevier_doi_not_found" fail_reason="elsevier_doi_not_found"
).order_by("publication_date") ).exclude(fail_reason="elsevier_abstract_not_found").order_by("publication_date")
if not qs.exists(): if not qs.exists():
return "done" return "done"
@@ -99,14 +99,13 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int
except requests.RequestException: except requests.RequestException:
err_msg = "elsevier_request_error" err_msg = "elsevier_request_error"
break break
if res.status_code == 200: if res.status_code == 200:
xml_str = res.text xml_str = res.text
try: try:
root = etree.fromstring(xml_str.encode("utf-8")) root = etree.fromstring(xml_str.encode("utf-8"))
except etree.XMLSyntaxError: except etree.XMLSyntaxError:
paper.fail_reason = "elsevier_xml_error" paper.fail_reason = "elsevier_xml_error"
paper.save(update_fields=["fail_reason"]) paper.save(update_fields=["fail_reason", "update_time"])
continue continue
ns = {"dc": "http://purl.org/dc/elements/1.1/", ns = {"dc": "http://purl.org/dc/elements/1.1/",
@@ -123,6 +122,10 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int
paper.has_abstract = True paper.has_abstract = True
paper.has_abstract_xml = True paper.has_abstract_xml = True
paper.fetch_status = "abstract_ready" paper.fetch_status = "abstract_ready"
else:
paper.fail_reason = "elsevier_abstract_not_found"
paper.save(update_fields=["fail_reason", "update_time"])
continue
paras = root.xpath("//ce:para", namespaces=ns) paras = root.xpath("//ce:para", namespaces=ns)
has_fulltext = len(paras) > 0 has_fulltext = len(paras) > 0
@@ -145,12 +148,11 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int
paper_file = os.path.join(paper_dir, f"{safe_doi}.xml") paper_file = os.path.join(paper_dir, f"{safe_doi}.xml")
with open(paper_file, "wb") as f: with open(paper_file, "wb") as f:
f.write(xml_str.encode("utf-8")) f.write(xml_str.encode("utf-8"))
paper.save(update_fields=["has_abstract", "has_abstract_xml", "has_fulltext", "has_fulltext_xml", "update_time"]) paper.save(update_fields=["has_abstract", "has_abstract_xml", "has_fulltext", "has_fulltext_xml", "update_time"])
elif res.status_code == 404: elif res.status_code == 404:
paper.fail_reason = "elsevier_doi_not_found" paper.fail_reason = "elsevier_doi_not_found"
paper.save(update_fields=["fail_reason"]) paper.save(update_fields=["fail_reason", "update_time"])
if is_elsevier_abstract_task_enabled(): if is_elsevier_abstract_task_enabled():
current_app.send_task( current_app.send_task(
"apps.resm.tasks.get_abstract_from_elsevier", "apps.resm.tasks.get_abstract_from_elsevier",