feat: get_abstract_from_elsevier 增加elsevier_abstract_not_found
This commit is contained in:
parent
58780125aa
commit
a09369ab17
|
|
@@ -76,7 +76,7 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int
|
|||
qs = qs.filter(publication_year=publication_year)
|
||||
qs = qs.exclude(
|
||||
fail_reason="elsevier_doi_not_found"
|
||||
-).order_by("publication_date")
|
||||
+).exclude(fail_reason="elsevier_abstract_not_found").order_by("publication_date")
|
||||
|
||||
if not qs.exists():
|
||||
return "done"
|
||||
|
|
@@ -99,14 +99,13 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int
|
|||
except requests.RequestException:
|
||||
err_msg = "elsevier_request_error"
|
||||
break
|
||||
|
||||
if res.status_code == 200:
|
||||
xml_str = res.text
|
||||
try:
|
||||
root = etree.fromstring(xml_str.encode("utf-8"))
|
||||
except etree.XMLSyntaxError:
|
||||
paper.fail_reason = "elsevier_xml_error"
|
||||
-paper.save(update_fields=["fail_reason"])
|
||||
+paper.save(update_fields=["fail_reason", "update_time"])
|
||||
continue
|
||||
|
||||
ns = {"dc": "http://purl.org/dc/elements/1.1/",
|
||||
|
|
@@ -123,6 +122,10 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int
|
|||
paper.has_abstract = True
|
||||
paper.has_abstract_xml = True
|
||||
paper.fetch_status = "abstract_ready"
|
||||
+else:
|
||||
+paper.fail_reason = "elsevier_abstract_not_found"
|
||||
+paper.save(update_fields=["fail_reason", "update_time"])
|
||||
+continue
|
||||
|
||||
paras = root.xpath("//ce:para", namespaces=ns)
|
||||
has_fulltext = len(paras) > 0
|
||||
|
|
@@ -145,12 +148,11 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int
|
|||
paper_file = os.path.join(paper_dir, f"{safe_doi}.xml")
|
||||
with open(paper_file, "wb") as f:
|
||||
f.write(xml_str.encode("utf-8"))
|
||||
|
||||
paper.save(update_fields=["has_abstract", "has_abstract_xml", "has_fulltext", "has_fulltext_xml", "update_time"])
|
||||
|
||||
elif res.status_code == 404:
|
||||
paper.fail_reason = "elsevier_doi_not_found"
|
||||
-paper.save(update_fields=["fail_reason"])
|
||||
+paper.save(update_fields=["fail_reason", "update_time"])
|
||||
if is_elsevier_abstract_task_enabled():
|
||||
current_app.send_task(
|
||||
"apps.resm.tasks.get_abstract_from_elsevier",
|
||||
|
|
|
|||
Loading…
Reference in New Issue