feat: get_abstract_from_elsevier 增加elsevier_abstract_not_found
This commit is contained in:
parent
58780125aa
commit
a09369ab17
|
|
@ -76,7 +76,7 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int
|
||||||
qs = qs.filter(publication_year=publication_year)
|
qs = qs.filter(publication_year=publication_year)
|
||||||
qs = qs.exclude(
|
qs = qs.exclude(
|
||||||
fail_reason="elsevier_doi_not_found"
|
fail_reason="elsevier_doi_not_found"
|
||||||
).order_by("publication_date")
|
).exclude(fail_reason="elsevier_abstract_not_found").order_by("publication_date")
|
||||||
|
|
||||||
if not qs.exists():
|
if not qs.exists():
|
||||||
return "done"
|
return "done"
|
||||||
|
|
@ -99,14 +99,13 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int
|
||||||
except requests.RequestException:
|
except requests.RequestException:
|
||||||
err_msg = "elsevier_request_error"
|
err_msg = "elsevier_request_error"
|
||||||
break
|
break
|
||||||
|
|
||||||
if res.status_code == 200:
|
if res.status_code == 200:
|
||||||
xml_str = res.text
|
xml_str = res.text
|
||||||
try:
|
try:
|
||||||
root = etree.fromstring(xml_str.encode("utf-8"))
|
root = etree.fromstring(xml_str.encode("utf-8"))
|
||||||
except etree.XMLSyntaxError:
|
except etree.XMLSyntaxError:
|
||||||
paper.fail_reason = "elsevier_xml_error"
|
paper.fail_reason = "elsevier_xml_error"
|
||||||
paper.save(update_fields=["fail_reason"])
|
paper.save(update_fields=["fail_reason", "update_time"])
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ns = {"dc": "http://purl.org/dc/elements/1.1/",
|
ns = {"dc": "http://purl.org/dc/elements/1.1/",
|
||||||
|
|
@ -123,6 +122,10 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int
|
||||||
paper.has_abstract = True
|
paper.has_abstract = True
|
||||||
paper.has_abstract_xml = True
|
paper.has_abstract_xml = True
|
||||||
paper.fetch_status = "abstract_ready"
|
paper.fetch_status = "abstract_ready"
|
||||||
|
else:
|
||||||
|
paper.fail_reason = "elsevier_abstract_not_found"
|
||||||
|
paper.save(update_fields=["fail_reason", "update_time"])
|
||||||
|
continue
|
||||||
|
|
||||||
paras = root.xpath("//ce:para", namespaces=ns)
|
paras = root.xpath("//ce:para", namespaces=ns)
|
||||||
has_fulltext = len(paras) > 0
|
has_fulltext = len(paras) > 0
|
||||||
|
|
@ -145,12 +148,11 @@ def get_abstract_from_elsevier(publication_year: int = None, number_of_task:int
|
||||||
paper_file = os.path.join(paper_dir, f"{safe_doi}.xml")
|
paper_file = os.path.join(paper_dir, f"{safe_doi}.xml")
|
||||||
with open(paper_file, "wb") as f:
|
with open(paper_file, "wb") as f:
|
||||||
f.write(xml_str.encode("utf-8"))
|
f.write(xml_str.encode("utf-8"))
|
||||||
|
|
||||||
paper.save(update_fields=["has_abstract", "has_abstract_xml", "has_fulltext", "has_fulltext_xml", "update_time"])
|
paper.save(update_fields=["has_abstract", "has_abstract_xml", "has_fulltext", "has_fulltext_xml", "update_time"])
|
||||||
|
|
||||||
elif res.status_code == 404:
|
elif res.status_code == 404:
|
||||||
paper.fail_reason = "elsevier_doi_not_found"
|
paper.fail_reason = "elsevier_doi_not_found"
|
||||||
paper.save(update_fields=["fail_reason"])
|
paper.save(update_fields=["fail_reason", "update_time"])
|
||||||
if is_elsevier_abstract_task_enabled():
|
if is_elsevier_abstract_task_enabled():
|
||||||
current_app.send_task(
|
current_app.send_task(
|
||||||
"apps.resm.tasks.get_abstract_from_elsevier",
|
"apps.resm.tasks.get_abstract_from_elsevier",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue