feat: 恢复状态
This commit is contained in:
parent
df7dbc6717
commit
f922685561
|
|
@ -139,60 +139,70 @@ def get_abstract_from_elsevier(number_of_task:int = 20):
|
|||
for paper in qs[:number_of_task]:
|
||||
if not show_task_run(def_name):
|
||||
break
|
||||
original_status = paper.fetch_status
|
||||
if original_status == "downloading":
|
||||
return f"paper {paper.id} is already downloading"
|
||||
paper.fetch_status = "downloading"
|
||||
paper.save(update_fields=["fetch_status", "update_time"])
|
||||
try:
|
||||
res = req.get(
|
||||
f"https://api.elsevier.com/content/article/doi/{paper.doi}",
|
||||
params=params,
|
||||
timeout=(3, 15)
|
||||
)
|
||||
except requests.RequestException:
|
||||
err_msg = "elsevier_request_error"
|
||||
break
|
||||
if res.status_code == 200:
|
||||
xml_str = res.text
|
||||
try:
|
||||
root = etree.fromstring(xml_str.encode("utf-8"))
|
||||
except etree.XMLSyntaxError:
|
||||
paper.save_fail_reason("elsevier_xml_error")
|
||||
continue
|
||||
|
||||
ns = {"dc": "http://purl.org/dc/elements/1.1/",
|
||||
"ce": "http://www.elsevier.com/xml/common/dtd",
|
||||
"xocs": "http://www.elsevier.com/xml/xocs/dtd",}
|
||||
abstract = root.xpath("//dc:description/text()", namespaces=ns)
|
||||
if abstract:
|
||||
PaperAbstract.objects.update_or_create(
|
||||
paper=paper,
|
||||
defaults={
|
||||
"abstract": abstract[0].strip(),
|
||||
"source": "elsevier"
|
||||
}
|
||||
res = req.get(
|
||||
f"https://api.elsevier.com/content/article/doi/{paper.doi}",
|
||||
params=params,
|
||||
timeout=(3, 15)
|
||||
)
|
||||
paper.has_abstract = True
|
||||
paper.has_abstract_xml = True
|
||||
paper.fetch_status = "abstract_ready"
|
||||
else:
|
||||
paper.save_fail_reason("elsevier_abstract_not_found")
|
||||
continue
|
||||
except requests.RequestException:
|
||||
err_msg = "elsevier_request_error"
|
||||
break
|
||||
if res.status_code == 200:
|
||||
xml_str = res.text
|
||||
try:
|
||||
root = etree.fromstring(xml_str.encode("utf-8"))
|
||||
except etree.XMLSyntaxError:
|
||||
paper.save_fail_reason("elsevier_xml_error")
|
||||
continue
|
||||
|
||||
paras = root.xpath("//ce:para", namespaces=ns)
|
||||
has_fulltext = len(paras) > 0
|
||||
if has_fulltext is False:
|
||||
rawtexts = root.xpath("//xocs:rawtext/text()",namespaces=ns)
|
||||
if rawtexts and len(rawtexts[0].strip()) > 2000:
|
||||
has_fulltext = True
|
||||
if has_fulltext:
|
||||
paper.has_fulltext = True
|
||||
paper.has_fulltext_xml = True
|
||||
paper.fetch_status = "fulltext_ready"
|
||||
|
||||
paper.save_file_xml(xml_str)
|
||||
paper.save(update_fields=["has_abstract",
|
||||
"has_abstract_xml", "has_fulltext",
|
||||
"has_fulltext_xml", "update_time", "fetch_status"])
|
||||
ns = {"dc": "http://purl.org/dc/elements/1.1/",
|
||||
"ce": "http://www.elsevier.com/xml/common/dtd",
|
||||
"xocs": "http://www.elsevier.com/xml/xocs/dtd",}
|
||||
abstract = root.xpath("//dc:description/text()", namespaces=ns)
|
||||
if abstract:
|
||||
PaperAbstract.objects.update_or_create(
|
||||
paper=paper,
|
||||
defaults={
|
||||
"abstract": abstract[0].strip(),
|
||||
"source": "elsevier"
|
||||
}
|
||||
)
|
||||
paper.has_abstract = True
|
||||
paper.has_abstract_xml = True
|
||||
paper.fetch_status = "abstract_ready"
|
||||
else:
|
||||
paper.save_fail_reason("elsevier_abstract_not_found")
|
||||
continue
|
||||
|
||||
elif res.status_code == 404:
|
||||
paper.save_fail_reason("elsevier_doi_not_found")
|
||||
paras = root.xpath("//ce:para", namespaces=ns)
|
||||
has_fulltext = len(paras) > 0
|
||||
if has_fulltext is False:
|
||||
rawtexts = root.xpath("//xocs:rawtext/text()",namespaces=ns)
|
||||
if rawtexts and len(rawtexts[0].strip()) > 2000:
|
||||
has_fulltext = True
|
||||
if has_fulltext:
|
||||
paper.has_fulltext = True
|
||||
paper.has_fulltext_xml = True
|
||||
paper.fetch_status = "fulltext_ready"
|
||||
|
||||
paper.save_file_xml(xml_str)
|
||||
paper.save(update_fields=["has_abstract",
|
||||
"has_abstract_xml", "has_fulltext",
|
||||
"has_fulltext_xml", "update_time", "fetch_status"])
|
||||
|
||||
elif res.status_code == 404:
|
||||
paper.save_fail_reason("elsevier_doi_not_found")
|
||||
finally:
|
||||
if paper.fetch_status == "downloading":
|
||||
paper.fetch_status = original_status
|
||||
paper.save(update_fields=["fetch_status", "update_time"])
|
||||
|
||||
qs_count = qs.count()
|
||||
if show_task_run(def_name) and qs_count > 0:
|
||||
|
|
@ -315,8 +325,7 @@ def download_pdf(paper_id):
|
|||
msg = save_pdf_from_openalex(paper)
|
||||
return msg, current_from
|
||||
finally:
|
||||
# 出错时恢复到原状态
|
||||
if paper.fetch_status == "downloading" and paper.has_fulltext_pdf is False:
|
||||
if paper.fetch_status == "downloading":
|
||||
paper.fetch_status = original_status
|
||||
paper.save(update_fields=['fetch_status', 'update_time'])
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue