feat: 恢复状态
This commit is contained in:
parent
df7dbc6717
commit
f922685561
|
|
@ -139,60 +139,70 @@ def get_abstract_from_elsevier(number_of_task:int = 20):
|
||||||
for paper in qs[:number_of_task]:
|
for paper in qs[:number_of_task]:
|
||||||
if not show_task_run(def_name):
|
if not show_task_run(def_name):
|
||||||
break
|
break
|
||||||
|
original_status = paper.fetch_status
|
||||||
|
if original_status == "downloading":
|
||||||
|
return f"paper {paper.id} is already downloading"
|
||||||
|
paper.fetch_status = "downloading"
|
||||||
|
paper.save(update_fields=["fetch_status", "update_time"])
|
||||||
try:
|
try:
|
||||||
res = req.get(
|
|
||||||
f"https://api.elsevier.com/content/article/doi/{paper.doi}",
|
|
||||||
params=params,
|
|
||||||
timeout=(3, 15)
|
|
||||||
)
|
|
||||||
except requests.RequestException:
|
|
||||||
err_msg = "elsevier_request_error"
|
|
||||||
break
|
|
||||||
if res.status_code == 200:
|
|
||||||
xml_str = res.text
|
|
||||||
try:
|
try:
|
||||||
root = etree.fromstring(xml_str.encode("utf-8"))
|
res = req.get(
|
||||||
except etree.XMLSyntaxError:
|
f"https://api.elsevier.com/content/article/doi/{paper.doi}",
|
||||||
paper.save_fail_reason("elsevier_xml_error")
|
params=params,
|
||||||
continue
|
timeout=(3, 15)
|
||||||
|
|
||||||
ns = {"dc": "http://purl.org/dc/elements/1.1/",
|
|
||||||
"ce": "http://www.elsevier.com/xml/common/dtd",
|
|
||||||
"xocs": "http://www.elsevier.com/xml/xocs/dtd",}
|
|
||||||
abstract = root.xpath("//dc:description/text()", namespaces=ns)
|
|
||||||
if abstract:
|
|
||||||
PaperAbstract.objects.update_or_create(
|
|
||||||
paper=paper,
|
|
||||||
defaults={
|
|
||||||
"abstract": abstract[0].strip(),
|
|
||||||
"source": "elsevier"
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
paper.has_abstract = True
|
except requests.RequestException:
|
||||||
paper.has_abstract_xml = True
|
err_msg = "elsevier_request_error"
|
||||||
paper.fetch_status = "abstract_ready"
|
break
|
||||||
else:
|
if res.status_code == 200:
|
||||||
paper.save_fail_reason("elsevier_abstract_not_found")
|
xml_str = res.text
|
||||||
continue
|
try:
|
||||||
|
root = etree.fromstring(xml_str.encode("utf-8"))
|
||||||
|
except etree.XMLSyntaxError:
|
||||||
|
paper.save_fail_reason("elsevier_xml_error")
|
||||||
|
continue
|
||||||
|
|
||||||
paras = root.xpath("//ce:para", namespaces=ns)
|
ns = {"dc": "http://purl.org/dc/elements/1.1/",
|
||||||
has_fulltext = len(paras) > 0
|
"ce": "http://www.elsevier.com/xml/common/dtd",
|
||||||
if has_fulltext is False:
|
"xocs": "http://www.elsevier.com/xml/xocs/dtd",}
|
||||||
rawtexts = root.xpath("//xocs:rawtext/text()",namespaces=ns)
|
abstract = root.xpath("//dc:description/text()", namespaces=ns)
|
||||||
if rawtexts and len(rawtexts[0].strip()) > 2000:
|
if abstract:
|
||||||
has_fulltext = True
|
PaperAbstract.objects.update_or_create(
|
||||||
if has_fulltext:
|
paper=paper,
|
||||||
paper.has_fulltext = True
|
defaults={
|
||||||
paper.has_fulltext_xml = True
|
"abstract": abstract[0].strip(),
|
||||||
paper.fetch_status = "fulltext_ready"
|
"source": "elsevier"
|
||||||
|
}
|
||||||
paper.save_file_xml(xml_str)
|
)
|
||||||
paper.save(update_fields=["has_abstract",
|
paper.has_abstract = True
|
||||||
"has_abstract_xml", "has_fulltext",
|
paper.has_abstract_xml = True
|
||||||
"has_fulltext_xml", "update_time", "fetch_status"])
|
paper.fetch_status = "abstract_ready"
|
||||||
|
else:
|
||||||
|
paper.save_fail_reason("elsevier_abstract_not_found")
|
||||||
|
continue
|
||||||
|
|
||||||
elif res.status_code == 404:
|
paras = root.xpath("//ce:para", namespaces=ns)
|
||||||
paper.save_fail_reason("elsevier_doi_not_found")
|
has_fulltext = len(paras) > 0
|
||||||
|
if has_fulltext is False:
|
||||||
|
rawtexts = root.xpath("//xocs:rawtext/text()",namespaces=ns)
|
||||||
|
if rawtexts and len(rawtexts[0].strip()) > 2000:
|
||||||
|
has_fulltext = True
|
||||||
|
if has_fulltext:
|
||||||
|
paper.has_fulltext = True
|
||||||
|
paper.has_fulltext_xml = True
|
||||||
|
paper.fetch_status = "fulltext_ready"
|
||||||
|
|
||||||
|
paper.save_file_xml(xml_str)
|
||||||
|
paper.save(update_fields=["has_abstract",
|
||||||
|
"has_abstract_xml", "has_fulltext",
|
||||||
|
"has_fulltext_xml", "update_time", "fetch_status"])
|
||||||
|
|
||||||
|
elif res.status_code == 404:
|
||||||
|
paper.save_fail_reason("elsevier_doi_not_found")
|
||||||
|
finally:
|
||||||
|
if paper.fetch_status == "downloading":
|
||||||
|
paper.fetch_status = original_status
|
||||||
|
paper.save(update_fields=["fetch_status", "update_time"])
|
||||||
|
|
||||||
qs_count = qs.count()
|
qs_count = qs.count()
|
||||||
if show_task_run(def_name) and qs_count > 0:
|
if show_task_run(def_name) and qs_count > 0:
|
||||||
|
|
@ -315,8 +325,7 @@ def download_pdf(paper_id):
|
||||||
msg = save_pdf_from_openalex(paper)
|
msg = save_pdf_from_openalex(paper)
|
||||||
return msg, current_from
|
return msg, current_from
|
||||||
finally:
|
finally:
|
||||||
# 出错时恢复到原状态
|
if paper.fetch_status == "downloading":
|
||||||
if paper.fetch_status == "downloading" and paper.has_fulltext_pdf is False:
|
|
||||||
paper.fetch_status = original_status
|
paper.fetch_status = original_status
|
||||||
paper.save(update_fields=['fetch_status', 'update_time'])
|
paper.save(update_fields=['fetch_status', 'update_time'])
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue