feat: 恢复状态

2026-01-29 18:08:32 +08:00 · 2026-01-29 18:08:32 +08:00 · f922685561
parent df7dbc6717
commit f922685561
1 changed file with 60 additions and 51 deletions
--- a/apps/resm/tasks.py
+++ b/apps/resm/tasks.py
@@ -139,60 +139,70 @@ def get_abstract_from_elsevier(number_of_task:int = 20):
        for paper in qs[:number_of_task]:
            if not show_task_run(def_name):
                break
            original_status = paper.fetch_status
            if original_status == "downloading":
                return f"paper {paper.id} is already downloading"
            paper.fetch_status = "downloading"
            paper.save(update_fields=["fetch_status", "update_time"])
            try:
                res = req.get(
                    f"https://api.elsevier.com/content/article/doi/{paper.doi}",
                    params=params,
                    timeout=(3, 15)
                )
            except requests.RequestException:
                err_msg = "elsevier_request_error"
                break
            if res.status_code == 200:
                xml_str = res.text
                try:
-                    root = etree.fromstring(xml_str.encode("utf-8"))
+                    res = req.get(
-                except etree.XMLSyntaxError:
+                        f"https://api.elsevier.com/content/article/doi/{paper.doi}",
-                    paper.save_fail_reason("elsevier_xml_error")
+                        params=params,
-                    continue
+                        timeout=(3, 15)
                ns = {"dc": "http://purl.org/dc/elements/1.1/",
                "ce": "http://www.elsevier.com/xml/common/dtd",
                "xocs": "http://www.elsevier.com/xml/xocs/dtd",}
                abstract = root.xpath("//dc:description/text()", namespaces=ns)
                if abstract:
                    PaperAbstract.objects.update_or_create(
                        paper=paper,
                        defaults={
                            "abstract": abstract[0].strip(),
                            "source": "elsevier"
                        }
                    )
-                    paper.has_abstract = True
+                except requests.RequestException:
-                    paper.has_abstract_xml = True
+                    err_msg = "elsevier_request_error"
-                    paper.fetch_status = "abstract_ready"
+                    break
-                else:
+                if res.status_code == 200:
-                    paper.save_fail_reason("elsevier_abstract_not_found")
+                    xml_str = res.text
-                    continue
+                    try:
                        root = etree.fromstring(xml_str.encode("utf-8"))
                    except etree.XMLSyntaxError:
                        paper.save_fail_reason("elsevier_xml_error")
                        continue
-                paras = root.xpath("//ce:para", namespaces=ns)
+                    ns = {"dc": "http://purl.org/dc/elements/1.1/",
-                has_fulltext = len(paras) > 0
+                    "ce": "http://www.elsevier.com/xml/common/dtd",
-                if has_fulltext is False:
+                    "xocs": "http://www.elsevier.com/xml/xocs/dtd",}
-                    rawtexts = root.xpath("//xocs:rawtext/text()",namespaces=ns)
+                    abstract = root.xpath("//dc:description/text()", namespaces=ns)
-                    if rawtexts and len(rawtexts[0].strip()) > 2000:
+                    if abstract:
-                        has_fulltext = True
+                        PaperAbstract.objects.update_or_create(
-                if has_fulltext:
+                            paper=paper,
-                    paper.has_fulltext = True
+                            defaults={
-                    paper.has_fulltext_xml = True
+                                "abstract": abstract[0].strip(),
-                    paper.fetch_status = "fulltext_ready"
+                                "source": "elsevier"
                            }
                        )
                        paper.has_abstract = True
                        paper.has_abstract_xml = True
                        paper.fetch_status = "abstract_ready"
                    else:
                        paper.save_fail_reason("elsevier_abstract_not_found")
                        continue
-                paper.save_file_xml(xml_str)
+                    paras = root.xpath("//ce:para", namespaces=ns)
-                paper.save(update_fields=["has_abstract", 
+                    has_fulltext = len(paras) > 0
-                "has_abstract_xml", "has_fulltext", 
+                    if has_fulltext is False:
-                "has_fulltext_xml", "update_time", "fetch_status"])
+                        rawtexts = root.xpath("//xocs:rawtext/text()",namespaces=ns)
                        if rawtexts and len(rawtexts[0].strip()) > 2000:
                            has_fulltext = True
                    if has_fulltext:
                        paper.has_fulltext = True
                        paper.has_fulltext_xml = True
                        paper.fetch_status = "fulltext_ready"
-            elif res.status_code == 404:
+                    paper.save_file_xml(xml_str)
-                paper.save_fail_reason("elsevier_doi_not_found")
+                    paper.save(update_fields=["has_abstract", 
                    "has_abstract_xml", "has_fulltext", 
                    "has_fulltext_xml", "update_time", "fetch_status"])
                elif res.status_code == 404:
                    paper.save_fail_reason("elsevier_doi_not_found")
            finally:
                if paper.fetch_status == "downloading":
                    paper.fetch_status = original_status
                    paper.save(update_fields=["fetch_status", "update_time"])
    qs_count = qs.count()
    if show_task_run(def_name) and qs_count > 0:
@@ -315,8 +325,7 @@ def download_pdf(paper_id):
            msg = save_pdf_from_openalex(paper)
        return msg, current_from
    finally:
-        # 出错时恢复到原状态
+        if paper.fetch_status == "downloading":
        if paper.fetch_status == "downloading" and paper.has_fulltext_pdf is False:
            paper.fetch_status = original_status
            paper.save(update_fields=['fetch_status', 'update_time'])