fix(resm): 修复 openalex 链失败遮蔽 PDF 兜底下载
send_download_fulltext_task 原用 fail_reason=None 选取, 导致被 openalex 保活链写过 fail_reason 的论文被永久遮蔽, oa_url/elsevier/scihub 兜底路径永不尝试。改为 download_pdf 终态打稳定标记 download_pdf_tried, 选取时据此排除 —— 既解除遮蔽, 又防本链路对同一篇无限重试。 Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
12f97fc47f
commit
75d02814c6
|
|
@ -712,9 +712,12 @@ def can_send_more(max_running):
|
||||||
|
|
||||||
@shared_task(base=CustomTask)
|
@shared_task(base=CustomTask)
|
||||||
def send_download_fulltext_task(number_of_task=100):
|
def send_download_fulltext_task(number_of_task=100):
|
||||||
qs = Paper.objects.filter(has_fulltext=False, fail_reason=None, is_oa=True).exclude(
|
# 只排除"本下载链路已尝试过"的论文(download_pdf 终态会打 download_pdf_tried 标记),
|
||||||
|
# 不再用 fail_reason=None —— 否则被 openalex 保活链失败标记蹭上 fail_reason 的论文会被
|
||||||
|
# 永久遮蔽, 其 oa_url/elsevier/scihub 兜底路径永远不会被尝试。
|
||||||
|
qs = Paper.objects.filter(has_fulltext=False, is_oa=True).exclude(
|
||||||
fetch_status='downloading'
|
fetch_status='downloading'
|
||||||
)
|
).exclude(fail_reason__contains="download_pdf_tried")
|
||||||
if not qs.exists():
|
if not qs.exists():
|
||||||
return "done"
|
return "done"
|
||||||
qs0 = qs.order_by("?")
|
qs0 = qs.order_by("?")
|
||||||
|
|
@ -772,8 +775,12 @@ def download_pdf(paper_id):
|
||||||
# if paper.has_fulltext_pdf is False and cache.get("openalex_api_exceed") is None:
|
# if paper.has_fulltext_pdf is False and cache.get("openalex_api_exceed") is None:
|
||||||
# current_from = "openalex"
|
# current_from = "openalex"
|
||||||
# msg = save_pdf_from_openalex(paper)
|
# msg = save_pdf_from_openalex(paper)
|
||||||
if paper.fail_reason is None and paper.has_fulltext_pdf is False:
|
if paper.has_fulltext_pdf is False:
|
||||||
paper.save_fail_reason(msg)
|
# 终态标记: 无论该论文之前是否已被 openalex 保活链写过 fail_reason, 这里都追加
|
||||||
|
# 一条带 download_pdf_tried 的终态, 供 send_download_fulltext_task 据此排除。
|
||||||
|
# 既防本下载链路(oa_url/elsevier/scihub)对同一篇无限重试, 也避免 openalex 链
|
||||||
|
# 留下的 fail_reason 把本链路的首次尝试给遮蔽掉。
|
||||||
|
paper.save_fail_reason(f"download_pdf_tried:{msg}")
|
||||||
return msg, current_from
|
return msg, current_from
|
||||||
finally:
|
finally:
|
||||||
paper.fetch_end()
|
paper.fetch_end()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue