fix(resm): 修复 openalex 链失败遮蔽 PDF 兜底下载
send_download_fulltext_task 原用 fail_reason=None 选取, 导致被 openalex 保活链写过 fail_reason 的论文被永久遮蔽, oa_url/elsevier/scihub 兜底路径永不尝试。改为 download_pdf 终态打稳定标记 download_pdf_tried, 选取时据此排除 —— 既解除遮蔽, 又防本链路对同一篇无限重试。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
12f97fc47f
commit
75d02814c6
|
|
@ -712,9 +712,12 @@ def can_send_more(max_running):
|
|||
|
||||
@shared_task(base=CustomTask)
|
||||
def send_download_fulltext_task(number_of_task=100):
|
||||
qs = Paper.objects.filter(has_fulltext=False, fail_reason=None, is_oa=True).exclude(
|
||||
# 只排除"本下载链路已尝试过"的论文(download_pdf 终态会打 download_pdf_tried 标记),
|
||||
# 不再用 fail_reason=None —— 否则被 openalex 保活链失败标记蹭上 fail_reason 的论文会被
|
||||
# 永久遮蔽, 其 oa_url/elsevier/scihub 兜底路径永远不会被尝试。
|
||||
qs = Paper.objects.filter(has_fulltext=False, is_oa=True).exclude(
|
||||
fetch_status='downloading'
|
||||
)
|
||||
).exclude(fail_reason__contains="download_pdf_tried")
|
||||
if not qs.exists():
|
||||
return "done"
|
||||
qs0 = qs.order_by("?")
|
||||
|
|
@ -772,8 +775,12 @@ def download_pdf(paper_id):
|
|||
# if paper.has_fulltext_pdf is False and cache.get("openalex_api_exceed") is None:
|
||||
# current_from = "openalex"
|
||||
# msg = save_pdf_from_openalex(paper)
|
||||
if paper.fail_reason is None and paper.has_fulltext_pdf is False:
|
||||
paper.save_fail_reason(msg)
|
||||
if paper.has_fulltext_pdf is False:
|
||||
# 终态标记: 无论该论文之前是否已被 openalex 保活链写过 fail_reason, 这里都追加
|
||||
# 一条带 download_pdf_tried 的终态, 供 send_download_fulltext_task 据此排除。
|
||||
# 既防本下载链路(oa_url/elsevier/scihub)对同一篇无限重试, 也避免 openalex 链
|
||||
# 留下的 fail_reason 把本链路的首次尝试给遮蔽掉。
|
||||
paper.save_fail_reason(f"download_pdf_tried:{msg}")
|
||||
return msg, current_from
|
||||
finally:
|
||||
paper.fetch_end()
|
||||
|
|
|
|||
Loading…
Reference in New Issue