feat: get_pdf_from_openalex
This commit is contained in:
parent
76e8204680
commit
1ddca4d34d
|
|
@ -15,6 +15,7 @@ from .d_oaurl import download_from_url_playwright
|
|||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
from django.db.models import Q
|
||||
|
||||
# config.email = "caoqianming@foxmail.com"
|
||||
# NOTE(review): presumably the contact e-mail attached to OpenAlex API
# requests — confirm against where `config` is imported.
config.email = "caoqianming@ctc.ac.cn"
|
||||
|
|
@ -144,6 +145,36 @@ def get_random_headers():
|
|||
def show_task_run(def_name: str):
    """Return the run flag stored in ``cache`` under ``def_name``.

    When the cache holds no entry for ``def_name`` the lookup falls back
    to ``True``.
    """
    run_flag = cache.get(def_name, True)
    return run_flag
|
||||
|
||||
@shared_task(base=CustomTask)
def get_pdf_from_openalex(number_of_task: int = 10):
    """Try to download PDFs for a batch of open-access papers, then re-queue.

    Selects up to ``number_of_task`` ``Paper`` rows with ``is_oa=True`` and
    ``has_fulltext=False`` whose ``fetch_status`` is not ``"downloading"``,
    calls ``save_pdf_from_openalex`` on each, and finally schedules this same
    task again through ``current_app.send_task``.

    Args:
        number_of_task: Maximum number of papers handled in one run; the
            same value is forwarded to the re-queued task.

    Returns:
        ``"stoped"`` when the run flag for this task is off at entry,
        ``"done"`` when no paper matches the selection, otherwise the number
        of processed papers whose ``has_fulltext_pdf`` is truthy afterwards.
    """
    # `.name` only exists on Celery task objects, hence the decorator above.
    def_name = get_pdf_from_openalex.name
    if not show_task_run(def_name):
        return "stoped"

    # Materialise the batch once so emptiness check and iteration share a
    # single query.
    papers = list(
        Paper.objects.filter(is_oa=True, has_fulltext=False).exclude(
            fetch_status="downloading"
        )[:number_of_task]
    )
    if not papers:
        return "done"

    count = 0
    for paper in papers:
        # Re-check the flag per paper so the task can be halted mid-batch.
        if not show_task_run(def_name):
            break
        paper.fetch("downloading")
        try:
            save_pdf_from_openalex(paper)
        finally:
            # Always close the fetch, even when the download raises;
            # otherwise the paper would stay marked "downloading", which the
            # selection above excludes.
            # NOTE(review): assumes fetch_end() resets that status — confirm.
            paper.fetch_end()
        if paper.has_fulltext_pdf:
            count += 1

    countdown = 2
    if cache.get("openalex_api_exceed"):
        countdown = 5 * 60  # retry after 5 minutes

    if show_task_run(def_name):
        current_app.send_task(
            "apps.resm.tasks.get_pdf_from_openalex",
            kwargs={
                "number_of_task": number_of_task,
            },
            countdown=countdown,
        )
    return count
|
||||
|
||||
@shared_task(base=CustomTask)
|
||||
def get_abstract_from_elsevier(number_of_task:int = 20, exclude_failed:bool=True):
|
||||
def_name = get_abstract_from_elsevier.name
|
||||
|
|
|
|||
Loading…
Reference in New Issue