feat: get_pdf_from_openalex
This commit is contained in:
parent
76e8204680
commit
1ddca4d34d
|
|
@ -15,6 +15,7 @@ from .d_oaurl import download_from_url_playwright
|
||||||
import asyncio
|
import asyncio
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
from django.db.models import Q
|
||||||
|
|
||||||
# config.email = "caoqianming@foxmail.com"
|
# config.email = "caoqianming@foxmail.com"
|
||||||
config.email = "caoqianming@ctc.ac.cn"
|
config.email = "caoqianming@ctc.ac.cn"
|
||||||
|
|
@ -144,6 +145,36 @@ def get_random_headers():
|
||||||
def show_task_run(def_name: str):
    """Look up the run flag stored in ``cache`` under ``def_name``.

    Args:
        def_name: Cache key to read; callers in this file pass a task's
            ``.name``.

    Returns:
        Whatever ``cache.get`` yields for the key, with ``True`` supplied
        as the fallback value when the lookup falls back to its default.
    """
    run_flag = cache.get(def_name, True)
    return run_flag
|
||||||
|
|
||||||
|
# Registered as a Celery task (same decorator as the sibling tasks in this
# module): the body reads ``get_pdf_from_openalex.name`` and re-queues itself
# by task name, neither of which works on a plain function.
@shared_task(base=CustomTask)
def get_pdf_from_openalex(number_of_task: int = 10):
    """Process one batch of open-access papers without full text, then re-queue.

    Selects up to ``number_of_task`` ``Paper`` rows with ``is_oa=True`` and
    ``has_fulltext=False`` whose ``fetch_status`` is not ``"downloading"``,
    calls ``save_pdf_from_openalex`` on each, and finally schedules the next
    run of this same task unless the run flag has been switched off.

    Args:
        number_of_task: Maximum number of papers handled in this run; the
            same value is forwarded to the re-queued task.

    Returns:
        ``"stoped"`` when ``show_task_run`` reports the task as disabled on
        entry, ``"done"`` when no candidate paper is found, otherwise the
        number of processed papers whose ``has_fulltext_pdf`` is truthy.
    """
    def_name = get_pdf_from_openalex.name
    if not show_task_run(def_name):
        return "stoped"

    # Evaluate the sliced queryset once; the emptiness check and the loop
    # then share the same rows instead of issuing two separate queries.
    papers = list(
        Paper.objects.filter(is_oa=True, has_fulltext=False).exclude(
            fetch_status="downloading"
        )[:number_of_task]
    )
    if not papers:
        return "done"

    count = 0
    for paper in papers:
        # Re-check the flag per paper so the batch can be interrupted midway.
        if not show_task_run(def_name):
            break
        paper.fetch("downloading")
        try:
            save_pdf_from_openalex(paper)
        finally:
            # NOTE(review): presumably fetch_end() clears the state set by
            # fetch("downloading"); running it even when the download raises
            # keeps the paper from staying excluded by the filter above.
            # Confirm against the Paper model.
            paper.fetch_end()
        if paper.has_fulltext_pdf:
            count += 1

    countdown = 2
    if cache.get("openalex_api_exceed"):
        countdown = 5 * 60  # retry in 5 minutes

    if show_task_run(def_name):
        current_app.send_task(
            "apps.resm.tasks.get_pdf_from_openalex",
            kwargs={
                "number_of_task": number_of_task,
            },
            countdown=countdown,
        )
    return count
|
||||||
|
|
||||||
@shared_task(base=CustomTask)
|
@shared_task(base=CustomTask)
|
||||||
def get_abstract_from_elsevier(number_of_task:int = 20, exclude_failed:bool=True):
|
def get_abstract_from_elsevier(number_of_task:int = 20, exclude_failed:bool=True):
|
||||||
def_name = get_abstract_from_elsevier.name
|
def_name = get_abstract_from_elsevier.name
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue