feat: 优化get_paper_meta_from_openalex

This commit is contained in:
caoqianming 2026-01-27 14:33:05 +08:00
parent a09369ab17
commit 38b74e64f1
1 changed file with 12 additions and 2 deletions

View File

@ -12,6 +12,7 @@ from lxml import etree
from django.conf import settings
import os
from celery import current_app
from datetime import datetime
# Module-level client configuration, applied once at import time.
# NOTE(review): `config` is imported outside the visible lines; presumably it is the
# OpenAlex client (pyalex) configuration object used by the `Works()` query below — confirm.
# Contact e-mail attached to the client configuration.
config.email = "caoqianming@foxmail.com"
# Retry count is set to 0. NOTE(review): with zero retries the `retry_http_codes`
# list configured next to this line presumably never takes effect — confirm this is intended.
config.max_retries = 0
@ -20,7 +21,7 @@ config.retry_http_codes = [429, 500, 503]
# API key attached to the client configuration. Read from the OPENALEX_API_KEY
# environment variable when it is set, so the credential can be rotated or overridden
# per deployment without a code change. The previously hard-coded value is kept as the
# fallback so existing deployments behave exactly as before.
# NOTE(review): the fallback is a credential committed to source control — consider
# rotating it and removing the default once every deployment sets the variable.
config.api_key = os.environ.get("OPENALEX_API_KEY", "4KJZdkCFA0uFb6IsYKc8cd")
@shared_task(base=CustomTask)
def get_paper_meta_from_openalex(publication_year:int, search_key:str, end_year:int=2026):
def get_paper_meta_from_openalex(publication_year:int, search_key:str, end_year:int=None):
cache_key = f"openalex_cursor_{publication_year}_{search_key}"
cache_cursor = cache.get(cache_key, "*")
pager = Works().filter(
@ -59,8 +60,17 @@ def get_paper_meta_from_openalex(publication_year:int, search_key:str, end_year:
papers.append(paper)
Paper.objects.bulk_create(papers, ignore_conflicts=True)
cache.set(cache_key, next_cursor, timeout=None)
if end_year is None:
end_year = datetime.now().year
if publication_year + 1 <= end_year:
get_paper_meta_from_openalex.delay(publication_year + 1, search_key, end_year)
current_app.send_task(
"apps.resm.tasks.get_paper_meta_from_openalex",
kwargs={
"publication_year": publication_year + 1,
"search_key": search_key,
"end_year": end_year
}
)
# Elsevier API key. Read from the ELSEVIER_APIKEY environment variable when it is set,
# so the credential can be rotated or overridden per deployment without a code change.
# The previously hard-coded value is kept as the fallback so existing deployments
# behave exactly as before.
# NOTE(review): the fallback is a credential committed to source control — consider
# rotating it and removing the default once every deployment sets the variable.
ELSEVIER_APIKEY = os.environ.get("ELSEVIER_APIKEY", 'aa8868cac9e27d6153ab0a0acd7b50bf')