feat: 添加 o_keywords 字段
This commit is contained in:
parent
5b6e4ee591
commit
f57f624b65
|
|
@ -27,7 +27,7 @@ class Migration(migrations.Migration):
|
|||
),
|
||||
migrations.AddField(
|
||||
model_name='paper',
|
||||
name='search_word_first',
|
||||
name='o_search',
|
||||
field=models.TextField(default='cement'),
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,18 @@
|
|||
# Generated by Django 4.2.27 on 2026-01-28 02:25
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
# Schema migration for the 'resm' app: adds a single new column,
# `o_keywords`, to the `paper` model.
|
||||
|
||||
# Must be applied after migration 0002 of the 'resm' app.
dependencies = [
|
||||
('resm', '0002_paper_has_abstract_xml_paper_has_fulltext_pdf_and_more'),
|
||||
]
|
||||
|
||||
# One operation: add `paper.o_keywords` as a TextField that may be left
# blank in forms (blank=True) and stored as NULL (null=True), so no
# default value is supplied for existing rows.
operations = [
|
||||
migrations.AddField(
|
||||
model_name='paper',
|
||||
name='o_keywords',
|
||||
field=models.TextField(blank=True, null=True),
|
||||
),
|
||||
]
|
||||
|
|
@ -43,7 +43,8 @@ class Paper(BaseModel):
|
|||
default="openalex",
|
||||
verbose_name="元数据来源"
|
||||
)
|
||||
search_word_first = models.TextField(default="cement")
|
||||
o_search = models.TextField(default="cement")
|
||||
o_keywords = models.TextField(null=True, blank=True)
|
||||
|
||||
def init_save_dir(self):
|
||||
publication_date = self.publication_date
|
||||
|
|
|
|||
|
|
@ -3,14 +3,11 @@ from __future__ import absolute_import, unicode_literals
|
|||
from apps.utils.tasks import CustomTask
|
||||
from celery import shared_task
|
||||
from pyalex import Works, config
|
||||
from itertools import chain
|
||||
from apps.resm.models import Paper, PaperAbstract
|
||||
from apps.utils.snowflake import idWorker
|
||||
from django.core.cache import cache
|
||||
import requests
|
||||
from lxml import etree
|
||||
from django.conf import settings
|
||||
import os
|
||||
from celery import current_app
|
||||
from datetime import datetime
|
||||
|
||||
|
|
@ -21,17 +18,35 @@ config.retry_http_codes = [429, 500, 503]
|
|||
config.api_key = "4KJZdkCFA0uFb6IsYKc8cd"
|
||||
|
||||
@shared_task(base=CustomTask)
|
||||
def get_paper_meta_from_openalex(publication_year:int, search_key:str, end_year:int=None):
|
||||
cache_key = f"openalex_cursor_{publication_year}_{search_key}"
|
||||
def get_paper_meta_from_openalex(publication_year:int, keywords:str="", search:str="", end_year:int=None):
|
||||
cache_key = f"openalex_cursor_{publication_year}_{keywords}{search}"
|
||||
cache_cursor = cache.get(cache_key, "*")
|
||||
if keywords or search:
|
||||
pass
|
||||
else:
|
||||
raise Exception("keywords or search must be provided")
|
||||
# filter=keywords.id:clinker|cement
|
||||
pager = Works().filter(
|
||||
publication_year=publication_year,
|
||||
has_doi=True,
|
||||
type="article" # 将 type 移到 filter 中
|
||||
).search(search_key).select([
|
||||
type="article"
|
||||
)
|
||||
if keywords:
|
||||
if "|" in keywords:
|
||||
keywords_list = keywords.split("|")
|
||||
else:
|
||||
keywords_list = [keywords]
|
||||
pager = pager.filter(
|
||||
keywords={"id": keywords_list}
|
||||
)
|
||||
if search:
|
||||
pager = pager.filter(
|
||||
search=search
|
||||
)
|
||||
pager = pager.select([
|
||||
"id", "doi", "title", "publication_date",
|
||||
"open_access", "authorships", "primary_location", "publication_year",
|
||||
"display_name"
|
||||
"display_name", "content_urls"
|
||||
]).paginate(per_page=200, n_max=None, cursor=cache_cursor)
|
||||
next_cursor = pager._next_value
|
||||
for page in pager:
|
||||
|
|
@ -40,7 +55,8 @@ def get_paper_meta_from_openalex(publication_year:int, search_key:str, end_year:
|
|||
if record["doi"] and (record["display_name"] or record["title"]):
|
||||
paper = Paper()
|
||||
paper.id = idWorker.get_id()
|
||||
paper.search_word_first = search_key
|
||||
paper.o_keywords = keywords
|
||||
paper.o_search = search
|
||||
paper.source = "openalex"
|
||||
paper.type = "article"
|
||||
paper.openalex_id = record["id"].split("/")[-1]
|
||||
|
|
@ -67,7 +83,8 @@ def get_paper_meta_from_openalex(publication_year:int, search_key:str, end_year:
|
|||
"apps.resm.tasks.get_paper_meta_from_openalex",
|
||||
kwargs={
|
||||
"publication_year": publication_year + 1,
|
||||
"search_key": search_key,
|
||||
"keywords": keywords,
|
||||
"search": search,
|
||||
"end_year": end_year
|
||||
},
|
||||
countdown=5
|
||||
|
|
|
|||
Loading…
Reference in New Issue