feat: 添加 o_keywords 字段，并将 search_word_first 重命名为 o_search
This commit is contained in:
parent
5b6e4ee591
commit
f57f624b65
|
|
@ -27,7 +27,7 @@ class Migration(migrations.Migration):
|
||||||
),
|
),
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='paper',
|
model_name='paper',
|
||||||
name='search_word_first',
|
name='o_search',
|
||||||
field=models.TextField(default='cement'),
|
field=models.TextField(default='cement'),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,18 @@
|
||||||
|
# Generated by Django 4.2.27 on 2026-01-28 02:25
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('resm', '0002_paper_has_abstract_xml_paper_has_fulltext_pdf_and_more'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='paper',
|
||||||
|
name='o_keywords',
|
||||||
|
field=models.TextField(blank=True, null=True),
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
@ -43,7 +43,8 @@ class Paper(BaseModel):
|
||||||
default="openalex",
|
default="openalex",
|
||||||
verbose_name="元数据来源"
|
verbose_name="元数据来源"
|
||||||
)
|
)
|
||||||
search_word_first = models.TextField(default="cement")
|
o_search = models.TextField(default="cement")
|
||||||
|
o_keywords = models.TextField(null=True, blank=True)
|
||||||
|
|
||||||
def init_save_dir(self):
|
def init_save_dir(self):
|
||||||
publication_date = self.publication_date
|
publication_date = self.publication_date
|
||||||
|
|
|
||||||
|
|
@ -3,14 +3,11 @@ from __future__ import absolute_import, unicode_literals
|
||||||
from apps.utils.tasks import CustomTask
|
from apps.utils.tasks import CustomTask
|
||||||
from celery import shared_task
|
from celery import shared_task
|
||||||
from pyalex import Works, config
|
from pyalex import Works, config
|
||||||
from itertools import chain
|
|
||||||
from apps.resm.models import Paper, PaperAbstract
|
from apps.resm.models import Paper, PaperAbstract
|
||||||
from apps.utils.snowflake import idWorker
|
from apps.utils.snowflake import idWorker
|
||||||
from django.core.cache import cache
|
from django.core.cache import cache
|
||||||
import requests
|
import requests
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from django.conf import settings
|
|
||||||
import os
|
|
||||||
from celery import current_app
|
from celery import current_app
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
@ -21,17 +18,35 @@ config.retry_http_codes = [429, 500, 503]
|
||||||
config.api_key = "4KJZdkCFA0uFb6IsYKc8cd"
|
config.api_key = "4KJZdkCFA0uFb6IsYKc8cd"
|
||||||
|
|
||||||
@shared_task(base=CustomTask)
|
@shared_task(base=CustomTask)
|
||||||
def get_paper_meta_from_openalex(publication_year:int, search_key:str, end_year:int=None):
|
def get_paper_meta_from_openalex(publication_year:int, keywords:str="", search:str="", end_year:int=None):
|
||||||
cache_key = f"openalex_cursor_{publication_year}_{search_key}"
|
cache_key = f"openalex_cursor_{publication_year}_{keywords}{search}"
|
||||||
cache_cursor = cache.get(cache_key, "*")
|
cache_cursor = cache.get(cache_key, "*")
|
||||||
|
if keywords or search:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise Exception("keywords or search must be provided")
|
||||||
|
# filter=keywords.id:clinker|cement
|
||||||
pager = Works().filter(
|
pager = Works().filter(
|
||||||
publication_year=publication_year,
|
publication_year=publication_year,
|
||||||
has_doi=True,
|
has_doi=True,
|
||||||
type="article" # 将 type 移到 filter 中
|
type="article"
|
||||||
).search(search_key).select([
|
)
|
||||||
|
if keywords:
|
||||||
|
if "|" in keywords:
|
||||||
|
keywords_list = keywords.split("|")
|
||||||
|
else:
|
||||||
|
keywords_list = [keywords]
|
||||||
|
pager = pager.filter(
|
||||||
|
keywords={"id": keywords_list}
|
||||||
|
)
|
||||||
|
if search:
|
||||||
|
pager = pager.filter(
|
||||||
|
search=search
|
||||||
|
)
|
||||||
|
pager = pager.select([
|
||||||
"id", "doi", "title", "publication_date",
|
"id", "doi", "title", "publication_date",
|
||||||
"open_access", "authorships", "primary_location", "publication_year",
|
"open_access", "authorships", "primary_location", "publication_year",
|
||||||
"display_name"
|
"display_name", "content_urls"
|
||||||
]).paginate(per_page=200, n_max=None, cursor=cache_cursor)
|
]).paginate(per_page=200, n_max=None, cursor=cache_cursor)
|
||||||
next_cursor = pager._next_value
|
next_cursor = pager._next_value
|
||||||
for page in pager:
|
for page in pager:
|
||||||
|
|
@ -40,7 +55,8 @@ def get_paper_meta_from_openalex(publication_year:int, search_key:str, end_year:
|
||||||
if record["doi"] and (record["display_name"] or record["title"]):
|
if record["doi"] and (record["display_name"] or record["title"]):
|
||||||
paper = Paper()
|
paper = Paper()
|
||||||
paper.id = idWorker.get_id()
|
paper.id = idWorker.get_id()
|
||||||
paper.search_word_first = search_key
|
paper.o_keywords = keywords
|
||||||
|
paper.o_search = search
|
||||||
paper.source = "openalex"
|
paper.source = "openalex"
|
||||||
paper.type = "article"
|
paper.type = "article"
|
||||||
paper.openalex_id = record["id"].split("/")[-1]
|
paper.openalex_id = record["id"].split("/")[-1]
|
||||||
|
|
@ -67,7 +83,8 @@ def get_paper_meta_from_openalex(publication_year:int, search_key:str, end_year:
|
||||||
"apps.resm.tasks.get_paper_meta_from_openalex",
|
"apps.resm.tasks.get_paper_meta_from_openalex",
|
||||||
kwargs={
|
kwargs={
|
||||||
"publication_year": publication_year + 1,
|
"publication_year": publication_year + 1,
|
||||||
"search_key": search_key,
|
"keywords": keywords,
|
||||||
|
"search": search,
|
||||||
"end_year": end_year
|
"end_year": end_year
|
||||||
},
|
},
|
||||||
countdown=5
|
countdown=5
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue