feat: 添加resm app
This commit is contained in:
parent
f9584f6a00
commit
9af30eccaf
|
|
@ -0,0 +1,3 @@
|
|||
from django.contrib import admin
|
||||
|
||||
# Register your models here.
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class ResmConfig(AppConfig):
    """Django application configuration for the ``apps.resm`` app."""

    # Dotted path Django uses to locate this application.
    name = "apps.resm"
    # Primary-key field type for models that do not declare their own.
    default_auto_field = "django.db.models.BigAutoField"
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
# Generated by Django 4.2.27 on 2026-01-23 01:53
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial migration for the resm app: creates ``Paper`` and ``PaperAbstract``."""

    initial = True

    # No other migrations need to run before this one.
    dependencies = []

    operations = [
        # Paper: one row of bibliographic metadata plus fetch-state flags.
        migrations.CreateModel(
            name="Paper",
            fields=[
                (
                    "id",
                    models.CharField(
                        editable=False,
                        help_text="主键ID",
                        max_length=20,
                        primary_key=True,
                        serialize=False,
                        verbose_name="主键ID",
                    ),
                ),
                (
                    "create_time",
                    models.DateTimeField(
                        default=django.utils.timezone.now,
                        help_text="创建时间",
                        verbose_name="创建时间",
                    ),
                ),
                (
                    "update_time",
                    models.DateTimeField(
                        auto_now=True,
                        help_text="修改时间",
                        verbose_name="修改时间",
                    ),
                ),
                (
                    "is_deleted",
                    models.BooleanField(
                        default=False,
                        help_text="删除标记",
                        verbose_name="删除标记",
                    ),
                ),
                (
                    "openalex_id",
                    models.TextField(
                        blank=True,
                        null=True,
                        unique=True,
                        verbose_name="OpenAlex ID",
                    ),
                ),
                (
                    "doi",
                    models.TextField(unique=True, verbose_name="DOI"),
                ),
                (
                    "type",
                    models.CharField(db_index=True, max_length=20),
                ),
                (
                    "title",
                    models.TextField(),
                ),
                (
                    "publication_date",
                    models.DateField(blank=True, null=True),
                ),
                (
                    "publication_year",
                    models.IntegerField(db_index=True),
                ),
                (
                    "first_author",
                    models.TextField(blank=True, null=True),
                ),
                (
                    "first_author_institution",
                    models.TextField(blank=True, null=True),
                ),
                (
                    "publication_name",
                    models.TextField(blank=True, null=True),
                ),
                (
                    "is_oa",
                    models.BooleanField(db_index=True, default=False),
                ),
                (
                    "oa_url",
                    models.TextField(blank=True, null=True),
                ),
                (
                    "has_abstract",
                    models.BooleanField(db_index=True, default=False),
                ),
                (
                    "has_fulltext",
                    models.BooleanField(db_index=True, default=False),
                ),
                (
                    "fetch_status",
                    models.CharField(
                        db_index=True,
                        default="meta_only",
                        max_length=20,
                    ),
                ),
                (
                    "fail_reason",
                    models.CharField(blank=True, max_length=50, null=True),
                ),
                (
                    "source",
                    models.CharField(
                        default="openalex",
                        max_length=20,
                        verbose_name="元数据来源",
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
        # PaperAbstract: abstract text linked one-to-one to a Paper row.
        migrations.CreateModel(
            name="PaperAbstract",
            fields=[
                (
                    "id",
                    models.CharField(
                        editable=False,
                        help_text="主键ID",
                        max_length=20,
                        primary_key=True,
                        serialize=False,
                        verbose_name="主键ID",
                    ),
                ),
                (
                    "create_time",
                    models.DateTimeField(
                        default=django.utils.timezone.now,
                        help_text="创建时间",
                        verbose_name="创建时间",
                    ),
                ),
                (
                    "update_time",
                    models.DateTimeField(
                        auto_now=True,
                        help_text="修改时间",
                        verbose_name="修改时间",
                    ),
                ),
                (
                    "is_deleted",
                    models.BooleanField(
                        default=False,
                        help_text="删除标记",
                        verbose_name="删除标记",
                    ),
                ),
                (
                    "abstract",
                    models.TextField(),
                ),
                (
                    "source",
                    models.CharField(max_length=20, verbose_name="摘要来源"),
                ),
                (
                    "paper",
                    models.OneToOneField(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="abstract",
                        to="resm.paper",
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
    ]
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
from django.db import models
|
||||
from apps.utils.models import BaseModel
|
||||
# Create your models here.
|
||||
|
||||
class Paper(BaseModel):
    """Bibliographic metadata for one paper, plus flags tracking how much of it has been fetched."""

    # ----- Globally unique identifiers -----
    openalex_id = models.TextField(
        verbose_name="OpenAlex ID",
        unique=True,
        blank=True,
        null=True,
    )
    doi = models.TextField(verbose_name="DOI", unique=True)

    # ----- Basic information -----
    type = models.CharField(db_index=True, max_length=20)
    title = models.TextField()
    publication_date = models.DateField(blank=True, null=True)
    publication_year = models.IntegerField(db_index=True)

    # ----- Authors (minimal usable set) -----
    first_author = models.TextField(blank=True, null=True)
    first_author_institution = models.TextField(blank=True, null=True)

    # ----- Journal -----
    publication_name = models.TextField(blank=True, null=True)

    # ----- Open-access metadata -----
    is_oa = models.BooleanField(db_index=True, default=False)
    oa_url = models.TextField(blank=True, null=True)

    # ----- Status flags (core of the scheduling logic) -----
    has_abstract = models.BooleanField(db_index=True, default=False)
    has_fulltext = models.BooleanField(db_index=True, default=False)
    # Documented values: meta_only / abstract_ready / fulltext_ready / parsed / failed
    fetch_status = models.CharField(
        db_index=True,
        default="meta_only",
        max_length=20,
    )
    fail_reason = models.CharField(blank=True, null=True, max_length=50)

    # Where the metadata came from.
    source = models.CharField(
        verbose_name="元数据来源",
        default="openalex",
        max_length=20,
    )
|
||||
|
||||
class PaperAbstract(BaseModel):
    """Abstract text for a paper, linked one-to-one with its ``Paper`` row."""

    # Deleting the Paper cascades to its abstract; reachable from a Paper as ``.abstract``.
    paper = models.OneToOneField(
        to=Paper,
        related_name="abstract",
        on_delete=models.CASCADE,
    )

    abstract = models.TextField()

    # Origin of the abstract text; documented values: openalex / elsevier / crossref
    source = models.CharField(verbose_name="摘要来源", max_length=20)
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Create your tasks here
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
from apps.utils.tasks import CustomTask
|
||||
from celery import shared_task
|
||||
from pyalex import Works, config
|
||||
from itertools import chain
|
||||
from apps.resm.models import Paper
|
||||
from apps.utils.snowflake import idWorker
|
||||
|
||||
import os

# pyalex client configuration, applied once when this module is imported.
#
# NOTE(review): the contact e-mail and API key used to be available only as
# literals in source control. They can now be supplied through the
# OPENALEX_EMAIL / OPENALEX_API_KEY environment variables; the literals are
# kept solely as backward-compatible fallbacks. The committed key should be
# treated as exposed: rotate it and drop the fallback once deployments set
# the environment variable.
config.email = os.environ.get("OPENALEX_EMAIL", "caoqianming@foxmail.com")

# NOTE(review): with max_retries = 0 the backoff factor and retry status codes
# below presumably never take effect — confirm whether retries are meant to be
# disabled.
config.max_retries = 0
config.retry_backoff_factor = 0.1
config.retry_http_codes = [429, 500, 503]

config.api_key = os.environ.get("OPENALEX_API_KEY", "4KJZdkCFA0uFb6IsYKc8cd")
|
||||
|
||||
# Prefix OpenAlex puts in front of the bare DOI.
_DOI_URL_PREFIX = "https://doi.org/"
# Work type used both as the OpenAlex filter and as the stored Paper.type.
_WORK_TYPE = "article"
# Page size requested from OpenAlex for each paginated call.
_PAGE_SIZE = 200


def _paper_from_record(record):
    """Build an unsaved ``Paper`` from one OpenAlex work record.

    Returns ``None`` when the record has no DOI, because ``Paper.doi`` is a
    required, unique column and such a record cannot be stored.
    """
    doi = record["doi"]
    if not doi:
        return None
    if doi.startswith(_DOI_URL_PREFIX):
        doi = doi[len(_DOI_URL_PREFIX):]

    paper = Paper()
    paper.id = idWorker.get_id()
    paper.type = _WORK_TYPE
    # The OpenAlex id is a URL; keep only its last path segment.
    paper.openalex_id = record["id"].split("/")[-1]
    paper.doi = doi
    # Paper.title is a non-null column; a missing title is stored as an empty
    # string so one incomplete record cannot fail the whole batch insert.
    paper.title = record["title"] or ""
    paper.publication_date = record["publication_date"]
    paper.publication_year = record["publication_year"]

    open_access = record["open_access"]
    if open_access:
        # Paper.is_oa is a non-null boolean, so coerce a missing flag to False.
        paper.is_oa = bool(open_access.get("is_oa"))
        paper.oa_url = open_access.get("oa_url")

    authorships = record["authorships"]
    if authorships:
        lead = authorships[0]
        paper.first_author = (lead.get("author") or {}).get("display_name")
        institutions = lead.get("institutions")
        if institutions:
            paper.first_author_institution = institutions[0].get("display_name")

    location = record["primary_location"]
    venue = location.get("source") if location else None
    if venue:
        paper.publication_name = venue.get("display_name")

    return paper


@shared_task(base=CustomTask)
def get_paper_meta_from_openalex(publication_year: int, search_key: str) -> None:
    """Fetch article metadata from OpenAlex and store it as ``Paper`` rows.

    Args:
        publication_year: Only works published in this year are requested.
        search_key: Search string passed to the OpenAlex works search.

    Records without a DOI are skipped. Rows are inserted one result page at a
    time with ``ignore_conflicts=True``, so rows that collide with an existing
    unique value are skipped by the database rather than raising.
    """
    query = (
        Works()
        .filter(publication_year=publication_year, type=_WORK_TYPE)
        .search(search_key)
        .select([
            "id", "doi", "title", "publication_date",
            "open_access", "authorships", "primary_location", "publication_year",
        ])
    )

    # NOTE(review): pyalex's paginate() is believed to stop after a default
    # n_max of 10000 results — confirm, and pass n_max=None if a complete
    # harvest is intended.
    # Pages are consumed lazily and written as they arrive, so memory stays
    # bounded by one page instead of the whole result set.
    for page in query.paginate(per_page=_PAGE_SIZE):
        papers = [
            paper
            for paper in (_paper_from_record(record) for record in page)
            if paper is not None
        ]
        if papers:
            Paper.objects.bulk_create(papers, ignore_conflicts=True)
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
from django.test import TestCase
|
||||
|
||||
# Create your tests here.
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
from django.shortcuts import render
|
||||
|
||||
# Create your views here.
|
||||
|
|
@ -58,7 +58,8 @@ INSTALLED_APPS = [
|
|||
'apps.system',
|
||||
'apps.auth1',
|
||||
'apps.wf',
|
||||
'apps.ops'
|
||||
'apps.ops',
|
||||
'apps.resm'
|
||||
]
|
||||
|
||||
MIDDLEWARE = [
|
||||
|
|
|
|||
Loading…
Reference in New Issue