feat: 添加resm app
This commit is contained in:
parent
f9584f6a00
commit
9af30eccaf
|
|
@ -0,0 +1,3 @@
|
||||||
|
from django.contrib import admin
|
||||||
|
|
||||||
|
# Register your models here.
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
from django.apps import AppConfig
|
||||||
|
|
||||||
|
|
||||||
|
class ResmConfig(AppConfig):
    """Django application configuration for the ``apps.resm`` package."""

    # Dotted import path of the application package.
    name = "apps.resm"

    # Field class used for implicit primary keys of models in this app.
    default_auto_field = "django.db.models.BigAutoField"
|
||||||
|
|
@ -0,0 +1,59 @@
|
||||||
|
# Generated by Django 4.2.27 on 2026-01-23 01:53
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
import django.db.models.deletion
|
||||||
|
import django.utils.timezone
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Initial migration of the resm app: creates ``Paper`` and ``PaperAbstract``."""

    initial = True

    # This migration does not depend on any other migration.
    dependencies = []

    operations = [
        # ------------------------------------------------------------------
        # Paper: one row of bibliographic metadata plus fetch-state flags.
        # ------------------------------------------------------------------
        migrations.CreateModel(
            name="Paper",
            fields=[
                # Bookkeeping columns (id / timestamps / soft-delete flag);
                # presumably inherited from the project's BaseModel.
                (
                    "id",
                    models.CharField(
                        editable=False,
                        help_text="主键ID",
                        max_length=20,
                        primary_key=True,
                        serialize=False,
                        verbose_name="主键ID",
                    ),
                ),
                (
                    "create_time",
                    models.DateTimeField(
                        default=django.utils.timezone.now,
                        help_text="创建时间",
                        verbose_name="创建时间",
                    ),
                ),
                (
                    "update_time",
                    models.DateTimeField(
                        auto_now=True,
                        help_text="修改时间",
                        verbose_name="修改时间",
                    ),
                ),
                (
                    "is_deleted",
                    models.BooleanField(
                        default=False,
                        help_text="删除标记",
                        verbose_name="删除标记",
                    ),
                ),
                # Unique identifiers.
                (
                    "openalex_id",
                    models.TextField(
                        blank=True,
                        null=True,
                        unique=True,
                        verbose_name="OpenAlex ID",
                    ),
                ),
                ("doi", models.TextField(unique=True, verbose_name="DOI")),
                # Basic bibliographic information.
                ("type", models.CharField(db_index=True, max_length=20)),
                ("title", models.TextField()),
                ("publication_date", models.DateField(blank=True, null=True)),
                ("publication_year", models.IntegerField(db_index=True)),
                # First author and venue.
                ("first_author", models.TextField(blank=True, null=True)),
                (
                    "first_author_institution",
                    models.TextField(blank=True, null=True),
                ),
                ("publication_name", models.TextField(blank=True, null=True)),
                # Open-access metadata.
                ("is_oa", models.BooleanField(db_index=True, default=False)),
                ("oa_url", models.TextField(blank=True, null=True)),
                # Fetch-state flags.
                (
                    "has_abstract",
                    models.BooleanField(db_index=True, default=False),
                ),
                (
                    "has_fulltext",
                    models.BooleanField(db_index=True, default=False),
                ),
                (
                    "fetch_status",
                    models.CharField(
                        db_index=True,
                        default="meta_only",
                        max_length=20,
                    ),
                ),
                (
                    "fail_reason",
                    models.CharField(blank=True, max_length=50, null=True),
                ),
                (
                    "source",
                    models.CharField(
                        default="openalex",
                        max_length=20,
                        verbose_name="元数据来源",
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
        # ------------------------------------------------------------------
        # PaperAbstract: abstract text attached one-to-one to a Paper.
        # ------------------------------------------------------------------
        migrations.CreateModel(
            name="PaperAbstract",
            fields=[
                (
                    "id",
                    models.CharField(
                        editable=False,
                        help_text="主键ID",
                        max_length=20,
                        primary_key=True,
                        serialize=False,
                        verbose_name="主键ID",
                    ),
                ),
                (
                    "create_time",
                    models.DateTimeField(
                        default=django.utils.timezone.now,
                        help_text="创建时间",
                        verbose_name="创建时间",
                    ),
                ),
                (
                    "update_time",
                    models.DateTimeField(
                        auto_now=True,
                        help_text="修改时间",
                        verbose_name="修改时间",
                    ),
                ),
                (
                    "is_deleted",
                    models.BooleanField(
                        default=False,
                        help_text="删除标记",
                        verbose_name="删除标记",
                    ),
                ),
                ("abstract", models.TextField()),
                (
                    "source",
                    models.CharField(max_length=20, verbose_name="摘要来源"),
                ),
                # Deleting the paper cascades to its abstract row.
                (
                    "paper",
                    models.OneToOneField(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="abstract",
                        to="resm.paper",
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
    ]
|
||||||
|
|
@ -0,0 +1,53 @@
|
||||||
|
from django.db import models
|
||||||
|
from apps.utils.models import BaseModel
|
||||||
|
# Create your models here.
|
||||||
|
|
||||||
|
class Paper(BaseModel):
    """Bibliographic metadata of one paper plus flags tracking what has been fetched."""

    # ----- Globally unique identifiers -----
    openalex_id = models.TextField(
        verbose_name="OpenAlex ID",
        unique=True,
        blank=True,
        null=True,
    )
    doi = models.TextField(verbose_name="DOI", unique=True)

    # ----- Basic information -----
    type = models.CharField(db_index=True, max_length=20)
    title = models.TextField()
    publication_date = models.DateField(blank=True, null=True)
    publication_year = models.IntegerField(db_index=True)

    # ----- Authors (minimal usable set) -----
    first_author = models.TextField(blank=True, null=True)
    first_author_institution = models.TextField(blank=True, null=True)

    # ----- Journal -----
    publication_name = models.TextField(blank=True, null=True)

    # ----- Open-access metadata -----
    is_oa = models.BooleanField(db_index=True, default=False)
    oa_url = models.TextField(blank=True, null=True)

    # ----- Status flags (core of the scheduling logic) -----
    has_abstract = models.BooleanField(db_index=True, default=False)
    has_fulltext = models.BooleanField(db_index=True, default=False)
    # Values noted by the author:
    # meta_only / abstract_ready / fulltext_ready / parsed / failed
    fetch_status = models.CharField(
        db_index=True,
        default="meta_only",
        max_length=20,
    )
    fail_reason = models.CharField(blank=True, null=True, max_length=50)

    # Where the metadata of this row came from.
    source = models.CharField(
        verbose_name="元数据来源",
        default="openalex",
        max_length=20,
    )
|
||||||
|
|
||||||
|
class PaperAbstract(BaseModel):
    """Abstract text of a paper, stored in its own table, one row per ``Paper``."""

    # Reachable from the paper side as ``paper.abstract``; removed together
    # with the paper because of the cascading delete.
    paper = models.OneToOneField(
        to=Paper,
        related_name="abstract",
        on_delete=models.CASCADE,
    )

    abstract = models.TextField()

    # Origin of the abstract text; values noted by the author:
    # openalex / elsevier / crossref
    source = models.CharField(verbose_name="摘要来源", max_length=20)
|
||||||
|
|
@ -0,0 +1,46 @@
|
||||||
|
# Create your tasks here
|
||||||
|
from __future__ import absolute_import, unicode_literals

import os
from itertools import chain

from celery import shared_task
from pyalex import Works, config

from apps.resm.models import Paper
from apps.utils.snowflake import idWorker
from apps.utils.tasks import CustomTask
|
||||||
|
|
||||||
|
# ---- pyalex client configuration (module level, applied at import time) ----
# The contact e-mail and the API key can be overridden through the
# OPENALEX_EMAIL / OPENALEX_API_KEY environment variables, so credentials can
# be rotated without a code change. Unset or empty variables fall back to the
# previous hard-coded values, which keeps existing deployments working.
# SECURITY NOTE(review): the fallback API key is a secret committed to version
# control; rotate it and drop the literal once OPENALEX_API_KEY is provisioned
# everywhere.
config.email = os.environ.get("OPENALEX_EMAIL") or "caoqianming@foxmail.com"
config.api_key = os.environ.get("OPENALEX_API_KEY") or "4KJZdkCFA0uFb6IsYKc8cd"

# NOTE(review): with max_retries = 0 the backoff factor and retry status codes
# below presumably never take effect - confirm whether retries are intended.
config.max_retries = 0
config.retry_backoff_factor = 0.1
config.retry_http_codes = [429, 500, 503]
|
||||||
|
|
||||||
|
def _paper_from_openalex_record(record, publication_year):
    """Map one OpenAlex work record onto an unsaved ``Paper``.

    Args:
        record: dict-like work record returned by the OpenAlex query.
        publication_year: year the query was filtered on; used when the
            record itself carries no ``publication_year``.

    Returns:
        A populated, unsaved ``Paper``, or ``None`` when the record has no
        DOI (``Paper.doi`` is required and unique).
    """
    doi = record.get("doi")
    if not doi:
        return None

    paper = Paper()
    paper.id = idWorker.get_id()
    paper.type = "article"
    # Keep only the last path segment of the OpenAlex id URL.
    paper.openalex_id = record["id"].split("/")[-1]
    paper.doi = doi.replace("https://doi.org/", "")
    # ``title`` and ``publication_year`` are NOT NULL columns; a null coming
    # from the API must not reach the database and abort the batch insert.
    paper.title = record.get("title") or ""
    paper.publication_date = record.get("publication_date")
    paper.publication_year = record.get("publication_year") or publication_year

    open_access = record.get("open_access")
    if open_access:
        # ``is_oa`` is a NOT NULL boolean column; coerce a null to False.
        paper.is_oa = bool(open_access.get("is_oa"))
        paper.oa_url = open_access.get("oa_url")

    authorships = record.get("authorships")
    if authorships:
        first_authorship = authorships[0] or {}
        author = first_authorship.get("author") or {}
        paper.first_author = author.get("display_name")
        institutions = first_authorship.get("institutions")
        if institutions:
            first_institution = institutions[0] or {}
            paper.first_author_institution = first_institution.get("display_name")

    primary_location = record.get("primary_location") or {}
    venue = primary_location.get("source")
    if venue:
        paper.publication_name = venue.get("display_name")

    return paper


@shared_task(base=CustomTask)
def get_paper_meta_from_openalex(publication_year:int, search_key:str):
    """Fetch article metadata from OpenAlex and store it as ``Paper`` rows.

    Queries works of type ``article`` published in ``publication_year`` that
    match ``search_key``, and inserts one ``Paper`` per record that has a DOI.
    Rows are inserted with ``ignore_conflicts=True``, so re-running the task
    for the same query does not fail on the unique columns.

    Result pages are consumed and inserted one at a time. This bounds memory
    use and keeps the pages already stored when a later request fails.

    Args:
        publication_year: publication year to filter on.
        search_key: search string passed to the OpenAlex query.
    """
    query = (
        Works()
        .filter(publication_year=publication_year, type="article")
        .search(search_key)
        .select([
            "id", "doi", "title", "publication_date",
            "open_access", "authorships", "primary_location", "publication_year",
        ])
    )

    # Iterate the paginator lazily: unpacking it with ``chain(*...)`` would
    # download every page before the first record is processed.
    for page in query.paginate(per_page=200):
        candidates = (
            _paper_from_openalex_record(record, publication_year)
            for record in page
        )
        papers = [paper for paper in candidates if paper is not None]
        if papers:
            Paper.objects.bulk_create(papers, ignore_conflicts=True)
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
from django.test import TestCase
|
||||||
|
|
||||||
|
# Create your tests here.
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
from django.shortcuts import render
|
||||||
|
|
||||||
|
# Create your views here.
|
||||||
|
|
@ -58,7 +58,8 @@ INSTALLED_APPS = [
|
||||||
'apps.system',
|
'apps.system',
|
||||||
'apps.auth1',
|
'apps.auth1',
|
||||||
'apps.wf',
|
'apps.wf',
|
||||||
'apps.ops'
|
'apps.ops',
|
||||||
|
'apps.resm'
|
||||||
]
|
]
|
||||||
|
|
||||||
MIDDLEWARE = [
|
MIDDLEWARE = [
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue