feat: 添加resm app

This commit is contained in:
caoqianming 2026-01-23 10:37:41 +08:00
parent f9584f6a00
commit 9af30eccaf
11 changed files with 175 additions and 1 deletions

0
apps/resm/__init__.py Normal file
View File

3
apps/resm/admin.py Normal file
View File

@ -0,0 +1,3 @@
from django.contrib import admin
# Register your models here.

6
apps/resm/apps.py Normal file
View File

@ -0,0 +1,6 @@
from django.apps import AppConfig
class ResmConfig(AppConfig):
    """Django application configuration for the ``apps.resm`` package."""

    # Dotted import path of the application package.
    name = "apps.resm"
    # Primary-key field type used by models that do not declare their own.
    default_auto_field = "django.db.models.BigAutoField"

View File

@ -0,0 +1,59 @@
# Generated by Django 4.2.27 on 2026-01-23 01:53
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
# Initial schema migration for the resm app: creates the Paper and
# PaperAbstract models. Do not edit the operations by hand; changes to the
# models belong in a new migration.
class Migration(migrations.Migration):
# Marks this as the first migration of the app.
initial = True
# No migrations from other apps are required before this one.
dependencies = [
]
operations = [
# Paper: one row of bibliographic metadata plus fetch-state flags.
migrations.CreateModel(
name='Paper',
fields=[
('id', models.CharField(editable=False, help_text='主键ID', max_length=20, primary_key=True, serialize=False, verbose_name='主键ID')),
('create_time', models.DateTimeField(default=django.utils.timezone.now, help_text='创建时间', verbose_name='创建时间')),
('update_time', models.DateTimeField(auto_now=True, help_text='修改时间', verbose_name='修改时间')),
('is_deleted', models.BooleanField(default=False, help_text='删除标记', verbose_name='删除标记')),
# Both external identifiers are unique; only openalex_id may be NULL.
('openalex_id', models.TextField(blank=True, null=True, unique=True, verbose_name='OpenAlex ID')),
('doi', models.TextField(unique=True, verbose_name='DOI')),
('type', models.CharField(db_index=True, max_length=20)),
('title', models.TextField()),
('publication_date', models.DateField(blank=True, null=True)),
('publication_year', models.IntegerField(db_index=True)),
('first_author', models.TextField(blank=True, null=True)),
('first_author_institution', models.TextField(blank=True, null=True)),
('publication_name', models.TextField(blank=True, null=True)),
('is_oa', models.BooleanField(db_index=True, default=False)),
('oa_url', models.TextField(blank=True, null=True)),
# Indexed state columns; all of them start in the "nothing fetched yet" state.
('has_abstract', models.BooleanField(db_index=True, default=False)),
('has_fulltext', models.BooleanField(db_index=True, default=False)),
('fetch_status', models.CharField(db_index=True, default='meta_only', max_length=20)),
('fail_reason', models.CharField(blank=True, max_length=50, null=True)),
('source', models.CharField(default='openalex', max_length=20, verbose_name='元数据来源')),
],
options={
'abstract': False,
},
),
# PaperAbstract: abstract text stored separately from Paper, at most one per paper.
migrations.CreateModel(
name='PaperAbstract',
fields=[
('id', models.CharField(editable=False, help_text='主键ID', max_length=20, primary_key=True, serialize=False, verbose_name='主键ID')),
('create_time', models.DateTimeField(default=django.utils.timezone.now, help_text='创建时间', verbose_name='创建时间')),
('update_time', models.DateTimeField(auto_now=True, help_text='修改时间', verbose_name='修改时间')),
('is_deleted', models.BooleanField(default=False, help_text='删除标记', verbose_name='删除标记')),
('abstract', models.TextField()),
('source', models.CharField(max_length=20, verbose_name='摘要来源')),
# One-to-one link to Paper; deleting the paper cascades to its abstract row.
('paper', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='abstract', to='resm.paper')),
],
options={
'abstract': False,
},
),
]

View File

53
apps/resm/models.py Normal file
View File

@ -0,0 +1,53 @@
from django.db import models
from apps.utils.models import BaseModel
# Create your models here.
class Paper(BaseModel):
    """Bibliographic metadata for a single paper, plus its fetch-progress state.

    Identified by a mandatory unique DOI and an optional unique OpenAlex id.
    The ``has_*`` flags and ``fetch_status`` record how much content has been
    retrieved for the paper so far.
    """

    # ----- Globally unique identifiers -----
    openalex_id = models.TextField(
        verbose_name="OpenAlex ID",
        unique=True,
        blank=True,
        null=True,
    )
    doi = models.TextField(verbose_name="DOI", unique=True)

    # ----- Basic information -----
    type = models.CharField(db_index=True, max_length=20)
    title = models.TextField()
    publication_date = models.DateField(blank=True, null=True)
    publication_year = models.IntegerField(db_index=True)

    # ----- Authors (minimal usable set) -----
    first_author = models.TextField(blank=True, null=True)
    first_author_institution = models.TextField(blank=True, null=True)

    # ----- Journal -----
    publication_name = models.TextField(blank=True, null=True)

    # ----- Open-access metadata -----
    is_oa = models.BooleanField(db_index=True, default=False)
    oa_url = models.TextField(blank=True, null=True)

    # ----- Status flags (core of the scheduling logic) -----
    has_abstract = models.BooleanField(db_index=True, default=False)
    has_fulltext = models.BooleanField(db_index=True, default=False)
    # One of: meta_only / abstract_ready / fulltext_ready / parsed / failed
    fetch_status = models.CharField(db_index=True, default="meta_only", max_length=20)
    fail_reason = models.CharField(blank=True, null=True, max_length=50)
    # Where the metadata came from.
    source = models.CharField(verbose_name="元数据来源", default="openalex", max_length=20)
class PaperAbstract(BaseModel):
    """Abstract text of a paper, stored in its own table.

    Each paper has at most one abstract row, reachable from the paper as
    ``paper.abstract``; the row is deleted together with its paper.
    """

    paper = models.OneToOneField(Paper, related_name="abstract", on_delete=models.CASCADE)
    abstract = models.TextField()
    # Where the abstract came from: openalex / elsevier / crossref
    source = models.CharField(verbose_name="摘要来源", max_length=20)

46
apps/resm/tasks.py Normal file
View File

@ -0,0 +1,46 @@
# Create your tasks here
from __future__ import absolute_import, unicode_literals

import os
from itertools import chain

from celery import shared_task
from pyalex import Works, config

from apps.resm.models import Paper
from apps.utils.snowflake import idWorker
from apps.utils.tasks import CustomTask
# ---- OpenAlex (pyalex) client configuration ----
# The contact e-mail and API key can be supplied through the OPENALEX_EMAIL and
# OPENALEX_API_KEY environment variables. The literals below are kept only as
# a backward-compatible fallback for deployments that do not set them.
# TODO(review): a credential committed to version control should be rotated
# and the fallback literal removed once every environment sets the variable.
config.email = os.environ.get("OPENALEX_EMAIL") or "caoqianming@foxmail.com"
# NOTE(review): with max_retries = 0 the backoff factor and retry status codes
# below presumably never take effect - confirm whether retries are wanted.
config.max_retries = 0
config.retry_backoff_factor = 0.1
config.retry_http_codes = [429, 500, 503]
config.api_key = os.environ.get("OPENALEX_API_KEY") or "4KJZdkCFA0uFb6IsYKc8cd"
def _extract_paper_fields(record):
    """Map one OpenAlex work record to a dict of ``Paper`` field values.

    Returns ``None`` when the record has no DOI, because ``Paper.doi`` is
    mandatory and unique. Nested objects that are missing or null are skipped,
    leaving the corresponding model defaults in place.
    """
    doi = record.get("doi")
    if not doi:
        return None

    fields = {
        "type": "article",
        # The OpenAlex id arrives as a URL; keep only its last path segment.
        "openalex_id": record["id"].split("/")[-1],
        "doi": doi.replace("https://doi.org/", ""),
        # Paper.title is NOT NULL; store an empty string for a null title so
        # one bad record cannot fail the whole batch insert.
        "title": record.get("title") or "",
        "publication_date": record.get("publication_date"),
        "publication_year": record["publication_year"],
    }

    open_access = record.get("open_access")
    if open_access:
        # Paper.is_oa is NOT NULL, so coerce a missing/null flag to False.
        fields["is_oa"] = bool(open_access.get("is_oa"))
        fields["oa_url"] = open_access.get("oa_url")

    authorships = record.get("authorships")
    if authorships:
        first_authorship = authorships[0]
        author = first_authorship.get("author") or {}
        fields["first_author"] = author.get("display_name")
        institutions = first_authorship.get("institutions")
        if institutions:
            fields["first_author_institution"] = institutions[0].get("display_name")

    primary_location = record.get("primary_location") or {}
    source = primary_location.get("source")
    if source:
        fields["publication_name"] = source.get("display_name")

    return fields


@shared_task(base=CustomTask)
def get_paper_meta_from_openalex(publication_year: int, search_key: str) -> None:
    """Fetch article metadata from OpenAlex and store it as ``Paper`` rows.

    Queries OpenAlex for works of type "article" published in
    ``publication_year`` that match ``search_key``. Records without a DOI are
    skipped. Rows that collide with an existing unique value are ignored by
    the insert, so the task can be re-run safely.

    Args:
        publication_year: Publication year to filter on.
        search_key: Full-text search expression passed to OpenAlex.
    """
    query = (
        Works()
        # The work type is applied as a filter, alongside the year.
        .filter(publication_year=publication_year, type="article")
        .search(search_key)
        .select([
            "id", "doi", "title", "publication_date",
            "open_access", "authorships", "primary_location", "publication_year",
        ])
    )

    # NOTE(review): pyalex's paginate() appears to cap the total number of
    # results through its n_max argument - confirm the default cap is intended.
    #
    # Pages are consumed lazily and written one at a time, so memory stays
    # bounded and earlier pages are kept if a later request fails.
    for page in query.paginate(per_page=200):
        papers = []
        for record in page:
            fields = _extract_paper_fields(record)
            if fields is None:
                continue
            papers.append(Paper(id=idWorker.get_id(), **fields))
        if papers:
            Paper.objects.bulk_create(papers, ignore_conflicts=True)

3
apps/resm/tests.py Normal file
View File

@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

0
apps/resm/urls.py Normal file
View File

3
apps/resm/views.py Normal file
View File

@ -0,0 +1,3 @@
from django.shortcuts import render
# Create your views here.

View File

@ -58,7 +58,8 @@ INSTALLED_APPS = [
'apps.system', 'apps.system',
'apps.auth1', 'apps.auth1',
'apps.wf', 'apps.wf',
'apps.ops' 'apps.ops',
'apps.resm'
] ]
MIDDLEWARE = [ MIDDLEWARE = [