import os

from django.conf import settings
from django.db import models

from apps.utils.models import BaseModel


# Create your models here.
class Paper(BaseModel):
    """Metadata record for one paper, plus helpers that store its files on disk.

    The boolean ``has_*`` flags and ``fetch_status`` track how much content
    (abstract / full text, XML / PDF) is recorded as available for the paper.
    """

    # ===== Globally unique identifiers =====
    openalex_id = models.TextField(
        unique=True, verbose_name="OpenAlex ID", null=True, blank=True
    )
    doi = models.TextField(unique=True, verbose_name="DOI")

    # ===== Basic information =====
    type = models.CharField(max_length=20, db_index=True)
    title = models.TextField()
    publication_date = models.DateField(null=True, blank=True)
    publication_year = models.IntegerField(db_index=True)

    # ===== Authors (minimal usable set) =====
    first_author = models.TextField(null=True, blank=True)
    first_author_institution = models.TextField(null=True, blank=True)

    # ===== Journal =====
    publication_name = models.TextField(null=True, blank=True)

    # ===== Open-access (OA) metadata =====
    is_oa = models.BooleanField(default=False, db_index=True)
    oa_url = models.TextField(null=True, blank=True)

    # ===== Status flags (core of the scheduling logic) =====
    has_abstract = models.BooleanField(default=False, db_index=True)
    has_abstract_xml = models.BooleanField(default=False, db_index=True)
    has_fulltext = models.BooleanField(default=False, db_index=True)
    has_fulltext_xml = models.BooleanField(default=False, db_index=True)
    has_fulltext_pdf = models.BooleanField(default=False, db_index=True)
    fetch_status = models.CharField(
        max_length=20,
        default="meta_only",  # meta_only / abstract_ready / fulltext_ready / parsed / failed
        db_index=True,
    )
    fail_reason = models.TextField(null=True, blank=True)
    source = models.CharField(
        max_length=20,
        default="openalex",
        verbose_name="元数据来源",
    )
    o_search = models.TextField(default="cement")
    o_keywords = models.TextField(null=True, blank=True)

    def init_save_dir(self) -> str:
        """Create (if needed) and return the directory for this paper's files.

        The directory is ``<BASE_DIR>/media/papers/<year>/<month>/<day>``,
        derived from ``publication_date``.

        NOTE(review): ``publication_date`` is nullable; when it is ``None``
        this method raises ``AttributeError``. Callers must make sure the date
        is set before saving files.
        """
        publication_date = self.publication_date
        paper_dir = os.path.join(
            settings.BASE_DIR,
            "media/papers",
            str(publication_date.year),
            str(publication_date.month),
            str(publication_date.day),
        )
        os.makedirs(paper_dir, exist_ok=True)
        return paper_dir

    def _build_file_path(self, extension: str) -> str:
        """Return the on-disk path for this paper's file with *extension*.

        The file name is the DOI with every ``/`` replaced by ``_`` so that it
        is a single path component.
        """
        safe_doi = self.doi.replace("/", "_")
        return os.path.join(self.init_save_dir(), f"{safe_doi}.{extension}")

    def save_file_xml(self, content: str) -> None:
        """Write *content* (text) to ``<safe_doi>.xml`` encoded as UTF-8."""
        with open(self._build_file_path("xml"), "wb") as f:
            f.write(content.encode("utf-8"))

    def save_file_pdf(self, content: bytes) -> None:
        """Write *content* (raw bytes) to ``<safe_doi>.pdf``."""
        with open(self._build_file_path("pdf"), "wb") as f:
            f.write(content)

    def save_fail_reason(self, reason) -> None:
        """Append *reason* to ``fail_reason`` and persist it.

        Multiple reasons are joined with ``;``. Only ``fail_reason`` and
        ``update_time`` are written (``update_time`` is presumably declared
        on ``BaseModel`` -- confirm).
        """
        if self.fail_reason:
            self.fail_reason += f";{reason}"
        else:
            # First reason: no separator, so the stored value does not start
            # with a stray ";".
            self.fail_reason = f"{reason}"
        self.save(update_fields=["fail_reason", "update_time"])


class PaperAbstract(BaseModel):
    """Abstract text for a paper, linked one-to-one to ``Paper``.

    Reachable from a paper as ``paper.abstract``; deleted together with the
    paper (``on_delete=CASCADE``).
    """

    paper = models.OneToOneField(
        Paper,
        on_delete=models.CASCADE,
        related_name="abstract",
    )
    abstract = models.TextField()
    source = models.CharField(
        max_length=20,
        verbose_name="摘要来源",  # openalex / elsevier / crossref
    )