feat: paper list 加 pdf_url / xml_url 直链字段 + pg_trgm GIN 索引
serializers: PaperListSerializer 加 pdf_url / xml_url SerializerMethodField,基于 publication_date + safe_doi 后端拼 absolute_uri;has_fulltext_{pdf,xml}=False 或 publication_date 缺失返空串。LLM 客户端从 list 一次拿到直链,不必拼 URL。
migration 0006: CREATE EXTENSION IF NOT EXISTS pg_trgm + 3 列 GIN 索引(title / first_author / first_author_institution),根治 SearchFilter 跨列 ILIKE '%xxx%' 全表扫 timeout(高频词如 cement 原本 30s+,加索引后几十 ms)。
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e8320bce05
commit
6a5a5d7b6b
|
|
@ -0,0 +1,37 @@
|
|||
"""为 SearchFilter 的 title / first_author / first_author_institution 三列建 pg_trgm GIN 索引。
|
||||
|
||||
原因:DRF SearchFilter 走 `column ILIKE '%keyword%'` 前后通配,B-tree 索引救不了;
|
||||
高频关键词 + 几十万行表会 30s+ timeout。pg_trgm 给 trigram 建 GIN 索引,
|
||||
ILIKE '%xxx%' 走索引,降到几十 ms。
|
||||
|
||||
pg_trgm 是 PostgreSQL contrib 扩展,首次启用需要 DB superuser 权限;
|
||||
CREATE EXTENSION IF NOT EXISTS 幂等,迁移可安全重跑。
|
||||
"""
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Enable pg_trgm and add trigram GIN indexes on three ``resm_paper`` columns.

    The indexed columns are ``title``, ``first_author`` and
    ``first_author_institution``. Every statement uses ``IF [NOT] EXISTS`` so
    the migration can be re-run after a partial failure.

    Indexes are built with ``CONCURRENTLY`` so that writes to ``resm_paper``
    are not blocked while the GIN indexes are being built. PostgreSQL refuses
    ``CREATE/DROP INDEX CONCURRENTLY`` inside a transaction block, hence
    ``atomic = False``.

    Operational notes:
      * ``CREATE EXTENSION`` may need elevated database privileges the first
        time it runs on a database.
      * If a concurrent build is interrupted, PostgreSQL leaves an INVALID
        index behind and ``IF NOT EXISTS`` will then skip it; drop the invalid
        index manually before re-running.
    """

    # Required for CREATE/DROP INDEX CONCURRENTLY (cannot run in a transaction).
    atomic = False

    dependencies = [
        ('resm', '0005_alter_paper_fetch_status'),
    ]

    # NOTE(review): stock DRF SearchFilter uses Django's `icontains`, which the
    # PostgreSQL backend compiles to `UPPER(col::text) LIKE UPPER(...)` rather
    # than `col ILIKE ...`. A plain-column trigram index may not be usable for
    # that predicate -- confirm with EXPLAIN on the real search query; if it is
    # not used, index the `UPPER(col)` expression instead.
    operations = [
        migrations.RunSQL(
            sql=[
                "CREATE EXTENSION IF NOT EXISTS pg_trgm;",
                "CREATE INDEX CONCURRENTLY IF NOT EXISTS paper_title_trgm "
                "ON resm_paper USING gin (title gin_trgm_ops);",
                "CREATE INDEX CONCURRENTLY IF NOT EXISTS paper_first_author_trgm "
                "ON resm_paper USING gin (first_author gin_trgm_ops);",
                "CREATE INDEX CONCURRENTLY IF NOT EXISTS "
                "paper_first_author_institution_trgm "
                "ON resm_paper USING gin (first_author_institution gin_trgm_ops);",
            ],
            reverse_sql=[
                "DROP INDEX CONCURRENTLY IF EXISTS paper_first_author_institution_trgm;",
                "DROP INDEX CONCURRENTLY IF EXISTS paper_first_author_trgm;",
                "DROP INDEX CONCURRENTLY IF EXISTS paper_title_trgm;",
                # The pg_trgm extension is deliberately not dropped: other
                # apps / tables may also depend on it.
            ],
        ),
    ]
|
||||
|
|
@ -5,6 +5,8 @@ from .models import Paper
|
|||
|
||||
class PaperListSerializer(CustomModelSerializer):
|
||||
abstract = serializers.SerializerMethodField()
|
||||
pdf_url = serializers.SerializerMethodField()
|
||||
xml_url = serializers.SerializerMethodField()
|
||||
|
||||
class Meta:
|
||||
model = Paper
|
||||
|
|
@ -16,3 +18,23 @@ class PaperListSerializer(CustomModelSerializer):
|
|||
if abs_obj is None:
|
||||
return ""
|
||||
return abs_obj.abstract or ""
|
||||
|
||||
def _media_url(self, obj, ext: str) -> str:
|
||||
"""拼 /media/papers/<Y>/<M>/<D>/<safe_doi>.<ext> 静态直链。
|
||||
|
||||
date 缺失(unknown 目录)/ doi 缺失返空串 —— LLM 拿到空串就知道没法直链下,
|
||||
改走 get_paper / paper_pdf_view。
|
||||
"""
|
||||
if obj.publication_date is None or not obj.doi:
|
||||
return ""
|
||||
safe_doi = obj.doi.replace("/", "_")
|
||||
d = obj.publication_date
|
||||
path = f"/media/papers/{d.year}/{d.month}/{d.day}/{safe_doi}.{ext}"
|
||||
request = self.context.get("request")
|
||||
return request.build_absolute_uri(path) if request else path
|
||||
|
||||
def get_pdf_url(self, obj) -> str:
    """Return the direct PDF link for ``obj``, or ``""`` when it has no PDF full text."""
    if not obj.has_fulltext_pdf:
        return ""
    return self._media_url(obj, "pdf")
|
||||
|
||||
def get_xml_url(self, obj) -> str:
    """Return the direct XML link for ``obj``, or ``""`` when it has no XML full text."""
    if not obj.has_fulltext_xml:
        return ""
    return self._media_url(obj, "xml")
|
||||
|
|
|
|||
Loading…
Reference in New Issue