feat: paper list 加 pdf_url / xml_url 直链字段 + pg_trgm GIN 索引
serializers: PaperListSerializer 加 pdf_url / xml_url SerializerMethodField,基于 publication_date + safe_doi 后端拼 absolute_uri;has_fulltext_{pdf,xml}=False 或 publication_date 缺失返空串。LLM 客户端从 list 一次拿到直链,不必拼 URL。
migration 0006: CREATE EXTENSION IF NOT EXISTS pg_trgm + 3 列 GIN 索引(title / first_author / first_author_institution),根治 SearchFilter 跨列 ILIKE '%xxx%' 全表扫 timeout(高频词如 cement 原本 30s+,加索引后几十 ms)。
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e8320bce05
commit
6a5a5d7b6b
|
|
@ -0,0 +1,37 @@
|
||||||
|
"""为 SearchFilter 的 title / first_author / first_author_institution 三列建 pg_trgm GIN 索引。
|
||||||
|
|
||||||
|
原因:DRF SearchFilter 走 `column ILIKE '%keyword%'` 前后通配,B-tree 索引救不了;
|
||||||
|
高频关键词 + 几十万行表会 30s+ timeout。pg_trgm 给 trigram 建 GIN 索引,
|
||||||
|
ILIKE '%xxx%' 走索引,降到几十 ms。
|
||||||
|
|
||||||
|
pg_trgm 是 PostgreSQL contrib 扩展,首次启用需要 DB superuser 权限;
|
||||||
|
CREATE EXTENSION IF NOT EXISTS 幂等,迁移可安全重跑。
|
||||||
|
"""
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
# Columns that receive a trigram GIN index, in creation order.
_TRGM_COLUMNS = ("title", "first_author", "first_author_institution")


class Migration(migrations.Migration):
    """Enable pg_trgm and add one trigram GIN index per searched column.

    Forward: ``CREATE EXTENSION IF NOT EXISTS pg_trgm`` followed by a
    ``CREATE INDEX IF NOT EXISTS paper_<column>_trgm`` on ``resm_paper`` for
    every column in ``_TRGM_COLUMNS``.
    Reverse: drops the indexes in the opposite order of creation.
    """

    dependencies = [("resm", "0005_alter_paper_fetch_status")]

    operations = [
        migrations.RunSQL(
            sql=["CREATE EXTENSION IF NOT EXISTS pg_trgm;"]
            + [
                f"CREATE INDEX IF NOT EXISTS paper_{column}_trgm "
                f"ON resm_paper USING gin ({column} gin_trgm_ops);"
                for column in _TRGM_COLUMNS
            ],
            # The pg_trgm extension itself is intentionally not dropped:
            # other apps / tables may be using it as well.
            reverse_sql=[
                f"DROP INDEX IF EXISTS paper_{column}_trgm;"
                for column in reversed(_TRGM_COLUMNS)
            ],
        ),
    ]
|
||||||
|
|
@ -5,6 +5,8 @@ from .models import Paper
|
||||||
|
|
||||||
class PaperListSerializer(CustomModelSerializer):
|
class PaperListSerializer(CustomModelSerializer):
|
||||||
abstract = serializers.SerializerMethodField()
|
abstract = serializers.SerializerMethodField()
|
||||||
|
pdf_url = serializers.SerializerMethodField()
|
||||||
|
xml_url = serializers.SerializerMethodField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Paper
|
model = Paper
|
||||||
|
|
@ -16,3 +18,23 @@ class PaperListSerializer(CustomModelSerializer):
|
||||||
if abs_obj is None:
|
if abs_obj is None:
|
||||||
return ""
|
return ""
|
||||||
return abs_obj.abstract or ""
|
return abs_obj.abstract or ""
|
||||||
|
|
||||||
|
def _media_url(self, obj, ext: str) -> str:
    """Build the static link ``/media/papers/<Y>/<M>/<D>/<safe_doi>.<ext>``.

    ``safe_doi`` is the DOI with every ``/`` replaced by ``_`` and then
    percent-encoded, so that characters which are legal in a DOI but have a
    special meaning in a URL (``#``, ``?``, ``%``, ``<``, ``>``, spaces,
    non-ASCII) cannot cut the path short or change which file is addressed.
    Characters that are valid unescaped in a URL path segment are left
    untouched, so links for ordinary DOIs are unchanged.

    Args:
        obj: Paper-like object exposing ``publication_date`` and ``doi``.
        ext: File extension without the leading dot (e.g. ``"pdf"``).

    Returns:
        An absolute URI when a request is present in the serializer context,
        otherwise the site-relative path. An empty string when the
        publication date is missing (such files live under the "unknown"
        directory) or the DOI is empty, which tells an LLM client that no
        direct link exists and it should fall back to
        get_paper / paper_pdf_view.
    """
    from urllib.parse import quote

    if obj.publication_date is None or not obj.doi:
        return ""
    # Keep RFC 3986 sub-delims plus ":" and "@" literal; escape the rest.
    safe_doi = quote(obj.doi.replace("/", "_"), safe=":@!$&'()*+,;=")
    d = obj.publication_date
    path = f"/media/papers/{d.year}/{d.month}/{d.day}/{safe_doi}.{ext}"
    request = self.context.get("request")
    return request.build_absolute_uri(path) if request else path
|
||||||
|
|
||||||
|
def get_pdf_url(self, obj) -> str:
    """Return the direct PDF link, or "" when the paper has no PDF full text."""
    if not obj.has_fulltext_pdf:
        return ""
    return self._media_url(obj, "pdf")
|
||||||
|
|
||||||
|
def get_xml_url(self, obj) -> str:
    """Return the direct XML link, or "" when the paper has no XML full text."""
    if not obj.has_fulltext_xml:
        return ""
    return self._media_url(obj, "xml")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue