diff --git a/apps/resm/migrations/0006_pg_trgm_index.py b/apps/resm/migrations/0006_pg_trgm_index.py
new file mode 100644
index 0000000..7a48924
--- /dev/null
+++ b/apps/resm/migrations/0006_pg_trgm_index.py
@@ -0,0 +1,44 @@
+"""Create pg_trgm GIN indexes on the three columns used by SearchFilter:
+title, first_author and first_author_institution.
+
+Why: DRF SearchFilter issues `column ILIKE '%keyword%'` with a wildcard on both
+sides, which a B-tree index cannot serve; frequent keywords against a table of
+several hundred thousand rows hit 30s+ timeouts. pg_trgm builds a GIN index
+over trigrams so that ILIKE '%xxx%' uses the index and drops to tens of ms.
+
+pg_trgm is a PostgreSQL contrib extension; enabling it for the first time
+requires DB superuser privileges. CREATE EXTENSION IF NOT EXISTS is
+idempotent, so the migration can safely be re-run.
+
+NOTE(review): a plain CREATE INDEX blocks writes to resm_paper while each
+index is built -- confirm this is acceptable at deploy time.
+"""
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('resm', '0005_alter_paper_fetch_status'),
+    ]
+
+    operations = [
+        migrations.RunSQL(
+            sql=[
+                "CREATE EXTENSION IF NOT EXISTS pg_trgm;",
+                "CREATE INDEX IF NOT EXISTS paper_title_trgm "
+                "ON resm_paper USING gin (title gin_trgm_ops);",
+                "CREATE INDEX IF NOT EXISTS paper_first_author_trgm "
+                "ON resm_paper USING gin (first_author gin_trgm_ops);",
+                "CREATE INDEX IF NOT EXISTS paper_first_author_institution_trgm "
+                "ON resm_paper USING gin (first_author_institution gin_trgm_ops);",
+            ],
+            reverse_sql=[
+                "DROP INDEX IF EXISTS paper_first_author_institution_trgm;",
+                "DROP INDEX IF EXISTS paper_first_author_trgm;",
+                "DROP INDEX IF EXISTS paper_title_trgm;",
+                # The pg_trgm extension is deliberately not dropped: other
+                # apps / tables may be using it as well.
+            ],
+        ),
+    ]
diff --git a/apps/resm/serializers.py b/apps/resm/serializers.py
index 73efee7..be5bac7 100644
--- a/apps/resm/serializers.py
+++ b/apps/resm/serializers.py
@@ -5,6 +5,8 @@ from .models import Paper
 
 class PaperListSerializer(CustomModelSerializer):
     abstract = serializers.SerializerMethodField()
+    pdf_url = serializers.SerializerMethodField()
+    xml_url = serializers.SerializerMethodField()
 
     class Meta:
         model = Paper
@@ -16,3 +18,44 @@ class PaperListSerializer(CustomModelSerializer):
         if abs_obj is None:
             return ""
         return abs_obj.abstract or ""
+
+    def _media_url(self, obj, ext: str) -> str:
+        """Build the static direct link /media/papers/{year}/{month}/{day}/{doi}.{ext}.
+
+        Returns an empty string when the publication date is missing (the
+        "unknown" directory case) or the DOI is missing; a consumer such as an
+        LLM that receives an empty string knows no direct download is possible
+        and should fall back to get_paper / paper_pdf_view.
+
+        Args:
+            obj: Object exposing ``publication_date`` and ``doi``.
+            ext: File extension without the leading dot, e.g. "pdf" or "xml".
+
+        Returns:
+            The absolute URL when the serializer context holds a request,
+            otherwise the site-relative path.
+        """
+        # Imported locally so the change stays confined to this method.
+        from urllib.parse import quote
+
+        if obj.publication_date is None or not obj.doi:
+            return ""
+        # "/" would introduce extra path segments, so it is flattened to "_".
+        # The result is then percent-encoded: DOIs may contain URL-significant
+        # characters such as "#", "?" or "%" that would otherwise truncate or
+        # corrupt the link.
+        safe_doi = quote(obj.doi.replace("/", "_"), safe="")
+        d = obj.publication_date
+        # NOTE(review): month and day are not zero-padded here -- confirm this
+        # matches the on-disk directory layout.
+        path = f"/media/papers/{d.year}/{d.month}/{d.day}/{safe_doi}.{ext}"
+        request = self.context.get("request")
+        return request.build_absolute_uri(path) if request else path
+
+    def get_pdf_url(self, obj) -> str:
+        """Return the direct PDF link, or an empty string without a full-text PDF."""
+        return self._media_url(obj, "pdf") if obj.has_fulltext_pdf else ""
+
+    def get_xml_url(self, obj) -> str:
+        """Return the direct XML link, or an empty string without a full-text XML."""
+        return self._media_url(obj, "xml") if obj.has_fulltext_xml else ""