From 425ea599376b43ca896a834f3f4751ca9d6e1aee Mon Sep 17 00:00:00 2001 From: caoqianming Date: Thu, 14 May 2026 10:41:44 +0800 Subject: [PATCH] =?UTF-8?q?core(=C2=A77=20B=20Step=201):=20Storage=20?= =?UTF-8?q?=E5=9F=BA=E5=BB=BA=20=E2=80=94=20SQLAlchemy=20ORM=20+=20alembic?= =?UTF-8?q?=20+=20db=20CLI?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - core/storage/{engine,models}.py: 5 表 ORM(users/tasks/messages/runs/ usage_events)+ session_scope 上下文 + 本地 sentinel user 初始化 - alembic 初版 migration 0001_initial_schema: messages.payload GIN 索引 + tasks (user_id, task_dir) 复合索引 + pgcrypto 扩展兜底 - cli.py: db upgrade/downgrade/current 子命令组;ZCBOT_DB_URL 未设 给 ASCII 报错 + exit 2(避开 Windows GBK 控制台编码问题) - requirements: +sqlalchemy>=2.0 +psycopg[binary]>=3.1 +alembic>=1.13 - DB URL 来自环境变量 ZCBOT_DB_URL,不引导 docker(用户给测试库地址) 已在远端测试 PG 跑通 db upgrade head + db current。Session/TaskState ORM 接入留 Step 2-3。 Co-Authored-By: Claude Opus 4.7 (1M context) --- PROGRESS.md | 25 +++- alembic.ini | 43 ++++++ cli.py | 45 +++++++ core/storage/__init__.py | 22 +++ core/storage/engine.py | 80 +++++++++++ core/storage/models.py | 124 +++++++++++++++++ db/migrations/env.py | 61 +++++++++ db/migrations/script.py.mako | 25 ++++ .../20260514_0930_0001_initial_schema.py | 125 ++++++++++++++++++ requirements.txt | 5 + 10 files changed, 549 insertions(+), 6 deletions(-) create mode 100644 alembic.ini create mode 100644 core/storage/__init__.py create mode 100644 core/storage/engine.py create mode 100644 core/storage/models.py create mode 100644 db/migrations/env.py create mode 100644 db/migrations/script.py.mako create mode 100644 db/migrations/versions/20260514_0930_0001_initial_schema.py diff --git a/PROGRESS.md b/PROGRESS.md index 127508f..bcbacb6 100644 --- a/PROGRESS.md +++ b/PROGRESS.md @@ -2,7 +2,7 @@ > 配合 `DESIGN.md`。本文件只记 phase 状态、决策偏差、文件量、下一步。 -最后更新:2026-05-12 +最后更新:2026-05-14 --- @@ -15,7 +15,7 @@ | 5 | Eval Suite | ⏸ 不做 | dogfooding 替代,probe 覆盖健康检查 | | 6 | 长任务工程化 | 🟡 | task + 恢复 ✅;双层记忆 ✅;context 压缩未做 | | 7 | 打磨 | ❌ | Docker 沙盒 / 更多 skill | -| §7 SaaS | DESIGN §7 路线 | 🟡 | A 事件流化 ✅;B(Storage 落 PG + Folder API)可立刻开 | +| §7 SaaS | DESIGN §7 路线 | 🟡 | A 事件流化 ✅;B 进行中(Step 1 基建 ✅;Step 2-6 待) | --- @@ -28,6 +28,8 @@ - **05-09 → 05-10 / §7 草案 + 导出**:DESIGN §7 初版(05-12 重写);`cli.py export ` + `core/export_docx.py`。 - **05-11 / 原子写 + 双层记忆 + §7 A**:`atomic_write_text` 接管 save;`core/memory.py`(core.md 入 prompt,extended/* 走索引);loop 事件流化(`sink.emit`)铺 SSE 路。 - **05-12 / §7 改写**:platform/core 多租户方案废弃,改 user-direct(folder-centric、task/messages 入 PG、no-subtask、hard cascade)。 +- **05-14 / §7.1 心智模型修正**:`Folder-centric` → **Task 一等公民 + Dir 文件副视图**(双视图正交,dir 不是 task 父容器);task_dir 留空=一次性对话 / 指定=项目化二分语义入文。 +- **05-14 / §7 B Step 1 基建**:`core/storage/{engine,models}.py` SQLAlchemy 2.x ORM(users/tasks/messages/runs/usage_events 5 表)+ alembic(初版 migration `0001_initial_schema`,GIN/复合索引)+ `cli db {upgrade,downgrade,current}` 子命令组 + 本地 sentinel user(`00000000-...`)+ `ZCBOT_DB_URL` 必填(未设给清晰报错,不引导 docker)。已在远端测试 PG 跑通 `db upgrade head`。 --- @@ -57,24 +59,35 @@ core/skills.py 81 core/task.py 64 core/memory.py 76 core/export_docx.py 372 +core/storage/__init__.py 22 ← §7 B Step 1 +core/storage/engine.py 80 ← §7 B Step 1 +core/storage/models.py 124 ← §7 B Step 1 tools/base.py 34 tools/fs.py 182 tools/shell.py 94 tools/run_python.py 84 tools/skill_tool.py 45 main.py 210 -cli.py 439 +cli.py 484 ← §7 B Step 1: +db 子命令组 +db/migrations/env.py 61 ← §7 B Step 1 +db/migrations/versions/ + 0001_initial_schema.py 125 ← §7 B Step 1 ───────────────────────────────── -Python 合计 ~2429 行 +Python 合计 ~2841 行 ``` -加 skills/ppt 脚本 ~600 行 + SKILL.md / references / config / prompts,总仓库约 3000 行。 +加 skills/ppt 脚本 ~600 行 + SKILL.md / references / config / prompts + alembic.ini,总仓库约 3500 行。 --- ## 下一步候选(性价比排序) -1. **§7 B 阶段**(~1 周)—— Storage 落 PG(单一实现)+ task_dir 双形态 + Folder API + No-subtask。**dogfood 即生效**(messages 进 DB → 全文搜可用)。里程碑:schema + alembic → ORM 接入 Session/TaskState → CLI 适配 → `migrate-from-fs` → Folder API + no-subtask SQL → 本地 sentinel user init。 +1. **§7 B 剩余 Step 2-6**(~4 天) + - Step 2 Session ORM 改造(append/load → PG messages 表) + - Step 3 TaskState ORM 改造(state.json → PG tasks 表;加 task_dir 字段语义) + - Step 4 main.py / cli.py 适配 ORM(`resolve_task_messages_path` 重构、`_cleanup_if_empty` 新双检查、UUID task_id 处理) + - Step 5 `cli migrate-from-fs`(导旧 workspace/tasks/*/ → PG,idempotent) + - Step 6 no-subtask SQL 校验(`new LIKE existing/%` cascade) 2. **Phase 6 context 三层压缩**(~1 天)—— 兜底,V4 长上下文一般用不到。 3. **Phase 7 更多 skill / 模型档案**(持续)。 4. **Proposal mermaid 预渲染**(~半天)—— ASCII 透传不够用时再上 `mmdc`。 diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..3c302f7 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,43 @@ +# Alembic config. DB URL is read from ZCBOT_DB_URL env var in env.py +# (NOT hardcoded here, so SaaS/local share the same alembic.ini). + +[alembic] +script_location = db/migrations +prepend_sys_path = . +version_path_separator = os +# timestamp + rev + slug, sortable by name +file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d_%%(rev)s_%%(slug)s + +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/cli.py b/cli.py index 3047f84..3ebf8f3 100644 --- a/cli.py +++ b/cli.py @@ -37,6 +37,51 @@ def cli() -> None: """zcbot - 个人任务 agent""" +@cli.group() +def db() -> None: + """数据库管理 (alembic upgrade/downgrade/current)。需先 export ZCBOT_DB_URL。""" + + +def _alembic_cfg(): + from alembic.config import Config + return Config(str(ROOT / "alembic.ini")) + + +def _run_alembic(fn, *args) -> None: + """统一包一层友好出错(ZCBOT_DB_URL 未设置 / 连不上 → 简洁报错,不打 traceback)。""" + try: + fn(_alembic_cfg(), *args) + except RuntimeError as e: + click.echo(f"[err] {e}", err=True) + sys.exit(2) + except Exception as e: + click.echo(f"[err] {type(e).__name__}: {e}", err=True) + sys.exit(3) + + +@db.command("upgrade") +@click.argument("revision", default="head") +def db_upgrade(revision: str) -> None: + """alembic upgrade (default head).""" + from alembic import command + _run_alembic(command.upgrade, revision) + + +@db.command("downgrade") +@click.argument("revision") +def db_downgrade(revision: str) -> None: + """alembic downgrade (use -1 for one step, base for all).""" + from alembic import command + _run_alembic(command.downgrade, revision) + + +@db.command("current") +def db_current() -> None: + """alembic current -- show currently applied revision.""" + from alembic import command + _run_alembic(command.current) + + def _cleanup_if_empty(task_dir, session, console=None) -> bool: """切走前清理 task_dir。三条都满足才删: 1) session 没有 user 消息 diff --git a/core/storage/__init__.py b/core/storage/__init__.py new file mode 100644 index 0000000..f88421d --- /dev/null +++ b/core/storage/__init__.py @@ -0,0 +1,22 @@ +"""§7 B 阶段:Storage 落 PG。 + +入口: + from core.storage import get_engine, session_scope, ensure_local_sentinel + from core.storage.models import User, Task, Message, Run, UsageEvent + +ZCBOT_DB_URL 环境变量必填(本地连测试 / staging PG;SaaS 连生产 PG)。 +未设置时 get_engine() 抛 RuntimeError 并指引设置。 +""" +from .engine import ( + SENTINEL_USER_ID, + ensure_local_sentinel, + get_engine, + session_scope, +) + +__all__ = [ + "SENTINEL_USER_ID", + "ensure_local_sentinel", + "get_engine", + "session_scope", +] diff --git a/core/storage/engine.py b/core/storage/engine.py new file mode 100644 index 0000000..c7880c9 --- /dev/null +++ b/core/storage/engine.py @@ -0,0 +1,80 @@ +"""PG 连接 + Session factory + 本地 sentinel 初始化。 + +`ZCBOT_DB_URL` 必填,标准 SQLAlchemy URL,如: + postgresql+psycopg://user:pass@host:5432/zcbot + +未设置时 get_engine() 抛 RuntimeError 并打印指引(不引导 docker)。 +""" +from __future__ import annotations + +import os +from contextlib import contextmanager +from typing import Iterator, Optional + +from sqlalchemy import Engine, create_engine, select +from sqlalchemy.orm import Session, sessionmaker + +from .models import SENTINEL_USER_ID, User + +_engine: Optional[Engine] = None +_SessionLocal: Optional[sessionmaker[Session]] = None + + +_DB_URL_HINT = ( + "ZCBOT_DB_URL is not set.\n" + " export ZCBOT_DB_URL='postgresql+psycopg://user:pass@host:5432/dbname'\n" + " (local: dev/staging PG; SaaS: production PG)" +) + + +def _read_db_url() -> str: + url = os.environ.get("ZCBOT_DB_URL", "").strip() + if not url: + raise RuntimeError(_DB_URL_HINT) + return url + + +def get_engine() -> Engine: + """单例 engine。线程安全(SQLAlchemy 内置 pool)。""" + global _engine, _SessionLocal + if _engine is None: + url = _read_db_url() + _engine = create_engine(url, pool_pre_ping=True, future=True) + _SessionLocal = sessionmaker(bind=_engine, expire_on_commit=False, future=True) + return _engine + + +def get_sessionmaker() -> sessionmaker[Session]: + if _SessionLocal is None: + get_engine() + assert _SessionLocal is not None + return _SessionLocal + + +@contextmanager +def session_scope() -> Iterator[Session]: + """事务上下文:成功 commit,异常 rollback,总是 close。""" + sm = get_sessionmaker() + s = sm() + try: + yield s + s.commit() + except Exception: + s.rollback() + raise + finally: + s.close() + + +def ensure_local_sentinel() -> None: + """本地形态:若 users 表无 sentinel 行则 INSERT。 + + 本地 CLI 启动时调用一次,SaaS 形态不调用(用户由 auth 流程创建)。 + 幂等。 + """ + with session_scope() as s: + existing = s.execute( + select(User).where(User.user_id == SENTINEL_USER_ID) + ).scalar_one_or_none() + if existing is None: + s.add(User(user_id=SENTINEL_USER_ID)) diff --git a/core/storage/models.py b/core/storage/models.py new file mode 100644 index 0000000..a84a0dc --- /dev/null +++ b/core/storage/models.py @@ -0,0 +1,124 @@ +"""SQLAlchemy 2.x ORM models,对应 DESIGN.md §7.4 schema。 + +5 张表:users / tasks / messages / runs / usage_events。 +- users 本地形态固定 INSERT sentinel(`00000000-...`) +- messages.payload 用 jsonb,GIN 索引在 migration 里建 +- runs / usage_events 在 B 阶段先建表,真正写入要等 D 阶段(HTTP /v1 + run 生命周期) +""" +from __future__ import annotations + +from datetime import datetime +from decimal import Decimal +from typing import Any, Optional +from uuid import UUID, uuid4 + +from sqlalchemy import ( + BigInteger, + DateTime, + ForeignKey, + Integer, + Numeric, + Text, + UniqueConstraint, + func, +) +from sqlalchemy.dialects.postgresql import JSONB, UUID as PG_UUID +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column + + +class Base(DeclarativeBase): + pass + + +# 本地单用户 sentinel —— 所有本地 task 都 FK 到这一行 +SENTINEL_USER_ID: UUID = UUID("00000000-0000-0000-0000-000000000000") + + +class User(Base): + __tablename__ = "users" + + user_id: Mapped[UUID] = mapped_column(PG_UUID(as_uuid=True), primary_key=True, default=uuid4) + email: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + oidc_subject: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + password_hash: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + plan: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), nullable=False + ) + + +class Task(Base): + __tablename__ = "tasks" + + task_id: Mapped[UUID] = mapped_column(PG_UUID(as_uuid=True), primary_key=True, default=uuid4) + user_id: Mapped[UUID] = mapped_column( + PG_UUID(as_uuid=True), ForeignKey("users.user_id"), nullable=False + ) + task_dir: Mapped[str] = mapped_column(Text, nullable=False) + mode: Mapped[str] = mapped_column(Text, nullable=False, default="") + description: Mapped[str] = mapped_column(Text, nullable=False, default="") + status: Mapped[str] = mapped_column(Text, nullable=False, default="active") + model: Mapped[str] = mapped_column(Text, nullable=False, default="") + model_profile: Mapped[str] = mapped_column(Text, nullable=False, default="") + reasoning_effort: Mapped[str] = mapped_column(Text, nullable=False, default="") + tokens_prompt: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + tokens_completion: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + cost_usd: Mapped[Decimal] = mapped_column(Numeric(12, 6), nullable=False, default=0) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), nullable=False + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False + ) + + +class Message(Base): + __tablename__ = "messages" + __table_args__ = (UniqueConstraint("task_id", "idx", name="uq_messages_task_idx"),) + + message_id: Mapped[UUID] = mapped_column(PG_UUID(as_uuid=True), primary_key=True, default=uuid4) + task_id: Mapped[UUID] = mapped_column( + PG_UUID(as_uuid=True), + ForeignKey("tasks.task_id", ondelete="CASCADE"), + nullable=False, + ) + idx: Mapped[int] = mapped_column(Integer, nullable=False) + payload: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False) + tokens_in: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) + tokens_out: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), nullable=False + ) + + +class Run(Base): + __tablename__ = "runs" + + run_id: Mapped[UUID] = mapped_column(PG_UUID(as_uuid=True), primary_key=True, default=uuid4) + task_id: Mapped[UUID] = mapped_column( + PG_UUID(as_uuid=True), + ForeignKey("tasks.task_id", ondelete="CASCADE"), + nullable=False, + ) + status: Mapped[str] = mapped_column(Text, nullable=False, default="pending") + started_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + finished_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True) + error: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + tokens_p: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + tokens_c: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + + +class UsageEvent(Base): + """append-only 审计。task_id / run_id 不 FK,task 硬删后审计仍存活(§7.4)。""" + + __tablename__ = "usage_events" + + id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True) + user_id: Mapped[UUID] = mapped_column(PG_UUID(as_uuid=True), nullable=False) + task_id: Mapped[Optional[UUID]] = mapped_column(PG_UUID(as_uuid=True), nullable=True) + run_id: Mapped[Optional[UUID]] = mapped_column(PG_UUID(as_uuid=True), nullable=True) + kind: Mapped[str] = mapped_column(Text, nullable=False) + value: Mapped[Decimal] = mapped_column(Numeric(20, 8), nullable=False) + ts: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), nullable=False + ) diff --git a/db/migrations/env.py b/db/migrations/env.py new file mode 100644 index 0000000..335f0a9 --- /dev/null +++ b/db/migrations/env.py @@ -0,0 +1,61 @@ +"""Alembic env.py -- read DB URL from ZCBOT_DB_URL, metadata from core.storage.models.""" +from __future__ import annotations + +import os +import sys +from logging.config import fileConfig +from pathlib import Path + +from alembic import context +from sqlalchemy import engine_from_config, pool + +# Make project root importable so we can pull in core.storage.models +ROOT = Path(__file__).resolve().parent.parent.parent +sys.path.insert(0, str(ROOT)) + +from core.storage.models import Base # noqa: E402 + +config = context.config + +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# Inject URL from env var (not hardcoded in alembic.ini) +db_url = os.environ.get("ZCBOT_DB_URL", "").strip() +if not db_url: + raise RuntimeError( + "ZCBOT_DB_URL is not set.\n" + " export ZCBOT_DB_URL='postgresql+psycopg://user:pass@host:5432/dbname'" + ) +config.set_main_option("sqlalchemy.url", db_url) + +target_metadata = Base.metadata + + +def run_migrations_offline() -> None: + context.configure( + url=db_url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/db/migrations/script.py.mako b/db/migrations/script.py.mako new file mode 100644 index 0000000..17dcba0 --- /dev/null +++ b/db/migrations/script.py.mako @@ -0,0 +1,25 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/db/migrations/versions/20260514_0930_0001_initial_schema.py b/db/migrations/versions/20260514_0930_0001_initial_schema.py new file mode 100644 index 0000000..4230716 --- /dev/null +++ b/db/migrations/versions/20260514_0930_0001_initial_schema.py @@ -0,0 +1,125 @@ +"""initial schema -- users / tasks / messages / runs / usage_events + +Revision ID: 0001 +Revises: +Create Date: 2026-05-14 + +DESIGN.md section 7.4 schema. First migration. +- pgcrypto extension fallback (PG 13+ has gen_random_uuid built-in; + older versions need the extension). +- messages.payload GIN index (jsonb_path_ops). +- tasks (user_id, task_dir) and (user_id, status) composite indexes. +- Local sentinel user is INSERTed by core.storage.ensure_local_sentinel + at CLI startup, NOT in this migration (avoids stray sentinel rows on + the SaaS instance). +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +revision: str = "0001" +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.execute("CREATE EXTENSION IF NOT EXISTS pgcrypto") + + op.create_table( + "users", + sa.Column("user_id", postgresql.UUID(as_uuid=True), primary_key=True, + server_default=sa.text("gen_random_uuid()")), + sa.Column("email", sa.Text(), nullable=True), + sa.Column("oidc_subject", sa.Text(), nullable=True), + sa.Column("password_hash", sa.Text(), nullable=True), + sa.Column("plan", sa.Text(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), + server_default=sa.text("now()"), nullable=False), + ) + + op.create_table( + "tasks", + sa.Column("task_id", postgresql.UUID(as_uuid=True), primary_key=True, + server_default=sa.text("gen_random_uuid()")), + sa.Column("user_id", postgresql.UUID(as_uuid=True), + sa.ForeignKey("users.user_id"), nullable=False), + sa.Column("task_dir", sa.Text(), nullable=False), + sa.Column("mode", sa.Text(), nullable=False, server_default=""), + sa.Column("description", sa.Text(), nullable=False, server_default=""), + sa.Column("status", sa.Text(), nullable=False, server_default="active"), + sa.Column("model", sa.Text(), nullable=False, server_default=""), + sa.Column("model_profile", sa.Text(), nullable=False, server_default=""), + sa.Column("reasoning_effort", sa.Text(), nullable=False, server_default=""), + sa.Column("tokens_prompt", sa.Integer(), nullable=False, server_default="0"), + sa.Column("tokens_completion", sa.Integer(), nullable=False, server_default="0"), + sa.Column("cost_usd", sa.Numeric(12, 6), nullable=False, server_default="0"), + sa.Column("created_at", sa.DateTime(timezone=True), + server_default=sa.text("now()"), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), + server_default=sa.text("now()"), nullable=False), + ) + op.create_index("ix_tasks_user_task_dir", "tasks", ["user_id", "task_dir"]) + op.create_index("ix_tasks_user_status", "tasks", ["user_id", "status"]) + + op.create_table( + "messages", + sa.Column("message_id", postgresql.UUID(as_uuid=True), primary_key=True, + server_default=sa.text("gen_random_uuid()")), + sa.Column("task_id", postgresql.UUID(as_uuid=True), + sa.ForeignKey("tasks.task_id", ondelete="CASCADE"), nullable=False), + sa.Column("idx", sa.Integer(), nullable=False), + sa.Column("payload", postgresql.JSONB(), nullable=False), + sa.Column("tokens_in", sa.Integer(), nullable=True), + sa.Column("tokens_out", sa.Integer(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), + server_default=sa.text("now()"), nullable=False), + sa.UniqueConstraint("task_id", "idx", name="uq_messages_task_idx"), + ) + op.create_index( + "ix_messages_payload_gin", "messages", ["payload"], + postgresql_using="gin", postgresql_ops={"payload": "jsonb_path_ops"}, + ) + + op.create_table( + "runs", + sa.Column("run_id", postgresql.UUID(as_uuid=True), primary_key=True, + server_default=sa.text("gen_random_uuid()")), + sa.Column("task_id", postgresql.UUID(as_uuid=True), + sa.ForeignKey("tasks.task_id", ondelete="CASCADE"), nullable=False), + sa.Column("status", sa.Text(), nullable=False, server_default="pending"), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("finished_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("error", sa.Text(), nullable=True), + sa.Column("tokens_p", sa.Integer(), nullable=False, server_default="0"), + sa.Column("tokens_c", sa.Integer(), nullable=False, server_default="0"), + ) + op.create_index("ix_runs_task", "runs", ["task_id"]) + + op.create_table( + "usage_events", + sa.Column("id", sa.BigInteger(), primary_key=True, autoincrement=True), + sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("task_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("run_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("kind", sa.Text(), nullable=False), + sa.Column("value", sa.Numeric(20, 8), nullable=False), + sa.Column("ts", sa.DateTime(timezone=True), + server_default=sa.text("now()"), nullable=False), + ) + op.create_index("ix_usage_user_ts", "usage_events", ["user_id", "ts"]) + + +def downgrade() -> None: + op.drop_index("ix_usage_user_ts", table_name="usage_events") + op.drop_table("usage_events") + op.drop_index("ix_runs_task", table_name="runs") + op.drop_table("runs") + op.drop_index("ix_messages_payload_gin", table_name="messages") + op.drop_table("messages") + op.drop_index("ix_tasks_user_status", table_name="tasks") + op.drop_index("ix_tasks_user_task_dir", table_name="tasks") + op.drop_table("tasks") + op.drop_table("users") diff --git a/requirements.txt b/requirements.txt index 50d5740..8817068 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,8 @@ matplotlib>=3.8.0 # 素材摄取: PDF/DOCX/PPTX/XLSX/HTML/URL → Markdown (ppt 阶段零 + proposal 阶段零) markitdown[pdf,docx,pptx,xlsx]>=0.0.1 + +# §7 B 阶段: Storage 落 PG +sqlalchemy>=2.0.0 +psycopg[binary]>=3.1.0 +alembic>=1.13.0