zcbot/core/storage/models.py

268 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""SQLAlchemy 2.x ORM models,对应 DESIGN.md §7.4 schema。
4 张表:users / tasks / messages / usage_events。
- users 行在 web 入口按需 INSERT(`/v1/auth/login_password` 实际创行 / `/v1/auth/login`
platform_key 入口 ensure_user_row);email UNIQUE(0005)给 login lookup 用,
password_hash 是 bcrypt(`bcrypt.hashpw`),只在邮箱密码登录时有值
- messages.payload 用 jsonb,GIN 索引在 migration 里建;messages.model_profile
(0006)只在 assistant 行有值,标注产生该条 message 的模型
- run 状态承载在 tasks.run_status / run_error 两列(0004 合并 runs 表);
原 runs / 旧 usage_events 表 0004 删 — 详 DESIGN §7.4 取舍 / PROGRESS 05-18
- usage_events(0006 v2 形态):per-event 多态用量行,kind=chat/image/video/...,
units 列 JSONB(chat 装 tokens_in/out,image 装 count/size 等)。用户级聚合
走 (user_id, created_at) 复合索引,JOIN-free。详 DESIGN §7.4 / PROGRESS 05-19
"""
from __future__ import annotations
from datetime import datetime
from decimal import Decimal
from typing import Any, Optional
from uuid import UUID, uuid4
from sqlalchemy import (
BigInteger,
Boolean,
DateTime,
ForeignKey,
Integer,
Numeric,
Text,
UniqueConstraint,
func,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID as PG_UUID
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
class Base(DeclarativeBase):
pass
class User(Base):
__tablename__ = "users"
user_id: Mapped[UUID] = mapped_column(PG_UUID(as_uuid=True), primary_key=True, default=uuid4)
email: Mapped[Optional[str]] = mapped_column(Text, nullable=True, unique=True)
oidc_subject: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
password_hash: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
plan: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
# 0016:平台登录注入的用户档案。name=显示名/姓名,user_name=平台账号名;均 nullable
# (platform_key 入口 ensure_user_row upsert 写;邮箱密码 / 历史行留空)。未来 OIDC
# 接管时由 ID token 的 name / preferred_username claim 注入,数据流不变。
name: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
user_name: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
# 0009:访问角色。'user'(默认)/ 'admin';仅 admin 可访问 /v1/admin/* 管理端点。
# 提管理员:main.py user role --email X --role admin。
role: Mapped[str] = mapped_column(Text, nullable=False, server_default="user")
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), nullable=False
)
class Task(Base):
__tablename__ = "tasks"
task_id: Mapped[UUID] = mapped_column(PG_UUID(as_uuid=True), primary_key=True, default=uuid4)
user_id: Mapped[UUID] = mapped_column(
PG_UUID(as_uuid=True), ForeignKey("users.user_id"), nullable=False
)
name: Mapped[str] = mapped_column(Text, nullable=False)
working_dir: Mapped[str] = mapped_column(Text, nullable=False)
skill: Mapped[str] = mapped_column(Text, nullable=False, default="")
description: Mapped[str] = mapped_column(Text, nullable=False, default="")
# 渠道来源(0011):web=网页端常规任务 / wechat=微信 ClawBot 常驻对话。
# 仅 INSERT 时由建 task 方写定,后续 upsert/save 不传 → 不覆盖。前端据此打徽章 + 置顶。
channel: Mapped[str] = mapped_column(Text, nullable=False, default="web", server_default="web")
status: Mapped[str] = mapped_column(Text, nullable=False, default="active")
model: Mapped[str] = mapped_column(Text, nullable=False, default="")
model_profile: Mapped[str] = mapped_column(Text, nullable=False, default="")
reasoning_effort: Mapped[str] = mapped_column(Text, nullable=False, default="")
tokens_prompt: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
tokens_completion: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
cost_cny: Mapped[Decimal] = mapped_column(Numeric(12, 6), nullable=False, default=0)
# 当前 in-flight 状态(原 runs 表合并入,DESIGN §7.4 简化 / 0004 migration):
# idle / running / cancelling / error;ok / cancelled 收尾直接回 idle,
# 只有 error 是持久终态(下次起新 run 时由 post_message 清掉)
run_status: Mapped[str] = mapped_column(Text, nullable=False, default="idle")
run_error: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), nullable=False
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False
)
# 软删除标记(0010):置时间即"逻辑删除",从列表隐藏但 DB 行 / messages / usage_events /
# 工作目录文件全部保留(留作语料 + 可恢复)。NULL = 未删。物理删只在管理员清理时走。
deleted_at: Mapped[Optional[datetime]] = mapped_column(
DateTime(timezone=True), nullable=True
)
class Message(Base):
__tablename__ = "messages"
__table_args__ = (UniqueConstraint("task_id", "idx", name="uq_messages_task_idx"),)
message_id: Mapped[UUID] = mapped_column(PG_UUID(as_uuid=True), primary_key=True, default=uuid4)
task_id: Mapped[UUID] = mapped_column(
PG_UUID(as_uuid=True),
ForeignKey("tasks.task_id", ondelete="CASCADE"),
nullable=False,
)
idx: Mapped[int] = mapped_column(Integer, nullable=False)
payload: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False)
tokens_in: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
tokens_out: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
# 0006:产生该 message 的模型(只在 assistant 行有值;user/tool/system 为 NULL)。
# 跟 usage_events.model_profile 写入一致,JOIN-free 时按 message 直查也能拿到。
model_profile: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), nullable=False
)
class UsageEvent(Base):
"""per-event 用量记账(0006 v2 形态)。
一行 = 一次产生成本的调用。chat 类型由 loop 在 assistant message 入库后写入;
未来的媒体工具(image/video/audio)在 tool execute 完后由 loop 顺手记账。
units 列 polymorphic JSONB —— chat: {"tokens_in": N, "tokens_out": M};
image: {"count": K, "size": "1024x1024"};按 kind 约定。
按 user 聚合的统计 query 走 (user_id, created_at) 索引,不 JOIN tasks 表。
"""
__tablename__ = "usage_events"
event_id: Mapped[UUID] = mapped_column(PG_UUID(as_uuid=True), primary_key=True, default=uuid4)
user_id: Mapped[UUID] = mapped_column(
PG_UUID(as_uuid=True), ForeignKey("users.user_id"), nullable=False
)
task_id: Mapped[UUID] = mapped_column(
PG_UUID(as_uuid=True),
ForeignKey("tasks.task_id", ondelete="CASCADE"),
nullable=False,
)
message_id: Mapped[Optional[UUID]] = mapped_column(
PG_UUID(as_uuid=True),
ForeignKey("messages.message_id", ondelete="SET NULL"),
nullable=True,
)
kind: Mapped[str] = mapped_column(Text, nullable=False) # chat / image / video / audio / ...
model_profile: Mapped[str] = mapped_column(Text, nullable=False) # deepseek_v4.pro / dall-e-3 / ...
units: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False)
cost_cny: Mapped[Decimal] = mapped_column(Numeric(12, 6), nullable=False, default=0)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), nullable=False
)
class UserDiskUsage(Base):
"""per-user 工作目录字节使用快照(0008,§7.5 #4 软配额表)。
每个 user_id 单行 upsert,lifespan 后台 task 周期(默 15min)扫描 user_root 落库;
write 前 gate(DockerExecutor / /v1/files/upload)查这表对比 yaml `quotas.disk_bytes_per_user`,
超额返 [Error] 硬阻。
扫描间隙写入会突破上限一点(race-tolerant,跟 image/video 配额一致接受);外部用户
开放前 OS 层 xfs prjquota 兜底真上限。详 DESIGN §7.5 #4 / PROGRESS。
"""
__tablename__ = "user_disk_usage"
user_id: Mapped[UUID] = mapped_column(
PG_UUID(as_uuid=True),
ForeignKey("users.user_id", ondelete="CASCADE"),
primary_key=True,
)
bytes_used: Mapped[int] = mapped_column(BigInteger, nullable=False, default=0)
file_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
scanned_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), nullable=False
)
class ScheduledJob(Base):
"""定时任务(0011,DESIGN §8.5)。
一行 = 一个"到点把 prompt 喂进 agent 主管线"的计划。本体 = cron+tz(何时)
+ prompt(做什么)+ mode(跑在哪);"发邮件"不是字段,是 agent 据 prompt 调
send_email 的动作。仅 notify(可空 JSONB)给"必达某邮箱"留确定性兜底。
守护循环(web/app.py lifespan `_scheduler_loop`,仿 _disk_scanner)每 ~30s 扫
`enabled AND deleted_at IS NULL AND next_run_at<=now()`,命中即复用 _run_agent_bg
起 run,跑完回写 last_* + croniter 算 next_run_at。mode:
- isolated(默认):每次新建临时 task,只带本 job 的 prompt,不继承历史 → 省 token
- persistent:绑定 bound_task_id 常驻 task,追加消息有跨天连续性
"""
__tablename__ = "scheduled_jobs"
job_id: Mapped[UUID] = mapped_column(PG_UUID(as_uuid=True), primary_key=True, default=uuid4)
user_id: Mapped[UUID] = mapped_column(
PG_UUID(as_uuid=True), ForeignKey("users.user_id", ondelete="CASCADE"), nullable=False
)
name: Mapped[str] = mapped_column(Text, nullable=False)
prompt: Mapped[str] = mapped_column(Text, nullable=False)
cron: Mapped[str] = mapped_column(Text, nullable=False) # 标准 5 段 cron
tz: Mapped[str] = mapped_column(Text, nullable=False, server_default="Asia/Shanghai")
mode: Mapped[str] = mapped_column(Text, nullable=False, server_default="isolated") # isolated|persistent
# persistent 模式绑定的常驻 task;task 软删/物理删后 SET NULL(下次触发当 isolated 兜底)
bound_task_id: Mapped[Optional[UUID]] = mapped_column(
PG_UUID(as_uuid=True), ForeignKey("tasks.task_id", ondelete="SET NULL"), nullable=True
)
skill: Mapped[str] = mapped_column(Text, nullable=False, server_default="") # 可选预载 skill
model_profile: Mapped[str] = mapped_column(Text, nullable=False, server_default="") # 可选模型覆盖
# 第 3 层可靠投递:{"channel":"email","to":"a@b.com"};NULL=不兜底(走 prompt 驱动/线程未读)
notify: Mapped[Optional[dict[str, Any]]] = mapped_column(JSONB, nullable=True)
enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, server_default="true")
timeout_seconds: Mapped[int] = mapped_column(Integer, nullable=False, server_default="0") # 0=不限
next_run_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False)
last_run_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
last_status: Mapped[Optional[str]] = mapped_column(Text, nullable=True) # ok|error|skipped
last_error: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
last_task_id: Mapped[Optional[UUID]] = mapped_column(PG_UUID(as_uuid=True), nullable=True)
consecutive_failures: Mapped[int] = mapped_column(Integer, nullable=False, server_default="0")
run_count: Mapped[int] = mapped_column(Integer, nullable=False, server_default="0")
expires_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), nullable=False
)
deleted_at: Mapped[Optional[datetime]] = mapped_column(
DateTime(timezone=True), nullable=True
)
class ChannelBinding(Base):
"""微信渠道绑定(0015,DESIGN §8.7 渠道抽象)。
一行 = 一个用户在某渠道(`channel`)的一份绑定配置;PK=(user_id, channel) → 1 用户每渠道 1 行。
沿用本库「判别列 + JSONB 多态」范式(同 usage_events.kind+units / scheduled_jobs.notify):
各渠道配置字段不同,全装进 `config` JSONB,加渠道不动 schema、不再各建一表。
config 形态(敏感字段经 core/wechat/crypto.py 加密入 JSONB,绝不进沙箱/日志/API):
- channel='clawbot':{bot_token*, bot_im_id, user_im_id, base_url, latest_context_token*,
context_token_at(iso), chat_task_id(str)} —— *=密文;context_token 24h 窗口主动推靠它。
- channel='wecom':{wecom_userid} —— 企业成员 id,非密钥、明文;无条件推。
chat_task_id/FK、per-字段 NOT NULL 退到应用层校验,与 usage_events JSONB 同向取舍。)
"""
__tablename__ = "channel_bindings"
user_id: Mapped[UUID] = mapped_column(
PG_UUID(as_uuid=True),
ForeignKey("users.user_id", ondelete="CASCADE"),
primary_key=True,
)
channel: Mapped[str] = mapped_column(Text, primary_key=True) # clawbot | wecom | ...
status: Mapped[str] = mapped_column(Text, nullable=False, server_default="active") # active|revoked
config: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False, default=dict)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), nullable=False
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), nullable=False
)