321 lines
13 KiB
Python
321 lines
13 KiB
Python
"""装配入口: 读 config → 加载 capabilities/skills → 构造 LLM/tools/session/loop。
|
|
|
|
存储布局(§7.0 / §7.4):本地 + SaaS 共用 `workspace/` 根,只差 user_id:
|
|
|
|
PG tasks / messages ← 元数据 + 消息
|
|
workspace/users/<user_id>/<working_dir>/ ← 工作目录(用户起名,可多 task 共享)
|
|
workspace/users/<user_id>/.memory/{core.md, extended/} ← per-user 记忆(dotfile 隔离)
|
|
|
|
所有入口都走 web `/v1` + JWT(user_id = sub);dev SPA 走邮箱密码登录
|
|
(`users.email/password_hash`,bcrypt)、platform 服务端走 platform_key 登录。task_id / user_id 全 UUID;
|
|
state.json 已删除(元数据全在 PG)。
|
|
|
|
**新建 task 必须给 `name`**(任务显示名,DB 列 NOT NULL);**`working_dir` 可选**
|
|
(留空 → 用 name 作目录名;同 working_dir 多 task 自动共享 §7.1)。name 和 working_dir
|
|
都过同一份 `validate_task_name` 校验(简单名,不含 `/\\..`、不以 `.` 起头)。
|
|
`_cleanup_if_empty` 不 rmtree FS —— 同 working_dir 跨 task 复用,空 task 只删 DB 行。
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Optional, Tuple
|
|
from uuid import UUID, uuid4
|
|
|
|
import yaml
|
|
from rich.console import Console
|
|
|
|
from core.capabilities import ModelCapabilities
|
|
from core.llm import LLM
|
|
from core.loop import AgentLoop
|
|
from core.memory import memory_block
|
|
from core.paths import ROOT, from_db_path, to_db_path
|
|
from core.session import Session
|
|
from core.sinks import ConsoleEventSink
|
|
from core.skills import SkillRegistry
|
|
from core.storage import check_no_subtask
|
|
from core.task import TaskState
|
|
from tools.fs import EditTool, GlobTool, GrepTool, ReadTool, WriteTool
|
|
from tools.run_python import RunPythonTool
|
|
from tools.shell import ShellTool
|
|
from tools.skill_tool import LoadSkillTool
|
|
|
|
|
|
def load_config() -> dict:
|
|
return yaml.safe_load((ROOT / "config" / "agent.yaml").read_text(encoding="utf-8")) or {}
|
|
|
|
|
|
def resolve_workspace(workspace: Optional[str], cfg: Optional[dict] = None) -> Path:
|
|
cfg = cfg or load_config()
|
|
p = Path(workspace) if workspace else ROOT / cfg.get("workspace_dir", "workspace")
|
|
p.mkdir(parents=True, exist_ok=True)
|
|
return p
|
|
|
|
|
|
def user_root(workspace_dir: Path, user_id: UUID) -> Path:
|
|
"""per-user 子树根:`<workspace>/users/<user_id>/`。working_dir / `.memory/` 都在下面。"""
|
|
d = workspace_dir / "users" / str(user_id)
|
|
d.mkdir(parents=True, exist_ok=True)
|
|
return d
|
|
|
|
|
|
class InvalidTaskName(ValueError):
|
|
"""task name / working_dir 不合法(空 / 含分隔符 / dotfile 起头 / 超长)。"""
|
|
|
|
|
|
def validate_task_name(name: str) -> str:
|
|
"""返回 stripped name;非法抛 InvalidTaskName。
|
|
|
|
name 和 working_dir 共用一份规则:非空 / 不含 `/\\` 和 NUL / 不以 `.` 起头
|
|
(挡 `.memory` 等系统区)/ ≤ 255 字符。允许 CJK 与其他 Unicode 字符。
|
|
"""
|
|
n = (name or "").strip()
|
|
if not n:
|
|
raise InvalidTaskName("name 不能为空")
|
|
if len(n) > 255:
|
|
raise InvalidTaskName(f"name 超长(>255 字符): {n[:40]!r}...")
|
|
if any(c in n for c in ("/", "\\", "\x00")):
|
|
raise InvalidTaskName(f"name 不能含 `/` `\\` 或 NUL: {n!r}")
|
|
if n.startswith("."):
|
|
raise InvalidTaskName(
|
|
f"name 不能以 `.` 起头(保留给 .memory 等系统区): {n!r}"
|
|
)
|
|
return n
|
|
|
|
|
|
def working_dir_from_name(workspace_dir: Path, user_id: UUID, dir_name: str) -> Path:
|
|
"""`<workspace>/users/<user_id>/<dir_name>` 绝对路径。
|
|
|
|
入参 dir_name 由 `validate_task_name` 在入口校验过;本函数只拼路径,不 mkdir
|
|
(目录创建放在 task 创建入口 build_agent / web `/v1/tasks`,函数保持纯)。
|
|
"""
|
|
return user_root(workspace_dir, user_id) / dir_name
|
|
|
|
|
|
def resolve_task_id(
|
|
workspace_dir: Path,
|
|
task_id_arg: Optional[str],
|
|
resume: bool,
|
|
user_id: UUID,
|
|
working_dir_name: Optional[str] = None,
|
|
) -> Tuple[UUID, Path]:
|
|
"""返回 (task_id, working_dir 绝对路径)。
|
|
|
|
新建:`working_dir_name` 必填(调用方应已 fallback 到 name + 校验过),
|
|
工作目录 = `<workspace>/users/<uid>/<working_dir_name>/`。
|
|
Resume:`task_id_arg` 是完整 UUID 字符串(web 路由进来的总是 UUID),
|
|
working_dir 从 PG `tasks.working_dir` 读还原;`working_dir_name` 在 resume 时被忽略。
|
|
"""
|
|
if resume:
|
|
from sqlalchemy import select
|
|
from core.storage import session_scope
|
|
from core.storage.models import Task
|
|
|
|
tid = UUID(task_id_arg) if task_id_arg else None
|
|
if tid is None:
|
|
raise ValueError("resume 必须指定 task_id")
|
|
with session_scope() as s:
|
|
db_dir = s.execute(
|
|
select(Task.working_dir).where(Task.task_id == tid)
|
|
).scalar_one_or_none() or ""
|
|
if not db_dir:
|
|
raise ValueError(
|
|
f"task {tid} has empty working_dir in DB — should not happen "
|
|
"(new tasks require name + working_dir; legacy empty data was wiped)"
|
|
)
|
|
# DB 存的是 db 形态(相对 ROOT 或绝对),走 from_db_path 还原绝对
|
|
return tid, from_db_path(db_dir)
|
|
|
|
if not working_dir_name:
|
|
raise InvalidTaskName("new task 必须指定 working_dir(或留空 fallback 用 name)")
|
|
safe = validate_task_name(working_dir_name)
|
|
return uuid4(), working_dir_from_name(workspace_dir, user_id, safe)
|
|
|
|
|
|
def _build_system_prompt(
|
|
cfg: dict,
|
|
skills: SkillRegistry,
|
|
workspace_dir: Path,
|
|
tool_base: Path,
|
|
working_dir: Path,
|
|
user_id: UUID,
|
|
) -> str:
|
|
"""拼 system prompt: 模板 + skill 列表 + memory + 工作目录段。
|
|
|
|
new task 和 resume task 都走这里,memory 演化即时生效。memory 按 user_id 隔离。
|
|
"""
|
|
prompt = (ROOT / cfg["system_prompt"]).read_text(encoding="utf-8")
|
|
if skills.skills:
|
|
prompt += f"\n\n## 可用 skill (用 load_skill 加载完整指引)\n{skills.discovery_block()}"
|
|
prompt += memory_block(workspace_dir, user_id)
|
|
wd_abs = working_dir.resolve()
|
|
prompt += (
|
|
f"\n\n## 工作目录\n"
|
|
f"- cwd(用户启动时所在目录,只读用): `{tool_base}`\n"
|
|
f"- **task_dir(所有产物写到这里)**: `{wd_abs}`\n\n"
|
|
f"SKILL 文档里出现的 `<task_dir>` 占位符,一律指上面这个绝对路径。"
|
|
f"产物示例: `{wd_abs}/spec_lock.md`、"
|
|
f"`{wd_abs}/sections/01_summary.md`、"
|
|
f"`{wd_abs}/slides/`、最终 .docx/.pptx。\n"
|
|
f"⛔ 不要把产物写到 cwd / `skills/` / repo 根 —— 只写到 task_dir。"
|
|
)
|
|
return prompt
|
|
|
|
|
|
def build_agent(
|
|
*,
|
|
user_id: UUID,
|
|
model_name: Optional[str] = None,
|
|
workspace: Optional[str] = None,
|
|
console: Optional[Console] = None,
|
|
session_id: Optional[str] = None,
|
|
resume: bool = False,
|
|
tool_base: Optional[Path] = None,
|
|
skill: str = "",
|
|
description: str = "",
|
|
name: Optional[str] = None,
|
|
working_dir: Optional[str] = None,
|
|
) -> Tuple[AgentLoop, Session, str, TaskState, Path]:
|
|
"""返回 (agent, session, task_id_str, task_state, working_dir_path)。
|
|
|
|
新建 task:
|
|
- `name` 必填(任务显示名,DB 列 NOT NULL,走 validate_task_name)
|
|
- `working_dir` 可选(留空 → fallback 用 name 作目录名;非空也走 validate_task_name)
|
|
Resume:name / working_dir 都忽略(从 DB 读)。
|
|
|
|
`user_id` 必填,决定 working_dir 根、memory 子树、no-subtask 校验作用域。
|
|
web 入口从 JWT 拿到后透传;不允许无 user 的调用路径。
|
|
"""
|
|
cfg = load_config()
|
|
uid = user_id
|
|
|
|
# model 选择优先级:caller 传参 > resume 时 task.model_profile > cfg["default_model"]。
|
|
# caller 传参为新建 task 时 web POST /v1/tasks 接收的 model_profile 字段;resume
|
|
# 不传时读 tasks 表(由顶栏下拉切换 PATCH 维护)。整体满足 grill A 粒度:下条 send 生效。
|
|
model = model_name
|
|
if model is None and resume and session_id:
|
|
from sqlalchemy import select as _select
|
|
from core.storage import session_scope as _scope
|
|
from core.storage.models import Task as _Task
|
|
with _scope() as _s:
|
|
model = _s.execute(
|
|
_select(_Task.model_profile).where(_Task.task_id == UUID(session_id))
|
|
).scalar_one_or_none() or None
|
|
if not model:
|
|
model = cfg["default_model"]
|
|
|
|
caps = ModelCapabilities.load(model, ROOT / cfg["models_dir"])
|
|
llm = LLM(caps)
|
|
|
|
workspace_dir = resolve_workspace(workspace, cfg)
|
|
|
|
# 新建时校验 name + 解析 working_dir(留空 fallback 用 name);resume 跳过
|
|
task_name_safe = ""
|
|
wd_name_for_resolve: Optional[str] = None
|
|
if not resume:
|
|
if not name:
|
|
raise InvalidTaskName("new task 必须指定 name(任务显示名)")
|
|
task_name_safe = validate_task_name(name)
|
|
wd_raw = (working_dir or "").strip()
|
|
wd_name = wd_raw if wd_raw else task_name_safe
|
|
wd_name_for_resolve = validate_task_name(wd_name)
|
|
|
|
task_id, working_dir_path = resolve_task_id(
|
|
workspace_dir, session_id, resume, uid, wd_name_for_resolve
|
|
)
|
|
sid = str(task_id)
|
|
|
|
# §7.4 no-subtask:新建 task 时校验 working_dir 不与同 user 已有 task 形成前缀嵌套
|
|
# (resume 跳过 —— 该 task 已落库,改名走 Folder API 的 cascade)
|
|
if not resume:
|
|
check_no_subtask(str(working_dir_path), user_id=uid)
|
|
# working_dir 立刻建出 —— DB 是 source of truth,FS 目录视为可重生的视图。
|
|
# resume 时也兜底 mkdir(用户可能经 /v1/files/delete 删过空目录),
|
|
# 同 working_dir 多 task 共享,exist_ok=True 不冲突。
|
|
working_dir_path.mkdir(parents=True, exist_ok=True)
|
|
|
|
tool_base = Path(tool_base) if tool_base else Path.cwd()
|
|
|
|
skills = SkillRegistry(ROOT / cfg.get("skills_dir", "skills"))
|
|
|
|
system_prompt = _build_system_prompt(
|
|
cfg, skills, workspace_dir, tool_base, working_dir_path, uid
|
|
)
|
|
|
|
now_iso = datetime.now().isoformat(timespec="seconds")
|
|
# meta["working_dir"] 是 db 形态(相对 ROOT 或绝对);Session.append → ensure_local_task_row
|
|
# 把它直接落 PG tasks.working_dir,所以这里就转好。文件系统操作仍用 working_dir_path(absolute)。
|
|
wd_db = to_db_path(working_dir_path)
|
|
meta = {
|
|
"id": sid,
|
|
"created_at": now_iso,
|
|
"cwd": str(tool_base),
|
|
"name": task_name_safe, # resume 时空字符串(Session.load 会从 DB 拿不到 -- 不要紧,ensure 走 ON CONFLICT DO NOTHING)
|
|
"working_dir": wd_db,
|
|
"model": caps.model_id,
|
|
"model_profile": model,
|
|
"skill": skill,
|
|
"description": description,
|
|
"reasoning_effort": caps.default_reasoning_effort or "",
|
|
}
|
|
|
|
if resume:
|
|
session = Session.load(task_id, system_prompt=system_prompt, meta=meta)
|
|
task_state = TaskState.load(task_id)
|
|
if task_state is None:
|
|
# tasks 行不存在 —— 理论上 resolve_task_id 已经定位到 DB 行了,走到这里
|
|
# 说明被并发删了,兜底构造空 state(不主动 save,等下条 append / 命令)
|
|
task_state = TaskState(
|
|
task_id=sid, user_id=uid, name="", working_dir=wd_db,
|
|
skill=skill, description=description, status="active",
|
|
model=caps.model_id, model_profile=model,
|
|
)
|
|
# resume 时 meta name 用 DB 里读出来的真值(给 Session.append → ensure 用,避免落空串)
|
|
meta["name"] = task_state.name
|
|
else:
|
|
session = Session(task_id=task_id, system_prompt=system_prompt, meta=meta)
|
|
# 懒创建:TaskState 仅内存。tasks 行在首条 user 消息 append 时由
|
|
# ensure_local_task_row 占位 INSERT(name 已就位);首次 sync_task_tokens
|
|
# 或 /done /desc 走 upsert 覆盖完整字段。
|
|
task_state = TaskState(
|
|
task_id=sid, user_id=uid, name=task_name_safe, working_dir=wd_db,
|
|
skill=skill, description=description, status="active",
|
|
model=caps.model_id, model_profile=model,
|
|
reasoning_effort=caps.default_reasoning_effort or "",
|
|
)
|
|
|
|
tools = {}
|
|
for cls in (ReadTool, WriteTool, EditTool, GlobTool, GrepTool, ShellTool):
|
|
t = cls(base_dir=tool_base)
|
|
tools[t.name] = t
|
|
|
|
if skills.skills:
|
|
ls = LoadSkillTool(registry=skills, base_dir=tool_base)
|
|
tools[ls.name] = ls
|
|
|
|
if caps.enable_run_python:
|
|
rp = RunPythonTool(base_dir=tool_base)
|
|
tools[rp.name] = rp
|
|
|
|
sink = ConsoleEventSink(console, token_counter=lambda: llm.token_counter.total) if console else None
|
|
agent = AgentLoop(llm, tools, session, caps, user_id=uid, sink=sink)
|
|
return agent, session, sid, task_state, working_dir_path
|
|
|
|
|
|
def sync_task_tokens(task_state: TaskState, llm: LLM) -> None:
|
|
"""每轮 agent.run 后调,把 LLM 累计 tokens UPDATE 到 PG tasks 表。
|
|
|
|
走 update_task 而非 task_state.save() —— 只更 tokens 两列,避免无谓全字段 UPSERT
|
|
且 ORM-level update 自动刷 updated_at。
|
|
"""
|
|
from uuid import UUID
|
|
from core.storage import update_task
|
|
tc = llm.token_counter
|
|
task_state.tokens_prompt = tc.prompt_tokens
|
|
task_state.tokens_completion = tc.completion_tokens
|
|
update_task(
|
|
UUID(task_state.task_id),
|
|
tokens_prompt=tc.prompt_tokens,
|
|
tokens_completion=tc.completion_tokens,
|
|
)
|