240 lines
9.6 KiB
Python
240 lines
9.6 KiB
Python
"""DockerExecutor:`shell` / `run_python` 走 docker exec,其余 in-process(§7.5 #6)。

Backend 二分(§7.5 #6 信任域):
- host in-process:`read/write/edit/glob/grep/load_skill/web_*/seedream/seedance`
  原本就在 host 持凭据(Bocha key / ARK key)或走 `paths.py::resolve_user_path` 校验
  (user-rooted 安全边界已存),塞容器无收益付 ~200ms exec overhead × N 次
- container exec:`shell` / `run_python` —— 执行模型生成的任意代码,必须容器隔离

容器准入(per call):
1. `pool.ensure(user_id)` —— 拿到 / 起 `zcbot-sandbox-<uid>` 容器(per-user lock 已串行化)
2. `docker exec --user 1000:1000 --workdir /workspace/<wd_name> <c> setsid bash -c '<cmd>'`
3. timeout 到 → 杀 docker CLI 客户端(Popen.kill())
4. 完成 → `pool.mark_active(user_id)` 刷 idle 计时

run_python tmp .py 落 host 侧 `<user_root>/.zcbot_tmp/<task_id>/<rand>.py`(bind mount
自动可见于容器 `/workspace/.zcbot_tmp/<task_id>/`),执行完 unlink。dotfile 起头让
`/v1/files` API 天然过滤(`web/app.py:169` startswith(".")),用户视野不污染。

Cancel limitation(第一版接受):
- docker exec 客户端断开后,容器内 server 端进程**不会**因此终止 —— 这是 docker 设计
- 第一版只杀 docker CLI(Popen.kill());容器内残留进程靠 idle 5min reaper / 下次
  ensure 时 rm -f 兜底
- 升级触发(§7.5 #3 PGID 协议):用户反馈"取消了但还在烧 CPU" / 多次 cancel 后
  容器内进程堆积 → 启用「ZCBOT_EXEC_ID env + PGID 写文件 + 二次 exec kill」协议
"""
|
||
from __future__ import annotations
|
||
|
||
import os
|
||
import secrets
|
||
import subprocess
|
||
import time
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List, Optional
|
||
from uuid import UUID
|
||
|
||
from .executor import ExecCtx, Executor, ToolResult
|
||
from .executor_host import HostExecutor
|
||
from .sandbox import SandboxPool
|
||
|
||
|
||
# Tools that must run inside the per-user container; every other tool is
# delegated to the in-process host executor.
CONTAINER_TOOLS = frozenset({"shell", "run_python"})

# Non-root user inside the container: aligned with the default values of the
# Dockerfile HOST_UID/HOST_GID build args.
# If the zcbot account's uid on the deployment host is not 1000, pass HOST_UID
# through at image build time and change the env `ZCBOT_SANDBOX_EXEC_USER`
# accordingly (see RUN.md, "Sandbox deployment" section).
DEFAULT_EXEC_USER = "1000:1000"

# Host-side directory for temporary scripts (a dotfile inside user_root, so it
# is hidden by the /v1/files API).
TMP_SUBDIR = ".zcbot_tmp"
|
||
|
||
|
||
class DockerExecutor(Executor):
    """Compose a HostExecutor with `docker exec` dispatch for shell/run_python.

    The host backend still provides the schema list and executes most tools;
    this class only takes over when `shell` or `run_python` is called, and runs
    them through `docker exec` inside the per-user container.
    """

    def __init__(
        self,
        host: HostExecutor,
        pool: SandboxPool,
        user_id: UUID,
        user_root: Path,
        working_dir: Path,
    ) -> None:
        """Bind the executor to one user's container and working directory.

        Args:
            host: In-process executor used for schemas and non-container tools.
            pool: Sandbox pool that hands out the per-user container.
            user_id: Owner of the container.
            user_root: Host directory that appears as `/workspace` in the
                container (per the module docstring's bind-mount description).
            working_dir: Host working directory; mapped to the matching path
                under `/workspace` when it lies inside `user_root`.
        """
        self.host = host
        self.pool = pool
        self.user_id = user_id
        self.user_root = user_root.resolve()
        self.working_dir = working_dir.resolve()

        # Matching path inside the container: /workspace/<wd_name>
        try:
            wd_rel = self.working_dir.relative_to(self.user_root)
        except ValueError:
            # working_dir is not under user_root -- defensive fallback, the
            # normal path never gets here.
            self.container_workdir = "/workspace"
        else:
            # relative_to() yields "." when both paths are equal; avoid
            # emitting "/workspace/." in that case.
            self.container_workdir = (
                "/workspace/" + wd_rel.as_posix() if wd_rel.parts else "/workspace"
            )

        # An env var that is set but empty would otherwise be forwarded
        # verbatim as `--user ""`; fall back to the default instead.
        self.exec_user = os.getenv("ZCBOT_SANDBOX_EXEC_USER") or DEFAULT_EXEC_USER

    # ── Executor interface ───────────────────────────────────

    def has_tool(self, name: str) -> bool:
        """Return whether the host executor knows the tool `name`."""
        return self.host.has_tool(name)

    def schemas(self) -> List[Dict[str, Any]]:
        """Return the tool schemas exposed by the host executor."""
        return self.host.schemas()

    def call_tool(self, name: str, args: Dict[str, Any], ctx: ExecCtx) -> ToolResult:
        """Run a tool, routing `shell` / `run_python` through the container.

        Tools outside CONTAINER_TOOLS are delegated unchanged to the host
        executor. Any exception raised while executing a container tool is
        converted into an error ToolResult (exit_code=1) instead of
        propagating.
        """
        if name not in CONTAINER_TOOLS:
            return self.host.call_tool(name, args, ctx)
        if not self.host.has_tool(name):
            # E.g. with caps.enable_run_python=False the host has no
            # run_python installed, so its schema is not exposed either.
            return ToolResult(content=f"[Error] unknown tool: {name}", exit_code=2)
        try:
            if name == "shell":
                return self._exec_shell(args, ctx)
            if name == "run_python":
                return self._exec_python(args, ctx)
        except Exception as e:
            return ToolResult(
                content=f"[Error executing {name} via docker] {type(e).__name__}: {e}",
                exit_code=1,
            )
        return ToolResult(content=f"[Error] unhandled container tool: {name}", exit_code=2)

    # ── shell ────────────────────────────────────────────────

    def _exec_shell(self, args: Dict[str, Any], ctx: ExecCtx) -> ToolResult:
        """Run `args["command"]` with `setsid bash -c` inside the container.

        Returns a bad-arguments ToolResult (exit_code=2) when `command` is not
        a non-empty string or `timeout` is not a positive number. The default
        timeout is 60 seconds.
        """
        cmd = args.get("command")
        if not isinstance(cmd, str) or not cmd.strip():
            return ToolResult(
                content="[Error] bad arguments to shell: command must be non-empty string",
                exit_code=2,
            )
        timeout = self._parse_timeout(args, default=60)
        if timeout is None:
            return ToolResult(
                content="[Error] bad arguments to shell: timeout must be a positive integer",
                exit_code=2,
            )

        container = self.pool.ensure(self.user_id)
        argv = self._docker_exec_argv(container) + ["setsid", "bash", "-c", cmd]
        result = self._run_subprocess(argv, timeout=timeout, ctx=ctx)
        self.pool.mark_active(self.user_id)
        return result

    # ── run_python ───────────────────────────────────────────

    def _exec_python(self, args: Dict[str, Any], ctx: ExecCtx) -> ToolResult:
        """Write `args["code"]` to a temp script and run it in the container.

        The script is written on the host under
        `<user_root>/.zcbot_tmp/<task_id>/` and addressed in the container as
        `/workspace/.zcbot_tmp/<task_id>/<name>.py`. It is removed afterwards
        whether or not execution succeeded. The default timeout is 120 seconds.
        """
        code = args.get("code")
        if not isinstance(code, str):
            return ToolResult(
                content="[Error] bad arguments to run_python: code must be string",
                exit_code=2,
            )
        timeout = self._parse_timeout(args, default=120)
        if timeout is None:
            return ToolResult(
                content="[Error] bad arguments to run_python: timeout must be a positive integer",
                exit_code=2,
            )

        tmp_root = self.user_root / TMP_SUBDIR / str(ctx.task_id)
        tmp_root.mkdir(parents=True, exist_ok=True)
        # Millisecond timestamp plus random suffix keeps names unique per call.
        rand_name = f"{int(time.time() * 1000)}-{secrets.token_hex(4)}.py"
        host_script = tmp_root / rand_name
        container_script = f"/workspace/{TMP_SUBDIR}/{ctx.task_id}/{rand_name}"

        try:
            # Written inside the try block so that a failed or partial write
            # is still cleaned up by the finally clause.
            host_script.write_text(code, encoding="utf-8")
            container = self.pool.ensure(self.user_id)
            argv = self._docker_exec_argv(
                container,
                extra_env={
                    "PYTHONIOENCODING": "utf-8",
                    "PYTHONPATH": "/workspace",
                },
            ) + ["setsid", "python", container_script]
            result = self._run_subprocess(argv, timeout=timeout, ctx=ctx)
            self.pool.mark_active(self.user_id)
            return result
        finally:
            # Best-effort cleanup: the script may never have been created.
            try:
                host_script.unlink()
            except OSError:
                pass

    # ── helpers ──────────────────────────────────────────────

    @staticmethod
    def _parse_timeout(args: Dict[str, Any], default: int) -> Optional[int]:
        """Return the timeout in whole seconds, or None when it is invalid.

        A missing or falsy `timeout` (None, 0, "") selects `default`. Values
        that cannot be converted with `int()` or that are not positive are
        reported as invalid so the caller can return a bad-arguments error.
        """
        raw = args.get("timeout") or default
        try:
            seconds = int(raw)
        except (TypeError, ValueError, OverflowError):
            return None
        return seconds if seconds > 0 else None

    def _docker_exec_argv(
        self, container: str, extra_env: Optional[Dict[str, str]] = None
    ) -> List[str]:
        """Build the `docker exec` argv prefix up to and including the container.

        The caller appends the command to run. Each `extra_env` entry is
        passed as `-e KEY=VALUE`.
        """
        argv = [
            "docker", "exec",
            "--user", self.exec_user,
            "--workdir", self.container_workdir,
        ]
        for key, value in (extra_env or {}).items():
            argv.extend(["-e", f"{key}={value}"])
        argv.append(container)
        return argv

    def _run_subprocess(
        self, argv: List[str], timeout: int, ctx: ExecCtx
    ) -> ToolResult:
        """Run the docker exec subprocess, polling cooperatively for cancel.

        On cancel or timeout the docker CLI client is killed with
        `Popen.kill()`; processes left on the container side are an accepted
        limitation (see the module docstring).

        Returns:
            - exit_code=2 when the docker CLI cannot be found,
            - exit_code=130 when `ctx.cancel_check` reports cancellation,
            - exit_code=124 when `timeout` seconds have elapsed,
            - otherwise the captured stdout/stderr and the process exit code.
        """
        cancel_check = ctx.cancel_check
        try:
            proc = subprocess.Popen(
                argv,
                # argv never contains `-i`, so nothing is meant to be fed to
                # the command; make sure the CLI does not inherit our stdin.
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                encoding="utf-8",
                errors="replace",
            )
        except FileNotFoundError as e:
            return ToolResult(content=f"[Error] docker CLI not found: {e}", exit_code=2)

        start = time.monotonic()
        while True:
            try:
                # Short poll interval so cancel/timeout are noticed promptly.
                stdout, stderr = proc.communicate(timeout=0.5)
                break
            except subprocess.TimeoutExpired:
                # Cancel takes priority over timeout when both apply.
                cancelled = cancel_check is not None and cancel_check()
                timed_out = not cancelled and time.monotonic() - start > timeout
                if not (cancelled or timed_out):
                    continue
                proc.kill()
                proc.communicate()  # reap the CLI and drain its pipes
                if cancelled:
                    return ToolResult(
                        content="[Error] command cancelled by user",
                        exit_code=130,
                    )
                return ToolResult(
                    content=f"[Error] command timed out after {timeout}s",
                    exit_code=124,
                )

        parts: List[str] = []
        if stdout:
            parts.append(f"[stdout]\n{stdout.rstrip()}")
        if stderr:
            parts.append(f"[stderr]\n{stderr.rstrip()}")
        parts.append(f"[exit {proc.returncode}]")
        return ToolResult(content="\n".join(parts), exit_code=proc.returncode)
|