From 42755e246edf3d8e7400e7869b94823eda79d9a1 Mon Sep 17 00:00:00 2001 From: caoqianming Date: Thu, 4 Jun 2026 14:12:53 +0800 Subject: [PATCH] =?UTF-8?q?fix(sandbox):=20shell=20=E4=B9=9F=E6=B3=A8?= =?UTF-8?q?=E5=85=A5=20PYTHONPATH=20+=20HOME=3D/tmp(=E4=BF=AE=E5=8F=AA?= =?UTF-8?q?=E8=AF=BB=20rootfs=20=E4=B8=A4=E5=89=AF=E4=BD=9C=E7=94=A8)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 容器 --read-only rootfs 下两个副作用: - PYTHONPATH 原先只 run_python 注入,shell `python -c "from skills..."` 撞 ModuleNotFoundError - /home/zcbot 不可写,matplotlib/fontconfig 往 ~/.config / ~/.cache 写缓存刷 "Read-only file system" / "No writable cache" 噪音 抽 _CONTAINER_ENV = {PYTHONPATH, HOME=/tmp},shell/run_python/fs 三路共用 (-e 确定性覆盖)。HOME=/tmp 一刀让缓存落 tmpfs(matplotlib→/tmp/.config、 fontconfig→/tmp/.cache),免逐个 MPLCONFIGDIR/XDG_CACHE_HOME。纯代码改, 重启 web 生效,免重建镜像。 Co-Authored-By: Claude Opus 4.8 (1M context) --- PROGRESS.md | 6 +++++- core/executor_docker.py | 29 ++++++++++++++++++++--------- tests/test_executor_docker.py | 7 +++++++ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/PROGRESS.md b/PROGRESS.md index 91e1349..cb996a6 100644 --- a/PROGRESS.md +++ b/PROGRESS.md @@ -2,7 +2,7 @@ > 配合 `DESIGN.md`。本文件只记 phase 状态、决策偏差、文件量、下一步。每条 1-2 句:做了啥 + 关键判断;细节查 `git log` / `git diff` / `DESIGN §7.9`。 -最后更新:2026-06-03(默认镜像源改清华 —— 腾讯 PyPI 吐损坏 litellm wheel) +最后更新:2026-06-04(sandbox 容器 env:shell 也注入 PYTHONPATH + HOME=/tmp) --- @@ -21,6 +21,10 @@ ## 已完成关键能力 +### 2026-06-04 + +- **sandbox 容器 env 收编到一处 + shell 也注入(修两个只读 rootfs 副作用)**:① `PYTHONPATH=/sandbox:/workspace` 原先只 `run_python` 注入,shell 里 `python -c "from skills..."` 撞 ModuleNotFoundError;② `--read-only` rootfs 下 `/home/zcbot` 不可写,matplotlib/fontconfig 往 `~/.config`/`~/.cache` 写缓存刷 "Read-only file system" / "No writable cache" 噪音。改:`executor_docker.py` 抽 `_CONTAINER_ENV = {PYTHONPATH, HOME=/tmp}`,shell/run_python/fs 三路共用(`-e` 确定性覆盖)—— `HOME=/tmp` 一刀让缓存落 tmpfs(matplotlib→/tmp/.config、fontconfig→/tmp/.cache),不用逐个 MPLCONFIGDIR/XDG_CACHE_HOME。纯代码改,重启 web 生效,免重建镜像。 + ### 2026-06-03 - **修 docker sandbox 下 system prompt 焊死宿主路径(agent 找不到自己的文件)**:`ZCBOT_SANDBOX_BACKEND=docker` 时 shell/run_python/fs 工具全在容器里跑(`/users/` bind 到 `/workspace`),但 `_build_system_prompt` 注入的 `task_dir` / cwd 是宿主绝对路径(容器内不存在),LLM 据此 `find /home/ubuntu/zcbot ...` 全空、瞎转到 `pwd` 才发现真身在 `/workspace/`。修法同 `LoadSkillTool` 的 `container_skills_dir` 改写:docker 下 `task_dir` + 「宪法」glob 范例换成容器路径 `/workspace/`,并去掉 docker 下无意义的 cwd 行(容器 cwd 恒等 task_dir);host 不变。 diff --git a/core/executor_docker.py b/core/executor_docker.py index d57ca42..3008378 100644 --- a/core/executor_docker.py +++ b/core/executor_docker.py @@ -70,6 +70,21 @@ CONTAINER_TOOLS = SHELL_LIKE_TOOLS | FS_TOOLS # 写死 uid:gid 形式("1000:1000")会与 bind mount owner 错配,导致 EACCES。 DEFAULT_EXEC_USER = "zcbot" +# 容器 rootfs 只读(docker run --read-only),/home/zcbot 不可写 ── 所有 exec 路径 +# (shell / run_python / fs 工具)共用这套 env,用 `docker exec -e` 确定性覆盖容器默认值: +# - PYTHONPATH=/sandbox:/workspace:让 `from skills.xxx import ...` 全路径生效。 +# /sandbox 在前(skills bind mount 到 /sandbox/skills:ro,SKILL.md 教这条 import +# path),/workspace 在后(用户 task 目录的本地脚本)。原先只 run_python 注入, +# shell `python -c "from skills..."` 撞 ModuleNotFoundError;收编到这一处。 +# - HOME=/tmp:只读 home 下 matplotlib/fontconfig 等想往 ~/.config / ~/.cache 写缓存 +# 会刷 "Read-only file system" / "No writable cache" 噪音。指到 /tmp(tmpfs 可写) +# 一刀盖掉两类(matplotlib → /tmp/.config,fontconfig → /tmp/.cache),无需逐个 +# 专用变量(MPLCONFIGDIR / XDG_CACHE_HOME)。 +_CONTAINER_ENV = { + "PYTHONPATH": "/sandbox:/workspace", + "HOME": "/tmp", +} + # host 侧 tmp 脚本目录(user_root 内 dotfile,被 /v1/files API 隐藏) TMP_SUBDIR = ".zcbot_tmp" @@ -148,7 +163,9 @@ class DockerExecutor(Executor): # attach 出问题,延迟输出(sleep 2 后 print)被截掉,LLM 拿到空 [exit 0]。 # PGID kill 协议(§7.5 Step 3b)未来要做时换 `setsid --wait` 或 wrapper, # 不能裸 setsid。 - argv = self._docker_exec_argv(container) + ["bash", "-c", cmd] + argv = self._docker_exec_argv( + container, extra_env=_CONTAINER_ENV + ) + ["bash", "-c", cmd] result = self._run_subprocess(argv, timeout=timeout, ctx=ctx) self.pool.mark_active(self.user_id) return result @@ -177,13 +194,7 @@ class DockerExecutor(Executor): container = self.pool.ensure(self.user_id) argv = self._docker_exec_argv( container, - extra_env={ - "PYTHONIOENCODING": "utf-8", - # /sandbox 在前:让 `from skills.xxx.helper import ...` work - # (skills/ bind mount 到 /sandbox/skills:ro,SKILL.md 教 LLM - # 这条 import path);/workspace 在后:用户 task 目录的本地脚本 - "PYTHONPATH": "/sandbox:/workspace", - }, + extra_env={**_CONTAINER_ENV, "PYTHONIOENCODING": "utf-8"}, ) + ["python", container_script] # 删 setsid 同上(_exec_shell 注释) result = self._run_subprocess(argv, timeout=timeout, ctx=ctx) self.pool.mark_active(self.user_id) @@ -218,7 +229,7 @@ class DockerExecutor(Executor): container = self.pool.ensure(self.user_id) argv = self._docker_exec_argv( container, - extra_env={"PYTHONIOENCODING": "utf-8"}, + extra_env={**_CONTAINER_ENV, "PYTHONIOENCODING": "utf-8"}, stdin_open=True, ) + ["python", "/sandbox/tool_runner.py", name] diff --git a/tests/test_executor_docker.py b/tests/test_executor_docker.py index 1e6b295..080e255 100644 --- a/tests/test_executor_docker.py +++ b/tests/test_executor_docker.py @@ -152,6 +152,11 @@ class TestShellExec(unittest.TestCase): container_idx = argv.index(f"zcbot-sandbox-{executor.user_id}") # bash -c 紧跟 container 之后(setsid 2026-05-29 删 —— 跟 docker exec stdio 不兼容) self.assertEqual(argv[container_idx + 1:], ["bash", "-c", "echo hello"]) + # shell 也注入容器 env(PYTHONPATH 让 `python -c "from skills..."` work; + # HOME=/tmp 让只读 home 下 matplotlib/fontconfig 缓存写得进 tmpfs) + env_kvs = [argv[i + 1] for i, a in enumerate(argv) if a == "-e"] + self.assertIn("PYTHONPATH=/sandbox:/workspace", env_kvs) + self.assertIn("HOME=/tmp", env_kvs) self.assertEqual(pool.ensure_calls, [executor.user_id]) self.assertEqual(pool.mark_active_calls, [executor.user_id]) @@ -345,6 +350,8 @@ class TestRunPython(unittest.TestCase): # PYTHONPATH 必须含 /sandbox(让 SKILL.md 教的 `from skills.xxx import` work, # skills/ bind mount 到 /sandbox/skills:ro)+ /workspace(用户 task 目录) self.assertIn("PYTHONPATH=/sandbox:/workspace", env_kvs) + # HOME=/tmp:只读 home 下缓存改写 tmpfs(matplotlib/fontconfig 噪音) + self.assertIn("HOME=/tmp", env_kvs) # host 侧 tmp 已 unlink(目录可能仍在,无所谓 —— ensure 容器时会重新 mkdir) tmp_subroot = executor.user_root / TMP_SUBDIR / str(ctx.task_id)