From 4ff5b0c1715bf417e2b05b446c745c0e5380fb5a Mon Sep 17 00:00:00 2001 From: caoqianming Date: Wed, 27 May 2026 12:21:05 +0800 Subject: [PATCH] =?UTF-8?q?sandbox:=20host=20=E4=BE=A7=20resolv.conf=20bin?= =?UTF-8?q?d=20mount=20=E8=A6=86=E7=9B=96=E5=AE=B9=E5=99=A8=20ro?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit docker 默认 /etc/resolv.conf 是 ro mount(尤其 --read-only rootfs 下), init.sh `cat > /etc/resolv.conf` 写不进。改主路径: - SandboxPool._ensure_resolv_conf_file:在 host 侧 user_root_base 上级目录下的 .sandbox/resolv.conf 写入 yaml `sandbox.dns` 配置的公网 DNS(如 nameserver 8.8.8.8 / 114.114.114.114) - _docker_run 加 `-v 该文件:/etc/resolv.conf:ro`,user 指定 mount 优先级 > daemon 默认注入,直接覆盖 embedded DNS 127.0.0.11 - ZCBOT_DNS env 仍传给 init.sh 作 fallback(init.sh 写失败只告警、不退出) RUN.md 故障兜底两行(Read-only 报错根因 + 解法)。 Co-Authored-By: Claude Opus 4.7 (1M context) --- RUN.md | 3 ++- core/sandbox/pool.py | 39 ++++++++++++++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/RUN.md b/RUN.md index 87c7ac6..9c77b71 100644 --- a/RUN.md +++ b/RUN.md @@ -483,7 +483,8 @@ sudo xfs_quota -x -c "limit -p bhard=10g zcbot_" /opt | `POST /v1/files/upload` 返 413 `已达磁盘配额上限` | per-user 5GB(yaml `quotas.disk_bytes_per_user`)。让用户在 dev SPA 右侧文件栏删旧产物 / 大文件,或改 yaml 升配重启 web | | `[warn] network zcbot-sandbox-net is --internal (legacy)` | 上一版 sandbox network 创建时带了 `--internal`(完全禁 outbound),当前 dogfood 阶段放开。`docker stop $(docker ps -aq -f label=zcbot.product=sandbox) ; docker network rm zcbot-sandbox-net`,重启 web 自动 recreate 为非 internal | | tool write/edit 返 `[Error] 已达磁盘配额上限` | 同 upload 413,见上 | -| 容器内 `curl https://www.baidu.com` 报 `Temporary failure in name resolution` | 两层:① iptables `127.0.0.0/8 DROP` 把 docker embedded DNS(`127.0.0.11:53`)挡了 ── init.sh 已加 `127.0.0.11/32 udp/tcp 53 ACCEPT`;② docker embedded DNS 上游探测 host systemd-resolved 失败(腾讯云轻量等场景常见)── yaml `sandbox.dns` 显式指定 `8.8.8.8 / 114.114.114.114`,docker run `--dns` 直接注入容器 `/etc/resolv.conf`,绕过上游探测路径。重 build 镜像 + `docker rm -f $(docker ps -aq -f 
label=zcbot.product=sandbox)` 让新 image + 新 dns 配置生效 | +| 容器内 `curl https://www.baidu.com` 报 `Temporary failure in name resolution` | docker user-defined bridge network 上 /etc/resolv.conf 默认是 `nameserver 127.0.0.11`(embedded DNS),腾讯云轻量等场景 daemon 探测上游失败 → embedded DNS 转发失败。修法:yaml `sandbox.dns` 指定 `[8.8.8.8, 114.114.114.114]`,SandboxPool 把这些 nameserver 写入 host 侧 user_root_base 上级目录下的 `.sandbox/resolv.conf`,再以 `-v ...:/etc/resolv.conf:ro` bind mount 覆盖容器默认的 ro mount,绕开 embedded DNS。`docker rm -f $(docker ps -aq -f label=zcbot.product=sandbox)` + `systemctl restart zcbot` 让新容器按 mount 配置生效 | +| init.sh 报 `/etc/resolv.conf: Read-only file system` | docker 默认把 /etc/resolv.conf 当 ro mount,init.sh 内 `cat >` 写不进。host 侧 bind mount 已是主路径(见上一行),init.sh 的写入仅作 fallback,失败只告警、不退出容器 | | 启动报 `PLATFORM_KEY env not set` / `JWT_SECRET env not set` | D' 过渡 auth 强制双 env 必填。生成 `python -c "import secrets;print(secrets.token_urlsafe(48))"` 各填一,写 `.env` 重起 | | `/v1/auth/login_password` 返 403 `invalid email or password` | 邮箱不存在 / `password_hash` 列为空(platform_key 入口建的 user) / 密码错。`SELECT user_id, email, password_hash IS NOT NULL AS has_pw FROM users WHERE email=...` 核对;无行 → `main.py user add`;有行无密码 → `UPDATE users SET password_hash=...`(用 `.venv/Scripts/python.exe -c "from web.auth import hash_password;print(hash_password('xxx'))"` 算)或 `user add --user-id` 接到现有 user_id | | `main.py user add` 报 `IntegrityError ... 
uq_users_email` | 邮箱已存在,改 email 或先 `DELETE FROM users WHERE email=...`(先清该 user 的 tasks) | diff --git a/core/sandbox/pool.py b/core/sandbox/pool.py index fe06ba4..0aec2fc 100644 --- a/core/sandbox/pool.py +++ b/core/sandbox/pool.py @@ -160,10 +160,33 @@ class SandboxPool: self._last_active[user_id] = _now() return name + def _ensure_resolv_conf_file(self) -> Optional[Path]: + """准备 host 侧 resolv.conf 用于 bind mount 覆盖容器内 ro mount。 + + docker daemon 默 mount /etc/resolv.conf 是 ro,且 init.sh 在 `--read-only` + rootfs 下写不进 ── 改用 host 侧准备一份 + `-v :/etc/resolv.conf:ro` + 覆盖,绕开 embedded DNS。无 dns 配置返 None,不 mount(走 docker 默 embedded DNS)。 + """ + if not self.dns: + return None + sandbox_dir = self.user_root_base.parent / ".sandbox" + try: + sandbox_dir.mkdir(parents=True, exist_ok=True) + except OSError: + return None + resolv = sandbox_dir / "resolv.conf" + content = "".join(f"nameserver {ip}\n" for ip in self.dns if ip) + try: + resolv.write_text(content, encoding="utf-8") + except OSError: + return None + return resolv + def _docker_run(self, user_id: UUID, name: str) -> None: """同步阻塞;由 ensure 在 to_thread 里调。""" user_root = self.user_root_base / str(user_id) user_root.mkdir(parents=True, exist_ok=True) + resolv_file = self._ensure_resolv_conf_file() cmd: List[str] = [ "docker", "run", "-d", @@ -184,11 +207,17 @@ class SandboxPool: "-e", f"ZCBOT_PG_IPS={self.pg_ips}", "--restart=no", ] - # 显式 DNS:用 env 传给 init.sh,启动时覆写 /etc/resolv.conf 直接指公网 DNS。 - # 不用 docker `--dns` flag ── user-defined bridge network 上 `--dns` 只改 - # docker daemon 给 embedded DNS(127.0.0.11)的上游目标,容器 resolv.conf - # 不变;daemon 上游探测在腾讯云轻量等场景不稳,embedded DNS 自身 forward - # 仍跪。init.sh 直接写 resolv.conf 完全绕过 embedded DNS。 + # 显式 DNS 两层(主路径 + fallback): + # 1. **主**:host 侧 resolv.conf bind mount 覆盖(下面 -v),user 指定 mount + # 优先级 > daemon 默注入,绕开 embedded DNS(127.0.0.11) + # 2. 
**fallback**:ZCBOT_DNS env 传给 init.sh 启动时 cat > /etc/resolv.conf, + # 容器内 /etc/resolv.conf 是 ro 时写失败也 warn 继续(init.sh 已 robust)。 + # 保留以防 host 侧 file mount 因某种原因失败的备份路径 + # 用 docker `--dns` flag 不行 ── user-defined bridge network 上 `--dns` 只改 + # docker daemon 给 embedded DNS 的上游,不动容器 resolv.conf;daemon 上游 + # 探测在腾讯云轻量等场景下挂掉,embedded DNS 自身 forward 仍跪 + if resolv_file is not None: + cmd += ["-v", f"{resolv_file}:/etc/resolv.conf:ro"] if self.dns: cmd += ["-e", f"ZCBOT_DNS={','.join(self.dns)}"] # repo skills 只读 mount ── fs 工具进容器后(read/glob/grep)能 access