diff --git a/PROGRESS.md b/PROGRESS.md index 3ada729..03e392d 100644 --- a/PROGRESS.md +++ b/PROGRESS.md @@ -2,7 +2,7 @@ > 配合 `DESIGN.md`。本文件只记 phase 状态、决策偏差、文件量、下一步。每条 1-2 句:做了啥 + 关键判断;细节查 `git log` / `git diff` / `DESIGN §7.9`。 -最后更新:2026-06-30(web 端 SVG 预览修复:强制 image/svg+xml + bump 0.33.1) +最后更新:2026-06-30(look_at_image 超时透明重试 + 超时 60→120s + bump 0.33.2) --- @@ -21,6 +21,9 @@ ## 已完成关键能力 +### 2026-06-30 / look_at_image 偶发超时:tool 内透明重试 + 超时上限提到 120s(bump 0.33.2) +Seed 2.0 Lite 非流式,长 OCR 首字节可能逼近 60s read timeout → 偶发超时,且返 `[Error]` 会触发主模型重发整个 tool call(图 base64 重传、输入 token 再付一次,正中"报错重试烧 token"根因)。修法:`ark_client` 新增 `ArkTimeoutError(ArkError)` 子类(仅超时/网络抖动抛它,HTTP 4xx/5xx 业务错误仍抛普通 `ArkError` 不重试);`look_at_image` 对该子类退避重试(`timeout_retries` 默认 1 次,退避 2^n s),在 tool 内消化掉不抛给主模型;`doubao.yaml` vision `request_timeout_s` 60→120。子类仍是 `ArkError`,seedream 等现有 `except ArkError` 不受影响。 + ### 2026-06-30 / 修复 web 端 SVG 无法预览(bump 0.33.1) SVG 在 `` 里必须 Content-Type=`image/svg+xml` 才渲染。前端 `preview.js` 的 `_showImage` / mini 图片分支据扩展名强制 blob mime(与服务端响应头无关);后端 `download` 接口对 `.svg` 显式回 `image/svg+xml`(部分部署环境 mimetypes 未注册 svg → 会被 FileResponse 猜成 octet-stream)。双保险。 diff --git a/config/media/doubao.yaml b/config/media/doubao.yaml index 582288e..11d2946 100644 --- a/config/media/doubao.yaml +++ b/config/media/doubao.yaml @@ -40,7 +40,8 @@ vision: price_cny_per_mtoken_output: 3.6 price_cny_per_mtoken_cache_hit: 0.12 max_image_mb: 10 # 单图上限(超出 tool 侧直接报错,不发请求) - request_timeout_s: 60 # 读图慢于此判超时 + request_timeout_s: 120 # 读图慢于此判超时(非流式,长 OCR 首字节可能逼近上限) + timeout_retries: 1 # 超时/网络抖动 tool 内透明重试次数(退避 2^n s);不含业务错误 video: # fast 放第一个 → 默认 variant(成本敏感场景优先);开通了 Pro 的用户从顶栏下拉切。 diff --git a/core/__init__.py b/core/__init__.py index ca29ba6..3632376 100644 --- a/core/__init__.py +++ b/core/__init__.py @@ -1,3 +1,3 @@ # zcbot 版本号单一事实源:web/app.py 的 FastAPI version、/healthz 返回、前端展示都引这里。 # 改版本只动这一行。 -__version__ = "0.33.1" +__version__ = "0.33.2" diff --git a/core/ark_client.py b/core/ark_client.py index db98bf8..b66e654 100644 --- a/core/ark_client.py +++ b/core/ark_client.py @@ -23,6 +23,14 @@ class ArkError(RuntimeError): """ark API 调用失败的统一异常。""" +class ArkTimeoutError(ArkError): + """可重试的瞬时失败:请求超时 / 网络抖动(非业务错误)。 + + HTTP 4xx/5xx 业务错误仍抛普通 ArkError(不该重试,重试也是同样的错)。 + caller 可单独 catch 本子类做退避重试;catch ArkError 仍能兜住(isinstance)。 + """ + + @dataclass class ArkConfig: api_key: str @@ -73,18 +81,18 @@ class ArkClient: try: resp = self._client.post(path, json=body, timeout=timeout_s or self.timeout_s) except httpx.TimeoutException as e: - raise ArkError(f"timeout calling POST {path}: {e}") from e + raise ArkTimeoutError(f"timeout calling POST {path}: {e}") from e except httpx.HTTPError as e: - raise ArkError(f"network error calling POST {path}: {e}") from e + raise ArkTimeoutError(f"network error calling POST {path}: {e}") from e return self._parse(resp, f"POST {path}") def get_json(self, path: str, *, timeout_s: Optional[float] = None) -> dict: try: resp = self._client.get(path, timeout=timeout_s or self.timeout_s) except httpx.TimeoutException as e: - raise ArkError(f"timeout calling GET {path}: {e}") from e + raise ArkTimeoutError(f"timeout calling GET {path}: {e}") from e except httpx.HTTPError as e: - raise ArkError(f"network error calling GET {path}: {e}") from e + raise ArkTimeoutError(f"network error calling GET {path}: {e}") from e return self._parse(resp, f"GET {path}") @staticmethod diff --git a/tools/look_at_image.py b/tools/look_at_image.py index 9ec0af6..4485c14 100644 --- a/tools/look_at_image.py +++ b/tools/look_at_image.py @@ -13,7 +13,9 @@ from pathlib import Path from typing import Any, Optional from uuid import UUID -from core.ark_client import ArkClient, ArkConfig, ArkError +import time + +from core.ark_client import ArkClient, ArkConfig, ArkError, ArkTimeoutError from core.storage.usage import record_vision_usage from .base import Tool, compact_tool_output @@ -113,11 +115,26 @@ class LookAtImageTool(Tool): ], } - try: - with ArkClient(self.ark_cfg, timeout_s=timeout_s) as client: - resp = client.post_json(endpoint, body, timeout_s=timeout_s) - except ArkError as e: - return f"[Error] look_at_image API: {e}" + # 透明重试:Seed 2.0 Lite 非流式,长 OCR 偶发超时/网络抖动。tool 内消化掉, + # 不把 [Error] 抛给主模型 —— 否则主模型会重发整个 tool call(图 base64 重传、 + # 输入 token 再付一次)。仅 ArkTimeoutError(超时/网络)重试;HTTP 业务错误不重试。 + max_attempts = int(cfg.get("timeout_retries", 1)) + 1 + resp = None + for attempt in range(max_attempts): + try: + with ArkClient(self.ark_cfg, timeout_s=timeout_s) as client: + resp = client.post_json(endpoint, body, timeout_s=timeout_s) + break + except ArkTimeoutError as e: + if attempt == max_attempts - 1: + return f"[Error] look_at_image API: {e}(已重试 {attempt} 次仍超时)" + print( + f"[look_at_image] timeout, retrying ({attempt + 1}/{max_attempts - 1}): {e}", + flush=True, + ) + time.sleep(2 ** attempt) + except ArkError as e: + return f"[Error] look_at_image API: {e}" answer = self._extract_answer(resp) if not answer: