zcbot/core/skills.py

169 lines
6.5 KiB
Python

"""Skill 注册表 (Anthropic 标准格式)。
每个 skill 是 <root>/<name>/ 目录,内含 SKILL.md(带 frontmatter)+ 可选的
references/、scripts/、assets/。启动时只读 frontmatter 做 discovery,完整 SKILL.md
和 references 由 agent 按需加载(渐进披露)。
多来源:内置 skill(`ROOT/skills`,只读)+ 用户 skill(`user_root/.skills`,可写)。
来源按顺序扫,**后扫的同名覆盖先扫的** —— 用户 skill 排在内置之后,故"用户覆盖
内置"(user wins);覆盖关系记进 `user_overrides` 供 discovery 显式标注,不静默。
"""
from __future__ import annotations
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union
import yaml
# Leading YAML frontmatter delimited by `---` fence lines.
#   * an optional UTF-8 BOM in front of the opening fence is tolerated
#     (files decoded with plain "utf-8" keep it as U+FEFF);
#   * fence lines may carry trailing spaces/tabs and end in LF or CRLF;
#   * the payload (group 1) is optional, so an empty `---` / `---` pair is
#     still recognised as (empty) frontmatter; the lazy `??` makes the empty
#     reading win when the closing fence immediately follows the opening one;
#   * the closing fence must be a whole line (or sit at end of input), so a
#     line that merely starts with `---` does not end the frontmatter.
_FRONTMATTER_RE = re.compile(
    r"\A\ufeff?---[ \t]*\r?\n"
    r"(?:(.*?)\r?\n)??"
    r"---[ \t]*(?:\r?\n|\Z)",
    re.DOTALL,
)


def parse_frontmatter(text: str) -> Tuple[dict, str]:
    """Split a markdown document into its YAML frontmatter and its body.

    Args:
        text: Full document text.

    Returns:
        A ``(meta, body)`` tuple. ``meta`` is the frontmatter parsed as a
        dict; it is ``{}`` when the frontmatter is empty or when its YAML
        value is not a mapping. ``body`` is everything after the closing
        fence. When the text has no frontmatter, the result is
        ``({}, text)`` with ``text`` returned unchanged.

    Raises:
        yaml.YAMLError: The frontmatter is not valid YAML. ``Skill.from_dir``
            converts this into ``SkillLoadError`` so that one broken,
            hand-written skill cannot abort a whole scan.
    """
    m = _FRONTMATTER_RE.match(text)
    if m is None:
        return {}, text

    body = text[m.end():]
    raw = m.group(1)
    # Empty / whitespace-only frontmatter: nothing to parse.
    if raw is None or not raw.strip():
        return {}, body

    meta = yaml.safe_load(raw)
    # A scalar, list or null at the top level is not usable metadata.
    if not isinstance(meta, dict):
        meta = {}
    return meta, body
class SkillLoadError(Exception):
    """Raised when a directory has a SKILL.md that cannot be loaded.

    Typical causes are malformed frontmatter YAML or a missing description.
    This is deliberately distinct from a directory without any SKILL.md:
    that is an ordinary non-skill subdirectory and is skipped silently,
    whereas a load failure has to be reported to the user.
    """
@dataclass
class Skill:
    """A discovered skill: display name, description and on-disk location."""

    name: str  # frontmatter `name`, or the directory name when absent
    description: str  # frontmatter `description` (required, non-empty)
    skill_dir: Path  # directory that contains SKILL.md
    source: str = "builtin"  # 'builtin' | 'user'

    @property
    def skill_md(self) -> Path:
        """Path of this skill's SKILL.md file."""
        return self.skill_dir / "SKILL.md"

    def full_content(self) -> str:
        """Return the whole SKILL.md text (frontmatter included), read as UTF-8."""
        return self.skill_md.read_text(encoding="utf-8")

    @classmethod
    def from_dir(cls, skill_dir: Path, source: str = "builtin") -> Optional["Skill"]:
        """Load the skill stored in ``skill_dir``.

        Args:
            skill_dir: Candidate skill directory.
            source: Label recorded on the resulting skill.

        Returns:
            The loaded ``Skill``, or ``None`` when the directory has no
            SKILL.md (it is simply not a skill directory).

        Raises:
            SkillLoadError: SKILL.md exists but cannot be read or decoded,
                its frontmatter is invalid YAML, or it has no description.
                The underlying exception is attached as ``__cause__``.
        """
        md = skill_dir / "SKILL.md"
        if not md.exists():
            return None  # not a skill directory: skip silently

        try:
            text = md.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            raise SkillLoadError(f"读不出 SKILL.md: {e}") from e

        try:
            meta, _ = parse_frontmatter(text)
        except yaml.YAMLError as e:
            raise SkillLoadError(f"frontmatter YAML 非法: {e}") from e

        # YAML may yield non-string scalars (`name: 123`); normalise to stripped
        # strings so the declared field types and registry keys stay `str`.
        name = str(meta.get("name") or "").strip() or skill_dir.name
        desc = str(meta.get("description") or "").strip()
        if not desc:
            raise SkillLoadError("缺 description(frontmatter 必须有 name + description)")
        return cls(name=name, description=desc, skill_dir=skill_dir, source=source)
@dataclass
class SkillSource:
    """One location that is searched for skills.

    ``container_root`` is the prefix under which this source is mounted
    inside the container when the docker backend is used (built-in skills →
    `/sandbox/skills`, user skills → `/workspace/.skills`). ``None`` means
    the host backend, in which case LoadSkillTool falls back to host
    absolute paths.
    """

    root: Path  # directory whose subdirectories are candidate skills
    source: str = "builtin"  # label attached to skills found under `root`
    container_root: Optional[str] = None
# Accepted forms of SkillRegistry's `sources` argument: a single directory
# (Path or str), a single SkillSource, or a list of SkillSource objects.
SourcesArg = Union[Path, str, SkillSource, List[SkillSource]]
class SkillRegistry:
    """Registry of the skills discovered under one or more sources.

    Sources are scanned in the order given, and a skill scanned later
    replaces an earlier one with the same name. Listing user sources after
    built-in ones therefore lets the user's skill win; such overrides are
    recorded in ``user_overrides`` so discovery can label them explicitly.
    """

    def __init__(self, sources: SourcesArg) -> None:
        """Normalise ``sources`` to a list of SkillSource and scan them.

        Args:
            sources: A directory (Path or str, treated as one built-in
                source), a single SkillSource, or a list of SkillSource.
        """
        # A bare Path/str is wrapped as a single built-in source (backward
        # compatible with callers and tests that pass a directory directly).
        if isinstance(sources, (str, Path)):
            sources = [SkillSource(Path(sources), "builtin")]
        elif isinstance(sources, SkillSource):
            sources = [sources]
        self.sources: List[SkillSource] = list(sources)
        self.skills: Dict[str, Skill] = {}
        # Names of built-in skills replaced by a user skill; discovery labels
        # them so an override is never silent.
        self.user_overrides: set[str] = set()
        # User skills that failed to load, as (directory name, reason).
        # Built-in failures are developer bugs and are not reported here.
        self.load_errors: List[Tuple[str, str]] = []
        self._container_roots: Dict[str, Optional[str]] = {}
        self._scan()

    def _scan(self) -> None:
        """Walk every source and populate skills, overrides and load errors."""
        for src in self.sources:
            self._container_roots[src.source] = src.container_root
            # Skip a missing root (e.g. the user has no .skills directory) and
            # also a root that is not a directory, where iterdir() would raise.
            if not src.root.is_dir():
                continue
            for child in sorted(src.root.iterdir()):
                if not child.is_dir():
                    continue
                try:
                    skill = Skill.from_dir(child, source=src.source)
                except SkillLoadError as e:
                    # Only user skill failures are surfaced to the end user.
                    if src.source == "user":
                        self.load_errors.append((child.name, str(e)))
                    continue
                if skill is None:
                    continue
                prev = self.skills.get(skill.name)
                if prev is not None and prev.source != skill.source and skill.source == "user":
                    self.user_overrides.add(skill.name)  # user replaced a built-in
                self.skills[skill.name] = skill  # later scan wins -> user wins

    def discovery_block(self) -> str:
        """Build the skill list injected into the system prompt.

        Returns:
            One ``- **name**[tag]: description`` line per skill, followed
            (after a blank line) by a warning that lists user skills which
            failed to load. Returns ``""`` when there are neither skills nor
            load errors. When there are only load errors, the result is the
            warning alone, with no leading blank lines.
        """
        if not self.skills and not self.load_errors:
            return ""
        lines = []
        for s in self.skills.values():
            if s.source == "user":
                tag = " [你的·已覆盖内置]" if s.name in self.user_overrides else " [你的]"
            else:
                tag = ""
            lines.append(f"- **{s.name}**{tag}: {s.description}")

        parts = []
        if lines:
            parts.append("\n".join(lines))
        if self.load_errors:
            errs = "; ".join(f"`{n}`({why})" for n, why in self.load_errors)
            parts.append(
                "> ⚠️ 你有用户 skill 因格式问题未加载,需要时提醒用户修好 frontmatter"
                f"(修好后下条消息生效):{errs}"
            )
        return "\n\n".join(parts)

    def container_dir(self, skill: Skill) -> Optional[str]:
        """Return the skill's directory inside the container, if any.

        Returns ``None`` when the skill's source has no container root (host
        backend); the caller then falls back to the host absolute path.
        """
        root = self._container_roots.get(skill.source)
        if not root:
            return None
        # Use the on-disk directory name, not the frontmatter display name:
        # the two can differ and only the former exists under the mount.
        return f"{root.rstrip('/')}/{skill.skill_dir.name}"

    def get(self, name: str) -> Optional[Skill]:
        """Look up a skill by name; ``None`` when it is not registered."""
        return self.skills.get(name)