# zcbot/core/shortcuts.py

"""用户快捷指令(触发词 → 完整指令)。渠道无关,入口层确定性展开。

存储:`workspace/users/<user_id>/.memory/shortcuts.md` —— 蹭 memory 的 per-user 存储壳
(同一 workspace 内按 user_id 隔离,agent 已有该目录写权限),但**与 memory 是两种机制**:

- memory 是注进 system prompt、给模型**参考**的软上下文(概率召回)。
- 快捷指令**不进上下文**:展开发生在入口层、模型跑之前 —— 每条入站消息先经 `expand()`
  查表,整条精确命中触发词就把文本替换成完整指令再跑 agent。所以存再多条,平时上下文也是 0;
  触发时进上下文的就是那条完整指令本身(= 用户本来要打的字),无额外 token。

维护(agent 自管,同 memory):用户在对话里说"记个快捷词:X → Y",模型往 shortcuts.md 写一行
(memory 契约里加了一句告诉它格式);触发不靠模型,靠本模块解析,确定、零歧义。

格式(markdown 两列表,容错解析;表头/分隔行自动跳过):

    | 触发词 | 指令 |
    |---|---|
    | 简报 | 给我输出一份昨日的 AI 新闻简报 |

匹配语义:整条消息 `strip()` + `casefold()` 后与某触发词**精确相等**才展开;
"帮我出个简报" 不命中(当普通消息走)。与「新话题」魔法命令同风格,零误伤。
(触发词含 `|` 会破坏表格解析 —— 约定触发词不含竖线;指令正文含竖线也会被截断,同样避免。)
"""
from __future__ import annotations

import re
from pathlib import Path
from typing import Dict, Optional, Tuple
from uuid import UUID

# Trigger-column texts that identify a table header row. Rows whose normalized
# first cell is one of these are skipped by the parser, so a header is never
# registered as a shortcut.
_HEADER_TRIGGERS = {
    "触发词",
    "触发",
    "快捷词",
    "快捷指令",
    "命令",
    "trigger",
    "shortcut",
}
# A single cell of a markdown table separator row: `---`, `:--`, `:-:`, etc.
_SEP_RE = re.compile(r"^:?-+:?$")


def _shortcuts_file(workspace_dir: Path, user_id: UUID) -> Path:
    """Return the path of one user's shortcuts table.

    Layout: ``<workspace_dir>/users/<user_id>/.memory/shortcuts.md``.
    The path is only computed here; nothing is created or checked on disk.
    """
    user_root = workspace_dir.joinpath("users", str(user_id))
    return user_root.joinpath(".memory", "shortcuts.md")


def _normalize(s: str) -> str:
    """Return the matching key for *s*: outer whitespace removed, then case-folded.

    Both stored triggers and incoming messages go through this, so comparison
    is whitespace-insensitive at the edges and case-insensitive.
    """
    trimmed = s.strip()
    return trimmed.casefold()


def _is_separator(cell: str) -> bool:
    """Tell whether *cell* is a markdown separator cell such as ``---`` or ``:-:``.

    Space characters anywhere in the cell are dropped before matching, so
    ``- - -`` and ``: --- :`` count as separators too.
    """
    compact = "".join(cell.split(" "))
    return _SEP_RE.match(compact) is not None


def parse_shortcuts(text: str) -> Dict[str, str]:
    """Parse the text of a shortcuts.md file into ``{normalized trigger: prompt}``.

    Pure function (no I/O), so it can be tested directly.

    Parsing is tolerant. Only lines that start with ``|`` (after trimming) are
    considered table rows; the first two cells are the trigger and the prompt,
    and any further cells are ignored. A row is dropped when:

    - it has fewer than two cells,
    - the trigger or the prompt cell is empty,
    - both cells are separator cells (``|---|---|``),
    - the normalized trigger is empty or is a known header word.

    When a trigger appears more than once, the first occurrence wins, matching
    the top-to-bottom order a human reads the table in.
    """
    table: Dict[str, str] = {}
    rows = (ln.strip() for ln in text.splitlines())
    for row in rows:
        if row[:1] != "|":
            continue
        # Drop the outer pipes, then cut the row into its cells.
        columns = row.strip("|").split("|")
        if len(columns) < 2:
            continue
        trigger = columns[0].strip()
        prompt = columns[1].strip()
        if not (trigger and prompt):
            continue
        if _is_separator(trigger) and _is_separator(prompt):
            continue  # separator row such as |---|---|
        key = _normalize(trigger)
        # Skip empty keys and header rows; keep the earliest definition of a key.
        if key and key not in _HEADER_TRIGGERS and key not in table:
            table[key] = prompt
    return table


def load_shortcuts(workspace_dir: Path, user_id: UUID) -> Dict[str, str]:
    """Read and parse one user's shortcuts.md.

    Best-effort by design: a missing file, a path that is not a regular file,
    any OS-level failure, or undecodable content all yield an empty table
    instead of an exception, so a broken shortcuts file can never block an
    inbound message.

    Args:
        workspace_dir: Root of the workspace that holds the ``users`` tree.
        user_id: Owner of the shortcuts file.

    Returns:
        ``{normalized trigger: prompt}``; empty when nothing could be loaded.
    """
    path = _shortcuts_file(workspace_dir, user_id)
    try:
        # The existence check lives inside the try on purpose: on Python
        # versions where Path.is_file() propagates errors other than
        # "missing" (e.g. PermissionError), it would otherwise escape and
        # break the no-raise contract.
        if not path.is_file():
            return {}
        # utf-8-sig decodes plain UTF-8 unchanged but drops a leading BOM.
        # With plain utf-8 the BOM stays glued to the first line, which then
        # no longer starts with "|" and is silently skipped by the parser.
        raw = path.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError):
        return {}
    return parse_shortcuts(raw)


def expand(
    workspace_dir: Path, user_id: UUID, text: str
) -> Tuple[str, Optional[str]]:
    """Expand a whole-message shortcut at the entry layer.

    The message is normalized and looked up in the user's shortcut table.
    Only an exact match of the entire message counts; a trigger embedded in a
    longer sentence is left alone.

    Returns:
        ``(prompt, typed)`` on a hit, where ``prompt`` is the stored full
        instruction and ``typed`` is the incoming text with outer whitespace
        removed. ``(text, None)`` otherwise, with ``text`` returned untouched.
        Empty or whitespace-only input is returned as-is without reading the
        shortcuts file.

    Intended call sites, per the original author's note: the channel core
    ``_run_channel_conversation`` and the web ``post_message``, sharing this
    function so the same trigger behaves the same from every entry point.
    """
    typed = text.strip() if text else ""
    if not typed:
        return text, None
    table = load_shortcuts(workspace_dir, user_id)
    if table:
        full_prompt = table.get(_normalize(text))
        if full_prompt is not None:
            return full_prompt, typed
    return text, None