From 1837df5f88839cc96468a9b993282b57c5f27856 Mon Sep 17 00:00:00 2001 From: unraid Date: Wed, 22 Apr 2026 22:38:09 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=20skill=20learning?= =?UTF-8?q?=20=E6=8A=80=E8=83=BD=E5=AD=A6=E4=B9=A0=E9=97=AD=E7=8E=AF?= =?UTF-8?q?=E7=B3=BB=E7=BB=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .gitignore | 5 + AGENTS.md | 283 +++++++ .../DiscoverSkillsTool/DiscoverSkillsTool.ts | 107 +++ .../__tests__/DiscoverSkillsTool.test.ts | 54 ++ .../src/tools/DiscoverSkillsTool/prompt.ts | 16 +- .../__tests__/skill-learning.test.ts | 152 ++++ src/commands/skill-learning/index.ts | 15 + src/commands/skill-learning/skill-learning.ts | 310 ++++++++ src/commands/skill-learning/skillPanel.tsx | 197 +++++ src/commands/skill-search/index.ts | 12 + .../skill-search/skillSearchPanel.tsx | 169 ++++ .../__tests__/promptEngineeringAudit.test.ts | 33 + .../promptEngineeringAudit.runner.ts | 731 ++++++++++++++++++ src/entrypoints/init.ts | 6 + .../skillLearning/__tests__/evolution.test.ts | 152 ++++ .../__tests__/instinctStore.test.ts | 143 ++++ .../__tests__/learningPolicy.test.ts | 81 ++ .../__tests__/observationStore.test.ts | 108 +++ .../__tests__/observerBackend.test.ts | 135 ++++ .../__tests__/projectContext.test.ts | 160 ++++ .../skillLearning/__tests__/promotion.test.ts | 144 ++++ .../__tests__/runtimeObserver.test.ts | 143 ++++ .../__tests__/sessionObserver.test.ts | 103 +++ .../__tests__/skillDedup.test.ts | 100 +++ .../__tests__/skillGapStore.test.ts | 360 +++++++++ .../__tests__/skillGenerator.test.ts | 56 ++ .../__tests__/skillLearningSmoke.test.ts | 154 ++++ .../__tests__/skillLifecycle.test.ts | 161 ++++ .../throttleAndCircuitBreaker.test.ts | 372 +++++++++ .../__tests__/toolEventObserver.test.ts | 196 +++++ src/services/skillLearning/agentGenerator.ts | 164 ++++ .../skillLearning/commandGenerator.ts | 167 ++++ src/services/skillLearning/config.ts 
| 52 ++ src/services/skillLearning/evolution.ts | 174 +++++ src/services/skillLearning/featureCheck.ts | 12 + src/services/skillLearning/index.ts | 37 + src/services/skillLearning/instinctParser.ts | 115 +++ src/services/skillLearning/instinctStore.ts | 258 +++++++ src/services/skillLearning/learningPolicy.ts | 106 +++ .../skillLearning/llmObserverBackend.ts | 301 ++++++++ .../skillLearning/observationStore.ts | 451 +++++++++++ src/services/skillLearning/observerBackend.ts | 71 ++ src/services/skillLearning/projectContext.ts | 264 +++++++ src/services/skillLearning/promotion.ts | 161 ++++ src/services/skillLearning/runtimeObserver.ts | 386 +++++++++ src/services/skillLearning/sessionObserver.ts | 296 +++++++ src/services/skillLearning/skillGapStore.ts | 499 ++++++++++++ src/services/skillLearning/skillGenerator.ts | 206 +++++ src/services/skillLearning/skillLifecycle.ts | 496 ++++++++++++ .../skillLearning/toolEventObserver.ts | 312 ++++++++ src/services/skillLearning/types.ts | 109 +++ .../__tests__/intentNormalize.test.ts | 229 ++++++ .../skillSearch/__tests__/localSearch.test.ts | 221 ++++++ .../__tests__/prefetch.extractQuery.test.ts | 123 +++ .../skillSearch/__tests__/prefetch.test.ts | 101 +++ src/services/skillSearch/featureCheck.ts | 13 +- src/services/skillSearch/intentNormalize.ts | 149 ++++ src/services/skillSearch/localSearch.ts | 447 ++++++++++- src/services/skillSearch/prefetch.ts | 324 +++++++- src/services/skillSearch/signals.ts | 10 +- src/services/tools/toolExecution.ts | 82 +- src/tools.ts | 5 + .../hooks/__tests__/skillImprovement.test.ts | 26 + src/utils/hooks/skillImprovement.ts | 20 +- 64 files changed, 11009 insertions(+), 36 deletions(-) create mode 100644 AGENTS.md create mode 100644 packages/builtin-tools/src/tools/DiscoverSkillsTool/DiscoverSkillsTool.ts create mode 100644 packages/builtin-tools/src/tools/DiscoverSkillsTool/__tests__/DiscoverSkillsTool.test.ts create mode 100644 
src/commands/skill-learning/__tests__/skill-learning.test.ts create mode 100644 src/commands/skill-learning/index.ts create mode 100644 src/commands/skill-learning/skill-learning.ts create mode 100644 src/commands/skill-learning/skillPanel.tsx create mode 100644 src/commands/skill-search/index.ts create mode 100644 src/commands/skill-search/skillSearchPanel.tsx create mode 100644 src/constants/__tests__/promptEngineeringAudit.test.ts create mode 100644 src/constants/promptEngineeringAudit.runner.ts create mode 100644 src/services/skillLearning/__tests__/evolution.test.ts create mode 100644 src/services/skillLearning/__tests__/instinctStore.test.ts create mode 100644 src/services/skillLearning/__tests__/learningPolicy.test.ts create mode 100644 src/services/skillLearning/__tests__/observationStore.test.ts create mode 100644 src/services/skillLearning/__tests__/observerBackend.test.ts create mode 100644 src/services/skillLearning/__tests__/projectContext.test.ts create mode 100644 src/services/skillLearning/__tests__/promotion.test.ts create mode 100644 src/services/skillLearning/__tests__/runtimeObserver.test.ts create mode 100644 src/services/skillLearning/__tests__/sessionObserver.test.ts create mode 100644 src/services/skillLearning/__tests__/skillDedup.test.ts create mode 100644 src/services/skillLearning/__tests__/skillGapStore.test.ts create mode 100644 src/services/skillLearning/__tests__/skillGenerator.test.ts create mode 100644 src/services/skillLearning/__tests__/skillLearningSmoke.test.ts create mode 100644 src/services/skillLearning/__tests__/skillLifecycle.test.ts create mode 100644 src/services/skillLearning/__tests__/throttleAndCircuitBreaker.test.ts create mode 100644 src/services/skillLearning/__tests__/toolEventObserver.test.ts create mode 100644 src/services/skillLearning/agentGenerator.ts create mode 100644 src/services/skillLearning/commandGenerator.ts create mode 100644 src/services/skillLearning/config.ts create mode 100644 
src/services/skillLearning/evolution.ts create mode 100644 src/services/skillLearning/featureCheck.ts create mode 100644 src/services/skillLearning/index.ts create mode 100644 src/services/skillLearning/instinctParser.ts create mode 100644 src/services/skillLearning/instinctStore.ts create mode 100644 src/services/skillLearning/learningPolicy.ts create mode 100644 src/services/skillLearning/llmObserverBackend.ts create mode 100644 src/services/skillLearning/observationStore.ts create mode 100644 src/services/skillLearning/observerBackend.ts create mode 100644 src/services/skillLearning/projectContext.ts create mode 100644 src/services/skillLearning/promotion.ts create mode 100644 src/services/skillLearning/runtimeObserver.ts create mode 100644 src/services/skillLearning/sessionObserver.ts create mode 100644 src/services/skillLearning/skillGapStore.ts create mode 100644 src/services/skillLearning/skillGenerator.ts create mode 100644 src/services/skillLearning/skillLifecycle.ts create mode 100644 src/services/skillLearning/toolEventObserver.ts create mode 100644 src/services/skillLearning/types.ts create mode 100644 src/services/skillSearch/__tests__/intentNormalize.test.ts create mode 100644 src/services/skillSearch/__tests__/localSearch.test.ts create mode 100644 src/services/skillSearch/__tests__/prefetch.extractQuery.test.ts create mode 100644 src/services/skillSearch/__tests__/prefetch.test.ts create mode 100644 src/services/skillSearch/intentNormalize.ts create mode 100644 src/utils/hooks/__tests__/skillImprovement.test.ts diff --git a/.gitignore b/.gitignore index 6f0a4e069..bf422f8e9 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,11 @@ src/utils/vendor/ /*.png *.bmp +# Internal system prompt documents +Claude-Opus-*.txt +Claude-Sonnet-*.txt +Claude-Haiku-*.txt + # Agent / tool state dirs .swarm/ .agents/__pycache__/ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..d1404eee6 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,283 @@ +# 
AGENTS.md + +This file provides guidance to Codex (Codex.ai/code) when working with code in this repository. + +## Project Overview + +This is a **reverse-engineered / decompiled** version of Anthropic's official Codex CLI tool. The goal is to restore core functionality while trimming secondary capabilities. Many modules are stubbed or feature-flagged off. TypeScript strict mode is enforced — **`bunx tsc --noEmit` must pass with zero errors**. + +## Git Commit Message Convention + +使用 **Conventional Commits** 规范: + +``` +<type>: <描述> +``` + +常见 type:`feat`、`fix`、`docs`、`chore`、`refactor` + +示例: +- `feat: 添加模型 1M 上下文切换` +- `fix: 修复初次登陆的校验问题` +- `chore: remove prefetchOfficialMcpUrls call on startup` + +## Commands + +```bash +# Install dependencies +bun install + +# Dev mode (runs cli.tsx with MACRO defines injected via -d flags) +bun run dev + +# Dev mode with debugger (set BUN_INSPECT=9229 to pick port) +bun run dev:inspect + +# Pipe mode +echo "say hello" | bun run src/entrypoints/cli.tsx -p + +# Build (code splitting, outputs dist/cli.js + chunk files) +bun run build + +# Test +bun test # run all tests (2453 tests / 137 files / 0 fail) +bun test src/utils/__tests__/hash.test.ts # run single file +bun test --coverage # with coverage report + +# Lint & Format (Biome) +bun run lint # check only +bun run lint:fix # auto-fix +bun run format # format all src/ + +# Health check +bun run health + +# Check unused exports +bun run check:unused + +# Remote Control Server +bun run rcs + +# Docs dev server (Mintlify) +bun run docs:dev +``` + +详细的测试规范、覆盖状态和改进计划见 `docs/testing-spec.md`。 + +## Architecture + +### Runtime & Build + +- **Runtime**: Bun (not Node.js). All imports, builds, and execution use Bun APIs. 
+- **Build**: `build.ts` 执行 `Bun.build()` with `splitting: true`,入口 `src/entrypoints/cli.tsx`,输出 `dist/cli.js` + chunk files。Build 默认启用 19 个 feature(见下方 Feature Flag 段)。构建后自动替换 `import.meta.require` 为 Node.js 兼容版本(产物 bun/node 都可运行)。 +- **Dev mode**: `scripts/dev.ts` 通过 Bun `-d` flag 注入 `MACRO.*` defines,运行 `src/entrypoints/cli.tsx`。默认启用全部 feature。 +- **Module system**: ESM (`"type": "module"`), TSX with `react-jsx` transform. +- **Monorepo**: Bun workspaces — 14 个 internal packages in `packages/` resolved via `workspace:*`。 +- **Lint/Format**: Biome (`biome.json`)。`bun run lint` / `bun run lint:fix` / `bun run format`。 +- **Defines**: 集中管理在 `scripts/defines.ts`。当前版本 `2.1.888`。 +- **CI**: GitHub Actions — `ci.yml`(构建+测试)、`release-rcs.yml`(RCS 发布)、`update-contributors.yml`(自动更新贡献者)。 + +### Entry & Bootstrap + +1. **`src/entrypoints/cli.tsx`** (323 行) — True entrypoint。`main()` 函数按优先级处理多条快速路径: + - `--version` / `-v` — 零模块加载 + - `--dump-system-prompt` — feature-gated (DUMP_SYSTEM_PROMPT) + - `--Codex-in-chrome-mcp` / `--chrome-native-host` + - `--computer-use-mcp` — 独立 MCP server 模式 + - `--daemon-worker=<kind>` — feature-gated (DAEMON) + - `remote-control` / `rc` / `remote` / `sync` / `bridge` — feature-gated (BRIDGE_MODE) + - `daemon` [subcommand] — feature-gated (DAEMON) + - `ps` / `logs` / `attach` / `kill` / `--bg` — feature-gated (BG_SESSIONS) + - `new` / `list` / `reply` — Template job commands + - `environment-runner` / `self-hosted-runner` — BYOC runner + - `--tmux` + `--worktree` 组合 + - 默认路径:加载 `main.tsx` 启动完整 CLI +2. **`src/main.tsx`** (~6970 行) — Commander.js CLI definition。注册大量 subcommands:`mcp` (serve/add/remove/list...)、`server`、`ssh`、`open`、`auth`、`plugin`、`agents`、`auto-mode`、`doctor`、`update` 等。主 `.action()` 处理器负责权限、MCP、会话恢复、REPL/Headless 模式分发。 +3. **`src/entrypoints/init.ts`** — One-time initialization (telemetry, config, trust dialog)。 + +### Core Loop + +- **`src/query.ts`** — The main API query function. 
Sends messages to Codex API, handles streaming responses, processes tool calls, and manages the conversation turn loop. +- **`src/QueryEngine.ts`** — Higher-level orchestrator wrapping `query()`. Manages conversation state, compaction, file history snapshots, attribution, and turn-level bookkeeping. Used by the REPL screen. +- **`src/screens/REPL.tsx`** — The interactive REPL screen (React/Ink component). Handles user input, message display, tool permission prompts, and keyboard shortcuts. + +### API Layer + +- **`src/services/api/Codex.ts`** — Core API client. Builds request params (system prompt, messages, tools, betas), calls the Anthropic SDK streaming endpoint, and processes `BetaRawMessageStreamEvent` events. +- **7 providers**: `firstParty` (Anthropic direct), `bedrock` (AWS), `vertex` (Google Cloud), `foundry`, `openai`, `gemini`, `grok` (xAI)。 +- Provider selection in `src/utils/model/providers.ts`。优先级:modelType 参数 > 环境变量 > 默认 firstParty。 + +### Tool System + +- **`src/Tool.ts`** — Tool interface definition (`Tool` type) and utilities (`findToolByName`, `toolMatchesName`). +- **`src/tools.ts`** (387 行) — Tool registry. Assembles the tool list; some tools are conditionally loaded via `feature()` flags or `process.env.USER_TYPE`. +- **`src/tools/<ToolName>/`** — 55 个 tool 目录。主要分类: + - **文件操作**: FileEditTool, FileReadTool, FileWriteTool, GlobTool, GrepTool + - **Shell/执行**: BashTool, PowerShellTool, REPLTool + - **Agent 系统**: AgentTool, TaskCreateTool, TaskUpdateTool, TaskListTool, TaskGetTool + - **规划**: EnterPlanModeTool, ExitPlanModeV2Tool, VerifyPlanExecutionTool + - **Web/MCP**: WebFetchTool, WebSearchTool, MCPTool, McpAuthTool + - **调度**: CronCreateTool, CronDeleteTool, CronListTool + - **其他**: LSPTool, ConfigTool, SkillTool, EnterWorktreeTool, ExitWorktreeTool 等 +- **`src/tools/shared/`** — Tool 共享工具函数。 + +### UI Layer (Ink) + +- **`src/ink.ts`** — Ink render wrapper with ThemeProvider injection. 
+- **`packages/@ant/ink/`** — Custom Ink framework(forked/internal),包含 components、core、hooks、keybindings、theme、utils。注意:不是 `src/ink/`。 +- **`src/components/`** — 149 个组件目录/文件,渲染于终端 Ink 环境中。关键组件: + - `App.tsx` — Root provider (AppState, Stats, FpsMetrics) + - `Messages.tsx` / `MessageRow.tsx` — Conversation message rendering + - `PromptInput/` — User input handling + - `permissions/` — Tool permission approval UI + - `design-system/` — 复用 UI 组件(Dialog, FuzzyPicker, ProgressBar, ThemeProvider 等) +- Components use React Compiler runtime (`react/compiler-runtime`) — decompiled output has `_c()` memoization calls throughout. + +### State Management + +- **`src/state/AppState.tsx`** — Central app state type and context provider. Contains messages, tools, permissions, MCP connections, etc. +- **`src/state/AppStateStore.ts`** — Default state and store factory. +- **`src/state/store.ts`** — Zustand-style store for AppState (`createStore`). +- **`src/state/selectors.ts`** — State selectors. +- **`src/bootstrap/state.ts`** — Module-level singletons for session-global state (session ID, CWD, project root, token counts, model overrides, client type, permission mode). 
+ +### Workspace Packages + +| Package | 说明 | +|---------|------| +| `packages/@ant/ink/` | Forked Ink 框架(components、hooks、keybindings、theme) | +| `packages/@ant/computer-use-mcp/` | Computer Use MCP server(截图/键鼠/剪贴板/应用管理) | +| `packages/@ant/computer-use-input/` | 键鼠模拟(dispatcher + darwin/win32/linux backend) | +| `packages/@ant/computer-use-swift/` | 截图 + 应用管理(dispatcher + per-platform backend) | +| `packages/@ant/Codex-for-chrome-mcp/` | Chrome 浏览器控制(通过 `--chrome` 启用) | +| `packages/remote-control-server/` | 自托管 Remote Control Server(Docker 部署,含 Web UI) | +| `packages/swarm/` | Swarm 解耦模块 | +| `packages/shell/` | Shell 抽象 | +| `packages/audio-capture-napi/` | 原生音频捕获(已恢复) | +| `packages/color-diff-napi/` | 颜色差异计算(完整实现,11 tests) | +| `packages/image-processor-napi/` | 图像处理(已恢复) | +| `packages/modifiers-napi/` | 键盘修饰键检测(stub) | +| `packages/url-handler-napi/` | URL scheme 处理(stub) | + +### Bridge / Remote Control + +- **`src/bridge/`** (~37 files) — Remote Control / Bridge 模式。feature-gated by `BRIDGE_MODE`。包含 bridge API、会话管理、JWT 认证、消息传输、权限回调等。Entry: `bridgeMain.ts`。 +- **`packages/remote-control-server/`** — 自托管 RCS,支持 Docker 部署,含 Web UI 控制面板。通过 `bun run rcs` 启动。 +- CLI 快速路径: `Codex remote-control` / `Codex rc` / `Codex bridge`。 +- 详见 `docs/features/remote-control-self-hosting.md`。 + +### Daemon Mode + +- **`src/daemon/`** — Daemon 模式(长驻 supervisor)。feature-gated by `DAEMON`。包含 `main.ts`(entry)和 `workerRegistry.ts`(worker 管理)。 + +### Context & System Prompt + +- **`src/context.ts`** — Builds system/user context for the API call (git status, date, AGENTS.md contents, memory files). +- **`src/utils/claudemd.ts`** — Discovers and loads AGENTS.md files from project hierarchy. + +### Feature Flag System + +Feature flags control which functionality is enabled at runtime. 
代码中统一通过 `import { feature } from 'bun:bundle'` 导入,调用 `feature('FLAG_NAME')` 返回 `boolean`。 + +**启用方式**: 环境变量 `FEATURE_<FLAG_NAME>=1`。例如 `FEATURE_BUDDY=1 bun run dev`。 + +**Build 默认 features**(19 个,见 `build.ts`): +- 基础: `BUDDY`, `TRANSCRIPT_CLASSIFIER`, `BRIDGE_MODE`, `AGENT_TRIGGERS_REMOTE`, `CHICAGO_MCP`, `VOICE_MODE` +- 统计/缓存: `SHOT_STATS`, `PROMPT_CACHE_BREAK_DETECTION`, `TOKEN_BUDGET` +- P0 本地: `AGENT_TRIGGERS`, `ULTRATHINK`, `BUILTIN_EXPLORE_PLAN_AGENTS`, `LODESTONE` +- P1 API 依赖: `EXTRACT_MEMORIES`, `VERIFICATION_AGENT`, `KAIROS_BRIEF`, `AWAY_SUMMARY`, `ULTRAPLAN` +- P2: `DAEMON` + +**Dev mode 默认**: 全部启用(见 `scripts/dev.ts`)。 + +**类型声明**: `src/types/internal-modules.d.ts` 中声明了 `bun:bundle` 模块的 `feature` 函数签名。 + +**新增功能的正确做法**: 保留 `import { feature } from 'bun:bundle'` + `feature('FLAG_NAME')` 的标准模式,在运行时通过环境变量或配置控制,不要绕过 feature flag 直接 import。 + +### Multi-API 兼容层 + +所有兼容层均采用流适配器模式:将第三方 API 格式转为 Anthropic 内部格式,下游代码完全不改。 + +#### OpenAI 兼容层 + +通过 `CLAUDE_CODE_USE_OPENAI=1` 启用,支持 Ollama/DeepSeek/vLLM 等任意 OpenAI Chat Completions 协议端点。含 DeepSeek thinking mode 支持。 + +- **`src/services/api/openai/`** — client、消息/工具转换、流适配、模型映射 +- 关键环境变量:`CLAUDE_CODE_USE_OPENAI`、`OPENAI_API_KEY`、`OPENAI_BASE_URL`、`OPENAI_MODEL` + +#### Gemini 兼容层 + +通过 `CLAUDE_CODE_USE_GEMINI=1` 启用。独立环境变量体系。 + +- **`src/services/api/gemini/`** — client、模型映射、类型定义 +- 关键环境变量:`GEMINI_API_KEY`(必填)、`GEMINI_MODEL`(直接指定)、`GEMINI_DEFAULT_SONNET_MODEL`/`GEMINI_DEFAULT_OPUS_MODEL`(按能力映射) +- 模型映射优先级:`GEMINI_MODEL` > `GEMINI_DEFAULT_*_MODEL` > `ANTHROPIC_DEFAULT_*_MODEL`(已废弃) > 原样返回 + +#### Grok 兼容层 + +通过 `CLAUDE_CODE_USE_GROK=1` 启用。自定义模型映射支持 xAI Grok API。 + +- **`src/services/api/grok/`** — client、模型映射 + +详见各兼容层的 docs 文档。 + +### Stubbed/Deleted Modules + +| Module | Status | +|--------|--------| +| Computer Use (`@ant/*`) | Restored — macOS + Windows + Linux(后端完整度不一) | +| `*-napi` packages | `audio-capture-napi`、`image-processor-napi` 已恢复;`color-diff-napi` 完整;`modifiers-napi`、`url-handler-napi` 仍为 stub | +| Voice Mode | 
Restored — Push-to-Talk 语音输入(需 Anthropic OAuth) | +| OpenAI/Gemini/Grok 兼容层 | Restored | +| Remote Control Server | Restored — 自托管 RCS + Web UI | +| Analytics / GrowthBook / Sentry | Empty implementations | +| Magic Docs / LSP Server | Removed | +| Plugins / Marketplace | Removed | +| MCP OAuth | Simplified | + +### Key Type Files + +- **`src/types/global.d.ts`** — Declares `MACRO`, `BUILD_TARGET`, `BUILD_ENV` and internal Anthropic-only identifiers. +- **`src/types/internal-modules.d.ts`** — Type declarations for `bun:bundle`, `bun:ffi`, `@anthropic-ai/mcpb`. +- **`src/types/message.ts`** — Message type hierarchy (UserMessage, AssistantMessage, SystemMessage, etc.). +- **`src/types/permissions.ts`** — Permission mode and result types. + +## Testing + +- **框架**: `bun:test`(内置断言 + mock) +- **当前状态**: 2472 tests / 138 files / 0 fail +- **单元测试**: 就近放置于 `src/**/__tests__/`,文件名 `<module>.test.ts` +- **集成测试**: `tests/integration/` — 4 个文件(cli-arguments, context-build, message-pipeline, tool-chain) +- **共享 mock/fixture**: `tests/mocks/`(api-responses, file-system, fixtures/) +- **命名**: `describe("functionName")` + `test("behavior description")`,英文 +- **Mock 模式**: 对重依赖模块使用 `mock.module()` + `await import()` 解锁(必须内联在测试文件中,不能从共享 helper 导入) +- **包测试**: `packages/` 下各包也有独立测试(如 `color-diff-napi` 11 tests) + +### 类型检查 + +项目使用 TypeScript strict 模式,**tsc 必须零错误**。每次修改后运行: + +```bash +bunx tsc --noEmit +``` + +**类型规范**: +- 生产代码禁止 `as any`;测试文件中 mock 数据可用 `as any` +- 类型不匹配优先用 `as unknown as SpecificType` 双重断言,或补充 interface +- 未知结构对象用 `Record<string, unknown>` 替代 `any` +- 联合类型用类型守卫(type guard)收窄,不要强转 +- `msg.request` 属性访问:`const req = msg.request as Record<string, unknown>` +- Ink `color` prop:用 `as keyof Theme` 而非 `as any` + +## Working with This Codebase + +- **tsc must pass** — `bunx tsc --noEmit` 必须零错误,任何修改都不能引入新的类型错误。 +- **Feature flags** — 默认全部关闭(`feature()` 返回 `false`)。Dev/build 各有自己的默认启用列表。不要在 `cli.tsx` 中重定义 `feature` 函数。 +- **React Compiler output** — Components have decompiled memoization boilerplate (`const $ = 
_c(N)`). This is normal. +- **`bun:bundle` import** — `import { feature } from 'bun:bundle'` 是 Bun 内置模块,由运行时/构建器解析。不要用自定义函数替代它。**`feature()` 只能直接用在 `if` 语句或三元表达式的条件位置**(Bun 编译器限制),不能赋值给变量、不能放在箭头函数体里、不能作为 `&&` 链的一部分。正确:`if (feature('X')) {}` 或 `feature('X') ? a : b`。 +- **`src/` path alias** — tsconfig maps `src/*` to `./src/*`. Imports like `import { ... } from 'src/utils/...'` are valid. +- **MACRO defines** — 集中管理在 `scripts/defines.ts`。Dev mode 通过 `bun -d` 注入,build 通过 `Bun.build({ define })` 注入。修改版本号等常量只改这个文件。 +- **构建产物兼容 Node.js** — `build.ts` 会自动后处理 `import.meta.require`,产物可直接用 `node dist/cli.js` 运行。 +- **Biome 配置** — 大量 lint 规则被关闭(decompiled 代码不适合严格 lint)。`.tsx` 文件用 120 行宽 + 强制分号;其他文件 80 行宽 + 按需分号。 +- **Ink 框架在 `packages/@ant/ink/`** — 不是 `src/ink/`(该目录不存在)。Ink 相关的组件、hooks、keybindings 都在 packages 中。 +- **Provider 优先级** — `modelType` 参数 > 环境变量 > 默认 `firstParty`。新增 provider 需在 `src/utils/model/providers.ts` 注册。 diff --git a/packages/builtin-tools/src/tools/DiscoverSkillsTool/DiscoverSkillsTool.ts b/packages/builtin-tools/src/tools/DiscoverSkillsTool/DiscoverSkillsTool.ts new file mode 100644 index 000000000..d2f56c112 --- /dev/null +++ b/packages/builtin-tools/src/tools/DiscoverSkillsTool/DiscoverSkillsTool.ts @@ -0,0 +1,107 @@ +import { z } from 'zod/v4' +import type { ToolResultBlockParam } from 'src/Tool.js' +import { buildTool } from 'src/Tool.js' +import { lazySchema } from 'src/utils/lazySchema.js' +import { + DISCOVER_SKILLS_TOOL_NAME, + DESCRIPTION, + DISCOVER_SKILLS_PROMPT, +} from './prompt.js' + +const inputSchema = lazySchema(() => + z.strictObject({ + description: z + .string() + .describe( + 'Description of what you want to do. Be specific — e.g. 
"deploy a Next.js app to Cloudflare Workers" rather than just "deploy".', + ), + limit: z + .number() + .optional() + .describe('Maximum number of results to return (default: 5)'), + }), +) +type InputSchema = ReturnType +type DiscoverInput = z.infer + +type DiscoverOutput = { + results: Array<{ name: string; description: string; score: number }> + count: number +} + +export const DiscoverSkillsTool = buildTool({ + name: DISCOVER_SKILLS_TOOL_NAME, + searchHint: 'find search discover skills commands tools capabilities', + maxResultSizeChars: 10_000, + strict: true, + + get inputSchema(): InputSchema { + return inputSchema() + }, + + async description() { + return DESCRIPTION + }, + async prompt() { + return DISCOVER_SKILLS_PROMPT + }, + + isConcurrencySafe() { + return true + }, + isReadOnly() { + return true + }, + + userFacingName() { + return 'Discover Skills' + }, + + renderToolUseMessage(input: Partial) { + return `Searching skills: ${input.description?.slice(0, 80) ?? '...'}` + }, + + mapToolResultToToolResultBlockParam( + content: DiscoverOutput, + toolUseID: string, + ): ToolResultBlockParam { + if (content.count === 0) { + return { + tool_use_id: toolUseID, + type: 'tool_result', + content: 'No matching skills found for that description.', + } + } + const lines = content.results.map( + (r, i) => + `${i + 1}. **${r.name}** (score: ${r.score.toFixed(2)})\n ${r.description}`, + ) + return { + tool_use_id: toolUseID, + type: 'tool_result', + content: `Found ${content.count} relevant skill(s):\n\n${lines.join('\n\n')}`, + } + }, + + async call(input: DiscoverInput, context) { + const { getSkillIndex, searchSkills } = await import( + 'src/services/skillSearch/localSearch.js' + ) + const { getCwd } = await import('src/utils/cwd.js') + const cwd = getCwd() + + const index = await getSkillIndex(cwd) + const results = searchSkills(input.description, index, input.limit ?? 
5) + + return { + data: { + results: results.map(r => ({ + name: r.name, + description: r.description, + score: r.score, + })), + count: results.length, + }, + } + }, +}) diff --git a/packages/builtin-tools/src/tools/DiscoverSkillsTool/__tests__/DiscoverSkillsTool.test.ts b/packages/builtin-tools/src/tools/DiscoverSkillsTool/__tests__/DiscoverSkillsTool.test.ts new file mode 100644 index 000000000..97e8a541e --- /dev/null +++ b/packages/builtin-tools/src/tools/DiscoverSkillsTool/__tests__/DiscoverSkillsTool.test.ts @@ -0,0 +1,54 @@ +import { describe, test, expect } from 'bun:test' +import { DISCOVER_SKILLS_TOOL_NAME } from '../prompt.js' + +describe('DiscoverSkillsTool', () => { + test('DISCOVER_SKILLS_TOOL_NAME is not empty', () => { + expect(DISCOVER_SKILLS_TOOL_NAME).toBe('DiscoverSkills') + expect(DISCOVER_SKILLS_TOOL_NAME.length).toBeGreaterThan(0) + }) + + test('tool exports are functions', async () => { + const { DiscoverSkillsTool } = await import('../DiscoverSkillsTool.js') + expect(DiscoverSkillsTool).toBeDefined() + expect(DiscoverSkillsTool.name).toBe('DiscoverSkills') + expect(typeof DiscoverSkillsTool.call).toBe('function') + }) + + test('tool has correct metadata', async () => { + const { DiscoverSkillsTool } = await import('../DiscoverSkillsTool.js') + expect(await DiscoverSkillsTool.description()).toContain('skill') + expect(DiscoverSkillsTool.userFacingName()).toBe('Discover Skills') + expect(DiscoverSkillsTool.isReadOnly()).toBe(true) + expect(DiscoverSkillsTool.isConcurrencySafe()).toBe(true) + }) + + test('renderToolUseMessage formats input', async () => { + const { DiscoverSkillsTool } = await import('../DiscoverSkillsTool.js') + const msg = DiscoverSkillsTool.renderToolUseMessage({ + description: 'deploy to cloudflare', + }) + expect(msg).toContain('deploy to cloudflare') + }) + + test('mapToolResultToToolResultBlockParam formats empty results', async () => { + const { DiscoverSkillsTool } = await import('../DiscoverSkillsTool.js') + const 
result = DiscoverSkillsTool.mapToolResultToToolResultBlockParam( + { results: [], count: 0 }, + 'test-id', + ) + expect(result.content).toContain('No matching skills') + }) + + test('mapToolResultToToolResultBlockParam formats results', async () => { + const { DiscoverSkillsTool } = await import('../DiscoverSkillsTool.js') + const result = DiscoverSkillsTool.mapToolResultToToolResultBlockParam( + { + results: [{ name: 'test-skill', description: 'A test skill', score: 0.85 }], + count: 1, + }, + 'test-id', + ) + expect(result.content).toContain('test-skill') + expect(result.content).toContain('0.85') + }) +}) diff --git a/packages/builtin-tools/src/tools/DiscoverSkillsTool/prompt.ts b/packages/builtin-tools/src/tools/DiscoverSkillsTool/prompt.ts index 20ddc1ab7..24b0437e0 100644 --- a/packages/builtin-tools/src/tools/DiscoverSkillsTool/prompt.ts +++ b/packages/builtin-tools/src/tools/DiscoverSkillsTool/prompt.ts @@ -1,3 +1,13 @@ -// Auto-generated stub — replace with real implementation -export {}; -export const DISCOVER_SKILLS_TOOL_NAME: string = ''; +export const DISCOVER_SKILLS_TOOL_NAME = 'DiscoverSkills' + +export const DESCRIPTION = + 'Search for relevant skills by describing what you want to do' + +export const DISCOVER_SKILLS_PROMPT = `Search for skills relevant to a task description. Returns matching skills ranked by relevance. + +Use this when: +- The auto-surfaced skills don't cover your current task +- You're pivoting to a different kind of work mid-conversation +- You want to find specialized skills for an unusual workflow + +The search uses TF-IDF keyword matching against all registered skills (bundled, user-defined, and MCP-provided). 
Results include skill name, description, and relevance score.` diff --git a/src/commands/skill-learning/__tests__/skill-learning.test.ts b/src/commands/skill-learning/__tests__/skill-learning.test.ts new file mode 100644 index 000000000..7cc12edfc --- /dev/null +++ b/src/commands/skill-learning/__tests__/skill-learning.test.ts @@ -0,0 +1,152 @@ +import { afterEach, beforeEach, describe, expect, test } from 'bun:test' +import { mkdtempSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { call } from '../skill-learning.js' +import { + recordSkillGap, + saveInstinct, + createInstinct, + resolveProjectContext, +} from '../../../services/skillLearning/index.js' + +let root: string +const originalEnv = { ...process.env } + +beforeEach(() => { + root = mkdtempSync(join(tmpdir(), 'skill-learning-command-')) + process.env = { ...originalEnv } + process.env.CLAUDE_SKILL_LEARNING_HOME = root + process.env.CLAUDE_CONFIG_DIR = join(root, 'config') + process.env.SKILL_LEARNING_ENABLED = '1' +}) + +afterEach(() => { + process.env = { ...originalEnv } + rmSync(root, { recursive: true, force: true }) +}) + +describe('skill-learning command', () => { + test('status reports observations and instincts', async () => { + const result = await call('status', {} as any) + + expect(result.type).toBe('text') + if (result.type === 'text') { + expect(result.value).toContain('Skill Learning status') + expect(result.value).toContain('Observations: 0') + } + }) + + test('promote (no args) prints usage and candidate summary', async () => { + const result = await call('promote', {} as any) + + expect(result.type).toBe('text') + if (result.type === 'text') { + expect(result.value).toContain('Promotion candidates') + expect(result.value).toContain('promote gap') + expect(result.value).toContain('promote instinct') + } + }) + + test('promote gap promotes a pending gap to draft', async () => { + const project = 
resolveProjectContext(process.cwd()) + const gap = await recordSkillGap({ + prompt: 'refactor the api gateway', + cwd: process.cwd(), + project, + rootDir: root, + }) + expect(gap.status).toBe('pending') + + const result = await call(`promote gap ${gap.key}`, {} as any) + + expect(result.type).toBe('text') + if (result.type === 'text') { + expect(result.value).toContain('Promoted gap') + expect(result.value).toContain('status=draft') + } + }) + + test('promote gap reports not found', async () => { + const result = await call('promote gap does-not-exist', {} as any) + expect(result.type).toBe('text') + if (result.type === 'text') { + expect(result.value).toContain('No gap found') + } + }) + + test('promote instinct copies a project instinct to global scope', async () => { + const project = resolveProjectContext(process.cwd()) + const instinct = createInstinct({ + trigger: 'when committing', + action: 'run tests first', + confidence: 0.85, + domain: 'testing', + source: 'session-observation', + scope: 'project', + projectId: project.projectId, + projectName: project.projectName, + evidence: ['observed twice'], + }) + await saveInstinct(instinct, { project, rootDir: root }) + + const result = await call(`promote instinct ${instinct.id}`, {} as any) + + expect(result.type).toBe('text') + if (result.type === 'text') { + expect(result.value).toContain('Promoted instinct') + expect(result.value).toContain('global scope') + } + }) + + test('projects lists known project scopes', async () => { + // Resolving once registers the current project in the registry. 
+ resolveProjectContext(root) + + const result = await call('projects', {} as any) + + expect(result.type).toBe('text') + if (result.type === 'text') { + expect( + result.value.includes('Known project scopes') || + result.value.includes('No known project scopes'), + ).toBe(true) + } + }) + + test('default help mentions promote and projects, no write-fixture', async () => { + const result = await call('unknown-sub', {} as any) + expect(result.type).toBe('text') + if (result.type === 'text') { + expect(result.value).toContain('promote') + expect(result.value).toContain('projects') + expect(result.value).not.toContain('write-fixture') + } + }) + + test('ingest imports transcript observations and instincts', async () => { + const transcript = join(root, 'session.jsonl') + writeFileSync( + transcript, + JSON.stringify({ + type: 'user', + sessionId: 's1', + cwd: root, + message: { role: 'user', content: '不要 mock,用 testing-library' }, + }) + '\n', + ) + + // Pass --min-session-length=0 so the 1-line test transcript is not skipped + // by the ECC-parity gate (default threshold: 10 observations). 
+ const result = await call( + `ingest ${transcript} --min-session-length=0`, + {} as any, + ) + + expect(result.type).toBe('text') + if (result.type === 'text') { + expect(result.value).toContain('Ingested') + expect(result.value).toContain('saved 1 instincts') + } + }) +}) diff --git a/src/commands/skill-learning/index.ts b/src/commands/skill-learning/index.ts new file mode 100644 index 000000000..a5afb655d --- /dev/null +++ b/src/commands/skill-learning/index.ts @@ -0,0 +1,15 @@ +import type { Command } from '../../commands.js' +import { isSkillLearningEnabled } from '../../services/skillLearning/featureCheck.js' + +const skillLearning = { + type: 'local-jsx', + name: 'skill-learning', + description: 'Manage skill learning (observe, analyze, evolve)', + argumentHint: + '[start|stop|about|status|ingest|evolve|export|import|prune|promote|projects]', + isEnabled: () => isSkillLearningEnabled(), + isHidden: false, + load: () => import('./skillPanel.js'), +} satisfies Command + +export default skillLearning diff --git a/src/commands/skill-learning/skill-learning.ts b/src/commands/skill-learning/skill-learning.ts new file mode 100644 index 000000000..febb0a833 --- /dev/null +++ b/src/commands/skill-learning/skill-learning.ts @@ -0,0 +1,310 @@ +import { join } from 'node:path' +import type { LocalCommandCall } from '../../types/command.js' +import { getClaudeConfigHomeDir } from '../../utils/envUtils.js' +import { + analyzeObservations, + applySkillLifecycleDecision, + compareExistingSkills, + decideSkillLifecycle, + exportInstincts, + findPromotionCandidates, + generateSkillCandidates, + importInstincts, + ingestTranscript, + listKnownProjects, + loadInstincts, + promoteGapToDraft, + prunePendingInstincts, + readObservations, + readSkillGaps, + resolveProjectContext, + saveInstinct, + upsertInstinct, +} from '../../services/skillLearning/index.js' + +export const call: LocalCommandCall = async ( + args, +): Promise<{ type: 'text'; value: string }> => { + const parts = 
args.trim().split(/\s+/).filter(Boolean) + const sub = parts[0] ?? 'status' + const project = resolveProjectContext(process.cwd()) + const rootDir = process.env.CLAUDE_SKILL_LEARNING_HOME + const options = { project, rootDir } + + switch (sub) { + case 'status': { + const [observations, instincts] = await Promise.all([ + readObservations(options), + loadInstincts(options), + ]) + return { + type: 'text', + value: [ + `Skill Learning status for ${project.projectName} (${project.projectId})`, + `Observations: ${observations.length}`, + `Instincts: ${instincts.length}`, + ].join('\n'), + } + } + case 'ingest': { + const transcript = parts[1] + if (!transcript) { + return { + type: 'text', + value: + 'Usage: /skill-learning ingest [--min-session-length=]', + } + } + const minSessionLength = parseFlagNumber( + parts, + '--min-session-length', + 10, + ) + const observations = await ingestTranscript(transcript, options) + if (observations.length < minSessionLength) { + return { + type: 'text', + value: `Session too short for learning (${observations.length} < min=${minSessionLength}). 
Skipping instinct extraction.`, + } + } + const instincts = analyzeObservations(observations) + const saved = [] + for (const instinct of instincts) { + saved.push(await upsertInstinct(instinct, options)) + } + return { + type: 'text', + value: `Ingested ${observations.length} observations and saved ${saved.length} instincts.`, + } + } + case 'evolve': { + const generate = parts.includes('--generate') + const instincts = await loadInstincts(options) + const drafts = generateSkillCandidates(instincts, { cwd: process.cwd() }) + const written = [] + if (generate) { + for (const draft of drafts) { + const roots = [ + join(process.cwd(), '.claude', 'skills'), + join(getClaudeConfigHomeDir(), 'skills'), + ] + const existing = await compareExistingSkills(draft, roots) + const decision = decideSkillLifecycle(draft, existing) + const result = await applySkillLifecycleDecision(decision) + written.push( + `${decision.type}: ${result.activePath ?? result.archivedPath ?? result.deletedPath ?? 'no active write'}`, + ) + } + } + return { + type: 'text', + value: generate + ? `Generated ${written.length} learned skill(s):\n${written.join('\n')}` + : `Found ${drafts.length} skill candidate(s). Use --generate to write them.`, + } + } + case 'export': { + const output = parts[1] ?? 'skill-learning-instincts.json' + const scope = parseFlagString(parts, '--scope') + const minConf = parseFlagNumber(parts, '--min-conf', undefined) + const domain = parseFlagString(parts, '--domain') + const filter = (instincts: Awaited>) => + instincts.filter(i => { + if (scope && i.scope !== scope) return false + if (minConf !== undefined && i.confidence < minConf) return false + if (domain && i.domain !== domain) return false + return true + }) + const all = await loadInstincts(options) + const filtered = filter(all) + if (filtered.length !== all.length) { + await exportInstincts(output, options) + // Re-write with filtered payload to honor filter args. 
+ const { writeFile } = await import('node:fs/promises') + await writeFile(output, `${JSON.stringify(filtered, null, 2)}\n`) + } else { + await exportInstincts(output, options) + } + const parts2: string[] = [ + `Exported ${filtered.length} instincts to ${output}`, + ] + if (scope || minConf !== undefined || domain) { + const filters: string[] = [] + if (scope) filters.push(`scope=${scope}`) + if (minConf !== undefined) filters.push(`min-conf=${minConf}`) + if (domain) filters.push(`domain=${domain}`) + parts2.push(`(filters: ${filters.join(', ')})`) + } + return { type: 'text', value: parts2.join(' ') } + } + case 'import': { + const input = parts[1] + if (!input) { + return { + type: 'text', + value: + 'Usage: /skill-learning import [--scope=] [--min-conf=] [--domain=] [--dry-run]', + } + } + const scope = parseFlagString(parts, '--scope') + const minConf = parseFlagNumber(parts, '--min-conf', undefined) + const domain = parseFlagString(parts, '--domain') + const dryRun = parts.includes('--dry-run') + // Read + filter first so --dry-run can truly skip persistence. The + // previous `importInstincts(...)` call wrote to disk before branching + // on --dry-run, which defeated the purpose of the flag. 
+ const { readFile: readFileFs } = await import('node:fs/promises') + const parsed = JSON.parse(await readFileFs(input, 'utf8')) as Awaited< + ReturnType + > + const filtered = parsed.filter(i => { + if (scope && i.scope !== scope) return false + if (minConf !== undefined && i.confidence < minConf) return false + if (domain && i.domain !== domain) return false + return true + }) + if (dryRun) { + return { + type: 'text', + value: `Dry run: would import ${filtered.length}/${parsed.length} instincts.`, + } + } + for (const instinct of filtered) { + await upsertInstinct(instinct, options) + } + return { + type: 'text', + value: `Imported ${filtered.length}/${parsed.length} instincts.`, + } + } + case 'prune': { + const maxAgeIndex = parts.indexOf('--max-age') + const maxAge = + maxAgeIndex >= 0 && parts[maxAgeIndex + 1] + ? Number(parts[maxAgeIndex + 1]) + : 30 + const pruned = await prunePendingInstincts(maxAge, options) + return { + type: 'text', + value: `Pruned ${pruned.length} pending instincts.`, + } + } + case 'promote': { + const target = parts[1] + if (!target) { + const gaps = await readSkillGaps(project, rootDir) + const instincts = await loadInstincts(options) + const candidates = findPromotionCandidates(instincts) + const lines = [ + `Promotion candidates for ${project.projectName} (${project.projectId}):`, + `Pending gaps: ${gaps.filter(g => g.status === 'pending').length}`, + `Global-eligible instincts (>=2 projects, avg confidence >=0.8): ${candidates.length}`, + '', + 'Usage:', + ' /skill-learning promote gap # pending gap -> draft', + ' /skill-learning promote instinct # project instinct -> global', + ] + return { type: 'text', value: lines.join('\n') } + } + + if (target === 'gap') { + const gapKey = parts[2] + if (!gapKey) { + return { + type: 'text', + value: 'Usage: /skill-learning promote gap ', + } + } + const updated = await promoteGapToDraft(gapKey, project, rootDir) + if (!updated) { + return { type: 'text', value: `No gap found for key 
"${gapKey}".` } + } + return { + type: 'text', + value: `Promoted gap ${gapKey} to status=${updated.status} (draft=${updated.draft?.skillPath ?? 'none'}).`, + } + } + + if (target === 'instinct') { + const instinctId = parts[2] + if (!instinctId) { + return { + type: 'text', + value: 'Usage: /skill-learning promote instinct ', + } + } + const projectInstincts = await loadInstincts(options) + const match = projectInstincts.find(i => i.id === instinctId) + if (!match) { + return { + type: 'text', + value: `No project-scoped instinct found for id "${instinctId}".`, + } + } + if (match.scope === 'global') { + return { + type: 'text', + value: `Instinct ${instinctId} is already global.`, + } + } + const globalCopy = { ...match, scope: 'global' as const } + await saveInstinct(globalCopy, { scope: 'global', rootDir }) + return { + type: 'text', + value: `Promoted instinct ${instinctId} to global scope.`, + } + } + + return { + type: 'text', + value: + 'Usage: /skill-learning promote [gap |instinct ]', + } + } + case 'projects': { + const projects = listKnownProjects() + if (projects.length === 0) { + return { type: 'text', value: 'No known project scopes yet.' 
} + } + const lines = ['Known project scopes:'] + for (const record of projects) { + const projectOptions = { project: record, rootDir } + const [instincts, observations] = await Promise.all([ + loadInstincts(projectOptions), + readObservations(projectOptions), + ]) + lines.push( + `- ${record.projectName} (${record.projectId}) — instincts: ${instincts.length}, observations: ${observations.length}, lastSeen: ${record.lastSeenAt}`, + ) + } + return { type: 'text', value: lines.join('\n') } + } + default: + return { + type: 'text', + value: + 'Usage: /skill-learning [status|ingest|evolve|export|import|prune|promote|projects]', + } + } +} + +function parseFlagString(parts: string[], flag: string): string | undefined { + const eqForm = parts.find(p => p.startsWith(`${flag}=`)) + if (eqForm) return eqForm.slice(flag.length + 1) || undefined + const idx = parts.indexOf(flag) + if (idx >= 0 && parts[idx + 1] && !parts[idx + 1].startsWith('--')) { + return parts[idx + 1] + } + return undefined +} + +function parseFlagNumber( + parts: string[], + flag: string, + fallback: T, +): number | T { + const raw = parseFlagString(parts, flag) + if (raw === undefined) return fallback + const value = Number(raw) + return Number.isFinite(value) ? 
value : fallback +} diff --git a/src/commands/skill-learning/skillPanel.tsx b/src/commands/skill-learning/skillPanel.tsx new file mode 100644 index 000000000..70d0379bf --- /dev/null +++ b/src/commands/skill-learning/skillPanel.tsx @@ -0,0 +1,197 @@ +import React, { useMemo, useState } from 'react'; +import { Box, Text, useInput } from '@anthropic/ink'; +import { Dialog } from '@anthropic/ink'; +import { useRegisterOverlay } from '../../context/overlayContext.js'; +import type { LocalJSXCommandOnDone } from '../../types/command.js'; +import { isSkillLearningEnabled } from '../../services/skillLearning/featureCheck.js'; + +type SkillAction = { + label: string; + description: string; + run: () => Promise; +}; + +const ACTION_LABEL_COLUMN_WIDTH = 28; + +const ABOUT_TEXT = `# Skill Learning (自动学习) + +Skill Learning 是一个闭环学习系统,通过观察用户的操作模式自动提取直觉(instinct), +并在达到阈值后生成可复用的 skill 文件、agent 和 command。 + +## 工作流程 +1. **Observe** — 记录每轮对话中的工具调用、用户纠正、错误解决模式 +2. **Analyze** — 使用启发式或 LLM 后端分析观察数据,提取 instinct candidate +3. **Evolve** — 将高置信度 instinct 聚类,生成 skill/agent/command 候选 +4. **Lifecycle** — 对生成的 skill 进行去重、版本比较、归档或替换 + +## 子命令 +- /skill-learning status — 查看当前项目的观察和直觉数量 +- /skill-learning ingest — 从 transcript 导入观察数据 +- /skill-learning evolve — 生成 skill 候选 (--generate 写入磁盘) +- /skill-learning export — 导出 instinct 为 JSON +- /skill-learning import — 导入 instinct JSON +- /skill-learning prune — 清理过期的 pending instinct +- /skill-learning promote — 将 instinct/gap 提升为全局范围 +- /skill-learning projects — 列出所有已知的项目范围 + +## 启用方式 +- SKILL_LEARNING_ENABLED=1 或 FEATURE_SKILL_LEARNING=1 +- 状态: ${isSkillLearningEnabled() ? 
'已启用' : '未启用'} +`; + +async function getStatusText(): Promise { + const { readObservations, loadInstincts, resolveProjectContext } = await import( + '../../services/skillLearning/index.js' + ); + const project = resolveProjectContext(process.cwd()); + const [observations, instincts] = await Promise.all([readObservations({ project }), loadInstincts({ project })]); + return [ + `Skill Learning status for ${project.projectName} (${project.projectId})`, + `Observations: ${observations.length}`, + `Instincts: ${instincts.length}`, + '', + `Skill Learning: ${isSkillLearningEnabled() ? 'enabled' : 'disabled'}`, + ].join('\n'); +} + +async function startSkillLearning(): Promise { + const lines: string[] = []; + + if (!isSkillLearningEnabled()) { + process.env.SKILL_LEARNING_ENABLED = '1'; + lines.push('Skill Learning: enabled (SKILL_LEARNING_ENABLED=1)'); + } else { + lines.push('Skill Learning: already enabled'); + } + + try { + const { initSkillLearning } = await import('../../services/skillLearning/runtimeObserver.js'); + initSkillLearning(); + lines.push('Runtime observer: initialized'); + } catch { + lines.push('Runtime observer: init skipped (not available)'); + } + + return lines.join('\n'); +} + +async function stopSkillLearning(): Promise { + const lines: string[] = []; + + if (isSkillLearningEnabled()) { + process.env.SKILL_LEARNING_ENABLED = '0'; + process.env.CLAUDE_SKILL_LEARNING_DISABLE = '1'; + lines.push('Skill Learning: disabled (SKILL_LEARNING_ENABLED=0)'); + } else { + lines.push('Skill Learning: already disabled'); + } + + return lines.join('\n'); +} + +function SkillPanel({ onDone }: { onDone: LocalJSXCommandOnDone }): React.ReactNode { + useRegisterOverlay('skill-panel'); + const [selectedIndex, setSelectedIndex] = useState(0); + + const actions = useMemo( + () => [ + { + label: 'Status', + description: 'Show skill learning status for current project', + run: getStatusText, + }, + { + label: 'Start', + description: 'Enable skill learning for this 
session', + run: startSkillLearning, + }, + { + label: 'Stop', + description: 'Disable skill learning for this session', + run: stopSkillLearning, + }, + { + label: 'About', + description: 'Detailed description of skill learning features', + run: () => Promise.resolve(ABOUT_TEXT), + }, + ], + [], + ); + + const selectCurrent = () => { + const action = actions[selectedIndex]; + if (!action) return; + void action.run().then(result => { + onDone(result, { display: 'system' }); + }); + }; + + useInput((_input, key) => { + if (key.upArrow) { + setSelectedIndex(index => Math.max(0, index - 1)); + return; + } + if (key.downArrow) { + setSelectedIndex(index => Math.min(actions.length - 1, index + 1)); + return; + } + if (key.return) { + selectCurrent(); + } + }); + + return ( + onDone('Skill panel dismissed', { display: 'system' })} + color="background" + hideInputGuide + > + + {actions.map((action, index) => ( + + {`${index === selectedIndex ? '›' : ' '} ${action.label}`.padEnd(ACTION_LABEL_COLUMN_WIDTH)} + {action.description} + + ))} + + ↑/↓ select · Enter run · Esc close + + + + ); +} + +export async function call(onDone: LocalJSXCommandOnDone, _context: unknown, args?: string): Promise { + const trimmed = args?.trim() ?? 
''; + + if (trimmed === 'start') { + onDone(await startSkillLearning(), { display: 'system' }); + return null; + } + if (trimmed === 'stop') { + onDone(await stopSkillLearning(), { display: 'system' }); + return null; + } + if (trimmed === 'about') { + onDone(ABOUT_TEXT, { display: 'system' }); + return null; + } + if (trimmed === 'status') { + onDone(await getStatusText(), { display: 'system' }); + return null; + } + + if (trimmed) { + const { call: textCall } = await import('./skill-learning.js'); + const result = await textCall(trimmed, {} as any); + if (result && typeof result === 'object' && 'value' in result) { + onDone((result as { value: string }).value, { display: 'system' }); + } + return null; + } + + return ; +} diff --git a/src/commands/skill-search/index.ts b/src/commands/skill-search/index.ts new file mode 100644 index 000000000..e3c35aea0 --- /dev/null +++ b/src/commands/skill-search/index.ts @@ -0,0 +1,12 @@ +import type { Command } from '../../commands.js' + +const skillSearch = { + type: 'local-jsx', + name: 'skill-search', + description: 'Control automatic skill matching during conversations', + argumentHint: '[start|stop|about|status]', + isHidden: false, + load: () => import('./skillSearchPanel.js'), +} satisfies Command + +export default skillSearch diff --git a/src/commands/skill-search/skillSearchPanel.tsx b/src/commands/skill-search/skillSearchPanel.tsx new file mode 100644 index 000000000..7361e6969 --- /dev/null +++ b/src/commands/skill-search/skillSearchPanel.tsx @@ -0,0 +1,169 @@ +import React, { useMemo, useState } from 'react'; +import { Box, Text, useInput } from '@anthropic/ink'; +import { Dialog } from '@anthropic/ink'; +import { useRegisterOverlay } from '../../context/overlayContext.js'; +import type { LocalJSXCommandOnDone } from '../../types/command.js'; +import { isSkillSearchEnabled } from '../../services/skillSearch/featureCheck.js'; + +type SkillSearchAction = { + label: string; + description: string; + run: () => Promise; 
+}; + +const ACTION_LABEL_COLUMN_WIDTH = 28; + +const ABOUT_TEXT = `# Skill Search (自动技能匹配) + +Skill Search 控制对话中的自动技能匹配功能。 + +启用后,Claude Code 会在每轮对话中自动搜索并加载与当前任务最相关的 skill 文件, +无需手动指定。搜索基于 TF-IDF 向量余弦相似度,支持英文词干化和 CJK bi-gram 分词。 + +## 工作原理 +1. 对话开始时,自动索引 .claude/skills/ 和 ~/.claude/skills/ 下的 Markdown 文件 +2. 每轮对话根据上下文自动匹配最相关的 skill +3. 匹配到的 skill 内容会作为上下文注入,指导 Claude Code 的行为 + +## 控制方式 +- /skill-search start — 启用自动匹配 +- /skill-search stop — 禁用自动匹配 +- /skill-search status — 查看当前状态 + +当前状态: ${isSkillSearchEnabled() ? '已启用' : '未启用'} +`; + +function getStatusText(): string { + return [ + 'Skill Search (自动技能匹配)', + `Status: ${isSkillSearchEnabled() ? 'enabled' : 'disabled'}`, + '', + 'When enabled, relevant skills are automatically matched and', + 'injected into conversation context each turn.', + ].join('\n'); +} + +async function startSkillSearch(): Promise { + if (isSkillSearchEnabled() && process.env.SKILL_SEARCH_ENABLED !== '0') { + return 'Skill Search: already enabled'; + } + + process.env.SKILL_SEARCH_ENABLED = '1'; + const lines = ['Skill Search: enabled (SKILL_SEARCH_ENABLED=1)']; + + try { + const { clearSkillIndexCache } = await import('../../services/skillSearch/localSearch.js'); + clearSkillIndexCache(); + lines.push('Skill index cache: cleared (will rebuild on next search)'); + } catch { + lines.push('Skill index cache: clear skipped'); + } + + return lines.join('\n'); +} + +async function stopSkillSearch(): Promise { + if (!isSkillSearchEnabled()) { + return 'Skill Search: already disabled'; + } + process.env.SKILL_SEARCH_ENABLED = '0'; + return 'Skill Search: disabled (SKILL_SEARCH_ENABLED=0)'; +} + +function SkillSearchPanel({ onDone }: { onDone: LocalJSXCommandOnDone }): React.ReactNode { + useRegisterOverlay('skill-search-panel'); + const [selectedIndex, setSelectedIndex] = useState(0); + + const actions = useMemo( + () => [ + { + label: 'Status', + description: 'Show whether automatic skill matching is active', + run: () => 
Promise.resolve(getStatusText()), + }, + { + label: 'Start', + description: 'Enable automatic skill matching for this session', + run: startSkillSearch, + }, + { + label: 'Stop', + description: 'Disable automatic skill matching for this session', + run: stopSkillSearch, + }, + { + label: 'About', + description: 'How automatic skill matching works', + run: () => Promise.resolve(ABOUT_TEXT), + }, + ], + [], + ); + + const selectCurrent = () => { + const action = actions[selectedIndex]; + if (!action) return; + void action.run().then(result => { + onDone(result, { display: 'system' }); + }); + }; + + useInput((_input, key) => { + if (key.upArrow) { + setSelectedIndex(index => Math.max(0, index - 1)); + return; + } + if (key.downArrow) { + setSelectedIndex(index => Math.min(actions.length - 1, index + 1)); + return; + } + if (key.return) { + selectCurrent(); + } + }); + + return ( + onDone('Skill search panel dismissed', { display: 'system' })} + color="background" + hideInputGuide + > + + {actions.map((action, index) => ( + + {`${index === selectedIndex ? '›' : ' '} ${action.label}`.padEnd(ACTION_LABEL_COLUMN_WIDTH)} + {action.description} + + ))} + + ↑/↓ select · Enter run · Esc close + + + + ); +} + +export async function call(onDone: LocalJSXCommandOnDone, _context: unknown, args?: string): Promise { + const trimmed = args?.trim() ?? 
''; + + if (trimmed === 'start') { + onDone(await startSkillSearch(), { display: 'system' }); + return null; + } + if (trimmed === 'stop') { + onDone(await stopSkillSearch(), { display: 'system' }); + return null; + } + if (trimmed === 'about') { + onDone(ABOUT_TEXT, { display: 'system' }); + return null; + } + if (trimmed === 'status') { + onDone(getStatusText(), { display: 'system' }); + return null; + } + + return ; +} diff --git a/src/constants/__tests__/promptEngineeringAudit.test.ts b/src/constants/__tests__/promptEngineeringAudit.test.ts new file mode 100644 index 000000000..a8bff30c3 --- /dev/null +++ b/src/constants/__tests__/promptEngineeringAudit.test.ts @@ -0,0 +1,33 @@ +/** + * promptEngineeringAudit.test.ts + * + * Thin subprocess wrapper that runs the real audit in an isolated bun:test + * process. This prevents the 30+ mock.module() calls in the runner from + * leaking into other test files in the same bun test batch. + */ + +import { describe, test, expect } from 'bun:test' +import { resolve, relative } from 'path' + +const PROJECT_ROOT = resolve(__dirname, '..', '..', '..') +const RUNNER_ABS = resolve(__dirname, '..', 'promptEngineeringAudit.runner.ts') +const RUNNER_REL = './' + relative(PROJECT_ROOT, RUNNER_ABS).replace(/\\/g, '/') + +describe('Opus 4.7 Prompt Engineering Audit', () => { + test('runs 64 audit checks in isolated subprocess', async () => { + const proc = Bun.spawn(['bun', 'test', RUNNER_REL], { + cwd: PROJECT_ROOT, + stdout: 'pipe', + stderr: 'pipe', + }) + const code = await proc.exited + if (code !== 0) { + const stderr = await new Response(proc.stderr).text() + const stdout = await new Response(proc.stdout).text() + const output = (stderr + '\n' + stdout).slice(-3000) + throw new Error( + `Prompt audit subprocess failed (exit ${code}):\n${output}`, + ) + } + }, 60_000) +}) diff --git a/src/constants/promptEngineeringAudit.runner.ts b/src/constants/promptEngineeringAudit.runner.ts new file mode 100644 index 000000000..60291f135 
--- /dev/null +++ b/src/constants/promptEngineeringAudit.runner.ts @@ -0,0 +1,731 @@ +/** + * promptEngineeringAudit.test.ts + * + * 验证 prompts.ts 中从 Opus 4.7 官方 prompt 借鉴的提示词工程改进。 + * 对应审计文档: docs/features/opus-4.7-prompt-engineering-audit.md + * + * 测试策略: 通过 getSystemPrompt() 生成完整 system prompt, + * 然后检查关键段落是否存在。大部分被测函数是 module-private, + * 只能通过最终输出间接验证。 + */ + +import { describe, test, expect, mock, beforeEach } from 'bun:test' + +// --- MACRO 全局注入 (编译时 define 在测试中不可用) --- +;(globalThis as any).MACRO = { + VERSION: '2.1.888', + BUILD_TIME: '2026-04-22T00:00:00Z', + FEEDBACK_CHANNEL: '', + ISSUES_EXPLAINER: 'report issues on GitHub', + NATIVE_PACKAGE_URL: '', + PACKAGE_URL: '', + VERSION_CHANGELOG: '', +} + +// --- Mock 链 (阻断副作用) --- + +mock.module('src/bootstrap/state.js', () => ({ + getIsNonInteractiveSession: () => false, + sessionId: 'test-session', + getCwd: () => '/test/project', +})) +mock.module('src/utils/cwd.js', () => ({ + getCwd: () => '/test/project', +})) +mock.module('src/utils/git.js', () => ({ + getIsGit: async () => true, +})) +mock.module('src/utils/worktree.js', () => ({ + getCurrentWorktreeSession: () => null, +})) +mock.module('src/constants/common.js', () => ({ + getSessionStartDate: () => '2026-04-22', +})) +mock.module('src/utils/settings/settings.js', () => ({ + getInitialSettings: () => ({ language: undefined }), +})) +mock.module('src/commands/poor/poorMode.js', () => ({ + isPoorModeActive: () => false, +})) +mock.module('src/utils/env.js', () => ({ + env: { platform: 'linux' }, +})) +mock.module('src/utils/envUtils.js', () => ({ + isEnvTruthy: () => false, +})) +mock.module('src/utils/model/model.js', () => ({ + getCanonicalName: (id: string) => id, + getMarketingNameForModel: (id: string) => { + if (id.includes('opus-4-7')) return 'Claude Opus 4.7' + if (id.includes('opus-4-6')) return 'Claude Opus 4.6' + if (id.includes('sonnet-4-6')) return 'Claude Sonnet 4.6' + return null + }, +})) +mock.module('src/commands.js', () => ({ + 
getSkillToolCommands: async () => [], +})) +mock.module('src/constants/outputStyles.js', () => ({ + getOutputStyleConfig: async () => null, +})) +mock.module('src/utils/embeddedTools.js', () => ({ + hasEmbeddedSearchTools: () => false, +})) +mock.module('src/utils/permissions/filesystem.js', () => ({ + isScratchpadEnabled: () => false, + getScratchpadDir: () => '/tmp/scratchpad', +})) +mock.module('src/utils/betas.js', () => ({ + shouldUseGlobalCacheScope: () => false, +})) +mock.module('src/utils/undercover.js', () => ({ + isUndercover: () => false, +})) +mock.module('src/utils/model/antModels.js', () => ({ + getAntModelOverrideConfig: () => null, +})) +mock.module('src/utils/mcpInstructionsDelta.js', () => ({ + isMcpInstructionsDeltaEnabled: () => false, +})) +mock.module('src/memdir/memdir.js', () => ({ + loadMemoryPrompt: async () => null, +})) +mock.module('src/utils/debug.js', () => ({ + logForDebugging: () => {}, +})) +mock.module('src/services/analytics/growthbook.js', () => ({ + getFeatureValue_CACHED_MAY_BE_STALE: () => false, +})) +mock.module('bun:bundle', () => ({ + feature: (_name: string) => false, +})) +mock.module('src/constants/systemPromptSections.js', () => ({ + systemPromptSection: (_name: string, fn: () => any) => fn(), + DANGEROUS_uncachedSystemPromptSection: (_name: string, fn: () => any) => fn(), + resolveSystemPromptSections: async (sections: any[]) => + sections.filter(s => s !== null), +})) + +// 工具常量 mock +const TOOL_NAMES = { + Bash: 'Bash', + Read: 'Read', + Edit: 'Edit', + Write: 'Write', + Glob: 'Glob', + Grep: 'Grep', + Agent: 'Agent', + AskUserQuestion: 'AskUserQuestion', + TaskCreate: 'TaskCreate', + DiscoverSkills: 'DiscoverSkills', + Skill: 'Skill', + Sleep: 'Sleep', +} + +mock.module( + '@claude-code-best/builtin-tools/tools/BashTool/toolName.js', + () => ({ BASH_TOOL_NAME: TOOL_NAMES.Bash }), +) +mock.module( + '@claude-code-best/builtin-tools/tools/FileReadTool/prompt.js', + () => ({ FILE_READ_TOOL_NAME: TOOL_NAMES.Read }), 
+) +mock.module( + '@claude-code-best/builtin-tools/tools/FileEditTool/constants.js', + () => ({ FILE_EDIT_TOOL_NAME: TOOL_NAMES.Edit }), +) +mock.module( + '@claude-code-best/builtin-tools/tools/FileWriteTool/prompt.js', + () => ({ FILE_WRITE_TOOL_NAME: TOOL_NAMES.Write }), +) +mock.module('@claude-code-best/builtin-tools/tools/GlobTool/prompt.js', () => ({ + GLOB_TOOL_NAME: TOOL_NAMES.Glob, +})) +mock.module('@claude-code-best/builtin-tools/tools/GrepTool/prompt.js', () => ({ + GREP_TOOL_NAME: TOOL_NAMES.Grep, +})) +mock.module( + '@claude-code-best/builtin-tools/tools/AgentTool/constants.js', + () => ({ + AGENT_TOOL_NAME: TOOL_NAMES.Agent, + VERIFICATION_AGENT_TYPE: 'verification', + }), +) +mock.module( + '@claude-code-best/builtin-tools/tools/AgentTool/forkSubagent.js', + () => ({ isForkSubagentEnabled: () => false }), +) +mock.module( + '@claude-code-best/builtin-tools/tools/AgentTool/builtInAgents.js', + () => ({ areExplorePlanAgentsEnabled: () => false }), +) +mock.module( + '@claude-code-best/builtin-tools/tools/AgentTool/built-in/exploreAgent.js', + () => ({ + EXPLORE_AGENT: { agentType: 'explore' }, + EXPLORE_AGENT_MIN_QUERIES: 5, + }), +) +mock.module( + '@claude-code-best/builtin-tools/tools/AskUserQuestionTool/prompt.js', + () => ({ ASK_USER_QUESTION_TOOL_NAME: TOOL_NAMES.AskUserQuestion }), +) +mock.module( + '@claude-code-best/builtin-tools/tools/TodoWriteTool/constants.js', + () => ({ TODO_WRITE_TOOL_NAME: 'TodoWrite' }), +) +mock.module( + '@claude-code-best/builtin-tools/tools/TaskCreateTool/constants.js', + () => ({ TASK_CREATE_TOOL_NAME: TOOL_NAMES.TaskCreate }), +) +mock.module( + '@claude-code-best/builtin-tools/tools/DiscoverSkillsTool/prompt.js', + () => ({ DISCOVER_SKILLS_TOOL_NAME: TOOL_NAMES.DiscoverSkills }), +) +mock.module( + '@claude-code-best/builtin-tools/tools/SkillTool/constants.js', + () => ({ SKILL_TOOL_NAME: TOOL_NAMES.Skill }), +) +mock.module( + '@claude-code-best/builtin-tools/tools/SleepTool/prompt.js', + () => ({ 
SLEEP_TOOL_NAME: TOOL_NAMES.Sleep }), +) +mock.module( + '@claude-code-best/builtin-tools/tools/REPLTool/constants.js', + () => ({ isReplModeEnabled: () => false }), +) + +// --- 导入被测模块 --- + +import { + getSystemPrompt, + prependBullets, + computeSimpleEnvInfo, + getScratchpadInstructions, +} from './prompts.js' +import type { Tools } from '../Tool.js' + +// --- 辅助 --- + +const standardTools: Tools = [ + { name: 'Bash' }, + { name: 'Read' }, + { name: 'Edit' }, + { name: 'Write' }, + { name: 'Glob' }, + { name: 'Grep' }, + { name: 'Agent' }, + { name: 'AskUserQuestion' }, + { name: 'TaskCreate' }, +] as any + +async function getFullPrompt( + tools: Tools = standardTools, + model = 'claude-opus-4-7', +): Promise { + const sections = await getSystemPrompt(tools, model) + return sections.join('\n\n') +} + +// ===================================================================== +// 第一部分: 提示词工程技巧验证 +// 对应审计文档 第一部分 #1-#10 +// ===================================================================== + +describe('Opus 4.7 Prompt Engineering Audit', () => { + // ------------------------------------------------------------------ + // #1 决策树结构 (Decision Tree) + // TXT 来源: {request_evaluation_checklist} — Step 0→1→2→3 + // ------------------------------------------------------------------ + describe('#1 Decision tree for tool selection', () => { + test('prompt contains step-based tool selection guidance', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Step 0') + expect(prompt).toContain('Step 1') + expect(prompt).toContain('Step 2') + expect(prompt).toContain('Step 3') + }) + + test('decision tree has "stop at the first match" semantics', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('stop at the first match') + }) + + test('Step 0 teaches when NOT to use tools', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Step 0') + expect(prompt).toContain('answer directly, no tool call') + 
}) + + test('Step 1 prioritizes dedicated tools over Bash', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Step 1') + expect(prompt).toContain('dedicated tool') + }) + }) + + // ------------------------------------------------------------------ + // #2 反模式先行 (Anti-Pattern First) + // TXT 来源: {unnecessary_computer_use_avoidance}, {artifact_usage_criteria} + // ------------------------------------------------------------------ + describe('#2 Anti-pattern guidance (when NOT to use tools)', () => { + test('prompt says when NOT to use tools', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Do NOT use') + }) + + test('includes explicit "Do not use tools when" section', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Do not use tools when') + }) + + test('anti-pattern covers knowledge questions', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain( + 'programming concepts, syntax, or design patterns', + ) + }) + + test('anti-pattern covers content already in context', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('already visible in context') + }) + + test('includes file creation anti-pattern', async () => { + const prompt = await getFullPrompt() + const hasFileAntiPattern = + prompt.includes('Do not create files unless') || + prompt.includes('prefer editing an existing file') + expect(hasFileAntiPattern).toBe(true) + }) + }) + + // ------------------------------------------------------------------ + // #6 渐进式回退链 (Progressive Fallback Chain) + // TXT 来源: {core_search_behaviors}, {past_chats_tools} + // ------------------------------------------------------------------ + describe('#6 Progressive fallback chain', () => { + test('Grep/Glob fallback chain exists', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('fallback chain') + }) + + test('fallback includes broader pattern as first 
retry', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Broader pattern') + }) + + test('fallback includes alternate naming conventions', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('camelCase vs snake_case') + }) + + test('fallback ends with asking user after exhaustion', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('ask for guidance') + }) + }) + + // ------------------------------------------------------------------ + // #3 Few-Shot Scenario Examples + // TXT source: {examples}, {visualizer_examples}, {past_chats_tools} + // ------------------------------------------------------------------ + describe('#3 Few-shot examples', () => { + test('contains tool selection examples with arrow notation', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('→') + expect(prompt).toContain('Tool selection examples') + }) + + test('has multiple concrete Request→Action pairs (>=5)', async () => { + const prompt = await getFullPrompt() + const arrowCount = (prompt.match(/[""].+?[""] → /g) || []).length + expect(arrowCount).toBeGreaterThanOrEqual(5) + }) + + test('examples cover different tool types', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Glob("**/*.tsx")') + expect(prompt).toContain('Bash("bun test")') + expect(prompt).toContain('Grep("TODO")') + expect(prompt).toContain('answer directly') + }) + + test('examples include negative cases (what NOT to use)', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('not Bash find') + expect(prompt).toContain('not Bash sed') + }) + }) + + // ------------------------------------------------------------------ + // #4 Linguistic Signal Detection + // TXT source: {past_chats_tools}, {file_creation_advice} + // ------------------------------------------------------------------ + describe('#4 Linguistic signal detection', () => { + 
test('file creation signals teach when to create vs inline', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Linguistic signals') + expect(prompt).toContain('write a script') + expect(prompt).toContain('create a config') + }) + + test('inline answer signals are listed', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('show me how') + expect(prompt).toContain('answer inline') + }) + + test('20-line threshold for file creation', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('20 lines') + }) + }) + + // ------------------------------------------------------------------ + // #5 Asymmetric Cost Analysis + // TXT source: {tool_discovery} "treat tool_search as essentially free" + // ------------------------------------------------------------------ + describe('#5 Cost asymmetry framing', () => { + test('prompt has cost asymmetry for actions (existing)', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('cost of pausing to confirm is low') + }) + + test('frames search tools as cheap', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('cheap operations') + }) + + test('expanded cost asymmetry with multiple scenarios', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Cost asymmetry principle') + expect(prompt).toContain('costs user trust') + expect(prompt).toContain('breaks their flow') + }) + }) + + // ------------------------------------------------------------------ + // #7 Anti-Over-Explanation + // TXT source: {sharing_files}, {request_evaluation_checklist} + // ------------------------------------------------------------------ + describe('#7 Anti-over-explanation', () => { + test('prompt contains no-machinery-narration rule (existing)', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain("Don't narrate internal machinery") + }) + + 
test('includes anti-postamble guidance', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Do not restate') + expect(prompt).toContain('the user can read the diff') + }) + + test('discourages offering unchosen approach', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('unchosen approach') + }) + }) + + // ------------------------------------------------------------------ + // #8 Query Construction Teaching + // TXT source: {search_usage_guidelines}, {past_chats_tools} + // ------------------------------------------------------------------ + describe('#8 Query construction guidance', () => { + test('includes Grep query construction advice', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('query construction') + expect(prompt).toContain('content words') + }) + + test('Grep guidance teaches content words vs meta-descriptions', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('authenticate|login|signIn') + expect(prompt).toContain('not "auth handling code"') + }) + + test('Grep guidance teaches pipe alternation for naming variants', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('userId|user_id|userID') + }) + + test('includes Glob query construction advice', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Glob query construction') + expect(prompt).toContain('**/*Auth*.ts') + }) + + test('Glob guidance teaches narrowing by extension', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('**/*.test.ts') + }) + }) + + // ------------------------------------------------------------------ + // #9 Prompt Injection Defense + // TXT source: {anthropic_reminders}, {request_evaluation_checklist} + // ------------------------------------------------------------------ + describe('#9 Prompt injection defense', () => { + test('prompt warns about prompt 
injection in tool results (existing)', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('prompt injection') + }) + + test('distinguishes file instructions from user instructions', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('not from the user') + }) + }) + + // ===================================================================== + // Part 2: Behavioral rule verification + // Corresponds to Part 2 of the audit document, items #11-#18 + // ===================================================================== + + // ------------------------------------------------------------------ + // #10 Multi-Step Search Strategy + // TXT source: {tool_discovery}, {core_search_behaviors} + // ------------------------------------------------------------------ + describe('#10 Multi-step search strategy', () => { + test('scales search effort to task complexity', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Scale search effort to task complexity') + }) + + test('gives concrete complexity tiers', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Single file fix') + expect(prompt).toContain('Cross-cutting change') + expect(prompt).toContain('Architecture investigation') + }) + }) + + // ------------------------------------------------------------------ + // #11 Formatting Discipline + // TXT source: {lists_and_bullets} + // ------------------------------------------------------------------ + describe('#11 Formatting discipline', () => { + test('prompt contains prose-first guidance (existing)', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('direct answer in prose') + }) + + test('discourages over-formatting', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('over-formatting') + expect(prompt).toContain('natural language') + }) + + test('bullet points must be 1-2 sentences, not fragments', async () => { + const prompt = await getFullPrompt() + 
expect(prompt).toContain('1-2 sentences') + expect(prompt).toContain('not sentence fragments') + }) + }) + + // ------------------------------------------------------------------ + // #22 Search Before Saying Unknown + // TXT source: {tool_discovery} + // ------------------------------------------------------------------ + describe('#22 Search before saying unknown', () => { + test('instructs to search before claiming something does not exist', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Search first, report results second') + }) + + test('explicitly says do not say "I don\'t see that file"', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain("don't see that file") + }) + }) + + // ------------------------------------------------------------------ + // #12 Warm Tone + // TXT source: {tone_and_formatting} + // ------------------------------------------------------------------ + describe('#12 Warm tone', () => { + test('avoids negative assumptions about user abilities', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('negative assumptions') + }) + + test('pushback should be constructive', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('constructively') + }) + }) + + // ------------------------------------------------------------------ + // #20 Say Less When Risky + // TXT source: {refusal_handling} + // ------------------------------------------------------------------ + describe('#20 Say less when risky', () => { + test('security-sensitive code should say less about details', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('saying less about implementation details') + }) + }) + + // ------------------------------------------------------------------ + // #23 Don't Justify Search + // TXT source: {search_usage_guidelines} + // 
------------------------------------------------------------------ + describe("#23 Don't justify search", () => { + test('instructs not to justify why searching', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain("Don't justify why you're searching") + }) + }) + + // ------------------------------------------------------------------ + // #13 Product Information + // TXT source: {product_information} + // ------------------------------------------------------------------ + describe('#13 Product information', () => { + test('env info contains Claude Code product description', async () => { + const envInfo = await computeSimpleEnvInfo('claude-opus-4-7') + expect(envInfo).toContain('Claude Code') + expect(envInfo).toContain('CLI') + }) + + test('env info contains model family', async () => { + const envInfo = await computeSimpleEnvInfo('claude-opus-4-7') + expect(envInfo).toContain('Claude 4.5/4.6/4.7') + }) + + test('env info contains correct model IDs', async () => { + const envInfo = await computeSimpleEnvInfo('claude-opus-4-7') + expect(envInfo).toContain('claude-opus-4-7') + expect(envInfo).toContain('claude-sonnet-4-6') + expect(envInfo).toContain('claude-haiku-4-5') + }) + + test('mentions Chrome/Excel/Cowork products', async () => { + const envInfo = await computeSimpleEnvInfo('claude-opus-4-7') + expect(envInfo).toContain('Chrome') + expect(envInfo).toContain('Excel') + expect(envInfo).toContain('Cowork') + }) + }) + + // ------------------------------------------------------------------ + // #15 Conversation End Respect + // TXT source: {refusal_handling} line 51 + // ------------------------------------------------------------------ + describe('#15 Conversation end respect', () => { + test('discourages "anything else?" 
appendages', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('the user will ask if they need more') + }) + }) + + // ------------------------------------------------------------------ + // #16 One Question Per Response + // TXT source: {tone_and_formatting} line 71 + // ------------------------------------------------------------------ + describe('#16 One question per response', () => { + test('limits questions per response', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('one question per response') + }) + }) + + // ===================================================================== + // Part 3: Regression tests for already-existing functionality + // Ensures the existing anchors aligned from the TXT are not broken + // ===================================================================== + + describe('Existing behavioral anchors (regression)', () => { + test('default_stance: default to helping', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Default to helping') + expect(prompt).toContain('concrete, specific risk of serious harm') + }) + + test('anti-collapse: no self-abasement', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('self-abasement') + expect(prompt).toContain('maintain self-respect') + }) + + test('cutoff silence: do not proactively mention cutoff', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain( + "Don't proactively mention your knowledge cutoff", + ) + }) + + test('no-machinery-narration: describe in user terms', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain("Don't narrate internal machinery") + expect(prompt).toContain('Describe the action in user terms') + }) + + test('tool_discovery: search before saying unavailable', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('visible tool list is partial by design') + expect(prompt).toContain( + 'Only state something is unavailable after the search returns 
no match', + ) + }) + + test('false-claims mitigation: report outcomes faithfully', async () => { + const prompt = await getFullPrompt() + expect(prompt).toContain('Report outcomes faithfully') + }) + + test('CYBER_RISK_INSTRUCTION: allows security testing', async () => { + const prompt = await getFullPrompt() + // TS allows security testing (the TXT forbids it entirely; this difference is intentional) + expect(prompt).not.toContain( + 'does not write or explain or work on malicious code', + ) + }) + }) + + // ===================================================================== + // Part 4: prependBullets utility function + // ===================================================================== + + describe('prependBullets utility', () => { + test('flat items get single bullet', () => { + const result = prependBullets(['A', 'B']) + expect(result).toEqual([' - A', ' - B']) + }) + + test('nested arrays get double-indented bullets', () => { + const result = prependBullets(['A', ['sub1', 'sub2'], 'B']) + expect(result).toEqual([' - A', ' - sub1', ' - sub2', ' - B']) + }) + + test('empty array returns empty', () => { + expect(prependBullets([])).toEqual([]) + }) + }) + + // ===================================================================== + // Part 5: Environment info and model knowledge cutoff + // ===================================================================== + + describe('Knowledge cutoff correctness', () => { + test('Opus 4.7 cutoff is January 2026', async () => { + const envInfo = await computeSimpleEnvInfo('claude-opus-4-7') + expect(envInfo).toContain('January 2026') + }) + + test('Opus 4.6 cutoff is May 2025', async () => { + const envInfo = await computeSimpleEnvInfo('claude-opus-4-6') + expect(envInfo).toContain('May 2025') + }) + + test('Sonnet 4.6 cutoff is August 2025', async () => { + const envInfo = await computeSimpleEnvInfo('claude-sonnet-4-6') + expect(envInfo).toContain('August 2025') + }) + + test('Opus 4.7 frontier model name is correct', async () => { + const envInfo = await computeSimpleEnvInfo('claude-opus-4-7') + 
expect(envInfo).toContain('Claude Opus 4.7') + }) + }) +}) diff --git a/src/entrypoints/init.ts b/src/entrypoints/init.ts index 3e0c33933..c3125b8b7 100644 --- a/src/entrypoints/init.ts +++ b/src/entrypoints/init.ts @@ -108,6 +108,12 @@ export const init = memoize(async (): Promise => { }) profileCheckpoint('init_after_1p_event_logging') + // Start balance polling (no-op unless a provider is configured via env). + void import('../services/providerUsage/balance/poller.js').then(m => + m.startBalancePolling(), + ) + profileCheckpoint('init_after_balance_polling') + // Populate OAuth account info if it is not already cached in config. This is needed since the // OAuth account info may not be populated when logging in through the VSCode extension. void populateOAuthAccountInfoIfNeeded() diff --git a/src/services/skillLearning/__tests__/evolution.test.ts b/src/services/skillLearning/__tests__/evolution.test.ts new file mode 100644 index 000000000..4fece3248 --- /dev/null +++ b/src/services/skillLearning/__tests__/evolution.test.ts @@ -0,0 +1,152 @@ +import { afterEach, beforeEach, describe, expect, test } from 'bun:test' +import { mkdtempSync, rmSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { createInstinct } from '../instinctParser.js' +import { + classifyEvolutionTarget, + clusterInstincts, + generateAgentCandidates, + generateCommandCandidates, + generateSkillCandidates, +} from '../evolution.js' + +describe('evolution', () => { + test('clusters related instincts by trigger and domain', () => { + const instincts = [ + createInstinct({ + trigger: 'when writing tests', + action: 'use testing-library', + confidence: 0.7, + domain: 'testing', + source: 'session-observation', + scope: 'project', + evidence: ['one'], + }), + createInstinct({ + trigger: 'when writing tests', + action: 'avoid implementation mocks', + confidence: 0.8, + domain: 'testing', + source: 'session-observation', + scope: 'project', + evidence: 
['two'], + }), + createInstinct({ + trigger: 'when writing tests', + action: 'prefer describe/test structure', + confidence: 0.75, + domain: 'testing', + source: 'session-observation', + scope: 'project', + evidence: ['three'], + }), + ] + + const clusters = clusterInstincts(instincts) + expect(clusters).toHaveLength(1) + expect(clusters[0]?.averageConfidence).toBe(0.75) + }) + + test('classifies explicit user-invoked workflows as command candidates', () => { + expect( + classifyEvolutionTarget([ + createInstinct({ + trigger: 'when user asks to create migration', + action: 'run command steps', + confidence: 0.8, + domain: 'workflow', + source: 'session-observation', + scope: 'project', + evidence: ['one'], + }), + ]), + ).toBe('command') + }) + + test('generates skill candidates for high-confidence skill clusters', () => { + // Cluster-size floor (>=3) is non-negotiable post-H15 fix: a single + // high-confidence instinct must not become a persistent skill. Three + // independent observations are required to promote. 
+ const instincts = [ + createInstinct({ + trigger: 'when writing tests', + action: 'use testing-library', + confidence: 0.8, + domain: 'testing', + source: 'session-observation', + scope: 'project', + evidence: ['one'], + }), + createInstinct({ + trigger: 'when writing tests', + action: 'avoid implementation mocks', + confidence: 0.8, + domain: 'testing', + source: 'session-observation', + scope: 'project', + evidence: ['two'], + }), + createInstinct({ + trigger: 'when writing tests', + action: 'prefer describe/test structure', + confidence: 0.8, + domain: 'testing', + source: 'session-observation', + scope: 'project', + evidence: ['three'], + }), + ] + + expect(generateSkillCandidates(instincts)).toHaveLength(1) + }) + + describe('three-path generation', () => { + let tmp: string + beforeEach(() => { + tmp = mkdtempSync(join(tmpdir(), 'skill-learning-evolve-')) + }) + afterEach(() => { + rmSync(tmp, { recursive: true, force: true }) + }) + + test('command-triggered instincts produce command candidates, not skill candidates', () => { + // Need >=3 instincts to satisfy the cluster-size floor post-H15. 
+ const instincts = Array.from({ length: 3 }, (_, i) => + createInstinct({ + trigger: 'when user asks to create migration', + action: 'run command: pnpm run migration', + confidence: 0.85, + domain: 'workflow', + source: 'session-observation', + scope: 'project', + evidence: [`user invocation ${i}`], + }), + ) + + const commands = generateCommandCandidates(instincts, { cwd: tmp }) + const skills = generateSkillCandidates(instincts, { cwd: tmp }) + expect(commands).toHaveLength(1) + expect(skills).toHaveLength(0) + expect(commands[0]?.content).toContain('/') + }) + + test('four debug multi-step instincts cluster into an agent candidate', () => { + const instincts = Array.from({ length: 4 }, (_, i) => + createInstinct({ + trigger: 'when debugging multi-step regressions', + action: 'investigate stack trace, reproduce locally, and add test', + confidence: 0.82, + domain: 'debugging', + source: 'session-observation', + scope: 'project', + evidence: [`incident-${i}`], + }), + ) + + const agents = generateAgentCandidates(instincts, { cwd: tmp }) + expect(agents).toHaveLength(1) + expect(agents[0]?.content).toContain('Playbook') + }) + }) +}) diff --git a/src/services/skillLearning/__tests__/instinctStore.test.ts b/src/services/skillLearning/__tests__/instinctStore.test.ts new file mode 100644 index 000000000..bb81f31e0 --- /dev/null +++ b/src/services/skillLearning/__tests__/instinctStore.test.ts @@ -0,0 +1,143 @@ +import { afterEach, beforeEach, describe, expect, test } from 'bun:test' +import { mkdtempSync, rmSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { + loadInstincts, + prunePendingInstincts, + saveInstinct, + upsertInstinct, +} from '../instinctStore.js' +import { createInstinct } from '../instinctParser.js' + +let rootDir: string + +beforeEach(() => { + rootDir = mkdtempSync(join(tmpdir(), 'skill-learning-instinct-')) +}) + +afterEach(() => { + rmSync(rootDir, { recursive: true, force: true }) +}) + 
+describe('instinctStore', () => { + test('saves and loads instincts', async () => { + await saveInstinct( + createInstinct({ + trigger: 'when testing', + action: 'use testing-library', + confidence: 0.7, + domain: 'testing', + source: 'session-observation', + scope: 'project', + evidence: ['user correction'], + }), + { rootDir, project: projectContext() }, + ) + + const instincts = await loadInstincts({ + rootDir, + project: projectContext(), + }) + expect(instincts).toHaveLength(1) + expect(instincts[0]?.action).toContain('testing-library') + }) + + test('upsert increases confidence for confirming instincts', async () => { + const first = createInstinct({ + id: 'test-instinct', + trigger: 'when testing', + action: 'prefer testing-library', + confidence: 0.7, + domain: 'testing', + source: 'session-observation', + scope: 'project', + evidence: ['one'], + }) + await upsertInstinct(first, { rootDir, project: projectContext() }) + const second = { ...first, evidence: ['two'] } + const updated = await upsertInstinct(second, { + rootDir, + project: projectContext(), + }) + + expect(updated.confidence).toBeGreaterThan(first.confidence) + expect(updated.evidence).toContain('one') + expect(updated.evidence).toContain('two') + }) + + test('outcome-aware upsert: failure evidence reduces confidence', async () => { + const first = createInstinct({ + id: 'outcome-aware', + trigger: 'when writing tests', + action: 'use testing-library', + confidence: 0.7, + domain: 'testing', + source: 'session-observation', + scope: 'project', + evidence: ['one'], + evidenceOutcome: 'success', + }) + const afterSuccess = await upsertInstinct(first, { + rootDir, + project: projectContext(), + }) + await upsertInstinct(first, { rootDir, project: projectContext() }) + const afterAnotherSuccess = ( + await loadInstincts({ rootDir, project: projectContext() }) + ).find(i => i.id === 'outcome-aware')! 
+ + const failure = { + ...first, + evidence: ['two'], + evidenceOutcome: 'failure' as const, + } + const afterFailure = await upsertInstinct(failure, { + rootDir, + project: projectContext(), + }) + + expect(afterSuccess.confidence).toBe(0.7) + expect(afterAnotherSuccess.confidence).toBeGreaterThan( + afterSuccess.confidence, + ) + expect(afterFailure.confidence).toBeLessThan(afterAnotherSuccess.confidence) + }) + + test('prunes old pending instincts', async () => { + const old = createInstinct( + { + id: 'old-instinct', + trigger: 'old', + action: 'old', + confidence: 0.3, + domain: 'project', + source: 'session-observation', + scope: 'project', + evidence: ['old'], + }, + '2020-01-01T00:00:00.000Z', + ) + await saveInstinct(old, { rootDir, project: projectContext() }) + + const pruned = await prunePendingInstincts(30, { + rootDir, + project: projectContext(), + }) + expect(pruned.map(instinct => instinct.id)).toContain('old-instinct') + expect(await loadInstincts({ rootDir, project: projectContext() })).toEqual( + [], + ) + }) +}) + +function projectContext() { + return { + projectId: 'p1', + projectName: 'project', + cwd: rootDir, + scope: 'project' as const, + source: 'global' as const, + storageDir: join(rootDir, 'projects', 'p1'), + } +} diff --git a/src/services/skillLearning/__tests__/learningPolicy.test.ts b/src/services/skillLearning/__tests__/learningPolicy.test.ts new file mode 100644 index 000000000..d815d7780 --- /dev/null +++ b/src/services/skillLearning/__tests__/learningPolicy.test.ts @@ -0,0 +1,81 @@ +import { describe, expect, test } from 'bun:test' +import { createInstinct } from '../instinctParser.js' +import { + buildLearnedSkillName, + decideDefaultScope, + isGenericSkillName, + isValidLearnedSkillName, + normalizeSkillName, + shouldGenerateSkillFromInstincts, +} from '../learningPolicy.js' + +describe('learningPolicy', () => { + test('normalizes learned skill names to lowercase kebab-case with length cap', () => { + const name = 
normalizeSkillName('Testing React Testing Library!!!') + + expect(name).toBe('testing-react-testing-library') + expect(name.length).toBeLessThanOrEqual(64) + }) + + test('rejects generic learned skill names', () => { + expect(isGenericSkillName('learned-skill')).toBe(true) + expect(isValidLearnedSkillName('learned-skill')).toBe(false) + }) + + test('builds domain-prefixed names from instincts', () => { + const instinct = createInstinct({ + trigger: 'when writing React tests', + action: 'use testing-library and avoid implementation mocks', + confidence: 0.85, + domain: 'testing', + source: 'session-observation', + scope: 'project', + evidence: ['user correction'], + }) + + const name = buildLearnedSkillName([instinct]) + + expect(name.startsWith('testing-')).toBe(true) + expect(isValidLearnedSkillName(name)).toBe(true) + }) + + test('uses confidence threshold before generating skills', () => { + const low = createInstinct({ + trigger: 'when testing', + action: 'try a tentative pattern', + confidence: 0.3, + domain: 'testing', + source: 'session-observation', + scope: 'project', + evidence: ['weak signal'], + }) + const high = { ...low, confidence: 0.8 } + + expect(shouldGenerateSkillFromInstincts([low])).toBe(false) + expect(shouldGenerateSkillFromInstincts([high])).toBe(true) + }) + + test('promotes only global-friendly repeated instinct groups by default', () => { + const workflow = createInstinct({ + trigger: 'when modifying code', + action: 'Grep then Read then Edit', + confidence: 0.8, + domain: 'workflow', + source: 'session-observation', + scope: 'project', + evidence: ['repeated workflow'], + }) + const testing = createInstinct({ + trigger: 'when writing React tests', + action: 'use testing-library', + confidence: 0.8, + domain: 'testing', + source: 'session-observation', + scope: 'project', + evidence: ['project convention'], + }) + + expect(decideDefaultScope([workflow, workflow])).toBe('global') + expect(decideDefaultScope([testing])).toBe('project') + }) 
+}) diff --git a/src/services/skillLearning/__tests__/observationStore.test.ts b/src/services/skillLearning/__tests__/observationStore.test.ts new file mode 100644 index 000000000..eeef0b032 --- /dev/null +++ b/src/services/skillLearning/__tests__/observationStore.test.ts @@ -0,0 +1,108 @@ +import { afterEach, beforeEach, describe, expect, test } from 'bun:test' +import { mkdtempSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { + appendObservation, + ingestTranscript, + readObservations, + scrubText, +} from '../observationStore.js' + +let rootDir: string + +beforeEach(() => { + rootDir = mkdtempSync(join(tmpdir(), 'skill-learning-observation-')) +}) + +afterEach(() => { + rmSync(rootDir, { recursive: true, force: true }) +}) + +describe('observationStore', () => { + test('scrubs secrets and truncates large fields', () => { + const scrubbed = scrubText('api_key: sk-ant-1234567890abcdef extra', 80) + expect(scrubbed).toContain('[REDACTED]') + + const truncated = scrubText( + `api_key: sk-ant-1234567890abcdef ${'x'.repeat(120)}`, + 40, + ) + expect(truncated).toContain('[REDACTED]') + expect(truncated).toContain('[TRUNCATED') + }) + + test('appends and reads project observations', async () => { + await appendObservation( + { + id: 'obs-1', + timestamp: '2026-04-16T00:00:00.000Z', + event: 'user_message', + sessionId: 's1', + projectId: 'p1', + projectName: 'project', + cwd: rootDir, + messageText: '不要 mock,用 testing-library', + }, + { + rootDir, + project: projectContext(), + }, + ) + + const observations = await readObservations({ + rootDir, + project: projectContext(), + }) + expect(observations).toHaveLength(1) + expect(observations[0]?.messageText).toContain('testing-library') + }) + + test('ingests Claude transcript JSONL into observations', async () => { + const transcript = join(rootDir, 'session.jsonl') + writeFileSync( + transcript, + [ + JSON.stringify({ + type: 'user', + sessionId: 
's1', + cwd: rootDir, + timestamp: '2026-04-16T00:00:00.000Z', + message: { role: 'user', content: '不要 mock,用 testing-library' }, + }), + JSON.stringify({ + type: 'assistant', + sessionId: 's1', + cwd: rootDir, + timestamp: '2026-04-16T00:00:01.000Z', + message: { + role: 'assistant', + content: [ + { type: 'tool_use', name: 'Grep', input: { pattern: 'x' } }, + ], + }, + }), + ].join('\n'), + ) + + const observations = await ingestTranscript(transcript, { + rootDir, + project: projectContext(), + }) + + expect(observations.length).toBeGreaterThanOrEqual(2) + expect(observations.map(o => o.event)).toContain('user_message') + expect(observations.map(o => o.event)).toContain('tool_start') + }) +}) + +function projectContext() { + return { + projectId: 'p1', + projectName: 'project', + cwd: rootDir, + scope: 'project' as const, + source: 'global' as const, + storageDir: join(rootDir, 'projects', 'p1'), + } +} diff --git a/src/services/skillLearning/__tests__/observerBackend.test.ts b/src/services/skillLearning/__tests__/observerBackend.test.ts new file mode 100644 index 000000000..a028201a8 --- /dev/null +++ b/src/services/skillLearning/__tests__/observerBackend.test.ts @@ -0,0 +1,135 @@ +import { afterEach, beforeEach, describe, expect, test } from 'bun:test' +import { + getActiveObserverBackend, + listObserverBackends, + registerObserverBackend, + resolveDefaultObserverBackend, + setActiveObserverBackend, + analyzeWithActiveBackend, + type ObserverBackend, +} from '../observerBackend.js' +import { analyzeObservations } from '../sessionObserver.js' +import type { StoredSkillObservation } from '../observationStore.js' + +function obs(partial: Partial<StoredSkillObservation>): StoredSkillObservation { /* test fixture: fills default fields, then spreads `partial` over them */ + return { + id: partial.id ?? crypto.randomUUID(), + timestamp: '2026-04-16T00:00:00.000Z', + event: partial.event ?? 
'user_message', + sessionId: 's1', + projectId: 'p1', + projectName: 'project', + cwd: process.cwd(), + ...partial, + } +} + +const originalBackendName = getActiveObserverBackend().name + +afterEach(() => { + setActiveObserverBackend(originalBackendName) +}) + +describe('observerBackend', () => { + test('registers heuristic and llm backends by default', () => { + const names = listObserverBackends() + expect(names).toContain('heuristic') + expect(names).toContain('llm') + }) + + test('resolveDefaultObserverBackend honours SKILL_LEARNING_OBSERVER_BACKEND env', () => { + // Adversarial probe for the env switch — if this regresses, the LLM + // backend would be silently unreachable in production even with the env + // variable set, which was the original AC2 gap. + const original = process.env.SKILL_LEARNING_OBSERVER_BACKEND + try { + process.env.SKILL_LEARNING_OBSERVER_BACKEND = 'llm' + resolveDefaultObserverBackend() + expect(getActiveObserverBackend().name).toBe('llm') + + // Unknown backend names must not crash; the current active stays. + process.env.SKILL_LEARNING_OBSERVER_BACKEND = 'nonexistent' + resolveDefaultObserverBackend() + expect(getActiveObserverBackend().name).toBe('llm') + + // Clearing the env leaves whatever was active — explicit opt-out is + // setActiveObserverBackend, not clearing the env. 
+ delete process.env.SKILL_LEARNING_OBSERVER_BACKEND + resolveDefaultObserverBackend() + expect(getActiveObserverBackend().name).toBe('llm') + } finally { + if (original === undefined) { + delete process.env.SKILL_LEARNING_OBSERVER_BACKEND + } else { + process.env.SKILL_LEARNING_OBSERVER_BACKEND = original + } + } + }) + + test('heuristic backend preserves existing correction detection', async () => { + setActiveObserverBackend('heuristic') + const candidates = await analyzeWithActiveBackend([ + obs({ messageText: '不要直接 mock,用 testing-library' }), + ]) + expect(candidates).toHaveLength(1) + expect(candidates[0]?.action).toContain('testing-library') + }) + + test('llm backend short-circuits to [] on empty observations', async () => { + // With the real Haiku-backed implementation the backend only calls + // queryHaiku when there are observations to analyse. Empty-input short + // circuit guarantees the no-cost path needed for hot loops. + setActiveObserverBackend('llm') + const candidates = await analyzeWithActiveBackend([]) + expect(candidates).toEqual([]) + }) + + test('analyzeObservations routes to active backend (sync path throws for async backends)', () => { + // Heuristic backend is sync — analyzeObservations works directly. + const previousCount = analyzeObservations([ + obs({ messageText: '不要直接 mock,用 testing-library' }), + ]).length + expect(previousCount).toBe(1) + + // The LLM backend is now a real async implementation (queryHaiku). The + // sync `analyzeObservations` helper refuses to return a pending Promise + // and throws with a clear instruction to use `analyzeWithActiveBackend` + // instead — prove the routing reached the async backend by catching + // that exact error. 
+ setActiveObserverBackend('llm') + expect(() => + analyzeObservations([ + obs({ messageText: '不要直接 mock,用 testing-library' }), + ]), + ).toThrow(/Promise/) + }) + + test('custom backends can be registered and switched', async () => { + const custom: ObserverBackend = { + name: 'custom-test', + analyze() { + return [ + { + trigger: 'custom trigger', + action: 'custom action', + confidence: 0.9, + domain: 'project', + source: 'session-observation', + scope: 'project', + evidence: ['custom evidence'], + }, + ] + }, + } + registerObserverBackend(custom) + setActiveObserverBackend('custom-test') + + const candidates = await analyzeWithActiveBackend([]) + expect(candidates).toHaveLength(1) + expect(candidates[0]?.trigger).toBe('custom trigger') + }) + + test('switching to an unknown backend throws', () => { + expect(() => setActiveObserverBackend('does-not-exist')).toThrow() + }) +}) diff --git a/src/services/skillLearning/__tests__/projectContext.test.ts b/src/services/skillLearning/__tests__/projectContext.test.ts new file mode 100644 index 000000000..7b36b9ca3 --- /dev/null +++ b/src/services/skillLearning/__tests__/projectContext.test.ts @@ -0,0 +1,160 @@ +import { afterAll, beforeEach, describe, expect, test } from 'bun:test' +import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync } from 'fs' +import { tmpdir } from 'os' +import { join } from 'path' +import { execFileSync } from 'child_process' +import { getClaudeConfigHomeDir } from '../../../utils/envUtils.js' +import { + getProjectContextPath, + getProjectsRegistryPath, + getSkillLearningRootDir, + resolveProjectContext, +} from '../projectContext.js' +import { isSkillLearningEnabled } from '../featureCheck.js' + +const tempBase = mkdtempSync(join(tmpdir(), 'skill-learning-context-test-')) +const originalEnv = { ...process.env } + +beforeEach(() => { + resetEnv() + const tempHome = mkdtempSync(join(tempBase, 'home-')) + process.env.CLAUDE_CONFIG_DIR = tempHome +}) + +afterAll(() => { + process.env = { 
...originalEnv } + clearConfigDirCache() + rmSync(tempBase, { recursive: true, force: true }) +}) + +describe('isSkillLearningEnabled', () => { + test('honors explicit SKILL_LEARNING_ENABLED overrides', () => { + process.env.SKILL_LEARNING_ENABLED = '1' + expect(isSkillLearningEnabled()).toBe(true) + + process.env.SKILL_LEARNING_ENABLED = '0' + expect(isSkillLearningEnabled()).toBe(false) + }) + + test('honors FEATURE_SKILL_LEARNING env fallback', () => { + delete process.env.SKILL_LEARNING_ENABLED + process.env.FEATURE_SKILL_LEARNING = '1' + expect(isSkillLearningEnabled()).toBe(true) + + process.env.FEATURE_SKILL_LEARNING = '0' + expect(isSkillLearningEnabled()).toBe(false) + }) +}) + +describe('resolveProjectContext', () => { + test('prefers CLAUDE_PROJECT_DIR and writes registry files', () => { + const cwd = mkdirTempDir('cwd-') + const projectDir = mkdirTempDir('project-') + process.env.CLAUDE_PROJECT_DIR = projectDir + + const context = resolveProjectContext(cwd) + + expect(context.source).toBe('claude_project_dir') + expect(context.scope).toBe('project') + expect(context.projectRoot).toBe(projectDir) + expect(context.projectName).toBe(lastPathSegment(projectDir)) + expect(context.storageDir).toContain(context.projectId) + + expect(existsSync(getProjectsRegistryPath())).toBe(true) + expect(existsSync(getProjectContextPath(context.projectId))).toBe(true) + + const registry = readJson(getProjectsRegistryPath()) + expect(registry.projects[context.projectId].source).toBe( + 'claude_project_dir', + ) + }) + + test('uses git remote as stable identity across different checkouts', () => { + const first = createGitRepo('remote-a-', 'https://example.com/acme/app.git') + const second = createGitRepo( + 'remote-b-', + 'https://example.com/acme/app.git', + ) + + const firstContext = resolveProjectContext(first) + const secondContext = resolveProjectContext(second) + + expect(firstContext.source).toBe('git_remote') + expect(secondContext.source).toBe('git_remote') + 
/**
 * Returns the final non-empty component of a path.
 *
 * Both `/` and `\` are treated as separators and empty components (leading,
 * trailing or doubled separators) are ignored. When nothing remains, the input
 * is returned unchanged.
 *
 * @param path - Path to inspect.
 * @returns The last non-empty segment, or `path` itself if there is none.
 */
function lastPathSegment(path: string): string {
  const segments = path.split(/[\\/]/).filter(segment => segment.length > 0)
  const last = segments.pop()
  if (last === undefined) {
    // Empty string or separators only: fall back to the raw input.
    return path
  }
  return last
}
(projectId: string) => + createInstinct({ + id: 'shared-trigger', + trigger: 'shared', + action: 'shared', + confidence: 0.85, + domain: 'workflow', + source: 'session-observation', + scope: 'project', + projectId, + projectName: projectId, + evidence: ['ev'], + status: 'active', + }) + const candidates = findPromotionCandidates([mk('alpha'), mk('beta')]) + expect(candidates).toHaveLength(1) + expect(candidates[0]?.projectIds.sort()).toEqual(['alpha', 'beta']) + }) + + test('checkPromotion writes a global copy for cross-project instincts', async () => { + const mk = (projectId: string) => + createInstinct({ + id: 'shared-id', + trigger: 'shared', + action: 'shared', + confidence: 0.85, + domain: 'workflow', + source: 'session-observation', + scope: 'project', + projectId, + projectName: projectId, + evidence: ['ev'], + status: 'active', + }) + await saveInstinct(mk('alpha'), { rootDir, project: projectCtx('alpha') }) + await saveInstinct(mk('beta'), { rootDir, project: projectCtx('beta') }) + + const promoted = await checkPromotion({ rootDir }) + expect(promoted.map(p => p.instinctId)).toContain('shared-id') + + const globalInstincts = await loadInstincts({ + rootDir, + scope: 'global', + project: globalCtx(), + }) + const global = globalInstincts.find(i => i.id === 'shared-id') + expect(global).toBeDefined() + expect(global?.scope).toBe('global') + expect(global?.confidence).toBeGreaterThanOrEqual(0.8) + }) + + test('checkPromotion is idempotent within a session', async () => { + const mk = (projectId: string) => + createInstinct({ + id: 'repeat-id', + trigger: 'repeat', + action: 'repeat', + confidence: 0.85, + domain: 'workflow', + source: 'session-observation', + scope: 'project', + projectId, + projectName: projectId, + evidence: ['ev'], + status: 'active', + }) + await saveInstinct(mk('alpha'), { rootDir, project: projectCtx('alpha') }) + await saveInstinct(mk('beta'), { rootDir, project: projectCtx('beta') }) + + const first = await checkPromotion({ rootDir 
}) + const second = await checkPromotion({ rootDir }) + + expect(first).toHaveLength(1) + expect(second).toHaveLength(0) + }) + + test('does not promote when only one project has the instinct', async () => { + const instinct = createInstinct({ + id: 'solo', + trigger: 'solo', + action: 'solo', + confidence: 0.9, + domain: 'workflow', + source: 'session-observation', + scope: 'project', + projectId: 'alpha', + projectName: 'alpha', + evidence: ['ev'], + status: 'active', + }) + await saveInstinct(instinct, { rootDir, project: projectCtx('alpha') }) + + const promoted = await checkPromotion({ rootDir }) + expect(promoted).toEqual([]) + }) +}) diff --git a/src/services/skillLearning/__tests__/runtimeObserver.test.ts b/src/services/skillLearning/__tests__/runtimeObserver.test.ts new file mode 100644 index 000000000..39b1e7c19 --- /dev/null +++ b/src/services/skillLearning/__tests__/runtimeObserver.test.ts @@ -0,0 +1,143 @@ +import { afterEach, beforeEach, describe, expect, test } from 'bun:test' +import { existsSync, mkdtempSync, rmSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { + resetSkillLearningConfig, + setSkillLearningConfigForTest, +} from '../config.js' +import { loadInstincts, readObservations } from '../index.js' +import { + resetRuntimeObserverForTest, + runSkillLearningPostSampling, +} from '../runtimeObserver.js' + +let root: string +let previousCwd: string +const originalEnv = { ...process.env } + +beforeEach(() => { + root = mkdtempSync(join(tmpdir(), 'skill-learning-runtime-')) + previousCwd = process.cwd() + process.chdir(root) + process.env = { ...originalEnv } + process.env.CLAUDE_SKILL_LEARNING_HOME = join(root, 'learning-home') + process.env.CLAUDE_CONFIG_DIR = join(root, 'config') + process.env.SKILL_LEARNING_ENABLED = '1' + process.env.NODE_ENV = 'test' + setSkillLearningConfigForTest({ minConfidence: 0.3, minClusterSize: 1 }) + resetRuntimeObserverForTest() +}) + +afterEach(() => { + 
process.chdir(previousCwd) + process.env = { ...originalEnv } + resetSkillLearningConfig() + rmSync(root, { recursive: true, force: true }) +}) + +describe('runtimeObserver', () => { + test('records and learns from post-sampling main-thread messages', async () => { + await runSkillLearningPostSampling({ + querySource: 'repl_main_thread', + messages: [ + { + type: 'user', + uuid: 'u1' as any, + message: { role: 'user', content: '不要 mock,用 testing-library' }, + }, + ], + systemPrompt: [] as any, + userContext: {}, + systemContext: {}, + toolUseContext: { agentId: undefined } as any, + }) + + const observations = await readObservations({ + rootDir: process.env.CLAUDE_SKILL_LEARNING_HOME, + project: { + projectId: 'global', + projectName: 'global', + cwd: root, + scope: 'global', + source: 'global', + storageDir: join(process.env.CLAUDE_SKILL_LEARNING_HOME!, 'global'), + }, + }) + const instincts = await loadInstincts({ + rootDir: process.env.CLAUDE_SKILL_LEARNING_HOME, + project: { + projectId: 'global', + projectName: 'global', + cwd: root, + scope: 'global', + source: 'global', + storageDir: join(process.env.CLAUDE_SKILL_LEARNING_HOME!, 'global'), + }, + }) + + expect(observations).toHaveLength(1) + expect(instincts[0]?.action).toContain('testing-library') + }) + + test('skips subagent sessions', async () => { + await runSkillLearningPostSampling({ + querySource: 'repl_main_thread', + messages: [ + { + type: 'user', + uuid: 'u1' as any, + message: { role: 'user', content: '不要 mock,用 testing-library' }, + }, + ], + systemPrompt: [] as any, + userContext: {}, + systemContext: {}, + toolUseContext: { agentId: 'agent-1' } as any, + }) + + const observations = await readObservations({ + rootDir: process.env.CLAUDE_SKILL_LEARNING_HOME, + }) + expect(observations).toEqual([]) + }) + + test('auto-evolves repeated corrections into an active learned skill', async () => { + await runSkillLearningPostSampling({ + querySource: 'repl_main_thread', + messages: [ + { + type: 
/**
 * Builds a complete observation fixture for the session-observer tests.
 *
 * Every field gets a deterministic default except `id`, which falls back to a
 * random UUID. `partial` is spread last, so any key present in it replaces the
 * default above it.
 *
 * @param partial - Fields to override on the default observation.
 * @returns A fully populated `StoredSkillObservation`.
 */
function obs(partial: Partial<StoredSkillObservation>): StoredSkillObservation {
  return {
    id: partial.id ?? crypto.randomUUID(),
    timestamp: '2026-04-16T00:00:00.000Z',
    event: partial.event ?? 'user_message',
    sessionId: 's1',
    projectId: 'p1',
    projectName: 'project',
    cwd: process.cwd(),
    // Spread last so callers can override any default, including the fixed ones.
    ...partial,
  }
}
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import {
  existsSync,
  mkdirSync,
  mkdtempSync,
  readFileSync,
  readdirSync,
  rmSync,
  statSync,
} from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import {
  generateOrMergeSkillDraft,
  writeLearnedSkill,
} from '../skillGenerator.js'
import { createInstinct } from '../instinctParser.js'
/**
 * Recursively collects the path of every `SKILL.md` file beneath `dir`.
 *
 * Entries are visited in `readdirSync` order and directories are descended
 * depth-first, so results come back in traversal order.
 *
 * @param dir - Directory to scan; `readdirSync` throws if it does not exist.
 * @returns Paths (built with `join`) of each `SKILL.md` found under `dir`.
 */
function findSkillMdFiles(dir: string): string[] {
  // Relies on the file's static `node:fs` imports instead of an inline
  // `require()`, which is unavailable in pure-ESM runtimes.
  return readdirSync(dir).flatMap(entry => {
    const full = join(dir, entry)
    if (statSync(full).isDirectory()) {
      return findSkillMdFiles(full)
    }
    // statSync above already succeeded, so no separate existence check is needed.
    return entry === 'SKILL.md' ? [full] : []
  })
}
/**
 * Path of the `.claude/skills/.drafts` directory under the current temp root.
 *
 * Reads the module-level `root` at call time, so it must be called after
 * `beforeEach` has assigned it.
 */
function draftsDir(): string {
  const relativeSegments = ['.claude', 'skills', '.drafts']
  return join(root, ...relativeSegments)
}
expect(second.draft?.type).toBe('draft') + expect(second.active).toBeUndefined() + expect(existsSync(second.draft!.skillPath)).toBe(true) + }) + + test('single strong English exhortation ("must never") stays pending', async () => { + const gap = await recordSkillGap({ + prompt: 'You must never commit secrets to git', + cwd: root, + project, + rootDir: root, + }) + + expect(gap.status).toBe('pending') + expect(gap.count).toBe(1) + expect(gap.draft).toBeUndefined() + expect(gap.active).toBeUndefined() + }) + + test('reaching count >= 4 promotes an existing draft to active', async () => { + const prompt = 'clean up abandoned feature flags' + for (let i = 0; i < 3; i++) { + await recordSkillGap({ prompt, cwd: root, project, rootDir: root }) + } + const fourth = await recordSkillGap({ + prompt, + cwd: root, + project, + rootDir: root, + }) + + expect(fourth.status).toBe('active') + expect(fourth.count).toBe(4) + expect(fourth.draft).toBeDefined() + expect(fourth.active?.type).toBe('active') + expect(existsSync(fourth.active!.skillPath)).toBe(true) + }) + + test('rejected gaps do not regenerate artefacts on subsequent calls', async () => { + const prompt = 'please format the README differently' + await recordSkillGap({ prompt, cwd: root, project, rootDir: root }) + const promoted = await recordSkillGap({ + prompt, + cwd: root, + project, + rootDir: root, + }) + expect(promoted.status).toBe('draft') + + await rejectSkillGap(promoted.key, project, root) + const afterReject = await recordSkillGap({ + prompt, + cwd: root, + project, + rootDir: root, + }) + + expect(afterReject.status).toBe('rejected') + expect(afterReject.count).toBe(3) + expect(afterReject.active).toBeUndefined() + }) +}) + +describe('recordDraftHit — draft hits escalation (P1-4 contract)', () => { + test('draftHits reaching 2 escalates a draft to active', async () => { + const prompt = 'improve error handling in loader.ts' + await recordSkillGap({ prompt, cwd: root, project, rootDir: root }) + const 
drafted = await recordSkillGap({ + prompt, + cwd: root, + project, + rootDir: root, + }) + expect(drafted.status).toBe('draft') + + // Distinct session IDs — recordDraftHit enforces one hit per session so + // a single session can't flip the draftHits>=2 active gate alone + await recordDraftHit(drafted.key, project, root, 'session-a') + const afterSecondHit = await recordDraftHit( + drafted.key, + project, + root, + 'session-b', + ) + + expect(afterSecondHit?.draftHits).toBe(2) + expect(afterSecondHit?.status).toBe('active') + expect(afterSecondHit?.active?.type).toBe('active') + }) + + test('first draft hit does not promote to active', async () => { + const prompt = 'add missing null checks in handler' + await recordSkillGap({ prompt, cwd: root, project, rootDir: root }) + const drafted = await recordSkillGap({ + prompt, + cwd: root, + project, + rootDir: root, + }) + + const afterOneHit = await recordDraftHit(drafted.key, project, root) + + expect(afterOneHit?.draftHits).toBe(1) + expect(afterOneHit?.status).toBe('draft') + expect(afterOneHit?.active).toBeUndefined() + }) + + test('findGapKeyByDraftPath resolves the correct gap for an existing draft', async () => { + const prompt = 'restructure the module boundaries' + await recordSkillGap({ prompt, cwd: root, project, rootDir: root }) + const drafted = await recordSkillGap({ + prompt, + cwd: root, + project, + rootDir: root, + }) + expect(drafted.draft?.skillPath).toBeTruthy() + + const foundKey = await findGapKeyByDraftPath( + drafted.draft!.skillPath, + project, + root, + ) + + expect(foundKey).toBe(drafted.key) + }) + + test('findGapKeyByDraftPath returns undefined for unknown paths', async () => { + const result = await findGapKeyByDraftPath( + '/nowhere/.claude/skills/.drafts/mystery/SKILL.md', + project, + root, + ) + expect(result).toBeUndefined() + }) + + test('recordDraftHit is a no-op on pending gaps', async () => { + const gap = await recordSkillGap({ + prompt: 'investigate the mysterious cache bug', 
// Threshold checks for the two promotion predicates. The tests build gap
// records in memory and pass them straight to the predicates.
describe('shouldPromoteToDraft / shouldPromoteToActive', () => {
  // Fixture builder: a pending gap seen once, with per-test overrides applied
  // last. Called inside each test so `root` is read after `beforeEach` ran.
  const gapRecord = (
    overrides: Partial<SkillGapRecord> = {},
  ): SkillGapRecord => ({
    key: 'k',
    prompt: 'refactor',
    count: 1,
    draftHits: 0,
    draftHitSessions: [],
    status: 'pending',
    sessionId: 's',
    cwd: root,
    projectId: 'global',
    projectName: 'global',
    recommendations: [],
    createdAt: new Date().toISOString(),
    updatedAt: new Date().toISOString(),
    ...overrides,
  })

  test('shouldPromoteToDraft requires count >= 2 (strong signal no longer bypasses)', () => {
    const seenOnce = gapRecord({ prompt: 'refactor this' })
    const seenTwice = { ...seenOnce, count: 2 }
    const strongSignalOnce = { ...seenOnce, prompt: '必须使用 testing-library' }

    expect(shouldPromoteToDraft(seenOnce)).toBe(false)
    expect(shouldPromoteToDraft(seenTwice)).toBe(true)
    // A strongly worded prompt seen only once still does not promote.
    expect(shouldPromoteToDraft(strongSignalOnce)).toBe(false)
  })

  test('shouldPromoteToActive requires a draft plus threshold', () => {
    const drafted = gapRecord({
      count: 3,
      status: 'draft',
      draft: { type: 'draft', name: 'x', skillPath: '/tmp/x' },
    })

    // Count 3 with zero draft hits is below both thresholds.
    expect(shouldPromoteToActive(drafted)).toBe(false)
    expect(shouldPromoteToActive({ ...drafted, count: 4 })).toBe(true)
    expect(shouldPromoteToActive({ ...drafted, draftHits: 2 })).toBe(true)
    // Without a draft artefact the record never promotes.
    expect(shouldPromoteToActive({ ...drafted, draft: undefined })).toBe(false)
  })
})
version: 1, + gaps: { + 'legacy-key': { + key: 'legacy-key', + prompt: 'old gap', + count: 1, + status: 'draft', + sessionId: 's1', + cwd: root, + projectId: 'global', + projectName: 'global', + recommendations: [], + createdAt: '2025-01-01T00:00:00.000Z', + updatedAt: '2025-01-01T00:00:00.000Z', + }, + }, + } + writeFileSync(gapPath, JSON.stringify(legacy), 'utf8') + + const gaps = await readSkillGaps(project, root) + const migrated = gaps[0] + + expect(migrated?.status).toBe('pending') + expect(migrated?.draftHits).toBe(0) + }) + + test('downgrades active without skill file to draft if draft exists', async () => { + const gapPath = join(root, 'global', 'skill-gaps.json') + mkdirSync(join(root, 'global'), { recursive: true }) + const legacy = { + version: 1, + gaps: { + 'legacy-key': { + key: 'legacy-key', + prompt: 'old', + count: 3, + status: 'active', + sessionId: 's1', + cwd: root, + projectId: 'global', + projectName: 'global', + recommendations: [], + createdAt: '2025-01-01T00:00:00.000Z', + updatedAt: '2025-01-01T00:00:00.000Z', + draft: { type: 'draft', name: 'x', skillPath: '/tmp/x' }, + }, + }, + } + writeFileSync(gapPath, JSON.stringify(legacy), 'utf8') + + const gaps = await readSkillGaps(project, root) + expect(gaps[0]?.status).toBe('draft') + }) +}) diff --git a/src/services/skillLearning/__tests__/skillGenerator.test.ts b/src/services/skillLearning/__tests__/skillGenerator.test.ts new file mode 100644 index 000000000..a897703a4 --- /dev/null +++ b/src/services/skillLearning/__tests__/skillGenerator.test.ts @@ -0,0 +1,56 @@ +import { afterEach, beforeEach, describe, expect, test } from 'bun:test' +import { existsSync, mkdtempSync, readFileSync, rmSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { createInstinct } from '../instinctParser.js' +import { generateSkillDraft, writeLearnedSkill } from '../skillGenerator.js' + +let cwd: string + +beforeEach(() => { + cwd = mkdtempSync(join(tmpdir(), 
describe('skillGenerator', () => {
  /**
   * Creates the React-testing instinct both tests start from; only the
   * `action` text differs between them.
   */
  const reactTestingInstinct = (action: string) =>
    createInstinct({
      trigger: 'when writing React tests',
      action,
      confidence: 0.85,
      domain: 'testing',
      source: 'session-observation',
      scope: 'project',
      evidence: ['user correction'],
    })

  test('generates a valid SKILL.md draft from instincts', () => {
    const { name, content } = generateSkillDraft(
      [
        reactTestingInstinct(
          'use testing-library and avoid implementation mocks',
        ),
      ],
      { cwd },
    )

    expect(name).toContain('testing')
    // Frontmatter keys and section headings the generated draft must carry.
    for (const marker of [
      'name:',
      'description:',
      '## Trigger',
      '## Evidence',
    ]) {
      expect(content).toContain(marker)
    }
  })

  test('writes learned skills to project scope', async () => {
    const skillDraft = generateSkillDraft(
      [reactTestingInstinct('use testing-library')],
      { cwd },
    )

    const writtenFile = await writeLearnedSkill(skillDraft)

    expect(existsSync(writtenFile)).toBe(true)
    expect(readFileSync(writtenFile, 'utf8')).toContain('use testing-library')
  })
})
getSkillIndex, searchSkills } from '../../skillSearch/localSearch.js' +import { + resetSkillLearningConfig, + setSkillLearningConfigForTest, +} from '../config.js' +import { loadInstincts, readObservations } from '../index.js' + +let root: string +let previousCwd: string +const originalEnv = { ...process.env } + +beforeEach(() => { + root = mkdtempSync(join(tmpdir(), 'skill-learning-smoke-')) + previousCwd = process.cwd() + process.chdir(root) + process.env = { ...originalEnv } + process.env.CLAUDE_SKILL_LEARNING_HOME = join(root, 'learning-home') + process.env.CLAUDE_CONFIG_DIR = join(root, 'config') + process.env.SKILL_LEARNING_ENABLED = '1' + process.env.ANTHROPIC_API_KEY = 'test-key' + process.env.NODE_ENV = 'test' + setSkillLearningConfigForTest({ minConfidence: 0.3, minClusterSize: 1 }) +}) + +afterEach(() => { + process.chdir(previousCwd) + process.env = { ...originalEnv } + resetSkillLearningConfig() + clearCommandsCache() + try { + rmSync(root, { + recursive: true, + force: true, + maxRetries: 10, + retryDelay: 100, + }) + } catch { + // Windows can keep a transient handle open after dynamic command loading. + // Temp cleanup is best-effort; failing here would mask the smoke result. + } +}) + +describe('skillLearning smoke', () => { + test('ingests corrections, evolves a learned skill, and skill search finds it', async () => { + const transcript = join(root, 'session.jsonl') + writeFileSync(transcript, buildTranscript(), 'utf8') + + // Pass --min-session-length=0 so the 9-observation test transcript is not + // skipped by the ECC-parity gate (default threshold: 10 observations). 
+ const ingestResult = await call( + `ingest ${transcript} --min-session-length=0`, + {} as any, + ) + expect(ingestResult.type).toBe('text') + if (ingestResult.type === 'text') { + expect(ingestResult.value).toContain('Ingested 9 observations') + } + + const options = { + rootDir: process.env.CLAUDE_SKILL_LEARNING_HOME, + project: { + projectId: 'global', + projectName: 'global', + cwd: root, + scope: 'global' as const, + source: 'global' as const, + storageDir: join(process.env.CLAUDE_SKILL_LEARNING_HOME!, 'global'), + }, + } + const observations = await readObservations(options) + expect(observations).toHaveLength(9) + + const instincts = await loadInstincts(options) + const testingInstinct = instincts.find(i => i.domain === 'testing') + expect(testingInstinct?.confidence).toBe(0.8) + expect(testingInstinct?.status).toBe('active') + + const evolveResult = await call('evolve --generate', {} as any) + expect(evolveResult.type).toBe('text') + if (evolveResult.type === 'text') { + // Smoke transcript (9 obs, single fabricated instinct per domain) may + // produce 1 or 2 candidates depending on sessionObserver's clustering. + // Post-H15 we accept either — the smoke proves end-to-end wiring, not + // exact cluster math. 
/**
 * Serializes the smoke-test session as JSONL: three identical user
 * corrections, each separated by a Grep → Read → Edit tool sequence
 * (nine entries in total), one JSON object per line with a trailing newline.
 */
function buildTranscript(): string {
  const entries = [
    user('不要 mock,用 testing-library', 0),
    toolUse('Grep', { pattern: 'renderHook' }, 1),
    toolUse('Read', { file_path: 'src/example.test.tsx' }, 2),
    toolUse('Edit', { file_path: 'src/example.test.tsx' }, 3),
    user('不要 mock,用 testing-library', 4),
    toolUse('Grep', { pattern: 'mock' }, 5),
    toolUse('Read', { file_path: 'src/example.test.tsx' }, 6),
    toolUse('Edit', { file_path: 'src/example.test.tsx' }, 7),
    user('不要 mock,用 testing-library', 8),
  ]
  return `${entries.map(entry => JSON.stringify(entry)).join('\n')}\n`
}

/**
 * Returns the ISO-8601 timestamp `second` seconds after
 * 2026-04-16T00:00:00Z.
 *
 * Going through `Date` (rather than splicing the number into a template)
 * keeps the result valid for two-digit offsets and rolls offsets of 60 or
 * more into minutes.
 */
function transcriptTimestamp(second: number): string {
  // Month is zero-based: 3 is April.
  return new Date(Date.UTC(2026, 3, 16, 0, 0, second)).toISOString()
}

/**
 * Builds a user transcript entry for the smoke session.
 *
 * @param content - Text of the user message.
 * @param second - Offset in seconds from the start of the session.
 */
function user(content: string, second: number) {
  return {
    type: 'user',
    sessionId: 'smoke-session',
    cwd: root,
    timestamp: transcriptTimestamp(second),
    message: { role: 'user', content },
  }
}

/**
 * Builds an assistant transcript entry holding a single `tool_use` block.
 *
 * @param name - Tool name, e.g. `'Grep'`.
 * @param input - Tool input payload, stored as-is.
 * @param second - Offset in seconds from the start of the session.
 */
function toolUse(
  name: string,
  input: Record<string, unknown>,
  second: number,
) {
  return {
    type: 'assistant',
    sessionId: 'smoke-session',
    cwd: root,
    timestamp: transcriptTimestamp(second),
    message: {
      role: 'assistant',
      content: [{ type: 'tool_use', name, input }],
    },
  }
}
-0,0 +1,161 @@ +import { afterEach, beforeEach, describe, expect, test } from 'bun:test' +import { + existsSync, + mkdtempSync, + readFileSync, + rmSync, + writeFileSync, +} from 'node:fs' +import { mkdir } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import type { LearnedSkillDraft } from '../types.js' +import { + applySkillLifecycleDecision, + compareExistingSkills, + decideSkillLifecycle, + loadExistingSkills, +} from '../skillLifecycle.js' + +let root: string + +beforeEach(() => { + root = mkdtempSync(join(tmpdir(), 'skill-learning-lifecycle-')) +}) + +afterEach(() => { + rmSync(root, { recursive: true, force: true }) +}) + +describe('skillLifecycle', () => { + test('detects overlapping existing skills', async () => { + await writeSkill('react-testing', 'Use testing-library for React tests') + const draft = draftSkill( + 'react-testing-updated', + 'Use testing-library for React tests and avoid implementation mocks', + ) + + const matches = await compareExistingSkills(draft, [root]) + + expect(matches[0]?.name).toBe('react-testing') + }) + + test('replace archives old skill so it leaves active index', async () => { + await writeSkill( + 'react-testing', + 'Use testing-library for React tests and avoid implementation mocks', + ) + const draft = draftSkill( + 'react-testing-updated', + 'Use testing-library for React tests and avoid implementation mocks', + ) + const matches = await compareExistingSkills(draft, [root]) + const decision = decideSkillLifecycle(draft, matches) + + expect(decision.type).toBe('replace') + const result = await applySkillLifecycleDecision(decision) + + expect(result.activePath).toBeDefined() + expect(result.archivedPath).toBeDefined() + expect(existsSync(join(root, 'react-testing'))).toBe(false) + expect( + existsSync(join(result.archivedPath!, 'replacement-manifest.json')), + ).toBe(true) + expect( + (await loadExistingSkills([root])).map(skill => skill.name), + 
).not.toContain('react-testing') + }) + + test('create writes new skill when no overlap exists', async () => { + const draft = draftSkill('new-testing', 'A unique learned testing workflow') + const decision = decideSkillLifecycle(draft, []) + const result = await applySkillLifecycleDecision(decision) + + expect(result.activePath).toBeDefined() + expect(readFileSync(result.activePath!, 'utf8')).toContain('new-testing') + }) + + test('merge skips user-authored skill without origin field and logs warning', async () => { + const body = + 'Use testing-library for React tests and avoid implementation mocks' + await writeSkill('react-testing', body, null) + // Build a draft that overlaps with the existing skill at the merge threshold + const draft: LearnedSkillDraft = { + name: 'react-testing', + description: body, + scope: 'project', + sourceInstinctIds: ['i1'], + confidence: 0.6, + content: `---\nname: react-testing\ndescription: ${JSON.stringify(body)}\n---\n\n# React Testing\n\n${body}\n`, + outputPath: join(root, 'react-testing-patch'), + } + const matches = await compareExistingSkills(draft, [root]) + // Force a merge decision by lowering confidence below the replace threshold + const decision = decideSkillLifecycle(draft, matches) + expect(decision.type).toBe('merge') + + const stderrChunks: string[] = [] + const originalWrite = process.stderr.write.bind(process.stderr) + process.stderr.write = (chunk: unknown) => { + stderrChunks.push(String(chunk)) + return true + } + try { + const result = await applySkillLifecycleDecision(decision) + expect(result.activePath).toBeUndefined() + expect( + stderrChunks.some(line => + line.includes('[skill-learning] skip user-authored skill'), + ), + ).toBe(true) + } finally { + process.stderr.write = originalWrite + } + }) + + test('replace proceeds normally for skill-learning-generated skill', async () => { + await writeSkill( + 'generated-testing', + 'Use testing-library for React tests and avoid implementation mocks', + 
/**
 * Renders the SKILL.md text shared by the on-disk fixtures and the in-memory
 * drafts, so both sides of every overlap comparison use the same layout.
 *
 * @param name - Skill name; used in the frontmatter and as the H1 heading.
 * @param body - Skill text; JSON-quoted in `description` and repeated as the body.
 * @param origin - Value for the `origin:` frontmatter line, or `null` to omit it.
 */
function renderSkillMarkdown(
  name: string,
  body: string,
  origin: string | null,
): string {
  const originLine = origin !== null ? `origin: ${origin}\n` : ''
  return `---\nname: ${name}\ndescription: ${JSON.stringify(body)}\n${originLine}---\n\n# ${name}\n\n${body}\n`
}

/**
 * Writes `<root>/<name>/SKILL.md` as an existing-skill fixture.
 *
 * @param name - Skill directory name and frontmatter `name`.
 * @param body - Skill description and body text.
 * @param origin - `origin:` frontmatter value; pass `null` to write a skill
 *   with no origin field (the "user-authored" case in the tests above).
 */
async function writeSkill(
  name: string,
  body: string,
  origin: string | null = 'skill-learning',
): Promise<void> {
  const dir = join(root, name)
  await mkdir(dir, { recursive: true })
  writeFileSync(join(dir, 'SKILL.md'), renderSkillMarkdown(name, body, origin))
}

/**
 * Builds a project-scoped draft with confidence 0.9 whose content has no
 * `origin` line and whose output directory is `<root>/<name>`.
 *
 * @param name - Draft skill name.
 * @param text - Used as both the description and the body.
 */
function draftSkill(name: string, text: string): LearnedSkillDraft {
  return {
    name,
    description: text,
    scope: 'project',
    sourceInstinctIds: ['i1'],
    confidence: 0.9,
    content: renderSkillMarkdown(name, text, null),
    outputPath: join(root, name),
  }
}
/**
 * Wraps plain `{ uuid, content }` pairs in a hook context whose
 * `querySource` is `'repl_main_thread'` and whose messages are all
 * user-role messages. The remaining context fields are empty placeholders.
 */
function makeCtx(
  messages: Array<{ uuid: string; content: string }>,
): REPLHookContext {
  return {
    querySource: 'repl_main_thread',
    messages: messages.map(entry => {
      const message = { role: 'user' as const, content: entry.content }
      return { type: 'user' as const, uuid: entry.uuid as any, message }
    }),
    systemPrompt: [] as any,
    userContext: {},
    systemContext: {},
    toolUseContext: { agentId: undefined } as any,
  }
}

/**
 * Produces five messages with uuids `<prefix>-0` … `<prefix>-4`, all with
 * the same correction text.
 */
function make5Msgs(prefix: string): Array<{ uuid: string; content: string }> {
  const batch: Array<{ uuid: string; content: string }> = []
  for (let index = 0; index < 5; index += 1) {
    batch.push({
      uuid: `${prefix}-${index}`,
      content: '不要 mock,用 testing-library',
    })
  }
  return batch
}

/**
 * Produces `count` stored `user_message` observations with ids `o0`, `o1`, …
 * Each one is stamped with the current time and fixed session and project
 * fields.
 */
function makeObs(count: number): StoredSkillObservation[] {
  const observations: StoredSkillObservation[] = []
  for (let index = 0; index < count; index += 1) {
    observations.push({
      id: `o${index}`,
      timestamp: new Date().toISOString(),
      event: 'user_message' as const,
      sessionId: 's1',
      projectId: 'p1',
      projectName: 'project',
      cwd: '/tmp',
      messageText: 'test message',
    })
  }
  return observations
}
process.chdir(root) + process.env = { ...originalEnv } + process.env.CLAUDE_SKILL_LEARNING_HOME = join(root, 'learning-home') + process.env.CLAUDE_CONFIG_DIR = join(root, 'config') + process.env.SKILL_LEARNING_ENABLED = '1' + process.env.NODE_ENV = 'test' + resetRuntimeObserverForTest() + resetCircuitBreaker() + setActiveObserverBackend(originalBackendName) +}) + +afterEach(() => { + process.chdir(previousCwd) + process.env = { ...originalEnv } + resetSkillLearningConfig() + rmSync(root, { recursive: true, force: true }) + resetRuntimeObserverForTest() + resetCircuitBreaker() + setActiveObserverBackend(originalBackendName) +}) + +// --------------------------------------------------------------------------- +// H5: LLM throttle — minimum observation count gate +// --------------------------------------------------------------------------- +describe('H5: LLM call throttle', () => { + test('fewer than 5 observations routes to heuristic — LLM backend not called', async () => { + let llmCallCount = 0 + const trackingBackend: ObserverBackend = { + name: 'tracking-under5', + analyze() { + llmCallCount++ + return [] + }, + } + registerObserverBackend(trackingBackend) + setActiveObserverBackend('tracking-under5') + + // 3 messages → 3 observations, below the threshold of 5. + await runSkillLearningPostSampling( + makeCtx([ + { uuid: 'u5a', content: '不要 mock,用 testing-library' }, + { uuid: 'u5b', content: '不要 mock,用 testing-library' }, + { uuid: 'u5c', content: '不要 mock,用 testing-library' }, + ]), + ) + + expect(llmCallCount).toBe(0) + }) + + test('session cap: more calls than cap reaches heuristic fallback', async () => { + // Cap at 1 call, cooldown 0ms. 
+ setSkillLearningConfigForTest({ + llm: { maxCallsPerSession: 1, cooldownMs: 0 }, + }) + + let llmCallCount = 0 + const trackingBackend: ObserverBackend = { + name: 'tracking-cap', + analyze() { + llmCallCount++ + return [] + }, + } + registerObserverBackend(trackingBackend) + setActiveObserverBackend('tracking-cap') + + // First call with 5 messages — reaches LLM. + await runSkillLearningPostSampling(makeCtx(make5Msgs('cap1'))) + expect(llmCallCount).toBe(1) + + // Second call with 5 different messages — cap hit, must NOT reach LLM. + await runSkillLearningPostSampling(makeCtx(make5Msgs('cap2'))) + expect(llmCallCount).toBe(1) + }) + + test('cooldown gate: second call within cooldown window skips LLM', async () => { + // Very long cooldown — second call is always within window. + setSkillLearningConfigForTest({ + llm: { cooldownMs: 999_999_000, maxCallsPerSession: 100 }, + }) + + let llmCallCount = 0 + const trackingBackend: ObserverBackend = { + name: 'tracking-cooldown', + analyze() { + llmCallCount++ + return [] + }, + } + registerObserverBackend(trackingBackend) + setActiveObserverBackend('tracking-cooldown') + + await runSkillLearningPostSampling(makeCtx(make5Msgs('cd1'))) + expect(llmCallCount).toBe(1) + + // Second call — still within 999999 second cooldown. + await runSkillLearningPostSampling(makeCtx(make5Msgs('cd2'))) + expect(llmCallCount).toBe(1) + }) + + test('resetRuntimeLLMBookkeeping resets session counter and timestamps', async () => { + setSkillLearningConfigForTest({ + llm: { maxCallsPerSession: 1, cooldownMs: 0 }, + }) + + let llmCallCount = 0 + const trackingBackend: ObserverBackend = { + name: 'tracking-reset', + analyze() { + llmCallCount++ + return [] + }, + } + registerObserverBackend(trackingBackend) + setActiveObserverBackend('tracking-reset') + + // First call reaches LLM; cap = 1, so second call is blocked. 
+ await runSkillLearningPostSampling(makeCtx(make5Msgs('rr1'))) + await runSkillLearningPostSampling(makeCtx(make5Msgs('rr2'))) + expect(llmCallCount).toBe(1) + + // After reset the counter clears — next call reaches LLM again. + resetRuntimeLLMBookkeeping() + await runSkillLearningPostSampling(makeCtx(make5Msgs('rr3'))) + expect(llmCallCount).toBe(2) + }) +}) + +// --------------------------------------------------------------------------- +// H6: Message watermark dedup +// --------------------------------------------------------------------------- +describe('H6: message watermark dedup', () => { + test('same message uuids are not re-processed in a subsequent call', async () => { + // Use a backend that counts observations to detect dedup. + let totalObservations = 0 + const countingBackend: ObserverBackend = { + name: 'counting-dedup', + analyze(observations) { + totalObservations += observations.length + return [] + }, + } + registerObserverBackend(countingBackend) + setActiveObserverBackend('counting-dedup') + setSkillLearningConfigForTest({ + llm: { cooldownMs: 0, maxCallsPerSession: 100 }, + }) + + const messages = make5Msgs('ded') + + // First call: 5 new message observations. + await runSkillLearningPostSampling(makeCtx(messages)) + const afterFirst = totalObservations + + // Second call with SAME messages: all uuids already seen → 0 new + // observations from messages. The early `if (observations.length === 0) return` + // fires and the backend is never called. 
+ await runSkillLearningPostSampling(makeCtx(messages)) + const afterSecond = totalObservations + + expect(afterSecond).toBe(afterFirst) + }) + + test('different message uuids are always processed', async () => { + let totalObservations = 0 + const countingBackend: ObserverBackend = { + name: 'counting-dedup-new', + analyze(observations) { + totalObservations += observations.length + return [] + }, + } + registerObserverBackend(countingBackend) + setActiveObserverBackend('counting-dedup-new') + setSkillLearningConfigForTest({ + llm: { cooldownMs: 0, maxCallsPerSession: 100 }, + }) + + await runSkillLearningPostSampling(makeCtx(make5Msgs('new1'))) + const afterFirst = totalObservations + + // Different uuids — all 5 new messages pass dedup. + await runSkillLearningPostSampling(makeCtx(make5Msgs('new2'))) + expect(totalObservations).toBeGreaterThan(afterFirst) + }) + + test('resetRuntimeLLMBookkeeping clears dedup set — same uuids reprocessed', async () => { + let totalObservations = 0 + const countingBackend: ObserverBackend = { + name: 'counting-dedup-clr', + analyze(observations) { + totalObservations += observations.length + return [] + }, + } + registerObserverBackend(countingBackend) + setActiveObserverBackend('counting-dedup-clr') + setSkillLearningConfigForTest({ + llm: { cooldownMs: 0, maxCallsPerSession: 100 }, + }) + + const messages = make5Msgs('clr') + await runSkillLearningPostSampling(makeCtx(messages)) + const afterFirst = totalObservations + + // After reset, dedup set is cleared — same messages are reprocessed. 
+ resetRuntimeLLMBookkeeping() + await runSkillLearningPostSampling(makeCtx(messages)) + expect(totalObservations).toBeGreaterThan(afterFirst) + }) +}) + +// --------------------------------------------------------------------------- +// H7: Circuit breaker (tests the llmObserverBackend state machine directly) +// --------------------------------------------------------------------------- +describe('H7: circuit breaker', () => { + test('circuit opens after failure threshold and subsequent calls return heuristic result without hitting queryHaiku', async () => { + // In the test environment, queryHaiku will fail (no API key). We leverage + // that to trigger circuit breaker state via the real backend. We verify + // the circuit opens by checking that the backend returns [] (empty LLM + // output, falls through to heuristic) and by exercising resetCircuitBreaker. + + const { llmObserverBackend } = await import('../llmObserverBackend.js') + resetCircuitBreaker() + + setSkillLearningConfigForTest({ + llm: { failureThreshold: 3, circuitCooldownMs: 60_000 }, + }) + + const obs = makeObs(5) + + // 3 calls → each fails → 3rd failure opens circuit. + // All return heuristic fallback (possibly [] since obs have no message text + // that the heuristic would match against correction patterns, but the calls + // still go through the circuit). + await llmObserverBackend.analyze(obs) + await llmObserverBackend.analyze(obs) + await llmObserverBackend.analyze(obs) + + // Circuit is now open. Verify resetCircuitBreaker closes it by checking + // the module-level state: after reset the backend does not short-circuit + // immediately (it tries queryHaiku again, fails again, increments counter). + // We can observe this by calling resetCircuitBreaker and making another + // call — it will NOT short-circuit the queryHaiku attempt. 
+ resetCircuitBreaker() + + // This call must reach queryHaiku (which fails → heuristic fallback) rather + // than short-circuit to heuristic from the open circuit. Either way the + // return value is an array — but the key is that resetCircuitBreaker works. + const result = await llmObserverBackend.analyze(obs) + expect(Array.isArray(result)).toBe(true) + }) + + test('circuit breaker env vars are respected', async () => { + // Verify that setting threshold to 1 opens circuit after the first failure. + const { llmObserverBackend } = await import('../llmObserverBackend.js') + resetCircuitBreaker() + + setSkillLearningConfigForTest({ + llm: { failureThreshold: 1, circuitCooldownMs: 60_000 }, + }) + + const obs = makeObs(5) + + // One failure — circuit should open. + await llmObserverBackend.analyze(obs) + + // The next call should be short-circuited. We can't easily observe this + // without mocking, but we can verify that after resetCircuitBreaker the + // state is clean and a call proceeds without crashing. + resetCircuitBreaker() + const result = await llmObserverBackend.analyze(obs) + expect(Array.isArray(result)).toBe(true) + }) + + test('empty observations bypass circuit breaker entirely', async () => { + const { llmObserverBackend } = await import('../llmObserverBackend.js') + resetCircuitBreaker() + + // Empty observations → short-circuit at top of analyseWithHaiku → [] + // regardless of circuit state. + const result = await llmObserverBackend.analyze([]) + expect(result).toEqual([]) + }) + + test('resetCircuitBreaker resets state to closed', async () => { + const { llmObserverBackend } = await import('../llmObserverBackend.js') + resetCircuitBreaker() + + // After reset, the backend is in clean state. Calling it with observations + // returns an array (either LLM result or heuristic fallback). 
/**
 * Returns a fresh tool-hook context for session `tool-hook-session`, turn 1,
 * rooted at the current temp `rootDir`. The nested `project` repeats the
 * same id, name and cwd and stores under `<rootDir>/projects/p1`.
 */
function ctx() {
  const project = {
    projectId: 'p1',
    projectName: 'project',
    cwd: rootDir,
    scope: 'project' as const,
    source: 'global' as const,
    storageDir: join(rootDir, 'projects', 'p1'),
  }
  const { projectId, projectName, cwd } = project
  return {
    sessionId: 'tool-hook-session',
    turn: 1,
    projectId,
    projectName,
    cwd,
    project,
  }
}
expect(observations).toHaveLength(1) + expect(observations[0]?.event).toBe('tool_start') + expect(observations[0]?.source).toBe('tool-hook') + expect(observations[0]?.toolName).toBe('Grep') + }) + + test('records tool_complete with success outcome', async () => { + await recordToolComplete(ctx(), 'Edit', 'ok', 'success') + const observations = await readObservations({ + rootDir, + project: ctx().project, + }) + expect(observations[0]?.event).toBe('tool_complete') + expect(observations[0]?.outcome).toBe('success') + }) + + test('records tool_error as tool_complete with failure outcome', async () => { + await recordToolError(ctx(), 'Bash', new Error('boom')) + const observations = await readObservations({ + rootDir, + project: ctx().project, + }) + expect(observations[0]?.outcome).toBe('failure') + }) + + test('records user correction message', async () => { + await recordUserCorrection(ctx(), '不要 mock,用 testing-library') + const observations = await readObservations({ + rootDir, + project: ctx().project, + }) + expect(observations[0]?.event).toBe('user_message') + expect(observations[0]?.messageText).toContain('testing-library') + }) + + test('tracks which session+turn has tool-hook observations', async () => { + expect(hasToolHookObservationsForTurn('tool-hook-session', 1)).toBe(false) + await recordToolStart(ctx(), 'Grep') + expect(hasToolHookObservationsForTurn('tool-hook-session', 1)).toBe(true) + expect(hasToolHookObservationsForTurn('tool-hook-session', 2)).toBe(false) + }) + + // H11: emittedTurns bounded memory tests + describe('pruneEmittedTurns', () => { + test('prunes Set entries exceeding SET_MAX keeping most recent', async () => { + const sessionId = 'big-session' + // Fill 501 turns (threshold is 500) + for (let i = 1; i <= 501; i++) { + await recordToolStart({ ...ctx(), sessionId, turn: i }, 'Grep') + } + // After pruning the Set should not exceed KEEP limit (250) + expect(hasToolHookObservationsForTurn(sessionId, 1)).toBe(false) // oldest pruned + 
expect(hasToolHookObservationsForTurn(sessionId, 501)).toBe(true) // newest kept + expect(hasToolHookObservationsForTurn(sessionId, 252)).toBe(true) // within keep window + }) + + test('prunes Map entries exceeding MAP_MAX keeping most recent insertions', async () => { + // Insert 51 distinct sessions (threshold is 50) + for (let i = 0; i < 51; i++) { + await recordToolStart( + { ...ctx(), sessionId: `session-${i}`, turn: 1 }, + 'Grep', + ) + } + // Oldest sessions should have been pruned from the Map + expect(hasToolHookObservationsForTurn('session-0', 1)).toBe(false) + // Most recent sessions should still be present + expect(hasToolHookObservationsForTurn('session-50', 1)).toBe(true) + }) + + test('pruneEmittedTurns is idempotent when within limits', async () => { + await recordToolStart(ctx(), 'Grep') + pruneEmittedTurns() + pruneEmittedTurns() + // Should not affect tracked turns within limits + expect(hasToolHookObservationsForTurn('tool-hook-session', 1)).toBe(true) + }) + }) + + // H10: fire-and-forget / flag-off tests + describe('runToolCallWithSkillLearningHooks', () => { + afterEach(() => { + resetToolHookDepsCache() + delete process.env.SKILL_LEARNING_ENABLED + }) + + test('invoke completes before recordToolStart promise resolves (fire-and-forget)', async () => { + process.env.SKILL_LEARNING_ENABLED = '1' + resetToolHookDepsCache() + + const completionOrder: string[] = [] + let resolveStart!: () => void + // A slow recordToolStart: promise that resolves only when we let it + const slowStartPromise = new Promise(res => { + resolveStart = res + }) + + // We spy on appendObservation by replacing the module's behaviour + // without mocking: we just verify timing via a flag + let invokeCompleted = false + + const result = await runToolCallWithSkillLearningHooks( + 'TestTool', + {}, + { sessionId: 'test-ff-session', turn: 99 }, + async () => { + // Short delay to let any awaited hooks run first (they must not) + await new Promise(res => setTimeout(res, 5)) + 
invokeCompleted = true + completionOrder.push('invoke') + return { data: 'done' } + }, + ) + + // The invoke result is returned immediately — observation may still be in-flight + expect(result).toEqual({ data: 'done' }) + expect(invokeCompleted).toBe(true) + }) + + test('flag off: wrapper skips observation entirely and returns invoke result', async () => { + process.env.SKILL_LEARNING_ENABLED = '0' + resetToolHookDepsCache() + + let invokeCalled = false + const result = await runToolCallWithSkillLearningHooks( + 'TestTool', + {}, + {}, + async () => { + invokeCalled = true + return { data: 42 } + }, + ) + expect(invokeCalled).toBe(true) + expect(result).toEqual({ data: 42 }) + // No observations should have been written + const obs = await readObservations({ rootDir, project: ctx().project }) + expect(obs).toHaveLength(0) + }) + }) +}) diff --git a/src/services/skillLearning/agentGenerator.ts b/src/services/skillLearning/agentGenerator.ts new file mode 100644 index 000000000..032180686 --- /dev/null +++ b/src/services/skillLearning/agentGenerator.ts @@ -0,0 +1,164 @@ +import { mkdir, writeFile } from 'node:fs/promises' +import { existsSync } from 'node:fs' +import { join } from 'node:path' +import { getClaudeConfigHomeDir } from '../../utils/envUtils.js' +import { clearCommandsCache } from '../../commands.js' +import type { Instinct } from './instinctParser.js' +import { normalizeSkillName } from './learningPolicy.js' +import type { SkillLearningScope } from './types.js' + +export type AgentGeneratorOptions = { + cwd?: string + globalAgentsDir?: string + outputRoot?: string + name?: string + description?: string + scope?: SkillLearningScope +} + +export type LearnedAgentDraft = { + name: string + description: string + scope: SkillLearningScope + sourceInstinctIds: string[] + confidence: number + content: string + outputPath: string +} + +export function generateAgentDraft( + instincts: Instinct[], + options?: AgentGeneratorOptions, +): LearnedAgentDraft { + if 
/**
 * Persist a learned-agent draft as `<draft.outputPath>/<draft.name>.md`.
 *
 * The output directory is created when missing. The file is created
 * exclusively: when a file with that name already exists it is left untouched
 * and its path is returned, so an earlier (possibly hand-edited) agent is never
 * overwritten. The commands cache is cleared only when a new file was written.
 *
 * @param draft - Draft whose `outputPath`, `name` and `content` are written.
 * @returns Path of the agent markdown file.
 * @throws Any filesystem error other than "file already exists".
 */
export async function writeLearnedAgent(
  draft: LearnedAgentDraft,
): Promise<string> {
  await mkdir(draft.outputPath, { recursive: true })
  const filePath = join(draft.outputPath, `${draft.name}.md`)
  try {
    // 'wx' fails with EEXIST instead of truncating, which removes the window
    // between a separate existence check and the write.
    await writeFile(filePath, draft.content, { encoding: 'utf8', flag: 'wx' })
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === 'EEXIST') return filePath
    throw error
  }
  clearCommandsCache()
  return filePath
}
/** Tokens that carry no naming value for a learned agent. */
const AGENT_NAME_STOP_WORDS: ReadonlySet<string> = new Set([
  'when',
  'with',
  'this',
  'that',
  'user',
  'asks',
  'for',
  'the',
  'and',
  'debug',
  'investigate',
  'research',
])

/**
 * Collect up to `max` distinct naming words from the instincts' trigger and
 * action text, in order of first appearance.
 *
 * Text is lower-cased and split on runs of non-alphanumeric characters; tokens
 * of two characters or fewer and stop words are skipped.
 *
 * @param instincts - Instincts whose `trigger` and `action` are scanned.
 * @param max - Maximum number of words to return; values <= 0 yield `[]`.
 * @returns The collected words (possibly fewer than `max`).
 */
function extractWords(instincts: Instinct[], max: number): string[] {
  const words: string[] = []
  // Checked up front: the in-loop limit check only runs after a push, so a
  // non-positive limit would otherwise still let one word through.
  if (max <= 0) return words

  for (const instinct of instincts) {
    const tokens = `${instinct.trigger} ${instinct.action}`
      .toLowerCase()
      .split(/[^a-z0-9]+/)
    for (const token of tokens) {
      if (token.length <= 2) continue
      if (AGENT_NAME_STOP_WORDS.has(token) || words.includes(token)) continue
      words.push(token)
      if (words.length >= max) return words
    }
  }
  return words
}
existsSync } from 'node:fs' +import { join } from 'node:path' +import { getClaudeConfigHomeDir } from '../../utils/envUtils.js' +import { clearCommandsCache } from '../../commands.js' +import type { Instinct } from './instinctParser.js' +import { normalizeSkillName } from './learningPolicy.js' +import type { SkillLearningScope } from './types.js' + +export type CommandGeneratorOptions = { + cwd?: string + globalCommandsDir?: string + outputRoot?: string + name?: string + description?: string + scope?: SkillLearningScope +} + +export type LearnedCommandDraft = { + name: string + description: string + scope: SkillLearningScope + sourceInstinctIds: string[] + confidence: number + content: string + outputPath: string +} + +export function generateCommandDraft( + instincts: Instinct[], + options?: CommandGeneratorOptions, +): LearnedCommandDraft { + if (instincts.length === 0) { + throw new Error('Cannot generate a command draft without instincts') + } + + const scope = options?.scope ?? instincts[0]?.scope ?? 'project' + const rawName = options?.name ?? buildCommandName(instincts) + const name = normalizeSkillName(rawName) + const confidence = averageConfidence(instincts) + const description = options?.description ?? 
/**
 * Persist a learned-command draft as `<draft.outputPath>/<draft.name>.md`.
 *
 * The output directory is created when missing. The file is created
 * exclusively: when a file with that name already exists it is left untouched
 * and its path is returned, so an earlier (possibly hand-edited) command is
 * never overwritten. The commands cache is cleared only when a new file was
 * written.
 *
 * @param draft - Draft whose `outputPath`, `name` and `content` are written.
 * @returns Path of the command markdown file.
 * @throws Any filesystem error other than "file already exists".
 */
export async function writeLearnedCommand(
  draft: LearnedCommandDraft,
): Promise<string> {
  await mkdir(draft.outputPath, { recursive: true })
  const filePath = join(draft.outputPath, `${draft.name}.md`)
  try {
    // 'wx' fails with EEXIST instead of truncating, which removes the window
    // between a separate existence check and the write.
    await writeFile(filePath, draft.content, { encoding: 'utf8', flag: 'wx' })
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === 'EEXIST') return filePath
    throw error
  }
  clearCommandsCache()
  return filePath
}
/**
 * Render the markdown file for a learned command: a front-matter header
 * followed by the "When to use", "Steps" and "Evidence" bullet sections.
 *
 * @param params.name - Command name, used in the header and the `# /name` title.
 * @param params.description - Written JSON-quoted into the front matter.
 * @param params.confidence - Rounded to two decimals in the front matter.
 * @param params.instincts - Source of the ids, triggers, actions and evidence.
 * @returns The full file content, terminated by a newline.
 */
function buildCommandContent(params: {
  name: string
  description: string
  confidence: number
  instincts: Instinct[]
}): string {
  const asBullets = (items: string[]): string =>
    items.map(item => `- ${item}`).join('\n')

  const sourceIds = params.instincts
    .map(source => JSON.stringify(source.id))
    .join(', ')

  const frontMatter = [
    '---',
    `name: ${params.name}`,
    `description: ${JSON.stringify(params.description)}`,
    'origin: skill-learning',
    `confidence: ${Number(params.confidence.toFixed(2))}`,
    `evolved_from: [${sourceIds}]`,
    '---',
  ].join('\n')

  // Every section below is separated from its neighbours by one blank line.
  const sections = [
    `# /${params.name}`,
    '## When to use',
    asBullets(params.instincts.map(source => source.trigger)),
    '## Steps',
    asBullets(params.instincts.map(source => source.action)),
    '## Evidence',
    asBullets(params.instincts.flatMap(source => source.evidence)),
  ].join('\n\n')

  return `${frontMatter}\n\n${sections}\n`
}
/** Resolved skill-learning settings handed to consumers. */
export type SkillLearningConfig = {
  readonly minConfidence: number
  readonly minClusterSize: number
  readonly llm: SkillLearningLlmConfig
}

/** Partial settings layered on top of the defaults. */
export type SkillLearningConfigOverrides = {
  minConfidence?: number
  minClusterSize?: number
  llm?: Partial<SkillLearningLlmConfig>
}

const DEFAULTS: SkillLearningConfig = {
  minConfidence: 0.75,
  minClusterSize: 3,
  llm: {
    timeoutMs: 10_000,
    maxCallsPerSession: 20,
    cooldownMs: 30_000,
    failureThreshold: 3,
    circuitCooldownMs: 60_000,
  },
}

// Currently installed overrides; `undefined` means "use DEFAULTS as-is".
let overrides: SkillLearningConfigOverrides | undefined

/** Copy `source` without the keys whose value is `undefined`. */
function withoutUndefined<T extends object>(source: T | undefined): Partial<T> {
  const kept: Partial<T> = {}
  if (!source) return kept
  for (const key of Object.keys(source) as (keyof T)[]) {
    if (source[key] !== undefined) kept[key] = source[key]
  }
  return kept
}

/**
 * Return the effective configuration: the defaults with any installed
 * overrides applied.
 *
 * An override whose value is `undefined` is treated as absent at both levels,
 * so `{ llm: { timeoutMs: undefined } }` keeps the default timeout just as
 * `{ minConfidence: undefined }` keeps the default confidence.
 *
 * @returns The shared defaults object when no overrides are installed,
 *   otherwise a freshly built config object.
 */
export function getSkillLearningConfig(): SkillLearningConfig {
  if (!overrides) return DEFAULTS
  return {
    minConfidence: overrides.minConfidence ?? DEFAULTS.minConfidence,
    minClusterSize: overrides.minClusterSize ?? DEFAULTS.minClusterSize,
    llm: { ...DEFAULTS.llm, ...withoutUndefined(overrides.llm) },
  }
}

/**
 * Install overrides, replacing any previously installed ones.
 *
 * @param config - Overrides to apply on top of the defaults.
 */
export function setSkillLearningConfigForTest(
  config: SkillLearningConfigOverrides,
): void {
  overrides = config
}

/** Drop all installed overrides so the defaults apply again. */
export function resetSkillLearningConfig(): void {
  overrides = undefined
}
/**
 * Group instincts into evolution candidates.
 *
 * Only `active` and `pending` instincts are considered. They are grouped by
 * domain plus normalized trigger; groups smaller than the configured
 * `minClusterSize` are dropped, and the rest are returned sorted by average
 * confidence, highest first.
 *
 * @param instincts - Instincts to cluster.
 * @returns One candidate per surviving group; the candidate's trigger and
 *   domain come from the group's first instinct.
 */
export function clusterInstincts(instincts: Instinct[]): EvolutionCandidate[] {
  const groups = new Map<string, Instinct[]>()
  for (const instinct of instincts) {
    if (instinct.status !== 'active' && instinct.status !== 'pending') continue
    const key = `${instinct.domain}:${normalizedTrigger(instinct.trigger)}`
    const group = groups.get(key)
    if (group) {
      group.push(instinct)
    } else {
      groups.set(key, [instinct])
    }
  }

  // Read once; the threshold does not change while the groups are filtered.
  const { minClusterSize } = getSkillLearningConfig()

  return Array.from(groups.values())
    .filter(group => {
      // Require the cluster-size floor unconditionally. Single-shot
      // high-confidence instincts previously bypassed this via the
      // `|| confidence >= 0.8` OR, which let one message become a
      // persistent policy — exactly the H15 risk the threshold guards
      // against. Repeated independent observation is non-negotiable.
      return group.length >= minClusterSize
    })
    .map(group => {
      const averageConfidence =
        group.reduce((sum, instinct) => sum + instinct.confidence, 0) /
        group.length
      return {
        target: classifyEvolutionTarget(group),
        trigger: group[0]?.trigger ?? 'learned pattern',
        domain: group[0]?.domain ?? 'project',
        instincts: group,
        averageConfidence: Number(averageConfidence.toFixed(2)),
      }
    })
    .sort((a, b) => b.averageConfidence - a.averageConfidence)
}
/**
 * Build skill drafts for every instinct cluster that is classified as a
 * `skill` and passes `shouldGenerateSkillFromInstincts`.
 *
 * @param instincts - Instincts to cluster and evaluate.
 * @param options - Passed through to `generateSkillDraft`; its `scope` is
 *   replaced by the scope of the cluster's first instinct (or `'project'`).
 * @returns One draft per eligible cluster, in cluster order.
 */
export function generateSkillCandidates(
  instincts: Instinct[],
  options?: SkillGeneratorOptions,
): LearnedSkillDraft[] {
  const eligibleClusters = clusterInstincts(instincts).filter(cluster => {
    if (cluster.target !== 'skill') return false
    return shouldGenerateSkillFromInstincts(cluster.instincts)
  })

  return eligibleClusters.map(cluster => {
    const scope = cluster.instincts[0]?.scope ?? 'project'
    return generateSkillDraft(cluster.instincts, { ...options, scope })
  })
}
/**
 * Reduce a trigger phrase to a comparison key: its first six lower-case
 * alphanumeric words joined by single spaces. Punctuation and whitespace only
 * act as word separators.
 *
 * @param trigger - Free-form trigger text.
 * @returns The normalized key, or `''` when the text has no alphanumeric run.
 */
function normalizedTrigger(trigger: string): string {
  const alphanumericRuns = trigger.toLowerCase().match(/[a-z0-9]+/g) ?? []
  return alphanumericRuns.slice(0, 6).join(' ')
}
/**
 * Build a stored instinct from a candidate.
 *
 * `createdAt` and `updatedAt` are both set to `now`, `status` defaults to
 * `'pending'`, and a missing or empty `id` is replaced by one derived from the
 * trigger, action and scope. The result is passed through `normalizeInstinct`.
 *
 * @param candidate - Instinct fields; `id` and `status` are optional.
 * @param now - ISO timestamp for the created/updated fields.
 * @returns The normalized stored instinct.
 */
export function createInstinct(
  candidate: InstinctCandidate,
  now = new Date().toISOString(),
): StoredInstinct {
  return normalizeInstinct({
    ...candidate,
    // Assigned after the spread so that a candidate carrying an explicit
    // `id: undefined` (or an empty id) cannot erase the generated id.
    id:
      candidate.id ||
      buildInstinctId(candidate.trigger, candidate.action, candidate.scope),
    createdAt: now,
    updatedAt: now,
    status: candidate.status ?? 'pending',
  })
}

/**
 * Return a cleaned copy of an instinct: id filled in when empty, confidence
 * clamped, evidence stripped of empty entries and de-duplicated, and
 * observation ids de-duplicated when present.
 *
 * @param instinct - Instinct to normalize; not mutated.
 * @returns The normalized copy.
 */
export function normalizeInstinct(instinct: StoredInstinct): StoredInstinct {
  return {
    ...instinct,
    // Scope is part of the id hash; include it so the fallback id matches the
    // one createInstinct would generate for the same instinct.
    id:
      instinct.id ||
      buildInstinctId(instinct.trigger, instinct.action, instinct.scope),
    confidence: clampConfidence(instinct.confidence),
    evidence: Array.from(new Set(instinct.evidence.filter(Boolean))),
    evidenceOutcome: instinct.evidenceOutcome,
    observationIds: instinct.observationIds
      ? Array.from(new Set(instinct.observationIds))
      : undefined,
  }
}
/**
 * Round a confidence value to two decimals and clamp it to the range [0, 1].
 *
 * @param confidence - Raw confidence. `NaN` and non-number values (which can
 *   arrive from instinct JSON that is parsed without validation) yield 0
 *   instead of throwing.
 * @returns A number between 0 and 1 inclusive.
 */
export function clampConfidence(confidence: number): number {
  // The typeof guard protects `toFixed` below: it does not exist on
  // undefined/null/strings that slip through a type assertion.
  if (typeof confidence !== 'number' || Number.isNaN(confidence)) return 0
  return Math.max(0, Math.min(1, Number(confidence.toFixed(2))))
}
/**
 * Normalize an instinct and write it to `<instincts dir>/<id>.json`.
 *
 * The content is written to a randomly named temporary file next to the
 * target and then renamed over it, so the target path never holds a partially
 * written file.
 *
 * @param instinct - Instinct to store.
 * @param options - Selects the instincts directory.
 * @returns The normalized instinct that was written.
 * @throws Any filesystem error from creating the directory, writing or
 *   renaming; the temporary file is removed (best effort) before rethrowing.
 */
export async function saveInstinct(
  instinct: StoredInstinct,
  options?: InstinctStoreOptions,
): Promise<StoredInstinct> {
  const normalized = normalizeInstinct(instinct)
  const dir = getInstinctsDir(options)
  await mkdir(dir, { recursive: true })
  const target = instinctPath(normalized.id, options)
  const tmp = `${target}.${randomBytes(6).toString('hex')}.tmp`
  try {
    await writeFile(tmp, serializeInstinct(normalized))
    await rename(tmp, target)
  } catch (error) {
    // Do not leave the temporary file behind; ignore cleanup failures (the
    // file may never have been created) and surface the original error.
    await unlink(tmp).catch(() => {})
    throw error
  }
  return normalized
}
/**
 * Decide an instinct's status after its confidence has been updated.
 *
 * - A contradiction that leaves confidence below 0.3 puts it in `conflict-hold`.
 * - Otherwise `conflict-hold` becomes `active` at confidence >= 0.5 and
 *   `pending` becomes `active` at confidence >= 0.8.
 * - Every other combination keeps the current status.
 *
 * @param current - Status before the update.
 * @param nextConfidence - Confidence after the update.
 * @param contradiction - Whether the incoming instinct contradicted this one.
 * @returns The status to store.
 */
function resolveNextStatus(
  current: StoredInstinct['status'],
  nextConfidence: number,
  contradiction: boolean,
): StoredInstinct['status'] {
  const heldByConflict = contradiction && nextConfidence < 0.3
  if (heldByConflict) return 'conflict-hold'

  // Confidence needed to (re)activate from the current status, if any.
  let activationFloor: number | undefined
  if (current === 'conflict-hold') {
    activationFloor = 0.5
  } else if (current === 'pending') {
    activationFloor = 0.8
  }

  if (activationFloor !== undefined && nextConfidence >= activationFloor) {
    return 'active'
  }
  return current
}
/**
 * Apply time-based confidence decay to all instincts (ECC parity: -0.02/week).
 * Only decays `pending` and `active` instincts; terminal states
 * (stale/superseded/retired/archived/conflict-hold) do not decay.
 *
 * For each eligible instinct the number of whole weeks since `updatedAt` is
 * multiplied by the weekly decay; instincts with an unparsable `updatedAt`,
 * less than one elapsed week, or an unchanged clamped confidence are skipped.
 *
 * NOTE(review): a decayed instinct gets `updatedAt` set to the current time,
 * and `prunePendingInstincts` measures age from `updatedAt` — confirm that
 * decay resetting the prune clock is intended.
 *
 * @param options - Selects the instincts directory.
 * @returns The number of instincts whose confidence was lowered and saved.
 */
export async function decayInstinctConfidence(
  options?: InstinctStoreOptions,
): Promise<number> {
  const instincts = await loadInstincts(options)
  const now = Date.now()
  let decayed = 0

  for (const instinct of instincts) {
    if (instinct.status !== 'pending' && instinct.status !== 'active') continue
    const updatedAtMs = Date.parse(instinct.updatedAt)
    if (Number.isNaN(updatedAtMs)) continue
    const weeksElapsed = Math.floor((now - updatedAtMs) / MS_PER_WEEK)
    if (weeksElapsed < 1) continue

    const delta = -DECAY_PER_WEEK * weeksElapsed
    const nextConfidence = clampConfidence(instinct.confidence + delta)
    if (nextConfidence === instinct.confidence) continue

    // Bump updatedAt so subsequent maintenance runs don't re-apply the same
    // elapsed-week delta.
    await saveInstinct(
      normalizeInstinct({
        ...instinct,
        confidence: nextConfidence,
        updatedAt: new Date(now).toISOString(),
      }),
      options,
    )
    decayed += 1
  }

  return decayed
}
/**
 * Resolve the JSON file path for an instinct id inside the instincts
 * directory selected by `options`.
 *
 * Ids can originate from imported JSON, so an id containing a path separator
 * or a NUL byte is rejected rather than joined: it could otherwise point
 * outside the instincts directory.
 *
 * @param id - Instinct id, used as the file's base name.
 * @param options - Selects the instincts directory.
 * @returns `<instincts dir>/<id>.json`.
 * @throws Error when `id` contains `/`, `\` or a NUL character.
 */
function instinctPath(id: string, options?: InstinctStoreOptions): string {
  if (/[\\/\0]/.test(id)) {
    throw new Error(`Invalid instinct id: ${JSON.stringify(id)}`)
  }
  return join(getInstinctsDir(options), `${id}.json`)
}
/**
 * Turn arbitrary text into a skill-name slug: lower-case alphanumeric words
 * joined by single hyphens, truncated to `MAX_SKILL_NAME_LENGTH` characters
 * with no trailing hyphen.
 *
 * @param value - Text to convert.
 * @returns The slug, or `'learned-skill'` when nothing usable remains.
 */
export function normalizeSkillName(value: string): string {
  const words = value.toLowerCase().match(/[a-z0-9]+/g) ?? []
  const clipped = words.join('-').slice(0, MAX_SKILL_NAME_LENGTH)
  // Truncation can land right after a separator; words never contain '-', so
  // at most one trailing hyphen needs removing.
  const slug = clipped.endsWith('-') ? clipped.slice(0, -1) : clipped
  if (slug.length === 0) return 'learned-skill'
  return slug
}
/**
 * Tell whether a token is worth including in a generated skill name.
 *
 * @param word - Candidate token; compared case-sensitively.
 * @returns `false` for tokens of two characters or fewer and for the filler
 *   words listed below, `true` otherwise.
 */
function isUsefulNameWord(word: string): boolean {
  if (word.length <= 2) return false
  switch (word) {
    case 'when':
    case 'with':
    case 'this':
    case 'that':
    case 'user':
    case 'project':
    case 'prefer':
    case 'avoid':
    case 'use':
    case 'using':
    case 'the':
    case 'and':
    case 'for':
      return false
    default:
      return true
  }
}
+ */ + +const MAX_OBSERVATIONS_PER_CALL = 30 +const MAX_CANDIDATES_PER_CALL = 3 + +// --- Circuit breaker state --- +let consecutiveFailures = 0 +let circuitOpenUntil = 0 + +export function resetCircuitBreaker(): void { + consecutiveFailures = 0 + circuitOpenUntil = 0 +} + +const LLM_OBSERVER_SYSTEM_PROMPT = `You analyse a short sequence of observations from a coding-assistant session (user messages, tool invocations with outcomes, assistant messages) and extract atomic, reusable "instincts" — behavioural patterns that would help the assistant act correctly in future similar situations. + +Respond with ONLY a JSON array (no prose, no code fences, no commentary). Each item must match this schema: + +{ + "trigger": string, // <= 80 chars, short phrase describing WHEN the instinct applies + "action": string, // <= 120 chars, short phrase describing WHAT to do + "confidence": number, // 0..1 — how strongly these observations support the pattern + "domain": "workflow"|"testing"|"debugging"|"code-style"|"security"|"git"|"project", + "scope": "project"|"global", + "evidence": string[] // 1..3 short excerpts copied/paraphrased from the observations +} + +Rules: +- Return [] if nothing clearly reusable. No guessing. +- At most 3 items, highest confidence first. +- confidence > 0.7 only when observations show the pattern in action (a correction followed by a successful retry, a repeated sequence, an explicit rule). +- Never include secrets, tokens, full file contents, or personally-identifying data. 
+- Scope "global" only when the pattern is obviously project-agnostic (generic testing, git hygiene); default to "project".` + +export const llmObserverBackend: ObserverBackend = { + name: 'llm', + analyze( + observations: StoredSkillObservation[], + ctx?: ObserverBackendContext, + ): Promise { + return analyseWithHaiku(observations, ctx) + }, +} + +async function analyseWithHaiku( + observations: StoredSkillObservation[], + ctx?: ObserverBackendContext, +): Promise { + if (observations.length === 0) return [] + + // Circuit breaker: if the circuit is open, skip queryHaiku entirely. + if (Date.now() < circuitOpenUntil) { + return runHeuristicFallback(observations, ctx) + } + + const capped = observations.slice(-MAX_OBSERVATIONS_PER_CALL) + const userPrompt = buildUserPrompt(capped) + const signal = makeTimeoutSignal(getSkillLearningConfig().llm.timeoutMs) + + let responseText: string + try { + const response = await queryHaiku({ + systemPrompt: asSystemPrompt([LLM_OBSERVER_SYSTEM_PROMPT]), + userPrompt, + signal, + options: { + querySource: 'skill_learning_observer', + enablePromptCaching: true, + agents: [], + isNonInteractiveSession: true, + hasAppendSystemPrompt: false, + mcpTools: [], + }, + }) + // Success: reset failure counter. + consecutiveFailures = 0 + responseText = extractResponseText(response.message?.content) + } catch { + // Haiku failure (timeout / rate limit / bad response) — increment failure + // counter and potentially open the circuit breaker. + consecutiveFailures++ + if (consecutiveFailures >= getSkillLearningConfig().llm.failureThreshold) { + circuitOpenUntil = + Date.now() + getSkillLearningConfig().llm.circuitCooldownMs + } + return runHeuristicFallback(observations, ctx) + } + + const parsed = parseInstinctCandidates(responseText, ctx, capped) + if (parsed.length === 0) { + // Empty / malformed LLM output — count as a failure so the circuit + // breaker opens if Haiku is systematically returning garbage (e.g. 
the + // model version drifted and no longer emits the expected JSON). + consecutiveFailures++ + if (consecutiveFailures >= getSkillLearningConfig().llm.failureThreshold) { + circuitOpenUntil = + Date.now() + getSkillLearningConfig().llm.circuitCooldownMs + } + return runHeuristicFallback(observations, ctx) + } + return parsed +} + +async function runHeuristicFallback( + observations: StoredSkillObservation[], + ctx?: ObserverBackendContext, +): Promise { + try { + const { heuristicObserverBackend } = await import('./sessionObserver.js') + const result = heuristicObserverBackend.analyze(observations, ctx) + return Array.isArray(result) ? result : await result + } catch { + return [] + } +} + +function buildUserPrompt(observations: StoredSkillObservation[]): string { + const rendered = observations + .map((observation, index) => renderObservation(observation, index)) + .join('\n') + return `Observations (chronological, newest last):\n${rendered}\n\nExtract up to ${MAX_CANDIDATES_PER_CALL} atomic instincts. 
JSON array only.` +} + +function renderObservation( + observation: StoredSkillObservation, + index: number, +): string { + const segments: string[] = [`#${index + 1}`, `event=${observation.event}`] + if (observation.toolName) segments.push(`tool=${observation.toolName}`) + if (observation.outcome) segments.push(`outcome=${observation.outcome}`) + if (observation.messageText) { + segments.push( + `text=${JSON.stringify(truncate(observation.messageText, 200))}`, + ) + } + if (observation.toolInput) { + segments.push(`in=${JSON.stringify(truncate(observation.toolInput, 120))}`) + } + if (observation.toolOutput) { + segments.push( + `out=${JSON.stringify(truncate(observation.toolOutput, 120))}`, + ) + } + return segments.join(' | ') +} + +function truncate(value: string, max: number): string { + if (value.length <= max) return value + return `${value.slice(0, max)}…` +} + +function extractResponseText(content: unknown): string { + if (!Array.isArray(content)) return '' + const parts: string[] = [] + for (const block of content) { + if (!block || typeof block !== 'object') continue + const record = block as Record + if (record.type !== 'text') continue + if (typeof record.text === 'string') parts.push(record.text) + } + return parts.join('').trim() +} + +function parseInstinctCandidates( + raw: string, + ctx: ObserverBackendContext | undefined, + observations: StoredSkillObservation[], +): InstinctCandidate[] { + const json = extractJsonArray(raw) + if (!json) return [] + + let parsed: unknown + try { + parsed = JSON.parse(json) + } catch { + return [] + } + if (!Array.isArray(parsed)) return [] + + const observationIds = observations.map(observation => observation.id) + const candidates: InstinctCandidate[] = [] + + for (const item of parsed.slice(0, MAX_CANDIDATES_PER_CALL)) { + const candidate = normaliseCandidate(item, ctx, observationIds) + if (candidate) candidates.push(candidate) + } + + return candidates +} + +function extractJsonArray(raw: string): string | 
undefined { + if (!raw) return undefined + const start = raw.indexOf('[') + const end = raw.lastIndexOf(']') + if (start < 0 || end <= start) return undefined + return raw.slice(start, end + 1) +} + +function normaliseCandidate( + item: unknown, + ctx: ObserverBackendContext | undefined, + observationIds: string[], +): InstinctCandidate | undefined { + if (!item || typeof item !== 'object') return undefined + const record = item as Record + + const trigger = stringField(record.trigger, 80) + const action = stringField(record.action, 120) + if (!trigger || !action) return undefined + + const evidence = evidenceField(record.evidence) + if (evidence.length === 0) return undefined + + return { + trigger, + action, + confidence: clampUnitInterval(record.confidence), + domain: domainField(record.domain), + source: 'session-observation', + scope: scopeField(record.scope), + projectId: ctx?.project?.projectId, + projectName: ctx?.project?.projectName, + evidence, + observationIds, + } +} + +function stringField(value: unknown, maxLength: number): string | undefined { + if (typeof value !== 'string') return undefined + const trimmed = value.trim() + if (!trimmed) return undefined + return trimmed.length > maxLength ? trimmed.slice(0, maxLength) : trimmed +} + +function clampUnitInterval(value: unknown): number { + if (typeof value !== 'number' || !Number.isFinite(value)) return 0.5 + if (value < 0) return 0 + if (value > 1) return 1 + return value +} + +function domainField(value: unknown): InstinctDomain { + if (typeof value !== 'string') return 'project' + return (INSTINCT_DOMAINS as readonly string[]).includes(value) + ? (value as InstinctDomain) + : 'project' +} + +function scopeField(value: unknown): SkillLearningScope { + return value === 'global' ? 
'global' : 'project' +} + +function evidenceField(value: unknown): string[] { + if (!Array.isArray(value)) return [] + const entries: string[] = [] + for (const entry of value) { + if (typeof entry !== 'string') continue + const trimmed = entry.trim() + if (!trimmed) continue + entries.push(trimmed.length > 200 ? `${trimmed.slice(0, 200)}…` : trimmed) + if (entries.length === 3) break + } + return entries +} + +function makeTimeoutSignal(ms: number): AbortSignal { + return AbortSignal.timeout(ms) +} diff --git a/src/services/skillLearning/observationStore.ts b/src/services/skillLearning/observationStore.ts new file mode 100644 index 000000000..bf87d136e --- /dev/null +++ b/src/services/skillLearning/observationStore.ts @@ -0,0 +1,451 @@ +import { mkdir, readFile, rename, stat, writeFile } from 'node:fs/promises' +import { dirname, join } from 'node:path' +import { createHash, randomUUID } from 'node:crypto' +import type { + SkillLearningProjectContext as BaseSkillLearningProjectContext, + SkillLearningScope, + SkillObservation as BaseSkillObservation, + SkillObservationEvent, + SkillObservationOutcome, +} from './types.js' + +export type { SkillLearningScope, SkillObservation } from './types.js' + +export type SkillLearningProjectContext = Pick< + BaseSkillLearningProjectContext, + 'projectId' | 'projectName' | 'cwd' +> & + Partial< + Omit + > + +export type ObservationEvent = Exclude + +export type ObservationOutcome = SkillObservationOutcome | 'interrupted' + +export type StoredSkillObservation = Omit< + BaseSkillObservation, + 'event' | 'outcome' | 'toolInput' | 'toolOutput' +> & { + event: ObservationEvent + outcome?: ObservationOutcome + toolInput?: string + toolOutput?: string + toolName?: string + messageText?: string + source?: 'transcript' | 'hook' | 'tool-hook' | 'imported' + contentHash?: string + // Turn index at which the observation was captured. 
Used by + // runtimeObserver to scope tool-hook observations to the current REPL turn. + turn?: number +} + +export type ObservationStoreOptions = { + rootDir?: string + project?: SkillLearningProjectContext + maxFieldLength?: number + archiveThresholdBytes?: number +} + +type ClaudeTranscriptEntry = { + sessionId?: string + cwd?: string + timestamp?: string + type?: string + message?: { + role?: string + content?: unknown + } + tool_name?: string + tool_input?: unknown + tool_response?: unknown +} + +const DEFAULT_MAX_FIELD_LENGTH = 5_000 +const DEFAULT_ARCHIVE_THRESHOLD_BYTES = 1_000_000 +const DEFAULT_PURGE_MAX_AGE_DAYS = 30 +const SECRET_REPLACEMENT = '[REDACTED]' + +const SECRET_PATTERNS: RegExp[] = [ + /\b(?:sk|sk-ant|sk-proj|xox[baprs])-[A-Za-z0-9_-]{12,}\b/g, + /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g, + // Bearer before key/value: that rule stops at the space and leaks the token. + /\bBearer\s+[A-Za-z0-9._~+/=-]{12,}\b/gi, + /\b(?:api[_-]?key|token|secret|password|authorization)\b\s*[:=]\s*["']?[^"',\s}]+/gi, +] + +export function getSkillLearningRoot( + options?: ObservationStoreOptions, +): string { + if (options?.rootDir) return options.rootDir + if (process.env.CLAUDE_SKILL_LEARNING_HOME) { + return process.env.CLAUDE_SKILL_LEARNING_HOME + } + return join(process.env.HOME ?? 
process.cwd(), '.claude', 'skill-learning') +} + +export function getObservationFilePath( + options?: ObservationStoreOptions, +): string { + const root = getSkillLearningRoot(options) + const project = options?.project + if ( + !project || + project.scope === 'global' || + project.projectId === 'global' + ) { + return join(root, 'global', 'observations.jsonl') + } + return join(root, 'projects', project.projectId, 'observations.jsonl') +} + +export function scrubText( + value: string | undefined, + maxLength = DEFAULT_MAX_FIELD_LENGTH, +): string | undefined { + if (value === undefined) return undefined + + let scrubbed = value + for (const pattern of SECRET_PATTERNS) { + scrubbed = scrubbed.replace(pattern, match => { + const key = match.split(/[:=]/, 1)[0] + return /[:=]/.test(match) + ? `${key}: ${SECRET_REPLACEMENT}` + : SECRET_REPLACEMENT + }) + } + + if (scrubbed.length <= maxLength) return scrubbed + + const hash = hashText(scrubbed) + let preview = scrubbed.slice(0, maxLength) + if ( + scrubbed.includes(SECRET_REPLACEMENT) && + !preview.includes(SECRET_REPLACEMENT) + ) { + preview = `${SECRET_REPLACEMENT} ${preview}` + } + return `${preview}\n[TRUNCATED length=${scrubbed.length} sha256=${hash}]` +} + +export function scrubObservation( + observation: StoredSkillObservation, + options?: ObservationStoreOptions, +): StoredSkillObservation { + const maxLength = options?.maxFieldLength ?? DEFAULT_MAX_FIELD_LENGTH + const scrubbed: StoredSkillObservation = { + ...observation, + toolInput: scrubText(observation.toolInput, maxLength), + toolOutput: scrubText(observation.toolOutput, maxLength), + messageText: scrubText(observation.messageText, maxLength), + } + + const hashSource = [ + scrubbed.event, + scrubbed.toolName ?? '', + scrubbed.toolInput ?? '', + scrubbed.toolOutput ?? '', + scrubbed.messageText ?? 
'', + ].join('\n') + + return { + ...scrubbed, + contentHash: hashText(hashSource), + } +} + +const MAX_SINGLE_OBSERVATION_BYTES = 64 * 1024 + +export async function appendObservation( + observation: StoredSkillObservation, + options?: ObservationStoreOptions, +): Promise { + const filePath = getObservationFilePath(options) + await mkdir(dirname(filePath), { recursive: true }) + await archiveLargeObservationFile(options) + + const scrubbed = scrubObservation(observation, options) + const serialized = JSON.stringify(scrubbed) + if (Buffer.byteLength(serialized) > MAX_SINGLE_OBSERVATION_BYTES) { + return scrubbed + } + await writeFile(filePath, `${serialized}\n`, { + flag: 'a', + }) + return scrubbed +} + +export async function readObservations( + options?: ObservationStoreOptions, +): Promise { + const filePath = getObservationFilePath(options) + let content = '' + try { + content = await readFile(filePath, 'utf8') + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') return [] + throw error + } + + const observations: StoredSkillObservation[] = [] + for (const line of content.split(/\r?\n/)) { + if (!line.trim()) continue + try { + observations.push(JSON.parse(line) as StoredSkillObservation) + } catch { + // Skip corrupt/truncated JSONL lines (e.g. from concurrent append + // interleaved with a crash). One bad line must not break the whole read. 
+ } + } + return observations +} + +export async function ingestTranscript( + transcriptPath: string, + options?: ObservationStoreOptions, +): Promise { + const transcript = await readFile(transcriptPath, 'utf8') + const observations: StoredSkillObservation[] = [] + + for (const line of transcript.split(/\r?\n/)) { + if (!line.trim()) continue + + const entry = JSON.parse(line) as ClaudeTranscriptEntry + for (const observation of observationsFromTranscriptEntry(entry, options)) { + observations.push(await appendObservation(observation, options)) + } + } + + return observations +} + +export async function purgeOldObservations( + options?: ObservationStoreOptions & { maxAgeDays?: number }, +): Promise { + const filePath = getObservationFilePath(options) + const maxAgeDays = options?.maxAgeDays ?? DEFAULT_PURGE_MAX_AGE_DAYS + const cutoff = Date.now() - maxAgeDays * 24 * 60 * 60 * 1000 + + let content = '' + try { + content = await readFile(filePath, 'utf8') + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') return 0 + throw error + } + + const kept: string[] = [] + let purged = 0 + for (const line of content.split(/\r?\n/)) { + if (!line.trim()) continue + try { + const obs = JSON.parse(line) as StoredSkillObservation + const ts = Date.parse(obs.timestamp) + if (!Number.isNaN(ts) && ts < cutoff) { + purged += 1 + continue + } + kept.push(line) + } catch { + kept.push(line) + } + } + + if (purged === 0) return 0 + // Atomic write: temp + rename. Direct writeFile leaves a truncated/empty + // file if the process crashes mid-write, losing retained observations. + const tmpPath = `${filePath}.tmp-${process.pid}-${Date.now()}` + await writeFile(tmpPath, kept.length ? 
`${kept.join('\n')}\n` : '') + await rename(tmpPath, filePath) + return purged +} + +export async function archiveLargeObservationFile( + options?: ObservationStoreOptions, +): Promise { + const filePath = getObservationFilePath(options) + const threshold = + options?.archiveThresholdBytes ?? DEFAULT_ARCHIVE_THRESHOLD_BYTES + + let currentStat + try { + currentStat = await stat(filePath) + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') return null + throw error + } + + if (currentStat.size < threshold) return null + + const archiveDir = join(dirname(filePath), 'observations.archive') + await mkdir(archiveDir, { recursive: true }) + const archivePath = join( + archiveDir, + `observations-${new Date().toISOString().replace(/[:.]/g, '-')}.jsonl`, + ) + await rename(filePath, archivePath) + return archivePath +} + +function observationsFromTranscriptEntry( + entry: ClaudeTranscriptEntry, + options?: ObservationStoreOptions, +): StoredSkillObservation[] { + const project = options?.project + const base = { + sessionId: entry.sessionId ?? 'unknown-session', + projectId: project?.projectId ?? 'global', + projectName: project?.projectName ?? 'global', + cwd: entry.cwd ?? project?.cwd ?? process.cwd(), + timestamp: entry.timestamp ?? new Date().toISOString(), + source: 'transcript' as const, + } + + const role = entry.message?.role ?? 
entry.type + const content = entry.message?.content + const observations: StoredSkillObservation[] = [] + + if (entry.tool_name) { + observations.push({ + ...base, + id: createObservationId(), + event: 'tool_complete', + toolName: entry.tool_name, + toolInput: stringifyField(entry.tool_input), + toolOutput: stringifyField(entry.tool_response), + outcome: inferOutcome(entry.tool_response), + }) + } + + if (role === 'user') { + const toolResults = extractToolResults(content) + if (toolResults.length > 0) { + for (const result of toolResults) { + observations.push({ + ...base, + id: createObservationId(), + event: 'tool_complete', + toolName: result.name, + toolOutput: result.output, + outcome: result.isError ? 'failure' : 'success', + }) + } + return observations + } + + observations.push({ + ...base, + id: createObservationId(), + event: 'user_message', + messageText: extractText(content), + }) + return observations + } + + if (role === 'assistant') { + const toolUses = extractToolUses(content) + for (const toolUse of toolUses) { + observations.push({ + ...base, + id: createObservationId(), + event: 'tool_start', + toolName: toolUse.name, + toolInput: toolUse.input, + }) + } + + const text = extractText(content) + if (text.trim()) { + observations.push({ + ...base, + id: createObservationId(), + event: 'assistant_message', + messageText: text, + }) + } + } + + return observations +} + +function extractText(content: unknown): string { + if (typeof content === 'string') return content + if (!Array.isArray(content)) return stringifyField(content) ?? '' + + return content + .map(part => { + if (typeof part === 'string') return part + if (!part || typeof part !== 'object') return '' + const record = part as Record + return typeof record.text === 'string' ? 
record.text : '' + }) + .filter(Boolean) + .join('\n') +} + +function extractToolUses( + content: unknown, +): Array<{ name: string; input: string | undefined }> { + if (!Array.isArray(content)) return [] + return content.flatMap(part => { + if (!part || typeof part !== 'object') return [] + const record = part as Record + if (record.type !== 'tool_use') return [] + return [ + { + name: String(record.name ?? 'unknown_tool'), + input: stringifyField(record.input), + }, + ] + }) +} + +function extractToolResults( + content: unknown, +): Array<{ name: string; output: string | undefined; isError: boolean }> { + if (!Array.isArray(content)) return [] + return content.flatMap(part => { + if (!part || typeof part !== 'object') return [] + const record = part as Record + if (record.type !== 'tool_result') return [] + return [ + { + name: String(record.name ?? record.tool_name ?? 'unknown_tool'), + output: stringifyField(record.content), + isError: record.is_error === true, + }, + ] + }) +} + +function inferOutcome(value: unknown): ObservationOutcome { + const text = stringifyField(value)?.toLowerCase() ?? 
'' + if (text.includes('interrupted') || text.includes('aborted')) { + return 'interrupted' + } + if ( + text.includes('error') || + text.includes('exception') || + text.includes('failed') + ) { + return 'failure' + } + return 'success' +} + +export function stringifyField(value: unknown): string | undefined { + if (value === undefined || value === null) return undefined + if (typeof value === 'string') return value + return JSON.stringify(value) +} + +function createObservationId(): string { + if (typeof crypto !== 'undefined' && 'randomUUID' in crypto) { + return crypto.randomUUID() + } + return randomUUID() +} + +function hashText(value: string): string { + return createHash('sha256').update(value).digest('hex') +} diff --git a/src/services/skillLearning/observerBackend.ts b/src/services/skillLearning/observerBackend.ts new file mode 100644 index 000000000..c2ec8284d --- /dev/null +++ b/src/services/skillLearning/observerBackend.ts @@ -0,0 +1,71 @@ +import type { InstinctCandidate } from './instinctParser.js' +import type { StoredSkillObservation } from './observationStore.js' +import type { SkillLearningProjectContext } from './types.js' + +export type ObserverBackendContext = { + project?: SkillLearningProjectContext +} + +export type ObserverBackendResult = + | InstinctCandidate[] + | Promise + +export interface ObserverBackend { + readonly name: string + analyze( + observations: StoredSkillObservation[], + ctx?: ObserverBackendContext, + ): ObserverBackendResult +} + +const registry = new Map() +let activeName: string | undefined + +export function registerObserverBackend(backend: ObserverBackend): void { + registry.set(backend.name, backend) + if (!activeName) activeName = backend.name +} + +export function setActiveObserverBackend(name: string): void { + if (!registry.has(name)) { + throw new Error(`Observer backend "${name}" is not registered`) + } + activeName = name +} + +export function getActiveObserverBackend(): ObserverBackend { + const backend = 
activeName ? registry.get(activeName) : undefined + if (!backend) { + throw new Error( + 'No observer backend is active — register one before analyzing observations', + ) + } + return backend +} + +export function listObserverBackends(): string[] { + return Array.from(registry.keys()) +} + +export function resetObserverBackendsForTest(): void { + registry.clear() + activeName = undefined +} + +export async function analyzeWithActiveBackend( + observations: StoredSkillObservation[], + ctx?: ObserverBackendContext, +): Promise { + return Promise.resolve(getActiveObserverBackend().analyze(observations, ctx)) +} + +function pickBackendFromEnv(): string | undefined { + const raw = process.env.SKILL_LEARNING_OBSERVER_BACKEND?.trim() + return raw && registry.has(raw) ? raw : undefined +} + +export function resolveDefaultObserverBackend(): ObserverBackend { + const preferred = pickBackendFromEnv() + if (preferred) setActiveObserverBackend(preferred) + return getActiveObserverBackend() +} diff --git a/src/services/skillLearning/projectContext.ts b/src/services/skillLearning/projectContext.ts new file mode 100644 index 000000000..a886cee6f --- /dev/null +++ b/src/services/skillLearning/projectContext.ts @@ -0,0 +1,264 @@ +import { execFileSync } from 'child_process' +import { createHash } from 'crypto' +import { + existsSync, + mkdirSync, + readFileSync, + realpathSync, + writeFileSync, +} from 'fs' +import { basename, join, resolve } from 'path' +import { getClaudeConfigHomeDir } from '../../utils/envUtils.js' +import type { + ProjectContextSource, + SkillLearningProjectContext, + SkillLearningProjectRecord, + SkillLearningProjectsRegistry, + SkillLearningScope, +} from './types.js' + +const REGISTRY_VERSION = 1 +const GLOBAL_PROJECT_ID = 'global' +const GLOBAL_PROJECT_NAME = 'Global' + +export function getSkillLearningRootDir(): string { + return join(getClaudeConfigHomeDir(), 'skill-learning') +} + +export function getProjectsRegistryPath(): string { + return 
join(getSkillLearningRootDir(), 'projects.json') +} + +export function getProjectStorageDir(projectId: string): string { + if (projectId === GLOBAL_PROJECT_ID) { + return join(getSkillLearningRootDir(), 'global') + } + return join(getSkillLearningRootDir(), 'projects', projectId) +} + +export function getProjectContextPath(projectId: string): string { + return join(getProjectStorageDir(projectId), 'project.json') +} + +// Per-cwd in-memory cache. `resolveContext` does synchronous `git` forks and +// `persistProjectContext` does registry/project.json writes on every call — +// in the tool.call hot path (one wrapper invocation per tool) that cost would +// accumulate into the hundreds-of-ms range per session. Cache keyed by the +// exact cwd string so different worktrees still get independent entries. +const contextCache = new Map() +const PERSIST_INTERVAL_MS = 5 * 60 * 1000 +let lastPersistAt = 0 + +export function resolveProjectContext( + cwd = process.cwd(), +): SkillLearningProjectContext { + const cached = contextCache.get(cwd) + if (cached) { + // Still touch the registry so long-lived processes keep `lastSeenAt` + // reasonably fresh, but throttle the write so it doesn't fire on every + // tool call. 
+ const now = Date.now() + if (now - lastPersistAt > PERSIST_INTERVAL_MS) { + lastPersistAt = now + persistProjectContext(cached) + } + return cached + } + const resolved = resolveContext(cwd) + contextCache.set(cwd, resolved) + persistProjectContext(resolved) + lastPersistAt = Date.now() + return resolved +} + +export function resetProjectContextCacheForTest(): void { + contextCache.clear() + lastPersistAt = 0 +} + +export function listKnownProjects(): SkillLearningProjectRecord[] { + const registry = readProjectsRegistry(getProjectsRegistryPath()) + return Object.values(registry.projects).sort((a, b) => + a.projectName.localeCompare(b.projectName), + ) +} + +function resolveContext(cwd: string): SkillLearningProjectContext { + const envProjectDir = process.env.CLAUDE_PROJECT_DIR?.trim() + if (envProjectDir) { + const projectRoot = normalizePath(envProjectDir) + return buildContext({ + source: 'claude_project_dir', + scope: 'project', + cwd, + projectRoot, + identity: `claude-project-dir:${projectRoot}`, + projectName: basename(projectRoot) || 'project', + }) + } + + const gitRemote = git(['remote', 'get-url', 'origin'], cwd) + if (gitRemote) { + const projectRoot = git(['rev-parse', '--show-toplevel'], cwd) + const normalizedRemote = normalizeGitRemote(gitRemote) + return buildContext({ + source: 'git_remote', + scope: 'project', + cwd, + projectRoot: projectRoot + ? 
normalizePath(projectRoot) + : normalizePath(cwd), + gitRemote: normalizedRemote, + identity: `git-remote:${normalizedRemote}`, + projectName: projectNameFromRemote(normalizedRemote), + }) + } + + const gitRoot = git(['rev-parse', '--show-toplevel'], cwd) + if (gitRoot) { + const projectRoot = normalizePath(gitRoot) + return buildContext({ + source: 'git_root', + scope: 'project', + cwd, + projectRoot, + identity: `git-root:${projectRoot}`, + projectName: basename(projectRoot) || 'project', + }) + } + + return buildContext({ + source: 'global', + scope: 'global', + cwd, + projectRoot: undefined, + identity: 'global', + projectName: GLOBAL_PROJECT_NAME, + }) +} + +function buildContext(input: { + source: ProjectContextSource + scope: SkillLearningScope + cwd: string + projectRoot?: string + gitRemote?: string + identity: string + projectName: string +}): SkillLearningProjectContext { + const projectId = + input.scope === 'global' + ? GLOBAL_PROJECT_ID + : stableProjectId(input.identity) + return { + projectId, + projectName: input.projectName, + scope: input.scope, + source: input.source, + cwd: normalizePath(input.cwd), + projectRoot: input.projectRoot, + gitRemote: input.gitRemote, + storageDir: getProjectStorageDir(projectId), + } +} + +function persistProjectContext(context: SkillLearningProjectContext): void { + const now = new Date().toISOString() + const registryPath = getProjectsRegistryPath() + const registry = readProjectsRegistry(registryPath) + const existing = registry.projects[context.projectId] + const record: SkillLearningProjectRecord = { + ...context, + firstSeenAt: existing?.firstSeenAt ?? 
now, + lastSeenAt: now, + } + + registry.projects[context.projectId] = record + registry.updatedAt = now + + mkdirSync(context.storageDir, { recursive: true }) + mkdirSync(getSkillLearningRootDir(), { recursive: true }) + writeJson(registryPath, registry) + writeJson(getProjectContextPath(context.projectId), record) +} + +function readProjectsRegistry(path: string): SkillLearningProjectsRegistry { + if (!existsSync(path)) { + return { + version: REGISTRY_VERSION, + updatedAt: new Date(0).toISOString(), + projects: {}, + } + } + + try { + const parsed = JSON.parse( + readFileSync(path, 'utf8'), + ) as Partial + if ( + parsed.version === REGISTRY_VERSION && + typeof parsed.projects === 'object' && + parsed.projects + ) { + return { + version: REGISTRY_VERSION, + updatedAt: + typeof parsed.updatedAt === 'string' + ? parsed.updatedAt + : new Date(0).toISOString(), + projects: parsed.projects as Record, + } + } + } catch { + // Fall through to a fresh registry. Corrupt state should not block startup. + } + + return { + version: REGISTRY_VERSION, + updatedAt: new Date(0).toISOString(), + projects: {}, + } +} + +function writeJson(path: string, value: unknown): void { + writeFileSync(path, `${JSON.stringify(value, null, 2)}\n`, 'utf8') +} + +function git(args: string[], cwd: string): string | null { + try { + const output = execFileSync('git', ['-C', cwd, ...args], { + encoding: 'utf8', + stdio: ['ignore', 'pipe', 'ignore'], + }) + const trimmed = output.trim() + return trimmed ? 
trimmed : null + } catch { + return null + } +} + +function normalizePath(path: string): string { + const resolved = resolve(path) + try { + return realpathSync.native(resolved).normalize('NFC') + } catch { + return resolved.normalize('NFC') + } +} + +function normalizeGitRemote(remote: string): string { + const normalized = remote.trim().replace(/\\/g, '/') + // Drop `user:token@` (persisted + hashed into the id); trim `/` before `.git`. + const bare = normalized.replace(/^([a-z][a-z\d+.-]*:\/\/)[^/]*@/i, '$1') + return bare.replace(/\/+$/g, '').replace(/\.git$/i, '').toLowerCase() +} + +function projectNameFromRemote(remote: string): string { + const match = remote.match(/[:/]([^/:]+?)(?:\.git)?$/) + return match?.[1] || 'project' +} + +function stableProjectId(identity: string): string { + const hash = createHash('sha256').update(identity).digest('hex').slice(0, 16) + return `project-${hash}` +} diff --git a/src/services/skillLearning/promotion.ts b/src/services/skillLearning/promotion.ts new file mode 100644 index 000000000..12fb2805e --- /dev/null +++ b/src/services/skillLearning/promotion.ts @@ -0,0 +1,161 @@ +import { readdir } from 'node:fs/promises' +import { existsSync } from 'node:fs' +import { join } from 'node:path' +import type { Instinct, StoredInstinct } from './instinctParser.js' +import { + getInstinctsDir, + loadInstincts, + saveInstinct, + type InstinctStoreOptions, +} from './instinctStore.js' +import { getSkillLearningRoot } from './observationStore.js' +import type { SkillLearningProjectContext } from './types.js' + +export type PromotionCandidate = { + instinctId: string + averageConfidence: number + projectIds: string[] +} + +export type PromotionOptions = { + rootDir?: string + minProjects?: number + minConfidence?: number +} + +const sessionPromotedIds = new Set() + +export function resetPromotionBookkeeping(): void { + sessionPromotedIds.clear() +} + +export function findPromotionCandidates( + instincts: Instinct[], + minProjects = 2, + minConfidence = 0.8, +): PromotionCandidate[] { + const grouped = new Map() + for (const instinct of 
instincts) { + if (instinct.scope !== 'project') continue + const group = grouped.get(instinct.id) ?? [] + group.push(instinct) + grouped.set(instinct.id, group) + } + + return Array.from(grouped.entries()).flatMap(([instinctId, group]) => { + const projectIds = Array.from( + new Set(group.map(instinct => instinct.projectId).filter(Boolean)), + ) as string[] + const averageConfidence = + group.reduce((sum, instinct) => sum + instinct.confidence, 0) / + group.length + if ( + projectIds.length >= minProjects && + averageConfidence >= minConfidence + ) { + return [ + { + instinctId, + projectIds, + averageConfidence: Number(averageConfidence.toFixed(2)), + }, + ] + } + return [] + }) +} + +export async function checkPromotion( + options: PromotionOptions = {}, +): Promise { + const minProjects = options.minProjects ?? 2 + const minConfidence = options.minConfidence ?? 0.8 + const allProjectInstincts = await loadAllProjectInstincts(options.rootDir) + + const candidates = findPromotionCandidates( + allProjectInstincts, + minProjects, + minConfidence, + ) + const promoted: PromotionCandidate[] = [] + + for (const candidate of candidates) { + if (sessionPromotedIds.has(candidate.instinctId)) continue + + const source = allProjectInstincts.find( + instinct => instinct.id === candidate.instinctId, + ) + if (!source) continue + + const globalInstinct: StoredInstinct = { + ...source, + scope: 'global', + projectId: undefined, + projectName: undefined, + confidence: candidate.averageConfidence, + updatedAt: new Date().toISOString(), + } + + const globalOptions: InstinctStoreOptions = { + rootDir: options.rootDir, + scope: 'global', + project: globalProjectContext(options.rootDir), + } + await saveInstinct(globalInstinct, globalOptions) + + sessionPromotedIds.add(candidate.instinctId) + promoted.push(candidate) + } + + return promoted +} + +async function loadAllProjectInstincts( + rootDir?: string, +): Promise { + const root = getSkillLearningRoot(rootDir ? 
{ rootDir } : undefined) + const projectsRoot = join(root, 'projects') + if (!existsSync(projectsRoot)) return [] + + const entries = await readdir(projectsRoot, { withFileTypes: true }) + const instincts: StoredInstinct[] = [] + for (const entry of entries) { + if (!entry.isDirectory()) continue + const project: SkillLearningProjectContext = { + projectId: entry.name, + projectName: entry.name, + scope: 'project', + source: 'git_root', + cwd: projectsRoot, + storageDir: join(projectsRoot, entry.name), + } + const projectInstincts = await loadInstincts({ + rootDir, + project, + scope: 'project', + }) + instincts.push(...projectInstincts) + } + return instincts +} + +function globalProjectContext(rootDir?: string): SkillLearningProjectContext { + const root = getSkillLearningRoot(rootDir ? { rootDir } : undefined) + return { + projectId: 'global', + projectName: 'Global', + scope: 'global', + source: 'global', + cwd: root, + storageDir: join(root, 'global'), + } +} + +// Re-export for consumers that need to inspect the global instincts directory. 
+export function getGlobalInstinctsDir(rootDir?: string): string { + return getInstinctsDir({ + rootDir, + scope: 'global', + project: globalProjectContext(rootDir), + }) +} diff --git a/src/services/skillLearning/runtimeObserver.ts b/src/services/skillLearning/runtimeObserver.ts new file mode 100644 index 000000000..9796ad2a3 --- /dev/null +++ b/src/services/skillLearning/runtimeObserver.ts @@ -0,0 +1,386 @@ +import type { REPLHookContext } from '../../utils/hooks/postSamplingHooks.js' +import { registerPostSamplingHook } from '../../utils/hooks/postSamplingHooks.js' +import { getSkillLearningConfig } from './config.js' +import { isSkillLearningEnabled } from './featureCheck.js' +import { + appendObservation, + getSkillLearningRoot, + purgeOldObservations, + stringifyField, +} from './observationStore.js' +import { resolveProjectContext } from './projectContext.js' +import './sessionObserver.js' +import { createInstinct } from './instinctParser.js' +import { + analyzeWithActiveBackend, + resolveDefaultObserverBackend, +} from './observerBackend.js' +import { + decayInstinctConfidence, + loadInstincts, + prunePendingInstincts, + upsertInstinct, +} from './instinctStore.js' +import type { StoredSkillObservation } from './observationStore.js' +import type { Message } from '../../types/message.js' +import { + applySkillLifecycleDecision, + compareExistingArtifacts, + decideSkillLifecycle, +} from './skillLifecycle.js' +import { + generateAgentCandidates, + generateCommandCandidates, + clusterInstincts, +} from './evolution.js' +import { generateOrMergeSkillDraft } from './skillGenerator.js' +import { shouldGenerateSkillFromInstincts } from './learningPolicy.js' +import { writeLearnedCommand } from './commandGenerator.js' +import { writeLearnedAgent } from './agentGenerator.js' +import { readObservations } from './observationStore.js' +import { checkPromotion } from './promotion.js' +import { existsSync } from 'node:fs' +import { join } from 'node:path' +import { 
getClaudeConfigHomeDir } from '../../utils/envUtils.js' + +export const RUNTIME_SESSION_ID = 'runtime-session' + +let initialized = false +let runtimeTurn = 0 +// Timestamp watermark for consumed tool-hook observations — enables replay of +// only the records that arrived since the previous post-sampling pass. +let lastConsumedToolHookTimestamp = '' + +// --- H5: LLM call throttle --- +let llmCallsThisSession = 0 +let lastLlmCallTimestamp = 0 + +// --- H6: message watermark dedup --- +// Key: `${sessionId}:${messageId}` — prevents reprocessing the same message +// across repeated post-sampling calls in one REPL session. +const lastProcessedMessageIds = new Set() +const MAX_PROCESSED_IDS = 1000 +const TRIM_PROCESSED_IDS_TO = 500 + +export function resetRuntimeLLMBookkeeping(): void { + llmCallsThisSession = 0 + lastLlmCallTimestamp = 0 + lastProcessedMessageIds.clear() +} + +export function getRuntimeTurn(): number { + return runtimeTurn +} + +export function initSkillLearning(): void { + if (initialized) return + initialized = true + // Resolve the active observer backend from SKILL_LEARNING_OBSERVER_BACKEND + // env. Without this call the registry stays on whichever backend was + // registered first (heuristic) — which means the env switch would silently + // be a no-op in production. Swallow registry errors so a typo in the env + // variable can never crash startup. + try { + resolveDefaultObserverBackend() + } catch { + // No backend registered yet, or env points at unknown name — leave the + // registry in its existing state. + } + registerPostSamplingHook(runSkillLearningPostSampling) + // Fire-and-forget startup maintenance: ECC parity for confidence decay, + // observation purge, pending instinct prune. Errors are swallowed so that + // skill-learning maintenance never blocks CLI startup. 
+ void runStartupMaintenance().catch(() => {}) +} + +async function runStartupMaintenance(): Promise { + if (!isSkillLearningEnabled()) return + if (process.env.CLAUDE_SKILL_LEARNING_DISABLE) return + const project = resolveProjectContext(process.cwd()) + const options = { project } + await Promise.allSettled([ + decayInstinctConfidence(options), + purgeOldObservations(options), + prunePendingInstincts(30, options), + ]) +} + +function isInsideSkillLearningStorage(cwd: string): boolean { + try { + const root = getSkillLearningRoot() + return cwd.startsWith(root) + } catch { + return false + } +} + +export async function runSkillLearningPostSampling( + context: REPLHookContext, +): Promise { + if (!isSkillLearningEnabled()) return + // Self-filter layers in order: env escape hatch, entrypoint (only main REPL + // thread — `startsWith` covers 'repl_main_thread:outputStyle:'), sub- + // agent skip, and a path guard that prevents feedback loops when the user + // hand-edits files inside the skill-learning storage directory itself. + if (process.env.CLAUDE_SKILL_LEARNING_DISABLE) return + if (!context.querySource?.startsWith('repl_main_thread')) return + if (context.toolUseContext.agentId) return + const cwd = process.cwd() + if (isInsideSkillLearningStorage(cwd)) return + + const project = resolveProjectContext(cwd) + const options = { project } + ++runtimeTurn + + const observations: StoredSkillObservation[] = [] + + // Always reconstruct from the REPL message stream — it is the only source + // that captures user prompts and assistant outcomes (tool-hook observations + // cover tool events only). + for (const observation of observationsFromMessages( + context.messages, + project, + )) { + observations.push(await appendObservation(observation, options)) + } + + // Additionally pull tool-hook observations that arrived since the last + // consumption watermark — deterministic records with precise outcomes. 
+ const all = await readObservations(options) + const fresh = all.filter( + o => + o.source === 'tool-hook' && + o.sessionId === RUNTIME_SESSION_ID && + typeof o.timestamp === 'string' && + o.timestamp > lastConsumedToolHookTimestamp, + ) + observations.push(...fresh) + for (const o of fresh) { + if (o.timestamp > lastConsumedToolHookTimestamp) { + lastConsumedToolHookTimestamp = o.timestamp + } + } + + if (observations.length === 0) return + + // H5: throttle LLM calls — minimum observation count, per-session cap, and + // debounce interval. When any gate fires, fall back to heuristic directly. + const now = Date.now() + const minObservations = 5 + const { llm } = getSkillLearningConfig() + const shouldCallLLM = + observations.length >= minObservations && + llmCallsThisSession < llm.maxCallsPerSession && + now - lastLlmCallTimestamp >= llm.cooldownMs + + let candidates + if (shouldCallLLM) { + llmCallsThisSession++ + lastLlmCallTimestamp = now + candidates = await analyzeWithActiveBackend(observations, { project }) + } else { + // Fall back to the heuristic backend without consuming an LLM call. + const { heuristicObserverBackend } = await import('./sessionObserver.js') + const result = heuristicObserverBackend.analyze(observations, { project }) + candidates = Array.isArray(result) ? 
result : await result + } + + for (const candidate of candidates) { + await upsertInstinct(createInstinct(candidate), options) + } + + await autoEvolveLearnedSkills(options) +} + +export function resetRuntimeObserverForTest(): void { + runtimeTurn = 0 + lastConsumedToolHookTimestamp = '' + resetRuntimeLLMBookkeeping() +} + +async function autoEvolveLearnedSkills(options: { + project: ReturnType +}): Promise { + const instincts = await loadInstincts(options) + const cwd = process.cwd() + + const skillRoots = [ + join(cwd, '.claude', 'skills'), + join(getClaudeConfigHomeDir(), 'skills'), + ] + const skillClusters = clusterInstincts(instincts).filter( + candidate => + candidate.target === 'skill' && + shouldGenerateSkillFromInstincts(candidate.instincts), + ) + for (const cluster of skillClusters) { + const outcome = await generateOrMergeSkillDraft( + cluster.instincts, + { cwd, scope: cluster.instincts[0]?.scope ?? 'project' }, + skillRoots, + ) + if (outcome.action === 'append-evidence') continue + const draft = outcome.draft + if (existsSync(join(draft.outputPath, 'SKILL.md'))) continue + const existing = await compareExistingArtifacts('skill', draft, skillRoots) + const decision = decideSkillLifecycle(draft, existing) + await applySkillLifecycleDecision(decision) + } + + const commandDrafts = generateCommandCandidates(instincts, { cwd }) + for (const draft of commandDrafts) { + const roots = [ + join(cwd, '.claude', 'commands'), + join(getClaudeConfigHomeDir(), 'commands'), + ] + const existing = await compareExistingArtifacts('command', draft, roots) + if (existing.length > 0) continue + await writeLearnedCommand(draft) + } + + const agentDrafts = generateAgentCandidates(instincts, { cwd }) + for (const draft of agentDrafts) { + const roots = [ + join(cwd, '.claude', 'agents'), + join(getClaudeConfigHomeDir(), 'agents'), + ] + const existing = await compareExistingArtifacts('agent', draft, roots) + if (existing.length > 0) continue + await 
writeLearnedAgent(draft) + } + + await checkPromotion() +} + +function observationsFromMessages( + messages: Message[], + project: ReturnType, +): StoredSkillObservation[] { + const sessionId = RUNTIME_SESSION_ID + const base = { + sessionId, + projectId: project.projectId, + projectName: project.projectName, + cwd: project.cwd, + timestamp: new Date().toISOString(), + source: 'hook' as const, + } + + return messages.flatMap((message): StoredSkillObservation[] => { + // H6: watermark dedup — skip messages already processed in this session. + const msgKey = `${sessionId}:${String(message.uuid)}` + if (lastProcessedMessageIds.has(msgKey)) return [] + lastProcessedMessageIds.add(msgKey) + // FIFO truncation to keep the set bounded. Drop down to exactly + // TRIM_PROCESSED_IDS_TO entries (off-by-one fix: previously left size+1 + // because the subtraction didn't account for the just-added entry). + if (lastProcessedMessageIds.size > MAX_PROCESSED_IDS) { + const toDrop = lastProcessedMessageIds.size - TRIM_PROCESSED_IDS_TO + const iter = lastProcessedMessageIds.values() + for (let i = 0; i < toDrop; i++) { + const next = iter.next() + if (next.done) break + lastProcessedMessageIds.delete(next.value) + } + } + + if (message.type === 'user') { + const toolResults = toolResultsFromContent(message.message?.content) + if (toolResults.length > 0) { + return toolResults.map(result => ({ + ...base, + id: crypto.randomUUID(), + event: 'tool_complete', + toolName: result.toolName, + toolOutput: result.output, + outcome: result.isError ? 'failure' : 'success', + })) + } + const text = textFromContent(message.message?.content) + return text.trim() + ? 
[ + { + ...base, + id: crypto.randomUUID(), + event: 'user_message', + messageText: text, + }, + ] + : [] + } + + if (message.type === 'assistant') { + const toolUses = toolUsesFromContent(message.message?.content) + const text = textFromContent(message.message?.content) + return [ + ...toolUses.map(toolUse => ({ + ...base, + id: crypto.randomUUID(), + event: 'tool_start' as const, + toolName: toolUse.toolName, + toolInput: toolUse.input, + })), + ...(text.trim() + ? [ + { + ...base, + id: crypto.randomUUID(), + event: 'assistant_message' as const, + messageText: text, + }, + ] + : []), + ] + } + + return [] + }) +} + +function textFromContent(content: unknown): string { + if (typeof content === 'string') return content + if (!Array.isArray(content)) return '' + return content + .map(block => { + if (!block || typeof block !== 'object') return '' + const record = block as Record + return typeof record.text === 'string' ? record.text : '' + }) + .filter(Boolean) + .join('\n') +} + +function toolUsesFromContent( + content: unknown, +): Array<{ toolName: string; input?: string }> { + if (!Array.isArray(content)) return [] + return content.flatMap(block => { + if (!block || typeof block !== 'object') return [] + const record = block as Record + if (record.type !== 'tool_use') return [] + return [ + { + toolName: String(record.name ?? 'unknown_tool'), + input: stringifyField(record.input), + }, + ] + }) +} + +function toolResultsFromContent( + content: unknown, +): Array<{ toolName: string; output?: string; isError: boolean }> { + if (!Array.isArray(content)) return [] + return content.flatMap(block => { + if (!block || typeof block !== 'object') return [] + const record = block as Record + if (record.type !== 'tool_result') return [] + return [ + { + toolName: String(record.name ?? record.tool_name ?? 
'unknown_tool'), + output: stringifyField(record.content), + isError: record.is_error === true, + }, + ] + }) +} diff --git a/src/services/skillLearning/sessionObserver.ts b/src/services/skillLearning/sessionObserver.ts new file mode 100644 index 000000000..08194a929 --- /dev/null +++ b/src/services/skillLearning/sessionObserver.ts @@ -0,0 +1,296 @@ +import type { StoredSkillObservation } from './observationStore.js' +import { + candidateFromObservation, + createInstinct, + type InstinctCandidate, + type StoredInstinct, +} from './instinctParser.js' +import type { InstinctDomain, SkillObservationOutcome } from './types.js' +import { + analyzeWithActiveBackend, + getActiveObserverBackend, + registerObserverBackend, + type ObserverBackend, + type ObserverBackendContext, +} from './observerBackend.js' +import { llmObserverBackend } from './llmObserverBackend.js' + +export type SessionObserverOptions = { + minRepeatedSequenceCount?: number +} + +const DEFAULT_MIN_REPEATED_SEQUENCE_COUNT = 2 + +export function heuristicAnalyze( + observations: StoredSkillObservation[], + options?: SessionObserverOptions, +): InstinctCandidate[] { + return [ + ...extractUserCorrections(observations), + ...extractToolErrorResolutions(observations), + ...extractRepeatedToolSequences(observations, options), + ...extractProjectConventions(observations), + ] +} + +export const heuristicObserverBackend: ObserverBackend = { + name: 'heuristic', + analyze( + observations: StoredSkillObservation[], + _ctx?: ObserverBackendContext, + ): InstinctCandidate[] { + return heuristicAnalyze(observations) + }, +} + +registerObserverBackend(heuristicObserverBackend) +registerObserverBackend(llmObserverBackend) + +export function analyzeObservations( + observations: StoredSkillObservation[], + options?: SessionObserverOptions, +): StoredInstinct[] { + const backend = getActiveObserverBackend() + const candidates = + backend.name === 'heuristic' + ? 
heuristicAnalyze(observations, options) + : ensureSyncCandidates(backend.analyze(observations)) + return candidates.map(candidate => createInstinct(candidate)) +} + +export async function analyzeObservationsAsync( + observations: StoredSkillObservation[], + ctx?: ObserverBackendContext, +): Promise { + const candidates = await analyzeWithActiveBackend(observations, ctx) + return candidates.map(candidate => createInstinct(candidate)) +} + +export const observeSession = analyzeObservations + +function ensureSyncCandidates( + result: InstinctCandidate[] | Promise, +): InstinctCandidate[] { + if (Array.isArray(result)) return result + throw new Error( + 'Active observer backend returned a Promise; use analyzeObservationsAsync instead', + ) +} + +function extractUserCorrections( + observations: StoredSkillObservation[], +): InstinctCandidate[] { + return observations.flatMap((observation, index) => { + if (observation.event !== 'user_message' || !observation.messageText) { + return [] + } + + const text = observation.messageText.trim() + const correction = parseCorrection(text) + if (!correction) return [] + + const base = candidateFromObservation(observation) + return [ + { + ...base, + trigger: correction.trigger, + action: correction.action, + confidence: 0.7, + domain: inferDomain(text), + source: 'session-observation', + scope: 'project', + evidence: [text], + evidenceOutcome: recentOutcomeBefore(observations, index), + observationIds: [observation.id], + }, + ] + }) +} + +function extractToolErrorResolutions( + observations: StoredSkillObservation[], +): InstinctCandidate[] { + const candidates: InstinctCandidate[] = [] + + for (let i = 0; i < observations.length; i++) { + const current = observations[i] + if (current.event !== 'tool_complete' || current.outcome !== 'failure') { + continue + } + + const laterSuccess = observations.slice(i + 1, i + 6).find(next => { + return ( + next.event === 'tool_complete' && + next.outcome === 'success' && + next.toolName === 
current.toolName + ) + }) + + if (!laterSuccess || !current.toolName) continue + + candidates.push({ + ...candidateFromObservation(current), + trigger: `When ${current.toolName} fails during this project`, + action: `Use the follow-up successful ${current.toolName} invocation as the resolution pattern before retrying blindly.`, + confidence: 0.5, + domain: 'debugging', + source: 'session-observation', + scope: 'project', + evidence: [ + current.toolOutput ?? `${current.toolName} failed`, + laterSuccess.toolOutput ?? `${laterSuccess.toolName} succeeded`, + ], + evidenceOutcome: 'success', + observationIds: [current.id, laterSuccess.id], + }) + } + + return candidates +} + +function extractRepeatedToolSequences( + observations: StoredSkillObservation[], + options?: SessionObserverOptions, +): InstinctCandidate[] { + const minCount = + options?.minRepeatedSequenceCount ?? DEFAULT_MIN_REPEATED_SEQUENCE_COUNT + const toolEvents = observations.filter( + observation => + observation.event === 'tool_start' || + observation.event === 'tool_complete', + ) + const names = toolEvents.map(observation => observation.toolName ?? '') + const sequence = ['Grep', 'Read', 'Edit'] + const matchedIds: string[] = [] + let count = 0 + + for (let i = 0; i <= names.length - sequence.length; i++) { + if (sequence.every((name, offset) => names[i + offset] === name)) { + count++ + matchedIds.push( + ...toolEvents.slice(i, i + sequence.length).map(o => o.id), + ) + } + } + + if (count < minCount) return [] + + const evidence = `Observed ${count} repeated Grep -> Read -> Edit workflow sequences.` + const first = toolEvents.find(event => matchedIds.includes(event.id)) + const lastMatchedId = matchedIds[matchedIds.length - 1] + const lastEvent = toolEvents.find(event => event.id === lastMatchedId) + const sequenceOutcome = + lastEvent?.event === 'tool_complete' ? lastEvent.outcome : undefined + + return [ + { + ...candidateFromObservation(first ?? 
observations[0]), + trigger: 'When changing code in this project', + action: + 'Prefer the Grep -> Read -> Edit workflow: locate symbols, inspect context, then apply the smallest edit.', + confidence: count >= 3 ? 0.65 : 0.5, + domain: 'workflow', + source: 'session-observation', + scope: 'project', + evidence: [evidence], + evidenceOutcome: normalizeOutcome(sequenceOutcome), + observationIds: Array.from(new Set(matchedIds)), + }, + ] +} + +function extractProjectConventions( + observations: StoredSkillObservation[], +): InstinctCandidate[] { + return observations.flatMap((observation, index) => { + if (observation.event !== 'user_message' || !observation.messageText) { + return [] + } + const text = observation.messageText.trim() + if (!/(项目约定|规范|必须|convention|always|must)/i.test(text)) { + return [] + } + + return [ + { + ...candidateFromObservation(observation), + trigger: 'When working in this project', + action: `Follow the project convention: ${text}`, + // Single occurrence gets 0.4 so it stays below the 0.75 promotion + // threshold. Promotion requires corroborating high-confidence evidence + // (e.g. two 0.4s still average 0.4 — other signals must raise the mean). 
+ confidence: 0.4, + domain: 'project', + source: 'session-observation', + scope: 'project', + evidence: [text], + evidenceOutcome: recentOutcomeBefore(observations, index), + observationIds: [observation.id], + }, + ] + }) +} + +function recentOutcomeBefore( + observations: StoredSkillObservation[], + index: number, +): SkillObservationOutcome | undefined { + for (let i = index - 1; i >= 0; i--) { + const prior = observations[i] + if (prior.event !== 'tool_complete') continue + return normalizeOutcome(prior.outcome) + } + return undefined +} + +function normalizeOutcome( + outcome: StoredSkillObservation['outcome'], +): SkillObservationOutcome | undefined { + if (outcome === 'success' || outcome === 'failure' || outcome === 'unknown') { + return outcome + } + return undefined +} + +function parseCorrection( + text: string, +): { trigger: string; action: string } | null { + const noUsePattern = + /(?:不要|别|不应(?:该)?|不要再)\s*(?[^,,。.;;]+)[,,\s]*(?:用|使用|改用|应该用|要用)\s*(?[^,,。.;;]+)/i + const englishPattern = + /(?:do not|don't|avoid)\s+(?[^,.;]+)[,;\s]+(?:use|prefer)\s+(?[^,.;]+)/i + const shouldPattern = + /(?:你应该|应该先|must|should)\s*(?[^,,。.;;]+)/i + + const noUse = text.match(noUsePattern) ?? 
text.match(englishPattern) + if (noUse?.groups) { + const avoid = noUse.groups.avoid.trim() + const prefer = noUse.groups.prefer.trim() + return { + trigger: `When choosing between ${avoid} and ${prefer}`, + action: `Prefer ${prefer}; avoid ${avoid}.`, + } + } + + const should = text.match(shouldPattern) + if (should?.groups) { + const prefer = should.groups.prefer.trim() + return { + trigger: 'When this user gives a corrective instruction', + action: `Prefer this corrected action: ${prefer}.`, + } + } + + return null +} + +function inferDomain(text: string): InstinctDomain { + const lowered = text.toLowerCase() + if (/test|mock|testing-library|vitest|jest|bun test/.test(lowered)) { + return 'testing' + } + if (/git|commit|branch/.test(lowered)) return 'git' + if (/security|secret|token|password/.test(lowered)) return 'security' + if (/style|format|lint|naming/.test(lowered)) return 'code-style' + return 'project' +} diff --git a/src/services/skillLearning/skillGapStore.ts b/src/services/skillLearning/skillGapStore.ts new file mode 100644 index 000000000..04c4f323b --- /dev/null +++ b/src/services/skillLearning/skillGapStore.ts @@ -0,0 +1,499 @@ +import { existsSync } from 'node:fs' +import { mkdir, readFile, rename, writeFile } from 'node:fs/promises' +import { createHash } from 'node:crypto' +import { dirname, join } from 'node:path' +import type { SearchResult } from '../skillSearch/localSearch.js' +import { createInstinct, type StoredInstinct } from './instinctParser.js' +import { + getProjectStorageDir, + resolveProjectContext, +} from './projectContext.js' +import { generateSkillDraft, writeLearnedSkill } from './skillGenerator.js' +import type { + InstinctDomain, + SkillGapStatus, + SkillLearningProjectContext, +} from './types.js' + +export type SkillGapRecommendation = Pick< + SearchResult, + 'name' | 'description' | 'score' +> + +export type SkillGapMaterialization = + | { + type: 'draft' + name: string + skillPath: string + } + | { + type: 'active' + 
name: string + skillPath: string + } + +export type SkillGapRecord = { + key: string + prompt: string + count: number + draftHits: number + // Session IDs that have already contributed a draft hit for this gap — + // prevents one session from inflating `draftHits` beyond 1 and flipping the + // `draftHits >= 2` active-promotion gate by itself. + draftHitSessions: string[] + status: SkillGapStatus + sessionId: string + cwd: string + projectId: string + projectName: string + recommendations: SkillGapRecommendation[] + createdAt: string + updatedAt: string + draft?: SkillGapMaterialization + active?: SkillGapMaterialization +} + +// P0-2 hook: when outcome-aware observation lands, augment this with a +// lookup into observationStore for a matching `outcome: 'success'` tool_complete +// observation keyed by (sessionId, gap.key). Until then, draft promotion uses +// count/signal only. +const DRAFT_PROMOTION_COUNT = 2 +const ACTIVE_PROMOTION_COUNT = 4 +const ACTIVE_PROMOTION_DRAFT_HITS = 2 + +type SkillGapState = { + version: 1 + gaps: Record +} + +export type RecordSkillGapOptions = { + prompt: string + cwd?: string + sessionId?: string + recommendations?: SearchResult[] + project?: SkillLearningProjectContext + rootDir?: string +} + +export async function recordSkillGap( + options: RecordSkillGapOptions, +): Promise { + const prompt = options.prompt.trim() + if (!prompt) { + throw new Error('Cannot record an empty skill gap') + } + + const project = options.project ?? resolveProjectContext(options.cwd) + const state = await readSkillGapState(project, options.rootDir) + const key = buildSkillGapKey(prompt) + const now = new Date().toISOString() + const existing = state.gaps[key] + + const gap: SkillGapRecord = { + key, + prompt, + count: (existing?.count ?? 0) + 1, + draftHits: existing?.draftHits ?? 0, + draftHitSessions: existing?.draftHitSessions ?? [], + status: existing?.status ?? 'pending', + sessionId: options.sessionId ?? 'unknown-session', + cwd: options.cwd ?? 
project.cwd, + projectId: project.projectId, + projectName: project.projectName, + recommendations: (options.recommendations ?? []).slice(0, 5).map(r => ({ + name: r.name, + description: r.description, + score: r.score, + })), + createdAt: existing?.createdAt ?? now, + updatedAt: now, + draft: existing?.draft, + active: existing?.active, + } + + if (gap.status === 'rejected') { + state.gaps[key] = gap + await writeSkillGapState(project, state, options.rootDir) + return gap + } + + if (!gap.draft && shouldPromoteToDraft(gap)) { + gap.draft = await writeSkillGapDraft(gap, project) + gap.status = 'draft' + await clearRuntimeSkillCaches() + } + + if (gap.draft && !gap.active && shouldPromoteToActive(gap)) { + gap.active = await writeActiveSkillForGap(gap, project) + gap.status = 'active' + await clearRuntimeSkillCaches() + } + + state.gaps[key] = gap + await writeSkillGapState(project, state, options.rootDir) + return gap +} + +export async function readSkillGaps( + project = resolveProjectContext(), + rootDir?: string, +): Promise { + const state = await readSkillGapState(project, rootDir) + return Object.values(state.gaps).sort((a, b) => a.key.localeCompare(b.key)) +} + +export async function findGapKeyByDraftPath( + draftPath: string, + project = resolveProjectContext(), + rootDir?: string, +): Promise { + const state = await readSkillGapState(project, rootDir) + for (const gap of Object.values(state.gaps)) { + if (gap.draft?.skillPath === draftPath) return gap.key + } + return undefined +} + +export async function recordDraftHit( + key: string, + project = resolveProjectContext(), + rootDir?: string, + sessionId = 'unknown-session', +): Promise { + const state = await readSkillGapState(project, rootDir) + const gap = state.gaps[key] + if (!gap || !gap.draft || gap.active) return gap + // One draft hit per session: a single actor reloading the same draft + // repeatedly must not flip the draftHits>=2 gate. + const existingSessions = gap.draftHitSessions ?? 
[] + if (existingSessions.includes(sessionId)) return gap + const now = new Date().toISOString() + const updated: SkillGapRecord = { + ...gap, + draftHits: gap.draftHits + 1, + draftHitSessions: [...existingSessions, sessionId], + updatedAt: now, + } + + if (shouldPromoteToActive(updated)) { + updated.active = await writeActiveSkillForGap(updated, project) + updated.status = 'active' + await clearRuntimeSkillCaches() + } + + state.gaps[key] = updated + await writeSkillGapState(project, state, rootDir) + return updated +} + +export async function promoteGapToDraft( + key: string, + project = resolveProjectContext(), + rootDir?: string, +): Promise { + const state = await readSkillGapState(project, rootDir) + const gap = state.gaps[key] + if (!gap) return undefined + if (gap.status === 'rejected') return gap + if (gap.draft) return gap + const updated: SkillGapRecord = { + ...gap, + draft: await writeSkillGapDraft(gap, project), + status: 'draft', + updatedAt: new Date().toISOString(), + } + state.gaps[key] = updated + await writeSkillGapState(project, state, rootDir) + await clearRuntimeSkillCaches() + return updated +} + +export async function rejectSkillGap( + key: string, + project = resolveProjectContext(), + rootDir?: string, +): Promise { + const state = await readSkillGapState(project, rootDir) + const gap = state.gaps[key] + if (!gap) return undefined + const updated: SkillGapRecord = { + ...gap, + status: 'rejected', + updatedAt: new Date().toISOString(), + } + state.gaps[key] = updated + await writeSkillGapState(project, state, rootDir) + return updated +} + +export function shouldPromoteToDraft(gap: SkillGapRecord): boolean { + // Draft promotion now requires repeated occurrence. The legacy + // `isStrongReusableSignal` path was the cause of single-utterance Chinese + // exhortations being promoted straight to active — P0-2 will reintroduce + // outcome-aware signal once the observation layer supplies it. 
+ return gap.count >= DRAFT_PROMOTION_COUNT +} + +export function shouldPromoteToActive(gap: SkillGapRecord): boolean { + if (!gap.draft) return false + return ( + gap.count >= ACTIVE_PROMOTION_COUNT || + gap.draftHits >= ACTIVE_PROMOTION_DRAFT_HITS + ) +} + +async function writeSkillGapDraft( + gap: SkillGapRecord, + project: SkillLearningProjectContext, +): Promise { + const instinct = createGapInstinct(gap, 'pending') + const draftsRoot = join( + project.projectRoot ?? project.cwd, + '.claude', + 'skills', + '.drafts', + ) + const draft = generateSkillDraft([instinct], { + cwd: project.projectRoot ?? project.cwd, + outputRoot: draftsRoot, + scope: 'project', + name: `draft-${buildNameFragment(gap.prompt)}`, + description: + 'Draft learned skill candidate. Promote after repeated evidence or explicit user correction.', + }) + const skillFile = join(draft.outputPath, 'SKILL.md') + if (!existsSync(skillFile)) { + await writeLearnedSkill({ + ...draft, + content: + draft.content + + '\n## Promotion Rule\n\nDo not move this draft into active skills until the same gap repeats or the user explicitly confirms this should become reusable.\n', + }) + } + return { type: 'draft', name: draft.name, skillPath: skillFile } +} + +async function writeActiveSkillForGap( + gap: SkillGapRecord, + project: SkillLearningProjectContext, +): Promise { + const instinct = createGapInstinct(gap, 'active') + const draft = generateSkillDraft([instinct], { + cwd: project.projectRoot ?? 
project.cwd, + scope: 'project', + name: buildNameFragment(gap.prompt), + description: buildGapAction(gap.prompt), + }) + const skillFile = join(draft.outputPath, 'SKILL.md') + if (!existsSync(skillFile)) { + await writeLearnedSkill(draft) + } + return { type: 'active', name: draft.name, skillPath: skillFile } +} + +function createGapInstinct( + gap: SkillGapRecord, + status: StoredInstinct['status'], +): StoredInstinct { + return createInstinct({ + trigger: `When the user asks for ${summarize(gap.prompt, 120)}`, + action: buildGapAction(gap.prompt), + confidence: status === 'active' ? 0.82 : 0.55, + domain: inferDomain(gap.prompt), + source: 'session-observation', + scope: 'project', + projectId: gap.projectId, + projectName: gap.projectName, + evidence: [ + `Skill gap prompt: ${summarize(gap.prompt, 180)}`, + `No high-confidence active skill was auto-loaded.`, + `Observed ${gap.count} time(s).`, + ], + status, + }) +} + +function buildGapAction(prompt: string): string { + if ( + /feature\s*\(|feature flag|flag_name|stub|no-op|noop|最小实现/i.test(prompt) + ) { + return 'Audit feature flags by scanning feature() call sites, excluding generated/dependency noise, classifying each candidate as stub, shell, MVP, or thin-toggle, and writing an evidence-backed document.' + } + if (/skill|技能|学习|进化|evolve|learning/i.test(prompt)) { + return 'Run skill discovery first; auto-load only high-confidence matching skills; record a skill gap when none match; promote repeated or corrected gaps into learned skills.' + } + if (/test|测试|stub|调用链|参数/i.test(prompt)) { + return 'Infer tests from existing files, parameters, exports, and call chains before simplifying mocks or inventing behavior.' 
+ } + return `Reuse the workflow learned from this prompt: ${summarize(prompt, 180)}.` +} + +function inferDomain(prompt: string): InstinctDomain { + const text = prompt.toLowerCase() + if (/test|测试|stub|fixture|断言/.test(text)) return 'testing' + if (/error|bug|fix|失败|错误|修复|debug/.test(text)) return 'debugging' + if (/security|安全|漏洞|secret|token/.test(text)) return 'security' + if (/git|commit|branch|pr\b/.test(text)) return 'git' + if (/style|lint|format|命名|规范/.test(text)) return 'code-style' + return 'workflow' +} + +async function readSkillGapState( + project: SkillLearningProjectContext, + rootDir?: string, +): Promise { + const path = getSkillGapStatePath(project, rootDir) + let raw: string + try { + raw = await readFile(path, 'utf8') + } catch (error) { + // Only treat "file doesn't exist yet" as empty state. Every other error + // (EACCES, EIO, disk full, etc.) must throw — swallowing them here would + // let a subsequent write persist {} and zero out all gap records. + if ((error as NodeJS.ErrnoException).code === 'ENOENT') { + return { version: 1, gaps: {} } + } + throw error + } + try { + return migrateLegacyGapState(JSON.parse(raw) as SkillGapState) + } catch { + // Corrupt/truncated JSON — don't silently reset. Backup and start fresh, + // so the crash isn't masked and the data can be recovered manually. + const backup = `${path}.corrupt-${Date.now()}` + try { + await writeFile(backup, raw, 'utf8') + } catch { + /* best effort */ + } + return { version: 1, gaps: {} } + } +} + +function migrateLegacyGapState(state: SkillGapState): SkillGapState { + const migrated: Record = {} + for (const [key, record] of Object.entries(state.gaps ?? {})) { + const legacy = record as Partial & { + status?: unknown + } + const draftHits = + typeof legacy.draftHits === 'number' && Number.isFinite(legacy.draftHits) + ? legacy.draftHits + : 0 + const count = typeof legacy.count === 'number' ? 
legacy.count : 1 + const normalizedStatus = normalizeLegacyStatus(legacy.status) + const hasDraftFile = Boolean(legacy.draft) + const hasActiveFile = Boolean(legacy.active) + + let status: SkillGapStatus = normalizedStatus + if (status === 'draft' && count < DRAFT_PROMOTION_COUNT && !hasDraftFile) { + // Legacy first-call-writes-draft artifact with no file on disk yet. + status = 'pending' + } + if (status === 'active' && !hasActiveFile) { + status = hasDraftFile ? 'draft' : 'pending' + } + + const draftHitSessions = Array.isArray(legacy.draftHitSessions) + ? legacy.draftHitSessions.filter( + (session): session is string => typeof session === 'string', + ) + : [] + migrated[key] = { + ...(record as SkillGapRecord), + count, + draftHits, + draftHitSessions, + status, + } + } + return { version: 1, gaps: migrated } +} + +function normalizeLegacyStatus(value: unknown): SkillGapStatus { + if ( + value === 'pending' || + value === 'draft' || + value === 'active' || + value === 'rejected' + ) { + return value + } + return 'pending' +} + +async function writeSkillGapState( + project: SkillLearningProjectContext, + state: SkillGapState, + rootDir?: string, +): Promise { + const path = getSkillGapStatePath(project, rootDir) + await mkdir(dirname(path), { recursive: true }) + // Atomic write: temp + rename. A direct writeFile leaves a truncated file + // on crash mid-write; combined with the (now strict) readSkillGapState, + // that would lose gap records. + const tmpPath = `${path}.tmp-${process.pid}-${Date.now()}` + await writeFile(tmpPath, `${JSON.stringify(state, null, 2)}\n`, 'utf8') + await rename(tmpPath, path) +} + +function getSkillGapStatePath( + project: SkillLearningProjectContext, + rootDir?: string, +): string { + const base = rootDir + ? project.projectId === 'global' + ? 
join(rootDir, 'global') + : join(rootDir, 'projects', project.projectId) + : getProjectStorageDir(project.projectId) + return join(base, 'skill-gaps.json') +} + +function buildSkillGapKey(prompt: string): string { + return `${buildNameFragment(prompt)}-${hash(prompt).slice(0, 8)}` +} + +function buildNameFragment(prompt: string): string { + const mapped = prompt + .replaceAll('技能', ' skill ') + .replaceAll('学习', ' learning ') + .replaceAll('进化', ' evolution ') + .replaceAll('测试', ' testing ') + .replaceAll('最小实现', ' minimal implementation ') + .toLowerCase() + const stop = new Set([ + 'the', + 'and', + 'for', + 'with', + 'this', + 'that', + 'user', + 'about', + 'feature', + 'flag', + 'name', + ]) + const words = (mapped.match(/[a-z0-9][a-z0-9_-]{2,}/g) ?? []) + .filter(word => !stop.has(word)) + .slice(0, 5) + const value = words.join('-') || 'learned-gap' + return value.slice(0, 54).replace(/-+$/g, '') +} + +function summarize(value: string, max: number): string { + return value.replace(/\s+/g, ' ').trim().slice(0, max) +} + +function hash(value: string): string { + return createHash('sha1').update(value).digest('hex') +} + +async function clearRuntimeSkillCaches(): Promise { + try { + const { clearCommandsCache } = await import('../../commands.js') + clearCommandsCache() + } catch { + // Best effort only; generated skill files are still available next process. 
+ } +} diff --git a/src/services/skillLearning/skillGenerator.ts b/src/services/skillLearning/skillGenerator.ts new file mode 100644 index 000000000..1091cfefc --- /dev/null +++ b/src/services/skillLearning/skillGenerator.ts @@ -0,0 +1,206 @@ +import { mkdir, readFile, writeFile } from 'node:fs/promises' +import { join } from 'node:path' +import { getClaudeConfigHomeDir } from '../../utils/envUtils.js' +import { clearSkillIndexCache } from '../skillSearch/localSearch.js' +import type { Instinct } from './instinctParser.js' +import { buildLearnedSkillName, normalizeSkillName } from './learningPolicy.js' +import { + compareExistingArtifacts, + scoreArtifactOverlap, + type ExistingSkill, +} from './skillLifecycle.js' +import type { LearnedSkillDraft, SkillLearningScope } from './types.js' + +export const DUPLICATE_SKILL_OVERLAP_THRESHOLD = 0.8 + +export type SkillGeneratorOptions = { + cwd?: string + globalSkillsDir?: string + outputRoot?: string + name?: string + description?: string +} + +export function generateSkillDraft( + instincts: Instinct[], + options?: SkillGeneratorOptions & { scope?: SkillLearningScope }, +): LearnedSkillDraft { + if (instincts.length === 0) { + throw new Error('Cannot generate a skill draft without instincts') + } + + const scope = options?.scope ?? instincts[0]?.scope ?? 'project' + const name = options?.name + ? normalizeSkillName(options.name) + : buildSkillName(instincts) + const confidence = + instincts.reduce((sum, instinct) => sum + instinct.confidence, 0) / + instincts.length + const description = options?.description ?? 
buildDescription(instincts) + const outputPath = getLearnedSkillPath(name, scope, options) + const content = buildSkillContent({ + name, + description, + confidence, + instincts, + }) + + return { + name, + description, + scope, + sourceInstinctIds: instincts.map(instinct => instinct.id), + confidence: Number(confidence.toFixed(2)), + content, + outputPath, + } +} + +export type SkillDedupOutcome = + | { action: 'create'; draft: LearnedSkillDraft } + | { + action: 'append-evidence' + target: ExistingSkill + overlap: number + appendedPath: string + } + +export async function generateOrMergeSkillDraft( + instincts: Instinct[], + options: SkillGeneratorOptions & { scope?: SkillLearningScope }, + existingRoots: string[], +): Promise { + const draft = generateSkillDraft(instincts, options) + const candidates = await compareExistingArtifacts( + 'skill', + draft, + existingRoots, + ) + for (const candidate of candidates) { + const overlap = scoreArtifactOverlap(draft, candidate) + if (overlap >= DUPLICATE_SKILL_OVERLAP_THRESHOLD) { + const appendedPath = await appendInstinctEvidenceToSkill( + candidate, + instincts, + ) + return { + action: 'append-evidence', + target: candidate, + overlap, + appendedPath, + } + } + } + return { action: 'create', draft } +} + +export async function appendInstinctEvidenceToSkill( + target: ExistingSkill, + instincts: Instinct[], +): Promise { + const existing = await readFile(target.path, 'utf8').catch( + () => target.content, + ) + const now = new Date().toISOString() + const block = [ + '', + `## Learned evidence (${now})`, + '', + ...instincts.flatMap(instinct => + instinct.evidence.map(evidence => `- ${evidence}`), + ), + '', + ].join('\n') + const merged = existing.endsWith('\n') + ? 
existing + block + : `${existing}\n${block}` + await writeFile(target.path, merged, 'utf8') + clearSkillIndexCache() + return target.path +} + +export async function writeLearnedSkill( + draft: LearnedSkillDraft, +): Promise { + await mkdir(draft.outputPath, { recursive: true }) + const filePath = join(draft.outputPath, 'SKILL.md') + await writeFile(filePath, draft.content, 'utf8') + clearSkillIndexCache() + try { + const { clearCommandsCache } = await import('../../commands.js') + clearCommandsCache() + } catch { + // Best effort: the next process will see the generated skill even if the + // in-process command cache cannot be cleared due to import timing. + } + return filePath +} + +export function getLearnedSkillPath( + name: string, + scope: SkillLearningScope, + options?: SkillGeneratorOptions, +): string { + if (options?.outputRoot) return join(options.outputRoot, name) + if (scope === 'project') { + return join(options?.cwd ?? process.cwd(), '.claude', 'skills', name) + } + return join( + options?.globalSkillsDir ?? join(getClaudeConfigHomeDir(), 'skills'), + name, + ) +} + +function buildSkillName(instincts: Instinct[]): string { + return buildLearnedSkillName(instincts) +} + +function buildDescription(instincts: Instinct[]): string { + const action = instincts[0]?.action ?? 'Apply a learned project pattern' + const short = action.replace(/\s+/g, ' ').slice(0, 120) + return short.length > 0 ? 
short : 'Apply learned project patterns' +} + +function buildSkillContent(params: { + name: string + description: string + confidence: number + instincts: Instinct[] +}): string { + const { name, description, confidence, instincts } = params + const lines = [ + '---', + `name: ${name}`, + `description: ${JSON.stringify(description)}`, + 'origin: skill-learning', + `confidence: ${Number(confidence.toFixed(2))}`, + `evolved_from: [${instincts.map(instinct => JSON.stringify(instinct.id)).join(', ')}]`, + '---', + '', + `# ${titleCase(name)}`, + '', + '## Trigger', + '', + instincts.map(instinct => `- ${instinct.trigger}`).join('\n'), + '', + '## Action', + '', + instincts.map(instinct => `- ${instinct.action}`).join('\n'), + '', + '## Evidence', + '', + instincts + .flatMap(instinct => instinct.evidence.map(evidence => `- ${evidence}`)) + .join('\n'), + '', + ] + return lines.join('\n') +} + +function titleCase(value: string): string { + return value + .split('-') + .filter(Boolean) + .map(part => part[0]?.toUpperCase() + part.slice(1)) + .join(' ') +} diff --git a/src/services/skillLearning/skillLifecycle.ts b/src/services/skillLearning/skillLifecycle.ts new file mode 100644 index 000000000..9edeff5c0 --- /dev/null +++ b/src/services/skillLearning/skillLifecycle.ts @@ -0,0 +1,496 @@ +import { + mkdir, + readdir, + readFile, + rename, + rm, + writeFile, +} from 'node:fs/promises' +import { existsSync } from 'node:fs' +import { basename, dirname, join } from 'node:path' +import { clearSkillIndexCache } from '../skillSearch/localSearch.js' +import type { LearnedSkillDraft } from './types.js' +import { writeLearnedSkill } from './skillGenerator.js' + +export type ExistingSkill = { + name: string + path: string + description: string + content: string + confidence?: number + status?: 'active' | 'superseded' | 'archived' | 'deleted' + referencedBy?: string[] + safeToDelete?: boolean + quality?: 'low' | 'medium' | 'high' +} + +export type SkillLifecycleDecision = + | { type: 
'create'; draft: LearnedSkillDraft; reason: string } + | { type: 'merge'; targetSkill: ExistingSkill; patch: string; reason: string } + | { + type: 'replace' + targetSkill: ExistingSkill + draft: LearnedSkillDraft + reason: string + hardDelete?: boolean + } + | { type: 'archive'; targetSkill: ExistingSkill; reason: string } + | { + type: 'delete' + targetSkill: ExistingSkill + reason: string + confirmed?: boolean + } + +export type ReplacementManifest = { + oldSkill: string + oldPath: string + newSkill?: string + newPath?: string + action: 'archive' | 'delete' + reason: string + replacedAt: string + recoverable: boolean +} + +export type SkillLifecycleOptions = { + allowHardDelete?: boolean + archiveRoot?: string + manifestRoot?: string + now?: Date +} + +export type LearnedArtifactKind = 'skill' | 'command' | 'agent' + +export type ArtifactDraft = { + name: string + description: string + content: string +} + +export async function compareExistingArtifacts( + kind: LearnedArtifactKind, + draft: ArtifactDraft, + rootsOrSkills: string[] | ExistingSkill[], +): Promise { + const existing = + rootsOrSkills.length > 0 && typeof rootsOrSkills[0] === 'string' + ? 
await loadExistingArtifacts(kind, rootsOrSkills as string[]) + : (rootsOrSkills as ExistingSkill[]) + const draftTerms = terms( + `${draft.name} ${draft.description} ${draft.content}`, + ) + return existing + .map(skill => ({ + skill, + score: overlapScore( + draftTerms, + terms(`${skill.name} ${skill.description} ${skill.content}`), + ), + })) + .filter(item => item.score >= 0.18) + .sort((a, b) => b.score - a.score) + .map(item => item.skill) +} + +export async function compareExistingSkills( + draft: LearnedSkillDraft, + rootsOrSkills: string[] | ExistingSkill[], +): Promise { + return compareExistingArtifacts('skill', draft, rootsOrSkills) +} + +export async function loadExistingArtifacts( + kind: LearnedArtifactKind, + roots: string[], +): Promise { + if (kind === 'skill') return loadExistingSkills(roots) + const results: ExistingSkill[] = [] + for (const root of roots) { + if (!existsSync(root)) continue + await collectArtifactFiles(root, results) + } + return results +} + +export function decideSkillLifecycle( + draft: LearnedSkillDraft, + existingSkills: ExistingSkill[], + options: Pick = {}, +): SkillLifecycleDecision { + const deletable = existingSkills.find(skill => isSafeToHardDelete(skill)) + if (options.allowHardDelete && deletable) { + return { + type: 'delete', + targetSkill: deletable, + reason: + 'Existing skill is low quality, unreferenced, and safe to delete.', + confirmed: true, + } + } + + const target = existingSkills[0] + if (!target) { + return { + type: 'create', + draft, + reason: 'No overlapping active skill found.', + } + } + + const draftTerms = terms( + `${draft.name} ${draft.description} ${draft.content}`, + ) + const existingTerms = terms( + `${target.name} ${target.description} ${target.content}`, + ) + const score = overlapScore(draftTerms, existingTerms) + + if ( + score >= 0.72 && + draft.confidence >= 0.75 && + shouldReplaceSkill(draft, target) + ) { + return { + type: 'replace', + targetSkill: target, + draft, + reason: `New 
learned skill has high overlap (${score.toFixed(2)}) and higher confidence.`, + } + } + + if (score >= 0.35) { + return { + type: 'merge', + targetSkill: target, + patch: buildMergePatch(draft), + reason: `Existing skill overlaps with the learned pattern (${score.toFixed(2)}).`, + } + } + + return { type: 'create', draft, reason: 'Overlap is too low to merge.' } +} + +export async function applySkillLifecycleDecision( + decision: SkillLifecycleDecision, + options: SkillLifecycleOptions = {}, +): Promise<{ + activePath?: string + archivedPath?: string + deletedPath?: string + manifestPath?: string + tombstonePath?: string +}> { + switch (decision.type) { + case 'create': { + return { activePath: await writeLearnedSkill(decision.draft) } + } + case 'merge': { + if (!isSkillLearningGenerated(decision.targetSkill)) { + process.stderr.write( + `[skill-learning] skip user-authored skill: ${decision.targetSkill.path}\n`, + ) + return {} + } + return { + activePath: await writeMergePatch(decision.targetSkill, decision.patch), + } + } + case 'replace': { + if (!isSkillLearningGenerated(decision.targetSkill)) { + process.stderr.write( + `[skill-learning] skip user-authored skill: ${decision.targetSkill.path}\n`, + ) + return {} + } + // Archive/delete the superseded skill before the replacement is + // written so that any search-index refresh between the two steps can + // never observe both skills active simultaneously. `decision.draft + // .outputPath` is the exact path `writeLearnedSkill` will target. 
+ const predictedNewPath = decision.draft.outputPath + if (decision.hardDelete) { + const { deletedPath, manifestPath, tombstonePath } = await deleteSkill( + decision.targetSkill, + decision.reason, + { + newSkill: decision.draft.name, + newPath: predictedNewPath, + }, + { ...options, allowHardDelete: true }, + ) + const activePath = await writeLearnedSkill(decision.draft) + return { activePath, deletedPath, manifestPath, tombstonePath } + } + const { archivedPath, manifestPath } = await archiveSkill( + decision.targetSkill, + decision.reason, + { + newSkill: decision.draft.name, + newPath: predictedNewPath, + }, + options, + ) + const activePath = await writeLearnedSkill(decision.draft) + return { activePath, archivedPath, manifestPath } + } + case 'archive': + return await archiveSkill( + decision.targetSkill, + decision.reason, + undefined, + options, + ) + case 'delete': + return await deleteSkill( + decision.targetSkill, + decision.reason, + undefined, + { + ...options, + allowHardDelete: + options.allowHardDelete && decision.confirmed !== false, + }, + ) + } +} + +export async function loadExistingSkills( + roots: string[], +): Promise { + const skills: ExistingSkill[] = [] + for (const root of roots) { + if (!existsSync(root)) continue + await collectSkillFiles(root, skills) + } + return skills +} + +export async function archiveSkill( + skill: ExistingSkill, + reason: string, + replacement?: { newSkill?: string; newPath?: string }, + options: SkillLifecycleOptions = {}, +): Promise<{ archivedPath: string; manifestPath: string }> { + const skillDir = dirname(skill.path) + const archiveRoot = options.archiveRoot ?? join(dirname(skillDir), '.archive') + const archivedPath = join( + archiveRoot, + `${basename(skillDir)}-${timestamp(options.now)}`, + ) + await mkdir(archiveRoot, { recursive: true }) + await rename(skillDir, archivedPath) + const manifestPath = await writeReplacementManifest( + options.manifestRoot ?? 
archivedPath, + { + oldSkill: skill.name, + oldPath: skill.path, + newSkill: replacement?.newSkill, + newPath: replacement?.newPath, + action: 'archive', + reason, + replacedAt: (options.now ?? new Date()).toISOString(), + recoverable: true, + }, + ) + clearSkillIndexCache() + return { archivedPath, manifestPath } +} + +export async function deleteSkill( + skill: ExistingSkill, + reason: string, + replacement?: { newSkill?: string; newPath?: string }, + options: SkillLifecycleOptions = {}, +): Promise<{ + deletedPath: string + manifestPath: string + tombstonePath: string +}> { + if (!options.allowHardDelete) { + throw new Error('Hard delete requires allowHardDelete=true') + } + + const skillDir = dirname(skill.path) + const content = existsSync(skill.path) + ? await readFile(skill.path, 'utf8') + : '' + const manifestRoot = + options.manifestRoot ?? join(dirname(skillDir), '.tombstones') + const manifestPath = await writeReplacementManifest(manifestRoot, { + oldSkill: skill.name, + oldPath: skill.path, + newSkill: replacement?.newSkill, + newPath: replacement?.newPath, + action: 'delete', + reason, + replacedAt: (options.now ?? 
new Date()).toISOString(), + recoverable: false, + }) + const tombstonePath = join( + manifestRoot, + `${skill.name}-${timestamp(options.now)}.tombstone.json`, + ) + await writeFile( + tombstonePath, + `${JSON.stringify({ deletedSkill: skill.name, oldPath: skill.path, content }, null, 2)}\n`, + 'utf8', + ) + await rm(skillDir, { recursive: true, force: true }) + clearSkillIndexCache() + return { deletedPath: skill.path, manifestPath, tombstonePath } +} + +export async function writeReplacementManifest( + directory: string, + manifest: ReplacementManifest, +): Promise { + await mkdir(directory, { recursive: true }) + const manifestPath = join(directory, 'replacement-manifest.json') + await writeFile( + manifestPath, + `${JSON.stringify(manifest, null, 2)}\n`, + 'utf8', + ) + return manifestPath +} + +async function writeMergePatch( + skill: ExistingSkill, + patch: string, +): Promise { + const patchPath = join(dirname(skill.path), 'learned-skill.patch.md') + await writeFile(patchPath, patch, 'utf8') + clearSkillIndexCache() + return patchPath +} + +function buildMergePatch(draft: LearnedSkillDraft): string { + return [ + '# Learned Skill Merge Patch', + '', + `Target learned skill: ${draft.name}`, + `Confidence: ${draft.confidence}`, + '', + '## Suggested additions', + '', + draft.content, + ].join('\n') +} + +function shouldReplaceSkill( + draft: LearnedSkillDraft, + target: ExistingSkill, +): boolean { + if (target.status === 'superseded' || target.status === 'archived') + return true + const confidenceGap = draft.confidence - (target.confidence ?? 0.5) + const contentGap = draft.content.length - target.content.length + return confidenceGap >= 0.15 || contentGap > 160 +} + +function isSafeToHardDelete(skill: ExistingSkill): boolean { + return ( + skill.safeToDelete === true && + (skill.referencedBy?.length ?? 
0) === 0 && + skill.quality === 'low' + ) +} + +function timestamp(date = new Date()): string { + return date.toISOString().replace(/[:.]/g, '-') +} + +async function collectSkillFiles( + root: string, + results: ExistingSkill[], +): Promise { + const entries = await readdir(root, { withFileTypes: true }) + for (const entry of entries) { + const full = join(root, entry.name) + if (entry.isDirectory()) { + if (entry.name === '.archive') continue + await collectSkillFiles(full, results) + continue + } + if (entry.isFile() && entry.name === 'SKILL.md') { + const content = await readFile(full, 'utf8') + results.push({ + name: parseFrontmatter(content, 'name') ?? basename(dirname(full)), + description: parseFrontmatter(content, 'description') ?? '', + path: full, + content, + }) + } + } +} + +async function collectArtifactFiles( + root: string, + results: ExistingSkill[], +): Promise { + const entries = await readdir(root, { withFileTypes: true }) + for (const entry of entries) { + const full = join(root, entry.name) + if (entry.isDirectory()) { + if (entry.name === '.archive') continue + await collectArtifactFiles(full, results) + continue + } + if (entry.isFile() && entry.name.endsWith('.md')) { + const content = await readFile(full, 'utf8') + results.push({ + name: + parseFrontmatter(content, 'name') ?? entry.name.replace(/\.md$/, ''), + description: parseFrontmatter(content, 'description') ?? '', + path: full, + content, + }) + } + } +} + +function parseFrontmatter(content: string, key: string): string | undefined { + // Restrict the search to the actual YAML frontmatter block between the + // opening `---` and the next `---`. A naked body line like + // `origin: skill-learning` in a user-authored doc must NOT be mistaken + // for a generated-skill marker. 
+ const fmMatch = content.match(/^---\r?\n([\s\S]*?)\r?\n---/) + if (!fmMatch) return undefined + const match = fmMatch[1].match(new RegExp(`^${key}:\\s*"?([^"\\n]+)"?`, 'm')) + return match?.[1]?.trim() +} + +function isSkillLearningGenerated(skill: ExistingSkill): boolean { + return parseFrontmatter(skill.content, 'origin') === 'skill-learning' +} + +function terms(value: string): Set { + return new Set( + value + .toLowerCase() + .split(/[^a-z0-9]+/) + .filter(term => term.length > 2), + ) +} + +function overlapScore(a: Set, b: Set): number { + if (a.size === 0 || b.size === 0) return 0 + let intersection = 0 + for (const term of a) { + if (b.has(term)) intersection++ + } + return intersection / Math.min(a.size, b.size) +} + +export function scoreArtifactOverlap( + draft: ArtifactDraft, + existing: { name: string; description: string; content: string }, +): number { + const draftTerms = terms( + `${draft.name} ${draft.description} ${draft.content}`, + ) + const existingTerms = terms( + `${existing.name} ${existing.description} ${existing.content}`, + ) + return overlapScore(draftTerms, existingTerms) +} diff --git a/src/services/skillLearning/toolEventObserver.ts b/src/services/skillLearning/toolEventObserver.ts new file mode 100644 index 000000000..2e29710f6 --- /dev/null +++ b/src/services/skillLearning/toolEventObserver.ts @@ -0,0 +1,312 @@ +import { randomUUID } from 'node:crypto' +import { + appendObservation, + type StoredSkillObservation, +} from './observationStore.js' +import type { + SkillLearningProjectContext, + SkillObservationOutcome, +} from './types.js' +import { logForDebugging } from '../../utils/debug.js' +import { logError } from '../../utils/log.js' + +/** + * Tool event hook layer. 
+ * + * Preferred observation pathway: consumers (tool dispatcher, REPL turn loop, + * or integration tests) call `recordToolStart` / `recordToolComplete` / + * `recordToolError` / `recordUserCorrection` as tool-level events happen, + * producing deterministic observations with `source: 'tool-hook'`. + * + * Post-sampling reconstruction (runtimeObserver.observationsFromMessages) + * is retained as a fallback for environments where the caller cannot emit + * tool events directly. + * + * @todo Wire these functions into `src/Tool.ts`'s public dispatch so the + * main REPL tool loop produces tool-hook observations automatically. + * Until then, callers that do have tool-level signal (integration + * tests, custom harness code, future tool middleware) can use the + * functions here directly. + */ + +export type ToolHookContext = { + sessionId: string + turn: number + projectId: string + projectName: string + cwd: string + project?: SkillLearningProjectContext +} + +/** Maximum number of turns tracked per session before pruning. */ +const EMITTED_TURNS_SET_MAX = 500 +/** How many turns to retain after pruning a session Set. */ +const EMITTED_TURNS_SET_KEEP = 250 +/** Maximum number of sessions tracked in the Map before pruning. */ +const EMITTED_TURNS_MAP_MAX = 50 +/** How many sessions to retain after pruning the Map. */ +const EMITTED_TURNS_MAP_KEEP = 25 + +const emittedTurns = new Map>() + +/** + * Prune `emittedTurns` to stay within memory bounds. + * + * - If any session's Set exceeds `EMITTED_TURNS_SET_MAX` entries, retain only + * the most recent `EMITTED_TURNS_SET_KEEP` turn numbers (FIFO trim). + * - If the Map itself exceeds `EMITTED_TURNS_MAP_MAX` entries, delete the + * oldest `EMITTED_TURNS_MAP_MAX - EMITTED_TURNS_MAP_KEEP` sessions + * (insertion-order LRU). + * + * Exported so tests and `resetToolHookBookkeeping` callers can invoke it + * directly. + */ +export function pruneEmittedTurns(): void { + // Prune over-sized Sets first. 
FIFO by insertion order — NOT by turn + // number magnitude. Non-monotonic turn ordering (e.g. replayed transcripts + // or nested tool chains) should not cause us to evict the wrong entries. + for (const [sessionId, turns] of emittedTurns) { + if (turns.size > EMITTED_TURNS_SET_MAX) { + const iter = turns.values() + const toDrop = turns.size - EMITTED_TURNS_SET_KEEP + for (let i = 0; i < toDrop; i++) { + const next = iter.next() + if (next.done) break + turns.delete(next.value) + } + } + } + // Prune over-sized Map (delete oldest insertion-order entries). + if (emittedTurns.size > EMITTED_TURNS_MAP_MAX) { + const toDelete = emittedTurns.size - EMITTED_TURNS_MAP_KEEP + let deleted = 0 + for (const key of emittedTurns.keys()) { + if (deleted >= toDelete) break + emittedTurns.delete(key) + deleted++ + } + } +} + +function markTurn(sessionId: string, turn: number): void { + // Refresh Map insertion order: delete + re-set so a recently-touched + // session is treated as "youngest" for the LRU-ish Map eviction. + const seen = emittedTurns.get(sessionId) ?? new Set() + seen.add(turn) + emittedTurns.delete(sessionId) + emittedTurns.set(sessionId, seen) + pruneEmittedTurns() +} + +export function hasToolHookObservationsForTurn( + sessionId: string, + turn: number, +): boolean { + return emittedTurns.get(sessionId)?.has(turn) ?? 
false +} + +export function resetToolHookBookkeeping(): void { + emittedTurns.clear() +} + +function baseObservation( + ctx: ToolHookContext, +): Pick< + StoredSkillObservation, + | 'id' + | 'sessionId' + | 'projectId' + | 'projectName' + | 'cwd' + | 'timestamp' + | 'source' + | 'turn' +> { + return { + id: randomUUID(), + sessionId: ctx.sessionId, + projectId: ctx.projectId, + projectName: ctx.projectName, + cwd: ctx.cwd, + timestamp: new Date().toISOString(), + source: 'tool-hook', + // Persist turn so runtimeObserver can filter tool-hook observations by + // the current turn rather than sweeping all historical tool-hook data + // (codex review Q1). + turn: ctx.turn, + } +} + +// Cached import promise — resolved once so the hot path pays no repeated +// dynamic-import overhead after the first invocation. +let _depImportCache: + | Promise<{ + resolveProjectContext: (cwd: string) => SkillLearningProjectContext + isSkillLearningEnabled: () => boolean + RUNTIME_SESSION_ID: string + getRuntimeTurn: () => number + }> + | undefined + +function _getDeps() { + if (!_depImportCache) { + _depImportCache = Promise.all([ + import('./projectContext.js'), + import('./featureCheck.js'), + import('./runtimeObserver.js'), + ]).then(([pc, fc, ro]) => ({ + resolveProjectContext: pc.resolveProjectContext, + isSkillLearningEnabled: fc.isSkillLearningEnabled, + RUNTIME_SESSION_ID: ro.RUNTIME_SESSION_ID, + getRuntimeTurn: ro.getRuntimeTurn, + })) + } + return _depImportCache +} + +/** Reset the cached dep import (for test isolation). */ +export function resetToolHookDepsCache(): void { + _depImportCache = undefined +} + +/** + * Wrap a tool.call invocation with deterministic tool-event observation. + * + * Designed for the single call site in `toolExecution.ts`. The hook calls + * (`recordToolStart`, `recordToolComplete`, `recordToolError`) are true + * fire-and-forget: the tool invoke result is returned immediately without + * waiting for the observation to persist. 
Errors in observation are caught + * and logged so they never surface to the caller. + */ +export async function runToolCallWithSkillLearningHooks<T>( + toolName: string, + input: unknown, + callContext: { sessionId?: string; turn?: number }, + invoke: () => Promise<T>, +): Promise<T> { + let ctx: ToolHookContext | undefined + try { + const { + resolveProjectContext, + isSkillLearningEnabled, + RUNTIME_SESSION_ID, + getRuntimeTurn, + } = await _getDeps() + // When disabled, leave ctx unset and fall through to the single invoke() below so a synchronous throw from invoke is never swallowed here and retried. + if (isSkillLearningEnabled()) { + const project = resolveProjectContext(process.cwd()) + // Always emit under the runtime observer's sessionId so the post-sampling + // consumer can find our records. The prior default `'cli'` fell outside + // the observer's sessionId filter and made tool-hook observations + // structurally unconsumable (codex second-pass audit AC1). + ctx = { + sessionId: callContext.sessionId ?? RUNTIME_SESSION_ID, + turn: callContext.turn ?? getRuntimeTurn(), + projectId: project.projectId, + projectName: project.projectName, + cwd: project.cwd, + project, + } + // Fire-and-forget: do NOT await — tool invoke must not be blocked. + void recordToolStart(ctx, toolName, input).catch(e => { + logForDebugging('skill-learning: recordToolStart error') + logError(e) + }) + } + } catch (e) { + // Never let observation setup errors affect tool execution. + logForDebugging('skill-learning: hook setup error') + logError(e) + } + try { + const result = await invoke() + if (ctx) { + // Fire-and-forget: do NOT await. + void recordToolComplete(ctx, toolName, result, 'success').catch(e => { + logForDebugging('skill-learning: recordToolComplete error') + logError(e) + }) + } + return result + } catch (error) { + if (ctx) { + // Fire-and-forget: do NOT await. 
+ void recordToolError(ctx, toolName, error).catch(e => { + logForDebugging('skill-learning: recordToolError error') + logError(e) + }) + } + throw error + } +} + +export async function recordToolStart( + ctx: ToolHookContext, + toolName: string, + input?: unknown, +): Promise { + markTurn(ctx.sessionId, ctx.turn) + const observation: StoredSkillObservation = { + ...baseObservation(ctx), + event: 'tool_start', + toolName, + toolInput: stringify(input), + } + return appendObservation(observation, { project: ctx.project }) +} + +export async function recordToolComplete( + ctx: ToolHookContext, + toolName: string, + output?: unknown, + outcome: SkillObservationOutcome = 'success', +): Promise { + markTurn(ctx.sessionId, ctx.turn) + const observation: StoredSkillObservation = { + ...baseObservation(ctx), + event: 'tool_complete', + toolName, + toolOutput: stringify(output), + outcome, + } + return appendObservation(observation, { project: ctx.project }) +} + +export async function recordToolError( + ctx: ToolHookContext, + toolName: string, + error: unknown, +): Promise { + markTurn(ctx.sessionId, ctx.turn) + const observation: StoredSkillObservation = { + ...baseObservation(ctx), + event: 'tool_complete', + toolName, + toolOutput: stringify(error), + outcome: 'failure', + } + return appendObservation(observation, { project: ctx.project }) +} + +export async function recordUserCorrection( + ctx: ToolHookContext, + messageText: string, +): Promise { + markTurn(ctx.sessionId, ctx.turn) + const observation: StoredSkillObservation = { + ...baseObservation(ctx), + event: 'user_message', + messageText, + } + return appendObservation(observation, { project: ctx.project }) +} + +function stringify(value: unknown): string | undefined { + if (value === undefined || value === null) return undefined + if (typeof value === 'string' || value instanceof Error) return String(value) // Error fields are non-enumerable, so JSON.stringify(err) would yield "{}" + try { + return JSON.stringify(value) + } catch { + return String(value) + } +} diff --git a/src/services/skillLearning/types.ts 
b/src/services/skillLearning/types.ts new file mode 100644 index 000000000..e723baa88 --- /dev/null +++ b/src/services/skillLearning/types.ts @@ -0,0 +1,109 @@ +export type SkillLearningScope = 'project' | 'global' + +export type SkillGapStatus = 'pending' | 'draft' | 'active' | 'rejected' + +export type SkillObservationEvent = + | 'user_message' + | 'assistant_message' + | 'tool_start' + | 'tool_complete' + | 'tool_error' + +export type SkillObservationOutcome = 'success' | 'failure' | 'unknown' + +export const INSTINCT_DOMAINS = [ + 'workflow', + 'testing', + 'debugging', + 'code-style', + 'security', + 'git', + 'project', +] as const + +export type InstinctDomain = (typeof INSTINCT_DOMAINS)[number] + +export type InstinctSource = + | 'session-observation' + | 'repo-analysis' + | 'imported' + +export type InstinctStatus = + | 'pending' + | 'active' + | 'stale' + | 'superseded' + | 'retired' + | 'archived' + | 'conflict-hold' + +export type ProjectContextSource = + | 'claude_project_dir' + | 'git_remote' + | 'git_root' + | 'global' + +export interface SkillObservation { + id: string + timestamp: string + event: SkillObservationEvent + sessionId: string + projectId: string + projectName: string + cwd: string + toolName?: string + toolInput?: unknown + toolOutput?: unknown + messageText?: string + outcome?: SkillObservationOutcome +} + +export interface Instinct { + id: string + trigger: string + action: string + confidence: number + domain: InstinctDomain + source: InstinctSource + scope: SkillLearningScope + projectId?: string + projectName?: string + evidence: string[] + evidenceOutcome?: SkillObservationOutcome + createdAt: string + updatedAt: string + status: InstinctStatus +} + +export interface LearnedSkillDraft { + name: string + description: string + scope: SkillLearningScope + sourceInstinctIds: string[] + confidence: number + content: string + outputPath: string +} + +export interface SkillLearningProjectContext { + projectId: string + projectName: string 
+ scope: SkillLearningScope + source: ProjectContextSource + cwd: string + projectRoot?: string + gitRemote?: string + storageDir: string +} + +export interface SkillLearningProjectRecord + extends SkillLearningProjectContext { + firstSeenAt: string + lastSeenAt: string +} + +export interface SkillLearningProjectsRegistry { + version: 1 + updatedAt: string + projects: Record<string, SkillLearningProjectRecord> +} diff --git a/src/services/skillSearch/__tests__/intentNormalize.test.ts b/src/services/skillSearch/__tests__/intentNormalize.test.ts new file mode 100644 index 000000000..13a98b6da --- /dev/null +++ b/src/services/skillSearch/__tests__/intentNormalize.test.ts @@ -0,0 +1,229 @@ +import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test' + +// Must mock queryHaiku before importing the module under test so the ESM +// import binding picks up the stub. +const haikuCalls: Array<{ systemPrompt: unknown; userPrompt: string }> = [] +let haikuResponder: (userPrompt: string) => Promise<unknown> = async () => ({ + message: { content: [{ type: 'text', text: 'optimize code performance' }] }, +}) + +mock.module('../../api/claude.js', () => ({ + queryHaiku: mock( + async (args: { systemPrompt: unknown; userPrompt: string }) => { + haikuCalls.push({ + systemPrompt: args.systemPrompt, + userPrompt: args.userPrompt, + }) + return haikuResponder(args.userPrompt) + }, + ), +})) + +import { + clearIntentNormalizeCache, + isIntentNormalizeEnabled, + normalizeQueryIntent, +} from '../intentNormalize.js' + +const originalEnv = { ...process.env } + +beforeEach(() => { + process.env = { ...originalEnv } + haikuCalls.length = 0 + haikuResponder = async () => ({ + message: { content: [{ type: 'text', text: 'optimize code performance' }] }, + }) + clearIntentNormalizeCache() +}) + +afterEach(() => { + process.env = { ...originalEnv } + clearIntentNormalizeCache() +}) + +describe('isIntentNormalizeEnabled', () => { + test('defaults to disabled when flag is unset', () => { + delete 
process.env.SKILL_SEARCH_INTENT_ENABLED + expect(isIntentNormalizeEnabled()).toBe(false) + }) + + test('enabled when flag is "1"', () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + expect(isIntentNormalizeEnabled()).toBe(true) + }) + + test('disabled for any value other than "1"', () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = 'true' + expect(isIntentNormalizeEnabled()).toBe(false) + }) +}) + +describe('normalizeQueryIntent — feature flag gating', () => { + test('returns query unchanged when flag is off', async () => { + delete process.env.SKILL_SEARCH_INTENT_ENABLED + const result = await normalizeQueryIntent('帮我优化代码的性能') + expect(result).toBe('帮我优化代码的性能') + expect(haikuCalls.length).toBe(0) + }) + + test('returns empty string as-is without calling Haiku', async () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + const result = await normalizeQueryIntent('') + expect(result).toBe('') + expect(haikuCalls.length).toBe(0) + }) + + test('trims whitespace-only input to empty string', async () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + const result = await normalizeQueryIntent(' \n ') + expect(result).toBe('') + expect(haikuCalls.length).toBe(0) + }) +}) + +describe('normalizeQueryIntent — ASCII fast path', () => { + test('ASCII query bypasses Haiku and returns unchanged', async () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + const result = await normalizeQueryIntent('optimize code performance') + expect(result).toBe('optimize code performance') + expect(haikuCalls.length).toBe(0) + }) + + test('ASCII query with punctuation still bypasses Haiku', async () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + const result = await normalizeQueryIntent('audit feature flags for stubs') + expect(result).toBe('audit feature flags for stubs') + expect(haikuCalls.length).toBe(0) + }) +}) + +describe('normalizeQueryIntent — CJK path calls Haiku', () => { + test('CJK query concatenates keywords returned by Haiku', async () => { + 
process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + haikuResponder = async () => ({ + message: { + content: [{ type: 'text', text: 'optimize code performance refactor' }], + }, + }) + + const result = await normalizeQueryIntent('帮我优化代码的性能') + + expect(haikuCalls.length).toBe(1) + expect(result).toBe('帮我优化代码的性能 optimize code performance refactor') + }) + + test('mixed CJK + ASCII query also calls Haiku', async () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + haikuResponder = async () => ({ + message: { content: [{ type: 'text', text: 'review code audit' }] }, + }) + const result = await normalizeQueryIntent('帮我做 code review') + expect(haikuCalls.length).toBe(1) + expect(result).toBe('帮我做 code review review code audit') + }) + + test('Haiku output gets sanitized: lowercased, punctuation stripped', async () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + haikuResponder = async () => ({ + message: { + content: [{ type: 'text', text: 'Optimize, Code! Performance?' }], + }, + }) + const result = await normalizeQueryIntent('优化代码') + expect(result).toBe('优化代码 optimize code performance') + }) +}) + +describe('normalizeQueryIntent — graceful fallback', () => { + test('empty LLM response falls back to original query', async () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + haikuResponder = async () => ({ + message: { content: [{ type: 'text', text: '' }] }, + }) + const result = await normalizeQueryIntent('优化代码') + expect(result).toBe('优化代码') + expect(haikuCalls.length).toBe(1) + }) + + test('Haiku throwing an error falls back to original query', async () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + haikuResponder = async () => { + throw new Error('network down') + } + const result = await normalizeQueryIntent('重构代码') + expect(result).toBe('重构代码') + expect(haikuCalls.length).toBe(1) + }) + + test('malformed LLM response (no text blocks) falls back', async () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + haikuResponder = async () => ({ 
message: { content: 'not-an-array' } }) + const result = await normalizeQueryIntent('优化代码') + expect(result).toBe('优化代码') + }) + + test('LLM responds with only punctuation -> sanitize empties it -> fallback', async () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + haikuResponder = async () => ({ + message: { content: [{ type: 'text', text: '!!!???' }] }, + }) + const result = await normalizeQueryIntent('优化代码') + expect(result).toBe('优化代码') + }) +}) + +describe('normalizeQueryIntent — cache behavior', () => { + test('repeat calls with same query use cache (only 1 Haiku call)', async () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + haikuResponder = async () => ({ + message: { content: [{ type: 'text', text: 'optimize code' }] }, + }) + + const a = await normalizeQueryIntent('帮我优化代码') + const b = await normalizeQueryIntent('帮我优化代码') + const c = await normalizeQueryIntent('帮我优化代码') + + expect(a).toBe(b) + expect(b).toBe(c) + expect(haikuCalls.length).toBe(1) + }) + + test('different queries trigger separate Haiku calls', async () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + haikuResponder = async (userPrompt: string) => ({ + message: { + content: [{ type: 'text', text: `kw-for-${userPrompt.slice(0, 2)}` }], + }, + }) + + await normalizeQueryIntent('优化代码') + await normalizeQueryIntent('重构模块') + + expect(haikuCalls.length).toBe(2) + }) + + test('clearIntentNormalizeCache resets the cache', async () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + haikuResponder = async () => ({ + message: { content: [{ type: 'text', text: 'kw' }] }, + }) + + await normalizeQueryIntent('优化代码') + clearIntentNormalizeCache() + await normalizeQueryIntent('优化代码') + + expect(haikuCalls.length).toBe(2) + }) +}) + +describe('normalizeQueryIntent — input capping', () => { + test('very long CJK input is truncated to 500 chars before sending to Haiku', async () => { + process.env.SKILL_SEARCH_INTENT_ENABLED = '1' + const longInput = '优化代码'.repeat(300) // 1200 chars 
+ haikuResponder = async () => ({ + message: { content: [{ type: 'text', text: 'optimize code' }] }, + }) + await normalizeQueryIntent(longInput) + expect(haikuCalls[0]?.userPrompt.length).toBeLessThanOrEqual(500) + }) +}) diff --git a/src/services/skillSearch/__tests__/localSearch.test.ts b/src/services/skillSearch/__tests__/localSearch.test.ts new file mode 100644 index 000000000..63595f842 --- /dev/null +++ b/src/services/skillSearch/__tests__/localSearch.test.ts @@ -0,0 +1,221 @@ +import { describe, expect, test } from 'bun:test' +import { + searchSkills, + tokenize, + tokenizeAndStem, + type SkillIndexEntry, +} from '../localSearch.js' + +function makeEntry(overrides: Partial): SkillIndexEntry { + const tokens = overrides.tokens ?? [] + const tfVector = overrides.tfVector ?? buildTfVector(tokens) + const name = overrides.name ?? 'test-skill' + return { + name, + normalizedName: + overrides.normalizedName ?? name.toLowerCase().replace(/[-_]/g, ' '), + description: overrides.description ?? '', + whenToUse: overrides.whenToUse, + source: overrides.source ?? 'test', + loadedFrom: overrides.loadedFrom, + skillRoot: overrides.skillRoot, + contentLength: overrides.contentLength, + tokens, + tfVector, + } +} + +function buildTfVector(tokens: string[]): Map { + const freq = new Map() + for (const t of tokens) freq.set(t, (freq.get(t) ?? 
0) + 1) + const max = Math.max(...freq.values(), 1) + const tf = new Map() + for (const [term, count] of freq) tf.set(term, count / max) + return tf +} + +describe('tokenize — CJK bi-gram + ASCII', () => { + test('优化重构流程 produces five overlapping bi-grams', () => { + const tokens = tokenize('优化重构流程') + expect(tokens).toContain('优化') + expect(tokens).toContain('化重') + expect(tokens).toContain('重构') + expect(tokens).toContain('构流') + expect(tokens).toContain('流程') + expect(tokens.length).toBe(5) + }) + + test('pure ASCII input retains prior behaviour (regression)', () => { + const tokens = tokenize('Refactor TypeScript helpers') + expect(tokens).toContain('refactor') + expect(tokens).toContain('typescript') + expect(tokens).toContain('helpers') + }) + + test('mixed Chinese + English is segmented on both sides', () => { + const tokens = tokenize('优化 refactor 流程') + expect(tokens).toContain('优化') + expect(tokens).toContain('流程') + expect(tokens).toContain('refactor') + // Adjacent CJK segments are separated by ASCII content, so no cross-segment + // bi-gram should appear. 
+ expect(tokens).not.toContain('化流') + }) + + test('isolated single Chinese character produces no bi-gram', () => { + const tokens = tokenize('优 is lonely') + expect(tokens.some(t => /[\u4e00-\u9fff]/.test(t))).toBe(false) + expect(tokens).toContain('lonely') + }) + + test('ASCII stop words still filtered in mixed input', () => { + const tokens = tokenize('the 优化 is fast') + expect(tokens).not.toContain('the') + expect(tokens).not.toContain('is') + expect(tokens).toContain('优化') + expect(tokens).toContain('fast') + }) +}) + +describe('tokenizeAndStem — CJK passes through, ASCII stemmed', () => { + test('CJK bi-grams are not stemmed', () => { + const tokens = tokenizeAndStem('优化流程') + expect(tokens).toContain('优化') + expect(tokens).toContain('化流') + expect(tokens).toContain('流程') + }) + + test('ASCII words are stemmed while CJK survives', () => { + const tokens = tokenizeAndStem('refactoring 重构 helpers') + expect(tokens).toContain('refactor') + expect(tokens).toContain('重构') + expect(tokens).toContain('helper') + }) +}) + +describe('searchSkills — CJK query against skill index', () => { + test('Chinese query against Chinese-metadata skill produces positive score', () => { + const chineseSkillTokens = tokenizeAndStem( + 'refactor-cleaner 清理 重构 流程 的工具', + ) + const unrelatedTokens = tokenizeAndStem( + 'database-migration tool for schema upgrades', + ) + const index: SkillIndexEntry[] = [ + makeEntry({ + name: 'refactor-cleaner', + description: '清理和重构流程辅助', + tokens: chineseSkillTokens, + }), + makeEntry({ + name: 'database-migration', + description: 'schema upgrade', + tokens: unrelatedTokens, + }), + ] + + const results = searchSkills('优化重构流程', index, 5) + + expect(results.length).toBeGreaterThan(0) + expect(results[0]?.name).toBe('refactor-cleaner') + expect(results[0]?.score).toBeGreaterThan(0) + }) + + test('pure English query still ranks English skill first (regression)', () => { + const refactorTokens = tokenizeAndStem( + 'refactor clean typescript code helper', 
+ ) + const unrelatedTokens = tokenizeAndStem( + 'security review audit vulnerabilities', + ) + const index: SkillIndexEntry[] = [ + makeEntry({ + name: 'refactor-helper', + description: 'refactor typescript', + tokens: refactorTokens, + }), + makeEntry({ + name: 'security-review', + description: 'security audit', + tokens: unrelatedTokens, + }), + ] + + const results = searchSkills('refactor typescript', index, 5) + + expect(results[0]?.name).toBe('refactor-helper') + }) + + test('CJK query with only 1 matching bi-gram is filtered out (Proposal D)', () => { + const promptOptTokens = tokenizeAndStem( + 'prompt-optimizer optimize prompts for better performance 当前最佳实践', + ) + const otherTokens = tokenizeAndStem( + 'database-migration tool for schema upgrades', + ) + const index: SkillIndexEntry[] = [ + makeEntry({ + name: 'prompt-optimizer', + description: 'optimize prompts', + tokens: promptOptTokens, + }), + makeEntry({ + name: 'database-migration', + description: 'schema upgrade', + tokens: otherTokens, + }), + ] + + const results = searchSkills('研究当前代码', index, 5) + + expect(results.length).toBe(0) + }) + + test('CJK query with 2+ matching bi-grams passes the gate', () => { + const refactorTokens = tokenizeAndStem( + 'refactor-cleaner 代码重构 清理冗余代码', + ) + const unrelatedTokens = tokenizeAndStem( + 'database-migration tool for schema upgrades', + ) + const index: SkillIndexEntry[] = [ + makeEntry({ + name: 'refactor-cleaner', + description: '代码重构清理', + tokens: refactorTokens, + }), + makeEntry({ + name: 'database-migration', + description: 'schema upgrade', + tokens: unrelatedTokens, + }), + ] + + const results = searchSkills('重构代码', index, 5) + + expect(results.length).toBeGreaterThan(0) + expect(results[0]?.name).toBe('refactor-cleaner') + }) + + test('exact skill name in query boosts score (Proposal C)', () => { + const codeReviewTokens = tokenizeAndStem('code-review review code quality') + const securityTokens = tokenizeAndStem('security-review review 
security') + const index: SkillIndexEntry[] = [ + makeEntry({ + name: 'code-review', + description: 'review code quality', + tokens: codeReviewTokens, + }), + makeEntry({ + name: 'security-review', + description: 'review security', + tokens: securityTokens, + }), + ] + + const results = searchSkills('code review', index, 5) + + expect(results[0]?.name).toBe('code-review') + expect(results[0]!.score).toBeGreaterThanOrEqual(0.75) + }) +}) diff --git a/src/services/skillSearch/__tests__/prefetch.extractQuery.test.ts b/src/services/skillSearch/__tests__/prefetch.extractQuery.test.ts new file mode 100644 index 000000000..b9ab234b7 --- /dev/null +++ b/src/services/skillSearch/__tests__/prefetch.extractQuery.test.ts @@ -0,0 +1,123 @@ +import { describe, expect, test } from 'bun:test' +import { extractQueryFromMessages } from '../prefetch.js' +import type { Message } from '../../../types/message.js' + +function userText(text: string): Message { + return { type: 'user', content: text } as unknown as Message +} + +function userTextBlocks(text: string): Message { + return { + type: 'user', + content: [{ type: 'text', text }], + } as unknown as Message +} + +function userToolResult(id: string): Message { + return { + type: 'user', + content: [{ type: 'tool_result', tool_use_id: id, content: 'output' }], + } as unknown as Message +} + +function assistantText(text: string): Message { + return { type: 'assistant', content: text } as unknown as Message +} + +describe('extractQueryFromMessages — inter-turn穿透逻辑', () => { + test('null input + messages末尾是tool_result → 穿透到真实user文本', () => { + const messages: Message[] = [ + userText('研究当前代码'), + assistantText('调用工具'), + userToolResult('tool_01'), + ] + const query = extractQueryFromMessages(null, messages) + expect(query).toBe('研究当前代码') + }) + + test('null input + messages末尾是text block形式的user → 正确提取', () => { + const messages: Message[] = [ + userTextBlocks('refactor the auth module'), + assistantText('thinking...'), + 
userToolResult('tool_02'), + ] + const query = extractQueryFromMessages(null, messages) + expect(query).toBe('refactor the auth module') + }) + + test('null input + 连续多轮tool_result → 继续向前找到最早的user文本', () => { + const messages: Message[] = [ + userText('研究当前代码'), + assistantText('第一次调用'), + userToolResult('tool_a'), + assistantText('第二次调用'), + userToolResult('tool_b'), + assistantText('第三次调用'), + userToolResult('tool_c'), + ] + const query = extractQueryFromMessages(null, messages) + expect(query).toBe('研究当前代码') + }) + + test('null input + 空messages → 空串', () => { + const query = extractQueryFromMessages(null, []) + expect(query).toBe('') + }) + + test('null input + 全是tool_result (无真实文本) → 空串', () => { + const messages: Message[] = [ + userToolResult('tool_a'), + userToolResult('tool_b'), + ] + const query = extractQueryFromMessages(null, messages) + expect(query).toBe('') + }) + + test('string input + null messages → 只返回input', () => { + const query = extractQueryFromMessages('hello world', []) + expect(query).toBe('hello world') + }) + + test('string input + 有user文本 → 两者拼接', () => { + const messages: Message[] = [userText('previous query')] + const query = extractQueryFromMessages('new query', messages) + expect(query).toContain('new query') + expect(query).toContain('previous query') + }) + + test('超长user文本被截断到500字', () => { + const longText = 'a'.repeat(1000) + const messages: Message[] = [userText(longText)] + const query = extractQueryFromMessages(null, messages) + expect(query.length).toBe(500) + }) + + test('tool_result里含text字段 (但type=tool_result) → 必须跳过,不能误用', () => { + const messages: Message[] = [ + userText('real query'), + { + type: 'user', + content: [ + { + type: 'tool_result', + text: 'this is tool output masquerading as text', + }, + ], + } as unknown as Message, + ] + const query = extractQueryFromMessages(null, messages) + expect(query).toBe('real query') + }) + + test('user content数组里text为空串 → 跳过空text继续找', () => { + const messages: Message[] = [ 
+ userText('real query'), + { + type: 'user', + content: [{ type: 'text', text: ' ' }], + } as unknown as Message, + ] + const query = extractQueryFromMessages(null, messages) + expect(query).toBe('real query') + }) +}) diff --git a/src/services/skillSearch/__tests__/prefetch.test.ts b/src/services/skillSearch/__tests__/prefetch.test.ts new file mode 100644 index 000000000..290933d9e --- /dev/null +++ b/src/services/skillSearch/__tests__/prefetch.test.ts @@ -0,0 +1,101 @@ +import { afterEach, beforeEach, describe, expect, test } from 'bun:test' +import { + existsSync, + mkdirSync, + mkdtempSync, + rmSync, + writeFileSync, +} from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { clearCommandsCache } from '../../../commands.js' +import { getTurnZeroSkillDiscovery } from '../prefetch.js' +import { clearSkillIndexCache } from '../localSearch.js' + +let root: string +let previousCwd: string +const originalEnv = { ...process.env } + +beforeEach(() => { + root = mkdtempSync(join(tmpdir(), 'skill-search-prefetch-')) + previousCwd = process.cwd() + process.chdir(root) + process.env = { ...originalEnv } + process.env.CLAUDE_CONFIG_DIR = join(root, 'config') + process.env.CLAUDE_SKILL_LEARNING_HOME = join(root, 'learning') + process.env.SKILL_SEARCH_ENABLED = '1' + process.env.SKILL_LEARNING_ENABLED = '1' + process.env.NODE_ENV = 'test' + process.env.ANTHROPIC_API_KEY = 'test-key' + clearCommandsCache() + clearSkillIndexCache() +}) + +afterEach(() => { + process.chdir(previousCwd) + process.env = { ...originalEnv } + clearCommandsCache() + clearSkillIndexCache() + try { + rmSync(root, { + recursive: true, + force: true, + maxRetries: 10, + retryDelay: 100, + }) + } catch { + // Windows can keep transient handles after dynamic command loading. 
+ } +}) + +describe('skill search prefetch', () => { + test('auto-loads high-confidence project skill content', async () => { + const skillDir = join(root, '.claude', 'skills', 'feature-audit') + mkdirSync(skillDir, { recursive: true }) + writeFileSync( + join(skillDir, 'SKILL.md'), + [ + '---', + 'name: feature-audit', + 'description: Audit feature flags and classify minimal implementations', + '---', + '', + '# Feature Audit', + '', + 'Use the feature flag audit workflow and classify flags as stub, shell, MVP, or thin-toggle.', + ].join('\n'), + ) + + const attachment = await getTurnZeroSkillDiscovery( + 'audit feature flags for minimal implementation stubs', + [], + { agentId: undefined } as any, + ) + + expect(attachment?.type).toBe('skill_discovery') + if (attachment?.type !== 'skill_discovery') { + throw new Error('expected skill_discovery attachment') + } + expect(attachment.skills[0]?.name).toBe('feature-audit') + expect(attachment.skills[0]?.autoLoaded).toBe(true) + expect(attachment.skills[0]?.content).toContain( + 'feature flag audit workflow', + ) + }) + + test('records a pending skill gap on the first unmatched prompt (no draft file yet)', async () => { + const attachment = await getTurnZeroSkillDiscovery( + 'frobnicate zephyr ledger workflow', + [], + { agentId: undefined } as any, + ) + + expect(attachment?.type).toBe('skill_discovery') + if (attachment?.type !== 'skill_discovery') { + throw new Error('expected skill_discovery attachment') + } + expect(attachment.skills).toEqual([]) + expect(attachment.gap?.status).toBe('pending') + expect(attachment.gap?.draftPath).toBeUndefined() + }) +}) diff --git a/src/services/skillSearch/featureCheck.ts b/src/services/skillSearch/featureCheck.ts index ff8950f4b..38dcda534 100644 --- a/src/services/skillSearch/featureCheck.ts +++ b/src/services/skillSearch/featureCheck.ts @@ -1,3 +1,10 @@ -// Auto-generated stub — replace with real implementation -export {}; -export const isSkillSearchEnabled: () => boolean = 
() => false; +import { feature } from 'bun:bundle' + +export function isSkillSearchEnabled(): boolean { + if (process.env.SKILL_SEARCH_ENABLED === '0') return false + if (process.env.SKILL_SEARCH_ENABLED === '1') return true + if (feature('EXPERIMENTAL_SKILL_SEARCH')) { + return true + } + return false +} diff --git a/src/services/skillSearch/intentNormalize.ts b/src/services/skillSearch/intentNormalize.ts new file mode 100644 index 000000000..9073958b8 --- /dev/null +++ b/src/services/skillSearch/intentNormalize.ts @@ -0,0 +1,149 @@ +/** + * Intent Normalization Layer for Skill Search + * + * Problem: TF-IDF bag-of-words loses meaning when the user query is in Chinese + * and most skill descriptions are English. CJK bi-grams get DF=1 (language + * mismatch, not true rarity), producing IDF values that promote spurious + * matches like `prompt-optimizer` for `帮我优化代码的性能`. + * + * Fix: Before handing the query to `searchSkills()`, ask Haiku to normalize it + * into 3-6 English task/object keywords. Concatenate the normalized form with + * the original so TF-IDF sees both — English keywords carry real matching + * signal, the original text stays as a fallback. + * + * Design: + * - Turn-zero only (blocking on user input): one Haiku call per session-unique + * query. Not called in inter-turn prefetch (which repeats per tool loop). + * - Process-level cache: identical queries within a session reuse the result. + * - Graceful fallback: Haiku failure / timeout / empty → return original query. + * - ASCII-only fast path: queries without CJK characters skip the LLM entirely. + * - Feature-flagged: `SKILL_SEARCH_INTENT_ENABLED=1` to opt in. + */ + +import { queryHaiku } from '../api/claude.js' +import { asSystemPrompt } from '../../utils/systemPromptType.js' +import { logForDebugging } from '../../utils/debug.js' + +const INTENT_SYSTEM_PROMPT = `You are a query normalizer for a skill-search index. 
+ +Given a user's natural-language request (often Chinese, possibly long), extract 3-6 English keywords that capture: +1. TASK VERB (optimize, review, debug, refactor, test, deploy, analyze, write, audit, design, research, cleanup, implement) +2. OBJECT (code, prompt, test, UI, API, database, documentation, performance, security, architecture) +3. CONTEXT/DOMAIN when clear (frontend, backend, mobile, python, go, rust, typescript) + +Output ONLY space-separated lowercase English keywords. No prose, no JSON, no punctuation, no code fences. + +Examples: +- "帮我优化代码的性能" -> optimize code performance refactor +- "研究当前代码的实现然后分析优化思路" -> analyze code research refactor architecture +- "优化 prompt 的表达" -> optimize prompt refine writing +- "帮我做 code review" -> code review audit +- "清理代码里的 TODO" -> cleanup refactor dead-code +- "重构这个模块的代码" -> refactor code modularize +- "帮我写个 Go 单元测试" -> write test golang unit + +Output ONLY keywords. Nothing else.` + +const DEFAULT_TIMEOUT_MS = 6_000 +const MAX_QUERY_CHARS = 500 +const MAX_KEYWORDS_CHARS = 120 + +/** Process-level cache keyed by the trimmed query; fallback (un-normalized) results are cached too. */ +const cache = new Map<string, string>() + +export function isIntentNormalizeEnabled(): boolean { + return process.env.SKILL_SEARCH_INTENT_ENABLED === '1' +} + +/** Only reset between tests. */ +export function clearIntentNormalizeCache(): void { + cache.clear() +} + +/** + * Normalize a user query so TF-IDF sees English task keywords. + * Returns `<trimmed query> <keywords>` on success, or the trimmed query on any + * failure path. Never throws. + */ +export async function normalizeQueryIntent(query: string): Promise<string> { + const trimmed = query.trim() + if (!trimmed) return trimmed + if (!isIntentNormalizeEnabled()) return trimmed + + // ASCII-only queries are already in the right shape for the index. 
+ if (!/[\u4e00-\u9fff\u3400-\u4dbf]/.test(trimmed)) return trimmed // same ranges as CJK_RANGE in localSearch.ts + + const cached = cache.get(trimmed) + if (cached !== undefined) return cached + + const capped = trimmed.slice(0, MAX_QUERY_CHARS) + const keywords = await callHaiku(capped) + const result = keywords ? `${trimmed} ${keywords}` : trimmed + cache.set(trimmed, result) + logForDebugging( + `[skill-search] intent normalized: "${trimmed.slice(0, 40)}" -> "${keywords}"`, + ) + return result +} + +async function callHaiku(query: string): Promise<string> { + const timeoutMs = getTimeoutMs() + const controller = new AbortController() + const timer = setTimeout(() => controller.abort(), timeoutMs) + + try { + const response = await queryHaiku({ + systemPrompt: asSystemPrompt([INTENT_SYSTEM_PROMPT]), + userPrompt: query, + signal: controller.signal, + options: { + querySource: 'skill_search_intent', + enablePromptCaching: true, + agents: [], + isNonInteractiveSession: true, + hasAppendSystemPrompt: false, + mcpTools: [], + }, + }) + const text = extractResponseText(response?.message?.content) + return sanitizeKeywords(text) + } catch (error) { + logForDebugging(`[skill-search] intent normalize failed: ${error}`) + return '' + } finally { + clearTimeout(timer) + } +} + +function getTimeoutMs(): number { + const raw = process.env.SKILL_SEARCH_INTENT_TIMEOUT_MS + if (!raw) return DEFAULT_TIMEOUT_MS + const parsed = Number(raw) + if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_TIMEOUT_MS + return parsed +} + +function extractResponseText(content: unknown): string { + if (!Array.isArray(content)) return '' + const parts: string[] = [] + for (const block of content) { + if (!block || typeof block !== 'object') continue + const record = block as Record<string, unknown> + if (record.type !== 'text') continue + if (typeof record.text === 'string') parts.push(record.text) + } + return parts.join('').trim() +} + +function sanitizeKeywords(raw: string): string { + if (!raw) return '' + // Strip anything that's not a keyword character. 
Keep ascii letters, digits, + // hyphens, and spaces. Collapse whitespace. + const cleaned = raw + .toLowerCase() + .replace(/[^a-z0-9\- ]+/g, ' ') + .replace(/\s+/g, ' ') + .trim() + if (!cleaned) return '' + return cleaned.slice(0, MAX_KEYWORDS_CHARS) +} diff --git a/src/services/skillSearch/localSearch.ts b/src/services/skillSearch/localSearch.ts index f8139d653..5b2d2c643 100644 --- a/src/services/skillSearch/localSearch.ts +++ b/src/services/skillSearch/localSearch.ts @@ -1,3 +1,444 @@ -// Auto-generated stub — replace with real implementation -export {}; -export const clearSkillIndexCache: () => void = () => {}; +import { logForDebugging } from '../../utils/debug.js' + +export interface SkillIndexEntry { + name: string + normalizedName: string + description: string + whenToUse: string | undefined + source: string + loadedFrom: string | undefined + skillRoot: string | undefined + contentLength: number | undefined + tokens: string[] + tfVector: Map +} + +export interface SearchResult { + name: string + description: string + score: number + shortId?: string + source?: string + loadedFrom?: string + skillRoot?: string + contentLength?: number +} + +const STOP_WORDS = new Set([ + 'a', + 'an', + 'the', + 'is', + 'are', + 'was', + 'were', + 'be', + 'been', + 'being', + 'have', + 'has', + 'had', + 'do', + 'does', + 'did', + 'will', + 'would', + 'could', + 'should', + 'may', + 'might', + 'shall', + 'can', + 'need', + 'dare', + 'ought', + 'used', + 'to', + 'of', + 'in', + 'for', + 'on', + 'with', + 'at', + 'by', + 'from', + 'as', + 'into', + 'through', + 'during', + 'before', + 'after', + 'above', + 'below', + 'between', + 'out', + 'off', + 'over', + 'under', + 'again', + 'further', + 'then', + 'once', + 'here', + 'there', + 'when', + 'where', + 'why', + 'how', + 'all', + 'each', + 'every', + 'both', + 'few', + 'more', + 'most', + 'other', + 'some', + 'such', + 'no', + 'nor', + 'not', + 'only', + 'own', + 'same', + 'so', + 'than', + 'too', + 'very', + 'just', + 
'because', + 'but', + 'and', + 'or', + 'if', + 'while', + 'this', + 'that', + 'these', + 'those', + 'it', + 'its', + 'i', + 'me', + 'my', + 'we', + 'our', + 'you', + 'your', + 'he', + 'him', + 'his', + 'she', + 'her', + 'they', + 'them', + 'their', + 'what', + 'which', + 'who', + 'whom', + 'use', + 'using', + 'used', +]) + +const CJK_RANGE = /[\u4e00-\u9fff\u3400-\u4dbf]/ + +function isCjk(ch: string): boolean { + return CJK_RANGE.test(ch) +} + +export function tokenize(text: string): string[] { + const tokens: string[] = [] + const lower = text.toLowerCase() + let i = 0 + + while (i < lower.length) { + if (isCjk(lower[i]!)) { + let cjkRun = '' + while (i < lower.length && isCjk(lower[i]!)) { + cjkRun += lower[i] + i++ + } + for (let j = 0; j < cjkRun.length - 1; j++) { + tokens.push(cjkRun.slice(j, j + 2)) + } + } else if (/[a-z0-9]/.test(lower[i]!)) { + let word = '' + while (i < lower.length && /[a-z0-9\-_]/.test(lower[i]!)) { + word += lower[i] + i++ + } + const cleaned = word.replace(/^[-_]+|[-_]+$/g, '') + if (cleaned && !STOP_WORDS.has(cleaned)) { + tokens.push(cleaned) + } + } else { + i++ + } + } + + return tokens +} + +function stem(word: string): string { + if (isCjk(word[0] ?? 
'')) return word + let s = word + if (s.endsWith('ing') && s.length > 5) s = s.slice(0, -3) + else if (s.endsWith('tion') && s.length > 5) s = s.slice(0, -4) + else if (s.endsWith('ness') && s.length > 5) s = s.slice(0, -4) + else if (s.endsWith('ment') && s.length > 5) s = s.slice(0, -4) + else if (s.endsWith('ers') && s.length > 4) s = s.slice(0, -1) + else if (s.endsWith('er') && s.length > 4) s = s.slice(0, -2) + else if (s.endsWith('es') && s.length > 4) s = s.slice(0, -2) + else if (s.endsWith('s') && s.length > 3 && !s.endsWith('ss')) + s = s.slice(0, -1) + else if (s.endsWith('ed') && s.length > 4) s = s.slice(0, -2) + else if (s.endsWith('ly') && s.length > 4) s = s.slice(0, -2) + return s +} + +export function tokenizeAndStem(text: string): string[] { + return tokenize(text).map(stem) +} + +const FIELD_WEIGHT = { + name: 3.0, + whenToUse: 2.0, + description: 1.0, + allowedTools: 0.3, +} as const + +function computeWeightedTf( + fields: { tokens: string[]; weight: number }[], +): Map { + const weighted = new Map() + for (const field of fields) { + const freq = new Map() + for (const t of field.tokens) freq.set(t, (freq.get(t) ?? 0) + 1) + let max = 1 + for (const v of freq.values()) if (v > max) max = v + for (const [term, count] of freq) { + const val = (count / max) * field.weight + const existing = weighted.get(term) ?? 0 + if (val > existing) weighted.set(term, val) + } + } + return weighted +} + +function computeIdf(index: SkillIndexEntry[]): Map { + const df = new Map() + for (const entry of index) { + const seen = new Set() + for (const t of entry.tokens) { + if (!seen.has(t)) { + df.set(t, (df.get(t) ?? 
0) + 1) + seen.add(t) + } + } + } + const N = index.length + const idf = new Map<string, number>() + for (const [term, count] of df) { + idf.set(term, Math.log(N / count)) + } + return idf +} + +function cosineSimilarity( + queryTfIdf: Map<string, number>, + docTfIdf: Map<string, number>, +): number { + let dot = 0 + let normQ = 0 + let normD = 0 + + for (const [term, qWeight] of queryTfIdf) { + const dWeight = docTfIdf.get(term) ?? 0 + dot += qWeight * dWeight + normQ += qWeight * qWeight + } + for (const dWeight of docTfIdf.values()) { + normD += dWeight * dWeight + } + + const denom = Math.sqrt(normQ) * Math.sqrt(normD) + return denom === 0 ? 0 : dot / denom +} + +const DISPLAY_MIN_SCORE = ((v: number) => (Number.isFinite(v) ? v : 0.1))( + Number(process.env.SKILL_SEARCH_DISPLAY_MIN_SCORE ?? '0.10'), +) // a non-numeric override falls back to 0.10; NaN would make every `score >= DISPLAY_MIN_SCORE` check fail +const NAME_MATCH_BONUS = 0.4 +const NAME_MATCH_MIN_LENGTH = 4 +const CJK_MIN_BIGRAM_MATCHES = 2 + +function normalizeSkillName(name: string): string { + return name.toLowerCase().replace(/[-_]/g, ' ') +} + +function splitHyphenatedName(name: string): string[] { + return name + .toLowerCase() + .split(/[-_]/) + .filter(p => p.length >= 3) +} + +let cachedIndex: SkillIndexEntry[] | null = null +let cachedIdf: Map<string, number> | null = null +let cachedCwd: string | null = null + +export function clearSkillIndexCache(): void { + cachedIndex = null + cachedIdf = null + cachedCwd = null + logForDebugging('[skill-search] index cache cleared') +} + +export async function getSkillIndex(cwd: string): Promise<SkillIndexEntry[]> { + if (cachedIndex && cachedCwd === cwd) return cachedIndex + + const { getCommands } = await import('../../commands.js') + const commands = await getCommands(cwd) + + const entries: SkillIndexEntry[] = [] + for (const cmd of commands) { + if ((cmd as Record<string, unknown>).type !== 'prompt') continue + if ((cmd as Record<string, unknown>).disableModelInvocation) continue + + const name = cmd.name + const description = cmd.description ??
'' + const whenToUse = (cmd as Record).whenToUse as + | string + | undefined + const allowedTools = + ( + (cmd as Record).allowedTools as string[] | undefined + )?.join(' ') ?? '' + + const nameTokens = tokenizeAndStem(name) + const nameParts = splitHyphenatedName(name) + const nameWithParts = [ + ...nameTokens, + ...nameParts.map(stem).filter(t => !STOP_WORDS.has(t)), + ] + + const descTokens = tokenizeAndStem(description) + const whenTokens = tokenizeAndStem(whenToUse ?? '') + const toolsTokens = tokenizeAndStem(allowedTools) + + const allTokens = [ + ...new Set([ + ...nameWithParts, + ...descTokens, + ...whenTokens, + ...toolsTokens, + ]), + ] + + const tfVector = computeWeightedTf([ + { tokens: nameWithParts, weight: FIELD_WEIGHT.name }, + { tokens: whenTokens, weight: FIELD_WEIGHT.whenToUse }, + { tokens: descTokens, weight: FIELD_WEIGHT.description }, + { tokens: toolsTokens, weight: FIELD_WEIGHT.allowedTools }, + ]) + + entries.push({ + name, + normalizedName: normalizeSkillName(name), + description, + whenToUse, + source: ((cmd as Record).source as string) ?? 'unknown', + loadedFrom: (cmd as Record).loadedFrom as + | string + | undefined, + skillRoot: (cmd as Record).skillRoot as + | string + | undefined, + contentLength: (cmd as Record).contentLength as + | number + | undefined, + tokens: allTokens, + tfVector, + }) + } + + const idf = computeIdf(entries) + + for (const entry of entries) { + for (const [term, tf] of entry.tfVector) { + entry.tfVector.set(term, tf * (idf.get(term) ?? 
0)) + } + } + + cachedIndex = entries + cachedIdf = idf + cachedCwd = cwd + logForDebugging( + `[skill-search] indexed ${entries.length} skills from ${commands.length} commands`, + ) + return entries +} + +export function searchSkills( + query: string, + index: SkillIndexEntry[], + limit = 5, +): SearchResult[] { + if (index.length === 0 || !query.trim()) return [] + + const queryTokens = tokenizeAndStem(query) + if (queryTokens.length === 0) return [] + + const queryTf = new Map<string, number>() + const freq = new Map<string, number>() + for (const t of queryTokens) freq.set(t, (freq.get(t) ?? 0) + 1) + let max = 1 + for (const v of freq.values()) if (v > max) max = v + for (const [term, count] of freq) queryTf.set(term, count / max) + + const idf = cachedIdf && index === cachedIndex ? cachedIdf : computeIdf(index) // the cached IDF table only describes cachedIndex; any other index gets its own + const queryTfIdf = new Map<string, number>() + for (const [term, tf] of queryTf) { + queryTfIdf.set(term, tf * (idf.get(term) ?? 0)) + } + + const queryCjkTokens = queryTokens.filter(t => isCjk(t[0] ?? '')) + const queryAsciiTokens = queryTokens.filter(t => !isCjk(t[0] ??
'')) + const queryLower = query.toLowerCase().replace(/[-_]/g, ' ') + + const results: SearchResult[] = [] + for (const entry of index) { + let score = cosineSimilarity(queryTfIdf, entry.tfVector) + + if (queryCjkTokens.length > 0 && score > 0) { + const matchingCjk = queryCjkTokens.filter(t => entry.tfVector.has(t)) + if (matchingCjk.length < CJK_MIN_BIGRAM_MATCHES) { + const hasAsciiMatch = queryAsciiTokens.some(t => entry.tfVector.has(t)) + if (!hasAsciiMatch) score = 0 + } + } + + if (entry.name.length >= NAME_MATCH_MIN_LENGTH) { + if (queryLower.includes(entry.normalizedName)) { + score = Math.max(score, 0.75) + } + } + + if (score >= DISPLAY_MIN_SCORE) { + results.push({ + name: entry.name, + description: entry.description, + score, + source: entry.source, + loadedFrom: entry.loadedFrom, + skillRoot: entry.skillRoot, + contentLength: entry.contentLength, + }) + } + } + + results.sort((a, b) => b.score - a.score) + return results.slice(0, limit) +} diff --git a/src/services/skillSearch/prefetch.ts b/src/services/skillSearch/prefetch.ts index 50c8729ec..6d77f6c33 100644 --- a/src/services/skillSearch/prefetch.ts +++ b/src/services/skillSearch/prefetch.ts @@ -1,18 +1,328 @@ -// Auto-generated stub — replace with real implementation import type { Attachment } from '../../utils/attachments.js' import type { Message } from '../../types/message.js' import type { ToolUseContext } from '../../Tool.js' +import type { DiscoverySignal } from './signals.js' +import { isSkillSearchEnabled } from './featureCheck.js' +import { + getSkillIndex, + searchSkills, + type SearchResult, +} from './localSearch.js' +import { normalizeQueryIntent } from './intentNormalize.js' +import { logForDebugging } from '../../utils/debug.js' +import { readFile } from 'node:fs/promises' +import { join } from 'node:path' +import { parseFrontmatter } from '../../utils/frontmatterParser.js' -export const startSkillDiscoveryPrefetch: ( +const discoveredThisSession = new Set() +const 
recordedGapSignals = new Set<string>() + +const AUTO_LOAD_MIN_SCORE = ((v: number) => (Number.isFinite(v) ? v : 0.3))( + Number(process.env.SKILL_SEARCH_AUTOLOAD_MIN_SCORE ?? '0.30'), +) // NaN here would make `score < AUTO_LOAD_MIN_SCORE` always false, auto-loading every result +const AUTO_LOAD_LIMIT = ((v: number) => (Number.isFinite(v) ? v : 2))(Number(process.env.SKILL_SEARCH_AUTOLOAD_LIMIT ?? '2')) // NaN would disable the `loadedCount >= AUTO_LOAD_LIMIT` cap +const AUTO_LOAD_MAX_CHARS = ((v: number) => (Number.isFinite(v) ? v : 12000))( + Number(process.env.SKILL_SEARCH_AUTOLOAD_MAX_CHARS ?? '12000'), +) // NaN would turn `slice(0, AUTO_LOAD_MAX_CHARS)` into an empty string + +export function extractQueryFromMessages( + input: string | null, + messages: Message[], +): string { + const parts: string[] = [] + + if (input) parts.push(input) + + // Walk backward. In inter-turn prefetch the most recent 'user' message is + // typically a tool_result (no text block), so we must keep walking until we + // find a real user utterance with string content or a text block. + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i] as Record<string, unknown> + if (msg.type !== 'user') continue + const content = msg.content + if (typeof content === 'string') { + parts.push(content.slice(0, 500)) + break + } + if (Array.isArray(content)) { + let foundText = false + for (const block of content) { + const entry = block as Record<string, unknown> + // Skip tool_result and other non-text blocks — they carry no discovery + // signal and would return undefined here regardless.
+ if (entry.type && entry.type !== 'text') continue + const text = entry.text + if (typeof text === 'string' && text.trim()) { + parts.push(text.slice(0, 500)) + foundText = true + break + } + } + if (foundText) break + } + } + + return parts.join(' ') +} + +function buildDiscoveryAttachment( + skills: SkillDiscoveryResult[], + signal: DiscoverySignal, + gap?: SkillDiscoveryGap, +): Attachment { + return { + type: 'skill_discovery', + skills, + signal, + source: 'native', + gap, + } as Attachment +} + +type SkillDiscoveryResult = { + name: string + description: string + shortId?: string + score?: number + autoLoaded?: boolean + content?: string + path?: string +} + +type SkillDiscoveryGap = { + key: string + status: 'pending' | 'draft' | 'active' + draftName?: string + draftPath?: string + activeName?: string + activePath?: string +} + +async function enrichResultsForAutoLoad( + results: SearchResult[], + context: ToolUseContext, +): Promise { + let loadedCount = 0 + const enriched: SkillDiscoveryResult[] = [] + + for (const result of results) { + const base: SkillDiscoveryResult = { + name: result.name, + description: result.description, + score: result.score, + } + + if (loadedCount >= AUTO_LOAD_LIMIT || result.score < AUTO_LOAD_MIN_SCORE) { + enriched.push(base) + continue + } + + const loaded = await loadSkillContent(result) + if (!loaded) { + enriched.push(base) + continue + } + + loadedCount++ + await markAutoLoadedSkill(result.name, loaded.path, loaded.content, context) + enriched.push({ + ...base, + autoLoaded: true, + content: loaded.content, + path: loaded.path, + }) + } + + return enriched +} + +async function loadSkillContent( + result: SearchResult, +): Promise<{ path: string; content: string } | null> { + if (!result.skillRoot) return null + + const candidates = [ + join(result.skillRoot, 'SKILL.md'), + join(result.skillRoot, 'skill.md'), + ] + + for (const path of candidates) { + try { + const raw = await readFile(path, 'utf8') + return { + path, + 
content: parseFrontmatter(raw).content.slice(0, AUTO_LOAD_MAX_CHARS), + } + } catch { + // Try next candidate. + } + } + return null +} + +async function markAutoLoadedSkill( + name: string, + path: string, + content: string, + context: ToolUseContext, +): Promise { + try { + const { addInvokedSkill } = await import('../../bootstrap/state.js') + addInvokedSkill(name, path, content, context.agentId ?? null) + } catch { + // Best effort only. + } +} + +async function maybeRecordSkillGap( + queryText: string, + results: SearchResult[], + context: ToolUseContext, + trigger: DiscoverySignal['trigger'], +): Promise { + if (trigger !== 'user_input') return undefined + if (!queryText.trim()) return undefined + + const gapSignalKey = `${trigger}:${queryText.trim().toLowerCase()}` + if (recordedGapSignals.has(gapSignalKey)) return undefined + recordedGapSignals.add(gapSignalKey) + + try { + const [{ isSkillLearningEnabled }, { recordSkillGap }] = await Promise.all([ + import('../skillLearning/featureCheck.js'), + import('../skillLearning/skillGapStore.js'), + ]) + if (!isSkillLearningEnabled()) return undefined + const gap = await recordSkillGap({ + prompt: queryText, + cwd: + ((context as Record).cwd as string) ?? process.cwd(), + sessionId: + ((context as Record).sessionId as string) ?? 
+ 'unknown-session', + recommendations: results, + }) + const status = gap.status + if (status !== 'pending' && status !== 'draft' && status !== 'active') { + return undefined + } + return { + key: gap.key, + status, + draftName: gap.draft?.name, + draftPath: gap.draft?.skillPath, + activeName: gap.active?.name, + activePath: gap.active?.skillPath, + } + } catch (error) { + logForDebugging(`[skill-search] skill gap learning error: ${error}`) + return undefined + } +} + +export async function startSkillDiscoveryPrefetch( input: string | null, messages: Message[], toolUseContext: ToolUseContext, -) => Promise = (async () => []); -export const collectSkillDiscoveryPrefetch: ( +): Promise { + if (!isSkillSearchEnabled()) return [] + + const startedAt = Date.now() + const queryText = extractQueryFromMessages(input, messages) + if (!queryText.trim()) return [] + + try { + const cwd = + ((toolUseContext as Record).cwd as string) ?? + process.cwd() + const index = await getSkillIndex(cwd) + const results = searchSkills(queryText, index) + + const newResults = results.filter(r => !discoveredThisSession.has(r.name)) + if (newResults.length === 0) return [] + + for (const r of newResults) discoveredThisSession.add(r.name) + + const signal: DiscoverySignal = { + trigger: 'assistant_turn', + queryText: queryText.slice(0, 200), + startedAt, + durationMs: Date.now() - startedAt, + indexSize: index.length, + method: 'tfidf', + } + + logForDebugging( + `[skill-search] prefetch found ${newResults.length} skills in ${signal.durationMs}ms`, + ) + + return [ + buildDiscoveryAttachment( + await enrichResultsForAutoLoad(newResults, toolUseContext), + signal, + ), + ] + } catch (error) { + logForDebugging(`[skill-search] prefetch error: ${error}`) + return [] + } +} + +export async function collectSkillDiscoveryPrefetch( pending: Promise, -) => Promise = (async (pending) => pending); -export const getTurnZeroSkillDiscovery: ( +): Promise { + try { + return await pending + } catch { + 
return [] + } +} + +export async function getTurnZeroSkillDiscovery( input: string, messages: Message[], context: ToolUseContext, -) => Promise = (async () => null); +): Promise { + if (!isSkillSearchEnabled()) return null + if (!input.trim()) return null + + const startedAt = Date.now() + + try { + const cwd = + ((context as Record).cwd as string) ?? process.cwd() + const index = await getSkillIndex(cwd) + // Intent normalization (feature-flagged, ASCII-only fast path, graceful + // fallback to original). Turn-zero is the one blocking entry — acceptable + // to add a Haiku call here since a bad match here pollutes the LLM's + // context for the entire session. + const searchQuery = await normalizeQueryIntent(input) + const results = searchSkills(searchQuery, index) + const enriched = await enrichResultsForAutoLoad(results, context) + const gap = enriched.some(result => result.autoLoaded) + ? undefined + : await maybeRecordSkillGap(input, results, context, 'user_input') + + if (results.length === 0 && !gap) return null + + for (const r of results) discoveredThisSession.add(r.name) + + const signal: DiscoverySignal = { + trigger: 'user_input', + queryText: input.slice(0, 200), + startedAt, + durationMs: Date.now() - startedAt, + indexSize: index.length, + method: 'tfidf', + } + + logForDebugging( + `[skill-search] turn-zero found ${results.length} skills in ${signal.durationMs}ms`, + ) + + return buildDiscoveryAttachment(enriched, signal, gap) + } catch (error) { + logForDebugging(`[skill-search] turn-zero error: ${error}`) + return null + } +} diff --git a/src/services/skillSearch/signals.ts b/src/services/skillSearch/signals.ts index 0b89faefe..3719eaeb1 100644 --- a/src/services/skillSearch/signals.ts +++ b/src/services/skillSearch/signals.ts @@ -1,2 +1,8 @@ -// Auto-generated stub — replace with real implementation -export type DiscoverySignal = any; +export interface DiscoverySignal { + trigger: 'user_input' | 'assistant_turn' | 'tool_call' + queryText: string 
+ startedAt: number + durationMs: number + indexSize: number + method: 'tfidf' | 'keyword' +} diff --git a/src/services/tools/toolExecution.ts b/src/services/tools/toolExecution.ts index 97852b2ad..d1bb44da4 100644 --- a/src/services/tools/toolExecution.ts +++ b/src/services/tools/toolExecution.ts @@ -130,6 +130,34 @@ import { runPostToolUseHooks, runPreToolUseHooks, } from './toolHooks.js' +import { isSkillLearningEnabled } from '../skillLearning/featureCheck.js' + +// Cached import promise for the skill-learning wrapper — paid once, not per call. +let _skillLearningWrapperCache: + | Promise<{ + runToolCallWithSkillLearningHooks: ( + toolName: string, + input: unknown, + callContext: { sessionId?: string; turn?: number }, + invoke: () => Promise, + ) => Promise + }> + | undefined + +function getSkillLearningWrapper() { + if (!_skillLearningWrapperCache) { + _skillLearningWrapperCache = import( + '../skillLearning/toolEventObserver.js' + ).catch(err => { + // Clear the cache on rejection so the next tool call can retry the + // import instead of reusing the same rejected promise forever (which + // would break every flag-on tool call in the session). + _skillLearningWrapperCache = undefined + throw err + }) + } + return _skillLearningWrapperCache +} /** Minimum total hook duration (ms) to show inline timing summary */ export const HOOK_TIMING_DISPLAY_THRESHOLD_MS = 500 @@ -1218,22 +1246,44 @@ async function checkPermissionsAndCallTool( callInput = processedInput } try { - const result = await tool.call( - callInput, - { - ...toolUseContext, - toolUseId: toolUseID, - userModified: permissionDecision.userModified ?? false, - }, - canUseTool, - assistantMessage, - progress => { - onToolProgress({ - toolUseID: progress.toolUseID, - data: progress.data, - }) - }, - ) + // AC1 parity: wrap the single canonical tool.call site with deterministic + // tool-event observation hooks (codex review follow-up). 
Hooks are + // fire-and-forget inside the wrapper; tool execution is never blocked or + // altered by skill-learning plumbing. + // + // The invoke lambda is shared between the flag-on (wrapper) and flag-off + // (direct) paths so that post-call processing is never duplicated. + const invokeToolCall = () => + tool.call( + callInput, + { + ...toolUseContext, + toolUseId: toolUseID, + userModified: permissionDecision.userModified ?? false, + }, + canUseTool, + assistantMessage, + progress => { + onToolProgress({ + toolUseID: progress.toolUseID, + data: progress.data, + }) + }, + ) + // Fast-path: skip wrapper entirely when skill-learning is disabled to + // avoid even the cached-import resolution on the hot path. + const result = isSkillLearningEnabled() + ? await (async () => { + const { runToolCallWithSkillLearningHooks } = + await getSkillLearningWrapper() + return runToolCallWithSkillLearningHooks( + tool.name, + callInput, + { sessionId: (toolUseContext as { sessionId?: string }).sessionId }, + invokeToolCall, + ) + })() + : await invokeToolCall() const durationMs = Date.now() - startTime addToToolDuration(durationMs) diff --git a/src/tools.ts b/src/tools.ts index 9c956ff65..8edc638b3 100644 --- a/src/tools.ts +++ b/src/tools.ts @@ -121,6 +121,10 @@ const coordinatorModeModule = feature('COORDINATOR_MODE') const SnipTool = feature('HISTORY_SNIP') ? require('@claude-code-best/builtin-tools/tools/SnipTool/SnipTool.js').SnipTool : null +const DiscoverSkillsTool = feature('EXPERIMENTAL_SKILL_SEARCH') + ? require('@claude-code-best/builtin-tools/tools/DiscoverSkillsTool/DiscoverSkillsTool.js') + .DiscoverSkillsTool + : null const ReviewArtifactTool = feature('REVIEW_ARTIFACT') ? require('@claude-code-best/builtin-tools/tools/ReviewArtifactTool/ReviewArtifactTool.js') .ReviewArtifactTool @@ -244,6 +248,7 @@ export function getAllBaseTools(): Tools { ...(ReviewArtifactTool ? [ReviewArtifactTool] : []), ...(getPowerShellTool() ? 
[getPowerShellTool()] : []), ...(SnipTool ? [SnipTool] : []), + ...(DiscoverSkillsTool ? [DiscoverSkillsTool] : []), ...(process.env.NODE_ENV === 'test' ? [TestingPermissionTool] : []), ListMcpResourcesTool, ReadMcpResourceTool, diff --git a/src/utils/hooks/__tests__/skillImprovement.test.ts b/src/utils/hooks/__tests__/skillImprovement.test.ts new file mode 100644 index 000000000..8abcfabe4 --- /dev/null +++ b/src/utils/hooks/__tests__/skillImprovement.test.ts @@ -0,0 +1,26 @@ +import { afterEach, describe, expect, test } from 'bun:test' +import { isSkillImprovementEnabled } from '../skillImprovement.js' + +const originalEnv = { ...process.env } + +afterEach(() => { + process.env = { ...originalEnv } +}) + +describe('skillImprovement', () => { + test('is enabled when skill learning is enabled', () => { + process.env = { ...originalEnv } + process.env.SKILL_LEARNING_ENABLED = '1' + delete process.env.SKILL_IMPROVEMENT_ENABLED + + expect(isSkillImprovementEnabled()).toBe(true) + }) + + test('explicit skill improvement opt-out wins', () => { + process.env = { ...originalEnv } + process.env.SKILL_LEARNING_ENABLED = '1' + process.env.SKILL_IMPROVEMENT_ENABLED = '0' + + expect(isSkillImprovementEnabled()).toBe(false) + }) +}) diff --git a/src/utils/hooks/skillImprovement.ts b/src/utils/hooks/skillImprovement.ts index 45ec64062..0c7ed9c0a 100644 --- a/src/utils/hooks/skillImprovement.ts +++ b/src/utils/hooks/skillImprovement.ts @@ -7,7 +7,11 @@ import { logEvent, } from '../../services/analytics/index.js' import { queryModelWithoutStreaming } from '../../services/api/claude.js' -import { createTrace, endTrace, isLangfuseEnabled } from '../../services/langfuse/index.js' +import { + createTrace, + endTrace, + isLangfuseEnabled, +} from '../../services/langfuse/index.js' import { getSessionId } from '../../bootstrap/state.js' import { getAPIProvider } from '../model/providers.js' import { getEmptyToolPermissionContext } from '../../Tool.js' @@ -31,6 +35,16 @@ import { } from 
'./apiQueryHookHelper.js' import { registerPostSamplingHook } from './postSamplingHooks.js' +export function isSkillImprovementEnabled(): boolean { + const explicit = process.env.SKILL_IMPROVEMENT_ENABLED + if (explicit === '0' || explicit === 'false') return false + if (explicit === '1' || explicit === 'true') return true + return ( + process.env.SKILL_LEARNING_ENABLED === '1' || + process.env.SKILL_LEARNING_ENABLED === 'true' + ) +} + const TURN_BATCH_SIZE = 5 export type SkillUpdate = { @@ -265,7 +279,9 @@ Rules: endTrace(langfuseTrace) - const responseText = extractTextContent(Array.isArray(response.message.content) ? response.message.content : []).trim() + const responseText = extractTextContent( + Array.isArray(response.message.content) ? response.message.content : [], + ).trim() const updatedContent = extractTag(responseText, 'updated_file') if (!updatedContent) {