feat: 添加 skill learning 技能学习闭环系统

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
unraid
2026-04-22 22:38:09 +08:00
parent 04c7ed4250
commit 1837df5f88
64 changed files with 11009 additions and 36 deletions

View File

@@ -0,0 +1,229 @@
import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test'
// Replace queryHaiku with a recording stub so nothing in this file reaches the
// real API. The stub is registered ahead of the import of the module under
// test so that module's import binding resolves to it.
// NOTE(review): static ESM imports are hoisted above this call, so the ordering
// presumably relies on bun's mock.module also patching already-loaded modules —
// confirm against the bun documentation.

/** Arguments of every call made to the stubbed queryHaiku, in call order. */
const haikuCalls: Array<{ systemPrompt: unknown; userPrompt: string }> = []

/** Swappable behaviour of the stub; tests reassign it to script a reply or a failure. */
let haikuResponder: (userPrompt: string) => Promise<unknown> = async () => {
  const content = [{ type: 'text', text: 'optimize code performance' }]
  return { message: { content } }
}

mock.module('../../api/claude.js', () => {
  const queryHaiku = mock(
    async ({
      systemPrompt,
      userPrompt,
    }: {
      systemPrompt: unknown
      userPrompt: string
    }) => {
      // Log first, then delegate, so a throwing responder still counts as a call.
      haikuCalls.push({ systemPrompt, userPrompt })
      return haikuResponder(userPrompt)
    },
  )
  return { queryHaiku }
})
import {
clearIntentNormalizeCache,
isIntentNormalizeEnabled,
normalizeQueryIntent,
} from '../intentNormalize.js'
/** Copy of the environment taken when this file loads; each test starts and ends from it. */
const originalEnv = { ...process.env }

beforeEach(() => {
  // Fresh env copy so a flag set by one test never leaks into the next.
  process.env = { ...originalEnv }
  // Empty the call log in place: the stub closes over this same array.
  haikuCalls.splice(0, haikuCalls.length)
  // Put the stub back on its default scripted reply.
  haikuResponder = async () => {
    const content = [{ type: 'text', text: 'optimize code performance' }]
    return { message: { content } }
  }
  clearIntentNormalizeCache()
})

afterEach(() => {
  // Undo any env mutation and drop cached results produced by the test.
  process.env = { ...originalEnv }
  clearIntentNormalizeCache()
})
describe('isIntentNormalizeEnabled', () => {
  // One row per flag state; `flag: undefined` means the variable is removed
  // from the environment rather than assigned.
  const cases: Array<{
    title: string
    flag: string | undefined
    expected: boolean
  }> = [
    {
      title: 'defaults to disabled when flag is unset',
      flag: undefined,
      expected: false,
    },
    { title: 'enabled when flag is "1"', flag: '1', expected: true },
    {
      title: 'disabled for any value other than "1"',
      flag: 'true',
      expected: false,
    },
  ]

  for (const { title, flag, expected } of cases) {
    test(title, () => {
      if (flag === undefined) {
        delete process.env.SKILL_SEARCH_INTENT_ENABLED
      } else {
        process.env.SKILL_SEARCH_INTENT_ENABLED = flag
      }
      expect(isIntentNormalizeEnabled()).toBe(expected)
    })
  }
})
describe('normalizeQueryIntent — feature flag gating', () => {
  /** Asserts that the stubbed queryHaiku was never invoked. */
  const expectHaikuUntouched = (): void => {
    expect(haikuCalls.length).toBe(0)
  }

  test('returns query unchanged when flag is off', async () => {
    delete process.env.SKILL_SEARCH_INTENT_ENABLED
    const query = '帮我优化代码的性能'
    const normalized = await normalizeQueryIntent(query)
    expect(normalized).toBe(query)
    expectHaikuUntouched()
  })

  test('returns empty string as-is without calling Haiku', async () => {
    process.env.SKILL_SEARCH_INTENT_ENABLED = '1'
    const normalized = await normalizeQueryIntent('')
    expect(normalized).toBe('')
    expectHaikuUntouched()
  })

  test('trims whitespace-only input to empty string', async () => {
    process.env.SKILL_SEARCH_INTENT_ENABLED = '1'
    const normalized = await normalizeQueryIntent(' \n ')
    expect(normalized).toBe('')
    expectHaikuUntouched()
  })
})
describe('normalizeQueryIntent — ASCII fast path', () => {
  // Each row is [test title, query]. With the flag on, the query must come
  // back verbatim and the stub must stay uncalled.
  // NOTE(review): the second title mentions punctuation, but its query
  // contains none — confirm the intended input.
  const asciiCases: Array<[string, string]> = [
    [
      'ASCII query bypasses Haiku and returns unchanged',
      'optimize code performance',
    ],
    [
      'ASCII query with punctuation still bypasses Haiku',
      'audit feature flags for stubs',
    ],
  ]

  for (const [title, query] of asciiCases) {
    test(title, async () => {
      process.env.SKILL_SEARCH_INTENT_ENABLED = '1'
      const normalized = await normalizeQueryIntent(query)
      expect(normalized).toBe(query)
      expect(haikuCalls.length).toBe(0)
    })
  }
})
describe('normalizeQueryIntent — CJK path calls Haiku', () => {
  /** Turns the feature flag on and scripts the stub to answer with one text block. */
  const arrange = (replyText: string): void => {
    process.env.SKILL_SEARCH_INTENT_ENABLED = '1'
    haikuResponder = async () => ({
      message: { content: [{ type: 'text', text: replyText }] },
    })
  }

  test('CJK query concatenates keywords returned by Haiku', async () => {
    arrange('optimize code performance refactor')
    const normalized = await normalizeQueryIntent('帮我优化代码的性能')
    expect(haikuCalls.length).toBe(1)
    // Expected shape: the original query, a space, then the returned keywords.
    expect(normalized).toBe(
      '帮我优化代码的性能 optimize code performance refactor',
    )
  })

  test('mixed CJK + ASCII query also calls Haiku', async () => {
    arrange('review code audit')
    const normalized = await normalizeQueryIntent('帮我做 code review')
    expect(haikuCalls.length).toBe(1)
    expect(normalized).toBe('帮我做 code review review code audit')
  })

  test('Haiku output gets sanitized: lowercased, punctuation stripped', async () => {
    arrange('Optimize, Code! Performance?')
    const normalized = await normalizeQueryIntent('优化代码')
    expect(normalized).toBe('优化代码 optimize code performance')
  })
})
describe('normalizeQueryIntent — graceful fallback', () => {
  /** Switches the feature flag on for the current test. */
  const enableFlag = (): void => {
    process.env.SKILL_SEARCH_INTENT_ENABLED = '1'
  }

  /** Builds a responder that answers with a single text block. */
  const textReply = (text: string) => async () => ({
    message: { content: [{ type: 'text', text }] },
  })

  test('empty LLM response falls back to original query', async () => {
    enableFlag()
    haikuResponder = textReply('')
    const normalized = await normalizeQueryIntent('优化代码')
    expect(normalized).toBe('优化代码')
    expect(haikuCalls.length).toBe(1)
  })

  test('Haiku throwing an error falls back to original query', async () => {
    enableFlag()
    haikuResponder = async () => {
      throw new Error('network down')
    }
    const normalized = await normalizeQueryIntent('重构代码')
    expect(normalized).toBe('重构代码')
    // The stub logs the call before delegating, so the failed attempt is counted.
    expect(haikuCalls.length).toBe(1)
  })

  test('malformed LLM response (no text blocks) falls back', async () => {
    enableFlag()
    // `content` is a string here instead of an array of blocks.
    haikuResponder = async () => ({ message: { content: 'not-an-array' } })
    const normalized = await normalizeQueryIntent('优化代码')
    expect(normalized).toBe('优化代码')
  })

  test('LLM responds with only punctuation -> sanitize empties it -> fallback', async () => {
    enableFlag()
    haikuResponder = textReply('!!!???')
    const normalized = await normalizeQueryIntent('优化代码')
    expect(normalized).toBe('优化代码')
  })
})
describe('normalizeQueryIntent — cache behavior', () => {
  /** Turns the feature flag on and scripts the stub to answer with one fixed text block. */
  const arrange = (replyText: string): void => {
    process.env.SKILL_SEARCH_INTENT_ENABLED = '1'
    haikuResponder = async () => ({
      message: { content: [{ type: 'text', text: replyText }] },
    })
  }

  test('repeat calls with same query use cache (only 1 Haiku call)', async () => {
    arrange('optimize code')
    const query = '帮我优化代码'
    const first = await normalizeQueryIntent(query)
    const second = await normalizeQueryIntent(query)
    const third = await normalizeQueryIntent(query)
    expect(first).toBe(second)
    expect(second).toBe(third)
    // Three lookups, one recorded stub call.
    expect(haikuCalls.length).toBe(1)
  })

  test('different queries trigger separate Haiku calls', async () => {
    process.env.SKILL_SEARCH_INTENT_ENABLED = '1'
    // The reply varies with the first two characters of the prompt it receives.
    haikuResponder = async (userPrompt: string) => {
      const text = `kw-for-${userPrompt.slice(0, 2)}`
      return { message: { content: [{ type: 'text', text }] } }
    }
    await normalizeQueryIntent('优化代码')
    await normalizeQueryIntent('重构模块')
    expect(haikuCalls.length).toBe(2)
  })

  test('clearIntentNormalizeCache resets the cache', async () => {
    arrange('kw')
    const query = '优化代码'
    await normalizeQueryIntent(query)
    clearIntentNormalizeCache()
    await normalizeQueryIntent(query)
    // Same query twice, but the clear in between forces a second stub call.
    expect(haikuCalls.length).toBe(2)
  })
})
describe('normalizeQueryIntent — input capping', () => {
  test('very long CJK input is truncated to 500 chars before sending to Haiku', async () => {
    process.env.SKILL_SEARCH_INTENT_ENABLED = '1'
    const longInput = '优化代码'.repeat(300) // 4 chars × 300 = 1200 chars
    haikuResponder = async () => ({
      message: { content: [{ type: 'text', text: 'optimize code' }] },
    })
    await normalizeQueryIntent(longInput)
    // Pin that the stub was actually reached. Without this, a missing call
    // would surface only as a matcher error on `undefined` in the length check
    // below rather than as a clear "expected 1 call" failure.
    expect(haikuCalls.length).toBe(1)
    // The prompt handed to the stub must be capped at 500 characters.
    expect(haikuCalls[0]?.userPrompt.length).toBeLessThanOrEqual(500)
  })
})

View File

@@ -0,0 +1,221 @@
import { describe, expect, test } from 'bun:test'
import {
searchSkills,
tokenize,
tokenizeAndStem,
type SkillIndexEntry,
} from '../localSearch.js'
/**
 * Builds a complete SkillIndexEntry for tests from a partial set of fields.
 *
 * Defaults: name 'test-skill', empty description, source 'test', no tokens.
 * `normalizedName` falls back to the lower-cased name with '-' and '_' turned
 * into spaces, and `tfVector` falls back to one derived from the tokens.
 * The remaining optional fields are copied through as given.
 *
 * @param overrides Fields that should differ from the defaults.
 * @returns A new entry containing every field listed above.
 */
function makeEntry(overrides: Partial<SkillIndexEntry>): SkillIndexEntry {
  const { whenToUse, loadedFrom, skillRoot, contentLength } = overrides
  const name = overrides.name ?? 'test-skill'
  const tokens = overrides.tokens ?? []
  // `??` rather than a destructuring default so that null, like undefined,
  // triggers the fallback.
  const normalizedName =
    overrides.normalizedName ?? name.toLowerCase().replace(/[-_]/g, ' ')
  const description = overrides.description ?? ''
  const source = overrides.source ?? 'test'
  const tfVector = overrides.tfVector ?? buildTfVector(tokens)
  return {
    name,
    normalizedName,
    description,
    whenToUse,
    source,
    loadedFrom,
    skillRoot,
    contentLength,
    tokens,
    tfVector,
  }
}
/**
 * Computes a max-normalised term-frequency vector for a token list.
 *
 * Each distinct token maps to its occurrence count divided by the highest
 * count in the list. The divisor never drops below 1, so an empty list simply
 * yields an empty map.
 *
 * @param tokens Tokens to count; duplicates are what raise a term's weight.
 * @returns Map from term to weight in (0, 1], in first-seen order.
 */
function buildTfVector(tokens: string[]): Map<string, number> {
  const counts = new Map<string, number>()
  let peak = 1
  for (const token of tokens) {
    const seen = (counts.get(token) ?? 0) + 1
    counts.set(token, seen)
    if (seen > peak) peak = seen
  }
  return new Map(
    Array.from(
      counts,
      ([term, seen]): [string, number] => [term, seen / peak],
    ),
  )
}
describe('tokenize — CJK bi-gram + ASCII', () => {
  test('优化重构流程 produces five overlapping bi-grams', () => {
    const grams = tokenize('优化重构流程')
    // Six characters give five adjacent pairs.
    for (const pair of ['优化', '化重', '重构', '构流', '流程']) {
      expect(grams).toContain(pair)
    }
    expect(grams.length).toBe(5)
  })

  test('pure ASCII input retains prior behaviour (regression)', () => {
    const words = tokenize('Refactor TypeScript helpers')
    for (const word of ['refactor', 'typescript', 'helpers']) {
      expect(words).toContain(word)
    }
  })

  test('mixed Chinese + English is segmented on both sides', () => {
    const mixed = tokenize('优化 refactor 流程')
    for (const expected of ['优化', '流程', 'refactor']) {
      expect(mixed).toContain(expected)
    }
    // The two CJK runs are separated by ASCII text, so a pair spanning both
    // runs must not be produced.
    expect(mixed).not.toContain('化流')
  })

  test('isolated single Chinese character produces no bi-gram', () => {
    const hanPattern = /[\u4e00-\u9fff]/
    const tokens = tokenize('优 is lonely')
    const hasHanToken = tokens.some(token => hanPattern.test(token))
    expect(hasHanToken).toBe(false)
    expect(tokens).toContain('lonely')
  })

  test('ASCII stop words still filtered in mixed input', () => {
    const tokens = tokenize('the 优化 is fast')
    for (const stopWord of ['the', 'is']) {
      expect(tokens).not.toContain(stopWord)
    }
    for (const kept of ['优化', 'fast']) {
      expect(tokens).toContain(kept)
    }
  })
})
describe('tokenizeAndStem — CJK passes through, ASCII stemmed', () => {
  /** Asserts that every expected token appears in the stemmed output of `input`. */
  const expectStemsToInclude = (input: string, expected: string[]): void => {
    const stems = tokenizeAndStem(input)
    for (const token of expected) {
      expect(stems).toContain(token)
    }
  }

  test('CJK bi-grams are not stemmed', () => {
    expectStemsToInclude('优化流程', ['优化', '化流', '流程'])
  })

  test('ASCII words are stemmed while CJK survives', () => {
    // 'refactoring' -> 'refactor' and 'helpers' -> 'helper'; '重构' is kept as-is.
    expectStemsToInclude('refactoring 重构 helpers', [
      'refactor',
      '重构',
      'helper',
    ])
  })
})
describe('searchSkills — CJK query against skill index', () => {
  /**
   * Builds one index entry whose tokens are produced by running `corpus`
   * through tokenizeAndStem. `description` is stored on the entry as given.
   */
  const indexed = (
    name: string,
    description: string,
    corpus: string,
  ): SkillIndexEntry =>
    makeEntry({ name, description, tokens: tokenizeAndStem(corpus) })

  /** Unrelated English-only entry used as the distractor in the CJK cases. */
  const databaseMigration = (): SkillIndexEntry =>
    indexed(
      'database-migration',
      'schema upgrade',
      'database-migration tool for schema upgrades',
    )

  test('Chinese query against Chinese-metadata skill produces positive score', () => {
    const index: SkillIndexEntry[] = [
      indexed(
        'refactor-cleaner',
        '清理和重构流程辅助',
        'refactor-cleaner 清理 重构 流程 的工具',
      ),
      databaseMigration(),
    ]
    const results = searchSkills('优化重构流程', index, 5)
    expect(results.length).toBeGreaterThan(0)
    expect(results[0]?.name).toBe('refactor-cleaner')
    expect(results[0]?.score).toBeGreaterThan(0)
  })

  test('pure English query still ranks English skill first (regression)', () => {
    const index: SkillIndexEntry[] = [
      indexed(
        'refactor-helper',
        'refactor typescript',
        'refactor clean typescript code helper',
      ),
      indexed(
        'security-review',
        'security audit',
        'security review audit vulnerabilities',
      ),
    ]
    const results = searchSkills('refactor typescript', index, 5)
    expect(results[0]?.name).toBe('refactor-helper')
  })

  test('CJK query with only 1 matching bi-gram is filtered out (Proposal D)', () => {
    // The query and the skill text appear to share just one pair ('当前'),
    // which the title says is not enough to return a result.
    const index: SkillIndexEntry[] = [
      indexed(
        'prompt-optimizer',
        'optimize prompts',
        'prompt-optimizer optimize prompts for better performance 当前最佳实践',
      ),
      databaseMigration(),
    ]
    const results = searchSkills('研究当前代码', index, 5)
    expect(results.length).toBe(0)
  })

  test('CJK query with 2+ matching bi-grams passes the gate', () => {
    const index: SkillIndexEntry[] = [
      indexed(
        'refactor-cleaner',
        '代码重构清理',
        'refactor-cleaner 代码重构 清理冗余代码',
      ),
      databaseMigration(),
    ]
    const results = searchSkills('重构代码', index, 5)
    expect(results.length).toBeGreaterThan(0)
    expect(results[0]?.name).toBe('refactor-cleaner')
  })

  test('exact skill name in query boosts score (Proposal C)', () => {
    const index: SkillIndexEntry[] = [
      indexed(
        'code-review',
        'review code quality',
        'code-review review code quality',
      ),
      indexed(
        'security-review',
        'review security',
        'security-review review security',
      ),
    ]
    const results = searchSkills('code review', index, 5)
    expect(results[0]?.name).toBe('code-review')
    // Optional chaining instead of a non-null assertion, matching the score
    // check in the first test: an empty result list then fails the matcher
    // instead of throwing a TypeError.
    expect(results[0]?.score).toBeGreaterThanOrEqual(0.75)
  })
})

View File

@@ -0,0 +1,123 @@
import { describe, expect, test } from 'bun:test'
import { extractQueryFromMessages } from '../prefetch.js'
import type { Message } from '../../../types/message.js'
/** Builds a user message whose `content` is the bare string `text`. */
function userText(text: string): Message {
  const message = { type: 'user', content: text }
  // Cast through unknown: the fixture carries only `type` and `content`.
  return message as unknown as Message
}
/** Builds a user message whose `content` is an array holding one text block. */
function userTextBlocks(text: string): Message {
  const textBlock = { type: 'text', text }
  const message = { type: 'user', content: [textBlock] }
  // Cast through unknown: the fixture carries only `type` and `content`.
  return message as unknown as Message
}
/**
 * Builds a user message that carries a single tool_result block for the tool
 * call `id`, with the fixed payload 'output'.
 */
function userToolResult(id: string): Message {
  const resultBlock = { type: 'tool_result', tool_use_id: id, content: 'output' }
  const message = { type: 'user', content: [resultBlock] }
  // Cast through unknown: the fixture carries only `type` and `content`.
  return message as unknown as Message
}
/** Builds an assistant message whose `content` is the bare string `text`. */
function assistantText(text: string): Message {
  const message = { type: 'assistant', content: text }
  // Cast through unknown: the fixture carries only `type` and `content`.
  return message as unknown as Message
}
describe('extractQueryFromMessages — inter-turn穿透逻辑', () => {
  /** Runs the extractor with a null input, so only the history can supply text. */
  const fromHistory = (history: Message[]) =>
    extractQueryFromMessages(null, history)

  test('null input + messages末尾是tool_result → 穿透到真实user文本', () => {
    const history: Message[] = [
      userText('研究当前代码'),
      assistantText('调用工具'),
      userToolResult('tool_01'),
    ]
    expect(fromHistory(history)).toBe('研究当前代码')
  })

  test('null input + messages末尾是text block形式的user → 正确提取', () => {
    const history: Message[] = [
      userTextBlocks('refactor the auth module'),
      assistantText('thinking...'),
      userToolResult('tool_02'),
    ]
    expect(fromHistory(history)).toBe('refactor the auth module')
  })

  test('null input + 连续多轮tool_result → 继续向前找到最早的user文本', () => {
    // Three assistant/tool_result rounds sit between the end of the history
    // and the only real user text.
    const history: Message[] = [
      userText('研究当前代码'),
      assistantText('第一次调用'),
      userToolResult('tool_a'),
      assistantText('第二次调用'),
      userToolResult('tool_b'),
      assistantText('第三次调用'),
      userToolResult('tool_c'),
    ]
    expect(fromHistory(history)).toBe('研究当前代码')
  })

  test('null input + 空messages → 空串', () => {
    expect(fromHistory([])).toBe('')
  })

  test('null input + 全是tool_result (无真实文本) → 空串', () => {
    const history: Message[] = [
      userToolResult('tool_a'),
      userToolResult('tool_b'),
    ]
    expect(fromHistory(history)).toBe('')
  })

  // NOTE(review): the title says "null messages", but an empty array is what
  // is actually passed — confirm which case is intended.
  test('string input + null messages → 只返回input', () => {
    const extracted = extractQueryFromMessages('hello world', [])
    expect(extracted).toBe('hello world')
  })

  test('string input + 有user文本 → 两者拼接', () => {
    const history: Message[] = [userText('previous query')]
    const extracted = extractQueryFromMessages('new query', history)
    // Only containment is checked; order and separator are left unspecified.
    for (const part of ['new query', 'previous query']) {
      expect(extracted).toContain(part)
    }
  })

  test('超长user文本被截断到500字', () => {
    const history: Message[] = [userText('a'.repeat(1000))]
    const extracted = fromHistory(history)
    expect(extracted.length).toBe(500)
  })

  test('tool_result里含text字段 (但type=tool_result) → 必须跳过,不能误用', () => {
    // A block typed tool_result that nevertheless has a `text` property.
    const disguisedToolResult = {
      type: 'user',
      content: [
        {
          type: 'tool_result',
          text: 'this is tool output masquerading as text',
        },
      ],
    } as unknown as Message
    const history: Message[] = [userText('real query'), disguisedToolResult]
    expect(fromHistory(history)).toBe('real query')
  })

  test('user content数组里text为空串 → 跳过空text继续找', () => {
    // A text block whose text is only whitespace.
    const blankTextMessage = {
      type: 'user',
      content: [{ type: 'text', text: ' ' }],
    } as unknown as Message
    const history: Message[] = [userText('real query'), blankTextMessage]
    expect(fromHistory(history)).toBe('real query')
  })
})

View File

@@ -0,0 +1,101 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import {
existsSync,
mkdirSync,
mkdtempSync,
rmSync,
writeFileSync,
} from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { clearCommandsCache } from '../../../commands.js'
import { getTurnZeroSkillDiscovery } from '../prefetch.js'
import { clearSkillIndexCache } from '../localSearch.js'
/** Scratch directory for the current test; created in beforeEach, removed in afterEach. */
let root: string
/** Working directory that was active before the test moved into `root`. */
let previousCwd: string
/** Copy of the environment taken when this file loads. */
const originalEnv = { ...process.env }

/** Calls both cache-clearing helpers imported by this file. */
const resetCaches = (): void => {
  clearCommandsCache()
  clearSkillIndexCache()
}

beforeEach(() => {
  root = mkdtempSync(join(tmpdir(), 'skill-search-prefetch-'))
  previousCwd = process.cwd()
  process.chdir(root)
  // Start from the captured env, then point config/learning paths into the
  // scratch directory and switch the features under test on.
  process.env = { ...originalEnv }
  Object.assign(process.env, {
    CLAUDE_CONFIG_DIR: join(root, 'config'),
    CLAUDE_SKILL_LEARNING_HOME: join(root, 'learning'),
    SKILL_SEARCH_ENABLED: '1',
    SKILL_LEARNING_ENABLED: '1',
    NODE_ENV: 'test',
    ANTHROPIC_API_KEY: 'test-key',
  })
  resetCaches()
})

afterEach(() => {
  // Leave the scratch directory before trying to delete it.
  process.chdir(previousCwd)
  process.env = { ...originalEnv }
  resetCaches()
  const removal = {
    recursive: true,
    force: true,
    maxRetries: 10,
    retryDelay: 100,
  }
  try {
    rmSync(root, removal)
  } catch {
    // Best effort only: Windows can keep transient handles after dynamic
    // command loading, so a failed removal must not fail the test.
  }
})
describe('skill search prefetch', () => {
  /**
   * Runs turn-zero discovery for `prompt` with an empty message history and a
   * stub third argument that carries only `agentId: undefined`.
   */
  const discover = (prompt: string) =>
    getTurnZeroSkillDiscovery(prompt, [], { agentId: undefined } as any)

  test('auto-loads high-confidence project skill content', async () => {
    // Write a project-level skill into the scratch directory.
    const skillMarkdown = [
      '---',
      'name: feature-audit',
      'description: Audit feature flags and classify minimal implementations',
      '---',
      '',
      '# Feature Audit',
      '',
      'Use the feature flag audit workflow and classify flags as stub, shell, MVP, or thin-toggle.',
    ].join('\n')
    const skillDir = join(root, '.claude', 'skills', 'feature-audit')
    mkdirSync(skillDir, { recursive: true })
    writeFileSync(join(skillDir, 'SKILL.md'), skillMarkdown)

    const attachment = await discover(
      'audit feature flags for minimal implementation stubs',
    )

    expect(attachment?.type).toBe('skill_discovery')
    // The throw also narrows `attachment` for the property reads below.
    if (attachment?.type !== 'skill_discovery') {
      throw new Error('expected skill_discovery attachment')
    }
    const topSkill = attachment.skills[0]
    expect(topSkill?.name).toBe('feature-audit')
    expect(topSkill?.autoLoaded).toBe(true)
    // The text must come from the SKILL.md body, not just its front matter.
    expect(topSkill?.content).toContain('feature flag audit workflow')
  })

  test('records a pending skill gap on the first unmatched prompt (no draft file yet)', async () => {
    // No skill is written for this test, so the prompt has nothing to match.
    const attachment = await discover('frobnicate zephyr ledger workflow')

    expect(attachment?.type).toBe('skill_discovery')
    if (attachment?.type !== 'skill_discovery') {
      throw new Error('expected skill_discovery attachment')
    }
    expect(attachment.skills).toEqual([])
    const gap = attachment.gap
    expect(gap?.status).toBe('pending')
    expect(gap?.draftPath).toBeUndefined()
  })
})
})