feat: 添加 skill learning 技能学习闭环系统

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
unraid
2026-04-22 22:38:09 +08:00
parent 04c7ed4250
commit 1837df5f88
64 changed files with 11009 additions and 36 deletions

View File

@@ -0,0 +1,152 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import { mkdtempSync, rmSync } from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { createInstinct } from '../instinctParser.js'
import {
classifyEvolutionTarget,
clusterInstincts,
generateAgentCandidates,
generateCommandCandidates,
generateSkillCandidates,
} from '../evolution.js'
/**
 * Tests for the evolution stage: clustering instincts, classifying the
 * evolution target, and generating skill / command / agent candidates.
 */
describe('evolution', () => {
  // Project-scoped "when writing tests" fixture in the testing domain; only
  // the action, confidence and evidence differ between the instincts below.
  const testingInstinct = (
    action: string,
    confidence: number,
    evidence: string,
  ) =>
    createInstinct({
      trigger: 'when writing tests',
      action,
      confidence,
      domain: 'testing',
      source: 'session-observation',
      scope: 'project',
      evidence: [evidence],
    })

  test('clusters related instincts by trigger and domain', () => {
    const clusters = clusterInstincts([
      testingInstinct('use testing-library', 0.7, 'one'),
      testingInstinct('avoid implementation mocks', 0.8, 'two'),
      testingInstinct('prefer describe/test structure', 0.75, 'three'),
    ])
    expect(clusters).toHaveLength(1)
    // The average is a computed float, so compare with a (tight) tolerance
    // instead of exact equality to stay independent of summation order.
    expect(clusters[0]?.averageConfidence).toBeCloseTo(0.75, 10)
  })

  test('classifies explicit user-invoked workflows as command candidates', () => {
    const userInvoked = createInstinct({
      trigger: 'when user asks to create migration',
      action: 'run command steps',
      confidence: 0.8,
      domain: 'workflow',
      source: 'session-observation',
      scope: 'project',
      evidence: ['one'],
    })
    expect(classifyEvolutionTarget([userInvoked])).toBe('command')
  })

  test('generates skill candidates for high-confidence skill clusters', () => {
    // Cluster-size floor (>=3) is non-negotiable post-H15 fix: a single
    // high-confidence instinct must not become a persistent skill. Three
    // independent observations are required to promote.
    const instincts = [
      testingInstinct('use testing-library', 0.8, 'one'),
      testingInstinct('avoid implementation mocks', 0.8, 'two'),
      testingInstinct('prefer describe/test structure', 0.8, 'three'),
    ]
    expect(generateSkillCandidates(instincts)).toHaveLength(1)
  })

  describe('three-path generation', () => {
    // Scratch working directory handed to the generators as `cwd`.
    let tmp: string

    beforeEach(() => {
      tmp = mkdtempSync(join(tmpdir(), 'skill-learning-evolve-'))
    })

    afterEach(() => {
      rmSync(tmp, { recursive: true, force: true })
    })

    test('command-triggered instincts produce command candidates, not skill candidates', () => {
      // Need >=3 instincts to satisfy the cluster-size floor post-H15.
      const instincts = Array.from({ length: 3 }, (_, i) =>
        createInstinct({
          trigger: 'when user asks to create migration',
          action: 'run command: pnpm run migration',
          confidence: 0.85,
          domain: 'workflow',
          source: 'session-observation',
          scope: 'project',
          evidence: [`user invocation ${i}`],
        }),
      )
      const commands = generateCommandCandidates(instincts, { cwd: tmp })
      const skills = generateSkillCandidates(instincts, { cwd: tmp })
      expect(commands).toHaveLength(1)
      expect(skills).toHaveLength(0)
      expect(commands[0]?.content).toContain('/')
    })

    test('four debug multi-step instincts cluster into an agent candidate', () => {
      const instincts = Array.from({ length: 4 }, (_, i) =>
        createInstinct({
          trigger: 'when debugging multi-step regressions',
          action: 'investigate stack trace, reproduce locally, and add test',
          confidence: 0.82,
          domain: 'debugging',
          source: 'session-observation',
          scope: 'project',
          evidence: [`incident-${i}`],
        }),
      )
      const agents = generateAgentCandidates(instincts, { cwd: tmp })
      expect(agents).toHaveLength(1)
      expect(agents[0]?.content).toContain('Playbook')
    })
  })
})

View File

@@ -0,0 +1,143 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import { mkdtempSync, rmSync } from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import {
loadInstincts,
prunePendingInstincts,
saveInstinct,
upsertInstinct,
} from '../instinctStore.js'
import { createInstinct } from '../instinctParser.js'
// Scratch storage root; a new directory is created before each test and
// removed afterwards.
let rootDir: string

beforeEach(() => {
  const prefix = join(tmpdir(), 'skill-learning-instinct-')
  rootDir = mkdtempSync(prefix)
})

afterEach(() => {
  rmSync(rootDir, { force: true, recursive: true })
})
/** Tests for saving, loading, upserting and pruning stored instincts. */
describe('instinctStore', () => {
  // Builds the store options anew for every call, so each store operation
  // receives its own project context object.
  const storage = () => ({ rootDir, project: projectContext() })

  test('saves and loads instincts', async () => {
    const instinct = createInstinct({
      trigger: 'when testing',
      action: 'use testing-library',
      confidence: 0.7,
      domain: 'testing',
      source: 'session-observation',
      scope: 'project',
      evidence: ['user correction'],
    })
    await saveInstinct(instinct, storage())

    const loaded = await loadInstincts(storage())
    expect(loaded).toHaveLength(1)
    expect(loaded[0]?.action).toContain('testing-library')
  })

  test('upsert increases confidence for confirming instincts', async () => {
    const original = createInstinct({
      id: 'test-instinct',
      trigger: 'when testing',
      action: 'prefer testing-library',
      confidence: 0.7,
      domain: 'testing',
      source: 'session-observation',
      scope: 'project',
      evidence: ['one'],
    })
    await upsertInstinct(original, storage())

    // Same instinct again, carrying a different piece of evidence.
    const confirming = { ...original, evidence: ['two'] }
    const merged = await upsertInstinct(confirming, storage())

    expect(merged.confidence).toBeGreaterThan(original.confidence)
    expect(merged.evidence).toContain('one')
    expect(merged.evidence).toContain('two')
  })

  test('outcome-aware upsert: failure evidence reduces confidence', async () => {
    const successful = createInstinct({
      id: 'outcome-aware',
      trigger: 'when writing tests',
      action: 'use testing-library',
      confidence: 0.7,
      domain: 'testing',
      source: 'session-observation',
      scope: 'project',
      evidence: ['one'],
      evidenceOutcome: 'success',
    })

    // First upsert, then a second identical success whose result is read
    // back from the store rather than from the upsert return value.
    const afterSuccess = await upsertInstinct(successful, storage())
    await upsertInstinct(successful, storage())
    const stored = await loadInstincts(storage())
    const afterAnotherSuccess = stored.find(i => i.id === 'outcome-aware')!

    const failing = {
      ...successful,
      evidence: ['two'],
      evidenceOutcome: 'failure' as const,
    }
    const afterFailure = await upsertInstinct(failing, storage())

    expect(afterSuccess.confidence).toBe(0.7)
    expect(afterAnotherSuccess.confidence).toBeGreaterThan(
      afterSuccess.confidence,
    )
    expect(afterFailure.confidence).toBeLessThan(afterAnotherSuccess.confidence)
  })

  test('prunes old pending instincts', async () => {
    const stale = createInstinct(
      {
        id: 'old-instinct',
        trigger: 'old',
        action: 'old',
        confidence: 0.3,
        domain: 'project',
        source: 'session-observation',
        scope: 'project',
        evidence: ['old'],
      },
      '2020-01-01T00:00:00.000Z',
    )
    await saveInstinct(stale, storage())

    const pruned = await prunePendingInstincts(30, storage())
    const prunedIds = pruned.map(instinct => instinct.id)
    expect(prunedIds).toContain('old-instinct')

    const remaining = await loadInstincts(storage())
    expect(remaining).toEqual([])
  })
})
/**
 * Project context fixture for project "p1", rooted at the current test's
 * scratch directory.
 */
function projectContext() {
  const projectId = 'p1'
  const storageDir = join(rootDir, 'projects', projectId)
  return {
    projectId,
    projectName: 'project',
    cwd: rootDir,
    scope: 'project' as const,
    source: 'global' as const,
    storageDir,
  }
}

View File

@@ -0,0 +1,81 @@
import { describe, expect, test } from 'bun:test'
import { createInstinct } from '../instinctParser.js'
import {
buildLearnedSkillName,
decideDefaultScope,
isGenericSkillName,
isValidLearnedSkillName,
normalizeSkillName,
shouldGenerateSkillFromInstincts,
} from '../learningPolicy.js'
/** Tests for skill naming, generation thresholds and default scope policy. */
describe('learningPolicy', () => {
  test('normalizes learned skill names to lowercase kebab-case with length cap', () => {
    const normalized = normalizeSkillName('Testing React Testing Library!!!')
    expect(normalized).toBe('testing-react-testing-library')
    expect(normalized.length).toBeLessThanOrEqual(64)
  })

  test('rejects generic learned skill names', () => {
    const generic = 'learned-skill'
    expect(isGenericSkillName(generic)).toBe(true)
    expect(isValidLearnedSkillName(generic)).toBe(false)
  })

  test('builds domain-prefixed names from instincts', () => {
    const skillName = buildLearnedSkillName([
      createInstinct({
        trigger: 'when writing React tests',
        action: 'use testing-library and avoid implementation mocks',
        confidence: 0.85,
        domain: 'testing',
        source: 'session-observation',
        scope: 'project',
        evidence: ['user correction'],
      }),
    ])
    const hasDomainPrefix = skillName.startsWith('testing-')
    expect(hasDomainPrefix).toBe(true)
    expect(isValidLearnedSkillName(skillName)).toBe(true)
  })

  test('uses confidence threshold before generating skills', () => {
    const weak = createInstinct({
      trigger: 'when testing',
      action: 'try a tentative pattern',
      confidence: 0.3,
      domain: 'testing',
      source: 'session-observation',
      scope: 'project',
      evidence: ['weak signal'],
    })
    // Identical instinct, differing only in confidence.
    const strong = { ...weak, confidence: 0.8 }

    expect(shouldGenerateSkillFromInstincts([weak])).toBe(false)
    expect(shouldGenerateSkillFromInstincts([strong])).toBe(true)
  })

  test('promotes only global-friendly repeated instinct groups by default', () => {
    const workflow = createInstinct({
      trigger: 'when modifying code',
      action: 'Grep then Read then Edit',
      confidence: 0.8,
      domain: 'workflow',
      source: 'session-observation',
      scope: 'project',
      evidence: ['repeated workflow'],
    })
    const testing = createInstinct({
      trigger: 'when writing React tests',
      action: 'use testing-library',
      confidence: 0.8,
      domain: 'testing',
      source: 'session-observation',
      scope: 'project',
      evidence: ['project convention'],
    })

    // The workflow instinct is passed twice to form a repeated group.
    const repeatedWorkflow = [workflow, workflow]
    expect(decideDefaultScope(repeatedWorkflow)).toBe('global')
    expect(decideDefaultScope([testing])).toBe('project')
  })
})

View File

@@ -0,0 +1,108 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import {
appendObservation,
ingestTranscript,
readObservations,
scrubText,
} from '../observationStore.js'
// Scratch storage root; a new directory is created before each test and
// removed afterwards.
let rootDir: string

beforeEach(() => {
  const prefix = join(tmpdir(), 'skill-learning-observation-')
  rootDir = mkdtempSync(prefix)
})

afterEach(() => {
  rmSync(rootDir, { force: true, recursive: true })
})
/** Tests for text scrubbing, observation persistence and transcript ingest. */
describe('observationStore', () => {
  // Builds the store options anew for every call.
  const storage = () => ({ rootDir, project: projectContext() })

  test('scrubs secrets and truncates large fields', () => {
    const shortResult = scrubText('api_key: sk-ant-1234567890abcdef extra', 80)
    expect(shortResult).toContain('[REDACTED]')

    // Input longer than the 40-character limit passed as second argument.
    const longInput = `api_key: sk-ant-1234567890abcdef ${'x'.repeat(120)}`
    const longResult = scrubText(longInput, 40)
    expect(longResult).toContain('[REDACTED]')
    expect(longResult).toContain('[TRUNCATED')
  })

  test('appends and reads project observations', async () => {
    const observation = {
      id: 'obs-1',
      timestamp: '2026-04-16T00:00:00.000Z',
      event: 'user_message' as const,
      sessionId: 's1',
      projectId: 'p1',
      projectName: 'project',
      cwd: rootDir,
      messageText: '不要 mock用 testing-library',
    }
    await appendObservation(observation, storage())

    const stored = await readObservations(storage())
    expect(stored).toHaveLength(1)
    expect(stored[0]?.messageText).toContain('testing-library')
  })

  test('ingests Claude transcript JSONL into observations', async () => {
    const transcriptPath = join(rootDir, 'session.jsonl')
    // One user message followed by an assistant turn containing a tool_use.
    const entries = [
      {
        type: 'user',
        sessionId: 's1',
        cwd: rootDir,
        timestamp: '2026-04-16T00:00:00.000Z',
        message: { role: 'user', content: '不要 mock用 testing-library' },
      },
      {
        type: 'assistant',
        sessionId: 's1',
        cwd: rootDir,
        timestamp: '2026-04-16T00:00:01.000Z',
        message: {
          role: 'assistant',
          content: [
            { type: 'tool_use', name: 'Grep', input: { pattern: 'x' } },
          ],
        },
      },
    ]
    const jsonl = entries.map(entry => JSON.stringify(entry)).join('\n')
    writeFileSync(transcriptPath, jsonl)

    const ingested = await ingestTranscript(transcriptPath, storage())
    const events = ingested.map(o => o.event)
    expect(ingested.length).toBeGreaterThanOrEqual(2)
    expect(events).toContain('user_message')
    expect(events).toContain('tool_start')
  })
})
/**
 * Project context fixture for project "p1", rooted at the current test's
 * scratch directory.
 */
function projectContext() {
  const projectId = 'p1'
  const storageDir = join(rootDir, 'projects', projectId)
  return {
    projectId,
    projectName: 'project',
    cwd: rootDir,
    scope: 'project' as const,
    source: 'global' as const,
    storageDir,
  }
}

View File

@@ -0,0 +1,135 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import {
getActiveObserverBackend,
listObserverBackends,
registerObserverBackend,
resolveDefaultObserverBackend,
setActiveObserverBackend,
analyzeWithActiveBackend,
type ObserverBackend,
} from '../observerBackend.js'
import { analyzeObservations } from '../sessionObserver.js'
import type { StoredSkillObservation } from '../observationStore.js'
/**
 * Builds an observation fixture: fixed defaults (session "s1", project "p1",
 * current working directory) overlaid with whatever `partial` provides.
 */
function obs(partial: Partial<StoredSkillObservation>): StoredSkillObservation {
  const defaults: StoredSkillObservation = {
    id: partial.id ?? crypto.randomUUID(),
    timestamp: '2026-04-16T00:00:00.000Z',
    event: partial.event ?? 'user_message',
    sessionId: 's1',
    projectId: 'p1',
    projectName: 'project',
    cwd: process.cwd(),
  }
  // Caller-supplied fields win over every default.
  return { ...defaults, ...partial }
}
// Name of the backend that was active when this file loaded; each test
// switches back to it on completion so backend changes do not leak.
const originalBackendName = getActiveObserverBackend().name

afterEach(() => {
  const restoreTo = originalBackendName
  setActiveObserverBackend(restoreTo)
})
/** Tests for observer backend registration, selection and routing. */
describe('observerBackend', () => {
  const BACKEND_ENV = 'SKILL_LEARNING_OBSERVER_BACKEND'

  // A single user-correction observation used by the heuristic tests.
  const correctionObservations = () => [
    obs({ messageText: '不要直接 mock用 testing-library' }),
  ]

  test('registers heuristic and llm backends by default', () => {
    const registered = listObserverBackends()
    expect(registered).toContain('heuristic')
    expect(registered).toContain('llm')
  })

  test('resolveDefaultObserverBackend honours SKILL_LEARNING_OBSERVER_BACKEND env', () => {
    // Adversarial probe for the env switch — if this regresses, the LLM
    // backend would be silently unreachable in production even with the env
    // variable set, which was the original AC2 gap.
    const saved = process.env[BACKEND_ENV]
    const resolveAndGetActive = () => {
      resolveDefaultObserverBackend()
      return getActiveObserverBackend().name
    }
    try {
      process.env[BACKEND_ENV] = 'llm'
      expect(resolveAndGetActive()).toBe('llm')

      // Unknown backend names must not crash; the current active stays.
      process.env[BACKEND_ENV] = 'nonexistent'
      expect(resolveAndGetActive()).toBe('llm')

      // Clearing the env leaves whatever was active — explicit opt-out is
      // setActiveObserverBackend, not clearing the env.
      delete process.env[BACKEND_ENV]
      expect(resolveAndGetActive()).toBe('llm')
    } finally {
      // Put the variable back exactly as it was found (unset stays unset).
      if (saved !== undefined) {
        process.env[BACKEND_ENV] = saved
      } else {
        delete process.env[BACKEND_ENV]
      }
    }
  })

  test('heuristic backend preserves existing correction detection', async () => {
    setActiveObserverBackend('heuristic')
    const found = await analyzeWithActiveBackend(correctionObservations())
    expect(found).toHaveLength(1)
    expect(found[0]?.action).toContain('testing-library')
  })

  test('llm backend short-circuits to [] on empty observations', async () => {
    // With the real Haiku-backed implementation the backend only calls
    // queryHaiku when there are observations to analyse. Empty-input short
    // circuit guarantees the no-cost path needed for hot loops.
    setActiveObserverBackend('llm')
    const found = await analyzeWithActiveBackend([])
    expect(found).toEqual([])
  })

  test('analyzeObservations routes to active backend (sync path throws for async backends)', () => {
    // Heuristic backend is sync — analyzeObservations works directly.
    const heuristicResult = analyzeObservations(correctionObservations())
    expect(heuristicResult.length).toBe(1)

    // The LLM backend is now a real async implementation (queryHaiku). The
    // sync `analyzeObservations` helper refuses to return a pending Promise
    // and throws with a clear instruction to use `analyzeWithActiveBackend`
    // instead — prove the routing reached the async backend by catching
    // that exact error.
    setActiveObserverBackend('llm')
    const callSyncPath = () => analyzeObservations(correctionObservations())
    expect(callSyncPath).toThrow(/Promise/)
  })

  test('custom backends can be registered and switched', async () => {
    const custom: ObserverBackend = {
      name: 'custom-test',
      // Always reports the same candidate, regardless of input.
      analyze() {
        return [
          {
            trigger: 'custom trigger',
            action: 'custom action',
            confidence: 0.9,
            domain: 'project',
            source: 'session-observation',
            scope: 'project',
            evidence: ['custom evidence'],
          },
        ]
      },
    }
    registerObserverBackend(custom)
    setActiveObserverBackend('custom-test')

    const found = await analyzeWithActiveBackend([])
    expect(found).toHaveLength(1)
    expect(found[0]?.trigger).toBe('custom trigger')
  })

  test('switching to an unknown backend throws', () => {
    const switchToUnknown = () => setActiveObserverBackend('does-not-exist')
    expect(switchToUnknown).toThrow()
  })
})

View File

@@ -0,0 +1,160 @@
import { afterAll, beforeEach, describe, expect, test } from 'bun:test'
import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync } from 'fs'
import { tmpdir } from 'os'
import { join } from 'path'
import { execFileSync } from 'child_process'
import { getClaudeConfigHomeDir } from '../../../utils/envUtils.js'
import {
getProjectContextPath,
getProjectsRegistryPath,
getSkillLearningRootDir,
resolveProjectContext,
} from '../projectContext.js'
import { isSkillLearningEnabled } from '../featureCheck.js'
// Parent directory for every temp dir this file creates; removed once at
// the end of the file.
const tempBase = mkdtempSync(join(tmpdir(), 'skill-learning-context-test-'))
// Snapshot of the environment taken at load time, used to undo mutations.
const originalEnv = { ...process.env }

beforeEach(() => {
  resetEnv()
  // Point the config dir at a new, empty home for each test.
  process.env.CLAUDE_CONFIG_DIR = mkdtempSync(join(tempBase, 'home-'))
})

afterAll(() => {
  process.env = { ...originalEnv }
  clearConfigDirCache()
  rmSync(tempBase, { force: true, recursive: true })
})
/** Tests for the environment switches read by isSkillLearningEnabled. */
describe('isSkillLearningEnabled', () => {
  // Env value paired with the flag state it is expected to produce.
  const cases = [
    ['1', true],
    ['0', false],
  ] as const

  test('honors explicit SKILL_LEARNING_ENABLED overrides', () => {
    for (const [value, expected] of cases) {
      process.env.SKILL_LEARNING_ENABLED = value
      expect(isSkillLearningEnabled()).toBe(expected)
    }
  })

  test('honors FEATURE_SKILL_LEARNING env fallback', () => {
    delete process.env.SKILL_LEARNING_ENABLED
    for (const [value, expected] of cases) {
      process.env.FEATURE_SKILL_LEARNING = value
      expect(isSkillLearningEnabled()).toBe(expected)
    }
  })
})
/**
 * Tests for project identity resolution, covering the four sources asserted
 * below: CLAUDE_PROJECT_DIR, git remote, git root, and the global fallback.
 */
describe('resolveProjectContext', () => {
  test('prefers CLAUDE_PROJECT_DIR and writes registry files', () => {
    const workingDir = mkdirTempDir('cwd-')
    const projectDir = mkdirTempDir('project-')
    process.env.CLAUDE_PROJECT_DIR = projectDir

    const resolved = resolveProjectContext(workingDir)

    expect(resolved.source).toBe('claude_project_dir')
    expect(resolved.scope).toBe('project')
    expect(resolved.projectRoot).toBe(projectDir)
    expect(resolved.projectName).toBe(lastPathSegment(projectDir))
    expect(resolved.storageDir).toContain(resolved.projectId)

    const registryPath = getProjectsRegistryPath()
    expect(existsSync(registryPath)).toBe(true)
    expect(existsSync(getProjectContextPath(resolved.projectId))).toBe(true)

    const registry = readJson(getProjectsRegistryPath())
    expect(registry.projects[resolved.projectId].source).toBe(
      'claude_project_dir',
    )
  })

  test('uses git remote as stable identity across different checkouts', () => {
    // Two separate checkouts that share one origin URL.
    const checkoutA = createGitRepo(
      'remote-a-',
      'https://example.com/acme/app.git',
    )
    const checkoutB = createGitRepo(
      'remote-b-',
      'https://example.com/acme/app.git',
    )

    const contextA = resolveProjectContext(checkoutA)
    const contextB = resolveProjectContext(checkoutB)

    expect(contextA.source).toBe('git_remote')
    expect(contextB.source).toBe('git_remote')
    expect(contextA.projectId).toBe(contextB.projectId)
    expect(contextA.gitRemote).toBe('https://example.com/acme/app')
    expect(contextA.projectName).toBe('app')

    const registry = readJson(getProjectsRegistryPath())
    const registeredIds = Object.keys(registry.projects)
    expect(registeredIds).toContain(contextA.projectId)
    expect(registry.projects[contextA.projectId].gitRemote).toBe(
      'https://example.com/acme/app',
    )
  })

  test('falls back to git root when origin remote is missing', () => {
    const repoRoot = createGitRepo('root-only-')
    // Resolve from a subdirectory of the repository.
    const resolved = resolveProjectContext(join(repoRoot, 'nested'))

    expect(resolved.source).toBe('git_root')
    expect(resolved.scope).toBe('project')
    expect(resolved.projectRoot).toBe(repoRoot)
    expect(resolved.projectName).toBe(lastPathSegment(repoRoot))
  })

  test('falls back to global context outside a git repository', () => {
    const plainDir = mkdirTempDir('not-git-')
    const resolved = resolveProjectContext(plainDir)

    expect(resolved.source).toBe('global')
    expect(resolved.scope).toBe('global')
    expect(resolved.projectId).toBe('global')
    expect(resolved.projectName).toBe('Global')
    expect(resolved.storageDir).toBe(join(getSkillLearningRootDir(), 'global'))
    expect(existsSync(getProjectContextPath('global'))).toBe(true)
  })
})
/**
 * Creates a temp directory containing a `nested` subdirectory, runs
 * `git init` in it, and adds `remote` as `origin` when one is given.
 *
 * @param prefix Name prefix for the temp directory.
 * @param remote Optional origin URL.
 * @returns Path of the new repository directory.
 */
function createGitRepo(prefix: string, remote?: string): string {
  const repoDir = mkdirTempDir(prefix)
  mkdirSync(join(repoDir, 'nested'), { recursive: true })

  // Runs git inside the new repository with its output suppressed.
  const git = (args: string[]): void => {
    execFileSync('git', args, { cwd: repoDir, stdio: 'ignore' })
  }

  git(['init'])
  if (remote) {
    git(['remote', 'add', 'origin', remote])
  }
  return repoDir
}
/** Creates a uniquely named directory under `tempBase` and returns its path. */
function mkdirTempDir(prefix: string): string {
  const template = join(tempBase, prefix)
  return mkdtempSync(template)
}
/** Reads the file at `path` as UTF-8 text and returns the parsed JSON value. */
function readJson(path: string): any {
  const text = readFileSync(path, 'utf8')
  return JSON.parse(text)
}
/**
 * Returns the final non-empty segment of `path`, splitting on both `/` and
 * `\`. When there is no non-empty segment the input is returned unchanged.
 */
function lastPathSegment(path: string): string {
  const segments = path.split(/[\\/]/).filter(segment => segment.length > 0)
  const tail = segments[segments.length - 1]
  return tail === undefined ? path : tail
}
/**
 * Restores the load-time environment snapshot, removes the variables these
 * tests set explicitly, and clears the config-dir cache.
 */
function resetEnv(): void {
  process.env = { ...originalEnv }
  const managedKeys = [
    'CLAUDE_PROJECT_DIR',
    'SKILL_LEARNING_ENABLED',
    'FEATURE_SKILL_LEARNING',
  ]
  for (const key of managedKeys) {
    delete process.env[key]
  }
  clearConfigDirCache()
}
/**
 * Calls `cache.clear()` on `getClaudeConfigHomeDir` when the function exposes
 * a `cache` property; does nothing otherwise.
 * NOTE(review): presumably the function is memoized and this forces it to
 * re-read CLAUDE_CONFIG_DIR — confirm against envUtils.
 */
function clearConfigDirCache(): void {
  if (typeof getClaudeConfigHomeDir !== 'function') return
  if (!('cache' in getClaudeConfigHomeDir)) return
  ;(getClaudeConfigHomeDir as any).cache.clear?.()
}

View File

@@ -0,0 +1,144 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import { mkdtempSync, rmSync } from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { createInstinct } from '../instinctParser.js'
import { saveInstinct, loadInstincts } from '../instinctStore.js'
import {
checkPromotion,
findPromotionCandidates,
resetPromotionBookkeeping,
} from '../promotion.js'
import type { SkillLearningProjectContext } from '../types.js'
// Scratch storage root for the current test.
let rootDir: string

/**
 * Project-scoped context fixture whose name equals its id and whose storage
 * lives under `<rootDir>/projects/<projectId>`.
 */
function projectCtx(projectId: string): SkillLearningProjectContext {
  const storageDir = join(rootDir, 'projects', projectId)
  const context: SkillLearningProjectContext = {
    projectId,
    projectName: projectId,
    scope: 'project',
    source: 'git_root',
    cwd: rootDir,
    storageDir,
  }
  return context
}
/** Global-scope context fixture stored under `<rootDir>/global`. */
function globalCtx(): SkillLearningProjectContext {
  const storageDir = join(rootDir, 'global')
  const context: SkillLearningProjectContext = {
    projectId: 'global',
    projectName: 'Global',
    scope: 'global',
    source: 'global',
    cwd: rootDir,
    storageDir,
  }
  return context
}
beforeEach(() => {
  // New scratch root, and promotion bookkeeping reset, for every test.
  const prefix = join(tmpdir(), 'skill-learning-promote-')
  rootDir = mkdtempSync(prefix)
  resetPromotionBookkeeping()
})

afterEach(() => {
  rmSync(rootDir, { force: true, recursive: true })
})
/** Tests for promoting instincts shared across projects to global scope. */
describe('promotion', () => {
  // Active, project-scoped workflow instinct. `label` is used for both the
  // trigger and the action; the project name mirrors the project id.
  const activeInstinct = (
    id: string,
    label: string,
    projectId: string,
    confidence = 0.85,
  ) =>
    createInstinct({
      id,
      trigger: label,
      action: label,
      confidence,
      domain: 'workflow',
      source: 'session-observation',
      scope: 'project',
      projectId,
      projectName: projectId,
      evidence: ['ev'],
      status: 'active',
    })

  // Persists the same instinct into the "alpha" and "beta" project stores.
  const saveInBothProjects = async (id: string, label: string) => {
    await saveInstinct(activeInstinct(id, label, 'alpha'), {
      rootDir,
      project: projectCtx('alpha'),
    })
    await saveInstinct(activeInstinct(id, label, 'beta'), {
      rootDir,
      project: projectCtx('beta'),
    })
  }

  test('findPromotionCandidates returns instincts with 2+ projects and avg>=0.8', () => {
    const candidates = findPromotionCandidates([
      activeInstinct('shared-trigger', 'shared', 'alpha'),
      activeInstinct('shared-trigger', 'shared', 'beta'),
    ])
    expect(candidates).toHaveLength(1)
    // Sort a copy: Array.prototype.sort works in place, and the test should
    // not reorder the array returned by the code under test.
    const projectIds = [...(candidates[0]?.projectIds ?? [])].sort()
    expect(projectIds).toEqual(['alpha', 'beta'])
  })

  test('checkPromotion writes a global copy for cross-project instincts', async () => {
    await saveInBothProjects('shared-id', 'shared')

    const promoted = await checkPromotion({ rootDir })
    expect(promoted.map(p => p.instinctId)).toContain('shared-id')

    const globalInstincts = await loadInstincts({
      rootDir,
      scope: 'global',
      project: globalCtx(),
    })
    const global = globalInstincts.find(i => i.id === 'shared-id')
    expect(global).toBeDefined()
    expect(global?.scope).toBe('global')
    expect(global?.confidence).toBeGreaterThanOrEqual(0.8)
  })

  test('checkPromotion is idempotent within a session', async () => {
    await saveInBothProjects('repeat-id', 'repeat')

    const first = await checkPromotion({ rootDir })
    const second = await checkPromotion({ rootDir })
    expect(first).toHaveLength(1)
    expect(second).toHaveLength(0)
  })

  test('does not promote when only one project has the instinct', async () => {
    const instinct = activeInstinct('solo', 'solo', 'alpha', 0.9)
    await saveInstinct(instinct, { rootDir, project: projectCtx('alpha') })

    const promoted = await checkPromotion({ rootDir })
    expect(promoted).toEqual([])
  })
})

View File

@@ -0,0 +1,143 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import { existsSync, mkdtempSync, rmSync } from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import {
resetSkillLearningConfig,
setSkillLearningConfigForTest,
} from '../config.js'
import { loadInstincts, readObservations } from '../index.js'
import {
resetRuntimeObserverForTest,
runSkillLearningPostSampling,
} from '../runtimeObserver.js'
// Per-test scratch directory, which also becomes the process cwd.
let root: string
// Working directory to return to after each test.
let previousCwd: string
// Snapshot of the environment taken at load time.
const originalEnv = { ...process.env }

beforeEach(() => {
  root = mkdtempSync(join(tmpdir(), 'skill-learning-runtime-'))
  previousCwd = process.cwd()
  process.chdir(root)
  // Start from the snapshot, then direct all storage into the scratch dir.
  process.env = {
    ...originalEnv,
    CLAUDE_SKILL_LEARNING_HOME: join(root, 'learning-home'),
    CLAUDE_CONFIG_DIR: join(root, 'config'),
    SKILL_LEARNING_ENABLED: '1',
    NODE_ENV: 'test',
  }
  setSkillLearningConfigForTest({ minConfidence: 0.3, minClusterSize: 1 })
  resetRuntimeObserverForTest()
})

afterEach(() => {
  // Leave the scratch directory before deleting it.
  process.chdir(previousCwd)
  process.env = { ...originalEnv }
  resetSkillLearningConfig()
  rmSync(root, { force: true, recursive: true })
})
/** Tests for the post-sampling hook that records observations and learns. */
describe('runtimeObserver', () => {
  type PostSamplingInput = Parameters<typeof runSkillLearningPostSampling>[0]
  type InputMessage = PostSamplingInput['messages'][number]

  // User message carrying the correction text used throughout this file.
  const correction = (uuid: string): InputMessage => ({
    type: 'user',
    uuid: uuid as any,
    message: { role: 'user', content: '不要 mock用 testing-library' },
  })

  // Invokes the hook as the main REPL thread; `agentId` identifies a
  // subagent when defined.
  const runPostSampling = (
    messages: PostSamplingInput['messages'],
    agentId: string | undefined,
  ) =>
    runSkillLearningPostSampling({
      querySource: 'repl_main_thread',
      messages,
      systemPrompt: [] as any,
      userContext: {},
      systemContext: {},
      toolUseContext: { agentId } as any,
    })

  // Store options addressing the global project under the learning home.
  const globalStore = () => {
    const home = process.env.CLAUDE_SKILL_LEARNING_HOME
    return {
      rootDir: home,
      project: {
        projectId: 'global',
        projectName: 'global',
        cwd: root,
        scope: 'global' as const,
        source: 'global' as const,
        storageDir: join(home!, 'global'),
      },
    }
  }

  test('records and learns from post-sampling main-thread messages', async () => {
    await runPostSampling([correction('u1')], undefined)

    const observations = await readObservations(globalStore())
    const instincts = await loadInstincts(globalStore())

    expect(observations).toHaveLength(1)
    expect(instincts[0]?.action).toContain('testing-library')
  })

  test('skips subagent sessions', async () => {
    await runPostSampling([correction('u1')], 'agent-1')

    const observations = await readObservations({
      rootDir: process.env.CLAUDE_SKILL_LEARNING_HOME,
    })
    expect(observations).toEqual([])
  })

  test('auto-evolves repeated corrections into an active learned skill', async () => {
    // The same correction three times in a single batch.
    await runPostSampling(
      [correction('u1'), correction('u2'), correction('u3')],
      undefined,
    )

    const skillFile = join(
      root,
      '.claude',
      'skills',
      'testing-choosing-between-mock-testing-library',
      'SKILL.md',
    )
    expect(existsSync(skillFile)).toBe(true)
  })
})

View File

@@ -0,0 +1,103 @@
import { describe, expect, test } from 'bun:test'
import { analyzeObservations } from '../sessionObserver.js'
import type { StoredSkillObservation } from '../observationStore.js'
/**
 * Observation fixture factory: starts from fixed defaults (session "s1",
 * project "p1", current working directory) and applies `partial` on top.
 */
function obs(partial: Partial<StoredSkillObservation>): StoredSkillObservation {
  const base: StoredSkillObservation = {
    id: partial.id ?? crypto.randomUUID(),
    timestamp: '2026-04-16T00:00:00.000Z',
    event: partial.event ?? 'user_message',
    sessionId: 's1',
    projectId: 'p1',
    projectName: 'project',
    cwd: process.cwd(),
  }
  // Fields present on `partial` override the defaults above.
  return { ...base, ...partial }
}
/** Tests for instinct extraction from recorded session observations. */
describe('sessionObserver', () => {
  test('extracts user correction instincts', () => {
    const message = obs({ messageText: '不要直接 mock用 testing-library' })
    const found = analyzeObservations([message])

    expect(found).toHaveLength(1)
    expect(found[0]?.domain).toBe('testing')
    expect(found[0]?.action).toContain('testing-library')
  })

  test('extracts repeated Grep -> Read -> Edit workflow instinct', () => {
    // The Grep/Read/Edit sequence, performed twice in a row.
    const toolNames = ['Grep', 'Read', 'Edit', 'Grep', 'Read', 'Edit']
    const toolStarts = toolNames.map((toolName, index) =>
      obs({ id: `o${index}`, event: 'tool_start', toolName }),
    )
    const found = analyzeObservations(toolStarts)

    const hasWorkflow = found.some(instinct => instinct.domain === 'workflow')
    expect(hasWorkflow).toBe(true)
  })

  test('does not invent instincts without clear patterns', () => {
    const found = analyzeObservations([obs({ messageText: 'hello' })])
    expect(found).toEqual([])
  })

  test('snapshots recent tool outcome on correction candidates', () => {
    const failedEdit = obs({
      id: 'o0',
      event: 'tool_complete',
      toolName: 'Edit',
      outcome: 'failure',
    })
    const correction = obs({
      id: 'o1',
      event: 'user_message',
      messageText: '不要直接 mock用 testing-library',
    })
    const [instinct] = analyzeObservations([failedEdit, correction])
    expect(instinct?.evidenceOutcome).toBe('failure')
  })

  test('marks tool-error-resolution candidates as success outcome', () => {
    // The same tool fails and then succeeds.
    const grepAttempt = (id: string, outcome: 'failure' | 'success') =>
      obs({ id, event: 'tool_complete', toolName: 'Grep', outcome })
    const found = analyzeObservations([
      grepAttempt('o0', 'failure'),
      grepAttempt('o1', 'success'),
    ])
    const resolution = found.find(i => i.domain === 'debugging')
    expect(resolution?.evidenceOutcome).toBe('success')
  })

  test('leaves evidenceOutcome undefined when no prior tool_complete exists', () => {
    const correction = obs({
      id: 'o0',
      event: 'user_message',
      messageText: '不要直接 mock用 testing-library',
    })
    const [instinct] = analyzeObservations([correction])
    expect(instinct?.evidenceOutcome).toBeUndefined()
  })

  test('single "always/must" convention message gets confidence <= 0.4', () => {
    const found = analyzeObservations([obs({ messageText: 'always use pnpm' })])
    expect(found.length).toBeGreaterThan(0)
    found.forEach(instinct => {
      expect(instinct.confidence).toBeLessThanOrEqual(0.4)
    })
  })
})

View File

@@ -0,0 +1,100 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import {
existsSync,
mkdirSync,
mkdtempSync,
readFileSync,
rmSync,
} from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import {
generateOrMergeSkillDraft,
writeLearnedSkill,
} from '../skillGenerator.js'
import { createInstinct } from '../instinctParser.js'
let root: string
let skillsRoot: string
// Each test gets a fresh temp project root with an empty `.claude/skills` dir.
beforeEach(() => {
  const tempRoot = mkdtempSync(join(tmpdir(), 'skill-learning-dedup-'))
  root = tempRoot
  skillsRoot = join(tempRoot, '.claude', 'skills')
  mkdirSync(skillsRoot, { recursive: true })
})
// Remove the per-test temp root. Cleanup retries and is best-effort so that a
// transient file handle (e.g. on Windows) cannot turn a passing test into a
// teardown failure.
afterEach(() => {
  try {
    rmSync(root, {
      recursive: true,
      force: true,
      maxRetries: 10,
      retryDelay: 100,
    })
  } catch {
    // Temp cleanup best-effort; the OS temp dir is reclaimed eventually.
  }
})
/**
 * Builds the fixture instinct shared by the dedup tests: every call uses the
 * same trigger/action/domain and differs only in its single evidence entry.
 *
 * @param evidence - The sole evidence string attached to the instinct.
 * @returns Whatever `createInstinct` returns for these fields.
 */
function testingInstinct(evidence: string) {
  const instinct = createInstinct({
    trigger: 'when writing tests',
    action: 'use testing-library',
    confidence: 0.85,
    domain: 'testing',
    source: 'session-observation',
    scope: 'project',
    evidence: [evidence],
    status: 'active',
  })
  return instinct
}
describe('skill dedup', () => {
  // Runs the generator for a one-instinct cluster against the temp skills root.
  const draftFor = (evidence: string) =>
    generateOrMergeSkillDraft(
      [testingInstinct(evidence)],
      { cwd: root },
      [skillsRoot],
    )

  test('first instinct cluster creates a new skill', async () => {
    const result = await draftFor('first')
    expect(result.action).toBe('create')
    if (result.action !== 'create') {
      return
    }
    await writeLearnedSkill(result.draft)
  })

  test('second run with same trigger appends evidence instead of writing a duplicate', async () => {
    const initial = await draftFor('first')
    expect(initial.action).toBe('create')
    if (initial.action === 'create') {
      await writeLearnedSkill(initial.draft)
    }

    // Second pass: the same cluster should collide with the skill just written.
    const repeat = await draftFor('second')
    expect(repeat.action).toBe('append-evidence')
    if (repeat.action === 'append-evidence') {
      expect(repeat.overlap).toBeGreaterThanOrEqual(0.8)
      const skillText = readFileSync(repeat.appendedPath, 'utf8')
      expect(skillText).toContain('Learned evidence')
      expect(skillText).toContain('- second')
    }

    // Exactly one SKILL.md may exist on disk after both passes.
    expect(findSkillMdFiles(skillsRoot)).toHaveLength(1)
  })
})
/**
 * Recursively collects the paths of every file named `SKILL.md` under `dir`.
 *
 * Directory entries are read with `withFileTypes` so no extra `stat` call is
 * needed per entry; this also means a dangling symlink cannot make the walk
 * throw. Symbolic links are not followed.
 *
 * @param dir - Directory to walk; must exist.
 * @returns Full paths (built with `join`) of all `SKILL.md` files found, in
 *   directory-listing order.
 */
function findSkillMdFiles(dir: string): string[] {
  const results: string[] = []
  for (const entry of readdirSync(dir, { withFileTypes: true })) {
    const full = join(dir, entry.name)
    if (entry.isDirectory()) {
      results.push(...findSkillMdFiles(full))
    } else if (entry.name === 'SKILL.md') {
      results.push(full)
    }
  }
  return results
}

View File

@@ -0,0 +1,360 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import {
existsSync,
mkdtempSync,
readFileSync,
rmSync,
writeFileSync,
mkdirSync,
} from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import {
findGapKeyByDraftPath,
readSkillGaps,
recordDraftHit,
recordSkillGap,
rejectSkillGap,
shouldPromoteToActive,
shouldPromoteToDraft,
type SkillGapRecord,
} from '../skillGapStore.js'
import type { SkillLearningProjectContext } from '../types.js'
let root: string
let project: SkillLearningProjectContext
// Fresh temp root per test, plus a global-scope project context whose storage
// lives under `<root>/global`.
beforeEach(() => {
  const tempRoot = mkdtempSync(join(tmpdir(), 'skill-gap-store-'))
  root = tempRoot
  project = {
    projectId: 'global',
    projectName: 'global',
    scope: 'global',
    source: 'global',
    cwd: tempRoot,
    storageDir: join(tempRoot, 'global'),
    projectRoot: tempRoot,
  }
})
// Best-effort removal of the per-test temp root, with retries.
afterEach(() => {
  const removal = {
    recursive: true,
    force: true,
    maxRetries: 10,
    retryDelay: 100,
  }
  try {
    rmSync(root, removal)
  } catch {
    // Temp cleanup best-effort; Windows may hold transient handles.
  }
})
/**
 * Path of the `.drafts` directory under the current test root's
 * `.claude/skills` tree.
 *
 * @returns `<root>/.claude/skills/.drafts` for the active temp root.
 */
function draftsDir(): string {
  const skillsDir = join(root, '.claude', 'skills')
  return join(skillsDir, '.drafts')
}
describe('recordSkillGap — P0-1 state machine', () => {
  // Every case records against the same temp root/project; only the prompt
  // varies between calls.
  const record = (prompt: string) =>
    recordSkillGap({ prompt, cwd: root, project, rootDir: root })

  test('first occurrence lands in pending and writes no skill file', async () => {
    const gap = await record('Refactor the data pipeline please')

    expect(gap.status).toBe('pending')
    expect(gap.count).toBe(1)
    expect(gap.draft).toBeUndefined()
    expect(gap.active).toBeUndefined()
    expect(existsSync(draftsDir())).toBe(false)
  })

  test('single Chinese exhortation stays pending — no draft, no active', async () => {
    const gap = await record('以后必须严格检查类型')

    expect(gap.status).toBe('pending')
    expect(gap.draft).toBeUndefined()
    expect(gap.active).toBeUndefined()
  })

  test('second occurrence promotes to draft but not active', async () => {
    const prompt = 'explain the build pipeline'
    await record(prompt)
    const repeated = await record(prompt)

    expect(repeated.status).toBe('draft')
    expect(repeated.count).toBe(2)
    expect(repeated.draft?.type).toBe('draft')
    expect(repeated.active).toBeUndefined()
    expect(existsSync(repeated.draft!.skillPath)).toBe(true)
  })

  test('single strong English exhortation ("must never") stays pending', async () => {
    const gap = await record('You must never commit secrets to git')

    expect(gap.status).toBe('pending')
    expect(gap.count).toBe(1)
    expect(gap.draft).toBeUndefined()
    expect(gap.active).toBeUndefined()
  })

  test('reaching count >= 4 promotes an existing draft to active', async () => {
    const prompt = 'clean up abandoned feature flags'
    // Occurrences one through three.
    for (let occurrence = 1; occurrence <= 3; occurrence++) {
      await record(prompt)
    }
    const fourth = await record(prompt)

    expect(fourth.status).toBe('active')
    expect(fourth.count).toBe(4)
    expect(fourth.draft).toBeDefined()
    expect(fourth.active?.type).toBe('active')
    expect(existsSync(fourth.active!.skillPath)).toBe(true)
  })

  test('rejected gaps do not regenerate artefacts on subsequent calls', async () => {
    const prompt = 'please format the README differently'
    await record(prompt)
    const drafted = await record(prompt)
    expect(drafted.status).toBe('draft')

    await rejectSkillGap(drafted.key, project, root)

    // The counter still advances, but the gap stays rejected with no active skill.
    const third = await record(prompt)
    expect(third.status).toBe('rejected')
    expect(third.count).toBe(3)
    expect(third.active).toBeUndefined()
  })
})
describe('recordDraftHit — draft hits escalation (P1-4 contract)', () => {
  const record = (prompt: string) =>
    recordSkillGap({ prompt, cwd: root, project, rootDir: root })

  // Records the same prompt twice and returns the second result, which the
  // tests below use as their draft-stage gap.
  const recordTwice = async (prompt: string) => {
    await record(prompt)
    return record(prompt)
  }

  test('draftHits reaching 2 escalates a draft to active', async () => {
    const drafted = await recordTwice('improve error handling in loader.ts')
    expect(drafted.status).toBe('draft')

    // Distinct session IDs — recordDraftHit enforces one hit per session so
    // a single session can't flip the draftHits>=2 active gate alone
    await recordDraftHit(drafted.key, project, root, 'session-a')
    const secondHit = await recordDraftHit(
      drafted.key,
      project,
      root,
      'session-b',
    )

    expect(secondHit?.draftHits).toBe(2)
    expect(secondHit?.status).toBe('active')
    expect(secondHit?.active?.type).toBe('active')
  })

  test('first draft hit does not promote to active', async () => {
    const drafted = await recordTwice('add missing null checks in handler')

    const singleHit = await recordDraftHit(drafted.key, project, root)

    expect(singleHit?.draftHits).toBe(1)
    expect(singleHit?.status).toBe('draft')
    expect(singleHit?.active).toBeUndefined()
  })

  test('findGapKeyByDraftPath resolves the correct gap for an existing draft', async () => {
    const drafted = await recordTwice('restructure the module boundaries')
    expect(drafted.draft?.skillPath).toBeTruthy()

    const resolvedKey = await findGapKeyByDraftPath(
      drafted.draft!.skillPath,
      project,
      root,
    )

    expect(resolvedKey).toBe(drafted.key)
  })

  test('findGapKeyByDraftPath returns undefined for unknown paths', async () => {
    const unknownPath = '/nowhere/.claude/skills/.drafts/mystery/SKILL.md'
    const resolvedKey = await findGapKeyByDraftPath(unknownPath, project, root)
    expect(resolvedKey).toBeUndefined()
  })

  test('recordDraftHit is a no-op on pending gaps', async () => {
    const pendingGap = await record('investigate the mysterious cache bug')

    const afterHit = await recordDraftHit(pendingGap.key, project, root)

    expect(afterHit?.status).toBe('pending')
    expect(afterHit?.draftHits).toBe(0)
  })
})
describe('shouldPromoteToDraft / shouldPromoteToActive', () => {
  // Builds a pending, count=1 record for the current temp root; `overrides`
  // replaces or appends individual fields.
  const makeRecord = (
    overrides: Partial<SkillGapRecord> = {},
  ): SkillGapRecord => ({
    key: 'k',
    prompt: 'refactor this',
    count: 1,
    draftHits: 0,
    draftHitSessions: [],
    status: 'pending',
    sessionId: 's',
    cwd: root,
    projectId: 'global',
    projectName: 'global',
    recommendations: [],
    createdAt: new Date().toISOString(),
    updatedAt: new Date().toISOString(),
    ...overrides,
  })

  test('shouldPromoteToDraft requires count >= 2 (strong signal no longer bypasses)', () => {
    const pending = makeRecord()

    expect(shouldPromoteToDraft(pending)).toBe(false)
    expect(shouldPromoteToDraft({ ...pending, count: 2 })).toBe(true)
    // Single strong-signal prompt no longer promotes — must also repeat.
    const strongSignal = { ...pending, prompt: '必须使用 testing-library' }
    expect(shouldPromoteToDraft(strongSignal)).toBe(false)
  })

  test('shouldPromoteToActive requires a draft plus threshold', () => {
    const drafted = makeRecord({
      prompt: 'refactor',
      count: 3,
      status: 'draft',
      draft: { type: 'draft', name: 'x', skillPath: '/tmp/x' },
    })

    // count=3 with zero draft hits is below both thresholds.
    expect(shouldPromoteToActive(drafted)).toBe(false)
    expect(shouldPromoteToActive({ ...drafted, count: 4 })).toBe(true)
    expect(shouldPromoteToActive({ ...drafted, draftHits: 2 })).toBe(true)
    const withoutDraft = { ...drafted, draft: undefined }
    expect(shouldPromoteToActive(withoutDraft)).toBe(false)
  })
})
describe('migrateLegacyGapState', () => {
  // Writes a version-1 `skill-gaps.json` holding one 'legacy-key' record with
  // no draftHits field, under `<root>/global`. `extra` fields are appended
  // after the common ones.
  const seedLegacyGap = (
    prompt: string,
    count: number,
    status: string,
    extra: Record<string, unknown> = {},
  ): void => {
    const storageDir = join(root, 'global')
    mkdirSync(storageDir, { recursive: true })
    const legacy = {
      version: 1,
      gaps: {
        'legacy-key': {
          key: 'legacy-key',
          prompt,
          count,
          status,
          sessionId: 's1',
          cwd: root,
          projectId: 'global',
          projectName: 'global',
          recommendations: [],
          createdAt: '2025-01-01T00:00:00.000Z',
          updatedAt: '2025-01-01T00:00:00.000Z',
          ...extra,
        },
      },
    }
    writeFileSync(
      join(storageDir, 'skill-gaps.json'),
      JSON.stringify(legacy),
      'utf8',
    )
  }

  test('resets legacy status=draft count=1 (no file) to pending', async () => {
    seedLegacyGap('old gap', 1, 'draft')

    const [migrated] = await readSkillGaps(project, root)

    expect(migrated?.status).toBe('pending')
    expect(migrated?.draftHits).toBe(0)
  })

  test('downgrades active without skill file to draft if draft exists', async () => {
    seedLegacyGap('old', 3, 'active', {
      draft: { type: 'draft', name: 'x', skillPath: '/tmp/x' },
    })

    const [migrated] = await readSkillGaps(project, root)

    expect(migrated?.status).toBe('draft')
  })
})

View File

@@ -0,0 +1,56 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import { existsSync, mkdtempSync, readFileSync, rmSync } from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { createInstinct } from '../instinctParser.js'
import { generateSkillDraft, writeLearnedSkill } from '../skillGenerator.js'
let cwd: string
// Each test writes into its own temp working directory.
beforeEach(() => {
  const prefix = join(tmpdir(), 'skill-learning-generator-')
  cwd = mkdtempSync(prefix)
})
// Remove the per-test working directory. Cleanup retries and is best-effort so
// that a transient file handle (e.g. on Windows) cannot turn a passing test
// into a teardown failure.
afterEach(() => {
  try {
    rmSync(cwd, {
      recursive: true,
      force: true,
      maxRetries: 10,
      retryDelay: 100,
    })
  } catch {
    // Temp cleanup best-effort; the OS temp dir is reclaimed eventually.
  }
})
describe('skillGenerator', () => {
  // Both tests use the same React-testing instinct and differ only in action.
  const reactTestInstinct = (action: string) =>
    createInstinct({
      trigger: 'when writing React tests',
      action,
      confidence: 0.85,
      domain: 'testing',
      source: 'session-observation',
      scope: 'project',
      evidence: ['user correction'],
    })

  test('generates a valid SKILL.md draft from instincts', () => {
    const source = reactTestInstinct(
      'use testing-library and avoid implementation mocks',
    )

    const { name, content } = generateSkillDraft([source], { cwd })

    expect(name).toContain('testing')
    expect(content).toContain('name:')
    expect(content).toContain('description:')
    expect(content).toContain('## Trigger')
    expect(content).toContain('## Evidence')
  })

  test('writes learned skills to project scope', async () => {
    const source = reactTestInstinct('use testing-library')
    const draft = generateSkillDraft([source], { cwd })

    const writtenPath = await writeLearnedSkill(draft)

    expect(existsSync(writtenPath)).toBe(true)
    const written = readFileSync(writtenPath, 'utf8')
    expect(written).toContain('use testing-library')
  })
})

View File

@@ -0,0 +1,154 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import {
existsSync,
mkdtempSync,
readFileSync,
rmSync,
writeFileSync,
} from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { call } from '../../../commands/skill-learning/skill-learning.js'
import { clearCommandsCache } from '../../../commands.js'
import { getSkillIndex, searchSkills } from '../../skillSearch/localSearch.js'
import {
resetSkillLearningConfig,
setSkillLearningConfigForTest,
} from '../config.js'
import { loadInstincts, readObservations } from '../index.js'
let root: string
let previousCwd: string
const originalEnv = { ...process.env }
// Isolate each smoke run: fresh temp cwd, an environment rebuilt from the
// snapshot taken at module load, and test overrides for the learning config.
beforeEach(() => {
  root = mkdtempSync(join(tmpdir(), 'skill-learning-smoke-'))
  previousCwd = process.cwd()
  process.chdir(root)

  process.env = { ...originalEnv }
  Object.assign(process.env, {
    CLAUDE_SKILL_LEARNING_HOME: join(root, 'learning-home'),
    CLAUDE_CONFIG_DIR: join(root, 'config'),
    SKILL_LEARNING_ENABLED: '1',
    ANTHROPIC_API_KEY: 'test-key',
    NODE_ENV: 'test',
  })

  setSkillLearningConfigForTest({ minConfidence: 0.3, minClusterSize: 1 })
})
// Undo everything beforeEach changed, then remove the temp root on a
// best-effort basis.
afterEach(() => {
  process.chdir(previousCwd)
  process.env = { ...originalEnv }
  resetSkillLearningConfig()
  clearCommandsCache()

  const removal = {
    recursive: true,
    force: true,
    maxRetries: 10,
    retryDelay: 100,
  }
  try {
    rmSync(root, removal)
  } catch {
    // Windows can keep a transient handle open after dynamic command loading.
    // Temp cleanup is best-effort; failing here would mask the smoke result.
  }
})
describe('skillLearning smoke', () => {
  test('ingests corrections, evolves a learned skill, and skill search finds it', async () => {
    // 1. Ingest a synthetic transcript through the slash-command entry point.
    const transcriptPath = join(root, 'session.jsonl')
    writeFileSync(transcriptPath, buildTranscript(), 'utf8')
    // Pass --min-session-length=0 so the 9-observation test transcript is not
    // skipped by the ECC-parity gate (default threshold: 10 observations).
    const ingested = await call(
      `ingest ${transcriptPath} --min-session-length=0`,
      {} as any,
    )
    expect(ingested.type).toBe('text')
    if (ingested.type === 'text') {
      expect(ingested.value).toContain('Ingested 9 observations')
    }

    // 2. Read back what was stored under the global project scope.
    const learningHome = process.env.CLAUDE_SKILL_LEARNING_HOME
    const storeOptions = {
      rootDir: learningHome,
      project: {
        projectId: 'global',
        projectName: 'global',
        cwd: root,
        scope: 'global' as const,
        source: 'global' as const,
        storageDir: join(learningHome!, 'global'),
      },
    }
    const stored = await readObservations(storeOptions)
    expect(stored).toHaveLength(9)
    const instincts = await loadInstincts(storeOptions)
    const testingInstinct = instincts.find(
      candidate => candidate.domain === 'testing',
    )
    expect(testingInstinct?.confidence).toBe(0.8)
    expect(testingInstinct?.status).toBe('active')

    // 3. Evolve instincts into learned skills.
    const evolved = await call('evolve --generate', {} as any)
    expect(evolved.type).toBe('text')
    if (evolved.type === 'text') {
      // Smoke transcript (9 obs, single fabricated instinct per domain) may
      // produce 1 or 2 candidates depending on sessionObserver's clustering.
      // Post-H15 we accept either — the smoke proves end-to-end wiring, not
      // exact cluster math.
      expect(evolved.value).toMatch(/Generated [12] learned skill\(s\)/)
    }
    const skillName = 'testing-choosing-between-mock-testing-library'
    const skillPath = join(root, '.claude', 'skills', skillName, 'SKILL.md')
    expect(existsSync(skillPath)).toBe(true)
    expect(readFileSync(skillPath, 'utf8')).toContain('Prefer testing-library')

    // 4. The generated skill must be indexed and rank first for a matching query.
    clearCommandsCache()
    const index = await getSkillIndex(root)
    expect(index.some(entry => entry.name === skillName)).toBe(true)
    const ranked = searchSkills(
      'write tests with testing library instead of mock',
      index,
      5,
    )
    expect(ranked[0]?.name).toBe(skillName)
  })
})
/**
 * Builds the 9-entry JSONL transcript used by the smoke test: the same user
 * correction at seconds 0, 4 and 8, with a Grep → Read → Edit tool sequence
 * between each pair of corrections.
 *
 * @returns One JSON document per line, terminated by a trailing newline.
 */
function buildTranscript(): string {
  const correction = '不要 mock用 testing-library'
  const testFile = 'src/example.test.tsx'
  const entries = [
    user(correction, 0),
    toolUse('Grep', { pattern: 'renderHook' }, 1),
    toolUse('Read', { file_path: testFile }, 2),
    toolUse('Edit', { file_path: testFile }, 3),
    user(correction, 4),
    toolUse('Grep', { pattern: 'mock' }, 5),
    toolUse('Read', { file_path: testFile }, 6),
    toolUse('Edit', { file_path: testFile }, 7),
    user(correction, 8),
  ]
  const lines = entries.map(entry => JSON.stringify(entry))
  return lines.join('\n') + '\n'
}
/**
 * Builds one user-message transcript entry for the smoke session.
 *
 * The timestamp is derived with `Date.UTC` rather than string concatenation,
 * so it stays a valid ISO-8601 value for any offset (10 and above included)
 * while remaining byte-identical to the previous output for 0–9.
 *
 * @param content - Text of the user message.
 * @param second - Offset in seconds from 2026-04-16T00:00:00Z.
 * @returns A plain object in the transcript entry shape used by this file.
 */
function user(content: string, second: number) {
  return {
    type: 'user',
    sessionId: 'smoke-session',
    cwd: root,
    timestamp: new Date(Date.UTC(2026, 3, 16, 0, 0, second)).toISOString(),
    message: { role: 'user', content },
  }
}
/**
 * Builds one assistant transcript entry containing a single `tool_use` block.
 *
 * The timestamp is derived with `Date.UTC` rather than string concatenation,
 * so it stays a valid ISO-8601 value for any offset (10 and above included)
 * while remaining byte-identical to the previous output for 0–9.
 *
 * @param name - Tool name placed in the `tool_use` block.
 * @param input - Tool input payload, passed through unchanged.
 * @param second - Offset in seconds from 2026-04-16T00:00:00Z.
 * @returns A plain object in the transcript entry shape used by this file.
 */
function toolUse(name: string, input: Record<string, unknown>, second: number) {
  return {
    type: 'assistant',
    sessionId: 'smoke-session',
    cwd: root,
    timestamp: new Date(Date.UTC(2026, 3, 16, 0, 0, second)).toISOString(),
    message: {
      role: 'assistant',
      content: [{ type: 'tool_use', name, input }],
    },
  }
}

View File

@@ -0,0 +1,161 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import {
existsSync,
mkdtempSync,
readFileSync,
rmSync,
writeFileSync,
} from 'node:fs'
import { mkdir } from 'node:fs/promises'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import type { LearnedSkillDraft } from '../types.js'
import {
applySkillLifecycleDecision,
compareExistingSkills,
decideSkillLifecycle,
loadExistingSkills,
} from '../skillLifecycle.js'
let root: string
// Each test gets its own temp directory, used directly as the skills root.
beforeEach(() => {
  const prefix = join(tmpdir(), 'skill-learning-lifecycle-')
  root = mkdtempSync(prefix)
})
// Remove the per-test skills root. Cleanup retries and is best-effort so that
// a transient file handle (e.g. on Windows) cannot turn a passing test into a
// teardown failure.
afterEach(() => {
  try {
    rmSync(root, {
      recursive: true,
      force: true,
      maxRetries: 10,
      retryDelay: 100,
    })
  } catch {
    // Temp cleanup best-effort; the OS temp dir is reclaimed eventually.
  }
})
describe('skillLifecycle', () => {
  // Skill text reused by several cases as both the existing body and the draft.
  const MOCK_FREE_TESTING =
    'Use testing-library for React tests and avoid implementation mocks'

  test('detects overlapping existing skills', async () => {
    await writeSkill('react-testing', 'Use testing-library for React tests')
    const incoming = draftSkill('react-testing-updated', MOCK_FREE_TESTING)

    const overlaps = await compareExistingSkills(incoming, [root])

    expect(overlaps[0]?.name).toBe('react-testing')
  })

  test('replace archives old skill so it leaves active index', async () => {
    await writeSkill('react-testing', MOCK_FREE_TESTING)
    const incoming = draftSkill('react-testing-updated', MOCK_FREE_TESTING)
    const overlaps = await compareExistingSkills(incoming, [root])

    const decision = decideSkillLifecycle(incoming, overlaps)
    expect(decision.type).toBe('replace')

    const applied = await applySkillLifecycleDecision(decision)
    expect(applied.activePath).toBeDefined()
    expect(applied.archivedPath).toBeDefined()
    // The old directory is gone and a manifest sits in the archive.
    expect(existsSync(join(root, 'react-testing'))).toBe(false)
    const manifestPath = join(
      applied.archivedPath!,
      'replacement-manifest.json',
    )
    expect(existsSync(manifestPath)).toBe(true)
    const remaining = await loadExistingSkills([root])
    expect(remaining.map(skill => skill.name)).not.toContain('react-testing')
  })

  test('create writes new skill when no overlap exists', async () => {
    const incoming = draftSkill(
      'new-testing',
      'A unique learned testing workflow',
    )

    const decision = decideSkillLifecycle(incoming, [])
    const applied = await applySkillLifecycleDecision(decision)

    expect(applied.activePath).toBeDefined()
    expect(readFileSync(applied.activePath!, 'utf8')).toContain('new-testing')
  })

  test('merge skips user-authored skill without origin field and logs warning', async () => {
    const body = MOCK_FREE_TESTING
    // `null` origin omits the `origin:` frontmatter line.
    await writeSkill('react-testing', body, null)
    // Draft overlapping the existing skill; confidence 0.6 is what the
    // assertion below relies on to get 'merge' rather than 'replace'.
    const incoming: LearnedSkillDraft = {
      name: 'react-testing',
      description: body,
      scope: 'project',
      sourceInstinctIds: ['i1'],
      confidence: 0.6,
      content: `---\nname: react-testing\ndescription: ${JSON.stringify(body)}\n---\n\n# React Testing\n\n${body}\n`,
      outputPath: join(root, 'react-testing-patch'),
    }
    const overlaps = await compareExistingSkills(incoming, [root])
    const decision = decideSkillLifecycle(incoming, overlaps)
    expect(decision.type).toBe('merge')

    // Capture stderr while the decision is applied; always restore the writer.
    const captured: string[] = []
    const realWrite = process.stderr.write.bind(process.stderr)
    process.stderr.write = (chunk: unknown) => {
      captured.push(String(chunk))
      return true
    }
    try {
      const applied = await applySkillLifecycleDecision(decision)
      expect(applied.activePath).toBeUndefined()
      const warned = captured.some(line =>
        line.includes('[skill-learning] skip user-authored skill'),
      )
      expect(warned).toBe(true)
    } finally {
      process.stderr.write = realWrite
    }
  })

  test('replace proceeds normally for skill-learning-generated skill', async () => {
    await writeSkill('generated-testing', MOCK_FREE_TESTING, 'skill-learning')
    const incoming = draftSkill('generated-testing-updated', MOCK_FREE_TESTING)
    const overlaps = await compareExistingSkills(incoming, [root])

    const decision = decideSkillLifecycle(incoming, overlaps)
    expect(decision.type).toBe('replace')

    const applied = await applySkillLifecycleDecision(decision)
    expect(applied.activePath).toBeDefined()
    expect(applied.archivedPath).toBeDefined()
  })
})
/**
 * Writes `<root>/<name>/SKILL.md` with YAML frontmatter and a short body.
 *
 * @param name - Skill directory name; also used as the `name:` field and heading.
 * @param body - Text used for the JSON-quoted `description:` and the body.
 * @param origin - Value for the `origin:` frontmatter line; pass `null` to
 *   omit the line entirely. Defaults to `'skill-learning'`.
 */
async function writeSkill(
  name: string,
  body: string,
  origin: string | null = 'skill-learning',
): Promise<void> {
  const skillDir = join(root, name)
  await mkdir(skillDir, { recursive: true })

  const frontmatter = [`name: ${name}`, `description: ${JSON.stringify(body)}`]
  if (origin !== null) {
    frontmatter.push(`origin: ${origin}`)
  }
  const markdown = `---\n${frontmatter.join('\n')}\n---\n\n# ${name}\n\n${body}\n`
  writeFileSync(join(skillDir, 'SKILL.md'), markdown)
}
/**
 * Builds a project-scoped draft with confidence 0.9 whose description and
 * body are both `text`, targeting `<root>/<name>`.
 *
 * @param name - Draft skill name; also the heading and output directory name.
 * @param text - Text used for the description and the markdown body.
 * @returns The draft object.
 */
function draftSkill(name: string, text: string): LearnedSkillDraft {
  const quotedDescription = JSON.stringify(text)
  const content = `---\nname: ${name}\ndescription: ${quotedDescription}\n---\n\n# ${name}\n\n${text}\n`
  const outputPath = join(root, name)
  return {
    name,
    description: text,
    scope: 'project',
    sourceInstinctIds: ['i1'],
    confidence: 0.9,
    content,
    outputPath,
  }
}

View File

@@ -0,0 +1,372 @@
/**
* Unit tests for H5 (LLM call throttle), H6 (message watermark dedup),
* and H7 (circuit breaker) improvements.
*/
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import { mkdtempSync, rmSync } from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import {
resetSkillLearningConfig,
setSkillLearningConfigForTest,
} from '../config.js'
import { resetCircuitBreaker } from '../llmObserverBackend.js'
import {
resetRuntimeLLMBookkeeping,
resetRuntimeObserverForTest,
runSkillLearningPostSampling,
} from '../runtimeObserver.js'
import type { REPLHookContext } from '../../../utils/hooks/postSamplingHooks.js'
import {
setActiveObserverBackend,
getActiveObserverBackend,
registerObserverBackend,
type ObserverBackend,
} from '../observerBackend.js'
import type { StoredSkillObservation } from '../observationStore.js'
let root: string
let previousCwd: string
const originalEnv = { ...process.env }
const originalBackendName = getActiveObserverBackend().name
/**
 * Wraps plain `{ uuid, content }` pairs into a hook context whose messages are
 * all user messages on the `repl_main_thread` query source. The remaining
 * context fields are empty placeholders.
 *
 * @param messages - Message ids and text, in conversation order.
 * @returns A context object suitable for `runSkillLearningPostSampling`.
 */
function makeCtx(
  messages: Array<{ uuid: string; content: string }>,
): REPLHookContext {
  const userMessages = messages.map(entry => ({
    type: 'user' as const,
    uuid: entry.uuid as any,
    message: { role: 'user' as const, content: entry.content },
  }))
  return {
    querySource: 'repl_main_thread',
    messages: userMessages,
    systemPrompt: [] as any,
    userContext: {},
    systemContext: {},
    toolUseContext: { agentId: undefined } as any,
  }
}
/**
 * Produces five messages sharing one fixed text, with uuids `<prefix>-0`
 * through `<prefix>-4`.
 *
 * @param prefix - Uuid prefix, used to make batches distinct from one another.
 * @returns Exactly five `{ uuid, content }` pairs.
 */
function make5Msgs(prefix: string): Array<{ uuid: string; content: string }> {
  const batch: Array<{ uuid: string; content: string }> = []
  for (let index = 0; index < 5; index++) {
    batch.push({
      uuid: `${prefix}-${index}`,
      content: '不要 mock用 testing-library',
    })
  }
  return batch
}
/**
 * Produces `count` user_message observations with ids `o0`, `o1`, … that
 * share fixed session/project fields and are stamped with the current time.
 *
 * @param count - Number of observations to create.
 * @returns The generated observations, in id order.
 */
function makeObs(count: number): StoredSkillObservation[] {
  const observations: StoredSkillObservation[] = []
  for (let index = 0; index < count; index++) {
    observations.push({
      id: `o${index}`,
      timestamp: new Date().toISOString(),
      event: 'user_message' as const,
      sessionId: 's1',
      projectId: 'p1',
      projectName: 'project',
      cwd: '/tmp',
      messageText: 'test message',
    })
  }
  return observations
}
// Isolate each test: fresh temp cwd, an environment rebuilt from the snapshot
// taken at module load, and observer / circuit-breaker / backend state reset.
beforeEach(() => {
  root = mkdtempSync(join(tmpdir(), 'skill-throttle-test-'))
  previousCwd = process.cwd()
  process.chdir(root)

  process.env = { ...originalEnv }
  Object.assign(process.env, {
    CLAUDE_SKILL_LEARNING_HOME: join(root, 'learning-home'),
    CLAUDE_CONFIG_DIR: join(root, 'config'),
    SKILL_LEARNING_ENABLED: '1',
    NODE_ENV: 'test',
  })

  resetRuntimeObserverForTest()
  resetCircuitBreaker()
  setActiveObserverBackend(originalBackendName)
})
// Restore cwd, env and config, then remove the temp root. The observer,
// circuit-breaker and backend resets run in `finally` so that a failed
// directory removal can never leave module-level state behind for the next
// test. Removal itself retries and is best-effort.
afterEach(() => {
  process.chdir(previousCwd)
  process.env = { ...originalEnv }
  resetSkillLearningConfig()
  try {
    rmSync(root, {
      recursive: true,
      force: true,
      maxRetries: 10,
      retryDelay: 100,
    })
  } catch {
    // Temp cleanup best-effort; the OS temp dir is reclaimed eventually.
  } finally {
    resetRuntimeObserverForTest()
    resetCircuitBreaker()
    setActiveObserverBackend(originalBackendName)
  }
})
// ---------------------------------------------------------------------------
// H5: LLM throttle — minimum observation count gate
// ---------------------------------------------------------------------------
describe('H5: LLM call throttle', () => {
  // Registers and activates a backend that only counts how often `analyze`
  // is invoked; the returned tally is read by the assertions.
  const trackCalls = (name: string) => {
    const tally = { calls: 0 }
    const backend: ObserverBackend = {
      name,
      analyze() {
        tally.calls++
        return []
      },
    }
    registerObserverBackend(backend)
    setActiveObserverBackend(name)
    return tally
  }

  test('fewer than 5 observations routes to heuristic — LLM backend not called', async () => {
    const tally = trackCalls('tracking-under5')

    // 3 messages → 3 observations, below the threshold of 5.
    const correction = '不要 mock用 testing-library'
    await runSkillLearningPostSampling(
      makeCtx([
        { uuid: 'u5a', content: correction },
        { uuid: 'u5b', content: correction },
        { uuid: 'u5c', content: correction },
      ]),
    )

    expect(tally.calls).toBe(0)
  })

  test('session cap: more calls than cap reaches heuristic fallback', async () => {
    // Cap at 1 call, cooldown 0ms.
    setSkillLearningConfigForTest({
      llm: { maxCallsPerSession: 1, cooldownMs: 0 },
    })
    const tally = trackCalls('tracking-cap')

    // First call with 5 messages — reaches LLM.
    await runSkillLearningPostSampling(makeCtx(make5Msgs('cap1')))
    expect(tally.calls).toBe(1)

    // Second call with 5 different messages — cap hit, must NOT reach LLM.
    await runSkillLearningPostSampling(makeCtx(make5Msgs('cap2')))
    expect(tally.calls).toBe(1)
  })

  test('cooldown gate: second call within cooldown window skips LLM', async () => {
    // Very long cooldown — second call is always within window.
    setSkillLearningConfigForTest({
      llm: { cooldownMs: 999_999_000, maxCallsPerSession: 100 },
    })
    const tally = trackCalls('tracking-cooldown')

    await runSkillLearningPostSampling(makeCtx(make5Msgs('cd1')))
    expect(tally.calls).toBe(1)

    // Second call — still within 999999 second cooldown.
    await runSkillLearningPostSampling(makeCtx(make5Msgs('cd2')))
    expect(tally.calls).toBe(1)
  })

  test('resetRuntimeLLMBookkeeping resets session counter and timestamps', async () => {
    setSkillLearningConfigForTest({
      llm: { maxCallsPerSession: 1, cooldownMs: 0 },
    })
    const tally = trackCalls('tracking-reset')

    // First call reaches LLM; cap = 1, so second call is blocked.
    await runSkillLearningPostSampling(makeCtx(make5Msgs('rr1')))
    await runSkillLearningPostSampling(makeCtx(make5Msgs('rr2')))
    expect(tally.calls).toBe(1)

    // After reset the counter clears — next call reaches LLM again.
    resetRuntimeLLMBookkeeping()
    await runSkillLearningPostSampling(makeCtx(make5Msgs('rr3')))
    expect(tally.calls).toBe(2)
  })
})
// ---------------------------------------------------------------------------
// H6: Message watermark dedup
// ---------------------------------------------------------------------------
describe('H6: message watermark dedup', () => {
  // Registers and activates a backend that accumulates how many observations
  // it has been handed, then loosens the LLM throttle so only dedup can stop
  // a call. The returned tally is read by the assertions.
  const countObservations = (name: string) => {
    const tally = { observations: 0 }
    const backend: ObserverBackend = {
      name,
      analyze(observations) {
        tally.observations += observations.length
        return []
      },
    }
    registerObserverBackend(backend)
    setActiveObserverBackend(name)
    setSkillLearningConfigForTest({
      llm: { cooldownMs: 0, maxCallsPerSession: 100 },
    })
    return tally
  }

  test('same message uuids are not re-processed in a subsequent call', async () => {
    // Use a backend that counts observations to detect dedup.
    const tally = countObservations('counting-dedup')
    const messages = make5Msgs('ded')

    // First call: 5 new message observations.
    await runSkillLearningPostSampling(makeCtx(messages))
    const seenAfterFirst = tally.observations

    // Second call with SAME messages: all uuids already seen → 0 new
    // observations from messages. The early `if (observations.length === 0) return`
    // fires and the backend is never called.
    await runSkillLearningPostSampling(makeCtx(messages))
    const seenAfterSecond = tally.observations

    expect(seenAfterSecond).toBe(seenAfterFirst)
  })

  test('different message uuids are always processed', async () => {
    const tally = countObservations('counting-dedup-new')

    await runSkillLearningPostSampling(makeCtx(make5Msgs('new1')))
    const seenAfterFirst = tally.observations

    // Different uuids — all 5 new messages pass dedup.
    await runSkillLearningPostSampling(makeCtx(make5Msgs('new2')))

    expect(tally.observations).toBeGreaterThan(seenAfterFirst)
  })

  test('resetRuntimeLLMBookkeeping clears dedup set — same uuids reprocessed', async () => {
    const tally = countObservations('counting-dedup-clr')
    const messages = make5Msgs('clr')

    await runSkillLearningPostSampling(makeCtx(messages))
    const seenAfterFirst = tally.observations

    // After reset, dedup set is cleared — same messages are reprocessed.
    resetRuntimeLLMBookkeeping()
    await runSkillLearningPostSampling(makeCtx(messages))

    expect(tally.observations).toBeGreaterThan(seenAfterFirst)
  })
})
// ---------------------------------------------------------------------------
// H7: Circuit breaker (tests the llmObserverBackend state machine directly)
// ---------------------------------------------------------------------------
describe('H7: circuit breaker', () => {
  // Each case exercises the real backend; resolve it lazily through a single
  // helper instead of repeating the dynamic import in every test body.
  const loadBackend = async () => {
    const backendModule = await import('../llmObserverBackend.js')
    return backendModule.llmObserverBackend
  }

  test('circuit opens after failure threshold and subsequent calls return heuristic result without hitting queryHaiku', async () => {
    // Per the original author's notes, queryHaiku is expected to fail in the
    // test environment (no API key), so every analyze() call should count as
    // one failure and fall back to the heuristic result (possibly []).
    const backend = await loadBackend()
    resetCircuitBreaker()
    setSkillLearningConfigForTest({
      llm: { failureThreshold: 3, circuitCooldownMs: 60_000 },
    })
    const observations = makeObs(5)

    // As many calls as the configured failure threshold; the last one is
    // expected to open the circuit.
    for (let attempt = 0; attempt < 3; attempt += 1) {
      await backend.analyze(observations)
    }

    // Closing the circuit again must let the next call run normally. The
    // open/closed state is not observable from here, so the assertion only
    // checks that the call still yields an array.
    resetCircuitBreaker()
    const afterReset = await backend.analyze(observations)
    expect(Array.isArray(afterReset)).toBe(true)
  })

  test('circuit breaker env vars are respected', async () => {
    // With a threshold of 1 a single failure should be enough to open the
    // circuit.
    const backend = await loadBackend()
    resetCircuitBreaker()
    setSkillLearningConfigForTest({
      llm: { failureThreshold: 1, circuitCooldownMs: 60_000 },
    })
    const observations = makeObs(5)
    await backend.analyze(observations)

    // The short-circuit itself cannot be observed without mocking; verify
    // that a reset leaves the backend usable.
    resetCircuitBreaker()
    const afterReset = await backend.analyze(observations)
    expect(Array.isArray(afterReset)).toBe(true)
  })

  test('empty observations bypass circuit breaker entirely', async () => {
    const backend = await loadBackend()
    resetCircuitBreaker()
    // No observations: the backend is expected to answer [] whatever the
    // circuit state is.
    const outcome = await backend.analyze([])
    expect(outcome).toEqual([])
  })

  test('resetCircuitBreaker resets state to closed', async () => {
    const backend = await loadBackend()
    resetCircuitBreaker()
    // From a clean state the call yields an array (LLM result or fallback).
    const firstRun = await backend.analyze(makeObs(3))
    expect(Array.isArray(firstRun)).toBe(true)
    // Resetting again must be harmless.
    resetCircuitBreaker()
    const secondRun = await backend.analyze(makeObs(3))
    expect(Array.isArray(secondRun)).toBe(true)
  })
})

View File

@@ -0,0 +1,196 @@
import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test'
import { mkdtempSync, rmSync } from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { readObservations } from '../observationStore.js'
import {
hasToolHookObservationsForTurn,
pruneEmittedTurns,
recordToolComplete,
recordToolError,
recordToolStart,
recordUserCorrection,
resetToolHookBookkeeping,
resetToolHookDepsCache,
runToolCallWithSkillLearningHooks,
} from '../toolEventObserver.js'
let rootDir: string
// Give every test its own temporary directory and clean in-memory bookkeeping.
beforeEach(() => {
rootDir = mkdtempSync(join(tmpdir(), 'skill-learning-tool-hook-'))
resetToolHookBookkeeping()
// Expose the temp dir through the environment; presumably this is where the
// observation store resolves its root from — confirm against observationStore.
process.env.CLAUDE_SKILL_LEARNING_HOME = rootDir
})
// Undo the environment override and delete the per-test temporary directory.
afterEach(() => {
delete process.env.CLAUDE_SKILL_LEARNING_HOME
// `force: true` keeps cleanup from throwing if the directory is already gone.
rmSync(rootDir, { recursive: true, force: true })
})
/**
 * Build the observation context shared by the tests: a fixed session/turn and
 * a project descriptor rooted at the current per-test temp directory.
 */
function ctx() {
  const projectId = 'p1'
  const projectName = 'project'
  const project = {
    projectId,
    projectName,
    cwd: rootDir,
    scope: 'project' as const,
    source: 'global' as const,
    storageDir: join(rootDir, 'projects', 'p1'),
  }
  return {
    sessionId: 'tool-hook-session',
    turn: 1,
    projectId,
    projectName,
    cwd: rootDir,
    project,
  }
}
describe('toolEventObserver', () => {
// Read back everything stored for the test project in the current temp root.
const loadObservations = () =>
  readObservations({ rootDir, project: ctx().project })

test('records tool_start with tool-hook source', async () => {
  await recordToolStart(ctx(), 'Grep', { pattern: 'foo' })
  const stored = await loadObservations()
  expect(stored).toHaveLength(1)
  const [first] = stored
  expect(first?.event).toBe('tool_start')
  expect(first?.source).toBe('tool-hook')
  expect(first?.toolName).toBe('Grep')
})

test('records tool_complete with success outcome', async () => {
  await recordToolComplete(ctx(), 'Edit', 'ok', 'success')
  const [first] = await loadObservations()
  expect(first?.event).toBe('tool_complete')
  expect(first?.outcome).toBe('success')
})

test('records tool_error as tool_complete with failure outcome', async () => {
  await recordToolError(ctx(), 'Bash', new Error('boom'))
  const [first] = await loadObservations()
  expect(first?.outcome).toBe('failure')
})

test('records user correction message', async () => {
  await recordUserCorrection(ctx(), '不要 mock用 testing-library')
  const [first] = await loadObservations()
  expect(first?.event).toBe('user_message')
  expect(first?.messageText).toContain('testing-library')
})

test('tracks which session+turn has tool-hook observations', async () => {
  const session = 'tool-hook-session'
  // Nothing recorded yet for turn 1.
  expect(hasToolHookObservationsForTurn(session, 1)).toBe(false)
  await recordToolStart(ctx(), 'Grep')
  // Only the turn that actually emitted an observation is tracked.
  expect(hasToolHookObservationsForTurn(session, 1)).toBe(true)
  expect(hasToolHookObservationsForTurn(session, 2)).toBe(false)
})
// H11: emittedTurns bounded memory tests
describe('pruneEmittedTurns', () => {
  test('prunes Set entries exceeding SET_MAX keeping most recent', async () => {
    const sessionId = 'big-session'
    // One turn more than the per-session threshold (500 according to the
    // original test notes), recorded strictly in order.
    const turns = Array.from({ length: 501 }, (_, index) => index + 1)
    for (const turn of turns) {
      await recordToolStart({ ...ctx(), sessionId, turn }, 'Grep')
    }
    // Oldest entry is gone, newest survives, and a turn inside the retained
    // window (250 most recent per the original notes) is still tracked.
    expect(hasToolHookObservationsForTurn(sessionId, 1)).toBe(false)
    expect(hasToolHookObservationsForTurn(sessionId, 501)).toBe(true)
    expect(hasToolHookObservationsForTurn(sessionId, 252)).toBe(true)
  })

  test('prunes Map entries exceeding MAP_MAX keeping most recent insertions', async () => {
    // One session more than the session threshold (50 per the original notes).
    const sessionIds = Array.from({ length: 51 }, (_, index) => `session-${index}`)
    for (const sessionId of sessionIds) {
      await recordToolStart({ ...ctx(), sessionId, turn: 1 }, 'Grep')
    }
    // The first-inserted session was dropped; the last-inserted one remains.
    expect(hasToolHookObservationsForTurn('session-0', 1)).toBe(false)
    expect(hasToolHookObservationsForTurn('session-50', 1)).toBe(true)
  })

  test('pruneEmittedTurns is idempotent when within limits', async () => {
    await recordToolStart(ctx(), 'Grep')
    // Pruning repeatedly below the limits must not forget tracked turns.
    for (let round = 0; round < 2; round += 1) {
      pruneEmittedTurns()
    }
    expect(hasToolHookObservationsForTurn('tool-hook-session', 1)).toBe(true)
  })
})
// H10: fire-and-forget / flag-off tests
describe('runToolCallWithSkillLearningHooks', () => {
  afterEach(() => {
    // Drop cached hook dependencies and the flag override so later tests see
    // the default configuration.
    resetToolHookDepsCache()
    delete process.env.SKILL_LEARNING_ENABLED
  })

  test('invoke completes before recordToolStart promise resolves (fire-and-forget)', async () => {
    process.env.SKILL_LEARNING_ENABLED = '1'
    resetToolHookDepsCache()
    // The previous version declared a deferred promise and a completion-order
    // log that were never wired into the wrapper or asserted on; they have
    // been removed. What this test really checks is that, with the feature
    // on, the wrapper runs the tool and hands back its result unchanged.
    let invokeCompleted = false
    const result = await runToolCallWithSkillLearningHooks(
      'TestTool',
      {},
      { sessionId: 'test-ff-session', turn: 99 },
      async () => {
        // Yield briefly so any hook work scheduled by the wrapper gets a
        // chance to interleave with the tool call.
        await new Promise(res => setTimeout(res, 5))
        invokeCompleted = true
        return { data: 'done' }
      },
    )
    // Observation writes may still be in flight at this point.
    expect(result).toEqual({ data: 'done' })
    expect(invokeCompleted).toBe(true)
  })

  test('flag off: wrapper skips observation entirely and returns invoke result', async () => {
    process.env.SKILL_LEARNING_ENABLED = '0'
    resetToolHookDepsCache()
    let invokeCalled = false
    const result = await runToolCallWithSkillLearningHooks(
      'TestTool',
      {},
      {},
      async () => {
        invokeCalled = true
        return { data: 42 }
      },
    )
    expect(invokeCalled).toBe(true)
    expect(result).toEqual({ data: 42 })
    // With the feature disabled nothing may be persisted.
    const obs = await readObservations({ rootDir, project: ctx().project })
    expect(obs).toHaveLength(0)
  })
})
})

View File

@@ -0,0 +1,164 @@
import { mkdir, writeFile } from 'node:fs/promises'
import { existsSync } from 'node:fs'
import { join } from 'node:path'
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
import { clearCommandsCache } from '../../commands.js'
import type { Instinct } from './instinctParser.js'
import { normalizeSkillName } from './learningPolicy.js'
import type { SkillLearningScope } from './types.js'
/** Optional overrides accepted when generating a learned agent draft. */
export type AgentGeneratorOptions = {
// Base directory for project-scoped output (`<cwd>/.claude/agents`); defaults to process.cwd().
cwd?: string
// Output directory for non-project scope; defaults to `<claude config home>/agents`.
globalAgentsDir?: string
// When set (non-empty), used as the output directory regardless of scope.
outputRoot?: string
// Overrides the generated agent name (still passed through normalizeSkillName).
name?: string
// Overrides the description derived from the first instinct's trigger.
description?: string
// Overrides the scope otherwise taken from the first instinct.
scope?: SkillLearningScope
}
/** In-memory description of a learned agent file, produced by generateAgentDraft. */
export type LearnedAgentDraft = {
// Normalized agent name; also the basename of the written `.md` file.
name: string
description: string
scope: SkillLearningScope
// Ids of the instincts the draft was generated from.
sourceInstinctIds: string[]
// Mean confidence of the source instincts, rounded to two decimals.
confidence: number
// Full markdown document (frontmatter + body) to write.
content: string
// Directory (not file path) the agent file is written into.
outputPath: string
}
/**
 * Turn a non-empty group of instincts into a learned-agent draft.
 *
 * Name, description and scope come from `options` when provided, otherwise
 * they are derived from the instincts (scope falls back to `'project'`).
 *
 * @throws Error when `instincts` is empty.
 */
export function generateAgentDraft(
  instincts: Instinct[],
  options?: AgentGeneratorOptions,
): LearnedAgentDraft {
  if (instincts.length === 0) {
    throw new Error('Cannot generate an agent draft without instincts')
  }

  const scope = options?.scope ?? instincts[0]?.scope ?? 'project'
  const name = normalizeSkillName(options?.name ?? buildAgentName(instincts))
  const meanConfidence = averageConfidence(instincts)
  const description = options?.description ?? buildDescription(instincts)
  const outputPath = getLearnedAgentPath(name, scope, options)

  // The document receives the unrounded mean and rounds it itself; the draft
  // field below carries the two-decimal value.
  const content = buildAgentContent({
    name,
    description,
    confidence: meanConfidence,
    instincts,
  })

  const draft: LearnedAgentDraft = {
    name,
    description,
    scope,
    sourceInstinctIds: instincts.map(({ id }) => id),
    confidence: Number(meanConfidence.toFixed(2)),
    content,
    outputPath,
  }
  return draft
}
/**
 * Persist a learned agent draft as `<outputPath>/<name>.md`.
 *
 * An existing file is never overwritten: the write uses exclusive-create
 * (`wx`), so the "already exists" decision and the write are a single atomic
 * step instead of a separate existence check followed by a write.
 *
 * @returns The path of the agent file (pre-existing or newly written).
 * @throws Any file-system error other than the target already existing.
 */
export async function writeLearnedAgent(
  draft: LearnedAgentDraft,
): Promise<string> {
  await mkdir(draft.outputPath, { recursive: true })
  const filePath = join(draft.outputPath, `${draft.name}.md`)
  try {
    await writeFile(filePath, draft.content, { encoding: 'utf8', flag: 'wx' })
  } catch (error: unknown) {
    const code =
      typeof error === 'object' && error !== null && 'code' in error
        ? (error as { code?: unknown }).code
        : undefined
    // Someone (or an earlier run) already created the file: keep it as is and
    // skip the cache invalidation, matching the previous early return.
    if (code === 'EEXIST') return filePath
    throw error
  }
  // Only a newly written file requires the commands cache to be refreshed.
  clearCommandsCache()
  return filePath
}
/**
 * Resolve the directory learned agents are written to.
 *
 * Precedence: a non-empty `options.outputRoot`, then `<cwd>/.claude/agents`
 * for project scope, then `options.globalAgentsDir` or the `agents` folder
 * under the Claude config home. `_name` is accepted but unused.
 */
export function getLearnedAgentPath(
  _name: string,
  scope: SkillLearningScope,
  options?: AgentGeneratorOptions,
): string {
  const explicitRoot = options?.outputRoot
  if (explicitRoot) return explicitRoot

  return scope === 'project'
    ? join(options?.cwd ?? process.cwd(), '.claude', 'agents')
    : (options?.globalAgentsDir ?? join(getClaudeConfigHomeDir(), 'agents'))
}
/**
 * Derive an agent name: `learned-agent-` followed by up to four keywords
 * taken from the instincts, normalized; `'learned-agent'` if that is empty.
 */
function buildAgentName(instincts: Instinct[]): string {
  const parts = ['learned', 'agent'].concat(extractWords(instincts, 4))
  const normalized = normalizeSkillName(parts.join('-'))
  return normalized || 'learned-agent'
}
/**
 * Describe the agent with the first instinct's trigger: whitespace runs are
 * collapsed to single spaces and the result is capped at 120 characters.
 */
function buildDescription(instincts: Instinct[]): string {
  const rawTrigger =
    instincts[0]?.trigger ?? 'Run the learned multi-step workflow'
  const singleSpaced = rawTrigger.replace(/\s+/g, ' ')
  return singleSpaced.slice(0, 120)
}
/**
 * Render the markdown document for a learned agent: YAML-style frontmatter
 * followed by the Triggers, Playbook and Evidence bullet sections. The text
 * ends with a trailing newline.
 */
function buildAgentContent(params: {
  name: string
  description: string
  confidence: number
  instincts: Instinct[]
}): string {
  const bullets = (items: string[]): string =>
    items.map(item => `- ${item}`).join('\n')

  const roundedConfidence = Number(params.confidence.toFixed(2))
  const sourceIds = params.instincts
    .map(instinct => JSON.stringify(instinct.id))
    .join(', ')

  const frontmatter = [
    '---',
    `name: ${params.name}`,
    `description: ${JSON.stringify(params.description)}`,
    'origin: skill-learning',
    `confidence: ${roundedConfidence}`,
    `evolved_from: [${sourceIds}]`,
    '---',
  ]

  const body = [
    '',
    `You are the ${params.name} learned agent.`,
    '',
    '## Triggers',
    '',
    bullets(params.instincts.map(instinct => instinct.trigger)),
    '',
    '## Playbook',
    '',
    bullets(params.instincts.map(instinct => instinct.action)),
    '',
    '## Evidence',
    '',
    bullets(params.instincts.flatMap(instinct => instinct.evidence)),
    '',
  ]

  return [...frontmatter, ...body].join('\n')
}
/** Arithmetic mean of the instincts' confidence values (NaN for an empty list). */
function averageConfidence(instincts: Instinct[]): number {
  let total = 0
  for (const { confidence } of instincts) {
    total += confidence
  }
  return total / instincts.length
}
/**
 * Collect distinct lowercase keywords from the instincts' trigger and action
 * text, in order of first appearance. Tokens are runs of `a-z0-9`; tokens of
 * two characters or fewer and stop words are ignored. Scanning stops as soon
 * as `max` keywords have been gathered.
 */
function extractWords(instincts: Instinct[], max: number): string[] {
  const ignored = new Set([
    'when',
    'with',
    'this',
    'that',
    'user',
    'asks',
    'for',
    'the',
    'and',
    'debug',
    'investigate',
    'research',
  ])
  const picked: string[] = []
  for (const { trigger, action } of instincts) {
    const tokens = `${trigger} ${action}`.toLowerCase().split(/[^a-z0-9]+/)
    for (const candidate of tokens) {
      const keep =
        candidate.length > 2 &&
        !ignored.has(candidate) &&
        !picked.includes(candidate)
      if (keep) picked.push(candidate)
      // Checked after every token, kept or not, exactly like the limit test
      // in the original loop.
      if (picked.length >= max) return picked
    }
  }
  return picked
}

View File

@@ -0,0 +1,167 @@
import { mkdir, writeFile } from 'node:fs/promises'
import { existsSync } from 'node:fs'
import { join } from 'node:path'
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
import { clearCommandsCache } from '../../commands.js'
import type { Instinct } from './instinctParser.js'
import { normalizeSkillName } from './learningPolicy.js'
import type { SkillLearningScope } from './types.js'
/** Optional overrides accepted when generating a learned command draft. */
export type CommandGeneratorOptions = {
// Base directory for project-scoped output (`<cwd>/.claude/commands`); defaults to process.cwd().
cwd?: string
// Output directory for non-project scope; defaults to `<claude config home>/commands`.
globalCommandsDir?: string
// When set (non-empty), used as the output directory regardless of scope.
outputRoot?: string
// Overrides the generated command name (still passed through normalizeSkillName).
name?: string
// Overrides the description derived from the first instinct's trigger.
description?: string
// Overrides the scope otherwise taken from the first instinct.
scope?: SkillLearningScope
}
/** In-memory description of a learned command file, produced by generateCommandDraft. */
export type LearnedCommandDraft = {
// Normalized command name; also the basename of the written `.md` file.
name: string
description: string
scope: SkillLearningScope
// Ids of the instincts the draft was generated from.
sourceInstinctIds: string[]
// Mean confidence of the source instincts, rounded to two decimals.
confidence: number
// Full markdown document (frontmatter + body) to write.
content: string
// Directory (not file path) the command file is written into.
outputPath: string
}
/**
 * Turn a non-empty group of instincts into a learned-command draft.
 *
 * `options` may override the name, description and scope; otherwise they are
 * derived from the instincts, with scope defaulting to `'project'`.
 *
 * @throws Error when `instincts` is empty.
 */
export function generateCommandDraft(
  instincts: Instinct[],
  options?: CommandGeneratorOptions,
): LearnedCommandDraft {
  if (instincts.length === 0) {
    throw new Error('Cannot generate a command draft without instincts')
  }

  const resolvedScope = options?.scope ?? instincts[0]?.scope ?? 'project'
  const commandName = normalizeSkillName(
    options?.name ?? buildCommandName(instincts),
  )
  const rawConfidence = averageConfidence(instincts)
  const summary = options?.description ?? buildDescription(instincts)
  const targetDir = getLearnedCommandPath(commandName, resolvedScope, options)

  // The markdown gets the raw mean (rounded while rendering); the draft field
  // stores the two-decimal value.
  const markdown = buildCommandContent({
    name: commandName,
    description: summary,
    confidence: rawConfidence,
    instincts,
  })

  return {
    name: commandName,
    description: summary,
    scope: resolvedScope,
    sourceInstinctIds: instincts.map(({ id }) => id),
    confidence: Number(rawConfidence.toFixed(2)),
    content: markdown,
    outputPath: targetDir,
  }
}
/**
 * Persist a learned command draft as `<outputPath>/<name>.md`.
 *
 * An existing file is never overwritten: the write uses exclusive-create
 * (`wx`), which folds the "already exists" check and the write into one
 * atomic operation and removes the check-then-write race.
 *
 * @returns The path of the command file (pre-existing or newly written).
 * @throws Any file-system error other than the target already existing.
 */
export async function writeLearnedCommand(
  draft: LearnedCommandDraft,
): Promise<string> {
  await mkdir(draft.outputPath, { recursive: true })
  const filePath = join(draft.outputPath, `${draft.name}.md`)
  try {
    await writeFile(filePath, draft.content, { encoding: 'utf8', flag: 'wx' })
  } catch (error: unknown) {
    const code =
      typeof error === 'object' && error !== null && 'code' in error
        ? (error as { code?: unknown }).code
        : undefined
    // The file is already there: leave it untouched and skip the cache
    // invalidation, as the previous early return did.
    if (code === 'EEXIST') return filePath
    throw error
  }
  // A new command file was created, so cached command listings are stale.
  clearCommandsCache()
  return filePath
}
/**
 * Resolve the directory learned commands are written to.
 *
 * Precedence: a non-empty `options.outputRoot`, then `<cwd>/.claude/commands`
 * for project scope, then `options.globalCommandsDir` or the `commands`
 * folder under the Claude config home. `_name` is accepted but unused.
 */
export function getLearnedCommandPath(
  _name: string,
  scope: SkillLearningScope,
  options?: CommandGeneratorOptions,
): string {
  const forcedRoot = options?.outputRoot
  if (forcedRoot) return forcedRoot

  if (scope !== 'project') {
    return (
      options?.globalCommandsDir ?? join(getClaudeConfigHomeDir(), 'commands')
    )
  }
  const projectRoot = options?.cwd ?? process.cwd()
  return join(projectRoot, '.claude', 'commands')
}
/**
 * Derive a command name: `learned-` followed by up to four keywords taken
 * from the instincts, normalized.
 *
 * When no keyword can be extracted the joined name used to be the bare prefix
 * `learned`, which (unless normalizeSkillName rejects it) is truthy and
 * bypassed the intended `'learned-command'` fallback. That case now returns
 * the fallback explicitly.
 */
function buildCommandName(instincts: Instinct[]): string {
  const words = extractWords(instincts, 4)
  if (words.length === 0) return 'learned-command'
  const name = ['learned', ...words].join('-')
  return normalizeSkillName(name) || 'learned-command'
}
/**
 * Describe the command with the first instinct's trigger, collapsing
 * whitespace runs to single spaces and keeping at most 120 characters.
 */
function buildDescription(instincts: Instinct[]): string {
  const fallback = 'Reuse the learned workflow'
  const source = instincts[0]?.trigger ?? fallback
  const flattened = source.replace(/\s+/g, ' ')
  return flattened.slice(0, 120)
}
/**
 * Render the markdown document for a learned command: YAML-style frontmatter,
 * a `# /<name>` heading, then the "When to use", "Steps" and "Evidence"
 * bullet sections. The text ends with a trailing newline.
 */
function buildCommandContent(params: {
  name: string
  description: string
  confidence: number
  instincts: Instinct[]
}): string {
  const { name, description, confidence, instincts } = params
  const asBullets = (entries: string[]): string =>
    entries.map(entry => `- ${entry}`).join('\n')

  const lines: string[] = []
  // Frontmatter
  lines.push('---')
  lines.push(`name: ${name}`)
  lines.push(`description: ${JSON.stringify(description)}`)
  lines.push('origin: skill-learning')
  lines.push(`confidence: ${Number(confidence.toFixed(2))}`)
  lines.push(
    `evolved_from: [${instincts.map(instinct => JSON.stringify(instinct.id)).join(', ')}]`,
  )
  lines.push('---', '')
  // Body
  lines.push(`# /${name}`, '')
  lines.push('## When to use', '')
  lines.push(asBullets(instincts.map(instinct => instinct.trigger)), '')
  lines.push('## Steps', '')
  lines.push(asBullets(instincts.map(instinct => instinct.action)), '')
  lines.push('## Evidence', '')
  lines.push(asBullets(instincts.flatMap(instinct => instinct.evidence)), '')
  return lines.join('\n')
}
/** Mean confidence across the given instincts; NaN when the list is empty. */
function averageConfidence(instincts: Instinct[]): number {
  const count = instincts.length
  let runningSum = 0
  for (let index = 0; index < count; index += 1) {
    runningSum += instincts[index]!.confidence
  }
  return runningSum / count
}
/**
 * Collect distinct lowercase keywords from the instincts' trigger and action
 * text, in order of first appearance. Tokens are runs of `a-z0-9`; tokens of
 * two characters or fewer and stop words are skipped. Scanning stops as soon
 * as `max` keywords have been gathered.
 */
function extractWords(instincts: Instinct[], max: number): string[] {
  const stopList = new Set([
    'when',
    'with',
    'this',
    'that',
    'user',
    'asks',
    'for',
    'the',
    'and',
    'run',
    'use',
    'prefer',
    'avoid',
  ])
  const isKeyword = (token: string, seen: string[]): boolean =>
    token.length > 2 && !stopList.has(token) && !seen.includes(token)

  const keywords: string[] = []
  for (const instinct of instincts) {
    const text = `${instinct.trigger} ${instinct.action}`.toLowerCase()
    for (const token of text.split(/[^a-z0-9]+/)) {
      if (isKeyword(token, keywords)) keywords.push(token)
      // The limit is tested after every token, accepted or not, as in the
      // original loop.
      if (keywords.length >= max) return keywords
    }
  }
  return keywords
}

View File

@@ -0,0 +1,52 @@
/**
 * Tuning knobs for the LLM-backed observer. Their exact semantics are defined
 * by the consumers of this config; the defaults live in `DEFAULTS` below.
 */
export type SkillLearningLlmConfig = {
  readonly timeoutMs: number
  readonly maxCallsPerSession: number
  readonly cooldownMs: number
  readonly failureThreshold: number
  readonly circuitCooldownMs: number
}

/** Fully resolved skill-learning configuration. */
export type SkillLearningConfig = {
  readonly minConfidence: number
  readonly minClusterSize: number
  readonly llm: SkillLearningLlmConfig
}

/** Partial configuration accepted by {@link setSkillLearningConfigForTest}. */
export type SkillLearningConfigOverrides = {
  minConfidence?: number
  minClusterSize?: number
  llm?: Partial<SkillLearningLlmConfig>
}

const DEFAULTS: SkillLearningConfig = {
  minConfidence: 0.75,
  minClusterSize: 3,
  llm: {
    timeoutMs: 10_000,
    maxCallsPerSession: 20,
    cooldownMs: 30_000,
    failureThreshold: 3,
    circuitCooldownMs: 60_000,
  },
}

// Currently installed overrides; `undefined` means "use DEFAULTS as is".
let overrides: SkillLearningConfigOverrides | undefined

/**
 * Return the effective configuration: `DEFAULTS` with any installed overrides
 * applied on top.
 *
 * Every field, including the nested `llm` ones, falls back to its default
 * when the override is missing or explicitly `undefined`. (The nested fields
 * were previously merged with an object spread, which let an explicit
 * `undefined` replace the default.)
 */
export function getSkillLearningConfig(): SkillLearningConfig {
  if (!overrides) return DEFAULTS
  const llm = overrides.llm
  return {
    minConfidence: overrides.minConfidence ?? DEFAULTS.minConfidence,
    minClusterSize: overrides.minClusterSize ?? DEFAULTS.minClusterSize,
    llm: {
      timeoutMs: llm?.timeoutMs ?? DEFAULTS.llm.timeoutMs,
      maxCallsPerSession:
        llm?.maxCallsPerSession ?? DEFAULTS.llm.maxCallsPerSession,
      cooldownMs: llm?.cooldownMs ?? DEFAULTS.llm.cooldownMs,
      failureThreshold: llm?.failureThreshold ?? DEFAULTS.llm.failureThreshold,
      circuitCooldownMs:
        llm?.circuitCooldownMs ?? DEFAULTS.llm.circuitCooldownMs,
    },
  }
}

/** Install overrides (replacing any previous ones). Intended for tests. */
export function setSkillLearningConfigForTest(
  config: SkillLearningConfigOverrides,
): void {
  overrides = config
}

/** Remove all overrides so {@link getSkillLearningConfig} returns the defaults. */
export function resetSkillLearningConfig(): void {
  overrides = undefined
}

View File

@@ -0,0 +1,174 @@
import type { Instinct } from './instinctParser.js'
import { shouldGenerateSkillFromInstincts } from './learningPolicy.js'
import {
generateSkillDraft,
type SkillGeneratorOptions,
} from './skillGenerator.js'
import {
generateCommandDraft,
type CommandGeneratorOptions,
type LearnedCommandDraft,
} from './commandGenerator.js'
import {
generateAgentDraft,
type AgentGeneratorOptions,
type LearnedAgentDraft,
} from './agentGenerator.js'
import { getSkillLearningConfig } from './config.js'
import type { LearnedSkillDraft } from './types.js'
/** A cluster of related instincts that may be evolved into a learned artifact. */
export type EvolutionCandidate = {
// Artifact kind chosen by classifyEvolutionTarget for this cluster.
target: 'skill' | 'command' | 'agent'
// Trigger of the first instinct in the cluster.
trigger: string
// Domain of the first instinct in the cluster.
domain: string
// All instincts grouped into this cluster.
instincts: Instinct[]
// Mean confidence of the cluster, rounded to two decimals.
averageConfidence: number
}
/** Discriminated union over the three draft kinds, tagged by `kind`. */
export type LearnedArtifactDraft =
| { kind: 'skill'; draft: LearnedSkillDraft }
| { kind: 'command'; draft: LearnedCommandDraft }
| { kind: 'agent'; draft: LearnedAgentDraft }
/**
 * Group active/pending instincts by `domain` plus normalized trigger and turn
 * every sufficiently large group into an evolution candidate.
 *
 * Groups smaller than the configured `minClusterSize` are discarded no matter
 * how confident their members are: a single observation must never become a
 * persistent artifact. Candidates are returned by descending average
 * confidence.
 */
export function clusterInstincts(instincts: Instinct[]): EvolutionCandidate[] {
  const buckets = new Map<string, Instinct[]>()
  for (const instinct of instincts) {
    const eligible =
      instinct.status === 'active' || instinct.status === 'pending'
    if (!eligible) continue
    const bucketKey = `${instinct.domain}:${normalizedTrigger(instinct.trigger)}`
    const bucket = buckets.get(bucketKey)
    if (bucket) {
      bucket.push(instinct)
    } else {
      buckets.set(bucketKey, [instinct])
    }
  }

  const candidates: EvolutionCandidate[] = []
  for (const members of buckets.values()) {
    // Cluster-size floor, applied unconditionally.
    if (!(members.length >= getSkillLearningConfig().minClusterSize)) continue

    let confidenceSum = 0
    for (const member of members) confidenceSum += member.confidence
    const mean = confidenceSum / members.length

    candidates.push({
      target: classifyEvolutionTarget(members),
      trigger: members[0]?.trigger ?? 'learned pattern',
      domain: members[0]?.domain ?? 'project',
      instincts: members,
      averageConfidence: Number(mean.toFixed(2)),
    })
  }

  return candidates.sort(
    (left, right) => right.averageConfidence - left.averageConfidence,
  )
}
/**
 * Pick the artifact kind for a set of instincts (or a candidate's instincts)
 * from keywords in their combined, lowercased trigger/action text.
 *
 * - `'command'` when the text mentions an explicit user request or a command.
 * - `'agent'` when there are at least four instincts and the text mentions
 *   debugging, investigation, research or multi-step work.
 * - `'skill'` otherwise.
 */
export function classifyEvolutionTarget(
  instinctsOrCandidate: Instinct[] | EvolutionCandidate,
): 'skill' | 'command' | 'agent' {
  const members = Array.isArray(instinctsOrCandidate)
    ? instinctsOrCandidate
    : instinctsOrCandidate.instincts
  const haystack = members
    .map(({ trigger, action }) => `${trigger} ${action}`)
    .join(' ')
    .toLowerCase()

  const looksLikeCommand =
    /user asks|explicitly request|command|run /.test(haystack)
  if (looksLikeCommand) return 'command'

  const looksLikeAgent =
    members.length >= 4 &&
    /(debug|investigate|research|multi-step)/.test(haystack)
  return looksLikeAgent ? 'agent' : 'skill'
}
/**
 * Suggest evolution candidates for the given instincts.
 * Currently a direct alias of clusterInstincts with no extra filtering.
 */
export function suggestEvolutions(instincts: Instinct[]): EvolutionCandidate[] {
return clusterInstincts(instincts)
}
/**
 * Build skill drafts for every cluster that is classified as a skill and
 * passes the learning policy. Each draft uses the scope of its cluster's
 * first instinct (default `'project'`), overriding any scope in `options`.
 */
export function generateSkillCandidates(
  instincts: Instinct[],
  options?: SkillGeneratorOptions,
): LearnedSkillDraft[] {
  // Phase 1: select qualifying clusters. Phase 2: generate their drafts.
  const qualifying = clusterInstincts(instincts).filter(
    ({ target, instincts: members }) =>
      target === 'skill' && shouldGenerateSkillFromInstincts(members),
  )
  return qualifying.map(({ instincts: members }) => {
    const scope = members[0]?.scope ?? 'project'
    return generateSkillDraft(members, { ...options, scope })
  })
}
/**
 * Build command drafts for every cluster that is classified as a command and
 * passes the learning policy. Each draft uses the scope of its cluster's
 * first instinct (default `'project'`), overriding any scope in `options`.
 */
export function generateCommandCandidates(
  instincts: Instinct[],
  options?: CommandGeneratorOptions,
): LearnedCommandDraft[] {
  // Phase 1: select qualifying clusters. Phase 2: generate their drafts.
  const qualifying = clusterInstincts(instincts).filter(
    ({ target, instincts: members }) =>
      target === 'command' && shouldGenerateSkillFromInstincts(members),
  )
  return qualifying.map(({ instincts: members }) => {
    const scope = members[0]?.scope ?? 'project'
    return generateCommandDraft(members, { ...options, scope })
  })
}
/**
 * Build agent drafts for every cluster that is classified as an agent and
 * passes the learning policy. Each draft uses the scope of its cluster's
 * first instinct (default `'project'`), overriding any scope in `options`.
 */
export function generateAgentCandidates(
  instincts: Instinct[],
  options?: AgentGeneratorOptions,
): LearnedAgentDraft[] {
  // Phase 1: select qualifying clusters. Phase 2: generate their drafts.
  const qualifying = clusterInstincts(instincts).filter(
    ({ target, instincts: members }) =>
      target === 'agent' && shouldGenerateSkillFromInstincts(members),
  )
  return qualifying.map(({ instincts: members }) => {
    const scope = members[0]?.scope ?? 'project'
    return generateAgentDraft(members, { ...options, scope })
  })
}
/**
 * Generate drafts of all three kinds from the same instincts and return them
 * as one tagged list: skills first, then commands, then agents.
 */
export function generateAllCandidates(
  instincts: Instinct[],
  options?: {
    skill?: SkillGeneratorOptions
    command?: CommandGeneratorOptions
    agent?: AgentGeneratorOptions
  },
): LearnedArtifactDraft[] {
  const artifacts: LearnedArtifactDraft[] = []
  for (const draft of generateSkillCandidates(instincts, options?.skill)) {
    artifacts.push({ kind: 'skill', draft })
  }
  for (const draft of generateCommandCandidates(instincts, options?.command)) {
    artifacts.push({ kind: 'command', draft })
  }
  for (const draft of generateAgentCandidates(instincts, options?.agent)) {
    artifacts.push({ kind: 'agent', draft })
  }
  return artifacts
}
/**
 * Normalize a trigger into a clustering key: lowercase, split on anything
 * that is not a letter, combining mark or digit, keep the first six words and
 * join them with single spaces.
 *
 * The split is Unicode-aware. The earlier ASCII-only pattern (`[^a-z0-9]`)
 * discarded every non-Latin character, so all triggers written in e.g.
 * Chinese normalized to the same (often empty) key and were clustered
 * together within a domain. Output for pure-ASCII triggers is unchanged.
 */
function normalizedTrigger(trigger: string): string {
  return trigger
    .toLowerCase()
    .split(/[^\p{L}\p{M}\p{N}]+/u)
    .filter(Boolean)
    .slice(0, 6)
    .join(' ')
}

View File

@@ -0,0 +1,12 @@
import { feature } from 'bun:bundle'
/**
 * Report whether skill learning is enabled.
 *
 * Checks, in order, and returns on the first match:
 * 1. env `SKILL_LEARNING_ENABLED`: `'0'` disables, `'1'` enables;
 * 2. env `FEATURE_SKILL_LEARNING`: `'0'` disables, `'1'` enables;
 * 3. the `SKILL_LEARNING` flag from `bun:bundle`.
 * Any other environment value is ignored and falls through to the next check.
 */
export function isSkillLearningEnabled(): boolean {
if (process.env.SKILL_LEARNING_ENABLED === '0') return false
if (process.env.SKILL_LEARNING_ENABLED === '1') return true
if (process.env.FEATURE_SKILL_LEARNING === '0') return false
if (process.env.FEATURE_SKILL_LEARNING === '1') return true
// NOTE(review): kept as an explicit `if` rather than `return feature(...)`;
// the bun:bundle flag is presumably resolved at bundle time and may need to
// sit directly in a condition — confirm before simplifying.
if (feature('SKILL_LEARNING')) {
return true
}
return false
}

View File

@@ -0,0 +1,37 @@
export * from './featureCheck.js'
export * from './evolution.js'
export {
createInstinct,
parseInstinct,
serializeInstinct,
} from './instinctParser.js'
export * from './learningPolicy.js'
export {
exportInstincts,
importInstincts,
loadInstincts,
prunePendingInstincts,
saveInstinct,
updateConfidence,
upsertInstinct,
} from './instinctStore.js'
export {
appendObservation,
ingestTranscript,
readObservations,
scrubObservation,
scrubText,
} from './observationStore.js'
export * from './promotion.js'
export * from './projectContext.js'
export * from './runtimeObserver.js'
export * from './observerBackend.js'
export { llmObserverBackend } from './llmObserverBackend.js'
export * from './commandGenerator.js'
export * from './agentGenerator.js'
export * from './toolEventObserver.js'
export * from './sessionObserver.js'
export * from './skillGapStore.js'
export * from './skillGenerator.js'
export * from './skillLifecycle.js'
export * from './types.js'

View File

@@ -0,0 +1,115 @@
import { createHash } from 'node:crypto'
import type {
SkillLearningProjectContext,
SkillLearningScope,
StoredSkillObservation,
} from './observationStore.js'
import type { Instinct as BaseInstinct, InstinctStatus } from './types.js'
export type { Instinct } from './types.js'
/** An instinct as persisted on disk: the base shape plus optional provenance. */
export type StoredInstinct = BaseInstinct & {
// Ids of the observations this instinct was derived from (de-duplicated by normalizeInstinct).
observationIds?: string[]
}
/**
 * Input accepted by createInstinct: a stored instinct without the fields that
 * createInstinct fills in itself (timestamps), with `id` and `status` optional.
 */
export type InstinctCandidate = Omit<
StoredInstinct,
'id' | 'createdAt' | 'updatedAt' | 'status'
> & {
// Generated from trigger/action/scope when omitted.
id?: string
// Defaults to 'pending' when omitted.
status?: InstinctStatus
}
/**
 * Create a normalized instinct from a candidate.
 *
 * @param candidate Instinct fields; `id` and `status` are optional.
 * @param now ISO timestamp used for both `createdAt` and `updatedAt`
 *   (defaults to the current time).
 * @returns The normalized instinct, with status `'pending'` unless given.
 *
 * The id is resolved before the candidate is spread and the candidate's own
 * `id` key is left out of the spread. Previously `...candidate` came after
 * `id`, so a candidate carrying an explicit `id: undefined` erased the
 * scope-aware id and normalizeInstinct then rebuilt one without the scope.
 * A missing or empty id is treated the same way normalizeInstinct treats it.
 */
export function createInstinct(
  candidate: InstinctCandidate,
  now = new Date().toISOString(),
): StoredInstinct {
  const { id: candidateId, ...rest } = candidate
  const id =
    candidateId ||
    buildInstinctId(candidate.trigger, candidate.action, candidate.scope)
  return normalizeInstinct({
    id,
    ...rest,
    createdAt: now,
    updatedAt: now,
    status: candidate.status ?? 'pending',
  })
}
/**
 * Return a cleaned-up copy of an instinct:
 * - a missing/empty `id` is regenerated from trigger, action and scope;
 * - `confidence` is clamped to `[0, 1]` with two decimals;
 * - empty evidence entries are dropped and duplicates removed;
 * - `observationIds`, when present, are de-duplicated.
 *
 * The fallback id now includes the instinct's scope, so it matches the id
 * createInstinct generates for the same trigger/action/scope (it used to
 * assume the default scope regardless of `instinct.scope`).
 */
export function normalizeInstinct(instinct: StoredInstinct): StoredInstinct {
  return {
    ...instinct,
    id:
      instinct.id ||
      buildInstinctId(instinct.trigger, instinct.action, instinct.scope),
    confidence: clampConfidence(instinct.confidence),
    evidence: Array.from(new Set(instinct.evidence.filter(Boolean))),
    evidenceOutcome: instinct.evidenceOutcome,
    observationIds: instinct.observationIds
      ? Array.from(new Set(instinct.observationIds))
      : undefined,
  }
}
/** Serialize an instinct (normalized first) as 2-space-indented JSON plus a trailing newline. */
export function serializeInstinct(instinct: StoredInstinct): string {
  const normalized = normalizeInstinct(instinct)
  const json = JSON.stringify(normalized, null, 2)
  return `${json}\n`
}
/**
 * Parse the JSON text of a stored instinct and normalize it.
 * The parsed value is trusted to have the StoredInstinct shape; malformed
 * JSON makes JSON.parse throw.
 */
export function parseInstinct(content: string): StoredInstinct {
  const raw = JSON.parse(content) as StoredInstinct
  return normalizeInstinct(raw)
}
/**
 * Build a deterministic instinct id of the form `<slug>-<hash>`.
 *
 * - slug: `trigger action` lowercased, non-alphanumeric runs turned into `-`,
 *   one leading/trailing dash trimmed, cut to 48 characters; `instinct` when
 *   nothing is left.
 * - hash: first 10 hex characters of the SHA-1 of `scope\ntrigger\naction`.
 */
export function buildInstinctId(
  trigger: string,
  action: string,
  scope: SkillLearningScope = 'project',
): string {
  const digest = createHash('sha1')
    .update(`${scope}\n${trigger}\n${action}`)
    .digest('hex')
    .slice(0, 10)

  const dashed = `${trigger} ${action}`
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
  const slug = dashed.replace(/^-|-$/g, '').slice(0, 48)
  const prefix = slug === '' ? 'instinct' : slug

  return `${prefix}-${digest}`
}
/**
 * Pre-fill the provenance fields of an instinct candidate from an observation.
 *
 * Scope and project identity come from `project` when given (falling back to
 * the observation's own project fields; scope defaults to `'project'`). The
 * single evidence entry is the first defined value among message text, tool
 * output, tool input, tool name and the observation id.
 */
export function candidateFromObservation(
  observation: StoredSkillObservation,
  project?: SkillLearningProjectContext,
): Partial<InstinctCandidate> {
  const primaryEvidence =
    observation.messageText ??
    observation.toolOutput ??
    observation.toolInput ??
    observation.toolName ??
    observation.id

  const candidate: Partial<InstinctCandidate> = {
    scope: project?.scope ?? 'project',
    projectId: project?.projectId ?? observation.projectId,
    projectName: project?.projectName ?? observation.projectName,
    source: 'session-observation',
    evidence: [primaryEvidence],
    observationIds: [observation.id],
  }
  return candidate
}
/**
 * Heuristic contradiction check between two instincts.
 *
 * They contradict when their triggers are equal ignoring case and exactly one
 * of the two actions mentions "avoid", or exactly one mentions "prefer"
 * (case-insensitive substring match).
 */
export function isContradictingInstinct(
  existing: StoredInstinct,
  incoming: StoredInstinct,
): boolean {
  const sameTrigger =
    existing.trigger.toLowerCase() === incoming.trigger.toLowerCase()
  if (!sameTrigger) return false

  const existingAction = existing.action.toLowerCase()
  const incomingAction = incoming.action.toLowerCase()
  const polarityWords = ['avoid', 'prefer']
  return polarityWords.some(
    word => existingAction.includes(word) !== incomingAction.includes(word),
  )
}
/** Round a confidence to two decimals and clamp it into `[0, 1]`; NaN becomes 0. */
export function clampConfidence(confidence: number): number {
  if (Number.isNaN(confidence)) {
    return 0
  }
  const rounded = Number(confidence.toFixed(2))
  const cappedAtOne = Math.min(1, rounded)
  return Math.max(0, cappedAtOne)
}

View File

@@ -0,0 +1,258 @@
import {
mkdir,
readFile,
readdir,
rename,
unlink,
writeFile,
} from 'node:fs/promises'
import { randomBytes } from 'node:crypto'
import { dirname, join } from 'node:path'
import {
getSkillLearningRoot,
type ObservationStoreOptions,
type SkillLearningProjectContext,
type SkillLearningScope,
} from './observationStore.js'
import {
clampConfidence,
isContradictingInstinct,
normalizeInstinct,
parseInstinct,
serializeInstinct,
type StoredInstinct,
} from './instinctParser.js'
// Tail of the promise chain used by upsertInstinct to run upserts one at a
// time within this module instance.
let upsertQueue: Promise<unknown> = Promise.resolve()
/** Options selecting which instinct directory a store operation targets. */
export type InstinctStoreOptions = ObservationStoreOptions & {
// Project whose instincts are addressed; without it the global directory is used.
project?: SkillLearningProjectContext
// Explicit scope; takes precedence over the project's own scope.
scope?: SkillLearningScope
}
/**
 * Resolve the directory holding personal instincts.
 *
 * A project-specific directory (`projects/<projectId>/instincts/personal`)
 * is used only when a project is given, its id is not `'global'`, and the
 * effective scope (explicit scope, else project scope, else `'project'`) is
 * not `'global'`. Every other case maps to `global/instincts/personal`.
 */
export function getInstinctsDir(options?: InstinctStoreOptions): string {
  const root = getSkillLearningRoot(options)
  const project = options?.project
  const effectiveScope = options?.scope ?? project?.scope ?? 'project'

  if (
    effectiveScope !== 'global' &&
    project &&
    project.projectId !== 'global'
  ) {
    return join(root, 'projects', project.projectId, 'instincts', 'personal')
  }
  return join(root, 'global', 'instincts', 'personal')
}
/**
 * Persist an instinct (normalized first) to its file in the instincts
 * directory, creating the directory if needed.
 *
 * The content is written to a uniquely named temporary file next to the
 * target and then renamed over it, so readers never see a partially written
 * file. If the write or the rename fails, the temporary file is removed
 * (best effort) before the error is rethrown, instead of being left behind.
 *
 * @returns The normalized instinct that was written.
 */
export async function saveInstinct(
  instinct: StoredInstinct,
  options?: InstinctStoreOptions,
): Promise<StoredInstinct> {
  const normalized = normalizeInstinct(instinct)
  const dir = getInstinctsDir(options)
  await mkdir(dir, { recursive: true })
  const target = instinctPath(normalized.id, options)
  const tmp = `${target}.${randomBytes(6).toString('hex')}.tmp`
  try {
    await writeFile(tmp, serializeInstinct(normalized))
    await rename(tmp, target)
  } catch (error) {
    // Cleanup is best effort: the temp file may not exist if writeFile failed
    // early, and the original error is the one worth reporting.
    await unlink(tmp).catch(() => {})
    throw error
  }
  return normalized
}
/**
 * Load every instinct stored as a `.json` file in the instincts directory,
 * sorted by id.
 *
 * A missing directory yields an empty list. A file that disappears between
 * the directory listing and the read (for example because another operation
 * pruned it in the meantime) is skipped rather than failing the whole load.
 * Any other read or parse error is propagated.
 */
export async function loadInstincts(
  options?: InstinctStoreOptions,
): Promise<StoredInstinct[]> {
  const dir = getInstinctsDir(options)
  let files: string[] = []
  try {
    files = await readdir(dir)
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === 'ENOENT') return []
    throw error
  }
  const instincts: StoredInstinct[] = []
  for (const file of files) {
    if (!file.endsWith('.json')) continue
    let content: string
    try {
      content = await readFile(join(dir, file), 'utf8')
    } catch (error) {
      // Removed after readdir listed it: equivalent to never having been listed.
      if ((error as NodeJS.ErrnoException).code === 'ENOENT') continue
      throw error
    }
    instincts.push(parseInstinct(content))
  }
  return instincts.sort((a, b) => a.id.localeCompare(b.id))
}
/**
 * Insert or merge an instinct, serialized behind all previously queued
 * upserts made through this function.
 *
 * @returns A promise for the stored instinct; it rejects if this upsert fails.
 */
export function upsertInstinct(
incoming: StoredInstinct,
options?: InstinctStoreOptions,
): Promise<StoredInstinct> {
// Chain onto the queue tail so this upsert starts only after the previous one settled.
const result = upsertQueue.then(() => doUpsertInstinct(incoming, options))
// The queue keeps a failure-swallowing copy so one failed upsert does not
// block later ones; the caller still receives the rejecting `result`.
upsertQueue = result.catch(() => {})
return result
}
/**
 * Merge `incoming` into the stored instinct it corresponds to, or save it as
 * a new instinct when there is none.
 *
 * Matching prefers an identical id. Failing that, a stored instinct with the
 * same trigger (ignoring case) and a contradicting action is used, so that a
 * contradiction whose id differs still lands on the existing record and can
 * drive the conflict-hold transition instead of piling up separately.
 *
 * On a match the stored confidence moves by -0.1 for a contradiction, or by
 * the outcome-based delta otherwise; evidence and observation ids are
 * appended, and the status is re-evaluated.
 */
async function doUpsertInstinct(
  incoming: StoredInstinct,
  options?: InstinctStoreOptions,
): Promise<StoredInstinct> {
  const stored = await loadInstincts(options)

  const sameId = stored.find(candidate => candidate.id === incoming.id)
  const match =
    sameId ??
    stored.find(
      candidate =>
        candidate.trigger.toLowerCase() === incoming.trigger.toLowerCase() &&
        isContradictingInstinct(candidate, incoming),
    )
  const now = new Date().toISOString()
  if (!match) {
    return saveInstinct(incoming, options)
  }

  const contradiction = isContradictingInstinct(match, incoming)
  let confidenceDelta: number
  if (contradiction) {
    confidenceDelta = -0.1
  } else {
    confidenceDelta = outcomeConfidenceDelta(incoming.evidenceOutcome)
  }
  const nextConfidence = clampConfidence(match.confidence + confidenceDelta)
  const nextStatus = resolveNextStatus(
    match.status,
    nextConfidence,
    contradiction,
  )

  const combinedEvidence = match.evidence.concat(incoming.evidence)
  const combinedObservationIds = (match.observationIds ?? []).concat(
    incoming.observationIds ?? [],
  )

  const merged = normalizeInstinct({
    ...match,
    confidence: nextConfidence,
    evidence: combinedEvidence,
    evidenceOutcome: incoming.evidenceOutcome ?? match.evidenceOutcome,
    observationIds: combinedObservationIds,
    updatedAt: now,
    status: nextStatus,
  })
  return saveInstinct(merged, options)
}
/**
 * Compute the status after a confidence update.
 *
 * - A contradiction that drops confidence below 0.3 puts the instinct on
 *   `'conflict-hold'`.
 * - Otherwise `'conflict-hold'` recovers to `'active'` at confidence >= 0.5
 *   and `'pending'` is promoted to `'active'` at confidence >= 0.8.
 * - In every other case the status is unchanged.
 */
function resolveNextStatus(
  current: StoredInstinct['status'],
  nextConfidence: number,
  contradiction: boolean,
): StoredInstinct['status'] {
  const heldByConflict = contradiction && nextConfidence < 0.3
  if (heldByConflict) return 'conflict-hold'

  const recovers = current === 'conflict-hold' && nextConfidence >= 0.5
  const promotes = current === 'pending' && nextConfidence >= 0.8
  return recovers || promotes ? 'active' : current
}
const DECAY_PER_WEEK = 0.02
const MS_PER_WEEK = 7 * 24 * 60 * 60 * 1000

/**
 * Apply time-based confidence decay: 0.02 per whole week elapsed since the
 * instinct's `updatedAt` (noted by the original author as ECC parity).
 *
 * Only `pending` and `active` instincts decay; any other status is skipped,
 * as are instincts with an unparseable `updatedAt`, less than one full week
 * of age, or a confidence that would not change after clamping.
 *
 * @returns The number of instincts whose confidence was lowered and saved.
 */
export async function decayInstinctConfidence(
  options?: InstinctStoreOptions,
): Promise<number> {
  const instincts = await loadInstincts(options)
  const now = Date.now()
  const stampedNow = new Date(now).toISOString()

  let decayedCount = 0
  for (const instinct of instincts) {
    const decays = instinct.status === 'pending' || instinct.status === 'active'
    if (!decays) continue

    const lastUpdateMs = Date.parse(instinct.updatedAt)
    if (Number.isNaN(lastUpdateMs)) continue

    const wholeWeeks = Math.floor((now - lastUpdateMs) / MS_PER_WEEK)
    if (wholeWeeks < 1) continue

    const delta = -DECAY_PER_WEEK * wholeWeeks
    const lowered = clampConfidence(instinct.confidence + delta)
    if (lowered === instinct.confidence) continue

    // updatedAt is moved to "now" so the next maintenance run does not apply
    // the same elapsed weeks a second time.
    const decayed = normalizeInstinct({
      ...instinct,
      confidence: lowered,
      updatedAt: stampedNow,
    })
    await saveInstinct(decayed, options)
    decayedCount += 1
  }
  return decayedCount
}
/**
 * Map an evidence outcome to the confidence adjustment it earns.
 *
 * @param outcome Outcome recorded on the incoming evidence (may be undefined).
 * @returns -0.05 for `'failure'`, +0.05 for anything else.
 */
function outcomeConfidenceDelta(
  outcome: StoredInstinct['evidenceOutcome'],
): number {
  return outcome === 'failure' ? -0.05 : 0.05
}
/**
 * Adjust the confidence of one stored instinct by `delta` and persist it.
 *
 * @param instinctId Id of the instinct to adjust.
 * @param delta Signed amount added to the current confidence before clamping.
 * @param options Store location options forwarded to load/save.
 * @returns The saved instinct, or `null` when no instinct has that id.
 */
export async function updateConfidence(
  instinctId: string,
  delta: number,
  options?: InstinctStoreOptions,
): Promise<StoredInstinct | null> {
  for (const candidate of await loadInstincts(options)) {
    if (candidate.id !== instinctId) continue
    const confidence = clampConfidence(candidate.confidence + delta)
    const updatedAt = new Date().toISOString()
    return saveInstinct(
      normalizeInstinct({ ...candidate, confidence, updatedAt }),
      options,
    )
  }
  return null
}
/**
 * Dump every stored instinct to a pretty-printed JSON file.
 *
 * The parent directory of `outputPath` is created if needed.
 *
 * @param outputPath Destination file path.
 * @param options Store location options forwarded to the loader.
 * @returns The instincts that were written.
 */
export async function exportInstincts(
  outputPath: string,
  options?: InstinctStoreOptions,
): Promise<StoredInstinct[]> {
  const instincts = await loadInstincts(options)
  const targetDir = dirname(outputPath)
  await mkdir(targetDir, { recursive: true })
  const payload = `${JSON.stringify(instincts, null, 2)}\n`
  await writeFile(outputPath, payload)
  return instincts
}
/**
 * Import instincts from a JSON file previously produced by an export.
 *
 * Each entry is normalized and merged into the store through
 * `upsertInstinct`, one at a time and in file order.
 *
 * @param inputPath Path of the JSON file to read.
 * @param options Store location options forwarded to the upsert.
 * @returns The stored form of every imported instinct.
 * @throws If the file cannot be read or parsed, or if its top-level JSON
 *   value is not an array (previously a JSON string would have been iterated
 *   character by character and an object produced an opaque TypeError).
 */
export async function importInstincts(
  inputPath: string,
  options?: InstinctStoreOptions,
): Promise<StoredInstinct[]> {
  const parsed: unknown = JSON.parse(await readFile(inputPath, 'utf8'))
  if (!Array.isArray(parsed)) {
    throw new Error(
      `Instinct import file "${inputPath}" must contain a JSON array of instincts`,
    )
  }
  const saved: StoredInstinct[] = []
  for (const instinct of parsed as StoredInstinct[]) {
    saved.push(await upsertInstinct(normalizeInstinct(instinct), options))
  }
  return saved
}
/**
 * Delete `pending` instincts that have not been updated for `maxAgeDays`.
 *
 * Instincts whose `updatedAt` cannot be parsed are never pruned (the NaN
 * comparison is false).
 *
 * @param maxAgeDays Age threshold in days.
 * @param options Store location options.
 * @returns The instincts whose files were removed.
 */
export async function prunePendingInstincts(
  maxAgeDays: number,
  options?: InstinctStoreOptions,
): Promise<StoredInstinct[]> {
  const instincts = await loadInstincts(options)
  const cutoff = Date.now() - maxAgeDays * 24 * 60 * 60 * 1000
  const isExpiredPending = (instinct: StoredInstinct): boolean =>
    instinct.status === 'pending' && Date.parse(instinct.updatedAt) < cutoff

  const pruned: StoredInstinct[] = []
  for (const instinct of instincts) {
    if (!isExpiredPending(instinct)) continue
    await unlink(instinctPath(instinct.id, options))
    pruned.push(instinct)
  }
  return pruned
}
/**
 * Build the on-disk path of the JSON file for the instinct with this id:
 * `<instincts dir>/<id>.json`.
 */
function instinctPath(id: string, options?: InstinctStoreOptions): string {
return join(getInstinctsDir(options), `${id}.json`)
}

View File

@@ -0,0 +1,106 @@
import { getSkillLearningConfig } from './config.js'
import type { Instinct } from './instinctParser.js'
import type { InstinctDomain, SkillLearningScope } from './types.js'
// Exported confidence floor for skill generation. NOTE(review): the gate in
// this file reads `getSkillLearningConfig().minConfidence` instead — confirm
// which of the two is authoritative.
export const MIN_CONFIDENCE_TO_GENERATE_SKILL = 0.75
// Hard cap applied to generated skill names by normalizeSkillName.
export const MAX_SKILL_NAME_LENGTH = 64
// Name prefix used for each instinct domain when building a skill name.
const DOMAIN_PREFIXES: Record<InstinctDomain, string> = {
workflow: 'workflow',
testing: 'testing',
debugging: 'debugging',
'code-style': 'style',
security: 'security',
git: 'git',
project: 'project',
}
// Names considered too generic to be a valid learned-skill name.
const GENERIC_NAMES = new Set([
'learned-skill',
'better-skill',
'new-skill',
'project-skill',
'workflow-skill',
])
/**
 * Decide whether a group of instincts is confident enough to become a skill.
 *
 * @param instincts Instincts under consideration.
 * @returns `false` for an empty group; otherwise whether the mean confidence
 *   reaches the configured `minConfidence`.
 */
export function shouldGenerateSkillFromInstincts(
  instincts: readonly Instinct[],
): boolean {
  const count = instincts.length
  if (count === 0) return false

  let total = 0
  for (const { confidence } of instincts) total += confidence
  return total / count >= getSkillLearningConfig().minConfidence
}
/**
 * Derive a skill name from a group of instincts.
 *
 * The name is the domain prefix of the first instinct (default `project`)
 * followed by up to five distinct useful words taken, in order, from each
 * instinct's trigger and action. If the normalized result is a generic name,
 * `<prefix>-learned-pattern` is returned instead.
 *
 * @param instincts Instincts that the skill is generated from.
 * @returns A normalized skill name.
 */
export function buildLearnedSkillName(instincts: readonly Instinct[]): string {
  const maxWords = 5
  const prefix = DOMAIN_PREFIXES[instincts[0]?.domain ?? 'project']
  const words = new Set<string>()

  collect: for (const { trigger, action } of instincts) {
    const tokens = `${trigger} ${action}`.toLowerCase().split(/[^a-z0-9]+/)
    for (const token of tokens) {
      if (isUsefulNameWord(token)) words.add(token)
      if (words.size >= maxWords) break collect
    }
  }

  const candidate = normalizeSkillName([prefix, ...words].join('-'))
  if (isGenericSkillName(candidate)) return `${prefix}-learned-pattern`
  return candidate
}
/**
 * Turn arbitrary text into a lowercase, dash-separated skill name.
 *
 * Runs of characters outside `a-z0-9` collapse into a single dash, edge
 * dashes are stripped, the result is cut to MAX_SKILL_NAME_LENGTH and a dash
 * exposed by the cut is removed.
 *
 * @param value Raw name text.
 * @returns The normalized name, or `'learned-skill'` when nothing is left.
 */
export function normalizeSkillName(value: string): string {
  const dashed = value.toLowerCase().replace(/[^a-z0-9]+/g, '-')
  const trimmed = dashed.replace(/^-|-$/g, '')
  const capped = trimmed.slice(0, MAX_SKILL_NAME_LENGTH).replace(/-$/g, '')
  if (capped) return capped
  return 'learned-skill'
}
/**
 * Check that a name is already in normalized form, non-empty, within the
 * length cap, and not one of the generic placeholder names.
 *
 * @param value Candidate skill name.
 * @returns `true` when the name is acceptable as-is.
 */
export function isValidLearnedSkillName(value: string): boolean {
  if (value !== normalizeSkillName(value)) return false
  const withinLength =
    value.length > 0 && value.length <= MAX_SKILL_NAME_LENGTH
  return withinLength && !isGenericSkillName(value)
}
/** Whether `value` is exactly one of the generic placeholder names in GENERIC_NAMES. */
export function isGenericSkillName(value: string): boolean {
return GENERIC_NAMES.has(value)
}
/**
 * Pick the default scope for a skill generated from these instincts.
 *
 * `global` requires at least two instincts, all of them in the `security`,
 * `git` or `workflow` domains; every other case is `project`.
 *
 * @param instincts Instincts backing the skill.
 * @returns The default scope.
 */
export function decideDefaultScope(
  instincts: readonly Instinct[],
): SkillLearningScope {
  if (instincts.length < 2) return 'project'
  for (const { domain } of instincts) {
    const globalFriendly =
      domain === 'security' || domain === 'git' || domain === 'workflow'
    if (!globalFriendly) return 'project'
  }
  return 'global'
}
// Filler words that carry no meaning in a skill name.
const NAME_STOP_WORDS: ReadonlySet<string> = new Set([
  'when',
  'with',
  'this',
  'that',
  'user',
  'project',
  'prefer',
  'avoid',
  'use',
  'using',
  'the',
  'and',
  'for',
])

/**
 * Whether a token is worth keeping in a generated skill name: longer than two
 * characters and not a stop word (comparison is case-sensitive).
 */
function isUsefulNameWord(word: string): boolean {
  if (word.length <= 2) return false
  return !NAME_STOP_WORDS.has(word)
}

View File

@@ -0,0 +1,301 @@
import { queryHaiku } from '../api/claude.js'
import { asSystemPrompt } from '../../utils/systemPromptType.js'
import { getSkillLearningConfig } from './config.js'
import type { InstinctCandidate } from './instinctParser.js'
import type { StoredSkillObservation } from './observationStore.js'
import type {
ObserverBackend,
ObserverBackendContext,
} from './observerBackend.js'
import {
INSTINCT_DOMAINS,
type InstinctDomain,
type SkillLearningScope,
} from './types.js'
/**
* LLM-based observer backend.
*
* Runs the small fast model (Haiku) through the project's `queryHaiku`
* helper, feeds it a compact summary of recent observations, and asks for
* up to three atomic reusable instincts in JSON. Output is validated and
* mapped to `InstinctCandidate[]` so the existing evolution pipeline
* consumes LLM output the same way it consumes heuristic output.
*
* Design notes:
* - Reuses `queryHaiku` (goes through the full Claude Code API stack:
* OAuth, beta headers, providers, VCR in tests). No new auth code.
* - Caps input to the tail of the observation buffer so the prompt stays
* small and predictable, and runs under an abort signal whose timeout comes
* from `getSkillLearningConfig().llm.timeoutMs` (nominally 10 seconds) so a
* slow Haiku round-trip never blocks the REPL turn end.
* - On ANY failure (abort, parse error, empty output) falls back to the
* heuristic observer backend, returning `[]` only if that fallback fails
* too — the backend is opt-in via `SKILL_LEARNING_OBSERVER_BACKEND=llm` and
* must never destabilise skill-learning when the API is unavailable.
*/
// Only the newest N observations are sent to the model in one call.
const MAX_OBSERVATIONS_PER_CALL = 30
// Upper bound on instincts accepted from one model response.
const MAX_CANDIDATES_PER_CALL = 3

// --- Circuit breaker state ---
// Count of back-to-back failed LLM attempts.
let consecutiveFailures = 0
// Epoch milliseconds before which the LLM is skipped (0 = circuit closed).
let circuitOpenUntil = 0

/** Close the circuit and forget past failures. */
export function resetCircuitBreaker(): void {
  circuitOpenUntil = 0
  consecutiveFailures = 0
}
// System prompt sent with every observer request. It pins the JSON schema
// that parseInstinctCandidates/normaliseCandidate validate (field names,
// length limits, allowed domains and scopes), so keep the two in sync when
// editing either side.
const LLM_OBSERVER_SYSTEM_PROMPT = `You analyse a short sequence of observations from a coding-assistant session (user messages, tool invocations with outcomes, assistant messages) and extract atomic, reusable "instincts" — behavioural patterns that would help the assistant act correctly in future similar situations.
Respond with ONLY a JSON array (no prose, no code fences, no commentary). Each item must match this schema:
{
"trigger": string, // <= 80 chars, short phrase describing WHEN the instinct applies
"action": string, // <= 120 chars, short phrase describing WHAT to do
"confidence": number, // 0..1 — how strongly these observations support the pattern
"domain": "workflow"|"testing"|"debugging"|"code-style"|"security"|"git"|"project",
"scope": "project"|"global",
"evidence": string[] // 1..3 short excerpts copied/paraphrased from the observations
}
Rules:
- Return [] if nothing clearly reusable. No guessing.
- At most 3 items, highest confidence first.
- confidence > 0.7 only when observations show the pattern in action (a correction followed by a successful retry, a repeated sequence, an explicit rule).
- Never include secrets, tokens, full file contents, or personally-identifying data.
- Scope "global" only when the pattern is obviously project-agnostic (generic testing, git hygiene); default to "project".`
/**
 * Observer backend registered under the name `llm`; analysis is delegated
 * to `analyseWithHaiku`.
 */
export const llmObserverBackend: ObserverBackend = {
  name: 'llm',
  analyze: (
    observations: StoredSkillObservation[],
    ctx?: ObserverBackendContext,
  ): Promise<InstinctCandidate[]> => analyseWithHaiku(observations, ctx),
}
/**
 * Record one failed LLM attempt and open the circuit breaker once the
 * configured number of consecutive failures is reached.
 */
function recordLlmObserverFailure(): void {
  consecutiveFailures++
  const { failureThreshold, circuitCooldownMs } = getSkillLearningConfig().llm
  if (consecutiveFailures >= failureThreshold) {
    circuitOpenUntil = Date.now() + circuitCooldownMs
  }
}

/**
 * Ask Haiku to extract instinct candidates from recent observations.
 *
 * Only the newest MAX_OBSERVATIONS_PER_CALL observations are sent. Any
 * failure — a thrown request (timeout, rate limit, ...) or a response that
 * yields no usable candidate — is counted towards the circuit breaker and
 * answered by the heuristic fallback, which sees the full observation list.
 *
 * The failure counter is cleared only after a response has produced at least
 * one valid candidate. Clearing it as soon as the request returned (the
 * previous behaviour) meant unusable output could never accumulate past one
 * failure, so the breaker could not open on systematically bad output.
 *
 * @param observations Observations to analyse, oldest first.
 * @param ctx Optional project context stamped onto the candidates.
 * @returns Candidates from the model, or whatever the heuristic fallback yields.
 */
async function analyseWithHaiku(
  observations: StoredSkillObservation[],
  ctx?: ObserverBackendContext,
): Promise<InstinctCandidate[]> {
  if (observations.length === 0) return []
  // Circuit breaker: if the circuit is open, skip queryHaiku entirely.
  if (Date.now() < circuitOpenUntil) {
    return runHeuristicFallback(observations, ctx)
  }
  const capped = observations.slice(-MAX_OBSERVATIONS_PER_CALL)
  const userPrompt = buildUserPrompt(capped)
  const signal = makeTimeoutSignal(getSkillLearningConfig().llm.timeoutMs)
  let responseText: string
  try {
    const response = await queryHaiku({
      systemPrompt: asSystemPrompt([LLM_OBSERVER_SYSTEM_PROMPT]),
      userPrompt,
      signal,
      options: {
        querySource: 'skill_learning_observer',
        enablePromptCaching: true,
        agents: [],
        isNonInteractiveSession: true,
        hasAppendSystemPrompt: false,
        mcpTools: [],
      },
    })
    responseText = extractResponseText(response.message?.content)
  } catch {
    // Haiku failure (timeout / rate limit / bad response).
    recordLlmObserverFailure()
    return runHeuristicFallback(observations, ctx)
  }
  const parsed = parseInstinctCandidates(responseText, ctx, capped)
  if (parsed.length === 0) {
    // Empty / malformed LLM output — count as a failure so the circuit
    // breaker opens if Haiku is systematically returning garbage (e.g. the
    // model version drifted and no longer emits the expected JSON).
    recordLlmObserverFailure()
    return runHeuristicFallback(observations, ctx)
  }
  // Full success: the response was usable, so the failure streak is over.
  consecutiveFailures = 0
  return parsed
}
/**
 * Run the heuristic observer backend as a substitute for the LLM.
 *
 * The backend module is imported lazily. Any error — import failure, a
 * synchronous throw, or a rejected promise — results in an empty list.
 *
 * @param observations Observations to analyse.
 * @param ctx Optional project context.
 * @returns Heuristic candidates, or `[]` on any error.
 */
async function runHeuristicFallback(
  observations: StoredSkillObservation[],
  ctx?: ObserverBackendContext,
): Promise<InstinctCandidate[]> {
  try {
    const sessionObserver = await import('./sessionObserver.js')
    // `await` is a no-op for a plain array and unwraps a promise.
    return await sessionObserver.heuristicObserverBackend.analyze(
      observations,
      ctx,
    )
  } catch {
    return []
  }
}
/**
 * Build the user prompt: one rendered line per observation, followed by the
 * extraction instruction.
 */
function buildUserPrompt(observations: StoredSkillObservation[]): string {
  const lines: string[] = []
  observations.forEach((observation, index) => {
    lines.push(renderObservation(observation, index))
  })
  const rendered = lines.join('\n')
  return `Observations (chronological, newest last):\n${rendered}\n\nExtract up to ${MAX_CANDIDATES_PER_CALL} atomic instincts. JSON array only.`
}
/**
 * Render one observation as a single ` | `-separated prompt line.
 *
 * The line starts with a 1-based ordinal and the event name; tool name,
 * outcome, message text (max 200 chars) and tool input/output (max 120 chars
 * each) are appended only when present. Free text is JSON-quoted.
 *
 * @param observation Observation to render.
 * @param index Zero-based position in the prompt.
 * @returns The rendered line.
 */
function renderObservation(
  observation: StoredSkillObservation,
  index: number,
): string {
  const { event, toolName, outcome, messageText, toolInput, toolOutput } =
    observation
  const quoted = (text: string, limit: number): string =>
    JSON.stringify(truncate(text, limit))

  const segments: string[] = [`#${index + 1}`, `event=${event}`]
  if (toolName) segments.push(`tool=${toolName}`)
  if (outcome) segments.push(`outcome=${outcome}`)
  if (messageText) segments.push(`text=${quoted(messageText, 200)}`)
  if (toolInput) segments.push(`in=${quoted(toolInput, 120)}`)
  if (toolOutput) segments.push(`out=${quoted(toolOutput, 120)}`)
  return segments.join(' | ')
}
/** Cut `value` to at most `max` characters; no marker is appended. */
function truncate(value: string, max: number): string {
  return value.length <= max ? value : value.slice(0, max)
}
/**
 * Concatenate the text of every `type: 'text'` block in a message content
 * array and trim the result.
 *
 * @param content Message content; anything that is not an array yields `''`.
 * @returns The joined, trimmed text.
 */
function extractResponseText(content: unknown): string {
  if (!Array.isArray(content)) return ''
  let combined = ''
  for (const block of content) {
    if (typeof block !== 'object' || block === null) continue
    const record = block as Record<string, unknown>
    if (record.type === 'text' && typeof record.text === 'string') {
      combined += record.text
    }
  }
  return combined.trim()
}
/**
 * Parse the model's raw reply into validated instinct candidates.
 *
 * @param raw Raw response text.
 * @param ctx Optional project context stamped onto each candidate.
 * @param observations Observations sent to the model; all of their ids are
 *   attached to every candidate.
 * @returns Valid candidates among the first MAX_CANDIDATES_PER_CALL items;
 *   `[]` when no JSON array can be located or parsed.
 */
function parseInstinctCandidates(
  raw: string,
  ctx: ObserverBackendContext | undefined,
  observations: StoredSkillObservation[],
): InstinctCandidate[] {
  const json = extractJsonArray(raw)
  if (!json) return []

  let decoded: unknown
  try {
    decoded = JSON.parse(json)
  } catch {
    return []
  }
  if (!Array.isArray(decoded)) return []

  const observationIds = observations.map(({ id }) => id)
  return decoded.slice(0, MAX_CANDIDATES_PER_CALL).flatMap(item => {
    const candidate = normaliseCandidate(item, ctx, observationIds)
    return candidate ? [candidate] : []
  })
}
/**
 * Slice out the text between the first `[` and the last `]` (inclusive),
 * tolerating prose or code fences around the array.
 *
 * @param raw Raw response text.
 * @returns The bracketed slice, or `undefined` when no such pair exists.
 */
function extractJsonArray(raw: string): string | undefined {
  if (!raw) return undefined
  const open = raw.indexOf('[')
  if (open < 0) return undefined
  const close = raw.lastIndexOf(']')
  return close > open ? raw.slice(open, close + 1) : undefined
}
/**
 * Validate one parsed JSON item and convert it to an `InstinctCandidate`.
 *
 * An item is rejected (`undefined`) unless it is an object with a non-empty
 * string `trigger`, a non-empty string `action`, and at least one usable
 * evidence string. Other fields are coerced to safe defaults.
 *
 * @param item One element of the parsed array.
 * @param ctx Optional project context supplying project id and name.
 * @param observationIds Ids of the observations the model saw.
 * @returns The candidate, or `undefined` when the item is unusable.
 */
function normaliseCandidate(
  item: unknown,
  ctx: ObserverBackendContext | undefined,
  observationIds: string[],
): InstinctCandidate | undefined {
  if (typeof item !== 'object' || item === null) return undefined
  const raw = item as Record<string, unknown>

  const trigger = stringField(raw.trigger, 80)
  const action = stringField(raw.action, 120)
  if (!trigger || !action) return undefined

  const evidence = evidenceField(raw.evidence)
  if (evidence.length === 0) return undefined

  const project = ctx?.project
  return {
    trigger,
    action,
    confidence: clampUnitInterval(raw.confidence),
    domain: domainField(raw.domain),
    source: 'session-observation',
    scope: scopeField(raw.scope),
    projectId: project?.projectId,
    projectName: project?.projectName,
    evidence,
    observationIds,
  }
}
/**
 * Read a string field: trim it, reject blanks, and cap its length.
 *
 * @param value Raw field value.
 * @param maxLength Maximum number of characters to keep.
 * @returns The cleaned string, or `undefined` for non-strings and blanks.
 */
function stringField(value: unknown, maxLength: number): string | undefined {
  if (typeof value !== 'string') return undefined
  const text = value.trim()
  if (text.length === 0) return undefined
  if (text.length > maxLength) return text.slice(0, maxLength)
  return text
}
/**
 * Coerce a confidence value into [0, 1].
 *
 * @param value Raw confidence from the model.
 * @returns 0.5 for non-numbers and non-finite numbers; otherwise the value
 *   clamped to the unit interval.
 */
function clampUnitInterval(value: unknown): number {
  const numeric = typeof value === 'number' ? value : Number.NaN
  if (!Number.isFinite(numeric)) return 0.5
  if (numeric < 0) return 0
  return numeric > 1 ? 1 : numeric
}
/**
 * Read the instinct domain, defaulting to `'project'` for anything that is
 * not one of the known INSTINCT_DOMAINS.
 */
function domainField(value: unknown): InstinctDomain {
  if (typeof value !== 'string') return 'project'
  const known: readonly string[] = INSTINCT_DOMAINS
  if (known.includes(value)) return value as InstinctDomain
  return 'project'
}
/** Read the scope: only the exact string `'global'` is global; all else is `'project'`. */
function scopeField(value: unknown): SkillLearningScope {
  if (value === 'global') return 'global'
  return 'project'
}
/**
 * Read the evidence list: keep the first three non-blank strings, each
 * trimmed and cut to 200 characters.
 *
 * @param value Raw `evidence` field.
 * @returns Up to three cleaned excerpts; `[]` when `value` is not an array.
 */
function evidenceField(value: unknown): string[] {
  if (!Array.isArray(value)) return []
  const maxEntries = 3
  const maxEntryLength = 200

  const kept: string[] = []
  for (let i = 0; i < value.length && kept.length < maxEntries; i += 1) {
    const entry: unknown = value[i]
    if (typeof entry !== 'string') continue
    const text = entry.trim()
    if (!text) continue
    kept.push(
      text.length > maxEntryLength ? text.slice(0, maxEntryLength) : text,
    )
  }
  return kept
}
/** Create an AbortSignal that aborts automatically after `ms` milliseconds. */
function makeTimeoutSignal(ms: number): AbortSignal {
return AbortSignal.timeout(ms)
}

View File

@@ -0,0 +1,451 @@
import { mkdir, readFile, rename, stat, writeFile } from 'node:fs/promises'
import { dirname, join } from 'node:path'
import { createHash, randomUUID } from 'node:crypto'
import type {
SkillLearningProjectContext as BaseSkillLearningProjectContext,
SkillLearningScope,
SkillObservation as BaseSkillObservation,
SkillObservationEvent,
SkillObservationOutcome,
} from './types.js'
export type { SkillLearningScope, SkillObservation } from './types.js'
// Project context as accepted by the observation store: projectId,
// projectName and cwd are required, every other base field is optional.
export type SkillLearningProjectContext = Pick<
BaseSkillLearningProjectContext,
'projectId' | 'projectName' | 'cwd'
> &
Partial<
Omit<BaseSkillLearningProjectContext, 'projectId' | 'projectName' | 'cwd'>
>
// Events the store records: every base event except 'tool_error'.
export type ObservationEvent = Exclude<SkillObservationEvent, 'tool_error'>
// Base outcomes plus 'interrupted'.
export type ObservationOutcome = SkillObservationOutcome | 'interrupted'
// Persisted shape of one observation: the base observation with the event and
// outcome narrowed as above and tool input/output stored as strings.
export type StoredSkillObservation = Omit<
BaseSkillObservation,
'event' | 'outcome' | 'toolInput' | 'toolOutput'
> & {
event: ObservationEvent
outcome?: ObservationOutcome
toolInput?: string
toolOutput?: string
toolName?: string
messageText?: string
source?: 'transcript' | 'hook' | 'tool-hook' | 'imported'
// SHA-256 over event, tool name, tool input/output and message text,
// computed by scrubObservation after scrubbing.
contentHash?: string
// Turn index at which the observation was captured. Used by
// runtimeObserver to scope tool-hook observations to the current REPL turn.
turn?: number
}
// Options shared by the observation store functions.
export type ObservationStoreOptions = {
// Overrides the storage root directory.
rootDir?: string
// Selects the per-project file; omitted or global projects use the global file.
project?: SkillLearningProjectContext
// Per-field character limit applied while scrubbing.
maxFieldLength?: number
// File size in bytes at which the observation file is archived.
archiveThresholdBytes?: number
}
// Subset of a transcript JSONL entry that ingestion reads. Every field is
// optional because entries are parsed from untyped JSON.
type ClaudeTranscriptEntry = {
sessionId?: string
cwd?: string
timestamp?: string
// Used as the role when `message.role` is absent.
type?: string
message?: {
role?: string
content?: unknown
}
// When present, the entry is recorded as a completed tool call.
tool_name?: string
tool_input?: unknown
tool_response?: unknown
}
// Per-field character limit applied by the scrubber unless overridden.
const DEFAULT_MAX_FIELD_LENGTH = 5_000
// Observation file size (bytes) at which the file is rotated into the archive.
const DEFAULT_ARCHIVE_THRESHOLD_BYTES = 1_000_000
// Default retention window for purgeOldObservations.
const DEFAULT_PURGE_MAX_AGE_DAYS = 30
const SECRET_REPLACEMENT = '[REDACTED]'
// Patterns are applied in array order, and the order matters: the Bearer
// pattern must run BEFORE the generic key/value pattern. Otherwise
// `Authorization: Bearer <token>` is first rewritten to
// `Authorization: [REDACTED] <token>` (the key/value value stops at
// whitespace, consuming only the word "Bearer"), after which the Bearer
// pattern no longer matches and the token is left in clear text.
const SECRET_PATTERNS: RegExp[] = [
  // Provider API keys (sk-..., sk-ant-..., sk-proj-..., xoxb-... etc.).
  /\b(?:sk|sk-ant|sk-proj|xox[baprs])-[A-Za-z0-9_-]{12,}\b/g,
  // E-mail addresses.
  /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
  // Bearer tokens.
  /\bBearer\s+[A-Za-z0-9._~+/=-]{12,}\b/gi,
  // `key: value` / `key=value` pairs for well-known secret-bearing keys.
  /\b(?:api[_-]?key|token|secret|password|authorization)\b\s*[:=]\s*["']?[^"',\s}]+/gi,
]
/**
 * Resolve the root directory of the skill-learning store.
 *
 * Precedence: `options.rootDir`, then the `CLAUDE_SKILL_LEARNING_HOME`
 * environment variable, then `<home>/.claude/skill-learning`. The home
 * directory is taken from `HOME`, then `USERPROFILE` (Windows shells usually
 * leave `HOME` unset), and only as a last resort the current working
 * directory.
 *
 * @param options Store options; only `rootDir` is consulted.
 * @returns The root directory path.
 */
export function getSkillLearningRoot(
  options?: ObservationStoreOptions,
): string {
  if (options?.rootDir) return options.rootDir
  if (process.env.CLAUDE_SKILL_LEARNING_HOME) {
    return process.env.CLAUDE_SKILL_LEARNING_HOME
  }
  const home =
    process.env.HOME ?? process.env.USERPROFILE ?? process.cwd()
  return join(home, '.claude', 'skill-learning')
}
/**
 * Resolve the JSONL file that observations are stored in.
 *
 * A project whose scope and id are both non-global gets
 * `<root>/projects/<projectId>/observations.jsonl`; every other case (no
 * project, global scope, or the `global` id) uses
 * `<root>/global/observations.jsonl`.
 *
 * @param options Store options providing the root and project.
 * @returns The observation file path.
 */
export function getObservationFilePath(
  options?: ObservationStoreOptions,
): string {
  const root = getSkillLearningRoot(options)
  const project = options?.project
  if (
    project &&
    project.scope !== 'global' &&
    project.projectId !== 'global'
  ) {
    return join(root, 'projects', project.projectId, 'observations.jsonl')
  }
  return join(root, 'global', 'observations.jsonl')
}
/**
 * Redact secrets from a text field and cap its length.
 *
 * Every SECRET_PATTERNS match is replaced. For `key: value` / `key=value`
 * matches the key is kept (`key: [REDACTED]`); any other match is replaced
 * entirely. The key is kept only when the text before the first `:`/`=` is a
 * bare keyword (letters, `_`, `-`, optional trailing whitespace) — otherwise
 * a Bearer token that merely contains `=` would be mistaken for a key/value
 * pair and the part of the token before the `=` would be echoed back.
 *
 * Text longer than `maxLength` is cut and suffixed with a marker carrying the
 * scrubbed length and SHA-256. If a redaction happened only in the cut-off
 * part, the preview is prefixed with the redaction marker so the fact that
 * secrets were present is not lost.
 *
 * @param value Text to scrub; `undefined` passes through.
 * @param maxLength Maximum preview length in characters.
 * @returns The scrubbed (and possibly truncated) text.
 */
export function scrubText(
  value: string | undefined,
  maxLength = DEFAULT_MAX_FIELD_LENGTH,
): string | undefined {
  if (value === undefined) return undefined
  let scrubbed = value
  for (const pattern of SECRET_PATTERNS) {
    scrubbed = scrubbed.replace(pattern, match => {
      const key = match.split(/[:=]/, 1)[0] ?? ''
      // `key !== match` means a separator was found; the keyword test rules
      // out token bodies that just happen to contain `=` or `:`.
      const isKeyValue = key !== match && /^[A-Za-z_-]+\s*$/.test(key)
      return isKeyValue
        ? `${key}: ${SECRET_REPLACEMENT}`
        : SECRET_REPLACEMENT
    })
  }
  if (scrubbed.length <= maxLength) return scrubbed
  const hash = hashText(scrubbed)
  let preview = scrubbed.slice(0, maxLength)
  if (
    scrubbed.includes(SECRET_REPLACEMENT) &&
    !preview.includes(SECRET_REPLACEMENT)
  ) {
    preview = `${SECRET_REPLACEMENT} ${preview}`
  }
  return `${preview}\n[TRUNCATED length=${scrubbed.length} sha256=${hash}]`
}
/**
 * Return a copy of the observation with its free-text fields scrubbed and a
 * fresh `contentHash`.
 *
 * The hash covers, newline-joined and after scrubbing: event, tool name, tool
 * input, tool output and message text (missing values count as `''`).
 *
 * @param observation Observation to scrub; it is not mutated.
 * @param options Store options; `maxFieldLength` overrides the default cap.
 * @returns The scrubbed observation.
 */
export function scrubObservation(
  observation: StoredSkillObservation,
  options?: ObservationStoreOptions,
): StoredSkillObservation {
  const limit = options?.maxFieldLength ?? DEFAULT_MAX_FIELD_LENGTH
  const clean = (text: string | undefined): string | undefined =>
    scrubText(text, limit)

  const scrubbed: StoredSkillObservation = {
    ...observation,
    toolInput: clean(observation.toolInput),
    toolOutput: clean(observation.toolOutput),
    messageText: clean(observation.messageText),
  }
  const { event, toolName, toolInput, toolOutput, messageText } = scrubbed
  const hashSource = `${event}\n${toolName ?? ''}\n${toolInput ?? ''}\n${toolOutput ?? ''}\n${messageText ?? ''}`
  return { ...scrubbed, contentHash: hashText(hashSource) }
}
// Serialized observations larger than this are not written to disk.
const MAX_SINGLE_OBSERVATION_BYTES = 64 * 1024

/**
 * Scrub an observation and append it to the observation file as one JSONL
 * line.
 *
 * The target directory is created if needed and an oversized file is
 * archived first. An observation whose serialized form exceeds
 * MAX_SINGLE_OBSERVATION_BYTES is returned but not written.
 *
 * @param observation Observation to record.
 * @param options Store options.
 * @returns The scrubbed observation.
 */
export async function appendObservation(
  observation: StoredSkillObservation,
  options?: ObservationStoreOptions,
): Promise<StoredSkillObservation> {
  const target = getObservationFilePath(options)
  await mkdir(dirname(target), { recursive: true })
  await archiveLargeObservationFile(options)

  const scrubbed = scrubObservation(observation, options)
  const line = JSON.stringify(scrubbed)
  const withinBudget =
    Buffer.byteLength(line) <= MAX_SINGLE_OBSERVATION_BYTES
  if (withinBudget) {
    await writeFile(target, `${line}\n`, { flag: 'a' })
  }
  return scrubbed
}
/**
 * Read every observation from the observation file.
 *
 * Blank lines and lines that fail to parse (e.g. a truncated append left by
 * a crash) are skipped so one bad line cannot break the whole read.
 *
 * @param options Store options.
 * @returns Parsed observations in file order; `[]` when the file is missing.
 * @throws Any read error other than `ENOENT`.
 */
export async function readObservations(
  options?: ObservationStoreOptions,
): Promise<StoredSkillObservation[]> {
  let content: string
  try {
    content = await readFile(getObservationFilePath(options), 'utf8')
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === 'ENOENT') return []
    throw error
  }

  return content.split(/\r?\n/).flatMap(line => {
    if (!line.trim()) return []
    try {
      return [JSON.parse(line) as StoredSkillObservation]
    } catch {
      // Corrupt/truncated JSONL line: skip it.
      return []
    }
  })
}
/**
 * Convert a transcript JSONL file into observations and append them to the
 * store.
 *
 * Blank lines, lines that are not valid JSON and lines whose JSON value is
 * not an object are skipped, in line with how `readObservations` treats
 * corrupt lines; one damaged transcript line therefore no longer aborts the
 * whole ingestion.
 *
 * @param transcriptPath Path of the transcript file.
 * @param options Store options.
 * @returns The scrubbed observations, in transcript order.
 * @throws If the transcript cannot be read or an append fails.
 */
export async function ingestTranscript(
  transcriptPath: string,
  options?: ObservationStoreOptions,
): Promise<StoredSkillObservation[]> {
  const transcript = await readFile(transcriptPath, 'utf8')
  const observations: StoredSkillObservation[] = []
  for (const line of transcript.split(/\r?\n/)) {
    if (!line.trim()) continue
    let parsed: unknown
    try {
      parsed = JSON.parse(line)
    } catch {
      // Corrupt or truncated line: skip it rather than fail the ingest.
      continue
    }
    // Scalars, null and arrays cannot be transcript entries.
    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
      continue
    }
    const entry = parsed as ClaudeTranscriptEntry
    for (const observation of observationsFromTranscriptEntry(entry, options)) {
      observations.push(await appendObservation(observation, options))
    }
  }
  return observations
}
/**
 * Drop observations older than `maxAgeDays` from the observation file.
 *
 * A line is purged only when it parses and carries a valid timestamp before
 * the cutoff; unparseable lines and lines without a usable timestamp are
 * kept. The file is rewritten through a temp file plus rename so a crash
 * mid-write cannot leave it truncated. Nothing is written when no line is
 * purged.
 *
 * @param options Store options plus `maxAgeDays` (default
 *   DEFAULT_PURGE_MAX_AGE_DAYS).
 * @returns The number of purged observations; 0 when the file is missing.
 * @throws Any read error other than `ENOENT`, or a write/rename error.
 */
export async function purgeOldObservations(
  options?: ObservationStoreOptions & { maxAgeDays?: number },
): Promise<number> {
  const filePath = getObservationFilePath(options)
  const maxAgeDays = options?.maxAgeDays ?? DEFAULT_PURGE_MAX_AGE_DAYS
  const cutoff = Date.now() - maxAgeDays * 24 * 60 * 60 * 1000

  let content: string
  try {
    content = await readFile(filePath, 'utf8')
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === 'ENOENT') return 0
    throw error
  }

  const isExpired = (line: string): boolean => {
    try {
      const parsed = JSON.parse(line) as StoredSkillObservation
      const ts = Date.parse(parsed.timestamp)
      return !Number.isNaN(ts) && ts < cutoff
    } catch {
      // Unreadable lines are retained, never purged.
      return false
    }
  }

  const candidates = content.split(/\r?\n/).filter(line => line.trim())
  const kept = candidates.filter(line => !isExpired(line))
  const purged = candidates.length - kept.length
  if (purged === 0) return 0

  // Atomic write: temp + rename.
  const tmpPath = `${filePath}.tmp-${process.pid}-${Date.now()}`
  await writeFile(tmpPath, kept.length ? `${kept.join('\n')}\n` : '')
  await rename(tmpPath, filePath)
  return purged
}
/**
 * Move the observation file into `observations.archive/` once it reaches the
 * size threshold.
 *
 * The archive file is named after the current ISO timestamp with `:` and `.`
 * replaced by `-`.
 *
 * @param options Store options; `archiveThresholdBytes` overrides the default.
 * @returns The archive path, or `null` when the file is missing or still
 *   below the threshold.
 * @throws Any stat error other than `ENOENT`, or a mkdir/rename error.
 */
export async function archiveLargeObservationFile(
  options?: ObservationStoreOptions,
): Promise<string | null> {
  const filePath = getObservationFilePath(options)
  const threshold =
    options?.archiveThresholdBytes ?? DEFAULT_ARCHIVE_THRESHOLD_BYTES

  let size: number
  try {
    size = (await stat(filePath)).size
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === 'ENOENT') return null
    throw error
  }
  if (size < threshold) return null

  const archiveDir = join(dirname(filePath), 'observations.archive')
  await mkdir(archiveDir, { recursive: true })
  const stamp = new Date().toISOString().replace(/[:.]/g, '-')
  const archivePath = join(archiveDir, `observations-${stamp}.jsonl`)
  await rename(filePath, archivePath)
  return archivePath
}
/**
 * Translate one transcript entry into zero or more observations.
 *
 * - An entry with `tool_name` yields a `tool_complete` observation whose
 *   outcome is inferred from the tool response.
 * - A user entry yields one `tool_complete` per `tool_result` part, or a
 *   single `user_message` when it has no tool results.
 * - An assistant entry yields one `tool_start` per `tool_use` part followed
 *   by an `assistant_message` when it has non-blank text.
 *
 * Missing session, project, cwd and timestamp values fall back to
 * `'unknown-session'`, `'global'`, the process cwd and the current time.
 *
 * @param entry Parsed transcript entry.
 * @param options Store options; only `project` is consulted.
 * @returns The observations, not yet scrubbed or persisted.
 */
function observationsFromTranscriptEntry(
  entry: ClaudeTranscriptEntry,
  options?: ObservationStoreOptions,
): StoredSkillObservation[] {
  const project = options?.project
  const shared = {
    sessionId: entry.sessionId ?? 'unknown-session',
    projectId: project?.projectId ?? 'global',
    projectName: project?.projectName ?? 'global',
    cwd: entry.cwd ?? project?.cwd ?? process.cwd(),
    timestamp: entry.timestamp ?? new Date().toISOString(),
    source: 'transcript' as const,
  }

  const emitted: StoredSkillObservation[] = []
  // Adds the shared fields and a fresh id in front of the event-specific ones.
  const emit = (
    fields: Omit<StoredSkillObservation, keyof typeof shared | 'id'>,
  ): void => {
    emitted.push({ ...shared, id: createObservationId(), ...fields })
  }

  if (entry.tool_name) {
    emit({
      event: 'tool_complete',
      toolName: entry.tool_name,
      toolInput: stringifyField(entry.tool_input),
      toolOutput: stringifyField(entry.tool_response),
      outcome: inferOutcome(entry.tool_response),
    })
  }

  const role = entry.message?.role ?? entry.type
  const content = entry.message?.content

  if (role === 'user') {
    const toolResults = extractToolResults(content)
    if (toolResults.length === 0) {
      emit({ event: 'user_message', messageText: extractText(content) })
    } else {
      for (const result of toolResults) {
        emit({
          event: 'tool_complete',
          toolName: result.name,
          toolOutput: result.output,
          outcome: result.isError ? 'failure' : 'success',
        })
      }
    }
    return emitted
  }

  if (role === 'assistant') {
    for (const toolUse of extractToolUses(content)) {
      emit({
        event: 'tool_start',
        toolName: toolUse.name,
        toolInput: toolUse.input,
      })
    }
    const text = extractText(content)
    if (text.trim()) {
      emit({ event: 'assistant_message', messageText: text })
    }
  }
  return emitted
}
/**
 * Flatten message content to plain text.
 *
 * Strings are returned as-is. Arrays contribute their string elements and
 * the string `text` property of object elements, joined by newlines with
 * empty pieces dropped. Any other value is stringified (or `''`).
 *
 * @param content Message content of unknown shape.
 * @returns The extracted text.
 */
function extractText(content: unknown): string {
  if (typeof content === 'string') return content
  if (!Array.isArray(content)) return stringifyField(content) ?? ''

  const pieces: string[] = []
  for (const part of content) {
    let piece = ''
    if (typeof part === 'string') {
      piece = part
    } else if (part && typeof part === 'object') {
      const text = (part as Record<string, unknown>).text
      if (typeof text === 'string') piece = text
    }
    if (piece) pieces.push(piece)
  }
  return pieces.join('\n')
}
/**
 * Collect the `tool_use` parts of an assistant message.
 *
 * @param content Message content; non-arrays yield `[]`.
 * @returns One `{ name, input }` per `tool_use` part, with a missing name
 *   replaced by `'unknown_tool'` and the input stringified.
 */
function extractToolUses(
  content: unknown,
): Array<{ name: string; input: string | undefined }> {
  const uses: Array<{ name: string; input: string | undefined }> = []
  if (!Array.isArray(content)) return uses

  for (const part of content) {
    if (!part || typeof part !== 'object') continue
    const record = part as Record<string, unknown>
    if (record.type !== 'tool_use') continue
    uses.push({
      name: String(record.name ?? 'unknown_tool'),
      input: stringifyField(record.input),
    })
  }
  return uses
}
/**
 * Collect the `tool_result` parts of a user message.
 *
 * @param content Message content; non-arrays yield `[]`.
 * @returns One `{ name, output, isError }` per `tool_result` part. The name
 *   comes from `name`, then `tool_name`, then `'unknown_tool'`; `isError` is
 *   true only when `is_error` is exactly `true`.
 */
function extractToolResults(
  content: unknown,
): Array<{ name: string; output: string | undefined; isError: boolean }> {
  const results: Array<{
    name: string
    output: string | undefined
    isError: boolean
  }> = []
  if (!Array.isArray(content)) return results

  for (const part of content) {
    if (!part || typeof part !== 'object') continue
    const record = part as Record<string, unknown>
    if (record.type !== 'tool_result') continue
    results.push({
      name: String(record.name ?? record.tool_name ?? 'unknown_tool'),
      output: stringifyField(record.content),
      isError: record.is_error === true,
    })
  }
  return results
}
/**
 * Guess a tool outcome from its response by case-insensitive substring
 * search on the stringified value.
 *
 * `interrupted`/`aborted` win over `error`/`exception`/`failed`; a response
 * with none of these markers counts as a success.
 *
 * @param value Raw tool response.
 * @returns The inferred outcome.
 */
function inferOutcome(value: unknown): ObservationOutcome {
  const text = (stringifyField(value) ?? '').toLowerCase()
  const mentions = (markers: readonly string[]): boolean =>
    markers.some(marker => text.includes(marker))

  if (mentions(['interrupted', 'aborted'])) return 'interrupted'
  if (mentions(['error', 'exception', 'failed'])) return 'failure'
  return 'success'
}
/**
 * Convert an arbitrary field value to a string for storage.
 *
 * `undefined` and `null` become `undefined`, strings pass through, and
 * everything else is JSON-encoded. Values that `JSON.stringify` cannot
 * encode (circular structures, BigInt) fall back to `String(value)` instead
 * of throwing, so capturing an observation can never fail on an unusual
 * payload.
 *
 * @param value Value to convert.
 * @returns The string form, or `undefined` when there is nothing to store.
 */
export function stringifyField(value: unknown): string | undefined {
  if (value === undefined || value === null) return undefined
  if (typeof value === 'string') return value
  try {
    return JSON.stringify(value)
  } catch {
    // Circular reference or BigInt: degrade to the generic string form.
    return String(value)
  }
}
/**
 * Generate a random UUID for a new observation, using the global `crypto`
 * object when it offers `randomUUID` and the `node:crypto` import otherwise.
 */
function createObservationId(): string {
  const hasGlobalUuid = typeof crypto !== 'undefined' && 'randomUUID' in crypto
  return hasGlobalUuid ? crypto.randomUUID() : randomUUID()
}
/** Return the lowercase hex SHA-256 digest of `value`. */
function hashText(value: string): string {
  const hasher = createHash('sha256')
  hasher.update(value)
  return hasher.digest('hex')
}

View File

@@ -0,0 +1,71 @@
import type { InstinctCandidate } from './instinctParser.js'
import type { StoredSkillObservation } from './observationStore.js'
import type { SkillLearningProjectContext } from './types.js'
// Context handed to a backend alongside the observations.
export type ObserverBackendContext = {
project?: SkillLearningProjectContext
}
// A backend may answer synchronously or asynchronously.
export type ObserverBackendResult =
| InstinctCandidate[]
| Promise<InstinctCandidate[]>
// Contract for an observation analyser that turns observations into
// instinct candidates.
export interface ObserverBackend {
// Key under which the backend is stored in the registry.
readonly name: string
analyze(
observations: StoredSkillObservation[],
ctx?: ObserverBackendContext,
): ObserverBackendResult
}
// Registered backends, keyed by name.
const registry = new Map<string, ObserverBackend>()
// Name of the backend currently used for analysis, if any.
let activeName: string | undefined

/**
 * Register a backend under its name, replacing any earlier backend with the
 * same name. The first backend registered while none is active becomes the
 * active one.
 */
export function registerObserverBackend(backend: ObserverBackend): void {
  const { name } = backend
  registry.set(name, backend)
  activeName = activeName || name
}
/**
 * Make the named backend the active one.
 *
 * @param name Name of a registered backend.
 * @throws If no backend is registered under `name`.
 */
export function setActiveObserverBackend(name: string): void {
  if (registry.has(name)) {
    activeName = name
    return
  }
  throw new Error(`Observer backend "${name}" is not registered`)
}
/**
 * Return the active backend.
 *
 * @throws If no backend is active or the active name is no longer registered.
 */
export function getActiveObserverBackend(): ObserverBackend {
  if (activeName) {
    const backend = registry.get(activeName)
    if (backend) return backend
  }
  throw new Error(
    'No observer backend is active — register one before analyzing observations',
  )
}
/** List the names of all registered backends, in registration order. */
export function listObserverBackends(): string[] {
  return [...registry.keys()]
}
/** Test helper: forget every registered backend and clear the active one. */
export function resetObserverBackendsForTest(): void {
registry.clear()
activeName = undefined
}
/**
 * Analyse observations with the active backend, always answering with a
 * promise whether the backend is synchronous or asynchronous.
 *
 * @param observations Observations to analyse.
 * @param ctx Optional project context.
 * @returns The backend's candidates; rejects when no backend is active or
 *   the backend fails.
 */
export async function analyzeWithActiveBackend(
  observations: StoredSkillObservation[],
  ctx?: ObserverBackendContext,
): Promise<InstinctCandidate[]> {
  const backend = getActiveObserverBackend()
  return backend.analyze(observations, ctx)
}
/**
 * Read the backend name from `SKILL_LEARNING_OBSERVER_BACKEND`.
 *
 * @returns The trimmed name when it is non-empty and registered, else `undefined`.
 */
function pickBackendFromEnv(): string | undefined {
  const requested = process.env.SKILL_LEARNING_OBSERVER_BACKEND?.trim()
  if (!requested) return undefined
  return registry.has(requested) ? requested : undefined
}
/**
 * Activate the backend requested through the environment, when it is
 * registered, and return whichever backend is active afterwards.
 *
 * @throws If no backend is active.
 */
export function resolveDefaultObserverBackend(): ObserverBackend {
  const requested = pickBackendFromEnv()
  if (requested) {
    setActiveObserverBackend(requested)
  }
  return getActiveObserverBackend()
}

View File

@@ -0,0 +1,264 @@
import { execFileSync } from 'child_process'
import { createHash } from 'crypto'
import {
existsSync,
mkdirSync,
readFileSync,
realpathSync,
writeFileSync,
} from 'fs'
import { basename, join, resolve } from 'path'
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
import type {
ProjectContextSource,
SkillLearningProjectContext,
SkillLearningProjectRecord,
SkillLearningProjectsRegistry,
SkillLearningScope,
} from './types.js'
// Schema version written to, and required from, the projects registry file.
const REGISTRY_VERSION = 1
// Reserved project id and display name for the global (non-project) scope.
const GLOBAL_PROJECT_ID = 'global'
const GLOBAL_PROJECT_NAME = 'Global'
/** Root directory of skill-learning data: `<claude config home>/skill-learning`. */
export function getSkillLearningRootDir(): string {
return join(getClaudeConfigHomeDir(), 'skill-learning')
}
/** Path of the registry file listing known projects: `<root>/projects.json`. */
export function getProjectsRegistryPath(): string {
return join(getSkillLearningRootDir(), 'projects.json')
}
/**
 * Storage directory for a project: `<root>/global` for the global project id
 * and `<root>/projects/<projectId>` for every other id.
 */
export function getProjectStorageDir(projectId: string): string {
  const root = getSkillLearningRootDir()
  return projectId === GLOBAL_PROJECT_ID
    ? join(root, 'global')
    : join(root, 'projects', projectId)
}
/** Path of the per-project record file: `<project storage dir>/project.json`. */
export function getProjectContextPath(projectId: string): string {
return join(getProjectStorageDir(projectId), 'project.json')
}
// Per-cwd in-memory cache. `resolveContext` does synchronous `git` forks and
// `persistProjectContext` does registry/project.json writes on every call —
// in the tool.call hot path (one wrapper invocation per tool) that cost would
// accumulate into the hundreds-of-ms range per session. Cache keyed by the
// exact cwd string so different worktrees still get independent entries.
const contextCache = new Map<string, SkillLearningProjectContext>()
// Minimum gap between registry refreshes triggered by cache hits.
const PERSIST_INTERVAL_MS = 5 * 60 * 1000
// Time of the last persist; shared by all cached cwds.
let lastPersistAt = 0

/**
 * Resolve (and cache) the project context for a working directory.
 *
 * A cache miss resolves the context, caches it and persists it immediately.
 * A cache hit returns the cached context and re-persists it at most once per
 * PERSIST_INTERVAL_MS so `lastSeenAt` stays reasonably fresh in long-lived
 * processes without a write on every tool call.
 *
 * @param cwd Working directory; defaults to the process cwd.
 * @returns The project context for `cwd`.
 */
export function resolveProjectContext(
  cwd = process.cwd(),
): SkillLearningProjectContext {
  const hit = contextCache.get(cwd)
  if (!hit) {
    const fresh = resolveContext(cwd)
    contextCache.set(cwd, fresh)
    persistProjectContext(fresh)
    lastPersistAt = Date.now()
    return fresh
  }

  const now = Date.now()
  const refreshDue = now - lastPersistAt > PERSIST_INTERVAL_MS
  if (refreshDue) {
    // Stamp before writing, as the throttle window starts at the attempt.
    lastPersistAt = now
    persistProjectContext(hit)
  }
  return hit
}
/** Test helper: drop all cached contexts and reset the persist throttle. */
export function resetProjectContextCacheForTest(): void {
contextCache.clear()
lastPersistAt = 0
}
/**
 * List every project recorded in the registry, sorted by project name using
 * locale comparison.
 */
export function listKnownProjects(): SkillLearningProjectRecord[] {
  const { projects } = readProjectsRegistry(getProjectsRegistryPath())
  const records = Object.values(projects)
  records.sort((left, right) =>
    left.projectName.localeCompare(right.projectName),
  )
  return records
}
/**
 * Work out which project a working directory belongs to.
 *
 * Sources are tried in this order and the first that applies wins:
 * 1. `CLAUDE_PROJECT_DIR` environment variable (trimmed, non-empty);
 * 2. the `origin` git remote of `cwd`, with the git top-level directory (or
 *    `cwd` when unavailable) as project root;
 * 3. the git top-level directory of `cwd`;
 * 4. the global context.
 *
 * @param cwd Working directory to classify.
 * @returns The resolved context (not cached or persisted here).
 */
function resolveContext(cwd: string): SkillLearningProjectContext {
  const envProjectDir = process.env.CLAUDE_PROJECT_DIR?.trim()
  if (envProjectDir) {
    const projectRoot = normalizePath(envProjectDir)
    return buildContext({
      source: 'claude_project_dir',
      scope: 'project',
      cwd,
      projectRoot,
      identity: `claude-project-dir:${projectRoot}`,
      projectName: basename(projectRoot) || 'project',
    })
  }

  const showToplevel = (): ReturnType<typeof git> =>
    git(['rev-parse', '--show-toplevel'], cwd)

  const remoteUrl = git(['remote', 'get-url', 'origin'], cwd)
  if (remoteUrl) {
    const toplevel = showToplevel()
    const gitRemote = normalizeGitRemote(remoteUrl)
    return buildContext({
      source: 'git_remote',
      scope: 'project',
      cwd,
      projectRoot: normalizePath(toplevel ? toplevel : cwd),
      gitRemote,
      identity: `git-remote:${gitRemote}`,
      projectName: projectNameFromRemote(gitRemote),
    })
  }

  const toplevel = showToplevel()
  if (!toplevel) {
    return buildContext({
      source: 'global',
      scope: 'global',
      cwd,
      projectRoot: undefined,
      identity: 'global',
      projectName: GLOBAL_PROJECT_NAME,
    })
  }

  const projectRoot = normalizePath(toplevel)
  return buildContext({
    source: 'git_root',
    scope: 'project',
    cwd,
    projectRoot,
    identity: `git-root:${projectRoot}`,
    projectName: basename(projectRoot) || 'project',
  })
}
/**
 * Assembles a `SkillLearningProjectContext` from the pieces gathered by
 * `resolveContext`.
 *
 * The global scope always uses `GLOBAL_PROJECT_ID`; every other scope gets an
 * id derived from `identity` by `stableProjectId`. `cwd` is normalized here;
 * `projectRoot` and `gitRemote` are stored as given.
 */
function buildContext(input: {
  source: ProjectContextSource
  scope: SkillLearningScope
  cwd: string
  projectRoot?: string
  gitRemote?: string
  identity: string
  projectName: string
}): SkillLearningProjectContext {
  const { scope, source, identity, projectName, projectRoot, gitRemote } = input

  let projectId: string
  if (scope === 'global') {
    projectId = GLOBAL_PROJECT_ID
  } else {
    projectId = stableProjectId(identity)
  }

  return {
    projectId,
    projectName,
    scope,
    source,
    cwd: normalizePath(input.cwd),
    projectRoot,
    gitRemote,
    storageDir: getProjectStorageDir(projectId),
  }
}
/**
 * Records `context` in the projects registry and writes its per-project
 * context file.
 *
 * `firstSeenAt` is carried over from an existing registry entry when there is
 * one; `lastSeenAt` and the registry's `updatedAt` are set to the current
 * time. The project storage directory and the skill-learning root directory
 * are created if missing before anything is written.
 */
function persistProjectContext(context: SkillLearningProjectContext): void {
  const timestamp = new Date().toISOString()
  const registryFile = getProjectsRegistryPath()
  const registry = readProjectsRegistry(registryFile)

  const firstSeenAt =
    registry.projects[context.projectId]?.firstSeenAt ?? timestamp
  const entry: SkillLearningProjectRecord = {
    ...context,
    firstSeenAt,
    lastSeenAt: timestamp,
  }
  registry.projects[context.projectId] = entry
  registry.updatedAt = timestamp

  // Directories first, then the two JSON files.
  mkdirSync(context.storageDir, { recursive: true })
  mkdirSync(getSkillLearningRootDir(), { recursive: true })
  writeJson(registryFile, registry)
  writeJson(getProjectContextPath(context.projectId), entry)
}
/**
 * Loads the projects registry from `path`.
 *
 * Never throws: a missing file, unreadable or invalid JSON, a version
 * mismatch, or a malformed `projects` field all yield a fresh empty registry,
 * so corrupt state cannot block startup.
 *
 * @param path Absolute path of the registry JSON file.
 * @returns The parsed registry, or an empty one when nothing usable exists.
 */
function readProjectsRegistry(path: string): SkillLearningProjectsRegistry {
  const emptyRegistry = (): SkillLearningProjectsRegistry => ({
    version: REGISTRY_VERSION,
    updatedAt: new Date(0).toISOString(),
    projects: {},
  })

  if (!existsSync(path)) return emptyRegistry()

  try {
    const parsed: unknown = JSON.parse(readFileSync(path, 'utf8'))
    if (!parsed || typeof parsed !== 'object') return emptyRegistry()

    const candidate = parsed as Partial<SkillLearningProjectsRegistry>
    const { projects } = candidate
    // `projects` must be a plain keyed object. An array also has
    // typeof 'object', but records assigned to string keys on an array are
    // dropped when the registry is serialized again, so reject it here.
    if (
      candidate.version !== REGISTRY_VERSION ||
      !projects ||
      typeof projects !== 'object' ||
      Array.isArray(projects)
    ) {
      return emptyRegistry()
    }

    return {
      version: REGISTRY_VERSION,
      updatedAt:
        typeof candidate.updatedAt === 'string'
          ? candidate.updatedAt
          : new Date(0).toISOString(),
      projects: projects as Record<string, SkillLearningProjectRecord>,
    }
  } catch {
    // Unreadable or corrupt file: start over with a fresh registry.
    return emptyRegistry()
  }
}
/**
 * Writes `value` to `path` as UTF-8 JSON with two-space indentation and a
 * trailing newline, replacing any existing file.
 */
function writeJson(path: string, value: unknown): void {
  const serialized = JSON.stringify(value, null, 2)
  writeFileSync(path, serialized + '\n', 'utf8')
}
/**
 * Runs `git -C <cwd> <args…>` synchronously and returns its trimmed stdout.
 *
 * @param args Arguments placed after `-C <cwd>`.
 * @param cwd Directory git should operate in.
 * @returns The trimmed output, or `null` when the command fails for any
 *   reason or prints nothing.
 */
function git(args: string[], cwd: string): string | null {
  let stdout: string
  try {
    stdout = execFileSync('git', ['-C', cwd, ...args], {
      encoding: 'utf8',
      // stdin is closed and stderr is discarded; only stdout is captured.
      stdio: ['ignore', 'pipe', 'ignore'],
    })
  } catch {
    return null
  }
  const value = stdout.trim()
  return value.length > 0 ? value : null
}
/**
 * Produces a canonical form of `path`: absolute, with symlinks resolved when
 * the path exists on disk, and Unicode-normalized to NFC.
 *
 * @param path Absolute or relative path.
 * @returns The canonical path; when `realpath` fails, the absolute path is
 *   used as-is.
 */
function normalizePath(path: string): string {
  const absolute = resolve(path)
  let canonical = absolute
  try {
    canonical = realpathSync.native(absolute)
  } catch {
    // realpath failed (e.g. the path does not exist): keep the absolute path.
  }
  return canonical.normalize('NFC')
}
/**
 * Canonicalizes a git remote URL so equivalent spellings of the same remote
 * map to one project identity.
 *
 * Steps: trim whitespace, convert backslashes to forward slashes, drop
 * trailing slashes, drop a trailing `.git`, drop any slashes that exposes,
 * then lowercase. Trailing slashes are removed before `.git` so that
 * `…/repo.git/` normalizes the same way as `…/repo.git`.
 *
 * @param remote Raw remote URL as printed by git.
 * @returns The canonical, lowercased remote.
 */
function normalizeGitRemote(remote: string): string {
  const trailingSlashes = /\/+$/
  return remote
    .trim()
    .replace(/\\/g, '/')
    .replace(trailingSlashes, '')
    .replace(/\.git$/i, '')
    .replace(trailingSlashes, '')
    .toLowerCase()
}
/**
 * Extracts a display name from a git remote: the final path segment after the
 * last `/` or `:`, without a trailing `.git`.
 *
 * @param remote Remote URL or scp-style address.
 * @returns The repository name, or `'project'` when none can be extracted.
 */
function projectNameFromRemote(remote: string): string {
  const lastSegment = /[:/]([^/:]+?)(?:\.git)?$/.exec(remote)
  if (lastSegment === null) return 'project'
  return lastSegment[1] || 'project'
}
/**
 * Derives a deterministic project id from an identity string: `project-`
 * followed by the first 16 hex characters of the identity's SHA-256 digest.
 */
function stableProjectId(identity: string): string {
  const hasher = createHash('sha256')
  hasher.update(identity)
  const shortDigest = hasher.digest('hex').slice(0, 16)
  return 'project-' + shortDigest
}

View File

@@ -0,0 +1,161 @@
import { readdir } from 'node:fs/promises'
import { existsSync } from 'node:fs'
import { join } from 'node:path'
import type { Instinct, StoredInstinct } from './instinctParser.js'
import {
getInstinctsDir,
loadInstincts,
saveInstinct,
type InstinctStoreOptions,
} from './instinctStore.js'
import { getSkillLearningRoot } from './observationStore.js'
import type { SkillLearningProjectContext } from './types.js'
/**
 * An instinct that qualified for promotion from project scope to global
 * scope, as computed by `findPromotionCandidates`.
 */
export type PromotionCandidate = {
// Id shared by the grouped project-scoped instincts.
instinctId: string
// Mean confidence of the grouped instincts, rounded to two decimals.
averageConfidence: number
// Distinct ids of the projects in which the instinct was found.
projectIds: string[]
}
/** Tuning knobs for `checkPromotion`. */
export type PromotionOptions = {
// Skill-learning root override, forwarded to the storage helpers.
rootDir?: string
// Minimum number of distinct projects required (defaults to 2).
minProjects?: number
// Minimum average confidence required (defaults to 0.8).
minConfidence?: number
}
// Instinct ids already promoted by `checkPromotion` in this process; used to
// avoid promoting the same instinct again.
const sessionPromotedIds = new Set<string>()
/** Forgets which instincts this process has already promoted. */
export function resetPromotionBookkeeping(): void {
sessionPromotedIds.clear()
}
/**
 * Finds project-scoped instincts that appear in enough distinct projects with
 * a high enough mean confidence to be promoted.
 *
 * Instincts are grouped by `id`; entries whose scope is not `'project'` are
 * ignored. A group qualifies when it spans at least `minProjects` distinct
 * non-empty `projectId`s and its mean confidence is at least `minConfidence`.
 *
 * @param instincts Instincts gathered across projects.
 * @param minProjects Minimum number of distinct projects (default 2).
 * @param minConfidence Minimum mean confidence (default 0.8).
 * @returns One candidate per qualifying id, in first-seen order, with the
 *   mean confidence rounded to two decimals.
 */
export function findPromotionCandidates(
  instincts: Instinct[],
  minProjects = 2,
  minConfidence = 0.8,
): PromotionCandidate[] {
  const byId = new Map<string, Instinct[]>()
  for (const instinct of instincts) {
    if (instinct.scope !== 'project') continue
    const bucket = byId.get(instinct.id)
    if (bucket) {
      bucket.push(instinct)
    } else {
      byId.set(instinct.id, [instinct])
    }
  }

  const qualifying: PromotionCandidate[] = []
  for (const [instinctId, members] of byId) {
    const distinctProjects = new Set<string>()
    let total = 0
    for (const member of members) {
      if (member.projectId) distinctProjects.add(member.projectId)
      total += member.confidence
    }
    const mean = total / members.length

    if (distinctProjects.size >= minProjects && mean >= minConfidence) {
      qualifying.push({
        instinctId,
        projectIds: Array.from(distinctProjects),
        averageConfidence: Number(mean.toFixed(2)),
      })
    }
  }
  return qualifying
}
/**
 * Promotes qualifying project-scoped instincts to the global scope.
 *
 * Loads the instincts of every project, selects candidates with
 * `findPromotionCandidates`, and saves a global copy of each candidate that
 * this process has not promoted yet. The copy is based on the first loaded
 * instinct with the candidate's id, with project fields cleared and
 * confidence set to the candidate's average.
 *
 * @param options Optional root override and promotion thresholds.
 * @returns The candidates promoted by this call.
 */
export async function checkPromotion(
  options: PromotionOptions = {},
): Promise<PromotionCandidate[]> {
  const requiredProjects = options.minProjects ?? 2
  const requiredConfidence = options.minConfidence ?? 0.8

  const projectInstincts = await loadAllProjectInstincts(options.rootDir)
  const eligible = findPromotionCandidates(
    projectInstincts,
    requiredProjects,
    requiredConfidence,
  )

  const newlyPromoted: PromotionCandidate[] = []
  for (const candidate of eligible) {
    const { instinctId } = candidate
    if (sessionPromotedIds.has(instinctId)) continue

    const template = projectInstincts.find(
      instinct => instinct.id === instinctId,
    )
    if (!template) continue

    const promotedCopy: StoredInstinct = {
      ...template,
      scope: 'global',
      projectId: undefined,
      projectName: undefined,
      confidence: candidate.averageConfidence,
      updatedAt: new Date().toISOString(),
    }
    const storeOptions: InstinctStoreOptions = {
      rootDir: options.rootDir,
      scope: 'global',
      project: globalProjectContext(options.rootDir),
    }
    await saveInstinct(promotedCopy, storeOptions)

    sessionPromotedIds.add(instinctId)
    newlyPromoted.push(candidate)
  }
  return newlyPromoted
}
/**
 * Loads the project-scoped instincts of every project directory found under
 * `<skill-learning root>/projects`.
 *
 * A minimal project context is built for each directory, using the directory
 * name as both project id and project name. Projects are loaded one after
 * another in directory-listing order.
 *
 * @param rootDir Optional skill-learning root override.
 * @returns All loaded instincts, or an empty array when the `projects`
 *   directory does not exist.
 */
async function loadAllProjectInstincts(
  rootDir?: string,
): Promise<StoredInstinct[]> {
  const projectsRoot = join(
    getSkillLearningRoot(rootDir ? { rootDir } : undefined),
    'projects',
  )
  if (!existsSync(projectsRoot)) return []

  const dirents = await readdir(projectsRoot, { withFileTypes: true })
  const collected: StoredInstinct[] = []
  for (const dirent of dirents.filter(item => item.isDirectory())) {
    const project: SkillLearningProjectContext = {
      projectId: dirent.name,
      projectName: dirent.name,
      scope: 'project',
      source: 'git_root',
      cwd: projectsRoot,
      storageDir: join(projectsRoot, dirent.name),
    }
    const loaded = await loadInstincts({ rootDir, project, scope: 'project' })
    collected.push(...loaded)
  }
  return collected
}
/**
 * Builds the synthetic project context that represents the global scope,
 * whose storage lives in `<skill-learning root>/global`.
 *
 * @param rootDir Optional skill-learning root override.
 */
function globalProjectContext(rootDir?: string): SkillLearningProjectContext {
  const rootOverride = rootDir ? { rootDir } : undefined
  const learningRoot = getSkillLearningRoot(rootOverride)
  const context: SkillLearningProjectContext = {
    projectId: 'global',
    projectName: 'Global',
    scope: 'global',
    source: 'global',
    cwd: learningRoot,
    storageDir: join(learningRoot, 'global'),
  }
  return context
}
/**
 * Returns the directory holding global-scope instincts, for consumers that
 * need to inspect it directly. Thin wrapper over `getInstinctsDir` using the
 * synthetic global project context.
 *
 * @param rootDir Optional skill-learning root override.
 */
export function getGlobalInstinctsDir(rootDir?: string): string {
return getInstinctsDir({
rootDir,
scope: 'global',
project: globalProjectContext(rootDir),
})
}

View File

@@ -0,0 +1,386 @@
import type { REPLHookContext } from '../../utils/hooks/postSamplingHooks.js'
import { registerPostSamplingHook } from '../../utils/hooks/postSamplingHooks.js'
import { getSkillLearningConfig } from './config.js'
import { isSkillLearningEnabled } from './featureCheck.js'
import {
appendObservation,
getSkillLearningRoot,
purgeOldObservations,
stringifyField,
} from './observationStore.js'
import { resolveProjectContext } from './projectContext.js'
import './sessionObserver.js'
import { createInstinct } from './instinctParser.js'
import {
analyzeWithActiveBackend,
resolveDefaultObserverBackend,
} from './observerBackend.js'
import {
decayInstinctConfidence,
loadInstincts,
prunePendingInstincts,
upsertInstinct,
} from './instinctStore.js'
import type { StoredSkillObservation } from './observationStore.js'
import type { Message } from '../../types/message.js'
import {
applySkillLifecycleDecision,
compareExistingArtifacts,
decideSkillLifecycle,
} from './skillLifecycle.js'
import {
generateAgentCandidates,
generateCommandCandidates,
clusterInstincts,
} from './evolution.js'
import { generateOrMergeSkillDraft } from './skillGenerator.js'
import { shouldGenerateSkillFromInstincts } from './learningPolicy.js'
import { writeLearnedCommand } from './commandGenerator.js'
import { writeLearnedAgent } from './agentGenerator.js'
import { readObservations } from './observationStore.js'
import { checkPromotion } from './promotion.js'
import { existsSync } from 'node:fs'
import { join } from 'node:path'
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
// Session id stamped on observations reconstructed from the REPL message
// stream, and the id used to select tool-hook observations for replay.
export const RUNTIME_SESSION_ID = 'runtime-session'
// Set by the first `initSkillLearning` call so later calls are no-ops.
let initialized = false
// Count of post-sampling passes that got past the self-filter guards.
let runtimeTurn = 0
// Timestamp watermark for consumed tool-hook observations — enables replay of
// only the records that arrived since the previous post-sampling pass.
// Compared as a string against observation timestamps.
let lastConsumedToolHookTimestamp = ''
// --- H5: LLM call throttle ---
// Number of backend analyses charged against the per-session cap.
let llmCallsThisSession = 0
// `Date.now()` value of the most recent charged call (0 = none yet).
let lastLlmCallTimestamp = 0
// --- H6: message watermark dedup ---
// Key: `${sessionId}:${messageId}` — prevents reprocessing the same message
// across repeated post-sampling calls in one REPL session.
const lastProcessedMessageIds = new Set<string>()
// Once the dedup set grows past MAX_PROCESSED_IDS, its oldest entries are
// dropped until TRIM_PROCESSED_IDS_TO remain.
const MAX_PROCESSED_IDS = 1000
const TRIM_PROCESSED_IDS_TO = 500
/**
 * Resets the LLM throttle counters and clears the processed-message dedup
 * set. Leaves the turn counter and the tool-hook watermark untouched.
 */
export function resetRuntimeLLMBookkeeping(): void {
llmCallsThisSession = 0
lastLlmCallTimestamp = 0
lastProcessedMessageIds.clear()
}
/** Returns how many post-sampling passes have run past the guard checks. */
export function getRuntimeTurn(): number {
return runtimeTurn
}
/**
 * One-time setup for runtime skill learning: selects the observer backend,
 * registers the post-sampling hook, and kicks off startup maintenance in the
 * background. Calls after the first are no-ops.
 */
export function initSkillLearning(): void {
if (initialized) return
initialized = true
// Resolve the active observer backend from SKILL_LEARNING_OBSERVER_BACKEND
// env. Without this call the registry stays on whichever backend was
// registered first (heuristic) — which means the env switch would silently
// be a no-op in production. Swallow registry errors so a typo in the env
// variable can never crash startup.
try {
resolveDefaultObserverBackend()
} catch {
// No backend registered yet, or env points at unknown name — leave the
// registry in its existing state.
}
registerPostSamplingHook(runSkillLearningPostSampling)
// Fire-and-forget startup maintenance: ECC parity for confidence decay,
// observation purge, pending instinct prune. Errors are swallowed so that
// skill-learning maintenance never blocks CLI startup.
void runStartupMaintenance().catch(() => {})
}
/**
 * Runs the startup housekeeping tasks for the current project concurrently:
 * confidence decay, old-observation purge, and pruning of pending instincts
 * (called with 30 as the first argument).
 *
 * Does nothing when skill learning is disabled by feature check or by the
 * `CLAUDE_SKILL_LEARNING_DISABLE` environment variable. Failures of the
 * individual tasks are absorbed by `Promise.allSettled`.
 */
async function runStartupMaintenance(): Promise<void> {
  const disabled =
    !isSkillLearningEnabled() ||
    Boolean(process.env.CLAUDE_SKILL_LEARNING_DISABLE)
  if (disabled) return

  const options = { project: resolveProjectContext(process.cwd()) }
  const tasks = [
    decayInstinctConfidence(options),
    purgeOldObservations(options),
    prunePendingInstincts(30, options),
  ]
  await Promise.allSettled(tasks)
}
/**
 * Reports whether `cwd` is the skill-learning storage root or a directory
 * beneath it.
 *
 * The prefix match must end on a path boundary: a bare `startsWith` would
 * also treat a sibling such as `<root>-backup` as inside the root and wrongly
 * suppress learning there.
 *
 * @param cwd Directory to test.
 * @returns `true` when `cwd` lies inside the storage root; `false` otherwise,
 *   including when the root cannot be determined.
 */
function isInsideSkillLearningStorage(cwd: string): boolean {
  try {
    const root = getSkillLearningRoot()
    if (!cwd.startsWith(root)) return false
    if (cwd.length === root.length) return true
    // A root that already ends with a separator makes any prefix match a
    // genuine descendant.
    if (/[\\/]$/.test(root)) return true
    // Otherwise the character right after the prefix must be a separator.
    // Both styles are accepted so no platform-specific import is needed.
    const boundary = cwd.charAt(root.length)
    return boundary === '/' || boundary === '\\'
  } catch {
    return false
  }
}
/**
 * Post-sampling hook: turns the latest REPL activity into observations,
 * derives instinct candidates from them, stores the candidates, and then
 * runs the auto-evolution step.
 *
 * Flow:
 *   1. Bail out unless learning is enabled and the call comes from the main
 *      REPL thread, outside a sub-agent and outside the storage directory.
 *   2. Append observations rebuilt from `context.messages`.
 *   3. Add stored tool-hook observations newer than the watermark and
 *      advance the watermark.
 *   4. Analyze with the active backend when the throttle allows it,
 *      otherwise with the heuristic backend.
 *   5. Upsert one instinct per candidate, then call
 *      `autoEvolveLearnedSkills`.
 *
 * @param context Hook context supplying the message list, query source and
 *   tool-use context.
 */
export async function runSkillLearningPostSampling(
context: REPLHookContext,
): Promise<void> {
if (!isSkillLearningEnabled()) return
// Self-filter layers in order: env escape hatch, entrypoint (only main REPL
// thread — `startsWith` covers 'repl_main_thread:outputStyle:<name>'), sub-
// agent skip, and a path guard that prevents feedback loops when the user
// hand-edits files inside the skill-learning storage directory itself.
if (process.env.CLAUDE_SKILL_LEARNING_DISABLE) return
if (!context.querySource?.startsWith('repl_main_thread')) return
if (context.toolUseContext.agentId) return
const cwd = process.cwd()
if (isInsideSkillLearningStorage(cwd)) return
const project = resolveProjectContext(cwd)
const options = { project }
++runtimeTurn
const observations: StoredSkillObservation[] = []
// Always reconstruct from the REPL message stream — it is the only source
// that captures user prompts and assistant outcomes (tool-hook observations
// cover tool events only).
for (const observation of observationsFromMessages(
context.messages,
project,
)) {
observations.push(await appendObservation(observation, options))
}
// Additionally pull tool-hook observations that arrived since the last
// consumption watermark — deterministic records with precise outcomes.
const all = await readObservations(options)
const fresh = all.filter(
o =>
o.source === 'tool-hook' &&
o.sessionId === RUNTIME_SESSION_ID &&
typeof o.timestamp === 'string' &&
o.timestamp > lastConsumedToolHookTimestamp,
)
observations.push(...fresh)
// Advance the watermark to the newest timestamp just consumed. Timestamps
// are compared as strings.
for (const o of fresh) {
if (o.timestamp > lastConsumedToolHookTimestamp) {
lastConsumedToolHookTimestamp = o.timestamp
}
}
if (observations.length === 0) return
// H5: throttle LLM calls — minimum observation count, per-session cap, and
// debounce interval. When any gate fires, fall back to heuristic directly.
const now = Date.now()
const minObservations = 5
const { llm } = getSkillLearningConfig()
const shouldCallLLM =
observations.length >= minObservations &&
llmCallsThisSession < llm.maxCallsPerSession &&
now - lastLlmCallTimestamp >= llm.cooldownMs
let candidates
if (shouldCallLLM) {
// The call is charged against the throttle before the backend runs, so a
// failing analysis still consumes budget.
llmCallsThisSession++
lastLlmCallTimestamp = now
candidates = await analyzeWithActiveBackend(observations, { project })
} else {
// Fall back to the heuristic backend without consuming an LLM call.
const { heuristicObserverBackend } = await import('./sessionObserver.js')
const result = heuristicObserverBackend.analyze(observations, { project })
candidates = Array.isArray(result) ? result : await result
}
for (const candidate of candidates) {
await upsertInstinct(createInstinct(candidate), options)
}
await autoEvolveLearnedSkills(options)
}
/**
 * Test helper: resets the turn counter, the tool-hook watermark, the LLM
 * throttle and the message dedup set. The `initialized` flag is left alone,
 * so the hook registration is not repeated.
 */
export function resetRuntimeObserverForTest(): void {
runtimeTurn = 0
lastConsumedToolHookTimestamp = ''
resetRuntimeLLMBookkeeping()
}
/**
 * Turns the stored instincts into learned artifacts, then checks for
 * cross-project promotion.
 *
 * Skills, commands and agents are processed in that order. Each kind is
 * compared against the project-local `.claude/<kind>` directory under the
 * current working directory and the matching directory under the Claude
 * config home.
 *
 * @param options Carries the project context used to load instincts.
 */
async function autoEvolveLearnedSkills(options: {
project: ReturnType<typeof resolveProjectContext>
}): Promise<void> {
const instincts = await loadInstincts(options)
const cwd = process.cwd()
const skillRoots = [
join(cwd, '.claude', 'skills'),
join(getClaudeConfigHomeDir(), 'skills'),
]
// Only clusters targeting 'skill' that also pass the learning policy are
// considered.
const skillClusters = clusterInstincts(instincts).filter(
candidate =>
candidate.target === 'skill' &&
shouldGenerateSkillFromInstincts(candidate.instincts),
)
for (const cluster of skillClusters) {
const outcome = await generateOrMergeSkillDraft(
cluster.instincts,
{ cwd, scope: cluster.instincts[0]?.scope ?? 'project' },
skillRoots,
)
// 'append-evidence' outcomes need no lifecycle decision here.
if (outcome.action === 'append-evidence') continue
const draft = outcome.draft
// Skip drafts whose output path already contains a SKILL.md.
if (existsSync(join(draft.outputPath, 'SKILL.md'))) continue
const existing = await compareExistingArtifacts('skill', draft, skillRoots)
const decision = decideSkillLifecycle(draft, existing)
await applySkillLifecycleDecision(decision)
}
const commandDrafts = generateCommandCandidates(instincts, { cwd })
for (const draft of commandDrafts) {
const roots = [
join(cwd, '.claude', 'commands'),
join(getClaudeConfigHomeDir(), 'commands'),
]
// Commands are only written when the comparison finds no existing artifact.
const existing = await compareExistingArtifacts('command', draft, roots)
if (existing.length > 0) continue
await writeLearnedCommand(draft)
}
const agentDrafts = generateAgentCandidates(instincts, { cwd })
for (const draft of agentDrafts) {
const roots = [
join(cwd, '.claude', 'agents'),
join(getClaudeConfigHomeDir(), 'agents'),
]
// Same rule as commands: skip when a comparable artifact already exists.
const existing = await compareExistingArtifacts('agent', draft, roots)
if (existing.length > 0) continue
await writeLearnedAgent(draft)
}
// Called with default options, i.e. the default root and thresholds.
await checkPromotion()
}
/**
 * Converts REPL messages into skill-learning observations.
 *
 * - User messages containing `tool_result` blocks yield one `tool_complete`
 *   observation per block; other user messages yield a `user_message`
 *   observation when they contain non-blank text.
 * - Assistant messages yield one `tool_start` observation per `tool_use`
 *   block, followed by an `assistant_message` observation when they contain
 *   non-blank text.
 *
 * Messages already seen in this session (by uuid) are skipped. All
 * observations produced by one call share the same timestamp.
 *
 * `tool_result` blocks reference their tool through `tool_use_id` rather than
 * a name, so names are resolved from the `tool_use` blocks of the assistant
 * messages. The lookup is built from every message, including ones already
 * processed, because a result usually arrives in a later pass than its call.
 *
 * @param messages Full message list supplied by the hook.
 * @param project Project context stamped onto every observation.
 * @returns Observations for the messages not processed before.
 */
function observationsFromMessages(
  messages: Message[],
  project: ReturnType<typeof resolveProjectContext>,
): StoredSkillObservation[] {
  const sessionId = RUNTIME_SESSION_ID
  const base = {
    sessionId,
    projectId: project.projectId,
    projectName: project.projectName,
    cwd: project.cwd,
    timestamp: new Date().toISOString(),
    source: 'hook' as const,
  }
  const toolNamesById = toolNamesByUseId(messages)

  return messages.flatMap((message): StoredSkillObservation[] => {
    // H6: watermark dedup — skip messages already processed in this session.
    const msgKey = `${sessionId}:${String(message.uuid)}`
    if (lastProcessedMessageIds.has(msgKey)) return []
    lastProcessedMessageIds.add(msgKey)

    // FIFO truncation keeps the set bounded: once it exceeds
    // MAX_PROCESSED_IDS, drop the oldest entries (a Set iterates in insertion
    // order) until exactly TRIM_PROCESSED_IDS_TO remain.
    if (lastProcessedMessageIds.size > MAX_PROCESSED_IDS) {
      const toDrop = lastProcessedMessageIds.size - TRIM_PROCESSED_IDS_TO
      const iter = lastProcessedMessageIds.values()
      for (let i = 0; i < toDrop; i++) {
        const next = iter.next()
        if (next.done) break
        lastProcessedMessageIds.delete(next.value)
      }
    }

    if (message.type === 'user') {
      const toolResults = toolResultsFromContent(
        message.message?.content,
        toolNamesById,
      )
      if (toolResults.length > 0) {
        return toolResults.map(result => ({
          ...base,
          id: crypto.randomUUID(),
          event: 'tool_complete',
          toolName: result.toolName,
          toolOutput: result.output,
          outcome: result.isError ? 'failure' : 'success',
        }))
      }
      const text = textFromContent(message.message?.content)
      return text.trim()
        ? [
            {
              ...base,
              id: crypto.randomUUID(),
              event: 'user_message',
              messageText: text,
            },
          ]
        : []
    }

    if (message.type === 'assistant') {
      const toolUses = toolUsesFromContent(message.message?.content)
      const text = textFromContent(message.message?.content)
      return [
        ...toolUses.map(toolUse => ({
          ...base,
          id: crypto.randomUUID(),
          event: 'tool_start' as const,
          toolName: toolUse.toolName,
          toolInput: toolUse.input,
        })),
        ...(text.trim()
          ? [
              {
                ...base,
                id: crypto.randomUUID(),
                event: 'assistant_message' as const,
                messageText: text,
              },
            ]
          : []),
      ]
    }

    return []
  })
}

/**
 * Maps the id of every `tool_use` block found in assistant messages to the
 * name of the tool it invoked. Blocks without a string `id` and string `name`
 * are ignored.
 */
function toolNamesByUseId(messages: Message[]): Map<string, string> {
  const names = new Map<string, string>()
  for (const message of messages) {
    if (message.type !== 'assistant') continue
    const content: unknown = message.message?.content
    if (!Array.isArray(content)) continue
    for (const block of content) {
      if (!block || typeof block !== 'object') continue
      const record = block as Record<string, unknown>
      if (record.type !== 'tool_use') continue
      if (typeof record.id === 'string' && typeof record.name === 'string') {
        names.set(record.id, record.name)
      }
    }
  }
  return names
}

/**
 * Extracts plain text from message content.
 *
 * @param content A string, an array of content blocks, or anything else.
 * @returns The string itself; for arrays, the non-empty string `text` fields
 *   of the blocks joined with newlines; otherwise the empty string.
 */
function textFromContent(content: unknown): string {
  if (typeof content === 'string') return content
  if (!Array.isArray(content)) return ''
  return content
    .map(block => {
      if (!block || typeof block !== 'object') return ''
      const record = block as Record<string, unknown>
      return typeof record.text === 'string' ? record.text : ''
    })
    .filter(Boolean)
    .join('\n')
}

/**
 * Collects the `tool_use` blocks of an assistant message.
 *
 * @param content Message content; anything other than an array yields `[]`.
 * @returns One entry per `tool_use` block with the tool name (falling back to
 *   `'unknown_tool'`) and the input passed through `stringifyField`.
 */
function toolUsesFromContent(
  content: unknown,
): Array<{ toolName: string; input?: string }> {
  if (!Array.isArray(content)) return []
  return content.flatMap(block => {
    if (!block || typeof block !== 'object') return []
    const record = block as Record<string, unknown>
    if (record.type !== 'tool_use') return []
    return [
      {
        toolName: String(record.name ?? 'unknown_tool'),
        input: stringifyField(record.input),
      },
    ]
  })
}

/**
 * Collects the `tool_result` blocks of a user message.
 *
 * The tool name is taken from the block's `name` or `tool_name` when present,
 * then from `toolNamesById` via the block's `tool_use_id`, and finally falls
 * back to `'unknown_tool'`.
 *
 * @param content Message content; anything other than an array yields `[]`.
 * @param toolNamesById Optional lookup from `tool_use` id to tool name.
 * @returns One entry per `tool_result` block. `isError` is true only when the
 *   block's `is_error` is exactly `true`.
 */
function toolResultsFromContent(
  content: unknown,
  toolNamesById?: ReadonlyMap<string, string>,
): Array<{ toolName: string; output?: string; isError: boolean }> {
  if (!Array.isArray(content)) return []
  return content.flatMap(block => {
    if (!block || typeof block !== 'object') return []
    const record = block as Record<string, unknown>
    if (record.type !== 'tool_result') return []
    const explicitName = record.name ?? record.tool_name
    const linkedName =
      typeof record.tool_use_id === 'string'
        ? toolNamesById?.get(record.tool_use_id)
        : undefined
    return [
      {
        toolName: String(explicitName ?? linkedName ?? 'unknown_tool'),
        output: stringifyField(record.content),
        isError: record.is_error === true,
      },
    ]
  })
}

View File

@@ -0,0 +1,296 @@
import type { StoredSkillObservation } from './observationStore.js'
import {
candidateFromObservation,
createInstinct,
type InstinctCandidate,
type StoredInstinct,
} from './instinctParser.js'
import type { InstinctDomain, SkillObservationOutcome } from './types.js'
import {
analyzeWithActiveBackend,
getActiveObserverBackend,
registerObserverBackend,
type ObserverBackend,
type ObserverBackendContext,
} from './observerBackend.js'
import { llmObserverBackend } from './llmObserverBackend.js'
/** Options accepted by the heuristic analysis entry points. */
export type SessionObserverOptions = {
// How many Grep -> Read -> Edit sequences must be observed before the
// repeated-workflow candidate is emitted.
minRepeatedSequenceCount?: number
}
// Threshold used when `minRepeatedSequenceCount` is not supplied.
const DEFAULT_MIN_REPEATED_SEQUENCE_COUNT = 2
/**
 * Runs every heuristic extractor over `observations` and concatenates their
 * candidates in a fixed order: user corrections, tool-error resolutions,
 * repeated tool sequences, project conventions.
 *
 * @param observations Observations to analyze.
 * @param options Forwarded to the repeated-sequence extractor only.
 */
export function heuristicAnalyze(
  observations: StoredSkillObservation[],
  options?: SessionObserverOptions,
): InstinctCandidate[] {
  const corrections = extractUserCorrections(observations)
  const resolutions = extractToolErrorResolutions(observations)
  const sequences = extractRepeatedToolSequences(observations, options)
  const conventions = extractProjectConventions(observations)
  return corrections.concat(resolutions, sequences, conventions)
}
/**
 * Observer backend that delegates to `heuristicAnalyze` with default options.
 * The backend context argument is accepted but ignored.
 */
export const heuristicObserverBackend: ObserverBackend = {
name: 'heuristic',
analyze(
observations: StoredSkillObservation[],
_ctx?: ObserverBackendContext,
): InstinctCandidate[] {
return heuristicAnalyze(observations)
},
}
// Module side effect: importing this file registers both backends, heuristic
// first.
registerObserverBackend(heuristicObserverBackend)
registerObserverBackend(llmObserverBackend)
/**
 * Synchronously analyzes `observations` with the active observer backend and
 * converts each candidate into an instinct.
 *
 * `options` are honored only when the heuristic backend is active. Any other
 * backend is invoked without a context and must return its candidates
 * synchronously.
 *
 * @throws Error when the active backend returns a Promise.
 */
export function analyzeObservations(
  observations: StoredSkillObservation[],
  options?: SessionObserverOptions,
): StoredInstinct[] {
  const backend = getActiveObserverBackend()

  let candidates: InstinctCandidate[]
  if (backend.name === 'heuristic') {
    candidates = heuristicAnalyze(observations, options)
  } else {
    candidates = ensureSyncCandidates(backend.analyze(observations))
  }

  const instincts: StoredInstinct[] = []
  for (const candidate of candidates) {
    instincts.push(createInstinct(candidate))
  }
  return instincts
}
/**
 * Asynchronous counterpart of `analyzeObservations`: analyzes with the active
 * backend via `analyzeWithActiveBackend` and converts each candidate into an
 * instinct.
 *
 * @param observations Observations to analyze.
 * @param ctx Optional backend context, forwarded as-is.
 */
export async function analyzeObservationsAsync(
observations: StoredSkillObservation[],
ctx?: ObserverBackendContext,
): Promise<StoredInstinct[]> {
const candidates = await analyzeWithActiveBackend(observations, ctx)
return candidates.map(candidate => createInstinct(candidate))
}
// Alias of the synchronous `analyzeObservations`.
export const observeSession = analyzeObservations
/**
 * Passes through a synchronous backend result and rejects an asynchronous
 * one.
 *
 * @param result Value returned by a backend's `analyze`.
 * @returns `result` unchanged when it is an array.
 * @throws Error when `result` is not an array (i.e. the backend is async).
 */
function ensureSyncCandidates(
  result: InstinctCandidate[] | Promise<InstinctCandidate[]>,
): InstinctCandidate[] {
  if (!Array.isArray(result)) {
    throw new Error(
      'Active observer backend returned a Promise; use analyzeObservationsAsync instead',
    )
  }
  return result
}
/**
 * Emits one candidate for each user message that `parseCorrection`
 * recognizes as a corrective instruction.
 *
 * Each candidate has confidence 0.7, a domain inferred from the message text,
 * the trimmed message as evidence, and the outcome of the closest preceding
 * `tool_complete` observation (if any) as its evidence outcome.
 */
function extractUserCorrections(
  observations: StoredSkillObservation[],
): InstinctCandidate[] {
  const corrections: InstinctCandidate[] = []

  for (const [index, observation] of observations.entries()) {
    const isUserText =
      observation.event === 'user_message' && Boolean(observation.messageText)
    if (!isUserText || !observation.messageText) continue

    const text = observation.messageText.trim()
    const parsed = parseCorrection(text)
    if (!parsed) continue

    const defaults = candidateFromObservation(observation)
    corrections.push({
      ...defaults,
      trigger: parsed.trigger,
      action: parsed.action,
      confidence: 0.7,
      domain: inferDomain(text),
      source: 'session-observation',
      scope: 'project',
      evidence: [text],
      evidenceOutcome: recentOutcomeBefore(observations, index),
      observationIds: [observation.id],
    })
  }

  return corrections
}
/**
 * Emits a debugging candidate for every failed tool completion that is
 * followed, within the next five observations, by a successful completion of
 * the same tool.
 *
 * Failures without a tool name are ignored. The evidence is the output of the
 * failure and of the recovery, with a short generated sentence standing in
 * for a missing output.
 */
function extractToolErrorResolutions(
  observations: StoredSkillObservation[],
): InstinctCandidate[] {
  const resolutions: InstinctCandidate[] = []

  for (const [position, failed] of observations.entries()) {
    const isFailure =
      failed.event === 'tool_complete' && failed.outcome === 'failure'
    if (!isFailure) continue

    const tool = failed.toolName
    if (!tool) continue

    // Look ahead at most five observations for the same tool succeeding.
    const lookahead = observations.slice(position + 1, position + 6)
    const recovery = lookahead.find(
      later =>
        later.event === 'tool_complete' &&
        later.outcome === 'success' &&
        later.toolName === tool,
    )
    if (!recovery) continue

    resolutions.push({
      ...candidateFromObservation(failed),
      trigger: `When ${tool} fails during this project`,
      action: `Use the follow-up successful ${tool} invocation as the resolution pattern before retrying blindly.`,
      confidence: 0.5,
      domain: 'debugging',
      source: 'session-observation',
      scope: 'project',
      evidence: [
        failed.toolOutput ?? `${tool} failed`,
        recovery.toolOutput ?? `${recovery.toolName} succeeded`,
      ],
      evidenceOutcome: 'success',
      observationIds: [failed.id, recovery.id],
    })
  }

  return resolutions
}
/**
 * Detects repeated `Grep -> Read -> Edit` workflows among the tool events and
 * emits a single workflow candidate when enough of them are found.
 *
 * Only `tool_start` and `tool_complete` observations are considered, and the
 * three tool names must appear consecutively among them. The candidate's
 * confidence is 0.65 for three or more occurrences and 0.5 otherwise. Its
 * evidence outcome is taken from the last matched event when that event is a
 * `tool_complete`.
 *
 * @param observations Observations to scan.
 * @param options `minRepeatedSequenceCount` overrides the default threshold.
 * @returns Zero or one candidate. Nothing is emitted when no sequence was
 *   matched, even if the configured threshold is zero or negative.
 */
function extractRepeatedToolSequences(
  observations: StoredSkillObservation[],
  options?: SessionObserverOptions,
): InstinctCandidate[] {
  const minCount =
    options?.minRepeatedSequenceCount ?? DEFAULT_MIN_REPEATED_SEQUENCE_COUNT
  const toolEvents = observations.filter(
    observation =>
      observation.event === 'tool_start' ||
      observation.event === 'tool_complete',
  )
  const names = toolEvents.map(observation => observation.toolName ?? '')
  const sequence = ['Grep', 'Read', 'Edit']

  // Matched events in encounter order. The pattern cannot overlap itself, so
  // the matched windows are disjoint.
  const matched: StoredSkillObservation[] = []
  let count = 0
  for (let start = 0; start + sequence.length <= names.length; start++) {
    if (sequence.every((name, offset) => names[start + offset] === name)) {
      count++
      matched.push(...toolEvents.slice(start, start + sequence.length))
    }
  }

  // Without any match there is no evidence to report and no observation to
  // anchor the candidate on, whatever the threshold says.
  if (count === 0 || count < minCount) return []
  const first = matched[0]
  const last = matched[matched.length - 1]
  if (!first || !last) return []

  const evidence = `Observed ${count} repeated Grep -> Read -> Edit workflow sequences.`
  const sequenceOutcome =
    last.event === 'tool_complete' ? last.outcome : undefined

  return [
    {
      ...candidateFromObservation(first),
      trigger: 'When changing code in this project',
      action:
        'Prefer the Grep -> Read -> Edit workflow: locate symbols, inspect context, then apply the smallest edit.',
      confidence: count >= 3 ? 0.65 : 0.5,
      domain: 'workflow',
      source: 'session-observation',
      scope: 'project',
      evidence: [evidence],
      evidenceOutcome: normalizeOutcome(sequenceOutcome),
      observationIds: Array.from(new Set(matched.map(event => event.id))),
    },
  ]
}
/**
 * Emits a low-confidence project-convention candidate for every user message
 * that contains a convention keyword (项目约定, 规范, 必须, convention,
 * always, must; matched case-insensitively).
 *
 * The action embeds the trimmed message text. The evidence outcome is that of
 * the closest preceding `tool_complete` observation, if any.
 */
function extractProjectConventions(
  observations: StoredSkillObservation[],
): InstinctCandidate[] {
  const conventions: InstinctCandidate[] = []

  for (const [index, observation] of observations.entries()) {
    if (observation.event !== 'user_message') continue
    if (!observation.messageText) continue

    const text = observation.messageText.trim()
    const mentionsConvention =
      /(项目约定|规范|必须|convention|always|must)/i.test(text)
    if (!mentionsConvention) continue

    conventions.push({
      ...candidateFromObservation(observation),
      trigger: 'When working in this project',
      action: `Follow the project convention: ${text}`,
      // A single occurrence is deliberately scored low (0.4): one mention on
      // its own should not be enough to promote the convention, so other
      // evidence has to raise the confidence.
      confidence: 0.4,
      domain: 'project',
      source: 'session-observation',
      scope: 'project',
      evidence: [text],
      evidenceOutcome: recentOutcomeBefore(observations, index),
      observationIds: [observation.id],
    })
  }

  return conventions
}
/**
 * Finds the outcome of the closest `tool_complete` observation strictly
 * before `index`.
 *
 * @param observations Observation list to search.
 * @param index Position to search backwards from (exclusive).
 * @returns The normalized outcome of that observation, or `undefined` when no
 *   earlier `tool_complete` exists or its outcome is not a known value.
 */
function recentOutcomeBefore(
  observations: StoredSkillObservation[],
  index: number,
): SkillObservationOutcome | undefined {
  let cursor = index
  while (cursor-- > 0) {
    const earlier = observations[cursor]
    if (earlier.event === 'tool_complete') {
      return normalizeOutcome(earlier.outcome)
    }
  }
  return undefined
}
/**
 * Narrows a stored outcome to one of the recognized values.
 *
 * @returns `outcome` when it is `'success'`, `'failure'` or `'unknown'`;
 *   otherwise `undefined`.
 */
function normalizeOutcome(
  outcome: StoredSkillObservation['outcome'],
): SkillObservationOutcome | undefined {
  switch (outcome) {
    case 'success':
    case 'failure':
    case 'unknown':
      return outcome
    default:
      return undefined
  }
}
/**
 * Recognizes a corrective instruction in a user message.
 *
 * Two shapes are supported, tried in this order:
 *   1. "avoid X, use Y" (Chinese pattern first, then English), producing a
 *      trigger and action that name both alternatives;
 *   2. "should/must Y" (Chinese or English), producing a generic trigger and
 *      an action naming the preferred behavior.
 *
 * @param text Message text to inspect.
 * @returns The derived trigger and action, or `null` when neither shape
 *   matches.
 */
function parseCorrection(
  text: string,
): { trigger: string; action: string } | null {
  const noUsePattern =
    /(?:不要|别|不应(?:该)?|不要再)\s*(?<avoid>[^,。.;]+)[,\s]*(?:用|使用|改用|应该用|要用)\s*(?<prefer>[^,。.;]+)/i
  const englishPattern =
    /(?:do not|don't|avoid)\s+(?<avoid>[^,.;]+)[,;\s]+(?:use|prefer)\s+(?<prefer>[^,.;]+)/i
  const shouldPattern =
    /(?:你应该|应该先|must|should)\s*(?<prefer>[^,。.;]+)/i

  // Shape 1: an explicit "avoid X, use Y" contrast.
  const contrast = noUsePattern.exec(text) ?? englishPattern.exec(text)
  const contrastGroups = contrast?.groups
  if (contrastGroups) {
    const avoid = contrastGroups.avoid.trim()
    const prefer = contrastGroups.prefer.trim()
    return {
      trigger: `When choosing between ${avoid} and ${prefer}`,
      action: `Prefer ${prefer}; avoid ${avoid}.`,
    }
  }

  // Shape 2: a bare "should/must Y" instruction.
  const directiveGroups = shouldPattern.exec(text)?.groups
  if (directiveGroups) {
    const prefer = directiveGroups.prefer.trim()
    return {
      trigger: 'When this user gives a corrective instruction',
      action: `Prefer this corrected action: ${prefer}.`,
    }
  }

  return null
}
/**
 * Guesses the instinct domain of a message from keywords, checked in this
 * order: testing, git, security, code-style. Falls back to `'project'`.
 *
 * The short keywords `test` and `git` must not be preceded by a letter, so
 * ordinary words such as "latest" or "digit" no longer trigger a domain,
 * while "tests", "testing-library", "bun test", "foo_test", "github" and
 * ".git" still match. `pytest` and `unittest` are listed explicitly because
 * the letter guard would otherwise exclude them.
 *
 * @param text Message text; matching is case-insensitive.
 */
function inferDomain(text: string): InstinctDomain {
  const lowered = text.toLowerCase()
  if (/(?<![a-z])test|pytest|unittest|mock|vitest|jest/.test(lowered)) {
    return 'testing'
  }
  if (/(?<![a-z])git|commit|branch/.test(lowered)) return 'git'
  if (/security|secret|token|password/.test(lowered)) return 'security'
  if (/style|format|lint|naming/.test(lowered)) return 'code-style'
  return 'project'
}

View File

@@ -0,0 +1,499 @@
import { existsSync } from 'node:fs'
import { mkdir, readFile, rename, writeFile } from 'node:fs/promises'
import { createHash } from 'node:crypto'
import { dirname, join } from 'node:path'
import type { SearchResult } from '../skillSearch/localSearch.js'
import { createInstinct, type StoredInstinct } from './instinctParser.js'
import {
getProjectStorageDir,
resolveProjectContext,
} from './projectContext.js'
import { generateSkillDraft, writeLearnedSkill } from './skillGenerator.js'
import type {
InstinctDomain,
SkillGapStatus,
SkillLearningProjectContext,
} from './types.js'
/** The subset of a search result that is stored alongside a skill gap. */
export type SkillGapRecommendation = Pick<
SearchResult,
'name' | 'description' | 'score'
>
/**
 * A skill written to disk for a gap, tagged by whether it is a draft or an
 * active skill. Both variants carry the skill's name and its path.
 */
export type SkillGapMaterialization =
| {
type: 'draft'
name: string
skillPath: string
}
| {
type: 'active'
name: string
skillPath: string
}
/** Persisted state of one skill gap, stored under its `key`. */
export type SkillGapRecord = {
// Key derived from the prompt; also the index into the gap state map.
key: string
// Trimmed prompt text from the most recent recording.
prompt: string
// Number of times the gap has been recorded.
count: number
// Number of distinct sessions that have registered a draft hit.
draftHits: number
// Session IDs that have already contributed a draft hit for this gap —
// prevents one session from inflating `draftHits` beyond 1 and flipping the
// `draftHits >= 2` active-promotion gate by itself.
draftHitSessions: string[]
// Lifecycle status; values seen in this file are 'pending', 'draft',
// 'active' and 'rejected'.
status: SkillGapStatus
// Session and working directory of the most recent recording.
sessionId: string
cwd: string
projectId: string
projectName: string
// Up to five recommendations captured with the most recent recording.
recommendations: SkillGapRecommendation[]
// ISO timestamps of the first recording and of the latest change.
createdAt: string
updatedAt: string
// Set once a draft / an active skill has been written for the gap.
draft?: SkillGapMaterialization
active?: SkillGapMaterialization
}
// P0-2 hook: when outcome-aware observation lands, augment this with a
// lookup into observationStore for a matching `outcome: 'success'` tool_complete
// observation keyed by (sessionId, gap.key). Until then, draft promotion uses
// count/signal only.
// NOTE(review): these thresholds are presumably read by the
// shouldPromoteToDraft / shouldPromoteToActive helpers — confirm there.
const DRAFT_PROMOTION_COUNT = 2
const ACTIVE_PROMOTION_COUNT = 4
const ACTIVE_PROMOTION_DRAFT_HITS = 2
/** On-disk container for all gap records of a project, indexed by gap key. */
type SkillGapState = {
version: 1
gaps: Record<string, SkillGapRecord>
}
/** Input accepted by `recordSkillGap`. */
export type RecordSkillGapOptions = {
// Prompt that exposed the gap; must not be blank after trimming.
prompt: string
// Working directory; used to resolve the project when `project` is absent
// and stored on the record (falls back to the project's cwd).
cwd?: string
// Session recording the gap; defaults to 'unknown-session'.
sessionId?: string
// Search results to store with the gap; only the first five are kept.
recommendations?: SearchResult[]
// Explicit project context; resolved from `cwd` when omitted.
project?: SkillLearningProjectContext
// Root directory override forwarded to the gap state read/write helpers.
rootDir?: string
}
/**
 * Records one occurrence of a skill gap and advances its lifecycle.
 *
 * The gap is keyed by the trimmed prompt. Each call increments `count`,
 * refreshes the session, cwd, project and recommendation fields, and carries
 * over the existing draft/active materializations and status. Rejected gaps
 * are only counted and saved. Otherwise a draft is written when the gap has
 * none and qualifies for one, and then an active skill is written when the
 * gap has a draft, has no active skill, and qualifies for one. Both checks
 * run in the same call.
 *
 * @param options Prompt plus optional session, project and root overrides.
 * @returns The updated gap record, after it has been saved.
 * @throws Error when the prompt is blank.
 */
export async function recordSkillGap(
options: RecordSkillGapOptions,
): Promise<SkillGapRecord> {
const prompt = options.prompt.trim()
if (!prompt) {
throw new Error('Cannot record an empty skill gap')
}
const project = options.project ?? resolveProjectContext(options.cwd)
const state = await readSkillGapState(project, options.rootDir)
const key = buildSkillGapKey(prompt)
const now = new Date().toISOString()
const existing = state.gaps[key]
const gap: SkillGapRecord = {
key,
prompt,
count: (existing?.count ?? 0) + 1,
draftHits: existing?.draftHits ?? 0,
draftHitSessions: existing?.draftHitSessions ?? [],
status: existing?.status ?? 'pending',
sessionId: options.sessionId ?? 'unknown-session',
cwd: options.cwd ?? project.cwd,
projectId: project.projectId,
projectName: project.projectName,
recommendations: (options.recommendations ?? []).slice(0, 5).map(r => ({
name: r.name,
description: r.description,
score: r.score,
})),
createdAt: existing?.createdAt ?? now,
updatedAt: now,
draft: existing?.draft,
active: existing?.active,
}
// Rejected gaps keep counting occurrences but are never promoted.
if (gap.status === 'rejected') {
state.gaps[key] = gap
await writeSkillGapState(project, state, options.rootDir)
return gap
}
if (!gap.draft && shouldPromoteToDraft(gap)) {
gap.draft = await writeSkillGapDraft(gap, project)
gap.status = 'draft'
await clearRuntimeSkillCaches()
}
// Evaluated after the draft step, so a gap can move from pending through
// draft to active within one call if it satisfies both checks.
if (gap.draft && !gap.active && shouldPromoteToActive(gap)) {
gap.active = await writeActiveSkillForGap(gap, project)
gap.status = 'active'
await clearRuntimeSkillCaches()
}
state.gaps[key] = gap
await writeSkillGapState(project, state, options.rootDir)
return gap
}
/**
 * Returns every recorded skill gap of a project, ordered by gap key using
 * locale-aware comparison.
 *
 * @param project Project whose gaps are read; defaults to the context of the
 *   current working directory.
 * @param rootDir Optional root directory override.
 */
export async function readSkillGaps(
  project = resolveProjectContext(),
  rootDir?: string,
): Promise<SkillGapRecord[]> {
  const { gaps } = await readSkillGapState(project, rootDir)
  const records = Object.values(gaps)
  records.sort((left, right) => left.key.localeCompare(right.key))
  return records
}
/**
 * Looks up which gap produced the draft skill stored at `draftPath`.
 *
 * @param draftPath Path compared for exact equality with each gap's draft
 *   `skillPath`.
 * @param project Project whose gaps are searched; defaults to the context of
 *   the current working directory.
 * @param rootDir Optional root directory override.
 * @returns The key of the first matching gap, or `undefined` when none has
 *   that draft path.
 */
export async function findGapKeyByDraftPath(
  draftPath: string,
  project = resolveProjectContext(),
  rootDir?: string,
): Promise<string | undefined> {
  const { gaps } = await readSkillGapState(project, rootDir)
  const owner = Object.values(gaps).find(
    gap => gap.draft?.skillPath === draftPath,
  )
  return owner?.key
}
/**
 * Registers that a session used the draft skill of a gap, and promotes the
 * gap to an active skill once it qualifies.
 *
 * A hit is counted at most once per session. Nothing is changed when the gap
 * is unknown, has no draft, is already active, or has been rejected. The
 * rejected case matches `recordSkillGap` and `promoteGapToDraft`, which also
 * refuse to advance rejected gaps; without it, a gap rejected after its draft
 * was written could still collect hits and be promoted to active.
 *
 * @param key Gap key.
 * @param project Project owning the gap; defaults to the context of the
 *   current working directory.
 * @param rootDir Optional root directory override.
 * @param sessionId Session registering the hit.
 * @returns The updated record, the unchanged record when no hit was counted,
 *   or `undefined` when the gap does not exist.
 */
export async function recordDraftHit(
  key: string,
  project = resolveProjectContext(),
  rootDir?: string,
  sessionId = 'unknown-session',
): Promise<SkillGapRecord | undefined> {
  const state = await readSkillGapState(project, rootDir)
  const gap = state.gaps[key]
  if (!gap || !gap.draft || gap.active) return gap
  // A rejection is final: rejected gaps must not progress towards active.
  if (gap.status === 'rejected') return gap

  // One draft hit per session: a single actor reloading the same draft
  // repeatedly must not flip the draftHits>=2 gate.
  const existingSessions = gap.draftHitSessions ?? []
  if (existingSessions.includes(sessionId)) return gap

  const now = new Date().toISOString()
  const updated: SkillGapRecord = {
    ...gap,
    draftHits: gap.draftHits + 1,
    draftHitSessions: [...existingSessions, sessionId],
    updatedAt: now,
  }
  if (shouldPromoteToActive(updated)) {
    updated.active = await writeActiveSkillForGap(updated, project)
    updated.status = 'active'
    await clearRuntimeSkillCaches()
  }

  state.gaps[key] = updated
  await writeSkillGapState(project, state, rootDir)
  return updated
}
/**
 * Explicitly materializes a draft skill for a gap, regardless of its count.
 *
 * Unknown keys yield `undefined`; rejected gaps and gaps that already have a
 * draft are returned untouched.
 *
 * @param key Gap key to promote.
 * @param project Project whose gap state is read and written.
 * @param rootDir Optional storage root override.
 * @returns The promoted (or untouched) record, or `undefined` for unknown keys.
 */
export async function promoteGapToDraft(
  key: string,
  project = resolveProjectContext(),
  rootDir?: string,
): Promise<SkillGapRecord | undefined> {
  const state = await readSkillGapState(project, rootDir)
  const current = state.gaps[key]
  if (!current) return undefined
  if (current.status === 'rejected' || current.draft) return current

  const draft = await writeSkillGapDraft(current, project)
  const promoted: SkillGapRecord = {
    ...current,
    draft,
    status: 'draft',
    updatedAt: new Date().toISOString(),
  }
  state.gaps[key] = promoted
  // Persist first, then invalidate caches so the new draft becomes visible.
  await writeSkillGapState(project, state, rootDir)
  await clearRuntimeSkillCaches()
  return promoted
}
/**
 * Marks a gap as rejected and persists the change.
 *
 * All other fields of the record (including any draft/active
 * materialization) are kept as they were.
 *
 * @param key Gap key to reject.
 * @param project Project whose gap state is read and written.
 * @param rootDir Optional storage root override.
 * @returns The rejected record, or `undefined` when the key is unknown.
 */
export async function rejectSkillGap(
  key: string,
  project = resolveProjectContext(),
  rootDir?: string,
): Promise<SkillGapRecord | undefined> {
  const state = await readSkillGapState(project, rootDir)
  const current = state.gaps[key]
  if (!current) return undefined

  const rejectedAt = new Date().toISOString()
  const rejected: SkillGapRecord = {
    ...current,
    status: 'rejected',
    updatedAt: rejectedAt,
  }
  state.gaps[key] = rejected
  await writeSkillGapState(project, state, rootDir)
  return rejected
}
/**
 * Decides whether a gap has been seen often enough to get a draft skill.
 *
 * Promotion is gated purely on repeated occurrence. The legacy
 * `isStrongReusableSignal` shortcut let single-utterance Chinese exhortations
 * be promoted straight to active; P0-2 will reintroduce an outcome-aware
 * signal once the observation layer supplies it.
 */
export function shouldPromoteToDraft(gap: SkillGapRecord): boolean {
  const seenOftenEnough = gap.count >= DRAFT_PROMOTION_COUNT
  return seenOftenEnough
}
/**
 * Decides whether a drafted gap should become an active skill.
 *
 * A gap without a draft is never promoted. Otherwise either threshold is
 * sufficient: the occurrence count or the number of draft hits.
 */
export function shouldPromoteToActive(gap: SkillGapRecord): boolean {
  if (!gap.draft) return false
  if (gap.count >= ACTIVE_PROMOTION_COUNT) return true
  return gap.draftHits >= ACTIVE_PROMOTION_DRAFT_HITS
}
/**
 * Materializes a draft skill for a gap under `<root>/.claude/skills/.drafts`.
 *
 * The skill file is only written when it does not exist yet, so an existing
 * draft with the same name is left untouched.
 *
 * @param gap Gap the draft is generated from.
 * @param project Project context; `projectRoot` is preferred over `cwd`.
 * @returns A `draft` materialization pointing at the draft's `SKILL.md`.
 */
async function writeSkillGapDraft(
  gap: SkillGapRecord,
  project: SkillLearningProjectContext,
): Promise<SkillGapMaterialization> {
  const pendingInstinct = createGapInstinct(gap, 'pending')
  const baseDir = project.projectRoot ?? project.cwd
  const generated = generateSkillDraft([pendingInstinct], {
    cwd: baseDir,
    outputRoot: join(baseDir, '.claude', 'skills', '.drafts'),
    scope: 'project',
    name: `draft-${buildNameFragment(gap.prompt)}`,
    description:
      'Draft learned skill candidate. Promote after repeated evidence or explicit user correction.',
  })
  const skillPath = join(generated.outputPath, 'SKILL.md')
  const alreadyWritten = existsSync(skillPath)
  if (!alreadyWritten) {
    // Drafts carry an extra section reminding readers they are not active yet.
    const promotionRule =
      '\n## Promotion Rule\n\nDo not move this draft into active skills until the same gap repeats or the user explicitly confirms this should become reusable.\n'
    await writeLearnedSkill({
      ...generated,
      content: generated.content + promotionRule,
    })
  }
  return { type: 'draft', name: generated.name, skillPath }
}
/**
 * Materializes the active (non-draft) skill for a gap.
 *
 * The skill file is only written when it does not exist yet.
 *
 * @param gap Gap the skill is generated from.
 * @param project Project context; `projectRoot` is preferred over `cwd`.
 * @returns An `active` materialization pointing at the skill's `SKILL.md`.
 */
async function writeActiveSkillForGap(
  gap: SkillGapRecord,
  project: SkillLearningProjectContext,
): Promise<SkillGapMaterialization> {
  const activeInstinct = createGapInstinct(gap, 'active')
  const generated = generateSkillDraft([activeInstinct], {
    cwd: project.projectRoot ?? project.cwd,
    scope: 'project',
    name: buildNameFragment(gap.prompt),
    description: buildGapAction(gap.prompt),
  })
  const skillPath = join(generated.outputPath, 'SKILL.md')
  const alreadyWritten = existsSync(skillPath)
  if (!alreadyWritten) {
    await writeLearnedSkill(generated)
  }
  return { type: 'active', name: generated.name, skillPath }
}
/**
 * Builds the synthetic instinct that describes a skill gap.
 *
 * Active instincts get confidence 0.82; every other status gets 0.55.
 *
 * @param gap Gap supplying the prompt, project identity and occurrence count.
 * @param status Status stamped on the created instinct.
 */
function createGapInstinct(
  gap: SkillGapRecord,
  status: StoredInstinct['status'],
): StoredInstinct {
  const { prompt } = gap
  const confidence = status === 'active' ? 0.82 : 0.55
  const evidence = [
    `Skill gap prompt: ${summarize(prompt, 180)}`,
    `No high-confidence active skill was auto-loaded.`,
    `Observed ${gap.count} time(s).`,
  ]
  return createInstinct({
    trigger: `When the user asks for ${summarize(prompt, 120)}`,
    action: buildGapAction(prompt),
    confidence,
    domain: inferDomain(prompt),
    source: 'session-observation',
    scope: 'project',
    projectId: gap.projectId,
    projectName: gap.projectName,
    evidence,
    status,
  })
}
/**
 * Picks the action text for a gap from keyword heuristics on the prompt.
 *
 * Rules are tried in order and the first matching pattern wins; when none
 * match, a generic action quoting a summary of the prompt is returned.
 */
function buildGapAction(prompt: string): string {
  const rules: Array<[RegExp, string]> = [
    [
      /feature\s*\(|feature flag|flag_name|stub|no-op|noop|最小实现/i,
      'Audit feature flags by scanning feature() call sites, excluding generated/dependency noise, classifying each candidate as stub, shell, MVP, or thin-toggle, and writing an evidence-backed document.',
    ],
    [
      /skill|技能|学习|进化|evolve|learning/i,
      'Run skill discovery first; auto-load only high-confidence matching skills; record a skill gap when none match; promote repeated or corrected gaps into learned skills.',
    ],
    [
      /test|测试|stub|调用链|参数/i,
      'Infer tests from existing files, parameters, exports, and call chains before simplifying mocks or inventing behavior.',
    ],
  ]
  for (const [pattern, action] of rules) {
    if (pattern.test(prompt)) return action
  }
  return `Reuse the workflow learned from this prompt: ${summarize(prompt, 180)}.`
}
/**
 * Guesses the instinct domain from keywords in the lower-cased prompt.
 *
 * Rules are evaluated in order and the first match wins; prompts matching
 * nothing fall into the `workflow` domain.
 */
function inferDomain(prompt: string): InstinctDomain {
  const lowered = prompt.toLowerCase()
  const rules: Array<[RegExp, InstinctDomain]> = [
    [/test|测试|stub|fixture|断言/, 'testing'],
    [/error|bug|fix|失败|错误|修复|debug/, 'debugging'],
    [/security|安全|漏洞|secret|token/, 'security'],
    [/git|commit|branch|pr\b/, 'git'],
    [/style|lint|format|命名|规范/, 'code-style'],
  ]
  for (const [pattern, domain] of rules) {
    if (pattern.test(lowered)) return domain
  }
  return 'workflow'
}
/**
 * Loads the persisted gap state for a project.
 *
 * - A missing file (`ENOENT`) yields an empty state.
 * - Any other read error is rethrown.
 * - Content that cannot be parsed or migrated is copied (best effort) to a
 *   `.corrupt-<timestamp>` sibling and an empty state is returned.
 */
async function readSkillGapState(
  project: SkillLearningProjectContext,
  rootDir?: string,
): Promise<SkillGapState> {
  const statePath = getSkillGapStatePath(project, rootDir)
  const emptyState = (): SkillGapState => ({ version: 1, gaps: {} })

  let raw: string
  try {
    raw = await readFile(statePath, 'utf8')
  } catch (error) {
    // Only "no file yet" counts as empty. Swallowing EACCES/EIO/etc. here
    // would let the next write persist {} and wipe every gap record.
    if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
      throw error
    }
    return emptyState()
  }

  try {
    return migrateLegacyGapState(JSON.parse(raw) as SkillGapState)
  } catch {
    // Corrupt or truncated content: keep a copy of the raw text for manual
    // recovery before starting over with an empty state.
    const backupPath = `${statePath}.corrupt-${Date.now()}`
    try {
      await writeFile(backupPath, raw, 'utf8')
    } catch {
      /* best effort */
    }
    return emptyState()
  }
}
/**
 * Normalizes a parsed gap state into the current record shape.
 *
 * For every record: `count` defaults to 1 when not a number, `draftHits`
 * defaults to 0 when not a finite number, `draftHitSessions` keeps only string
 * entries, and `status` is coerced to a known value and downgraded when it
 * claims a draft/active materialization the record does not carry.
 */
function migrateLegacyGapState(state: SkillGapState): SkillGapState {
  const gaps: Record<string, SkillGapRecord> = {}
  for (const [key, record] of Object.entries(state.gaps ?? {})) {
    const legacy = record as Partial<SkillGapRecord> & {
      status?: unknown
    }

    const rawHits = legacy.draftHits
    const draftHits =
      typeof rawHits === 'number' && Number.isFinite(rawHits) ? rawHits : 0
    const count = typeof legacy.count === 'number' ? legacy.count : 1

    const draftOnRecord = Boolean(legacy.draft)
    const activeOnRecord = Boolean(legacy.active)

    let status: SkillGapStatus = normalizeLegacyStatus(legacy.status)
    if (status === 'draft' && !draftOnRecord && count < DRAFT_PROMOTION_COUNT) {
      // Legacy first-call-writes-draft artifact: marked draft below the
      // promotion count and without a draft materialization on the record.
      status = 'pending'
    }
    if (status === 'active' && !activeOnRecord) {
      // Claimed active without an active materialization: fall back one step.
      status = draftOnRecord ? 'draft' : 'pending'
    }

    const draftHitSessions: string[] = []
    if (Array.isArray(legacy.draftHitSessions)) {
      for (const session of legacy.draftHitSessions) {
        if (typeof session === 'string') draftHitSessions.push(session)
      }
    }

    gaps[key] = {
      ...(record as SkillGapRecord),
      count,
      draftHits,
      draftHitSessions,
      status,
    }
  }
  return { version: 1, gaps }
}
/**
 * Coerces an arbitrary persisted value into a known gap status.
 *
 * @returns The value itself when it is one of the four known statuses,
 *   otherwise `'pending'`.
 */
function normalizeLegacyStatus(value: unknown): SkillGapStatus {
  switch (value) {
    case 'pending':
    case 'draft':
    case 'active':
    case 'rejected':
      return value
    default:
      return 'pending'
  }
}
/**
 * Persists the gap state as pretty-printed JSON.
 *
 * The content is written to a temp file next to the target and then renamed
 * over it, so a crash mid-write cannot leave a truncated state file for the
 * strict reader to trip over.
 */
async function writeSkillGapState(
  project: SkillLearningProjectContext,
  state: SkillGapState,
  rootDir?: string,
): Promise<void> {
  const target = getSkillGapStatePath(project, rootDir)
  await mkdir(dirname(target), { recursive: true })
  // pid + timestamp keeps temp names distinct across processes and writes.
  const staging = `${target}.tmp-${process.pid}-${Date.now()}`
  const serialized = `${JSON.stringify(state, null, 2)}\n`
  await writeFile(staging, serialized, 'utf8')
  await rename(staging, target)
}
/**
 * Resolves the location of `skill-gaps.json` for a project.
 *
 * With a `rootDir`, the file lives under `<rootDir>/global` for the `global`
 * project id and under `<rootDir>/projects/<projectId>` otherwise. Without
 * one, the directory comes from `getProjectStorageDir`.
 */
function getSkillGapStatePath(
  project: SkillLearningProjectContext,
  rootDir?: string,
): string {
  let base: string
  if (!rootDir) {
    base = getProjectStorageDir(project.projectId)
  } else if (project.projectId === 'global') {
    base = join(rootDir, 'global')
  } else {
    base = join(rootDir, 'projects', project.projectId)
  }
  return join(base, 'skill-gaps.json')
}
/**
 * Derives the storage key for a prompt: its name fragment followed by the
 * first 8 hex characters of the prompt's hash.
 */
function buildSkillGapKey(prompt: string): string {
  const fragment = buildNameFragment(prompt)
  const shortHash = hash(prompt).slice(0, 8)
  return `${fragment}-${shortHash}`
}
/**
 * Turns a prompt into a short kebab-case slug.
 *
 * A handful of Chinese terms are first mapped to English words, the text is
 * lower-cased, words of three or more characters are extracted, stop words
 * are dropped and the first five survivors are joined with `-`. The result
 * is capped at 54 characters with trailing dashes removed; prompts that yield
 * no words become `learned-gap`.
 */
function buildNameFragment(prompt: string): string {
  const translations: Array<[string, string]> = [
    ['技能', ' skill '],
    ['学习', ' learning '],
    ['进化', ' evolution '],
    ['测试', ' testing '],
    ['最小实现', ' minimal implementation '],
  ]
  let text = prompt
  for (const [term, english] of translations) {
    text = text.split(term).join(english)
  }
  text = text.toLowerCase()

  const stopWords = new Set([
    'the',
    'and',
    'for',
    'with',
    'this',
    'that',
    'user',
    'about',
    'feature',
    'flag',
    'name',
  ])
  const kept: string[] = []
  for (const word of text.match(/[a-z0-9][a-z0-9_-]{2,}/g) ?? []) {
    if (stopWords.has(word)) continue
    kept.push(word)
    if (kept.length === 5) break
  }

  const slug = kept.length > 0 ? kept.join('-') : 'learned-gap'
  return slug.slice(0, 54).replace(/-+$/g, '')
}
/**
 * Collapses every whitespace run to a single space, trims the ends and
 * truncates the result to at most `max` characters.
 */
function summarize(value: string, max: number): string {
  const singleSpaced = value.replace(/\s+/g, ' ')
  const trimmed = singleSpaced.trim()
  return trimmed.slice(0, max)
}
/** Returns the SHA-1 digest of `value` as a lowercase hex string. */
function hash(value: string): string {
  const sha1 = createHash('sha1')
  sha1.update(value)
  return sha1.digest('hex')
}
/**
 * Clears the in-process commands cache so newly written skills are picked up.
 *
 * Failures to load or call the commands module are ignored on purpose.
 */
async function clearRuntimeSkillCaches(): Promise<void> {
  try {
    const commands = await import('../../commands.js')
    commands.clearCommandsCache()
  } catch {
    // Best effort only; generated skill files are still available next process.
  }
}

View File

@@ -0,0 +1,206 @@
import { mkdir, readFile, writeFile } from 'node:fs/promises'
import { join } from 'node:path'
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
import { clearSkillIndexCache } from '../skillSearch/localSearch.js'
import type { Instinct } from './instinctParser.js'
import { buildLearnedSkillName, normalizeSkillName } from './learningPolicy.js'
import {
compareExistingArtifacts,
scoreArtifactOverlap,
type ExistingSkill,
} from './skillLifecycle.js'
import type { LearnedSkillDraft, SkillLearningScope } from './types.js'
/**
 * Minimum overlap score at which `generateOrMergeSkillDraft` treats an
 * existing skill as a duplicate and appends evidence to it instead of
 * creating a new skill.
 */
export const DUPLICATE_SKILL_OVERLAP_THRESHOLD = 0.8
/** Optional overrides for where and how a learned skill draft is generated. */
export type SkillGeneratorOptions = {
// Base directory for project-scoped skills (`<cwd>/.claude/skills/<name>`);
// `process.cwd()` is used when omitted.
cwd?: string
// Base directory for non-project-scoped skills; defaults to `skills` under
// the Claude config home directory.
globalSkillsDir?: string
// When set, the skill directory is `<outputRoot>/<name>` regardless of scope.
outputRoot?: string
// Explicit skill name; it is passed through `normalizeSkillName`.
name?: string
// Explicit description; otherwise one is derived from the first instinct.
description?: string
}
/**
 * Builds an in-memory skill draft from one or more instincts. Nothing is
 * written to disk.
 *
 * @param instincts Source instincts; at least one is required.
 * @param options Optional name/description/location overrides and scope.
 * @returns The draft, with `confidence` set to the mean instinct confidence
 *   rounded to two decimals.
 * @throws Error when `instincts` is empty.
 */
export function generateSkillDraft(
  instincts: Instinct[],
  options?: SkillGeneratorOptions & { scope?: SkillLearningScope },
): LearnedSkillDraft {
  if (instincts.length === 0) {
    throw new Error('Cannot generate a skill draft without instincts')
  }

  // Scope precedence: explicit option, then the first instinct, then project.
  const scope = options?.scope ?? instincts[0]?.scope ?? 'project'

  let name: string
  if (options?.name) {
    name = normalizeSkillName(options.name)
  } else {
    name = buildSkillName(instincts)
  }

  let confidenceTotal = 0
  for (const instinct of instincts) {
    confidenceTotal += instinct.confidence
  }
  const confidence = confidenceTotal / instincts.length

  const description = options?.description ?? buildDescription(instincts)
  const outputPath = getLearnedSkillPath(name, scope, options)
  const content = buildSkillContent({
    name,
    description,
    confidence,
    instincts,
  })
  const sourceInstinctIds = instincts.map(instinct => instinct.id)

  return {
    name,
    description,
    scope,
    sourceInstinctIds,
    confidence: Number(confidence.toFixed(2)),
    content,
    outputPath,
  }
}
/**
 * Result of `generateOrMergeSkillDraft`: either a fresh draft to create, or a
 * record of the existing skill that received the new evidence.
 */
export type SkillDedupOutcome =
| { action: 'create'; draft: LearnedSkillDraft }
| {
action: 'append-evidence'
// Existing skill judged to be a duplicate of the draft.
target: ExistingSkill
// Overlap score between the draft and `target`.
overlap: number
// Path of the file the evidence block was appended to.
appendedPath: string
}
/**
 * Generates a draft and de-duplicates it against existing skills.
 *
 * Candidates are checked in the order `compareExistingArtifacts` returns
 * them. The first one whose overlap reaches
 * `DUPLICATE_SKILL_OVERLAP_THRESHOLD` receives the instincts' evidence and no
 * new skill is created.
 *
 * @param instincts Source instincts for the draft.
 * @param options Generator options and optional scope.
 * @param existingRoots Directories searched for existing skills.
 */
export async function generateOrMergeSkillDraft(
  instincts: Instinct[],
  options: SkillGeneratorOptions & { scope?: SkillLearningScope },
  existingRoots: string[],
): Promise<SkillDedupOutcome> {
  const draft = generateSkillDraft(instincts, options)
  const similarSkills = await compareExistingArtifacts(
    'skill',
    draft,
    existingRoots,
  )

  for (const target of similarSkills) {
    const overlap = scoreArtifactOverlap(draft, target)
    if (overlap < DUPLICATE_SKILL_OVERLAP_THRESHOLD) continue
    const appendedPath = await appendInstinctEvidenceToSkill(target, instincts)
    return { action: 'append-evidence', target, overlap, appendedPath }
  }

  return { action: 'create', draft }
}
/**
 * Appends a timestamped "Learned evidence" section to an existing skill file.
 *
 * The current file content is re-read from disk; if that read fails, the
 * content cached on `target` is used as the base instead.
 *
 * @param target Skill whose file is rewritten.
 * @param instincts Instincts whose evidence lines are appended as bullets.
 * @returns The path of the rewritten file (`target.path`).
 */
export async function appendInstinctEvidenceToSkill(
  target: ExistingSkill,
  instincts: Instinct[],
): Promise<string> {
  const current = await readFile(target.path, 'utf8').catch(
    () => target.content,
  )
  const stamp = new Date().toISOString()

  const sectionLines: string[] = ['', `## Learned evidence (${stamp})`, '']
  for (const instinct of instincts) {
    for (const evidence of instinct.evidence) {
      sectionLines.push(`- ${evidence}`)
    }
  }
  sectionLines.push('')
  const section = sectionLines.join('\n')

  // Make sure the section starts on its own line.
  const separator = current.endsWith('\n') ? '' : '\n'
  await writeFile(target.path, `${current}${separator}${section}`, 'utf8')
  clearSkillIndexCache()
  return target.path
}
/**
 * Writes a draft's content to `<outputPath>/SKILL.md`, creating the directory
 * as needed, then invalidates the skill index and commands caches.
 *
 * @returns The path of the written `SKILL.md`.
 */
export async function writeLearnedSkill(
  draft: LearnedSkillDraft,
): Promise<string> {
  const skillDir = draft.outputPath
  await mkdir(skillDir, { recursive: true })
  const skillFile = join(skillDir, 'SKILL.md')
  await writeFile(skillFile, draft.content, 'utf8')

  clearSkillIndexCache()
  try {
    const commands = await import('../../commands.js')
    commands.clearCommandsCache()
  } catch {
    // Best effort: the next process will see the generated skill even if the
    // in-process command cache cannot be cleared due to import timing.
  }
  return skillFile
}
/**
 * Resolves the directory a learned skill is written to.
 *
 * Precedence: `options.outputRoot`, then `<cwd>/.claude/skills` for project
 * scope, then `options.globalSkillsDir` or `skills` under the Claude config
 * home directory.
 */
export function getLearnedSkillPath(
  name: string,
  scope: SkillLearningScope,
  options?: SkillGeneratorOptions,
): string {
  const explicitRoot = options?.outputRoot
  if (explicitRoot) return join(explicitRoot, name)

  if (scope !== 'project') {
    const globalRoot =
      options?.globalSkillsDir ?? join(getClaudeConfigHomeDir(), 'skills')
    return join(globalRoot, name)
  }

  const projectDir = options?.cwd ?? process.cwd()
  return join(projectDir, '.claude', 'skills', name)
}
/** Derives a skill name from the instincts by delegating to `buildLearnedSkillName`. */
function buildSkillName(instincts: Instinct[]): string {
return buildLearnedSkillName(instincts)
}
/**
 * Derives a one-line skill description from the first instinct's action.
 *
 * Whitespace runs are collapsed, the text is trimmed and capped at 120
 * characters. The text is trimmed before the emptiness check so that a blank
 * or whitespace-only action reaches the generic fallback instead of producing
 * a description that is a single space.
 *
 * @param instincts Source instincts; only the first one's `action` is used.
 * @returns A non-empty description.
 */
function buildDescription(instincts: Instinct[]): string {
  const action = instincts[0]?.action ?? 'Apply a learned project pattern'
  const short = action.replace(/\s+/g, ' ').trim().slice(0, 120).trimEnd()
  return short.length > 0 ? short : 'Apply learned project patterns'
}
/**
 * Renders the `SKILL.md` text for a learned skill: frontmatter, a title, and
 * Trigger / Action / Evidence bullet sections built from the instincts.
 *
 * The description is JSON-encoded in the frontmatter and the confidence is
 * rounded to two decimals.
 */
function buildSkillContent(params: {
  name: string
  description: string
  confidence: number
  instincts: Instinct[]
}): string {
  const { name, description, confidence, instincts } = params

  const bullets = (items: string[]): string =>
    items.map(item => `- ${item}`).join('\n')
  const evolvedFrom = instincts
    .map(instinct => JSON.stringify(instinct.id))
    .join(', ')

  const frontmatter = [
    '---',
    `name: ${name}`,
    `description: ${JSON.stringify(description)}`,
    'origin: skill-learning',
    `confidence: ${Number(confidence.toFixed(2))}`,
    `evolved_from: [${evolvedFrom}]`,
    '---',
  ]
  const body = [
    '',
    `# ${titleCase(name)}`,
    '',
    '## Trigger',
    '',
    bullets(instincts.map(instinct => instinct.trigger)),
    '',
    '## Action',
    '',
    bullets(instincts.map(instinct => instinct.action)),
    '',
    '## Evidence',
    '',
    bullets(instincts.flatMap(instinct => instinct.evidence)),
    '',
  ]
  return [...frontmatter, ...body].join('\n')
}
/**
 * Converts a kebab-case name to space-separated words with each first letter
 * upper-cased. Empty segments (from repeated or edge dashes) are dropped.
 */
function titleCase(value: string): string {
  const words: string[] = []
  for (const segment of value.split('-')) {
    if (segment.length === 0) continue
    words.push(segment.charAt(0).toUpperCase() + segment.slice(1))
  }
  return words.join(' ')
}

View File

@@ -0,0 +1,496 @@
import {
mkdir,
readdir,
readFile,
rename,
rm,
writeFile,
} from 'node:fs/promises'
import { existsSync } from 'node:fs'
import { basename, dirname, join } from 'node:path'
import { clearSkillIndexCache } from '../skillSearch/localSearch.js'
import type { LearnedSkillDraft } from './types.js'
import { writeLearnedSkill } from './skillGenerator.js'
/**
 * A skill (or other learned artifact) discovered on disk, plus optional
 * lifecycle metadata. The loaders in this module populate only `name`,
 * `description`, `path` and `content`.
 */
export type ExistingSkill = {
// Frontmatter `name`, falling back to the directory or file name.
name: string
// Full path of the markdown file the skill was read from.
path: string
// Frontmatter `description`, or '' when absent.
description: string
// Entire text of the markdown file.
content: string
// Compared against a draft's confidence by `shouldReplaceSkill` (0.5 if unset).
confidence?: number
// 'superseded' and 'archived' make the skill immediately replaceable.
status?: 'active' | 'superseded' | 'archived' | 'deleted'
// Must be empty or unset for the skill to qualify for hard delete.
referencedBy?: string[]
// Must be `true` for the skill to qualify for hard delete.
safeToDelete?: boolean
// Must be 'low' for the skill to qualify for hard delete.
quality?: 'low' | 'medium' | 'high'
}
/**
 * Action chosen for a learned draft relative to the existing skills, tagged by
 * `type`. Produced by `decideSkillLifecycle` and executed by
 * `applySkillLifecycleDecision`.
 */
export type SkillLifecycleDecision =
// Write the draft as a new skill.
| { type: 'create'; draft: LearnedSkillDraft; reason: string }
// Write `patch` next to the target skill as a merge suggestion.
| { type: 'merge'; targetSkill: ExistingSkill; patch: string; reason: string }
// Archive (or hard-delete when `hardDelete` is set) the target, then write the draft.
| {
type: 'replace'
targetSkill: ExistingSkill
draft: LearnedSkillDraft
reason: string
hardDelete?: boolean
}
// Move the target skill into the archive.
| { type: 'archive'; targetSkill: ExistingSkill; reason: string }
// Hard-delete the target skill; `confirmed: false` blocks the delete when applied.
| {
type: 'delete'
targetSkill: ExistingSkill
reason: string
confirmed?: boolean
}
/**
 * Audit record written as `replacement-manifest.json` whenever a skill is
 * archived or deleted.
 */
export type ReplacementManifest = {
// Name and original file path of the skill that was removed.
oldSkill: string
oldPath: string
// Name and path of the replacing skill, when the removal was a replacement.
newSkill?: string
newPath?: string
action: 'archive' | 'delete'
reason: string
// ISO-8601 timestamp of the operation.
replacedAt: string
// `true` for archives, `false` for hard deletes.
recoverable: boolean
}
/** Knobs shared by the lifecycle operations (archive, delete, apply). */
export type SkillLifecycleOptions = {
// Must be `true` for `deleteSkill` to proceed; otherwise it throws.
allowHardDelete?: boolean
// Archive directory; defaults to `.archive` beside the skill's directory.
archiveRoot?: string
// Manifest directory; defaults to the archived path (archive) or a
// `.tombstones` directory beside the skill's directory (delete).
manifestRoot?: string
// Clock override used for timestamps in paths and manifests.
now?: Date
}
/**
 * Kind of learned artifact. 'skill' is loaded from `SKILL.md` files; the other
 * kinds are loaded from any `.md` file (see `loadExistingArtifacts`).
 */
export type LearnedArtifactKind = 'skill' | 'command' | 'agent'
/** Minimal draft shape needed for overlap scoring: all three fields are tokenized together. */
export type ArtifactDraft = {
name: string
description: string
content: string
}
/**
 * Finds existing artifacts that overlap with a draft.
 *
 * @param kind Artifact kind, used when loading from directories.
 * @param draft Draft to compare against.
 * @param rootsOrSkills Either directories to load artifacts from, or an
 *   already-loaded list. An empty array is treated as an empty list.
 * @returns Artifacts with an overlap score of at least 0.18, highest first.
 */
export async function compareExistingArtifacts(
  kind: LearnedArtifactKind,
  draft: ArtifactDraft,
  rootsOrSkills: string[] | ExistingSkill[],
): Promise<ExistingSkill[]> {
  // The first element decides whether we were given paths or loaded skills.
  const givenRoots =
    rootsOrSkills.length > 0 && typeof rootsOrSkills[0] === 'string'
  const existing = givenRoots
    ? await loadExistingArtifacts(kind, rootsOrSkills as string[])
    : (rootsOrSkills as ExistingSkill[])

  const draftTerms = terms(
    `${draft.name} ${draft.description} ${draft.content}`,
  )
  const ranked: Array<{ skill: ExistingSkill; score: number }> = []
  for (const skill of existing) {
    const score = overlapScore(
      draftTerms,
      terms(`${skill.name} ${skill.description} ${skill.content}`),
    )
    if (score >= 0.18) ranked.push({ skill, score })
  }
  ranked.sort((left, right) => right.score - left.score)
  return ranked.map(entry => entry.skill)
}
/**
 * Skill-specific shorthand for `compareExistingArtifacts('skill', …)`.
 *
 * @param draft Draft to compare against.
 * @param rootsOrSkills Directories to load skills from, or already-loaded skills.
 * @returns The overlapping skills as returned by `compareExistingArtifacts`.
 */
export async function compareExistingSkills(
draft: LearnedSkillDraft,
rootsOrSkills: string[] | ExistingSkill[],
): Promise<ExistingSkill[]> {
return compareExistingArtifacts('skill', draft, rootsOrSkills)
}
/**
 * Loads existing artifacts of the given kind from the given directories.
 *
 * Skills are delegated to `loadExistingSkills`; for other kinds every `.md`
 * file under each existing root is collected. Missing roots are skipped.
 */
export async function loadExistingArtifacts(
  kind: LearnedArtifactKind,
  roots: string[],
): Promise<ExistingSkill[]> {
  if (kind === 'skill') return loadExistingSkills(roots)

  const found: ExistingSkill[] = []
  for (const root of roots) {
    if (existsSync(root)) {
      await collectArtifactFiles(root, found)
    }
  }
  return found
}
/**
 * Chooses what to do with a learned draft given the existing skills.
 *
 * Order of checks:
 * 1. With `allowHardDelete`, the first skill that is safe to hard-delete
 *    yields a confirmed `delete` decision.
 * 2. With no existing skills, `create`.
 * 3. Against the first existing skill only: `replace` on overlap >= 0.72 with
 *    draft confidence >= 0.75 and `shouldReplaceSkill`; `merge` on overlap
 *    >= 0.35; otherwise `create`.
 */
export function decideSkillLifecycle(
  draft: LearnedSkillDraft,
  existingSkills: ExistingSkill[],
  options: Pick<SkillLifecycleOptions, 'allowHardDelete'> = {},
): SkillLifecycleDecision {
  if (options.allowHardDelete) {
    const deletable = existingSkills.find(skill => isSafeToHardDelete(skill))
    if (deletable) {
      return {
        type: 'delete',
        targetSkill: deletable,
        reason:
          'Existing skill is low quality, unreferenced, and safe to delete.',
        confirmed: true,
      }
    }
  }

  const [target] = existingSkills
  if (!target) {
    return {
      type: 'create',
      draft,
      reason: 'No overlapping active skill found.',
    }
  }

  const score = scoreArtifactOverlap(draft, target)

  const isStrongMatch = score >= 0.72 && draft.confidence >= 0.75
  if (isStrongMatch && shouldReplaceSkill(draft, target)) {
    return {
      type: 'replace',
      targetSkill: target,
      draft,
      reason: `New learned skill has high overlap (${score.toFixed(2)}) and higher confidence.`,
    }
  }

  if (score < 0.35) {
    return { type: 'create', draft, reason: 'Overlap is too low to merge.' }
  }
  return {
    type: 'merge',
    targetSkill: target,
    patch: buildMergePatch(draft),
    reason: `Existing skill overlaps with the learned pattern (${score.toFixed(2)}).`,
  }
}
/**
 * Executes a lifecycle decision on disk.
 *
 * `merge` and `replace` refuse to touch skills whose frontmatter does not mark
 * them as generated by skill learning; in that case a note is written to
 * stderr and an empty result is returned.
 *
 * @param decision Decision to apply.
 * @param options Lifecycle options forwarded to archive/delete.
 * @returns The paths produced by the operation; which ones are present
 *   depends on the decision type.
 */
export async function applySkillLifecycleDecision(
  decision: SkillLifecycleDecision,
  options: SkillLifecycleOptions = {},
): Promise<{
  activePath?: string
  archivedPath?: string
  deletedPath?: string
  manifestPath?: string
  tombstonePath?: string
}> {
  // Returns true (after logging) when the skill was not generated by us.
  const refuseUserAuthored = (skill: ExistingSkill): boolean => {
    if (isSkillLearningGenerated(skill)) return false
    process.stderr.write(
      `[skill-learning] skip user-authored skill: ${skill.path}\n`,
    )
    return true
  }

  switch (decision.type) {
    case 'create': {
      const activePath = await writeLearnedSkill(decision.draft)
      return { activePath }
    }
    case 'merge': {
      if (refuseUserAuthored(decision.targetSkill)) return {}
      const activePath = await writeMergePatch(
        decision.targetSkill,
        decision.patch,
      )
      return { activePath }
    }
    case 'replace': {
      if (refuseUserAuthored(decision.targetSkill)) return {}
      // The superseded skill is archived/deleted BEFORE the replacement is
      // written, so a search-index refresh between the two steps can never
      // see both skills active at once. `draft.outputPath` is the exact
      // location `writeLearnedSkill` will target.
      const replacement = {
        newSkill: decision.draft.name,
        newPath: decision.draft.outputPath,
      }
      if (!decision.hardDelete) {
        const archived = await archiveSkill(
          decision.targetSkill,
          decision.reason,
          replacement,
          options,
        )
        const activePath = await writeLearnedSkill(decision.draft)
        return {
          activePath,
          archivedPath: archived.archivedPath,
          manifestPath: archived.manifestPath,
        }
      }
      const removed = await deleteSkill(
        decision.targetSkill,
        decision.reason,
        replacement,
        { ...options, allowHardDelete: true },
      )
      const activePath = await writeLearnedSkill(decision.draft)
      return {
        activePath,
        deletedPath: removed.deletedPath,
        manifestPath: removed.manifestPath,
        tombstonePath: removed.tombstonePath,
      }
    }
    case 'archive':
      return await archiveSkill(
        decision.targetSkill,
        decision.reason,
        undefined,
        options,
      )
    case 'delete': {
      // A decision explicitly marked `confirmed: false` must not delete.
      const allowHardDelete =
        options.allowHardDelete && decision.confirmed !== false
      return await deleteSkill(decision.targetSkill, decision.reason, undefined, {
        ...options,
        allowHardDelete,
      })
    }
  }
}
/**
 * Loads every `SKILL.md` found under the given directories.
 * Roots that do not exist are skipped.
 */
export async function loadExistingSkills(
  roots: string[],
): Promise<ExistingSkill[]> {
  const found: ExistingSkill[] = []
  for (const root of roots) {
    if (existsSync(root)) {
      await collectSkillFiles(root, found)
    }
  }
  return found
}
/**
 * Moves a skill's directory into the archive and records a recoverable
 * replacement manifest.
 *
 * The archived directory is named `<skill dir name>-<timestamp>` inside
 * `options.archiveRoot` (default: `.archive` beside the skill directory).
 *
 * @param skill Skill to archive; its directory is the parent of `skill.path`.
 * @param reason Reason stored in the manifest.
 * @param replacement Optional name/path of the skill that supersedes it.
 * @param options Archive/manifest locations and clock override.
 * @returns The archived directory and the manifest file path.
 */
export async function archiveSkill(
  skill: ExistingSkill,
  reason: string,
  replacement?: { newSkill?: string; newPath?: string },
  options: SkillLifecycleOptions = {},
): Promise<{ archivedPath: string; manifestPath: string }> {
  const sourceDir = dirname(skill.path)
  const archiveRoot =
    options.archiveRoot ?? join(dirname(sourceDir), '.archive')
  const archivedName = `${basename(sourceDir)}-${timestamp(options.now)}`
  const archivedPath = join(archiveRoot, archivedName)

  await mkdir(archiveRoot, { recursive: true })
  await rename(sourceDir, archivedPath)

  const manifest: ReplacementManifest = {
    oldSkill: skill.name,
    oldPath: skill.path,
    newSkill: replacement?.newSkill,
    newPath: replacement?.newPath,
    action: 'archive',
    reason,
    replacedAt: (options.now ?? new Date()).toISOString(),
    recoverable: true,
  }
  const manifestPath = await writeReplacementManifest(
    options.manifestRoot ?? archivedPath,
    manifest,
  )
  clearSkillIndexCache()
  return { archivedPath, manifestPath }
}
/**
 * Permanently removes a skill's directory, leaving a replacement manifest and
 * a tombstone file (containing the skill's last content) behind.
 *
 * @param skill Skill to delete; its directory is the parent of `skill.path`.
 * @param reason Reason stored in the manifest.
 * @param replacement Optional name/path of the skill that supersedes it.
 * @param options Must carry `allowHardDelete: true`; also manifest location
 *   and clock override.
 * @returns The deleted skill path, the manifest path and the tombstone path.
 * @throws Error when `options.allowHardDelete` is not truthy.
 */
export async function deleteSkill(
  skill: ExistingSkill,
  reason: string,
  replacement?: { newSkill?: string; newPath?: string },
  options: SkillLifecycleOptions = {},
): Promise<{
  deletedPath: string
  manifestPath: string
  tombstonePath: string
}> {
  if (!options.allowHardDelete) {
    throw new Error('Hard delete requires allowHardDelete=true')
  }
  const skillDir = dirname(skill.path)
  const content = existsSync(skill.path)
    ? await readFile(skill.path, 'utf8')
    : ''
  const manifestRoot =
    options.manifestRoot ?? join(dirname(skillDir), '.tombstones')
  const manifestPath = await writeReplacementManifest(manifestRoot, {
    oldSkill: skill.name,
    oldPath: skill.path,
    newSkill: replacement?.newSkill,
    newPath: replacement?.newPath,
    action: 'delete',
    reason,
    replacedAt: (options.now ?? new Date()).toISOString(),
    recoverable: false,
  })
  // `skill.name` can come straight from a file's frontmatter, so it may hold
  // path separators or other characters that are invalid in file names.
  // Neutralize them so the tombstone always lands inside `manifestRoot`.
  // Ordinary names are left unchanged.
  const safeName = skill.name.replace(/[\\/:*?"<>|\u0000-\u001f]/g, '-')
  const tombstonePath = join(
    manifestRoot,
    `${safeName}-${timestamp(options.now)}.tombstone.json`,
  )
  await writeFile(
    tombstonePath,
    `${JSON.stringify({ deletedSkill: skill.name, oldPath: skill.path, content }, null, 2)}\n`,
    'utf8',
  )
  // Only remove the directory once the manifest and tombstone are on disk.
  await rm(skillDir, { recursive: true, force: true })
  clearSkillIndexCache()
  return { deletedPath: skill.path, manifestPath, tombstonePath }
}
/**
 * Writes `replacement-manifest.json` (pretty-printed, trailing newline) into
 * `directory`, creating the directory as needed.
 *
 * @returns The path of the written manifest.
 */
export async function writeReplacementManifest(
  directory: string,
  manifest: ReplacementManifest,
): Promise<string> {
  await mkdir(directory, { recursive: true })
  const target = join(directory, 'replacement-manifest.json')
  const serialized = `${JSON.stringify(manifest, null, 2)}\n`
  await writeFile(target, serialized, 'utf8')
  return target
}
/**
 * Writes a merge suggestion as `learned-skill.patch.md` next to the skill's
 * file and invalidates the skill index cache.
 *
 * @returns The path of the written patch file.
 */
async function writeMergePatch(
  skill: ExistingSkill,
  patch: string,
): Promise<string> {
  const skillDir = dirname(skill.path)
  const target = join(skillDir, 'learned-skill.patch.md')
  await writeFile(target, patch, 'utf8')
  clearSkillIndexCache()
  return target
}
/**
 * Renders the markdown merge suggestion for a draft: a header with the
 * draft's name and confidence followed by its full content.
 */
function buildMergePatch(draft: LearnedSkillDraft): string {
  const header =
    '# Learned Skill Merge Patch\n' +
    '\n' +
    `Target learned skill: ${draft.name}\n` +
    `Confidence: ${draft.confidence}\n`
  const suggestions = `## Suggested additions\n\n${draft.content}`
  return `${header}\n${suggestions}`
}
/**
 * Decides whether a draft is enough of an improvement to replace a skill.
 *
 * Superseded or archived targets are always replaceable. Otherwise the draft
 * must beat the target's confidence (0.5 when unset) by at least 0.15, or be
 * more than 160 characters longer.
 */
function shouldReplaceSkill(
  draft: LearnedSkillDraft,
  target: ExistingSkill,
): boolean {
  switch (target.status) {
    case 'superseded':
    case 'archived':
      return true
  }
  const targetConfidence = target.confidence ?? 0.5
  if (draft.confidence - targetConfidence >= 0.15) return true
  return draft.content.length - target.content.length > 160
}
/**
 * A skill qualifies for hard delete only when it is explicitly flagged
 * `safeToDelete`, rated low quality, and referenced by nothing.
 */
function isSafeToHardDelete(skill: ExistingSkill): boolean {
  if (skill.safeToDelete !== true) return false
  if (skill.quality !== 'low') return false
  const referenceCount = skill.referencedBy?.length ?? 0
  return referenceCount === 0
}
/**
 * Formats a date as its ISO-8601 string with every `:` and `.` replaced by
 * `-`. Defaults to the current time.
 */
function timestamp(date = new Date()): string {
  const iso = date.toISOString()
  return iso.split(/[:.]/).join('-')
}
/**
 * Recursively collects every `SKILL.md` under `root` into `results`.
 *
 * Directories named `.archive` are not descended into. The skill name comes
 * from the frontmatter, falling back to the containing directory's name.
 */
async function collectSkillFiles(
  root: string,
  results: ExistingSkill[],
): Promise<void> {
  for (const entry of await readdir(root, { withFileTypes: true })) {
    const entryPath = join(root, entry.name)

    if (entry.isDirectory()) {
      if (entry.name !== '.archive') {
        await collectSkillFiles(entryPath, results)
      }
      continue
    }

    if (!entry.isFile() || entry.name !== 'SKILL.md') continue

    const content = await readFile(entryPath, 'utf8')
    const name =
      parseFrontmatter(content, 'name') ?? basename(dirname(entryPath))
    const description = parseFrontmatter(content, 'description') ?? ''
    results.push({ name, description, path: entryPath, content })
  }
}
/**
 * Recursively collects every `.md` file under `root` into `results`.
 *
 * Directories named `.archive` are not descended into. The artifact name
 * comes from the frontmatter, falling back to the file name without `.md`.
 */
async function collectArtifactFiles(
  root: string,
  results: ExistingSkill[],
): Promise<void> {
  for (const entry of await readdir(root, { withFileTypes: true })) {
    const entryPath = join(root, entry.name)

    if (entry.isDirectory()) {
      if (entry.name !== '.archive') {
        await collectArtifactFiles(entryPath, results)
      }
      continue
    }

    if (!entry.isFile() || !entry.name.endsWith('.md')) continue

    const content = await readFile(entryPath, 'utf8')
    const name =
      parseFrontmatter(content, 'name') ?? entry.name.replace(/\.md$/, '')
    const description = parseFrontmatter(content, 'description') ?? ''
    results.push({ name, description, path: entryPath, content })
  }
}
/**
 * Reads a single scalar value from a document's YAML frontmatter.
 *
 * Only the block between the opening `---` and the next `---` is searched, so
 * a body line such as `origin: skill-learning` in a user-authored doc is
 * never mistaken for a generated-skill marker.
 *
 * Differences from a naive `key:\s*"?([^"\n]+)"?` match:
 * - whitespace after the colon stops at the end of the line, so an empty
 *   value no longer captures the following line;
 * - double-quoted values are decoded as JSON strings (the generator writes
 *   descriptions with `JSON.stringify`), so escaped quotes survive;
 * - unquoted values are returned whole, even if they contain a `"`;
 * - `key` is escaped before being placed in the pattern.
 *
 * @param content Full document text.
 * @param key Frontmatter key to look up.
 * @returns The trimmed value, or `undefined` when there is no frontmatter,
 *   the key is absent, or its value is empty.
 */
function parseFrontmatter(content: string, key: string): string | undefined {
  const block = /^---\r?\n([\s\S]*?)\r?\n---/.exec(content)?.[1]
  if (block === undefined) return undefined

  const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  const line = new RegExp(`^${escapedKey}:[ \\t]*(.*)$`, 'm').exec(block)
  const raw = line?.[1]?.trim()
  if (!raw) return undefined

  if (raw.startsWith('"')) {
    try {
      const decoded: unknown = JSON.parse(raw)
      if (typeof decoded === 'string') {
        const value = decoded.trim()
        return value.length > 0 ? value : undefined
      }
    } catch {
      // Not a clean JSON string (e.g. trailing comment); fall through to the
      // lenient form below.
    }
    // Lenient fallback: everything between the opening quote and the next one.
    const lenient = /^"([^"]*)/.exec(raw)?.[1]?.trim()
    return lenient ? lenient : undefined
  }
  return raw
}
/**
 * Tells whether a skill was produced by skill learning, i.e. its frontmatter
 * carries `origin: skill-learning`.
 */
function isSkillLearningGenerated(skill: ExistingSkill): boolean {
  const origin = parseFrontmatter(skill.content, 'origin')
  return origin === 'skill-learning'
}
/**
 * Tokenizes text into the set of distinct lower-cased `[a-z0-9]` runs that
 * are longer than two characters.
 */
function terms(value: string): Set<string> {
  const unique = new Set<string>()
  for (const token of value.toLowerCase().match(/[a-z0-9]+/g) ?? []) {
    if (token.length > 2) unique.add(token)
  }
  return unique
}
/**
 * Overlap coefficient of two term sets: the number of shared terms divided by
 * the size of the smaller set. Returns 0 when either set is empty.
 */
function overlapScore(a: Set<string>, b: Set<string>): number {
  if (a.size === 0 || b.size === 0) return 0
  const shared = Array.from(a).filter(term => b.has(term)).length
  return shared / Math.min(a.size, b.size)
}
/**
 * Scores how much a draft overlaps with an existing artifact.
 *
 * Name, description and content of each side are tokenized together and
 * compared with `overlapScore`.
 *
 * @returns A value between 0 and 1.
 */
export function scoreArtifactOverlap(
  draft: ArtifactDraft,
  existing: { name: string; description: string; content: string },
): number {
  const tokenize = (artifact: {
    name: string
    description: string
    content: string
  }): Set<string> =>
    terms(`${artifact.name} ${artifact.description} ${artifact.content}`)
  return overlapScore(tokenize(draft), tokenize(existing))
}

View File

@@ -0,0 +1,312 @@
import { randomUUID } from 'node:crypto'
import {
appendObservation,
type StoredSkillObservation,
} from './observationStore.js'
import type {
SkillLearningProjectContext,
SkillObservationOutcome,
} from './types.js'
import { logForDebugging } from '../../utils/debug.js'
import { logError } from '../../utils/log.js'
/**
* Tool event hook layer.
*
* Preferred observation pathway: consumers (tool dispatcher, REPL turn loop,
* or integration tests) call `recordToolStart` / `recordToolComplete` /
* `recordToolError` / `recordUserCorrection` as tool-level events happen,
* producing deterministic observations with `source: 'tool-hook'`.
*
* Post-sampling reconstruction (runtimeObserver.observationsFromMessages)
* is retained as a fallback for environments where the caller cannot emit
* tool events directly.
*
* @todo Wire these functions into `src/Tool.ts`'s public dispatch so the
* main REPL tool loop produces tool-hook observations automatically.
* Until then, callers that do have tool-level signal (integration
* tests, custom harness code, future tool middleware) can use the
* functions here directly.
*/
/**
 * Identity of the tool event being recorded. `baseObservation` copies
 * `sessionId`, `turn`, `projectId`, `projectName` and `cwd` onto each
 * observation it creates.
 */
export type ToolHookContext = {
sessionId: string
// Turn number persisted on the observation so consumers can filter by turn.
turn: number
projectId: string
projectName: string
cwd: string
// Optional resolved project context.
project?: SkillLearningProjectContext
}
/** Maximum number of turns tracked per session before pruning. */
const EMITTED_TURNS_SET_MAX = 500
/** How many turns to retain after pruning a session Set. */
const EMITTED_TURNS_SET_KEEP = 250
/** Maximum number of sessions tracked in the Map before pruning. */
const EMITTED_TURNS_MAP_MAX = 50
/** How many sessions to retain after pruning the Map. */
const EMITTED_TURNS_MAP_KEEP = 25
/**
 * Turn numbers marked via `markTurn`, keyed by session id. Insertion order of
 * both the Map and each Set is what `pruneEmittedTurns` uses to pick entries
 * to evict.
 */
const emittedTurns = new Map<string, Set<number>>()
/**
 * Keeps `emittedTurns` within its memory bounds.
 *
 * - A session Set larger than `EMITTED_TURNS_SET_MAX` is trimmed down to its
 *   `EMITTED_TURNS_SET_KEEP` most recently inserted turns.
 * - A Map larger than `EMITTED_TURNS_MAP_MAX` is trimmed down to its
 *   `EMITTED_TURNS_MAP_KEEP` most recently inserted sessions.
 *
 * Exported so tests and `resetToolHookBookkeeping` callers can invoke it
 * directly.
 */
export function pruneEmittedTurns(): void {
  // Sets first. Eviction is FIFO by insertion order, NOT by turn-number
  // magnitude: replayed transcripts or nested tool chains can produce
  // non-monotonic turn numbers, and those must not evict the wrong entries.
  for (const turns of emittedTurns.values()) {
    if (turns.size <= EMITTED_TURNS_SET_MAX) continue
    let surplus = turns.size - EMITTED_TURNS_SET_KEEP
    for (const turn of turns) {
      if (surplus <= 0) break
      turns.delete(turn)
      surplus--
    }
  }

  // Then the Map itself: drop the oldest sessions by insertion order.
  if (emittedTurns.size <= EMITTED_TURNS_MAP_MAX) return
  let surplusSessions = emittedTurns.size - EMITTED_TURNS_MAP_KEEP
  for (const sessionId of emittedTurns.keys()) {
    if (surplusSessions <= 0) break
    emittedTurns.delete(sessionId)
    surplusSessions--
  }
}
/**
 * Records that tool-hook observations were emitted for `turn` in `sessionId`,
 * then prunes the bookkeeping.
 */
function markTurn(sessionId: string, turn: number): void {
  let seen = emittedTurns.get(sessionId)
  if (seen === undefined) {
    seen = new Set<number>()
  }
  seen.add(turn)
  // Delete + re-set moves the session to the end of the Map's insertion
  // order, so a recently touched session counts as "youngest" for eviction.
  emittedTurns.delete(sessionId)
  emittedTurns.set(sessionId, seen)
  pruneEmittedTurns()
}
/**
 * Reports whether `turn` is currently marked for `sessionId` in the
 * tool-hook bookkeeping.
 */
export function hasToolHookObservationsForTurn(
  sessionId: string,
  turn: number,
): boolean {
  const seen = emittedTurns.get(sessionId)
  if (seen === undefined) return false
  return seen.has(turn)
}
/** Forgets every marked turn for every session. */
export function resetToolHookBookkeeping(): void {
emittedTurns.clear()
}
/**
 * Builds the fields shared by every tool-hook observation: a fresh random id,
 * the current timestamp, the `'tool-hook'` source and the identity fields
 * copied from the context.
 */
function baseObservation(
  ctx: ToolHookContext,
): Pick<
  StoredSkillObservation,
  | 'id'
  | 'sessionId'
  | 'projectId'
  | 'projectName'
  | 'cwd'
  | 'timestamp'
  | 'source'
  | 'turn'
> {
  const { sessionId, projectId, projectName, cwd, turn } = ctx
  const id = randomUUID()
  const timestamp = new Date().toISOString()
  return {
    id,
    sessionId,
    projectId,
    projectName,
    cwd,
    timestamp,
    source: 'tool-hook',
    // The turn is persisted so runtimeObserver can filter tool-hook
    // observations by the current turn rather than sweeping all historical
    // tool-hook data (codex review Q1).
    turn,
  }
}
// Memoised promise for the lazily imported dependencies. It is created on the
// first `_getDeps()` call and reused afterwards, so the hot path does not
// repeat the dynamic imports.
let _depImportCache:
  | Promise<{
      resolveProjectContext: (cwd: string) => SkillLearningProjectContext
      isSkillLearningEnabled: () => boolean
      RUNTIME_SESSION_ID: string
      getRuntimeTurn: () => number
    }>
  | undefined

/**
 * Return the shared promise for the lazily imported helper modules, starting
 * the imports on first use.
 */
function _getDeps() {
  if (_depImportCache !== undefined) {
    return _depImportCache
  }
  _depImportCache = Promise.all([
    import('./projectContext.js'),
    import('./featureCheck.js'),
    import('./runtimeObserver.js'),
  ]).then(loaded => {
    const [projectContextModule, featureCheckModule, runtimeObserverModule] =
      loaded
    return {
      resolveProjectContext: projectContextModule.resolveProjectContext,
      isSkillLearningEnabled: featureCheckModule.isSkillLearningEnabled,
      RUNTIME_SESSION_ID: runtimeObserverModule.RUNTIME_SESSION_ID,
      getRuntimeTurn: runtimeObserverModule.getRuntimeTurn,
    }
  })
  return _depImportCache
}
/**
 * Reset the cached dep import (for test isolation).
 *
 * Sets `_depImportCache` back to `undefined`, so the next `_getDeps()` call
 * issues the dynamic imports again instead of reusing the stored promise.
 */
export function resetToolHookDepsCache(): void {
_depImportCache = undefined
}
/**
 * Wrap a tool.call invocation with deterministic tool-event observation.
 *
 * Designed for the single call site in `toolExecution.ts`. The hook calls
 * (`recordToolStart`, `recordToolComplete`, `recordToolError`) are true
 * fire-and-forget: the tool invoke result is returned immediately without
 * waiting for the observation to persist. Errors in observation are caught
 * and logged so they never surface to the caller.
 *
 * `invoke` is called exactly once per call of this wrapper. It runs outside
 * the observation-setup `try` block, so a synchronous throw from `invoke`
 * can never be mistaken for a setup failure and trigger a second invocation.
 *
 * @param toolName - Tool name stored on every emitted observation.
 * @param input - Tool input, handed to `recordToolStart`.
 * @param callContext - Optional overrides; a missing `sessionId` falls back
 *   to `RUNTIME_SESSION_ID` and a missing `turn` to `getRuntimeTurn()`.
 * @param invoke - Thunk that performs the actual tool call.
 * @returns The value `invoke` resolves to.
 * @throws Whatever `invoke` throws or rejects with, unchanged.
 */
export async function runToolCallWithSkillLearningHooks<T>(
  toolName: string,
  input: unknown,
  callContext: { sessionId?: string; turn?: number },
  invoke: () => Promise<T>,
): Promise<T> {
  // `ctx` stays undefined when skill learning is disabled or setup failed;
  // in both cases the tool still runs, just without observation.
  let ctx: ToolHookContext | undefined
  try {
    const {
      resolveProjectContext,
      isSkillLearningEnabled,
      RUNTIME_SESSION_ID,
      getRuntimeTurn,
    } = await _getDeps()
    if (isSkillLearningEnabled()) {
      const project = resolveProjectContext(process.cwd())
      // Always emit under the runtime observer's sessionId so the
      // post-sampling consumer can find our records. The prior default
      // `'cli'` fell outside the observer's sessionId filter and made
      // tool-hook observations structurally unconsumable (codex second-pass
      // audit AC1).
      ctx = {
        sessionId: callContext.sessionId ?? RUNTIME_SESSION_ID,
        turn: callContext.turn ?? getRuntimeTurn(),
        projectId: project.projectId,
        projectName: project.projectName,
        cwd: project.cwd,
        project,
      }
      // Fire-and-forget: do NOT await — tool invoke must not be blocked.
      void recordToolStart(ctx, toolName, input).catch(e => {
        logForDebugging('skill-learning: recordToolStart error')
        logError(e)
      })
    }
  } catch (e) {
    // Never let observation setup errors affect tool execution.
    logForDebugging('skill-learning: hook setup error')
    logError(e)
  }

  // The one and only call to `invoke`. Keeping it out of the setup `try`
  // above means its failures are reported to the caller, never swallowed.
  let result: T
  try {
    result = await invoke()
  } catch (error) {
    if (ctx) {
      // Fire-and-forget: do NOT await.
      void recordToolError(ctx, toolName, error).catch(e => {
        logForDebugging('skill-learning: recordToolError error')
        logError(e)
      })
    }
    throw error
  }

  if (ctx) {
    // Fire-and-forget: do NOT await.
    void recordToolComplete(ctx, toolName, result, 'success').catch(e => {
      logForDebugging('skill-learning: recordToolComplete error')
      logError(e)
    })
  }
  return result
}
/**
 * Persist a `tool_start` observation for a tool that is about to run.
 *
 * @param ctx - Session/project context the observation is attributed to.
 * @param toolName - Name of the tool being started.
 * @param input - Optional tool input; stored in serialised form.
 * @returns The result of `appendObservation` for the new record.
 */
export async function recordToolStart(
  ctx: ToolHookContext,
  toolName: string,
  input?: unknown,
): Promise<StoredSkillObservation> {
  // Flag the turn first so it is marked even if persistence fails later.
  markTurn(ctx.sessionId, ctx.turn)

  const shared = baseObservation(ctx)
  const serializedInput = stringify(input)
  const record: StoredSkillObservation = {
    ...shared,
    event: 'tool_start',
    toolName,
    toolInput: serializedInput,
  }
  return appendObservation(record, { project: ctx.project })
}
/**
 * Persist a `tool_complete` observation for a finished tool call.
 *
 * @param ctx - Session/project context the observation is attributed to.
 * @param toolName - Name of the tool that finished.
 * @param output - Optional tool output; stored in serialised form.
 * @param outcome - Outcome tag for the call; defaults to `'success'`.
 * @returns The result of `appendObservation` for the new record.
 */
export async function recordToolComplete(
  ctx: ToolHookContext,
  toolName: string,
  output?: unknown,
  outcome: SkillObservationOutcome = 'success',
): Promise<StoredSkillObservation> {
  markTurn(ctx.sessionId, ctx.turn)

  const shared = baseObservation(ctx)
  const serializedOutput = stringify(output)
  const record: StoredSkillObservation = {
    ...shared,
    event: 'tool_complete',
    toolName,
    toolOutput: serializedOutput,
    outcome,
  }
  return appendObservation(record, { project: ctx.project })
}
/**
 * Persist an observation for a tool call that threw.
 *
 * The record uses event `'tool_complete'` with outcome `'failure'`, and the
 * serialised error is stored as `toolOutput`.
 * NOTE(review): the event union also declares `'tool_error'`; confirm that
 * recording failures as `'tool_complete'` is what consumers expect.
 *
 * @param ctx - Session/project context the observation is attributed to.
 * @param toolName - Name of the tool that failed.
 * @param error - The thrown value.
 * @returns The result of `appendObservation` for the new record.
 */
export async function recordToolError(
  ctx: ToolHookContext,
  toolName: string,
  error: unknown,
): Promise<StoredSkillObservation> {
  markTurn(ctx.sessionId, ctx.turn)

  const shared = baseObservation(ctx)
  const serializedError = stringify(error)
  const record: StoredSkillObservation = {
    ...shared,
    event: 'tool_complete',
    toolName,
    toolOutput: serializedError,
    outcome: 'failure',
  }
  return appendObservation(record, { project: ctx.project })
}
/**
 * Persist a `user_message` observation carrying the given message text.
 *
 * @param ctx - Session/project context the observation is attributed to.
 * @param messageText - Text stored on the observation as-is.
 * @returns The result of `appendObservation` for the new record.
 */
export async function recordUserCorrection(
  ctx: ToolHookContext,
  messageText: string,
): Promise<StoredSkillObservation> {
  markTurn(ctx.sessionId, ctx.turn)

  const shared = baseObservation(ctx)
  const record: StoredSkillObservation = {
    ...shared,
    event: 'user_message',
    messageText,
  }
  return appendObservation(record, { project: ctx.project })
}
/**
 * Serialise an arbitrary tool payload into a string for storage.
 *
 * - `undefined` / `null` → `undefined` (nothing to store).
 * - Strings are returned unchanged.
 * - Everything else goes through `JSON.stringify`. `Error` instances, at the
 *   top level or nested, are expanded to include `name` and `message`; both
 *   are skipped by plain `JSON.stringify` (which would yield `{}` for a bare
 *   Error) because they are not own enumerable properties. Own enumerable
 *   fields on the error (e.g. `code`) are kept.
 * - If JSON serialisation throws (circular structure, BigInt, …) the value
 *   falls back to `String(value)`.
 *
 * @param value - Payload to serialise.
 * @returns The serialised text, or `undefined` for nullish input.
 */
function stringify(value: unknown): string | undefined {
  if (value === undefined || value === null) return undefined
  if (typeof value === 'string') return value
  try {
    return JSON.stringify(value, (_key, node: unknown) =>
      node instanceof Error
        ? { ...node, name: node.name, message: node.message }
        : node,
    )
  } catch {
    return String(value)
  }
}

View File

@@ -0,0 +1,109 @@
/** Scope tag used by instincts, skill drafts and project contexts: project-local or global. */
export type SkillLearningScope = 'project' | 'global'
/** Allowed status values for a skill gap. */
export type SkillGapStatus = 'pending' | 'draft' | 'active' | 'rejected'
/** Kinds of event an observation can record (message and tool lifecycle events). */
export type SkillObservationEvent =
| 'user_message'
| 'assistant_message'
| 'tool_start'
| 'tool_complete'
| 'tool_error'
/** Outcome tag attached to an observation or to an instinct's evidence. */
export type SkillObservationOutcome = 'success' | 'failure' | 'unknown'
/**
 * Readonly list of the valid instinct domains. Declared `as const` so the
 * `InstinctDomain` union below can be derived from the same values.
 */
export const INSTINCT_DOMAINS = [
'workflow',
'testing',
'debugging',
'code-style',
'security',
'git',
'project',
] as const
/** Union of the string literals listed in `INSTINCT_DOMAINS`. */
export type InstinctDomain = (typeof INSTINCT_DOMAINS)[number]
/** Where an instinct came from. */
export type InstinctSource =
| 'session-observation'
| 'repo-analysis'
| 'imported'
/**
 * Allowed status values for an instinct.
 * NOTE(review): the permitted transitions between these states are not
 * visible in this file — confirm against the code that updates `status`.
 */
export type InstinctStatus =
| 'pending'
| 'active'
| 'stale'
| 'superseded'
| 'retired'
| 'archived'
| 'conflict-hold'
/**
 * How a project context was identified.
 * NOTE(review): the names suggest a project-dir setting, a git remote, a git
 * root, or the global fallback — confirm against the resolver.
 */
export type ProjectContextSource =
| 'claude_project_dir'
| 'git_remote'
| 'git_root'
| 'global'
/**
 * A single observed event, attributed to a session and a project.
 * The tool and message fields are optional; which ones are set presumably
 * depends on `event` — confirm against the code that writes observations.
 */
export interface SkillObservation {
id: string
timestamp: string
event: SkillObservationEvent
sessionId: string
projectId: string
projectName: string
cwd: string
// Tool-related details (optional).
toolName?: string
toolInput?: unknown
toolOutput?: unknown
// Message text (optional).
messageText?: string
outcome?: SkillObservationOutcome
}
/**
 * A learned trigger → action rule with an associated confidence, domain,
 * provenance, scope, supporting evidence and lifecycle status.
 */
export interface Instinct {
id: string
// Condition under which the instinct applies, and what to do when it does.
trigger: string
action: string
// Numeric confidence score (valid range is not enforced by this type).
confidence: number
domain: InstinctDomain
source: InstinctSource
scope: SkillLearningScope
// Owning project; optional.
projectId?: string
projectName?: string
evidence: string[]
evidenceOutcome?: SkillObservationOutcome
// Creation / last-update timestamps stored as strings.
createdAt: string
updatedAt: string
status: InstinctStatus
}
/**
 * Draft of a learned skill: its metadata, the ids of the instincts it was
 * derived from, the content, and the path it is to be written to.
 */
export interface LearnedSkillDraft {
name: string
description: string
scope: SkillLearningScope
sourceInstinctIds: string[]
confidence: number
content: string
outputPath: string
}
/**
 * Resolved identity of the project the skill-learning system is working in,
 * plus how it was identified (`source`) and where its data is stored
 * (`storageDir`).
 */
export interface SkillLearningProjectContext {
projectId: string
projectName: string
scope: SkillLearningScope
source: ProjectContextSource
cwd: string
// Optional details; may be absent.
projectRoot?: string
gitRemote?: string
storageDir: string
}
/** A project context extended with first-seen / last-seen timestamps (stored as strings). */
export interface SkillLearningProjectRecord
extends SkillLearningProjectContext {
firstSeenAt: string
lastSeenAt: string
}
/**
 * Registry of known projects. `version` is pinned to the literal `1`.
 * `projects` is keyed by string — presumably the projectId; confirm against
 * the code that writes the registry.
 */
export interface SkillLearningProjectsRegistry {
version: 1
updatedAt: string
projects: Record<string, SkillLearningProjectRecord>
}