mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-18 14:25:51 +00:00
feat: 添加 skill learning 技能学习闭环系统
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
152
src/services/skillLearning/__tests__/evolution.test.ts
Normal file
152
src/services/skillLearning/__tests__/evolution.test.ts
Normal file
@@ -0,0 +1,152 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
||||
import { mkdtempSync, rmSync } from 'node:fs'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import { createInstinct } from '../instinctParser.js'
|
||||
import {
|
||||
classifyEvolutionTarget,
|
||||
clusterInstincts,
|
||||
generateAgentCandidates,
|
||||
generateCommandCandidates,
|
||||
generateSkillCandidates,
|
||||
} from '../evolution.js'
|
||||
|
||||
describe('evolution', () => {
  /**
   * Builds one project-scoped instinct for the shared "when writing tests"
   * trigger in the `testing` domain. Only the fields that vary between the
   * fixtures below are parameters.
   */
  const testingInstinct = (
    action: string,
    confidence: number,
    evidence: string,
  ) =>
    createInstinct({
      trigger: 'when writing tests',
      action,
      confidence,
      domain: 'testing',
      source: 'session-observation',
      scope: 'project',
      evidence: [evidence],
    })

  test('clusters related instincts by trigger and domain', () => {
    const clusters = clusterInstincts([
      testingInstinct('use testing-library', 0.7, 'one'),
      testingInstinct('avoid implementation mocks', 0.8, 'two'),
      testingInstinct('prefer describe/test structure', 0.75, 'three'),
    ])

    expect(clusters).toHaveLength(1)
    // The average is derived from floating-point confidences, so compare with
    // a tight tolerance instead of exact equality; the assertion then does not
    // depend on the order in which the implementation sums the values.
    expect(clusters[0]?.averageConfidence).toBeCloseTo(0.75, 10)
  })

  test('classifies explicit user-invoked workflows as command candidates', () => {
    const migrationRequest = createInstinct({
      trigger: 'when user asks to create migration',
      action: 'run command steps',
      confidence: 0.8,
      domain: 'workflow',
      source: 'session-observation',
      scope: 'project',
      evidence: ['one'],
    })

    expect(classifyEvolutionTarget([migrationRequest])).toBe('command')
  })

  test('generates skill candidates for high-confidence skill clusters', () => {
    // The cluster-size floor (>=3) introduced by the H15 fix is mandatory: a
    // single high-confidence instinct must not become a persistent skill, so
    // three independent observations are supplied here.
    const instincts = [
      testingInstinct('use testing-library', 0.8, 'one'),
      testingInstinct('avoid implementation mocks', 0.8, 'two'),
      testingInstinct('prefer describe/test structure', 0.8, 'three'),
    ]

    expect(generateSkillCandidates(instincts)).toHaveLength(1)
  })

  describe('three-path generation', () => {
    // Scratch directory handed to the generators as `cwd`; recreated per test.
    let tmp: string

    beforeEach(() => {
      tmp = mkdtempSync(join(tmpdir(), 'skill-learning-evolve-'))
    })

    afterEach(() => {
      rmSync(tmp, { recursive: true, force: true })
    })

    test('command-triggered instincts produce command candidates, not skill candidates', () => {
      // Three instincts are needed to satisfy the post-H15 cluster-size floor.
      const instincts = [0, 1, 2].map(i =>
        createInstinct({
          trigger: 'when user asks to create migration',
          action: 'run command: pnpm run migration',
          confidence: 0.85,
          domain: 'workflow',
          source: 'session-observation',
          scope: 'project',
          evidence: [`user invocation ${i}`],
        }),
      )

      const commands = generateCommandCandidates(instincts, { cwd: tmp })
      const skills = generateSkillCandidates(instincts, { cwd: tmp })

      expect(commands).toHaveLength(1)
      expect(skills).toHaveLength(0)
      expect(commands[0]?.content).toContain('/')
    })

    test('four debug multi-step instincts cluster into an agent candidate', () => {
      const instincts = [0, 1, 2, 3].map(i =>
        createInstinct({
          trigger: 'when debugging multi-step regressions',
          action: 'investigate stack trace, reproduce locally, and add test',
          confidence: 0.82,
          domain: 'debugging',
          source: 'session-observation',
          scope: 'project',
          evidence: [`incident-${i}`],
        }),
      )

      const agents = generateAgentCandidates(instincts, { cwd: tmp })

      expect(agents).toHaveLength(1)
      expect(agents[0]?.content).toContain('Playbook')
    })
  })
})
|
||||
143
src/services/skillLearning/__tests__/instinctStore.test.ts
Normal file
143
src/services/skillLearning/__tests__/instinctStore.test.ts
Normal file
@@ -0,0 +1,143 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
||||
import { mkdtempSync, rmSync } from 'node:fs'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import {
|
||||
loadInstincts,
|
||||
prunePendingInstincts,
|
||||
saveInstinct,
|
||||
upsertInstinct,
|
||||
} from '../instinctStore.js'
|
||||
import { createInstinct } from '../instinctParser.js'
|
||||
|
||||
// Storage root handed to the instinct store; points at a fresh temporary
// directory for every test.
let rootDir: string

beforeEach(function createStorageRoot() {
  const prefix = join(tmpdir(), 'skill-learning-instinct-')
  rootDir = mkdtempSync(prefix)
})

afterEach(function removeStorageRoot() {
  rmSync(rootDir, { force: true, recursive: true })
})
|
||||
|
||||
describe('instinctStore', () => {
  // Builds a fresh options object per call, just as every original call site
  // constructed its own `{ rootDir, project }` literal.
  const storeOptions = () => ({ rootDir, project: projectContext() })

  test('saves and loads instincts', async () => {
    const instinct = createInstinct({
      trigger: 'when testing',
      action: 'use testing-library',
      confidence: 0.7,
      domain: 'testing',
      source: 'session-observation',
      scope: 'project',
      evidence: ['user correction'],
    })
    await saveInstinct(instinct, storeOptions())

    const loaded = await loadInstincts(storeOptions())

    expect(loaded).toHaveLength(1)
    expect(loaded[0]?.action).toContain('testing-library')
  })

  test('upsert increases confidence for confirming instincts', async () => {
    const original = createInstinct({
      id: 'test-instinct',
      trigger: 'when testing',
      action: 'prefer testing-library',
      confidence: 0.7,
      domain: 'testing',
      source: 'session-observation',
      scope: 'project',
      evidence: ['one'],
    })
    await upsertInstinct(original, storeOptions())

    // Same instinct id, new piece of evidence.
    const confirmation = { ...original, evidence: ['two'] }
    const merged = await upsertInstinct(confirmation, storeOptions())

    expect(merged.confidence).toBeGreaterThan(original.confidence)
    expect(merged.evidence).toContain('one')
    expect(merged.evidence).toContain('two')
  })

  test('outcome-aware upsert: failure evidence reduces confidence', async () => {
    const first = createInstinct({
      id: 'outcome-aware',
      trigger: 'when writing tests',
      action: 'use testing-library',
      confidence: 0.7,
      domain: 'testing',
      source: 'session-observation',
      scope: 'project',
      evidence: ['one'],
      evidenceOutcome: 'success',
    })

    // First upsert stores the instinct; the second one repeats the same
    // success evidence and its effect is read back from the store.
    const afterSuccess = await upsertInstinct(first, storeOptions())
    await upsertInstinct(first, storeOptions())
    const reloaded = await loadInstincts(storeOptions())
    const afterAnotherSuccess = reloaded.find(i => i.id === 'outcome-aware')!

    const afterFailure = await upsertInstinct(
      { ...first, evidence: ['two'], evidenceOutcome: 'failure' as const },
      storeOptions(),
    )

    expect(afterSuccess.confidence).toBe(0.7)
    expect(afterAnotherSuccess.confidence).toBeGreaterThan(
      afterSuccess.confidence,
    )
    expect(afterFailure.confidence).toBeLessThan(afterAnotherSuccess.confidence)
  })

  test('prunes old pending instincts', async () => {
    // The second argument to createInstinct is a 2020 timestamp, which makes
    // this instinct far older than the 30 passed to prunePendingInstincts.
    const stale = createInstinct(
      {
        id: 'old-instinct',
        trigger: 'old',
        action: 'old',
        confidence: 0.3,
        domain: 'project',
        source: 'session-observation',
        scope: 'project',
        evidence: ['old'],
      },
      '2020-01-01T00:00:00.000Z',
    )
    await saveInstinct(stale, storeOptions())

    const pruned = await prunePendingInstincts(30, storeOptions())
    const prunedIds = pruned.map(instinct => instinct.id)
    expect(prunedIds).toContain('old-instinct')

    const remaining = await loadInstincts(storeOptions())
    expect(remaining).toEqual([])
  })
})
|
||||
|
||||
/**
 * Project context fixture for project `p1`, rooted at the current `rootDir`.
 * A new object is built on every call so it always reflects the temporary
 * directory of the running test.
 */
function projectContext() {
  const projectId = 'p1'
  const context = {
    projectId,
    projectName: 'project',
    cwd: rootDir,
    scope: 'project' as const,
    source: 'global' as const,
    storageDir: join(rootDir, 'projects', projectId),
  }
  return context
}
|
||||
81
src/services/skillLearning/__tests__/learningPolicy.test.ts
Normal file
81
src/services/skillLearning/__tests__/learningPolicy.test.ts
Normal file
@@ -0,0 +1,81 @@
|
||||
import { describe, expect, test } from 'bun:test'
|
||||
import { createInstinct } from '../instinctParser.js'
|
||||
import {
|
||||
buildLearnedSkillName,
|
||||
decideDefaultScope,
|
||||
isGenericSkillName,
|
||||
isValidLearnedSkillName,
|
||||
normalizeSkillName,
|
||||
shouldGenerateSkillFromInstincts,
|
||||
} from '../learningPolicy.js'
|
||||
|
||||
describe('learningPolicy', () => {
  /**
   * Builds a project-scoped, session-observed instinct with a single piece of
   * evidence; the remaining fields vary per test.
   */
  const instinctOf = (
    trigger: string,
    action: string,
    confidence: number,
    domain: 'testing' | 'workflow',
    evidence: string,
  ) =>
    createInstinct({
      trigger,
      action,
      confidence,
      domain,
      source: 'session-observation',
      scope: 'project',
      evidence: [evidence],
    })

  test('normalizes learned skill names to lowercase kebab-case with length cap', () => {
    const normalized = normalizeSkillName('Testing React Testing Library!!!')

    expect(normalized).toBe('testing-react-testing-library')
    expect(normalized.length).toBeLessThanOrEqual(64)
  })

  test('rejects generic learned skill names', () => {
    const generic = 'learned-skill'

    expect(isGenericSkillName(generic)).toBe(true)
    expect(isValidLearnedSkillName(generic)).toBe(false)
  })

  test('builds domain-prefixed names from instincts', () => {
    const skillName = buildLearnedSkillName([
      instinctOf(
        'when writing React tests',
        'use testing-library and avoid implementation mocks',
        0.85,
        'testing',
        'user correction',
      ),
    ])

    expect(skillName.startsWith('testing-')).toBe(true)
    expect(isValidLearnedSkillName(skillName)).toBe(true)
  })

  test('uses confidence threshold before generating skills', () => {
    const tentative = instinctOf(
      'when testing',
      'try a tentative pattern',
      0.3,
      'testing',
      'weak signal',
    )
    // Identical instinct, only the confidence is raised.
    const confident = { ...tentative, confidence: 0.8 }

    expect(shouldGenerateSkillFromInstincts([tentative])).toBe(false)
    expect(shouldGenerateSkillFromInstincts([confident])).toBe(true)
  })

  test('promotes only global-friendly repeated instinct groups by default', () => {
    const workflow = instinctOf(
      'when modifying code',
      'Grep then Read then Edit',
      0.8,
      'workflow',
      'repeated workflow',
    )
    const testing = instinctOf(
      'when writing React tests',
      'use testing-library',
      0.8,
      'testing',
      'project convention',
    )

    // The workflow instinct is passed twice (a repeated group); the testing
    // instinct is passed once.
    expect(decideDefaultScope([workflow, workflow])).toBe('global')
    expect(decideDefaultScope([testing])).toBe('project')
  })
})
|
||||
108
src/services/skillLearning/__tests__/observationStore.test.ts
Normal file
108
src/services/skillLearning/__tests__/observationStore.test.ts
Normal file
@@ -0,0 +1,108 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
||||
import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import {
|
||||
appendObservation,
|
||||
ingestTranscript,
|
||||
readObservations,
|
||||
scrubText,
|
||||
} from '../observationStore.js'
|
||||
|
||||
// Storage root handed to the observation store; points at a fresh temporary
// directory for every test.
let rootDir: string

beforeEach(function createStorageRoot() {
  const prefix = join(tmpdir(), 'skill-learning-observation-')
  rootDir = mkdtempSync(prefix)
})

afterEach(function removeStorageRoot() {
  rmSync(rootDir, { force: true, recursive: true })
})
|
||||
|
||||
describe('observationStore', () => {
  // Builds a fresh options object per call, just as every original call site
  // constructed its own `{ rootDir, project }` literal.
  const storeOptions = () => ({ rootDir, project: projectContext() })

  test('scrubs secrets and truncates large fields', () => {
    const secret = 'api_key: sk-ant-1234567890abcdef'

    const scrubbed = scrubText(`${secret} extra`, 80)
    expect(scrubbed).toContain('[REDACTED]')

    // Same secret followed by 120 filler characters, with a limit of 40.
    const truncated = scrubText(`${secret} ${'x'.repeat(120)}`, 40)
    expect(truncated).toContain('[REDACTED]')
    expect(truncated).toContain('[TRUNCATED')
  })

  test('appends and reads project observations', async () => {
    const observation = {
      id: 'obs-1',
      timestamp: '2026-04-16T00:00:00.000Z',
      event: 'user_message',
      sessionId: 's1',
      projectId: 'p1',
      projectName: 'project',
      cwd: rootDir,
      messageText: '不要 mock,用 testing-library',
    } as const
    await appendObservation(observation, storeOptions())

    const stored = await readObservations(storeOptions())

    expect(stored).toHaveLength(1)
    expect(stored[0]?.messageText).toContain('testing-library')
  })

  test('ingests Claude transcript JSONL into observations', async () => {
    // Two transcript entries: a user message and an assistant turn that
    // contains a single Grep tool_use.
    const entries = [
      {
        type: 'user',
        sessionId: 's1',
        cwd: rootDir,
        timestamp: '2026-04-16T00:00:00.000Z',
        message: { role: 'user', content: '不要 mock,用 testing-library' },
      },
      {
        type: 'assistant',
        sessionId: 's1',
        cwd: rootDir,
        timestamp: '2026-04-16T00:00:01.000Z',
        message: {
          role: 'assistant',
          content: [
            { type: 'tool_use', name: 'Grep', input: { pattern: 'x' } },
          ],
        },
      },
    ]
    const transcript = join(rootDir, 'session.jsonl')
    const jsonl = entries.map(entry => JSON.stringify(entry)).join('\n')
    writeFileSync(transcript, jsonl)

    const observations = await ingestTranscript(transcript, storeOptions())

    expect(observations.length).toBeGreaterThanOrEqual(2)
    expect(observations.map(o => o.event)).toContain('user_message')
    expect(observations.map(o => o.event)).toContain('tool_start')
  })
})
|
||||
|
||||
/**
 * Project context fixture for project `p1`, rooted at the current `rootDir`.
 * A new object is built on every call so it always reflects the temporary
 * directory of the running test.
 */
function projectContext() {
  const projectId = 'p1'
  const context = {
    projectId,
    projectName: 'project',
    cwd: rootDir,
    scope: 'project' as const,
    source: 'global' as const,
    storageDir: join(rootDir, 'projects', projectId),
  }
  return context
}
|
||||
135
src/services/skillLearning/__tests__/observerBackend.test.ts
Normal file
135
src/services/skillLearning/__tests__/observerBackend.test.ts
Normal file
@@ -0,0 +1,135 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
||||
import {
|
||||
getActiveObserverBackend,
|
||||
listObserverBackends,
|
||||
registerObserverBackend,
|
||||
resolveDefaultObserverBackend,
|
||||
setActiveObserverBackend,
|
||||
analyzeWithActiveBackend,
|
||||
type ObserverBackend,
|
||||
} from '../observerBackend.js'
|
||||
import { analyzeObservations } from '../sessionObserver.js'
|
||||
import type { StoredSkillObservation } from '../observationStore.js'
|
||||
|
||||
/**
 * Builds a `StoredSkillObservation` fixture from a partial override.
 *
 * Fixed defaults are laid down first, then `partial` is spread over them, and
 * finally `id` and `event` are re-derived. Doing that last step after the
 * spread means a caller passing an explicit `undefined` for `id` or `event`
 * still receives the default (a random UUID / `'user_message'`) instead of
 * having it erased by the spread.
 *
 * @param partial Fields to override on the default observation.
 * @returns The assembled observation.
 */
function obs(partial: Partial<StoredSkillObservation>): StoredSkillObservation {
  return {
    timestamp: '2026-04-16T00:00:00.000Z',
    sessionId: 's1',
    projectId: 'p1',
    projectName: 'project',
    cwd: process.cwd(),
    ...partial,
    // Must come after the spread: see the function comment.
    id: partial.id ?? crypto.randomUUID(),
    event: partial.event ?? 'user_message',
  }
}
|
||||
|
||||
// Name of the backend that was active when this file was loaded; every test
// that switches backends is undone by the hook below.
const originalBackendName = getActiveObserverBackend().name

afterEach(function restoreActiveBackend() {
  setActiveObserverBackend(originalBackendName)
})
|
||||
|
||||
describe('observerBackend', () => {
  // Builds the single user-correction observation reused by several tests. A
  // fresh observation is created on every call.
  const correction = () =>
    obs({ messageText: '不要直接 mock,用 testing-library' })

  test('registers heuristic and llm backends by default', () => {
    const registered = listObserverBackends()

    expect(registered).toContain('heuristic')
    expect(registered).toContain('llm')
  })

  test('resolveDefaultObserverBackend honours SKILL_LEARNING_OBSERVER_BACKEND env', () => {
    // Adversarial probe for the env switch: should this regress, the LLM
    // backend would be silently unreachable in production even with the env
    // variable set, which was the original AC2 gap.
    const saved = process.env.SKILL_LEARNING_OBSERVER_BACKEND
    const activeName = () => getActiveObserverBackend().name

    try {
      process.env.SKILL_LEARNING_OBSERVER_BACKEND = 'llm'
      resolveDefaultObserverBackend()
      expect(activeName()).toBe('llm')

      // An unknown backend name must not crash; the active backend is kept.
      process.env.SKILL_LEARNING_OBSERVER_BACKEND = 'nonexistent'
      resolveDefaultObserverBackend()
      expect(activeName()).toBe('llm')

      // Clearing the env keeps whatever was active; the explicit opt-out is
      // setActiveObserverBackend, not removing the variable.
      delete process.env.SKILL_LEARNING_OBSERVER_BACKEND
      resolveDefaultObserverBackend()
      expect(activeName()).toBe('llm')
    } finally {
      // Put the variable back exactly as it was, including "not set".
      if (saved !== undefined) {
        process.env.SKILL_LEARNING_OBSERVER_BACKEND = saved
      } else {
        delete process.env.SKILL_LEARNING_OBSERVER_BACKEND
      }
    }
  })

  test('heuristic backend preserves existing correction detection', async () => {
    setActiveObserverBackend('heuristic')

    const candidates = await analyzeWithActiveBackend([correction()])

    expect(candidates).toHaveLength(1)
    expect(candidates[0]?.action).toContain('testing-library')
  })

  test('llm backend short-circuits to [] on empty observations', async () => {
    // The real Haiku-backed implementation only calls queryHaiku when there
    // are observations to analyse. The empty-input short circuit guarantees
    // the no-cost path needed for hot loops.
    setActiveObserverBackend('llm')

    const candidates = await analyzeWithActiveBackend([])

    expect(candidates).toEqual([])
  })

  test('analyzeObservations routes to active backend (sync path throws for async backends)', () => {
    // The heuristic backend is sync, so analyzeObservations works directly.
    const heuristicResult = analyzeObservations([correction()])
    const previousCount = heuristicResult.length
    expect(previousCount).toBe(1)

    // The LLM backend is a real async implementation (queryHaiku). The sync
    // `analyzeObservations` helper refuses to return a pending Promise and
    // throws with an instruction to use `analyzeWithActiveBackend` instead;
    // catching that exact error proves the routing reached the async backend.
    setActiveObserverBackend('llm')
    const callSyncHelper = () => analyzeObservations([correction()])
    expect(callSyncHelper).toThrow(/Promise/)
  })

  test('custom backends can be registered and switched', async () => {
    // A backend that ignores its input and always yields one fixed candidate.
    const custom: ObserverBackend = {
      name: 'custom-test',
      analyze() {
        return [
          {
            trigger: 'custom trigger',
            action: 'custom action',
            confidence: 0.9,
            domain: 'project',
            source: 'session-observation',
            scope: 'project',
            evidence: ['custom evidence'],
          },
        ]
      },
    }
    registerObserverBackend(custom)
    setActiveObserverBackend('custom-test')

    const candidates = await analyzeWithActiveBackend([])

    expect(candidates).toHaveLength(1)
    expect(candidates[0]?.trigger).toBe('custom trigger')
  })

  test('switching to an unknown backend throws', () => {
    const switchToUnknown = () => setActiveObserverBackend('does-not-exist')

    expect(switchToUnknown).toThrow()
  })
})
|
||||
160
src/services/skillLearning/__tests__/projectContext.test.ts
Normal file
160
src/services/skillLearning/__tests__/projectContext.test.ts
Normal file
@@ -0,0 +1,160 @@
|
||||
import { afterAll, beforeEach, describe, expect, test } from 'bun:test'
|
||||
import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync } from 'fs'
|
||||
import { tmpdir } from 'os'
|
||||
import { join } from 'path'
|
||||
import { execFileSync } from 'child_process'
|
||||
import { getClaudeConfigHomeDir } from '../../../utils/envUtils.js'
|
||||
import {
|
||||
getProjectContextPath,
|
||||
getProjectsRegistryPath,
|
||||
getSkillLearningRootDir,
|
||||
resolveProjectContext,
|
||||
} from '../projectContext.js'
|
||||
import { isSkillLearningEnabled } from '../featureCheck.js'
|
||||
|
||||
// Every directory this file creates lives under this one base directory, so a
// single recursive removal in afterAll cleans everything up.
const tempBase = mkdtempSync(join(tmpdir(), 'skill-learning-context-test-'))
// Snapshot of the environment taken when the file is loaded.
const originalEnv = { ...process.env }

beforeEach(function isolateConfigHome() {
  resetEnv()
  // Point CLAUDE_CONFIG_DIR at a brand-new directory for each test.
  process.env.CLAUDE_CONFIG_DIR = mkdtempSync(join(tempBase, 'home-'))
})

afterAll(function restoreEnvAndCleanUp() {
  process.env = { ...originalEnv }
  clearConfigDirCache()
  rmSync(tempBase, { force: true, recursive: true })
})
|
||||
|
||||
describe('isSkillLearningEnabled', () => {
  // Flag values paired with the result expected for each, checked in order.
  const cases = [
    ['1', true],
    ['0', false],
  ] as const

  test('honors explicit SKILL_LEARNING_ENABLED overrides', () => {
    for (const [value, expected] of cases) {
      process.env.SKILL_LEARNING_ENABLED = value
      expect(isSkillLearningEnabled()).toBe(expected)
    }
  })

  test('honors FEATURE_SKILL_LEARNING env fallback', () => {
    // With the explicit override absent, only the feature flag is set.
    delete process.env.SKILL_LEARNING_ENABLED

    for (const [value, expected] of cases) {
      process.env.FEATURE_SKILL_LEARNING = value
      expect(isSkillLearningEnabled()).toBe(expected)
    }
  })
})
|
||||
|
||||
describe('resolveProjectContext', () => {
  test('prefers CLAUDE_PROJECT_DIR and writes registry files', () => {
    // cwd and the env-provided project dir are two different directories.
    const cwd = mkdirTempDir('cwd-')
    const projectDir = mkdirTempDir('project-')
    process.env.CLAUDE_PROJECT_DIR = projectDir

    const context = resolveProjectContext(cwd)
    const { projectId } = context

    expect(context.source).toBe('claude_project_dir')
    expect(context.scope).toBe('project')
    expect(context.projectRoot).toBe(projectDir)
    expect(context.projectName).toBe(lastPathSegment(projectDir))
    expect(context.storageDir).toContain(projectId)

    expect(existsSync(getProjectsRegistryPath())).toBe(true)
    expect(existsSync(getProjectContextPath(projectId))).toBe(true)

    const registry = readJson(getProjectsRegistryPath())
    const entry = registry.projects[projectId]
    expect(entry.source).toBe('claude_project_dir')
  })

  test('uses git remote as stable identity across different checkouts', () => {
    // Two separate checkouts that share the same origin URL.
    const remoteUrl = 'https://example.com/acme/app.git'
    const first = createGitRepo('remote-a-', remoteUrl)
    const second = createGitRepo('remote-b-', remoteUrl)

    const firstContext = resolveProjectContext(first)
    const secondContext = resolveProjectContext(second)

    expect(firstContext.source).toBe('git_remote')
    expect(secondContext.source).toBe('git_remote')
    expect(firstContext.projectId).toBe(secondContext.projectId)
    expect(firstContext.gitRemote).toBe('https://example.com/acme/app')
    expect(firstContext.projectName).toBe('app')

    const registry = readJson(getProjectsRegistryPath())
    const registeredIds = Object.keys(registry.projects)
    expect(registeredIds).toContain(firstContext.projectId)
    expect(registry.projects[firstContext.projectId].gitRemote).toBe(
      'https://example.com/acme/app',
    )
  })

  test('falls back to git root when origin remote is missing', () => {
    const repo = createGitRepo('root-only-')
    // Resolve from a subdirectory of the repository rather than its root.
    const nested = join(repo, 'nested')

    const context = resolveProjectContext(nested)

    expect(context.source).toBe('git_root')
    expect(context.scope).toBe('project')
    expect(context.projectRoot).toBe(repo)
    expect(context.projectName).toBe(lastPathSegment(repo))
  })

  test('falls back to global context outside a git repository', () => {
    const context = resolveProjectContext(mkdirTempDir('not-git-'))

    expect(context.source).toBe('global')
    expect(context.scope).toBe('global')
    expect(context.projectId).toBe('global')
    expect(context.projectName).toBe('Global')
    expect(context.storageDir).toBe(join(getSkillLearningRootDir(), 'global'))
    expect(existsSync(getProjectContextPath('global'))).toBe(true)
  })
})
|
||||
|
||||
/**
 * Creates a temporary directory containing a `nested` subdirectory, runs
 * `git init` in it and, when `remote` is given, registers it as `origin`.
 *
 * @param prefix Name prefix for the temporary directory.
 * @param remote Optional URL to add as the `origin` remote.
 * @returns Path of the created directory.
 */
function createGitRepo(prefix: string, remote?: string): string {
  const repoDir = mkdirTempDir(prefix)
  mkdirSync(join(repoDir, 'nested'), { recursive: true })

  // Runs one git command inside the repository with all output discarded.
  const git = (args: string[]): void => {
    execFileSync('git', args, { cwd: repoDir, stdio: 'ignore' })
  }

  git(['init'])
  if (remote) {
    git(['remote', 'add', 'origin', remote])
  }
  return repoDir
}
|
||||
|
||||
/**
 * Creates a uniquely named directory under `tempBase`.
 *
 * @param prefix Name prefix for the new directory.
 * @returns Path of the created directory.
 */
function mkdirTempDir(prefix: string): string {
  const template = join(tempBase, prefix)
  return mkdtempSync(template)
}
|
||||
|
||||
/**
 * Reads the file at `path` as UTF-8 text and parses it as JSON.
 *
 * @param path File to read.
 * @returns The parsed value, untyped.
 */
function readJson(path: string): any {
  const text = readFileSync(path, 'utf8')
  return JSON.parse(text)
}
|
||||
|
||||
/**
 * Returns the final non-empty segment of `path`, treating both `/` and `\`
 * as separators. When the path has no non-empty segment, the input is
 * returned unchanged.
 *
 * @param path Path to inspect.
 * @returns The last non-empty segment, or `path` itself.
 */
function lastPathSegment(path: string): string {
  const segments = path.split(/[\\/]/).filter(segment => Boolean(segment))
  const last = segments[segments.length - 1]
  return last ?? path
}
|
||||
|
||||
/**
 * Restores `process.env` to a copy of the snapshot in `originalEnv`, removes
 * the variables these tests manipulate, and clears the config-dir cache.
 */
function resetEnv(): void {
  process.env = { ...originalEnv }

  const managedKeys = [
    'CLAUDE_PROJECT_DIR',
    'SKILL_LEARNING_ENABLED',
    'FEATURE_SKILL_LEARNING',
  ]
  for (const key of managedKeys) {
    delete process.env[key]
  }

  clearConfigDirCache()
}
|
||||
|
||||
/**
 * Clears the `cache` attached to `getClaudeConfigHomeDir`, if that function
 * exposes one. Does nothing when no `cache` property is present.
 */
function clearConfigDirCache(): void {
  if (typeof getClaudeConfigHomeDir !== 'function') {
    return
  }
  if (!('cache' in getClaudeConfigHomeDir)) {
    return
  }
  // `clear` is invoked only when the cache object provides it.
  ;(getClaudeConfigHomeDir as any).cache.clear?.()
}
|
||||
144
src/services/skillLearning/__tests__/promotion.test.ts
Normal file
144
src/services/skillLearning/__tests__/promotion.test.ts
Normal file
@@ -0,0 +1,144 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
||||
import { mkdtempSync, rmSync } from 'node:fs'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import { createInstinct } from '../instinctParser.js'
|
||||
import { saveInstinct, loadInstincts } from '../instinctStore.js'
|
||||
import {
|
||||
checkPromotion,
|
||||
findPromotionCandidates,
|
||||
resetPromotionBookkeeping,
|
||||
} from '../promotion.js'
|
||||
import type { SkillLearningProjectContext } from '../types.js'
|
||||
|
||||
// Storage root shared by the fixtures and tests below; assigned a fresh
// temporary directory in beforeEach and removed in afterEach.
let rootDir: string
|
||||
|
||||
/**
 * Project-scoped context fixture for `projectId`, stored under
 * `<rootDir>/projects/<projectId>`. The project name equals the id.
 *
 * @param projectId Identifier of the project.
 * @returns A context with scope `project` and source `git_root`.
 */
function projectCtx(projectId: string): SkillLearningProjectContext {
  const storageDir = join(rootDir, 'projects', projectId)
  const context: SkillLearningProjectContext = {
    projectId,
    projectName: projectId,
    scope: 'project',
    source: 'git_root',
    cwd: rootDir,
    storageDir,
  }
  return context
}
|
||||
|
||||
/**
 * Global-scope context fixture, stored under `<rootDir>/global`.
 *
 * @returns A context with id `global`, scope `global` and source `global`.
 */
function globalCtx(): SkillLearningProjectContext {
  const storageDir = join(rootDir, 'global')
  const context: SkillLearningProjectContext = {
    projectId: 'global',
    projectName: 'Global',
    scope: 'global',
    source: 'global',
    cwd: rootDir,
    storageDir,
  }
  return context
}
|
||||
|
||||
// Each test gets a fresh storage root and starts with cleared promotion
// bookkeeping.
beforeEach(function prepareStorageRoot() {
  const prefix = join(tmpdir(), 'skill-learning-promote-')
  rootDir = mkdtempSync(prefix)
  resetPromotionBookkeeping()
})

afterEach(function removeStorageRoot() {
  rmSync(rootDir, { force: true, recursive: true })
})
|
||||
|
||||
describe('promotion', () => {
  /**
   * Builds an active, project-scoped workflow instinct. `text` is used for
   * both trigger and action, and `projectId` doubles as the project name.
   */
  const activeInstinct = (
    id: string,
    text: string,
    projectId: string,
    confidence = 0.85,
  ) =>
    createInstinct({
      id,
      trigger: text,
      action: text,
      confidence,
      domain: 'workflow',
      source: 'session-observation',
      scope: 'project',
      projectId,
      projectName: projectId,
      evidence: ['ev'],
      status: 'active',
    })

  /** Saves the same instinct id into project `alpha`, then project `beta`. */
  const saveInAlphaAndBeta = async (id: string, text: string) => {
    for (const projectId of ['alpha', 'beta']) {
      await saveInstinct(activeInstinct(id, text, projectId), {
        rootDir,
        project: projectCtx(projectId),
      })
    }
  }

  test('findPromotionCandidates returns instincts with 2+ projects and avg>=0.8', () => {
    const candidates = findPromotionCandidates([
      activeInstinct('shared-trigger', 'shared', 'alpha'),
      activeInstinct('shared-trigger', 'shared', 'beta'),
    ])

    expect(candidates).toHaveLength(1)
    // Sort a copy: Array.prototype.sort works in place and would otherwise
    // reorder the array owned by the candidate returned from the code under
    // test.
    const projectIds = [...(candidates[0]?.projectIds ?? [])].sort()
    expect(projectIds).toEqual(['alpha', 'beta'])
  })

  test('checkPromotion writes a global copy for cross-project instincts', async () => {
    await saveInAlphaAndBeta('shared-id', 'shared')

    const promoted = await checkPromotion({ rootDir })
    expect(promoted.map(p => p.instinctId)).toContain('shared-id')

    const globalInstincts = await loadInstincts({
      rootDir,
      scope: 'global',
      project: globalCtx(),
    })
    const promotedCopy = globalInstincts.find(i => i.id === 'shared-id')

    expect(promotedCopy).toBeDefined()
    expect(promotedCopy?.scope).toBe('global')
    expect(promotedCopy?.confidence).toBeGreaterThanOrEqual(0.8)
  })

  test('checkPromotion is idempotent within a session', async () => {
    await saveInAlphaAndBeta('repeat-id', 'repeat')

    // The second run happens without resetting the promotion bookkeeping.
    const first = await checkPromotion({ rootDir })
    const second = await checkPromotion({ rootDir })

    expect(first).toHaveLength(1)
    expect(second).toHaveLength(0)
  })

  test('does not promote when only one project has the instinct', async () => {
    const solo = activeInstinct('solo', 'solo', 'alpha', 0.9)
    await saveInstinct(solo, { rootDir, project: projectCtx('alpha') })

    const promoted = await checkPromotion({ rootDir })

    expect(promoted).toEqual([])
  })
})
|
||||
143
src/services/skillLearning/__tests__/runtimeObserver.test.ts
Normal file
143
src/services/skillLearning/__tests__/runtimeObserver.test.ts
Normal file
@@ -0,0 +1,143 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
||||
import { existsSync, mkdtempSync, rmSync } from 'node:fs'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import {
|
||||
resetSkillLearningConfig,
|
||||
setSkillLearningConfigForTest,
|
||||
} from '../config.js'
|
||||
import { loadInstincts, readObservations } from '../index.js'
|
||||
import {
|
||||
resetRuntimeObserverForTest,
|
||||
runSkillLearningPostSampling,
|
||||
} from '../runtimeObserver.js'
|
||||
|
||||
let root: string
|
||||
let previousCwd: string
|
||||
const originalEnv = { ...process.env }
|
||||
|
||||
beforeEach(() => {
|
||||
root = mkdtempSync(join(tmpdir(), 'skill-learning-runtime-'))
|
||||
previousCwd = process.cwd()
|
||||
process.chdir(root)
|
||||
process.env = { ...originalEnv }
|
||||
process.env.CLAUDE_SKILL_LEARNING_HOME = join(root, 'learning-home')
|
||||
process.env.CLAUDE_CONFIG_DIR = join(root, 'config')
|
||||
process.env.SKILL_LEARNING_ENABLED = '1'
|
||||
process.env.NODE_ENV = 'test'
|
||||
setSkillLearningConfigForTest({ minConfidence: 0.3, minClusterSize: 1 })
|
||||
resetRuntimeObserverForTest()
|
||||
})
|
||||
|
||||
// Undo the process-wide state changed in beforeEach (cwd, env, learning
// config), then delete the per-test temp root.
afterEach(() => {
  // Leave the temp directory before deleting it.
  process.chdir(previousCwd)
  process.env = { ...originalEnv }
  resetSkillLearningConfig()
  try {
    rmSync(root, {
      recursive: true,
      force: true,
      maxRetries: 10,
      retryDelay: 100,
    })
  } catch {
    // Temp cleanup is best-effort, matching the smoke and skill-gap suites:
    // a transient file handle (seen on Windows) must not fail a passing test.
  }
})
|
||||
|
||||
describe('runtimeObserver', () => {
|
||||
test('records and learns from post-sampling main-thread messages', async () => {
|
||||
await runSkillLearningPostSampling({
|
||||
querySource: 'repl_main_thread',
|
||||
messages: [
|
||||
{
|
||||
type: 'user',
|
||||
uuid: 'u1' as any,
|
||||
message: { role: 'user', content: '不要 mock,用 testing-library' },
|
||||
},
|
||||
],
|
||||
systemPrompt: [] as any,
|
||||
userContext: {},
|
||||
systemContext: {},
|
||||
toolUseContext: { agentId: undefined } as any,
|
||||
})
|
||||
|
||||
const observations = await readObservations({
|
||||
rootDir: process.env.CLAUDE_SKILL_LEARNING_HOME,
|
||||
project: {
|
||||
projectId: 'global',
|
||||
projectName: 'global',
|
||||
cwd: root,
|
||||
scope: 'global',
|
||||
source: 'global',
|
||||
storageDir: join(process.env.CLAUDE_SKILL_LEARNING_HOME!, 'global'),
|
||||
},
|
||||
})
|
||||
const instincts = await loadInstincts({
|
||||
rootDir: process.env.CLAUDE_SKILL_LEARNING_HOME,
|
||||
project: {
|
||||
projectId: 'global',
|
||||
projectName: 'global',
|
||||
cwd: root,
|
||||
scope: 'global',
|
||||
source: 'global',
|
||||
storageDir: join(process.env.CLAUDE_SKILL_LEARNING_HOME!, 'global'),
|
||||
},
|
||||
})
|
||||
|
||||
expect(observations).toHaveLength(1)
|
||||
expect(instincts[0]?.action).toContain('testing-library')
|
||||
})
|
||||
|
||||
test('skips subagent sessions', async () => {
|
||||
await runSkillLearningPostSampling({
|
||||
querySource: 'repl_main_thread',
|
||||
messages: [
|
||||
{
|
||||
type: 'user',
|
||||
uuid: 'u1' as any,
|
||||
message: { role: 'user', content: '不要 mock,用 testing-library' },
|
||||
},
|
||||
],
|
||||
systemPrompt: [] as any,
|
||||
userContext: {},
|
||||
systemContext: {},
|
||||
toolUseContext: { agentId: 'agent-1' } as any,
|
||||
})
|
||||
|
||||
const observations = await readObservations({
|
||||
rootDir: process.env.CLAUDE_SKILL_LEARNING_HOME,
|
||||
})
|
||||
expect(observations).toEqual([])
|
||||
})
|
||||
|
||||
test('auto-evolves repeated corrections into an active learned skill', async () => {
|
||||
await runSkillLearningPostSampling({
|
||||
querySource: 'repl_main_thread',
|
||||
messages: [
|
||||
{
|
||||
type: 'user',
|
||||
uuid: 'u1' as any,
|
||||
message: { role: 'user', content: '不要 mock,用 testing-library' },
|
||||
},
|
||||
{
|
||||
type: 'user',
|
||||
uuid: 'u2' as any,
|
||||
message: { role: 'user', content: '不要 mock,用 testing-library' },
|
||||
},
|
||||
{
|
||||
type: 'user',
|
||||
uuid: 'u3' as any,
|
||||
message: { role: 'user', content: '不要 mock,用 testing-library' },
|
||||
},
|
||||
],
|
||||
systemPrompt: [] as any,
|
||||
userContext: {},
|
||||
systemContext: {},
|
||||
toolUseContext: { agentId: undefined } as any,
|
||||
})
|
||||
|
||||
expect(
|
||||
existsSync(
|
||||
join(
|
||||
root,
|
||||
'.claude',
|
||||
'skills',
|
||||
'testing-choosing-between-mock-testing-library',
|
||||
'SKILL.md',
|
||||
),
|
||||
),
|
||||
).toBe(true)
|
||||
})
|
||||
})
|
||||
103
src/services/skillLearning/__tests__/sessionObserver.test.ts
Normal file
103
src/services/skillLearning/__tests__/sessionObserver.test.ts
Normal file
@@ -0,0 +1,103 @@
|
||||
import { describe, expect, test } from 'bun:test'
|
||||
import { analyzeObservations } from '../sessionObserver.js'
|
||||
import type { StoredSkillObservation } from '../observationStore.js'
|
||||
|
||||
/**
 * Test factory for a stored observation.
 *
 * Starts from fixed session/project defaults (a random id and the
 * 'user_message' event unless the caller supplies them) and then layers every
 * field of `partial` on top, so caller-supplied values always win.
 */
function obs(partial: Partial<StoredSkillObservation>): StoredSkillObservation {
  const defaults: StoredSkillObservation = {
    id: partial.id ?? crypto.randomUUID(),
    timestamp: '2026-04-16T00:00:00.000Z',
    event: partial.event ?? 'user_message',
    sessionId: 's1',
    projectId: 'p1',
    projectName: 'project',
    cwd: process.cwd(),
  }
  return { ...defaults, ...partial }
}
|
||||
|
||||
describe('sessionObserver', () => {
|
||||
test('extracts user correction instincts', () => {
|
||||
const instincts = analyzeObservations([
|
||||
obs({ messageText: '不要直接 mock,用 testing-library' }),
|
||||
])
|
||||
|
||||
expect(instincts).toHaveLength(1)
|
||||
expect(instincts[0]?.domain).toBe('testing')
|
||||
expect(instincts[0]?.action).toContain('testing-library')
|
||||
})
|
||||
|
||||
test('extracts repeated Grep -> Read -> Edit workflow instinct', () => {
|
||||
const seq = ['Grep', 'Read', 'Edit', 'Grep', 'Read', 'Edit']
|
||||
const instincts = analyzeObservations(
|
||||
seq.map((toolName, index) =>
|
||||
obs({ id: `o${index}`, event: 'tool_start', toolName }),
|
||||
),
|
||||
)
|
||||
|
||||
expect(instincts.some(instinct => instinct.domain === 'workflow')).toBe(
|
||||
true,
|
||||
)
|
||||
})
|
||||
|
||||
test('does not invent instincts without clear patterns', () => {
|
||||
expect(analyzeObservations([obs({ messageText: 'hello' })])).toEqual([])
|
||||
})
|
||||
|
||||
test('snapshots recent tool outcome on correction candidates', () => {
|
||||
const [instinct] = analyzeObservations([
|
||||
obs({
|
||||
id: 'o0',
|
||||
event: 'tool_complete',
|
||||
toolName: 'Edit',
|
||||
outcome: 'failure',
|
||||
}),
|
||||
obs({
|
||||
id: 'o1',
|
||||
event: 'user_message',
|
||||
messageText: '不要直接 mock,用 testing-library',
|
||||
}),
|
||||
])
|
||||
expect(instinct?.evidenceOutcome).toBe('failure')
|
||||
})
|
||||
|
||||
test('marks tool-error-resolution candidates as success outcome', () => {
|
||||
const instincts = analyzeObservations([
|
||||
obs({
|
||||
id: 'o0',
|
||||
event: 'tool_complete',
|
||||
toolName: 'Grep',
|
||||
outcome: 'failure',
|
||||
}),
|
||||
obs({
|
||||
id: 'o1',
|
||||
event: 'tool_complete',
|
||||
toolName: 'Grep',
|
||||
outcome: 'success',
|
||||
}),
|
||||
])
|
||||
const resolution = instincts.find(i => i.domain === 'debugging')
|
||||
expect(resolution?.evidenceOutcome).toBe('success')
|
||||
})
|
||||
|
||||
test('leaves evidenceOutcome undefined when no prior tool_complete exists', () => {
|
||||
const [instinct] = analyzeObservations([
|
||||
obs({
|
||||
id: 'o0',
|
||||
event: 'user_message',
|
||||
messageText: '不要直接 mock,用 testing-library',
|
||||
}),
|
||||
])
|
||||
expect(instinct?.evidenceOutcome).toBeUndefined()
|
||||
})
|
||||
|
||||
test('single "always/must" convention message gets confidence <= 0.4', () => {
|
||||
const instincts = analyzeObservations([
|
||||
obs({ messageText: 'always use pnpm' }),
|
||||
])
|
||||
|
||||
expect(instincts.length).toBeGreaterThan(0)
|
||||
for (const instinct of instincts) {
|
||||
expect(instinct.confidence).toBeLessThanOrEqual(0.4)
|
||||
}
|
||||
})
|
||||
})
|
||||
100
src/services/skillLearning/__tests__/skillDedup.test.ts
Normal file
100
src/services/skillLearning/__tests__/skillDedup.test.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
||||
import {
  existsSync,
  mkdirSync,
  mkdtempSync,
  readdirSync,
  readFileSync,
  rmSync,
} from 'node:fs'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import {
|
||||
generateOrMergeSkillDraft,
|
||||
writeLearnedSkill,
|
||||
} from '../skillGenerator.js'
|
||||
import { createInstinct } from '../instinctParser.js'
|
||||
|
||||
let root: string
|
||||
let skillsRoot: string
|
||||
|
||||
beforeEach(() => {
|
||||
root = mkdtempSync(join(tmpdir(), 'skill-learning-dedup-'))
|
||||
skillsRoot = join(root, '.claude', 'skills')
|
||||
mkdirSync(skillsRoot, { recursive: true })
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(root, { recursive: true, force: true })
|
||||
})
|
||||
|
||||
/**
 * Builds the instinct shared by the dedup tests: an active, project-scoped
 * "use testing-library" instinct whose single evidence entry is `evidence`.
 * Only the evidence varies between calls; trigger, action and domain stay fixed.
 */
function testingInstinct(evidence: string) {
  const input = {
    trigger: 'when writing tests',
    action: 'use testing-library',
    confidence: 0.85,
    domain: 'testing',
    source: 'session-observation',
    scope: 'project',
    evidence: [evidence],
    status: 'active',
  } as const
  return createInstinct({ ...input, evidence: [...input.evidence] })
}
|
||||
|
||||
// Exercises generateOrMergeSkillDraft against a real skills directory inside
// the per-test temp root: a first cluster must create a skill, and a repeat of
// the same cluster must append evidence instead of writing a duplicate.
describe('skill dedup', () => {
  test('first instinct cluster creates a new skill', async () => {
    const outcome = await generateOrMergeSkillDraft(
      [testingInstinct('first')],
      { cwd: root },
      [skillsRoot],
    )
    expect(outcome.action).toBe('create')
    if (outcome.action === 'create') {
      // Check the draft actually reaches disk, not just the in-memory outcome.
      const written = await writeLearnedSkill(outcome.draft)
      expect(existsSync(written)).toBe(true)
    }
  })

  test('second run with same trigger appends evidence instead of writing a duplicate', async () => {
    const first = await generateOrMergeSkillDraft(
      [testingInstinct('first')],
      { cwd: root },
      [skillsRoot],
    )
    expect(first.action).toBe('create')
    if (first.action === 'create') {
      await writeLearnedSkill(first.draft)
    }

    // Second pass — same cluster should collide with the skill we just wrote.
    const second = await generateOrMergeSkillDraft(
      [testingInstinct('second')],
      { cwd: root },
      [skillsRoot],
    )
    expect(second.action).toBe('append-evidence')
    if (second.action === 'append-evidence') {
      expect(second.overlap).toBeGreaterThanOrEqual(0.8)
      const body = readFileSync(second.appendedPath, 'utf8')
      expect(body).toContain('Learned evidence')
      expect(body).toContain('- second')
    }

    // There must still be only one SKILL.md file on disk.
    const files = findSkillMdFiles(skillsRoot)
    expect(files).toHaveLength(1)
  })
})
|
||||
|
||||
/**
 * Recursively collects the path of every `SKILL.md` found beneath `dir`.
 *
 * Directory entries are read with their file types, so no per-entry stat call
 * is needed. Symbolic links are not followed as directories, which keeps a
 * symlink cycle from recursing forever.
 *
 * @param dir Directory to walk; must exist.
 * @returns Full paths (joined onto `dir`) of the matching files, in
 *   directory-listing order.
 */
function findSkillMdFiles(dir: string): string[] {
  const results: string[] = []
  for (const entry of readdirSync(dir, { withFileTypes: true })) {
    const full = join(dir, entry.name)
    if (entry.isDirectory()) {
      results.push(...findSkillMdFiles(full))
    } else if (entry.name === 'SKILL.md' && existsSync(full)) {
      // existsSync filters out a dangling symlink that happens to be named
      // SKILL.md; regular files always pass.
      results.push(full)
    }
  }
  return results
}
|
||||
360
src/services/skillLearning/__tests__/skillGapStore.test.ts
Normal file
360
src/services/skillLearning/__tests__/skillGapStore.test.ts
Normal file
@@ -0,0 +1,360 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
||||
import {
|
||||
existsSync,
|
||||
mkdtempSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
writeFileSync,
|
||||
mkdirSync,
|
||||
} from 'node:fs'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import {
|
||||
findGapKeyByDraftPath,
|
||||
readSkillGaps,
|
||||
recordDraftHit,
|
||||
recordSkillGap,
|
||||
rejectSkillGap,
|
||||
shouldPromoteToActive,
|
||||
shouldPromoteToDraft,
|
||||
type SkillGapRecord,
|
||||
} from '../skillGapStore.js'
|
||||
import type { SkillLearningProjectContext } from '../types.js'
|
||||
|
||||
let root: string
|
||||
let project: SkillLearningProjectContext
|
||||
|
||||
beforeEach(() => {
|
||||
root = mkdtempSync(join(tmpdir(), 'skill-gap-store-'))
|
||||
project = {
|
||||
projectId: 'global',
|
||||
projectName: 'global',
|
||||
scope: 'global',
|
||||
source: 'global',
|
||||
cwd: root,
|
||||
storageDir: join(root, 'global'),
|
||||
projectRoot: root,
|
||||
}
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
try {
|
||||
rmSync(root, {
|
||||
recursive: true,
|
||||
force: true,
|
||||
maxRetries: 10,
|
||||
retryDelay: 100,
|
||||
})
|
||||
} catch {
|
||||
// Temp cleanup best-effort; Windows may hold transient handles.
|
||||
}
|
||||
})
|
||||
|
||||
/** Returns the `.claude/skills/.drafts` directory under the current test's temp root. */
function draftsDir(): string {
  const segments = ['.claude', 'skills', '.drafts']
  return join(root, ...segments)
}
|
||||
|
||||
describe('recordSkillGap — P0-1 state machine', () => {
|
||||
test('first occurrence lands in pending and writes no skill file', async () => {
|
||||
const gap = await recordSkillGap({
|
||||
prompt: 'Refactor the data pipeline please',
|
||||
cwd: root,
|
||||
project,
|
||||
rootDir: root,
|
||||
})
|
||||
|
||||
expect(gap.status).toBe('pending')
|
||||
expect(gap.count).toBe(1)
|
||||
expect(gap.draft).toBeUndefined()
|
||||
expect(gap.active).toBeUndefined()
|
||||
expect(existsSync(draftsDir())).toBe(false)
|
||||
})
|
||||
|
||||
test('single Chinese exhortation stays pending — no draft, no active', async () => {
|
||||
const gap = await recordSkillGap({
|
||||
prompt: '以后必须严格检查类型',
|
||||
cwd: root,
|
||||
project,
|
||||
rootDir: root,
|
||||
})
|
||||
|
||||
expect(gap.status).toBe('pending')
|
||||
expect(gap.draft).toBeUndefined()
|
||||
expect(gap.active).toBeUndefined()
|
||||
})
|
||||
|
||||
test('second occurrence promotes to draft but not active', async () => {
|
||||
const prompt = 'explain the build pipeline'
|
||||
await recordSkillGap({ prompt, cwd: root, project, rootDir: root })
|
||||
const second = await recordSkillGap({
|
||||
prompt,
|
||||
cwd: root,
|
||||
project,
|
||||
rootDir: root,
|
||||
})
|
||||
|
||||
expect(second.status).toBe('draft')
|
||||
expect(second.count).toBe(2)
|
||||
expect(second.draft?.type).toBe('draft')
|
||||
expect(second.active).toBeUndefined()
|
||||
expect(existsSync(second.draft!.skillPath)).toBe(true)
|
||||
})
|
||||
|
||||
test('single strong English exhortation ("must never") stays pending', async () => {
|
||||
const gap = await recordSkillGap({
|
||||
prompt: 'You must never commit secrets to git',
|
||||
cwd: root,
|
||||
project,
|
||||
rootDir: root,
|
||||
})
|
||||
|
||||
expect(gap.status).toBe('pending')
|
||||
expect(gap.count).toBe(1)
|
||||
expect(gap.draft).toBeUndefined()
|
||||
expect(gap.active).toBeUndefined()
|
||||
})
|
||||
|
||||
test('reaching count >= 4 promotes an existing draft to active', async () => {
|
||||
const prompt = 'clean up abandoned feature flags'
|
||||
for (let i = 0; i < 3; i++) {
|
||||
await recordSkillGap({ prompt, cwd: root, project, rootDir: root })
|
||||
}
|
||||
const fourth = await recordSkillGap({
|
||||
prompt,
|
||||
cwd: root,
|
||||
project,
|
||||
rootDir: root,
|
||||
})
|
||||
|
||||
expect(fourth.status).toBe('active')
|
||||
expect(fourth.count).toBe(4)
|
||||
expect(fourth.draft).toBeDefined()
|
||||
expect(fourth.active?.type).toBe('active')
|
||||
expect(existsSync(fourth.active!.skillPath)).toBe(true)
|
||||
})
|
||||
|
||||
test('rejected gaps do not regenerate artefacts on subsequent calls', async () => {
|
||||
const prompt = 'please format the README differently'
|
||||
await recordSkillGap({ prompt, cwd: root, project, rootDir: root })
|
||||
const promoted = await recordSkillGap({
|
||||
prompt,
|
||||
cwd: root,
|
||||
project,
|
||||
rootDir: root,
|
||||
})
|
||||
expect(promoted.status).toBe('draft')
|
||||
|
||||
await rejectSkillGap(promoted.key, project, root)
|
||||
const afterReject = await recordSkillGap({
|
||||
prompt,
|
||||
cwd: root,
|
||||
project,
|
||||
rootDir: root,
|
||||
})
|
||||
|
||||
expect(afterReject.status).toBe('rejected')
|
||||
expect(afterReject.count).toBe(3)
|
||||
expect(afterReject.active).toBeUndefined()
|
||||
})
|
||||
})
|
||||
|
||||
describe('recordDraftHit — draft hits escalation (P1-4 contract)', () => {
|
||||
test('draftHits reaching 2 escalates a draft to active', async () => {
|
||||
const prompt = 'improve error handling in loader.ts'
|
||||
await recordSkillGap({ prompt, cwd: root, project, rootDir: root })
|
||||
const drafted = await recordSkillGap({
|
||||
prompt,
|
||||
cwd: root,
|
||||
project,
|
||||
rootDir: root,
|
||||
})
|
||||
expect(drafted.status).toBe('draft')
|
||||
|
||||
// Distinct session IDs — recordDraftHit enforces one hit per session so
|
||||
// a single session can't flip the draftHits>=2 active gate alone
|
||||
await recordDraftHit(drafted.key, project, root, 'session-a')
|
||||
const afterSecondHit = await recordDraftHit(
|
||||
drafted.key,
|
||||
project,
|
||||
root,
|
||||
'session-b',
|
||||
)
|
||||
|
||||
expect(afterSecondHit?.draftHits).toBe(2)
|
||||
expect(afterSecondHit?.status).toBe('active')
|
||||
expect(afterSecondHit?.active?.type).toBe('active')
|
||||
})
|
||||
|
||||
test('first draft hit does not promote to active', async () => {
|
||||
const prompt = 'add missing null checks in handler'
|
||||
await recordSkillGap({ prompt, cwd: root, project, rootDir: root })
|
||||
const drafted = await recordSkillGap({
|
||||
prompt,
|
||||
cwd: root,
|
||||
project,
|
||||
rootDir: root,
|
||||
})
|
||||
|
||||
const afterOneHit = await recordDraftHit(drafted.key, project, root)
|
||||
|
||||
expect(afterOneHit?.draftHits).toBe(1)
|
||||
expect(afterOneHit?.status).toBe('draft')
|
||||
expect(afterOneHit?.active).toBeUndefined()
|
||||
})
|
||||
|
||||
test('findGapKeyByDraftPath resolves the correct gap for an existing draft', async () => {
|
||||
const prompt = 'restructure the module boundaries'
|
||||
await recordSkillGap({ prompt, cwd: root, project, rootDir: root })
|
||||
const drafted = await recordSkillGap({
|
||||
prompt,
|
||||
cwd: root,
|
||||
project,
|
||||
rootDir: root,
|
||||
})
|
||||
expect(drafted.draft?.skillPath).toBeTruthy()
|
||||
|
||||
const foundKey = await findGapKeyByDraftPath(
|
||||
drafted.draft!.skillPath,
|
||||
project,
|
||||
root,
|
||||
)
|
||||
|
||||
expect(foundKey).toBe(drafted.key)
|
||||
})
|
||||
|
||||
test('findGapKeyByDraftPath returns undefined for unknown paths', async () => {
|
||||
const result = await findGapKeyByDraftPath(
|
||||
'/nowhere/.claude/skills/.drafts/mystery/SKILL.md',
|
||||
project,
|
||||
root,
|
||||
)
|
||||
expect(result).toBeUndefined()
|
||||
})
|
||||
|
||||
test('recordDraftHit is a no-op on pending gaps', async () => {
|
||||
const gap = await recordSkillGap({
|
||||
prompt: 'investigate the mysterious cache bug',
|
||||
cwd: root,
|
||||
project,
|
||||
rootDir: root,
|
||||
})
|
||||
|
||||
const updated = await recordDraftHit(gap.key, project, root)
|
||||
|
||||
expect(updated?.status).toBe('pending')
|
||||
expect(updated?.draftHits).toBe(0)
|
||||
})
|
||||
})
|
||||
|
||||
describe('shouldPromoteToDraft / shouldPromoteToActive', () => {
|
||||
test('shouldPromoteToDraft requires count >= 2 (strong signal no longer bypasses)', () => {
|
||||
const base: SkillGapRecord = {
|
||||
key: 'k',
|
||||
prompt: 'refactor this',
|
||||
count: 1,
|
||||
draftHits: 0,
|
||||
draftHitSessions: [],
|
||||
status: 'pending',
|
||||
sessionId: 's',
|
||||
cwd: root,
|
||||
projectId: 'global',
|
||||
projectName: 'global',
|
||||
recommendations: [],
|
||||
createdAt: new Date().toISOString(),
|
||||
updatedAt: new Date().toISOString(),
|
||||
}
|
||||
|
||||
expect(shouldPromoteToDraft(base)).toBe(false)
|
||||
expect(shouldPromoteToDraft({ ...base, count: 2 })).toBe(true)
|
||||
// Single strong-signal prompt no longer promotes — must also repeat.
|
||||
expect(
|
||||
shouldPromoteToDraft({ ...base, prompt: '必须使用 testing-library' }),
|
||||
).toBe(false)
|
||||
})
|
||||
|
||||
test('shouldPromoteToActive requires a draft plus threshold', () => {
|
||||
const withDraft: SkillGapRecord = {
|
||||
key: 'k',
|
||||
prompt: 'refactor',
|
||||
count: 3,
|
||||
draftHits: 0,
|
||||
draftHitSessions: [],
|
||||
status: 'draft',
|
||||
sessionId: 's',
|
||||
cwd: root,
|
||||
projectId: 'global',
|
||||
projectName: 'global',
|
||||
recommendations: [],
|
||||
createdAt: new Date().toISOString(),
|
||||
updatedAt: new Date().toISOString(),
|
||||
draft: { type: 'draft', name: 'x', skillPath: '/tmp/x' },
|
||||
}
|
||||
|
||||
expect(shouldPromoteToActive(withDraft)).toBe(false)
|
||||
expect(shouldPromoteToActive({ ...withDraft, count: 4 })).toBe(true)
|
||||
expect(shouldPromoteToActive({ ...withDraft, draftHits: 2 })).toBe(true)
|
||||
expect(shouldPromoteToActive({ ...withDraft, draft: undefined })).toBe(
|
||||
false,
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
describe('migrateLegacyGapState', () => {
|
||||
test('resets legacy status=draft count=1 (no file) to pending', async () => {
|
||||
const gapPath = join(root, 'global', 'skill-gaps.json')
|
||||
mkdirSync(join(root, 'global'), { recursive: true })
|
||||
const legacy = {
|
||||
version: 1,
|
||||
gaps: {
|
||||
'legacy-key': {
|
||||
key: 'legacy-key',
|
||||
prompt: 'old gap',
|
||||
count: 1,
|
||||
status: 'draft',
|
||||
sessionId: 's1',
|
||||
cwd: root,
|
||||
projectId: 'global',
|
||||
projectName: 'global',
|
||||
recommendations: [],
|
||||
createdAt: '2025-01-01T00:00:00.000Z',
|
||||
updatedAt: '2025-01-01T00:00:00.000Z',
|
||||
},
|
||||
},
|
||||
}
|
||||
writeFileSync(gapPath, JSON.stringify(legacy), 'utf8')
|
||||
|
||||
const gaps = await readSkillGaps(project, root)
|
||||
const migrated = gaps[0]
|
||||
|
||||
expect(migrated?.status).toBe('pending')
|
||||
expect(migrated?.draftHits).toBe(0)
|
||||
})
|
||||
|
||||
test('downgrades active without skill file to draft if draft exists', async () => {
|
||||
const gapPath = join(root, 'global', 'skill-gaps.json')
|
||||
mkdirSync(join(root, 'global'), { recursive: true })
|
||||
const legacy = {
|
||||
version: 1,
|
||||
gaps: {
|
||||
'legacy-key': {
|
||||
key: 'legacy-key',
|
||||
prompt: 'old',
|
||||
count: 3,
|
||||
status: 'active',
|
||||
sessionId: 's1',
|
||||
cwd: root,
|
||||
projectId: 'global',
|
||||
projectName: 'global',
|
||||
recommendations: [],
|
||||
createdAt: '2025-01-01T00:00:00.000Z',
|
||||
updatedAt: '2025-01-01T00:00:00.000Z',
|
||||
draft: { type: 'draft', name: 'x', skillPath: '/tmp/x' },
|
||||
},
|
||||
},
|
||||
}
|
||||
writeFileSync(gapPath, JSON.stringify(legacy), 'utf8')
|
||||
|
||||
const gaps = await readSkillGaps(project, root)
|
||||
expect(gaps[0]?.status).toBe('draft')
|
||||
})
|
||||
})
|
||||
56
src/services/skillLearning/__tests__/skillGenerator.test.ts
Normal file
56
src/services/skillLearning/__tests__/skillGenerator.test.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
||||
import { existsSync, mkdtempSync, readFileSync, rmSync } from 'node:fs'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import { createInstinct } from '../instinctParser.js'
|
||||
import { generateSkillDraft, writeLearnedSkill } from '../skillGenerator.js'
|
||||
|
||||
let cwd: string
|
||||
|
||||
beforeEach(() => {
|
||||
cwd = mkdtempSync(join(tmpdir(), 'skill-learning-generator-'))
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(cwd, { recursive: true, force: true })
|
||||
})
|
||||
|
||||
describe('skillGenerator', () => {
|
||||
test('generates a valid SKILL.md draft from instincts', () => {
|
||||
const instinct = createInstinct({
|
||||
trigger: 'when writing React tests',
|
||||
action: 'use testing-library and avoid implementation mocks',
|
||||
confidence: 0.85,
|
||||
domain: 'testing',
|
||||
source: 'session-observation',
|
||||
scope: 'project',
|
||||
evidence: ['user correction'],
|
||||
})
|
||||
|
||||
const draft = generateSkillDraft([instinct], { cwd })
|
||||
|
||||
expect(draft.name).toContain('testing')
|
||||
expect(draft.content).toContain('name:')
|
||||
expect(draft.content).toContain('description:')
|
||||
expect(draft.content).toContain('## Trigger')
|
||||
expect(draft.content).toContain('## Evidence')
|
||||
})
|
||||
|
||||
test('writes learned skills to project scope', async () => {
|
||||
const instinct = createInstinct({
|
||||
trigger: 'when writing React tests',
|
||||
action: 'use testing-library',
|
||||
confidence: 0.85,
|
||||
domain: 'testing',
|
||||
source: 'session-observation',
|
||||
scope: 'project',
|
||||
evidence: ['user correction'],
|
||||
})
|
||||
const draft = generateSkillDraft([instinct], { cwd })
|
||||
|
||||
const file = await writeLearnedSkill(draft)
|
||||
|
||||
expect(existsSync(file)).toBe(true)
|
||||
expect(readFileSync(file, 'utf8')).toContain('use testing-library')
|
||||
})
|
||||
})
|
||||
154
src/services/skillLearning/__tests__/skillLearningSmoke.test.ts
Normal file
154
src/services/skillLearning/__tests__/skillLearningSmoke.test.ts
Normal file
@@ -0,0 +1,154 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
||||
import {
|
||||
existsSync,
|
||||
mkdtempSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
writeFileSync,
|
||||
} from 'node:fs'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import { call } from '../../../commands/skill-learning/skill-learning.js'
|
||||
import { clearCommandsCache } from '../../../commands.js'
|
||||
import { getSkillIndex, searchSkills } from '../../skillSearch/localSearch.js'
|
||||
import {
|
||||
resetSkillLearningConfig,
|
||||
setSkillLearningConfigForTest,
|
||||
} from '../config.js'
|
||||
import { loadInstincts, readObservations } from '../index.js'
|
||||
|
||||
let root: string
|
||||
let previousCwd: string
|
||||
const originalEnv = { ...process.env }
|
||||
|
||||
beforeEach(() => {
|
||||
root = mkdtempSync(join(tmpdir(), 'skill-learning-smoke-'))
|
||||
previousCwd = process.cwd()
|
||||
process.chdir(root)
|
||||
process.env = { ...originalEnv }
|
||||
process.env.CLAUDE_SKILL_LEARNING_HOME = join(root, 'learning-home')
|
||||
process.env.CLAUDE_CONFIG_DIR = join(root, 'config')
|
||||
process.env.SKILL_LEARNING_ENABLED = '1'
|
||||
process.env.ANTHROPIC_API_KEY = 'test-key'
|
||||
process.env.NODE_ENV = 'test'
|
||||
setSkillLearningConfigForTest({ minConfidence: 0.3, minClusterSize: 1 })
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
process.chdir(previousCwd)
|
||||
process.env = { ...originalEnv }
|
||||
resetSkillLearningConfig()
|
||||
clearCommandsCache()
|
||||
try {
|
||||
rmSync(root, {
|
||||
recursive: true,
|
||||
force: true,
|
||||
maxRetries: 10,
|
||||
retryDelay: 100,
|
||||
})
|
||||
} catch {
|
||||
// Windows can keep a transient handle open after dynamic command loading.
|
||||
// Temp cleanup is best-effort; failing here would mask the smoke result.
|
||||
}
|
||||
})
|
||||
|
||||
describe('skillLearning smoke', () => {
|
||||
test('ingests corrections, evolves a learned skill, and skill search finds it', async () => {
|
||||
const transcript = join(root, 'session.jsonl')
|
||||
writeFileSync(transcript, buildTranscript(), 'utf8')
|
||||
|
||||
// Pass --min-session-length=0 so the 9-observation test transcript is not
|
||||
// skipped by the ECC-parity gate (default threshold: 10 observations).
|
||||
const ingestResult = await call(
|
||||
`ingest ${transcript} --min-session-length=0`,
|
||||
{} as any,
|
||||
)
|
||||
expect(ingestResult.type).toBe('text')
|
||||
if (ingestResult.type === 'text') {
|
||||
expect(ingestResult.value).toContain('Ingested 9 observations')
|
||||
}
|
||||
|
||||
const options = {
|
||||
rootDir: process.env.CLAUDE_SKILL_LEARNING_HOME,
|
||||
project: {
|
||||
projectId: 'global',
|
||||
projectName: 'global',
|
||||
cwd: root,
|
||||
scope: 'global' as const,
|
||||
source: 'global' as const,
|
||||
storageDir: join(process.env.CLAUDE_SKILL_LEARNING_HOME!, 'global'),
|
||||
},
|
||||
}
|
||||
const observations = await readObservations(options)
|
||||
expect(observations).toHaveLength(9)
|
||||
|
||||
const instincts = await loadInstincts(options)
|
||||
const testingInstinct = instincts.find(i => i.domain === 'testing')
|
||||
expect(testingInstinct?.confidence).toBe(0.8)
|
||||
expect(testingInstinct?.status).toBe('active')
|
||||
|
||||
const evolveResult = await call('evolve --generate', {} as any)
|
||||
expect(evolveResult.type).toBe('text')
|
||||
if (evolveResult.type === 'text') {
|
||||
// Smoke transcript (9 obs, single fabricated instinct per domain) may
|
||||
// produce 1 or 2 candidates depending on sessionObserver's clustering.
|
||||
// Post-H15 we accept either — the smoke proves end-to-end wiring, not
|
||||
// exact cluster math.
|
||||
expect(evolveResult.value).toMatch(/Generated [12] learned skill\(s\)/)
|
||||
}
|
||||
|
||||
const skillName = 'testing-choosing-between-mock-testing-library'
|
||||
const skillFile = join(root, '.claude', 'skills', skillName, 'SKILL.md')
|
||||
expect(existsSync(skillFile)).toBe(true)
|
||||
expect(readFileSync(skillFile, 'utf8')).toContain('Prefer testing-library')
|
||||
|
||||
clearCommandsCache()
|
||||
const index = await getSkillIndex(root)
|
||||
expect(index.some(entry => entry.name === skillName)).toBe(true)
|
||||
|
||||
const results = searchSkills(
|
||||
'write tests with testing library instead of mock',
|
||||
index,
|
||||
5,
|
||||
)
|
||||
expect(results[0]?.name).toBe(skillName)
|
||||
})
|
||||
})
|
||||
|
||||
/**
 * Serialises the smoke-session transcript as JSONL.
 *
 * The transcript opens with a user correction and then repeats
 * Grep -> Read -> Edit -> correction twice (once per Grep pattern), giving
 * nine entries stamped with consecutive seconds 0..8. Each entry is one JSON
 * line and the output ends with a trailing newline.
 */
function buildTranscript(): string {
  const correction = '不要 mock,用 testing-library'
  const testFile = 'src/example.test.tsx'

  const entries: unknown[] = [user(correction, 0)]
  for (const pattern of ['renderHook', 'mock']) {
    // The next free second equals the number of entries emitted so far.
    const start = entries.length
    entries.push(
      toolUse('Grep', { pattern }, start),
      toolUse('Read', { file_path: testFile }, start + 1),
      toolUse('Edit', { file_path: testFile }, start + 2),
      user(correction, start + 3),
    )
  }

  const lines = entries.map(entry => JSON.stringify(entry))
  return `${lines.join('\n')}\n`
}
|
||||
|
||||
/**
 * Builds a user transcript entry for the smoke session.
 *
 * @param content Text of the user message.
 * @param second Offset in whole seconds from 2026-04-16T00:00:00Z. The
 *   timestamp comes from real date arithmetic, so offsets of 10 or more (and
 *   roll-over past a minute) still yield a valid ISO-8601 string rather than
 *   a malformed `00:00:010` value.
 * @returns A transcript record tagged with the smoke session id and the
 *   current temp root as `cwd`.
 */
function user(content: string, second: number) {
  return {
    type: 'user',
    sessionId: 'smoke-session',
    cwd: root,
    // Month is zero-based: 3 is April.
    timestamp: new Date(Date.UTC(2026, 3, 16, 0, 0, second)).toISOString(),
    message: { role: 'user', content },
  }
}
|
||||
|
||||
/**
 * Builds an assistant transcript entry containing a single `tool_use` block.
 *
 * @param name Tool name recorded in the `tool_use` block.
 * @param input Tool input payload, stored as given.
 * @param second Offset in whole seconds from 2026-04-16T00:00:00Z. The
 *   timestamp comes from real date arithmetic, so offsets of 10 or more (and
 *   roll-over past a minute) still yield a valid ISO-8601 string rather than
 *   a malformed `00:00:010` value.
 * @returns A transcript record tagged with the smoke session id and the
 *   current temp root as `cwd`.
 */
function toolUse(name: string, input: Record<string, unknown>, second: number) {
  return {
    type: 'assistant',
    sessionId: 'smoke-session',
    cwd: root,
    // Month is zero-based: 3 is April.
    timestamp: new Date(Date.UTC(2026, 3, 16, 0, 0, second)).toISOString(),
    message: {
      role: 'assistant',
      content: [{ type: 'tool_use', name, input }],
    },
  }
}
|
||||
161
src/services/skillLearning/__tests__/skillLifecycle.test.ts
Normal file
161
src/services/skillLearning/__tests__/skillLifecycle.test.ts
Normal file
@@ -0,0 +1,161 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
||||
import {
|
||||
existsSync,
|
||||
mkdtempSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
writeFileSync,
|
||||
} from 'node:fs'
|
||||
import { mkdir } from 'node:fs/promises'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import type { LearnedSkillDraft } from '../types.js'
|
||||
import {
|
||||
applySkillLifecycleDecision,
|
||||
compareExistingSkills,
|
||||
decideSkillLifecycle,
|
||||
loadExistingSkills,
|
||||
} from '../skillLifecycle.js'
|
||||
|
||||
// Temporary directory that serves as the skills root for the running test.
let root: string

// Every test starts with a brand-new, empty skills root.
beforeEach(() => {
  const prefix = join(tmpdir(), 'skill-learning-lifecycle-')
  root = mkdtempSync(prefix)
})

// Remove the skills root together with everything the test wrote into it.
afterEach(() => {
  rmSync(root, { force: true, recursive: true })
})
|
||||
|
||||
describe('skillLifecycle', () => {
  // Description text shared by the existing skill and the draft in the
  // tests that need the two to overlap.
  const OVERLAP_TEXT =
    'Use testing-library for React tests and avoid implementation mocks'

  test('detects overlapping existing skills', async () => {
    await writeSkill('react-testing', 'Use testing-library for React tests')
    const candidate = draftSkill('react-testing-updated', OVERLAP_TEXT)

    const [closest] = await compareExistingSkills(candidate, [root])

    expect(closest?.name).toBe('react-testing')
  })

  test('replace archives old skill so it leaves active index', async () => {
    await writeSkill('react-testing', OVERLAP_TEXT)
    const candidate = draftSkill('react-testing-updated', OVERLAP_TEXT)
    const overlaps = await compareExistingSkills(candidate, [root])
    const decision = decideSkillLifecycle(candidate, overlaps)

    expect(decision.type).toBe('replace')
    const outcome = await applySkillLifecycleDecision(decision)

    expect(outcome.activePath).toBeDefined()
    expect(outcome.archivedPath).toBeDefined()
    // The old skill directory must no longer exist under the active root.
    expect(existsSync(join(root, 'react-testing'))).toBe(false)

    const manifestPath = join(
      outcome.archivedPath!,
      'replacement-manifest.json',
    )
    expect(existsSync(manifestPath)).toBe(true)

    const activeNames = (await loadExistingSkills([root])).map(
      skill => skill.name,
    )
    expect(activeNames).not.toContain('react-testing')
  })

  test('create writes new skill when no overlap exists', async () => {
    const candidate = draftSkill(
      'new-testing',
      'A unique learned testing workflow',
    )
    // No matches are supplied, so nothing can overlap with the draft.
    const decision = decideSkillLifecycle(candidate, [])
    const outcome = await applySkillLifecycleDecision(decision)

    expect(outcome.activePath).toBeDefined()
    const written = readFileSync(outcome.activePath!, 'utf8')
    expect(written).toContain('new-testing')
  })

  test('merge skips user-authored skill without origin field and logs warning', async () => {
    // A null origin makes writeSkill omit the `origin:` frontmatter line.
    await writeSkill('react-testing', OVERLAP_TEXT, null)

    // Same name and text as the existing skill, but with confidence 0.6;
    // presumably that is below the replace threshold, so a merge is expected.
    const candidate: LearnedSkillDraft = {
      name: 'react-testing',
      description: OVERLAP_TEXT,
      scope: 'project',
      sourceInstinctIds: ['i1'],
      confidence: 0.6,
      content: `---\nname: react-testing\ndescription: ${JSON.stringify(OVERLAP_TEXT)}\n---\n\n# React Testing\n\n${OVERLAP_TEXT}\n`,
      outputPath: join(root, 'react-testing-patch'),
    }
    const overlaps = await compareExistingSkills(candidate, [root])
    const decision = decideSkillLifecycle(candidate, overlaps)
    expect(decision.type).toBe('merge')

    // Capture stderr for the duration of the apply call so the warning can
    // be asserted; the real writer is restored in `finally`.
    const captured: string[] = []
    const realWrite = process.stderr.write.bind(process.stderr)
    process.stderr.write = (chunk: unknown) => {
      captured.push(String(chunk))
      return true
    }
    try {
      const outcome = await applySkillLifecycleDecision(decision)
      expect(outcome.activePath).toBeUndefined()

      const warned = captured.some(line =>
        line.includes('[skill-learning] skip user-authored skill'),
      )
      expect(warned).toBe(true)
    } finally {
      process.stderr.write = realWrite
    }
  })

  test('replace proceeds normally for skill-learning-generated skill', async () => {
    await writeSkill('generated-testing', OVERLAP_TEXT, 'skill-learning')
    const candidate = draftSkill('generated-testing-updated', OVERLAP_TEXT)
    const overlaps = await compareExistingSkills(candidate, [root])
    const decision = decideSkillLifecycle(candidate, overlaps)

    expect(decision.type).toBe('replace')
    const outcome = await applySkillLifecycleDecision(decision)

    expect(outcome.activePath).toBeDefined()
    expect(outcome.archivedPath).toBeDefined()
  })
})
|
||||
|
||||
/**
 * Writes `<root>/<name>/SKILL.md` containing YAML frontmatter and a short
 * markdown body.
 *
 * @param name Skill name; used for the directory, the `name:` field and the
 *   heading.
 * @param body Text used both as the JSON-quoted `description:` value and as
 *   the markdown body.
 * @param origin Value of the `origin:` frontmatter field. Pass `null` to omit
 *   the field entirely.
 */
async function writeSkill(
  name: string,
  body: string,
  origin: string | null = 'skill-learning',
): Promise<void> {
  const skillDir = join(root, name)
  await mkdir(skillDir, { recursive: true })

  const frontmatter = [`name: ${name}`, `description: ${JSON.stringify(body)}`]
  if (origin !== null) {
    frontmatter.push(`origin: ${origin}`)
  }

  // The trailing empty element yields the final newline of the file.
  const document = [
    '---',
    ...frontmatter,
    '---',
    '',
    `# ${name}`,
    '',
    body,
    '',
  ].join('\n')

  writeFileSync(join(skillDir, 'SKILL.md'), document)
}
|
||||
|
||||
/**
 * Builds a project-scoped skill draft fixture with confidence 0.9.
 *
 * @param name Draft name; also the last segment of `outputPath` under `root`.
 * @param text Used as the description and as the markdown body.
 * @returns A draft whose `content` is frontmatter plus a heading and `text`.
 */
function draftSkill(name: string, text: string): LearnedSkillDraft {
  // The trailing empty element yields the final newline of the content.
  const content = [
    '---',
    `name: ${name}`,
    `description: ${JSON.stringify(text)}`,
    '---',
    '',
    `# ${name}`,
    '',
    text,
    '',
  ].join('\n')

  const draft: LearnedSkillDraft = {
    name,
    description: text,
    scope: 'project',
    sourceInstinctIds: ['i1'],
    confidence: 0.9,
    content,
    outputPath: join(root, name),
  }
  return draft
}
|
||||
@@ -0,0 +1,372 @@
|
||||
/**
|
||||
* Unit tests for H5 (LLM call throttle), H6 (message watermark dedup),
|
||||
* and H7 (circuit breaker) improvements.
|
||||
*/
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
||||
import { mkdtempSync, rmSync } from 'node:fs'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
|
||||
import {
|
||||
resetSkillLearningConfig,
|
||||
setSkillLearningConfigForTest,
|
||||
} from '../config.js'
|
||||
import { resetCircuitBreaker } from '../llmObserverBackend.js'
|
||||
import {
|
||||
resetRuntimeLLMBookkeeping,
|
||||
resetRuntimeObserverForTest,
|
||||
runSkillLearningPostSampling,
|
||||
} from '../runtimeObserver.js'
|
||||
import type { REPLHookContext } from '../../../utils/hooks/postSamplingHooks.js'
|
||||
import {
|
||||
setActiveObserverBackend,
|
||||
getActiveObserverBackend,
|
||||
registerObserverBackend,
|
||||
type ObserverBackend,
|
||||
} from '../observerBackend.js'
|
||||
import type { StoredSkillObservation } from '../observationStore.js'
|
||||
|
||||
// Temporary working directory created for each test.
let root: string
// Working directory in effect before the test changed into `root`.
let previousCwd: string
// Snapshot of the environment taken at module load; the hooks reinstate it.
const originalEnv = { ...process.env }
// Name of the observer backend that was active at module load.
const { name: originalBackendName } = getActiveObserverBackend()
|
||||
|
||||
/**
 * Wraps plain `{ uuid, content }` pairs in a REPL hook context for the main
 * thread, turning each pair into a user message.
 *
 * @param messages Message ids and text, in conversation order.
 * @returns A context whose remaining fields are empty placeholders.
 */
function makeCtx(
  messages: Array<{ uuid: string; content: string }>,
): REPLHookContext {
  const userMessages = messages.map(entry => {
    return {
      type: 'user' as const,
      uuid: entry.uuid as any,
      message: { role: 'user' as const, content: entry.content },
    }
  })

  return {
    querySource: 'repl_main_thread',
    messages: userMessages,
    systemPrompt: [] as any,
    userContext: {},
    systemContext: {},
    toolUseContext: { agentId: undefined } as any,
  }
}
|
||||
|
||||
/**
 * Produces five identical correction messages whose uuids are
 * `<prefix>-0` through `<prefix>-4`.
 *
 * @param prefix Distinguishes one batch of uuids from another.
 */
function make5Msgs(prefix: string): Array<{ uuid: string; content: string }> {
  const batch: Array<{ uuid: string; content: string }> = []
  for (let index = 0; index < 5; index += 1) {
    batch.push({
      uuid: `${prefix}-${index}`,
      content: '不要 mock,用 testing-library',
    })
  }
  return batch
}
|
||||
|
||||
/**
 * Builds `count` user-message observations with ids `o0`, `o1`, ...
 *
 * All observations share the same session, project and text; only the id
 * differs, and the timestamp is taken when the entry is created.
 *
 * @param count Number of observations to create.
 */
function makeObs(count: number): StoredSkillObservation[] {
  const buildOne = (_unused: unknown, index: number) => {
    return {
      id: `o${index}`,
      timestamp: new Date().toISOString(),
      event: 'user_message' as const,
      sessionId: 's1',
      projectId: 'p1',
      projectName: 'project',
      cwd: '/tmp',
      messageText: 'test message',
    }
  }
  return Array.from({ length: count }, buildOne)
}
|
||||
|
||||
/**
 * Clears runtime-observer and circuit-breaker state and reactivates the
 * backend that was active when this module loaded.
 */
function restoreObserverDefaults(): void {
  resetRuntimeObserverForTest()
  resetCircuitBreaker()
  setActiveObserverBackend(originalBackendName)
}

// Run every test inside its own temp directory with skill learning enabled
// and all learning/config paths redirected into that directory.
beforeEach(() => {
  root = mkdtempSync(join(tmpdir(), 'skill-throttle-test-'))
  previousCwd = process.cwd()
  process.chdir(root)
  process.env = {
    ...originalEnv,
    CLAUDE_SKILL_LEARNING_HOME: join(root, 'learning-home'),
    CLAUDE_CONFIG_DIR: join(root, 'config'),
    SKILL_LEARNING_ENABLED: '1',
    NODE_ENV: 'test',
  }
  restoreObserverDefaults()
})

// Undo everything beforeEach changed; leave the temp directory before
// deleting it.
afterEach(() => {
  process.chdir(previousCwd)
  process.env = { ...originalEnv }
  resetSkillLearningConfig()
  rmSync(root, { recursive: true, force: true })
  restoreObserverDefaults()
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// H5: LLM throttle — minimum observation count gate
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('H5: LLM call throttle', () => {
  /**
   * Registers and activates a stub backend under `name` whose `analyze`
   * only counts invocations. The returned object is updated in place.
   */
  function activateCallCounter(name: string): { calls: number } {
    const tally = { calls: 0 }
    const stub: ObserverBackend = {
      name,
      analyze() {
        tally.calls += 1
        return []
      },
    }
    registerObserverBackend(stub)
    setActiveObserverBackend(name)
    return tally
  }

  /** Runs one post-sampling pass over five fresh messages for `prefix`. */
  const sampleBatch = (prefix: string) =>
    runSkillLearningPostSampling(makeCtx(make5Msgs(prefix)))

  test('fewer than 5 observations routes to heuristic — LLM backend not called', async () => {
    const tally = activateCallCounter('tracking-under5')
    const correction = '不要 mock,用 testing-library'

    // Only three messages are supplied, fewer than the five named in the
    // test title.
    const threeMessages = ['u5a', 'u5b', 'u5c'].map(uuid => ({
      uuid,
      content: correction,
    }))
    await runSkillLearningPostSampling(makeCtx(threeMessages))

    expect(tally.calls).toBe(0)
  })

  test('session cap: more calls than cap reaches heuristic fallback', async () => {
    // Allow a single backend call per session and disable the cooldown.
    setSkillLearningConfigForTest({
      llm: { maxCallsPerSession: 1, cooldownMs: 0 },
    })
    const tally = activateCallCounter('tracking-cap')

    // The first batch is expected to reach the backend.
    await sampleBatch('cap1')
    expect(tally.calls).toBe(1)

    // A second batch with new uuids must not add another backend call.
    await sampleBatch('cap2')
    expect(tally.calls).toBe(1)
  })

  test('cooldown gate: second call within cooldown window skips LLM', async () => {
    // A cooldown far longer than the test run, with a generous session cap.
    setSkillLearningConfigForTest({
      llm: { cooldownMs: 999_999_000, maxCallsPerSession: 100 },
    })
    const tally = activateCallCounter('tracking-cooldown')

    await sampleBatch('cd1')
    expect(tally.calls).toBe(1)

    // Issued immediately afterwards, so it falls inside the cooldown.
    await sampleBatch('cd2')
    expect(tally.calls).toBe(1)
  })

  test('resetRuntimeLLMBookkeeping resets session counter and timestamps', async () => {
    setSkillLearningConfigForTest({
      llm: { maxCallsPerSession: 1, cooldownMs: 0 },
    })
    const tally = activateCallCounter('tracking-reset')

    // With a cap of one, only the first of these two batches is counted.
    await sampleBatch('rr1')
    await sampleBatch('rr2')
    expect(tally.calls).toBe(1)

    // After the bookkeeping reset the next batch is counted again.
    resetRuntimeLLMBookkeeping()
    await sampleBatch('rr3')
    expect(tally.calls).toBe(2)
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// H6: Message watermark dedup
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('H6: message watermark dedup', () => {
  /**
   * Registers and activates a stub backend under `name` that adds up the
   * number of observations it is handed, then disables the cooldown and
   * raises the session cap so throttling cannot hide a call.
   */
  function activateObservationCounter(name: string): { seen: number } {
    const tally = { seen: 0 }
    const stub: ObserverBackend = {
      name,
      analyze(observations) {
        tally.seen += observations.length
        return []
      },
    }
    registerObserverBackend(stub)
    setActiveObserverBackend(name)
    setSkillLearningConfigForTest({
      llm: { cooldownMs: 0, maxCallsPerSession: 100 },
    })
    return tally
  }

  test('same message uuids are not re-processed in a subsequent call', async () => {
    const tally = activateObservationCounter('counting-dedup')
    const messages = make5Msgs('ded')

    // First pass over five messages.
    await runSkillLearningPostSampling(makeCtx(messages))
    const seenAfterFirst = tally.seen

    // Second pass over the very same uuids: the backend must not be handed
    // any further observations.
    await runSkillLearningPostSampling(makeCtx(messages))
    const seenAfterSecond = tally.seen

    expect(seenAfterSecond).toBe(seenAfterFirst)
  })

  test('different message uuids are always processed', async () => {
    const tally = activateObservationCounter('counting-dedup-new')

    await runSkillLearningPostSampling(makeCtx(make5Msgs('new1')))
    const seenAfterFirst = tally.seen

    // A batch with a different uuid prefix must add to the total.
    await runSkillLearningPostSampling(makeCtx(make5Msgs('new2')))
    expect(tally.seen).toBeGreaterThan(seenAfterFirst)
  })

  test('resetRuntimeLLMBookkeeping clears dedup set — same uuids reprocessed', async () => {
    const tally = activateObservationCounter('counting-dedup-clr')
    const messages = make5Msgs('clr')

    await runSkillLearningPostSampling(makeCtx(messages))
    const seenAfterFirst = tally.seen

    // Once bookkeeping is reset, the same uuids count as new again.
    resetRuntimeLLMBookkeeping()
    await runSkillLearningPostSampling(makeCtx(messages))
    expect(tally.seen).toBeGreaterThan(seenAfterFirst)
  })
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// H7: Circuit breaker (tests the llmObserverBackend state machine directly)
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('H7: circuit breaker', () => {
  /** Loads the real LLM observer backend through a dynamic import. */
  async function loadBackend() {
    const module = await import('../llmObserverBackend.js')
    return module.llmObserverBackend
  }

  // NOTE(review): the assertions in this group only check that `analyze`
  // resolves to an array. They do not observe whether the circuit was open
  // or closed, so they act as smoke tests for resetCircuitBreaker rather
  // than proof of the state machine.

  test('circuit opens after failure threshold and subsequent calls return heuristic result without hitting queryHaiku', async () => {
    const backend = await loadBackend()
    resetCircuitBreaker()

    setSkillLearningConfigForTest({
      llm: { failureThreshold: 3, circuitCooldownMs: 60_000 },
    })

    const obs = makeObs(5)

    // Three calls, matching the configured failure threshold. Presumably
    // each one fails here because no API key is available — confirm.
    for (let attempt = 0; attempt < 3; attempt += 1) {
      await backend.analyze(obs)
    }

    // Reset, then make sure a further call still completes with an array.
    resetCircuitBreaker()
    const result = await backend.analyze(obs)
    expect(Array.isArray(result)).toBe(true)
  })

  test('circuit breaker env vars are respected', async () => {
    const backend = await loadBackend()
    resetCircuitBreaker()

    // A threshold of one: a single failure should be enough to trip it.
    setSkillLearningConfigForTest({
      llm: { failureThreshold: 1, circuitCooldownMs: 60_000 },
    })

    const obs = makeObs(5)
    await backend.analyze(obs)

    // After a reset the backend must still answer without throwing.
    resetCircuitBreaker()
    const result = await backend.analyze(obs)
    expect(Array.isArray(result)).toBe(true)
  })

  test('empty observations bypass circuit breaker entirely', async () => {
    const backend = await loadBackend()
    resetCircuitBreaker()

    // With nothing to analyse the backend is expected to return an empty
    // list.
    const result = await backend.analyze([])
    expect(result).toEqual([])
  })

  test('resetCircuitBreaker resets state to closed', async () => {
    const backend = await loadBackend()
    resetCircuitBreaker()

    const first = await backend.analyze(makeObs(3))
    expect(Array.isArray(first)).toBe(true)

    // Resetting a second time must leave the backend equally usable.
    resetCircuitBreaker()
    const second = await backend.analyze(makeObs(3))
    expect(Array.isArray(second)).toBe(true)
  })
})
|
||||
196
src/services/skillLearning/__tests__/toolEventObserver.test.ts
Normal file
196
src/services/skillLearning/__tests__/toolEventObserver.test.ts
Normal file
@@ -0,0 +1,196 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test'
|
||||
import { mkdtempSync, rmSync } from 'node:fs'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import { readObservations } from '../observationStore.js'
|
||||
import {
|
||||
hasToolHookObservationsForTurn,
|
||||
pruneEmittedTurns,
|
||||
recordToolComplete,
|
||||
recordToolError,
|
||||
recordToolStart,
|
||||
recordUserCorrection,
|
||||
resetToolHookBookkeeping,
|
||||
resetToolHookDepsCache,
|
||||
runToolCallWithSkillLearningHooks,
|
||||
} from '../toolEventObserver.js'
|
||||
|
||||
// Temporary learning home created for each test.
let rootDir: string
// Value of CLAUDE_SKILL_LEARNING_HOME before the test overrode it, if any.
let previousLearningHome: string | undefined

// Point the skill-learning store at a fresh temp directory and clear the
// tool-hook bookkeeping so tests cannot see each other's turns.
beforeEach(() => {
  rootDir = mkdtempSync(join(tmpdir(), 'skill-learning-tool-hook-'))
  resetToolHookBookkeeping()
  previousLearningHome = process.env.CLAUDE_SKILL_LEARNING_HOME
  process.env.CLAUDE_SKILL_LEARNING_HOME = rootDir
})

// Restore the variable to its pre-test state instead of always deleting it,
// so a value set by the surrounding environment survives this suite.
afterEach(() => {
  if (previousLearningHome === undefined) {
    delete process.env.CLAUDE_SKILL_LEARNING_HOME
  } else {
    process.env.CLAUDE_SKILL_LEARNING_HOME = previousLearningHome
  }
  rmSync(rootDir, { recursive: true, force: true })
})
|
||||
|
||||
/**
 * Returns a fresh tool-hook context for session `tool-hook-session`, turn 1,
 * with project `p1` located at the current `rootDir`.
 */
function ctx() {
  // Fields that appear both at the top level and inside `project`.
  const identity = {
    projectId: 'p1',
    projectName: 'project',
    cwd: rootDir,
  }

  return {
    sessionId: 'tool-hook-session',
    turn: 1,
    ...identity,
    project: {
      ...identity,
      scope: 'project' as const,
      source: 'global' as const,
      storageDir: join(rootDir, 'projects', 'p1'),
    },
  }
}
|
||||
|
||||
describe('toolEventObserver', () => {
  /** Reads back every observation stored for the fixture project. */
  const readStored = () =>
    readObservations({ rootDir, project: ctx().project })

  test('records tool_start with tool-hook source', async () => {
    await recordToolStart(ctx(), 'Grep', { pattern: 'foo' })

    const observations = await readStored()
    expect(observations).toHaveLength(1)
    expect(observations[0]?.event).toBe('tool_start')
    expect(observations[0]?.source).toBe('tool-hook')
    expect(observations[0]?.toolName).toBe('Grep')
  })

  test('records tool_complete with success outcome', async () => {
    await recordToolComplete(ctx(), 'Edit', 'ok', 'success')

    const observations = await readStored()
    expect(observations[0]?.event).toBe('tool_complete')
    expect(observations[0]?.outcome).toBe('success')
  })

  test('records tool_error as tool_complete with failure outcome', async () => {
    await recordToolError(ctx(), 'Bash', new Error('boom'))

    const observations = await readStored()
    expect(observations[0]?.outcome).toBe('failure')
  })

  test('records user correction message', async () => {
    await recordUserCorrection(ctx(), '不要 mock,用 testing-library')

    const observations = await readStored()
    expect(observations[0]?.event).toBe('user_message')
    expect(observations[0]?.messageText).toContain('testing-library')
  })

  test('tracks which session+turn has tool-hook observations', async () => {
    expect(hasToolHookObservationsForTurn('tool-hook-session', 1)).toBe(false)
    await recordToolStart(ctx(), 'Grep')
    expect(hasToolHookObservationsForTurn('tool-hook-session', 1)).toBe(true)
    expect(hasToolHookObservationsForTurn('tool-hook-session', 2)).toBe(false)
  })

  // H11: emittedTurns bounded memory tests
  describe('pruneEmittedTurns', () => {
    test('prunes Set entries exceeding SET_MAX keeping most recent', async () => {
      const sessionId = 'big-session'
      // Record 501 turns for one session. Per the original author's note
      // the per-session limit is 500 with the newest 250 kept — confirm
      // against toolEventObserver.
      for (let turn = 1; turn <= 501; turn++) {
        await recordToolStart({ ...ctx(), sessionId, turn }, 'Grep')
      }

      expect(hasToolHookObservationsForTurn(sessionId, 1)).toBe(false) // oldest pruned
      expect(hasToolHookObservationsForTurn(sessionId, 501)).toBe(true) // newest kept
      expect(hasToolHookObservationsForTurn(sessionId, 252)).toBe(true) // within keep window
    })

    test('prunes Map entries exceeding MAP_MAX keeping most recent insertions', async () => {
      // Record one turn for each of 51 sessions; per the original author's
      // note the session limit is 50 — confirm against toolEventObserver.
      for (let index = 0; index < 51; index++) {
        await recordToolStart(
          { ...ctx(), sessionId: `session-${index}`, turn: 1 },
          'Grep',
        )
      }

      // The first session recorded must be gone, the last one still tracked.
      expect(hasToolHookObservationsForTurn('session-0', 1)).toBe(false)
      expect(hasToolHookObservationsForTurn('session-50', 1)).toBe(true)
    })

    test('pruneEmittedTurns is idempotent when within limits', async () => {
      await recordToolStart(ctx(), 'Grep')
      pruneEmittedTurns()
      pruneEmittedTurns()
      // A single tracked turn must survive repeated pruning.
      expect(hasToolHookObservationsForTurn('tool-hook-session', 1)).toBe(true)
    })
  })

  // H10: fire-and-forget / flag-off tests
  describe('runToolCallWithSkillLearningHooks', () => {
    afterEach(() => {
      resetToolHookDepsCache()
      delete process.env.SKILL_LEARNING_ENABLED
    })

    test('invoke completes before recordToolStart promise resolves (fire-and-forget)', async () => {
      process.env.SKILL_LEARNING_ENABLED = '1'
      resetToolHookDepsCache()

      // NOTE(review): this only verifies that the wrapper returns the
      // invoke result with the flag on. It does not observe when the
      // observation write settles, so the ordering named in the title is
      // not actually asserted. Locals that were declared but never read
      // have been removed.
      let invokeCompleted = false

      const result = await runToolCallWithSkillLearningHooks(
        'TestTool',
        {},
        { sessionId: 'test-ff-session', turn: 99 },
        async () => {
          // Brief delay so the callback finishes asynchronously.
          await new Promise(res => setTimeout(res, 5))
          invokeCompleted = true
          return { data: 'done' }
        },
      )

      expect(result).toEqual({ data: 'done' })
      expect(invokeCompleted).toBe(true)
    })

    test('flag off: wrapper skips observation entirely and returns invoke result', async () => {
      process.env.SKILL_LEARNING_ENABLED = '0'
      resetToolHookDepsCache()

      let invokeCalled = false
      const result = await runToolCallWithSkillLearningHooks(
        'TestTool',
        {},
        {},
        async () => {
          invokeCalled = true
          return { data: 42 }
        },
      )

      expect(invokeCalled).toBe(true)
      expect(result).toEqual({ data: 42 })
      // With the flag off nothing may have been written to the store.
      const obs = await readStored()
      expect(obs).toHaveLength(0)
    })
  })
})
|
||||
164
src/services/skillLearning/agentGenerator.ts
Normal file
164
src/services/skillLearning/agentGenerator.ts
Normal file
@@ -0,0 +1,164 @@
|
||||
import { mkdir, writeFile } from 'node:fs/promises'
|
||||
import { existsSync } from 'node:fs'
|
||||
import { join } from 'node:path'
|
||||
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
|
||||
import { clearCommandsCache } from '../../commands.js'
|
||||
import type { Instinct } from './instinctParser.js'
|
||||
import { normalizeSkillName } from './learningPolicy.js'
|
||||
import type { SkillLearningScope } from './types.js'
|
||||
|
||||
/** Optional overrides accepted when generating a learned agent draft. */
export type AgentGeneratorOptions = {
  /** Project root for project-scoped agents; defaults to `process.cwd()`. */
  cwd?: string
  /** Agents directory for non-project scope; defaults to the config home. */
  globalAgentsDir?: string
  /** When set, used as the output directory regardless of scope. */
  outputRoot?: string
  /** Agent name to use instead of the generated one (it is normalized). */
  name?: string
  /** Description to use instead of the one derived from the instincts. */
  description?: string
  /** Scope to use instead of the first instinct's scope. */
  scope?: SkillLearningScope
}
|
||||
|
||||
/** An agent definition generated from instincts, not yet written to disk. */
export type LearnedAgentDraft = {
  /** Normalized agent name; also the base name of the written `.md` file. */
  name: string
  /** Short description placed in the frontmatter. */
  description: string
  /** Scope that decided where the agent is stored. */
  scope: SkillLearningScope
  /** Ids of the instincts the draft was generated from. */
  sourceInstinctIds: string[]
  /** Mean instinct confidence, rounded to two decimals. */
  confidence: number
  /** Full markdown document, frontmatter included. */
  content: string
  /** Directory that the agent file is written into. */
  outputPath: string
}
|
||||
|
||||
/**
 * Turns a non-empty list of instincts into an agent draft.
 *
 * Name, description and scope come from `options` when supplied; otherwise
 * they are derived from the instincts (scope falls back to `'project'`).
 *
 * @param instincts Instincts to fold into one agent.
 * @param options Optional overrides for naming, scope and output location.
 * @returns The draft, with confidence rounded to two decimals.
 * @throws Error when `instincts` is empty.
 */
export function generateAgentDraft(
  instincts: Instinct[],
  options?: AgentGeneratorOptions,
): LearnedAgentDraft {
  if (instincts.length === 0) {
    throw new Error('Cannot generate an agent draft without instincts')
  }

  const scope = options?.scope ?? instincts[0]?.scope ?? 'project'
  const name = normalizeSkillName(options?.name ?? buildAgentName(instincts))
  const confidence = averageConfidence(instincts)
  const description = options?.description ?? buildDescription(instincts)
  const outputPath = getLearnedAgentPath(name, scope, options)
  // The content receives the unrounded confidence and rounds it itself.
  const content = buildAgentContent({
    name,
    description,
    confidence,
    instincts,
  })
  const roundedConfidence = Number(confidence.toFixed(2))
  const sourceInstinctIds = instincts.map(({ id }) => id)

  return {
    name,
    description,
    scope,
    sourceInstinctIds,
    confidence: roundedConfidence,
    content,
    outputPath,
  }
}
|
||||
|
||||
/**
 * Writes the draft to `<outputPath>/<name>.md`, creating the directory when
 * needed. An existing file is left untouched.
 *
 * The commands cache is cleared only when a new file was written.
 *
 * @param draft Draft to persist.
 * @returns Path of the agent file, whether newly written or already present.
 */
export async function writeLearnedAgent(
  draft: LearnedAgentDraft,
): Promise<string> {
  await mkdir(draft.outputPath, { recursive: true })
  const target = join(draft.outputPath, `${draft.name}.md`)

  const alreadyPresent = existsSync(target)
  if (!alreadyPresent) {
    await writeFile(target, draft.content, 'utf8')
    clearCommandsCache()
  }
  return target
}
|
||||
|
||||
/**
 * Resolves the directory a learned agent is stored in.
 *
 * Precedence: `options.outputRoot`, then `<cwd>/.claude/agents` for project
 * scope, then `options.globalAgentsDir`, and finally `agents` under the
 * Claude config home.
 *
 * @param _name Unused; the directory does not depend on the agent name.
 * @param scope Scope of the agent.
 * @param options Optional directory overrides.
 */
export function getLearnedAgentPath(
  _name: string,
  scope: SkillLearningScope,
  options?: AgentGeneratorOptions,
): string {
  const override = options?.outputRoot
  if (override) {
    return override
  }

  if (scope !== 'project') {
    return options?.globalAgentsDir ?? join(getClaudeConfigHomeDir(), 'agents')
  }

  const projectRoot = options?.cwd ?? process.cwd()
  return join(projectRoot, '.claude', 'agents')
}
|
||||
|
||||
/**
 * Derives an agent name of the form `learned-agent-<words>` from up to four
 * words taken from the instincts, falling back to `learned-agent` when
 * normalization leaves nothing.
 */
function buildAgentName(instincts: Instinct[]): string {
  const segments = ['learned', 'agent'].concat(extractWords(instincts, 4))
  const normalized = normalizeSkillName(segments.join('-'))
  return normalized || 'learned-agent'
}
|
||||
|
||||
/**
 * Uses the first instinct's trigger as the description, with every run of
 * whitespace collapsed to one space and the result cut to 120 characters.
 * A generic sentence is used when there is no first instinct.
 */
function buildDescription(instincts: Instinct[]): string {
  const [first] = instincts
  const source = first?.trigger ?? 'Run the learned multi-step workflow'
  const singleLine = source.replace(/\s+/g, ' ')
  return singleLine.slice(0, 120)
}
|
||||
|
||||
/**
 * Renders the agent markdown document: YAML frontmatter, an intro line, and
 * three bullet sections (Triggers, Playbook, Evidence) built from the
 * instincts.
 *
 * @param params.name Agent name used in the frontmatter and intro line.
 * @param params.description Written JSON-quoted into the frontmatter.
 * @param params.confidence Rounded to two decimals in the frontmatter.
 * @param params.instincts Source of ids, triggers, actions and evidence.
 * @returns The document text, ending with a newline.
 */
function buildAgentContent(params: {
  name: string
  description: string
  confidence: number
  instincts: Instinct[]
}): string {
  const { name, description, confidence, instincts } = params

  const bulletList = (items: string[]): string =>
    items.map(item => `- ${item}`).join('\n')
  // Each section is a heading, a blank line, its bullets and a blank line.
  const section = (heading: string, items: string[]): string[] => [
    heading,
    '',
    bulletList(items),
    '',
  ]

  const quotedIds = instincts.map(({ id }) => JSON.stringify(id)).join(', ')
  const frontmatter = [
    '---',
    `name: ${name}`,
    `description: ${JSON.stringify(description)}`,
    'origin: skill-learning',
    `confidence: ${Number(confidence.toFixed(2))}`,
    `evolved_from: [${quotedIds}]`,
    '---',
  ]

  const lines = [
    ...frontmatter,
    '',
    `You are the ${name} learned agent.`,
    '',
    ...section(
      '## Triggers',
      instincts.map(instinct => instinct.trigger),
    ),
    ...section(
      '## Playbook',
      instincts.map(instinct => instinct.action),
    ),
    ...section(
      '## Evidence',
      instincts.flatMap(instinct => instinct.evidence),
    ),
  ]
  return lines.join('\n')
}
|
||||
|
||||
/**
 * Arithmetic mean of the instincts' confidence values.
 *
 * @param instincts - Instincts to average.
 * @returns The mean confidence, or 0 for an empty list (instead of the NaN a
 *   0/0 division would produce, which would leak into the rendered output).
 */
function averageConfidence(instincts: Instinct[]): number {
  if (instincts.length === 0) return 0
  const total = instincts.reduce(
    (sum, instinct) => sum + instinct.confidence,
    0,
  )
  return total / instincts.length
}
|
||||
|
||||
/**
 * Collect distinct keywords from the instincts' triggers and actions, in
 * order of first appearance. Tokens are lowercase `[a-z0-9]` runs; tokens of
 * two characters or fewer and the stop words below are skipped. Scanning
 * stops as soon as `max` keywords have been collected (checked after every
 * token).
 */
function extractWords(instincts: Instinct[], max: number): string[] {
  const ignored = new Set([
    'when',
    'with',
    'this',
    'that',
    'user',
    'asks',
    'for',
    'the',
    'and',
    'debug',
    'investigate',
    'research',
  ])
  const tokens = instincts.flatMap(({ trigger, action }) =>
    `${trigger} ${action}`.toLowerCase().split(/[^a-z0-9]+/),
  )

  const picked: string[] = []
  for (const token of tokens) {
    const usable =
      token.length > 2 && !ignored.has(token) && !picked.includes(token)
    if (usable) picked.push(token)
    if (picked.length >= max) break
  }
  return picked
}
|
||||
167
src/services/skillLearning/commandGenerator.ts
Normal file
167
src/services/skillLearning/commandGenerator.ts
Normal file
@@ -0,0 +1,167 @@
|
||||
import { mkdir, writeFile } from 'node:fs/promises'
|
||||
import { existsSync } from 'node:fs'
|
||||
import { join } from 'node:path'
|
||||
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
|
||||
import { clearCommandsCache } from '../../commands.js'
|
||||
import type { Instinct } from './instinctParser.js'
|
||||
import { normalizeSkillName } from './learningPolicy.js'
|
||||
import type { SkillLearningScope } from './types.js'
|
||||
|
||||
/** Optional overrides accepted by the learned-command generator. */
export type CommandGeneratorOptions = {
  /** Base directory for project-scoped output; defaults to `process.cwd()`. */
  cwd?: string
  /** Directory for non-project scope; defaults to `<claude config home>/commands`. */
  globalCommandsDir?: string
  /** When truthy, used verbatim as the output directory regardless of scope. */
  outputRoot?: string
  /** Explicit command name (normalized before use) instead of a derived one. */
  name?: string
  /** Explicit description instead of one derived from the first trigger. */
  description?: string
  /** Explicit scope instead of the first instinct's scope. */
  scope?: SkillLearningScope
}
|
||||
|
||||
/** A generated command that has not been written to disk yet. */
export type LearnedCommandDraft = {
  /** Normalized command name; also the file name stem. */
  name: string
  /** One-line description placed in the frontmatter. */
  description: string
  /** Scope the command is generated for. */
  scope: SkillLearningScope
  /** Ids of the instincts the command was generated from. */
  sourceInstinctIds: string[]
  /** Mean instinct confidence, rounded to two decimals. */
  confidence: number
  /** Full markdown content of the command file. */
  content: string
  /** Directory the command file is written into. */
  outputPath: string
}
|
||||
|
||||
/**
 * Build an in-memory command draft from a group of instincts.
 *
 * Scope, name and description come from `options` when provided, otherwise
 * they are derived from the instincts. The name is always normalized.
 *
 * @param instincts - Non-empty list of source instincts.
 * @param options - Optional overrides for naming, scope and output location.
 * @returns The draft, including rendered content and target directory.
 * @throws Error when `instincts` is empty.
 */
export function generateCommandDraft(
  instincts: Instinct[],
  options?: CommandGeneratorOptions,
): LearnedCommandDraft {
  if (instincts.length === 0) {
    throw new Error('Cannot generate a command draft without instincts')
  }

  const [first] = instincts
  const scope = options?.scope ?? first?.scope ?? 'project'
  const name = normalizeSkillName(options?.name ?? buildCommandName(instincts))
  const meanConfidence = averageConfidence(instincts)
  const description = options?.description ?? buildDescription(instincts)
  const outputPath = getLearnedCommandPath(name, scope, options)
  // The content receives the unrounded mean; it rounds for display itself.
  const content = buildCommandContent({
    name,
    description,
    confidence: meanConfidence,
    instincts,
  })

  return {
    name,
    description,
    scope,
    sourceInstinctIds: instincts.map(({ id }) => id),
    confidence: Number(meanConfidence.toFixed(2)),
    content,
    outputPath,
  }
}
|
||||
|
||||
/**
 * Persist a learned command draft as `<outputPath>/<name>.md`.
 *
 * The output directory is created when missing. An existing file is never
 * overwritten: the file is opened with the exclusive-create flag (`wx`), so
 * the "already exists" check and the write are a single atomic step rather
 * than a synchronous `existsSync` probe followed by a separate write.
 * The commands cache is cleared only when a new file was actually written.
 *
 * @param draft - Draft whose `content` is written under `draft.outputPath`.
 * @returns Path of the command file (newly written or already present).
 * @throws Any filesystem error other than `EEXIST` raised by the write.
 */
export async function writeLearnedCommand(
  draft: LearnedCommandDraft,
): Promise<string> {
  await mkdir(draft.outputPath, { recursive: true })
  const filePath = join(draft.outputPath, `${draft.name}.md`)
  try {
    await writeFile(filePath, draft.content, { encoding: 'utf8', flag: 'wx' })
  } catch (error) {
    // Someone (possibly an earlier run) already created the file: keep it.
    if ((error as NodeJS.ErrnoException).code === 'EEXIST') return filePath
    throw error
  }
  clearCommandsCache()
  return filePath
}
|
||||
|
||||
/**
 * Resolve the directory learned command files are written to.
 *
 * Precedence: a truthy `options.outputRoot` wins; otherwise project scope maps
 * to `<cwd>/.claude/commands` and any other scope maps to
 * `options.globalCommandsDir` or `<claude config home>/commands`.
 *
 * @param _name - Unused; kept for signature compatibility.
 * @param scope - Scope of the artifact being written.
 * @param options - Optional directory overrides.
 * @returns The target directory (not the file path).
 */
export function getLearnedCommandPath(
  _name: string,
  scope: SkillLearningScope,
  options?: CommandGeneratorOptions,
): string {
  const explicitRoot = options?.outputRoot
  if (explicitRoot) return explicitRoot

  if (scope !== 'project') {
    const globalDir = options?.globalCommandsDir
    return globalDir ?? join(getClaudeConfigHomeDir(), 'commands')
  }

  const baseDir = options?.cwd ?? process.cwd()
  return join(baseDir, '.claude', 'commands')
}
|
||||
|
||||
/**
 * Derive a command name of the form `learned-<keywords…>` from up to four
 * keywords extracted from the instincts, normalized as a skill name.
 */
function buildCommandName(instincts: Instinct[]): string {
  const keywords = extractWords(instincts, 4)
  const raw = ['learned'].concat(keywords).join('-')
  const normalized = normalizeSkillName(raw)
  return normalized || 'learned-command'
}
|
||||
|
||||
/**
 * Build a one-line description from the first instinct's trigger: whitespace
 * runs are collapsed to single spaces and the result is cut at 120 characters.
 * A fixed fallback sentence is used when there is no first instinct.
 */
function buildDescription(instincts: Instinct[]): string {
  const [first] = instincts
  const source = first?.trigger ?? 'Reuse the learned workflow'
  const singleLine = source.replace(/\s+/g, ' ')
  return singleLine.slice(0, 120)
}
|
||||
|
||||
/**
 * Render the markdown file for a learned command: a frontmatter block
 * (name, JSON-quoted description, origin, confidence rounded to two decimals,
 * source instinct ids) followed by a `# /<name>` heading and "When to use",
 * "Steps" and "Evidence" bullet lists. The result ends with a trailing newline.
 */
function buildCommandContent(params: {
  name: string
  description: string
  confidence: number
  instincts: Instinct[]
}): string {
  const { name, description, confidence, instincts } = params
  const bulletList = (items: string[]): string =>
    items.map(item => `- ${item}`).join('\n')

  const sourceIds = instincts
    .map(instinct => JSON.stringify(instinct.id))
    .join(', ')

  const frontmatter = [
    '---',
    `name: ${name}`,
    `description: ${JSON.stringify(description)}`,
    'origin: skill-learning',
    `confidence: ${Number(confidence.toFixed(2))}`,
    `evolved_from: [${sourceIds}]`,
    '---',
  ]

  const sections = [
    `# /${name}`,
    '',
    '## When to use',
    '',
    bulletList(instincts.map(instinct => instinct.trigger)),
    '',
    '## Steps',
    '',
    bulletList(instincts.map(instinct => instinct.action)),
    '',
    '## Evidence',
    '',
    bulletList(instincts.flatMap(instinct => instinct.evidence)),
    '',
  ]

  return [...frontmatter, '', ...sections].join('\n')
}
|
||||
|
||||
/**
 * Arithmetic mean of the instincts' confidence values.
 *
 * @param instincts - Instincts to average.
 * @returns The mean confidence, or 0 for an empty list (instead of the NaN a
 *   0/0 division would produce, which would leak into the rendered output).
 */
function averageConfidence(instincts: Instinct[]): number {
  if (instincts.length === 0) return 0
  const total = instincts.reduce(
    (sum, instinct) => sum + instinct.confidence,
    0,
  )
  return total / instincts.length
}
|
||||
|
||||
/**
 * Collect distinct keywords from the instincts' triggers and actions, in
 * order of first appearance. Tokens are lowercase `[a-z0-9]` runs; tokens of
 * two characters or fewer and the stop words below are skipped. Scanning
 * stops as soon as `max` keywords have been collected (checked after every
 * token).
 */
function extractWords(instincts: Instinct[], max: number): string[] {
  const ignored = new Set([
    'when',
    'with',
    'this',
    'that',
    'user',
    'asks',
    'for',
    'the',
    'and',
    'run',
    'use',
    'prefer',
    'avoid',
  ])
  const tokens = instincts.flatMap(({ trigger, action }) =>
    `${trigger} ${action}`.toLowerCase().split(/[^a-z0-9]+/),
  )

  const picked: string[] = []
  for (const token of tokens) {
    const usable =
      token.length > 2 && !ignored.has(token) && !picked.includes(token)
    if (usable) picked.push(token)
    if (picked.length >= max) break
  }
  return picked
}
|
||||
52
src/services/skillLearning/config.ts
Normal file
52
src/services/skillLearning/config.ts
Normal file
@@ -0,0 +1,52 @@
|
||||
/**
 * Numeric limits for the LLM-backed part of skill learning.
 * NOTE(review): the exact meaning of each field is defined by its consumers,
 * which are not part of this module.
 */
export type SkillLearningLlmConfig = {
  readonly timeoutMs: number
  readonly maxCallsPerSession: number
  readonly cooldownMs: number
  readonly failureThreshold: number
  readonly circuitCooldownMs: number
}

/** Effective skill-learning configuration. */
export type SkillLearningConfig = {
  readonly minConfidence: number
  readonly minClusterSize: number
  readonly llm: SkillLearningLlmConfig
}

/** Partial configuration layered on top of the defaults. */
export type SkillLearningConfigOverrides = {
  minConfidence?: number
  minClusterSize?: number
  llm?: Partial<SkillLearningLlmConfig>
}

const DEFAULTS: SkillLearningConfig = {
  minConfidence: 0.75,
  minClusterSize: 3,
  llm: {
    timeoutMs: 10_000,
    maxCallsPerSession: 20,
    cooldownMs: 30_000,
    failureThreshold: 3,
    circuitCooldownMs: 60_000,
  },
}

// Module-level override slot; `undefined` means "use DEFAULTS as-is".
let overrides: SkillLearningConfigOverrides | undefined

/**
 * Return the effective configuration: the defaults, with any installed
 * overrides applied field by field.
 *
 * Every field, including the nested `llm` ones, falls back to its default
 * when the override is missing or explicitly `undefined`. (A plain object
 * spread of `overrides.llm` would let an explicit `undefined` erase a
 * default, unlike the `??` used for the top-level fields.)
 *
 * @returns The shared defaults object when no overrides are installed,
 *   otherwise a freshly built merged object.
 */
export function getSkillLearningConfig(): SkillLearningConfig {
  if (!overrides) return DEFAULTS
  const llm = overrides.llm
  const base = DEFAULTS.llm
  return {
    minConfidence: overrides.minConfidence ?? DEFAULTS.minConfidence,
    minClusterSize: overrides.minClusterSize ?? DEFAULTS.minClusterSize,
    llm: {
      timeoutMs: llm?.timeoutMs ?? base.timeoutMs,
      maxCallsPerSession: llm?.maxCallsPerSession ?? base.maxCallsPerSession,
      cooldownMs: llm?.cooldownMs ?? base.cooldownMs,
      failureThreshold: llm?.failureThreshold ?? base.failureThreshold,
      circuitCooldownMs: llm?.circuitCooldownMs ?? base.circuitCooldownMs,
    },
  }
}

/**
 * Install configuration overrides (intended for tests). Replaces any
 * previously installed overrides.
 */
export function setSkillLearningConfigForTest(
  config: SkillLearningConfigOverrides,
): void {
  overrides = config
}

/** Remove installed overrides so the defaults apply again. */
export function resetSkillLearningConfig(): void {
  overrides = undefined
}
|
||||
174
src/services/skillLearning/evolution.ts
Normal file
174
src/services/skillLearning/evolution.ts
Normal file
@@ -0,0 +1,174 @@
|
||||
import type { Instinct } from './instinctParser.js'
|
||||
import { shouldGenerateSkillFromInstincts } from './learningPolicy.js'
|
||||
import {
|
||||
generateSkillDraft,
|
||||
type SkillGeneratorOptions,
|
||||
} from './skillGenerator.js'
|
||||
import {
|
||||
generateCommandDraft,
|
||||
type CommandGeneratorOptions,
|
||||
type LearnedCommandDraft,
|
||||
} from './commandGenerator.js'
|
||||
import {
|
||||
generateAgentDraft,
|
||||
type AgentGeneratorOptions,
|
||||
type LearnedAgentDraft,
|
||||
} from './agentGenerator.js'
|
||||
import { getSkillLearningConfig } from './config.js'
|
||||
import type { LearnedSkillDraft } from './types.js'
|
||||
|
||||
/** A cluster of related instincts proposed for evolution into an artifact. */
export type EvolutionCandidate = {
  /** Kind of artifact the cluster was classified as. */
  target: 'skill' | 'command' | 'agent'
  /** Trigger of the cluster's first instinct. */
  trigger: string
  /** Domain of the cluster's first instinct. */
  domain: string
  /** Members of the cluster. */
  instincts: Instinct[]
  /** Mean member confidence, rounded to two decimals. */
  averageConfidence: number
}
|
||||
|
||||
/** A generated draft tagged with the kind of artifact it represents. */
export type LearnedArtifactDraft =
  | { kind: 'skill'; draft: LearnedSkillDraft }
  | { kind: 'command'; draft: LearnedCommandDraft }
  | { kind: 'agent'; draft: LearnedAgentDraft }
|
||||
|
||||
/**
 * Group instincts into evolution candidates.
 *
 * Only `active` and `pending` instincts take part. Instincts are bucketed by
 * domain plus normalized trigger; buckets smaller than the configured
 * `minClusterSize` are dropped. Each surviving bucket becomes a candidate,
 * and candidates are returned ordered by descending average confidence.
 */
export function clusterInstincts(instincts: Instinct[]): EvolutionCandidate[] {
  const buckets = new Map<string, Instinct[]>()
  for (const instinct of instincts) {
    const eligible =
      instinct.status === 'active' || instinct.status === 'pending'
    if (!eligible) continue
    const bucketKey = `${instinct.domain}:${normalizedTrigger(instinct.trigger)}`
    const bucket = buckets.get(bucketKey)
    if (bucket) {
      bucket.push(instinct)
    } else {
      buckets.set(bucketKey, [instinct])
    }
  }

  const candidates: EvolutionCandidate[] = []
  for (const members of buckets.values()) {
    // The cluster-size floor applies unconditionally. Per the original note,
    // an earlier `|| confidence >= 0.8` escape hatch let a single
    // high-confidence message become a persistent policy (the "H15" risk the
    // threshold guards against); repeated independent observation is required.
    if (!(members.length >= getSkillLearningConfig().minClusterSize)) continue

    let confidenceSum = 0
    for (const member of members) confidenceSum += member.confidence
    const mean = confidenceSum / members.length

    candidates.push({
      target: classifyEvolutionTarget(members),
      trigger: members[0]?.trigger ?? 'learned pattern',
      domain: members[0]?.domain ?? 'project',
      instincts: members,
      averageConfidence: Number(mean.toFixed(2)),
    })
  }

  return candidates.sort(
    (left, right) => right.averageConfidence - left.averageConfidence,
  )
}
|
||||
|
||||
/**
 * Decide which artifact kind a group of instincts should evolve into, using
 * keyword matching on the lowercased triggers and actions.
 *
 * - `command` when the text mentions an explicit user request or a command.
 * - `agent` when there are at least four instincts and the text mentions
 *   debugging, investigating, research or multi-step work.
 * - `skill` otherwise.
 *
 * @param instinctsOrCandidate - Either the instincts or a candidate holding them.
 */
export function classifyEvolutionTarget(
  instinctsOrCandidate: Instinct[] | EvolutionCandidate,
): 'skill' | 'command' | 'agent' {
  let members: Instinct[]
  if (Array.isArray(instinctsOrCandidate)) {
    members = instinctsOrCandidate
  } else {
    members = instinctsOrCandidate.instincts
  }

  const haystack = members
    .map(({ trigger, action }) => `${trigger} ${action}`)
    .join(' ')
    .toLowerCase()

  if (/user asks|explicitly request|command|run /.test(haystack)) {
    return 'command'
  }

  const investigative = /(debug|investigate|research|multi-step)/.test(haystack)
  return members.length >= 4 && investigative ? 'agent' : 'skill'
}
|
||||
|
||||
/**
 * Suggest evolution candidates for the given instincts.
 * Currently this is exactly the clustering result.
 */
export function suggestEvolutions(instincts: Instinct[]): EvolutionCandidate[] {
  const suggestions = clusterInstincts(instincts)
  return suggestions
}
|
||||
|
||||
/**
 * Generate skill drafts for every cluster classified as `skill` whose
 * instincts pass the generation policy. The draft scope is always taken from
 * the cluster's first instinct (falling back to `project`), overriding any
 * `scope` in `options`.
 */
export function generateSkillCandidates(
  instincts: Instinct[],
  options?: SkillGeneratorOptions,
): LearnedSkillDraft[] {
  const drafts: LearnedSkillDraft[] = []
  for (const candidate of clusterInstincts(instincts)) {
    if (candidate.target !== 'skill') continue
    if (!shouldGenerateSkillFromInstincts(candidate.instincts)) continue
    const scope = candidate.instincts[0]?.scope ?? 'project'
    drafts.push(generateSkillDraft(candidate.instincts, { ...options, scope }))
  }
  return drafts
}
|
||||
|
||||
/**
 * Generate command drafts for every cluster classified as `command` whose
 * instincts pass the generation policy. The draft scope is always taken from
 * the cluster's first instinct (falling back to `project`), overriding any
 * `scope` in `options`.
 */
export function generateCommandCandidates(
  instincts: Instinct[],
  options?: CommandGeneratorOptions,
): LearnedCommandDraft[] {
  const drafts: LearnedCommandDraft[] = []
  for (const candidate of clusterInstincts(instincts)) {
    if (candidate.target !== 'command') continue
    if (!shouldGenerateSkillFromInstincts(candidate.instincts)) continue
    const scope = candidate.instincts[0]?.scope ?? 'project'
    drafts.push(generateCommandDraft(candidate.instincts, { ...options, scope }))
  }
  return drafts
}
|
||||
|
||||
/**
 * Generate agent drafts for every cluster classified as `agent` whose
 * instincts pass the generation policy. The draft scope is always taken from
 * the cluster's first instinct (falling back to `project`), overriding any
 * `scope` in `options`.
 */
export function generateAgentCandidates(
  instincts: Instinct[],
  options?: AgentGeneratorOptions,
): LearnedAgentDraft[] {
  const drafts: LearnedAgentDraft[] = []
  for (const candidate of clusterInstincts(instincts)) {
    if (candidate.target !== 'agent') continue
    if (!shouldGenerateSkillFromInstincts(candidate.instincts)) continue
    const scope = candidate.instincts[0]?.scope ?? 'project'
    drafts.push(generateAgentDraft(candidate.instincts, { ...options, scope }))
  }
  return drafts
}
|
||||
|
||||
/**
 * Generate every kind of draft for the given instincts and return them as
 * one tagged list: skills first, then commands, then agents.
 *
 * @param instincts - Instincts to evolve.
 * @param options - Per-kind generator options.
 */
export function generateAllCandidates(
  instincts: Instinct[],
  options?: {
    skill?: SkillGeneratorOptions
    command?: CommandGeneratorOptions
    agent?: AgentGeneratorOptions
  },
): LearnedArtifactDraft[] {
  const artifacts: LearnedArtifactDraft[] = []
  for (const draft of generateSkillCandidates(instincts, options?.skill)) {
    artifacts.push({ kind: 'skill', draft })
  }
  for (const draft of generateCommandCandidates(instincts, options?.command)) {
    artifacts.push({ kind: 'command', draft })
  }
  for (const draft of generateAgentCandidates(instincts, options?.agent)) {
    artifacts.push({ kind: 'agent', draft })
  }
  return artifacts
}
|
||||
|
||||
/**
 * Reduce a trigger to a clustering key: lowercase it, keep only its
 * `[a-z0-9]` words, and join the first six with single spaces.
 */
function normalizedTrigger(trigger: string): string {
  const words = trigger.toLowerCase().match(/[a-z0-9]+/g) ?? []
  return words.slice(0, 6).join(' ')
}
|
||||
12
src/services/skillLearning/featureCheck.ts
Normal file
12
src/services/skillLearning/featureCheck.ts
Normal file
@@ -0,0 +1,12 @@
|
||||
import { feature } from 'bun:bundle'
|
||||
|
||||
/**
 * Tell whether skill learning is enabled.
 *
 * `SKILL_LEARNING_ENABLED` is consulted first, then `FEATURE_SKILL_LEARNING`:
 * the value `'0'` disables and `'1'` enables; any other value is ignored.
 * When neither variable decides, the `SKILL_LEARNING` bundle feature flag does.
 */
export function isSkillLearningEnabled(): boolean {
  const envFlags = [
    process.env.SKILL_LEARNING_ENABLED,
    process.env.FEATURE_SKILL_LEARNING,
  ]
  for (const flag of envFlags) {
    if (flag === '0') return false
    if (flag === '1') return true
  }
  // NOTE(review): `feature` comes from `bun:bundle`; the plain `if` form of the
  // original is kept on purpose in case the bundler depends on it — confirm.
  if (feature('SKILL_LEARNING')) {
    return true
  }
  return false
}
|
||||
37
src/services/skillLearning/index.ts
Normal file
37
src/services/skillLearning/index.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
export * from './featureCheck.js'
|
||||
export * from './evolution.js'
|
||||
export {
|
||||
createInstinct,
|
||||
parseInstinct,
|
||||
serializeInstinct,
|
||||
} from './instinctParser.js'
|
||||
export * from './learningPolicy.js'
|
||||
export {
|
||||
exportInstincts,
|
||||
importInstincts,
|
||||
loadInstincts,
|
||||
prunePendingInstincts,
|
||||
saveInstinct,
|
||||
updateConfidence,
|
||||
upsertInstinct,
|
||||
} from './instinctStore.js'
|
||||
export {
|
||||
appendObservation,
|
||||
ingestTranscript,
|
||||
readObservations,
|
||||
scrubObservation,
|
||||
scrubText,
|
||||
} from './observationStore.js'
|
||||
export * from './promotion.js'
|
||||
export * from './projectContext.js'
|
||||
export * from './runtimeObserver.js'
|
||||
export * from './observerBackend.js'
|
||||
export { llmObserverBackend } from './llmObserverBackend.js'
|
||||
export * from './commandGenerator.js'
|
||||
export * from './agentGenerator.js'
|
||||
export * from './toolEventObserver.js'
|
||||
export * from './sessionObserver.js'
|
||||
export * from './skillGapStore.js'
|
||||
export * from './skillGenerator.js'
|
||||
export * from './skillLifecycle.js'
|
||||
export * from './types.js'
|
||||
115
src/services/skillLearning/instinctParser.ts
Normal file
115
src/services/skillLearning/instinctParser.ts
Normal file
@@ -0,0 +1,115 @@
|
||||
import { createHash } from 'node:crypto'
|
||||
import type {
|
||||
SkillLearningProjectContext,
|
||||
SkillLearningScope,
|
||||
StoredSkillObservation,
|
||||
} from './observationStore.js'
|
||||
import type { Instinct as BaseInstinct, InstinctStatus } from './types.js'
|
||||
|
||||
export type { Instinct } from './types.js'
|
||||
|
||||
/** An instinct as persisted, optionally linked to the observations behind it. */
export type StoredInstinct = BaseInstinct & {
  /** Ids of the observations that contributed to this instinct. */
  observationIds?: string[]
}
|
||||
|
||||
/**
 * Input for `createInstinct`: a stored instinct without timestamps, where
 * `id` and `status` are optional because they can be derived or defaulted.
 */
export type InstinctCandidate = Omit<
  StoredInstinct,
  'id' | 'createdAt' | 'updatedAt' | 'status'
> & {
  id?: string
  status?: InstinctStatus
}
|
||||
|
||||
/**
 * Create a normalized stored instinct from a candidate.
 *
 * The id is the candidate's own id when it has a non-empty one, otherwise it
 * is derived from trigger, action and scope. The candidate's `id` is split
 * off before the remaining fields are spread, so a candidate carrying an own
 * `id: undefined` key can no longer overwrite the derived, scope-aware id.
 *
 * @param candidate - Instinct data without timestamps.
 * @param now - ISO timestamp used for both `createdAt` and `updatedAt`.
 * @returns The normalized instinct, with status defaulting to `pending`.
 */
export function createInstinct(
  candidate: InstinctCandidate,
  now = new Date().toISOString(),
): StoredInstinct {
  const { id: candidateId, ...fields } = candidate
  return normalizeInstinct({
    id:
      candidateId ||
      buildInstinctId(candidate.trigger, candidate.action, candidate.scope),
    ...fields,
    createdAt: now,
    updatedAt: now,
    status: candidate.status ?? 'pending',
  })
}
|
||||
|
||||
/**
 * Return a cleaned-up copy of an instinct.
 *
 * - A missing id is derived from trigger, action and the instinct's scope
 *   (the same inputs `createInstinct` uses, so both produce the same id).
 * - Confidence is clamped.
 * - Evidence is de-duplicated with falsy entries removed; a missing evidence
 *   list (possible for records parsed from unvalidated JSON) is treated as
 *   empty instead of throwing.
 * - Observation ids are de-duplicated when present.
 */
export function normalizeInstinct(instinct: StoredInstinct): StoredInstinct {
  const evidence = (instinct.evidence ?? []).filter(Boolean)
  return {
    ...instinct,
    id:
      instinct.id ||
      buildInstinctId(instinct.trigger, instinct.action, instinct.scope),
    confidence: clampConfidence(instinct.confidence),
    evidence: Array.from(new Set(evidence)),
    evidenceOutcome: instinct.evidenceOutcome,
    observationIds: instinct.observationIds
      ? Array.from(new Set(instinct.observationIds))
      : undefined,
  }
}
|
||||
|
||||
/**
 * Serialize an instinct as pretty-printed (2-space) JSON followed by a
 * newline. The instinct is normalized first.
 */
export function serializeInstinct(instinct: StoredInstinct): string {
  const json = JSON.stringify(normalizeInstinct(instinct), null, 2)
  return json + '\n'
}
|
||||
|
||||
/**
 * Parse a JSON document into a normalized instinct.
 * The parsed value is trusted to have the stored-instinct shape; it is not
 * validated beyond what normalization does.
 *
 * @throws SyntaxError when `content` is not valid JSON.
 */
export function parseInstinct(content: string): StoredInstinct {
  const raw = JSON.parse(content) as StoredInstinct
  return normalizeInstinct(raw)
}
|
||||
|
||||
/**
 * Build a deterministic instinct id of the form `<slug>-<hash>`.
 *
 * The slug is the lowercased "trigger action" text with non-alphanumeric runs
 * turned into dashes, cut at 48 characters (`instinct` when empty). The hash
 * is the first 10 hex characters of the SHA-1 of scope, trigger and action
 * joined by newlines, so the same text in different scopes gets different ids.
 */
export function buildInstinctId(
  trigger: string,
  action: string,
  scope: SkillLearningScope = 'project',
): string {
  const hasher = createHash('sha1')
  hasher.update(`${scope}\n${trigger}\n${action}`)
  const suffix = hasher.digest('hex').slice(0, 10)

  const dashed = `${trigger} ${action}`
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
  const slug = dashed.replace(/^-|-$/g, '').slice(0, 48)
  const prefix = slug === '' ? 'instinct' : slug

  return `${prefix}-${suffix}`
}
|
||||
|
||||
/**
 * Build the observation-derived part of an instinct candidate.
 *
 * Project fields prefer the explicit `project` context and fall back to the
 * observation. The single evidence entry is the first available of message
 * text, tool output, tool input, tool name, or the observation id.
 */
export function candidateFromObservation(
  observation: StoredSkillObservation,
  project?: SkillLearningProjectContext,
): Partial<InstinctCandidate> {
  const { id, messageText, toolOutput, toolInput, toolName } = observation
  const evidenceText = messageText ?? toolOutput ?? toolInput ?? toolName ?? id

  return {
    scope: project?.scope ?? 'project',
    projectId: project?.projectId ?? observation.projectId,
    projectName: project?.projectName ?? observation.projectName,
    source: 'session-observation',
    evidence: [evidenceText],
    observationIds: [id],
  }
}
|
||||
|
||||
/**
 * Heuristic contradiction check between two instincts.
 *
 * They contradict when their triggers are equal ignoring case and exactly one
 * of the two actions contains the word `avoid`, or exactly one contains the
 * word `prefer`.
 * NOTE(review): this also flags e.g. "prefer X" vs "use Y" on the same
 * trigger as contradictory — confirm that is intended.
 */
export function isContradictingInstinct(
  existing: StoredInstinct,
  incoming: StoredInstinct,
): boolean {
  const sameTrigger =
    existing.trigger.toLowerCase() === incoming.trigger.toLowerCase()
  if (!sameTrigger) return false

  const oldAction = existing.action.toLowerCase()
  const newAction = incoming.action.toLowerCase()
  const mentionsDiffer = (word: string): boolean =>
    oldAction.includes(word) !== newAction.includes(word)

  return mentionsDiffer('avoid') || mentionsDiffer('prefer')
}
|
||||
|
||||
/**
 * Round a confidence to two decimals and clamp it to the range [0, 1].
 *
 * @param confidence - Raw confidence value.
 * @returns The clamped value; 0 for NaN and for any non-number. Records
 *   parsed from unvalidated JSON may lack a numeric confidence, and
 *   `Number.isNaN` alone lets such values through to `.toFixed`, which throws.
 */
export function clampConfidence(confidence: number): number {
  if (typeof confidence !== 'number' || Number.isNaN(confidence)) return 0
  const rounded = Number(confidence.toFixed(2))
  return Math.max(0, Math.min(1, rounded))
}
|
||||
258
src/services/skillLearning/instinctStore.ts
Normal file
258
src/services/skillLearning/instinctStore.ts
Normal file
@@ -0,0 +1,258 @@
|
||||
import {
|
||||
mkdir,
|
||||
readFile,
|
||||
readdir,
|
||||
rename,
|
||||
unlink,
|
||||
writeFile,
|
||||
} from 'node:fs/promises'
|
||||
import { randomBytes } from 'node:crypto'
|
||||
import { dirname, join } from 'node:path'
|
||||
import {
|
||||
getSkillLearningRoot,
|
||||
type ObservationStoreOptions,
|
||||
type SkillLearningProjectContext,
|
||||
type SkillLearningScope,
|
||||
} from './observationStore.js'
|
||||
import {
|
||||
clampConfidence,
|
||||
isContradictingInstinct,
|
||||
normalizeInstinct,
|
||||
parseInstinct,
|
||||
serializeInstinct,
|
||||
type StoredInstinct,
|
||||
} from './instinctParser.js'
|
||||
|
||||
// Tail of the promise chain that `upsertInstinct` appends to, so that upserts
// issued through it run one after another rather than concurrently.
let upsertQueue: Promise<unknown> = Promise.resolve()
|
||||
|
||||
/** Options selecting which instinct directory a store operation targets. */
export type InstinctStoreOptions = ObservationStoreOptions & {
  /** Project whose instincts are addressed; absent means the global store. */
  project?: SkillLearningProjectContext
  /** Explicit scope; takes precedence over `project.scope`. */
  scope?: SkillLearningScope
}
|
||||
|
||||
/**
 * Resolve the directory that holds instinct files.
 *
 * The global directory `<root>/global/instincts/personal` is used when the
 * effective scope is `global`, when no project is given, or when the
 * project's id is `global`. Otherwise the project directory
 * `<root>/projects/<projectId>/instincts/personal` is used.
 */
export function getInstinctsDir(options?: InstinctStoreOptions): string {
  const root = getSkillLearningRoot(options)
  const project = options?.project
  const scope = options?.scope ?? project?.scope ?? 'project'
  const leaf = ['instincts', 'personal']

  if (!project || scope === 'global' || project.projectId === 'global') {
    return join(root, 'global', ...leaf)
  }
  return join(root, 'projects', project.projectId, ...leaf)
}
|
||||
|
||||
/**
 * Write an instinct to its JSON file and return the normalized record.
 *
 * The content is first written to a uniquely named temporary file next to
 * the target and then renamed over it, so readers never see a partially
 * written file. If the write or the rename fails, the temporary file is
 * removed (best effort) before the error is rethrown, so failed saves do not
 * leave stray `.tmp` files behind.
 *
 * @param instinct - Instinct to persist (normalized before writing).
 * @param options - Store selection options.
 * @returns The normalized instinct that was written.
 * @throws The filesystem error raised by the write or rename.
 */
export async function saveInstinct(
  instinct: StoredInstinct,
  options?: InstinctStoreOptions,
): Promise<StoredInstinct> {
  const normalized = normalizeInstinct(instinct)
  const dir = getInstinctsDir(options)
  await mkdir(dir, { recursive: true })
  const target = instinctPath(normalized.id, options)
  const tmp = `${target}.${randomBytes(6).toString('hex')}.tmp`
  try {
    await writeFile(tmp, serializeInstinct(normalized))
    await rename(tmp, target)
  } catch (error) {
    // Cleanup is best effort: the original failure is the one worth reporting.
    await unlink(tmp).catch(() => {})
    throw error
  }
  return normalized
}
|
||||
|
||||
/**
 * Load every instinct stored in the selected directory.
 *
 * Only `*.json` entries are read (one after another), each parsed and
 * normalized. A missing directory yields an empty list.
 *
 * @returns The instincts sorted by id.
 * @throws Directory or file read errors other than a missing directory, and
 *   JSON syntax errors from any instinct file.
 */
export async function loadInstincts(
  options?: InstinctStoreOptions,
): Promise<StoredInstinct[]> {
  const dir = getInstinctsDir(options)

  let entries: string[] = []
  try {
    entries = await readdir(dir)
  } catch (error) {
    const missingDir = (error as NodeJS.ErrnoException).code === 'ENOENT'
    if (!missingDir) throw error
    return []
  }

  const loaded: StoredInstinct[] = []
  for (const entry of entries) {
    if (!entry.endsWith('.json')) continue
    const text = await readFile(join(dir, entry), 'utf8')
    loaded.push(parseInstinct(text))
  }

  loaded.sort((left, right) => left.id.localeCompare(right.id))
  return loaded
}
|
||||
|
||||
/**
 * Insert or merge an instinct, serialized behind all previously queued
 * upserts of this module.
 *
 * @returns A promise for the stored instinct; it rejects if this upsert
 *   fails, without blocking upserts queued after it.
 */
export function upsertInstinct(
  incoming: StoredInstinct,
  options?: InstinctStoreOptions,
): Promise<StoredInstinct> {
  const pending = upsertQueue.then(() => doUpsertInstinct(incoming, options))
  // The queue tail swallows the rejection so one failure does not poison the
  // chain; the caller still observes it through `pending`.
  const swallow = (): void => {}
  upsertQueue = pending.catch(swallow)
  return pending
}
|
||||
|
||||
/**
 * Insert `incoming`, or merge it into an existing instinct.
 *
 * An existing instinct matches when it has the same id; failing that, when it
 * has the same trigger (ignoring case) and a contradicting action. The second
 * rule lets a contradictory instinct with a different id still merge and
 * drive the conflict-hold transition instead of piling up as a separate
 * record.
 *
 * On a merge the existing record is kept and updated: confidence drops by 0.1
 * for a contradiction, otherwise it moves by the outcome delta; evidence and
 * observation ids are appended; status is re-evaluated; `updatedAt` is bumped.
 */
async function doUpsertInstinct(
  incoming: StoredInstinct,
  options?: InstinctStoreOptions,
): Promise<StoredInstinct> {
  const stored = await loadInstincts(options)

  const byId = stored.find(candidate => candidate.id === incoming.id)
  const match =
    byId ??
    stored.find(
      candidate =>
        candidate.trigger.toLowerCase() === incoming.trigger.toLowerCase() &&
        isContradictingInstinct(candidate, incoming),
    )
  const timestamp = new Date().toISOString()

  if (!match) return saveInstinct(incoming, options)

  const contradicts = isContradictingInstinct(match, incoming)
  let delta: number
  if (contradicts) {
    delta = -0.1
  } else {
    delta = outcomeConfidenceDelta(incoming.evidenceOutcome)
  }
  const confidence = clampConfidence(match.confidence + delta)
  const status = resolveNextStatus(match.status, confidence, contradicts)

  const evidence = [...match.evidence, ...incoming.evidence]
  const observationIds = [
    ...(match.observationIds ?? []),
    ...(incoming.observationIds ?? []),
  ]

  const merged = normalizeInstinct({
    ...match,
    confidence,
    evidence,
    evidenceOutcome: incoming.evidenceOutcome ?? match.evidenceOutcome,
    observationIds,
    updatedAt: timestamp,
    status,
  })
  return saveInstinct(merged, options)
}
|
||||
|
||||
/**
 * Compute an instinct's status after a confidence update.
 *
 * - A contradiction that leaves confidence below 0.3 puts it on `conflict-hold`.
 * - A `conflict-hold` instinct at 0.5 or above becomes `active` again.
 * - A `pending` instinct at 0.8 or above becomes `active`.
 * - Otherwise the status is unchanged.
 */
function resolveNextStatus(
  current: StoredInstinct['status'],
  nextConfidence: number,
  contradiction: boolean,
): StoredInstinct['status'] {
  const entersHold = contradiction && nextConfidence < 0.3
  if (entersHold) return 'conflict-hold'

  const leavesHold = current === 'conflict-hold' && nextConfidence >= 0.5
  const graduates = current === 'pending' && nextConfidence >= 0.8
  return leavesHold || graduates ? 'active' : current
}
|
||||
|
||||
const DECAY_PER_WEEK = 0.02
const MS_PER_WEEK = 7 * 24 * 60 * 60 * 1000

/**
 * Apply time-based confidence decay to stored instincts: 0.02 per whole week
 * elapsed since `updatedAt` (the original note cites "ECC parity").
 *
 * Only `pending` and `active` instincts decay; every other status is left
 * untouched, as are instincts with an unparseable `updatedAt` or less than
 * one full week of age. An instinct whose clamped confidence would not change
 * is not rewritten.
 *
 * @returns The number of instincts whose confidence was lowered and saved.
 */
export async function decayInstinctConfidence(
  options?: InstinctStoreOptions,
): Promise<number> {
  const instincts = await loadInstincts(options)
  const now = Date.now()
  let decayedCount = 0

  for (const instinct of instincts) {
    const decays = instinct.status === 'pending' || instinct.status === 'active'
    if (!decays) continue

    const lastTouched = Date.parse(instinct.updatedAt)
    if (Number.isNaN(lastTouched)) continue

    const wholeWeeks = Math.floor((now - lastTouched) / MS_PER_WEEK)
    if (wholeWeeks < 1) continue

    const lowered = clampConfidence(
      instinct.confidence + -DECAY_PER_WEEK * wholeWeeks,
    )
    if (lowered === instinct.confidence) continue

    // Bumping updatedAt keeps later maintenance runs from re-applying the
    // decay for the same elapsed weeks.
    const aged = normalizeInstinct({
      ...instinct,
      confidence: lowered,
      updatedAt: new Date(now).toISOString(),
    })
    await saveInstinct(aged, options)
    decayedCount += 1
  }

  return decayedCount
}
|
||||
|
||||
/**
 * Confidence adjustment for a non-contradicting repeat observation:
 * -0.05 when the outcome is `failure`, +0.05 for anything else (including an
 * absent outcome).
 */
function outcomeConfidenceDelta(
  outcome: StoredInstinct['evidenceOutcome'],
): number {
  return outcome === 'failure' ? -0.05 : 0.05
}
|
||||
|
||||
/**
 * Shift the confidence of one stored instinct by `delta` (clamped) and bump
 * its `updatedAt`.
 *
 * @param instinctId - Id of the instinct to update.
 * @param delta - Signed amount to add to the current confidence.
 * @returns The saved instinct, or `null` when no instinct has that id.
 */
export async function updateConfidence(
  instinctId: string,
  delta: number,
  options?: InstinctStoreOptions,
): Promise<StoredInstinct | null> {
  const stored = await loadInstincts(options)
  for (const candidate of stored) {
    if (candidate.id !== instinctId) continue
    const adjusted = normalizeInstinct({
      ...candidate,
      confidence: clampConfidence(candidate.confidence + delta),
      updatedAt: new Date().toISOString(),
    })
    return saveInstinct(adjusted, options)
  }
  return null
}
|
||||
|
||||
/**
 * Write all instincts of the selected store to `outputPath` as one
 * pretty-printed JSON array followed by a newline, creating the parent
 * directory when needed.
 *
 * @returns The instincts that were exported.
 */
export async function exportInstincts(
  outputPath: string,
  options?: InstinctStoreOptions,
): Promise<StoredInstinct[]> {
  const instincts = await loadInstincts(options)
  const parentDir = dirname(outputPath)
  await mkdir(parentDir, { recursive: true })
  const payload = `${JSON.stringify(instincts, null, 2)}\n`
  await writeFile(outputPath, payload)
  return instincts
}
|
||||
|
||||
/**
 * Import instincts from a JSON file and upsert them, one at a time, into the
 * selected store.
 *
 * The file must contain a JSON array. This is checked explicitly so that a
 * wrong top-level shape fails with a clear message instead of an incidental
 * error (or, for a JSON string, an attempt to import its characters).
 *
 * @param inputPath - Path of the JSON file to read.
 * @returns The stored instincts, in file order.
 * @throws TypeError when the file's top-level JSON value is not an array;
 *   SyntaxError for invalid JSON; filesystem errors from reading or saving.
 */
export async function importInstincts(
  inputPath: string,
  options?: InstinctStoreOptions,
): Promise<StoredInstinct[]> {
  const parsed: unknown = JSON.parse(await readFile(inputPath, 'utf8'))
  if (!Array.isArray(parsed)) {
    throw new TypeError(
      `Expected a JSON array of instincts in ${inputPath}`,
    )
  }

  const saved: StoredInstinct[] = []
  for (const instinct of parsed as StoredInstinct[]) {
    saved.push(await upsertInstinct(normalizeInstinct(instinct), options))
  }
  return saved
}
|
||||
|
||||
/**
 * Delete `pending` instincts whose `updatedAt` is older than `maxAgeDays`.
 * Instincts with an unparseable `updatedAt` are never pruned.
 *
 * @param maxAgeDays - Maximum age, in days, a pending instinct may reach.
 * @returns The instincts whose files were removed.
 * @throws Filesystem errors raised while loading or unlinking.
 */
export async function prunePendingInstincts(
  maxAgeDays: number,
  options?: InstinctStoreOptions,
): Promise<StoredInstinct[]> {
  const stored = await loadInstincts(options)
  const cutoff = Date.now() - maxAgeDays * 24 * 60 * 60 * 1000

  const removed: StoredInstinct[] = []
  for (const instinct of stored) {
    if (instinct.status !== 'pending') continue
    const expired = Date.parse(instinct.updatedAt) < cutoff
    if (!expired) continue
    await unlink(instinctPath(instinct.id, options))
    removed.push(instinct)
  }
  return removed
}
|
||||
|
||||
/**
 * Path of the JSON file that stores the instinct with the given id.
 *
 * Ids can originate from imported files, so an id containing a path
 * separator or a NUL character is rejected instead of being joined into the
 * path, where it could address a file outside the instincts directory.
 *
 * @throws Error when `id` contains `/`, `\` or a NUL character.
 */
function instinctPath(id: string, options?: InstinctStoreOptions): string {
  if (/[\\/\0]/.test(id)) {
    throw new Error(
      `Invalid instinct id for file storage: ${JSON.stringify(id)}`,
    )
  }
  return join(getInstinctsDir(options), `${id}.json`)
}
|
||||
106
src/services/skillLearning/learningPolicy.ts
Normal file
106
src/services/skillLearning/learningPolicy.ts
Normal file
@@ -0,0 +1,106 @@
|
||||
import { getSkillLearningConfig } from './config.js'
|
||||
import type { Instinct } from './instinctParser.js'
|
||||
import type { InstinctDomain, SkillLearningScope } from './types.js'
|
||||
|
||||
/**
 * Exported confidence threshold constant.
 * NOTE(review): `shouldGenerateSkillFromInstincts` reads the threshold from
 * the skill-learning config rather than from this constant — confirm whether
 * this export is still used.
 */
export const MIN_CONFIDENCE_TO_GENERATE_SKILL = 0.75

/** Maximum length of a normalized skill name. */
export const MAX_SKILL_NAME_LENGTH = 64

/** Name prefix used for learned skills of each instinct domain. */
const DOMAIN_PREFIXES: Record<InstinctDomain, string> = {
  workflow: 'workflow',
  testing: 'testing',
  debugging: 'debugging',
  'code-style': 'style',
  security: 'security',
  git: 'git',
  project: 'project',
}

/** Names considered too generic to identify a learned skill. */
const GENERIC_NAMES = new Set<string>([
  'learned-skill',
  'better-skill',
  'new-skill',
  'project-skill',
  'workflow-skill',
])
|
||||
|
||||
/**
 * Policy gate for artifact generation: true when the instincts' mean
 * confidence reaches the configured `minConfidence`. An empty list never
 * qualifies.
 */
export function shouldGenerateSkillFromInstincts(
  instincts: readonly Instinct[],
): boolean {
  if (instincts.length === 0) return false

  let total = 0
  for (const instinct of instincts) total += instinct.confidence
  const mean = total / instincts.length

  return mean >= getSkillLearningConfig().minConfidence
}
|
||||
|
||||
/**
 * Build a skill name from the first instinct's domain prefix followed by up
 * to five distinct useful words taken, in order, from the instincts' triggers
 * and actions. When the normalized result is a generic name,
 * `<prefix>-learned-pattern` is returned instead.
 */
export function buildLearnedSkillName(instincts: readonly Instinct[]): string {
  const domain = instincts[0]?.domain ?? 'project'
  const prefix = DOMAIN_PREFIXES[domain]

  const tokens = instincts.flatMap(({ trigger, action }) =>
    `${trigger} ${action}`.toLowerCase().split(/[^a-z0-9]+/),
  )
  const picked = new Set<string>()
  for (const token of tokens) {
    if (isUsefulNameWord(token)) picked.add(token)
    if (picked.size >= 5) break
  }

  const name = normalizeSkillName([prefix, ...picked].join('-'))
  if (isGenericSkillName(name)) return `${prefix}-learned-pattern`
  return name
}
|
||||
|
||||
/**
 * Converts arbitrary text into a lowercase, dash-separated skill name.
 *
 * Runs of characters outside `[a-z0-9]` collapse into a single dash, edge
 * dashes are removed, the result is cut to `MAX_SKILL_NAME_LENGTH`, and a dash
 * exposed by that cut is removed as well.
 *
 * @param value - Raw name text.
 * @returns The normalised name, or `'learned-skill'` when nothing is left.
 */
export function normalizeSkillName(value: string): string {
  const dashed = value.toLowerCase().replace(/[^a-z0-9]+/g, '-')
  const trimmed = dashed.replace(/^-|-$/g, '')
  const clipped = trimmed.slice(0, MAX_SKILL_NAME_LENGTH).replace(/-$/g, '')
  return clipped === '' ? 'learned-skill' : clipped
}
|
||||
|
||||
/**
 * Checks that a name is already in normalised form, is non-empty, fits within
 * `MAX_SKILL_NAME_LENGTH`, and is not one of the generic placeholder names.
 *
 * @param value - Candidate skill name.
 * @returns `true` when every check passes.
 */
export function isValidLearnedSkillName(value: string): boolean {
  if (value !== normalizeSkillName(value)) return false
  if (value.length === 0 || value.length > MAX_SKILL_NAME_LENGTH) return false
  return !isGenericSkillName(value)
}
|
||||
|
||||
/**
 * Reports whether `value` is exactly one of the placeholder names listed in
 * `GENERIC_NAMES`.
 */
export function isGenericSkillName(value: string): boolean {
  const isPlaceholder = GENERIC_NAMES.has(value)
  return isPlaceholder
}
|
||||
|
||||
/**
 * Picks the scope a generated skill should default to.
 *
 * `'global'` is chosen only when there are at least two instincts and every
 * one of them is in the `security`, `git` or `workflow` domain; anything else
 * stays `'project'`.
 *
 * @param instincts - Instincts the skill is generated from.
 * @returns The default scope.
 */
export function decideDefaultScope(
  instincts: readonly Instinct[],
): SkillLearningScope {
  if (instincts.length < 2) return 'project'

  for (const { domain } of instincts) {
    if (domain !== 'security' && domain !== 'git' && domain !== 'workflow') {
      return 'project'
    }
  }
  return 'global'
}
|
||||
|
||||
/**
 * Tells whether a token is worth including in a generated skill name.
 *
 * Tokens of one or two characters and a fixed list of filler words are
 * rejected; everything else is accepted.
 *
 * @param word - Lowercased token.
 * @returns `true` when the token should be kept.
 */
function isUsefulNameWord(word: string): boolean {
  if (word.length <= 2) return false

  switch (word) {
    case 'when':
    case 'with':
    case 'this':
    case 'that':
    case 'user':
    case 'project':
    case 'prefer':
    case 'avoid':
    case 'use':
    case 'using':
    case 'the':
    case 'and':
    case 'for':
      return false
    default:
      return true
  }
}
|
||||
301
src/services/skillLearning/llmObserverBackend.ts
Normal file
301
src/services/skillLearning/llmObserverBackend.ts
Normal file
@@ -0,0 +1,301 @@
|
||||
import { queryHaiku } from '../api/claude.js'
|
||||
import { asSystemPrompt } from '../../utils/systemPromptType.js'
|
||||
import { getSkillLearningConfig } from './config.js'
|
||||
import type { InstinctCandidate } from './instinctParser.js'
|
||||
import type { StoredSkillObservation } from './observationStore.js'
|
||||
import type {
|
||||
ObserverBackend,
|
||||
ObserverBackendContext,
|
||||
} from './observerBackend.js'
|
||||
import {
|
||||
INSTINCT_DOMAINS,
|
||||
type InstinctDomain,
|
||||
type SkillLearningScope,
|
||||
} from './types.js'
|
||||
|
||||
/**
|
||||
* LLM-based observer backend.
|
||||
*
|
||||
* Runs the small fast model (Haiku) through the project's `queryHaiku`
|
||||
* helper, feeds it a compact summary of recent observations, and asks for
|
||||
* up to three atomic reusable instincts in JSON. Output is validated and
|
||||
* mapped to `InstinctCandidate[]` so the existing evolution pipeline
|
||||
* consumes LLM output the same way it consumes heuristic output.
|
||||
*
|
||||
* Design notes:
|
||||
* - Reuses `queryHaiku` (goes through the full Claude Code API stack:
|
||||
* OAuth, beta headers, providers, VCR in tests). No new auth code.
|
||||
 * - Caps input to the tail of the observation buffer so the prompt stays
 *   small and predictable, and runs under an abort signal whose timeout is
 *   read from `getSkillLearningConfig().llm.timeoutMs`, so a slow Haiku
 *   round-trip never blocks the REPL turn end.
 * - On ANY failure (abort, parse error, empty output) it falls back to the
 *   heuristic observer backend, and returns `[]` only if that fallback also
 *   fails — the backend is opt-in via `SKILL_LEARNING_OBSERVER_BACKEND=llm`
 *   and must never destabilise skill-learning when the API is unavailable.
|
||||
*/
|
||||
|
||||
const MAX_OBSERVATIONS_PER_CALL = 30
|
||||
const MAX_CANDIDATES_PER_CALL = 3
|
||||
|
||||
// --- Circuit breaker state ---
|
||||
let consecutiveFailures = 0
|
||||
let circuitOpenUntil = 0
|
||||
|
||||
/**
 * Closes the circuit breaker and forgets every recorded failure, so the next
 * analysis attempts the LLM call again.
 */
export function resetCircuitBreaker(): void {
  circuitOpenUntil = 0
  consecutiveFailures = 0
}
|
||||
|
||||
const LLM_OBSERVER_SYSTEM_PROMPT = `You analyse a short sequence of observations from a coding-assistant session (user messages, tool invocations with outcomes, assistant messages) and extract atomic, reusable "instincts" — behavioural patterns that would help the assistant act correctly in future similar situations.
|
||||
|
||||
Respond with ONLY a JSON array (no prose, no code fences, no commentary). Each item must match this schema:
|
||||
|
||||
{
|
||||
"trigger": string, // <= 80 chars, short phrase describing WHEN the instinct applies
|
||||
"action": string, // <= 120 chars, short phrase describing WHAT to do
|
||||
"confidence": number, // 0..1 — how strongly these observations support the pattern
|
||||
"domain": "workflow"|"testing"|"debugging"|"code-style"|"security"|"git"|"project",
|
||||
"scope": "project"|"global",
|
||||
"evidence": string[] // 1..3 short excerpts copied/paraphrased from the observations
|
||||
}
|
||||
|
||||
Rules:
|
||||
- Return [] if nothing clearly reusable. No guessing.
|
||||
- At most 3 items, highest confidence first.
|
||||
- confidence > 0.7 only when observations show the pattern in action (a correction followed by a successful retry, a repeated sequence, an explicit rule).
|
||||
- Never include secrets, tokens, full file contents, or personally-identifying data.
|
||||
- Scope "global" only when the pattern is obviously project-agnostic (generic testing, git hygiene); default to "project".`
|
||||
|
||||
/**
 * Observer backend registered under the name `'llm'`; `analyze` delegates
 * directly to `analyseWithHaiku`.
 */
export const llmObserverBackend: ObserverBackend = {
  name: 'llm',
  analyze: (
    observations: StoredSkillObservation[],
    ctx?: ObserverBackendContext,
  ): Promise<InstinctCandidate[]> => analyseWithHaiku(observations, ctx),
}
|
||||
|
||||
/**
 * Records one failed LLM round and opens the circuit once the configured
 * number of consecutive failures has been reached.
 */
function recordLlmFailure(): void {
  consecutiveFailures++
  const { failureThreshold, circuitCooldownMs } = getSkillLearningConfig().llm
  if (consecutiveFailures >= failureThreshold) {
    circuitOpenUntil = Date.now() + circuitCooldownMs
  }
}

/**
 * Asks Haiku to extract instinct candidates from the most recent observations.
 *
 * Only the last `MAX_OBSERVATIONS_PER_CALL` observations are sent. The call
 * runs under a timeout taken from the skill-learning config. Whenever the LLM
 * path cannot produce candidates (circuit open, request failure, or output
 * that parses to nothing) the heuristic backend is used on the full
 * observation list instead.
 *
 * @param observations - Observations to analyse; an empty list yields `[]`.
 * @param ctx - Optional project context copied onto each candidate.
 * @returns Candidates from the LLM, or from the heuristic fallback.
 */
async function analyseWithHaiku(
  observations: StoredSkillObservation[],
  ctx?: ObserverBackendContext,
): Promise<InstinctCandidate[]> {
  if (observations.length === 0) return []

  // Circuit breaker: if the circuit is open, skip queryHaiku entirely.
  if (Date.now() < circuitOpenUntil) {
    return runHeuristicFallback(observations, ctx)
  }

  const capped = observations.slice(-MAX_OBSERVATIONS_PER_CALL)
  const userPrompt = buildUserPrompt(capped)
  const signal = makeTimeoutSignal(getSkillLearningConfig().llm.timeoutMs)

  let responseText: string
  try {
    const response = await queryHaiku({
      systemPrompt: asSystemPrompt([LLM_OBSERVER_SYSTEM_PROMPT]),
      userPrompt,
      signal,
      options: {
        querySource: 'skill_learning_observer',
        enablePromptCaching: true,
        agents: [],
        isNonInteractiveSession: true,
        hasAppendSystemPrompt: false,
        mcpTools: [],
      },
    })
    responseText = extractResponseText(response.message?.content)
  } catch {
    // Haiku failure (timeout / rate limit / bad response).
    recordLlmFailure()
    return runHeuristicFallback(observations, ctx)
  }

  const parsed = parseInstinctCandidates(responseText, ctx, capped)
  if (parsed.length === 0) {
    // Empty / malformed LLM output counts as a failure so the breaker opens
    // when Haiku is systematically returning unusable output.
    recordLlmFailure()
    return runHeuristicFallback(observations, ctx)
  }

  // Reset only after the output proved usable. Resetting as soon as the HTTP
  // call returned would cap the counter at 1 for unusable responses, so the
  // breaker could never open for them.
  consecutiveFailures = 0
  return parsed
}
|
||||
|
||||
/**
 * Runs the heuristic observer backend, loaded lazily from
 * `./sessionObserver.js`, as a replacement for the LLM path.
 *
 * @param observations - Observations to analyse.
 * @param ctx - Optional backend context passed through unchanged.
 * @returns The heuristic candidates, or `[]` if loading or analysis throws.
 */
async function runHeuristicFallback(
  observations: StoredSkillObservation[],
  ctx?: ObserverBackendContext,
): Promise<InstinctCandidate[]> {
  try {
    const sessionObserver = await import('./sessionObserver.js')
    // `analyze` may answer synchronously or with a promise; awaiting covers
    // both shapes.
    return await sessionObserver.heuristicObserverBackend.analyze(
      observations,
      ctx,
    )
  } catch {
    return []
  }
}
|
||||
|
||||
/**
 * Renders the observations one per line and wraps them in the user prompt
 * sent to the model.
 *
 * @param observations - Observations in chronological order.
 * @returns The complete user prompt text.
 */
function buildUserPrompt(observations: StoredSkillObservation[]): string {
  const lines: string[] = []
  observations.forEach((observation, index) => {
    lines.push(renderObservation(observation, index))
  })
  return `Observations (chronological, newest last):\n${lines.join('\n')}\n\nExtract up to ${MAX_CANDIDATES_PER_CALL} atomic instincts. JSON array only.`
}
|
||||
|
||||
/**
 * Formats one observation as a single ` | `-separated prompt line.
 *
 * The 1-based position and the event are always present. Tool name, outcome,
 * message text (cut to 200 characters), tool input and tool output (cut to
 * 120 characters each) are added only when they are non-empty. Free text is
 * JSON-quoted.
 *
 * @param observation - Observation to render.
 * @param index - Zero-based position within the prompt.
 * @returns The rendered line.
 */
function renderObservation(
  observation: StoredSkillObservation,
  index: number,
): string {
  const { event, toolName, outcome, messageText, toolInput, toolOutput } =
    observation
  const quoted = (text: string, limit: number): string =>
    JSON.stringify(truncate(text, limit))

  const segments: string[] = [`#${index + 1}`, `event=${event}`]
  if (toolName) segments.push(`tool=${toolName}`)
  if (outcome) segments.push(`outcome=${outcome}`)
  if (messageText) segments.push(`text=${quoted(messageText, 200)}`)
  if (toolInput) segments.push(`in=${quoted(toolInput, 120)}`)
  if (toolOutput) segments.push(`out=${quoted(toolOutput, 120)}`)
  return segments.join(' | ')
}
|
||||
|
||||
/**
 * Cuts `value` to at most `max` UTF-16 code units, appending `…` when
 * anything was removed.
 *
 * @param value - Text to shorten.
 * @param max - Maximum number of code units kept from `value`.
 * @returns `value` unchanged when it already fits, otherwise the shortened
 *   text followed by an ellipsis.
 */
function truncate(value: string, max: number): string {
  return value.length > max ? `${value.slice(0, max)}…` : value
}
|
||||
|
||||
/**
 * Concatenates the text of every `{ type: 'text', text: string }` block in a
 * message content array.
 *
 * @param content - Message content; anything that is not an array yields `''`.
 * @returns The joined text with surrounding whitespace trimmed.
 */
function extractResponseText(content: unknown): string {
  if (!Array.isArray(content)) return ''

  let combined = ''
  for (const block of content as unknown[]) {
    if (typeof block !== 'object' || block === null) continue
    const { type, text } = block as { type?: unknown; text?: unknown }
    if (type === 'text' && typeof text === 'string') combined += text
  }
  return combined.trim()
}
|
||||
|
||||
/**
 * Parses the model's raw reply into validated instinct candidates.
 *
 * The outermost JSON array is located in the reply and decoded. At most
 * `MAX_CANDIDATES_PER_CALL` leading items are considered, and items that fail
 * validation are dropped. Every surviving candidate is linked to the ids of
 * all observations that were sent.
 *
 * @param raw - Raw reply text.
 * @param ctx - Optional project context copied onto each candidate.
 * @param observations - Observations that were included in the prompt.
 * @returns Valid candidates; `[]` when the reply holds no usable JSON array.
 */
function parseInstinctCandidates(
  raw: string,
  ctx: ObserverBackendContext | undefined,
  observations: StoredSkillObservation[],
): InstinctCandidate[] {
  const json = extractJsonArray(raw)
  if (!json) return []

  let decoded: unknown
  try {
    decoded = JSON.parse(json)
  } catch {
    return []
  }
  if (!Array.isArray(decoded)) return []

  const observationIds = observations.map(({ id }) => id)
  const accepted: InstinctCandidate[] = []
  decoded.slice(0, MAX_CANDIDATES_PER_CALL).forEach(item => {
    const candidate = normaliseCandidate(item, ctx, observationIds)
    if (candidate) accepted.push(candidate)
  })
  return accepted
}
|
||||
|
||||
/**
 * Returns the substring from the first `[` to the last `]`, so that prose or
 * code fences around a JSON array are ignored.
 *
 * @param raw - Raw reply text.
 * @returns The bracketed substring, or `undefined` when the text is empty or
 *   contains no `[` followed later by a `]`.
 */
function extractJsonArray(raw: string): string | undefined {
  if (!raw) return undefined

  const open = raw.indexOf('[')
  if (open === -1) return undefined

  const close = raw.lastIndexOf(']')
  return close > open ? raw.slice(open, close + 1) : undefined
}
|
||||
|
||||
/**
 * Validates one decoded JSON item and maps it to an `InstinctCandidate`.
 *
 * The item must be an object with a non-blank `trigger` (kept up to 80
 * characters), a non-blank `action` (up to 120 characters) and at least one
 * usable evidence string. The remaining fields are coerced to safe defaults
 * by their field helpers.
 *
 * @param item - Decoded array element from the model reply.
 * @param ctx - Optional project context supplying project id and name.
 * @param observationIds - Ids of the observations the candidate is based on.
 * @returns The candidate, or `undefined` when the item is rejected.
 */
function normaliseCandidate(
  item: unknown,
  ctx: ObserverBackendContext | undefined,
  observationIds: string[],
): InstinctCandidate | undefined {
  if (typeof item !== 'object' || item === null) return undefined
  const fields = item as Record<string, unknown>

  const trigger = stringField(fields.trigger, 80)
  if (!trigger) return undefined

  const action = stringField(fields.action, 120)
  if (!action) return undefined

  const evidence = evidenceField(fields.evidence)
  if (evidence.length === 0) return undefined

  const project = ctx?.project
  return {
    trigger,
    action,
    confidence: clampUnitInterval(fields.confidence),
    domain: domainField(fields.domain),
    source: 'session-observation',
    scope: scopeField(fields.scope),
    projectId: project?.projectId,
    projectName: project?.projectName,
    evidence,
    observationIds,
  }
}
|
||||
|
||||
/**
 * Reads a string field, trimming whitespace and capping its length.
 *
 * @param value - Untrusted field value.
 * @param maxLength - Maximum number of characters kept.
 * @returns The trimmed and possibly shortened string, or `undefined` when the
 *   value is not a string or is blank.
 */
function stringField(value: unknown, maxLength: number): string | undefined {
  if (typeof value !== 'string') return undefined

  const text = value.trim()
  if (text.length === 0) return undefined
  if (text.length > maxLength) return text.slice(0, maxLength)
  return text
}
|
||||
|
||||
/**
 * Coerces an untrusted confidence value into the range `[0, 1]`.
 *
 * @param value - Untrusted field value.
 * @returns `0.5` when the value is not a finite number, otherwise the value
 *   limited to the unit interval.
 */
function clampUnitInterval(value: unknown): number {
  if (typeof value !== 'number' || !Number.isFinite(value)) return 0.5
  return value < 0 ? 0 : value > 1 ? 1 : value
}
|
||||
|
||||
/**
 * Reads an instinct domain, accepting only values listed in
 * `INSTINCT_DOMAINS`.
 *
 * @param value - Untrusted field value.
 * @returns The domain when recognised, otherwise `'project'`.
 */
function domainField(value: unknown): InstinctDomain {
  if (typeof value === 'string') {
    const known = INSTINCT_DOMAINS as readonly string[]
    if (known.includes(value)) return value as InstinctDomain
  }
  return 'project'
}
|
||||
|
||||
/**
 * Reads a scope field; only the exact string `'global'` selects the global
 * scope, every other value maps to `'project'`.
 */
function scopeField(value: unknown): SkillLearningScope {
  if (value === 'global') return 'global'
  return 'project'
}
|
||||
|
||||
/**
 * Reads the evidence list of a candidate.
 *
 * Non-string and blank entries are ignored. Each kept entry is trimmed and,
 * when longer than 200 characters, cut to 200 and suffixed with `…`.
 * Collection stops after three entries.
 *
 * @param value - Untrusted field value.
 * @returns Up to three evidence strings; `[]` when the value is not an array.
 */
function evidenceField(value: unknown): string[] {
  if (!Array.isArray(value)) return []

  const collected: string[] = []
  for (const raw of value as unknown[]) {
    const text = typeof raw === 'string' ? raw.trim() : ''
    if (text === '') continue
    collected.push(text.length > 200 ? `${text.slice(0, 200)}…` : text)
    if (collected.length >= 3) break
  }
  return collected
}
|
||||
|
||||
/**
 * Creates an abort signal that fires after `ms` milliseconds.
 *
 * @param ms - Timeout in milliseconds.
 * @returns The signal produced by `AbortSignal.timeout`.
 */
function makeTimeoutSignal(ms: number): AbortSignal {
  const signal = AbortSignal.timeout(ms)
  return signal
}
|
||||
451
src/services/skillLearning/observationStore.ts
Normal file
451
src/services/skillLearning/observationStore.ts
Normal file
@@ -0,0 +1,451 @@
|
||||
import { mkdir, readFile, rename, stat, writeFile } from 'node:fs/promises'
|
||||
import { dirname, join } from 'node:path'
|
||||
import { createHash, randomUUID } from 'node:crypto'
|
||||
import type {
|
||||
SkillLearningProjectContext as BaseSkillLearningProjectContext,
|
||||
SkillLearningScope,
|
||||
SkillObservation as BaseSkillObservation,
|
||||
SkillObservationEvent,
|
||||
SkillObservationOutcome,
|
||||
} from './types.js'
|
||||
|
||||
export type { SkillLearningScope, SkillObservation } from './types.js'
|
||||
|
||||
export type SkillLearningProjectContext = Pick<
|
||||
BaseSkillLearningProjectContext,
|
||||
'projectId' | 'projectName' | 'cwd'
|
||||
> &
|
||||
Partial<
|
||||
Omit<BaseSkillLearningProjectContext, 'projectId' | 'projectName' | 'cwd'>
|
||||
>
|
||||
|
||||
export type ObservationEvent = Exclude<SkillObservationEvent, 'tool_error'>
|
||||
|
||||
export type ObservationOutcome = SkillObservationOutcome | 'interrupted'
|
||||
|
||||
export type StoredSkillObservation = Omit<
|
||||
BaseSkillObservation,
|
||||
'event' | 'outcome' | 'toolInput' | 'toolOutput'
|
||||
> & {
|
||||
event: ObservationEvent
|
||||
outcome?: ObservationOutcome
|
||||
toolInput?: string
|
||||
toolOutput?: string
|
||||
toolName?: string
|
||||
messageText?: string
|
||||
source?: 'transcript' | 'hook' | 'tool-hook' | 'imported'
|
||||
contentHash?: string
|
||||
  // Turn index at which the observation was captured. Used by
  // runtimeObserver to scope tool-hook observations to the current REPL turn.
|
||||
turn?: number
|
||||
}
|
||||
|
||||
export type ObservationStoreOptions = {
|
||||
rootDir?: string
|
||||
project?: SkillLearningProjectContext
|
||||
maxFieldLength?: number
|
||||
archiveThresholdBytes?: number
|
||||
}
|
||||
|
||||
type ClaudeTranscriptEntry = {
|
||||
sessionId?: string
|
||||
cwd?: string
|
||||
timestamp?: string
|
||||
type?: string
|
||||
message?: {
|
||||
role?: string
|
||||
content?: unknown
|
||||
}
|
||||
tool_name?: string
|
||||
tool_input?: unknown
|
||||
tool_response?: unknown
|
||||
}
|
||||
|
||||
const DEFAULT_MAX_FIELD_LENGTH = 5_000
|
||||
const DEFAULT_ARCHIVE_THRESHOLD_BYTES = 1_000_000
|
||||
const DEFAULT_PURGE_MAX_AGE_DAYS = 30
|
||||
const SECRET_REPLACEMENT = '[REDACTED]'
|
||||
|
||||
/**
 * Patterns whose matches are redacted from stored text, applied in order:
 * prefixed API keys (`sk-…`, `xox?-…`), e-mail addresses, `Bearer` tokens,
 * and `key: value` / `key=value` pairs for credential-like keys.
 *
 * Order matters: the Bearer pattern must run before the key/value pattern.
 * Otherwise `Authorization: Bearer <token>` is matched as the pair
 * `Authorization: Bearer`, the word "Bearer" is redacted as the value, and
 * the actual token is left in the text with nothing remaining to match it.
 */
const SECRET_PATTERNS: RegExp[] = [
  /\b(?:sk|sk-ant|sk-proj|xox[baprs])-[A-Za-z0-9_-]{12,}\b/g,
  /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
  /\bBearer\s+[A-Za-z0-9._~+/=-]{12,}\b/gi,
  /\b(?:api[_-]?key|token|secret|password|authorization)\b\s*[:=]\s*["']?[^"',\s}]+/gi,
]
|
||||
|
||||
/**
 * Resolves the directory that holds all skill-learning data.
 *
 * Precedence: `options.rootDir`, then the `CLAUDE_SKILL_LEARNING_HOME`
 * environment variable, then `<home>/.claude/skill-learning`. The home
 * directory is taken from `HOME`, then `USERPROFILE` (Windows does not set
 * `HOME`), and only as a last resort from the current working directory.
 * Empty values are treated as unset so they cannot produce a relative path.
 *
 * @param options - Optional store options; only `rootDir` is read here.
 * @returns The root directory path.
 */
export function getSkillLearningRoot(
  options?: ObservationStoreOptions,
): string {
  if (options?.rootDir) return options.rootDir
  if (process.env.CLAUDE_SKILL_LEARNING_HOME) {
    return process.env.CLAUDE_SKILL_LEARNING_HOME
  }
  const home =
    process.env.HOME || process.env.USERPROFILE || process.cwd()
  return join(home, '.claude', 'skill-learning')
}
|
||||
|
||||
/**
 * Resolves the JSONL file observations are stored in.
 *
 * Observations go to `<root>/projects/<projectId>/observations.jsonl` when a
 * project is supplied and neither its scope nor its id is `'global'`;
 * otherwise they go to `<root>/global/observations.jsonl`.
 *
 * @param options - Optional store options supplying root and project.
 * @returns The observation file path.
 */
export function getObservationFilePath(
  options?: ObservationStoreOptions,
): string {
  const root = getSkillLearningRoot(options)
  const project = options?.project

  if (
    project &&
    project.scope !== 'global' &&
    project.projectId !== 'global'
  ) {
    return join(root, 'projects', project.projectId, 'observations.jsonl')
  }
  return join(root, 'global', 'observations.jsonl')
}
|
||||
|
||||
/**
 * Redacts secrets from a text field and bounds its length.
 *
 * Every `SECRET_PATTERNS` match is replaced. A match shaped like
 * `key: value` or `key=value` keeps its key (`key: [REDACTED]`); any other
 * match is replaced entirely. Text longer than `maxLength` is cut and
 * suffixed with its original scrubbed length and SHA-256. If redaction
 * happened only beyond the cut, a redaction marker is prepended so the
 * preview still shows that something was removed.
 *
 * @param value - Text to scrub; `undefined` passes through.
 * @param maxLength - Maximum number of characters kept from the scrubbed text.
 * @returns The scrubbed, possibly truncated text, or `undefined`.
 */
export function scrubText(
  value: string | undefined,
  maxLength = DEFAULT_MAX_FIELD_LENGTH,
): string | undefined {
  if (value === undefined) return undefined

  // Only a leading identifier directly followed by `:` or `=` counts as a
  // key. Testing for `:`/`=` anywhere in the match would misread a Bearer
  // token that contains `=` and leave the part before it in the output.
  const keyedPrefix = /^([A-Za-z][A-Za-z_-]*\s*)[:=]/

  let scrubbed = value
  for (const pattern of SECRET_PATTERNS) {
    scrubbed = scrubbed.replace(pattern, match => {
      const keyed = keyedPrefix.exec(match)
      return keyed
        ? `${keyed[1]}: ${SECRET_REPLACEMENT}`
        : SECRET_REPLACEMENT
    })
  }

  if (scrubbed.length <= maxLength) return scrubbed

  const hash = hashText(scrubbed)
  let preview = scrubbed.slice(0, maxLength)
  if (
    scrubbed.includes(SECRET_REPLACEMENT) &&
    !preview.includes(SECRET_REPLACEMENT)
  ) {
    preview = `${SECRET_REPLACEMENT} ${preview}`
  }
  return `${preview}\n[TRUNCATED length=${scrubbed.length} sha256=${hash}]`
}
|
||||
|
||||
/**
 * Returns a copy of the observation with its free-text fields scrubbed and a
 * `contentHash` computed over the scrubbed content.
 *
 * The hash covers event, tool name, tool input, tool output and message text,
 * joined by newlines, with missing values treated as empty strings.
 *
 * @param observation - Observation to sanitise; not mutated.
 * @param options - Optional store options; `maxFieldLength` bounds each field.
 * @returns The sanitised observation.
 */
export function scrubObservation(
  observation: StoredSkillObservation,
  options?: ObservationStoreOptions,
): StoredSkillObservation {
  const limit = options?.maxFieldLength ?? DEFAULT_MAX_FIELD_LENGTH
  const toolInput = scrubText(observation.toolInput, limit)
  const toolOutput = scrubText(observation.toolOutput, limit)
  const messageText = scrubText(observation.messageText, limit)

  const fingerprint = `${observation.event}\n${observation.toolName ?? ''}\n${toolInput ?? ''}\n${toolOutput ?? ''}\n${messageText ?? ''}`

  return {
    ...observation,
    toolInput,
    toolOutput,
    messageText,
    contentHash: hashText(fingerprint),
  }
}
|
||||
|
||||
const MAX_SINGLE_OBSERVATION_BYTES = 64 * 1024
|
||||
|
||||
/**
 * Scrubs an observation and appends it as one JSON line to the observation
 * file, creating the directory and archiving an oversized file first.
 *
 * An observation whose serialised form exceeds
 * `MAX_SINGLE_OBSERVATION_BYTES` is not written, but is still returned.
 *
 * @param observation - Observation to store.
 * @param options - Optional store options.
 * @returns The scrubbed observation.
 */
export async function appendObservation(
  observation: StoredSkillObservation,
  options?: ObservationStoreOptions,
): Promise<StoredSkillObservation> {
  const target = getObservationFilePath(options)
  await mkdir(dirname(target), { recursive: true })
  await archiveLargeObservationFile(options)

  const scrubbed = scrubObservation(observation, options)
  const line = JSON.stringify(scrubbed)
  const fitsOnDisk = Buffer.byteLength(line) <= MAX_SINGLE_OBSERVATION_BYTES
  if (fitsOnDisk) {
    await writeFile(target, `${line}\n`, { flag: 'a' })
  }
  return scrubbed
}
|
||||
|
||||
/**
 * Loads every observation from the observation file.
 *
 * A missing file yields `[]`; other read errors are rethrown. Blank lines and
 * lines that are not valid JSON are skipped so one damaged record cannot
 * break the whole read.
 *
 * @param options - Optional store options selecting the file.
 * @returns The stored observations in file order.
 */
export async function readObservations(
  options?: ObservationStoreOptions,
): Promise<StoredSkillObservation[]> {
  const filePath = getObservationFilePath(options)

  let raw: string
  try {
    raw = await readFile(filePath, 'utf8')
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === 'ENOENT') return []
    throw error
  }

  const loaded: StoredSkillObservation[] = []
  const lines = raw.split(/\r?\n/).filter(line => line.trim() !== '')
  for (const line of lines) {
    try {
      loaded.push(JSON.parse(line) as StoredSkillObservation)
    } catch {
      // Corrupt or truncated JSONL line: ignore it and keep reading.
    }
  }
  return loaded
}
|
||||
|
||||
/**
 * Reads a JSONL transcript and stores an observation for every user message,
 * assistant message and tool event found in it.
 *
 * Blank lines, lines that are not valid JSON, and lines whose JSON value is
 * not a plain object are skipped, matching how the observation file itself is
 * read. Previously one malformed line threw after earlier lines had already
 * been appended, leaving a partial ingest.
 *
 * @param transcriptPath - Path of the transcript file.
 * @param options - Optional store options.
 * @returns The scrubbed observations, in transcript order.
 * @throws If the transcript cannot be read or an observation cannot be stored.
 */
export async function ingestTranscript(
  transcriptPath: string,
  options?: ObservationStoreOptions,
): Promise<StoredSkillObservation[]> {
  const transcript = await readFile(transcriptPath, 'utf8')
  const observations: StoredSkillObservation[] = []

  for (const line of transcript.split(/\r?\n/)) {
    if (!line.trim()) continue

    let parsed: unknown
    try {
      parsed = JSON.parse(line)
    } catch {
      // Corrupt or truncated line: skip it, keep ingesting the rest.
      continue
    }
    // `null`, primitives and arrays cannot be transcript entries.
    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) continue

    const entry = parsed as ClaudeTranscriptEntry
    for (const observation of observationsFromTranscriptEntry(entry, options)) {
      observations.push(await appendObservation(observation, options))
    }
  }

  return observations
}
|
||||
|
||||
/**
 * Removes observations older than `maxAgeDays` from the observation file.
 *
 * Lines that cannot be parsed, or whose timestamp is not a valid date, are
 * kept. The file is rewritten only when something was purged, through a
 * temporary file followed by a rename so a crash mid-write cannot leave a
 * truncated file behind.
 *
 * @param options - Store options plus an optional `maxAgeDays`
 *   (default `DEFAULT_PURGE_MAX_AGE_DAYS`).
 * @returns The number of observations removed.
 */
export async function purgeOldObservations(
  options?: ObservationStoreOptions & { maxAgeDays?: number },
): Promise<number> {
  const filePath = getObservationFilePath(options)
  const maxAgeDays = options?.maxAgeDays ?? DEFAULT_PURGE_MAX_AGE_DAYS
  const cutoff = Date.now() - maxAgeDays * 24 * 60 * 60 * 1000

  let content: string
  try {
    content = await readFile(filePath, 'utf8')
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === 'ENOENT') return 0
    throw error
  }

  const isExpired = (line: string): boolean => {
    try {
      const record = JSON.parse(line) as StoredSkillObservation
      const capturedAt = Date.parse(record.timestamp)
      return !Number.isNaN(capturedAt) && capturedAt < cutoff
    } catch {
      // Unreadable lines are never purged.
      return false
    }
  }

  const lines = content.split(/\r?\n/).filter(line => line.trim() !== '')
  const kept = lines.filter(line => !isExpired(line))
  const purged = lines.length - kept.length
  if (purged === 0) return 0

  const tmpPath = `${filePath}.tmp-${process.pid}-${Date.now()}`
  await writeFile(tmpPath, kept.length ? `${kept.join('\n')}\n` : '')
  await rename(tmpPath, filePath)
  return purged
}
|
||||
|
||||
/**
 * Moves the observation file into `observations.archive/` once it has reached
 * the archive threshold, so the next append starts a fresh file.
 *
 * @param options - Optional store options; `archiveThresholdBytes` overrides
 *   `DEFAULT_ARCHIVE_THRESHOLD_BYTES`.
 * @returns The archive path, or `null` when the file is missing or still
 *   below the threshold.
 */
export async function archiveLargeObservationFile(
  options?: ObservationStoreOptions,
): Promise<string | null> {
  const filePath = getObservationFilePath(options)
  const threshold =
    options?.archiveThresholdBytes ?? DEFAULT_ARCHIVE_THRESHOLD_BYTES

  let size: number
  try {
    size = (await stat(filePath)).size
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === 'ENOENT') return null
    throw error
  }
  if (size < threshold) return null

  const archiveDir = join(dirname(filePath), 'observations.archive')
  await mkdir(archiveDir, { recursive: true })

  // ISO timestamp with `:` and `.` replaced so it is a valid file name.
  const stamp = new Date().toISOString().replace(/[:.]/g, '-')
  const archivePath = join(archiveDir, `observations-${stamp}.jsonl`)
  await rename(filePath, archivePath)
  return archivePath
}
|
||||
|
||||
/**
 * Converts one transcript entry into zero or more observations.
 *
 * - An entry with `tool_name` yields a `tool_complete` observation.
 * - A user entry yields one `tool_complete` per `tool_result` block, or a
 *   single `user_message` when it contains none.
 * - An assistant entry yields one `tool_start` per `tool_use` block, plus an
 *   `assistant_message` when it carries non-blank text.
 *
 * The role is taken from `message.role`, falling back to the entry `type`.
 *
 * @param entry - Parsed transcript line.
 * @param options - Optional store options supplying the project context.
 * @returns The observations derived from the entry (not yet scrubbed).
 */
function observationsFromTranscriptEntry(
  entry: ClaudeTranscriptEntry,
  options?: ObservationStoreOptions,
): StoredSkillObservation[] {
  const project = options?.project
  const content = entry.message?.content
  const role = entry.message?.role ?? entry.type

  // Fields shared by every observation produced from this entry.
  const base = {
    sessionId: entry.sessionId ?? 'unknown-session',
    projectId: project?.projectId ?? 'global',
    projectName: project?.projectName ?? 'global',
    cwd: entry.cwd ?? project?.cwd ?? process.cwd(),
    timestamp: entry.timestamp ?? new Date().toISOString(),
    source: 'transcript' as const,
  }

  const observations: StoredSkillObservation[] = []

  if (entry.tool_name) {
    observations.push({
      ...base,
      id: createObservationId(),
      event: 'tool_complete',
      toolName: entry.tool_name,
      toolInput: stringifyField(entry.tool_input),
      toolOutput: stringifyField(entry.tool_response),
      outcome: inferOutcome(entry.tool_response),
    })
  }

  switch (role) {
    case 'user': {
      const toolResults = extractToolResults(content)
      if (toolResults.length === 0) {
        observations.push({
          ...base,
          id: createObservationId(),
          event: 'user_message',
          messageText: extractText(content),
        })
        break
      }
      for (const { name, output, isError } of toolResults) {
        observations.push({
          ...base,
          id: createObservationId(),
          event: 'tool_complete',
          toolName: name,
          toolOutput: output,
          outcome: isError ? 'failure' : 'success',
        })
      }
      break
    }
    case 'assistant': {
      for (const { name, input } of extractToolUses(content)) {
        observations.push({
          ...base,
          id: createObservationId(),
          event: 'tool_start',
          toolName: name,
          toolInput: input,
        })
      }
      const text = extractText(content)
      if (text.trim()) {
        observations.push({
          ...base,
          id: createObservationId(),
          event: 'assistant_message',
          messageText: text,
        })
      }
      break
    }
  }

  return observations
}
|
||||
|
||||
/**
 * Extracts the plain text of a message content value.
 *
 * A string is returned as is. An array contributes its string elements and
 * the string `text` property of its object elements, joined by newlines, with
 * empty pieces dropped. Any other value is stringified, and `null` or
 * `undefined` becomes `''`.
 *
 * @param content - Message content of unknown shape.
 * @returns The extracted text.
 */
function extractText(content: unknown): string {
  if (typeof content === 'string') return content
  if (!Array.isArray(content)) return stringifyField(content) ?? ''

  const pieces: string[] = []
  for (const part of content as unknown[]) {
    let piece = ''
    if (typeof part === 'string') {
      piece = part
    } else if (part && typeof part === 'object') {
      const text = (part as Record<string, unknown>).text
      if (typeof text === 'string') piece = text
    }
    if (piece) pieces.push(piece)
  }
  return pieces.join('\n')
}
|
||||
|
||||
/**
 * Collects the `tool_use` blocks of a message content array.
 *
 * @param content - Message content; non-arrays yield `[]`.
 * @returns One entry per `tool_use` block with its name (`'unknown_tool'`
 *   when absent) and its stringified input.
 */
function extractToolUses(
  content: unknown,
): Array<{ name: string; input: string | undefined }> {
  const uses: Array<{ name: string; input: string | undefined }> = []
  if (!Array.isArray(content)) return uses

  for (const part of content as unknown[]) {
    if (!part || typeof part !== 'object') continue
    const block = part as Record<string, unknown>
    if (block.type !== 'tool_use') continue
    uses.push({
      name: String(block.name ?? 'unknown_tool'),
      input: stringifyField(block.input),
    })
  }
  return uses
}
|
||||
|
||||
/**
 * Collects the `tool_result` blocks of a message content array.
 *
 * @param content - Message content; non-arrays yield `[]`.
 * @returns One entry per `tool_result` block with its name (from `name`, then
 *   `tool_name`, then `'unknown_tool'`), its stringified content, and whether
 *   `is_error` is exactly `true`.
 */
function extractToolResults(
  content: unknown,
): Array<{ name: string; output: string | undefined; isError: boolean }> {
  const results: Array<{
    name: string
    output: string | undefined
    isError: boolean
  }> = []
  if (!Array.isArray(content)) return results

  for (const part of content as unknown[]) {
    if (!part || typeof part !== 'object') continue
    const block = part as Record<string, unknown>
    if (block.type !== 'tool_result') continue
    results.push({
      name: String(block.name ?? block.tool_name ?? 'unknown_tool'),
      output: stringifyField(block.content),
      isError: block.is_error === true,
    })
  }
  return results
}
|
||||
|
||||
/**
 * Guesses a tool outcome from the text of its response.
 *
 * The response is stringified and lowercased. Mentioning `interrupted` or
 * `aborted` means `'interrupted'`; otherwise mentioning `error`, `exception`
 * or `failed` means `'failure'`; anything else is `'success'`.
 *
 * @param value - Tool response of unknown shape.
 * @returns The inferred outcome.
 */
function inferOutcome(value: unknown): ObservationOutcome {
  const text = stringifyField(value)?.toLowerCase() ?? ''
  const mentions = (...needles: string[]): boolean =>
    needles.some(needle => text.includes(needle))

  if (mentions('interrupted', 'aborted')) return 'interrupted'
  if (mentions('error', 'exception', 'failed')) return 'failure'
  return 'success'
}
|
||||
|
||||
/**
 * Turns an arbitrary value into text suitable for storing in an observation.
 *
 * @param value - Value of unknown shape.
 * @returns `undefined` for `null` and `undefined`, the value itself when it
 *   is a string, otherwise its `JSON.stringify` result.
 */
export function stringifyField(value: unknown): string | undefined {
  if (value == null) return undefined
  return typeof value === 'string' ? value : JSON.stringify(value)
}
|
||||
|
||||
/**
 * Generates a fresh random UUID to identify an observation.
 *
 * `randomUUID` is imported from `node:crypto` at the top of the file and is
 * therefore always available, so no feature detection on the global `crypto`
 * object is needed.
 *
 * @returns A random UUID string.
 */
function createObservationId(): string {
  return randomUUID()
}
|
||||
|
||||
/**
 * Computes the SHA-256 digest of a string.
 *
 * @param value - Text to hash.
 * @returns The digest as lowercase hexadecimal.
 */
function hashText(value: string): string {
  const hasher = createHash('sha256')
  hasher.update(value)
  return hasher.digest('hex')
}
|
||||
71
src/services/skillLearning/observerBackend.ts
Normal file
71
src/services/skillLearning/observerBackend.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
import type { InstinctCandidate } from './instinctParser.js'
|
||||
import type { StoredSkillObservation } from './observationStore.js'
|
||||
import type { SkillLearningProjectContext } from './types.js'
|
||||
|
||||
export type ObserverBackendContext = {
|
||||
project?: SkillLearningProjectContext
|
||||
}
|
||||
|
||||
export type ObserverBackendResult =
|
||||
| InstinctCandidate[]
|
||||
| Promise<InstinctCandidate[]>
|
||||
|
||||
export interface ObserverBackend {
|
||||
readonly name: string
|
||||
analyze(
|
||||
observations: StoredSkillObservation[],
|
||||
ctx?: ObserverBackendContext,
|
||||
): ObserverBackendResult
|
||||
}
|
||||
|
||||
const registry = new Map<string, ObserverBackend>()
|
||||
let activeName: string | undefined
|
||||
|
||||
/**
 * Adds a backend to the registry under its `name`, replacing any backend
 * already registered under that name. The first backend registered while
 * none is active becomes the active one.
 *
 * @param backend - Backend to register.
 */
export function registerObserverBackend(backend: ObserverBackend): void {
  const { name } = backend
  registry.set(name, backend)
  activeName = activeName || name
}
|
||||
|
||||
/**
 * Makes a registered backend the active one.
 *
 * @param name - Name of a previously registered backend.
 * @throws Error when no backend is registered under `name`.
 */
export function setActiveObserverBackend(name: string): void {
  const isRegistered = registry.has(name)
  if (isRegistered) {
    activeName = name
    return
  }
  throw new Error(`Observer backend "${name}" is not registered`)
}
|
||||
|
||||
/**
 * Returns the currently active backend.
 *
 * @throws Error when no backend is active or the active name is no longer in
 *   the registry.
 */
export function getActiveObserverBackend(): ObserverBackend {
  if (activeName) {
    const backend = registry.get(activeName)
    if (backend) return backend
  }
  throw new Error(
    'No observer backend is active — register one before analyzing observations',
  )
}
|
||||
|
||||
/**
 * Lists the names of all registered backends in registration order.
 */
export function listObserverBackends(): string[] {
  return [...registry.keys()]
}
|
||||
|
||||
/**
 * Test helper: forgets the active backend and empties the registry.
 */
export function resetObserverBackendsForTest(): void {
  activeName = undefined
  registry.clear()
}
|
||||
|
||||
/**
 * Runs the active backend on the observations and always answers with a
 * promise, whether the backend itself is synchronous or asynchronous.
 *
 * @param observations - Observations to analyse.
 * @param ctx - Optional backend context passed through unchanged.
 * @returns The candidates produced by the active backend; rejects when no
 *   backend is active or the backend fails.
 */
export async function analyzeWithActiveBackend(
  observations: StoredSkillObservation[],
  ctx?: ObserverBackendContext,
): Promise<InstinctCandidate[]> {
  const backend = getActiveObserverBackend()
  return await backend.analyze(observations, ctx)
}
|
||||
|
||||
/**
 * Reads the backend name requested through the
 * `SKILL_LEARNING_OBSERVER_BACKEND` environment variable.
 *
 * @returns The trimmed name when it is non-empty and registered, otherwise
 *   `undefined`.
 */
function pickBackendFromEnv(): string | undefined {
  const requested = process.env.SKILL_LEARNING_OBSERVER_BACKEND?.trim()
  if (!requested) return undefined
  return registry.has(requested) ? requested : undefined
}
|
||||
|
||||
/**
 * Activates the backend requested through the environment, if it is
 * registered, and returns whichever backend is active afterwards.
 *
 * @throws Error when no backend is active.
 */
export function resolveDefaultObserverBackend(): ObserverBackend {
  const fromEnv = pickBackendFromEnv()
  if (fromEnv) {
    setActiveObserverBackend(fromEnv)
  }
  return getActiveObserverBackend()
}
|
||||
264
src/services/skillLearning/projectContext.ts
Normal file
264
src/services/skillLearning/projectContext.ts
Normal file
@@ -0,0 +1,264 @@
|
||||
import { execFileSync } from 'child_process'
|
||||
import { createHash } from 'crypto'
|
||||
import {
|
||||
existsSync,
|
||||
mkdirSync,
|
||||
readFileSync,
|
||||
realpathSync,
|
||||
writeFileSync,
|
||||
} from 'fs'
|
||||
import { basename, join, resolve } from 'path'
|
||||
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
|
||||
import type {
|
||||
ProjectContextSource,
|
||||
SkillLearningProjectContext,
|
||||
SkillLearningProjectRecord,
|
||||
SkillLearningProjectsRegistry,
|
||||
SkillLearningScope,
|
||||
} from './types.js'
|
||||
|
||||
// Schema version stamped into projects.json; files carrying any other version
// are ignored by readProjectsRegistry.
const REGISTRY_VERSION = 1

// Identifier and display name of the shared, project-independent context.
const GLOBAL_PROJECT_ID = 'global'
const GLOBAL_PROJECT_NAME = 'Global'
|
||||
|
||||
/**
 * Root directory for all skill-learning state.
 *
 * @returns `<claude config home>/skill-learning`.
 */
export function getSkillLearningRootDir(): string {
  const configHome = getClaudeConfigHomeDir()
  return join(configHome, 'skill-learning')
}
|
||||
|
||||
/**
 * Location of the registry file that indexes every known project.
 *
 * @returns `<skill-learning root>/projects.json`.
 */
export function getProjectsRegistryPath(): string {
  const root = getSkillLearningRootDir()
  return join(root, 'projects.json')
}
|
||||
|
||||
/**
 * Storage directory for one project's skill-learning data.
 *
 * @param projectId - Project identifier; the global id maps to a dedicated
 *   `global` directory instead of a `projects/<id>` subdirectory.
 * @returns The directory path (not created by this function).
 */
export function getProjectStorageDir(projectId: string): string {
  const root = getSkillLearningRootDir()
  return projectId === GLOBAL_PROJECT_ID
    ? join(root, 'global')
    : join(root, 'projects', projectId)
}
|
||||
|
||||
/**
 * Path of the per-project metadata file.
 *
 * @param projectId - Project identifier.
 * @returns `<project storage dir>/project.json`.
 */
export function getProjectContextPath(projectId: string): string {
  const storageDir = getProjectStorageDir(projectId)
  return join(storageDir, 'project.json')
}
|
||||
|
||||
// In-memory cache of resolved contexts, keyed by the exact cwd string so that
// different worktrees keep independent entries. Resolving a context forks
// `git` synchronously and persisting it rewrites the registry and
// project.json; doing that on every call from the tool.call hot path (one
// wrapper invocation per tool) would add up to hundreds of milliseconds per
// session.
const contextCache = new Map<string, SkillLearningProjectContext>()

// Minimum spacing between registry "touch" writes for cache hits (5 minutes).
const PERSIST_INTERVAL_MS = 5 * 60 * 1000

// Epoch milliseconds of the most recent persist; 0 means "never".
let lastPersistAt = 0
|
||||
|
||||
/**
 * Resolves (and caches) the skill-learning project context for a directory.
 *
 * On a cache miss the context is derived from the environment / git state,
 * cached, and persisted immediately. On a cache hit the cached value is
 * returned, and the registry is re-persisted only when more than
 * `PERSIST_INTERVAL_MS` has passed since the last persist, which keeps
 * `lastSeenAt` reasonably fresh in long-lived processes without writing on
 * every tool call.
 *
 * @param cwd - Directory to resolve; defaults to the process working directory.
 * @returns The project context for `cwd`.
 */
export function resolveProjectContext(
  cwd = process.cwd(),
): SkillLearningProjectContext {
  const hit = contextCache.get(cwd)

  if (!hit) {
    const fresh = resolveContext(cwd)
    contextCache.set(cwd, fresh)
    persistProjectContext(fresh)
    lastPersistAt = Date.now()
    return fresh
  }

  const now = Date.now()
  const touchIsDue = now - lastPersistAt > PERSIST_INTERVAL_MS
  if (touchIsDue) {
    // Record the attempt before writing so a failing write is not retried on
    // every subsequent call.
    lastPersistAt = now
    persistProjectContext(hit)
  }
  return hit
}
|
||||
|
||||
/**
 * Test helper: resets the persist throttle and drops every cached context.
 */
export function resetProjectContextCacheForTest(): void {
  lastPersistAt = 0
  contextCache.clear()
}
|
||||
|
||||
/**
 * Lists every project recorded in the on-disk registry.
 *
 * @returns Project records sorted by `projectName` using locale comparison;
 *   empty when the registry is missing or unreadable.
 */
export function listKnownProjects(): SkillLearningProjectRecord[] {
  const { projects } = readProjectsRegistry(getProjectsRegistryPath())
  const records = Object.values(projects)
  records.sort((left, right) =>
    left.projectName.localeCompare(right.projectName),
  )
  return records
}
|
||||
|
||||
/**
 * Derives the project context for `cwd` without consulting the cache.
 *
 * Identity sources are tried in priority order:
 * 1. The `CLAUDE_PROJECT_DIR` environment variable (trimmed, non-empty).
 * 2. The URL of the `origin` git remote; the project root is the git
 *    top-level directory, falling back to `cwd` when that lookup fails.
 * 3. The git top-level directory alone.
 * 4. The shared global context when none of the above is available.
 *
 * @param cwd - Directory the context is being resolved for.
 * @returns A freshly built project context.
 */
function resolveContext(cwd: string): SkillLearningProjectContext {
  const pinnedDir = process.env.CLAUDE_PROJECT_DIR?.trim()
  if (pinnedDir) {
    const root = normalizePath(pinnedDir)
    return buildContext({
      source: 'claude_project_dir',
      scope: 'project',
      cwd,
      projectRoot: root,
      identity: `claude-project-dir:${root}`,
      projectName: basename(root) || 'project',
    })
  }

  // Both git lookups run in this order regardless of which branch wins.
  const originUrl = git(['remote', 'get-url', 'origin'], cwd)
  const workTreeRoot = git(['rev-parse', '--show-toplevel'], cwd)

  if (originUrl) {
    const remote = normalizeGitRemote(originUrl)
    return buildContext({
      source: 'git_remote',
      scope: 'project',
      cwd,
      projectRoot: normalizePath(workTreeRoot || cwd),
      gitRemote: remote,
      identity: `git-remote:${remote}`,
      projectName: projectNameFromRemote(remote),
    })
  }

  if (workTreeRoot) {
    const root = normalizePath(workTreeRoot)
    return buildContext({
      source: 'git_root',
      scope: 'project',
      cwd,
      projectRoot: root,
      identity: `git-root:${root}`,
      projectName: basename(root) || 'project',
    })
  }

  return buildContext({
    source: 'global',
    scope: 'global',
    cwd,
    projectRoot: undefined,
    identity: 'global',
    projectName: GLOBAL_PROJECT_NAME,
  })
}
|
||||
|
||||
/**
 * Assembles a `SkillLearningProjectContext` from resolved identity details.
 *
 * The global scope always uses the fixed global project id; every other scope
 * gets an id hashed from `input.identity`. The `cwd` is normalized and the
 * storage directory is derived from the resulting project id.
 *
 * @param input - Resolved source, scope, paths, identity string and name.
 * @returns The complete project context.
 */
function buildContext(input: {
  source: ProjectContextSource
  scope: SkillLearningScope
  cwd: string
  projectRoot?: string
  gitRemote?: string
  identity: string
  projectName: string
}): SkillLearningProjectContext {
  const { scope, source, projectName, projectRoot, gitRemote } = input

  let projectId = GLOBAL_PROJECT_ID
  if (scope !== 'global') {
    projectId = stableProjectId(input.identity)
  }

  return {
    projectId,
    projectName,
    scope,
    source,
    cwd: normalizePath(input.cwd),
    projectRoot,
    gitRemote,
    storageDir: getProjectStorageDir(projectId),
  }
}
|
||||
|
||||
/**
 * Writes a project context to disk: once into the shared registry and once
 * into the project's own `project.json`.
 *
 * `firstSeenAt` is carried over from an existing registry record when there
 * is one; `lastSeenAt` and the registry's `updatedAt` are set to the current
 * time. Required directories are created as needed.
 *
 * @param context - Context to record.
 */
function persistProjectContext(context: SkillLearningProjectContext): void {
  const stamp = new Date().toISOString()
  const registryPath = getProjectsRegistryPath()
  const registry = readProjectsRegistry(registryPath)
  const previous = registry.projects[context.projectId]

  const record: SkillLearningProjectRecord = {
    ...context,
    firstSeenAt: previous?.firstSeenAt ?? stamp,
    lastSeenAt: stamp,
  }
  registry.projects[context.projectId] = record
  registry.updatedAt = stamp

  for (const dir of [context.storageDir, getSkillLearningRootDir()]) {
    mkdirSync(dir, { recursive: true })
  }
  writeJson(registryPath, registry)
  writeJson(getProjectContextPath(context.projectId), record)
}
|
||||
|
||||
/**
 * Loads the projects registry from disk, tolerating missing or corrupt files.
 *
 * A fresh, empty registry is returned when the file does not exist, cannot be
 * read or parsed, carries a different schema version, or does not hold a
 * plain object under `projects`. Arrays are rejected explicitly: string-keyed
 * records assigned onto an array are dropped by `JSON.stringify`, so accepting
 * one would make every later write silently lose its records.
 *
 * @param path - Location of `projects.json`.
 * @returns The parsed registry, or an empty registry for the current version.
 */
function readProjectsRegistry(path: string): SkillLearningProjectsRegistry {
  // Built anew on each use so callers may mutate the returned object freely.
  const emptyRegistry = (): SkillLearningProjectsRegistry => ({
    version: REGISTRY_VERSION,
    updatedAt: new Date(0).toISOString(),
    projects: {},
  })

  if (!existsSync(path)) return emptyRegistry()

  try {
    const parsed: unknown = JSON.parse(readFileSync(path, 'utf8'))
    if (typeof parsed !== 'object' || parsed === null) return emptyRegistry()

    const { version, updatedAt, projects } =
      parsed as Partial<SkillLearningProjectsRegistry>
    const hasProjectMap =
      typeof projects === 'object' &&
      projects !== null &&
      !Array.isArray(projects)
    if (version !== REGISTRY_VERSION || !hasProjectMap) return emptyRegistry()

    return {
      version: REGISTRY_VERSION,
      updatedAt:
        typeof updatedAt === 'string' ? updatedAt : new Date(0).toISOString(),
      projects: projects as Record<string, SkillLearningProjectRecord>,
    }
  } catch {
    // Corrupt state should not block startup; start over with an empty registry.
    return emptyRegistry()
  }
}
|
||||
|
||||
/**
 * Serializes `value` as two-space-indented JSON followed by a newline and
 * writes it to `path` as UTF-8, replacing any existing file.
 *
 * @param path - Destination file.
 * @param value - Value to serialize.
 */
function writeJson(path: string, value: unknown): void {
  const serialized = JSON.stringify(value, null, 2)
  writeFileSync(path, serialized + '\n', 'utf8')
}
|
||||
|
||||
/**
 * Runs `git -C <cwd> <args...>` synchronously and returns its trimmed stdout.
 *
 * stdin and stderr are ignored. Any failure (git missing, non-zero exit,
 * invalid directory) is reported as `null`, as is empty output.
 *
 * @param args - Arguments placed after `-C <cwd>`.
 * @param cwd - Directory git should operate in.
 * @returns Trimmed stdout, or `null` when the command fails or prints nothing.
 */
function git(args: string[], cwd: string): string | null {
  let stdout: string
  try {
    stdout = execFileSync('git', ['-C', cwd, ...args], {
      encoding: 'utf8',
      stdio: ['ignore', 'pipe', 'ignore'],
    })
  } catch {
    return null
  }
  const text = stdout.trim()
  return text.length > 0 ? text : null
}
|
||||
|
||||
/**
 * Produces a canonical form of a filesystem path.
 *
 * The path is made absolute, resolved through `realpathSync.native` when
 * possible (the absolute path is kept when that fails, for example because
 * the path does not exist), and finally Unicode-normalized to NFC.
 *
 * @param path - Absolute or relative path.
 * @returns The canonical absolute path.
 */
function normalizePath(path: string): string {
  const absolute = resolve(path)
  let canonical = absolute
  try {
    canonical = realpathSync.native(absolute)
  } catch {
    // Keep the unresolved absolute path.
  }
  return canonical.normalize('NFC')
}
|
||||
|
||||
/**
 * Canonicalizes a git remote URL so equivalent spellings share one identity.
 *
 * Steps: trim whitespace, turn backslashes into forward slashes, strip
 * trailing slashes, strip a trailing `.git` suffix (case-insensitive), strip
 * any slashes exposed by that, and lowercase the result.
 *
 * Trailing slashes are removed before the `.git` suffix is examined, so
 * `…/repo.git/` and `…/repo.git` normalize to the same value.
 *
 * @param remote - Raw remote URL as reported by git.
 * @returns The normalized, lowercased remote.
 */
function normalizeGitRemote(remote: string): string {
  return remote
    .trim()
    .replace(/\\/g, '/')
    .replace(/\/+$/, '')
    .replace(/\.git$/i, '')
    .replace(/\/+$/, '')
    .toLowerCase()
}
|
||||
|
||||
/**
 * Extracts a display name from a remote URL: the final path segment after the
 * last `/` or `:`, without a trailing `.git`.
 *
 * @param remote - Remote URL.
 * @returns The repository name, or `'project'` when none can be extracted.
 */
function projectNameFromRemote(remote: string): string {
  const tail = /[:/]([^/:]+?)(?:\.git)?$/.exec(remote)
  if (tail === null) return 'project'
  return tail[1] || 'project'
}
|
||||
|
||||
/**
 * Derives a deterministic project id from an identity string.
 *
 * @param identity - Identity string to hash.
 * @returns `project-` followed by the first 16 hex characters of the SHA-256
 *   digest of `identity`.
 */
function stableProjectId(identity: string): string {
  const digest = createHash('sha256').update(identity).digest('hex')
  return 'project-' + digest.substring(0, 16)
}
|
||||
161
src/services/skillLearning/promotion.ts
Normal file
161
src/services/skillLearning/promotion.ts
Normal file
@@ -0,0 +1,161 @@
|
||||
import { readdir } from 'node:fs/promises'
|
||||
import { existsSync } from 'node:fs'
|
||||
import { join } from 'node:path'
|
||||
import type { Instinct, StoredInstinct } from './instinctParser.js'
|
||||
import {
|
||||
getInstinctsDir,
|
||||
loadInstincts,
|
||||
saveInstinct,
|
||||
type InstinctStoreOptions,
|
||||
} from './instinctStore.js'
|
||||
import { getSkillLearningRoot } from './observationStore.js'
|
||||
import type { SkillLearningProjectContext } from './types.js'
|
||||
|
||||
/** An instinct that qualifies for promotion from project scope to global scope. */
export type PromotionCandidate = {
  /** Id shared by the project-scoped instinct records that were grouped. */
  instinctId: string
  /** Mean confidence across the grouped records, rounded to two decimals. */
  averageConfidence: number
  /** Distinct ids of the projects in which the instinct was found. */
  projectIds: string[]
}
|
||||
|
||||
/** Tuning knobs for `checkPromotion`. */
export type PromotionOptions = {
  /** Overrides the skill-learning root directory. */
  rootDir?: string
  /** Minimum number of distinct projects required (defaults to 2). */
  minProjects?: number
  /** Minimum average confidence required (defaults to 0.8). */
  minConfidence?: number
}
|
||||
|
||||
// Ids of instincts already promoted by this process, so that repeated
// promotion checks do not save the same global instinct again.
const sessionPromotedIds = new Set<string>()

/**
 * Forgets which instincts were promoted during this process lifetime.
 */
export function resetPromotionBookkeeping(): void {
  sessionPromotedIds.clear()
}
|
||||
|
||||
/**
 * Finds project-scoped instincts that recur across enough projects with high
 * enough confidence to be promoted.
 *
 * Instincts are grouped by id (records whose scope is not `'project'` are
 * ignored). A group qualifies when it spans at least `minProjects` distinct,
 * non-empty project ids and its mean confidence is at least `minConfidence`.
 * The threshold is checked against the unrounded mean; the reported
 * `averageConfidence` is rounded to two decimals.
 *
 * @param instincts - Instinct records, typically gathered from many projects.
 * @param minProjects - Minimum number of distinct projects (default 2).
 * @param minConfidence - Minimum mean confidence (default 0.8).
 * @returns One candidate per qualifying id, in first-seen order.
 */
export function findPromotionCandidates(
  instincts: Instinct[],
  minProjects = 2,
  minConfidence = 0.8,
): PromotionCandidate[] {
  const byId = new Map<string, Instinct[]>()
  for (const instinct of instincts) {
    if (instinct.scope !== 'project') continue
    const bucket = byId.get(instinct.id)
    if (bucket) {
      bucket.push(instinct)
    } else {
      byId.set(instinct.id, [instinct])
    }
  }

  const candidates: PromotionCandidate[] = []
  for (const [instinctId, members] of byId) {
    const distinctProjects = new Set<string>()
    let total = 0
    for (const member of members) {
      if (member.projectId) distinctProjects.add(member.projectId)
      total += member.confidence
    }
    const mean = total / members.length

    const qualifies =
      distinctProjects.size >= minProjects && mean >= minConfidence
    if (!qualifies) continue

    candidates.push({
      instinctId,
      projectIds: [...distinctProjects],
      averageConfidence: Number(mean.toFixed(2)),
    })
  }
  return candidates
}
|
||||
|
||||
/**
 * Promotes instincts that recur across projects into the global scope.
 *
 * Loads the instincts of every project directory, selects promotion
 * candidates, and for each candidate not yet promoted by this process saves a
 * global copy of the first matching instinct. The copy has its project
 * fields cleared, its confidence set to the candidate average and a fresh
 * `updatedAt`.
 *
 * @param options - Root directory override and thresholds.
 * @returns The candidates that were promoted by this call.
 */
export async function checkPromotion(
  options: PromotionOptions = {},
): Promise<PromotionCandidate[]> {
  const { rootDir } = options
  const projectThreshold = options.minProjects ?? 2
  const confidenceThreshold = options.minConfidence ?? 0.8

  const pool = await loadAllProjectInstincts(rootDir)
  const eligible = findPromotionCandidates(
    pool,
    projectThreshold,
    confidenceThreshold,
  )

  const newlyPromoted: PromotionCandidate[] = []
  for (const candidate of eligible) {
    const { instinctId } = candidate
    if (sessionPromotedIds.has(instinctId)) continue

    // The first record with this id serves as the template for the global copy.
    const template = pool.find(instinct => instinct.id === instinctId)
    if (!template) continue

    const promotedInstinct: StoredInstinct = {
      ...template,
      scope: 'global',
      projectId: undefined,
      projectName: undefined,
      confidence: candidate.averageConfidence,
      updatedAt: new Date().toISOString(),
    }
    const storeOptions: InstinctStoreOptions = {
      rootDir,
      scope: 'global',
      project: globalProjectContext(rootDir),
    }
    await saveInstinct(promotedInstinct, storeOptions)

    sessionPromotedIds.add(instinctId)
    newlyPromoted.push(candidate)
  }

  return newlyPromoted
}
|
||||
|
||||
/**
 * Loads the instincts of every project directory found under
 * `<skill-learning root>/projects`.
 *
 * Each subdirectory is treated as a project whose id and name are the
 * directory name. Projects are loaded one after another, so the result keeps
 * directory-listing order.
 *
 * @param rootDir - Optional override of the skill-learning root.
 * @returns All project-scoped instincts, or an empty array when the projects
 *   directory does not exist.
 */
async function loadAllProjectInstincts(
  rootDir?: string,
): Promise<StoredInstinct[]> {
  const root = getSkillLearningRoot(rootDir ? { rootDir } : undefined)
  const projectsRoot = join(root, 'projects')
  if (!existsSync(projectsRoot)) return []

  const entries = await readdir(projectsRoot, { withFileTypes: true })
  const projectDirs = entries.filter(entry => entry.isDirectory())

  const collected: StoredInstinct[] = []
  for (const { name } of projectDirs) {
    // Synthetic context: only the id and storage location are derived from
    // the directory; `source` and `cwd` are placeholder values.
    const project: SkillLearningProjectContext = {
      projectId: name,
      projectName: name,
      scope: 'project',
      source: 'git_root',
      cwd: projectsRoot,
      storageDir: join(projectsRoot, name),
    }
    const loaded = await loadInstincts({ rootDir, project, scope: 'project' })
    collected.push(...loaded)
  }
  return collected
}
|
||||
|
||||
/**
 * Builds the context describing the global (project-independent) scope.
 *
 * @param rootDir - Optional override of the skill-learning root.
 * @returns A context whose `cwd` is the root and whose storage directory is
 *   `<root>/global`.
 */
function globalProjectContext(rootDir?: string): SkillLearningProjectContext {
  const rootOptions = rootDir ? { rootDir } : undefined
  const root = getSkillLearningRoot(rootOptions)
  const storageDir = join(root, 'global')
  return {
    projectId: 'global',
    projectName: 'Global',
    scope: 'global',
    source: 'global',
    cwd: root,
    storageDir,
  }
}
|
||||
|
||||
/**
 * Directory holding the global-scope instincts, exposed for consumers that
 * need to inspect it.
 *
 * @param rootDir - Optional override of the skill-learning root.
 * @returns The instincts directory for the global scope.
 */
export function getGlobalInstinctsDir(rootDir?: string): string {
  const project = globalProjectContext(rootDir)
  return getInstinctsDir({ rootDir, scope: 'global', project })
}
|
||||
386
src/services/skillLearning/runtimeObserver.ts
Normal file
386
src/services/skillLearning/runtimeObserver.ts
Normal file
@@ -0,0 +1,386 @@
|
||||
import type { REPLHookContext } from '../../utils/hooks/postSamplingHooks.js'
|
||||
import { registerPostSamplingHook } from '../../utils/hooks/postSamplingHooks.js'
|
||||
import { getSkillLearningConfig } from './config.js'
|
||||
import { isSkillLearningEnabled } from './featureCheck.js'
|
||||
import {
|
||||
appendObservation,
|
||||
getSkillLearningRoot,
|
||||
purgeOldObservations,
|
||||
stringifyField,
|
||||
} from './observationStore.js'
|
||||
import { resolveProjectContext } from './projectContext.js'
|
||||
import './sessionObserver.js'
|
||||
import { createInstinct } from './instinctParser.js'
|
||||
import {
|
||||
analyzeWithActiveBackend,
|
||||
resolveDefaultObserverBackend,
|
||||
} from './observerBackend.js'
|
||||
import {
|
||||
decayInstinctConfidence,
|
||||
loadInstincts,
|
||||
prunePendingInstincts,
|
||||
upsertInstinct,
|
||||
} from './instinctStore.js'
|
||||
import type { StoredSkillObservation } from './observationStore.js'
|
||||
import type { Message } from '../../types/message.js'
|
||||
import {
|
||||
applySkillLifecycleDecision,
|
||||
compareExistingArtifacts,
|
||||
decideSkillLifecycle,
|
||||
} from './skillLifecycle.js'
|
||||
import {
|
||||
generateAgentCandidates,
|
||||
generateCommandCandidates,
|
||||
clusterInstincts,
|
||||
} from './evolution.js'
|
||||
import { generateOrMergeSkillDraft } from './skillGenerator.js'
|
||||
import { shouldGenerateSkillFromInstincts } from './learningPolicy.js'
|
||||
import { writeLearnedCommand } from './commandGenerator.js'
|
||||
import { writeLearnedAgent } from './agentGenerator.js'
|
||||
import { readObservations } from './observationStore.js'
|
||||
import { checkPromotion } from './promotion.js'
|
||||
import { existsSync } from 'node:fs'
|
||||
import { join } from 'node:path'
|
||||
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
|
||||
|
||||
// Session id stamped on every observation produced by the runtime observer.
export const RUNTIME_SESSION_ID = 'runtime-session'

// Set once initSkillLearning has run, making later calls no-ops.
let initialized = false

// Number of post-sampling passes that got past the self-filter checks.
let runtimeTurn = 0

// Timestamp watermark for consumed tool-hook observations: only records
// newer than this are replayed on the next post-sampling pass.
let lastConsumedToolHookTimestamp = ''

// --- H5: LLM call throttle ---
// Count of backend analyses in this process and the time (epoch ms) of the
// most recent one.
let llmCallsThisSession = 0
let lastLlmCallTimestamp = 0

// --- H6: message watermark dedup ---
// Keys of the form `${sessionId}:${messageId}`; prevents reprocessing the
// same message across repeated post-sampling calls in one REPL session.
const lastProcessedMessageIds = new Set<string>()

// Once the set grows past MAX_PROCESSED_IDS its oldest entries are dropped
// until TRIM_PROCESSED_IDS_TO remain.
const MAX_PROCESSED_IDS = 1000
const TRIM_PROCESSED_IDS_TO = 500
|
||||
|
||||
/**
 * Clears the message dedup set and resets the LLM throttle counters.
 */
export function resetRuntimeLLMBookkeeping(): void {
  lastProcessedMessageIds.clear()
  lastLlmCallTimestamp = 0
  llmCallsThisSession = 0
}
|
||||
|
||||
/**
 * @returns The current value of the runtime turn counter.
 */
export function getRuntimeTurn(): number {
  const turn = runtimeTurn
  return turn
}
|
||||
|
||||
/**
 * One-time setup of the skill-learning runtime; later calls are no-ops.
 *
 * Applies the observer backend selected through
 * `SKILL_LEARNING_OBSERVER_BACKEND`, registers the post-sampling hook and
 * kicks off startup maintenance in the background.
 */
export function initSkillLearning(): void {
  if (initialized) return
  initialized = true

  // Without this call the environment switch would never take effect and the
  // registry would stay on whichever backend is already active. Failures are
  // swallowed so that a typo in the variable, or an empty registry, can never
  // crash startup.
  try {
    resolveDefaultObserverBackend()
  } catch {
    // Leave the registry in its existing state.
  }

  registerPostSamplingHook(runSkillLearningPostSampling)

  // Fire-and-forget: maintenance errors are dropped so that they never block
  // or fail CLI startup.
  void runStartupMaintenance().catch(() => {})
}
|
||||
|
||||
/**
 * Housekeeping for the current project's skill-learning data: confidence
 * decay, purge of old observations and pruning of pending instincts (called
 * with the argument `30`).
 *
 * Does nothing when skill learning is disabled or
 * `CLAUDE_SKILL_LEARNING_DISABLE` is set. The three tasks run concurrently and
 * a failure in one does not affect the others.
 */
async function runStartupMaintenance(): Promise<void> {
  if (!isSkillLearningEnabled() || process.env.CLAUDE_SKILL_LEARNING_DISABLE) {
    return
  }
  const options = { project: resolveProjectContext(process.cwd()) }
  const tasks = [
    decayInstinctConfidence(options),
    purgeOldObservations(options),
    prunePendingInstincts(30, options),
  ]
  await Promise.allSettled(tasks)
}
|
||||
|
||||
/**
 * Reports whether `cwd` is the skill-learning storage root or lies inside it.
 *
 * The comparison respects path-segment boundaries: a sibling directory whose
 * name merely starts with the root's name (for example `<root>-backup`) is
 * not considered inside. Both `/` and `\` are accepted as separators.
 *
 * @param cwd - Directory to test.
 * @returns `true` when `cwd` equals the root or is a descendant of it;
 *   `false` otherwise, including when the root cannot be determined.
 */
function isInsideSkillLearningStorage(cwd: string): boolean {
  try {
    // Drop trailing separators so the boundary check below is uniform.
    const root = getSkillLearningRoot().replace(/[\\/]+$/, '')
    if (cwd === root) return true
    return cwd.startsWith(`${root}/`) || cwd.startsWith(`${root}\\`)
  } catch {
    return false
  }
}
|
||||
|
||||
/**
 * Post-sampling hook: records observations for the current turn, turns them
 * into instincts and triggers automatic evolution of learned artifacts.
 *
 * The hook returns early, in this order, when skill learning is disabled,
 * when `CLAUDE_SKILL_LEARNING_DISABLE` is set, when the query source is not
 * the main REPL thread (prefix match, so
 * 'repl_main_thread:outputStyle:<name>' is included), when running inside a
 * sub-agent, or when the working directory lies inside the skill-learning
 * storage itself (which prevents feedback loops while the user hand-edits
 * files there).
 *
 * @param context - REPL hook context carrying the message stream.
 */
export async function runSkillLearningPostSampling(
  context: REPLHookContext,
): Promise<void> {
  if (!isSkillLearningEnabled()) return
  if (process.env.CLAUDE_SKILL_LEARNING_DISABLE) return
  if (!context.querySource?.startsWith('repl_main_thread')) return
  if (context.toolUseContext.agentId) return
  const cwd = process.cwd()
  if (isInsideSkillLearningStorage(cwd)) return

  const project = resolveProjectContext(cwd)
  const options = { project }
  ++runtimeTurn

  const observations: StoredSkillObservation[] = []

  // The REPL message stream is always replayed: it is the only source that
  // captures user prompts and assistant outcomes (tool-hook observations
  // cover tool events only).
  const fromMessages = observationsFromMessages(context.messages, project)
  for (const pending of fromMessages) {
    const stored = await appendObservation(pending, options)
    observations.push(stored)
  }

  // Add tool-hook observations newer than the consumption watermark.
  const watermark = lastConsumedToolHookTimestamp
  const stored = await readObservations(options)
  const unseenToolHooks = stored.filter(
    record =>
      record.source === 'tool-hook' &&
      record.sessionId === RUNTIME_SESSION_ID &&
      typeof record.timestamp === 'string' &&
      record.timestamp > watermark,
  )
  observations.push(...unseenToolHooks)
  for (const record of unseenToolHooks) {
    if (record.timestamp > lastConsumedToolHookTimestamp) {
      lastConsumedToolHookTimestamp = record.timestamp
    }
  }

  if (observations.length === 0) return

  // H5: throttle LLM calls with a minimum observation count, a per-session
  // cap and a cooldown interval. When any gate is closed the heuristic
  // backend is used directly.
  const now = Date.now()
  const minObservations = 5
  const { llm } = getSkillLearningConfig()
  const hasEnoughObservations = observations.length >= minObservations
  const llmAllowed =
    hasEnoughObservations &&
    llmCallsThisSession < llm.maxCallsPerSession &&
    now - lastLlmCallTimestamp >= llm.cooldownMs

  let candidates
  if (llmAllowed) {
    llmCallsThisSession++
    lastLlmCallTimestamp = now
    candidates = await analyzeWithActiveBackend(observations, { project })
  } else {
    // Heuristic fallback; does not consume an LLM call.
    const { heuristicObserverBackend } = await import('./sessionObserver.js')
    const heuristicResult = heuristicObserverBackend.analyze(observations, {
      project,
    })
    candidates = Array.isArray(heuristicResult)
      ? heuristicResult
      : await heuristicResult
  }

  for (const candidate of candidates) {
    const instinct = createInstinct(candidate)
    await upsertInstinct(instinct, options)
  }

  await autoEvolveLearnedSkills(options)
}
|
||||
|
||||
/**
 * Test helper: resets the LLM/dedup bookkeeping, the tool-hook watermark and
 * the turn counter. The one-time initialization flag is not touched.
 */
export function resetRuntimeObserverForTest(): void {
  resetRuntimeLLMBookkeeping()
  lastConsumedToolHookTimestamp = ''
  runtimeTurn = 0
}
|
||||
|
||||
/**
 * Turns the project's stored instincts into learned skills, commands and
 * agents, then runs the cross-project promotion check.
 *
 * Artifacts are looked up in two places per kind: `<cwd>/.claude/<kind>` and
 * `<claude config home>/<kind>`.
 * - Skills: for each eligible skill cluster a draft is generated or merged.
 *   Nothing more happens when the outcome only appended evidence or when the
 *   draft's `SKILL.md` already exists; otherwise a lifecycle decision is made
 *   against existing artifacts and applied.
 * - Commands and agents: a draft is written only when no existing artifact
 *   matches it.
 *
 * @param options - Carries the project context used to load instincts.
 */
async function autoEvolveLearnedSkills(options: {
  project: ReturnType<typeof resolveProjectContext>
}): Promise<void> {
  const instincts = await loadInstincts(options)
  const cwd = process.cwd()

  // Project-local directory first, then the user-level one.
  const artifactRoots = (kind: string): string[] => [
    join(cwd, '.claude', kind),
    join(getClaudeConfigHomeDir(), kind),
  ]

  const skillRoots = artifactRoots('skills')
  const eligibleClusters = clusterInstincts(instincts).filter(
    cluster =>
      cluster.target === 'skill' &&
      shouldGenerateSkillFromInstincts(cluster.instincts),
  )
  for (const { instincts: members } of eligibleClusters) {
    const outcome = await generateOrMergeSkillDraft(
      members,
      { cwd, scope: members[0]?.scope ?? 'project' },
      skillRoots,
    )
    if (outcome.action === 'append-evidence') continue

    const skillDraft = outcome.draft
    const skillFile = join(skillDraft.outputPath, 'SKILL.md')
    if (existsSync(skillFile)) continue

    const matches = await compareExistingArtifacts(
      'skill',
      skillDraft,
      skillRoots,
    )
    await applySkillLifecycleDecision(decideSkillLifecycle(skillDraft, matches))
  }

  for (const commandDraft of generateCommandCandidates(instincts, { cwd })) {
    const matches = await compareExistingArtifacts(
      'command',
      commandDraft,
      artifactRoots('commands'),
    )
    if (matches.length > 0) continue
    await writeLearnedCommand(commandDraft)
  }

  for (const agentDraft of generateAgentCandidates(instincts, { cwd })) {
    const matches = await compareExistingArtifacts(
      'agent',
      agentDraft,
      artifactRoots('agents'),
    )
    if (matches.length > 0) continue
    await writeLearnedAgent(agentDraft)
  }

  await checkPromotion()
}
|
||||
|
||||
/**
 * Converts REPL messages into skill observations, skipping messages that were
 * already processed in this session.
 *
 * - User messages containing tool results yield one `tool_complete`
 *   observation per result; other user messages yield a `user_message`
 *   observation when they carry non-blank text.
 * - Assistant messages yield one `tool_start` observation per tool use,
 *   followed by an `assistant_message` observation when they carry non-blank
 *   text.
 * - Other message types yield nothing.
 *
 * All observations from one call share the same timestamp. Every message is
 * recorded in the dedup set; when that set grows past `MAX_PROCESSED_IDS`, its
 * oldest entries are dropped until `TRIM_PROCESSED_IDS_TO` remain.
 *
 * @param messages - REPL message stream.
 * @param project - Project context stamped onto each observation.
 * @returns Observations in message order.
 */
function observationsFromMessages(
  messages: Message[],
  project: ReturnType<typeof resolveProjectContext>,
): StoredSkillObservation[] {
  const sessionId = RUNTIME_SESSION_ID
  const shared = {
    sessionId,
    projectId: project.projectId,
    projectName: project.projectName,
    cwd: project.cwd,
    timestamp: new Date().toISOString(),
    source: 'hook' as const,
  }

  const collected: StoredSkillObservation[] = []

  for (const message of messages) {
    // H6: watermark dedup.
    const dedupKey = `${sessionId}:${String(message.uuid)}`
    if (lastProcessedMessageIds.has(dedupKey)) continue
    lastProcessedMessageIds.add(dedupKey)

    // Bounded FIFO: a Set iterates in insertion order, so the first entries
    // visited are the oldest ones.
    if (lastProcessedMessageIds.size > MAX_PROCESSED_IDS) {
      let remaining = lastProcessedMessageIds.size - TRIM_PROCESSED_IDS_TO
      for (const oldest of lastProcessedMessageIds) {
        if (remaining <= 0) break
        lastProcessedMessageIds.delete(oldest)
        remaining -= 1
      }
    }

    if (message.type === 'user') {
      const results = toolResultsFromContent(message.message?.content)
      if (results.length > 0) {
        for (const result of results) {
          collected.push({
            ...shared,
            id: crypto.randomUUID(),
            event: 'tool_complete',
            toolName: result.toolName,
            toolOutput: result.output,
            outcome: result.isError ? 'failure' : 'success',
          })
        }
        continue
      }
      const userText = textFromContent(message.message?.content)
      if (userText.trim()) {
        collected.push({
          ...shared,
          id: crypto.randomUUID(),
          event: 'user_message',
          messageText: userText,
        })
      }
      continue
    }

    if (message.type === 'assistant') {
      const toolUses = toolUsesFromContent(message.message?.content)
      const assistantText = textFromContent(message.message?.content)
      for (const toolUse of toolUses) {
        collected.push({
          ...shared,
          id: crypto.randomUUID(),
          event: 'tool_start',
          toolName: toolUse.toolName,
          toolInput: toolUse.input,
        })
      }
      if (assistantText.trim()) {
        collected.push({
          ...shared,
          id: crypto.randomUUID(),
          event: 'assistant_message',
          messageText: assistantText,
        })
      }
    }
  }

  return collected
}
|
||||
|
||||
/**
 * Extracts plain text from message content.
 *
 * @param content - A string (returned as-is), an array of content blocks, or
 *   anything else.
 * @returns For arrays, the non-empty string `text` fields of the object
 *   blocks joined with newlines; `''` for any non-string, non-array input.
 */
function textFromContent(content: unknown): string {
  if (typeof content === 'string') return content
  if (!Array.isArray(content)) return ''

  const pieces: string[] = []
  for (const block of content) {
    if (!block || typeof block !== 'object') continue
    const { text } = block as Record<string, unknown>
    if (typeof text === 'string' && text) {
      pieces.push(text)
    }
  }
  return pieces.join('\n')
}
|
||||
|
||||
/**
 * Collects the `tool_use` blocks of a message's content.
 *
 * @param content - Message content; anything but an array yields no results.
 * @returns One entry per `tool_use` block, with the block's `name`
 *   (`'unknown_tool'` when absent) and its stringified `input`.
 */
function toolUsesFromContent(
  content: unknown,
): Array<{ toolName: string; input?: string }> {
  if (!Array.isArray(content)) return []

  const uses: Array<{ toolName: string; input?: string }> = []
  for (const block of content) {
    if (!block || typeof block !== 'object') continue
    const fields = block as Record<string, unknown>
    if (fields.type !== 'tool_use') continue
    uses.push({
      toolName: String(fields.name ?? 'unknown_tool'),
      input: stringifyField(fields.input),
    })
  }
  return uses
}
|
||||
|
||||
/**
 * Collects the `tool_result` blocks of a message's content.
 *
 * @param content - Message content; anything but an array yields no results.
 * @returns One entry per `tool_result` block. The tool name comes from the
 *   block's `name`, then `tool_name`, then `'unknown_tool'`; the output is
 *   the stringified `content`; `isError` is true only when `is_error` is
 *   exactly `true`.
 */
function toolResultsFromContent(
  content: unknown,
): Array<{ toolName: string; output?: string; isError: boolean }> {
  if (!Array.isArray(content)) return []

  const results: Array<{ toolName: string; output?: string; isError: boolean }> =
    []
  for (const block of content) {
    if (!block || typeof block !== 'object') continue
    const fields = block as Record<string, unknown>
    if (fields.type !== 'tool_result') continue
    results.push({
      toolName: String(fields.name ?? fields.tool_name ?? 'unknown_tool'),
      output: stringifyField(fields.content),
      isError: fields.is_error === true,
    })
  }
  return results
}
|
||||
296
src/services/skillLearning/sessionObserver.ts
Normal file
296
src/services/skillLearning/sessionObserver.ts
Normal file
@@ -0,0 +1,296 @@
|
||||
import type { StoredSkillObservation } from './observationStore.js'
|
||||
import {
|
||||
candidateFromObservation,
|
||||
createInstinct,
|
||||
type InstinctCandidate,
|
||||
type StoredInstinct,
|
||||
} from './instinctParser.js'
|
||||
import type { InstinctDomain, SkillObservationOutcome } from './types.js'
|
||||
import {
|
||||
analyzeWithActiveBackend,
|
||||
getActiveObserverBackend,
|
||||
registerObserverBackend,
|
||||
type ObserverBackend,
|
||||
type ObserverBackendContext,
|
||||
} from './observerBackend.js'
|
||||
import { llmObserverBackend } from './llmObserverBackend.js'
|
||||
|
||||
/** Options for the heuristic session analysis. */
export type SessionObserverOptions = {
  /** Repetition threshold used when detecting repeated tool sequences. */
  minRepeatedSequenceCount?: number
}

// Threshold applied when `minRepeatedSequenceCount` is not provided.
const DEFAULT_MIN_REPEATED_SEQUENCE_COUNT = 2
|
||||
|
||||
/**
 * Runs every heuristic extractor over the observations.
 *
 * @param observations - Observations to analyze.
 * @param options - Forwarded to the repeated-tool-sequence extractor only.
 * @returns Candidates in extractor order: user corrections, tool error
 *   resolutions, repeated tool sequences, project conventions.
 */
export function heuristicAnalyze(
  observations: StoredSkillObservation[],
  options?: SessionObserverOptions,
): InstinctCandidate[] {
  const corrections = extractUserCorrections(observations)
  const resolutions = extractToolErrorResolutions(observations)
  const sequences = extractRepeatedToolSequences(observations, options)
  const conventions = extractProjectConventions(observations)
  return corrections.concat(resolutions, sequences, conventions)
}
|
||||
|
||||
/**
 * Observer backend named `'heuristic'`. It delegates to `heuristicAnalyze`
 * with default options and ignores the backend context.
 */
export const heuristicObserverBackend: ObserverBackend = {
  name: 'heuristic',
  analyze: (
    observations: StoredSkillObservation[],
    _ctx?: ObserverBackendContext,
  ): InstinctCandidate[] => heuristicAnalyze(observations),
}
|
||||
|
||||
// Module side effect: importing this file registers both built-in backends,
// heuristic first.
for (const backend of [heuristicObserverBackend, llmObserverBackend]) {
  registerObserverBackend(backend)
}
|
||||
|
||||
/**
 * Synchronously analyzes observations with the active backend and converts
 * the resulting candidates into instincts.
 *
 * `options` are honoured only when the heuristic backend is active; any other
 * backend is called with the observations alone and must answer
 * synchronously.
 *
 * @param observations - Observations to analyze.
 * @param options - Heuristic tuning options.
 * @returns One instinct per candidate.
 * @throws Error when no backend is active, or when the active backend returns
 *   a promise (use `analyzeObservationsAsync` in that case).
 */
export function analyzeObservations(
  observations: StoredSkillObservation[],
  options?: SessionObserverOptions,
): StoredInstinct[] {
  const backend = getActiveObserverBackend()

  let candidates: InstinctCandidate[]
  if (backend.name === 'heuristic') {
    candidates = heuristicAnalyze(observations, options)
  } else {
    candidates = ensureSyncCandidates(backend.analyze(observations))
  }

  return candidates.map(candidate => createInstinct(candidate))
}
|
||||
|
||||
/**
 * Asynchronous counterpart of `analyzeObservations`; works with backends that
 * answer synchronously or asynchronously.
 *
 * @param observations - Observations to analyze.
 * @param ctx - Optional context forwarded to the backend.
 * @returns One instinct per candidate produced by the active backend.
 */
export async function analyzeObservationsAsync(
  observations: StoredSkillObservation[],
  ctx?: ObserverBackendContext,
): Promise<StoredInstinct[]> {
  const instincts: StoredInstinct[] = []
  for (const candidate of await analyzeWithActiveBackend(observations, ctx)) {
    instincts.push(createInstinct(candidate))
  }
  return instincts
}
|
||||
|
||||
/** Alias of `analyzeObservations`. */
export const observeSession = analyzeObservations
|
||||
|
||||
/**
 * Guards the synchronous analysis path against asynchronous backends.
 *
 * @param result - Value returned by a backend's `analyze`.
 * @returns `result` unchanged when it is an array.
 * @throws Error when `result` is not an array, i.e. the backend answered with
 *   a promise.
 */
function ensureSyncCandidates(
  result: InstinctCandidate[] | Promise<InstinctCandidate[]>,
): InstinctCandidate[] {
  if (!Array.isArray(result)) {
    throw new Error(
      'Active observer backend returned a Promise; use analyzeObservationsAsync instead',
    )
  }
  return result
}
|
||||
|
||||
/**
 * Derives instinct candidates from user messages that read as corrections.
 *
 * Each `user_message` observation with text is passed (trimmed) to
 * `parseCorrection`; a non-empty parse yields one project-scoped candidate
 * with confidence 0.7, the message text as evidence and the most recent
 * preceding outcome attached.
 *
 * @param observations - Observations in chronological order.
 * @returns One candidate per recognized correction, in observation order.
 */
function extractUserCorrections(
  observations: StoredSkillObservation[],
): InstinctCandidate[] {
  const candidates: InstinctCandidate[] = []

  observations.forEach((observation, index) => {
    if (observation.event !== 'user_message') return
    if (!observation.messageText) return

    const text = observation.messageText.trim()
    const correction = parseCorrection(text)
    if (!correction) return

    candidates.push({
      ...candidateFromObservation(observation),
      trigger: correction.trigger,
      action: correction.action,
      confidence: 0.7,
      domain: inferDomain(text),
      source: 'session-observation',
      scope: 'project',
      evidence: [text],
      evidenceOutcome: recentOutcomeBefore(observations, index),
      observationIds: [observation.id],
    })
  })

  return candidates
}
|
||||
|
||||
/**
 * Emits a "debugging" candidate for every failed `tool_complete` observation
 * that is followed, within the next five observations, by a successful
 * `tool_complete` of the same tool.
 *
 * Failures without a `toolName` never produce a candidate. The evidence holds
 * the failing and the succeeding tool output (or a generic fallback line),
 * and `observationIds` references both observations.
 *
 * @param observations - Observations in recorded order.
 * @returns One candidate per resolved failure, in order of the failures.
 */
function extractToolErrorResolutions(
  observations: StoredSkillObservation[],
): InstinctCandidate[] {
  return observations.flatMap((failed, position) => {
    const isFailedCompletion =
      failed.event === 'tool_complete' && failed.outcome === 'failure'
    if (!isFailedCompletion) return []

    // Look ahead over at most the next five observations.
    const upcoming = observations.slice(position + 1, position + 6)
    const recovery = upcoming.find(
      next =>
        next.event === 'tool_complete' &&
        next.outcome === 'success' &&
        next.toolName === failed.toolName,
    )
    if (!recovery || !failed.toolName) return []

    const candidate: InstinctCandidate = {
      ...candidateFromObservation(failed),
      trigger: `When ${failed.toolName} fails during this project`,
      action: `Use the follow-up successful ${failed.toolName} invocation as the resolution pattern before retrying blindly.`,
      confidence: 0.5,
      domain: 'debugging',
      source: 'session-observation',
      scope: 'project',
      evidence: [
        failed.toolOutput ?? `${failed.toolName} failed`,
        recovery.toolOutput ?? `${recovery.toolName} succeeded`,
      ],
      evidenceOutcome: 'success',
      observationIds: [failed.id, recovery.id],
    }
    return [candidate]
  })
}
|
||||
|
||||
/**
 * Detects repeated `Grep -> Read -> Edit` runs among the tool events and, when
 * they occur at least `minRepeatedSequenceCount` times (default
 * `DEFAULT_MIN_REPEATED_SEQUENCE_COUNT`), emits a single "workflow" candidate.
 *
 * Confidence is 0.65 for three or more occurrences and 0.5 otherwise. The
 * candidate's `evidenceOutcome` is the outcome of the last matched event when
 * that event is a `tool_complete`.
 *
 * NOTE(review): both `tool_start` and `tool_complete` events feed the name
 * list. If one tool call is recorded as both events, names appear twice in a
 * row and the three-step pattern may never match — confirm against the
 * recorder.
 *
 * @param observations - Observations in recorded order.
 * @param options - Optional override for the minimum occurrence count.
 * @returns An empty array, or an array holding the single workflow candidate.
 */
function extractRepeatedToolSequences(
  observations: StoredSkillObservation[],
  options?: SessionObserverOptions,
): InstinctCandidate[] {
  const threshold =
    options?.minRepeatedSequenceCount ?? DEFAULT_MIN_REPEATED_SEQUENCE_COUNT
  const toolEvents = observations.filter(
    item => item.event === 'tool_start' || item.event === 'tool_complete',
  )
  const pattern = ['Grep', 'Read', 'Edit']

  const matchedIds: string[] = []
  let occurrences = 0
  const lastStart = toolEvents.length - pattern.length
  for (let start = 0; start <= lastStart; start++) {
    const window = toolEvents.slice(start, start + pattern.length)
    const isMatch = pattern.every(
      (expected, offset) => (window[offset].toolName ?? '') === expected,
    )
    if (!isMatch) continue
    occurrences++
    for (const event of window) matchedIds.push(event.id)
  }

  if (occurrences < threshold) return []

  const matchedIdSet = new Set(matchedIds)
  const firstMatched = toolEvents.find(event => matchedIdSet.has(event.id))
  const finalId = matchedIds[matchedIds.length - 1]
  const finalEvent = toolEvents.find(event => event.id === finalId)
  const finalOutcome =
    finalEvent?.event === 'tool_complete' ? finalEvent.outcome : undefined

  return [
    {
      ...candidateFromObservation(firstMatched ?? observations[0]),
      trigger: 'When changing code in this project',
      action:
        'Prefer the Grep -> Read -> Edit workflow: locate symbols, inspect context, then apply the smallest edit.',
      confidence: occurrences >= 3 ? 0.65 : 0.5,
      domain: 'workflow',
      source: 'session-observation',
      scope: 'project',
      evidence: [
        `Observed ${occurrences} repeated Grep -> Read -> Edit workflow sequences.`,
      ],
      evidenceOutcome: normalizeOutcome(finalOutcome),
      observationIds: Array.from(matchedIdSet),
    },
  ]
}
|
||||
|
||||
/**
 * Emits a low-confidence "project" candidate for every user message that
 * contains a convention cue (项目约定, 规范, 必须, convention, always, must —
 * matched case-insensitively anywhere in the trimmed text).
 *
 * @param observations - Observations in recorded order.
 * @returns One candidate per matching message, in message order.
 */
function extractProjectConventions(
  observations: StoredSkillObservation[],
): InstinctCandidate[] {
  const conventionCue = /(项目约定|规范|必须|convention|always|must)/i
  const results: InstinctCandidate[] = []

  for (let position = 0; position < observations.length; position++) {
    const observation = observations[position]
    if (observation.event !== 'user_message' || !observation.messageText) {
      continue
    }
    const message = observation.messageText.trim()
    if (!conventionCue.test(message)) continue

    results.push({
      ...candidateFromObservation(observation),
      trigger: 'When working in this project',
      action: `Follow the project convention: ${message}`,
      // A single occurrence gets 0.4 so it stays below the 0.75 promotion
      // threshold. Promotion needs corroborating higher-confidence evidence:
      // two 0.4s still average 0.4, so other signals must raise the mean.
      confidence: 0.4,
      domain: 'project',
      source: 'session-observation',
      scope: 'project',
      evidence: [message],
      evidenceOutcome: recentOutcomeBefore(observations, position),
      observationIds: [observation.id],
    })
  }

  return results
}
|
||||
|
||||
/**
 * Walks backwards from `index - 1` and returns the normalized outcome of the
 * nearest preceding `tool_complete` observation.
 *
 * @param observations - Observations in recorded order.
 * @param index - Position of the observation whose predecessors are searched.
 * @returns The normalized outcome, or `undefined` when no `tool_complete`
 *   precedes `index` (or its outcome is not a recognised value).
 */
function recentOutcomeBefore(
  observations: StoredSkillObservation[],
  index: number,
): SkillObservationOutcome | undefined {
  let cursor = index
  while (--cursor >= 0) {
    const earlier = observations[cursor]
    if (earlier.event === 'tool_complete') {
      return normalizeOutcome(earlier.outcome)
    }
  }
  return undefined
}
|
||||
|
||||
/**
 * Narrows a raw observation outcome to one of the recognised values.
 *
 * @param outcome - Outcome as stored on an observation.
 * @returns `'success'`, `'failure'` or `'unknown'` when `outcome` is exactly
 *   one of those; `undefined` for anything else.
 */
function normalizeOutcome(
  outcome: StoredSkillObservation['outcome'],
): SkillObservationOutcome | undefined {
  switch (outcome) {
    case 'success':
    case 'failure':
    case 'unknown':
      return outcome
    default:
      return undefined
  }
}
|
||||
|
||||
/**
 * Parses a user message for a corrective instruction.
 *
 * Two shapes are recognised, in this order:
 *  1. "don't X, use Y" (Chinese or English) — yields a trigger/action pair
 *     naming both the avoided and the preferred item.
 *  2. "should / must Y" (Chinese or English) — yields a generic trigger and
 *     an action quoting Y. A contracted negation ("shouldn't Y", "mustn't Y")
 *     is reported as "not Y", matching how "should not Y" is reported.
 *
 * @param text - Message text, expected to be trimmed by the caller.
 * @returns The trigger/action pair, or `null` when no correction is found.
 */
function parseCorrection(
  text: string,
): { trigger: string; action: string } | null {
  // `不要再` must precede `不要`: alternation is ordered, so with `不要` first
  // the longer form never matched and `再` leaked into the avoided item.
  const noUsePattern =
    /(?:不要再|不要|别|不应(?:该)?)\s*(?<avoid>[^,,。.;;]+)[,,\s]*(?:用|使用|改用|应该用|要用)\s*(?<prefer>[^,,。.;;]+)/i
  // Accept both the straight and the typographic apostrophe in "don't".
  const englishPattern =
    /(?:do not|don['’]t|avoid)\s+(?<avoid>[^,.;]+)[,;\s]+(?:use|prefer)\s+(?<prefer>[^,.;]+)/i
  // - `(?<!不)` keeps the negated `不应该先…` from being read as a positive rule.
  // - `\b…\b` stops `must`/`should` from matching inside other words.
  // - `n't` is captured so the negation is preserved rather than leaking
  //   into the preferred text as the fragment "n't …".
  const shouldPattern =
    /(?:你应该|(?<!不)应该先|\b(?:must|should)(?<negated>n['’]t)?\b)\s*(?<prefer>[^,,。.;;]+)/i

  const noUse = text.match(noUsePattern) ?? text.match(englishPattern)
  if (noUse?.groups) {
    const avoid = noUse.groups.avoid.trim()
    const prefer = noUse.groups.prefer.trim()
    return {
      trigger: `When choosing between ${avoid} and ${prefer}`,
      action: `Prefer ${prefer}; avoid ${avoid}.`,
    }
  }

  const should = text.match(shouldPattern)
  if (should?.groups) {
    const stated = should.groups.prefer.trim()
    const prefer = should.groups.negated ? `not ${stated}` : stated
    return {
      trigger: 'When this user gives a corrective instruction',
      action: `Prefer this corrected action: ${prefer}.`,
    }
  }

  return null
}
|
||||
|
||||
/**
 * Classifies free text into an instinct domain by keyword lookup on the
 * lower-cased text. Rules are tried in order (testing, git, security,
 * code-style); the first rule whose pattern occurs anywhere in the text wins,
 * and `'project'` is the fallback.
 *
 * NOTE(review): patterns are unanchored substrings, so e.g. "latest" matches
 * `test` and "digit" matches `git` — confirm this is acceptable.
 *
 * @param text - Arbitrary message text.
 * @returns The inferred domain.
 */
function inferDomain(text: string): InstinctDomain {
  const haystack = text.toLowerCase()
  const rules: Array<[RegExp, InstinctDomain]> = [
    [/test|mock|testing-library|vitest|jest|bun test/, 'testing'],
    [/git|commit|branch/, 'git'],
    [/security|secret|token|password/, 'security'],
    [/style|format|lint|naming/, 'code-style'],
  ]
  for (const [pattern, domain] of rules) {
    if (pattern.test(haystack)) return domain
  }
  return 'project'
}
|
||||
499
src/services/skillLearning/skillGapStore.ts
Normal file
499
src/services/skillLearning/skillGapStore.ts
Normal file
@@ -0,0 +1,499 @@
|
||||
import { existsSync } from 'node:fs'
|
||||
import { mkdir, readFile, rename, writeFile } from 'node:fs/promises'
|
||||
import { createHash } from 'node:crypto'
|
||||
import { dirname, join } from 'node:path'
|
||||
import type { SearchResult } from '../skillSearch/localSearch.js'
|
||||
import { createInstinct, type StoredInstinct } from './instinctParser.js'
|
||||
import {
|
||||
getProjectStorageDir,
|
||||
resolveProjectContext,
|
||||
} from './projectContext.js'
|
||||
import { generateSkillDraft, writeLearnedSkill } from './skillGenerator.js'
|
||||
import type {
|
||||
InstinctDomain,
|
||||
SkillGapStatus,
|
||||
SkillLearningProjectContext,
|
||||
} from './types.js'
|
||||
|
||||
/**
 * The part of a search result that is persisted with a gap record: only
 * `name`, `description` and `score`.
 */
export type SkillGapRecommendation = Pick<
  SearchResult,
  'name' | 'description' | 'score'
>
|
||||
|
||||
/**
 * Pointer to a skill file written for a gap, tagged by `type` as either the
 * draft or the active variant. `skillPath` is the path of the `SKILL.md` file.
 */
export type SkillGapMaterialization =
  | {
      type: 'draft'
      name: string
      skillPath: string
    }
  | {
      type: 'active'
      name: string
      skillPath: string
    }
|
||||
|
||||
/**
 * Persisted state of one skill gap, stored under its `key` in the gap state
 * file.
 */
export type SkillGapRecord = {
  key: string
  prompt: string
  /** Number of times the gap has been recorded. */
  count: number
  /** Number of distinct sessions that produced a draft hit. */
  draftHits: number
  /**
   * Session IDs that have already contributed a draft hit for this gap. This
   * prevents one session from inflating `draftHits` beyond 1 and flipping the
   * `draftHits >= 2` active-promotion gate by itself.
   */
  draftHitSessions: string[]
  status: SkillGapStatus
  sessionId: string
  cwd: string
  projectId: string
  projectName: string
  recommendations: SkillGapRecommendation[]
  /** ISO-8601 timestamps. */
  createdAt: string
  updatedAt: string
  draft?: SkillGapMaterialization
  active?: SkillGapMaterialization
}
|
||||
|
||||
// P0-2 hook: when outcome-aware observation lands, augment this with a
|
||||
// lookup into observationStore for a matching `outcome: 'success'` tool_complete
|
||||
// observation keyed by (sessionId, gap.key). Until then, draft promotion uses
|
||||
// count/signal only.
|
||||
// Minimum `count` at which `shouldPromoteToDraft` allows a draft to be written.
const DRAFT_PROMOTION_COUNT = 2
|
||||
// `count` at or above which `shouldPromoteToActive` promotes a drafted gap.
const ACTIVE_PROMOTION_COUNT = 4
|
||||
// `draftHits` at or above which `shouldPromoteToActive` promotes a drafted gap.
const ACTIVE_PROMOTION_DRAFT_HITS = 2
|
||||
|
||||
/** On-disk shape of the gap state file: a version tag and records keyed by gap key. */
type SkillGapState = {
  version: 1
  gaps: Record<string, SkillGapRecord>
}
|
||||
|
||||
/** Input for `recordSkillGap`. Only `prompt` is required. */
export type RecordSkillGapOptions = {
  /** Prompt that revealed the gap; must be non-empty after trimming. */
  prompt: string
  /** Passed to `resolveProjectContext` when `project` is not given. */
  cwd?: string
  /** Defaults to `'unknown-session'`. */
  sessionId?: string
  /** Only the first five are stored on the record. */
  recommendations?: SearchResult[]
  /** Explicit project context; skips `resolveProjectContext`. */
  project?: SkillLearningProjectContext
  /** Alternative root directory for the gap state file. */
  rootDir?: string
}
|
||||
|
||||
/**
 * Records one occurrence of a skill gap and applies the promotion rules.
 *
 * The gap is keyed by `buildSkillGapKey(prompt)`. Its `count` is incremented,
 * while `draftHits`, `draftHitSessions`, `status`, `createdAt` and any
 * existing `draft`/`active` entries are carried over from the stored record.
 * Rejected gaps are persisted without promotion. Otherwise a draft is written
 * once `shouldPromoteToDraft` holds, and an active skill once
 * `shouldPromoteToActive` holds; both can happen in the same call. Runtime
 * skill caches are cleared after each promotion.
 *
 * @param options - See `RecordSkillGapOptions`.
 * @returns The record as persisted.
 * @throws Error when the trimmed prompt is empty.
 */
export async function recordSkillGap(
  options: RecordSkillGapOptions,
): Promise<SkillGapRecord> {
  const prompt = options.prompt.trim()
  if (prompt.length === 0) {
    throw new Error('Cannot record an empty skill gap')
  }

  const project = options.project ?? resolveProjectContext(options.cwd)
  const state = await readSkillGapState(project, options.rootDir)
  const key = buildSkillGapKey(prompt)
  const timestamp = new Date().toISOString()
  const previous = state.gaps[key]

  const recommendations = (options.recommendations ?? [])
    .slice(0, 5)
    .map(({ name, description, score }) => ({ name, description, score }))

  const gap: SkillGapRecord = {
    key,
    prompt,
    count: (previous?.count ?? 0) + 1,
    draftHits: previous?.draftHits ?? 0,
    draftHitSessions: previous?.draftHitSessions ?? [],
    status: previous?.status ?? 'pending',
    sessionId: options.sessionId ?? 'unknown-session',
    cwd: options.cwd ?? project.cwd,
    projectId: project.projectId,
    projectName: project.projectName,
    recommendations,
    createdAt: previous?.createdAt ?? timestamp,
    updatedAt: timestamp,
    draft: previous?.draft,
    active: previous?.active,
  }

  // Stores the record in the state file and hands it back to the caller.
  const persist = async (): Promise<SkillGapRecord> => {
    state.gaps[key] = gap
    await writeSkillGapState(project, state, options.rootDir)
    return gap
  }

  // Rejected gaps keep counting but are never promoted.
  if (gap.status === 'rejected') return persist()

  if (!gap.draft && shouldPromoteToDraft(gap)) {
    gap.draft = await writeSkillGapDraft(gap, project)
    gap.status = 'draft'
    await clearRuntimeSkillCaches()
  }

  if (gap.draft && !gap.active && shouldPromoteToActive(gap)) {
    gap.active = await writeActiveSkillForGap(gap, project)
    gap.status = 'active'
    await clearRuntimeSkillCaches()
  }

  return persist()
}
|
||||
|
||||
/**
 * Loads all gap records for a project.
 *
 * @param project - Project context; defaults to `resolveProjectContext()`.
 * @param rootDir - Optional alternative root for the state file.
 * @returns The records sorted by `key` using `localeCompare`.
 */
export async function readSkillGaps(
  project = resolveProjectContext(),
  rootDir?: string,
): Promise<SkillGapRecord[]> {
  const { gaps } = await readSkillGapState(project, rootDir)
  const records = Object.values(gaps)
  records.sort((left, right) => left.key.localeCompare(right.key))
  return records
}
|
||||
|
||||
/**
 * Finds the gap whose draft skill file is `draftPath`.
 *
 * @param draftPath - Path compared for exact equality with `draft.skillPath`.
 * @param project - Project context; defaults to `resolveProjectContext()`.
 * @param rootDir - Optional alternative root for the state file.
 * @returns The key of the first matching gap, or `undefined` when none match.
 */
export async function findGapKeyByDraftPath(
  draftPath: string,
  project = resolveProjectContext(),
  rootDir?: string,
): Promise<string | undefined> {
  const { gaps } = await readSkillGapState(project, rootDir)
  const owner = Object.values(gaps).find(
    gap => gap.draft?.skillPath === draftPath,
  )
  return owner ? owner.key : undefined
}
|
||||
|
||||
/**
 * Registers that a gap's draft skill was hit in a session and promotes the gap
 * to active when `shouldPromoteToActive` then holds.
 *
 * Nothing changes when the gap is unknown, has no draft, is already active, or
 * the session has already contributed a hit.
 *
 * @param key - Gap key.
 * @param project - Project context; defaults to `resolveProjectContext()`.
 * @param rootDir - Optional alternative root for the state file.
 * @param sessionId - Session registering the hit; defaults to `'unknown-session'`.
 * @returns The updated record, the unchanged record when nothing changed, or
 *   `undefined` when the key is unknown.
 */
export async function recordDraftHit(
  key: string,
  project = resolveProjectContext(),
  rootDir?: string,
  sessionId = 'unknown-session',
): Promise<SkillGapRecord | undefined> {
  const state = await readSkillGapState(project, rootDir)
  const gap = state.gaps[key]
  if (!gap) return gap
  if (!gap.draft || gap.active) return gap

  // One draft hit per session: a single actor reloading the same draft
  // repeatedly must not flip the draftHits>=2 gate.
  const seenSessions = gap.draftHitSessions ?? []
  if (seenSessions.some(seen => seen === sessionId)) return gap

  const updated: SkillGapRecord = {
    ...gap,
    draftHits: gap.draftHits + 1,
    draftHitSessions: seenSessions.concat(sessionId),
    updatedAt: new Date().toISOString(),
  }

  if (shouldPromoteToActive(updated)) {
    updated.active = await writeActiveSkillForGap(updated, project)
    updated.status = 'active'
    await clearRuntimeSkillCaches()
  }

  state.gaps[key] = updated
  await writeSkillGapState(project, state, rootDir)
  return updated
}
|
||||
|
||||
/**
 * Writes a draft skill for a gap regardless of its occurrence count.
 *
 * Rejected gaps and gaps that already have a draft are returned unchanged.
 *
 * @param key - Gap key.
 * @param project - Project context; defaults to `resolveProjectContext()`.
 * @param rootDir - Optional alternative root for the state file.
 * @returns The updated (or unchanged) record, or `undefined` when the key is
 *   unknown.
 */
export async function promoteGapToDraft(
  key: string,
  project = resolveProjectContext(),
  rootDir?: string,
): Promise<SkillGapRecord | undefined> {
  const state = await readSkillGapState(project, rootDir)
  const gap = state.gaps[key]
  if (!gap) return undefined
  if (gap.status === 'rejected' || gap.draft) return gap

  const draft = await writeSkillGapDraft(gap, project)
  const updated: SkillGapRecord = {
    ...gap,
    draft,
    status: 'draft',
    updatedAt: new Date().toISOString(),
  }

  state.gaps[key] = updated
  await writeSkillGapState(project, state, rootDir)
  await clearRuntimeSkillCaches()
  return updated
}
|
||||
|
||||
/**
 * Marks a gap as rejected and persists the change. Existing `draft`/`active`
 * entries on the record are left as they are.
 *
 * @param key - Gap key.
 * @param project - Project context; defaults to `resolveProjectContext()`.
 * @param rootDir - Optional alternative root for the state file.
 * @returns The updated record, or `undefined` when the key is unknown.
 */
export async function rejectSkillGap(
  key: string,
  project = resolveProjectContext(),
  rootDir?: string,
): Promise<SkillGapRecord | undefined> {
  const state = await readSkillGapState(project, rootDir)
  const current = state.gaps[key]
  if (!current) return undefined

  const rejected: SkillGapRecord = {
    ...current,
    status: 'rejected',
    updatedAt: new Date().toISOString(),
  }
  state.gaps[key] = rejected
  await writeSkillGapState(project, state, rootDir)
  return rejected
}
|
||||
|
||||
/**
 * Draft-promotion rule: the gap must have been recorded at least
 * `DRAFT_PROMOTION_COUNT` times.
 *
 * Promotion is count-based only. The legacy `isStrongReusableSignal` path let
 * single-utterance Chinese exhortations be promoted straight to active; P0-2
 * is expected to reintroduce an outcome-aware signal once the observation
 * layer supplies it.
 */
export function shouldPromoteToDraft(gap: SkillGapRecord): boolean {
  const occurrences = gap.count
  return occurrences >= DRAFT_PROMOTION_COUNT
}
|
||||
|
||||
/**
 * Active-promotion rule: the gap must already have a draft, and either its
 * `count` has reached `ACTIVE_PROMOTION_COUNT` or its `draftHits` has reached
 * `ACTIVE_PROMOTION_DRAFT_HITS`.
 */
export function shouldPromoteToActive(gap: SkillGapRecord): boolean {
  if (!gap.draft) return false
  if (gap.count >= ACTIVE_PROMOTION_COUNT) return true
  return gap.draftHits >= ACTIVE_PROMOTION_DRAFT_HITS
}
|
||||
|
||||
/**
 * Generates the draft skill for a gap under
 * `<projectRoot or cwd>/.claude/skills/.drafts` and writes its `SKILL.md`
 * unless a file already exists at that path. The written content gets a
 * "Promotion Rule" section appended.
 *
 * @param gap - Gap to materialize.
 * @param project - Project whose root (or cwd) hosts the drafts folder.
 * @returns A `'draft'` materialization pointing at the `SKILL.md` path.
 */
async function writeSkillGapDraft(
  gap: SkillGapRecord,
  project: SkillLearningProjectContext,
): Promise<SkillGapMaterialization> {
  const root = project.projectRoot ?? project.cwd
  const pendingInstinct = createGapInstinct(gap, 'pending')
  const generated = generateSkillDraft([pendingInstinct], {
    cwd: root,
    outputRoot: join(root, '.claude', 'skills', '.drafts'),
    scope: 'project',
    name: `draft-${buildNameFragment(gap.prompt)}`,
    description:
      'Draft learned skill candidate. Promote after repeated evidence or explicit user correction.',
  })

  const skillFile = join(generated.outputPath, 'SKILL.md')
  const alreadyOnDisk = existsSync(skillFile)
  if (!alreadyOnDisk) {
    const content =
      generated.content +
      '\n## Promotion Rule\n\nDo not move this draft into active skills until the same gap repeats or the user explicitly confirms this should become reusable.\n'
    await writeLearnedSkill({ ...generated, content })
  }

  return { type: 'draft', name: generated.name, skillPath: skillFile }
}
|
||||
|
||||
/**
 * Generates the active skill for a gap (project scope, default output
 * location of `generateSkillDraft`) and writes its `SKILL.md` unless a file
 * already exists at that path.
 *
 * @param gap - Gap to materialize.
 * @param project - Project whose root (or cwd) is passed as `cwd`.
 * @returns An `'active'` materialization pointing at the `SKILL.md` path.
 */
async function writeActiveSkillForGap(
  gap: SkillGapRecord,
  project: SkillLearningProjectContext,
): Promise<SkillGapMaterialization> {
  const activeInstinct = createGapInstinct(gap, 'active')
  const generated = generateSkillDraft([activeInstinct], {
    cwd: project.projectRoot ?? project.cwd,
    scope: 'project',
    name: buildNameFragment(gap.prompt),
    description: buildGapAction(gap.prompt),
  })

  const skillFile = join(generated.outputPath, 'SKILL.md')
  const alreadyOnDisk = existsSync(skillFile)
  if (!alreadyOnDisk) await writeLearnedSkill(generated)

  return { type: 'active', name: generated.name, skillPath: skillFile }
}
|
||||
|
||||
/**
 * Builds the synthetic instinct that a gap's skill file is generated from.
 * Confidence is 0.82 for the `'active'` status and 0.55 for any other.
 *
 * @param gap - Source gap record.
 * @param status - Status stamped on the instinct.
 * @returns The instinct produced by `createInstinct`.
 */
function createGapInstinct(
  gap: SkillGapRecord,
  status: StoredInstinct['status'],
): StoredInstinct {
  const isActive = status === 'active'
  const evidence = [
    `Skill gap prompt: ${summarize(gap.prompt, 180)}`,
    `No high-confidence active skill was auto-loaded.`,
    `Observed ${gap.count} time(s).`,
  ]

  return createInstinct({
    trigger: `When the user asks for ${summarize(gap.prompt, 120)}`,
    action: buildGapAction(gap.prompt),
    confidence: isActive ? 0.82 : 0.55,
    domain: inferDomain(gap.prompt),
    source: 'session-observation',
    scope: 'project',
    projectId: gap.projectId,
    projectName: gap.projectName,
    evidence,
    status,
  })
}
|
||||
|
||||
/**
 * Chooses the action text for a gap-derived skill. Three canned actions
 * (feature-flag audit, skill learning, test inference) are tried in order
 * against case-insensitive keyword patterns; the first match wins. When none
 * match, a generic action quoting up to 180 characters of the prompt is
 * returned.
 *
 * @param prompt - Prompt that revealed the gap.
 * @returns The action sentence.
 */
function buildGapAction(prompt: string): string {
  const canned: Array<[RegExp, string]> = [
    [
      /feature\s*\(|feature flag|flag_name|stub|no-op|noop|最小实现/i,
      'Audit feature flags by scanning feature() call sites, excluding generated/dependency noise, classifying each candidate as stub, shell, MVP, or thin-toggle, and writing an evidence-backed document.',
    ],
    [
      /skill|技能|学习|进化|evolve|learning/i,
      'Run skill discovery first; auto-load only high-confidence matching skills; record a skill gap when none match; promote repeated or corrected gaps into learned skills.',
    ],
    [
      /test|测试|stub|调用链|参数/i,
      'Infer tests from existing files, parameters, exports, and call chains before simplifying mocks or inventing behavior.',
    ],
  ]

  const hit = canned.find(([pattern]) => pattern.test(prompt))
  if (hit) return hit[1]

  return `Reuse the workflow learned from this prompt: ${summarize(prompt, 180)}.`
}
|
||||
|
||||
/**
 * Classifies a gap prompt into an instinct domain by keyword lookup on the
 * lower-cased prompt. Rules are tried in order (testing, debugging, security,
 * git, code-style); the first match wins and `'workflow'` is the fallback.
 *
 * NOTE(review): patterns are unanchored substrings, so e.g. "latest" matches
 * `test` and "prefix" matches `fix` — confirm this is acceptable.
 *
 * @param prompt - Prompt that revealed the gap.
 * @returns The inferred domain.
 */
function inferDomain(prompt: string): InstinctDomain {
  const haystack = prompt.toLowerCase()
  const rules: Array<[RegExp, InstinctDomain]> = [
    [/test|测试|stub|fixture|断言/, 'testing'],
    [/error|bug|fix|失败|错误|修复|debug/, 'debugging'],
    [/security|安全|漏洞|secret|token/, 'security'],
    [/git|commit|branch|pr\b/, 'git'],
    [/style|lint|format|命名|规范/, 'code-style'],
  ]
  for (const [pattern, domain] of rules) {
    if (pattern.test(haystack)) return domain
  }
  return 'workflow'
}
|
||||
|
||||
/**
 * Reads and migrates the gap state file for a project.
 *
 * - A missing file (`ENOENT`) yields an empty state.
 * - Any other read error is rethrown: swallowing it would let a later write
 *   persist an empty state and wipe every gap record.
 * - Content that fails to parse or migrate is copied (best effort) to
 *   `<path>.corrupt-<timestamp>` and an empty state is returned, so the data
 *   can still be recovered by hand.
 *
 * @param project - Project whose state file is read.
 * @param rootDir - Optional alternative root for the state file.
 * @returns The migrated state.
 */
async function readSkillGapState(
  project: SkillLearningProjectContext,
  rootDir?: string,
): Promise<SkillGapState> {
  const statePath = getSkillGapStatePath(project, rootDir)

  let raw: string
  try {
    raw = await readFile(statePath, 'utf8')
  } catch (error) {
    const isMissing = (error as NodeJS.ErrnoException).code === 'ENOENT'
    if (!isMissing) throw error
    return { version: 1, gaps: {} }
  }

  try {
    const parsed = JSON.parse(raw) as SkillGapState
    return migrateLegacyGapState(parsed)
  } catch {
    // Corrupt or truncated JSON: keep a copy next to the state file before
    // starting over with an empty state.
    try {
      await writeFile(`${statePath}.corrupt-${Date.now()}`, raw, 'utf8')
    } catch {
      /* best effort */
    }
    return { version: 1, gaps: {} }
  }
}
|
||||
|
||||
/**
 * Normalizes records loaded from disk so they satisfy the current
 * `SkillGapRecord` shape.
 *
 * Per record:
 * - `count` defaults to 1 when it is not a number;
 * - `draftHits` defaults to 0 when it is not a finite number;
 * - `draftHitSessions` keeps only string entries (empty when not an array);
 * - `status` is normalized, then downgraded when it claims a draft/active
 *   materialization that the record does not carry.
 *
 * @param state - Parsed state; `gaps` may be missing.
 * @returns A new version-1 state with normalized records.
 */
function migrateLegacyGapState(state: SkillGapState): SkillGapState {
  const migrated: Record<string, SkillGapRecord> = {}

  Object.entries(state.gaps ?? {}).forEach(([key, record]) => {
    const legacy = record as Partial<SkillGapRecord> & {
      status?: unknown
    }

    const hasFiniteHits =
      typeof legacy.draftHits === 'number' && Number.isFinite(legacy.draftHits)
    const draftHits = hasFiniteHits ? (legacy.draftHits as number) : 0
    const count = typeof legacy.count === 'number' ? legacy.count : 1
    const hasDraftFile = Boolean(legacy.draft)
    const hasActiveFile = Boolean(legacy.active)

    let status: SkillGapStatus = normalizeLegacyStatus(legacy.status)
    if (status === 'draft' && !hasDraftFile && count < DRAFT_PROMOTION_COUNT) {
      // Legacy first-call-writes-draft artifact with no file on disk yet.
      status = 'pending'
    }
    if (status === 'active' && !hasActiveFile) {
      status = hasDraftFile ? 'draft' : 'pending'
    }

    let draftHitSessions: string[] = []
    if (Array.isArray(legacy.draftHitSessions)) {
      draftHitSessions = legacy.draftHitSessions.filter(
        (entry): entry is string => typeof entry === 'string',
      )
    }

    migrated[key] = {
      ...(record as SkillGapRecord),
      count,
      draftHits,
      draftHitSessions,
      status,
    }
  })

  return { version: 1, gaps: migrated }
}
|
||||
|
||||
/**
 * Coerces an arbitrary stored value into a gap status.
 *
 * @param value - Value read from disk.
 * @returns `value` when it is one of `'pending'`, `'draft'`, `'active'` or
 *   `'rejected'`; otherwise `'pending'`.
 */
function normalizeLegacyStatus(value: unknown): SkillGapStatus {
  const known: readonly SkillGapStatus[] = [
    'pending',
    'draft',
    'active',
    'rejected',
  ]
  const recognised = known.find(status => status === value)
  return recognised ?? 'pending'
}
|
||||
|
||||
/**
 * Persists the gap state as pretty-printed JSON with a trailing newline.
 *
 * The parent directory is created if needed. The content is written to a
 * temporary sibling file (`<path>.tmp-<pid>-<timestamp>`) and then renamed
 * over the target, so a crash mid-write cannot leave a truncated state file.
 *
 * NOTE(review): if `writeFile` or `rename` fails, the temporary file is left
 * behind — consider cleaning it up.
 *
 * @param project - Project whose state file is written.
 * @param state - State to persist.
 * @param rootDir - Optional alternative root for the state file.
 */
async function writeSkillGapState(
  project: SkillLearningProjectContext,
  state: SkillGapState,
  rootDir?: string,
): Promise<void> {
  const target = getSkillGapStatePath(project, rootDir)
  await mkdir(dirname(target), { recursive: true })

  const staging = `${target}.tmp-${process.pid}-${Date.now()}`
  const serialized = `${JSON.stringify(state, null, 2)}\n`
  await writeFile(staging, serialized, 'utf8')
  await rename(staging, target)
}
|
||||
|
||||
/**
 * Resolves the location of `skill-gaps.json` for a project.
 *
 * With `rootDir`, the file lives in `<rootDir>/global` for the `'global'`
 * project id and in `<rootDir>/projects/<projectId>` otherwise. Without it,
 * the directory comes from `getProjectStorageDir(projectId)`.
 *
 * @param project - Project whose id selects the directory.
 * @param rootDir - Optional alternative root.
 * @returns Path of the state file.
 */
function getSkillGapStatePath(
  project: SkillLearningProjectContext,
  rootDir?: string,
): string {
  let directory: string
  if (!rootDir) {
    directory = getProjectStorageDir(project.projectId)
  } else if (project.projectId === 'global') {
    directory = join(rootDir, 'global')
  } else {
    directory = join(rootDir, 'projects', project.projectId)
  }
  return join(directory, 'skill-gaps.json')
}
|
||||
|
||||
/**
 * Derives the key a gap is stored under: the prompt's name fragment, a dash,
 * and the first eight hex characters of the prompt's hash.
 */
function buildSkillGapKey(prompt: string): string {
  const fragment = buildNameFragment(prompt)
  const digestPrefix = hash(prompt).slice(0, 8)
  return [fragment, digestPrefix].join('-')
}
|
||||
|
||||
/**
 * Builds a short dash-separated slug from a prompt.
 *
 * A handful of Chinese terms are first replaced by English words, then the
 * text is lower-cased. Tokens of at least three characters (starting with a
 * letter or digit, continuing with letters, digits, `_` or `-`) are
 * collected, stop words are dropped, and the first five remaining tokens are
 * joined with `-`. The result is cut to 54 characters with trailing dashes
 * removed. When no token survives, `'learned-gap'` is used.
 *
 * @param prompt - Prompt to derive the slug from.
 * @returns The slug; never empty.
 */
function buildNameFragment(prompt: string): string {
  const translations: Array<[RegExp, string]> = [
    [/技能/g, ' skill '],
    [/学习/g, ' learning '],
    [/进化/g, ' evolution '],
    [/测试/g, ' testing '],
    [/最小实现/g, ' minimal implementation '],
  ]
  let normalized = prompt
  for (const [term, replacement] of translations) {
    normalized = normalized.replace(term, replacement)
  }
  normalized = normalized.toLowerCase()

  const stopWords = new Set([
    'the',
    'and',
    'for',
    'with',
    'this',
    'that',
    'user',
    'about',
    'feature',
    'flag',
    'name',
  ])

  const kept: string[] = []
  for (const token of normalized.match(/[a-z0-9][a-z0-9_-]{2,}/g) ?? []) {
    if (stopWords.has(token)) continue
    kept.push(token)
    if (kept.length === 5) break
  }

  const slug = kept.length > 0 ? kept.join('-') : 'learned-gap'
  return slug.slice(0, 54).replace(/-+$/g, '')
}
|
||||
|
||||
/**
 * Collapses every whitespace run to a single space, trims both ends, and
 * keeps at most `max` leading characters.
 */
function summarize(value: string, max: number): string {
  const words = value.split(/\s+/).filter(word => word.length > 0)
  return words.join(' ').slice(0, max)
}
|
||||
|
||||
/** Returns the SHA-1 digest of `value` as a lower-case hex string. */
function hash(value: string): string {
  const digest = createHash('sha1')
  digest.update(value)
  return digest.digest('hex')
}
|
||||
|
||||
/**
 * Clears the in-process commands cache by lazily importing
 * `../../commands.js`. Any failure (import or call) is deliberately ignored.
 */
async function clearRuntimeSkillCaches(): Promise<void> {
  try {
    const clear = (await import('../../commands.js')).clearCommandsCache
    clear()
  } catch {
    // Best effort only; generated skill files are still available next process.
  }
}
|
||||
206
src/services/skillLearning/skillGenerator.ts
Normal file
206
src/services/skillLearning/skillGenerator.ts
Normal file
@@ -0,0 +1,206 @@
|
||||
import { mkdir, readFile, writeFile } from 'node:fs/promises'
|
||||
import { join } from 'node:path'
|
||||
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
|
||||
import { clearSkillIndexCache } from '../skillSearch/localSearch.js'
|
||||
import type { Instinct } from './instinctParser.js'
|
||||
import { buildLearnedSkillName, normalizeSkillName } from './learningPolicy.js'
|
||||
import {
|
||||
compareExistingArtifacts,
|
||||
scoreArtifactOverlap,
|
||||
type ExistingSkill,
|
||||
} from './skillLifecycle.js'
|
||||
import type { LearnedSkillDraft, SkillLearningScope } from './types.js'
|
||||
|
||||
// Overlap score at or above which `generateOrMergeSkillDraft` appends evidence
// to the existing skill instead of creating a new one.
export const DUPLICATE_SKILL_OVERLAP_THRESHOLD = 0.8
|
||||
|
||||
/** Options accepted by the skill generator. All fields are optional. */
export type SkillGeneratorOptions = {
  /** Base directory for project-scoped skills; defaults to `process.cwd()`. */
  cwd?: string
  /** Directory for non-project skills; defaults to `<config home>/skills`. */
  globalSkillsDir?: string
  /** When set, the skill folder is `<outputRoot>/<name>` regardless of scope. */
  outputRoot?: string
  /** Explicit skill name; it is passed through `normalizeSkillName`. */
  name?: string
  /** Explicit description; otherwise derived from the first instinct. */
  description?: string
}
|
||||
|
||||
/**
 * Builds an in-memory skill draft from one or more instincts. Nothing is
 * written to disk.
 *
 * Scope falls back to the first instinct's scope, then to `'project'`. The
 * draft's `confidence` is the mean instinct confidence rounded to two
 * decimals.
 *
 * @param instincts - Source instincts; must not be empty.
 * @param options - Generator options plus an optional scope override.
 * @returns The draft, including its rendered content and output folder.
 * @throws Error when `instincts` is empty.
 */
export function generateSkillDraft(
  instincts: Instinct[],
  options?: SkillGeneratorOptions & { scope?: SkillLearningScope },
): LearnedSkillDraft {
  if (instincts.length === 0) {
    throw new Error('Cannot generate a skill draft without instincts')
  }

  const scope = options?.scope ?? instincts[0]?.scope ?? 'project'
  const name = options?.name
    ? normalizeSkillName(options.name)
    : buildSkillName(instincts)

  let confidenceTotal = 0
  for (const instinct of instincts) confidenceTotal += instinct.confidence
  const confidence = confidenceTotal / instincts.length

  const description = options?.description ?? buildDescription(instincts)
  const outputPath = getLearnedSkillPath(name, scope, options)
  const content = buildSkillContent({
    name,
    description,
    confidence,
    instincts,
  })
  const sourceInstinctIds = instincts.map(instinct => instinct.id)

  return {
    name,
    description,
    scope,
    sourceInstinctIds,
    confidence: Number(confidence.toFixed(2)),
    content,
    outputPath,
  }
}
|
||||
|
||||
/**
 * Result of `generateOrMergeSkillDraft`: either a fresh draft to create, or a
 * note that evidence was appended to an existing overlapping skill.
 */
export type SkillDedupOutcome =
  | { action: 'create'; draft: LearnedSkillDraft }
  | {
      action: 'append-evidence'
      target: ExistingSkill
      /** Overlap score that triggered the merge. */
      overlap: number
      /** Path of the file the evidence was appended to. */
      appendedPath: string
    }
|
||||
|
||||
/**
 * Generates a draft and checks it against existing skills. For the first
 * existing skill whose overlap score reaches
 * `DUPLICATE_SKILL_OVERLAP_THRESHOLD`, the instincts' evidence is appended to
 * that skill instead of creating a new one.
 *
 * @param instincts - Source instincts.
 * @param options - Generator options plus an optional scope override.
 * @param existingRoots - Roots handed to `compareExistingArtifacts`.
 * @returns An `'append-evidence'` outcome for the first sufficiently
 *   overlapping skill, otherwise a `'create'` outcome carrying the draft.
 */
export async function generateOrMergeSkillDraft(
  instincts: Instinct[],
  options: SkillGeneratorOptions & { scope?: SkillLearningScope },
  existingRoots: string[],
): Promise<SkillDedupOutcome> {
  const draft = generateSkillDraft(instincts, options)
  const existing = await compareExistingArtifacts(
    'skill',
    draft,
    existingRoots,
  )

  for (const target of existing) {
    const overlap = scoreArtifactOverlap(draft, target)
    if (overlap < DUPLICATE_SKILL_OVERLAP_THRESHOLD) continue

    const appendedPath = await appendInstinctEvidenceToSkill(target, instincts)
    return { action: 'append-evidence', target, overlap, appendedPath }
  }

  return { action: 'create', draft }
}
|
||||
|
||||
/**
 * Appends a timestamped "Learned evidence" section to an existing skill file,
 * with one bullet per evidence entry of every instinct, then clears the skill
 * index cache.
 *
 * The current file content is read from `target.path`; when that read fails,
 * `target.content` is used as the base instead.
 *
 * @param target - Skill to extend.
 * @param instincts - Instincts whose evidence is appended.
 * @returns `target.path`.
 */
export async function appendInstinctEvidenceToSkill(
  target: ExistingSkill,
  instincts: Instinct[],
): Promise<string> {
  let current: string
  try {
    current = await readFile(target.path, 'utf8')
  } catch {
    current = target.content
  }

  const stamp = new Date().toISOString()
  const bullets = instincts.flatMap(instinct =>
    instinct.evidence.map(entry => `- ${entry}`),
  )
  const section = ['', `## Learned evidence (${stamp})`, '', ...bullets, ''].join(
    '\n',
  )

  // Ensure the base ends with a newline before the section is attached.
  const separator = current.endsWith('\n') ? '' : '\n'
  await writeFile(target.path, `${current}${separator}${section}`, 'utf8')
  clearSkillIndexCache()
  return target.path
}
|
||||
|
||||
/**
 * Writes a draft's content to `<outputPath>/SKILL.md`, creating the folder if
 * needed, then clears the skill index cache and (best effort) the commands
 * cache.
 *
 * @param draft - Draft to persist.
 * @returns Path of the written `SKILL.md`.
 */
export async function writeLearnedSkill(
  draft: LearnedSkillDraft,
): Promise<string> {
  const skillFile = join(draft.outputPath, 'SKILL.md')
  await mkdir(draft.outputPath, { recursive: true })
  await writeFile(skillFile, draft.content, 'utf8')
  clearSkillIndexCache()

  try {
    const clear = (await import('../../commands.js')).clearCommandsCache
    clear()
  } catch {
    // Best effort: the next process will see the generated skill even if the
    // in-process command cache cannot be cleared due to import timing.
  }

  return skillFile
}
|
||||
|
||||
/**
 * Resolves the folder a learned skill is written to.
 *
 * Precedence: `<outputRoot>/<name>` when `outputRoot` is set; for the
 * `'project'` scope `<cwd or process.cwd()>/.claude/skills/<name>`; otherwise
 * `<globalSkillsDir or <config home>/skills>/<name>`.
 *
 * @param name - Skill (folder) name.
 * @param scope - Skill scope.
 * @param options - Optional path overrides.
 * @returns The skill folder path.
 */
export function getLearnedSkillPath(
  name: string,
  scope: SkillLearningScope,
  options?: SkillGeneratorOptions,
): string {
  const explicitRoot = options?.outputRoot
  if (explicitRoot) return join(explicitRoot, name)

  if (scope === 'project') {
    const projectDir = options?.cwd ?? process.cwd()
    return join(projectDir, '.claude', 'skills', name)
  }

  const globalDir =
    options?.globalSkillsDir ?? join(getClaudeConfigHomeDir(), 'skills')
  return join(globalDir, name)
}
|
||||
|
||||
/** Derives a skill name from the instincts by delegating to `buildLearnedSkillName`. */
function buildSkillName(instincts: Instinct[]): string {
  const derived = buildLearnedSkillName(instincts)
  return derived
}
|
||||
|
||||
/**
 * Derives a one-line description from the first instinct's action: whitespace
 * runs are collapsed to single spaces (without trimming) and the text is cut
 * to 120 characters.
 *
 * @param instincts - Source instincts; may be empty.
 * @returns The condensed action; `'Apply a learned project pattern'` when
 *   there is no first instinct; `'Apply learned project patterns'` when the
 *   condensed action is empty.
 */
function buildDescription(instincts: Instinct[]): string {
  const [primary] = instincts
  const action = primary?.action ?? 'Apply a learned project pattern'
  const condensed = action.replace(/\s+/g, ' ').slice(0, 120)
  if (condensed === '') return 'Apply learned project patterns'
  return condensed
}
|
||||
|
||||
/**
 * Renders the `SKILL.md` text for a learned skill: a frontmatter block (name,
 * JSON-quoted description, origin, rounded confidence, source instinct ids)
 * followed by a title and "Trigger", "Action" and "Evidence" bullet sections.
 *
 * @param params - Name, description, confidence and source instincts.
 * @returns The file content, ending with a newline.
 */
function buildSkillContent(params: {
  name: string
  description: string
  confidence: number
  instincts: Instinct[]
}): string {
  const { name, description, confidence, instincts } = params
  const asBullets = (items: string[]): string =>
    items.map(item => `- ${item}`).join('\n')

  const sourceIds = instincts
    .map(instinct => JSON.stringify(instinct.id))
    .join(', ')

  const frontmatter = [
    '---',
    `name: ${name}`,
    `description: ${JSON.stringify(description)}`,
    'origin: skill-learning',
    `confidence: ${Number(confidence.toFixed(2))}`,
    `evolved_from: [${sourceIds}]`,
    '---',
  ]

  const body = [
    '',
    `# ${titleCase(name)}`,
    '',
    '## Trigger',
    '',
    asBullets(instincts.map(instinct => instinct.trigger)),
    '',
    '## Action',
    '',
    asBullets(instincts.map(instinct => instinct.action)),
    '',
    '## Evidence',
    '',
    asBullets(instincts.flatMap(instinct => instinct.evidence)),
    '',
  ]

  return frontmatter.concat(body).join('\n')
}
|
||||
|
||||
/**
 * Converts a dash-separated slug to a space-separated title by upper-casing
 * the first character of each non-empty segment.
 */
function titleCase(value: string): string {
  const words: string[] = []
  for (const segment of value.split('-')) {
    if (segment.length === 0) continue
    words.push(segment.charAt(0).toUpperCase() + segment.slice(1))
  }
  return words.join(' ')
}
|
||||
496
src/services/skillLearning/skillLifecycle.ts
Normal file
496
src/services/skillLearning/skillLifecycle.ts
Normal file
@@ -0,0 +1,496 @@
|
||||
import {
|
||||
mkdir,
|
||||
readdir,
|
||||
readFile,
|
||||
rename,
|
||||
rm,
|
||||
writeFile,
|
||||
} from 'node:fs/promises'
|
||||
import { existsSync } from 'node:fs'
|
||||
import { basename, dirname, join } from 'node:path'
|
||||
import { clearSkillIndexCache } from '../skillSearch/localSearch.js'
|
||||
import type { LearnedSkillDraft } from './types.js'
|
||||
import { writeLearnedSkill } from './skillGenerator.js'
|
||||
|
||||
/**
 * An artifact (skill, command or agent markdown file) already present on disk.
 *
 * The loaders in this module populate only `name`, `path`, `description` and
 * `content`; the remaining optional fields must be supplied by the caller.
 */
export type ExistingSkill = {
  /** Frontmatter `name`, or a name derived from the directory / file name. */
  name: string
  /** Path of the markdown file the artifact was read from. */
  path: string
  /** Frontmatter `description`, or an empty string when absent. */
  description: string
  /** Full text of the markdown file. */
  content: string
  /** Confidence of the existing skill; treated as 0.5 when missing. */
  confidence?: number
  status?: 'active' | 'superseded' | 'archived' | 'deleted'
  /** Things that reference this skill; must be empty for a hard delete. */
  referencedBy?: string[]
  /** Explicit opt-in required before a hard delete is considered. */
  safeToDelete?: boolean
  /** Only `'low'` quality skills are eligible for a hard delete. */
  quality?: 'low' | 'medium' | 'high'
}
|
||||
|
||||
/**
 * Outcome of comparing a learned draft against the existing skills,
 * discriminated by `type`. Every variant carries a human-readable `reason`.
 */
export type SkillLifecycleDecision =
  /** Write the draft as a brand-new skill. */
  | { type: 'create'; draft: LearnedSkillDraft; reason: string }
  /** Attach `patch` (markdown) next to the overlapping skill. */
  | { type: 'merge'; targetSkill: ExistingSkill; patch: string; reason: string }
  /** Retire `targetSkill` (archive, or delete when `hardDelete`) and write the draft. */
  | {
      type: 'replace'
      targetSkill: ExistingSkill
      draft: LearnedSkillDraft
      reason: string
      hardDelete?: boolean
    }
  /** Move `targetSkill` into the archive without a replacement. */
  | { type: 'archive'; targetSkill: ExistingSkill; reason: string }
  /** Permanently remove `targetSkill`; `confirmed: false` vetoes the delete. */
  | {
      type: 'delete'
      targetSkill: ExistingSkill
      reason: string
      confirmed?: boolean
    }
|
||||
|
||||
/**
 * Audit record serialised to `replacement-manifest.json` whenever a skill is
 * archived or deleted.
 */
export type ReplacementManifest = {
  /** Name of the skill that was retired. */
  oldSkill: string
  /** Path of the retired skill's markdown file before it was moved/removed. */
  oldPath: string
  /** Name of the skill that supersedes it, if any. */
  newSkill?: string
  /** Path the superseding skill is written to, if any. */
  newPath?: string
  action: 'archive' | 'delete'
  reason: string
  /** ISO-8601 timestamp of the operation. */
  replacedAt: string
  /** `true` for archives, `false` for hard deletes. */
  recoverable: boolean
}
|
||||
|
||||
/** Knobs shared by the lifecycle operations in this module. */
export type SkillLifecycleOptions = {
  /** Must be `true` for `deleteSkill` to proceed; it throws otherwise. */
  allowHardDelete?: boolean
  /** Archive destination; defaults to a `.archive` sibling of the skill directory. */
  archiveRoot?: string
  /** Where manifests (and tombstones) are written; defaults depend on the operation. */
  manifestRoot?: string
  /** Clock override used for timestamps; defaults to the current time. */
  now?: Date
}
|
||||
|
||||
/** Kinds of artifact the learning loop can produce. */
export type LearnedArtifactKind = 'skill' | 'command' | 'agent'

/** Minimal shape needed to compare a draft against existing artifacts. */
export type ArtifactDraft = {
  name: string
  description: string
  /** Full markdown body of the draft. */
  content: string
}
|
||||
|
||||
/**
 * Finds existing artifacts whose vocabulary overlaps with a draft.
 *
 * @param kind - Artifact kind; only used when artifacts must be loaded from disk.
 * @param draft - The candidate being compared.
 * @param rootsOrSkills - Either directory roots to scan (detected by the first
 *   element being a string) or an already-loaded artifact list.
 * @returns Artifacts scoring at least 0.18, best match first.
 */
export async function compareExistingArtifacts(
  kind: LearnedArtifactKind,
  draft: ArtifactDraft,
  rootsOrSkills: string[] | ExistingSkill[],
): Promise<ExistingSkill[]> {
  let candidates: ExistingSkill[]
  if (typeof rootsOrSkills[0] === 'string') {
    candidates = await loadExistingArtifacts(kind, rootsOrSkills as string[])
  } else {
    // Empty input lands here as well and simply yields no matches.
    candidates = rootsOrSkills as ExistingSkill[]
  }

  const draftTerms = terms(
    `${draft.name} ${draft.description} ${draft.content}`,
  )
  const ranked: { skill: ExistingSkill; score: number }[] = []
  for (const skill of candidates) {
    const score = overlapScore(
      draftTerms,
      terms(`${skill.name} ${skill.description} ${skill.content}`),
    )
    if (score >= 0.18) ranked.push({ skill, score })
  }
  ranked.sort((left, right) => right.score - left.score)
  return ranked.map(entry => entry.skill)
}
|
||||
|
||||
/**
 * Skill-specific shorthand for `compareExistingArtifacts` with kind `'skill'`.
 */
export async function compareExistingSkills(
  draft: LearnedSkillDraft,
  rootsOrSkills: string[] | ExistingSkill[],
): Promise<ExistingSkill[]> {
  const kind: LearnedArtifactKind = 'skill'
  return compareExistingArtifacts(kind, draft, rootsOrSkills)
}
|
||||
|
||||
/**
 * Loads existing artifacts of the given kind from a list of root directories.
 *
 * Skills are delegated to `loadExistingSkills`; for every other kind each
 * existing root is walked by `collectArtifactFiles`. Roots that do not exist
 * are skipped.
 */
export async function loadExistingArtifacts(
  kind: LearnedArtifactKind,
  roots: string[],
): Promise<ExistingSkill[]> {
  if (kind === 'skill') return loadExistingSkills(roots)

  const found: ExistingSkill[] = []
  for (const root of roots) {
    if (existsSync(root)) {
      await collectArtifactFiles(root, found)
    }
  }
  return found
}
|
||||
|
||||
/**
 * Decides what to do with a learned draft given the overlapping skills.
 *
 * Rules, in order:
 * 1. With `allowHardDelete`, the first skill that is safe to hard-delete
 *    yields a confirmed `delete` decision.
 * 2. With no existing skill, `create`.
 * 3. Against the first existing skill: overlap >= 0.72, draft confidence
 *    >= 0.75 and `shouldReplaceSkill` yield `replace`.
 * 4. Overlap >= 0.35 yields `merge`.
 * 5. Otherwise `create`.
 *
 * @param existingSkills - Candidates; only the first is scored, so callers
 *   presumably pass them best-match first — TODO confirm.
 */
export function decideSkillLifecycle(
  draft: LearnedSkillDraft,
  existingSkills: ExistingSkill[],
  options: Pick<SkillLifecycleOptions, 'allowHardDelete'> = {},
): SkillLifecycleDecision {
  if (options.allowHardDelete) {
    const deletable = existingSkills.find(skill => isSafeToHardDelete(skill))
    if (deletable) {
      return {
        type: 'delete',
        targetSkill: deletable,
        reason:
          'Existing skill is low quality, unreferenced, and safe to delete.',
        confirmed: true,
      }
    }
  }

  const [target] = existingSkills
  if (!target) {
    return {
      type: 'create',
      draft,
      reason: 'No overlapping active skill found.',
    }
  }

  const score = overlapScore(
    terms(`${draft.name} ${draft.description} ${draft.content}`),
    terms(`${target.name} ${target.description} ${target.content}`),
  )

  const strongOverlap = score >= 0.72 && draft.confidence >= 0.75
  if (strongOverlap && shouldReplaceSkill(draft, target)) {
    return {
      type: 'replace',
      targetSkill: target,
      draft,
      reason: `New learned skill has high overlap (${score.toFixed(2)}) and higher confidence.`,
    }
  }

  if (score < 0.35) {
    return { type: 'create', draft, reason: 'Overlap is too low to merge.' }
  }

  return {
    type: 'merge',
    targetSkill: target,
    patch: buildMergePatch(draft),
    reason: `Existing skill overlaps with the learned pattern (${score.toFixed(2)}).`,
  }
}
|
||||
|
||||
/**
 * Executes a lifecycle decision against the filesystem.
 *
 * - `create`: writes the draft.
 * - `merge` / `replace`: only touch skills whose frontmatter marks them as
 *   generated by skill learning; user-authored skills are skipped with a
 *   message on stderr and an empty result.
 * - `replace`: retires the old skill FIRST (delete when `hardDelete`, archive
 *   otherwise) and only then writes the draft, so the two skills are never
 *   active at the same time.
 * - `archive` / `delete`: forwarded to `archiveSkill` / `deleteSkill`; a delete
 *   additionally requires `options.allowHardDelete` and `confirmed !== false`.
 *
 * @returns The paths touched by the operation; keys depend on the decision.
 */
export async function applySkillLifecycleDecision(
  decision: SkillLifecycleDecision,
  options: SkillLifecycleOptions = {},
): Promise<{
  activePath?: string
  archivedPath?: string
  deletedPath?: string
  manifestPath?: string
  tombstonePath?: string
}> {
  if (decision.type === 'create') {
    return { activePath: await writeLearnedSkill(decision.draft) }
  }

  if (decision.type === 'archive') {
    return await archiveSkill(
      decision.targetSkill,
      decision.reason,
      undefined,
      options,
    )
  }

  if (decision.type === 'delete') {
    const allowHardDelete =
      options.allowHardDelete && decision.confirmed !== false
    return await deleteSkill(decision.targetSkill, decision.reason, undefined, {
      ...options,
      allowHardDelete,
    })
  }

  // Remaining variants ('merge' and 'replace') modify an existing skill and
  // therefore must never touch something the user wrote by hand.
  if (!isSkillLearningGenerated(decision.targetSkill)) {
    process.stderr.write(
      `[skill-learning] skip user-authored skill: ${decision.targetSkill.path}\n`,
    )
    return {}
  }

  if (decision.type === 'merge') {
    return {
      activePath: await writeMergePatch(decision.targetSkill, decision.patch),
    }
  }

  // 'replace': `draft.outputPath` is the path `writeLearnedSkill` will target,
  // so it can be recorded in the manifest before the draft is written.
  const replacement = {
    newSkill: decision.draft.name,
    newPath: decision.draft.outputPath,
  }

  if (decision.hardDelete) {
    const { deletedPath, manifestPath, tombstonePath } = await deleteSkill(
      decision.targetSkill,
      decision.reason,
      replacement,
      { ...options, allowHardDelete: true },
    )
    const activePath = await writeLearnedSkill(decision.draft)
    return { activePath, deletedPath, manifestPath, tombstonePath }
  }

  const { archivedPath, manifestPath } = await archiveSkill(
    decision.targetSkill,
    decision.reason,
    replacement,
    options,
  )
  const activePath = await writeLearnedSkill(decision.draft)
  return { activePath, archivedPath, manifestPath }
}
|
||||
|
||||
/**
 * Collects every `SKILL.md` found under the given roots (recursively).
 * Roots that do not exist are ignored.
 */
export async function loadExistingSkills(
  roots: string[],
): Promise<ExistingSkill[]> {
  const found: ExistingSkill[] = []
  for (const root of roots) {
    if (existsSync(root)) {
      await collectSkillFiles(root, found)
    }
  }
  return found
}
|
||||
|
||||
/**
 * Moves a skill's directory into the archive and records a manifest.
 *
 * The directory containing `skill.path` is renamed to
 * `<archiveRoot>/<dirName>-<timestamp>`; the manifest is written into
 * `options.manifestRoot` or, by default, into the archived directory itself.
 * The skill index cache is cleared afterwards.
 *
 * NOTE(review): the WHOLE parent directory of `skill.path` is moved. That is
 * right for `<name>/SKILL.md` layouts; confirm callers never pass single-file
 * artifacts that share a directory with siblings.
 *
 * @param replacement - Superseding skill recorded in the manifest, if any.
 */
export async function archiveSkill(
  skill: ExistingSkill,
  reason: string,
  replacement?: { newSkill?: string; newPath?: string },
  options: SkillLifecycleOptions = {},
): Promise<{ archivedPath: string; manifestPath: string }> {
  const skillDir = dirname(skill.path)
  const archiveRoot = options.archiveRoot ?? join(dirname(skillDir), '.archive')
  const archivedName = `${basename(skillDir)}-${timestamp(options.now)}`
  const archivedPath = join(archiveRoot, archivedName)

  await mkdir(archiveRoot, { recursive: true })
  await rename(skillDir, archivedPath)

  const manifest: ReplacementManifest = {
    oldSkill: skill.name,
    oldPath: skill.path,
    newSkill: replacement?.newSkill,
    newPath: replacement?.newPath,
    action: 'archive',
    reason,
    replacedAt: (options.now ?? new Date()).toISOString(),
    recoverable: true,
  }
  const manifestPath = await writeReplacementManifest(
    options.manifestRoot ?? archivedPath,
    manifest,
  )

  clearSkillIndexCache()
  return { archivedPath, manifestPath }
}
|
||||
|
||||
/**
 * Permanently removes a skill directory, leaving a manifest and a tombstone.
 *
 * Before the directory is removed, the skill file's content (empty string if
 * the file is already gone) is captured in
 * `<manifestRoot>/<name>-<timestamp>.tombstone.json` and a
 * `replacement-manifest.json` is written to the same directory. The skill
 * index cache is cleared afterwards.
 *
 * NOTE(review): the WHOLE parent directory of `skill.path` is removed
 * recursively — confirm callers never pass single-file artifacts that share a
 * directory with siblings.
 * NOTE(review): the manifest file name is fixed, so successive deletes sharing
 * one `manifestRoot` overwrite each other's manifest; only the tombstones
 * accumulate.
 *
 * @param replacement - Superseding skill recorded in the manifest, if any.
 * @param options - `allowHardDelete` must be `true`.
 * @throws Error when `options.allowHardDelete` is not truthy.
 */
export async function deleteSkill(
  skill: ExistingSkill,
  reason: string,
  replacement?: { newSkill?: string; newPath?: string },
  options: SkillLifecycleOptions = {},
): Promise<{
  deletedPath: string
  manifestPath: string
  tombstonePath: string
}> {
  if (!options.allowHardDelete) {
    throw new Error('Hard delete requires allowHardDelete=true')
  }

  const skillDir = dirname(skill.path)
  const content = existsSync(skill.path)
    ? await readFile(skill.path, 'utf8')
    : ''
  const manifestRoot =
    options.manifestRoot ?? join(dirname(skillDir), '.tombstones')
  const manifestPath = await writeReplacementManifest(manifestRoot, {
    oldSkill: skill.name,
    oldPath: skill.path,
    newSkill: replacement?.newSkill,
    newPath: replacement?.newPath,
    action: 'delete',
    reason,
    replacedAt: (options.now ?? new Date()).toISOString(),
    recoverable: false,
  })

  // `skill.name` is free text taken from frontmatter. Path separators and
  // other characters that are invalid in file names are replaced so the
  // tombstone always lands directly inside `manifestRoot`.
  const tombstoneStem =
    skill.name.replace(/[\\/:*?"<>|\u0000-\u001f]+/g, '-') ||
    basename(skillDir)
  const tombstonePath = join(
    manifestRoot,
    `${tombstoneStem}-${timestamp(options.now)}.tombstone.json`,
  )
  await writeFile(
    tombstonePath,
    `${JSON.stringify({ deletedSkill: skill.name, oldPath: skill.path, content }, null, 2)}\n`,
    'utf8',
  )
  await rm(skillDir, { recursive: true, force: true })
  clearSkillIndexCache()
  return { deletedPath: skill.path, manifestPath, tombstonePath }
}
|
||||
|
||||
/**
 * Serialises a manifest as pretty-printed JSON (2-space indent, trailing
 * newline) to `<directory>/replacement-manifest.json`, creating the directory
 * if needed.
 *
 * @returns The path of the manifest file.
 */
export async function writeReplacementManifest(
  directory: string,
  manifest: ReplacementManifest,
): Promise<string> {
  const manifestPath = join(directory, 'replacement-manifest.json')
  const serialized = `${JSON.stringify(manifest, null, 2)}\n`

  await mkdir(directory, { recursive: true })
  await writeFile(manifestPath, serialized, 'utf8')
  return manifestPath
}
|
||||
|
||||
/**
 * Writes a merge patch as `learned-skill.patch.md` beside the target skill's
 * file (replacing any previous patch) and clears the skill index cache.
 *
 * @returns The path of the patch file.
 */
async function writeMergePatch(
  skill: ExistingSkill,
  patch: string,
): Promise<string> {
  const skillDir = dirname(skill.path)
  const patchPath = join(skillDir, 'learned-skill.patch.md')
  await writeFile(patchPath, patch, 'utf8')
  clearSkillIndexCache()
  return patchPath
}
|
||||
|
||||
/**
 * Renders the markdown merge patch for a draft: a heading, the draft's name
 * and confidence, then the draft content under "Suggested additions".
 */
function buildMergePatch(draft: LearnedSkillDraft): string {
  const header =
    '# Learned Skill Merge Patch\n' +
    '\n' +
    `Target learned skill: ${draft.name}\n` +
    `Confidence: ${draft.confidence}\n`
  const additions = '## Suggested additions\n' + '\n' + draft.content
  return `${header}\n${additions}`
}
|
||||
|
||||
/**
 * Whether a draft is enough of an improvement to replace an existing skill.
 *
 * Superseded or archived targets are always replaceable. Otherwise the draft
 * must beat the target's confidence (0.5 when unknown) by at least 0.15, or
 * be more than 160 characters longer.
 */
function shouldReplaceSkill(
  draft: LearnedSkillDraft,
  target: ExistingSkill,
): boolean {
  switch (target.status) {
    case 'superseded':
    case 'archived':
      return true
    default:
      break
  }
  const targetConfidence = target.confidence ?? 0.5
  if (draft.confidence - targetConfidence >= 0.15) return true
  return draft.content.length - target.content.length > 160
}
|
||||
|
||||
/**
 * A skill may be hard-deleted only when it is explicitly flagged
 * `safeToDelete`, nothing references it, and its quality is `'low'`.
 */
function isSafeToHardDelete(skill: ExistingSkill): boolean {
  if (skill.safeToDelete !== true) return false
  const referenceCount = skill.referencedBy?.length ?? 0
  if (referenceCount !== 0) return false
  return skill.quality === 'low'
}
|
||||
|
||||
/**
 * Formats a date as an ISO-8601 string with `:` and `.` replaced by `-`, so
 * the result can be embedded in file names.
 *
 * @param date - Defaults to the current time.
 */
function timestamp(date = new Date()): string {
  const iso = date.toISOString()
  return iso.replace(/[:.]/g, '-')
}
|
||||
|
||||
/**
 * Recursively walks `root` and appends one entry to `results` for every file
 * named `SKILL.md`. Directories named `.archive` are not descended into.
 *
 * The entry's name comes from the frontmatter `name`, falling back to the
 * name of the directory holding the file; the description falls back to ''.
 */
async function collectSkillFiles(
  root: string,
  results: ExistingSkill[],
): Promise<void> {
  for (const entry of await readdir(root, { withFileTypes: true })) {
    const entryPath = join(root, entry.name)

    if (entry.isDirectory()) {
      if (entry.name !== '.archive') {
        await collectSkillFiles(entryPath, results)
      }
      continue
    }

    if (!entry.isFile() || entry.name !== 'SKILL.md') continue

    const content = await readFile(entryPath, 'utf8')
    const name =
      parseFrontmatter(content, 'name') ?? basename(dirname(entryPath))
    const description = parseFrontmatter(content, 'description') ?? ''
    results.push({ name, description, path: entryPath, content })
  }
}
|
||||
|
||||
/**
 * Recursively walks `root` and appends one entry to `results` for every `.md`
 * file. Directories named `.archive` are not descended into.
 *
 * The entry's name comes from the frontmatter `name`, falling back to the
 * file name without its `.md` extension; the description falls back to ''.
 */
async function collectArtifactFiles(
  root: string,
  results: ExistingSkill[],
): Promise<void> {
  for (const entry of await readdir(root, { withFileTypes: true })) {
    const entryPath = join(root, entry.name)

    if (entry.isDirectory()) {
      if (entry.name !== '.archive') {
        await collectArtifactFiles(entryPath, results)
      }
      continue
    }

    if (!entry.isFile() || !entry.name.endsWith('.md')) continue

    const content = await readFile(entryPath, 'utf8')
    const name =
      parseFrontmatter(content, 'name') ?? entry.name.replace(/\.md$/, '')
    const description = parseFrontmatter(content, 'description') ?? ''
    results.push({ name, description, path: entryPath, content })
  }
}
|
||||
|
||||
/**
 * Reads a single scalar value from a document's YAML frontmatter.
 *
 * Only the block between the opening `---` on the first line and the next
 * `---` is searched: a body line such as `origin: skill-learning` in a
 * user-authored document must NOT be mistaken for a generated-skill marker.
 *
 * Value handling:
 * - the value must sit on the same line as the key (an empty `key:` yields
 *   `undefined` instead of swallowing the following line);
 * - double-quoted values are decoded as JSON strings, so escaped quotes
 *   written by `JSON.stringify` round-trip;
 * - single-quoted values have their quotes removed (`''` becomes `'`).
 *
 * This is intentionally not a full YAML parser.
 *
 * @param content - Full document text.
 * @param key - Top-level frontmatter key; matched literally.
 * @returns The trimmed value, or `undefined` when there is no frontmatter, no
 *   such key, or the value is empty.
 */
function parseFrontmatter(content: string, key: string): string | undefined {
  const block = /^---\r?\n([\s\S]*?)\r?\n---/.exec(content)?.[1]
  if (block === undefined) return undefined

  // Escape the key so characters like `.` or `+` cannot alter the pattern.
  const literalKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  // `[ \t]*` (not `\s*`) keeps the match on the key's own line.
  const raw = new RegExp(`^${literalKey}:[ \\t]*(.*)$`, 'm')
    .exec(block)?.[1]
    ?.trim()
  if (!raw) return undefined

  let value = raw
  if (raw.startsWith('"')) {
    try {
      const decoded: unknown = JSON.parse(raw)
      value = typeof decoded === 'string' ? decoded : raw
    } catch {
      // Not valid JSON (e.g. a missing closing quote): strip the quotes only.
      value = raw.slice(1).replace(/"$/, '')
    }
  } else if (raw.length >= 2 && raw.startsWith("'") && raw.endsWith("'")) {
    value = raw.slice(1, -1).replace(/''/g, "'")
  }

  const trimmed = value.trim()
  return trimmed.length > 0 ? trimmed : undefined
}
|
||||
|
||||
/**
 * True when the skill's frontmatter carries `origin: skill-learning`, i.e. it
 * was produced by the learning loop rather than written by a user.
 */
function isSkillLearningGenerated(skill: ExistingSkill): boolean {
  const origin = parseFrontmatter(skill.content, 'origin')
  return origin === 'skill-learning'
}
|
||||
|
||||
/**
 * Tokenises text into a set of distinct lower-case terms: the input is split
 * on every run of characters other than `a-z` / `0-9`, and tokens of two
 * characters or fewer are discarded.
 */
function terms(value: string): Set<string> {
  const vocabulary = new Set<string>()
  for (const token of value.toLowerCase().split(/[^a-z0-9]+/)) {
    if (token.length > 2) vocabulary.add(token)
  }
  return vocabulary
}
|
||||
|
||||
/**
 * Overlap coefficient of two term sets: the number of shared terms divided by
 * the size of the smaller set. Returns 0 when either set is empty.
 */
function overlapScore(a: Set<string>, b: Set<string>): number {
  if (a.size === 0 || b.size === 0) return 0

  // The intersection is symmetric, so walk whichever set is smaller.
  const [smaller, larger] = a.size <= b.size ? [a, b] : [b, a]
  let shared = 0
  smaller.forEach(term => {
    if (larger.has(term)) shared += 1
  })
  return shared / smaller.size
}
|
||||
|
||||
/**
 * Scores how strongly a draft overlaps with an existing artifact by comparing
 * the terms of their concatenated name, description and content.
 *
 * @returns A value in [0, 1]; see `overlapScore`.
 */
export function scoreArtifactOverlap(
  draft: ArtifactDraft,
  existing: { name: string; description: string; content: string },
): number {
  const draftText = `${draft.name} ${draft.description} ${draft.content}`
  const existingText = `${existing.name} ${existing.description} ${existing.content}`
  return overlapScore(terms(draftText), terms(existingText))
}
|
||||
312
src/services/skillLearning/toolEventObserver.ts
Normal file
312
src/services/skillLearning/toolEventObserver.ts
Normal file
@@ -0,0 +1,312 @@
|
||||
import { randomUUID } from 'node:crypto'
|
||||
import {
|
||||
appendObservation,
|
||||
type StoredSkillObservation,
|
||||
} from './observationStore.js'
|
||||
import type {
|
||||
SkillLearningProjectContext,
|
||||
SkillObservationOutcome,
|
||||
} from './types.js'
|
||||
import { logForDebugging } from '../../utils/debug.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
|
||||
/**
|
||||
* Tool event hook layer.
|
||||
*
|
||||
* Preferred observation pathway: consumers (tool dispatcher, REPL turn loop,
|
||||
* or integration tests) call `recordToolStart` / `recordToolComplete` /
|
||||
* `recordToolError` / `recordUserCorrection` as tool-level events happen,
|
||||
* producing deterministic observations with `source: 'tool-hook'`.
|
||||
*
|
||||
* Post-sampling reconstruction (runtimeObserver.observationsFromMessages)
|
||||
* is retained as a fallback for environments where the caller cannot emit
|
||||
* tool events directly.
|
||||
*
|
||||
* @todo Wire these functions into `src/Tool.ts`'s public dispatch so the
|
||||
* main REPL tool loop produces tool-hook observations automatically.
|
||||
* Until then, callers that do have tool-level signal (integration
|
||||
* tests, custom harness code, future tool middleware) can use the
|
||||
* functions here directly.
|
||||
*/
|
||||
|
||||
/** Identity attached to every observation emitted by the tool hooks. */
export type ToolHookContext = {
  sessionId: string
  /** Turn number the event belongs to. */
  turn: number
  projectId: string
  projectName: string
  cwd: string
  /** Resolved project context, forwarded to `appendObservation` when set. */
  project?: SkillLearningProjectContext
}
|
||||
|
||||
/** Maximum number of turns tracked per session before pruning. */
const EMITTED_TURNS_SET_MAX = 500
/** How many turns to retain after pruning a session Set. */
const EMITTED_TURNS_SET_KEEP = 250
/** Maximum number of sessions tracked in the Map before pruning. */
const EMITTED_TURNS_MAP_MAX = 50
/** How many sessions to retain after pruning the Map. */
const EMITTED_TURNS_MAP_KEEP = 25

/** Turns, per session id, for which a tool-hook observation was emitted. */
const emittedTurns = new Map<string, Set<number>>()

/**
 * Keeps `emittedTurns` within its memory bounds.
 *
 * - A session Set holding more than `EMITTED_TURNS_SET_MAX` turns is trimmed
 *   to `EMITTED_TURNS_SET_KEEP` by dropping the earliest-inserted entries.
 * - A Map holding more than `EMITTED_TURNS_MAP_MAX` sessions is trimmed to
 *   `EMITTED_TURNS_MAP_KEEP` by dropping the earliest-inserted sessions.
 *
 * Exported so tests and `resetToolHookBookkeeping` callers can invoke it
 * directly.
 */
export function pruneEmittedTurns(): void {
  // Eviction follows insertion order, NOT turn magnitude: replayed
  // transcripts or nested tool chains may record turns out of numeric order.
  for (const turns of emittedTurns.values()) {
    if (turns.size <= EMITTED_TURNS_SET_MAX) continue
    let surplus = turns.size - EMITTED_TURNS_SET_KEEP
    for (const turn of turns) {
      if (surplus <= 0) break
      turns.delete(turn)
      surplus -= 1
    }
  }

  if (emittedTurns.size <= EMITTED_TURNS_MAP_MAX) return
  const staleSessions = Array.from(emittedTurns.keys()).slice(
    0,
    emittedTurns.size - EMITTED_TURNS_MAP_KEEP,
  )
  for (const sessionId of staleSessions) {
    emittedTurns.delete(sessionId)
  }
}
|
||||
|
||||
/**
 * Records that a tool-hook observation was emitted for `turn` in `sessionId`,
 * then prunes the bookkeeping.
 */
function markTurn(sessionId: string, turn: number): void {
  let turns = emittedTurns.get(sessionId)
  if (turns === undefined) {
    turns = new Set<number>()
  }
  turns.add(turn)

  // Delete + re-insert moves the session to the end of the Map's insertion
  // order, so the oldest-first eviction treats it as the youngest.
  emittedTurns.delete(sessionId)
  emittedTurns.set(sessionId, turns)

  pruneEmittedTurns()
}
|
||||
|
||||
/**
 * Whether a tool-hook observation has been recorded for the given session and
 * turn (and has not since been pruned from the bookkeeping).
 */
export function hasToolHookObservationsForTurn(
  sessionId: string,
  turn: number,
): boolean {
  const turns = emittedTurns.get(sessionId)
  return turns !== undefined && turns.has(turn)
}
|
||||
|
||||
/** Forgets every recorded session/turn pair. */
export function resetToolHookBookkeeping(): void {
  emittedTurns.clear()
}
|
||||
|
||||
/**
 * Builds the fields shared by every tool-hook observation: a fresh random id,
 * the identity copied from `ctx`, the current time as an ISO string and the
 * fixed `'tool-hook'` source.
 */
function baseObservation(
  ctx: ToolHookContext,
): Pick<
  StoredSkillObservation,
  | 'id'
  | 'sessionId'
  | 'projectId'
  | 'projectName'
  | 'cwd'
  | 'timestamp'
  | 'source'
  | 'turn'
> {
  const { sessionId, projectId, projectName, cwd, turn } = ctx
  const id = randomUUID()
  const timestamp = new Date().toISOString()
  return {
    id,
    sessionId,
    projectId,
    projectName,
    cwd,
    timestamp,
    source: 'tool-hook',
    // The turn is persisted so runtimeObserver can filter tool-hook
    // observations by the current turn rather than sweeping all historical
    // tool-hook data (codex review Q1).
    turn,
  }
}
|
||||
|
||||
// Cached import promise — resolved once so the hot path pays no repeated
// dynamic-import overhead after the first invocation.
let _depImportCache:
  | Promise<{
      resolveProjectContext: (cwd: string) => SkillLearningProjectContext
      isSkillLearningEnabled: () => boolean
      RUNTIME_SESSION_ID: string
      getRuntimeTurn: () => number
    }>
  | undefined

/**
 * Lazily imports the modules the tool hooks depend on and memoises the
 * in-flight promise.
 *
 * A failed import is NOT kept in the cache: the rejection is delivered to the
 * current caller and the next call starts a fresh import, so one failure does
 * not disable the hooks until `resetToolHookDepsCache` is called.
 */
function _getDeps() {
  if (_depImportCache) return _depImportCache

  const pending: NonNullable<typeof _depImportCache> = Promise.all([
    import('./projectContext.js'),
    import('./featureCheck.js'),
    import('./runtimeObserver.js'),
  ]).then(([pc, fc, ro]) => ({
    resolveProjectContext: pc.resolveProjectContext,
    isSkillLearningEnabled: fc.isSkillLearningEnabled,
    RUNTIME_SESSION_ID: ro.RUNTIME_SESSION_ID,
    getRuntimeTurn: ro.getRuntimeTurn,
  }))
  _depImportCache = pending
  // Side-channel handler: drops the cached promise on failure (unless it was
  // already reset/replaced). Callers still observe the rejection on `pending`.
  pending.catch(() => {
    if (_depImportCache === pending) _depImportCache = undefined
  })
  return pending
}

/** Reset the cached dep import (for test isolation). */
export function resetToolHookDepsCache(): void {
  _depImportCache = undefined
}
|
||||
|
||||
/**
 * Wrap a tool.call invocation with deterministic tool-event observation.
 *
 * Designed for the single call site in `toolExecution.ts`. The hook calls
 * (`recordToolStart`, `recordToolComplete`, `recordToolError`) are true
 * fire-and-forget: the tool result is returned without waiting for the
 * observation to persist, and observation failures are logged, never
 * surfaced to the caller.
 *
 * `invoke` is called exactly once, outside the observation-setup `try`, so a
 * failure of the tool itself can never be mistaken for a setup failure and
 * trigger a second invocation.
 *
 * @param toolName - Name recorded on the observations.
 * @param input - Tool input recorded on the start observation.
 * @param callContext - Optional overrides for session id and turn; the
 *   runtime observer's values are used when omitted.
 * @param invoke - Performs the actual tool call.
 * @returns Whatever `invoke` resolves to; its rejection is rethrown unchanged.
 */
export async function runToolCallWithSkillLearningHooks<T>(
  toolName: string,
  input: unknown,
  callContext: { sessionId?: string; turn?: number },
  invoke: () => Promise<T>,
): Promise<T> {
  const logHookFailure = (label: string) => (e: unknown) => {
    logForDebugging(`skill-learning: ${label} error`)
    logError(e)
  }

  // Stays undefined when skill learning is disabled or setup failed; in both
  // cases the tool runs without any observation.
  let ctx: ToolHookContext | undefined
  try {
    const {
      resolveProjectContext,
      isSkillLearningEnabled,
      RUNTIME_SESSION_ID,
      getRuntimeTurn,
    } = await _getDeps()
    if (isSkillLearningEnabled()) {
      const project = resolveProjectContext(process.cwd())
      // Always emit under the runtime observer's sessionId so the
      // post-sampling consumer can find our records. The prior default `'cli'`
      // fell outside the observer's sessionId filter and made tool-hook
      // observations structurally unconsumable (codex second-pass audit AC1).
      ctx = {
        sessionId: callContext.sessionId ?? RUNTIME_SESSION_ID,
        turn: callContext.turn ?? getRuntimeTurn(),
        projectId: project.projectId,
        projectName: project.projectName,
        cwd: project.cwd,
        project,
      }
    }
  } catch (e) {
    // Never let observation setup errors affect tool execution.
    logHookFailure('hook setup')(e)
  }

  if (ctx) {
    // Fire-and-forget: do NOT await — tool invoke must not be blocked.
    void recordToolStart(ctx, toolName, input).catch(
      logHookFailure('recordToolStart'),
    )
  }

  try {
    const result = await invoke()
    if (ctx) {
      // Fire-and-forget: do NOT await.
      void recordToolComplete(ctx, toolName, result, 'success').catch(
        logHookFailure('recordToolComplete'),
      )
    }
    return result
  } catch (error) {
    if (ctx) {
      // Fire-and-forget: do NOT await.
      void recordToolError(ctx, toolName, error).catch(
        logHookFailure('recordToolError'),
      )
    }
    throw error
  }
}
|
||||
|
||||
/**
 * Persists a `tool_start` observation for the given tool and marks the turn
 * as having tool-hook data.
 *
 * @param input - Tool input; stored in stringified form.
 * @returns The stored observation.
 */
export async function recordToolStart(
  ctx: ToolHookContext,
  toolName: string,
  input?: unknown,
): Promise<StoredSkillObservation> {
  markTurn(ctx.sessionId, ctx.turn)
  const common = baseObservation(ctx)
  const toolInput = stringify(input)
  const observation: StoredSkillObservation = {
    ...common,
    event: 'tool_start',
    toolName,
    toolInput,
  }
  return appendObservation(observation, { project: ctx.project })
}
|
||||
|
||||
/**
 * Persists a `tool_complete` observation for the given tool and marks the
 * turn as having tool-hook data.
 *
 * @param output - Tool output; stored in stringified form.
 * @param outcome - Defaults to `'success'`.
 * @returns The stored observation.
 */
export async function recordToolComplete(
  ctx: ToolHookContext,
  toolName: string,
  output?: unknown,
  outcome: SkillObservationOutcome = 'success',
): Promise<StoredSkillObservation> {
  markTurn(ctx.sessionId, ctx.turn)
  const common = baseObservation(ctx)
  const toolOutput = stringify(output)
  const observation: StoredSkillObservation = {
    ...common,
    event: 'tool_complete',
    toolName,
    toolOutput,
    outcome,
  }
  return appendObservation(observation, { project: ctx.project })
}
|
||||
|
||||
/**
 * Persists a failed tool call and marks the turn as having tool-hook data.
 *
 * NOTE(review): the observation is emitted as `tool_complete` with outcome
 * `'failure'`, not as a separate error event — confirm consumers expect that.
 *
 * @param error - The thrown value; stored in stringified form as tool output.
 * @returns The stored observation.
 */
export async function recordToolError(
  ctx: ToolHookContext,
  toolName: string,
  error: unknown,
): Promise<StoredSkillObservation> {
  markTurn(ctx.sessionId, ctx.turn)
  const common = baseObservation(ctx)
  const toolOutput = stringify(error)
  const observation: StoredSkillObservation = {
    ...common,
    event: 'tool_complete',
    toolName,
    toolOutput,
    outcome: 'failure',
  }
  return appendObservation(observation, { project: ctx.project })
}
|
||||
|
||||
/**
 * Persists a user correction as a `user_message` observation and marks the
 * turn as having tool-hook data.
 *
 * @param messageText - The user's message, stored verbatim.
 * @returns The stored observation.
 */
export async function recordUserCorrection(
  ctx: ToolHookContext,
  messageText: string,
): Promise<StoredSkillObservation> {
  markTurn(ctx.sessionId, ctx.turn)
  const common = baseObservation(ctx)
  const observation: StoredSkillObservation = {
    ...common,
    event: 'user_message',
    messageText,
  }
  return appendObservation(observation, { project: ctx.project })
}
|
||||
|
||||
/**
 * Converts an arbitrary tool input/output/error into text for storage.
 *
 * - `null` / `undefined` yield `undefined` (the field is omitted).
 * - Strings are returned unchanged.
 * - `Error` instances become `"<name>: <message>"`. Their own properties are
 *   non-enumerable, so `JSON.stringify` would reduce them to `"{}"` and the
 *   failure text would be lost.
 * - Everything else is JSON-serialised, falling back to `String(value)` when
 *   serialisation throws (circular structures, BigInt).
 */
function stringify(value: unknown): string | undefined {
  if (value === undefined || value === null) return undefined
  if (typeof value === 'string') return value
  if (value instanceof Error) {
    return value.message ? `${value.name}: ${value.message}` : value.name
  }
  try {
    return JSON.stringify(value)
  } catch {
    return String(value)
  }
}
|
||||
109
src/services/skillLearning/types.ts
Normal file
109
src/services/skillLearning/types.ts
Normal file
@@ -0,0 +1,109 @@
|
||||
/** Whether learned data applies to one project or to every project. */
export type SkillLearningScope = 'project' | 'global'

/** Lifecycle states of a detected skill gap. */
export type SkillGapStatus = 'pending' | 'draft' | 'active' | 'rejected'

/** Kinds of event an observation can describe. */
export type SkillObservationEvent =
  | 'user_message'
  | 'assistant_message'
  | 'tool_start'
  | 'tool_complete'
  | 'tool_error'

/** Result attached to an observation. */
export type SkillObservationOutcome = 'success' | 'failure' | 'unknown'
|
||||
|
||||
/** Every domain an instinct can be filed under, as a readonly tuple. */
export const INSTINCT_DOMAINS = [
  'workflow',
  'testing',
  'debugging',
  'code-style',
  'security',
  'git',
  'project',
] as const

/** Union of the literal values in `INSTINCT_DOMAINS`. */
export type InstinctDomain = (typeof INSTINCT_DOMAINS)[number]
|
||||
|
||||
/** Where an instinct came from. */
export type InstinctSource =
  | 'session-observation'
  | 'repo-analysis'
  | 'imported'

/** Lifecycle states of an instinct. */
export type InstinctStatus =
  | 'pending'
  | 'active'
  | 'stale'
  | 'superseded'
  | 'retired'
  | 'archived'
  | 'conflict-hold'

/** How a project context was determined. */
export type ProjectContextSource =
  | 'claude_project_dir'
  | 'git_remote'
  | 'git_root'
  | 'global'
|
||||
|
||||
/** A single recorded event from a session. */
export interface SkillObservation {
  id: string
  /** When the event was recorded (the tool hooks write an ISO-8601 string). */
  timestamp: string
  event: SkillObservationEvent
  sessionId: string
  projectId: string
  projectName: string
  cwd: string
  /** Set on tool events. */
  toolName?: string
  toolInput?: unknown
  toolOutput?: unknown
  /** Set on message events. */
  messageText?: string
  outcome?: SkillObservationOutcome
}
|
||||
|
||||
/** A learned trigger/action pair together with its supporting evidence. */
export interface Instinct {
  id: string
  /** Situation in which the instinct applies, e.g. "when writing tests". */
  trigger: string
  /** What to do when the trigger applies. */
  action: string
  confidence: number
  domain: InstinctDomain
  source: InstinctSource
  scope: SkillLearningScope
  projectId?: string
  projectName?: string
  /** Free-text evidence entries backing the instinct. */
  evidence: string[]
  evidenceOutcome?: SkillObservationOutcome
  createdAt: string
  updatedAt: string
  status: InstinctStatus
}
|
||||
|
||||
/** A skill generated from instincts that has not been written to disk yet. */
export interface LearnedSkillDraft {
  name: string
  description: string
  scope: SkillLearningScope
  /** Ids of the instincts the draft was generated from. */
  sourceInstinctIds: string[]
  confidence: number
  /** Full markdown content of the skill file. */
  content: string
  /** Path the skill file will be written to. */
  outputPath: string
}
|
||||
|
||||
/** Identity and storage location of the project a session runs in. */
export interface SkillLearningProjectContext {
  projectId: string
  projectName: string
  scope: SkillLearningScope
  /** How the project was identified. */
  source: ProjectContextSource
  cwd: string
  projectRoot?: string
  gitRemote?: string
  /** Directory holding this project's skill-learning data. */
  storageDir: string
}

/** A project context plus the first and last time it was seen. */
export interface SkillLearningProjectRecord
  extends SkillLearningProjectContext {
  firstSeenAt: string
  lastSeenAt: string
}

/** Registry of known projects, keyed by string (presumably the project id — TODO confirm). */
export interface SkillLearningProjectsRegistry {
  /** Schema version; currently always 1. */
  version: 1
  updatedAt: string
  projects: Record<string, SkillLearningProjectRecord>
}
|
||||
Reference in New Issue
Block a user