feat: 添加 skill learning 技能学习闭环系统

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-18 14:25:51 +00:00 · 2026-04-22 22:38:09 +08:00
parent 04c7ed4250
commit 1837df5f88
64 changed files with 11009 additions and 36 deletions
--- a/src/services/skillLearning/tests/evolution.test.ts
+++ b/src/services/skillLearning/tests/evolution.test.ts
@@ -0,0 +1,152 @@
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import { mkdtempSync, rmSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import { createInstinct } from '../instinctParser.js'
+import {
+  classifyEvolutionTarget,
+  clusterInstincts,
+  generateAgentCandidates,
+  generateCommandCandidates,
+  generateSkillCandidates,
+} from '../evolution.js'
+
+describe('evolution', () => {
+  test('clusters related instincts by trigger and domain', () => {
+    const instincts = [ // three instincts sharing one trigger and one domain
+      createInstinct({
+        trigger: 'when writing tests',
+        action: 'use testing-library',
+        confidence: 0.7,
+        domain: 'testing',
+        source: 'session-observation',
+        scope: 'project',
+        evidence: ['one'],
+      }),
+      createInstinct({
+        trigger: 'when writing tests',
+        action: 'avoid implementation mocks',
+        confidence: 0.8,
+        domain: 'testing',
+        source: 'session-observation',
+        scope: 'project',
+        evidence: ['two'],
+      }),
+      createInstinct({
+        trigger: 'when writing tests',
+        action: 'prefer describe/test structure',
+        confidence: 0.75,
+        domain: 'testing',
+        source: 'session-observation',
+        scope: 'project',
+        evidence: ['three'],
+      }),
+    ]
+    // Expected mean of 0.7, 0.8 and 0.75; compared with a tolerance because it is a float average.
+    const clusters = clusterInstincts(instincts)
+    expect(clusters).toHaveLength(1)
+    expect(clusters[0]?.averageConfidence).toBeCloseTo(0.75, 6)
+  })
+
+  test('classifies explicit user-invoked workflows as command candidates', () => {
+    expect(
+      classifyEvolutionTarget([
+        createInstinct({
+          trigger: 'when user asks to create migration',
+          action: 'run command steps',
+          confidence: 0.8,
+          domain: 'workflow',
+          source: 'session-observation',
+          scope: 'project',
+          evidence: ['one'],
+        }),
+      ]),
+    ).toBe('command')
+  })
+
+  test('generates skill candidates for high-confidence skill clusters', () => {
+    // Cluster-size floor (>=3) is non-negotiable post-H15 fix: a single
+    // high-confidence instinct must not become a persistent skill. Three
+    // independent observations are required to promote.
+    const instincts = [
+      createInstinct({
+        trigger: 'when writing tests',
+        action: 'use testing-library',
+        confidence: 0.8,
+        domain: 'testing',
+        source: 'session-observation',
+        scope: 'project',
+        evidence: ['one'],
+      }),
+      createInstinct({
+        trigger: 'when writing tests',
+        action: 'avoid implementation mocks',
+        confidence: 0.8,
+        domain: 'testing',
+        source: 'session-observation',
+        scope: 'project',
+        evidence: ['two'],
+      }),
+      createInstinct({
+        trigger: 'when writing tests',
+        action: 'prefer describe/test structure',
+        confidence: 0.8,
+        domain: 'testing',
+        source: 'session-observation',
+        scope: 'project',
+        evidence: ['three'],
+      }),
+    ]
+
+    expect(generateSkillCandidates(instincts)).toHaveLength(1)
+  })
+
+  describe('three-path generation', () => {
+    let tmp: string
+    beforeEach(() => {
+      tmp = mkdtempSync(join(tmpdir(), 'skill-learning-evolve-'))
+    })
+    afterEach(() => {
+      rmSync(tmp, { recursive: true, force: true })
+    })
+
+    test('command-triggered instincts produce command candidates, not skill candidates', () => {
+      // Need >=3 instincts to satisfy the cluster-size floor post-H15.
+      const instincts = Array.from({ length: 3 }, (_, i) =>
+        createInstinct({
+          trigger: 'when user asks to create migration',
+          action: 'run command: pnpm run migration',
+          confidence: 0.85,
+          domain: 'workflow',
+          source: 'session-observation',
+          scope: 'project',
+          evidence: [`user invocation ${i}`],
+        }),
+      )
+
+      const commands = generateCommandCandidates(instincts, { cwd: tmp })
+      const skills = generateSkillCandidates(instincts, { cwd: tmp })
+      expect(commands).toHaveLength(1)
+      expect(skills).toHaveLength(0)
+      expect(commands[0]?.content).toContain('/')
+    })
+
+    test('four debug multi-step instincts cluster into an agent candidate', () => {
+      const instincts = Array.from({ length: 4 }, (_, i) =>
+        createInstinct({
+          trigger: 'when debugging multi-step regressions',
+          action: 'investigate stack trace, reproduce locally, and add test',
+          confidence: 0.82,
+          domain: 'debugging',
+          source: 'session-observation',
+          scope: 'project',
+          evidence: [`incident-${i}`],
+        }),
+      )
+
+      const agents = generateAgentCandidates(instincts, { cwd: tmp })
+      expect(agents).toHaveLength(1)
+      expect(agents[0]?.content).toContain('Playbook')
+    })
+  })
+})
--- a/src/services/skillLearning/tests/instinctStore.test.ts
+++ b/src/services/skillLearning/tests/instinctStore.test.ts
@@ -0,0 +1,143 @@
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import { mkdtempSync, rmSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import {
+  loadInstincts,
+  prunePendingInstincts,
+  saveInstinct,
+  upsertInstinct,
+} from '../instinctStore.js'
+import { createInstinct } from '../instinctParser.js'
+
+let rootDir: string
+
+beforeEach(() => {
+  rootDir = mkdtempSync(join(tmpdir(), 'skill-learning-instinct-'))
+})
+
+afterEach(() => {
+  rmSync(rootDir, { recursive: true, force: true })
+})
+
+describe('instinctStore', () => {
+  test('saves and loads instincts', async () => {
+    await saveInstinct(
+      createInstinct({
+        trigger: 'when testing',
+        action: 'use testing-library',
+        confidence: 0.7,
+        domain: 'testing',
+        source: 'session-observation',
+        scope: 'project',
+        evidence: ['user correction'],
+      }),
+      { rootDir, project: projectContext() },
+    )
+
+    const instincts = await loadInstincts({
+      rootDir,
+      project: projectContext(),
+    })
+    expect(instincts).toHaveLength(1)
+    expect(instincts[0]?.action).toContain('testing-library')
+  })
+
+  test('upsert increases confidence for confirming instincts', async () => {
+    const first = createInstinct({
+      id: 'test-instinct',
+      trigger: 'when testing',
+      action: 'prefer testing-library',
+      confidence: 0.7,
+      domain: 'testing',
+      source: 'session-observation',
+      scope: 'project',
+      evidence: ['one'],
+    })
+    await upsertInstinct(first, { rootDir, project: projectContext() })
+    const second = { ...first, evidence: ['two'] }
+    const updated = await upsertInstinct(second, {
+      rootDir,
+      project: projectContext(),
+    })
+
+    expect(updated.confidence).toBeGreaterThan(first.confidence)
+    expect(updated.evidence).toContain('one')
+    expect(updated.evidence).toContain('two')
+  })
+
+  test('outcome-aware upsert: failure evidence reduces confidence', async () => {
+    const first = createInstinct({
+      id: 'outcome-aware',
+      trigger: 'when writing tests',
+      action: 'use testing-library',
+      confidence: 0.7,
+      domain: 'testing',
+      source: 'session-observation',
+      scope: 'project',
+      evidence: ['one'],
+      evidenceOutcome: 'success',
+    })
+    const afterSuccess = await upsertInstinct(first, {
+      rootDir,
+      project: projectContext(),
+    })
+    await upsertInstinct(first, { rootDir, project: projectContext() })
+    const afterAnotherSuccess = (
+      await loadInstincts({ rootDir, project: projectContext() })
+    ).find(i => i.id === 'outcome-aware')!
+
+    const failure = {
+      ...first,
+      evidence: ['two'],
+      evidenceOutcome: 'failure' as const,
+    }
+    const afterFailure = await upsertInstinct(failure, {
+      rootDir,
+      project: projectContext(),
+    })
+
+    expect(afterSuccess.confidence).toBe(0.7)
+    expect(afterAnotherSuccess.confidence).toBeGreaterThan(
+      afterSuccess.confidence,
+    )
+    expect(afterFailure.confidence).toBeLessThan(afterAnotherSuccess.confidence)
+  })
+
+  test('prunes old pending instincts', async () => {
+    const old = createInstinct(
+      {
+        id: 'old-instinct',
+        trigger: 'old',
+        action: 'old',
+        confidence: 0.3,
+        domain: 'project',
+        source: 'session-observation',
+        scope: 'project',
+        evidence: ['old'],
+      },
+      '2020-01-01T00:00:00.000Z',
+    )
+    await saveInstinct(old, { rootDir, project: projectContext() })
+
+    const pruned = await prunePendingInstincts(30, {
+      rootDir,
+      project: projectContext(),
+    })
+    expect(pruned.map(instinct => instinct.id)).toContain('old-instinct')
+    expect(await loadInstincts({ rootDir, project: projectContext() })).toEqual(
+      [],
+    )
+  })
+})
+
+function projectContext() { // Builds a new context literal on every call, reading the current `rootDir`.
+  return {
+    projectId: 'p1',
+    projectName: 'project',
+    cwd: rootDir,
+    scope: 'project' as const,
+    source: 'global' as const, // NOTE(review): 'global' source paired with 'project' scope — confirm this mix is intended
+    storageDir: join(rootDir, 'projects', 'p1'), // <rootDir>/projects/p1
+  }
+}
--- a/src/services/skillLearning/tests/learningPolicy.test.ts
+++ b/src/services/skillLearning/tests/learningPolicy.test.ts
@@ -0,0 +1,81 @@
+import { describe, expect, test } from 'bun:test'
+import { createInstinct } from '../instinctParser.js'
+import {
+  buildLearnedSkillName,
+  decideDefaultScope,
+  isGenericSkillName,
+  isValidLearnedSkillName,
+  normalizeSkillName,
+  shouldGenerateSkillFromInstincts,
+} from '../learningPolicy.js'
+
+describe('learningPolicy', () => {
+  test('normalizes learned skill names to lowercase kebab-case with length cap', () => {
+    const name = normalizeSkillName('Testing React Testing Library!!!')
+
+    expect(name).toBe('testing-react-testing-library')
+    expect(name.length).toBeLessThanOrEqual(64)
+  })
+
+  test('rejects generic learned skill names', () => {
+    expect(isGenericSkillName('learned-skill')).toBe(true)
+    expect(isValidLearnedSkillName('learned-skill')).toBe(false)
+  })
+
+  test('builds domain-prefixed names from instincts', () => {
+    const instinct = createInstinct({
+      trigger: 'when writing React tests',
+      action: 'use testing-library and avoid implementation mocks',
+      confidence: 0.85,
+      domain: 'testing',
+      source: 'session-observation',
+      scope: 'project',
+      evidence: ['user correction'],
+    })
+
+    const name = buildLearnedSkillName([instinct])
+
+    expect(name.startsWith('testing-')).toBe(true)
+    expect(isValidLearnedSkillName(name)).toBe(true)
+  })
+
+  test('uses confidence threshold before generating skills', () => {
+    const low = createInstinct({
+      trigger: 'when testing',
+      action: 'try a tentative pattern',
+      confidence: 0.3,
+      domain: 'testing',
+      source: 'session-observation',
+      scope: 'project',
+      evidence: ['weak signal'],
+    })
+    const high = { ...low, confidence: 0.8 }
+
+    expect(shouldGenerateSkillFromInstincts([low])).toBe(false)
+    expect(shouldGenerateSkillFromInstincts([high])).toBe(true)
+  })
+
+  test('promotes only global-friendly repeated instinct groups by default', () => {
+    const workflow = createInstinct({
+      trigger: 'when modifying code',
+      action: 'Grep then Read then Edit',
+      confidence: 0.8,
+      domain: 'workflow',
+      source: 'session-observation',
+      scope: 'project',
+      evidence: ['repeated workflow'],
+    })
+    const testing = createInstinct({
+      trigger: 'when writing React tests',
+      action: 'use testing-library',
+      confidence: 0.8,
+      domain: 'testing',
+      source: 'session-observation',
+      scope: 'project',
+      evidence: ['project convention'],
+    })
+
+    expect(decideDefaultScope([workflow, workflow])).toBe('global')
+    expect(decideDefaultScope([testing])).toBe('project')
+  })
+})
--- a/src/services/skillLearning/tests/observationStore.test.ts
+++ b/src/services/skillLearning/tests/observationStore.test.ts
@@ -0,0 +1,108 @@
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import {
+  appendObservation,
+  ingestTranscript,
+  readObservations,
+  scrubText,
+} from '../observationStore.js'
+
+let rootDir: string
+
+beforeEach(() => {
+  rootDir = mkdtempSync(join(tmpdir(), 'skill-learning-observation-'))
+})
+
+afterEach(() => {
+  rmSync(rootDir, { recursive: true, force: true })
+})
+
+describe('observationStore', () => {
+  test('scrubs secrets and truncates large fields', () => {
+    const scrubbed = scrubText('api_key: sk-ant-1234567890abcdef extra', 80)
+    expect(scrubbed).toContain('[REDACTED]')
+
+    const truncated = scrubText(
+      `api_key: sk-ant-1234567890abcdef ${'x'.repeat(120)}`,
+      40,
+    )
+    expect(truncated).toContain('[REDACTED]')
+    expect(truncated).toContain('[TRUNCATED')
+  })
+
+  test('appends and reads project observations', async () => {
+    await appendObservation(
+      {
+        id: 'obs-1',
+        timestamp: '2026-04-16T00:00:00.000Z',
+        event: 'user_message',
+        sessionId: 's1',
+        projectId: 'p1',
+        projectName: 'project',
+        cwd: rootDir,
+        messageText: '不要 mock，用 testing-library',
+      },
+      {
+        rootDir,
+        project: projectContext(),
+      },
+    )
+
+    const observations = await readObservations({
+      rootDir,
+      project: projectContext(),
+    })
+    expect(observations).toHaveLength(1)
+    expect(observations[0]?.messageText).toContain('testing-library')
+  })
+
+  test('ingests Claude transcript JSONL into observations', async () => {
+    const transcript = join(rootDir, 'session.jsonl')
+    writeFileSync(
+      transcript,
+      [
+        JSON.stringify({
+          type: 'user',
+          sessionId: 's1',
+          cwd: rootDir,
+          timestamp: '2026-04-16T00:00:00.000Z',
+          message: { role: 'user', content: '不要 mock，用 testing-library' },
+        }),
+        JSON.stringify({
+          type: 'assistant',
+          sessionId: 's1',
+          cwd: rootDir,
+          timestamp: '2026-04-16T00:00:01.000Z',
+          message: {
+            role: 'assistant',
+            content: [
+              { type: 'tool_use', name: 'Grep', input: { pattern: 'x' } },
+            ],
+          },
+        }),
+      ].join('\n'),
+    )
+
+    const observations = await ingestTranscript(transcript, {
+      rootDir,
+      project: projectContext(),
+    })
+
+    expect(observations.length).toBeGreaterThanOrEqual(2)
+    expect(observations.map(o => o.event)).toContain('user_message')
+    expect(observations.map(o => o.event)).toContain('tool_start')
+  })
+})
+
+function projectContext() { // Builds a new context literal on every call, reading the current `rootDir`.
+  return {
+    projectId: 'p1',
+    projectName: 'project',
+    cwd: rootDir,
+    scope: 'project' as const,
+    source: 'global' as const, // NOTE(review): 'global' source paired with 'project' scope — confirm this mix is intended
+    storageDir: join(rootDir, 'projects', 'p1'), // <rootDir>/projects/p1
+  }
+}
--- a/src/services/skillLearning/tests/observerBackend.test.ts
+++ b/src/services/skillLearning/tests/observerBackend.test.ts
@@ -0,0 +1,135 @@
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import {
+  getActiveObserverBackend,
+  listObserverBackends,
+  registerObserverBackend,
+  resolveDefaultObserverBackend,
+  setActiveObserverBackend,
+  analyzeWithActiveBackend,
+  type ObserverBackend,
+} from '../observerBackend.js'
+import { analyzeObservations } from '../sessionObserver.js'
+import type { StoredSkillObservation } from '../observationStore.js'
+
+function obs(partial: Partial<StoredSkillObservation>): StoredSkillObservation { // Observation fixture: fixed defaults overlaid with `partial`.
+  return {
+    timestamp: '2026-04-16T00:00:00.000Z', // fixed literal unless `partial` supplies another
+    sessionId: 's1',
+    projectId: 'p1',
+    projectName: 'project',
+    cwd: process.cwd(),
+    ...partial, // caller-supplied fields override the defaults above
+    id: partial.id ?? crypto.randomUUID(), // placed after the spread so an explicit `id: undefined` cannot erase the default
+    event: partial.event ?? 'user_message', // same guard for `event`
+  }
+}
+
+const originalBackendName = getActiveObserverBackend().name // backend that is active when this module is evaluated
+
+afterEach(() => {
+  setActiveObserverBackend(originalBackendName) // undo any backend switch made by a test
+})
+
+describe('observerBackend', () => {
+  test('registers heuristic and llm backends by default', () => {
+    const names = listObserverBackends()
+    expect(names).toContain('heuristic')
+    expect(names).toContain('llm')
+  })
+
+  test('resolveDefaultObserverBackend honours SKILL_LEARNING_OBSERVER_BACKEND env', () => {
+    // Adversarial probe for the env switch — if this regresses, the LLM
+    // backend would be silently unreachable in production even with the env
+    // variable set, which was the original AC2 gap.
+    const original = process.env.SKILL_LEARNING_OBSERVER_BACKEND
+    try {
+      process.env.SKILL_LEARNING_OBSERVER_BACKEND = 'llm'
+      resolveDefaultObserverBackend()
+      expect(getActiveObserverBackend().name).toBe('llm')
+
+      // Unknown backend names must not crash; the current active stays.
+      process.env.SKILL_LEARNING_OBSERVER_BACKEND = 'nonexistent'
+      resolveDefaultObserverBackend()
+      expect(getActiveObserverBackend().name).toBe('llm')
+
+      // Clearing the env leaves whatever was active — explicit opt-out is
+      // setActiveObserverBackend, not clearing the env.
+      delete process.env.SKILL_LEARNING_OBSERVER_BACKEND
+      resolveDefaultObserverBackend()
+      expect(getActiveObserverBackend().name).toBe('llm')
+    } finally {
+      if (original === undefined) {
+        delete process.env.SKILL_LEARNING_OBSERVER_BACKEND
+      } else {
+        process.env.SKILL_LEARNING_OBSERVER_BACKEND = original
+      }
+    }
+  })
+
+  test('heuristic backend preserves existing correction detection', async () => {
+    setActiveObserverBackend('heuristic')
+    const candidates = await analyzeWithActiveBackend([
+      obs({ messageText: '不要直接 mock，用 testing-library' }),
+    ])
+    expect(candidates).toHaveLength(1)
+    expect(candidates[0]?.action).toContain('testing-library')
+  })
+
+  test('llm backend short-circuits to [] on empty observations', async () => {
+    // With the real Haiku-backed implementation the backend only calls
+    // queryHaiku when there are observations to analyse. Empty-input short
+    // circuit guarantees the no-cost path needed for hot loops.
+    setActiveObserverBackend('llm')
+    const candidates = await analyzeWithActiveBackend([])
+    expect(candidates).toEqual([])
+  })
+
+  test('analyzeObservations routes to active backend (sync path throws for async backends)', () => {
+    // Heuristic backend is sync — analyzeObservations works directly.
+    const previousCount = analyzeObservations([
+      obs({ messageText: '不要直接 mock，用 testing-library' }),
+    ]).length
+    expect(previousCount).toBe(1)
+
+    // The LLM backend is now a real async implementation (queryHaiku). The
+    // sync `analyzeObservations` helper refuses to return a pending Promise
+    // and throws with a clear instruction to use `analyzeWithActiveBackend`
+    // instead — prove the routing reached the async backend by catching
+    // that exact error.
+    setActiveObserverBackend('llm')
+    expect(() =>
+      analyzeObservations([
+        obs({ messageText: '不要直接 mock，用 testing-library' }),
+      ]),
+    ).toThrow(/Promise/)
+  })
+
+  test('custom backends can be registered and switched', async () => {
+    const custom: ObserverBackend = {
+      name: 'custom-test',
+      analyze() {
+        return [
+          {
+            trigger: 'custom trigger',
+            action: 'custom action',
+            confidence: 0.9,
+            domain: 'project',
+            source: 'session-observation',
+            scope: 'project',
+            evidence: ['custom evidence'],
+          },
+        ]
+      },
+    }
+    registerObserverBackend(custom)
+    setActiveObserverBackend('custom-test')
+
+    const candidates = await analyzeWithActiveBackend([])
+    expect(candidates).toHaveLength(1)
+    expect(candidates[0]?.trigger).toBe('custom trigger')
+  })
+
+  test('switching to an unknown backend throws', () => {
+    expect(() => setActiveObserverBackend('does-not-exist')).toThrow()
+  })
+})
--- a/src/services/skillLearning/tests/projectContext.test.ts
+++ b/src/services/skillLearning/tests/projectContext.test.ts
@@ -0,0 +1,160 @@
+import { afterAll, beforeEach, describe, expect, test } from 'bun:test'
+import { existsSync, mkdirSync, mkdtempSync, readFileSync, realpathSync, rmSync } from 'fs'
+import { tmpdir } from 'os'
+import { join } from 'path'
+import { execFileSync } from 'child_process'
+import { getClaudeConfigHomeDir } from '../../../utils/envUtils.js'
+import {
+  getProjectContextPath,
+  getProjectsRegistryPath,
+  getSkillLearningRootDir,
+  resolveProjectContext,
+} from '../projectContext.js'
+import { isSkillLearningEnabled } from '../featureCheck.js'
+// Canonical temp root: tmpdir() may be a symlink (e.g. macOS /var) while git reports physical paths.
+const tempBase = realpathSync(mkdtempSync(join(tmpdir(), 'skill-learning-context-test-')))
+const originalEnv = { ...process.env }
+
+beforeEach(() => { // Every test starts from the env snapshot with a new, empty config home.
+  resetEnv()
+  const tempHome = mkdtempSync(join(tempBase, 'home-'))
+  process.env.CLAUDE_CONFIG_DIR = tempHome
+})
+
+afterAll(() => { // Restore the env snapshot and delete everything created under `tempBase`.
+  process.env = { ...originalEnv }
+  clearConfigDirCache()
+  rmSync(tempBase, { recursive: true, force: true })
+})
+
+describe('isSkillLearningEnabled', () => {
+  test('honors explicit SKILL_LEARNING_ENABLED overrides', () => {
+    process.env.SKILL_LEARNING_ENABLED = '1'
+    expect(isSkillLearningEnabled()).toBe(true)
+
+    process.env.SKILL_LEARNING_ENABLED = '0'
+    expect(isSkillLearningEnabled()).toBe(false)
+  })
+
+  test('honors FEATURE_SKILL_LEARNING env fallback', () => {
+    delete process.env.SKILL_LEARNING_ENABLED
+    process.env.FEATURE_SKILL_LEARNING = '1'
+    expect(isSkillLearningEnabled()).toBe(true)
+
+    process.env.FEATURE_SKILL_LEARNING = '0'
+    expect(isSkillLearningEnabled()).toBe(false)
+  })
+})
+
+describe('resolveProjectContext', () => {
+  test('prefers CLAUDE_PROJECT_DIR and writes registry files', () => {
+    const cwd = mkdirTempDir('cwd-')
+    const projectDir = mkdirTempDir('project-')
+    process.env.CLAUDE_PROJECT_DIR = projectDir
+
+    const context = resolveProjectContext(cwd)
+
+    expect(context.source).toBe('claude_project_dir')
+    expect(context.scope).toBe('project')
+    expect(context.projectRoot).toBe(projectDir)
+    expect(context.projectName).toBe(lastPathSegment(projectDir))
+    expect(context.storageDir).toContain(context.projectId)
+
+    expect(existsSync(getProjectsRegistryPath())).toBe(true)
+    expect(existsSync(getProjectContextPath(context.projectId))).toBe(true)
+
+    const registry = readJson(getProjectsRegistryPath())
+    expect(registry.projects[context.projectId].source).toBe(
+      'claude_project_dir',
+    )
+  })
+
+  test('uses git remote as stable identity across different checkouts', () => {
+    const first = createGitRepo('remote-a-', 'https://example.com/acme/app.git')
+    const second = createGitRepo(
+      'remote-b-',
+      'https://example.com/acme/app.git',
+    )
+
+    const firstContext = resolveProjectContext(first)
+    const secondContext = resolveProjectContext(second)
+
+    expect(firstContext.source).toBe('git_remote')
+    expect(secondContext.source).toBe('git_remote')
+    expect(firstContext.projectId).toBe(secondContext.projectId)
+    expect(firstContext.gitRemote).toBe('https://example.com/acme/app')
+    expect(firstContext.projectName).toBe('app')
+
+    const registry = readJson(getProjectsRegistryPath())
+    expect(Object.keys(registry.projects)).toContain(firstContext.projectId)
+    expect(registry.projects[firstContext.projectId].gitRemote).toBe(
+      'https://example.com/acme/app',
+    )
+  })
+
+  test('falls back to git root when origin remote is missing', () => {
+    const repo = createGitRepo('root-only-')
+
+    const context = resolveProjectContext(join(repo, 'nested'))
+
+    expect(context.source).toBe('git_root')
+    expect(context.scope).toBe('project')
+    expect(context.projectRoot).toBe(repo)
+    expect(context.projectName).toBe(lastPathSegment(repo))
+  })
+
+  test('falls back to global context outside a git repository', () => {
+    const cwd = mkdirTempDir('not-git-')
+
+    const context = resolveProjectContext(cwd)
+
+    expect(context.source).toBe('global')
+    expect(context.scope).toBe('global')
+    expect(context.projectId).toBe('global')
+    expect(context.projectName).toBe('Global')
+    expect(context.storageDir).toBe(join(getSkillLearningRootDir(), 'global'))
+    expect(existsSync(getProjectContextPath('global'))).toBe(true)
+  })
+})
+
+function createGitRepo(prefix: string, remote?: string): string { // Returns the path of a new temp dir initialised as a git repo.
+  const dir = mkdirTempDir(prefix)
+  mkdirSync(join(dir, 'nested'), { recursive: true }) // subdirectory the git-root fallback test resolves from
+  execFileSync('git', ['init'], { cwd: dir, stdio: 'ignore' }) // needs `git` on PATH; its output is discarded
+  if (remote) { // an `origin` remote is only added when a URL was passed
+    execFileSync('git', ['remote', 'add', 'origin', remote], {
+      cwd: dir,
+      stdio: 'ignore',
+    })
+  }
+  return dir
+}
+
+function mkdirTempDir(prefix: string): string { // Creates a uniquely named directory under `tempBase` and returns its path.
+  return mkdtempSync(join(tempBase, prefix))
+}
+
+function readJson(path: string): any { // Reads `path` as UTF-8 text and parses it as JSON; the result is not validated.
+  return JSON.parse(readFileSync(path, 'utf8'))
+}
+
+function lastPathSegment(path: string): string { // Final non-empty segment of a `/`- or `\`-separated path.
+  return path.split(/[\\/]/).filter(Boolean).at(-1) ?? path // falls back to `path` itself when no segment remains
+}
+
+function resetEnv(): void { // Back to the env snapshot, minus the variables these tests toggle, with the config-dir cache cleared.
+  process.env = { ...originalEnv } // swaps in a fresh copy of the snapshot taken at module load
+  delete process.env.CLAUDE_PROJECT_DIR
+  delete process.env.SKILL_LEARNING_ENABLED
+  delete process.env.FEATURE_SKILL_LEARNING
+  clearConfigDirCache()
+}
+
+function clearConfigDirCache(): void { // Clears the `cache` attached to getClaudeConfigHomeDir, if any (presumably a memoize cache — confirm).
+  if (
+    typeof getClaudeConfigHomeDir === 'function' &&
+    'cache' in getClaudeConfigHomeDir
+  ) {
+    ;(getClaudeConfigHomeDir as any).cache?.clear?.() // `in` only proves the key exists, so tolerate a nullish `cache` too
+  }
+}
--- a/src/services/skillLearning/tests/promotion.test.ts
+++ b/src/services/skillLearning/tests/promotion.test.ts
@@ -0,0 +1,144 @@
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import { mkdtempSync, rmSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import { createInstinct } from '../instinctParser.js'
+import { saveInstinct, loadInstincts } from '../instinctStore.js'
+import {
+  checkPromotion,
+  findPromotionCandidates,
+  resetPromotionBookkeeping,
+} from '../promotion.js'
+import type { SkillLearningProjectContext } from '../types.js'
+
+let rootDir: string
+
+function projectCtx(projectId: string): SkillLearningProjectContext { // Project-scoped context stored under <rootDir>/projects/<projectId>.
+  return {
+    projectId,
+    projectName: projectId, // the id doubles as the display name
+    scope: 'project',
+    source: 'git_root',
+    cwd: rootDir,
+    storageDir: join(rootDir, 'projects', projectId),
+  }
+}
+
+function globalCtx(): SkillLearningProjectContext { // Global-scope context stored under <rootDir>/global.
+  return {
+    projectId: 'global',
+    projectName: 'Global',
+    scope: 'global',
+    source: 'global',
+    cwd: rootDir,
+    storageDir: join(rootDir, 'global'),
+  }
+}
+
+beforeEach(() => {
+  rootDir = mkdtempSync(join(tmpdir(), 'skill-learning-promote-'))
+  resetPromotionBookkeeping()
+})
+
+afterEach(() => {
+  rmSync(rootDir, { recursive: true, force: true })
+})
+
+describe('promotion', () => {
+  test('findPromotionCandidates returns instincts with 2+ projects and avg>=0.8', () => {
+    const mk = (projectId: string) => // same instinct id, attributed to the given project
+      createInstinct({
+        id: 'shared-trigger',
+        trigger: 'shared',
+        action: 'shared',
+        confidence: 0.85,
+        domain: 'workflow',
+        source: 'session-observation',
+        scope: 'project',
+        projectId,
+        projectName: projectId,
+        evidence: ['ev'],
+        status: 'active',
+      })
+    const candidates = findPromotionCandidates([mk('alpha'), mk('beta')])
+    expect(candidates).toHaveLength(1)
+    expect([...(candidates[0]?.projectIds ?? [])].sort()).toEqual(['alpha', 'beta']) // sort a copy, never the returned array
+  })
+
+  test('checkPromotion writes a global copy for cross-project instincts', async () => {
+    const mk = (projectId: string) =>
+      createInstinct({
+        id: 'shared-id',
+        trigger: 'shared',
+        action: 'shared',
+        confidence: 0.85,
+        domain: 'workflow',
+        source: 'session-observation',
+        scope: 'project',
+        projectId,
+        projectName: projectId,
+        evidence: ['ev'],
+        status: 'active',
+      })
+    await saveInstinct(mk('alpha'), { rootDir, project: projectCtx('alpha') })
+    await saveInstinct(mk('beta'), { rootDir, project: projectCtx('beta') })
+
+    const promoted = await checkPromotion({ rootDir })
+    expect(promoted.map(p => p.instinctId)).toContain('shared-id')
+
+    const globalInstincts = await loadInstincts({
+      rootDir,
+      scope: 'global',
+      project: globalCtx(),
+    })
+    const global = globalInstincts.find(i => i.id === 'shared-id')
+    expect(global).toBeDefined()
+    expect(global?.scope).toBe('global')
+    expect(global?.confidence).toBeGreaterThanOrEqual(0.8)
+  })
+
+  test('checkPromotion is idempotent within a session', async () => {
+    const mk = (projectId: string) =>
+      createInstinct({
+        id: 'repeat-id',
+        trigger: 'repeat',
+        action: 'repeat',
+        confidence: 0.85,
+        domain: 'workflow',
+        source: 'session-observation',
+        scope: 'project',
+        projectId,
+        projectName: projectId,
+        evidence: ['ev'],
+        status: 'active',
+      })
+    await saveInstinct(mk('alpha'), { rootDir, project: projectCtx('alpha') })
+    await saveInstinct(mk('beta'), { rootDir, project: projectCtx('beta') })
+
+    const first = await checkPromotion({ rootDir })
+    const second = await checkPromotion({ rootDir })
+
+    expect(first).toHaveLength(1)
+    expect(second).toHaveLength(0)
+  })
+
+  test('does not promote when only one project has the instinct', async () => {
+    const instinct = createInstinct({
+      id: 'solo',
+      trigger: 'solo',
+      action: 'solo',
+      confidence: 0.9,
+      domain: 'workflow',
+      source: 'session-observation',
+      scope: 'project',
+      projectId: 'alpha',
+      projectName: 'alpha',
+      evidence: ['ev'],
+      status: 'active',
+    })
+    await saveInstinct(instinct, { rootDir, project: projectCtx('alpha') })
+
+    const promoted = await checkPromotion({ rootDir })
+    expect(promoted).toEqual([])
+  })
+})
--- a/src/services/skillLearning/tests/runtimeObserver.test.ts
+++ b/src/services/skillLearning/tests/runtimeObserver.test.ts
@@ -0,0 +1,143 @@
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import { existsSync, mkdtempSync, rmSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import {
+  resetSkillLearningConfig,
+  setSkillLearningConfigForTest,
+} from '../config.js'
+import { loadInstincts, readObservations } from '../index.js'
+import {
+  resetRuntimeObserverForTest,
+  runSkillLearningPostSampling,
+} from '../runtimeObserver.js'
+
+// Per-test sandbox directory, plus the cwd and environment to restore later.
+let root: string
+let previousCwd: string
+const originalEnv = { ...process.env }
+
+beforeEach(() => {
+  previousCwd = process.cwd()
+  root = mkdtempSync(join(tmpdir(), 'skill-learning-runtime-'))
+  process.chdir(root)
+
+  // Start from the snapshot taken at module load, then layer the overrides
+  // that point skill learning at the sandbox.
+  process.env = { ...originalEnv }
+  Object.assign(process.env, {
+    CLAUDE_SKILL_LEARNING_HOME: join(root, 'learning-home'),
+    CLAUDE_CONFIG_DIR: join(root, 'config'),
+    SKILL_LEARNING_ENABLED: '1',
+    NODE_ENV: 'test',
+  })
+
+  setSkillLearningConfigForTest({ minConfidence: 0.3, minClusterSize: 1 })
+  resetRuntimeObserverForTest()
+})
+
+// Teardown: leave the sandbox, restore env/config, then delete the sandbox.
+afterEach(() => {
+  // Step out of the sandbox first; removing the current directory can fail.
+  process.chdir(previousCwd)
+  process.env = { ...originalEnv }
+  resetSkillLearningConfig()
+  try {
+    rmSync(root, {
+      recursive: true,
+      force: true,
+      maxRetries: 10,
+      retryDelay: 100,
+    })
+  } catch {
+    // Temp cleanup is best-effort: a transient open handle (seen on Windows)
+    // must not turn a passing test into a teardown failure.
+  }
+})
+
+describe('runtimeObserver', () => {
+  type HookContext = Parameters<typeof runSkillLearningPostSampling>[0]
+
+  // Every scenario replays the same user correction; only the uuid varies.
+  const correction = (uuid: string) => ({
+    type: 'user' as const,
+    uuid: uuid as any,
+    message: {
+      role: 'user' as const,
+      content: '不要 mock，用 testing-library',
+    },
+  })
+
+  // Main-thread post-sampling context with one correction message per uuid.
+  const contextFor = (uuids: string[], agentId?: string): HookContext => ({
+    querySource: 'repl_main_thread',
+    messages: uuids.map(uuid => correction(uuid)),
+    systemPrompt: [] as any,
+    userContext: {},
+    systemContext: {},
+    toolUseContext: { agentId } as any,
+  })
+
+  // Read options that address the global store inside the sandbox. Built
+  // lazily because `root` and the env are reassigned before every test.
+  const globalStore = () => {
+    const rootDir = process.env.CLAUDE_SKILL_LEARNING_HOME
+    return {
+      rootDir,
+      project: {
+        projectId: 'global',
+        projectName: 'global',
+        cwd: root,
+        scope: 'global' as const,
+        source: 'global' as const,
+        storageDir: join(rootDir!, 'global'),
+      },
+    }
+  }
+
+  test('records and learns from post-sampling main-thread messages', async () => {
+    await runSkillLearningPostSampling(contextFor(['u1']))
+
+    const observations = await readObservations(globalStore())
+    const instincts = await loadInstincts(globalStore())
+
+    expect(observations).toHaveLength(1)
+    expect(instincts[0]?.action).toContain('testing-library')
+  })
+
+  test('skips subagent sessions', async () => {
+    // A defined agentId marks the context as coming from a subagent.
+    await runSkillLearningPostSampling(contextFor(['u1'], 'agent-1'))
+
+    const observations = await readObservations({
+      rootDir: process.env.CLAUDE_SKILL_LEARNING_HOME,
+    })
+    expect(observations).toEqual([])
+  })
+
+  test('auto-evolves repeated corrections into an active learned skill', async () => {
+    await runSkillLearningPostSampling(contextFor(['u1', 'u2', 'u3']))
+
+    const skillFile = join(
+      root,
+      '.claude',
+      'skills',
+      'testing-choosing-between-mock-testing-library',
+      'SKILL.md',
+    )
+    expect(existsSync(skillFile)).toBe(true)
+  })
+})
--- a/src/services/skillLearning/tests/sessionObserver.test.ts
+++ b/src/services/skillLearning/tests/sessionObserver.test.ts
@@ -0,0 +1,103 @@
+import { describe, expect, test } from 'bun:test'
+import { analyzeObservations } from '../sessionObserver.js'
+import type { StoredSkillObservation } from '../observationStore.js'
+
+/**
+ * Builds a complete stored observation for tests from a partial override.
+ *
+ * Fixed defaults are laid down first and `partial` is spread over them. `id`
+ * and `event` are resolved after the spread so that an explicitly `undefined`
+ * override cannot erase their fallbacks.
+ *
+ * @param partial Fields to override on the default observation.
+ * @returns An observation whose `id` and `event` are always populated.
+ */
+function obs(partial: Partial<StoredSkillObservation>): StoredSkillObservation {
+  return {
+    timestamp: '2026-04-16T00:00:00.000Z',
+    sessionId: 's1',
+    projectId: 'p1',
+    projectName: 'project',
+    cwd: process.cwd(),
+    ...partial,
+    // Resolved last: with the spread at the end, `{ id: undefined }` would
+    // overwrite the generated id with `undefined`.
+    id: partial.id ?? crypto.randomUUID(),
+    event: partial.event ?? 'user_message',
+  }
+}
+
+describe('sessionObserver', () => {
+  // User correction reused by several scenarios.
+  const correctionText = '不要直接 mock，用 testing-library'
+
+  // Shorthand for a finished tool call with a given outcome.
+  const toolComplete = (
+    id: string,
+    toolName: string,
+    outcome: StoredSkillObservation['outcome'],
+  ) => obs({ id, event: 'tool_complete', toolName, outcome })
+
+  test('extracts user correction instincts', () => {
+    const instincts = analyzeObservations([obs({ messageText: correctionText })])
+
+    expect(instincts).toHaveLength(1)
+    const [only] = instincts
+    expect(only?.domain).toBe('testing')
+    expect(only?.action).toContain('testing-library')
+  })
+
+  test('extracts repeated Grep -> Read -> Edit workflow instinct', () => {
+    // The same three-tool cycle, performed twice in a row.
+    const cycle = ['Grep', 'Read', 'Edit']
+    const observations = [...cycle, ...cycle].map((toolName, index) =>
+      obs({ id: `o${index}`, event: 'tool_start', toolName }),
+    )
+
+    const instincts = analyzeObservations(observations)
+    const hasWorkflow = instincts.some(
+      instinct => instinct.domain === 'workflow',
+    )
+
+    expect(hasWorkflow).toBe(true)
+  })
+
+  test('does not invent instincts without clear patterns', () => {
+    const instincts = analyzeObservations([obs({ messageText: 'hello' })])
+
+    expect(instincts).toEqual([])
+  })
+
+  test('snapshots recent tool outcome on correction candidates', () => {
+    const [instinct] = analyzeObservations([
+      toolComplete('o0', 'Edit', 'failure'),
+      obs({ id: 'o1', event: 'user_message', messageText: correctionText }),
+    ])
+
+    expect(instinct?.evidenceOutcome).toBe('failure')
+  })
+
+  test('marks tool-error-resolution candidates as success outcome', () => {
+    // A failing Grep followed by a succeeding Grep.
+    const instincts = analyzeObservations([
+      toolComplete('o0', 'Grep', 'failure'),
+      toolComplete('o1', 'Grep', 'success'),
+    ])
+
+    const resolution = instincts.find(
+      instinct => instinct.domain === 'debugging',
+    )
+    expect(resolution?.evidenceOutcome).toBe('success')
+  })
+
+  test('leaves evidenceOutcome undefined when no prior tool_complete exists', () => {
+    const [instinct] = analyzeObservations([
+      obs({ id: 'o0', event: 'user_message', messageText: correctionText }),
+    ])
+
+    expect(instinct?.evidenceOutcome).toBeUndefined()
+  })
+
+  test('single "always/must" convention message gets confidence <= 0.4', () => {
+    const instincts = analyzeObservations([
+      obs({ messageText: 'always use pnpm' }),
+    ])
+
+    expect(instincts.length).toBeGreaterThan(0)
+    instincts.forEach(instinct => {
+      expect(instinct.confidence).toBeLessThanOrEqual(0.4)
+    })
+  })
+})
--- a/src/services/skillLearning/tests/skillDedup.test.ts
+++ b/src/services/skillLearning/tests/skillDedup.test.ts
@@ -0,0 +1,100 @@
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import {
+  existsSync,
+  mkdirSync,
+  mkdtempSync,
+  readFileSync,
+  readdirSync,
+  rmSync,
+} from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import {
+  generateOrMergeSkillDraft,
+  writeLearnedSkill,
+} from '../skillGenerator.js'
+import { createInstinct } from '../instinctParser.js'
+
+// Sandbox root and the skills directory inside it that the tests scan.
+let root: string
+let skillsRoot: string
+
+beforeEach(() => {
+  const sandboxPrefix = join(tmpdir(), 'skill-learning-dedup-')
+  root = mkdtempSync(sandboxPrefix)
+  // Create the skills directory up front so it can be listed before any
+  // skill has been written.
+  skillsRoot = join(root, '.claude', 'skills')
+  mkdirSync(skillsRoot, { recursive: true })
+})
+
+// Teardown: delete the per-test sandbox.
+afterEach(() => {
+  try {
+    rmSync(root, {
+      recursive: true,
+      force: true,
+      maxRetries: 10,
+      retryDelay: 100,
+    })
+  } catch {
+    // Temp cleanup is best-effort: a transient open handle (seen on Windows)
+    // must not turn a passing test into a teardown failure.
+  }
+})
+
+/**
+ * Creates an active, project-scoped instinct in the `testing` domain.
+ *
+ * Every call uses the same trigger and action, so instincts built here only
+ * differ in their evidence.
+ *
+ * @param evidence The single evidence entry to attach.
+ */
+function testingInstinct(evidence: string) {
+  const instinct = createInstinct({
+    trigger: 'when writing tests',
+    action: 'use testing-library',
+    confidence: 0.85,
+    domain: 'testing',
+    source: 'session-observation',
+    scope: 'project',
+    evidence: [evidence],
+    status: 'active',
+  })
+  return instinct
+}
+
+describe('skill dedup', () => {
+  // Runs generate-or-merge for a one-instinct cluster against the sandbox.
+  const generateFor = (evidence: string) =>
+    generateOrMergeSkillDraft(
+      [testingInstinct(evidence)],
+      { cwd: root },
+      [skillsRoot],
+    )
+
+  test('first instinct cluster creates a new skill', async () => {
+    const outcome = await generateFor('first')
+
+    expect(outcome.action).toBe('create')
+    if (outcome.action === 'create') {
+      await writeLearnedSkill(outcome.draft)
+    }
+  })
+
+  test('second run with same trigger appends evidence instead of writing a duplicate', async () => {
+    // First pass: nothing on disk yet, so a new skill is created and written.
+    const created = await generateFor('first')
+    expect(created.action).toBe('create')
+    if (created.action === 'create') {
+      await writeLearnedSkill(created.draft)
+    }
+
+    // Second pass: the same cluster should collide with the skill written above.
+    const merged = await generateFor('second')
+    expect(merged.action).toBe('append-evidence')
+    if (merged.action === 'append-evidence') {
+      expect(merged.overlap).toBeGreaterThanOrEqual(0.8)
+      const skillText = readFileSync(merged.appendedPath, 'utf8')
+      expect(skillText).toContain('Learned evidence')
+      expect(skillText).toContain('- second')
+    }
+
+    // Exactly one SKILL.md may exist on disk afterwards.
+    expect(findSkillMdFiles(skillsRoot)).toHaveLength(1)
+  })
+})
+
+/**
+ * Recursively collects the paths of every entry named `SKILL.md` under `dir`.
+ *
+ * @param dir Directory to walk. A directory that does not exist yields an
+ *   empty list instead of throwing.
+ * @returns Full paths of the matches, in directory-listing order.
+ */
+function findSkillMdFiles(dir: string): string[] {
+  if (!existsSync(dir)) return []
+  const results: string[] = []
+  // `withFileTypes` yields Dirent objects, so no stat call per entry is needed.
+  for (const entry of readdirSync(dir, { withFileTypes: true })) {
+    const full = join(dir, entry.name)
+    if (entry.isDirectory()) {
+      results.push(...findSkillMdFiles(full))
+    } else if (entry.name === 'SKILL.md') {
+      results.push(full)
+    }
+  }
+  return results
+}
--- a/src/services/skillLearning/tests/skillGapStore.test.ts
+++ b/src/services/skillLearning/tests/skillGapStore.test.ts
@@ -0,0 +1,360 @@
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import {
+  existsSync,
+  mkdtempSync,
+  readFileSync,
+  rmSync,
+  writeFileSync,
+  mkdirSync,
+} from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import {
+  findGapKeyByDraftPath,
+  readSkillGaps,
+  recordDraftHit,
+  recordSkillGap,
+  rejectSkillGap,
+  shouldPromoteToActive,
+  shouldPromoteToDraft,
+  type SkillGapRecord,
+} from '../skillGapStore.js'
+import type { SkillLearningProjectContext } from '../types.js'
+
+// Sandbox root and the project context that points storage into it.
+let root: string
+let project: SkillLearningProjectContext
+
+beforeEach(() => {
+  const sandbox = mkdtempSync(join(tmpdir(), 'skill-gap-store-'))
+  root = sandbox
+  // A global-scope context whose cwd, project root and storage all live in
+  // the sandbox.
+  project = {
+    projectId: 'global',
+    projectName: 'global',
+    scope: 'global',
+    source: 'global',
+    cwd: sandbox,
+    storageDir: join(sandbox, 'global'),
+    projectRoot: sandbox,
+  }
+})
+
+// Teardown: delete the sandbox, retrying a few times before giving up.
+afterEach(() => {
+  const removal = {
+    recursive: true,
+    force: true,
+    maxRetries: 10,
+    retryDelay: 100,
+  }
+  try {
+    rmSync(root, removal)
+  } catch {
+    // Temp cleanup best-effort; Windows may hold transient handles.
+  }
+})
+
+/** Path of the `.claude/skills/.drafts` directory inside the current sandbox. */
+function draftsDir(): string {
+  const segments = ['.claude', 'skills', '.drafts']
+  return join(root, ...segments)
+}
+
+describe('recordSkillGap — P0-1 state machine', () => {
+  // Records one occurrence of `prompt` against the sandbox project. Built
+  // lazily because `root` and `project` are reassigned before every test.
+  const record = (prompt: string) =>
+    recordSkillGap({ prompt, cwd: root, project, rootDir: root })
+
+  test('first occurrence lands in pending and writes no skill file', async () => {
+    const gap = await record('Refactor the data pipeline please')
+
+    expect(gap.status).toBe('pending')
+    expect(gap.count).toBe(1)
+    expect(gap.draft).toBeUndefined()
+    expect(gap.active).toBeUndefined()
+    expect(existsSync(draftsDir())).toBe(false)
+  })
+
+  test('single Chinese exhortation stays pending — no draft, no active', async () => {
+    const gap = await record('以后必须严格检查类型')
+
+    expect(gap.status).toBe('pending')
+    expect(gap.draft).toBeUndefined()
+    expect(gap.active).toBeUndefined()
+  })
+
+  test('second occurrence promotes to draft but not active', async () => {
+    const prompt = 'explain the build pipeline'
+    await record(prompt)
+    const second = await record(prompt)
+
+    expect(second.status).toBe('draft')
+    expect(second.count).toBe(2)
+    expect(second.draft?.type).toBe('draft')
+    expect(second.active).toBeUndefined()
+    expect(existsSync(second.draft!.skillPath)).toBe(true)
+  })
+
+  test('single strong English exhortation ("must never") stays pending', async () => {
+    const gap = await record('You must never commit secrets to git')
+
+    expect(gap.status).toBe('pending')
+    expect(gap.count).toBe(1)
+    expect(gap.draft).toBeUndefined()
+    expect(gap.active).toBeUndefined()
+  })
+
+  test('reaching count >= 4 promotes an existing draft to active', async () => {
+    const prompt = 'clean up abandoned feature flags'
+    // Four sequential occurrences; only the result of the last is inspected.
+    let fourth = await record(prompt)
+    for (let occurrence = 2; occurrence <= 4; occurrence++) {
+      fourth = await record(prompt)
+    }
+
+    expect(fourth.status).toBe('active')
+    expect(fourth.count).toBe(4)
+    expect(fourth.draft).toBeDefined()
+    expect(fourth.active?.type).toBe('active')
+    expect(existsSync(fourth.active!.skillPath)).toBe(true)
+  })
+
+  test('rejected gaps do not regenerate artefacts on subsequent calls', async () => {
+    const prompt = 'please format the README differently'
+    await record(prompt)
+    const drafted = await record(prompt)
+    expect(drafted.status).toBe('draft')
+
+    // Reject the drafted gap, then report the same prompt once more.
+    await rejectSkillGap(drafted.key, project, root)
+    const afterReject = await record(prompt)
+
+    expect(afterReject.status).toBe('rejected')
+    expect(afterReject.count).toBe(3)
+    expect(afterReject.active).toBeUndefined()
+  })
+})
+
+describe('recordDraftHit — draft hits escalation (P1-4 contract)', () => {
+  // Reports `prompt` twice and returns the record from the second report.
+  const recordTwice = async (prompt: string) => {
+    await recordSkillGap({ prompt, cwd: root, project, rootDir: root })
+    return recordSkillGap({ prompt, cwd: root, project, rootDir: root })
+  }
+
+  test('draftHits reaching 2 escalates a draft to active', async () => {
+    const drafted = await recordTwice('improve error handling in loader.ts')
+    expect(drafted.status).toBe('draft')
+
+    // Distinct session IDs: recordDraftHit enforces one hit per session, so a
+    // single session cannot flip the draftHits >= 2 active gate on its own.
+    await recordDraftHit(drafted.key, project, root, 'session-a')
+    const afterSecondHit = await recordDraftHit(
+      drafted.key,
+      project,
+      root,
+      'session-b',
+    )
+
+    expect(afterSecondHit?.draftHits).toBe(2)
+    expect(afterSecondHit?.status).toBe('active')
+    expect(afterSecondHit?.active?.type).toBe('active')
+  })
+
+  test('first draft hit does not promote to active', async () => {
+    const drafted = await recordTwice('add missing null checks in handler')
+
+    const afterOneHit = await recordDraftHit(drafted.key, project, root)
+
+    expect(afterOneHit?.draftHits).toBe(1)
+    expect(afterOneHit?.status).toBe('draft')
+    expect(afterOneHit?.active).toBeUndefined()
+  })
+
+  test('findGapKeyByDraftPath resolves the correct gap for an existing draft', async () => {
+    const drafted = await recordTwice('restructure the module boundaries')
+    expect(drafted.draft?.skillPath).toBeTruthy()
+
+    const draftPath = drafted.draft!.skillPath
+    const foundKey = await findGapKeyByDraftPath(draftPath, project, root)
+
+    expect(foundKey).toBe(drafted.key)
+  })
+
+  test('findGapKeyByDraftPath returns undefined for unknown paths', async () => {
+    const unknownPath = '/nowhere/.claude/skills/.drafts/mystery/SKILL.md'
+
+    const result = await findGapKeyByDraftPath(unknownPath, project, root)
+
+    expect(result).toBeUndefined()
+  })
+
+  test('recordDraftHit is a no-op on pending gaps', async () => {
+    // A single report, so the gap has no draft yet.
+    const pending = await recordSkillGap({
+      prompt: 'investigate the mysterious cache bug',
+      cwd: root,
+      project,
+      rootDir: root,
+    })
+
+    const updated = await recordDraftHit(pending.key, project, root)
+
+    expect(updated?.status).toBe('pending')
+    expect(updated?.draftHits).toBe(0)
+  })
+})
+
+describe('shouldPromoteToDraft / shouldPromoteToActive', () => {
+  // Fills in the fields both scenarios share; callers supply the rest.
+  const gapRecord = (
+    fields: Pick<SkillGapRecord, 'prompt' | 'count' | 'status'> &
+      Partial<SkillGapRecord>,
+  ): SkillGapRecord => ({
+    key: 'k',
+    draftHits: 0,
+    draftHitSessions: [],
+    sessionId: 's',
+    cwd: root,
+    projectId: 'global',
+    projectName: 'global',
+    recommendations: [],
+    createdAt: new Date().toISOString(),
+    updatedAt: new Date().toISOString(),
+    ...fields,
+  })
+
+  test('shouldPromoteToDraft requires count >= 2 (strong signal no longer bypasses)', () => {
+    const pending = gapRecord({
+      prompt: 'refactor this',
+      count: 1,
+      status: 'pending',
+    })
+
+    expect(shouldPromoteToDraft(pending)).toBe(false)
+    expect(shouldPromoteToDraft({ ...pending, count: 2 })).toBe(true)
+
+    // A single strong-signal prompt no longer promotes; it must also repeat.
+    const strongSignal = { ...pending, prompt: '必须使用 testing-library' }
+    expect(shouldPromoteToDraft(strongSignal)).toBe(false)
+  })
+
+  test('shouldPromoteToActive requires a draft plus threshold', () => {
+    const drafted = gapRecord({
+      prompt: 'refactor',
+      count: 3,
+      status: 'draft',
+      draft: { type: 'draft', name: 'x', skillPath: '/tmp/x' },
+    })
+
+    // With a draft: neither threshold met, then each threshold on its own.
+    expect(shouldPromoteToActive(drafted)).toBe(false)
+    expect(shouldPromoteToActive({ ...drafted, count: 4 })).toBe(true)
+    expect(shouldPromoteToActive({ ...drafted, draftHits: 2 })).toBe(true)
+
+    // Without a draft.
+    const withoutDraft = { ...drafted, draft: undefined }
+    expect(shouldPromoteToActive(withoutDraft)).toBe(false)
+  })
+})
+
+describe('migrateLegacyGapState', () => {
+  // Writes a version-1 skill-gaps.json holding one record under 'legacy-key'.
+  // Key order matches the hand-written fixtures so the file bytes are the same.
+  const seedLegacyGap = (
+    prompt: string,
+    count: number,
+    status: string,
+    extra: Record<string, unknown> = {},
+  ): void => {
+    const storeDir = join(root, 'global')
+    const gapPath = join(storeDir, 'skill-gaps.json')
+    mkdirSync(storeDir, { recursive: true })
+    const legacy = {
+      version: 1,
+      gaps: {
+        'legacy-key': {
+          key: 'legacy-key',
+          prompt,
+          count,
+          status,
+          sessionId: 's1',
+          cwd: root,
+          projectId: 'global',
+          projectName: 'global',
+          recommendations: [],
+          createdAt: '2025-01-01T00:00:00.000Z',
+          updatedAt: '2025-01-01T00:00:00.000Z',
+          ...extra,
+        },
+      },
+    }
+    writeFileSync(gapPath, JSON.stringify(legacy), 'utf8')
+  }
+
+  test('resets legacy status=draft count=1 (no file) to pending', async () => {
+    seedLegacyGap('old gap', 1, 'draft')
+
+    const [migrated] = await readSkillGaps(project, root)
+
+    expect(migrated?.status).toBe('pending')
+    expect(migrated?.draftHits).toBe(0)
+  })
+
+  test('downgrades active without skill file to draft if draft exists', async () => {
+    // The record claims 'active' but only carries a draft artefact.
+    seedLegacyGap('old', 3, 'active', {
+      draft: { type: 'draft', name: 'x', skillPath: '/tmp/x' },
+    })
+
+    const [migrated] = await readSkillGaps(project, root)
+
+    expect(migrated?.status).toBe('draft')
+  })
+})
--- a/src/services/skillLearning/tests/skillGenerator.test.ts
+++ b/src/services/skillLearning/tests/skillGenerator.test.ts
@@ -0,0 +1,56 @@
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import { existsSync, mkdtempSync, readFileSync, rmSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import { createInstinct } from '../instinctParser.js'
+import { generateSkillDraft, writeLearnedSkill } from '../skillGenerator.js'
+
+// Per-test sandbox directory used as the project cwd for generated skills.
+let cwd: string
+
+beforeEach(() => {
+  const sandboxPrefix = join(tmpdir(), 'skill-learning-generator-')
+  cwd = mkdtempSync(sandboxPrefix)
+})
+
+// Teardown: delete the per-test sandbox.
+afterEach(() => {
+  try {
+    rmSync(cwd, {
+      recursive: true,
+      force: true,
+      maxRetries: 10,
+      retryDelay: 100,
+    })
+  } catch {
+    // Temp cleanup is best-effort: a transient open handle (seen on Windows)
+    // must not turn a passing test into a teardown failure.
+  }
+})
+
+describe('skillGenerator', () => {
+  // Shared instinct fixture; the two tests only differ in the action text.
+  const reactTestInstinct = (action: string) =>
+    createInstinct({
+      trigger: 'when writing React tests',
+      action,
+      confidence: 0.85,
+      domain: 'testing',
+      source: 'session-observation',
+      scope: 'project',
+      evidence: ['user correction'],
+    })
+
+  test('generates a valid SKILL.md draft from instincts', () => {
+    const draft = generateSkillDraft(
+      [reactTestInstinct('use testing-library and avoid implementation mocks')],
+      { cwd },
+    )
+
+    expect(draft.name).toContain('testing')
+    // Frontmatter keys and section headings the draft has to contain.
+    for (const marker of [
+      'name:',
+      'description:',
+      '## Trigger',
+      '## Evidence',
+    ]) {
+      expect(draft.content).toContain(marker)
+    }
+  })
+
+  test('writes learned skills to project scope', async () => {
+    const draft = generateSkillDraft(
+      [reactTestInstinct('use testing-library')],
+      { cwd },
+    )
+
+    const writtenPath = await writeLearnedSkill(draft)
+
+    expect(existsSync(writtenPath)).toBe(true)
+    const skillText = readFileSync(writtenPath, 'utf8')
+    expect(skillText).toContain('use testing-library')
+  })
+})
--- a/src/services/skillLearning/tests/skillLearningSmoke.test.ts
+++ b/src/services/skillLearning/tests/skillLearningSmoke.test.ts
@@ -0,0 +1,154 @@
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import {
+  existsSync,
+  mkdtempSync,
+  readFileSync,
+  rmSync,
+  writeFileSync,
+} from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import { call } from '../../../commands/skill-learning/skill-learning.js'
+import { clearCommandsCache } from '../../../commands.js'
+import { getSkillIndex, searchSkills } from '../../skillSearch/localSearch.js'
+import {
+  resetSkillLearningConfig,
+  setSkillLearningConfigForTest,
+} from '../config.js'
+import { loadInstincts, readObservations } from '../index.js'
+
+// Per-test sandbox directory, plus the cwd and environment to restore later.
+let root: string
+let previousCwd: string
+const originalEnv = { ...process.env }
+
+beforeEach(() => {
+  previousCwd = process.cwd()
+  root = mkdtempSync(join(tmpdir(), 'skill-learning-smoke-'))
+  process.chdir(root)
+
+  // Start from the snapshot taken at module load, then layer the overrides
+  // that point skill learning at the sandbox.
+  process.env = { ...originalEnv }
+  Object.assign(process.env, {
+    CLAUDE_SKILL_LEARNING_HOME: join(root, 'learning-home'),
+    CLAUDE_CONFIG_DIR: join(root, 'config'),
+    SKILL_LEARNING_ENABLED: '1',
+    ANTHROPIC_API_KEY: 'test-key',
+    NODE_ENV: 'test',
+  })
+
+  setSkillLearningConfigForTest({ minConfidence: 0.3, minClusterSize: 1 })
+})
+
+// Teardown: leave the sandbox, restore env/config/caches, delete the sandbox.
+afterEach(() => {
+  process.chdir(previousCwd)
+  process.env = { ...originalEnv }
+  resetSkillLearningConfig()
+  clearCommandsCache()
+
+  const removal = {
+    recursive: true,
+    force: true,
+    maxRetries: 10,
+    retryDelay: 100,
+  }
+  try {
+    rmSync(root, removal)
+  } catch {
+    // Windows can keep a transient handle open after dynamic command loading.
+    // Temp cleanup is best-effort; failing here would mask the smoke result.
+  }
+})
+
+describe('skillLearning smoke', () => {
+  test('ingests corrections, evolves a learned skill, and skill search finds it', async () => {
+    // Step 1: write the fabricated session and ingest it through the command.
+    const transcriptPath = join(root, 'session.jsonl')
+    writeFileSync(transcriptPath, buildTranscript(), 'utf8')
+
+    // --min-session-length=0 keeps the 9-observation transcript from being
+    // skipped by the ECC-parity gate (default threshold: 10 observations).
+    const ingested = await call(
+      `ingest ${transcriptPath} --min-session-length=0`,
+      {} as any,
+    )
+    expect(ingested.type).toBe('text')
+    if (ingested.type === 'text') {
+      expect(ingested.value).toContain('Ingested 9 observations')
+    }
+
+    // Step 2: the global store must hold the observations and a testing instinct.
+    const learningHome = process.env.CLAUDE_SKILL_LEARNING_HOME
+    const globalStore = {
+      rootDir: learningHome,
+      project: {
+        projectId: 'global',
+        projectName: 'global',
+        cwd: root,
+        scope: 'global' as const,
+        source: 'global' as const,
+        storageDir: join(learningHome!, 'global'),
+      },
+    }
+    expect(await readObservations(globalStore)).toHaveLength(9)
+
+    const learned = await loadInstincts(globalStore)
+    const testingInstinct = learned.find(
+      instinct => instinct.domain === 'testing',
+    )
+    expect(testingInstinct?.confidence).toBe(0.8)
+    expect(testingInstinct?.status).toBe('active')
+
+    // Step 3: evolve the instincts into skill files.
+    const evolved = await call('evolve --generate', {} as any)
+    expect(evolved.type).toBe('text')
+    if (evolved.type === 'text') {
+      // The smoke transcript (9 observations, one fabricated instinct per
+      // domain) may yield 1 or 2 candidates depending on sessionObserver's
+      // clustering. Post-H15 either is accepted: the smoke proves end-to-end
+      // wiring, not exact cluster math.
+      expect(evolved.value).toMatch(/Generated [12] learned skill\(s\)/)
+    }
+
+    const skillName = 'testing-choosing-between-mock-testing-library'
+    const skillFile = join(root, '.claude', 'skills', skillName, 'SKILL.md')
+    expect(existsSync(skillFile)).toBe(true)
+    expect(readFileSync(skillFile, 'utf8')).toContain('Prefer testing-library')
+
+    // Step 4: the new skill must be indexed and rank first for a matching query.
+    clearCommandsCache()
+    const index = await getSkillIndex(root)
+    const indexedNames = index.map(entry => entry.name)
+    expect(indexedNames.includes(skillName)).toBe(true)
+
+    const [topHit] = searchSkills(
+      'write tests with testing library instead of mock',
+      index,
+      5,
+    )
+    expect(topHit?.name).toBe(skillName)
+  })
+})
+
+/**
+ * Builds the JSONL transcript fed to the smoke test: nine entries, one per
+ * second, alternating a repeated user correction with Grep/Read/Edit tool
+ * calls.
+ *
+ * @returns One JSON document per line, terminated by a trailing newline.
+ */
+function buildTranscript(): string {
+  const correction = '不要 mock，用 testing-library'
+  const testFile = 'src/example.test.tsx'
+
+  const entries = [
+    user(correction, 0),
+    toolUse('Grep', { pattern: 'renderHook' }, 1),
+    toolUse('Read', { file_path: testFile }, 2),
+    toolUse('Edit', { file_path: testFile }, 3),
+    user(correction, 4),
+    toolUse('Grep', { pattern: 'mock' }, 5),
+    toolUse('Read', { file_path: testFile }, 6),
+    toolUse('Edit', { file_path: testFile }, 7),
+    user(correction, 8),
+  ]
+
+  const lines = entries.map(entry => JSON.stringify(entry))
+  return `${lines.join('\n')}\n`
+}
+
+/**
+ * Builds a user transcript entry for the smoke session.
+ *
+ * @param content Text of the user message.
+ * @param second Seconds component of the entry's timestamp within
+ *   2026-04-16T00:00 (expected range 0-59).
+ * @returns A plain object shaped like one transcript line.
+ */
+function user(content: string, second: number) {
+  // Zero-pad so values >= 10 still produce a valid ISO-8601 timestamp; the
+  // previous `0${second}` form only worked for single digits.
+  const ss = String(second).padStart(2, '0')
+  return {
+    type: 'user',
+    sessionId: 'smoke-session',
+    cwd: root,
+    timestamp: `2026-04-16T00:00:${ss}.000Z`,
+    message: { role: 'user', content },
+  }
+}
+
+/**
+ * Builds an assistant transcript entry containing a single `tool_use` block.
+ *
+ * @param name Tool name recorded in the `tool_use` block.
+ * @param input Tool input recorded in the `tool_use` block.
+ * @param second Seconds component of the entry's timestamp within
+ *   2026-04-16T00:00 (expected range 0-59).
+ * @returns A plain object shaped like one transcript line.
+ */
+function toolUse(name: string, input: Record<string, unknown>, second: number) {
+  // Zero-pad so values >= 10 still produce a valid ISO-8601 timestamp; the
+  // previous `0${second}` form only worked for single digits.
+  const ss = String(second).padStart(2, '0')
+  return {
+    type: 'assistant',
+    sessionId: 'smoke-session',
+    cwd: root,
+    timestamp: `2026-04-16T00:00:${ss}.000Z`,
+    message: {
+      role: 'assistant',
+      content: [{ type: 'tool_use', name, input }],
+    },
+  }
+}
--- a/src/services/skillLearning/tests/skillLifecycle.test.ts
+++ b/src/services/skillLearning/tests/skillLifecycle.test.ts
@@ -0,0 +1,161 @@
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import {
+  existsSync,
+  mkdtempSync,
+  readFileSync,
+  rmSync,
+  writeFileSync,
+} from 'node:fs'
+import { mkdir } from 'node:fs/promises'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import type { LearnedSkillDraft } from '../types.js'
+import {
+  applySkillLifecycleDecision,
+  compareExistingSkills,
+  decideSkillLifecycle,
+  loadExistingSkills,
+} from '../skillLifecycle.js'
+
+// Per-test sandbox directory that doubles as the skills root.
+let root: string
+
+beforeEach(() => {
+  const sandboxPrefix = join(tmpdir(), 'skill-learning-lifecycle-')
+  root = mkdtempSync(sandboxPrefix)
+})
+
+// Teardown: delete the per-test sandbox.
+afterEach(() => {
+  try {
+    rmSync(root, {
+      recursive: true,
+      force: true,
+      maxRetries: 10,
+      retryDelay: 100,
+    })
+  } catch {
+    // Temp cleanup is best-effort: a transient open handle (seen on Windows)
+    // must not turn a passing test into a teardown failure.
+  }
+})
+
+describe('skillLifecycle', () => {
+  // Body text shared by the scenarios that need an overlapping skill.
+  const overlappingBody =
+    'Use testing-library for React tests and avoid implementation mocks'
+
+  test('detects overlapping existing skills', async () => {
+    await writeSkill('react-testing', 'Use testing-library for React tests')
+    const draft = draftSkill('react-testing-updated', overlappingBody)
+
+    const [bestMatch] = await compareExistingSkills(draft, [root])
+
+    expect(bestMatch?.name).toBe('react-testing')
+  })
+
+  test('replace archives old skill so it leaves active index', async () => {
+    await writeSkill('react-testing', overlappingBody)
+    const draft = draftSkill('react-testing-updated', overlappingBody)
+    const matches = await compareExistingSkills(draft, [root])
+    const decision = decideSkillLifecycle(draft, matches)
+    expect(decision.type).toBe('replace')
+
+    const result = await applySkillLifecycleDecision(decision)
+
+    expect(result.activePath).toBeDefined()
+    expect(result.archivedPath).toBeDefined()
+    // The old skill directory is gone and its archive carries a manifest.
+    expect(existsSync(join(root, 'react-testing'))).toBe(false)
+    const manifestPath = join(
+      result.archivedPath!,
+      'replacement-manifest.json',
+    )
+    expect(existsSync(manifestPath)).toBe(true)
+    const remaining = await loadExistingSkills([root])
+    expect(remaining.map(skill => skill.name)).not.toContain('react-testing')
+  })
+
+  test('create writes new skill when no overlap exists', async () => {
+    const draft = draftSkill('new-testing', 'A unique learned testing workflow')
+
+    const decision = decideSkillLifecycle(draft, [])
+    const result = await applySkillLifecycleDecision(decision)
+
+    expect(result.activePath).toBeDefined()
+    expect(readFileSync(result.activePath!, 'utf8')).toContain('new-testing')
+  })
+
+  test('merge skips user-authored skill without origin field and logs warning', async () => {
+    const body = overlappingBody
+    // `null` origin: the skill file is written without an origin line.
+    await writeSkill('react-testing', body, null)
+    // Overlapping draft whose confidence (0.6) is lowered so that the
+    // decision comes out as merge rather than replace.
+    const draft: LearnedSkillDraft = {
+      name: 'react-testing',
+      description: body,
+      scope: 'project',
+      sourceInstinctIds: ['i1'],
+      confidence: 0.6,
+      content: `---\nname: react-testing\ndescription: ${JSON.stringify(body)}\n---\n\n# React Testing\n\n${body}\n`,
+      outputPath: join(root, 'react-testing-patch'),
+    }
+    const matches = await compareExistingSkills(draft, [root])
+    const decision = decideSkillLifecycle(draft, matches)
+    expect(decision.type).toBe('merge')
+
+    // Capture stderr while the decision is applied; always restore it after.
+    const captured: string[] = []
+    const realWrite = process.stderr.write.bind(process.stderr)
+    process.stderr.write = (chunk: unknown) => {
+      captured.push(String(chunk))
+      return true
+    }
+    try {
+      const result = await applySkillLifecycleDecision(decision)
+      expect(result.activePath).toBeUndefined()
+      const warned = captured.some(line =>
+        line.includes('[skill-learning] skip user-authored skill'),
+      )
+      expect(warned).toBe(true)
+    } finally {
+      process.stderr.write = realWrite
+    }
+  })
+
+  test('replace proceeds normally for skill-learning-generated skill', async () => {
+    await writeSkill('generated-testing', overlappingBody, 'skill-learning')
+    const draft = draftSkill('generated-testing-updated', overlappingBody)
+    const matches = await compareExistingSkills(draft, [root])
+    const decision = decideSkillLifecycle(draft, matches)
+    expect(decision.type).toBe('replace')
+
+    const result = await applySkillLifecycleDecision(decision)
+
+    expect(result.activePath).toBeDefined()
+    expect(result.archivedPath).toBeDefined()
+  })
+})
+
+/**
+ * Writes `<root>/<name>/SKILL.md` with frontmatter and a short body.
+ *
+ * @param name Skill name; used as directory name, frontmatter name and heading.
+ * @param body Text used both as the frontmatter description and the body.
+ * @param origin Value of the frontmatter `origin` line; `null` omits the line.
+ */
+async function writeSkill(
+  name: string,
+  body: string,
+  origin: string | null = 'skill-learning',
+): Promise<void> {
+  let originLine = ''
+  if (origin !== null) {
+    originLine = `origin: ${origin}\n`
+  }
+
+  const skillDir = join(root, name)
+  await mkdir(skillDir, { recursive: true })
+
+  const markdown = `---\nname: ${name}\ndescription: ${JSON.stringify(body)}\n${originLine}---\n\n# ${name}\n\n${body}\n`
+  writeFileSync(join(skillDir, 'SKILL.md'), markdown)
+}
+
+/**
+ * Builds a project-scoped draft with confidence 0.9 whose description and
+ * body are both `text`, targeting `<root>/<name>`.
+ *
+ * @param name Skill name; also the last segment of the output path.
+ * @param text Description and body text of the draft.
+ */
+function draftSkill(name: string, text: string): LearnedSkillDraft {
+  const content = `---\nname: ${name}\ndescription: ${JSON.stringify(text)}\n---\n\n# ${name}\n\n${text}\n`
+  const outputPath = join(root, name)
+  return {
+    name,
+    description: text,
+    scope: 'project',
+    sourceInstinctIds: ['i1'],
+    confidence: 0.9,
+    content,
+    outputPath,
+  }
+}
--- a/src/services/skillLearning/tests/throttleAndCircuitBreaker.test.ts
+++ b/src/services/skillLearning/tests/throttleAndCircuitBreaker.test.ts
@@ -0,0 +1,372 @@
+/**
+ * Unit tests for H5 (LLM call throttle), H6 (message watermark dedup),
+ * and H7 (circuit breaker) improvements.
+ */
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import { mkdtempSync, rmSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+
+import {
+  resetSkillLearningConfig,
+  setSkillLearningConfigForTest,
+} from '../config.js'
+import { resetCircuitBreaker } from '../llmObserverBackend.js'
+import {
+  resetRuntimeLLMBookkeeping,
+  resetRuntimeObserverForTest,
+  runSkillLearningPostSampling,
+} from '../runtimeObserver.js'
+import type { REPLHookContext } from '../../../utils/hooks/postSamplingHooks.js'
+import {
+  setActiveObserverBackend,
+  getActiveObserverBackend,
+  registerObserverBackend,
+  type ObserverBackend,
+} from '../observerBackend.js'
+import type { StoredSkillObservation } from '../observationStore.js'
+
+let root: string // per-test temp directory, created in beforeEach
+let previousCwd: string // cwd captured in beforeEach so afterEach can restore it
+const originalEnv = { ...process.env } // shallow env snapshot taken at module load
+const originalBackendName = getActiveObserverBackend().name // backend active at module load
+
+/**
+ * Wraps `{ uuid, content }` pairs as user messages inside a minimal
+ * main-thread REPL hook context; every other context field is a stub.
+ */
+function makeCtx(
+  messages: Array<{ uuid: string; content: string }>,
+): REPLHookContext {
+  const userMessages = messages.map(entry => ({
+    type: 'user' as const,
+    uuid: entry.uuid as any,
+    message: { role: 'user' as const, content: entry.content },
+  }))
+  return {
+    querySource: 'repl_main_thread',
+    messages: userMessages,
+    systemPrompt: [] as any,
+    userContext: {},
+    systemContext: {},
+    toolUseContext: { agentId: undefined } as any,
+  }
+}
+
+/** Five messages with identical content and uuids `<prefix>-0` … `<prefix>-4`. */
+function make5Msgs(prefix: string): Array<{ uuid: string; content: string }> {
+  const batch: Array<{ uuid: string; content: string }> = []
+  for (let index = 0; index < 5; index++) {
+    batch.push({
+      uuid: `${prefix}-${index}`,
+      content: '不要 mock，用 testing-library',
+    })
+  }
+  return batch
+}
+
+/**
+ * Builds `count` `user_message` observations with ids `o0`, `o1`, … that all
+ * share the same session, project, cwd and message text. Each observation
+ * gets its own freshly generated ISO timestamp.
+ */
+function makeObs(count: number): StoredSkillObservation[] {
+  const buildObservation = (index: number): StoredSkillObservation => ({
+    id: `o${index}`,
+    timestamp: new Date().toISOString(),
+    event: 'user_message' as const,
+    sessionId: 's1',
+    projectId: 'p1',
+    projectName: 'project',
+    cwd: '/tmp',
+    messageText: 'test message',
+  })
+  return Array.from({ length: count }, (_unused, index) =>
+    buildObservation(index),
+  )
+}
+
+beforeEach(() => {
+  root = mkdtempSync(join(tmpdir(), 'skill-throttle-test-'))
+  previousCwd = process.cwd()
+  process.chdir(root) // run each test from inside its own temp directory
+  process.env = { ...originalEnv } // start from a fresh copy of the load-time env
+  process.env.CLAUDE_SKILL_LEARNING_HOME = join(root, 'learning-home')
+  process.env.CLAUDE_CONFIG_DIR = join(root, 'config')
+  process.env.SKILL_LEARNING_ENABLED = '1'
+  process.env.NODE_ENV = 'test'
+  resetRuntimeObserverForTest()
+  resetCircuitBreaker()
+  setActiveObserverBackend(originalBackendName) // undo any backend a previous test activated
+})
+
+afterEach(() => {
+  process.chdir(previousCwd) // leave the temp directory before it is removed below
+  process.env = { ...originalEnv }
+  resetSkillLearningConfig() // drop any override installed via setSkillLearningConfigForTest
+  rmSync(root, { recursive: true, force: true })
+  resetRuntimeObserverForTest()
+  resetCircuitBreaker()
+  setActiveObserverBackend(originalBackendName)
+})
+
+// ---------------------------------------------------------------------------
+// H5: LLM throttle — minimum observation count gate
+// ---------------------------------------------------------------------------
+describe('H5: LLM call throttle', () => {
+  /**
+   * Registers and activates an observer backend called `name` whose `analyze`
+   * only counts its invocations and never yields instincts. The returned
+   * object's `calls` field reflects the live invocation count.
+   */
+  function installCallCounter(name: string): { readonly calls: number } {
+    const counter = { calls: 0 }
+    const backend: ObserverBackend = {
+      name,
+      analyze() {
+        counter.calls++
+        return []
+      },
+    }
+    registerObserverBackend(backend)
+    setActiveObserverBackend(name)
+    return counter
+  }
+
+  test('fewer than 5 observations routes to heuristic — LLM backend not called', async () => {
+    const llm = installCallCounter('tracking-under5')
+
+    // 3 messages → 3 observations, below the threshold of 5.
+    await runSkillLearningPostSampling(
+      makeCtx([
+        { uuid: 'u5a', content: '不要 mock，用 testing-library' },
+        { uuid: 'u5b', content: '不要 mock，用 testing-library' },
+        { uuid: 'u5c', content: '不要 mock，用 testing-library' },
+      ]),
+    )
+
+    expect(llm.calls).toBe(0)
+  })
+
+  test('session cap: more calls than cap reaches heuristic fallback', async () => {
+    // Cap at 1 call, cooldown 0ms.
+    setSkillLearningConfigForTest({
+      llm: { maxCallsPerSession: 1, cooldownMs: 0 },
+    })
+    const llm = installCallCounter('tracking-cap')
+
+    // First call with 5 messages — reaches LLM.
+    await runSkillLearningPostSampling(makeCtx(make5Msgs('cap1')))
+    expect(llm.calls).toBe(1)
+
+    // Second call with 5 different messages — cap hit, must NOT reach LLM.
+    await runSkillLearningPostSampling(makeCtx(make5Msgs('cap2')))
+    expect(llm.calls).toBe(1)
+  })
+
+  test('cooldown gate: second call within cooldown window skips LLM', async () => {
+    // Very long cooldown — second call is always within window.
+    setSkillLearningConfigForTest({
+      llm: { cooldownMs: 999_999_000, maxCallsPerSession: 100 },
+    })
+    const llm = installCallCounter('tracking-cooldown')
+
+    await runSkillLearningPostSampling(makeCtx(make5Msgs('cd1')))
+    expect(llm.calls).toBe(1)
+
+    // Second call — still within 999999 second cooldown.
+    await runSkillLearningPostSampling(makeCtx(make5Msgs('cd2')))
+    expect(llm.calls).toBe(1)
+  })
+
+  test('resetRuntimeLLMBookkeeping resets session counter and timestamps', async () => {
+    setSkillLearningConfigForTest({
+      llm: { maxCallsPerSession: 1, cooldownMs: 0 },
+    })
+    const llm = installCallCounter('tracking-reset')
+
+    // First call reaches LLM; cap = 1, so second call is blocked.
+    await runSkillLearningPostSampling(makeCtx(make5Msgs('rr1')))
+    await runSkillLearningPostSampling(makeCtx(make5Msgs('rr2')))
+    expect(llm.calls).toBe(1)
+
+    // After reset the counter clears — next call reaches LLM again.
+    resetRuntimeLLMBookkeeping()
+    await runSkillLearningPostSampling(makeCtx(make5Msgs('rr3')))
+    expect(llm.calls).toBe(2)
+  })
+})
+
+// ---------------------------------------------------------------------------
+// H6: Message watermark dedup
+// ---------------------------------------------------------------------------
+describe('H6: message watermark dedup', () => {
+  /**
+   * Registers and activates an observer backend called `name` that sums the
+   * number of observations handed to `analyze`, then removes the LLM cooldown
+   * and raises the session cap so throttling never hides a call. The returned
+   * object's `total` field reflects the live running sum.
+   */
+  function installObservationCounter(name: string): {
+    readonly total: number
+  } {
+    const counter = { total: 0 }
+    const backend: ObserverBackend = {
+      name,
+      analyze(observations) {
+        counter.total += observations.length
+        return []
+      },
+    }
+    registerObserverBackend(backend)
+    setActiveObserverBackend(name)
+    setSkillLearningConfigForTest({
+      llm: { cooldownMs: 0, maxCallsPerSession: 100 },
+    })
+    return counter
+  }
+
+  test('same message uuids are not re-processed in a subsequent call', async () => {
+    // Use a backend that counts observations to detect dedup.
+    const seen = installObservationCounter('counting-dedup')
+    const messages = make5Msgs('ded')
+
+    // First call: 5 new message observations.
+    await runSkillLearningPostSampling(makeCtx(messages))
+    const afterFirst = seen.total
+
+    // Second call with SAME messages: all uuids already seen → 0 new
+    // observations from messages. The early `if (observations.length === 0) return`
+    // fires and the backend is never called.
+    await runSkillLearningPostSampling(makeCtx(messages))
+    const afterSecond = seen.total
+
+    expect(afterSecond).toBe(afterFirst)
+  })
+
+  test('different message uuids are always processed', async () => {
+    const seen = installObservationCounter('counting-dedup-new')
+
+    await runSkillLearningPostSampling(makeCtx(make5Msgs('new1')))
+    const afterFirst = seen.total
+
+    // Different uuids — all 5 new messages pass dedup.
+    await runSkillLearningPostSampling(makeCtx(make5Msgs('new2')))
+    expect(seen.total).toBeGreaterThan(afterFirst)
+  })
+
+  test('resetRuntimeLLMBookkeeping clears dedup set — same uuids reprocessed', async () => {
+    const seen = installObservationCounter('counting-dedup-clr')
+
+    const messages = make5Msgs('clr')
+    await runSkillLearningPostSampling(makeCtx(messages))
+    const afterFirst = seen.total
+
+    // After reset, dedup set is cleared — same messages are reprocessed.
+    resetRuntimeLLMBookkeeping()
+    await runSkillLearningPostSampling(makeCtx(messages))
+    expect(seen.total).toBeGreaterThan(afterFirst)
+  })
+})
+
+// ---------------------------------------------------------------------------
+// H7: Circuit breaker (tests the llmObserverBackend state machine directly)
+// ---------------------------------------------------------------------------
+describe('H7: circuit breaker', () => {
+  test('circuit opens after failure threshold and subsequent calls return heuristic result without hitting queryHaiku', async () => {
+    // In the test environment queryHaiku is expected to fail (no API key), so
+    // the real backend is used to drive the circuit breaker. NOTE: nothing here
+    // asserts that the circuit actually opened; this is a smoke test that the
+    // failure path and resetCircuitBreaker do not throw and still return an array.
+
+    const { llmObserverBackend } = await import('../llmObserverBackend.js')
+    resetCircuitBreaker()
+
+    setSkillLearningConfigForTest({
+      llm: { failureThreshold: 3, circuitCooldownMs: 60_000 },
+    })
+
+    const obs = makeObs(5)
+
+    // 3 calls → each is expected to fail → the 3rd failure should open the circuit.
+    // Each call falls back to the heuristic (possibly [] — the makeObs text
+    // 'test message' presumably matches no correction pattern), but the calls
+    // still go through the circuit.
+    await llmObserverBackend.analyze(obs)
+    await llmObserverBackend.analyze(obs)
+    await llmObserverBackend.analyze(obs)
+
+    // The circuit should now be open. resetCircuitBreaker is meant to close it
+    // so that the next call attempts queryHaiku again instead of
+    // short-circuiting (it would fail again and increment the counter).
+    // That difference is not observable from here without mocking, so the
+    // only thing checked below is that the call still resolves to an array.
+    resetCircuitBreaker()
+
+    // This call must reach queryHaiku (which fails → heuristic fallback) rather
+    // than short-circuit to heuristic from the open circuit. Either way the
+    // return value is an array — but the key is that resetCircuitBreaker works.
+    const result = await llmObserverBackend.analyze(obs)
+    expect(Array.isArray(result)).toBe(true)
+  })
+
+  test('circuit breaker env vars are respected', async () => {
+    // The threshold is set through the test config override (not env vars); 1 should open the circuit on the first failure.
+    const { llmObserverBackend } = await import('../llmObserverBackend.js')
+    resetCircuitBreaker()
+
+    setSkillLearningConfigForTest({
+      llm: { failureThreshold: 1, circuitCooldownMs: 60_000 },
+    })
+
+    const obs = makeObs(5)
+
+    // One expected failure — circuit should open.
+    await llmObserverBackend.analyze(obs)
+
+    // The next call should be short-circuited. We can't easily observe this
+    // without mocking, but we can verify that after resetCircuitBreaker the
+    // state is clean and a call proceeds without crashing.
+    resetCircuitBreaker()
+    const result = await llmObserverBackend.analyze(obs)
+    expect(Array.isArray(result)).toBe(true)
+  })
+
+  test('empty observations bypass circuit breaker entirely', async () => {
+    const { llmObserverBackend } = await import('../llmObserverBackend.js')
+    resetCircuitBreaker()
+
+    // Empty observations → expected to short-circuit at the top of the backend
+    // and return [] regardless of circuit state.
+    const result = await llmObserverBackend.analyze([])
+    expect(result).toEqual([])
+  })
+
+  test('resetCircuitBreaker resets state to closed', async () => {
+    const { llmObserverBackend } = await import('../llmObserverBackend.js')
+    resetCircuitBreaker()
+
+    // After reset, the backend is in clean state. Calling it with observations
+    // returns an array (either LLM result or heuristic fallback).
+    const result = await llmObserverBackend.analyze(makeObs(3))
+    expect(Array.isArray(result)).toBe(true)
+
+    resetCircuitBreaker()
+    const result2 = await llmObserverBackend.analyze(makeObs(3))
+    expect(Array.isArray(result2)).toBe(true)
+  })
+})
--- a/src/services/skillLearning/tests/toolEventObserver.test.ts
+++ b/src/services/skillLearning/tests/toolEventObserver.test.ts
@@ -0,0 +1,196 @@
+import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test'
+import { mkdtempSync, rmSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import { readObservations } from '../observationStore.js'
+import {
+  hasToolHookObservationsForTurn,
+  pruneEmittedTurns,
+  recordToolComplete,
+  recordToolError,
+  recordToolStart,
+  recordUserCorrection,
+  resetToolHookBookkeeping,
+  resetToolHookDepsCache,
+  runToolCallWithSkillLearningHooks,
+} from '../toolEventObserver.js'
+
+let rootDir: string // per-test temp directory used as the skill-learning home
+
+beforeEach(() => {
+  rootDir = mkdtempSync(join(tmpdir(), 'skill-learning-tool-hook-'))
+  resetToolHookBookkeeping() // presumably clears tracked session/turn state between tests — confirm
+  process.env.CLAUDE_SKILL_LEARNING_HOME = rootDir
+})
+
+afterEach(() => {
+  delete process.env.CLAUDE_SKILL_LEARNING_HOME // unset outright; a pre-existing value is not restored
+  rmSync(rootDir, { recursive: true, force: true })
+})
+
+/**
+ * Fixed observation context (session `tool-hook-session`, turn 1, project
+ * `p1`) rooted at the current test's temp directory.
+ */
+function ctx() {
+  const projectId = 'p1'
+  const projectName = 'project'
+  const cwd = rootDir
+  return {
+    sessionId: 'tool-hook-session',
+    turn: 1,
+    projectId,
+    projectName,
+    cwd,
+    project: {
+      projectId,
+      projectName,
+      cwd,
+      scope: 'project' as const,
+      source: 'global' as const,
+      storageDir: join(rootDir, 'projects', projectId),
+    },
+  }
+}
+
+describe('toolEventObserver', () => {
+  /** Reads back every observation stored for the fixture project. */
+  const readRecorded = () =>
+    readObservations({ rootDir, project: ctx().project })
+
+  test('records tool_start with tool-hook source', async () => {
+    await recordToolStart(ctx(), 'Grep', { pattern: 'foo' })
+    const observations = await readRecorded()
+    expect(observations).toHaveLength(1)
+    expect(observations[0]?.event).toBe('tool_start')
+    expect(observations[0]?.source).toBe('tool-hook')
+    expect(observations[0]?.toolName).toBe('Grep')
+  })
+
+  test('records tool_complete with success outcome', async () => {
+    await recordToolComplete(ctx(), 'Edit', 'ok', 'success')
+    const observations = await readRecorded()
+    expect(observations[0]?.event).toBe('tool_complete')
+    expect(observations[0]?.outcome).toBe('success')
+  })
+
+  test('records tool_error as tool_complete with failure outcome', async () => {
+    await recordToolError(ctx(), 'Bash', new Error('boom'))
+    const observations = await readRecorded()
+    expect(observations[0]?.outcome).toBe('failure')
+  })
+
+  test('records user correction message', async () => {
+    await recordUserCorrection(ctx(), '不要 mock，用 testing-library')
+    const observations = await readRecorded()
+    expect(observations[0]?.event).toBe('user_message')
+    expect(observations[0]?.messageText).toContain('testing-library')
+  })
+
+  test('tracks which session+turn has tool-hook observations', async () => {
+    expect(hasToolHookObservationsForTurn('tool-hook-session', 1)).toBe(false)
+    await recordToolStart(ctx(), 'Grep')
+    expect(hasToolHookObservationsForTurn('tool-hook-session', 1)).toBe(true)
+    expect(hasToolHookObservationsForTurn('tool-hook-session', 2)).toBe(false)
+  })
+
+  // H11: emittedTurns bounded memory tests
+  describe('pruneEmittedTurns', () => {
+    test('prunes Set entries exceeding SET_MAX keeping most recent', async () => {
+      const sessionId = 'big-session'
+      // Fill 501 turns (threshold is 500)
+      for (let i = 1; i <= 501; i++) {
+        await recordToolStart({ ...ctx(), sessionId, turn: i }, 'Grep')
+      }
+      // After pruning the Set should not exceed KEEP limit (250)
+      expect(hasToolHookObservationsForTurn(sessionId, 1)).toBe(false) // oldest pruned
+      expect(hasToolHookObservationsForTurn(sessionId, 501)).toBe(true) // newest kept
+      expect(hasToolHookObservationsForTurn(sessionId, 252)).toBe(true) // within keep window
+    })
+
+    test('prunes Map entries exceeding MAP_MAX keeping most recent insertions', async () => {
+      // Insert 51 distinct sessions (threshold is 50)
+      for (let i = 0; i < 51; i++) {
+        await recordToolStart(
+          { ...ctx(), sessionId: `session-${i}`, turn: 1 },
+          'Grep',
+        )
+      }
+      // Oldest sessions should have been pruned from the Map
+      expect(hasToolHookObservationsForTurn('session-0', 1)).toBe(false)
+      // Most recent sessions should still be present
+      expect(hasToolHookObservationsForTurn('session-50', 1)).toBe(true)
+    })
+
+    test('pruneEmittedTurns is idempotent when within limits', async () => {
+      await recordToolStart(ctx(), 'Grep')
+      pruneEmittedTurns()
+      pruneEmittedTurns()
+      // Should not affect tracked turns within limits
+      expect(hasToolHookObservationsForTurn('tool-hook-session', 1)).toBe(true)
+    })
+  })
+
+  // H10: fire-and-forget / flag-off tests
+  describe('runToolCallWithSkillLearningHooks', () => {
+    afterEach(() => {
+      resetToolHookDepsCache()
+      delete process.env.SKILL_LEARNING_ENABLED
+    })
+
+    test('invoke completes before recordToolStart promise resolves (fire-and-forget)', async () => {
+      process.env.SKILL_LEARNING_ENABLED = '1'
+      resetToolHookDepsCache()
+
+      // Observation recording is not mocked or delayed here, so the relative
+      // ordering of hook and invoke cannot be asserted directly. The test only
+      // checks that the wrapper resolves with the invoke result after the
+      // invoke callback has finished.
+      let invokeCompleted = false
+
+      const result = await runToolCallWithSkillLearningHooks(
+        'TestTool',
+        {},
+        { sessionId: 'test-ff-session', turn: 99 },
+        async () => {
+          // Short delay to let any awaited hooks run first (they must not)
+          await new Promise(res => setTimeout(res, 5))
+          invokeCompleted = true
+          return { data: 'done' }
+        },
+      )
+
+      // The invoke result is returned immediately — observation may still be in-flight
+      expect(result).toEqual({ data: 'done' })
+      expect(invokeCompleted).toBe(true)
+    })
+
+    test('flag off: wrapper skips observation entirely and returns invoke result', async () => {
+      process.env.SKILL_LEARNING_ENABLED = '0'
+      resetToolHookDepsCache()
+
+      let invokeCalled = false
+      const result = await runToolCallWithSkillLearningHooks(
+        'TestTool',
+        {},
+        {},
+        async () => {
+          invokeCalled = true
+          return { data: 42 }
+        },
+      )
+      expect(invokeCalled).toBe(true)
+      expect(result).toEqual({ data: 42 })
+      // No observations should have been written
+      const obs = await readRecorded()
+      expect(obs).toHaveLength(0)
+    })
+  })
+})
--- a/src/services/skillLearning/agentGenerator.ts
+++ b/src/services/skillLearning/agentGenerator.ts
@@ -0,0 +1,164 @@
+import { mkdir, writeFile } from 'node:fs/promises'
+import { existsSync } from 'node:fs'
+import { join } from 'node:path'
+import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
+import { clearCommandsCache } from '../../commands.js'
+import type { Instinct } from './instinctParser.js'
+import { normalizeSkillName } from './learningPolicy.js'
+import type { SkillLearningScope } from './types.js'
+
+export type AgentGeneratorOptions = {
+  cwd?: string // project root for project scope; falls back to process.cwd()
+  globalAgentsDir?: string // agents directory used when scope is not 'project'
+  outputRoot?: string // when set (non-empty), returned verbatim as the output directory
+  name?: string // replaces the generated name; still passed through normalizeSkillName
+  description?: string // replaces the trigger-derived description
+  scope?: SkillLearningScope // takes precedence over the first instinct's scope
+}
+
+export type LearnedAgentDraft = {
+  name: string // normalized agent name; also the stem of the written `.md` file
+  description: string
+  scope: SkillLearningScope
+  sourceInstinctIds: string[] // ids of the instincts the draft was generated from
+  confidence: number // mean instinct confidence, rounded to two decimals
+  content: string // complete markdown document including front matter
+  outputPath: string // directory (not file path) the agent file is written into
+}
+
+/**
+ * Builds an agent draft (name, description, markdown content and target
+ * directory) from a non-empty list of instincts. Nothing is written to disk.
+ *
+ * @param instincts Instincts to fold into the agent; the first one supplies
+ *   the default scope and description.
+ * @param options Optional overrides for name, description, scope and paths.
+ * @returns The draft, with `confidence` rounded to two decimals.
+ * @throws Error when `instincts` is empty.
+ */
+export function generateAgentDraft(
+  instincts: Instinct[],
+  options?: AgentGeneratorOptions,
+): LearnedAgentDraft {
+  if (instincts.length === 0) {
+    throw new Error('Cannot generate an agent draft without instincts')
+  }
+
+  const scope = options?.scope ?? instincts[0]?.scope ?? 'project'
+  const rawName = options?.name ?? buildAgentName(instincts)
+  // A caller-supplied name may normalize to an empty string; use the same
+  // default buildAgentName falls back to so the file stem is never empty.
+  const name = normalizeSkillName(rawName) || 'learned-agent'
+  const confidence = averageConfidence(instincts)
+  const description = options?.description ?? buildDescription(instincts)
+  const outputPath = getLearnedAgentPath(name, scope, options)
+  const content = buildAgentContent({
+    name,
+    description,
+    confidence,
+    instincts,
+  })
+
+  return {
+    name,
+    description,
+    scope,
+    sourceInstinctIds: instincts.map(instinct => instinct.id),
+    confidence: Number(confidence.toFixed(2)),
+    content,
+    outputPath,
+  }
+}
+
+/**
+ * Writes the draft to `<outputPath>/<name>.md`, creating the directory when
+ * needed, and clears the commands cache after a successful write.
+ *
+ * An existing file is never overwritten: the function returns its path
+ * without writing or clearing the cache.
+ *
+ * @returns The path of the agent file (newly written or pre-existing).
+ * @throws Any filesystem error other than the file already existing.
+ */
+export async function writeLearnedAgent(
+  draft: LearnedAgentDraft,
+): Promise<string> {
+  await mkdir(draft.outputPath, { recursive: true })
+  const filePath = join(draft.outputPath, `${draft.name}.md`)
+  if (existsSync(filePath)) return filePath
+  try {
+    // 'wx' makes the write fail with EEXIST rather than overwrite a file that
+    // appeared between the existence check above and this write.
+    await writeFile(filePath, draft.content, { encoding: 'utf8', flag: 'wx' })
+  } catch (error: unknown) {
+    const code = (error as { code?: unknown } | null)?.code
+    if (code === 'EEXIST') return filePath
+    throw error
+  }
+  clearCommandsCache()
+  return filePath
+}
+
+/**
+ * Resolves the directory learned agents are written to. Precedence: a
+ * non-empty `options.outputRoot`; then `<cwd>/.claude/agents` for project
+ * scope; otherwise `options.globalAgentsDir` or `<config home>/agents`.
+ * The `_name` argument does not influence the result.
+ */
+export function getLearnedAgentPath(
+  _name: string,
+  scope: SkillLearningScope,
+  options?: AgentGeneratorOptions,
+): string {
+  const explicitRoot = options?.outputRoot
+  if (explicitRoot) return explicitRoot
+
+  if (scope !== 'project') {
+    return options?.globalAgentsDir ?? join(getClaudeConfigHomeDir(), 'agents')
+  }
+  const projectRoot = options?.cwd ?? process.cwd()
+  return join(projectRoot, '.claude', 'agents')
+}
+
+/**
+ * Derives `learned-agent-<up to four keywords>` from the instincts and
+ * normalizes it, falling back to `learned-agent` if normalization yields ''.
+ */
+function buildAgentName(instincts: Instinct[]): string {
+  const parts = ['learned', 'agent'].concat(extractWords(instincts, 4))
+  const normalized = normalizeSkillName(parts.join('-'))
+  return normalized ? normalized : 'learned-agent'
+}
+
+/**
+ * Uses the first instinct's trigger (or a stock sentence when there is none),
+ * collapses whitespace runs to single spaces and keeps the first 120 characters.
+ */
+function buildDescription(instincts: Instinct[]): string {
+  const source =
+    instincts[0]?.trigger ?? 'Run the learned multi-step workflow'
+  const singleSpaced = source.replace(/\s+/g, ' ')
+  return singleSpaced.slice(0, 120)
+}
+
+/**
+ * Renders the agent markdown: YAML front matter (name, JSON-quoted
+ * description, origin, two-decimal confidence, source instinct ids) followed
+ * by an intro line and Triggers / Playbook / Evidence bullet lists. The
+ * document ends with a single trailing newline.
+ */
+function buildAgentContent(params: {
+  name: string
+  description: string
+  confidence: number
+  instincts: Instinct[]
+}): string {
+  const toBullets = (items: string[]): string =>
+    items.map(item => `- ${item}`).join('\n')
+
+  const quotedIds = params.instincts
+    .map(entry => JSON.stringify(entry.id))
+    .join(', ')
+  const frontMatter = [
+    '---',
+    `name: ${params.name}`,
+    `description: ${JSON.stringify(params.description)}`,
+    'origin: skill-learning',
+    `confidence: ${Number(params.confidence.toFixed(2))}`,
+    `evolved_from: [${quotedIds}]`,
+    '---',
+  ]
+
+  const triggers = toBullets(params.instincts.map(entry => entry.trigger))
+  const playbook = toBullets(params.instincts.map(entry => entry.action))
+  const evidence = toBullets(params.instincts.flatMap(entry => entry.evidence))
+
+  const body = [
+    `You are the ${params.name} learned agent.`,
+    '',
+    '## Triggers',
+    '',
+    triggers,
+    '',
+    '## Playbook',
+    '',
+    playbook,
+    '',
+    '## Evidence',
+    '',
+    evidence,
+    '',
+  ]
+
+  return [...frontMatter, '', ...body].join('\n')
+}
+
+/** Arithmetic mean of the instincts' confidence values (NaN for an empty list). */
+function averageConfidence(instincts: Instinct[]): number {
+  let total = 0
+  for (const { confidence } of instincts) {
+    total += confidence
+  }
+  return total / instincts.length
+}
+
+/**
+ * Collects distinct lowercase ASCII alphanumeric tokens (longer than two
+ * characters, not stop words) from each instinct's trigger and action, in
+ * order of first appearance, returning as soon as `max` have been gathered.
+ */
+function extractWords(instincts: Instinct[], max: number): string[] {
+  const ignored = new Set([
+    'when',
+    'with',
+    'this',
+    'that',
+    'user',
+    'asks',
+    'for',
+    'the',
+    'and',
+    'debug',
+    'investigate',
+    'research',
+  ])
+  const collected: string[] = []
+  for (const { trigger, action } of instincts) {
+    const tokens = `${trigger} ${action}`.toLowerCase().split(/[^a-z0-9]+/)
+    for (const token of tokens) {
+      const isNewKeyword =
+        token.length > 2 && !ignored.has(token) && !collected.includes(token)
+      if (isNewKeyword) collected.push(token)
+      if (collected.length >= max) return collected
+    }
+  }
+  return collected
+}
--- a/src/services/skillLearning/commandGenerator.ts
+++ b/src/services/skillLearning/commandGenerator.ts
@@ -0,0 +1,167 @@
+import { mkdir, writeFile } from 'node:fs/promises'
+import { existsSync } from 'node:fs'
+import { join } from 'node:path'
+import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
+import { clearCommandsCache } from '../../commands.js'
+import type { Instinct } from './instinctParser.js'
+import { normalizeSkillName } from './learningPolicy.js'
+import type { SkillLearningScope } from './types.js'
+
+export type CommandGeneratorOptions = {
+  cwd?: string // project root for project scope; falls back to process.cwd()
+  globalCommandsDir?: string // commands directory used when scope is not 'project'
+  outputRoot?: string // when set (non-empty), returned verbatim as the output directory
+  name?: string // replaces the generated name; still passed through normalizeSkillName
+  description?: string // replaces the trigger-derived description
+  scope?: SkillLearningScope // takes precedence over the first instinct's scope
+}
+
+export type LearnedCommandDraft = {
+  name: string // normalized command name; also the stem of the written `.md` file
+  description: string
+  scope: SkillLearningScope
+  sourceInstinctIds: string[] // ids of the instincts the draft was generated from
+  confidence: number // mean instinct confidence, rounded to two decimals
+  content: string // complete markdown document including front matter
+  outputPath: string // directory (not file path) the command file is written into
+}
+
+/**
+ * Builds a command draft (name, description, markdown content and target
+ * directory) from a non-empty list of instincts. Nothing is written to disk.
+ *
+ * @param instincts Instincts to fold into the command; the first one supplies
+ *   the default scope and description.
+ * @param options Optional overrides for name, description, scope and paths.
+ * @returns The draft, with `confidence` rounded to two decimals.
+ * @throws Error when `instincts` is empty.
+ */
+export function generateCommandDraft(
+  instincts: Instinct[],
+  options?: CommandGeneratorOptions,
+): LearnedCommandDraft {
+  if (instincts.length === 0) {
+    throw new Error('Cannot generate a command draft without instincts')
+  }
+
+  const scope = options?.scope ?? instincts[0]?.scope ?? 'project'
+  const rawName = options?.name ?? buildCommandName(instincts)
+  // A caller-supplied name may normalize to an empty string; use the same
+  // default buildCommandName falls back to so the file stem is never empty.
+  const name = normalizeSkillName(rawName) || 'learned-command'
+  const confidence = averageConfidence(instincts)
+  const description = options?.description ?? buildDescription(instincts)
+  const outputPath = getLearnedCommandPath(name, scope, options)
+  const content = buildCommandContent({
+    name,
+    description,
+    confidence,
+    instincts,
+  })
+
+  return {
+    name,
+    description,
+    scope,
+    sourceInstinctIds: instincts.map(instinct => instinct.id),
+    confidence: Number(confidence.toFixed(2)),
+    content,
+    outputPath,
+  }
+}
+
+/**
+ * Writes the draft to `<outputPath>/<name>.md`, creating the directory when
+ * needed, and clears the commands cache after a successful write.
+ *
+ * An existing file is never overwritten: the function returns its path
+ * without writing or clearing the cache.
+ *
+ * @returns The path of the command file (newly written or pre-existing).
+ * @throws Any filesystem error other than the file already existing.
+ */
+export async function writeLearnedCommand(
+  draft: LearnedCommandDraft,
+): Promise<string> {
+  await mkdir(draft.outputPath, { recursive: true })
+  const filePath = join(draft.outputPath, `${draft.name}.md`)
+  if (existsSync(filePath)) return filePath
+  try {
+    // 'wx' makes the write fail with EEXIST rather than overwrite a file that
+    // appeared between the existence check above and this write.
+    await writeFile(filePath, draft.content, { encoding: 'utf8', flag: 'wx' })
+  } catch (error: unknown) {
+    const code = (error as { code?: unknown } | null)?.code
+    if (code === 'EEXIST') return filePath
+    throw error
+  }
+  clearCommandsCache()
+  return filePath
+}
+
+/**
+ * Resolves the directory learned commands are written to. Precedence: a
+ * non-empty `options.outputRoot`; then `<cwd>/.claude/commands` for project
+ * scope; otherwise `options.globalCommandsDir` or `<config home>/commands`.
+ * The `_name` argument does not influence the result.
+ */
+export function getLearnedCommandPath(
+  _name: string,
+  scope: SkillLearningScope,
+  options?: CommandGeneratorOptions,
+): string {
+  const explicitRoot = options?.outputRoot
+  if (explicitRoot) return explicitRoot
+
+  if (scope !== 'project') {
+    return (
+      options?.globalCommandsDir ?? join(getClaudeConfigHomeDir(), 'commands')
+    )
+  }
+  const projectRoot = options?.cwd ?? process.cwd()
+  return join(projectRoot, '.claude', 'commands')
+}
+
+/**
+ * Derives `learned-<up to four keywords>` from the instincts and normalizes
+ * it, falling back to `learned-command` if normalization yields ''.
+ */
+function buildCommandName(instincts: Instinct[]): string {
+  const parts = ['learned'].concat(extractWords(instincts, 4))
+  const normalized = normalizeSkillName(parts.join('-'))
+  return normalized ? normalized : 'learned-command'
+}
+
+/**
+ * Uses the first instinct's trigger (or a stock sentence when there is none),
+ * collapses whitespace runs to single spaces and keeps the first 120 characters.
+ */
+function buildDescription(instincts: Instinct[]): string {
+  const source = instincts[0]?.trigger ?? 'Reuse the learned workflow'
+  const singleSpaced = source.replace(/\s+/g, ' ')
+  return singleSpaced.slice(0, 120)
+}
+
+/**
+ * Renders the command markdown: YAML front matter (name, JSON-quoted
+ * description, origin, two-decimal confidence, source instinct ids) followed
+ * by a `# /<name>` heading and When to use / Steps / Evidence bullet lists.
+ * The document ends with a single trailing newline.
+ */
+function buildCommandContent(params: {
+  name: string
+  description: string
+  confidence: number
+  instincts: Instinct[]
+}): string {
+  const toBullets = (items: string[]): string =>
+    items.map(item => `- ${item}`).join('\n')
+
+  const quotedIds = params.instincts
+    .map(entry => JSON.stringify(entry.id))
+    .join(', ')
+  const frontMatter = [
+    '---',
+    `name: ${params.name}`,
+    `description: ${JSON.stringify(params.description)}`,
+    'origin: skill-learning',
+    `confidence: ${Number(params.confidence.toFixed(2))}`,
+    `evolved_from: [${quotedIds}]`,
+    '---',
+  ]
+
+  const whenToUse = toBullets(params.instincts.map(entry => entry.trigger))
+  const steps = toBullets(params.instincts.map(entry => entry.action))
+  const evidence = toBullets(params.instincts.flatMap(entry => entry.evidence))
+
+  const body = [
+    `# /${params.name}`,
+    '',
+    '## When to use',
+    '',
+    whenToUse,
+    '',
+    '## Steps',
+    '',
+    steps,
+    '',
+    '## Evidence',
+    '',
+    evidence,
+    '',
+  ]
+
+  return [...frontMatter, '', ...body].join('\n')
+}
+
+/** Arithmetic mean of the instincts' confidence values (NaN for an empty list). */
+function averageConfidence(instincts: Instinct[]): number {
+  let total = 0
+  for (const { confidence } of instincts) {
+    total += confidence
+  }
+  return total / instincts.length
+}
+
+/**
+ * Collects distinct lowercase ASCII alphanumeric tokens (longer than two
+ * characters, not stop words) from each instinct's trigger and action, in
+ * order of first appearance, returning as soon as `max` have been gathered.
+ */
+function extractWords(instincts: Instinct[], max: number): string[] {
+  const ignored = new Set([
+    'when',
+    'with',
+    'this',
+    'that',
+    'user',
+    'asks',
+    'for',
+    'the',
+    'and',
+    'run',
+    'use',
+    'prefer',
+    'avoid',
+  ])
+  const collected: string[] = []
+  for (const { trigger, action } of instincts) {
+    const tokens = `${trigger} ${action}`.toLowerCase().split(/[^a-z0-9]+/)
+    for (const token of tokens) {
+      const isNewKeyword =
+        token.length > 2 && !ignored.has(token) && !collected.includes(token)
+      if (isNewKeyword) collected.push(token)
+      if (collected.length >= max) return collected
+    }
+  }
+  return collected
+}
--- a/src/services/skillLearning/config.ts
+++ b/src/services/skillLearning/config.ts
@@ -0,0 +1,52 @@
+export type SkillLearningLlmConfig = {
+  readonly timeoutMs: number // presumably the per-LLM-call timeout in ms — confirm against the backend
+  readonly maxCallsPerSession: number // cap on LLM calls per session
+  readonly cooldownMs: number // minimum gap between LLM calls, in ms
+  readonly failureThreshold: number // failures after which the circuit breaker opens
+  readonly circuitCooldownMs: number // presumably how long the circuit stays open, in ms — confirm
+}
+
+export type SkillLearningConfig = {
+  readonly minConfidence: number
+  readonly minClusterSize: number // minimum instincts per cluster before it becomes a candidate
+  readonly llm: SkillLearningLlmConfig
+}
+
+export type SkillLearningConfigOverrides = {
+  minConfidence?: number
+  minClusterSize?: number
+  llm?: Partial<SkillLearningLlmConfig> // merged key-by-key over the default llm settings
+}
+
+const DEFAULTS: SkillLearningConfig = {
+  minConfidence: 0.75,
+  minClusterSize: 3,
+  llm: {
+    timeoutMs: 10_000, // 10 s
+    maxCallsPerSession: 20,
+    cooldownMs: 30_000, // 30 s
+    failureThreshold: 3,
+    circuitCooldownMs: 60_000, // 60 s
+  },
+}
+
+// Test-only override layer; `undefined` means "use DEFAULTS unchanged".
+let overrides: SkillLearningConfigOverrides | undefined
+
+/**
+ * Returns the effective configuration: `DEFAULTS` itself when no override is
+ * installed, otherwise a new object with the override layered over the
+ * defaults.
+ *
+ * An override key whose value is explicitly `undefined` counts as "not set"
+ * at both levels, so `{ llm: { cooldownMs: undefined } }` keeps the default
+ * cooldown instead of producing an `undefined` field.
+ */
+export function getSkillLearningConfig(): SkillLearningConfig {
+  if (!overrides) return DEFAULTS
+  // A plain spread would copy explicit `undefined` values over the defaults,
+  // so drop those entries first (mirrors the `??` used for top-level keys).
+  const llmOverrides = Object.fromEntries(
+    Object.entries(overrides.llm ?? {}).filter(
+      ([, value]) => value !== undefined,
+    ),
+  ) as Partial<SkillLearningLlmConfig>
+  return {
+    minConfidence: overrides.minConfidence ?? DEFAULTS.minConfidence,
+    minClusterSize: overrides.minClusterSize ?? DEFAULTS.minClusterSize,
+    llm: { ...DEFAULTS.llm, ...llmOverrides },
+  }
+}
+
+export function setSkillLearningConfigForTest(
+  config: SkillLearningConfigOverrides,
+): void {
+  overrides = config // replaces (does not merge with) any previously installed override
+}
+
+export function resetSkillLearningConfig(): void {
+  overrides = undefined // getSkillLearningConfig returns DEFAULTS again after this
+}
--- a/src/services/skillLearning/evolution.ts
+++ b/src/services/skillLearning/evolution.ts
@@ -0,0 +1,174 @@
+import type { Instinct } from './instinctParser.js'
+import { shouldGenerateSkillFromInstincts } from './learningPolicy.js'
+import {
+  generateSkillDraft,
+  type SkillGeneratorOptions,
+} from './skillGenerator.js'
+import {
+  generateCommandDraft,
+  type CommandGeneratorOptions,
+  type LearnedCommandDraft,
+} from './commandGenerator.js'
+import {
+  generateAgentDraft,
+  type AgentGeneratorOptions,
+  type LearnedAgentDraft,
+} from './agentGenerator.js'
+import { getSkillLearningConfig } from './config.js'
+import type { LearnedSkillDraft } from './types.js'
+
+/**
+ * A cluster of related instincts together with the artifact kind it should
+ * evolve into. As built by `clusterInstincts`, `trigger` and `domain` come
+ * from the first instinct in the cluster and `averageConfidence` is the mean
+ * confidence rounded to two decimals.
+ */
+export type EvolutionCandidate = {
+  target: 'skill' | 'command' | 'agent'
+  trigger: string
+  domain: string
+  instincts: Instinct[]
+  averageConfidence: number
+}
+
+/** A generated draft tagged with its artifact kind (`kind` discriminates). */
+export type LearnedArtifactDraft =
+  | { kind: 'skill'; draft: LearnedSkillDraft }
+  | { kind: 'command'; draft: LearnedCommandDraft }
+  | { kind: 'agent'; draft: LearnedAgentDraft }
+
+/**
+ * Groups `active` and `pending` instincts by domain plus normalized trigger
+ * and turns every sufficiently large group into an evolution candidate.
+ *
+ * Groups smaller than the configured `minClusterSize` are dropped without
+ * exception: a single high-confidence instinct must not become a persistent
+ * policy, so repeated independent observation is always required.
+ *
+ * @returns candidates ordered by descending average confidence.
+ */
+export function clusterInstincts(instincts: Instinct[]): EvolutionCandidate[] {
+  const buckets = new Map<string, Instinct[]>()
+  for (const instinct of instincts) {
+    const eligible =
+      instinct.status === 'active' || instinct.status === 'pending'
+    if (!eligible) continue
+    const bucketKey = `${instinct.domain}:${normalizedTrigger(instinct.trigger)}`
+    const bucket = buckets.get(bucketKey)
+    if (bucket) {
+      bucket.push(instinct)
+    } else {
+      buckets.set(bucketKey, [instinct])
+    }
+  }
+
+  const candidates: EvolutionCandidate[] = []
+  for (const members of buckets.values()) {
+    // Cluster-size floor applies unconditionally (no confidence bypass).
+    if (members.length < getSkillLearningConfig().minClusterSize) continue
+
+    let confidenceTotal = 0
+    for (const member of members) confidenceTotal += member.confidence
+    const mean = confidenceTotal / members.length
+    const first = members[0]
+
+    candidates.push({
+      target: classifyEvolutionTarget(members),
+      trigger: first?.trigger ?? 'learned pattern',
+      domain: first?.domain ?? 'project',
+      instincts: members,
+      averageConfidence: Number(mean.toFixed(2)),
+    })
+  }
+
+  return candidates.sort(
+    (left, right) => right.averageConfidence - left.averageConfidence,
+  )
+}
+
+/**
+ * Decides which artifact kind a set of instincts should become, using keyword
+ * heuristics over the lower-cased trigger and action texts.
+ *
+ * - `command` when the text mentions an explicit user request or a command.
+ * - `agent` when there are at least four instincts and the text mentions
+ *   debugging, investigating, research or multi-step work.
+ * - `skill` otherwise.
+ *
+ * @param instinctsOrCandidate either the instincts themselves or a candidate
+ *   whose `instincts` are inspected.
+ */
+export function classifyEvolutionTarget(
+  instinctsOrCandidate: Instinct[] | EvolutionCandidate,
+): 'skill' | 'command' | 'agent' {
+  const members = Array.isArray(instinctsOrCandidate)
+    ? instinctsOrCandidate
+    : instinctsOrCandidate.instincts
+
+  const phrases: string[] = []
+  for (const member of members) {
+    phrases.push(`${member.trigger} ${member.action}`)
+  }
+  const haystack = phrases.join(' ').toLowerCase()
+
+  // The command check wins over the agent check.
+  const looksLikeCommand =
+    /user asks|explicitly request|command|run /.test(haystack)
+  if (looksLikeCommand) return 'command'
+
+  const looksInvestigative =
+    /(debug|investigate|research|multi-step)/.test(haystack)
+  return members.length >= 4 && looksInvestigative ? 'agent' : 'skill'
+}
+
+/** Alias for `clusterInstincts`: returns the same evolution candidates. */
+export function suggestEvolutions(instincts: Instinct[]): EvolutionCandidate[] {
+  return clusterInstincts(instincts)
+}
+
+/**
+ * Produces a skill draft for every cluster classified as `skill` whose
+ * instincts pass `shouldGenerateSkillFromInstincts`.
+ *
+ * The draft scope is always taken from the cluster's first instinct
+ * (defaulting to `'project'`), overriding any `scope` in `options`.
+ */
+export function generateSkillCandidates(
+  instincts: Instinct[],
+  options?: SkillGeneratorOptions,
+): LearnedSkillDraft[] {
+  const drafts: LearnedSkillDraft[] = []
+  for (const candidate of clusterInstincts(instincts)) {
+    if (candidate.target !== 'skill') continue
+    if (!shouldGenerateSkillFromInstincts(candidate.instincts)) continue
+    const scope = candidate.instincts[0]?.scope ?? 'project'
+    drafts.push(generateSkillDraft(candidate.instincts, { ...options, scope }))
+  }
+  return drafts
+}
+
+/**
+ * Produces a command draft for every cluster classified as `command` whose
+ * instincts pass `shouldGenerateSkillFromInstincts`.
+ *
+ * The draft scope is always taken from the cluster's first instinct
+ * (defaulting to `'project'`), overriding any `scope` in `options`.
+ */
+export function generateCommandCandidates(
+  instincts: Instinct[],
+  options?: CommandGeneratorOptions,
+): LearnedCommandDraft[] {
+  const drafts: LearnedCommandDraft[] = []
+  for (const candidate of clusterInstincts(instincts)) {
+    if (candidate.target !== 'command') continue
+    if (!shouldGenerateSkillFromInstincts(candidate.instincts)) continue
+    const scope = candidate.instincts[0]?.scope ?? 'project'
+    drafts.push(
+      generateCommandDraft(candidate.instincts, { ...options, scope }),
+    )
+  }
+  return drafts
+}
+
+/**
+ * Produces an agent draft for every cluster classified as `agent` whose
+ * instincts pass `shouldGenerateSkillFromInstincts`.
+ *
+ * The draft scope is always taken from the cluster's first instinct
+ * (defaulting to `'project'`), overriding any `scope` in `options`.
+ */
+export function generateAgentCandidates(
+  instincts: Instinct[],
+  options?: AgentGeneratorOptions,
+): LearnedAgentDraft[] {
+  const drafts: LearnedAgentDraft[] = []
+  for (const candidate of clusterInstincts(instincts)) {
+    if (candidate.target !== 'agent') continue
+    if (!shouldGenerateSkillFromInstincts(candidate.instincts)) continue
+    const scope = candidate.instincts[0]?.scope ?? 'project'
+    drafts.push(generateAgentDraft(candidate.instincts, { ...options, scope }))
+  }
+  return drafts
+}
+
+/**
+ * Runs the skill, command and agent generators over the same instincts and
+ * returns their drafts as one tagged list: skills first, then commands, then
+ * agents.
+ *
+ * @param options per-generator options, forwarded to the matching generator.
+ */
+export function generateAllCandidates(
+  instincts: Instinct[],
+  options?: {
+    skill?: SkillGeneratorOptions
+    command?: CommandGeneratorOptions
+    agent?: AgentGeneratorOptions
+  },
+): LearnedArtifactDraft[] {
+  const artifacts: LearnedArtifactDraft[] = []
+  for (const draft of generateSkillCandidates(instincts, options?.skill)) {
+    artifacts.push({ kind: 'skill', draft })
+  }
+  for (const draft of generateCommandCandidates(instincts, options?.command)) {
+    artifacts.push({ kind: 'command', draft })
+  }
+  for (const draft of generateAgentCandidates(instincts, options?.agent)) {
+    artifacts.push({ kind: 'agent', draft })
+  }
+  return artifacts
+}
+
+/**
+ * Canonical form of a trigger used as part of the clustering key: lower-cased,
+ * every run of non-alphanumeric characters treated as a separator, and only
+ * the first six words kept, joined by single spaces.
+ */
+function normalizedTrigger(trigger: string): string {
+  const lowered = trigger.toLowerCase()
+  const spaced = lowered.replace(/[^a-z0-9]+/g, ' ')
+  const kept: string[] = []
+  for (const piece of spaced.split(/\s+/)) {
+    if (kept.length === 6) break
+    if (piece) kept.push(piece)
+  }
+  return kept.join(' ')
+}
--- a/src/services/skillLearning/featureCheck.ts
+++ b/src/services/skillLearning/featureCheck.ts
@@ -0,0 +1,12 @@
+import { feature } from 'bun:bundle'
+
+/**
+ * Reports whether skill learning is switched on.
+ *
+ * Environment variables win over the build-time flag and are consulted in
+ * this order: `SKILL_LEARNING_ENABLED`, then `FEATURE_SKILL_LEARNING`. A
+ * value of `'0'` disables, `'1'` enables, anything else defers to the next
+ * source. The last resort is the bundler feature flag `SKILL_LEARNING`.
+ */
+export function isSkillLearningEnabled(): boolean {
+  const switches = ['SKILL_LEARNING_ENABLED', 'FEATURE_SKILL_LEARNING'] as const
+  for (const name of switches) {
+    const value = process.env[name]
+    if (value === '0') return false
+    if (value === '1') return true
+  }
+  if (feature('SKILL_LEARNING')) {
+    return true
+  }
+  return false
+}
--- a/src/services/skillLearning/index.ts
+++ b/src/services/skillLearning/index.ts
@@ -0,0 +1,37 @@
+export * from './featureCheck.js'
+export * from './evolution.js'
+export {
+  createInstinct,
+  parseInstinct,
+  serializeInstinct,
+} from './instinctParser.js'
+export * from './learningPolicy.js'
+export {
+  exportInstincts,
+  importInstincts,
+  loadInstincts,
+  prunePendingInstincts,
+  saveInstinct,
+  updateConfidence,
+  upsertInstinct,
+} from './instinctStore.js'
+export {
+  appendObservation,
+  ingestTranscript,
+  readObservations,
+  scrubObservation,
+  scrubText,
+} from './observationStore.js'
+export * from './promotion.js'
+export * from './projectContext.js'
+export * from './runtimeObserver.js'
+export * from './observerBackend.js'
+export { llmObserverBackend } from './llmObserverBackend.js'
+export * from './commandGenerator.js'
+export * from './agentGenerator.js'
+export * from './toolEventObserver.js'
+export * from './sessionObserver.js'
+export * from './skillGapStore.js'
+export * from './skillGenerator.js'
+export * from './skillLifecycle.js'
+export * from './types.js'
--- a/src/services/skillLearning/instinctParser.ts
+++ b/src/services/skillLearning/instinctParser.ts
@@ -0,0 +1,115 @@
+import { createHash } from 'node:crypto'
+import type {
+  SkillLearningProjectContext,
+  SkillLearningScope,
+  StoredSkillObservation,
+} from './observationStore.js'
+import type { Instinct as BaseInstinct, InstinctStatus } from './types.js'
+
+export type { Instinct } from './types.js'
+
+/**
+ * An instinct as persisted by the store: the base instinct plus the optional
+ * ids of the observations it was derived from.
+ */
+export type StoredInstinct = BaseInstinct & {
+  observationIds?: string[]
+}
+
+/**
+ * Input accepted by `createInstinct`: a stored instinct without timestamps,
+ * where `id` and `status` are optional because `createInstinct` can derive
+ * the id and defaults the status to `'pending'`.
+ */
+export type InstinctCandidate = Omit<
+  StoredInstinct,
+  'id' | 'createdAt' | 'updatedAt' | 'status'
+> & {
+  id?: string
+  status?: InstinctStatus
+}
+
+/**
+ * Builds a normalized stored instinct from a candidate.
+ *
+ * @param candidate instinct fields; `id` and `status` are optional.
+ * @param now ISO timestamp used for both `createdAt` and `updatedAt`
+ *   (defaults to the current time).
+ * @returns the normalized instinct. Status defaults to `'pending'`; a missing
+ *   or empty id is derived from trigger, action and scope.
+ */
+export function createInstinct(
+  candidate: InstinctCandidate,
+  now = new Date().toISOString(),
+): StoredInstinct {
+  // Pull `id` out before spreading: a candidate carrying an own
+  // `id: undefined` property would otherwise overwrite the derived id and
+  // leave normalizeInstinct to rebuild it.
+  const { id, ...fields } = candidate
+  return normalizeInstinct({
+    id:
+      id || buildInstinctId(candidate.trigger, candidate.action, candidate.scope),
+    ...fields,
+    createdAt: now,
+    updatedAt: now,
+    status: candidate.status ?? 'pending',
+  })
+}
+
+/**
+ * Returns a cleaned copy of `instinct`:
+ * - an empty id is replaced by one derived from trigger, action and scope
+ *   (the same inputs `createInstinct` uses, so both paths agree);
+ * - confidence is clamped to [0, 1] and rounded to two decimals;
+ * - falsy evidence entries are dropped and duplicates removed;
+ * - duplicate observation ids are removed (the field stays `undefined` when
+ *   absent).
+ */
+export function normalizeInstinct(instinct: StoredInstinct): StoredInstinct {
+  return {
+    ...instinct,
+    id:
+      instinct.id ||
+      buildInstinctId(instinct.trigger, instinct.action, instinct.scope),
+    confidence: clampConfidence(instinct.confidence),
+    evidence: Array.from(new Set(instinct.evidence.filter(Boolean))),
+    evidenceOutcome: instinct.evidenceOutcome,
+    observationIds: instinct.observationIds
+      ? Array.from(new Set(instinct.observationIds))
+      : undefined,
+  }
+}
+
+/**
+ * Serializes the normalized instinct as 2-space-indented JSON followed by a
+ * trailing newline.
+ */
+export function serializeInstinct(instinct: StoredInstinct): string {
+  return `${JSON.stringify(normalizeInstinct(instinct), null, 2)}\n`
+}
+
+/**
+ * Parses JSON text into a normalized instinct. Throws if `content` is not
+ * valid JSON. The parsed value is cast, not validated, so a document missing
+ * required fields (for example `evidence`) fails inside `normalizeInstinct`.
+ */
+export function parseInstinct(content: string): StoredInstinct {
+  return normalizeInstinct(JSON.parse(content) as StoredInstinct)
+}
+
+/**
+ * Derives a deterministic instinct id of the form `<slug>-<hash>`.
+ *
+ * The slug is the lower-cased "trigger action" text with non-alphanumeric
+ * runs collapsed to `-`, trimmed of one leading/trailing `-` and cut to 48
+ * characters (`instinct` when nothing is left). The hash is the first ten hex
+ * digits of the SHA-1 of scope, trigger and action, so the same text under a
+ * different scope yields a different id.
+ */
+export function buildInstinctId(
+  trigger: string,
+  action: string,
+  scope: SkillLearningScope = 'project',
+): string {
+  const lowered = `${trigger} ${action}`.toLowerCase()
+  const dashed = lowered.replace(/[^a-z0-9]+/g, '-')
+  const slug = dashed.replace(/^-|-$/g, '').slice(0, 48)
+
+  const hasher = createHash('sha1')
+  hasher.update(`${scope}\n${trigger}\n${action}`)
+  const shortHash = hasher.digest('hex').slice(0, 10)
+
+  const prefix = slug || 'instinct'
+  return `${prefix}-${shortHash}`
+}
+
+/**
+ * Seeds the provenance fields of an instinct candidate from one observation.
+ *
+ * Project identity prefers the explicit `project` context and falls back to
+ * the observation's own ids; scope defaults to `'project'`. The single
+ * evidence entry is the first defined value among message text, tool output,
+ * tool input, tool name and observation id.
+ *
+ * @returns a partial candidate; trigger, action, confidence and domain are
+ *   left for the caller to fill in.
+ */
+export function candidateFromObservation(
+  observation: StoredSkillObservation,
+  project?: SkillLearningProjectContext,
+): Partial<InstinctCandidate> {
+  const excerpt =
+    observation.messageText ??
+    observation.toolOutput ??
+    observation.toolInput ??
+    observation.toolName ??
+    observation.id
+  const scope = project?.scope ?? 'project'
+  const projectId = project?.projectId ?? observation.projectId
+  const projectName = project?.projectName ?? observation.projectName
+
+  return {
+    scope,
+    projectId,
+    projectName,
+    source: 'session-observation',
+    evidence: [excerpt],
+    observationIds: [observation.id],
+  }
+}
+
+/**
+ * Heuristic contradiction check between two instincts.
+ *
+ * They contradict when their triggers are equal ignoring case and exactly one
+ * of the two actions mentions "avoid", or exactly one mentions "prefer".
+ */
+export function isContradictingInstinct(
+  existing: StoredInstinct,
+  incoming: StoredInstinct,
+): boolean {
+  if (existing.trigger.toLowerCase() !== incoming.trigger.toLowerCase()) {
+    return false
+  }
+
+  const storedAction = existing.action.toLowerCase()
+  const newAction = incoming.action.toLowerCase()
+  const mentionedByOnlyOne = (keyword: string): boolean =>
+    storedAction.includes(keyword) !== newAction.includes(keyword)
+
+  return mentionedByOnlyOne('avoid') || mentionedByOnlyOne('prefer')
+}
+
+/**
+ * Rounds a confidence value to two decimals and clamps it into [0, 1].
+ * `NaN` is treated as 0.
+ */
+export function clampConfidence(confidence: number): number {
+  if (Number.isNaN(confidence)) {
+    return 0
+  }
+  const rounded = Number(confidence.toFixed(2))
+  return Math.min(1, Math.max(0, rounded))
+}
--- a/src/services/skillLearning/instinctStore.ts
+++ b/src/services/skillLearning/instinctStore.ts
@@ -0,0 +1,258 @@
+import {
+  mkdir,
+  readFile,
+  readdir,
+  rename,
+  unlink,
+  writeFile,
+} from 'node:fs/promises'
+import { randomBytes } from 'node:crypto'
+import { dirname, join } from 'node:path'
+import {
+  getSkillLearningRoot,
+  type ObservationStoreOptions,
+  type SkillLearningProjectContext,
+  type SkillLearningScope,
+} from './observationStore.js'
+import {
+  clampConfidence,
+  isContradictingInstinct,
+  normalizeInstinct,
+  parseInstinct,
+  serializeInstinct,
+  type StoredInstinct,
+} from './instinctParser.js'
+
+// Tail of the promise chain that serializes upsertInstinct() calls: each
+// upsert is chained onto this promise and then replaces it.
+let upsertQueue: Promise<unknown> = Promise.resolve()
+
+/**
+ * Options for the instinct store. `scope` and `project` decide which
+ * directory is used (see `getInstinctsDir`); an explicit `scope` takes
+ * precedence over `project.scope`.
+ */
+export type InstinctStoreOptions = ObservationStoreOptions & {
+  project?: SkillLearningProjectContext
+  scope?: SkillLearningScope
+}
+
+/**
+ * Resolves the directory holding personal instincts.
+ *
+ * The global directory (`<root>/global/instincts/personal`) is used when the
+ * effective scope is `'global'`, when no project is given, or when the
+ * project id is literally `'global'`. Otherwise the directory is
+ * `<root>/projects/<projectId>/instincts/personal`.
+ */
+export function getInstinctsDir(options?: InstinctStoreOptions): string {
+  const root = getSkillLearningRoot(options)
+  const project = options?.project
+  const effectiveScope = options?.scope ?? project?.scope ?? 'project'
+
+  const owner =
+    effectiveScope === 'global' || !project || project.projectId === 'global'
+      ? ['global']
+      : ['projects', project.projectId]
+
+  return join(root, ...owner, 'instincts', 'personal')
+}
+
+/**
+ * Persists an instinct as `<instincts dir>/<id>.json`, creating the
+ * directory when needed.
+ *
+ * The JSON is written to a uniquely named temp file and then renamed over the
+ * target. If the write or the rename fails, the temp file is removed
+ * (best-effort) before the original error is rethrown, so failures do not
+ * leave `.tmp` files behind.
+ *
+ * @returns the normalized instinct that was written.
+ */
+export async function saveInstinct(
+  instinct: StoredInstinct,
+  options?: InstinctStoreOptions,
+): Promise<StoredInstinct> {
+  const normalized = normalizeInstinct(instinct)
+  const dir = getInstinctsDir(options)
+  await mkdir(dir, { recursive: true })
+  const target = instinctPath(normalized.id, options)
+  const tmp = `${target}.${randomBytes(6).toString('hex')}.tmp`
+  try {
+    await writeFile(tmp, serializeInstinct(normalized))
+    await rename(tmp, target)
+  } catch (error) {
+    // Cleanup is best-effort: the temp file may never have been created, and
+    // the original failure is the one callers need to see.
+    try {
+      await unlink(tmp)
+    } catch {
+      // ignore
+    }
+    throw error
+  }
+  return normalized
+}
+
+/**
+ * Reads and parses every `*.json` file in the instincts directory.
+ *
+ * @returns the instincts sorted by id; an empty list when the directory does
+ *   not exist.
+ * @throws any other directory-read error, and any read or parse error of an
+ *   individual file.
+ */
+export async function loadInstincts(
+  options?: InstinctStoreOptions,
+): Promise<StoredInstinct[]> {
+  const dir = getInstinctsDir(options)
+
+  let entries: string[] = []
+  try {
+    entries = await readdir(dir)
+  } catch (error) {
+    const missingDir = (error as NodeJS.ErrnoException).code === 'ENOENT'
+    if (missingDir) return []
+    throw error
+  }
+
+  const loaded: StoredInstinct[] = []
+  for (const entry of entries) {
+    if (!entry.endsWith('.json')) continue
+    const text = await readFile(join(dir, entry), 'utf8')
+    loaded.push(parseInstinct(text))
+  }
+
+  loaded.sort((left, right) => left.id.localeCompare(right.id))
+  return loaded
+}
+
+/**
+ * Queues an upsert of `incoming` behind all previously queued upserts, so
+ * that the load-merge-save cycles of `doUpsertInstinct` never interleave
+ * within this module instance.
+ *
+ * @returns a promise for the saved instinct; it rejects if this upsert fails.
+ */
+export function upsertInstinct(
+  incoming: StoredInstinct,
+  options?: InstinctStoreOptions,
+): Promise<StoredInstinct> {
+  const result = upsertQueue.then(() => doUpsertInstinct(incoming, options))
+  // The queue tail swallows the rejection so one failed upsert does not block
+  // later ones; the caller still sees the failure through `result`.
+  upsertQueue = result.catch(() => {})
+  return result
+}
+
+/**
+ * Merges `incoming` into the stored instinct it matches, or saves it as a new
+ * record when nothing matches.
+ *
+ * On a match the stored record keeps its own id and fields, and:
+ * - confidence moves by -0.1 when the two instincts contradict, otherwise by
+ *   the outcome delta of the incoming evidence;
+ * - evidence and observation ids are concatenated (`normalizeInstinct`
+ *   removes duplicates);
+ * - `evidenceOutcome` takes the incoming value when it is set;
+ * - status is recomputed by `resolveNextStatus` and `updatedAt` is bumped.
+ */
+async function doUpsertInstinct(
+  incoming: StoredInstinct,
+  options?: InstinctStoreOptions,
+): Promise<StoredInstinct> {
+  const existing = await loadInstincts(options)
+  // Match by ID first; fall back to (same trigger + contradicting action) so
+  // that a contradictory instinct with a slightly different ID (differing
+  // action/scope) still merges and can drive the conflict-hold transition
+  // instead of silently accumulating as a separate record.
+  const match =
+    existing.find(instinct => instinct.id === incoming.id) ??
+    existing.find(
+      instinct =>
+        instinct.trigger.toLowerCase() === incoming.trigger.toLowerCase() &&
+        isContradictingInstinct(instinct, incoming),
+    )
+  const now = new Date().toISOString()
+
+  // No stored counterpart: persist the incoming instinct unchanged.
+  if (!match) return saveInstinct(incoming, options)
+
+  const contradiction = isContradictingInstinct(match, incoming)
+  const confidenceDelta = contradiction
+    ? -0.1
+    : outcomeConfidenceDelta(incoming.evidenceOutcome)
+  const nextConfidence = clampConfidence(match.confidence + confidenceDelta)
+  const nextStatus = resolveNextStatus(
+    match.status,
+    nextConfidence,
+    contradiction,
+  )
+  const merged = normalizeInstinct({
+    ...match,
+    confidence: nextConfidence,
+    evidence: [...match.evidence, ...incoming.evidence],
+    evidenceOutcome: incoming.evidenceOutcome ?? match.evidenceOutcome,
+    observationIds: [
+      ...(match.observationIds ?? []),
+      ...(incoming.observationIds ?? []),
+    ],
+    updatedAt: now,
+    status: nextStatus,
+  })
+
+  return saveInstinct(merged, options)
+}
+
+/**
+ * Computes the status of an instinct after a merge. Rules, in priority order:
+ * 1. a contradiction that drops confidence below 0.3 puts it on
+ *    `conflict-hold`;
+ * 2. a `conflict-hold` instinct at 0.5 or above becomes `active`;
+ * 3. a `pending` instinct at 0.8 or above becomes `active`;
+ * 4. otherwise the status is unchanged.
+ */
+function resolveNextStatus(
+  current: StoredInstinct['status'],
+  nextConfidence: number,
+  contradiction: boolean,
+): StoredInstinct['status'] {
+  if (contradiction && nextConfidence < 0.3) {
+    return 'conflict-hold'
+  }
+  switch (current) {
+    case 'conflict-hold':
+      return nextConfidence >= 0.5 ? 'active' : current
+    case 'pending':
+      return nextConfidence >= 0.8 ? 'active' : current
+    default:
+      return current
+  }
+}
+
+// Confidence removed per whole elapsed week by decayInstinctConfidence().
+const DECAY_PER_WEEK = 0.02
+// Milliseconds in one week (7 days).
+const MS_PER_WEEK = 7 * 24 * 60 * 60 * 1000
+
+/**
+ * Applies time-based confidence decay: 0.02 per whole week elapsed since an
+ * instinct's `updatedAt`.
+ *
+ * Only `pending` and `active` instincts decay; every other status is left
+ * untouched, as are instincts whose `updatedAt` cannot be parsed, that are
+ * less than a week old, or whose clamped confidence would not change.
+ *
+ * @returns the number of instincts that were rewritten.
+ */
+export async function decayInstinctConfidence(
+  options?: InstinctStoreOptions,
+): Promise<number> {
+  const stored = await loadInstincts(options)
+  const nowMs = Date.now()
+  let rewritten = 0
+
+  for (const instinct of stored) {
+    const decays = instinct.status === 'pending' || instinct.status === 'active'
+    if (!decays) continue
+
+    const lastTouchedMs = Date.parse(instinct.updatedAt)
+    if (Number.isNaN(lastTouchedMs)) continue
+
+    const wholeWeeks = Math.floor((nowMs - lastTouchedMs) / MS_PER_WEEK)
+    if (wholeWeeks < 1) continue
+
+    const penalty = -DECAY_PER_WEEK * wholeWeeks
+    const lowered = clampConfidence(instinct.confidence + penalty)
+    if (lowered === instinct.confidence) continue
+
+    // Stamping updatedAt with "now" keeps the next maintenance run from
+    // charging the same elapsed weeks a second time.
+    const refreshed = normalizeInstinct({
+      ...instinct,
+      confidence: lowered,
+      updatedAt: new Date(nowMs).toISOString(),
+    })
+    await saveInstinct(refreshed, options)
+    rewritten += 1
+  }
+
+  return rewritten
+}
+
+/**
+ * Confidence adjustment for a non-contradicting merge: -0.05 when the
+ * incoming evidence outcome is `'failure'`, +0.05 for anything else
+ * (including no outcome at all).
+ */
+function outcomeConfidenceDelta(
+  outcome: StoredInstinct['evidenceOutcome'],
+): number {
+  return outcome === 'failure' ? -0.05 : 0.05
+}
+
+/**
+ * Adds `delta` to the confidence of the instinct with the given id, clamps
+ * the result, bumps `updatedAt` and saves the record.
+ *
+ * @returns the saved instinct, or `null` when no instinct has that id.
+ */
+export async function updateConfidence(
+  instinctId: string,
+  delta: number,
+  options?: InstinctStoreOptions,
+): Promise<StoredInstinct | null> {
+  const stored = await loadInstincts(options)
+  for (const candidate of stored) {
+    if (candidate.id !== instinctId) continue
+    const adjusted = normalizeInstinct({
+      ...candidate,
+      confidence: clampConfidence(candidate.confidence + delta),
+      updatedAt: new Date().toISOString(),
+    })
+    return saveInstinct(adjusted, options)
+  }
+  return null
+}
+
+/**
+ * Writes all stored instincts to `outputPath` as one 2-space-indented JSON
+ * array followed by a newline, creating the parent directory when needed.
+ *
+ * @returns the instincts that were exported.
+ */
+export async function exportInstincts(
+  outputPath: string,
+  options?: InstinctStoreOptions,
+): Promise<StoredInstinct[]> {
+  const stored = await loadInstincts(options)
+  const parentDir = dirname(outputPath)
+  await mkdir(parentDir, { recursive: true })
+  const payload = `${JSON.stringify(stored, null, 2)}\n`
+  await writeFile(outputPath, payload)
+  return stored
+}
+
+/**
+ * Reads a JSON array of instincts from `inputPath` and upserts each one, in
+ * file order, into the store selected by `options`.
+ *
+ * @returns the stored result of every upsert, in the same order.
+ * @throws {SyntaxError} when the file is not valid JSON.
+ * @throws {TypeError} when the JSON document is not an array. Without this
+ *   check a non-array document failed later with an opaque "not iterable"
+ *   TypeError.
+ */
+export async function importInstincts(
+  inputPath: string,
+  options?: InstinctStoreOptions,
+): Promise<StoredInstinct[]> {
+  const parsed: unknown = JSON.parse(await readFile(inputPath, 'utf8'))
+  if (!Array.isArray(parsed)) {
+    throw new TypeError(
+      `Expected a JSON array of instincts in ${inputPath}`,
+    )
+  }
+  const saved: StoredInstinct[] = []
+  // Sequential on purpose: later entries may merge into earlier ones.
+  for (const instinct of parsed as StoredInstinct[]) {
+    saved.push(await upsertInstinct(normalizeInstinct(instinct), options))
+  }
+  return saved
+}
+
+/**
+ * Deletes the files of `pending` instincts whose `updatedAt` is older than
+ * `maxAgeDays` days. Instincts with an unparseable `updatedAt` are kept.
+ *
+ * @returns the instincts that were removed.
+ */
+export async function prunePendingInstincts(
+  maxAgeDays: number,
+  options?: InstinctStoreOptions,
+): Promise<StoredInstinct[]> {
+  const stored = await loadInstincts(options)
+  const oldestAllowedMs = Date.now() - maxAgeDays * 24 * 60 * 60 * 1000
+  const removed: StoredInstinct[] = []
+
+  for (const instinct of stored) {
+    if (instinct.status !== 'pending') continue
+    const expired = Date.parse(instinct.updatedAt) < oldestAllowedMs
+    if (!expired) continue
+    await unlink(instinctPath(instinct.id, options))
+    removed.push(instinct)
+  }
+
+  return removed
+}
+
+/**
+ * Path of the JSON file that stores the instinct with the given id.
+ *
+ * Ids can arrive from imported files, so path separators are replaced with
+ * `_` before the id is used as a file name; this keeps an id such as
+ * `../../x` inside the instincts directory. Ids produced by
+ * `buildInstinctId` contain no separators and are unaffected.
+ */
+function instinctPath(id: string, options?: InstinctStoreOptions): string {
+  const fileStem = id.replace(/[\\/]/g, '_')
+  return join(getInstinctsDir(options), `${fileStem}.json`)
+}
--- a/src/services/skillLearning/learningPolicy.ts
+++ b/src/services/skillLearning/learningPolicy.ts
@@ -0,0 +1,106 @@
+import { getSkillLearningConfig } from './config.js'
+import type { Instinct } from './instinctParser.js'
+import type { InstinctDomain, SkillLearningScope } from './types.js'
+
+// NOTE(review): shouldGenerateSkillFromInstincts() in this file reads the
+// threshold from getSkillLearningConfig().minConfidence, not from this
+// constant — confirm which callers still rely on it.
+export const MIN_CONFIDENCE_TO_GENERATE_SKILL = 0.75
+// Upper bound on the length of a normalized skill name.
+export const MAX_SKILL_NAME_LENGTH = 64
+
+// Name prefix used by buildLearnedSkillName() for each instinct domain.
+const DOMAIN_PREFIXES: Record<InstinctDomain, string> = {
+  workflow: 'workflow',
+  testing: 'testing',
+  debugging: 'debugging',
+  'code-style': 'style',
+  security: 'security',
+  git: 'git',
+  project: 'project',
+}
+
+// Skill names treated as too generic by isGenericSkillName(); note that
+// 'learned-skill' is also the fallback produced by normalizeSkillName().
+const GENERIC_NAMES = new Set([
+  'learned-skill',
+  'better-skill',
+  'new-skill',
+  'project-skill',
+  'workflow-skill',
+])
+
+/**
+ * Reports whether a group of instincts is confident enough to generate an
+ * artifact: the mean confidence must reach the configured `minConfidence`.
+ * An empty group never qualifies.
+ */
+export function shouldGenerateSkillFromInstincts(
+  instincts: readonly Instinct[],
+): boolean {
+  const count = instincts.length
+  if (count === 0) return false
+
+  let total = 0
+  for (const instinct of instincts) {
+    total += instinct.confidence
+  }
+  return total / count >= getSkillLearningConfig().minConfidence
+}
+
+/**
+ * Builds a skill name from the domain prefix of the first instinct plus up to
+ * five distinct useful words collected, in order, from the triggers and
+ * actions of the instincts.
+ *
+ * @returns the normalized name, or `<prefix>-learned-pattern` when the
+ *   normalized name is one of the generic names.
+ */
+export function buildLearnedSkillName(instincts: readonly Instinct[]): string {
+  const domain = instincts[0]?.domain ?? 'project'
+  const prefix = DOMAIN_PREFIXES[domain]
+
+  const picked = new Set<string>()
+  collect: for (const instinct of instincts) {
+    const tokens = `${instinct.trigger} ${instinct.action}`
+      .toLowerCase()
+      .split(/[^a-z0-9]+/)
+    for (const token of tokens) {
+      if (isUsefulNameWord(token)) picked.add(token)
+      // Five words are enough; stop scanning every remaining instinct too.
+      if (picked.size >= 5) break collect
+    }
+  }
+
+  const name = normalizeSkillName([prefix, ...picked].join('-'))
+  if (isGenericSkillName(name)) {
+    return `${prefix}-learned-pattern`
+  }
+  return name
+}
+
+/**
+ * Turns arbitrary text into a skill-name slug: lower-case, non-alphanumeric
+ * runs collapsed to `-`, one leading/trailing `-` removed, cut to
+ * `MAX_SKILL_NAME_LENGTH` characters, and a trailing `-` left by the cut
+ * removed. Returns `'learned-skill'` when nothing remains.
+ */
+export function normalizeSkillName(value: string): string {
+  const slug = value
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-')
+    .replace(/^-|-$/g, '')
+  const clipped = slug.slice(0, MAX_SKILL_NAME_LENGTH).replace(/-$/g, '')
+  if (clipped) {
+    return clipped
+  }
+  return 'learned-skill'
+}
+
+/**
+ * A learned skill name is valid when it is non-empty, no longer than
+ * `MAX_SKILL_NAME_LENGTH`, already in normalized form, and not one of the
+ * generic names.
+ */
+export function isValidLearnedSkillName(value: string): boolean {
+  if (value !== normalizeSkillName(value)) return false
+  if (value.length === 0 || value.length > MAX_SKILL_NAME_LENGTH) return false
+  return !isGenericSkillName(value)
+}
+
+/** True when `value` is exactly one of the names in `GENERIC_NAMES`. */
+export function isGenericSkillName(value: string): boolean {
+  return GENERIC_NAMES.has(value)
+}
+
+/**
+ * Picks the default scope for an artifact learned from `instincts`:
+ * `'global'` only when there are at least two instincts and every one is in
+ * the security, git or workflow domain; `'project'` otherwise.
+ */
+export function decideDefaultScope(
+  instincts: readonly Instinct[],
+): SkillLearningScope {
+  if (instincts.length < 2) return 'project'
+  for (const instinct of instincts) {
+    const portable = ['security', 'git', 'workflow'].includes(instinct.domain)
+    if (!portable) return 'project'
+  }
+  return 'global'
+}
+
+/**
+ * A word is useful in a generated skill name when it is longer than two
+ * characters and is not one of the filler words listed below.
+ */
+function isUsefulNameWord(word: string): boolean {
+  if (word.length <= 2) return false
+  switch (word) {
+    case 'when':
+    case 'with':
+    case 'this':
+    case 'that':
+    case 'user':
+    case 'project':
+    case 'prefer':
+    case 'avoid':
+    case 'use':
+    case 'using':
+    case 'the':
+    case 'and':
+    case 'for':
+      return false
+    default:
+      return true
+  }
+}
--- a/src/services/skillLearning/llmObserverBackend.ts
+++ b/src/services/skillLearning/llmObserverBackend.ts
@@ -0,0 +1,301 @@
+import { queryHaiku } from '../api/claude.js'
+import { asSystemPrompt } from '../../utils/systemPromptType.js'
+import { getSkillLearningConfig } from './config.js'
+import type { InstinctCandidate } from './instinctParser.js'
+import type { StoredSkillObservation } from './observationStore.js'
+import type {
+  ObserverBackend,
+  ObserverBackendContext,
+} from './observerBackend.js'
+import {
+  INSTINCT_DOMAINS,
+  type InstinctDomain,
+  type SkillLearningScope,
+} from './types.js'
+
+/**
+ * LLM-based observer backend.
+ *
+ * Runs the small fast model (Haiku) through the project's `queryHaiku`
+ * helper, feeds it a compact summary of recent observations, and asks for
+ * up to three atomic reusable instincts in JSON. Output is validated and
+ * mapped to `InstinctCandidate[]` so the existing evolution pipeline
+ * consumes LLM output the same way it consumes heuristic output.
+ *
+ * Design notes:
+ * - Reuses `queryHaiku` (goes through the full Claude Code API stack:
+ *   OAuth, beta headers, providers, VCR in tests). No new auth code.
+ * - Caps input to the tail of the observation buffer so the prompt stays
+ *   small and predictable, and runs under an abort signal with a
+ *   configurable timeout (`llm.timeoutMs`, 10 seconds by default) so a slow
+ *   Haiku round-trip never blocks the REPL turn end.
+ * - On ANY failure (abort, parse error, empty output) the call falls back to
+ *   the heuristic observer backend, and returns `[]` only if that fallback
+ *   itself throws. The backend is opt-in via
+ *   `SKILL_LEARNING_OBSERVER_BACKEND=llm` and must never destabilise
+ *   skill-learning when the API is unavailable.
+ * - A circuit breaker skips the LLM call for `llm.circuitCooldownMs` once
+ *   `llm.failureThreshold` consecutive failures have been recorded.
+ */
+
+// Only the newest N observations are rendered into the prompt.
+const MAX_OBSERVATIONS_PER_CALL = 30
+// Maximum number of items requested from the model and read from its reply.
+const MAX_CANDIDATES_PER_CALL = 3
+
+// --- Circuit breaker state ---
+// Number of failed LLM analyses in a row.
+let consecutiveFailures = 0
+// Epoch milliseconds until which the LLM call is skipped (0 = circuit closed).
+let circuitOpenUntil = 0
+
+/** Clears the failure counter and closes the circuit. */
+export function resetCircuitBreaker(): void {
+  consecutiveFailures = 0
+  circuitOpenUntil = 0
+}
+
+// System prompt sent with every observer query. It pins the JSON schema that
+// parseInstinctCandidates()/normaliseCandidate() validate, so the two must be
+// kept in step (field names, length limits, domain and scope values).
+const LLM_OBSERVER_SYSTEM_PROMPT = `You analyse a short sequence of observations from a coding-assistant session (user messages, tool invocations with outcomes, assistant messages) and extract atomic, reusable "instincts" — behavioural patterns that would help the assistant act correctly in future similar situations.
+
+Respond with ONLY a JSON array (no prose, no code fences, no commentary). Each item must match this schema:
+
+{
+  "trigger": string,        // <= 80 chars, short phrase describing WHEN the instinct applies
+  "action": string,         // <= 120 chars, short phrase describing WHAT to do
+  "confidence": number,     // 0..1 — how strongly these observations support the pattern
+  "domain": "workflow"|"testing"|"debugging"|"code-style"|"security"|"git"|"project",
+  "scope": "project"|"global",
+  "evidence": string[]      // 1..3 short excerpts copied/paraphrased from the observations
+}
+
+Rules:
+- Return [] if nothing clearly reusable. No guessing.
+- At most 3 items, highest confidence first.
+- confidence > 0.7 only when observations show the pattern in action (a correction followed by a successful retry, a repeated sequence, an explicit rule).
+- Never include secrets, tokens, full file contents, or personally-identifying data.
+- Scope "global" only when the pattern is obviously project-agnostic (generic testing, git hygiene); default to "project".`
+
+/**
+ * Observer backend named `'llm'`. `analyze` delegates to `analyseWithHaiku`
+ * and always returns a promise.
+ */
+export const llmObserverBackend: ObserverBackend = {
+  name: 'llm',
+  analyze(
+    observations: StoredSkillObservation[],
+    ctx?: ObserverBackendContext,
+  ): Promise<InstinctCandidate[]> {
+    return analyseWithHaiku(observations, ctx)
+  },
+}
+
+/**
+ * Records one failed LLM analysis and opens the circuit once the configured
+ * number of consecutive failures is reached.
+ */
+function recordLlmObserverFailure(): void {
+  const { failureThreshold, circuitCooldownMs } = getSkillLearningConfig().llm
+  consecutiveFailures++
+  if (consecutiveFailures >= failureThreshold) {
+    circuitOpenUntil = Date.now() + circuitCooldownMs
+  }
+}
+
+/**
+ * Asks Haiku to extract instinct candidates from the newest observations.
+ *
+ * Falls back to the heuristic backend when the circuit is open, when the
+ * query throws (timeout, rate limit, transport error), or when the reply
+ * yields no valid candidate. Both kinds of failure count towards the circuit
+ * breaker. The failure counter is reset only after a reply produced at least
+ * one valid candidate; resetting it as soon as the request returned meant
+ * that a model which systematically answers with unusable output could never
+ * accumulate enough failures to open the circuit.
+ *
+ * @returns the parsed candidates, the heuristic fallback result, or `[]` for
+ *   an empty observation list.
+ */
+async function analyseWithHaiku(
+  observations: StoredSkillObservation[],
+  ctx?: ObserverBackendContext,
+): Promise<InstinctCandidate[]> {
+  if (observations.length === 0) return []
+
+  // Circuit breaker: if the circuit is open, skip queryHaiku entirely.
+  if (Date.now() < circuitOpenUntil) {
+    return runHeuristicFallback(observations, ctx)
+  }
+
+  const capped = observations.slice(-MAX_OBSERVATIONS_PER_CALL)
+  const userPrompt = buildUserPrompt(capped)
+  const signal = makeTimeoutSignal(getSkillLearningConfig().llm.timeoutMs)
+
+  let responseText: string
+  try {
+    const response = await queryHaiku({
+      systemPrompt: asSystemPrompt([LLM_OBSERVER_SYSTEM_PROMPT]),
+      userPrompt,
+      signal,
+      options: {
+        querySource: 'skill_learning_observer',
+        enablePromptCaching: true,
+        agents: [],
+        isNonInteractiveSession: true,
+        hasAppendSystemPrompt: false,
+        mcpTools: [],
+      },
+    })
+    responseText = extractResponseText(response.message?.content)
+  } catch {
+    // Haiku failure (timeout / rate limit / bad response).
+    recordLlmObserverFailure()
+    return runHeuristicFallback(observations, ctx)
+  }
+
+  const parsed = parseInstinctCandidates(responseText, ctx, capped)
+  if (parsed.length === 0) {
+    // Empty / malformed LLM output counts as a failure so the circuit opens
+    // if Haiku keeps returning output that cannot be used.
+    recordLlmObserverFailure()
+    return runHeuristicFallback(observations, ctx)
+  }
+
+  // Fully usable result: only now is the failure streak broken.
+  consecutiveFailures = 0
+  return parsed
+}
+
+/**
+ * Runs the heuristic observer backend (loaded lazily from
+ * `./sessionObserver.js`) over the observations.
+ *
+ * @returns its candidates, or `[]` if loading or running the backend fails.
+ */
+async function runHeuristicFallback(
+  observations: StoredSkillObservation[],
+  ctx?: ObserverBackendContext,
+): Promise<InstinctCandidate[]> {
+  try {
+    const sessionObserver = await import('./sessionObserver.js')
+    const outcome = sessionObserver.heuristicObserverBackend.analyze(
+      observations,
+      ctx,
+    )
+    // The backend may answer synchronously or with a promise.
+    if (Array.isArray(outcome)) return outcome
+    return await outcome
+  } catch {
+    return []
+  }
+}
+
+/**
+ * Renders the observations, one per line in the given order, and wraps them
+ * in the user prompt that asks for at most `MAX_CANDIDATES_PER_CALL`
+ * instincts.
+ */
+function buildUserPrompt(observations: StoredSkillObservation[]): string {
+  const lines: string[] = []
+  observations.forEach((observation, position) => {
+    lines.push(renderObservation(observation, position))
+  })
+  const rendered = lines.join('\n')
+  return `Observations (chronological, newest last):\n${rendered}\n\nExtract up to ${MAX_CANDIDATES_PER_CALL} atomic instincts. JSON array only.`
+}
+
+/**
+ * Formats one observation as a single ` | `-separated line: a 1-based
+ * ordinal, the event, then whichever of tool name, outcome, message text,
+ * tool input and tool output are present. Free-text fields are truncated
+ * (200 characters for message text, 120 for tool input/output) and
+ * JSON-quoted.
+ */
+function renderObservation(
+  observation: StoredSkillObservation,
+  index: number,
+): string {
+  const { toolName, outcome, messageText, toolInput, toolOutput } = observation
+  const quoted = (text: string, limit: number): string =>
+    JSON.stringify(truncate(text, limit))
+
+  const parts: string[] = []
+  parts.push(`#${index + 1}`)
+  parts.push(`event=${observation.event}`)
+  if (toolName) parts.push(`tool=${toolName}`)
+  if (outcome) parts.push(`outcome=${outcome}`)
+  if (messageText) parts.push(`text=${quoted(messageText, 200)}`)
+  if (toolInput) parts.push(`in=${quoted(toolInput, 120)}`)
+  if (toolOutput) parts.push(`out=${quoted(toolOutput, 120)}`)
+  return parts.join(' | ')
+}
+
+/**
+ * Returns `value` unchanged when it has at most `max` characters, otherwise
+ * its first `max` characters followed by an ellipsis.
+ */
+function truncate(value: string, max: number): string {
+  return value.length <= max ? value : `${value.slice(0, max)}…`
+}
+
+/**
+ * Concatenates the `text` of every `type: 'text'` block in a message content
+ * array and trims the result. Anything that is not an array, and any block
+ * that is not a text block with a string `text`, contributes nothing.
+ */
+function extractResponseText(content: unknown): string {
+  if (!Array.isArray(content)) return ''
+
+  let combined = ''
+  for (const block of content) {
+    if (typeof block !== 'object' || block === null) continue
+    const { type, text } = block as Record<string, unknown>
+    if (type === 'text' && typeof text === 'string') {
+      combined += text
+    }
+  }
+  return combined.trim()
+}
+
+/**
+ * Parses the model reply into validated instinct candidates.
+ *
+ * Only the first `MAX_CANDIDATES_PER_CALL` array items are examined; items
+ * that fail validation are dropped. Every candidate is linked to the ids of
+ * all observations that were sent.
+ *
+ * @returns the valid candidates, or `[]` when no JSON array can be found or
+ *   parsed.
+ */
+function parseInstinctCandidates(
+  raw: string,
+  ctx: ObserverBackendContext | undefined,
+  observations: StoredSkillObservation[],
+): InstinctCandidate[] {
+  const json = extractJsonArray(raw)
+  if (!json) return []
+
+  let decoded: unknown
+  try {
+    decoded = JSON.parse(json)
+  } catch {
+    return []
+  }
+  if (!Array.isArray(decoded)) return []
+
+  const observationIds = observations.map(observation => observation.id)
+  return decoded
+    .slice(0, MAX_CANDIDATES_PER_CALL)
+    .map(item => normaliseCandidate(item, ctx, observationIds))
+    .filter(
+      (candidate): candidate is InstinctCandidate => candidate !== undefined,
+    )
+}
+
+/**
+ * Returns the substring from the first `[` to the last `]` of `raw`, which
+ * tolerates prose or code fences around the JSON array.
+ *
+ * @returns `undefined` when `raw` is empty or has no `[` followed by a `]`.
+ */
+function extractJsonArray(raw: string): string | undefined {
+  if (!raw) return undefined
+  const open = raw.indexOf('[')
+  if (open < 0) return undefined
+  const close = raw.lastIndexOf(']')
+  return close > open ? raw.slice(open, close + 1) : undefined
+}
+
+/**
+ * Validates one item of the model reply and maps it to an instinct candidate.
+ *
+ * The item is rejected (`undefined`) unless it is an object with a non-empty
+ * string `trigger`, a non-empty string `action` and at least one usable
+ * evidence string. Trigger and action are cut to 80 and 120 characters;
+ * confidence, domain and scope are coerced to valid values; project identity
+ * comes from `ctx`.
+ */
+function normaliseCandidate(
+  item: unknown,
+  ctx: ObserverBackendContext | undefined,
+  observationIds: string[],
+): InstinctCandidate | undefined {
+  if (typeof item !== 'object' || item === null) return undefined
+  const fields = item as Record<string, unknown>
+
+  const trigger = stringField(fields.trigger, 80)
+  if (!trigger) return undefined
+  const action = stringField(fields.action, 120)
+  if (!action) return undefined
+
+  const evidence = evidenceField(fields.evidence)
+  if (evidence.length === 0) return undefined
+
+  const project = ctx?.project
+  return {
+    trigger,
+    action,
+    confidence: clampUnitInterval(fields.confidence),
+    domain: domainField(fields.domain),
+    source: 'session-observation',
+    scope: scopeField(fields.scope),
+    projectId: project?.projectId,
+    projectName: project?.projectName,
+    evidence,
+    observationIds,
+  }
+}
+
+/**
+ * Returns the trimmed string cut to `maxLength` characters, or `undefined`
+ * when `value` is not a string or is blank.
+ */
+function stringField(value: unknown, maxLength: number): string | undefined {
+  if (typeof value !== 'string') return undefined
+  const text = value.trim()
+  if (text.length === 0) return undefined
+  if (text.length > maxLength) {
+    return text.slice(0, maxLength)
+  }
+  return text
+}
+
+/**
+ * Coerces a model-supplied confidence into [0, 1]. Values that are not
+ * finite numbers become the neutral 0.5.
+ */
+function clampUnitInterval(value: unknown): number {
+  const usable = typeof value === 'number' && Number.isFinite(value)
+  if (!usable) return 0.5
+  const numeric = value as number
+  return numeric < 0 ? 0 : numeric > 1 ? 1 : numeric
+}
+
+/**
+ * Returns `value` when it is one of the known instinct domains, otherwise
+ * `'project'`.
+ */
+function domainField(value: unknown): InstinctDomain {
+  if (typeof value !== 'string') return 'project'
+  const known = (INSTINCT_DOMAINS as readonly string[]).includes(value)
+  if (!known) return 'project'
+  return value as InstinctDomain
+}
+
+/** Returns `'global'` only for the exact string `'global'`, else `'project'`. */
+function scopeField(value: unknown): SkillLearningScope {
+  if (value === 'global') {
+    return 'global'
+  }
+  return 'project'
+}
+
+/**
+ * Extracts up to three evidence strings from a model-supplied value.
+ * Non-string and blank entries are skipped; each kept entry is trimmed and,
+ * when longer than 200 characters, cut to 200 plus an ellipsis.
+ *
+ * @returns `[]` when `value` is not an array.
+ */
+function evidenceField(value: unknown): string[] {
+  if (!Array.isArray(value)) return []
+
+  const kept: string[] = []
+  for (const raw of value) {
+    if (typeof raw !== 'string') continue
+    const text = raw.trim()
+    if (text === '') continue
+    const excerpt = text.length > 200 ? `${text.slice(0, 200)}…` : text
+    kept.push(excerpt)
+    if (kept.length === 3) break
+  }
+  return kept
+}
+
+/** Returns an abort signal that fires after `ms` milliseconds. */
+function makeTimeoutSignal(ms: number): AbortSignal {
+  return AbortSignal.timeout(ms)
+}
--- a/src/services/skillLearning/observationStore.ts
+++ b/src/services/skillLearning/observationStore.ts
@@ -0,0 +1,451 @@
+import { mkdir, readFile, rename, stat, writeFile } from 'node:fs/promises'
+import { dirname, join } from 'node:path'
+import { createHash, randomUUID } from 'node:crypto'
+import type {
+  SkillLearningProjectContext as BaseSkillLearningProjectContext,
+  SkillLearningScope,
+  SkillObservation as BaseSkillObservation,
+  SkillObservationEvent,
+  SkillObservationOutcome,
+} from './types.js'
+
+export type { SkillLearningScope, SkillObservation } from './types.js'
+
+/**
+ * Project context as accepted by the observation store: `projectId`,
+ * `projectName` and `cwd` are required, while every other field of the base
+ * project context is optional.
+ */
+export type SkillLearningProjectContext = Pick<
+  BaseSkillLearningProjectContext,
+  'projectId' | 'projectName' | 'cwd'
+> &
+  Partial<
+    Omit<BaseSkillLearningProjectContext, 'projectId' | 'projectName' | 'cwd'>
+  >
+
+/** Every base observation event except `'tool_error'`. */
+export type ObservationEvent = Exclude<SkillObservationEvent, 'tool_error'>
+
+/** Base observation outcomes, extended with `'interrupted'`. */
+export type ObservationOutcome = SkillObservationOutcome | 'interrupted'
+
+/**
+ * Observation record as handled by this store: the base observation with
+ * `event`/`outcome` replaced by the store's own variants and the tool
+ * payloads held as strings, plus store-specific optional fields.
+ * `contentHash` is filled in by `scrubObservation`.
+ */
+export type StoredSkillObservation = Omit<
+  BaseSkillObservation,
+  'event' | 'outcome' | 'toolInput' | 'toolOutput'
+> & {
+  event: ObservationEvent
+  outcome?: ObservationOutcome
+  toolInput?: string
+  toolOutput?: string
+  toolName?: string
+  messageText?: string
+  source?: 'transcript' | 'hook' | 'tool-hook' | 'imported'
+  contentHash?: string
+  // Turn index at which the observation was captured. Used by
+  // runtimeObserver to scope tool-hook observations to the current REPL turn.
+  turn?: number
+}
+
+/** Options shared by the observation store functions in this module. */
+export type ObservationStoreOptions = {
+  // Explicit storage root; takes precedence over environment-derived roots.
+  rootDir?: string
+  // Project whose observation file is targeted; absent means the global file.
+  project?: SkillLearningProjectContext
+  // Per-field length cap applied when scrubbing (defaults to DEFAULT_MAX_FIELD_LENGTH).
+  maxFieldLength?: number
+  // File size at which the observation file is archived
+  // (defaults to DEFAULT_ARCHIVE_THRESHOLD_BYTES).
+  archiveThresholdBytes?: number
+}
+
+/**
+ * Loosely-typed shape of one parsed transcript JSONL line as consumed by
+ * `ingestTranscript`. Every field is optional because the input is not
+ * validated beyond JSON parsing.
+ */
+type ClaudeTranscriptEntry = {
+  sessionId?: string
+  cwd?: string
+  timestamp?: string
+  // Used as the role when `message.role` is absent.
+  type?: string
+  message?: {
+    role?: string
+    content?: unknown
+  }
+  tool_name?: string
+  tool_input?: unknown
+  tool_response?: unknown
+}
+
+// Default per-field character cap applied by scrubText/scrubObservation.
+const DEFAULT_MAX_FIELD_LENGTH = 5_000
+// Default observation-file size (bytes) at which the file is archived.
+const DEFAULT_ARCHIVE_THRESHOLD_BYTES = 1_000_000
+// Default age (days) beyond which purgeOldObservations drops records.
+const DEFAULT_PURGE_MAX_AGE_DAYS = 30
+// Placeholder substituted for every redacted secret.
+const SECRET_REPLACEMENT = '[REDACTED]'
+
+/**
+ * Patterns whose matches are redacted by `scrubText`, applied in array order.
+ *
+ * Order matters: the `Bearer <token>` pattern must run before the generic
+ * `key: value` pattern. Otherwise `Authorization: Bearer <token>` is consumed
+ * as `authorization: Bearer` (the value stops at whitespace) and the token
+ * that follows is left in clear text.
+ *
+ * The `key: value` pattern also tolerates a closing quote right after the key
+ * so JSON-serialized payloads such as `{"password":"..."}` are scrubbed too.
+ */
+const SECRET_PATTERNS: RegExp[] = [
+  // Provider-style API keys (sk-..., sk-ant-..., sk-proj-..., xoxb-... etc.).
+  /\b(?:sk|sk-ant|sk-proj|xox[baprs])-[A-Za-z0-9_-]{12,}\b/g,
+  // Email addresses.
+  /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
+  // Bearer tokens — must stay ahead of the key/value pattern.
+  /\bBearer\s+[A-Za-z0-9._~+/=-]{12,}\b/gi,
+  // key: value / key=value pairs, with optional quotes around key and value.
+  /\b(?:api[_-]?key|token|secret|password|authorization)\b["']?\s*[:=]\s*["']?[^"',\s}]+/gi,
+]
+
+/**
+ * Resolves the root directory for skill-learning storage.
+ *
+ * Precedence: `options.rootDir`, then the `CLAUDE_SKILL_LEARNING_HOME`
+ * environment variable, then `<HOME>/.claude/skill-learning` (with the
+ * current working directory standing in for `HOME` when it is unset).
+ */
+export function getSkillLearningRoot(
+  options?: ObservationStoreOptions,
+): string {
+  const explicitRoot = options?.rootDir
+  if (explicitRoot) return explicitRoot
+
+  const envRoot = process.env.CLAUDE_SKILL_LEARNING_HOME
+  if (envRoot) return envRoot
+
+  const base = process.env.HOME ?? process.cwd()
+  return join(base, '.claude', 'skill-learning')
+}
+
+/**
+ * Returns the JSONL file that holds observations for the given options.
+ *
+ * A project whose scope and id are both non-global maps to
+ * `<root>/projects/<projectId>/observations.jsonl`; everything else
+ * (including no project at all) maps to `<root>/global/observations.jsonl`.
+ */
+export function getObservationFilePath(
+  options?: ObservationStoreOptions,
+): string {
+  const root = getSkillLearningRoot(options)
+  const project = options?.project
+  if (project && project.scope !== 'global' && project.projectId !== 'global') {
+    return join(root, 'projects', project.projectId, 'observations.jsonl')
+  }
+  return join(root, 'global', 'observations.jsonl')
+}
+
+/**
+ * Redacts secrets from `value` and caps its length.
+ *
+ * Each `SECRET_PATTERNS` match is replaced by the redaction marker; when the
+ * match contains `:` or `=`, the text before the first separator is kept as
+ * a label (`<key>: [REDACTED]`). Text longer than `maxLength` is cut to
+ * `maxLength` characters and suffixed with a `[TRUNCATED ...]` line carrying
+ * the scrubbed length and SHA-256. If truncation would cut away every
+ * redaction marker, one is prepended so the preview still shows that
+ * something was redacted.
+ *
+ * @returns `undefined` for `undefined` input, otherwise the scrubbed text.
+ */
+export function scrubText(
+  value: string | undefined,
+  maxLength = DEFAULT_MAX_FIELD_LENGTH,
+): string | undefined {
+  if (value === undefined) return undefined
+
+  const redact = (match: string): string => {
+    const key = match.split(/[:=]/, 1)[0]
+    if (!/[:=]/.test(match)) return SECRET_REPLACEMENT
+    return `${key}: ${SECRET_REPLACEMENT}`
+  }
+  const scrubbed = SECRET_PATTERNS.reduce(
+    (text, pattern) => text.replace(pattern, redact),
+    value,
+  )
+
+  if (scrubbed.length <= maxLength) return scrubbed
+
+  const digest = hashText(scrubbed)
+  const head = scrubbed.slice(0, maxLength)
+  const markerCutOff =
+    scrubbed.includes(SECRET_REPLACEMENT) && !head.includes(SECRET_REPLACEMENT)
+  const preview = markerCutOff ? `${SECRET_REPLACEMENT} ${head}` : head
+  return `${preview}\n[TRUNCATED length=${scrubbed.length} sha256=${digest}]`
+}
+
+/**
+ * Returns a copy of `observation` with its free-text fields scrubbed and a
+ * `contentHash` computed over the scrubbed content.
+ *
+ * The hash covers the event, tool name, tool input, tool output and message
+ * text (missing values count as empty strings), joined by newlines.
+ */
+export function scrubObservation(
+  observation: StoredSkillObservation,
+  options?: ObservationStoreOptions,
+): StoredSkillObservation {
+  const limit = options?.maxFieldLength ?? DEFAULT_MAX_FIELD_LENGTH
+  const toolInput = scrubText(observation.toolInput, limit)
+  const toolOutput = scrubText(observation.toolOutput, limit)
+  const messageText = scrubText(observation.messageText, limit)
+
+  const contentHash = hashText(
+    `${observation.event}\n${observation.toolName ?? ''}\n${toolInput ?? ''}\n${toolOutput ?? ''}\n${messageText ?? ''}`,
+  )
+
+  return {
+    ...observation,
+    toolInput,
+    toolOutput,
+    messageText,
+    contentHash,
+  }
+}
+
+// Upper bound (64 KiB) on the serialized size of one observation line.
+const MAX_SINGLE_OBSERVATION_BYTES = 64 * 1024
+
+/**
+ * Scrubs an observation and appends it as one JSON line to the observation
+ * file selected by `options`.
+ *
+ * Before writing, the parent directory is created and the current file is
+ * archived if it has reached the archive threshold.
+ *
+ * NOTE(review): a record whose serialized form exceeds
+ * MAX_SINGLE_OBSERVATION_BYTES is silently not written, yet the scrubbed
+ * observation is still returned, so callers cannot tell it was dropped.
+ *
+ * @returns The scrubbed observation (including its `contentHash`).
+ */
+export async function appendObservation(
+  observation: StoredSkillObservation,
+  options?: ObservationStoreOptions,
+): Promise<StoredSkillObservation> {
+  const filePath = getObservationFilePath(options)
+  await mkdir(dirname(filePath), { recursive: true })
+  await archiveLargeObservationFile(options)
+
+  const scrubbed = scrubObservation(observation, options)
+  const serialized = JSON.stringify(scrubbed)
+  // Oversized records are skipped rather than written.
+  if (Buffer.byteLength(serialized) > MAX_SINGLE_OBSERVATION_BYTES) {
+    return scrubbed
+  }
+  // flag 'a' appends to the file, creating it when missing.
+  await writeFile(filePath, `${serialized}\n`, {
+    flag: 'a',
+  })
+  return scrubbed
+}
+
+/**
+ * Reads every observation from the JSONL file selected by `options`.
+ *
+ * A missing file yields an empty list; any other read error is rethrown.
+ * Blank lines and lines that are not valid JSON are skipped so that one
+ * corrupt or truncated line cannot break the whole read.
+ */
+export async function readObservations(
+  options?: ObservationStoreOptions,
+): Promise<StoredSkillObservation[]> {
+  const filePath = getObservationFilePath(options)
+  let raw: string
+  try {
+    raw = await readFile(filePath, 'utf8')
+  } catch (error) {
+    if ((error as NodeJS.ErrnoException).code === 'ENOENT') return []
+    throw error
+  }
+
+  return raw.split(/\r?\n/).flatMap(line => {
+    if (!line.trim()) return []
+    try {
+      return [JSON.parse(line) as StoredSkillObservation]
+    } catch {
+      // Corrupt/truncated line (e.g. a crash during an append): ignore it.
+      return []
+    }
+  })
+}
+
+/**
+ * Converts a transcript JSONL file into observations and appends each one to
+ * the observation store.
+ *
+ * Blank lines, lines that are not valid JSON, and lines whose JSON value is
+ * not a plain object are skipped, mirroring the tolerance of
+ * `readObservations`: one damaged line must not abort the import after some
+ * observations have already been written.
+ *
+ * @param transcriptPath - Path of the transcript file to read.
+ * @param options - Store options forwarded to `appendObservation`.
+ * @returns The scrubbed observations, in transcript order.
+ * @throws If the transcript cannot be read or an append fails.
+ */
+export async function ingestTranscript(
+  transcriptPath: string,
+  options?: ObservationStoreOptions,
+): Promise<StoredSkillObservation[]> {
+  const transcript = await readFile(transcriptPath, 'utf8')
+  const observations: StoredSkillObservation[] = []
+
+  for (const line of transcript.split(/\r?\n/)) {
+    if (!line.trim()) continue
+
+    let parsed: unknown
+    try {
+      parsed = JSON.parse(line)
+    } catch {
+      // Malformed line: skip it and keep ingesting the rest.
+      continue
+    }
+    // `null`, primitives and arrays are valid JSON but not transcript entries.
+    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
+      continue
+    }
+
+    const entry = parsed as ClaudeTranscriptEntry
+    for (const observation of observationsFromTranscriptEntry(entry, options)) {
+      observations.push(await appendObservation(observation, options))
+    }
+  }
+
+  return observations
+}
+
+/**
+ * Removes observations older than `maxAgeDays` (default
+ * DEFAULT_PURGE_MAX_AGE_DAYS) from the observation file selected by
+ * `options`.
+ *
+ * Lines that cannot be parsed, or whose `timestamp` cannot be parsed as a
+ * date, are kept. The file is only rewritten when at least one record was
+ * purged; a missing file counts as nothing to purge.
+ *
+ * @returns The number of purged records.
+ */
+export async function purgeOldObservations(
+  options?: ObservationStoreOptions & { maxAgeDays?: number },
+): Promise<number> {
+  const filePath = getObservationFilePath(options)
+  const maxAgeDays = options?.maxAgeDays ?? DEFAULT_PURGE_MAX_AGE_DAYS
+  // Records with a timestamp strictly before this epoch-ms value are dropped.
+  const cutoff = Date.now() - maxAgeDays * 24 * 60 * 60 * 1000
+
+  let content = ''
+  try {
+    content = await readFile(filePath, 'utf8')
+  } catch (error) {
+    if ((error as NodeJS.ErrnoException).code === 'ENOENT') return 0
+    throw error
+  }
+
+  const kept: string[] = []
+  let purged = 0
+  for (const line of content.split(/\r?\n/)) {
+    if (!line.trim()) continue
+    try {
+      const obs = JSON.parse(line) as StoredSkillObservation
+      const ts = Date.parse(obs.timestamp)
+      if (!Number.isNaN(ts) && ts < cutoff) {
+        purged += 1
+        continue
+      }
+      kept.push(line)
+    } catch {
+      // Unparseable lines are preserved verbatim rather than discarded.
+      kept.push(line)
+    }
+  }
+
+  if (purged === 0) return 0
+  // Atomic write: temp + rename. Direct writeFile leaves a truncated/empty
+  // file if the process crashes mid-write, losing retained observations.
+  const tmpPath = `${filePath}.tmp-${process.pid}-${Date.now()}`
+  await writeFile(tmpPath, kept.length ? `${kept.join('\n')}\n` : '')
+  await rename(tmpPath, filePath)
+  return purged
+}
+
+/**
+ * Moves the observation file into an `observations.archive` directory next
+ * to it once its size reaches the archive threshold
+ * (`options.archiveThresholdBytes`, default DEFAULT_ARCHIVE_THRESHOLD_BYTES).
+ *
+ * @returns The archive path when the file was moved, or `null` when the file
+ *   does not exist or is still below the threshold.
+ */
+export async function archiveLargeObservationFile(
+  options?: ObservationStoreOptions,
+): Promise<string | null> {
+  const filePath = getObservationFilePath(options)
+  const threshold =
+    options?.archiveThresholdBytes ?? DEFAULT_ARCHIVE_THRESHOLD_BYTES
+
+  let currentStat
+  try {
+    currentStat = await stat(filePath)
+  } catch (error) {
+    if ((error as NodeJS.ErrnoException).code === 'ENOENT') return null
+    throw error
+  }
+
+  if (currentStat.size < threshold) return null
+
+  const archiveDir = join(dirname(filePath), 'observations.archive')
+  await mkdir(archiveDir, { recursive: true })
+  // The archive name embeds the current ISO timestamp with ':' and '.'
+  // replaced by '-'.
+  const archivePath = join(
+    archiveDir,
+    `observations-${new Date().toISOString().replace(/[:.]/g, '-')}.jsonl`,
+  )
+  await rename(filePath, archivePath)
+  return archivePath
+}
+
+/**
+ * Translates one transcript entry into zero or more observations.
+ *
+ * - An entry with `tool_name` yields a `tool_complete` observation whose
+ *   outcome is inferred from the tool response text.
+ * - A user entry yields one `tool_complete` per `tool_result` part, or, when
+ *   there are none, a single `user_message`.
+ * - An assistant entry yields one `tool_start` per `tool_use` part, plus an
+ *   `assistant_message` when it carries non-blank text.
+ *
+ * The role is taken from `message.role`, falling back to `entry.type`.
+ * Missing session/project/cwd/timestamp values fall back to
+ * `'unknown-session'`, `'global'`, the project or process cwd, and the
+ * current time respectively.
+ */
+function observationsFromTranscriptEntry(
+  entry: ClaudeTranscriptEntry,
+  options?: ObservationStoreOptions,
+): StoredSkillObservation[] {
+  const project = options?.project
+  // Fields shared by every observation derived from this entry.
+  const base = {
+    sessionId: entry.sessionId ?? 'unknown-session',
+    projectId: project?.projectId ?? 'global',
+    projectName: project?.projectName ?? 'global',
+    cwd: entry.cwd ?? project?.cwd ?? process.cwd(),
+    timestamp: entry.timestamp ?? new Date().toISOString(),
+    source: 'transcript' as const,
+  }
+
+  const role = entry.message?.role ?? entry.type
+  const content = entry.message?.content
+  const observations: StoredSkillObservation[] = []
+
+  if (entry.tool_name) {
+    observations.push({
+      ...base,
+      id: createObservationId(),
+      event: 'tool_complete',
+      toolName: entry.tool_name,
+      toolInput: stringifyField(entry.tool_input),
+      toolOutput: stringifyField(entry.tool_response),
+      outcome: inferOutcome(entry.tool_response),
+    })
+  }
+
+  if (role === 'user') {
+    const toolResults = extractToolResults(content)
+    if (toolResults.length > 0) {
+      for (const result of toolResults) {
+        observations.push({
+          ...base,
+          id: createObservationId(),
+          event: 'tool_complete',
+          toolName: result.name,
+          toolOutput: result.output,
+          outcome: result.isError ? 'failure' : 'success',
+        })
+      }
+      // A user entry carrying tool results is not also recorded as a message.
+      return observations
+    }
+
+    observations.push({
+      ...base,
+      id: createObservationId(),
+      event: 'user_message',
+      messageText: extractText(content),
+    })
+    return observations
+  }
+
+  if (role === 'assistant') {
+    const toolUses = extractToolUses(content)
+    for (const toolUse of toolUses) {
+      observations.push({
+        ...base,
+        id: createObservationId(),
+        event: 'tool_start',
+        toolName: toolUse.name,
+        toolInput: toolUse.input,
+      })
+    }
+
+    const text = extractText(content)
+    if (text.trim()) {
+      observations.push({
+        ...base,
+        id: createObservationId(),
+        event: 'assistant_message',
+        messageText: text,
+      })
+    }
+  }
+
+  return observations
+}
+
+/**
+ * Flattens message content into plain text.
+ *
+ * Strings are returned as-is. Arrays contribute their string elements and
+ * the string `text` property of object elements, joined by newlines (empty
+ * pieces are dropped). Any other value is serialized via `stringifyField`,
+ * with an empty string standing in for `undefined`/`null`.
+ */
+function extractText(content: unknown): string {
+  if (typeof content === 'string') return content
+  if (!Array.isArray(content)) return stringifyField(content) ?? ''
+
+  const pieces: string[] = []
+  for (const part of content) {
+    if (typeof part === 'string') {
+      if (part) pieces.push(part)
+      continue
+    }
+    if (!part || typeof part !== 'object') continue
+    const text = (part as Record<string, unknown>).text
+    if (typeof text === 'string' && text) pieces.push(text)
+  }
+  return pieces.join('\n')
+}
+
+/**
+ * Collects the `tool_use` parts of a message content array.
+ *
+ * Non-array content and non-object parts produce nothing. A missing tool
+ * name becomes `'unknown_tool'`; the input is serialized with
+ * `stringifyField`.
+ */
+function extractToolUses(
+  content: unknown,
+): Array<{ name: string; input: string | undefined }> {
+  const uses: Array<{ name: string; input: string | undefined }> = []
+  if (!Array.isArray(content)) return uses
+
+  for (const part of content) {
+    if (!part || typeof part !== 'object') continue
+    const record = part as Record<string, unknown>
+    if (record.type !== 'tool_use') continue
+    uses.push({
+      name: String(record.name ?? 'unknown_tool'),
+      input: stringifyField(record.input),
+    })
+  }
+  return uses
+}
+
+/**
+ * Collects the `tool_result` parts of a message content array.
+ *
+ * The tool name is read from `name`, then `tool_name`, and defaults to
+ * `'unknown_tool'`. The output is the serialized `content` property, and
+ * `isError` is true only when `is_error` is exactly `true`.
+ */
+function extractToolResults(
+  content: unknown,
+): Array<{ name: string; output: string | undefined; isError: boolean }> {
+  const results: Array<{
+    name: string
+    output: string | undefined
+    isError: boolean
+  }> = []
+  if (!Array.isArray(content)) return results
+
+  for (const part of content) {
+    if (!part || typeof part !== 'object') continue
+    const record = part as Record<string, unknown>
+    if (record.type !== 'tool_result') continue
+    results.push({
+      name: String(record.name ?? record.tool_name ?? 'unknown_tool'),
+      output: stringifyField(record.content),
+      isError: record.is_error === true,
+    })
+  }
+  return results
+}
+
+/**
+ * Guesses a tool outcome from the text of its response (case-insensitive
+ * substring search).
+ *
+ * `interrupted`/`aborted` take priority and yield `'interrupted'`;
+ * `error`/`exception`/`failed` yield `'failure'`; anything else, including
+ * an absent response, counts as `'success'`.
+ */
+function inferOutcome(value: unknown): ObservationOutcome {
+  const text = (stringifyField(value) ?? '').toLowerCase()
+  const mentionsAny = (...needles: string[]): boolean =>
+    needles.some(needle => text.includes(needle))
+
+  if (mentionsAny('interrupted', 'aborted')) return 'interrupted'
+  if (mentionsAny('error', 'exception', 'failed')) return 'failure'
+  return 'success'
+}
+
+/**
+ * Serializes an arbitrary value to a string for storage.
+ *
+ * `undefined`/`null` become `undefined`, strings pass through unchanged and
+ * everything else goes through `JSON.stringify`.
+ *
+ * `JSON.stringify` throws on BigInt values and circular structures. Because
+ * this helper receives arbitrary tool payloads, such values are handled by a
+ * second, lenient pass instead of propagating the exception: BigInts are
+ * written as decimal strings and objects that were already visited are
+ * written as `"[Circular]"`. If even that fails (for example a throwing
+ * `toJSON`), the placeholder `'[unserializable]'` is returned.
+ *
+ * @returns The serialized text, or `undefined` when there is nothing to
+ *   serialize.
+ */
+export function stringifyField(value: unknown): string | undefined {
+  if (value === undefined || value === null) return undefined
+  if (typeof value === 'string') return value
+  try {
+    return JSON.stringify(value)
+  } catch {
+    // Fall back to a lenient serialization that cannot hit the two
+    // well-known JSON.stringify failure modes.
+    const seen = new WeakSet<object>()
+    try {
+      return JSON.stringify(value, (_key, item: unknown) => {
+        if (typeof item === 'bigint') return item.toString()
+        if (item !== null && typeof item === 'object') {
+          if (seen.has(item)) return '[Circular]'
+          seen.add(item)
+        }
+        return item
+      })
+    } catch {
+      return '[unserializable]'
+    }
+  }
+}
+
+/**
+ * Generates a fresh random UUID for a new observation.
+ *
+ * `randomUUID` comes from the `node:crypto` import at the top of this
+ * module, so it is always available here and no feature detection of a
+ * global `crypto` object is needed.
+ */
+function createObservationId(): string {
+  return randomUUID()
+}
+
+/** Returns the lowercase hexadecimal SHA-256 digest of `value`. */
+function hashText(value: string): string {
+  const hasher = createHash('sha256')
+  hasher.update(value)
+  return hasher.digest('hex')
+}
--- a/src/services/skillLearning/observerBackend.ts
+++ b/src/services/skillLearning/observerBackend.ts
@@ -0,0 +1,71 @@
+import type { InstinctCandidate } from './instinctParser.js'
+import type { StoredSkillObservation } from './observationStore.js'
+import type { SkillLearningProjectContext } from './types.js'
+
+/** Optional context handed to a backend alongside the observations. */
+export type ObserverBackendContext = {
+  project?: SkillLearningProjectContext
+}
+
+/** A backend may answer synchronously or with a promise. */
+export type ObserverBackendResult =
+  | InstinctCandidate[]
+  | Promise<InstinctCandidate[]>
+
+/**
+ * Pluggable analyzer that turns stored observations into instinct
+ * candidates. Backends are registered and looked up by `name`.
+ */
+export interface ObserverBackend {
+  readonly name: string
+  analyze(
+    observations: StoredSkillObservation[],
+    ctx?: ObserverBackendContext,
+  ): ObserverBackendResult
+}
+
+// Registered backends keyed by name (module-level, shared by all callers).
+const registry = new Map<string, ObserverBackend>()
+// Name of the backend currently in use; undefined until one is registered.
+let activeName: string | undefined
+
+/**
+ * Adds (or replaces) a backend under its name. The first backend ever
+ * registered also becomes the active one.
+ */
+export function registerObserverBackend(backend: ObserverBackend): void {
+  registry.set(backend.name, backend)
+  if (activeName) return
+  activeName = backend.name
+}
+
+/**
+ * Makes the named backend the active one.
+ *
+ * @throws Error when no backend is registered under `name`.
+ */
+export function setActiveObserverBackend(name: string): void {
+  if (registry.has(name)) {
+    activeName = name
+    return
+  }
+  throw new Error(`Observer backend "${name}" is not registered`)
+}
+
+/**
+ * Returns the currently active backend.
+ *
+ * @throws Error when no backend is active or the active name is no longer
+ *   registered.
+ */
+export function getActiveObserverBackend(): ObserverBackend {
+  if (activeName) {
+    const active = registry.get(activeName)
+    if (active) return active
+  }
+  throw new Error(
+    'No observer backend is active — register one before analyzing observations',
+  )
+}
+
+/** Lists the names of all registered backends in registration order. */
+export function listObserverBackends(): string[] {
+  return [...registry.keys()]
+}
+
+/** Test helper: forgets the active backend and empties the registry. */
+export function resetObserverBackendsForTest(): void {
+  activeName = undefined
+  registry.clear()
+}
+
+/**
+ * Runs the active backend over `observations` and always answers with a
+ * promise, whether the backend itself is synchronous or asynchronous.
+ *
+ * @throws (as a rejection) when no backend is active or the backend fails.
+ */
+export async function analyzeWithActiveBackend(
+  observations: StoredSkillObservation[],
+  ctx?: ObserverBackendContext,
+): Promise<InstinctCandidate[]> {
+  const backend = getActiveObserverBackend()
+  // Returning from an async function resolves plain values and promises alike.
+  return backend.analyze(observations, ctx)
+}
+
+/**
+ * Reads `SKILL_LEARNING_OBSERVER_BACKEND` and returns its trimmed value when
+ * it names a registered backend; otherwise `undefined`.
+ */
+function pickBackendFromEnv(): string | undefined {
+  const requested = process.env.SKILL_LEARNING_OBSERVER_BACKEND?.trim()
+  if (!requested) return undefined
+  return registry.has(requested) ? requested : undefined
+}
+
+/**
+ * Activates the backend requested through the environment (when it is
+ * registered) and returns whichever backend is active afterwards.
+ *
+ * @throws Error when no backend is active.
+ */
+export function resolveDefaultObserverBackend(): ObserverBackend {
+  const fromEnv = pickBackendFromEnv()
+  if (fromEnv !== undefined) {
+    setActiveObserverBackend(fromEnv)
+  }
+  return getActiveObserverBackend()
+}
--- a/src/services/skillLearning/projectContext.ts
+++ b/src/services/skillLearning/projectContext.ts
@@ -0,0 +1,264 @@
+import { execFileSync } from 'child_process'
+import { createHash } from 'crypto'
+import {
+  existsSync,
+  mkdirSync,
+  readFileSync,
+  realpathSync,
+  writeFileSync,
+} from 'fs'
+import { basename, join, resolve } from 'path'
+import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
+import type {
+  ProjectContextSource,
+  SkillLearningProjectContext,
+  SkillLearningProjectRecord,
+  SkillLearningProjectsRegistry,
+  SkillLearningScope,
+} from './types.js'
+
+// Schema version written to, and required from, projects.json.
+const REGISTRY_VERSION = 1
+// Identifier and display name used for the global (non-project) context.
+const GLOBAL_PROJECT_ID = 'global'
+const GLOBAL_PROJECT_NAME = 'Global'
+
+/** Root directory of skill-learning state, under the Claude config home. */
+export function getSkillLearningRootDir(): string {
+  const configHome = getClaudeConfigHomeDir()
+  return join(configHome, 'skill-learning')
+}
+
+/** Path of the `projects.json` registry inside the skill-learning root. */
+export function getProjectsRegistryPath(): string {
+  const root = getSkillLearningRootDir()
+  return join(root, 'projects.json')
+}
+
+/**
+ * Storage directory for a project: `<root>/global` for the global project
+ * id, `<root>/projects/<projectId>` for every other id.
+ */
+export function getProjectStorageDir(projectId: string): string {
+  const root = getSkillLearningRootDir()
+  return projectId === GLOBAL_PROJECT_ID
+    ? join(root, 'global')
+    : join(root, 'projects', projectId)
+}
+
+/** Path of the `project.json` file inside a project's storage directory. */
+export function getProjectContextPath(projectId: string): string {
+  const storageDir = getProjectStorageDir(projectId)
+  return join(storageDir, 'project.json')
+}
+
+// Per-cwd in-memory cache. `resolveContext` does synchronous `git` forks and
+// `persistProjectContext` does registry/project.json writes on every call —
+// in the tool.call hot path (one wrapper invocation per tool) that cost would
+// accumulate into the hundreds-of-ms range per session. Cache keyed by the
+// exact cwd string so different worktrees still get independent entries.
+const contextCache = new Map<string, SkillLearningProjectContext>()
+// Minimum gap (5 minutes) between registry writes triggered by cache hits.
+const PERSIST_INTERVAL_MS = 5 * 60 * 1000
+// Epoch-ms time of the most recent persist. A single value shared by every
+// cached cwd, not one per cache entry.
+let lastPersistAt = 0
+
+/**
+ * Resolves (and caches) the project context for a working directory.
+ *
+ * On a cache miss the context is computed, cached under the exact `cwd`
+ * string and persisted immediately. On a cache hit the cached context is
+ * returned, and it is re-persisted only when more than PERSIST_INTERVAL_MS
+ * has passed since the last persist (of any cwd).
+ *
+ * @param cwd - Directory to resolve; defaults to the process cwd.
+ */
+export function resolveProjectContext(
+  cwd = process.cwd(),
+): SkillLearningProjectContext {
+  const cached = contextCache.get(cwd)
+  if (cached) {
+    // Still touch the registry so long-lived processes keep `lastSeenAt`
+    // reasonably fresh, but throttle the write so it doesn't fire on every
+    // tool call.
+    const now = Date.now()
+    if (now - lastPersistAt > PERSIST_INTERVAL_MS) {
+      // The timestamp is advanced before the write is attempted.
+      lastPersistAt = now
+      persistProjectContext(cached)
+    }
+    return cached
+  }
+  const resolved = resolveContext(cwd)
+  contextCache.set(cwd, resolved)
+  persistProjectContext(resolved)
+  lastPersistAt = Date.now()
+  return resolved
+}
+
+/** Test helper: resets the persist throttle and empties the context cache. */
+export function resetProjectContextCacheForTest(): void {
+  lastPersistAt = 0
+  contextCache.clear()
+}
+
+/** Returns every project recorded in the registry, sorted by project name. */
+export function listKnownProjects(): SkillLearningProjectRecord[] {
+  const { projects } = readProjectsRegistry(getProjectsRegistryPath())
+  const records = Object.values(projects)
+  records.sort((left, right) =>
+    left.projectName.localeCompare(right.projectName),
+  )
+  return records
+}
+
+/**
+ * Computes the project context for `cwd` without caching.
+ *
+ * Identity sources are tried in this order, first match wins:
+ * 1. the `CLAUDE_PROJECT_DIR` environment variable,
+ * 2. the git remote URL of `origin`,
+ * 3. the git repository root,
+ * 4. the global context (no project).
+ */
+function resolveContext(cwd: string): SkillLearningProjectContext {
+  const envProjectDir = process.env.CLAUDE_PROJECT_DIR?.trim()
+  if (envProjectDir) {
+    const projectRoot = normalizePath(envProjectDir)
+    return buildContext({
+      source: 'claude_project_dir',
+      scope: 'project',
+      cwd,
+      projectRoot,
+      identity: `claude-project-dir:${projectRoot}`,
+      projectName: basename(projectRoot) || 'project',
+    })
+  }
+
+  const gitRemote = git(['remote', 'get-url', 'origin'], cwd)
+  if (gitRemote) {
+    const projectRoot = git(['rev-parse', '--show-toplevel'], cwd)
+    const normalizedRemote = normalizeGitRemote(gitRemote)
+    return buildContext({
+      source: 'git_remote',
+      scope: 'project',
+      cwd,
+      // Fall back to cwd when the repository root cannot be determined.
+      projectRoot: projectRoot
+        ? normalizePath(projectRoot)
+        : normalizePath(cwd),
+      gitRemote: normalizedRemote,
+      // Identity is the remote, so checkouts of the same remote share an id.
+      identity: `git-remote:${normalizedRemote}`,
+      projectName: projectNameFromRemote(normalizedRemote),
+    })
+  }
+
+  const gitRoot = git(['rev-parse', '--show-toplevel'], cwd)
+  if (gitRoot) {
+    const projectRoot = normalizePath(gitRoot)
+    return buildContext({
+      source: 'git_root',
+      scope: 'project',
+      cwd,
+      projectRoot,
+      identity: `git-root:${projectRoot}`,
+      projectName: basename(projectRoot) || 'project',
+    })
+  }
+
+  return buildContext({
+    source: 'global',
+    scope: 'global',
+    cwd,
+    projectRoot: undefined,
+    identity: 'global',
+    projectName: GLOBAL_PROJECT_NAME,
+  })
+}
+
+/**
+ * Assembles a project context from resolved identity information.
+ *
+ * Global scope always uses the fixed global project id; any other scope
+ * derives a stable id from `identity`. The cwd is normalized and the storage
+ * directory is derived from the project id.
+ */
+function buildContext(input: {
+  source: ProjectContextSource
+  scope: SkillLearningScope
+  cwd: string
+  projectRoot?: string
+  gitRemote?: string
+  identity: string
+  projectName: string
+}): SkillLearningProjectContext {
+  const { source, scope, projectRoot, gitRemote, identity, projectName } = input
+
+  let projectId: string = GLOBAL_PROJECT_ID
+  if (scope !== 'global') {
+    projectId = stableProjectId(identity)
+  }
+
+  const cwd = normalizePath(input.cwd)
+  const storageDir = getProjectStorageDir(projectId)
+  return {
+    projectId,
+    projectName,
+    scope,
+    source,
+    cwd,
+    projectRoot,
+    gitRemote,
+    storageDir,
+  }
+}
+
+/**
+ * Records a project context on disk: updates its entry in the projects
+ * registry and writes the same record to the project's `project.json`.
+ *
+ * `firstSeenAt` is preserved from an existing registry entry; `lastSeenAt`
+ * and the registry's `updatedAt` are set to the current time.
+ *
+ * NOTE(review): the registry is read, modified and written back without any
+ * locking visible here, so concurrent processes may overwrite each other's
+ * updates — confirm whether that is acceptable.
+ */
+function persistProjectContext(context: SkillLearningProjectContext): void {
+  const now = new Date().toISOString()
+  const registryPath = getProjectsRegistryPath()
+  const registry = readProjectsRegistry(registryPath)
+  const existing = registry.projects[context.projectId]
+  const record: SkillLearningProjectRecord = {
+    ...context,
+    firstSeenAt: existing?.firstSeenAt ?? now,
+    lastSeenAt: now,
+  }
+
+  registry.projects[context.projectId] = record
+  registry.updatedAt = now
+
+  // Both target directories must exist before the two writes below.
+  mkdirSync(context.storageDir, { recursive: true })
+  mkdirSync(getSkillLearningRootDir(), { recursive: true })
+  writeJson(registryPath, registry)
+  writeJson(getProjectContextPath(context.projectId), record)
+}
+
+/**
+ * Loads the projects registry from `path`.
+ *
+ * A missing file, unreadable or corrupt JSON, a different `version`, or a
+ * missing `projects` object all yield a fresh, empty registry so that bad
+ * state never blocks startup. A non-string `updatedAt` is replaced by the
+ * epoch timestamp.
+ */
+function readProjectsRegistry(path: string): SkillLearningProjectsRegistry {
+  const epoch = (): string => new Date(0).toISOString()
+  const emptyRegistry = (): SkillLearningProjectsRegistry => ({
+    version: REGISTRY_VERSION,
+    updatedAt: epoch(),
+    projects: {},
+  })
+
+  if (!existsSync(path)) return emptyRegistry()
+
+  try {
+    const raw = readFileSync(path, 'utf8')
+    const { version, projects, updatedAt } = JSON.parse(
+      raw,
+    ) as Partial<SkillLearningProjectsRegistry>
+    const usable =
+      version === REGISTRY_VERSION && typeof projects === 'object' && projects
+    if (usable) {
+      return {
+        version: REGISTRY_VERSION,
+        updatedAt: typeof updatedAt === 'string' ? updatedAt : epoch(),
+        projects: projects as Record<string, SkillLearningProjectRecord>,
+      }
+    }
+  } catch {
+    // Corrupt or unreadable state: fall through to an empty registry.
+  }
+
+  return emptyRegistry()
+}
+
+/**
+ * Writes `value` to `path` as UTF-8 JSON, indented by two spaces and
+ * terminated by a newline.
+ */
+function writeJson(path: string, value: unknown): void {
+  const serialized = JSON.stringify(value, null, 2)
+  writeFileSync(path, `${serialized}\n`, 'utf8')
+}
+
+/**
+ * Runs `git -C <cwd> <args...>` synchronously and returns its trimmed
+ * standard output.
+ *
+ * @returns The trimmed output, or `null` when the command fails (for
+ *   example git is missing or `cwd` is not a repository) or prints nothing.
+ */
+function git(args: string[], cwd: string): string | null {
+  let stdout: string
+  try {
+    stdout = execFileSync('git', ['-C', cwd, ...args], {
+      encoding: 'utf8',
+      // stdin is not needed and stderr is discarded.
+      stdio: ['ignore', 'pipe', 'ignore'],
+    })
+  } catch {
+    return null
+  }
+  const text = stdout.trim()
+  if (text) return text
+  return null
+}
+
+/**
+ * Canonicalizes a path: makes it absolute, resolves it through the
+ * filesystem when possible, and normalizes it to Unicode NFC. When the path
+ * cannot be resolved on disk, the absolute form is used as-is.
+ */
+function normalizePath(path: string): string {
+  const absolute = resolve(path)
+  let canonical = absolute
+  try {
+    canonical = realpathSync.native(absolute)
+  } catch {
+    // Path does not exist or is not accessible: keep the absolute form.
+  }
+  return canonical.normalize('NFC')
+}
+
+/**
+ * Canonicalizes a git remote URL so that equivalent spellings hash to the
+ * same project identity.
+ *
+ * Steps: trim, convert backslashes to forward slashes, drop trailing
+ * slashes, drop a trailing `.git`, drop any slashes that exposes, and
+ * lowercase the result.
+ *
+ * Trailing slashes are removed before the `.git` suffix is checked, so
+ * `https://host/org/repo.git/` and `https://host/org/repo.git` normalize to
+ * the same value and therefore to the same project id.
+ */
+function normalizeGitRemote(remote: string): string {
+  return remote
+    .trim()
+    .replace(/\\/g, '/')
+    .replace(/\/+$/, '')
+    .replace(/\.git$/i, '')
+    .replace(/\/+$/, '')
+    .toLowerCase()
+}
+
+/**
+ * Derives a display name from a git remote: the last path segment after a
+ * `/` or `:`, without a trailing `.git`. Falls back to `'project'` when no
+ * such segment exists.
+ */
+function projectNameFromRemote(remote: string): string {
+  const lastSegment = /[:/]([^/:]+?)(?:\.git)?$/.exec(remote)
+  const name = lastSegment ? lastSegment[1] : undefined
+  return name ? name : 'project'
+}
+
+/**
+ * Derives a deterministic project id from an identity string: `project-`
+ * followed by the first 16 hex characters of the identity's SHA-256 digest.
+ */
+function stableProjectId(identity: string): string {
+  const digest = createHash('sha256').update(identity).digest('hex')
+  const shortDigest = digest.substring(0, 16)
+  return 'project-' + shortDigest
+}
--- a/src/services/skillLearning/promotion.ts
+++ b/src/services/skillLearning/promotion.ts
@@ -0,0 +1,161 @@
+import { readdir } from 'node:fs/promises'
+import { existsSync } from 'node:fs'
+import { join } from 'node:path'
+import type { Instinct, StoredInstinct } from './instinctParser.js'
+import {
+  getInstinctsDir,
+  loadInstincts,
+  saveInstinct,
+  type InstinctStoreOptions,
+} from './instinctStore.js'
+import { getSkillLearningRoot } from './observationStore.js'
+import type { SkillLearningProjectContext } from './types.js'
+
+/** An instinct seen in enough projects with enough confidence to go global. */
+export type PromotionCandidate = {
+  instinctId: string
+  // Mean confidence across the grouped instincts, rounded to two decimals.
+  averageConfidence: number
+  // Distinct ids of the projects in which the instinct was found.
+  projectIds: string[]
+}
+
+/** Options for `checkPromotion`. */
+export type PromotionOptions = {
+  // Storage root override, forwarded to the instinct/observation stores.
+  rootDir?: string
+  // Minimum number of distinct projects required (defaults to 2).
+  minProjects?: number
+  // Minimum average confidence required (defaults to 0.8).
+  minConfidence?: number
+}
+
+// Ids of instincts already promoted by this process, so that checkPromotion
+// does not promote the same instinct twice. Held in memory only.
+const sessionPromotedIds = new Set<string>()
+
+/** Clears the in-memory record of instincts promoted by this process. */
+export function resetPromotionBookkeeping(): void {
+  sessionPromotedIds.clear()
+}
+
+/**
+ * Finds project-scoped instincts that qualify for promotion to global scope.
+ *
+ * Instincts are grouped by id (non-project scopes are ignored). A group
+ * qualifies when it spans at least `minProjects` distinct project ids and
+ * its mean confidence is at least `minConfidence`. The reported average is
+ * rounded to two decimals; the threshold check uses the unrounded value.
+ *
+ * @returns One candidate per qualifying instinct id, in first-seen order.
+ */
+export function findPromotionCandidates(
+  instincts: Instinct[],
+  minProjects = 2,
+  minConfidence = 0.8,
+): PromotionCandidate[] {
+  const byId = new Map<string, Instinct[]>()
+  for (const instinct of instincts) {
+    if (instinct.scope !== 'project') continue
+    const bucket = byId.get(instinct.id)
+    if (bucket) {
+      bucket.push(instinct)
+    } else {
+      byId.set(instinct.id, [instinct])
+    }
+  }
+
+  const candidates: PromotionCandidate[] = []
+  for (const [instinctId, members] of byId) {
+    const projectIds: string[] = []
+    let confidenceSum = 0
+    for (const member of members) {
+      confidenceSum += member.confidence
+      const projectId = member.projectId
+      if (projectId && !projectIds.includes(projectId)) {
+        projectIds.push(projectId)
+      }
+    }
+
+    const averageConfidence = confidenceSum / members.length
+    const qualifies =
+      projectIds.length >= minProjects && averageConfidence >= minConfidence
+    if (!qualifies) continue
+
+    candidates.push({
+      instinctId,
+      projectIds,
+      averageConfidence: Number(averageConfidence.toFixed(2)),
+    })
+  }
+  return candidates
+}
+
+/**
+ * Promotes qualifying project instincts to global scope.
+ *
+ * Loads the instincts of every project under the storage root, finds the
+ * promotion candidates, and for each candidate not yet promoted by this
+ * process saves a global copy whose confidence is the candidate's average.
+ *
+ * @param options - Thresholds (defaults: 2 projects, 0.8 confidence) and an
+ *   optional storage root.
+ * @returns The candidates promoted during this call.
+ */
+export async function checkPromotion(
+  options: PromotionOptions = {},
+): Promise<PromotionCandidate[]> {
+  const minProjects = options.minProjects ?? 2
+  const minConfidence = options.minConfidence ?? 0.8
+  const allProjectInstincts = await loadAllProjectInstincts(options.rootDir)
+
+  const candidates = findPromotionCandidates(
+    allProjectInstincts,
+    minProjects,
+    minConfidence,
+  )
+  const promoted: PromotionCandidate[] = []
+
+  for (const candidate of candidates) {
+    // Skip instincts this process has already promoted.
+    if (sessionPromotedIds.has(candidate.instinctId)) continue
+
+    // The first loaded instinct with this id serves as the template for the
+    // global copy.
+    const source = allProjectInstincts.find(
+      instinct => instinct.id === candidate.instinctId,
+    )
+    if (!source) continue
+
+    const globalInstinct: StoredInstinct = {
+      ...source,
+      scope: 'global',
+      projectId: undefined,
+      projectName: undefined,
+      confidence: candidate.averageConfidence,
+      updatedAt: new Date().toISOString(),
+    }
+
+    const globalOptions: InstinctStoreOptions = {
+      rootDir: options.rootDir,
+      scope: 'global',
+      project: globalProjectContext(options.rootDir),
+    }
+    await saveInstinct(globalInstinct, globalOptions)
+
+    // Recorded only after a successful save.
+    sessionPromotedIds.add(candidate.instinctId)
+    promoted.push(candidate)
+  }
+
+  return promoted
+}
+
+/**
+ * Loads the project-scoped instincts of every project directory found under
+ * `<root>/projects`.
+ *
+ * Each subdirectory name is used as both project id and project name. The
+ * remaining context fields (`source`, `cwd`) are placeholder values filled
+ * in here rather than the project's recorded values.
+ *
+ * @returns All loaded instincts, or an empty list when the projects
+ *   directory does not exist.
+ */
+async function loadAllProjectInstincts(
+  rootDir?: string,
+): Promise<StoredInstinct[]> {
+  const root = getSkillLearningRoot(rootDir ? { rootDir } : undefined)
+  const projectsRoot = join(root, 'projects')
+  if (!existsSync(projectsRoot)) return []
+
+  const entries = await readdir(projectsRoot, { withFileTypes: true })
+  const instincts: StoredInstinct[] = []
+  for (const entry of entries) {
+    // Only directories represent projects; stray files are ignored.
+    if (!entry.isDirectory()) continue
+    const project: SkillLearningProjectContext = {
+      projectId: entry.name,
+      projectName: entry.name,
+      scope: 'project',
+      source: 'git_root',
+      cwd: projectsRoot,
+      storageDir: join(projectsRoot, entry.name),
+    }
+    const projectInstincts = await loadInstincts({
+      rootDir,
+      project,
+      scope: 'project',
+    })
+    instincts.push(...projectInstincts)
+  }
+  return instincts
+}
+
+/**
+ * Builds the project context that represents the global scope, rooted at the
+ * skill-learning root (optionally overridden by `rootDir`).
+ */
+function globalProjectContext(rootDir?: string): SkillLearningProjectContext {
+  const rootOptions = rootDir ? { rootDir } : undefined
+  const root = getSkillLearningRoot(rootOptions)
+  const storageDir = join(root, 'global')
+  return {
+    projectId: 'global',
+    projectName: 'Global',
+    scope: 'global',
+    source: 'global',
+    cwd: root,
+    storageDir,
+  }
+}
+
+/**
+ * Returns the directory that holds global instincts, for consumers that need
+ * to inspect it. Delegates to `getInstinctsDir` with the global scope and
+ * the global project context.
+ */
+export function getGlobalInstinctsDir(rootDir?: string): string {
+  return getInstinctsDir({
+    rootDir,
+    scope: 'global',
+    project: globalProjectContext(rootDir),
+  })
+}
--- a/src/services/skillLearning/runtimeObserver.ts
+++ b/src/services/skillLearning/runtimeObserver.ts
@@ -0,0 +1,386 @@
+import type { REPLHookContext } from '../../utils/hooks/postSamplingHooks.js'
+import { registerPostSamplingHook } from '../../utils/hooks/postSamplingHooks.js'
+import { getSkillLearningConfig } from './config.js'
+import { isSkillLearningEnabled } from './featureCheck.js'
+import {
+  appendObservation,
+  getSkillLearningRoot,
+  purgeOldObservations,
+  stringifyField,
+} from './observationStore.js'
+import { resolveProjectContext } from './projectContext.js'
+import './sessionObserver.js'
+import { createInstinct } from './instinctParser.js'
+import {
+  analyzeWithActiveBackend,
+  resolveDefaultObserverBackend,
+} from './observerBackend.js'
+import {
+  decayInstinctConfidence,
+  loadInstincts,
+  prunePendingInstincts,
+  upsertInstinct,
+} from './instinctStore.js'
+import type { StoredSkillObservation } from './observationStore.js'
+import type { Message } from '../../types/message.js'
+import {
+  applySkillLifecycleDecision,
+  compareExistingArtifacts,
+  decideSkillLifecycle,
+} from './skillLifecycle.js'
+import {
+  generateAgentCandidates,
+  generateCommandCandidates,
+  clusterInstincts,
+} from './evolution.js'
+import { generateOrMergeSkillDraft } from './skillGenerator.js'
+import { shouldGenerateSkillFromInstincts } from './learningPolicy.js'
+import { writeLearnedCommand } from './commandGenerator.js'
+import { writeLearnedAgent } from './agentGenerator.js'
+import { readObservations } from './observationStore.js'
+import { checkPromotion } from './promotion.js'
+import { existsSync } from 'node:fs'
+import { join, sep } from 'node:path'
+import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
+
+// Session id stamped on every observation built from the REPL message stream
+// and used to select this process's tool-hook observations.
+export const RUNTIME_SESSION_ID = 'runtime-session'
+
+// Set by the first initSkillLearning() call; makes later calls no-ops.
+let initialized = false
+// Number of post-sampling passes that got past all of the self-filter guards.
+let runtimeTurn = 0
+// Timestamp watermark for consumed tool-hook observations — enables replay of
+// only the records that arrived since the previous post-sampling pass.
+let lastConsumedToolHookTimestamp = ''
+
+// --- H5: LLM call throttle ---
+// Count of passes routed to the active (possibly LLM) backend so far.
+let llmCallsThisSession = 0
+// Date.now() value (milliseconds) of the most recent such pass; 0 = never.
+let lastLlmCallTimestamp = 0
+
+// --- H6: message watermark dedup ---
+// Key: `${sessionId}:${messageId}` — prevents reprocessing the same message
+// across repeated post-sampling calls in one REPL session.
+const lastProcessedMessageIds = new Set<string>()
+// Once the set grows past MAX_PROCESSED_IDS, the oldest keys are dropped
+// until TRIM_PROCESSED_IDS_TO remain.
+const MAX_PROCESSED_IDS = 1000
+const TRIM_PROCESSED_IDS_TO = 500
+
+/**
+ * Clears the LLM throttle state (call count and last-call timestamp) and the
+ * processed-message dedup set.
+ */
+export function resetRuntimeLLMBookkeeping(): void {
+  lastProcessedMessageIds.clear()
+  lastLlmCallTimestamp = 0
+  llmCallsThisSession = 0
+}
+
+/** Returns how many post-sampling passes have been counted so far. */
+export function getRuntimeTurn(): number {
+  const turn = runtimeTurn
+  return turn
+}
+
+/**
+ * One-time bootstrap of the skill-learning runtime: selects the observer
+ * backend, registers the post-sampling hook and kicks off startup maintenance.
+ * Calls after the first one return immediately.
+ */
+export function initSkillLearning(): void {
+  if (initialized) {
+    return
+  }
+  initialized = true
+
+  // Apply the SKILL_LEARNING_OBSERVER_BACKEND selection. Skipping this would
+  // leave the registry on whichever backend registered first (heuristic), so
+  // the env switch would silently do nothing. Registry errors are swallowed:
+  // a typo in the variable must never crash startup.
+  try {
+    resolveDefaultObserverBackend()
+  } catch {
+    // Nothing registered yet, or the env names an unknown backend — keep the
+    // registry exactly as it was.
+  }
+
+  registerPostSamplingHook(runSkillLearningPostSampling)
+
+  // Maintenance (confidence decay, observation purge, pending-instinct prune)
+  // is fire-and-forget; failures are discarded so startup is never blocked.
+  void runStartupMaintenance().catch(() => {})
+}
+
+/**
+ * Runs the three startup maintenance tasks for the project resolved from the
+ * current working directory. Does nothing when skill learning is disabled or
+ * `CLAUDE_SKILL_LEARNING_DISABLE` is set. The tasks are started together and
+ * awaited with `Promise.allSettled`, so one failing task does not reject.
+ */
+async function runStartupMaintenance(): Promise<void> {
+  const disabled =
+    !isSkillLearningEnabled() ||
+    Boolean(process.env.CLAUDE_SKILL_LEARNING_DISABLE)
+  if (disabled) return
+
+  const options = { project: resolveProjectContext(process.cwd()) }
+  const tasks = [
+    decayInstinctConfidence(options),
+    purgeOldObservations(options),
+    prunePendingInstincts(30, options),
+  ]
+  await Promise.allSettled(tasks)
+}
+
+/**
+ * Reports whether `cwd` is the skill-learning storage root or lies beneath it.
+ *
+ * The comparison is made on a path-segment boundary: a plain `startsWith`
+ * would also match sibling directories that merely share the root's name as a
+ * prefix (for example `<root>-backup`) and wrongly disable learning there.
+ *
+ * @param cwd Directory to test, typically `process.cwd()`.
+ * @returns `true` when `cwd` equals the root or is nested inside it; `false`
+ *   otherwise, including when resolving the root throws.
+ */
+function isInsideSkillLearningStorage(cwd: string): boolean {
+  try {
+    const root = getSkillLearningRoot()
+    if (cwd === root) return true
+    // Append the separator once so only true descendants match the prefix.
+    const rootWithSep = root.endsWith(sep) ? root : root + sep
+    return cwd.startsWith(rootWithSep)
+  } catch {
+    return false
+  }
+}
+
+/**
+ * Post-sampling hook: turns the latest REPL activity into observations,
+ * derives instinct candidates from them, upserts those as instincts and then
+ * runs the auto-evolution step.
+ *
+ * Returns early (doing nothing) when skill learning is disabled, the query
+ * does not come from the main REPL thread, the call belongs to a sub-agent,
+ * the working directory is inside the skill-learning storage, or no new
+ * observations were gathered.
+ *
+ * @param context Hook context; `messages`, `querySource` and
+ *   `toolUseContext.agentId` are the fields read here.
+ */
+export async function runSkillLearningPostSampling(
+  context: REPLHookContext,
+): Promise<void> {
+  if (!isSkillLearningEnabled()) return
+  // Self-filter layers in order: env escape hatch, entrypoint (only main REPL
+  // thread — `startsWith` covers 'repl_main_thread:outputStyle:<name>'), sub-
+  // agent skip, and a path guard that prevents feedback loops when the user
+  // hand-edits files inside the skill-learning storage directory itself.
+  if (process.env.CLAUDE_SKILL_LEARNING_DISABLE) return
+  if (!context.querySource?.startsWith('repl_main_thread')) return
+  if (context.toolUseContext.agentId) return
+  const cwd = process.cwd()
+  if (isInsideSkillLearningStorage(cwd)) return
+
+  const project = resolveProjectContext(cwd)
+  const options = { project }
+  // Counted before any work, so passes that later find nothing still count.
+  ++runtimeTurn
+
+  const observations: StoredSkillObservation[] = []
+
+  // Always reconstruct from the REPL message stream — it is the only source
+  // that captures user prompts and assistant outcomes (tool-hook observations
+  // cover tool events only).
+  for (const observation of observationsFromMessages(
+    context.messages,
+    project,
+  )) {
+    // Persist each observation and keep the stored form for analysis.
+    observations.push(await appendObservation(observation, options))
+  }
+
+  // Additionally pull tool-hook observations that arrived since the last
+  // consumption watermark — deterministic records with precise outcomes.
+  const all = await readObservations(options)
+  const fresh = all.filter(
+    o =>
+      o.source === 'tool-hook' &&
+      o.sessionId === RUNTIME_SESSION_ID &&
+      typeof o.timestamp === 'string' &&
+      o.timestamp > lastConsumedToolHookTimestamp,
+  )
+  observations.push(...fresh)
+  // Advance the watermark to the greatest timestamp just consumed
+  // (timestamps are compared as strings).
+  for (const o of fresh) {
+    if (o.timestamp > lastConsumedToolHookTimestamp) {
+      lastConsumedToolHookTimestamp = o.timestamp
+    }
+  }
+
+  if (observations.length === 0) return
+
+  // H5: throttle LLM calls — minimum observation count, per-session cap, and
+  // debounce interval. When any gate fires, fall back to heuristic directly.
+  const now = Date.now()
+  const minObservations = 5
+  const { llm } = getSkillLearningConfig()
+  const shouldCallLLM =
+    observations.length >= minObservations &&
+    llmCallsThisSession < llm.maxCallsPerSession &&
+    now - lastLlmCallTimestamp >= llm.cooldownMs
+
+  let candidates
+  if (shouldCallLLM) {
+    // The budget is charged before the call, so a failing call still counts.
+    llmCallsThisSession++
+    lastLlmCallTimestamp = now
+    candidates = await analyzeWithActiveBackend(observations, { project })
+  } else {
+    // Fall back to the heuristic backend without consuming an LLM call.
+    // NOTE(review): './sessionObserver.js' is already imported statically at
+    // the top of this file for its side effects — confirm whether the dynamic
+    // import is still needed.
+    const { heuristicObserverBackend } = await import('./sessionObserver.js')
+    const result = heuristicObserverBackend.analyze(observations, { project })
+    // `analyze` may return either an array or a promise of one.
+    candidates = Array.isArray(result) ? result : await result
+  }
+
+  for (const candidate of candidates) {
+    await upsertInstinct(createInstinct(candidate), options)
+  }
+
+  await autoEvolveLearnedSkills(options)
+}
+
+/**
+ * Test helper: restores the turn counter, the tool-hook watermark and the LLM
+ * bookkeeping to their initial values. The `initialized` flag is left as is.
+ */
+export function resetRuntimeObserverForTest(): void {
+  lastConsumedToolHookTimestamp = ''
+  runtimeTurn = 0
+  resetRuntimeLLMBookkeeping()
+}
+
+/**
+ * Turns the project's stored instincts into learned artifacts, in three
+ * passes — skills, commands, agents — and finally runs the promotion check.
+ *
+ * Each pass looks for existing artifacts in the project-local `.claude`
+ * folder under `process.cwd()` and in the Claude config home directory.
+ *
+ * @param options Carries the project context used to load instincts.
+ */
+async function autoEvolveLearnedSkills(options: {
+  project: ReturnType<typeof resolveProjectContext>
+}): Promise<void> {
+  const instincts = await loadInstincts(options)
+  const cwd = process.cwd()
+
+  const skillRoots = [
+    join(cwd, '.claude', 'skills'),
+    join(getClaudeConfigHomeDir(), 'skills'),
+  ]
+  // Only clusters classified as skills that also pass the learning policy
+  // are considered for generation.
+  const skillClusters = clusterInstincts(instincts).filter(
+    candidate =>
+      candidate.target === 'skill' &&
+      shouldGenerateSkillFromInstincts(candidate.instincts),
+  )
+  for (const cluster of skillClusters) {
+    const outcome = await generateOrMergeSkillDraft(
+      cluster.instincts,
+      { cwd, scope: cluster.instincts[0]?.scope ?? 'project' },
+      skillRoots,
+    )
+    // 'append-evidence' outcomes need no lifecycle decision here.
+    if (outcome.action === 'append-evidence') continue
+    const draft = outcome.draft
+    // Never touch a skill whose SKILL.md already exists at the output path.
+    if (existsSync(join(draft.outputPath, 'SKILL.md'))) continue
+    const existing = await compareExistingArtifacts('skill', draft, skillRoots)
+    const decision = decideSkillLifecycle(draft, existing)
+    await applySkillLifecycleDecision(decision)
+  }
+
+  const commandDrafts = generateCommandCandidates(instincts, { cwd })
+  for (const draft of commandDrafts) {
+    const roots = [
+      join(cwd, '.claude', 'commands'),
+      join(getClaudeConfigHomeDir(), 'commands'),
+    ]
+    const existing = await compareExistingArtifacts('command', draft, roots)
+    // Skip the draft when the comparison reports any existing command.
+    if (existing.length > 0) continue
+    await writeLearnedCommand(draft)
+  }
+
+  const agentDrafts = generateAgentCandidates(instincts, { cwd })
+  for (const draft of agentDrafts) {
+    const roots = [
+      join(cwd, '.claude', 'agents'),
+      join(getClaudeConfigHomeDir(), 'agents'),
+    ]
+    const existing = await compareExistingArtifacts('agent', draft, roots)
+    // Same rule as for commands: existing matches win over the new draft.
+    if (existing.length > 0) continue
+    await writeLearnedAgent(draft)
+  }
+
+  // NOTE(review): called without arguments, so it does not receive `options`
+  // — confirm the promotion check resolves its own context as intended.
+  await checkPromotion()
+}
+
+/**
+ * Converts REPL messages that have not been processed before into
+ * observations.
+ *
+ * - A user message containing `tool_result` blocks yields one `tool_complete`
+ *   observation per block; otherwise its non-blank text yields a
+ *   `user_message` observation.
+ * - An assistant message yields one `tool_start` per `tool_use` block, plus an
+ *   `assistant_message` when it has non-blank text.
+ * - Any other message type yields nothing.
+ *
+ * `tool_result` blocks identify their call through `tool_use_id` and normally
+ * carry no tool name, so the name is resolved through the matching `tool_use`
+ * block; without this every completion would be recorded as `unknown_tool`.
+ *
+ * Side effect: each visited message is added to the module-level dedup set,
+ * which is trimmed oldest-first once it exceeds `MAX_PROCESSED_IDS`.
+ *
+ * @param messages REPL message stream of the current pass.
+ * @param project Project context copied into every observation.
+ * @returns Observations for newly seen messages. All of them share one
+ *   timestamp, taken when this function is called.
+ */
+function observationsFromMessages(
+  messages: Message[],
+  project: ReturnType<typeof resolveProjectContext>,
+): StoredSkillObservation[] {
+  const sessionId = RUNTIME_SESSION_ID
+  const base = {
+    sessionId,
+    projectId: project.projectId,
+    projectName: project.projectName,
+    cwd: project.cwd,
+    timestamp: new Date().toISOString(),
+    source: 'hook' as const,
+  }
+
+  // Index every tool_use block by id. Already-processed messages are scanned
+  // as well, because a result can arrive in a later pass than its call.
+  const toolNamesById = new Map<string, string>()
+  for (const message of messages) {
+    if (message.type !== 'assistant') continue
+    const content: unknown = message.message?.content
+    if (!Array.isArray(content)) continue
+    for (const block of content) {
+      if (!block || typeof block !== 'object') continue
+      const record = block as Record<string, unknown>
+      if (
+        record.type === 'tool_use' &&
+        typeof record.id === 'string' &&
+        typeof record.name === 'string'
+      ) {
+        toolNamesById.set(record.id, record.name)
+      }
+    }
+  }
+
+  return messages.flatMap((message): StoredSkillObservation[] => {
+    // H6: watermark dedup — skip messages already processed in this session.
+    const msgKey = `${sessionId}:${String(message.uuid)}`
+    if (lastProcessedMessageIds.has(msgKey)) return []
+    lastProcessedMessageIds.add(msgKey)
+    // FIFO truncation to keep the set bounded: a Set iterates in insertion
+    // order, so deleting from the front drops the oldest keys until exactly
+    // TRIM_PROCESSED_IDS_TO remain.
+    if (lastProcessedMessageIds.size > MAX_PROCESSED_IDS) {
+      const toDrop = lastProcessedMessageIds.size - TRIM_PROCESSED_IDS_TO
+      const iter = lastProcessedMessageIds.values()
+      for (let i = 0; i < toDrop; i++) {
+        const next = iter.next()
+        if (next.done) break
+        lastProcessedMessageIds.delete(next.value)
+      }
+    }
+
+    if (message.type === 'user') {
+      const toolResults = toolResultsFromContent(message.message?.content)
+      if (toolResults.length > 0) {
+        // Same filter and order as toolResultsFromContent, so indexes align.
+        const callIds = toolResultCallIds(message.message?.content)
+        return toolResults.map((result, index) => {
+          const callId = callIds[index]
+          const resolvedName =
+            result.toolName === 'unknown_tool' && callId !== undefined
+              ? (toolNamesById.get(callId) ?? result.toolName)
+              : result.toolName
+          return {
+            ...base,
+            id: crypto.randomUUID(),
+            event: 'tool_complete',
+            toolName: resolvedName,
+            toolOutput: result.output,
+            outcome: result.isError ? 'failure' : 'success',
+          }
+        })
+      }
+      const text = textFromContent(message.message?.content)
+      return text.trim()
+        ? [
+            {
+              ...base,
+              id: crypto.randomUUID(),
+              event: 'user_message',
+              messageText: text,
+            },
+          ]
+        : []
+    }
+
+    if (message.type === 'assistant') {
+      const toolUses = toolUsesFromContent(message.message?.content)
+      const text = textFromContent(message.message?.content)
+      return [
+        ...toolUses.map(toolUse => ({
+          ...base,
+          id: crypto.randomUUID(),
+          event: 'tool_start' as const,
+          toolName: toolUse.toolName,
+          toolInput: toolUse.input,
+        })),
+        ...(text.trim()
+          ? [
+              {
+                ...base,
+                id: crypto.randomUUID(),
+                event: 'assistant_message' as const,
+                messageText: text,
+              },
+            ]
+          : []),
+      ]
+    }
+
+    return []
+  })
+}
+
+/** Returns the `tool_use_id` of each `tool_result` block, in content order. */
+function toolResultCallIds(content: unknown): Array<string | undefined> {
+  if (!Array.isArray(content)) return []
+  return content.flatMap(block => {
+    if (!block || typeof block !== 'object') return []
+    const record = block as Record<string, unknown>
+    if (record.type !== 'tool_result') return []
+    return [
+      typeof record.tool_use_id === 'string' ? record.tool_use_id : undefined,
+    ]
+  })
+}
+
+/**
+ * Extracts plain text from message content.
+ *
+ * @param content A string, an array of content blocks, or anything else.
+ * @returns The string itself; for an array, every non-empty string `text`
+ *   property of its object blocks joined with newlines; otherwise `''`.
+ */
+function textFromContent(content: unknown): string {
+  if (typeof content === 'string') return content
+  if (!Array.isArray(content)) return ''
+
+  const pieces: string[] = []
+  for (const block of content) {
+    if (!block || typeof block !== 'object') continue
+    const text = (block as Record<string, unknown>).text
+    if (typeof text === 'string' && text) pieces.push(text)
+  }
+  return pieces.join('\n')
+}
+
+/**
+ * Lists the `tool_use` blocks found in message content.
+ *
+ * @param content Message content; anything other than an array yields `[]`.
+ * @returns One entry per `tool_use` block, in order. A missing name becomes
+ *   `'unknown_tool'`; the input is passed through `stringifyField`.
+ */
+function toolUsesFromContent(
+  content: unknown,
+): Array<{ toolName: string; input?: string }> {
+  if (!Array.isArray(content)) return []
+
+  const uses: Array<{ toolName: string; input?: string }> = []
+  for (const block of content) {
+    if (!block || typeof block !== 'object') continue
+    const record = block as Record<string, unknown>
+    if (record.type !== 'tool_use') continue
+    uses.push({
+      toolName: String(record.name ?? 'unknown_tool'),
+      input: stringifyField(record.input),
+    })
+  }
+  return uses
+}
+
+/**
+ * Lists the `tool_result` blocks found in message content.
+ *
+ * @param content Message content; anything other than an array yields `[]`.
+ * @returns One entry per `tool_result` block, in order. The tool name is taken
+ *   from `name`, then `tool_name`, else `'unknown_tool'`; `isError` is true
+ *   only when `is_error` is exactly `true`.
+ */
+function toolResultsFromContent(
+  content: unknown,
+): Array<{ toolName: string; output?: string; isError: boolean }> {
+  if (!Array.isArray(content)) return []
+
+  const results: Array<{ toolName: string; output?: string; isError: boolean }> =
+    []
+  for (const block of content) {
+    if (!block || typeof block !== 'object') continue
+    const record = block as Record<string, unknown>
+    if (record.type !== 'tool_result') continue
+    results.push({
+      toolName: String(record.name ?? record.tool_name ?? 'unknown_tool'),
+      output: stringifyField(record.content),
+      isError: record.is_error === true,
+    })
+  }
+  return results
+}
--- a/src/services/skillLearning/sessionObserver.ts
+++ b/src/services/skillLearning/sessionObserver.ts
@@ -0,0 +1,296 @@
+import type { StoredSkillObservation } from './observationStore.js'
+import {
+  candidateFromObservation,
+  createInstinct,
+  type InstinctCandidate,
+  type StoredInstinct,
+} from './instinctParser.js'
+import type { InstinctDomain, SkillObservationOutcome } from './types.js'
+import {
+  analyzeWithActiveBackend,
+  getActiveObserverBackend,
+  registerObserverBackend,
+  type ObserverBackend,
+  type ObserverBackendContext,
+} from './observerBackend.js'
+import { llmObserverBackend } from './llmObserverBackend.js'
+
+/** Tuning options accepted by the heuristic observation analysis. */
+export type SessionObserverOptions = {
+  // Minimum number of Grep -> Read -> Edit sequences that must be observed
+  // before the repeated-workflow candidate is produced.
+  minRepeatedSequenceCount?: number
+}
+
+// Used when `minRepeatedSequenceCount` is not supplied.
+const DEFAULT_MIN_REPEATED_SEQUENCE_COUNT = 2
+
+/**
+ * Runs every heuristic extractor over the observations.
+ *
+ * @param observations Observations to analyze.
+ * @param options Forwarded to the repeated-tool-sequence extractor only.
+ * @returns Candidates in a fixed order: user corrections, tool error
+ *   resolutions, repeated tool sequences, project conventions.
+ */
+export function heuristicAnalyze(
+  observations: StoredSkillObservation[],
+  options?: SessionObserverOptions,
+): InstinctCandidate[] {
+  const corrections = extractUserCorrections(observations)
+  const resolutions = extractToolErrorResolutions(observations)
+  const sequences = extractRepeatedToolSequences(observations, options)
+  const conventions = extractProjectConventions(observations)
+  return corrections.concat(resolutions, sequences, conventions)
+}
+
+/**
+ * Observer backend backed by `heuristicAnalyze`. The backend context is
+ * accepted for interface compatibility and ignored.
+ */
+export const heuristicObserverBackend: ObserverBackend = {
+  name: 'heuristic',
+  analyze: (
+    observations: StoredSkillObservation[],
+    _ctx?: ObserverBackendContext,
+  ): InstinctCandidate[] => heuristicAnalyze(observations),
+}
+
+// Importing this module registers both backends; the heuristic one is
+// registered first.
+registerObserverBackend(heuristicObserverBackend)
+registerObserverBackend(llmObserverBackend)
+
+/**
+ * Synchronously analyzes observations with the active observer backend.
+ *
+ * @param observations Observations to analyze.
+ * @param options Honored only when the active backend is the heuristic one.
+ * @returns One instinct per candidate produced by the backend.
+ * @throws Error when a non-heuristic backend returns a Promise.
+ */
+export function analyzeObservations(
+  observations: StoredSkillObservation[],
+  options?: SessionObserverOptions,
+): StoredInstinct[] {
+  const backend = getActiveObserverBackend()
+  let candidates: InstinctCandidate[]
+  if (backend.name === 'heuristic') {
+    candidates = heuristicAnalyze(observations, options)
+  } else {
+    candidates = ensureSyncCandidates(backend.analyze(observations))
+  }
+  return candidates.map(candidate => createInstinct(candidate))
+}
+
+/**
+ * Asynchronous counterpart of `analyzeObservations`: delegates to
+ * `analyzeWithActiveBackend` and converts each candidate into an instinct.
+ *
+ * @param observations Observations to analyze.
+ * @param ctx Optional backend context, passed through unchanged.
+ */
+export async function analyzeObservationsAsync(
+  observations: StoredSkillObservation[],
+  ctx?: ObserverBackendContext,
+): Promise<StoredInstinct[]> {
+  const instincts: StoredInstinct[] = []
+  for (const candidate of await analyzeWithActiveBackend(observations, ctx)) {
+    instincts.push(createInstinct(candidate))
+  }
+  return instincts
+}
+
+// Alias: `observeSession` is the same function as `analyzeObservations`.
+export const observeSession = analyzeObservations
+
+/**
+ * Guards the synchronous analysis path against asynchronous backends.
+ *
+ * @param result What a backend's `analyze` returned.
+ * @returns `result` itself when it is an array.
+ * @throws Error when `result` is not an array (i.e. a Promise).
+ */
+function ensureSyncCandidates(
+  result: InstinctCandidate[] | Promise<InstinctCandidate[]>,
+): InstinctCandidate[] {
+  if (!Array.isArray(result)) {
+    throw new Error(
+      'Active observer backend returned a Promise; use analyzeObservationsAsync instead',
+    )
+  }
+  return result
+}
+
+/**
+ * Produces one candidate (confidence 0.7) for every user message that
+ * `parseCorrection` recognizes as a correction.
+ *
+ * The candidate's domain is inferred from the message text, and its evidence
+ * outcome is the outcome of the closest preceding `tool_complete` observation.
+ */
+function extractUserCorrections(
+  observations: StoredSkillObservation[],
+): InstinctCandidate[] {
+  const candidates: InstinctCandidate[] = []
+
+  observations.forEach((observation, index) => {
+    if (observation.event !== 'user_message' || !observation.messageText) {
+      return
+    }
+    const text = observation.messageText.trim()
+    const correction = parseCorrection(text)
+    if (correction === null) return
+
+    candidates.push({
+      ...candidateFromObservation(observation),
+      trigger: correction.trigger,
+      action: correction.action,
+      confidence: 0.7,
+      domain: inferDomain(text),
+      source: 'session-observation',
+      scope: 'project',
+      evidence: [text],
+      evidenceOutcome: recentOutcomeBefore(observations, index),
+      observationIds: [observation.id],
+    })
+  })
+
+  return candidates
+}
+
+/**
+ * Finds failed tool completions that are followed, within the next five
+ * observations, by a successful completion of the same tool, and emits one
+ * debugging candidate (confidence 0.5) per such failure.
+ *
+ * Failures without a tool name, or without a later success, are ignored.
+ */
+function extractToolErrorResolutions(
+  observations: StoredSkillObservation[],
+): InstinctCandidate[] {
+  const candidates: InstinctCandidate[] = []
+
+  observations.forEach((failed, position) => {
+    const isFailure =
+      failed.event === 'tool_complete' && failed.outcome === 'failure'
+    if (!isFailure) return
+
+    // Look ahead at most five observations for the recovery.
+    const lookahead = observations.slice(position + 1, position + 6)
+    const laterSuccess = lookahead.find(
+      next =>
+        next.event === 'tool_complete' &&
+        next.outcome === 'success' &&
+        next.toolName === failed.toolName,
+    )
+    if (!laterSuccess || !failed.toolName) return
+
+    candidates.push({
+      ...candidateFromObservation(failed),
+      trigger: `When ${failed.toolName} fails during this project`,
+      action: `Use the follow-up successful ${failed.toolName} invocation as the resolution pattern before retrying blindly.`,
+      confidence: 0.5,
+      domain: 'debugging',
+      source: 'session-observation',
+      scope: 'project',
+      evidence: [
+        failed.toolOutput ?? `${failed.toolName} failed`,
+        laterSuccess.toolOutput ?? `${laterSuccess.toolName} succeeded`,
+      ],
+      evidenceOutcome: 'success',
+      observationIds: [failed.id, laterSuccess.id],
+    })
+  })
+
+  return candidates
+}
+
+/**
+ * Detects the consecutive tool sequence Grep, Read, Edit among the
+ * `tool_start` / `tool_complete` observations and, once it has occurred at
+ * least the configured number of times, emits a single workflow candidate.
+ *
+ * Confidence is 0.65 for three or more occurrences and 0.5 otherwise. The
+ * evidence outcome is taken from the last matched event when that event is a
+ * `tool_complete`.
+ *
+ * @param observations Observations to scan.
+ * @param options `minRepeatedSequenceCount` overrides the default threshold.
+ * @returns A one-element array, or `[]` when the threshold is not reached.
+ */
+function extractRepeatedToolSequences(
+  observations: StoredSkillObservation[],
+  options?: SessionObserverOptions,
+): InstinctCandidate[] {
+  const threshold =
+    options?.minRepeatedSequenceCount ?? DEFAULT_MIN_REPEATED_SEQUENCE_COUNT
+  const toolEvents = observations.filter(
+    ({ event }) => event === 'tool_start' || event === 'tool_complete',
+  )
+
+  const pattern = ['Grep', 'Read', 'Edit']
+  const matchedIds: string[] = []
+  let count = 0
+  const lastStart = toolEvents.length - pattern.length
+  for (let start = 0; start <= lastStart; start++) {
+    const window = toolEvents.slice(start, start + pattern.length)
+    const matches = pattern.every(
+      (name, offset) => (window[offset]?.toolName ?? '') === name,
+    )
+    if (!matches) continue
+    count += 1
+    matchedIds.push(...window.map(event => event.id))
+  }
+
+  if (count < threshold) return []
+
+  const uniqueMatchedIds = new Set(matchedIds)
+  const first = toolEvents.find(event => uniqueMatchedIds.has(event.id))
+  const lastMatchedId = matchedIds[matchedIds.length - 1]
+  const lastEvent = toolEvents.find(event => event.id === lastMatchedId)
+  const sequenceOutcome =
+    lastEvent?.event === 'tool_complete' ? lastEvent.outcome : undefined
+  const evidence = `Observed ${count} repeated Grep -> Read -> Edit workflow sequences.`
+
+  return [
+    {
+      ...candidateFromObservation(first ?? observations[0]),
+      trigger: 'When changing code in this project',
+      action:
+        'Prefer the Grep -> Read -> Edit workflow: locate symbols, inspect context, then apply the smallest edit.',
+      confidence: count >= 3 ? 0.65 : 0.5,
+      domain: 'workflow',
+      source: 'session-observation',
+      scope: 'project',
+      evidence: [evidence],
+      evidenceOutcome: normalizeOutcome(sequenceOutcome),
+      observationIds: Array.from(uniqueMatchedIds),
+    },
+  ]
+}
+
+/**
+ * Emits a project-convention candidate for every user message whose text
+ * contains one of the convention keywords (Chinese or English).
+ *
+ * Each candidate gets confidence 0.4, deliberately below the 0.75 promotion
+ * threshold: averaging several 0.4 values still gives 0.4, so promotion needs
+ * other, higher-confidence evidence.
+ */
+function extractProjectConventions(
+  observations: StoredSkillObservation[],
+): InstinctCandidate[] {
+  const candidates: InstinctCandidate[] = []
+
+  observations.forEach((observation, index) => {
+    if (observation.event !== 'user_message' || !observation.messageText) {
+      return
+    }
+    const text = observation.messageText.trim()
+    const mentionsConvention =
+      /(项目约定|规范|必须|convention|always|must)/i.test(text)
+    if (!mentionsConvention) return
+
+    candidates.push({
+      ...candidateFromObservation(observation),
+      trigger: 'When working in this project',
+      action: `Follow the project convention: ${text}`,
+      confidence: 0.4,
+      domain: 'project',
+      source: 'session-observation',
+      scope: 'project',
+      evidence: [text],
+      evidenceOutcome: recentOutcomeBefore(observations, index),
+      observationIds: [observation.id],
+    })
+  })
+
+  return candidates
+}
+
+/**
+ * Walks backwards from the observation just before `index` and returns the
+ * normalized outcome of the first `tool_complete` observation it meets.
+ *
+ * @param observations Observation list being analyzed.
+ * @param index Position of the observation whose preceding outcome is wanted.
+ * @returns That outcome, or `undefined` when no earlier `tool_complete`
+ *   exists or its outcome is not a recognized value.
+ */
+function recentOutcomeBefore(
+  observations: StoredSkillObservation[],
+  index: number,
+): SkillObservationOutcome | undefined {
+  let cursor = index
+  while (--cursor >= 0) {
+    const prior = observations[cursor]
+    if (prior.event === 'tool_complete') {
+      return normalizeOutcome(prior.outcome)
+    }
+  }
+  return undefined
+}
+
+/**
+ * Narrows a stored outcome to the `SkillObservationOutcome` values.
+ *
+ * @returns `outcome` when it is `'success'`, `'failure'` or `'unknown'`;
+ *   `undefined` for anything else.
+ */
+function normalizeOutcome(
+  outcome: StoredSkillObservation['outcome'],
+): SkillObservationOutcome | undefined {
+  switch (outcome) {
+    case 'success':
+    case 'failure':
+    case 'unknown':
+      return outcome
+    default:
+      return undefined
+  }
+}
+
+/**
+ * Recognizes a corrective instruction in a user message.
+ *
+ * Two shapes are understood, in this order of precedence:
+ * 1. "avoid X, use Y" (Chinese or English) — yields a trigger/action pair
+ *    naming both phrases.
+ * 2. "should / must Y" (Chinese or English) — yields a generic trigger and an
+ *    action naming the preferred phrase.
+ *
+ * @param text User message text.
+ * @returns The derived trigger and action, or `null` when the text matches
+ *   neither shape.
+ */
+function parseCorrection(
+  text: string,
+): { trigger: string; action: string } | null {
+  // Longest alternative first: with `不要` ahead of `不要再` the shorter one
+  // always won and the stray `再` leaked into the captured `avoid` phrase.
+  const noUsePattern =
+    /(?:不要再|不要|别|不应(?:该)?)\s*(?<avoid>[^，,。.;；]+)[，,\s]*(?:用|使用|改用|应该用|要用)\s*(?<prefer>[^，,。.;；]+)/i
+  // `\b` keeps the English keywords from matching inside longer words.
+  const englishPattern =
+    /\b(?:do not|don't|avoid)\s+(?<avoid>[^,.;]+)[,;\s]+(?:use|prefer)\s+(?<prefer>[^,.;]+)/i
+  // Word boundaries stop "shouldn't ..." from being read as "should" followed
+  // by "n't ...", which inverted the user's instruction; "mustard" and the
+  // like no longer match either.
+  const shouldPattern =
+    /(?:你应该|应该先|\b(?:must|should)\b)\s*(?<prefer>[^，,。.;；]+)/i
+
+  const noUse = text.match(noUsePattern) ?? text.match(englishPattern)
+  if (noUse?.groups) {
+    const avoid = noUse.groups.avoid.trim()
+    const prefer = noUse.groups.prefer.trim()
+    return {
+      trigger: `When choosing between ${avoid} and ${prefer}`,
+      action: `Prefer ${prefer}; avoid ${avoid}.`,
+    }
+  }
+
+  const should = text.match(shouldPattern)
+  if (should?.groups) {
+    const prefer = should.groups.prefer.trim()
+    return {
+      trigger: 'When this user gives a corrective instruction',
+      action: `Prefer this corrected action: ${prefer}.`,
+    }
+  }
+
+  return null
+}
+
+/**
+ * Guesses the instinct domain of a message from keywords in its lower-cased
+ * text. Rules are checked in order — testing, git, security, code-style — and
+ * the first match wins; `'project'` is the fallback.
+ */
+function inferDomain(text: string): InstinctDomain {
+  const lowered = text.toLowerCase()
+  const rules: Array<[RegExp, InstinctDomain]> = [
+    [/test|mock|testing-library|vitest|jest|bun test/, 'testing'],
+    [/git|commit|branch/, 'git'],
+    [/security|secret|token|password/, 'security'],
+    [/style|format|lint|naming/, 'code-style'],
+  ]
+  for (const [pattern, domain] of rules) {
+    if (pattern.test(lowered)) return domain
+  }
+  return 'project'
+}
--- a/src/services/skillLearning/skillGapStore.ts
+++ b/src/services/skillLearning/skillGapStore.ts
@@ -0,0 +1,499 @@
+import { existsSync } from 'node:fs'
+import { mkdir, readFile, rename, writeFile } from 'node:fs/promises'
+import { createHash } from 'node:crypto'
+import { dirname, join } from 'node:path'
+import type { SearchResult } from '../skillSearch/localSearch.js'
+import { createInstinct, type StoredInstinct } from './instinctParser.js'
+import {
+  getProjectStorageDir,
+  resolveProjectContext,
+} from './projectContext.js'
+import { generateSkillDraft, writeLearnedSkill } from './skillGenerator.js'
+import type {
+  InstinctDomain,
+  SkillGapStatus,
+  SkillLearningProjectContext,
+} from './types.js'
+
+/**
+ * The slice of a skill search result that is persisted with a gap: name,
+ * description and score.
+ */
+export type SkillGapRecommendation = Pick<
+  SearchResult,
+  'name' | 'description' | 'score'
+>
+
+/**
+ * Describes a skill file written for a gap. `type` discriminates a draft from
+ * an active skill; `skillPath` is the path of the generated `SKILL.md`.
+ */
+export type SkillGapMaterialization =
+  | {
+      type: 'draft'
+      name: string
+      skillPath: string
+    }
+  | {
+      type: 'active'
+      name: string
+      skillPath: string
+    }
+
+/** Persisted state of one skill gap, stored under its `key`. */
+export type SkillGapRecord = {
+  // Key derived from the prompt by `buildSkillGapKey`.
+  key: string
+  // Trimmed prompt of the most recent recording.
+  prompt: string
+  // Number of times `recordSkillGap` has recorded this gap.
+  count: number
+  // Number of distinct sessions that have hit the draft (see below).
+  draftHits: number
+  // Session IDs that have already contributed a draft hit for this gap —
+  // prevents one session from inflating `draftHits` beyond 1 and flipping the
+  // `draftHits >= 2` active-promotion gate by itself.
+  draftHitSessions: string[]
+  status: SkillGapStatus
+  // Session of the most recent recording; 'unknown-session' when not given.
+  sessionId: string
+  cwd: string
+  projectId: string
+  projectName: string
+  // At most five recommendations, taken from the most recent recording.
+  recommendations: SkillGapRecommendation[]
+  // ISO timestamps of the first and the most recent write of this record.
+  createdAt: string
+  updatedAt: string
+  // Set once the draft / active skill file has been materialized.
+  draft?: SkillGapMaterialization
+  active?: SkillGapMaterialization
+}
+
+// P0-2 hook: when outcome-aware observation lands, augment this with a
+// lookup into observationStore for a matching `outcome: 'success'` tool_complete
+// observation keyed by (sessionId, gap.key). Until then, draft promotion uses
+// count/signal only.
+// A gap becomes a draft once it has been recorded this many times.
+const DRAFT_PROMOTION_COUNT = 2
+// A drafted gap becomes active once it has been recorded this many times ...
+const ACTIVE_PROMOTION_COUNT = 4
+// ... or once its draft has collected this many draft hits.
+const ACTIVE_PROMOTION_DRAFT_HITS = 2
+
+/** On-disk container for all gap records of a project, indexed by gap key. */
+type SkillGapState = {
+  version: 1
+  gaps: Record<string, SkillGapRecord>
+}
+
+/** Input of `recordSkillGap`. */
+export type RecordSkillGapOptions = {
+  // Prompt that exposed the gap; must not be blank.
+  prompt: string
+  // Used to resolve the project when `project` is omitted; also stored.
+  cwd?: string
+  // Defaults to 'unknown-session' in the stored record.
+  sessionId?: string
+  // Only the first five are stored.
+  recommendations?: SearchResult[]
+  // Explicit project context; resolved from `cwd` when omitted.
+  project?: SkillLearningProjectContext
+  // Forwarded to the gap-state read and write helpers.
+  rootDir?: string
+}
+
+/**
+ * Records one occurrence of a skill gap and applies the promotion rules.
+ *
+ * The record keyed by the prompt is created or updated (count + 1) and then:
+ * - a rejected gap is only saved, never promoted;
+ * - a gap without a draft that passes `shouldPromoteToDraft` gets a draft
+ *   skill written and its status set to `'draft'`;
+ * - a drafted, not yet active gap that passes `shouldPromoteToActive` gets an
+ *   active skill written and its status set to `'active'`.
+ * Both promotions can happen within the same call.
+ *
+ * @param options See `RecordSkillGapOptions`.
+ * @returns The gap record as persisted.
+ * @throws Error when the prompt is empty after trimming.
+ */
+export async function recordSkillGap(
+  options: RecordSkillGapOptions,
+): Promise<SkillGapRecord> {
+  const prompt = options.prompt.trim()
+  if (!prompt) {
+    throw new Error('Cannot record an empty skill gap')
+  }
+
+  const project = options.project ?? resolveProjectContext(options.cwd)
+  const state = await readSkillGapState(project, options.rootDir)
+  const key = buildSkillGapKey(prompt)
+  const now = new Date().toISOString()
+  const existing = state.gaps[key]
+
+  // Counters, status, creation time and materializations carry over from the
+  // existing record; prompt, session, cwd and recommendations are replaced.
+  const gap: SkillGapRecord = {
+    key,
+    prompt,
+    count: (existing?.count ?? 0) + 1,
+    draftHits: existing?.draftHits ?? 0,
+    draftHitSessions: existing?.draftHitSessions ?? [],
+    status: existing?.status ?? 'pending',
+    sessionId: options.sessionId ?? 'unknown-session',
+    cwd: options.cwd ?? project.cwd,
+    projectId: project.projectId,
+    projectName: project.projectName,
+    recommendations: (options.recommendations ?? []).slice(0, 5).map(r => ({
+      name: r.name,
+      description: r.description,
+      score: r.score,
+    })),
+    createdAt: existing?.createdAt ?? now,
+    updatedAt: now,
+    draft: existing?.draft,
+    active: existing?.active,
+  }
+
+  // A rejected gap keeps counting but is never promoted again.
+  if (gap.status === 'rejected') {
+    state.gaps[key] = gap
+    await writeSkillGapState(project, state, options.rootDir)
+    return gap
+  }
+
+  if (!gap.draft && shouldPromoteToDraft(gap)) {
+    gap.draft = await writeSkillGapDraft(gap, project)
+    gap.status = 'draft'
+    await clearRuntimeSkillCaches()
+  }
+
+  // Evaluated after the draft step, so a freshly drafted gap can go straight
+  // to active when it already meets the active threshold.
+  if (gap.draft && !gap.active && shouldPromoteToActive(gap)) {
+    gap.active = await writeActiveSkillForGap(gap, project)
+    gap.status = 'active'
+    await clearRuntimeSkillCaches()
+  }
+
+  state.gaps[key] = gap
+  await writeSkillGapState(project, state, options.rootDir)
+  return gap
+}
+
+/**
+ * Lists every gap record of a project, ordered by key (`localeCompare`).
+ *
+ * @param project Project whose gaps are read; defaults to the resolved
+ *   current project.
+ * @param rootDir Forwarded to the gap-state reader.
+ */
+export async function readSkillGaps(
+  project = resolveProjectContext(),
+  rootDir?: string,
+): Promise<SkillGapRecord[]> {
+  const { gaps } = await readSkillGapState(project, rootDir)
+  const records = Object.values(gaps)
+  records.sort((left, right) => left.key.localeCompare(right.key))
+  return records
+}
+
+/**
+ * Looks up the gap whose draft skill file lives at `draftPath`.
+ *
+ * @param draftPath Path compared against each gap's `draft.skillPath`.
+ * @param project Project whose gaps are searched.
+ * @param rootDir Forwarded to the gap-state reader.
+ * @returns The key of the first matching gap, or `undefined` when none match.
+ */
+export async function findGapKeyByDraftPath(
+  draftPath: string,
+  project = resolveProjectContext(),
+  rootDir?: string,
+): Promise<string | undefined> {
+  const { gaps } = await readSkillGapState(project, rootDir)
+  const owner = Object.values(gaps).find(
+    gap => gap.draft?.skillPath === draftPath,
+  )
+  return owner?.key
+}
+
+/**
+ * Registers that a session used the draft skill of a gap, and promotes the
+ * gap to an active skill once the promotion rule is satisfied.
+ *
+ * Nothing is changed — and the stored record is returned as is — when the gap
+ * is unknown, has no draft, is already active, has been rejected, or has
+ * already been hit by `sessionId`.
+ *
+ * Rejected gaps are skipped for consistency with `recordSkillGap` and
+ * `promoteGapToDraft`: rejecting a gap keeps its draft on the record, so
+ * without this guard draft hits could still promote a gap the user rejected.
+ *
+ * @param key Gap key.
+ * @param project Project owning the gap.
+ * @param rootDir Forwarded to the gap-state read and write helpers.
+ * @param sessionId Session contributing the hit; counted at most once.
+ * @returns The updated record, the untouched record, or `undefined` when the
+ *   key is unknown.
+ */
+export async function recordDraftHit(
+  key: string,
+  project = resolveProjectContext(),
+  rootDir?: string,
+  sessionId = 'unknown-session',
+): Promise<SkillGapRecord | undefined> {
+  const state = await readSkillGapState(project, rootDir)
+  const gap = state.gaps[key]
+  if (!gap || !gap.draft || gap.active) return gap
+  if (gap.status === 'rejected') return gap
+  // One draft hit per session: a single actor reloading the same draft
+  // repeatedly must not flip the draftHits>=2 gate.
+  const existingSessions = gap.draftHitSessions ?? []
+  if (existingSessions.includes(sessionId)) return gap
+  const now = new Date().toISOString()
+  const updated: SkillGapRecord = {
+    ...gap,
+    // Tolerate records that lack the counter, as is done for the session
+    // list above; `undefined + 1` would be NaN and block promotion for good.
+    draftHits: (gap.draftHits ?? 0) + 1,
+    draftHitSessions: [...existingSessions, sessionId],
+    updatedAt: now,
+  }
+
+  if (shouldPromoteToActive(updated)) {
+    updated.active = await writeActiveSkillForGap(updated, project)
+    updated.status = 'active'
+    await clearRuntimeSkillCaches()
+  }
+
+  state.gaps[key] = updated
+  await writeSkillGapState(project, state, rootDir)
+  return updated
+}
+
+/**
+ * Explicitly promotes a gap to a draft skill, regardless of its count.
+ *
+ * @param key Gap key.
+ * @param project Project owning the gap.
+ * @param rootDir Forwarded to the gap-state read and write helpers.
+ * @returns `undefined` for an unknown key; the unchanged record when the gap
+ *   is rejected or already has a draft; otherwise the updated record.
+ */
+export async function promoteGapToDraft(
+  key: string,
+  project = resolveProjectContext(),
+  rootDir?: string,
+): Promise<SkillGapRecord | undefined> {
+  const state = await readSkillGapState(project, rootDir)
+  const current = state.gaps[key]
+  if (!current) return undefined
+  if (current.status === 'rejected' || current.draft) return current
+
+  const draft = await writeSkillGapDraft(current, project)
+  const promoted: SkillGapRecord = {
+    ...current,
+    draft,
+    status: 'draft',
+    updatedAt: new Date().toISOString(),
+  }
+  state.gaps[key] = promoted
+  await writeSkillGapState(project, state, rootDir)
+  await clearRuntimeSkillCaches()
+  return promoted
+}
+
+/**
+ * Marks a gap as rejected and persists the change. Any existing draft or
+ * active materialization stays on the record.
+ *
+ * @param key Gap key.
+ * @param project Project owning the gap.
+ * @param rootDir Forwarded to the gap-state read and write helpers.
+ * @returns The updated record, or `undefined` when the key is unknown.
+ */
+export async function rejectSkillGap(
+  key: string,
+  project = resolveProjectContext(),
+  rootDir?: string,
+): Promise<SkillGapRecord | undefined> {
+  const state = await readSkillGapState(project, rootDir)
+  const current = state.gaps[key]
+  if (!current) {
+    return undefined
+  }
+
+  const rejected: SkillGapRecord = {
+    ...current,
+    status: 'rejected',
+    updatedAt: new Date().toISOString(),
+  }
+  state.gaps[key] = rejected
+  await writeSkillGapState(project, state, rootDir)
+  return rejected
+}
+
+/**
+ * Draft promotion rule: the gap must have been recorded at least
+ * `DRAFT_PROMOTION_COUNT` times.
+ *
+ * Repetition is the only criterion. The legacy `isStrongReusableSignal`
+ * shortcut let single-utterance Chinese exhortations go straight to active;
+ * P0-2 is to reintroduce an outcome-aware signal once the observation layer
+ * supplies it.
+ */
+export function shouldPromoteToDraft(gap: SkillGapRecord): boolean {
+  const { count } = gap
+  return count >= DRAFT_PROMOTION_COUNT
+}
+
+/**
+ * Active promotion rule: the gap must already have a draft, and either its
+ * count has reached `ACTIVE_PROMOTION_COUNT` or its draft hits have reached
+ * `ACTIVE_PROMOTION_DRAFT_HITS`.
+ */
+export function shouldPromoteToActive(gap: SkillGapRecord): boolean {
+  if (!gap.draft) return false
+  const recordedOftenEnough = gap.count >= ACTIVE_PROMOTION_COUNT
+  const hitOftenEnough = gap.draftHits >= ACTIVE_PROMOTION_DRAFT_HITS
+  return recordedOftenEnough || hitOftenEnough
+}
+
+/**
+ * Materializes a gap as a draft skill under `.claude/skills/.drafts` of the
+ * project root (or the project cwd when no root is known).
+ *
+ * The skill file is written only if it does not exist yet; the written
+ * content is the generated draft plus a "Promotion Rule" section.
+ *
+ * @returns The draft materialization pointing at the `SKILL.md` path.
+ */
+async function writeSkillGapDraft(
+  gap: SkillGapRecord,
+  project: SkillLearningProjectContext,
+): Promise<SkillGapMaterialization> {
+  const pendingInstinct = createGapInstinct(gap, 'pending')
+  const baseDir = project.projectRoot ?? project.cwd
+  const draftsRoot = join(baseDir, '.claude', 'skills', '.drafts')
+  const draft = generateSkillDraft([pendingInstinct], {
+    cwd: project.projectRoot ?? project.cwd,
+    outputRoot: draftsRoot,
+    scope: 'project',
+    name: `draft-${buildNameFragment(gap.prompt)}`,
+    description:
+      'Draft learned skill candidate. Promote after repeated evidence or explicit user correction.',
+  })
+
+  const skillFile = join(draft.outputPath, 'SKILL.md')
+  const alreadyWritten = existsSync(skillFile)
+  if (!alreadyWritten) {
+    const promotionRule =
+      '\n## Promotion Rule\n\nDo not move this draft into active skills until the same gap repeats or the user explicitly confirms this should become reusable.\n'
+    await writeLearnedSkill({
+      ...draft,
+      content: draft.content + promotionRule,
+    })
+  }
+  return { type: 'draft', name: draft.name, skillPath: skillFile }
+}
+
+/**
+ * Materializes a gap as an active skill, generated with the default output
+ * root. The skill file is written only if it does not exist yet.
+ *
+ * @returns The active materialization pointing at the `SKILL.md` path.
+ */
+async function writeActiveSkillForGap(
+  gap: SkillGapRecord,
+  project: SkillLearningProjectContext,
+): Promise<SkillGapMaterialization> {
+  const activeInstinct = createGapInstinct(gap, 'active')
+  const skill = generateSkillDraft([activeInstinct], {
+    cwd: project.projectRoot ?? project.cwd,
+    scope: 'project',
+    name: buildNameFragment(gap.prompt),
+    description: buildGapAction(gap.prompt),
+  })
+
+  const skillPath = join(skill.outputPath, 'SKILL.md')
+  const alreadyWritten = existsSync(skillPath)
+  if (!alreadyWritten) {
+    await writeLearnedSkill(skill)
+  }
+  return { type: 'active', name: skill.name, skillPath }
+}
+
+/**
+ * Builds the synthetic instinct that stands in for a skill gap when a skill
+ * file is generated from it.
+ *
+ * Confidence is fixed per status: 0.82 for `'active'`, 0.55 for anything
+ * else.
+ */
+function createGapInstinct(
+  gap: SkillGapRecord,
+  status: StoredInstinct['status'],
+): StoredInstinct {
+  const { prompt, projectId, projectName, count } = gap
+  const confidence = status === 'active' ? 0.82 : 0.55
+  const evidence = [
+    `Skill gap prompt: ${summarize(prompt, 180)}`,
+    `No high-confidence active skill was auto-loaded.`,
+    `Observed ${count} time(s).`,
+  ]
+  return createInstinct({
+    trigger: `When the user asks for ${summarize(prompt, 120)}`,
+    action: buildGapAction(prompt),
+    confidence,
+    domain: inferDomain(prompt),
+    source: 'session-observation',
+    scope: 'project',
+    projectId,
+    projectName,
+    evidence,
+    status,
+  })
+}
+
+/**
+ * Picks the action text for a gap prompt.
+ *
+ * Keyword rules are tried in order and the first hit wins; when none match,
+ * the action simply refers back to a summary of the prompt itself.
+ */
+function buildGapAction(prompt: string): string {
+  const rules: ReadonlyArray<readonly [RegExp, string]> = [
+    [
+      /feature\s*\(|feature flag|flag_name|stub|no-op|noop|最小实现/i,
+      'Audit feature flags by scanning feature() call sites, excluding generated/dependency noise, classifying each candidate as stub, shell, MVP, or thin-toggle, and writing an evidence-backed document.',
+    ],
+    [
+      /skill|技能|学习|进化|evolve|learning/i,
+      'Run skill discovery first; auto-load only high-confidence matching skills; record a skill gap when none match; promote repeated or corrected gaps into learned skills.',
+    ],
+    [
+      /test|测试|stub|调用链|参数/i,
+      'Infer tests from existing files, parameters, exports, and call chains before simplifying mocks or inventing behavior.',
+    ],
+  ]
+  for (const [pattern, action] of rules) {
+    if (pattern.test(prompt)) return action
+  }
+  return `Reuse the workflow learned from this prompt: ${summarize(prompt, 180)}.`
+}
+
+/**
+ * Guesses the instinct domain of a prompt from keyword matches.
+ *
+ * The prompt is lower-cased, rules are checked in order, and the first match
+ * wins; a prompt that matches nothing is classified as `'workflow'`.
+ *
+ * `git`, `pr` and `format` are anchored so that unrelated words do not
+ * trigger them: "digit"/"legit" are not git, "expr" is not a PR, and
+ * "information" is not formatting. Words that merely start with `git`
+ * ("github", "gitignore") still count.
+ */
+function inferDomain(prompt: string): InstinctDomain {
+  const text = prompt.toLowerCase()
+  if (/test|测试|stub|fixture|断言/.test(text)) return 'testing'
+  if (/error|bug|fix|失败|错误|修复|debug/.test(text)) return 'debugging'
+  if (/security|安全|漏洞|secret|token/.test(text)) return 'security'
+  // `\b` also fires next to CJK characters, so "提交pr" and "用git" match.
+  if (/\bgit|commit|branch|\bpr\b/.test(text)) return 'git'
+  if (/style|lint|(?<!in)format|命名|规范/.test(text)) return 'code-style'
+  return 'workflow'
+}
+
+/**
+ * Loads the persisted gap state for a project.
+ *
+ * - Missing file: returns an empty state.
+ * - Any other read failure: rethrown, so a later write cannot persist an
+ *   empty state over records that merely could not be read.
+ * - Unparseable or unmigratable content: the raw text is copied (best
+ *   effort) to `<path>.corrupt-<timestamp>` and an empty state is returned.
+ */
+async function readSkillGapState(
+  project: SkillLearningProjectContext,
+  rootDir?: string,
+): Promise<SkillGapState> {
+  const statePath = getSkillGapStatePath(project, rootDir)
+  const emptyState = (): SkillGapState => ({ version: 1, gaps: {} })
+
+  let contents: string
+  try {
+    contents = await readFile(statePath, 'utf8')
+  } catch (error) {
+    // EACCES, EIO and friends must surface; only "not created yet" is benign.
+    const notCreatedYet = (error as NodeJS.ErrnoException).code === 'ENOENT'
+    if (!notCreatedYet) throw error
+    return emptyState()
+  }
+
+  try {
+    return migrateLegacyGapState(JSON.parse(contents) as SkillGapState)
+  } catch {
+    // Keep a copy of the damaged file so it can be recovered by hand.
+    const backupPath = `${statePath}.corrupt-${Date.now()}`
+    try {
+      await writeFile(backupPath, contents, 'utf8')
+    } catch {
+      /* best effort */
+    }
+    return emptyState()
+  }
+}
+
+/**
+ * Normalizes a parsed gap-state file into the current record shape.
+ *
+ * For every record this fills in `count`, `draftHits` and `draftHitSessions`
+ * with validated values and recomputes `status` so that it never claims more
+ * than the record can back up:
+ * - a `'draft'` status below the promotion count with no draft file recorded
+ *   is demoted to `'pending'`;
+ * - an `'active'` status with no active file recorded is demoted to `'draft'`
+ *   when a draft exists, otherwise to `'pending'`.
+ *
+ * All other fields of the record are carried over unchanged.
+ */
+function migrateLegacyGapState(state: SkillGapState): SkillGapState {
+  const migrated: Record<string, SkillGapRecord> = {}
+  for (const [key, record] of Object.entries(state.gaps ?? {})) {
+    const legacy = record as Partial<SkillGapRecord> & {
+      status?: unknown
+    }
+    const draftHits =
+      typeof legacy.draftHits === 'number' && Number.isFinite(legacy.draftHits)
+        ? legacy.draftHits
+        : 0
+    // JSON can still produce a non-finite number (`1e999` parses to
+    // Infinity), which would satisfy every promotion threshold. Treat it the
+    // same as a missing count, mirroring the `draftHits` check above.
+    const count =
+      typeof legacy.count === 'number' && Number.isFinite(legacy.count)
+        ? legacy.count
+        : 1
+    const normalizedStatus = normalizeLegacyStatus(legacy.status)
+    const hasDraftFile = Boolean(legacy.draft)
+    const hasActiveFile = Boolean(legacy.active)
+
+    let status: SkillGapStatus = normalizedStatus
+    if (status === 'draft' && count < DRAFT_PROMOTION_COUNT && !hasDraftFile) {
+      // Legacy first-call-writes-draft artifact with no file on disk yet.
+      status = 'pending'
+    }
+    if (status === 'active' && !hasActiveFile) {
+      status = hasDraftFile ? 'draft' : 'pending'
+    }
+
+    // Drop anything that is not a string session id.
+    const draftHitSessions = Array.isArray(legacy.draftHitSessions)
+      ? legacy.draftHitSessions.filter(
+          (session): session is string => typeof session === 'string',
+        )
+      : []
+    migrated[key] = {
+      ...(record as SkillGapRecord),
+      count,
+      draftHits,
+      draftHitSessions,
+      status,
+    }
+  }
+  return { version: 1, gaps: migrated }
+}
+
+/**
+ * Coerces an arbitrary stored value to a known gap status, defaulting to
+ * `'pending'` for anything unrecognized.
+ */
+function normalizeLegacyStatus(value: unknown): SkillGapStatus {
+  const known: readonly SkillGapStatus[] = [
+    'pending',
+    'draft',
+    'active',
+    'rejected',
+  ]
+  return known.find(status => status === value) ?? 'pending'
+}
+
+/**
+ * Persists the gap state for a project, creating the directory if needed.
+ *
+ * The JSON is written to a sibling temp file and then renamed over the real
+ * path, so a crash mid-write cannot leave a truncated state file behind for
+ * the strict reader to trip over.
+ */
+async function writeSkillGapState(
+  project: SkillLearningProjectContext,
+  state: SkillGapState,
+  rootDir?: string,
+): Promise<void> {
+  const target = getSkillGapStatePath(project, rootDir)
+  await mkdir(dirname(target), { recursive: true })
+  const staging = `${target}.tmp-${process.pid}-${Date.now()}`
+  const serialized = `${JSON.stringify(state, null, 2)}\n`
+  await writeFile(staging, serialized, 'utf8')
+  await rename(staging, target)
+}
+
+/**
+ * Resolves the location of `skill-gaps.json` for a project.
+ *
+ * With an explicit `rootDir` the file lives under `<rootDir>/global` for the
+ * `'global'` project id and under `<rootDir>/projects/<projectId>` otherwise.
+ * Without one, the project's regular storage directory is used.
+ */
+function getSkillGapStatePath(
+  project: SkillLearningProjectContext,
+  rootDir?: string,
+): string {
+  const fileName = 'skill-gaps.json'
+  const { projectId } = project
+  if (!rootDir) {
+    return join(getProjectStorageDir(projectId), fileName)
+  }
+  if (projectId === 'global') {
+    return join(rootDir, 'global', fileName)
+  }
+  return join(rootDir, 'projects', projectId, fileName)
+}
+
+/**
+ * Builds the storage key for a gap: the readable name fragment of the prompt
+ * followed by the first 8 hex characters of the prompt's hash.
+ */
+function buildSkillGapKey(prompt: string): string {
+  const fragment = buildNameFragment(prompt)
+  const digest = hash(prompt).slice(0, 8)
+  return `${fragment}-${digest}`
+}
+
+/**
+ * Derives a short kebab-case name fragment from a prompt.
+ *
+ * A handful of Chinese keywords are first mapped to English words, the text
+ * is lower-cased, and the first five non-stop-word tokens (three or more
+ * characters, starting with a letter or digit) are joined with `-`. The
+ * result is capped at 54 characters with trailing dashes removed, and is
+ * `'learned-gap'` when no token survives.
+ */
+function buildNameFragment(prompt: string): string {
+  const translations: ReadonlyArray<readonly [string, string]> = [
+    ['技能', ' skill '],
+    ['学习', ' learning '],
+    ['进化', ' evolution '],
+    ['测试', ' testing '],
+    ['最小实现', ' minimal implementation '],
+  ]
+  let text = prompt
+  for (const [from, to] of translations) {
+    text = text.replaceAll(from, to)
+  }
+  text = text.toLowerCase()
+
+  const stopWords = new Set([
+    'the',
+    'and',
+    'for',
+    'with',
+    'this',
+    'that',
+    'user',
+    'about',
+    'feature',
+    'flag',
+    'name',
+  ])
+  const kept: string[] = []
+  for (const token of text.match(/[a-z0-9][a-z0-9_-]{2,}/g) ?? []) {
+    if (stopWords.has(token)) continue
+    kept.push(token)
+    if (kept.length === 5) break
+  }
+
+  const joined = kept.length > 0 ? kept.join('-') : 'learned-gap'
+  return joined.slice(0, 54).replace(/-+$/g, '')
+}
+
+/**
+ * Collapses every run of whitespace to a single space, trims the ends, and
+ * truncates the result to at most `max` UTF-16 code units.
+ */
+function summarize(value: string, max: number): string {
+  const collapsed = value.replace(/\s+/g, ' ').trim()
+  return collapsed.slice(0, max)
+}
+
+/** Returns the hex-encoded SHA-1 digest of `value`. */
+function hash(value: string): string {
+  const digest = createHash('sha1')
+  digest.update(value)
+  return digest.digest('hex')
+}
+
+/**
+ * Clears the in-process commands cache so freshly generated skills are
+ * picked up. The module is imported lazily and any failure is ignored.
+ */
+async function clearRuntimeSkillCaches(): Promise<void> {
+  try {
+    const commands = await import('../../commands.js')
+    commands.clearCommandsCache()
+  } catch {
+    // Best effort only; generated skill files are still available next process.
+  }
+}
--- a/src/services/skillLearning/skillGenerator.ts
+++ b/src/services/skillLearning/skillGenerator.ts
@@ -0,0 +1,206 @@
+import { mkdir, readFile, writeFile } from 'node:fs/promises'
+import { join } from 'node:path'
+import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
+import { clearSkillIndexCache } from '../skillSearch/localSearch.js'
+import type { Instinct } from './instinctParser.js'
+import { buildLearnedSkillName, normalizeSkillName } from './learningPolicy.js'
+import {
+  compareExistingArtifacts,
+  scoreArtifactOverlap,
+  type ExistingSkill,
+} from './skillLifecycle.js'
+import type { LearnedSkillDraft, SkillLearningScope } from './types.js'
+
+/**
+ * Overlap score at or above which `generateOrMergeSkillDraft` treats an
+ * existing skill as a duplicate and appends evidence to it instead of
+ * creating a new skill.
+ */
+export const DUPLICATE_SKILL_OVERLAP_THRESHOLD = 0.8
+
+/** Options that control where and under what name a learned skill is generated. */
+export type SkillGeneratorOptions = {
+  /** Base directory for project-scoped skills; defaults to `process.cwd()`. */
+  cwd?: string
+  /** Directory for non-project skills; defaults to `<config home>/skills`. */
+  globalSkillsDir?: string
+  /** When set, the skill is placed directly under this directory regardless of scope. */
+  outputRoot?: string
+  /** Explicit skill name; it is normalized before use. */
+  name?: string
+  /** Explicit description; otherwise one is derived from the first instinct's action. */
+  description?: string
+}
+
+/**
+ * Builds an in-memory skill draft from one or more instincts. Nothing is
+ * written to disk.
+ *
+ * @param instincts Source instincts; must not be empty.
+ * @param options Naming, description, scope and output-location overrides.
+ * @returns The draft, with `confidence` set to the mean instinct confidence
+ *   rounded to two decimals.
+ * @throws Error when `instincts` is empty.
+ */
+export function generateSkillDraft(
+  instincts: Instinct[],
+  options?: SkillGeneratorOptions & { scope?: SkillLearningScope },
+): LearnedSkillDraft {
+  if (instincts.length === 0) {
+    throw new Error('Cannot generate a skill draft without instincts')
+  }
+
+  const scope = options?.scope ?? instincts[0]?.scope ?? 'project'
+
+  let name: string
+  if (options?.name) {
+    name = normalizeSkillName(options.name)
+  } else {
+    name = buildSkillName(instincts)
+  }
+
+  let confidenceTotal = 0
+  for (const instinct of instincts) {
+    confidenceTotal += instinct.confidence
+  }
+  const confidence = confidenceTotal / instincts.length
+
+  const description = options?.description ?? buildDescription(instincts)
+  const outputPath = getLearnedSkillPath(name, scope, options)
+  const content = buildSkillContent({
+    name,
+    description,
+    confidence,
+    instincts,
+  })
+  const sourceInstinctIds = instincts.map(instinct => instinct.id)
+
+  return {
+    name,
+    description,
+    scope,
+    sourceInstinctIds,
+    confidence: Number(confidence.toFixed(2)),
+    content,
+    outputPath,
+  }
+}
+
+/**
+ * Result of `generateOrMergeSkillDraft`: either a fresh draft to create, or
+ * a record of the existing skill that received the new evidence.
+ */
+export type SkillDedupOutcome =
+  | { action: 'create'; draft: LearnedSkillDraft }
+  | {
+      action: 'append-evidence'
+      // Existing skill judged to be a duplicate of the draft.
+      target: ExistingSkill
+      // Overlap score between the draft and `target`.
+      overlap: number
+      // Path of the file the evidence was appended to.
+      appendedPath: string
+    }
+
+/**
+ * Generates a draft and checks it against the skills found under
+ * `existingRoots`.
+ *
+ * The first candidate whose overlap with the draft reaches
+ * `DUPLICATE_SKILL_OVERLAP_THRESHOLD` receives the instincts' evidence and
+ * no new skill is proposed; otherwise the draft is returned for creation.
+ */
+export async function generateOrMergeSkillDraft(
+  instincts: Instinct[],
+  options: SkillGeneratorOptions & { scope?: SkillLearningScope },
+  existingRoots: string[],
+): Promise<SkillDedupOutcome> {
+  const draft = generateSkillDraft(instincts, options)
+  const similar = await compareExistingArtifacts('skill', draft, existingRoots)
+
+  for (const target of similar) {
+    const overlap = scoreArtifactOverlap(draft, target)
+    const isDuplicate = overlap >= DUPLICATE_SKILL_OVERLAP_THRESHOLD
+    if (!isDuplicate) continue
+
+    const appendedPath = await appendInstinctEvidenceToSkill(target, instincts)
+    return { action: 'append-evidence', target, overlap, appendedPath }
+  }
+
+  return { action: 'create', draft }
+}
+
+/**
+ * Appends the instincts' evidence to an existing skill file under a dated
+ * `## Learned evidence (<ISO timestamp>)` heading and invalidates the skill
+ * index cache.
+ *
+ * When none of the instincts carries any evidence the file and the cache are
+ * left untouched, so repeated merges do not pile up empty sections.
+ *
+ * @param target Skill to extend; `target.content` is used as the base text
+ *   if the file cannot be read.
+ * @param instincts Instincts whose `evidence` entries become bullet lines.
+ * @returns `target.path`.
+ */
+export async function appendInstinctEvidenceToSkill(
+  target: ExistingSkill,
+  instincts: Instinct[],
+): Promise<string> {
+  const bullets = instincts.flatMap(instinct =>
+    instinct.evidence.map(evidence => `- ${evidence}`),
+  )
+  // Nothing to record: a heading with no entries would only add noise.
+  if (bullets.length === 0) return target.path
+
+  let existing: string
+  try {
+    existing = await readFile(target.path, 'utf8')
+  } catch {
+    // Fall back to the snapshot captured when the skill was loaded.
+    existing = target.content
+  }
+
+  const now = new Date().toISOString()
+  const block = ['', `## Learned evidence (${now})`, '', ...bullets, ''].join(
+    '\n',
+  )
+  // Make sure the new section starts on its own line.
+  const merged = existing.endsWith('\n')
+    ? existing + block
+    : `${existing}\n${block}`
+  await writeFile(target.path, merged, 'utf8')
+  clearSkillIndexCache()
+  return target.path
+}
+
+/**
+ * Writes a draft to `<outputPath>/SKILL.md`, creating the directory as
+ * needed, then invalidates the skill index cache and (best effort) the
+ * commands cache.
+ *
+ * @returns The path of the written `SKILL.md`.
+ */
+export async function writeLearnedSkill(
+  draft: LearnedSkillDraft,
+): Promise<string> {
+  const { outputPath, content } = draft
+  await mkdir(outputPath, { recursive: true })
+  const filePath = join(outputPath, 'SKILL.md')
+  await writeFile(filePath, content, 'utf8')
+  clearSkillIndexCache()
+  try {
+    const commands = await import('../../commands.js')
+    commands.clearCommandsCache()
+  } catch {
+    // Best effort: the next process will see the generated skill even if the
+    // in-process command cache cannot be cleared due to import timing.
+  }
+  return filePath
+}
+
+/**
+ * Resolves the directory a learned skill is stored in.
+ *
+ * Precedence: an explicit `outputRoot`, then `<cwd>/.claude/skills` for
+ * project scope, then the global skills directory for any other scope.
+ */
+export function getLearnedSkillPath(
+  name: string,
+  scope: SkillLearningScope,
+  options?: SkillGeneratorOptions,
+): string {
+  const explicitRoot = options?.outputRoot
+  if (explicitRoot) return join(explicitRoot, name)
+
+  const skillsRoot =
+    scope === 'project'
+      ? join(options?.cwd ?? process.cwd(), '.claude', 'skills')
+      : (options?.globalSkillsDir ?? join(getClaudeConfigHomeDir(), 'skills'))
+  return join(skillsRoot, name)
+}
+
+/** Derives a skill name from the instincts via the shared learning policy. */
+function buildSkillName(instincts: Instinct[]): string {
+  const derivedName = buildLearnedSkillName(instincts)
+  return derivedName
+}
+
+/**
+ * Derives a one-line description from the first instinct's action.
+ *
+ * Whitespace runs are collapsed, the text is trimmed and capped at 120
+ * characters. A missing action yields 'Apply a learned project pattern'; an
+ * action that is empty or whitespace-only yields
+ * 'Apply learned project patterns'.
+ */
+function buildDescription(instincts: Instinct[]): string {
+  const action = instincts[0]?.action ?? 'Apply a learned project pattern'
+  // Trim before and after the cut so the description never starts or ends
+  // with a space and a blank action falls through to the default.
+  const short = action.replace(/\s+/g, ' ').trim().slice(0, 120).trimEnd()
+  return short.length > 0 ? short : 'Apply learned project patterns'
+}
+
+/**
+ * Renders the `SKILL.md` text for a learned skill: YAML frontmatter followed
+ * by a title and `Trigger`, `Action` and `Evidence` bullet sections.
+ *
+ * The frontmatter always carries `origin: skill-learning`, and confidence is
+ * rounded to two decimals.
+ */
+function buildSkillContent(params: {
+  name: string
+  description: string
+  confidence: number
+  instincts: Instinct[]
+}): string {
+  const { name, description, confidence, instincts } = params
+
+  const sourceIds = instincts.map(instinct => JSON.stringify(instinct.id))
+  const frontmatter = [
+    '---',
+    `name: ${name}`,
+    `description: ${JSON.stringify(description)}`,
+    'origin: skill-learning',
+    `confidence: ${Number(confidence.toFixed(2))}`,
+    `evolved_from: [${sourceIds.join(', ')}]`,
+    '---',
+  ]
+
+  const triggerBullets = instincts.map(instinct => `- ${instinct.trigger}`)
+  const actionBullets = instincts.map(instinct => `- ${instinct.action}`)
+  const evidenceBullets = instincts.flatMap(instinct =>
+    instinct.evidence.map(evidence => `- ${evidence}`),
+  )
+  const body = [
+    `# ${titleCase(name)}`,
+    '',
+    '## Trigger',
+    '',
+    triggerBullets.join('\n'),
+    '',
+    '## Action',
+    '',
+    actionBullets.join('\n'),
+    '',
+    '## Evidence',
+    '',
+    evidenceBullets.join('\n'),
+    '',
+  ]
+
+  return [...frontmatter, '', ...body].join('\n')
+}
+
+/**
+ * Turns a dash-separated name into space-separated words with the first
+ * character of each word upper-cased. Empty segments are dropped.
+ */
+function titleCase(value: string): string {
+  const words: string[] = []
+  for (const part of value.split('-')) {
+    if (!part) continue
+    words.push(part.charAt(0).toUpperCase() + part.slice(1))
+  }
+  return words.join(' ')
+}
--- a/src/services/skillLearning/skillLifecycle.ts
+++ b/src/services/skillLearning/skillLifecycle.ts
@@ -0,0 +1,496 @@
+import {
+  mkdir,
+  readdir,
+  readFile,
+  rename,
+  rm,
+  writeFile,
+} from 'node:fs/promises'
+import { existsSync } from 'node:fs'
+import { basename, dirname, join } from 'node:path'
+import { clearSkillIndexCache } from '../skillSearch/localSearch.js'
+import type { LearnedSkillDraft } from './types.js'
+import { writeLearnedSkill } from './skillGenerator.js'
+
+/** A skill (or other learned artifact) that already exists on disk. */
+export type ExistingSkill = {
+  /** Frontmatter `name`, or a name derived from the directory/file name. */
+  name: string
+  /** Path of the markdown file itself (for skills, the `SKILL.md`). */
+  path: string
+  /** Frontmatter `description`, or `''` when absent. */
+  description: string
+  /** Full text of the file as read from disk. */
+  content: string
+  /** Treated as 0.5 when missing while deciding on a replacement. */
+  confidence?: number
+  /** `'superseded'` and `'archived'` skills are always eligible for replacement. */
+  status?: 'active' | 'superseded' | 'archived' | 'deleted'
+  /** Must be empty or missing for a hard delete to be considered safe. */
+  referencedBy?: string[]
+  /** Must be `true` for a hard delete to be considered safe. */
+  safeToDelete?: boolean
+  /** Must be `'low'` for a hard delete to be considered safe. */
+  quality?: 'low' | 'medium' | 'high'
+}
+
+/**
+ * What to do with a learned draft relative to the skills that already exist.
+ * Produced by `decideSkillLifecycle` and carried out by
+ * `applySkillLifecycleDecision`.
+ */
+export type SkillLifecycleDecision =
+  | { type: 'create'; draft: LearnedSkillDraft; reason: string }
+  | { type: 'merge'; targetSkill: ExistingSkill; patch: string; reason: string }
+  | {
+      type: 'replace'
+      targetSkill: ExistingSkill
+      draft: LearnedSkillDraft
+      reason: string
+      // When set, the old skill is deleted instead of archived.
+      hardDelete?: boolean
+    }
+  | { type: 'archive'; targetSkill: ExistingSkill; reason: string }
+  | {
+      type: 'delete'
+      targetSkill: ExistingSkill
+      reason: string
+      // An explicit `false` withholds the hard-delete permission on apply.
+      confirmed?: boolean
+    }
+
+/** Record written to `replacement-manifest.json` when a skill is archived or deleted. */
+export type ReplacementManifest = {
+  oldSkill: string
+  oldPath: string
+  /** Name of the replacing skill, when the removal is part of a replacement. */
+  newSkill?: string
+  /** Location of the replacing skill, when the removal is part of a replacement. */
+  newPath?: string
+  action: 'archive' | 'delete'
+  reason: string
+  /** ISO-8601 timestamp of the operation. */
+  replacedAt: string
+  /** `true` for archives, `false` for hard deletes. */
+  recoverable: boolean
+}
+
+/** Knobs for the archive/delete operations. */
+export type SkillLifecycleOptions = {
+  /** Must be `true` for `deleteSkill` to proceed; it throws otherwise. */
+  allowHardDelete?: boolean
+  /** Where archived skills go; defaults to `.archive` next to the skill directory. */
+  archiveRoot?: string
+  /**
+   * Where manifests (and tombstones) are written. Defaults to the archived
+   * directory for archives and to `.tombstones` next to the skill directory
+   * for deletes.
+   */
+  manifestRoot?: string
+  /** Clock override used for timestamps; defaults to the current time. */
+  now?: Date
+}
+
+/**
+ * Kind of learned artifact. `'skill'` roots are scanned for `SKILL.md`
+ * files; the other kinds are scanned for any `.md` file.
+ */
+export type LearnedArtifactKind = 'skill' | 'command' | 'agent'
+
+/** The fields of a draft that take part in overlap scoring. */
+export type ArtifactDraft = {
+  name: string
+  description: string
+  content: string
+}
+
+/**
+ * Finds existing artifacts that overlap with a draft.
+ *
+ * @param kind Artifact kind; only used when `rootsOrSkills` holds directory
+ *   paths that still need to be scanned.
+ * @param draft Draft to compare against.
+ * @param rootsOrSkills Either root directories to scan or already loaded
+ *   artifacts. An empty array is treated as "no artifacts".
+ * @returns Artifacts scoring at least 0.18, best match first.
+ */
+export async function compareExistingArtifacts(
+  kind: LearnedArtifactKind,
+  draft: ArtifactDraft,
+  rootsOrSkills: string[] | ExistingSkill[],
+): Promise<ExistingSkill[]> {
+  const givenRoots =
+    rootsOrSkills.length > 0 && typeof rootsOrSkills[0] === 'string'
+  let existing: ExistingSkill[]
+  if (givenRoots) {
+    existing = await loadExistingArtifacts(kind, rootsOrSkills as string[])
+  } else {
+    existing = rootsOrSkills as ExistingSkill[]
+  }
+
+  const draftTerms = terms(
+    `${draft.name} ${draft.description} ${draft.content}`,
+  )
+  const ranked: Array<{ skill: ExistingSkill; score: number }> = []
+  for (const skill of existing) {
+    const skillTerms = terms(
+      `${skill.name} ${skill.description} ${skill.content}`,
+    )
+    const score = overlapScore(draftTerms, skillTerms)
+    if (score >= 0.18) ranked.push({ skill, score })
+  }
+  ranked.sort((left, right) => right.score - left.score)
+  return ranked.map(entry => entry.skill)
+}
+
+/** Skill-specific shorthand for `compareExistingArtifacts('skill', …)`. */
+export async function compareExistingSkills(
+  draft: LearnedSkillDraft,
+  rootsOrSkills: string[] | ExistingSkill[],
+): Promise<ExistingSkill[]> {
+  const matches = await compareExistingArtifacts('skill', draft, rootsOrSkills)
+  return matches
+}
+
+/**
+ * Loads the artifacts found under the given roots. Skills are delegated to
+ * `loadExistingSkills`; for other kinds every `.md` file is collected.
+ * Roots that do not exist are skipped.
+ */
+export async function loadExistingArtifacts(
+  kind: LearnedArtifactKind,
+  roots: string[],
+): Promise<ExistingSkill[]> {
+  if (kind === 'skill') return loadExistingSkills(roots)
+
+  const found: ExistingSkill[] = []
+  for (const root of roots) {
+    if (existsSync(root)) {
+      await collectArtifactFiles(root, found)
+    }
+  }
+  return found
+}
+
+/**
+ * Chooses how a learned draft should land relative to existing skills.
+ *
+ * Order of evaluation:
+ * 1. With `allowHardDelete`, the first skill that is safe to hard-delete
+ *    yields a `delete` decision.
+ * 2. With no existing skill at all, the draft is created.
+ * 3. Against the first existing skill only: overlap ≥ 0.72 plus draft
+ *    confidence ≥ 0.75 plus `shouldReplaceSkill` yields `replace`;
+ *    overlap ≥ 0.35 yields `merge`; anything lower yields `create`.
+ */
+export function decideSkillLifecycle(
+  draft: LearnedSkillDraft,
+  existingSkills: ExistingSkill[],
+  options: Pick<SkillLifecycleOptions, 'allowHardDelete'> = {},
+): SkillLifecycleDecision {
+  if (options.allowHardDelete) {
+    const disposable = existingSkills.find(skill => isSafeToHardDelete(skill))
+    if (disposable) {
+      return {
+        type: 'delete',
+        targetSkill: disposable,
+        reason:
+          'Existing skill is low quality, unreferenced, and safe to delete.',
+        confirmed: true,
+      }
+    }
+  }
+
+  const [target] = existingSkills
+  if (!target) {
+    return {
+      type: 'create',
+      draft,
+      reason: 'No overlapping active skill found.',
+    }
+  }
+
+  const score = scoreArtifactOverlap(draft, target)
+  const strongOverlap = score >= 0.72
+  const confidentDraft = draft.confidence >= 0.75
+
+  if (strongOverlap && confidentDraft && shouldReplaceSkill(draft, target)) {
+    return {
+      type: 'replace',
+      targetSkill: target,
+      draft,
+      reason: `New learned skill has high overlap (${score.toFixed(2)}) and higher confidence.`,
+    }
+  }
+
+  if (score < 0.35) {
+    return { type: 'create', draft, reason: 'Overlap is too low to merge.' }
+  }
+
+  return {
+    type: 'merge',
+    targetSkill: target,
+    patch: buildMergePatch(draft),
+    reason: `Existing skill overlaps with the learned pattern (${score.toFixed(2)}).`,
+  }
+}
+
+/**
+ * Carries out a lifecycle decision on disk.
+ *
+ * - `create`: writes the draft.
+ * - `merge`: writes a patch file next to the target skill.
+ * - `replace`: archives (or, with `hardDelete`, deletes) the target and then
+ *   writes the draft.
+ * - `archive` / `delete`: archives or deletes the target.
+ *
+ * `merge` and `replace` refuse to touch a skill whose frontmatter does not
+ * mark it as generated by skill learning; they log to stderr and return an
+ * empty result instead.
+ *
+ * @returns Whichever of the active/archived/deleted/manifest/tombstone paths
+ *   the performed operation produced.
+ * @throws Error from `deleteSkill` when a hard delete is attempted without
+ *   permission.
+ */
+export async function applySkillLifecycleDecision(
+  decision: SkillLifecycleDecision,
+  options: SkillLifecycleOptions = {},
+): Promise<{
+  activePath?: string
+  archivedPath?: string
+  deletedPath?: string
+  manifestPath?: string
+  tombstonePath?: string
+}> {
+  switch (decision.type) {
+    case 'create': {
+      return { activePath: await writeLearnedSkill(decision.draft) }
+    }
+    case 'merge': {
+      // Never modify skills the user wrote by hand.
+      if (!isSkillLearningGenerated(decision.targetSkill)) {
+        process.stderr.write(
+          `[skill-learning] skip user-authored skill: ${decision.targetSkill.path}\n`,
+        )
+        return {}
+      }
+      return {
+        activePath: await writeMergePatch(decision.targetSkill, decision.patch),
+      }
+    }
+    case 'replace': {
+      // Never replace skills the user wrote by hand.
+      if (!isSkillLearningGenerated(decision.targetSkill)) {
+        process.stderr.write(
+          `[skill-learning] skip user-authored skill: ${decision.targetSkill.path}\n`,
+        )
+        return {}
+      }
+      // Archive/delete the superseded skill before the replacement is
+      // written so that any search-index refresh between the two steps can
+      // never observe both skills active simultaneously. `decision.draft
+      // .outputPath` is the exact path `writeLearnedSkill` will target.
+      const predictedNewPath = decision.draft.outputPath
+      if (decision.hardDelete) {
+        // The decision itself grants the hard-delete permission here.
+        const { deletedPath, manifestPath, tombstonePath } = await deleteSkill(
+          decision.targetSkill,
+          decision.reason,
+          {
+            newSkill: decision.draft.name,
+            newPath: predictedNewPath,
+          },
+          { ...options, allowHardDelete: true },
+        )
+        const activePath = await writeLearnedSkill(decision.draft)
+        return { activePath, deletedPath, manifestPath, tombstonePath }
+      }
+      const { archivedPath, manifestPath } = await archiveSkill(
+        decision.targetSkill,
+        decision.reason,
+        {
+          newSkill: decision.draft.name,
+          newPath: predictedNewPath,
+        },
+        options,
+      )
+      const activePath = await writeLearnedSkill(decision.draft)
+      return { activePath, archivedPath, manifestPath }
+    }
+    case 'archive':
+      return await archiveSkill(
+        decision.targetSkill,
+        decision.reason,
+        undefined,
+        options,
+      )
+    case 'delete':
+      // Requires the caller's permission and a decision that was not
+      // explicitly left unconfirmed.
+      return await deleteSkill(
+        decision.targetSkill,
+        decision.reason,
+        undefined,
+        {
+          ...options,
+          allowHardDelete:
+            options.allowHardDelete && decision.confirmed !== false,
+        },
+      )
+  }
+}
+
+/**
+ * Recursively collects every `SKILL.md` beneath the given roots. Roots that
+ * do not exist are skipped.
+ */
+export async function loadExistingSkills(
+  roots: string[],
+): Promise<ExistingSkill[]> {
+  const found: ExistingSkill[] = []
+  for (const root of roots) {
+    if (existsSync(root)) {
+      await collectSkillFiles(root, found)
+    }
+  }
+  return found
+}
+
+/**
+ * Moves a skill's directory into the archive and records a recoverable
+ * replacement manifest.
+ *
+ * The archived directory is named `<skill dir name>-<timestamp>` and placed
+ * in `options.archiveRoot` (default: `.archive` beside the skill directory).
+ * The manifest goes to `options.manifestRoot`, or into the archived directory
+ * when that is not set.
+ */
+export async function archiveSkill(
+  skill: ExistingSkill,
+  reason: string,
+  replacement?: { newSkill?: string; newPath?: string },
+  options: SkillLifecycleOptions = {},
+): Promise<{ archivedPath: string; manifestPath: string }> {
+  const skillDir = dirname(skill.path)
+  const archiveRoot = options.archiveRoot ?? join(dirname(skillDir), '.archive')
+  const archivedName = `${basename(skillDir)}-${timestamp(options.now)}`
+  const archivedPath = join(archiveRoot, archivedName)
+
+  await mkdir(archiveRoot, { recursive: true })
+  await rename(skillDir, archivedPath)
+
+  const manifest: ReplacementManifest = {
+    oldSkill: skill.name,
+    oldPath: skill.path,
+    newSkill: replacement?.newSkill,
+    newPath: replacement?.newPath,
+    action: 'archive',
+    reason,
+    replacedAt: (options.now ?? new Date()).toISOString(),
+    recoverable: true,
+  }
+  const manifestDir = options.manifestRoot ?? archivedPath
+  const manifestPath = await writeReplacementManifest(manifestDir, manifest)
+
+  clearSkillIndexCache()
+  return { archivedPath, manifestPath }
+}
+
+/**
+ * Permanently removes a skill's directory after recording what was deleted.
+ *
+ * Before the directory is removed, a non-recoverable replacement manifest
+ * and a tombstone file (holding the skill's last content) are written to
+ * `options.manifestRoot`, which defaults to `.tombstones` beside the skill
+ * directory.
+ *
+ * @param skill Skill to delete; its whole directory is removed.
+ * @param reason Human-readable reason, stored in the manifest.
+ * @param replacement Optional pointer to the skill that supersedes this one.
+ * @param options Must have `allowHardDelete: true`.
+ * @returns The deleted skill path plus the manifest and tombstone paths.
+ * @throws Error when `options.allowHardDelete` is not set.
+ */
+export async function deleteSkill(
+  skill: ExistingSkill,
+  reason: string,
+  replacement?: { newSkill?: string; newPath?: string },
+  options: SkillLifecycleOptions = {},
+): Promise<{
+  deletedPath: string
+  manifestPath: string
+  tombstonePath: string
+}> {
+  if (!options.allowHardDelete) {
+    throw new Error('Hard delete requires allowHardDelete=true')
+  }
+
+  const skillDir = dirname(skill.path)
+  const content = existsSync(skill.path)
+    ? await readFile(skill.path, 'utf8')
+    : ''
+  const manifestRoot =
+    options.manifestRoot ?? join(dirname(skillDir), '.tombstones')
+  const manifestPath = await writeReplacementManifest(manifestRoot, {
+    oldSkill: skill.name,
+    oldPath: skill.path,
+    newSkill: replacement?.newSkill,
+    newPath: replacement?.newPath,
+    action: 'delete',
+    reason,
+    replacedAt: (options.now ?? new Date()).toISOString(),
+    recoverable: false,
+  })
+  // `skill.name` comes from free-form frontmatter. Path separators or other
+  // characters that are illegal in file names would make the tombstone land
+  // outside `manifestRoot` (or fail to be written at all, aborting the delete
+  // half-way), so reduce the name to a single safe path segment.
+  const tombstoneStem =
+    skill.name.replace(/[\\/:*?"<>|\u0000-\u001f]+/g, '-') ||
+    basename(skillDir)
+  const tombstonePath = join(
+    manifestRoot,
+    `${tombstoneStem}-${timestamp(options.now)}.tombstone.json`,
+  )
+  await writeFile(
+    tombstonePath,
+    `${JSON.stringify({ deletedSkill: skill.name, oldPath: skill.path, content }, null, 2)}\n`,
+    'utf8',
+  )
+  // Only remove the skill once both records are safely on disk.
+  await rm(skillDir, { recursive: true, force: true })
+  clearSkillIndexCache()
+  return { deletedPath: skill.path, manifestPath, tombstonePath }
+}
+
+/**
+ * Writes `manifest` as pretty-printed JSON to
+ * `<directory>/replacement-manifest.json`, creating the directory first.
+ *
+ * @returns The path of the manifest file.
+ */
+export async function writeReplacementManifest(
+  directory: string,
+  manifest: ReplacementManifest,
+): Promise<string> {
+  await mkdir(directory, { recursive: true })
+  const manifestPath = join(directory, 'replacement-manifest.json')
+  const serialized = `${JSON.stringify(manifest, null, 2)}\n`
+  await writeFile(manifestPath, serialized, 'utf8')
+  return manifestPath
+}
+
+/**
+ * Stores a merge patch as `learned-skill.patch.md` in the skill's directory
+ * and invalidates the skill index cache. The skill file itself is not
+ * modified.
+ *
+ * @returns The path of the patch file.
+ */
+async function writeMergePatch(
+  skill: ExistingSkill,
+  patch: string,
+): Promise<string> {
+  const skillDir = dirname(skill.path)
+  const patchPath = join(skillDir, 'learned-skill.patch.md')
+  await writeFile(patchPath, patch, 'utf8')
+  clearSkillIndexCache()
+  return patchPath
+}
+
+/**
+ * Renders the markdown merge patch for a draft: a short header naming the
+ * draft and its confidence, followed by the draft content as suggested
+ * additions.
+ */
+function buildMergePatch(draft: LearnedSkillDraft): string {
+  const header = [
+    '# Learned Skill Merge Patch',
+    '',
+    `Target learned skill: ${draft.name}`,
+    `Confidence: ${draft.confidence}`,
+    '',
+  ]
+  const additions = ['## Suggested additions', '', draft.content]
+  return [...header, ...additions].join('\n')
+}
+
+/**
+ * Decides whether a draft is enough of an improvement to replace `target`.
+ *
+ * Superseded or archived targets are always replaceable. Otherwise the draft
+ * must be at least 0.15 more confident (a target without a confidence counts
+ * as 0.5) or more than 160 characters longer.
+ */
+function shouldReplaceSkill(
+  draft: LearnedSkillDraft,
+  target: ExistingSkill,
+): boolean {
+  const retired = target.status === 'superseded' || target.status === 'archived'
+  if (retired) return true
+
+  const confidenceGap = draft.confidence - (target.confidence ?? 0.5)
+  if (confidenceGap >= 0.15) return true
+
+  const contentGap = draft.content.length - target.content.length
+  return contentGap > 160
+}
+
+/**
+ * A skill may be hard-deleted only when it is explicitly flagged safe to
+ * delete, nothing references it, and its quality is `'low'`.
+ */
+function isSafeToHardDelete(skill: ExistingSkill): boolean {
+  if (skill.safeToDelete !== true) return false
+  const referenceCount = skill.referencedBy?.length ?? 0
+  if (referenceCount !== 0) return false
+  return skill.quality === 'low'
+}
+
+/**
+ * Formats a date as an ISO-8601 string with `:` and `.` replaced by `-`, for
+ * use in directory and file names. Defaults to the current time.
+ */
+function timestamp(date = new Date()): string {
+  const iso = date.toISOString()
+  return iso.split(/[:.]/).join('-')
+}
+
+/**
+ * Walks `root` recursively and appends an entry to `results` for every
+ * `SKILL.md` found. Directories named `.archive` are not descended into.
+ *
+ * The skill name falls back to the containing directory's name, and the
+ * description to `''`, when the frontmatter does not provide them.
+ */
+async function collectSkillFiles(
+  root: string,
+  results: ExistingSkill[],
+): Promise<void> {
+  for (const entry of await readdir(root, { withFileTypes: true })) {
+    const full = join(root, entry.name)
+
+    if (entry.isDirectory()) {
+      if (entry.name !== '.archive') {
+        await collectSkillFiles(full, results)
+      }
+      continue
+    }
+
+    const isSkillFile = entry.isFile() && entry.name === 'SKILL.md'
+    if (!isSkillFile) continue
+
+    const content = await readFile(full, 'utf8')
+    const name = parseFrontmatter(content, 'name') ?? basename(dirname(full))
+    const description = parseFrontmatter(content, 'description') ?? ''
+    results.push({ name, description, path: full, content })
+  }
+}
+
+/**
+ * Walks `root` recursively and appends an entry to `results` for every file
+ * ending in `.md`. Directories named `.archive` are not descended into.
+ *
+ * The artifact name falls back to the file name without its `.md` suffix,
+ * and the description to `''`, when the frontmatter does not provide them.
+ */
+async function collectArtifactFiles(
+  root: string,
+  results: ExistingSkill[],
+): Promise<void> {
+  for (const entry of await readdir(root, { withFileTypes: true })) {
+    const full = join(root, entry.name)
+
+    if (entry.isDirectory()) {
+      if (entry.name !== '.archive') {
+        await collectArtifactFiles(full, results)
+      }
+      continue
+    }
+
+    const isMarkdown = entry.isFile() && entry.name.endsWith('.md')
+    if (!isMarkdown) continue
+
+    const content = await readFile(full, 'utf8')
+    const name =
+      parseFrontmatter(content, 'name') ?? entry.name.replace(/\.md$/, '')
+    const description = parseFrontmatter(content, 'description') ?? ''
+    results.push({ name, description, path: full, content })
+  }
+}
+
+/**
+ * Reads a single scalar value from a document's YAML frontmatter.
+ *
+ * Only the block between the opening `---` and the next `---` is searched,
+ * so a body line such as `origin: skill-learning` in a user-authored document
+ * is never mistaken for a generated-skill marker.
+ *
+ * Double-quoted values are decoded as JSON strings, which is how generated
+ * skills write their description, so escaped quotes survive the round trip.
+ * Values that are unquoted (or quoted but not valid JSON) are returned up to
+ * the first `"` or end of line. The value must sit on the same line as the
+ * key.
+ *
+ * @param content Full document text.
+ * @param key Frontmatter key to look up; matched literally.
+ * @returns The trimmed value, or `undefined` when there is no frontmatter,
+ *   no such key, or nothing after the key on its line.
+ */
+function parseFrontmatter(content: string, key: string): string | undefined {
+  const fmMatch = content.match(/^---\r?\n([\s\S]*?)\r?\n---/)
+  if (!fmMatch) return undefined
+  const frontmatter = fmMatch[1]
+  // Keys are matched literally, never as regex syntax.
+  const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
+
+  // Preferred form: a complete double-quoted string, escapes included.
+  const quoted = frontmatter.match(
+    new RegExp(`^${escapedKey}:[ \\t]*("(?:[^"\\\\\\n]|\\\\.)*")`, 'm'),
+  )
+  if (quoted) {
+    try {
+      const decoded: unknown = JSON.parse(quoted[1])
+      if (typeof decoded === 'string') return decoded.trim()
+    } catch {
+      // Quoted, but using escapes JSON does not know: use the plain match.
+    }
+  }
+
+  // `[ \t]*` rather than `\s*`: the value must not be taken from the next
+  // line when the key's own line is empty.
+  const plain = frontmatter.match(
+    new RegExp(`^${escapedKey}:[ \\t]*"?([^"\\n]+)"?`, 'm'),
+  )
+  return plain?.[1]?.trim()
+}
+
+/**
+ * Reports whether a skill's frontmatter marks it as produced by skill
+ * learning (`origin: skill-learning`).
+ */
+function isSkillLearningGenerated(skill: ExistingSkill): boolean {
+  const origin = parseFrontmatter(skill.content, 'origin')
+  return origin === 'skill-learning'
+}
+
+/**
+ * Extracts the set of distinct lower-case ASCII alphanumeric tokens longer
+ * than two characters from `value`. Everything else acts as a separator.
+ */
+function terms(value: string): Set<string> {
+  const unique = new Set<string>()
+  for (const token of value.toLowerCase().split(/[^a-z0-9]+/)) {
+    if (token.length > 2) unique.add(token)
+  }
+  return unique
+}
+
+/**
+ * Overlap coefficient of two term sets: the number of shared terms divided
+ * by the size of the smaller set. Returns 0 when either set is empty.
+ */
+function overlapScore(a: Set<string>, b: Set<string>): number {
+  if (a.size === 0 || b.size === 0) return 0
+  const shared = [...a].filter(term => b.has(term)).length
+  return shared / Math.min(a.size, b.size)
+}
+
+/**
+ * Scores how much a draft overlaps with an existing artifact, comparing the
+ * terms of each side's name, description and content.
+ */
+export function scoreArtifactOverlap(
+  draft: ArtifactDraft,
+  existing: { name: string; description: string; content: string },
+): number {
+  const draftText = `${draft.name} ${draft.description} ${draft.content}`
+  const existingText = `${existing.name} ${existing.description} ${existing.content}`
+  return overlapScore(terms(draftText), terms(existingText))
+}
--- a/src/services/skillLearning/toolEventObserver.ts
+++ b/src/services/skillLearning/toolEventObserver.ts
@@ -0,0 +1,312 @@
+import { randomUUID } from 'node:crypto'
+import {
+  appendObservation,
+  type StoredSkillObservation,
+} from './observationStore.js'
+import type {
+  SkillLearningProjectContext,
+  SkillObservationOutcome,
+} from './types.js'
+import { logForDebugging } from '../../utils/debug.js'
+import { logError } from '../../utils/log.js'
+
+/**
+ * Tool event hook layer.
+ *
+ * Preferred observation pathway: consumers (tool dispatcher, REPL turn loop,
+ * or integration tests) call `recordToolStart` / `recordToolComplete` /
+ * `recordToolError` / `recordUserCorrection` as tool-level events happen,
+ * producing deterministic observations with `source: 'tool-hook'`.
+ *
+ * Post-sampling reconstruction (runtimeObserver.observationsFromMessages)
+ * is retained as a fallback for environments where the caller cannot emit
+ * tool events directly.
+ *
+ * @todo Wire these functions into `src/Tool.ts`'s public dispatch so the
+ *       main REPL tool loop produces tool-hook observations automatically.
+ *       Until then, callers that do have tool-level signal (integration
+ *       tests, custom harness code, future tool middleware) can use the
+ *       functions here directly.
+ */
+
+/**
+ * Identity of the tool event being recorded; passed by the caller to every
+ * `record*` function in this module.
+ */
+export type ToolHookContext = {
+  // Session the event belongs to; also the key used for turn bookkeeping.
+  sessionId: string
+  // Turn number within the session; copied onto each observation.
+  turn: number
+  projectId: string
+  projectName: string
+  cwd: string
+  // Forwarded to `appendObservation` as its `project` option.
+  project?: SkillLearningProjectContext
+}
+
+/** Maximum number of turns tracked per session before pruning. */
+const EMITTED_TURNS_SET_MAX = 500
+/** How many turns to retain after pruning a session Set. */
+const EMITTED_TURNS_SET_KEEP = 250
+/** Maximum number of sessions tracked in the Map before pruning. */
+const EMITTED_TURNS_MAP_MAX = 50
+/** How many sessions to retain after pruning the Map. */
+const EMITTED_TURNS_MAP_KEEP = 25
+
+// sessionId -> turn numbers that have been marked via `markTurn`.
+// Read by `hasToolHookObservationsForTurn`, bounded by `pruneEmittedTurns`.
+const emittedTurns = new Map<string, Set<number>>()
+
+/**
+ * Keep `emittedTurns` within its memory bounds.
+ *
+ * - Any session whose Set holds more than `EMITTED_TURNS_SET_MAX` turns is
+ *   trimmed to its `EMITTED_TURNS_SET_KEEP` most recently inserted entries.
+ * - When the Map holds more than `EMITTED_TURNS_MAP_MAX` sessions, the
+ *   earliest-inserted sessions are removed until `EMITTED_TURNS_MAP_KEEP`
+ *   remain.
+ *
+ * Exported so tests can invoke it directly.
+ */
+export function pruneEmittedTurns(): void {
+  // Eviction follows insertion order, NOT turn-number magnitude, so
+  // non-monotonic turn ordering (e.g. replayed transcripts or nested tool
+  // chains) does not evict the wrong entries.
+  for (const turns of emittedTurns.values()) {
+    if (turns.size <= EMITTED_TURNS_SET_MAX) continue
+    const surplusTurns = turns.size - EMITTED_TURNS_SET_KEEP
+    for (const stale of [...turns].slice(0, surplusTurns)) {
+      turns.delete(stale)
+    }
+  }
+
+  if (emittedTurns.size <= EMITTED_TURNS_MAP_MAX) return
+  const surplusSessions = emittedTurns.size - EMITTED_TURNS_MAP_KEEP
+  for (const oldest of [...emittedTurns.keys()].slice(0, surplusSessions)) {
+    emittedTurns.delete(oldest)
+  }
+}
+
+/**
+ * Record that `turn` of `sessionId` produced a tool-hook observation, then
+ * prune the bookkeeping.
+ */
+function markTurn(sessionId: string, turn: number): void {
+  const turns = emittedTurns.get(sessionId) ?? new Set<number>()
+  // Removing and re-inserting the key moves the session to the end of the
+  // Map's insertion order, so it counts as the youngest entry when
+  // `pruneEmittedTurns` evicts the oldest sessions.
+  emittedTurns.delete(sessionId)
+  turns.add(turn)
+  emittedTurns.set(sessionId, turns)
+  pruneEmittedTurns()
+}
+
+/**
+ * Report whether `turn` has been marked for `sessionId` and is still held in
+ * the (pruned) bookkeeping.
+ */
+export function hasToolHookObservationsForTurn(
+  sessionId: string,
+  turn: number,
+): boolean {
+  const turns = emittedTurns.get(sessionId)
+  return turns !== undefined && turns.has(turn)
+}
+
+/** Forget every recorded session/turn pair by emptying `emittedTurns`. */
+export function resetToolHookBookkeeping(): void {
+  emittedTurns.clear()
+}
+
+/**
+ * Build the fields shared by every tool-hook observation: a fresh random id,
+ * the current time as an ISO string, the identity fields copied from `ctx`,
+ * and `source: 'tool-hook'`.
+ */
+function baseObservation(
+  ctx: ToolHookContext,
+): Pick<
+  StoredSkillObservation,
+  | 'id'
+  | 'sessionId'
+  | 'projectId'
+  | 'projectName'
+  | 'cwd'
+  | 'timestamp'
+  | 'source'
+  | 'turn'
+> {
+  const { sessionId, projectId, projectName, cwd, turn } = ctx
+  return {
+    id: randomUUID(),
+    sessionId,
+    projectId,
+    projectName,
+    cwd,
+    timestamp: new Date().toISOString(),
+    source: 'tool-hook',
+    // The turn is persisted so runtimeObserver can filter tool-hook
+    // observations by the current turn instead of sweeping all historical
+    // tool-hook data (codex review Q1).
+    turn,
+  }
+}
+
+// Memoized promise for the lazily imported dependencies. It is created on the
+// first `_getDeps()` call and reused afterwards, so the hot path does not
+// repeat the dynamic imports.
+let _depImportCache:
+  | Promise<{
+      resolveProjectContext: (cwd: string) => SkillLearningProjectContext
+      isSkillLearningEnabled: () => boolean
+      RUNTIME_SESSION_ID: string
+      getRuntimeTurn: () => number
+    }>
+  | undefined
+
+/** Return the memoized dependency promise, starting the imports if needed. */
+function _getDeps() {
+  if (_depImportCache) return _depImportCache
+  _depImportCache = Promise.all([
+    import('./projectContext.js'),
+    import('./featureCheck.js'),
+    import('./runtimeObserver.js'),
+  ]).then(([projectContext, featureCheck, runtimeObserver]) => ({
+    resolveProjectContext: projectContext.resolveProjectContext,
+    isSkillLearningEnabled: featureCheck.isSkillLearningEnabled,
+    RUNTIME_SESSION_ID: runtimeObserver.RUNTIME_SESSION_ID,
+    getRuntimeTurn: runtimeObserver.getRuntimeTurn,
+  }))
+  return _depImportCache
+}
+
+/**
+ * Reset the cached dep import (for test isolation). The next `_getDeps()`
+ * call builds a new import promise.
+ */
+export function resetToolHookDepsCache(): void {
+  _depImportCache = undefined
+}
+
+/**
+ * Wrap a tool.call invocation with deterministic tool-event observation.
+ *
+ * Designed for the single call site in `toolExecution.ts`. `invoke` is called
+ * exactly once, whether skill learning is enabled, disabled, or hook setup
+ * fails. The hook calls (`recordToolStart`, `recordToolComplete`,
+ * `recordToolError`) are fire-and-forget: they are started but never awaited,
+ * so the tool result does not wait for an observation to persist. Errors in
+ * observation are caught and logged so they never surface to the caller.
+ *
+ * @param toolName Name recorded on the emitted observations.
+ * @param input Tool input, recorded on the `tool_start` observation.
+ * @param callContext Optional `sessionId` / `turn` overrides; they default to
+ *   `RUNTIME_SESSION_ID` and `getRuntimeTurn()` from the runtime observer.
+ * @param invoke Performs the actual tool call.
+ * @returns Whatever `invoke` resolves to.
+ * @throws Whatever `invoke` throws or rejects with, unchanged.
+ */
+export async function runToolCallWithSkillLearningHooks<T>(
+  toolName: string,
+  input: unknown,
+  callContext: { sessionId?: string; turn?: number },
+  invoke: () => Promise<T>,
+): Promise<T> {
+  // Observation failures are logged and dropped, never rethrown.
+  const reportHookError = (stage: string) => (e: unknown) => {
+    logForDebugging(`skill-learning: ${stage} error`)
+    logError(e)
+  }
+
+  let ctx: ToolHookContext | undefined
+  try {
+    const {
+      resolveProjectContext,
+      isSkillLearningEnabled,
+      RUNTIME_SESSION_ID,
+      getRuntimeTurn,
+    } = await _getDeps()
+    // When the feature is disabled, leave `ctx` undefined and fall through to
+    // the single `invoke()` below. Calling `invoke()` inside this try block
+    // would let a synchronous throw from the tool be swallowed as a setup
+    // error, after which the tool would be invoked a second time.
+    if (isSkillLearningEnabled()) {
+      const project = resolveProjectContext(process.cwd())
+      // Always emit under the runtime observer's sessionId so the
+      // post-sampling consumer can find our records. The prior default
+      // `'cli'` fell outside the observer's sessionId filter and made
+      // tool-hook observations structurally unconsumable (codex second-pass
+      // audit AC1).
+      ctx = {
+        sessionId: callContext.sessionId ?? RUNTIME_SESSION_ID,
+        turn: callContext.turn ?? getRuntimeTurn(),
+        projectId: project.projectId,
+        projectName: project.projectName,
+        cwd: project.cwd,
+        project,
+      }
+      // Fire-and-forget: do NOT await — tool invoke must not be blocked.
+      void recordToolStart(ctx, toolName, input).catch(
+        reportHookError('recordToolStart'),
+      )
+    }
+  } catch (e) {
+    // Never let observation setup errors affect tool execution.
+    reportHookError('hook setup')(e)
+  }
+
+  try {
+    const result = await invoke()
+    if (ctx) {
+      // Fire-and-forget: do NOT await.
+      void recordToolComplete(ctx, toolName, result, 'success').catch(
+        reportHookError('recordToolComplete'),
+      )
+    }
+    return result
+  } catch (error) {
+    if (ctx) {
+      // Fire-and-forget: do NOT await.
+      void recordToolError(ctx, toolName, error).catch(
+        reportHookError('recordToolError'),
+      )
+    }
+    throw error
+  }
+}
+
+/**
+ * Mark the turn and append a `tool_start` observation for `toolName`.
+ *
+ * @param input Tool input; stored in `toolInput` after `stringify`.
+ * @returns The value `appendObservation` resolves to.
+ */
+export async function recordToolStart(
+  ctx: ToolHookContext,
+  toolName: string,
+  input?: unknown,
+): Promise<StoredSkillObservation> {
+  const { sessionId, turn, project } = ctx
+  markTurn(sessionId, turn)
+  const base = baseObservation(ctx)
+  const toolInput = stringify(input)
+  const observation: StoredSkillObservation = {
+    ...base,
+    event: 'tool_start',
+    toolName,
+    toolInput,
+  }
+  return appendObservation(observation, { project })
+}
+
+/**
+ * Mark the turn and append a `tool_complete` observation for `toolName`.
+ *
+ * @param output Tool output; stored in `toolOutput` after `stringify`.
+ * @param outcome Outcome tag to record; defaults to `'success'`.
+ * @returns The value `appendObservation` resolves to.
+ */
+export async function recordToolComplete(
+  ctx: ToolHookContext,
+  toolName: string,
+  output?: unknown,
+  outcome: SkillObservationOutcome = 'success',
+): Promise<StoredSkillObservation> {
+  const { sessionId, turn, project } = ctx
+  markTurn(sessionId, turn)
+  const base = baseObservation(ctx)
+  const toolOutput = stringify(output)
+  const observation: StoredSkillObservation = {
+    ...base,
+    event: 'tool_complete',
+    toolName,
+    toolOutput,
+    outcome,
+  }
+  return appendObservation(observation, { project })
+}
+
+/**
+ * Mark the turn and append an observation for a failed tool call.
+ *
+ * The failure is stored as a `tool_complete` event with `outcome: 'failure'`
+ * and the stringified error in `toolOutput`.
+ * NOTE(review): `SkillObservationEvent` in types.ts also declares
+ * `'tool_error'`, which is not used here — confirm consumers expect
+ * `tool_complete` + `failure`.
+ *
+ * @returns The value `appendObservation` resolves to.
+ */
+export async function recordToolError(
+  ctx: ToolHookContext,
+  toolName: string,
+  error: unknown,
+): Promise<StoredSkillObservation> {
+  const { sessionId, turn, project } = ctx
+  markTurn(sessionId, turn)
+  const base = baseObservation(ctx)
+  const toolOutput = stringify(error)
+  const observation: StoredSkillObservation = {
+    ...base,
+    event: 'tool_complete',
+    toolName,
+    toolOutput,
+    outcome: 'failure',
+  }
+  return appendObservation(observation, { project })
+}
+
+/**
+ * Mark the turn and append a `user_message` observation carrying
+ * `messageText`.
+ *
+ * @returns The value `appendObservation` resolves to.
+ */
+export async function recordUserCorrection(
+  ctx: ToolHookContext,
+  messageText: string,
+): Promise<StoredSkillObservation> {
+  const { sessionId, turn, project } = ctx
+  markTurn(sessionId, turn)
+  const base = baseObservation(ctx)
+  const observation: StoredSkillObservation = {
+    ...base,
+    event: 'user_message',
+    messageText,
+  }
+  return appendObservation(observation, { project })
+}
+
+/**
+ * Render an arbitrary tool input, output or error as text for storage.
+ *
+ * @returns `undefined` for `null`/`undefined`; strings unchanged; `Error`
+ *   instances as JSON carrying `name`, `message` and any enumerable own
+ *   properties; every other value as `JSON.stringify` output, falling back
+ *   to `String(value)` when serialization throws (e.g. circular references).
+ */
+function stringify(value: unknown): string | undefined {
+  if (value === undefined || value === null) return undefined
+  if (typeof value === 'string') return value
+  try {
+    if (value instanceof Error) {
+      // JSON.stringify only serializes enumerable own properties, which
+      // excludes an Error's `name` and `message`: a plain
+      // JSON.stringify(error) yields "{}" and loses the failure text.
+      return JSON.stringify({
+        name: value.name,
+        message: value.message,
+        ...value,
+      })
+    }
+    return JSON.stringify(value)
+  } catch {
+    return String(value)
+  }
+}
--- a/src/services/skillLearning/types.ts
+++ b/src/services/skillLearning/types.ts
@@ -0,0 +1,109 @@
+/** Scope tag carried by instincts, skill drafts and project contexts. */
+export type SkillLearningScope = 'project' | 'global'
+
+/** Status values for a skill gap (not referenced elsewhere in this file). */
+export type SkillGapStatus = 'pending' | 'draft' | 'active' | 'rejected'
+
+/** Event kind stored in `SkillObservation.event`. */
+export type SkillObservationEvent =
+  | 'user_message'
+  | 'assistant_message'
+  | 'tool_start'
+  | 'tool_complete'
+  | 'tool_error'
+
+/** Result tag used by `SkillObservation.outcome` and `Instinct.evidenceOutcome`. */
+export type SkillObservationOutcome = 'success' | 'failure' | 'unknown'
+
+/**
+ * Closed list of instinct domains. `InstinctDomain` is derived from this
+ * tuple, so adding an entry here also extends the type.
+ */
+export const INSTINCT_DOMAINS = [
+  'workflow',
+  'testing',
+  'debugging',
+  'code-style',
+  'security',
+  'git',
+  'project',
+] as const
+
+/** Union of the string literals listed in `INSTINCT_DOMAINS`. */
+export type InstinctDomain = (typeof INSTINCT_DOMAINS)[number]
+
+/** Value of `Instinct.source`. */
+export type InstinctSource =
+  | 'session-observation'
+  | 'repo-analysis'
+  | 'imported'
+
+/** Value of `Instinct.status`. */
+export type InstinctStatus =
+  | 'pending'
+  | 'active'
+  | 'stale'
+  | 'superseded'
+  | 'retired'
+  | 'archived'
+  | 'conflict-hold'
+
+/**
+ * Value of `SkillLearningProjectContext.source`.
+ * NOTE(review): the names suggest how the project identity was derived —
+ * confirm against the project-context resolver.
+ */
+export type ProjectContextSource =
+  | 'claude_project_dir'
+  | 'git_remote'
+  | 'git_root'
+  | 'global'
+
+/**
+ * One observed event. The tool and message fields are optional; which ones
+ * are populated presumably depends on `event` — TODO confirm with producers.
+ */
+export interface SkillObservation {
+  id: string
+  // String timestamp; presumably ISO-8601 — TODO confirm with producers.
+  timestamp: string
+  event: SkillObservationEvent
+  sessionId: string
+  projectId: string
+  projectName: string
+  cwd: string
+  toolName?: string
+  toolInput?: unknown
+  toolOutput?: unknown
+  messageText?: string
+  outcome?: SkillObservationOutcome
+}
+
+/**
+ * A learned trigger/action pair together with its confidence, provenance
+ * and lifecycle status.
+ */
+export interface Instinct {
+  id: string
+  trigger: string
+  action: string
+  // Numeric confidence; presumably within 0..1 — TODO confirm.
+  confidence: number
+  domain: InstinctDomain
+  source: InstinctSource
+  scope: SkillLearningScope
+  // Optional project identity; presumably set for project-scoped instincts.
+  projectId?: string
+  projectName?: string
+  evidence: string[]
+  evidenceOutcome?: SkillObservationOutcome
+  createdAt: string
+  updatedAt: string
+  status: InstinctStatus
+}
+
+/**
+ * Draft of a skill: its text (`content`), where it is to be written
+ * (`outputPath`), and the ids of the instincts it was derived from.
+ */
+export interface LearnedSkillDraft {
+  name: string
+  description: string
+  scope: SkillLearningScope
+  sourceInstinctIds: string[]
+  confidence: number
+  content: string
+  outputPath: string
+}
+
+/**
+ * Resolved identity and storage location of the project that skill-learning
+ * data is attributed to.
+ */
+export interface SkillLearningProjectContext {
+  projectId: string
+  projectName: string
+  scope: SkillLearningScope
+  // Tag describing where this context came from; see `ProjectContextSource`.
+  source: ProjectContextSource
+  cwd: string
+  projectRoot?: string
+  gitRemote?: string
+  // Presumably the directory holding this project's data — TODO confirm.
+  storageDir: string
+}
+
+/** A project context extended with first-seen and last-seen timestamps. */
+export interface SkillLearningProjectRecord
+  extends SkillLearningProjectContext {
+  firstSeenAt: string
+  lastSeenAt: string
+}
+
+/** Versioned registry of known project records. */
+export interface SkillLearningProjectsRegistry {
+  // Schema version; fixed to the literal `1`.
+  version: 1
+  updatedAt: string
+  // Project records by string key; presumably keyed by projectId — TODO confirm.
+  projects: Record<string, SkillLearningProjectRecord>
+}