feat: 整合功能恢复与技能学习闭环(含 ECC v2.1 parity + Opus 4.7 接入 + prompt 工程优化)

主要变更: - Skill Learning 闭环系统 (9/9 AC) - Opus 4.7 模型层接入 + adaptive thinking - Prompt 工程优化 (64 审计测试) - Agent Teams 简化门控 (默认启用) - Windows Terminal 后端修复 (EncodedCommand/WT_SESSION) - TF-IDF 技能搜索精准化 (字段加权/CJK 优化) - Autonomy 系统 (/autonomy 命令) - ACP 协议完整实现 - mock.module 泄漏修复 (CI 全绿) - 152+ lint/type 修复
2026-06-17 13:55:50 +00:00 · 2026-04-22 16:07:42 +08:00
parent 711927f01b
commit 95fece4b51
316 changed files with 39611 additions and 14298 deletions
--- a/scripts/verify-skill-learning-e2e.ts
+++ b/scripts/verify-skill-learning-e2e.ts
@@ -0,0 +1,406 @@
+/**
+ * End-to-end verification probe for the skill-learning pipeline.
+ *
+ * Exercises the real public API (not mocks, not unit test harness) so we
+ * can confirm each pipeline stage actually produces the expected on-disk
+ * artefacts under a clean CLAUDE_SKILL_LEARNING_HOME.
+ *
+ * Run with:
+ *   bun run scripts/verify-skill-learning-e2e.ts
+ *
+ * Sections:
+ *   1. Fake transcript -> ingest -> observations on disk
+ *   2. Heuristic observer -> instinct candidates -> persisted instincts
+ *   3. Evolution -> skill / command / agent candidates
+ *   4. Write learned skill -> verify skill file exists
+ *   5. Cross-project promotion -> global instinct written
+ *   6. Observer backend env switch probe
+ *   7. Gap state machine walk-through
+ *   8. Tool event observer wrapper invocation
+ */
+
+import { mkdtempSync, writeFileSync, existsSync, rmSync, readdirSync } from 'node:fs'
+import { readFile } from 'node:fs/promises'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import { execSync } from 'node:child_process'
+
+// One recorded check: its step label, whether it passed, and a human-readable detail line.
+type Result = { step: string; ok: boolean; detail: string }
+// Every check recorded so far, in call order; main() tallies pass/fail counts from this list.
+const results: Result[] = []
+
+/**
+ * Stores the outcome of one probe step and echoes it to stdout.
+ *
+ * @param step   Short label identifying the check.
+ * @param ok     Whether the check passed.
+ * @param detail Free-form context printed after the label.
+ */
+function record(step: string, ok: boolean, detail: string): void {
+  const entry: Result = { step, ok, detail }
+  results.push(entry)
+  console.log(`[${ok ? 'PASS' : 'FAIL'}] ${step} — ${detail}`)
+}
+
+/**
+ * Runs the end-to-end probe.
+ *
+ * Creates three temp directories (storage + two fake projects), strips
+ * environment variables that could route work outside the probe, stubs
+ * `globalThis.fetch` so any network attempt throws and is counted, then walks
+ * the eight pipeline sections, recording a PASS/FAIL line for each check.
+ *
+ * All fallible setup (including the `git` invocations) happens inside the
+ * `try` so the `finally` block always restores `fetch` and removes the temp
+ * directories. Exits the process with status 1 when any check failed,
+ * otherwise 0.
+ */
+async function main(): Promise<void> {
+  const storage = mkdtempSync(join(tmpdir(), 'skill-learning-e2e-'))
+  const projectA = mkdtempSync(join(tmpdir(), 'project-a-'))
+  const projectB = mkdtempSync(join(tmpdir(), 'project-b-'))
+
+  // === ECC / plugin isolation ===
+  // The probe must exercise only the project's own skill-learning code, not
+  // the user-level ECC plugin, auto-loaded ECC skill, or any external LLM.
+  // Strip every env that could route observations or observer calls outside
+  // this probe's temp storage.
+  for (const key of [
+    'ANTHROPIC_API_KEY',
+    'ANTHROPIC_AUTH_TOKEN',
+    'OPENAI_API_KEY',
+    'GEMINI_API_KEY',
+    'GROK_API_KEY',
+    'CLAUDE_CODE_PLUGINS_DIR',
+    'CLAUDE_PLUGINS_DIR',
+    'CLAUDE_PLUGIN_MARKETPLACE',
+    'ECC_PLUGIN_ROOT',
+    'ECC_ENABLED',
+  ]) {
+    delete process.env[key]
+  }
+  process.env.CLAUDE_SKILL_LEARNING_HOME = storage
+  process.env.SKILL_LEARNING_ENABLED = '1'
+  process.env.SKILL_SEARCH_ENABLED = '1'
+  // Force heuristic backend — no LLM round-trips allowed in clean-room probe.
+  process.env.SKILL_LEARNING_OBSERVER_BACKEND = 'heuristic'
+  process.env.CLAUDE_SKILL_LEARNING_DISABLE = ''
+  // Instrument global fetch so any stray network call from the skill-learning
+  // path (unexpected LLM fallback, plugin webhook, etc.) aborts the probe
+  // with a visible error rather than hiding behind a try/catch.
+  const originalFetch = globalThis.fetch
+  let networkCalls = 0
+  globalThis.fetch = ((...args: unknown[]) => {
+    networkCalls += 1
+    throw new Error(
+      `clean-room probe must not make network calls, attempted: ${String(args[0])}`,
+    )
+  }) as typeof globalThis.fetch
+  console.log(`storage=${storage}`)
+  console.log(`ecc-isolation: API_KEY env vars cleared, fetch stubbed, observer=heuristic`)
+
+  try {
+    // Real git repos so resolveProjectContext derives distinct project IDs
+    // (the default `global` fallback for non-git dirs would make A and B
+    // share the same storage and defeat the cross-project probe).
+    // Done inside the try so a missing/failing git binary is reported as a
+    // FAIL and the temp directories are still removed by the finally block.
+    execSync(`git init -q "${projectA}"`, { stdio: 'ignore' })
+    execSync(
+      `git -C "${projectA}" remote add origin https://example.test/project-a.git`,
+      { stdio: 'ignore' },
+    )
+    execSync(`git init -q "${projectB}"`, { stdio: 'ignore' })
+    execSync(
+      `git -C "${projectB}" remote add origin https://example.test/project-b.git`,
+      { stdio: 'ignore' },
+    )
+
+    const skillLearning = await import('../src/services/skillLearning/index.js')
+    const projectCtx = await import('../src/services/skillLearning/projectContext.js')
+
+    // ----------------------------------------------------------------------
+    // 1. Ingest a synthetic transcript and verify observations land on disk
+    // ----------------------------------------------------------------------
+    const transcriptPath = join(storage, 'session.jsonl')
+    const transcriptLines = [
+      { type: 'user', sessionId: 's-e2e', cwd: projectA, message: { role: 'user', content: '请重构 loader.ts 的错误处理' } },
+      { type: 'assistant', sessionId: 's-e2e', cwd: projectA, message: { role: 'assistant', content: [{ type: 'tool_use', name: 'Grep', input: { pattern: 'throw new Error', path: 'src' } }] } },
+      { type: 'user', sessionId: 's-e2e', cwd: projectA, message: { role: 'user', content: [{ type: 'tool_result', name: 'Grep', content: 'src/loader.ts:42', is_error: false }] } },
+      { type: 'assistant', sessionId: 's-e2e', cwd: projectA, message: { role: 'assistant', content: [{ type: 'tool_use', name: 'Read', input: { file_path: 'src/loader.ts' } }] } },
+      { type: 'user', sessionId: 's-e2e', cwd: projectA, message: { role: 'user', content: [{ type: 'tool_result', name: 'Read', content: 'export function load() { ... }', is_error: false }] } },
+      { type: 'assistant', sessionId: 's-e2e', cwd: projectA, message: { role: 'assistant', content: [{ type: 'tool_use', name: 'Edit', input: { file_path: 'src/loader.ts', old_string: 'throw new Error', new_string: 'throw new LoaderError' } }] } },
+      { type: 'user', sessionId: 's-e2e', cwd: projectA, message: { role: 'user', content: [{ type: 'tool_result', name: 'Edit', content: 'diff', is_error: false }] } },
+      { type: 'user', sessionId: 's-e2e', cwd: projectA, message: { role: 'user', content: '不要直接 mock，用 testing-library' } },
+      { type: 'user', sessionId: 's-e2e', cwd: projectA, message: { role: 'user', content: '必须用 testing-library 不要 mock' } },
+    ]
+    // Explicit arrow: passing JSON.stringify point-free would hand it map's
+    // index and array as the `replacer` and `space` arguments.
+    writeFileSync(transcriptPath, transcriptLines.map(line => JSON.stringify(line)).join('\n'))
+
+    const projectAContext = projectCtx.resolveProjectContext(projectA)
+    const observations = await skillLearning.ingestTranscript(transcriptPath, { project: projectAContext })
+    record(
+      'ingest transcript',
+      observations.length > 0,
+      `${observations.length} observations written under project ${projectAContext.projectId}`,
+    )
+
+    const reread = await skillLearning.readObservations({ project: projectAContext })
+    record(
+      'observations persist on disk',
+      reread.length === observations.length,
+      `disk has ${reread.length} observations (expected ${observations.length})`,
+    )
+
+    // ----------------------------------------------------------------------
+    // 2. Heuristic observer -> instinct candidates -> store
+    // ----------------------------------------------------------------------
+    skillLearning.setActiveObserverBackend('heuristic')
+    const candidates = await skillLearning.analyzeWithActiveBackend(observations, { project: projectAContext })
+    record(
+      'heuristic backend produces candidates',
+      candidates.length > 0,
+      `${candidates.length} candidates; first trigger=${candidates[0]?.trigger ?? '?'}`,
+    )
+
+    for (const c of candidates) {
+      await skillLearning.upsertInstinct(skillLearning.createInstinct(c), { project: projectAContext })
+    }
+    const persistedInstincts = await skillLearning.loadInstincts({ project: projectAContext })
+    record(
+      'instincts persisted',
+      persistedInstincts.length > 0,
+      `${persistedInstincts.length} instincts on disk for project A`,
+    )
+
+    // Contradiction probe — push a contradicting instinct to verify conflict-hold
+    const first = persistedInstincts[0]
+    if (first) {
+      const contradictor = skillLearning.createInstinct({
+        trigger: first.trigger,
+        action: first.action.includes('avoid')
+          ? first.action.replace('avoid', 'prefer')
+          : first.action.replace(/^/, 'avoid '),
+        confidence: 0.5,
+        domain: first.domain,
+        source: 'session-observation',
+        scope: first.scope,
+        projectId: projectAContext.projectId,
+        projectName: projectAContext.projectName,
+        evidence: ['contradiction probe'],
+        observationIds: [],
+      })
+      await skillLearning.upsertInstinct(contradictor, { project: projectAContext })
+      const after = await skillLearning.loadInstincts({ project: projectAContext })
+      const merged = after.find(i => i.id === first.id) ?? after.find(i => i.trigger === first.trigger)
+      record(
+        'contradiction lowers confidence',
+        !!merged && merged.confidence < first.confidence,
+        `before=${first.confidence.toFixed(2)} after=${merged?.confidence.toFixed(2) ?? 'n/a'}`,
+      )
+    } else {
+      // Report the skipped check explicitly, matching sections 4 and 5, so a
+      // missing instinct cannot hide an unexercised contradiction path.
+      record('contradiction lowers confidence', false, 'no persisted instinct to contradict')
+    }
+
+    // ----------------------------------------------------------------------
+    // 3. Evolution candidates
+    //
+    // clusterInstincts requires EITHER 2+ instincts in the same
+    // (domain, normalized-trigger) bucket OR a single instinct with
+    // confidence >= 0.8. Inject a high-confidence skill instinct + a
+    // 4-instinct agent cluster + a "command"-flavoured instinct so each
+    // of the three evolution paths actually has candidates to emit.
+    // ----------------------------------------------------------------------
+    const highConfidenceSkill = skillLearning.createInstinct({
+      trigger: 'When editing TypeScript error handling',
+      action: 'prefer throwing domain-specific Error subclasses',
+      confidence: 0.9,
+      domain: 'code-style',
+      source: 'session-observation',
+      scope: 'project',
+      projectId: projectAContext.projectId,
+      projectName: projectAContext.projectName,
+      evidence: ['observed 2x in session'],
+      observationIds: [],
+    })
+    await skillLearning.upsertInstinct(highConfidenceSkill, { project: projectAContext })
+
+    const commandSeed = skillLearning.createInstinct({
+      trigger: 'User asks to run the full test suite',
+      action: 'run bun test after every multi-file edit',
+      confidence: 0.9,
+      domain: 'workflow',
+      source: 'session-observation',
+      scope: 'project',
+      projectId: projectAContext.projectId,
+      projectName: projectAContext.projectName,
+      evidence: ['user explicitly requested bun test'],
+      observationIds: [],
+    })
+    await skillLearning.upsertInstinct(commandSeed, { project: projectAContext })
+
+    for (let i = 0; i < 4; i += 1) {
+      const agentSeed = skillLearning.createInstinct({
+        trigger: 'When debugging multi-step investigate flow',
+        action: `step ${i + 1}: research root cause and verify`,
+        confidence: 0.85,
+        domain: 'debugging',
+        source: 'session-observation',
+        scope: 'project',
+        projectId: projectAContext.projectId,
+        projectName: projectAContext.projectName,
+        evidence: [`debug step ${i + 1}`],
+        observationIds: [],
+      })
+      await skillLearning.upsertInstinct(agentSeed, { project: projectAContext })
+    }
+
+    const allInstincts = await skillLearning.loadInstincts({ project: projectAContext })
+    const skillCandidates = skillLearning.generateSkillCandidates(allInstincts, { cwd: projectA })
+    const commandCandidates = skillLearning.generateCommandCandidates(allInstincts, { cwd: projectA })
+    const agentCandidates = skillLearning.generateAgentCandidates(allInstincts, { cwd: projectA })
+    record(
+      'evolution skill path emits candidate (single high-conf instinct)',
+      skillCandidates.length >= 1,
+      `skillCandidates=${skillCandidates.length}`,
+    )
+    record(
+      'evolution command path emits candidate (trigger matches user-asks heuristic)',
+      commandCandidates.length >= 1,
+      `commandCandidates=${commandCandidates.length}`,
+    )
+    record(
+      'evolution agent path emits candidate (4+ debugging instincts)',
+      agentCandidates.length >= 1,
+      `agentCandidates=${agentCandidates.length}`,
+    )
+
+    // ----------------------------------------------------------------------
+    // 4. Write learned skill + verify file on disk
+    // ----------------------------------------------------------------------
+    const firstDraft = skillCandidates[0]
+    if (firstDraft) {
+      const activePath = await skillLearning.writeLearnedSkill(firstDraft)
+      // writeLearnedSkill returns the full SKILL.md path (not the directory).
+      const exists = existsSync(activePath)
+      record(
+        'writeLearnedSkill produces SKILL.md',
+        exists,
+        `path=${activePath} exists=${exists}`,
+      )
+    } else {
+      record('writeLearnedSkill produces SKILL.md', false, 'no skill candidate to write')
+    }
+
+    // ----------------------------------------------------------------------
+    // 5. Cross-project promotion
+    // ----------------------------------------------------------------------
+    const projectBContext = projectCtx.resolveProjectContext(projectB)
+    // Duplicate one high-confidence instinct into project B so promotion threshold
+    // (>= 2 projects, avg conf >= 0.8) is met. We seeded a 0.9-confidence skill
+    // instinct above, so this lookup succeeds deterministically.
+    const pickable = allInstincts.find(i => i.confidence >= 0.8)
+    if (pickable) {
+      const projectBCopy = { ...pickable, projectId: projectBContext.projectId, projectName: projectBContext.projectName }
+      await skillLearning.saveInstinct(projectBCopy, { project: projectBContext, scope: 'project' })
+      // findPromotionCandidates groups by instinct id + distinct projectId
+      // count; give it the real merged array seen across both project stores.
+      const fromA = await skillLearning.loadInstincts({ project: projectAContext })
+      const fromB = await skillLearning.loadInstincts({ project: projectBContext })
+      const candidatesPre = skillLearning.findPromotionCandidates([
+        ...fromA,
+        ...fromB,
+      ])
+      record(
+        'cross-project candidate visible',
+        candidatesPre.length > 0,
+        `${candidatesPre.length} promotable instincts across projects (A=${fromA.length} B=${fromB.length})`,
+      )
+
+      await skillLearning.checkPromotion({ project: projectAContext })
+      const globalRoot = { scope: 'global' as const, rootDir: storage }
+      const globalInstincts = await skillLearning.loadInstincts(globalRoot)
+      record(
+        'checkPromotion writes global instinct',
+        globalInstincts.some(i => i.id === pickable.id),
+        `global scope has ${globalInstincts.length} instincts; target id ${pickable.id} present=${globalInstincts.some(i => i.id === pickable.id)}`,
+      )
+    } else {
+      record('cross-project promotion', false, 'no instinct with confidence >= 0.8 to promote')
+    }
+
+    // ----------------------------------------------------------------------
+    // 6. Observer backend env switch probe
+    // ----------------------------------------------------------------------
+    const originalBackendEnv = process.env.SKILL_LEARNING_OBSERVER_BACKEND
+    try {
+      process.env.SKILL_LEARNING_OBSERVER_BACKEND = 'llm'
+      skillLearning.resolveDefaultObserverBackend()
+      const active = skillLearning.getActiveObserverBackend().name
+      record('env switch llm activates', active === 'llm', `active backend=${active}`)
+
+      process.env.SKILL_LEARNING_OBSERVER_BACKEND = 'unknown-typo'
+      skillLearning.resolveDefaultObserverBackend()
+      const stillActive = skillLearning.getActiveObserverBackend().name
+      record('typo env does not crash', stillActive === 'llm', `active after typo=${stillActive}`)
+
+      process.env.SKILL_LEARNING_OBSERVER_BACKEND = 'heuristic'
+      skillLearning.resolveDefaultObserverBackend()
+      record('env switch back to heuristic', skillLearning.getActiveObserverBackend().name === 'heuristic', `active=${skillLearning.getActiveObserverBackend().name}`)
+    } finally {
+      if (originalBackendEnv === undefined) delete process.env.SKILL_LEARNING_OBSERVER_BACKEND
+      else process.env.SKILL_LEARNING_OBSERVER_BACKEND = originalBackendEnv
+    }
+
+    // ----------------------------------------------------------------------
+    // 7. Gap state machine walk-through
+    // ----------------------------------------------------------------------
+    const prompt = 'auto-generate e2e verify script skeleton'
+    const firstGap = await skillLearning.recordSkillGap({
+      prompt,
+      cwd: projectA,
+      sessionId: 'e2e-a',
+      project: projectAContext,
+      rootDir: storage,
+    })
+    record('first gap is pending (no draft)', firstGap.status === 'pending' && !firstGap.draft, `status=${firstGap.status} draft=${!!firstGap.draft}`)
+
+    const secondGap = await skillLearning.recordSkillGap({
+      prompt,
+      cwd: projectA,
+      sessionId: 'e2e-a',
+      project: projectAContext,
+      rootDir: storage,
+    })
+    record('second occurrence promotes to draft', secondGap.status === 'draft' && !!secondGap.draft, `status=${secondGap.status} draftPath=${secondGap.draft?.skillPath ?? 'n/a'}`)
+
+    // ----------------------------------------------------------------------
+    // 8. Tool event observer wrapper invocation
+    // ----------------------------------------------------------------------
+    let wrappedRan = false
+    const wrappedResult = await skillLearning.runToolCallWithSkillLearningHooks(
+      'VerifyProbeTool',
+      { sample: 'input' },
+      { sessionId: skillLearning.RUNTIME_SESSION_ID, turn: 1 },
+      async () => {
+        wrappedRan = true
+        return { data: { ok: true, payload: 42 } }
+      },
+    )
+    record(
+      'runToolCallWithSkillLearningHooks invokes inner fn',
+      wrappedRan && (wrappedResult as { data?: { ok?: boolean } })?.data?.ok === true,
+      `inner ran=${wrappedRan} result=${JSON.stringify(wrappedResult)}`,
+    )
+
+    // Observations produced by the wrapper are written under the project
+    // context derived from process.cwd() (the test runner repo, not our
+    // ephemeral projectA). Read from BOTH project scopes to catch either.
+    const repoProject = projectCtx.resolveProjectContext(process.cwd())
+    const [obsInProjectA, obsInRepo] = await Promise.all([
+      skillLearning.readObservations({ project: projectAContext }),
+      skillLearning.readObservations({ project: repoProject }),
+    ])
+    const toolHookRecords = [...obsInProjectA, ...obsInRepo].filter(
+      o => o.source === 'tool-hook' && o.toolName === 'VerifyProbeTool',
+    )
+    record(
+      'wrapper writes tool-hook observations',
+      toolHookRecords.length > 0,
+      `${toolHookRecords.length} tool-hook records on disk (projectA=${obsInProjectA.length} repo=${obsInRepo.length})`,
+    )
+  } catch (error) {
+    // Keep the stack when one is available so the failing stage can be located.
+    const detail = error instanceof Error ? (error.stack ?? error.message) : String(error)
+    record('uncaught exception', false, detail)
+  } finally {
+    // Assert clean-room isolation held for the whole probe.
+    record(
+      'clean-room isolation: zero network calls',
+      networkCalls === 0,
+      `${networkCalls} network calls attempted`,
+    )
+    globalThis.fetch = originalFetch
+    rmSync(storage, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 })
+    rmSync(projectA, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 })
+    rmSync(projectB, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 })
+  }
+
+  const passed = results.filter(r => r.ok).length
+  const failed = results.filter(r => !r.ok).length
+  console.log(`\n=== SUMMARY ===\n${passed} pass, ${failed} fail, ${results.length} total`)
+  process.exit(failed > 0 ? 1 : 0)
+}
+
+// Handle a rejection that escapes main() (e.g. temp-dir cleanup throwing in its
+// finally block) explicitly instead of leaving a floating rejected promise.
+main().catch((error: unknown) => {
+  console.error(error)
+  process.exit(1)
+})