feat: 整合功能恢复与技能学习闭环(含 ECC v2.1 parity + Opus 4.7 接入 + prompt 工程优化)

主要变更:
- Skill Learning 闭环系统 (9/9 AC)
- Opus 4.7 模型层接入 + adaptive thinking
- Prompt 工程优化 (64 审计测试)
- Agent Teams 简化门控 (默认启用)
- Windows Terminal 后端修复 (EncodedCommand/WT_SESSION)
- TF-IDF 技能搜索精准化 (字段加权/CJK 优化)
- Autonomy 系统 (/autonomy 命令)
- ACP 协议完整实现
- mock.module 泄漏修复 (CI 全绿)
- 152+ lint/type 修复
This commit is contained in:
unraid
2026-04-22 16:07:42 +08:00
parent 711927f01b
commit 95fece4b51
316 changed files with 39611 additions and 14298 deletions

View File

@@ -0,0 +1,406 @@
/**
* End-to-end verification probe for the skill-learning pipeline.
*
* Exercises the real public API (not mocks, not unit test harness) so we
* can confirm each pipeline stage actually produces the expected on-disk
* artefacts under a clean CLAUDE_SKILL_LEARNING_HOME.
*
* Run with:
* bun run scripts/verify-skill-learning-e2e.ts
*
* Sections:
* 1. Fake transcript -> ingest -> observations on disk
* 2. Heuristic observer -> instinct candidates -> persisted instincts
* 3. Evolution -> skill / command / agent candidates
* 4. Write learned skill -> verify skill file exists
* 5. Cross-project promotion -> global instinct written
* 6. Observer backend env switch probe
* 7. Gap state machine walk-through
* 8. Tool event observer wrapper invocation
*/
import { mkdtempSync, writeFileSync, existsSync, rmSync, readdirSync } from 'node:fs'
import { readFile } from 'node:fs/promises'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { execSync } from 'node:child_process'
/** Outcome of a single probe check. */
type Result = { step: string; ok: boolean; detail: string }

/** Every check outcome recorded so far, in the order the checks ran. */
const results: Result[] = []

/**
 * Appends a check outcome to `results` and prints a one-line PASS/FAIL report.
 *
 * @param step - Short name of the check.
 * @param ok - Whether the check passed.
 * @param detail - Extra context printed after the step name; may be empty.
 */
function record(step: string, ok: boolean, detail: string): void {
  results.push({ step, ok, detail })
  const tag = ok ? 'PASS' : 'FAIL'
  // Keep the step name and its detail visually distinct; skip the separator
  // entirely when there is no detail so the line has no dangling dash.
  const suffix = detail ? ` - ${detail}` : ''
  console.log(`[${tag}] ${step}${suffix}`)
}
/**
 * Runs the eight-section skill-learning probe against throwaway directories.
 *
 * Each check is reported through `record`. When all sections have run (or an
 * exception aborted them), the temp directories are removed, a summary is
 * printed, and the process exits with status 1 if any check failed, else 0.
 */
async function main(): Promise<void> {
  const storage = mkdtempSync(join(tmpdir(), 'skill-learning-e2e-'))
  const projectA = mkdtempSync(join(tmpdir(), 'project-a-'))
  const projectB = mkdtempSync(join(tmpdir(), 'project-b-'))

  // Captured before the try block so the finally clause can always restore
  // fetch and report the call count, even when setup itself fails.
  const originalFetch = globalThis.fetch
  let networkCalls = 0

  try {
    // Real git repos so resolveProjectContext derives distinct project IDs
    // (the default `global` fallback for non-git dirs would make A and B
    // share the same storage and defeat the cross-project probe).
    // This runs inside the try block so a git failure still reaches the
    // cleanup below instead of leaking the temp directories.
    execSync(`git init -q "${projectA}"`, { stdio: 'ignore' })
    execSync(
      `git -C "${projectA}" remote add origin https://example.test/project-a.git`,
      { stdio: 'ignore' },
    )
    execSync(`git init -q "${projectB}"`, { stdio: 'ignore' })
    execSync(
      `git -C "${projectB}" remote add origin https://example.test/project-b.git`,
      { stdio: 'ignore' },
    )

    // === ECC / plugin isolation ===
    // The probe must exercise only the project's own skill-learning code, not
    // the user-level ECC plugin, auto-loaded ECC skill, or any external LLM.
    // Strip every env that could route observations or observer calls outside
    // this probe's temp storage.
    for (const key of [
      'ANTHROPIC_API_KEY',
      'ANTHROPIC_AUTH_TOKEN',
      'OPENAI_API_KEY',
      'GEMINI_API_KEY',
      'GROK_API_KEY',
      'CLAUDE_CODE_PLUGINS_DIR',
      'CLAUDE_PLUGINS_DIR',
      'CLAUDE_PLUGIN_MARKETPLACE',
      'ECC_PLUGIN_ROOT',
      'ECC_ENABLED',
    ]) {
      delete process.env[key]
    }
    process.env.CLAUDE_SKILL_LEARNING_HOME = storage
    process.env.SKILL_LEARNING_ENABLED = '1'
    process.env.SKILL_SEARCH_ENABLED = '1'
    // Force heuristic backend — no LLM round-trips allowed in clean-room probe.
    process.env.SKILL_LEARNING_OBSERVER_BACKEND = 'heuristic'
    process.env.CLAUDE_SKILL_LEARNING_DISABLE = ''

    // Instrument global fetch so any stray network call from the skill-learning
    // path (unexpected LLM fallback, plugin webhook, etc.) aborts the probe
    // with a visible error rather than hiding behind a try/catch.
    globalThis.fetch = ((...args: unknown[]) => {
      networkCalls += 1
      throw new Error(
        `clean-room probe must not make network calls, attempted: ${String(args[0])}`,
      )
    }) as typeof globalThis.fetch

    console.log(`storage=${storage}`)
    console.log(`ecc-isolation: API_KEY env vars cleared, fetch stubbed, observer=heuristic`)

    const skillLearning = await import('../src/services/skillLearning/index.js')
    const projectCtx = await import('../src/services/skillLearning/projectContext.js')

    // ----------------------------------------------------------------------
    // 1. Ingest a synthetic transcript and verify observations land on disk
    // ----------------------------------------------------------------------
    const transcriptPath = join(storage, 'session.jsonl')
    const transcriptLines = [
      { type: 'user', sessionId: 's-e2e', cwd: projectA, message: { role: 'user', content: '请重构 loader.ts 的错误处理' } },
      { type: 'assistant', sessionId: 's-e2e', cwd: projectA, message: { role: 'assistant', content: [{ type: 'tool_use', name: 'Grep', input: { pattern: 'throw new Error', path: 'src' } }] } },
      { type: 'user', sessionId: 's-e2e', cwd: projectA, message: { role: 'user', content: [{ type: 'tool_result', name: 'Grep', content: 'src/loader.ts:42', is_error: false }] } },
      { type: 'assistant', sessionId: 's-e2e', cwd: projectA, message: { role: 'assistant', content: [{ type: 'tool_use', name: 'Read', input: { file_path: 'src/loader.ts' } }] } },
      { type: 'user', sessionId: 's-e2e', cwd: projectA, message: { role: 'user', content: [{ type: 'tool_result', name: 'Read', content: 'export function load() { ... }', is_error: false }] } },
      { type: 'assistant', sessionId: 's-e2e', cwd: projectA, message: { role: 'assistant', content: [{ type: 'tool_use', name: 'Edit', input: { file_path: 'src/loader.ts', old_string: 'throw new Error', new_string: 'throw new LoaderError' } }] } },
      { type: 'user', sessionId: 's-e2e', cwd: projectA, message: { role: 'user', content: [{ type: 'tool_result', name: 'Edit', content: 'diff', is_error: false }] } },
      { type: 'user', sessionId: 's-e2e', cwd: projectA, message: { role: 'user', content: '不要直接 mock用 testing-library' } },
      { type: 'user', sessionId: 's-e2e', cwd: projectA, message: { role: 'user', content: '必须用 testing-library 不要 mock' } },
    ]
    // Explicit arrow: passing JSON.stringify directly would hand it the map
    // index and array as its `replacer` and `space` arguments.
    writeFileSync(transcriptPath, transcriptLines.map(line => JSON.stringify(line)).join('\n'))
    const projectAContext = projectCtx.resolveProjectContext(projectA)
    const observations = await skillLearning.ingestTranscript(transcriptPath, { project: projectAContext })
    record(
      'ingest transcript',
      observations.length > 0,
      `${observations.length} observations written under project ${projectAContext.projectId}`,
    )
    const reread = await skillLearning.readObservations({ project: projectAContext })
    record(
      'observations persist on disk',
      reread.length === observations.length,
      `disk has ${reread.length} observations (expected ${observations.length})`,
    )

    // ----------------------------------------------------------------------
    // 2. Heuristic observer -> instinct candidates -> store
    // ----------------------------------------------------------------------
    skillLearning.setActiveObserverBackend('heuristic')
    const candidates = await skillLearning.analyzeWithActiveBackend(observations, { project: projectAContext })
    record(
      'heuristic backend produces candidates',
      candidates.length > 0,
      `${candidates.length} candidates; first trigger=${candidates[0]?.trigger ?? '?'}`,
    )
    for (const c of candidates) {
      await skillLearning.upsertInstinct(skillLearning.createInstinct(c), { project: projectAContext })
    }
    const persistedInstincts = await skillLearning.loadInstincts({ project: projectAContext })
    record(
      'instincts persisted',
      persistedInstincts.length > 0,
      `${persistedInstincts.length} instincts on disk for project A`,
    )

    // Contradiction probe — push a contradicting instinct to verify conflict-hold
    const first = persistedInstincts[0]
    if (first) {
      const contradictor = skillLearning.createInstinct({
        trigger: first.trigger,
        action: first.action.includes('avoid')
          ? first.action.replace('avoid', 'prefer')
          : first.action.replace(/^/, 'avoid '),
        confidence: 0.5,
        domain: first.domain,
        source: 'session-observation',
        scope: first.scope,
        projectId: projectAContext.projectId,
        projectName: projectAContext.projectName,
        evidence: ['contradiction probe'],
        observationIds: [],
      })
      await skillLearning.upsertInstinct(contradictor, { project: projectAContext })
      const after = await skillLearning.loadInstincts({ project: projectAContext })
      const merged = after.find(i => i.id === first.id) ?? after.find(i => i.trigger === first.trigger)
      record(
        'contradiction lowers confidence',
        !!merged && merged.confidence < first.confidence,
        `before=${first.confidence.toFixed(2)} after=${merged?.confidence.toFixed(2) ?? 'n/a'}`,
      )
    } else {
      // Report the skipped check explicitly, as sections 4 and 5 do, so a
      // missing prerequisite never looks like a passing run.
      record('contradiction lowers confidence', false, 'no persisted instinct to contradict')
    }

    // ----------------------------------------------------------------------
    // 3. Evolution candidates
    //
    // clusterInstincts requires EITHER 2+ instincts in the same
    // (domain, normalized-trigger) bucket OR a single instinct with
    // confidence >= 0.8. Inject a high-confidence skill instinct + a
    // 4-instinct agent cluster + a "command"-flavoured instinct so each
    // of the three evolution paths actually has candidates to emit.
    // ----------------------------------------------------------------------
    const highConfidenceSkill = skillLearning.createInstinct({
      trigger: 'When editing TypeScript error handling',
      action: 'prefer throwing domain-specific Error subclasses',
      confidence: 0.9,
      domain: 'code-style',
      source: 'session-observation',
      scope: 'project',
      projectId: projectAContext.projectId,
      projectName: projectAContext.projectName,
      evidence: ['observed 2x in session'],
      observationIds: [],
    })
    await skillLearning.upsertInstinct(highConfidenceSkill, { project: projectAContext })
    const commandSeed = skillLearning.createInstinct({
      trigger: 'User asks to run the full test suite',
      action: 'run bun test after every multi-file edit',
      confidence: 0.9,
      domain: 'workflow',
      source: 'session-observation',
      scope: 'project',
      projectId: projectAContext.projectId,
      projectName: projectAContext.projectName,
      evidence: ['user explicitly requested bun test'],
      observationIds: [],
    })
    await skillLearning.upsertInstinct(commandSeed, { project: projectAContext })
    for (let i = 0; i < 4; i += 1) {
      const agentSeed = skillLearning.createInstinct({
        trigger: 'When debugging multi-step investigate flow',
        action: `step ${i + 1}: research root cause and verify`,
        confidence: 0.85,
        domain: 'debugging',
        source: 'session-observation',
        scope: 'project',
        projectId: projectAContext.projectId,
        projectName: projectAContext.projectName,
        evidence: [`debug step ${i + 1}`],
        observationIds: [],
      })
      await skillLearning.upsertInstinct(agentSeed, { project: projectAContext })
    }
    const allInstincts = await skillLearning.loadInstincts({ project: projectAContext })
    const skillCandidates = skillLearning.generateSkillCandidates(allInstincts, { cwd: projectA })
    const commandCandidates = skillLearning.generateCommandCandidates(allInstincts, { cwd: projectA })
    const agentCandidates = skillLearning.generateAgentCandidates(allInstincts, { cwd: projectA })
    record(
      'evolution skill path emits candidate (single high-conf instinct)',
      skillCandidates.length >= 1,
      `skillCandidates=${skillCandidates.length}`,
    )
    record(
      'evolution command path emits candidate (trigger matches user-asks heuristic)',
      commandCandidates.length >= 1,
      `commandCandidates=${commandCandidates.length}`,
    )
    record(
      'evolution agent path emits candidate (4+ debugging instincts)',
      agentCandidates.length >= 1,
      `agentCandidates=${agentCandidates.length}`,
    )

    // ----------------------------------------------------------------------
    // 4. Write learned skill + verify file on disk
    // ----------------------------------------------------------------------
    const firstDraft = skillCandidates[0]
    if (firstDraft) {
      const activePath = await skillLearning.writeLearnedSkill(firstDraft)
      // writeLearnedSkill returns the full SKILL.md path (not the directory).
      const exists = existsSync(activePath)
      record(
        'writeLearnedSkill produces SKILL.md',
        exists,
        `path=${activePath} exists=${exists}`,
      )
    } else {
      record('writeLearnedSkill produces SKILL.md', false, 'no skill candidate to write')
    }

    // ----------------------------------------------------------------------
    // 5. Cross-project promotion
    // ----------------------------------------------------------------------
    const projectBContext = projectCtx.resolveProjectContext(projectB)
    // Duplicate one high-confidence instinct into project B so promotion threshold
    // (>= 2 projects, avg conf >= 0.8) is met. We seeded a 0.9-confidence skill
    // instinct above, so this lookup succeeds deterministically.
    const pickable = allInstincts.find(i => i.confidence >= 0.8)
    if (pickable) {
      const projectBCopy = { ...pickable, projectId: projectBContext.projectId, projectName: projectBContext.projectName }
      await skillLearning.saveInstinct(projectBCopy, { project: projectBContext, scope: 'project' })
      // findPromotionCandidates groups by instinct id + distinct projectId
      // count; give it the real merged array seen across both project stores.
      const fromA = await skillLearning.loadInstincts({ project: projectAContext })
      const fromB = await skillLearning.loadInstincts({ project: projectBContext })
      const candidatesPre = skillLearning.findPromotionCandidates([
        ...fromA,
        ...fromB,
      ])
      record(
        'cross-project candidate visible',
        candidatesPre.length > 0,
        `${candidatesPre.length} promotable instincts across projects (A=${fromA.length} B=${fromB.length})`,
      )
      await skillLearning.checkPromotion({ project: projectAContext })
      const globalRoot = { scope: 'global' as const, rootDir: storage }
      const globalInstincts = await skillLearning.loadInstincts(globalRoot)
      // Computed once and reused for both the verdict and the detail text.
      const promoted = globalInstincts.some(i => i.id === pickable.id)
      record(
        'checkPromotion writes global instinct',
        promoted,
        `global scope has ${globalInstincts.length} instincts; target id ${pickable.id} present=${promoted}`,
      )
    } else {
      record('cross-project promotion', false, 'no instinct with confidence >= 0.8 to promote')
    }

    // ----------------------------------------------------------------------
    // 6. Observer backend env switch probe
    // ----------------------------------------------------------------------
    const originalBackendEnv = process.env.SKILL_LEARNING_OBSERVER_BACKEND
    try {
      process.env.SKILL_LEARNING_OBSERVER_BACKEND = 'llm'
      skillLearning.resolveDefaultObserverBackend()
      const active = skillLearning.getActiveObserverBackend().name
      record('env switch llm activates', active === 'llm', `active backend=${active}`)
      process.env.SKILL_LEARNING_OBSERVER_BACKEND = 'unknown-typo'
      skillLearning.resolveDefaultObserverBackend()
      const stillActive = skillLearning.getActiveObserverBackend().name
      record('typo env does not crash', stillActive === 'llm', `active after typo=${stillActive}`)
      process.env.SKILL_LEARNING_OBSERVER_BACKEND = 'heuristic'
      skillLearning.resolveDefaultObserverBackend()
      record('env switch back to heuristic', skillLearning.getActiveObserverBackend().name === 'heuristic', `active=${skillLearning.getActiveObserverBackend().name}`)
    } finally {
      // Put the env var back exactly as it was before this section ran.
      if (originalBackendEnv === undefined) delete process.env.SKILL_LEARNING_OBSERVER_BACKEND
      else process.env.SKILL_LEARNING_OBSERVER_BACKEND = originalBackendEnv
    }

    // ----------------------------------------------------------------------
    // 7. Gap state machine walk-through
    // ----------------------------------------------------------------------
    const prompt = 'auto-generate e2e verify script skeleton'
    const firstGap = await skillLearning.recordSkillGap({
      prompt,
      cwd: projectA,
      sessionId: 'e2e-a',
      project: projectAContext,
      rootDir: storage,
    })
    record('first gap is pending (no draft)', firstGap.status === 'pending' && !firstGap.draft, `status=${firstGap.status} draft=${!!firstGap.draft}`)
    const secondGap = await skillLearning.recordSkillGap({
      prompt,
      cwd: projectA,
      sessionId: 'e2e-a',
      project: projectAContext,
      rootDir: storage,
    })
    record('second occurrence promotes to draft', secondGap.status === 'draft' && !!secondGap.draft, `status=${secondGap.status} draftPath=${secondGap.draft?.skillPath ?? 'n/a'}`)

    // ----------------------------------------------------------------------
    // 8. Tool event observer wrapper invocation
    // ----------------------------------------------------------------------
    let wrappedRan = false
    const wrappedResult = await skillLearning.runToolCallWithSkillLearningHooks(
      'VerifyProbeTool',
      { sample: 'input' },
      { sessionId: skillLearning.RUNTIME_SESSION_ID, turn: 1 },
      async () => {
        wrappedRan = true
        return { data: { ok: true, payload: 42 } }
      },
    )
    record(
      'runToolCallWithSkillLearningHooks invokes inner fn',
      wrappedRan && (wrappedResult as { data?: { ok?: boolean } })?.data?.ok === true,
      `inner ran=${wrappedRan} result=${JSON.stringify(wrappedResult)}`,
    )
    // Observations produced by the wrapper are written under the project
    // context derived from process.cwd() (the test runner repo, not our
    // ephemeral projectA). Read from BOTH project scopes to catch either.
    const repoProject = projectCtx.resolveProjectContext(process.cwd())
    const [obsInProjectA, obsInRepo] = await Promise.all([
      skillLearning.readObservations({ project: projectAContext }),
      skillLearning.readObservations({ project: repoProject }),
    ])
    const toolHookRecords = [...obsInProjectA, ...obsInRepo].filter(
      o => o.source === 'tool-hook' && o.toolName === 'VerifyProbeTool',
    )
    record(
      'wrapper writes tool-hook observations',
      toolHookRecords.length > 0,
      `${toolHookRecords.length} tool-hook records on disk (projectA=${obsInProjectA.length} repo=${obsInRepo.length})`,
    )
  } catch (error) {
    record('uncaught exception', false, String(error))
  } finally {
    // Assert clean-room isolation held for the whole probe.
    record(
      'clean-room isolation: zero network calls',
      networkCalls === 0,
      `${networkCalls} network calls attempted`,
    )
    globalThis.fetch = originalFetch
    // Remove each temp dir independently: one failed removal must not skip
    // the remaining directories or the summary and exit code below.
    for (const dir of [storage, projectA, projectB]) {
      try {
        rmSync(dir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 })
      } catch (cleanupError) {
        console.warn(`failed to remove temp dir ${dir}: ${String(cleanupError)}`)
      }
    }
  }

  const passed = results.filter(r => r.ok).length
  const failed = results.filter(r => !r.ok).length
  console.log(`\n=== SUMMARY ===\n${passed} pass, ${failed} fail, ${results.length} total`)
  process.exit(failed > 0 ? 1 : 0)
}

void main()