feat: 添加 skill learning 技能学习闭环系统

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-18 06:15:51 +00:00 · 2026-04-22 22:38:09 +08:00
parent 04c7ed4250
commit 1837df5f88
64 changed files with 11009 additions and 36 deletions
--- a/src/services/skillLearning/runtimeObserver.ts
+++ b/src/services/skillLearning/runtimeObserver.ts
@@ -0,0 +1,386 @@
+import type { REPLHookContext } from '../../utils/hooks/postSamplingHooks.js'
+import { registerPostSamplingHook } from '../../utils/hooks/postSamplingHooks.js'
+import { getSkillLearningConfig } from './config.js'
+import { isSkillLearningEnabled } from './featureCheck.js'
+import {
+  appendObservation,
+  getSkillLearningRoot,
+  purgeOldObservations,
+  stringifyField,
+} from './observationStore.js'
+import { resolveProjectContext } from './projectContext.js'
+import './sessionObserver.js'
+import { createInstinct } from './instinctParser.js'
+import {
+  analyzeWithActiveBackend,
+  resolveDefaultObserverBackend,
+} from './observerBackend.js'
+import {
+  decayInstinctConfidence,
+  loadInstincts,
+  prunePendingInstincts,
+  upsertInstinct,
+} from './instinctStore.js'
+import type { StoredSkillObservation } from './observationStore.js'
+import type { Message } from '../../types/message.js'
+import {
+  applySkillLifecycleDecision,
+  compareExistingArtifacts,
+  decideSkillLifecycle,
+} from './skillLifecycle.js'
+import {
+  generateAgentCandidates,
+  generateCommandCandidates,
+  clusterInstincts,
+} from './evolution.js'
+import { generateOrMergeSkillDraft } from './skillGenerator.js'
+import { shouldGenerateSkillFromInstincts } from './learningPolicy.js'
+import { writeLearnedCommand } from './commandGenerator.js'
+import { writeLearnedAgent } from './agentGenerator.js'
+import { readObservations } from './observationStore.js'
+import { checkPromotion } from './promotion.js'
+import { existsSync } from 'node:fs'
+import { join, resolve, sep } from 'node:path'
+import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
+
+/**
+ * Session id stamped on every observation reconstructed from REPL messages,
+ * and the id used to select tool-hook observations during a post-sampling
+ * pass.
+ */
+export const RUNTIME_SESSION_ID = 'runtime-session'
+
+// Set by initSkillLearning() so hook registration and startup maintenance run
+// at most once per process.
+let initialized = false
+// Incremented once per post-sampling pass that gets past the self-filter
+// guards; exposed through getRuntimeTurn().
+let runtimeTurn = 0
+// Timestamp watermark for consumed tool-hook observations — enables replay of
+// only the records that arrived since the previous post-sampling pass.
+// Compared with plain string ordering, so it relies on the stored timestamps
+// sorting lexicographically (NOTE(review): presumably ISO-8601 — confirm
+// against the tool-hook writer).
+let lastConsumedToolHookTimestamp = ''
+
+// --- H5: LLM call throttle ---
+// Number of passes that took the analyzeWithActiveBackend() path since the
+// last reset.
+let llmCallsThisSession = 0
+// Date.now() value captured when the most recent such pass started; 0 means
+// none has happened yet.
+let lastLlmCallTimestamp = 0
+
+// --- H6: message watermark dedup ---
+// Key: `${sessionId}:${messageId}` — prevents reprocessing the same message
+// across repeated post-sampling calls in one REPL session.
+const lastProcessedMessageIds = new Set<string>()
+// Once the set grows past MAX_PROCESSED_IDS, the oldest entries are dropped
+// until TRIM_PROCESSED_IDS_TO remain.
+const MAX_PROCESSED_IDS = 1000
+const TRIM_PROCESSED_IDS_TO = 500
+
+/**
+ * Clears the LLM throttle state (call counter and last-call timestamp) and
+ * forgets every message key recorded by the dedup set.
+ */
+export function resetRuntimeLLMBookkeeping(): void {
+  lastProcessedMessageIds.clear()
+  lastLlmCallTimestamp = 0
+  llmCallsThisSession = 0
+}
+
+/**
+ * Returns the current value of the module-level `runtimeTurn` counter, i.e.
+ * how many post-sampling passes got past the self-filter guards since the
+ * last reset.
+ */
+export function getRuntimeTurn(): number {
+  return runtimeTurn
+}
+
+/**
+ * One-time setup for the skill-learning runtime: selects the observer
+ * backend, registers the post-sampling hook, and kicks off startup
+ * maintenance in the background. Calls after the first are no-ops.
+ */
+export function initSkillLearning(): void {
+  if (initialized) {
+    return
+  }
+  initialized = true
+
+  // Pick the observer backend named by SKILL_LEARNING_OBSERVER_BACKEND. If
+  // this call were skipped the registry would stay on whichever backend was
+  // registered first (heuristic), turning the env switch into a silent no-op.
+  // A failure here (nothing registered yet, or an unknown name in the env
+  // variable) must never crash startup, so the registry is simply left as-is.
+  try {
+    resolveDefaultObserverBackend()
+  } catch {
+    // Intentionally ignored — see above.
+  }
+
+  registerPostSamplingHook(runSkillLearningPostSampling)
+
+  // Startup maintenance (confidence decay, observation purge, pending
+  // instinct prune — ECC parity) runs fire-and-forget; any rejection is
+  // swallowed so maintenance can never block or break CLI startup.
+  const maintenance = runStartupMaintenance()
+  void maintenance.catch(() => {})
+}
+
+/**
+ * Runs the three housekeeping tasks (instinct confidence decay, old
+ * observation purge, pending-instinct prune) concurrently for the project
+ * resolved from the current working directory. Individual task failures do
+ * not reject the returned promise. Does nothing when skill learning is
+ * disabled or CLAUDE_SKILL_LEARNING_DISABLE is set.
+ */
+async function runStartupMaintenance(): Promise<void> {
+  const optedOut =
+    !isSkillLearningEnabled() ||
+    Boolean(process.env.CLAUDE_SKILL_LEARNING_DISABLE)
+  if (optedOut) {
+    return
+  }
+
+  const options = { project: resolveProjectContext(process.cwd()) }
+  const tasks = [
+    decayInstinctConfidence(options),
+    purgeOldObservations(options),
+    prunePendingInstincts(30, options),
+  ]
+  // allSettled: one failing task must not cancel or mask the others.
+  await Promise.allSettled(tasks)
+}
+
+/**
+ * Reports whether `cwd` is the skill-learning storage root or a directory
+ * nested inside it.
+ *
+ * The comparison is done on resolved paths and on a path-segment boundary, so
+ * a sibling directory that merely shares the root as a string prefix (for
+ * example `<root>-backup`) is NOT treated as being inside the storage.
+ *
+ * @param cwd Directory to test (typically `process.cwd()`).
+ * @returns `true` when `cwd` equals the storage root or lies beneath it;
+ *   `false` otherwise, including when the root cannot be determined.
+ */
+function isInsideSkillLearningStorage(cwd: string): boolean {
+  try {
+    const root = resolve(getSkillLearningRoot())
+    const target = resolve(cwd)
+    if (target === root) return true
+    // Append the separator so only whole path segments can match. A root that
+    // already ends with one (filesystem root) is used as-is.
+    const rootPrefix = root.endsWith(sep) ? root : root + sep
+    return target.startsWith(rootPrefix)
+  } catch {
+    // Best effort: if the root cannot be resolved, do not block learning.
+    return false
+  }
+}
+
+/**
+ * Post-sampling hook: turns the latest REPL activity into observations,
+ * derives instinct candidates from them, persists those instincts, and then
+ * triggers automatic skill/command/agent evolution.
+ *
+ * The hook is a no-op when skill learning is disabled, when the call does not
+ * come from the main REPL thread, when it runs inside a sub-agent, or when
+ * the working directory is inside the skill-learning storage itself.
+ *
+ * @param context Hook context supplying the message stream, the query source
+ *   and the tool-use context.
+ */
+export async function runSkillLearningPostSampling(
+  context: REPLHookContext,
+): Promise<void> {
+  // Self-filter layers, cheapest first: feature flag, env escape hatch,
+  // entrypoint (`startsWith` also covers
+  // 'repl_main_thread:outputStyle:<name>'), sub-agent skip, and finally a
+  // path guard against feedback loops when the user hand-edits files inside
+  // the skill-learning storage directory.
+  if (!isSkillLearningEnabled()) return
+  if (process.env.CLAUDE_SKILL_LEARNING_DISABLE) return
+  if (!context.querySource?.startsWith('repl_main_thread')) return
+  if (context.toolUseContext.agentId) return
+  const workingDir = process.cwd()
+  if (isInsideSkillLearningStorage(workingDir)) return
+
+  const project = resolveProjectContext(workingDir)
+  const options = { project }
+  runtimeTurn += 1
+
+  const collected: StoredSkillObservation[] = []
+
+  // The REPL message stream is the only source that carries user prompts and
+  // assistant outcomes (tool-hook observations cover tool events only), so it
+  // is always replayed. Appends stay sequential to preserve record order.
+  const fromMessages = observationsFromMessages(context.messages, project)
+  for (const draft of fromMessages) {
+    const stored = await appendObservation(draft, options)
+    collected.push(stored)
+  }
+
+  // Then add the tool-hook records newer than the consumption watermark.
+  // Selection is made against a snapshot of the watermark so that advancing
+  // it while iterating cannot exclude records from this same pass.
+  const watermark = lastConsumedToolHookTimestamp
+  for (const record of await readObservations(options)) {
+    if (record.source !== 'tool-hook') continue
+    if (record.sessionId !== RUNTIME_SESSION_ID) continue
+    if (typeof record.timestamp !== 'string') continue
+    if (!(record.timestamp > watermark)) continue
+    collected.push(record)
+    if (record.timestamp > lastConsumedToolHookTimestamp) {
+      lastConsumedToolHookTimestamp = record.timestamp
+    }
+  }
+
+  if (collected.length === 0) return
+
+  // H5: the LLM backend is used only when there are enough observations, the
+  // per-session cap has not been reached, and the cooldown has elapsed.
+  // Otherwise the heuristic backend is used directly.
+  const now = Date.now()
+  const minObservations = 5
+  const { llm } = getSkillLearningConfig()
+  const llmAllowed =
+    collected.length >= minObservations &&
+    llmCallsThisSession < llm.maxCallsPerSession &&
+    now - lastLlmCallTimestamp >= llm.cooldownMs
+
+  let candidates
+  if (!llmAllowed) {
+    // Heuristic path — does not consume an LLM call.
+    const { heuristicObserverBackend } = await import('./sessionObserver.js')
+    const result = heuristicObserverBackend.analyze(collected, { project })
+    candidates = Array.isArray(result) ? result : await result
+  } else {
+    // Budget is charged before the call is awaited.
+    llmCallsThisSession++
+    lastLlmCallTimestamp = now
+    candidates = await analyzeWithActiveBackend(collected, { project })
+  }
+
+  for (const candidate of candidates) {
+    const instinct = createInstinct(candidate)
+    await upsertInstinct(instinct, options)
+  }
+
+  await autoEvolveLearnedSkills(options)
+}
+
+/**
+ * Test helper: restores the per-run observer state — LLM bookkeeping, the
+ * tool-hook timestamp watermark and the turn counter. The `initialized` flag
+ * is left untouched.
+ */
+export function resetRuntimeObserverForTest(): void {
+  resetRuntimeLLMBookkeeping()
+  lastConsumedToolHookTimestamp = ''
+  runtimeTurn = 0
+}
+
+/**
+ * Turns the stored instincts into learned artifacts, in three stages: skills
+ * (from instinct clusters that pass the learning policy), then commands, then
+ * agents. A promotion check runs once all three stages are done.
+ *
+ * Existing artifacts are looked up in the project's `.claude/<kind>`
+ * directory and in the same folder under the Claude config home.
+ *
+ * @param options Carries the project context used to load instincts.
+ */
+async function autoEvolveLearnedSkills(options: {
+  project: ReturnType<typeof resolveProjectContext>
+}): Promise<void> {
+  const instincts = await loadInstincts(options)
+  const cwd = process.cwd()
+
+  // Project-local directory first, then the user-level config directory.
+  const rootsFor = (kind: 'skills' | 'commands' | 'agents') => [
+    join(cwd, '.claude', kind),
+    join(getClaudeConfigHomeDir(), kind),
+  ]
+
+  // --- Skills ---
+  const skillRoots = rootsFor('skills')
+  const eligibleClusters = clusterInstincts(instincts).filter(cluster => {
+    if (cluster.target !== 'skill') return false
+    return shouldGenerateSkillFromInstincts(cluster.instincts)
+  })
+  for (const cluster of eligibleClusters) {
+    const scope = cluster.instincts[0]?.scope ?? 'project'
+    const outcome = await generateOrMergeSkillDraft(
+      cluster.instincts,
+      { cwd, scope },
+      skillRoots,
+    )
+    // Evidence was appended to an existing skill; no lifecycle step needed.
+    if (outcome.action === 'append-evidence') continue
+    const { draft } = outcome
+    // Skip drafts whose SKILL.md already exists at the output path.
+    const skillFile = join(draft.outputPath, 'SKILL.md')
+    if (existsSync(skillFile)) continue
+    const matches = await compareExistingArtifacts('skill', draft, skillRoots)
+    await applySkillLifecycleDecision(decideSkillLifecycle(draft, matches))
+  }
+
+  // --- Commands: written only when no existing artifact matches ---
+  for (const draft of generateCommandCandidates(instincts, { cwd })) {
+    const matches = await compareExistingArtifacts(
+      'command',
+      draft,
+      rootsFor('commands'),
+    )
+    if (matches.length === 0) {
+      await writeLearnedCommand(draft)
+    }
+  }
+
+  // --- Agents: same rule as commands ---
+  for (const draft of generateAgentCandidates(instincts, { cwd })) {
+    const matches = await compareExistingArtifacts(
+      'agent',
+      draft,
+      rootsFor('agents'),
+    )
+    if (matches.length === 0) {
+      await writeLearnedAgent(draft)
+    }
+  }
+
+  await checkPromotion()
+}
+
+/**
+ * Maps the `id` of every assistant `tool_use` block in `messages` to its tool
+ * name, so a later `tool_result` can be attributed to the tool it came from.
+ */
+function indexToolNamesById(messages: Message[]): Map<string, string> {
+  const namesById = new Map<string, string>()
+  for (const message of messages) {
+    if (message.type !== 'assistant') continue
+    const content: unknown = message.message?.content
+    if (!Array.isArray(content)) continue
+    for (const block of content) {
+      if (!block || typeof block !== 'object') continue
+      const record = block as Record<string, unknown>
+      if (
+        record.type === 'tool_use' &&
+        typeof record.id === 'string' &&
+        typeof record.name === 'string'
+      ) {
+        namesById.set(record.id, record.name)
+      }
+    }
+  }
+  return namesById
+}
+
+/**
+ * Returns the `tool_use_id` (or `undefined`) of each `tool_result` block in
+ * `content`, in block order — one entry per `tool_result` block.
+ */
+function toolUseIdsFromContent(content: unknown): Array<string | undefined> {
+  if (!Array.isArray(content)) return []
+  return content.flatMap((block: unknown): Array<string | undefined> => {
+    if (!block || typeof block !== 'object') return []
+    const record = block as Record<string, unknown>
+    if (record.type !== 'tool_result') return []
+    return [
+      typeof record.tool_use_id === 'string' ? record.tool_use_id : undefined,
+    ]
+  })
+}
+
+/**
+ * Converts REPL messages into skill-learning observations.
+ *
+ * - A user message containing `tool_result` blocks yields one `tool_complete`
+ *   observation per result; any other user message with non-blank text yields
+ *   a single `user_message`.
+ * - An assistant message yields one `tool_start` per `tool_use` block,
+ *   followed by an `assistant_message` when it has non-blank text.
+ * - Every other message type yields nothing.
+ *
+ * Messages whose key is already in the module-level dedup set are skipped;
+ * new keys are recorded as a side effect. All observations produced by one
+ * call share the same timestamp.
+ *
+ * `tool_result` blocks identify their call through `tool_use_id` rather than
+ * by name, so when the extracted name is the `unknown_tool` placeholder it is
+ * resolved from the matching assistant `tool_use` block where one exists.
+ *
+ * @param messages Message stream to convert; the whole list is also scanned
+ *   to build the tool-use id → name index.
+ * @param project Project context copied onto every observation.
+ * @returns The observations for messages not seen before, in message order.
+ */
+function observationsFromMessages(
+  messages: Message[],
+  project: ReturnType<typeof resolveProjectContext>,
+): StoredSkillObservation[] {
+  const sessionId = RUNTIME_SESSION_ID
+  const base = {
+    sessionId,
+    projectId: project.projectId,
+    projectName: project.projectName,
+    cwd: project.cwd,
+    timestamp: new Date().toISOString(),
+    source: 'hook' as const,
+  }
+  // Built from ALL messages (including ones deduplicated below): the tool_use
+  // that a fresh tool_result answers may have been processed in an earlier
+  // pass.
+  const toolNamesById = indexToolNamesById(messages)
+
+  return messages.flatMap((message): StoredSkillObservation[] => {
+    // H6: watermark dedup — skip messages already processed in this session.
+    const msgKey = `${sessionId}:${String(message.uuid)}`
+    if (lastProcessedMessageIds.has(msgKey)) return []
+    lastProcessedMessageIds.add(msgKey)
+    // FIFO truncation to keep the set bounded: once it exceeds
+    // MAX_PROCESSED_IDS, drop the oldest keys until exactly
+    // TRIM_PROCESSED_IDS_TO remain (Set iteration follows insertion order).
+    // NOTE(review): if callers pass the full conversation on every pass, keys
+    // evicted here make very old messages eligible for reprocessing — confirm
+    // that this is acceptable for long sessions.
+    if (lastProcessedMessageIds.size > MAX_PROCESSED_IDS) {
+      const toDrop = lastProcessedMessageIds.size - TRIM_PROCESSED_IDS_TO
+      const iter = lastProcessedMessageIds.values()
+      for (let i = 0; i < toDrop; i++) {
+        const next = iter.next()
+        if (next.done) break
+        lastProcessedMessageIds.delete(next.value)
+      }
+    }
+
+    if (message.type === 'user') {
+      const content: unknown = message.message?.content
+      const toolResults = toolResultsFromContent(content)
+      if (toolResults.length > 0) {
+        // Same block filter as toolResultsFromContent, so indices line up.
+        const toolUseIds = toolUseIdsFromContent(content)
+        return toolResults.map((result, index): StoredSkillObservation => {
+          const toolUseId = toolUseIds[index]
+          const resolvedName =
+            toolUseId === undefined ? undefined : toolNamesById.get(toolUseId)
+          return {
+            ...base,
+            id: crypto.randomUUID(),
+            event: 'tool_complete',
+            // Only replace the placeholder; an explicit name on the block wins.
+            toolName:
+              result.toolName === 'unknown_tool' && resolvedName
+                ? resolvedName
+                : result.toolName,
+            toolOutput: result.output,
+            outcome: result.isError ? 'failure' : 'success',
+          }
+        })
+      }
+      const text = textFromContent(content)
+      return text.trim()
+        ? [
+            {
+              ...base,
+              id: crypto.randomUUID(),
+              event: 'user_message',
+              messageText: text,
+            },
+          ]
+        : []
+    }
+
+    if (message.type === 'assistant') {
+      const toolUses = toolUsesFromContent(message.message?.content)
+      const text = textFromContent(message.message?.content)
+      return [
+        ...toolUses.map(toolUse => ({
+          ...base,
+          id: crypto.randomUUID(),
+          event: 'tool_start' as const,
+          toolName: toolUse.toolName,
+          toolInput: toolUse.input,
+        })),
+        ...(text.trim()
+          ? [
+              {
+                ...base,
+                id: crypto.randomUUID(),
+                event: 'assistant_message' as const,
+                messageText: text,
+              },
+            ]
+          : []),
+      ]
+    }
+
+    return []
+  })
+}
+
+/**
+ * Extracts plain text from a message `content` value.
+ *
+ * A string is returned unchanged. For an array, the non-empty string `text`
+ * property of each object element is collected and the pieces are joined with
+ * newlines. Anything else produces an empty string.
+ */
+function textFromContent(content: unknown): string {
+  if (typeof content === 'string') return content
+  if (!Array.isArray(content)) return ''
+
+  const pieces: string[] = []
+  for (const block of content) {
+    if (!block || typeof block !== 'object') continue
+    const { text } = block as Record<string, unknown>
+    // Blocks without a usable (non-empty string) `text` contribute nothing.
+    if (typeof text === 'string' && text.length > 0) {
+      pieces.push(text)
+    }
+  }
+  return pieces.join('\n')
+}
+
+/**
+ * Collects the `tool_use` blocks of a message `content` array.
+ *
+ * Each match becomes `{ toolName, input }`: `toolName` is the stringified
+ * block `name` (or `'unknown_tool'` when it is null/undefined) and `input` is
+ * the block's `input` passed through `stringifyField`. Non-array content and
+ * non-object elements yield no entries.
+ */
+function toolUsesFromContent(
+  content: unknown,
+): Array<{ toolName: string; input?: string }> {
+  const uses: Array<{ toolName: string; input?: string }> = []
+  if (!Array.isArray(content)) return uses
+
+  for (const block of content) {
+    if (!block || typeof block !== 'object') continue
+    const record = block as Record<string, unknown>
+    if (record.type !== 'tool_use') continue
+    const toolName = String(record.name ?? 'unknown_tool')
+    const input = stringifyField(record.input)
+    uses.push({ toolName, input })
+  }
+  return uses
+}
+
+/**
+ * Collects the `tool_result` blocks of a message `content` array.
+ *
+ * Each match becomes `{ toolName, output, isError }`: `toolName` comes from
+ * the block's `name`, then `tool_name`, falling back to `'unknown_tool'`;
+ * `output` is the block's `content` passed through `stringifyField`; and
+ * `isError` is true only when `is_error` is exactly `true`. Non-array content
+ * and non-object elements yield no entries.
+ */
+function toolResultsFromContent(
+  content: unknown,
+): Array<{ toolName: string; output?: string; isError: boolean }> {
+  const results: Array<{
+    toolName: string
+    output?: string
+    isError: boolean
+  }> = []
+  if (!Array.isArray(content)) return results
+
+  for (const block of content) {
+    if (!block || typeof block !== 'object') continue
+    const record = block as Record<string, unknown>
+    if (record.type !== 'tool_result') continue
+    const toolName = String(record.name ?? record.tool_name ?? 'unknown_tool')
+    const output = stringifyField(record.content)
+    const isError = record.is_error === true
+    results.push({ toolName, output, isError })
+  }
+  return results
+}