import type { Attachment } from '../../utils/attachments.js' import type { Message } from '../../types/message.js' import type { ToolUseContext } from '../../Tool.js' import type { DiscoverySignal } from './signals.js' import { isSkillSearchEnabled } from './featureCheck.js' import { getSkillIndex, searchSkills, type SearchResult, } from './localSearch.js' import { normalizeQueryIntent } from './intentNormalize.js' import { logForDebugging } from '../../utils/debug.js' import { readFile } from 'node:fs/promises' import { join } from 'node:path' import { parseFrontmatter } from '../../utils/frontmatterParser.js' const discoveredThisSession = new Set() const recordedGapSignals = new Set() const AUTO_LOAD_MIN_SCORE = Number( process.env.SKILL_SEARCH_AUTOLOAD_MIN_SCORE ?? '0.30', ) const AUTO_LOAD_LIMIT = Number(process.env.SKILL_SEARCH_AUTOLOAD_LIMIT ?? '2') const AUTO_LOAD_MAX_CHARS = Number( process.env.SKILL_SEARCH_AUTOLOAD_MAX_CHARS ?? '12000', ) export function extractQueryFromMessages( input: string | null, messages: Message[], ): string { const parts: string[] = [] if (input) parts.push(input) // Walk backward. In inter-turn prefetch the most recent 'user' message is // typically a tool_result (no text block), so we must keep walking until we // find a real user utterance with string content or a text block. for (let i = messages.length - 1; i >= 0; i--) { const msg = messages[i] as Record if (msg.type !== 'user') continue const content = msg.content if (typeof content === 'string') { parts.push(content.slice(0, 500)) break } if (Array.isArray(content)) { let foundText = false for (const block of content) { const entry = block as Record // Skip tool_result and other non-text blocks — they carry no discovery // signal and would return undefined here regardless. if (entry.type && entry.type !== 'text') continue const text = entry.text if (typeof text === 'string' && text.trim()) { parts.push(text.slice(0, 500)) foundText = true break } } if (foundText) break } } return parts.join(' ') } function buildDiscoveryAttachment( skills: SkillDiscoveryResult[], signal: DiscoverySignal, gap?: SkillDiscoveryGap, ): Attachment { return { type: 'skill_discovery', skills, signal, source: 'native', gap, } as Attachment } type SkillDiscoveryResult = { name: string description: string shortId?: string score?: number autoLoaded?: boolean content?: string path?: string } type SkillDiscoveryGap = { key: string status: 'pending' | 'draft' | 'active' draftName?: string draftPath?: string activeName?: string activePath?: string } async function enrichResultsForAutoLoad( results: SearchResult[], context: ToolUseContext, ): Promise { let loadedCount = 0 const enriched: SkillDiscoveryResult[] = [] for (const result of results) { const base: SkillDiscoveryResult = { name: result.name, description: result.description, score: result.score, } if (loadedCount >= AUTO_LOAD_LIMIT || result.score < AUTO_LOAD_MIN_SCORE) { enriched.push(base) continue } const loaded = await loadSkillContent(result) if (!loaded) { enriched.push(base) continue } loadedCount++ await markAutoLoadedSkill(result.name, loaded.path, loaded.content, context) enriched.push({ ...base, autoLoaded: true, content: loaded.content, path: loaded.path, }) } return enriched } async function loadSkillContent( result: SearchResult, ): Promise<{ path: string; content: string } | null> { if (!result.skillRoot) return null const candidates = [ join(result.skillRoot, 'SKILL.md'), join(result.skillRoot, 'skill.md'), ] for (const path of candidates) { try { const raw = await readFile(path, 'utf8') return { path, content: parseFrontmatter(raw).content.slice(0, AUTO_LOAD_MAX_CHARS), } } catch { // Try next candidate. } } return null } async function markAutoLoadedSkill( name: string, path: string, content: string, context: ToolUseContext, ): Promise { try { const { addInvokedSkill } = await import('../../bootstrap/state.js') addInvokedSkill(name, path, content, context.agentId ?? null) } catch { // Best effort only. } } async function maybeRecordSkillGap( queryText: string, results: SearchResult[], context: ToolUseContext, trigger: DiscoverySignal['trigger'], ): Promise { if (trigger !== 'user_input') return undefined if (!queryText.trim()) return undefined const gapSignalKey = `${trigger}:${queryText.trim().toLowerCase()}` if (recordedGapSignals.has(gapSignalKey)) return undefined recordedGapSignals.add(gapSignalKey) try { const [{ isSkillLearningEnabled }, { recordSkillGap }] = await Promise.all([ import('../skillLearning/featureCheck.js'), import('../skillLearning/skillGapStore.js'), ]) if (!isSkillLearningEnabled()) return undefined const gap = await recordSkillGap({ prompt: queryText, cwd: ((context as Record).cwd as string) ?? process.cwd(), sessionId: ((context as Record).sessionId as string) ?? 'unknown-session', recommendations: results, }) const status = gap.status if (status !== 'pending' && status !== 'draft' && status !== 'active') { return undefined } return { key: gap.key, status, draftName: gap.draft?.name, draftPath: gap.draft?.skillPath, activeName: gap.active?.name, activePath: gap.active?.skillPath, } } catch (error) { logForDebugging(`[skill-search] skill gap learning error: ${error}`) return undefined } } export async function startSkillDiscoveryPrefetch( input: string | null, messages: Message[], toolUseContext: ToolUseContext, ): Promise { if (!isSkillSearchEnabled()) return [] const startedAt = Date.now() const queryText = extractQueryFromMessages(input, messages) if (!queryText.trim()) return [] try { const cwd = ((toolUseContext as Record).cwd as string) ?? process.cwd() const index = await getSkillIndex(cwd) const results = searchSkills(queryText, index) const newResults = results.filter(r => !discoveredThisSession.has(r.name)) if (newResults.length === 0) return [] for (const r of newResults) discoveredThisSession.add(r.name) const signal: DiscoverySignal = { trigger: 'assistant_turn', queryText: queryText.slice(0, 200), startedAt, durationMs: Date.now() - startedAt, indexSize: index.length, method: 'tfidf', } logForDebugging( `[skill-search] prefetch found ${newResults.length} skills in ${signal.durationMs}ms`, ) return [ buildDiscoveryAttachment( await enrichResultsForAutoLoad(newResults, toolUseContext), signal, ), ] } catch (error) { logForDebugging(`[skill-search] prefetch error: ${error}`) return [] } } export async function collectSkillDiscoveryPrefetch( pending: Promise, ): Promise { try { return await pending } catch { return [] } } export async function getTurnZeroSkillDiscovery( input: string, messages: Message[], context: ToolUseContext, ): Promise { if (!isSkillSearchEnabled()) return null if (!input.trim()) return null const startedAt = Date.now() try { const cwd = ((context as Record).cwd as string) ?? process.cwd() const index = await getSkillIndex(cwd) // Intent normalization (feature-flagged, ASCII-only fast path, graceful // fallback to original). Turn-zero is the one blocking entry — acceptable // to add a Haiku call here since a bad match here pollutes the LLM's // context for the entire session. const searchQuery = await normalizeQueryIntent(input) const results = searchSkills(searchQuery, index) const enriched = await enrichResultsForAutoLoad(results, context) const gap = enriched.some(result => result.autoLoaded) ? undefined : await maybeRecordSkillGap(input, results, context, 'user_input') if (results.length === 0 && !gap) return null for (const r of results) discoveredThisSession.add(r.name) const signal: DiscoverySignal = { trigger: 'user_input', queryText: input.slice(0, 200), startedAt, durationMs: Date.now() - startedAt, indexSize: index.length, method: 'tfidf', } logForDebugging( `[skill-search] turn-zero found ${results.length} skills in ${signal.durationMs}ms`, ) return buildDiscoveryAttachment(enriched, signal, gap) } catch (error) { logForDebugging(`[skill-search] turn-zero error: ${error}`) return null } }