feat: 实现 Tool Search 基础设施层(CORE_TOOLS 白名单 + TF-IDF 索引 + ExecuteTool + 搜索增强)

- 新增 CORE_TOOLS 白名单常量(31 个核心工具),重构 isDeferredTool 为白名单制判定
- 新建 TF-IDF 工具索引模块(toolIndex.ts),复用 localSearch.ts 算法函数
- 新建 ExecuteTool 跨 API provider 统一工具执行入口
- 增强 ToolSearchTool:TF-IDF 搜索路径、discover: 模式、并行搜索合并、文本模式回退
- 新增 27 个单元测试,precheck 零错误通过(4108 tests pass)

Co-Authored-By: glm-5.1[1m] <zai-org@claude-code-best.win>
This commit is contained in:
claude-code-best
2026-05-08 22:29:15 +08:00
parent 02dd796706
commit 7be08f53bd
34 changed files with 4040 additions and 90 deletions

View File

@@ -209,7 +209,7 @@ const FIELD_WEIGHT = {
allowedTools: 0.3,
} as const
function computeWeightedTf(
export function computeWeightedTf(
fields: { tokens: string[]; weight: number }[],
): Map<string, number> {
const weighted = new Map<string, number>()
@@ -227,7 +227,7 @@ function computeWeightedTf(
return weighted
}
function computeIdf(index: SkillIndexEntry[]): Map<string, number> {
export function computeIdf(index: { tokens: string[] }[]): Map<string, number> {
const df = new Map<string, number>()
for (const entry of index) {
const seen = new Set<string>()
@@ -246,7 +246,7 @@ function computeIdf(index: SkillIndexEntry[]): Map<string, number> {
return idf
}
function cosineSimilarity(
export function cosineSimilarity(
queryTfIdf: Map<string, number>,
docTfIdf: Map<string, number>,
): number {

View File

@@ -0,0 +1,242 @@
import { describe, test, expect, beforeEach } from 'bun:test'
import { mock } from 'bun:test'
import { logMock } from '../../../../tests/mocks/log'
import { debugMock } from '../../../../tests/mocks/debug'
// Module mocks are registered at the top of the file, ahead of the dynamic
// import of '../prefetch.js' further down.
mock.module('src/utils/log.ts', logMock)
mock.module('src/utils/debug.ts', debugMock)
// GrowthBook stub: gates and feature values read as false/undefined, the
// collection getters return empty objects, and lifecycle hooks are no-ops.
mock.module('src/services/analytics/growthbook.js', () => ({
getFeatureValue_CACHED_MAY_BE_STALE: () => false,
checkStatsigFeatureGate_CACHED_MAY_BE_STALE: () => false,
getFeatureValue_DEPRECATED: async () => undefined,
getFeatureValue_CACHED_WITH_REFRESH: async () => undefined,
hasGrowthBookEnvOverride: () => false,
getAllGrowthBookFeatures: () => ({}),
getGrowthBookConfigOverrides: () => ({}),
setGrowthBookConfigOverride: () => {},
clearGrowthBookConfigOverrides: () => {},
getApiBaseUrlHost: () => undefined,
onGrowthBookRefresh: () => {},
initializeGrowthBook: async () => {},
checkSecurityRestrictionGate: async () => false,
checkGate_CACHED_OR_BLOCKING: async () => false,
refreshGrowthBookAfterAuthChange: () => {},
resetGrowthBook: () => {},
refreshGrowthBookFeatures: async () => {},
setupPeriodicGrowthBookRefresh: () => {},
stopPeriodicGrowthBookRefresh: () => {},
getDynamicConfig_CACHED_MAY_BE_STALE: () => undefined,
getDynamicConfig_BLOCKS_ON_INIT: async () => undefined,
}))
// Mock skillSearch/prefetch.js (dependency of toolSearch/prefetch.ts)
// The stand-in extractQueryFromMessages ignores its first argument and scans
// the messages from last to first. For the first 'user' message it finds with
// usable content it returns either the string content itself or the text of
// the first content block that carries a string `text` field. When no user
// message yields text it returns ''.
mock.module('src/services/skillSearch/prefetch.js', () => ({
extractQueryFromMessages: (
_input: string | null,
messages: { type: string; content: unknown }[],
) => {
for (let i = messages.length - 1; i >= 0; i--) {
const msg = messages[i]!
// Non-user messages never contribute a query.
if (msg.type !== 'user') continue
const content = msg.content
if (typeof content === 'string') return content
if (Array.isArray(content)) {
for (const block of content) {
if (
block &&
typeof block === 'object' &&
'text' in block &&
typeof (block as { text: unknown }).text === 'string'
) {
return (block as { text: string }).text
}
}
}
}
return ''
},
}))
// Mock functions standing in for the real index functions; individual tests
// override what they resolve to / return.
const mockGetToolIndex = mock(() => Promise.resolve([] as never[]))
const mockSearchTools = mock(() => [] as never[])
// Replace the tool index module with the two mocks above plus simple
// stand-ins for its remaining exports.
mock.module('src/services/toolSearch/toolIndex.js', () => ({
getToolIndex: mockGetToolIndex,
searchTools: mockSearchTools,
clearToolIndexCache: () => {},
buildToolIndex: async () => [],
// Simplified parser: lower-cases the name and splits it on single underscores.
parseToolName: (name: string) => ({
parts: name.toLowerCase().split('_'),
full: name.toLowerCase(),
isMcp: name.startsWith('mcp__'),
}),
}))
// Imported dynamically, after all mock.module() calls above have run.
const {
startToolSearchPrefetch,
getTurnZeroToolSearchPrefetch,
collectToolSearchPrefetch,
buildToolDiscoveryAttachment,
} = await import('../prefetch.js')
/**
 * Builds a one-element message list containing a single user message whose
 * content is one text block holding `text`.
 *
 * The result is cast to `never` so it can be passed wherever the tests need a
 * message array without constructing a full message object.
 */
function makeMockMessages(text: string) {
  const textBlock = { type: 'text', text }
  const userMessage = {
    type: 'user',
    content: [textBlock],
    uuid: 'test-uuid',
  }
  return [userMessage] as never
}
// Tests for startToolSearchPrefetch, run against the mocked tool index module.
describe('startToolSearchPrefetch', () => {
// Every test starts from a one-entry index and a search that matches nothing;
// tests that need hits override mockSearchTools themselves.
beforeEach(() => {
mockGetToolIndex.mockResolvedValue([
{ name: 'index-entry', tokens: ['test'], tfVector: new Map() },
] as never)
mockSearchTools.mockReturnValue([])
})
// A single search hit should yield exactly one 'tool_discovery' attachment
// tagged with the 'assistant_turn' trigger.
test('returns tool_discovery attachment for matching tools', async () => {
mockSearchTools.mockReturnValue([
{
name: 'CronCreateTool',
description: 'Create cron jobs',
searchHint: 'schedule recurring',
score: 0.5,
isMcp: false,
isDeferred: true,
inputSchema: undefined,
},
] as never)
const result = await startToolSearchPrefetch(
[],
makeMockMessages('schedule a cron job'),
)
expect(result).toHaveLength(1)
expect(result[0]!.type).toBe('tool_discovery')
expect((result[0] as Record<string, unknown>).trigger).toBe(
'assistant_turn',
)
expect((result[0] as Record<string, unknown>).tools).toBeDefined()
})
// Only an assistant message is supplied, so the mocked query extractor
// returns '' and no attachment is expected.
test('returns empty array for empty query', async () => {
const result = await startToolSearchPrefetch([], [
{ type: 'assistant', content: [] },
] as never)
expect(result).toEqual([])
})
test('returns empty array when no tools match', async () => {
mockSearchTools.mockReturnValue([])
const result = await startToolSearchPrefetch(
[],
makeMockMessages('quantum physics'),
)
expect(result).toEqual([])
})
// getToolIndex rejects here; the call is expected to resolve to [] rather
// than reject.
test('returns empty array on error (exception safety)', async () => {
mockGetToolIndex.mockRejectedValue(new Error('index failed'))
const result = await startToolSearchPrefetch([], makeMockMessages('test'))
expect(result).toEqual([])
})
})
// Tests for getTurnZeroToolSearchPrefetch, which takes the raw input string
// directly instead of a message list.
describe('getTurnZeroToolSearchPrefetch', () => {
// Reset both mocks to a resolving index and an empty search result. This also
// replaces any rejection configured on mockGetToolIndex by an earlier test.
beforeEach(() => {
mockGetToolIndex.mockResolvedValue([
{ name: 'index-entry', tokens: ['test'], tfVector: new Map() },
] as never)
mockSearchTools.mockReturnValue([])
})
// A search hit should produce a single attachment (not an array) tagged with
// the 'user_input' trigger.
test('returns non-null attachment for matching tools', async () => {
mockSearchTools.mockReturnValue([
{
name: 'CronCreateTool',
description: 'Create cron jobs',
searchHint: 'schedule recurring',
score: 0.5,
isMcp: false,
isDeferred: true,
inputSchema: undefined,
},
] as never)
const result = await getTurnZeroToolSearchPrefetch('schedule cron job', [])
expect(result).not.toBeNull()
expect(result!.type).toBe('tool_discovery')
expect((result as Record<string, unknown>).trigger).toBe('user_input')
})
test('returns null for empty input', async () => {
const result = await getTurnZeroToolSearchPrefetch('', [])
expect(result).toBeNull()
})
test('returns null when no tools match', async () => {
mockSearchTools.mockReturnValue([])
const result = await getTurnZeroToolSearchPrefetch('quantum physics', [])
expect(result).toBeNull()
})
})
// Tests for collectToolSearchPrefetch: it should pass a fulfilled prefetch
// through and turn a rejected one into an empty array.
describe('collectToolSearchPrefetch', () => {
test('returns resolved attachment array', async () => {
// Hand-built attachment; cast below because the literal is not typed as an
// Attachment.
const attachment = {
type: 'tool_discovery' as const,
tools: [],
trigger: 'assistant_turn' as const,
queryText: 'test',
durationMs: 10,
indexSize: 5,
}
const result = await collectToolSearchPrefetch(
Promise.resolve([
attachment,
] as unknown as import('../../../utils/attachments.js').Attachment[]),
)
expect(result).toHaveLength(1)
expect(result[0]!.type).toBe('tool_discovery')
})
test('returns empty array on rejected promise', async () => {
const result = await collectToolSearchPrefetch(
Promise.reject(new Error('fail')),
)
expect(result).toEqual([])
})
})
// Tests for buildToolDiscoveryAttachment: every argument should appear on the
// returned attachment under the matching field.
describe('buildToolDiscoveryAttachment', () => {
test('returns attachment with all required fields', () => {
const tools = [
{
name: 'TestTool',
description: 'A test tool',
searchHint: 'test',
score: 0.5,
isMcp: false,
isDeferred: true,
inputSchema: undefined,
},
]
const attachment = buildToolDiscoveryAttachment(
tools,
'user_input',
'test query',
10,
5,
)
const att = attachment as Record<string, unknown>
expect(att.type).toBe('tool_discovery')
// toBe: the same array instance is expected, not a copy.
expect(att.tools).toBe(tools)
expect(att.trigger).toBe('user_input')
expect(att.queryText).toBe('test query')
expect(att.durationMs).toBe(10)
expect(att.indexSize).toBe(5)
})
})

View File

@@ -0,0 +1,33 @@
/**
* prefetch.test.ts
*
* Thin subprocess wrapper that runs the actual tests in an isolated bun:test
* process. This prevents mock.module() leaks from this file's toolIndex.js
* mock from affecting other test files (e.g., toolIndex.test.ts).
*/
import { describe, test, expect } from 'bun:test'
import { resolve, relative } from 'path'
// The project root is five directories above this file. The runner path is
// handed to `bun test` relative to that root, with forward slashes.
const PROJECT_ROOT = resolve(__dirname, '..', '..', '..', '..', '..')
const RUNNER_ABS = resolve(__dirname, 'prefetch.runner.ts')
const RUNNER_REL = './' + relative(PROJECT_ROOT, RUNNER_ABS).replace(/\\/g, '/')

describe('prefetch', () => {
  test(
    'runs all prefetch tests in isolated subprocess',
    async () => {
      const proc = Bun.spawn(['bun', 'test', RUNNER_REL], {
        cwd: PROJECT_ROOT,
        stdout: 'pipe',
        stderr: 'pipe',
      })

      // Drain both pipes while waiting for the exit code. Reading only after
      // exit can stall the child once a pipe buffer fills up, which would
      // surface as a timeout instead of the real failure output.
      const [code, stdout, stderr] = await Promise.all([
        proc.exited,
        new Response(proc.stdout).text(),
        new Response(proc.stderr).text(),
      ])

      if (code !== 0) {
        // Keep only the tail of the combined output so the error stays readable.
        const output = (stderr + '\n' + stdout).slice(-3000)
        throw new Error(
          `prefetch test subprocess failed (exit ${code}):\n${output}`,
        )
      }
    },
    60_000,
  )
})

View File

@@ -0,0 +1,208 @@
import { describe, test, expect, beforeEach } from 'bun:test'
import { mock } from 'bun:test'
import { logMock } from '../../../../tests/mocks/log'
import { debugMock } from '../../../../tests/mocks/debug'
// Module mocks are registered before the dynamic import of '../toolIndex.js'
// at the end of this block.
mock.module('src/utils/log.ts', logMock)
mock.module('src/utils/debug.ts', debugMock)
// GrowthBook stub: gates and feature values read as false/undefined, the
// collection getters return empty objects, and lifecycle hooks are no-ops.
// The export list matches the stub in prefetch.runner.ts, including the two
// getDynamicConfig_* entries, so both test files mock the same module surface.
mock.module('src/services/analytics/growthbook.js', () => ({
  getFeatureValue_CACHED_MAY_BE_STALE: () => false,
  checkStatsigFeatureGate_CACHED_MAY_BE_STALE: () => false,
  getFeatureValue_DEPRECATED: async () => undefined,
  getFeatureValue_CACHED_WITH_REFRESH: async () => undefined,
  hasGrowthBookEnvOverride: () => false,
  getAllGrowthBookFeatures: () => ({}),
  getGrowthBookConfigOverrides: () => ({}),
  setGrowthBookConfigOverride: () => {},
  clearGrowthBookConfigOverrides: () => {},
  getApiBaseUrlHost: () => undefined,
  onGrowthBookRefresh: () => {},
  initializeGrowthBook: async () => {},
  checkSecurityRestrictionGate: async () => false,
  checkGate_CACHED_OR_BLOCKING: async () => false,
  refreshGrowthBookAfterAuthChange: () => {},
  resetGrowthBook: () => {},
  refreshGrowthBookFeatures: async () => {},
  setupPeriodicGrowthBookRefresh: () => {},
  stopPeriodicGrowthBookRefresh: () => {},
  getDynamicConfig_CACHED_MAY_BE_STALE: () => undefined,
  getDynamicConfig_BLOCKS_ON_INIT: async () => undefined,
}))

// The real tool index module (not mocked in this file), loaded after the
// mocks above are in place.
const {
  parseToolName,
  buildToolIndex,
  searchTools,
  getToolIndex,
  clearToolIndexCache,
} = await import('../toolIndex.js')
/**
 * Minimal structural stand-in for a tool, carrying only the fields these
 * tests set. Call sites cast it to the real Tool type.
 */
type MockTool = {
  name: string
  alwaysLoad?: boolean
  isMcp?: boolean
  shouldDefer?: boolean
  searchHint?: string
  prompt: () => Promise<string>
  inputJSONSchema?: object
  inputSchema?: unknown
}

/**
 * Creates a mock tool named 'TestTool' with a fixed prompt text; any field in
 * `overrides` replaces the corresponding default.
 *
 * @param overrides - Fields to replace on the default mock tool.
 * @returns A fresh mock tool object.
 */
function makeMockTool(overrides: Partial<MockTool> = {}): MockTool {
  // Optional fields are listed explicitly (as undefined) so the key set and
  // key order of the result do not depend on which overrides are passed.
  const tool: MockTool = {
    name: 'TestTool',
    isMcp: false,
    shouldDefer: undefined,
    alwaysLoad: undefined,
    searchHint: undefined,
    prompt: async () => 'A test tool for testing purposes.',
    inputJSONSchema: undefined,
    inputSchema: undefined,
  }
  return Object.assign(tool, overrides)
}
// Tests for parseToolName: MCP names, camelCase built-in names, and
// underscore-separated names.
describe('parseToolName', () => {
  test('parses MCP tool names', () => {
    const result = parseToolName('mcp__github__create_issue')
    expect(result.isMcp).toBe(true)
    expect(result.parts).toEqual(['github', 'create', 'issue'])
  })

  test('parses built-in tool names', () => {
    const result = parseToolName('NotebookEditTool')
    expect(result.isMcp).toBe(false)
    expect(result.parts).toEqual(['notebook', 'edit', 'tool'])
  })

  // Uses a real underscore-separated name. The camelCase path is already
  // covered by the 'NotebookEditTool' case above.
  test('parses underscore-separated tool names', () => {
    const result = parseToolName('enter_worktree_tool')
    expect(result.isMcp).toBe(false)
    expect(result.parts).toEqual(['enter', 'worktree', 'tool'])
  })
})
// Tests for buildToolIndex, using mock tools cast to the real Tool type.
describe('buildToolIndex', () => {
test('builds index from deferred tools only', async () => {
// One tool flagged alwaysLoad plus two tools without the flag; the test
// expects exactly the latter two to be indexed.
const tools = [
makeMockTool({ name: 'CoreRead', alwaysLoad: true }),
makeMockTool({
name: 'ConfigTool',
searchHint: 'configure settings options',
prompt: async () => 'Manage configuration settings.',
}),
makeMockTool({
name: 'CronCreateTool',
searchHint: 'schedule recurring prompt',
prompt: async () => 'Create cron jobs for scheduling.',
}),
] as unknown as import('../../../Tool.js').Tool[]
const index = await buildToolIndex(tools)
// Only non-core, non-alwaysLoad tools should be indexed
expect(index.length).toBe(2)
// Every indexed entry must carry at least one token and a non-empty vector.
for (const entry of index) {
expect(entry.tokens.length).toBeGreaterThan(0)
expect(entry.tfVector.size).toBeGreaterThan(0)
}
})
test('returns empty array when all tools are core', async () => {
const tools = [
makeMockTool({ name: 'Read', alwaysLoad: true }),
makeMockTool({ name: 'Edit', alwaysLoad: true }),
] as unknown as import('../../../Tool.js').Tool[]
const index = await buildToolIndex(tools)
expect(index.length).toBe(0)
})
})
// Tests for searchTools against indexes built from mock tools.
describe('searchTools', () => {
test('finds tools matching query', async () => {
const tools = [
makeMockTool({
name: 'CronCreateTool',
searchHint: 'schedule a recurring or one-shot prompt',
prompt: async () => 'Create cron jobs for scheduling tasks.',
}),
makeMockTool({
name: 'ConfigTool',
searchHint: 'configure settings options',
prompt: async () => 'Manage configuration settings.',
}),
] as unknown as import('../../../Tool.js').Tool[]
const index = await buildToolIndex(tools)
const results = searchTools('schedule cron job', index)
expect(results.length).toBeGreaterThan(0)
// CronCreateTool should rank highest for "schedule cron job"
expect(results[0]!.name).toBe('CronCreateTool')
expect(results[0]!.score).toBeGreaterThan(0)
})
test('returns empty array for empty query', async () => {
const tools = [
makeMockTool({
name: 'ConfigTool',
prompt: async () => 'Manage configuration.',
}),
] as unknown as import('../../../Tool.js').Tool[]
const index = await buildToolIndex(tools)
expect(searchTools('', index)).toEqual([])
})
// The query is unrelated to the single indexed tool, so no result is expected.
test('returns empty array when no tools match', async () => {
const tools = [
makeMockTool({
name: 'ConfigTool',
prompt: async () => 'Manage configuration settings.',
}),
] as unknown as import('../../../Tool.js').Tool[]
const index = await buildToolIndex(tools)
const results = searchTools('quantum physics entanglement', index)
expect(results).toEqual([])
})
// This case exercises the shared tokenizer directly rather than searchTools.
test('CJK tokenization produces bigrams', async () => {
// Verify CJK text is tokenized into bigrams (delegated to localSearch.tokenize)
const { tokenizeAndStem } = await import('../../skillSearch/localSearch.js')
const tokens = tokenizeAndStem('搜索代码')
expect(tokens).toContain('搜索')
expect(tokens).toContain('代码')
})
})
// Tests for the getToolIndex cache; object identity of the returned index is
// used to tell a cache hit from a rebuild.
describe('getToolIndex caching', () => {
// Start every test with an empty cache.
beforeEach(() => {
clearToolIndexCache()
})
test('returns cached index for same tool list', async () => {
const tools = [
makeMockTool({
name: 'ConfigTool',
prompt: async () => 'Manage configuration.',
}),
] as unknown as import('../../../Tool.js').Tool[]
const first = await getToolIndex(tools)
const second = await getToolIndex(tools)
expect(first).toBe(second) // Same reference = cached
})
test('rebuilds index after clearToolIndexCache', async () => {
const tools = [
makeMockTool({
name: 'ConfigTool',
prompt: async () => 'Manage configuration.',
}),
] as unknown as import('../../../Tool.js').Tool[]
const first = await getToolIndex(tools)
clearToolIndexCache()
const second = await getToolIndex(tools)
expect(first).not.toBe(second) // Different reference = rebuilt
})
})

View File

@@ -0,0 +1,184 @@
import type { Attachment } from '../../utils/attachments.js'
import type { Message } from '../../types/message.js'
import type { Tools } from '../../Tool.js'
import {
getToolIndex,
searchTools,
type ToolSearchResult,
} from './toolIndex.js'
import { logForDebugging } from '../../utils/debug.js'
import { extractQueryFromMessages } from '../skillSearch/prefetch.js'
/**
 * A tool surfaced by prefetch, as carried in a tool discovery attachment and
 * in the prefetch snapshot. toDiscoveryResult fills it field-for-field from a
 * ToolSearchResult.
 */
export type ToolDiscoveryResult = {
/** Tool name. */
name: string
/** Tool description text. */
description: string
/** Optional search hint for the tool. */
searchHint: string | undefined
/** Relevance score assigned by the search. */
score: number
/** Whether the tool is an MCP tool. */
isMcp: boolean
/** Whether the tool is a deferred tool. */
isDeferred: boolean
/** Input schema object for the tool, when one is available. */
inputSchema: object | undefined
}
// Bounds for the session tracking set: once it grows past SESSION_TRACKING_MAX
// entries, addBoundedSessionEntry drops the oldest entries until
// SESSION_TRACKING_TRIM_TO remain.
const SESSION_TRACKING_MAX = 500
const SESSION_TRACKING_TRIM_TO = 400
// Names of tools already surfaced by prefetch. startToolSearchPrefetch filters
// its search results against this set.
const discoveredToolsThisSession = new Set<string>()
// Most recent prefetch result, exposed to the UI as a subscribe/snapshot pair
// (useSyncExternalStore).
let latestPrefetchResult: ToolDiscoveryResult[] = []
const prefetchListeners = new Set<() => void>()

/** Invokes every registered listener once. */
function notifyPrefetchListeners(): void {
  prefetchListeners.forEach(listener => {
    listener()
  })
}

/**
 * Registers a listener that is called whenever the prefetch result changes.
 *
 * @param listener - Callback invoked with no arguments on each change.
 * @returns A function that removes the listener again.
 */
export function subscribeToToolSearchPrefetch(
  listener: () => void,
): () => void {
  prefetchListeners.add(listener)
  const unsubscribe = (): void => {
    prefetchListeners.delete(listener)
  }
  return unsubscribe
}

/** Returns the current prefetch result array (the stored instance, not a copy). */
export function getToolSearchPrefetchSnapshot(): ToolDiscoveryResult[] {
  return latestPrefetchResult
}

/** Replaces the stored result with a new empty array and notifies listeners. */
export function clearToolSearchPrefetchResults(): void {
  latestPrefetchResult = []
  notifyPrefetchListeners()
}
/**
 * Adds `value` to `set` and keeps the set bounded: when it grows past
 * SESSION_TRACKING_MAX entries, the oldest entries (in insertion order) are
 * removed until SESSION_TRACKING_TRIM_TO remain.
 *
 * @param set - The tracking set, modified in place.
 * @param value - The entry to record.
 */
function addBoundedSessionEntry(set: Set<string>, value: string): void {
  set.add(value)
  if (set.size <= SESSION_TRACKING_MAX) return

  // Sets iterate in insertion order, so the first entries visited are the
  // oldest ones.
  let remainingToDrop = set.size - SESSION_TRACKING_TRIM_TO
  for (const oldest of set) {
    if (remainingToDrop <= 0) break
    set.delete(oldest)
    remainingToDrop -= 1
  }
}
/**
 * Copies a search result into a new ToolDiscoveryResult object, field for
 * field.
 */
function toDiscoveryResult(r: ToolSearchResult): ToolDiscoveryResult {
  const {
    name,
    description,
    searchHint,
    score,
    isMcp,
    isDeferred,
    inputSchema,
  } = r
  return {
    name,
    description,
    searchHint,
    score,
    isMcp,
    isDeferred,
    inputSchema,
  }
}
/**
 * Wraps discovered tools in a 'tool_discovery' attachment.
 *
 * @param tools - Discovered tools; stored on the attachment as the same array instance.
 * @param trigger - What initiated the search.
 * @param queryText - The search query; only its first 200 characters are kept.
 * @param durationMs - How long the search took, in milliseconds.
 * @param indexSize - Number of entries in the index that was searched.
 * @returns The attachment object.
 */
export function buildToolDiscoveryAttachment(
  tools: ToolDiscoveryResult[],
  trigger: 'assistant_turn' | 'user_input',
  queryText: string,
  durationMs: number,
  indexSize: number,
): Attachment {
  // Cap the stored query so a very long message does not inflate the attachment.
  const truncatedQuery = queryText.slice(0, 200)
  return {
    type: 'tool_discovery',
    tools,
    trigger,
    queryText: truncatedQuery,
    durationMs,
    indexSize,
  } as Attachment
}
/**
 * Searches the tool index using the query extracted from `messages` and
 * returns the tools that have not been surfaced earlier in this session.
 *
 * On a hit, the new tool names are recorded in the session set, the prefetch
 * snapshot is replaced, and listeners are notified.
 *
 * @param tools - The tool list used to obtain the index.
 * @param messages - Conversation messages the query is extracted from.
 * @returns A one-element array holding an 'assistant_turn' tool discovery
 *   attachment, or an empty array when the query is blank, nothing new
 *   matches, or the index lookup or search throws.
 */
export async function startToolSearchPrefetch(
  tools: Tools,
  messages: Message[],
): Promise<Attachment[]> {
  const startedAt = Date.now()
  const queryText = extractQueryFromMessages(null, messages)
  if (queryText.trim() === '') return []

  try {
    const index = await getToolIndex(tools)
    // Take the top three matches, then drop any already surfaced this session.
    const unseen = searchTools(queryText, index, 3).filter(
      hit => !discoveredToolsThisSession.has(hit.name),
    )
    if (unseen.length === 0) return []

    unseen.forEach(hit => {
      addBoundedSessionEntry(discoveredToolsThisSession, hit.name)
    })

    const durationMs = Date.now() - startedAt
    logForDebugging(
      `[tool-search] prefetch found ${unseen.length} tools in ${durationMs}ms`,
    )

    const discoveryResults = unseen.map(toDiscoveryResult)
    latestPrefetchResult = discoveryResults
    notifyPrefetchListeners()

    const attachment = buildToolDiscoveryAttachment(
      discoveryResults,
      'assistant_turn',
      queryText,
      durationMs,
      index.length,
    )
    return [attachment]
  } catch (error) {
    // Prefetch is best-effort: log and report nothing found.
    logForDebugging(`[tool-search] prefetch error: ${error}`)
    return []
  }
}
/**
 * Searches the tool index using the raw user input.
 *
 * Unlike startToolSearchPrefetch, matches are not filtered against the
 * session set; every match is recorded in it. On a hit the prefetch snapshot
 * is replaced and listeners are notified.
 *
 * @param input - The user's input text.
 * @param tools - The tool list used to obtain the index.
 * @returns A 'user_input' tool discovery attachment, or null when the input
 *   is blank, nothing matches, or the index lookup or search throws.
 */
export async function getTurnZeroToolSearchPrefetch(
  input: string,
  tools: Tools,
): Promise<Attachment | null> {
  if (input.trim() === '') return null
  const startedAt = Date.now()

  try {
    const index = await getToolIndex(tools)
    const matches = searchTools(input, index, 3)
    if (matches.length === 0) return null

    matches.forEach(match => {
      addBoundedSessionEntry(discoveredToolsThisSession, match.name)
    })

    const durationMs = Date.now() - startedAt
    logForDebugging(
      `[tool-search] turn-zero found ${matches.length} tools in ${durationMs}ms`,
    )

    const discoveryResults = matches.map(toDiscoveryResult)
    latestPrefetchResult = discoveryResults
    notifyPrefetchListeners()

    return buildToolDiscoveryAttachment(
      discoveryResults,
      'user_input',
      input,
      durationMs,
      index.length,
    )
  } catch (error) {
    // Best-effort: log and report nothing found.
    logForDebugging(`[tool-search] turn-zero error: ${error}`)
    return null
  }
}
/**
 * Awaits a pending prefetch and returns its attachments; a rejected prefetch
 * yields an empty array instead of propagating the error.
 *
 * @param pending - The in-flight prefetch promise.
 * @returns The resolved attachments, or [] if `pending` rejected.
 */
export async function collectToolSearchPrefetch(
  pending: Promise<Attachment[]>,
): Promise<Attachment[]> {
  let attachments: Attachment[] = []
  try {
    attachments = await pending
  } catch {
    // A failed prefetch is treated the same as one that found nothing.
  }
  return attachments
}

View File

@@ -0,0 +1,233 @@
import type { Tools } from '../../Tool.js'
import { logForDebugging } from '../../utils/debug.js'
import {
tokenizeAndStem,
computeWeightedTf,
computeIdf,
cosineSimilarity,
} from '../skillSearch/localSearch.js'
import { isDeferredTool } from '@claude-code-best/builtin-tools/tools/ToolSearchTool/prompt.js'
/** One indexed tool, as produced by buildToolIndex. */
export interface ToolIndexEntry {
/** Tool name exactly as given by the tool. */
name: string
/** The `full` value returned by parseToolName for this tool's name. */
normalizedName: string
/** Text returned by the tool's prompt(); '' when that call failed. */
description: string
/** The tool's search hint, if it has one. */
searchHint: string | undefined
/** True when the tool's isMcp flag is exactly true. */
isMcp: boolean
/** Always true for entries created by buildToolIndex. */
isDeferred: boolean
/** The tool's inputJSONSchema when present, otherwise undefined. */
inputSchema: object | undefined
/** Distinct tokens gathered from the name, search hint, and description. */
tokens: string[]
/**
 * Field-weighted term frequencies; buildToolIndex multiplies each value by
 * the term's IDF before returning the index.
 */
tfVector: Map<string, number>
}
/**
 * One hit returned by searchTools: the descriptive fields of the matching
 * index entry plus its score for the query.
 */
export interface ToolSearchResult {
/** Tool name, copied from the index entry. */
name: string
/** Tool description, copied from the index entry. */
description: string
/** Search hint, copied from the index entry. */
searchHint: string | undefined
/** Score searchTools computed for this entry; results are sorted by it, highest first. */
score: number
/** Copied from the index entry. */
isMcp: boolean
/** Copied from the index entry. */
isDeferred: boolean
/** Copied from the index entry. */
inputSchema: object | undefined
}
// Per-field weights passed to computeWeightedTf when building an index entry:
// tokens from the tool name count most, then the search hint, then the
// description.
const TOOL_FIELD_WEIGHT = {
name: 3.0,
searchHint: 2.5,
description: 1.0,
} as const
/**
 * Parses a numeric threshold from an environment-style string.
 *
 * @param raw - The raw value; may be undefined when the variable is unset.
 * @param fallback - Value to use when `raw` is missing, blank, or not a finite number.
 * @returns The parsed number, or `fallback`.
 */
function parseScoreThreshold(raw: string | undefined, fallback: number): number {
  // Number('') and Number('   ') are 0, which would silently disable the
  // threshold, so blank input is treated the same as unset.
  if (raw === undefined || raw.trim() === '') return fallback
  const parsed = Number(raw)
  // NaN would make every `score >= threshold` comparison false and hide all
  // results; fall back instead.
  return Number.isFinite(parsed) ? parsed : fallback
}

// Minimum score a tool needs in order to be returned by searchTools. It can be
// overridden through the TOOL_SEARCH_DISPLAY_MIN_SCORE environment variable;
// invalid overrides fall back to 0.10.
const TOOL_SEARCH_DISPLAY_MIN_SCORE = parseScoreThreshold(
  process.env.TOOL_SEARCH_DISPLAY_MIN_SCORE,
  0.1,
)
// Number of CJK query tokens an entry has to contain before searchTools
// trusts a CJK-based match.
const CJK_MIN_BIGRAM_MATCHES = 2
// Matches one character from either of the two code point ranges
// U+4E00–U+9FFF and U+3400–U+4DBF.
const CJK_RANGE = /[\u4e00-\u9fff\u3400-\u4dbf]/

/**
 * Reports whether `ch` contains at least one character matched by CJK_RANGE.
 * Callers in this file pass a single character.
 */
function isCjk(ch: string): boolean {
  return ch.search(CJK_RANGE) !== -1
}
/**
 * Splits a tool name into lower-case words.
 *
 * A leading `mcp__` prefix marks an MCP tool and is removed first. The rest
 * is split at camelCase boundaries (lower-case letter followed by upper-case
 * letter) and at underscores, hyphens, and whitespace. MCP and built-in names
 * go through the same splitting, so `full` is always the words joined by
 * single spaces.
 *
 * @param name - Tool name, e.g. `NotebookEditTool` or `mcp__github__create_issue`.
 * @returns `parts` (the words), `full` (the words joined by one space), and
 *   `isMcp` (whether the name started with `mcp__`).
 */
export function parseToolName(name: string): {
  parts: string[]
  full: string
  isMcp: boolean
} {
  const mcpPrefix = 'mcp__'
  const isMcp = name.startsWith(mcpPrefix)
  const body = isMcp ? name.slice(mcpPrefix.length) : name

  const parts = body
    // Split camelCase before lower-casing, otherwise the boundary is lost.
    .replace(/([a-z])([A-Z])/g, '$1 $2')
    // Hyphens are separators too: searchTools turns hyphens in the query into
    // spaces, so a hyphen kept in the name could never match.
    .replace(/[-_]+/g, ' ')
    .toLowerCase()
    .split(/\s+/)
    .filter(Boolean)

  return {
    parts,
    full: parts.join(' '),
    isMcp,
  }
}
/**
 * Builds the TF-IDF search index for the deferred tools in `tools`.
 *
 * For every tool accepted by isDeferredTool, the tool's prompt text is
 * fetched, its name, search hint, and description are tokenized, and a
 * field-weighted term-frequency vector is computed. Once all entries exist,
 * each vector value is multiplied by the term's IDF across the index.
 *
 * Prompt texts are fetched concurrently. A tool whose prompt() throws is
 * still indexed, with an empty description, and the failure is logged.
 *
 * @param tools - All available tools; only the deferred ones are indexed.
 * @returns One entry per deferred tool, in the order the tools appear in `tools`.
 */
export async function buildToolIndex(tools: Tools): Promise<ToolIndexEntry[]> {
  const deferredTools = tools.filter(t => isDeferredTool(t))

  // The prompt() calls do not depend on each other, so they run concurrently.
  // Promise.all keeps the results in the same order as deferredTools.
  const descriptions = await Promise.all(
    deferredTools.map(async tool => {
      try {
        return await tool.prompt({
          getToolPermissionContext: async () => ({
            mode: 'default' as const,
            additionalWorkingDirectories: new Map(),
            alwaysAllowRules: {},
            alwaysDenyRules: {},
            alwaysAskRules: {},
            isBypassPermissionsModeAvailable: false,
          }),
          tools,
          agents: [],
        })
      } catch (error) {
        // Best-effort: the tool stays searchable through its name and hint.
        logForDebugging(
          `[tool-search] failed to load description for ${tool.name}: ${error}`,
        )
        return ''
      }
    }),
  )

  const entries: ToolIndexEntry[] = deferredTools.map((tool, position) => {
    const description = descriptions[position] ?? ''
    const { parts: nameParts, full: normalizedName } = parseToolName(tool.name)

    const nameTokens = tokenizeAndStem(nameParts.join(' '))
    const hintTokens = tokenizeAndStem(tool.searchHint ?? '')
    const descTokens = tokenizeAndStem(description)

    const tfVector = computeWeightedTf([
      { tokens: nameTokens, weight: TOOL_FIELD_WEIGHT.name },
      { tokens: hintTokens, weight: TOOL_FIELD_WEIGHT.searchHint },
      { tokens: descTokens, weight: TOOL_FIELD_WEIGHT.description },
    ])

    const inputSchema: object | undefined = tool.inputJSONSchema
      ? tool.inputJSONSchema
      : undefined

    return {
      name: tool.name,
      normalizedName,
      description,
      searchHint: tool.searchHint,
      isMcp: tool.isMcp === true,
      isDeferred: true,
      inputSchema,
      // Distinct tokens across all three fields.
      tokens: [...new Set([...nameTokens, ...hintTokens, ...descTokens])],
      tfVector,
    }
  })

  // Turn the weighted TF vectors into TF-IDF vectors in place. Only existing
  // keys are overwritten, which is safe while iterating the map.
  const idf = computeIdf(entries)
  for (const entry of entries) {
    for (const [term, tf] of entry.tfVector) {
      entry.tfVector.set(term, tf * (idf.get(term) ?? 0))
    }
  }

  logForDebugging(
    `[tool-search] indexed ${entries.length} deferred tools from ${tools.length} total tools`,
  )
  return entries
}
// Runs of characters that are neither letters nor digits; used to reduce text
// to space-separated words for the exact-name check.
const NAME_MATCH_SEPARATORS = /[^\p{L}\p{N}]+/gu

/**
 * Reduces text to lower-case words separated by single spaces, splitting
 * camelCase first so that "ConfigTool", "config_tool", and "config-tool" all
 * become "config tool".
 */
function normalizeForNameMatch(text: string): string {
  return text
    .replace(/([a-z])([A-Z])/g, '$1 $2')
    .toLowerCase()
    .replace(NAME_MATCH_SEPARATORS, ' ')
    .trim()
}

/**
 * Ranks index entries against a free-text query.
 *
 * Each entry is scored by the cosine similarity between the query's TF-IDF
 * vector and the entry's vector. Two adjustments follow:
 *
 * - CJK guard: when the query contains CJK tokens, an entry must contain at
 *   least CJK_MIN_BIGRAM_MATCHES distinct ones (or all of them, if the query
 *   has fewer), or share at least one non-CJK token; otherwise its score is
 *   reset to 0.
 * - Name boost: when the entry's name appears in the query as a whole-word
 *   sequence, the score is raised to at least 0.75.
 *
 * Entries scoring below TOOL_SEARCH_DISPLAY_MIN_SCORE are dropped.
 *
 * @param query - Free-text query.
 * @param index - Index produced by buildToolIndex.
 * @param limit - Maximum number of results to return (default 5).
 * @returns Matching tools sorted by score, highest first. Empty when the
 *   index is empty or the query is blank or yields no tokens.
 */
export function searchTools(
  query: string,
  index: ToolIndexEntry[],
  limit = 5,
): ToolSearchResult[] {
  if (index.length === 0 || !query.trim()) return []
  const queryTokens = tokenizeAndStem(query)
  if (queryTokens.length === 0) return []

  // Query term frequency, normalized by the most frequent query term.
  const freq = new Map<string, number>()
  for (const token of queryTokens) freq.set(token, (freq.get(token) ?? 0) + 1)
  let maxFreq = 1
  for (const count of freq.values()) if (count > maxFreq) maxFreq = count

  const idf = computeIdf(index)
  const queryTfIdf = new Map<string, number>()
  for (const [term, count] of freq) {
    queryTfIdf.set(term, (count / maxFreq) * (idf.get(term) ?? 0))
  }

  // Distinct tokens only, so a bigram repeated in the query is not counted as
  // several independent matches.
  const distinctTokens = [...freq.keys()]
  const queryCjkTokens = distinctTokens.filter(t => isCjk(t[0] ?? ''))
  const queryAsciiTokens = distinctTokens.filter(t => !isCjk(t[0] ?? ''))
  // A query with a single CJK token cannot produce two matches; require only
  // as many as the query actually has.
  const requiredCjkMatches = Math.min(
    CJK_MIN_BIGRAM_MATCHES,
    queryCjkTokens.length,
  )

  // Padded with spaces so a name only matches on whole-word boundaries.
  const paddedQuery = ` ${normalizeForNameMatch(query)} `

  const results: ToolSearchResult[] = []
  for (const entry of index) {
    let score = cosineSimilarity(queryTfIdf, entry.tfVector)

    if (queryCjkTokens.length > 0 && score > 0) {
      const cjkMatches = queryCjkTokens.filter(t =>
        entry.tfVector.has(t),
      ).length
      if (cjkMatches < requiredCjkMatches) {
        const hasAsciiMatch = queryAsciiTokens.some(t => entry.tfVector.has(t))
        if (!hasAsciiMatch) score = 0
      }
    }

    // Exact-name boost. An empty name is skipped because every string
    // contains the empty string.
    const nameKey = normalizeForNameMatch(entry.normalizedName)
    if (nameKey !== '' && paddedQuery.includes(` ${nameKey} `)) {
      score = Math.max(score, 0.75)
    }

    if (score >= TOOL_SEARCH_DISPLAY_MIN_SCORE) {
      results.push({
        name: entry.name,
        description: entry.description,
        searchHint: entry.searchHint,
        score,
        isMcp: entry.isMcp,
        isDeferred: entry.isDeferred,
        inputSchema: entry.inputSchema,
      })
    }
  }

  results.sort((a, b) => b.score - a.score)
  return results.slice(0, limit)
}
// Single-slot cache: the most recently built index plus the key (sorted,
// comma-joined tool names) it was built for.
let cachedIndex: ToolIndexEntry[] | null = null
let cachedToolNames: string | null = null
// The build currently in progress, if any, so that concurrent callers asking
// for the same tool list share one build instead of each starting their own.
let pendingBuild: { key: string; promise: Promise<ToolIndexEntry[]> } | null =
  null

/**
 * Returns the search index for `tools`, building it only when needed.
 *
 * The cache key is the sorted list of tool names, so the index is reused as
 * long as the set of names is unchanged. Callers that arrive while a build
 * for the same key is in progress receive that build's result. A build that
 * was superseded by a newer one, or that was in progress when the cache was
 * cleared, still resolves for its own callers but is not stored in the cache.
 *
 * @param tools - All available tools.
 * @returns The cached or freshly built index.
 */
export async function getToolIndex(tools: Tools): Promise<ToolIndexEntry[]> {
  const currentKey = tools
    .map(t => t.name)
    .sort()
    .join(',')

  if (cachedIndex && cachedToolNames === currentKey) {
    return cachedIndex
  }
  if (pendingBuild && pendingBuild.key === currentKey) {
    return pendingBuild.promise
  }

  const build = { key: currentKey, promise: buildToolIndex(tools) }
  pendingBuild = build
  try {
    const index = await build.promise
    // Publish only if this is still the current build.
    if (pendingBuild === build) {
      cachedIndex = index
      cachedToolNames = currentKey
    }
    return index
  } finally {
    if (pendingBuild === build) pendingBuild = null
  }
}

/**
 * Drops the cached index and detaches any build in progress, so the next
 * getToolIndex call rebuilds from scratch.
 */
export function clearToolIndexCache(): void {
  cachedIndex = null
  cachedToolNames = null
  pendingBuild = null
  logForDebugging('[tool-search] index cache cleared')
}