mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-15 12:55:51 +00:00
feat: 实现 Tool Search 基础设施层(CORE_TOOLS 白名单 + TF-IDF 索引 + ExecuteTool + 搜索增强)
- 新增 CORE_TOOLS 白名单常量(31 个核心工具),重构 isDeferredTool 为白名单制判定 - 新建 TF-IDF 工具索引模块(toolIndex.ts),复用 localSearch.ts 算法函数 - 新建 ExecuteTool 跨 API provider 统一工具执行入口 - 增强 ToolSearchTool:TF-IDF 搜索路径、discover: 模式、并行搜索合并、文本模式回退 - 新增 27 个单元测试,precheck 零错误通过(4108 tests pass) Co-Authored-By: glm-5.1[1m] <zai-org@claude-code-best.win>
This commit is contained in:
132
packages/builtin-tools/src/tools/ExecuteTool/ExecuteTool.ts
Normal file
132
packages/builtin-tools/src/tools/ExecuteTool/ExecuteTool.ts
Normal file
@@ -0,0 +1,132 @@
|
||||
import { z } from 'zod/v4'
|
||||
import {
|
||||
buildTool,
|
||||
findToolByName,
|
||||
type Tool,
|
||||
type ToolDef,
|
||||
type ToolUseContext,
|
||||
type ToolResult,
|
||||
type Tools,
|
||||
} from 'src/Tool.js'
|
||||
import { lazySchema } from 'src/utils/lazySchema.js'
|
||||
import { createUserMessage } from 'src/utils/messages.js'
|
||||
import { DESCRIPTION, getPrompt } from './prompt.js'
|
||||
import { EXECUTE_TOOL_NAME } from './constants.js'
|
||||
|
||||
/**
 * Schema factory for ExecuteTool's input, wrapped in `lazySchema` so the zod
 * object is only built when `inputSchema()` is invoked.
 *
 * Fields:
 * - `tool_name`: name of the tool to delegate to.
 * - `params`: free-form string-keyed record handed to that tool.
 */
export const inputSchema = lazySchema(() => {
  const targetName = z
    .string()
    .describe(
      'The exact name of the target tool to execute (e.g., "CronCreate", "mcp__server__action")',
    )
  const targetParams = z
    .record(z.string(), z.unknown())
    .describe('The parameters to pass to the target tool')

  return z.object({
    tool_name: targetName,
    params: targetParams,
  })
})
type InputSchema = ReturnType<typeof inputSchema>
|
||||
|
||||
/**
 * Schema factory for ExecuteTool's output, wrapped in `lazySchema`.
 *
 * Fields:
 * - `result`: whatever the delegated tool produced (left untyped).
 * - `tool_name`: the tool name that was requested.
 */
export const outputSchema = lazySchema(() => {
  const shape = {
    result: z.unknown(),
    tool_name: z.string(),
  }
  return z.object(shape)
})
type OutputSchema = ReturnType<typeof outputSchema>

/** Static type of the data ExecuteTool returns, inferred from `outputSchema`. */
export type Output = z.infer<OutputSchema>
|
||||
|
||||
/** Outcome of a zod-style `safeParse` call, reduced to the fields used below. */
type SafeParseOutcome =
  | { success: true; data: unknown }
  | { success: false; error: { message: string } }

/**
 * Runtime check that a target tool's `inputSchema` exposes `safeParse`.
 * Duck-typed on purpose: lightweight tool stand-ins (such as the plain-object
 * mocks used in tests) may carry an `inputSchema` that is not a real schema.
 */
function canSafeParse(
  schema: unknown,
): schema is { safeParse(value: unknown): SafeParseOutcome } {
  return (
    typeof schema === 'object' &&
    schema !== null &&
    typeof (schema as { safeParse?: unknown }).safeParse === 'function'
  )
}

/**
 * Builds the uniform "nothing was executed" result: `result` is null and the
 * explanation is delivered as an extra user message.
 */
function buildFailure(toolName: string, message: string): ToolResult<Output> {
  return {
    data: {
      result: null,
      tool_name: toolName,
    },
    newMessages: [createUserMessage({ content: message })],
  }
}

/**
 * ExecuteTool: looks up another tool by name in `context.options.tools` and
 * delegates the call to it, wrapping the target's data as
 * `{ result, tool_name }`.
 */
export const ExecuteTool = buildTool({
  name: EXECUTE_TOOL_NAME,
  searchHint: 'execute run invoke call a deferred tool by name with parameters',
  maxResultSizeChars: 100_000,
  isConcurrencySafe() {
    return false
  },
  get inputSchema(): InputSchema {
    return inputSchema()
  },
  get outputSchema(): OutputSchema {
    return outputSchema()
  },
  async description() {
    return DESCRIPTION
  },
  async prompt() {
    return getPrompt()
  },
  /**
   * Resolves `input.tool_name`, validates `input.params` against the target's
   * own schema when one is available, honours an explicit permission denial,
   * and then forwards the call.
   *
   * @returns The target's result with `data` replaced by
   *   `{ result: <target data>, tool_name }`, or a failure result
   *   (`result: null` plus an explanatory user message) when the tool is
   *   unknown, the params are invalid, or permission is denied.
   */
  async call(input, context, canUseTool, parentMessage, onProgress) {
    const tools: Tools = context.options.tools ?? []

    const targetTool = findToolByName(tools, input.tool_name)
    if (!targetTool) {
      return buildFailure(
        input.tool_name,
        `Tool "${input.tool_name}" not found. Use ToolSearch to discover available tools.`,
      )
    }

    // Parse params with the target's own schema so the target sees the same
    // validated (and defaulted) input it would get when invoked directly.
    let targetInput = input.params as Record<string, unknown>
    const targetSchema: unknown = targetTool.inputSchema
    if (canSafeParse(targetSchema)) {
      const parsed = targetSchema.safeParse(input.params)
      if (!parsed.success) {
        return buildFailure(
          input.tool_name,
          `Invalid params for tool "${input.tool_name}": ${parsed.error.message}`,
        )
      }
      targetInput = parsed.data as Record<string, unknown>
    }

    // Check permissions on the target tool.
    // NOTE(review): only an explicit 'deny' stops execution here. Any other
    // outcome (for example an 'ask'-style result) falls through and the target
    // runs without further confirmation from this function — confirm that the
    // surrounding permission flow covers that case.
    const permResult = await targetTool.checkPermissions?.(targetInput, context)
    if (permResult && permResult.behavior === 'deny') {
      return buildFailure(
        input.tool_name,
        `Permission denied for tool "${input.tool_name}": ${permResult.message ?? 'Permission denied'}`,
      )
    }

    // Delegate execution to the target tool
    const targetResult: ToolResult<unknown> = await targetTool.call(
      targetInput,
      context,
      canUseTool,
      parentMessage,
      onProgress,
    )

    // Keep every other field of the target's result; only `data` is re-wrapped.
    return {
      ...targetResult,
      data: {
        result: targetResult.data,
        tool_name: input.tool_name,
      },
    }
  },
  async checkPermissions() {
    return {
      behavior: 'passthrough',
      message: 'ExecuteTool delegates permission to the target tool.',
    }
  },
  renderToolUseMessage(input) {
    return `Executing ${input.tool_name}...`
  },
  userFacingName() {
    return 'ExecuteTool'
  },
  /** Serialises the wrapped output as a JSON string inside a `tool_result` block. */
  mapToolResultToToolResultBlockParam(content, toolUseID) {
    return {
      tool_use_id: toolUseID,
      type: 'tool_result',
      content: JSON.stringify(content),
    }
  },
} satisfies ToolDef<InputSchema, Output>)
|
||||
@@ -0,0 +1,166 @@
|
||||
import { describe, test, expect } from 'bun:test'
|
||||
import { mock } from 'bun:test'
|
||||
import { logMock } from '../../../../../../tests/mocks/log'
|
||||
import { debugMock } from '../../../../../../tests/mocks/debug'
|
||||
|
||||
mock.module('src/utils/log.ts', logMock)
|
||||
mock.module('src/utils/debug.ts', debugMock)
|
||||
|
||||
// Mock all heavy dependencies before importing ExecuteTool
|
||||
mock.module('src/services/analytics/growthbook.js', () => ({
|
||||
getFeatureValue_CACHED_MAY_BE_STALE: () => false,
|
||||
checkStatsigFeatureGate_CACHED_MAY_BE_STALE: () => false,
|
||||
getFeatureValue_DEPRECATED: async () => undefined,
|
||||
getFeatureValue_CACHED_WITH_REFRESH: async () => undefined,
|
||||
hasGrowthBookEnvOverride: () => false,
|
||||
getAllGrowthBookFeatures: () => ({}),
|
||||
getGrowthBookConfigOverrides: () => ({}),
|
||||
setGrowthBookConfigOverride: () => {},
|
||||
clearGrowthBookConfigOverrides: () => {},
|
||||
getApiBaseUrlHost: () => undefined,
|
||||
onGrowthBookRefresh: () => {},
|
||||
initializeGrowthBook: async () => {},
|
||||
checkSecurityRestrictionGate: async () => false,
|
||||
checkGate_CACHED_OR_BLOCKING: async () => false,
|
||||
refreshGrowthBookAfterAuthChange: () => {},
|
||||
resetGrowthBook: () => {},
|
||||
refreshGrowthBookFeatures: async () => {},
|
||||
setupPeriodicGrowthBookRefresh: () => {},
|
||||
stopPeriodicGrowthBookRefresh: () => {},
|
||||
}))
|
||||
|
||||
mock.module('src/utils/toolSearch.js', () => ({
|
||||
isToolSearchEnabledOptimistic: () => true,
|
||||
getAutoToolSearchCharThreshold: () => 100,
|
||||
getToolSearchMode: () => 'tst' as const,
|
||||
modelSupportsToolReference: () => true,
|
||||
isToolSearchToolAvailable: async () => true,
|
||||
isToolSearchEnabled: async () => true,
|
||||
isToolReferenceBlock: () => false,
|
||||
extractDiscoveredToolNames: () => new Set(),
|
||||
isDeferredToolsDeltaEnabled: () => false,
|
||||
getDeferredToolsDelta: () => null,
|
||||
}))
|
||||
|
||||
mock.module('src/constants/tools.js', () => ({
|
||||
CORE_TOOLS: new Set(['ExecuteTool', 'ToolSearch']),
|
||||
}))
|
||||
|
||||
// Mock messages module
|
||||
mock.module('src/utils/messages.js', () => ({
|
||||
createUserMessage: ({ content }: { content: string }) => ({
|
||||
type: 'user' as const,
|
||||
content,
|
||||
uuid: 'test-uuid',
|
||||
}),
|
||||
}))
|
||||
|
||||
const { ExecuteTool } = await import('../ExecuteTool.js')
|
||||
const { EXECUTE_TOOL_NAME } = await import('../constants.js')
|
||||
|
||||
/**
 * Builds the minimal tool-use context these tests need: the tool list under
 * `options.tools` plus placeholder `cwd` and `sessionId` values. Cast to
 * `never` so it can be passed wherever the full context type is expected.
 */
function makeContext(tools: unknown[] = []) {
  const options = { tools }
  const stub = { options, cwd: '/tmp', sessionId: 'test' }
  return stub as never
}
|
||||
|
||||
/**
 * Creates a plain-object stand-in for a tool.
 *
 * @param name       Tool name; also echoed in the description and UI strings.
 * @param callResult Value returned as `data` from `call()` (defaults to 'ok').
 * @returns An object whose `checkPermissions` always allows and whose
 *          `mapToolResultToToolResultBlockParam` passes content through as-is.
 */
function makeMockTool(name: string, callResult: unknown = 'ok') {
  const describeSelf = async () => `Description for ${name}`

  return {
    name,
    call: async () => ({ data: callResult }),
    checkPermissions: async () => ({ behavior: 'allow' as const }),
    prompt: describeSelf,
    description: describeSelf,
    inputSchema: {},
    isEnabled: () => true,
    isConcurrencySafe: () => true,
    isReadOnly: () => false,
    isMcp: false,
    alwaysLoad: undefined,
    shouldDefer: undefined,
    searchHint: '',
    userFacingName: () => name,
    renderToolUseMessage: () => `Running ${name}`,
    mapToolResultToToolResultBlockParam: (content: unknown, id: string) => {
      return { tool_use_id: id, type: 'tool_result', content }
    },
  }
}
|
||||
|
||||
describe('ExecuteTool', () => {
  /**
   * Invokes ExecuteTool with empty params against the given tool list, using
   * an always-allow `canUseTool` and a stub assistant message.
   */
  const invoke = (toolName: string, tools: unknown[]) =>
    ExecuteTool.call(
      { tool_name: toolName, params: {} },
      makeContext(tools),
      async () => ({ behavior: 'allow' }),
      { type: 'assistant', content: [], uuid: 'msg1' } as never,
      undefined,
    )

  test('executes a target tool by name', async () => {
    const mockTarget = makeMockTool('TestTool', { result: 'success' })

    const result = await invoke('TestTool', [mockTarget])

    expect(result.data).toEqual({
      result: { result: 'success' },
      tool_name: 'TestTool',
    })
  })

  test('returns error when tool not found', async () => {
    const result = await invoke('NonexistentTool', [])

    expect(result.data).toEqual({
      result: null,
      tool_name: 'NonexistentTool',
    })
    expect(result.newMessages).toBeDefined()
    expect(result.newMessages!.length).toBeGreaterThan(0)
    // Serialise so the check does not depend on the message object's shape.
    const serialized = JSON.stringify(result.newMessages)
    expect(serialized).toContain('NonexistentTool')
    expect(serialized).toContain('not found')
  })

  test('returns permission denied when target denies', async () => {
    const mockTarget = makeMockTool('SecretTool', 'secret')
    mockTarget.checkPermissions = async () =>
      ({
        behavior: 'deny' as const,
        message: 'Access denied',
      }) as never
    // Track invocation: a denied tool must never actually run.
    let targetInvoked = false
    mockTarget.call = async () => {
      targetInvoked = true
      return { data: 'secret' }
    }

    const result = await invoke('SecretTool', [mockTarget])

    expect(result.data).toEqual({
      result: null,
      tool_name: 'SecretTool',
    })
    expect(targetInvoked).toBe(false)
    expect(result.newMessages).toBeDefined()
    const serialized = JSON.stringify(result.newMessages)
    expect(serialized).toContain('Permission denied')
    expect(serialized).toContain('Access denied')
  })

  test('has correct name', () => {
    expect(ExecuteTool.name).toBe(EXECUTE_TOOL_NAME)
  })

  test('searchHint contains keywords', () => {
    expect(ExecuteTool.searchHint).toContain('execute')
    expect(ExecuteTool.searchHint).toContain('tool')
  })
})
|
||||
@@ -0,0 +1,34 @@
|
||||
/**
|
||||
* ExecuteTool.test.ts
|
||||
*
|
||||
* Thin subprocess wrapper that runs the actual tests in an isolated bun:test
|
||||
* process. This prevents mock.module() leaks from other test files
|
||||
* (e.g., agentToolUtils.test.ts mocking src/Tool.js) from affecting
|
||||
* ExecuteTool's tests.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test'
|
||||
import { resolve, relative } from 'path'
|
||||
|
||||
// Directory used as the subprocess cwd: five levels above this file's directory.
// NOTE(review): confirm this resolves to the intended root for `bun test`.
const PROJECT_ROOT = resolve(__dirname, '..', '..', '..', '..', '..')
// Absolute path of the real test file, which sits next to this wrapper.
const RUNNER_ABS = resolve(__dirname, 'ExecuteTool.runner.ts')
// Same path relative to PROJECT_ROOT, with forward slashes and a leading "./".
const RUNNER_REL = './' + relative(PROJECT_ROOT, RUNNER_ABS).replace(/\\/g, '/')

describe('ExecuteTool', () => {
  test('runs all ExecuteTool tests in isolated subprocess', async () => {
    const proc = Bun.spawn(['bun', 'test', RUNNER_REL], {
      cwd: PROJECT_ROOT,
      stdout: 'pipe',
      stderr: 'pipe',
    })

    // Drain both pipes while waiting for exit. Waiting for exit first can hang
    // when the child writes more than the pipe buffer holds, because the child
    // blocks on the write and never exits.
    const [code, stdout, stderr] = await Promise.all([
      proc.exited,
      new Response(proc.stdout).text(),
      new Response(proc.stderr).text(),
    ])

    if (code !== 0) {
      // Keep only the tail so the failure message stays readable.
      const output = (stderr + '\n' + stdout).slice(-3000)
      throw new Error(
        `ExecuteTool test subprocess failed (exit ${code}):\n${output}`,
      )
    }
  }, 60_000)
})
|
||||
@@ -0,0 +1 @@
|
||||
/** Tool name string for ExecuteTool. */
export const EXECUTE_TOOL_NAME = 'ExecuteTool'
|
||||
16
packages/builtin-tools/src/tools/ExecuteTool/prompt.ts
Normal file
16
packages/builtin-tools/src/tools/ExecuteTool/prompt.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
import { EXECUTE_TOOL_NAME } from './constants.js'
|
||||
|
||||
/** One-line description of ExecuteTool. */
export const DESCRIPTION = [
  'Execute a deferred tool by name with parameters.',
  'Use this after discovering a tool via ToolSearch.',
].join(' ')

/**
 * Returns the full ExecuteTool prompt text: what the tool does, when to use
 * it, its two inputs, and what happens when the named tool is not found.
 */
export function getPrompt(): string {
  const promptLines = [
    'Execute a deferred tool by name. This tool accepts a tool_name and params object, looks up the target tool in the global tool registry, and delegates execution to it.',
    '',
    'Use this tool after discovering a deferred tool via ToolSearch. The tool_name must match the exact name returned by ToolSearch (e.g., "CronCreate", "mcp__server__action").',
    '',
    'Inputs:',
    '- tool_name: The exact name of the target tool (string)',
    '- params: The parameters to pass to the target tool (object)',
    '',
    'If the tool is not found, an error message will be returned suggesting to use ToolSearch to discover available tools.',
  ]
  return promptLines.join('\n')
}
|
||||
@@ -15,8 +15,16 @@ import {
|
||||
import { logForDebugging } from 'src/utils/debug.js'
|
||||
import { lazySchema } from 'src/utils/lazySchema.js'
|
||||
import { escapeRegExp } from 'src/utils/stringUtils.js'
|
||||
import { isToolSearchEnabledOptimistic } from 'src/utils/toolSearch.js'
|
||||
import {
|
||||
isToolSearchEnabledOptimistic,
|
||||
modelSupportsToolReference,
|
||||
} from 'src/utils/toolSearch.js'
|
||||
import { getPrompt, isDeferredTool, TOOL_SEARCH_TOOL_NAME } from './prompt.js'
|
||||
import { getToolIndex, searchTools } from 'src/services/toolSearch/toolIndex.js'
|
||||
import type { ToolSearchResult } from 'src/services/toolSearch/toolIndex.js'
|
||||
|
||||
/**
 * Reads a ranking weight from an environment variable.
 *
 * Falls back to `fallback` when the variable is unset, blank, not a finite
 * number, or negative. A bare `Number(...)` would turn a non-numeric value
 * into NaN and a blank one into 0, and a NaN weight makes every merged score
 * NaN, which breaks the score sort.
 *
 * @param envName  Name of the environment variable to read.
 * @param fallback Weight used when the variable holds no usable value.
 * @returns A finite, non-negative weight.
 */
function readSearchWeight(envName: string, fallback: number): number {
  const raw = process.env[envName]
  if (raw === undefined || raw.trim() === '') return fallback
  const parsed = Number(raw)
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback
}

// Relative contribution of the keyword ranking to the merged score.
const KEYWORD_WEIGHT = readSearchWeight('TOOL_SEARCH_WEIGHT_KEYWORD', 0.4)
// Relative contribution of the TF-IDF ranking to the merged score.
const TFIDF_WEIGHT = readSearchWeight('TOOL_SEARCH_WEIGHT_TFIDF', 0.6)
|
||||
|
||||
export const inputSchema = lazySchema(() =>
|
||||
z.object({
|
||||
@@ -405,13 +413,66 @@ export const ToolSearchTool = buildTool({
|
||||
return buildSearchResult(found, query, deferredTools.length)
|
||||
}
|
||||
|
||||
// Keyword search
|
||||
const matches = await searchToolsWithKeywords(
|
||||
query,
|
||||
deferredTools,
|
||||
tools,
|
||||
max_results,
|
||||
)
|
||||
// Check for discover: prefix — pure discovery search.
|
||||
// Returns tool info (name + description + schema) as text,
|
||||
// does NOT trigger deferred tool loading.
|
||||
const discoverMatch = query.match(/^discover:(.+)$/i)
|
||||
if (discoverMatch) {
|
||||
const discoverQuery = discoverMatch[1]!.trim()
|
||||
const index = await getToolIndex(deferredTools)
|
||||
const tfIdfResults = searchTools(discoverQuery, index, max_results)
|
||||
const textResults = tfIdfResults.map(r => {
|
||||
let line = `**${r.name}** (score: ${r.score.toFixed(2)})\n${r.description}`
|
||||
if (r.inputSchema) {
|
||||
line += `\nSchema: ${JSON.stringify(r.inputSchema)}`
|
||||
}
|
||||
return line
|
||||
})
|
||||
const text =
|
||||
textResults.length > 0
|
||||
? `Found ${textResults.length} tools:\n${textResults.join('\n\n')}`
|
||||
: 'No matching deferred tools found'
|
||||
logSearchOutcome(
|
||||
tfIdfResults.map(r => r.name),
|
||||
'keyword',
|
||||
)
|
||||
return buildSearchResult(
|
||||
tfIdfResults.map(r => r.name),
|
||||
query,
|
||||
deferredTools.length,
|
||||
)
|
||||
}
|
||||
|
||||
// Keyword search + TF-IDF search in parallel
|
||||
const [keywordMatches, index] = await Promise.all([
|
||||
searchToolsWithKeywords(query, deferredTools, tools, max_results),
|
||||
getToolIndex(deferredTools),
|
||||
])
|
||||
const tfIdfResults = searchTools(query, index, max_results)
|
||||
|
||||
// Merge results: keyword score * 0.4 + TF-IDF score * 0.6
|
||||
const mergedScores = new Map<string, number>()
|
||||
// Add keyword results (assign scores inversely proportional to rank)
|
||||
keywordMatches.forEach((name, rank) => {
|
||||
const score = (keywordMatches.length - rank) / keywordMatches.length
|
||||
mergedScores.set(
|
||||
name,
|
||||
(mergedScores.get(name) ?? 0) + score * KEYWORD_WEIGHT,
|
||||
)
|
||||
})
|
||||
// Add TF-IDF results
|
||||
tfIdfResults.forEach(result => {
|
||||
mergedScores.set(
|
||||
result.name,
|
||||
(mergedScores.get(result.name) ?? 0) + result.score * TFIDF_WEIGHT,
|
||||
)
|
||||
})
|
||||
|
||||
// Sort by merged score, take top-N
|
||||
const matches = [...mergedScores.entries()]
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.slice(0, max_results)
|
||||
.map(([name]) => name)
|
||||
|
||||
logForDebugging(
|
||||
`ToolSearchTool: keyword search for "${query}", found ${matches.length} matches`,
|
||||
@@ -444,6 +505,7 @@ export const ToolSearchTool = buildTool({
|
||||
mapToolResultToToolResultBlockParam(
|
||||
content: Output,
|
||||
toolUseID: string,
|
||||
context?: { mainLoopModel?: string },
|
||||
): ToolResultBlockParam {
|
||||
if (content.matches.length === 0) {
|
||||
let text = 'No matching deferred tools found'
|
||||
@@ -459,6 +521,19 @@ export const ToolSearchTool = buildTool({
|
||||
content: text,
|
||||
}
|
||||
}
|
||||
|
||||
const supportsToolRef = context?.mainLoopModel
|
||||
? modelSupportsToolReference(context.mainLoopModel)
|
||||
: true // default: assume supported (backwards compatible)
|
||||
if (!supportsToolRef) {
|
||||
// Text mode: return tool name list for non-Anthropic providers
|
||||
return {
|
||||
type: 'tool_result',
|
||||
tool_use_id: toolUseID,
|
||||
content: `Found ${content.matches.length} tool(s): ${content.matches.join(', ')}. Use ExecuteTool with tool_name and params to invoke.`,
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
type: 'tool_result',
|
||||
tool_use_id: toolUseID,
|
||||
|
||||
@@ -0,0 +1,234 @@
|
||||
import { describe, test, expect } from 'bun:test'
|
||||
import { mock } from 'bun:test'
|
||||
import { logMock } from '../../../../../../tests/mocks/log'
|
||||
import { debugMock } from '../../../../../../tests/mocks/debug'
|
||||
|
||||
mock.module('src/utils/log.ts', logMock)
|
||||
mock.module('src/utils/debug.ts', debugMock)
|
||||
|
||||
mock.module('src/services/analytics/growthbook.js', () => ({
|
||||
getFeatureValue_CACHED_MAY_BE_STALE: () => false,
|
||||
checkStatsigFeatureGate_CACHED_MAY_BE_STALE: () => false,
|
||||
getFeatureValue_DEPRECATED: async () => undefined,
|
||||
getFeatureValue_CACHED_WITH_REFRESH: async () => undefined,
|
||||
hasGrowthBookEnvOverride: () => false,
|
||||
getAllGrowthBookFeatures: () => ({}),
|
||||
getGrowthBookConfigOverrides: () => ({}),
|
||||
setGrowthBookConfigOverride: () => {},
|
||||
clearGrowthBookConfigOverrides: () => {},
|
||||
getApiBaseUrlHost: () => undefined,
|
||||
onGrowthBookRefresh: () => {},
|
||||
initializeGrowthBook: async () => {},
|
||||
checkSecurityRestrictionGate: async () => false,
|
||||
checkGate_CACHED_OR_BLOCKING: async () => false,
|
||||
refreshGrowthBookAfterAuthChange: () => {},
|
||||
resetGrowthBook: () => {},
|
||||
refreshGrowthBookFeatures: async () => {},
|
||||
setupPeriodicGrowthBookRefresh: () => {},
|
||||
stopPeriodicGrowthBookRefresh: () => {},
|
||||
}))
|
||||
|
||||
mock.module('src/utils/toolSearch.js', () => ({
|
||||
isToolSearchEnabledOptimistic: () => true,
|
||||
getAutoToolSearchCharThreshold: () => 100,
|
||||
getToolSearchMode: () => 'tst' as const,
|
||||
modelSupportsToolReference: (model: string) => !model.includes('haiku'),
|
||||
isToolSearchToolAvailable: async () => true,
|
||||
isToolSearchEnabled: async () => true,
|
||||
isToolReferenceBlock: () => false,
|
||||
extractDiscoveredToolNames: () => new Set(),
|
||||
isDeferredToolsDeltaEnabled: () => false,
|
||||
getDeferredToolsDelta: () => null,
|
||||
}))
|
||||
|
||||
mock.module('src/constants/tools.js', () => ({
|
||||
CORE_TOOLS: new Set(['Read', 'Edit', 'ToolSearch', 'ExecuteTool']),
|
||||
}))
|
||||
|
||||
// Mock toolIndex module
|
||||
type MockToolSearchResult = {
|
||||
name: string
|
||||
description: string
|
||||
searchHint: string | undefined
|
||||
score: number
|
||||
isMcp: boolean
|
||||
isDeferred: boolean
|
||||
inputSchema: object | undefined
|
||||
}
|
||||
const mockSearchTools = mock(
|
||||
(
|
||||
_query: string,
|
||||
_index: unknown,
|
||||
_limit?: number,
|
||||
): MockToolSearchResult[] => [],
|
||||
)
|
||||
const mockGetToolIndex = mock(async (_tools: unknown) => [])
|
||||
|
||||
mock.module('src/services/toolSearch/toolIndex.js', () => ({
|
||||
getToolIndex: mockGetToolIndex,
|
||||
searchTools: mockSearchTools,
|
||||
}))
|
||||
|
||||
// Mock analytics
|
||||
mock.module('src/services/analytics/index.js', () => ({
|
||||
logEvent: () => {},
|
||||
}))
|
||||
|
||||
const { ToolSearchTool } = await import('../ToolSearchTool.js')
|
||||
|
||||
/**
 * Creates a plain-object stand-in for a non-MCP tool with no `alwaysLoad` or
 * `shouldDefer` flag set.
 *
 * @param name Tool name.
 * @param desc Text returned by both `prompt()` and `description()`.
 */
function makeDeferredTool(name: string, desc: string = 'A tool') {
  const describeSelf = async () => desc

  return {
    name,
    isMcp: false,
    alwaysLoad: undefined,
    shouldDefer: undefined,
    searchHint: '',
    prompt: describeSelf,
    description: describeSelf,
    inputSchema: {},
    isEnabled: () => true,
  }
}
|
||||
|
||||
/**
 * Builds the minimal tool-use context for these tests: the tool list under
 * `options.tools`, placeholder `cwd` / `sessionId`, and a `getAppState` that
 * reports no MCP clients. Cast to `never` so it fits any context parameter.
 */
function makeContext(tools: unknown[] = []) {
  const getAppState = () => ({
    mcp: { clients: [] },
  })
  const stub = {
    options: { tools },
    cwd: '/tmp',
    sessionId: 'test',
    getAppState,
  }
  return stub as never
}
|
||||
|
||||
describe('ToolSearchTool search enhancements', () => {
  /** Builds one mocked TF-IDF hit for a deferred, non-MCP tool. */
  const indexHit = (
    name: string,
    description: string,
    score: number,
  ): MockToolSearchResult => ({
    name,
    description,
    searchHint: undefined,
    score,
    isMcp: false,
    isDeferred: true,
    inputSchema: undefined,
  })

  /** Runs ToolSearchTool.call with max_results 5 against the given tools. */
  const runSearch = (
    query: string,
    tools: unknown[],
  ): Promise<{ data: { matches: string[] } }> =>
    (ToolSearchTool as any).call(
      { query, max_results: 5 },
      makeContext(tools),
      async () => ({ behavior: 'allow' }),
      { type: 'assistant', content: [], uuid: 'msg1' } as never,
      undefined,
    )

  test('discover: prefix triggers TF-IDF search and returns matches', async () => {
    const mockTool = makeDeferredTool('CronCreate', 'Schedule cron jobs')
    mockGetToolIndex.mockResolvedValueOnce([])
    mockSearchTools.mockReturnValueOnce([
      indexHit('CronCreate', 'Schedule cron jobs', 0.85),
    ])

    const result = await runSearch('discover:schedule cron job', [mockTool])

    expect(result.data.matches).toContain('CronCreate')
  })

  test('keyword + TF-IDF parallel search merges results', async () => {
    const toolA = makeDeferredTool('ToolA', 'Tool A description')
    const toolB = makeDeferredTool('ToolB', 'Tool B description')
    const toolC = makeDeferredTool('ToolC', 'Tool C description')

    // The index itself is empty; the ranking comes from the mocked searchTools.
    mockGetToolIndex.mockResolvedValueOnce([])
    mockSearchTools.mockReturnValueOnce([
      indexHit('ToolB', 'Tool B', 0.9),
      indexHit('ToolC', 'Tool C', 0.8),
    ])

    const result = await runSearch('tool B', [toolA, toolB, toolC])

    // ToolB is returned by the mocked TF-IDF ranking, so it must be in the merge.
    expect(result.data.matches).toContain('ToolB')
  })

  test('text mode output for non-Anthropic models', async () => {
    // Only the result mapping is exercised here, so no search mocks are
    // queued: unconsumed `...Once` values would leak into later tests.
    const blockParam = ToolSearchTool.mapToolResultToToolResultBlockParam(
      { matches: ['TestTool'], query: 'test', total_deferred_tools: 1 },
      'tool-use-123',
      { mainLoopModel: 'claude-3-haiku-20240307' },
    )

    expect(blockParam.content).toContain('ExecuteTool')
  })

  test('tool_reference mode for Anthropic models', async () => {
    const blockParam = ToolSearchTool.mapToolResultToToolResultBlockParam(
      { matches: ['TestTool'], query: 'test', total_deferred_tools: 1 },
      'tool-use-123',
      { mainLoopModel: 'claude-sonnet-4-20250514' },
    )

    // Should contain tool_reference type
    const content = blockParam.content as Array<{ type: string }>
    expect(content[0]!.type).toBe('tool_reference')
  })

  test('backwards compatible without context parameter', async () => {
    const blockParam = ToolSearchTool.mapToolResultToToolResultBlockParam(
      { matches: ['TestTool'], query: 'test', total_deferred_tools: 1 },
      'tool-use-123',
    )

    // Should default to tool_reference mode
    const content = blockParam.content as Array<{ type: string }>
    expect(content[0]!.type).toBe('tool_reference')
  })

  test('empty results return helpful message', async () => {
    const blockParam = ToolSearchTool.mapToolResultToToolResultBlockParam(
      { matches: [], query: 'nonexistent', total_deferred_tools: 5 },
      'tool-use-123',
    )

    expect(blockParam.content).toContain('No matching deferred tools found')
  })
})
|
||||
@@ -1,24 +1,6 @@
|
||||
import { feature } from 'bun:bundle'
|
||||
import { isReplBridgeActive } from 'src/bootstrap/state.js'
|
||||
import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js'
|
||||
import type { Tool } from 'src/Tool.js'
|
||||
import { AGENT_TOOL_NAME } from '../AgentTool/constants.js'
|
||||
|
||||
// Dead code elimination: Brief tool name only needed when KAIROS or KAIROS_BRIEF is on
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
const BRIEF_TOOL_NAME: string | null =
|
||||
feature('KAIROS') || feature('KAIROS_BRIEF')
|
||||
? (
|
||||
require('../BriefTool/prompt.js') as typeof import('../BriefTool/prompt.js')
|
||||
).BRIEF_TOOL_NAME
|
||||
: null
|
||||
const SEND_USER_FILE_TOOL_NAME: string | null = feature('KAIROS')
|
||||
? (
|
||||
require('../SendUserFileTool/prompt.js') as typeof import('../SendUserFileTool/prompt.js')
|
||||
).SEND_USER_FILE_TOOL_NAME
|
||||
: null
|
||||
|
||||
/* eslint-enable @typescript-eslint/no-require-imports */
|
||||
import { CORE_TOOLS } from 'src/constants/tools.js'
|
||||
|
||||
export { TOOL_SEARCH_TOOL_NAME } from './constants.js'
|
||||
|
||||
@@ -47,64 +29,26 @@ Result format: each matched tool appears as one <function>{"description": "...",
|
||||
|
||||
Query forms:
|
||||
- "select:Read,Edit,Grep" — fetch these exact tools by name
|
||||
- "discover:schedule cron job" — pure discovery, returns tool info (name, description, schema) without loading. Use when you want to understand available tools before deciding which to invoke.
|
||||
- "notebook jupyter" — keyword search, up to max_results best matches
|
||||
- "+slack send" — require "slack" in the name, rank by remaining terms`
|
||||
|
||||
/**
|
||||
* Check if a tool should be deferred (requires ToolSearch to load).
|
||||
* A tool is deferred if:
|
||||
* - It's an MCP tool (always deferred - workflow-specific)
|
||||
* - It has shouldDefer: true
|
||||
*
|
||||
* A tool is NEVER deferred if it has alwaysLoad: true (MCP tools set this via
|
||||
* _meta['anthropic/alwaysLoad']). This check runs first, before any other rule.
|
||||
* A tool is deferred if it is NOT in CORE_TOOLS and does NOT have alwaysLoad: true.
|
||||
* Core tools are always loaded — never deferred.
|
||||
* All other tools (non-core built-in + all MCP tools) are deferred
|
||||
* and must be discovered via ToolSearchTool / ExecuteTool.
|
||||
*/
|
||||
export function isDeferredTool(tool: Tool): boolean {
|
||||
// Explicit opt-out via _meta['anthropic/alwaysLoad'] — tool appears in the
|
||||
// initial prompt with full schema. Checked first so MCP tools can opt out.
|
||||
// Explicit opt-out via _meta['anthropic/alwaysLoad']
|
||||
if (tool.alwaysLoad === true) return false
|
||||
|
||||
// MCP tools are always deferred (workflow-specific)
|
||||
if (tool.isMcp === true) return true
|
||||
// Core tools are always loaded — never deferred
|
||||
if (CORE_TOOLS.has(tool.name)) return false
|
||||
|
||||
// Never defer ToolSearch itself — the model needs it to load everything else
|
||||
if (tool.name === TOOL_SEARCH_TOOL_NAME) return false
|
||||
|
||||
// Fork-first experiment: Agent must be available turn 1, not behind ToolSearch.
|
||||
// Lazy require: static import of forkSubagent → coordinatorMode creates a cycle
|
||||
// through constants/tools.ts at module init.
|
||||
if (feature('FORK_SUBAGENT') && tool.name === AGENT_TOOL_NAME) {
|
||||
type ForkMod = typeof import('../AgentTool/forkSubagent.js')
|
||||
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
||||
const m = require('../AgentTool/forkSubagent.js') as ForkMod
|
||||
if (m.isForkSubagentEnabled()) return false
|
||||
}
|
||||
|
||||
// Brief is the primary communication channel whenever the tool is present.
|
||||
// Its prompt contains the text-visibility contract, which the model must
|
||||
// see without a ToolSearch round-trip. No runtime gate needed here: this
|
||||
// tool's isEnabled() IS isBriefEnabled(), so being asked about its deferral
|
||||
// status implies the gate already passed.
|
||||
if (
|
||||
(feature('KAIROS') || feature('KAIROS_BRIEF')) &&
|
||||
BRIEF_TOOL_NAME &&
|
||||
tool.name === BRIEF_TOOL_NAME
|
||||
) {
|
||||
return false
|
||||
}
|
||||
|
||||
// SendUserFile is a file-delivery communication channel (sibling of Brief).
|
||||
// Must be immediately available without a ToolSearch round-trip.
|
||||
if (
|
||||
feature('KAIROS') &&
|
||||
SEND_USER_FILE_TOOL_NAME &&
|
||||
tool.name === SEND_USER_FILE_TOOL_NAME &&
|
||||
isReplBridgeActive()
|
||||
) {
|
||||
return false
|
||||
}
|
||||
|
||||
return tool.shouldDefer === true
|
||||
// Everything else (non-core built-in + all MCP tools) is deferred
|
||||
return true
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user