Merge pull request #442 from claude-code-best/feature/tool_search

feat: 支持 SearchExtraTools 能力以替代 Tool Search
2026-06-20 23:35:51 +00:00 · 2026-05-09 17:23:03 +08:00
parent df8c4f4b3c 547ce9e848
commit 7e2b8e81ca
78 changed files with 4987 additions and 791 deletions
--- a/src/services/api/claude.ts
+++ b/src/services/api/claude.ts
@@ -157,13 +157,12 @@ import {
 import { getAgentContext } from 'src/utils/agentContext.js'
 import { isClaudeAISubscriber } from 'src/utils/auth.js'
 import {
-  getToolSearchBetaHeader,
  modelSupportsStructuredOutputs,
  shouldIncludeFirstPartyOnlyBetas,
  shouldUseGlobalCacheScope,
 } from 'src/utils/betas.js'
 import { CLAUDE_IN_CHROME_MCP_SERVER_NAME } from 'src/utils/claudeInChrome/common.js'
-import { CHROME_TOOL_SEARCH_INSTRUCTIONS } from 'src/utils/claudeInChrome/prompt.js'
+import { CHROME_SEARCH_EXTRA_TOOLS_INSTRUCTIONS } from 'src/utils/claudeInChrome/prompt.js'
 import { getMaxThinkingTokensForModel } from 'src/utils/context.js'
 import { logForDebugging } from 'src/utils/debug.js'
 import { logForDiagnosticsNoPII } from 'src/utils/diagLogs.js'
@@ -185,17 +184,16 @@ import {
  type ThinkingConfig,
 } from 'src/utils/thinking.js'
 import {
-  extractDiscoveredToolNames,
  isDeferredToolsDeltaEnabled,
-  isToolSearchEnabled,
-} from 'src/utils/toolSearch.js'
+  isSearchExtraToolsEnabled,
+} from 'src/utils/searchExtraTools.js'
 import { API_MAX_MEDIA_PER_REQUEST } from '../../constants/apiLimits.js'
 import { ADVISOR_BETA_HEADER } from '../../constants/betas.js'
 import {
  formatDeferredToolLine,
  isDeferredTool,
-  TOOL_SEARCH_TOOL_NAME,
-} from '@claude-code-best/builtin-tools/tools/ToolSearchTool/prompt.js'
+  SEARCH_EXTRA_TOOLS_TOOL_NAME,
+} from '@claude-code-best/builtin-tools/tools/SearchExtraToolsTool/prompt.js'
 import { count } from '../../utils/array.js'
 import { insertBlockAfterToolResults } from '../../utils/contentArray.js'
 import { validateBoundedIntEnvVar } from '../../utils/envValidation.js'
@@ -1157,7 +1155,7 @@ async function* queryModel(

  // Check if tool search is enabled (checks mode, model support, and threshold for auto mode)
  // This is async because it may need to calculate MCP tool description sizes for TstAuto mode
-  let useToolSearch = await isToolSearchEnabled(
+  let useSearchExtraTools = await isSearchExtraToolsEnabled(
    options.model,
    tools,
    options.getToolPermissionContext,
@@ -1167,7 +1165,7 @@ async function* queryModel(

  // Precompute once — isDeferredTool does 2 GrowthBook lookups per call
  const deferredToolNames = new Set<string>()
-  if (useToolSearch) {
+  if (useSearchExtraTools) {
    for (const t of tools) {
      if (isDeferredTool(t)) deferredToolNames.add(t.name)
    }
@@ -1175,51 +1173,46 @@ async function* queryModel(

  // Even if tool search mode is enabled, skip if there are no deferred tools
  // AND no MCP servers are still connecting. When servers are pending, keep
-  // ToolSearch available so the model can discover tools after they connect.
+  // SearchExtraTools available so the model can discover tools after they connect.
  if (
-    useToolSearch &&
+    useSearchExtraTools &&
    deferredToolNames.size === 0 &&
    !options.hasPendingMcpServers
  ) {
    logForDebugging(
      'Tool search disabled: no deferred tools available to search',
    )
-    useToolSearch = false
+    useSearchExtraTools = false
  }

-  // Filter out ToolSearchTool if tool search is not enabled for this model
-  // ToolSearchTool returns tool_reference blocks which unsupported models can't handle
+  // Dynamic tool loading: deferred tools are never sent in the API tools array
  let filteredTools: Tools

-  if (useToolSearch) {
-    // Dynamic tool loading: Only include deferred tools that have been discovered
-    // via tool_reference blocks in the message history. This eliminates the need
-    // to predeclare all deferred tools upfront and removes limits on tool quantity.
-    const discoveredToolNames = extractDiscoveredToolNames(messages)
+  // Deferred tools are always filtered out of the API request, even after
+  // SearchExtraTools discovers them — they are invoked via ExecuteExtraTool.

+  if (useSearchExtraTools) {
+    // Never include deferred tools in the API tools array — they are invoked
+    // via ExecuteExtraTool which looks them up from the global tool registry
+    // at runtime. Keeping the tools array stable preserves the prompt cache
+    // across turns (discovered tools no longer bloat the tools JSON).
    filteredTools = tools.filter(tool => {
-      // Always include non-deferred tools
+      // Always include non-deferred tools (core tools)
      if (!deferredToolNames.has(tool.name)) return true
-      // Always include ToolSearchTool (so it can discover more tools)
-      if (toolMatchesName(tool, TOOL_SEARCH_TOOL_NAME)) return true
-      // Only include deferred tools that have been discovered
-      return discoveredToolNames.has(tool.name)
+      // Always include SearchExtraToolsTool (so it can discover more tools)
+      if (toolMatchesName(tool, SEARCH_EXTRA_TOOLS_TOOL_NAME)) return true
+      // All other deferred tools are excluded — use ExecuteExtraTool instead
+      return false
    })
  } else {
    filteredTools = tools.filter(
-      t => !toolMatchesName(t, TOOL_SEARCH_TOOL_NAME),
+      t => !toolMatchesName(t, SEARCH_EXTRA_TOOLS_TOOL_NAME),
    )
  }

-  // Add tool search beta header if enabled - required for defer_loading to be accepted
-  // Header differs by provider: 1P/Foundry use advanced-tool-use, Vertex/Bedrock use tool-search-tool
-  // For Bedrock, this header must go in extraBodyParams, not the betas array
-  const toolSearchHeader = useToolSearch ? getToolSearchBetaHeader() : null
-  if (toolSearchHeader && getAPIProvider() !== 'bedrock') {
-    if (!betas.includes(toolSearchHeader)) {
-      betas.push(toolSearchHeader)
-    }
-  }
+  // Tool search beta header and defer_loading removed — unified self-built
+  // tool search via SearchExtraToolsTool + ExecuteExtraTool for all providers.
+  // No longer relies on API-side tool_reference or defer_loading features.

  // Determine if cached microcompact is enabled for this model.
  // Computed once here (in async context) and captured by paramsFromContext.
@@ -1250,13 +1243,9 @@ async function* queryModel(
  }

  const useGlobalCacheFeature = shouldUseGlobalCacheScope()
-  const willDefer = (t: Tool) =>
-    useToolSearch && (deferredToolNames.has(t.name) || shouldDeferLspTool(t))
  // MCP tools are per-user → dynamic tool section → can't globally cache.
-  // Only gate when an MCP tool will actually render (not defer_loading).
  const needsToolBasedCacheMarker =
-    useGlobalCacheFeature &&
-    filteredTools.some(t => t.isMcp === true && !willDefer(t))
+    useGlobalCacheFeature && filteredTools.some(t => t.isMcp === true)

  // Ensure prompt_caching_scope beta header is present when global cache is enabled.
  if (
@@ -1273,9 +1262,9 @@ async function* queryModel(
      : 'system_prompt'
    : 'none'

-  // Build tool schemas, adding defer_loading for MCP tools when tool search is enabled
+  // Build tool schemas — no defer_loading since we use self-built tool search
  // Note: We pass the full `tools` list (not filteredTools) to toolToAPISchema so that
-  // ToolSearchTool's prompt can list ALL available MCP tools. The filtering only affects
+  // SearchExtraToolsTool's prompt can list ALL available MCP tools. The filtering only affects
  // which tools are actually sent to the API, not what the model sees in tool descriptions.
  const toolSchemas = await Promise.all(
    filteredTools.map(tool =>
@@ -1285,17 +1274,13 @@ async function* queryModel(
        agents: options.agents,
        allowedAgentTypes: options.allowedAgentTypes,
        model: options.model,
-        deferLoading: willDefer(tool),
      }),
    ),
  )

-  if (useToolSearch) {
-    const includedDeferredTools = count(filteredTools, t =>
-      deferredToolNames.has(t.name),
-    )
+  if (useSearchExtraTools) {
    logForDebugging(
-      `Dynamic tool loading: ${includedDeferredTools}/${deferredToolNames.size} deferred tools included`,
+      `Dynamic tool loading: 0/${deferredToolNames.size} deferred tools in API tools array (all via ExecuteExtraTool)`,
    )
  }

@@ -1315,17 +1300,17 @@ async function* queryModel(
  // selected model doesn't support tool search.
  //
  // Why is this needed in addition to normalizeMessagesForAPI?
-  // - normalizeMessagesForAPI uses isToolSearchEnabledNoModelCheck() because it's
+  // - normalizeMessagesForAPI uses isSearchExtraToolsEnabledNoModelCheck() because it's
  //   called from ~20 places (analytics, feedback, sharing, etc.), many of which
  //   don't have model context. Adding model to its signature would be a large refactor.
-  // - This post-processing uses the model-aware isToolSearchEnabled() check
+  // - This post-processing uses the model-aware isSearchExtraToolsEnabled() check
  // - This handles mid-conversation model switching (e.g., Sonnet → Haiku) where
  //   stale tool-search fields from the previous model would cause 400 errors
  //
  // Note: For assistant messages, normalizeMessagesForAPI already normalized the
  // tool inputs, so stripCallerFieldFromAssistantMessage only needs to remove the
  // 'caller' field (not re-normalize inputs).
-  if (!useToolSearch) {
+  if (!useSearchExtraTools) {
    messagesForAPI = messagesForAPI.map(msg => {
      switch (msg.type) {
        case 'user':
@@ -1365,7 +1350,7 @@ async function* queryModel(
  if (getAPIProvider() === 'openai') {
    const { queryModelOpenAI } = await import('./openai/index.js')
    // OpenAI emulates Anthropic's dynamic tool loading client-side. It needs
-    // the full tool pool so ToolSearchTool can search deferred MCP tools that
+    // the full tool pool so SearchExtraToolsTool can search deferred MCP tools that
    // were intentionally filtered out of the initial API tool list above.
    yield* queryModelOpenAI(
      messagesForAPI,
@@ -1415,7 +1400,7 @@ async function* queryModel(
  // When the delta attachment is enabled, deferred tools are announced
  // via persisted deferred_tools_delta attachments instead of this
  // ephemeral prepend (which busts cache whenever the pool changes).
-  if (useToolSearch && !isDeferredToolsDeltaEnabled()) {
+  if (useSearchExtraTools && !isDeferredToolsDeltaEnabled()) {
    const deferredToolList = tools
      .filter(t => deferredToolNames.has(t.name))
      .map(formatDeferredToolLine)
@@ -1424,7 +1409,7 @@ async function* queryModel(
    if (deferredToolList) {
      messagesForAPI = [
        createUserMessage({
-          content: `<available-deferred-tools>\n${deferredToolList}\n</available-deferred-tools>`,
+          content: `<available-deferred-tools>\n${deferredToolList}\n</available-deferred-tools>\nTo invoke any tool listed above, use ExecuteExtraTool with {"tool_name": "<name>", "params": {...}}. This is the ONLY way to call deferred tools — do not read source code or analyze implementation, just call ExecuteExtraTool directly.`,
          isMeta: true,
        }),
        ...messagesForAPI,
@@ -1440,7 +1425,7 @@ async function* queryModel(
    isToolFromMcpServer(t.name, CLAUDE_IN_CHROME_MCP_SERVER_NAME),
  )
  const injectChromeHere =
-    useToolSearch && hasChromeTools && !isMcpInstructionsDeltaEnabled()
+    useSearchExtraTools && hasChromeTools && !isMcpInstructionsDeltaEnabled()

  // filter(Boolean) works by converting each element to a boolean - empty strings become false and are filtered out.
  systemPrompt = asSystemPrompt(
@@ -1452,7 +1437,7 @@ async function* queryModel(
      }),
      ...systemPrompt,
      ...(advisorModel ? [ADVISOR_TOOL_INSTRUCTIONS] : []),
-      ...(injectChromeHere ? [CHROME_TOOL_SEARCH_INSTRUCTIONS] : []),
+      ...(injectChromeHere ? [CHROME_SEARCH_EXTRA_TOOLS_INSTRUCTIONS] : []),
    ].filter(Boolean),
  )

@@ -1653,13 +1638,10 @@ async function* queryModel(
      betasParams.push(CONTEXT_1M_BETA_HEADER)
    }

-    // For Bedrock, include both model-based betas and dynamically-added tool search header
+    // For Bedrock, include model-based betas (no tool search header — self-built search)
    const bedrockBetas =
      getAPIProvider() === 'bedrock'
-        ? [
-            ...getBedrockExtraBodyParamsBetas(retryContext.model),
-            ...(toolSearchHeader ? [toolSearchHeader] : []),
-          ]
+        ? [...getBedrockExtraBodyParamsBetas(retryContext.model)]
        : []
    const extraBodyParams = getExtraBodyParams(bedrockBetas)

--- a/src/services/api/openai/tests/queryModelOpenAI.isolated.ts
+++ b/src/services/api/openai/tests/queryModelOpenAI.isolated.ts
@@ -196,7 +196,7 @@ async function runQueryModel(
 // We mock at module level. Bun's mock.module replaces the module for the
 // entire file, so we configure the stream per-test via a shared variable.
 let _nextEvents: BetaRawMessageStreamEvent[] = []
-let _toolSearchEnabled = false
+let _searchExtraToolsEnabled = false

 /** Captured arguments from the last chat.completions.create() call */
 let _lastCreateArgs: Record<string, any> | null = null
@@ -316,15 +316,15 @@ mock.module('../../../../utils/api.js', () => ({
  toolToAPISchema: async (t: any) => t,
 }))

-mock.module('../../../../utils/toolSearch.js', () => ({
-  isToolSearchEnabled: async () => _toolSearchEnabled,
+mock.module('../../../../utils/searchExtraTools.js', () => ({
+  isSearchExtraToolsEnabled: async () => _searchExtraToolsEnabled,
  extractDiscoveredToolNames: () => new Set(),
  isDeferredToolsDeltaEnabled: () => false,
 }))

-mock.module('../../../../tools/ToolSearchTool/prompt.js', () => ({
+mock.module('../../../../tools/SearchExtraToolsTool/prompt.js', () => ({
  isDeferredTool: () => false,
-  TOOL_SEARCH_TOOL_NAME: '__tool_search__',
+  SEARCH_EXTRA_TOOLS_TOOL_NAME: '__tool_search__',
 }))

 mock.module('../../../../cost-tracker.js', () => ({
@@ -606,14 +606,14 @@ describe('queryModelOpenAI — max_tokens forwarded to request', () => {

 describe('queryModelOpenAI — deferred MCP tool visibility', () => {
  test('prepends available deferred MCP tools to OpenAI messages', async () => {
-    _toolSearchEnabled = true
+    _searchExtraToolsEnabled = true
    _nextEvents = [makeMessageStart(), makeMessageStop()]

    try {
      const { queryModelOpenAI } = await import('../index.js')
      const tools: any[] = [
        {
-          name: 'ToolSearch',
+          name: 'SearchExtraTools',
          isMcp: false,
          input_schema: { type: 'object', properties: {} },
          prompt: async () => 'Search deferred tools',
@@ -655,7 +655,7 @@ describe('queryModelOpenAI — deferred MCP tool visibility', () => {
        '<available-deferred-tools>\\nmcp__wechat__send_message\\n</available-deferred-tools>',
      )
    } finally {
-      _toolSearchEnabled = false
+      _searchExtraToolsEnabled = false
    }
  })
 })
--- a/src/services/api/openai/index.ts
+++ b/src/services/api/openai/index.ts
@@ -59,15 +59,14 @@ import {
 } from '../../../utils/messages.js'
 import type { SDKAssistantMessageError } from '../../../entrypoints/agentSdkTypes.js'
 import {
-  isToolSearchEnabled,
-  extractDiscoveredToolNames,
+  isSearchExtraToolsEnabled,
  isDeferredToolsDeltaEnabled,
-} from '../../../utils/toolSearch.js'
+} from '../../../utils/searchExtraTools.js'
 import {
  formatDeferredToolLine,
  isDeferredTool,
-  TOOL_SEARCH_TOOL_NAME,
-} from '@claude-code-best/builtin-tools/tools/ToolSearchTool/prompt.js'
+  SEARCH_EXTRA_TOOLS_TOOL_NAME,
+} from '@claude-code-best/builtin-tools/tools/SearchExtraToolsTool/prompt.js'

 function convertToResponsesReasoningEffort(
  effortValue: unknown,
@@ -98,15 +97,15 @@ function getChatGPTResponsesReasoningEffort(
 * OpenAI-compatible endpoints cannot consume Anthropic's `defer_loading` or
 * `tool_reference` beta payloads directly, so the model needs the same textual
 * list of deferred MCP tool names that Anthropic receives before it can ask
- * ToolSearchTool to load their full schemas.
+ * SearchExtraToolsTool to load their full schemas.
 */
 function prependDeferredToolListIfNeeded(
  messages: (AssistantMessage | UserMessage)[],
  tools: Tools,
  deferredToolNames: Set<string>,
-  useToolSearch: boolean,
+  useSearchExtraTools: boolean,
 ): (AssistantMessage | UserMessage)[] {
-  if (!useToolSearch || isDeferredToolsDeltaEnabled()) return messages
+  if (!useSearchExtraTools || isDeferredToolsDeltaEnabled()) return messages

  const deferredToolList = tools
    .filter(tool => deferredToolNames.has(tool.name))
@@ -225,7 +224,7 @@ export async function* queryModelOpenAI(
    const messagesForAPI = normalizeMessagesForAPI(messages, tools)

    // 3. Check if tool search is enabled (similar to Anthropic path)
-    const useToolSearch = await isToolSearchEnabled(
+    const useSearchExtraTools = await isSearchExtraToolsEnabled(
      options.model,
      tools,
      options.getToolPermissionContext ||
@@ -236,24 +235,25 @@ export async function* queryModelOpenAI(

    // 4. Build deferred tools set (similar to Anthropic path)
    const deferredToolNames = new Set<string>()
-    if (useToolSearch) {
+    if (useSearchExtraTools) {
      for (const t of tools) {
        if (isDeferredTool(t)) deferredToolNames.add(t.name)
      }
    }

    // 5. Filter tools (similar to Anthropic path)
+    // Never include deferred tools in the API tools array — they are invoked
+    // via ExecuteExtraTool which looks them up from the global tool registry
+    // at runtime. Keeping the tools array stable preserves the prompt cache.
    let filteredTools = tools
-    if (useToolSearch && deferredToolNames.size > 0) {
-      const discoveredToolNames = extractDiscoveredToolNames(messages)
-
+    if (useSearchExtraTools && deferredToolNames.size > 0) {
      filteredTools = tools.filter(tool => {
        // Always include non-deferred tools
        if (!deferredToolNames.has(tool.name)) return true
-        // Always include ToolSearchTool (so it can discover more tools)
-        if (toolMatchesName(tool, TOOL_SEARCH_TOOL_NAME)) return true
-        // Only include deferred tools that have been discovered
-        return discoveredToolNames.has(tool.name)
+        // Always include SearchExtraToolsTool (so it can discover more tools)
+        if (toolMatchesName(tool, SEARCH_EXTRA_TOOLS_TOOL_NAME)) return true
+        // All other deferred tools are excluded — use ExecuteExtraTool instead
+        return false
      })
    }

@@ -266,7 +266,7 @@ export async function* queryModelOpenAI(
          agents: options.agents,
          allowedAgentTypes: options.allowedAgentTypes,
          model: options.model,
-          deferLoading: useToolSearch && deferredToolNames.has(tool.name),
+          deferLoading: useSearchExtraTools && deferredToolNames.has(tool.name),
        }),
      ),
    )
@@ -290,7 +290,7 @@ export async function* queryModelOpenAI(
      openAIConvertibleMessages,
      tools,
      deferredToolNames,
-      useToolSearch,
+      useSearchExtraTools,
    )
    const openaiMessages = anthropicMessagesToOpenAI(
      messagesWithDeferredToolList,
@@ -304,7 +304,7 @@ export async function* queryModelOpenAI(
    )

    // 9. Log tool filtering details
-    if (useToolSearch) {
+    if (useSearchExtraTools) {
      const includedDeferredTools = filteredTools.filter(t =>
        deferredToolNames.has(t.name),
      ).length