From e33b17bde7babaf92b634c1d96527535aeb1996a Mon Sep 17 00:00:00 2001
From: claude-code-best
Date: Sun, 31 May 2026 14:08:30 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20sideQuery=20=E6=94=AF=E6=8C=81=E7=AC=AC?=
 =?UTF-8?q?=E4=B8=89=E6=96=B9=20provider=20=E8=B7=AF=E7=94=B1=20(OpenAI/Gr?=
 =?UTF-8?q?ok/Gemini)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 新增 getProviderPrimaryModel() 从环境变量解析 provider 主模型
- getDefaultOpus/Sonnet/HaikuModel 在第三方 provider 下回退到用户配置的主模型
- sideQuery 根据 provider 类型分发到对应的 API 适配器
- 新增 sideQueryViaOpenAICompatible (OpenAI + Grok) 和 sideQueryViaGemini 适配函数
- 避免 sideQuery 后台任务在配置第三方端点时仍请求 Anthropic API
---
 src/utils/model/model.ts |  33 +++-
 src/utils/sideQuery.ts   | 407 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 434 insertions(+), 6 deletions(-)

diff --git a/src/utils/model/model.ts b/src/utils/model/model.ts
index 385212f51..19c0e1561 100644
--- a/src/utils/model/model.ts
+++ b/src/utils/model/model.ts
@@ -120,6 +120,19 @@ export function getBestModel(): ModelName {
   return getDefaultOpusModel()
 }
 
+/**
+ * Resolve the provider's primary model from its env var (e.g. OPENAI_MODEL).
+ * Returns undefined for providers that don't have a primary-model env var
+ * (Bedrock, Vertex, Foundry, firstParty).
+ */
+function getProviderPrimaryModel(): ModelName | undefined {
+  const provider = getAPIProvider()
+  if (provider === 'openai') return process.env.OPENAI_MODEL
+  if (provider === 'gemini') return process.env.GEMINI_MODEL
+  if (provider === 'grok') return process.env.GROK_MODEL
+  return undefined
+}
+
 // @[MODEL LAUNCH]: Update the default Opus model (3P providers may lag so keep defaults unchanged).
 export function getDefaultOpusModel(): ModelName {
   const provider = getAPIProvider()
@@ -138,10 +151,12 @@ export function getDefaultOpusModel(): ModelName {
   if (process.env.ANTHROPIC_DEFAULT_OPUS_MODEL) {
     return process.env.ANTHROPIC_DEFAULT_OPUS_MODEL
   }
-  // 3P providers (Bedrock, Vertex, Foundry) all publish Opus 4.7 in sync
-  // with firstParty as of 2026-04-17 (AWS Bedrock, Google Vertex AI, and
-  // Microsoft Foundry announcements and model catalogs all confirm). The
-  // branch is kept as a structural hook in case a future launch lags on 3P.
+  // 3P providers: if user set a primary model (e.g. OPENAI_MODEL=glm-5.1),
+  // fall back to it instead of a hardcoded Anthropic model. This prevents
+  // sideQuery / background tasks from sending requests to Anthropic's API
+  // when the user configured a third-party provider.
+  const primaryModel = getProviderPrimaryModel()
+  if (primaryModel) return primaryModel
   if (provider !== 'firstParty') {
     return getModelStrings().opus47
   }
@@ -166,7 +181,11 @@ export function getDefaultSonnetModel(): ModelName {
   if (process.env.ANTHROPIC_DEFAULT_SONNET_MODEL) {
     return process.env.ANTHROPIC_DEFAULT_SONNET_MODEL
   }
-  // Default to Sonnet 4.5 for 3P since they may not have 4.6 yet
+  // 3P providers: fall back to user's primary model instead of a hardcoded
+  // Anthropic model name. Prevents background API calls from being routed to
+  // Anthropic when the user configured a third-party endpoint.
+  const primaryModel = getProviderPrimaryModel()
+  if (primaryModel) return primaryModel
   if (provider !== 'firstParty') {
     return getModelStrings().sonnet45
   }
@@ -191,6 +210,10 @@ export function getDefaultHaikuModel(): ModelName {
   if (process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL) {
     return process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL
   }
+  // 3P providers: fall back to user's primary model instead of a hardcoded
+  // Anthropic model name.
+  const primaryModel = getProviderPrimaryModel()
+  if (primaryModel) return primaryModel
 
   // Haiku 4.5 is available on all platforms (first-party, Foundry, Bedrock, Vertex)
   return getModelStrings().haiku45
diff --git a/src/utils/sideQuery.ts b/src/utils/sideQuery.ts
index c08474f0c..6424d5456 100644
--- a/src/utils/sideQuery.ts
+++ b/src/utils/sideQuery.ts
@@ -33,6 +33,17 @@ import { errorMessage } from './errors.js'
 import { computeFingerprint } from './fingerprint.js'
 import { getAPIProvider } from './model/providers.js'
 import { normalizeModelStringForAPI } from './model/model.js'
+import { getOpenAIClient } from '../services/api/openai/client.js'
+import { getGrokClient } from '../services/api/grok/client.js'
+import {
+  resolveOpenAIModel,
+  anthropicToolsToOpenAI,
+  anthropicToolChoiceToOpenAI,
+  resolveGrokModel,
+  resolveGeminiModel,
+  anthropicToolsToGemini,
+  anthropicToolChoiceToGemini,
+} from '@ant/model-provider'
 
 type MessageParam = Anthropic.MessageParam
 type TextBlockParam = Anthropic.TextBlockParam
@@ -99,6 +110,53 @@ function extractFirstUserMessageText(messages: MessageParam[]): string {
   return textBlock?.type === 'text' ? textBlock.text : ''
 }
 
+/**
+ * Flatten the `system` option into a single string.
+ *
+ * A string is returned as-is; an array of blocks is reduced to its non-empty
+ * text blocks joined by a blank line. Returns '' when `system` is absent.
+ */
+function extractSystemText(system?: string | TextBlockParam[]): string {
+  if (!system) return ''
+  if (typeof system === 'string') return system
+  return system
+    .filter((b): b is { type: 'text'; text: string } => 'text' in b && !!b.text)
+    .map(b => b.text)
+    .join('\n\n')
+}
+
+/**
+ * Convert Anthropic MessageParam[] to a list of {role, content} objects
+ * suitable for OpenAI-compatible chat.completions APIs.
+ *
+ * Only text is carried over: text blocks of one message are joined with a
+ * newline, non-text blocks (images, tool_use, tool_result, ...) are dropped,
+ * and messages left with no text are skipped entirely.
+ */
+function messageParamsToOpenAIRoleContent(
+  messages: MessageParam[],
+): Array<{ role: 'user' | 'assistant'; content: string }> {
+  const result: Array<{ role: 'user' | 'assistant'; content: string }> = []
+  for (const m of messages) {
+    if (m.role !== 'user' && m.role !== 'assistant') continue
+    const text =
+      typeof m.content === 'string'
+        ? m.content
+        : Array.isArray(m.content)
+          ? m.content
+              .filter(
+                (b): b is { type: 'text'; text: string } => b.type === 'text',
+              )
+              .map(b => b.text)
+              .join('\n')
+          : ''
+    if (text) {
+      result.push({ role: m.role as 'user' | 'assistant', content: text })
+    }
+  }
+  return result
+}
+
 /**
  * Lightweight API wrapper for "side queries" outside the main conversation loop.
  *
@@ -112,6 +170,7 @@ function extractFirstUserMessageText(messages: MessageParam[]): string {
  * - Proper betas for the model
  * - API metadata
  * - Model string normalization (strips [1m] suffix for API)
+ * - Third-party provider routing (OpenAI, Grok, Gemini)
  *
  * @example
  * // Permission explainer
@@ -142,6 +201,14 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
     stop_sequences,
   } = opts
 
+  const provider = getAPIProvider()
+  if (provider === 'openai' || provider === 'grok') {
+    return sideQueryViaOpenAICompatible(opts)
+  }
+  if (provider === 'gemini') {
+    return sideQueryViaGemini(opts)
+  }
+
   const client = await getAnthropicClient({
     maxRetries,
     model,
@@ -198,7 +265,6 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
   }
 
   const normalizedModel = normalizeModelStringForAPI(model)
-  const provider = getAPIProvider()
   const start = Date.now()
   const traceName = `side-query:${opts.querySource}`
 
@@ -328,3 +394,342 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
 
   return response
 }
+
+/**
+ * OpenAI-compatible side query for OpenAI and Grok providers.
+ * Both use the OpenAI SDK with different base URLs.
+ *
+ * Converts Anthropic-format params to OpenAI Chat Completions, sends a
+ * non-streaming request, and wraps the response back into a BetaMessage
+ * shape so callers remain provider-agnostic.
+ *
+ * Supports tools and tool_choice for structured output (e.g. yoloClassifier,
+ * permissionExplainer).
+ *
+ * NOTE: only the text content of `messages` is forwarded, and
+ * `stop_sequences` is not sent to the provider.
+ */
+async function sideQueryViaOpenAICompatible(
+  opts: SideQueryOptions,
+): Promise<BetaMessage> {
+  const {
+    model,
+    system,
+    messages,
+    tools,
+    tool_choice,
+    max_tokens = 1024,
+    temperature,
+    signal,
+  } = opts
+
+  const provider = getAPIProvider()
+  const normalizedModel = normalizeModelStringForAPI(model)
+
+  // Resolve model name and client per provider
+  let openaiModel: string
+  // eslint-disable-next-line @typescript-eslint/no-redundant-type-constituents
+  let client: import('openai').default
+  if (provider === 'grok') {
+    openaiModel = resolveGrokModel(normalizedModel)
+    client = getGrokClient({ maxRetries: opts.maxRetries ?? 2 })
+  } else {
+    openaiModel = resolveOpenAIModel(normalizedModel)
+    client = getOpenAIClient({ maxRetries: opts.maxRetries ?? 2 })
+  }
+
+  // Build system prompt text
+  const systemText = extractSystemText(system)
+
+  // Build OpenAI messages: system first, then user/assistant
+  const openaiMessages: Array<{
+    role: 'system' | 'user' | 'assistant'
+    content: string
+  }> = []
+  if (systemText) {
+    openaiMessages.push({ role: 'system', content: systemText })
+  }
+  openaiMessages.push(...messageParamsToOpenAIRoleContent(messages))
+
+  // Convert tools and tool_choice if provided
+  const openaiTools =
+    tools && tools.length > 0
+      ? anthropicToolsToOpenAI(tools as BetaToolUnion[])
+      : undefined
+  const openaiToolChoice = tool_choice
+    ? anthropicToolChoiceToOpenAI(tool_choice)
+    : undefined
+
+  const start = Date.now()
+
+  const requestParams: Record<string, unknown> = {
+    model: openaiModel,
+    messages: openaiMessages,
+    max_tokens,
+  }
+  if (temperature !== undefined) requestParams.temperature = temperature
+  if (openaiTools && openaiTools.length > 0) {
+    requestParams.tools = openaiTools
+    if (openaiToolChoice) requestParams.tool_choice = openaiToolChoice
+  }
+
+  const response = await client.chat.completions.create(
+    requestParams as unknown as import('openai/resources/chat/completions/completions.mjs').ChatCompletionCreateParamsNonStreaming,
+    { signal },
+  )
+
+  const choice = response.choices[0]
+  const message = choice?.message
+
+  // Build content blocks for BetaMessage
+  const contentBlocks: Array<
+    | { type: 'text'; text: string }
+    | { type: 'tool_use'; id: string; name: string; input: unknown }
+  > = []
+
+  if (message?.content) {
+    contentBlocks.push({ type: 'text', text: message.content })
+  }
+
+  if (message?.tool_calls) {
+    for (const tc of message.tool_calls) {
+      // ChatCompletionMessageToolCall is a union — only function-type has .function
+      if (tc.type === 'function' && 'function' in tc) {
+        const fn = (tc as { function: { name: string; arguments: string } })
+          .function
+        contentBlocks.push({
+          type: 'tool_use',
+          id: tc.id ?? `toolu_${Date.now()}`,
+          name: fn.name,
+          input: JSON.parse(fn.arguments || '{}'),
+        })
+      }
+    }
+  }
+
+  const now = Date.now()
+  const requestId = response.id
+  const lastCompletion = getLastApiCompletionTimestamp()
+  logEvent('tengu_api_success', {
+    requestId:
+      requestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    querySource:
+      opts.querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    model:
+      openaiModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    inputTokens: response.usage?.prompt_tokens ?? 0,
+    outputTokens: response.usage?.completion_tokens ?? 0,
+    cachedInputTokens: 0,
+    uncachedInputTokens: response.usage?.prompt_tokens ?? 0,
+    durationMsIncludingRetries: now - start,
+    timeSinceLastApiCallMs:
+      lastCompletion !== null ? now - lastCompletion : undefined,
+  })
+  setLastApiCompletionTimestamp(now)
+
+  const stopReason =
+    choice?.finish_reason === 'tool_calls'
+      ? 'tool_use'
+      : choice?.finish_reason === 'length'
+        ? 'max_tokens'
+        : 'end_turn'
+
+  return {
+    id: response.id,
+    type: 'message',
+    role: 'assistant',
+    content: contentBlocks as BetaMessage['content'],
+    model: openaiModel,
+    stop_reason: stopReason as BetaMessage['stop_reason'],
+    stop_sequence: null,
+    usage: {
+      input_tokens: response.usage?.prompt_tokens ?? 0,
+      output_tokens: response.usage?.completion_tokens ?? 0,
+    },
+  } as BetaMessage
+}
+
+/**
+ * Gemini side query. Converts Anthropic-format params to Gemini
+ * generateContent format, sends a non-streaming request via fetch,
+ * and wraps the response back into a BetaMessage shape.
+ *
+ * NOTE: only the text content of `messages` is forwarded, and
+ * `stop_sequences` is not sent to the provider.
+ */
+async function sideQueryViaGemini(
+  opts: SideQueryOptions,
+): Promise<BetaMessage> {
+  const {
+    model,
+    system,
+    messages,
+    tools,
+    tool_choice,
+    max_tokens = 1024,
+    temperature,
+    signal,
+  } = opts
+
+  const normalizedModel = normalizeModelStringForAPI(model)
+  const geminiModel = resolveGeminiModel(normalizedModel)
+
+  // Build Gemini contents from Anthropic MessageParam[]. Text flattening is
+  // shared with the OpenAI-compatible path; Gemini only names the assistant
+  // role 'model'.
+  const contents = messageParamsToOpenAIRoleContent(messages).map(
+    ({ role, content }) => ({
+      role: role === 'assistant' ? 'model' : 'user',
+      parts: [{ text: content }],
+    }),
+  )
+
+  // Build system instruction
+  const systemText = extractSystemText(system)
+  const systemInstruction = systemText
+    ? { parts: [{ text: systemText }] }
+    : undefined
+
+  // Convert tools and tool_choice
+  const geminiTools =
+    tools && tools.length > 0
+      ? anthropicToolsToGemini(tools as BetaToolUnion[])
+      : undefined
+  const geminiToolConfig = tool_choice
+    ? anthropicToolChoiceToGemini(tool_choice)
+    : undefined
+
+  const baseUrl = (
+    process.env.GEMINI_BASE_URL ||
+    'https://generativelanguage.googleapis.com/v1beta'
+  ).replace(/\/+$/, '')
+  const modelPath = geminiModel.startsWith('models/')
+    ? geminiModel
+    : `models/${geminiModel}`
+  const url = `${baseUrl}/${modelPath}:generateContent`
+
+  // max_tokens always has a value (it defaults to 1024), so generationConfig
+  // is always sent; temperature is included only when the caller set one.
+  const body: Record<string, unknown> = {
+    contents,
+    ...(systemInstruction && { systemInstruction }),
+    ...(geminiTools && geminiTools.length > 0 && { tools: geminiTools }),
+    ...(geminiToolConfig && {
+      toolConfig: { functionCallingConfig: geminiToolConfig },
+    }),
+    generationConfig: {
+      ...(temperature !== undefined && { temperature }),
+      maxOutputTokens: max_tokens,
+    },
+  }
+
+  const start = Date.now()
+
+  const res = await fetch(url, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'x-goog-api-key': process.env.GEMINI_API_KEY || '',
+    },
+    body: JSON.stringify(body),
+    signal,
+  })
+
+  if (!res.ok) {
+    const errorBody = await res.text()
+    throw new Error(
+      `Gemini API request failed (${res.status} ${res.statusText}): ${errorBody || 'empty response body'}`,
+    )
+  }
+
+  const geminiResponse = (await res.json()) as {
+    candidates?: Array<{
+      content?: {
+        role?: string
+        parts?: Array<{
+          text?: string
+          functionCall?: { name?: string; args?: Record<string, unknown> }
+        }>
+      }
+      finishReason?: string
+    }>
+    usageMetadata?: {
+      promptTokenCount?: number
+      candidatesTokenCount?: number
+      totalTokenCount?: number
+    }
+    responseId?: string
+    id?: string
+  }
+  // generateContent names its identifier `responseId`; `id` is still read as
+  // a fallback, as before.
+  const responseId = geminiResponse.responseId ?? geminiResponse.id
+
+  // Build content blocks from Gemini response
+  const contentBlocks: Array<
+    | { type: 'text'; text: string }
+    | { type: 'tool_use'; id: string; name: string; input: unknown }
+  > = []
+
+  const candidate = geminiResponse.candidates?.[0]
+  const parts = candidate?.content?.parts
+  if (parts) {
+    for (const part of parts) {
+      if (part.text) {
+        contentBlocks.push({ type: 'text', text: part.text })
+      }
+      if (part.functionCall) {
+        contentBlocks.push({
+          type: 'tool_use',
+          id: `toolu_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
+          name: part.functionCall.name ?? '',
+          input: part.functionCall.args ?? {},
+        })
+      }
+    }
+  }
+
+  const now = Date.now()
+  const lastCompletion = getLastApiCompletionTimestamp()
+  logEvent('tengu_api_success', {
+    requestId: (responseId ??
+      '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    querySource:
+      opts.querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    model:
+      geminiModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    inputTokens: geminiResponse.usageMetadata?.promptTokenCount ?? 0,
+    outputTokens: geminiResponse.usageMetadata?.candidatesTokenCount ?? 0,
+    cachedInputTokens: 0,
+    uncachedInputTokens: geminiResponse.usageMetadata?.promptTokenCount ?? 0,
+    durationMsIncludingRetries: now - start,
+    timeSinceLastApiCallMs:
+      lastCompletion !== null ? now - lastCompletion : undefined,
+  })
+  setLastApiCompletionTimestamp(now)
+
+  // Gemini reports finishReason 'STOP' for turns that end in a function call,
+  // so tool use is detected from the content blocks; this matches the
+  // OpenAI-compatible path, which maps tool calls to 'tool_use'.
+  const hasToolUse = contentBlocks.some(block => block.type === 'tool_use')
+  const stopReason =
+    candidate?.finishReason === 'MAX_TOKENS'
+      ? 'max_tokens'
+      : hasToolUse
+        ? 'tool_use'
+        : 'end_turn'
+
+  return {
+    id: responseId ?? `gemini_${Date.now()}`,
+    type: 'message',
+    role: 'assistant',
+    content: contentBlocks as BetaMessage['content'],
+    model: geminiModel,
+    stop_reason: stopReason as BetaMessage['stop_reason'],
+    stop_sequence: null,
+    usage: {
+      input_tokens: geminiResponse.usageMetadata?.promptTokenCount ?? 0,
+      output_tokens: geminiResponse.usageMetadata?.candidatesTokenCount ?? 0,
+    },
+  } as BetaMessage
+}