mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-15 12:55:51 +00:00
feat: sideQuery 支持第三方 provider 路由 (OpenAI/Grok/Gemini)
- 新增 getProviderPrimaryModel() 从环境变量解析 provider 主模型
- getDefaultOpus/Sonnet/HaikuModel 在第三方 provider 下回退到用户配置的主模型
- sideQuery 根据 provider 类型分发到对应的 API 适配器
- 新增 sideQueryViaOpenAICompatible (OpenAI + Grok) 和 sideQueryViaGemini 适配函数
- 避免 sideQuery 后台任务在配置第三方端点时仍请求 Anthropic API
This commit is contained in:
@@ -120,6 +120,19 @@ export function getBestModel(): ModelName {
|
||||
return getDefaultOpusModel()
|
||||
}
|
||||
|
||||
/**
 * Resolve the active provider's primary model from its environment variable
 * (OPENAI_MODEL, GEMINI_MODEL or GROK_MODEL).
 *
 * The value is trimmed, and a missing, empty or whitespace-only variable is
 * reported as `undefined` so callers that test the result for truthiness never
 * end up using a blank model name.
 *
 * @returns The configured model name, or `undefined` when the provider has no
 *   primary-model env var (any provider other than openai/gemini/grok) or the
 *   variable is unset/blank.
 */
function getProviderPrimaryModel(): ModelName | undefined {
  let configured: string | undefined
  switch (getAPIProvider()) {
    case 'openai':
      configured = process.env.OPENAI_MODEL
      break
    case 'gemini':
      configured = process.env.GEMINI_MODEL
      break
    case 'grok':
      configured = process.env.GROK_MODEL
      break
    default:
      return undefined
  }
  // Treat "set but blank" the same as "not set".
  const trimmed = configured?.trim()
  return trimmed ? trimmed : undefined
}
|
||||
|
||||
// @[MODEL LAUNCH]: Update the default Opus model (3P providers may lag so keep defaults unchanged).
|
||||
export function getDefaultOpusModel(): ModelName {
|
||||
const provider = getAPIProvider()
|
||||
@@ -138,10 +151,12 @@ export function getDefaultOpusModel(): ModelName {
|
||||
if (process.env.ANTHROPIC_DEFAULT_OPUS_MODEL) {
|
||||
return process.env.ANTHROPIC_DEFAULT_OPUS_MODEL
|
||||
}
|
||||
// 3P providers (Bedrock, Vertex, Foundry) all publish Opus 4.7 in sync
|
||||
// with firstParty as of 2026-04-17 (AWS Bedrock, Google Vertex AI, and
|
||||
// Microsoft Foundry announcements and model catalogs all confirm). The
|
||||
// branch is kept as a structural hook in case a future launch lags on 3P.
|
||||
// 3P providers: if user set a primary model (e.g. OPENAI_MODEL=glm-5.1),
|
||||
// fall back to it instead of a hardcoded Anthropic model. This prevents
|
||||
// sideQuery / background tasks from sending requests to Anthropic's API
|
||||
// when the user configured a third-party provider.
|
||||
const primaryModel = getProviderPrimaryModel()
|
||||
if (primaryModel) return primaryModel
|
||||
if (provider !== 'firstParty') {
|
||||
return getModelStrings().opus47
|
||||
}
|
||||
@@ -166,7 +181,11 @@ export function getDefaultSonnetModel(): ModelName {
|
||||
if (process.env.ANTHROPIC_DEFAULT_SONNET_MODEL) {
|
||||
return process.env.ANTHROPIC_DEFAULT_SONNET_MODEL
|
||||
}
|
||||
// Default to Sonnet 4.5 for 3P since they may not have 4.6 yet
|
||||
// 3P providers: fall back to user's primary model instead of a hardcoded
|
||||
// Anthropic model name. Prevents background API calls from being routed to
|
||||
// Anthropic when the user configured a third-party endpoint.
|
||||
const primaryModel = getProviderPrimaryModel()
|
||||
if (primaryModel) return primaryModel
|
||||
if (provider !== 'firstParty') {
|
||||
return getModelStrings().sonnet45
|
||||
}
|
||||
@@ -191,6 +210,10 @@ export function getDefaultHaikuModel(): ModelName {
|
||||
if (process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL) {
|
||||
return process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL
|
||||
}
|
||||
// 3P providers: fall back to user's primary model instead of a hardcoded
|
||||
// Anthropic model name.
|
||||
const primaryModel = getProviderPrimaryModel()
|
||||
if (primaryModel) return primaryModel
|
||||
|
||||
// Haiku 4.5 is available on all platforms (first-party, Foundry, Bedrock, Vertex)
|
||||
return getModelStrings().haiku45
|
||||
|
||||
@@ -33,6 +33,19 @@ import { errorMessage } from './errors.js'
|
||||
import { computeFingerprint } from './fingerprint.js'
|
||||
import { getAPIProvider } from './model/providers.js'
|
||||
import { normalizeModelStringForAPI } from './model/model.js'
|
||||
import { getOpenAIClient } from '../services/api/openai/client.js'
|
||||
import { getGrokClient } from '../services/api/grok/client.js'
|
||||
import {
|
||||
anthropicMessagesToOpenAI,
|
||||
resolveOpenAIModel,
|
||||
anthropicToolsToOpenAI,
|
||||
anthropicToolChoiceToOpenAI,
|
||||
resolveGrokModel,
|
||||
resolveGeminiModel,
|
||||
anthropicToolsToGemini,
|
||||
anthropicToolChoiceToGemini,
|
||||
} from '@ant/model-provider'
|
||||
import type { SystemPrompt } from './systemPromptType.js'
|
||||
|
||||
type MessageParam = Anthropic.MessageParam
|
||||
type TextBlockParam = Anthropic.TextBlockParam
|
||||
@@ -99,6 +112,46 @@ function extractFirstUserMessageText(messages: MessageParam[]): string {
|
||||
return textBlock?.type === 'text' ? textBlock.text : ''
|
||||
}
|
||||
|
||||
/**
 * Flatten the `system` option into a single string.
 *
 * @param system - Either a plain string or a list of text blocks; may be omitted.
 * @returns The string as-is, or the non-empty block texts joined by a blank
 *   line. An omitted value yields the empty string.
 */
function extractSystemText(system?: string | TextBlockParam[]): string {
  if (typeof system === 'string') return system
  if (!system) return ''

  const pieces: string[] = []
  for (const block of system) {
    // Blocks with an empty text contribute nothing to the joined prompt.
    if ('text' in block && block.text) {
      pieces.push(block.text)
    }
  }
  return pieces.join('\n\n')
}
|
||||
|
||||
/**
 * Reduce Anthropic `MessageParam[]` to plain `{ role, content }` pairs for
 * OpenAI-compatible chat.completions requests.
 *
 * Only text survives the conversion: string content is used verbatim, array
 * content is reduced to its `text` blocks joined by a newline. Messages whose
 * role is neither `user` nor `assistant`, or whose resulting text is empty,
 * are dropped.
 *
 * @param messages - Conversation messages in Anthropic format.
 * @returns The text-only messages, in their original order.
 */
function messageParamsToOpenAIRoleContent(
  messages: MessageParam[],
): Array<{ role: 'user' | 'assistant'; content: string }> {
  return messages.flatMap(msg => {
    const { role, content } = msg
    if (role !== 'user' && role !== 'assistant') return []

    let flattened = ''
    if (typeof content === 'string') {
      flattened = content
    } else if (Array.isArray(content)) {
      flattened = content
        .filter(
          (part): part is { type: 'text'; text: string } =>
            part.type === 'text',
        )
        .map(part => part.text)
        .join('\n')
    }

    return flattened ? [{ role, content: flattened }] : []
  })
}
|
||||
|
||||
/**
|
||||
* Lightweight API wrapper for "side queries" outside the main conversation loop.
|
||||
*
|
||||
@@ -112,6 +165,7 @@ function extractFirstUserMessageText(messages: MessageParam[]): string {
|
||||
* - Proper betas for the model
|
||||
* - API metadata
|
||||
* - Model string normalization (strips [1m] suffix for API)
|
||||
* - Third-party provider routing (OpenAI, Grok, Gemini)
|
||||
*
|
||||
* @example
|
||||
* // Permission explainer
|
||||
@@ -142,6 +196,14 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
|
||||
stop_sequences,
|
||||
} = opts
|
||||
|
||||
const provider = getAPIProvider()
|
||||
if (provider === 'openai' || provider === 'grok') {
|
||||
return sideQueryViaOpenAICompatible(opts)
|
||||
}
|
||||
if (provider === 'gemini') {
|
||||
return sideQueryViaGemini(opts)
|
||||
}
|
||||
|
||||
const client = await getAnthropicClient({
|
||||
maxRetries,
|
||||
model,
|
||||
@@ -198,7 +260,6 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
|
||||
}
|
||||
|
||||
const normalizedModel = normalizeModelStringForAPI(model)
|
||||
const provider = getAPIProvider()
|
||||
const start = Date.now()
|
||||
const traceName = `side-query:${opts.querySource}`
|
||||
|
||||
@@ -328,3 +389,352 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
|
||||
|
||||
return response
|
||||
}
|
||||
|
||||
/**
 * Decode the JSON `arguments` string of an OpenAI function tool call.
 *
 * @param toolName - Function name reported by the provider; used only to give
 *   the error message some context.
 * @param rawArguments - JSON text produced by the model. An empty string is
 *   treated as `{}`.
 * @returns The parsed value.
 * @throws Error naming the tool when the text is not valid JSON.
 */
function parseOpenAIToolArguments(
  toolName: string,
  rawArguments: string,
): unknown {
  if (!rawArguments) return {}
  try {
    return JSON.parse(rawArguments)
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err)
    throw new Error(
      `Failed to parse arguments for tool call "${toolName}": ${reason}`,
    )
  }
}

/**
 * OpenAI-compatible side query for the OpenAI and Grok providers.
 * Both go through an OpenAI SDK client; only the client factory and the model
 * resolver differ.
 *
 * Converts Anthropic-format params to a Chat Completions request, sends it
 * non-streaming, and wraps the response back into a BetaMessage shape so
 * callers remain provider-agnostic. Tools and tool_choice are forwarded for
 * structured output (e.g. yoloClassifier, permissionExplainer).
 *
 * Only `model`, `system`, `messages`, `tools`, `tool_choice`, `max_tokens`
 * (default 1024), `temperature`, `signal` and `maxRetries` (default 2) are
 * read from `opts`; other options such as `stop_sequences` are not sent.
 * Message content is reduced to text — non-text blocks are dropped.
 *
 * @param opts - Side query options in Anthropic format.
 * @returns A BetaMessage-shaped object with text / tool_use content blocks,
 *   a mapped stop_reason, and input/output token usage.
 * @throws Whatever the SDK call throws, or an Error when a tool call carries
 *   arguments that are not valid JSON.
 */
async function sideQueryViaOpenAICompatible(
  opts: SideQueryOptions,
): Promise<BetaMessage> {
  const {
    model,
    system,
    messages,
    tools,
    tool_choice,
    max_tokens = 1024,
    temperature,
    signal,
  } = opts

  const provider = getAPIProvider()
  const normalizedModel = normalizeModelStringForAPI(model)
  const maxRetries = opts.maxRetries ?? 2

  // Resolve model name and client per provider
  let openaiModel: string
  // eslint-disable-next-line @typescript-eslint/no-redundant-type-constituents
  let client: import('openai').default
  if (provider === 'grok') {
    openaiModel = resolveGrokModel(normalizedModel)
    client = getGrokClient({ maxRetries })
  } else {
    openaiModel = resolveOpenAIModel(normalizedModel)
    client = getOpenAIClient({ maxRetries })
  }

  // Build OpenAI messages: system first, then user/assistant
  const systemText = extractSystemText(system)
  const openaiMessages: Array<{
    role: 'system' | 'user' | 'assistant'
    content: string
  }> = []
  if (systemText) {
    openaiMessages.push({ role: 'system', content: systemText })
  }
  openaiMessages.push(...messageParamsToOpenAIRoleContent(messages))

  // Convert tools and tool_choice if provided
  const openaiTools =
    tools && tools.length > 0
      ? anthropicToolsToOpenAI(tools as BetaToolUnion[])
      : undefined
  const openaiToolChoice = tool_choice
    ? anthropicToolChoiceToOpenAI(tool_choice)
    : undefined

  const start = Date.now()

  const requestParams: Record<string, unknown> = {
    model: openaiModel,
    messages: openaiMessages,
    max_tokens,
  }
  if (temperature !== undefined) requestParams.temperature = temperature
  if (openaiTools && openaiTools.length > 0) {
    requestParams.tools = openaiTools
    // tool_choice is only meaningful alongside a tools list.
    if (openaiToolChoice) requestParams.tool_choice = openaiToolChoice
  }

  const response = await client.chat.completions.create(
    requestParams as unknown as import('openai/resources/chat/completions/completions.mjs').ChatCompletionCreateParamsNonStreaming,
    { signal },
  )

  const choice = response.choices[0]
  const message = choice?.message

  // Build content blocks for BetaMessage
  const contentBlocks: Array<
    | { type: 'text'; text: string }
    | { type: 'tool_use'; id: string; name: string; input: unknown }
  > = []

  if (message?.content) {
    contentBlocks.push({ type: 'text', text: message.content })
  }

  if (message?.tool_calls) {
    message.tool_calls.forEach((tc, index) => {
      // ChatCompletionMessageToolCall is a union — only function-type has .function
      if (tc.type === 'function' && 'function' in tc) {
        const fn = (tc as { function: { name: string; arguments: string } })
          .function
        contentBlocks.push({
          type: 'tool_use',
          // Some compatible backends omit the id; the index suffix keeps
          // fallback ids distinct when several calls arrive in one response.
          id: tc.id || `toolu_${Date.now()}_${index}`,
          name: fn.name,
          input: parseOpenAIToolArguments(fn.name, fn.arguments),
        })
      }
    })
  }

  const inputTokens = response.usage?.prompt_tokens ?? 0
  const outputTokens = response.usage?.completion_tokens ?? 0

  const now = Date.now()
  const requestId = response.id
  const lastCompletion = getLastApiCompletionTimestamp()
  logEvent('tengu_api_success', {
    requestId:
      requestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    querySource:
      opts.querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    model:
      openaiModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    inputTokens,
    outputTokens,
    cachedInputTokens: 0,
    uncachedInputTokens: inputTokens,
    durationMsIncludingRetries: now - start,
    timeSinceLastApiCallMs:
      lastCompletion !== null ? now - lastCompletion : undefined,
  })
  setLastApiCompletionTimestamp(now)

  // A forced tool_choice can come back with finish_reason "stop" even though
  // the message carries tool calls, so the presence of tool_use blocks is
  // also treated as a tool_use stop.
  const finishReason = choice?.finish_reason
  const hasToolUse = contentBlocks.some(block => block.type === 'tool_use')
  let stopReason: 'tool_use' | 'max_tokens' | 'end_turn'
  if (finishReason === 'tool_calls') {
    stopReason = 'tool_use'
  } else if (finishReason === 'length') {
    stopReason = 'max_tokens'
  } else if (hasToolUse) {
    stopReason = 'tool_use'
  } else {
    stopReason = 'end_turn'
  }

  return {
    id: response.id,
    type: 'message',
    role: 'assistant',
    content: contentBlocks as BetaMessage['content'],
    model: openaiModel,
    stop_reason: stopReason as BetaMessage['stop_reason'],
    stop_sequence: null,
    usage: {
      input_tokens: inputTokens,
      output_tokens: outputTokens,
    },
  } as BetaMessage
}
|
||||
|
||||
/**
 * Gemini side query. Converts Anthropic-format params to a Gemini
 * `generateContent` request, sends it non-streaming via `fetch`, and wraps
 * the response back into a BetaMessage shape.
 *
 * The endpoint is `GEMINI_BASE_URL` (default
 * `https://generativelanguage.googleapis.com/v1beta`) and the key is sent in
 * the `x-goog-api-key` header from `GEMINI_API_KEY`. The request is issued
 * exactly once: `opts.maxRetries` is not read here, and options other than
 * `model`, `system`, `messages`, `tools`, `tool_choice`, `max_tokens`
 * (default 1024), `temperature` and `signal` are not sent. Message content is
 * reduced to text — non-text blocks are dropped.
 *
 * @param opts - Side query options in Anthropic format.
 * @returns A BetaMessage-shaped object with text / tool_use content blocks,
 *   a mapped stop_reason, and input/output token usage.
 * @throws Error with status and response body when the HTTP response is not ok.
 */
async function sideQueryViaGemini(
  opts: SideQueryOptions,
): Promise<BetaMessage> {
  const {
    model,
    system,
    messages,
    tools,
    tool_choice,
    max_tokens = 1024,
    temperature,
    signal,
  } = opts

  const normalizedModel = normalizeModelStringForAPI(model)
  const geminiModel = resolveGeminiModel(normalizedModel)

  // Build Gemini contents from Anthropic MessageParam[]. The text-flattening
  // rules are shared with the OpenAI path; only the assistant role is renamed.
  type GeminiContent = {
    role: 'user' | 'model'
    parts: Array<{ text: string }>
  }
  const contents = messageParamsToOpenAIRoleContent(messages).map(
    ({ role, content }): GeminiContent => ({
      role: role === 'assistant' ? 'model' : 'user',
      parts: [{ text: content }],
    }),
  )

  // Build system instruction
  const systemText = extractSystemText(system)
  const systemInstruction = systemText
    ? { parts: [{ text: systemText }] }
    : undefined

  // Convert tools and tool_choice
  const geminiTools =
    tools && tools.length > 0
      ? anthropicToolsToGemini(tools as BetaToolUnion[])
      : undefined
  const geminiToolConfig = tool_choice
    ? anthropicToolChoiceToGemini(tool_choice)
    : undefined

  const baseUrl = (
    process.env.GEMINI_BASE_URL ||
    'https://generativelanguage.googleapis.com/v1beta'
  ).replace(/\/+$/, '')
  const modelPath = geminiModel.startsWith('models/')
    ? geminiModel
    : `models/${geminiModel}`
  const url = `${baseUrl}/${modelPath}:generateContent`

  // max_tokens always has a value (default above), so generationConfig is
  // always sent; temperature is added only when the caller supplied one.
  const generationConfig = {
    ...(temperature !== undefined && { temperature }),
    maxOutputTokens: max_tokens,
  }

  const body: Record<string, unknown> = {
    contents,
    ...(systemInstruction && { systemInstruction }),
    ...(geminiTools && geminiTools.length > 0 && { tools: geminiTools }),
    ...(geminiToolConfig && {
      toolConfig: { functionCallingConfig: geminiToolConfig },
    }),
    generationConfig,
  }

  const start = Date.now()

  const res = await fetch(url, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': process.env.GEMINI_API_KEY || '',
    },
    body: JSON.stringify(body),
    signal,
  })

  if (!res.ok) {
    const errorBody = await res.text()
    throw new Error(
      `Gemini API request failed (${res.status} ${res.statusText}): ${errorBody || 'empty response body'}`,
    )
  }

  const geminiResponse = (await res.json()) as {
    candidates?: Array<{
      content?: {
        role?: string
        parts?: Array<{
          text?: string
          functionCall?: { name?: string; args?: Record<string, unknown> }
        }>
      }
      finishReason?: string
    }>
    usageMetadata?: {
      promptTokenCount?: number
      candidatesTokenCount?: number
      totalTokenCount?: number
    }
    responseId?: string
    id?: string
  }

  // The Gemini API reports its identifier as `responseId`; `id` is kept as a
  // fallback for proxies that use the other name.
  const responseId = geminiResponse.responseId ?? geminiResponse.id

  // Build content blocks from Gemini response
  const contentBlocks: Array<
    | { type: 'text'; text: string }
    | { type: 'tool_use'; id: string; name: string; input: unknown }
  > = []

  const candidate = geminiResponse.candidates?.[0]
  for (const part of candidate?.content?.parts ?? []) {
    if (part.text) {
      contentBlocks.push({ type: 'text', text: part.text })
    }
    if (part.functionCall) {
      contentBlocks.push({
        type: 'tool_use',
        // Function calls are given a locally generated id.
        id: `toolu_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
        name: part.functionCall.name ?? '',
        input: part.functionCall.args ?? {},
      })
    }
  }

  const inputTokens = geminiResponse.usageMetadata?.promptTokenCount ?? 0
  const outputTokens = geminiResponse.usageMetadata?.candidatesTokenCount ?? 0

  const now = Date.now()
  const lastCompletion = getLastApiCompletionTimestamp()
  logEvent('tengu_api_success', {
    requestId: (responseId ??
      '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    querySource:
      opts.querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    model:
      geminiModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    inputTokens,
    outputTokens,
    cachedInputTokens: 0,
    uncachedInputTokens: inputTokens,
    durationMsIncludingRetries: now - start,
    timeSinceLastApiCallMs:
      lastCompletion !== null ? now - lastCompletion : undefined,
  })
  setLastApiCompletionTimestamp(now)

  // Gemini has no dedicated finish reason for function calls, so a tool_use
  // stop is derived from the presence of tool_use blocks.
  const hasToolUse = contentBlocks.some(block => block.type === 'tool_use')
  let stopReason: 'tool_use' | 'max_tokens' | 'end_turn'
  if (candidate?.finishReason === 'MAX_TOKENS') {
    stopReason = 'max_tokens'
  } else if (hasToolUse) {
    stopReason = 'tool_use'
  } else {
    stopReason = 'end_turn'
  }

  return {
    id: responseId ?? `gemini_${Date.now()}`,
    type: 'message',
    role: 'assistant',
    content: contentBlocks as BetaMessage['content'],
    model: geminiModel,
    stop_reason: stopReason as BetaMessage['stop_reason'],
    stop_sequence: null,
    usage: {
      input_tokens: inputTokens,
      output_tokens: outputTokens,
    },
  } as BetaMessage
}
|
||||
|
||||
Reference in New Issue
Block a user