mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-17 22:05:50 +00:00
* feat: Grok 适配完善 — 防御性 usage 合并 + thinking 自动检测 1. 提取 updateOpenAIUsage 到共享模块 openaiShared.ts,供 OpenAI 和 Grok 两条路径复用,消除 Grok 中重复的 spread 漏洞。 2. 在 requestBody.ts 的 isOpenAIThinkingEnabled() 中增加 Grok 模型 自动检测(模型名含 "grok"),与 DeepSeek/MiMo 并列。 3. messaging 层的 reasoning_content 回传(openaiConvertMessages.ts) 和流解析(openaiStreamAdapter.ts)无需修改,Grok 与 DeepSeek/MiMo 共用相同的 reasoning_content 字段协议。 Co-Authored-By: deepseek-v4-pro[1m] <deepseek-ai@claude-code-best.win> * fix: 回退 Grok 从 isOpenAIThinkingEnabled 的自动检测 Grok 推理模型(如 grok-4.20-reasoning)自动进行推理,不需要 thinking/enable_thinking 请求参数。发送这些参数虽大概率被忽略 (OpenAI SDK 透传 unknown keys),但属于不正确行为。 Co-Authored-By: deepseek-v4-pro[1m] <deepseek-ai@claude-code-best.win> --------- Co-authored-by: deepseek-v4-pro[1m] <deepseek-ai@claude-code-best.win>
120 lines
4.3 KiB
TypeScript
120 lines
4.3 KiB
TypeScript
/**
 * Pure utility functions for building OpenAI request bodies and detecting
 * thinking mode. Extracted from index.ts so tests can import them without
 * triggering heavy module side-effects (OpenAI client, stream adapter, etc.).
 */
|
|
import type { ChatCompletionCreateParamsStreaming } from 'openai/resources/chat/completions/completions.mjs'
|
|
import { isEnvTruthy, isEnvDefinedFalsy } from '../../../utils/envUtils.js'
|
|
|
|
/**
 * Decide whether thinking-mode request parameters should be sent for a model.
 *
 * Resolution order:
 *   1. If OPENAI_ENABLE_THINKING holds an explicitly falsy value (as judged by
 *      `isEnvDefinedFalsy`), thinking is off — this wins over auto-detection.
 *   2. If OPENAI_ENABLE_THINKING is truthy (as judged by `isEnvTruthy`),
 *      thinking is on.
 *   3. Otherwise thinking is on only when the model name contains "deepseek"
 *      or "mimo" (case-insensitive substring match).
 *
 * Grok is deliberately not auto-detected: Grok reasoning models reason on
 * their own and do not need thinking/enable_thinking request parameters.
 *
 * @param model - The resolved OpenAI model name
 * @returns `true` when thinking-mode parameters should be included
 */
export function isOpenAIThinkingEnabled(model: string): boolean {
  const thinkingFlag = process.env.OPENAI_ENABLE_THINKING

  // An explicit "off" beats everything else, including model auto-detection.
  if (isEnvDefinedFalsy(thinkingFlag)) {
    return false
  }

  // An explicit "on" needs no further inspection of the model name.
  if (isEnvTruthy(thinkingFlag)) {
    return true
  }

  // No explicit setting: fall back to matching known thinking-capable families.
  const normalizedModel = model.toLowerCase()
  return ['deepseek', 'mimo'].some(family => normalizedModel.includes(family))
}
|
|
|
|
/**
 * Parse an environment-variable value as a positive base-10 integer.
 *
 * Returns `undefined` when the value is unset, empty, non-numeric, zero,
 * negative, or not finite, so the caller can fall through to the next source.
 */
function parsePositiveIntEnv(raw: string | undefined): number | undefined {
  if (!raw) return undefined
  const parsed = parseInt(raw, 10)
  // Reject NaN, Infinity, 0 and negatives: none of them is a usable token limit.
  return Number.isFinite(parsed) && parsed > 0 ? parsed : undefined
}

/**
 * Resolve max output tokens for the OpenAI-compatible path.
 *
 * Override priority (first defined source wins):
 *   1. maxOutputTokensOverride (programmatic, from the query pipeline);
 *      returned as-is whenever it is not null/undefined
 *   2. OPENAI_MAX_TOKENS env var (OpenAI-specific, useful for local models
 *      with small context windows)
 *   3. CLAUDE_CODE_MAX_OUTPUT_TOKENS env var (generic override)
 *   4. upperLimit (the caller-supplied default)
 *
 * An env var only counts as defined when it parses to a finite positive
 * integer; anything else (e.g. "abc", "0", "-5") is ignored and the next
 * source in the list is consulted.
 *
 * @param upperLimit - Fallback used when no override applies
 * @param maxOutputTokensOverride - Optional programmatic override
 * @returns The resolved max output token count
 */
export function resolveOpenAIMaxTokens(
  upperLimit: number,
  maxOutputTokensOverride?: number,
): number {
  return (
    maxOutputTokensOverride ??
    parsePositiveIntEnv(process.env.OPENAI_MAX_TOKENS) ??
    parsePositiveIntEnv(process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS) ??
    upperLimit
  )
}
|
|
|
|
/**
 * Assemble the streaming request body passed to OpenAI
 * chat.completions.create(). Kept as a standalone function for testability;
 * this is where the thinking-mode parameters are injected.
 *
 * When thinking is enabled, three formats are emitted side by side so each
 * endpoint can pick the one it understands and ignore the rest:
 *   - Official DeepSeek API: `thinking: { type: 'enabled' }`
 *   - Self-hosted DeepSeek: `enable_thinking: true` +
 *     `chat_template_kwargs: { thinking: true }`
 *   - MiMo (Xiaomi): `chat_template_kwargs: { enable_thinking: true }`
 * The OpenAI SDK passes unknown keys through to the HTTP body.
 *
 * Field rules:
 *   - `tools` is included only when the tools array is non-empty, and
 *     `tool_choice` only when tools are included AND toolChoice is truthy.
 *   - `temperature` is included only when thinking is off and a
 *     temperatureOverride was supplied (0 is a valid override).
 *
 * @param params - Model, messages, tools and tuning options for the request
 * @returns The request body, always with `stream: true` and usage reporting on
 */
export function buildOpenAIRequestBody(params: {
  model: string
  messages: any[]
  tools: any[]
  toolChoice: any
  enableThinking: boolean
  maxTokens: number
  temperatureOverride?: number
}): ChatCompletionCreateParamsStreaming & {
  thinking?: { type: string }
  enable_thinking?: boolean
  chat_template_kwargs?: { thinking: boolean; enable_thinking: boolean }
} {
  const { tools, toolChoice, enableThinking, temperatureOverride } = params

  // Tool definitions (and the optional tool choice) travel together: without
  // any tools, tool_choice is never sent.
  const toolFields =
    tools.length > 0
      ? { tools, ...(toolChoice ? { tool_choice: toolChoice } : {}) }
      : {}

  // Chain-of-thought switches for DeepSeek and MiMo models. When active,
  // temperature/top_p/presence_penalty/frequency_penalty are ignored.
  const thinkingFields = enableThinking
    ? {
        // Official DeepSeek API format
        thinking: { type: 'enabled' },
        // Self-hosted DeepSeek-V3.2 format
        enable_thinking: true,
        // Both DeepSeek self-hosted and MiMo formats in chat_template_kwargs
        chat_template_kwargs: { thinking: true, enable_thinking: true },
      }
    : {}

  // Temperature is only forwarded when thinking mode is off (DeepSeek ignores
  // it anyway, but other providers may respect it).
  const temperatureFields =
    enableThinking || temperatureOverride === undefined
      ? {}
      : { temperature: temperatureOverride }

  return {
    model: params.model,
    messages: params.messages,
    max_tokens: params.maxTokens,
    ...toolFields,
    stream: true,
    stream_options: { include_usage: true },
    ...thinkingFields,
    ...temperatureFields,
  }
}
|