mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-19 06:45:50 +00:00
feat: Add DeepSeek thinking mode support for OpenAI compatibility layer (#206)
* feat: Add DeepSeek thinking mode support for OpenAI compatibility layer

  - Add DeepSeek reasoning models support (deepseek-reasoner and DeepSeek-V3.2)
  - Automatic thinking mode detection based on model name
  - Inject thinking parameters in request body (both official API and vLLM formats)
  - Preserve reasoning_content in message conversion for tool call iterations
  - Extract buildOpenAIRequestBody() for testability
  - Treat multimodal inputs (e.g. images) as new turn boundaries
  - Fix env var cleanup in tests to prevent state leak

  Signed-off-by: guunergooner <tongchao0923@gmail.com>

* docs: update contributors

---------

Signed-off-by: guunergooner <tongchao0923@gmail.com>
Co-authored-by: guunergooner <18660867+guunergooner@users.noreply.github.com>
This commit is contained in:
@@ -24,6 +24,7 @@ import {
|
||||
import { logForDebugging } from '../../../utils/debug.js'
|
||||
import { addToTotalSessionCost } from '../../../cost-tracker.js'
|
||||
import { calculateUSDCost } from '../../../utils/modelCost.js'
|
||||
import { isEnvTruthy, isEnvDefinedFalsy } from '../../../utils/envUtils.js'
|
||||
import type { Options } from '../claude.js'
|
||||
import { randomUUID } from 'crypto'
|
||||
import {
|
||||
@@ -39,6 +40,76 @@ import {
|
||||
TOOL_SEARCH_TOOL_NAME,
|
||||
} from '../../../tools/ToolSearchTool/prompt.js'
|
||||
|
||||
/**
 * Decide whether DeepSeek-style thinking mode applies to a request.
 *
 * Resolution order:
 *   1. OPENAI_ENABLE_THINKING holds an explicitly falsy value
 *      (0/false/no/off) -> disabled, even for models that would otherwise
 *      be auto-detected.
 *   2. OPENAI_ENABLE_THINKING holds a truthy value (e.g. 1) -> enabled.
 *   3. Otherwise enabled only when the model name contains
 *      "deepseek-reasoner" or "deepseek-v3.2" (case-insensitive match).
 *
 * @param model - The resolved OpenAI model name
 * @returns `true` when thinking mode should be turned on for `model`
 * @internal Exported for testing purposes only
 */
export function isOpenAIThinkingEnabled(model: string): boolean {
  const override = process.env.OPENAI_ENABLE_THINKING

  // An explicit opt-out is checked first so it wins over model auto-detection.
  if (isEnvDefinedFalsy(override)) {
    return false
  }

  // An explicit opt-in applies regardless of the model name.
  if (isEnvTruthy(override)) {
    return true
  }

  // No usable override: fall back to matching known thinking-capable model names.
  const normalizedModel = model.toLowerCase()
  const thinkingModelMarkers = ['deepseek-reasoner', 'deepseek-v3.2']
  return thinkingModelMarkers.some(marker => normalizedModel.includes(marker))
}
|
||||
|
||||
/**
 * Assemble the request body passed to OpenAI `chat.completions.create()`.
 * Kept as a standalone function so the thinking-mode parameters can be
 * tested without issuing a request.
 *
 * Always present: `model`, `messages`, `stream: true` and
 * `stream_options: { include_usage: true }`.
 *
 * Conditionally present:
 * - `tools` when the tool list is non-empty, plus `tool_choice` when
 *   `toolChoice` is truthy (never sent without tools).
 * - When `enableThinking` is set, two thinking formats are sent together so
 *   that different deployments each find the one they understand:
 *     - Official DeepSeek API: `thinking: { type: 'enabled' }`
 *     - Self-hosted DeepSeek-V3.2: `enable_thinking: true` and
 *       `chat_template_kwargs: { thinking: true }`
 *   This relies on the OpenAI SDK forwarding unrecognized keys into the HTTP
 *   body and on each endpoint ignoring the keys it does not know.
 * - `temperature` only when thinking is off and an override was supplied.
 *
 * @param params - Model name, converted messages/tools, tool choice,
 *   thinking flag and optional temperature override
 * @returns The request body object, with keys in a stable insertion order
 * @internal Exported for testing purposes only
 */
export function buildOpenAIRequestBody(params: {
  model: string
  messages: any[]
  tools: any[]
  toolChoice: any
  enableThinking: boolean
  temperatureOverride?: number
}): Record<string, any> {
  const body: Record<string, any> = {
    model: params.model,
    messages: params.messages,
  }

  // Tool fields are only meaningful when at least one tool is offered.
  if (params.tools.length > 0) {
    body.tools = params.tools
    if (params.toolChoice) {
      body.tool_choice = params.toolChoice
    }
  }

  body.stream = true
  body.stream_options = { include_usage: true }

  if (params.enableThinking) {
    // DeepSeek thinking mode: enable chain-of-thought output.
    // When active, temperature/top_p/presence_penalty/frequency_penalty are
    // ignored by DeepSeek, so no temperature is sent on this path.
    body.thinking = { type: 'enabled' } // Official DeepSeek API format
    body.enable_thinking = true // Self-hosted DeepSeek-V3.2 format
    body.chat_template_kwargs = { thinking: true }
  } else if (params.temperatureOverride !== undefined) {
    // Thinking is off: other providers may respect an explicit temperature,
    // including an explicit 0.
    body.temperature = params.temperatureOverride
  }

  return body
}
|
||||
|
||||
/**
|
||||
* OpenAI-compatible query path. Converts Anthropic-format messages/tools to
|
||||
* OpenAI format, calls the OpenAI-compatible endpoint, and converts the
|
||||
@@ -120,10 +191,10 @@ export async function* queryModelOpenAI(
|
||||
)
|
||||
|
||||
// 8. Convert messages and tools to OpenAI format
|
||||
const openaiMessages = anthropicMessagesToOpenAI(
|
||||
messagesForAPI,
|
||||
systemPrompt,
|
||||
)
|
||||
const enableThinking = isOpenAIThinkingEnabled(openaiModel)
|
||||
const openaiMessages = anthropicMessagesToOpenAI(messagesForAPI, systemPrompt, {
|
||||
enableThinking,
|
||||
})
|
||||
const openaiTools = anthropicToolsToOpenAI(standardTools)
|
||||
const openaiToolChoice = anthropicToolChoiceToOpenAI(options.toolChoice)
|
||||
|
||||
@@ -149,31 +220,25 @@ export async function* queryModelOpenAI(
|
||||
})
|
||||
|
||||
logForDebugging(
|
||||
`[OpenAI] Calling model=${openaiModel}, messages=${openaiMessages.length}, tools=${openaiTools.length}`,
|
||||
`[OpenAI] Calling model=${openaiModel}, messages=${openaiMessages.length}, tools=${openaiTools.length}, thinking=${enableThinking}`,
|
||||
)
|
||||
|
||||
// 11. Call OpenAI API with streaming
|
||||
const requestBody = buildOpenAIRequestBody({
|
||||
model: openaiModel,
|
||||
messages: openaiMessages,
|
||||
tools: openaiTools,
|
||||
toolChoice: openaiToolChoice,
|
||||
enableThinking,
|
||||
temperatureOverride: options.temperatureOverride,
|
||||
})
|
||||
const stream = await client.chat.completions.create(
|
||||
{
|
||||
model: openaiModel,
|
||||
messages: openaiMessages,
|
||||
...(openaiTools.length > 0 && {
|
||||
tools: openaiTools,
|
||||
...(openaiToolChoice && { tool_choice: openaiToolChoice }),
|
||||
}),
|
||||
stream: true,
|
||||
stream_options: { include_usage: true },
|
||||
...(options.temperatureOverride !== undefined && {
|
||||
temperature: options.temperatureOverride,
|
||||
}),
|
||||
},
|
||||
{
|
||||
signal,
|
||||
},
|
||||
requestBody,
|
||||
{ signal },
|
||||
)
|
||||
|
||||
// 7. Convert OpenAI stream to Anthropic events, then process into
|
||||
// AssistantMessage + StreamEvent (matching the Anthropic path behavior)
|
||||
// 12. Convert OpenAI stream to Anthropic events, then process into
|
||||
// AssistantMessage + StreamEvent (matching the Anthropic path behavior)
|
||||
const adaptedStream = adaptOpenAIStreamToAnthropic(stream, openaiModel)
|
||||
|
||||
// Accumulate content blocks and usage, same as the Anthropic path in claude.ts
|
||||
@@ -287,4 +352,4 @@ export async function* queryModelOpenAI(
|
||||
error: error instanceof Error ? error : new Error(String(error)),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user