feat: Add DeepSeek thinking mode support for OpenAI compatibility layer (#206)

* feat: Add DeepSeek thinking mode support for OpenAI compatibility layer

  - Add DeepSeek reasoning models support (deepseek-reasoner and DeepSeek-V3.2)
  - Automatic thinking mode detection based on model name
  - Inject thinking parameters in request body (both official API and vLLM formats)
  - Preserve reasoning_content in message conversion for tool call iterations
  - Extract buildOpenAIRequestBody() for testability
  - Treat multimodal inputs (e.g. images) as new turn boundaries
  - Fix env var cleanup in tests to prevent state leak

  Signed-off-by: guunergooner <tongchao0923@gmail.com>

* docs: update contributors

---------

Signed-off-by: guunergooner <tongchao0923@gmail.com>
Co-authored-by: guunergooner <18660867+guunergooner@users.noreply.github.com>
2026-06-19 06:45:50 +00:00 · 2026-04-08 21:33:26 +08:00
parent 73a18c30db
commit a3505aeec4
5 changed files with 602 additions and 40 deletions
--- a/src/services/api/openai/index.ts
+++ b/src/services/api/openai/index.ts
@@ -24,6 +24,7 @@ import {
 import { logForDebugging } from '../../../utils/debug.js'
 import { addToTotalSessionCost } from '../../../cost-tracker.js'
 import { calculateUSDCost } from '../../../utils/modelCost.js'
+import { isEnvTruthy, isEnvDefinedFalsy } from '../../../utils/envUtils.js'
 import type { Options } from '../claude.js'
 import { randomUUID } from 'crypto'
 import {
@@ -39,6 +40,76 @@ import {
  TOOL_SEARCH_TOOL_NAME,
 } from '../../../tools/ToolSearchTool/prompt.js'

+/**
+ * Decide whether DeepSeek-style thinking mode should be enabled for a request.
+ *
+ * Checks run in this order and the first match wins:
+ * 1. OPENAI_ENABLE_THINKING is falsy per isEnvDefinedFalsy (e.g. 0/false/no/off) -> disabled
+ * 2. OPENAI_ENABLE_THINKING is truthy per isEnvTruthy (e.g. 1) -> enabled
+ * 3. Otherwise, enabled only when the model name contains "deepseek-reasoner" or
+ *    "deepseek-v3.2" (substring match, case-insensitive)
+ * The exact env values accepted are defined by the envUtils helpers, not by this function.
+ *
+ * @param model - The resolved OpenAI model name
+ * @returns true when thinking mode should be enabled, false otherwise
+ * @internal Exported for testing purposes only
+ */
+export function isOpenAIThinkingEnabled(model: string): boolean {
+  // Explicit disable is checked first so it overrides model auto-detection
+  if (isEnvDefinedFalsy(process.env.OPENAI_ENABLE_THINKING)) return false
+  // Explicit enable applies to any model name
+  if (isEnvTruthy(process.env.OPENAI_ENABLE_THINKING)) return true
+  // Neither override matched: auto-detect from the lowercased model name
+  const modelLower = model.toLowerCase()
+  return modelLower.includes('deepseek-reasoner') || modelLower.includes('deepseek-v3.2')
+}
+
+/**
+ * Build the streaming request body for chat.completions.create() (extracted for testability).
+ *
+ * `tools` (plus `tool_choice`, when truthy) are included only if `tools` is non-empty.
+ * When `enableThinking` is true, two formats are added together for different deployments:
+ * - Official DeepSeek API: `thinking: { type: 'enabled' }`
+ * - Self-hosted DeepSeek-V3.2: `enable_thinking: true` + `chat_template_kwargs: { thinking: true }`
+ * Assumes the SDK forwards unknown keys and endpoints ignore unrecognized ones — TODO confirm.
+ * `temperature` is sent only when thinking is off and `temperatureOverride` is defined.
+ * @returns Body object; always has `stream: true` and `stream_options.include_usage: true`
+ * @internal Exported for testing purposes only
+ */
+export function buildOpenAIRequestBody(params: {
+  model: string
+  messages: any[]
+  tools: any[]
+  toolChoice: any
+  enableThinking: boolean
+  temperatureOverride?: number
+}): Record<string, any> {
+  const { model, messages, tools, toolChoice, enableThinking, temperatureOverride } = params
+  return {
+    model,
+    messages,
+    ...(tools.length > 0 && {
+      tools,
+      ...(toolChoice && { tool_choice: toolChoice }),
+    }),
+    stream: true,
+    stream_options: { include_usage: true },
+    // DeepSeek thinking mode: enable chain-of-thought output.
+    // When active, temperature/top_p/presence_penalty/frequency_penalty are ignored by DeepSeek.
+    ...(enableThinking && {
+      // Official DeepSeek API format
+      thinking: { type: 'enabled' },
+      // Self-hosted DeepSeek-V3.2 format
+      enable_thinking: true,
+      chat_template_kwargs: { thinking: true },
+    }),
+    // Only send temperature when thinking mode is off. NOTE(review): this drops the override
+    // for any provider whenever enableThinking is true, not just DeepSeek — confirm intended
+    ...(!enableThinking && temperatureOverride !== undefined && {
+      temperature: temperatureOverride,
+    }),
+  }
+}
+
 /**
 * OpenAI-compatible query path. Converts Anthropic-format messages/tools to
 * OpenAI format, calls the OpenAI-compatible endpoint, and converts the
@@ -120,10 +191,10 @@ export async function* queryModelOpenAI(
    )

    // 8. Convert messages and tools to OpenAI format
-    const openaiMessages = anthropicMessagesToOpenAI(
-      messagesForAPI,
-      systemPrompt,
-    )
+    const enableThinking = isOpenAIThinkingEnabled(openaiModel)
+    const openaiMessages = anthropicMessagesToOpenAI(messagesForAPI, systemPrompt, {
+      enableThinking,
+    })
    const openaiTools = anthropicToolsToOpenAI(standardTools)
    const openaiToolChoice = anthropicToolChoiceToOpenAI(options.toolChoice)

@@ -149,31 +220,25 @@ export async function* queryModelOpenAI(
    })

    logForDebugging(
-      `[OpenAI] Calling model=${openaiModel}, messages=${openaiMessages.length}, tools=${openaiTools.length}`,
+      `[OpenAI] Calling model=${openaiModel}, messages=${openaiMessages.length}, tools=${openaiTools.length}, thinking=${enableThinking}`,
    )

    // 11. Call OpenAI API with streaming
+    const requestBody = buildOpenAIRequestBody({
+      model: openaiModel,
+      messages: openaiMessages,
+      tools: openaiTools,
+      toolChoice: openaiToolChoice,
+      enableThinking,
+      temperatureOverride: options.temperatureOverride,
+    })
    const stream = await client.chat.completions.create(
-      {
-        model: openaiModel,
-        messages: openaiMessages,
-        ...(openaiTools.length > 0 && {
-          tools: openaiTools,
-          ...(openaiToolChoice && { tool_choice: openaiToolChoice }),
-        }),
-        stream: true,
-        stream_options: { include_usage: true },
-        ...(options.temperatureOverride !== undefined && {
-          temperature: options.temperatureOverride,
-        }),
-      },
-      {
-        signal,
-      },
+      requestBody,
+      { signal },
    )

-    // 7. Convert OpenAI stream to Anthropic events, then process into
-    //    AssistantMessage + StreamEvent (matching the Anthropic path behavior)
+    // 12. Convert OpenAI stream to Anthropic events, then process into
+    //     AssistantMessage + StreamEvent (matching the Anthropic path behavior)
    const adaptedStream = adaptOpenAIStreamToAnthropic(stream, openaiModel)

    // Accumulate content blocks and usage, same as the Anthropic path in claude.ts
@@ -287,4 +352,4 @@ export async function* queryModelOpenAI(
      error: error instanceof Error ? error : new Error(String(error)),
    })
  }
-}
+}