Merge pull request #1226 from Evsdrg/feat/mimo-thinking-support

feat: 添加 MiMo 模型 thinking mode 自动检测与兼容
2026-06-15 12:55:51 +00:00 · 2026-05-14 23:00:01 +08:00
parent 80b46d2221 833181e025
commit d451e30741
2 changed files with 34 additions and 14 deletions
--- a/src/services/api/openai/tests/thinking.test.ts
+++ b/src/services/api/openai/tests/thinking.test.ts
@@ -147,6 +147,22 @@ describe('isOpenAIThinkingEnabled', () => {
      expect(isOpenAIThinkingEnabled('deepseek-coder')).toBe(true)
    })

+    test('returns true when model name is "mimo-v2-flash"', () => {
+      expect(isOpenAIThinkingEnabled('mimo-v2-flash')).toBe(true)
+    })
+
+    test('returns true when model name is "mimo-v2-pro"', () => {
+      expect(isOpenAIThinkingEnabled('mimo-v2-pro')).toBe(true)
+    })
+
+    test('returns true when model name is "mimo-v2.5-pro"', () => {
+      expect(isOpenAIThinkingEnabled('mimo-v2.5-pro')).toBe(true)
+    })
+
+    test('returns true when model name contains "mimo"', () => {
+      expect(isOpenAIThinkingEnabled('MiMo-V2-Omni')).toBe(true)
+    })
+
    test('returns false when model name is "gpt-4o"', () => {
      expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(false)
    })
@@ -197,7 +213,10 @@ describe('buildOpenAIRequestBody — thinking params', () => {
  test('includes vLLM/self-hosted thinking format when enabled', () => {
    const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: true })
    expect(body.enable_thinking).toBe(true)
-    expect(body.chat_template_kwargs).toEqual({ thinking: true })
+    expect(body.chat_template_kwargs).toEqual({
+      thinking: true,
+      enable_thinking: true,
+    })
  })

  test('includes both formats simultaneously when enabled', () => {
--- a/src/services/api/openai/requestBody.ts
+++ b/src/services/api/openai/requestBody.ts
@@ -7,11 +7,11 @@ import type { ChatCompletionCreateParamsStreaming } from 'openai/resources/chat/
 import { isEnvTruthy, isEnvDefinedFalsy } from '../../../utils/envUtils.js'

 /**
- * Detect whether DeepSeek-style thinking mode should be enabled.
+ * Detect whether thinking mode should be enabled for this model.
 *
 * Enabled when:
 * 1. OPENAI_ENABLE_THINKING=1 is set (explicit enable), OR
- * 2. Model name contains "deepseek-reasoner" OR "DeepSeek-V3.2" (auto-detect, case-insensitive)
+ * 2. Model name contains "deepseek" or "mimo" (auto-detect, case-insensitive)
 *
 * Disabled when:
 * - OPENAI_ENABLE_THINKING=0/false/no/off is explicitly set (overrides model detection)
@@ -23,9 +23,9 @@ export function isOpenAIThinkingEnabled(model: string): boolean {
  if (isEnvDefinedFalsy(process.env.OPENAI_ENABLE_THINKING)) return false
  // Explicit enable
  if (isEnvTruthy(process.env.OPENAI_ENABLE_THINKING)) return true
-  // Auto-detect from model name (all DeepSeek models support thinking mode)
+  // Auto-detect from model name (DeepSeek and MiMo models support thinking mode)
  const modelLower = model.toLowerCase()
-  return modelLower.includes('deepseek')
+  return modelLower.includes('deepseek') || modelLower.includes('mimo')
 }

 /**
@@ -58,12 +58,12 @@ export function resolveOpenAIMaxTokens(
 * Build the request body for OpenAI chat.completions.create().
 * Extracted for testability — the thinking mode params are injected here.
 *
- * DeepSeek thinking mode: inject thinking params via request body.
- * Two formats are added simultaneously to support different deployments:
- * - Official DeepSeek API: `thinking: { type: 'enabled' }`
- * - Self-hosted DeepSeek-V3.2: `enable_thinking: true` + `chat_template_kwargs: { thinking: true }`
+ * Three thinking-mode formats are sent simultaneously; each endpoint uses the
+ * format it recognizes and ignores the others:
+ * - Official DeepSeek API:    `thinking: { type: 'enabled' }`
+ * - Self-hosted DeepSeek:     `enable_thinking: true` + `chat_template_kwargs: { thinking: true }`
+ * - MiMo (Xiaomi):            `chat_template_kwargs: { enable_thinking: true }`
 * OpenAI SDK passes unknown keys through to the HTTP body.
- * Each endpoint will use the format it recognizes and ignore the others.
 */
 export function buildOpenAIRequestBody(params: {
  model: string
@@ -76,7 +76,7 @@ export function buildOpenAIRequestBody(params: {
 }): ChatCompletionCreateParamsStreaming & {
  thinking?: { type: string }
  enable_thinking?: boolean
-  chat_template_kwargs?: { thinking: boolean }
+  chat_template_kwargs?: { thinking: boolean; enable_thinking: boolean }
 } {
  const {
    model,
@@ -97,14 +97,15 @@ export function buildOpenAIRequestBody(params: {
    }),
    stream: true,
    stream_options: { include_usage: true },
-    // DeepSeek thinking mode: enable chain-of-thought output.
-    // When active, temperature/top_p/presence_penalty/frequency_penalty are ignored by DeepSeek.
+    // Enable chain-of-thought output for DeepSeek and MiMo models.
+    // When active, temperature/top_p/presence_penalty/frequency_penalty are ignored.
    ...(enableThinking && {
      // Official DeepSeek API format
      thinking: { type: 'enabled' },
      // Self-hosted DeepSeek-V3.2 format
      enable_thinking: true,
-      chat_template_kwargs: { thinking: true },
+      // Both DeepSeek self-hosted and MiMo formats in chat_template_kwargs
+      chat_template_kwargs: { thinking: true, enable_thinking: true },
    }),
    // Only send temperature when thinking mode is off (DeepSeek ignores it anyway,
    // but other providers may respect it)