From 1b10ea391a22e0094934468a5bd37858b568b412 Mon Sep 17 00:00:00 2001
From: ymonster <ymonster@163.com>
Date: Sat, 2 May 2026 14:58:29 +0800
Subject: [PATCH] fix: preserve empty reasoning_content for DeepSeek v4
 thinking mode (#399)

DeepSeek v4 in thinking mode sometimes returns reasoning_content: ""
when the model answers directly without internal reasoning. Two places
filtered out the empty string, so the thinking block was dropped from
the assistant turn entirely. The next request then omitted
reasoning_content for that prior turn, and DeepSeek rejected it with
HTTP 400 "reasoning_content ... must be passed back to the API".

Fix:
- openaiStreamAdapter: open a thinking block whenever reasoning_content
  is present (including ""); skip the empty thinking_delta event since
  the empty value is already conveyed by the block's initial state.
- openaiConvertMessages: preserve empty thinking blocks as
  reasoning_content: "" when serializing assistant messages back to
  the OpenAI/DeepSeek format.

Tests:
- New: empty reasoning_content opens a thinking block and emits no
  thinking_delta event (adapter).
- Updated: empty thinking blocks now round-trip as reasoning_content: ""
  instead of being dropped.
- New: assistant messages with no thinking block still omit
  reasoning_content (regression guard for non-thinking models).
---
 .../__tests__/openaiConvertMessages.test.ts   | 22 ++++++++-
 .../__tests__/openaiStreamAdapter.test.ts     | 48 +++++++++++++++++++
 .../src/shared/openaiConvertMessages.ts       | 10 ++--
 .../src/shared/openaiStreamAdapter.ts         | 26 ++++++----
 4 files changed, 91 insertions(+), 15 deletions(-)

diff --git a/packages/@ant/model-provider/src/shared/__tests__/openaiConvertMessages.test.ts b/packages/@ant/model-provider/src/shared/__tests__/openaiConvertMessages.test.ts
index 2b1733372..305e00e64 100644
--- a/packages/@ant/model-provider/src/shared/__tests__/openaiConvertMessages.test.ts
+++ b/packages/@ant/model-provider/src/shared/__tests__/openaiConvertMessages.test.ts
@@ -468,7 +468,11 @@ describe('DeepSeek thinking mode (enableThinking)', () => {
     expect(assistant.reasoning_content).toBe('First thought.\nSecond thought.')
   })
 
-  test('skips empty thinking blocks', () => {
+  test('preserves empty thinking blocks as reasoning_content: "" (DeepSeek v4 thinking mode)', () => {
+    // DeepSeek v4 thinking mode sometimes returns reasoning_content: ""
+    // when the model answers directly without reasoning. The empty value
+    // must be echoed back in the next request — otherwise DeepSeek returns
+    // 400 ("reasoning_content ... must be passed back"). See issue #399.
     const result = anthropicMessagesToOpenAI(
       [
         makeUserMsg('question'),
@@ -481,7 +485,23 @@ describe('DeepSeek thinking mode (enableThinking)', () => {
       { enableThinking: true },
     )
     const assistant = result.filter(m => m.role === 'assistant')[0] as any
+    expect(assistant.reasoning_content).toBe('')
+    expect(assistant.content).toBe('Answer.')
+  })
+
+  test('omits reasoning_content when no thinking block is present', () => {
+    // No thinking block at all → no reasoning_content field on the
+    // OpenAI-format assistant message (relevant for non-thinking models).
+    const result = anthropicMessagesToOpenAI(
+      [
+        makeUserMsg('question'),
+        makeAssistantMsg([{ type: 'text', text: 'Answer.' }]),
+      ],
+      [] as any,
+    )
+    const assistant = result.filter(m => m.role === 'assistant')[0] as any
     expect(assistant.reasoning_content).toBeUndefined()
+    expect(assistant.content).toBe('Answer.')
   })
 
   // ── fix: reorder tool and user messages for OpenAI API compatibility (#168) ──
diff --git a/packages/@ant/model-provider/src/shared/__tests__/openaiStreamAdapter.test.ts b/packages/@ant/model-provider/src/shared/__tests__/openaiStreamAdapter.test.ts
index ba29ab7f5..fef15c358 100644
--- a/packages/@ant/model-provider/src/shared/__tests__/openaiStreamAdapter.test.ts
+++ b/packages/@ant/model-provider/src/shared/__tests__/openaiStreamAdapter.test.ts
@@ -439,6 +439,54 @@ describe('thinking support (reasoning_content)', () => {
     expect(blockStarts[1].content_block.type).toBe('tool_use')
   })
 
+  test('opens thinking block on empty reasoning_content (DeepSeek v4 direct-answer)', async () => {
+    // DeepSeek v4 thinking mode sometimes streams reasoning_content: ""
+    // before answering directly. We must still open a thinking block so the
+    // resulting assistant message carries an (empty) thinking block — that
+    // round-trips back as reasoning_content: "" in the next request,
+    // satisfying DeepSeek's requirement (see issue #399).
+    const events = await collectEvents([
+      makeChunk({
+        choices: [
+          {
+            index: 0,
+            delta: { reasoning_content: '' },
+            finish_reason: null,
+          },
+        ],
+      }),
+      makeChunk({
+        choices: [
+          {
+            index: 0,
+            delta: { content: 'Direct answer.' },
+            finish_reason: null,
+          },
+        ],
+      }),
+      makeChunk({
+        choices: [{ index: 0, delta: {}, finish_reason: 'stop' }],
+      }),
+    ])
+
+    // A thinking block was opened (and closed before the text block starts)
+    const blockStarts = events.filter(
+      e => e.type === 'content_block_start',
+    ) as any[]
+    expect(blockStarts.length).toBe(2)
+    expect(blockStarts[0].content_block.type).toBe('thinking')
+    expect(blockStarts[0].content_block.thinking).toBe('')
+    expect(blockStarts[1].content_block.type).toBe('text')
+
+    // No empty thinking_delta should be emitted — the empty string is
+    // already conveyed by the thinking block's initial value.
+    const thinkingDeltas = events.filter(
+      e =>
+        e.type === 'content_block_delta' && e.delta.type === 'thinking_delta',
+    )
+    expect(thinkingDeltas.length).toBe(0)
+  })
+
   test('thinking block index is 0, text block index is 1', async () => {
     const events = await collectEvents([
       makeChunk({
diff --git a/packages/@ant/model-provider/src/shared/openaiConvertMessages.ts b/packages/@ant/model-provider/src/shared/openaiConvertMessages.ts
index 286ad55d7..f896a6743 100644
--- a/packages/@ant/model-provider/src/shared/openaiConvertMessages.ts
+++ b/packages/@ant/model-provider/src/shared/openaiConvertMessages.ts
@@ -206,12 +206,14 @@ function convertInternalAssistantMessage(
         },
       })
     } else if (block.type === 'thinking') {
-      // DeepSeek thinking mode: always preserve reasoning_content.
-      // DeepSeek requires reasoning_content to be passed back in subsequent requests,
-      // especially when tool calls are involved (returns 400 if missing).
+      // DeepSeek thinking mode: always preserve reasoning_content,
+      // including the empty-string case. DeepSeek v4 may return
+      // reasoning_content: "" when the model answers directly, and the
+      // empty value must be echoed back in the next request — otherwise
+      // DeepSeek returns 400 ("reasoning_content ... must be passed back").
       const thinkingText = (block as unknown as Record<string, unknown>)
         .thinking
-      if (typeof thinkingText === 'string' && thinkingText) {
+      if (typeof thinkingText === 'string') {
         reasoningParts.push(thinkingText)
       }
     }
diff --git a/packages/@ant/model-provider/src/shared/openaiStreamAdapter.ts b/packages/@ant/model-provider/src/shared/openaiStreamAdapter.ts
index 1e7df4ea9..02e32d957 100644
--- a/packages/@ant/model-provider/src/shared/openaiStreamAdapter.ts
+++ b/packages/@ant/model-provider/src/shared/openaiStreamAdapter.ts
@@ -106,9 +106,13 @@ export async function* adaptOpenAIStreamToAnthropic(
     // Skip chunks that carry only usage data (no delta content)
     if (!delta) continue
 
-    // Handle reasoning_content → Anthropic thinking block
+    // Handle reasoning_content → Anthropic thinking block.
+    // Empty string is a valid signal: DeepSeek v4 thinking mode sometimes
+    // returns reasoning_content: "" when the model answers directly. The
+    // empty thinking block must round-trip back to the API in subsequent
+    // requests, otherwise DeepSeek rejects with 400.
     const reasoningContent = (delta as any).reasoning_content
-    if (reasoningContent != null && reasoningContent !== '') {
+    if (reasoningContent != null) {
       if (!thinkingBlockOpen) {
         currentContentIndex++
         thinkingBlockOpen = true
@@ -125,14 +129,16 @@ export async function* adaptOpenAIStreamToAnthropic(
         } as BetaRawMessageStreamEvent
       }
 
-      yield {
-        type: 'content_block_delta',
-        index: currentContentIndex,
-        delta: {
-          type: 'thinking_delta',
-          thinking: reasoningContent,
-        },
-      } as BetaRawMessageStreamEvent
+      if (reasoningContent !== '') {
+        yield {
+          type: 'content_block_delta',
+          index: currentContentIndex,
+          delta: {
+            type: 'thinking_delta',
+            thinking: reasoningContent,
+          },
+        } as BetaRawMessageStreamEvent
+      }
     }
 
     // Handle text content