fix: preserve empty reasoning_content for DeepSeek v4 thinking mode (#399)

DeepSeek v4 in thinking mode sometimes returns reasoning_content: "" when the model answers directly without internal reasoning. Two places were filtering the empty string out, which dropped the thinking block from the assistant turn entirely. The next request then omitted reasoning_content for that prior turn, and DeepSeek rejected it with 400 "reasoning_content ... must be passed back to the API".

Fix:
- openaiStreamAdapter: open a thinking block whenever reasoning_content is present (including ""); skip the empty thinking_delta event, since the empty value is already conveyed by the block's initial state.
- openaiConvertMessages: preserve empty thinking blocks as reasoning_content: "" when serializing assistant messages back to the OpenAI/DeepSeek format.

Tests:
- New: empty reasoning_content opens a thinking block (adapter).
- Updated: empty thinking blocks now round-trip as reasoning_content: "" instead of being dropped.
- New: assistant messages with no thinking block still omit reasoning_content (regression guard for non-thinking models).
2026-06-15 12:55:51 +00:00 · 2026-05-02 14:58:29 +08:00
parent 3eba5ade1a
commit 1b10ea391a
4 changed files with 91 additions and 15 deletions
--- a/packages/@ant/model-provider/src/shared/tests/openaiConvertMessages.test.ts
+++ b/packages/@ant/model-provider/src/shared/tests/openaiConvertMessages.test.ts
@@ -468,7 +468,11 @@ describe('DeepSeek thinking mode (enableThinking)', () => {
    expect(assistant.reasoning_content).toBe('First thought.\nSecond thought.')
  })

-  test('skips empty thinking blocks', () => {
+  test('preserves empty thinking blocks as reasoning_content: "" (DeepSeek v4 thinking mode)', () => {
+    // DeepSeek v4 thinking mode sometimes returns reasoning_content: ""
+    // when the model answers directly without reasoning. The empty value
+    // must be echoed back in the next request — otherwise DeepSeek returns
+    // 400 ("reasoning_content ... must be passed back"). See issue #399.
    const result = anthropicMessagesToOpenAI(
      [
        makeUserMsg('question'),
@@ -481,7 +485,23 @@ describe('DeepSeek thinking mode (enableThinking)', () => {
      { enableThinking: true },
    )
    const assistant = result.filter(m => m.role === 'assistant')[0] as any
+    expect(assistant.reasoning_content).toBe('')
+    expect(assistant.content).toBe('Answer.')
+  })
+
+  test('omits reasoning_content when no thinking block is present', () => {
+    // No thinking block at all → no reasoning_content field on the
+    // OpenAI-format assistant message (relevant for non-thinking models).
+    const result = anthropicMessagesToOpenAI(
+      [
+        makeUserMsg('question'),
+        makeAssistantMsg([{ type: 'text', text: 'Answer.' }]),
+      ],
+      [] as any,
+    )
+    const assistant = result.filter(m => m.role === 'assistant')[0] as any
    expect(assistant.reasoning_content).toBeUndefined()
+    expect(assistant.content).toBe('Answer.')
  })

  // ── fix: reorder tool and user messages for OpenAI API compatibility (#168) ──
--- a/packages/@ant/model-provider/src/shared/tests/openaiStreamAdapter.test.ts
+++ b/packages/@ant/model-provider/src/shared/tests/openaiStreamAdapter.test.ts
@@ -439,6 +439,54 @@ describe('thinking support (reasoning_content)', () => {
    expect(blockStarts[1].content_block.type).toBe('tool_use')
  })

+  test('opens thinking block on empty reasoning_content (DeepSeek v4 direct-answer)', async () => {
+    // DeepSeek v4 thinking mode sometimes streams reasoning_content: ""
+    // before answering directly. We must still open a thinking block so the
+    // resulting assistant message carries an (empty) thinking block — that
+    // round-trips back as reasoning_content: "" in the next request,
+    // satisfying DeepSeek's requirement (see issue #399).
+    const events = await collectEvents([
+      makeChunk({
+        choices: [
+          {
+            index: 0,
+            delta: { reasoning_content: '' },
+            finish_reason: null,
+          },
+        ],
+      }),
+      makeChunk({
+        choices: [
+          {
+            index: 0,
+            delta: { content: 'Direct answer.' },
+            finish_reason: null,
+          },
+        ],
+      }),
+      makeChunk({
+        choices: [{ index: 0, delta: {}, finish_reason: 'stop' }],
+      }),
+    ])
+
+    // A thinking block was opened (and closed before the text block starts)
+    const blockStarts = events.filter(
+      e => e.type === 'content_block_start',
+    ) as any[]
+    expect(blockStarts.length).toBe(2)
+    expect(blockStarts[0].content_block.type).toBe('thinking')
+    expect(blockStarts[0].content_block.thinking).toBe('')
+    expect(blockStarts[1].content_block.type).toBe('text')
+
+    // No empty thinking_delta should be emitted — the empty string is
+    // already conveyed by the thinking block's initial value.
+    const thinkingDeltas = events.filter(
+      e =>
+        e.type === 'content_block_delta' && e.delta.type === 'thinking_delta',
+    )
+    expect(thinkingDeltas.length).toBe(0)
+  })
+
  test('thinking block index is 0, text block index is 1', async () => {
    const events = await collectEvents([
      makeChunk({
--- a/packages/@ant/model-provider/src/shared/openaiConvertMessages.ts
+++ b/packages/@ant/model-provider/src/shared/openaiConvertMessages.ts
@@ -206,12 +206,14 @@ function convertInternalAssistantMessage(
        },
      })
    } else if (block.type === 'thinking') {
-      // DeepSeek thinking mode: always preserve reasoning_content.
-      // DeepSeek requires reasoning_content to be passed back in subsequent requests,
-      // especially when tool calls are involved (returns 400 if missing).
+      // DeepSeek thinking mode: always preserve reasoning_content,
+      // including the empty-string case. DeepSeek v4 may return
+      // reasoning_content: "" when the model answers directly, and the
+      // empty value must be echoed back in the next request — otherwise
+      // DeepSeek returns 400 ("reasoning_content ... must be passed back").
      const thinkingText = (block as unknown as Record<string, unknown>)
        .thinking
-      if (typeof thinkingText === 'string' && thinkingText) {
+      if (typeof thinkingText === 'string') {
        reasoningParts.push(thinkingText)
      }
    }
--- a/packages/@ant/model-provider/src/shared/openaiStreamAdapter.ts
+++ b/packages/@ant/model-provider/src/shared/openaiStreamAdapter.ts
@@ -106,9 +106,13 @@ export async function* adaptOpenAIStreamToAnthropic(
    // Skip chunks that carry only usage data (no delta content)
    if (!delta) continue

-    // Handle reasoning_content → Anthropic thinking block
+    // Handle reasoning_content → Anthropic thinking block.
+    // Empty string is a valid signal: DeepSeek v4 thinking mode sometimes
+    // returns reasoning_content: "" when the model answers directly. The
+    // empty thinking block must round-trip back to the API in subsequent
+    // requests, otherwise DeepSeek rejects with 400.
    const reasoningContent = (delta as any).reasoning_content
-    if (reasoningContent != null && reasoningContent !== '') {
+    if (reasoningContent != null) {
      if (!thinkingBlockOpen) {
        currentContentIndex++
        thinkingBlockOpen = true
@@ -125,14 +129,16 @@ export async function* adaptOpenAIStreamToAnthropic(
        } as BetaRawMessageStreamEvent
      }

-      yield {
-        type: 'content_block_delta',
-        index: currentContentIndex,
-        delta: {
-          type: 'thinking_delta',
-          thinking: reasoningContent,
-        },
-      } as BetaRawMessageStreamEvent
+      if (reasoningContent !== '') {
+        yield {
+          type: 'content_block_delta',
+          index: currentContentIndex,
+          delta: {
+            type: 'thinking_delta',
+            thinking: reasoningContent,
+          },
+        } as BetaRawMessageStreamEvent
+      }
    }

    // Handle text content