mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-22 00:05:51 +00:00
feat: Add DeepSeek thinking mode support for OpenAI compatibility layer (#206)
* feat: Add DeepSeek thinking mode support for OpenAI compatibility layer - Add DeepSeek reasoning models support (deepseek-reasoner and DeepSeek-V3.2) - Automatic thinking mode detection based on model name - Inject thinking parameters in request body (both official API and vLLM formats) - Preserve reasoning_content in message conversion for tool call iterations - Extract buildOpenAIRequestBody() for testability - Treat multimodal inputs (e.g. images) as new turn boundaries - Fix env var cleanup in tests to prevent state leak Signed-off-by: guunergooner <tongchao0923@gmail.com> * docs: update contributors --------- Signed-off-by: guunergooner <tongchao0923@gmail.com> Co-authored-by: guunergooner <18660867+guunergooner@users.noreply.github.com>
This commit is contained in:
File diff suppressed because one or more lines are too long
|
Before Width: | Height: | Size: 1.2 MiB After Width: | Height: | Size: 1.2 MiB |
@@ -249,3 +249,209 @@ describe('anthropicMessagesToOpenAI', () => {
|
|||||||
)
|
)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe('DeepSeek thinking mode (enableThinking)', () => {
|
||||||
|
test('preserves thinking block as reasoning_content when enabled', () => {
|
||||||
|
const result = anthropicMessagesToOpenAI(
|
||||||
|
[makeUserMsg('question'), makeAssistantMsg([
|
||||||
|
{ type: 'thinking' as const, thinking: 'Let me reason about this...' },
|
||||||
|
{ type: 'text', text: 'The answer is 42.' },
|
||||||
|
])],
|
||||||
|
[] as any,
|
||||||
|
{ enableThinking: true },
|
||||||
|
)
|
||||||
|
// Should have: user, assistant with reasoning_content
|
||||||
|
expect(result).toHaveLength(2)
|
||||||
|
expect(result[0].role).toBe('user')
|
||||||
|
const assistant = result[1] as any
|
||||||
|
expect(assistant.role).toBe('assistant')
|
||||||
|
expect(assistant.content).toBe('The answer is 42.')
|
||||||
|
expect(assistant.reasoning_content).toBe('Let me reason about this...')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('drops thinking block when enableThinking is false (default)', () => {
|
||||||
|
const result = anthropicMessagesToOpenAI(
|
||||||
|
[makeAssistantMsg([
|
||||||
|
{ type: 'thinking' as const, thinking: 'internal thoughts...' },
|
||||||
|
{ type: 'text', text: 'visible response' },
|
||||||
|
])],
|
||||||
|
[] as any,
|
||||||
|
)
|
||||||
|
const assistant = result[0] as any
|
||||||
|
expect(assistant.content).toBe('visible response')
|
||||||
|
expect(assistant.reasoning_content).toBeUndefined()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('preserves reasoning_content with tool_calls in same turn', () => {
|
||||||
|
const result = anthropicMessagesToOpenAI(
|
||||||
|
[
|
||||||
|
makeUserMsg('what is the weather?'),
|
||||||
|
makeAssistantMsg([
|
||||||
|
{ type: 'thinking' as const, thinking: 'I need to call the weather tool.' },
|
||||||
|
{ type: 'text', text: '' },
|
||||||
|
{
|
||||||
|
type: 'tool_use' as const,
|
||||||
|
id: 'toolu_001',
|
||||||
|
name: 'get_weather',
|
||||||
|
input: { location: 'Hangzhou' },
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
makeUserMsg([
|
||||||
|
{
|
||||||
|
type: 'tool_result' as const,
|
||||||
|
tool_use_id: 'toolu_001',
|
||||||
|
content: 'Cloudy 7~13°C',
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
],
|
||||||
|
[] as any,
|
||||||
|
{ enableThinking: true },
|
||||||
|
)
|
||||||
|
|
||||||
|
// Find the assistant message
|
||||||
|
const assistants = result.filter(m => m.role === 'assistant')
|
||||||
|
expect(assistants.length).toBe(1)
|
||||||
|
const assistant = assistants[0] as any
|
||||||
|
expect(assistant.reasoning_content).toBe('I need to call the weather tool.')
|
||||||
|
expect(assistant.tool_calls).toBeDefined()
|
||||||
|
expect(assistant.tool_calls[0].function.name).toBe('get_weather')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('strips reasoning_content from previous turns', () => {
|
||||||
|
const result = anthropicMessagesToOpenAI(
|
||||||
|
[
|
||||||
|
// Turn 1: user → assistant (with thinking)
|
||||||
|
makeUserMsg('question 1'),
|
||||||
|
makeAssistantMsg([
|
||||||
|
{ type: 'thinking' as const, thinking: 'Turn 1 reasoning...' },
|
||||||
|
{ type: 'text', text: 'Turn 1 answer' },
|
||||||
|
]),
|
||||||
|
// Turn 2: new user message → previous reasoning should be stripped
|
||||||
|
makeUserMsg('question 2'),
|
||||||
|
makeAssistantMsg([
|
||||||
|
{ type: 'thinking' as const, thinking: 'Turn 2 reasoning...' },
|
||||||
|
{ type: 'text', text: 'Turn 2 answer' },
|
||||||
|
]),
|
||||||
|
],
|
||||||
|
[] as any,
|
||||||
|
{ enableThinking: true },
|
||||||
|
)
|
||||||
|
|
||||||
|
const assistants = result.filter(m => m.role === 'assistant')
|
||||||
|
// Turn 1 assistant: reasoning should be stripped (previous turn)
|
||||||
|
expect((assistants[0] as any).reasoning_content).toBeUndefined()
|
||||||
|
expect((assistants[0] as any).content).toBe('Turn 1 answer')
|
||||||
|
// Turn 2 assistant: reasoning should be preserved (current turn)
|
||||||
|
expect((assistants[1] as any).reasoning_content).toBe('Turn 2 reasoning...')
|
||||||
|
expect((assistants[1] as any).content).toBe('Turn 2 answer')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('preserves reasoning_content in multi-iteration tool call within same turn', () => {
|
||||||
|
// Simulates a full DeepSeek tool call iteration:
|
||||||
|
// user → assistant(thinking+tool_call) → tool_result → assistant(thinking+tool_call) → tool_result → assistant(thinking+text)
|
||||||
|
const result = anthropicMessagesToOpenAI(
|
||||||
|
[
|
||||||
|
makeUserMsg("tomorrow's weather in Hangzhou"),
|
||||||
|
// Iteration 1: thinking + tool call
|
||||||
|
makeAssistantMsg([
|
||||||
|
{ type: 'thinking' as const, thinking: 'I need the date first.' },
|
||||||
|
{
|
||||||
|
type: 'tool_use' as const,
|
||||||
|
id: 'toolu_001',
|
||||||
|
name: 'get_date',
|
||||||
|
input: {},
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
makeUserMsg([
|
||||||
|
{
|
||||||
|
type: 'tool_result' as const,
|
||||||
|
tool_use_id: 'toolu_001',
|
||||||
|
content: '2026-04-08',
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
// Iteration 2: thinking + tool call
|
||||||
|
makeAssistantMsg([
|
||||||
|
{ type: 'thinking' as const, thinking: 'Now I can get the weather.' },
|
||||||
|
{
|
||||||
|
type: 'tool_use' as const,
|
||||||
|
id: 'toolu_002',
|
||||||
|
name: 'get_weather',
|
||||||
|
input: { location: 'Hangzhou', date: '2026-04-08' },
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
makeUserMsg([
|
||||||
|
{
|
||||||
|
type: 'tool_result' as const,
|
||||||
|
tool_use_id: 'toolu_002',
|
||||||
|
content: 'Cloudy 7~13°C',
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
// Iteration 3: thinking + final answer
|
||||||
|
makeAssistantMsg([
|
||||||
|
{ type: 'thinking' as const, thinking: 'I have the info now.' },
|
||||||
|
{ type: 'text', text: 'Tomorrow will be cloudy, 7-13°C.' },
|
||||||
|
]),
|
||||||
|
],
|
||||||
|
[] as any,
|
||||||
|
{ enableThinking: true },
|
||||||
|
)
|
||||||
|
|
||||||
|
// All 3 assistant messages are in the current turn (after last user msg is the last tool_result,
|
||||||
|
// but the "last user message" boundary logic finds the last user-typed message).
|
||||||
|
// Actually, tool_result messages are also UserMessage type, so the last user message
|
||||||
|
// is the one with tool_result for toolu_002. All assistant messages after that should have reasoning.
|
||||||
|
const assistants = result.filter(m => m.role === 'assistant')
|
||||||
|
expect(assistants.length).toBe(3)
|
||||||
|
// All iterations within the same turn preserve reasoning
|
||||||
|
expect((assistants[0] as any).reasoning_content).toBe('I need the date first.')
|
||||||
|
expect((assistants[1] as any).reasoning_content).toBe('Now I can get the weather.')
|
||||||
|
expect((assistants[2] as any).reasoning_content).toBe('I have the info now.')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('handles multiple thinking blocks in single assistant message', () => {
|
||||||
|
const result = anthropicMessagesToOpenAI(
|
||||||
|
[makeUserMsg('question'), makeAssistantMsg([
|
||||||
|
{ type: 'thinking' as const, thinking: 'First thought.' },
|
||||||
|
{ type: 'thinking' as const, thinking: 'Second thought.' },
|
||||||
|
{ type: 'text', text: 'Final answer.' },
|
||||||
|
])],
|
||||||
|
[] as any,
|
||||||
|
{ enableThinking: true },
|
||||||
|
)
|
||||||
|
const assistant = result.filter(m => m.role === 'assistant')[0] as any
|
||||||
|
expect(assistant.reasoning_content).toBe('First thought.\nSecond thought.')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('skips empty thinking blocks', () => {
|
||||||
|
const result = anthropicMessagesToOpenAI(
|
||||||
|
[makeUserMsg('question'), makeAssistantMsg([
|
||||||
|
{ type: 'thinking' as const, thinking: '' },
|
||||||
|
{ type: 'text', text: 'Answer.' },
|
||||||
|
])],
|
||||||
|
[] as any,
|
||||||
|
{ enableThinking: true },
|
||||||
|
)
|
||||||
|
const assistant = result.filter(m => m.role === 'assistant')[0] as any
|
||||||
|
expect(assistant.reasoning_content).toBeUndefined()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('sets content to null when only thinking and tool_calls present', () => {
|
||||||
|
const result = anthropicMessagesToOpenAI(
|
||||||
|
[makeUserMsg('question'), makeAssistantMsg([
|
||||||
|
{ type: 'thinking' as const, thinking: 'Reasoning only.' },
|
||||||
|
{
|
||||||
|
type: 'tool_use' as const,
|
||||||
|
id: 'toolu_001',
|
||||||
|
name: 'bash',
|
||||||
|
input: { command: 'ls' },
|
||||||
|
},
|
||||||
|
])],
|
||||||
|
[] as any,
|
||||||
|
{ enableThinking: true },
|
||||||
|
)
|
||||||
|
const assistant = result.filter(m => m.role === 'assistant')[0] as any
|
||||||
|
expect(assistant.content).toBeNull()
|
||||||
|
expect(assistant.reasoning_content).toBe('Reasoning only.')
|
||||||
|
expect(assistant.tool_calls).toHaveLength(1)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|||||||
226
src/services/api/openai/__tests__/thinking.test.ts
Normal file
226
src/services/api/openai/__tests__/thinking.test.ts
Normal file
@@ -0,0 +1,226 @@
|
|||||||
|
import { describe, expect, test, beforeEach, afterEach } from 'bun:test'
|
||||||
|
import { isOpenAIThinkingEnabled, buildOpenAIRequestBody } from '../index.js'
|
||||||
|
|
||||||
|
describe('isOpenAIThinkingEnabled', () => {
|
||||||
|
const originalEnv = {
|
||||||
|
OPENAI_ENABLE_THINKING: process.env.OPENAI_ENABLE_THINKING,
|
||||||
|
}
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
// Clear env var before each test
|
||||||
|
delete process.env.OPENAI_ENABLE_THINKING
|
||||||
|
})
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
// Restore original env var — delete key if it was originally undefined
|
||||||
|
// to avoid leaking the env key into subsequent tests
|
||||||
|
if (originalEnv.OPENAI_ENABLE_THINKING === undefined) {
|
||||||
|
delete process.env.OPENAI_ENABLE_THINKING
|
||||||
|
} else {
|
||||||
|
process.env.OPENAI_ENABLE_THINKING = originalEnv.OPENAI_ENABLE_THINKING
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('OPENAI_ENABLE_THINKING env var', () => {
|
||||||
|
test('returns true when OPENAI_ENABLE_THINKING=1', () => {
|
||||||
|
process.env.OPENAI_ENABLE_THINKING = '1'
|
||||||
|
expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns true when OPENAI_ENABLE_THINKING=true', () => {
|
||||||
|
process.env.OPENAI_ENABLE_THINKING = 'true'
|
||||||
|
expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns true when OPENAI_ENABLE_THINKING=yes', () => {
|
||||||
|
process.env.OPENAI_ENABLE_THINKING = 'yes'
|
||||||
|
expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns true when OPENAI_ENABLE_THINKING=on', () => {
|
||||||
|
process.env.OPENAI_ENABLE_THINKING = 'on'
|
||||||
|
expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns true when OPENAI_ENABLE_THINKING=TRUE (case insensitive)', () => {
|
||||||
|
process.env.OPENAI_ENABLE_THINKING = 'TRUE'
|
||||||
|
expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns false when OPENAI_ENABLE_THINKING=0', () => {
|
||||||
|
process.env.OPENAI_ENABLE_THINKING = '0'
|
||||||
|
expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(false)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns false when OPENAI_ENABLE_THINKING=false', () => {
|
||||||
|
process.env.OPENAI_ENABLE_THINKING = 'false'
|
||||||
|
expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(false)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns false when OPENAI_ENABLE_THINKING is empty', () => {
|
||||||
|
process.env.OPENAI_ENABLE_THINKING = ''
|
||||||
|
expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(false)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns false when OPENAI_ENABLE_THINKING is not set', () => {
|
||||||
|
expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(false)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('model name auto-detect', () => {
|
||||||
|
test('returns true when model name is "deepseek-reasoner"', () => {
|
||||||
|
expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns true when model name contains "deepseek-reasoner" (case insensitive)', () => {
|
||||||
|
expect(isOpenAIThinkingEnabled('DeepSeek-Reasoner')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns true when model name has prefix/suffix for deepseek-reasoner', () => {
|
||||||
|
expect(isOpenAIThinkingEnabled('my-deepseek-reasoner-v1')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns true when model name is namespaced for deepseek-reasoner', () => {
|
||||||
|
expect(isOpenAIThinkingEnabled('TokenService/deepseek-reasoner')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns true when model name is "deepseek-v3.2"', () => {
|
||||||
|
expect(isOpenAIThinkingEnabled('deepseek-v3.2')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns true when model name contains "deepseek-v3.2" (case insensitive)', () => {
|
||||||
|
expect(isOpenAIThinkingEnabled('DeepSeek-V3.2')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns true when model name has prefix/suffix for deepseek-v3.2', () => {
|
||||||
|
expect(isOpenAIThinkingEnabled('my-deepseek-v3.2-v1')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns true when model name is namespaced for deepseek-v3.2', () => {
|
||||||
|
expect(isOpenAIThinkingEnabled('TokenService/deepseek-v3.2')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns false when model name is "deepseek-chat"', () => {
|
||||||
|
expect(isOpenAIThinkingEnabled('deepseek-chat')).toBe(false)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns false when model name is "deepseek-v3"', () => {
|
||||||
|
expect(isOpenAIThinkingEnabled('deepseek-v3')).toBe(false)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns false when model name contains "deepseek" but not "reasoner" or "v3.2"', () => {
|
||||||
|
expect(isOpenAIThinkingEnabled('deepseek-coder')).toBe(false)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns false when model name is "gpt-4o"', () => {
|
||||||
|
expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(false)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('returns false when model name is empty', () => {
|
||||||
|
expect(isOpenAIThinkingEnabled('')).toBe(false)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('priority and combined detection', () => {
|
||||||
|
test('OPENAI_ENABLE_THINKING=1 enables thinking for any model', () => {
|
||||||
|
process.env.OPENAI_ENABLE_THINKING = '1'
|
||||||
|
expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true)
|
||||||
|
expect(isOpenAIThinkingEnabled('deepseek-v3')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('OPENAI_ENABLE_THINKING=false disables thinking even for deepseek-reasoner', () => {
|
||||||
|
process.env.OPENAI_ENABLE_THINKING = 'false'
|
||||||
|
expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(false)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('OPENAI_ENABLE_THINKING=0 disables thinking even for deepseek-reasoner', () => {
|
||||||
|
process.env.OPENAI_ENABLE_THINKING = '0'
|
||||||
|
expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(false)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('both conditions can enable thinking', () => {
|
||||||
|
process.env.OPENAI_ENABLE_THINKING = '1'
|
||||||
|
expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(true)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('buildOpenAIRequestBody — thinking params', () => {
|
||||||
|
const baseParams = {
|
||||||
|
model: 'deepseek-reasoner',
|
||||||
|
messages: [{ role: 'user', content: 'hello' }],
|
||||||
|
tools: [] as any[],
|
||||||
|
toolChoice: undefined as any,
|
||||||
|
}
|
||||||
|
|
||||||
|
test('includes official DeepSeek API thinking format when enabled', () => {
|
||||||
|
const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: true })
|
||||||
|
expect(body.thinking).toEqual({ type: 'enabled' })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('includes vLLM/self-hosted thinking format when enabled', () => {
|
||||||
|
const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: true })
|
||||||
|
expect(body.enable_thinking).toBe(true)
|
||||||
|
expect(body.chat_template_kwargs).toEqual({ thinking: true })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('includes both formats simultaneously when enabled', () => {
|
||||||
|
const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: true })
|
||||||
|
expect(body.thinking).toEqual({ type: 'enabled' })
|
||||||
|
expect(body.enable_thinking).toBe(true)
|
||||||
|
expect(body.chat_template_kwargs.thinking).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('does NOT include thinking params when disabled', () => {
|
||||||
|
const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: false })
|
||||||
|
expect(body.thinking).toBeUndefined()
|
||||||
|
expect(body.enable_thinking).toBeUndefined()
|
||||||
|
expect(body.chat_template_kwargs).toBeUndefined()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('always includes stream and stream_options', () => {
|
||||||
|
const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: false })
|
||||||
|
expect(body.stream).toBe(true)
|
||||||
|
expect(body.stream_options).toEqual({ include_usage: true })
|
||||||
|
})
|
||||||
|
|
||||||
|
test('includes temperature when thinking is off and override is set', () => {
|
||||||
|
const body = buildOpenAIRequestBody({
|
||||||
|
...baseParams,
|
||||||
|
enableThinking: false,
|
||||||
|
temperatureOverride: 0.7,
|
||||||
|
})
|
||||||
|
expect(body.temperature).toBe(0.7)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('excludes temperature when thinking is on even if override is set', () => {
|
||||||
|
const body = buildOpenAIRequestBody({
|
||||||
|
...baseParams,
|
||||||
|
enableThinking: true,
|
||||||
|
temperatureOverride: 0.7,
|
||||||
|
})
|
||||||
|
expect(body.temperature).toBeUndefined()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('excludes temperature when thinking is off and no override', () => {
|
||||||
|
const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: false })
|
||||||
|
expect(body.temperature).toBeUndefined()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('includes tools and tool_choice when tools are provided', () => {
|
||||||
|
const body = buildOpenAIRequestBody({
|
||||||
|
...baseParams,
|
||||||
|
tools: [{ type: 'function', function: { name: 'test' } }],
|
||||||
|
toolChoice: 'auto',
|
||||||
|
enableThinking: false,
|
||||||
|
})
|
||||||
|
expect(body.tools).toHaveLength(1)
|
||||||
|
expect(body.tool_choice).toBe('auto')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('excludes tools when empty', () => {
|
||||||
|
const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: false })
|
||||||
|
expect(body.tools).toBeUndefined()
|
||||||
|
expect(body.tool_choice).toBeUndefined()
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -13,6 +13,12 @@ import type {
|
|||||||
import type { AssistantMessage, UserMessage } from '../../../types/message.js'
|
import type { AssistantMessage, UserMessage } from '../../../types/message.js'
|
||||||
import type { SystemPrompt } from '../../../utils/systemPromptType.js'
|
import type { SystemPrompt } from '../../../utils/systemPromptType.js'
|
||||||
|
|
||||||
|
export interface ConvertMessagesOptions {
|
||||||
|
/** When true, preserve thinking blocks as reasoning_content on assistant messages
|
||||||
|
* (required for DeepSeek thinking mode with tool calls). */
|
||||||
|
enableThinking?: boolean
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert internal (UserMessage | AssistantMessage)[] to OpenAI-format messages.
|
* Convert internal (UserMessage | AssistantMessage)[] to OpenAI-format messages.
|
||||||
*
|
*
|
||||||
@@ -20,14 +26,16 @@ import type { SystemPrompt } from '../../../utils/systemPromptType.js'
|
|||||||
* - system prompt → role: "system" message prepended
|
* - system prompt → role: "system" message prepended
|
||||||
* - tool_use blocks → tool_calls[] on assistant message
|
* - tool_use blocks → tool_calls[] on assistant message
|
||||||
* - tool_result blocks → role: "tool" messages
|
* - tool_result blocks → role: "tool" messages
|
||||||
* - thinking blocks → silently dropped
|
* - thinking blocks → silently dropped (or preserved as reasoning_content when enableThinking=true)
|
||||||
* - cache_control → stripped
|
* - cache_control → stripped
|
||||||
*/
|
*/
|
||||||
export function anthropicMessagesToOpenAI(
|
export function anthropicMessagesToOpenAI(
|
||||||
messages: (UserMessage | AssistantMessage)[],
|
messages: (UserMessage | AssistantMessage)[],
|
||||||
systemPrompt: SystemPrompt,
|
systemPrompt: SystemPrompt,
|
||||||
|
options?: ConvertMessagesOptions,
|
||||||
): ChatCompletionMessageParam[] {
|
): ChatCompletionMessageParam[] {
|
||||||
const result: ChatCompletionMessageParam[] = []
|
const result: ChatCompletionMessageParam[] = []
|
||||||
|
const enableThinking = options?.enableThinking ?? false
|
||||||
|
|
||||||
// Prepend system prompt as system message
|
// Prepend system prompt as system message
|
||||||
const systemText = systemPromptToText(systemPrompt)
|
const systemText = systemPromptToText(systemPrompt)
|
||||||
@@ -38,13 +46,50 @@ export function anthropicMessagesToOpenAI(
|
|||||||
} satisfies ChatCompletionSystemMessageParam)
|
} satisfies ChatCompletionSystemMessageParam)
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const msg of messages) {
|
// When thinking mode is on, detect turn boundaries so that reasoning_content
|
||||||
|
// from *previous* user turns is stripped (saves bandwidth; DeepSeek ignores it).
|
||||||
|
// A "new turn" starts when a user text message appears after at least one assistant response.
|
||||||
|
const turnBoundaries = new Set<number>()
|
||||||
|
if (enableThinking) {
|
||||||
|
let hasSeenAssistant = false
|
||||||
|
for (let i = 0; i < messages.length; i++) {
|
||||||
|
const msg = messages[i]
|
||||||
|
if (msg.type === 'assistant') {
|
||||||
|
hasSeenAssistant = true
|
||||||
|
}
|
||||||
|
if (msg.type === 'user' && hasSeenAssistant) {
|
||||||
|
const content = msg.message.content
|
||||||
|
// A user message starts a new turn if it contains any non-tool_result content
|
||||||
|
// (text, image, or other media). Tool results alone do NOT start a new turn
|
||||||
|
// because they are continuations of the previous assistant tool call.
|
||||||
|
const startsNewUserTurn = typeof content === 'string'
|
||||||
|
? content.length > 0
|
||||||
|
: Array.isArray(content) && content.some(
|
||||||
|
(b: any) =>
|
||||||
|
typeof b === 'string' ||
|
||||||
|
(b &&
|
||||||
|
typeof b === 'object' &&
|
||||||
|
'type' in b &&
|
||||||
|
b.type !== 'tool_result'),
|
||||||
|
)
|
||||||
|
if (startsNewUserTurn) {
|
||||||
|
turnBoundaries.add(i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (let i = 0; i < messages.length; i++) {
|
||||||
|
const msg = messages[i]
|
||||||
switch (msg.type) {
|
switch (msg.type) {
|
||||||
case 'user':
|
case 'user':
|
||||||
result.push(...convertInternalUserMessage(msg))
|
result.push(...convertInternalUserMessage(msg))
|
||||||
break
|
break
|
||||||
case 'assistant':
|
case 'assistant':
|
||||||
result.push(...convertInternalAssistantMessage(msg))
|
// Preserve reasoning_content unless we're before a turn boundary
|
||||||
|
// (i.e., from a previous user Q&A round)
|
||||||
|
const preserveReasoning = enableThinking && !isBeforeAnyTurnBoundary(i, turnBoundaries)
|
||||||
|
result.push(...convertInternalAssistantMessage(msg, preserveReasoning))
|
||||||
break
|
break
|
||||||
default:
|
default:
|
||||||
break
|
break
|
||||||
@@ -61,6 +106,17 @@ function systemPromptToText(systemPrompt: SystemPrompt): string {
|
|||||||
.join('\n\n')
|
.join('\n\n')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if index `i` falls before any turn boundary (i.e. it belongs to a previous turn).
|
||||||
|
* A message at index i is "before" a boundary if there exists a boundary j where i < j.
|
||||||
|
*/
|
||||||
|
function isBeforeAnyTurnBoundary(i: number, boundaries: Set<number>): boolean {
|
||||||
|
for (const b of boundaries) {
|
||||||
|
if (i < b) return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
function convertInternalUserMessage(
|
function convertInternalUserMessage(
|
||||||
msg: UserMessage,
|
msg: UserMessage,
|
||||||
): ChatCompletionMessageParam[] {
|
): ChatCompletionMessageParam[] {
|
||||||
@@ -151,6 +207,7 @@ function convertToolResult(
|
|||||||
|
|
||||||
function convertInternalAssistantMessage(
|
function convertInternalAssistantMessage(
|
||||||
msg: AssistantMessage,
|
msg: AssistantMessage,
|
||||||
|
preserveReasoning = false,
|
||||||
): ChatCompletionMessageParam[] {
|
): ChatCompletionMessageParam[] {
|
||||||
const content = msg.message.content
|
const content = msg.message.content
|
||||||
|
|
||||||
@@ -174,6 +231,7 @@ function convertInternalAssistantMessage(
|
|||||||
|
|
||||||
const textParts: string[] = []
|
const textParts: string[] = []
|
||||||
const toolCalls: NonNullable<ChatCompletionAssistantMessageParam['tool_calls']> = []
|
const toolCalls: NonNullable<ChatCompletionAssistantMessageParam['tool_calls']> = []
|
||||||
|
const reasoningParts: string[] = []
|
||||||
|
|
||||||
for (const block of content) {
|
for (const block of content) {
|
||||||
if (typeof block === 'string') {
|
if (typeof block === 'string') {
|
||||||
@@ -191,14 +249,21 @@ function convertInternalAssistantMessage(
|
|||||||
typeof tu.input === 'string' ? tu.input : JSON.stringify(tu.input),
|
typeof tu.input === 'string' ? tu.input : JSON.stringify(tu.input),
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
} else if (block.type === 'thinking' && preserveReasoning) {
|
||||||
|
// DeepSeek thinking mode: preserve reasoning_content for tool call iterations
|
||||||
|
const thinkingText = (block as Record<string, unknown>).thinking
|
||||||
|
if (typeof thinkingText === 'string' && thinkingText) {
|
||||||
|
reasoningParts.push(thinkingText)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Skip thinking, redacted_thinking, server_tool_use, etc.
|
// Skip redacted_thinking, server_tool_use, etc.
|
||||||
}
|
}
|
||||||
|
|
||||||
const result: ChatCompletionAssistantMessageParam = {
|
const result: ChatCompletionAssistantMessageParam = {
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: textParts.length > 0 ? textParts.join('\n') : null,
|
content: textParts.length > 0 ? textParts.join('\n') : null,
|
||||||
...(toolCalls.length > 0 && { tool_calls: toolCalls }),
|
...(toolCalls.length > 0 && { tool_calls: toolCalls }),
|
||||||
|
...(reasoningParts.length > 0 && { reasoning_content: reasoningParts.join('\n') }),
|
||||||
}
|
}
|
||||||
|
|
||||||
return [result]
|
return [result]
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ import {
|
|||||||
import { logForDebugging } from '../../../utils/debug.js'
|
import { logForDebugging } from '../../../utils/debug.js'
|
||||||
import { addToTotalSessionCost } from '../../../cost-tracker.js'
|
import { addToTotalSessionCost } from '../../../cost-tracker.js'
|
||||||
import { calculateUSDCost } from '../../../utils/modelCost.js'
|
import { calculateUSDCost } from '../../../utils/modelCost.js'
|
||||||
|
import { isEnvTruthy, isEnvDefinedFalsy } from '../../../utils/envUtils.js'
|
||||||
import type { Options } from '../claude.js'
|
import type { Options } from '../claude.js'
|
||||||
import { randomUUID } from 'crypto'
|
import { randomUUID } from 'crypto'
|
||||||
import {
|
import {
|
||||||
@@ -39,6 +40,76 @@ import {
|
|||||||
TOOL_SEARCH_TOOL_NAME,
|
TOOL_SEARCH_TOOL_NAME,
|
||||||
} from '../../../tools/ToolSearchTool/prompt.js'
|
} from '../../../tools/ToolSearchTool/prompt.js'
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Detect whether DeepSeek-style thinking mode should be enabled.
|
||||||
|
*
|
||||||
|
* Enabled when:
|
||||||
|
* 1. OPENAI_ENABLE_THINKING=1 is set (explicit enable), OR
|
||||||
|
* 2. Model name contains "deepseek-reasoner" OR "DeepSeek-V3.2" (auto-detect, case-insensitive)
|
||||||
|
*
|
||||||
|
* Disabled when:
|
||||||
|
* - OPENAI_ENABLE_THINKING=0/false/no/off is explicitly set (overrides model detection)
|
||||||
|
*
|
||||||
|
* @param model - The resolved OpenAI model name
|
||||||
|
* @internal Exported for testing purposes only
|
||||||
|
*/
|
||||||
|
export function isOpenAIThinkingEnabled(model: string): boolean {
|
||||||
|
// Explicit disable takes priority (overrides model auto-detect)
|
||||||
|
if (isEnvDefinedFalsy(process.env.OPENAI_ENABLE_THINKING)) return false
|
||||||
|
// Explicit enable
|
||||||
|
if (isEnvTruthy(process.env.OPENAI_ENABLE_THINKING)) return true
|
||||||
|
// Auto-detect from model name (deepseek-reasoner and DeepSeek-V3.2 support thinking mode)
|
||||||
|
const modelLower = model.toLowerCase()
|
||||||
|
return modelLower.includes('deepseek-reasoner') || modelLower.includes('deepseek-v3.2')
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build the request body for OpenAI chat.completions.create().
|
||||||
|
* Extracted for testability — the thinking mode params are injected here.
|
||||||
|
*
|
||||||
|
* DeepSeek thinking mode: inject thinking params via request body.
|
||||||
|
* Two formats are added simultaneously to support different deployments:
|
||||||
|
* - Official DeepSeek API: `thinking: { type: 'enabled' }`
|
||||||
|
* - Self-hosted DeepSeek-V3.2: `enable_thinking: true` + `chat_template_kwargs: { thinking: true }`
|
||||||
|
* OpenAI SDK passes unknown keys through to the HTTP body.
|
||||||
|
* Each endpoint will use the format it recognizes and ignore the others.
|
||||||
|
* @internal Exported for testing purposes only
|
||||||
|
*/
|
||||||
|
export function buildOpenAIRequestBody(params: {
|
||||||
|
model: string
|
||||||
|
messages: any[]
|
||||||
|
tools: any[]
|
||||||
|
toolChoice: any
|
||||||
|
enableThinking: boolean
|
||||||
|
temperatureOverride?: number
|
||||||
|
}): Record<string, any> {
|
||||||
|
const { model, messages, tools, toolChoice, enableThinking, temperatureOverride } = params
|
||||||
|
return {
|
||||||
|
model,
|
||||||
|
messages,
|
||||||
|
...(tools.length > 0 && {
|
||||||
|
tools,
|
||||||
|
...(toolChoice && { tool_choice: toolChoice }),
|
||||||
|
}),
|
||||||
|
stream: true,
|
||||||
|
stream_options: { include_usage: true },
|
||||||
|
// DeepSeek thinking mode: enable chain-of-thought output.
|
||||||
|
// When active, temperature/top_p/presence_penalty/frequency_penalty are ignored by DeepSeek.
|
||||||
|
...(enableThinking && {
|
||||||
|
// Official DeepSeek API format
|
||||||
|
thinking: { type: 'enabled' },
|
||||||
|
// Self-hosted DeepSeek-V3.2 format
|
||||||
|
enable_thinking: true,
|
||||||
|
chat_template_kwargs: { thinking: true },
|
||||||
|
}),
|
||||||
|
// Only send temperature when thinking mode is off (DeepSeek ignores it anyway,
|
||||||
|
// but other providers may respect it)
|
||||||
|
...(!enableThinking && temperatureOverride !== undefined && {
|
||||||
|
temperature: temperatureOverride,
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* OpenAI-compatible query path. Converts Anthropic-format messages/tools to
|
* OpenAI-compatible query path. Converts Anthropic-format messages/tools to
|
||||||
* OpenAI format, calls the OpenAI-compatible endpoint, and converts the
|
* OpenAI format, calls the OpenAI-compatible endpoint, and converts the
|
||||||
@@ -120,10 +191,10 @@ export async function* queryModelOpenAI(
|
|||||||
)
|
)
|
||||||
|
|
||||||
// 8. Convert messages and tools to OpenAI format
|
// 8. Convert messages and tools to OpenAI format
|
||||||
const openaiMessages = anthropicMessagesToOpenAI(
|
const enableThinking = isOpenAIThinkingEnabled(openaiModel)
|
||||||
messagesForAPI,
|
const openaiMessages = anthropicMessagesToOpenAI(messagesForAPI, systemPrompt, {
|
||||||
systemPrompt,
|
enableThinking,
|
||||||
)
|
})
|
||||||
const openaiTools = anthropicToolsToOpenAI(standardTools)
|
const openaiTools = anthropicToolsToOpenAI(standardTools)
|
||||||
const openaiToolChoice = anthropicToolChoiceToOpenAI(options.toolChoice)
|
const openaiToolChoice = anthropicToolChoiceToOpenAI(options.toolChoice)
|
||||||
|
|
||||||
@@ -149,31 +220,25 @@ export async function* queryModelOpenAI(
|
|||||||
})
|
})
|
||||||
|
|
||||||
logForDebugging(
|
logForDebugging(
|
||||||
`[OpenAI] Calling model=${openaiModel}, messages=${openaiMessages.length}, tools=${openaiTools.length}`,
|
`[OpenAI] Calling model=${openaiModel}, messages=${openaiMessages.length}, tools=${openaiTools.length}, thinking=${enableThinking}`,
|
||||||
)
|
)
|
||||||
|
|
||||||
// 11. Call OpenAI API with streaming
|
// 11. Call OpenAI API with streaming
|
||||||
|
const requestBody = buildOpenAIRequestBody({
|
||||||
|
model: openaiModel,
|
||||||
|
messages: openaiMessages,
|
||||||
|
tools: openaiTools,
|
||||||
|
toolChoice: openaiToolChoice,
|
||||||
|
enableThinking,
|
||||||
|
temperatureOverride: options.temperatureOverride,
|
||||||
|
})
|
||||||
const stream = await client.chat.completions.create(
|
const stream = await client.chat.completions.create(
|
||||||
{
|
requestBody,
|
||||||
model: openaiModel,
|
{ signal },
|
||||||
messages: openaiMessages,
|
|
||||||
...(openaiTools.length > 0 && {
|
|
||||||
tools: openaiTools,
|
|
||||||
...(openaiToolChoice && { tool_choice: openaiToolChoice }),
|
|
||||||
}),
|
|
||||||
stream: true,
|
|
||||||
stream_options: { include_usage: true },
|
|
||||||
...(options.temperatureOverride !== undefined && {
|
|
||||||
temperature: options.temperatureOverride,
|
|
||||||
}),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
signal,
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// 7. Convert OpenAI stream to Anthropic events, then process into
|
// 12. Convert OpenAI stream to Anthropic events, then process into
|
||||||
// AssistantMessage + StreamEvent (matching the Anthropic path behavior)
|
// AssistantMessage + StreamEvent (matching the Anthropic path behavior)
|
||||||
const adaptedStream = adaptOpenAIStreamToAnthropic(stream, openaiModel)
|
const adaptedStream = adaptOpenAIStreamToAnthropic(stream, openaiModel)
|
||||||
|
|
||||||
// Accumulate content blocks and usage, same as the Anthropic path in claude.ts
|
// Accumulate content blocks and usage, same as the Anthropic path in claude.ts
|
||||||
@@ -287,4 +352,4 @@ export async function* queryModelOpenAI(
|
|||||||
error: error instanceof Error ? error : new Error(String(error)),
|
error: error instanceof Error ? error : new Error(String(error)),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user