From 90027279e6ac8c55d588927af2af4780266b93e4 Mon Sep 17 00:00:00 2001 From: claude-code-best Date: Thu, 16 Apr 2026 13:01:07 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E7=8E=AF=E5=A2=83?= =?UTF-8?q?=E5=8F=98=E9=87=8F=E6=94=AF=E6=8C=81=E4=BB=A5=E8=A6=86=E7=9B=96?= =?UTF-8?q?=20max=5Ftokens=20=E8=AE=BE=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../openai/__tests__/queryModelOpenAI.test.ts | 105 ++++++++++++++++++ src/services/api/openai/index.ts | 33 +++++- 2 files changed, 136 insertions(+), 2 deletions(-) diff --git a/src/services/api/openai/__tests__/queryModelOpenAI.test.ts b/src/services/api/openai/__tests__/queryModelOpenAI.test.ts index f13f86782..0cf2f7888 100644 --- a/src/services/api/openai/__tests__/queryModelOpenAI.test.ts +++ b/src/services/api/openai/__tests__/queryModelOpenAI.test.ts @@ -194,6 +194,16 @@ mock.module('../convertTools.js', () => ({ mock.module('../../../../utils/context.js', () => ({ getModelMaxOutputTokens: () => ({ upperLimit: 8192, default: 8192 }), getContextWindowForModel: () => 200_000, + modelSupports1M: () => false, + has1mContext: () => false, + is1mContextDisabled: () => false, + getSonnet1mExpTreatmentEnabled: () => false, + MODEL_CONTEXT_WINDOW_DEFAULT: 200_000, + COMPACT_MAX_OUTPUT_TOKENS: 20_000, + CAPPED_DEFAULT_MAX_TOKENS: 8_000, + ESCALATED_MAX_TOKENS: 64_000, + calculateContextPercentages: () => ({ used: null, remaining: null }), + getMaxThinkingTokensForModel: () => 8191, })) mock.module('../../../../utils/messages.js', () => ({ @@ -211,6 +221,22 @@ mock.module('../../../../utils/api.js', () => ({ toolToAPISchema: async (t: any) => t, })) +mock.module('../../../../Tool.js', () => ({ + getEmptyToolPermissionContext: () => ({ + alwaysAllow: [], + alwaysDeny: [], + needsPermission: [], + mode: 'default', + isBypassingPermissions: false, + }), + toolMatchesName: () => false, +})) + +mock.module('../../../../utils/envUtils.js', () => ({ + isEnvTruthy: (v: string | undefined) => v === '1' || v === 'true', + isEnvDefinedFalsy: (v: string | undefined) => v === '0' || v === 'false' || v === 'no' || v === 'off', +})) + mock.module('../../../../utils/toolSearch.js', () => ({ isToolSearchEnabled: async () => false, extractDiscoveredToolNames: () => new Set(), @@ -451,4 +477,83 @@ describe('queryModelOpenAI — max_tokens forwarded to request', () => { expect(_lastCreateArgs).not.toBeNull() expect(_lastCreateArgs!.max_tokens).toBe(8192) }) + + test('OPENAI_MAX_TOKENS env var overrides max_tokens', async () => { + const original = process.env.OPENAI_MAX_TOKENS + process.env.OPENAI_MAX_TOKENS = '4096' + try { + _nextEvents = [ + makeMessageStart(), + makeContentBlockStart(0, 'text'), + makeTextDelta(0, 'hi'), + makeContentBlockStop(0), + makeMessageDelta('end_turn', 5), + makeMessageStop(), + ] + + await runQueryModel(_nextEvents) + + expect(_lastCreateArgs).not.toBeNull() + expect(_lastCreateArgs!.max_tokens).toBe(4096) + } finally { + if (original === undefined) { + delete process.env.OPENAI_MAX_TOKENS + } else { + process.env.OPENAI_MAX_TOKENS = original + } + } + }) + + test('CLAUDE_CODE_MAX_OUTPUT_TOKENS env var overrides max_tokens', async () => { + const original = process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS + process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS = '2048' + try { + _nextEvents = [ + makeMessageStart(), + makeContentBlockStart(0, 'text'), + makeTextDelta(0, 'hi'), + makeContentBlockStop(0), + makeMessageDelta('end_turn', 5), + makeMessageStop(), + ] + + await runQueryModel(_nextEvents) + + expect(_lastCreateArgs).not.toBeNull() + expect(_lastCreateArgs!.max_tokens).toBe(2048) + } finally { + if (original === undefined) { + delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS + } else { + process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS = original + } + } + }) + + test('OPENAI_MAX_TOKENS takes priority over CLAUDE_CODE_MAX_OUTPUT_TOKENS', async () => { + const origOpenai = process.env.OPENAI_MAX_TOKENS + const origClaude = process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS + process.env.OPENAI_MAX_TOKENS = '4096' + process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS = '2048' + try { + _nextEvents = [ + makeMessageStart(), + makeContentBlockStart(0, 'text'), + makeTextDelta(0, 'hi'), + makeContentBlockStop(0), + makeMessageDelta('end_turn', 5), + makeMessageStop(), + ] + + await runQueryModel(_nextEvents) + + expect(_lastCreateArgs).not.toBeNull() + expect(_lastCreateArgs!.max_tokens).toBe(4096) + } finally { + if (origOpenai === undefined) delete process.env.OPENAI_MAX_TOKENS + else process.env.OPENAI_MAX_TOKENS = origOpenai + if (origClaude === undefined) delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS + else process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS = origClaude + } + }) }) diff --git a/src/services/api/openai/index.ts b/src/services/api/openai/index.ts index 040907006..f4bebce34 100644 --- a/src/services/api/openai/index.ts +++ b/src/services/api/openai/index.ts @@ -71,6 +71,28 @@ export function isOpenAIThinkingEnabled(model: string): boolean { return modelLower.includes('deepseek-reasoner') || modelLower.includes('deepseek-v3.2') } +/** + * Resolve max output tokens for the OpenAI-compatible path. + * + * Override priority: + * 1. maxOutputTokensOverride (programmatic, from query pipeline) + * 2. OPENAI_MAX_TOKENS env var (OpenAI-specific, useful for local models + * with small context windows, e.g. RTX 3060 12GB running 65536-token models) + * 3. CLAUDE_CODE_MAX_OUTPUT_TOKENS env var (generic override) + * 4. upperLimit default (64000) + * + * @internal Exported for testing purposes only + */ +export function resolveOpenAIMaxTokens( + upperLimit: number, + maxOutputTokensOverride?: number, +): number { + return maxOutputTokensOverride + ?? (process.env.OPENAI_MAX_TOKENS ? parseInt(process.env.OPENAI_MAX_TOKENS, 10) || undefined : undefined) + ?? (process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS ? parseInt(process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS, 10) || undefined : undefined) + ?? upperLimit +} + /** * Build the request body for OpenAI chat.completions.create(). * Extracted for testability — the thinking mode params are injected here. @@ -165,7 +187,7 @@ function assembleFinalAssistantOutputs(params: { if (stopReason === 'max_tokens') { outputs.push(createAssistantAPIErrorMessage({ content: `Output truncated: response exceeded the ${maxTokens} token limit. ` + - `Set CLAUDE_CODE_MAX_OUTPUT_TOKENS to override.`, + `Set OPENAI_MAX_TOKENS or CLAUDE_CODE_MAX_OUTPUT_TOKENS to override.`, apiError: 'max_output_tokens', error: 'max_output_tokens', })) @@ -286,8 +308,15 @@ export async function* queryModelOpenAI( // auto-retry at 64k in query.ts. The OpenAI path has no such retry, so // using the capped 8k default would silently truncate responses in // multi-turn conversations where thinking consumes most of the budget. + // + // Override priority: + // 1. options.maxOutputTokensOverride (programmatic) + // 2. OPENAI_MAX_TOKENS env var (OpenAI-specific, useful for local models + // with small context windows, e.g. RTX 3060 12GB running 65536-token models) + // 3. CLAUDE_CODE_MAX_OUTPUT_TOKENS env var (generic override) + // 4. upperLimit default (64000) const { upperLimit } = getModelMaxOutputTokens(openaiModel) - const maxTokens = options.maxOutputTokensOverride ?? upperLimit + const maxTokens = resolveOpenAIMaxTokens(upperLimit, options.maxOutputTokensOverride) // 11. Get client const client = getOpenAIClient({