mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-15 12:55:51 +00:00
feat: 添加环境变量支持以覆盖 max_tokens 设置
This commit is contained in:
@@ -194,6 +194,16 @@ mock.module('../convertTools.js', () => ({
|
||||
// Stub for utils/context.js: every export is replaced by a fixed constant or a
// function returning a fixed value, so the results do not depend on the model.
mock.module('../../../../utils/context.js', () => {
  const contextWindow = 200_000
  const outputTokenCap = 8192
  const alwaysFalse = () => false

  return {
    // A fresh object per call, with identical upper limit and default.
    getModelMaxOutputTokens: () => {
      return { upperLimit: outputTokenCap, default: outputTokenCap }
    },
    getContextWindowForModel: () => contextWindow,

    // All 1M-context switches report false.
    modelSupports1M: alwaysFalse,
    has1mContext: alwaysFalse,
    is1mContextDisabled: alwaysFalse,
    getSonnet1mExpTreatmentEnabled: alwaysFalse,

    // Constant exports.
    MODEL_CONTEXT_WINDOW_DEFAULT: contextWindow,
    COMPACT_MAX_OUTPUT_TOKENS: 20_000,
    CAPPED_DEFAULT_MAX_TOKENS: 8_000,
    ESCALATED_MAX_TOKENS: 64_000,

    calculateContextPercentages: () => {
      return { used: null, remaining: null }
    },
    getMaxThinkingTokensForModel: () => 8191,
  }
})
|
||||
|
||||
mock.module('../../../../utils/messages.js', () => ({
|
||||
@@ -211,6 +221,22 @@ mock.module('../../../../utils/api.js', () => ({
|
||||
toolToAPISchema: async (t: any) => t,
|
||||
}))
|
||||
|
||||
// Stub for Tool.js: an empty permission context and a name matcher that never
// matches.
mock.module('../../../../Tool.js', () => {
  // Builds a new object (with new arrays) on every call.
  const buildEmptyPermissionContext = () => {
    return {
      alwaysAllow: [],
      alwaysDeny: [],
      needsPermission: [],
      mode: 'default',
      isBypassingPermissions: false,
    }
  }

  return {
    getEmptyToolPermissionContext: buildEmptyPermissionContext,
    toolMatchesName: () => false,
  }
})
|
||||
|
||||
// Stub for utils/envUtils.js: each helper recognizes only the exact literal
// strings listed below (no trimming, no case folding).
mock.module('../../../../utils/envUtils.js', () => {
  const truthyLiterals: ReadonlyArray<string | undefined> = ['1', 'true']
  const falsyLiterals: ReadonlyArray<string | undefined> = ['0', 'false', 'no', 'off']

  return {
    isEnvTruthy: (v: string | undefined) => truthyLiterals.includes(v),
    isEnvDefinedFalsy: (v: string | undefined) => falsyLiterals.includes(v),
  }
})
|
||||
|
||||
mock.module('../../../../utils/toolSearch.js', () => ({
|
||||
isToolSearchEnabled: async () => false,
|
||||
extractDiscoveredToolNames: () => new Set(),
|
||||
@@ -451,4 +477,83 @@ describe('queryModelOpenAI — max_tokens forwarded to request', () => {
|
||||
expect(_lastCreateArgs).not.toBeNull()
|
||||
expect(_lastCreateArgs!.max_tokens).toBe(8192)
|
||||
})
|
||||
|
||||
test('OPENAI_MAX_TOKENS env var overrides max_tokens', async () => {
  // Remember the ambient value so the override never leaks into other tests.
  const savedValue = process.env.OPENAI_MAX_TOKENS
  const restoreEnv = (): void => {
    if (savedValue !== undefined) {
      process.env.OPENAI_MAX_TOKENS = savedValue
      return
    }
    delete process.env.OPENAI_MAX_TOKENS
  }

  process.env.OPENAI_MAX_TOKENS = '4096'
  try {
    // Smallest complete stream: one text block followed by an end_turn stop.
    _nextEvents = [
      makeMessageStart(),
      makeContentBlockStart(0, 'text'),
      makeTextDelta(0, 'hi'),
      makeContentBlockStop(0),
      makeMessageDelta('end_turn', 5),
      makeMessageStop(),
    ]

    await runQueryModel(_nextEvents)

    // The env value, parsed as an integer, must be what reaches the request.
    expect(_lastCreateArgs).not.toBeNull()
    expect(_lastCreateArgs!.max_tokens).toBe(4096)
  } finally {
    restoreEnv()
  }
})
|
||||
|
||||
test('CLAUDE_CODE_MAX_OUTPUT_TOKENS env var overrides max_tokens', async () => {
  // Snapshot BOTH variables. OPENAI_MAX_TOKENS outranks
  // CLAUDE_CODE_MAX_OUTPUT_TOKENS in the override priority, so an ambient
  // OPENAI_MAX_TOKENS (developer shell, CI) would otherwise mask the value
  // under test and make this assertion fail for unrelated reasons.
  const originalClaude = process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS
  const originalOpenai = process.env.OPENAI_MAX_TOKENS

  process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS = '2048'
  delete process.env.OPENAI_MAX_TOKENS
  try {
    // Smallest complete stream: one text block followed by an end_turn stop.
    _nextEvents = [
      makeMessageStart(),
      makeContentBlockStart(0, 'text'),
      makeTextDelta(0, 'hi'),
      makeContentBlockStop(0),
      makeMessageDelta('end_turn', 5),
      makeMessageStop(),
    ]

    await runQueryModel(_nextEvents)

    expect(_lastCreateArgs).not.toBeNull()
    expect(_lastCreateArgs!.max_tokens).toBe(2048)
  } finally {
    // Restore the environment exactly as it was, including "unset".
    if (originalClaude === undefined) {
      delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS
    } else {
      process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS = originalClaude
    }
    if (originalOpenai === undefined) {
      delete process.env.OPENAI_MAX_TOKENS
    } else {
      process.env.OPENAI_MAX_TOKENS = originalOpenai
    }
  }
})
|
||||
|
||||
test('OPENAI_MAX_TOKENS takes priority over CLAUDE_CODE_MAX_OUTPUT_TOKENS', async () => {
  // Snapshot both variables up front; restored in the same order afterwards.
  const envBackup = {
    OPENAI_MAX_TOKENS: process.env.OPENAI_MAX_TOKENS,
    CLAUDE_CODE_MAX_OUTPUT_TOKENS: process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS,
  }

  // Set both so the assertion can only pass if the OpenAI-specific one wins.
  process.env.OPENAI_MAX_TOKENS = '4096'
  process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS = '2048'

  try {
    _nextEvents = [
      makeMessageStart(),
      makeContentBlockStart(0, 'text'),
      makeTextDelta(0, 'hi'),
      makeContentBlockStop(0),
      makeMessageDelta('end_turn', 5),
      makeMessageStop(),
    ]

    await runQueryModel(_nextEvents)

    expect(_lastCreateArgs).not.toBeNull()
    expect(_lastCreateArgs!.max_tokens).toBe(4096)
  } finally {
    for (const [key, value] of Object.entries(envBackup)) {
      if (value === undefined) {
        delete process.env[key]
      } else {
        process.env[key] = value
      }
    }
  }
})
|
||||
})
|
||||
|
||||
@@ -71,6 +71,28 @@ export function isOpenAIThinkingEnabled(model: string): boolean {
|
||||
return modelLower.includes('deepseek-reasoner') || modelLower.includes('deepseek-v3.2')
|
||||
}
|
||||
|
||||
/**
 * Resolve max output tokens for the OpenAI-compatible path.
 *
 * Override priority (first usable value wins):
 *   1. maxOutputTokensOverride (programmatic, from query pipeline)
 *   2. OPENAI_MAX_TOKENS env var (OpenAI-specific, useful for local models
 *      with small context windows, e.g. RTX 3060 12GB running 65536-token models)
 *   3. CLAUDE_CODE_MAX_OUTPUT_TOKENS env var (generic override)
 *   4. upperLimit (the fallback supplied by the caller)
 *
 * An env var is ignored, and resolution falls through to the next source,
 * when it is unset, empty, not parseable as a base-10 integer, zero, or
 * negative. A negative token budget is never a valid request value, so it is
 * treated the same as garbage input instead of being forwarded.
 *
 * @param upperLimit - Fallback used when no override applies.
 * @param maxOutputTokensOverride - Programmatic override; returned as-is
 *   whenever it is neither `undefined` nor `null`.
 * @returns The max-token value to use for the request.
 *
 * @internal Exported for testing purposes only
 */
export function resolveOpenAIMaxTokens(
  upperLimit: number,
  maxOutputTokensOverride?: number,
): number {
  // Shared parser for both env vars. parseInt keeps the original lenient
  // "leading integer" semantics; NaN fails the `> 0` check just like zero
  // and negative values do.
  const readPositiveInt = (raw: string | undefined): number | undefined => {
    if (!raw) return undefined
    const parsed = parseInt(raw, 10)
    return parsed > 0 ? parsed : undefined
  }

  return (
    maxOutputTokensOverride ??
    readPositiveInt(process.env.OPENAI_MAX_TOKENS) ??
    readPositiveInt(process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS) ??
    upperLimit
  )
}
|
||||
|
||||
/**
|
||||
* Build the request body for OpenAI chat.completions.create().
|
||||
* Extracted for testability — the thinking mode params are injected here.
|
||||
@@ -165,7 +187,7 @@ function assembleFinalAssistantOutputs(params: {
|
||||
if (stopReason === 'max_tokens') {
|
||||
outputs.push(createAssistantAPIErrorMessage({
|
||||
content: `Output truncated: response exceeded the ${maxTokens} token limit. ` +
|
||||
`Set CLAUDE_CODE_MAX_OUTPUT_TOKENS to override.`,
|
||||
`Set OPENAI_MAX_TOKENS or CLAUDE_CODE_MAX_OUTPUT_TOKENS to override.`,
|
||||
apiError: 'max_output_tokens',
|
||||
error: 'max_output_tokens',
|
||||
}))
|
||||
@@ -286,8 +308,15 @@ export async function* queryModelOpenAI(
|
||||
// auto-retry at 64k in query.ts. The OpenAI path has no such retry, so
|
||||
// using the capped 8k default would silently truncate responses in
|
||||
// multi-turn conversations where thinking consumes most of the budget.
|
||||
//
|
||||
// Override priority:
|
||||
// 1. options.maxOutputTokensOverride (programmatic)
|
||||
// 2. OPENAI_MAX_TOKENS env var (OpenAI-specific, useful for local models
|
||||
// with small context windows, e.g. RTX 3060 12GB running 65536-token models)
|
||||
// 3. CLAUDE_CODE_MAX_OUTPUT_TOKENS env var (generic override)
|
||||
// 4. upperLimit default (64000)
|
||||
const { upperLimit } = getModelMaxOutputTokens(openaiModel)
|
||||
const maxTokens = options.maxOutputTokensOverride ?? upperLimit
|
||||
const maxTokens = resolveOpenAIMaxTokens(upperLimit, options.maxOutputTokensOverride)
|
||||
|
||||
// 11. Get client
|
||||
const client = getOpenAIClient({
|
||||
|
||||
Reference in New Issue
Block a user