mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-22 08:15:53 +00:00
feat: 添加环境变量支持以覆盖 max_tokens 设置
This commit is contained in:
@@ -194,6 +194,16 @@ mock.module('../convertTools.js', () => ({
|
|||||||
// Stub of utils/context.js: every export is a constant or a function that
// returns a fixed value, so the tests do not depend on real model metadata.
mock.module('../../../../utils/context.js', () => {
  // Single fixed context-window size used by both the getter and the constant.
  const contextWindow = 200_000

  return {
    // Output-token limits; both limits are pinned to 8192 in this stub.
    getModelMaxOutputTokens: () => ({ upperLimit: 8192, default: 8192 }),
    getContextWindowForModel: () => contextWindow,

    // All 1M-context related switches report false.
    modelSupports1M: () => false,
    has1mContext: () => false,
    is1mContextDisabled: () => false,
    getSonnet1mExpTreatmentEnabled: () => false,

    // Exported numeric constants.
    MODEL_CONTEXT_WINDOW_DEFAULT: contextWindow,
    COMPACT_MAX_OUTPUT_TOKENS: 20_000,
    CAPPED_DEFAULT_MAX_TOKENS: 8_000,
    ESCALATED_MAX_TOKENS: 64_000,

    calculateContextPercentages: () => ({ used: null, remaining: null }),
    getMaxThinkingTokensForModel: () => 8191,
  }
})
|
||||||
|
|
||||||
mock.module('../../../../utils/messages.js', () => ({
|
mock.module('../../../../utils/messages.js', () => ({
|
||||||
@@ -211,6 +221,22 @@ mock.module('../../../../utils/api.js', () => ({
|
|||||||
toolToAPISchema: async (t: any) => t,
|
toolToAPISchema: async (t: any) => t,
|
||||||
}))
|
}))
|
||||||
|
|
||||||
|
// Stub of Tool.js exposing only the two exports listed below.
mock.module('../../../../Tool.js', () => {
  // Builds a fresh permission context on every call: empty rule lists,
  // 'default' mode, and bypass turned off.
  const getEmptyToolPermissionContext = () => {
    return {
      alwaysAllow: [],
      alwaysDeny: [],
      needsPermission: [],
      mode: 'default',
      isBypassingPermissions: false,
    }
  }

  // Name matching always reports "no match" in this stub.
  const toolMatchesName = () => false

  return { getEmptyToolPermissionContext, toolMatchesName }
})
|
||||||
|
|
||||||
|
// Stub of utils/envUtils.js with simplified string-based flag checks.
mock.module('../../../../utils/envUtils.js', () => {
  // True only for the exact strings '1' and 'true'.
  const isEnvTruthy = (v: string | undefined): boolean => {
    switch (v) {
      case '1':
      case 'true':
        return true
      default:
        return false
    }
  }

  // True only for the exact strings '0', 'false', 'no' and 'off'.
  const isEnvDefinedFalsy = (v: string | undefined): boolean => {
    switch (v) {
      case '0':
      case 'false':
      case 'no':
      case 'off':
        return true
      default:
        return false
    }
  }

  return { isEnvTruthy, isEnvDefinedFalsy }
})
|
||||||
|
|
||||||
mock.module('../../../../utils/toolSearch.js', () => ({
|
mock.module('../../../../utils/toolSearch.js', () => ({
|
||||||
isToolSearchEnabled: async () => false,
|
isToolSearchEnabled: async () => false,
|
||||||
extractDiscoveredToolNames: () => new Set(),
|
extractDiscoveredToolNames: () => new Set(),
|
||||||
@@ -451,4 +477,83 @@ describe('queryModelOpenAI — max_tokens forwarded to request', () => {
|
|||||||
expect(_lastCreateArgs).not.toBeNull()
|
expect(_lastCreateArgs).not.toBeNull()
|
||||||
expect(_lastCreateArgs!.max_tokens).toBe(8192)
|
expect(_lastCreateArgs!.max_tokens).toBe(8192)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test('OPENAI_MAX_TOKENS env var overrides max_tokens', async () => {
  // Remember the ambient value so the override cannot leak into other tests.
  const savedValue = process.env.OPENAI_MAX_TOKENS
  process.env.OPENAI_MAX_TOKENS = '4096'

  try {
    // Script a minimal single-text-block stream that ends with 'end_turn'.
    _nextEvents = [
      makeMessageStart(),
      makeContentBlockStart(0, 'text'),
      makeTextDelta(0, 'hi'),
      makeContentBlockStop(0),
      makeMessageDelta('end_turn', 5),
      makeMessageStop(),
    ]

    await runQueryModel(_nextEvents)

    // The recorded request arguments must carry the env-provided limit.
    expect(_lastCreateArgs).not.toBeNull()
    expect(_lastCreateArgs!.max_tokens).toBe(4096)
  } finally {
    // Restore the variable exactly: re-set it if it existed, otherwise remove it.
    if (savedValue !== undefined) {
      process.env.OPENAI_MAX_TOKENS = savedValue
    } else {
      delete process.env.OPENAI_MAX_TOKENS
    }
  }
})
|
||||||
|
|
||||||
|
test('CLAUDE_CODE_MAX_OUTPUT_TOKENS env var overrides max_tokens', async () => {
  // Remember the ambient value so the override cannot leak into other tests.
  const savedValue = process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS
  process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS = '2048'

  try {
    // Script a minimal single-text-block stream that ends with 'end_turn'.
    _nextEvents = [
      makeMessageStart(),
      makeContentBlockStart(0, 'text'),
      makeTextDelta(0, 'hi'),
      makeContentBlockStop(0),
      makeMessageDelta('end_turn', 5),
      makeMessageStop(),
    ]

    await runQueryModel(_nextEvents)

    // The recorded request arguments must carry the env-provided limit.
    expect(_lastCreateArgs).not.toBeNull()
    expect(_lastCreateArgs!.max_tokens).toBe(2048)
  } finally {
    // Restore the variable exactly: re-set it if it existed, otherwise remove it.
    if (savedValue !== undefined) {
      process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS = savedValue
    } else {
      delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS
    }
  }
})
|
||||||
|
|
||||||
|
test('OPENAI_MAX_TOKENS takes priority over CLAUDE_CODE_MAX_OUTPUT_TOKENS', async () => {
  // Puts one variable back to its pre-test state (re-set or removed).
  const restoreEnv = (
    key: 'OPENAI_MAX_TOKENS' | 'CLAUDE_CODE_MAX_OUTPUT_TOKENS',
    saved: string | undefined,
  ) => {
    if (saved === undefined) {
      delete process.env[key]
    } else {
      process.env[key] = saved
    }
  }

  // Snapshot both variables before overriding them with different values.
  const origOpenai = process.env.OPENAI_MAX_TOKENS
  const origClaude = process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS
  process.env.OPENAI_MAX_TOKENS = '4096'
  process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS = '2048'

  try {
    // Script a minimal single-text-block stream that ends with 'end_turn'.
    _nextEvents = [
      makeMessageStart(),
      makeContentBlockStart(0, 'text'),
      makeTextDelta(0, 'hi'),
      makeContentBlockStop(0),
      makeMessageDelta('end_turn', 5),
      makeMessageStop(),
    ]

    await runQueryModel(_nextEvents)

    // With both variables set, the OPENAI_MAX_TOKENS value must be the one sent.
    expect(_lastCreateArgs).not.toBeNull()
    expect(_lastCreateArgs!.max_tokens).toBe(4096)
  } finally {
    restoreEnv('OPENAI_MAX_TOKENS', origOpenai)
    restoreEnv('CLAUDE_CODE_MAX_OUTPUT_TOKENS', origClaude)
  }
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -71,6 +71,28 @@ export function isOpenAIThinkingEnabled(model: string): boolean {
|
|||||||
return modelLower.includes('deepseek-reasoner') || modelLower.includes('deepseek-v3.2')
|
return modelLower.includes('deepseek-reasoner') || modelLower.includes('deepseek-v3.2')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Parse an environment-variable value as a strictly positive integer.
 *
 * Surrounding whitespace and a leading `+` are tolerated. Anything else —
 * an unset or empty value, a negative number, zero, a decimal, or text with a
 * non-digit suffix such as `4k` — yields `undefined` so the caller can fall
 * through to the next source instead of sending a nonsensical limit.
 */
function parsePositiveIntEnv(raw: string | undefined): number | undefined {
  const trimmed = raw?.trim()
  if (!trimmed || !/^\+?\d+$/.test(trimmed)) return undefined
  const value = Number(trimmed)
  // Reject 0 and values too large to be represented exactly.
  return Number.isSafeInteger(value) && value > 0 ? value : undefined
}

/**
 * Resolve max output tokens for the OpenAI-compatible path.
 *
 * Override priority:
 *   1. maxOutputTokensOverride (programmatic; used as-is whenever it is not
 *      null/undefined)
 *   2. OPENAI_MAX_TOKENS env var (OpenAI-specific, useful for local models
 *      with small context windows)
 *   3. CLAUDE_CODE_MAX_OUTPUT_TOKENS env var (generic override)
 *   4. upperLimit, as supplied by the caller
 *
 * An env var that is unset, empty, or not a strictly positive integer is
 * ignored and the next source in the list is consulted.
 *
 * @param upperLimit - Fallback limit used when no override applies.
 * @param maxOutputTokensOverride - Optional programmatic override.
 * @returns The token limit to send as `max_tokens`.
 *
 * @internal Exported for testing purposes only
 */
export function resolveOpenAIMaxTokens(
  upperLimit: number,
  maxOutputTokensOverride?: number,
): number {
  return (
    maxOutputTokensOverride ??
    parsePositiveIntEnv(process.env.OPENAI_MAX_TOKENS) ??
    parsePositiveIntEnv(process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS) ??
    upperLimit
  )
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build the request body for OpenAI chat.completions.create().
|
* Build the request body for OpenAI chat.completions.create().
|
||||||
* Extracted for testability — the thinking mode params are injected here.
|
* Extracted for testability — the thinking mode params are injected here.
|
||||||
@@ -165,7 +187,7 @@ function assembleFinalAssistantOutputs(params: {
|
|||||||
if (stopReason === 'max_tokens') {
|
if (stopReason === 'max_tokens') {
|
||||||
outputs.push(createAssistantAPIErrorMessage({
|
outputs.push(createAssistantAPIErrorMessage({
|
||||||
content: `Output truncated: response exceeded the ${maxTokens} token limit. ` +
|
content: `Output truncated: response exceeded the ${maxTokens} token limit. ` +
|
||||||
`Set CLAUDE_CODE_MAX_OUTPUT_TOKENS to override.`,
|
`Set OPENAI_MAX_TOKENS or CLAUDE_CODE_MAX_OUTPUT_TOKENS to override.`,
|
||||||
apiError: 'max_output_tokens',
|
apiError: 'max_output_tokens',
|
||||||
error: 'max_output_tokens',
|
error: 'max_output_tokens',
|
||||||
}))
|
}))
|
||||||
@@ -286,8 +308,15 @@ export async function* queryModelOpenAI(
|
|||||||
// auto-retry at 64k in query.ts. The OpenAI path has no such retry, so
|
// auto-retry at 64k in query.ts. The OpenAI path has no such retry, so
|
||||||
// using the capped 8k default would silently truncate responses in
|
// using the capped 8k default would silently truncate responses in
|
||||||
// multi-turn conversations where thinking consumes most of the budget.
|
// multi-turn conversations where thinking consumes most of the budget.
|
||||||
|
//
|
||||||
|
// Override priority:
|
||||||
|
// 1. options.maxOutputTokensOverride (programmatic)
|
||||||
|
// 2. OPENAI_MAX_TOKENS env var (OpenAI-specific, useful for local models
|
||||||
|
// with small context windows, e.g. RTX 3060 12GB running 65536-token models)
|
||||||
|
// 3. CLAUDE_CODE_MAX_OUTPUT_TOKENS env var (generic override)
|
||||||
|
// 4. upperLimit default (64000)
|
||||||
const { upperLimit } = getModelMaxOutputTokens(openaiModel)
|
const { upperLimit } = getModelMaxOutputTokens(openaiModel)
|
||||||
const maxTokens = options.maxOutputTokensOverride ?? upperLimit
|
const maxTokens = resolveOpenAIMaxTokens(upperLimit, options.maxOutputTokensOverride)
|
||||||
|
|
||||||
// 11. Get client
|
// 11. Get client
|
||||||
const client = getOpenAIClient({
|
const client = getOpenAIClient({
|
||||||
|
|||||||
Reference in New Issue
Block a user