fix: 尝试请求参数克隆以解除闭包引用

This commit is contained in:
claude-code-best
2026-05-05 09:29:09 +08:00
parent e7220c530f
commit 75952bde9c

View File

@@ -93,6 +93,7 @@ import {
asSystemPrompt, asSystemPrompt,
type SystemPrompt, type SystemPrompt,
} from '../../utils/systemPromptType.js' } from '../../utils/systemPromptType.js'
import { cloneDeep } from 'lodash-es'
import { tokenCountFromLastAPIResponse } from '../../utils/tokens.js' import { tokenCountFromLastAPIResponse } from '../../utils/tokens.js'
import { getDynamicConfig_BLOCKS_ON_INIT } from '../analytics/growthbook.js' import { getDynamicConfig_BLOCKS_ON_INIT } from '../analytics/growthbook.js'
import { import {
@@ -1442,7 +1443,7 @@ async function* queryModel(
const enablePromptCaching = const enablePromptCaching =
options.enablePromptCaching ?? getPromptCachingEnabled(options.model) options.enablePromptCaching ?? getPromptCachingEnabled(options.model)
const system = buildSystemPromptBlocks(systemPrompt, enablePromptCaching, { let system = buildSystemPromptBlocks(systemPrompt, enablePromptCaching, {
skipGlobalCacheForSystemPrompt: needsToolBasedCacheMarker, skipGlobalCacheForSystemPrompt: needsToolBasedCacheMarker,
querySource: options.querySource, querySource: options.querySource,
}) })
@@ -1462,7 +1463,7 @@ async function* queryModel(
model: advisorModel, model: advisorModel,
} as unknown as BetaToolUnion) } as unknown as BetaToolUnion)
} }
const allTools = [...toolSchemas, ...extraToolSchemas] let allTools = [...toolSchemas, ...extraToolSchemas]
const isFastMode = const isFastMode =
isFastModeEnabled() && isFastModeEnabled() &&
@@ -1586,6 +1587,39 @@ async function* queryModel(
const consumedCacheEdits = cachedMCEnabled ? consumePendingCacheEdits() : null const consumedCacheEdits = cachedMCEnabled ? consumePendingCacheEdits() : null
const consumedPinnedEdits = cachedMCEnabled ? getPinnedCacheEdits() : [] const consumedPinnedEdits = cachedMCEnabled ? getPinnedCacheEdits() : []
// ---------------------------------------------------------------------------
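// Serialization boundary: build the heavy request payload once, up front, so
// the closure below captures these frozen values rather than the originals.
// system and allTools are deep-cloned with cloneDeep; messages are frozen as
// the output of addCacheBreakpoints (NOTE(review): not passed through
// cloneDeep — whether the result shares objects with messagesForAPI depends
// on that helper; confirm). Because this runs once, enablePromptCaching and
// the cached-MC gate are fixed here and are no longer re-evaluated where the
// request params are assembled. After this point the original variables
// (messagesForAPI, system, allTools) are nulled out so this scope no longer
// pins them while the generator/closure is still alive (during long
// streaming responses or retry backoff).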
// ---------------------------------------------------------------------------
const frozenMessages = addCacheBreakpoints(
messagesForAPI,
enablePromptCaching,
options.querySource,
cachedMCEnabled &&
getAPIProvider() === 'firstParty' &&
options.querySource === 'repl_main_thread',
consumedCacheEdits as any,
consumedPinnedEdits as any,
options.skipCacheWrite,
)
const frozenSystem = cloneDeep(system)
const frozenTools = cloneDeep(allTools)
// Pre-compute scalars that post-streaming code needs, so messagesForAPI
// can be released before streaming starts.
const preMessagesCount = messagesForAPI.length
const preMessagesTokenCount = tokenCountFromLastAPIResponse(messagesForAPI)
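// Release originals for GC — drop this scope's references so that only the
// frozen* values and the pre-computed scalars remain captured by the closure.
// The `null!` assignments keep the declared non-null types, so a later read of
// messagesForAPI, system or allTools would fail at runtime without a compile
// error: all downstream code must use frozen* or the pre-computed scalars.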
messagesForAPI = null!
system = null!
allTools = null!
// Capture the betas sent in the last API request, including the ones that // Capture the betas sent in the last API request, including the ones that
// were dynamically added, so we can log and send it to telemetry. // were dynamically added, so we can log and send it to telemetry.
let lastRequestBetas: string[] | undefined let lastRequestBetas: string[] | undefined
@@ -1691,9 +1725,6 @@ async function* queryModel(
clearAllThinking: false, clearAllThinking: false,
}) })
const enablePromptCaching =
options.enablePromptCaching ?? getPromptCachingEnabled(retryContext.model)
// Fast mode: header is latched session-stable (cache-safe), but // Fast mode: header is latched session-stable (cache-safe), but
// `speed='fast'` stays dynamic so cooldown still suppresses the actual // `speed='fast'` stays dynamic so cooldown still suppresses the actual
// fast-mode request without changing the cache key. // fast-mode request without changing the cache key.
@@ -1724,13 +1755,10 @@ async function* queryModel(
} }
} }
// Cache editing beta: header is latched session-stable; useCachedMC // Cache editing beta: header is latched session-stable.
// (controls cache_edits body behavior) stays live so edits stop when // The useCachedMC gate (cache_edits body behavior) is baked into
// the feature disables but the header doesn't flip. // frozenMessages at the serialization boundary above, so this block
const useCachedMC = // only controls the beta header.
cachedMCEnabled &&
getAPIProvider() === 'firstParty' &&
options.querySource === 'repl_main_thread'
if ( if (
cacheEditingHeaderLatched && cacheEditingHeaderLatched &&
cacheEditingBetaHeader && cacheEditingBetaHeader &&
@@ -1759,17 +1787,9 @@ async function* queryModel(
return { return {
model: normalizeModelStringForAPI(options.model), model: normalizeModelStringForAPI(options.model),
messages: addCacheBreakpoints( messages: frozenMessages,
messagesForAPI, system: frozenSystem,
enablePromptCaching, tools: frozenTools,
options.querySource,
useCachedMC,
consumedCacheEdits as any,
consumedPinnedEdits as any,
options.skipCacheWrite,
),
system,
tools: allTools,
tool_choice: options.toolChoice, tool_choice: options.toolChoice,
...(useBetas && { betas: filteredBetas }), ...(useBetas && { betas: filteredBetas }),
metadata: getAPIMetadata(), metadata: getAPIMetadata(),
@@ -2844,8 +2864,8 @@ async function* queryModel(
logAPIError({ logAPIError({
error, error,
model: errorModel, model: errorModel,
messageCount: messagesForAPI.length, messageCount: preMessagesCount,
messageTokens: tokenCountFromLastAPIResponse(messagesForAPI), messageTokens: preMessagesTokenCount,
durationMs: Date.now() - start, durationMs: Date.now() - start,
durationMsIncludingRetries: Date.now() - startIncludingRetries, durationMsIncludingRetries: Date.now() - startIncludingRetries,
attempt: attemptNumber, attempt: attemptNumber,
@@ -2866,7 +2886,10 @@ async function* queryModel(
yield getAssistantMessageFromError(error, errorModel, { yield getAssistantMessageFromError(error, errorModel, {
messages, messages,
messagesForAPI, messagesForAPI: frozenMessages as unknown as (
| UserMessage
| AssistantMessage
)[],
}) })
releaseStreamResources() releaseStreamResources()
return return
@@ -2900,8 +2923,8 @@ async function* queryModel(
logAPIError({ logAPIError({
error, error,
model: errorModel, model: errorModel,
messageCount: messagesForAPI.length, messageCount: preMessagesCount,
messageTokens: tokenCountFromLastAPIResponse(messagesForAPI), messageTokens: preMessagesTokenCount,
durationMs: Date.now() - start, durationMs: Date.now() - start,
durationMsIncludingRetries: Date.now() - startIncludingRetries, durationMsIncludingRetries: Date.now() - startIncludingRetries,
attempt: attemptNumber, attempt: attemptNumber,
@@ -2924,7 +2947,10 @@ async function* queryModel(
yield getAssistantMessageFromError(error, errorModel, { yield getAssistantMessageFromError(error, errorModel, {
messages, messages,
messagesForAPI, messagesForAPI: frozenMessages as unknown as (
| UserMessage
| AssistantMessage
)[],
}) })
releaseStreamResources() releaseStreamResources()
return return
@@ -2980,14 +3006,19 @@ async function* queryModel(
// Precompute scalars so the fire-and-forget .then() closure doesn't pin the // Precompute scalars so the fire-and-forget .then() closure doesn't pin the
// full messagesForAPI array (the entire conversation up to the context window // full messagesForAPI array (the entire conversation up to the context window
// limit) until getToolPermissionContext() resolves. // limit) until getToolPermissionContext() resolves.
const logMessageCount = messagesForAPI.length // Note: messagesForAPI was nulled above (serialization boundary), so we use
const logMessageTokens = tokenCountFromLastAPIResponse(messagesForAPI) // the pre-computed scalars captured before the null-out.
const logMessageCount = preMessagesCount
const logMessageTokens = preMessagesTokenCount
// Record LLM observation in Langfuse (no-op if not configured) // Record LLM observation in Langfuse (no-op if not configured)
recordLLMObservation(options.langfuseTrace ?? null, { recordLLMObservation(options.langfuseTrace ?? null, {
model: resolvedModel, model: resolvedModel,
provider: getAPIProvider(), provider: getAPIProvider(),
input: convertMessagesToLangfuse(messagesForAPI, systemPrompt), input: convertMessagesToLangfuse(
frozenMessages as Parameters<typeof convertMessagesToLangfuse>[0],
systemPrompt,
),
output: convertOutputToLangfuse(newMessages), output: convertOutputToLangfuse(newMessages),
usage: { usage: {
input_tokens: usage.input_tokens, input_tokens: usage.input_tokens,