mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-22 08:15:53 +00:00
fix: 尝试请求参数克隆以解除闭包引用
This commit is contained in:
@@ -93,6 +93,7 @@ import {
|
|||||||
asSystemPrompt,
|
asSystemPrompt,
|
||||||
type SystemPrompt,
|
type SystemPrompt,
|
||||||
} from '../../utils/systemPromptType.js'
|
} from '../../utils/systemPromptType.js'
|
||||||
|
import { cloneDeep } from 'lodash-es'
|
||||||
import { tokenCountFromLastAPIResponse } from '../../utils/tokens.js'
|
import { tokenCountFromLastAPIResponse } from '../../utils/tokens.js'
|
||||||
import { getDynamicConfig_BLOCKS_ON_INIT } from '../analytics/growthbook.js'
|
import { getDynamicConfig_BLOCKS_ON_INIT } from '../analytics/growthbook.js'
|
||||||
import {
|
import {
|
||||||
@@ -1442,7 +1443,7 @@ async function* queryModel(
|
|||||||
|
|
||||||
const enablePromptCaching =
|
const enablePromptCaching =
|
||||||
options.enablePromptCaching ?? getPromptCachingEnabled(options.model)
|
options.enablePromptCaching ?? getPromptCachingEnabled(options.model)
|
||||||
const system = buildSystemPromptBlocks(systemPrompt, enablePromptCaching, {
|
let system = buildSystemPromptBlocks(systemPrompt, enablePromptCaching, {
|
||||||
skipGlobalCacheForSystemPrompt: needsToolBasedCacheMarker,
|
skipGlobalCacheForSystemPrompt: needsToolBasedCacheMarker,
|
||||||
querySource: options.querySource,
|
querySource: options.querySource,
|
||||||
})
|
})
|
||||||
@@ -1462,7 +1463,7 @@ async function* queryModel(
|
|||||||
model: advisorModel,
|
model: advisorModel,
|
||||||
} as unknown as BetaToolUnion)
|
} as unknown as BetaToolUnion)
|
||||||
}
|
}
|
||||||
const allTools = [...toolSchemas, ...extraToolSchemas]
|
let allTools = [...toolSchemas, ...extraToolSchemas]
|
||||||
|
|
||||||
const isFastMode =
|
const isFastMode =
|
||||||
isFastModeEnabled() &&
|
isFastModeEnabled() &&
|
||||||
@@ -1586,6 +1587,39 @@ async function* queryModel(
|
|||||||
const consumedCacheEdits = cachedMCEnabled ? consumePendingCacheEdits() : null
|
const consumedCacheEdits = cachedMCEnabled ? consumePendingCacheEdits() : null
|
||||||
const consumedPinnedEdits = cachedMCEnabled ? getPinnedCacheEdits() : []
|
const consumedPinnedEdits = cachedMCEnabled ? getPinnedCacheEdits() : []
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Serialization boundary: deep-clone heavy data so the closure below captures
|
||||||
|
// independent copies, not references to the originals. After this point the
|
||||||
|
// original variables (messagesForAPI, system, allTools) are nulled out so
|
||||||
|
// they can be GC'd even while the generator/closure is still alive (during
|
||||||
|
// long streaming responses or retry backoff).
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
const frozenMessages = addCacheBreakpoints(
|
||||||
|
messagesForAPI,
|
||||||
|
enablePromptCaching,
|
||||||
|
options.querySource,
|
||||||
|
cachedMCEnabled &&
|
||||||
|
getAPIProvider() === 'firstParty' &&
|
||||||
|
options.querySource === 'repl_main_thread',
|
||||||
|
consumedCacheEdits as any,
|
||||||
|
consumedPinnedEdits as any,
|
||||||
|
options.skipCacheWrite,
|
||||||
|
)
|
||||||
|
const frozenSystem = cloneDeep(system)
|
||||||
|
const frozenTools = cloneDeep(allTools)
|
||||||
|
|
||||||
|
// Pre-compute scalars that post-streaming code needs, so messagesForAPI
|
||||||
|
// can be released before streaming starts.
|
||||||
|
const preMessagesCount = messagesForAPI.length
|
||||||
|
const preMessagesTokenCount = tokenCountFromLastAPIResponse(messagesForAPI)
|
||||||
|
|
||||||
|
// Release originals for GC — the frozen* copies and pre-computed scalars
|
||||||
|
// are now the only references to this data inside the closure.
|
||||||
|
// After null-out, all downstream code uses frozen* or pre-computed scalars.
|
||||||
|
messagesForAPI = null!
|
||||||
|
system = null!
|
||||||
|
allTools = null!
|
||||||
|
|
||||||
// Capture the betas sent in the last API request, including the ones that
|
// Capture the betas sent in the last API request, including the ones that
|
||||||
// were dynamically added, so we can log and send it to telemetry.
|
// were dynamically added, so we can log and send it to telemetry.
|
||||||
let lastRequestBetas: string[] | undefined
|
let lastRequestBetas: string[] | undefined
|
||||||
@@ -1691,9 +1725,6 @@ async function* queryModel(
|
|||||||
clearAllThinking: false,
|
clearAllThinking: false,
|
||||||
})
|
})
|
||||||
|
|
||||||
const enablePromptCaching =
|
|
||||||
options.enablePromptCaching ?? getPromptCachingEnabled(retryContext.model)
|
|
||||||
|
|
||||||
// Fast mode: header is latched session-stable (cache-safe), but
|
// Fast mode: header is latched session-stable (cache-safe), but
|
||||||
// `speed='fast'` stays dynamic so cooldown still suppresses the actual
|
// `speed='fast'` stays dynamic so cooldown still suppresses the actual
|
||||||
// fast-mode request without changing the cache key.
|
// fast-mode request without changing the cache key.
|
||||||
@@ -1724,13 +1755,10 @@ async function* queryModel(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cache editing beta: header is latched session-stable; useCachedMC
|
// Cache editing beta: header is latched session-stable.
|
||||||
// (controls cache_edits body behavior) stays live so edits stop when
|
// The useCachedMC gate (cache_edits body behavior) is baked into
|
||||||
// the feature disables but the header doesn't flip.
|
// frozenMessages at the serialization boundary above, so this block
|
||||||
const useCachedMC =
|
// only controls the beta header.
|
||||||
cachedMCEnabled &&
|
|
||||||
getAPIProvider() === 'firstParty' &&
|
|
||||||
options.querySource === 'repl_main_thread'
|
|
||||||
if (
|
if (
|
||||||
cacheEditingHeaderLatched &&
|
cacheEditingHeaderLatched &&
|
||||||
cacheEditingBetaHeader &&
|
cacheEditingBetaHeader &&
|
||||||
@@ -1759,17 +1787,9 @@ async function* queryModel(
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
model: normalizeModelStringForAPI(options.model),
|
model: normalizeModelStringForAPI(options.model),
|
||||||
messages: addCacheBreakpoints(
|
messages: frozenMessages,
|
||||||
messagesForAPI,
|
system: frozenSystem,
|
||||||
enablePromptCaching,
|
tools: frozenTools,
|
||||||
options.querySource,
|
|
||||||
useCachedMC,
|
|
||||||
consumedCacheEdits as any,
|
|
||||||
consumedPinnedEdits as any,
|
|
||||||
options.skipCacheWrite,
|
|
||||||
),
|
|
||||||
system,
|
|
||||||
tools: allTools,
|
|
||||||
tool_choice: options.toolChoice,
|
tool_choice: options.toolChoice,
|
||||||
...(useBetas && { betas: filteredBetas }),
|
...(useBetas && { betas: filteredBetas }),
|
||||||
metadata: getAPIMetadata(),
|
metadata: getAPIMetadata(),
|
||||||
@@ -2844,8 +2864,8 @@ async function* queryModel(
|
|||||||
logAPIError({
|
logAPIError({
|
||||||
error,
|
error,
|
||||||
model: errorModel,
|
model: errorModel,
|
||||||
messageCount: messagesForAPI.length,
|
messageCount: preMessagesCount,
|
||||||
messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
|
messageTokens: preMessagesTokenCount,
|
||||||
durationMs: Date.now() - start,
|
durationMs: Date.now() - start,
|
||||||
durationMsIncludingRetries: Date.now() - startIncludingRetries,
|
durationMsIncludingRetries: Date.now() - startIncludingRetries,
|
||||||
attempt: attemptNumber,
|
attempt: attemptNumber,
|
||||||
@@ -2866,7 +2886,10 @@ async function* queryModel(
|
|||||||
|
|
||||||
yield getAssistantMessageFromError(error, errorModel, {
|
yield getAssistantMessageFromError(error, errorModel, {
|
||||||
messages,
|
messages,
|
||||||
messagesForAPI,
|
messagesForAPI: frozenMessages as unknown as (
|
||||||
|
| UserMessage
|
||||||
|
| AssistantMessage
|
||||||
|
)[],
|
||||||
})
|
})
|
||||||
releaseStreamResources()
|
releaseStreamResources()
|
||||||
return
|
return
|
||||||
@@ -2900,8 +2923,8 @@ async function* queryModel(
|
|||||||
logAPIError({
|
logAPIError({
|
||||||
error,
|
error,
|
||||||
model: errorModel,
|
model: errorModel,
|
||||||
messageCount: messagesForAPI.length,
|
messageCount: preMessagesCount,
|
||||||
messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
|
messageTokens: preMessagesTokenCount,
|
||||||
durationMs: Date.now() - start,
|
durationMs: Date.now() - start,
|
||||||
durationMsIncludingRetries: Date.now() - startIncludingRetries,
|
durationMsIncludingRetries: Date.now() - startIncludingRetries,
|
||||||
attempt: attemptNumber,
|
attempt: attemptNumber,
|
||||||
@@ -2924,7 +2947,10 @@ async function* queryModel(
|
|||||||
|
|
||||||
yield getAssistantMessageFromError(error, errorModel, {
|
yield getAssistantMessageFromError(error, errorModel, {
|
||||||
messages,
|
messages,
|
||||||
messagesForAPI,
|
messagesForAPI: frozenMessages as unknown as (
|
||||||
|
| UserMessage
|
||||||
|
| AssistantMessage
|
||||||
|
)[],
|
||||||
})
|
})
|
||||||
releaseStreamResources()
|
releaseStreamResources()
|
||||||
return
|
return
|
||||||
@@ -2980,14 +3006,19 @@ async function* queryModel(
|
|||||||
// Precompute scalars so the fire-and-forget .then() closure doesn't pin the
|
// Precompute scalars so the fire-and-forget .then() closure doesn't pin the
|
||||||
// full messagesForAPI array (the entire conversation up to the context window
|
// full messagesForAPI array (the entire conversation up to the context window
|
||||||
// limit) until getToolPermissionContext() resolves.
|
// limit) until getToolPermissionContext() resolves.
|
||||||
const logMessageCount = messagesForAPI.length
|
// Note: messagesForAPI was nulled above (serialization boundary), so we use
|
||||||
const logMessageTokens = tokenCountFromLastAPIResponse(messagesForAPI)
|
// the pre-computed scalars captured before the null-out.
|
||||||
|
const logMessageCount = preMessagesCount
|
||||||
|
const logMessageTokens = preMessagesTokenCount
|
||||||
|
|
||||||
// Record LLM observation in Langfuse (no-op if not configured)
|
// Record LLM observation in Langfuse (no-op if not configured)
|
||||||
recordLLMObservation(options.langfuseTrace ?? null, {
|
recordLLMObservation(options.langfuseTrace ?? null, {
|
||||||
model: resolvedModel,
|
model: resolvedModel,
|
||||||
provider: getAPIProvider(),
|
provider: getAPIProvider(),
|
||||||
input: convertMessagesToLangfuse(messagesForAPI, systemPrompt),
|
input: convertMessagesToLangfuse(
|
||||||
|
frozenMessages as Parameters<typeof convertMessagesToLangfuse>[0],
|
||||||
|
systemPrompt,
|
||||||
|
),
|
||||||
output: convertOutputToLangfuse(newMessages),
|
output: convertOutputToLangfuse(newMessages),
|
||||||
usage: {
|
usage: {
|
||||||
input_tokens: usage.input_tokens,
|
input_tokens: usage.input_tokens,
|
||||||
|
|||||||
Reference in New Issue
Block a user