fix: 内存优化 — 预测性 compact 阈值、增量 lookups 的 orphaned tool_use 修复、deferred slice 引用优化

- P0: REPL.tsx 用 useMemo 包裹 deferred messages slice,避免每次渲染创建新数组引用导致不必要的后台重渲染
- P1: 预测性 compact 阈值改用 effectiveContextWindow - growth,消除与 autocompact buffer 的双重预留;TOOL_RESULT_GROWTH_ESTIMATE 从 20K 降至 15K
- P2: 增量 lookups 增加 lastAssistantMsgId 一致性检查和 orphaned server_tool_use/mcp_tool_use 扫描,防止 UI 永久 loading
- P3: reactiveCompact 类型断言改为直接使用 'compact' 字面量
- docs: CLAUDE.md 统一使用 precheck 替代分散的 typecheck/lint/test 命令

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
claude-code-best
2026-05-02 20:32:00 +08:00
parent 4cbf406c70
commit 198c09b263
10 changed files with 432 additions and 37 deletions

View File

@@ -7,6 +7,9 @@ import type { CanUseToolFn } from './hooks/useCanUseTool.js'
import { FallbackTriggeredError } from './services/api/withRetry.js'
import {
calculateTokenWarningState,
estimateMaxTurnGrowth,
getAutoCompactThreshold,
getEffectiveContextWindowSize,
isAutoCompactEnabled,
type AutoCompactTrackingState,
} from './services/compact/autoCompact.js'
@@ -474,7 +477,7 @@ async function* queryLoop(
queryTracking,
}
let messagesForQuery = [...getMessagesAfterCompactBoundary(messages)]
let messagesForQuery = getMessagesAfterCompactBoundary(messages)
let tracking = autoCompactTracking
@@ -769,6 +772,48 @@ async function* queryLoop(
}
}
// Predictive autocompact: estimate whether this turn's growth would push
// us past the context window. The threshold is the effective context
// window minus the estimated per-turn growth; it deliberately does not go
// through getAutoCompactThreshold, which already subtracts the autocompact
// buffer, so the reserve is not counted twice.
//
// Runs only when no compaction result exists yet for this iteration
// (`compactionResult` is falsy) and autocompact is enabled.
if (!compactionResult && isAutoCompactEnabled()) {
const model = toolUseContext.options.mainLoopModel
// Estimated token count of the messages about to be sent, less
// snipTokensFreed (presumably tokens already reclaimed by snipping that
// the estimate still counts — TODO confirm against the snip logic).
const currentTokens =
tokenCountWithEstimation(messagesForQuery) - snipTokensFreed
const estimatedGrowth = estimateMaxTurnGrowth(model)
const predictiveThreshold =
getEffectiveContextWindowSize(model) - estimatedGrowth
// Strict comparison: a count exactly equal to the threshold does not
// trigger a predictive compaction.
if (currentTokens > predictiveThreshold) {
// The same messagesForQuery array is passed both as the messages to
// compact and as forkContextMessages.
const predictiveResult = await deps.autocompact(
messagesForQuery,
toolUseContext,
{
systemPrompt,
userContext,
systemContext,
toolUseContext,
forkContextMessages: messagesForQuery,
},
querySource,
tracking,
snipTokensFreed,
)
// Only a successful compaction changes state; when compactionResult is
// falsy this block leaves messagesForQuery, snipTokensFreed and tracking
// untouched.
// NOTE(review): consecutiveFailures is only read on the success path
// here — confirm that failure accounting for a failed predictive attempt
// happens elsewhere.
if (predictiveResult.compactionResult) {
// Continue the turn with the post-compact message list.
messagesForQuery = buildPostCompactMessages(
predictiveResult.compactionResult,
)
// Reset the freed-token offset now that the message list was replaced.
snipTokensFreed = 0
// Only an existing tracking object is updated (marked compacted, with
// consecutiveFailures defaulting to 0 when the result omits it). When
// tracking is falsy it is left as-is, so the compaction is not recorded.
// NOTE(review): confirm that skipping the update is intended when no
// tracking object exists yet.
tracking = tracking
? {
...tracking,
compacted: true,
consecutiveFailures: predictiveResult.consecutiveFailures ?? 0,
}
: tracking
}
}
}
let attemptWithFallback = true
queryCheckpoint('query_api_loop_start')
@@ -1142,7 +1187,7 @@ async function* queryLoop(
// Execute post-sampling hooks after model response is complete
if (assistantMessages.length > 0) {
void executePostSamplingHooks(
[...messagesForQuery, ...assistantMessages],
messagesForQuery.concat(assistantMessages),
systemPrompt,
userContext,
systemContext,
@@ -1864,11 +1909,10 @@ async function* queryLoop(
userContext,
systemContext,
toolUseContext,
forkContextMessages: [
...messagesForQuery,
...assistantMessages,
...toolResults,
],
forkContextMessages: messagesForQuery.concat(
assistantMessages,
toolResults,
),
})
}
}
@@ -1885,7 +1929,7 @@ async function* queryLoop(
queryCheckpoint('query_recursive_call')
const next: State = {
messages: [...messagesForQuery, ...assistantMessages, ...toolResults],
messages: messagesForQuery.concat(assistantMessages, toolResults),
toolUseContext: toolUseContextWithQueryTracking,
autoCompactTracking: tracking,
turnCount: nextTurnCount,