fix: 内存优化 — 预测性 compact 阈值、增量 lookups orphaned 修复、deferred slice 引用优化

- P0: REPL.tsx 用 useMemo 包裹 deferred messages slice，避免每次渲染创建新数组引用导致不必要的后台重渲染 - P1: 预测性 compact 阈值改用 effectiveContextWindow - growth，消除与 autocompact buffer 的双重预留；TOOL_RESULT_GROWTH_ESTIMATE 从 20K 降至 15K - P2: 增量 lookups 增加 lastAssistantMsgId 一致性检查和 orphaned server_tool_use/mcp_tool_use 扫描，防止 UI 永久 loading - P3: reactiveCompact 类型断言改为直接使用 'compact' 字面量 - docs: CLAUDE.md 统一使用 precheck 替代分散的 typecheck/lint/test 命令 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-17 05:45:51 +00:00 · 2026-05-02 20:32:00 +08:00
parent 4cbf406c70
commit 198c09b263
10 changed files with 432 additions and 37 deletions
--- a/src/query.ts
+++ b/src/query.ts
@@ -7,6 +7,9 @@ import type { CanUseToolFn } from './hooks/useCanUseTool.js'
 import { FallbackTriggeredError } from './services/api/withRetry.js'
 import {
  calculateTokenWarningState,
+  estimateMaxTurnGrowth,
+  getAutoCompactThreshold,
+  getEffectiveContextWindowSize,
  isAutoCompactEnabled,
  type AutoCompactTrackingState,
 } from './services/compact/autoCompact.js'
@@ -474,7 +477,7 @@ async function* queryLoop(
      queryTracking,
    }

-    let messagesForQuery = [...getMessagesAfterCompactBoundary(messages)]
+    let messagesForQuery = getMessagesAfterCompactBoundary(messages)

    let tracking = autoCompactTracking

@@ -769,6 +772,48 @@ async function* queryLoop(
      }
    }

+    // Predictive autocompact: estimate if this turn's growth will push
+    // us past the context window. Uses effectiveContextWindow directly
+    // (without the autocompact buffer) to avoid double-reserving with
+    // getAutoCompactThreshold which already subtracts buffer.
+    if (!compactionResult && isAutoCompactEnabled()) {
+      const model = toolUseContext.options.mainLoopModel
+      const currentTokens =
+        tokenCountWithEstimation(messagesForQuery) - snipTokensFreed
+      const estimatedGrowth = estimateMaxTurnGrowth(model)
+      const predictiveThreshold =
+        getEffectiveContextWindowSize(model) - estimatedGrowth
+      if (currentTokens > predictiveThreshold) {
+        const predictiveResult = await deps.autocompact(
+          messagesForQuery,
+          toolUseContext,
+          {
+            systemPrompt,
+            userContext,
+            systemContext,
+            toolUseContext,
+            forkContextMessages: messagesForQuery,
+          },
+          querySource,
+          tracking,
+          snipTokensFreed,
+        )
+        if (predictiveResult.compactionResult) {
+          messagesForQuery = buildPostCompactMessages(
+            predictiveResult.compactionResult,
+          )
+          snipTokensFreed = 0
+          tracking = tracking
+            ? {
+                ...tracking,
+                compacted: true,
+                consecutiveFailures: predictiveResult.consecutiveFailures ?? 0,
+              }
+            : tracking
+        }
+      }
+    }
+
    let attemptWithFallback = true

    queryCheckpoint('query_api_loop_start')
@@ -1142,7 +1187,7 @@ async function* queryLoop(
    // Execute post-sampling hooks after model response is complete
    if (assistantMessages.length > 0) {
      void executePostSamplingHooks(
-        [...messagesForQuery, ...assistantMessages],
+        messagesForQuery.concat(assistantMessages),
        systemPrompt,
        userContext,
        systemContext,
@@ -1864,11 +1909,10 @@ async function* queryLoop(
          userContext,
          systemContext,
          toolUseContext,
-          forkContextMessages: [
-            ...messagesForQuery,
-            ...assistantMessages,
-            ...toolResults,
-          ],
+          forkContextMessages: messagesForQuery.concat(
+            assistantMessages,
+            toolResults,
+          ),
        })
      }
    }
@@ -1885,7 +1929,7 @@ async function* queryLoop(

    queryCheckpoint('query_recursive_call')
    const next: State = {
-      messages: [...messagesForQuery, ...assistantMessages, ...toolResults],
+      messages: messagesForQuery.concat(assistantMessages, toolResults),
      toolUseContext: toolUseContextWithQueryTracking,
      autoCompactTracking: tracking,
      turnCount: nextTurnCount,