fix: 内存优化 — 预测性 compact 阈值、增量 lookups orphaned 修复、deferred slice 引用优化

- P0: REPL.tsx 用 useMemo 包裹 deferred messages slice，避免每次渲染创建新数组引用导致不必要的后台重渲染
- P1: 预测性 compact 阈值改用 effectiveContextWindow - growth，消除与 autocompact buffer 的双重预留；TOOL_RESULT_GROWTH_ESTIMATE 从 20K 降至 15K
- P2: 增量 lookups 增加 lastAssistantMsgId 一致性检查和 orphaned server_tool_use/mcp_tool_use 扫描，防止 UI 永久 loading
- P3: reactiveCompact 类型断言改为直接使用 'compact' 字面量
- docs: CLAUDE.md 统一使用 precheck 替代分散的 typecheck/lint/test 命令

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-15 21:05:51 +00:00 · 2026-05-02 20:32:00 +08:00
parent 4cbf406c70
commit 198c09b263
10 changed files with 432 additions and 37 deletions
--- a/src/components/Messages.tsx
+++ b/src/components/Messages.tsx
@@ -18,6 +18,7 @@ import type { Tools } from '../Tool.js';
 import { findToolByName } from '../Tool.js';
 import type { AgentDefinitionsResult } from '@claude-code-best/builtin-tools/tools/AgentTool/loadAgentsDir.js';
 import type {
+  AssistantMessage,
  Message as MessageType,
  NormalizedMessage,
  ProgressMessage as ProgressMessageType,
@@ -36,6 +37,7 @@ import {
  buildMessageLookups,
  computeMessageStructureKey,
  type MessageLookups,
+  updateMessageLookupsIncremental,
  createAssistantMessage,
  deriveUUID,
  getMessagesAfterCompactBoundary,
@@ -516,7 +518,13 @@ const MessagesImpl = ({
  // message content changed during streaming (text/thinking deltas). The key
  // captures only structural info (types, IDs), so content-only deltas skip
  // the rebuild entirely.
-  const lookupsCacheRef = useRef<{ key: string; lookups: MessageLookups } | null>(null);
+  const lookupsCacheRef = useRef<{
+    key: string;
+    lookups: MessageLookups;
+    normalizedCount: number;
+    messageCount: number;
+    lastAssistantMsgId: string | undefined;
+  } | null>(null);

  // Expensive message transforms — filter, reorder, group, collapse, lookups.
  // All O(n) over 27k messages. Split from the renderRange slice so scrolling
@@ -587,12 +595,57 @@ const MessagesImpl = ({
    );

    const lookupsKey = computeMessageStructureKey(normalizedMessages, messagesToShow as MessageType[]);
+    const currentLastAssistantMsgId = (() => {
+      const lastMsg = (messagesToShow as MessageType[]).at(-1);
+      return lastMsg?.type === 'assistant' ? (lastMsg as AssistantMessage).message?.id : undefined;
+    })();
    let lookups: MessageLookups;
    if (lookupsCacheRef.current && lookupsCacheRef.current.key === lookupsKey) {
      lookups = lookupsCacheRef.current.lookups;
+    } else if (
+      lookupsCacheRef.current &&
+      normalizedMessages.length >= lookupsCacheRef.current.normalizedCount &&
+      (messagesToShow as MessageType[]).length >= lookupsCacheRef.current.messageCount &&
+      // If lastAssistantMsgId changed, previous "in-progress" assistant may
+      // now be orphaned — force a full rebuild to pick up the new status.
+      lookupsCacheRef.current.lastAssistantMsgId === currentLastAssistantMsgId
+    ) {
+      // Try incremental update when only new messages were appended
+      const updated = updateMessageLookupsIncremental(
+        lookupsCacheRef.current.lookups,
+        lookupsCacheRef.current.normalizedCount,
+        lookupsCacheRef.current.messageCount,
+        normalizedMessages,
+        messagesToShow as MessageType[],
+      );
+      if (updated) {
+        lookups = updated;
+        lookupsCacheRef.current = {
+          key: lookupsKey,
+          lookups,
+          normalizedCount: normalizedMessages.length,
+          messageCount: (messagesToShow as MessageType[]).length,
+          lastAssistantMsgId: currentLastAssistantMsgId,
+        };
+      } else {
+        lookups = buildMessageLookups(normalizedMessages, messagesToShow as MessageType[]);
+        lookupsCacheRef.current = {
+          key: lookupsKey,
+          lookups,
+          normalizedCount: normalizedMessages.length,
+          messageCount: (messagesToShow as MessageType[]).length,
+          lastAssistantMsgId: currentLastAssistantMsgId,
+        };
+      }
    } else {
      lookups = buildMessageLookups(normalizedMessages, messagesToShow as MessageType[]);
-      lookupsCacheRef.current = { key: lookupsKey, lookups };
+      lookupsCacheRef.current = {
+        key: lookupsKey,
+        lookups,
+        normalizedCount: normalizedMessages.length,
+        messageCount: (messagesToShow as MessageType[]).length,
+        lastAssistantMsgId: currentLastAssistantMsgId,
+      };
    }

    const hiddenMessageCount = messagesToShowNotTruncated.length - MAX_MESSAGES_TO_SHOW_IN_TRANSCRIPT_MODE;
--- a/src/query.ts
+++ b/src/query.ts
@@ -7,6 +7,9 @@ import type { CanUseToolFn } from './hooks/useCanUseTool.js'
 import { FallbackTriggeredError } from './services/api/withRetry.js'
 import {
  calculateTokenWarningState,
+  estimateMaxTurnGrowth,
+  getAutoCompactThreshold,
+  getEffectiveContextWindowSize,
  isAutoCompactEnabled,
  type AutoCompactTrackingState,
 } from './services/compact/autoCompact.js'
@@ -474,7 +477,7 @@ async function* queryLoop(
      queryTracking,
    }

-    let messagesForQuery = [...getMessagesAfterCompactBoundary(messages)]
+    let messagesForQuery = getMessagesAfterCompactBoundary(messages)

    let tracking = autoCompactTracking

@@ -769,6 +772,48 @@ async function* queryLoop(
      }
    }

+    // Predictive autocompact: estimate if this turn's growth will push
+    // us past the context window. Uses effectiveContextWindow directly
+    // (without the autocompact buffer) to avoid double-reserving with
+    // getAutoCompactThreshold which already subtracts buffer.
+    if (!compactionResult && isAutoCompactEnabled()) {
+      const model = toolUseContext.options.mainLoopModel
+      const currentTokens =
+        tokenCountWithEstimation(messagesForQuery) - snipTokensFreed
+      const estimatedGrowth = estimateMaxTurnGrowth(model)
+      const predictiveThreshold =
+        getEffectiveContextWindowSize(model) - estimatedGrowth
+      if (currentTokens > predictiveThreshold) {
+        const predictiveResult = await deps.autocompact(
+          messagesForQuery,
+          toolUseContext,
+          {
+            systemPrompt,
+            userContext,
+            systemContext,
+            toolUseContext,
+            forkContextMessages: messagesForQuery,
+          },
+          querySource,
+          tracking,
+          snipTokensFreed,
+        )
+        if (predictiveResult.compactionResult) {
+          messagesForQuery = buildPostCompactMessages(
+            predictiveResult.compactionResult,
+          )
+          snipTokensFreed = 0
+          tracking = tracking
+            ? {
+                ...tracking,
+                compacted: true,
+                consecutiveFailures: predictiveResult.consecutiveFailures ?? 0,
+              }
+            : tracking
+        }
+      }
+    }
+
    let attemptWithFallback = true

    queryCheckpoint('query_api_loop_start')
@@ -1142,7 +1187,7 @@ async function* queryLoop(
    // Execute post-sampling hooks after model response is complete
    if (assistantMessages.length > 0) {
      void executePostSamplingHooks(
-        [...messagesForQuery, ...assistantMessages],
+        messagesForQuery.concat(assistantMessages),
        systemPrompt,
        userContext,
        systemContext,
@@ -1864,11 +1909,10 @@ async function* queryLoop(
          userContext,
          systemContext,
          toolUseContext,
-          forkContextMessages: [
-            ...messagesForQuery,
-            ...assistantMessages,
-            ...toolResults,
-          ],
+          forkContextMessages: messagesForQuery.concat(
+            assistantMessages,
+            toolResults,
+          ),
        })
      }
    }
@@ -1885,7 +1929,7 @@ async function* queryLoop(

    queryCheckpoint('query_recursive_call')
    const next: State = {
-      messages: [...messagesForQuery, ...assistantMessages, ...toolResults],
+      messages: messagesForQuery.concat(assistantMessages, toolResults),
      toolUseContext: toolUseContextWithQueryTracking,
      autoCompactTracking: tracking,
      turnCount: nextTurnCount,
--- a/src/screens/REPL.tsx
+++ b/src/screens/REPL.tsx
@@ -1566,7 +1566,16 @@ export function REPL({
  // Deferred messages for the Messages component — renders at transition
  // priority so the reconciler yields every 5ms, keeping input responsive
  // while the expensive message processing pipeline runs.
-  const deferredMessages = useDeferredValue(messages);
+  // Cap at 500 messages to limit memory double-buffering. The bypass
+  // at display-time uses sync messages during streaming and non-loading,
+  // so this cap only affects reduced-motion scenarios.
+  const DEFERRED_CAP = 500;
+  const cappedMessages = React.useMemo(
+    () => (messages.length > DEFERRED_CAP ? messages.slice(-DEFERRED_CAP) : messages),
+    [messages],
+  );
+  const deferredMessages = useDeferredValue(cappedMessages);
-  const deferredBehind = messages.length - deferredMessages.length;
+  // Measure lag against the capped array: comparing with `messages` would stay > 0 forever once the cap applies.
+  const deferredBehind = cappedMessages.length - deferredMessages.length;
  if (deferredBehind > 0) {
    logForDebugging(
--- a/src/services/compact/autoCompact.ts
+++ b/src/services/compact/autoCompact.ts
@@ -64,6 +64,35 @@ export const WARNING_THRESHOLD_BUFFER_TOKENS = 20_000
 export const ERROR_THRESHOLD_BUFFER_TOKENS = 20_000
 export const MANUAL_COMPACT_BUFFER_TOKENS = 3_000

+// Rough per-turn allowance for tool-result growth (not an upper bound).
+// Typical tool results (file reads, grep, bash) average ~5-10K tokens;
+// occasional large reads can spike to 20K+, i.e. above this estimate.
+const TOOL_RESULT_GROWTH_ESTIMATE = 15_000
+
+/**
+ * Context-aware autocompact buffer. Returns 50_000 tokens when the effective
+ * context window is >= 800_000, 30_000 when it is >= 400_000, and
+ * AUTOCOMPACT_BUFFER_TOKENS otherwise (larger windows get more headroom).
+ */
+export function getAutocompactBufferTokens(model: string): number {
+  const effectiveWindow = getEffectiveContextWindowSize(model)
+  if (effectiveWindow >= 800_000) return 50_000
+  if (effectiveWindow >= 400_000) return 30_000
+  return AUTOCOMPACT_BUFFER_TOKENS
+}
+
+/**
+ * Estimate single-turn token growth for the predictive autocompact check:
+ * min(model max output, MAX_OUTPUT_TOKENS_FOR_SUMMARY) + TOOL_RESULT_GROWTH_ESTIMATE.
+ */
+export function estimateMaxTurnGrowth(model: string): number {
+  const maxOutput = Math.min(
+    getMaxOutputTokensForModel(model),
+    MAX_OUTPUT_TOKENS_FOR_SUMMARY,
+  )
+  return maxOutput + TOOL_RESULT_GROWTH_ESTIMATE
+}
+
 // Stop trying autocompact after this many consecutive failures.
 // BQ 2026-03-10: 1,279 sessions had 50+ consecutive failures (up to 3,272)
 // in a single session, wasting ~250K API calls/day globally.
@@ -73,7 +102,7 @@ export function getAutoCompactThreshold(model: string): number {
  const effectiveContextWindow = getEffectiveContextWindowSize(model)

  const autocompactThreshold =
-    effectiveContextWindow - AUTOCOMPACT_BUFFER_TOKENS
+    effectiveContextWindow - getAutocompactBufferTokens(model)

  // Override for easier testing of autocompact
  const envPercent = process.env.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE
--- a/src/services/compact/compact.ts
+++ b/src/services/compact/compact.ts
@@ -334,13 +334,12 @@ export type RecompactionInfo = {
 * Order: boundaryMarker, summaryMessages, messagesToKeep, attachments, hookResults
 */
 export function buildPostCompactMessages(result: CompactionResult): Message[] {
-  return [
-    result.boundaryMarker,
-    ...result.summaryMessages,
-    ...(result.messagesToKeep ?? []),
-    ...result.attachments,
-    ...result.hookResults,
-  ]
+  return ([result.boundaryMarker] as Message[]).concat(
+    result.summaryMessages,
+    result.messagesToKeep ?? [],
+    result.attachments,
+    result.hookResults,
+  )
 }

 /**
--- a/src/services/compact/reactiveCompact.ts
+++ b/src/services/compact/reactiveCompact.ts
@@ -1,25 +1,97 @@
-// Auto-generated stub — replace with real implementation
-export {}
-
-import type { Message } from 'src/types/message'
-import type { CompactionResult } from './compact.js'
+import { isEnvTruthy } from '../../utils/envUtils.js'
+import {
+  isMediaSizeErrorMessage,
+  isPromptTooLongMessage,
+} from '../api/errors.js'
+import type { AssistantMessage, Message } from '../../types/message.js'
+import { type CompactionResult, compactConversation } from './compact.js'
+import { logError } from '../../utils/log.js'
+import { logForDebugging } from '../../utils/debug.js'
+import type { CacheSafeParams } from '../../utils/forkedAgent.js'

 export const isReactiveOnlyMode: () => boolean = () => false
+
 export const reactiveCompactOnPromptTooLong: (
  messages: Message[],
  cacheSafeParams: Record<string, unknown>,
  options: { customInstructions?: string; trigger?: string },
 ) => Promise<{ ok: boolean; reason?: string; result?: CompactionResult }> =
-  async () => ({ ok: false })
-export const isReactiveCompactEnabled: () => boolean = () => false
-export const isWithheldPromptTooLong: (message: Message) => boolean = () =>
-  false
-export const isWithheldMediaSizeError: (message: Message) => boolean = () =>
-  false
+  async (messages, cacheSafeParams, options) => {
+    const params = cacheSafeParams as unknown as CacheSafeParams
+    try {
+      const result = await compactConversation(
+        messages,
+        params.toolUseContext,
+        params,
+        true,
+        options.customInstructions,
+        true,
+        {
+          isRecompactionInChain: false,
+          turnsSincePreviousCompact: 0,
+          autoCompactThreshold: 0,
+          querySource: 'compact',
+        },
+      )
+      return { ok: true, result }
+    } catch (error) {
+      logError(error)
+      return { ok: false, reason: String(error) }
+    }
+  }
+
+export const isReactiveCompactEnabled: () => boolean = () => {
+  if (isEnvTruthy(process.env.DISABLE_COMPACT)) return false
+  return true
+}
+
+export const isWithheldPromptTooLong: (message: Message) => boolean =
+  message => {
+    if (message.type !== 'assistant' || !message.isApiErrorMessage) return false
+    return isPromptTooLongMessage(message as AssistantMessage)
+  }
+
+export const isWithheldMediaSizeError: (message: Message) => boolean =
+  message => {
+    if (message.type !== 'assistant' || !message.isApiErrorMessage) return false
+    return isMediaSizeErrorMessage(message as AssistantMessage)
+  }
+
 export const tryReactiveCompact: (params: {
  hasAttempted: boolean
  querySource: string
  aborted: boolean
  messages: Message[]
  cacheSafeParams: Record<string, unknown>
-}) => Promise<CompactionResult | null> = async () => null
+}) => Promise<CompactionResult | null> = async ({
+  hasAttempted,
+  aborted,
+  messages,
+  cacheSafeParams, // note: `querySource` from the params type is not read here
+}) => {
+  if (hasAttempted || aborted) return null // already tried, or the request was aborted
+  const params = cacheSafeParams as unknown as CacheSafeParams // unchecked cast: caller must pass a real CacheSafeParams
+  try {
+    const result = await compactConversation(
+      messages,
+      params.toolUseContext,
+      params,
+      true,
+      undefined,
+      true,
+      {
+        isRecompactionInChain: false,
+        turnsSincePreviousCompact: 0,
+        autoCompactThreshold: 0, // NOTE(review): reactiveCompactOnPromptTooLong also sets querySource: 'compact'; omitted here — confirm intended
+      },
+    )
+    return result
+  } catch (error) {
+    logForDebugging(
+      `reactiveCompact: emergency compaction failed — ${String(error)}`,
+      { level: 'warn' },
+    )
+    logError(error)
+    return null // failure is logged, then reported to the caller as "no result"
+  }
+}
--- a/src/utils/messages.ts
+++ b/src/utils/messages.ts
@@ -1397,6 +1397,172 @@ export function buildMessageLookups(
  }
 }

+/**
+ * Incrementally update lookups by processing only newly appended messages.
+ * Mutates `existing` in place and returns that same object on success.
+ *
+ * @param existing Lookups built over the first `previousNormalizedCount`
+ *   normalized messages and the first `previousMessageCount` messages.
+ * @param normalizedMessages Current list; the tail from `previousNormalizedCount` is processed.
+ * @param messages Current list; the tail from `previousMessageCount` is processed.
+ * @returns `existing`, or null when a full rebuild is needed: an array shrank,
+ *   or neither array grew (the change is then inside the processed prefix).
+ *
+ * NOTE(review): only lengths are compared; an edit or reorder inside the prefix
+ * that coincides with an append is not detected here.
+ */
+export function updateMessageLookupsIncremental(
+  existing: MessageLookups,
+  previousNormalizedCount: number,
+  previousMessageCount: number,
+  normalizedMessages: NormalizedMessage[],
+  messages: Message[],
+): MessageLookups | null {
+  // Only appended tails can be patched in. A shrunken array, or no growth at
+  // all, means the change lies in the already-processed prefix: rebuild.
+  const normalizedDelta = normalizedMessages.length - previousNormalizedCount
+  const messageDelta = messages.length - previousMessageCount
+  if (normalizedDelta < 0 || messageDelta < 0) return null
+  if (normalizedDelta === 0 && messageDelta === 0) return null
+
+  // Pass 1 over new `messages` entries: register assistant tool_use blocks.
+  for (let i = previousMessageCount; i < messages.length; i++) {
+    const msg = messages[i]!
+    if (msg.type === 'assistant') {
+      const aMsg = msg as AssistantMessage
+      if (Array.isArray(aMsg.message.content)) {
+        const newToolUseIDs: string[] = []
+        for (const content of aMsg.message.content) {
+          if (typeof content !== 'string' && content.type === 'tool_use') {
+            const toolUseContent = content as ToolUseBlock
+            newToolUseIDs.push(toolUseContent.id)
+            existing.toolUseByToolUseID.set(
+              toolUseContent.id,
+              content as ToolUseBlockParam,
+            )
+          }
+        }
+        // Sibling lookup: every tool_use in this entry shares one Set.
+        // NOTE(review): grouping is per `messages` entry, not per message.id —
+        // confirm this matches how buildMessageLookups groups siblings.
+        const allSiblings = new Set(newToolUseIDs)
+        for (const toolUseID of newToolUseIDs) {
+          existing.siblingToolUseIDs.set(toolUseID, allSiblings)
+        }
+      }
+    }
+  }
+
+  // Pass 2 over new `normalizedMessages` entries: progress, tool results, hooks.
+  const newNormalizedStart = previousNormalizedCount
+  for (let i = newNormalizedStart; i < normalizedMessages.length; i++) {
+    const msg = normalizedMessages[i]!
+
+    if (msg.type === 'progress') {
+      const toolUseID = msg.parentToolUseID as string
+      const progressSoFar = existing.progressMessagesByToolUseID.get(toolUseID)
+      if (progressSoFar) {
+        progressSoFar.push(msg as ProgressMessage)
+      } else {
+        existing.progressMessagesByToolUseID.set(toolUseID, [
+          msg as ProgressMessage,
+        ])
+      }
+
+      const progressData = msg.data as { type: string; hookEvent: HookEvent }
+      if (progressData.type === 'hook_progress') {
+        const hookEvent = progressData.hookEvent
+        let byHookEvent = existing.inProgressHookCounts.get(toolUseID)
+        if (!byHookEvent) {
+          byHookEvent = new Map()
+          existing.inProgressHookCounts.set(toolUseID, byHookEvent)
+        }
+        byHookEvent.set(hookEvent, (byHookEvent.get(hookEvent) ?? 0) + 1)
+      }
+    }
+
+    if (msg.type === 'user' && Array.isArray(msg.message?.content)) {
+      for (const content of msg.message?.content ?? []) {
+        if (typeof content !== 'string' && content.type === 'tool_result') {
+          const tr = content as ToolResultBlockParam
+          existing.toolResultByToolUseID.set(tr.tool_use_id, msg)
+          existing.resolvedToolUseIDs.add(tr.tool_use_id)
+          if (tr.is_error) {
+            existing.erroredToolUseIDs.add(tr.tool_use_id)
+          }
+        }
+      }
+    }
+
+    if (msg.type === 'assistant' && Array.isArray(msg.message?.content)) {
+      for (const content of msg.message?.content ?? []) {
+        if (typeof content === 'string') continue
+        if (
+          'tool_use_id' in content &&
+          typeof (content as { tool_use_id: string }).tool_use_id === 'string'
+        ) {
+          existing.resolvedToolUseIDs.add(
+            (content as { tool_use_id: string }).tool_use_id,
+          )
+        }
+        if ((content.type as string) === 'advisor_tool_result') {
+          const result = content as {
+            tool_use_id: string
+            content: { type: string }
+          }
+          if (result.content.type === 'advisor_tool_result_error') {
+            existing.erroredToolUseIDs.add(result.tool_use_id)
+          }
+        }
+      }
+    }
+
+    if (isHookAttachmentMessage(msg)) {
+      const toolUseID = msg.attachment.toolUseID
+      const hookEvent = msg.attachment.hookEvent
+      const hookName = (msg.attachment as HookAttachmentWithName).hookName
+      if (hookName !== undefined) {
+        let byHookEvent = existing.resolvedHookCounts.get(toolUseID)
+        if (!byHookEvent) {
+          byHookEvent = new Map()
+          existing.resolvedHookCounts.set(toolUseID, byHookEvent)
+        }
+        byHookEvent.set(hookEvent, (byHookEvent.get(hookEvent) ?? 0) + 1)
+      }
+    }
+  }
+
+  existing.normalizedMessageCount = normalizedMessages.length
+
+  // Mark unresolved server_tool_use / mcp_tool_use blocks as resolved + errored,
+  // skipping entries that share the id of a trailing assistant message.
+  // Only the newly appended normalizedMessages are scanned.
+  const lastMsg = messages.at(-1)
+  const lastAssistantMsgId =
+    lastMsg?.type === 'assistant' ? lastMsg.message?.id : undefined
+  for (let i = newNormalizedStart; i < normalizedMessages.length; i++) {
+    const msg = normalizedMessages[i]!
+    if (msg.type !== 'assistant') continue
+    const aMsg = msg as AssistantMessage
+    if (aMsg.message.id === lastAssistantMsgId) continue
+    if (!Array.isArray(aMsg.message.content)) continue
+    for (const content of aMsg.message.content) {
+      if (
+        typeof content !== 'string' &&
+        ((content.type as string) === 'server_tool_use' ||
+          (content.type as string) === 'mcp_tool_use') &&
+        !existing.resolvedToolUseIDs.has((content as { id: string }).id)
+      ) {
+        const id = (content as { id: string }).id
+        existing.resolvedToolUseIDs.add(id)
+        existing.erroredToolUseIDs.add(id)
+      }
+    }
+  }
+
+  return existing
+}
+
 /**
 * Compute a lightweight structural fingerprint for buildMessageLookups caching.
 * Only captures information that affects lookup results (types, IDs, counts),
--- a/src/utils/readFileInRange.ts
+++ b/src/utils/readFileInRange.ts
@@ -101,6 +101,20 @@ export async function readFileInRange(
      throw new FileTooLargeError(stats.size, maxBytes)
    }

+    // For targeted reads of moderately large files, prefer streaming to
+    // avoid loading the full file into memory when only a slice is needed.
+    const isTargetedRead = offset > 0 || maxLines !== undefined
+    if (isTargetedRead && stats.size > FAST_PATH_MAX_SIZE / 4) {
+      return readFileInRangeStreaming(
+        filePath,
+        offset,
+        maxLines,
+        maxBytes,
+        truncateOnByteLimit,
+        signal,
+      )
+    }
+
    const text = await readFile(filePath, { encoding: 'utf8', signal })
    return readFileInRangeFast(
      text,