fix: 内存优化 — 预测性 compact 阈值、增量 lookups orphaned 修复、deferred slice 引用优化

- P0: REPL.tsx 用 useMemo 包裹 deferred messages slice，避免每次渲染创建新数组引用导致不必要的后台重渲染
- P1: 预测性 compact 阈值改用 effectiveContextWindow - growth，消除与 autocompact buffer 的双重预留；TOOL_RESULT_GROWTH_ESTIMATE 从 20K 降至 15K
- P2: 增量 lookups 增加 lastAssistantMsgId 一致性检查和 orphaned server_tool_use/mcp_tool_use 扫描，防止 UI 永久 loading
- P3: reactiveCompact 类型断言改为直接使用 'compact' 字面量
- docs: CLAUDE.md 统一使用 precheck 替代分散的 typecheck/lint/test 命令

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-17 13:55:50 +00:00 · 2026-05-02 20:32:00 +08:00
parent 4cbf406c70
commit 198c09b263
10 changed files with 432 additions and 37 deletions
--- a/src/services/compact/autoCompact.ts
+++ b/src/services/compact/autoCompact.ts
@@ -64,6 +64,35 @@ export const WARNING_THRESHOLD_BUFFER_TOKENS = 20_000
 export const ERROR_THRESHOLD_BUFFER_TOKENS = 20_000
 export const MANUAL_COMPACT_BUFFER_TOKENS = 3_000

+// Per-turn allowance for tool result growth (a typical-case figure, not a cap).
+// Typical tool results (file reads, grep, bash) average ~5-10K tokens;
+// occasional large reads can spike to 20K+ and exceed this allowance.
+const TOOL_RESULT_GROWTH_ESTIMATE = 15_000
+
+/**
+ * Context-aware autocompact buffer. Larger context windows need more
+ * headroom because a single turn can produce proportionally more tokens
+ * (longer model outputs + larger tool results).
+ */
+export function getAutocompactBufferTokens(model: string): number {
+  const effectiveWindow = getEffectiveContextWindowSize(model)
+  if (effectiveWindow >= 800_000) return 50_000
+  if (effectiveWindow >= 400_000) return 30_000
+  return AUTOCOMPACT_BUFFER_TOKENS
+}
+
+/**
+ * Estimate one turn's token growth: capped model output + tool-result allowance.
+ * Used for predictive autocompact checks before the API call.
+ */
+export function estimateMaxTurnGrowth(model: string): number {
+  const maxOutput = Math.min(
+    getMaxOutputTokensForModel(model),
+    MAX_OUTPUT_TOKENS_FOR_SUMMARY,
+  )
+  return maxOutput + TOOL_RESULT_GROWTH_ESTIMATE
+}
+
 // Stop trying autocompact after this many consecutive failures.
 // BQ 2026-03-10: 1,279 sessions had 50+ consecutive failures (up to 3,272)
 // in a single session, wasting ~250K API calls/day globally.
@@ -73,7 +102,7 @@ export function getAutoCompactThreshold(model: string): number {
  const effectiveContextWindow = getEffectiveContextWindowSize(model)

  const autocompactThreshold =
-    effectiveContextWindow - AUTOCOMPACT_BUFFER_TOKENS
+    effectiveContextWindow - getAutocompactBufferTokens(model)

  // Override for easier testing of autocompact
  const envPercent = process.env.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE