fix: 内存优化 — 预测性 compact 阈值、增量 lookups orphaned 修复、deferred slice 引用优化

- P0: REPL.tsx 用 useMemo 包裹 deferred messages slice,避免每次渲染创建新数组引用导致不必要的后台重渲染
- P1: 预测性 compact 阈值改用 effectiveContextWindow - growth,消除与 autocompact buffer 的双重预留;TOOL_RESULT_GROWTH_ESTIMATE 从 20K 降至 15K
- P2: 增量 lookups 增加 lastAssistantMsgId 一致性检查和 orphaned server_tool_use/mcp_tool_use 扫描,防止 UI 永久 loading
- P3: reactiveCompact 类型断言改为直接使用 'compact' 字面量
- docs: CLAUDE.md 统一使用 precheck 替代分散的 typecheck/lint/test 命令

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
claude-code-best
2026-05-02 20:32:00 +08:00
parent 4cbf406c70
commit 198c09b263
10 changed files with 432 additions and 37 deletions

View File

@@ -64,6 +64,35 @@ export const WARNING_THRESHOLD_BUFFER_TOKENS = 20_000
export const ERROR_THRESHOLD_BUFFER_TOKENS = 20_000
export const MANUAL_COMPACT_BUFFER_TOKENS = 3_000
// Estimated per-turn growth, in tokens, attributable to tool results.
// Typical tool results (file reads, grep, bash) average ~5-10K tokens;
// occasional large reads can spike to 20K+, so this value sits above the
// typical average but is NOT a hard upper bound on a single turn.
// It is added to the capped model output size in estimateMaxTurnGrowth().
const TOOL_RESULT_GROWTH_ESTIMATE = 15_000
/**
 * Returns the autocompact headroom (in tokens) to reserve for `model`,
 * scaled by the model's effective context window.
 *
 * Bigger windows get a bigger buffer, since one turn can emit proportionally
 * more tokens (longer model output plus larger tool results):
 *   - window >= 800K  -> 50K
 *   - window >= 400K  -> 30K
 *   - otherwise       -> AUTOCOMPACT_BUFFER_TOKENS (the default)
 *
 * @param model Model identifier passed to getEffectiveContextWindowSize.
 * @returns Number of tokens to keep free below the effective window.
 */
export function getAutocompactBufferTokens(model: string): number {
  const windowTokens = getEffectiveContextWindowSize(model)

  // Large-window tiers; anything smaller falls through to the default.
  if (windowTokens >= 400_000) {
    return windowTokens >= 800_000 ? 50_000 : 30_000
  }

  return AUTOCOMPACT_BUFFER_TOKENS
}
/**
 * Upper estimate of how many tokens one turn may add to the context.
 * Consumed by the predictive autocompact check that runs before the API call.
 *
 * The estimate is the model's max output tokens (capped at
 * MAX_OUTPUT_TOKENS_FOR_SUMMARY) plus TOOL_RESULT_GROWTH_ESTIMATE.
 *
 * @param model Model identifier passed to getMaxOutputTokensForModel.
 * @returns Estimated token growth for a single turn.
 */
export function estimateMaxTurnGrowth(model: string): number {
  const modelOutputLimit = getMaxOutputTokensForModel(model)
  // Never budget more output than the summary cap allows.
  const outputBudget = Math.min(modelOutputLimit, MAX_OUTPUT_TOKENS_FOR_SUMMARY)
  return outputBudget + TOOL_RESULT_GROWTH_ESTIMATE
}
// Stop trying autocompact after this many consecutive failures.
// BQ 2026-03-10: 1,279 sessions had 50+ consecutive failures (up to 3,272)
// in a single session, wasting ~250K API calls/day globally.
@@ -73,7 +102,7 @@ export function getAutoCompactThreshold(model: string): number {
const effectiveContextWindow = getEffectiveContextWindowSize(model)
const autocompactThreshold =
effectiveContextWindow - AUTOCOMPACT_BUFFER_TOKENS
effectiveContextWindow - getAutocompactBufferTokens(model)
// Override for easier testing of autocompact
const envPercent = process.env.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE