fix: 内存优化 — 预测性 compact 阈值、增量 lookups orphaned 修复、deferred slice 引用优化

- P0: REPL.tsx 用 useMemo 包裹 deferred messages slice,避免每次渲染创建新数组引用导致不必要的后台重渲染
- P1: 预测性 compact 阈值改用 effectiveContextWindow - growth,消除与 autocompact buffer 的双重预留;TOOL_RESULT_GROWTH_ESTIMATE 从 20K 降至 15K
- P2: 增量 lookups 增加 lastAssistantMsgId 一致性检查和 orphaned server_tool_use/mcp_tool_use 扫描,防止 UI 永久 loading
- P3: reactiveCompact 类型断言改为直接使用 'compact' 字面量
- docs: CLAUDE.md 统一使用 precheck 替代分散的 typecheck/lint/test 命令

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
claude-code-best
2026-05-02 20:32:00 +08:00
parent 4cbf406c70
commit 198c09b263
10 changed files with 432 additions and 37 deletions

View File

@@ -18,6 +18,7 @@ import type { Tools } from '../Tool.js';
import { findToolByName } from '../Tool.js';
import type { AgentDefinitionsResult } from '@claude-code-best/builtin-tools/tools/AgentTool/loadAgentsDir.js';
import type {
AssistantMessage,
Message as MessageType,
NormalizedMessage,
ProgressMessage as ProgressMessageType,
@@ -36,6 +37,7 @@ import {
buildMessageLookups,
computeMessageStructureKey,
type MessageLookups,
updateMessageLookupsIncremental,
createAssistantMessage,
deriveUUID,
getMessagesAfterCompactBoundary,
@@ -516,7 +518,13 @@ const MessagesImpl = ({
// message content changed during streaming (text/thinking deltas). The key
// captures only structural info (types, IDs), so content-only deltas skip
// the rebuild entirely.
const lookupsCacheRef = useRef<{ key: string; lookups: MessageLookups } | null>(null);
const lookupsCacheRef = useRef<{
key: string;
lookups: MessageLookups;
normalizedCount: number;
messageCount: number;
lastAssistantMsgId: string | undefined;
} | null>(null);
// Expensive message transforms — filter, reorder, group, collapse, lookups.
// All O(n) over 27k messages. Split from the renderRange slice so scrolling
@@ -587,12 +595,57 @@ const MessagesImpl = ({
);
const lookupsKey = computeMessageStructureKey(normalizedMessages, messagesToShow as MessageType[]);
const currentLastAssistantMsgId = (() => {
const lastMsg = (messagesToShow as MessageType[]).at(-1);
return lastMsg?.type === 'assistant' ? (lastMsg as AssistantMessage).message?.id : undefined;
})();
let lookups: MessageLookups;
if (lookupsCacheRef.current && lookupsCacheRef.current.key === lookupsKey) {
lookups = lookupsCacheRef.current.lookups;
} else if (
lookupsCacheRef.current &&
normalizedMessages.length >= lookupsCacheRef.current.normalizedCount &&
(messagesToShow as MessageType[]).length >= lookupsCacheRef.current.messageCount &&
// If lastAssistantMsgId changed, previous "in-progress" assistant may
// now be orphaned — force a full rebuild to pick up the new status.
lookupsCacheRef.current.lastAssistantMsgId === currentLastAssistantMsgId
) {
// Try incremental update when only new messages were appended
const updated = updateMessageLookupsIncremental(
lookupsCacheRef.current.lookups,
lookupsCacheRef.current.normalizedCount,
lookupsCacheRef.current.messageCount,
normalizedMessages,
messagesToShow as MessageType[],
);
if (updated) {
lookups = updated;
lookupsCacheRef.current = {
key: lookupsKey,
lookups,
normalizedCount: normalizedMessages.length,
messageCount: (messagesToShow as MessageType[]).length,
lastAssistantMsgId: currentLastAssistantMsgId,
};
} else {
lookups = buildMessageLookups(normalizedMessages, messagesToShow as MessageType[]);
lookupsCacheRef.current = {
key: lookupsKey,
lookups,
normalizedCount: normalizedMessages.length,
messageCount: (messagesToShow as MessageType[]).length,
lastAssistantMsgId: currentLastAssistantMsgId,
};
}
} else {
lookups = buildMessageLookups(normalizedMessages, messagesToShow as MessageType[]);
lookupsCacheRef.current = { key: lookupsKey, lookups };
lookupsCacheRef.current = {
key: lookupsKey,
lookups,
normalizedCount: normalizedMessages.length,
messageCount: (messagesToShow as MessageType[]).length,
lastAssistantMsgId: currentLastAssistantMsgId,
};
}
const hiddenMessageCount = messagesToShowNotTruncated.length - MAX_MESSAGES_TO_SHOW_IN_TRANSCRIPT_MODE;

View File

@@ -7,6 +7,9 @@ import type { CanUseToolFn } from './hooks/useCanUseTool.js'
import { FallbackTriggeredError } from './services/api/withRetry.js'
import {
calculateTokenWarningState,
estimateMaxTurnGrowth,
getAutoCompactThreshold,
getEffectiveContextWindowSize,
isAutoCompactEnabled,
type AutoCompactTrackingState,
} from './services/compact/autoCompact.js'
@@ -474,7 +477,7 @@ async function* queryLoop(
queryTracking,
}
let messagesForQuery = [...getMessagesAfterCompactBoundary(messages)]
let messagesForQuery = getMessagesAfterCompactBoundary(messages)
let tracking = autoCompactTracking
@@ -769,6 +772,48 @@ async function* queryLoop(
}
}
// Predictive autocompact: estimate if this turn's growth will push
// us past the context window. Uses effectiveContextWindow directly
// (without the autocompact buffer) to avoid double-reserving with
// getAutoCompactThreshold which already subtracts buffer.
if (!compactionResult && isAutoCompactEnabled()) {
const model = toolUseContext.options.mainLoopModel
const currentTokens =
tokenCountWithEstimation(messagesForQuery) - snipTokensFreed
const estimatedGrowth = estimateMaxTurnGrowth(model)
const predictiveThreshold =
getEffectiveContextWindowSize(model) - estimatedGrowth
if (currentTokens > predictiveThreshold) {
const predictiveResult = await deps.autocompact(
messagesForQuery,
toolUseContext,
{
systemPrompt,
userContext,
systemContext,
toolUseContext,
forkContextMessages: messagesForQuery,
},
querySource,
tracking,
snipTokensFreed,
)
if (predictiveResult.compactionResult) {
messagesForQuery = buildPostCompactMessages(
predictiveResult.compactionResult,
)
snipTokensFreed = 0
tracking = tracking
? {
...tracking,
compacted: true,
consecutiveFailures: predictiveResult.consecutiveFailures ?? 0,
}
: tracking
}
}
}
let attemptWithFallback = true
queryCheckpoint('query_api_loop_start')
@@ -1142,7 +1187,7 @@ async function* queryLoop(
// Execute post-sampling hooks after model response is complete
if (assistantMessages.length > 0) {
void executePostSamplingHooks(
[...messagesForQuery, ...assistantMessages],
messagesForQuery.concat(assistantMessages),
systemPrompt,
userContext,
systemContext,
@@ -1864,11 +1909,10 @@ async function* queryLoop(
userContext,
systemContext,
toolUseContext,
forkContextMessages: [
...messagesForQuery,
...assistantMessages,
...toolResults,
],
forkContextMessages: messagesForQuery.concat(
assistantMessages,
toolResults,
),
})
}
}
@@ -1885,7 +1929,7 @@ async function* queryLoop(
queryCheckpoint('query_recursive_call')
const next: State = {
messages: [...messagesForQuery, ...assistantMessages, ...toolResults],
messages: messagesForQuery.concat(assistantMessages, toolResults),
toolUseContext: toolUseContextWithQueryTracking,
autoCompactTracking: tracking,
turnCount: nextTurnCount,

View File

@@ -1566,7 +1566,15 @@ export function REPL({
// Deferred messages for the Messages component — renders at transition
// priority so the reconciler yields every 5ms, keeping input responsive
// while the expensive message processing pipeline runs.
const deferredMessages = useDeferredValue(messages);
// Cap at 500 messages to limit memory double-buffering. The bypass
// at display-time uses sync messages during streaming and non-loading,
// so this cap only affects reduced-motion scenarios.
const DEFERRED_CAP = 500;
const cappedMessages = React.useMemo(
() => (messages.length > DEFERRED_CAP ? messages.slice(-DEFERRED_CAP) : messages),
[messages],
);
const deferredMessages = useDeferredValue(cappedMessages);
const deferredBehind = messages.length - deferredMessages.length;
if (deferredBehind > 0) {
logForDebugging(

View File

@@ -64,6 +64,35 @@ export const WARNING_THRESHOLD_BUFFER_TOKENS = 20_000
export const ERROR_THRESHOLD_BUFFER_TOKENS = 20_000
export const MANUAL_COMPACT_BUFFER_TOKENS = 3_000
// Per-turn estimate of how many tokens tool results add to the context.
// Typical tool results (file reads, grep, bash) average ~5-10K tokens;
// occasional large reads can spike to 20K+, so this figure sits above the
// typical case but does not cover the worst case.
// estimateMaxTurnGrowth adds this value to the capped model output size.
const TOOL_RESULT_GROWTH_ESTIMATE = 15_000
/**
 * Choose the autocompact buffer for a model based on its effective context
 * window. Bigger windows get a bigger buffer, on the reasoning that a single
 * turn can add proportionally more tokens there (longer model outputs and
 * larger tool results).
 *
 * @param model - Model identifier passed to getEffectiveContextWindowSize.
 * @returns 50_000 for windows of at least 800_000 tokens, 30_000 for windows
 *   of at least 400_000 tokens, otherwise AUTOCOMPACT_BUFFER_TOKENS.
 */
export function getAutocompactBufferTokens(model: string): number {
  // Ordered from the largest window downwards; the first matching tier wins.
  const tiers = [
    [800_000, 50_000],
    [400_000, 30_000],
  ] as const
  const windowSize = getEffectiveContextWindowSize(model)
  for (const [minimumWindow, bufferTokens] of tiers) {
    if (windowSize >= minimumWindow) {
      return bufferTokens
    }
  }
  return AUTOCOMPACT_BUFFER_TOKENS
}
/**
 * Upper estimate of the tokens one turn can add, for the predictive
 * autocompact check that runs before the API call.
 *
 * @param model - Model identifier passed to getMaxOutputTokensForModel.
 * @returns The model's max output tokens, capped at
 *   MAX_OUTPUT_TOKENS_FOR_SUMMARY, plus TOOL_RESULT_GROWTH_ESTIMATE.
 */
export function estimateMaxTurnGrowth(model: string): number {
  const modelOutputLimit = getMaxOutputTokensForModel(model)
  const outputBudget = Math.min(MAX_OUTPUT_TOKENS_FOR_SUMMARY, modelOutputLimit)
  return TOOL_RESULT_GROWTH_ESTIMATE + outputBudget
}
// Stop trying autocompact after this many consecutive failures.
// BQ 2026-03-10: 1,279 sessions had 50+ consecutive failures (up to 3,272)
// in a single session, wasting ~250K API calls/day globally.
@@ -73,7 +102,7 @@ export function getAutoCompactThreshold(model: string): number {
const effectiveContextWindow = getEffectiveContextWindowSize(model)
const autocompactThreshold =
effectiveContextWindow - AUTOCOMPACT_BUFFER_TOKENS
effectiveContextWindow - getAutocompactBufferTokens(model)
// Override for easier testing of autocompact
const envPercent = process.env.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE

View File

@@ -334,13 +334,12 @@ export type RecompactionInfo = {
* Order: boundaryMarker, summaryMessages, messagesToKeep, attachments, hookResults
*/
export function buildPostCompactMessages(result: CompactionResult): Message[] {
  // Single return path: the body previously held two consecutive `return`
  // statements (a spread literal followed by this concat form), leaving the
  // second one unreachable.
  // Output order: boundaryMarker, summaryMessages, messagesToKeep,
  // attachments, hookResults. A missing messagesToKeep contributes nothing.
  const leading: Message[] = [result.boundaryMarker]
  return leading.concat(
    result.summaryMessages,
    result.messagesToKeep ?? [],
    result.attachments,
    result.hookResults,
  )
}
/**

View File

@@ -1,25 +1,97 @@
// Auto-generated stub — replace with real implementation
export {}
import type { Message } from 'src/types/message'
import type { CompactionResult } from './compact.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
import {
isMediaSizeErrorMessage,
isPromptTooLongMessage,
} from '../api/errors.js'
import type { AssistantMessage, Message } from '../../types/message.js'
import { type CompactionResult, compactConversation } from './compact.js'
import { logError } from '../../utils/log.js'
import { logForDebugging } from '../../utils/debug.js'
import type { CacheSafeParams } from '../../utils/forkedAgent.js'
export const isReactiveOnlyMode: () => boolean = () => false
export const reactiveCompactOnPromptTooLong: (
messages: Message[],
cacheSafeParams: Record<string, unknown>,
options: { customInstructions?: string; trigger?: string },
) => Promise<{ ok: boolean; reason?: string; result?: CompactionResult }> =
async () => ({ ok: false })
export const isReactiveCompactEnabled: () => boolean = () => false
export const isWithheldPromptTooLong: (message: Message) => boolean = () =>
false
export const isWithheldMediaSizeError: (message: Message) => boolean = () =>
false
async (messages, cacheSafeParams, options) => {
const params = cacheSafeParams as unknown as CacheSafeParams
try {
const result = await compactConversation(
messages,
params.toolUseContext,
params,
true,
options.customInstructions,
true,
{
isRecompactionInChain: false,
turnsSincePreviousCompact: 0,
autoCompactThreshold: 0,
querySource: 'compact',
},
)
return { ok: true, result }
} catch (error) {
logError(error)
return { ok: false, reason: String(error) }
}
}
/**
 * Reactive compaction is on unless the DISABLE_COMPACT environment variable
 * is truthy according to isEnvTruthy.
 */
export const isReactiveCompactEnabled: () => boolean = () =>
  !isEnvTruthy(process.env.DISABLE_COMPACT)
/**
 * True only for an assistant message flagged as an API error that
 * isPromptTooLongMessage also accepts; every other message yields false.
 */
export const isWithheldPromptTooLong: (message: Message) => boolean =
  message => {
    const isAssistantApiError =
      message.type === 'assistant' && Boolean(message.isApiErrorMessage)
    return isAssistantApiError
      ? isPromptTooLongMessage(message as AssistantMessage)
      : false
  }
/**
 * True only for an assistant message flagged as an API error that
 * isMediaSizeErrorMessage also accepts; every other message yields false.
 */
export const isWithheldMediaSizeError: (message: Message) => boolean =
  message => {
    const isAssistantApiError =
      message.type === 'assistant' && Boolean(message.isApiErrorMessage)
    return isAssistantApiError
      ? isMediaSizeErrorMessage(message as AssistantMessage)
      : false
  }
export const tryReactiveCompact: (params: {
hasAttempted: boolean
querySource: string
aborted: boolean
messages: Message[]
cacheSafeParams: Record<string, unknown>
}) => Promise<CompactionResult | null> = async () => null
}) => Promise<CompactionResult | null> = async ({
hasAttempted,
aborted,
messages,
cacheSafeParams,
}) => {
if (hasAttempted || aborted) return null
const params = cacheSafeParams as unknown as CacheSafeParams
try {
const result = await compactConversation(
messages,
params.toolUseContext,
params,
true,
undefined,
true,
{
isRecompactionInChain: false,
turnsSincePreviousCompact: 0,
autoCompactThreshold: 0,
},
)
return result
} catch (error) {
logForDebugging(
`reactiveCompact: emergency compaction failed — ${String(error)}`,
{ level: 'warn' },
)
logError(error)
return null
}
}

View File

@@ -1397,6 +1397,172 @@ export function buildMessageLookups(
}
}
/**
 * Increment the counter stored at `counts.get(outerKey).get(innerKey)`,
 * creating the inner map on first use and treating a missing counter as 0.
 */
function incrementNestedCount<TOuter, TInner>(
  counts: Map<TOuter, Map<TInner, number>>,
  outerKey: TOuter,
  innerKey: TInner,
): void {
  let inner = counts.get(outerKey)
  if (!inner) {
    inner = new Map()
    counts.set(outerKey, inner)
  }
  inner.set(innerKey, (inner.get(innerKey) ?? 0) + 1)
}

/**
 * Incrementally update lookups by processing only newly appended messages.
 *
 * `existing` is mutated in place and the very same object is returned, so a
 * caller cannot detect an update by comparing object references.
 *
 * Only the array lengths are checked here: nothing verifies that the first
 * `previousNormalizedCount` / `previousMessageCount` entries are unchanged,
 * so the caller is responsible for only using this on append-only changes.
 *
 * @param existing - Lookups from an earlier build; mutated in place.
 * @param previousNormalizedCount - Length of `normalizedMessages` at the time
 *   `existing` was last brought up to date.
 * @param previousMessageCount - Length of `messages` at that same time.
 * @param normalizedMessages - Current normalized messages; entries from
 *   `previousNormalizedCount` onwards are treated as new.
 * @param messages - Current messages; entries from `previousMessageCount`
 *   onwards are treated as new.
 * @returns `existing` after the update (also when nothing was appended), or
 *   `null` when either array is shorter than before and a full rebuild is
 *   needed.
 */
export function updateMessageLookupsIncremental(
  existing: MessageLookups,
  previousNormalizedCount: number,
  previousMessageCount: number,
  normalizedMessages: NormalizedMessage[],
  messages: Message[],
): MessageLookups | null {
  // Safety check: only handle the append-only case.
  if (
    normalizedMessages.length < previousNormalizedCount ||
    messages.length < previousMessageCount
  ) {
    return null
  }

  // No new messages — nothing to do.
  if (
    normalizedMessages.length === previousNormalizedCount &&
    messages.length === previousMessageCount
  ) {
    return existing
  }

  // Pass 1 (new `messages` entries): register tool_use blocks of assistant
  // messages and their sibling sets.
  const newMessageStart = previousMessageCount
  for (let i = newMessageStart; i < messages.length; i++) {
    const msg = messages[i]!
    if (msg.type === 'assistant') {
      const aMsg = msg as AssistantMessage
      if (Array.isArray(aMsg.message.content)) {
        const newToolUseIDs: string[] = []
        for (const content of aMsg.message.content) {
          if (typeof content !== 'string' && content.type === 'tool_use') {
            const toolUseContent = content as ToolUseBlock
            newToolUseIDs.push(toolUseContent.id)
            existing.toolUseByToolUseID.set(
              toolUseContent.id,
              content as ToolUseBlockParam,
            )
          }
        }
        // All tool_use IDs found in this entry share one sibling set.
        // NOTE(review): the set is built per `messages` entry, so tool_use
        // blocks that arrive in separate entries are not grouped together
        // here — confirm this matches what buildMessageLookups produces.
        const allSiblings = new Set(newToolUseIDs)
        for (const toolUseID of newToolUseIDs) {
          existing.siblingToolUseIDs.set(toolUseID, allSiblings)
        }
      }
    }
  }

  // Pass 2 (new `normalizedMessages` entries): progress, tool results,
  // assistant-side results and hook attachments.
  const newNormalizedStart = previousNormalizedCount
  for (let i = newNormalizedStart; i < normalizedMessages.length; i++) {
    const msg = normalizedMessages[i]!

    if (msg.type === 'progress') {
      const toolUseID = msg.parentToolUseID as string
      const progressForToolUse =
        existing.progressMessagesByToolUseID.get(toolUseID)
      if (progressForToolUse) {
        progressForToolUse.push(msg as ProgressMessage)
      } else {
        existing.progressMessagesByToolUseID.set(toolUseID, [
          msg as ProgressMessage,
        ])
      }

      // Hook progress additionally bumps the in-progress counter for the
      // (tool use, hook event) pair.
      const progressData = msg.data as { type: string; hookEvent: HookEvent }
      if (progressData.type === 'hook_progress') {
        incrementNestedCount(
          existing.inProgressHookCounts,
          toolUseID,
          progressData.hookEvent,
        )
      }
    }

    if (msg.type === 'user' && Array.isArray(msg.message?.content)) {
      for (const content of msg.message?.content ?? []) {
        if (typeof content !== 'string' && content.type === 'tool_result') {
          const tr = content as ToolResultBlockParam
          existing.toolResultByToolUseID.set(tr.tool_use_id, msg)
          existing.resolvedToolUseIDs.add(tr.tool_use_id)
          if (tr.is_error) {
            existing.erroredToolUseIDs.add(tr.tool_use_id)
          }
        }
      }
    }

    if (msg.type === 'assistant' && Array.isArray(msg.message?.content)) {
      for (const content of msg.message?.content ?? []) {
        if (typeof content === 'string') continue
        // Any assistant block carrying a string `tool_use_id` resolves that
        // tool use.
        if (
          'tool_use_id' in content &&
          typeof (content as { tool_use_id: string }).tool_use_id === 'string'
        ) {
          existing.resolvedToolUseIDs.add(
            (content as { tool_use_id: string }).tool_use_id,
          )
        }
        if ((content.type as string) === 'advisor_tool_result') {
          const result = content as {
            tool_use_id: string
            content: { type: string }
          }
          if (result.content.type === 'advisor_tool_result_error') {
            existing.erroredToolUseIDs.add(result.tool_use_id)
          }
        }
      }
    }

    if (isHookAttachmentMessage(msg)) {
      const toolUseID = msg.attachment.toolUseID
      const hookEvent = msg.attachment.hookEvent
      const hookName = (msg.attachment as HookAttachmentWithName).hookName
      // Attachments without a hook name are not counted as resolved hooks.
      if (hookName !== undefined) {
        incrementNestedCount(existing.resolvedHookCounts, toolUseID, hookEvent)
      }
    }
  }

  existing.normalizedMessageCount = normalizedMessages.length

  // Mark orphaned server_tool_use / mcp_tool_use blocks as errored.
  // Only the newly appended normalized messages are scanned; earlier entries
  // are assumed to have been handled by a previous build. Assistant messages
  // whose id equals that of the last entry in `messages` are skipped —
  // presumably because that response may still be in progress.
  const lastMsg = messages.at(-1)
  const lastAssistantMsgId =
    lastMsg?.type === 'assistant' ? lastMsg.message?.id : undefined
  for (let i = newNormalizedStart; i < normalizedMessages.length; i++) {
    const msg = normalizedMessages[i]!
    if (msg.type !== 'assistant') continue
    const aMsg = msg as AssistantMessage
    if (aMsg.message.id === lastAssistantMsgId) continue
    if (!Array.isArray(aMsg.message.content)) continue
    for (const content of aMsg.message.content) {
      if (
        typeof content !== 'string' &&
        ((content.type as string) === 'server_tool_use' ||
          (content.type as string) === 'mcp_tool_use') &&
        !existing.resolvedToolUseIDs.has((content as { id: string }).id)
      ) {
        const id = (content as { id: string }).id
        existing.resolvedToolUseIDs.add(id)
        existing.erroredToolUseIDs.add(id)
      }
    }
  }

  return existing
}
/**
* Compute a lightweight structural fingerprint for buildMessageLookups caching.
* Only captures information that affects lookup results (types, IDs, counts),

View File

@@ -101,6 +101,20 @@ export async function readFileInRange(
throw new FileTooLargeError(stats.size, maxBytes)
}
// For targeted reads of moderately large files, prefer streaming to
// avoid loading the full file into memory when only a slice is needed.
const isTargetedRead = offset > 0 || maxLines !== undefined
if (isTargetedRead && stats.size > FAST_PATH_MAX_SIZE / 4) {
return readFileInRangeStreaming(
filePath,
offset,
maxLines,
maxBytes,
truncateOnByteLimit,
signal,
)
}
const text = await readFile(filePath, { encoding: 'utf8', signal })
return readFileInRangeFast(
text,