fix: 内存优化 — FileReadTool 100KB 上限、lookups 缓存、microcompact 替换清理

- FileReadTool maxResultSizeChars 从 Infinity 改为 100KB,大文件持久化到磁盘
- Messages.tsx 新增 computeMessageStructureKey 缓存,流式 delta 时跳过 8 个 Map/Set 重建
- microcompact 返回 clearedToolUseIds,query.ts 消费后清理 replacements Map 释放原始字符串
- 更新内存分析报告 Round 5 和 file-operations 文档

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
claude-code-best
2026-05-02 11:21:22 +08:00
parent 3eba5ade1a
commit f724300079
8 changed files with 205 additions and 32 deletions

View File

@@ -34,6 +34,8 @@ import { isFullscreenEnvEnabled } from '../utils/fullscreen.js';
import { applyGrouping } from '../utils/groupToolUses.js';
import {
buildMessageLookups,
computeMessageStructureKey,
type MessageLookups,
createAssistantMessage,
deriveUUID,
getMessagesAfterCompactBoundary,
@@ -510,6 +512,12 @@ const MessagesImpl = ({
// comment above for why this replaced count-based slicing.
const sliceAnchorRef = useRef<SliceAnchor>(null);
// Cache for buildMessageLookups: avoids rebuilding 8 Maps/Sets when only
// message content changed during streaming (text/thinking deltas). The key
// captures only structural info (types, IDs), so content-only deltas skip
// the rebuild entirely.
const lookupsCacheRef = useRef<{ key: string; lookups: MessageLookups } | null>(null);
// Expensive message transforms — filter, reorder, group, collapse, lookups.
// All O(n) over 27k messages. Split from the renderRange slice so scrolling
// (which only changes renderRange) doesn't re-run these. Previously this
@@ -578,7 +586,14 @@ const MessagesImpl = ({
verbose,
);
const lookups = buildMessageLookups(normalizedMessages, messagesToShow as MessageType[]);
const lookupsKey = computeMessageStructureKey(normalizedMessages, messagesToShow as MessageType[]);
let lookups: MessageLookups;
if (lookupsCacheRef.current && lookupsCacheRef.current.key === lookupsKey) {
lookups = lookupsCacheRef.current.lookups;
} else {
lookups = buildMessageLookups(normalizedMessages, messagesToShow as MessageType[]);
lookupsCacheRef.current = { key: lookupsKey, lookups };
}
const hiddenMessageCount = messagesToShowNotTruncated.length - MAX_MESSAGES_TO_SHOW_IN_TRANSCRIPT_MODE;

View File

@@ -529,6 +529,16 @@ async function* queryLoop(
querySource,
)
messagesForQuery = microcompactResult.messages
// Release original strings from contentReplacementState.replacements for
// tool results whose content was replaced with the cleared message.
if (microcompactResult.clearedToolUseIds?.length) {
const replacements = toolUseContext?.contentReplacementState?.replacements
if (replacements) {
for (const id of microcompactResult.clearedToolUseIds) {
replacements.delete(id)
}
}
}
// For cached microcompact (cache editing), defer boundary message until after
// the API response so we can use actual cache_deleted_input_tokens.
// Gated behind feature() so the string is eliminated from external builds.

View File

@@ -217,6 +217,10 @@ export type MicrocompactResult = {
compactionInfo?: {
pendingCacheEdits?: PendingCacheEdits
}
// Tool use IDs whose content was replaced with the cleared message.
// Callers should remove these from contentReplacementState.replacements
// to release the original strings from memory.
clearedToolUseIds?: string[]
}
/**
@@ -528,5 +532,5 @@ function maybeTimeBasedMicrocompact(
notifyCacheDeletion(querySource)
}
return { messages: result }
return { messages: result, clearedToolUseIds: [...clearSet] }
}

View File

@@ -1397,6 +1397,54 @@ export function buildMessageLookups(
}
}
/**
 * Compute a structural fingerprint used as the cache key for
 * buildMessageLookups results.
 *
 * The key encodes, in order:
 *   - the lengths of `normalizedMessages` and `messages`
 *   - for every entry of `messages`: its full `type`, followed by the id of
 *     each `tool_use` block (assistant messages) or the `tool_use_id` of each
 *     `tool_result` block (user messages)
 *   - for every `progress` entry of `normalizedMessages`: its `parentToolUseID`
 *
 * Text/thinking content is deliberately ignored so that content-only changes
 * produce the same key. The key is opaque: compare it for equality only. It is
 * never empty — two empty arrays yield `'n=0,m=0'`.
 *
 * Runs a single pass over each array and builds one joined string.
 *
 * NOTE(review): a cache hit is only safe if buildMessageLookups depends on
 * nothing beyond the fields listed above — confirm against its implementation
 * whenever either function changes.
 *
 * @param normalizedMessages Normalized messages; only `progress` entries
 *   contribute beyond the array length.
 * @param messages Messages whose types and tool-use / tool-result ids are
 *   fingerprinted.
 * @returns A string that is equal for two inputs with the same structure.
 */
export function computeMessageStructureKey(
  normalizedMessages: NormalizedMessage[],
  messages: Message[],
): string {
  const parts: string[] = [
    `n=${normalizedMessages.length}`,
    `m=${messages.length}`,
  ]

  for (const msg of messages) {
    // Use the full type name: a first-letter abbreviation would make distinct
    // types that share an initial indistinguishable in the key.
    parts.push(msg.type)

    if (msg.type === 'assistant') {
      const content = (msg as AssistantMessage).message?.content
      if (Array.isArray(content)) {
        for (const block of content) {
          if (typeof block !== 'string' && block.type === 'tool_use') {
            // Tagged "t:<id>" tokens always contain ':' so they cannot be
            // mistaken for a message-type token.
            parts.push(`t:${(block as ToolUseBlock).id}`)
          }
        }
      }
    } else if (msg.type === 'user') {
      const content = (msg as UserMessage).message?.content
      if (Array.isArray(content)) {
        for (const block of content) {
          if (typeof block !== 'string' && block.type === 'tool_result') {
            parts.push(`r:${(block as ToolResultBlockParam).tool_use_id}`)
          }
        }
      }
    }
  }

  for (const msg of normalizedMessages) {
    if (msg.type === 'progress') {
      // A missing parent id is encoded as an empty id rather than the text
      // "undefined"/"null".
      parts.push(`p:${(msg as ProgressMessage).parentToolUseID ?? ''}`)
    }
  }

  return parts.join(',')
}
/** Empty lookups for static rendering contexts that don't need real lookups. */
export const EMPTY_LOOKUPS: MessageLookups = {
siblingToolUseIDs: new Map(),

View File

@@ -56,9 +56,9 @@ export function getPersistenceThreshold(
toolName: string,
declaredMaxResultSizeChars: number,
): number {
// Infinity = hard opt-out. Read self-bounds via maxTokens; persisting its
// output to a file the model reads back with Read is circular. Checked
// before the GB override so tengu_satin_quoll can't force it back on.
// Infinity = hard opt-out (reserved for tools that self-bound via other
// mechanisms). Checked before the GB override so tengu_satin_quoll can't
// force it back on.
if (!Number.isFinite(declaredMaxResultSizeChars)) {
return declaredMaxResultSizeChars
}
@@ -813,11 +813,12 @@ export async function enforceToolResultBudget(
continue
}
// Tools with maxResultSizeChars: Infinity (Read) — never persist.
// Mark as seen (frozen) so the decision sticks across turns. They don't
// count toward freshSize; if that lets the group slip under budget and
// the wire message is still large, that's the contract — Read's own
// maxTokens is the bound, not this wrapper.
// Tools with maxResultSizeChars: Infinity — never persist (reserved for
// tools that self-bound via other mechanisms). Mark as seen (frozen) so
// the decision sticks across turns. They don't count toward freshSize; if
// that lets the group slip under budget and the wire message is still
// large, that's the contract — the tool's own maxTokens is the bound, not
// this wrapper.
const skipped = fresh.filter(c => shouldSkip(c.toolUseId))
skipped.forEach(c => state.seenIds.add(c.toolUseId))
const eligible = fresh.filter(c => !shouldSkip(c.toolUseId))