import { z } from 'zod/v4' import type { ToolResultBlockParam } from 'src/Tool.js' import { buildTool } from 'src/Tool.js' import { lazySchema } from 'src/utils/lazySchema.js' import { tokenCountWithEstimation } from 'src/utils/tokens.js' import { getStats, isContextCollapseEnabled, } from 'src/services/contextCollapse/index.js' import { isSessionMemoryInitialized } from 'src/services/SessionMemory/sessionMemoryUtils.js' const CTX_INSPECT_TOOL_NAME = 'CtxInspect' const inputSchema = lazySchema(() => z.strictObject({ query: z .string() .optional() .describe('Optional query to filter context entries. If omitted, returns a summary of all context.'), }), ) type InputSchema = ReturnType type CtxInput = z.infer type CtxOutput = { total_tokens: number message_count: number context_window_model: string prompt_caching_enabled: boolean session_memory_enabled: boolean context_collapse_enabled: boolean summary: string } export const CtxInspectTool = buildTool({ name: CTX_INSPECT_TOOL_NAME, searchHint: 'context inspect tokens usage messages window collapse', maxResultSizeChars: 50_000, strict: true, get inputSchema(): InputSchema { return inputSchema() }, async description() { return 'Inspect the current context window contents and token usage' }, async prompt() { return `Inspect the current conversation context. Shows token usage, message count, and a breakdown of what's consuming context space. Use this to understand your context budget before deciding whether to snip old messages or adjust your approach.` }, isConcurrencySafe() { return true }, isReadOnly() { return true }, userFacingName() { return 'CtxInspect' }, renderToolUseMessage() { return 'Context Inspect' }, mapToolResultToToolResultBlockParam( content: CtxOutput, toolUseID: string, ): ToolResultBlockParam { return { tool_use_id: toolUseID, type: 'tool_result', content: `Context: ${content.total_tokens} tokens, ${content.message_count} messages\n${content.summary}`, } }, async call(input: CtxInput, context) { const messages = context.messages ?? [] const model = context.options?.mainLoopModel ?? 'unknown' const totalTokens = tokenCountWithEstimation(messages) const collapseEnabled = isContextCollapseEnabled() const collapseStats = getStats() const focused = input.query?.trim() const sessionMemoryEnabled = isSessionMemoryInitialized() // Prompt caching is an API-level feature controlled by the provider, not // a user-facing toggle. Report as enabled only for providers known to // support Anthropic-style prompt caching (first-party, Bedrock, Vertex). const promptCachingEnabled = !model.startsWith('openai/') && !model.startsWith('grok/') && !model.startsWith('gemini/') const summaryParts = [ focused ? `Focus: ${focused}` : 'Overall context summary', `Model context: ${model}`, `Prompt caching: ${promptCachingEnabled ? 'enabled' : 'disabled'}`, `Session memory: ${sessionMemoryEnabled ? 'enabled' : 'disabled'}`, `Context collapse: ${collapseEnabled ? 'enabled' : 'disabled'}`, ] if (collapseEnabled) { summaryParts.push( `Collapse spans: ${collapseStats.collapsedSpans} committed, ${collapseStats.stagedSpans} staged, ${collapseStats.collapsedMessages} messages summarized`, ) } return { data: { total_tokens: totalTokens, message_count: messages.length, context_window_model: model, prompt_caching_enabled: promptCachingEnabled, session_memory_enabled: sessionMemoryEnabled, context_collapse_enabled: collapseEnabled, summary: summaryParts.join('\n'), }, } }, })