import type { Base64ImageSource } from '@anthropic-ai/sdk/resources/index.mjs' import { readdir, readFile as readFileAsync } from 'fs/promises' import * as path from 'path' import { posix, win32 } from 'path' import { z } from 'zod/v4' import { PDF_AT_MENTION_INLINE_THRESHOLD, PDF_EXTRACT_SIZE_THRESHOLD, PDF_MAX_PAGES_PER_READ, } from 'src/constants/apiLimits.js' import { hasBinaryExtension } from 'src/constants/files.js' import { memoryFreshnessNote } from 'src/memdir/memoryAge.js' import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js' import { logEvent } from 'src/services/analytics/index.js' import { type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, getFileExtensionForAnalytics, } from 'src/services/analytics/metadata.js' import { countTokensWithAPI, roughTokenCountEstimationForFileType, } from 'src/services/tokenEstimation.js' import { activateConditionalSkillsForPaths, addSkillDirectories, discoverSkillDirsForPaths, } from 'src/skills/loadSkillsDir.js' import type { ToolUseContext } from 'src/Tool.js' import { buildTool, type ToolDef } from 'src/Tool.js' import { getCwd } from 'src/utils/cwd.js' import { getClaudeConfigHomeDir, isEnvTruthy } from 'src/utils/envUtils.js' import { getErrnoCode, isENOENT } from 'src/utils/errors.js' import { addLineNumbers, FILE_NOT_FOUND_CWD_NOTE, findSimilarFile, getFileModificationTimeAsync, suggestPathUnderCwd, } from 'src/utils/file.js' import { logFileOperation } from 'src/utils/fileOperationAnalytics.js' import { formatFileSize } from 'src/utils/format.js' import { getFsImplementation } from 'src/utils/fsOperations.js' import { compressImageBufferWithTokenLimit, createImageMetadataText, detectImageFormatFromBuffer, type ImageDimensions, ImageResizeError, maybeResizeAndDownsampleImageBuffer, } from 'src/utils/imageResizer.js' import { lazySchema } from 'src/utils/lazySchema.js' import { logError } from 'src/utils/log.js' import { isAutoMemFile } from 
'src/utils/memoryFileDetection.js' import { createUserMessage } from 'src/utils/messages.js' import { mapNotebookCellsToToolResult, readNotebook, } from 'src/utils/notebook.js' import { expandPath } from 'src/utils/path.js' import { extractPDFPages, getPDFPageCount, readPDF } from 'src/utils/pdf.js' import { isPDFExtension, isPDFSupported, parsePDFPageRange, } from 'src/utils/pdfUtils.js' import { checkReadPermissionForTool, matchingRuleForInput, } from 'src/utils/permissions/filesystem.js' import type { PermissionDecision } from 'src/utils/permissions/PermissionResult.js' import { matchWildcardPattern } from 'src/utils/permissions/shellRuleMatching.js' import { readFileInRange } from 'src/utils/readFileInRange.js' import { semanticNumber } from 'src/utils/semanticNumber.js' import { jsonStringify } from 'src/utils/slowOperations.js' import { BASH_TOOL_NAME } from '../BashTool/toolName.js' import { getDefaultFileReadingLimits } from './limits.js' import { DESCRIPTION, FILE_READ_TOOL_NAME, FILE_UNCHANGED_STUB, LINE_FORMAT_INSTRUCTION, OFFSET_INSTRUCTION_DEFAULT, OFFSET_INSTRUCTION_TARGETED, renderPromptTemplate, } from './prompt.js' import { getToolUseSummary, renderToolResultMessage, renderToolUseErrorMessage, renderToolUseMessage, renderToolUseTag, userFacingName, } from './UI.js' // Device files that would hang the process: infinite output or blocking input. // Checked by path only (no I/O). Safe devices like /dev/null are intentionally omitted. 
const BLOCKED_DEVICE_PATHS = new Set([
  // Infinite output — never reach EOF
  '/dev/zero',
  '/dev/random',
  '/dev/urandom',
  '/dev/full',
  // Blocks waiting for input
  '/dev/stdin',
  '/dev/tty',
  '/dev/console',
  // Nonsensical to read
  '/dev/stdout',
  '/dev/stderr',
  // fd aliases for stdin/stdout/stderr
  '/dev/fd/0',
  '/dev/fd/1',
  '/dev/fd/2',
])

/**
 * Returns true when `filePath` names one of the blocked device files.
 * Pure string check: an exact match against BLOCKED_DEVICE_PATHS, or a
 * `/proc/...` path ending in `/fd/0`, `/fd/1` or `/fd/2`.
 *
 * @param filePath - The path to test (compared as-is, no normalization)
 */
function isBlockedDevicePath(filePath: string): boolean {
  if (BLOCKED_DEVICE_PATHS.has(filePath)) return true
  // /proc/self/fd/0-2 and /proc/<pid>/fd/0-2 are Linux aliases for stdio
  if (
    filePath.startsWith('/proc/') &&
    (filePath.endsWith('/fd/0') ||
      filePath.endsWith('/fd/1') ||
      filePath.endsWith('/fd/2'))
  ) {
    return true
  }
  return false
}

// Narrow no-break space (U+202F) used by some macOS versions in screenshot filenames
const THIN_SPACE = String.fromCharCode(8239)

/**
 * For macOS screenshot paths with AM/PM, the space before AM/PM may be a
 * regular space or a thin space (U+202F) depending on the macOS version.
 * Returns the alternate path to try if the original doesn't exist, or
 * undefined when the file name does not look like such a screenshot.
 *
 * Only the file name is rewritten: the swap is applied to the LAST occurrence
 * of "<space>AM.png" / "<space>PM.png" in the path, so a parent directory
 * that happens to contain the same suffix is left untouched.
 *
 * @param filePath - The normalized file path that was not found
 * @returns The same path with the other space character before AM/PM, or undefined
 */
function getAlternateScreenshotPath(filePath: string): string | undefined {
  const filename = path.basename(filePath)
  const amPmPattern = /^(.+)([ \u202F])(AM|PM)(\.png)$/
  const match = filename.match(amPmPattern)
  if (!match) return undefined

  const currentSpace = match[2]
  const alternateSpace = currentSpace === ' ' ? THIN_SPACE : ' '
  const currentSuffix = `${currentSpace}${match[3]}${match[4]}`
  const alternateSuffix = `${alternateSpace}${match[3]}${match[4]}`

  // The suffix belongs to the basename, i.e. the last occurrence in the path.
  const suffixStart = filePath.lastIndexOf(currentSuffix)
  if (suffixStart === -1) return undefined
  return (
    filePath.slice(0, suffixStart) +
    alternateSuffix +
    filePath.slice(suffixStart + currentSuffix.length)
  )
}

// File read listeners - allows other services to be notified when files are read
type FileReadListener = (filePath: string, content: string) => void
const fileReadListeners: FileReadListener[] = []

/**
 * Registers a callback that is invoked with (path, content) for text reads.
 *
 * @param listener - Callback to add to the listener list
 * @returns An unsubscribe function; calling it more than once is a no-op
 */
export function registerFileReadListener(
  listener: FileReadListener,
): () => void {
  fileReadListeners.push(listener)
  return () => {
    const i = fileReadListeners.indexOf(listener)
    if (i >= 0) fileReadListeners.splice(i, 1)
  }
}

/**
 * Thrown when the content selected for a read exceeds the token budget.
 * Carries both the measured/estimated count and the limit that was applied.
 */
export class MaxFileReadTokenExceededError extends Error {
  constructor(
    public tokenCount: number,
    public maxTokens: number,
  ) {
    super(
      `File content (${tokenCount} tokens) exceeds maximum allowed tokens (${maxTokens}). Use offset and limit parameters to read specific portions of the file, or search for specific content instead of reading the whole file.`,
    )
    this.name = 'MaxFileReadTokenExceededError'
  }
}

// Common image extensions
const IMAGE_EXTENSIONS = new Set(['png', 'jpg', 'jpeg', 'gif', 'webp'])

/**
 * Detects if a file path is a session-related file for analytics logging.
 * Only matches files within the Claude config directory (e.g., ~/.claude).
 * Returns the type of session file or null if not a session file.
*/
function detectSessionFileType(
  filePath: string,
): 'session_memory' | 'session_transcript' | null {
  // Normalize to forward slashes for consistent matching across platforms
  const toPosix = (p: string): string => p.split(win32.sep).join(posix.sep)
  const normalizedPath = toPosix(filePath)
  const normalizedConfigDir = toPosix(getClaudeConfigHomeDir()).replace(
    /\/+$/,
    '',
  )

  // Only match files within the Claude config directory. Require a separator
  // after the prefix so a sibling such as "~/.claude-backup" is not treated
  // as being inside "~/.claude".
  if (!normalizedPath.startsWith(`${normalizedConfigDir}/`)) {
    return null
  }

  // Session memory files: ~/.claude/session-memory/*.md (including summary.md)
  if (
    normalizedPath.includes('/session-memory/') &&
    normalizedPath.endsWith('.md')
  ) {
    return 'session_memory'
  }

  // Session JSONL transcript files: ~/.claude/projects/*/*.jsonl
  if (
    normalizedPath.includes('/projects/') &&
    normalizedPath.endsWith('.jsonl')
  ) {
    return 'session_transcript'
  }

  return null
}

const inputSchema = lazySchema(() =>
  z.strictObject({
    file_path: z.string().describe('The absolute path to the file to read'),
    offset: semanticNumber(z.number().int().nonnegative().optional()).describe(
      'The line number to start reading from. Only provide if the file is too large to read at once',
    ),
    limit: semanticNumber(z.number().int().positive().optional()).describe(
      'The number of lines to read. Only provide if the file is too large to read at once.',
    ),
    pages: z
      .string()
      .optional()
      .describe(
        `Page range for PDF files (e.g., "1-5", "3", "10-20"). Only applicable to PDF files. Maximum ${PDF_MAX_PAGES_PER_READ} pages per request.`,
      ),
  }),
)
type InputSchema = ReturnType<typeof inputSchema>

export type Input = z.infer<InputSchema>

const outputSchema = lazySchema(() => {
  // Define the media types supported for images
  const imageMediaTypes = z.enum([
    'image/jpeg',
    'image/png',
    'image/gif',
    'image/webp',
  ])

  return z.discriminatedUnion('type', [
    z.object({
      type: z.literal('text'),
      file: z.object({
        filePath: z.string().describe('The path to the file that was read'),
        content: z.string().describe('The content of the file'),
        numLines: z
          .number()
          .describe('Number of lines in the returned content'),
        startLine: z.number().describe('The starting line number'),
        totalLines: z.number().describe('Total number of lines in the file'),
      }),
    }),
    z.object({
      type: z.literal('image'),
      file: z.object({
        base64: z.string().describe('Base64-encoded image data'),
        type: imageMediaTypes.describe('The MIME type of the image'),
        originalSize: z.number().describe('Original file size in bytes'),
        dimensions: z
          .object({
            originalWidth: z
              .number()
              .optional()
              .describe('Original image width in pixels'),
            originalHeight: z
              .number()
              .optional()
              .describe('Original image height in pixels'),
            displayWidth: z
              .number()
              .optional()
              .describe('Displayed image width in pixels (after resizing)'),
            displayHeight: z
              .number()
              .optional()
              .describe('Displayed image height in pixels (after resizing)'),
          })
          .optional()
          .describe('Image dimension info for coordinate mapping'),
      }),
    }),
    z.object({
      type: z.literal('notebook'),
      file: z.object({
        filePath: z.string().describe('The path to the notebook file'),
        cells: z.array(z.any()).describe('Array of notebook cells'),
      }),
    }),
    z.object({
      type: z.literal('pdf'),
      file: z.object({
        filePath: z.string().describe('The path to the PDF file'),
        base64: z.string().describe('Base64-encoded PDF data'),
        originalSize: z.number().describe('Original file size in bytes'),
      }),
    }),
    z.object({
      type: z.literal('parts'),
      file: z.object({
        filePath: z.string().describe('The path to the PDF file'),
        originalSize: z.number().describe('Original file size in bytes'),
        count: z.number().describe('Number of pages extracted'),
        outputDir: z
          .string()
          .describe('Directory containing extracted page images'),
      }),
    }),
    z.object({
      type: z.literal('file_unchanged'),
      file: z.object({
        filePath: z.string().describe('The path to the file'),
      }),
    }),
  ])
})
type OutputSchema = ReturnType<typeof outputSchema>

export type Output = z.infer<OutputSchema>

/**
 * Tool definition for reading text files, images, PDFs and notebooks.
 *
 * validateInput performs only string-level checks (page range, deny rules,
 * UNC paths, binary extensions, blocked device paths); all filesystem access
 * happens in call()/callInner().
 */
export const FileReadTool = buildTool({
  name: FILE_READ_TOOL_NAME,
  searchHint: 'read files, images, PDFs, notebooks',
  // Output is bounded by maxTokens (validateContentTokens). Results exceeding
  // 100KB are persisted to disk (reducing memory pressure in long sessions)
  // rather than kept in the message array indefinitely.
  maxResultSizeChars: 100_000,
  strict: true,
  async description() {
    return DESCRIPTION
  },
  async prompt() {
    const limits = getDefaultFileReadingLimits()
    const maxSizeInstruction = limits.includeMaxSizeInPrompt
      ? `. Files larger than ${formatFileSize(limits.maxSizeBytes)} will return an error; use offset and limit for larger files`
      : ''
    const offsetInstruction = limits.targetedRangeNudge
      ? OFFSET_INSTRUCTION_TARGETED
      : OFFSET_INSTRUCTION_DEFAULT
    return renderPromptTemplate(
      pickLineFormatInstruction(),
      maxSizeInstruction,
      offsetInstruction,
    )
  },
  get inputSchema(): InputSchema {
    return inputSchema()
  },
  get outputSchema(): OutputSchema {
    return outputSchema()
  },
  userFacingName,
  getToolUseSummary,
  getActivityDescription(input) {
    const summary = getToolUseSummary(input)
    return summary ? `Reading ${summary}` : 'Reading file'
  },
  isConcurrencySafe() {
    return true
  },
  isReadOnly() {
    return true
  },
  toAutoClassifierInput(input) {
    return input.file_path
  },
  isSearchOrReadCommand() {
    return { isSearch: false, isRead: true }
  },
  getPath({ file_path }): string {
    return file_path || getCwd()
  },
  backfillObservableInput(input) {
    // hooks.mdx documents file_path as absolute; expand so hook allowlists
    // can't be bypassed via ~ or relative paths.
    if (typeof input.file_path === 'string') {
      input.file_path = expandPath(input.file_path)
    }
  },
  async preparePermissionMatcher({ file_path }) {
    return pattern => matchWildcardPattern(pattern, file_path)
  },
  async checkPermissions(input, context): Promise<PermissionDecision> {
    const appState = context.getAppState()
    return checkReadPermissionForTool(
      FileReadTool,
      input,
      appState.toolPermissionContext,
    )
  },
  renderToolUseMessage,
  renderToolUseTag,
  renderToolResultMessage,
  // UI.tsx:140 — ALL types render summary chrome only: "Read N lines",
  // "Read image (42KB)". Never the content itself. The model-facing
  // serialization (below) sends content + line prefixes; UI shows none of it.
  extractSearchText() {
    return ''
  },
  renderToolUseErrorMessage,
  async validateInput({ file_path, pages }, toolUseContext: ToolUseContext) {
    // Validate pages parameter (pure string parsing, no I/O)
    if (pages !== undefined) {
      const parsed = parsePDFPageRange(pages)
      if (!parsed) {
        return {
          result: false,
          message: `Invalid pages parameter: "${pages}". Use formats like "1-5", "3", or "10-20". Pages are 1-indexed.`,
          errorCode: 7,
        }
      }
      // An open-ended range (lastPage === Infinity) is always treated as
      // exceeding the per-request maximum.
      const rangeSize =
        parsed.lastPage === Infinity
          ? PDF_MAX_PAGES_PER_READ + 1
          : parsed.lastPage - parsed.firstPage + 1
      if (rangeSize > PDF_MAX_PAGES_PER_READ) {
        return {
          result: false,
          message: `Page range "${pages}" exceeds maximum of ${PDF_MAX_PAGES_PER_READ} pages per request. Please use a smaller range.`,
          errorCode: 8,
        }
      }
    }

    // Path expansion + deny rule check (no I/O)
    const fullFilePath = expandPath(file_path)

    const appState = toolUseContext.getAppState()
    const denyRule = matchingRuleForInput(
      fullFilePath,
      appState.toolPermissionContext,
      'read',
      'deny',
    )
    if (denyRule !== null) {
      return {
        result: false,
        message:
          'File is in a directory that is denied by your permission settings.',
        errorCode: 1,
      }
    }

    // SECURITY: UNC path check (no I/O) — defer filesystem operations
    // until after user grants permission to prevent NTLM credential leaks
    const isUncPath =
      fullFilePath.startsWith('\\\\') || fullFilePath.startsWith('//')
    if (isUncPath) {
      return { result: true }
    }

    // Binary extension check (string check on extension only, no I/O).
    // PDF, images, and SVG are excluded - this tool renders them natively.
    const ext = path.extname(fullFilePath).toLowerCase()
    if (
      hasBinaryExtension(fullFilePath) &&
      !isPDFExtension(ext) &&
      !IMAGE_EXTENSIONS.has(ext.slice(1))
    ) {
      return {
        result: false,
        message: `This tool cannot read binary files. The file appears to be a binary ${ext} file. Please use appropriate tools for binary file analysis.`,
        errorCode: 4,
      }
    }

    // Block specific device files that would hang (infinite output or blocking input).
    // This is a path-based check with no I/O — safe special files like /dev/null are allowed.
    if (isBlockedDevicePath(fullFilePath)) {
      return {
        result: false,
        message: `Cannot read '${file_path}': this device file would block or produce infinite output.`,
        errorCode: 9,
      }
    }

    return { result: true }
  },
  async call(
    { file_path, offset = 1, limit = undefined, pages },
    context,
    _canUseTool?,
    parentMessage?,
  ) {
    const { readFileState, fileReadingLimits } = context

    const defaults = getDefaultFileReadingLimits()
    const maxSizeBytes =
      fileReadingLimits?.maxSizeBytes ?? defaults.maxSizeBytes
    const maxTokens = fileReadingLimits?.maxTokens ?? defaults.maxTokens

    // Telemetry: track when callers override default read limits.
    // Only fires on override (low volume) — event count = override frequency.
    if (fileReadingLimits !== undefined) {
      logEvent('tengu_file_read_limits_override', {
        hasMaxTokens: fileReadingLimits.maxTokens !== undefined,
        hasMaxSizeBytes: fileReadingLimits.maxSizeBytes !== undefined,
      })
    }

    // Use expandPath for consistent path normalization with FileEditTool/FileWriteTool
    // (especially handles whitespace trimming and Windows path separators)
    const fullFilePath = expandPath(file_path)
    // Derive the extension from the expanded path — the same path that
    // validateInput inspects — so both agree on the file type.
    const ext = path.extname(fullFilePath).toLowerCase().slice(1)

    // Dedup: if we've already read this exact range and the file hasn't
    // changed on disk, return a stub instead of re-sending the full content.
    // The earlier Read tool_result is still in context — two full copies
    // waste cache_creation tokens on every subsequent turn. BQ proxy shows
    // ~18% of Read calls are same-file collisions (up to 2.64% of fleet
    // cache_creation). Only applies to text/notebook reads — images/PDFs
    // aren't cached in readFileState so won't match here.
    //
    // Ant soak: 1,734 dedup hits in 2h, no Read error regression.
    // Killswitch pattern: GB can disable if the stub message confuses
    // the model externally.
    // 3P default: killswitch off = dedup enabled. Client-side only — no
    // server support needed, safe for Bedrock/Vertex/Foundry.
    const dedupKillswitch = getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_read_dedup_killswitch',
      false,
    )
    const existingState = dedupKillswitch
      ? undefined
      : readFileState.get(fullFilePath)
    // Only dedup entries that came from a prior Read (offset is always set
    // by Read). Edit/Write store offset=undefined — their readFileState
    // entry reflects post-edit mtime, so deduping against it would wrongly
    // point the model at the pre-edit Read content.
    if (
      existingState &&
      !existingState.isPartialView &&
      existingState.offset !== undefined
    ) {
      const rangeMatch =
        existingState.offset === offset && existingState.limit === limit
      if (rangeMatch) {
        try {
          const mtimeMs = await getFileModificationTimeAsync(fullFilePath)
          if (mtimeMs === existingState.timestamp) {
            const analyticsExt = getFileExtensionForAnalytics(fullFilePath)
            logEvent('tengu_file_read_dedup', {
              ...(analyticsExt !== undefined && { ext: analyticsExt }),
            })
            return {
              data: {
                type: 'file_unchanged' as const,
                file: { filePath: file_path },
              },
            }
          }
        } catch {
          // stat failed — fall through to full read
        }
      }
    }

    // Discover skills from this file's path. Discovery itself is awaited;
    // only the subsequent skill loading runs in the background.
    // Skip in simple mode - no skills available
    const cwd = getCwd()
    if (!isEnvTruthy(process.env.CLAUDE_CODE_SIMPLE)) {
      const newSkillDirs = await discoverSkillDirsForPaths([fullFilePath], cwd)
      if (newSkillDirs.length > 0) {
        // Store discovered dirs for attachment display
        for (const dir of newSkillDirs) {
          context.dynamicSkillDirTriggers?.add(dir)
        }
        // Don't await - let skill loading happen in the background
        addSkillDirectories(newSkillDirs).catch(() => {})
      }

      // Activate conditional skills whose path patterns match this file
      activateConditionalSkillsForPaths([fullFilePath], cwd)
    }

    try {
      return await callInner(
        file_path,
        fullFilePath,
        fullFilePath,
        ext,
        offset,
        limit,
        pages,
        maxSizeBytes,
        maxTokens,
        readFileState,
        context,
        parentMessage?.message.id,
      )
    } catch (error) {
      // Handle file-not-found: suggest similar files
      const code = getErrnoCode(error)
      if (code === 'ENOENT') {
        // macOS screenshots may use a thin space or regular space before
        // AM/PM — try the alternate before giving up.
        const altPath = getAlternateScreenshotPath(fullFilePath)
        if (altPath) {
          try {
            return await callInner(
              file_path,
              fullFilePath,
              altPath,
              ext,
              offset,
              limit,
              pages,
              maxSizeBytes,
              maxTokens,
              readFileState,
              context,
              parentMessage?.message.id,
            )
          } catch (altError) {
            if (!isENOENT(altError)) {
              throw altError
            }
            // Alt path also missing — fall through to friendly error
          }
        }

        const similarFilename = findSimilarFile(fullFilePath)
        const cwdSuggestion = await suggestPathUnderCwd(fullFilePath)
        let message = `File does not exist. ${FILE_NOT_FOUND_CWD_NOTE} ${getCwd()}.`
        if (cwdSuggestion) {
          message += ` Did you mean ${cwdSuggestion}?`
        } else if (similarFilename) {
          message += ` Did you mean ${similarFilename}?`
        }
        throw new Error(message)
      }
      throw error
    }
  },
  mapToolResultToToolResultBlockParam(data, toolUseID) {
    switch (data.type) {
      case 'image': {
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: [
            {
              type: 'image',
              source: {
                type: 'base64',
                data: data.file.base64,
                media_type: data.file.type,
              },
            },
          ],
        }
      }
      case 'notebook':
        return mapNotebookCellsToToolResult(data.file.cells, toolUseID)
      case 'pdf':
        // Return PDF metadata only - the actual content is sent as a supplemental DocumentBlockParam
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: `PDF file read: ${data.file.filePath} (${formatFileSize(data.file.originalSize)})`,
        }
      case 'parts':
        // Extracted page images are read and sent as image blocks in mapToolResultToAPIMessage
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: `PDF pages extracted: ${data.file.count} page(s) from ${data.file.filePath} (${formatFileSize(data.file.originalSize)})`,
        }
      case 'file_unchanged':
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: FILE_UNCHANGED_STUB,
        }
      case 'text': {
        let content: string

        if (data.file.content) {
          content =
            memoryFileFreshnessPrefix(data) + formatFileLines(data.file)
        } else {
          // Determine the appropriate warning message
          content =
            data.file.totalLines === 0
              ? 'Warning: the file exists but the contents are empty.'
              : `Warning: the file exists but is shorter than the provided offset (${data.file.startLine}). The file has ${data.file.totalLines} lines.`
        }

        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content,
        }
      }
    }
  },
} satisfies ToolDef<InputSchema, Output>)

/** Returns the line-format instruction that is embedded in the tool prompt. */
function pickLineFormatInstruction(): string {
  return LINE_FORMAT_INSTRUCTION
}

/** Format file content with line numbers. */
function formatFileLines(file: { content: string; startLine: number }): string {
  return addLineNumbers(file)
}

/**
 * Side-channel from call() to mapToolResultToToolResultBlockParam: mtime
 * of auto-memory files, keyed by the `data` object identity. Avoids
 * adding a presentation-only field to the output schema (which flows
 * into SDK types) and avoids sync fs in the mapper. WeakMap auto-GCs
 * when the data object becomes unreachable after rendering.
 */
const memoryFileMtimes = new WeakMap<object, number>()

/**
 * Returns the freshness note for a text result recorded in memoryFileMtimes,
 * or an empty string when no mtime was recorded for this `data` object.
 */
function memoryFileFreshnessPrefix(data: object): string {
  const mtimeMs = memoryFileMtimes.get(data)
  if (mtimeMs === undefined) return ''
  return memoryFreshnessNote(mtimeMs)
}

/**
 * Throws MaxFileReadTokenExceededError when `content` exceeds the token budget.
 *
 * @param content - The text that would be returned to the model
 * @param ext - File extension (no leading dot) used for the rough estimate
 * @param maxTokens - Token budget; falls back to the default reading limits
 * @throws MaxFileReadTokenExceededError when the content is over budget
 */
async function validateContentTokens(
  content: string,
  ext: string,
  maxTokens?: number,
): Promise<void> {
  const effectiveMaxTokens =
    maxTokens ?? getDefaultFileReadingLimits().maxTokens

  // Fast rejection: if raw byte count exceeds 4x the token limit,
  // no encoding can possibly fit (worst case is ~4 bytes/token).
  const byteLength = Buffer.byteLength(content)
  if (byteLength > effectiveMaxTokens * 4) {
    throw new MaxFileReadTokenExceededError(
      Math.ceil(byteLength / 4),
      effectiveMaxTokens,
    )
  }

  // Skip the API token count when the rough estimate is missing/zero or at
  // most a quarter of the budget.
  const tokenEstimate = roughTokenCountEstimationForFileType(content, ext)
  if (!tokenEstimate || tokenEstimate <= effectiveMaxTokens / 4) return

  // Fall back to the rough estimate when the API count is null/undefined.
  const tokenCount = await countTokensWithAPI(content)
  const effectiveCount = tokenCount ?? tokenEstimate

  if (effectiveCount > effectiveMaxTokens) {
    throw new MaxFileReadTokenExceededError(effectiveCount, effectiveMaxTokens)
  }
}

type ImageResult = {
  type: 'image'
  file: {
    base64: string
    type: Base64ImageSource['media_type']
    originalSize: number
    dimensions?: ImageDimensions
  }
}

/**
 * Wraps an image buffer in the tool's `image` result shape.
 *
 * @param buffer - Image bytes to base64-encode
 * @param mediaType - Image subtype without the `image/` prefix (e.g. 'png')
 * @param originalSize - Size in bytes of the file as read from disk
 * @param dimensions - Optional original/display dimensions
 */
function createImageResponse(
  buffer: Buffer,
  mediaType: string,
  originalSize: number,
  dimensions?: ImageDimensions,
): ImageResult {
  return {
    type: 'image',
    file: {
      base64: buffer.toString('base64'),
      type: `image/${mediaType}` as Base64ImageSource['media_type'],
      originalSize,
      dimensions,
    },
  }
}

/**
 * Inner implementation of call, separated to allow ENOENT handling in the outer call.
 *
 * @param file_path - Path as supplied by the caller; echoed back in results and messages
 * @param fullFilePath - Expanded path; used as the readFileState key and for logging/analytics
 * @param resolvedFilePath - Path actually read from disk (may be the alternate screenshot path)
 * @param ext - Lower-cased extension without the leading dot
 * @param offset - 1-based starting line (0 is treated like 1)
 * @param limit - Maximum number of lines to read, or undefined for no line limit
 * @param pages - Optional PDF page range
 * @param maxSizeBytes - Size cap applied to notebooks and to un-limited text reads
 * @param maxTokens - Token budget for the returned content
 * @param readFileState - Read cache updated for text and notebook reads
 * @param context - Tool use context (abort signal, attachment triggers)
 * @param messageId - Id of the parent message, forwarded to analytics when defined
 */
async function callInner(
  file_path: string,
  fullFilePath: string,
  resolvedFilePath: string,
  ext: string,
  offset: number,
  limit: number | undefined,
  pages: string | undefined,
  maxSizeBytes: number,
  maxTokens: number,
  readFileState: ToolUseContext['readFileState'],
  context: ToolUseContext,
  messageId: string | undefined,
): Promise<{
  data: Output
  newMessages?: ReturnType<typeof createUserMessage>[]
}> {
  // --- Notebook ---
  if (ext === 'ipynb') {
    const cells = await readNotebook(resolvedFilePath)
    const cellsJson = jsonStringify(cells)

    const cellsJsonBytes = Buffer.byteLength(cellsJson)
    if (cellsJsonBytes > maxSizeBytes) {
      throw new Error(
        `Notebook content (${formatFileSize(cellsJsonBytes)}) exceeds maximum allowed size (${formatFileSize(maxSizeBytes)}). ` +
          `Use ${BASH_TOOL_NAME} with jq to read specific portions:\n` +
          ` cat "${file_path}" | jq '.cells[:20]' # First 20 cells\n` +
          ` cat "${file_path}" | jq '.cells[100:120]' # Cells 100-120\n` +
          ` cat "${file_path}" | jq '.cells | length' # Count total cells\n` +
          ` cat "${file_path}" | jq '.cells[] | select(.cell_type=="code") | .source' # All code sources`,
      )
    }

    await validateContentTokens(cellsJson, ext, maxTokens)

    // Get mtime via async stat (single call, no prior existence check)
    const stats = await getFsImplementation().stat(resolvedFilePath)
    readFileState.set(fullFilePath, {
      content: cellsJson,
      timestamp: Math.floor(stats.mtimeMs),
      offset,
      limit,
    })
    context.nestedMemoryAttachmentTriggers?.add(fullFilePath)

    const data = {
      type: 'notebook' as const,
      file: { filePath: file_path, cells },
    }

    logFileOperation({
      operation: 'read',
      tool: 'FileReadTool',
      filePath: fullFilePath,
      content: cellsJson,
    })

    return { data }
  }

  // --- Image (single read, no double-read) ---
  if (IMAGE_EXTENSIONS.has(ext)) {
    // Images have their own size limits (token budget + compression) —
    // don't apply the text maxSizeBytes cap.
    const data = await readImageWithTokenBudget(resolvedFilePath, maxTokens)
    context.nestedMemoryAttachmentTriggers?.add(fullFilePath)

    logFileOperation({
      operation: 'read',
      tool: 'FileReadTool',
      filePath: fullFilePath,
      content: data.file.base64,
    })

    const metadataText = data.file.dimensions
      ? createImageMetadataText(data.file.dimensions)
      : null

    return {
      data,
      ...(metadataText && {
        newMessages: [
          createUserMessage({ content: metadataText, isMeta: true }),
        ],
      }),
    }
  }

  // --- PDF ---
  if (isPDFExtension(ext)) {
    if (pages) {
      const parsedRange = parsePDFPageRange(pages)
      const extractResult = await extractPDFPages(
        resolvedFilePath,
        parsedRange ?? undefined,
      )
      if (!extractResult.success) {
        // NOTE(review): the `as any` casts on failure branches look unnecessary
        // if extractPDFPages returns a discriminated union — confirm and remove.
        throw new Error((extractResult as any).error.message)
      }
      logEvent('tengu_pdf_page_extraction', {
        success: true,
        pageCount: extractResult.data.file.count,
        fileSize: extractResult.data.file.originalSize,
        hasPageRange: true,
      })
      logFileOperation({
        operation: 'read',
        tool: 'FileReadTool',
        filePath: fullFilePath,
        content: `PDF pages ${pages}`,
      })
      const entries = await readdir(extractResult.data.file.outputDir)
      // Numeric-aware ordering keeps pages in sequence even if the page
      // number in the file name is not zero-padded.
      const imageFiles = entries
        .filter(f => f.endsWith('.jpg'))
        .sort((a, b) => a.localeCompare(b, undefined, { numeric: true }))
      const imageBlocks = await Promise.all(
        imageFiles.map(async f => {
          const imgPath = path.join(extractResult.data.file.outputDir, f)
          const imgBuffer = await readFileAsync(imgPath)
          const resized = await maybeResizeAndDownsampleImageBuffer(
            imgBuffer,
            imgBuffer.length,
            'jpeg',
          )
          return {
            type: 'image' as const,
            source: {
              type: 'base64' as const,
              media_type:
                `image/${resized.mediaType}` as Base64ImageSource['media_type'],
              data: resized.buffer.toString('base64'),
            },
          }
        }),
      )
      return {
        data: extractResult.data,
        ...(imageBlocks.length > 0 && {
          newMessages: [
            createUserMessage({ content: imageBlocks, isMeta: true }),
          ],
        }),
      }
    }

    const pageCount = await getPDFPageCount(resolvedFilePath)
    if (pageCount !== null && pageCount > PDF_AT_MENTION_INLINE_THRESHOLD) {
      throw new Error(
        `This PDF has ${pageCount} pages, which is too many to read at once. ` +
          `Use the pages parameter to read specific page ranges (e.g., pages: "1-5"). ` +
          `Maximum ${PDF_MAX_PAGES_PER_READ} pages per request.`,
      )
    }

    const fs = getFsImplementation()
    const stats = await fs.stat(resolvedFilePath)
    const shouldExtractPages =
      !isPDFSupported() || stats.size > PDF_EXTRACT_SIZE_THRESHOLD

    // The extraction outcome is only logged here; the result itself is not
    // returned from this branch.
    if (shouldExtractPages) {
      const extractResult = await extractPDFPages(resolvedFilePath)
      if (extractResult.success) {
        logEvent('tengu_pdf_page_extraction', {
          success: true,
          pageCount: extractResult.data.file.count,
          fileSize: extractResult.data.file.originalSize,
        })
      } else {
        logEvent('tengu_pdf_page_extraction', {
          success: false,
          available: (extractResult as any).error.reason !== 'unavailable',
          fileSize: stats.size,
        })
      }
    }

    if (!isPDFSupported()) {
      throw new Error(
        'Reading full PDFs is not supported with this model. Use a newer model (Sonnet 3.5 v2 or later), ' +
          `or use the pages parameter to read specific page ranges (e.g., pages: "1-5", maximum ${PDF_MAX_PAGES_PER_READ} pages per request). ` +
          'Page extraction requires poppler-utils: install with `brew install poppler` on macOS or `apt-get install poppler-utils` on Debian/Ubuntu.',
      )
    }

    const readResult = await readPDF(resolvedFilePath)
    if (!readResult.success) {
      throw new Error((readResult as any).error.message)
    }
    const pdfData = readResult.data
    logFileOperation({
      operation: 'read',
      tool: 'FileReadTool',
      filePath: fullFilePath,
      content: pdfData.file.base64,
    })

    return {
      data: pdfData,
      newMessages: [
        createUserMessage({
          content: [
            {
              type: 'document',
              source: {
                type: 'base64',
                media_type: 'application/pdf',
                data: pdfData.file.base64,
              },
            },
          ],
          isMeta: true,
        }),
      ],
    }
  }

  // --- Text file (single async read via readFileInRange) ---
  // offset is 1-based for callers; readFileInRange takes a 0-based line offset.
  const lineOffset = offset === 0 ? 0 : offset - 1
  const { content, lineCount, totalLines, totalBytes, readBytes, mtimeMs } =
    await readFileInRange(
      resolvedFilePath,
      lineOffset,
      limit,
      // The byte cap is only passed when no explicit line limit was given.
      limit === undefined ? maxSizeBytes : undefined,
      context.abortController.signal,
    )

  await validateContentTokens(content, ext, maxTokens)

  readFileState.set(fullFilePath, {
    content,
    timestamp: Math.floor(mtimeMs),
    offset,
    limit,
  })
  context.nestedMemoryAttachmentTriggers?.add(fullFilePath)

  // Snapshot before iterating — a listener that unsubscribes mid-callback
  // would splice the live array and skip the next listener.
  for (const listener of fileReadListeners.slice()) {
    listener(resolvedFilePath, content)
  }

  const data = {
    type: 'text' as const,
    file: {
      filePath: file_path,
      content,
      numLines: lineCount,
      startLine: offset,
      totalLines,
    },
  }
  if (isAutoMemFile(fullFilePath)) {
    memoryFileMtimes.set(data, mtimeMs)
  }

  logFileOperation({
    operation: 'read',
    tool: 'FileReadTool',
    filePath: fullFilePath,
    content,
  })

  const sessionFileType = detectSessionFileType(fullFilePath)
  const analyticsExt = getFileExtensionForAnalytics(fullFilePath)
  logEvent('tengu_session_file_read', {
    totalLines,
    readLines: lineCount,
    totalBytes,
    readBytes,
    offset,
    ...(limit !== undefined && { limit }),
    ...(analyticsExt !== undefined && { ext: analyticsExt }),
    ...(messageId !== undefined && {
      messageID:
        messageId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    }),
    is_session_memory: sessionFileType === 'session_memory',
    is_session_transcript: sessionFileType === 'session_transcript',
  })

  return { data }
}

/**
 * Reads an image file and applies token-based compression if needed.
 * Reads the file ONCE, then applies standard resize. If the result exceeds
 * the token limit, applies aggressive compression from the same buffer.
*
 * @param filePath - Path to the image file
 * @param maxTokens - Maximum token budget for the image
 * @param maxBytes - Optional byte cap forwarded to readFileBytes
 * @returns Image data with appropriate compression applied
 * @throws Error when the file is empty; rethrows ImageResizeError from the standard resize
 */
export async function readImageWithTokenBudget(
  filePath: string,
  maxTokens: number = getDefaultFileReadingLimits().maxTokens,
  maxBytes?: number,
): Promise<ImageResult> {
  // Read file ONCE — capped to maxBytes to avoid OOM on huge files
  const imageBuffer = await getFsImplementation().readFileBytes(
    filePath,
    maxBytes,
  )
  const originalSize = imageBuffer.length

  if (originalSize === 0) {
    throw new Error(`Image file is empty: ${filePath}`)
  }

  const detectedMediaType = detectImageFormatFromBuffer(imageBuffer)
  const detectedFormat = detectedMediaType.split('/')[1] || 'png'

  // Try standard resize
  let result: ImageResult
  try {
    const resized = await maybeResizeAndDownsampleImageBuffer(
      imageBuffer,
      originalSize,
      detectedFormat,
    )
    result = createImageResponse(
      resized.buffer,
      resized.mediaType,
      originalSize,
      resized.dimensions,
    )
  } catch (e) {
    // ImageResizeError is surfaced to the caller; any other failure is logged
    // and the unmodified buffer is used instead.
    if (e instanceof ImageResizeError) throw e
    logError(e)
    result = createImageResponse(imageBuffer, detectedFormat, originalSize)
  }

  // Check if it fits in token budget
  const estimatedTokens = Math.ceil(result.file.base64.length * 0.125)
  if (estimatedTokens > maxTokens) {
    // Aggressive compression from the SAME buffer (no re-read)
    try {
      const compressed = await compressImageBufferWithTokenLimit(
        imageBuffer,
        maxTokens,
        detectedMediaType,
      )
      return {
        type: 'image',
        file: {
          base64: compressed.base64,
          type: compressed.mediaType,
          originalSize,
        },
      }
    } catch (e) {
      logError(e)
      // Fallback: heavily compressed version from the SAME buffer
      try {
        const sharpModule = await import('sharp')
        // Use the module's default export when present, else the module itself.
        const sharp =
          (
            sharpModule as unknown as {
              default?: typeof sharpModule
            } & typeof sharpModule
          ).default || sharpModule

        const fallbackBuffer = await (sharp as any)(imageBuffer)
          .resize(400, 400, {
            fit: 'inside',
            withoutEnlargement: true,
          })
          .jpeg({ quality: 20 })
          .toBuffer()

        return createImageResponse(fallbackBuffer, 'jpeg', originalSize)
      } catch (error) {
        // Last resort: return the original bytes even though they are over budget.
        logError(error)
        return createImageResponse(imageBuffer, detectedFormat, originalSize)
      }
    }
  }

  return result
}