diff --git a/packages/@ant/computer-use-mcp/src/toolCalls.ts b/packages/@ant/computer-use-mcp/src/toolCalls.ts
index eb68b0bb3..d7b796a94 100644
--- a/packages/@ant/computer-use-mcp/src/toolCalls.ts
+++ b/packages/@ant/computer-use-mcp/src/toolCalls.ts
@@ -37,6 +37,37 @@ import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
 
 import { randomUUID } from "node:crypto";
 
+/**
+ * Detect the image MIME type of a base64 payload from its decoded magic bytes.
+ *
+ * Signatures are matched against the decoded bytes, not the base64 text:
+ * base64 characters are ASCII, so the first character can never be 0x89 or
+ * 0xFF. Empty or unrecognized input falls back to "image/png".
+ */
+function detectMimeFromBase64(b64: string): string {
+  // 16 base64 characters decode to 12 bytes, which covers the longest
+  // signature checked below ("RIFF", 4-byte size, "WEBP").
+  const head = Buffer.from(b64.slice(0, 16), "base64");
+  const hasBytes = (sig: readonly number[], offset = 0): boolean =>
+    head.length >= offset + sig.length &&
+    sig.every((byte, i) => head[offset + i] === byte);
+
+  // PNG: 89 50 4E 47
+  if (hasBytes([0x89, 0x50, 0x4e, 0x47])) return "image/png";
+  // JPEG: FF D8 FF
+  if (hasBytes([0xff, 0xd8, 0xff])) return "image/jpeg";
+  // WebP: "RIFF" <4-byte size> "WEBP"
+  if (
+    hasBytes([0x52, 0x49, 0x46, 0x46]) &&
+    hasBytes([0x57, 0x45, 0x42, 0x50], 8)
+  ) {
+    return "image/webp";
+  }
+  // GIF: "GIF8"
+  if (hasBytes([0x47, 0x49, 0x46, 0x38])) return "image/gif";
+  return "image/png";
+}
+
 import { getDefaultTierForApp, getDeniedCategoryForApp, isPolicyDenied } from "./deniedApps.js";
 import type {
   ComputerExecutor,
@@ -2162,7 +2193,7 @@ async function handleScreenshot(
       {
         type: "image",
         data: shot.base64,
-        mimeType: "image/jpeg",
+        mimeType: detectMimeFromBase64(shot.base64),
       },
     ],
     screenshot: shot,
@@ -2231,7 +2262,7 @@ async function handleScreenshot(
       {
         type: "image",
         data: shot.base64,
-        mimeType: "image/jpeg",
+        mimeType: detectMimeFromBase64(shot.base64),
       },
     ],
     // Piggybacked for serverDef.ts to stash on InternalServerContext.
@@ -2310,7 +2341,7 @@ async function handleZoom(
 
   // Return the image. NO `.screenshot` piggyback — this is the invariant.
   return {
-    content: [{ type: "image", data: zoomed.base64, mimeType: "image/jpeg" }],
+    content: [{ type: "image", data: zoomed.base64, mimeType: detectMimeFromBase64(zoomed.base64) }],
   };
 }
 
diff --git a/src/utils/computerUse/wrapper.tsx b/src/utils/computerUse/wrapper.tsx
index 05a1f81fd..5571f0437 100644
--- a/src/utils/computerUse/wrapper.tsx
+++ b/src/utils/computerUse/wrapper.tsx
@@ -30,6 +30,7 @@ import { getSessionId } from '../../bootstrap/state.js'
 import { ComputerUseApproval } from '../../components/permissions/ComputerUseApproval/ComputerUseApproval.js'
 import type { Tool, ToolUseContext } from '../../Tool.js'
 import { logForDebugging } from '../debug.js'
+import { detectImageFormatFromBase64 } from '../imageResizer.js'
 import {
   checkComputerUseLock,
   tryAcquireComputerUseLock,
@@ -330,7 +331,7 @@ export function getComputerUseMCPToolOverrides(
           type: 'image' as const,
           source: {
             type: 'base64' as const,
-            media_type: item.mimeType ?? 'image/jpeg',
+            media_type: item.mimeType ?? detectImageFormatFromBase64(item.data),
             data: item.data,
           },
         }