/**
 * Detect the actual image MIME type of base64-encoded image data by decoding
 * its leading bytes and matching them against standard magic numbers.
 *
 * The base64 text cannot be compared against raw magic bytes directly: the
 * encoded characters never equal the byte values (a PNG's first byte 0x89 is
 * encoded as the character "i"), so a check such as `b64.charCodeAt(0) === 0x89`
 * never matches. The prefix is therefore decoded first. Per the originating
 * fix, a wrong MIME label led to API 400 errors for Windows (JPEG) and macOS
 * (PNG) screenshots.
 *
 * Recognized signatures:
 * - PNG:  89 50 4E 47
 * - JPEG: FF D8 FF
 * - WebP: "RIFF" at bytes 0..3 and "WEBP" at bytes 8..11
 * - GIF:  "GIF" at bytes 0..2
 *
 * @param b64 - Base64-encoded image data. A leading `data:<mime>;base64,`
 *   prefix and any whitespace (for example line wrapping) are tolerated and
 *   ignored; the MIME type declared in such a prefix is not trusted.
 * @returns The detected MIME type, or `"image/png"` when no signature matches
 *   (including empty or too-short input).
 */
function detectMimeFromBase64(b64: string): string {
  // Only a short head of the input is inspected, so large screenshots are
  // never copied or scanned in full.
  const head = b64
    .slice(0, 512)
    // ":" is not a base64 character, so this can only match a real data-URL header.
    .replace(/^\s*data:[^,]*,/i, "")
    // The decoder skips whitespace; strip it first so 16 chars always yield 12 bytes.
    .replace(/\s+/g, "")
    .slice(0, 16);
  const raw = Buffer.from(head, "base64");

  // True when `signature` appears in the decoded bytes starting at `offset`.
  // Reads past the end of `raw` give `undefined`, which never equals a byte.
  const hasBytesAt = (offset: number, signature: readonly number[]): boolean =>
    signature.every((byte, i) => raw[offset + i] === byte);

  if (hasBytesAt(0, [0x89, 0x50, 0x4e, 0x47])) return "image/png";
  if (hasBytesAt(0, [0xff, 0xd8, 0xff])) return "image/jpeg";
  if (
    hasBytesAt(0, [0x52, 0x49, 0x46, 0x46]) && // "RIFF"
    hasBytesAt(8, [0x57, 0x45, 0x42, 0x50]) // "WEBP"
  ) {
    return "image/webp";
  }
  if (hasBytesAt(0, [0x47, 0x49, 0x46])) return "image/gif"; // "GIF"

  // Unknown or undecodable data keeps the historical default.
  return "image/png";
}