From 227083d31f7aa67093f75b87f02f7e4cb4d0362c Mon Sep 17 00:00:00 2001
From: claude-code-best
Date: Sun, 12 Apr 2026 17:31:36 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=88=AA=E5=9B=BE=20M?=
 =?UTF-8?q?IME=20=E7=B1=BB=E5=9E=8B=E7=A1=AC=E7=BC=96=E7=A0=81=E5=AF=BC?=
 =?UTF-8?q?=E8=87=B4=20API=20=E6=8B=92=E7=BB=9D=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

macOS screencapture 输出 PNG,但代码硬编码 mimeType 为 image/jpeg,
导致 API 报错 "specified using image/jpeg but appears to be image/png"。
改为通过 magic bytes 检测实际图片格式。

Co-Authored-By: Claude Opus 4.6
---
 .../@ant/computer-use-mcp/src/toolCalls.ts | 19 ++++++++++++++++---
 src/utils/computerUse/wrapper.tsx          |  3 ++-
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/packages/@ant/computer-use-mcp/src/toolCalls.ts b/packages/@ant/computer-use-mcp/src/toolCalls.ts
index eb68b0bb3..d7b796a94 100644
--- a/packages/@ant/computer-use-mcp/src/toolCalls.ts
+++ b/packages/@ant/computer-use-mcp/src/toolCalls.ts
@@ -37,6 +37,19 @@
 import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
 import { randomUUID } from "node:crypto";
 
+/** Detect the actual image MIME type from the magic bytes of base64 data. */
+function detectMimeFromBase64(b64: string): string {
+  // `b64` is base64 text, so match the *encoded* magic-byte prefixes; raw
+  // byte values like 0x89 / 0xFF are not valid base64 character codes.
+  if (b64.startsWith("iVBORw0KGgo")) return "image/png"; // 89 "PNG" 0D 0A 1A 0A
+  if (b64.startsWith("/9j/")) return "image/jpeg"; // FF D8 FF
+  if (b64.startsWith("R0lGOD")) return "image/gif"; // "GIF8"
+  // "RIFF" also prefixes WAV/AVI, so require "WEBP" at bytes 8-11 as well.
+  if (b64.startsWith("UklGR") && b64.startsWith("XRUJQ", 11)) return "image/webp";
+  // Unknown signature: keep the PNG default (macOS screencapture emits PNG).
+  return "image/png";
+}
+
 import { getDefaultTierForApp, getDeniedCategoryForApp, isPolicyDenied } from "./deniedApps.js";
 import type {
   ComputerExecutor,
@@ -2162,7 +2175,7 @@ async function handleScreenshot(
       {
         type: "image",
         data: shot.base64,
-        mimeType: "image/jpeg",
+        mimeType: detectMimeFromBase64(shot.base64),
       },
     ],
     screenshot: shot,
@@ -2231,7 +2244,7 @@ async function handleScreenshot(
       {
         type: "image",
         data: shot.base64,
-        mimeType: "image/jpeg",
+        mimeType: detectMimeFromBase64(shot.base64),
       },
     ],
     // Piggybacked for serverDef.ts to stash on InternalServerContext.
@@ -2310,7 +2323,7 @@ async function handleZoom(
 
   // Return the image. NO `.screenshot` piggyback — this is the invariant.
   return {
-    content: [{ type: "image", data: zoomed.base64, mimeType: "image/jpeg" }],
+    content: [{ type: "image", data: zoomed.base64, mimeType: detectMimeFromBase64(zoomed.base64) }],
   };
 }
 
diff --git a/src/utils/computerUse/wrapper.tsx b/src/utils/computerUse/wrapper.tsx
index 05a1f81fd..5571f0437 100644
--- a/src/utils/computerUse/wrapper.tsx
+++ b/src/utils/computerUse/wrapper.tsx
@@ -30,6 +30,7 @@ import { getSessionId } from '../../bootstrap/state.js'
 import { ComputerUseApproval } from '../../components/permissions/ComputerUseApproval/ComputerUseApproval.js'
 import type { Tool, ToolUseContext } from '../../Tool.js'
 import { logForDebugging } from '../debug.js'
+import { detectImageFormatFromBase64 } from '../imageResizer.js'
 import {
   checkComputerUseLock,
   tryAcquireComputerUseLock,
@@ -330,7 +331,7 @@ export function getComputerUseMCPToolOverrides(
                 type: 'image' as const,
                 source: {
                   type: 'base64' as const,
-                  media_type: item.mimeType ?? 'image/jpeg',
+                  media_type: item.mimeType ?? detectImageFormatFromBase64(item.data),
                   data: item.data,
                 },
               }