diff --git a/src/services/api/gemini/__tests__/convertMessages.test.ts b/src/services/api/gemini/__tests__/convertMessages.test.ts index 11d49ca37..63a9cf60a 100644 --- a/src/services/api/gemini/__tests__/convertMessages.test.ts +++ b/src/services/api/gemini/__tests__/convertMessages.test.ts @@ -199,4 +199,69 @@ describe('anthropicMessagesToGemini', () => { }, ]) }) + + test('converts base64 image to inlineData', () => { + const result = anthropicMessagesToGemini( + [makeUserMsg([ + { type: 'text', text: 'describe this' }, + { + type: 'image', + source: { + type: 'base64', + media_type: 'image/png', + data: 'iVBORw0KGgo=', + }, + }, + ])], + [] as any, + ) + expect(result.contents).toEqual([ + { + role: 'user', + parts: [ + { text: 'describe this' }, + { inlineData: { mimeType: 'image/png', data: 'iVBORw0KGgo=' } }, + ], + }, + ]) + }) + + test('converts url image to text fallback', () => { + const result = anthropicMessagesToGemini( + [makeUserMsg([ + { + type: 'image', + source: { + type: 'url', + url: 'https://example.com/img.png', + }, + }, + ])], + [] as any, + ) + expect(result.contents).toEqual([ + { + role: 'user', + parts: [{ text: '[image: https://example.com/img.png]' }], + }, + ]) + }) + + test('defaults to image/png when media_type is missing', () => { + const result = anthropicMessagesToGemini( + [makeUserMsg([ + { + type: 'image', + source: { + type: 'base64', + data: 'ABC123', + }, + }, + ])], + [] as any, + ) + expect(result.contents[0].parts[0]).toEqual({ + inlineData: { mimeType: 'image/png', data: 'ABC123' }, + }) + }) }) diff --git a/src/services/api/gemini/convertMessages.ts b/src/services/api/gemini/convertMessages.ts index 4ac3a209d..018efb1b6 100644 --- a/src/services/api/gemini/convertMessages.ts +++ b/src/services/api/gemini/convertMessages.ts @@ -113,6 +113,26 @@ function convertUserContentBlockToGeminiParts( ] } + // Convert an Anthropic image block into a Gemini inlineData part + if (block.type === 'image') { + const source = block.source as Record | 
undefined + if (source?.type === 'base64' && typeof source.data === 'string') { + const mediaType = (source.media_type as string) || 'image/png' + return [ + { + inlineData: { + mimeType: mediaType, + data: source.data, + }, + }, + ] + } + // URL-sourced image: not converted to inlineData; emitted as a text placeholder instead + if (source?.type === 'url' && typeof source.url === 'string') { + return createTextGeminiParts(`[image: ${source.url}]`) + } + } + return [] } diff --git a/src/services/api/gemini/types.ts b/src/services/api/gemini/types.ts index 829a09f13..e8718fecd 100644 --- a/src/services/api/gemini/types.ts +++ b/src/services/api/gemini/types.ts @@ -10,12 +10,18 @@ export type GeminiFunctionResponse = { response?: Record } +export type GeminiInlineData = { + mimeType: string + data: string +} + export type GeminiPart = { text?: string thought?: boolean thoughtSignature?: string functionCall?: GeminiFunctionCall functionResponse?: GeminiFunctionResponse + inlineData?: GeminiInlineData } export type GeminiContent = { diff --git a/src/services/api/openai/__tests__/convertMessages.test.ts b/src/services/api/openai/__tests__/convertMessages.test.ts index 0e69f1ca8..0ea52757b 100644 --- a/src/services/api/openai/__tests__/convertMessages.test.ts +++ b/src/services/api/openai/__tests__/convertMessages.test.ts @@ -154,4 +154,98 @@ describe('anthropicMessagesToOpenAI', () => { expect((result[2] as any).tool_calls).toBeDefined() expect(result[3].role).toBe('tool') }) + + test('converts base64 image to image_url', () => { + const result = anthropicMessagesToOpenAI( + [makeUserMsg([ + { type: 'text', text: 'what is this?' }, + { + type: 'image' as const, + source: { + type: 'base64', + media_type: 'image/png', + data: 'iVBORw0KGgo=', + }, + }, + ])], + [] as any, + ) + expect(result).toEqual([{ + role: 'user', + content: [ + { type: 'text', text: 'what is this?' 
}, + { + type: 'image_url', + image_url: { url: 'data:image/png;base64,iVBORw0KGgo=' }, + }, + ], + }]) + }) + + test('converts url image to image_url', () => { + const result = anthropicMessagesToOpenAI( + [makeUserMsg([ + { + type: 'image' as const, + source: { + type: 'url', + url: 'https://example.com/img.png', + }, + }, + ])], + [] as any, + ) + expect(result).toEqual([{ + role: 'user', + content: [ + { + type: 'image_url', + image_url: { url: 'https://example.com/img.png' }, + }, + ], + }]) + }) + + test('converts image-only message without text', () => { + const result = anthropicMessagesToOpenAI( + [makeUserMsg([ + { + type: 'image' as const, + source: { + type: 'base64', + media_type: 'image/jpeg', + data: '/9j/4AAQ', + }, + }, + ])], + [] as any, + ) + expect(result).toEqual([{ + role: 'user', + content: [ + { + type: 'image_url', + image_url: { url: 'data:image/jpeg;base64,/9j/4AAQ' }, + }, + ], + }]) + }) + + test('defaults to image/png when media_type is missing', () => { + const result = anthropicMessagesToOpenAI( + [makeUserMsg([ + { + type: 'image' as const, + source: { + type: 'base64', + data: 'ABC123', + }, + }, + ])], + [] as any, + ) + expect((result[0].content as any[])[0].image_url.url).toBe( + 'data:image/png;base64,ABC123', + ) + }) }) diff --git a/src/services/api/openai/convertMessages.ts b/src/services/api/openai/convertMessages.ts index 63fe6c719..051b43d69 100644 --- a/src/services/api/openai/convertMessages.ts +++ b/src/services/api/openai/convertMessages.ts @@ -75,6 +75,7 @@ function convertInternalUserMessage( } else if (Array.isArray(content)) { const textParts: string[] = [] const toolResults: BetaToolResultBlockParam[] = [] + const imageParts: Array<{ type: 'image_url'; image_url: { url: string } }> = [] for (const block of content) { if (typeof block === 'string') { @@ -83,11 +84,26 @@ function convertInternalUserMessage( textParts.push(block.text) } else if (block.type === 'tool_result') { toolResults.push(block as 
BetaToolResultBlockParam) + } else if (block.type === 'image') { + const imagePart = convertImageBlockToOpenAI(block as Record) + if (imagePart) { + imageParts.push(imagePart) + } } - // Skip image, document, thinking, cache_edits, etc. } - if (textParts.length > 0) { + // When images are present, build a multimodal content array + if (imageParts.length > 0) { + const multiContent: Array<{ type: 'text'; text: string } | { type: 'image_url'; image_url: { url: string } }> = [] + if (textParts.length > 0) { + multiContent.push({ type: 'text', text: textParts.join('\n') }) + } + multiContent.push(...imageParts) + result.push({ + role: 'user', + content: multiContent, + } satisfies ChatCompletionUserMessageParam) + } else if (textParts.length > 0) { result.push({ role: 'user', content: textParts.join('\n'), @@ -182,3 +198,38 @@ function convertInternalAssistantMessage( return [result] } + +/** + * Convert an Anthropic image block to the OpenAI image_url format. + * + * Anthropic format: { type: "image", source: { type: "base64", media_type: "image/png", data: "..." } } + * OpenAI format: { type: "image_url", image_url: { url: "data:image/png;base64,..." } } + */ +function convertImageBlockToOpenAI( + block: Record, +): { type: 'image_url'; image_url: { url: string } } | null { + const source = block.source as Record | undefined + if (!source) return null + + if (source.type === 'base64' && typeof source.data === 'string') { + const mediaType = (source.media_type as string) || 'image/png' + return { + type: 'image_url', + image_url: { + url: `data:${mediaType};base64,${source.data}`, + }, + } + } + + // URL-sourced images are passed through with their url unchanged + if (source.type === 'url' && typeof source.url === 'string') { + return { + type: 'image_url', + image_url: { + url: source.url, + }, + } + } + + return null +}