diff --git a/src/services/langfuse/convert.ts b/src/services/langfuse/convert.ts index 31594d9cf..89018e4ac 100644 --- a/src/services/langfuse/convert.ts +++ b/src/services/langfuse/convert.ts @@ -4,39 +4,92 @@ * Langfuse generations expect: * input: { role, content }[] where content is string or structured parts * output: { role: 'assistant', content: string | part[] } + * + * Key conversions from Anthropic → OpenAI format: + * - tool_use blocks → tool_calls[] at message level + * - tool_result blocks → separate { role: 'tool' } messages */ import type { Message, AssistantMessage, UserMessage } from 'src/types/message.js' type LangfuseContentPart = | { type: 'text'; text: string } - | { type: 'tool_use'; id: string; name: string; input: unknown } - | { type: 'tool_result'; tool_use_id: string; content: string } | { type: 'thinking'; thinking: string } | { type: string; [key: string]: unknown } -type LangfuseChatMessage = { - role: 'user' | 'assistant' | 'system' - content: string | LangfuseContentPart[] +type LangfuseToolCall = { + id: string + type: 'function' + function: { name: string; arguments: string } } -function normalizeContent(content: unknown): string | LangfuseContentPart[] { - if (typeof content === 'string') return content - if (!Array.isArray(content)) return String(content ?? '') +type LangfuseChatMessage = { + role: 'user' | 'assistant' | 'system' | 'tool' + content: string | LangfuseContentPart[] | null + tool_calls?: LangfuseToolCall[] + tool_call_id?: string +} - const parts: LangfuseContentPart[] = [] +/** Normalize a content block into a LangfuseContentPart (non-tool_use, non-tool_result) */ +function toContentPart(block: Record): LangfuseContentPart | null { + const type = block.type as string | undefined + if (type === 'text') { + return { type: 'text', text: String(block.text ?? '') } + } + if (type === 'thinking' || type === 'redacted_thinking') { + return { type: 'thinking', thinking: String(block.thinking ?? '[redacted]') } + } + if (type === 'image') { + return { type: 'text', text: '[image]' } + } + if (type === 'document') { + const name = (block.source as Record | undefined)?.filename + ?? (block.title as string | undefined) + ?? 'document' + return { type: 'text', text: `[document: ${name}]` } + } + if (type === 'server_tool_use' || type === 'web_search_tool_result' || type === 'tool_search_tool_result') { + return { type, id: String(block.id ?? ''), name: String(block.name ?? type) } + } + // unknown block: keep type + scalar fields only + const safe: Record = { type: type ?? 'unknown' } + for (const [k, v] of Object.entries(block)) { + if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') safe[k] = v + } + return safe as LangfuseContentPart +} + +/** Extract tool_use blocks from content into OpenAI-style tool_calls */ +function extractToolCalls(content: unknown[]): { tool_calls: LangfuseToolCall[]; rest: unknown[] } { + const toolCalls: LangfuseToolCall[] = [] + const rest: unknown[] = [] for (const block of content) { - if (!block || typeof block !== 'object') continue + if (!block || typeof block !== 'object') { rest.push(block); continue } const b = block as Record - const type = b.type as string | undefined + if (b.type === 'tool_use') { + toolCalls.push({ + id: String(b.id ?? ''), + type: 'function', + function: { + name: String(b.name ?? ''), + arguments: typeof b.input === 'string' ? b.input : JSON.stringify(b.input ?? {}), + }, + }) + } else { + rest.push(block) + } + } + return { tool_calls: toolCalls, rest } +} - if (type === 'text') { - parts.push({ type: 'text', text: String(b.text ?? '') }) - } else if (type === 'thinking' || type === 'redacted_thinking') { - parts.push({ type: 'thinking', thinking: String(b.thinking ?? '[redacted]') }) - } else if (type === 'tool_use') { - parts.push({ type: 'tool_use', id: String(b.id ?? ''), name: String(b.name ?? ''), input: b.input }) - } else if (type === 'tool_result') { +/** Extract tool_result blocks into separate { role: 'tool' } messages */ +function extractToolResults(content: unknown[]): { toolMessages: LangfuseChatMessage[]; rest: unknown[] } { + const toolMessages: LangfuseChatMessage[] = [] + const rest: unknown[] = [] + for (const block of content) { + if (!block || typeof block !== 'object') { rest.push(block); continue } + const b = block as Record + if (b.type === 'tool_result') { const resultContent = Array.isArray(b.content) ? (b.content as Record[]) .map(c => { @@ -47,30 +100,23 @@ function normalizeContent(content: unknown): string | LangfuseContentPart[] { }) .join('\n') : String(b.content ?? '') - parts.push({ type: 'tool_result', tool_use_id: String(b.tool_use_id ?? ''), content: resultContent }) - } else if (type === 'image') { - parts.push({ type: 'text', text: '[image]' }) - } else if (type === 'document') { - const name = (b.source as Record | undefined)?.filename - ?? (b.title as string | undefined) - ?? 'document' - parts.push({ type: 'text', text: `[document: ${name}]` }) - } else if (type === 'server_tool_use' || type === 'web_search_tool_result' || type === 'tool_search_tool_result') { - // server-side tool blocks — keep name/id, drop raw content - parts.push({ type: type, id: String(b.id ?? ''), name: String(b.name ?? type) }) + toolMessages.push({ + role: 'tool', + tool_call_id: String(b.tool_use_id ?? ''), + content: resultContent, + }) } else { - // unknown block: keep type + scalar fields only, drop any binary/large payloads - const safe: Record = { type: type ?? 'unknown' } - for (const [k, v] of Object.entries(b)) { - if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') safe[k] = v - } - parts.push(safe as LangfuseContentPart) + rest.push(block) } } + return { toolMessages, rest } +} - // Collapse to plain string if only one text part - if (parts.length === 1 && parts[0]!.type === 'text') { - return (parts[0] as { type: 'text'; text: string }).text +/** Collapse content parts: join all-text arrays into a single string */ +function collapseContent(parts: LangfuseContentPart[]): string | LangfuseContentPart[] { + if (parts.length === 0) return '' + if (parts.every(p => p.type === 'text')) { + return parts.map(p => (p as { type: 'text'; text: string }).text).join('\n') } return parts } @@ -96,7 +142,36 @@ export function convertMessagesToLangfuse( const inner = msg.message if (!inner) continue const role = (inner.role as 'user' | 'assistant' | undefined) ?? toRole(msg) - result.push({ role, content: normalizeContent(inner.content) }) + const rawContent = inner.content + if (typeof rawContent === 'string' || !Array.isArray(rawContent)) { + result.push({ role, content: String(rawContent ?? '') }) + continue + } + + if (role === 'assistant') { + // Extract tool_use → tool_calls at message level + const { tool_calls, rest } = extractToolCalls(rawContent) + const parts = rest + .filter((b): b is Record => b != null && typeof b === 'object') + .map(b => toContentPart(b)) + .filter((p): p is LangfuseContentPart => p !== null) + result.push({ + role: 'assistant', + content: collapseContent(parts), + ...(tool_calls.length > 0 && { tool_calls }), + }) + } else { + // User messages: extract tool_result → separate tool messages + const { toolMessages, rest } = extractToolResults(rawContent) + const parts = rest + .filter((b): b is Record => b != null && typeof b === 'object') + .map(b => toContentPart(b)) + .filter((p): p is LangfuseContentPart => p !== null) + if (parts.length > 0 || toolMessages.length === 0) { + result.push({ role: 'user', content: collapseContent(parts) }) + } + result.push(...toolMessages) + } } return result } @@ -121,12 +196,24 @@ export function convertOutputToLangfuse( messages: AssistantMessage[], ): LangfuseChatMessage | LangfuseChatMessage[] | null { if (messages.length === 0) return null - if (messages.length === 1) { - const msg = messages[0]! - return { role: 'assistant', content: normalizeContent(msg.message?.content) } + + const convert = (msg: AssistantMessage): LangfuseChatMessage => { + const rawContent = msg.message?.content + if (typeof rawContent === 'string' || !Array.isArray(rawContent)) { + return { role: 'assistant', content: String(rawContent ?? '') } + } + const { tool_calls, rest } = extractToolCalls(rawContent) + const parts = rest + .filter((b): b is Record => b != null && typeof b === 'object') + .map(b => toContentPart(b)) + .filter((p): p is LangfuseContentPart => p !== null) + return { + role: 'assistant', + content: collapseContent(parts), + ...(tool_calls.length > 0 && { tool_calls }), + } } - return messages.map(msg => ({ - role: 'assistant' as const, - content: normalizeContent(msg.message?.content), - })) + + if (messages.length === 1) return convert(messages[0]!) + return messages.map(convert) }