diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts
index e811fd74e..8b3c0e622 100644
--- a/src/services/api/claude.ts
+++ b/src/services/api/claude.ts
@@ -2907,7 +2907,12 @@ async function* queryModel(
         provider: getAPIProvider(),
         input: convertMessagesToLangfuse(messagesForAPI, systemPrompt),
         output: convertOutputToLangfuse(newMessages),
-        usage: { input_tokens: usage.input_tokens, output_tokens: usage.output_tokens },
+        usage: {
+          input_tokens: usage.input_tokens,
+          output_tokens: usage.output_tokens,
+          cache_creation_input_tokens: usage.cache_creation_input_tokens,
+          cache_read_input_tokens: usage.cache_read_input_tokens,
+        },
         startTime: new Date(startIncludingRetries),
         endTime: new Date(),
         completionStartTime: ttftMs > 0 ? new Date(start + ttftMs) : undefined,
diff --git a/src/services/langfuse/__tests__/langfuse.test.ts b/src/services/langfuse/__tests__/langfuse.test.ts
index 38beaa035..ae286391f 100644
--- a/src/services/langfuse/__tests__/langfuse.test.ts
+++ b/src/services/langfuse/__tests__/langfuse.test.ts
@@ -284,6 +284,48 @@ describe('Langfuse integration', () => {
       }))
       expect(mockRootEnd).toHaveBeenCalled()
     })
+
+    test('includes cache tokens in usageDetails when provided', async () => {
+      process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
+      process.env.LANGFUSE_SECRET_KEY = 'sk-test'
+      const { createTrace, recordLLMObservation } = await import('../tracing.js')
+      const span = createTrace({ sessionId: 's1', model: 'claude-3', provider: 'firstParty' })
+      mockStartObservation.mockClear()
+      mockRootUpdate.mockClear()
+      recordLLMObservation(span, {
+        model: 'claude-3',
+        provider: 'firstParty',
+        input: [],
+        output: [],
+        usage: { input_tokens: 10000, output_tokens: 50, cache_creation_input_tokens: 2000, cache_read_input_tokens: 7000 },
+      })
+      expect(mockRootUpdate).toHaveBeenCalledWith(expect.objectContaining({
+        usageDetails: {
+          input: 19000, // 10000 + 2000 + 7000
+          output: 50,
+          cache_read: 7000,
+          cache_creation: 2000,
+        },
+      }))
+    })
+
+    test('omits cache fields when not provided', async () => {
+      process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
+      process.env.LANGFUSE_SECRET_KEY = 'sk-test'
+      const { createTrace, recordLLMObservation } = await import('../tracing.js')
+      const span = createTrace({ sessionId: 's1', model: 'claude-3', provider: 'firstParty' })
+      mockRootUpdate.mockClear()
+      recordLLMObservation(span, {
+        model: 'claude-3',
+        provider: 'firstParty',
+        input: [],
+        output: [],
+        usage: { input_tokens: 100, output_tokens: 20 },
+      })
+      expect(mockRootUpdate).toHaveBeenCalledWith(expect.objectContaining({
+        usageDetails: { input: 100, output: 20 },
+      }))
+    })
   })
 
   describe('recordToolObservation', () => {
diff --git a/src/services/langfuse/tracing.ts b/src/services/langfuse/tracing.ts
index c9fc7df17..1e06d8ae4 100644
--- a/src/services/langfuse/tracing.ts
+++ b/src/services/langfuse/tracing.ts
@@ -68,7 +68,12 @@ export function recordLLMObservation(
     provider: string
     input: unknown
     output: unknown
-    usage: { input_tokens: number; output_tokens: number }
+    usage: {
+      input_tokens: number
+      output_tokens: number
+      cache_creation_input_tokens?: number
+      cache_read_input_tokens?: number
+    }
     startTime?: Date
     endTime?: Date
     completionStartTime?: Date
@@ -109,11 +114,17 @@ export function recordLLMObservation(
       gen.otelSpan.setAttribute(LangfuseOtelSpanAttributes.TRACE_USER_ID, userId)
     }
 
+    // Anthropic splits input into uncached + cache_read + cache_creation.
+    // Langfuse's "input" should be the total prompt tokens so cost calc is correct.
+    const cacheRead = params.usage.cache_read_input_tokens ?? 0
+    const cacheCreation = params.usage.cache_creation_input_tokens ?? 0
     gen.update({
       output: params.output,
       usageDetails: {
-        input: params.usage.input_tokens,
+        input: params.usage.input_tokens + cacheCreation + cacheRead,
         output: params.usage.output_tokens,
+        ...(cacheRead > 0 && { cache_read: cacheRead }),
+        ...(cacheCreation > 0 && { cache_creation: cacheCreation }),
       },
     })
 