fix: 修复缓存 (report cache creation/read tokens in Langfuse usage)

This commit is contained in:
claude-code-best
2026-04-13 20:09:23 +08:00
parent fce40fed1f
commit be80da4ce0
3 changed files with 61 additions and 3 deletions

View File

@@ -2907,7 +2907,12 @@ async function* queryModel(
provider: getAPIProvider(),
input: convertMessagesToLangfuse(messagesForAPI, systemPrompt),
output: convertOutputToLangfuse(newMessages),
usage: { input_tokens: usage.input_tokens, output_tokens: usage.output_tokens },
usage: {
input_tokens: usage.input_tokens,
output_tokens: usage.output_tokens,
cache_creation_input_tokens: usage.cache_creation_input_tokens,
cache_read_input_tokens: usage.cache_read_input_tokens,
},
startTime: new Date(startIncludingRetries),
endTime: new Date(),
completionStartTime: ttftMs > 0 ? new Date(start + ttftMs) : undefined,

View File

@@ -284,6 +284,48 @@ describe('Langfuse integration', () => {
}))
expect(mockRootEnd).toHaveBeenCalled()
})
// Checks that cache token counts handed to recordLLMObservation show up in the
// usageDetails passed to the root update mock, alongside a summed input count.
test('includes cache tokens in usageDetails when provided', async () => {
  // Keys are set before the tracing module is dynamically imported.
  process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
  process.env.LANGFUSE_SECRET_KEY = 'sk-test'
  const { createTrace: openTrace, recordLLMObservation: recordGeneration } = await import('../tracing.js')
  const rootSpan = openTrace({ sessionId: 's1', model: 'claude-3', provider: 'firstParty' })

  // Reset call history so only the call made below is inspected.
  mockStartObservation.mockClear()
  mockRootUpdate.mockClear()

  const usageWithCache = {
    input_tokens: 10000,
    output_tokens: 50,
    cache_creation_input_tokens: 2000,
    cache_read_input_tokens: 7000,
  }
  recordGeneration(rootSpan, {
    model: 'claude-3',
    provider: 'firstParty',
    input: [],
    output: [],
    usage: usageWithCache,
  })

  // Expected input is the sum 10000 + 2000 + 7000; the cache counts are also
  // reported under their own keys.
  const expectedUsageDetails = {
    input: 19000,
    output: 50,
    cache_read: 7000,
    cache_creation: 2000,
  }
  expect(mockRootUpdate).toHaveBeenCalledWith(
    expect.objectContaining({ usageDetails: expectedUsageDetails }),
  )
})
// Checks that usageDetails carries only input and output when the usage object
// passed to recordLLMObservation has no cache token fields.
test('omits cache fields when not provided', async () => {
  // Keys are set before the tracing module is dynamically imported.
  process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
  process.env.LANGFUSE_SECRET_KEY = 'sk-test'
  const { createTrace: openTrace, recordLLMObservation: recordGeneration } = await import('../tracing.js')
  const rootSpan = openTrace({ sessionId: 's1', model: 'claude-3', provider: 'firstParty' })

  // Reset call history so only the call made below is inspected.
  mockRootUpdate.mockClear()

  const usageWithoutCache = { input_tokens: 100, output_tokens: 20 }
  recordGeneration(rootSpan, {
    model: 'claude-3',
    provider: 'firstParty',
    input: [],
    output: [],
    usage: usageWithoutCache,
  })

  // The expected object is exactly these two keys, with no cache_* entries.
  const expectedUsageDetails = { input: 100, output: 20 }
  expect(mockRootUpdate).toHaveBeenCalledWith(
    expect.objectContaining({ usageDetails: expectedUsageDetails }),
  )
})
})
describe('recordToolObservation', () => {

View File

@@ -68,7 +68,12 @@ export function recordLLMObservation(
provider: string
input: unknown
output: unknown
usage: { input_tokens: number; output_tokens: number }
usage: {
input_tokens: number
output_tokens: number
cache_creation_input_tokens?: number
cache_read_input_tokens?: number
}
startTime?: Date
endTime?: Date
completionStartTime?: Date
@@ -109,11 +114,17 @@ export function recordLLMObservation(
gen.otelSpan.setAttribute(LangfuseOtelSpanAttributes.TRACE_USER_ID, userId)
}
// Anthropic splits input into uncached + cache_read + cache_creation.
// Langfuse's "input" should be the total prompt tokens so cost calc is correct.
const cacheRead = params.usage.cache_read_input_tokens ?? 0
const cacheCreation = params.usage.cache_creation_input_tokens ?? 0
gen.update({
output: params.output,
usageDetails: {
input: params.usage.input_tokens,
input: params.usage.input_tokens + cacheCreation + cacheRead,
output: params.usage.output_tokens,
...(cacheRead > 0 && { cache_read: cacheRead }),
...(cacheCreation > 0 && { cache_creation: cacheCreation }),
},
})