feat: 添加 prompt 缓存命中率检测与警告功能

每次 API 请求完成后自动计算 prompt 缓存命中率;当命中率低于阈值(默认 80%)时,在对话流中显示一条黄色警告消息。
同时更新 /context 命令,使其输出中也显示缓存命中率。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
claude-code-best
2026-05-07 10:49:06 +08:00
parent e8759f3402
commit e3c0699f5b
5 changed files with 191 additions and 0 deletions

View File

@@ -127,6 +127,11 @@ import {
isLangfuseEnabled,
} from './services/langfuse/index.js'
import { getAPIProvider } from './utils/model/providers.js'
import {
createCacheWarningMessage,
getCacheThreshold,
shouldShowCacheWarning,
} from './utils/cacheWarning.js'
/* eslint-disable @typescript-eslint/no-require-imports */
const snipModule = feature('HISTORY_SNIP')
@@ -1229,6 +1234,32 @@ async function* queryLoop(
return { reason: 'model_error', error }
}
// 检测缓存命中率并在需要时 yield 警告消息
// 必须在 executePostSamplingHooks 之前执行,确保警告消息在工具结果之前显示
if (
assistantMessages.length > 0 &&
!toolUseContext.options.isNonInteractiveSession
) {
const lastAssistant = assistantMessages.at(-1)
const usage = lastAssistant?.message?.usage as
| {
input_tokens: number
cache_creation_input_tokens: number
cache_read_input_tokens: number
}
| undefined
if (usage) {
const warningInfo = shouldShowCacheWarning(
usage,
querySource,
getCacheThreshold(),
)
if (warningInfo) {
yield createCacheWarningMessage(warningInfo)
}
}
}
// Execute post-sampling hooks after model response is complete
if (assistantMessages.length > 0) {
void executePostSamplingHooks(