mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-17 22:05:50 +00:00
feat: 添加 prompt 缓存命中率检测与警告功能
每次 API 请求后自动计算缓存命中率,低于阈值(默认 80%)时在对话流中显示黄色警告消息。同时更新 /context 命令,使其输出中也显示缓存命中率。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
31
src/query.ts
31
src/query.ts
@@ -127,6 +127,11 @@ import {
|
||||
isLangfuseEnabled,
|
||||
} from './services/langfuse/index.js'
|
||||
import { getAPIProvider } from './utils/model/providers.js'
|
||||
import {
|
||||
createCacheWarningMessage,
|
||||
getCacheThreshold,
|
||||
shouldShowCacheWarning,
|
||||
} from './utils/cacheWarning.js'
|
||||
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
const snipModule = feature('HISTORY_SNIP')
|
||||
@@ -1229,6 +1234,32 @@ async function* queryLoop(
|
||||
return { reason: 'model_error', error }
|
||||
}
|
||||
|
||||
// 检测缓存命中率并在需要时 yield 警告消息
|
||||
// 必须在 executePostSamplingHooks 之前执行,确保警告消息在工具结果之前显示
|
||||
if (
|
||||
assistantMessages.length > 0 &&
|
||||
!toolUseContext.options.isNonInteractiveSession
|
||||
) {
|
||||
const lastAssistant = assistantMessages.at(-1)
|
||||
const usage = lastAssistant?.message?.usage as
|
||||
| {
|
||||
input_tokens: number
|
||||
cache_creation_input_tokens: number
|
||||
cache_read_input_tokens: number
|
||||
}
|
||||
| undefined
|
||||
if (usage) {
|
||||
const warningInfo = shouldShowCacheWarning(
|
||||
usage,
|
||||
querySource,
|
||||
getCacheThreshold(),
|
||||
)
|
||||
if (warningInfo) {
|
||||
yield createCacheWarningMessage(warningInfo)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Execute post-sampling hooks after model response is complete
|
||||
if (assistantMessages.length > 0) {
|
||||
void executePostSamplingHooks(
|
||||
|
||||
Reference in New Issue
Block a user