feat: 添加 prompt 缓存命中率检测与警告功能

每次 API 请求后自动计算缓存命中率,低于阈值(默认 80%)时在对话流中显示黄色警告消息。
同时更新 /context 命令的输出,使其显示缓存命中率。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
claude-code-best
2026-05-07 10:49:06 +08:00
parent e8759f3402
commit e3c0699f5b
5 changed files with 191 additions and 0 deletions

View File

@@ -115,6 +115,8 @@ export function ContextVisualization({ data }: Props): React.ReactNode {
agents,
skills,
messageBreakdown,
cacheHitRate,
cacheThreshold,
} = data;
// Filter out categories with 0 tokens for the legend, and exclude Free space, Autocompact buffer, and deferred
@@ -166,6 +168,12 @@ export function ContextVisualization({ data }: Props): React.ReactNode {
{model} · {formatTokens(totalTokens)}/{formatTokens(rawMaxTokens)} tokens ({percentage}%)
</Text>
<CollapseStatus />
{cacheHitRate !== undefined && cacheThreshold !== undefined && (
<Text color={cacheHitRate < cacheThreshold ? 'warning' : undefined}>
Cache hit rate: {cacheHitRate.toFixed(0)}%
{cacheHitRate < cacheThreshold ? ` (below ${cacheThreshold}% threshold)` : ''}
</Text>
)}
<Text> </Text>
<Text dimColor italic>
Estimated usage by category

View File

@@ -127,6 +127,11 @@ import {
isLangfuseEnabled,
} from './services/langfuse/index.js'
import { getAPIProvider } from './utils/model/providers.js'
import {
createCacheWarningMessage,
getCacheThreshold,
shouldShowCacheWarning,
} from './utils/cacheWarning.js'
/* eslint-disable @typescript-eslint/no-require-imports */
const snipModule = feature('HISTORY_SNIP')
@@ -1229,6 +1234,32 @@ async function* queryLoop(
return { reason: 'model_error', error }
}
// 检测缓存命中率并在需要时 yield 警告消息
// 必须在 executePostSamplingHooks 之前执行,确保警告消息在工具结果之前显示
if (
assistantMessages.length > 0 &&
!toolUseContext.options.isNonInteractiveSession
) {
const lastAssistant = assistantMessages.at(-1)
const usage = lastAssistant?.message?.usage as
| {
input_tokens: number
cache_creation_input_tokens: number
cache_read_input_tokens: number
}
| undefined
if (usage) {
const warningInfo = shouldShowCacheWarning(
usage,
querySource,
getCacheThreshold(),
)
if (warningInfo) {
yield createCacheWarningMessage(warningInfo)
}
}
}
// Execute post-sampling hooks after model response is complete
if (assistantMessages.length > 0) {
void executePostSamplingHooks(

View File

@@ -229,6 +229,10 @@ export interface ContextData {
cache_creation_input_tokens: number
cache_read_input_tokens: number
} | null
/** Cache hit rate percentage (0-100), undefined if no data */
readonly cacheHitRate?: number
/** Cache warning threshold percentage */
readonly cacheThreshold?: number
}
export async function countToolDefinitionTokens(
@@ -1396,5 +1400,13 @@ export async function analyzeContextUsage(
isAutoCompactEnabled: isAutoCompact,
messageBreakdown: formattedMessageBreakdown,
apiUsage,
...(() => {
if (!apiUsage) return {}
const { calculateCacheHitRate, getCacheThreshold } =
require('./cacheWarning.js') as typeof import('./cacheWarning.js')
const hitRate = calculateCacheHitRate(apiUsage)
if (hitRate === null) return {}
return { cacheHitRate: hitRate, cacheThreshold: getCacheThreshold() }
})(),
}
}

131
src/utils/cacheWarning.ts Normal file
View File

@@ -0,0 +1,131 @@
import { createUserMessage } from './messages.js'
import { getInitialSettings } from './settings/settings.js'
import type { Message } from '../types/message.js'
// Usage shape (extracted from the API response).
// calculateCacheHitRate adds the three counters together to get the total
// input size and uses cache_read_input_tokens as the numerator of the hit rate.
interface Usage {
input_tokens: number
cache_creation_input_tokens: number
cache_read_input_tokens: number
}
// Details of a cache hit rate evaluation, as returned by shouldShowCacheWarning
// and consumed by createCacheWarningMessage.
export interface CacheHitRateInfo {
// Cache hit rate as a percentage.
hitRate: number
// Threshold percentage the hit rate was compared against.
threshold: number
trend: number | null // positive = rising, negative = falling (current hit rate minus the previous one)
// shouldShowCacheWarning only ever returns objects with this set to true.
shouldWarn: boolean
}
// Tracking state kept between calls to shouldShowCacheWarning for one query source.
interface CacheWarningState {
// Hit rate recorded by the previous call; null until a first rate has been recorded.
lastHitRate: number | null
// Date.now() value captured when lastHitRate was last written; null until then.
lastTimestamp: number | null
}
// Module-level state; each querySource is tracked independently.
const cacheWarningStateBySource = new Map<string, CacheWarningState>()
// Fallback threshold (percent) used by getCacheThreshold when the settings
// do not provide a cacheThreshold value.
const DEFAULT_CACHE_THRESHOLD = 80
/**
 * Resolves the cache hit rate warning threshold, as a percentage.
 *
 * Uses the `cacheThreshold` value from the initial settings when one is
 * present, and falls back to `DEFAULT_CACHE_THRESHOLD` otherwise.
 *
 * @returns The threshold percentage to compare hit rates against.
 */
export function getCacheThreshold(): number {
  const { cacheThreshold } = getInitialSettings()
  return cacheThreshold ?? DEFAULT_CACHE_THRESHOLD
}
/**
 * Computes the prompt cache hit rate for a single API response.
 *
 * The hit rate is `cache_read_input_tokens` divided by the sum of
 * `input_tokens`, `cache_creation_input_tokens` and `cache_read_input_tokens`,
 * expressed as a percentage.
 *
 * Callers obtain `usage` by casting raw API data, so the counters are
 * normalized defensively: anything that is not a positive finite number
 * (null, undefined, NaN, negative) is counted as 0. Without this, null cache
 * counters were reported as a 0% hit rate and undefined ones as NaN.
 *
 * @param usage Usage counters from the API response, if any.
 * @returns The hit rate in the range 0-100, or null when there is no usable
 *   cache data (no usage object, or both cache counters are zero/absent).
 */
export function calculateCacheHitRate(
  usage: Usage | null | undefined,
): number | null {
  if (!usage) return null

  // Accepts `unknown` on purpose: the declared type says number, but the
  // runtime value may be null or missing.
  const toCount = (value: unknown): number =>
    typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : 0

  const uncachedTokens = toCount(usage.input_tokens)
  const cacheWriteTokens = toCount(usage.cache_creation_input_tokens)
  const cacheReadTokens = toCount(usage.cache_read_input_tokens)

  // Both cache counters at zero means the response carries no cache data.
  if (cacheReadTokens === 0 && cacheWriteTokens === 0) {
    return null
  }

  // At least one cache counter is positive here, so the total is never zero.
  const totalInputTokens = uncachedTokens + cacheWriteTokens + cacheReadTokens
  return (cacheReadTokens / totalInputTokens) * 100
}
/**
 * Decides whether a cache warning should be shown for the latest response.
 *
 * Each `querySource` has its own tracking entry in the module-level map. The
 * first response with a computable hit rate for a source only records a
 * baseline and never warns. Later responses update the baseline and warn when
 * the hit rate is below `threshold`.
 *
 * @param usage API usage data for the response.
 * @param querySource Query origin, used as the key for independent tracking.
 * @param threshold Cache threshold percentage.
 * @returns Warning details when the hit rate is below the threshold, otherwise null.
 */
export function shouldShowCacheWarning(
  usage: Usage | null | undefined,
  querySource: string,
  threshold: number,
): CacheHitRateInfo | null {
  const currentRate = calculateCacheHitRate(usage)
  if (currentRate === null) {
    // No cache data: leave the tracking state untouched.
    return null
  }

  // Look up the entry for this source, creating it on first use.
  let tracked = cacheWarningStateBySource.get(querySource)
  if (tracked === undefined) {
    tracked = { lastHitRate: null, lastTimestamp: null }
    cacheWarningStateBySource.set(querySource, tracked)
  }

  // Remember the previous rate, then record the new one in every case.
  const previousRate = tracked.lastHitRate
  tracked.lastHitRate = currentRate
  tracked.lastTimestamp = Date.now()

  // The first recorded rate is only a baseline; no warning is produced.
  if (previousRate === null) {
    return null
  }

  return currentRate < threshold
    ? {
        hitRate: currentRate,
        threshold,
        trend: currentRate - previousRate,
        shouldWarn: true,
      }
    : null
}
/**
 * Builds the message that reports a low cache hit rate.
 *
 * The text has the form `Cache hit rate N%, below T% threshold`. When the
 * trend rounds to at least one whole percentage point, a suffix ` (^D%)`
 * (rising) or ` (vD%)` (falling) is appended.
 *
 * @param info Cache warning details.
 * @returns A message built with createUserMessage, with `isMeta` and
 *   `isVisibleInTranscriptOnly` set to true.
 */
export function createCacheWarningMessage(info: CacheHitRateInfo): Message {
  const { hitRate, threshold, trend } = info

  let content = `Cache hit rate ${hitRate.toFixed(0)}%, below ${threshold}% threshold`

  // The trend is printed with toFixed(0), so it is only shown when it rounds
  // to a non-zero whole percent. The previous 0.1 cutoff let changes between
  // 0.1 and 0.5 points through, which rendered as a meaningless "(^0%)".
  const minDisplayableTrend = 0.5
  if (trend !== null && Math.abs(trend) >= minDisplayableTrend) {
    const trendIcon = trend > 0 ? '^' : 'v'
    const trendPercent = Math.abs(trend).toFixed(0)
    content += ` (${trendIcon}${trendPercent}%)`
  }

  return createUserMessage({
    content,
    isMeta: true,
    isVisibleInTranscriptOnly: true,
  })
}

View File

@@ -1072,6 +1072,15 @@ export const SettingsSchema = lazySchema(() =>
'Only applies to User, Project, and Local memory types (Managed/policy files cannot be excluded). ' +
'Examples: "/home/user/monorepo/CLAUDE.md", "**/code/CLAUDE.md", "**/some-dir/.claude/rules/**"',
),
cacheThreshold: z
.number()
.int()
.min(0)
.max(100)
.optional()
.describe(
'Prompt cache hit rate threshold (0-100). Warnings shown when cache hit rate falls below this percentage. Default: 80.',
),
pluginTrustMessage: z
.string()
.optional()