mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-15 12:55:51 +00:00
feat: 添加 prompt 缓存命中率检测与警告功能
每次 API 请求后自动计算缓存命中率,低于阈值(默认 80%)时在对话流中显示黄色警告消息。 同时更新 /context 命令输出中显示缓存命中率。 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -115,6 +115,8 @@ export function ContextVisualization({ data }: Props): React.ReactNode {
|
||||
agents,
|
||||
skills,
|
||||
messageBreakdown,
|
||||
cacheHitRate,
|
||||
cacheThreshold,
|
||||
} = data;
|
||||
|
||||
// Filter out categories with 0 tokens for the legend, and exclude Free space, Autocompact buffer, and deferred
|
||||
@@ -166,6 +168,12 @@ export function ContextVisualization({ data }: Props): React.ReactNode {
|
||||
{model} · {formatTokens(totalTokens)}/{formatTokens(rawMaxTokens)} tokens ({percentage}%)
|
||||
</Text>
|
||||
<CollapseStatus />
|
||||
{cacheHitRate !== undefined && cacheThreshold !== undefined && (
|
||||
<Text color={cacheHitRate < cacheThreshold ? 'warning' : undefined}>
|
||||
Cache hit rate: {cacheHitRate.toFixed(0)}%
|
||||
{cacheHitRate < cacheThreshold ? ` (below ${cacheThreshold}% threshold)` : ''}
|
||||
</Text>
|
||||
)}
|
||||
<Text> </Text>
|
||||
<Text dimColor italic>
|
||||
Estimated usage by category
|
||||
|
||||
31
src/query.ts
31
src/query.ts
@@ -127,6 +127,11 @@ import {
|
||||
isLangfuseEnabled,
|
||||
} from './services/langfuse/index.js'
|
||||
import { getAPIProvider } from './utils/model/providers.js'
|
||||
import {
|
||||
createCacheWarningMessage,
|
||||
getCacheThreshold,
|
||||
shouldShowCacheWarning,
|
||||
} from './utils/cacheWarning.js'
|
||||
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
const snipModule = feature('HISTORY_SNIP')
|
||||
@@ -1229,6 +1234,32 @@ async function* queryLoop(
|
||||
return { reason: 'model_error', error }
|
||||
}
|
||||
|
||||
// 检测缓存命中率并在需要时 yield 警告消息
|
||||
// 必须在 executePostSamplingHooks 之前执行,确保警告消息在工具结果之前显示
|
||||
if (
|
||||
assistantMessages.length > 0 &&
|
||||
!toolUseContext.options.isNonInteractiveSession
|
||||
) {
|
||||
const lastAssistant = assistantMessages.at(-1)
|
||||
const usage = lastAssistant?.message?.usage as
|
||||
| {
|
||||
input_tokens: number
|
||||
cache_creation_input_tokens: number
|
||||
cache_read_input_tokens: number
|
||||
}
|
||||
| undefined
|
||||
if (usage) {
|
||||
const warningInfo = shouldShowCacheWarning(
|
||||
usage,
|
||||
querySource,
|
||||
getCacheThreshold(),
|
||||
)
|
||||
if (warningInfo) {
|
||||
yield createCacheWarningMessage(warningInfo)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Execute post-sampling hooks after model response is complete
|
||||
if (assistantMessages.length > 0) {
|
||||
void executePostSamplingHooks(
|
||||
|
||||
@@ -229,6 +229,10 @@ export interface ContextData {
|
||||
cache_creation_input_tokens: number
|
||||
cache_read_input_tokens: number
|
||||
} | null
|
||||
/** Cache hit rate percentage (0-100), undefined if no data */
|
||||
readonly cacheHitRate?: number
|
||||
/** Cache warning threshold percentage */
|
||||
readonly cacheThreshold?: number
|
||||
}
|
||||
|
||||
export async function countToolDefinitionTokens(
|
||||
@@ -1396,5 +1400,13 @@ export async function analyzeContextUsage(
|
||||
isAutoCompactEnabled: isAutoCompact,
|
||||
messageBreakdown: formattedMessageBreakdown,
|
||||
apiUsage,
|
||||
...(() => {
|
||||
if (!apiUsage) return {}
|
||||
const { calculateCacheHitRate, getCacheThreshold } =
|
||||
require('./cacheWarning.js') as typeof import('./cacheWarning.js')
|
||||
const hitRate = calculateCacheHitRate(apiUsage)
|
||||
if (hitRate === null) return {}
|
||||
return { cacheHitRate: hitRate, cacheThreshold: getCacheThreshold() }
|
||||
})(),
|
||||
}
|
||||
}
|
||||
|
||||
131
src/utils/cacheWarning.ts
Normal file
131
src/utils/cacheWarning.ts
Normal file
@@ -0,0 +1,131 @@
|
||||
import { createUserMessage } from './messages.js'
|
||||
import { getInitialSettings } from './settings/settings.js'
|
||||
import type { Message } from '../types/message.js'
|
||||
|
||||
/**
 * Token usage counters extracted from an API response.
 *
 * Only the three counters this module needs to derive a cache hit rate are
 * declared here.
 */
interface Usage {
  /** Input token count reported alongside the cache counters. */
  input_tokens: number
  /** Input tokens reported as cache creation. */
  cache_creation_input_tokens: number
  /** Input tokens reported as cache reads. */
  cache_read_input_tokens: number
}
|
||||
|
||||
/** Details of a cache hit rate measurement that warrants a warning. */
export interface CacheHitRateInfo {
  /** Measured cache hit rate as a percentage. */
  hitRate: number
  /** Threshold percentage the hit rate was compared against. */
  threshold: number
  /**
   * Change in hit rate versus the previous measurement:
   * positive = rising, negative = falling.
   */
  trend: number | null
  /** Whether a warning should be shown. */
  shouldWarn: boolean
}
|
||||
|
||||
/** Last observed measurement for one query source. */
interface CacheWarningState {
  /** Hit rate of the previous measurement, or null before the first one. */
  lastHitRate: number | null
  /**
   * Time (from `Date.now()`) of the previous measurement, or null before the
   * first one. Written by this module but not currently read by it.
   */
  lastTimestamp: number | null
}

// Module-level state; each querySource is tracked independently.
const cacheWarningStateBySource = new Map<string, CacheWarningState>()

/** Threshold percentage used when settings do not provide one. */
const DEFAULT_CACHE_THRESHOLD = 80
|
||||
|
||||
/**
 * Reads the cache hit rate warning threshold from the initial settings.
 *
 * @returns The configured `cacheThreshold`, or `DEFAULT_CACHE_THRESHOLD` when
 *   the setting is null or undefined.
 */
export function getCacheThreshold(): number {
  const { cacheThreshold } = getInitialSettings()
  // `??` (not `||`) so that an explicit threshold of 0 is respected.
  return cacheThreshold ?? DEFAULT_CACHE_THRESHOLD
}
|
||||
|
||||
/**
 * Computes the cache hit rate for one set of usage counters.
 *
 * The rate is `cache_read_input_tokens` divided by the sum of all three input
 * counters, expressed as a percentage.
 *
 * @param usage Usage counters; may be null or undefined.
 * @returns A value in the range 0-100, or `null` when there is no usable
 *   cache data (no usage object, or both cache counters are zero/absent).
 */
export function calculateCacheHitRate(
  usage: Usage | null | undefined,
): number | null {
  if (!usage) return null

  // The declared type promises numbers, but usage objects that reach this
  // function through a type assertion can carry null or missing counters at
  // runtime. Treat anything that is not a positive finite number as 0 so the
  // result is never NaN and "no cache data" is not misreported as a 0% rate.
  const toCount = (value: unknown): number =>
    typeof value === 'number' && Number.isFinite(value) && value > 0
      ? value
      : 0

  const uncachedTokens = toCount(usage.input_tokens)
  const cacheWriteTokens = toCount(usage.cache_creation_input_tokens)
  const cacheReadTokens = toCount(usage.cache_read_input_tokens)

  // Both cache counters at zero means there is no cache data to report.
  if (cacheReadTokens === 0 && cacheWriteTokens === 0) {
    return null
  }

  // At least one cache counter is positive here, so the total is non-zero.
  const totalInputTokens = uncachedTokens + cacheWriteTokens + cacheReadTokens

  return (cacheReadTokens / totalInputTokens) * 100
}
|
||||
|
||||
/**
 * Decides whether a cache warning should be shown for the given usage.
 *
 * The first measurement for a query source only records a baseline and never
 * warns. Every later measurement updates the stored state and warns when the
 * hit rate is below the threshold.
 *
 * @param usage API usage data.
 * @param querySource Query source; state is tracked independently per source.
 * @param threshold Cache threshold percentage.
 * @returns Warning details, or `null` when no warning is needed.
 */
export function shouldShowCacheWarning(
  usage: Usage | null | undefined,
  querySource: string,
  threshold: number,
): CacheHitRateInfo | null {
  const hitRate = calculateCacheHitRate(usage)

  // No cache data: nothing to record and nothing to warn about.
  if (hitRate === null) {
    return null
  }

  // Fetch this source's state, creating it on first use.
  let state = cacheWarningStateBySource.get(querySource)
  if (state === undefined) {
    state = { lastHitRate: null, lastTimestamp: null }
    cacheWarningStateBySource.set(querySource, state)
  }

  // Capture the previous rate before overwriting it with this measurement.
  const previousHitRate = state.lastHitRate
  state.lastHitRate = hitRate
  state.lastTimestamp = Date.now()

  // The first measurement is only a baseline; it never produces a warning.
  if (previousHitRate === null) {
    return null
  }

  if (hitRate < threshold) {
    return {
      hitRate,
      threshold,
      trend: hitRate - previousHitRate,
      shouldWarn: true,
    }
  }

  return null
}
|
||||
|
||||
/**
 * Builds the cache warning message.
 *
 * The text reports the rounded hit rate and the threshold. A trend suffix
 * such as ` (v12%)` is appended only when the trend rounds to at least 1%,
 * so the message never shows a meaningless `0%` change.
 *
 * @param info Cache warning details.
 * @returns A user message created with `isMeta` and
 *   `isVisibleInTranscriptOnly` set.
 */
export function createCacheWarningMessage(info: CacheHitRateInfo): Message {
  const { hitRate, threshold, trend } = info

  let content = `Cache hit rate ${hitRate.toFixed(0)}%, below ${threshold}% threshold`

  if (trend !== null && Number.isFinite(trend)) {
    // Gate on the rounded value that is actually displayed, not the raw one.
    const trendPercent = Math.abs(trend).toFixed(0)
    if (trendPercent !== '0') {
      const trendIcon = trend > 0 ? '^' : 'v'
      content += ` (${trendIcon}${trendPercent}%)`
    }
  }

  return createUserMessage({
    content,
    isMeta: true,
    isVisibleInTranscriptOnly: true,
  })
}
|
||||
@@ -1072,6 +1072,15 @@ export const SettingsSchema = lazySchema(() =>
|
||||
'Only applies to User, Project, and Local memory types (Managed/policy files cannot be excluded). ' +
|
||||
'Examples: "/home/user/monorepo/CLAUDE.md", "**/code/CLAUDE.md", "**/some-dir/.claude/rules/**"',
|
||||
),
|
||||
cacheThreshold: z
|
||||
.number()
|
||||
.int()
|
||||
.min(0)
|
||||
.max(100)
|
||||
.optional()
|
||||
.describe(
|
||||
'Prompt cache hit rate threshold (0-100). Warnings shown when cache hit rate falls below this percentage. Default: 80.',
|
||||
),
|
||||
pluginTrustMessage: z
|
||||
.string()
|
||||
.optional()
|
||||
|
||||
Reference in New Issue
Block a user