mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-22 08:15:53 +00:00
feat: 添加 prompt 缓存命中率检测与警告功能
每次 API 请求后自动计算缓存命中率,低于阈值(默认 80%)时在对话流中显示黄色警告消息。 同时更新 /context 命令的输出,使其显示缓存命中率。 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -115,6 +115,8 @@ export function ContextVisualization({ data }: Props): React.ReactNode {
|
|||||||
agents,
|
agents,
|
||||||
skills,
|
skills,
|
||||||
messageBreakdown,
|
messageBreakdown,
|
||||||
|
cacheHitRate,
|
||||||
|
cacheThreshold,
|
||||||
} = data;
|
} = data;
|
||||||
|
|
||||||
// Filter out categories with 0 tokens for the legend, and exclude Free space, Autocompact buffer, and deferred
|
// Filter out categories with 0 tokens for the legend, and exclude Free space, Autocompact buffer, and deferred
|
||||||
@@ -166,6 +168,12 @@ export function ContextVisualization({ data }: Props): React.ReactNode {
|
|||||||
{model} · {formatTokens(totalTokens)}/{formatTokens(rawMaxTokens)} tokens ({percentage}%)
|
{model} · {formatTokens(totalTokens)}/{formatTokens(rawMaxTokens)} tokens ({percentage}%)
|
||||||
</Text>
|
</Text>
|
||||||
<CollapseStatus />
|
<CollapseStatus />
|
||||||
|
{cacheHitRate !== undefined && cacheThreshold !== undefined && (
|
||||||
|
<Text color={cacheHitRate < cacheThreshold ? 'warning' : undefined}>
|
||||||
|
Cache hit rate: {cacheHitRate.toFixed(0)}%
|
||||||
|
{cacheHitRate < cacheThreshold ? ` (below ${cacheThreshold}% threshold)` : ''}
|
||||||
|
</Text>
|
||||||
|
)}
|
||||||
<Text> </Text>
|
<Text> </Text>
|
||||||
<Text dimColor italic>
|
<Text dimColor italic>
|
||||||
Estimated usage by category
|
Estimated usage by category
|
||||||
|
|||||||
31
src/query.ts
31
src/query.ts
@@ -127,6 +127,11 @@ import {
|
|||||||
isLangfuseEnabled,
|
isLangfuseEnabled,
|
||||||
} from './services/langfuse/index.js'
|
} from './services/langfuse/index.js'
|
||||||
import { getAPIProvider } from './utils/model/providers.js'
|
import { getAPIProvider } from './utils/model/providers.js'
|
||||||
|
import {
|
||||||
|
createCacheWarningMessage,
|
||||||
|
getCacheThreshold,
|
||||||
|
shouldShowCacheWarning,
|
||||||
|
} from './utils/cacheWarning.js'
|
||||||
|
|
||||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||||
const snipModule = feature('HISTORY_SNIP')
|
const snipModule = feature('HISTORY_SNIP')
|
||||||
@@ -1229,6 +1234,32 @@ async function* queryLoop(
|
|||||||
return { reason: 'model_error', error }
|
return { reason: 'model_error', error }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 检测缓存命中率并在需要时 yield 警告消息
|
||||||
|
// 必须在 executePostSamplingHooks 之前执行,确保警告消息在工具结果之前显示
|
||||||
|
if (
|
||||||
|
assistantMessages.length > 0 &&
|
||||||
|
!toolUseContext.options.isNonInteractiveSession
|
||||||
|
) {
|
||||||
|
const lastAssistant = assistantMessages.at(-1)
|
||||||
|
const usage = lastAssistant?.message?.usage as
|
||||||
|
| {
|
||||||
|
input_tokens: number
|
||||||
|
cache_creation_input_tokens: number
|
||||||
|
cache_read_input_tokens: number
|
||||||
|
}
|
||||||
|
| undefined
|
||||||
|
if (usage) {
|
||||||
|
const warningInfo = shouldShowCacheWarning(
|
||||||
|
usage,
|
||||||
|
querySource,
|
||||||
|
getCacheThreshold(),
|
||||||
|
)
|
||||||
|
if (warningInfo) {
|
||||||
|
yield createCacheWarningMessage(warningInfo)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Execute post-sampling hooks after model response is complete
|
// Execute post-sampling hooks after model response is complete
|
||||||
if (assistantMessages.length > 0) {
|
if (assistantMessages.length > 0) {
|
||||||
void executePostSamplingHooks(
|
void executePostSamplingHooks(
|
||||||
|
|||||||
@@ -229,6 +229,10 @@ export interface ContextData {
|
|||||||
cache_creation_input_tokens: number
|
cache_creation_input_tokens: number
|
||||||
cache_read_input_tokens: number
|
cache_read_input_tokens: number
|
||||||
} | null
|
} | null
|
||||||
|
/** Cache hit rate percentage (0-100), undefined if no data */
|
||||||
|
readonly cacheHitRate?: number
|
||||||
|
/** Cache warning threshold percentage */
|
||||||
|
readonly cacheThreshold?: number
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function countToolDefinitionTokens(
|
export async function countToolDefinitionTokens(
|
||||||
@@ -1396,5 +1400,13 @@ export async function analyzeContextUsage(
|
|||||||
isAutoCompactEnabled: isAutoCompact,
|
isAutoCompactEnabled: isAutoCompact,
|
||||||
messageBreakdown: formattedMessageBreakdown,
|
messageBreakdown: formattedMessageBreakdown,
|
||||||
apiUsage,
|
apiUsage,
|
||||||
|
...(() => {
|
||||||
|
if (!apiUsage) return {}
|
||||||
|
const { calculateCacheHitRate, getCacheThreshold } =
|
||||||
|
require('./cacheWarning.js') as typeof import('./cacheWarning.js')
|
||||||
|
const hitRate = calculateCacheHitRate(apiUsage)
|
||||||
|
if (hitRate === null) return {}
|
||||||
|
return { cacheHitRate: hitRate, cacheThreshold: getCacheThreshold() }
|
||||||
|
})(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
131
src/utils/cacheWarning.ts
Normal file
131
src/utils/cacheWarning.ts
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
import { createUserMessage } from './messages.js'
|
||||||
|
import { getInitialSettings } from './settings/settings.js'
|
||||||
|
import type { Message } from '../types/message.js'
|
||||||
|
|
||||||
|
/**
 * Token counters extracted from an API response's usage data.
 *
 * The three fields are summed to form the denominator of the cache hit rate.
 */
interface Usage {
  /** Input tokens counted outside the two cache counters below. */
  input_tokens: number
  /** Input tokens reported as cache creation. */
  cache_creation_input_tokens: number
  /** Input tokens reported as cache reads; the numerator of the hit rate. */
  cache_read_input_tokens: number
}
|
||||||
|
|
||||||
|
/** Details of a cache hit rate measurement that fell below the threshold. */
export interface CacheHitRateInfo {
  /** Measured cache hit rate, as a percentage. */
  hitRate: number
  /** Threshold percentage the hit rate was compared against. */
  threshold: number
  /**
   * Current hit rate minus the previously recorded one:
   * positive = rising, negative = falling.
   */
  trend: number | null
  /** Set to `true` on every info object produced by `shouldShowCacheWarning`. */
  shouldWarn: boolean
}
|
||||||
|
|
||||||
|
/** Per-query-source bookkeeping used to compute the hit-rate trend. */
interface CacheWarningState {
  /** Hit rate recorded by the previous measurement; `null` before the first one. */
  lastHitRate: number | null
  /** `Date.now()` value captured when `lastHitRate` was last written; `null` before the first write. */
  lastTimestamp: number | null
}
|
||||||
|
|
||||||
|
// Module-level state: each querySource is tracked independently.
const cacheWarningStateBySource: Map<string, CacheWarningState> = new Map()

/** Threshold percentage used when settings do not provide `cacheThreshold`. */
const DEFAULT_CACHE_THRESHOLD = 80
|
||||||
|
|
||||||
|
/**
 * Reads the cache warning threshold from the initial settings.
 *
 * @returns The configured `cacheThreshold`, or `DEFAULT_CACHE_THRESHOLD` when
 *   the setting is `null` or `undefined`.
 */
export function getCacheThreshold(): number {
  const { cacheThreshold } = getInitialSettings()
  return cacheThreshold ?? DEFAULT_CACHE_THRESHOLD
}
|
||||||
|
|
||||||
|
/**
 * Computes the prompt-cache hit rate for a single API usage record.
 *
 * The rate is `cache_read_input_tokens` divided by the sum of `input_tokens`,
 * `cache_creation_input_tokens` and `cache_read_input_tokens`, expressed as a
 * percentage.
 *
 * Missing, `null`, non-finite or negative counters are treated as 0, so a
 * usage record without cache fields is reported as "no data" instead of
 * producing `NaN` or a misleading 0% hit rate.
 *
 * @param usage Usage counters from an API response; may be absent, and
 *   individual counters may be missing or `null`.
 * @returns A percentage in the range 0-100, or `null` when there is no usable
 *   cache data.
 */
export function calculateCacheHitRate(
  usage:
    | {
        input_tokens?: number | null
        cache_creation_input_tokens?: number | null
        cache_read_input_tokens?: number | null
      }
    | null
    | undefined,
): number | null {
  if (!usage) return null

  // Coerce anything that is not a positive finite number to 0.
  const toCount = (value: number | null | undefined): number =>
    typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : 0

  const uncachedTokens = toCount(usage.input_tokens)
  const cacheCreationTokens = toCount(usage.cache_creation_input_tokens)
  const cacheReadTokens = toCount(usage.cache_read_input_tokens)

  // Both cache counters at 0 means there is no cache data to report.
  if (cacheReadTokens === 0 && cacheCreationTokens === 0) {
    return null
  }

  // At least one cache counter is positive here, so the total is non-zero.
  const totalInputTokens = uncachedTokens + cacheCreationTokens + cacheReadTokens

  return (cacheReadTokens / totalInputTokens) * 100
}
|
||||||
|
|
||||||
|
/**
 * Decides whether a cache warning should be shown for the given usage record.
 *
 * State is kept per `querySource`. The first measurement for a source only
 * records the hit rate and never warns. Later measurements warn when the hit
 * rate is below `threshold`.
 *
 * @param usage API usage data.
 * @param querySource Query source, used as the key for independent state tracking.
 * @param threshold Cache threshold percentage.
 * @returns Warning details, or `null` when no warning is needed.
 */
export function shouldShowCacheWarning(
  usage: Usage | null | undefined,
  querySource: string,
  threshold: number,
): CacheHitRateInfo | null {
  const currentRate = calculateCacheHitRate(usage)
  if (currentRate === null) {
    // No cache data: leave the tracked state untouched.
    return null
  }

  // Look up the tracked state for this source, creating it on first use.
  let tracked = cacheWarningStateBySource.get(querySource)
  if (tracked === undefined) {
    tracked = { lastHitRate: null, lastTimestamp: null }
    cacheWarningStateBySource.set(querySource, tracked)
  }

  // Capture the previous rate before overwriting it with the current one.
  const previousRate = tracked.lastHitRate
  tracked.lastHitRate = currentRate
  tracked.lastTimestamp = Date.now()

  // The first request for a source never produces a warning.
  if (previousRate === null) {
    return null
  }

  return currentRate < threshold
    ? {
        hitRate: currentRate,
        threshold,
        trend: currentRate - previousRate,
        shouldWarn: true,
      }
    : null
}
|
||||||
|
|
||||||
|
/**
 * Builds the cache warning message.
 *
 * The text reports the rounded hit rate and the threshold. A trend suffix
 * such as `(^5%)` or `(v12%)` is appended only when the trend rounds to a
 * non-zero whole percentage, so the message never shows `(^0%)` or `(v0%)`.
 *
 * @param info Cache warning details.
 * @returns The message produced by `createUserMessage`, created with
 *   `isMeta: true` and `isVisibleInTranscriptOnly: true`.
 */
export function createCacheWarningMessage(info: CacheHitRateInfo): Message {
  const { hitRate, threshold, trend } = info

  let content = `Cache hit rate ${hitRate.toFixed(0)}%, below ${threshold}% threshold`

  if (trend !== null && Math.abs(trend) > 0.1) {
    const trendPercent = Math.abs(trend).toFixed(0)
    // Trends below 0.5 render as "0"; skip the suffix rather than show "0%".
    if (trendPercent !== '0') {
      const trendIcon = trend > 0 ? '^' : 'v'
      content += ` (${trendIcon}${trendPercent}%)`
    }
  }

  return createUserMessage({
    content,
    isMeta: true,
    isVisibleInTranscriptOnly: true,
  })
}
|
||||||
@@ -1072,6 +1072,15 @@ export const SettingsSchema = lazySchema(() =>
|
|||||||
'Only applies to User, Project, and Local memory types (Managed/policy files cannot be excluded). ' +
|
'Only applies to User, Project, and Local memory types (Managed/policy files cannot be excluded). ' +
|
||||||
'Examples: "/home/user/monorepo/CLAUDE.md", "**/code/CLAUDE.md", "**/some-dir/.claude/rules/**"',
|
'Examples: "/home/user/monorepo/CLAUDE.md", "**/code/CLAUDE.md", "**/some-dir/.claude/rules/**"',
|
||||||
),
|
),
|
||||||
|
cacheThreshold: z
|
||||||
|
.number()
|
||||||
|
.int()
|
||||||
|
.min(0)
|
||||||
|
.max(100)
|
||||||
|
.optional()
|
||||||
|
.describe(
|
||||||
|
'Prompt cache hit rate threshold (0-100). Warnings shown when cache hit rate falls below this percentage. Default: 80.',
|
||||||
|
),
|
||||||
pluginTrustMessage: z
|
pluginTrustMessage: z
|
||||||
.string()
|
.string()
|
||||||
.optional()
|
.optional()
|
||||||
|
|||||||
Reference in New Issue
Block a user