diff --git a/src/components/ContextVisualization.tsx b/src/components/ContextVisualization.tsx index 6fffd7609..f9a65c1c8 100644 --- a/src/components/ContextVisualization.tsx +++ b/src/components/ContextVisualization.tsx @@ -115,6 +115,8 @@ export function ContextVisualization({ data }: Props): React.ReactNode { agents, skills, messageBreakdown, + cacheHitRate, + cacheThreshold, } = data; // Filter out categories with 0 tokens for the legend, and exclude Free space, Autocompact buffer, and deferred @@ -166,6 +168,12 @@ export function ContextVisualization({ data }: Props): React.ReactNode { {model} · {formatTokens(totalTokens)}/{formatTokens(rawMaxTokens)} tokens ({percentage}%) + {cacheHitRate !== undefined && cacheThreshold !== undefined && ( + + Cache hit rate: {cacheHitRate.toFixed(0)}% + {cacheHitRate < cacheThreshold ? ` (below ${cacheThreshold}% threshold)` : ''} + + )} Estimated usage by category diff --git a/src/query.ts b/src/query.ts index 197054c63..e356107bc 100644 --- a/src/query.ts +++ b/src/query.ts @@ -127,6 +127,11 @@ import { isLangfuseEnabled, } from './services/langfuse/index.js' import { getAPIProvider } from './utils/model/providers.js' +import { + createCacheWarningMessage, + getCacheThreshold, + shouldShowCacheWarning, +} from './utils/cacheWarning.js' /* eslint-disable @typescript-eslint/no-require-imports */ const snipModule = feature('HISTORY_SNIP') @@ -1229,6 +1234,32 @@ async function* queryLoop( return { reason: 'model_error', error } } + // Check the cache hit rate and yield a warning message when needed + // Must run before executePostSamplingHooks so the warning message is shown before tool results + if ( + assistantMessages.length > 0 && + !toolUseContext.options.isNonInteractiveSession + ) { + const lastAssistant = assistantMessages.at(-1) + const usage = lastAssistant?.message?.usage as + | { + input_tokens: number + cache_creation_input_tokens: number + cache_read_input_tokens: number + } + | undefined + if (usage) { + const warningInfo = shouldShowCacheWarning( + usage, + querySource, + getCacheThreshold(), 
) + if (warningInfo) { + yield createCacheWarningMessage(warningInfo) + } + } + } + // Execute post-sampling hooks after model response is complete if (assistantMessages.length > 0) { void executePostSamplingHooks( diff --git a/src/utils/analyzeContext.ts b/src/utils/analyzeContext.ts index 9c379728a..0874ab8df 100644 --- a/src/utils/analyzeContext.ts +++ b/src/utils/analyzeContext.ts @@ -229,6 +229,10 @@ export interface ContextData { cache_creation_input_tokens: number cache_read_input_tokens: number } | null + /** Cache hit rate percentage (0-100), undefined if no data */ + readonly cacheHitRate?: number + /** Cache warning threshold percentage */ + readonly cacheThreshold?: number } export async function countToolDefinitionTokens( @@ -1396,5 +1400,13 @@ export async function analyzeContextUsage( isAutoCompactEnabled: isAutoCompact, messageBreakdown: formattedMessageBreakdown, apiUsage, + ...(() => { + if (!apiUsage) return {} + const { calculateCacheHitRate, getCacheThreshold } = + require('./cacheWarning.js') as typeof import('./cacheWarning.js') + const hitRate = calculateCacheHitRate(apiUsage) + if (hitRate === null) return {} + return { cacheHitRate: hitRate, cacheThreshold: getCacheThreshold() } + })(), } } diff --git a/src/utils/cacheWarning.ts b/src/utils/cacheWarning.ts new file mode 100644 index 000000000..0334fdeb8 --- /dev/null +++ b/src/utils/cacheWarning.ts @@ -0,0 +1,131 @@ +import { createUserMessage } from './messages.js' +import { getInitialSettings } from './settings/settings.js' +import type { Message } from '../types/message.js' + +// Usage type (extracted from the API response) +interface Usage { + input_tokens: number + cache_creation_input_tokens: number + cache_read_input_tokens: number +} + +export interface CacheHitRateInfo { + hitRate: number + threshold: number + trend: number | null // positive = rising, negative = falling + shouldWarn: boolean +} + +interface CacheWarningState { + lastHitRate: number | null + lastTimestamp: number | null +} + +// Module-level state, tracked independently per querySource +const 
cacheWarningStateBySource = new Map() + +const DEFAULT_CACHE_THRESHOLD = 80 + +/** + * Reads the cache threshold setting from settings.json + */ +export function getCacheThreshold(): number { + const settings = getInitialSettings() + return settings.cacheThreshold ?? DEFAULT_CACHE_THRESHOLD +} + +/** + * Computes the cache hit rate + * Returns a value in the range 0-100; null means there is no valid data + */ +export function calculateCacheHitRate( + usage: Usage | null | undefined, +): number | null { + if (!usage) return null + + const { input_tokens, cache_creation_input_tokens, cache_read_input_tokens } = + usage + + // All cache fields being 0 means there is no cache data + if (cache_read_input_tokens === 0 && cache_creation_input_tokens === 0) { + return null + } + + const totalInputTokens = + input_tokens + cache_creation_input_tokens + cache_read_input_tokens + if (totalInputTokens === 0) return null + + return (cache_read_input_tokens / totalInputTokens) * 100 +} + +/** + * Determines whether a cache warning should be shown + * @param usage API usage data + * @param querySource Query source (used to track state independently) + * @param threshold Cache threshold percentage + * @returns Warning info, or null when no warning is needed + */ +export function shouldShowCacheWarning( + usage: Usage | null | undefined, + querySource: string, + threshold: number, +): CacheHitRateInfo | null { + const hitRate = calculateCacheHitRate(usage) + + // No cache data + if (hitRate === null) { + return null + } + + // Get or initialize the state for this querySource + let state = cacheWarningStateBySource.get(querySource) + if (!state) { + state = { lastHitRate: null, lastTimestamp: null } + cacheWarningStateBySource.set(querySource, state) + } + + // Do not show a warning on the first request + if (state.lastHitRate === null) { + state.lastHitRate = hitRate + state.lastTimestamp = Date.now() // NOTE(review): lastTimestamp is written but never read in this module + return null + } + + // Compute the trend + const trend = hitRate - state.lastHitRate + + // Update the state + state.lastHitRate = hitRate + state.lastTimestamp = Date.now() + + // Check whether a warning is needed + if (hitRate < threshold) { + return { hitRate, threshold, trend, shouldWarn: true } + } + + return null +} + +/** + * Builds the cache warning message + * @param info Cache warning info + * @returns A user message, marked as isVisibleInTranscriptOnly + */ +export function 
createCacheWarningMessage(info: CacheHitRateInfo): Message { + const { hitRate, threshold, trend } = info + + // Build the message content + let content = `Cache hit rate ${hitRate.toFixed(0)}%, below ${threshold}% threshold` + + if (trend !== null && Math.abs(trend) > 0.1) { // NOTE(review): toFixed(0) renders trends under 0.5 as 0% + const trendIcon = trend > 0 ? '^' : 'v' + const trendPercent = Math.abs(trend).toFixed(0) + content += ` (${trendIcon}${trendPercent}%)` + } + + return createUserMessage({ + content, + isMeta: true, + isVisibleInTranscriptOnly: true, + }) +} diff --git a/src/utils/settings/types.ts b/src/utils/settings/types.ts index 3af01236f..430ed25b7 100644 --- a/src/utils/settings/types.ts +++ b/src/utils/settings/types.ts @@ -1072,6 +1072,15 @@ export const SettingsSchema = lazySchema(() => 'Only applies to User, Project, and Local memory types (Managed/policy files cannot be excluded). ' + 'Examples: "/home/user/monorepo/CLAUDE.md", "**/code/CLAUDE.md", "**/some-dir/.claude/rules/**"', ), + cacheThreshold: z + .number() + .int() + .min(0) + .max(100) + .optional() + .describe( + 'Prompt cache hit rate threshold (0-100). Warnings shown when cache hit rate falls below this percentage. Default: 80.', + ), pluginTrustMessage: z .string() .optional()