diff --git a/src/components/ContextVisualization.tsx b/src/components/ContextVisualization.tsx
index 6fffd7609..f9a65c1c8 100644
--- a/src/components/ContextVisualization.tsx
+++ b/src/components/ContextVisualization.tsx
@@ -115,6 +115,8 @@ export function ContextVisualization({ data }: Props): React.ReactNode {
agents,
skills,
messageBreakdown,
+ cacheHitRate,
+ cacheThreshold,
} = data;
// Filter out categories with 0 tokens for the legend, and exclude Free space, Autocompact buffer, and deferred
@@ -166,6 +168,12 @@ export function ContextVisualization({ data }: Props): React.ReactNode {
{model} · {formatTokens(totalTokens)}/{formatTokens(rawMaxTokens)} tokens ({percentage}%)
+ {cacheHitRate !== undefined && cacheThreshold !== undefined && (
+
+ Cache hit rate: {cacheHitRate.toFixed(0)}%
+ {cacheHitRate < cacheThreshold ? ` (below ${cacheThreshold}% threshold)` : ''}
+
+ )}
Estimated usage by category
diff --git a/src/query.ts b/src/query.ts
index 197054c63..e356107bc 100644
--- a/src/query.ts
+++ b/src/query.ts
@@ -127,6 +127,11 @@ import {
isLangfuseEnabled,
} from './services/langfuse/index.js'
import { getAPIProvider } from './utils/model/providers.js'
+import {
+ createCacheWarningMessage,
+ getCacheThreshold,
+ shouldShowCacheWarning,
+} from './utils/cacheWarning.js'
/* eslint-disable @typescript-eslint/no-require-imports */
const snipModule = feature('HISTORY_SNIP')
@@ -1229,6 +1234,32 @@ async function* queryLoop(
return { reason: 'model_error', error }
}
+ // Check the cache hit rate and yield a warning message when needed.
+ // This must run before executePostSamplingHooks so the warning message is shown before tool results.
+ if (
+ assistantMessages.length > 0 &&
+ !toolUseContext.options.isNonInteractiveSession
+ ) {
+ const lastAssistant = assistantMessages.at(-1)
+ const usage = lastAssistant?.message?.usage as
+ | {
+ input_tokens: number
+ cache_creation_input_tokens: number
+ cache_read_input_tokens: number
+ }
+ | undefined
+ if (usage) {
+ const warningInfo = shouldShowCacheWarning(
+ usage,
+ querySource,
+ getCacheThreshold(),
+ )
+ if (warningInfo) {
+ yield createCacheWarningMessage(warningInfo)
+ }
+ }
+ }
+
// Execute post-sampling hooks after model response is complete
if (assistantMessages.length > 0) {
void executePostSamplingHooks(
diff --git a/src/utils/analyzeContext.ts b/src/utils/analyzeContext.ts
index 9c379728a..0874ab8df 100644
--- a/src/utils/analyzeContext.ts
+++ b/src/utils/analyzeContext.ts
@@ -229,6 +229,10 @@ export interface ContextData {
cache_creation_input_tokens: number
cache_read_input_tokens: number
} | null
+ /** Cache hit rate percentage (0-100), undefined if no data */
+ readonly cacheHitRate?: number
+ /** Cache warning threshold percentage */
+ readonly cacheThreshold?: number
}
export async function countToolDefinitionTokens(
@@ -1396,5 +1400,13 @@ export async function analyzeContextUsage(
isAutoCompactEnabled: isAutoCompact,
messageBreakdown: formattedMessageBreakdown,
apiUsage,
+ ...(() => {
+ if (!apiUsage) return {}
+ const { calculateCacheHitRate, getCacheThreshold } =
+ require('./cacheWarning.js') as typeof import('./cacheWarning.js')
+ const hitRate = calculateCacheHitRate(apiUsage)
+ if (hitRate === null) return {}
+ return { cacheHitRate: hitRate, cacheThreshold: getCacheThreshold() }
+ })(),
}
}
diff --git a/src/utils/cacheWarning.ts b/src/utils/cacheWarning.ts
new file mode 100644
index 000000000..0334fdeb8
--- /dev/null
+++ b/src/utils/cacheWarning.ts
@@ -0,0 +1,131 @@
+import { createUserMessage } from './messages.js'
+import { getInitialSettings } from './settings/settings.js'
+import type { Message } from '../types/message.js'
+
+// Usage shape (fields extracted from the API response) consumed by the cache hit-rate helpers below
+interface Usage {
+ input_tokens: number // counted in the hit-rate denominator only
+ cache_creation_input_tokens: number // counted in the hit-rate denominator only
+ cache_read_input_tokens: number // hit-rate numerator; also part of the denominator
+}
+
+export interface CacheHitRateInfo { // details returned by shouldShowCacheWarning when the hit rate is below the threshold
+ hitRate: number // current cache hit rate as a percentage
+ threshold: number // threshold percentage the hit rate was compared against
+ trend: number | null // current minus previous hit rate: positive = rising, negative = falling
+ shouldWarn: boolean // shouldShowCacheWarning sets this to true on every object it returns
+}
+
+interface CacheWarningState { // tracking record stored per querySource
+ lastHitRate: number | null // hit rate from the previous call; null until the first call with cache data
+ lastTimestamp: number | null // Date.now() taken when lastHitRate was stored; written but never read in this module
+}
+
+// Module-level state: the last observed hit rate is tracked independently per querySource.
+const cacheWarningStateBySource = new Map<string, CacheWarningState>()
+
+const DEFAULT_CACHE_THRESHOLD = 80
+
+/**
+ * Returns the `cacheThreshold` value from the initial settings, or the default when it is unset.
+ */
+export function getCacheThreshold(): number {
+  const { cacheThreshold } = getInitialSettings()
+  return cacheThreshold ?? DEFAULT_CACHE_THRESHOLD
+}
+
+/**
+ * Computes the prompt-cache hit rate: cache-read tokens as a share of all input tokens.
+ * Counters that are null, undefined, or not finite numbers are treated as 0.
+ * @returns A percentage (0-100), or null when `usage` is missing or shows no cache activity.
+ */
+export function calculateCacheHitRate(
+  usage: Usage | null | undefined,
+): number | null {
+  if (!usage) return null
+
+  // Payloads may carry null/undefined despite the declared type; coerce to avoid NaN and bogus 0% rates.
+  const toCount = (value: unknown): number =>
+    typeof value === 'number' && Number.isFinite(value) ? value : 0
+  const uncached = toCount(usage.input_tokens)
+  const cacheWrites = toCount(usage.cache_creation_input_tokens)
+  const cacheReads = toCount(usage.cache_read_input_tokens)
+
+  // No cache reads and no cache writes means there is no cache data to report.
+  if (cacheReads === 0 && cacheWrites === 0) return null
+
+  const totalInputTokens = uncached + cacheWrites + cacheReads
+  return totalInputTokens === 0 ? null : (cacheReads / totalInputTokens) * 100
+}
+
+/**
+ * Decides whether a low cache-hit-rate warning should be shown for a response.
+ * The first response with cache data for a source only records a baseline and never warns.
+ *
+ * @param usage Token usage reported for the response
+ * @param querySource Query source; the previous hit rate is tracked separately per source
+ * @param threshold Hit-rate percentage below which a warning is produced
+ * @returns Warning details when a warning is due, otherwise null
+ */
+export function shouldShowCacheWarning(
+  usage: Usage | null | undefined,
+  querySource: string,
+  threshold: number,
+): CacheHitRateInfo | null {
+  const currentRate = calculateCacheHitRate(usage)
+
+  // Without cache data there is nothing to compare or record.
+  if (currentRate === null) return null
+
+  // Look up the tracker for this source, registering an empty one on first sight.
+  const known = cacheWarningStateBySource.get(querySource)
+  const tracker: CacheWarningState = known || {
+    lastHitRate: null,
+    lastTimestamp: null,
+  }
+  if (!known) {
+    cacheWarningStateBySource.set(querySource, tracker)
+  }
+
+  // Capture the previous rate before overwriting it with this observation.
+  const previousRate = tracker.lastHitRate
+  tracker.lastHitRate = currentRate
+  tracker.lastTimestamp = Date.now()
+
+  // The first observation for a source only seeds the tracker; it never warns.
+  if (previousRate === null) {
+    return null
+  }
+
+  // Positive trend = the rate went up since the last response, negative = it went down.
+  const trend = currentRate - previousRate
+
+  // Warn only when the rate is strictly below the threshold.
+  return currentRate < threshold
+    ? { hitRate: currentRate, threshold, trend, shouldWarn: true }
+    : null
+}
+
+/**
+ * Builds the message that reports a below-threshold cache hit rate.
+ * @param info Hit rate, threshold and trend describing the warning
+ * @returns A user message created with `isMeta` and `isVisibleInTranscriptOnly` set to true
+ */
+export function createCacheWarningMessage(info: CacheHitRateInfo): Message {
+  const { hitRate, threshold, trend } = info
+
+  let content = `Cache hit rate ${hitRate.toFixed(0)}%, below ${threshold}% threshold`
+
+  // Append the trend only when it is still non-zero after rounding; otherwise it would read "(^0%)".
+  const trendPercent = trend !== null && Number.isFinite(trend) ? Math.abs(trend).toFixed(0) : '0'
+  if (trend !== null && trendPercent !== '0') {
+    const trendIcon = trend > 0 ? '^' : 'v'
+    content += ` (${trendIcon}${trendPercent}%)`
+  }
+
+  return createUserMessage({
+    content,
+    isMeta: true,
+    isVisibleInTranscriptOnly: true,
+  })
+}
diff --git a/src/utils/settings/types.ts b/src/utils/settings/types.ts
index 3af01236f..430ed25b7 100644
--- a/src/utils/settings/types.ts
+++ b/src/utils/settings/types.ts
@@ -1072,6 +1072,15 @@ export const SettingsSchema = lazySchema(() =>
'Only applies to User, Project, and Local memory types (Managed/policy files cannot be excluded). ' +
'Examples: "/home/user/monorepo/CLAUDE.md", "**/code/CLAUDE.md", "**/some-dir/.claude/rules/**"',
),
+ cacheThreshold: z
+ .number()
+ .int()
+ .min(0)
+ .max(100)
+ .optional()
+ .describe(
+ 'Prompt cache hit rate threshold (0-100). Warnings shown when cache hit rate falls below this percentage. Default: 80.',
+ ),
pluginTrustMessage: z
.string()
.optional()