feat: 添加 prompt 缓存命中率检测与警告功能

每次 API 请求后自动计算缓存命中率，低于阈值（默认 80%）时在对话流中显示黄色警告消息。同时更新 /context 命令的输出，使其显示缓存命中率。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-17 22:05:50 +00:00 · 2026-05-07 10:49:06 +08:00
parent e8759f3402
commit e3c0699f5b
5 changed files with 191 additions and 0 deletions
--- a/src/query.ts
+++ b/src/query.ts
@@ -127,6 +127,11 @@ import {
  isLangfuseEnabled,
 } from './services/langfuse/index.js'
 import { getAPIProvider } from './utils/model/providers.js'
+import {
+  createCacheWarningMessage,
+  getCacheThreshold,
+  shouldShowCacheWarning,
+} from './utils/cacheWarning.js'

 /* eslint-disable @typescript-eslint/no-require-imports */
 const snipModule = feature('HISTORY_SNIP')
@@ -1229,6 +1234,32 @@ async function* queryLoop(
      return { reason: 'model_error', error }
    }

+    // 检测缓存命中率并在需要时 yield 警告消息
+    // 必须在 executePostSamplingHooks 之前执行，确保警告消息在工具结果之前显示
+    if (
+      assistantMessages.length > 0 &&
+      !toolUseContext.options.isNonInteractiveSession
+    ) {
+      const lastAssistant = assistantMessages.at(-1)
+      const usage = lastAssistant?.message?.usage as
+        | {
+            input_tokens: number
+            cache_creation_input_tokens: number
+            cache_read_input_tokens: number
+          }
+        | undefined
+      if (usage) {
+        const warningInfo = shouldShowCacheWarning(
+          usage,
+          querySource,
+          getCacheThreshold(),
+        )
+        if (warningInfo) {
+          yield createCacheWarningMessage(warningInfo)
+        }
+      }
+    }
+
    // Execute post-sampling hooks after model response is complete
    if (assistantMessages.length > 0) {
      void executePostSamplingHooks(