mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-17 13:55:50 +00:00
* fix: 修复状态栏 context 计数器在 loading 时闪现为 0 的问题 第三方 API(如智谱)在 message_start 中可能不返回完整 usage 数据, 导致 getCurrentUsage 返回全零 usage 对象,使 ctx 显示为 0%。 双重保护: - getCurrentUsage: 跳过全零 usage,继续往前找有真实数据的 message - calculateContextPercentages: totalInputTokens 为 0 时返回 null Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: 外部化 ESM 包使用 createRequire 替代裸 require color-diff-napi、image-processor-napi、audio-capture-napi 声明 "type": "module" 但使用裸 require(),Node.js ESM 中 require 不可用。改用 createRequire(import.meta.url) 或顶层 import。 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: getDefaultSonnetModel 优先使用用户配置的模型,修复第三方 provider 模型不存在错误 当用户通过 ANTHROPIC_MODEL 或 settings 配置了自定义 provider 支持的模型时, getDefaultSonnetModel/Haiku/Opus 现在会优先使用该配置,而非硬编码 Anthropic 官方模型 ID。 同时改进 Langfuse 可观测性:sideQuery 失败时记录错误信息到 span, optional 模式下标记 WARNING 而非 ERROR。 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: 将 auto_mode classifier 的 side-query span 绑定到父 trace classifyYoloAction 及 classifyYoloActionXml 接收 parentSpan 参数, 透传给 sideQuery 调用,使 auto_mode 的 side-query span 嵌套在主 agent trace 下。 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: 穷鬼模式下跳过 memdir_relevance side-query Poor mode 启用时不执行 findRelevantMemories 的预取调用, 避免额外的 API token 消耗。 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * chore: 添加 test:all 脚本用于完成任务后的全量检查 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: Vite 构建补齐缺失的 feature flags,修复 auto mode 不可见 Vite 构建插件的 DEFAULT_BUILD_FEATURES 缺少 BUDDY、TRANSCRIPT_CLASSIFIER、 BRIDGE_MODE、ACP、BG_SESSIONS、TEMPLATES,导致 feature('TRANSCRIPT_CLASSIFIER') 被替换为 false,auto mode 从 Shift+Tab 循环中消失。与 build.ts 对齐。 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: 统一 feature flags 到 defines.ts,修复 Vite 构建缺失 auto mode 将 DEFAULT_BUILD_FEATURES 列表从 build.ts、dev.ts、vite-plugin-feature-flags.ts 三处内联定义统一到 scripts/defines.ts 单一导出。之前的 Vite 插件缺少 TRANSCRIPT_CLASSIFIER 等 feature flag,导致 auto mode 在 Vite 构建中不可见。 Co-Authored-By: Claude Opus 4.6 
<noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
277 lines
10 KiB
TypeScript
277 lines
10 KiB
TypeScript
import type { BetaUsage as Usage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
|
import { roughTokenCountEstimationForMessages } from '../services/tokenEstimation.js'
|
|
import type { AssistantMessage, ContentItem, Message } from '../types/message.js'
|
|
import { SYNTHETIC_MESSAGES, SYNTHETIC_MODEL } from './messages.js'
|
|
import { jsonStringify } from './slowOperations.js'
|
|
|
|
/**
 * Extract the API usage record from a message, if it carries a real one.
 *
 * Returns `undefined` unless the message is an assistant message whose payload
 * has a `usage` field. Synthetic assistant messages are excluded: those whose
 * first content block is a text block listed in SYNTHETIC_MESSAGES, and those
 * whose model equals SYNTHETIC_MODEL.
 */
export function getTokenUsage(message: Message): Usage | undefined {
  if (message?.type !== 'assistant') return undefined

  const payload = message.message
  if (!payload || !('usage' in payload)) return undefined

  // A leading text block that matches a known synthetic message means this
  // record was not produced by the API, so its usage is not meaningful.
  const blocks = payload.content
  if (Array.isArray(blocks)) {
    const leading = (blocks as ContentItem[])[0]
    if (
      leading?.type === 'text' &&
      SYNTHETIC_MESSAGES.has((leading as ContentItem & { text: string }).text)
    ) {
      return undefined
    }
  }

  if (payload.model === SYNTHETIC_MODEL) return undefined
  return payload.usage as Usage
}
|
|
|
|
/**
 * Get the API response id of a non-synthetic assistant message.
 *
 * Used to identify split assistant records that came from the same API
 * response: when parallel tool calls are streamed, each content block becomes
 * a separate AssistantMessage record, but they all share the same message.id.
 *
 * @param message - Any message from the conversation.
 * @returns The payload's `id`, or `undefined` when the message is not an
 *   assistant message, has no payload, has no `id` field, or its model is
 *   SYNTHETIC_MODEL.
 */
function getAssistantMessageId(message: Message): string | undefined {
  if (message?.type !== 'assistant') return undefined

  // The `in` operator throws a TypeError on undefined/null, so an assistant
  // record without a payload must be rejected before probing for `id`.
  const payload = message.message
  if (!payload || !('id' in payload)) return undefined

  if (payload.model === SYNTHETIC_MODEL) return undefined
  return payload.id
}
|
|
|
|
/**
 * Sum every token bucket of a single API response's usage record:
 * input_tokens + cache_creation_input_tokens + cache_read_input_tokens +
 * output_tokens. Missing or nullish buckets count as 0, and a falsy `usage`
 * yields 0.
 *
 * The result represents the full context size at the time of that API call.
 * Use tokenCountWithEstimation() when you need context size from messages.
 */
export function getTokenCountFromUsage(usage: Usage): number {
  if (!usage) return 0

  const uncachedInput = usage.input_tokens ?? 0
  const cacheWrites = usage.cache_creation_input_tokens ?? 0
  const cacheReads = usage.cache_read_input_tokens ?? 0
  const generated = usage.output_tokens ?? 0

  return uncachedInput + cacheWrites + cacheReads + generated
}
|
|
|
|
/**
 * Total token count (see getTokenCountFromUsage) of the most recent message
 * that carries real usage, scanning from the end of `messages`.
 * Returns 0 when no message carries usage.
 */
export function tokenCountFromLastAPIResponse(messages: Message[]): number {
  for (let idx = messages.length - 1; idx >= 0; idx--) {
    const candidate = messages[idx]
    if (!candidate) continue

    const usage = getTokenUsage(candidate)
    if (usage) return getTokenCountFromUsage(usage)
  }
  return 0
}
|
|
|
|
/**
 * Final context window size from the last API response's usage.iterations[-1].
 * Used for task_budget.remaining computation across compaction boundaries —
 * the server's budget countdown is context-based, so remaining decrements by
 * the pre-compact final window, not billing spend. See monorepo
 * api/api/sampling/prompt/renderer.py:292 for the server-side computation.
 *
 * Falls back to top-level input_tokens + output_tokens when iterations is
 * absent or empty (no server-side tool loops, so top-level usage IS the final
 * window). Both paths exclude cache tokens to match #304930's formula.
 *
 * Missing token fields are treated as 0 so a partial usage record cannot
 * turn the result into NaN.
 *
 * @param messages - Conversation messages, oldest first.
 * @returns input + output tokens of the final window, or 0 when no message
 *   carries usage.
 */
export function finalContextTokensFromLastResponse(
  messages: Message[],
): number {
  for (let idx = messages.length - 1; idx >= 0; idx--) {
    const candidate = messages[idx]
    const usage = candidate ? getTokenUsage(candidate) : undefined
    if (!usage) continue

    // Stainless types don't include iterations yet — cast like advisor.ts:43
    const iterations = (
      usage as {
        iterations?: Array<{
          input_tokens: number
          output_tokens: number
        }> | null
      }
    ).iterations

    // No iterations → no server tool loop → top-level usage IS the final
    // window. Both sources use the same formula (input + output, no cache)
    // rather than getTokenCountFromUsage — #304930 defines final window as
    // non-cache input + output. Whether the server's budget countdown
    // (renderer.py:292 calculate_context_tokens) counts cache the same way
    // is an open question; sharing one formula keeps the two paths
    // consistent until that's resolved.
    const finalWindow = iterations?.at(-1) ?? usage

    // Default each field: some providers send usage with fields missing, and
    // `undefined + n` would otherwise produce NaN.
    return (finalWindow.input_tokens ?? 0) + (finalWindow.output_tokens ?? 0)
  }
  return 0
}
|
|
|
|
/**
 * Get only the output_tokens from the last API response.
 * This excludes input context (system prompt, tools, prior messages).
 *
 * WARNING: Do NOT use this for threshold comparisons (autocompact, session memory).
 * Use tokenCountWithEstimation() instead, which measures full context size.
 * This function is only useful for measuring how many tokens were generated
 * in a single response, not how full the context window is.
 *
 * @param messages - Conversation messages, oldest first.
 * @returns output_tokens of the most recent message carrying usage (0 when the
 *   field is missing), or 0 when no message carries usage.
 */
export function messageTokenCountFromLastAPIResponse(
  messages: Message[],
): number {
  for (let idx = messages.length - 1; idx >= 0; idx--) {
    const candidate = messages[idx]
    const usage = candidate ? getTokenUsage(candidate) : undefined
    if (usage) {
      // Default a missing field to 0, as getTokenCountFromUsage does, so the
      // declared `number` return type holds for partial usage records.
      return usage.output_tokens ?? 0
    }
  }
  return 0
}
|
|
|
|
/**
 * Return the four usage buckets of the most recent message that carries
 * non-zero usage, with missing buckets defaulted to 0.
 * Returns null when no such message exists.
 */
export function getCurrentUsage(messages: Message[]): {
  input_tokens: number
  output_tokens: number
  cache_creation_input_tokens: number
  cache_read_input_tokens: number
} | null {
  let idx = messages.length
  while (idx-- > 0) {
    const candidate = messages[idx]
    if (!candidate) continue

    const usage = getTokenUsage(candidate)
    if (!usage) continue

    const snapshot = {
      input_tokens: usage.input_tokens ?? 0,
      output_tokens: usage.output_tokens ?? 0,
      cache_creation_input_tokens: usage.cache_creation_input_tokens ?? 0,
      cache_read_input_tokens: usage.cache_read_input_tokens ?? 0,
    }

    // An all-zero record is a placeholder: third-party APIs may emit
    // message_start without real usage data, which would make the context
    // counter flash to 0. Only return a record with some real data;
    // otherwise keep scanning toward older messages.
    const totalInput =
      snapshot.input_tokens +
      snapshot.cache_creation_input_tokens +
      snapshot.cache_read_input_tokens
    if (totalInput !== 0 || snapshot.output_tokens !== 0) return snapshot
  }
  return null
}
|
|
|
|
/**
 * Report whether the last assistant message in `messages` has a total usage
 * (see getTokenCountFromUsage) strictly greater than 200,000 tokens.
 * Returns false when there is no assistant message or it carries no real usage.
 */
export function doesMostRecentAssistantMessageExceed200k(
  messages: Message[],
): boolean {
  const TOKEN_LIMIT = 200_000

  const latestAssistant = messages.findLast(
    candidate => candidate.type === 'assistant',
  )
  const usage = latestAssistant ? getTokenUsage(latestAssistant) : undefined
  if (!usage) return false

  return getTokenCountFromUsage(usage) > TOKEN_LIMIT
}
|
|
|
|
/**
 * Measure an assistant message's content in characters.
 * Used for spinner token estimation (characters / 4 ≈ tokens) when subagent
 * streaming events are filtered out and completed messages must be counted
 * instead.
 *
 * Mirrors what handleMessageFromStream would count via deltas:
 * - text blocks: length of `text` (text_delta)
 * - thinking blocks: length of `thinking` (thinking_delta)
 * - redacted_thinking blocks: length of `data`
 * - tool_use blocks: length of the JSON-serialized `input` (input_json_delta)
 * Note: signature_delta is excluded from streaming counts (not model output).
 *
 * Blocks of any other type contribute nothing, and non-array content yields 0.
 */
export function getAssistantMessageContentLength(
  message: AssistantMessage,
): number {
  const blocks = message.message?.content
  if (!Array.isArray(blocks)) return 0

  let total = 0
  for (const block of blocks as ContentItem[]) {
    switch (block.type) {
      case 'text':
        total += (block as ContentItem & { text: string }).text.length
        break
      case 'thinking':
        total += (block as ContentItem & { thinking: string }).thinking.length
        break
      case 'redacted_thinking':
        total += (block as ContentItem & { data: string }).data.length
        break
      case 'tool_use':
        total += jsonStringify((block as ContentItem & { input: unknown }).input).length
        break
    }
  }
  return total
}
|
|
|
|
/**
 * Get the current context window size in tokens.
 *
 * This is the CANONICAL function for measuring context size when checking
 * thresholds (autocompact, session memory init, etc.). It takes the token
 * count of the last API response (input + output + cache) and adds a rough
 * estimate for the messages that follow it. When no message carries usage,
 * the whole array is estimated.
 *
 * Always use this instead of:
 * - Cumulative token counting (which double-counts as context grows)
 * - messageTokenCountFromLastAPIResponse (which only counts output_tokens)
 * - tokenCountFromLastAPIResponse (which doesn't estimate new messages)
 *
 * Implementation note on parallel tool calls: when the model makes multiple
 * tool calls in one response, the streaming code emits a SEPARATE assistant
 * record per content block (all sharing the same message.id and usage), and
 * the query loop interleaves each tool_result immediately after its tool_use:
 *   [..., assistant(id=A), user(result), assistant(id=A), user(result), ...]
 * Estimating only what follows the LAST assistant record would miss the
 * earlier interleaved tool_results, all of which will be in the next API
 * request. To avoid undercounting, the estimation slice starts after the
 * FIRST sibling record that shares the usage-bearing record's message.id.
 */
export function tokenCountWithEstimation(messages: readonly Message[]): number {
  type EstimationInput = Parameters<typeof roughTokenCountEstimationForMessages>[0]

  for (let idx = messages.length - 1; idx >= 0; idx--) {
    const candidate = messages[idx]
    if (!candidate) continue

    const usage = getTokenUsage(candidate)
    if (!usage) continue

    // `anchor` is the record after which the rough estimate begins. It starts
    // at the usage-bearing record and moves back to earlier splits of the
    // same API response.
    let anchor = idx
    const responseId = getAssistantMessageId(candidate)
    if (responseId) {
      for (let back = idx - 1; back >= 0; back--) {
        const earlier = messages[back]
        const earlierId = earlier ? getAssistantMessageId(earlier) : undefined

        // No id: a user/tool_result/attachment message, possibly interleaved
        // between splits — keep walking.
        if (earlierId === undefined) continue

        // A different API response — stop walking.
        if (earlierId !== responseId) break

        // Earlier split of the same API response — anchor here instead.
        anchor = back
      }
    }

    return (
      getTokenCountFromUsage(usage) +
      roughTokenCountEstimationForMessages(
        messages.slice(anchor + 1) as EstimationInput,
      )
    )
  }

  return roughTokenCountEstimationForMessages(messages as EstimationInput)
}
|