Files
claude-code/src/utils/context.ts
claude-code-best e4ce08fe39 Fixture/langfuse record auto mode data error (#308)
* fix: 修复状态栏 context 计数器在 loading 时闪现为 0 的问题

第三方 API(如智谱)在 message_start 中可能不返回完整 usage 数据,
导致 getCurrentUsage 返回全零 usage 对象,使 ctx 显示为 0%。

双重保护:
- getCurrentUsage: 跳过全零 usage,继续往前找有真实数据的 message
- calculateContextPercentages: totalInputTokens 为 0 时返回 null

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 外部化 ESM 包使用 createRequire 替代裸 require

color-diff-napi、image-processor-napi、audio-capture-napi 声明
"type": "module" 但使用裸 require(),Node.js ESM 中 require
不可用。改用 createRequire(import.meta.url) 或顶层 import。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: getDefaultSonnetModel 优先使用用户配置的模型,修复第三方 provider 模型不存在错误

当用户通过 ANTHROPIC_MODEL 或 settings 配置了自定义 provider 支持的模型时,
getDefaultSonnetModel/Haiku/Opus 现在会优先使用该配置,而非硬编码 Anthropic 官方模型 ID。
同时改进 Langfuse 可观测性:sideQuery 失败时记录错误信息到 span,
optional 模式下标记 WARNING 而非 ERROR。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 将 auto_mode classifier 的 side-query span 绑定到父 trace

classifyYoloAction 及 classifyYoloActionXml 接收 parentSpan 参数,
透传给 sideQuery 调用,使 auto_mode 的 side-query span 嵌套在主 agent trace 下。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 穷鬼模式下跳过 memdir_relevance side-query

Poor mode 启用时不执行 findRelevantMemories 的预取调用,
避免额外的 API token 消耗。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* chore: 添加 test:all 脚本用于完成任务后的全量检查

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: Vite 构建补齐缺失的 feature flags,修复 auto mode 不可见

Vite 构建插件的 DEFAULT_BUILD_FEATURES 缺少 BUDDY、TRANSCRIPT_CLASSIFIER、
BRIDGE_MODE、ACP、BG_SESSIONS、TEMPLATES,导致 feature('TRANSCRIPT_CLASSIFIER')
被替换为 false,auto mode 从 Shift+Tab 循环中消失。与 build.ts 对齐。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 统一 feature flags 到 defines.ts,修复 Vite 构建缺失 auto mode

将 DEFAULT_BUILD_FEATURES 列表从 build.ts、dev.ts、vite-plugin-feature-flags.ts
三处内联定义统一到 scripts/defines.ts 单一导出。之前的 Vite 插件缺少
TRANSCRIPT_CLASSIFIER 等 feature flag,导致 auto mode 在 Vite 构建中不可见。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-20 13:30:05 +08:00

229 lines
7.0 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
import { CONTEXT_1M_BETA_HEADER } from '../constants/betas.js'
import { getGlobalConfig } from './config.js'
import { isEnvTruthy } from './envUtils.js'
import { getCanonicalName } from './model/model.js'
import { resolveAntModel } from './model/antModels.js'
import { getModelCapability } from './model/modelCapabilities.js'
// Default model context window size in tokens. getContextWindowForModel falls
// back to this value when no override, [1m] suffix, capability entry, beta
// header, or experiment yields a window, and also clamps to it when a larger
// capability value is reported while 1M context is disabled.
export const MODEL_CONTEXT_WINDOW_DEFAULT = 200_000
// Maximum output tokens for compact operations
export const COMPACT_MAX_OUTPUT_TOKENS = 20_000
// Fallback default / upper limit for max output tokens. Used by
// getModelMaxOutputTokens when the model matches no known family, and to fill
// in missing fields on a resolved ant model.
const MAX_OUTPUT_TOKENS_DEFAULT = 32_000
const MAX_OUTPUT_TOKENS_UPPER_LIMIT = 64_000
// Capped default for slot-reservation optimization. BQ p99 output = 4,911
// tokens, so 32k/64k defaults over-reserve 8-16× slot capacity. With the cap
// enabled, <1% of requests hit the limit; those get one clean retry at 64k
// (see query.ts max_output_tokens_escalate). Cap is applied in
// claude.ts:getMaxOutputTokensForModel to avoid the growthbook→betas→context
// import cycle.
export const CAPPED_DEFAULT_MAX_TOKENS = 8_000
// NOTE(review): presumably the 64k budget used for the escalation retry
// described above — confirm against query.ts.
export const ESCALATED_MAX_TOKENS = 64_000
/**
 * Reports whether 1M-token context has been switched off through the
 * CLAUDE_CODE_DISABLE_1M_CONTEXT environment variable.
 * C4E admins use this switch to disable 1M context for HIPAA compliance.
 *
 * @returns The result of isEnvTruthy for the variable's current value.
 */
export function is1mContextDisabled(): boolean {
  const disableFlag = process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT
  return isEnvTruthy(disableFlag)
}
/**
 * Tells whether the model string carries an explicit "[1m]" marker
 * (matched case-insensitively, anywhere in the string).
 *
 * @param model - Model identifier as supplied by the caller.
 * @returns false whenever 1M context is disabled, otherwise whether the
 *   marker is present.
 */
export function has1mContext(model: string): boolean {
  // The disable switch is checked first so it always wins over the suffix.
  return !is1mContextDisabled() && /\[1m\]/i.test(model)
}
// @[MODEL LAUNCH]: Update this pattern if the new model supports 1M context
/**
 * Tells whether the model's canonical name belongs to a family listed here
 * as 1M-context capable.
 *
 * @param model - Model identifier; canonicalised via getCanonicalName.
 * @returns false whenever 1M context is disabled, otherwise whether the
 *   canonical name contains one of the listed family fragments.
 */
export function modelSupports1M(model: string): boolean {
  if (is1mContextDisabled()) {
    return false
  }
  const canonicalName = getCanonicalName(model)
  const familiesWith1M = ['claude-sonnet-4', 'opus-4-6']
  return familiesWith1M.some(fragment => canonicalName.includes(fragment))
}
/**
 * Resolves the context window size (in tokens) to assume for a model.
 *
 * Sources are consulted in this order and the first one that yields a value
 * wins:
 *   1. CLAUDE_CODE_MAX_CONTEXT_TOKENS (only when USER_TYPE is 'ant')
 *   2. an explicit [1m] suffix on the model string
 *   3. max_input_tokens from getModelCapability (values below 100k ignored)
 *   4. the 1M beta header, on a model that supports 1M
 *   5. the Sonnet 1M experiment treatment
 *   6. contextWindow from resolveAntModel (only when USER_TYPE is 'ant')
 *   7. MODEL_CONTEXT_WINDOW_DEFAULT
 *
 * @param model - Model identifier, possibly carrying a [1m] suffix.
 * @param betas - Beta headers active for the request, if any.
 * @returns The context window size in tokens.
 */
export function getContextWindowForModel(
  model: string,
  betas?: string[],
): number {
  const ONE_MILLION_TOKENS = 1_000_000

  // Ant-only environment override. It outranks every other source, including
  // 1M detection, so the effective window used for local decisions
  // (auto-compact, etc.) can be capped while still using a 1M-capable endpoint.
  if (process.env.USER_TYPE === 'ant') {
    const rawOverride = process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS
    if (rawOverride) {
      const parsedOverride = Number.parseInt(rawOverride, 10)
      if (!Number.isNaN(parsedOverride) && parsedOverride > 0) {
        return parsedOverride
      }
    }
  }

  // [1m] suffix — explicit client-side opt-in, respected over all detection.
  if (has1mContext(model)) {
    return ONE_MILLION_TOKENS
  }

  // Capability data: only trusted when it reports at least 100k tokens.
  const reportedWindow = getModelCapability(model)?.max_input_tokens
  if (reportedWindow && reportedWindow >= 100_000) {
    // A larger-than-default window is clamped when 1M context is disabled.
    const mustClamp =
      reportedWindow > MODEL_CONTEXT_WINDOW_DEFAULT && is1mContextDisabled()
    return mustClamp ? MODEL_CONTEXT_WINDOW_DEFAULT : reportedWindow
  }

  // Beta-header opt-in first, then the Sonnet experiment treatment.
  const betaGrants1M =
    betas?.includes(CONTEXT_1M_BETA_HEADER) && modelSupports1M(model)
  if (betaGrants1M || getSonnet1mExpTreatmentEnabled(model)) {
    return ONE_MILLION_TOKENS
  }

  if (process.env.USER_TYPE === 'ant') {
    const antWindow = resolveAntModel(model)?.contextWindow
    if (antWindow) {
      return antWindow
    }
  }

  return MODEL_CONTEXT_WINDOW_DEFAULT
}
/**
 * Tells whether the Sonnet 1M-context experiment treatment applies to the
 * given model.
 *
 * @param model - Model identifier.
 * @returns true only when 1M context is not disabled, the model has no
 *   explicit [1m] suffix, its canonical name contains 'sonnet-4-6', and the
 *   cached client data flag 'coral_reef_sonnet' equals the string 'true'.
 */
export function getSonnet1mExpTreatmentEnabled(model: string): boolean {
  // Only applies to sonnet 4.6 without an explicit [1m] suffix.
  if (is1mContextDisabled() || has1mContext(model)) {
    return false
  }
  const isSonnet46 = getCanonicalName(model).includes('sonnet-4-6')
  if (!isSonnet46) {
    return false
  }
  const cachedFlag = getGlobalConfig().clientDataCache?.['coral_reef_sonnet']
  return cachedFlag === 'true'
}
/**
 * Calculate context window usage percentage from token usage data.
 *
 * The input total is the sum of input_tokens, cache_creation_input_tokens and
 * cache_read_input_tokens. Any of those that is not a finite number at
 * runtime (e.g. a field a provider left out, which would otherwise turn the
 * whole sum into NaN) is counted as 0.
 *
 * @param currentUsage - Token usage of the current message, or null when
 *   there is none.
 * @param contextWindowSize - Context window size in tokens.
 * @returns Whole-number percentages clamped to 0..100 that always sum to 100,
 *   or null for both when there is nothing meaningful to show: no usage
 *   object, no positive input token count, or a context window size that is
 *   not a positive finite number.
 */
export function calculateContextPercentages(
  currentUsage: {
    input_tokens: number
    cache_creation_input_tokens: number
    cache_read_input_tokens: number
  } | null,
  contextWindowSize: number,
): { used: number | null; remaining: number | null } {
  const noData = { used: null, remaining: null }
  if (!currentUsage) {
    return noData
  }

  // Non-finite fields (undefined, NaN, ...) contribute nothing instead of
  // poisoning the sum with NaN.
  const toCount = (value: number): number =>
    Number.isFinite(value) ? value : 0
  const totalInputTokens =
    toCount(currentUsage.input_tokens) +
    toCount(currentUsage.cache_creation_input_tokens) +
    toCount(currentUsage.cache_read_input_tokens)

  // Treat zero input tokens the same as no usage data — avoids flashing
  // "ctx:0%" when a third-party API omits usage from message_start.
  if (totalInputTokens <= 0) {
    return noData
  }

  // A zero, negative or non-finite window cannot produce a meaningful
  // percentage (division would yield Infinity or NaN).
  if (!Number.isFinite(contextWindowSize) || contextWindowSize <= 0) {
    return noData
  }

  const usedPercentage = Math.round(
    (totalInputTokens / contextWindowSize) * 100,
  )
  const clampedUsed = Math.min(100, Math.max(0, usedPercentage))
  return {
    used: clampedUsed,
    remaining: 100 - clampedUsed,
  }
}
/**
 * Returns the model's default and upper limit for max output tokens.
 *
 * Resolution:
 *   - When USER_TYPE is 'ant' and resolveAntModel knows the (lower-cased)
 *     model, its values are returned directly, with the module fallbacks
 *     filling any missing field.
 *   - Otherwise the canonical model name is matched against an ordered list
 *     of family fragments; the first matching entry supplies both numbers and
 *     unmatched models get the module fallbacks.
 *   - Finally, a max_tokens value of at least 4,096 from getModelCapability
 *     replaces the upper limit, and the default is lowered to it if needed.
 *
 * @param model - Model identifier.
 * @returns The default and the upper limit for max output tokens.
 */
export function getModelMaxOutputTokens(model: string): {
  default: number
  upperLimit: number
} {
  if (process.env.USER_TYPE === 'ant') {
    const antModel = resolveAntModel(model.toLowerCase())
    if (antModel) {
      return {
        default: antModel.defaultMaxTokens ?? MAX_OUTPUT_TOKENS_DEFAULT,
        upperLimit:
          antModel.upperMaxTokensLimit ?? MAX_OUTPUT_TOKENS_UPPER_LIMIT,
      }
    }
  }

  const canonicalName = getCanonicalName(model)

  // Order matters: specific fragments (e.g. 'opus-4-6') must come before the
  // broader ones that would also match them (e.g. 'opus-4', 'sonnet-4').
  const familyLimits: ReadonlyArray<{
    fragments: readonly string[]
    defaultTokens: number
    upperLimit: number
  }> = [
    { fragments: ['opus-4-6'], defaultTokens: 64_000, upperLimit: 128_000 },
    { fragments: ['sonnet-4-6'], defaultTokens: 32_000, upperLimit: 128_000 },
    {
      fragments: ['opus-4-5', 'sonnet-4', 'haiku-4'],
      defaultTokens: 32_000,
      upperLimit: 64_000,
    },
    {
      fragments: ['opus-4-1', 'opus-4'],
      defaultTokens: 32_000,
      upperLimit: 32_000,
    },
    { fragments: ['claude-3-opus'], defaultTokens: 4_096, upperLimit: 4_096 },
    { fragments: ['claude-3-sonnet'], defaultTokens: 8_192, upperLimit: 8_192 },
    { fragments: ['claude-3-haiku'], defaultTokens: 4_096, upperLimit: 4_096 },
    {
      fragments: ['3-5-sonnet', '3-5-haiku'],
      defaultTokens: 8_192,
      upperLimit: 8_192,
    },
    { fragments: ['3-7-sonnet'], defaultTokens: 32_000, upperLimit: 64_000 },
  ]
  const family = familyLimits.find(({ fragments }) =>
    fragments.some(fragment => canonicalName.includes(fragment)),
  )
  const familyDefault = family?.defaultTokens ?? MAX_OUTPUT_TOKENS_DEFAULT
  const familyUpperLimit = family?.upperLimit ?? MAX_OUTPUT_TOKENS_UPPER_LIMIT

  // Capability data, when present and at least 4,096, overrides the upper
  // limit; the default may never exceed it.
  const reportedMax = getModelCapability(model)?.max_tokens
  if (reportedMax && reportedMax >= 4_096) {
    return {
      default: Math.min(familyDefault, reportedMax),
      upperLimit: reportedMax,
    }
  }

  return { default: familyDefault, upperLimit: familyUpperLimit }
}
/**
 * Returns the max thinking budget tokens for a given model: one token below
 * the model's max output token upper limit, because the thinking budget
 * should be strictly less than the max output tokens.
 *
 * Deprecated since newer models use adaptive thinking rather than a
 * strict thinking token budget.
 *
 * @param model - Model identifier.
 * @returns The upper limit from getModelMaxOutputTokens, minus one.
 */
export function getMaxThinkingTokensForModel(model: string): number {
  const { upperLimit } = getModelMaxOutputTokens(model)
  return upperLimit - 1
}