Fixture/langfuse record auto mode data error (#308)

* fix: 修复状态栏 context 计数器在 loading 时闪现为 0 的问题

第三方 API(如智谱)在 message_start 中可能不返回完整 usage 数据,
导致 getCurrentUsage 返回全零 usage 对象,使 ctx 显示为 0%。

双重保护:
- getCurrentUsage: 跳过全零 usage,继续往前找有真实数据的 message
- calculateContextPercentages: totalInputTokens 为 0 时返回 null

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 外部化 ESM 包使用 createRequire 替代裸 require

color-diff-napi、image-processor-napi、audio-capture-napi 声明
"type": "module" 但使用裸 require(),Node.js ESM 中 require
不可用。改用 createRequire(import.meta.url) 或顶层 import。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: getDefaultSonnetModel 优先使用用户配置的模型,修复第三方 provider 模型不存在错误

当用户通过 ANTHROPIC_MODEL 或 settings 配置了自定义 provider 支持的模型时,
getDefaultSonnetModel/Haiku/Opus 现在会优先使用该配置,而非硬编码 Anthropic 官方模型 ID。
同时改进 Langfuse 可观测性:sideQuery 失败时记录错误信息到 span,
optional 模式下标记 WARNING 而非 ERROR。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 将 auto_mode classifier 的 side-query span 绑定到父 trace

classifyYoloAction 及 classifyYoloActionXml 接收 parentSpan 参数,
透传给 sideQuery 调用,使 auto_mode 的 side-query span 嵌套在主 agent trace 下。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 穷鬼模式下跳过 memdir_relevance side-query

Poor mode 启用时不执行 findRelevantMemories 的预取调用,
避免额外的 API token 消耗。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* chore: 添加 test:all 脚本用于完成任务后的全量检查

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: Vite 构建补齐缺失的 feature flags,修复 auto mode 不可见

Vite 构建插件的 DEFAULT_BUILD_FEATURES 缺少 BUDDY、TRANSCRIPT_CLASSIFIER、
BRIDGE_MODE、ACP、BG_SESSIONS、TEMPLATES,导致 feature('TRANSCRIPT_CLASSIFIER')
被替换为 false,auto mode 从 Shift+Tab 循环中消失。与 build.ts 对齐。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 统一 feature flags 到 defines.ts,修复 Vite 构建缺失 auto mode

将 DEFAULT_BUILD_FEATURES 列表从 build.ts、dev.ts、vite-plugin-feature-flags.ts
三处内联定义统一到 scripts/defines.ts 单一导出。之前的 Vite 插件缺少
TRANSCRIPT_CLASSIFIER 等 feature flag,导致 auto mode 在 Vite 构建中不可见。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
claude-code-best
2026-04-20 13:30:05 +08:00
committed by GitHub
parent 92f8a92fbb
commit e4ce08fe39
19 changed files with 231 additions and 137 deletions

View File

@@ -3,6 +3,7 @@ import { logForDebugging } from '../utils/debug.js'
import { errorMessage } from '../utils/errors.js'
import { getDefaultSonnetModel } from '../utils/model/model.js'
import { sideQuery } from '../utils/sideQuery.js'
import type { LangfuseSpan } from '../services/langfuse/index.js'
import { jsonParse } from '../utils/slowOperations.js'
import {
formatMemoryManifest,
@@ -42,6 +43,7 @@ export async function findRelevantMemories(
signal: AbortSignal,
recentTools: readonly string[] = [],
alreadySurfaced: ReadonlySet<string> = new Set(),
parentSpan?: LangfuseSpan | null,
): Promise<RelevantMemory[]> {
const memories = (await scanMemoryFiles(memoryDir, signal)).filter(
m => !alreadySurfaced.has(m.filePath),
@@ -55,6 +57,7 @@ export async function findRelevantMemories(
memories,
signal,
recentTools,
parentSpan,
)
const byFilename = new Map(memories.map(m => [m.filename, m]))
const selected = selectedFilenames
@@ -79,6 +82,7 @@ async function selectRelevantMemories(
memories: MemoryHeader[],
signal: AbortSignal,
recentTools: readonly string[],
parentSpan?: LangfuseSpan | null,
): Promise<string[]> {
const validFilenames = new Set(memories.map(m => m.filename))
@@ -119,6 +123,8 @@ async function selectRelevantMemories(
},
signal,
querySource: 'memdir_relevance',
optional: true,
parentSpan,
})
const textBlock = result.content.find(block => block.type === 'text')

View File

@@ -1,4 +1,4 @@
export { initLangfuse, shutdownLangfuse, isLangfuseEnabled, getLangfuseProcessor } from './client.js'
export { createTrace, createSubagentTrace, recordLLMObservation, recordToolObservation, endTrace, createToolBatchSpan, endToolBatchSpan } from './tracing.js'
export { createTrace, createSubagentTrace, createChildSpan, recordLLMObservation, recordToolObservation, endTrace, createToolBatchSpan, endToolBatchSpan } from './tracing.js'
export type { LangfuseSpan } from './tracing.js'
export { sanitizeToolInput, sanitizeToolOutput, sanitizeGlobal } from './sanitize.js'

View File

@@ -282,6 +282,60 @@ export function createSubagentTrace(params: {
}
}
/**
* Create a child span under a parent trace — used for side queries
* that should be nested under the main agent trace in Langfuse.
*/
export function createChildSpan(
parentSpan: LangfuseSpan | null,
params: {
name: string
sessionId: string
model: string
provider: string
input?: unknown
querySource?: string
username?: string
},
): LangfuseSpan | null {
if (!parentSpan || !isLangfuseEnabled()) return null
try {
const span = startObservation(
params.name,
{
input: params.input,
metadata: {
provider: params.provider,
model: params.model,
querySource: params.querySource,
},
},
{
asType: 'span',
parentSpanContext: parentSpan.otelSpan.spanContext(),
},
) as LangfuseSpan
// Propagate session ID and user ID from parent
const parent = parentSpan as unknown as RootTrace
const sessionId = parent._sessionId ?? params.sessionId
if (sessionId) {
span.otelSpan.setAttribute(LangfuseOtelSpanAttributes.TRACE_SESSION_ID, sessionId)
;(span as unknown as RootTrace)._sessionId = sessionId
}
const userId = parent._userId ?? resolveLangfuseUserId(params.username)
if (userId) {
span.otelSpan.setAttribute(LangfuseOtelSpanAttributes.TRACE_USER_ID, userId)
;(span as unknown as RootTrace)._userId = userId
}
logForDebugging(`[langfuse] Child span created: ${span.id} (parent=${parentSpan.id})`)
return span
} catch (e) {
logForDebugging(`[langfuse] createChildSpan failed: ${e}`, { level: 'error' })
return null
}
}
export function endTrace(
rootSpan: LangfuseSpan | null,
output?: unknown,

View File

@@ -2201,6 +2201,7 @@ async function getRelevantMemoryAttachments(
recentTools: readonly string[],
signal: AbortSignal,
alreadySurfaced: ReadonlySet<string>,
parentSpan?: unknown,
): Promise<Attachment[]> {
// If an agent is @-mentioned, search only its memory dir (isolation).
// Otherwise search the auto-memory dir.
@@ -2221,6 +2222,7 @@ async function getRelevantMemoryAttachments(
signal,
recentTools,
alreadySurfaced,
parentSpan as Parameters<typeof findRelevantMemories>[5],
).catch(() => []),
),
)
@@ -2370,6 +2372,12 @@ export function startRelevantMemoryPrefetch(
return undefined
}
// Poor mode: skip the side-query to save tokens
const { isPoorModeActive } = require('../commands/poor/poorMode.js') as typeof import('../commands/poor/poorMode.js')
if (isPoorModeActive()) {
return undefined
}
const lastUserMessage = messages.findLast(m => m.type === 'user' && !m.isMeta)
if (!lastUserMessage) {
return undefined
@@ -2397,6 +2405,7 @@ export function startRelevantMemoryPrefetch(
collectRecentSuccessfulTools(messages, lastUserMessage),
controller.signal,
surfaced.paths,
toolUseContext.langfuseTrace,
).catch(e => {
if (!isAbortError(e)) {
logError(e)

View File

@@ -133,6 +133,12 @@ export function calculateContextPercentages(
currentUsage.cache_creation_input_tokens +
currentUsage.cache_read_input_tokens
// Treat zero input tokens the same as no usage data — avoids flashing
// "ctx:0%" when a third-party API omits usage from message_start.
if (totalInputTokens === 0) {
return { used: null, remaining: null }
}
const usedPercentage = Math.round(
(totalInputTokens / contextWindowSize) * 100,
)

View File

@@ -126,6 +126,12 @@ export function getDefaultOpusModel(): ModelName {
if (process.env.ANTHROPIC_DEFAULT_OPUS_MODEL) {
return process.env.ANTHROPIC_DEFAULT_OPUS_MODEL
}
// Fall back to user's configured model — custom providers may not
// recognize hardcoded Anthropic model IDs.
const userSpecifiedOpus = getUserSpecifiedModelSetting()
if (userSpecifiedOpus) {
return parseUserSpecifiedModel(userSpecifiedOpus)
}
// 3P providers (Bedrock, Vertex, Foundry) — kept as a separate branch
// even when values match, since 3P availability lags firstParty and
// these will diverge again at the next model launch.
@@ -153,6 +159,13 @@ export function getDefaultSonnetModel(): ModelName {
if (process.env.ANTHROPIC_DEFAULT_SONNET_MODEL) {
return process.env.ANTHROPIC_DEFAULT_SONNET_MODEL
}
// Fall back to user's configured model (ANTHROPIC_MODEL / settings) —
// custom providers (proxies, national clouds) may not recognize the
// hardcoded Anthropic model IDs.
const userSpecified = getUserSpecifiedModelSetting()
if (userSpecified) {
return parseUserSpecifiedModel(userSpecified)
}
// Default to Sonnet 4.5 for 3P since they may not have 4.6 yet
if (provider !== 'firstParty') {
return getModelStrings().sonnet45
@@ -175,6 +188,12 @@ export function getDefaultHaikuModel(): ModelName {
if (process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL) {
return process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL
}
// Fall back to user's configured model — custom providers may not
// recognize hardcoded Anthropic model IDs.
const userSpecifiedHaiku = getUserSpecifiedModelSetting()
if (userSpecifiedHaiku) {
return parseUserSpecifiedModel(userSpecifiedHaiku)
}
// Haiku 4.5 is available on all platforms (first-party, Foundry, Bedrock, Vertex)
return getModelStrings().haiku45

View File

@@ -696,6 +696,7 @@ export const hasPermissionsToUseTool: CanUseToolFn = async (
context.options.tools,
appState.toolPermissionContext,
context.abortController.signal,
context.langfuseTrace,
)
} finally {
clearClassifierChecking(toolUseID)

View File

@@ -31,6 +31,7 @@ import { resolveAntModel } from '../model/antModels.js'
import { getMainLoopModel } from '../model/model.js'
import { getAutoModeConfig } from '../settings/settings.js'
import { sideQuery } from '../sideQuery.js'
import type { LangfuseSpan } from '../../services/langfuse/index.js'
import { jsonStringify } from '../slowOperations.js'
import { tokenCountWithEstimation } from '../tokens.js'
import {
@@ -731,6 +732,7 @@ async function classifyYoloActionXml(
action: string
},
mode: TwoStageMode,
parentSpan?: LangfuseSpan | null,
): Promise<YoloClassifierResult> {
const classifierType =
mode === 'both'
@@ -791,6 +793,7 @@ async function classifyYoloActionXml(
signal,
...(mode !== 'fast' && { stop_sequences: ['</block>'] }),
querySource: 'auto_mode',
parentSpan,
}
const stage1Raw = await sideQuery(stage1Opts)
stage1DurationMs = Date.now() - stage1Start
@@ -877,6 +880,7 @@ async function classifyYoloActionXml(
maxRetries: getDefaultMaxRetries(),
signal,
querySource: 'auto_mode' as const,
parentSpan,
}
const stage2Raw = await sideQuery(stage2Opts)
const stage2DurationMs = Date.now() - stage2Start
@@ -1015,6 +1019,7 @@ export async function classifyYoloAction(
tools: Tools,
context: ToolPermissionContext,
signal: AbortSignal,
parentSpan?: LangfuseSpan | null,
): Promise<YoloClassifierResult> {
const lookup = buildToolLookup(tools)
const actionCompact = toCompact(action, lookup)
@@ -1126,6 +1131,7 @@ export async function classifyYoloAction(
action: actionCompact,
},
getTwoStageMode(),
parentSpan,
)
}
const [disableThinking, thinkingPadding] = getClassifierThinkingConfig(model)
@@ -1156,6 +1162,7 @@ export async function classifyYoloAction(
maxRetries: getDefaultMaxRetries(),
signal,
querySource: 'auto_mode' as const,
parentSpan,
}
const result = await sideQuery(sideQueryOpts)
void maybeDumpAutoMode(sideQueryOpts, result, start)

View File

@@ -15,8 +15,11 @@ import { logEvent } from '../services/analytics/index.js'
import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from '../services/analytics/metadata.js'
import { getAPIMetadata } from '../services/api/claude.js'
import { getAnthropicClient } from '../services/api/client.js'
import { createTrace, endTrace, recordLLMObservation } from '../services/langfuse/index.js'
import { createTrace, createChildSpan, endTrace, recordLLMObservation } from '../services/langfuse/index.js'
import type { LangfuseSpan } from '../services/langfuse/index.js'
import { convertMessagesToLangfuse, convertOutputToLangfuse, convertToolsToLangfuse } from '../services/langfuse/convert.js'
import { getModelBetas, modelSupportsStructuredOutputs } from './betas.js'
import { errorMessage } from './errors.js'
import { computeFingerprint } from './fingerprint.js'
import { getAPIProvider } from './model/providers.js'
import { normalizeModelStringForAPI } from './model/model.js'
@@ -64,6 +67,11 @@ export type SideQueryOptions = {
stop_sequences?: string[]
/** Attributes this call in tengu_api_success for COGS joining against reporting.sampling_calls. */
querySource: QuerySource
/** Parent Langfuse span to nest this side query under the main agent trace. */
parentSpan?: LangfuseSpan | null
/** When true, API failures are recorded as WARNING instead of ERROR in Langfuse.
* Use for optional/best-effort queries where failure is expected and handled gracefully. */
optional?: boolean
}
/**
@@ -182,13 +190,25 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
const normalizedModel = normalizeModelStringForAPI(model)
const provider = getAPIProvider()
const start = Date.now()
const langfuseTrace = createTrace({
sessionId: getSessionId(),
model: normalizedModel,
provider,
name: `side-query:${opts.querySource}`,
querySource: opts.querySource,
})
const traceName = `side-query:${opts.querySource}`
// When parentSpan is provided, create a child span nested under the
// main agent trace; otherwise create a standalone root trace.
const langfuseTrace = opts.parentSpan
? createChildSpan(opts.parentSpan, {
name: traceName,
sessionId: getSessionId(),
model: normalizedModel,
provider,
querySource: opts.querySource,
})
: createTrace({
sessionId: getSessionId(),
model: normalizedModel,
provider,
name: traceName,
querySource: opts.querySource,
})
let response: BetaMessage
try {
@@ -210,7 +230,7 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
{ signal },
)
} catch (error) {
endTrace(langfuseTrace, undefined, 'error')
endTrace(langfuseTrace, { error: errorMessage(error) }, opts.optional ? 'interrupted' : 'error')
throw error
}
@@ -235,12 +255,21 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
})
setLastApiCompletionTimestamp(now)
// Record LLM observation in Langfuse (no-op if not configured)
// Record LLM observation in Langfuse (no-op if not configured).
// Wrap SDK types into the internal message format expected by converters.
const wrappedInput = messages.map(m => ({
type: m.role === 'assistant' ? 'assistant' as const : 'user' as const,
message: { role: m.role, content: m.content },
})) as unknown as Parameters<typeof convertMessagesToLangfuse>[0]
const wrappedOutput = [{
type: 'assistant' as const,
message: { role: 'assistant' as const, content: response.content },
}] as unknown as Parameters<typeof convertOutputToLangfuse>[0]
recordLLMObservation(langfuseTrace, {
model: normalizedModel,
provider,
input: messages,
output: response.content,
input: convertMessagesToLangfuse(wrappedInput, systemBlocks.length > 0 ? systemBlocks.map(b => b.text) : undefined),
output: convertOutputToLangfuse(wrappedOutput),
usage: {
input_tokens: response.usage.input_tokens,
output_tokens: response.usage.output_tokens,
@@ -249,6 +278,7 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
},
startTime: new Date(start),
endTime: new Date(),
...(tools && { tools: convertToolsToLangfuse(tools as unknown[]) }),
})
endTrace(langfuseTrace)

View File

@@ -150,9 +150,17 @@ export function getCurrentUsage(messages: Message[]): {
const message = messages[i]
const usage = message ? getTokenUsage(message) : undefined
if (usage) {
const inputTokens =
(usage.input_tokens ?? 0) +
(usage.cache_creation_input_tokens ?? 0) +
(usage.cache_read_input_tokens ?? 0)
// Skip placeholder usage (all zeros) — third-party APIs may emit
// message_start without real usage data, causing the context counter
// to flash to 0. Fall through to the previous message instead.
if (inputTokens === 0 && (usage.output_tokens ?? 0) === 0) continue
return {
input_tokens: usage.input_tokens,
output_tokens: usage.output_tokens,
input_tokens: usage.input_tokens ?? 0,
output_tokens: usage.output_tokens ?? 0,
cache_creation_input_tokens: usage.cache_creation_input_tokens ?? 0,
cache_read_input_tokens: usage.cache_read_input_tokens ?? 0,
}