Fixture/langfuse record auto mode data error (#308)

* fix: 修复状态栏 context 计数器在 loading 时闪现为 0 的问题

  第三方 API（如智谱）在 message_start 中可能不返回完整 usage 数据，导致 getCurrentUsage 返回全零 usage 对象，使 ctx 显示为 0%。双重保护：
  - getCurrentUsage: 跳过全零 usage，继续往前找有真实数据的 message
  - calculateContextPercentages: totalInputTokens 为 0 时返回 null

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 外部化 ESM 包使用 createRequire 替代裸 require

  color-diff-napi、image-processor-napi、audio-capture-napi 声明 "type": "module" 但使用裸 require()，Node.js ESM 中 require 不可用。改用 createRequire(import.meta.url) 或顶层 import。

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: getDefaultSonnetModel 优先使用用户配置的模型，修复第三方 provider 模型不存在错误

  当用户通过 ANTHROPIC_MODEL 或 settings 配置了自定义 provider 支持的模型时，getDefaultSonnetModel/Haiku/Opus 现在会优先使用该配置，而非硬编码 Anthropic 官方模型 ID。同时改进 Langfuse 可观测性：sideQuery 失败时记录错误信息到 span，optional 模式下标记 WARNING 而非 ERROR。

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 将 auto_mode classifier 的 side-query span 绑定到父 trace

  classifyYoloAction 及 classifyYoloActionXml 接收 parentSpan 参数，透传给 sideQuery 调用，使 auto_mode 的 side-query span 嵌套在主 agent trace 下。

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 穷鬼模式下跳过 memdir_relevance side-query

  Poor mode 启用时不执行 findRelevantMemories 的预取调用，避免额外的 API token 消耗。

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* chore: 添加 test:all 脚本用于完成任务后的全量检查

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: Vite 构建补齐缺失的 feature flags，修复 auto mode 不可见

  Vite 构建插件的 DEFAULT_BUILD_FEATURES 缺少 BUDDY、TRANSCRIPT_CLASSIFIER、BRIDGE_MODE、ACP、BG_SESSIONS、TEMPLATES，导致 feature('TRANSCRIPT_CLASSIFIER') 被替换为 false，auto mode 从 Shift+Tab 循环中消失。与 build.ts 对齐。

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 统一 feature flags 到 defines.ts，修复 Vite 构建缺失 auto mode

  将 DEFAULT_BUILD_FEATURES 列表从 build.ts、dev.ts、vite-plugin-feature-flags.ts 三处内联定义统一到 scripts/defines.ts 单一导出。之前的 Vite 插件缺少 TRANSCRIPT_CLASSIFIER 等 feature flag，导致 auto mode 在 Vite 构建中不可见。

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

--------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-15 12:55:51 +00:00 · 2026-04-20 13:30:05 +08:00
parent 92f8a92fbb
commit e4ce08fe39
19 changed files with 231 additions and 137 deletions
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -58,6 +58,9 @@ bun run health
 # Check unused exports
 bun run check:unused

+# Full check (typecheck + lint + test) — run after completing any task
+bun run test:all
+
 bun run typecheck

 # Remote Control Server
--- a/build.ts
+++ b/build.ts
@@ -1,6 +1,7 @@
 import { readdir, readFile, writeFile, cp } from 'fs/promises'
 import { join } from 'path'
 import { getMacroDefines } from './scripts/defines.ts'
+import { DEFAULT_BUILD_FEATURES } from './scripts/defines.ts'

 const outdir = 'dist'

@@ -8,48 +9,6 @@ const outdir = 'dist'
 const { rmSync } = await import('fs')
 rmSync(outdir, { recursive: true, force: true })

-// Default features that match the official CLI build.
-// Additional features can be enabled via FEATURE_<NAME>=1 env vars.
-const DEFAULT_BUILD_FEATURES = [
-  'BUDDY', 'TRANSCRIPT_CLASSIFIER', 'BRIDGE_MODE',
-  'AGENT_TRIGGERS_REMOTE',
-  'CHICAGO_MCP',
-  'VOICE_MODE',
-  'SHOT_STATS',
-  'PROMPT_CACHE_BREAK_DETECTION',
-  'TOKEN_BUDGET',
-  // P0: local features
-  'AGENT_TRIGGERS',
-  'ULTRATHINK',
-  'BUILTIN_EXPLORE_PLAN_AGENTS',
-  'LODESTONE',
-  // P1: API-dependent features
-  'EXTRACT_MEMORIES',
-  'VERIFICATION_AGENT',
-  'KAIROS_BRIEF',
-  'AWAY_SUMMARY',
-  'ULTRAPLAN',
-  // P2: daemon + remote control server
-  'DAEMON',
-  // ACP (Agent Client Protocol) agent mode
-  'ACP',
-  // PR-package restored features
-  'WORKFLOW_SCRIPTS',
-  'HISTORY_SNIP',
-  'CONTEXT_COLLAPSE',
-  'MONITOR_TOOL',
-  'FORK_SUBAGENT',
-//   'UDS_INBOX',
-  'KAIROS',
-  'COORDINATOR_MODE',
-  'LAN_PIPES',
-  'BG_SESSIONS',
-  'TEMPLATES',
-  // 'REVIEW_ARTIFACT', // API 请求无响应，需进一步排查 schema 兼容性
-  // P3: poor mode (disable extract_memories + prompt_suggestion)
-  'POOR',
-]
-
 // Collect FEATURE_* env vars → Bun.build features
 const envFeatures = Object.keys(process.env)
  .filter(k => k.startsWith('FEATURE_'))
--- a/package.json
+++ b/package.json
@@ -58,6 +58,7 @@
    "postinstall": "node scripts/run-parallel.mjs scripts/postinstall.cjs scripts/setup-chrome-mcp.mjs",
    "docs:dev": "npx mintlify dev",
    "typecheck": "tsc --noEmit",
+    "test:all": "bun run typecheck && bun test",
    "rcs": "bun run scripts/rcs.ts"
  },
  "dependencies": {
--- a/packages/audio-capture-napi/src/index.ts
+++ b/packages/audio-capture-napi/src/index.ts
@@ -1,3 +1,9 @@
+import { createRequire } from 'node:module'
+
+// createRequire works in both Bun and Node.js ESM contexts.
+// Needed because this package is "type": "module" but uses require() for
+// loading native .node addons — bare require is not available in Node.js ESM.
+const nodeRequire = createRequire(import.meta.url)

 type AudioCaptureNapi = {
  startRecording(
@@ -41,7 +47,7 @@ function loadModule(): AudioCaptureNapi | null {
  if (process.env.AUDIO_CAPTURE_NODE_PATH) {
    try {
      // eslint-disable-next-line @typescript-eslint/no-require-imports
-      cachedModule = require(
+      cachedModule = nodeRequire(
        process.env.AUDIO_CAPTURE_NODE_PATH,
      ) as AudioCaptureNapi
      return cachedModule
@@ -63,7 +69,7 @@ function loadModule(): AudioCaptureNapi | null {
  for (const p of fallbacks) {
    try {
      // eslint-disable-next-line @typescript-eslint/no-require-imports
-      cachedModule = require(p) as AudioCaptureNapi
+      cachedModule = nodeRequire(p) as AudioCaptureNapi
      return cachedModule
    } catch {
      // try next
--- a/packages/color-diff-napi/src/index.ts
+++ b/packages/color-diff-napi/src/index.ts
@@ -17,10 +17,16 @@
 *   getSyntaxTheme always returns the default for the given Claude theme.
 */

+import { createRequire } from 'node:module'
 import { diffArrays } from 'diff'
 import type * as hljsNamespace from 'highlight.js'
 import { basename, extname } from 'path'

+// createRequire works in both Bun and Node.js ESM contexts.
+// Needed because this package is "type": "module" but uses require() for
+// lazy loading — bare require is not available in Node.js ESM.
+const nodeRequire = createRequire(import.meta.url)
+
 // Lazy: defers loading highlight.js until first render. The full bundle
 // registers 190+ language grammars at require time (~50MB, 100-200ms on
 // macOS, several× that on Windows). With a top-level import, any caller
@@ -34,8 +40,7 @@ type HLJSApi = typeof hljsNamespace.default
 let cachedHljs: HLJSApi | null = null
 function hljs(): HLJSApi {
  if (cachedHljs) return cachedHljs
-  // eslint-disable-next-line @typescript-eslint/no-require-imports
-  const mod = require('highlight.js')
+  const mod = nodeRequire('highlight.js')
  // highlight.js uses `export =` (CJS). Under bun/ESM the interop wraps it
  // in .default; under node CJS the module IS the API. Check at runtime.
  cachedHljs = 'default' in mod && mod.default ? mod.default : mod
--- a/packages/image-processor-napi/src/index.ts
+++ b/packages/image-processor-napi/src/index.ts
@@ -1,3 +1,4 @@
+import { readFileSync, unlinkSync } from 'node:fs'
 import sharpModule from 'sharp'

 export const sharp = sharpModule
@@ -62,13 +63,11 @@ return "${tmpPath}"
        }

        const file = Bun.file(tmpPath)
-        // Use synchronous read via Node compat
-        const fs = require('fs')
-        const buffer: Buffer = fs.readFileSync(tmpPath)
+        const buffer: Buffer = readFileSync(tmpPath)

        // Clean up temp file
        try {
-          fs.unlinkSync(tmpPath)
+          unlinkSync(tmpPath)
        } catch {
          // ignore cleanup errors
        }
--- a/scripts/defines.ts
+++ b/scripts/defines.ts
@@ -16,3 +16,52 @@ export function getMacroDefines(): Record<string, string> {
        "MACRO.VERSION_CHANGELOG": JSON.stringify(""),
    };
 }
+
+/**
+ * Default feature flags enabled in both Bun.build and Vite builds.
+ * Additional features can be enabled via FEATURE_<NAME>=1 env vars.
+ *
+ * Used by:
+ *   - build.ts (Bun.build)
+ *   - scripts/vite-plugin-feature-flags.ts (Vite/Rollup)
+ *   - scripts/dev.ts (bun run dev)
+ */
+export const DEFAULT_BUILD_FEATURES = [
+    'BUDDY', 'TRANSCRIPT_CLASSIFIER', 'BRIDGE_MODE',
+    'AGENT_TRIGGERS_REMOTE',
+    'CHICAGO_MCP',
+    'VOICE_MODE',
+    'SHOT_STATS',
+    'PROMPT_CACHE_BREAK_DETECTION',
+    'TOKEN_BUDGET',
+    // P0: local features
+    'AGENT_TRIGGERS',
+    'ULTRATHINK',
+    'BUILTIN_EXPLORE_PLAN_AGENTS',
+    'LODESTONE',
+    // P1: API-dependent features
+    'EXTRACT_MEMORIES',
+    'VERIFICATION_AGENT',
+    'KAIROS_BRIEF',
+    'AWAY_SUMMARY',
+    'ULTRAPLAN',
+    // P2: daemon + remote control server
+    'DAEMON',
+    // ACP (Agent Client Protocol) agent mode
+    'ACP',
+    // PR-package restored features
+    'WORKFLOW_SCRIPTS',
+    'HISTORY_SNIP',
+    'CONTEXT_COLLAPSE',
+    'MONITOR_TOOL',
+    'FORK_SUBAGENT',
+    // 'UDS_INBOX',
+    'KAIROS',
+    'COORDINATOR_MODE',
+    'LAN_PIPES',
+    'BG_SESSIONS',
+    'TEMPLATES',
+    // 'REVIEW_ARTIFACT', // API 请求无响应，需进一步排查 schema 兼容性
+    // P3: poor mode (disable extract_memories + prompt_suggestion)
+    'POOR',
+] as const;
--- a/scripts/dev.ts
+++ b/scripts/dev.ts
@@ -6,7 +6,7 @@
 */
 import { join, dirname } from "node:path";
 import { fileURLToPath } from "node:url";
-import { getMacroDefines } from "./defines.ts";
+import { getMacroDefines, DEFAULT_BUILD_FEATURES } from "./defines.ts";

 // Resolve project root from this script's location
 const __filename = fileURLToPath(import.meta.url);
@@ -22,39 +22,7 @@ const defineArgs = Object.entries(defines).flatMap(([k, v]) => [
 ]);

 // Bun --feature flags: enable feature() gates at runtime.
-// Default features enabled in dev mode.
-const DEFAULT_FEATURES = [
-  "BUDDY", "TRANSCRIPT_CLASSIFIER", "BRIDGE_MODE",
-  "AGENT_TRIGGERS_REMOTE", "CHICAGO_MCP", "VOICE_MODE",
-  "SHOT_STATS", "PROMPT_CACHE_BREAK_DETECTION", "TOKEN_BUDGET",
-  // P0: local features
-  "AGENT_TRIGGERS",
-  "ULTRATHINK",
-  "BUILTIN_EXPLORE_PLAN_AGENTS",
-  "LODESTONE",
-  // P1: API-dependent features
-  "EXTRACT_MEMORIES", "VERIFICATION_AGENT",
-  "KAIROS_BRIEF", "AWAY_SUMMARY", "ULTRAPLAN",
-  // P2: daemon + remote control server
-  "DAEMON",
-  // ACP (Agent Client Protocol) agent mode
-  "ACP",
-  // PR-package restored features
-  "WORKFLOW_SCRIPTS",
-  "HISTORY_SNIP",
-  "CONTEXT_COLLAPSE",
-  "MONITOR_TOOL",
-  "FORK_SUBAGENT",
-  "UDS_INBOX",
-  "KAIROS",
-  "COORDINATOR_MODE",
-  "LAN_PIPES",
-  "BG_SESSIONS",
-  "TEMPLATES",
-  // "REVIEW_ARTIFACT", // API 请求无响应，需进一步排查 schema 兼容性
-  // P3: poor mode (disable extract_memories + prompt_suggestion)
-  "POOR",
-];
+// Uses the shared DEFAULT_BUILD_FEATURES list from defines.ts.

 // Any env var matching FEATURE_<NAME>=1 will also enable that feature.
 // e.g. FEATURE_PROACTIVE=1 bun run dev
@@ -62,7 +30,7 @@ const envFeatures = Object.entries(process.env)
    .filter(([k]) => k.startsWith("FEATURE_"))
    .map(([k]) => k.replace("FEATURE_", ""));

-const allFeatures = [...new Set([...DEFAULT_FEATURES, ...envFeatures])];
+const allFeatures = [...new Set([...DEFAULT_BUILD_FEATURES, ...envFeatures])];
 const featureArgs = allFeatures.flatMap((name) => ["--feature", name]);

 // If BUN_INSPECT is set, pass --inspect-wait to the child process
--- a/scripts/vite-plugin-feature-flags.ts
+++ b/scripts/vite-plugin-feature-flags.ts
@@ -1,41 +1,5 @@
 import type { Plugin } from "rollup";
-
-/**
- * Default features that match the official CLI build.
- * Additional features can be enabled via FEATURE_<NAME>=1 env vars.
- */
-const DEFAULT_BUILD_FEATURES = [
-  "AGENT_TRIGGERS_REMOTE",
-  "CHICAGO_MCP",
-  "VOICE_MODE",
-  "SHOT_STATS",
-  "PROMPT_CACHE_BREAK_DETECTION",
-  "TOKEN_BUDGET",
-  // P0: local features
-  "AGENT_TRIGGERS",
-  "ULTRATHINK",
-  "BUILTIN_EXPLORE_PLAN_AGENTS",
-  "LODESTONE",
-  // P1: API-dependent features
-  "EXTRACT_MEMORIES",
-  "VERIFICATION_AGENT",
-  "KAIROS_BRIEF",
-  "AWAY_SUMMARY",
-  "ULTRAPLAN",
-  // P2: daemon + remote control server
-  "DAEMON",
-  // PR-package restored features
-  "WORKFLOW_SCRIPTS",
-  "HISTORY_SNIP",
-  "CONTEXT_COLLAPSE",
-  "MONITOR_TOOL",
-  "FORK_SUBAGENT",
-  "KAIROS",
-  "COORDINATOR_MODE",
-  "LAN_PIPES",
-  // P3: poor mode
-  "POOR",
-];
+import { DEFAULT_BUILD_FEATURES } from "./defines.ts";

 /**
 * Collect enabled feature flags from defaults + env vars.
--- a/src/memdir/findRelevantMemories.ts
+++ b/src/memdir/findRelevantMemories.ts
@@ -3,6 +3,7 @@ import { logForDebugging } from '../utils/debug.js'
 import { errorMessage } from '../utils/errors.js'
 import { getDefaultSonnetModel } from '../utils/model/model.js'
 import { sideQuery } from '../utils/sideQuery.js'
+import type { LangfuseSpan } from '../services/langfuse/index.js'
 import { jsonParse } from '../utils/slowOperations.js'
 import {
  formatMemoryManifest,
@@ -42,6 +43,7 @@ export async function findRelevantMemories(
  signal: AbortSignal,
  recentTools: readonly string[] = [],
  alreadySurfaced: ReadonlySet<string> = new Set(),
+  parentSpan?: LangfuseSpan | null,
 ): Promise<RelevantMemory[]> {
  const memories = (await scanMemoryFiles(memoryDir, signal)).filter(
    m => !alreadySurfaced.has(m.filePath),
@@ -55,6 +57,7 @@ export async function findRelevantMemories(
    memories,
    signal,
    recentTools,
+    parentSpan,
  )
  const byFilename = new Map(memories.map(m => [m.filename, m]))
  const selected = selectedFilenames
@@ -79,6 +82,7 @@ async function selectRelevantMemories(
  memories: MemoryHeader[],
  signal: AbortSignal,
  recentTools: readonly string[],
+  parentSpan?: LangfuseSpan | null,
 ): Promise<string[]> {
  const validFilenames = new Set(memories.map(m => m.filename))

@@ -119,6 +123,8 @@ async function selectRelevantMemories(
      },
      signal,
      querySource: 'memdir_relevance',
+      optional: true,
+      parentSpan,
    })

    const textBlock = result.content.find(block => block.type === 'text')
--- a/src/services/langfuse/index.ts
+++ b/src/services/langfuse/index.ts
@@ -1,4 +1,4 @@
 export { initLangfuse, shutdownLangfuse, isLangfuseEnabled, getLangfuseProcessor } from './client.js'
-export { createTrace, createSubagentTrace, recordLLMObservation, recordToolObservation, endTrace, createToolBatchSpan, endToolBatchSpan } from './tracing.js'
+export { createTrace, createSubagentTrace, createChildSpan, recordLLMObservation, recordToolObservation, endTrace, createToolBatchSpan, endToolBatchSpan } from './tracing.js'
 export type { LangfuseSpan } from './tracing.js'
 export { sanitizeToolInput, sanitizeToolOutput, sanitizeGlobal } from './sanitize.js'
--- a/src/services/langfuse/tracing.ts
+++ b/src/services/langfuse/tracing.ts
@@ -282,6 +282,60 @@ export function createSubagentTrace(params: {
  }
 }

+/**
+ * Create a child span under a parent trace — used for side queries
+ * that should be nested under the main agent trace in Langfuse.
+ */
+export function createChildSpan(
+  parentSpan: LangfuseSpan | null,
+  params: {
+    name: string
+    sessionId: string
+    model: string
+    provider: string
+    input?: unknown
+    querySource?: string
+    username?: string
+  },
+): LangfuseSpan | null {
+  if (!parentSpan || !isLangfuseEnabled()) return null
+  try {
+    const span = startObservation(
+      params.name,
+      {
+        input: params.input,
+        metadata: {
+          provider: params.provider,
+          model: params.model,
+          querySource: params.querySource,
+        },
+      },
+      {
+        asType: 'span',
+        parentSpanContext: parentSpan.otelSpan.spanContext(),
+      },
+    ) as LangfuseSpan
+
+    // Propagate session ID and user ID from parent
+    const parent = parentSpan as unknown as RootTrace
+    const sessionId = parent._sessionId ?? params.sessionId
+    if (sessionId) {
+      span.otelSpan.setAttribute(LangfuseOtelSpanAttributes.TRACE_SESSION_ID, sessionId)
+      ;(span as unknown as RootTrace)._sessionId = sessionId
+    }
+    const userId = parent._userId ?? resolveLangfuseUserId(params.username)
+    if (userId) {
+      span.otelSpan.setAttribute(LangfuseOtelSpanAttributes.TRACE_USER_ID, userId)
+      ;(span as unknown as RootTrace)._userId = userId
+    }
+    logForDebugging(`[langfuse] Child span created: ${span.id} (parent=${parentSpan.id})`)
+    return span
+  } catch (e) {
+    logForDebugging(`[langfuse] createChildSpan failed: ${e}`, { level: 'error' })
+    return null
+  }
+}
+
 export function endTrace(
  rootSpan: LangfuseSpan | null,
  output?: unknown,
--- a/src/utils/attachments.ts
+++ b/src/utils/attachments.ts
@@ -2201,6 +2201,7 @@ async function getRelevantMemoryAttachments(
  recentTools: readonly string[],
  signal: AbortSignal,
  alreadySurfaced: ReadonlySet<string>,
+  parentSpan?: unknown,
 ): Promise<Attachment[]> {
  // If an agent is @-mentioned, search only its memory dir (isolation).
  // Otherwise search the auto-memory dir.
@@ -2221,6 +2222,7 @@ async function getRelevantMemoryAttachments(
        signal,
        recentTools,
        alreadySurfaced,
+        parentSpan as Parameters<typeof findRelevantMemories>[5],
      ).catch(() => []),
    ),
  )
@@ -2370,6 +2372,12 @@ export function startRelevantMemoryPrefetch(
    return undefined
  }

+  // Poor mode: skip the side-query to save tokens
+  const { isPoorModeActive } = require('../commands/poor/poorMode.js') as typeof import('../commands/poor/poorMode.js')
+  if (isPoorModeActive()) {
+    return undefined
+  }
+
  const lastUserMessage = messages.findLast(m => m.type === 'user' && !m.isMeta)
  if (!lastUserMessage) {
    return undefined
@@ -2397,6 +2405,7 @@ export function startRelevantMemoryPrefetch(
    collectRecentSuccessfulTools(messages, lastUserMessage),
    controller.signal,
    surfaced.paths,
+    toolUseContext.langfuseTrace,
  ).catch(e => {
    if (!isAbortError(e)) {
      logError(e)
--- a/src/utils/context.ts
+++ b/src/utils/context.ts
@@ -133,6 +133,12 @@ export function calculateContextPercentages(
    currentUsage.cache_creation_input_tokens +
    currentUsage.cache_read_input_tokens

+  // Treat zero input tokens the same as no usage data — avoids flashing
+  // "ctx:0%" when a third-party API omits usage from message_start.
+  if (totalInputTokens === 0) {
+    return { used: null, remaining: null }
+  }
+
  const usedPercentage = Math.round(
    (totalInputTokens / contextWindowSize) * 100,
  )
--- a/src/utils/model/model.ts
+++ b/src/utils/model/model.ts
@@ -126,6 +126,12 @@ export function getDefaultOpusModel(): ModelName {
  if (process.env.ANTHROPIC_DEFAULT_OPUS_MODEL) {
    return process.env.ANTHROPIC_DEFAULT_OPUS_MODEL
  }
+  // Fall back to user's configured model — custom providers may not
+  // recognize hardcoded Anthropic model IDs.
+  const userSpecifiedOpus = getUserSpecifiedModelSetting()
+  if (userSpecifiedOpus) {
+    return parseUserSpecifiedModel(userSpecifiedOpus)
+  }
  // 3P providers (Bedrock, Vertex, Foundry) — kept as a separate branch
  // even when values match, since 3P availability lags firstParty and
  // these will diverge again at the next model launch.
@@ -153,6 +159,13 @@ export function getDefaultSonnetModel(): ModelName {
  if (process.env.ANTHROPIC_DEFAULT_SONNET_MODEL) {
    return process.env.ANTHROPIC_DEFAULT_SONNET_MODEL
  }
+  // Fall back to user's configured model (ANTHROPIC_MODEL / settings) —
+  // custom providers (proxies, national clouds) may not recognize the
+  // hardcoded Anthropic model IDs.
+  const userSpecified = getUserSpecifiedModelSetting()
+  if (userSpecified) {
+    return parseUserSpecifiedModel(userSpecified)
+  }
  // Default to Sonnet 4.5 for 3P since they may not have 4.6 yet
  if (provider !== 'firstParty') {
    return getModelStrings().sonnet45
@@ -175,6 +188,12 @@ export function getDefaultHaikuModel(): ModelName {
  if (process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL) {
    return process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL
  }
+  // Fall back to user's configured model — custom providers may not
+  // recognize hardcoded Anthropic model IDs.
+  const userSpecifiedHaiku = getUserSpecifiedModelSetting()
+  if (userSpecifiedHaiku) {
+    return parseUserSpecifiedModel(userSpecifiedHaiku)
+  }

  // Haiku 4.5 is available on all platforms (first-party, Foundry, Bedrock, Vertex)
  return getModelStrings().haiku45
--- a/src/utils/permissions/permissions.ts
+++ b/src/utils/permissions/permissions.ts
@@ -696,6 +696,7 @@ export const hasPermissionsToUseTool: CanUseToolFn = async (
          context.options.tools,
          appState.toolPermissionContext,
          context.abortController.signal,
+          context.langfuseTrace,
        )
      } finally {
        clearClassifierChecking(toolUseID)
--- a/src/utils/permissions/yoloClassifier.ts
+++ b/src/utils/permissions/yoloClassifier.ts
@@ -31,6 +31,7 @@ import { resolveAntModel } from '../model/antModels.js'
 import { getMainLoopModel } from '../model/model.js'
 import { getAutoModeConfig } from '../settings/settings.js'
 import { sideQuery } from '../sideQuery.js'
+import type { LangfuseSpan } from '../../services/langfuse/index.js'
 import { jsonStringify } from '../slowOperations.js'
 import { tokenCountWithEstimation } from '../tokens.js'
 import {
@@ -731,6 +732,7 @@ async function classifyYoloActionXml(
    action: string
  },
  mode: TwoStageMode,
+  parentSpan?: LangfuseSpan | null,
 ): Promise<YoloClassifierResult> {
  const classifierType =
    mode === 'both'
@@ -791,6 +793,7 @@ async function classifyYoloActionXml(
        signal,
        ...(mode !== 'fast' && { stop_sequences: ['</block>'] }),
        querySource: 'auto_mode',
+        parentSpan,
      }
      const stage1Raw = await sideQuery(stage1Opts)
      stage1DurationMs = Date.now() - stage1Start
@@ -877,6 +880,7 @@ async function classifyYoloActionXml(
      maxRetries: getDefaultMaxRetries(),
      signal,
      querySource: 'auto_mode' as const,
+      parentSpan,
    }
    const stage2Raw = await sideQuery(stage2Opts)
    const stage2DurationMs = Date.now() - stage2Start
@@ -1015,6 +1019,7 @@ export async function classifyYoloAction(
  tools: Tools,
  context: ToolPermissionContext,
  signal: AbortSignal,
+  parentSpan?: LangfuseSpan | null,
 ): Promise<YoloClassifierResult> {
  const lookup = buildToolLookup(tools)
  const actionCompact = toCompact(action, lookup)
@@ -1126,6 +1131,7 @@ export async function classifyYoloAction(
        action: actionCompact,
      },
      getTwoStageMode(),
+      parentSpan,
    )
  }
  const [disableThinking, thinkingPadding] = getClassifierThinkingConfig(model)
@@ -1156,6 +1162,7 @@ export async function classifyYoloAction(
      maxRetries: getDefaultMaxRetries(),
      signal,
      querySource: 'auto_mode' as const,
+      parentSpan,
    }
    const result = await sideQuery(sideQueryOpts)
    void maybeDumpAutoMode(sideQueryOpts, result, start)
--- a/src/utils/sideQuery.ts
+++ b/src/utils/sideQuery.ts
@@ -15,8 +15,11 @@ import { logEvent } from '../services/analytics/index.js'
 import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from '../services/analytics/metadata.js'
 import { getAPIMetadata } from '../services/api/claude.js'
 import { getAnthropicClient } from '../services/api/client.js'
-import { createTrace, endTrace, recordLLMObservation } from '../services/langfuse/index.js'
+import { createTrace, createChildSpan, endTrace, recordLLMObservation } from '../services/langfuse/index.js'
+import type { LangfuseSpan } from '../services/langfuse/index.js'
+import { convertMessagesToLangfuse, convertOutputToLangfuse, convertToolsToLangfuse } from '../services/langfuse/convert.js'
 import { getModelBetas, modelSupportsStructuredOutputs } from './betas.js'
+import { errorMessage } from './errors.js'
 import { computeFingerprint } from './fingerprint.js'
 import { getAPIProvider } from './model/providers.js'
 import { normalizeModelStringForAPI } from './model/model.js'
@@ -64,6 +67,11 @@ export type SideQueryOptions = {
  stop_sequences?: string[]
  /** Attributes this call in tengu_api_success for COGS joining against reporting.sampling_calls. */
  querySource: QuerySource
+  /** Parent Langfuse span to nest this side query under the main agent trace. */
+  parentSpan?: LangfuseSpan | null
+  /** When true, API failures are recorded as WARNING instead of ERROR in Langfuse.
+   *  Use for optional/best-effort queries where failure is expected and handled gracefully. */
+  optional?: boolean
 }

 /**
@@ -182,13 +190,25 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
  const normalizedModel = normalizeModelStringForAPI(model)
  const provider = getAPIProvider()
  const start = Date.now()
-  const langfuseTrace = createTrace({
-    sessionId: getSessionId(),
-    model: normalizedModel,
-    provider,
-    name: `side-query:${opts.querySource}`,
-    querySource: opts.querySource,
-  })
+  const traceName = `side-query:${opts.querySource}`
+
+  // When parentSpan is provided, create a child span nested under the
+  // main agent trace; otherwise create a standalone root trace.
+  const langfuseTrace = opts.parentSpan
+    ? createChildSpan(opts.parentSpan, {
+        name: traceName,
+        sessionId: getSessionId(),
+        model: normalizedModel,
+        provider,
+        querySource: opts.querySource,
+      })
+    : createTrace({
+        sessionId: getSessionId(),
+        model: normalizedModel,
+        provider,
+        name: traceName,
+        querySource: opts.querySource,
+      })

  let response: BetaMessage
  try {
@@ -210,7 +230,7 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
      { signal },
    )
  } catch (error) {
-    endTrace(langfuseTrace, undefined, 'error')
+    endTrace(langfuseTrace, { error: errorMessage(error) }, opts.optional ? 'interrupted' : 'error')
    throw error
  }

@@ -235,12 +255,21 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
  })
  setLastApiCompletionTimestamp(now)

-  // Record LLM observation in Langfuse (no-op if not configured)
+  // Record LLM observation in Langfuse (no-op if not configured).
+  // Wrap SDK types into the internal message format expected by converters.
+  const wrappedInput = messages.map(m => ({
+    type: m.role === 'assistant' ? 'assistant' as const : 'user' as const,
+    message: { role: m.role, content: m.content },
+  })) as unknown as Parameters<typeof convertMessagesToLangfuse>[0]
+  const wrappedOutput = [{
+    type: 'assistant' as const,
+    message: { role: 'assistant' as const, content: response.content },
+  }] as unknown as Parameters<typeof convertOutputToLangfuse>[0]
  recordLLMObservation(langfuseTrace, {
    model: normalizedModel,
    provider,
-    input: messages,
-    output: response.content,
+    input: convertMessagesToLangfuse(wrappedInput, systemBlocks.length > 0 ? systemBlocks.map(b => b.text) : undefined),
+    output: convertOutputToLangfuse(wrappedOutput),
    usage: {
      input_tokens: response.usage.input_tokens,
      output_tokens: response.usage.output_tokens,
@@ -249,6 +278,7 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
    },
    startTime: new Date(start),
    endTime: new Date(),
+    ...(tools && { tools: convertToolsToLangfuse(tools as unknown[]) }),
  })
  endTrace(langfuseTrace)

--- a/src/utils/tokens.ts
+++ b/src/utils/tokens.ts
@@ -150,9 +150,17 @@ export function getCurrentUsage(messages: Message[]): {
    const message = messages[i]
    const usage = message ? getTokenUsage(message) : undefined
    if (usage) {
+      const inputTokens =
+        (usage.input_tokens ?? 0) +
+        (usage.cache_creation_input_tokens ?? 0) +
+        (usage.cache_read_input_tokens ?? 0)
+      // Skip placeholder usage (all zeros) — third-party APIs may emit
+      // message_start without real usage data, causing the context counter
+      // to flash to 0. Fall through to the previous message instead.
+      if (inputTokens === 0 && (usage.output_tokens ?? 0) === 0) continue
      return {
-        input_tokens: usage.input_tokens,
-        output_tokens: usage.output_tokens,
+        input_tokens: usage.input_tokens ?? 0,
+        output_tokens: usage.output_tokens ?? 0,
        cache_creation_input_tokens: usage.cache_creation_input_tokens ?? 0,
        cache_read_input_tokens: usage.cache_read_input_tokens ?? 0,
      }