feat: 增强 auto mode 的易用性 (#312)

* feat: poor 模式降级 yolo 审阅模型

* feat: 为多模块添加 Langfuse tracing 支持

在 web search、agent creation、away summary、token estimation、
skill improvement 等模块中集成 Langfuse trace,并透传至
compact/apiQueryHook/execPromptHook 等调用链。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 让 auto mode 的分类器调用记录到主 agent 的 trace 下

* fix: reopen auto mode prompt when classifier is unavailable

* fix: 修复 auto mode 下 LLM(分类器)报错时权限确认弹窗无法打开的问题

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
claude-code-best
2026-04-20 21:13:09 +08:00
committed by GitHub
parent e4ce08fe39
commit ed4bdb9338
18 changed files with 281 additions and 145 deletions

View File

@@ -9,6 +9,9 @@ import type {
} from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs' } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js' import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js'
import { queryModelWithStreaming } from 'src/services/api/claude.js' import { queryModelWithStreaming } from 'src/services/api/claude.js'
import { createTrace, endTrace, isLangfuseEnabled } from 'src/services/langfuse/index.js'
import { getSessionId } from 'src/bootstrap/state.js'
import { getAPIProvider } from 'src/utils/model/providers.js'
import { createUserMessage } from 'src/utils/messages.js' import { createUserMessage } from 'src/utils/messages.js'
import { getMainLoopModel, getSmallFastModel } from 'src/utils/model/model.js' import { getMainLoopModel, getSmallFastModel } from 'src/utils/model/model.js'
import { jsonParse } from 'src/utils/slowOperations.js' import { jsonParse } from 'src/utils/slowOperations.js'
@@ -38,6 +41,15 @@ export class ApiSearchAdapter implements WebSearchAdapter {
const toolSchema = makeToolSchema({ allowedDomains, blockedDomains }) const toolSchema = makeToolSchema({ allowedDomains, blockedDomains })
const useHaiku = getFeatureValue_CACHED_MAY_BE_STALE('tengu_plum_vx3', false) const useHaiku = getFeatureValue_CACHED_MAY_BE_STALE('tengu_plum_vx3', false)
const model = useHaiku ? getSmallFastModel() : getMainLoopModel()
const langfuseTrace = isLangfuseEnabled()
? createTrace({
sessionId: getSessionId(),
model,
provider: getAPIProvider(),
name: 'web-search-tool',
})
: null
const queryStream = queryModelWithStreaming({ const queryStream = queryModelWithStreaming({
messages: [userMessage], messages: [userMessage],
@@ -58,7 +70,7 @@ export class ApiSearchAdapter implements WebSearchAdapter {
alwaysAskRules: {}, alwaysAskRules: {},
isBypassPermissionsModeAvailable: false, isBypassPermissionsModeAvailable: false,
}), }),
model: useHaiku ? getSmallFastModel() : getMainLoopModel(), model,
toolChoice: useHaiku ? { type: 'tool' as const, name: 'web_search' } : undefined, toolChoice: useHaiku ? { type: 'tool' as const, name: 'web_search' } : undefined,
isNonInteractiveSession: false, isNonInteractiveSession: false,
hasAppendSystemPrompt: false, hasAppendSystemPrompt: false,
@@ -68,6 +80,7 @@ export class ApiSearchAdapter implements WebSearchAdapter {
mcpTools: [], mcpTools: [],
agentId: undefined, agentId: undefined,
effortValue: undefined, effortValue: undefined,
langfuseTrace,
}, },
}) })
@@ -148,6 +161,8 @@ export class ApiSearchAdapter implements WebSearchAdapter {
} }
} }
endTrace(langfuseTrace)
// Extract SearchResult[] from content blocks // Extract SearchResult[] from content blocks
return extractSearchResults(allContentBlocks) return extractSearchResults(allContentBlocks)
} }

View File

@@ -277,6 +277,8 @@ export type ToolUseContext = {
criticalSystemReminder_EXPERIMENTAL?: string criticalSystemReminder_EXPERIMENTAL?: string
/** Langfuse root trace span for this query turn. Passed down to tool execution for observability. */ /** Langfuse root trace span for this query turn. Passed down to tool execution for observability. */
langfuseTrace?: LangfuseSpan | null langfuseTrace?: LangfuseSpan | null
/** Langfuse root trace span for the outer/main agent trace. Used when subagents need to nest observations under the parent agent trace. */
langfuseRootTrace?: LangfuseSpan | null
/** Langfuse batch span wrapping a concurrent tool group. When set, tool observations are nested under it. */ /** Langfuse batch span wrapping a concurrent tool group. When set, tool observations are nested under it. */
langfuseBatchSpan?: LangfuseSpan | null langfuseBatchSpan?: LangfuseSpan | null
/** When true, preserve toolUseResult on messages even for subagents. /** When true, preserve toolUseResult on messages even for subagents.

View File

@@ -6,6 +6,7 @@
import { errorMessage } from '../../utils/errors.js' import { errorMessage } from '../../utils/errors.js'
import { import {
getMainLoopModel, getMainLoopModel,
getSmallFastModel,
parseUserSpecifiedModel, parseUserSpecifiedModel,
} from '../../utils/model/model.js' } from '../../utils/model/model.js'
import { import {
@@ -14,6 +15,7 @@ import {
getDefaultExternalAutoModeRules, getDefaultExternalAutoModeRules,
} from '../../utils/permissions/yoloClassifier.js' } from '../../utils/permissions/yoloClassifier.js'
import { getAutoModeConfig } from '../../utils/settings/settings.js' import { getAutoModeConfig } from '../../utils/settings/settings.js'
import { isPoorModeActive } from '../../commands/poor/poorMode.js'
import { sideQuery } from '../../utils/sideQuery.js' import { sideQuery } from '../../utils/sideQuery.js'
import { jsonStringify } from '../../utils/slowOperations.js' import { jsonStringify } from '../../utils/slowOperations.js'
@@ -90,6 +92,8 @@ export async function autoModeCritiqueHandler(options: {
const model = options.model const model = options.model
? parseUserSpecifiedModel(options.model) ? parseUserSpecifiedModel(options.model)
: isPoorModeActive()
? getSmallFastModel()
: getMainLoopModel() : getMainLoopModel()
const defaults = getDefaultExternalAutoModeRules() const defaults = getDefaultExternalAutoModeRules()

View File

@@ -14,6 +14,9 @@ import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent, logEvent,
} from '../../services/analytics/index.js' } from '../../services/analytics/index.js'
import { createTrace, endTrace, isLangfuseEnabled } from '../../services/langfuse/index.js'
import { getSessionId } from '../../bootstrap/state.js'
import { getAPIProvider } from '../../utils/model/providers.js'
import { jsonParse } from '../../utils/slowOperations.js' import { jsonParse } from '../../utils/slowOperations.js'
import { asSystemPrompt } from '../../utils/systemPromptType.js' import { asSystemPrompt } from '../../utils/systemPromptType.js'
@@ -146,6 +149,15 @@ export async function generateAgent(
? AGENT_CREATION_SYSTEM_PROMPT + AGENT_MEMORY_INSTRUCTIONS ? AGENT_CREATION_SYSTEM_PROMPT + AGENT_MEMORY_INSTRUCTIONS
: AGENT_CREATION_SYSTEM_PROMPT : AGENT_CREATION_SYSTEM_PROMPT
const langfuseTrace = isLangfuseEnabled()
? createTrace({
sessionId: getSessionId(),
model,
provider: getAPIProvider(),
name: 'agent-creation',
})
: null
const response = await queryModelWithoutStreaming({ const response = await queryModelWithoutStreaming({
messages: normalizeMessagesForAPI(messagesWithContext), messages: normalizeMessagesForAPI(messagesWithContext),
systemPrompt: asSystemPrompt([systemPrompt]), systemPrompt: asSystemPrompt([systemPrompt]),
@@ -161,9 +173,12 @@ export async function generateAgent(
hasAppendSystemPrompt: false, hasAppendSystemPrompt: false,
querySource: 'agent_creation', querySource: 'agent_creation',
mcpTools: [], mcpTools: [],
langfuseTrace,
}, },
}) })
endTrace(langfuseTrace)
const textBlocks = (Array.isArray(response.message.content) ? response.message.content : []).filter( const textBlocks = (Array.isArray(response.message.content) ? response.message.content : []).filter(
(block): block is ContentBlock & { type: 'text' } => block.type === 'text', (block): block is ContentBlock & { type: 'text' } => block.type === 'text',
) )

View File

@@ -235,6 +235,9 @@ export async function* query(
// When called as a sub-agent, langfuseTrace is already set by runAgent() // When called as a sub-agent, langfuseTrace is already set by runAgent()
// — reuse it instead of creating an independent trace. // — reuse it instead of creating an independent trace.
const ownsTrace = !params.toolUseContext.langfuseTrace const ownsTrace = !params.toolUseContext.langfuseTrace
logForDebugging(
`[query] ownsTrace=${ownsTrace} incoming langfuseTrace=${params.toolUseContext.langfuseTrace ? 'present' : 'null/undefined'} isLangfuseEnabled=${isLangfuseEnabled()}`,
)
const langfuseTrace = params.toolUseContext.langfuseTrace const langfuseTrace = params.toolUseContext.langfuseTrace
?? (isLangfuseEnabled() ?? (isLangfuseEnabled()
? createTrace({ ? createTrace({

View File

@@ -10,6 +10,9 @@ import { getSmallFastModel } from '../utils/model/model.js'
import { asSystemPrompt } from '../utils/systemPromptType.js' import { asSystemPrompt } from '../utils/systemPromptType.js'
import { getResolvedLanguage } from '../utils/language.js' import { getResolvedLanguage } from '../utils/language.js'
import { queryModelWithoutStreaming } from './api/claude.js' import { queryModelWithoutStreaming } from './api/claude.js'
import { createTrace, endTrace, isLangfuseEnabled } from './langfuse/index.js'
import { getSessionId } from '../bootstrap/state.js'
import { getAPIProvider } from '../utils/model/providers.js'
import { getSessionMemoryContent } from './SessionMemory/sessionMemoryUtils.js' import { getSessionMemoryContent } from './SessionMemory/sessionMemoryUtils.js'
// Recap only needs recent context — truncate to avoid "prompt too long" on // Recap only needs recent context — truncate to avoid "prompt too long" on
@@ -42,6 +45,16 @@ export async function generateAwaySummary(
return null return null
} }
const model = getSmallFastModel()
const langfuseTrace = isLangfuseEnabled()
? createTrace({
sessionId: getSessionId(),
model,
provider: getAPIProvider(),
name: 'away-summary',
})
: null
try { try {
const memory = await getSessionMemoryContent() const memory = await getSessionMemoryContent()
const recent = messages.slice(-RECENT_MESSAGE_WINDOW) const recent = messages.slice(-RECENT_MESSAGE_WINDOW)
@@ -54,7 +67,7 @@ export async function generateAwaySummary(
signal, signal,
options: { options: {
getToolPermissionContext: async () => getEmptyToolPermissionContext(), getToolPermissionContext: async () => getEmptyToolPermissionContext(),
model: getSmallFastModel(), model,
toolChoice: undefined, toolChoice: undefined,
isNonInteractiveSession: false, isNonInteractiveSession: false,
hasAppendSystemPrompt: false, hasAppendSystemPrompt: false,
@@ -62,6 +75,7 @@ export async function generateAwaySummary(
querySource: 'away_summary', querySource: 'away_summary',
mcpTools: [], mcpTools: [],
skipCacheWrite: true, skipCacheWrite: true,
langfuseTrace,
}, },
}) })
@@ -69,14 +83,17 @@ export async function generateAwaySummary(
logForDebugging( logForDebugging(
`[awaySummary] API error: ${getAssistantMessageText(response)}`, `[awaySummary] API error: ${getAssistantMessageText(response)}`,
) )
endTrace(langfuseTrace, undefined, 'error')
return null return null
} }
endTrace(langfuseTrace)
return getAssistantMessageText(response) return getAssistantMessageText(response)
} catch (err) { } catch (err) {
if (err instanceof APIUserAbortError || signal.aborted) { if (err instanceof APIUserAbortError || signal.aborted) {
return null return null
} }
logForDebugging(`[awaySummary] generation failed: ${err}`) logForDebugging(`[awaySummary] generation failed: ${err}`)
endTrace(langfuseTrace, undefined, 'error')
return null return null
} }
} }

View File

@@ -1326,6 +1326,7 @@ async function streamCompactSummary({
agents: context.options.agentDefinitions.activeAgents, agents: context.options.agentDefinitions.activeAgents,
mcpTools: [], mcpTools: [],
effortValue: appState.effortValue, effortValue: appState.effortValue,
langfuseTrace: context.langfuseTrace,
}, },
}) })
const streamIter = streamingGen[Symbol.asyncIterator]() const streamIter = streamingGen[Symbol.asyncIterator]()

View File

@@ -25,6 +25,8 @@ import { jsonStringify } from '../utils/slowOperations.js'
import { isToolReferenceBlock } from '../utils/toolSearch.js' import { isToolReferenceBlock } from '../utils/toolSearch.js'
import { getAPIMetadata, getExtraBodyParams } from './api/claude.js' import { getAPIMetadata, getExtraBodyParams } from './api/claude.js'
import { getAnthropicClient } from './api/client.js' import { getAnthropicClient } from './api/client.js'
import { createTrace, endTrace, isLangfuseEnabled, recordLLMObservation } from './langfuse/index.js'
import { getSessionId } from '../bootstrap/state.js'
import { withTokenCountVCR } from './vcr.js' import { withTokenCountVCR } from './vcr.js'
// Minimal values for token counting with thinking enabled // Minimal values for token counting with thinking enabled
@@ -309,6 +311,15 @@ export async function countTokensViaHaikuFallback(
: betas : betas
// biome-ignore lint/plugin: token counting needs specialized parameters (thinking, betas) that sideQuery doesn't support // biome-ignore lint/plugin: token counting needs specialized parameters (thinking, betas) that sideQuery doesn't support
const apiStart = Date.now()
const langfuseTrace = isLangfuseEnabled()
? createTrace({
sessionId: getSessionId(),
model: normalizeModelStringForAPI(model),
provider: getAPIProvider(),
name: 'token-estimation',
})
: null
const response = await anthropic.beta.messages.create({ const response = await anthropic.beta.messages.create({
model: normalizeModelStringForAPI(model), model: normalizeModelStringForAPI(model),
max_tokens: containsThinking ? TOKEN_COUNT_MAX_TOKENS : 1, max_tokens: containsThinking ? TOKEN_COUNT_MAX_TOKENS : 1,
@@ -331,6 +342,22 @@ export async function countTokensViaHaikuFallback(
const cacheCreationTokens = usage.cache_creation_input_tokens || 0 const cacheCreationTokens = usage.cache_creation_input_tokens || 0
const cacheReadTokens = usage.cache_read_input_tokens || 0 const cacheReadTokens = usage.cache_read_input_tokens || 0
recordLLMObservation(langfuseTrace, {
model: normalizeModelStringForAPI(model),
provider: getAPIProvider(),
input: messagesToSend,
output: response.content,
usage: {
input_tokens: inputTokens,
output_tokens: usage.output_tokens,
cache_creation_input_tokens: cacheCreationTokens || undefined,
cache_read_input_tokens: cacheReadTokens || undefined,
},
startTime: new Date(apiStart),
endTime: new Date(),
})
endTrace(langfuseTrace)
return inputTokens + cacheCreationTokens + cacheReadTokens return inputTokens + cacheCreationTokens + cacheReadTokens
} }

View File

@@ -457,9 +457,14 @@ describe("buildClassifierUnavailableMessage", () => {
expect(msg).toContain("classifier-v1"); expect(msg).toContain("classifier-v1");
expect(msg).toContain("unavailable"); expect(msg).toContain("unavailable");
}); });
test("tells the model to wait and retry later", () => {
const msg = buildClassifierUnavailableMessage("Bash", "classifier-v1");
expect(msg).toContain("Wait briefly and then try this action again.");
expect(msg).toContain("come back to it later");
});
}); });
// ─── normalizeMessages ──────────────────────────────────────────────────
describe("normalizeMessages", () => { describe("normalizeMessages", () => {
test("splits multi-block assistant message into individual messages", () => { test("splits multi-block assistant message into individual messages", () => {

View File

@@ -374,6 +374,10 @@ export function createSubagentContext(
} }
return { return {
// Preserve the parent Langfuse trace separately so nested side queries
// like auto_mode can attach to the main agent trace instead of the
// subagent's own trace.
langfuseRootTrace: parentContext.langfuseTrace,
// Mutable state - cloned by default to maintain isolation // Mutable state - cloned by default to maintain isolation
// Clone overrides.readFileState if provided, otherwise clone from parent // Clone overrides.readFileState if provided, otherwise clone from parent
readFileState: cloneFileStateCache( readFileState: cloneFileStateCache(

View File

@@ -104,6 +104,7 @@ export function createApiQueryHook<TResult>(
querySource: config.name, querySource: config.name,
mcpTools: [], mcpTools: [],
agentId: context.toolUseContext.agentId, agentId: context.toolUseContext.agentId,
langfuseTrace: context.toolUseContext.langfuseTrace,
}, },
}) })

View File

@@ -84,6 +84,7 @@ Your response must be a JSON object matching one of the following schemas:
querySource: 'hook_prompt', querySource: 'hook_prompt',
mcpTools: [], mcpTools: [],
agentId: toolUseContext.agentId, agentId: toolUseContext.agentId,
langfuseTrace: toolUseContext.langfuseTrace,
outputFormat: { outputFormat: {
type: 'json_schema', type: 'json_schema',
schema: { schema: {

View File

@@ -7,6 +7,9 @@ import {
logEvent, logEvent,
} from '../../services/analytics/index.js' } from '../../services/analytics/index.js'
import { queryModelWithoutStreaming } from '../../services/api/claude.js' import { queryModelWithoutStreaming } from '../../services/api/claude.js'
import { createTrace, endTrace, isLangfuseEnabled } from '../../services/langfuse/index.js'
import { getSessionId } from '../../bootstrap/state.js'
import { getAPIProvider } from '../model/providers.js'
import { getEmptyToolPermissionContext } from '../../Tool.js' import { getEmptyToolPermissionContext } from '../../Tool.js'
import type { Message } from '../../types/message.js' import type { Message } from '../../types/message.js'
import { createAbortController } from '../abortController.js' import { createAbortController } from '../abortController.js'
@@ -209,6 +212,16 @@ export async function applySkillImprovement(
const updateList = updates.map(u => `- ${u.section}: ${u.change}`).join('\n') const updateList = updates.map(u => `- ${u.section}: ${u.change}`).join('\n')
const model = getSmallFastModel()
const langfuseTrace = isLangfuseEnabled()
? createTrace({
sessionId: getSessionId(),
model,
provider: getAPIProvider(),
name: 'skill-improvement-apply',
})
: null
const response = await queryModelWithoutStreaming({ const response = await queryModelWithoutStreaming({
messages: [ messages: [
createUserMessage({ createUserMessage({
@@ -238,7 +251,7 @@ Rules:
signal: createAbortController().signal, signal: createAbortController().signal,
options: { options: {
getToolPermissionContext: async () => getEmptyToolPermissionContext(), getToolPermissionContext: async () => getEmptyToolPermissionContext(),
model: getSmallFastModel(), model,
toolChoice: undefined, toolChoice: undefined,
isNonInteractiveSession: false, isNonInteractiveSession: false,
hasAppendSystemPrompt: false, hasAppendSystemPrompt: false,
@@ -246,9 +259,12 @@ Rules:
agents: [], agents: [],
querySource: 'skill_improvement_apply', querySource: 'skill_improvement_apply',
mcpTools: [], mcpTools: [],
langfuseTrace,
}, },
}) })
endTrace(langfuseTrace)
const responseText = extractTextContent(Array.isArray(response.message.content) ? response.message.content : []).trim() const responseText = extractTextContent(Array.isArray(response.message.content) ? response.message.content : []).trim()
const updatedContent = extractTag(responseText, 'updated_file') const updatedContent = extractTag(responseText, 'updated_file')

View File

@@ -1,153 +1,136 @@
import { mock, describe, expect, test } from "bun:test"; import { mock, describe, expect, test } from 'bun:test'
import { createFileStateCacheWithSizeLimit } from '../../../utils/fileStateCache.js'
import { createSubagentContext } from '../../../utils/forkedAgent.js'
import { getEmptyToolPermissionContext } from '../../../Tool.js'
// Mock log.ts to cut the heavy dependency chain mock.module('src/utils/log.ts', () => ({
mock.module("src/utils/log.ts", () => ({
logError: () => {}, logError: () => {},
logToFile: () => {}, logToFile: () => {},
getLogDisplayTitle: () => "", getLogDisplayTitle: () => '',
logEvent: () => {}, logEvent: () => {},
logMCPError: () => {}, logMCPError: () => {},
logMCPDebug: () => {}, logMCPDebug: () => {},
dateToFilename: (d: Date) => d.toISOString().replace(/[:.]/g, "-"), dateToFilename: (d: Date) => d.toISOString().replace(/[:.]/g, '-'),
getLogFilePath: () => "/tmp/mock-log", getLogFilePath: () => '/tmp/mock-log',
attachErrorLogSink: () => {}, attachErrorLogSink: () => {},
getInMemoryErrors: () => [], getInMemoryErrors: () => [],
loadErrorLogs: async () => [], loadErrorLogs: async () => [],
getErrorLogByIndex: async () => null, getErrorLogByIndex: async () => null,
captureAPIRequest: () => {}, captureAPIRequest: () => {},
_resetErrorLogForTesting: () => {}, _resetErrorLogForTesting: () => {},
})); }))
const { const {
getDenyRuleForTool, getDenyRuleForTool,
getAskRuleForTool, getAskRuleForTool,
getDenyRuleForAgent, getDenyRuleForAgent,
filterDeniedAgents, filterDeniedAgents,
} = await import("../permissions"); } = await import('../permissions')
import { getEmptyToolPermissionContext } from "../../../Tool"; function makeContext(opts: { denyRules?: string[]; askRules?: string[] }) {
const ctx = getEmptyToolPermissionContext()
// ─── Helper ───────────────────────────────────────────────────────────── const deny: Record<string, string[]> = {}
const ask: Record<string, string[]> = {}
function makeContext(opts: { if (opts.denyRules?.length) deny.localSettings = opts.denyRules
denyRules?: string[]; if (opts.askRules?.length) ask.localSettings = opts.askRules
askRules?: string[]; return { ...ctx, alwaysDenyRules: deny, alwaysAskRules: ask } as any
}) {
const ctx = getEmptyToolPermissionContext();
const deny: Record<string, string[]> = {};
const ask: Record<string, string[]> = {};
// alwaysDenyRules stores raw rule strings — getDenyRules() calls
// permissionRuleValueFromString internally
if (opts.denyRules?.length) {
deny["localSettings"] = opts.denyRules;
}
if (opts.askRules?.length) {
ask["localSettings"] = opts.askRules;
}
return {
...ctx,
alwaysDenyRules: deny,
alwaysAskRules: ask,
} as any;
} }
function makeTool(name: string, mcpInfo?: { serverName: string; toolName: string }) { function makeTool(name: string, mcpInfo?: { serverName: string; toolName: string }) {
return { name, mcpInfo }; return { name, mcpInfo }
} }
// ─── getDenyRuleForTool ───────────────────────────────────────────────── describe('getDenyRuleForTool', () => {
test('returns null when no deny rules', () => {
const ctx = makeContext({})
expect(getDenyRuleForTool(ctx, makeTool('Bash'))).toBeNull()
})
test('returns matching deny rule for tool', () => {
const ctx = makeContext({ denyRules: ['Bash'] })
const result = getDenyRuleForTool(ctx, makeTool('Bash'))
expect(result).not.toBeNull()
expect(result!.ruleValue.toolName).toBe('Bash')
})
test('returns null for non-matching tool', () => {
const ctx = makeContext({ denyRules: ['Bash'] })
expect(getDenyRuleForTool(ctx, makeTool('Read'))).toBeNull()
})
test('rule with content does not match whole-tool deny', () => {
const ctx = makeContext({ denyRules: ['Bash(rm -rf)'] })
const result = getDenyRuleForTool(ctx, makeTool('Bash'))
expect(result).toBeNull()
})
})
describe("getDenyRuleForTool", () => { describe('getAskRuleForTool', () => {
test("returns null when no deny rules", () => { test('returns null when no ask rules', () => {
const ctx = makeContext({}); const ctx = makeContext({})
expect(getDenyRuleForTool(ctx, makeTool("Bash"))).toBeNull(); expect(getAskRuleForTool(ctx, makeTool('Bash'))).toBeNull()
}); })
test('returns matching ask rule', () => {
const ctx = makeContext({ askRules: ['Write'] })
const result = getAskRuleForTool(ctx, makeTool('Write'))
expect(result).not.toBeNull()
})
test('returns null for non-matching tool', () => {
const ctx = makeContext({ askRules: ['Write'] })
expect(getAskRuleForTool(ctx, makeTool('Bash'))).toBeNull()
})
})
test("returns matching deny rule for tool", () => { describe('getDenyRuleForAgent', () => {
const ctx = makeContext({ denyRules: ["Bash"] }); test('returns null when no deny rules', () => {
const result = getDenyRuleForTool(ctx, makeTool("Bash")); const ctx = makeContext({})
expect(result).not.toBeNull(); expect(getDenyRuleForAgent(ctx, 'Agent', 'Explore')).toBeNull()
expect(result!.ruleValue.toolName).toBe("Bash"); })
}); test('returns matching deny rule for agent type', () => {
const ctx = makeContext({ denyRules: ['Agent(Explore)'] })
const result = getDenyRuleForAgent(ctx, 'Agent', 'Explore')
expect(result).not.toBeNull()
})
test('returns null for non-matching agent type', () => {
const ctx = makeContext({ denyRules: ['Agent(Explore)'] })
expect(getDenyRuleForAgent(ctx, 'Agent', 'Research')).toBeNull()
})
})
test("returns null for non-matching tool", () => { describe('Langfuse trace propagation', () => {
const ctx = makeContext({ denyRules: ["Bash"] }); test('subagent context preserves parent trace for nested side queries', () => {
expect(getDenyRuleForTool(ctx, makeTool("Read"))).toBeNull(); const parentTrace = { id: 'parent-trace' } as never
}); const parentContext = {
...getEmptyToolPermissionContext(),
messages: [],
abortController: new AbortController(),
readFileState: createFileStateCacheWithSizeLimit(1),
getAppState: () => ({ toolPermissionContext: getEmptyToolPermissionContext() }),
setAppState: () => {},
updateFileHistoryState: () => {},
updateAttributionState: () => {},
setInProgressToolUseIDs: () => {},
setResponseLength: () => {},
langfuseTrace: parentTrace,
} as never
const subagentContext = createSubagentContext(parentContext)
expect(subagentContext.langfuseRootTrace).toBe(parentTrace)
})
})
test("rule with content does not match whole-tool deny", () => { describe('filterDeniedAgents', () => {
// getDenyRuleForTool uses toolMatchesRule which requires ruleContent === undefined test('returns all agents when no deny rules', () => {
// Rules like "Bash(rm -rf)" only match specific invocations, not the entire tool const ctx = makeContext({})
const ctx = makeContext({ denyRules: ["Bash(rm -rf)"] }); const agents = [{ agentType: 'Explore' }, { agentType: 'Research' }]
const result = getDenyRuleForTool(ctx, makeTool("Bash")); expect(filterDeniedAgents(agents, ctx, 'Agent')).toEqual(agents)
expect(result).toBeNull(); })
}); test('filters out denied agent type', () => {
}); const ctx = makeContext({ denyRules: ['Agent(Explore)'] })
const agents = [{ agentType: 'Explore' }, { agentType: 'Research' }]
// ─── getAskRuleForTool ────────────────────────────────────────────────── const result = filterDeniedAgents(agents, ctx, 'Agent')
expect(result).toHaveLength(1)
describe("getAskRuleForTool", () => { expect(result[0]!.agentType).toBe('Research')
test("returns null when no ask rules", () => { })
const ctx = makeContext({}); test('returns empty array when all agents denied', () => {
expect(getAskRuleForTool(ctx, makeTool("Bash"))).toBeNull(); const ctx = makeContext({ denyRules: ['Agent(Explore)', 'Agent(Research)'] })
}); const agents = [{ agentType: 'Explore' }, { agentType: 'Research' }]
expect(filterDeniedAgents(agents, ctx, 'Agent')).toEqual([])
test("returns matching ask rule", () => { })
const ctx = makeContext({ askRules: ["Write"] }); })
const result = getAskRuleForTool(ctx, makeTool("Write"));
expect(result).not.toBeNull();
});
test("returns null for non-matching tool", () => {
const ctx = makeContext({ askRules: ["Write"] });
expect(getAskRuleForTool(ctx, makeTool("Bash"))).toBeNull();
});
});
// ─── getDenyRuleForAgent ────────────────────────────────────────────────
describe("getDenyRuleForAgent", () => {
test("returns null when no deny rules", () => {
const ctx = makeContext({});
expect(getDenyRuleForAgent(ctx, "Agent", "Explore")).toBeNull();
});
test("returns matching deny rule for agent type", () => {
const ctx = makeContext({ denyRules: ["Agent(Explore)"] });
const result = getDenyRuleForAgent(ctx, "Agent", "Explore");
expect(result).not.toBeNull();
});
test("returns null for non-matching agent type", () => {
const ctx = makeContext({ denyRules: ["Agent(Explore)"] });
expect(getDenyRuleForAgent(ctx, "Agent", "Research")).toBeNull();
});
});
// ─── filterDeniedAgents ─────────────────────────────────────────────────
describe("filterDeniedAgents", () => {
test("returns all agents when no deny rules", () => {
const ctx = makeContext({});
const agents = [{ agentType: "Explore" }, { agentType: "Research" }];
expect(filterDeniedAgents(agents, ctx, "Agent")).toEqual(agents);
});
test("filters out denied agent type", () => {
const ctx = makeContext({ denyRules: ["Agent(Explore)"] });
const agents = [{ agentType: "Explore" }, { agentType: "Research" }];
const result = filterDeniedAgents(agents, ctx, "Agent");
expect(result).toHaveLength(1);
expect(result[0]!.agentType).toBe("Research");
});
test("returns empty array when all agents denied", () => {
const ctx = makeContext({
denyRules: ["Agent(Explore)", "Agent(Research)"],
});
const agents = [{ agentType: "Explore" }, { agentType: "Research" }];
expect(filterDeniedAgents(agents, ctx, "Agent")).toEqual([]);
});
});

View File

@@ -7,7 +7,8 @@ import { logForDebugging } from '../debug.js'
import { errorMessage } from '../errors.js' import { errorMessage } from '../errors.js'
import { lazySchema } from '../lazySchema.js' import { lazySchema } from '../lazySchema.js'
import { logError } from '../log.js' import { logError } from '../log.js'
import { getMainLoopModel } from '../model/model.js' import { getMainLoopModel, getSmallFastModel } from '../model/model.js'
import { isPoorModeActive } from '../../commands/poor/poorMode.js'
import { sideQuery } from '../sideQuery.js' import { sideQuery } from '../sideQuery.js'
import { jsonStringify } from '../slowOperations.js' import { jsonStringify } from '../slowOperations.js'
@@ -172,7 +173,7 @@ ${conversationContext ? `\nRecent conversation context:\n${conversationContext}`
Explain this command in context.` Explain this command in context.`
const model = getMainLoopModel() const model = isPoorModeActive() ? getSmallFastModel() : getMainLoopModel()
// Use sideQuery with forced tool choice for guaranteed structured output // Use sideQuery with forced tool choice for guaranteed structured output
const response = await sideQuery({ const response = await sideQuery({

View File

@@ -690,13 +690,16 @@ export const hasPermissionsToUseTool: CanUseToolFn = async (
setClassifierChecking(toolUseID) setClassifierChecking(toolUseID)
let classifierResult let classifierResult
try { try {
logForDebugging(
`[auto-mode] classifyYoloAction called with langfuseTrace=${context.langfuseTrace ? `id=${(context.langfuseTrace as unknown as Record<string, unknown>).id ?? 'present'}` : 'null/undefined'}`,
)
classifierResult = await classifyYoloAction( classifierResult = await classifyYoloAction(
context.messages, context.messages,
action, action,
context.options.tools, context.options.tools,
appState.toolPermissionContext, appState.toolPermissionContext,
context.abortController.signal, context.abortController.signal,
context.langfuseTrace, context.langfuseRootTrace ?? context.langfuseTrace,
) )
} finally { } finally {
clearClassifierChecking(toolUseID) clearClassifierChecking(toolUseID)
@@ -851,6 +854,7 @@ export const hasPermissionsToUseTool: CanUseToolFn = async (
CLASSIFIER_FAIL_CLOSED_REFRESH_MS, CLASSIFIER_FAIL_CLOSED_REFRESH_MS,
) )
) { ) {
if (appState.toolPermissionContext.shouldAvoidPermissionPrompts) {
logForDebugging( logForDebugging(
'Auto mode classifier unavailable, denying with retry guidance (fail closed)', 'Auto mode classifier unavailable, denying with retry guidance (fail closed)',
{ level: 'warn' }, { level: 'warn' },
@@ -868,6 +872,23 @@ export const hasPermissionsToUseTool: CanUseToolFn = async (
), ),
} }
} }
logForDebugging(
'Auto mode classifier unavailable, falling back to prompting with retry guidance (fail closed)',
{ level: 'warn' },
)
return {
behavior: 'ask',
decisionReason: {
type: 'classifier',
classifier: 'auto-mode',
reason: 'Classifier unavailable',
},
message: buildClassifierUnavailableMessage(
tool.name,
classifierResult.model,
),
}
}
// Fail open: fall back to normal permission handling // Fail open: fall back to normal permission handling
logForDebugging( logForDebugging(
'Auto mode classifier unavailable, falling back to normal permission handling (fail open)', 'Auto mode classifier unavailable, falling back to normal permission handling (fail open)',

View File

@@ -28,7 +28,8 @@ import { errorMessage } from '../errors.js'
import { lazySchema } from '../lazySchema.js' import { lazySchema } from '../lazySchema.js'
import { extractTextContent } from '../messages.js' import { extractTextContent } from '../messages.js'
import { resolveAntModel } from '../model/antModels.js' import { resolveAntModel } from '../model/antModels.js'
import { getMainLoopModel } from '../model/model.js' import { getDefaultSonnetModel, getMainLoopModel } from '../model/model.js'
import { isPoorModeActive } from '../../commands/poor/poorMode.js'
import { getAutoModeConfig } from '../settings/settings.js' import { getAutoModeConfig } from '../settings/settings.js'
import { sideQuery } from '../sideQuery.js' import { sideQuery } from '../sideQuery.js'
import type { LangfuseSpan } from '../../services/langfuse/index.js' import type { LangfuseSpan } from '../../services/langfuse/index.js'
@@ -1350,6 +1351,10 @@ function getClassifierModel(): string {
if (config?.model) { if (config?.model) {
return config.model return config.model
} }
// Poor mode: downgrade classifier to Sonnet to reduce cost
if (isPoorModeActive()) {
return getDefaultSonnetModel()
}
return getMainLoopModel() return getMainLoopModel()
} }

View File

@@ -19,6 +19,7 @@ import { createTrace, createChildSpan, endTrace, recordLLMObservation } from '..
import type { LangfuseSpan } from '../services/langfuse/index.js' import type { LangfuseSpan } from '../services/langfuse/index.js'
import { convertMessagesToLangfuse, convertOutputToLangfuse, convertToolsToLangfuse } from '../services/langfuse/convert.js' import { convertMessagesToLangfuse, convertOutputToLangfuse, convertToolsToLangfuse } from '../services/langfuse/convert.js'
import { getModelBetas, modelSupportsStructuredOutputs } from './betas.js' import { getModelBetas, modelSupportsStructuredOutputs } from './betas.js'
import { logForDebugging } from './debug.js'
import { errorMessage } from './errors.js' import { errorMessage } from './errors.js'
import { computeFingerprint } from './fingerprint.js' import { computeFingerprint } from './fingerprint.js'
import { getAPIProvider } from './model/providers.js' import { getAPIProvider } from './model/providers.js'
@@ -194,14 +195,28 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
// When parentSpan is provided, create a child span nested under the // When parentSpan is provided, create a child span nested under the
// main agent trace; otherwise create a standalone root trace. // main agent trace; otherwise create a standalone root trace.
const langfuseTrace = opts.parentSpan const _ps = opts.parentSpan
? createChildSpan(opts.parentSpan, { // eslint-disable-next-line no-constant-condition
if (opts.querySource === 'auto_mode') {
logForDebugging(
`[sideQuery] auto_mode parentSpan=${_ps ? `id=${(_ps as unknown as Record<string, unknown>).id ?? 'present'}` : 'null/undefined'} querySource=${opts.querySource}`,
)
}
// Trace placement: when a parentSpan is provided, create a child span nested
// under the main agent trace. Without a parentSpan, auto_mode queries get no
// trace at all (null): they must never create a standalone root trace
// (agent type), because auto_mode observations should only ever appear as
// spans within the parent. Other query sources without a parent create a
// standalone trace.
const langfuseTrace = _ps
? createChildSpan(_ps, {
name: traceName, name: traceName,
sessionId: getSessionId(), sessionId: getSessionId(),
model: normalizedModel, model: normalizedModel,
provider, provider,
querySource: opts.querySource, querySource: opts.querySource,
}) })
: opts.querySource === 'auto_mode'
? null
: createTrace({ : createTrace({
sessionId: getSessionId(), sessionId: getSessionId(),
model: normalizedModel, model: normalizedModel,