mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-15 12:55:51 +00:00
feat: 增强 auto mode 的易用性 (#312)
* feat: poor 模式降级 yolo 审阅模型 * feat: 为多模块添加 Langfuse tracing 支持 在 web search、agent creation、away summary、token estimation、 skill improvement 等模块中集成 Langfuse trace,并透传至 compact/apiQueryHook/execPromptHook 等调用链。 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: 让 auto mode 记录回主 trace * fix: reopen auto mode prompt when classifier is unavailable * fix: 修复 auto mode 情况下, llm 报错导致弹窗也不打开的问题 --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -9,6 +9,9 @@ import type {
|
||||
} from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||
import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js'
|
||||
import { queryModelWithStreaming } from 'src/services/api/claude.js'
|
||||
import { createTrace, endTrace, isLangfuseEnabled } from 'src/services/langfuse/index.js'
|
||||
import { getSessionId } from 'src/bootstrap/state.js'
|
||||
import { getAPIProvider } from 'src/utils/model/providers.js'
|
||||
import { createUserMessage } from 'src/utils/messages.js'
|
||||
import { getMainLoopModel, getSmallFastModel } from 'src/utils/model/model.js'
|
||||
import { jsonParse } from 'src/utils/slowOperations.js'
|
||||
@@ -38,6 +41,15 @@ export class ApiSearchAdapter implements WebSearchAdapter {
|
||||
const toolSchema = makeToolSchema({ allowedDomains, blockedDomains })
|
||||
|
||||
const useHaiku = getFeatureValue_CACHED_MAY_BE_STALE('tengu_plum_vx3', false)
|
||||
const model = useHaiku ? getSmallFastModel() : getMainLoopModel()
|
||||
const langfuseTrace = isLangfuseEnabled()
|
||||
? createTrace({
|
||||
sessionId: getSessionId(),
|
||||
model,
|
||||
provider: getAPIProvider(),
|
||||
name: 'web-search-tool',
|
||||
})
|
||||
: null
|
||||
|
||||
const queryStream = queryModelWithStreaming({
|
||||
messages: [userMessage],
|
||||
@@ -58,7 +70,7 @@ export class ApiSearchAdapter implements WebSearchAdapter {
|
||||
alwaysAskRules: {},
|
||||
isBypassPermissionsModeAvailable: false,
|
||||
}),
|
||||
model: useHaiku ? getSmallFastModel() : getMainLoopModel(),
|
||||
model,
|
||||
toolChoice: useHaiku ? { type: 'tool' as const, name: 'web_search' } : undefined,
|
||||
isNonInteractiveSession: false,
|
||||
hasAppendSystemPrompt: false,
|
||||
@@ -68,6 +80,7 @@ export class ApiSearchAdapter implements WebSearchAdapter {
|
||||
mcpTools: [],
|
||||
agentId: undefined,
|
||||
effortValue: undefined,
|
||||
langfuseTrace,
|
||||
},
|
||||
})
|
||||
|
||||
@@ -148,6 +161,8 @@ export class ApiSearchAdapter implements WebSearchAdapter {
|
||||
}
|
||||
}
|
||||
|
||||
endTrace(langfuseTrace)
|
||||
|
||||
// Extract SearchResult[] from content blocks
|
||||
return extractSearchResults(allContentBlocks)
|
||||
}
|
||||
|
||||
@@ -277,6 +277,8 @@ export type ToolUseContext = {
|
||||
criticalSystemReminder_EXPERIMENTAL?: string
|
||||
/** Langfuse root trace span for this query turn. Passed down to tool execution for observability. */
|
||||
langfuseTrace?: LangfuseSpan | null
|
||||
/** Langfuse root trace span for the outer/main agent trace. Used when subagents need to nest observations under the parent agent trace. */
|
||||
langfuseRootTrace?: LangfuseSpan | null
|
||||
/** Langfuse batch span wrapping a concurrent tool group. When set, tool observations are nested under it. */
|
||||
langfuseBatchSpan?: LangfuseSpan | null
|
||||
/** When true, preserve toolUseResult on messages even for subagents.
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
import { errorMessage } from '../../utils/errors.js'
|
||||
import {
|
||||
getMainLoopModel,
|
||||
getSmallFastModel,
|
||||
parseUserSpecifiedModel,
|
||||
} from '../../utils/model/model.js'
|
||||
import {
|
||||
@@ -14,6 +15,7 @@ import {
|
||||
getDefaultExternalAutoModeRules,
|
||||
} from '../../utils/permissions/yoloClassifier.js'
|
||||
import { getAutoModeConfig } from '../../utils/settings/settings.js'
|
||||
import { isPoorModeActive } from '../../commands/poor/poorMode.js'
|
||||
import { sideQuery } from '../../utils/sideQuery.js'
|
||||
import { jsonStringify } from '../../utils/slowOperations.js'
|
||||
|
||||
@@ -90,7 +92,9 @@ export async function autoModeCritiqueHandler(options: {
|
||||
|
||||
const model = options.model
|
||||
? parseUserSpecifiedModel(options.model)
|
||||
: getMainLoopModel()
|
||||
: isPoorModeActive()
|
||||
? getSmallFastModel()
|
||||
: getMainLoopModel()
|
||||
|
||||
const defaults = getDefaultExternalAutoModeRules()
|
||||
const classifierPrompt = buildDefaultExternalSystemPrompt()
|
||||
|
||||
@@ -14,6 +14,9 @@ import {
|
||||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
logEvent,
|
||||
} from '../../services/analytics/index.js'
|
||||
import { createTrace, endTrace, isLangfuseEnabled } from '../../services/langfuse/index.js'
|
||||
import { getSessionId } from '../../bootstrap/state.js'
|
||||
import { getAPIProvider } from '../../utils/model/providers.js'
|
||||
import { jsonParse } from '../../utils/slowOperations.js'
|
||||
import { asSystemPrompt } from '../../utils/systemPromptType.js'
|
||||
|
||||
@@ -146,6 +149,15 @@ export async function generateAgent(
|
||||
? AGENT_CREATION_SYSTEM_PROMPT + AGENT_MEMORY_INSTRUCTIONS
|
||||
: AGENT_CREATION_SYSTEM_PROMPT
|
||||
|
||||
const langfuseTrace = isLangfuseEnabled()
|
||||
? createTrace({
|
||||
sessionId: getSessionId(),
|
||||
model,
|
||||
provider: getAPIProvider(),
|
||||
name: 'agent-creation',
|
||||
})
|
||||
: null
|
||||
|
||||
const response = await queryModelWithoutStreaming({
|
||||
messages: normalizeMessagesForAPI(messagesWithContext),
|
||||
systemPrompt: asSystemPrompt([systemPrompt]),
|
||||
@@ -161,9 +173,12 @@ export async function generateAgent(
|
||||
hasAppendSystemPrompt: false,
|
||||
querySource: 'agent_creation',
|
||||
mcpTools: [],
|
||||
langfuseTrace,
|
||||
},
|
||||
})
|
||||
|
||||
endTrace(langfuseTrace)
|
||||
|
||||
const textBlocks = (Array.isArray(response.message.content) ? response.message.content : []).filter(
|
||||
(block): block is ContentBlock & { type: 'text' } => block.type === 'text',
|
||||
)
|
||||
|
||||
@@ -235,6 +235,9 @@ export async function* query(
|
||||
// When called as a sub-agent, langfuseTrace is already set by runAgent()
|
||||
// — reuse it instead of creating an independent trace.
|
||||
const ownsTrace = !params.toolUseContext.langfuseTrace
|
||||
logForDebugging(
|
||||
`[query] ownsTrace=${ownsTrace} incoming langfuseTrace=${params.toolUseContext.langfuseTrace ? 'present' : 'null/undefined'} isLangfuseEnabled=${isLangfuseEnabled()}`,
|
||||
)
|
||||
const langfuseTrace = params.toolUseContext.langfuseTrace
|
||||
?? (isLangfuseEnabled()
|
||||
? createTrace({
|
||||
|
||||
@@ -10,6 +10,9 @@ import { getSmallFastModel } from '../utils/model/model.js'
|
||||
import { asSystemPrompt } from '../utils/systemPromptType.js'
|
||||
import { getResolvedLanguage } from '../utils/language.js'
|
||||
import { queryModelWithoutStreaming } from './api/claude.js'
|
||||
import { createTrace, endTrace, isLangfuseEnabled } from './langfuse/index.js'
|
||||
import { getSessionId } from '../bootstrap/state.js'
|
||||
import { getAPIProvider } from '../utils/model/providers.js'
|
||||
import { getSessionMemoryContent } from './SessionMemory/sessionMemoryUtils.js'
|
||||
|
||||
// Recap only needs recent context — truncate to avoid "prompt too long" on
|
||||
@@ -42,6 +45,16 @@ export async function generateAwaySummary(
|
||||
return null
|
||||
}
|
||||
|
||||
const model = getSmallFastModel()
|
||||
const langfuseTrace = isLangfuseEnabled()
|
||||
? createTrace({
|
||||
sessionId: getSessionId(),
|
||||
model,
|
||||
provider: getAPIProvider(),
|
||||
name: 'away-summary',
|
||||
})
|
||||
: null
|
||||
|
||||
try {
|
||||
const memory = await getSessionMemoryContent()
|
||||
const recent = messages.slice(-RECENT_MESSAGE_WINDOW)
|
||||
@@ -54,7 +67,7 @@ export async function generateAwaySummary(
|
||||
signal,
|
||||
options: {
|
||||
getToolPermissionContext: async () => getEmptyToolPermissionContext(),
|
||||
model: getSmallFastModel(),
|
||||
model,
|
||||
toolChoice: undefined,
|
||||
isNonInteractiveSession: false,
|
||||
hasAppendSystemPrompt: false,
|
||||
@@ -62,6 +75,7 @@ export async function generateAwaySummary(
|
||||
querySource: 'away_summary',
|
||||
mcpTools: [],
|
||||
skipCacheWrite: true,
|
||||
langfuseTrace,
|
||||
},
|
||||
})
|
||||
|
||||
@@ -69,14 +83,17 @@ export async function generateAwaySummary(
|
||||
logForDebugging(
|
||||
`[awaySummary] API error: ${getAssistantMessageText(response)}`,
|
||||
)
|
||||
endTrace(langfuseTrace, undefined, 'error')
|
||||
return null
|
||||
}
|
||||
endTrace(langfuseTrace)
|
||||
return getAssistantMessageText(response)
|
||||
} catch (err) {
|
||||
if (err instanceof APIUserAbortError || signal.aborted) {
|
||||
return null
|
||||
}
|
||||
logForDebugging(`[awaySummary] generation failed: ${err}`)
|
||||
endTrace(langfuseTrace, undefined, 'error')
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1326,6 +1326,7 @@ async function streamCompactSummary({
|
||||
agents: context.options.agentDefinitions.activeAgents,
|
||||
mcpTools: [],
|
||||
effortValue: appState.effortValue,
|
||||
langfuseTrace: context.langfuseTrace,
|
||||
},
|
||||
})
|
||||
const streamIter = streamingGen[Symbol.asyncIterator]()
|
||||
|
||||
@@ -25,6 +25,8 @@ import { jsonStringify } from '../utils/slowOperations.js'
|
||||
import { isToolReferenceBlock } from '../utils/toolSearch.js'
|
||||
import { getAPIMetadata, getExtraBodyParams } from './api/claude.js'
|
||||
import { getAnthropicClient } from './api/client.js'
|
||||
import { createTrace, endTrace, isLangfuseEnabled, recordLLMObservation } from './langfuse/index.js'
|
||||
import { getSessionId } from '../bootstrap/state.js'
|
||||
import { withTokenCountVCR } from './vcr.js'
|
||||
|
||||
// Minimal values for token counting with thinking enabled
|
||||
@@ -309,6 +311,15 @@ export async function countTokensViaHaikuFallback(
|
||||
: betas
|
||||
|
||||
// biome-ignore lint/plugin: token counting needs specialized parameters (thinking, betas) that sideQuery doesn't support
|
||||
const apiStart = Date.now()
|
||||
const langfuseTrace = isLangfuseEnabled()
|
||||
? createTrace({
|
||||
sessionId: getSessionId(),
|
||||
model: normalizeModelStringForAPI(model),
|
||||
provider: getAPIProvider(),
|
||||
name: 'token-estimation',
|
||||
})
|
||||
: null
|
||||
const response = await anthropic.beta.messages.create({
|
||||
model: normalizeModelStringForAPI(model),
|
||||
max_tokens: containsThinking ? TOKEN_COUNT_MAX_TOKENS : 1,
|
||||
@@ -331,6 +342,22 @@ export async function countTokensViaHaikuFallback(
|
||||
const cacheCreationTokens = usage.cache_creation_input_tokens || 0
|
||||
const cacheReadTokens = usage.cache_read_input_tokens || 0
|
||||
|
||||
recordLLMObservation(langfuseTrace, {
|
||||
model: normalizeModelStringForAPI(model),
|
||||
provider: getAPIProvider(),
|
||||
input: messagesToSend,
|
||||
output: response.content,
|
||||
usage: {
|
||||
input_tokens: inputTokens,
|
||||
output_tokens: usage.output_tokens,
|
||||
cache_creation_input_tokens: cacheCreationTokens || undefined,
|
||||
cache_read_input_tokens: cacheReadTokens || undefined,
|
||||
},
|
||||
startTime: new Date(apiStart),
|
||||
endTime: new Date(),
|
||||
})
|
||||
endTrace(langfuseTrace)
|
||||
|
||||
return inputTokens + cacheCreationTokens + cacheReadTokens
|
||||
}
|
||||
|
||||
|
||||
@@ -457,9 +457,14 @@ describe("buildClassifierUnavailableMessage", () => {
|
||||
expect(msg).toContain("classifier-v1");
|
||||
expect(msg).toContain("unavailable");
|
||||
});
|
||||
|
||||
test("tells the model to wait and retry later", () => {
|
||||
const msg = buildClassifierUnavailableMessage("Bash", "classifier-v1");
|
||||
expect(msg).toContain("Wait briefly and then try this action again.");
|
||||
expect(msg).toContain("come back to it later");
|
||||
});
|
||||
});
|
||||
|
||||
// ─── normalizeMessages ──────────────────────────────────────────────────
|
||||
|
||||
describe("normalizeMessages", () => {
|
||||
test("splits multi-block assistant message into individual messages", () => {
|
||||
|
||||
@@ -374,6 +374,10 @@ export function createSubagentContext(
|
||||
}
|
||||
|
||||
return {
|
||||
// Preserve the parent Langfuse trace separately so nested side queries
|
||||
// like auto_mode can attach to the main agent trace instead of the
|
||||
// subagent's own trace.
|
||||
langfuseRootTrace: parentContext.langfuseTrace,
|
||||
// Mutable state - cloned by default to maintain isolation
|
||||
// Clone overrides.readFileState if provided, otherwise clone from parent
|
||||
readFileState: cloneFileStateCache(
|
||||
|
||||
@@ -104,6 +104,7 @@ export function createApiQueryHook<TResult>(
|
||||
querySource: config.name,
|
||||
mcpTools: [],
|
||||
agentId: context.toolUseContext.agentId,
|
||||
langfuseTrace: context.toolUseContext.langfuseTrace,
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
@@ -84,6 +84,7 @@ Your response must be a JSON object matching one of the following schemas:
|
||||
querySource: 'hook_prompt',
|
||||
mcpTools: [],
|
||||
agentId: toolUseContext.agentId,
|
||||
langfuseTrace: toolUseContext.langfuseTrace,
|
||||
outputFormat: {
|
||||
type: 'json_schema',
|
||||
schema: {
|
||||
|
||||
@@ -7,6 +7,9 @@ import {
|
||||
logEvent,
|
||||
} from '../../services/analytics/index.js'
|
||||
import { queryModelWithoutStreaming } from '../../services/api/claude.js'
|
||||
import { createTrace, endTrace, isLangfuseEnabled } from '../../services/langfuse/index.js'
|
||||
import { getSessionId } from '../../bootstrap/state.js'
|
||||
import { getAPIProvider } from '../model/providers.js'
|
||||
import { getEmptyToolPermissionContext } from '../../Tool.js'
|
||||
import type { Message } from '../../types/message.js'
|
||||
import { createAbortController } from '../abortController.js'
|
||||
@@ -209,6 +212,16 @@ export async function applySkillImprovement(
|
||||
|
||||
const updateList = updates.map(u => `- ${u.section}: ${u.change}`).join('\n')
|
||||
|
||||
const model = getSmallFastModel()
|
||||
const langfuseTrace = isLangfuseEnabled()
|
||||
? createTrace({
|
||||
sessionId: getSessionId(),
|
||||
model,
|
||||
provider: getAPIProvider(),
|
||||
name: 'skill-improvement-apply',
|
||||
})
|
||||
: null
|
||||
|
||||
const response = await queryModelWithoutStreaming({
|
||||
messages: [
|
||||
createUserMessage({
|
||||
@@ -238,7 +251,7 @@ Rules:
|
||||
signal: createAbortController().signal,
|
||||
options: {
|
||||
getToolPermissionContext: async () => getEmptyToolPermissionContext(),
|
||||
model: getSmallFastModel(),
|
||||
model,
|
||||
toolChoice: undefined,
|
||||
isNonInteractiveSession: false,
|
||||
hasAppendSystemPrompt: false,
|
||||
@@ -246,9 +259,12 @@ Rules:
|
||||
agents: [],
|
||||
querySource: 'skill_improvement_apply',
|
||||
mcpTools: [],
|
||||
langfuseTrace,
|
||||
},
|
||||
})
|
||||
|
||||
endTrace(langfuseTrace)
|
||||
|
||||
const responseText = extractTextContent(Array.isArray(response.message.content) ? response.message.content : []).trim()
|
||||
|
||||
const updatedContent = extractTag(responseText, 'updated_file')
|
||||
|
||||
@@ -1,153 +1,136 @@
|
||||
import { mock, describe, expect, test } from "bun:test";
|
||||
import { mock, describe, expect, test } from 'bun:test'
|
||||
import { createFileStateCacheWithSizeLimit } from '../../../utils/fileStateCache.js'
|
||||
import { createSubagentContext } from '../../../utils/forkedAgent.js'
|
||||
import { getEmptyToolPermissionContext } from '../../../Tool.js'
|
||||
|
||||
// Mock log.ts to cut the heavy dependency chain
|
||||
mock.module("src/utils/log.ts", () => ({
|
||||
mock.module('src/utils/log.ts', () => ({
|
||||
logError: () => {},
|
||||
logToFile: () => {},
|
||||
getLogDisplayTitle: () => "",
|
||||
getLogDisplayTitle: () => '',
|
||||
logEvent: () => {},
|
||||
logMCPError: () => {},
|
||||
logMCPDebug: () => {},
|
||||
dateToFilename: (d: Date) => d.toISOString().replace(/[:.]/g, "-"),
|
||||
getLogFilePath: () => "/tmp/mock-log",
|
||||
dateToFilename: (d: Date) => d.toISOString().replace(/[:.]/g, '-'),
|
||||
getLogFilePath: () => '/tmp/mock-log',
|
||||
attachErrorLogSink: () => {},
|
||||
getInMemoryErrors: () => [],
|
||||
loadErrorLogs: async () => [],
|
||||
getErrorLogByIndex: async () => null,
|
||||
captureAPIRequest: () => {},
|
||||
_resetErrorLogForTesting: () => {},
|
||||
}));
|
||||
}))
|
||||
|
||||
const {
|
||||
getDenyRuleForTool,
|
||||
getAskRuleForTool,
|
||||
getDenyRuleForAgent,
|
||||
filterDeniedAgents,
|
||||
} = await import("../permissions");
|
||||
} = await import('../permissions')
|
||||
|
||||
import { getEmptyToolPermissionContext } from "../../../Tool";
|
||||
|
||||
// ─── Helper ─────────────────────────────────────────────────────────────
|
||||
|
||||
function makeContext(opts: {
|
||||
denyRules?: string[];
|
||||
askRules?: string[];
|
||||
}) {
|
||||
const ctx = getEmptyToolPermissionContext();
|
||||
const deny: Record<string, string[]> = {};
|
||||
const ask: Record<string, string[]> = {};
|
||||
|
||||
// alwaysDenyRules stores raw rule strings — getDenyRules() calls
|
||||
// permissionRuleValueFromString internally
|
||||
if (opts.denyRules?.length) {
|
||||
deny["localSettings"] = opts.denyRules;
|
||||
}
|
||||
if (opts.askRules?.length) {
|
||||
ask["localSettings"] = opts.askRules;
|
||||
}
|
||||
|
||||
return {
|
||||
...ctx,
|
||||
alwaysDenyRules: deny,
|
||||
alwaysAskRules: ask,
|
||||
} as any;
|
||||
function makeContext(opts: { denyRules?: string[]; askRules?: string[] }) {
|
||||
const ctx = getEmptyToolPermissionContext()
|
||||
const deny: Record<string, string[]> = {}
|
||||
const ask: Record<string, string[]> = {}
|
||||
if (opts.denyRules?.length) deny.localSettings = opts.denyRules
|
||||
if (opts.askRules?.length) ask.localSettings = opts.askRules
|
||||
return { ...ctx, alwaysDenyRules: deny, alwaysAskRules: ask } as any
|
||||
}
|
||||
|
||||
function makeTool(name: string, mcpInfo?: { serverName: string; toolName: string }) {
|
||||
return { name, mcpInfo };
|
||||
return { name, mcpInfo }
|
||||
}
|
||||
|
||||
// ─── getDenyRuleForTool ─────────────────────────────────────────────────
|
||||
describe('getDenyRuleForTool', () => {
|
||||
test('returns null when no deny rules', () => {
|
||||
const ctx = makeContext({})
|
||||
expect(getDenyRuleForTool(ctx, makeTool('Bash'))).toBeNull()
|
||||
})
|
||||
test('returns matching deny rule for tool', () => {
|
||||
const ctx = makeContext({ denyRules: ['Bash'] })
|
||||
const result = getDenyRuleForTool(ctx, makeTool('Bash'))
|
||||
expect(result).not.toBeNull()
|
||||
expect(result!.ruleValue.toolName).toBe('Bash')
|
||||
})
|
||||
test('returns null for non-matching tool', () => {
|
||||
const ctx = makeContext({ denyRules: ['Bash'] })
|
||||
expect(getDenyRuleForTool(ctx, makeTool('Read'))).toBeNull()
|
||||
})
|
||||
test('rule with content does not match whole-tool deny', () => {
|
||||
const ctx = makeContext({ denyRules: ['Bash(rm -rf)'] })
|
||||
const result = getDenyRuleForTool(ctx, makeTool('Bash'))
|
||||
expect(result).toBeNull()
|
||||
})
|
||||
})
|
||||
|
||||
describe("getDenyRuleForTool", () => {
|
||||
test("returns null when no deny rules", () => {
|
||||
const ctx = makeContext({});
|
||||
expect(getDenyRuleForTool(ctx, makeTool("Bash"))).toBeNull();
|
||||
});
|
||||
describe('getAskRuleForTool', () => {
|
||||
test('returns null when no ask rules', () => {
|
||||
const ctx = makeContext({})
|
||||
expect(getAskRuleForTool(ctx, makeTool('Bash'))).toBeNull()
|
||||
})
|
||||
test('returns matching ask rule', () => {
|
||||
const ctx = makeContext({ askRules: ['Write'] })
|
||||
const result = getAskRuleForTool(ctx, makeTool('Write'))
|
||||
expect(result).not.toBeNull()
|
||||
})
|
||||
test('returns null for non-matching tool', () => {
|
||||
const ctx = makeContext({ askRules: ['Write'] })
|
||||
expect(getAskRuleForTool(ctx, makeTool('Bash'))).toBeNull()
|
||||
})
|
||||
})
|
||||
|
||||
test("returns matching deny rule for tool", () => {
|
||||
const ctx = makeContext({ denyRules: ["Bash"] });
|
||||
const result = getDenyRuleForTool(ctx, makeTool("Bash"));
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.ruleValue.toolName).toBe("Bash");
|
||||
});
|
||||
describe('getDenyRuleForAgent', () => {
|
||||
test('returns null when no deny rules', () => {
|
||||
const ctx = makeContext({})
|
||||
expect(getDenyRuleForAgent(ctx, 'Agent', 'Explore')).toBeNull()
|
||||
})
|
||||
test('returns matching deny rule for agent type', () => {
|
||||
const ctx = makeContext({ denyRules: ['Agent(Explore)'] })
|
||||
const result = getDenyRuleForAgent(ctx, 'Agent', 'Explore')
|
||||
expect(result).not.toBeNull()
|
||||
})
|
||||
test('returns null for non-matching agent type', () => {
|
||||
const ctx = makeContext({ denyRules: ['Agent(Explore)'] })
|
||||
expect(getDenyRuleForAgent(ctx, 'Agent', 'Research')).toBeNull()
|
||||
})
|
||||
})
|
||||
|
||||
test("returns null for non-matching tool", () => {
|
||||
const ctx = makeContext({ denyRules: ["Bash"] });
|
||||
expect(getDenyRuleForTool(ctx, makeTool("Read"))).toBeNull();
|
||||
});
|
||||
describe('Langfuse trace propagation', () => {
|
||||
test('subagent context preserves parent trace for nested side queries', () => {
|
||||
const parentTrace = { id: 'parent-trace' } as never
|
||||
const parentContext = {
|
||||
...getEmptyToolPermissionContext(),
|
||||
messages: [],
|
||||
abortController: new AbortController(),
|
||||
readFileState: createFileStateCacheWithSizeLimit(1),
|
||||
getAppState: () => ({ toolPermissionContext: getEmptyToolPermissionContext() }),
|
||||
setAppState: () => {},
|
||||
updateFileHistoryState: () => {},
|
||||
updateAttributionState: () => {},
|
||||
setInProgressToolUseIDs: () => {},
|
||||
setResponseLength: () => {},
|
||||
langfuseTrace: parentTrace,
|
||||
} as never
|
||||
const subagentContext = createSubagentContext(parentContext)
|
||||
expect(subagentContext.langfuseRootTrace).toBe(parentTrace)
|
||||
})
|
||||
})
|
||||
|
||||
test("rule with content does not match whole-tool deny", () => {
|
||||
// getDenyRuleForTool uses toolMatchesRule which requires ruleContent === undefined
|
||||
// Rules like "Bash(rm -rf)" only match specific invocations, not the entire tool
|
||||
const ctx = makeContext({ denyRules: ["Bash(rm -rf)"] });
|
||||
const result = getDenyRuleForTool(ctx, makeTool("Bash"));
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
// ─── getAskRuleForTool ──────────────────────────────────────────────────
|
||||
|
||||
describe("getAskRuleForTool", () => {
|
||||
test("returns null when no ask rules", () => {
|
||||
const ctx = makeContext({});
|
||||
expect(getAskRuleForTool(ctx, makeTool("Bash"))).toBeNull();
|
||||
});
|
||||
|
||||
test("returns matching ask rule", () => {
|
||||
const ctx = makeContext({ askRules: ["Write"] });
|
||||
const result = getAskRuleForTool(ctx, makeTool("Write"));
|
||||
expect(result).not.toBeNull();
|
||||
});
|
||||
|
||||
test("returns null for non-matching tool", () => {
|
||||
const ctx = makeContext({ askRules: ["Write"] });
|
||||
expect(getAskRuleForTool(ctx, makeTool("Bash"))).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
// ─── getDenyRuleForAgent ────────────────────────────────────────────────
|
||||
|
||||
describe("getDenyRuleForAgent", () => {
|
||||
test("returns null when no deny rules", () => {
|
||||
const ctx = makeContext({});
|
||||
expect(getDenyRuleForAgent(ctx, "Agent", "Explore")).toBeNull();
|
||||
});
|
||||
|
||||
test("returns matching deny rule for agent type", () => {
|
||||
const ctx = makeContext({ denyRules: ["Agent(Explore)"] });
|
||||
const result = getDenyRuleForAgent(ctx, "Agent", "Explore");
|
||||
expect(result).not.toBeNull();
|
||||
});
|
||||
|
||||
test("returns null for non-matching agent type", () => {
|
||||
const ctx = makeContext({ denyRules: ["Agent(Explore)"] });
|
||||
expect(getDenyRuleForAgent(ctx, "Agent", "Research")).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
// ─── filterDeniedAgents ─────────────────────────────────────────────────
|
||||
|
||||
describe("filterDeniedAgents", () => {
|
||||
test("returns all agents when no deny rules", () => {
|
||||
const ctx = makeContext({});
|
||||
const agents = [{ agentType: "Explore" }, { agentType: "Research" }];
|
||||
expect(filterDeniedAgents(agents, ctx, "Agent")).toEqual(agents);
|
||||
});
|
||||
|
||||
test("filters out denied agent type", () => {
|
||||
const ctx = makeContext({ denyRules: ["Agent(Explore)"] });
|
||||
const agents = [{ agentType: "Explore" }, { agentType: "Research" }];
|
||||
const result = filterDeniedAgents(agents, ctx, "Agent");
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0]!.agentType).toBe("Research");
|
||||
});
|
||||
|
||||
test("returns empty array when all agents denied", () => {
|
||||
const ctx = makeContext({
|
||||
denyRules: ["Agent(Explore)", "Agent(Research)"],
|
||||
});
|
||||
const agents = [{ agentType: "Explore" }, { agentType: "Research" }];
|
||||
expect(filterDeniedAgents(agents, ctx, "Agent")).toEqual([]);
|
||||
});
|
||||
});
|
||||
describe('filterDeniedAgents', () => {
|
||||
test('returns all agents when no deny rules', () => {
|
||||
const ctx = makeContext({})
|
||||
const agents = [{ agentType: 'Explore' }, { agentType: 'Research' }]
|
||||
expect(filterDeniedAgents(agents, ctx, 'Agent')).toEqual(agents)
|
||||
})
|
||||
test('filters out denied agent type', () => {
|
||||
const ctx = makeContext({ denyRules: ['Agent(Explore)'] })
|
||||
const agents = [{ agentType: 'Explore' }, { agentType: 'Research' }]
|
||||
const result = filterDeniedAgents(agents, ctx, 'Agent')
|
||||
expect(result).toHaveLength(1)
|
||||
expect(result[0]!.agentType).toBe('Research')
|
||||
})
|
||||
test('returns empty array when all agents denied', () => {
|
||||
const ctx = makeContext({ denyRules: ['Agent(Explore)', 'Agent(Research)'] })
|
||||
const agents = [{ agentType: 'Explore' }, { agentType: 'Research' }]
|
||||
expect(filterDeniedAgents(agents, ctx, 'Agent')).toEqual([])
|
||||
})
|
||||
})
|
||||
|
||||
@@ -7,7 +7,8 @@ import { logForDebugging } from '../debug.js'
|
||||
import { errorMessage } from '../errors.js'
|
||||
import { lazySchema } from '../lazySchema.js'
|
||||
import { logError } from '../log.js'
|
||||
import { getMainLoopModel } from '../model/model.js'
|
||||
import { getMainLoopModel, getSmallFastModel } from '../model/model.js'
|
||||
import { isPoorModeActive } from '../../commands/poor/poorMode.js'
|
||||
import { sideQuery } from '../sideQuery.js'
|
||||
import { jsonStringify } from '../slowOperations.js'
|
||||
|
||||
@@ -172,7 +173,7 @@ ${conversationContext ? `\nRecent conversation context:\n${conversationContext}`
|
||||
|
||||
Explain this command in context.`
|
||||
|
||||
const model = getMainLoopModel()
|
||||
const model = isPoorModeActive() ? getSmallFastModel() : getMainLoopModel()
|
||||
|
||||
// Use sideQuery with forced tool choice for guaranteed structured output
|
||||
const response = await sideQuery({
|
||||
|
||||
@@ -690,13 +690,16 @@ export const hasPermissionsToUseTool: CanUseToolFn = async (
|
||||
setClassifierChecking(toolUseID)
|
||||
let classifierResult
|
||||
try {
|
||||
logForDebugging(
|
||||
`[auto-mode] classifyYoloAction called with langfuseTrace=${context.langfuseTrace ? `id=${(context.langfuseTrace as unknown as Record<string, unknown>).id ?? 'present'}` : 'null/undefined'}`,
|
||||
)
|
||||
classifierResult = await classifyYoloAction(
|
||||
context.messages,
|
||||
action,
|
||||
context.options.tools,
|
||||
appState.toolPermissionContext,
|
||||
context.abortController.signal,
|
||||
context.langfuseTrace,
|
||||
context.langfuseRootTrace ?? context.langfuseTrace,
|
||||
)
|
||||
} finally {
|
||||
clearClassifierChecking(toolUseID)
|
||||
@@ -851,12 +854,30 @@ export const hasPermissionsToUseTool: CanUseToolFn = async (
|
||||
CLASSIFIER_FAIL_CLOSED_REFRESH_MS,
|
||||
)
|
||||
) {
|
||||
if (appState.toolPermissionContext.shouldAvoidPermissionPrompts) {
|
||||
logForDebugging(
|
||||
'Auto mode classifier unavailable, denying with retry guidance (fail closed)',
|
||||
{ level: 'warn' },
|
||||
)
|
||||
return {
|
||||
behavior: 'deny',
|
||||
decisionReason: {
|
||||
type: 'classifier',
|
||||
classifier: 'auto-mode',
|
||||
reason: 'Classifier unavailable',
|
||||
},
|
||||
message: buildClassifierUnavailableMessage(
|
||||
tool.name,
|
||||
classifierResult.model,
|
||||
),
|
||||
}
|
||||
}
|
||||
logForDebugging(
|
||||
'Auto mode classifier unavailable, denying with retry guidance (fail closed)',
|
||||
'Auto mode classifier unavailable, falling back to prompting with retry guidance (fail closed)',
|
||||
{ level: 'warn' },
|
||||
)
|
||||
return {
|
||||
behavior: 'deny',
|
||||
behavior: 'ask',
|
||||
decisionReason: {
|
||||
type: 'classifier',
|
||||
classifier: 'auto-mode',
|
||||
|
||||
@@ -28,7 +28,8 @@ import { errorMessage } from '../errors.js'
|
||||
import { lazySchema } from '../lazySchema.js'
|
||||
import { extractTextContent } from '../messages.js'
|
||||
import { resolveAntModel } from '../model/antModels.js'
|
||||
import { getMainLoopModel } from '../model/model.js'
|
||||
import { getDefaultSonnetModel, getMainLoopModel } from '../model/model.js'
|
||||
import { isPoorModeActive } from '../../commands/poor/poorMode.js'
|
||||
import { getAutoModeConfig } from '../settings/settings.js'
|
||||
import { sideQuery } from '../sideQuery.js'
|
||||
import type { LangfuseSpan } from '../../services/langfuse/index.js'
|
||||
@@ -1350,6 +1351,10 @@ function getClassifierModel(): string {
|
||||
if (config?.model) {
|
||||
return config.model
|
||||
}
|
||||
// Poor mode: downgrade classifier to Sonnet to reduce cost
|
||||
if (isPoorModeActive()) {
|
||||
return getDefaultSonnetModel()
|
||||
}
|
||||
return getMainLoopModel()
|
||||
}
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ import { createTrace, createChildSpan, endTrace, recordLLMObservation } from '..
|
||||
import type { LangfuseSpan } from '../services/langfuse/index.js'
|
||||
import { convertMessagesToLangfuse, convertOutputToLangfuse, convertToolsToLangfuse } from '../services/langfuse/convert.js'
|
||||
import { getModelBetas, modelSupportsStructuredOutputs } from './betas.js'
|
||||
import { logForDebugging } from './debug.js'
|
||||
import { errorMessage } from './errors.js'
|
||||
import { computeFingerprint } from './fingerprint.js'
|
||||
import { getAPIProvider } from './model/providers.js'
|
||||
@@ -194,21 +195,35 @@ export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
|
||||
|
||||
// When parentSpan is provided, create a child span nested under the
|
||||
// main agent trace; otherwise create a standalone root trace.
|
||||
const langfuseTrace = opts.parentSpan
|
||||
? createChildSpan(opts.parentSpan, {
|
||||
const _ps = opts.parentSpan
|
||||
// Debug aid: log the parent span that auto_mode side queries receive.
|
||||
if (opts.querySource === 'auto_mode') {
|
||||
logForDebugging(
|
||||
`[sideQuery] auto_mode parentSpan=${_ps ? `id=${(_ps as unknown as Record<string, unknown>).id ?? 'present'}` : 'null/undefined'} querySource=${opts.querySource}`,
|
||||
)
|
||||
}
|
||||
// When parentSpan is provided, create a child span nested under the
|
||||
// main agent trace. For auto_mode queries, we must always nest under
|
||||
// a parent span — never create a standalone root trace (agent type),
|
||||
// as auto_mode observations should appear as spans within the parent.
|
||||
// For other query sources without a parent, create a standalone trace.
|
||||
const langfuseTrace = _ps
|
||||
? createChildSpan(_ps, {
|
||||
name: traceName,
|
||||
sessionId: getSessionId(),
|
||||
model: normalizedModel,
|
||||
provider,
|
||||
querySource: opts.querySource,
|
||||
})
|
||||
: createTrace({
|
||||
sessionId: getSessionId(),
|
||||
model: normalizedModel,
|
||||
provider,
|
||||
name: traceName,
|
||||
querySource: opts.querySource,
|
||||
})
|
||||
: opts.querySource === 'auto_mode'
|
||||
? null
|
||||
: createTrace({
|
||||
sessionId: getSessionId(),
|
||||
model: normalizedModel,
|
||||
provider,
|
||||
name: traceName,
|
||||
querySource: opts.querySource,
|
||||
})
|
||||
|
||||
let response: BetaMessage
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user