From d208855f07c5317b19fd976e1e899cf19a9d7557 Mon Sep 17 00:00:00 2001 From: unraid Date: Wed, 22 Apr 2026 22:38:10 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=20builtin-tools=20?= =?UTF-8?q?=E5=A2=9E=E5=BC=BA=E4=B8=8E=E6=B5=8B=E8=AF=95=E8=A6=86=E7=9B=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .../tools/CtxInspectTool/CtxInspectTool.ts | 52 +- .../__tests__/CtxInspectTool.test.ts | 216 ++++ .../RemoteTriggerTool/RemoteTriggerTool.ts | 153 +-- .../__tests__/RemoteTriggerTool.test.ts | 91 ++ .../tools/TeamDeleteTool/TeamDeleteTool.ts | 102 +- .../tools/WebBrowserTool/WebBrowserTool.ts | 110 +- .../__tests__/WebBrowserTool.test.ts | 94 ++ .../src/tools/WebSearchTool/adapters/index.ts | 26 +- .../src/tools/WorkflowTool/WorkflowTool.ts | 398 ++++++- .../__tests__/WorkflowTool.test.ts | 99 ++ .../shared/__tests__/spawnMultiAgent.test.ts | 54 + .../src/tools/shared/spawnMultiAgent.ts | 1039 +++-------------- 12 files changed, 1449 insertions(+), 985 deletions(-) create mode 100644 packages/builtin-tools/src/tools/CtxInspectTool/__tests__/CtxInspectTool.test.ts create mode 100644 packages/builtin-tools/src/tools/RemoteTriggerTool/__tests__/RemoteTriggerTool.test.ts create mode 100644 packages/builtin-tools/src/tools/WebBrowserTool/__tests__/WebBrowserTool.test.ts create mode 100644 packages/builtin-tools/src/tools/WorkflowTool/__tests__/WorkflowTool.test.ts create mode 100644 packages/builtin-tools/src/tools/shared/__tests__/spawnMultiAgent.test.ts diff --git a/packages/builtin-tools/src/tools/CtxInspectTool/CtxInspectTool.ts b/packages/builtin-tools/src/tools/CtxInspectTool/CtxInspectTool.ts index c49933cd2..8299e7822 100644 --- a/packages/builtin-tools/src/tools/CtxInspectTool/CtxInspectTool.ts +++ b/packages/builtin-tools/src/tools/CtxInspectTool/CtxInspectTool.ts @@ -2,6 +2,12 @@ import { z } from 'zod/v4' import type { ToolResultBlockParam } from 'src/Tool.js' import { buildTool } from 'src/Tool.js' import { lazySchema } from 'src/utils/lazySchema.js' +import { tokenCountWithEstimation } from 'src/utils/tokens.js' +import { + getStats, + isContextCollapseEnabled, +} from 'src/services/contextCollapse/index.js' +import { isSessionMemoryInitialized } from 'src/services/SessionMemory/sessionMemoryUtils.js' const CTX_INSPECT_TOOL_NAME = 'CtxInspect' @@ -19,6 +25,10 @@ type CtxInput = z.infer type CtxOutput = { total_tokens: number message_count: number + context_window_model: string + prompt_caching_enabled: boolean + session_memory_enabled: boolean + context_collapse_enabled: boolean summary: string } @@ -67,13 +77,45 @@ Use this to understand your context budget before deciding whether to snip old m } }, - async call() { - // Context inspection is wired into the context collapse system. + async call(input: CtxInput, context) { + const messages = context.messages ?? [] + const model = context.options?.mainLoopModel ?? 'unknown' + const totalTokens = tokenCountWithEstimation(messages) + const collapseEnabled = isContextCollapseEnabled() + const collapseStats = getStats() + const focused = input.query?.trim() + + const sessionMemoryEnabled = isSessionMemoryInitialized() + // Prompt caching is an API-level feature controlled by the provider, not + // a user-facing toggle. Report as enabled only for providers known to + // support Anthropic-style prompt caching (first-party, Bedrock, Vertex). + const promptCachingEnabled = !model.startsWith('openai/') && + !model.startsWith('grok/') && + !model.startsWith('gemini/') + + const summaryParts = [ + focused ? `Focus: ${focused}` : 'Overall context summary', + `Model context: ${model}`, + `Prompt caching: ${promptCachingEnabled ? 'enabled' : 'disabled'}`, + `Session memory: ${sessionMemoryEnabled ? 'enabled' : 'disabled'}`, + `Context collapse: ${collapseEnabled ? 'enabled' : 'disabled'}`, + ] + + if (collapseEnabled) { + summaryParts.push( + `Collapse spans: ${collapseStats.collapsedSpans} committed, ${collapseStats.stagedSpans} staged, ${collapseStats.collapsedMessages} messages summarized`, + ) + } + return { data: { - total_tokens: 0, - message_count: 0, - summary: 'Context inspection requires the CONTEXT_COLLAPSE runtime.', + total_tokens: totalTokens, + message_count: messages.length, + context_window_model: model, + prompt_caching_enabled: promptCachingEnabled, + session_memory_enabled: sessionMemoryEnabled, + context_collapse_enabled: collapseEnabled, + summary: summaryParts.join('\n'), }, } }, diff --git a/packages/builtin-tools/src/tools/CtxInspectTool/__tests__/CtxInspectTool.test.ts b/packages/builtin-tools/src/tools/CtxInspectTool/__tests__/CtxInspectTool.test.ts new file mode 100644 index 000000000..36b842e4c --- /dev/null +++ b/packages/builtin-tools/src/tools/CtxInspectTool/__tests__/CtxInspectTool.test.ts @@ -0,0 +1,216 @@ +import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test' + +mock.module('src/utils/log.ts', () => ({ + logError: () => {}, + logToFile: () => {}, + getLogDisplayTitle: () => '', + logEvent: () => {}, + logMCPError: () => {}, + logMCPDebug: () => {}, + dateToFilename: (d: Date) => d.toISOString().replace(/[:.]/g, '-'), + getLogFilePath: () => '/tmp/mock-log', + attachErrorLogSink: () => {}, + getInMemoryErrors: () => [], + loadErrorLogs: async () => [], + getErrorLogByIndex: async () => null, + captureAPIRequest: () => {}, + _resetErrorLogForTesting: () => {}, +})) + +mock.module('src/services/tokenEstimation.ts', () => ({ + roughTokenCountEstimation: (text: string) => Math.ceil(text.length / 4), + roughTokenCountEstimationForMessages: (msgs: unknown[]) => msgs.length * 64, + roughTokenCountEstimationForMessage: () => 64, + roughTokenCountEstimationForFileType: () => 64, + bytesPerTokenForFileType: () => 4, + countTokensWithAPI: async () => 0, + countMessagesTokensWithAPI: async () => 0, + countTokensViaHaikuFallback: async () => 0, +})) + +let sessionMemoryInitialized = false +mock.module('src/services/SessionMemory/sessionMemoryUtils.ts', () => ({ + isSessionMemoryInitialized: () => sessionMemoryInitialized, + waitForSessionMemoryExtraction: async () => {}, + getLastSummarizedMessageId: () => undefined, + getSessionMemoryContent: async () => null, + setLastSummarizedMessageId: () => {}, + markExtractionStarted: () => {}, + markExtractionCompleted: () => {}, + setSessionMemoryConfig: () => {}, + getSessionMemoryConfig: () => ({}), + recordExtractionTokenCount: () => {}, + markSessionMemoryInitialized: () => {}, + hasMetInitializationThreshold: () => false, + hasMetUpdateThreshold: () => false, + getToolCallsBetweenUpdates: () => 0, + resetSessionMemoryState: () => {}, + DEFAULT_SESSION_MEMORY_CONFIG: {}, +})) + +mock.module('src/utils/slowOperations.ts', () => ({ + jsonStringify: JSON.stringify, + jsonParse: JSON.parse, + slowLogging: { enabled: false }, + clone: (value: unknown) => structuredClone(value), + cloneDeep: (value: unknown) => structuredClone(value), + callerFrame: () => '', + SLOW_OPERATION_THRESHOLD_MS: 100, + writeFileSync_DEPRECATED: () => {}, +})) + +const { initContextCollapse, resetContextCollapse } = await import( + 'src/services/contextCollapse/index.js' +) +const { tokenCountWithEstimation } = await import('src/utils/tokens.js') +const { CtxInspectTool } = await import('../CtxInspectTool.js') + +function makeUserMessage(text: string) { + return { + type: 'user' as const, + uuid: `user-${text}`, + message: { role: 'user' as const, content: text }, + } +} + +function makeAssistantMessage(text: string) { + return { + type: 'assistant' as const, + uuid: `assistant-${text}`, + message: { + role: 'assistant' as const, + content: [{ type: 'text' as const, text }], + }, + } +} + +function makeContext(messages: unknown[], mainLoopModel = 'claude-sonnet-4-6') { + return { + messages, + options: { + mainLoopModel, + }, + getAppState: () => ({}), + } as any +} + +const allowTool = async (input: Record) => ({ + behavior: 'allow' as const, + updatedInput: input, +}) + +const parentMessage = makeAssistantMessage('Parent tool call') + +beforeEach(() => { + resetContextCollapse() + sessionMemoryInitialized = false +}) + +afterEach(() => { + resetContextCollapse() + sessionMemoryInitialized = false +}) + +describe('CtxInspectTool', () => { + test('tool exports and metadata remain stable', async () => { + expect(CtxInspectTool).toBeDefined() + expect(CtxInspectTool.name).toBe('CtxInspect') + expect(typeof CtxInspectTool.call).toBe('function') + expect(await CtxInspectTool.description()).toContain('context') + expect(CtxInspectTool.userFacingName()).toBe('CtxInspect') + expect(CtxInspectTool.isReadOnly()).toBe(true) + expect(CtxInspectTool.isConcurrencySafe()).toBe(true) + }) + + test('formats tool results for transcript rendering', () => { + const block = CtxInspectTool.mapToolResultToToolResultBlockParam( + { + total_tokens: 192, + message_count: 3, + context_window_model: 'claude-sonnet-4-6', + prompt_caching_enabled: true, + session_memory_enabled: true, + context_collapse_enabled: false, + summary: 'Context collapse: disabled', + }, + 'tool-use-id', + ) + + expect(block.tool_use_id).toBe('tool-use-id') + expect(block.content).toContain('192 tokens') + expect(block.content).toContain('3 messages') + expect(block.content).toContain('Context collapse: disabled') + }) + + test('returns live context counts and mechanism state', async () => { + const messages = [ + makeUserMessage('Inspect the current context budget.'), + makeAssistantMessage('Looking at the current conversation state.'), + ] + const context = makeContext(messages, 'claude-sonnet-4-6') + + const result = await (CtxInspectTool as any).call( + {}, + context, + allowTool, + parentMessage, + ) + + expect(Object.keys(result.data).sort()).toEqual([ + 'context_collapse_enabled', + 'context_window_model', + 'message_count', + 'prompt_caching_enabled', + 'session_memory_enabled', + 'summary', + 'total_tokens', + ]) + expect(result.data.message_count).toBe(messages.length) + expect(result.data.total_tokens).toBe(tokenCountWithEstimation(messages as any)) + expect(result.data.context_window_model).toBe('claude-sonnet-4-6') + expect(result.data.prompt_caching_enabled).toBe(true) + expect(result.data.session_memory_enabled).toBe(false) + expect(result.data.context_collapse_enabled).toBe(false) + expect(result.data.summary).toContain('Overall context summary') + expect(result.data.summary).toContain('Session memory: disabled') + expect(result.data.summary).toContain('Context collapse: disabled') + }) + + test('query input focuses summary and collapse runtime changes the reported state', async () => { + const messages = [ + makeUserMessage('Show me tool usage pressure in this thread.'), + makeAssistantMessage('Summarizing tool-heavy context now.'), + ] + const context = makeContext(messages, 'claude-sonnet-4-6') + + const disabledResult = await (CtxInspectTool as any).call( + { query: 'tool usage' }, + context, + allowTool, + parentMessage, + ) + + initContextCollapse() + + const enabledResult = await (CtxInspectTool as any).call( + { query: 'tool usage' }, + context, + allowTool, + parentMessage, + ) + + expect(disabledResult.data.message_count).toBe(messages.length) + expect(enabledResult.data.message_count).toBe(messages.length) + expect(disabledResult.data.total_tokens).toBe( + tokenCountWithEstimation(messages as any), + ) + expect(enabledResult.data.total_tokens).toBe( + tokenCountWithEstimation(messages as any), + ) + expect(disabledResult.data.summary).toContain('Focus: tool usage') + expect(disabledResult.data.context_collapse_enabled).toBe(false) + expect(enabledResult.data.context_collapse_enabled).toBe(true) + expect(enabledResult.data.summary).toContain('Context collapse: enabled') + expect(enabledResult.data.summary).toContain('Collapse spans:') + }) +}) diff --git a/packages/builtin-tools/src/tools/RemoteTriggerTool/RemoteTriggerTool.ts b/packages/builtin-tools/src/tools/RemoteTriggerTool/RemoteTriggerTool.ts index 6d0412fa6..b3ed633d8 100644 --- a/packages/builtin-tools/src/tools/RemoteTriggerTool/RemoteTriggerTool.ts +++ b/packages/builtin-tools/src/tools/RemoteTriggerTool/RemoteTriggerTool.ts @@ -11,6 +11,7 @@ import { getClaudeAIOAuthTokens, } from 'src/utils/auth.js' import { lazySchema } from 'src/utils/lazySchema.js' +import { appendRemoteTriggerAuditRecord } from 'src/utils/remoteTriggerAudit.js' import { jsonStringify } from 'src/utils/slowOperations.js' import { DESCRIPTION, PROMPT, REMOTE_TRIGGER_TOOL_NAME } from './prompt.js' import { renderToolResultMessage, renderToolUseMessage } from './UI.js' @@ -36,6 +37,7 @@ const outputSchema = lazySchema(() => z.object({ status: z.number(), json: z.string(), + audit_id: z.string().optional(), }), ) type OutputSchema = ReturnType @@ -76,77 +78,96 @@ export const RemoteTriggerTool = buildTool({ return PROMPT }, async call(input: Input, context: ToolUseContext) { - await checkAndRefreshOAuthTokenIfNeeded() - const accessToken = getClaudeAIOAuthTokens()?.accessToken - if (!accessToken) { - throw new Error( - 'Not authenticated with a claude.ai account. Run /login and try again.', - ) - } - const orgUUID = await getOrganizationUUID() - if (!orgUUID) { - throw new Error('Unable to resolve organization UUID.') + const auditBase = { + action: input.action, + ...(input.trigger_id ? { triggerId: input.trigger_id } : {}), } + try { + await checkAndRefreshOAuthTokenIfNeeded() + const accessToken = getClaudeAIOAuthTokens()?.accessToken + if (!accessToken) { + throw new Error( + 'Not authenticated with a claude.ai account. Run /login and try again.', + ) + } + const orgUUID = await getOrganizationUUID() + if (!orgUUID) { + throw new Error('Unable to resolve organization UUID.') + } - const base = `${getOauthConfig().BASE_API_URL}/v1/code/triggers` - const headers = { - Authorization: `Bearer ${accessToken}`, - 'Content-Type': 'application/json', - 'anthropic-version': '2023-06-01', - 'anthropic-beta': TRIGGERS_BETA, - 'x-organization-uuid': orgUUID, - } + const base = `${getOauthConfig().BASE_API_URL}/v1/code/triggers` + const headers = { + Authorization: `Bearer ${accessToken}`, + 'Content-Type': 'application/json', + 'anthropic-version': '2023-06-01', + 'anthropic-beta': TRIGGERS_BETA, + 'x-organization-uuid': orgUUID, + } - const { action, trigger_id, body } = input - let method: 'GET' | 'POST' - let url: string - let data: unknown - switch (action) { - case 'list': - method = 'GET' - url = base - break - case 'get': - if (!trigger_id) throw new Error('get requires trigger_id') - method = 'GET' - url = `${base}/${trigger_id}` - break - case 'create': - if (!body) throw new Error('create requires body') - method = 'POST' - url = base - data = body - break - case 'update': - if (!trigger_id) throw new Error('update requires trigger_id') - if (!body) throw new Error('update requires body') - method = 'POST' - url = `${base}/${trigger_id}` - data = body - break - case 'run': - if (!trigger_id) throw new Error('run requires trigger_id') - method = 'POST' - url = `${base}/${trigger_id}/run` - data = {} - break - } + const { action, trigger_id, body } = input + let method: 'GET' | 'POST' + let url: string + let data: unknown + switch (action) { + case 'list': + method = 'GET' + url = base + break + case 'get': + if (!trigger_id) throw new Error('get requires trigger_id') + method = 'GET' + url = `${base}/${trigger_id}` + break + case 'create': + if (!body) throw new Error('create requires body') + method = 'POST' + url = base + data = body + break + case 'update': + if (!trigger_id) throw new Error('update requires trigger_id') + if (!body) throw new Error('update requires body') + method = 'POST' + url = `${base}/${trigger_id}` + data = body + break + case 'run': + if (!trigger_id) throw new Error('run requires trigger_id') + method = 'POST' + url = `${base}/${trigger_id}/run` + data = {} + break + } - const res = await axios.request({ - method, - url, - headers, - data, - timeout: 20_000, - signal: context.abortController.signal, - validateStatus: () => true, - }) - - return { - data: { + const res = await axios.request({ + method, + url, + headers, + data, + timeout: 20_000, + signal: context.abortController.signal, + validateStatus: () => true, + }) + const audit = await appendRemoteTriggerAuditRecord({ + ...auditBase, + ok: res.status >= 200 && res.status < 300, status: res.status, - json: jsonStringify(res.data), - }, + }) + + return { + data: { + status: res.status, + json: jsonStringify(res.data), + audit_id: audit.auditId, + }, + } + } catch (error) { + await appendRemoteTriggerAuditRecord({ + ...auditBase, + ok: false, + error: error instanceof Error ? error.message : String(error), + }) + throw error } }, mapToolResultToToolResultBlockParam(output, toolUseID) { diff --git a/packages/builtin-tools/src/tools/RemoteTriggerTool/__tests__/RemoteTriggerTool.test.ts b/packages/builtin-tools/src/tools/RemoteTriggerTool/__tests__/RemoteTriggerTool.test.ts new file mode 100644 index 000000000..dc570a803 --- /dev/null +++ b/packages/builtin-tools/src/tools/RemoteTriggerTool/__tests__/RemoteTriggerTool.test.ts @@ -0,0 +1,91 @@ +import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test' +import { mkdir, readFile, rm } from 'fs/promises' +import { tmpdir } from 'os' +import { join } from 'path' +import { + resetStateForTests, + setOriginalCwd, + setProjectRoot, +} from 'src/bootstrap/state.js' + +let requestStatus = 200 + +mock.module('axios', () => ({ + default: { + request: async () => ({ + status: requestStatus, + data: { ok: requestStatus >= 200 && requestStatus < 300 }, + }), + }, +})) + +mock.module('src/utils/auth.js', () => ({ + checkAndRefreshOAuthTokenIfNeeded: async () => {}, + getClaudeAIOAuthTokens: () => ({ accessToken: 'token' }), +})) + +mock.module('src/services/oauth/client.js', () => ({ + getOrganizationUUID: async () => 'org', +})) + +mock.module('src/constants/oauth.js', () => ({ + getOauthConfig: () => ({ BASE_API_URL: 'https://example.test' }), +})) + +let cwd = '' +let previousCwd = '' + +beforeEach(async () => { + requestStatus = 200 + previousCwd = process.cwd() + cwd = join(tmpdir(), `remote-trigger-tool-${Date.now()}-${Math.random().toString(16).slice(2)}`) + await mkdir(cwd, { recursive: true }) + process.chdir(cwd) + resetStateForTests() + setOriginalCwd(cwd) + setProjectRoot(cwd) +}) + +afterEach(async () => { + resetStateForTests() + process.chdir(previousCwd) + await rm(cwd, { recursive: true, force: true }) +}) + +describe('RemoteTriggerTool audit', () => { + test('writes an audit record for successful remote calls', async () => { + const { RemoteTriggerTool } = await import('../RemoteTriggerTool') + const result = await RemoteTriggerTool.call( + { action: 'run', trigger_id: 'trigger-1' }, + { abortController: new AbortController() } as any, + ) + + expect(result.data.audit_id).toBeString() + const raw = await readFile( + join(cwd, '.claude', 'remote-trigger-audit.jsonl'), + 'utf-8', + ) + expect(raw).toContain('"action":"run"') + expect(raw).toContain('"triggerId":"trigger-1"') + expect(raw).toContain('"ok":true') + }) + + test('writes an audit record before rethrowing validation failures', async () => { + const { RemoteTriggerTool } = await import('../RemoteTriggerTool') + + await expect( + RemoteTriggerTool.call( + { action: 'run' }, + { abortController: new AbortController() } as any, + ), + ).rejects.toThrow('run requires trigger_id') + + const raw = await readFile( + join(cwd, '.claude', 'remote-trigger-audit.jsonl'), + 'utf-8', + ) + expect(raw).toContain('"action":"run"') + expect(raw).toContain('"ok":false') + expect(raw).toContain('run requires trigger_id') + }) +}) diff --git a/packages/builtin-tools/src/tools/TeamDeleteTool/TeamDeleteTool.ts b/packages/builtin-tools/src/tools/TeamDeleteTool/TeamDeleteTool.ts index 7c80df676..11e907188 100644 --- a/packages/builtin-tools/src/tools/TeamDeleteTool/TeamDeleteTool.ts +++ b/packages/builtin-tools/src/tools/TeamDeleteTool/TeamDeleteTool.ts @@ -14,11 +14,26 @@ import { } from 'src/utils/swarm/teamHelpers.js' import { clearTeammateColors } from 'src/utils/swarm/teammateLayoutManager.js' import { clearLeaderTeamName } from 'src/utils/tasks.js' +import { ensureBackendsRegistered, getBackendByType, getInProcessBackend } from 'src/utils/swarm/backends/registry.js' +import { createPaneBackendExecutor } from 'src/utils/swarm/backends/PaneBackendExecutor.js' +import { isPaneBackend } from 'src/utils/swarm/backends/types.js' +import { sleep } from 'src/utils/sleep.js' import { TEAM_DELETE_TOOL_NAME } from './constants.js' import { getPrompt } from './prompt.js' import { renderToolResultMessage, renderToolUseMessage } from './UI.js' -const inputSchema = lazySchema(() => z.strictObject({})) +const inputSchema = lazySchema(() => + z.strictObject({ + wait_ms: z + .number() + .min(0) + .max(30_000) + .optional() + .describe( + 'Optional time to wait for active teammates to acknowledge shutdown before cleanup.', + ), + }), +) type InputSchema = ReturnType export type Output = { @@ -68,7 +83,7 @@ export const TeamDeleteTool: Tool = buildTool({ } }, - async call(_input, context) { + async call(input, context) { const { setAppState, getAppState } = context const appState = getAppState() const teamName = appState.teamContext?.teamName @@ -87,13 +102,82 @@ export const TeamDeleteTool: Tool = buildTool({ const activeMembers = nonLeadMembers.filter(m => m.isActive !== false) if (activeMembers.length > 0) { - const memberNames = activeMembers.map(m => m.name).join(', ') - return { - data: { - success: false, - message: `Cannot cleanup team with ${activeMembers.length} active member(s): ${memberNames}. Use requestShutdown to gracefully terminate teammates first.`, - team_name: teamName, - }, + const requested: string[] = [] + for (const member of activeMembers) { + let sent = false + if (member.backendType === 'in-process') { + const executor = getInProcessBackend() + executor.setContext?.(context) + sent = await executor.terminate( + member.agentId, + 'Team cleanup requested by team lead', + ) + } else if (member.backendType && isPaneBackend(member.backendType)) { + await ensureBackendsRegistered() + const executor = createPaneBackendExecutor( + getBackendByType(member.backendType), + ) + executor.setContext?.(context) + sent = await executor.terminate( + member.agentId, + 'Team cleanup requested by team lead', + ) + } + if (sent) { + requested.push(member.name) + } + } + const waitMs = input.wait_ms ?? 0 + if (waitMs > 0 && requested.length > 0) { + const deadline = Date.now() + waitMs + while (Date.now() < deadline) { + await sleep(Math.min(250, Math.max(0, deadline - Date.now()))) + const refreshed = readTeamFile(teamName) + const stillActive = + refreshed?.members.filter( + m => m.name !== TEAM_LEAD_NAME && m.isActive !== false, + ) ?? [] + if (stillActive.length === 0) { + break + } + } + const refreshed = readTeamFile(teamName) + const stillActive = + refreshed?.members.filter( + m => m.name !== TEAM_LEAD_NAME && m.isActive !== false, + ) ?? [] + if (stillActive.length === 0) { + // Fall through to cleanup with the refreshed team file state. + } else { + const memberNames = stillActive.map(m => m.name).join(', ') + return { + data: { + success: false, + message: `Shutdown requested for active teammate(s): ${requested.join(', ')}. Cleanup is still blocked after waiting ${waitMs}ms: ${memberNames}.`, + team_name: teamName, + }, + } + } + } + const latestTeamFile = readTeamFile(teamName) + const latestActiveMembers = + latestTeamFile?.members.filter( + m => m.name !== TEAM_LEAD_NAME && m.isActive !== false, + ) ?? [] + if (latestActiveMembers.length === 0) { + // Continue to cleanup below. + } else { + const memberNames = latestActiveMembers.map(m => m.name).join(', ') + return { + data: { + success: false, + message: + requested.length > 0 + ? `Shutdown requested for active teammate(s): ${requested.join(', ')}. Cleanup is blocked until they exit: ${memberNames}.` + : `Cannot cleanup team with ${latestActiveMembers.length} active member(s): ${memberNames}. Use requestShutdown to gracefully terminate teammates first.`, + team_name: teamName, + }, + } } } } diff --git a/packages/builtin-tools/src/tools/WebBrowserTool/WebBrowserTool.ts b/packages/builtin-tools/src/tools/WebBrowserTool/WebBrowserTool.ts index 5041bd778..4c174da8d 100644 --- a/packages/builtin-tools/src/tools/WebBrowserTool/WebBrowserTool.ts +++ b/packages/builtin-tools/src/tools/WebBrowserTool/WebBrowserTool.ts @@ -9,19 +9,11 @@ const inputSchema = lazySchema(() => z.strictObject({ url: z .string() - .describe('URL to navigate to in the browser.'), + .describe('URL to fetch and extract content from.'), action: z - .enum(['navigate', 'screenshot', 'click', 'type', 'scroll']) + .enum(['navigate', 'screenshot']) .optional() - .describe('Browser action to perform. Defaults to "navigate".'), - selector: z - .string() - .optional() - .describe('CSS selector for click/type actions.'), - text: z - .string() - .optional() - .describe('Text to type when action is "type".'), + .describe('Action to perform. "navigate" fetches page content (default). "screenshot" returns a text snapshot of the page.'), }), ) type InputSchema = ReturnType @@ -45,16 +37,24 @@ export const WebBrowserTool = buildTool({ }, async description() { - return 'Browse the web using an embedded browser' + return 'Fetch and read web page content via HTTP' }, async prompt() { - return `Open and interact with web pages in an embedded browser. Supports navigation, screenshots, clicking, typing, and scrolling. + return `Fetch web pages via HTTP and extract their text content. This is a lightweight browser tool (HTTP fetch, not a full browser engine). + +Supported actions: +- navigate: Fetch a URL and extract page title + text content +- screenshot: Same as navigate (returns text snapshot, not a visual screenshot) + +Limitations: +- No JavaScript execution — only sees server-rendered HTML +- click/type/scroll require a full browser runtime (not available) +- For full browser interaction, use the Claude-in-Chrome MCP tools instead Use this for: -- Viewing web pages and their content -- Taking screenshots of UI -- Interacting with web applications -- Testing web endpoints with full browser rendering` +- Reading web page content and documentation +- Checking API endpoints that return HTML +- Quick page title/content extraction` }, isConcurrencySafe() { @@ -85,12 +85,84 @@ Use this for: }, async call(input: BrowserInput) { - // Browser integration requires the WEB_BROWSER_TOOL runtime (Bun WebView). + const action = input.action ?? 'navigate' + + if (action === 'navigate' || action === 'screenshot') { + // Fetch the page content via HTTP + try { + const response = await fetch(input.url, { + headers: { + 'User-Agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + Accept: + 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + }, + redirect: 'follow', + }) + + if (!response.ok) { + return { + data: { + title: `HTTP ${response.status}`, + url: input.url, + content: `Error: ${response.status} ${response.statusText}`, + }, + } + } + + const html = await response.text() + + // Extract title + const titleMatch = html.match(/]*>([^<]*)<\/title>/i) + const title = titleMatch?.[1]?.trim() ?? '' + + // Extract text content (strip HTML tags, scripts, styles) + let textContent = html + .replace(//gi, '') + .replace(//gi, '') + .replace(/<[^>]+>/g, ' ') + .replace(/\s+/g, ' ') + .trim() + + // Truncate to reasonable size + if (textContent.length > 50_000) { + textContent = textContent.slice(0, 50_000) + '\n[truncated]' + } + + if (action === 'screenshot') { + return { + data: { + title, + url: response.url, + content: `[Text snapshot — visual screenshots require Chrome browser tools]\n\n${textContent}`, + }, + } + } + + return { + data: { + title, + url: response.url, + content: textContent, + }, + } + } catch (err) { + return { + data: { + title: 'Error', + url: input.url, + content: `Failed to fetch: ${err instanceof Error ? err.message : String(err)}`, + }, + } + } + } + + // Unreachable — schema only allows navigate/screenshot return { data: { title: '', url: input.url, - content: 'Web browser requires the WEB_BROWSER_TOOL runtime.', + content: `Unknown action "${action}".`, }, } }, diff --git a/packages/builtin-tools/src/tools/WebBrowserTool/__tests__/WebBrowserTool.test.ts b/packages/builtin-tools/src/tools/WebBrowserTool/__tests__/WebBrowserTool.test.ts new file mode 100644 index 000000000..035ef2100 --- /dev/null +++ b/packages/builtin-tools/src/tools/WebBrowserTool/__tests__/WebBrowserTool.test.ts @@ -0,0 +1,94 @@ +import { describe, test, expect, beforeAll, afterAll } from 'bun:test' + +// Mock fetch directly — avoids flaky dependency on external hosts AND +// pollution by other tests that call setGlobalDispatcher (proxy agents make +// localhost fetches return 500 in the full-suite run). +const realFetch = globalThis.fetch + +beforeAll(() => { + globalThis.fetch = (async ( + input: string | URL | Request, + _init?: RequestInit, + ) => { + const url = typeof input === 'string' ? input : input.toString() + if (url === 'not-a-url' || !url.startsWith('http')) { + throw new TypeError('Failed to fetch') + } + const body = + 'Example Domain' + + '

Example Domain

Sample content.

' + const res = new Response(body, { + status: 200, + headers: { 'content-type': 'text/html' }, + }) + // Make response.url match the request URL so tests can assert on it. + Object.defineProperty(res, 'url', { value: url, configurable: true }) + return res + }) as typeof fetch +}) + +afterAll(() => { + globalThis.fetch = realFetch +}) + +describe('WebBrowserTool', () => { + test('tool exports and metadata', async () => { + const { WebBrowserTool } = await import('../WebBrowserTool.js') + expect(WebBrowserTool).toBeDefined() + expect(WebBrowserTool.name).toBe('WebBrowser') + expect(typeof WebBrowserTool.call).toBe('function') + expect(WebBrowserTool.userFacingName()).toBe('Browser') + expect(WebBrowserTool.isReadOnly()).toBe(true) + }) + + test('description reflects browser-lite', async () => { + const { WebBrowserTool } = await import('../WebBrowserTool.js') + const desc = await WebBrowserTool.description() + expect(desc).toContain('HTTP') + expect(desc).not.toContain('embedded browser') + }) + + test('prompt mentions limitations', async () => { + const { WebBrowserTool } = await import('../WebBrowserTool.js') + const prompt = await WebBrowserTool.prompt() + expect(prompt).toContain('Limitations') + expect(prompt).toContain('No JavaScript') + expect(prompt).toContain('Claude-in-Chrome') + }) + + test('navigate fetches URL', async () => { + const { WebBrowserTool } = await import('../WebBrowserTool.js') + const result = await WebBrowserTool.call({ + url: 'https://example.com', + } as any) + expect(result.data.title).toBe('Example Domain') + expect(result.data.url).toContain('example.com') + expect(result.data.content).toContain('Example Domain') + }, 15000) + + test('screenshot returns text snapshot', async () => { + const { WebBrowserTool } = await import('../WebBrowserTool.js') + const result = await WebBrowserTool.call({ + url: 'https://example.com', + action: 'screenshot', + } as any) + expect(result.data.content).toContain('Text snapshot') + expect(result.data.content).toContain('Example Domain') + }, 15000) + + test('schema only allows navigate and screenshot', async () => { + const { WebBrowserTool } = await import('../WebBrowserTool.js') + const schema = WebBrowserTool.inputSchema + const parseResult = schema.safeParse({ + url: 'https://example.com', + action: 'click', + }) + expect(parseResult.success).toBe(false) + }) + + test('invalid URL returns error', async () => { + const { WebBrowserTool } = await import('../WebBrowserTool.js') + const result = await WebBrowserTool.call({ url: 'not-a-url' } as any) + expect(result.data.content).toContain('Failed to fetch') + }) +}) diff --git a/packages/builtin-tools/src/tools/WebSearchTool/adapters/index.ts b/packages/builtin-tools/src/tools/WebSearchTool/adapters/index.ts index 6500e8be6..3a3c3cb0b 100644 --- a/packages/builtin-tools/src/tools/WebSearchTool/adapters/index.ts +++ b/packages/builtin-tools/src/tools/WebSearchTool/adapters/index.ts @@ -16,17 +16,37 @@ export type { WebSearchAdapter, } from './types.js' +/** + * Check if the current session uses a third-party (non-Anthropic) API provider. + * These providers don't support Anthropic's server_tools (server-side web search), + * so they must fall back to the Bing scraper adapter. + */ +function isThirdPartyProvider(): boolean { + return !!( + process.env.CLAUDE_CODE_USE_OPENAI || + process.env.CLAUDE_CODE_USE_GEMINI || + process.env.CLAUDE_CODE_USE_GROK + ) +} + let cachedAdapter: WebSearchAdapter | null = null let cachedAdapterKey: 'api' | 'bing' | 'brave' | null = null export function createAdapter(): WebSearchAdapter { const envAdapter = process.env.WEB_SEARCH_ADAPTER + // Priority: + // 1. Explicit env override (WEB_SEARCH_ADAPTER=api|bing|brave) + // 2. Third-party provider (OpenAI/Gemini/Grok) → bing (no server_tools support) + // 3. First-party Anthropic API → api (server-side web search + connector_text) + // 4. Fallback → bing const adapterKey = envAdapter === 'api' || envAdapter === 'bing' || envAdapter === 'brave' ? envAdapter - : isFirstPartyAnthropicBaseUrl() - ? 'api' - : 'bing' + : isThirdPartyProvider() + ? 'bing' + : isFirstPartyAnthropicBaseUrl() + ? 'api' + : 'bing' if (cachedAdapter && cachedAdapterKey === adapterKey) return cachedAdapter diff --git a/packages/builtin-tools/src/tools/WorkflowTool/WorkflowTool.ts b/packages/builtin-tools/src/tools/WorkflowTool/WorkflowTool.ts index 4c6bfc767..cb8d31774 100644 --- a/packages/builtin-tools/src/tools/WorkflowTool/WorkflowTool.ts +++ b/packages/builtin-tools/src/tools/WorkflowTool/WorkflowTool.ts @@ -1,18 +1,358 @@ +import { randomUUID } from 'crypto' +import { mkdir, readdir, readFile, writeFile } from 'fs/promises' +import { join, parse } from 'path' import { z } from 'zod/v4' import type { ToolResultBlockParam } from 'src/Tool.js' import { buildTool } from 'src/Tool.js' import { truncate } from 'src/utils/format.js' -import { WORKFLOW_TOOL_NAME } from './constants.js' +import { safeParseJSON } from 'src/utils/json.js' +import { + WORKFLOW_DIR_NAME, + WORKFLOW_FILE_EXTENSIONS, + WORKFLOW_TOOL_NAME, +} from './constants.js' + +const WORKFLOW_RUNS_DIR = '.claude/workflow-runs' const inputSchema = z.object({ workflow: z.string().describe('Name of the workflow to execute'), args: z.string().optional().describe('Arguments to pass to the workflow'), + action: z + .enum(['start', 'status', 'advance', 'cancel', 'list']) + .optional() + .describe('Workflow action. Defaults to start.'), + run_id: z + .string() + .optional() + .describe('Workflow run id for status, advance, or cancel.'), }) type Input = typeof inputSchema type WorkflowInput = z.infer +type WorkflowStepStatus = 'pending' | 'running' | 'completed' | 'cancelled' + +type WorkflowStep = { + name: string + prompt: string + status: WorkflowStepStatus + startedAt?: number + completedAt?: number +} + +type WorkflowRun = { + runId: string + workflow: string + args?: string + status: 'running' | 'completed' | 'cancelled' + createdAt: number + updatedAt: number + currentStepIndex: number + steps: WorkflowStep[] +} + type WorkflowOutput = { output: string } +async function findWorkflowFile( + workflowDir: string, + workflow: string, +): Promise<{ path: string; content: string } | null> { + for (const ext of WORKFLOW_FILE_EXTENSIONS) { + const path = join(workflowDir, `${workflow}${ext}`) + try { + return { path, content: await readFile(path, 'utf-8') } + } catch { + // try next + } + } + return null +} + +async function listAvailableWorkflows(workflowDir: string): Promise { + try { + const files = await readdir(workflowDir) + return files + .filter(f => WORKFLOW_FILE_EXTENSIONS.includes(parse(f).ext.toLowerCase())) + .map(f => parse(f).name) + .sort() + } catch { + return [] + } +} + +function workflowRunPath(cwd: string, runId: string): string { + return join(cwd, WORKFLOW_RUNS_DIR, `${runId}.json`) +} + +async function readWorkflowRun( + cwd: string, + runId: string, +): Promise { + try { + const parsed = safeParseJSON( + await readFile(workflowRunPath(cwd, runId), 'utf-8'), + false, + ) as Partial | null + if ( + !parsed || + typeof parsed.runId !== 'string' || + typeof parsed.workflow !== 'string' || + !Array.isArray(parsed.steps) + ) { + return null + } + return parsed as WorkflowRun + } catch { + return null + } +} + +async function writeWorkflowRun(cwd: string, run: WorkflowRun): Promise { + await mkdir(join(cwd, WORKFLOW_RUNS_DIR), { recursive: true }) + await writeFile( + workflowRunPath(cwd, run.runId), + JSON.stringify(run, null, 2) + '\n', + 'utf-8', + ) +} + +async function listWorkflowRuns(cwd: string): Promise { + let files: string[] + try { + files = await readdir(join(cwd, WORKFLOW_RUNS_DIR)) + } catch { + return [] + } + const runs = await Promise.all( + files + .filter(f => f.endsWith('.json')) + .map(f => readWorkflowRun(cwd, f.slice(0, -'.json'.length))), + ) + return runs + .filter((run): run is WorkflowRun => run !== null) + .sort((a, b) => b.updatedAt - a.updatedAt) +} + +function parseMarkdownSteps(content: string): WorkflowStep[] { + const steps: WorkflowStep[] = [] + for (const rawLine of content.split('\n')) { + const line = rawLine.trim() + const taskMatch = line.match(/^[-*]\s+\[[ xX]\]\s+(.+)$/) + const bulletMatch = line.match(/^[-*]\s+(.+)$/) + const numberedMatch = line.match(/^\d+[.)]\s+(.+)$/) + const text = taskMatch?.[1] ?? bulletMatch?.[1] ?? numberedMatch?.[1] + if (!text) continue + steps.push({ name: text.slice(0, 80), prompt: text, status: 'pending' }) + } + return steps +} + +function parseYamlSteps(content: string): WorkflowStep[] { + const steps: WorkflowStep[] = [] + let current: Partial | null = null + const flush = () => { + if (!current) return + const prompt = current.prompt ?? current.name + if (current.name && prompt) { + steps.push({ + name: current.name, + prompt, + status: 'pending', + }) + } + current = null + } + + for (const rawLine of content.split('\n')) { + const line = rawLine.trim() + const stepText = line.match(/^-\s+(.+)$/)?.[1] + if (stepText) { + flush() + const inlineName = stepText.match(/^name:\s*(.+)$/)?.[1] + current = { + name: inlineName ?? stepText, + prompt: inlineName ? undefined : stepText, + } + continue + } + const name = line.match(/^name:\s*(.+)$/)?.[1] + if (name) { + if (!current) current = {} + current.name = name + continue + } + const prompt = line.match(/^(prompt|run|command):\s*(.+)$/)?.[2] + if (prompt) { + if (!current) current = {} + current.prompt = prompt + } + } + flush() + return steps +} + +function parseWorkflowSteps(filePath: string, content: string): WorkflowStep[] { + const ext = parse(filePath).ext.toLowerCase() + const steps = + ext === '.md' ? parseMarkdownSteps(content) : parseYamlSteps(content) + if (steps.length > 0) { + return steps + } + return [ + { + name: 'Execute workflow', + prompt: content.trim(), + status: 'pending', + }, + ] +} + +function formatStep(step: WorkflowStep, index: number): string { + return `Step ${index + 1}: ${step.name}\n${step.prompt}` +} + +function formatRunStatus(run: WorkflowRun): string { + const lines = [ + `Workflow run: ${run.runId}`, + `Workflow: ${run.workflow}`, + `Status: ${run.status}`, + `Current step: ${run.steps[run.currentStepIndex]?.name ?? 'none'}`, + `Steps: ${run.steps.length}`, + ] + for (let i = 0; i < run.steps.length; i += 1) { + const step = run.steps[i]! + lines.push(` ${i + 1}. [${step.status}] ${step.name}`) + } + return lines.join('\n') +} + +async function startWorkflow( + input: WorkflowInput, + cwd: string, +): Promise { + const workflowDir = join(cwd, WORKFLOW_DIR_NAME) + const found = await findWorkflowFile(workflowDir, input.workflow) + if (!found) { + const available = await listAvailableWorkflows(workflowDir) + const hint = + available.length > 0 + ? `\nAvailable workflows: ${available.join(', ')}` + : `\nNo workflows found in ${WORKFLOW_DIR_NAME}/. Create .md or .yaml files there.` + return { output: `Error: Workflow "${input.workflow}" not found.${hint}` } + } + + const steps = parseWorkflowSteps(found.path, found.content) + const now = Date.now() + steps[0] = { ...steps[0]!, status: 'running', startedAt: now } + const run: WorkflowRun = { + runId: randomUUID(), + workflow: input.workflow, + ...(input.args ? { args: input.args } : {}), + status: 'running', + createdAt: now, + updatedAt: now, + currentStepIndex: 0, + steps, + } + await writeWorkflowRun(cwd, run) + + const argsSection = input.args ? `\n\nArguments:\n${input.args}` : '' + return { + output: [ + `Workflow run started`, + `run_id: ${run.runId}`, + `workflow: ${run.workflow}`, + '', + formatStep(steps[0]!, 0), + argsSection, + '', + `When this step is complete, call Workflow with action="advance" and run_id="${run.runId}".`, + ].join('\n'), + } +} + +async function getRunOrError( + cwd: string, + runId: string | undefined, +): Promise<{ run?: WorkflowRun; output?: string }> { + if (!runId) return { output: 'Error: run_id is required for this action.' } + const run = await readWorkflowRun(cwd, runId) + if (!run) return { output: `Error: Workflow run "${runId}" not found.` } + return { run } +} + +async function advanceWorkflow( + cwd: string, + runId: string | undefined, +): Promise { + const found = await getRunOrError(cwd, runId) + if (!found.run) return { output: found.output! } + const run = found.run + const now = Date.now() + const current = run.steps[run.currentStepIndex] + if (current && current.status === 'running') { + current.status = 'completed' + current.completedAt = now + } + const nextIndex = run.currentStepIndex + 1 + if (nextIndex >= run.steps.length) { + run.status = 'completed' + run.updatedAt = now + await writeWorkflowRun(cwd, run) + return { output: `Workflow completed\nrun_id: ${run.runId}` } + } + run.currentStepIndex = nextIndex + run.steps[nextIndex] = { + ...run.steps[nextIndex]!, + status: 'running', + startedAt: now, + } + run.updatedAt = now + await writeWorkflowRun(cwd, run) + return { + output: [ + `Next workflow step`, + `run_id: ${run.runId}`, + '', + formatStep(run.steps[nextIndex]!, nextIndex), + '', + `When this step is complete, call Workflow with action="advance" and run_id="${run.runId}".`, + ].join('\n'), + } +} + +async function cancelWorkflow( + cwd: string, + runId: string | undefined, +): Promise { + const found = await getRunOrError(cwd, runId) + if (!found.run) return { output: found.output! } + const run = found.run + const now = Date.now() + run.status = 'cancelled' + run.updatedAt = now + for (const step of run.steps) { + if (step.status === 'pending' || step.status === 'running') { + step.status = 'cancelled' + } + } + await writeWorkflowRun(cwd, run) + return { output: `Workflow cancelled\nrun_id: ${run.runId}` } +} + +async function listWorkflowRunsForOutput(cwd: string): Promise { + const runs = await listWorkflowRuns(cwd) + if (runs.length === 0) return { output: 'No workflow runs recorded.' } + return { + output: runs + .slice(0, 20) + .map( + run => + `${run.runId} | ${run.workflow} | ${run.status} | step=${run.steps[run.currentStepIndex]?.name ?? 'none'} | updated=${new Date(run.updatedAt).toLocaleString()}`, + ) + .join('\n'), + } +} + export const WorkflowTool = buildTool({ name: WORKFLOW_TOOL_NAME, searchHint: 'execute user-defined workflow scripts', @@ -22,21 +362,25 @@ export const WorkflowTool = buildTool({ inputSchema, async description() { - return 'Execute a user-defined workflow script from .claude/workflows/' + return 'Execute and track a user-defined workflow from .claude/workflows/' }, async prompt() { - return `Use the Workflow tool to execute user-defined workflow scripts located in .claude/workflows/. Workflows are YAML or Markdown files that define a sequence of steps for common development tasks. + return `Use the Workflow tool to run user-defined workflows located in .claude/workflows/. Workflows may be Markdown checklists/lists or YAML files with steps. -Guidelines: -- Specify the workflow name to execute (must match a file in .claude/workflows/) -- Optionally pass arguments that the workflow can use -- Workflows run in the context of the current project` +Actions: +- start (default): create a persisted workflow run and return the first step to execute +- advance: mark the current step complete and return the next step +- status: inspect a workflow run by run_id +- cancel: cancel a workflow run +- list: list recent workflow runs + +Workflow run state is persisted in .claude/workflow-runs/.` }, userFacingName() { return 'Workflow' }, - isReadOnly() { - return false + isReadOnly(input) { + return input.action === 'status' || input.action === 'list' }, isEnabled() { return true @@ -44,10 +388,10 @@ Guidelines: renderToolUseMessage(input: Partial) { const name = input.workflow ?? 'unknown' - if (input.args) { - return `Workflow: ${name} ${input.args}` - } - return `Workflow: ${name}` + const action = input.action ?? 'start' + return input.args + ? `Workflow: ${action} ${name} ${input.args}` + : `Workflow: ${action} ${name}` }, mapToolResultToToolResultBlockParam( @@ -61,14 +405,26 @@ Guidelines: } }, - async call(_input: WorkflowInput, _context, _progress) { - // Workflow execution is wired by the WORKFLOW_SCRIPTS feature bootstrap. - // Without it, this tool is not functional. - return { - data: { - output: - 'Error: Workflow execution requires the WORKFLOW_SCRIPTS runtime.', - }, + async call(input: WorkflowInput) { + const cwd = process.cwd() + const action = input.action ?? 'start' + switch (action) { + case 'start': + return { data: await startWorkflow(input, cwd) } + case 'status': { + const found = await getRunOrError(cwd, input.run_id) + return { + data: { + output: found.run ? formatRunStatus(found.run) : found.output!, + }, + } + } + case 'advance': + return { data: await advanceWorkflow(cwd, input.run_id) } + case 'cancel': + return { data: await cancelWorkflow(cwd, input.run_id) } + case 'list': + return { data: await listWorkflowRunsForOutput(cwd) } } }, }) diff --git a/packages/builtin-tools/src/tools/WorkflowTool/__tests__/WorkflowTool.test.ts b/packages/builtin-tools/src/tools/WorkflowTool/__tests__/WorkflowTool.test.ts new file mode 100644 index 000000000..0d736b975 --- /dev/null +++ b/packages/builtin-tools/src/tools/WorkflowTool/__tests__/WorkflowTool.test.ts @@ -0,0 +1,99 @@ +import { afterEach, beforeEach, describe, expect, test } from 'bun:test' +import { mkdir, readFile, rm, writeFile } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { WorkflowTool } from '../WorkflowTool' + +let cwd: string +let previousCwd: string + +beforeEach(async () => { + previousCwd = process.cwd() + cwd = join(tmpdir(), `workflow-tool-${Date.now()}-${Math.random().toString(16).slice(2)}`) + await mkdir(join(cwd, '.claude', 'workflows'), { recursive: true }) + process.chdir(cwd) +}) + +afterEach(async () => { + process.chdir(previousCwd) + await rm(cwd, { recursive: true, force: true }) +}) + +describe('WorkflowTool', () => { + test('starts a workflow run and persists step state', async () => { + await writeFile( + join(cwd, '.claude', 'workflows', 'release.md'), + [ + '# Release', + '', + '- [ ] Run tests', + '- [ ] Build package', + ].join('\n'), + ) + + const result = await WorkflowTool.call({ workflow: 'release' }) + + expect(result.data.output).toContain('Workflow run started') + expect(result.data.output).toContain('Run tests') + const match = result.data.output.match(/run_id: ([a-f0-9-]+)/) + expect(match?.[1]).toBeString() + + const raw = await readFile( + join(cwd, '.claude', 'workflow-runs', `${match![1]}.json`), + 'utf-8', + ) + const run = JSON.parse(raw) + expect(run.workflow).toBe('release') + expect(run.status).toBe('running') + expect(run.steps).toHaveLength(2) + expect(run.steps[0].status).toBe('running') + expect(run.steps[1].status).toBe('pending') + }) + + test('advances a workflow run through completion', async () => { + await writeFile( + join(cwd, '.claude', 'workflows', 'audit.yaml'), + [ + 'steps:', + ' - name: Inspect', + ' prompt: Inspect the code', + ' - name: Verify', + ' prompt: Run focused tests', + ].join('\n'), + ) + + const started = await WorkflowTool.call({ workflow: 'audit' }) + const runId = started.data.output.match(/run_id: ([a-f0-9-]+)/)![1]! + + const next = await WorkflowTool.call( + { workflow: 'audit', action: 'advance', run_id: runId }, + ) + expect(next.data.output).toContain('Next workflow step') + expect(next.data.output).toContain('Run focused tests') + + const done = await WorkflowTool.call( + { workflow: 'audit', action: 'advance', run_id: runId }, + ) + expect(done.data.output).toContain('Workflow completed') + }) + + test('lists and cancels workflow runs', async () => { + await writeFile( + join(cwd, '.claude', 'workflows', 'cleanup.md'), + '- Remove stale files', + ) + + const started = await WorkflowTool.call({ workflow: 'cleanup' }) + const runId = started.data.output.match(/run_id: ([a-f0-9-]+)/)![1]! + + const listed = await WorkflowTool.call( + { workflow: 'cleanup', action: 'list' }, + ) + expect(listed.data.output).toContain(runId) + + const cancelled = await WorkflowTool.call( + { workflow: 'cleanup', action: 'cancel', run_id: runId }, + ) + expect(cancelled.data.output).toContain('Workflow cancelled') + }) +}) diff --git a/packages/builtin-tools/src/tools/shared/__tests__/spawnMultiAgent.test.ts b/packages/builtin-tools/src/tools/shared/__tests__/spawnMultiAgent.test.ts new file mode 100644 index 000000000..5af2cdbad --- /dev/null +++ b/packages/builtin-tools/src/tools/shared/__tests__/spawnMultiAgent.test.ts @@ -0,0 +1,54 @@ +import { afterEach, beforeEach, describe, expect, test } from 'bun:test' +import { rmSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { spawnTeammate } from '../spawnMultiAgent' + +let tempHome: string +let previousConfigDir: string | undefined + +beforeEach(() => { + previousConfigDir = process.env.CLAUDE_CONFIG_DIR + tempHome = join(tmpdir(), `spawn-multi-agent-${Date.now()}-${Math.random().toString(16).slice(2)}`) + process.env.CLAUDE_CONFIG_DIR = tempHome +}) + +afterEach(() => { + if (previousConfigDir === undefined) { + delete process.env.CLAUDE_CONFIG_DIR + } else { + process.env.CLAUDE_CONFIG_DIR = previousConfigDir + } + rmSync(tempHome, { recursive: true, force: true }) +}) + +describe('spawnTeammate', () => { + test('fails before spawn side effects when the team file is missing', async () => { + let setAppStateCalled = false + const context = { + getAppState: () => ({ + teamContext: undefined, + }), + setAppState: () => { + setAppStateCalled = true + }, + options: { + agentDefinitions: { + activeAgents: [], + }, + }, + } + + await expect( + spawnTeammate( + { + name: 'worker', + prompt: 'do work', + team_name: 'missing-team', + }, + context as any, + ), + ).rejects.toThrow('Team "missing-team" does not exist') + expect(setAppStateCalled).toBe(false) + }) +}) diff --git a/packages/builtin-tools/src/tools/shared/spawnMultiAgent.ts b/packages/builtin-tools/src/tools/shared/spawnMultiAgent.ts index 5eaf338f6..5d7c9689d 100644 --- a/packages/builtin-tools/src/tools/shared/spawnMultiAgent.ts +++ b/packages/builtin-tools/src/tools/shared/spawnMultiAgent.ts @@ -1,71 +1,39 @@ +import React from 'react' + /** * Shared spawn module for teammate creation. * Extracted from TeammateTool to allow reuse by AgentTool. */ -import React from 'react' import { - getChromeFlagOverride, - getFlagSettingsPath, - getInlinePlugins, - getMainLoopModelOverride, - getSessionBypassPermissionsMode, getSessionId, } from 'src/bootstrap/state.js' -import type { AppState } from 'src/state/AppState.js' -import { createTaskStateBase, generateTaskId } from 'src/Task.js' import type { ToolUseContext } from 'src/Tool.js' -import type { InProcessTeammateTaskState } from 'src/tasks/InProcessTeammateTask/types.js' import { formatAgentId } from 'src/utils/agentId.js' -import { quote } from 'src/utils/bash/shellQuote.js' -import { isInBundledMode } from 'src/utils/bundledMode.js' import { getGlobalConfig } from 'src/utils/config.js' import { getCwd } from 'src/utils/cwd.js' import { logForDebugging } from 'src/utils/debug.js' -import { errorMessage } from 'src/utils/errors.js' -import { execFileNoThrow } from 'src/utils/execFileNoThrow.js' import { parseUserSpecifiedModel } from 'src/utils/model/model.js' -import type { PermissionMode } from 'src/utils/permissions/PermissionMode.js' -import { isTmuxAvailable } from 'src/utils/swarm/backends/detection.js' import { - detectAndGetBackend, - getBackendByType, - isInProcessEnabled, - markInProcessFallback, - resetBackendDetection, + getTeammateExecutor, } from 'src/utils/swarm/backends/registry.js' -import { getTeammateModeFromSnapshot } from 'src/utils/swarm/backends/teammateModeSnapshot.js' -import type { BackendType } from 'src/utils/swarm/backends/types.js' -import { isPaneBackend } from 'src/utils/swarm/backends/types.js' +import type { BackendType, TeammateSpawnResult } from 'src/utils/swarm/backends/types.js' import { SWARM_SESSION_NAME, TEAM_LEAD_NAME, - TEAMMATE_COMMAND_ENV_VAR, - TMUX_COMMAND, } from 'src/utils/swarm/constants.js' import { It2SetupPrompt } from 'src/utils/swarm/It2SetupPrompt.js' -import { startInProcessTeammate } from 'src/utils/swarm/inProcessRunner.js' -import { - type InProcessSpawnConfig, - spawnInProcessTeammate, -} from 'src/utils/swarm/spawnInProcess.js' -import { buildInheritedEnvVars } from 'src/utils/swarm/spawnUtils.js' import { + getTeamFilePath, readTeamFileAsync, sanitizeAgentName, - sanitizeName, writeTeamFileAsync, + type TeamFile, } from 'src/utils/swarm/teamHelpers.js' import { assignTeammateColor, - createTeammatePaneInSwarmView, - enablePaneBorderStatus, - isInsideTmux, - sendCommandToPane, } from 'src/utils/swarm/teammateLayoutManager.js' import { getHardcodedTeammateModelFallback } from 'src/utils/swarm/teammateModel.js' -import { registerTask } from 'src/utils/task/framework.js' -import { writeToMailbox } from 'src/utils/teammateMailbox.js' import type { CustomAgentDefinition } from '../AgentTool/loadAgentsDir.js' import { isCustomAgent } from '../AgentTool/loadAgentsDir.js' @@ -153,112 +121,6 @@ type SpawnInput = { // Helper Functions // ============================================================================ -/** - * Checks if a tmux session exists - */ -async function hasSession(sessionName: string): Promise { - const result = await execFileNoThrow(TMUX_COMMAND, [ - 'has-session', - '-t', - sessionName, - ]) - return result.code === 0 -} - -/** - * Creates a new tmux session if it doesn't exist - */ -async function ensureSession(sessionName: string): Promise { - const exists = await hasSession(sessionName) - if (!exists) { - const result = await execFileNoThrow(TMUX_COMMAND, [ - 'new-session', - '-d', - '-s', - sessionName, - ]) - if (result.code !== 0) { - throw new Error( - `Failed to create tmux session '${sessionName}': ${result.stderr || 'Unknown error'}`, - ) - } - } -} - -/** - * Gets the command to spawn a teammate. - * For native builds (compiled binaries), use process.execPath. - * For non-native (node/bun running a script), use process.argv[1]. - */ -function getTeammateCommand(): string { - if (process.env[TEAMMATE_COMMAND_ENV_VAR]) { - return process.env[TEAMMATE_COMMAND_ENV_VAR] - } - return isInBundledMode() ? process.execPath : process.argv[1]! -} - -/** - * Builds CLI flags to propagate from the current session to spawned teammates. - * This ensures teammates inherit important settings like permission mode, - * model selection, and plugin configuration from their parent. - * - * @param options.planModeRequired - If true, don't inherit bypass permissions (plan mode takes precedence) - * @param options.permissionMode - Permission mode to propagate - */ -function buildInheritedCliFlags(options?: { - planModeRequired?: boolean - permissionMode?: PermissionMode -}): string { - const flags: string[] = [] - const { planModeRequired, permissionMode } = options || {} - - // Propagate permission mode to teammates, but NOT if plan mode is required - // Plan mode takes precedence over bypass permissions for safety - if (planModeRequired) { - // Don't inherit bypass permissions when plan mode is required - } else if ( - permissionMode === 'bypassPermissions' || - getSessionBypassPermissionsMode() - ) { - flags.push('--dangerously-skip-permissions') - } else if (permissionMode === 'acceptEdits') { - flags.push('--permission-mode acceptEdits') - } else if (permissionMode === 'auto') { - // Teammates inherit auto mode so the classifier auto-approves their tool - // calls too. The teammate's own startup (permissionSetup.ts) handles - // GrowthBook gate checks and setAutoModeActive(true) independently. - flags.push('--permission-mode auto') - } - - // Propagate --model if explicitly set via CLI - const modelOverride = getMainLoopModelOverride() - if (modelOverride) { - flags.push(`--model ${quote([modelOverride])}`) - } - - // Propagate --settings if set via CLI - const settingsPath = getFlagSettingsPath() - if (settingsPath) { - flags.push(`--settings ${quote([settingsPath])}`) - } - - // Propagate --plugin-dir for each inline plugin - const inlinePlugins = getInlinePlugins() - for (const pluginDir of inlinePlugins) { - flags.push(`--plugin-dir ${quote([pluginDir])}`) - } - - // Propagate --chrome / --no-chrome if explicitly set on the CLI - const chromeFlagOverride = getChromeFlagOverride() - if (chromeFlagOverride === true) { - flags.push('--chrome') - } else if (chromeFlagOverride === false) { - flags.push('--no-chrome') - } - - return flags.join(' ') -} - /** * Generates a unique teammate name by checking existing team members. * If the name already exists, appends a numeric suffix (e.g., tester-2, tester-3). @@ -294,787 +156,240 @@ export async function generateUniqueTeammateName( } // ============================================================================ -// Spawn Handlers +// Spawn Handler // ============================================================================ -/** - * Handle spawn operation using split-pane view (default). - * When inside tmux: Creates teammates in a shared window with leader on left, teammates on right. - * When outside tmux: Creates a claude-swarm session with all teammates in a tiled layout. - */ -async function handleSpawnSplitPane( +type ResolvedSpawn = { + teamName: string + teamFile: TeamFile + sanitizedName: string + teammateId: string + model: string + teammateColor: ReturnType + workingDir: string + agentDefinition?: CustomAgentDefinition +} + +async function resolveSpawn( input: SpawnInput, context: ToolUseContext, -): Promise<{ data: SpawnOutput }> { - const { setAppState, getAppState } = context - const { name, prompt, agent_type, cwd, plan_mode_required } = input - - // Resolve model: 'inherit' → leader's model; undefined → default Opus - const model = resolveTeammateModel(input.model, getAppState().mainLoopModel) - - if (!name || !prompt) { +): Promise { + if (!input.name || !input.prompt) { throw new Error('name and prompt are required for spawn operation') } - // Get team name from input or inherit from leader's team context - const appState = getAppState() + const appState = context.getAppState() const teamName = input.team_name || appState.teamContext?.teamName - if (!teamName) { throw new Error( - 'team_name is required for spawn operation. Either provide team_name in input or call spawnTeam first to establish team context.', + 'team_name is required for spawn operation. Either provide team_name in input or call TeamCreate first to establish team context.', ) } - // Generate unique name if duplicate exists in team - const uniqueName = await generateUniqueTeammateName(name, teamName) - - // Sanitize the name to prevent @ in agent IDs (would break agentName@teamName format) - const sanitizedName = sanitizeAgentName(uniqueName) - - // Generate deterministic agent ID from name and team - const teammateId = formatAgentId(sanitizedName, teamName) - const workingDir = cwd || getCwd() - - // Detect the appropriate backend and check if setup is needed - let detectionResult = await detectAndGetBackend() - - // If in iTerm2 but it2 isn't set up, prompt the user - if (detectionResult.needsIt2Setup && context.setToolJSX) { - const tmuxAvailable = await isTmuxAvailable() - - // Show the setup prompt and wait for user decision - const setupResult = await new Promise< - 'installed' | 'use-tmux' | 'cancelled' - >(resolve => { - context.setToolJSX!({ - jsx: React.createElement(It2SetupPrompt, { - onDone: resolve, - tmuxAvailable, - }), - shouldHidePromptInput: true, - }) - }) - - // Clear the JSX - context.setToolJSX(null) - - if (setupResult === 'cancelled') { - throw new Error('Teammate spawn cancelled - iTerm2 setup required') - } - - // If they installed it2 or chose tmux, clear cached detection and re-fetch - // so the local detectionResult matches the backend that will actually - // spawn the pane. - // - 'installed': re-detect to pick up the ITermBackend (it2 is now available) - // - 'use-tmux': re-detect so needsIt2Setup is false (preferTmux is now saved) - // and subsequent spawns skip this prompt - if (setupResult === 'installed' || setupResult === 'use-tmux') { - resetBackendDetection() - detectionResult = await detectAndGetBackend() - } - } - - // Check if we're inside tmux to determine session naming - const insideTmux = await isInsideTmux() - - // Assign a unique color to this teammate - const teammateColor = assignTeammateColor(teammateId) - - // Create a pane in the swarm view - // - Inside tmux: splits current window (leader on left, teammates on right) - // - In iTerm2 with it2: uses native iTerm2 split panes - // - Outside both: creates claude-swarm session with tiled teammates - const { paneId, isFirstTeammate } = await createTeammatePaneInSwarmView( - sanitizedName, - teammateColor, - ) - - // Enable pane border status on first teammate when inside tmux - // (outside tmux, this is handled in createTeammatePaneInSwarmView) - if (isFirstTeammate && insideTmux) { - await enablePaneBorderStatus() - } - - // Build the command to spawn Claude Code with teammate identity - // Note: We spawn without a prompt - initial instructions are sent via mailbox - const binaryPath = getTeammateCommand() - - // Build teammate identity CLI args (replaces CLAUDE_CODE_* env vars) - const teammateArgs = [ - `--agent-id ${quote([teammateId])}`, - `--agent-name ${quote([sanitizedName])}`, - `--team-name ${quote([teamName])}`, - `--agent-color ${quote([teammateColor])}`, - `--parent-session-id ${quote([getSessionId()])}`, - plan_mode_required ? '--plan-mode-required' : '', - agent_type ? `--agent-type ${quote([agent_type])}` : '', - ] - .filter(Boolean) - .join(' ') - - // Build CLI flags to propagate to teammate - // Pass plan_mode_required to prevent inheriting bypass permissions - let inheritedFlags = buildInheritedCliFlags({ - planModeRequired: plan_mode_required, - permissionMode: appState.toolPermissionContext.mode, - }) - - // If teammate has a custom model, add --model flag (or replace inherited one) - if (model) { - // Remove any inherited --model flag first - inheritedFlags = inheritedFlags - .split(' ') - .filter((flag, i, arr) => flag !== '--model' && arr[i - 1] !== '--model') - .join(' ') - // Add the teammate's model - inheritedFlags = inheritedFlags - ? `${inheritedFlags} --model ${quote([model])}` - : `--model ${quote([model])}` - } - - const flagsStr = inheritedFlags ? ` ${inheritedFlags}` : '' - // Propagate env vars that teammates need but may not inherit from tmux split-window shells. - // Includes CLAUDECODE, CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS, and API provider vars. - const envStr = buildInheritedEnvVars() - const spawnCommand = `cd ${quote([workingDir])} && env ${envStr} ${quote([binaryPath])} ${teammateArgs}${flagsStr}` - - // Send the command to the new pane - // Use swarm socket when running outside tmux (external swarm session) - await sendCommandToPane(paneId, spawnCommand, !insideTmux) - - // Determine session/window names for output - const sessionName = insideTmux ? 'current' : SWARM_SESSION_NAME - const windowName = insideTmux ? 'current' : 'swarm-view' - - // Track the teammate in AppState's teamContext with color - // If spawning without spawnTeam, set up the leader as team lead - setAppState(prev => ({ - ...prev, - teamContext: { - ...prev.teamContext, - teamName: teamName ?? prev.teamContext?.teamName ?? 'default', - teamFilePath: prev.teamContext?.teamFilePath ?? '', - leadAgentId: prev.teamContext?.leadAgentId ?? '', - teammates: { - ...(prev.teamContext?.teammates || {}), - [teammateId]: { - name: sanitizedName, - agentType: agent_type, - color: teammateColor, - tmuxSessionName: sessionName, - tmuxPaneId: paneId, - cwd: workingDir, - spawnedAt: Date.now(), - }, - }, - }, - })) - - // Register background task so teammates appear in the tasks pill/dialog - registerOutOfProcessTeammateTask(setAppState, { - teammateId, - sanitizedName, - teamName, - teammateColor, - prompt, - plan_mode_required, - paneId, - insideTmux, - backendType: detectionResult.backend.type, - toolUseId: context.toolUseId, - }) - - // Register agent in the team file const teamFile = await readTeamFileAsync(teamName) if (!teamFile) { throw new Error( - `Team "${teamName}" does not exist. Call spawnTeam first to create the team.`, - ) - } - teamFile.members.push({ - agentId: teammateId, - name: sanitizedName, - agentType: agent_type, - model, - prompt, - color: teammateColor, - planModeRequired: plan_mode_required, - joinedAt: Date.now(), - tmuxPaneId: paneId, - cwd: workingDir, - subscriptions: [], - backendType: detectionResult.backend.type, - }) - await writeTeamFileAsync(teamName, teamFile) - - // Send initial instructions to teammate via mailbox - // The teammate's inbox poller will pick this up and submit it as their first turn - await writeToMailbox( - sanitizedName, - { - from: TEAM_LEAD_NAME, - text: prompt, - timestamp: new Date().toISOString(), - }, - teamName, - ) - - return { - data: { - teammate_id: teammateId, - agent_id: teammateId, - agent_type, - model, - name: sanitizedName, - color: teammateColor, - tmux_session_name: sessionName, - tmux_window_name: windowName, - tmux_pane_id: paneId, - team_name: teamName, - is_splitpane: true, - plan_mode_required, - }, - } -} - -/** - * Handle spawn operation using separate windows (legacy behavior). - * Creates each teammate in its own tmux window. - */ -async function handleSpawnSeparateWindow( - input: SpawnInput, - context: ToolUseContext, -): Promise<{ data: SpawnOutput }> { - const { setAppState, getAppState } = context - const { name, prompt, agent_type, cwd, plan_mode_required } = input - - // Resolve model: 'inherit' → leader's model; undefined → default Opus - const model = resolveTeammateModel(input.model, getAppState().mainLoopModel) - - if (!name || !prompt) { - throw new Error('name and prompt are required for spawn operation') - } - - // Get team name from input or inherit from leader's team context - const appState = getAppState() - const teamName = input.team_name || appState.teamContext?.teamName - - if (!teamName) { - throw new Error( - 'team_name is required for spawn operation. Either provide team_name in input or call spawnTeam first to establish team context.', + `Team "${teamName}" does not exist. Call TeamCreate first to create the team before spawning teammates.`, ) } - // Generate unique name if duplicate exists in team - const uniqueName = await generateUniqueTeammateName(name, teamName) - - // Sanitize the name to prevent @ in agent IDs (would break agentName@teamName format) + const uniqueName = await generateUniqueTeammateName(input.name, teamName) const sanitizedName = sanitizeAgentName(uniqueName) - - // Generate deterministic agent ID from name and team const teammateId = formatAgentId(sanitizedName, teamName) - const windowName = `teammate-${sanitizeName(sanitizedName)}` - const workingDir = cwd || getCwd() - - // Ensure the swarm session exists - await ensureSession(SWARM_SESSION_NAME) - - // Assign a unique color to this teammate + const model = resolveTeammateModel(input.model, appState.mainLoopModel) const teammateColor = assignTeammateColor(teammateId) + const workingDir = input.cwd || getCwd() - // Create a new window for this teammate - const createWindowResult = await execFileNoThrow(TMUX_COMMAND, [ - 'new-window', - '-t', - SWARM_SESSION_NAME, - '-n', - windowName, - '-P', - '-F', - '#{pane_id}', - ]) - - if (createWindowResult.code !== 0) { - throw new Error( - `Failed to create tmux window: ${createWindowResult.stderr}`, - ) - } - - const paneId = createWindowResult.stdout.trim() - - // Build the command to spawn Claude Code with teammate identity - // Note: We spawn without a prompt - initial instructions are sent via mailbox - const binaryPath = getTeammateCommand() - - // Build teammate identity CLI args (replaces CLAUDE_CODE_* env vars) - const teammateArgs = [ - `--agent-id ${quote([teammateId])}`, - `--agent-name ${quote([sanitizedName])}`, - `--team-name ${quote([teamName])}`, - `--agent-color ${quote([teammateColor])}`, - `--parent-session-id ${quote([getSessionId()])}`, - plan_mode_required ? '--plan-mode-required' : '', - agent_type ? `--agent-type ${quote([agent_type])}` : '', - ] - .filter(Boolean) - .join(' ') - - // Build CLI flags to propagate to teammate - // Pass plan_mode_required to prevent inheriting bypass permissions - let inheritedFlags = buildInheritedCliFlags({ - planModeRequired: plan_mode_required, - permissionMode: appState.toolPermissionContext.mode, - }) - - // If teammate has a custom model, add --model flag (or replace inherited one) - if (model) { - // Remove any inherited --model flag first - inheritedFlags = inheritedFlags - .split(' ') - .filter((flag, i, arr) => flag !== '--model' && arr[i - 1] !== '--model') - .join(' ') - // Add the teammate's model - inheritedFlags = inheritedFlags - ? `${inheritedFlags} --model ${quote([model])}` - : `--model ${quote([model])}` - } - - const flagsStr = inheritedFlags ? ` ${inheritedFlags}` : '' - // Propagate env vars that teammates need but may not inherit from tmux split-window shells. - // Includes CLAUDECODE, CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS, and API provider vars. - const envStr = buildInheritedEnvVars() - const spawnCommand = `cd ${quote([workingDir])} && env ${envStr} ${quote([binaryPath])} ${teammateArgs}${flagsStr}` - - // Send the command to the new window - const sendKeysResult = await execFileNoThrow(TMUX_COMMAND, [ - 'send-keys', - '-t', - `${SWARM_SESSION_NAME}:${windowName}`, - spawnCommand, - 'Enter', - ]) - - if (sendKeysResult.code !== 0) { - throw new Error( - `Failed to send command to tmux window: ${sendKeysResult.stderr}`, - ) - } - - // Track the teammate in AppState's teamContext - setAppState(prev => ({ - ...prev, - teamContext: { - ...prev.teamContext, - teamName: teamName ?? prev.teamContext?.teamName ?? 'default', - teamFilePath: prev.teamContext?.teamFilePath ?? '', - leadAgentId: prev.teamContext?.leadAgentId ?? '', - teammates: { - ...(prev.teamContext?.teammates || {}), - [teammateId]: { - name: sanitizedName, - agentType: agent_type, - color: teammateColor, - tmuxSessionName: SWARM_SESSION_NAME, - tmuxPaneId: paneId, - cwd: workingDir, - spawnedAt: Date.now(), - }, - }, - }, - })) - - // Register background task so tmux teammates appear in the tasks pill/dialog - // Separate window spawns are always outside tmux (external swarm session) - registerOutOfProcessTeammateTask(setAppState, { - teammateId, - sanitizedName, - teamName, - teammateColor, - prompt, - plan_mode_required, - paneId, - insideTmux: false, - backendType: 'tmux', - toolUseId: context.toolUseId, - }) - - // Register agent in the team file - const teamFile = await readTeamFileAsync(teamName) - if (!teamFile) { - throw new Error( - `Team "${teamName}" does not exist. Call spawnTeam first to create the team.`, - ) - } - teamFile.members.push({ - agentId: teammateId, - name: sanitizedName, - agentType: agent_type, - model, - prompt, - color: teammateColor, - planModeRequired: plan_mode_required, - joinedAt: Date.now(), - tmuxPaneId: paneId, - cwd: workingDir, - subscriptions: [], - backendType: 'tmux', // This handler always uses tmux directly - }) - await writeTeamFileAsync(teamName, teamFile) - - // Send initial instructions to teammate via mailbox - // The teammate's inbox poller will pick this up and submit it as their first turn - await writeToMailbox( - sanitizedName, - { - from: TEAM_LEAD_NAME, - text: prompt, - timestamp: new Date().toISOString(), - }, - teamName, - ) - - return { - data: { - teammate_id: teammateId, - agent_id: teammateId, - agent_type, - model, - name: sanitizedName, - color: teammateColor, - tmux_session_name: SWARM_SESSION_NAME, - tmux_window_name: windowName, - tmux_pane_id: paneId, - team_name: teamName, - is_splitpane: false, - plan_mode_required, - }, - } -} - -/** - * Register a background task entry for an out-of-process (tmux/iTerm2) teammate. - * This makes tmux teammates visible in the background tasks pill and dialog, - * matching how in-process teammates are tracked. - */ -function registerOutOfProcessTeammateTask( - setAppState: (updater: (prev: AppState) => AppState) => void, - { - teammateId, - sanitizedName, - teamName, - teammateColor, - prompt, - plan_mode_required, - paneId, - insideTmux, - backendType, - toolUseId, - }: { - teammateId: string - sanitizedName: string - teamName: string - teammateColor: string - prompt: string - plan_mode_required?: boolean - paneId: string - insideTmux: boolean - backendType: BackendType - toolUseId?: string - }, -): void { - const taskId = generateTaskId('in_process_teammate') - const description = `${sanitizedName}: ${prompt.substring(0, 50)}${prompt.length > 50 ? '...' : ''}` - - const abortController = new AbortController() - - const taskState: InProcessTeammateTaskState = { - ...createTaskStateBase( - taskId, - 'in_process_teammate', - description, - toolUseId, - ), - type: 'in_process_teammate', - status: 'running', - identity: { - agentId: teammateId, - agentName: sanitizedName, - teamName, - color: teammateColor, - planModeRequired: plan_mode_required ?? false, - parentSessionId: getSessionId(), - }, - prompt, - abortController, - awaitingPlanApproval: false, - permissionMode: plan_mode_required ? 'plan' : 'default', - isIdle: false, - shutdownRequested: false, - lastReportedToolCount: 0, - lastReportedTokenCount: 0, - pendingUserMessages: [], - } - - registerTask(taskState, setAppState) - - // When abort is signaled, kill the pane using the backend that created it - // (tmux kill-pane for tmux panes, it2 session close for iTerm2 native panes). - // SDK task_notification bookend is emitted by killInProcessTeammate (the - // sole abort trigger for this controller). - abortController.signal.addEventListener( - 'abort', - () => { - if (isPaneBackend(backendType)) { - void getBackendByType(backendType).killPane(paneId, !insideTmux) - } - }, - { once: true }, - ) -} - -/** - * Handle spawn operation for in-process teammates. - * In-process teammates run in the same Node.js process using AsyncLocalStorage. - */ -async function handleSpawnInProcess( - input: SpawnInput, - context: ToolUseContext, -): Promise<{ data: SpawnOutput }> { - const { setAppState, getAppState } = context - const { name, prompt, agent_type, plan_mode_required } = input - - // Resolve model: 'inherit' → leader's model; undefined → default Opus - const model = resolveTeammateModel(input.model, getAppState().mainLoopModel) - - if (!name || !prompt) { - throw new Error('name and prompt are required for spawn operation') - } - - // Get team name from input or inherit from leader's team context - const appState = getAppState() - const teamName = input.team_name || appState.teamContext?.teamName - - if (!teamName) { - throw new Error( - 'team_name is required for spawn operation. Either provide team_name in input or call spawnTeam first to establish team context.', - ) - } - - // Generate unique name if duplicate exists in team - const uniqueName = await generateUniqueTeammateName(name, teamName) - - // Sanitize the name to prevent @ in agent IDs - const sanitizedName = sanitizeAgentName(uniqueName) - - // Generate deterministic agent ID from name and team - const teammateId = formatAgentId(sanitizedName, teamName) - - // Assign a unique color to this teammate - const teammateColor = assignTeammateColor(teammateId) - - // Look up custom agent definition if agent_type is provided let agentDefinition: CustomAgentDefinition | undefined - if (agent_type) { - const allAgents = context.options.agentDefinitions.activeAgents - const foundAgent = allAgents.find(a => a.agentType === agent_type) + if (input.agent_type) { + const foundAgent = context.options.agentDefinitions.activeAgents.find( + a => a.agentType === input.agent_type, + ) if (foundAgent && isCustomAgent(foundAgent)) { agentDefinition = foundAgent } logForDebugging( - `[handleSpawnInProcess] agent_type=${agent_type}, found=${!!agentDefinition}`, + `[spawnTeammate] agent_type=${input.agent_type}, found=${!!agentDefinition}`, ) } - // Spawn in-process teammate - const config: InProcessSpawnConfig = { - name: sanitizedName, + return { teamName, - prompt, - color: teammateColor, - planModeRequired: plan_mode_required ?? false, + teamFile, + sanitizedName, + teammateId, model, + teammateColor, + workingDir, + agentDefinition, + } +} + +function getBackendDisplay(result: TeammateSpawnResult): { + sessionName: string + windowName: string + paneId: string + isSplitPane: boolean +} { + if (result.backendType === 'in-process') { + return { + sessionName: 'in-process', + windowName: 'in-process', + paneId: 'in-process', + isSplitPane: false, + } } - const result = await spawnInProcessTeammate(config, context) - - if (!result.success) { - throw new Error(result.error ?? 'Failed to spawn in-process teammate') + return { + sessionName: result.insideTmux ? 'current' : SWARM_SESSION_NAME, + windowName: result.windowName ?? (result.insideTmux ? 'current' : 'swarm-view'), + paneId: result.paneId ?? '', + isSplitPane: result.isSplitPane ?? true, } +} - // Debug: log what spawn returned - logForDebugging( - `[handleSpawnInProcess] spawn result: taskId=${result.taskId}, hasContext=${!!result.teammateContext}, hasAbort=${!!result.abortController}`, - ) +function updateTeamContext( + context: ToolUseContext, + spawn: ResolvedSpawn, + result: TeammateSpawnResult, +): void { + const display = getBackendDisplay(result) - // Start the agent execution loop (fire-and-forget) - if (result.taskId && result.teammateContext && result.abortController) { - startInProcessTeammate({ - identity: { - agentId: teammateId, - agentName: sanitizedName, - teamName, - color: teammateColor, - planModeRequired: plan_mode_required ?? false, - parentSessionId: result.teammateContext.parentSessionId, - }, - taskId: result.taskId, - prompt, - description: input.description, - model, - agentDefinition, - teammateContext: result.teammateContext, - // Strip messages: the teammate never reads toolUseContext.messages - // (it builds its own history via allMessages in inProcessRunner). - // Passing the parent's full conversation here would pin it for the - // teammate's lifetime, surviving /clear and auto-compact. - toolUseContext: { ...context, messages: [] }, - abortController: result.abortController, - invokingRequestId: input.invokingRequestId, - }) - logForDebugging( - `[handleSpawnInProcess] Started agent execution for ${teammateId}`, - ) - } - - // Track the teammate in AppState's teamContext - // Auto-register leader if spawning without prior spawnTeam call - setAppState(prev => { - const needsLeaderSetup = !prev.teamContext?.leadAgentId - const leadAgentId = needsLeaderSetup - ? formatAgentId(TEAM_LEAD_NAME, teamName) - : prev.teamContext!.leadAgentId - - // Build teammates map, including leader if needed for inbox polling + context.setAppState(prev => { + const leadAgentId = prev.teamContext?.leadAgentId || spawn.teamFile.leadAgentId const existingTeammates = prev.teamContext?.teammates || {} - const leadEntry = needsLeaderSetup - ? { - [leadAgentId]: { - name: TEAM_LEAD_NAME, - agentType: TEAM_LEAD_NAME, - color: assignTeammateColor(leadAgentId), - tmuxSessionName: 'in-process', - tmuxPaneId: 'leader', - cwd: getCwd(), - spawnedAt: Date.now(), - }, - } - : {} + const needsLeaderEntry = !(leadAgentId in existingTeammates) + const leadMember = spawn.teamFile.members.find(m => m.name === TEAM_LEAD_NAME) return { ...prev, teamContext: { ...prev.teamContext, - teamName: teamName ?? prev.teamContext?.teamName ?? 'default', - teamFilePath: prev.teamContext?.teamFilePath ?? '', + teamName: spawn.teamName, + teamFilePath: prev.teamContext?.teamFilePath || getTeamFilePath(spawn.teamName), leadAgentId, teammates: { ...existingTeammates, - ...leadEntry, - [teammateId]: { - name: sanitizedName, - agentType: agent_type, - color: teammateColor, - tmuxSessionName: 'in-process', - tmuxPaneId: 'in-process', - cwd: getCwd(), + ...(needsLeaderEntry + ? { + [leadAgentId]: { + name: TEAM_LEAD_NAME, + agentType: leadMember?.agentType ?? TEAM_LEAD_NAME, + color: assignTeammateColor(leadAgentId), + tmuxSessionName: leadMember?.backendType === 'in-process' ? 'in-process' : '', + tmuxPaneId: leadMember?.tmuxPaneId ?? '', + cwd: leadMember?.cwd ?? getCwd(), + spawnedAt: leadMember?.joinedAt ?? Date.now(), + }, + } + : {}), + [spawn.teammateId]: { + name: spawn.sanitizedName, + agentType: spawn.agentDefinition?.agentType, + color: spawn.teammateColor, + tmuxSessionName: display.sessionName, + tmuxPaneId: display.paneId, + cwd: spawn.workingDir, spawnedAt: Date.now(), }, }, }, } }) - - // Register agent in the team file - const teamFile = await readTeamFileAsync(teamName) - if (!teamFile) { - throw new Error( - `Team "${teamName}" does not exist. Call spawnTeam first to create the team.`, - ) - } - teamFile.members.push({ - agentId: teammateId, - name: sanitizedName, - agentType: agent_type, - model, - prompt, - color: teammateColor, - planModeRequired: plan_mode_required, - joinedAt: Date.now(), - tmuxPaneId: 'in-process', - cwd: getCwd(), - subscriptions: [], - backendType: 'in-process', - }) - await writeTeamFileAsync(teamName, teamFile) - - // Note: Do NOT send the prompt via mailbox for in-process teammates. - // In-process teammates receive the prompt directly via startInProcessTeammate(). - // The mailbox is only needed for tmux-based teammates which poll for their initial message. - // Sending via both paths would cause duplicate welcome messages. - - return { - data: { - teammate_id: teammateId, - agent_id: teammateId, - agent_type, - model, - name: sanitizedName, - color: teammateColor, - tmux_session_name: 'in-process', - tmux_window_name: 'in-process', - tmux_pane_id: 'in-process', - team_name: teamName, - is_splitpane: false, - plan_mode_required, - }, - } } -/** - * Handle spawn operation - creates a new Claude Code instance. - * Uses in-process mode when enabled, otherwise uses tmux/iTerm2 split-pane view. - * Falls back to in-process if pane backend detection fails (e.g., iTerm2 without - * it2 CLI or tmux installed). - */ +async function appendTeamMember( + input: SpawnInput, + spawn: ResolvedSpawn, + result: TeammateSpawnResult, +): Promise { + const teamFile = await readTeamFileAsync(spawn.teamName) + if (!teamFile) { + throw new Error(`Team "${spawn.teamName}" disappeared during teammate spawn.`) + } + + const display = getBackendDisplay(result) + teamFile.members.push({ + agentId: spawn.teammateId, + name: spawn.sanitizedName, + agentType: input.agent_type, + model: spawn.model, + prompt: input.prompt, + color: spawn.teammateColor, + planModeRequired: input.plan_mode_required, + joinedAt: Date.now(), + tmuxPaneId: display.paneId, + cwd: spawn.workingDir, + subscriptions: [], + backendType: result.backendType, + }) + await writeTeamFileAsync(spawn.teamName, teamFile) +} + async function handleSpawn( input: SpawnInput, context: ToolUseContext, ): Promise<{ data: SpawnOutput }> { - // Check if in-process mode is enabled via feature flag - if (isInProcessEnabled()) { - return handleSpawnInProcess(input, context) + const spawn = await resolveSpawn(input, context) + const executor = await getTeammateExecutor(true, { + onNeedsIt2Setup: context.setToolJSX + ? tmuxAvailable => + new Promise(resolve => { + context.setToolJSX!({ + jsx: React.createElement(It2SetupPrompt, { + onDone: result => { + context.setToolJSX!(null) + resolve(result) + }, + tmuxAvailable, + }), + shouldHidePromptInput: true, + }) + }) + : undefined, + }) + executor.setContext?.(context) + + const result = await executor.spawn({ + name: spawn.sanitizedName, + teamName: spawn.teamName, + color: spawn.teammateColor, + prompt: input.prompt, + cwd: spawn.workingDir, + model: spawn.model, + agentType: input.agent_type, + agentDefinition: spawn.agentDefinition, + description: input.description, + planModeRequired: input.plan_mode_required ?? false, + parentSessionId: getSessionId(), + invokingRequestId: input.invokingRequestId, + useSplitPane: input.use_splitpane !== false, + }) + + if (!result.success) { + throw new Error(result.error ?? 'Failed to spawn teammate') } - // Pre-flight: ensure a pane backend is available before attempting pane-based spawn. - // This handles auto-mode cases like iTerm2 without it2 or tmux installed, where - // isInProcessEnabled() returns false but detectAndGetBackend() has no viable backend. - // Narrowly scoped so user cancellation and other spawn errors propagate normally. - try { - await detectAndGetBackend() - } catch (error) { - // Only fall back silently in auto mode. If the user explicitly configured - // teammateMode: 'tmux', let the error propagate so they see the actionable - // install instructions from getTmuxInstallInstructions(). - if (getTeammateModeFromSnapshot() !== 'auto') { - throw error - } - logForDebugging( - `[handleSpawn] No pane backend available, falling back to in-process: ${errorMessage(error)}`, - ) - // Record the fallback so isInProcessEnabled() reflects the actual mode - // (fixes banner and other UI that would otherwise show tmux attach commands). - markInProcessFallback() - return handleSpawnInProcess(input, context) - } + updateTeamContext(context, spawn, result) + await appendTeamMember(input, spawn, result) - // Backend is available (and now cached) - proceed with pane spawning. - // Any errors here (user cancellation, validation, etc.) propagate to the caller. - const useSplitPane = input.use_splitpane !== false - if (useSplitPane) { - return handleSpawnSplitPane(input, context) + const display = getBackendDisplay(result) + return { + data: { + teammate_id: spawn.teammateId, + agent_id: spawn.teammateId, + agent_type: input.agent_type, + model: spawn.model, + name: spawn.sanitizedName, + color: spawn.teammateColor, + tmux_session_name: display.sessionName, + tmux_window_name: display.windowName, + tmux_pane_id: display.paneId, + team_name: spawn.teamName, + is_splitpane: display.isSplitPane, + plan_mode_required: input.plan_mode_required, + }, } - return handleSpawnSeparateWindow(input, context) } // ============================================================================