feat: 添加 builtin-tools 增强与测试覆盖

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
unraid
2026-04-22 22:38:10 +08:00
parent 7881cc617c
commit d208855f07
12 changed files with 1449 additions and 985 deletions

View File

@@ -2,6 +2,12 @@ import { z } from 'zod/v4'
import type { ToolResultBlockParam } from 'src/Tool.js'
import { buildTool } from 'src/Tool.js'
import { lazySchema } from 'src/utils/lazySchema.js'
import { tokenCountWithEstimation } from 'src/utils/tokens.js'
import {
getStats,
isContextCollapseEnabled,
} from 'src/services/contextCollapse/index.js'
import { isSessionMemoryInitialized } from 'src/services/SessionMemory/sessionMemoryUtils.js'
const CTX_INSPECT_TOOL_NAME = 'CtxInspect'
@@ -19,6 +25,10 @@ type CtxInput = z.infer<InputSchema>
type CtxOutput = {
total_tokens: number
message_count: number
context_window_model: string
prompt_caching_enabled: boolean
session_memory_enabled: boolean
context_collapse_enabled: boolean
summary: string
}
@@ -67,13 +77,45 @@ Use this to understand your context budget before deciding whether to snip old m
}
},
async call() {
// Context inspection is wired into the context collapse system.
async call(input: CtxInput, context) {
const messages = context.messages ?? []
const model = context.options?.mainLoopModel ?? 'unknown'
const totalTokens = tokenCountWithEstimation(messages)
const collapseEnabled = isContextCollapseEnabled()
const collapseStats = getStats()
const focused = input.query?.trim()
const sessionMemoryEnabled = isSessionMemoryInitialized()
// Prompt caching is an API-level feature controlled by the provider, not
// a user-facing toggle. Report as enabled only for providers known to
// support Anthropic-style prompt caching (first-party, Bedrock, Vertex).
const promptCachingEnabled = !model.startsWith('openai/') &&
!model.startsWith('grok/') &&
!model.startsWith('gemini/')
const summaryParts = [
focused ? `Focus: ${focused}` : 'Overall context summary',
`Model context: ${model}`,
`Prompt caching: ${promptCachingEnabled ? 'enabled' : 'disabled'}`,
`Session memory: ${sessionMemoryEnabled ? 'enabled' : 'disabled'}`,
`Context collapse: ${collapseEnabled ? 'enabled' : 'disabled'}`,
]
if (collapseEnabled) {
summaryParts.push(
`Collapse spans: ${collapseStats.collapsedSpans} committed, ${collapseStats.stagedSpans} staged, ${collapseStats.collapsedMessages} messages summarized`,
)
}
return {
data: {
total_tokens: 0,
message_count: 0,
summary: 'Context inspection requires the CONTEXT_COLLAPSE runtime.',
total_tokens: totalTokens,
message_count: messages.length,
context_window_model: model,
prompt_caching_enabled: promptCachingEnabled,
session_memory_enabled: sessionMemoryEnabled,
context_collapse_enabled: collapseEnabled,
summary: summaryParts.join('\n'),
},
}
},

View File

@@ -0,0 +1,216 @@
import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test'
mock.module('src/utils/log.ts', () => ({
logError: () => {},
logToFile: () => {},
getLogDisplayTitle: () => '',
logEvent: () => {},
logMCPError: () => {},
logMCPDebug: () => {},
dateToFilename: (d: Date) => d.toISOString().replace(/[:.]/g, '-'),
getLogFilePath: () => '/tmp/mock-log',
attachErrorLogSink: () => {},
getInMemoryErrors: () => [],
loadErrorLogs: async () => [],
getErrorLogByIndex: async () => null,
captureAPIRequest: () => {},
_resetErrorLogForTesting: () => {},
}))
mock.module('src/services/tokenEstimation.ts', () => ({
roughTokenCountEstimation: (text: string) => Math.ceil(text.length / 4),
roughTokenCountEstimationForMessages: (msgs: unknown[]) => msgs.length * 64,
roughTokenCountEstimationForMessage: () => 64,
roughTokenCountEstimationForFileType: () => 64,
bytesPerTokenForFileType: () => 4,
countTokensWithAPI: async () => 0,
countMessagesTokensWithAPI: async () => 0,
countTokensViaHaikuFallback: async () => 0,
}))
let sessionMemoryInitialized = false
mock.module('src/services/SessionMemory/sessionMemoryUtils.ts', () => ({
isSessionMemoryInitialized: () => sessionMemoryInitialized,
waitForSessionMemoryExtraction: async () => {},
getLastSummarizedMessageId: () => undefined,
getSessionMemoryContent: async () => null,
setLastSummarizedMessageId: () => {},
markExtractionStarted: () => {},
markExtractionCompleted: () => {},
setSessionMemoryConfig: () => {},
getSessionMemoryConfig: () => ({}),
recordExtractionTokenCount: () => {},
markSessionMemoryInitialized: () => {},
hasMetInitializationThreshold: () => false,
hasMetUpdateThreshold: () => false,
getToolCallsBetweenUpdates: () => 0,
resetSessionMemoryState: () => {},
DEFAULT_SESSION_MEMORY_CONFIG: {},
}))
mock.module('src/utils/slowOperations.ts', () => ({
jsonStringify: JSON.stringify,
jsonParse: JSON.parse,
slowLogging: { enabled: false },
clone: (value: unknown) => structuredClone(value),
cloneDeep: (value: unknown) => structuredClone(value),
callerFrame: () => '',
SLOW_OPERATION_THRESHOLD_MS: 100,
writeFileSync_DEPRECATED: () => {},
}))
const { initContextCollapse, resetContextCollapse } = await import(
'src/services/contextCollapse/index.js'
)
const { tokenCountWithEstimation } = await import('src/utils/tokens.js')
const { CtxInspectTool } = await import('../CtxInspectTool.js')
function makeUserMessage(text: string) {
return {
type: 'user' as const,
uuid: `user-${text}`,
message: { role: 'user' as const, content: text },
}
}
function makeAssistantMessage(text: string) {
return {
type: 'assistant' as const,
uuid: `assistant-${text}`,
message: {
role: 'assistant' as const,
content: [{ type: 'text' as const, text }],
},
}
}
function makeContext(messages: unknown[], mainLoopModel = 'claude-sonnet-4-6') {
return {
messages,
options: {
mainLoopModel,
},
getAppState: () => ({}),
} as any
}
const allowTool = async (input: Record<string, unknown>) => ({
behavior: 'allow' as const,
updatedInput: input,
})
const parentMessage = makeAssistantMessage('Parent tool call')
beforeEach(() => {
resetContextCollapse()
sessionMemoryInitialized = false
})
afterEach(() => {
resetContextCollapse()
sessionMemoryInitialized = false
})
describe('CtxInspectTool', () => {
test('tool exports and metadata remain stable', async () => {
expect(CtxInspectTool).toBeDefined()
expect(CtxInspectTool.name).toBe('CtxInspect')
expect(typeof CtxInspectTool.call).toBe('function')
expect(await CtxInspectTool.description()).toContain('context')
expect(CtxInspectTool.userFacingName()).toBe('CtxInspect')
expect(CtxInspectTool.isReadOnly()).toBe(true)
expect(CtxInspectTool.isConcurrencySafe()).toBe(true)
})
test('formats tool results for transcript rendering', () => {
const block = CtxInspectTool.mapToolResultToToolResultBlockParam(
{
total_tokens: 192,
message_count: 3,
context_window_model: 'claude-sonnet-4-6',
prompt_caching_enabled: true,
session_memory_enabled: true,
context_collapse_enabled: false,
summary: 'Context collapse: disabled',
},
'tool-use-id',
)
expect(block.tool_use_id).toBe('tool-use-id')
expect(block.content).toContain('192 tokens')
expect(block.content).toContain('3 messages')
expect(block.content).toContain('Context collapse: disabled')
})
test('returns live context counts and mechanism state', async () => {
const messages = [
makeUserMessage('Inspect the current context budget.'),
makeAssistantMessage('Looking at the current conversation state.'),
]
const context = makeContext(messages, 'claude-sonnet-4-6')
const result = await (CtxInspectTool as any).call(
{},
context,
allowTool,
parentMessage,
)
expect(Object.keys(result.data).sort()).toEqual([
'context_collapse_enabled',
'context_window_model',
'message_count',
'prompt_caching_enabled',
'session_memory_enabled',
'summary',
'total_tokens',
])
expect(result.data.message_count).toBe(messages.length)
expect(result.data.total_tokens).toBe(tokenCountWithEstimation(messages as any))
expect(result.data.context_window_model).toBe('claude-sonnet-4-6')
expect(result.data.prompt_caching_enabled).toBe(true)
expect(result.data.session_memory_enabled).toBe(false)
expect(result.data.context_collapse_enabled).toBe(false)
expect(result.data.summary).toContain('Overall context summary')
expect(result.data.summary).toContain('Session memory: disabled')
expect(result.data.summary).toContain('Context collapse: disabled')
})
test('query input focuses summary and collapse runtime changes the reported state', async () => {
const messages = [
makeUserMessage('Show me tool usage pressure in this thread.'),
makeAssistantMessage('Summarizing tool-heavy context now.'),
]
const context = makeContext(messages, 'claude-sonnet-4-6')
const disabledResult = await (CtxInspectTool as any).call(
{ query: 'tool usage' },
context,
allowTool,
parentMessage,
)
initContextCollapse()
const enabledResult = await (CtxInspectTool as any).call(
{ query: 'tool usage' },
context,
allowTool,
parentMessage,
)
expect(disabledResult.data.message_count).toBe(messages.length)
expect(enabledResult.data.message_count).toBe(messages.length)
expect(disabledResult.data.total_tokens).toBe(
tokenCountWithEstimation(messages as any),
)
expect(enabledResult.data.total_tokens).toBe(
tokenCountWithEstimation(messages as any),
)
expect(disabledResult.data.summary).toContain('Focus: tool usage')
expect(disabledResult.data.context_collapse_enabled).toBe(false)
expect(enabledResult.data.context_collapse_enabled).toBe(true)
expect(enabledResult.data.summary).toContain('Context collapse: enabled')
expect(enabledResult.data.summary).toContain('Collapse spans:')
})
})

View File

@@ -11,6 +11,7 @@ import {
getClaudeAIOAuthTokens,
} from 'src/utils/auth.js'
import { lazySchema } from 'src/utils/lazySchema.js'
import { appendRemoteTriggerAuditRecord } from 'src/utils/remoteTriggerAudit.js'
import { jsonStringify } from 'src/utils/slowOperations.js'
import { DESCRIPTION, PROMPT, REMOTE_TRIGGER_TOOL_NAME } from './prompt.js'
import { renderToolResultMessage, renderToolUseMessage } from './UI.js'
@@ -36,6 +37,7 @@ const outputSchema = lazySchema(() =>
z.object({
status: z.number(),
json: z.string(),
audit_id: z.string().optional(),
}),
)
type OutputSchema = ReturnType<typeof outputSchema>
@@ -76,77 +78,96 @@ export const RemoteTriggerTool = buildTool({
return PROMPT
},
async call(input: Input, context: ToolUseContext) {
await checkAndRefreshOAuthTokenIfNeeded()
const accessToken = getClaudeAIOAuthTokens()?.accessToken
if (!accessToken) {
throw new Error(
'Not authenticated with a claude.ai account. Run /login and try again.',
)
}
const orgUUID = await getOrganizationUUID()
if (!orgUUID) {
throw new Error('Unable to resolve organization UUID.')
const auditBase = {
action: input.action,
...(input.trigger_id ? { triggerId: input.trigger_id } : {}),
}
try {
await checkAndRefreshOAuthTokenIfNeeded()
const accessToken = getClaudeAIOAuthTokens()?.accessToken
if (!accessToken) {
throw new Error(
'Not authenticated with a claude.ai account. Run /login and try again.',
)
}
const orgUUID = await getOrganizationUUID()
if (!orgUUID) {
throw new Error('Unable to resolve organization UUID.')
}
const base = `${getOauthConfig().BASE_API_URL}/v1/code/triggers`
const headers = {
Authorization: `Bearer ${accessToken}`,
'Content-Type': 'application/json',
'anthropic-version': '2023-06-01',
'anthropic-beta': TRIGGERS_BETA,
'x-organization-uuid': orgUUID,
}
const base = `${getOauthConfig().BASE_API_URL}/v1/code/triggers`
const headers = {
Authorization: `Bearer ${accessToken}`,
'Content-Type': 'application/json',
'anthropic-version': '2023-06-01',
'anthropic-beta': TRIGGERS_BETA,
'x-organization-uuid': orgUUID,
}
const { action, trigger_id, body } = input
let method: 'GET' | 'POST'
let url: string
let data: unknown
switch (action) {
case 'list':
method = 'GET'
url = base
break
case 'get':
if (!trigger_id) throw new Error('get requires trigger_id')
method = 'GET'
url = `${base}/${trigger_id}`
break
case 'create':
if (!body) throw new Error('create requires body')
method = 'POST'
url = base
data = body
break
case 'update':
if (!trigger_id) throw new Error('update requires trigger_id')
if (!body) throw new Error('update requires body')
method = 'POST'
url = `${base}/${trigger_id}`
data = body
break
case 'run':
if (!trigger_id) throw new Error('run requires trigger_id')
method = 'POST'
url = `${base}/${trigger_id}/run`
data = {}
break
}
const { action, trigger_id, body } = input
let method: 'GET' | 'POST'
let url: string
let data: unknown
switch (action) {
case 'list':
method = 'GET'
url = base
break
case 'get':
if (!trigger_id) throw new Error('get requires trigger_id')
method = 'GET'
url = `${base}/${trigger_id}`
break
case 'create':
if (!body) throw new Error('create requires body')
method = 'POST'
url = base
data = body
break
case 'update':
if (!trigger_id) throw new Error('update requires trigger_id')
if (!body) throw new Error('update requires body')
method = 'POST'
url = `${base}/${trigger_id}`
data = body
break
case 'run':
if (!trigger_id) throw new Error('run requires trigger_id')
method = 'POST'
url = `${base}/${trigger_id}/run`
data = {}
break
}
const res = await axios.request({
method,
url,
headers,
data,
timeout: 20_000,
signal: context.abortController.signal,
validateStatus: () => true,
})
return {
data: {
const res = await axios.request({
method,
url,
headers,
data,
timeout: 20_000,
signal: context.abortController.signal,
validateStatus: () => true,
})
const audit = await appendRemoteTriggerAuditRecord({
...auditBase,
ok: res.status >= 200 && res.status < 300,
status: res.status,
json: jsonStringify(res.data),
},
})
return {
data: {
status: res.status,
json: jsonStringify(res.data),
audit_id: audit.auditId,
},
}
} catch (error) {
await appendRemoteTriggerAuditRecord({
...auditBase,
ok: false,
error: error instanceof Error ? error.message : String(error),
})
throw error
}
},
mapToolResultToToolResultBlockParam(output, toolUseID) {

View File

@@ -0,0 +1,91 @@
import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test'
import { mkdir, readFile, rm } from 'fs/promises'
import { tmpdir } from 'os'
import { join } from 'path'
import {
resetStateForTests,
setOriginalCwd,
setProjectRoot,
} from 'src/bootstrap/state.js'
let requestStatus = 200
mock.module('axios', () => ({
default: {
request: async () => ({
status: requestStatus,
data: { ok: requestStatus >= 200 && requestStatus < 300 },
}),
},
}))
mock.module('src/utils/auth.js', () => ({
checkAndRefreshOAuthTokenIfNeeded: async () => {},
getClaudeAIOAuthTokens: () => ({ accessToken: 'token' }),
}))
mock.module('src/services/oauth/client.js', () => ({
getOrganizationUUID: async () => 'org',
}))
mock.module('src/constants/oauth.js', () => ({
getOauthConfig: () => ({ BASE_API_URL: 'https://example.test' }),
}))
let cwd = ''
let previousCwd = ''
beforeEach(async () => {
requestStatus = 200
previousCwd = process.cwd()
cwd = join(tmpdir(), `remote-trigger-tool-${Date.now()}-${Math.random().toString(16).slice(2)}`)
await mkdir(cwd, { recursive: true })
process.chdir(cwd)
resetStateForTests()
setOriginalCwd(cwd)
setProjectRoot(cwd)
})
afterEach(async () => {
resetStateForTests()
process.chdir(previousCwd)
await rm(cwd, { recursive: true, force: true })
})
describe('RemoteTriggerTool audit', () => {
test('writes an audit record for successful remote calls', async () => {
const { RemoteTriggerTool } = await import('../RemoteTriggerTool')
const result = await RemoteTriggerTool.call(
{ action: 'run', trigger_id: 'trigger-1' },
{ abortController: new AbortController() } as any,
)
expect(result.data.audit_id).toBeString()
const raw = await readFile(
join(cwd, '.claude', 'remote-trigger-audit.jsonl'),
'utf-8',
)
expect(raw).toContain('"action":"run"')
expect(raw).toContain('"triggerId":"trigger-1"')
expect(raw).toContain('"ok":true')
})
test('writes an audit record before rethrowing validation failures', async () => {
const { RemoteTriggerTool } = await import('../RemoteTriggerTool')
await expect(
RemoteTriggerTool.call(
{ action: 'run' },
{ abortController: new AbortController() } as any,
),
).rejects.toThrow('run requires trigger_id')
const raw = await readFile(
join(cwd, '.claude', 'remote-trigger-audit.jsonl'),
'utf-8',
)
expect(raw).toContain('"action":"run"')
expect(raw).toContain('"ok":false')
expect(raw).toContain('run requires trigger_id')
})
})

View File

@@ -14,11 +14,26 @@ import {
} from 'src/utils/swarm/teamHelpers.js'
import { clearTeammateColors } from 'src/utils/swarm/teammateLayoutManager.js'
import { clearLeaderTeamName } from 'src/utils/tasks.js'
import { ensureBackendsRegistered, getBackendByType, getInProcessBackend } from 'src/utils/swarm/backends/registry.js'
import { createPaneBackendExecutor } from 'src/utils/swarm/backends/PaneBackendExecutor.js'
import { isPaneBackend } from 'src/utils/swarm/backends/types.js'
import { sleep } from 'src/utils/sleep.js'
import { TEAM_DELETE_TOOL_NAME } from './constants.js'
import { getPrompt } from './prompt.js'
import { renderToolResultMessage, renderToolUseMessage } from './UI.js'
const inputSchema = lazySchema(() => z.strictObject({}))
const inputSchema = lazySchema(() =>
z.strictObject({
wait_ms: z
.number()
.min(0)
.max(30_000)
.optional()
.describe(
'Optional time to wait for active teammates to acknowledge shutdown before cleanup.',
),
}),
)
type InputSchema = ReturnType<typeof inputSchema>
export type Output = {
@@ -68,7 +83,7 @@ export const TeamDeleteTool: Tool<InputSchema, Output> = buildTool({
}
},
async call(_input, context) {
async call(input, context) {
const { setAppState, getAppState } = context
const appState = getAppState()
const teamName = appState.teamContext?.teamName
@@ -87,13 +102,82 @@ export const TeamDeleteTool: Tool<InputSchema, Output> = buildTool({
const activeMembers = nonLeadMembers.filter(m => m.isActive !== false)
if (activeMembers.length > 0) {
const memberNames = activeMembers.map(m => m.name).join(', ')
return {
data: {
success: false,
message: `Cannot cleanup team with ${activeMembers.length} active member(s): ${memberNames}. Use requestShutdown to gracefully terminate teammates first.`,
team_name: teamName,
},
const requested: string[] = []
for (const member of activeMembers) {
let sent = false
if (member.backendType === 'in-process') {
const executor = getInProcessBackend()
executor.setContext?.(context)
sent = await executor.terminate(
member.agentId,
'Team cleanup requested by team lead',
)
} else if (member.backendType && isPaneBackend(member.backendType)) {
await ensureBackendsRegistered()
const executor = createPaneBackendExecutor(
getBackendByType(member.backendType),
)
executor.setContext?.(context)
sent = await executor.terminate(
member.agentId,
'Team cleanup requested by team lead',
)
}
if (sent) {
requested.push(member.name)
}
}
const waitMs = input.wait_ms ?? 0
if (waitMs > 0 && requested.length > 0) {
const deadline = Date.now() + waitMs
while (Date.now() < deadline) {
await sleep(Math.min(250, Math.max(0, deadline - Date.now())))
const refreshed = readTeamFile(teamName)
const stillActive =
refreshed?.members.filter(
m => m.name !== TEAM_LEAD_NAME && m.isActive !== false,
) ?? []
if (stillActive.length === 0) {
break
}
}
const refreshed = readTeamFile(teamName)
const stillActive =
refreshed?.members.filter(
m => m.name !== TEAM_LEAD_NAME && m.isActive !== false,
) ?? []
if (stillActive.length === 0) {
// Fall through to cleanup with the refreshed team file state.
} else {
const memberNames = stillActive.map(m => m.name).join(', ')
return {
data: {
success: false,
message: `Shutdown requested for active teammate(s): ${requested.join(', ')}. Cleanup is still blocked after waiting ${waitMs}ms: ${memberNames}.`,
team_name: teamName,
},
}
}
}
const latestTeamFile = readTeamFile(teamName)
const latestActiveMembers =
latestTeamFile?.members.filter(
m => m.name !== TEAM_LEAD_NAME && m.isActive !== false,
) ?? []
if (latestActiveMembers.length === 0) {
// Continue to cleanup below.
} else {
const memberNames = latestActiveMembers.map(m => m.name).join(', ')
return {
data: {
success: false,
message:
requested.length > 0
? `Shutdown requested for active teammate(s): ${requested.join(', ')}. Cleanup is blocked until they exit: ${memberNames}.`
: `Cannot cleanup team with ${latestActiveMembers.length} active member(s): ${memberNames}. Use requestShutdown to gracefully terminate teammates first.`,
team_name: teamName,
},
}
}
}
}

View File

@@ -9,19 +9,11 @@ const inputSchema = lazySchema(() =>
z.strictObject({
url: z
.string()
.describe('URL to navigate to in the browser.'),
.describe('URL to fetch and extract content from.'),
action: z
.enum(['navigate', 'screenshot', 'click', 'type', 'scroll'])
.enum(['navigate', 'screenshot'])
.optional()
.describe('Browser action to perform. Defaults to "navigate".'),
selector: z
.string()
.optional()
.describe('CSS selector for click/type actions.'),
text: z
.string()
.optional()
.describe('Text to type when action is "type".'),
.describe('Action to perform. "navigate" fetches page content (default). "screenshot" returns a text snapshot of the page.'),
}),
)
type InputSchema = ReturnType<typeof inputSchema>
@@ -45,16 +37,24 @@ export const WebBrowserTool = buildTool({
},
async description() {
return 'Browse the web using an embedded browser'
return 'Fetch and read web page content via HTTP'
},
async prompt() {
return `Open and interact with web pages in an embedded browser. Supports navigation, screenshots, clicking, typing, and scrolling.
return `Fetch web pages via HTTP and extract their text content. This is a lightweight browser tool (HTTP fetch, not a full browser engine).
Supported actions:
- navigate: Fetch a URL and extract page title + text content
- screenshot: Same as navigate (returns text snapshot, not a visual screenshot)
Limitations:
- No JavaScript execution — only sees server-rendered HTML
- click/type/scroll require a full browser runtime (not available)
- For full browser interaction, use the Claude-in-Chrome MCP tools instead
Use this for:
- Viewing web pages and their content
- Taking screenshots of UI
- Interacting with web applications
- Testing web endpoints with full browser rendering`
- Reading web page content and documentation
- Checking API endpoints that return HTML
- Quick page title/content extraction`
},
isConcurrencySafe() {
@@ -85,12 +85,84 @@ Use this for:
},
async call(input: BrowserInput) {
// Browser integration requires the WEB_BROWSER_TOOL runtime (Bun WebView).
const action = input.action ?? 'navigate'
if (action === 'navigate' || action === 'screenshot') {
// Fetch the page content via HTTP
try {
const response = await fetch(input.url, {
headers: {
'User-Agent':
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
Accept:
'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
},
redirect: 'follow',
})
if (!response.ok) {
return {
data: {
title: `HTTP ${response.status}`,
url: input.url,
content: `Error: ${response.status} ${response.statusText}`,
},
}
}
const html = await response.text()
// Extract title
const titleMatch = html.match(/<title[^>]*>([^<]*)<\/title>/i)
const title = titleMatch?.[1]?.trim() ?? ''
// Extract text content (strip HTML tags, scripts, styles)
let textContent = html
.replace(/<script[\s\S]*?<\/script>/gi, '')
.replace(/<style[\s\S]*?<\/style>/gi, '')
.replace(/<[^>]+>/g, ' ')
.replace(/\s+/g, ' ')
.trim()
// Truncate to reasonable size
if (textContent.length > 50_000) {
textContent = textContent.slice(0, 50_000) + '\n[truncated]'
}
if (action === 'screenshot') {
return {
data: {
title,
url: response.url,
content: `[Text snapshot — visual screenshots require Chrome browser tools]\n\n${textContent}`,
},
}
}
return {
data: {
title,
url: response.url,
content: textContent,
},
}
} catch (err) {
return {
data: {
title: 'Error',
url: input.url,
content: `Failed to fetch: ${err instanceof Error ? err.message : String(err)}`,
},
}
}
}
// Unreachable — schema only allows navigate/screenshot
return {
data: {
title: '',
url: input.url,
content: 'Web browser requires the WEB_BROWSER_TOOL runtime.',
content: `Unknown action "${action}".`,
},
}
},

View File

@@ -0,0 +1,94 @@
import { describe, test, expect, beforeAll, afterAll } from 'bun:test'
// Mock fetch directly — avoids flaky dependency on external hosts AND
// pollution by other tests that call setGlobalDispatcher (proxy agents make
// localhost fetches return 500 in the full-suite run).
const realFetch = globalThis.fetch
beforeAll(() => {
globalThis.fetch = (async (
input: string | URL | Request,
_init?: RequestInit,
) => {
const url = typeof input === 'string' ? input : input.toString()
if (url === 'not-a-url' || !url.startsWith('http')) {
throw new TypeError('Failed to fetch')
}
const body =
'<!doctype html><html><head><title>Example Domain</title></head>' +
'<body><h1>Example Domain</h1><p>Sample content.</p></body></html>'
const res = new Response(body, {
status: 200,
headers: { 'content-type': 'text/html' },
})
// Make response.url match the request URL so tests can assert on it.
Object.defineProperty(res, 'url', { value: url, configurable: true })
return res
}) as typeof fetch
})
afterAll(() => {
globalThis.fetch = realFetch
})
describe('WebBrowserTool', () => {
test('tool exports and metadata', async () => {
const { WebBrowserTool } = await import('../WebBrowserTool.js')
expect(WebBrowserTool).toBeDefined()
expect(WebBrowserTool.name).toBe('WebBrowser')
expect(typeof WebBrowserTool.call).toBe('function')
expect(WebBrowserTool.userFacingName()).toBe('Browser')
expect(WebBrowserTool.isReadOnly()).toBe(true)
})
test('description reflects browser-lite', async () => {
const { WebBrowserTool } = await import('../WebBrowserTool.js')
const desc = await WebBrowserTool.description()
expect(desc).toContain('HTTP')
expect(desc).not.toContain('embedded browser')
})
test('prompt mentions limitations', async () => {
const { WebBrowserTool } = await import('../WebBrowserTool.js')
const prompt = await WebBrowserTool.prompt()
expect(prompt).toContain('Limitations')
expect(prompt).toContain('No JavaScript')
expect(prompt).toContain('Claude-in-Chrome')
})
test('navigate fetches URL', async () => {
const { WebBrowserTool } = await import('../WebBrowserTool.js')
const result = await WebBrowserTool.call({
url: 'https://example.com',
} as any)
expect(result.data.title).toBe('Example Domain')
expect(result.data.url).toContain('example.com')
expect(result.data.content).toContain('Example Domain')
}, 15000)
test('screenshot returns text snapshot', async () => {
const { WebBrowserTool } = await import('../WebBrowserTool.js')
const result = await WebBrowserTool.call({
url: 'https://example.com',
action: 'screenshot',
} as any)
expect(result.data.content).toContain('Text snapshot')
expect(result.data.content).toContain('Example Domain')
}, 15000)
test('schema only allows navigate and screenshot', async () => {
const { WebBrowserTool } = await import('../WebBrowserTool.js')
const schema = WebBrowserTool.inputSchema
const parseResult = schema.safeParse({
url: 'https://example.com',
action: 'click',
})
expect(parseResult.success).toBe(false)
})
test('invalid URL returns error', async () => {
const { WebBrowserTool } = await import('../WebBrowserTool.js')
const result = await WebBrowserTool.call({ url: 'not-a-url' } as any)
expect(result.data.content).toContain('Failed to fetch')
})
})

View File

@@ -16,17 +16,37 @@ export type {
WebSearchAdapter,
} from './types.js'
/**
* Check if the current session uses a third-party (non-Anthropic) API provider.
* These providers don't support Anthropic's server_tools (server-side web search),
* so they must fall back to the Bing scraper adapter.
*/
function isThirdPartyProvider(): boolean {
return !!(
process.env.CLAUDE_CODE_USE_OPENAI ||
process.env.CLAUDE_CODE_USE_GEMINI ||
process.env.CLAUDE_CODE_USE_GROK
)
}
let cachedAdapter: WebSearchAdapter | null = null
let cachedAdapterKey: 'api' | 'bing' | 'brave' | null = null
export function createAdapter(): WebSearchAdapter {
const envAdapter = process.env.WEB_SEARCH_ADAPTER
// Priority:
// 1. Explicit env override (WEB_SEARCH_ADAPTER=api|bing|brave)
// 2. Third-party provider (OpenAI/Gemini/Grok) → bing (no server_tools support)
// 3. First-party Anthropic API → api (server-side web search + connector_text)
// 4. Fallback → bing
const adapterKey =
envAdapter === 'api' || envAdapter === 'bing' || envAdapter === 'brave'
? envAdapter
: isFirstPartyAnthropicBaseUrl()
? 'api'
: 'bing'
: isThirdPartyProvider()
? 'bing'
: isFirstPartyAnthropicBaseUrl()
? 'api'
: 'bing'
if (cachedAdapter && cachedAdapterKey === adapterKey) return cachedAdapter

View File

@@ -1,18 +1,358 @@
import { randomUUID } from 'crypto'
import { mkdir, readdir, readFile, writeFile } from 'fs/promises'
import { join, parse } from 'path'
import { z } from 'zod/v4'
import type { ToolResultBlockParam } from 'src/Tool.js'
import { buildTool } from 'src/Tool.js'
import { truncate } from 'src/utils/format.js'
import { WORKFLOW_TOOL_NAME } from './constants.js'
import { safeParseJSON } from 'src/utils/json.js'
import {
WORKFLOW_DIR_NAME,
WORKFLOW_FILE_EXTENSIONS,
WORKFLOW_TOOL_NAME,
} from './constants.js'
const WORKFLOW_RUNS_DIR = '.claude/workflow-runs'
const inputSchema = z.object({
workflow: z.string().describe('Name of the workflow to execute'),
args: z.string().optional().describe('Arguments to pass to the workflow'),
action: z
.enum(['start', 'status', 'advance', 'cancel', 'list'])
.optional()
.describe('Workflow action. Defaults to start.'),
run_id: z
.string()
.optional()
.describe('Workflow run id for status, advance, or cancel.'),
})
type Input = typeof inputSchema
type WorkflowInput = z.infer<Input>
type WorkflowStepStatus = 'pending' | 'running' | 'completed' | 'cancelled'
type WorkflowStep = {
name: string
prompt: string
status: WorkflowStepStatus
startedAt?: number
completedAt?: number
}
type WorkflowRun = {
runId: string
workflow: string
args?: string
status: 'running' | 'completed' | 'cancelled'
createdAt: number
updatedAt: number
currentStepIndex: number
steps: WorkflowStep[]
}
type WorkflowOutput = { output: string }
async function findWorkflowFile(
workflowDir: string,
workflow: string,
): Promise<{ path: string; content: string } | null> {
for (const ext of WORKFLOW_FILE_EXTENSIONS) {
const path = join(workflowDir, `${workflow}${ext}`)
try {
return { path, content: await readFile(path, 'utf-8') }
} catch {
// try next
}
}
return null
}
async function listAvailableWorkflows(workflowDir: string): Promise<string[]> {
try {
const files = await readdir(workflowDir)
return files
.filter(f => WORKFLOW_FILE_EXTENSIONS.includes(parse(f).ext.toLowerCase()))
.map(f => parse(f).name)
.sort()
} catch {
return []
}
}
function workflowRunPath(cwd: string, runId: string): string {
return join(cwd, WORKFLOW_RUNS_DIR, `${runId}.json`)
}
async function readWorkflowRun(
cwd: string,
runId: string,
): Promise<WorkflowRun | null> {
try {
const parsed = safeParseJSON(
await readFile(workflowRunPath(cwd, runId), 'utf-8'),
false,
) as Partial<WorkflowRun> | null
if (
!parsed ||
typeof parsed.runId !== 'string' ||
typeof parsed.workflow !== 'string' ||
!Array.isArray(parsed.steps)
) {
return null
}
return parsed as WorkflowRun
} catch {
return null
}
}
async function writeWorkflowRun(cwd: string, run: WorkflowRun): Promise<void> {
await mkdir(join(cwd, WORKFLOW_RUNS_DIR), { recursive: true })
await writeFile(
workflowRunPath(cwd, run.runId),
JSON.stringify(run, null, 2) + '\n',
'utf-8',
)
}
async function listWorkflowRuns(cwd: string): Promise<WorkflowRun[]> {
let files: string[]
try {
files = await readdir(join(cwd, WORKFLOW_RUNS_DIR))
} catch {
return []
}
const runs = await Promise.all(
files
.filter(f => f.endsWith('.json'))
.map(f => readWorkflowRun(cwd, f.slice(0, -'.json'.length))),
)
return runs
.filter((run): run is WorkflowRun => run !== null)
.sort((a, b) => b.updatedAt - a.updatedAt)
}
function parseMarkdownSteps(content: string): WorkflowStep[] {
const steps: WorkflowStep[] = []
for (const rawLine of content.split('\n')) {
const line = rawLine.trim()
const taskMatch = line.match(/^[-*]\s+\[[ xX]\]\s+(.+)$/)
const bulletMatch = line.match(/^[-*]\s+(.+)$/)
const numberedMatch = line.match(/^\d+[.)]\s+(.+)$/)
const text = taskMatch?.[1] ?? bulletMatch?.[1] ?? numberedMatch?.[1]
if (!text) continue
steps.push({ name: text.slice(0, 80), prompt: text, status: 'pending' })
}
return steps
}
function parseYamlSteps(content: string): WorkflowStep[] {
const steps: WorkflowStep[] = []
let current: Partial<WorkflowStep> | null = null
const flush = () => {
if (!current) return
const prompt = current.prompt ?? current.name
if (current.name && prompt) {
steps.push({
name: current.name,
prompt,
status: 'pending',
})
}
current = null
}
for (const rawLine of content.split('\n')) {
const line = rawLine.trim()
const stepText = line.match(/^-\s+(.+)$/)?.[1]
if (stepText) {
flush()
const inlineName = stepText.match(/^name:\s*(.+)$/)?.[1]
current = {
name: inlineName ?? stepText,
prompt: inlineName ? undefined : stepText,
}
continue
}
const name = line.match(/^name:\s*(.+)$/)?.[1]
if (name) {
if (!current) current = {}
current.name = name
continue
}
const prompt = line.match(/^(prompt|run|command):\s*(.+)$/)?.[2]
if (prompt) {
if (!current) current = {}
current.prompt = prompt
}
}
flush()
return steps
}
function parseWorkflowSteps(filePath: string, content: string): WorkflowStep[] {
const ext = parse(filePath).ext.toLowerCase()
const steps =
ext === '.md' ? parseMarkdownSteps(content) : parseYamlSteps(content)
if (steps.length > 0) {
return steps
}
return [
{
name: 'Execute workflow',
prompt: content.trim(),
status: 'pending',
},
]
}
function formatStep(step: WorkflowStep, index: number): string {
return `Step ${index + 1}: ${step.name}\n${step.prompt}`
}
function formatRunStatus(run: WorkflowRun): string {
const lines = [
`Workflow run: ${run.runId}`,
`Workflow: ${run.workflow}`,
`Status: ${run.status}`,
`Current step: ${run.steps[run.currentStepIndex]?.name ?? 'none'}`,
`Steps: ${run.steps.length}`,
]
for (let i = 0; i < run.steps.length; i += 1) {
const step = run.steps[i]!
lines.push(` ${i + 1}. [${step.status}] ${step.name}`)
}
return lines.join('\n')
}
async function startWorkflow(
input: WorkflowInput,
cwd: string,
): Promise<WorkflowOutput> {
const workflowDir = join(cwd, WORKFLOW_DIR_NAME)
const found = await findWorkflowFile(workflowDir, input.workflow)
if (!found) {
const available = await listAvailableWorkflows(workflowDir)
const hint =
available.length > 0
? `\nAvailable workflows: ${available.join(', ')}`
: `\nNo workflows found in ${WORKFLOW_DIR_NAME}/. Create .md or .yaml files there.`
return { output: `Error: Workflow "${input.workflow}" not found.${hint}` }
}
const steps = parseWorkflowSteps(found.path, found.content)
const now = Date.now()
steps[0] = { ...steps[0]!, status: 'running', startedAt: now }
const run: WorkflowRun = {
runId: randomUUID(),
workflow: input.workflow,
...(input.args ? { args: input.args } : {}),
status: 'running',
createdAt: now,
updatedAt: now,
currentStepIndex: 0,
steps,
}
await writeWorkflowRun(cwd, run)
const argsSection = input.args ? `\n\nArguments:\n${input.args}` : ''
return {
output: [
`Workflow run started`,
`run_id: ${run.runId}`,
`workflow: ${run.workflow}`,
'',
formatStep(steps[0]!, 0),
argsSection,
'',
`When this step is complete, call Workflow with action="advance" and run_id="${run.runId}".`,
].join('\n'),
}
}
async function getRunOrError(
cwd: string,
runId: string | undefined,
): Promise<{ run?: WorkflowRun; output?: string }> {
if (!runId) return { output: 'Error: run_id is required for this action.' }
const run = await readWorkflowRun(cwd, runId)
if (!run) return { output: `Error: Workflow run "${runId}" not found.` }
return { run }
}
async function advanceWorkflow(
cwd: string,
runId: string | undefined,
): Promise<WorkflowOutput> {
const found = await getRunOrError(cwd, runId)
if (!found.run) return { output: found.output! }
const run = found.run
const now = Date.now()
const current = run.steps[run.currentStepIndex]
if (current && current.status === 'running') {
current.status = 'completed'
current.completedAt = now
}
const nextIndex = run.currentStepIndex + 1
if (nextIndex >= run.steps.length) {
run.status = 'completed'
run.updatedAt = now
await writeWorkflowRun(cwd, run)
return { output: `Workflow completed\nrun_id: ${run.runId}` }
}
run.currentStepIndex = nextIndex
run.steps[nextIndex] = {
...run.steps[nextIndex]!,
status: 'running',
startedAt: now,
}
run.updatedAt = now
await writeWorkflowRun(cwd, run)
return {
output: [
`Next workflow step`,
`run_id: ${run.runId}`,
'',
formatStep(run.steps[nextIndex]!, nextIndex),
'',
`When this step is complete, call Workflow with action="advance" and run_id="${run.runId}".`,
].join('\n'),
}
}
async function cancelWorkflow(
cwd: string,
runId: string | undefined,
): Promise<WorkflowOutput> {
const found = await getRunOrError(cwd, runId)
if (!found.run) return { output: found.output! }
const run = found.run
const now = Date.now()
run.status = 'cancelled'
run.updatedAt = now
for (const step of run.steps) {
if (step.status === 'pending' || step.status === 'running') {
step.status = 'cancelled'
}
}
await writeWorkflowRun(cwd, run)
return { output: `Workflow cancelled\nrun_id: ${run.runId}` }
}
async function listWorkflowRunsForOutput(cwd: string): Promise<WorkflowOutput> {
const runs = await listWorkflowRuns(cwd)
if (runs.length === 0) return { output: 'No workflow runs recorded.' }
return {
output: runs
.slice(0, 20)
.map(
run =>
`${run.runId} | ${run.workflow} | ${run.status} | step=${run.steps[run.currentStepIndex]?.name ?? 'none'} | updated=${new Date(run.updatedAt).toLocaleString()}`,
)
.join('\n'),
}
}
export const WorkflowTool = buildTool({
name: WORKFLOW_TOOL_NAME,
searchHint: 'execute user-defined workflow scripts',
@@ -22,21 +362,25 @@ export const WorkflowTool = buildTool({
inputSchema,
async description() {
return 'Execute a user-defined workflow script from .claude/workflows/'
return 'Execute and track a user-defined workflow from .claude/workflows/'
},
async prompt() {
return `Use the Workflow tool to execute user-defined workflow scripts located in .claude/workflows/. Workflows are YAML or Markdown files that define a sequence of steps for common development tasks.
return `Use the Workflow tool to run user-defined workflows located in .claude/workflows/. Workflows may be Markdown checklists/lists or YAML files with steps.
Guidelines:
- Specify the workflow name to execute (must match a file in .claude/workflows/)
- Optionally pass arguments that the workflow can use
- Workflows run in the context of the current project`
Actions:
- start (default): create a persisted workflow run and return the first step to execute
- advance: mark the current step complete and return the next step
- status: inspect a workflow run by run_id
- cancel: cancel a workflow run
- list: list recent workflow runs
Workflow run state is persisted in .claude/workflow-runs/.`
},
userFacingName() {
return 'Workflow'
},
isReadOnly() {
return false
isReadOnly(input) {
return input.action === 'status' || input.action === 'list'
},
isEnabled() {
return true
@@ -44,10 +388,10 @@ Guidelines:
renderToolUseMessage(input: Partial<WorkflowInput>) {
const name = input.workflow ?? 'unknown'
if (input.args) {
return `Workflow: ${name} ${input.args}`
}
return `Workflow: ${name}`
const action = input.action ?? 'start'
return input.args
? `Workflow: ${action} ${name} ${input.args}`
: `Workflow: ${action} ${name}`
},
mapToolResultToToolResultBlockParam(
@@ -61,14 +405,26 @@ Guidelines:
}
},
async call(_input: WorkflowInput, _context, _progress) {
// Workflow execution is wired by the WORKFLOW_SCRIPTS feature bootstrap.
// Without it, this tool is not functional.
return {
data: {
output:
'Error: Workflow execution requires the WORKFLOW_SCRIPTS runtime.',
},
async call(input: WorkflowInput) {
const cwd = process.cwd()
const action = input.action ?? 'start'
switch (action) {
case 'start':
return { data: await startWorkflow(input, cwd) }
case 'status': {
const found = await getRunOrError(cwd, input.run_id)
return {
data: {
output: found.run ? formatRunStatus(found.run) : found.output!,
},
}
}
case 'advance':
return { data: await advanceWorkflow(cwd, input.run_id) }
case 'cancel':
return { data: await cancelWorkflow(cwd, input.run_id) }
case 'list':
return { data: await listWorkflowRunsForOutput(cwd) }
}
},
})

View File

@@ -0,0 +1,99 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import { mkdir, readFile, rm, writeFile } from 'node:fs/promises'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { WorkflowTool } from '../WorkflowTool'
let cwd: string
let previousCwd: string
beforeEach(async () => {
previousCwd = process.cwd()
cwd = join(tmpdir(), `workflow-tool-${Date.now()}-${Math.random().toString(16).slice(2)}`)
await mkdir(join(cwd, '.claude', 'workflows'), { recursive: true })
process.chdir(cwd)
})
afterEach(async () => {
process.chdir(previousCwd)
await rm(cwd, { recursive: true, force: true })
})
describe('WorkflowTool', () => {
test('starts a workflow run and persists step state', async () => {
await writeFile(
join(cwd, '.claude', 'workflows', 'release.md'),
[
'# Release',
'',
'- [ ] Run tests',
'- [ ] Build package',
].join('\n'),
)
const result = await WorkflowTool.call({ workflow: 'release' })
expect(result.data.output).toContain('Workflow run started')
expect(result.data.output).toContain('Run tests')
const match = result.data.output.match(/run_id: ([a-f0-9-]+)/)
expect(match?.[1]).toBeString()
const raw = await readFile(
join(cwd, '.claude', 'workflow-runs', `${match![1]}.json`),
'utf-8',
)
const run = JSON.parse(raw)
expect(run.workflow).toBe('release')
expect(run.status).toBe('running')
expect(run.steps).toHaveLength(2)
expect(run.steps[0].status).toBe('running')
expect(run.steps[1].status).toBe('pending')
})
test('advances a workflow run through completion', async () => {
await writeFile(
join(cwd, '.claude', 'workflows', 'audit.yaml'),
[
'steps:',
' - name: Inspect',
' prompt: Inspect the code',
' - name: Verify',
' prompt: Run focused tests',
].join('\n'),
)
const started = await WorkflowTool.call({ workflow: 'audit' })
const runId = started.data.output.match(/run_id: ([a-f0-9-]+)/)![1]!
const next = await WorkflowTool.call(
{ workflow: 'audit', action: 'advance', run_id: runId },
)
expect(next.data.output).toContain('Next workflow step')
expect(next.data.output).toContain('Run focused tests')
const done = await WorkflowTool.call(
{ workflow: 'audit', action: 'advance', run_id: runId },
)
expect(done.data.output).toContain('Workflow completed')
})
test('lists and cancels workflow runs', async () => {
await writeFile(
join(cwd, '.claude', 'workflows', 'cleanup.md'),
'- Remove stale files',
)
const started = await WorkflowTool.call({ workflow: 'cleanup' })
const runId = started.data.output.match(/run_id: ([a-f0-9-]+)/)![1]!
const listed = await WorkflowTool.call(
{ workflow: 'cleanup', action: 'list' },
)
expect(listed.data.output).toContain(runId)
const cancelled = await WorkflowTool.call(
{ workflow: 'cleanup', action: 'cancel', run_id: runId },
)
expect(cancelled.data.output).toContain('Workflow cancelled')
})
})

View File

@@ -0,0 +1,54 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import { rmSync } from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { spawnTeammate } from '../spawnMultiAgent'
let tempHome: string
let previousConfigDir: string | undefined
beforeEach(() => {
previousConfigDir = process.env.CLAUDE_CONFIG_DIR
tempHome = join(tmpdir(), `spawn-multi-agent-${Date.now()}-${Math.random().toString(16).slice(2)}`)
process.env.CLAUDE_CONFIG_DIR = tempHome
})
afterEach(() => {
if (previousConfigDir === undefined) {
delete process.env.CLAUDE_CONFIG_DIR
} else {
process.env.CLAUDE_CONFIG_DIR = previousConfigDir
}
rmSync(tempHome, { recursive: true, force: true })
})
describe('spawnTeammate', () => {
test('fails before spawn side effects when the team file is missing', async () => {
let setAppStateCalled = false
const context = {
getAppState: () => ({
teamContext: undefined,
}),
setAppState: () => {
setAppStateCalled = true
},
options: {
agentDefinitions: {
activeAgents: [],
},
},
}
await expect(
spawnTeammate(
{
name: 'worker',
prompt: 'do work',
team_name: 'missing-team',
},
context as any,
),
).rejects.toThrow('Team "missing-team" does not exist')
expect(setAppStateCalled).toBe(false)
})
})

File diff suppressed because it is too large Load Diff