fix: prevent agent communication bounds from hiding CI regressions

Tighten the UDS auth, framing, and response-reader boundaries while keeping the AgentSummary lifecycle covered so Codecov and CI fail on real regressions instead of missing coverage. The poorMode settings mock mirrors unrelated real settings defaults to avoid Bun mock retention changing later permission tests.

Constraint: PR #369 must fix Codecov/CI precisely without warning suppression, fallback masking, or mock pollution

Rejected: Delete AgentSummary lifecycle coverage | would hide Codecov loss and stale-summary behavior

Rejected: Store inline UDS rejection in a hidden input sentinel | cloned observable inputs can drop it and bypass rejection

Rejected: Ignore malformed UDS frames until timeout | leaves client slots and SendMessage calls open to exhaustion

Confidence: high

Scope-risk: moderate

Directive: Keep empty #token= markers rejected; do not require a non-empty token value in hasInlineUdsToken

Tested: bun test packages/builtin-tools/src/tools/SendMessageTool/__tests__/udsRecipientSanitization.test.ts src/utils/__tests__/udsMessaging.test.ts src/utils/__tests__/udsResponseReader.test.ts src/utils/__tests__/ndjsonFramer.test.ts

Tested: bunx tsc --noEmit --pretty false

Tested: bun run lint

Tested: bun test --coverage --coverage-reporter lcov --coverage-dir coverage

Tested: bun run test:all

Tested: bun audit

Tested: bun run build

Tested: bun run build:vite

Not-tested: GitHub-hosted Codecov upload until pushed PR checks rerun
This commit is contained in:
unraid
2026-04-27 12:50:08 +08:00
parent ee0d788e58
commit 379928fa10
21 changed files with 1725 additions and 335 deletions

View File

@@ -0,0 +1,180 @@
import { describe, expect, test } from 'bun:test'
import type { Message } from 'src/types/message.js'
import { filterIncompleteToolCalls } from '../filterIncompleteToolCalls.js'
describe('filterIncompleteToolCalls', () => {
test('drops assistant tool uses that do not have matching results', () => {
const messages = [
{
type: 'assistant',
uuid: 'a1',
message: {
role: 'assistant',
content: [{ type: 'tool_use', id: 'missing', name: 'Read' }],
},
},
{
type: 'user',
uuid: 'u1',
message: { role: 'user', content: 'continue' },
},
] as unknown as Message[]
expect(
filterIncompleteToolCalls(messages).map(message => String(message.uuid)),
).toEqual(['u1'])
})
test('preserves assistant text when dropping orphan tool uses', () => {
const messages = [
{
type: 'assistant',
uuid: 'a1',
message: {
role: 'assistant',
content: [
{ type: 'text', text: 'I will read the file.' },
{ type: 'tool_use', id: 'missing', name: 'Read' },
],
},
},
] as unknown as Message[]
const filtered = filterIncompleteToolCalls(messages)
expect(filtered).toHaveLength(1)
const first = filtered[0]!
const content = first.message!.content
expect(
Array.isArray(content) ? content.map(block => block.type) : [],
).toEqual(['text'])
})
test('keeps completed parallel tool calls when dropping an orphan', () => {
const messages = [
{
type: 'assistant',
uuid: 'a1',
message: {
role: 'assistant',
content: [
{ type: 'tool_use', id: 'done', name: 'Read' },
{ type: 'tool_use', id: 'missing', name: 'Grep' },
],
},
},
{
type: 'user',
uuid: 'u1',
message: {
role: 'user',
content: [{ type: 'tool_result', tool_use_id: 'done', content: 'ok' }],
},
},
] as unknown as Message[]
const filtered = filterIncompleteToolCalls(messages)
expect(filtered.map(message => String(message.uuid))).toEqual(['a1', 'u1'])
const first = filtered[0]!
const content = first.message!.content
expect(
Array.isArray(content)
? content.map(block =>
block.type === 'tool_use' ? block.id : block.type,
)
: [],
).toEqual(['done'])
})
test('keeps assistant tool uses that have matching results', () => {
const messages = [
{
type: 'assistant',
uuid: 'a1',
message: {
role: 'assistant',
content: [{ type: 'tool_use', id: 'done', name: 'Read' }],
},
},
{
type: 'user',
uuid: 'u1',
message: {
role: 'user',
content: [{ type: 'tool_result', tool_use_id: 'done', content: 'ok' }],
},
},
] as unknown as Message[]
expect(
filterIncompleteToolCalls(messages).map(message => String(message.uuid)),
).toEqual(['a1', 'u1'])
})
test('drops orphan tool results when their tool use was removed', () => {
const messages = [
{
type: 'user',
uuid: 'u1',
message: {
role: 'user',
content: [
{ type: 'tool_result', tool_use_id: 'missing', content: 'late' },
],
},
},
] as unknown as Message[]
expect(filterIncompleteToolCalls(messages)).toEqual([])
})
test('keeps user text while dropping orphan tool results', () => {
const messages = [
{
type: 'assistant',
uuid: 'a1',
message: { role: 'assistant', content: 'done' },
},
{
type: 'user',
uuid: 'u1',
message: {
role: 'user',
content: [
{ type: 'text', text: 'keep this' },
{ type: 'tool_result', tool_use_id: 'missing', content: 'late' },
],
},
},
] as unknown as Message[]
const filtered = filterIncompleteToolCalls(messages)
expect(filtered.map(message => String(message.uuid))).toEqual(['a1', 'u1'])
const content = filtered[1]!.message!.content
expect(Array.isArray(content) ? content : []).toEqual([
{ type: 'text', text: 'keep this' },
])
})
test('drops malformed tool blocks without ids', () => {
const messages = [
{
type: 'assistant',
uuid: 'a1',
message: {
role: 'assistant',
content: [{ type: 'tool_use', name: 'Read' }],
},
},
{
type: 'user',
uuid: 'u1',
message: {
role: 'user',
content: [{ type: 'tool_result', content: 'late' }],
},
},
] as unknown as Message[]
expect(filterIncompleteToolCalls(messages)).toEqual([])
})
})

View File

@@ -0,0 +1,110 @@
import type {
AssistantMessage,
Message,
UserMessage,
} from 'src/types/message.js'
/**
* Removes invalid or orphaned tool_use/tool_result blocks while preserving
* completed tool-call pairs. This is intentionally block-level, not
* message-level, so completed parallel tool calls stay paired with results.
*/
export function filterIncompleteToolCalls(messages: Message[]): Message[] {
const toolUseIdsWithResults = new Set<string>()
for (const message of messages) {
if (message?.type === 'user') {
const userMessage = message as UserMessage
const content = userMessage.message.content
if (Array.isArray(content)) {
for (const block of content) {
if (block.type === 'tool_result' && block.tool_use_id) {
toolUseIdsWithResults.add(block.tool_use_id)
}
}
}
}
}
const retainedToolUseIds = new Set<string>()
const withoutOrphanToolUses: Message[] = []
for (const message of messages) {
if (message?.type === 'assistant') {
const assistantMessage = message as AssistantMessage
const content = assistantMessage.message.content
if (Array.isArray(content)) {
let changed = false
const filteredContent = content.filter(block => {
if (block.type !== 'tool_use') return true
if (!block.id) {
changed = true
return false
}
if (toolUseIdsWithResults.has(block.id)) {
retainedToolUseIds.add(block.id)
return true
}
changed = true
return false
})
if (!changed) {
withoutOrphanToolUses.push(message)
continue
}
if (filteredContent.length > 0) {
withoutOrphanToolUses.push({
...assistantMessage,
message: {
...assistantMessage.message,
content: filteredContent,
},
})
}
continue
}
}
withoutOrphanToolUses.push(message)
}
const filteredMessages: Message[] = []
for (const message of withoutOrphanToolUses) {
if (message?.type !== 'user') {
filteredMessages.push(message)
continue
}
const userMessage = message as UserMessage
const content = userMessage.message.content
if (!Array.isArray(content)) {
filteredMessages.push(message)
continue
}
let changed = false
const filteredContent = content.filter(block => {
if (block.type !== 'tool_result') return true
if (!block.tool_use_id) {
changed = true
return false
}
if (retainedToolUseIds.has(block.tool_use_id)) return true
changed = true
return false
})
if (!changed) {
filteredMessages.push(message)
continue
}
if (filteredContent.length > 0) {
filteredMessages.push({
...userMessage,
message: {
...userMessage.message,
content: filteredContent,
},
})
}
}
return filteredMessages
}

View File

@@ -86,8 +86,11 @@ import {
import type { ContentReplacementState } from 'src/utils/toolResultStorage.js'
import { createAgentId } from 'src/utils/uuid.js'
import { resolveAgentTools } from './agentToolUtils.js'
import { filterIncompleteToolCalls } from './filterIncompleteToolCalls.js'
import { type AgentDefinition, isBuiltInAgent } from './loadAgentsDir.js'
export { filterIncompleteToolCalls } from './filterIncompleteToolCalls.js'
/**
* Initialize agent-specific MCP servers
* Agents can define their own MCP servers in their frontmatter that are additive
@@ -886,50 +889,6 @@ export async function* runAgent({
}
}
/**
* Filters out assistant messages with incomplete tool calls (tool uses without results).
* This prevents API errors when sending messages with orphaned tool calls.
*/
export function filterIncompleteToolCalls(messages: Message[]): Message[] {
// Build a set of tool use IDs that have results
const toolUseIdsWithResults = new Set<string>()
for (const message of messages) {
if (message?.type === 'user') {
const userMessage = message as UserMessage
const content = userMessage.message.content
if (Array.isArray(content)) {
for (const block of content) {
if (block.type === 'tool_result' && block.tool_use_id) {
toolUseIdsWithResults.add(block.tool_use_id)
}
}
}
}
}
// Filter out assistant messages that contain tool calls without results
return messages.filter(message => {
if (message?.type === 'assistant') {
const assistantMessage = message as AssistantMessage
const content = assistantMessage.message.content
if (Array.isArray(content)) {
// Check if this assistant message has any tool uses without results
const hasIncompleteToolCall = content.some(
block =>
block.type === 'tool_use' &&
block.id &&
!toolUseIdsWithResults.has(block.id),
)
// Exclude messages with incomplete tool calls
return !hasIncompleteToolCall
}
}
// Keep all non-assistant messages and assistant messages without tool calls
return true
})
}
async function getAgentSystemPrompt(
agentDefinition: AgentDefinition,
toolUseContext: Pick<ToolUseContext, 'options'>,

View File

@@ -131,15 +131,16 @@ export type SendMessageToolOutput =
| ResponseOutput
const UDS_INLINE_TOKEN_MARKER = '#token='
const UDS_INLINE_TOKEN_REJECTED_KEY = '__udsInlineTokenRejected'
function stripInlineUdsToken(target: string): string {
const markerIndex = target.lastIndexOf(UDS_INLINE_TOKEN_MARKER)
const markerIndex = target.indexOf(UDS_INLINE_TOKEN_MARKER)
return markerIndex === -1 ? target : target.slice(0, markerIndex)
}
function hasInlineUdsToken(to: string): boolean {
const addr = parseAddress(to)
// Empty-token markers are still inline-token attempts. Observable input
// redaction preserves "#token=" so cloned inputs remain rejected.
return (
addr.scheme === 'uds' && addr.target.includes(UDS_INLINE_TOKEN_MARKER)
)
@@ -151,20 +152,17 @@ function recipientForDisplay(to: string): string {
return `uds:${stripInlineUdsToken(addr.target)}`
}
function markAndRedactInlineUdsToken(
input: { to: string } & Record<string, unknown>,
): void {
if (!hasInlineUdsToken(input.to)) return
input.to = recipientForDisplay(input.to)
input[UDS_INLINE_TOKEN_REJECTED_KEY] = true
function redactInlineUdsTokenForRejection(to: string): string {
const addr = parseAddress(to)
if (addr.scheme !== 'uds') return to
const markerIndex = addr.target.indexOf(UDS_INLINE_TOKEN_MARKER)
if (markerIndex === -1) return to
return `uds:${addr.target.slice(0, markerIndex)}${UDS_INLINE_TOKEN_MARKER}`
}
function wasInlineUdsTokenRejected(input: unknown): boolean {
return (
typeof input === 'object' &&
input !== null &&
(input as Record<string, unknown>)[UDS_INLINE_TOKEN_REJECTED_KEY] === true
)
function redactObservableInlineUdsToken(input: { to: string }): void {
if (!hasInlineUdsToken(input.to)) return
input.to = redactInlineUdsTokenForRejection(input.to)
}
function findTeammateColor(
@@ -580,9 +578,7 @@ export const SendMessageTool: Tool<InputSchema, SendMessageToolOutput> =
backfillObservableInput(input) {
if (typeof input.to !== 'string') return
markAndRedactInlineUdsToken(
input as { to: string } & Record<string, unknown>,
)
redactObservableInlineUdsToken(input as { to: string })
if ('type' in input) return
if (input.to === '*') {
@@ -620,7 +616,10 @@ export const SendMessageTool: Tool<InputSchema, SendMessageToolOutput> =
case 'shutdown_response':
return `shutdown_response ${input.message.approve ? 'approve' : 'reject'} ${input.message.request_id}`
case 'plan_approval_response':
return `plan_approval ${input.message.approve ? 'approve' : 'reject'} to ${recipient}`
const planApprovalDecision = input.message.approve
? 'approve'
: 'reject'
return `plan_approval ${planApprovalDecision} to ${recipient}`
}
},
@@ -674,7 +673,7 @@ export const SendMessageTool: Tool<InputSchema, SendMessageToolOutput> =
}
if (
addr.scheme === 'uds' &&
(hasInlineUdsToken(input.to) || wasInlineUdsTokenRejected(input))
hasInlineUdsToken(input.to)
) {
return {
result: false,
@@ -808,10 +807,7 @@ export const SendMessageTool: Tool<InputSchema, SendMessageToolOutput> =
async call(input, context, canUseTool, assistantMessage) {
if (typeof input.message === 'string') {
const addr = parseAddress(input.to)
if (
addr.scheme === 'uds' &&
(hasInlineUdsToken(input.to) || wasInlineUdsTokenRejected(input))
) {
if (addr.scheme === 'uds' && hasInlineUdsToken(input.to)) {
return {
data: {
success: false,
@@ -841,10 +837,10 @@ export const SendMessageTool: Tool<InputSchema, SendMessageToolOutput> =
const { postInterClaudeMessage } =
require('src/bridge/peerSessions.js') as typeof import('src/bridge/peerSessions.js')
/* eslint-enable @typescript-eslint/no-require-imports */
const result = (await postInterClaudeMessage(
const result = await postInterClaudeMessage(
addr.target,
input.message,
)) as { ok: boolean; error?: string }
) as { ok: boolean; error?: string }
const preview = input.summary || truncate(input.message, 50)
return {
data: {
@@ -856,16 +852,6 @@ export const SendMessageTool: Tool<InputSchema, SendMessageToolOutput> =
}
}
if (addr.scheme === 'uds') {
const recipient = recipientForDisplay(input.to)
if (hasInlineUdsToken(input.to) || wasInlineUdsTokenRejected(input)) {
return {
data: {
success: false,
message:
'uds addresses must not include inline auth tokens; use the ListPeers address',
},
}
}
/* eslint-disable @typescript-eslint/no-require-imports */
const { sendToUdsSocket } =
require('src/utils/udsClient.js') as typeof import('src/utils/udsClient.js')
@@ -876,14 +862,14 @@ export const SendMessageTool: Tool<InputSchema, SendMessageToolOutput> =
return {
data: {
success: true,
message: `${preview}” → ${recipient}`,
message: `${preview}” → ${input.to}`,
},
}
} catch (e) {
return {
data: {
success: false,
message: `Failed to send to ${recipient}: ${errorMessage(e)}`,
message: `Failed to send to ${input.to}: ${errorMessage(e)}`,
},
}
}

View File

@@ -1,8 +1,8 @@
import { describe, expect, test } from 'bun:test'
import { SendMessageTool } from '../SendMessageTool.js'
describe('SendMessageTool UDS recipient handling', () => {
test('redacts inline UDS tokens before classifier and observable paths', async () => {
const { SendMessageTool } = await import('../SendMessageTool.js')
const tokenAddress = 'uds:/tmp/peer.sock#token=secret-token'
const observableInput = {
@@ -12,6 +12,7 @@ describe('SendMessageTool UDS recipient handling', () => {
SendMessageTool.backfillObservableInput!(observableInput)
expect(observableInput.recipient).toBe('uds:/tmp/peer.sock')
expect(observableInput.to).toBe('uds:/tmp/peer.sock#token=')
expect(JSON.stringify(observableInput)).not.toContain('secret-token')
expect(
SendMessageTool.toAutoClassifierInput({
@@ -22,7 +23,6 @@ describe('SendMessageTool UDS recipient handling', () => {
})
test('keeps redacted UDS token rejection through observable backfill', async () => {
const { SendMessageTool } = await import('../SendMessageTool.js')
const observableInput = {
to: 'uds:/tmp/peer.sock#token=secret-token',
message: {
@@ -35,7 +35,7 @@ describe('SendMessageTool UDS recipient handling', () => {
SendMessageTool.backfillObservableInput!(observableInput)
expect(observableInput.to).toBe('uds:/tmp/peer.sock')
expect(observableInput.to).toBe('uds:/tmp/peer.sock#token=')
expect(observableInput.recipient).toBe('uds:/tmp/peer.sock')
expect(observableInput.type).toBe('plan_approval_response')
expect(observableInput.request_id).toBe('req-1')
@@ -55,8 +55,37 @@ describe('SendMessageTool UDS recipient handling', () => {
expect(result.message).toContain('inline auth tokens')
})
test('keeps inline-token rejection when observable input is cloned', async () => {
const observableInput = {
to: 'uds:/tmp/peer.sock#token=secret-token',
message: 'hello',
} as Record<string, unknown>
SendMessageTool.backfillObservableInput!(observableInput)
const clonedInput = {
to: observableInput.to,
message: observableInput.message,
summary: 'hello peer',
}
const validation = await SendMessageTool.validateInput!(
clonedInput as never,
{} as never,
)
const result = await SendMessageTool.call(
clonedInput as never,
{} as never,
undefined as never,
undefined as never,
)
expect(validation.result).toBe(false)
expect(result.data.success).toBe(false)
expect(JSON.stringify(clonedInput)).not.toContain('secret-token')
expect(JSON.stringify(result)).not.toContain('secret-token')
})
test('redacts UDS tokens in structured classifier text', async () => {
const { SendMessageTool } = await import('../SendMessageTool.js')
const to = 'uds:/tmp/peer.sock#token=secret-token'
expect(
@@ -75,10 +104,50 @@ describe('SendMessageTool UDS recipient handling', () => {
},
}),
).toBe('plan_approval approve to uds:/tmp/peer.sock')
expect(
SendMessageTool.toAutoClassifierInput({
to,
message: {
type: 'plan_approval_response',
request_id: 'req-2',
approve: false,
},
}),
).toBe('plan_approval reject to uds:/tmp/peer.sock')
expect(
SendMessageTool.toAutoClassifierInput({
to,
message: {
type: 'shutdown_response',
request_id: 'shutdown-1',
approve: false,
},
}),
).toBe('shutdown_response reject shutdown-1')
})
test('redacts from the first inline UDS token marker', async () => {
const tokenAddress = 'uds:/tmp/peer.sock#token=first#token=second'
const observableInput = {
to: tokenAddress,
message: 'hello',
} as Record<string, unknown>
SendMessageTool.backfillObservableInput!(observableInput)
expect(observableInput.to).toBe('uds:/tmp/peer.sock#token=')
expect(observableInput.recipient).toBe('uds:/tmp/peer.sock')
expect(JSON.stringify(observableInput)).not.toContain('first')
expect(JSON.stringify(observableInput)).not.toContain('second')
expect(
SendMessageTool.toAutoClassifierInput({
to: tokenAddress,
message: 'hello',
}),
).toBe('to uds:/tmp/peer.sock: hello')
})
test('rejects inline UDS tokens during validation', async () => {
const { SendMessageTool } = await import('../SendMessageTool.js')
const result = await SendMessageTool.validateInput!(
{
to: 'uds:/tmp/peer.sock#token=secret-token',
@@ -96,7 +165,6 @@ describe('SendMessageTool UDS recipient handling', () => {
})
test('rejects inline UDS tokens during execution without leaking them', async () => {
const { SendMessageTool } = await import('../SendMessageTool.js')
const result = await SendMessageTool.call(
{
to: 'uds:/tmp/peer.sock#token=secret-token',