fix: prevent agent communication bounds from hiding CI regressions

Tighten the UDS auth, framing, and response-reader boundaries while keeping the AgentSummary lifecycle covered so Codecov and CI fail on real regressions instead of missing coverage. The poorMode settings mock mirrors unrelated real settings defaults to avoid Bun mock retention changing later permission tests.

Constraint: PR #369 must fix Codecov/CI precisely without warning suppression, fallback masking, or mock pollution

Rejected: Delete AgentSummary lifecycle coverage | would hide Codecov loss and stale-summary behavior

Rejected: Store inline UDS rejection in a hidden input sentinel | cloned observable inputs can drop it and bypass rejection

Rejected: Ignore malformed UDS frames until timeout | leaves client slots and SendMessage calls open to exhaustion

Confidence: high

Scope-risk: moderate

Directive: Keep empty #token= markers rejected; do not require a non-empty token value in hasInlineUdsToken

Tested: bun test packages/builtin-tools/src/tools/SendMessageTool/__tests__/udsRecipientSanitization.test.ts src/utils/__tests__/udsMessaging.test.ts src/utils/__tests__/udsResponseReader.test.ts src/utils/__tests__/ndjsonFramer.test.ts

Tested: bunx tsc --noEmit --pretty false

Tested: bun run lint

Tested: bun test --coverage --coverage-reporter lcov --coverage-dir coverage

Tested: bun run test:all

Tested: bun audit

Tested: bun run build

Tested: bun run build:vite

Not-tested: GitHub-hosted Codecov upload until pushed PR checks rerun
This commit is contained in:
unraid
2026-04-27 12:50:08 +08:00
parent ee0d788e58
commit 379928fa10
21 changed files with 1725 additions and 335 deletions

View File

@@ -1,16 +1,14 @@
import {
afterAll,
afterEach,
beforeEach,
describe,
expect,
mock,
test,
} from 'bun:test'
import { debugMock } from '../../../../tests/mocks/debug'
import { logMock } from '../../../../tests/mocks/log'
import { beforeEach, describe, expect, test } from 'bun:test'
import { asAgentId } from '../../../types/ids.js'
import type { CacheSafeParams } from '../../../utils/forkedAgent.js'
import type { Message } from '../../../types/message.js'
import type {
CacheSafeParams,
ForkedAgentResult,
} from '../../../utils/forkedAgent.js'
import {
type AgentSummaryDependencies,
startAgentSummarization,
} from '../agentSummary.js'
const transcriptMessages = [
{ type: 'user', message: { content: 'start' }, uuid: 'u1' },
@@ -20,114 +18,195 @@ const transcriptMessages = [
uuid: 'a1',
},
{ type: 'user', message: { content: 'continue' }, uuid: 'u2' },
]
] as unknown as Message[]
let poorModeActive = false
let forkCalls = 0
let updateCalls: Array<{ taskId: string; summary: string }> = []
let transcript = { messages: transcriptMessages }
const sessionStorageSnapshot = {
...(require('../../../utils/sessionStorage.ts') as Record<string, unknown>),
type ForkCall = {
cacheSafeParams: CacheSafeParams
}
mock.module('src/commands/poor/poorMode.js', () => ({
isPoorModeActive: () => poorModeActive,
}))
mock.module('src/tasks/LocalAgentTask/LocalAgentTask.js', () => ({
updateAgentSummary: (taskId: string, summary: string) => {
updateCalls.push({ taskId, summary })
},
}))
mock.module(
'@claude-code-best/builtin-tools/tools/AgentTool/runAgent.js',
() => ({
filterIncompleteToolCalls: <T>(messages: T) => messages,
}),
)
mock.module('src/utils/debug.js', debugMock)
mock.module('src/utils/log.js', logMock)
mock.module('src/utils/forkedAgent.js', () => ({
runForkedAgent: async () => {
forkCalls += 1
return {
messages: [
{
type: 'assistant',
message: {
content: [{ type: 'text', text: 'Reading udsClient.ts' }],
},
},
],
}
},
}))
mock.module('src/utils/sessionStorage.js', () => ({
...sessionStorageSnapshot,
getAgentTranscript: async () => transcript,
}))
afterAll(() => {
mock.module('src/utils/sessionStorage.js', () =>
require('../../../utils/sessionStorage.ts'),
)
})
describe('startAgentSummarization', () => {
const realSetTimeout = globalThis.setTimeout
const realClearTimeout = globalThis.clearTimeout
let scheduled:
| ((...args: Parameters<TimerHandler & ((...args: unknown[]) => void)>) => void)
| undefined
let scheduled: (() => void | Promise<void>) | undefined
let handle: { stop: () => void } | undefined
let forkCalls: ForkCall[]
let updateCalls: Array<{ taskId: string; summary: string }>
let transcriptMessagesForTest: Message[]
let debugLogs: string[]
let loggedErrors: Error[]
let clearedHandles: unknown[]
beforeEach(() => {
poorModeActive = false
forkCalls = 0
updateCalls = []
transcript = { messages: transcriptMessages }
scheduled = undefined
globalThis.setTimeout = ((callback: TimerHandler) => {
scheduled = callback as (...args: unknown[]) => void
return 1 as unknown as ReturnType<typeof setTimeout>
}) as unknown as typeof setTimeout
globalThis.clearTimeout = (() => undefined) as typeof clearTimeout
})
afterEach(() => {
globalThis.setTimeout = realSetTimeout
globalThis.clearTimeout = realClearTimeout
})
test('summarizes bounded transcript once and skips unchanged fingerprints', async () => {
const { startAgentSummarization } = await import('../agentSummary.js')
const handle = startAgentSummarization(
function startTestSummarization(
dependencies: AgentSummaryDependencies = {},
): { stop: () => void } {
return startAgentSummarization(
'task-1',
asAgentId('a0000000000000000'),
{
forkContextMessages: [{ type: 'user', message: { content: 'old' } }],
forkContextMessages: [
{ type: 'user', message: { content: 'stale' }, uuid: 'old' },
],
model: 'claude-test',
} as unknown as CacheSafeParams,
() => undefined,
{
clearTimeout: ((timeoutId: unknown) => {
clearedHandles.push(timeoutId)
}) as typeof clearTimeout,
getAgentTranscript: async () => ({
messages: transcriptMessagesForTest,
contentReplacements: [],
}),
isPoorModeActive: () => false,
logError: error => {
loggedErrors.push(
error instanceof Error ? error : new Error(String(error)),
)
},
logForDebugging: message => {
debugLogs.push(message)
},
runForkedAgent: async (args: ForkCall) => {
forkCalls.push(args)
return {
messages: [
{
type: 'assistant',
message: {
content: [{ type: 'text', text: 'Reading udsClient.ts' }],
},
},
],
} as unknown as ForkedAgentResult
},
setTimeout: ((callback: TimerHandler) => {
if (typeof callback !== 'function') {
throw new Error('Expected timer callback')
}
scheduled = callback as () => void | Promise<void>
return 1 as unknown as ReturnType<typeof setTimeout>
}) as unknown as typeof setTimeout,
updateAgentSummary: (taskId: string, summary: string) => {
updateCalls.push({ taskId, summary })
},
...dependencies,
},
)
}
beforeEach(() => {
forkCalls = []
updateCalls = []
scheduled = undefined
handle = undefined
transcriptMessagesForTest = transcriptMessages
debugLogs = []
loggedErrors = []
clearedHandles = []
})
test('summarizes bounded transcript once and skips unchanged fingerprints', async () => {
handle = startTestSummarization()
expect(typeof scheduled).toBe('function')
await scheduled!()
expect(forkCalls).toBe(1)
expect(forkCalls).toHaveLength(1)
expect(updateCalls).toEqual([
{ taskId: 'task-1', summary: 'Reading udsClient.ts' },
])
const forkContext = forkCalls[0].cacheSafeParams.forkContextMessages ?? []
expect(forkContext.map(message => String(message.uuid))).toEqual([
'u1',
'a1',
'u2',
])
expect(forkContext.some(message => String(message.uuid) === 'old')).toBe(
false,
)
await scheduled!()
expect(forkCalls).toBe(1)
expect(forkCalls).toHaveLength(1)
expect(updateCalls).toHaveLength(1)
})
test('skips summarization when filtering leaves too little bounded context', async () => {
transcriptMessagesForTest = [
{ type: 'user', message: { content: 'start' }, uuid: 'u1' },
{
type: 'assistant',
uuid: 'a1',
message: {
content: [{ type: 'tool_use', id: 'missing', name: 'Read' }],
},
},
{ type: 'user', message: { content: 'continue' }, uuid: 'u2' },
] as unknown as Message[]
handle = startTestSummarization()
expect(typeof scheduled).toBe('function')
await scheduled!()
expect(forkCalls).toEqual([])
expect(updateCalls).toEqual([])
expect(debugLogs).toContain(
'[AgentSummary] Skipping summary for task-1: no bounded context available',
)
})
test('skips summarization before building context when transcript is too short', async () => {
transcriptMessagesForTest = transcriptMessages.slice(0, 2)
handle = startTestSummarization()
expect(typeof scheduled).toBe('function')
await scheduled!()
expect(forkCalls).toEqual([])
expect(updateCalls).toEqual([])
expect(debugLogs).toContain(
'[AgentSummary] Skipping summary for task-1: not enough messages (2)',
)
})
test('skips and reschedules while poor mode is active', async () => {
handle = startTestSummarization({
isPoorModeActive: () => true,
})
expect(typeof scheduled).toBe('function')
await scheduled!()
expect(forkCalls).toEqual([])
expect(updateCalls).toEqual([])
expect(debugLogs).toContain(
'[AgentSummary] Skipping summary — poor mode active',
)
})
test('logs summary errors and keeps the next timer owned by the summarizer', async () => {
const error = new Error('fork failed')
handle = startTestSummarization({
runForkedAgent: async () => {
throw error
},
})
expect(typeof scheduled).toBe('function')
await scheduled!()
expect(loggedErrors).toEqual([error])
expect(updateCalls).toEqual([])
})
test('stop clears the pending summary timer', () => {
handle = startTestSummarization()
handle.stop()
expect(debugLogs).toContain(
'[AgentSummary] Stopping summarization for task-1',
)
expect(clearedHandles).toEqual([1])
})
})