test: rewrite 6 stale tests to match current source behavior

Two clusters of pre-existing failures fixed by aligning tests with the
source they were meant to verify (not by changing source):

1. ultrareviewCommand (4 fails)
   The 4 "preflight integration" tests assumed `call` makes an axios POST
   and branches on `action: proceed | blocked | confirm`. That integration
   was removed; the current `call` branches on `checkOverageGate()`'s four
   `kind` values. Replaced with 6 tests covering each gate branch
   (`not-enabled`, `low-balance`, `needs-confirm`, `proceed`), arg
   pass-through to launchRemoteReview, and the null-launch failure path.

2. autonomy-lifecycle-user-flow (2 fails)
   The Bun.spawn'd subprocess used cwd=tempDir, where Bun couldn't resolve
   the `src/*` tsconfig path alias (it's resolved from cwd's tsconfig, not
   the entrypoint file's). Switched the entrypoint to the bundled
   dist/cli.js (aliases pre-resolved) and added a beforeAll that lazy-builds
   the bundle if missing — handles the CI ordering where `bun test` runs
   before `bun run build`.

Local: 5345/5345 pass (was 5339/5345).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
unraid
2026-05-09 16:55:53 +08:00
parent ea5cb3ad02
commit dbd18b4a76
2 changed files with 166 additions and 89 deletions

View File

@@ -1,18 +1,24 @@
/** /**
* Regression tests for ultrareviewCommand preflight integration. * Regression tests for `ultrareviewCommand.call` (src/commands/review/
* Uses real fetchUltrareviewPreflight with axios mocked to verify the three * ultrareviewCommand.tsx). The previous version of `call` made an axios
* action paths: proceed / confirm / blocked. * preflight POST and branched on `action: proceed | blocked | confirm`;
* that integration was removed and `call` now branches on `checkOverageGate()`'s
* four `kind` values: `not-enabled`, `low-balance`, `needs-confirm`, `proceed`.
* *
* NOTE: 4 of 6 tests are isolation flakes, not pollution. The current * These tests verify each branch:
* ultrareviewCommand.tsx source does not call fetchUltrareviewPreflight * - `proceed` → forwards billingNote and args to `launchRemoteReview`,
* (the preflight axios path was removed), so blocked/confirm/PR-args tests * calls `onDone(text)`, returns null
* can never observe the mocked axios path — they fall through to the * - `not-enabled` → onDone with paywall message + `display: 'system'`,
* launchRemoteReview mock returning "Launched successfully." The two passing * returns null, does NOT launch
* tests (proceed action / null preflight network failure) match that * - `low-balance` → onDone with balance-too-low message including the
* behavior. Out of scope for the test-flake-fix pass; needs source review * available amount, returns null, does NOT launch
* to either restore preflight or rewrite tests. * - `needs-confirm` → returns the React `UltrareviewOverageDialog` element,
* does NOT call onDone, does NOT launch
* - `proceed` + null launch result → onDone with "failed to launch" message
* - `proceed` + arg pass-through → args (e.g. PR number) reach launchRemoteReview
* verbatim (call doesn't parse them itself)
*/ */
import { afterAll, describe, expect, mock, test } from 'bun:test'; import { afterAll, beforeEach, describe, expect, mock, test } from 'bun:test';
import { debugMock } from '../../../../tests/mocks/debug.js'; import { debugMock } from '../../../../tests/mocks/debug.js';
import { logMock } from '../../../../tests/mocks/log.js'; import { logMock } from '../../../../tests/mocks/log.js';
import { setupAxiosMock } from '../../../../tests/mocks/axios.js'; import { setupAxiosMock } from '../../../../tests/mocks/axios.js';
@@ -54,11 +60,26 @@ mock.module('src/utils/auth.js', () => ({
isEnterpriseSubscriber: () => false, isEnterpriseSubscriber: () => false,
})); }));
// Mock checkOverageGate to always return proceed (gate logic tested separately) // Mock checkOverageGate with a mutable gate result so each test can drive
// the four branches in ultrareviewCommand.call (not-enabled, low-balance,
// needs-confirm, proceed). launchRemoteReview captures args for the
// args-forwarding test, and its return value is mutable too — `null` triggers
// the "failed to launch" onDone branch.
// Shape of the value the mocked `checkOverageGate` resolves to. One variant
// per `kind`; only `proceed` carries a billing note and only `low-balance`
// carries the available amount.
type GateResult =
| { kind: 'proceed'; billingNote: string }
| { kind: 'not-enabled' }
| { kind: 'low-balance'; available: number }
| { kind: 'needs-confirm' };
// Gate result handed back by the mocked `checkOverageGate`; tests reassign it
// to select the branch under test.
let _gateResult: GateResult = { kind: 'proceed', billingNote: '' };
// Value returned by the mocked `launchRemoteReview`; may be set to `null`.
let _launchResult: Array<{ type: 'text'; text: string }> | null = [{ type: 'text', text: 'Launched successfully.' }];
// First argument of every mocked `launchRemoteReview` call, in call order.
const _capturedLaunchArgs: string[] = [];
mock.module('src/commands/review/reviewRemote.js', () => ({ mock.module('src/commands/review/reviewRemote.js', () => ({
checkOverageGate: async () => ({ kind: 'proceed', billingNote: '' }), checkOverageGate: async () => _gateResult,
confirmOverage: () => {}, confirmOverage: () => {},
launchRemoteReview: async () => [{ type: 'text', text: 'Launched successfully.' }], launchRemoteReview: async (args: string) => {
_capturedLaunchArgs.push(args);
return _launchResult;
},
})); }));
// Mock OAuth config so real fetchUltrareviewPreflight can run // Mock OAuth config so real fetchUltrareviewPreflight can run
@@ -173,28 +194,32 @@ const makeContext = () =>
abortController: { signal: {} }, abortController: { signal: {} },
}) as Parameters<typeof call>[1]; }) as Parameters<typeof call>[1];
describe('ultrareviewCommand preflight integration', () => { describe('ultrareviewCommand.call: gate branches', () => {
test('proceed action: launches immediately without dialog', async () => { // Reset gate + launch state between tests so a previous test's mutation
mockAxiosPost.mockImplementationOnce(async () => ({ // doesn't leak into the next.
status: 200, beforeEach(() => {
data: { action: 'proceed', billing_note: null }, _gateResult = { kind: 'proceed', billingNote: '' };
})); _launchResult = [{ type: 'text', text: 'Launched successfully.' }];
_capturedLaunchArgs.length = 0;
});
test('proceed gate: forwards billingNote to launchRemoteReview, calls onDone, returns null', async () => {
_gateResult = { kind: 'proceed', billingNote: ' Free review 1 of 5.' };
const messages: string[] = []; const messages: string[] = [];
const onDone = (msg: string) => messages.push(msg); const onDone = (msg: string) => messages.push(msg);
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), ''); const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
// Should not render a dialog — returns null after calling onDone
expect(result).toBeNull(); expect(result).toBeNull();
expect(messages.length).toBe(1); expect(messages.length).toBe(1);
expect(messages[0]).toContain('Launched successfully'); expect(messages[0]).toContain('Launched successfully');
// launchRemoteReview was invoked exactly once with the empty args.
expect(_capturedLaunchArgs).toEqual(['']);
}); });
test('blocked action: calls onDone with unavailable message', async () => { test('not-enabled gate: onDone with paywall message, returns null', async () => {
mockAxiosPost.mockImplementationOnce(async () => ({ _gateResult = { kind: 'not-enabled' };
status: 200,
data: { action: 'blocked', billing_note: null },
}));
const messages: string[] = []; const messages: string[] = [];
const opts: Array<unknown> = []; const opts: Array<unknown> = [];
@@ -204,70 +229,84 @@ describe('ultrareviewCommand preflight integration', () => {
}; };
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), ''); const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
expect(result).toBeNull(); expect(result).toBeNull();
expect(messages.length).toBe(1); expect(messages).toHaveLength(1);
expect(messages[0]).toBe('Ultrareview is currently unavailable.'); expect(messages[0]).toContain('Free ultrareviews used');
expect(messages[0]).toContain('claude.ai/settings/billing');
expect((opts[0] as { display: string }).display).toBe('system');
// launchRemoteReview must NOT be called when paywalled.
expect(_capturedLaunchArgs).toEqual([]);
});
test('low-balance gate: onDone with balance-too-low message including available amount, returns null', async () => {
_gateResult = { kind: 'low-balance', available: 4.5 };
const messages: string[] = [];
const opts: Array<unknown> = [];
const onDone = (msg: string, opt: unknown) => {
messages.push(msg);
opts.push(opt);
};
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
expect(result).toBeNull();
expect(messages).toHaveLength(1);
expect(messages[0]).toContain('Balance too low');
expect(messages[0]).toContain('$4.50');
expect(messages[0]).toContain('claude.ai/settings/billing');
expect((opts[0] as { display: string }).display).toBe('system');
expect(_capturedLaunchArgs).toEqual([]);
});
test('needs-confirm gate: returns UltrareviewOverageDialog React element, does not launch', async () => {
_gateResult = { kind: 'needs-confirm' };
const messages: string[] = [];
const onDone = (msg: string) => messages.push(msg);
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
// Returns a React element rather than null.
expect(result).not.toBeNull();
expect(typeof result).toBe('object');
const element = result as { type: unknown };
expect(element.type).toBeDefined();
// No onDone call until the user interacts with the dialog.
expect(messages).toEqual([]);
expect(_capturedLaunchArgs).toEqual([]);
});
test('proceed gate + launchRemoteReview returns null: onDone with failure message', async () => {
_gateResult = { kind: 'proceed', billingNote: '' };
_launchResult = null; // teleport / non-github failure path
const messages: string[] = [];
const opts: Array<unknown> = [];
const onDone = (msg: string, opt: unknown) => {
messages.push(msg);
opts.push(opt);
};
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
expect(result).toBeNull();
expect(messages).toHaveLength(1);
expect(messages[0]).toContain('Ultrareview failed to launch');
expect((opts[0] as { display: string }).display).toBe('system'); expect((opts[0] as { display: string }).display).toBe('system');
}); });
test('blocked action with billing_note: shows billing_note as message', async () => { test('proceed gate: forwards args (e.g. PR number) verbatim to launchRemoteReview', async () => {
mockAxiosPost.mockImplementationOnce(async () => ({ _gateResult = { kind: 'proceed', billingNote: '' };
status: 200,
data: { action: 'blocked', billing_note: 'Ultrareview is unavailable for your organization.' },
}));
const messages: string[] = [];
const onDone = (msg: string) => messages.push(msg);
await call(onDone as Parameters<typeof call>[0], makeContext(), '');
expect(messages[0]).toBe('Ultrareview is unavailable for your organization.');
});
test('confirm action: returns UltrareviewPreflightDialog element', async () => {
mockAxiosPost.mockImplementationOnce(async () => ({
status: 200,
data: { action: 'confirm', billing_note: 'This run will cost ~$2.' },
}));
const onDone = (_msg: string) => {};
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
// Should return a React element (the PreflightDialog)
expect(result).not.toBeNull();
expect(typeof result).toBe('object');
// The element type should be the PreflightDialog component
const element = result as { type: unknown };
expect(element.type).toBeDefined();
});
test('null preflight (network failure): falls back to direct launch', async () => {
mockAxiosPost.mockImplementationOnce(async () => {
throw new Error('network error');
});
const messages: string[] = [];
const onDone = (msg: string) => messages.push(msg);
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
expect(result).toBeNull();
expect(messages.length).toBe(1);
expect(messages[0]).toContain('Launched successfully');
});
test('PR number args: extracts pr_number for preflight request', async () => {
const capturedBodies: Array<unknown> = [];
mockAxiosPost.mockImplementationOnce(async (_url: unknown, body: unknown) => {
capturedBodies.push(body);
return { status: 200, data: { action: 'proceed', billing_note: null } };
});
const messages: string[] = []; const messages: string[] = [];
const onDone = (msg: string) => messages.push(msg); const onDone = (msg: string) => messages.push(msg);
await call(onDone as Parameters<typeof call>[0], makeContext(), '42'); await call(onDone as Parameters<typeof call>[0], makeContext(), '42');
expect(capturedBodies.length).toBe(1); // ultrareviewCommand.call doesn't parse args itself — launchRemoteReview
const b = capturedBodies[0] as { pr_number: number; repo: string }; // is responsible for PR-number detection. So we only assert pass-through.
expect(b.pr_number).toBe(42); expect(_capturedLaunchArgs).toEqual(['42']);
expect(b.repo).toBe('testowner/testrepo');
}); });
}); });

View File

@@ -1,9 +1,22 @@
// NOTE: isolation flake, not pollution. The subprocess Bun.spawn'd in // Why we use the BUILT bundle instead of src/entrypoints/cli.tsx:
// runAutonomyCli does not inherit the test runner's tsconfig path-alias // `Bun.spawn` runs the CLI in a fresh process whose cwd is the per-test
// resolution, so it reports `Cannot find module 'src/bootstrap/state.js' // tempDir. Bun resolves the `src/*` tsconfig path alias from the cwd's
// from src/utils/startupProfiler.ts` even when this file is run alone. // nearest tsconfig.json, NOT from the entrypoint file's directory — so a
// Out of scope for the test-flake-fix pass; needs subprocess-launcher rework. // subprocess started with cwd=tempDir cannot resolve `import 'src/bootstrap/
import { afterEach, beforeEach, describe, expect, test } from 'bun:test' // state.js'`. The built dist/cli.js has all aliases pre-resolved, which
// makes it usable from any cwd.
//
// CI runs `bun test` BEFORE `bun run build`, so we lazy-build cli.tsx in a
// `beforeAll` if dist/cli.js is missing. Local runs after `bun run build`
// just see the file and skip the build.
import {
afterEach,
beforeAll,
beforeEach,
describe,
expect,
test,
} from 'bun:test'
import { existsSync, mkdtempSync, rmSync } from 'node:fs' import { existsSync, mkdtempSync, rmSync } from 'node:fs'
import { tmpdir } from 'node:os' import { tmpdir } from 'node:os'
import { join, resolve } from 'node:path' import { join, resolve } from 'node:path'
@@ -18,12 +31,37 @@ import {
} from '../../src/utils/autonomyRuns' } from '../../src/utils/autonomyRuns'
import { listAutonomyFlows } from '../../src/utils/autonomyFlows' import { listAutonomyFlows } from '../../src/utils/autonomyFlows'
const CLI_ENTRYPOINT = resolve(import.meta.dir, '../../src/entrypoints/cli.tsx') const CLI_ENTRYPOINT = resolve(import.meta.dir, '../../dist/cli.js')
const PROJECT_ROOT = resolve(import.meta.dir, '../..')
let tempDir = '' let tempDir = ''
let configDir = '' let configDir = ''
let previousConfigDir: string | undefined let previousConfigDir: string | undefined
/**
 * Makes sure the bundled CLI at `CLI_ENTRYPOINT` exists, building it on demand.
 *
 * Returns immediately when the bundle is already on disk. Otherwise runs
 * `run build` through the current runtime executable with `PROJECT_ROOT` as
 * the working directory and waits for it to finish.
 *
 * @throws Error when the build exits non-zero or still has not produced the
 *   bundle; the message carries the exit code plus captured stderr and stdout.
 */
async function ensureCliBundle(): Promise<void> {
  if (existsSync(CLI_ENTRYPOINT)) return

  const proc = Bun.spawn({
    cmd: [process.execPath, 'run', 'build'],
    cwd: PROJECT_ROOT,
    stdin: 'ignore',
    stdout: 'pipe',
    stderr: 'pipe',
  })

  // Drain BOTH pipes while waiting for exit. Leaving a piped stream unread
  // risks stalling the child once the pipe buffer fills, and build tools
  // often print their diagnostics on stdout rather than stderr.
  const [stdout, stderr, exitCode] = await Promise.all([
    new Response(proc.stdout).text(),
    new Response(proc.stderr).text(),
    proc.exited,
  ])

  if (exitCode !== 0 || !existsSync(CLI_ENTRYPOINT)) {
    const stdoutSection = stdout.trim() ? `\n--- stdout ---\n${stdout}` : ''
    throw new Error(
      `Failed to build dist/cli.js for autonomy CLI tests (exit=${exitCode}):\n${stderr}${stdoutSection}`,
    )
  }
}
// Build the CLI bundle once before any test in this file runs. The hook gets
// a 120 000 ms timeout instead of the default because it may run a full build.
beforeAll(() => ensureCliBundle(), 120_000)
async function runAutonomyCli(args: string[]): Promise<string> { async function runAutonomyCli(args: string[]): Promise<string> {
const proc = Bun.spawn({ const proc = Bun.spawn({
cmd: [process.execPath, CLI_ENTRYPOINT, 'autonomy', ...args], cmd: [process.execPath, CLI_ENTRYPOINT, 'autonomy', ...args],