test: rewrite 6 stale tests to match current source behavior

Two clusters of pre-existing failures fixed by aligning tests with the
source they were meant to verify (not by changing source):

1. ultrareviewCommand (4 fails)
   The 4 "preflight integration" tests assumed `call` makes an axios POST
   and branches on `action: proceed | blocked | confirm`. That integration
   was removed; the current `call` branches on `checkOverageGate()`'s four
   `kind` values. Replaced with 6 tests covering each gate branch
   (`not-enabled`, `low-balance`, `needs-confirm`, `proceed`), arg
   pass-through to launchRemoteReview, and the null-launch failure path.

2. autonomy-lifecycle-user-flow (2 fails)
   The Bun.spawn'd subprocess used cwd=tempDir, where Bun couldn't resolve
   the `src/*` tsconfig path alias (it's resolved from cwd's tsconfig, not
   the entrypoint file's). Switched the entrypoint to the bundled
   dist/cli.js (aliases pre-resolved) and added a beforeAll that lazy-builds
   the bundle if missing — handles the CI ordering where `bun test` runs
   before `bun run build`.

Local: 5345/5345 pass (was 5339/5345).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
unraid
2026-05-09 16:55:53 +08:00
parent ea5cb3ad02
commit dbd18b4a76
2 changed files with 166 additions and 89 deletions

View File

@@ -1,18 +1,24 @@
/**
* Regression tests for ultrareviewCommand preflight integration.
* Uses real fetchUltrareviewPreflight with axios mocked to verify the three
* action paths: proceed / confirm / blocked.
* Regression tests for `ultrareviewCommand.call` (src/commands/review/
* ultrareviewCommand.tsx). The previous version of `call` made an axios
* preflight POST and branched on `action: proceed | blocked | confirm`;
* that integration was removed and `call` now branches on `checkOverageGate()`'s
* four `kind` values: `not-enabled`, `low-balance`, `needs-confirm`, `proceed`.
*
* NOTE: 4 of 6 tests are isolation flakes, not pollution. The current
* ultrareviewCommand.tsx source does not call fetchUltrareviewPreflight
* (the preflight axios path was removed), so blocked/confirm/PR-args tests
* can never observe the mocked axios path — they fall through to the
* launchRemoteReview mock returning "Launched successfully." The two passing
* tests (proceed action / null preflight network failure) match that
* behavior. Out of scope for the test-flake-fix pass; needs source review
* to either restore preflight or rewrite tests.
* These tests verify each branch:
* - `proceed` → forwards billingNote and args to `launchRemoteReview`,
* calls `onDone(text)`, returns null
* - `not-enabled` → onDone with paywall message + `display: 'system'`,
* returns null, does NOT launch
* - `low-balance` → onDone with balance-too-low message including the
* available amount, returns null, does NOT launch
* - `needs-confirm` → returns the React `UltrareviewOverageDialog` element,
* does NOT call onDone, does NOT launch
* - `proceed` + null launch result → onDone with "failed to launch" message
* - `proceed` + arg pass-through → args (e.g. PR number) reach launchRemoteReview
* verbatim (call doesn't parse them itself)
*/
import { afterAll, describe, expect, mock, test } from 'bun:test';
import { afterAll, beforeEach, describe, expect, mock, test } from 'bun:test';
import { debugMock } from '../../../../tests/mocks/debug.js';
import { logMock } from '../../../../tests/mocks/log.js';
import { setupAxiosMock } from '../../../../tests/mocks/axios.js';
@@ -54,11 +60,26 @@ mock.module('src/utils/auth.js', () => ({
isEnterpriseSubscriber: () => false,
}));
// Mock checkOverageGate to always return proceed (gate logic tested separately)
// Mock checkOverageGate with a mutable gate result so each test can drive
// the four branches in ultrareviewCommand.call (not-enabled, low-balance,
// needs-confirm, proceed). launchRemoteReview captures args for the
// args-forwarding test, and its return value is mutable too — `null` triggers
// the "failed to launch" onDone branch.
type GateResult =
| { kind: 'proceed'; billingNote: string }
| { kind: 'not-enabled' }
| { kind: 'low-balance'; available: number }
| { kind: 'needs-confirm' };
let _gateResult: GateResult = { kind: 'proceed', billingNote: '' };
let _launchResult: Array<{ type: 'text'; text: string }> | null = [{ type: 'text', text: 'Launched successfully.' }];
const _capturedLaunchArgs: string[] = [];
mock.module('src/commands/review/reviewRemote.js', () => ({
checkOverageGate: async () => ({ kind: 'proceed', billingNote: '' }),
checkOverageGate: async () => _gateResult,
confirmOverage: () => {},
launchRemoteReview: async () => [{ type: 'text', text: 'Launched successfully.' }],
launchRemoteReview: async (args: string) => {
_capturedLaunchArgs.push(args);
return _launchResult;
},
}));
// Mock OAuth config so real fetchUltrareviewPreflight can run
@@ -173,28 +194,32 @@ const makeContext = () =>
abortController: { signal: {} },
}) as Parameters<typeof call>[1];
describe('ultrareviewCommand preflight integration', () => {
test('proceed action: launches immediately without dialog', async () => {
mockAxiosPost.mockImplementationOnce(async () => ({
status: 200,
data: { action: 'proceed', billing_note: null },
}));
describe('ultrareviewCommand.call: gate branches', () => {
// Reset gate + launch state between tests so a previous test's mutation
// doesn't leak into the next.
beforeEach(() => {
_gateResult = { kind: 'proceed', billingNote: '' };
_launchResult = [{ type: 'text', text: 'Launched successfully.' }];
_capturedLaunchArgs.length = 0;
});
test('proceed gate: forwards billingNote to launchRemoteReview, calls onDone, returns null', async () => {
_gateResult = { kind: 'proceed', billingNote: ' Free review 1 of 5.' };
const messages: string[] = [];
const onDone = (msg: string) => messages.push(msg);
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
// Should not render a dialog — returns null after calling onDone
expect(result).toBeNull();
expect(messages.length).toBe(1);
expect(messages[0]).toContain('Launched successfully');
// launchRemoteReview was invoked exactly once with the empty args.
expect(_capturedLaunchArgs).toEqual(['']);
});
test('blocked action: calls onDone with unavailable message', async () => {
mockAxiosPost.mockImplementationOnce(async () => ({
status: 200,
data: { action: 'blocked', billing_note: null },
}));
test('not-enabled gate: onDone with paywall message, returns null', async () => {
_gateResult = { kind: 'not-enabled' };
const messages: string[] = [];
const opts: Array<unknown> = [];
@@ -204,70 +229,84 @@ describe('ultrareviewCommand preflight integration', () => {
};
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
expect(result).toBeNull();
expect(messages.length).toBe(1);
expect(messages[0]).toBe('Ultrareview is currently unavailable.');
expect(messages).toHaveLength(1);
expect(messages[0]).toContain('Free ultrareviews used');
expect(messages[0]).toContain('claude.ai/settings/billing');
expect((opts[0] as { display: string }).display).toBe('system');
// launchRemoteReview must NOT be called when paywalled.
expect(_capturedLaunchArgs).toEqual([]);
});
test('low-balance gate: onDone with balance-too-low message including available amount, returns null', async () => {
_gateResult = { kind: 'low-balance', available: 4.5 };
const messages: string[] = [];
const opts: Array<unknown> = [];
const onDone = (msg: string, opt: unknown) => {
messages.push(msg);
opts.push(opt);
};
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
expect(result).toBeNull();
expect(messages).toHaveLength(1);
expect(messages[0]).toContain('Balance too low');
expect(messages[0]).toContain('$4.50');
expect(messages[0]).toContain('claude.ai/settings/billing');
expect((opts[0] as { display: string }).display).toBe('system');
expect(_capturedLaunchArgs).toEqual([]);
});
test('needs-confirm gate: returns UltrareviewOverageDialog React element, does not launch', async () => {
_gateResult = { kind: 'needs-confirm' };
const messages: string[] = [];
const onDone = (msg: string) => messages.push(msg);
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
// Returns a React element rather than null.
expect(result).not.toBeNull();
expect(typeof result).toBe('object');
const element = result as { type: unknown };
expect(element.type).toBeDefined();
// No onDone call until the user interacts with the dialog.
expect(messages).toEqual([]);
expect(_capturedLaunchArgs).toEqual([]);
});
test('proceed gate + launchRemoteReview returns null: onDone with failure message', async () => {
_gateResult = { kind: 'proceed', billingNote: '' };
_launchResult = null; // teleport / non-github failure path
const messages: string[] = [];
const opts: Array<unknown> = [];
const onDone = (msg: string, opt: unknown) => {
messages.push(msg);
opts.push(opt);
};
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
expect(result).toBeNull();
expect(messages).toHaveLength(1);
expect(messages[0]).toContain('Ultrareview failed to launch');
expect((opts[0] as { display: string }).display).toBe('system');
});
test('blocked action with billing_note: shows billing_note as message', async () => {
mockAxiosPost.mockImplementationOnce(async () => ({
status: 200,
data: { action: 'blocked', billing_note: 'Ultrareview is unavailable for your organization.' },
}));
const messages: string[] = [];
const onDone = (msg: string) => messages.push(msg);
await call(onDone as Parameters<typeof call>[0], makeContext(), '');
expect(messages[0]).toBe('Ultrareview is unavailable for your organization.');
});
test('confirm action: returns UltrareviewPreflightDialog element', async () => {
mockAxiosPost.mockImplementationOnce(async () => ({
status: 200,
data: { action: 'confirm', billing_note: 'This run will cost ~$2.' },
}));
const onDone = (_msg: string) => {};
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
// Should return a React element (the PreflightDialog)
expect(result).not.toBeNull();
expect(typeof result).toBe('object');
// The element type should be the PreflightDialog component
const element = result as { type: unknown };
expect(element.type).toBeDefined();
});
test('null preflight (network failure): falls back to direct launch', async () => {
mockAxiosPost.mockImplementationOnce(async () => {
throw new Error('network error');
});
const messages: string[] = [];
const onDone = (msg: string) => messages.push(msg);
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
expect(result).toBeNull();
expect(messages.length).toBe(1);
expect(messages[0]).toContain('Launched successfully');
});
test('PR number args: extracts pr_number for preflight request', async () => {
const capturedBodies: Array<unknown> = [];
mockAxiosPost.mockImplementationOnce(async (_url: unknown, body: unknown) => {
capturedBodies.push(body);
return { status: 200, data: { action: 'proceed', billing_note: null } };
});
test('proceed gate: forwards args (e.g. PR number) verbatim to launchRemoteReview', async () => {
_gateResult = { kind: 'proceed', billingNote: '' };
const messages: string[] = [];
const onDone = (msg: string) => messages.push(msg);
await call(onDone as Parameters<typeof call>[0], makeContext(), '42');
expect(capturedBodies.length).toBe(1);
const b = capturedBodies[0] as { pr_number: number; repo: string };
expect(b.pr_number).toBe(42);
expect(b.repo).toBe('testowner/testrepo');
// ultrareviewCommand.call doesn't parse args itself — launchRemoteReview
// is responsible for PR-number detection. So we only assert pass-through.
expect(_capturedLaunchArgs).toEqual(['42']);
});
});

View File

@@ -1,9 +1,22 @@
// NOTE: isolation flake, not pollution. The subprocess Bun.spawn'd in
// runAutonomyCli does not inherit the test runner's tsconfig path-alias
// resolution, so it reports `Cannot find module 'src/bootstrap/state.js'
// from src/utils/startupProfiler.ts` even when this file is run alone.
// Out of scope for the test-flake-fix pass; needs subprocess-launcher rework.
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
// Why we use the BUILT bundle instead of src/entrypoints/cli.tsx:
// `Bun.spawn` runs the CLI in a fresh process whose cwd is the per-test
// tempDir. Bun resolves the `src/*` tsconfig path alias from the cwd's
// nearest tsconfig.json, NOT from the entrypoint file's directory — so a
// subprocess started with cwd=tempDir cannot resolve `import 'src/bootstrap/
// state.js'`. The built dist/cli.js has all aliases pre-resolved, which
// makes it usable from any cwd.
//
// CI runs `bun test` BEFORE `bun run build`, so we lazy-build cli.tsx in a
// `beforeAll` if dist/cli.js is missing. Local runs after `bun run build`
// just see the file and skip the build.
import {
afterEach,
beforeAll,
beforeEach,
describe,
expect,
test,
} from 'bun:test'
import { existsSync, mkdtempSync, rmSync } from 'node:fs'
import { tmpdir } from 'node:os'
import { join, resolve } from 'node:path'
@@ -18,12 +31,37 @@ import {
} from '../../src/utils/autonomyRuns'
import { listAutonomyFlows } from '../../src/utils/autonomyFlows'
const CLI_ENTRYPOINT = resolve(import.meta.dir, '../../src/entrypoints/cli.tsx')
const CLI_ENTRYPOINT = resolve(import.meta.dir, '../../dist/cli.js')
const PROJECT_ROOT = resolve(import.meta.dir, '../..')
let tempDir = ''
let configDir = ''
let previousConfigDir: string | undefined
async function ensureCliBundle(): Promise<void> {
if (existsSync(CLI_ENTRYPOINT)) return
const proc = Bun.spawn({
cmd: [process.execPath, 'run', 'build'],
cwd: PROJECT_ROOT,
stdin: 'ignore',
stdout: 'pipe',
stderr: 'pipe',
})
const [stderr, exitCode] = await Promise.all([
new Response(proc.stderr).text(),
proc.exited,
])
if (exitCode !== 0 || !existsSync(CLI_ENTRYPOINT)) {
throw new Error(
`Failed to build dist/cli.js for autonomy CLI tests (exit=${exitCode}):\n${stderr}`,
)
}
}
beforeAll(async () => {
await ensureCliBundle()
}, 120_000)
async function runAutonomyCli(args: string[]): Promise<string> {
const proc = Bun.spawn({
cmd: [process.execPath, CLI_ENTRYPOINT, 'autonomy', ...args],