feat: 添加 Provider Registry、StatusLine、Cache Stats 和其他增强

- providerRegistry: OpenAI 兼容 provider 切换(Cerebras/Groq/DeepSeek/Qwen)
- StatusLine: 增强状态栏(缓存命中率、TTL 倒计时、自定义 shell 命令)
- cacheStats: 缓存命中率和 token 签名追踪
- ultrareviewPreflight: 代码审查预检服务
- SkillsMenu/filterSkills: 技能菜单过滤增强
- MagicDocs/langfuse prompts: 提示词更新
- claude.ts: API 客户端更新

Co-Authored-By: glm-5-turbo <zai-org@claude-code-best.win>
This commit is contained in:
claude-code-best
2026-05-09 23:04:35 +08:00
parent fdddb6dbe8
commit efaf4afd9c
28 changed files with 3613 additions and 219 deletions

View File

@@ -0,0 +1,56 @@
import React, { useCallback, useRef, useState } from 'react';
import { Box, Dialog, Text } from '@anthropic/ink';
import { Select } from '../../components/CustomSelect/select.js';
/** Props accepted by UltrareviewPreflightDialog. */
type Props = {
// Billing text to show; when null the dialog falls back to a generic cost warning.
billingNote: string | null;
// Called when the user picks "Proceed". The signal it receives is aborted if the dialog is dismissed.
onConfirm: (signal: AbortSignal) => Promise<void>;
// Called when the user picks "Cancel" or dismisses the dialog.
onCancel: () => void;
};
/**
 * Confirmation dialog for the case where /v1/ultrareview/preflight answers
 * with action='confirm'.
 *
 * Shows the server-provided billing note (or a generic cost warning when none
 * was supplied) and offers Proceed / Cancel. Choosing Proceed swaps the menu
 * for a "Launching" line and invokes `onConfirm` with an AbortSignal; if that
 * promise rejects, the menu is shown again. Dismissing the dialog aborts the
 * signal and then calls `onCancel`.
 */
export function UltrareviewPreflightDialog({ billingNote, onConfirm, onCancel }: Props): React.ReactNode {
  const [isLaunching, setIsLaunching] = useState(false);
  // One controller for the lifetime of the dialog; its signal is handed to onConfirm.
  const abortControllerRef = useRef(new AbortController());

  const handleSelect = useCallback(
    (value: string) => {
      if (value !== 'proceed') {
        onCancel();
        return;
      }
      setIsLaunching(true);
      const launch = onConfirm(abortControllerRef.current.signal);
      // A rejected launch returns the user to the Proceed / Cancel menu.
      void launch.catch(() => setIsLaunching(false));
    },
    [onConfirm, onCancel],
  );

  const handleCancel = useCallback(() => {
    abortControllerRef.current.abort();
    onCancel();
  }, [onCancel]);

  const body = isLaunching ? (
    <Text color="background">Launching</Text>
  ) : (
    <Select
      options={[
        { label: 'Proceed', value: 'proceed' },
        { label: 'Cancel', value: 'cancel' },
      ]}
      onChange={handleSelect}
      onCancel={handleCancel}
    />
  );

  return (
    <Dialog title="Ultrareview — additional cost" onCancel={handleCancel} color="background">
      <Box flexDirection="column" gap={1}>
        <Text>{billingNote ?? 'This run may incur additional cost.'}</Text>
        {body}
      </Box>
    </Dialog>
  );
}

View File

@@ -0,0 +1,312 @@
/**
* Regression tests for `ultrareviewCommand.call` (src/commands/review/
* ultrareviewCommand.tsx). The previous version of `call` made an axios
* preflight POST and branched on `action: proceed | blocked | confirm`;
* that integration was removed and `call` now branches on `checkOverageGate()`'s
* four `kind` values: `not-enabled`, `low-balance`, `needs-confirm`, `proceed`.
*
* These tests verify each branch:
* - `proceed` → forwards billingNote and args to `launchRemoteReview`,
* calls `onDone(text)`, returns null
* - `not-enabled` → onDone with paywall message + `display: 'system'`,
* returns null, does NOT launch
* - `low-balance` → onDone with balance-too-low message including the
* available amount, returns null, does NOT launch
* - `needs-confirm` → returns the React `UltrareviewOverageDialog` element,
* does NOT call onDone, does NOT launch
* - `proceed` + null launch result → onDone with "failed to launch" message
* - `proceed` + arg pass-through → args (e.g. PR number) reach launchRemoteReview
* verbatim (call doesn't parse them itself)
*/
import { afterAll, beforeEach, describe, expect, mock, test } from 'bun:test';
import { debugMock } from '../../../../tests/mocks/debug.js';
import { logMock } from '../../../../tests/mocks/log.js';
import { setupAxiosMock } from '../../../../tests/mocks/axios.js';
// Pre-import the real react and ink modules so we can delegate after this
// suite. Bun's mock.module is process-global / last-write-wins; without
// delegation the stub createElement / stub ink components leak into other
// test files (e.g. SnapshotUpdateDialog.test.tsx, AgentsPlatformView.test.tsx)
// that need real React.createElement and real Box/Text components.
// Real module namespaces, captured before any mock.module() call below so the
// react / ink mocks can spread them and fall back to the genuine exports.
const _realReactMod = (await import('react')) as Record<string, unknown> & {
default?: Record<string, unknown>;
};
const _realInkMod = (await import('@anthropic/ink')) as Record<string, unknown>;
// While true, the react / ink mocks hand out stubs; afterAll clears both flags.
let _useStubReactForUltrareview = true;
let _useStubInkForUltrareview = true;
afterAll(() => {
_useStubReactForUltrareview = false;
_useStubInkForUltrareview = false;
// `_ultrareviewAxiosHandle` is declared with `const` further down the file.
// Referencing it here is safe because this callback only runs after the whole
// module has been evaluated. Switching useStubs off is meant to stop this
// suite's axios stubs from being used by test files that run later in the
// same process.
_ultrareviewAxiosHandle.useStubs = false;
});
// Mock dependency chain before any subject import
// Debug and log modules use the shared mock factories from tests/mocks.
mock.module('src/utils/debug.ts', debugMock);
mock.module('src/utils/log.ts', logMock);
// Analytics is silenced: logEvent is a no-op.
mock.module('src/services/analytics/index.js', () => ({
logEvent: () => {},
}));
// Feature-value lookups always return null.
mock.module('src/services/analytics/growthbook.js', () => ({
getFeatureValue_CACHED_MAY_BE_STALE: () => null,
}));
// Mock auth utilities: isClaudeAISubscriber reports true, while the team and
// enterprise checks report false.
mock.module('src/utils/auth.js', () => ({
isClaudeAISubscriber: () => true,
isTeamSubscriber: () => false,
isEnterpriseSubscriber: () => false,
}));
// Mock checkOverageGate with a mutable gate result so each test can drive
// the four branches in ultrareviewCommand.call (not-enabled, low-balance,
// needs-confirm, proceed). launchRemoteReview captures args for the
// args-forwarding test, and its return value is mutable too — `null` triggers
// the "failed to launch" onDone branch.
/** Result shape returned by the mocked `checkOverageGate`; one variant per branch under test. */
type GateResult =
| { kind: 'proceed'; billingNote: string }
| { kind: 'not-enabled' }
| { kind: 'low-balance'; available: number }
| { kind: 'needs-confirm' };
// Mutable fixtures: each test assigns these before invoking `call`, and the
// suite's beforeEach restores the defaults.
let _gateResult: GateResult = { kind: 'proceed', billingNote: '' };
let _launchResult: Array<{ type: 'text'; text: string }> | null = [{ type: 'text', text: 'Launched successfully.' }];
// Every `args` string handed to the mocked launchRemoteReview, in call order.
const _capturedLaunchArgs: string[] = [];
mock.module('src/commands/review/reviewRemote.js', () => ({
// Reads `_gateResult` at call time, so per-test reassignment takes effect.
checkOverageGate: async () => _gateResult,
confirmOverage: () => {},
// Records only the first argument; anything else `call` passes is not captured.
launchRemoteReview: async (args: string) => {
_capturedLaunchArgs.push(args);
return _launchResult;
},
}));
// Mock OAuth config so real fetchUltrareviewPreflight can run.
// NOTE(review): this file's header says the axios preflight was removed from
// `call`. If nothing else reaches fetchUltrareviewPreflight, this mock and the
// teleport / axios mocks below may be leftovers — confirm before deleting.
mock.module('src/constants/oauth.js', () => ({
getOauthConfig: () => ({ BASE_API_URL: 'https://api.anthropic.com' }),
}));
// Mock prepareApiRequest so real fetchUltrareviewPreflight skips auth
mock.module('src/utils/teleport/api.js', () => ({
prepareApiRequest: async () => ({
accessToken: 'test-token',
orgUUID: 'org-uuid-test',
}),
getOAuthHeaders: (token: string) => ({
Authorization: `Bearer ${token}`,
'Content-Type': 'application/json',
'anthropic-version': '2023-06-01',
}),
}));
// Mock axios — per-test responses set via mockAxiosPost.mockImplementationOnce
// The default response is HTTP 200 with action 'proceed' and no billing note.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const mockAxiosPost = mock(
async (..._args: any[]): Promise<any> => ({
status: 200,
data: { action: 'proceed', billing_note: null },
}),
);
// Install the shared axios mock (tests/mocks/axios.js) and route `post` and
// `isAxiosError` through this suite's stubs. The helper's `useStubs` flag
// gates those stubs: it is switched on here and switched off again in the
// suite-level afterAll, so the per-test mockAxiosPost does not leak into later
// test files (mock.module is process-global).
const _ultrareviewAxiosHandle = setupAxiosMock();
_ultrareviewAxiosHandle.useStubs = true;
_ultrareviewAxiosHandle.stubs.post = mockAxiosPost;
// Duck-typed check: any non-null object whose `isAxiosError` property is exactly true.
_ultrareviewAxiosHandle.stubs.isAxiosError = (e: unknown) =>
typeof e === 'object' && e !== null && (e as { isAxiosError?: boolean }).isAxiosError === true;
// Mock detectCurrentRepositoryWithHost: always resolves to a fixed github.com repository.
mock.module('src/utils/detectRepository.js', () => ({
detectCurrentRepositoryWithHost: async () => ({
host: 'github.com',
owner: 'testowner',
name: 'testrepo',
}),
}));
// Minimal mock for React/Ink so we don't need a full renderer.
// Preserve any explicit `children` prop when no varargs children are passed
// — otherwise consumers who pass `children` via the props object (e.g.
// SnapshotUpdateDialog.ts uses `React.createElement(Dialog, { ..., children })`)
// see their array overwritten with `[]`. mock.module is process-global so this
// mock survives into other test files in the same run; afterAll flips the flag
// so we delegate to real React thereafter.
mock.module('react', () => {
// Stub createElement: builds a plain element-shaped object instead of going
// through React. Varargs children win; otherwise an explicit `children` prop
// is kept; otherwise children defaults to an empty array.
const stubCreateElement = (type: unknown, props: unknown, ...children: unknown[]) => {
const propsObj = (props ?? {}) as Record<string, unknown>;
const finalChildren = children.length > 0 ? children : 'children' in propsObj ? propsObj.children : [];
return {
$$typeof: Symbol.for('react.element'),
type,
props: { ...propsObj, children: finalChildren },
};
};
// Real createElement, taken from the default export when present and from the
// namespace otherwise.
const realCreate = ((_realReactMod.default as Record<string, unknown> | undefined)?.createElement ??
_realReactMod.createElement) as (...args: unknown[]) => unknown;
// The flag is checked on every call, so flipping it in afterAll switches
// already-imported consumers over to the real implementation.
const createElement = (...args: unknown[]) =>
_useStubReactForUltrareview ? stubCreateElement(args[0], args[1], ...args.slice(2)) : realCreate(...args);
// Everything except createElement is passed through from real React, on both
// the namespace and the default export.
return {
..._realReactMod,
default: {
...((_realReactMod.default as Record<string, unknown> | undefined) ?? {}),
createElement,
},
createElement,
};
});
// Spread real ink + flag-gate the stub components. Without spread, the bare
// { Box: 'Box', Dialog: 'Dialog', Text: 'Text' } leaks into every later test
// file (e.g. AgentsPlatformView.test.tsx) that imports @anthropic/ink — those
// consumers receive strings instead of real components and rendering breaks.
mock.module('@anthropic/ink', () => {
// NOTE(review): the flag is read when this factory runs, not on each property
// access. Whether later test files see real ink depends on the factory being
// re-invoked after afterAll — confirm against bun's mock.module behaviour.
if (_useStubInkForUltrareview) {
return {
..._realInkMod,
Box: 'Box',
Dialog: 'Dialog',
Text: 'Text',
};
}
return _realInkMod;
});
// Select is replaced by a string tag, matching the stubbed ink components.
mock.module('src/components/CustomSelect/select.js', () => ({
Select: 'Select',
}));
// UltrareviewOverageDialog and PreflightDialog — return a simple marker
mock.module('src/commands/review/UltrareviewOverageDialog.js', () => ({
UltrareviewOverageDialog: () => ({ type: 'UltrareviewOverageDialog' }),
}));
mock.module('src/commands/review/UltrareviewPreflightDialog.js', () => ({
UltrareviewPreflightDialog: () => ({ type: 'UltrareviewPreflightDialog' }),
}));
import { call } from '../ultrareviewCommand.js';
/**
 * Builds the minimal context object passed as the second argument to `call`:
 * only `abortController.signal` is populated (with an empty object), and the
 * result is cast to the parameter type.
 */
function makeContext(): Parameters<typeof call>[1] {
  const stub = { abortController: { signal: {} } };
  return stub as Parameters<typeof call>[1];
}
// One test per `checkOverageGate` result kind, plus the launch-failure and
// argument pass-through cases. Each test drives the branch by assigning
// `_gateResult` / `_launchResult` before invoking `call`.
describe('ultrareviewCommand.call: gate branches', () => {
// Reset gate + launch state between tests so a previous test's mutation
// doesn't leak into the next.
beforeEach(() => {
_gateResult = { kind: 'proceed', billingNote: '' };
_launchResult = [{ type: 'text', text: 'Launched successfully.' }];
_capturedLaunchArgs.length = 0;
});
// NOTE(review): the title says billingNote is forwarded, but the mocked
// launchRemoteReview only records `args`, so nothing below asserts on the
// billing note itself.
test('proceed gate: forwards billingNote to launchRemoteReview, calls onDone, returns null', async () => {
_gateResult = { kind: 'proceed', billingNote: ' Free review 1 of 5.' };
const messages: string[] = [];
const onDone = (msg: string) => messages.push(msg);
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
expect(result).toBeNull();
expect(messages.length).toBe(1);
expect(messages[0]).toContain('Launched successfully');
// launchRemoteReview was invoked exactly once with the empty args.
expect(_capturedLaunchArgs).toEqual(['']);
});
test('not-enabled gate: onDone with paywall message, returns null', async () => {
_gateResult = { kind: 'not-enabled' };
const messages: string[] = [];
const opts: Array<unknown> = [];
// Capture the second onDone argument too, so `display` can be checked.
const onDone = (msg: string, opt: unknown) => {
messages.push(msg);
opts.push(opt);
};
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
expect(result).toBeNull();
expect(messages).toHaveLength(1);
expect(messages[0]).toContain('Free ultrareviews used');
expect(messages[0]).toContain('claude.ai/settings/billing');
expect((opts[0] as { display: string }).display).toBe('system');
// launchRemoteReview must NOT be called when paywalled.
expect(_capturedLaunchArgs).toEqual([]);
});
test('low-balance gate: onDone with balance-too-low message including available amount, returns null', async () => {
_gateResult = { kind: 'low-balance', available: 4.5 };
const messages: string[] = [];
const opts: Array<unknown> = [];
const onDone = (msg: string, opt: unknown) => {
messages.push(msg);
opts.push(opt);
};
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
expect(result).toBeNull();
expect(messages).toHaveLength(1);
expect(messages[0]).toContain('Balance too low');
// The available amount 4.5 is expected to be rendered as "$4.50".
expect(messages[0]).toContain('$4.50');
expect(messages[0]).toContain('claude.ai/settings/billing');
expect((opts[0] as { display: string }).display).toBe('system');
expect(_capturedLaunchArgs).toEqual([]);
});
test('needs-confirm gate: returns UltrareviewOverageDialog React element, does not launch', async () => {
_gateResult = { kind: 'needs-confirm' };
const messages: string[] = [];
const onDone = (msg: string) => messages.push(msg);
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
// Returns a React element rather than null.
// Only the presence of `type` is checked; the test does not verify which
// component the element refers to.
expect(result).not.toBeNull();
expect(typeof result).toBe('object');
const element = result as { type: unknown };
expect(element.type).toBeDefined();
// No onDone call until the user interacts with the dialog.
expect(messages).toEqual([]);
expect(_capturedLaunchArgs).toEqual([]);
});
test('proceed gate + launchRemoteReview returns null: onDone with failure message', async () => {
_gateResult = { kind: 'proceed', billingNote: '' };
_launchResult = null; // teleport / non-github failure path
const messages: string[] = [];
const opts: Array<unknown> = [];
const onDone = (msg: string, opt: unknown) => {
messages.push(msg);
opts.push(opt);
};
const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
expect(result).toBeNull();
expect(messages).toHaveLength(1);
expect(messages[0]).toContain('Ultrareview failed to launch');
expect((opts[0] as { display: string }).display).toBe('system');
});
test('proceed gate: forwards args (e.g. PR number) verbatim to launchRemoteReview', async () => {
_gateResult = { kind: 'proceed', billingNote: '' };
const messages: string[] = [];
const onDone = (msg: string) => messages.push(msg);
await call(onDone as Parameters<typeof call>[0], makeContext(), '42');
// ultrareviewCommand.call doesn't parse args itself — launchRemoteReview
// is responsible for PR-number detection. So we only assert pass-through.
expect(_capturedLaunchArgs).toEqual(['42']);
});
});

View File

@@ -0,0 +1,128 @@
import React, { useEffect, useState } from 'react';
import { formatCost } from '../cost-tracker.js';
import { Box, Text } from '@anthropic/ink';
import { formatTokens } from '../utils/format.js';
import { useTerminalSize } from '../hooks/useTerminalSize.js';
/** One rate-limit window as consumed by the built-in status line. */
type RateLimitBucket = {
// Multiplied by 100 and rounded for display, so presumably a 0–1 fraction — TODO confirm.
utilization: number;
// Reset time, passed to formatCountdown as epoch seconds; values <= 0 suppress the countdown.
resets_at: number;
};
/** Props for the built-in status line row. */
type BuiltinStatusLineProps = {
// Display name of the model; only the first two space-separated words are shown.
modelName: string;
// Context usage, rendered as-is followed by "%".
contextUsedPct: number;
// Tokens used and total context window size, rendered as "<used>/<window>" on wide terminals.
usedTokens: number;
contextWindowSize: number;
// Session cost; the cost segment is hidden unless this is greater than 0.
totalCostUsd: number;
// Optional rate-limit windows; a missing bucket hides its segment.
rateLimits: {
five_hour?: RateLimitBucket;
seven_day?: RateLimitBucket;
};
};
/**
 * Render the time remaining until `epochSeconds` (a Unix timestamp in seconds)
 * as a compact label.
 *
 * - at or past the target: "now"
 * - one day or more: "<days>d<hours>h", e.g. "5d20h"
 * - one hour or more: "<hours>h<minutes>m", e.g. "3h12m"
 * - otherwise: "<minutes>m", e.g. "45m" (under a minute yields "0m")
 *
 * All components are rounded down.
 */
export function formatCountdown(epochSeconds: number): string {
  const DAY = 86400;
  const HOUR = 3600;
  const MINUTE = 60;

  const secondsLeft = epochSeconds - Date.now() / 1000;
  if (secondsLeft <= 0) {
    return 'now';
  }

  const wholeDays = Math.floor(secondsLeft / DAY);
  const hoursIntoDay = Math.floor((secondsLeft % DAY) / HOUR);
  if (wholeDays >= 1) {
    return `${wholeDays}d${hoursIntoDay}h`;
  }

  const minutesIntoHour = Math.floor((secondsLeft % HOUR) / MINUTE);
  return hoursIntoDay >= 1 ? `${hoursIntoDay}h${minutesIntoHour}m` : `${minutesIntoHour}m`;
}
/** Dim vertical-bar divider (U+2502 padded with one space on each side) placed between status segments. */
function Separator() {
  const glyph = ' \u2502 ';
  return <Text dimColor>{glyph}</Text>;
}
/**
 * Built-in status row: short model name, context usage, 5-hour ("Session") and
 * 7-day ("Weekly") rate-limit utilization with reset countdowns, and cost.
 *
 * On terminals narrower than 60 columns the token counts and the reset
 * countdowns are dropped; the percentages are always shown. A rate-limit
 * segment is rendered only when its bucket is present, and the cost segment
 * only when the cost is greater than 0.
 */
function BuiltinStatusLineInner({
  modelName,
  contextUsedPct,
  usedTokens,
  contextWindowSize,
  totalCostUsd,
  rateLimits,
}: BuiltinStatusLineProps) {
  const { columns } = useTerminalSize();
  const { five_hour: fiveHour, seven_day: sevenDay } = rateLimits;
  const fiveHourResetsAt = fiveHour?.resets_at;
  const sevenDayResetsAt = sevenDay?.resets_at;

  // The counter value is never read; bumping it once a minute just forces a
  // re-render so the countdown labels stay current.
  const [, setTick] = useState(0);
  useEffect(() => {
    // No reset timestamp to count down to, so no timer is needed.
    if (!fiveHourResetsAt && !sevenDayResetsAt) return;
    const timer = setInterval(() => setTick(count => count + 1), 60_000);
    return () => clearInterval(timer);
  }, [fiveHourResetsAt, sevenDayResetsAt]);

  // Keep at most the first two words of the model name (e.g. "Opus 4.6").
  const shortModel = modelName.split(' ').slice(0, 2).join(' ');
  const narrow = columns < 60;
  // Token display such as "50k/1M".
  const tokenDisplay = `${formatTokens(usedTokens)}/${formatTokens(contextWindowSize)}`;

  // Renders one rate-limit segment, or nothing when the bucket is absent.
  const renderLimit = (label: string, bucket: RateLimitBucket | undefined) =>
    bucket == null ? null : (
      <>
        <Separator />
        <Text dimColor>{label}</Text>
        <Text>{Math.round(bucket.utilization * 100)}%</Text>
        {!narrow && bucket.resets_at > 0 && <Text dimColor> {formatCountdown(bucket.resets_at)}</Text>}
      </>
    );

  return (
    <Box>
      <Text>{shortModel}</Text>
      <Separator />
      <Text dimColor>Context </Text>
      <Text>{contextUsedPct}%</Text>
      {!narrow && <Text dimColor> ({tokenDisplay})</Text>}
      {renderLimit('Session ', fiveHour)}
      {renderLimit('Weekly ', sevenDay)}
      {totalCostUsd > 0 && (
        <>
          <Separator />
          <Text>{formatCost(totalCostUsd)}</Text>
        </>
      )}
    </Box>
  );
}
export const BuiltinStatusLine = React.memo(BuiltinStatusLineInner);

View File

@@ -1,6 +1,6 @@
import { feature } from 'bun:bundle';
import * as React from 'react';
import { memo, useCallback, useEffect, useRef } from 'react';
import { memo, useCallback, useEffect, useRef, useState } from 'react';
import { logEvent } from 'src/services/analytics/index.js';
import { useAppState, useSetAppState } from 'src/state/AppState.js';
import type { PermissionMode } from 'src/utils/permissions/PermissionMode.js';
@@ -42,12 +42,128 @@ import { getCurrentSessionTitle } from '../utils/sessionStorage.js';
import { doesMostRecentAssistantMessageExceed200k, getCurrentUsage } from '../utils/tokens.js';
import { getCurrentWorktreeSession } from '../utils/worktree.js';
import { isVimModeEnabled } from './PromptInput/utils.js';
import { computeHitRate, tokenSignature } from '../utils/cacheStats.js';
import { onResponse as cacheOnResponse, getCacheStatsState, initCacheStatsState } from '../utils/cacheStatsState.js';
import { BuiltinStatusLine } from './BuiltinStatusLine.js';
// ---------------------------------------------------------------------------
// CachePill — cache hit-rate + 1-hour TTL countdown pill
// ---------------------------------------------------------------------------
const CACHE_TTL_MS = 60 * 60 * 1000; // 60 minutes
/** Floors `n` and left-pads it with zeros to at least two characters. */
function padTwo(n: number): string {
  const whole = Math.floor(n);
  return `${whole}`.padStart(2, '0');
}
/**
 * Formats a remaining duration in milliseconds as "MM:SS".
 * Returns "exp" once the remaining time is zero or negative. Minutes are not
 * capped at 59, so a full hour renders as "60:00".
 */
function formatCountdown(remainingMs: number): string {
  if (remainingMs <= 0) {
    return 'exp';
  }
  const totalMinutes = Math.floor(remainingMs / 60_000);
  const leftoverSeconds = Math.floor((remainingMs % 60_000) / 1000);
  return [totalMinutes, leftoverSeconds].map(part => padTwo(part)).join(':');
}
/** Props for CachePill. */
type CachePillProps = {
// Conversation messages; the current usage is derived from them via getCurrentUsage.
messages: Message[];
};
/**
 * Status-line pill showing the cache hit rate and a countdown to the end of
 * the CACHE_TTL_MS window, measured from `lastResetAt` in the cache-stats state.
 *
 * Renders a dim " Cache --% --:--" placeholder while neither a hit rate nor a
 * reset timestamp is available. The countdown is coloured by elapsed time and
 * blinks (toggles dim) during the final five minutes before expiry.
 */
function CachePill({ messages }: CachePillProps): React.ReactNode {
const [now, setNow] = useState(() => Date.now());
const [isFlashOn, setIsFlashOn] = useState(true);
const usage = getCurrentUsage(messages);
// Feed new responses into the in-memory singleton
// NOTE: this runs during render. The signature ref ensures cacheOnResponse is
// called only when the usage signature differs from the last one this
// component instance saw.
const prevSigRef = useRef<string | null>(null);
if (usage !== null) {
const sig = tokenSignature(usage);
if (sig !== prevSigRef.current) {
prevSigRef.current = sig;
cacheOnResponse(usage);
}
}
// Read after the feed above, so a response recorded in this render is reflected.
const cacheState = getCacheStatsState();
const { lastResetAt, lastHitRate } = cacheState;
// Derived timing
// All three stay null until a reset timestamp exists.
const elapsed = lastResetAt !== null ? now - lastResetAt : null;
const remaining = elapsed !== null ? CACHE_TTL_MS - elapsed : null;
const elapsedMin = elapsed !== null ? elapsed / 60_000 : null;
const isExpired = remaining !== null && remaining <= 0;
// 1-second countdown ticker
// Runs for the whole lifetime of the component, including after expiry.
useEffect(() => {
const id = setInterval(() => setNow(Date.now()), 1000);
return () => clearInterval(id);
}, []);
// 500ms flash in last 5 minutes
// Outside the flash zone the flag is reset to on so the text is never left dimmed.
const inFlashZone = elapsedMin !== null && elapsedMin >= 55 && !isExpired;
useEffect(() => {
if (!inFlashZone) {
setIsFlashOn(true);
return;
}
const id = setInterval(() => setIsFlashOn(v => !v), 500);
return () => clearInterval(id);
}, [inFlashZone]);
// Load persisted fallback once on mount
// NOTE(review): the returned promise is discarded with `void`, so a rejection
// from initCacheStatsState would go unhandled — confirm it cannot reject.
const initDoneRef = useRef(false);
useEffect(() => {
if (initDoneRef.current) return;
initDoneRef.current = true;
const sid = getSessionId();
void initCacheStatsState(sid);
}, []);
// Prefer the hit rate of the current usage; fall back to the stored last hit rate.
const displayHitRate = usage !== null ? computeHitRate(usage) : lastHitRate;
// No data yet — show placeholder
if (displayHitRate === null && lastResetAt === null) {
return <Text dimColor>{' Cache --% --:--'}</Text>;
}
const countdownText = remaining !== null ? formatCountdown(remaining) : '--:--';
const hitRateText = displayHitRate !== null ? `${displayHitRate}%` : '--%';
// Timer color by elapsed bucket — using theme keys
// inactive: expired or no data; success: under 20 min; warning: under 40 min; error: otherwise.
type TimerThemeKey = 'success' | 'warning' | 'error' | 'inactive';
let timerColor: TimerThemeKey;
if (isExpired || elapsedMin === null) {
timerColor = 'inactive';
} else if (elapsedMin < 20) {
timerColor = 'success';
} else if (elapsedMin < 40) {
timerColor = 'warning';
} else {
timerColor = 'error';
}
// Hit-rate color — using theme keys
// success at 50% or above, inactive otherwise (including when unknown).
const hitRateColor: 'success' | 'inactive' = displayHitRate !== null && displayHitRate >= 50 ? 'success' : 'inactive';
return (
<Text>
<Text dimColor>{' Cache '}</Text>
<Text color={hitRateColor}>{hitRateText}</Text>
<Text color={timerColor} dimColor={inFlashZone && !isFlashOn}>
{' '}
{countdownText}
</Text>
</Text>
);
}
/**
 * Decides whether the status line row should be rendered.
 *
 * Returns false while the KAIROS feature is on and assistant mode is active;
 * otherwise returns true only when `settings.statusLineEnabled` is exactly
 * `true`.
 *
 * The earlier `return settings?.statusLine !== undefined;` has been removed:
 * it preceded the `statusLineEnabled` check and made it unreachable.
 */
export function statusLineShouldDisplay(settings: ReadonlySettings): boolean {
  // Assistant mode: statusline fields (model, permission mode, cwd) reflect the
  // REPL/daemon process, not what the agent child is actually running. Hide it.
  if (feature('KAIROS') && getKairosActive()) return false;
  // Render only when the user has explicitly toggled it on via `/statusline`.
  // Default off keeps the REPL clean for users who don't want the extra row;
  // /statusline flips `statusLineEnabled` in settings.json.
  return settings?.statusLineEnabled === true;
}
function buildStatusLineCommandInput(
@@ -222,6 +338,13 @@ function StatusLineInner({ messagesRef, lastAssistantMessageId, vimMode }: Props
const logResult = logNextResultRef.current;
logNextResultRef.current = false;
// Skip the shell command path entirely when no command is configured.
// The top row (BuiltinStatusLine + CachePill) renders unconditionally, so
// there's nothing to update here when settings.statusLine is missing.
if (!settingsRef.current?.statusLine?.command) {
return;
}
try {
let exceeds200kTokens = previousStateRef.current.exceeds200kTokens;
@@ -288,15 +411,6 @@ function StatusLineInner({ messagesRef, lastAssistantMessageId, vimMode }: Props
}
}, [lastAssistantMessageId, permissionMode, vimMode, mainLoopModel, scheduleUpdate]);
// Time-driven refresh: tick setInterval(refreshInterval seconds) through the
// existing debounced scheduleUpdate so interval + message-change don't double-fire.
const refreshIntervalMs = (settings?.statusLine?.refreshInterval ?? 0) * 1000;
useEffect(() => {
if (refreshIntervalMs <= 0) return;
const id = setInterval(() => scheduleUpdate(), refreshIntervalMs);
return () => clearInterval(id);
}, [refreshIntervalMs, scheduleUpdate]);
// When the statusLine command changes (hot reload), log the next result
const statusLineCommand = settings?.statusLine?.command;
const isFirstSettingsRender = useRef(true);
@@ -353,12 +467,57 @@ function StatusLineInner({ messagesRef, lastAssistantMessageId, vimMode }: Props
// Get padding from settings or default to 0
const paddingX = settings?.statusLine?.padding ?? 0;
// StatusLine must have stable height in fullscreen — the footer is
// flexShrink:0 so a 0→1 row change when the command finishes steals
// a row from ScrollBox and shifts content. Reserve the row while loading
// (same trick as PromptInputFooterLeftSide).
// ---- Top row data: feed BuiltinStatusLine (model + ctx + 5h + 7d + cost) ---
const builtinRuntimeModel = getRuntimeMainLoopModel({
permissionMode,
mainLoopModel,
exceeds200kTokens: previousStateRef.current.exceeds200kTokens,
});
const builtinContextWindowSize = getContextWindowForModel(builtinRuntimeModel, getSdkBetas());
const builtinCurrentUsage = getCurrentUsage(messagesRef.current);
const builtinUsedTokens = builtinCurrentUsage
? builtinCurrentUsage.input_tokens +
builtinCurrentUsage.cache_creation_input_tokens +
builtinCurrentUsage.cache_read_input_tokens
: 0;
const builtinContextPct = builtinCurrentUsage
? Math.round(calculateContextPercentages(builtinCurrentUsage, builtinContextWindowSize).used ?? 0)
: 0;
const builtinRawUtil = getRawUtilization();
const builtinRateLimits = {
...(builtinRawUtil.five_hour && {
five_hour: {
utilization: builtinRawUtil.five_hour.utilization,
resets_at: builtinRawUtil.five_hour.resets_at,
},
}),
...(builtinRawUtil.seven_day && {
seven_day: {
utilization: builtinRawUtil.seven_day.utilization,
resets_at: builtinRawUtil.seven_day.resets_at,
},
}),
};
// StatusLine has stable height — flexShrink:0 footer means row count changes
// would steal from ScrollBox. We always render 2 rows (top: BuiltinStatusLine
// + Cache pill, bottom: shell command stdout reservation) to keep height
// stable across loading/configured/empty states.
return (
<Box paddingX={paddingX} gap={2}>
<Box flexDirection="column" paddingX={paddingX}>
{/* Top: built-in fork status (model | ctx | 5h | 7d | cost) + Cache pill */}
<Box gap={2}>
<BuiltinStatusLine
modelName={renderModelName(builtinRuntimeModel)}
contextUsedPct={builtinContextPct}
usedTokens={builtinUsedTokens}
contextWindowSize={builtinContextWindowSize}
totalCostUsd={getTotalCost()}
rateLimits={builtinRateLimits}
/>
<CachePill messages={messagesRef.current} />
</Box>
{/* Bottom: user-configured /statusline shell stdout (reserves row in fullscreen) */}
{statusLineText ? (
<Text dimColor wrap="truncate">
<Ansi>{statusLineText}</Ansi>

View File

@@ -0,0 +1,190 @@
/**
* Tests for the CachePill helper logic in StatusLine.
*
* CachePill is a React/Ink component — rendering it in a headless test
* environment is fragile (requires Ink's renderer, theme provider, etc.).
* Instead we test the pure helper functions that power it directly, which
* gives deterministic, fast unit coverage of all color-stage logic.
*/
import { describe, test, expect } from 'bun:test';
import { computeHitRate } from '../../utils/cacheStats.js';
// ---------------------------------------------------------------------------
// Local copies of CachePill's internal logic, duplicated here for unit testing.
// They are not imported from StatusLine, so keep them in sync with it by hand.
// ---------------------------------------------------------------------------
/** Length of the countdown window in milliseconds (60 minutes). */
const CACHE_TTL_MS = 60 * 60 * 1000;
/** Floors `n` and left-pads it with zeros to at least two characters. */
function padTwo(n: number): string {
  const whole = Math.floor(n);
  return `${whole}`.padStart(2, '0');
}
/**
 * Formats a remaining duration in milliseconds as "MM:SS", or "exp" when the
 * remaining time is zero or negative.
 */
function formatCountdown(remainingMs: number): string {
  if (remainingMs <= 0) {
    return 'exp';
  }
  const totalMinutes = Math.floor(remainingMs / 60_000);
  const leftoverSeconds = Math.floor((remainingMs % 60_000) / 1000);
  return [totalMinutes, leftoverSeconds].map(part => padTwo(part)).join(':');
}
/** Theme keys the countdown timer can be drawn in. */
type TimerThemeKey = 'success' | 'warning' | 'error' | 'inactive';
/**
 * Picks the timer colour from the minutes elapsed: "inactive" when expired or
 * when there is no elapsed value, "success" below 20 minutes, "warning" below
 * 40 minutes, and "error" otherwise.
 */
function timerColor(elapsedMin: number | null, isExpired: boolean): TimerThemeKey {
  if (isExpired || elapsedMin === null) {
    return 'inactive';
  }
  // Upper bounds (exclusive) in ascending order; the first match wins.
  const stages: Array<[number, TimerThemeKey]> = [
    [20, 'success'],
    [40, 'warning'],
  ];
  for (const [upperBound, key] of stages) {
    if (elapsedMin < upperBound) {
      return key;
    }
  }
  return 'error';
}
/** Returns "success" for a hit rate of 50 or more, and "inactive" for lower or missing rates. */
function hitRateColor(rate: number | null): 'success' | 'inactive' {
  if (rate === null) {
    return 'inactive';
  }
  return rate >= 50 ? 'success' : 'inactive';
}
// ---------------------------------------------------------------------------
// formatCountdown
// ---------------------------------------------------------------------------
// Exercises the formatCountdown defined in this test file (MM:SS, or "exp" at or below zero).
describe('formatCountdown', () => {
// Minutes are not capped at 59: a full TTL renders as 60:00.
test('formats full 60 minutes as 60:00', () => {
expect(formatCountdown(CACHE_TTL_MS)).toBe('60:00');
});
test('formats 59 minutes 43 seconds correctly', () => {
const ms = 59 * 60_000 + 43 * 1000;
expect(formatCountdown(ms)).toBe('59:43');
});
test('formats sub-minute as 00:SS', () => {
expect(formatCountdown(30_000)).toBe('00:30');
});
test('returns "exp" when remainingMs is 0', () => {
expect(formatCountdown(0)).toBe('exp');
});
test('returns "exp" when remainingMs is negative', () => {
expect(formatCountdown(-1000)).toBe('exp');
});
test('pads single-digit minutes and seconds', () => {
// 5 min 7 sec
expect(formatCountdown(5 * 60_000 + 7_000)).toBe('05:07');
});
});
// ---------------------------------------------------------------------------
// Color stages — 4 thresholds
// ---------------------------------------------------------------------------
// Each stage is checked at its lower bound, a mid value, and just under its
// upper bound, which pins the boundaries as lower-inclusive / upper-exclusive.
describe('timerColor stages', () => {
test('green (success) when elapsed < 20 min', () => {
expect(timerColor(0, false)).toBe('success');
expect(timerColor(10, false)).toBe('success');
expect(timerColor(19.9, false)).toBe('success');
});
test('yellow (warning) when 20 <= elapsed < 40 min', () => {
expect(timerColor(20, false)).toBe('warning');
expect(timerColor(30, false)).toBe('warning');
expect(timerColor(39.9, false)).toBe('warning');
});
test('red (error) when 40 <= elapsed < 60 min', () => {
expect(timerColor(40, false)).toBe('error');
expect(timerColor(55, false)).toBe('error');
expect(timerColor(59.9, false)).toBe('error');
});
// The expired flag takes precedence over the elapsed value.
test('gray (inactive) when expired', () => {
expect(timerColor(60, true)).toBe('inactive');
expect(timerColor(90, true)).toBe('inactive');
});
test('gray (inactive) when no elapsed data', () => {
expect(timerColor(null, false)).toBe('inactive');
});
});
// ---------------------------------------------------------------------------
// Flash zone — last 5 minutes (elapsed >= 55)
// ---------------------------------------------------------------------------
// NOTE(review): these tests evaluate an inline copy of the flash-zone
// expression rather than calling any shared function, so they would keep
// passing if the component's own expression changed. The literal `!false`
// stands in for "not expired".
describe('flash zone detection', () => {
test('not in flash zone at 54.9 min', () => {
const elapsedMin = 54.9;
const inFlashZone = elapsedMin >= 55 && !false;
expect(inFlashZone).toBe(false);
});
test('in flash zone at exactly 55 min', () => {
const elapsedMin = 55;
const inFlashZone = elapsedMin >= 55 && !false;
expect(inFlashZone).toBe(true);
});
test('NOT in flash zone when expired', () => {
const elapsedMin = 65;
const isExpired = true;
const inFlashZone = elapsedMin >= 55 && !isExpired;
expect(inFlashZone).toBe(false);
});
});
// ---------------------------------------------------------------------------
// Hit-rate color
// ---------------------------------------------------------------------------
// The threshold is inclusive: exactly 50 counts as success, 49 does not.
describe('hitRateColor', () => {
test('success (green) when rate >= 50', () => {
expect(hitRateColor(50)).toBe('success');
expect(hitRateColor(75)).toBe('success');
expect(hitRateColor(100)).toBe('success');
});
test('inactive (gray) when rate < 50', () => {
expect(hitRateColor(49)).toBe('inactive');
expect(hitRateColor(0)).toBe('inactive');
});
test('inactive (gray) when rate is null', () => {
expect(hitRateColor(null)).toBe('inactive');
});
});
// ---------------------------------------------------------------------------
// computeHitRate integration (used in CachePill)
// ---------------------------------------------------------------------------
// Unlike the helper suites in this file, these tests call the real
// computeHitRate imported from utils/cacheStats.
describe('computeHitRate used in CachePill', () => {
test('97% hit rate rounds correctly', () => {
// 97 read out of 100 total
const rate = computeHitRate({
input_tokens: 3,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 97,
});
expect(rate).toBe(97);
});
test('null usage returns null rate', () => {
expect(computeHitRate(null)).toBeNull();
});
// All-zero usage is expected to yield null rather than a numeric rate.
test('zero-token response returns null rate', () => {
expect(computeHitRate({ input_tokens: 0, cache_creation_input_tokens: 0, cache_read_input_tokens: 0 })).toBeNull();
});
});
// ---------------------------------------------------------------------------
// "exp" display when TTL expired
// ---------------------------------------------------------------------------
describe('expired display', () => {
  test('formatCountdown returns "exp" at 0 remaining', () => {
    const label = formatCountdown(0);
    expect(label).toBe('exp');
  });

  test('timerColor is inactive when isExpired=true', () => {
    // The expired flag is passed as true; the elapsed value is 61.
    const color = timerColor(61, true);
    expect(color).toBe('inactive');
  });
});

View File

@@ -1,6 +1,5 @@
import capitalize from 'lodash-es/capitalize.js';
import * as React from 'react';
import { useMemo } from 'react';
import { useMemo, useState } from 'react';
import {
type Command,
type CommandBase,
@@ -8,58 +7,45 @@ import {
getCommandName,
type PromptCommand,
} from '../../commands.js';
import { Box, Text } from '@anthropic/ink';
import { Box, FuzzyPicker, Text } from '@anthropic/ink';
import type { Theme } from '@anthropic/ink';
import { estimateSkillFrontmatterTokens, getSkillsPath } from '../../skills/loadSkillsDir.js';
import { getDisplayPath } from '../../utils/file.js';
import { estimateSkillFrontmatterTokens } from '../../skills/loadSkillsDir.js';
import { formatTokens } from '../../utils/format.js';
import { getSettingSourceName, type SettingSource } from '../../utils/settings/constants.js';
import { plural } from '../../utils/stringUtils.js';
import { ConfigurableShortcutHint } from '../ConfigurableShortcutHint.js';
import { Dialog } from '@anthropic/ink';
import { filterSkills } from './filterSkills.js';
// Skills are always PromptCommands with CommandBase properties
type SkillCommand = CommandBase & PromptCommand;
type SkillSource = SettingSource | 'plugin' | 'mcp';
const ORDERED_SOURCES: SkillSource[] = [
'projectSettings',
'localSettings',
'userSettings',
'flagSettings',
'policySettings',
'plugin',
'mcp',
];
type Props = {
onExit: (result?: string, options?: { display?: CommandResultDisplay }) => void;
commands: Command[];
};
/**
 * Heading text for a group of skills that share a source.
 * Plugin and MCP sources have fixed headings; every other source uses its
 * capitalized setting-source name followed by " skills".
 */
function getSourceTitle(source: SkillSource): string {
  switch (source) {
    case 'plugin':
      return 'Plugin skills';
    case 'mcp':
      return 'MCP skills';
    default: {
      const settingName = capitalize(getSettingSourceName(source));
      return `${settingName} skills`;
    }
  }
}
function getSourceSubtitle(source: SkillSource, skills: SkillCommand[]): string | undefined {
// MCP skills show server names; file-based skills show filesystem paths.
// Skill names are `<server>:<skill>`, not `mcp__<server>__…`.
if (source === 'mcp') {
const servers = [
...new Set(
skills
.map(s => {
const idx = s.name.indexOf(':');
return idx > 0 ? s.name.slice(0, idx) : null;
})
.filter((n): n is string => n != null),
),
];
return servers.length > 0 ? servers.join(', ') : undefined;
}
const skillsPath = getDisplayPath(getSkillsPath(source, 'skills'));
const hasCommandsSkills = skills.some(s => s.loadedFrom === 'commands_DEPRECATED');
return hasCommandsSkills ? `${skillsPath}, ${getDisplayPath(getSkillsPath(source, 'commands'))}` : skillsPath;
/**
 * Short label for a skill's source, shown next to each skill row.
 * 'plugin' and 'mcp' are returned as-is; any other source is mapped through
 * getSettingSourceName.
 */
function getSourceLabel(source: SkillSource): string {
  switch (source) {
    case 'plugin':
    case 'mcp':
      return source;
    default:
      return getSettingSourceName(source);
  }
}
export function SkillsMenu({ onExit, commands }: Props): React.ReactNode {
const [searchQuery, setSearchQuery] = useState('');
// Filter commands for skills and cast to SkillCommand
const skills = useMemo(() => {
return commands.filter(
@@ -72,6 +58,18 @@ export function SkillsMenu({ onExit, commands }: Props): React.ReactNode {
);
}, [commands]);
// Apply type-to-filter: build SkillItem-shaped projections and filter
const filteredSkills = useMemo(() => {
return filterSkills(
skills.map(s => ({
...s,
name: getCommandName(s),
description: s.description ?? '',
})),
searchQuery,
);
}, [skills, searchQuery]);
const skillsBySource = useMemo((): Record<SkillSource, SkillCommand[]> => {
const groups: Record<SkillSource, SkillCommand[]> = {
policySettings: [],
@@ -83,7 +81,7 @@ export function SkillsMenu({ onExit, commands }: Props): React.ReactNode {
mcp: [],
};
for (const skill of skills) {
for (const skill of filteredSkills) {
const source = skill.source as SkillSource;
if (source in groups) {
groups[source].push(skill);
@@ -95,7 +93,7 @@ export function SkillsMenu({ onExit, commands }: Props): React.ReactNode {
}
return groups;
}, [skills]);
}, [filteredSkills]);
const handleCancel = (): void => {
onExit('Skills dialog dismissed', { display: 'system' });
@@ -126,62 +124,53 @@ export function SkillsMenu({ onExit, commands }: Props): React.ReactNode {
}
};
const renderSkill = (skill: SkillCommand) => {
const renderSkillItem = (skill: SkillCommand, isFocused: boolean) => {
const estimatedTokens = estimateSkillFrontmatterTokens(skill);
const tokenDisplay = `~${formatTokens(estimatedTokens)}`;
const pluginName = skill.source === 'plugin' ? skill.pluginInfo?.pluginManifest.name : undefined;
const scopeTag = getScopeTag(skill.source);
return (
<Box key={`${skill.name}-${skill.source}`}>
<Text>{getCommandName(skill)}</Text>
<Box>
<Text color={isFocused ? ('suggestion' as keyof Theme) : undefined}>{getCommandName(skill)}</Text>
{scopeTag && <Text color={scopeTag.color as keyof Theme}> [{scopeTag.label}]</Text>}
<Text dimColor>
{pluginName ? ` · ${pluginName}` : ''} · {tokenDisplay} description tokens
{pluginName ? ` · ${pluginName}` : ''} · {getSourceLabel(skill.source as SkillSource)} · {tokenDisplay} tokens
</Text>
</Box>
);
};
const renderSkillGroup = (source: SkillSource) => {
const groupSkills = skillsBySource[source];
if (groupSkills.length === 0) return null;
// Flat ordered list of filtered skills preserving source grouping order
const orderedFilteredSkills = useMemo(() => {
return ORDERED_SOURCES.flatMap(source => skillsBySource[source]);
}, [skillsBySource]);
const title = getSourceTitle(source);
const subtitle = getSourceSubtitle(source, groupSkills);
return (
<Box flexDirection="column" key={source}>
<Box>
<Text bold dimColor>
{title}
</Text>
{subtitle && <Text dimColor> ({subtitle})</Text>}
</Box>
{groupSkills.map(skill => renderSkill(skill))}
</Box>
);
};
const subtitle =
searchQuery.trim() === ''
? `${skills.length} ${plural(skills.length, 'skill')}`
: `${filteredSkills.length}/${skills.length} ${plural(skills.length, 'skill')}`;
// Source group headers — rendered as section labels inside the picker list
// via renderItem. We annotate each item with its source to detect group
// boundary changes.
return (
<Dialog
<FuzzyPicker
title="Skills"
subtitle={`${skills.length} ${plural(skills.length, 'skill')}`}
placeholder="Type to filter skills…"
items={orderedFilteredSkills}
getKey={s => `${s.name}-${s.source}`}
visibleCount={12}
direction="down"
onQueryChange={setSearchQuery}
onSelect={skill => {
onExit(`/${getCommandName(skill)}`, { display: 'user' });
}}
onCancel={handleCancel}
hideInputGuide
>
<Box flexDirection="column" gap={1}>
{renderSkillGroup('projectSettings')}
{renderSkillGroup('localSettings')}
{renderSkillGroup('userSettings')}
{renderSkillGroup('flagSettings')}
{renderSkillGroup('policySettings')}
{renderSkillGroup('plugin')}
{renderSkillGroup('mcp')}
</Box>
<Text dimColor italic>
<ConfigurableShortcutHint action="confirm:no" context="Confirmation" fallback="Esc" description="close" />
</Text>
</Dialog>
emptyMessage={q => (q.trim() ? `No skills matching "${q.trim()}"` : 'No skills found')}
matchLabel={subtitle}
selectAction="invoke skill"
renderItem={(skill, isFocused) => renderSkillItem(skill, isFocused)}
/>
);
}

View File

@@ -0,0 +1,68 @@
import { describe, expect, test } from 'bun:test'
import { filterSkills } from '../filterSkills.js'
import type { SkillItem } from '../filterSkills.js'
/** Build a SkillItem fixture; the description defaults to an empty string. */
function makeSkill(name: string, description = ''): SkillItem {
  const fixture: SkillItem = { name, description }
  return fixture
}
describe('filterSkills', () => {
  // Shared fixture: six skills with distinct names and descriptions.
  const skills: SkillItem[] = [
    makeSkill('tdd-guide', 'Test-driven development guide'),
    makeSkill('code-reviewer', 'Review code quality and patterns'),
    makeSkill('security-reviewer', 'Security vulnerability analysis'),
    makeSkill('refactor-cleaner', 'Dead code cleanup and refactoring'),
    makeSkill('planner', 'Implementation planning for complex features'),
    makeSkill('architect', 'System design and architecture decisions'),
  ]

  test('empty query returns all skills', () => {
    const result = filterSkills(skills, '')
    expect(result).toEqual(skills)
  })

  test('partial name match returns matching skills', () => {
    const result = filterSkills(skills, 'review')
    const names = result.map(s => s.name)
    expect(names).toContain('code-reviewer')
    expect(names).toContain('security-reviewer')
    expect(names).not.toContain('planner')
  })

  test('no match returns empty array', () => {
    const result = filterSkills(skills, 'zzznomatch')
    expect(result).toHaveLength(0)
  })

  test('case insensitive match', () => {
    const result = filterSkills(skills, 'TDD')
    expect(result.map(s => s.name)).toContain('tdd-guide')
  })

  test('matches description when name does not match', () => {
    const result = filterSkills(skills, 'dead code')
    expect(result.map(s => s.name)).toContain('refactor-cleaner')
  })

  test('multi-word query requires every word to match (AND semantics)', () => {
    // Every word of the query must appear in the name or description.
    // Only code-reviewer contains both "code" and "review":
    // security-reviewer has "review" but not "code", and refactor-cleaner
    // has "code" but not "review", so neither may be returned.
    const result = filterSkills(skills, 'code review')
    const names = result.map(s => s.name)
    expect(names).toEqual(['code-reviewer'])
  })

  test('clear query (reset to empty) returns all skills again', () => {
    // First filter
    const filtered = filterSkills(skills, 'security')
    expect(filtered).toHaveLength(1)
    // Then clear
    const all = filterSkills(skills, '')
    expect(all).toHaveLength(skills.length)
  })

  test('whitespace-only query returns all skills', () => {
    const result = filterSkills(skills, ' ')
    expect(result).toEqual(skills)
  })
})

View File

@@ -0,0 +1,36 @@
/**
* Type-to-filter logic for the skills picker.
*
* Invariant: empty / whitespace-only query always returns all skills unchanged.
* Matching is case-insensitive; each whitespace-separated word in the query
* must appear in either the skill name or description.
*/
/** Minimal shape a skill must expose to be searchable by the picker. */
export type SkillItem = {
  name: string
  description: string
}

/**
 * Narrow `skills` down to the entries matching `query`.
 *
 * The input array is never mutated; a fresh array is always returned.
 *
 * - A query that is empty after trimming yields a shallow copy of all skills.
 * - Otherwise the query is lower-cased and split on whitespace, and a skill
 *   is kept only when every resulting word occurs (case-insensitively) in
 *   its name or its description.
 *
 * @param skills Candidate skills, in display order.
 * @param query  Raw text typed by the user.
 * @returns The matching skills, in their original relative order.
 */
export function filterSkills<T extends SkillItem>(
  skills: readonly T[],
  query: string,
): T[] {
  const normalizedQuery = query.trim().toLowerCase()
  if (normalizedQuery.length === 0) {
    return skills.slice()
  }

  const terms = normalizedQuery.split(/\s+/)
  const matched: T[] = []
  for (const candidate of skills) {
    // Name and description are searched as one lower-cased string.
    const searchable = (candidate.name + ' ' + candidate.description).toLowerCase()
    const hasMissingTerm = terms.some(term => !searchable.includes(term))
    if (!hasMissingTerm) {
      matched.push(candidate)
    }
  }
  return matched
}

View File

@@ -71,9 +71,12 @@ const VALID_CONTEXTS: KeybindingContextName[] = [
'Tabs',
'Attachments',
'Footer',
'FormField',
'MessageActions',
'MessageSelector',
'DiffDialog',
'ModelPicker',
'Scroll',
'Select',
'Plugin',
]

View File

@@ -0,0 +1,410 @@
import { afterAll, describe, test, expect, mock, beforeEach } from 'bun:test'
import { homedir } from 'node:os'
import { join } from 'node:path'
// ── Mock infrastructure ─────────────────────────────────────────────────────
// All mock.module calls must precede the import of the module under test.
// mock.module is process-global; mocks here must cover all exported names used
// transitively so sibling test files are not broken by an incomplete mock.
//
// To prevent cross-file pollution (providers.test.ts, model.test.ts, skill
// prefetch / skillLearning smoke), keep the mock factory inline (don't
// pre-import real modules — that triggers heavy transitive deps and hangs
// some test combinations). The flag below switches off the suite-specific
// override after this file's tests finish.
// True while this file's tests run; the mock factories below read it at call
// time to choose between suite-specific stubs and real-semantics fallbacks.
let useMockForMagicDocs = true
// Once every test in this file has finished, switch the factories over to
// their fallback behavior.
afterAll(() => {
useMockForMagicDocs = false
})
// Inline a minimum env-driven default-model resolver so other test files
// (getDefaultOpusModel.test.ts) which assert env-var precedence still work
// even after our flag is off. The real getDefaultOpusModel reads provider
// env vars; we mirror that minimal logic here. Keep aligned with
// src/utils/model/model.ts's getDefaultOpusModel().
/**
 * Env-driven resolution of the default Opus model ID, evaluated at call time.
 *
 * Precedence:
 *   1. `<PROVIDER>_DEFAULT_OPUS_MODEL` when the matching OpenAI or Gemini
 *      provider flag is '1' and the override is non-empty.
 *   2. `ANTHROPIC_DEFAULT_OPUS_MODEL` (applies regardless of provider).
 *   3. The Bedrock-specific ID when `CLAUDE_CODE_USE_BEDROCK` is '1'.
 *   4. 'claude-opus-4-7' for everything else (Vertex, Foundry and the default
 *      all resolve to this same ID).
 *
 * Must be kept aligned with getDefaultOpusModel() in src/utils/model/model.ts
 * and with CLAUDE_OPUS_4_7_CONFIG in src/utils/model/configs.ts.
 */
function resolveDefaultOpusModelForTests(): string {
  const env = process.env

  // [provider flag, provider-specific model override], highest priority first.
  const providerOverrides: ReadonlyArray<[string, string]> = [
    ['CLAUDE_CODE_USE_OPENAI', 'OPENAI_DEFAULT_OPUS_MODEL'],
    ['CLAUDE_CODE_USE_GEMINI', 'GEMINI_DEFAULT_OPUS_MODEL'],
  ]
  for (const [flagVar, modelVar] of providerOverrides) {
    const override = env[modelVar]
    if (env[flagVar] === '1' && override) {
      return override
    }
  }

  // Cross-provider override.
  const anthropicOverride = env.ANTHROPIC_DEFAULT_OPUS_MODEL
  if (anthropicOverride) {
    return anthropicOverride
  }

  // Bedrock is the only provider with a distinct Opus 4.7 ID.
  return env.CLAUDE_CODE_USE_BEDROCK === '1'
    ? 'us.anthropic.claude-opus-4-7-v1'
    : 'claude-opus-4-7'
}
// Shared mock handles: the tests below re-point their return values per case
// (mockReturnValue) and beforeEach resets them to these defaults.
const mockGetMainLoopModel = mock(() => 'claude-opus-4-7')
const mockGetDisplayedEffortLevel = mock((): string => 'high')
/**
 * Truthiness check for env-style values.
 * Booleans are returned unchanged; undefined and '' are false; any other
 * string is true only if, lower-cased and trimmed, it is one of
 * '1', 'true', 'yes' or 'on'.
 */
const realIsEnvTruthy = (v: string | boolean | undefined): boolean => {
  if (typeof v === 'boolean') {
    return v
  }
  if (v === undefined || v === '') {
    return false
  }
  const normalized = v.toLowerCase().trim()
  return ['1', 'true', 'yes', 'on'].includes(normalized)
}
// Inline the real firstPartyNameToCanonical logic so its semantics survive
// even after this suite's mock wins the registration race. Pre-importing
// model.ts hangs the test process due to heavy transitive deps, so we
// duplicate just this one pure function. Keep in sync with
// src/utils/model/model.ts.
/**
 * Map a (possibly provider-prefixed or date-suffixed) model name to its
 * canonical short form.
 *
 * The name is lower-cased, then matched by substring against a list of known
 * canonical names; the first hit wins, so more specific names must precede
 * their prefixes (e.g. 'claude-opus-4-7' before 'claude-opus-4'). If nothing
 * matches, a generic `claude-…` pattern is tried, and finally the lower-cased
 * input is returned unchanged.
 *
 * Keep in sync with firstPartyNameToCanonical in src/utils/model/model.ts.
 */
function realFirstPartyNameToCanonical(name: string): string {
  const lowered = name.toLowerCase()

  // Ordered: longer/more specific identifiers first.
  const knownCanonicalNames = [
    'claude-opus-4-7',
    'claude-opus-4-6',
    'claude-opus-4-5',
    'claude-opus-4-1',
    'claude-opus-4',
    'claude-sonnet-4-6',
    'claude-sonnet-4-5',
    'claude-sonnet-4',
    'claude-haiku-4-5',
    'claude-3-7-sonnet',
    'claude-3-5-sonnet',
    'claude-3-5-haiku',
    'claude-3-opus',
    'claude-3-sonnet',
    'claude-3-haiku',
  ]
  const known = knownCanonicalNames.find(canonical =>
    lowered.includes(canonical),
  )
  if (known !== undefined) {
    return known
  }

  // Generic fallback for names not in the table.
  const generic = /(claude-(\d+-\d+-)?\w+)/.exec(lowered)
  if (generic && generic[1]) {
    return generic[1]
  }
  return lowered
}
// Replace the model module for this process. Most exports are fixed stubs;
// getMainLoopModel is the handle the tests steer, while getDefaultOpusModel
// and firstPartyNameToCanonical delegate to the inlined helpers defined above.
mock.module('src/utils/model/model.js', () => ({
getMainLoopModel: mockGetMainLoopModel,
getSmallFastModel: mock(() => 'claude-haiku'),
getUserSpecifiedModelSetting: mock(() => undefined),
getBestModel: mock(() => 'claude-opus-4-7'),
// Read env at call time so getDefaultOpusModel.test.ts (running in the same
// process) sees env-driven semantics. While useMockForMagicDocs is true
// (during this suite) we still want a stable default; otherwise we mirror
// the real env-precedence logic.
getDefaultOpusModel: mock(() =>
useMockForMagicDocs ? 'claude-opus-4-7' : resolveDefaultOpusModelForTests(),
),
getDefaultSonnetModel: mock(() => 'claude-sonnet-4-6'),
getDefaultHaikuModel: mock(() => 'claude-haiku-3-5'),
getRuntimeMainLoopModel: mock(() => 'claude-opus-4-7'),
getDefaultMainLoopModelSetting: mock(() => 'claude-opus-4-7'),
getDefaultMainLoopModel: mock(() => 'claude-opus-4-7'),
// Real semantics inlined for firstPartyNameToCanonical so model.test.ts
// (which only checks pure-function input/output) passes without needing
// the heavy real-module load.
firstPartyNameToCanonical: mock((n: string) =>
realFirstPartyNameToCanonical(n),
),
// The remaining exports are identity or constant stubs.
getCanonicalName: mock((n: string) => n),
getClaudeAiUserDefaultModelDescription: mock(() => ''),
renderDefaultModelSetting: mock(() => ''),
getOpusPricingSuffix: mock(() => ''),
isOpus1mMergeEnabled: mock(() => false),
renderModelSetting: mock((s: string) => s),
getPublicModelDisplayName: mock(() => null),
renderModelName: mock((n: string) => n),
getPublicModelName: mock((n: string) => n),
parseUserSpecifiedModel: mock((m: string) => m),
resolveSkillModelOverride: mock(() => undefined),
isLegacyModelRemapEnabled: mock(() => false),
modelDisplayString: mock(() => ''),
getMarketingNameForModel: mock(() => undefined),
normalizeModelStringForAPI: mock((m: string) => m),
isNonCustomOpusModel: mock(() => false),
}))
// Replace the effort module. Only getDisplayedEffortLevel is steered by the
// tests (through mockGetDisplayedEffortLevel); every other export returns a
// fixed value.
mock.module('src/utils/effort.js', () => ({
// Cast gives the zero-argument mock the two-parameter call signature.
getDisplayedEffortLevel: mockGetDisplayedEffortLevel as (
_m: string,
_e: unknown,
) => string,
getEffortEnvOverride: mock(() => undefined),
resolveAppliedEffort: mock(() => 'high'),
getInitialEffortSetting: mock(() => undefined),
parseEffortValue: mock(() => undefined),
toPersistableEffort: mock(() => undefined),
modelSupportsEffort: mock(() => true),
modelSupportsMaxEffort: mock(() => true),
modelSupportsXhighEffort: mock(() => false),
isEffortLevel: mock(() => true),
getEffortSuffix: mock(() => ''),
convertEffortValueToLevel: mock(() => 'high'),
getDefaultEffortForModel: mock(() => undefined),
getEffortLevelDescription: mock(() => ''),
getEffortValueDescription: mock(() => ''),
getOpusDefaultEffortConfig: mock(() => ({
enabled: true,
dialogTitle: '',
dialogDescription: '',
})),
resolvePickerEffortPersistence: mock(() => undefined),
isValidNumericEffort: mock(() => false),
EFFORT_LEVELS: ['low', 'medium', 'high', 'xhigh', 'max'],
}))
// Use REAL semantics for non-overridden envUtils exports — this mock is
// process-global, so envUtils.test.ts and other consumers running in the
// same process must see correct behavior for hasNodeOption, isBareMode,
// parseEnvVars, getVertexRegionForModel, etc. Only getClaudeConfigHomeDir
// is overridden to '/mock/home/.claude' while this suite runs.
/**
 * True only when a value is present AND explicitly falsy.
 * undefined and '' are "not defined" and yield false; booleans are negated;
 * any other string is true only if, lower-cased and trimmed, it is one of
 * '0', 'false', 'no' or 'off'.
 */
const realIsEnvDefinedFalsy = (v: string | boolean | undefined): boolean => {
  switch (typeof v) {
    case 'undefined':
      return false
    case 'boolean':
      return !v
    default: {
      if (v === '') {
        return false
      }
      const normalized = v.toLowerCase().trim()
      return ['0', 'false', 'no', 'off'].includes(normalized)
    }
  }
}
/**
 * Default Vertex region: CLOUD_ML_REGION when set to a non-empty value,
 * otherwise 'us-east5'. Read from the environment on every call.
 */
const realDefaultVertexRegion = (): string => {
  const configuredRegion = process.env.CLOUD_ML_REGION
  return configuredRegion ? configuredRegion : 'us-east5'
}
// [model-name prefix, env var holding that model's region override].
// getVertexRegionForModel below picks the FIRST entry whose prefix the model
// name starts with, so a more specific prefix must come before any shorter
// prefix it extends (e.g. 'claude-opus-4-1' before 'claude-opus-4').
const VERTEX_REGION_OVERRIDES: ReadonlyArray<[string, string]> = [
['claude-haiku-4-5', 'VERTEX_REGION_CLAUDE_HAIKU_4_5'],
['claude-3-5-haiku', 'VERTEX_REGION_CLAUDE_3_5_HAIKU'],
['claude-3-5-sonnet', 'VERTEX_REGION_CLAUDE_3_5_SONNET'],
['claude-3-7-sonnet', 'VERTEX_REGION_CLAUDE_3_7_SONNET'],
['claude-opus-4-1', 'VERTEX_REGION_CLAUDE_4_1_OPUS'],
['claude-opus-4', 'VERTEX_REGION_CLAUDE_4_0_OPUS'],
['claude-sonnet-4-6', 'VERTEX_REGION_CLAUDE_4_6_SONNET'],
['claude-sonnet-4-5', 'VERTEX_REGION_CLAUDE_4_5_SONNET'],
['claude-sonnet-4', 'VERTEX_REGION_CLAUDE_4_0_SONNET'],
]
// Real getClaudeConfigHomeDir is memoized via lodash, so consumers may call
// `.cache.clear()` on it. Provide a no-op .cache stub.
// Stand-in for getClaudeConfigHomeDir. While this suite runs it returns the
// fixed '/mock/home/.claude'; afterwards it resolves CLAUDE_CONFIG_DIR (or
// ~/.claude) and NFC-normalizes the result. The attached `cache` object is a
// no-op stub for callers that invoke `.cache.clear()` / `.cache.get()`.
const mockedGetClaudeConfigHomeDirMD: (() => string) & {
  cache: { clear: () => void; get: (k: unknown) => unknown }
} = Object.assign(
  () => {
    if (useMockForMagicDocs) {
      return '/mock/home/.claude'
    }
    const configDir =
      process.env.CLAUDE_CONFIG_DIR ?? join(homedir(), '.claude')
    return configDir.normalize('NFC')
  },
  {
    cache: {
      clear: () => {},
      get: (_k: unknown) => undefined,
    },
  },
)
// Process-global replacement for envUtils. Only getClaudeConfigHomeDir (and
// getTeamsDir, which derives from it) are suite-specific; the other helpers
// are implemented inline here rather than stubbed, except getEnvBool,
// getEnvNumber and isInProtectedNamespace, which return constants.
mock.module('src/utils/envUtils.js', () => ({
  getClaudeConfigHomeDir: mockedGetClaudeConfigHomeDirMD,
  isEnvTruthy: realIsEnvTruthy,
  getEnvBool: () => false,
  getEnvNumber: () => undefined,
  // Per-model region override, falling back to the default Vertex region.
  // First matching prefix in VERTEX_REGION_OVERRIDES wins.
  getVertexRegionForModel: (model: string | undefined) => {
    if (model) {
      const match = VERTEX_REGION_OVERRIDES.find(([prefix]) =>
        model.startsWith(prefix),
      )
      if (match) {
        return process.env[match[1]] || realDefaultVertexRegion()
      }
    }
    return realDefaultVertexRegion()
  },
  // Derive from the config-home resolver instead of re-implementing it, so
  // both always agree (same mock/real switch, same NFC normalization).
  getTeamsDir: () => join(mockedGetClaudeConfigHomeDirMD(), 'teams'),
  // True when NODE_OPTIONS contains `flag` as a whitespace-separated token.
  hasNodeOption: (flag: string) => {
    const opts = process.env.NODE_OPTIONS
    return !!opts && opts.split(/\s+/).includes(flag)
  },
  isEnvDefinedFalsy: realIsEnvDefinedFalsy,
  isBareMode: () =>
    realIsEnvTruthy(process.env.CLAUDE_CODE_SIMPLE) ||
    process.argv.includes('--bare'),
  // Parse ['KEY=value', …] into a record. Only the first '=' separates key
  // from value; entries without a key or without '=' are rejected.
  parseEnvVars: (rawEnvArgs: string[] | undefined) => {
    const parsed: Record<string, string> = {}
    if (rawEnvArgs) {
      for (const envStr of rawEnvArgs) {
        const [key, ...valueParts] = envStr.split('=')
        if (!key || valueParts.length === 0) {
          throw new Error(
            `Invalid environment variable format: ${envStr}, environment variables should be added as: -e KEY1=value1 -e KEY2=value2`,
          )
        }
        parsed[key] = valueParts.join('=')
      }
    }
    return parsed
  },
  getAWSRegion: () =>
    process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || 'us-east-1',
  getDefaultVertexRegion: realDefaultVertexRegion,
  shouldMaintainProjectWorkingDir: () =>
    realIsEnvTruthy(process.env.CLAUDE_BASH_MAINTAIN_PROJECT_WORKING_DIR),
  isRunningOnHomespace: () =>
    process.env.USER_TYPE === 'ant' &&
    realIsEnvTruthy(process.env.COO_RUNNING_ON_HOMESPACE),
  isInProtectedNamespace: () => false,
}))
// Mock the file system so loadMagicDocsPrompt() returns our controlled template
const mockReadFile = mock(
async (_path: string, _opts?: unknown): Promise<string> => {
// Default: reject with an ENOENT-coded error. Individual tests swap in a
// template-returning implementation via mockImplementation.
throw Object.assign(new Error('ENOENT'), { code: 'ENOENT' })
},
)
// IMPORTANT: this file used to mock fsOperations wholesale (readdir → [],
// exists → false, …), which silently broke sibling tests that walk
// .claude/skills (skill prefetch, skillLearning smoke). After this suite
// finishes (useMockForMagicDocs flips to false), construct a minimal real
// fs adapter inline using node:fs/promises so cross-file consumers see real
// disk state — without pre-importing the heavy fsOperations module (its
// transitive deps stall bun:test). Avoid require()ing the real module
// inside the factory: that re-enters the same mock and infinite-loops.
import { promises as nodeFs, existsSync as nodeExistsSync } from 'node:fs'
// Thin adapter over node:fs used once this suite has finished, so that other
// test files in the same process read real disk state. Every method forwards
// straight to the corresponding node API.
const realFsAdapter = {
  cwd() {
    return process.cwd()
  },
  existsSync(p: string) {
    return nodeExistsSync(p)
  },
  stat(p: string) {
    return nodeFs.stat(p)
  },
  lstat(p: string) {
    return nodeFs.lstat(p)
  },
  // Always returns Dirent entries rather than plain names.
  readdir(p: string) {
    return nodeFs.readdir(p, { withFileTypes: true })
  },
  unlink(p: string) {
    return nodeFs.unlink(p)
  },
  rmdir(p: string) {
    return nodeFs.rmdir(p)
  },
  rm(p: string, options?: { recursive?: boolean; force?: boolean }) {
    return nodeFs.rm(p, options)
  },
  mkdir(p: string, options?: { recursive?: boolean }) {
    return nodeFs.mkdir(p, options)
  },
  // Accepts the encoding either as a bare string or inside an options object.
  readFile(
    p: string,
    options?: BufferEncoding | { encoding?: BufferEncoding },
  ) {
    const encoding =
      typeof options === 'string' ? options : (options?.encoding ?? undefined)
    return nodeFs.readFile(p, encoding)
  },
  writeFile(p: string, data: string | Uint8Array) {
    return nodeFs.writeFile(p, data)
  },
  rename(oldPath: string, newPath: string) {
    return nodeFs.rename(oldPath, newPath)
  },
  open(p: string, flags: string | number) {
    return nodeFs.open(p, flags)
  },
  realpath(p: string) {
    return nodeFs.realpath(p)
  },
}
// Replace getFsImplementation. The choice is made on every call: while this
// suite runs it returns an in-memory stub whose readFile is the test-steered
// mockReadFile; afterwards it returns the node:fs-backed realFsAdapter.
mock.module('src/utils/fsOperations.js', () => ({
getFsImplementation: () =>
useMockForMagicDocs
? ({
readFile: mockReadFile,
writeFile: mock(async () => {}),
exists: mock(async () => false),
mkdir: mock(async () => {}),
readdir: mock(async () => []),
stat: mock(async () => ({})),
unlink: mock(async () => {}),
} as unknown)
: (realFsAdapter as unknown),
}))
// ── Import module under test (after all mock.module calls) ──────────────────
import { buildMagicDocsUpdatePrompt } from '../prompts.js'
// ── Tests ───────────────────────────────────────────────────────────────────
describe('buildMagicDocsUpdatePrompt dynamic variable substitution', () => {
  /**
   * Make the mocked file system serve `template`, then build the prompt with
   * fixed doc contents and path.
   */
  async function buildFromTemplate(
    template: string,
    docTitle = 'Title',
  ): Promise<string> {
    mockReadFile.mockImplementation(async () => template)
    return buildMagicDocsUpdatePrompt('contents', '/doc.md', docTitle)
  }

  // Reset every steerable mock to its default before each case.
  beforeEach(() => {
    mockGetMainLoopModel.mockReturnValue('claude-opus-4-7')
    mockGetDisplayedEffortLevel.mockReturnValue('high')
    mockReadFile.mockImplementation(async () => {
      throw Object.assign(new Error('ENOENT'), { code: 'ENOENT' })
    })
  })

  test('substitutes {{CLAUDE_MODEL}} with the current model', async () => {
    mockGetMainLoopModel.mockReturnValue('claude-opus-4-7')

    const prompt = await buildFromTemplate('Model: {{CLAUDE_MODEL}}')

    expect(prompt).toContain('Model: claude-opus-4-7')
    expect(prompt).not.toContain('{{CLAUDE_MODEL}}')
  })

  test('substitutes {{CLAUDE_EFFORT}} with the current effort level', async () => {
    mockGetDisplayedEffortLevel.mockReturnValue('high')

    const prompt = await buildFromTemplate('Effort: {{CLAUDE_EFFORT}}')

    expect(prompt).toContain('Effort: high')
    expect(prompt).not.toContain('{{CLAUDE_EFFORT}}')
  })

  test('substitutes {{CLAUDE_CWD}} with process.cwd()', async () => {
    const prompt = await buildFromTemplate('CWD: {{CLAUDE_CWD}}')

    expect(prompt).toContain(`CWD: ${process.cwd()}`)
    expect(prompt).not.toContain('{{CLAUDE_CWD}}')
  })

  test('substitutes all three dynamic variables in one template', async () => {
    mockGetMainLoopModel.mockReturnValue('claude-sonnet-4-6')
    mockGetDisplayedEffortLevel.mockReturnValue('medium')

    const prompt = await buildFromTemplate(
      'effort={{CLAUDE_EFFORT}} model={{CLAUDE_MODEL}} cwd={{CLAUDE_CWD}}',
    )

    expect(prompt).toContain('effort=medium')
    expect(prompt).toContain('model=claude-sonnet-4-6')
    expect(prompt).toContain(`cwd=${process.cwd()}`)
  })

  test('leaves unknown template variables unchanged', async () => {
    mockGetMainLoopModel.mockReturnValue('claude-opus-4-7')

    const prompt = await buildFromTemplate('{{UNKNOWN_VAR}} {{CLAUDE_MODEL}}')

    expect(prompt).toContain('{{UNKNOWN_VAR}}')
    expect(prompt).toContain('claude-opus-4-7')
  })

  test('existing substitution variables still work alongside new ones', async () => {
    mockGetMainLoopModel.mockReturnValue('claude-haiku')
    mockGetDisplayedEffortLevel.mockReturnValue('low')

    const prompt = await buildFromTemplate(
      '{{docTitle}} effort={{CLAUDE_EFFORT}} model={{CLAUDE_MODEL}}',
      'My Doc',
    )

    expect(prompt).toContain('My Doc')
    expect(prompt).toContain('effort=low')
    expect(prompt).toContain('model=claude-haiku')
  })
})

View File

@@ -1,6 +1,8 @@
import { join } from 'path'
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
import { getFsImplementation } from '../../utils/fsOperations.js'
import { getDisplayedEffortLevel } from '../../utils/effort.js'
import { getMainLoopModel } from '../../utils/model/model.js'
/**
* Get the Magic Docs update prompt template
@@ -114,11 +116,15 @@ These instructions take priority over the general rules below. Make sure your up
: ''
// Substitute variables in the prompt
const currentModel = getMainLoopModel()
const variables = {
docContents,
docPath,
docTitle,
customInstructions,
CLAUDE_EFFORT: getDisplayedEffortLevel(currentModel, undefined),
CLAUDE_MODEL: currentModel,
CLAUDE_CWD: process.cwd(),
}
return substituteVariables(promptTemplate, variables)

View File

@@ -0,0 +1,226 @@
/**
* Regression tests for fetchUltrareviewPreflight.
* Verifies all three action enum states (proceed/confirm/blocked),
* network/HTTP error handling, and Zod schema mismatch fallback.
*/
import { afterAll, beforeAll, describe, expect, mock, test } from 'bun:test'
import { debugMock } from '../../../../tests/mocks/debug.js'
import { logMock } from '../../../../tests/mocks/log.js'
import { setupAxiosMock } from '../../../../tests/mocks/axios.js'
// Mock dependency chain before any subject import
// Debug and log modules are replaced by the shared test mocks.
mock.module('src/utils/debug.ts', debugMock)
mock.module('src/utils/log.ts', logMock)
// Analytics is replaced by a no-op logEvent.
mock.module('src/services/analytics/index.js', () => ({
logEvent: () => {},
}))
// Mock auth utilities: report a Claude.ai subscriber that is neither a Team
// nor an Enterprise subscriber.
mock.module('src/utils/auth.js', () => ({
isClaudeAISubscriber: () => true,
isTeamSubscriber: () => false,
isEnterpriseSubscriber: () => false,
}))
// Mock OAuth config: fixed API base URL.
mock.module('src/constants/oauth.js', () => ({
getOauthConfig: () => ({ BASE_API_URL: 'https://api.anthropic.com' }),
}))
// Mock prepareApiRequest and getOAuthHeaders: fixed credentials, and headers
// built from whatever token is passed in.
mock.module('src/utils/teleport/api.js', () => ({
prepareApiRequest: async () => ({
accessToken: 'test-token',
orgUUID: 'org-uuid-test',
}),
getOAuthHeaders: (token: string) => ({
Authorization: `Bearer ${token}`,
'Content-Type': 'application/json',
'anthropic-version': '2023-06-01',
}),
}))
// We'll mock axios at module level.
// Typed as any in test code (CLAUDE.md: mock data may use as any).
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const mockAxiosPost = mock(async (..._args: any[]): Promise<any> => {
// Default: fail loudly if a test forgot to queue a response.
throw new Error('not configured')
})
// Route axios.post through the mock above, and treat any object carrying
// `isAxiosError === true` as an axios error.
const axiosHandle = setupAxiosMock()
axiosHandle.stubs.post = mockAxiosPost
axiosHandle.stubs.isAxiosError = (e: unknown) =>
typeof e === 'object' &&
e !== null &&
(e as { isAxiosError?: boolean }).isAxiosError === true
// The stubs are switched on only for the duration of this file's tests.
beforeAll(() => {
axiosHandle.useStubs = true
})
afterAll(() => {
axiosHandle.useStubs = false
})
import {
fetchUltrareviewPreflight,
type UltrareviewPreflightResponse,
} from '../ultrareviewPreflight.js'
describe('fetchUltrareviewPreflight', () => {
test('returns proceed action when server responds with proceed', async () => {
const serverResponse: UltrareviewPreflightResponse = {
action: 'proceed',
billing_note: null,
}
mockAxiosPost.mockImplementationOnce(async () => ({
status: 200,
data: serverResponse,
}))
const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
expect(result).not.toBeNull()
expect(result?.action).toBe('proceed')
expect(result?.billing_note).toBeNull()
})
test('returns confirm action with billing_note when server responds with confirm', async () => {
const serverResponse: UltrareviewPreflightResponse = {
action: 'confirm',
billing_note: 'This run will cost approximately $2.50.',
}
mockAxiosPost.mockImplementationOnce(async () => ({
status: 200,
data: serverResponse,
}))
const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
expect(result).not.toBeNull()
expect(result?.action).toBe('confirm')
expect(result?.billing_note).toBe('This run will cost approximately $2.50.')
})
test('returns blocked action when server responds with blocked', async () => {
const serverResponse: UltrareviewPreflightResponse = {
action: 'blocked',
billing_note: null,
}
mockAxiosPost.mockImplementationOnce(async () => ({
status: 200,
data: serverResponse,
}))
const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
expect(result).not.toBeNull()
expect(result?.action).toBe('blocked')
})
test('returns null on schema mismatch (invalid action value)', async () => {
mockAxiosPost.mockImplementationOnce(async () => ({
status: 200,
data: { action: 'unknown_action', billing_note: null },
}))
const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
expect(result).toBeNull()
})
test('returns null on network error (no response)', async () => {
const networkError = new Error('ECONNREFUSED')
;(networkError as unknown as { isAxiosError: boolean }).isAxiosError = true
mockAxiosPost.mockImplementationOnce(async () => {
throw networkError
})
const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
expect(result).toBeNull()
})
test('returns null on 401 Unauthorized', async () => {
const authError = new Error('Unauthorized')
;(
authError as unknown as {
isAxiosError: boolean
response: { status: number }
}
).isAxiosError = true
;(authError as unknown as { response: { status: number } }).response = {
status: 401,
}
mockAxiosPost.mockImplementationOnce(async () => {
throw authError
})
const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
expect(result).toBeNull()
})
test('returns null on 403 Forbidden', async () => {
  // Axios-flagged error carrying an HTTP 403 response.
  const forbidden = Object.assign(new Error('Forbidden'), {
    isAxiosError: true,
    response: { status: 403 },
  })
  mockAxiosPost.mockImplementationOnce(async () => {
    throw forbidden
  })

  const outcome = await fetchUltrareviewPreflight({ repo: 'owner/repo' })

  expect(outcome).toBeNull()
})
test('returns null on 5xx server error', async () => {
  // Axios-flagged error carrying an HTTP 500 response.
  const internalError = Object.assign(new Error('Internal Server Error'), {
    isAxiosError: true,
    response: { status: 500 },
  })
  mockAxiosPost.mockImplementationOnce(async () => {
    throw internalError
  })

  const outcome = await fetchUltrareviewPreflight({ repo: 'owner/repo' })

  expect(outcome).toBeNull()
})
test('passes pr_number to request body when provided', async () => {
  // Capture the request body and assert on it outside the mock. An expect()
  // that throws inside the mocked post call would be caught by
  // fetchUltrareviewPreflight's catch-all (which returns null), so a wrong
  // body would only surface as a misleading "expected 'proceed'" failure.
  let sentBody: unknown
  mockAxiosPost.mockImplementationOnce(
    async (_url: unknown, body: unknown) => {
      sentBody = body
      return { status: 200, data: { action: 'proceed', billing_note: null } }
    },
  )

  const result = await fetchUltrareviewPreflight({
    repo: 'owner/repo',
    pr_number: 42,
  })

  expect(sentBody).toMatchObject({ pr_number: 42 })
  expect(result?.action).toBe('proceed')
})
test('passes confirm flag to request body when provided', async () => {
  // Capture the request body and assert on it outside the mock. An expect()
  // that throws inside the mocked post call would be caught by
  // fetchUltrareviewPreflight's catch-all (which returns null), so a wrong
  // body would only surface as a misleading "expected 'proceed'" failure.
  let sentBody: unknown
  mockAxiosPost.mockImplementationOnce(
    async (_url: unknown, body: unknown) => {
      sentBody = body
      return { status: 200, data: { action: 'proceed', billing_note: null } }
    },
  )

  const result = await fetchUltrareviewPreflight({
    repo: 'owner/repo',
    confirm: true,
  })

  expect(sentBody).toMatchObject({ confirm: true })
  expect(result?.action).toBe('proceed')
})
})

View File

@@ -93,7 +93,10 @@ import {
asSystemPrompt,
type SystemPrompt,
} from '../../utils/systemPromptType.js'
import { cloneDeep } from 'lodash-es'
import {
getBreakCacheMarkerPath,
getBreakCacheAlwaysPath,
} from '../../commands/break-cache/index.js'
import { tokenCountFromLastAPIResponse } from '../../utils/tokens.js'
import { getDynamicConfig_BLOCKS_ON_INIT } from '../analytics/growthbook.js'
import {
@@ -121,6 +124,7 @@ import {
getAfkModeHeaderLatched,
getCacheEditingHeaderLatched,
getFastModeHeaderLatched,
getLastApiCompletionTimestamp,
getPromptCache1hAllowlist,
getPromptCache1hEligible,
getSessionId,
@@ -250,6 +254,7 @@ import {
type NonNullableUsage,
} from './logging.js'
import {
CACHE_TTL_1HOUR_MS,
checkResponseForCacheBreak,
recordPromptState,
} from './promptCacheBreakDetection.js'
@@ -507,30 +512,10 @@ export function getAPIMetadata() {
}
}
const deviceId = getOrCreateUserID()
// Third-party API providers (DeepSeek, etc.) validate user_id against
// ^[a-zA-Z0-9_-]+$ which rejects JSON strings containing {, ", :, etc.
// When using a non-Anthropic base URL, send only the device_id (hex string).
const baseUrl = process.env.ANTHROPIC_BASE_URL
const isThirdParty =
baseUrl &&
(() => {
try {
return new URL(baseUrl).host !== 'api.anthropic.com'
} catch {
return false
}
})()
if (isThirdParty) {
return { user_id: deviceId }
}
return {
user_id: jsonStringify({
...extra,
device_id: deviceId,
device_id: getOrCreateUserID(),
// Only include OAuth account UUID when actively using OAuth authentication
account_uuid: getOauthAccountInfo()?.accountUuid ?? '',
session_id: getSessionId(),
@@ -1441,12 +1426,39 @@ async function* queryModel(
].filter(Boolean),
)
// ── Break-cache integration ──
// If a one-time break-cache marker exists, or always-mode is on, append a
// unique ephemeral nonce comment to the system prompt so the prefix-cache
// hash changes for this request, forcing a cache miss.
{
const { existsSync, unlinkSync } = await import('node:fs')
const { randomUUID } = await import('node:crypto')
const onceMarker = getBreakCacheMarkerPath()
const alwaysFlag = getBreakCacheAlwaysPath()
const shouldBreak = existsSync(onceMarker) || existsSync(alwaysFlag)
if (shouldBreak) {
const nonce = randomUUID()
systemPrompt = asSystemPrompt([
...systemPrompt,
`<!-- cache-break nonce: ${nonce} -->`,
])
// Only delete the once marker; the always flag persists until /break-cache off
if (existsSync(onceMarker)) {
try {
unlinkSync(onceMarker)
} catch {
/* best-effort */
}
}
}
}
// Prepend system prompt block for easy API identification
logAPIPrefix(systemPrompt)
const enablePromptCaching =
options.enablePromptCaching ?? getPromptCachingEnabled(options.model)
let system = buildSystemPromptBlocks(systemPrompt, enablePromptCaching, {
const system = buildSystemPromptBlocks(systemPrompt, enablePromptCaching, {
skipGlobalCacheForSystemPrompt: needsToolBasedCacheMarker,
querySource: options.querySource,
})
@@ -1466,7 +1478,7 @@ async function* queryModel(
model: advisorModel,
} as unknown as BetaToolUnion)
}
let allTools = [...toolSchemas, ...extraToolSchemas]
const allTools = [...toolSchemas, ...extraToolSchemas]
const isFastMode =
isFastModeEnabled() &&
@@ -1590,39 +1602,6 @@ async function* queryModel(
const consumedCacheEdits = cachedMCEnabled ? consumePendingCacheEdits() : null
const consumedPinnedEdits = cachedMCEnabled ? getPinnedCacheEdits() : []
// ---------------------------------------------------------------------------
// Serialization boundary: deep-clone heavy data so the closure below captures
// independent copies, not references to the originals. After this point the
// original variables (messagesForAPI, system, allTools) are nulled out so
// they can be GC'd even while the generator/closure is still alive (during
// long streaming responses or retry backoff).
// ---------------------------------------------------------------------------
const frozenMessages = addCacheBreakpoints(
messagesForAPI,
enablePromptCaching,
options.querySource,
cachedMCEnabled &&
getAPIProvider() === 'firstParty' &&
options.querySource === 'repl_main_thread',
consumedCacheEdits as any,
consumedPinnedEdits as any,
options.skipCacheWrite,
)
const frozenSystem = cloneDeep(system)
const frozenTools = cloneDeep(allTools)
// Pre-compute scalars that post-streaming code needs, so messagesForAPI
// can be released before streaming starts.
const preMessagesCount = messagesForAPI.length
const preMessagesTokenCount = tokenCountFromLastAPIResponse(messagesForAPI)
// Release originals for GC — the frozen* copies and pre-computed scalars
// are now the only references to this data inside the closure.
// After null-out, all downstream code uses frozen* or pre-computed scalars.
messagesForAPI = null!
system = null!
allTools = null!
// Capture the betas sent in the last API request, including the ones that
// were dynamically added, so we can log and send it to telemetry.
let lastRequestBetas: string[] | undefined
@@ -1725,6 +1704,9 @@ async function* queryModel(
clearAllThinking: false,
})
const enablePromptCaching =
options.enablePromptCaching ?? getPromptCachingEnabled(retryContext.model)
// Fast mode: header is latched session-stable (cache-safe), but
// `speed='fast'` stays dynamic so cooldown still suppresses the actual
// fast-mode request without changing the cache key.
@@ -1755,10 +1737,13 @@ async function* queryModel(
}
}
// Cache editing beta: header is latched session-stable.
// The useCachedMC gate (cache_edits body behavior) is baked into
// frozenMessages at the serialization boundary above, so this block
// only controls the beta header.
// Cache editing beta: header is latched session-stable; useCachedMC
// (controls cache_edits body behavior) stays live so edits stop when
// the feature disables but the header doesn't flip.
const useCachedMC =
cachedMCEnabled &&
getAPIProvider() === 'firstParty' &&
options.querySource === 'repl_main_thread'
if (
cacheEditingHeaderLatched &&
cacheEditingBetaHeader &&
@@ -1787,9 +1772,17 @@ async function* queryModel(
return {
model: normalizeModelStringForAPI(options.model),
messages: frozenMessages,
system: frozenSystem,
tools: frozenTools,
messages: addCacheBreakpoints(
messagesForAPI,
enablePromptCaching,
options.querySource,
useCachedMC,
consumedCacheEdits as any,
consumedPinnedEdits as any,
options.skipCacheWrite,
),
system,
tools: allTools,
tool_choice: options.toolChoice,
...(useBetas && { betas: filteredBetas }),
metadata: getAPIMetadata(),
@@ -1849,9 +1842,6 @@ async function* queryModel(
let ttftMs = 0
let partialMessage: BetaMessage | undefined
const contentBlocks: (BetaContentBlock | ConnectorTextBlock)[] = []
// Accumulate streaming deltas in arrays to avoid O(n²) string concatenation.
// Joined and assigned to contentBlock fields at content_block_stop.
const streamingDeltas = new Map<number, string[]>()
let usage: NonNullableUsage = EMPTY_USAGE
let costUSD = 0
let stopReason: BetaStopReason | null = null
@@ -2138,8 +2128,6 @@ async function* queryModel(
}
break
}
// Initialize delta accumulator for this content block
streamingDeltas.set(part.index, [])
break
case 'content_block_delta': {
const contentBlock = contentBlocks[part.index]
@@ -2169,9 +2157,8 @@ async function* queryModel(
})
throw new Error('Content block is not a connector_text block')
}
streamingDeltas
.get(part.index)
?.push(delta.connector_text as string)
;(contentBlock as { connector_text: string }).connector_text +=
delta.connector_text
} else {
switch (delta.type) {
case 'citations_delta':
@@ -2201,9 +2188,7 @@ async function* queryModel(
})
throw new Error('Content block input is not a string')
}
streamingDeltas
.get(part.index)
?.push(delta.partial_json as string)
contentBlock.input += delta.partial_json
break
case 'text_delta':
if (contentBlock.type !== 'text') {
@@ -2217,7 +2202,7 @@ async function* queryModel(
})
throw new Error('Content block is not a text block')
}
streamingDeltas.get(part.index)?.push(delta.text!)
;(contentBlock as { text: string }).text += delta.text
break
case 'signature_delta':
if (
@@ -2252,7 +2237,8 @@ async function* queryModel(
})
throw new Error('Content block is not a thinking block')
}
streamingDeltas.get(part.index)?.push(delta.thinking!)
;(contentBlock as { thinking: string }).thinking +=
delta.thinking
break
}
}
@@ -2284,32 +2270,6 @@ async function* queryModel(
})
throw new Error('Message not found')
}
// Join accumulated streaming deltas into the contentBlock fields
// to avoid O(n²) string concatenation during streaming.
const deltas = streamingDeltas.get(part.index)
if (deltas && deltas.length > 0) {
const joined = deltas.join('')
switch (contentBlock.type) {
case 'text':
;(contentBlock as { text: string }).text = joined
break
case 'thinking':
;(contentBlock as { thinking: string }).thinking = joined
break
case 'tool_use':
case 'server_tool_use':
contentBlock.input = joined
break
default:
if ((contentBlock.type as string) === 'connector_text') {
;(
contentBlock as { connector_text: string }
).connector_text = joined
}
break
}
streamingDeltas.delete(part.index)
}
const m: AssistantMessage = {
message: {
...partialMessage,
@@ -2864,8 +2824,8 @@ async function* queryModel(
logAPIError({
error,
model: errorModel,
messageCount: preMessagesCount,
messageTokens: preMessagesTokenCount,
messageCount: messagesForAPI.length,
messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
durationMs: Date.now() - start,
durationMsIncludingRetries: Date.now() - startIncludingRetries,
attempt: attemptNumber,
@@ -2886,10 +2846,7 @@ async function* queryModel(
yield getAssistantMessageFromError(error, errorModel, {
messages,
messagesForAPI: frozenMessages as unknown as (
| UserMessage
| AssistantMessage
)[],
messagesForAPI,
})
releaseStreamResources()
return
@@ -2923,8 +2880,8 @@ async function* queryModel(
logAPIError({
error,
model: errorModel,
messageCount: preMessagesCount,
messageTokens: preMessagesTokenCount,
messageCount: messagesForAPI.length,
messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
durationMs: Date.now() - start,
durationMsIncludingRetries: Date.now() - startIncludingRetries,
attempt: attemptNumber,
@@ -2947,10 +2904,7 @@ async function* queryModel(
yield getAssistantMessageFromError(error, errorModel, {
messages,
messagesForAPI: frozenMessages as unknown as (
| UserMessage
| AssistantMessage
)[],
messagesForAPI,
})
releaseStreamResources()
return
@@ -3006,19 +2960,14 @@ async function* queryModel(
// Precompute scalars so the fire-and-forget .then() closure doesn't pin the
// full messagesForAPI array (the entire conversation up to the context window
// limit) until getToolPermissionContext() resolves.
// Note: messagesForAPI was nulled above (serialization boundary), so we use
// the pre-computed scalars captured before the null-out.
const logMessageCount = preMessagesCount
const logMessageTokens = preMessagesTokenCount
const logMessageCount = messagesForAPI.length
const logMessageTokens = tokenCountFromLastAPIResponse(messagesForAPI)
// Record LLM observation in Langfuse (no-op if not configured)
recordLLMObservation(options.langfuseTrace ?? null, {
model: resolvedModel,
provider: getAPIProvider(),
input: convertMessagesToLangfuse(
frozenMessages as Parameters<typeof convertMessagesToLangfuse>[0],
systemPrompt,
),
input: convertMessagesToLangfuse(messagesForAPI, systemPrompt),
output: convertOutputToLangfuse(newMessages),
usage: {
input_tokens: usage.input_tokens,

View File

@@ -0,0 +1,81 @@
import axios from 'axios'
import z from 'zod/v4'
import { getOauthConfig } from '../../constants/oauth.js'
import { logForDebugging } from '../../utils/debug.js'
import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
/**
 * Zod schema for the /v1/ultrareview/preflight response.
 * Based on binary-extracted schema: vq.object({action: vq.enum([...]), billing_note: ...})
 *
 * - `action` must be exactly 'proceed', 'confirm' or 'blocked'; any other
 *   value fails validation.
 * - `billing_note` may be a string, `null`, or omitted entirely.
 */
const UltrareviewPreflightSchema = z.object({
action: z.enum(['proceed', 'confirm', 'blocked']),
billing_note: z.string().nullable().optional(),
})
/** Validated preflight response shape, inferred from UltrareviewPreflightSchema. */
export type UltrareviewPreflightResponse = z.infer<
typeof UltrareviewPreflightSchema
>
/**
 * Arguments accepted by fetchUltrareviewPreflight. `repo` is always sent;
 * each optional field is added to the request body only when it is not
 * `undefined`.
 */
export type UltrareviewPreflightArgs = {
repo: string
pr_number?: number
pr_url?: string
// True once the user has already acknowledged the billing dialog (or passed
// --confirm on the CLI).
confirm?: boolean
}
/**
 * Calls POST /v1/ultrareview/preflight, the server-side gate consulted
 * before launching an ultrareview.
 *
 * Resolves to the schema-validated response (action is proceed / confirm /
 * blocked). Resolves to `null` for every failure: request preparation or
 * transport errors, and response bodies that do not match the schema. This
 * function never rejects; callers must treat `null` as "fallback to direct
 * launch" to preserve existing behavior.
 *
 * Pass `confirm: true` when the user has already acknowledged the billing
 * dialog (or passed --confirm on the CLI); this skips the server-side confirm
 * prompt and yields a direct proceed/blocked answer.
 *
 * @param args Repository identifier plus optional PR number, PR URL and
 *   confirm flag; optional fields are sent only when defined.
 * @returns The validated preflight response, or `null` on any failure.
 */
export async function fetchUltrareviewPreflight(
  args: UltrareviewPreflightArgs,
): Promise<UltrareviewPreflightResponse | null> {
  try {
    const { accessToken, orgUUID } = await prepareApiRequest()

    // Only forward optional fields the caller actually supplied.
    const { repo, pr_number, pr_url, confirm } = args
    const payload: Record<string, unknown> = {
      repo,
      ...(pr_number !== undefined && { pr_number }),
      ...(pr_url !== undefined && { pr_url }),
      ...(confirm !== undefined && { confirm }),
    }

    const endpoint = `${getOauthConfig().BASE_API_URL}/v1/ultrareview/preflight`
    const requestConfig = {
      headers: {
        ...getOAuthHeaders(accessToken),
        'x-organization-uuid': orgUUID,
      },
      timeout: 10000,
    }
    const reply = await axios.post(endpoint, payload, requestConfig)

    const validation = UltrareviewPreflightSchema.safeParse(reply.data)
    if (validation.success) {
      return validation.data
    }
    logForDebugging(
      `fetchUltrareviewPreflight: schema mismatch — ${validation.error.message}`,
    )
    return null
  } catch (error) {
    // Deliberately best-effort: log for debugging and report "no answer".
    logForDebugging(`fetchUltrareviewPreflight failed: ${error}`)
    return null
  }
}

View File

@@ -170,6 +170,21 @@ describe('Langfuse integration', () => {
const result = sanitizeToolOutput('MCPTool', 'mcp data')
expect(result).toBe('[MCPTool output redacted, 8 chars]')
})
test('redacts VaultHttpFetch output (vault tool, PR-2)', async () => {
  const sanitizeModule = await import('../sanitize.js')
  // The 22-character secret must be replaced by a length-only placeholder.
  const redacted = sanitizeModule.sanitizeToolOutput(
    'VaultHttpFetch',
    'sk-secret-bearer-token',
  )
  expect(redacted).toBe('[VaultHttpFetch output redacted, 22 chars]')
})
test('redacts LocalVaultFetch output (vault tool, future PR-3)', async () => {
  const sanitizeModule = await import('../sanitize.js')
  // The 16-character secret must be replaced by a length-only placeholder.
  const redacted = sanitizeModule.sanitizeToolOutput(
    'LocalVaultFetch',
    'plaintext-secret',
  )
  expect(redacted).toBe('[LocalVaultFetch output redacted, 16 chars]')
})
})
describe('sanitizeGlobal', () => {

View File

@@ -7,7 +7,16 @@ const REDACTED_FILE_TOOLS = new Set([
'FileEditTool',
])
const REDACTED_SHELL_TOOLS = new Set(['BashTool', 'PowerShellTool'])
const SENSITIVE_OUTPUT_TOOLS = new Set(['ConfigTool', 'MCPTool'])
// Vault-class tools and tools that intentionally surface user secrets must
// have their tool_result redacted in Langfuse traces. PR-2 ships VaultHttpFetch;
// LocalVaultFetch is reserved for a future PR. Adding both here proactively
// keeps Langfuse export safe even before the tools land.
const SENSITIVE_OUTPUT_TOOLS = new Set([
'ConfigTool',
'MCPTool',
'VaultHttpFetch',
'LocalVaultFetch',
])
function escapeRegExp(value: string): string {
return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

View File

@@ -0,0 +1,133 @@
import { describe, test, expect, beforeEach, afterEach, mock } from 'bun:test'
import { mkdtempSync, writeFileSync, rmSync } from 'fs'
import { join } from 'path'
import { tmpdir } from 'os'
import { logMock } from '../../../../tests/mocks/log.js'
// Must mock log before any import that transitively loads log.ts
mock.module('src/utils/log.ts', logMock)
// bun:bundle must be mocked before imports that use feature()
mock.module('bun:bundle', () => ({ feature: () => false }))
// settings.js must be mocked to cut bootstrap chain
mock.module('src/utils/settings/settings.js', () => ({
getSettings_DEPRECATED: () => ({}),
updateSettingsForSource: () => {},
}))
// Per-test temporary config directory; CLAUDE_CONFIG_DIR points at it while a
// test runs.
let tmpDir: string
// Value CLAUDE_CONFIG_DIR had before the current test, restored afterwards so
// this file does not clobber configuration other code in the process relies on.
let previousConfigDir: string | undefined

/** Drops the loader's per-process provider cache. */
async function resetProviderCache(): Promise<void> {
  const { _invalidateProviderCache } = await import('../loader.js')
  _invalidateProviderCache()
}

beforeEach(async () => {
  previousConfigDir = process.env['CLAUDE_CONFIG_DIR']
  tmpDir = mkdtempSync(join(tmpdir(), 'provider-loader-test-'))
  process.env['CLAUDE_CONFIG_DIR'] = tmpDir
  // Also start from an empty cache, so the first test does not depend on
  // whether something loaded providers earlier in this process.
  await resetProviderCache()
})

afterEach(async () => {
  if (previousConfigDir === undefined) {
    delete process.env['CLAUDE_CONFIG_DIR']
  } else {
    process.env['CLAUDE_CONFIG_DIR'] = previousConfigDir
  }
  rmSync(tmpDir, { recursive: true, force: true })
  // J1 fix: invalidate the per-process cache between tests so each test starts fresh
  await resetProviderCache()
})
describe('loadProviders', () => {
  // Location of providers.json inside the current test's temporary config dir.
  const providersFile = (): string => join(tmpDir, 'providers.json')

  // Imports the loader only when called (after any fixture file is written)
  // and returns the providers it loads.
  const load = async () => {
    const loader = await import('../loader.js')
    return loader.loadProviders()
  }

  test('returns 4 default providers when providers.json does not exist', async () => {
    const loaded = await load()
    expect(loaded).toHaveLength(4)
    expect(loaded.map(entry => entry.id)).toEqual([
      'cerebras',
      'groq',
      'qwen',
      'deepseek',
    ])
  })

  test('returns defaults when providers.json is empty', async () => {
    writeFileSync(providersFile(), '')
    const loaded = await load()
    expect(loaded).toHaveLength(4)
  })

  test('returns defaults when providers.json is empty array', async () => {
    writeFileSync(providersFile(), '[]')
    const loaded = await load()
    expect(loaded).toHaveLength(4)
  })

  test('returns defaults when providers.json is corrupt JSON', async () => {
    writeFileSync(providersFile(), '{not valid json')
    const loaded = await load()
    expect(loaded).toHaveLength(4)
  })

  test('returns defaults when providers.json fails schema validation', async () => {
    const invalidEntries = [{ id: 123, kind: 'bad-kind', baseUrl: 'not-a-url' }]
    writeFileSync(providersFile(), JSON.stringify(invalidEntries))
    const loaded = await load()
    expect(loaded).toHaveLength(4)
  })

  test('merges valid user providers on top of defaults', async () => {
    const userDefined = {
      id: 'myendpoint',
      kind: 'openai-compat',
      baseUrl: 'https://my.api.com/v1',
      apiKeyEnv: 'MY_API_KEY',
      defaultModel: 'my-model',
      compatRule: 'permissive',
    }
    writeFileSync(providersFile(), JSON.stringify([userDefined]))
    const loaded = await load()
    // 4 defaults + 1 custom = 5
    expect(loaded).toHaveLength(5)
    const added = loaded.find(entry => entry.id === 'myendpoint')
    expect(added).toMatchObject({
      baseUrl: 'https://my.api.com/v1',
    })
  })

  test('user provider with same id as default replaces the default', async () => {
    const cerebrasOverride = {
      id: 'cerebras',
      kind: 'openai-compat',
      baseUrl: 'https://custom-cerebras.example.com/v1',
      apiKeyEnv: 'CEREBRAS_API_KEY',
      defaultModel: 'llama-3.3-70b',
      compatRule: 'cerebras',
    }
    writeFileSync(providersFile(), JSON.stringify([cerebrasOverride]))
    const loaded = await load()
    // Still 4 providers (cerebras replaced, not added)
    expect(loaded).toHaveLength(4)
    const replaced = loaded.find(entry => entry.id === 'cerebras')
    expect(replaced?.baseUrl).toBe('https://custom-cerebras.example.com/v1')
  })

  test('findProvider returns undefined for unknown id', async () => {
    const loader = await import('../loader.js')
    const missing = loader.findProvider('nonexistent', loader.DEFAULT_PROVIDERS)
    expect(missing).toBeUndefined()
  })

  test('findProvider returns correct provider for known id', async () => {
    const loader = await import('../loader.js')
    const found = loader.findProvider('deepseek', loader.DEFAULT_PROVIDERS)
    expect(found?.baseUrl).toBe('https://api.deepseek.com/v1')
    expect(found?.compatRule).toBe('deepseek')
  })
})

View File

@@ -0,0 +1,204 @@
import { describe, test, expect } from 'bun:test'
import {
COMPAT_PROFILES,
applyCompatRule,
getDeepSeekReasoningMode,
} from '../providerCompatMatrix.js'
// Pins individual capability flags of the built-in compatibility profiles.
describe('COMPAT_PROFILES', () => {
  test('cerebras does not support stream_options', () => {
    const { supportsStreamUsageOption } = COMPAT_PROFILES['cerebras']
    expect(supportsStreamUsageOption).toBe(false)
  })

  test('cerebras does not support thinking field', () => {
    const { supportsThinkingField } = COMPAT_PROFILES['cerebras']
    expect(supportsThinkingField).toBe(false)
  })

  test('groq strips reasoning_content', () => {
    const { reasoningContentEcho } = COMPAT_PROFILES['groq']
    expect(reasoningContentEcho).toBe('strip')
  })

  test('deepseek preserves reasoning_content', () => {
    const { reasoningContentEcho } = COMPAT_PROFILES['deepseek']
    expect(reasoningContentEcho).toBe('always-preserve')
  })

  test('deepseek supports thinking field', () => {
    const { supportsThinkingField } = COMPAT_PROFILES['deepseek']
    expect(supportsThinkingField).toBe(true)
  })

  test('strict-openai strips stream_options', () => {
    const { supportsStreamUsageOption } = COMPAT_PROFILES['strict-openai']
    expect(supportsStreamUsageOption).toBe(false)
  })

  test('permissive allows all fields', () => {
    const permissive = COMPAT_PROFILES['permissive']
    expect(permissive.supportsStreamUsageOption).toBe(true)
    expect(permissive.supportsThinkingField).toBe(true)
  })
})
// Checks whether applyCompatRule drops or keeps the top-level stream_options
// field for each compat rule.
describe('applyCompatRule - stream_options stripping', () => {
  test('strips stream_options.include_usage for cerebras', () => {
    const request = {
      model: 'llama-3.3-70b',
      messages: [],
      stream: true,
      stream_options: { include_usage: true },
    }
    const adjusted = applyCompatRule(request, 'cerebras')
    expect(adjusted['stream_options']).toBeUndefined()
  })

  test('strips stream_options for strict-openai', () => {
    const request = {
      messages: [],
      stream_options: { include_usage: true },
    }
    const adjusted = applyCompatRule(request, 'strict-openai')
    expect(adjusted['stream_options']).toBeUndefined()
  })

  test('preserves stream_options for deepseek', () => {
    const request = {
      messages: [],
      stream_options: { include_usage: true },
    }
    const adjusted = applyCompatRule(request, 'deepseek')
    expect(adjusted['stream_options']).toEqual({ include_usage: true })
  })

  test('preserves stream_options for permissive', () => {
    const request = {
      messages: [],
      stream_options: { include_usage: true, other_field: 'x' },
    }
    const adjusted = applyCompatRule(request, 'permissive')
    expect(adjusted['stream_options']).toEqual({
      include_usage: true,
      other_field: 'x',
    })
  })

  test('does not mutate input body', () => {
    const request = {
      messages: [],
      stream_options: { include_usage: true },
    }
    applyCompatRule(request, 'groq')
    // Input must be unchanged
    expect(request['stream_options']).toEqual({ include_usage: true })
  })
})
// Checks per-message handling of the `thinking` field.
describe('applyCompatRule - thinking field stripping', () => {
  test('strips thinking field from messages for cerebras', () => {
    const request = {
      messages: [{ role: 'user', content: 'hi', thinking: { budget: 1000 } }],
    }
    const adjusted = applyCompatRule(request, 'cerebras')
    const [first] = adjusted['messages'] as Record<string, unknown>[]
    expect(first!['thinking']).toBeUndefined()
    expect(first!['content']).toBe('hi')
  })

  test('preserves thinking field for deepseek', () => {
    const request = {
      messages: [{ role: 'user', content: 'hi', thinking: { budget: 1000 } }],
    }
    const adjusted = applyCompatRule(request, 'deepseek')
    const [first] = adjusted['messages'] as Record<string, unknown>[]
    expect(first!['thinking']).toEqual({ budget: 1000 })
  })
})
// Checks when reasoning_content on assistant messages is kept or removed.
describe('applyCompatRule - DeepSeek reasoning_content three modes', () => {
  test('thinking-only mode: strips reasoning_content for strict-openai (non-deepseek)', () => {
    const request = {
      messages: [
        { role: 'assistant', content: 'answer', reasoning_content: 'thoughts' },
      ],
    }
    const adjusted = applyCompatRule(request, 'strict-openai')
    const [first] = adjusted['messages'] as Record<string, unknown>[]
    expect(first!['reasoning_content']).toBeUndefined()
  })

  test('thinking-only mode: preserves reasoning_content for deepseek', () => {
    const request = {
      messages: [
        { role: 'assistant', content: 'answer', reasoning_content: 'thoughts' },
      ],
    }
    const adjusted = applyCompatRule(request, 'deepseek')
    const [first] = adjusted['messages'] as Record<string, unknown>[]
    expect(first!['reasoning_content']).toBe('thoughts')
  })

  test('thinking+tools mode: preserves reasoning_content for deepseek', () => {
    const request = {
      messages: [
        {
          role: 'assistant',
          content: null,
          reasoning_content: 'deep thoughts',
          tool_calls: [{ id: 'call_1', function: { name: 'search' } }],
        },
      ],
    }
    const adjusted = applyCompatRule(request, 'deepseek')
    const [first] = adjusted['messages'] as Record<string, unknown>[]
    expect(first!['reasoning_content']).toBe('deep thoughts')
  })

  test('permissive with non-thinking model strips reasoning_content', () => {
    const request = {
      model: 'gpt-4o',
      messages: [
        { role: 'assistant', content: 'hi', reasoning_content: 'unused' },
      ],
    }
    const adjusted = applyCompatRule(request, 'permissive')
    const [first] = adjusted['messages'] as Record<string, unknown>[]
    expect(first!['reasoning_content']).toBeUndefined()
  })

  test('permissive with thinking model preserves reasoning_content', () => {
    const request = {
      model: 'deepseek-reasoner',
      messages: [
        { role: 'assistant', content: 'hi', reasoning_content: 'thoughts' },
      ],
    }
    const adjusted = applyCompatRule(request, 'permissive')
    const [first] = adjusted['messages'] as Record<string, unknown>[]
    expect(first!['reasoning_content']).toBe('thoughts')
  })
})
// Classification of a single message by presence of reasoning_content and
// tool_calls.
describe('getDeepSeekReasoningMode', () => {
  test('thinking-only: has reasoning_content, no tool_calls', () => {
    const message = { reasoning_content: 'thoughts', content: 'answer' }
    const mode = getDeepSeekReasoningMode(message)
    expect(mode).toBe('thinking-only')
  })

  test('thinking+tools: has both reasoning_content and tool_calls', () => {
    const message = {
      reasoning_content: 'deep thoughts',
      tool_calls: [{ id: 'call_1' }],
    }
    const mode = getDeepSeekReasoningMode(message)
    expect(mode).toBe('thinking+tools')
  })

  test('normal: no reasoning_content', () => {
    const message = { content: 'plain answer' }
    const mode = getDeepSeekReasoningMode(message)
    expect(mode).toBe('normal')
  })

  test('normal: empty tool_calls array with no reasoning_content', () => {
    const message = { content: 'plain', tool_calls: [] }
    const mode = getDeepSeekReasoningMode(message)
    expect(mode).toBe('normal')
  })
})

View File

@@ -0,0 +1,129 @@
import { describe, test, expect, beforeEach, afterEach, mock } from 'bun:test'
import { logMock } from '../../../../tests/mocks/log.js'
mock.module('src/utils/log.ts', logMock)
mock.module('bun:bundle', () => ({ feature: () => false }))
mock.module('src/utils/settings/settings.js', () => ({
getSettings_DEPRECATED: () => ({}),
updateSettingsForSource: () => {},
}))
// Every environment variable the tests in this file read or set. Clearing the
// same list before and after each test means a test that fails before its own
// inline `delete` cannot leave a provider key set for later tests.
const MANAGED_ENV_KEYS = [
  'CLAUDE_CODE_USE_OPENAI',
  'OPENAI_API_KEY',
  'OPENAI_BASE_URL',
  'ANTHROPIC_API_KEY',
  'CEREBRAS_API_KEY',
  'GROQ_API_KEY',
  'DASHSCOPE_API_KEY',
  'DEEPSEEK_API_KEY',
] as const

/** Removes every managed variable from process.env. */
function clearManagedEnv(): void {
  for (const key of MANAGED_ENV_KEYS) {
    delete process.env[key]
  }
}

beforeEach(() => {
  // Clean OpenAI env vars before each test
  clearManagedEnv()
})

afterEach(() => {
  // Same list as beforeEach, including the provider API keys
  clearManagedEnv()
})
describe('switchProvider', () => {
  // Imports the switcher and then the loader, in that order, at call time so
  // that any env var a test sets beforehand is already in place.
  const loadSubjects = async () => {
    const { switchProvider } = await import('../switcher.js')
    const { DEFAULT_PROVIDERS } = await import('../loader.js')
    return { switchProvider, DEFAULT_PROVIDERS }
  }

  test('switching to cerebras returns correct env vars', async () => {
    const { switchProvider, DEFAULT_PROVIDERS } = await loadSubjects()
    const { env, provider } = switchProvider('cerebras', DEFAULT_PROVIDERS)
    expect(env['CLAUDE_CODE_USE_OPENAI']).toBe('1')
    expect(env['OPENAI_BASE_URL']).toBe('https://api.cerebras.ai/v1')
    expect(env['OPENAI_MODEL']).toBe('llama-3.3-70b')
    expect(provider.id).toBe('cerebras')
  })

  test('switching to groq returns correct env vars', async () => {
    const { switchProvider, DEFAULT_PROVIDERS } = await loadSubjects()
    const { env } = switchProvider('groq', DEFAULT_PROVIDERS)
    expect(env['OPENAI_BASE_URL']).toBe('https://api.groq.com/openai/v1')
    expect(env['OPENAI_MODEL']).toBe('llama-3.3-70b-versatile')
  })

  test('switching to qwen returns correct env vars', async () => {
    const { switchProvider, DEFAULT_PROVIDERS } = await loadSubjects()
    const { env } = switchProvider('qwen', DEFAULT_PROVIDERS)
    expect(env['OPENAI_BASE_URL']).toBe(
      'https://dashscope.aliyuncs.com/compatible-mode/v1',
    )
    expect(env['OPENAI_MODEL']).toBe('qwen-max')
  })

  test('switching to deepseek returns correct env vars', async () => {
    const { switchProvider, DEFAULT_PROVIDERS } = await loadSubjects()
    const { env } = switchProvider('deepseek', DEFAULT_PROVIDERS)
    expect(env['OPENAI_BASE_URL']).toBe('https://api.deepseek.com/v1')
    expect(env['OPENAI_MODEL']).toBe('deepseek-chat')
  })

  test('throws for non-existent provider id', async () => {
    const { switchProvider, DEFAULT_PROVIDERS } = await loadSubjects()
    const attempt = () => switchProvider('nonexistent', DEFAULT_PROVIDERS)
    expect(attempt).toThrow('provider "nonexistent" not found')
  })

  test('warns when provider API key env var is not set', async () => {
    const { switchProvider, DEFAULT_PROVIDERS } = await loadSubjects()
    const { warnings } = switchProvider('cerebras', DEFAULT_PROVIDERS)
    expect(warnings.length).toBeGreaterThan(0)
    expect(warnings[0]).toContain('CEREBRAS_API_KEY')
  })

  test('no warning when provider API key env var is set', async () => {
    process.env['GROQ_API_KEY'] = 'test-key'
    const { switchProvider, DEFAULT_PROVIDERS } = await loadSubjects()
    const { warnings } = switchProvider('groq', DEFAULT_PROVIDERS)
    expect(warnings).toHaveLength(0)
    delete process.env['GROQ_API_KEY']
  })

  test('does not mutate process.env', async () => {
    const { switchProvider, DEFAULT_PROVIDERS } = await loadSubjects()
    const baseUrlBefore = process.env['OPENAI_BASE_URL']
    switchProvider('cerebras', DEFAULT_PROVIDERS)
    expect(process.env['OPENAI_BASE_URL']).toBe(baseUrlBefore)
  })
})
describe('buildShellExportBlock', () => {
  // Switches to `providerId` using the built-in providers and renders the
  // resulting shell export block. Modules are imported at call time so env
  // vars set by the test beforehand are already in place.
  const renderExportsFor = async (providerId: string) => {
    const { switchProvider, buildShellExportBlock } = await import(
      '../switcher.js'
    )
    const { DEFAULT_PROVIDERS } = await import('../loader.js')
    const switched = switchProvider(providerId, DEFAULT_PROVIDERS)
    return buildShellExportBlock(switched)
  }

  test('produces correct shell export lines for cerebras', async () => {
    const exportBlock = await renderExportsFor('cerebras')
    expect(exportBlock).toContain('export CLAUDE_CODE_USE_OPENAI=1')
    expect(exportBlock).toContain(
      'export OPENAI_BASE_URL=https://api.cerebras.ai/v1',
    )
    expect(exportBlock).toContain('export OPENAI_API_KEY=$CEREBRAS_API_KEY')
    expect(exportBlock).toContain('export OPENAI_MODEL=llama-3.3-70b')
  })

  test('api key line uses variable reference not literal value', async () => {
    process.env['DEEPSEEK_API_KEY'] = 'sk-secret-key'
    const exportBlock = await renderExportsFor('deepseek')
    // Must NOT contain the literal key value
    expect(exportBlock).not.toContain('sk-secret-key')
    // Must use variable reference
    expect(exportBlock).toContain('$DEEPSEEK_API_KEY')
    delete process.env['DEEPSEEK_API_KEY']
  })
})

View File

@@ -0,0 +1,246 @@
import {
  existsSync,
  mkdirSync,
  readFileSync,
  renameSync,
  unlinkSync,
  writeFileSync,
} from 'fs'
import { dirname, join } from 'path'
import { randomBytes } from 'node:crypto'
import { tmpdir } from 'node:os'
import { logError } from '../../utils/log.js'
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
import { ProvidersFileSchema, type ProviderConfig } from './types.js'
/**
* The four built-in OpenAI-compat providers (Cerebras, Groq, Qwen, DeepSeek).
*
* These are used when providers.json is absent or contains no entries, and
* also as the fallback when the file cannot be read, parsed, or validated.
* User-defined providers in ~/.claude/providers.json are merged on top
* (they replace a built-in with the same id; new ids are appended).
*
* Each entry's apiKeyEnv is the NAME of an environment variable, never a key
* value.
*/
export const DEFAULT_PROVIDERS: ProviderConfig[] = [
{
id: 'cerebras',
kind: 'openai-compat',
baseUrl: 'https://api.cerebras.ai/v1',
apiKeyEnv: 'CEREBRAS_API_KEY',
defaultModel: 'llama-3.3-70b',
compatRule: 'cerebras',
},
{
id: 'groq',
kind: 'openai-compat',
baseUrl: 'https://api.groq.com/openai/v1',
apiKeyEnv: 'GROQ_API_KEY',
defaultModel: 'llama-3.3-70b-versatile',
compatRule: 'groq',
},
{
// Qwen has no dedicated compat rule; it uses the generic strict profile.
id: 'qwen',
kind: 'openai-compat',
baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
apiKeyEnv: 'DASHSCOPE_API_KEY',
defaultModel: 'qwen-max',
compatRule: 'strict-openai',
},
{
id: 'deepseek',
kind: 'openai-compat',
baseUrl: 'https://api.deepseek.com/v1',
apiKeyEnv: 'DEEPSEEK_API_KEY',
defaultModel: 'deepseek-chat',
compatRule: 'deepseek',
},
]
/**
 * Resolve the location of providers.json.
 *
 * @returns `providers.json` joined onto the directory reported by
 *   getClaudeConfigHomeDir().
 */
export function getProvidersFilePath(): string {
  const configHome = getClaudeConfigHomeDir()
  return join(configHome, 'providers.json')
}
// ── J1: per-process memoization with stale-on-invalidate ─────────────────────
// Holds the last provider list produced by a load; null means "not loaded yet
// or invalidated", which makes the next loadProviders() go back to disk.
let _cachedProviders: ProviderConfig[] | null = null
/**
* Invalidate the in-process provider cache (called after saveProviders).
* Only clears the cached reference; nothing is re-read until the next load.
*/
export function _invalidateProviderCache(): void {
_cachedProviders = null
}
/**
 * Return the effective provider list, memoized for the life of the process.
 *
 * The first call (or the first call after the cache has been invalidated)
 * delegates to the internal loader, which:
 *   1. starts from DEFAULT_PROVIDERS;
 *   2. if providers.json exists and validates, overlays its entries (same id
 *      replaces the built-in, new ids are appended);
 *   3. falls back to the defaults alone when the file is missing, empty,
 *      unreadable, not JSON, or fails schema validation (the problem is
 *      logged, not thrown).
 *
 * Load diagnostics are discarded here; use loadProvidersWithDiagnostic() when
 * the error text needs to be shown to the user.
 *
 * The returned array is the cached instance itself, so callers should treat
 * it as read-only.
 */
export function loadProviders(): ProviderConfig[] {
  // J1: only touch the disk when there is no memoized list yet.
  if (_cachedProviders === null) {
    const { providers } = _loadProvidersInternal()
    _cachedProviders = providers
  }
  return _cachedProviders
}
/**
 * Load providers from disk and report what, if anything, went wrong.
 *
 * Unlike loadProviders() this always performs a fresh load (it does not read
 * the memoized list) and then replaces the memoized list with the result.
 *
 * @returns `providers` — the effective list; `error` — present only when the
 *   file existed but could not be read, parsed, or validated, so the UI layer
 *   can surface it (A1) instead of it living only in the error log.
 */
export function loadProvidersWithDiagnostic(): {
  providers: ProviderConfig[]
  error?: string
} {
  const fresh = _loadProvidersInternal()
  // Keep the memoized list in sync with what was just read.
  _cachedProviders = fresh.providers
  return fresh
}
/**
 * Uncached loader shared by loadProviders() and loadProvidersWithDiagnostic().
 *
 * Never throws for file problems: a missing or blank file, or an empty array,
 * yields a copy of DEFAULT_PROVIDERS with no error; an unreadable, non-JSON,
 * or schema-invalid file yields the defaults plus an `error` message that has
 * also been passed to logError.
 */
function _loadProvidersInternal(): {
  providers: ProviderConfig[]
  error?: string
} {
  const filePath = getProvidersFilePath()

  // Fresh shallow copy each time so callers never share the constant array.
  const defaultsOnly = (): { providers: ProviderConfig[] } => ({
    providers: [...DEFAULT_PROVIDERS],
  })
  // Log the problem and fall back to defaults, carrying the message along.
  const defaultsWithError = (
    msg: string,
  ): { providers: ProviderConfig[]; error: string } => {
    logError(new Error(msg))
    return { providers: [...DEFAULT_PROVIDERS], error: msg }
  }

  if (!existsSync(filePath)) {
    return defaultsOnly()
  }

  let raw: string
  try {
    raw = readFileSync(filePath, 'utf-8')
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err)
    return defaultsWithError(
      `loadProviders: failed to read ${filePath}: ${reason}`,
    )
  }

  // A blank file is treated the same as no file at all.
  if (raw.trim() === '') {
    return defaultsOnly()
  }

  let parsed: unknown
  try {
    parsed = JSON.parse(raw)
  } catch {
    return defaultsWithError(
      `loadProviders: ${filePath} is not valid JSON. Using default providers.`,
    )
  }

  const validation = ProvidersFileSchema.safeParse(parsed)
  if (!validation.success) {
    return defaultsWithError(
      `loadProviders: ${filePath} failed schema validation: ${validation.error.message}. Using default providers.`,
    )
  }
  if (validation.data.length === 0) {
    return defaultsOnly()
  }

  // Overlay user entries on the defaults. Map keeps first-insertion order, so
  // an overridden built-in stays in its slot and new ids land at the end.
  const byId = new Map<string, ProviderConfig>()
  for (const entry of [...DEFAULT_PROVIDERS, ...validation.data]) {
    byId.set(entry.id, entry)
  }
  return { providers: [...byId.values()] }
}
/**
 * Look up a provider by its id.
 *
 * @param id - Provider id to match exactly.
 * @param providers - List to search; when omitted, loadProviders() is used.
 * @returns The first entry whose id equals `id`, or undefined when none does.
 */
export function findProvider(
  id: string,
  providers?: ProviderConfig[],
): ProviderConfig | undefined {
  const candidates = providers ?? loadProviders()
  return candidates.find(entry => entry.id === id)
}
/**
 * Key-order-independent equality for ProviderConfig objects.
 * E4 fix: replaces JSON.stringify comparison which is key-order sensitive.
 *
 * The comparison is shallow: each own enumerable property is compared with
 * `===`, so it is only meaningful while every field holds a primitive.
 *
 * Two objects are equal only when they own exactly the same set of keys and
 * every key maps to the same value. Key presence is checked explicitly so
 * that `{ x: undefined }` and `{ y: undefined }` are not mistaken for equal.
 *
 * @returns true when `a` and `b` have identical keys and values.
 */
function providerConfigEqual(a: ProviderConfig, b: ProviderConfig): boolean {
  const keysA = Object.keys(a)
  if (keysA.length !== Object.keys(b).length) return false
  for (const k of keysA) {
    // Same key count + every key of `a` owned by `b` ⇒ identical key sets.
    if (!Object.prototype.hasOwnProperty.call(b, k)) return false
    if (a[k as keyof ProviderConfig] !== b[k as keyof ProviderConfig]) {
      return false
    }
  }
  return true
}
/**
 * Persist provider overrides/additions to ~/.claude/providers.json.
 *
 * The given list is overlaid on DEFAULT_PROVIDERS (same id replaces the
 * built-in). Only entries that differ from the built-ins are written: custom
 * ids, plus built-in ids whose config was changed. The file is replaced
 * wholesale, so entries already on disk survive only if they are included in
 * `providers`.
 *
 * C3 fix: atomic tmp+rename write. The temp file lives in the SAME directory
 *   as providers.json — rename() cannot cross filesystems, and the OS temp
 *   directory is frequently a different mount (EXDEV).
 * E4 fix: uses key-order-independent equality for the default comparison.
 * J1 fix: invalidates the in-process cache after a successful write.
 *
 * @param providers - Entries to overlay on the built-in defaults.
 * @returns The full merged list (defaults + overrides + additions).
 * @throws Whatever the filesystem call threw if the write or rename fails; the
 *   temp file is removed on a best-effort basis first.
 */
export function saveProviders(providers: ProviderConfig[]): ProviderConfig[] {
  const filePath = getProvidersFilePath()

  // Build merged list (providers override defaults by id)
  const defaultsById = new Map<string, ProviderConfig>()
  const merged = new Map<string, ProviderConfig>()
  for (const p of DEFAULT_PROVIDERS) {
    defaultsById.set(p.id, p)
    merged.set(p.id, p)
  }
  for (const p of providers) {
    merged.set(p.id, p)
  }

  // Only persist what the built-ins cannot reproduce: non-default ids, and
  // default ids whose config differs from the built-in entry (E4).
  const toWrite: ProviderConfig[] = []
  for (const [id, p] of merged) {
    const builtin = defaultsById.get(id)
    if (builtin === undefined || !providerConfigEqual(builtin, p)) {
      toWrite.push(p)
    }
  }

  // C3: write to a sibling temp file, then rename over the target so readers
  // never observe a half-written providers.json.
  const dir = dirname(filePath)
  const tmpPath = join(
    dir,
    `.providers-${randomBytes(8).toString('hex')}.tmp`,
  )
  try {
    // The config directory may not exist yet on a fresh install.
    mkdirSync(dir, { recursive: true })
    writeFileSync(tmpPath, JSON.stringify(toWrite, null, 2), 'utf-8')
    renameSync(tmpPath, filePath)
  } catch (err) {
    try {
      // Best effort: don't leave the temp file behind.
      unlinkSync(tmpPath)
    } catch {
      /* ignore — the temp file may never have been created */
    }
    throw err
  }

  // J1: invalidate cache so next loadProviders() reads fresh data
  _invalidateProviderCache()
  return Array.from(merged.values())
}

View File

@@ -0,0 +1,179 @@
import type { CompatRule } from './types.js'
/**
* Per-provider OpenAI-compat field whitelist.
*
* Each profile describes what an endpoint actually accepts so we can strip
* fields that would cause a strict endpoint to reject the request.
* applyCompatRule() reads supportsStreamUsageOption, supportsThinkingField and
* reasoningContentEcho to decide what to remove from an outgoing body.
*/
export interface CompatProfile {
/**
* Whether the server accepts stream_options.include_usage in chat completions.
* Strict endpoints (Cerebras, Qwen) reject unknown top-level keys.
* When false, include_usage is removed from stream_options.
*/
supportsStreamUsageOption: boolean
/**
* Whether the server accepts a custom 'thinking' field in messages.
* Only permissive or DeepSeek-thinking endpoints accept this.
* When false, the 'thinking' key is removed from every message.
*/
supportsThinkingField: boolean
/**
* How to handle reasoning_content in roundtrips.
*
* DeepSeek has three modes:
* - thinking-only: model returns reasoning_content, no tools
* - thinking+tools: model returns both reasoning_content and tool calls
* - normal: model returns neither
*
* 'always-preserve': echo back (DeepSeek thinking+tools roundtrip)
* 'drop-on-non-thinking': remove unless current model is thinking variant
* 'strip': remove always (safe default for strict endpoints)
*/
reasoningContentEcho: 'always-preserve' | 'drop-on-non-thinking' | 'strip'
/**
* Tool call schema flavor supported by the endpoint.
* 'openai-v2' = standard OpenAI function-calling schema
* (currently the only member of this union).
*/
toolCallFormat: 'openai-v2'
}
/**
* Lookup table from each CompatRule to the CompatProfile applied for it.
* Typed as Record<CompatRule, CompatProfile>, so the compiler requires an
* entry for every rule in the CompatRule union.
*/
export const COMPAT_PROFILES: Record<CompatRule, CompatProfile> = {
cerebras: {
supportsStreamUsageOption: false,
supportsThinkingField: false,
reasoningContentEcho: 'strip',
toolCallFormat: 'openai-v2',
},
groq: {
supportsStreamUsageOption: false,
supportsThinkingField: false,
reasoningContentEcho: 'strip',
toolCallFormat: 'openai-v2',
},
deepseek: {
// DeepSeek-reasoner supports reasoning_content and the thinking field.
// For normal deepseek-chat, thinking field is ignored rather than rejected.
supportsStreamUsageOption: true,
supportsThinkingField: true,
reasoningContentEcho: 'always-preserve',
toolCallFormat: 'openai-v2',
},
// Most conservative profile: every optional field is stripped.
'strict-openai': {
supportsStreamUsageOption: false,
supportsThinkingField: false,
reasoningContentEcho: 'strip',
toolCallFormat: 'openai-v2',
},
// Least restrictive profile: reasoning_content is kept only for models whose
// name looks like a thinking variant (see applyCompatRule).
permissive: {
supportsStreamUsageOption: true,
supportsThinkingField: true,
reasoningContentEcho: 'drop-on-non-thinking',
toolCallFormat: 'openai-v2',
},
}
/**
 * Classify an assistant message by which DeepSeek reasoning mode produced it.
 *
 * - 'normal':         reasoning_content is missing or falsy (tool_calls is
 *                     not consulted in that case)
 * - 'thinking+tools': truthy reasoning_content AND a non-empty tool_calls array
 * - 'thinking-only':  truthy reasoning_content, tool_calls absent, empty, or
 *                     not an array
 *
 * @param assistantMessage - Raw assistant message object.
 */
export function getDeepSeekReasoningMode(
  assistantMessage: Record<string, unknown>,
): 'thinking-only' | 'thinking+tools' | 'normal' {
  if (!assistantMessage['reasoning_content']) {
    return 'normal'
  }
  const calls = assistantMessage['tool_calls']
  const usedTools = Array.isArray(calls) && calls.length > 0
  return usedTools ? 'thinking+tools' : 'thinking-only'
}
/**
 * Produce a copy of an outgoing request body with the fields removed that the
 * endpoint selected by `rule` does not accept.
 *
 * Pure with respect to its input: `body` and the message objects inside it
 * are never mutated — changed messages are replaced by new objects in a new
 * array on the returned copy.
 *
 * Steps, driven by COMPAT_PROFILES[rule]:
 *   1. No stream-usage support: drop stream_options.include_usage, and drop
 *      stream_options entirely if that leaves it empty.
 *   2. No thinking-field support: drop `thinking` from every message.
 *   3. reasoningContentEcho:
 *        'strip'                – drop reasoning_content from every message
 *        'drop-on-non-thinking' – same, unless the model name contains
 *                                 "reason" or "think" (case-insensitive)
 *        'always-preserve'      – leave messages alone
 *
 * @param body - Request body to sanitize.
 * @param rule - Compat rule naming the target endpoint's profile.
 * @returns A new body object.
 */
export function applyCompatRule(
  body: Record<string, unknown>,
  rule: CompatRule,
): Record<string, unknown> {
  const profile = COMPAT_PROFILES[rule]
  const result: Record<string, unknown> = { ...body }

  // Replace result.messages with a copy in which `field` is absent from every
  // message; messages without the field are reused untouched. No-op when
  // messages is not an array.
  const dropFromMessages = (field: 'thinking' | 'reasoning_content'): void => {
    const messages = result['messages']
    if (!Array.isArray(messages)) return
    result['messages'] = (messages as Record<string, unknown>[]).map(msg => {
      if (!(field in msg)) return msg
      const { [field]: _dropped, ...rest } = msg
      return rest
    })
  }

  if (!profile.supportsStreamUsageOption) {
    const streamOptions = result['stream_options']
    const isPlainObject =
      streamOptions !== null &&
      typeof streamOptions === 'object' &&
      !Array.isArray(streamOptions)
    if (isPlainObject) {
      const { include_usage: _dropped, ...remaining } =
        streamOptions as Record<string, unknown>
      if (Object.keys(remaining).length > 0) {
        result['stream_options'] = remaining
      } else {
        // Nothing left once include_usage is gone — omit the key altogether.
        delete result['stream_options']
      }
    }
  }

  if (!profile.supportsThinkingField) {
    dropFromMessages('thinking')
  }

  switch (profile.reasoningContentEcho) {
    case 'strip':
      dropFromMessages('reasoning_content')
      break
    case 'drop-on-non-thinking': {
      // The model name is the only signal used to detect a thinking variant.
      const model = typeof result['model'] === 'string' ? result['model'] : ''
      if (!/reason|think/i.test(model)) {
        dropFromMessages('reasoning_content')
      }
      break
    }
    case 'always-preserve':
      break
  }

  return result
}

View File

@@ -0,0 +1,111 @@
import { findProvider, loadProviders } from './loader.js'
import type { ProviderConfig } from './types.js'
/**
* Outcome of switchProvider(): the variables to export, any advisory warnings,
* and the provider entry that was matched.
*/
export interface SwitchProviderResult {
/**
* Environment variables to set before the next session.
* This is informational — the caller must NOT mutate process.env.
* The user copies these into their shell profile.
* switchProvider() fills in CLAUDE_CODE_USE_OPENAI, OPENAI_BASE_URL and
* OPENAI_MODEL; the API key is deliberately not part of this map.
*/
env: Record<string, string>
/**
* Human-readable warnings (e.g. missing API key in current env).
* Non-fatal: the user can still configure the provider.
*/
warnings: string[]
/**
* The resolved provider config used for this switch.
* buildShellExportBlock() reads provider.apiKeyEnv from here to emit the
* OPENAI_API_KEY line as a variable reference.
*/
provider: ProviderConfig
}
/**
 * Work out what the user must export to activate an OpenAI-compat provider.
 *
 * This only computes a result; process.env is read but never written. The
 * returned `env` contains CLAUDE_CODE_USE_OPENAI, OPENAI_BASE_URL and
 * OPENAI_MODEL. The API key is intentionally absent: only the NAME of the
 * variable holding it travels (via `provider.apiKeyEnv`), so the key value is
 * never copied into the result.
 *
 * Warnings (non-fatal, in this order):
 *   1. G3 — ANTHROPIC_API_KEY is set while OpenAI-compat mode is already
 *      active (CLAUDE_CODE_USE_OPENAI === '1' or OPENAI_API_KEY set). Returned
 *      in `warnings` so the UI can render it.
 *   2. The provider's API key variable is not set in the current environment.
 *
 * @param id - Provider id (e.g. 'cerebras', 'groq', 'deepseek', 'qwen')
 * @param providers - Optional pre-loaded list (defaults to loadProviders())
 * @returns The env map, the warnings, and the matched provider config.
 * @throws {Error} if provider id is not found; the message lists known ids.
 */
export function switchProvider(
  id: string,
  providers?: ProviderConfig[],
): SwitchProviderResult {
  const list = providers ?? loadProviders()
  const found = findProvider(id, list)
  if (!found) {
    const known = list.map(p => p.id).join(', ')
    throw new Error(
      `switchProvider: provider "${id}" not found. Available: ${known}`,
    )
  }

  const currentEnv = process.env
  const warnings: string[] = []

  // G3: credential-plane confusion check.
  const openAIModeActive =
    currentEnv['CLAUDE_CODE_USE_OPENAI'] === '1' ||
    Boolean(currentEnv['OPENAI_API_KEY'])
  if (openAIModeActive && Boolean(currentEnv['ANTHROPIC_API_KEY'])) {
    warnings.push(
      'Both ANTHROPIC_API_KEY and OpenAI-compat mode are set. ' +
        'ANTHROPIC_API_KEY is for Anthropic workspace endpoints (/v1/agents, /v1/vaults). ' +
        'OpenAI-compat mode routes /v1/messages to a third-party provider. ' +
        'These are separate planes — verify this is intentional.',
    )
  }

  // Only presence is tested; the key's value is never stored or returned.
  const keyVar = found.apiKeyEnv
  if (!currentEnv[keyVar]) {
    warnings.push(
      `${keyVar} is not set in the current environment. ` +
        `Set it before starting Claude Code: export ${keyVar}=<your-api-key>`,
    )
  }

  return {
    env: {
      CLAUDE_CODE_USE_OPENAI: '1',
      OPENAI_BASE_URL: found.baseUrl,
      OPENAI_MODEL: found.defaultModel,
    },
    warnings,
    provider: found,
  }
}
/**
 * Build the shell export block to display to the user.
 *
 * Example output:
 *   export CLAUDE_CODE_USE_OPENAI=1
 *   export OPENAI_BASE_URL=https://api.cerebras.ai/v1
 *   export OPENAI_API_KEY=$CEREBRAS_API_KEY
 *   export OPENAI_MODEL=llama-3.3-70b
 *
 * The API key line uses a variable reference so the actual key is never echoed.
 *
 * Literal values (base URL, model, mode flag) are emitted bare when they
 * consist only of characters the shell treats literally, and single-quoted
 * otherwise. Providers come from a user-editable JSON file, so a URL with a
 * query string (`&`) or a model name containing spaces, `$`, or quotes would
 * otherwise yield a snippet that breaks — or executes something — when pasted.
 *
 * @param result - The value returned by switchProvider().
 * @returns Newline-joined `export` lines, with no trailing newline.
 */
export function buildShellExportBlock(result: SwitchProviderResult): string {
  const { env, provider } = result

  // Return `value` unchanged if it is safe as a bare shell word; otherwise
  // wrap it in single quotes, encoding embedded single quotes as '\''.
  const quoteIfNeeded = (value: string): string =>
    /^[A-Za-z0-9_@%+=:,./-]+$/.test(value)
      ? value
      : `'${value.replace(/'/g, `'\\''`)}'`

  const lines: string[] = [
    `export CLAUDE_CODE_USE_OPENAI=${quoteIfNeeded(env['CLAUDE_CODE_USE_OPENAI'] ?? '1')}`,
    `export OPENAI_BASE_URL=${quoteIfNeeded(env['OPENAI_BASE_URL'] ?? provider.baseUrl)}`,
    // Deliberately a variable reference, not a value: never quoted or expanded.
    `export OPENAI_API_KEY=$${provider.apiKeyEnv}`,
    `export OPENAI_MODEL=${quoteIfNeeded(env['OPENAI_MODEL'] ?? provider.defaultModel)}`,
  ]
  return lines.join('\n')
}

View File

@@ -0,0 +1,51 @@
import { z } from 'zod'
/**
* Compat rule identifiers. Each maps to a CompatProfile in providerCompatMatrix.ts.
* The schema validates the compatRule field of a provider entry; the CompatRule
* type below is derived from it so the two cannot drift apart.
*/
export const CompatRuleSchema = z.enum([
'cerebras',
'groq',
'deepseek',
'strict-openai',
'permissive',
])
export type CompatRule = z.infer<typeof CompatRuleSchema>
/**
* The only supported provider kind for PR-2. Future PR-3+ may add 'oauth', 'bedrock-compat', etc.
* Being a literal schema, any other value for `kind` fails validation.
*/
export const ProviderKindSchema = z.literal('openai-compat')
export type ProviderKind = z.infer<typeof ProviderKindSchema>
/**
* Zod schema for a single provider configuration entry.
*
* Rules:
* - id: kebab-case identifier used in /provider use <id>
*   (lowercase letters, digits and hyphens only)
* - kind: only 'openai-compat' in PR-2
* - baseUrl: full base URL including /v1 suffix if needed (must parse as a URL)
* - apiKeyEnv: name of the env var that holds the API key (non-empty; the
*   name is not otherwise checked here)
* - defaultModel: model string passed as OPENAI_MODEL (non-empty)
* - compatRule: selects CompatProfile from providerCompatMatrix
*/
export const ProviderConfigSchema = z.object({
id: z
.string()
.min(1)
.regex(/^[a-z0-9-]+$/, 'id must be kebab-case'),
kind: ProviderKindSchema,
baseUrl: z.string().url(),
apiKeyEnv: z.string().min(1),
defaultModel: z.string().min(1),
compatRule: CompatRuleSchema,
})
export type ProviderConfig = z.infer<typeof ProviderConfigSchema>
/**
* Schema for the entire ~/.claude/providers.json file.
* Top-level must be an array of ProviderConfig; an empty array is valid.
*/
export const ProvidersFileSchema = z.array(ProviderConfigSchema)

View File

@@ -0,0 +1,465 @@
import {
afterAll,
describe,
test,
expect,
beforeEach,
afterEach,
mock,
} from 'bun:test'
import * as path from 'node:path'
import * as os from 'node:os'
import { homedir } from 'node:os'
import { join } from 'node:path'
import * as fsp from 'node:fs/promises'
// ---------------------------------------------------------------------------
// Mock envUtils so getClaudeConfigHomeDir returns a temp dir while THIS
// suite runs. After it finishes, getClaudeConfigHomeDir falls back to the
// real semantics (process.env.CLAUDE_CONFIG_DIR ?? ~/.claude) so other
// tests in the same process (envUtils.test.ts in particular) don't see
// the test's tmpDir leaked as the user config home.
// ---------------------------------------------------------------------------
// tmpDir is reassigned by each describe's beforeEach; the mocked
// getClaudeConfigHomeDir reads it lazily on every call.
let tmpDir = ''
// While true the mock redirects the config home to tmpDir; flipped off once
// this file's tests are done.
let useMockForCacheStats = true
afterAll(() => {
useMockForCacheStats = false
})
// Provide REAL semantics for every other envUtils export — this mock is
// process-global, so envUtils.test.ts and other consumers (providers,
// model, etc.) running in the same process see real behavior for
// hasNodeOption, isEnvTruthy, isBareMode, parseEnvVars, etc. Only
// getClaudeConfigHomeDir is overridden (to point at the test temp dir).
//
// [model-name prefix, env var holding the region override] pairs consumed by
// the mocked getVertexRegionForModel below. Lookup is first-match with
// startsWith, so more specific prefixes (e.g. 'claude-opus-4-1') must stay
// ahead of the shorter prefixes they extend ('claude-opus-4').
const VERTEX_REGION_OVERRIDES: ReadonlyArray<[string, string]> = [
['claude-haiku-4-5', 'VERTEX_REGION_CLAUDE_HAIKU_4_5'],
['claude-3-5-haiku', 'VERTEX_REGION_CLAUDE_3_5_HAIKU'],
['claude-3-5-sonnet', 'VERTEX_REGION_CLAUDE_3_5_SONNET'],
['claude-3-7-sonnet', 'VERTEX_REGION_CLAUDE_3_7_SONNET'],
['claude-opus-4-1', 'VERTEX_REGION_CLAUDE_4_1_OPUS'],
['claude-opus-4', 'VERTEX_REGION_CLAUDE_4_0_OPUS'],
['claude-sonnet-4-6', 'VERTEX_REGION_CLAUDE_4_6_SONNET'],
['claude-sonnet-4-5', 'VERTEX_REGION_CLAUDE_4_5_SONNET'],
['claude-sonnet-4', 'VERTEX_REGION_CLAUDE_4_0_SONNET'],
]
// Stand-in for envUtils.isEnvTruthy: booleans pass through; strings count as
// true only when they are 1/true/yes/on (lower-cased, then trimmed);
// undefined and the empty string are false.
const realIsEnvTruthy = (v: string | boolean | undefined): boolean => {
  if (typeof v === 'boolean') return v
  if (v === undefined || v === '') return false
  switch (v.toLowerCase().trim()) {
    case '1':
    case 'true':
    case 'yes':
    case 'on':
      return true
    default:
      return false
  }
}
// Stand-in for envUtils.isEnvDefinedFalsy: true only for an explicit "off"
// value — boolean false, or a string that is 0/false/no/off (lower-cased,
// then trimmed). undefined and the empty string are NOT "defined falsy".
const realIsEnvDefinedFalsy = (v: string | boolean | undefined): boolean => {
  if (typeof v === 'boolean') return !v
  if (v === undefined || v === '') return false
  switch (v.toLowerCase().trim()) {
    case '0':
    case 'false':
    case 'no':
    case 'off':
      return true
    default:
      return false
  }
}
// Stand-in for envUtils.getDefaultVertexRegion: CLOUD_ML_REGION when it is
// set to a non-empty value, otherwise 'us-east5'.
const realDefaultVertexRegion = (): string => {
  const configured = process.env.CLOUD_ML_REGION
  return configured ? configured : 'us-east5'
}
// Real getClaudeConfigHomeDir is memoized via lodash, so consumers may call
// `.cache.clear()` on it (see tasks.test.ts). Provide a no-op .cache stub.
//
// The function itself is not memoized: every call re-reads the flag and
// tmpDir, so it follows tmpDir as each beforeEach replaces it and switches to
// the env/home-based path once useMockForCacheStats is turned off.
const mockedGetClaudeConfigHomeDir: (() => string) & {
cache: { clear: () => void; get: (k: unknown) => unknown }
} = Object.assign(
() =>
useMockForCacheStats
? tmpDir
: (process.env.CLAUDE_CONFIG_DIR ?? join(homedir(), '.claude')).normalize(
'NFC',
),
{
cache: {
clear: () => {},
get: (_k: unknown) => undefined,
},
},
)
// Replace the envUtils module for the whole process. Only the config-home
// related exports (getClaudeConfigHomeDir, getTeamsDir) are redirected to
// tmpDir, and only while useMockForCacheStats is true; the remaining exports
// are local re-implementations or fixed stubs.
mock.module('src/utils/envUtils.js', () => ({
getClaudeConfigHomeDir: mockedGetClaudeConfigHomeDir,
isEnvTruthy: realIsEnvTruthy,
hasNodeOption: (flag: string) => {
// True when NODE_OPTIONS contains `flag` as a whole whitespace-separated word.
const opts = process.env.NODE_OPTIONS
return !!opts && opts.split(/\s+/).includes(flag)
},
isEnvDefinedFalsy: realIsEnvDefinedFalsy,
isBareMode: () =>
realIsEnvTruthy(process.env.CLAUDE_CODE_SIMPLE) ||
process.argv.includes('--bare'),
parseEnvVars: (rawEnvArgs: string[] | undefined) => {
// Parses KEY=value strings; everything after the first '=' is the value,
// so values may themselves contain '='.
const parsed: Record<string, string> = {}
if (rawEnvArgs) {
for (const envStr of rawEnvArgs) {
const [key, ...valueParts] = envStr.split('=')
if (!key || valueParts.length === 0) {
throw new Error(
`Invalid environment variable format: ${envStr}, environment variables should be added as: -e KEY1=value1 -e KEY2=value2`,
)
}
parsed[key] = valueParts.join('=')
}
}
return parsed
},
getAWSRegion: () =>
process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || 'us-east-1',
getDefaultVertexRegion: realDefaultVertexRegion,
shouldMaintainProjectWorkingDir: () =>
realIsEnvTruthy(process.env.CLAUDE_BASH_MAINTAIN_PROJECT_WORKING_DIR),
isRunningOnHomespace: () =>
process.env.USER_TYPE === 'ant' &&
realIsEnvTruthy(process.env.COO_RUNNING_ON_HOMESPACE),
// Fixed stub: always false.
isInProtectedNamespace: () => false,
getTeamsDir: () =>
useMockForCacheStats
? `${tmpDir}/teams`
: join(
(
process.env.CLAUDE_CONFIG_DIR ?? join(homedir(), '.claude')
).normalize('NFC'),
'teams',
),
// Fixed stubs: ignore their arguments and the environment.
getEnvBool: () => false,
getEnvNumber: () => undefined,
getVertexRegionForModel: (model: string | undefined) => {
// First prefix in VERTEX_REGION_OVERRIDES that the model name starts with
// selects the env var to consult; an unset/empty var or no match falls
// back to the default region.
if (model) {
const match = VERTEX_REGION_OVERRIDES.find(([prefix]) =>
model.startsWith(prefix),
)
if (match) {
return process.env[match[1]] || realDefaultVertexRegion()
}
}
return realDefaultVertexRegion()
},
}))
import {
computeHitRate,
tokenSignature,
getStateFilePath,
readState,
writeStateAtomic,
type CacheUsage,
type CacheStatsState,
} from '../cacheStats.js'
import {
onResponse,
getCacheStatsState,
initCacheStatsState,
_resetCacheStatsStateForTest,
} from '../cacheStatsState.js'
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Test helper: build a CacheUsage from positional token counts.
 *
 * @param input - value for input_tokens
 * @param create - value for cache_creation_input_tokens
 * @param read - value for cache_read_input_tokens
 */
function usage(input: number, create: number, read: number): CacheUsage {
  const snapshot: CacheUsage = {
    input_tokens: input,
    cache_creation_input_tokens: create,
    cache_read_input_tokens: read,
  }
  return snapshot
}
// ---------------------------------------------------------------------------
// computeHitRate
// ---------------------------------------------------------------------------
// Pure-function checks for computeHitRate. usage(input, create, read) builds
// the argument; the expected value is read / (input + create + read) as a
// rounded percentage, or null when there is nothing to divide by.
describe('computeHitRate', () => {
test('returns null for null input', () => {
expect(computeHitRate(null)).toBeNull()
})
test('returns null when all fields are 0 (denominator = 0)', () => {
expect(computeHitRate(usage(0, 0, 0))).toBeNull()
})
test('100% when all tokens are cache reads', () => {
expect(computeHitRate(usage(0, 0, 1000))).toBe(100)
})
test('0% when no cache reads', () => {
expect(computeHitRate(usage(1000, 0, 0))).toBe(0)
})
test('rounds to integer (50%)', () => {
expect(computeHitRate(usage(500, 0, 500))).toBe(50)
})
test('rounds fractional values', () => {
// read=1, total=3 → 33.33... → rounds to 33
expect(computeHitRate(usage(2, 0, 1))).toBe(33)
})
test('handles large numbers without overflow', () => {
// Three equal billion-token counts: the sum is 3e9 and the ratio is 1/3.
const big = 1_000_000_000
expect(computeHitRate(usage(big, big, big))).toBe(33)
})
test('cache_creation does not count as reads', () => {
// Only cache_read_input_tokens in numerator
expect(computeHitRate(usage(0, 1000, 0))).toBe(0)
})
})
// ---------------------------------------------------------------------------
// tokenSignature
// ---------------------------------------------------------------------------
// tokenSignature must be deterministic and sensitive to each of the three
// counters independently. The first test pins the exact
// 'input|creation|read' format.
describe('tokenSignature', () => {
test('produces deterministic string', () => {
const u = usage(100, 200, 300)
expect(tokenSignature(u)).toBe('100|200|300')
})
test('changes when input_tokens changes', () => {
expect(tokenSignature(usage(1, 2, 3))).not.toBe(
tokenSignature(usage(9, 2, 3)),
)
})
test('changes when cache_creation changes', () => {
expect(tokenSignature(usage(1, 2, 3))).not.toBe(
tokenSignature(usage(1, 9, 3)),
)
})
test('changes when cache_read changes', () => {
expect(tokenSignature(usage(1, 2, 3))).not.toBe(
tokenSignature(usage(1, 2, 9)),
)
})
})
// ---------------------------------------------------------------------------
// State file: getStateFilePath
// ---------------------------------------------------------------------------
// Path-derivation checks. Each test gets a fresh temp directory, which the
// mocked getClaudeConfigHomeDir reports as the config home.
describe('getStateFilePath', () => {
beforeEach(async () => {
tmpDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'cache-stats-test-'))
})
afterEach(async () => {
await fsp.rm(tmpDir, { recursive: true, force: true })
})
test('returns path inside config home dir', () => {
const p = getStateFilePath('session-abc')
expect(p).toContain('cache-stats')
expect(p.startsWith(tmpDir)).toBe(true)
})
test('different sessionIds produce different paths', () => {
const p1 = getStateFilePath('session-one')
const p2 = getStateFilePath('session-two')
expect(p1).not.toBe(p2)
})
test('same sessionId always produces same path (deterministic)', () => {
expect(getStateFilePath('s1')).toBe(getStateFilePath('s1'))
})
test('file name is 16 hex chars + .json', () => {
// The session id itself must not appear in the file name; only a
// fixed-width lowercase-hex token does.
const p = getStateFilePath('any-session-id')
const base = path.basename(p)
expect(base).toMatch(/^[0-9a-f]{16}\.json$/)
})
})
// ---------------------------------------------------------------------------
// State file: readState / writeStateAtomic
// ---------------------------------------------------------------------------
// Persistence checks against a per-test temp directory: readState must fall
// back to the initial state for a missing, corrupt, or wrong-shape file, and
// writeStateAtomic output must round-trip through readState.
describe('readState / writeStateAtomic', () => {
beforeEach(async () => {
tmpDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'cache-stats-test-'))
})
afterEach(async () => {
await fsp.rm(tmpDir, { recursive: true, force: true })
})
test('readState returns init defaults when file is missing', async () => {
const p = path.join(tmpDir, 'cache-stats', 'nonexistent.json')
const s = await readState(p)
expect(s.version).toBe(1)
expect(s.signature).toBeNull()
expect(s.lastResetAt).toBeNull()
expect(s.lastHitRate).toBeNull()
})
test('readState returns init defaults on corrupt JSON', async () => {
const p = path.join(tmpDir, 'bad.json')
await fsp.writeFile(p, 'not-json!!!', 'utf8')
const s = await readState(p)
expect(s.signature).toBeNull()
})
test('readState returns init defaults on invalid shape', async () => {
// Valid JSON, but neither the expected version nor the expected fields.
const p = path.join(tmpDir, 'bad-shape.json')
await fsp.writeFile(p, JSON.stringify({ version: 2, foo: 'bar' }), 'utf8')
const s = await readState(p)
expect(s.signature).toBeNull()
})
test('round-trip: writeStateAtomic then readState', async () => {
const p = getStateFilePath('round-trip-session')
const state: CacheStatsState = {
version: 1,
signature: '100|200|300',
lastResetAt: 1_700_000_000_000,
lastHitRate: 75,
}
await writeStateAtomic(p, state)
const read = await readState(p)
expect(read).toEqual(state)
})
test('writeStateAtomic creates parent directory if missing', async () => {
// Neither 'deep' nor 'deep/nested' exists before the write.
const p = path.join(tmpDir, 'deep', 'nested', 'state.json')
const state: CacheStatsState = {
version: 1,
signature: null,
lastResetAt: null,
lastHitRate: null,
}
await writeStateAtomic(p, state)
const read = await readState(p)
expect(read.version).toBe(1)
})
})
// ---------------------------------------------------------------------------
// onResponse / getCacheStatsState (in-memory singleton)
// ---------------------------------------------------------------------------
// Behaviour of the in-memory singleton: lastResetAt moves only when the token
// signature changes. The singleton is reset before every test so state cannot
// carry over between cases.
describe('onResponse', () => {
beforeEach(async () => {
tmpDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'cache-stats-test-'))
_resetCacheStatsStateForTest()
})
afterEach(async () => {
await fsp.rm(tmpDir, { recursive: true, force: true })
})
test('initial state has null signature and lastResetAt', () => {
const s = getCacheStatsState()
expect(s.signature).toBeNull()
expect(s.lastResetAt).toBeNull()
})
test('first onResponse sets lastResetAt and signature', () => {
const u = usage(100, 0, 50)
// Bracket the call with timestamps so lastResetAt can be range-checked
// without mocking the clock.
const before = Date.now()
const s = onResponse(u)
const after = Date.now()
expect(s.signature).toBe(tokenSignature(u))
expect(s.lastResetAt).toBeGreaterThanOrEqual(before)
expect(s.lastResetAt).toBeLessThanOrEqual(after)
expect(s.lastHitRate).toBe(33) // 50/(100+50) ≈ 33
})
test('same signature does NOT reset lastResetAt', async () => {
const u = usage(100, 0, 50)
onResponse(u)
const firstState = getCacheStatsState()
const firstResetAt = firstState.lastResetAt
// Wait a tick to ensure Date.now() would differ
await new Promise(r => setTimeout(r, 5))
onResponse(u) // same signature
const secondState = getCacheStatsState()
expect(secondState.lastResetAt).toBe(firstResetAt)
})
test('different signature RESETS lastResetAt', async () => {
const u1 = usage(100, 0, 50)
onResponse(u1)
const firstState = getCacheStatsState()
// Let the clock advance so a reset yields a strictly later timestamp.
await new Promise(r => setTimeout(r, 5))
const u2 = usage(200, 0, 100) // different signature
onResponse(u2)
const secondState = getCacheStatsState()
expect(secondState.lastResetAt).toBeGreaterThan(firstState.lastResetAt!)
})
test('lastHitRate is updated on signature change', () => {
onResponse(usage(1000, 0, 0)) // 0% hit rate
const s1 = getCacheStatsState()
expect(s1.lastHitRate).toBe(0)
onResponse(usage(0, 0, 1000)) // 100% hit rate — different sig
const s2 = getCacheStatsState()
expect(s2.lastHitRate).toBe(100)
})
})
// ---------------------------------------------------------------------------
// Multi-session isolation
// ---------------------------------------------------------------------------
// State written for one session id must not be visible through another, and
// initCacheStatsState must seed the in-memory singleton from the file that
// belongs to the given session.
describe('multi-session file isolation', () => {
beforeEach(async () => {
tmpDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'cache-stats-test-'))
})
afterEach(async () => {
await fsp.rm(tmpDir, { recursive: true, force: true })
})
test('different session IDs produce different state files', async () => {
const p1 = getStateFilePath('session-alpha')
const p2 = getStateFilePath('session-beta')
const s1: CacheStatsState = {
version: 1,
signature: 'sig-alpha',
lastResetAt: 1000,
lastHitRate: 90,
}
const s2: CacheStatsState = {
version: 1,
signature: 'sig-beta',
lastResetAt: 2000,
lastHitRate: 10,
}
await writeStateAtomic(p1, s1)
await writeStateAtomic(p2, s2)
// Read both back after both writes so an overwrite would be detected.
const r1 = await readState(p1)
const r2 = await readState(p2)
expect(r1.signature).toBe('sig-alpha')
expect(r2.signature).toBe('sig-beta')
expect(r1.lastHitRate).toBe(90)
expect(r2.lastHitRate).toBe(10)
})
test('initCacheStatsState loads persisted fallback values', async () => {
// This describe's beforeEach does not reset the singleton, so do it here.
_resetCacheStatsStateForTest()
const sid = 'test-session-init'
const p = getStateFilePath(sid)
const persisted: CacheStatsState = {
version: 1,
signature: '500|100|400',
lastResetAt: 1_700_000_000_000,
lastHitRate: 40,
}
await writeStateAtomic(p, persisted)
await initCacheStatsState(sid)
const s = getCacheStatsState()
expect(s.lastHitRate).toBe(40)
expect(s.lastResetAt).toBe(1_700_000_000_000)
expect(s.signature).toBe('500|100|400')
})
})

View File

@@ -1,4 +1,13 @@
import { describe, test, expect, mock, beforeEach, afterEach } from 'bun:test'
import {
afterAll,
afterEach,
beforeAll,
beforeEach,
describe,
expect,
mock,
test,
} from 'bun:test'
// Mock dgram before importing LanBeacon
const mockSocket = {
@@ -13,9 +22,32 @@ const mockSocket = {
close: mock(() => {}),
}
mock.module('dgram', () => ({
createSocket: () => mockSocket,
}))
// Spread+flag pattern: previously this was a bare `mock.module('dgram', ...)`
// which leaked the stub createSocket into every later test file in the
// process via Bun's last-write-wins module mock cache. Spread real dgram
// + gate the stub behind useLanBeaconDgramStubs so other tests see real UDP.
//
// Gate for the stub: while false, the mocked createSocket forwards to the
// real implementation. Flipped by the beforeAll/afterAll hooks in this file.
let useLanBeaconDgramStubs = false
mock.module('dgram', () => {
// Load the real module so every export other than createSocket passes through.
// eslint-disable-next-line @typescript-eslint/no-require-imports
const real = require('dgram') as Record<string, unknown>
return {
...real,
// Also expose the real module object under `default`.
default: real,
// The flag is read on every call, not when the mock is registered: returns
// the shared mockSocket while it is true, otherwise forwards all arguments
// to the real createSocket.
createSocket: ((...args: unknown[]) =>
useLanBeaconDgramStubs
? mockSocket
: (real.createSocket as (...a: unknown[]) => unknown)(
...args,
)) as typeof real.createSocket,
}
})
// Turn the dgram stub on before this file's tests run...
beforeAll(() => {
useLanBeaconDgramStubs = true
})
// ...and off again afterwards, so createSocket forwards to the real
// implementation once this file's tests are done.
afterAll(() => {
useLanBeaconDgramStubs = false
})
const { LanBeacon } = await import('../lanBeacon.js')

109
src/utils/cacheStats.ts Normal file
View File

@@ -0,0 +1,109 @@
import { createHash } from 'node:crypto'
import { mkdir, readFile, rename, rm, writeFile } from 'node:fs/promises'
import { dirname, join } from 'node:path'
import { getClaudeConfigHomeDir } from './envUtils.js'
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
 * Token counters of one usage snapshot, as consumed by `computeHitRate` and
 * `tokenSignature`.
 * NOTE(review): the field names look like they mirror the API's `usage`
 * payload — confirm against the caller.
 */
export interface CacheUsage {
// The three counters are summed to form the hit-rate denominator.
input_tokens: number
cache_creation_input_tokens: number
// Numerator of the hit rate.
cache_read_input_tokens: number
}
/**
 * Shape of the state object that is serialised to, and validated when read
 * back from, the per-session state file.
 */
export interface CacheStatsState {
// Schema version; the validator in this module only accepts the literal 1.
version: 1
// Token signature of the last recorded usage snapshot, or null if none.
signature: string | null
lastResetAt: number | null // ms epoch; reset when signature changes
lastHitRate: number | null // persisted fallback
}
// ---------------------------------------------------------------------------
// Pure functions
// ---------------------------------------------------------------------------
/**
 * Integer percentage that `cache_read_input_tokens` makes up of the sum of
 * all three counters.
 *
 * @param u - Usage snapshot, or `null` when none is available.
 * @returns The rounded percentage (0–100 for non-negative counters), or
 *   `null` when `u` is missing or the counters sum to zero.
 */
export function computeHitRate(u: CacheUsage | null): number | null {
  if (!u) return null
  const {
    input_tokens: uncached,
    cache_creation_input_tokens: written,
    cache_read_input_tokens: served,
  } = u
  const total = uncached + written + served
  // A zero total would divide by zero; report "unknown" instead.
  return total === 0 ? null : Math.round((served / total) * 100)
}
/**
 * Build a signature string from the three counters of a usage snapshot,
 * joined with `|` in the order input / cache-creation / cache-read.
 *
 * Snapshots with equal counters produce equal signatures; callers treat a
 * changed signature as "a new API response arrived" and restart the TTL clock.
 */
export function tokenSignature(u: CacheUsage): string {
  const counters = [
    u.input_tokens,
    u.cache_creation_input_tokens,
    u.cache_read_input_tokens,
  ]
  return counters.map(String).join('|')
}
// ---------------------------------------------------------------------------
// State file I/O
// ---------------------------------------------------------------------------
/**
 * Resolve the state file for a session.
 *
 * The file name is the first 16 hex characters of the SHA-256 digest of
 * `sessionId`, so the path is deterministic per session and the raw session
 * id itself never appears on disk. Files live in the `cache-stats` directory
 * under the Claude config home.
 */
export function getStateFilePath(sessionId: string): string {
  const digest = createHash('sha256').update(sessionId).digest('hex')
  const fileName = `${digest.slice(0, 16)}.json`
  return join(getClaudeConfigHomeDir(), 'cache-stats', fileName)
}
// Defaults handed out by readState when the file is missing, unreadable, or
// fails validation. Always returned as a spread copy, never by reference.
const INIT_STATE: CacheStatsState = {
version: 1,
signature: null,
lastResetAt: null,
lastHitRate: null,
}
/**
 * Runtime guard for parsed state files: requires a non-null object whose
 * `version` is exactly 1 and whose remaining fields are each either `null`
 * or of the expected primitive type.
 */
function isValidState(obj: unknown): obj is CacheStatsState {
  if (obj === null || typeof obj !== 'object') return false
  const candidate = obj as Record<string, unknown>
  const isNullOr = (value: unknown, kind: 'string' | 'number'): boolean =>
    value === null || typeof value === kind

  if (candidate['version'] !== 1) return false
  return (
    isNullOr(candidate['signature'], 'string') &&
    isNullOr(candidate['lastResetAt'], 'number') &&
    isNullOr(candidate['lastHitRate'], 'number')
  )
}
/**
 * Load and validate a state file.
 *
 * Never rejects because of a missing, unreadable, unparsable, or
 * structurally invalid file: each of those cases yields a fresh copy of the
 * initial defaults.
 */
export async function readState(filePath: string): Promise<CacheStatsState> {
  try {
    const text = await readFile(filePath, 'utf8')
    const candidate: unknown = JSON.parse(text)
    if (isValidState(candidate)) return candidate
  } catch {
    // Fall through to the defaults below.
  }
  return { ...INIT_STATE }
}
/**
 * Write state atomically: serialise to a sibling tmp file, then rename it over
 * the target, so readers never observe a partially written file.
 *
 * Best-effort by contract: every failure (creating the directory, writing,
 * or renaming) is swallowed so callers — including fire-and-forget ones —
 * never see a rejection. On failure the tmp file is removed rather than left
 * behind.
 *
 * @param filePath - Final location of the state file; parent directories are
 *   created as needed.
 * @param state - State to persist as JSON.
 */
export async function writeStateAtomic(
  filePath: string,
  state: CacheStatsState,
): Promise<void> {
  const tmp = `${filePath}.${process.pid}.tmp`
  try {
    // mkdir is inside the try so an unwritable or blocked parent directory
    // is also treated as best-effort instead of rejecting.
    await mkdir(dirname(filePath), { recursive: true })
    await writeFile(tmp, JSON.stringify(state), 'utf8')
    await rename(tmp, filePath)
  } catch {
    // Best-effort; silently ignore errors so the UI never crashes.
    // Clean up a tmp file that may have been written before the failure.
    await rm(tmp, { force: true }).catch(() => {})
  }
}

View File

@@ -0,0 +1,92 @@
/**
* In-memory singleton that tracks cache hit-rate state for the current session.
*
* Call `onResponse(usage)` every time a new API response arrives.
* The singleton compares the token signature of the new response against the
* previously seen signature. When it changes (= a new API call completed),
* it resets `lastResetAt` to Date.now() and asynchronously persists state so
* that a later run initialised with the same session id can show the TTL
* countdown immediately on startup.
*/
import type { CacheUsage, CacheStatsState } from './cacheStats.js'
import {
computeHitRate,
tokenSignature,
getStateFilePath,
readState,
writeStateAtomic,
} from './cacheStats.js'
// In-memory view of the cache stats: the same three fields the functions in
// this module copy to and from the persisted state (which adds `version`).
interface MemState {
signature: string | null
lastResetAt: number | null
lastHitRate: number | null
}
// Current state. Functions in this module replace the whole object on every
// update rather than mutating fields in place.
let memState: MemState = {
signature: null,
lastResetAt: null,
lastHitRate: null,
}
// Session whose state file receives writes; null until initCacheStatsState
// has been called (and again after the test-only reset).
let sessionId: string | null = null
/**
 * Bind the singleton to a session and pre-load that session's persisted state
 * so the UI can show fallback values immediately.
 *
 * Must be called once at session start. The persisted values are applied only
 * if nothing else touched the singleton while the state file was being read:
 * a response recorded in the meantime, a reset, or a newer init call all take
 * precedence over the (older) data on disk.
 *
 * @param sid - Session id used to derive the state file path.
 */
export async function initCacheStatsState(sid: string): Promise<void> {
  sessionId = sid
  // memState is replaced wholesale on every update, so comparing object
  // identity after the await detects any change made while reading.
  const stateBeforeLoad = memState
  const persisted = await readState(getStateFilePath(sid))
  if (sessionId !== sid || memState !== stateBeforeLoad) return
  memState = {
    signature: persisted.signature,
    lastResetAt: persisted.lastResetAt,
    lastHitRate: persisted.lastHitRate,
  }
}
/**
 * Record the usage of a newly received assistant response.
 *
 * When the token signature differs from the one last seen, the TTL clock is
 * restarted (`lastResetAt = Date.now()`), the hit rate is recomputed, and —
 * if a session has been initialised — the new state is persisted in the
 * background. An unchanged signature leaves the state untouched.
 *
 * @param usage - Token counters of the response.
 * @returns A copy of the in-memory state after the update.
 */
export function onResponse(usage: CacheUsage): MemState {
  const sig = tokenSignature(usage)
  if (sig === memState.signature) return { ...memState }

  // New API response — reset the TTL clock.
  memState = {
    signature: sig,
    lastResetAt: Date.now(),
    lastHitRate: computeHitRate(usage),
  }

  if (sessionId !== null) {
    const toWrite: CacheStatsState = { version: 1, ...memState }
    // Fire-and-forget persistence; the explicit catch guarantees a failed
    // write can never surface as an unhandled promise rejection.
    void writeStateAtomic(getStateFilePath(sessionId), toWrite).catch(() => {})
  }
  return { ...memState }
}
/** Return a copy of the current in-memory state; the state itself is not modified. */
export function getCacheStatsState(): MemState {
  const { signature, lastResetAt, lastHitRate } = memState
  return { signature, lastResetAt, lastHitRate }
}
/**
 * Test-only helper: detach the singleton from any session and clear every
 * tracked value so each test starts from a blank state.
 */
export function _resetCacheStatsStateForTest(): void {
  sessionId = null
  memState = {
    signature: null,
    lastResetAt: null,
    lastHitRate: null,
  }
}

View File

@@ -222,6 +222,12 @@ export type GlobalConfig = {
rejected?: string[]
}
primaryApiKey?: string // Primary API key for the user when no environment variable is set, set via oauth (TODO: rename)
/**
* Workspace API key saved via /login UI (sk-ant-api03-*).
* Stored in plaintext — file should be gitignored and chmod 600.
* ANTHROPIC_API_KEY env var takes precedence when both are present.
*/
workspaceApiKey?: string
hasAcknowledgedCostThreshold?: boolean
hasSeenUndercoverAutoNotice?: boolean // ant-only: whether the one-time auto-undercover explainer has been shown
hasSeenUltraplanTerms?: boolean // ant-only: whether the one-time CCR terms notice has been shown in the ultraplan launch dialog