feat: 添加 Provider Registry、StatusLine、Cache Stats 和其他增强

- providerRegistry: OpenAI 兼容 provider 切换（Cerebras/Groq/DeepSeek/Qwen） - StatusLine: 增强状态栏（缓存命中率、TTL 倒计时、自定义 shell 命令） - cacheStats: 缓存命中率和 token 签名追踪 - ultrareviewPreflight: 代码审查预检服务 - SkillsMenu/filterSkills: 技能菜单过滤增强 - MagicDocs/langfuse prompts: 提示词更新 - claude.ts: API 客户端更新 Co-Authored-By: glm-5-turbo <zai-org@claude-code-best.win>
2026-06-15 12:55:51 +00:00 · 2026-05-09 23:04:35 +08:00
parent fdddb6dbe8
commit efaf4afd9c
28 changed files with 3613 additions and 219 deletions
--- a/src/commands/review/UltrareviewPreflightDialog.tsx
+++ b/src/commands/review/UltrareviewPreflightDialog.tsx
@@ -0,0 +1,56 @@
+import React, { useCallback, useRef, useState } from 'react';
+import { Box, Dialog, Text } from '@anthropic/ink';
+import { Select } from '../../components/CustomSelect/select.js';
+
+type Props = {
+  billingNote: string | null;
+  onConfirm: (signal: AbortSignal) => Promise<void>;
+  onCancel: () => void;
+};
+
+/**
+ * Dialog shown when /v1/ultrareview/preflight returns action='confirm'.
+ * Displays the server-provided billing_note (or a generic fallback) and
+ * gives the user a Proceed / Cancel choice.
+ *
+ * @param billingNote - Note to display; when null, the generic
+ *   "This run may incur additional cost." text is shown instead.
+ * @param onConfirm - Invoked when the user picks Proceed. Receives an
+ *   AbortSignal that is aborted if the dialog is dismissed afterwards. If the
+ *   returned promise rejects, the dialog leaves the "Launching…" state and
+ *   shows the Proceed / Cancel choice again; the rejection reason is discarded.
+ * @param onCancel - Invoked when the user picks Cancel or dismisses the dialog.
+ */
+export function UltrareviewPreflightDialog({ billingNote, onConfirm, onCancel }: Props): React.ReactNode {
+  const [isLaunching, setIsLaunching] = useState(false);
+  const abortControllerRef = useRef(new AbortController()); // NOTE(review): the initializer expression runs on every render; only the first controller is kept in the ref
+
+  const handleSelect = useCallback(
+    (value: string) => {
+      if (value === 'proceed') {
+        setIsLaunching(true); // swaps the option list for the "Launching…" line
+        void onConfirm(abortControllerRef.current.signal).catch(() => setIsLaunching(false)); // on rejection, show the options again
+      } else {
+        onCancel(); // "Cancel" option: the signal is not aborted on this path
+      }
+    },
+    [onConfirm, onCancel],
+  );
+
+  const handleCancel = useCallback(() => {
+    abortControllerRef.current.abort(); // aborts the signal handed to onConfirm, if it was handed out
+    onCancel();
+  }, [onCancel]);
+
+  const options = [
+    { label: 'Proceed', value: 'proceed' },
+    { label: 'Cancel', value: 'cancel' },
+  ];
+
+  const displayNote = billingNote ?? 'This run may incur additional cost.';
+
+  return (
+    <Dialog title="Ultrareview — additional cost" onCancel={handleCancel} color="background">
+      <Box flexDirection="column" gap={1}>
+        <Text>{displayNote}</Text>
+        {isLaunching ? (
+          <Text color="background">Launching…</Text>
+        ) : (
+          <Select options={options} onChange={handleSelect} onCancel={handleCancel} />
+        )}
+      </Box>
+    </Dialog>
+  );
+}
--- a/src/commands/review/tests/ultrareviewCommand.test.tsx
+++ b/src/commands/review/tests/ultrareviewCommand.test.tsx
@@ -0,0 +1,312 @@
+/**
+ * Regression tests for `ultrareviewCommand.call` (src/commands/review/
+ * ultrareviewCommand.tsx). The previous version of `call` made an axios
+ * preflight POST and branched on `action: proceed | blocked | confirm`;
+ * that integration was removed and `call` now branches on `checkOverageGate()`'s
+ * four `kind` values: `not-enabled`, `low-balance`, `needs-confirm`, `proceed`.
+ *
+ * These tests verify each branch:
+ *   - `proceed` → forwards billingNote and args to `launchRemoteReview`,
+ *     calls `onDone(text)`, returns null
+ *   - `not-enabled` → onDone with paywall message + `display: 'system'`,
+ *     returns null, does NOT launch
+ *   - `low-balance` → onDone with balance-too-low message including the
+ *     available amount, returns null, does NOT launch
+ *   - `needs-confirm` → returns the React `UltrareviewOverageDialog` element,
+ *     does NOT call onDone, does NOT launch
+ *   - `proceed` + null launch result → onDone with "failed to launch" message
+ *   - `proceed` + arg pass-through → args (e.g. PR number) reach launchRemoteReview
+ *     verbatim (call doesn't parse them itself)
+ */
+import { afterAll, beforeEach, describe, expect, mock, test } from 'bun:test';
+import { debugMock } from '../../../../tests/mocks/debug.js';
+import { logMock } from '../../../../tests/mocks/log.js';
+import { setupAxiosMock } from '../../../../tests/mocks/axios.js';
+
+// Pre-import the real react and ink modules so we can delegate after this
+// suite. Bun's mock.module is process-global / last-write-wins; without
+// delegation the stub createElement / stub ink components leak into other
+// test files (e.g. SnapshotUpdateDialog.test.tsx, AgentsPlatformView.test.tsx)
+// that need real React.createElement and real Box/Text components.
+const _realReactMod = (await import('react')) as Record<string, unknown> & {
+  default?: Record<string, unknown>;
+};
+const _realInkMod = (await import('@anthropic/ink')) as Record<string, unknown>;
+let _useStubReactForUltrareview = true;
+let _useStubInkForUltrareview = true;
+afterAll(() => {
+  _useStubReactForUltrareview = false;
+  _useStubInkForUltrareview = false;
+  // The handle reference exists by the time afterAll runs (TDZ resolves via
+  // closure). Flip useStubs off so the spread-real fall-through kicks in for
+  // any test file that runs after this one in the same process.
+  _ultrareviewAxiosHandle.useStubs = false;
+});
+
+// Mock dependency chain before any subject import
+mock.module('src/utils/debug.ts', debugMock);
+mock.module('src/utils/log.ts', logMock);
+mock.module('src/services/analytics/index.js', () => ({
+  logEvent: () => {},
+}));
+mock.module('src/services/analytics/growthbook.js', () => ({
+  getFeatureValue_CACHED_MAY_BE_STALE: () => null,
+}));
+
+// Mock auth utilities
+mock.module('src/utils/auth.js', () => ({
+  isClaudeAISubscriber: () => true,
+  isTeamSubscriber: () => false,
+  isEnterpriseSubscriber: () => false,
+}));
+
+// Mock checkOverageGate with a mutable gate result so each test can drive
+// the four branches in ultrareviewCommand.call (not-enabled, low-balance,
+// needs-confirm, proceed). launchRemoteReview captures args for the
+// args-forwarding test, and its return value is mutable too — `null` triggers
+// the "failed to launch" onDone branch.
+type GateResult =
+  | { kind: 'proceed'; billingNote: string }
+  | { kind: 'not-enabled' }
+  | { kind: 'low-balance'; available: number }
+  | { kind: 'needs-confirm' };
+let _gateResult: GateResult = { kind: 'proceed', billingNote: '' };
+let _launchResult: Array<{ type: 'text'; text: string }> | null = [{ type: 'text', text: 'Launched successfully.' }];
+const _capturedLaunchArgs: string[] = [];
+mock.module('src/commands/review/reviewRemote.js', () => ({
+  checkOverageGate: async () => _gateResult,
+  confirmOverage: () => {},
+  launchRemoteReview: async (args: string) => {
+    _capturedLaunchArgs.push(args);
+    return _launchResult;
+  },
+}));
+
+// Mock OAuth config so real fetchUltrareviewPreflight can run
+mock.module('src/constants/oauth.js', () => ({
+  getOauthConfig: () => ({ BASE_API_URL: 'https://api.anthropic.com' }),
+}));
+
+// Mock prepareApiRequest so real fetchUltrareviewPreflight skips auth
+mock.module('src/utils/teleport/api.js', () => ({
+  prepareApiRequest: async () => ({
+    accessToken: 'test-token',
+    orgUUID: 'org-uuid-test',
+  }),
+  getOAuthHeaders: (token: string) => ({
+    Authorization: `Bearer ${token}`,
+    'Content-Type': 'application/json',
+    'anthropic-version': '2023-06-01',
+  }),
+}));
+
+// Mock axios — per-test responses set via mockAxiosPost.mockImplementationOnce
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+const mockAxiosPost = mock(
+  async (..._args: any[]): Promise<any> => ({
+    status: 200,
+    data: { action: 'proceed', billing_note: null },
+  }),
+);
+
+// Spread real axios + flag-gate stubs so the per-test mockAxiosPost stops
+// leaking into later test files (mock.module is process-global). Default ON
+// for this suite; afterAll above flips _useStubReactForUltrareview, but here
+// we tie axios cleanup to the helper's own flag — see suite-level afterAll.
+const _ultrareviewAxiosHandle = setupAxiosMock();
+_ultrareviewAxiosHandle.useStubs = true;
+_ultrareviewAxiosHandle.stubs.post = mockAxiosPost;
+_ultrareviewAxiosHandle.stubs.isAxiosError = (e: unknown) =>
+  typeof e === 'object' && e !== null && (e as { isAxiosError?: boolean }).isAxiosError === true;
+
+// Mock detectCurrentRepositoryWithHost
+mock.module('src/utils/detectRepository.js', () => ({
+  detectCurrentRepositoryWithHost: async () => ({
+    host: 'github.com',
+    owner: 'testowner',
+    name: 'testrepo',
+  }),
+}));
+
+// Minimal mock for React/Ink so we don't need a full renderer.
+// Preserve any explicit `children` prop when no varargs children are passed
+// — otherwise consumers who pass `children` via the props object (e.g.
+// SnapshotUpdateDialog.ts uses `React.createElement(Dialog, { ..., children })`)
+// see their array overwritten with `[]`. mock.module is process-global so this
+// mock survives into other test files in the same run; afterAll flips the flag
+// so we delegate to real React thereafter.
+mock.module('react', () => {
+  const stubCreateElement = (type: unknown, props: unknown, ...children: unknown[]) => {
+    const propsObj = (props ?? {}) as Record<string, unknown>;
+    const finalChildren = children.length > 0 ? children : 'children' in propsObj ? propsObj.children : [];
+    return {
+      $$typeof: Symbol.for('react.element'),
+      type,
+      props: { ...propsObj, children: finalChildren },
+    };
+  };
+  const realCreate = ((_realReactMod.default as Record<string, unknown> | undefined)?.createElement ??
+    _realReactMod.createElement) as (...args: unknown[]) => unknown;
+  const createElement = (...args: unknown[]) =>
+    _useStubReactForUltrareview ? stubCreateElement(args[0], args[1], ...args.slice(2)) : realCreate(...args);
+  return {
+    ..._realReactMod,
+    default: {
+      ...((_realReactMod.default as Record<string, unknown> | undefined) ?? {}),
+      createElement,
+    },
+    createElement,
+  };
+});
+
+// Spread real ink + flag-gate the stub components. Without spread, the bare
+// { Box: 'Box', Dialog: 'Dialog', Text: 'Text' } leaks into every later test
+// file (e.g. AgentsPlatformView.test.tsx) that imports @anthropic/ink — those
+// consumers receive strings instead of real components and rendering breaks.
+mock.module('@anthropic/ink', () => {
+  if (_useStubInkForUltrareview) { // NOTE(review): read when this factory runs, not on each component use (the react mock checks its flag inside createElement instead) — confirm the factory is re-evaluated after afterAll flips the flag
+    return {
+      ..._realInkMod,
+      Box: 'Box',
+      Dialog: 'Dialog',
+      Text: 'Text',
+    };
+  }
+  return _realInkMod; // flag off: hand back the real module untouched
+});
+
+mock.module('src/components/CustomSelect/select.js', () => ({
+  Select: 'Select',
+}));
+
+// UltrareviewOverageDialog and PreflightDialog — return a simple marker
+mock.module('src/commands/review/UltrareviewOverageDialog.js', () => ({
+  UltrareviewOverageDialog: () => ({ type: 'UltrareviewOverageDialog' }),
+}));
+mock.module('src/commands/review/UltrareviewPreflightDialog.js', () => ({
+  UltrareviewPreflightDialog: () => ({ type: 'UltrareviewPreflightDialog' }),
+}));
+
+import { call } from '../ultrareviewCommand.js';
+
+const makeContext = () => // builds the second argument passed to `call` in every test below
+  ({
+    abortController: { signal: {} }, // minimal stand-in: only this field is populated
+  }) as Parameters<typeof call>[1]; // cast because the stub is not a complete context object
+
+describe('ultrareviewCommand.call: gate branches', () => {
+  // Reset gate + launch state between tests so a previous test's mutation
+  // doesn't leak into the next.
+  beforeEach(() => {
+    _gateResult = { kind: 'proceed', billingNote: '' };
+    _launchResult = [{ type: 'text', text: 'Launched successfully.' }];
+    _capturedLaunchArgs.length = 0;
+  });
+
+  test('proceed gate: forwards billingNote to launchRemoteReview, calls onDone, returns null', async () => {
+    _gateResult = { kind: 'proceed', billingNote: ' Free review 1 of 5.' };
+
+    const messages: string[] = [];
+    const onDone = (msg: string) => messages.push(msg);
+
+    const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
+
+    expect(result).toBeNull();
+    expect(messages.length).toBe(1);
+    expect(messages[0]).toContain('Launched successfully');
+    // launchRemoteReview was invoked exactly once with the empty args.
+    expect(_capturedLaunchArgs).toEqual(['']); // NOTE(review): the launchRemoteReview mock records only its first argument, so billingNote forwarding is not actually asserted by this test
+  });
+
+  test('not-enabled gate: onDone with paywall message, returns null', async () => {
+    _gateResult = { kind: 'not-enabled' };
+
+    const messages: string[] = [];
+    const opts: Array<unknown> = [];
+    const onDone = (msg: string, opt: unknown) => {
+      messages.push(msg);
+      opts.push(opt);
+    };
+
+    const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
+
+    expect(result).toBeNull();
+    expect(messages).toHaveLength(1);
+    expect(messages[0]).toContain('Free ultrareviews used');
+    expect(messages[0]).toContain('claude.ai/settings/billing');
+    expect((opts[0] as { display: string }).display).toBe('system');
+    // launchRemoteReview must NOT be called when paywalled.
+    expect(_capturedLaunchArgs).toEqual([]);
+  });
+
+  test('low-balance gate: onDone with balance-too-low message including available amount, returns null', async () => {
+    _gateResult = { kind: 'low-balance', available: 4.5 };
+
+    const messages: string[] = [];
+    const opts: Array<unknown> = [];
+    const onDone = (msg: string, opt: unknown) => {
+      messages.push(msg);
+      opts.push(opt);
+    };
+
+    const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
+
+    expect(result).toBeNull();
+    expect(messages).toHaveLength(1);
+    expect(messages[0]).toContain('Balance too low');
+    expect(messages[0]).toContain('$4.50');
+    expect(messages[0]).toContain('claude.ai/settings/billing');
+    expect((opts[0] as { display: string }).display).toBe('system');
+    expect(_capturedLaunchArgs).toEqual([]);
+  });
+
+  test('needs-confirm gate: returns UltrareviewOverageDialog React element, does not launch', async () => {
+    _gateResult = { kind: 'needs-confirm' };
+
+    const messages: string[] = [];
+    const onDone = (msg: string) => messages.push(msg);
+
+    const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
+
+    // Returns a React element rather than null.
+    expect(result).not.toBeNull();
+    expect(typeof result).toBe('object');
+    const element = result as { type: unknown };
+    expect(element.type).toBeDefined(); // NOTE(review): only checks that some type is present; it does not confirm the element is UltrareviewOverageDialog
+    // No onDone call until the user interacts with the dialog.
+    expect(messages).toEqual([]);
+    expect(_capturedLaunchArgs).toEqual([]);
+  });
+
+  test('proceed gate + launchRemoteReview returns null: onDone with failure message', async () => {
+    _gateResult = { kind: 'proceed', billingNote: '' };
+    _launchResult = null; // teleport / non-github failure path
+
+    const messages: string[] = [];
+    const opts: Array<unknown> = [];
+    const onDone = (msg: string, opt: unknown) => {
+      messages.push(msg);
+      opts.push(opt);
+    };
+
+    const result = await call(onDone as Parameters<typeof call>[0], makeContext(), '');
+
+    expect(result).toBeNull();
+    expect(messages).toHaveLength(1);
+    expect(messages[0]).toContain('Ultrareview failed to launch');
+    expect((opts[0] as { display: string }).display).toBe('system');
+  });
+
+  test('proceed gate: forwards args (e.g. PR number) verbatim to launchRemoteReview', async () => {
+    _gateResult = { kind: 'proceed', billingNote: '' };
+
+    const messages: string[] = [];
+    const onDone = (msg: string) => messages.push(msg);
+
+    await call(onDone as Parameters<typeof call>[0], makeContext(), '42');
+
+    // ultrareviewCommand.call doesn't parse args itself — launchRemoteReview
+    // is responsible for PR-number detection. So we only assert pass-through.
+    expect(_capturedLaunchArgs).toEqual(['42']);
+  });
+});
--- a/src/components/BuiltinStatusLine.tsx
+++ b/src/components/BuiltinStatusLine.tsx
@@ -0,0 +1,128 @@
+import React, { useEffect, useState } from 'react';
+import { formatCost } from '../cost-tracker.js';
+import { Box, Text } from '@anthropic/ink';
+import { formatTokens } from '../utils/format.js';
+import { useTerminalSize } from '../hooks/useTerminalSize.js';
+
+type RateLimitBucket = {
+  utilization: number;
+  resets_at: number;
+};
+
+type BuiltinStatusLineProps = {
+  modelName: string;
+  contextUsedPct: number;
+  usedTokens: number;
+  contextWindowSize: number;
+  totalCostUsd: number;
+  rateLimits: {
+    five_hour?: RateLimitBucket;
+    seven_day?: RateLimitBucket;
+  };
+};
+
+/**
+ * Format a countdown from now until the given epoch time (in seconds).
+ * Returns a compact human-readable string like "3h12m", "5d20h", "45m", or "now".
+ *
+ * At most two units are shown and every unit is floored, never rounded:
+ * days+hours when at least a day remains, hours+minutes when at least an hour
+ * remains, otherwise minutes only. "now" is returned only once the target time
+ * has been reached or passed; a target less than a minute away renders as "0m".
+ *
+ * @param epochSeconds - Target time in seconds since the Unix epoch.
+ * @returns The formatted time remaining, measured against `Date.now()`.
+ */
+export function formatCountdown(epochSeconds: number): string {
+  const diff = epochSeconds - Date.now() / 1000; // seconds remaining (Date.now() is in milliseconds)
+  if (diff <= 0) return 'now';
+
+  const days = Math.floor(diff / 86400);
+  const hours = Math.floor((diff % 86400) / 3600); // hours left over after whole days
+  const minutes = Math.floor((diff % 3600) / 60); // minutes left over after whole hours
+
+  if (days >= 1) return `${days}d${hours}h`;
+  if (hours >= 1) return `${hours}h${minutes}m`;
+  return `${minutes}m`;
+}
+
+function Separator() { // dimmed vertical-bar divider (U+2502 padded with one space on each side)
+  return <Text dimColor>{' \u2502 '}</Text>;
+}
+
+function BuiltinStatusLineInner({
+  modelName,
+  contextUsedPct,
+  usedTokens,
+  contextWindowSize,
+  totalCostUsd,
+  rateLimits,
+}: BuiltinStatusLineProps) { // renders one row: model, context usage, optional 5-hour and 7-day rate limits, optional cost
+  const { columns } = useTerminalSize();
+
+  // Force re-render every 60s so countdowns stay current (the timer is only started when a bucket reports a non-zero reset time)
+  const [tick, setTick] = useState(0);
+  useEffect(() => {
+    const hasResetTime = (rateLimits.five_hour?.resets_at ?? 0) || (rateLimits.seven_day?.resets_at ?? 0); // truthy when either bucket has a non-zero resets_at
+    if (!hasResetTime) return; // nothing counts down, so no interval is created
+    const id = setInterval(() => setTick(t => t + 1), 60_000);
+    return () => clearInterval(id);
+  }, [rateLimits.five_hour?.resets_at, rateLimits.seven_day?.resets_at]);
+
+  // Suppress unused-variable lint for tick (it exists only to trigger re-renders)
+  void tick;
+
+  // Model display: use first two space-separated words (e.g. "Opus 4.6") instead of just first word; a single-word name is shown as-is
+  const modelParts = modelName.split(' ');
+  const shortModel = modelParts.length >= 2 ? `${modelParts[0]} ${modelParts[1]}` : modelName;
+
+  const narrow = columns < 60; // below 60 columns the token counts and reset countdowns are omitted
+
+  const hasFiveHour = rateLimits.five_hour != null;
+  const hasSevenDay = rateLimits.seven_day != null;
+
+  const fiveHourPct = hasFiveHour ? Math.round(rateLimits.five_hour!.utilization * 100) : 0; // NOTE(review): assumes utilization is a 0–1 fraction — confirm against the caller
+  const sevenDayPct = hasSevenDay ? Math.round(rateLimits.seven_day!.utilization * 100) : 0;
+
+  // Token display: "50k/1M" (used tokens / context window size)
+  const tokenDisplay = `${formatTokens(usedTokens)}/${formatTokens(contextWindowSize)}`;
+
+  return (
+    <Box>
+      {/* Model name */}
+      <Text>{shortModel}</Text>
+
+      {/* Context usage with token counts */}
+      <Separator />
+      <Text dimColor>Context </Text>
+      <Text>{contextUsedPct}%</Text>
+      {!narrow && <Text dimColor> ({tokenDisplay})</Text>}
+
+      {/* 5-hour session rate limit (countdown shown only when resets_at is positive and the terminal is not narrow) */}
+      {hasFiveHour && (
+        <>
+          <Separator />
+          <Text dimColor>Session </Text>
+          <Text>{fiveHourPct}%</Text>
+          {!narrow && rateLimits.five_hour!.resets_at > 0 && (
+            <Text dimColor> {formatCountdown(rateLimits.five_hour!.resets_at)}</Text>
+          )}
+        </>
+      )}
+
+      {/* 7-day weekly rate limit (same display rules as the 5-hour bucket) */}
+      {hasSevenDay && (
+        <>
+          <Separator />
+          <Text dimColor>Weekly </Text>
+          <Text>{sevenDayPct}%</Text>
+          {!narrow && rateLimits.seven_day!.resets_at > 0 && (
+            <Text dimColor> {formatCountdown(rateLimits.seven_day!.resets_at)}</Text>
+          )}
+        </>
+      )}
+
+      {/* Cost (hidden while the total is zero) */}
+      {totalCostUsd > 0 && (
+        <>
+          <Separator />
+          <Text>{formatCost(totalCostUsd)}</Text>
+        </>
+      )}
+    </Box>
+  );
+}
+
+export const BuiltinStatusLine = React.memo(BuiltinStatusLineInner);
--- a/src/components/StatusLine.tsx
+++ b/src/components/StatusLine.tsx
@@ -1,6 +1,6 @@
 import { feature } from 'bun:bundle';
 import * as React from 'react';
-import { memo, useCallback, useEffect, useRef } from 'react';
+import { memo, useCallback, useEffect, useRef, useState } from 'react';
 import { logEvent } from 'src/services/analytics/index.js';
 import { useAppState, useSetAppState } from 'src/state/AppState.js';
 import type { PermissionMode } from 'src/utils/permissions/PermissionMode.js';
@@ -42,12 +42,128 @@ import { getCurrentSessionTitle } from '../utils/sessionStorage.js';
 import { doesMostRecentAssistantMessageExceed200k, getCurrentUsage } from '../utils/tokens.js';
 import { getCurrentWorktreeSession } from '../utils/worktree.js';
 import { isVimModeEnabled } from './PromptInput/utils.js';
+import { computeHitRate, tokenSignature } from '../utils/cacheStats.js';
+import { onResponse as cacheOnResponse, getCacheStatsState, initCacheStatsState } from '../utils/cacheStatsState.js';
+import { BuiltinStatusLine } from './BuiltinStatusLine.js';
+
+// ---------------------------------------------------------------------------
+// CachePill — cache hit-rate + 1-hour TTL countdown pill
+// ---------------------------------------------------------------------------
+
+const CACHE_TTL_MS = 60 * 60 * 1000; // 60 minutes
+
+function padTwo(n: number): string { // floors n, then left-pads with "0" to at least two characters
+  return String(Math.floor(n)).padStart(2, '0');
+}
+
+function formatCountdown(remainingMs: number): string { // "MM:SS" for the time left, or "exp" once it is zero or negative
+  if (remainingMs <= 0) return 'exp';
+  const mins = Math.floor(remainingMs / 60_000); // whole minutes; not wrapped at 60, so a full hour reads "60:00"
+  const secs = Math.floor((remainingMs % 60_000) / 1000); // whole seconds within the current minute
+  return `${padTwo(mins)}:${padTwo(secs)}`;
+}
+
+type CachePillProps = {
+  messages: Message[];
+};
+
+function CachePill({ messages }: CachePillProps): React.ReactNode { // status-line pill: cache hit rate plus a countdown over the CACHE_TTL_MS window
+  const [now, setNow] = useState(() => Date.now());
+  const [isFlashOn, setIsFlashOn] = useState(true);
+
+  const usage = getCurrentUsage(messages);
+
+  // Feed new responses into the in-memory singleton. This runs during render; the signature comparison keeps this component instance from reporting the same usage twice.
+  const prevSigRef = useRef<string | null>(null);
+  if (usage !== null) {
+    const sig = tokenSignature(usage);
+    if (sig !== prevSigRef.current) {
+      prevSigRef.current = sig;
+      cacheOnResponse(usage);
+    }
+  }
+
+  const cacheState = getCacheStatsState(); // read on every render; the 1-second ticker below is what keeps it fresh on screen
+  const { lastResetAt, lastHitRate } = cacheState;
+
+  // Derived timing (all null until lastResetAt is known)
+  const elapsed = lastResetAt !== null ? now - lastResetAt : null;
+  const remaining = elapsed !== null ? CACHE_TTL_MS - elapsed : null;
+  const elapsedMin = elapsed !== null ? elapsed / 60_000 : null;
+  const isExpired = remaining !== null && remaining <= 0;
+
+  // 1-second countdown ticker (runs for the component's whole lifetime, including the placeholder state)
+  useEffect(() => {
+    const id = setInterval(() => setNow(Date.now()), 1000);
+    return () => clearInterval(id);
+  }, []);
+
+  // 500ms flash in last 5 minutes (elapsed >= 55 min and not yet expired)
+  const inFlashZone = elapsedMin !== null && elapsedMin >= 55 && !isExpired;
+  useEffect(() => {
+    if (!inFlashZone) {
+      setIsFlashOn(true); // outside the flash zone the countdown is never dimmed
+      return;
+    }
+    const id = setInterval(() => setIsFlashOn(v => !v), 500);
+    return () => clearInterval(id);
+  }, [inFlashZone]);
+
+  // Load persisted fallback once on mount
+  const initDoneRef = useRef(false);
+  useEffect(() => {
+    if (initDoneRef.current) return; // guards against the effect body running a second time
+    initDoneRef.current = true;
+    const sid = getSessionId();
+    void initCacheStatsState(sid); // fire-and-forget. NOTE(review): a rejection would go unhandled — confirm initCacheStatsState never rejects
+  }, []);
+
+  const displayHitRate = usage !== null ? computeHitRate(usage) : lastHitRate; // live usage wins over the stored lastHitRate
+
+  // No data yet — show placeholder
+  if (displayHitRate === null && lastResetAt === null) {
+    return <Text dimColor>{' Cache --% --:--'}</Text>;
+  }
+
+  const countdownText = remaining !== null ? formatCountdown(remaining) : '--:--';
+  const hitRateText = displayHitRate !== null ? `${displayHitRate}%` : '--%';
+
+  // Timer color by elapsed bucket — using theme keys: under 20 min success, under 40 min warning, otherwise error; inactive when expired or unknown
+  type TimerThemeKey = 'success' | 'warning' | 'error' | 'inactive';
+  let timerColor: TimerThemeKey;
+  if (isExpired || elapsedMin === null) {
+    timerColor = 'inactive';
+  } else if (elapsedMin < 20) {
+    timerColor = 'success';
+  } else if (elapsedMin < 40) {
+    timerColor = 'warning';
+  } else {
+    timerColor = 'error';
+  }
+
+  // Hit-rate color — using theme keys: success at 50 or above, inactive below that or when unknown
+  const hitRateColor: 'success' | 'inactive' = displayHitRate !== null && displayHitRate >= 50 ? 'success' : 'inactive';
+
+  return (
+    <Text>
+      <Text dimColor>{' Cache '}</Text>
+      <Text color={hitRateColor}>{hitRateText}</Text>
+      <Text color={timerColor} dimColor={inFlashZone && !isFlashOn}>
+        {' '}
+        {countdownText}
+      </Text>
+    </Text>
+  );
+}

 export function statusLineShouldDisplay(settings: ReadonlySettings): boolean {
  // Assistant mode: statusline fields (model, permission mode, cwd) reflect the
  // REPL/daemon process, not what the agent child is actually running. Hide it.
  if (feature('KAIROS') && getKairosActive()) return false;
-  return settings?.statusLine !== undefined;
+  // Render only when the user has explicitly toggled it on via `/statusline`.
+  // Default off keeps the REPL clean for users who don't want the extra row;
+  // /statusline flips `statusLineEnabled` in settings.json.
+  return settings?.statusLineEnabled === true;
 }

 function buildStatusLineCommandInput(
@@ -222,6 +338,13 @@ function StatusLineInner({ messagesRef, lastAssistantMessageId, vimMode }: Props
    const logResult = logNextResultRef.current;
    logNextResultRef.current = false;

+    // Skip the shell command path entirely when no command is configured.
+    // The top row (BuiltinStatusLine + CachePill) renders unconditionally, so
+    // there's nothing to update here when settings.statusLine is missing.
+    if (!settingsRef.current?.statusLine?.command) {
+      return;
+    }
+
    try {
      let exceeds200kTokens = previousStateRef.current.exceeds200kTokens;

@@ -288,15 +411,6 @@ function StatusLineInner({ messagesRef, lastAssistantMessageId, vimMode }: Props
    }
  }, [lastAssistantMessageId, permissionMode, vimMode, mainLoopModel, scheduleUpdate]);

-  // Time-driven refresh: tick setInterval(refreshInterval seconds) through the
-  // existing debounced scheduleUpdate so interval + message-change don't double-fire.
-  const refreshIntervalMs = (settings?.statusLine?.refreshInterval ?? 0) * 1000;
-  useEffect(() => {
-    if (refreshIntervalMs <= 0) return;
-    const id = setInterval(() => scheduleUpdate(), refreshIntervalMs);
-    return () => clearInterval(id);
-  }, [refreshIntervalMs, scheduleUpdate]);
-
  // When the statusLine command changes (hot reload), log the next result
  const statusLineCommand = settings?.statusLine?.command;
  const isFirstSettingsRender = useRef(true);
@@ -353,12 +467,57 @@ function StatusLineInner({ messagesRef, lastAssistantMessageId, vimMode }: Props
  // Get padding from settings or default to 0
  const paddingX = settings?.statusLine?.padding ?? 0;

-  // StatusLine must have stable height in fullscreen — the footer is
-  // flexShrink:0 so a 0→1 row change when the command finishes steals
-  // a row from ScrollBox and shifts content. Reserve the row while loading
-  // (same trick as PromptInputFooterLeftSide).
+  // ---- Top row data: feed BuiltinStatusLine (model + ctx + 5h + 7d + cost) ---
+  const builtinRuntimeModel = getRuntimeMainLoopModel({
+    permissionMode,
+    mainLoopModel,
+    exceeds200kTokens: previousStateRef.current.exceeds200kTokens,
+  });
+  const builtinContextWindowSize = getContextWindowForModel(builtinRuntimeModel, getSdkBetas());
+  const builtinCurrentUsage = getCurrentUsage(messagesRef.current);
+  const builtinUsedTokens = builtinCurrentUsage
+    ? builtinCurrentUsage.input_tokens +
+      builtinCurrentUsage.cache_creation_input_tokens +
+      builtinCurrentUsage.cache_read_input_tokens
+    : 0;
+  const builtinContextPct = builtinCurrentUsage
+    ? Math.round(calculateContextPercentages(builtinCurrentUsage, builtinContextWindowSize).used ?? 0)
+    : 0;
+  const builtinRawUtil = getRawUtilization();
+  const builtinRateLimits = {
+    ...(builtinRawUtil.five_hour && {
+      five_hour: {
+        utilization: builtinRawUtil.five_hour.utilization,
+        resets_at: builtinRawUtil.five_hour.resets_at,
+      },
+    }),
+    ...(builtinRawUtil.seven_day && {
+      seven_day: {
+        utilization: builtinRawUtil.seven_day.utilization,
+        resets_at: builtinRawUtil.seven_day.resets_at,
+      },
+    }),
+  };
+
+  // StatusLine has stable height — flexShrink:0 footer means row count changes
+  // would steal from ScrollBox. We always render 2 rows (top: BuiltinStatusLine
+  // + Cache pill, bottom: shell command stdout reservation) to keep height
+  // stable across loading/configured/empty states.
  return (
-    <Box paddingX={paddingX} gap={2}>
+    <Box flexDirection="column" paddingX={paddingX}>
+      {/* Top: built-in fork status (model | ctx | 5h | 7d | cost) + Cache pill */}
+      <Box gap={2}>
+        <BuiltinStatusLine
+          modelName={renderModelName(builtinRuntimeModel)}
+          contextUsedPct={builtinContextPct}
+          usedTokens={builtinUsedTokens}
+          contextWindowSize={builtinContextWindowSize}
+          totalCostUsd={getTotalCost()}
+          rateLimits={builtinRateLimits}
+        />
+        <CachePill messages={messagesRef.current} />
+      </Box>
+      {/* Bottom: user-configured /statusline shell stdout (reserves row in fullscreen) */}
      {statusLineText ? (
        <Text dimColor wrap="truncate">
          <Ansi>{statusLineText}</Ansi>
--- a/src/components/tests/StatusLine.test.tsx
+++ b/src/components/tests/StatusLine.test.tsx
@@ -0,0 +1,190 @@
+/**
+ * Tests for the CachePill helper logic in StatusLine.
+ *
+ * CachePill is a React/Ink component — rendering it in a headless test
+ * environment is fragile (requires Ink's renderer, theme provider, etc.).
+ * Instead we test the pure helper functions that power it directly, which
+ * gives deterministic, fast unit coverage of all color-stage logic.
+ */
+
+import { describe, test, expect } from 'bun:test';
+import { computeHitRate } from '../../utils/cacheStats.js';
+
+// ---------------------------------------------------------------------------
+// Local copies that mirror CachePill internal logic, kept here for unit testing
+// ---------------------------------------------------------------------------
+
+const CACHE_TTL_MS = 60 * 60 * 1000;
+
+function padTwo(n: number): string {
+  return `${Math.floor(n)}`.padStart(2, '0'); // integer part, left-padded with '0' to width 2
+}
+
+function formatCountdown(remainingMs: number): string {
+  if (remainingMs <= 0) return 'exp'; // TTL used up: show the expired marker, not 00:00
+  const wholeMinutes = Math.floor(remainingMs / 60_000);
+  const leftoverSeconds = Math.floor((remainingMs % 60_000) / 1000);
+  return [wholeMinutes, leftoverSeconds].map(part => padTwo(part)).join(':'); // MM:SS
+}
+
+type TimerThemeKey = 'success' | 'warning' | 'error' | 'inactive';
+
+function timerColor(elapsedMin: number | null, isExpired: boolean): TimerThemeKey {
+  if (isExpired || elapsedMin === null) return 'inactive'; // no live timer → gray
+  // Live timer: green for the first 20 min, yellow up to 40 min, red afterwards.
+  const stage: TimerThemeKey = elapsedMin < 20 ? 'success' : elapsedMin < 40 ? 'warning' : 'error';
+  return stage;
+}
+
+function hitRateColor(rate: number | null): 'success' | 'inactive' {
+  return rate === null || !(rate >= 50) ? 'inactive' : 'success'; // green only for a known rate of 50+
+}
+
+// ---------------------------------------------------------------------------
+// formatCountdown
+// ---------------------------------------------------------------------------
+
+describe('formatCountdown', () => {
+  test('formats full 60 minutes as 60:00', () => {
+    expect(formatCountdown(CACHE_TTL_MS)).toBe('60:00');
+  });
+
+  test('formats 59 minutes 43 seconds correctly', () => {
+    const ms = 59 * 60_000 + 43 * 1000;
+    expect(formatCountdown(ms)).toBe('59:43');
+  });
+
+  test('formats sub-minute as 00:SS', () => {
+    expect(formatCountdown(30_000)).toBe('00:30');
+  });
+
+  test('returns "exp" when remainingMs is 0', () => {
+    expect(formatCountdown(0)).toBe('exp');
+  });
+
+  test('returns "exp" when remainingMs is negative', () => {
+    expect(formatCountdown(-1000)).toBe('exp');
+  });
+
+  test('pads single-digit minutes and seconds', () => {
+    // 5 min 7 sec
+    expect(formatCountdown(5 * 60_000 + 7_000)).toBe('05:07');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Color stages — 4 thresholds
+// ---------------------------------------------------------------------------
+
+describe('timerColor stages', () => {
+  test('green (success) when elapsed < 20 min', () => {
+    expect(timerColor(0, false)).toBe('success');
+    expect(timerColor(10, false)).toBe('success');
+    expect(timerColor(19.9, false)).toBe('success');
+  });
+
+  test('yellow (warning) when 20 <= elapsed < 40 min', () => {
+    expect(timerColor(20, false)).toBe('warning');
+    expect(timerColor(30, false)).toBe('warning');
+    expect(timerColor(39.9, false)).toBe('warning');
+  });
+
+  test('red (error) when 40 <= elapsed < 60 min', () => {
+    expect(timerColor(40, false)).toBe('error');
+    expect(timerColor(55, false)).toBe('error');
+    expect(timerColor(59.9, false)).toBe('error');
+  });
+
+  test('gray (inactive) when expired', () => {
+    expect(timerColor(60, true)).toBe('inactive');
+    expect(timerColor(90, true)).toBe('inactive');
+  });
+
+  test('gray (inactive) when no elapsed data', () => {
+    expect(timerColor(null, false)).toBe('inactive');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Flash zone — last 5 minutes (elapsed >= 55)
+// ---------------------------------------------------------------------------
+
+describe('flash zone detection', () => {
+  // Flash zone = last 5 minutes of the TTL (elapsed >= 55 min) while the cache is not yet expired.
+  const isInFlashZone = (elapsedMin: number, isExpired: boolean): boolean => elapsedMin >= 55 && !isExpired;
+
+  test('not in flash zone at 54.9 min', () => {
+    expect(isInFlashZone(54.9, false)).toBe(false);
+  });
+
+  test('in flash zone at exactly 55 min', () => {
+    expect(isInFlashZone(55, false)).toBe(true);
+  });
+
+  test('still in flash zone at 59.9 min', () => {
+    expect(isInFlashZone(59.9, false)).toBe(true);
+  });
+
+  test('NOT in flash zone when expired', () => {
+    expect(isInFlashZone(65, true)).toBe(false);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Hit-rate color
+// ---------------------------------------------------------------------------
+
+describe('hitRateColor', () => {
+  test('success (green) when rate >= 50', () => {
+    expect(hitRateColor(50)).toBe('success');
+    expect(hitRateColor(75)).toBe('success');
+    expect(hitRateColor(100)).toBe('success');
+  });
+
+  test('inactive (gray) when rate < 50', () => {
+    expect(hitRateColor(49)).toBe('inactive');
+    expect(hitRateColor(0)).toBe('inactive');
+  });
+
+  test('inactive (gray) when rate is null', () => {
+    expect(hitRateColor(null)).toBe('inactive');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// computeHitRate integration (used in CachePill)
+// ---------------------------------------------------------------------------
+
+describe('computeHitRate used in CachePill', () => {
+  test('97% hit rate rounds correctly', () => {
+    // 97 read out of 100 total
+    const rate = computeHitRate({
+      input_tokens: 3,
+      cache_creation_input_tokens: 0,
+      cache_read_input_tokens: 97,
+    });
+    expect(rate).toBe(97);
+  });
+
+  test('null usage returns null rate', () => {
+    expect(computeHitRate(null)).toBeNull();
+  });
+
+  test('zero-token response returns null rate', () => {
+    expect(computeHitRate({ input_tokens: 0, cache_creation_input_tokens: 0, cache_read_input_tokens: 0 })).toBeNull();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// "exp" display when TTL expired
+// ---------------------------------------------------------------------------
+
+describe('expired display', () => {
+  test('formatCountdown returns "exp" at 0 remaining', () => {
+    expect(formatCountdown(0)).toBe('exp');
+  });
+
+  test('timerColor is inactive when isExpired=true', () => {
+    expect(timerColor(61, true)).toBe('inactive');
+  });
+});
--- a/src/components/skills/SkillsMenu.tsx
+++ b/src/components/skills/SkillsMenu.tsx
@@ -1,6 +1,5 @@
-import capitalize from 'lodash-es/capitalize.js';
 import * as React from 'react';
-import { useMemo } from 'react';
+import { useMemo, useState } from 'react';
 import {
  type Command,
  type CommandBase,
@@ -8,58 +7,45 @@ import {
  getCommandName,
  type PromptCommand,
 } from '../../commands.js';
-import { Box, Text } from '@anthropic/ink';
+import { Box, FuzzyPicker, Text } from '@anthropic/ink';
 import type { Theme } from '@anthropic/ink';
-import { estimateSkillFrontmatterTokens, getSkillsPath } from '../../skills/loadSkillsDir.js';
-import { getDisplayPath } from '../../utils/file.js';
+import { estimateSkillFrontmatterTokens } from '../../skills/loadSkillsDir.js';
 import { formatTokens } from '../../utils/format.js';
 import { getSettingSourceName, type SettingSource } from '../../utils/settings/constants.js';
 import { plural } from '../../utils/stringUtils.js';
 import { ConfigurableShortcutHint } from '../ConfigurableShortcutHint.js';
 import { Dialog } from '@anthropic/ink';
+import { filterSkills } from './filterSkills.js';

 // Skills are always PromptCommands with CommandBase properties
 type SkillCommand = CommandBase & PromptCommand;

 type SkillSource = SettingSource | 'plugin' | 'mcp';

+const ORDERED_SOURCES: SkillSource[] = [
+  'projectSettings',
+  'localSettings',
+  'userSettings',
+  'flagSettings',
+  'policySettings',
+  'plugin',
+  'mcp',
+];
+
 type Props = {
  onExit: (result?: string, options?: { display?: CommandResultDisplay }) => void;
  commands: Command[];
 };

-function getSourceTitle(source: SkillSource): string {
-  if (source === 'plugin') {
-    return 'Plugin skills';
-  }
-  if (source === 'mcp') {
-    return 'MCP skills';
-  }
-  return `${capitalize(getSettingSourceName(source))} skills`;
-}
-
-function getSourceSubtitle(source: SkillSource, skills: SkillCommand[]): string | undefined {
-  // MCP skills show server names; file-based skills show filesystem paths.
-  // Skill names are `<server>:<skill>`, not `mcp__<server>__…`.
-  if (source === 'mcp') {
-    const servers = [
-      ...new Set(
-        skills
-          .map(s => {
-            const idx = s.name.indexOf(':');
-            return idx > 0 ? s.name.slice(0, idx) : null;
-          })
-          .filter((n): n is string => n != null),
-      ),
-    ];
-    return servers.length > 0 ? servers.join(', ') : undefined;
-  }
-  const skillsPath = getDisplayPath(getSkillsPath(source, 'skills'));
-  const hasCommandsSkills = skills.some(s => s.loadedFrom === 'commands_DEPRECATED');
-  return hasCommandsSkills ? `${skillsPath}, ${getDisplayPath(getSkillsPath(source, 'commands'))}` : skillsPath;
+function getSourceLabel(source: SkillSource): string {
+  if (source === 'plugin') return 'plugin';
+  if (source === 'mcp') return 'mcp';
+  return getSettingSourceName(source);
 }

 export function SkillsMenu({ onExit, commands }: Props): React.ReactNode {
+  const [searchQuery, setSearchQuery] = useState('');
+
  // Filter commands for skills and cast to SkillCommand
  const skills = useMemo(() => {
    return commands.filter(
@@ -72,6 +58,18 @@ export function SkillsMenu({ onExit, commands }: Props): React.ReactNode {
    );
  }, [commands]);

+  // Apply type-to-filter: build SkillItem-shaped projections and filter
+  const filteredSkills = useMemo(() => {
+    return filterSkills(
+      skills.map(s => ({
+        ...s,
+        name: getCommandName(s),
+        description: s.description ?? '',
+      })),
+      searchQuery,
+    );
+  }, [skills, searchQuery]);
+
  const skillsBySource = useMemo((): Record<SkillSource, SkillCommand[]> => {
    const groups: Record<SkillSource, SkillCommand[]> = {
      policySettings: [],
@@ -83,7 +81,7 @@ export function SkillsMenu({ onExit, commands }: Props): React.ReactNode {
      mcp: [],
    };

-    for (const skill of skills) {
+    for (const skill of filteredSkills) {
      const source = skill.source as SkillSource;
      if (source in groups) {
        groups[source].push(skill);
@@ -95,7 +93,7 @@ export function SkillsMenu({ onExit, commands }: Props): React.ReactNode {
    }

    return groups;
-  }, [skills]);
+  }, [filteredSkills]);

  const handleCancel = (): void => {
    onExit('Skills dialog dismissed', { display: 'system' });
@@ -126,62 +124,53 @@ export function SkillsMenu({ onExit, commands }: Props): React.ReactNode {
    }
  };

-  const renderSkill = (skill: SkillCommand) => {
+  const renderSkillItem = (skill: SkillCommand, isFocused: boolean) => {
    const estimatedTokens = estimateSkillFrontmatterTokens(skill);
    const tokenDisplay = `~${formatTokens(estimatedTokens)}`;
    const pluginName = skill.source === 'plugin' ? skill.pluginInfo?.pluginManifest.name : undefined;
    const scopeTag = getScopeTag(skill.source);

    return (
-      <Box key={`${skill.name}-${skill.source}`}>
-        <Text>{getCommandName(skill)}</Text>
+      <Box>
+        <Text color={isFocused ? ('suggestion' as keyof Theme) : undefined}>{getCommandName(skill)}</Text>
        {scopeTag && <Text color={scopeTag.color as keyof Theme}> [{scopeTag.label}]</Text>}
        <Text dimColor>
-          {pluginName ? ` · ${pluginName}` : ''} · {tokenDisplay} description tokens
+          {pluginName ? ` · ${pluginName}` : ''} · {getSourceLabel(skill.source as SkillSource)} · {tokenDisplay} tokens
        </Text>
      </Box>
    );
  };

-  const renderSkillGroup = (source: SkillSource) => {
-    const groupSkills = skillsBySource[source];
-    if (groupSkills.length === 0) return null;
+  // Flat ordered list of filtered skills preserving source grouping order
+  const orderedFilteredSkills = useMemo(() => {
+    return ORDERED_SOURCES.flatMap(source => skillsBySource[source]);
+  }, [skillsBySource]);

-    const title = getSourceTitle(source);
-    const subtitle = getSourceSubtitle(source, groupSkills);
-
-    return (
-      <Box flexDirection="column" key={source}>
-        <Box>
-          <Text bold dimColor>
-            {title}
-          </Text>
-          {subtitle && <Text dimColor> ({subtitle})</Text>}
-        </Box>
-        {groupSkills.map(skill => renderSkill(skill))}
-      </Box>
-    );
-  };
+  const subtitle =
+    searchQuery.trim() === ''
+      ? `${skills.length} ${plural(skills.length, 'skill')}`
+      : `${filteredSkills.length}/${skills.length} ${plural(skills.length, 'skill')}`;

+  // No separate source group headers are rendered: items stay ordered by
+  // source (see ORDERED_SOURCES) and each row shows its source label inline
+  // via renderSkillItem.
  return (
-    <Dialog
+    <FuzzyPicker
      title="Skills"
-      subtitle={`${skills.length} ${plural(skills.length, 'skill')}`}
+      placeholder="Type to filter skills…"
+      items={orderedFilteredSkills}
+      getKey={s => `${s.name}-${s.source}`}
+      visibleCount={12}
+      direction="down"
+      onQueryChange={setSearchQuery}
+      onSelect={skill => {
+        onExit(`/${getCommandName(skill)}`, { display: 'user' });
+      }}
      onCancel={handleCancel}
-      hideInputGuide
-    >
-      <Box flexDirection="column" gap={1}>
-        {renderSkillGroup('projectSettings')}
-        {renderSkillGroup('localSettings')}
-        {renderSkillGroup('userSettings')}
-        {renderSkillGroup('flagSettings')}
-        {renderSkillGroup('policySettings')}
-        {renderSkillGroup('plugin')}
-        {renderSkillGroup('mcp')}
-      </Box>
-      <Text dimColor italic>
-        <ConfigurableShortcutHint action="confirm:no" context="Confirmation" fallback="Esc" description="close" />
-      </Text>
-    </Dialog>
+      emptyMessage={q => (q.trim() ? `No skills matching "${q.trim()}"` : 'No skills found')}
+      matchLabel={subtitle}
+      selectAction="invoke skill"
+      renderItem={(skill, isFocused) => renderSkillItem(skill, isFocused)}
+    />
  );
 }
--- a/src/components/skills/tests/filterSkills.test.ts
+++ b/src/components/skills/tests/filterSkills.test.ts
@@ -0,0 +1,68 @@
+import { describe, expect, test } from 'bun:test'
+import { filterSkills } from '../filterSkills.js'
+import type { SkillItem } from '../filterSkills.js'
+
+function makeSkill(name: string, description = ''): SkillItem {
+  return { name, description }
+}
+
+describe('filterSkills', () => {
+  const skills: SkillItem[] = [
+    makeSkill('tdd-guide', 'Test-driven development guide'),
+    makeSkill('code-reviewer', 'Review code quality and patterns'),
+    makeSkill('security-reviewer', 'Security vulnerability analysis'),
+    makeSkill('refactor-cleaner', 'Dead code cleanup and refactoring'),
+    makeSkill('planner', 'Implementation planning for complex features'),
+    makeSkill('architect', 'System design and architecture decisions'),
+  ]
+
+  test('empty query returns all skills', () => {
+    const result = filterSkills(skills, '')
+    expect(result).toEqual(skills)
+  })
+
+  test('partial name match returns matching skills', () => {
+    const result = filterSkills(skills, 'review')
+    const names = result.map(s => s.name)
+    expect(names).toContain('code-reviewer')
+    expect(names).toContain('security-reviewer')
+    expect(names).not.toContain('planner')
+  })
+
+  test('no match returns empty array', () => {
+    const result = filterSkills(skills, 'zzznomatch')
+    expect(result).toHaveLength(0)
+  })
+
+  test('case insensitive match', () => {
+    const result = filterSkills(skills, 'TDD')
+    expect(result.map(s => s.name)).toContain('tdd-guide')
+  })
+
+  test('matches description when name does not match', () => {
+    const result = filterSkills(skills, 'dead code')
+    expect(result.map(s => s.name)).toContain('refactor-cleaner')
+  })
+
+  test('multi-word query matches only skills containing every word', () => {
+    // AND semantics: security-reviewer lacks "code" and refactor-cleaner lacks
+    // "review", so code-reviewer is the only skill containing both words.
+    const result = filterSkills(skills, 'code review')
+    const names = result.map(s => s.name)
+    expect(names).toEqual(['code-reviewer'])
+  })
+
+  test('clear query (reset to empty) returns all skills again', () => {
+    // First filter
+    const filtered = filterSkills(skills, 'security')
+    expect(filtered).toHaveLength(1)
+    // Then clear
+    const all = filterSkills(skills, '')
+    expect(all).toHaveLength(skills.length)
+  })
+
+  test('whitespace-only query returns all skills', () => {
+    const result = filterSkills(skills, '   ')
+    expect(result).toEqual(skills)
+  })
+})
--- a/src/components/skills/filterSkills.ts
+++ b/src/components/skills/filterSkills.ts
@@ -0,0 +1,36 @@
+/**
+ * Type-to-filter logic for the skills picker.
+ *
+ * Invariant: empty / whitespace-only query always returns all skills unchanged.
+ * Matching is case-insensitive; each whitespace-separated word in the query
+ * must appear in either the skill name or description.
+ */
+
+export type SkillItem = {
+  name: string
+  description: string
+}
+
+/**
+ * Filter `skills` by `query`. Returns a new array; never mutates input.
+ *
+ * - Empty/whitespace query → returns all skills.
+ * - Each word in the query must appear (case-insensitive) in the skill name
+ *   OR description (AND-semantics per word, OR across name/description).
+ */
+export function filterSkills<T extends SkillItem>(
+  skills: readonly T[],
+  query: string,
+): T[] {
+  // Lower-cased search terms. A blank query yields no terms, which makes the
+  // per-skill check vacuously true, so the result is a copy of every skill.
+  const terms = query.trim().toLowerCase().split(/\s+/).filter(Boolean)
+
+  // A skill matches when each term occurs in its combined name + description.
+  const matchesAllTerms = (skill: T): boolean => {
+    const searchable = `${skill.name} ${skill.description}`.toLowerCase()
+    return terms.every(term => searchable.includes(term))
+  }
+
+  return skills.filter(matchesAllTerms)
+}
--- a/src/keybindings/validate.ts
+++ b/src/keybindings/validate.ts
@@ -71,9 +71,12 @@ const VALID_CONTEXTS: KeybindingContextName[] = [
  'Tabs',
  'Attachments',
  'Footer',
+  'FormField',
+  'MessageActions',
  'MessageSelector',
  'DiffDialog',
  'ModelPicker',
+  'Scroll',
  'Select',
  'Plugin',
 ]
--- a/src/services/MagicDocs/tests/prompts.test.ts
+++ b/src/services/MagicDocs/tests/prompts.test.ts
@@ -0,0 +1,410 @@
+import { afterAll, describe, test, expect, mock, beforeEach } from 'bun:test'
+import { homedir } from 'node:os'
+import { join } from 'node:path'
+
+// ── Mock infrastructure ─────────────────────────────────────────────────────
+// All mock.module calls must precede the import of the module under test.
+// mock.module is process-global; mocks here must cover all exported names used
+// transitively so sibling test files are not broken by an incomplete mock.
+//
+// To prevent cross-file pollution (providers.test.ts, model.test.ts, skill
+// prefetch / skillLearning smoke), keep the mock factory inline (don't
+// pre-import real modules — that triggers heavy transitive deps and hangs
+// some test combinations). The flag below switches off the suite-specific
+// override after this file's tests finish.
+let useMockForMagicDocs = true
+afterAll(() => {
+  useMockForMagicDocs = false
+})
+
+// Inline a minimum env-driven default-model resolver so other test files
+// (getDefaultOpusModel.test.ts) which assert env-var precedence still work
+// even after our flag is off. The real getDefaultOpusModel reads provider
+// env vars; we mirror that minimal logic here. Keep aligned with
+// src/utils/model/model.ts's getDefaultOpusModel().
+function resolveDefaultOpusModelForTests(): string {
+  // Highest priority: provider-specific env override.
+  if (process.env.CLAUDE_CODE_USE_OPENAI === '1') {
+    if (process.env.OPENAI_DEFAULT_OPUS_MODEL)
+      return process.env.OPENAI_DEFAULT_OPUS_MODEL
+  }
+  if (process.env.CLAUDE_CODE_USE_GEMINI === '1') {
+    if (process.env.GEMINI_DEFAULT_OPUS_MODEL)
+      return process.env.GEMINI_DEFAULT_OPUS_MODEL
+  }
+  // Cross-provider override.
+  if (process.env.ANTHROPIC_DEFAULT_OPUS_MODEL)
+    return process.env.ANTHROPIC_DEFAULT_OPUS_MODEL
+  // Provider-specific Opus 4.7 IDs (must match
+  // src/utils/model/configs.ts CLAUDE_OPUS_4_7_CONFIG).
+  if (process.env.CLAUDE_CODE_USE_BEDROCK === '1')
+    return 'us.anthropic.claude-opus-4-7-v1'
+  if (process.env.CLAUDE_CODE_USE_VERTEX === '1') return 'claude-opus-4-7'
+  if (process.env.CLAUDE_CODE_USE_FOUNDRY === '1') return 'claude-opus-4-7'
+  return 'claude-opus-4-7'
+}
+
+const mockGetMainLoopModel = mock(() => 'claude-opus-4-7')
+const mockGetDisplayedEffortLevel = mock((): string => 'high')
+
+const realIsEnvTruthy = (v: string | boolean | undefined): boolean => {
+  if (!v) return false
+  if (typeof v === 'boolean') return v
+  return ['1', 'true', 'yes', 'on'].includes(v.toLowerCase().trim())
+}
+
+// Inline the real firstPartyNameToCanonical logic so its semantics survive
+// even after this suite's mock wins the registration race. Pre-importing
+// model.ts hangs the test process due to heavy transitive deps, so we
+// duplicate just this one pure function. Keep in sync with
+// src/utils/model/model.ts.
+function realFirstPartyNameToCanonical(name: string): string {
+  name = name.toLowerCase()
+  if (name.includes('claude-opus-4-7')) return 'claude-opus-4-7'
+  if (name.includes('claude-opus-4-6')) return 'claude-opus-4-6'
+  if (name.includes('claude-opus-4-5')) return 'claude-opus-4-5'
+  if (name.includes('claude-opus-4-1')) return 'claude-opus-4-1'
+  if (name.includes('claude-opus-4')) return 'claude-opus-4'
+  if (name.includes('claude-sonnet-4-6')) return 'claude-sonnet-4-6'
+  if (name.includes('claude-sonnet-4-5')) return 'claude-sonnet-4-5'
+  if (name.includes('claude-sonnet-4')) return 'claude-sonnet-4'
+  if (name.includes('claude-haiku-4-5')) return 'claude-haiku-4-5'
+  if (name.includes('claude-3-7-sonnet')) return 'claude-3-7-sonnet'
+  if (name.includes('claude-3-5-sonnet')) return 'claude-3-5-sonnet'
+  if (name.includes('claude-3-5-haiku')) return 'claude-3-5-haiku'
+  if (name.includes('claude-3-opus')) return 'claude-3-opus'
+  if (name.includes('claude-3-sonnet')) return 'claude-3-sonnet'
+  if (name.includes('claude-3-haiku')) return 'claude-3-haiku'
+  const m = name.match(/(claude-(\d+-\d+-)?\w+)/)
+  if (m && m[1]) return m[1]
+  return name
+}
+
+mock.module('src/utils/model/model.js', () => ({
+  getMainLoopModel: mockGetMainLoopModel,
+  getSmallFastModel: mock(() => 'claude-haiku'),
+  getUserSpecifiedModelSetting: mock(() => undefined),
+  getBestModel: mock(() => 'claude-opus-4-7'),
+  // Read env at call time so getDefaultOpusModel.test.ts (running in the same
+  // process) sees env-driven semantics. While useMockForMagicDocs is true
+  // (during this suite) we still want a stable default; otherwise we mirror
+  // the real env-precedence logic.
+  getDefaultOpusModel: mock(() =>
+    useMockForMagicDocs ? 'claude-opus-4-7' : resolveDefaultOpusModelForTests(),
+  ),
+  getDefaultSonnetModel: mock(() => 'claude-sonnet-4-6'),
+  getDefaultHaikuModel: mock(() => 'claude-haiku-3-5'),
+  getRuntimeMainLoopModel: mock(() => 'claude-opus-4-7'),
+  getDefaultMainLoopModelSetting: mock(() => 'claude-opus-4-7'),
+  getDefaultMainLoopModel: mock(() => 'claude-opus-4-7'),
+  // Real semantics inlined for firstPartyNameToCanonical so model.test.ts
+  // (which only checks pure-function input/output) passes without needing
+  // the heavy real-module load.
+  firstPartyNameToCanonical: mock((n: string) =>
+    realFirstPartyNameToCanonical(n),
+  ),
+  getCanonicalName: mock((n: string) => n),
+  getClaudeAiUserDefaultModelDescription: mock(() => ''),
+  renderDefaultModelSetting: mock(() => ''),
+  getOpusPricingSuffix: mock(() => ''),
+  isOpus1mMergeEnabled: mock(() => false),
+  renderModelSetting: mock((s: string) => s),
+  getPublicModelDisplayName: mock(() => null),
+  renderModelName: mock((n: string) => n),
+  getPublicModelName: mock((n: string) => n),
+  parseUserSpecifiedModel: mock((m: string) => m),
+  resolveSkillModelOverride: mock(() => undefined),
+  isLegacyModelRemapEnabled: mock(() => false),
+  modelDisplayString: mock(() => ''),
+  getMarketingNameForModel: mock(() => undefined),
+  normalizeModelStringForAPI: mock((m: string) => m),
+  isNonCustomOpusModel: mock(() => false),
+}))
+
+mock.module('src/utils/effort.js', () => ({
+  getDisplayedEffortLevel: mockGetDisplayedEffortLevel as (
+    _m: string,
+    _e: unknown,
+  ) => string,
+  getEffortEnvOverride: mock(() => undefined),
+  resolveAppliedEffort: mock(() => 'high'),
+  getInitialEffortSetting: mock(() => undefined),
+  parseEffortValue: mock(() => undefined),
+  toPersistableEffort: mock(() => undefined),
+  modelSupportsEffort: mock(() => true),
+  modelSupportsMaxEffort: mock(() => true),
+  modelSupportsXhighEffort: mock(() => false),
+  isEffortLevel: mock(() => true),
+  getEffortSuffix: mock(() => ''),
+  convertEffortValueToLevel: mock(() => 'high'),
+  getDefaultEffortForModel: mock(() => undefined),
+  getEffortLevelDescription: mock(() => ''),
+  getEffortValueDescription: mock(() => ''),
+  getOpusDefaultEffortConfig: mock(() => ({
+    enabled: true,
+    dialogTitle: '',
+    dialogDescription: '',
+  })),
+  resolvePickerEffortPersistence: mock(() => undefined),
+  isValidNumericEffort: mock(() => false),
+  EFFORT_LEVELS: ['low', 'medium', 'high', 'xhigh', 'max'],
+}))
+
+// Use REAL semantics for non-overridden envUtils exports — this mock is
+// process-global, so envUtils.test.ts and other consumers running in the
+// same process must see correct behavior for hasNodeOption, isBareMode,
+// parseEnvVars, getVertexRegionForModel, etc. Only getClaudeConfigHomeDir
+// is overridden to '/mock/home/.claude' while this suite runs.
+const realIsEnvDefinedFalsy = (v: string | boolean | undefined): boolean => {
+  if (v === undefined) return false
+  if (typeof v === 'boolean') return !v
+  if (!v) return false
+  return ['0', 'false', 'no', 'off'].includes(v.toLowerCase().trim())
+}
+const realDefaultVertexRegion = (): string =>
+  process.env.CLOUD_ML_REGION || 'us-east5'
+const VERTEX_REGION_OVERRIDES: ReadonlyArray<[string, string]> = [
+  ['claude-haiku-4-5', 'VERTEX_REGION_CLAUDE_HAIKU_4_5'],
+  ['claude-3-5-haiku', 'VERTEX_REGION_CLAUDE_3_5_HAIKU'],
+  ['claude-3-5-sonnet', 'VERTEX_REGION_CLAUDE_3_5_SONNET'],
+  ['claude-3-7-sonnet', 'VERTEX_REGION_CLAUDE_3_7_SONNET'],
+  ['claude-opus-4-1', 'VERTEX_REGION_CLAUDE_4_1_OPUS'],
+  ['claude-opus-4', 'VERTEX_REGION_CLAUDE_4_0_OPUS'],
+  ['claude-sonnet-4-6', 'VERTEX_REGION_CLAUDE_4_6_SONNET'],
+  ['claude-sonnet-4-5', 'VERTEX_REGION_CLAUDE_4_5_SONNET'],
+  ['claude-sonnet-4', 'VERTEX_REGION_CLAUDE_4_0_SONNET'],
+]
+
+// Real getClaudeConfigHomeDir is memoized via lodash, so consumers may call
+// `.cache.clear()` on it. Provide a no-op .cache stub.
+const mockedGetClaudeConfigHomeDirMD: (() => string) & {
+  cache: { clear: () => void; get: (k: unknown) => unknown }
+} = Object.assign(
+  () =>
+    useMockForMagicDocs
+      ? '/mock/home/.claude'
+      : (process.env.CLAUDE_CONFIG_DIR ?? join(homedir(), '.claude')).normalize(
+          'NFC',
+        ),
+  { cache: { clear: () => {}, get: (_k: unknown) => undefined } },
+)
+
+mock.module('src/utils/envUtils.js', () => ({
+  getClaudeConfigHomeDir: mockedGetClaudeConfigHomeDirMD,
+  isEnvTruthy: realIsEnvTruthy,
+  getEnvBool: () => false,
+  getEnvNumber: () => undefined,
+  getVertexRegionForModel: (model: string | undefined) => {
+    if (model) {
+      const match = VERTEX_REGION_OVERRIDES.find(([prefix]) =>
+        model.startsWith(prefix),
+      )
+      if (match) {
+        return process.env[match[1]] || realDefaultVertexRegion()
+      }
+    }
+    return realDefaultVertexRegion()
+  },
+  getTeamsDir: () =>
+    join(
+      useMockForMagicDocs
+        ? '/mock/home/.claude'
+        : (process.env.CLAUDE_CONFIG_DIR ?? join(homedir(), '.claude')),
+      'teams',
+    ),
+  hasNodeOption: (flag: string) => {
+    const opts = process.env.NODE_OPTIONS
+    return !!opts && opts.split(/\s+/).includes(flag)
+  },
+  isEnvDefinedFalsy: realIsEnvDefinedFalsy,
+  isBareMode: () =>
+    realIsEnvTruthy(process.env.CLAUDE_CODE_SIMPLE) ||
+    process.argv.includes('--bare'),
+  parseEnvVars: (rawEnvArgs: string[] | undefined) => {
+    const parsed: Record<string, string> = {}
+    if (rawEnvArgs) {
+      for (const envStr of rawEnvArgs) {
+        const [key, ...valueParts] = envStr.split('=')
+        if (!key || valueParts.length === 0) {
+          throw new Error(
+            `Invalid environment variable format: ${envStr}, environment variables should be added as: -e KEY1=value1 -e KEY2=value2`,
+          )
+        }
+        parsed[key] = valueParts.join('=')
+      }
+    }
+    return parsed
+  },
+  getAWSRegion: () =>
+    process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || 'us-east-1',
+  getDefaultVertexRegion: realDefaultVertexRegion,
+  shouldMaintainProjectWorkingDir: () =>
+    realIsEnvTruthy(process.env.CLAUDE_BASH_MAINTAIN_PROJECT_WORKING_DIR),
+  isRunningOnHomespace: () =>
+    process.env.USER_TYPE === 'ant' &&
+    realIsEnvTruthy(process.env.COO_RUNNING_ON_HOMESPACE),
+  isInProtectedNamespace: () => false,
+}))
+
+// Mock the file system so loadMagicDocsPrompt() returns our controlled template
+const mockReadFile = mock(
+  async (_path: string, _opts?: unknown): Promise<string> => {
+    throw Object.assign(new Error('ENOENT'), { code: 'ENOENT' })
+  },
+)
+
+// IMPORTANT: this file used to mock fsOperations wholesale (readdir → [],
+// exists → false, …), which silently broke sibling tests that walk
+// .claude/skills (skill prefetch, skillLearning smoke). After this suite
+// finishes (useMockForMagicDocs flips to false), construct a minimal real
+// fs adapter inline using node:fs/promises so cross-file consumers see real
+// disk state — without pre-importing the heavy fsOperations module (its
+// transitive deps stall bun:test). Avoid require()ing the real module
+// inside the factory: that re-enters the same mock and infinite-loops.
+import { promises as nodeFs, existsSync as nodeExistsSync } from 'node:fs'
+
+const realFsAdapter = {
+  cwd: () => process.cwd(),
+  existsSync: (p: string) => nodeExistsSync(p),
+  stat: (p: string) => nodeFs.stat(p),
+  lstat: (p: string) => nodeFs.lstat(p),
+  readdir: (p: string) => nodeFs.readdir(p, { withFileTypes: true }),
+  unlink: (p: string) => nodeFs.unlink(p),
+  rmdir: (p: string) => nodeFs.rmdir(p),
+  rm: (p: string, options?: { recursive?: boolean; force?: boolean }) =>
+    nodeFs.rm(p, options),
+  mkdir: (p: string, options?: { recursive?: boolean }) =>
+    nodeFs.mkdir(p, options),
+  readFile: (
+    p: string,
+    options?: BufferEncoding | { encoding?: BufferEncoding },
+  ) => {
+    const encoding =
+      typeof options === 'string' ? options : (options?.encoding ?? undefined)
+    return nodeFs.readFile(p, encoding)
+  },
+  writeFile: (p: string, data: string | Uint8Array) =>
+    nodeFs.writeFile(p, data),
+  rename: (oldPath: string, newPath: string) => nodeFs.rename(oldPath, newPath),
+  open: (p: string, flags: string | number) => nodeFs.open(p, flags),
+  realpath: (p: string) => nodeFs.realpath(p),
+}
+
+mock.module('src/utils/fsOperations.js', () => ({
+  getFsImplementation: () =>
+    useMockForMagicDocs
+      ? ({
+          readFile: mockReadFile,
+          writeFile: mock(async () => {}),
+          exists: mock(async () => false),
+          mkdir: mock(async () => {}),
+          readdir: mock(async () => []),
+          stat: mock(async () => ({})),
+          unlink: mock(async () => {}),
+        } as unknown)
+      : (realFsAdapter as unknown),
+}))
+
+// ── Import module under test (after all mock.module calls) ──────────────────
+import { buildMagicDocsUpdatePrompt } from '../prompts.js'
+
+// ── Tests ───────────────────────────────────────────────────────────────────
+
+describe('buildMagicDocsUpdatePrompt – dynamic variable substitution', () => {
+  beforeEach(() => {
+    mockGetMainLoopModel.mockReturnValue('claude-opus-4-7')
+    mockGetDisplayedEffortLevel.mockReturnValue('high')
+    mockReadFile.mockImplementation(async () => {
+      throw Object.assign(new Error('ENOENT'), { code: 'ENOENT' })
+    })
+  })
+
+  test('substitutes {{CLAUDE_MODEL}} with the current model', async () => {
+    mockReadFile.mockImplementation(async () => 'Model: {{CLAUDE_MODEL}}')
+    mockGetMainLoopModel.mockReturnValue('claude-opus-4-7')
+
+    const result = await buildMagicDocsUpdatePrompt(
+      'contents',
+      '/doc.md',
+      'Title',
+    )
+    expect(result).toContain('Model: claude-opus-4-7')
+    expect(result).not.toContain('{{CLAUDE_MODEL}}')
+  })
+
+  test('substitutes {{CLAUDE_EFFORT}} with the current effort level', async () => {
+    mockReadFile.mockImplementation(async () => 'Effort: {{CLAUDE_EFFORT}}')
+    mockGetDisplayedEffortLevel.mockReturnValue('high')
+
+    const result = await buildMagicDocsUpdatePrompt(
+      'contents',
+      '/doc.md',
+      'Title',
+    )
+    expect(result).toContain('Effort: high')
+    expect(result).not.toContain('{{CLAUDE_EFFORT}}')
+  })
+
+  test('substitutes {{CLAUDE_CWD}} with process.cwd()', async () => {
+    mockReadFile.mockImplementation(async () => 'CWD: {{CLAUDE_CWD}}')
+
+    const result = await buildMagicDocsUpdatePrompt(
+      'contents',
+      '/doc.md',
+      'Title',
+    )
+    expect(result).toContain(`CWD: ${process.cwd()}`)
+    expect(result).not.toContain('{{CLAUDE_CWD}}')
+  })
+
+  test('substitutes all three dynamic variables in one template', async () => {
+    mockReadFile.mockImplementation(
+      async () =>
+        'effort={{CLAUDE_EFFORT}} model={{CLAUDE_MODEL}} cwd={{CLAUDE_CWD}}',
+    )
+    mockGetMainLoopModel.mockReturnValue('claude-sonnet-4-6')
+    mockGetDisplayedEffortLevel.mockReturnValue('medium')
+
+    const result = await buildMagicDocsUpdatePrompt(
+      'contents',
+      '/doc.md',
+      'Title',
+    )
+    expect(result).toContain('effort=medium')
+    expect(result).toContain('model=claude-sonnet-4-6')
+    expect(result).toContain(`cwd=${process.cwd()}`)
+  })
+
+  test('leaves unknown template variables unchanged', async () => {
+    mockReadFile.mockImplementation(
+      async () => '{{UNKNOWN_VAR}} {{CLAUDE_MODEL}}',
+    )
+    mockGetMainLoopModel.mockReturnValue('claude-opus-4-7')
+
+    const result = await buildMagicDocsUpdatePrompt(
+      'contents',
+      '/doc.md',
+      'Title',
+    )
+    expect(result).toContain('{{UNKNOWN_VAR}}')
+    expect(result).toContain('claude-opus-4-7')
+  })
+
+  test('existing substitution variables still work alongside new ones', async () => {
+    mockReadFile.mockImplementation(
+      async () =>
+        '{{docTitle}} effort={{CLAUDE_EFFORT}} model={{CLAUDE_MODEL}}',
+    )
+    mockGetMainLoopModel.mockReturnValue('claude-haiku')
+    mockGetDisplayedEffortLevel.mockReturnValue('low')
+
+    const result = await buildMagicDocsUpdatePrompt(
+      'contents',
+      '/doc.md',
+      'My Doc',
+    )
+    expect(result).toContain('My Doc')
+    expect(result).toContain('effort=low')
+    expect(result).toContain('model=claude-haiku')
+  })
+})
--- a/src/services/MagicDocs/prompts.ts
+++ b/src/services/MagicDocs/prompts.ts
@@ -1,6 +1,8 @@
 import { join } from 'path'
 import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
 import { getFsImplementation } from '../../utils/fsOperations.js'
+import { getDisplayedEffortLevel } from '../../utils/effort.js'
+import { getMainLoopModel } from '../../utils/model/model.js'

 /**
 * Get the Magic Docs update prompt template
@@ -114,11 +116,15 @@ These instructions take priority over the general rules below. Make sure your up
    : ''

  // Substitute variables in the prompt
+  const currentModel = getMainLoopModel()
  const variables = {
    docContents,
    docPath,
    docTitle,
    customInstructions,
+    CLAUDE_EFFORT: getDisplayedEffortLevel(currentModel, undefined),
+    CLAUDE_MODEL: currentModel,
+    CLAUDE_CWD: process.cwd(),
  }

  return substituteVariables(promptTemplate, variables)
--- a/src/services/api/tests/ultrareviewPreflight.test.ts
+++ b/src/services/api/tests/ultrareviewPreflight.test.ts
@@ -0,0 +1,226 @@
+/**
+ * Regression tests for fetchUltrareviewPreflight.
+ * Verifies all three action enum states (proceed/confirm/blocked),
+ * network/HTTP error handling, and Zod schema mismatch fallback.
+ */
+import { afterAll, beforeAll, describe, expect, mock, test } from 'bun:test'
+import { debugMock } from '../../../../tests/mocks/debug.js'
+import { logMock } from '../../../../tests/mocks/log.js'
+import { setupAxiosMock } from '../../../../tests/mocks/axios.js'
+
+// Stub factories for the subject's dependency chain. Each factory returns a
+// fresh stub object every time it is invoked.
+
+// Analytics: events are dropped.
+const analyticsStub = () => ({
+  logEvent: () => {},
+})
+
+// Auth utilities: a Claude.ai subscriber that is neither team nor enterprise.
+const authStub = () => ({
+  isClaudeAISubscriber: () => true,
+  isTeamSubscriber: () => false,
+  isEnterpriseSubscriber: () => false,
+})
+
+// OAuth config: fixed base URL for the API.
+const oauthConfigStub = () => ({
+  getOauthConfig: () => ({ BASE_API_URL: 'https://api.anthropic.com' }),
+})
+
+// prepareApiRequest and getOAuthHeaders: fixed credentials and headers.
+const teleportApiStub = () => ({
+  prepareApiRequest: async () => ({
+    accessToken: 'test-token',
+    orgUUID: 'org-uuid-test',
+  }),
+  getOAuthHeaders: (token: string) => ({
+    Authorization: `Bearer ${token}`,
+    'Content-Type': 'application/json',
+    'anthropic-version': '2023-06-01',
+  }),
+})
+
+// Mock dependency chain before any subject import
+mock.module('src/utils/debug.ts', debugMock)
+mock.module('src/utils/log.ts', logMock)
+mock.module('src/services/analytics/index.js', analyticsStub)
+mock.module('src/utils/auth.js', authStub)
+mock.module('src/constants/oauth.js', oauthConfigStub)
+mock.module('src/utils/teleport/api.js', teleportApiStub)
+
+// axios.post is replaced at module level. Until a test queues a response the
+// stub rejects with 'not configured'.
+// Typed as any in test code (CLAUDE.md: mock data may use as any).
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+const mockAxiosPost = mock(async (..._args: any[]): Promise<any> => {
+  throw new Error('not configured')
+})
+
+/** Stand-in for axios.isAxiosError: true only for objects flagged `isAxiosError: true`. */
+function hasAxiosErrorFlag(candidate: unknown): boolean {
+  if (typeof candidate !== 'object' || candidate === null) {
+    return false
+  }
+  return (candidate as { isAxiosError?: boolean }).isAxiosError === true
+}
+
+const axiosHandle = setupAxiosMock()
+axiosHandle.stubs.post = mockAxiosPost
+axiosHandle.stubs.isAxiosError = hasAxiosErrorFlag
+
+// NOTE(review): presumably `useStubs` makes the shared axios mock delegate to
+// the stubs above; it is switched on for this file and off again afterwards.
+beforeAll(() => {
+  axiosHandle.useStubs = true
+})
+afterAll(() => {
+  axiosHandle.useStubs = false
+})
+
+import {
+  fetchUltrareviewPreflight,
+  type UltrareviewPreflightResponse,
+} from '../ultrareviewPreflight.js'
+
+describe('fetchUltrareviewPreflight', () => {
+  /** Queues one HTTP 200 reply whose payload is `data`. */
+  function replyOnce(data: unknown): void {
+    mockAxiosPost.mockImplementationOnce(async () => ({ status: 200, data }))
+  }
+
+  /**
+   * Queues one rejection flagged as an axios error. When `status` is given
+   * the error also carries `response.status`; without it the error models a
+   * request that never received a response.
+   */
+  function rejectOnce(message: string, status?: number): void {
+    const failure = Object.assign(
+      new Error(message),
+      { isAxiosError: true },
+      status === undefined ? {} : { response: { status } },
+    )
+    mockAxiosPost.mockImplementationOnce(async () => {
+      throw failure
+    })
+  }
+
+  /**
+   * Queues one 'proceed' reply and records the request body that was sent.
+   *
+   * The body is asserted by the calling test rather than inside the stub: an
+   * `expect` that throws inside the stub is caught by the subject's own
+   * try/catch and surfaces only as a null result, which hides the field that
+   * was actually wrong.
+   */
+  function captureBodyOnce(): { body: unknown } {
+    const captured: { body: unknown } = { body: undefined }
+    mockAxiosPost.mockImplementationOnce(
+      async (_url: unknown, body: unknown) => {
+        captured.body = body
+        return { status: 200, data: { action: 'proceed', billing_note: null } }
+      },
+    )
+    return captured
+  }
+
+  test('returns proceed action when server responds with proceed', async () => {
+    const serverResponse: UltrareviewPreflightResponse = {
+      action: 'proceed',
+      billing_note: null,
+    }
+    replyOnce(serverResponse)
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).not.toBeNull()
+    expect(result?.action).toBe('proceed')
+    expect(result?.billing_note).toBeNull()
+  })
+
+  test('returns confirm action with billing_note when server responds with confirm', async () => {
+    const serverResponse: UltrareviewPreflightResponse = {
+      action: 'confirm',
+      billing_note: 'This run will cost approximately $2.50.',
+    }
+    replyOnce(serverResponse)
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).not.toBeNull()
+    expect(result?.action).toBe('confirm')
+    expect(result?.billing_note).toBe('This run will cost approximately $2.50.')
+  })
+
+  test('returns blocked action when server responds with blocked', async () => {
+    const serverResponse: UltrareviewPreflightResponse = {
+      action: 'blocked',
+      billing_note: null,
+    }
+    replyOnce(serverResponse)
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).not.toBeNull()
+    expect(result?.action).toBe('blocked')
+  })
+
+  test('returns null on schema mismatch (invalid action value)', async () => {
+    replyOnce({ action: 'unknown_action', billing_note: null })
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).toBeNull()
+  })
+
+  test('returns null on network error (no response)', async () => {
+    rejectOnce('ECONNREFUSED')
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).toBeNull()
+  })
+
+  test('returns null on 401 Unauthorized', async () => {
+    rejectOnce('Unauthorized', 401)
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).toBeNull()
+  })
+
+  test('returns null on 403 Forbidden', async () => {
+    rejectOnce('Forbidden', 403)
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).toBeNull()
+  })
+
+  test('returns null on 5xx server error', async () => {
+    rejectOnce('Internal Server Error', 500)
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).toBeNull()
+  })
+
+  test('passes pr_number to request body when provided', async () => {
+    const captured = captureBodyOnce()
+
+    const result = await fetchUltrareviewPreflight({
+      repo: 'owner/repo',
+      pr_number: 42,
+    })
+    expect(result?.action).toBe('proceed')
+    expect(captured.body).toEqual({ repo: 'owner/repo', pr_number: 42 })
+  })
+
+  test('passes pr_url to request body when provided', async () => {
+    const captured = captureBodyOnce()
+
+    const result = await fetchUltrareviewPreflight({
+      repo: 'owner/repo',
+      pr_url: 'https://github.com/owner/repo/pull/42',
+    })
+    expect(result?.action).toBe('proceed')
+    expect(captured.body).toEqual({
+      repo: 'owner/repo',
+      pr_url: 'https://github.com/owner/repo/pull/42',
+    })
+  })
+
+  test('passes confirm flag to request body when provided', async () => {
+    const captured = captureBodyOnce()
+
+    const result = await fetchUltrareviewPreflight({
+      repo: 'owner/repo',
+      confirm: true,
+    })
+    expect(result?.action).toBe('proceed')
+    expect(captured.body).toEqual({ repo: 'owner/repo', confirm: true })
+  })
+
+  test('omits optional fields from request body when not provided', async () => {
+    const captured = captureBodyOnce()
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result?.action).toBe('proceed')
+    expect(captured.body).toEqual({ repo: 'owner/repo' })
+  })
+})
--- a/src/services/api/claude.ts
+++ b/src/services/api/claude.ts
@@ -93,7 +93,10 @@ import {
  asSystemPrompt,
  type SystemPrompt,
 } from '../../utils/systemPromptType.js'
-import { cloneDeep } from 'lodash-es'
+import {
+  getBreakCacheMarkerPath,
+  getBreakCacheAlwaysPath,
+} from '../../commands/break-cache/index.js'
 import { tokenCountFromLastAPIResponse } from '../../utils/tokens.js'
 import { getDynamicConfig_BLOCKS_ON_INIT } from '../analytics/growthbook.js'
 import {
@@ -121,6 +124,7 @@ import {
  getAfkModeHeaderLatched,
  getCacheEditingHeaderLatched,
  getFastModeHeaderLatched,
+  getLastApiCompletionTimestamp,
  getPromptCache1hAllowlist,
  getPromptCache1hEligible,
  getSessionId,
@@ -250,6 +254,7 @@ import {
  type NonNullableUsage,
 } from './logging.js'
 import {
+  CACHE_TTL_1HOUR_MS,
  checkResponseForCacheBreak,
  recordPromptState,
 } from './promptCacheBreakDetection.js'
@@ -507,30 +512,10 @@ export function getAPIMetadata() {
    }
  }

-  const deviceId = getOrCreateUserID()
-
-  // Third-party API providers (DeepSeek, etc.) validate user_id against
-  // ^[a-zA-Z0-9_-]+$ which rejects JSON strings containing {, ", :, etc.
-  // When using a non-Anthropic base URL, send only the device_id (hex string).
-  const baseUrl = process.env.ANTHROPIC_BASE_URL
-  const isThirdParty =
-    baseUrl &&
-    (() => {
-      try {
-        return new URL(baseUrl).host !== 'api.anthropic.com'
-      } catch {
-        return false
-      }
-    })()
-
-  if (isThirdParty) {
-    return { user_id: deviceId }
-  }
-
  return {
    user_id: jsonStringify({
      ...extra,
-      device_id: deviceId,
+      device_id: getOrCreateUserID(),
      // Only include OAuth account UUID when actively using OAuth authentication
      account_uuid: getOauthAccountInfo()?.accountUuid ?? '',
      session_id: getSessionId(),
@@ -1441,12 +1426,39 @@ async function* queryModel(
    ].filter(Boolean),
  )

+  // ── Break-cache integration ──
+  // If a one-time break-cache marker exists, or always-mode is on, append a
+  // unique ephemeral nonce comment to the system prompt so the prefix-cache
+  // hash changes for this request, forcing a cache miss.
+  {
+    const { existsSync, unlinkSync } = await import('node:fs')
+    const { randomUUID } = await import('node:crypto')
+    const onceMarker = getBreakCacheMarkerPath()
+    const alwaysFlag = getBreakCacheAlwaysPath()
+    // Check the one-shot marker exactly once and reuse the answer: the same
+    // observation decides both whether to break the cache and whether there
+    // is a marker to consume, so the two decisions cannot disagree.
+    const hasOnceMarker = existsSync(onceMarker)
+    if (hasOnceMarker || existsSync(alwaysFlag)) {
+      systemPrompt = asSystemPrompt([
+        ...systemPrompt,
+        `<!-- cache-break nonce: ${randomUUID()} -->`,
+      ])
+      // Only delete the once marker; the always flag persists until /break-cache off
+      if (hasOnceMarker) {
+        try {
+          unlinkSync(onceMarker)
+        } catch {
+          /* best-effort: a marker that cannot be removed must not fail the request */
+        }
+      }
+    }
+  }
+
  // Prepend system prompt block for easy API identification
  logAPIPrefix(systemPrompt)

  const enablePromptCaching =
    options.enablePromptCaching ?? getPromptCachingEnabled(options.model)
-  let system = buildSystemPromptBlocks(systemPrompt, enablePromptCaching, {
+  const system = buildSystemPromptBlocks(systemPrompt, enablePromptCaching, {
    skipGlobalCacheForSystemPrompt: needsToolBasedCacheMarker,
    querySource: options.querySource,
  })
@@ -1466,7 +1478,7 @@ async function* queryModel(
      model: advisorModel,
    } as unknown as BetaToolUnion)
  }
-  let allTools = [...toolSchemas, ...extraToolSchemas]
+  const allTools = [...toolSchemas, ...extraToolSchemas]

  const isFastMode =
    isFastModeEnabled() &&
@@ -1590,39 +1602,6 @@ async function* queryModel(
  const consumedCacheEdits = cachedMCEnabled ? consumePendingCacheEdits() : null
  const consumedPinnedEdits = cachedMCEnabled ? getPinnedCacheEdits() : []

-  // ---------------------------------------------------------------------------
-  // Serialization boundary: deep-clone heavy data so the closure below captures
-  // independent copies, not references to the originals. After this point the
-  // original variables (messagesForAPI, system, allTools) are nulled out so
-  // they can be GC'd even while the generator/closure is still alive (during
-  // long streaming responses or retry backoff).
-  // ---------------------------------------------------------------------------
-  const frozenMessages = addCacheBreakpoints(
-    messagesForAPI,
-    enablePromptCaching,
-    options.querySource,
-    cachedMCEnabled &&
-      getAPIProvider() === 'firstParty' &&
-      options.querySource === 'repl_main_thread',
-    consumedCacheEdits as any,
-    consumedPinnedEdits as any,
-    options.skipCacheWrite,
-  )
-  const frozenSystem = cloneDeep(system)
-  const frozenTools = cloneDeep(allTools)
-
-  // Pre-compute scalars that post-streaming code needs, so messagesForAPI
-  // can be released before streaming starts.
-  const preMessagesCount = messagesForAPI.length
-  const preMessagesTokenCount = tokenCountFromLastAPIResponse(messagesForAPI)
-
-  // Release originals for GC — the frozen* copies and pre-computed scalars
-  // are now the only references to this data inside the closure.
-  // After null-out, all downstream code uses frozen* or pre-computed scalars.
-  messagesForAPI = null!
-  system = null!
-  allTools = null!
-
  // Capture the betas sent in the last API request, including the ones that
  // were dynamically added, so we can log and send it to telemetry.
  let lastRequestBetas: string[] | undefined
@@ -1725,6 +1704,9 @@ async function* queryModel(
      clearAllThinking: false,
    })

+    const enablePromptCaching =
+      options.enablePromptCaching ?? getPromptCachingEnabled(retryContext.model)
+
    // Fast mode: header is latched session-stable (cache-safe), but
    // `speed='fast'` stays dynamic so cooldown still suppresses the actual
    // fast-mode request without changing the cache key.
@@ -1755,10 +1737,13 @@ async function* queryModel(
      }
    }

-    // Cache editing beta: header is latched session-stable.
-    // The useCachedMC gate (cache_edits body behavior) is baked into
-    // frozenMessages at the serialization boundary above, so this block
-    // only controls the beta header.
+    // Cache editing beta: header is latched session-stable; useCachedMC
+    // (controls cache_edits body behavior) stays live so edits stop when
+    // the feature disables but the header doesn't flip.
+    const useCachedMC =
+      cachedMCEnabled &&
+      getAPIProvider() === 'firstParty' &&
+      options.querySource === 'repl_main_thread'
    if (
      cacheEditingHeaderLatched &&
      cacheEditingBetaHeader &&
@@ -1787,9 +1772,17 @@ async function* queryModel(

    return {
      model: normalizeModelStringForAPI(options.model),
-      messages: frozenMessages,
-      system: frozenSystem,
-      tools: frozenTools,
+      messages: addCacheBreakpoints(
+        messagesForAPI,
+        enablePromptCaching,
+        options.querySource,
+        useCachedMC,
+        consumedCacheEdits as any,
+        consumedPinnedEdits as any,
+        options.skipCacheWrite,
+      ),
+      system,
+      tools: allTools,
      tool_choice: options.toolChoice,
      ...(useBetas && { betas: filteredBetas }),
      metadata: getAPIMetadata(),
@@ -1849,9 +1842,6 @@ async function* queryModel(
  let ttftMs = 0
  let partialMessage: BetaMessage | undefined
  const contentBlocks: (BetaContentBlock | ConnectorTextBlock)[] = []
-  // Accumulate streaming deltas in arrays to avoid O(n²) string concatenation.
-  // Joined and assigned to contentBlock fields at content_block_stop.
-  const streamingDeltas = new Map<number, string[]>()
  let usage: NonNullableUsage = EMPTY_USAGE
  let costUSD = 0
  let stopReason: BetaStopReason | null = null
@@ -2138,8 +2128,6 @@ async function* queryModel(
                }
                break
            }
-            // Initialize delta accumulator for this content block
-            streamingDeltas.set(part.index, [])
            break
          case 'content_block_delta': {
            const contentBlock = contentBlocks[part.index]
@@ -2169,9 +2157,8 @@ async function* queryModel(
                })
                throw new Error('Content block is not a connector_text block')
              }
-              streamingDeltas
-                .get(part.index)
-                ?.push(delta.connector_text as string)
+              ;(contentBlock as { connector_text: string }).connector_text +=
+                delta.connector_text
            } else {
              switch (delta.type) {
                case 'citations_delta':
@@ -2201,9 +2188,7 @@ async function* queryModel(
                    })
                    throw new Error('Content block input is not a string')
                  }
-                  streamingDeltas
-                    .get(part.index)
-                    ?.push(delta.partial_json as string)
+                  contentBlock.input += delta.partial_json
                  break
                case 'text_delta':
                  if (contentBlock.type !== 'text') {
@@ -2217,7 +2202,7 @@ async function* queryModel(
                    })
                    throw new Error('Content block is not a text block')
                  }
-                  streamingDeltas.get(part.index)?.push(delta.text!)
+                  ;(contentBlock as { text: string }).text += delta.text
                  break
                case 'signature_delta':
                  if (
@@ -2252,7 +2237,8 @@ async function* queryModel(
                    })
                    throw new Error('Content block is not a thinking block')
                  }
-                  streamingDeltas.get(part.index)?.push(delta.thinking!)
+                  ;(contentBlock as { thinking: string }).thinking +=
+                    delta.thinking
                  break
              }
            }
@@ -2284,32 +2270,6 @@ async function* queryModel(
              })
              throw new Error('Message not found')
            }
-            // Join accumulated streaming deltas into the contentBlock fields
-            // to avoid O(n²) string concatenation during streaming.
-            const deltas = streamingDeltas.get(part.index)
-            if (deltas && deltas.length > 0) {
-              const joined = deltas.join('')
-              switch (contentBlock.type) {
-                case 'text':
-                  ;(contentBlock as { text: string }).text = joined
-                  break
-                case 'thinking':
-                  ;(contentBlock as { thinking: string }).thinking = joined
-                  break
-                case 'tool_use':
-                case 'server_tool_use':
-                  contentBlock.input = joined
-                  break
-                default:
-                  if ((contentBlock.type as string) === 'connector_text') {
-                    ;(
-                      contentBlock as { connector_text: string }
-                    ).connector_text = joined
-                  }
-                  break
-              }
-              streamingDeltas.delete(part.index)
-            }
            const m: AssistantMessage = {
              message: {
                ...partialMessage,
@@ -2864,8 +2824,8 @@ async function* queryModel(
        logAPIError({
          error,
          model: errorModel,
-          messageCount: preMessagesCount,
-          messageTokens: preMessagesTokenCount,
+          messageCount: messagesForAPI.length,
+          messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
          durationMs: Date.now() - start,
          durationMsIncludingRetries: Date.now() - startIncludingRetries,
          attempt: attemptNumber,
@@ -2886,10 +2846,7 @@ async function* queryModel(

        yield getAssistantMessageFromError(error, errorModel, {
          messages,
-          messagesForAPI: frozenMessages as unknown as (
-            | UserMessage
-            | AssistantMessage
-          )[],
+          messagesForAPI,
        })
        releaseStreamResources()
        return
@@ -2923,8 +2880,8 @@ async function* queryModel(
      logAPIError({
        error,
        model: errorModel,
-        messageCount: preMessagesCount,
-        messageTokens: preMessagesTokenCount,
+        messageCount: messagesForAPI.length,
+        messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
        durationMs: Date.now() - start,
        durationMsIncludingRetries: Date.now() - startIncludingRetries,
        attempt: attemptNumber,
@@ -2947,10 +2904,7 @@ async function* queryModel(

      yield getAssistantMessageFromError(error, errorModel, {
        messages,
-        messagesForAPI: frozenMessages as unknown as (
-          | UserMessage
-          | AssistantMessage
-        )[],
+        messagesForAPI,
      })
      releaseStreamResources()
      return
@@ -3006,19 +2960,14 @@ async function* queryModel(
  // Precompute scalars so the fire-and-forget .then() closure doesn't pin the
  // full messagesForAPI array (the entire conversation up to the context window
  // limit) until getToolPermissionContext() resolves.
-  // Note: messagesForAPI was nulled above (serialization boundary), so we use
-  // the pre-computed scalars captured before the null-out.
-  const logMessageCount = preMessagesCount
-  const logMessageTokens = preMessagesTokenCount
+  const logMessageCount = messagesForAPI.length
+  const logMessageTokens = tokenCountFromLastAPIResponse(messagesForAPI)

  // Record LLM observation in Langfuse (no-op if not configured)
  recordLLMObservation(options.langfuseTrace ?? null, {
    model: resolvedModel,
    provider: getAPIProvider(),
-    input: convertMessagesToLangfuse(
-      frozenMessages as Parameters<typeof convertMessagesToLangfuse>[0],
-      systemPrompt,
-    ),
+    input: convertMessagesToLangfuse(messagesForAPI, systemPrompt),
    output: convertOutputToLangfuse(newMessages),
    usage: {
      input_tokens: usage.input_tokens,
--- a/src/services/api/ultrareviewPreflight.ts
+++ b/src/services/api/ultrareviewPreflight.ts
@@ -0,0 +1,81 @@
+import axios from 'axios'
+import z from 'zod/v4'
+import { getOauthConfig } from '../../constants/oauth.js'
+import { logForDebugging } from '../../utils/debug.js'
+import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
+
+// The three verdicts the preflight endpoint may return.
+const PREFLIGHT_ACTIONS = ['proceed', 'confirm', 'blocked'] as const
+
+/**
+ * Runtime validator for the /v1/ultrareview/preflight response body.
+ * Mirrors the binary-extracted schema:
+ * vq.object({action: vq.enum([...]), billing_note: ...})
+ */
+const UltrareviewPreflightSchema = z.object({
+  action: z.enum(PREFLIGHT_ACTIONS),
+  // Accepts a string, null, or a missing field.
+  billing_note: z.string().nullish(),
+})
+
+/** Validated preflight response; its shape is inferred from UltrareviewPreflightSchema. */
+export type UltrareviewPreflightResponse = z.infer<
+  typeof UltrareviewPreflightSchema
+>
+
+/**
+ * Arguments for fetchUltrareviewPreflight. `repo` is always sent in the POST
+ * body; each optional field is sent only when it is not undefined.
+ */
+export type UltrareviewPreflightArgs = {
+  /** Repository identifier (the tests pass the 'owner/repo' form). */
+  repo: string
+  /** Pull request number, when the review targets a specific PR. */
+  pr_number?: number
+  /** Pull request URL, when the review targets a specific PR. */
+  pr_url?: string
+  /** Set to true once the user has acknowledged the billing prompt. */
+  confirm?: boolean
+}
+
+/**
+ * POST /v1/ultrareview/preflight — the server-side gate consulted before an
+ * ultrareview launch.
+ *
+ * Resolves to the validated response (proceed / confirm / blocked), or to
+ * null on any failure: request preparation, network or HTTP error, or a
+ * response body that does not match the schema. Nothing is thrown. Callers
+ * must treat null as "fallback to direct launch" to preserve existing
+ * behavior.
+ *
+ * Pass `confirm: true` when the user has already acknowledged the billing
+ * dialog (or passed --confirm on the CLI); this skips the server-side
+ * confirm prompt and yields a direct proceed/blocked.
+ *
+ * @param args Repository plus optional PR identifiers and confirm flag.
+ * @returns The parsed preflight response, or null on failure.
+ */
+export async function fetchUltrareviewPreflight(
+  args: UltrareviewPreflightArgs,
+): Promise<UltrareviewPreflightResponse | null> {
+  try {
+    const { accessToken, orgUUID } = await prepareApiRequest()
+    const endpoint = `${getOauthConfig().BASE_API_URL}/v1/ultrareview/preflight`
+
+    // `repo` is always sent; optional fields only when the caller set them.
+    const { repo, pr_number, pr_url, confirm } = args
+    const payload: Record<string, unknown> = {
+      repo,
+      ...(pr_number !== undefined && { pr_number }),
+      ...(pr_url !== undefined && { pr_url }),
+      ...(confirm !== undefined && { confirm }),
+    }
+    const headers = {
+      ...getOAuthHeaders(accessToken),
+      'x-organization-uuid': orgUUID,
+    }
+
+    const { data } = await axios.post(endpoint, payload, {
+      headers,
+      timeout: 10000,
+    })
+
+    const validation = UltrareviewPreflightSchema.safeParse(data)
+    if (validation.success) {
+      return validation.data
+    }
+    logForDebugging(
+      `fetchUltrareviewPreflight: schema mismatch — ${validation.error.message}`,
+    )
+    return null
+  } catch (error) {
+    // Every failure collapses to null; the cause is kept in the debug log.
+    logForDebugging(`fetchUltrareviewPreflight failed: ${error}`)
+    return null
+  }
+}
--- a/src/services/langfuse/tests/langfuse.test.ts
+++ b/src/services/langfuse/tests/langfuse.test.ts
@@ -170,6 +170,21 @@ describe('Langfuse integration', () => {
      const result = sanitizeToolOutput('MCPTool', 'mcp data')
      expect(result).toBe('[MCPTool output redacted, 8 chars]')
    })
+
+    // The secret text must not appear in the result, only its length.
+    test('redacts VaultHttpFetch output (vault tool, PR-2)', async () => {
+      const { sanitizeToolOutput } = await import('../sanitize.js')
+      const secret = 'sk-secret-bearer-token'
+      const redacted = sanitizeToolOutput('VaultHttpFetch', secret)
+      expect(redacted).toBe('[VaultHttpFetch output redacted, 22 chars]')
+    })
+
+    // Covers the tool name registered ahead of the tool itself.
+    test('redacts LocalVaultFetch output (vault tool, future PR-3)', async () => {
+      const { sanitizeToolOutput } = await import('../sanitize.js')
+      const secret = 'plaintext-secret'
+      const redacted = sanitizeToolOutput('LocalVaultFetch', secret)
+      expect(redacted).toBe('[LocalVaultFetch output redacted, 16 chars]')
+    })
  })

  describe('sanitizeGlobal', () => {
--- a/src/services/langfuse/sanitize.ts
+++ b/src/services/langfuse/sanitize.ts
@@ -7,7 +7,16 @@ const REDACTED_FILE_TOOLS = new Set([
  'FileEditTool',
 ])
 const REDACTED_SHELL_TOOLS = new Set(['BashTool', 'PowerShellTool'])
-const SENSITIVE_OUTPUT_TOOLS = new Set(['ConfigTool', 'MCPTool'])
+// Tools whose tool_result must be redacted in Langfuse traces: vault-class
+// tools and any tool that intentionally surfaces user secrets. PR-2 ships
+// VaultHttpFetch; LocalVaultFetch is reserved for a future PR. Both names are
+// listed proactively so the Langfuse export stays safe even before the tools
+// land.
+const SENSITIVE_OUTPUT_TOOLS = new Set([
+  'ConfigTool',
+  'MCPTool',
+  'VaultHttpFetch',
+  'LocalVaultFetch',
+])

 function escapeRegExp(value: string): string {
  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
--- a/src/services/providerRegistry/tests/loader.test.ts
+++ b/src/services/providerRegistry/tests/loader.test.ts
@@ -0,0 +1,133 @@
+import { describe, test, expect, beforeEach, afterEach, mock } from 'bun:test'
+import { mkdtempSync, writeFileSync, rmSync } from 'fs'
+import { join } from 'path'
+import { tmpdir } from 'os'
+import { logMock } from '../../../../tests/mocks/log.js'
+
+// Stub factories; each call returns a fresh stub object.
+// bun:bundle: feature() reports false for every flag.
+const bundleStub = () => ({ feature: () => false })
+// settings.js: empty settings and a no-op updater.
+const settingsStub = () => ({
+  getSettings_DEPRECATED: () => ({}),
+  updateSettingsForSource: () => {},
+})
+
+// Must mock log before any import that transitively loads log.ts
+mock.module('src/utils/log.ts', logMock)
+// bun:bundle must be mocked before imports that use feature()
+mock.module('bun:bundle', bundleStub)
+// settings.js must be mocked to cut bootstrap chain
+mock.module('src/utils/settings/settings.js', settingsStub)
+
+// Per-test scratch directory that stands in for the Claude config dir.
+let tmpDir: string
+// CLAUDE_CONFIG_DIR as it was before the test, so teardown can restore it
+// instead of wiping a value the surrounding environment had configured.
+let previousConfigDir: string | undefined
+
+beforeEach(() => {
+  previousConfigDir = process.env['CLAUDE_CONFIG_DIR']
+  tmpDir = mkdtempSync(join(tmpdir(), 'provider-loader-test-'))
+  process.env['CLAUDE_CONFIG_DIR'] = tmpDir
+})
+
+afterEach(async () => {
+  try {
+    if (previousConfigDir === undefined) {
+      delete process.env['CLAUDE_CONFIG_DIR']
+    } else {
+      process.env['CLAUDE_CONFIG_DIR'] = previousConfigDir
+    }
+    rmSync(tmpDir, { recursive: true, force: true })
+  } finally {
+    // J1 fix: invalidate the per-process cache between tests so each test
+    // starts fresh. Runs in `finally` so a failed cleanup above cannot leave
+    // a stale cache behind for the next test.
+    const { _invalidateProviderCache } = await import('../loader.js')
+    _invalidateProviderCache()
+  }
+})
+
+describe('loadProviders', () => {
+  // Number of built-in providers expected when nothing valid is configured.
+  const DEFAULT_PROVIDER_COUNT = 4
+
+  /** Writes `raw` verbatim to providers.json inside the per-test config dir. */
+  const writeProvidersFile = (raw: string): void => {
+    writeFileSync(join(tmpDir, 'providers.json'), raw)
+  }
+
+  test('returns 4 default providers when providers.json does not exist', async () => {
+    const { loadProviders } = await import('../loader.js')
+    const loaded = loadProviders()
+    expect(loaded).toHaveLength(DEFAULT_PROVIDER_COUNT)
+    const ids = loaded.map(p => p.id)
+    expect(ids).toEqual(['cerebras', 'groq', 'qwen', 'deepseek'])
+  })
+
+  test('returns defaults when providers.json is empty', async () => {
+    writeProvidersFile('')
+    const { loadProviders } = await import('../loader.js')
+    expect(loadProviders()).toHaveLength(DEFAULT_PROVIDER_COUNT)
+  })
+
+  test('returns defaults when providers.json is empty array', async () => {
+    writeProvidersFile('[]')
+    const { loadProviders } = await import('../loader.js')
+    expect(loadProviders()).toHaveLength(DEFAULT_PROVIDER_COUNT)
+  })
+
+  test('returns defaults when providers.json is corrupt JSON', async () => {
+    writeProvidersFile('{not valid json')
+    const { loadProviders } = await import('../loader.js')
+    expect(loadProviders()).toHaveLength(DEFAULT_PROVIDER_COUNT)
+  })
+
+  test('returns defaults when providers.json fails schema validation', async () => {
+    const invalidEntry = { id: 123, kind: 'bad-kind', baseUrl: 'not-a-url' }
+    writeProvidersFile(JSON.stringify([invalidEntry]))
+    const { loadProviders } = await import('../loader.js')
+    expect(loadProviders()).toHaveLength(DEFAULT_PROVIDER_COUNT)
+  })
+
+  test('merges valid user providers on top of defaults', async () => {
+    const customProvider = {
+      id: 'myendpoint',
+      kind: 'openai-compat',
+      baseUrl: 'https://my.api.com/v1',
+      apiKeyEnv: 'MY_API_KEY',
+      defaultModel: 'my-model',
+      compatRule: 'permissive',
+    }
+    writeProvidersFile(JSON.stringify([customProvider]))
+    const { loadProviders } = await import('../loader.js')
+    const loaded = loadProviders()
+    // 4 defaults + 1 custom = 5
+    expect(loaded).toHaveLength(5)
+    const custom = loaded.find(p => p.id === 'myendpoint')
+    expect(custom).toMatchObject({
+      baseUrl: 'https://my.api.com/v1',
+    })
+  })
+
+  test('user provider with same id as default replaces the default', async () => {
+    const overrideCerebras = {
+      id: 'cerebras',
+      kind: 'openai-compat',
+      baseUrl: 'https://custom-cerebras.example.com/v1',
+      apiKeyEnv: 'CEREBRAS_API_KEY',
+      defaultModel: 'llama-3.3-70b',
+      compatRule: 'cerebras',
+    }
+    writeProvidersFile(JSON.stringify([overrideCerebras]))
+    const { loadProviders } = await import('../loader.js')
+    const loaded = loadProviders()
+    // Still 4 providers (cerebras replaced, not added)
+    expect(loaded).toHaveLength(DEFAULT_PROVIDER_COUNT)
+    const cerebras = loaded.find(p => p.id === 'cerebras')
+    expect(cerebras?.baseUrl).toBe('https://custom-cerebras.example.com/v1')
+  })
+
+  test('findProvider returns undefined for unknown id', async () => {
+    const { findProvider, DEFAULT_PROVIDERS } = await import('../loader.js')
+    expect(findProvider('nonexistent', DEFAULT_PROVIDERS)).toBeUndefined()
+  })
+
+  test('findProvider returns correct provider for known id', async () => {
+    const { findProvider, DEFAULT_PROVIDERS } = await import('../loader.js')
+    const match = findProvider('deepseek', DEFAULT_PROVIDERS)
+    expect(match?.baseUrl).toBe('https://api.deepseek.com/v1')
+    expect(match?.compatRule).toBe('deepseek')
+  })
+})
--- a/src/services/providerRegistry/tests/providerCompatMatrix.test.ts
+++ b/src/services/providerRegistry/tests/providerCompatMatrix.test.ts
@@ -0,0 +1,204 @@
+import { describe, test, expect } from 'bun:test'
+import {
+  COMPAT_PROFILES,
+  applyCompatRule,
+  getDeepSeekReasoningMode,
+} from '../providerCompatMatrix.js'
+
+// Pins the static capability flags of each built-in compat profile.
+describe('COMPAT_PROFILES', () => {
+  const { cerebras, groq, deepseek, permissive } = COMPAT_PROFILES
+  const strictOpenAI = COMPAT_PROFILES['strict-openai']
+
+  test('cerebras does not support stream_options', () => {
+    expect(cerebras.supportsStreamUsageOption).toBe(false)
+  })
+
+  test('cerebras does not support thinking field', () => {
+    expect(cerebras.supportsThinkingField).toBe(false)
+  })
+
+  test('groq strips reasoning_content', () => {
+    expect(groq.reasoningContentEcho).toBe('strip')
+  })
+
+  test('deepseek preserves reasoning_content', () => {
+    expect(deepseek.reasoningContentEcho).toBe('always-preserve')
+  })
+
+  test('deepseek supports thinking field', () => {
+    expect(deepseek.supportsThinkingField).toBe(true)
+  })
+
+  test('strict-openai strips stream_options', () => {
+    expect(strictOpenAI.supportsStreamUsageOption).toBe(false)
+  })
+
+  test('permissive allows all fields', () => {
+    expect(permissive.supportsStreamUsageOption).toBe(true)
+    expect(permissive.supportsThinkingField).toBe(true)
+  })
+})
+
+// Covers removal (or retention) of stream_options per compat rule.
+describe('applyCompatRule - stream_options stripping', () => {
+  // Fresh body per call so no test can observe another test's object.
+  const usageOnlyBody = (): Record<string, unknown> => ({
+    messages: [],
+    stream_options: { include_usage: true },
+  })
+
+  test('strips stream_options.include_usage for cerebras', () => {
+    const stripped = applyCompatRule(
+      {
+        model: 'llama-3.3-70b',
+        messages: [],
+        stream: true,
+        stream_options: { include_usage: true },
+      },
+      'cerebras',
+    )
+    expect(stripped['stream_options']).toBeUndefined()
+  })
+
+  test('strips stream_options for strict-openai', () => {
+    const stripped = applyCompatRule(usageOnlyBody(), 'strict-openai')
+    expect(stripped['stream_options']).toBeUndefined()
+  })
+
+  test('preserves stream_options for deepseek', () => {
+    const kept = applyCompatRule(usageOnlyBody(), 'deepseek')
+    expect(kept['stream_options']).toEqual({ include_usage: true })
+  })
+
+  test('preserves stream_options for permissive', () => {
+    const kept = applyCompatRule(
+      {
+        messages: [],
+        stream_options: { include_usage: true, other_field: 'x' },
+      },
+      'permissive',
+    )
+    expect(kept['stream_options']).toEqual({
+      include_usage: true,
+      other_field: 'x',
+    })
+  })
+
+  test('does not mutate input body', () => {
+    const input = usageOnlyBody()
+    applyCompatRule(input, 'groq')
+    // The caller's object must still carry the option.
+    expect(input['stream_options']).toEqual({ include_usage: true })
+  })
+})
+
+// Covers the per-message `thinking` field for a stripping and a preserving rule.
+describe('applyCompatRule - thinking field stripping', () => {
+  // One user message carrying the custom `thinking` field under test.
+  const bodyWithThinking = (): Record<string, unknown> => ({
+    messages: [{ role: 'user', content: 'hi', thinking: { budget: 1000 } }],
+  })
+
+  // First entry of the outgoing messages array.
+  const firstMessage = (
+    outgoing: Record<string, unknown>,
+  ): Record<string, unknown> =>
+    (outgoing['messages'] as Record<string, unknown>[])[0]!
+
+  test('strips thinking field from messages for cerebras', () => {
+    const sent = firstMessage(applyCompatRule(bodyWithThinking(), 'cerebras'))
+    expect(sent['thinking']).toBeUndefined()
+    expect(sent['content']).toBe('hi')
+  })
+
+  test('preserves thinking field for deepseek', () => {
+    const sent = firstMessage(applyCompatRule(bodyWithThinking(), 'deepseek'))
+    expect(sent['thinking']).toEqual({ budget: 1000 })
+  })
+})
+
+// Covers the three reasoning_content echo policies: strip, always-preserve,
+// and drop-on-non-thinking (decided by the model name).
+describe('applyCompatRule - DeepSeek reasoning_content three modes', () => {
+  // Applies the rule and reports reasoning_content of the first outgoing message.
+  const echoedReasoning = (
+    body: Record<string, unknown>,
+    rule: Parameters<typeof applyCompatRule>[1],
+  ): unknown => {
+    const outgoing = applyCompatRule(body, rule)['messages'] as Record<
+      string,
+      unknown
+    >[]
+    return outgoing[0]!['reasoning_content']
+  }
+
+  // Assistant turn with plain text content plus reasoning_content.
+  const assistantTurn = (content: string, reasoning: string) => ({
+    role: 'assistant',
+    content,
+    reasoning_content: reasoning,
+  })
+
+  test('thinking-only mode: strips reasoning_content for strict-openai (non-deepseek)', () => {
+    const body = { messages: [assistantTurn('answer', 'thoughts')] }
+    expect(echoedReasoning(body, 'strict-openai')).toBeUndefined()
+  })
+
+  test('thinking-only mode: preserves reasoning_content for deepseek', () => {
+    const body = { messages: [assistantTurn('answer', 'thoughts')] }
+    expect(echoedReasoning(body, 'deepseek')).toBe('thoughts')
+  })
+
+  test('thinking+tools mode: preserves reasoning_content for deepseek', () => {
+    const toolTurn = {
+      role: 'assistant',
+      content: null,
+      reasoning_content: 'deep thoughts',
+      tool_calls: [{ id: 'call_1', function: { name: 'search' } }],
+    }
+    expect(echoedReasoning({ messages: [toolTurn] }, 'deepseek')).toBe(
+      'deep thoughts',
+    )
+  })
+
+  test('permissive with non-thinking model strips reasoning_content', () => {
+    const body = {
+      model: 'gpt-4o',
+      messages: [assistantTurn('hi', 'unused')],
+    }
+    expect(echoedReasoning(body, 'permissive')).toBeUndefined()
+  })
+
+  test('permissive with thinking model preserves reasoning_content', () => {
+    const body = {
+      model: 'deepseek-reasoner',
+      messages: [assistantTurn('hi', 'thoughts')],
+    }
+    expect(echoedReasoning(body, 'permissive')).toBe('thoughts')
+  })
+})
+
+// One case per classification, plus the empty tool_calls edge.
+describe('getDeepSeekReasoningMode', () => {
+  test('thinking-only: has reasoning_content, no tool_calls', () => {
+    const mode = getDeepSeekReasoningMode({
+      reasoning_content: 'thoughts',
+      content: 'answer',
+    })
+    expect(mode).toBe('thinking-only')
+  })
+
+  test('thinking+tools: has both reasoning_content and tool_calls', () => {
+    const mode = getDeepSeekReasoningMode({
+      reasoning_content: 'deep thoughts',
+      tool_calls: [{ id: 'call_1' }],
+    })
+    expect(mode).toBe('thinking+tools')
+  })
+
+  test('normal: no reasoning_content', () => {
+    const mode = getDeepSeekReasoningMode({ content: 'plain answer' })
+    expect(mode).toBe('normal')
+  })
+
+  test('normal: empty tool_calls array with no reasoning_content', () => {
+    const mode = getDeepSeekReasoningMode({ content: 'plain', tool_calls: [] })
+    expect(mode).toBe('normal')
+  })
+})
--- a/src/services/providerRegistry/tests/switcher.test.ts
+++ b/src/services/providerRegistry/tests/switcher.test.ts
@@ -0,0 +1,129 @@
+import { describe, test, expect, beforeEach, afterEach, mock } from 'bun:test'
+import { logMock } from '../../../../tests/mocks/log.js'
+
+// Module mocks are registered at file load, ahead of the dynamic
+// `await import('../switcher.js')` calls made inside each test.
+mock.module('src/utils/log.ts', logMock)
+// Stub feature(): every call reports false.
+mock.module('bun:bundle', () => ({ feature: () => false }))
+// Settings stub: reads yield an empty object, writes are no-ops.
+mock.module('src/utils/settings/settings.js', () => ({
+  getSettings_DEPRECATED: () => ({}),
+  updateSettingsForSource: () => {},
+}))
+
+// Every environment variable a test in this file reads or sets.
+const MANAGED_ENV_VARS = [
+  'CLAUDE_CODE_USE_OPENAI',
+  'OPENAI_API_KEY',
+  'OPENAI_BASE_URL',
+  'ANTHROPIC_API_KEY',
+  'CEREBRAS_API_KEY',
+  'GROQ_API_KEY',
+  'DASHSCOPE_API_KEY',
+  'DEEPSEEK_API_KEY',
+] as const
+
+/** Remove all managed variables from process.env. */
+const clearManagedEnv = (): void => {
+  for (const name of MANAGED_ENV_VARS) {
+    delete process.env[name]
+  }
+}
+
+// Start each test from a clean slate regardless of the host shell.
+beforeEach(clearManagedEnv)
+
+// Clear the same full list afterwards. Previously only four of the eight
+// variables were removed here, so a provider key set by a test (GROQ_API_KEY,
+// DEEPSEEK_API_KEY) stayed in process.env whenever that test failed before
+// reaching its own trailing `delete`.
+afterEach(clearManagedEnv)
+
+// Exercises switchProvider against the built-in provider list.
+describe('switchProvider', () => {
+  // Imports the switcher first and the loader second, the same order each
+  // test previously used inline.
+  const loadModules = async () => {
+    const { switchProvider } = await import('../switcher.js')
+    const { DEFAULT_PROVIDERS } = await import('../loader.js')
+    return { switchProvider, DEFAULT_PROVIDERS }
+  }
+
+  test('switching to cerebras returns correct env vars', async () => {
+    const { switchProvider, DEFAULT_PROVIDERS } = await loadModules()
+    const { env, provider } = switchProvider('cerebras', DEFAULT_PROVIDERS)
+    expect(env['CLAUDE_CODE_USE_OPENAI']).toBe('1')
+    expect(env['OPENAI_BASE_URL']).toBe('https://api.cerebras.ai/v1')
+    expect(env['OPENAI_MODEL']).toBe('llama-3.3-70b')
+    expect(provider.id).toBe('cerebras')
+  })
+
+  test('switching to groq returns correct env vars', async () => {
+    const { switchProvider, DEFAULT_PROVIDERS } = await loadModules()
+    const { env } = switchProvider('groq', DEFAULT_PROVIDERS)
+    expect(env['OPENAI_BASE_URL']).toBe('https://api.groq.com/openai/v1')
+    expect(env['OPENAI_MODEL']).toBe('llama-3.3-70b-versatile')
+  })
+
+  test('switching to qwen returns correct env vars', async () => {
+    const { switchProvider, DEFAULT_PROVIDERS } = await loadModules()
+    const { env } = switchProvider('qwen', DEFAULT_PROVIDERS)
+    expect(env['OPENAI_BASE_URL']).toBe(
+      'https://dashscope.aliyuncs.com/compatible-mode/v1',
+    )
+    expect(env['OPENAI_MODEL']).toBe('qwen-max')
+  })
+
+  test('switching to deepseek returns correct env vars', async () => {
+    const { switchProvider, DEFAULT_PROVIDERS } = await loadModules()
+    const { env } = switchProvider('deepseek', DEFAULT_PROVIDERS)
+    expect(env['OPENAI_BASE_URL']).toBe('https://api.deepseek.com/v1')
+    expect(env['OPENAI_MODEL']).toBe('deepseek-chat')
+  })
+
+  test('throws for non-existent provider id', async () => {
+    const { switchProvider, DEFAULT_PROVIDERS } = await loadModules()
+    const attempt = () => switchProvider('nonexistent', DEFAULT_PROVIDERS)
+    expect(attempt).toThrow('provider "nonexistent" not found')
+  })
+
+  test('warns when provider API key env var is not set', async () => {
+    const { switchProvider, DEFAULT_PROVIDERS } = await loadModules()
+    const { warnings } = switchProvider('cerebras', DEFAULT_PROVIDERS)
+    expect(warnings.length).toBeGreaterThan(0)
+    expect(warnings[0]).toContain('CEREBRAS_API_KEY')
+  })
+
+  test('no warning when provider API key env var is set', async () => {
+    process.env['GROQ_API_KEY'] = 'test-key'
+    const { switchProvider, DEFAULT_PROVIDERS } = await loadModules()
+    const { warnings } = switchProvider('groq', DEFAULT_PROVIDERS)
+    expect(warnings).toHaveLength(0)
+    delete process.env['GROQ_API_KEY']
+  })
+
+  test('does not mutate process.env', async () => {
+    const { switchProvider, DEFAULT_PROVIDERS } = await loadModules()
+    const baseUrlBefore = process.env['OPENAI_BASE_URL']
+    switchProvider('cerebras', DEFAULT_PROVIDERS)
+    expect(process.env['OPENAI_BASE_URL']).toBe(baseUrlBefore)
+  })
+})
+
+// Checks the rendered shell snippet and that the key is only ever referenced.
+describe('buildShellExportBlock', () => {
+  // Resolves the provider and renders its export block in one step.
+  const renderBlockFor = async (providerId: string): Promise<string> => {
+    const { switchProvider, buildShellExportBlock } = await import(
+      '../switcher.js'
+    )
+    const { DEFAULT_PROVIDERS } = await import('../loader.js')
+    return buildShellExportBlock(switchProvider(providerId, DEFAULT_PROVIDERS))
+  }
+
+  test('produces correct shell export lines for cerebras', async () => {
+    const rendered = await renderBlockFor('cerebras')
+    const expectedLines = [
+      'export CLAUDE_CODE_USE_OPENAI=1',
+      'export OPENAI_BASE_URL=https://api.cerebras.ai/v1',
+      'export OPENAI_API_KEY=$CEREBRAS_API_KEY',
+      'export OPENAI_MODEL=llama-3.3-70b',
+    ]
+    for (const line of expectedLines) {
+      expect(rendered).toContain(line)
+    }
+  })
+
+  test('api key line uses variable reference not literal value', async () => {
+    process.env['DEEPSEEK_API_KEY'] = 'sk-secret-key'
+    const rendered = await renderBlockFor('deepseek')
+    // The literal secret must never appear in the output.
+    expect(rendered).not.toContain('sk-secret-key')
+    // Only a reference to the variable is emitted.
+    expect(rendered).toContain('$DEEPSEEK_API_KEY')
+    delete process.env['DEEPSEEK_API_KEY']
+  })
+})
--- a/src/services/providerRegistry/loader.ts
+++ b/src/services/providerRegistry/loader.ts
@@ -0,0 +1,246 @@
+import {
+  existsSync,
+  readFileSync,
+  renameSync,
+  unlinkSync,
+  writeFileSync,
+} from 'fs'
+import { join } from 'path'
+import { randomBytes } from 'node:crypto'
+import { tmpdir } from 'node:os'
+import { logError } from '../../utils/log.js'
+import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
+import { ProvidersFileSchema, type ProviderConfig } from './types.js'
+
+/**
+ * The four built-in OpenAI-compat providers.
+ *
+ * Returned on their own when providers.json is absent, empty, unreadable,
+ * or fails JSON/schema validation. Otherwise the entries from providers.json
+ * are merged on top: a user entry replaces the built-in with the same id,
+ * and entries with new ids are appended after the built-ins.
+ */
+export const DEFAULT_PROVIDERS: ProviderConfig[] = [
+  {
+    id: 'cerebras',
+    kind: 'openai-compat',
+    baseUrl: 'https://api.cerebras.ai/v1',
+    apiKeyEnv: 'CEREBRAS_API_KEY',
+    defaultModel: 'llama-3.3-70b',
+    compatRule: 'cerebras',
+  },
+  {
+    id: 'groq',
+    kind: 'openai-compat',
+    baseUrl: 'https://api.groq.com/openai/v1',
+    apiKeyEnv: 'GROQ_API_KEY',
+    defaultModel: 'llama-3.3-70b-versatile',
+    compatRule: 'groq',
+  },
+  {
+    // Qwen is addressed through the DashScope compatible-mode endpoint and
+    // has no dedicated compat profile; it uses the generic strict-openai one.
+    id: 'qwen',
+    kind: 'openai-compat',
+    baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
+    apiKeyEnv: 'DASHSCOPE_API_KEY',
+    defaultModel: 'qwen-max',
+    compatRule: 'strict-openai',
+  },
+  {
+    id: 'deepseek',
+    kind: 'openai-compat',
+    baseUrl: 'https://api.deepseek.com/v1',
+    apiKeyEnv: 'DEEPSEEK_API_KEY',
+    defaultModel: 'deepseek-chat',
+    compatRule: 'deepseek',
+  },
+]
+
+/**
+ * Locate the user's provider override file.
+ *
+ * @returns `providers.json` joined onto the directory reported by
+ *   getClaudeConfigHomeDir().
+ */
+export function getProvidersFilePath(): string {
+  const configHome = getClaudeConfigHomeDir()
+  return join(configHome, 'providers.json')
+}
+
+// ── J1: per-process memoization with stale-on-invalidate ─────────────────────
+
+// Provider list most recently produced by loadProviders() or
+// loadProvidersWithDiagnostic(). null means nothing is cached, so the next
+// loadProviders() call goes back to disk.
+let _cachedProviders: ProviderConfig[] | null = null
+
+/**
+ * Drop the in-process provider cache so the next loadProviders() call
+ * re-reads providers.json. saveProviders() calls this after writing.
+ */
+export function _invalidateProviderCache(): void {
+  _cachedProviders = null
+}
+
+/**
+ * Load provider configurations, memoized per process (J1).
+ *
+ * The first call delegates to _loadProvidersInternal(), which starts from
+ * DEFAULT_PROVIDERS and, when providers.json exists and validates, merges
+ * its entries on top (same id replaces a default, new ids are appended).
+ * An absent, empty, unreadable or invalid file yields the defaults only.
+ *
+ * Later calls return the cached array until _invalidateProviderCache()
+ * runs (saveProviders() does this) or loadProvidersWithDiagnostic()
+ * refreshes it. The cached array is returned by reference, so callers
+ * should treat it as read-only.
+ *
+ * Load errors are logged but not returned from here; use
+ * loadProvidersWithDiagnostic() when the error text must reach the UI.
+ *
+ * @returns The merged provider list.
+ */
+export function loadProviders(): ProviderConfig[] {
+  if (_cachedProviders === null) {
+    // Cache miss: read from disk once and remember the outcome.
+    _cachedProviders = _loadProvidersInternal().providers
+  }
+  return _cachedProviders
+}
+
+/**
+ * Load providers together with any load error (A1).
+ *
+ * Unlike loadProviders() this always re-reads providers.json. The list it
+ * produces also replaces the in-process cache, so later loadProviders()
+ * calls see the same data.
+ *
+ * @returns `{ providers, error? }`. `error` is present only when the file
+ *   could not be read, parsed or validated, in which case `providers` holds
+ *   the defaults.
+ */
+export function loadProvidersWithDiagnostic(): {
+  providers: ProviderConfig[]
+  error?: string
+} {
+  const outcome = _loadProvidersInternal()
+  // Keep the memoized list in step with what the caller is about to display.
+  _cachedProviders = outcome.providers
+  return outcome
+}
+
+/**
+ * Read, validate and merge providers.json without touching the cache.
+ *
+ * Outcomes:
+ * - file absent, blank, or an empty array: a copy of DEFAULT_PROVIDERS;
+ * - read error, invalid JSON, or schema failure: a copy of DEFAULT_PROVIDERS
+ *   plus `error`, which is also passed to logError();
+ * - valid entries: defaults with user entries merged on top (same id
+ *   replaces, new ids are appended).
+ *
+ * Validation is all-or-nothing: one invalid entry rejects the whole file.
+ *
+ * @returns The provider list and, on failure, a human-readable error.
+ */
+function _loadProvidersInternal(): {
+  providers: ProviderConfig[]
+  error?: string
+} {
+  const filePath = getProvidersFilePath()
+
+  // Log the problem and fall back to a fresh copy of the built-ins.
+  const fallback = (
+    msg: string,
+  ): { providers: ProviderConfig[]; error: string } => {
+    logError(new Error(msg))
+    return { providers: [...DEFAULT_PROVIDERS], error: msg }
+  }
+
+  if (!existsSync(filePath)) {
+    return { providers: [...DEFAULT_PROVIDERS] }
+  }
+
+  let text: string
+  try {
+    // trim() strips surrounding whitespace and a leading byte-order mark
+    // (U+FEFF). JSON.parse rejects a BOM, so the trimmed text is what gets
+    // parsed below; parsing the raw text made BOM-prefixed files fail.
+    text = readFileSync(filePath, 'utf-8').trim()
+  } catch (err: unknown) {
+    return fallback(
+      `loadProviders: failed to read ${filePath}: ${err instanceof Error ? err.message : String(err)}`,
+    )
+  }
+
+  // Empty file → return defaults
+  if (!text) {
+    return { providers: [...DEFAULT_PROVIDERS] }
+  }
+
+  let parsed: unknown
+  try {
+    parsed = JSON.parse(text)
+  } catch {
+    return fallback(
+      `loadProviders: ${filePath} is not valid JSON. Using default providers.`,
+    )
+  }
+
+  const validation = ProvidersFileSchema.safeParse(parsed)
+  if (!validation.success) {
+    return fallback(
+      `loadProviders: ${filePath} failed schema validation: ${validation.error.message}. Using default providers.`,
+    )
+  }
+
+  if (validation.data.length === 0) {
+    return { providers: [...DEFAULT_PROVIDERS] }
+  }
+
+  // Merge: user entries override defaults with same id; new ids are appended.
+  // A Map keeps first-insertion order, so an overridden default keeps its slot.
+  const merged = new Map<string, ProviderConfig>()
+  for (const p of DEFAULT_PROVIDERS) {
+    merged.set(p.id, p)
+  }
+  for (const p of validation.data) {
+    merged.set(p.id, p)
+  }
+
+  return { providers: Array.from(merged.values()) }
+}
+
+/**
+ * Look up a provider by id.
+ *
+ * @param id - Provider id to match exactly.
+ * @param providers - List to search; when omitted, loadProviders() is used.
+ * @returns The first entry whose id equals `id`, or undefined if none does.
+ */
+export function findProvider(
+  id: string,
+  providers?: ProviderConfig[],
+): ProviderConfig | undefined {
+  const candidates = providers ?? loadProviders()
+  for (const candidate of candidates) {
+    if (candidate.id === id) {
+      return candidate
+    }
+  }
+  return undefined
+}
+
+/**
+ * Shallow, key-order-independent equality for ProviderConfig objects.
+ * E4 fix: replaces JSON.stringify comparison which is key-order sensitive.
+ *
+ * Two configs are equal when they own the same set of keys and every value
+ * is strictly equal (`===`). Values are not compared recursively.
+ *
+ * @param a - First config.
+ * @param b - Second config.
+ * @returns true when both have identical own keys with identical values.
+ */
+function providerConfigEqual(a: ProviderConfig, b: ProviderConfig): boolean {
+  const keysA = Object.keys(a) as (keyof ProviderConfig)[]
+  if (keysA.length !== Object.keys(b).length) return false
+  // Require `b` to own each key. Comparing values alone treats a key that is
+  // missing on one side as equal to a key holding `undefined` on the other,
+  // which let objects with different key sets compare equal.
+  return keysA.every(
+    key => Object.prototype.hasOwnProperty.call(b, key) && a[key] === b[key],
+  )
+}
+
+/**
+ * Persist user-defined providers to providers.json.
+ *
+ * `providers` is merged over DEFAULT_PROVIDERS by id. Only entries that
+ * differ from the built-ins are written: new ids, plus overrides of a
+ * built-in whose fields differ from the default (E4: key-order-independent
+ * comparison). The existing file is not read, so its contents are replaced
+ * by this set; pass the complete list that should be kept.
+ *
+ * C3: the data is written to a temp file next to the target and then
+ * renamed over it, so readers never see a partially written file. The temp
+ * file sits in the target's own directory because rename cannot cross
+ * filesystems (it fails with EXDEV when the OS temp directory is on a
+ * different mount from the config directory).
+ *
+ * J1: the in-process cache is invalidated after a successful write.
+ *
+ * @param providers - Providers to persist on top of the built-ins.
+ * @returns The full merged list (built-ins plus `providers`).
+ * @throws Whatever writeFileSync/renameSync throws; the temp file is removed
+ *   on a best-effort basis first.
+ */
+export function saveProviders(providers: ProviderConfig[]): ProviderConfig[] {
+  const filePath = getProvidersFilePath()
+
+  // Build merged list (providers override defaults by id). Re-setting an
+  // existing key keeps its original position in the Map.
+  const defaultsById = new Map<string, ProviderConfig>()
+  for (const d of DEFAULT_PROVIDERS) {
+    defaultsById.set(d.id, d)
+  }
+  const merged = new Map<string, ProviderConfig>(defaultsById)
+  for (const p of providers) {
+    merged.set(p.id, p)
+  }
+
+  // Only persist what differs from the built-ins (defaults are always built in).
+  const toWrite: ProviderConfig[] = []
+  for (const [id, p] of merged) {
+    const builtIn = defaultsById.get(id)
+    if (builtIn === undefined || !providerConfigEqual(builtIn, p)) {
+      toWrite.push(p)
+    }
+  }
+
+  // C3: atomic write. The random suffix keeps concurrent savers from sharing
+  // a temp file.
+  const tmpPath = `${filePath}.${randomBytes(8).toString('hex')}.tmp`
+  try {
+    writeFileSync(tmpPath, JSON.stringify(toWrite, null, 2), 'utf-8')
+    renameSync(tmpPath, filePath)
+  } catch (err) {
+    try {
+      // Remove the leftover temp file rather than leaving it on disk.
+      unlinkSync(tmpPath)
+    } catch {
+      /* ignore: the temp file may never have been created */
+    }
+    throw err
+  }
+
+  // J1: invalidate cache so next loadProviders() reads fresh data
+  _invalidateProviderCache()
+
+  return Array.from(merged.values())
+}
--- a/src/services/providerRegistry/providerCompatMatrix.ts
+++ b/src/services/providerRegistry/providerCompatMatrix.ts
@@ -0,0 +1,179 @@
+import type { CompatRule } from './types.js'
+
+/**
+ * Per-provider OpenAI-compat field whitelist.
+ *
+ * Each profile describes what an endpoint actually accepts so we can strip
+ * fields that would cause a strict endpoint to reject the request.
+ * applyCompatRule() reads these flags to decide what to remove from an
+ * outgoing request body.
+ */
+export interface CompatProfile {
+  /**
+   * Whether the server accepts stream_options.include_usage in chat completions.
+   * Strict endpoints (Cerebras, Qwen) reject unknown top-level keys.
+   *
+   * When false, applyCompatRule() removes `include_usage` from
+   * `stream_options`, and drops `stream_options` entirely if nothing else
+   * remains in it.
+   */
+  supportsStreamUsageOption: boolean
+
+  /**
+   * Whether the server accepts a custom 'thinking' field in messages.
+   * Only permissive or DeepSeek-thinking endpoints accept this.
+   *
+   * When false, applyCompatRule() removes `thinking` from every entry of
+   * `messages`.
+   */
+  supportsThinkingField: boolean
+
+  /**
+   * How to handle reasoning_content in roundtrips.
+   *
+   * DeepSeek has three modes:
+   *   - thinking-only:    model returns reasoning_content, no tools
+   *   - thinking+tools:   model returns both reasoning_content and tool calls
+   *   - normal:           model returns neither
+   *
+   * 'always-preserve':      echo back (DeepSeek thinking+tools roundtrip)
+   * 'drop-on-non-thinking': remove unless current model is thinking variant
+   *                         (applyCompatRule() tests the request's `model`
+   *                         string against /reason|think/i)
+   * 'strip':                remove always (safe default for strict endpoints)
+   */
+  reasoningContentEcho: 'always-preserve' | 'drop-on-non-thinking' | 'strip'
+
+  /**
+   * Tool call schema flavor supported by the endpoint.
+   * 'openai-v2' = standard OpenAI function-calling schema
+   *
+   * This is the only value at present, and applyCompatRule() does not read
+   * this field.
+   */
+  toolCallFormat: 'openai-v2'
+}
+
+/**
+ * Compat profile for every CompatRule identifier.
+ *
+ * The Record<CompatRule, …> type makes the compiler require one entry per
+ * rule. cerebras, groq and strict-openai currently share the same values
+ * (no stream usage option, no thinking field, reasoning_content stripped).
+ */
+export const COMPAT_PROFILES: Record<CompatRule, CompatProfile> = {
+  cerebras: {
+    supportsStreamUsageOption: false,
+    supportsThinkingField: false,
+    reasoningContentEcho: 'strip',
+    toolCallFormat: 'openai-v2',
+  },
+  groq: {
+    supportsStreamUsageOption: false,
+    supportsThinkingField: false,
+    reasoningContentEcho: 'strip',
+    toolCallFormat: 'openai-v2',
+  },
+  deepseek: {
+    // DeepSeek-reasoner supports reasoning_content and the thinking field.
+    // For normal deepseek-chat, thinking field is ignored rather than rejected.
+    supportsStreamUsageOption: true,
+    supportsThinkingField: true,
+    reasoningContentEcho: 'always-preserve',
+    toolCallFormat: 'openai-v2',
+  },
+  'strict-openai': {
+    supportsStreamUsageOption: false,
+    supportsThinkingField: false,
+    reasoningContentEcho: 'strip',
+    toolCallFormat: 'openai-v2',
+  },
+  permissive: {
+    // Nothing is stripped except reasoning_content, which is kept only when
+    // the request's model name looks like a thinking variant.
+    supportsStreamUsageOption: true,
+    supportsThinkingField: true,
+    reasoningContentEcho: 'drop-on-non-thinking',
+    toolCallFormat: 'openai-v2',
+  },
+}
+
+/**
+ * Classify an assistant message by which DeepSeek reasoning mode produced it.
+ *
+ * - 'normal':         reasoning_content is missing or falsy
+ * - 'thinking+tools': reasoning_content is truthy and tool_calls is a
+ *                     non-empty array
+ * - 'thinking-only':  reasoning_content is truthy and tool_calls is absent,
+ *                     not an array, or empty
+ *
+ * @param assistantMessage - Raw assistant message object.
+ * @returns The detected mode.
+ */
+export function getDeepSeekReasoningMode(
+  assistantMessage: Record<string, unknown>,
+): 'thinking-only' | 'thinking+tools' | 'normal' {
+  // Without reasoning the message is 'normal' whatever tool_calls holds.
+  if (!assistantMessage['reasoning_content']) {
+    return 'normal'
+  }
+
+  const calls = assistantMessage['tool_calls']
+  const carriesToolCalls = Array.isArray(calls) && calls.length > 0
+  return carriesToolCalls ? 'thinking+tools' : 'thinking-only'
+}
+
+/**
+ * Return a copy of `messages` in which every object entry that has `field`
+ * is replaced by a shallow copy without it. Entries that are not objects
+ * (null, strings, numbers) are passed through unchanged.
+ */
+function stripMessageField(messages: unknown[], field: string): unknown[] {
+  return messages.map(msg => {
+    // `in` throws a TypeError on null and primitives, so guard first.
+    if (msg === null || typeof msg !== 'object' || !(field in msg)) {
+      return msg
+    }
+    const { [field]: _dropped, ...rest } = msg as Record<string, unknown>
+    return rest
+  })
+}
+
+/**
+ * Apply a compat rule to an outgoing request body, dropping fields the
+ * target endpoint won't accept. Returns a new object (immutable).
+ *
+ * This is a pure function: it does not mutate the input body. The top-level
+ * object is shallow-copied, and `stream_options`, `messages` and individual
+ * messages are copied only when something is removed from them.
+ *
+ * What is removed, driven by COMPAT_PROFILES[rule]:
+ * - `stream_options.include_usage` when supportsStreamUsageOption is false
+ *   (`stream_options` itself is dropped if that leaves it empty);
+ * - `thinking` on each message when supportsThinkingField is false;
+ * - `reasoning_content` on each message when the echo policy is 'strip', or
+ *   when it is 'drop-on-non-thinking' and `body.model` does not match
+ *   /reason|think/i (a missing or non-string model counts as non-thinking).
+ *
+ * @param body - Outgoing chat-completions request body.
+ * @param rule - Compat rule selecting the profile to apply.
+ * @returns A sanitized copy of `body`.
+ */
+export function applyCompatRule(
+  body: Record<string, unknown>,
+  rule: CompatRule,
+): Record<string, unknown> {
+  const profile = COMPAT_PROFILES[rule]
+  const result: Record<string, unknown> = { ...body }
+
+  // Strip stream_options.include_usage if endpoint doesn't support it
+  if (!profile.supportsStreamUsageOption) {
+    const streamOptions = result['stream_options']
+    if (
+      streamOptions !== null &&
+      typeof streamOptions === 'object' &&
+      !Array.isArray(streamOptions)
+    ) {
+      const { include_usage: _dropped, ...rest } = streamOptions as Record<
+        string,
+        unknown
+      >
+      if (Object.keys(rest).length === 0) {
+        delete result['stream_options']
+      } else {
+        result['stream_options'] = rest
+      }
+    }
+  }
+
+  // Decide which per-message fields this endpoint must not receive.
+  const fieldsToStrip: string[] = []
+  if (!profile.supportsThinkingField) {
+    fieldsToStrip.push('thinking')
+  }
+  if (profile.reasoningContentEcho === 'strip') {
+    fieldsToStrip.push('reasoning_content')
+  } else if (profile.reasoningContentEcho === 'drop-on-non-thinking') {
+    // Keep reasoning_content only when the model name indicates a thinking
+    // variant (contains 'reason' or 'think').
+    const model = typeof result['model'] === 'string' ? result['model'] : ''
+    if (!/reason|think/i.test(model)) {
+      fieldsToStrip.push('reasoning_content')
+    }
+  }
+
+  const original: unknown = result['messages']
+  if (fieldsToStrip.length > 0 && Array.isArray(original)) {
+    let messages: unknown[] = original
+    for (const field of fieldsToStrip) {
+      messages = stripMessageField(messages, field)
+    }
+    result['messages'] = messages
+  }
+
+  return result
+}
--- a/src/services/providerRegistry/switcher.ts
+++ b/src/services/providerRegistry/switcher.ts
@@ -0,0 +1,111 @@
+import { findProvider, loadProviders } from './loader.js'
+import type { ProviderConfig } from './types.js'
+
+/** Outcome of switchProvider(): what to export, what to warn about, and for whom. */
+export interface SwitchProviderResult {
+  /**
+   * Environment variables to set before the next session.
+   * This is informational — the caller must NOT mutate process.env.
+   * The user copies these into their shell profile.
+   *
+   * switchProvider() fills in CLAUDE_CODE_USE_OPENAI, OPENAI_BASE_URL and
+   * OPENAI_MODEL. OPENAI_API_KEY is deliberately absent; buildShellExportBlock()
+   * emits it as a reference to `provider.apiKeyEnv` instead.
+   */
+  env: Record<string, string>
+
+  /**
+   * Human-readable warnings (e.g. missing API key in current env).
+   * Non-fatal: the user can still configure the provider.
+   */
+  warnings: string[]
+
+  /**
+   * The resolved provider config used for this switch.
+   */
+  provider: ProviderConfig
+}
+
+/**
+ * Compute the environment variables needed to activate an OpenAI-compat provider.
+ *
+ * - Does NOT mutate process.env; it only reads it to produce warnings.
+ * - The returned `env` never contains the API key or its value. Use
+ *   buildShellExportBlock() to render a snippet that references the
+ *   provider's key variable by name.
+ * - A restart is required for the variables to take effect.
+ *
+ * Warnings, in order:
+ * 1. (G3) ANTHROPIC_API_KEY is set while OpenAI-compat mode is already
+ *    active in the current environment (CLAUDE_CODE_USE_OPENAI=1 or
+ *    OPENAI_API_KEY set). Returned in `warnings` so the UI can show it.
+ * 2. The provider's API key variable is not set in the current environment.
+ *
+ * @param id - Provider id (e.g. 'cerebras', 'groq', 'deepseek', 'qwen')
+ * @param providers - Optional pre-loaded list (defaults to loadProviders())
+ * @returns The env map, any warnings, and the resolved provider.
+ * @throws {Error} if provider id is not found
+ */
+export function switchProvider(
+  id: string,
+  providers?: ProviderConfig[],
+): SwitchProviderResult {
+  const available = providers ?? loadProviders()
+  const provider = findProvider(id, available)
+
+  if (!provider) {
+    const knownIds = available.map(p => p.id).join(', ')
+    throw new Error(
+      `switchProvider: provider "${id}" not found. Available: ${knownIds}`,
+    )
+  }
+
+  const warnings: string[] = []
+
+  // G3: credential-plane conflict in the current environment.
+  const openAIModeActive =
+    process.env['CLAUDE_CODE_USE_OPENAI'] === '1' ||
+    Boolean(process.env['OPENAI_API_KEY'])
+  if (openAIModeActive && Boolean(process.env['ANTHROPIC_API_KEY'])) {
+    warnings.push(
+      'Both ANTHROPIC_API_KEY and OpenAI-compat mode are set. ' +
+        'ANTHROPIC_API_KEY is for Anthropic workspace endpoints (/v1/agents, /v1/vaults). ' +
+        'OpenAI-compat mode routes /v1/messages to a third-party provider. ' +
+        'These are separate planes — verify this is intentional.',
+    )
+  }
+
+  // Only the presence of the key is checked; its value is never copied.
+  const keyVar = provider.apiKeyEnv
+  if (!process.env[keyVar]) {
+    warnings.push(
+      `${keyVar} is not set in the current environment. ` +
+        `Set it before starting Claude Code: export ${keyVar}=<your-api-key>`,
+    )
+  }
+
+  return {
+    env: {
+      CLAUDE_CODE_USE_OPENAI: '1',
+      OPENAI_BASE_URL: provider.baseUrl,
+      OPENAI_MODEL: provider.defaultModel,
+    },
+    warnings,
+    provider,
+  }
+}
+
+/**
+ * Build the shell export block to display to the user.
+ *
+ * Example output:
+ *   export CLAUDE_CODE_USE_OPENAI=1
+ *   export OPENAI_BASE_URL=https://api.cerebras.ai/v1
+ *   export OPENAI_API_KEY=$CEREBRAS_API_KEY
+ *   export OPENAI_MODEL=llama-3.3-70b
+ *
+ * The API key line uses a variable reference so the actual key is never echoed.
+ *
+ * The block is meant to be pasted into a shell, and providers.json is free
+ * text, so values are made shell-safe:
+ * - values made only of plain characters are emitted bare (as in the
+ *   example above);
+ * - any other value (e.g. a URL with `?`, `&` or `;`) is single-quoted;
+ * - if `provider.apiKeyEnv` is not a valid shell variable name, a comment
+ *   line is emitted in place of the OPENAI_API_KEY export, because no
+ *   `$NAME` reference can be written for it.
+ *
+ * @param result - Result of switchProvider().
+ * @returns Newline-separated shell lines, with no trailing newline.
+ */
+export function buildShellExportBlock(result: SwitchProviderResult): string {
+  const { env, provider } = result
+
+  // Bare when the value needs no quoting, otherwise POSIX single-quoted
+  // (an embedded ' becomes '\'').
+  const shellWord = (value: string): string =>
+    /^[A-Za-z0-9_@%+=:,./-]+$/.test(value)
+      ? value
+      : `'${value.replace(/'/g, "'\\''")}'`
+
+  const keyVar = provider.apiKeyEnv
+  const apiKeyLine = /^[A-Za-z_][A-Za-z0-9_]*$/.test(keyVar)
+    ? `export OPENAI_API_KEY=$${keyVar}`
+    : // JSON.stringify keeps the odd name on one line inside the comment.
+      `# OPENAI_API_KEY: ${JSON.stringify(keyVar)} is not a valid shell variable name; export OPENAI_API_KEY manually`
+
+  const lines: string[] = [
+    `export CLAUDE_CODE_USE_OPENAI=${shellWord(env['CLAUDE_CODE_USE_OPENAI'] ?? '1')}`,
+    `export OPENAI_BASE_URL=${shellWord(env['OPENAI_BASE_URL'] ?? provider.baseUrl)}`,
+    apiKeyLine,
+    `export OPENAI_MODEL=${shellWord(env['OPENAI_MODEL'] ?? provider.defaultModel)}`,
+  ]
+  return lines.join('\n')
+}
--- a/src/services/providerRegistry/types.ts
+++ b/src/services/providerRegistry/types.ts
@@ -0,0 +1,51 @@
+import { z } from 'zod'
+
+/**
+ * Compat rule identifiers. Each maps to a CompatProfile in providerCompatMatrix.ts.
+ *
+ * COMPAT_PROFILES there is typed Record<CompatRule, CompatProfile>, so adding
+ * an identifier here also requires adding a matching profile entry.
+ */
+export const CompatRuleSchema = z.enum([
+  'cerebras',
+  'groq',
+  'deepseek',
+  'strict-openai',
+  'permissive',
+])
+
+/** Union of the identifiers accepted by CompatRuleSchema. */
+export type CompatRule = z.infer<typeof CompatRuleSchema>
+
+/**
+ * The only supported provider kind for PR-2. Future PR-3+ may add 'oauth', 'bedrock-compat', etc.
+ *
+ * Being a literal, any other `kind` value fails validation of the entry.
+ */
+export const ProviderKindSchema = z.literal('openai-compat')
+/** The literal type 'openai-compat'. */
+export type ProviderKind = z.infer<typeof ProviderKindSchema>
+
+/**
+ * Zod schema for a single provider configuration entry.
+ *
+ * Rules:
+ * - id: kebab-case identifier used in /provider use <id>
+ *   (the pattern allows lowercase letters, digits and hyphens only; it does
+ *   not reject leading, trailing or consecutive hyphens)
+ * - kind: only 'openai-compat' in PR-2
+ * - baseUrl: full base URL including /v1 suffix if needed
+ * - apiKeyEnv: name of the env var that holds the API key
+ *   (only checked for being non-empty, not for being a valid variable name)
+ * - defaultModel: model string passed as OPENAI_MODEL
+ * - compatRule: selects CompatProfile from providerCompatMatrix
+ */
+export const ProviderConfigSchema = z.object({
+  id: z
+    .string()
+    .min(1)
+    .regex(/^[a-z0-9-]+$/, 'id must be kebab-case'),
+  kind: ProviderKindSchema,
+  baseUrl: z.string().url(),
+  apiKeyEnv: z.string().min(1),
+  defaultModel: z.string().min(1),
+  compatRule: CompatRuleSchema,
+})
+
+/** A validated provider entry, as inferred from ProviderConfigSchema. */
+export type ProviderConfig = z.infer<typeof ProviderConfigSchema>
+
+/**
+ * Schema for the entire ~/.claude/providers.json file.
+ * Top-level must be an array of ProviderConfig.
+ *
+ * An empty array is valid. Because the array is validated as a whole, a
+ * single invalid entry makes the whole file fail validation.
+ */
+export const ProvidersFileSchema = z.array(ProviderConfigSchema)
--- a/src/utils/tests/cacheStats.test.ts
+++ b/src/utils/tests/cacheStats.test.ts
@@ -0,0 +1,465 @@
+import {
+  afterAll,
+  describe,
+  test,
+  expect,
+  beforeEach,
+  afterEach,
+  mock,
+} from 'bun:test'
+import * as path from 'node:path'
+import * as os from 'node:os'
+import { homedir } from 'node:os'
+import { join } from 'node:path'
+import * as fsp from 'node:fs/promises'
+
+// ---------------------------------------------------------------------------
+// Mock envUtils so getClaudeConfigHomeDir returns a temp dir while THIS
+// suite runs. After it finishes, getClaudeConfigHomeDir falls back to the
+// real semantics (process.env.CLAUDE_CONFIG_DIR ?? ~/.claude) so other
+// tests in the same process (envUtils.test.ts in particular) don't see
+// the test's tmpDir leaked as the user config home.
+// ---------------------------------------------------------------------------
+// Current temp config home; reassigned by the beforeEach hook of each suite
+// in this file that touches the filesystem.
+let tmpDir = ''
+// While true, the mocked getClaudeConfigHomeDir / getTeamsDir resolve under
+// tmpDir. Flipped to false once every test in this file has finished.
+let useMockForCacheStats = true
+afterAll(() => {
+  useMockForCacheStats = false
+})
+
+// Provide REAL semantics for every other envUtils export — this mock is
+// process-global, so envUtils.test.ts and other consumers (providers,
+// model, etc.) running in the same process see real behavior for
+// hasNodeOption, isEnvTruthy, isBareMode, parseEnvVars, etc. Only
+// getClaudeConfigHomeDir is overridden (to point at the test temp dir).
+//
+// [model-name prefix, env var name] pairs used by the getVertexRegionForModel
+// stub. Lookup is "first entry whose prefix the model name starts with", so
+// a longer prefix must come before any shorter prefix it extends (e.g.
+// 'claude-opus-4-1' before 'claude-opus-4').
+const VERTEX_REGION_OVERRIDES: ReadonlyArray<[string, string]> = [
+  ['claude-haiku-4-5', 'VERTEX_REGION_CLAUDE_HAIKU_4_5'],
+  ['claude-3-5-haiku', 'VERTEX_REGION_CLAUDE_3_5_HAIKU'],
+  ['claude-3-5-sonnet', 'VERTEX_REGION_CLAUDE_3_5_SONNET'],
+  ['claude-3-7-sonnet', 'VERTEX_REGION_CLAUDE_3_7_SONNET'],
+  ['claude-opus-4-1', 'VERTEX_REGION_CLAUDE_4_1_OPUS'],
+  ['claude-opus-4', 'VERTEX_REGION_CLAUDE_4_0_OPUS'],
+  ['claude-sonnet-4-6', 'VERTEX_REGION_CLAUDE_4_6_SONNET'],
+  ['claude-sonnet-4-5', 'VERTEX_REGION_CLAUDE_4_5_SONNET'],
+  ['claude-sonnet-4', 'VERTEX_REGION_CLAUDE_4_0_SONNET'],
+]
+
+// Stand-in for envUtils.isEnvTruthy: booleans pass through unchanged;
+// strings count as truthy only when, lower-cased and trimmed, they are one
+// of '1', 'true', 'yes', 'on'. undefined and '' are false.
+const realIsEnvTruthy = (v: string | boolean | undefined): boolean => {
+  if (typeof v === 'boolean') return v
+  if (v === undefined || v === '') return false
+  const normalized = v.toLowerCase().trim()
+  return (
+    normalized === '1' ||
+    normalized === 'true' ||
+    normalized === 'yes' ||
+    normalized === 'on'
+  )
+}
+// Stand-in for envUtils.isEnvDefinedFalsy: true only for an explicitly
+// "off" value — boolean false, or a string that is '0', 'false', 'no' or
+// 'off' after lower-casing and trimming. undefined and '' are NOT falsy here.
+const realIsEnvDefinedFalsy = (v: string | boolean | undefined): boolean => {
+  switch (typeof v) {
+    case 'undefined':
+      return false
+    case 'boolean':
+      return !v
+    default:
+      return (
+        v !== '' && ['0', 'false', 'no', 'off'].includes(v.toLowerCase().trim())
+      )
+  }
+}
+// Region used when no model-specific override applies: CLOUD_ML_REGION when
+// it is set and non-empty, otherwise 'us-east5'.
+const realDefaultVertexRegion = (): string => {
+  const configured = process.env.CLOUD_ML_REGION
+  return configured ? configured : 'us-east5'
+}
+
+// Real getClaudeConfigHomeDir is memoized via lodash, so consumers may call
+// `.cache.clear()` on it (see tasks.test.ts). The replacement therefore
+// carries a no-op `.cache` stub next to the resolver itself.
+const resolveConfigHomeForCacheStats = (): string => {
+  if (useMockForCacheStats) return tmpDir
+  const configured =
+    process.env.CLAUDE_CONFIG_DIR ?? join(homedir(), '.claude')
+  return configured.normalize('NFC')
+}
+
+const mockedGetClaudeConfigHomeDir: (() => string) & {
+  cache: { clear: () => void; get: (k: unknown) => unknown }
+} = Object.assign(resolveConfigHomeForCacheStats, {
+  cache: {
+    clear: () => {},
+    get: (_k: unknown) => undefined,
+  },
+})
+
+// Replace src/utils/envUtils.js with the object below. Most entries
+// re-implement the helper inline; getClaudeConfigHomeDir and getTeamsDir
+// resolve under tmpDir while useMockForCacheStats is true.
+mock.module('src/utils/envUtils.js', () => ({
+  getClaudeConfigHomeDir: mockedGetClaudeConfigHomeDir,
+  isEnvTruthy: realIsEnvTruthy,
+  // True when `flag` appears as a whitespace-separated token in NODE_OPTIONS.
+  hasNodeOption: (flag: string) => {
+    const opts = process.env.NODE_OPTIONS
+    return !!opts && opts.split(/\s+/).includes(flag)
+  },
+  isEnvDefinedFalsy: realIsEnvDefinedFalsy,
+  isBareMode: () =>
+    realIsEnvTruthy(process.env.CLAUDE_CODE_SIMPLE) ||
+    process.argv.includes('--bare'),
+  // Parses KEY=value strings; only the first '=' separates key from value,
+  // so values may themselves contain '='. Throws on a missing key or value.
+  parseEnvVars: (rawEnvArgs: string[] | undefined) => {
+    const parsed: Record<string, string> = {}
+    if (rawEnvArgs) {
+      for (const envStr of rawEnvArgs) {
+        const [key, ...valueParts] = envStr.split('=')
+        if (!key || valueParts.length === 0) {
+          throw new Error(
+            `Invalid environment variable format: ${envStr}, environment variables should be added as: -e KEY1=value1 -e KEY2=value2`,
+          )
+        }
+        parsed[key] = valueParts.join('=')
+      }
+    }
+    return parsed
+  },
+  getAWSRegion: () =>
+    process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || 'us-east-1',
+  getDefaultVertexRegion: realDefaultVertexRegion,
+  shouldMaintainProjectWorkingDir: () =>
+    realIsEnvTruthy(process.env.CLAUDE_BASH_MAINTAIN_PROJECT_WORKING_DIR),
+  isRunningOnHomespace: () =>
+    process.env.USER_TYPE === 'ant' &&
+    realIsEnvTruthy(process.env.COO_RUNNING_ON_HOMESPACE),
+  // NOTE(review): fixed stub rather than a re-implementation — confirm no
+  // other test in the same process depends on the real behavior.
+  isInProtectedNamespace: () => false,
+  getTeamsDir: () =>
+    useMockForCacheStats
+      ? `${tmpDir}/teams`
+      : join(
+          (
+            process.env.CLAUDE_CONFIG_DIR ?? join(homedir(), '.claude')
+          ).normalize('NFC'),
+          'teams',
+        ),
+  // NOTE(review): getEnvBool / getEnvNumber are fixed stubs as well; they
+  // ignore their arguments and the environment — confirm that is acceptable.
+  getEnvBool: () => false,
+  getEnvNumber: () => undefined,
+  // First matching prefix in VERTEX_REGION_OVERRIDES selects the env var to
+  // read; an unset/empty variable or no match falls back to the default region.
+  getVertexRegionForModel: (model: string | undefined) => {
+    if (model) {
+      const match = VERTEX_REGION_OVERRIDES.find(([prefix]) =>
+        model.startsWith(prefix),
+      )
+      if (match) {
+        return process.env[match[1]] || realDefaultVertexRegion()
+      }
+    }
+    return realDefaultVertexRegion()
+  },
+}))
+
+import {
+  computeHitRate,
+  tokenSignature,
+  getStateFilePath,
+  readState,
+  writeStateAtomic,
+  type CacheUsage,
+  type CacheStatsState,
+} from '../cacheStats.js'
+
+import {
+  onResponse,
+  getCacheStatsState,
+  initCacheStatsState,
+  _resetCacheStatsStateForTest,
+} from '../cacheStatsState.js'
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/** Build a CacheUsage from positional counts: (input, cache creation, cache read). */
+function usage(input: number, create: number, read: number): CacheUsage {
+  const snapshot: CacheUsage = {
+    input_tokens: input,
+    cache_creation_input_tokens: create,
+    cache_read_input_tokens: read,
+  }
+  return snapshot
+}
+
+// ---------------------------------------------------------------------------
+// computeHitRate
+// ---------------------------------------------------------------------------
+
+describe('computeHitRate', () => {
+  const big = 1_000_000_000
+
+  // Each row: [test title, input, expected rate]. An expected value of null
+  // means "no rate available".
+  const cases: ReadonlyArray<[string, CacheUsage | null, number | null]> = [
+    ['returns null for null input', null, null],
+    [
+      'returns null when all fields are 0 (denominator = 0)',
+      usage(0, 0, 0),
+      null,
+    ],
+    ['100% when all tokens are cache reads', usage(0, 0, 1000), 100],
+    ['0% when no cache reads', usage(1000, 0, 0), 0],
+    ['rounds to integer (50%)', usage(500, 0, 500), 50],
+    // read=1, total=3 → 33.33... → rounds to 33
+    ['rounds fractional values', usage(2, 0, 1), 33],
+    ['handles large numbers without overflow', usage(big, big, big), 33],
+    // Only cache_read_input_tokens in numerator
+    ['cache_creation does not count as reads', usage(0, 1000, 0), 0],
+  ]
+
+  for (const [title, input, expected] of cases) {
+    test(title, () => {
+      const rate = computeHitRate(input)
+      if (expected === null) {
+        expect(rate).toBeNull()
+      } else {
+        expect(rate).toBe(expected)
+      }
+    })
+  }
+})
+
+// ---------------------------------------------------------------------------
+// tokenSignature
+// ---------------------------------------------------------------------------
+
+describe('tokenSignature', () => {
+  test('produces deterministic string', () => {
+    expect(tokenSignature(usage(100, 200, 300))).toBe('100|200|300')
+  })
+
+  // Changing any single counter of the baseline must change the signature.
+  const baseline = usage(1, 2, 3)
+  const variants: ReadonlyArray<[string, CacheUsage]> = [
+    ['changes when input_tokens changes', usage(9, 2, 3)],
+    ['changes when cache_creation changes', usage(1, 9, 3)],
+    ['changes when cache_read changes', usage(1, 2, 9)],
+  ]
+
+  for (const [title, changed] of variants) {
+    test(title, () => {
+      expect(tokenSignature(baseline)).not.toBe(tokenSignature(changed))
+    })
+  }
+})
+
+// ---------------------------------------------------------------------------
+// State file: getStateFilePath
+// ---------------------------------------------------------------------------
+
+describe('getStateFilePath', () => {
+  // Every test gets a fresh temp dir acting as the mocked config home.
+  beforeEach(async () => {
+    const prefix = path.join(os.tmpdir(), 'cache-stats-test-')
+    tmpDir = await fsp.mkdtemp(prefix)
+  })
+
+  afterEach(async () => {
+    await fsp.rm(tmpDir, { recursive: true, force: true })
+  })
+
+  test('returns path inside config home dir', () => {
+    const statePath = getStateFilePath('session-abc')
+    expect(statePath).toContain('cache-stats')
+    expect(statePath.startsWith(tmpDir)).toBe(true)
+  })
+
+  test('different sessionIds produce different paths', () => {
+    const first = getStateFilePath('session-one')
+    const second = getStateFilePath('session-two')
+    expect(first).not.toBe(second)
+  })
+
+  test('same sessionId always produces same path (deterministic)', () => {
+    const first = getStateFilePath('s1')
+    const again = getStateFilePath('s1')
+    expect(first).toBe(again)
+  })
+
+  test('file name is 16 hex chars + .json', () => {
+    const fileName = path.basename(getStateFilePath('any-session-id'))
+    expect(fileName).toMatch(/^[0-9a-f]{16}\.json$/)
+  })
+})
+
+// ---------------------------------------------------------------------------
+// State file: readState / writeStateAtomic
+// ---------------------------------------------------------------------------
+
+describe('readState / writeStateAtomic', () => {
+  // Every test gets a fresh temp dir acting as the mocked config home.
+  beforeEach(async () => {
+    const prefix = path.join(os.tmpdir(), 'cache-stats-test-')
+    tmpDir = await fsp.mkdtemp(prefix)
+  })
+
+  afterEach(async () => {
+    await fsp.rm(tmpDir, { recursive: true, force: true })
+  })
+
+  test('readState returns init defaults when file is missing', async () => {
+    const missingPath = path.join(tmpDir, 'cache-stats', 'nonexistent.json')
+    const loaded = await readState(missingPath)
+    expect(loaded.version).toBe(1)
+    expect(loaded.signature).toBeNull()
+    expect(loaded.lastResetAt).toBeNull()
+    expect(loaded.lastHitRate).toBeNull()
+  })
+
+  test('readState returns init defaults on corrupt JSON', async () => {
+    const corruptPath = path.join(tmpDir, 'bad.json')
+    await fsp.writeFile(corruptPath, 'not-json!!!', 'utf8')
+    const loaded = await readState(corruptPath)
+    expect(loaded.signature).toBeNull()
+  })
+
+  test('readState returns init defaults on invalid shape', async () => {
+    const wrongShapePath = path.join(tmpDir, 'bad-shape.json')
+    const wrongShape = JSON.stringify({ version: 2, foo: 'bar' })
+    await fsp.writeFile(wrongShapePath, wrongShape, 'utf8')
+    const loaded = await readState(wrongShapePath)
+    expect(loaded.signature).toBeNull()
+  })
+
+  test('round-trip: writeStateAtomic then readState', async () => {
+    const statePath = getStateFilePath('round-trip-session')
+    const written: CacheStatsState = {
+      version: 1,
+      signature: '100|200|300',
+      lastResetAt: 1_700_000_000_000,
+      lastHitRate: 75,
+    }
+    await writeStateAtomic(statePath, written)
+    const loaded = await readState(statePath)
+    expect(loaded).toEqual(written)
+  })
+
+  test('writeStateAtomic creates parent directory if missing', async () => {
+    const nestedPath = path.join(tmpDir, 'deep', 'nested', 'state.json')
+    const emptyState: CacheStatsState = {
+      version: 1,
+      signature: null,
+      lastResetAt: null,
+      lastHitRate: null,
+    }
+    await writeStateAtomic(nestedPath, emptyState)
+    const loaded = await readState(nestedPath)
+    expect(loaded.version).toBe(1)
+  })
+})
+
+// ---------------------------------------------------------------------------
+// onResponse / getCacheStatsState (in-memory singleton)
+// ---------------------------------------------------------------------------
+
+describe('onResponse', () => {
+  // Resolves after `ms` milliseconds; used so Date.now() can advance
+  // between two onResponse calls.
+  const pause = (ms: number): Promise<void> =>
+    new Promise(resolve => setTimeout(resolve, ms))
+
+  beforeEach(async () => {
+    const prefix = path.join(os.tmpdir(), 'cache-stats-test-')
+    tmpDir = await fsp.mkdtemp(prefix)
+    _resetCacheStatsStateForTest()
+  })
+
+  afterEach(async () => {
+    await fsp.rm(tmpDir, { recursive: true, force: true })
+  })
+
+  test('initial state has null signature and lastResetAt', () => {
+    const initial = getCacheStatsState()
+    expect(initial.signature).toBeNull()
+    expect(initial.lastResetAt).toBeNull()
+  })
+
+  test('first onResponse sets lastResetAt and signature', () => {
+    const sample = usage(100, 0, 50)
+    const lowerBound = Date.now()
+    const updated = onResponse(sample)
+    const upperBound = Date.now()
+    expect(updated.signature).toBe(tokenSignature(sample))
+    expect(updated.lastResetAt).toBeGreaterThanOrEqual(lowerBound)
+    expect(updated.lastResetAt).toBeLessThanOrEqual(upperBound)
+    expect(updated.lastHitRate).toBe(33) // 50/(100+50) ≈ 33
+  })
+
+  test('same signature does NOT reset lastResetAt', async () => {
+    const sample = usage(100, 0, 50)
+    onResponse(sample)
+    const resetAtAfterFirst = getCacheStatsState().lastResetAt
+
+    // Wait a tick to ensure Date.now() would differ
+    await pause(5)
+
+    onResponse(sample) // same signature
+    const resetAtAfterSecond = getCacheStatsState().lastResetAt
+    expect(resetAtAfterSecond).toBe(resetAtAfterFirst)
+  })
+
+  test('different signature RESETS lastResetAt', async () => {
+    onResponse(usage(100, 0, 50))
+    const afterFirst = getCacheStatsState()
+
+    await pause(5)
+
+    onResponse(usage(200, 0, 100)) // different signature
+    const afterSecond = getCacheStatsState()
+    expect(afterSecond.lastResetAt).toBeGreaterThan(afterFirst.lastResetAt!)
+  })
+
+  test('lastHitRate is updated on signature change', () => {
+    onResponse(usage(1000, 0, 0)) // 0% hit rate
+    expect(getCacheStatsState().lastHitRate).toBe(0)
+
+    onResponse(usage(0, 0, 1000)) // 100% hit rate — different sig
+    expect(getCacheStatsState().lastHitRate).toBe(100)
+  })
+})
+
+// ---------------------------------------------------------------------------
+// Multi-session isolation
+// ---------------------------------------------------------------------------
+
+describe('multi-session file isolation', () => {
+  beforeEach(async () => {
+    tmpDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'cache-stats-test-'))
+  })
+
+  afterEach(async () => {
+    // initCacheStatsState() leaves the singleton bound to a session id. Clear
+    // it so a later onResponse() in the same process cannot persist state
+    // for this test's session — once the mock flag is switched off, that
+    // write would land under the real config home.
+    _resetCacheStatsStateForTest()
+    await fsp.rm(tmpDir, { recursive: true, force: true })
+  })
+
+  test('different session IDs produce different state files', async () => {
+    const p1 = getStateFilePath('session-alpha')
+    const p2 = getStateFilePath('session-beta')
+
+    const s1: CacheStatsState = {
+      version: 1,
+      signature: 'sig-alpha',
+      lastResetAt: 1000,
+      lastHitRate: 90,
+    }
+    const s2: CacheStatsState = {
+      version: 1,
+      signature: 'sig-beta',
+      lastResetAt: 2000,
+      lastHitRate: 10,
+    }
+
+    await writeStateAtomic(p1, s1)
+    await writeStateAtomic(p2, s2)
+
+    const r1 = await readState(p1)
+    const r2 = await readState(p2)
+
+    // Each file must hold only what was written for its own session.
+    expect(r1.signature).toBe('sig-alpha')
+    expect(r2.signature).toBe('sig-beta')
+    expect(r1.lastHitRate).toBe(90)
+    expect(r2.lastHitRate).toBe(10)
+  })
+
+  test('initCacheStatsState loads persisted fallback values', async () => {
+    _resetCacheStatsStateForTest()
+    const sid = 'test-session-init'
+    const p = getStateFilePath(sid)
+    const persisted: CacheStatsState = {
+      version: 1,
+      signature: '500|100|400',
+      lastResetAt: 1_700_000_000_000,
+      lastHitRate: 40,
+    }
+    await writeStateAtomic(p, persisted)
+
+    // The singleton should now mirror what was written to disk.
+    await initCacheStatsState(sid)
+    const s = getCacheStatsState()
+    expect(s.lastHitRate).toBe(40)
+    expect(s.lastResetAt).toBe(1_700_000_000_000)
+    expect(s.signature).toBe('500|100|400')
+  })
+})
--- a/src/utils/tests/lanBeacon.test.ts
+++ b/src/utils/tests/lanBeacon.test.ts
@@ -1,4 +1,13 @@
-import { describe, test, expect, mock, beforeEach, afterEach } from 'bun:test'
+import {
+  afterAll,
+  afterEach,
+  beforeAll,
+  beforeEach,
+  describe,
+  expect,
+  mock,
+  test,
+} from 'bun:test'

 // Mock dgram before importing LanBeacon
 const mockSocket = {
@@ -13,9 +22,32 @@ const mockSocket = {
  close: mock(() => {}),
 }

-mock.module('dgram', () => ({
-  createSocket: () => mockSocket,
-}))
+// Spread+flag pattern: previously this was a bare `mock.module('dgram', ...)`
+// which leaked the stub createSocket into every later test file in the
+// process via Bun's last-write-wins module mock cache. Spread real dgram
+// + gate the stub behind useLanBeaconDgramStubs so other tests see real UDP.
+let useLanBeaconDgramStubs = false
+mock.module('dgram', () => {
+  // eslint-disable-next-line @typescript-eslint/no-require-imports
+  const real = require('dgram') as Record<string, unknown>
+  return {
+    ...real,
+    // NOTE(review): presumably here so default-import consumers also get the
+    // real module — confirm against how lanBeacon.js imports dgram.
+    default: real,
+    // Hands out the shared mockSocket while the flag is on; otherwise
+    // forwards all arguments to the real createSocket.
+    createSocket: ((...args: unknown[]) =>
+      useLanBeaconDgramStubs
+        ? mockSocket
+        : (real.createSocket as (...a: unknown[]) => unknown)(
+            ...args,
+          )) as typeof real.createSocket,
+  }
+})
+
+// The stub is active only between this file's beforeAll and afterAll hooks.
+beforeAll(() => {
+  useLanBeaconDgramStubs = true
+})
+afterAll(() => {
+  useLanBeaconDgramStubs = false
+})

 const { LanBeacon } = await import('../lanBeacon.js')

--- a/src/utils/cacheStats.ts
+++ b/src/utils/cacheStats.ts
@@ -0,0 +1,109 @@
+import { createHash } from 'node:crypto'
+import { mkdir, readFile, rename, rm, writeFile } from 'node:fs/promises'
+import { dirname, join } from 'node:path'
+import { getClaudeConfigHomeDir } from './envUtils.js'
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+/**
+ * Token counters for one usage snapshot. computeHitRate sums all three as
+ * its denominator and uses only cache_read_input_tokens as the numerator.
+ */
+export interface CacheUsage {
+  input_tokens: number
+  cache_creation_input_tokens: number
+  cache_read_input_tokens: number
+}
+
+/**
+ * Shape of the JSON document stored in a state file (see readState /
+ * writeStateAtomic). `version` is pinned to the literal 1; readState falls
+ * back to all-null defaults for anything that does not match this shape.
+ */
+export interface CacheStatsState {
+  version: 1
+  signature: string | null
+  lastResetAt: number | null // ms epoch; reset when signature changes
+  lastHitRate: number | null // persisted fallback
+}
+
+// ---------------------------------------------------------------------------
+// Pure functions
+// ---------------------------------------------------------------------------
+
+/**
+ * Share of tokens served from cache, as a rounded integer percentage.
+ *
+ * @param u - Usage snapshot, or null when none is available.
+ * @returns cache reads divided by the sum of all three counters, times 100
+ *   and rounded; null when `u` is null or that sum is zero.
+ */
+export function computeHitRate(u: CacheUsage | null): number | null {
+  if (!u) return null
+  const {
+    input_tokens: uncached,
+    cache_creation_input_tokens: written,
+    cache_read_input_tokens: hits,
+  } = u
+  const total = uncached + written + hits
+  return total === 0 ? null : Math.round((hits / total) * 100)
+}
+
+/**
+ * Render a usage snapshot as "input|creation|read". Callers compare the
+ * result with the previously seen signature: a different string is treated
+ * as a new API response and resets the TTL clock.
+ */
+export function tokenSignature(u: CacheUsage): string {
+  const parts = [
+    u.input_tokens,
+    u.cache_creation_input_tokens,
+    u.cache_read_input_tokens,
+  ]
+  return parts.map(String).join('|')
+}
+
+// ---------------------------------------------------------------------------
+// State file I/O
+// ---------------------------------------------------------------------------
+
+/**
+ * Location of the state file for `sessionId`:
+ * `<config home>/cache-stats/<first 16 hex chars of sha256(sessionId)>.json`.
+ *
+ * The name is deterministic per session and the raw session id never
+ * appears in the path.
+ */
+export function getStateFilePath(sessionId: string): string {
+  const digest = createHash('sha256').update(sessionId).digest('hex')
+  const fileName = `${digest.slice(0, 16)}.json`
+  return join(getClaudeConfigHomeDir(), 'cache-stats', fileName)
+}
+
+// Defaults handed out (as a shallow copy) by readState when the state file
+// is missing, cannot be parsed, or does not pass isValidState.
+const INIT_STATE: CacheStatsState = {
+  version: 1,
+  signature: null,
+  lastResetAt: null,
+  lastHitRate: null,
+}
+
+/**
+ * Structural check for parsed JSON: a non-null object whose `version` is
+ * exactly 1 and whose `signature` (string), `lastResetAt` (number) and
+ * `lastHitRate` (number) are each either null or of the stated type.
+ */
+function isValidState(obj: unknown): obj is CacheStatsState {
+  if (obj === null || typeof obj !== 'object') return false
+  const candidate = obj as Record<string, unknown>
+  const nullOr = (value: unknown, kind: 'string' | 'number'): boolean =>
+    value === null || typeof value === kind
+
+  if (candidate['version'] !== 1) return false
+  return (
+    nullOr(candidate['signature'], 'string') &&
+    nullOr(candidate['lastResetAt'], 'number') &&
+    nullOr(candidate['lastHitRate'], 'number')
+  )
+}
+
+/**
+ * Load the state stored at `filePath`.
+ *
+ * Never rejects on a bad file: if it cannot be read, is not valid JSON, or
+ * has the wrong shape, a fresh copy of the init defaults is returned.
+ */
+export async function readState(filePath: string): Promise<CacheStatsState> {
+  let parsed: unknown
+  try {
+    parsed = JSON.parse(await readFile(filePath, 'utf8'))
+  } catch {
+    return { ...INIT_STATE }
+  }
+  return isValidState(parsed) ? parsed : { ...INIT_STATE }
+}
+
+/**
+ * Persist `state` to `filePath` by writing a temp file next to it and then
+ * renaming it over the target, so readers never see a partially written
+ * file.
+ *
+ * Best-effort: every failure (creating the directory, writing, renaming) is
+ * swallowed and the returned promise always resolves, because callers
+ * invoke this fire-and-forget and must never crash the UI.
+ *
+ * @param filePath - Final location of the state file; missing parent
+ *   directories are created.
+ * @param state - State to serialize as JSON.
+ */
+export async function writeStateAtomic(
+  filePath: string,
+  state: CacheStatsState,
+): Promise<void> {
+  // pid + timestamp + random suffix: overlapping writes from the same
+  // process must not share (and corrupt) one temp file.
+  const unique = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`
+  const tmp = `${filePath}.${unique}.tmp`
+  try {
+    // Inside the try so a directory-creation failure is also swallowed.
+    await mkdir(dirname(filePath), { recursive: true })
+    await writeFile(tmp, JSON.stringify(state), 'utf8')
+    await rename(tmp, filePath)
+  } catch {
+    // Best-effort; remove a possibly left-over temp file and move on.
+    try {
+      await rm(tmp, { force: true })
+    } catch {
+      // Nothing more to do — cleanup is best-effort too.
+    }
+  }
+}
--- a/src/utils/cacheStatsState.ts
+++ b/src/utils/cacheStatsState.ts
@@ -0,0 +1,92 @@
+/**
+ * In-memory singleton that tracks cache hit-rate state for the current session.
+ *
+ * Call `onResponse(usage)` every time a new API response arrives.
+ * The singleton compares the token signature of the new response against the
+ * previously seen signature.  When it changes (= a new API call completed),
+ * it resets `lastResetAt` to Date.now() and asynchronously persists state so
+ * that a future session can show the TTL countdown immediately on startup.
+ */
+
+import type { CacheUsage, CacheStatsState } from './cacheStats.js'
+import {
+  computeHitRate,
+  tokenSignature,
+  getStateFilePath,
+  readState,
+  writeStateAtomic,
+} from './cacheStats.js'
+
+// In-memory view of the cache stats: the same three fields as
+// CacheStatsState, without the `version` tag.
+interface MemState {
+  signature: string | null
+  lastResetAt: number | null
+  lastHitRate: number | null
+}
+
+// Module-level singleton. It is always replaced wholesale rather than
+// mutated in place, and accessors return shallow copies of it.
+let memState: MemState = {
+  signature: null,
+  lastResetAt: null,
+  lastHitRate: null,
+}
+
+// Session whose state file receives persisted updates; while null,
+// onResponse() updates memory only and writes nothing to disk.
+let sessionId: string | null = null
+
+/**
+ * Must be called once at session start so the singleton knows which state file
+ * to persist to and can pre-load the last known state.
+ *
+ * The state file is read asynchronously. If the singleton changes while
+ * that read is in flight — onResponse() records a new signature, the state
+ * is reset, or another init targets a different session — the fresher
+ * in-memory state wins and the values read from disk are discarded.
+ *
+ * @param sid - Session identifier; getStateFilePath maps it to the state file.
+ */
+export async function initCacheStatsState(sid: string): Promise<void> {
+  sessionId = sid
+  // memState is only ever replaced wholesale, so an identity comparison
+  // detects any update that lands during the await below.
+  const stateBeforeRead = memState
+  const persisted = await readState(getStateFilePath(sid))
+  if (sessionId !== sid || memState !== stateBeforeRead) return
+  // Pre-load persisted values so the UI can show fallback immediately
+  memState = {
+    signature: persisted.signature,
+    lastResetAt: persisted.lastResetAt,
+    lastHitRate: persisted.lastHitRate,
+  }
+}
+
+/**
+ * Record the usage of a newly received assistant response.
+ *
+ * When the usage signature differs from the stored one, the singleton is
+ * replaced (new signature, lastResetAt = now, new hit rate) and, if a
+ * session was initialised, the new state is persisted without awaiting the
+ * write. An unchanged signature leaves everything as it was.
+ *
+ * @returns A shallow copy of the in-memory state after the update.
+ */
+export function onResponse(usage: CacheUsage): MemState {
+  const nextSignature = tokenSignature(usage)
+  const nextHitRate = computeHitRate(usage)
+
+  // Same signature as before: nothing to update.
+  if (nextSignature === memState.signature) return { ...memState }
+
+  // New API response — restart the TTL clock.
+  memState = {
+    signature: nextSignature,
+    lastResetAt: Date.now(),
+    lastHitRate: nextHitRate,
+  }
+
+  if (sessionId !== null) {
+    // Fire-and-forget on purpose: persistence must not block the caller.
+    void writeStateAtomic(getStateFilePath(sessionId), {
+      version: 1,
+      ...memState,
+    })
+  }
+
+  return { ...memState }
+}
+
+/**
+ * Snapshot of the current in-memory state. Returns a shallow copy and does
+ * not register a response or touch the state file.
+ */
+export function getCacheStatsState(): MemState {
+  const snapshot: MemState = { ...memState }
+  return snapshot
+}
+
+/**
+ * Test-only: detach the singleton from any session and restore the
+ * all-null in-memory state so test runs do not influence each other.
+ */
+export function _resetCacheStatsStateForTest(): void {
+  sessionId = null
+  memState = {
+    signature: null,
+    lastResetAt: null,
+    lastHitRate: null,
+  }
+}
--- a/src/utils/config.ts
+++ b/src/utils/config.ts
@@ -222,6 +222,12 @@ export type GlobalConfig = {
    rejected?: string[]
  }
  primaryApiKey?: string // Primary API key for the user when no environment variable is set, set via oauth (TODO: rename)
+  /**
+   * Workspace API key saved via /login UI (sk-ant-api03-*).
+   * Stored in plaintext — file should be gitignored and chmod 600.
+   * ANTHROPIC_API_KEY env var takes precedence when both are present.
+   */
+  workspaceApiKey?: string
  hasAcknowledgedCostThreshold?: boolean
  hasSeenUndercoverAutoNotice?: boolean // ant-only: whether the one-time auto-undercover explainer has been shown
  hasSeenUltraplanTerms?: boolean // ant-only: whether the one-time CCR terms notice has been shown in the ultraplan launch dialog