feat: 添加 Provider Registry、StatusLine、Cache Stats 和其他增强

- providerRegistry: OpenAI 兼容 provider 切换（Cerebras/Groq/DeepSeek/Qwen）
- StatusLine: 增强状态栏（缓存命中率、TTL 倒计时、自定义 shell 命令）
- cacheStats: 缓存命中率和 token 签名追踪
- ultrareviewPreflight: 代码审查预检服务
- SkillsMenu/filterSkills: 技能菜单过滤增强
- MagicDocs/langfuse prompts: 提示词更新
- claude.ts: API 客户端更新

Co-Authored-By: glm-5-turbo <zai-org@claude-code-best.win>
2026-06-17 22:05:50 +00:00 · 2026-05-09 23:04:35 +08:00
parent fdddb6dbe8
commit efaf4afd9c
28 changed files with 3613 additions and 219 deletions
--- a/src/services/MagicDocs/tests/prompts.test.ts
+++ b/src/services/MagicDocs/tests/prompts.test.ts
@@ -0,0 +1,410 @@
+import { afterAll, describe, test, expect, mock, beforeEach } from 'bun:test'
+import { homedir } from 'node:os'
+import { join } from 'node:path'
+
+// ── Mock infrastructure ─────────────────────────────────────────────────────
+// All mock.module calls must precede the import of the module under test.
+// mock.module is process-global; mocks here must cover all exported names used
+// transitively so sibling test files are not broken by an incomplete mock.
+//
+// To prevent cross-file pollution (providers.test.ts, model.test.ts, skill
+// prefetch / skillLearning smoke), keep the mock factory inline (don't
+// pre-import real modules — that triggers heavy transitive deps and hangs
+// some test combinations). The flag below switches off the suite-specific
+// override after this file's tests finish.
+let useMockForMagicDocs = true
+afterAll(() => {
+  useMockForMagicDocs = false
+})
+
+// Inline a minimum env-driven default-model resolver so other test files
+// (getDefaultOpusModel.test.ts) which assert env-var precedence still work
+// even after our flag is off. The real getDefaultOpusModel reads provider
+// env vars; we mirror that minimal logic here. Keep aligned with
+// src/utils/model/model.ts's getDefaultOpusModel().
+function resolveDefaultOpusModelForTests(): string {
+  // Highest priority: provider-specific env override.
+  if (process.env.CLAUDE_CODE_USE_OPENAI === '1') {
+    if (process.env.OPENAI_DEFAULT_OPUS_MODEL)
+      return process.env.OPENAI_DEFAULT_OPUS_MODEL
+  }
+  if (process.env.CLAUDE_CODE_USE_GEMINI === '1') {
+    if (process.env.GEMINI_DEFAULT_OPUS_MODEL)
+      return process.env.GEMINI_DEFAULT_OPUS_MODEL
+  }
+  // Cross-provider override.
+  if (process.env.ANTHROPIC_DEFAULT_OPUS_MODEL)
+    return process.env.ANTHROPIC_DEFAULT_OPUS_MODEL
+  // Provider-specific Opus 4.7 IDs (must match
+  // src/utils/model/configs.ts CLAUDE_OPUS_4_7_CONFIG).
+  if (process.env.CLAUDE_CODE_USE_BEDROCK === '1')
+    return 'us.anthropic.claude-opus-4-7-v1'
+  if (process.env.CLAUDE_CODE_USE_VERTEX === '1') return 'claude-opus-4-7'
+  if (process.env.CLAUDE_CODE_USE_FOUNDRY === '1') return 'claude-opus-4-7'
+  return 'claude-opus-4-7'
+}
+
+const mockGetMainLoopModel = mock(() => 'claude-opus-4-7')
+const mockGetDisplayedEffortLevel = mock((): string => 'high')
+
+const realIsEnvTruthy = (v: string | boolean | undefined): boolean => {
+  if (!v) return false
+  if (typeof v === 'boolean') return v
+  return ['1', 'true', 'yes', 'on'].includes(v.toLowerCase().trim())
+}
+
+// Inline the real firstPartyNameToCanonical logic so its semantics survive
+// even after this suite's mock wins the registration race. Pre-importing
+// model.ts hangs the test process due to heavy transitive deps, so we
+// duplicate just this one pure function. Keep in sync with
+// src/utils/model/model.ts.
+function realFirstPartyNameToCanonical(name: string): string {
+  name = name.toLowerCase()
+  if (name.includes('claude-opus-4-7')) return 'claude-opus-4-7'
+  if (name.includes('claude-opus-4-6')) return 'claude-opus-4-6'
+  if (name.includes('claude-opus-4-5')) return 'claude-opus-4-5'
+  if (name.includes('claude-opus-4-1')) return 'claude-opus-4-1'
+  if (name.includes('claude-opus-4')) return 'claude-opus-4'
+  if (name.includes('claude-sonnet-4-6')) return 'claude-sonnet-4-6'
+  if (name.includes('claude-sonnet-4-5')) return 'claude-sonnet-4-5'
+  if (name.includes('claude-sonnet-4')) return 'claude-sonnet-4'
+  if (name.includes('claude-haiku-4-5')) return 'claude-haiku-4-5'
+  if (name.includes('claude-3-7-sonnet')) return 'claude-3-7-sonnet'
+  if (name.includes('claude-3-5-sonnet')) return 'claude-3-5-sonnet'
+  if (name.includes('claude-3-5-haiku')) return 'claude-3-5-haiku'
+  if (name.includes('claude-3-opus')) return 'claude-3-opus'
+  if (name.includes('claude-3-sonnet')) return 'claude-3-sonnet'
+  if (name.includes('claude-3-haiku')) return 'claude-3-haiku'
+  const m = name.match(/(claude-(\d+-\d+-)?\w+)/)
+  if (m && m[1]) return m[1]
+  return name
+}
+
+mock.module('src/utils/model/model.js', () => ({
+  getMainLoopModel: mockGetMainLoopModel,
+  getSmallFastModel: mock(() => 'claude-haiku'),
+  getUserSpecifiedModelSetting: mock(() => undefined),
+  getBestModel: mock(() => 'claude-opus-4-7'),
+  // Read env at call time so getDefaultOpusModel.test.ts (running in the same
+  // process) sees env-driven semantics. While useMockForMagicDocs is true
+  // (during this suite) we still want a stable default; otherwise we mirror
+  // the real env-precedence logic.
+  getDefaultOpusModel: mock(() =>
+    useMockForMagicDocs ? 'claude-opus-4-7' : resolveDefaultOpusModelForTests(),
+  ),
+  getDefaultSonnetModel: mock(() => 'claude-sonnet-4-6'),
+  getDefaultHaikuModel: mock(() => 'claude-haiku-3-5'),
+  getRuntimeMainLoopModel: mock(() => 'claude-opus-4-7'),
+  getDefaultMainLoopModelSetting: mock(() => 'claude-opus-4-7'),
+  getDefaultMainLoopModel: mock(() => 'claude-opus-4-7'),
+  // Real semantics inlined for firstPartyNameToCanonical so model.test.ts
+  // (which only checks pure-function input/output) passes without needing
+  // the heavy real-module load.
+  firstPartyNameToCanonical: mock((n: string) =>
+    realFirstPartyNameToCanonical(n),
+  ),
+  getCanonicalName: mock((n: string) => n),
+  getClaudeAiUserDefaultModelDescription: mock(() => ''),
+  renderDefaultModelSetting: mock(() => ''),
+  getOpusPricingSuffix: mock(() => ''),
+  isOpus1mMergeEnabled: mock(() => false),
+  renderModelSetting: mock((s: string) => s),
+  getPublicModelDisplayName: mock(() => null),
+  renderModelName: mock((n: string) => n),
+  getPublicModelName: mock((n: string) => n),
+  parseUserSpecifiedModel: mock((m: string) => m),
+  resolveSkillModelOverride: mock(() => undefined),
+  isLegacyModelRemapEnabled: mock(() => false),
+  modelDisplayString: mock(() => ''),
+  getMarketingNameForModel: mock(() => undefined),
+  normalizeModelStringForAPI: mock((m: string) => m),
+  isNonCustomOpusModel: mock(() => false),
+}))
+
+mock.module('src/utils/effort.js', () => ({
+  getDisplayedEffortLevel: mockGetDisplayedEffortLevel as (
+    _m: string,
+    _e: unknown,
+  ) => string,
+  getEffortEnvOverride: mock(() => undefined),
+  resolveAppliedEffort: mock(() => 'high'),
+  getInitialEffortSetting: mock(() => undefined),
+  parseEffortValue: mock(() => undefined),
+  toPersistableEffort: mock(() => undefined),
+  modelSupportsEffort: mock(() => true),
+  modelSupportsMaxEffort: mock(() => true),
+  modelSupportsXhighEffort: mock(() => false),
+  isEffortLevel: mock(() => true),
+  getEffortSuffix: mock(() => ''),
+  convertEffortValueToLevel: mock(() => 'high'),
+  getDefaultEffortForModel: mock(() => undefined),
+  getEffortLevelDescription: mock(() => ''),
+  getEffortValueDescription: mock(() => ''),
+  getOpusDefaultEffortConfig: mock(() => ({
+    enabled: true,
+    dialogTitle: '',
+    dialogDescription: '',
+  })),
+  resolvePickerEffortPersistence: mock(() => undefined),
+  isValidNumericEffort: mock(() => false),
+  EFFORT_LEVELS: ['low', 'medium', 'high', 'xhigh', 'max'],
+}))
+
+// Mirror the real semantics for the envUtils exports reimplemented below —
+// this mock is process-global, so envUtils.test.ts and other consumers in the
+// same process must see correct behavior for hasNodeOption, isBareMode,
+// parseEnvVars, getVertexRegionForModel, etc. Only the config-dir helpers
+// (getClaudeConfigHomeDir, getTeamsDir) use '/mock/home/.claude' in this suite.
+const realIsEnvDefinedFalsy = (v: string | boolean | undefined): boolean => {
+  if (v === undefined) return false
+  if (typeof v === 'boolean') return !v
+  if (!v) return false
+  return ['0', 'false', 'no', 'off'].includes(v.toLowerCase().trim())
+}
+const realDefaultVertexRegion = (): string =>
+  process.env.CLOUD_ML_REGION || 'us-east5'
+const VERTEX_REGION_OVERRIDES: ReadonlyArray<[string, string]> = [
+  ['claude-haiku-4-5', 'VERTEX_REGION_CLAUDE_HAIKU_4_5'],
+  ['claude-3-5-haiku', 'VERTEX_REGION_CLAUDE_3_5_HAIKU'],
+  ['claude-3-5-sonnet', 'VERTEX_REGION_CLAUDE_3_5_SONNET'],
+  ['claude-3-7-sonnet', 'VERTEX_REGION_CLAUDE_3_7_SONNET'],
+  ['claude-opus-4-1', 'VERTEX_REGION_CLAUDE_4_1_OPUS'],
+  ['claude-opus-4', 'VERTEX_REGION_CLAUDE_4_0_OPUS'],
+  ['claude-sonnet-4-6', 'VERTEX_REGION_CLAUDE_4_6_SONNET'],
+  ['claude-sonnet-4-5', 'VERTEX_REGION_CLAUDE_4_5_SONNET'],
+  ['claude-sonnet-4', 'VERTEX_REGION_CLAUDE_4_0_SONNET'],
+]
+
+// Real getClaudeConfigHomeDir is memoized via lodash, so consumers may call
+// `.cache.clear()` on it. Provide a no-op .cache stub.
+const mockedGetClaudeConfigHomeDirMD: (() => string) & {
+  cache: { clear: () => void; get: (k: unknown) => unknown }
+} = Object.assign(
+  () =>
+    useMockForMagicDocs
+      ? '/mock/home/.claude'
+      : (process.env.CLAUDE_CONFIG_DIR ?? join(homedir(), '.claude')).normalize(
+          'NFC',
+        ),
+  { cache: { clear: () => {}, get: (_k: unknown) => undefined } },
+)
+
+mock.module('src/utils/envUtils.js', () => ({
+  getClaudeConfigHomeDir: mockedGetClaudeConfigHomeDirMD,
+  isEnvTruthy: realIsEnvTruthy,
+  getEnvBool: () => false,
+  getEnvNumber: () => undefined,
+  // First matching prefix wins; VERTEX_REGION_OVERRIDES lists the longer
+  // prefixes first (e.g. claude-opus-4-1 before claude-opus-4).
+  getVertexRegionForModel: (model: string | undefined) => {
+    if (model) {
+      const match = VERTEX_REGION_OVERRIDES.find(([prefix]) =>
+        model.startsWith(prefix),
+      )
+      if (match) {
+        return process.env[match[1]] || realDefaultVertexRegion()
+      }
+    }
+    return realDefaultVertexRegion()
+  },
+  // Derive from the config-dir helper so both paths share one resolution
+  // (suite override, CLAUDE_CONFIG_DIR fallback and NFC normalization).
+  getTeamsDir: () => join(mockedGetClaudeConfigHomeDirMD(), 'teams'),
+  hasNodeOption: (flag: string) => {
+    const opts = process.env.NODE_OPTIONS
+    return !!opts && opts.split(/\s+/).includes(flag)
+  },
+  isEnvDefinedFalsy: realIsEnvDefinedFalsy,
+  isBareMode: () =>
+    realIsEnvTruthy(process.env.CLAUDE_CODE_SIMPLE) ||
+    process.argv.includes('--bare'),
+  // Split on the first '=' only, so values may themselves contain '=';
+  // entries with an empty key or no '=' at all are rejected.
+  parseEnvVars: (rawEnvArgs: string[] | undefined) => {
+    const parsed: Record<string, string> = {}
+    if (rawEnvArgs) {
+      for (const envStr of rawEnvArgs) {
+        const [key, ...valueParts] = envStr.split('=')
+        if (!key || valueParts.length === 0) {
+          throw new Error(
+            `Invalid environment variable format: ${envStr}, environment variables should be added as: -e KEY1=value1 -e KEY2=value2`,
+          )
+        }
+        parsed[key] = valueParts.join('=')
+      }
+    }
+    return parsed
+  },
+  getAWSRegion: () =>
+    process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || 'us-east-1',
+  getDefaultVertexRegion: realDefaultVertexRegion,
+  shouldMaintainProjectWorkingDir: () =>
+    realIsEnvTruthy(process.env.CLAUDE_BASH_MAINTAIN_PROJECT_WORKING_DIR),
+  isRunningOnHomespace: () =>
+    process.env.USER_TYPE === 'ant' &&
+    realIsEnvTruthy(process.env.COO_RUNNING_ON_HOMESPACE),
+  isInProtectedNamespace: () => false,
+}))
+
+// Mock the file system so loadMagicDocsPrompt() returns our controlled template
+const mockReadFile = mock(
+  async (_path: string, _opts?: unknown): Promise<string> => {
+    throw Object.assign(new Error('ENOENT'), { code: 'ENOENT' })
+  },
+)
+
+// IMPORTANT: this file used to mock fsOperations wholesale (readdir → [],
+// exists → false, …), which silently broke sibling tests that walk
+// .claude/skills (skill prefetch, skillLearning smoke). After this suite
+// finishes (useMockForMagicDocs flips to false), construct a minimal real
+// fs adapter inline using node:fs/promises so cross-file consumers see real
+// disk state — without pre-importing the heavy fsOperations module (its
+// transitive deps stall bun:test). Avoid require()ing the real module
+// inside the factory: that re-enters the same mock and infinite-loops.
+import { promises as nodeFs, existsSync as nodeExistsSync } from 'node:fs'
+
+const realFsAdapter = {
+  cwd: () => process.cwd(),
+  existsSync: (p: string) => nodeExistsSync(p),
+  stat: (p: string) => nodeFs.stat(p),
+  lstat: (p: string) => nodeFs.lstat(p),
+  readdir: (p: string) => nodeFs.readdir(p, { withFileTypes: true }),
+  unlink: (p: string) => nodeFs.unlink(p),
+  rmdir: (p: string) => nodeFs.rmdir(p),
+  rm: (p: string, options?: { recursive?: boolean; force?: boolean }) =>
+    nodeFs.rm(p, options),
+  mkdir: (p: string, options?: { recursive?: boolean }) =>
+    nodeFs.mkdir(p, options),
+  readFile: (
+    p: string,
+    options?: BufferEncoding | { encoding?: BufferEncoding },
+  ) => {
+    const encoding =
+      typeof options === 'string' ? options : (options?.encoding ?? undefined)
+    return nodeFs.readFile(p, encoding)
+  },
+  writeFile: (p: string, data: string | Uint8Array) =>
+    nodeFs.writeFile(p, data),
+  rename: (oldPath: string, newPath: string) => nodeFs.rename(oldPath, newPath),
+  open: (p: string, flags: string | number) => nodeFs.open(p, flags),
+  realpath: (p: string) => nodeFs.realpath(p),
+}
+
+mock.module('src/utils/fsOperations.js', () => ({
+  getFsImplementation: () =>
+    useMockForMagicDocs
+      ? ({
+          readFile: mockReadFile,
+          writeFile: mock(async () => {}),
+          exists: mock(async () => false),
+          mkdir: mock(async () => {}),
+          readdir: mock(async () => []),
+          stat: mock(async () => ({})),
+          unlink: mock(async () => {}),
+        } as unknown)
+      : (realFsAdapter as unknown),
+}))
+
+// ── Import module under test (after all mock.module calls) ──────────────────
+import { buildMagicDocsUpdatePrompt } from '../prompts.js'
+
+// ── Tests ───────────────────────────────────────────────────────────────────
+
+describe('buildMagicDocsUpdatePrompt – dynamic variable substitution', () => {
+  beforeEach(() => {
+    mockGetMainLoopModel.mockReturnValue('claude-opus-4-7')
+    mockGetDisplayedEffortLevel.mockReturnValue('high')
+    mockReadFile.mockImplementation(async () => {
+      throw Object.assign(new Error('ENOENT'), { code: 'ENOENT' })
+    })
+  })
+
+  test('substitutes {{CLAUDE_MODEL}} with the current model', async () => {
+    mockReadFile.mockImplementation(async () => 'Model: {{CLAUDE_MODEL}}')
+    mockGetMainLoopModel.mockReturnValue('claude-opus-4-7')
+
+    const result = await buildMagicDocsUpdatePrompt(
+      'contents',
+      '/doc.md',
+      'Title',
+    )
+    expect(result).toContain('Model: claude-opus-4-7')
+    expect(result).not.toContain('{{CLAUDE_MODEL}}')
+  })
+
+  test('substitutes {{CLAUDE_EFFORT}} with the current effort level', async () => {
+    mockReadFile.mockImplementation(async () => 'Effort: {{CLAUDE_EFFORT}}')
+    mockGetDisplayedEffortLevel.mockReturnValue('high')
+
+    const result = await buildMagicDocsUpdatePrompt(
+      'contents',
+      '/doc.md',
+      'Title',
+    )
+    expect(result).toContain('Effort: high')
+    expect(result).not.toContain('{{CLAUDE_EFFORT}}')
+  })
+
+  test('substitutes {{CLAUDE_CWD}} with process.cwd()', async () => {
+    mockReadFile.mockImplementation(async () => 'CWD: {{CLAUDE_CWD}}')
+
+    const result = await buildMagicDocsUpdatePrompt(
+      'contents',
+      '/doc.md',
+      'Title',
+    )
+    expect(result).toContain(`CWD: ${process.cwd()}`)
+    expect(result).not.toContain('{{CLAUDE_CWD}}')
+  })
+
+  test('substitutes all three dynamic variables in one template', async () => {
+    mockReadFile.mockImplementation(
+      async () =>
+        'effort={{CLAUDE_EFFORT}} model={{CLAUDE_MODEL}} cwd={{CLAUDE_CWD}}',
+    )
+    mockGetMainLoopModel.mockReturnValue('claude-sonnet-4-6')
+    mockGetDisplayedEffortLevel.mockReturnValue('medium')
+
+    const result = await buildMagicDocsUpdatePrompt(
+      'contents',
+      '/doc.md',
+      'Title',
+    )
+    expect(result).toContain('effort=medium')
+    expect(result).toContain('model=claude-sonnet-4-6')
+    expect(result).toContain(`cwd=${process.cwd()}`)
+  })
+
+  test('leaves unknown template variables unchanged', async () => {
+    mockReadFile.mockImplementation(
+      async () => '{{UNKNOWN_VAR}} {{CLAUDE_MODEL}}',
+    )
+    mockGetMainLoopModel.mockReturnValue('claude-opus-4-7')
+
+    const result = await buildMagicDocsUpdatePrompt(
+      'contents',
+      '/doc.md',
+      'Title',
+    )
+    expect(result).toContain('{{UNKNOWN_VAR}}')
+    expect(result).toContain('claude-opus-4-7')
+  })
+
+  test('existing substitution variables still work alongside new ones', async () => {
+    mockReadFile.mockImplementation(
+      async () =>
+        '{{docTitle}} effort={{CLAUDE_EFFORT}} model={{CLAUDE_MODEL}}',
+    )
+    mockGetMainLoopModel.mockReturnValue('claude-haiku')
+    mockGetDisplayedEffortLevel.mockReturnValue('low')
+
+    const result = await buildMagicDocsUpdatePrompt(
+      'contents',
+      '/doc.md',
+      'My Doc',
+    )
+    expect(result).toContain('My Doc')
+    expect(result).toContain('effort=low')
+    expect(result).toContain('model=claude-haiku')
+  })
+})
--- a/src/services/MagicDocs/prompts.ts
+++ b/src/services/MagicDocs/prompts.ts
@@ -1,6 +1,8 @@
 import { join } from 'path'
 import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
 import { getFsImplementation } from '../../utils/fsOperations.js'
+import { getDisplayedEffortLevel } from '../../utils/effort.js'
+import { getMainLoopModel } from '../../utils/model/model.js'

 /**
 * Get the Magic Docs update prompt template
@@ -114,11 +116,15 @@ These instructions take priority over the general rules below. Make sure your up
    : ''

  // Substitute variables in the prompt
+  const currentModel = getMainLoopModel()
  const variables = {
    docContents,
    docPath,
    docTitle,
    customInstructions,
+    CLAUDE_EFFORT: getDisplayedEffortLevel(currentModel, undefined),
+    CLAUDE_MODEL: currentModel,
+    CLAUDE_CWD: process.cwd(),
  }

  return substituteVariables(promptTemplate, variables)
--- a/src/services/api/tests/ultrareviewPreflight.test.ts
+++ b/src/services/api/tests/ultrareviewPreflight.test.ts
@@ -0,0 +1,226 @@
+/**
+ * Regression tests for fetchUltrareviewPreflight.
+ * Verifies all three action enum states (proceed/confirm/blocked),
+ * network/HTTP error handling, and Zod schema mismatch fallback.
+ */
+import { afterAll, beforeAll, describe, expect, mock, test } from 'bun:test'
+import { debugMock } from '../../../../tests/mocks/debug.js'
+import { logMock } from '../../../../tests/mocks/log.js'
+import { setupAxiosMock } from '../../../../tests/mocks/axios.js'
+
+// Mock dependency chain before any subject import
+mock.module('src/utils/debug.ts', debugMock)
+mock.module('src/utils/log.ts', logMock)
+mock.module('src/services/analytics/index.js', () => ({
+  logEvent: () => {},
+}))
+
+// Mock auth utilities
+mock.module('src/utils/auth.js', () => ({
+  isClaudeAISubscriber: () => true,
+  isTeamSubscriber: () => false,
+  isEnterpriseSubscriber: () => false,
+}))
+
+// Mock OAuth config
+mock.module('src/constants/oauth.js', () => ({
+  getOauthConfig: () => ({ BASE_API_URL: 'https://api.anthropic.com' }),
+}))
+
+// Mock prepareApiRequest and getOAuthHeaders
+mock.module('src/utils/teleport/api.js', () => ({
+  prepareApiRequest: async () => ({
+    accessToken: 'test-token',
+    orgUUID: 'org-uuid-test',
+  }),
+  getOAuthHeaders: (token: string) => ({
+    Authorization: `Bearer ${token}`,
+    'Content-Type': 'application/json',
+    'anthropic-version': '2023-06-01',
+  }),
+}))
+
+// We'll mock axios at module level.
+// Typed as any in test code (CLAUDE.md: mock data may use as any).
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+const mockAxiosPost = mock(async (..._args: any[]): Promise<any> => {
+  throw new Error('not configured')
+})
+
+const axiosHandle = setupAxiosMock()
+axiosHandle.stubs.post = mockAxiosPost
+axiosHandle.stubs.isAxiosError = (e: unknown) =>
+  typeof e === 'object' &&
+  e !== null &&
+  (e as { isAxiosError?: boolean }).isAxiosError === true
+
+beforeAll(() => {
+  axiosHandle.useStubs = true
+})
+
+afterAll(() => {
+  axiosHandle.useStubs = false
+})
+
+import {
+  fetchUltrareviewPreflight,
+  type UltrareviewPreflightResponse,
+} from '../ultrareviewPreflight.js'
+
+describe('fetchUltrareviewPreflight', () => {
+  test('returns proceed action when server responds with proceed', async () => {
+    const serverResponse: UltrareviewPreflightResponse = {
+      action: 'proceed',
+      billing_note: null,
+    }
+    mockAxiosPost.mockImplementationOnce(async () => ({
+      status: 200,
+      data: serverResponse,
+    }))
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).not.toBeNull()
+    expect(result?.action).toBe('proceed')
+    expect(result?.billing_note).toBeNull()
+  })
+
+  test('returns confirm action with billing_note when server responds with confirm', async () => {
+    const serverResponse: UltrareviewPreflightResponse = {
+      action: 'confirm',
+      billing_note: 'This run will cost approximately $2.50.',
+    }
+    mockAxiosPost.mockImplementationOnce(async () => ({
+      status: 200,
+      data: serverResponse,
+    }))
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).not.toBeNull()
+    expect(result?.action).toBe('confirm')
+    expect(result?.billing_note).toBe('This run will cost approximately $2.50.')
+  })
+
+  test('returns blocked action when server responds with blocked', async () => {
+    const serverResponse: UltrareviewPreflightResponse = {
+      action: 'blocked',
+      billing_note: null,
+    }
+    mockAxiosPost.mockImplementationOnce(async () => ({
+      status: 200,
+      data: serverResponse,
+    }))
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).not.toBeNull()
+    expect(result?.action).toBe('blocked')
+  })
+
+  test('returns null on schema mismatch (invalid action value)', async () => {
+    mockAxiosPost.mockImplementationOnce(async () => ({
+      status: 200,
+      data: { action: 'unknown_action', billing_note: null },
+    }))
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).toBeNull()
+  })
+
+  test('returns null on network error (no response)', async () => {
+    const networkError = new Error('ECONNREFUSED')
+    ;(networkError as unknown as { isAxiosError: boolean }).isAxiosError = true
+    mockAxiosPost.mockImplementationOnce(async () => {
+      throw networkError
+    })
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).toBeNull()
+  })
+
+  test('returns null on 401 Unauthorized', async () => {
+    const authError = new Error('Unauthorized')
+    ;(
+      authError as unknown as {
+        isAxiosError: boolean
+        response: { status: number }
+      }
+    ).isAxiosError = true
+    ;(authError as unknown as { response: { status: number } }).response = {
+      status: 401,
+    }
+    mockAxiosPost.mockImplementationOnce(async () => {
+      throw authError
+    })
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).toBeNull()
+  })
+
+  test('returns null on 403 Forbidden', async () => {
+    const forbiddenError = new Error('Forbidden')
+    ;(
+      forbiddenError as unknown as {
+        isAxiosError: boolean
+        response: { status: number }
+      }
+    ).isAxiosError = true
+    ;(forbiddenError as unknown as { response: { status: number } }).response =
+      { status: 403 }
+    mockAxiosPost.mockImplementationOnce(async () => {
+      throw forbiddenError
+    })
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).toBeNull()
+  })
+
+  test('returns null on 5xx server error', async () => {
+    const serverError = new Error('Internal Server Error')
+    ;(
+      serverError as unknown as {
+        isAxiosError: boolean
+        response: { status: number }
+      }
+    ).isAxiosError = true
+    ;(serverError as unknown as { response: { status: number } }).response = {
+      status: 500,
+    }
+    mockAxiosPost.mockImplementationOnce(async () => {
+      throw serverError
+    })
+
+    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })
+    expect(result).toBeNull()
+  })
+
+  test('passes pr_number to request body when provided', async () => {
+    mockAxiosPost.mockImplementationOnce(async () => ({
+      status: 200,
+      data: { action: 'proceed', billing_note: null },
+    }))
+
+    const result = await fetchUltrareviewPreflight({
+      repo: 'owner/repo',
+      pr_number: 42,
+    })
+    expect(result?.action).toBe('proceed')
+    // Assert outside the stub; a throw inside it could be swallowed.
+    const [, body] = mockAxiosPost.mock.calls.at(-1) ?? []
+    expect(body?.pr_number).toBe(42)
+  })
+
+  test('passes confirm flag to request body when provided', async () => {
+    mockAxiosPost.mockImplementationOnce(async () => ({
+      status: 200,
+      data: { action: 'proceed', billing_note: null },
+    }))
+
+    const result = await fetchUltrareviewPreflight({
+      repo: 'owner/repo',
+      confirm: true,
+    })
+    expect(result?.action).toBe('proceed')
+    // Assert outside the stub; a throw inside it could be swallowed.
+    const [, body] = mockAxiosPost.mock.calls.at(-1) ?? []
+    expect(body?.confirm).toBe(true)
+  })
+})
--- a/src/services/api/claude.ts
+++ b/src/services/api/claude.ts
@@ -93,7 +93,10 @@ import {
  asSystemPrompt,
  type SystemPrompt,
 } from '../../utils/systemPromptType.js'
-import { cloneDeep } from 'lodash-es'
+import {
+  getBreakCacheMarkerPath,
+  getBreakCacheAlwaysPath,
+} from '../../commands/break-cache/index.js'
 import { tokenCountFromLastAPIResponse } from '../../utils/tokens.js'
 import { getDynamicConfig_BLOCKS_ON_INIT } from '../analytics/growthbook.js'
 import {
@@ -121,6 +124,7 @@ import {
  getAfkModeHeaderLatched,
  getCacheEditingHeaderLatched,
  getFastModeHeaderLatched,
+  getLastApiCompletionTimestamp,
  getPromptCache1hAllowlist,
  getPromptCache1hEligible,
  getSessionId,
@@ -250,6 +254,7 @@ import {
  type NonNullableUsage,
 } from './logging.js'
 import {
+  CACHE_TTL_1HOUR_MS,
  checkResponseForCacheBreak,
  recordPromptState,
 } from './promptCacheBreakDetection.js'
@@ -507,30 +512,10 @@ export function getAPIMetadata() {
    }
  }

-  const deviceId = getOrCreateUserID()
-
-  // Third-party API providers (DeepSeek, etc.) validate user_id against
-  // ^[a-zA-Z0-9_-]+$ which rejects JSON strings containing {, ", :, etc.
-  // When using a non-Anthropic base URL, send only the device_id (hex string).
-  const baseUrl = process.env.ANTHROPIC_BASE_URL
-  const isThirdParty =
-    baseUrl &&
-    (() => {
-      try {
-        return new URL(baseUrl).host !== 'api.anthropic.com'
-      } catch {
-        return false
-      }
-    })()
-
-  if (isThirdParty) {
-    return { user_id: deviceId }
-  }
-
  return {
    user_id: jsonStringify({
      ...extra,
-      device_id: deviceId,
+      device_id: getOrCreateUserID(),
      // Only include OAuth account UUID when actively using OAuth authentication
      account_uuid: getOauthAccountInfo()?.accountUuid ?? '',
      session_id: getSessionId(),
@@ -1441,12 +1426,39 @@ async function* queryModel(
    ].filter(Boolean),
  )

+  // ── Break-cache integration ──
+  // If a one-time break-cache marker exists, or always-mode is on, append a
+  // unique ephemeral nonce comment to the system prompt so the prefix-cache
+  // hash changes for this request, forcing a cache miss.
+  {
+    const { existsSync, unlinkSync } = await import('node:fs')
+    const { randomUUID } = await import('node:crypto')
+    const onceMarker = getBreakCacheMarkerPath()
+    const alwaysFlag = getBreakCacheAlwaysPath()
+    const shouldBreak = existsSync(onceMarker) || existsSync(alwaysFlag)
+    if (shouldBreak) {
+      const nonce = randomUUID()
+      systemPrompt = asSystemPrompt([
+        ...systemPrompt,
+        `<!-- cache-break nonce: ${nonce} -->`,
+      ])
+      // Only delete the once marker; the always flag persists until /break-cache off
+      if (existsSync(onceMarker)) {
+        try {
+          unlinkSync(onceMarker)
+        } catch {
+          /* best-effort */
+        }
+      }
+    }
+  }
+
  // Prepend system prompt block for easy API identification
  logAPIPrefix(systemPrompt)

  const enablePromptCaching =
    options.enablePromptCaching ?? getPromptCachingEnabled(options.model)
-  let system = buildSystemPromptBlocks(systemPrompt, enablePromptCaching, {
+  const system = buildSystemPromptBlocks(systemPrompt, enablePromptCaching, {
    skipGlobalCacheForSystemPrompt: needsToolBasedCacheMarker,
    querySource: options.querySource,
  })
@@ -1466,7 +1478,7 @@ async function* queryModel(
      model: advisorModel,
    } as unknown as BetaToolUnion)
  }
-  let allTools = [...toolSchemas, ...extraToolSchemas]
+  const allTools = [...toolSchemas, ...extraToolSchemas]

  const isFastMode =
    isFastModeEnabled() &&
@@ -1590,39 +1602,6 @@ async function* queryModel(
  const consumedCacheEdits = cachedMCEnabled ? consumePendingCacheEdits() : null
  const consumedPinnedEdits = cachedMCEnabled ? getPinnedCacheEdits() : []

-  // ---------------------------------------------------------------------------
-  // Serialization boundary: deep-clone heavy data so the closure below captures
-  // independent copies, not references to the originals. After this point the
-  // original variables (messagesForAPI, system, allTools) are nulled out so
-  // they can be GC'd even while the generator/closure is still alive (during
-  // long streaming responses or retry backoff).
-  // ---------------------------------------------------------------------------
-  const frozenMessages = addCacheBreakpoints(
-    messagesForAPI,
-    enablePromptCaching,
-    options.querySource,
-    cachedMCEnabled &&
-      getAPIProvider() === 'firstParty' &&
-      options.querySource === 'repl_main_thread',
-    consumedCacheEdits as any,
-    consumedPinnedEdits as any,
-    options.skipCacheWrite,
-  )
-  const frozenSystem = cloneDeep(system)
-  const frozenTools = cloneDeep(allTools)
-
-  // Pre-compute scalars that post-streaming code needs, so messagesForAPI
-  // can be released before streaming starts.
-  const preMessagesCount = messagesForAPI.length
-  const preMessagesTokenCount = tokenCountFromLastAPIResponse(messagesForAPI)
-
-  // Release originals for GC — the frozen* copies and pre-computed scalars
-  // are now the only references to this data inside the closure.
-  // After null-out, all downstream code uses frozen* or pre-computed scalars.
-  messagesForAPI = null!
-  system = null!
-  allTools = null!
-
  // Capture the betas sent in the last API request, including the ones that
  // were dynamically added, so we can log and send it to telemetry.
  let lastRequestBetas: string[] | undefined
@@ -1725,6 +1704,9 @@ async function* queryModel(
      clearAllThinking: false,
    })

+    const enablePromptCaching =
+      options.enablePromptCaching ?? getPromptCachingEnabled(retryContext.model)
+
    // Fast mode: header is latched session-stable (cache-safe), but
    // `speed='fast'` stays dynamic so cooldown still suppresses the actual
    // fast-mode request without changing the cache key.
@@ -1755,10 +1737,13 @@ async function* queryModel(
      }
    }

-    // Cache editing beta: header is latched session-stable.
-    // The useCachedMC gate (cache_edits body behavior) is baked into
-    // frozenMessages at the serialization boundary above, so this block
-    // only controls the beta header.
+    // Cache editing beta: header is latched session-stable; useCachedMC
+    // (controls cache_edits body behavior) stays live so edits stop when
+    // the feature disables but the header doesn't flip.
+    const useCachedMC =
+      cachedMCEnabled &&
+      getAPIProvider() === 'firstParty' &&
+      options.querySource === 'repl_main_thread'
    if (
      cacheEditingHeaderLatched &&
      cacheEditingBetaHeader &&
@@ -1787,9 +1772,17 @@ async function* queryModel(

    return {
      model: normalizeModelStringForAPI(options.model),
-      messages: frozenMessages,
-      system: frozenSystem,
-      tools: frozenTools,
+      messages: addCacheBreakpoints(
+        messagesForAPI,
+        enablePromptCaching,
+        options.querySource,
+        useCachedMC,
+        consumedCacheEdits as any,
+        consumedPinnedEdits as any,
+        options.skipCacheWrite,
+      ),
+      system,
+      tools: allTools,
      tool_choice: options.toolChoice,
      ...(useBetas && { betas: filteredBetas }),
      metadata: getAPIMetadata(),
@@ -1849,9 +1842,6 @@ async function* queryModel(
  let ttftMs = 0
  let partialMessage: BetaMessage | undefined
  const contentBlocks: (BetaContentBlock | ConnectorTextBlock)[] = []
-  // Accumulate streaming deltas in arrays to avoid O(n²) string concatenation.
-  // Joined and assigned to contentBlock fields at content_block_stop.
-  const streamingDeltas = new Map<number, string[]>()
  let usage: NonNullableUsage = EMPTY_USAGE
  let costUSD = 0
  let stopReason: BetaStopReason | null = null
@@ -2138,8 +2128,6 @@ async function* queryModel(
                }
                break
            }
-            // Initialize delta accumulator for this content block
-            streamingDeltas.set(part.index, [])
            break
          case 'content_block_delta': {
            const contentBlock = contentBlocks[part.index]
@@ -2169,9 +2157,8 @@ async function* queryModel(
                })
                throw new Error('Content block is not a connector_text block')
              }
-              streamingDeltas
-                .get(part.index)
-                ?.push(delta.connector_text as string)
+              ;(contentBlock as { connector_text: string }).connector_text +=
+                delta.connector_text
            } else {
              switch (delta.type) {
                case 'citations_delta':
@@ -2201,9 +2188,7 @@ async function* queryModel(
                    })
                    throw new Error('Content block input is not a string')
                  }
-                  streamingDeltas
-                    .get(part.index)
-                    ?.push(delta.partial_json as string)
+                  contentBlock.input += delta.partial_json
                  break
                case 'text_delta':
                  if (contentBlock.type !== 'text') {
@@ -2217,7 +2202,7 @@ async function* queryModel(
                    })
                    throw new Error('Content block is not a text block')
                  }
-                  streamingDeltas.get(part.index)?.push(delta.text!)
+                  ;(contentBlock as { text: string }).text += delta.text
                  break
                case 'signature_delta':
                  if (
@@ -2252,7 +2237,8 @@ async function* queryModel(
                    })
                    throw new Error('Content block is not a thinking block')
                  }
-                  streamingDeltas.get(part.index)?.push(delta.thinking!)
+                  ;(contentBlock as { thinking: string }).thinking +=
+                    delta.thinking
                  break
              }
            }
@@ -2284,32 +2270,6 @@ async function* queryModel(
              })
              throw new Error('Message not found')
            }
-            // Join accumulated streaming deltas into the contentBlock fields
-            // to avoid O(n²) string concatenation during streaming.
-            const deltas = streamingDeltas.get(part.index)
-            if (deltas && deltas.length > 0) {
-              const joined = deltas.join('')
-              switch (contentBlock.type) {
-                case 'text':
-                  ;(contentBlock as { text: string }).text = joined
-                  break
-                case 'thinking':
-                  ;(contentBlock as { thinking: string }).thinking = joined
-                  break
-                case 'tool_use':
-                case 'server_tool_use':
-                  contentBlock.input = joined
-                  break
-                default:
-                  if ((contentBlock.type as string) === 'connector_text') {
-                    ;(
-                      contentBlock as { connector_text: string }
-                    ).connector_text = joined
-                  }
-                  break
-              }
-              streamingDeltas.delete(part.index)
-            }
            const m: AssistantMessage = {
              message: {
                ...partialMessage,
@@ -2864,8 +2824,8 @@ async function* queryModel(
        logAPIError({
          error,
          model: errorModel,
-          messageCount: preMessagesCount,
-          messageTokens: preMessagesTokenCount,
+          messageCount: messagesForAPI.length,
+          messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
          durationMs: Date.now() - start,
          durationMsIncludingRetries: Date.now() - startIncludingRetries,
          attempt: attemptNumber,
@@ -2886,10 +2846,7 @@ async function* queryModel(

        yield getAssistantMessageFromError(error, errorModel, {
          messages,
-          messagesForAPI: frozenMessages as unknown as (
-            | UserMessage
-            | AssistantMessage
-          )[],
+          messagesForAPI,
        })
        releaseStreamResources()
        return
@@ -2923,8 +2880,8 @@ async function* queryModel(
      logAPIError({
        error,
        model: errorModel,
-        messageCount: preMessagesCount,
-        messageTokens: preMessagesTokenCount,
+        messageCount: messagesForAPI.length,
+        messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
        durationMs: Date.now() - start,
        durationMsIncludingRetries: Date.now() - startIncludingRetries,
        attempt: attemptNumber,
@@ -2947,10 +2904,7 @@ async function* queryModel(

      yield getAssistantMessageFromError(error, errorModel, {
        messages,
-        messagesForAPI: frozenMessages as unknown as (
-          | UserMessage
-          | AssistantMessage
-        )[],
+        messagesForAPI,
      })
      releaseStreamResources()
      return
@@ -3006,19 +2960,14 @@ async function* queryModel(
  // Precompute scalars so the fire-and-forget .then() closure doesn't pin the
  // full messagesForAPI array (the entire conversation up to the context window
  // limit) until getToolPermissionContext() resolves.
-  // Note: messagesForAPI was nulled above (serialization boundary), so we use
-  // the pre-computed scalars captured before the null-out.
-  const logMessageCount = preMessagesCount
-  const logMessageTokens = preMessagesTokenCount
+  const logMessageCount = messagesForAPI.length
+  const logMessageTokens = tokenCountFromLastAPIResponse(messagesForAPI)

  // Record LLM observation in Langfuse (no-op if not configured)
  recordLLMObservation(options.langfuseTrace ?? null, {
    model: resolvedModel,
    provider: getAPIProvider(),
-    input: convertMessagesToLangfuse(
-      frozenMessages as Parameters<typeof convertMessagesToLangfuse>[0],
-      systemPrompt,
-    ),
+    input: convertMessagesToLangfuse(messagesForAPI, systemPrompt),
    output: convertOutputToLangfuse(newMessages),
    usage: {
      input_tokens: usage.input_tokens,
--- a/src/services/api/ultrareviewPreflight.ts
+++ b/src/services/api/ultrareviewPreflight.ts
@@ -0,0 +1,85 @@
+import axios from 'axios'
+import z from 'zod/v4'
+import { getOauthConfig } from '../../constants/oauth.js'
+import { logForDebugging } from '../../utils/debug.js'
+import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
+
+/** Upper bound, in milliseconds, on how long the preflight POST may take. */
+const PREFLIGHT_TIMEOUT_MS = 10000
+
+/**
+ * Runtime validator for the /v1/ultrareview/preflight response body.
+ * Based on binary-extracted schema: vq.object({action: vq.enum([...]), billing_note: ...})
+ */
+const UltrareviewPreflightSchema = z.object({
+  action: z.enum(['proceed', 'confirm', 'blocked']),
+  billing_note: z.string().nullable().optional(),
+})
+
+/** Shape of a response that passed UltrareviewPreflightSchema validation. */
+export type UltrareviewPreflightResponse = z.infer<
+  typeof UltrareviewPreflightSchema
+>
+
+/** Request fields for fetchUltrareviewPreflight; only `repo` is required. */
+export type UltrareviewPreflightArgs = {
+  repo: string
+  pr_number?: number
+  pr_url?: string
+  confirm?: boolean
+}
+
+/**
+ * Server-side gate consulted before launch: POST /v1/ultrareview/preflight.
+ *
+ * Resolves to the validated result (proceed / confirm / blocked), or to null
+ * on any failure (network error, auth error, schema mismatch). Callers must
+ * treat null as "fallback to direct launch" to preserve existing behavior.
+ *
+ * Set `confirm` to true once the user has acknowledged the billing dialog (or
+ * passed --confirm on the CLI); that skips the server-side confirm prompt and
+ * gets a direct proceed/blocked.
+ *
+ * @param args - Repository plus the optional PR number, PR URL and confirm flag.
+ * @returns The validated preflight response, or null on any failure.
+ */
+export async function fetchUltrareviewPreflight(
+  args: UltrareviewPreflightArgs,
+): Promise<UltrareviewPreflightResponse | null> {
+  try {
+    const { accessToken, orgUUID } = await prepareApiRequest()
+
+    // Optional fields are forwarded only when the caller supplied them.
+    const { repo, pr_number, pr_url, confirm } = args
+    const payload: Record<string, unknown> = {
+      repo,
+      ...(pr_number !== undefined && { pr_number }),
+      ...(pr_url !== undefined && { pr_url }),
+      ...(confirm !== undefined && { confirm }),
+    }
+
+    const baseUrl = getOauthConfig().BASE_API_URL
+    const headers = {
+      ...getOAuthHeaders(accessToken),
+      'x-organization-uuid': orgUUID,
+    }
+    const { data } = await axios.post(
+      `${baseUrl}/v1/ultrareview/preflight`,
+      payload,
+      { headers, timeout: PREFLIGHT_TIMEOUT_MS },
+    )
+
+    const validation = UltrareviewPreflightSchema.safeParse(data)
+    if (validation.success) {
+      return validation.data
+    }
+    logForDebugging(
+      `fetchUltrareviewPreflight: schema mismatch — ${validation.error.message}`,
+    )
+    return null
+  } catch (error) {
+    // Best-effort by design: every failure is logged and reported as null.
+    logForDebugging(`fetchUltrareviewPreflight failed: ${error}`)
+    return null
+  }
+}
--- a/src/services/langfuse/tests/langfuse.test.ts
+++ b/src/services/langfuse/tests/langfuse.test.ts
@@ -170,6 +170,21 @@ describe('Langfuse integration', () => {
      const result = sanitizeToolOutput('MCPTool', 'mcp data')
      expect(result).toBe('[MCPTool output redacted, 8 chars]')
    })
+
+    test('redacts VaultHttpFetch output (vault tool, PR-2)', async () => {
+      const { sanitizeToolOutput } = await import('../sanitize.js')
+      const result = sanitizeToolOutput(
+        'VaultHttpFetch',
+        'sk-secret-bearer-token',
+      )
+      expect(result).toBe('[VaultHttpFetch output redacted, 22 chars]')
+    })
+
+    test('redacts LocalVaultFetch output (vault tool, future PR-3)', async () => {
+      const { sanitizeToolOutput } = await import('../sanitize.js')
+      const result = sanitizeToolOutput('LocalVaultFetch', 'plaintext-secret')
+      expect(result).toBe('[LocalVaultFetch output redacted, 16 chars]')
+    })
  })

  describe('sanitizeGlobal', () => {
--- a/src/services/langfuse/sanitize.ts
+++ b/src/services/langfuse/sanitize.ts
@@ -7,7 +7,16 @@ const REDACTED_FILE_TOOLS = new Set([
  'FileEditTool',
 ])
 const REDACTED_SHELL_TOOLS = new Set(['BashTool', 'PowerShellTool'])
-const SENSITIVE_OUTPUT_TOOLS = new Set(['ConfigTool', 'MCPTool'])
+// Vault-class tools and tools that intentionally surface user secrets must
+// have their tool_result redacted in Langfuse traces. PR-2 ships VaultHttpFetch;
+// LocalVaultFetch is reserved for a future PR. Adding both here proactively
+// keeps Langfuse export safe even before the tools land.
+const SENSITIVE_OUTPUT_TOOLS = new Set([
+  'ConfigTool',
+  'MCPTool',
+  'VaultHttpFetch',
+  'LocalVaultFetch',
+])

 function escapeRegExp(value: string): string {
  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
--- a/src/services/providerRegistry/tests/loader.test.ts
+++ b/src/services/providerRegistry/tests/loader.test.ts
@@ -0,0 +1,149 @@
+import { describe, test, expect, beforeEach, afterEach, mock } from 'bun:test'
+import { mkdtempSync, writeFileSync, rmSync } from 'fs'
+import { join } from 'path'
+import { tmpdir } from 'os'
+import { logMock } from '../../../../tests/mocks/log.js'
+
+// Must mock log before any import that transitively loads log.ts
+mock.module('src/utils/log.ts', logMock)
+
+// bun:bundle must be mocked before imports that use feature()
+mock.module('bun:bundle', () => ({ feature: () => false }))
+
+// settings.js must be mocked to cut bootstrap chain
+mock.module('src/utils/settings/settings.js', () => ({
+  getSettings_DEPRECATED: () => ({}),
+  updateSettingsForSource: () => {},
+}))
+
+// Scratch config directory for the current test; CLAUDE_CONFIG_DIR points here.
+let tmpDir: string
+// CLAUDE_CONFIG_DIR as it was before the current test. Restored in afterEach so
+// this file does not change the environment seen by later test files.
+let savedConfigDir: string | undefined
+
+/** Clears the loader's per-process provider cache. */
+async function resetProviderCache(): Promise<void> {
+  const { _invalidateProviderCache } = await import('../loader.js')
+  _invalidateProviderCache()
+}
+
+/** Writes raw text to providers.json inside the scratch config directory. */
+function writeProvidersFile(contents: string): void {
+  writeFileSync(join(tmpDir, 'providers.json'), contents)
+}
+
+beforeEach(async () => {
+  savedConfigDir = process.env['CLAUDE_CONFIG_DIR']
+  tmpDir = mkdtempSync(join(tmpdir(), 'provider-loader-test-'))
+  process.env['CLAUDE_CONFIG_DIR'] = tmpDir
+  // Invalidate up front as well: the cache is module-level state, so a load
+  // that happened earlier in this process would otherwise leak into the test.
+  await resetProviderCache()
+})
+
+afterEach(async () => {
+  // Restore instead of deleting unconditionally so a pre-existing value survives.
+  if (savedConfigDir === undefined) {
+    delete process.env['CLAUDE_CONFIG_DIR']
+  } else {
+    process.env['CLAUDE_CONFIG_DIR'] = savedConfigDir
+  }
+  rmSync(tmpDir, { recursive: true, force: true })
+  // J1 fix: invalidate the per-process cache so the next test starts fresh
+  await resetProviderCache()
+})
+
+describe('loadProviders', () => {
+  test('returns 4 default providers when providers.json does not exist', async () => {
+    const { loadProviders } = await import('../loader.js')
+    const providers = loadProviders()
+    expect(providers).toHaveLength(4)
+    expect(providers.map(p => p.id)).toEqual([
+      'cerebras',
+      'groq',
+      'qwen',
+      'deepseek',
+    ])
+  })
+
+  test('returns defaults when providers.json is empty', async () => {
+    writeProvidersFile('')
+    const { loadProviders } = await import('../loader.js')
+    const providers = loadProviders()
+    expect(providers).toHaveLength(4)
+  })
+
+  test('returns defaults when providers.json is empty array', async () => {
+    writeProvidersFile('[]')
+    const { loadProviders } = await import('../loader.js')
+    const providers = loadProviders()
+    expect(providers).toHaveLength(4)
+  })
+
+  test('returns defaults when providers.json is corrupt JSON', async () => {
+    writeProvidersFile('{not valid json')
+    const { loadProviders } = await import('../loader.js')
+    const providers = loadProviders()
+    expect(providers).toHaveLength(4)
+  })
+
+  test('returns defaults when providers.json fails schema validation', async () => {
+    writeProvidersFile(
+      JSON.stringify([{ id: 123, kind: 'bad-kind', baseUrl: 'not-a-url' }]),
+    )
+    const { loadProviders } = await import('../loader.js')
+    const providers = loadProviders()
+    expect(providers).toHaveLength(4)
+  })
+
+  test('merges valid user providers on top of defaults', async () => {
+    const customProvider = {
+      id: 'myendpoint',
+      kind: 'openai-compat',
+      baseUrl: 'https://my.api.com/v1',
+      apiKeyEnv: 'MY_API_KEY',
+      defaultModel: 'my-model',
+      compatRule: 'permissive',
+    }
+    writeProvidersFile(JSON.stringify([customProvider]))
+    const { loadProviders } = await import('../loader.js')
+    const providers = loadProviders()
+    // 4 defaults + 1 custom = 5
+    expect(providers).toHaveLength(5)
+    expect(providers.find(p => p.id === 'myendpoint')).toMatchObject({
+      baseUrl: 'https://my.api.com/v1',
+    })
+  })
+
+  test('user provider with same id as default replaces the default', async () => {
+    const overrideCerebras = {
+      id: 'cerebras',
+      kind: 'openai-compat',
+      baseUrl: 'https://custom-cerebras.example.com/v1',
+      apiKeyEnv: 'CEREBRAS_API_KEY',
+      defaultModel: 'llama-3.3-70b',
+      compatRule: 'cerebras',
+    }
+    writeProvidersFile(JSON.stringify([overrideCerebras]))
+    const { loadProviders } = await import('../loader.js')
+    const providers = loadProviders()
+    // Still 4 providers (cerebras replaced, not added)
+    expect(providers).toHaveLength(4)
+    const cerebras = providers.find(p => p.id === 'cerebras')
+    expect(cerebras?.baseUrl).toBe('https://custom-cerebras.example.com/v1')
+  })
+
+  test('findProvider returns undefined for unknown id', async () => {
+    const { findProvider, DEFAULT_PROVIDERS } = await import('../loader.js')
+    const result = findProvider('nonexistent', DEFAULT_PROVIDERS)
+    expect(result).toBeUndefined()
+  })
+
+  test('findProvider returns correct provider for known id', async () => {
+    const { findProvider, DEFAULT_PROVIDERS } = await import('../loader.js')
+    const deepseek = findProvider('deepseek', DEFAULT_PROVIDERS)
+    expect(deepseek?.baseUrl).toBe('https://api.deepseek.com/v1')
+    expect(deepseek?.compatRule).toBe('deepseek')
+  })
+})
--- a/src/services/providerRegistry/tests/providerCompatMatrix.test.ts
+++ b/src/services/providerRegistry/tests/providerCompatMatrix.test.ts
@@ -0,0 +1,220 @@
+import { describe, test, expect } from 'bun:test'
+import {
+  COMPAT_PROFILES,
+  applyCompatRule,
+  getDeepSeekReasoningMode,
+} from '../providerCompatMatrix.js'
+
+type WireMessage = Record<string, unknown>
+
+/**
+ * Applies a compat rule and returns the first entry of the rewritten
+ * `messages` array. Arguments are forwarded to applyCompatRule unchanged.
+ */
+function firstRewrittenMessage(
+  ...ruleArgs: Parameters<typeof applyCompatRule>
+): WireMessage {
+  const rewritten = applyCompatRule(...ruleArgs)
+  const messages = rewritten['messages'] as WireMessage[]
+  return messages[0]!
+}
+
+// Static capability flags declared for each compat rule.
+describe('COMPAT_PROFILES', () => {
+  test('cerebras does not support stream_options', () => {
+    const { supportsStreamUsageOption } = COMPAT_PROFILES['cerebras']
+    expect(supportsStreamUsageOption).toBe(false)
+  })
+
+  test('cerebras does not support thinking field', () => {
+    const { supportsThinkingField } = COMPAT_PROFILES['cerebras']
+    expect(supportsThinkingField).toBe(false)
+  })
+
+  test('groq strips reasoning_content', () => {
+    const { reasoningContentEcho } = COMPAT_PROFILES['groq']
+    expect(reasoningContentEcho).toBe('strip')
+  })
+
+  test('deepseek preserves reasoning_content', () => {
+    const { reasoningContentEcho } = COMPAT_PROFILES['deepseek']
+    expect(reasoningContentEcho).toBe('always-preserve')
+  })
+
+  test('deepseek supports thinking field', () => {
+    const { supportsThinkingField } = COMPAT_PROFILES['deepseek']
+    expect(supportsThinkingField).toBe(true)
+  })
+
+  test('strict-openai strips stream_options', () => {
+    const { supportsStreamUsageOption } = COMPAT_PROFILES['strict-openai']
+    expect(supportsStreamUsageOption).toBe(false)
+  })
+
+  test('permissive allows all fields', () => {
+    const { supportsStreamUsageOption, supportsThinkingField } =
+      COMPAT_PROFILES['permissive']
+    expect(supportsStreamUsageOption).toBe(true)
+    expect(supportsThinkingField).toBe(true)
+  })
+})
+
+// Whether stream_options survives depends on the rule being applied.
+describe('applyCompatRule - stream_options stripping', () => {
+  test('strips stream_options.include_usage for cerebras', () => {
+    const request = {
+      model: 'llama-3.3-70b',
+      messages: [],
+      stream: true,
+      stream_options: { include_usage: true },
+    }
+    const rewritten = applyCompatRule(request, 'cerebras')
+    expect(rewritten['stream_options']).toBeUndefined()
+  })
+
+  test('strips stream_options for strict-openai', () => {
+    const request = {
+      messages: [],
+      stream_options: { include_usage: true },
+    }
+    const rewritten = applyCompatRule(request, 'strict-openai')
+    expect(rewritten['stream_options']).toBeUndefined()
+  })
+
+  test('preserves stream_options for deepseek', () => {
+    const request = {
+      messages: [],
+      stream_options: { include_usage: true },
+    }
+    const rewritten = applyCompatRule(request, 'deepseek')
+    expect(rewritten['stream_options']).toEqual({ include_usage: true })
+  })
+
+  test('preserves stream_options for permissive', () => {
+    const request = {
+      messages: [],
+      stream_options: { include_usage: true, other_field: 'x' },
+    }
+    const rewritten = applyCompatRule(request, 'permissive')
+    expect(rewritten['stream_options']).toEqual({
+      include_usage: true,
+      other_field: 'x',
+    })
+  })
+
+  test('does not mutate input body', () => {
+    const request = {
+      messages: [],
+      stream_options: { include_usage: true },
+    }
+    applyCompatRule(request, 'groq')
+    // Input must be unchanged
+    expect(request['stream_options']).toEqual({ include_usage: true })
+  })
+})
+
+// Handling of the per-message `thinking` field.
+describe('applyCompatRule - thinking field stripping', () => {
+  test('strips thinking field from messages for cerebras', () => {
+    const request = {
+      messages: [{ role: 'user', content: 'hi', thinking: { budget: 1000 } }],
+    }
+    const message = firstRewrittenMessage(request, 'cerebras')
+    expect(message['thinking']).toBeUndefined()
+    expect(message['content']).toBe('hi')
+  })
+
+  test('preserves thinking field for deepseek', () => {
+    const request = {
+      messages: [{ role: 'user', content: 'hi', thinking: { budget: 1000 } }],
+    }
+    const message = firstRewrittenMessage(request, 'deepseek')
+    expect(message['thinking']).toEqual({ budget: 1000 })
+  })
+})
+
+// Whether reasoning_content on assistant messages is echoed back or removed.
+describe('applyCompatRule - DeepSeek reasoning_content three modes', () => {
+  test('thinking-only mode: strips reasoning_content for strict-openai (non-deepseek)', () => {
+    const request = {
+      messages: [
+        { role: 'assistant', content: 'answer', reasoning_content: 'thoughts' },
+      ],
+    }
+    const message = firstRewrittenMessage(request, 'strict-openai')
+    expect(message['reasoning_content']).toBeUndefined()
+  })
+
+  test('thinking-only mode: preserves reasoning_content for deepseek', () => {
+    const request = {
+      messages: [
+        { role: 'assistant', content: 'answer', reasoning_content: 'thoughts' },
+      ],
+    }
+    const message = firstRewrittenMessage(request, 'deepseek')
+    expect(message['reasoning_content']).toBe('thoughts')
+  })
+
+  test('thinking+tools mode: preserves reasoning_content for deepseek', () => {
+    const request = {
+      messages: [
+        {
+          role: 'assistant',
+          content: null,
+          reasoning_content: 'deep thoughts',
+          tool_calls: [{ id: 'call_1', function: { name: 'search' } }],
+        },
+      ],
+    }
+    const message = firstRewrittenMessage(request, 'deepseek')
+    expect(message['reasoning_content']).toBe('deep thoughts')
+  })
+
+  test('permissive with non-thinking model strips reasoning_content', () => {
+    const request = {
+      model: 'gpt-4o',
+      messages: [
+        { role: 'assistant', content: 'hi', reasoning_content: 'unused' },
+      ],
+    }
+    const message = firstRewrittenMessage(request, 'permissive')
+    expect(message['reasoning_content']).toBeUndefined()
+  })
+
+  test('permissive with thinking model preserves reasoning_content', () => {
+    const request = {
+      model: 'deepseek-reasoner',
+      messages: [
+        { role: 'assistant', content: 'hi', reasoning_content: 'thoughts' },
+      ],
+    }
+    const message = firstRewrittenMessage(request, 'permissive')
+    expect(message['reasoning_content']).toBe('thoughts')
+  })
+})
+
+// Classification of one message by its reasoning_content / tool_calls fields.
+describe('getDeepSeekReasoningMode', () => {
+  test('thinking-only: has reasoning_content, no tool_calls', () => {
+    const message = { reasoning_content: 'thoughts', content: 'answer' }
+    expect(getDeepSeekReasoningMode(message)).toBe('thinking-only')
+  })
+
+  test('thinking+tools: has both reasoning_content and tool_calls', () => {
+    const message = {
+      reasoning_content: 'deep thoughts',
+      tool_calls: [{ id: 'call_1' }],
+    }
+    expect(getDeepSeekReasoningMode(message)).toBe('thinking+tools')
+  })
+
+  test('normal: no reasoning_content', () => {
+    const message = { content: 'plain answer' }
+    expect(getDeepSeekReasoningMode(message)).toBe('normal')
+  })
+
+  test('normal: empty tool_calls array with no reasoning_content', () => {
+    const message = { content: 'plain', tool_calls: [] }
+    expect(getDeepSeekReasoningMode(message)).toBe('normal')
+  })
+})
--- a/src/services/providerRegistry/tests/switcher.test.ts
+++ b/src/services/providerRegistry/tests/switcher.test.ts
@@ -0,0 +1,147 @@
+import { describe, test, expect, beforeEach, afterEach, mock } from 'bun:test'
+import { logMock } from '../../../../tests/mocks/log.js'
+
+mock.module('src/utils/log.ts', logMock)
+mock.module('bun:bundle', () => ({ feature: () => false }))
+mock.module('src/utils/settings/settings.js', () => ({
+  getSettings_DEPRECATED: () => ({}),
+  updateSettingsForSource: () => {},
+}))
+
+// Env vars these tests depend on. All are cleared before each test and put
+// back afterwards, so the run neither inherits nor leaks credentials.
+const MANAGED_ENV_VARS = [
+  'CLAUDE_CODE_USE_OPENAI',
+  'OPENAI_API_KEY',
+  'OPENAI_BASE_URL',
+  'ANTHROPIC_API_KEY',
+  'CEREBRAS_API_KEY',
+  'GROQ_API_KEY',
+  'DASHSCOPE_API_KEY',
+  'DEEPSEEK_API_KEY',
+] as const
+
+// Values the managed vars had before the current test (unset ones are absent).
+const savedEnv = new Map<string, string>()
+
+beforeEach(() => {
+  savedEnv.clear()
+  for (const name of MANAGED_ENV_VARS) {
+    const value = process.env[name]
+    if (value !== undefined) {
+      savedEnv.set(name, value)
+    }
+    delete process.env[name]
+  }
+})
+
+afterEach(() => {
+  // Runs even when a test fails, so keys set inside a test never outlive it.
+  for (const name of MANAGED_ENV_VARS) {
+    const value = savedEnv.get(name)
+    if (value === undefined) {
+      delete process.env[name]
+    } else {
+      process.env[name] = value
+    }
+  }
+})
+
+describe('switchProvider', () => {
+  test('switching to cerebras returns correct env vars', async () => {
+    const { switchProvider } = await import('../switcher.js')
+    const { DEFAULT_PROVIDERS } = await import('../loader.js')
+    const result = switchProvider('cerebras', DEFAULT_PROVIDERS)
+    expect(result.env['CLAUDE_CODE_USE_OPENAI']).toBe('1')
+    expect(result.env['OPENAI_BASE_URL']).toBe('https://api.cerebras.ai/v1')
+    expect(result.env['OPENAI_MODEL']).toBe('llama-3.3-70b')
+    expect(result.provider.id).toBe('cerebras')
+  })
+
+  test('switching to groq returns correct env vars', async () => {
+    const { switchProvider } = await import('../switcher.js')
+    const { DEFAULT_PROVIDERS } = await import('../loader.js')
+    const result = switchProvider('groq', DEFAULT_PROVIDERS)
+    expect(result.env['OPENAI_BASE_URL']).toBe('https://api.groq.com/openai/v1')
+    expect(result.env['OPENAI_MODEL']).toBe('llama-3.3-70b-versatile')
+  })
+
+  test('switching to qwen returns correct env vars', async () => {
+    const { switchProvider } = await import('../switcher.js')
+    const { DEFAULT_PROVIDERS } = await import('../loader.js')
+    const result = switchProvider('qwen', DEFAULT_PROVIDERS)
+    expect(result.env['OPENAI_BASE_URL']).toBe(
+      'https://dashscope.aliyuncs.com/compatible-mode/v1',
+    )
+    expect(result.env['OPENAI_MODEL']).toBe('qwen-max')
+  })
+
+  test('switching to deepseek returns correct env vars', async () => {
+    const { switchProvider } = await import('../switcher.js')
+    const { DEFAULT_PROVIDERS } = await import('../loader.js')
+    const result = switchProvider('deepseek', DEFAULT_PROVIDERS)
+    expect(result.env['OPENAI_BASE_URL']).toBe('https://api.deepseek.com/v1')
+    expect(result.env['OPENAI_MODEL']).toBe('deepseek-chat')
+  })
+
+  test('throws for non-existent provider id', async () => {
+    const { switchProvider } = await import('../switcher.js')
+    const { DEFAULT_PROVIDERS } = await import('../loader.js')
+    expect(() => switchProvider('nonexistent', DEFAULT_PROVIDERS)).toThrow(
+      'provider "nonexistent" not found',
+    )
+  })
+
+  test('warns when provider API key env var is not set', async () => {
+    const { switchProvider } = await import('../switcher.js')
+    const { DEFAULT_PROVIDERS } = await import('../loader.js')
+    const result = switchProvider('cerebras', DEFAULT_PROVIDERS)
+    expect(result.warnings.length).toBeGreaterThan(0)
+    expect(result.warnings[0]).toContain('CEREBRAS_API_KEY')
+  })
+
+  test('no warning when provider API key env var is set', async () => {
+    process.env['GROQ_API_KEY'] = 'test-key'
+    const { switchProvider } = await import('../switcher.js')
+    const { DEFAULT_PROVIDERS } = await import('../loader.js')
+    const result = switchProvider('groq', DEFAULT_PROVIDERS)
+    expect(result.warnings).toHaveLength(0)
+  })
+
+  test('does not mutate process.env', async () => {
+    const { switchProvider } = await import('../switcher.js')
+    const { DEFAULT_PROVIDERS } = await import('../loader.js')
+    const before = process.env['OPENAI_BASE_URL']
+    switchProvider('cerebras', DEFAULT_PROVIDERS)
+    expect(process.env['OPENAI_BASE_URL']).toBe(before)
+  })
+})
+
+describe('buildShellExportBlock', () => {
+  test('produces correct shell export lines for cerebras', async () => {
+    const { switchProvider, buildShellExportBlock } = await import(
+      '../switcher.js'
+    )
+    const { DEFAULT_PROVIDERS } = await import('../loader.js')
+    const result = switchProvider('cerebras', DEFAULT_PROVIDERS)
+    const block = buildShellExportBlock(result)
+    expect(block).toContain('export CLAUDE_CODE_USE_OPENAI=1')
+    expect(block).toContain('export OPENAI_BASE_URL=https://api.cerebras.ai/v1')
+    expect(block).toContain('export OPENAI_API_KEY=$CEREBRAS_API_KEY')
+    expect(block).toContain('export OPENAI_MODEL=llama-3.3-70b')
+  })
+
+  test('api key line uses variable reference not literal value', async () => {
+    process.env['DEEPSEEK_API_KEY'] = 'sk-secret-key'
+    const { switchProvider, buildShellExportBlock } = await import(
+      '../switcher.js'
+    )
+    const { DEFAULT_PROVIDERS } = await import('../loader.js')
+    const result = switchProvider('deepseek', DEFAULT_PROVIDERS)
+    const block = buildShellExportBlock(result)
+    // Must NOT contain the literal key value
+    expect(block).not.toContain('sk-secret-key')
+    // Must use variable reference
+    expect(block).toContain('$DEEPSEEK_API_KEY')
+  })
+})
--- a/src/services/providerRegistry/loader.ts
+++ b/src/services/providerRegistry/loader.ts
@@ -0,0 +1,246 @@
+import {
+  existsSync,
+  readFileSync,
+  renameSync,
+  unlinkSync,
+  writeFileSync,
+} from 'fs'
+import { join } from 'path'
+import { randomBytes } from 'node:crypto'
+import { tmpdir } from 'node:os'
+import { logError } from '../../utils/log.js'
+import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
+import { ProvidersFileSchema, type ProviderConfig } from './types.js'
+
+/**
+ * Build one built-in provider entry. Every built-in is an OpenAI-compatible
+ * endpoint, so `kind` is filled in here rather than repeated per entry.
+ * Keys are emitted in the order id, kind, baseUrl, apiKeyEnv, defaultModel,
+ * compatRule.
+ */
+function builtInProvider(fields: Omit<ProviderConfig, 'kind'>): ProviderConfig {
+  const { id, baseUrl, apiKeyEnv, defaultModel, compatRule } = fields
+  return {
+    id,
+    kind: 'openai-compat',
+    baseUrl,
+    apiKeyEnv,
+    defaultModel,
+    compatRule,
+  }
+}
+
+/**
+ * The four built-in OpenAI-compat providers: cerebras, groq, qwen, deepseek.
+ *
+ * They are the complete provider list when providers.json is absent, empty or
+ * invalid. Entries from the user's providers.json are layered on top: an entry
+ * with the same id replaces the built-in, any other id is appended.
+ */
+export const DEFAULT_PROVIDERS: ProviderConfig[] = [
+  builtInProvider({
+    id: 'cerebras',
+    baseUrl: 'https://api.cerebras.ai/v1',
+    apiKeyEnv: 'CEREBRAS_API_KEY',
+    defaultModel: 'llama-3.3-70b',
+    compatRule: 'cerebras',
+  }),
+  builtInProvider({
+    id: 'groq',
+    baseUrl: 'https://api.groq.com/openai/v1',
+    apiKeyEnv: 'GROQ_API_KEY',
+    defaultModel: 'llama-3.3-70b-versatile',
+    compatRule: 'groq',
+  }),
+  builtInProvider({
+    id: 'qwen',
+    baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
+    apiKeyEnv: 'DASHSCOPE_API_KEY',
+    defaultModel: 'qwen-max',
+    compatRule: 'strict-openai',
+  }),
+  builtInProvider({
+    id: 'deepseek',
+    baseUrl: 'https://api.deepseek.com/v1',
+    apiKeyEnv: 'DEEPSEEK_API_KEY',
+    defaultModel: 'deepseek-chat',
+    compatRule: 'deepseek',
+  }),
+]
+
+/**
+ * Location of the user's provider overrides: `providers.json` directly inside
+ * the directory reported by getClaudeConfigHomeDir().
+ *
+ * @returns Path of providers.json (the file itself may not exist).
+ */
+export function getProvidersFilePath(): string {
+  const configDir = getClaudeConfigHomeDir()
+  return join(configDir, 'providers.json')
+}
+
+// ── J1: per-process memoization with stale-on-invalidate ─────────────────────
+
+// Last provider list produced by a load; `null` means "nothing cached, the
+// next loadProviders() call must read from disk". Written by loadProviders(),
+// loadProvidersWithDiagnostic() and _invalidateProviderCache().
+let _cachedProviders: ProviderConfig[] | null = null
+
+/**
+ * Invalidate the in-process provider cache (called after saveProviders).
+ *
+ * Only resets the cached value to `null`; nothing is re-read here. The next
+ * loadProviders() call repopulates the cache.
+ */
+export function _invalidateProviderCache(): void {
+  _cachedProviders = null
+}
+
+/**
+ * Return the effective provider list (built-ins plus user overrides).
+ *
+ * The list is computed once per process by _loadProvidersInternal() and then
+ * served from the module-level cache until _invalidateProviderCache() runs
+ * (saveProviders() does this) or loadProvidersWithDiagnostic() refreshes it.
+ *
+ * Resolution rules, as implemented by _loadProvidersInternal():
+ * 1. Start from DEFAULT_PROVIDERS.
+ * 2. If providers.json exists and validates, its entries replace built-ins
+ *    with the same id; unknown ids are appended.
+ * 3. A missing, empty, unreadable or invalid file yields the defaults only;
+ *    problems are logged via logError.
+ *
+ * This function returns only the list. Callers that need the load error for
+ * display should use loadProvidersWithDiagnostic() instead.
+ *
+ * The returned array is the cached instance itself, not a copy, so callers
+ * must treat it as read-only.
+ *
+ * @returns The cached provider list.
+ */
+export function loadProviders(): ProviderConfig[] {
+  // J1: only touch the disk when nothing is cached yet.
+  if (_cachedProviders === null) {
+    _cachedProviders = _loadProvidersInternal().providers
+  }
+  return _cachedProviders
+}
+
+/**
+ * Load the provider list and report why the user file was rejected, if it was.
+ *
+ * Unlike loadProviders(), this always performs a fresh load (it never reads
+ * the cache) and then stores the fresh list in the cache as a side effect.
+ *
+ * A1 fix: exposes read/parse/validation errors to the UI layer instead of
+ * only sending them to logError.
+ *
+ * @returns `providers` is the effective list; `error` is present only when
+ *          providers.json could not be read, parsed or validated.
+ */
+export function loadProvidersWithDiagnostic(): {
+  providers: ProviderConfig[]
+  error?: string
+} {
+  const fresh = _loadProvidersInternal()
+  // Keep loadProviders() in sync with what the caller is about to display.
+  _cachedProviders = fresh.providers
+  return fresh
+}
+
+/**
+ * Uncached provider load shared by loadProviders() and
+ * loadProvidersWithDiagnostic().
+ *
+ * Never throws: every failure is logged with logError and reported through
+ * the optional `error` field while the defaults are returned.
+ *
+ * @returns The effective provider list, plus an error message when
+ *          providers.json exists but could not be used.
+ */
+function _loadProvidersInternal(): {
+  providers: ProviderConfig[]
+  error?: string
+} {
+  const filePath = getProvidersFilePath()
+
+  // Each result gets its own shallow copy of the defaults array.
+  const defaultsOnly = (): { providers: ProviderConfig[] } => ({
+    providers: [...DEFAULT_PROVIDERS],
+  })
+  // Log the problem, then fall back to defaults while surfacing the message.
+  const failWith = (
+    msg: string,
+  ): { providers: ProviderConfig[]; error: string } => {
+    logError(new Error(msg))
+    return { providers: [...DEFAULT_PROVIDERS], error: msg }
+  }
+
+  // No user file at all is the normal case, not an error.
+  if (!existsSync(filePath)) return defaultsOnly()
+
+  let raw: string
+  try {
+    raw = readFileSync(filePath, 'utf-8')
+  } catch (err: unknown) {
+    const reason = err instanceof Error ? err.message : String(err)
+    return failWith(`loadProviders: failed to read ${filePath}: ${reason}`)
+  }
+
+  // A blank file is treated like a missing one.
+  if (raw.trim() === '') return defaultsOnly()
+
+  let parsed: unknown
+  try {
+    parsed = JSON.parse(raw)
+  } catch {
+    return failWith(
+      `loadProviders: ${filePath} is not valid JSON. Using default providers.`,
+    )
+  }
+
+  const validation = ProvidersFileSchema.safeParse(parsed)
+  if (!validation.success) {
+    return failWith(
+      `loadProviders: ${filePath} failed schema validation: ${validation.error.message}. Using default providers.`,
+    )
+  }
+
+  const userProviders = validation.data
+  if (userProviders.length === 0) return defaultsOnly()
+
+  // Defaults first, user entries second: a later entry with the same id
+  // replaces the value but keeps the original position in the Map.
+  const byId = new Map<string, ProviderConfig>(
+    [...DEFAULT_PROVIDERS, ...userProviders].map(
+      (p): [string, ProviderConfig] => [p.id, p],
+    ),
+  )
+
+  return { providers: Array.from(byId.values()) }
+}
+
+/**
+ * Look up a provider by its id.
+ *
+ * @param id - Provider id to match exactly.
+ * @param providers - List to search; when omitted, loadProviders() is used.
+ * @returns The first provider whose id matches, or undefined if none does.
+ */
+export function findProvider(
+  id: string,
+  providers?: ProviderConfig[],
+): ProviderConfig | undefined {
+  const pool = providers ?? loadProviders()
+  for (const candidate of pool) {
+    if (candidate.id === id) return candidate
+  }
+  return undefined
+}
+
+/**
+ * Shallow, key-order-independent equality for ProviderConfig objects.
+ * E4 fix: replaces JSON.stringify comparison which is key-order sensitive.
+ *
+ * Two configs are equal when they have the same set of own enumerable keys
+ * and every value is strictly equal (`===`). Values are compared by identity,
+ * which is sufficient because every ProviderConfig field is a string.
+ *
+ * @param a - First config.
+ * @param b - Second config.
+ * @returns true when both objects carry exactly the same keys and values.
+ */
+function providerConfigEqual(a: ProviderConfig, b: ProviderConfig): boolean {
+  const keysA = Object.keys(a) as (keyof ProviderConfig)[]
+  if (keysA.length !== Object.keys(b).length) return false
+  // Same key count plus "every key of a is an own key of b" means the key
+  // sets are identical. The explicit own-property check matters: without it
+  // a key holding `undefined` in `a` would match a key that is absent in `b`.
+  return keysA.every(
+    k => Object.prototype.hasOwnProperty.call(b, k) && a[k] === b[k],
+  )
+}
+
+/**
+ * Persist user-defined providers to providers.json in the Claude config dir.
+ *
+ * The file is REPLACED, not merged: only entries derived from `providers`
+ * are written. Entries that exist in the current file but are missing from
+ * `providers` are dropped, so callers that want to keep them must pass them.
+ *
+ * What gets written:
+ * - every provider whose id is not a built-in, and
+ * - every provider that shares an id with a built-in but differs from it
+ *   (a user override). Entries identical to a built-in are omitted because
+ *   the built-ins are always available.
+ *
+ * C3 fix: the JSON is written to a temporary file and then renamed over the
+ * target, so a reader never observes a half-written file. This does not
+ * serialize concurrent writers; the last rename wins.
+ * E4 fix: uses key-order-independent equality for the default comparison.
+ * J1 fix: invalidates the in-process cache after a successful write.
+ *
+ * @param providers - Providers to save; later duplicates of an id win.
+ * @returns The merged list (built-ins overlaid with `providers`), which is
+ *          what loadProviders() will report after this call.
+ * @throws Whatever writeFileSync/renameSync throws (e.g. missing config
+ *         directory, permission denied). The temp file is removed first.
+ */
+export function saveProviders(providers: ProviderConfig[]): ProviderConfig[] {
+  const filePath = getProvidersFilePath()
+
+  // Build merged list (providers override defaults by id)
+  const merged = new Map<string, ProviderConfig>()
+  for (const p of DEFAULT_PROVIDERS) {
+    merged.set(p.id, p)
+  }
+  for (const p of providers) {
+    merged.set(p.id, p)
+  }
+
+  // Persist custom providers and overridden built-ins; skip pristine built-ins.
+  const toWrite: ProviderConfig[] = []
+  for (const [id, p] of merged) {
+    const defaultEntry = DEFAULT_PROVIDERS.find(d => d.id === id)
+    if (!defaultEntry || !providerConfigEqual(defaultEntry, p)) {
+      toWrite.push(p)
+    }
+  }
+
+  // The temp file must live next to the target: rename() is only atomic (and
+  // only works at all) within one filesystem. A temp file under os.tmpdir()
+  // fails with EXDEV when /tmp and the config dir are on different mounts.
+  const tmpPath = `${filePath}.${randomBytes(8).toString('hex')}.tmp`
+  try {
+    writeFileSync(tmpPath, JSON.stringify(toWrite, null, 2), 'utf-8')
+    renameSync(tmpPath, filePath)
+  } catch (err) {
+    try {
+      // Do not leave a stray temp file behind.
+      unlinkSync(tmpPath)
+    } catch {
+      /* ignore: the temp file may never have been created */
+    }
+    throw err
+  }
+
+  // J1: invalidate cache so next loadProviders() reads fresh data
+  _invalidateProviderCache()
+
+  return Array.from(merged.values())
+}
--- a/src/services/providerRegistry/providerCompatMatrix.ts
+++ b/src/services/providerRegistry/providerCompatMatrix.ts
@@ -0,0 +1,179 @@
+import type { CompatRule } from './types.js'
+
+/**
+ * Per-provider OpenAI-compat field whitelist.
+ *
+ * Each profile describes what an endpoint actually accepts so we can strip
+ * fields that would cause a strict endpoint to reject the request.
+ * Profiles are looked up by CompatRule in COMPAT_PROFILES and consumed by
+ * applyCompatRule().
+ */
+export interface CompatProfile {
+  /**
+   * Whether the server accepts stream_options.include_usage in chat completions.
+   * Strict endpoints (Cerebras, Qwen) reject unknown top-level keys.
+   * When false, applyCompatRule() removes `include_usage` and drops
+   * `stream_options` entirely if nothing else is left in it.
+   */
+  supportsStreamUsageOption: boolean
+
+  /**
+   * Whether the server accepts a custom 'thinking' field in messages.
+   * Only permissive or DeepSeek-thinking endpoints accept this.
+   * When false, applyCompatRule() removes `thinking` from every message.
+   */
+  supportsThinkingField: boolean
+
+  /**
+   * How to handle reasoning_content in roundtrips.
+   *
+   * DeepSeek has three modes:
+   *   - thinking-only:    model returns reasoning_content, no tools
+   *   - thinking+tools:   model returns both reasoning_content and tool calls
+   *   - normal:           model returns neither
+   *
+   * 'always-preserve':      echo back (DeepSeek thinking+tools roundtrip)
+   * 'drop-on-non-thinking': remove unless current model is thinking variant
+   *                         (model name contains "reason" or "think")
+   * 'strip':                remove always (safe default for strict endpoints)
+   */
+  reasoningContentEcho: 'always-preserve' | 'drop-on-non-thinking' | 'strip'
+
+  /**
+   * Tool call schema flavor supported by the endpoint.
+   * 'openai-v2' = standard OpenAI function-calling schema
+   * This is currently the only allowed value.
+   */
+  toolCallFormat: 'openai-v2'
+}
+
+/**
+ * Baseline for endpoints that reject anything beyond the plain OpenAI schema:
+ * no stream usage option, no `thinking` field, reasoning_content always removed.
+ */
+const STRICT_PROFILE: CompatProfile = {
+  supportsStreamUsageOption: false,
+  supportsThinkingField: false,
+  reasoningContentEcho: 'strip',
+  toolCallFormat: 'openai-v2',
+}
+
+/**
+ * Compat profile for every CompatRule. cerebras, groq and strict-openai
+ * currently share the strict baseline; each gets its own object copy so the
+ * entries stay independent.
+ */
+export const COMPAT_PROFILES: Record<CompatRule, CompatProfile> = {
+  cerebras: { ...STRICT_PROFILE },
+  groq: { ...STRICT_PROFILE },
+  deepseek: {
+    // DeepSeek-reasoner supports reasoning_content and the thinking field.
+    // For normal deepseek-chat, thinking field is ignored rather than rejected.
+    supportsStreamUsageOption: true,
+    supportsThinkingField: true,
+    reasoningContentEcho: 'always-preserve',
+    toolCallFormat: 'openai-v2',
+  },
+  'strict-openai': { ...STRICT_PROFILE },
+  permissive: {
+    supportsStreamUsageOption: true,
+    supportsThinkingField: true,
+    reasoningContentEcho: 'drop-on-non-thinking',
+    toolCallFormat: 'openai-v2',
+  },
+}
+
+/**
+ * Classify an assistant message by which DeepSeek reasoning mode produced it.
+ *
+ * - 'normal':          reasoning_content is missing or falsy (e.g. '')
+ * - 'thinking-only':   reasoning_content present, tool_calls missing/empty
+ * - 'thinking+tools':  reasoning_content present AND tool_calls is a
+ *                      non-empty array
+ *
+ * @param assistantMessage - Raw assistant message object from the API.
+ * @returns The detected reasoning mode.
+ */
+export function getDeepSeekReasoningMode(
+  assistantMessage: Record<string, unknown>,
+): 'thinking-only' | 'thinking+tools' | 'normal' {
+  // Without reasoning output the tool calls are irrelevant.
+  if (!assistantMessage['reasoning_content']) return 'normal'
+
+  const calls = assistantMessage['tool_calls']
+  const usedTools = Array.isArray(calls) && calls.length > 0
+  return usedTools ? 'thinking+tools' : 'thinking-only'
+}
+
+/**
+ * Apply a compat rule to an outgoing request body, dropping fields the
+ * target endpoint won't accept. Returns a new object (immutable).
+ *
+ * This is a pure function: it does not mutate the input body. The copy is
+ * shallow; only `stream_options` and `messages` (and the individual messages
+ * that lose a field) are replaced, everything else is shared with the input.
+ *
+ * What is removed, depending on the rule's CompatProfile:
+ * - `stream_options.include_usage` when the endpoint lacks stream usage
+ *   support (`stream_options` is dropped entirely if it becomes empty);
+ * - `thinking` on every message when the endpoint lacks the thinking field;
+ * - `reasoning_content` on every message when the echo policy is 'strip', or
+ *   when it is 'drop-on-non-thinking' and `body.model` does not contain
+ *   "reason" or "think" (case-insensitive).
+ *
+ * Entries of `messages` that are not objects (null, strings, ...) are passed
+ * through untouched instead of raising a TypeError.
+ *
+ * @param body - Chat-completions request body.
+ * @param rule - Compat rule selecting the profile from COMPAT_PROFILES.
+ * @returns A sanitized copy of `body`.
+ */
+export function applyCompatRule(
+  body: Record<string, unknown>,
+  rule: CompatRule,
+): Record<string, unknown> {
+  const profile = COMPAT_PROFILES[rule]
+  const result: Record<string, unknown> = { ...body }
+
+  // Strip stream_options.include_usage if endpoint doesn't support it
+  if (!profile.supportsStreamUsageOption) {
+    const streamOptions = result['stream_options']
+    if (
+      streamOptions !== null &&
+      typeof streamOptions === 'object' &&
+      !Array.isArray(streamOptions)
+    ) {
+      const { include_usage: _dropped, ...rest } = streamOptions as Record<
+        string,
+        unknown
+      >
+      if (Object.keys(rest).length === 0) {
+        delete result['stream_options']
+      } else {
+        result['stream_options'] = rest
+      }
+    }
+  }
+
+  const messages: unknown = result['messages']
+  if (Array.isArray(messages)) {
+    let cleaned: unknown[] = messages
+
+    // Strip 'thinking' field from messages if endpoint doesn't support it
+    if (!profile.supportsThinkingField) {
+      cleaned = omitMessageField(cleaned, 'thinking')
+    }
+
+    // reasoning_content echo policy: 'strip' always removes it;
+    // 'drop-on-non-thinking' removes it unless the model name indicates a
+    // thinking variant; 'always-preserve' leaves it alone.
+    const model = typeof result['model'] === 'string' ? result['model'] : ''
+    const stripReasoning =
+      profile.reasoningContentEcho === 'strip' ||
+      (profile.reasoningContentEcho === 'drop-on-non-thinking' &&
+        !/reason|think/i.test(model))
+    if (stripReasoning) {
+      cleaned = omitMessageField(cleaned, 'reasoning_content')
+    }
+
+    // When nothing was stripped this re-assigns the original array reference.
+    result['messages'] = cleaned
+  }
+
+  return result
+}
+
+/**
+ * Return a copy of `messages` in which every object entry that has `field`
+ * is replaced by a copy without it. Entries that are not objects, or that do
+ * not have the field, are returned as the same reference.
+ */
+function omitMessageField(messages: unknown[], field: string): unknown[] {
+  return messages.map(msg => {
+    // `in` throws on null and primitives, so guard before using it.
+    if (msg === null || typeof msg !== 'object' || !(field in msg)) {
+      return msg
+    }
+    const { [field]: _dropped, ...rest } = msg as Record<string, unknown>
+    return rest
+  })
+}
--- a/src/services/providerRegistry/switcher.ts
+++ b/src/services/providerRegistry/switcher.ts
@@ -0,0 +1,111 @@
+import { findProvider, loadProviders } from './loader.js'
+import type { ProviderConfig } from './types.js'
+
+/**
+ * Outcome of switchProvider(): everything needed to show the user how to
+ * activate a provider, without changing the running process.
+ */
+export interface SwitchProviderResult {
+  /**
+   * Environment variables to set before the next session.
+   * This is informational — the caller must NOT mutate process.env.
+   * The user copies these into their shell profile.
+   *
+   * switchProvider() fills in CLAUDE_CODE_USE_OPENAI, OPENAI_BASE_URL and
+   * OPENAI_MODEL. OPENAI_API_KEY is deliberately absent; the export line for
+   * it is derived from `provider.apiKeyEnv` by buildShellExportBlock().
+   */
+  env: Record<string, string>
+
+  /**
+   * Human-readable warnings (e.g. missing API key in current env).
+   * Non-fatal: the user can still configure the provider.
+   */
+  warnings: string[]
+
+  /**
+   * The resolved provider config used for this switch.
+   */
+  provider: ProviderConfig
+}
+
+/**
+ * Work out what the user has to export to activate an OpenAI-compat provider.
+ *
+ * Contract:
+ * - Read-only with respect to the environment: process.env is inspected for
+ *   warnings but never modified.
+ * - The returned `env` holds CLAUDE_CODE_USE_OPENAI, OPENAI_BASE_URL and
+ *   OPENAI_MODEL. The API key value is never read into the result; only the
+ *   name of the variable holding it is exposed (via `provider.apiKeyEnv`) so
+ *   a snippet such as `export OPENAI_API_KEY=$CEREBRAS_API_KEY` can be built.
+ * - Warnings are returned rather than logged so the UI can render them (G3):
+ *     1. ANTHROPIC_API_KEY is set while OpenAI-compat mode is already active
+ *        (CLAUDE_CODE_USE_OPENAI === '1' or OPENAI_API_KEY is non-empty);
+ *     2. the provider's API key variable is unset or empty.
+ * - Restart required for the env vars to take effect (OpenAI client is cached)
+ *
+ * @param id - Provider id (e.g. 'cerebras', 'groq', 'deepseek', 'qwen')
+ * @param providers - Optional pre-loaded list (defaults to loadProviders())
+ * @returns The env to export, any warnings, and the matched provider.
+ * @throws {Error} if provider id is not found
+ */
+export function switchProvider(
+  id: string,
+  providers?: ProviderConfig[],
+): SwitchProviderResult {
+  const list = providers ?? loadProviders()
+  const found = findProvider(id, list)
+
+  if (!found) {
+    const known = list.map(p => p.id).join(', ')
+    throw new Error(
+      `switchProvider: provider "${id}" not found. Available: ${known}`,
+    )
+  }
+
+  const warnings: string[] = []
+
+  // Credential-confusion check: an Anthropic key alongside an already active
+  // OpenAI-compat setup is legal but frequently unintended.
+  const openAiModeActive =
+    process.env['CLAUDE_CODE_USE_OPENAI'] === '1' ||
+    Boolean(process.env['OPENAI_API_KEY'])
+  if (openAiModeActive && Boolean(process.env['ANTHROPIC_API_KEY'])) {
+    warnings.push(
+      [
+        'Both ANTHROPIC_API_KEY and OpenAI-compat mode are set.',
+        'ANTHROPIC_API_KEY is for Anthropic workspace endpoints (/v1/agents, /v1/vaults).',
+        'OpenAI-compat mode routes /v1/messages to a third-party provider.',
+        'These are separate planes — verify this is intentional.',
+      ].join(' '),
+    )
+  }
+
+  // Only the presence of the key is tested; its value never leaves this check.
+  const keyVar = found.apiKeyEnv
+  if (!process.env[keyVar]) {
+    warnings.push(
+      `${keyVar} is not set in the current environment. Set it before starting Claude Code: export ${keyVar}=<your-api-key>`,
+    )
+  }
+
+  return {
+    env: {
+      CLAUDE_CODE_USE_OPENAI: '1',
+      OPENAI_BASE_URL: found.baseUrl,
+      OPENAI_MODEL: found.defaultModel,
+    },
+    warnings,
+    provider: found,
+  }
+}
+
+/**
+ * Build the shell export block to display to the user.
+ *
+ * Example output:
+ *   export CLAUDE_CODE_USE_OPENAI=1
+ *   export OPENAI_BASE_URL=https://api.cerebras.ai/v1
+ *   export OPENAI_API_KEY=$CEREBRAS_API_KEY
+ *   export OPENAI_MODEL=llama-3.3-70b
+ *
+ * The API key line uses a variable reference so the actual key is never echoed.
+ *
+ * Provider fields can come from the user's providers.json and the snippet is
+ * meant to be pasted into a shell, so values are made shell-safe:
+ * - values made only of letters, digits and `_@%+=:,./-` are emitted as-is
+ *   (this keeps the output for the built-in providers unchanged);
+ * - anything else (spaces, `;`, `$`, quotes, empty string, ...) is wrapped in
+ *   single quotes, with embedded single quotes written as `'\''`;
+ * - the API key reference is emitted as `$NAME` only when NAME is a valid
+ *   shell variable name. Otherwise it is emitted as an inert single-quoted
+ *   literal, which cannot execute anything and makes the misconfigured
+ *   `apiKeyEnv` visible to the user.
+ *
+ * @param result - Result of switchProvider(). Missing `env` entries fall back
+ *                 to the provider's own fields.
+ * @returns Four `export` lines joined with '\n' (no trailing newline).
+ */
+export function buildShellExportBlock(result: SwitchProviderResult): string {
+  const { env, provider } = result
+
+  // Quote a value for POSIX shells unless it is made of known-safe characters.
+  const shellQuote = (value: string): string =>
+    /^[A-Za-z0-9_@%+=:,./-]+$/.test(value)
+      ? value
+      : "'" + value.replace(/'/g, "'\\''") + "'"
+
+  const keyReference = /^[A-Za-z_][A-Za-z0-9_]*$/.test(provider.apiKeyEnv)
+    ? `$${provider.apiKeyEnv}`
+    : shellQuote(`$${provider.apiKeyEnv}`)
+
+  const lines: string[] = [
+    `export CLAUDE_CODE_USE_OPENAI=${shellQuote(env['CLAUDE_CODE_USE_OPENAI'] ?? '1')}`,
+    `export OPENAI_BASE_URL=${shellQuote(env['OPENAI_BASE_URL'] ?? provider.baseUrl)}`,
+    `export OPENAI_API_KEY=${keyReference}`,
+    `export OPENAI_MODEL=${shellQuote(env['OPENAI_MODEL'] ?? provider.defaultModel)}`,
+  ]
+  return lines.join('\n')
+}
--- a/src/services/providerRegistry/types.ts
+++ b/src/services/providerRegistry/types.ts
@@ -0,0 +1,51 @@
+import { z } from 'zod'
+
+/**
+ * Compat rule identifiers. Each maps to a CompatProfile in providerCompatMatrix.ts.
+ *
+ * The enum is the single source of truth: the CompatRule type below is
+ * inferred from it, so adding a value here requires adding the matching
+ * profile to COMPAT_PROFILES (typed as Record<CompatRule, CompatProfile>).
+ */
+export const CompatRuleSchema = z.enum([
+  'cerebras',
+  'groq',
+  'deepseek',
+  'strict-openai',
+  'permissive',
+])
+
+/** Union of the string literals accepted by CompatRuleSchema. */
+export type CompatRule = z.infer<typeof CompatRuleSchema>
+
+/**
+ * The only supported provider kind for PR-2. Future PR-3+ may add 'oauth', 'bedrock-compat', etc.
+ *
+ * Modeled as a literal, so any other `kind` value fails validation.
+ */
+export const ProviderKindSchema = z.literal('openai-compat')
+/** Literal type 'openai-compat', inferred from ProviderKindSchema. */
+export type ProviderKind = z.infer<typeof ProviderKindSchema>
+
+/**
+ * Zod schema for a single provider configuration entry.
+ *
+ * Rules:
+ * - id: kebab-case identifier used in /provider use <id>
+ * - kind: only 'openai-compat' in PR-2
+ * - baseUrl: full base URL including /v1 suffix if needed
+ * - apiKeyEnv: name of the env var that holds the API key
+ * - defaultModel: model string passed as OPENAI_MODEL
+ * - compatRule: selects CompatProfile from providerCompatMatrix
+ *
+ * NOTE(review): the id pattern only restricts the character set; ids such as
+ * "-", "a--b" or "-x-" pass even though they are not strictly kebab-case.
+ * NOTE(review): apiKeyEnv is only required to be non-empty, yet
+ * buildShellExportBlock() uses it as a shell variable name — consider
+ * restricting it to /^[A-Za-z_][A-Za-z0-9_]*$/.
+ */
+export const ProviderConfigSchema = z.object({
+  id: z
+    .string()
+    .min(1)
+    .regex(/^[a-z0-9-]+$/, 'id must be kebab-case'),
+  kind: ProviderKindSchema,
+  baseUrl: z.string().url(),
+  apiKeyEnv: z.string().min(1),
+  defaultModel: z.string().min(1),
+  compatRule: CompatRuleSchema,
+})
+
+/** A validated provider entry; the static type is inferred from the schema. */
+export type ProviderConfig = z.infer<typeof ProviderConfigSchema>
+
+/**
+ * Schema for the entire ~/.claude/providers.json file.
+ * Top-level must be an array of ProviderConfig.
+ *
+ * An empty array is valid. Validation is all-or-nothing: one invalid entry
+ * makes the whole parse fail.
+ */
+export const ProvidersFileSchema = z.array(ProviderConfigSchema)