feat: 添加 Provider Registry、StatusLine、Cache Stats 和其他增强

- providerRegistry: OpenAI 兼容 provider 切换(Cerebras/Groq/DeepSeek/Qwen)
- StatusLine: 增强状态栏(缓存命中率、TTL 倒计时、自定义 shell 命令)
- cacheStats: 缓存命中率和 token 签名追踪
- ultrareviewPreflight: 代码审查预检服务
- SkillsMenu/filterSkills: 技能菜单过滤增强
- MagicDocs/langfuse prompts: 提示词更新
- claude.ts: API 客户端更新

Co-Authored-By: glm-5-turbo <zai-org@claude-code-best.win>
This commit is contained in:
claude-code-best
2026-05-09 23:04:35 +08:00
parent fdddb6dbe8
commit efaf4afd9c
28 changed files with 3613 additions and 219 deletions

View File

@@ -0,0 +1,226 @@
/**
* Regression tests for fetchUltrareviewPreflight.
* Verifies all three action enum states (proceed/confirm/blocked),
* network/HTTP error handling, and Zod schema mismatch fallback.
*/
import { afterAll, beforeAll, describe, expect, mock, test } from 'bun:test'
import { debugMock } from '../../../../tests/mocks/debug.js'
import { logMock } from '../../../../tests/mocks/log.js'
import { setupAxiosMock } from '../../../../tests/mocks/axios.js'
// Register module mocks BEFORE the subject module is imported further down,
// so that its dependencies resolve to these stubs.
// Debug and log helpers come from the shared test mocks; analytics is reduced
// to a no-op logEvent.
mock.module('src/utils/debug.ts', debugMock)
mock.module('src/utils/log.ts', logMock)
mock.module('src/services/analytics/index.js', () => ({
logEvent: () => {},
}))
// Auth utilities: report a Claude.ai subscriber that is neither a Team nor an
// Enterprise subscriber.
// NOTE(review): the subject does not import this module directly; presumably
// it is pulled in transitively — confirm it is still needed.
mock.module('src/utils/auth.js', () => ({
isClaudeAISubscriber: () => true,
isTeamSubscriber: () => false,
isEnterpriseSubscriber: () => false,
}))
// OAuth config: only BASE_API_URL is provided; the subject uses it to build
// the preflight endpoint URL.
mock.module('src/constants/oauth.js', () => ({
getOauthConfig: () => ({ BASE_API_URL: 'https://api.anthropic.com' }),
}))
// Teleport API helpers: prepareApiRequest resolves to fixed credentials and
// getOAuthHeaders derives deterministic headers from the given token.
mock.module('src/utils/teleport/api.js', () => ({
prepareApiRequest: async () => ({
accessToken: 'test-token',
orgUUID: 'org-uuid-test',
}),
getOAuthHeaders: (token: string) => ({
Authorization: `Bearer ${token}`,
'Content-Type': 'application/json',
'anthropic-version': '2023-06-01',
}),
}))
// Shared stand-in for axios.post. Individual tests queue their behaviour with
// mockImplementationOnce; a call that no test configured rejects with
// 'not configured'.
// Typed as any in test code (CLAUDE.md: mock data may use as any).
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const mockAxiosPost = mock(async (..._args: any[]): Promise<any> => {
  throw new Error('not configured')
})

const axiosHandle = setupAxiosMock()

// Duck-typed replacement for axios.isAxiosError: true only for non-null
// objects whose `isAxiosError` property is exactly `true`.
axiosHandle.stubs.isAxiosError = (candidate: unknown) => {
  if (typeof candidate !== 'object' || candidate === null) {
    return false
  }
  return (candidate as { isAxiosError?: boolean }).isAxiosError === true
}
axiosHandle.stubs.post = mockAxiosPost

// The stubs are switched on for the lifetime of this file only.
const setStubsActive = (active: boolean): void => {
  axiosHandle.useStubs = active
}
beforeAll(() => {
  setStubsActive(true)
})
afterAll(() => {
  setStubsActive(false)
})
import {
fetchUltrareviewPreflight,
type UltrareviewPreflightResponse,
} from '../ultrareviewPreflight.js'
/**
 * Behavioural tests for fetchUltrareviewPreflight, driven entirely through the
 * stubbed axios.post (mockAxiosPost).
 *
 * Request-body assertions are made AFTER the call returns, on a captured copy
 * of the body. Asserting inside the mocked post would throw inside the
 * subject's catch-all try/catch, which converts the failure into a `null`
 * result and hides the real mismatch behind an unrelated assertion error.
 */
describe('fetchUltrareviewPreflight', () => {
  /** Queue one successful HTTP 200 response whose payload is `data`. */
  const respondOnce = (data: unknown): void => {
    mockAxiosPost.mockImplementationOnce(async () => ({ status: 200, data }))
  }

  /** Queue one rejected request that throws `error`. */
  const rejectOnce = (error: Error): void => {
    mockAxiosPost.mockImplementationOnce(async () => {
      throw error
    })
  }

  /**
   * Build an Error shaped like an axios failure: `isAxiosError` is always
   * true, and `response.status` is attached only when a status is given
   * (omitting it models a network failure with no response).
   */
  const makeAxiosError = (message: string, status?: number): Error => {
    const extras: { isAxiosError: boolean; response?: { status: number } } = {
      isAxiosError: true,
    }
    if (status !== undefined) {
      extras.response = { status }
    }
    return Object.assign(new Error(message), extras)
  }

  /**
   * Queue one `proceed` response and return a getter for the request body the
   * subject passed as the second argument to axios.post.
   */
  const captureBodyOnce = (): (() => Record<string, unknown> | undefined) => {
    let captured: Record<string, unknown> | undefined
    mockAxiosPost.mockImplementationOnce(
      async (_url: unknown, body: unknown) => {
        captured = body as Record<string, unknown>
        return { status: 200, data: { action: 'proceed', billing_note: null } }
      },
    )
    return () => captured
  }

  test('returns proceed action when server responds with proceed', async () => {
    const serverResponse: UltrareviewPreflightResponse = {
      action: 'proceed',
      billing_note: null,
    }
    respondOnce(serverResponse)

    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })

    expect(result).not.toBeNull()
    expect(result?.action).toBe('proceed')
    expect(result?.billing_note).toBeNull()
  })

  test('returns confirm action with billing_note when server responds with confirm', async () => {
    const serverResponse: UltrareviewPreflightResponse = {
      action: 'confirm',
      billing_note: 'This run will cost approximately $2.50.',
    }
    respondOnce(serverResponse)

    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })

    expect(result).not.toBeNull()
    expect(result?.action).toBe('confirm')
    expect(result?.billing_note).toBe('This run will cost approximately $2.50.')
  })

  test('returns blocked action when server responds with blocked', async () => {
    const serverResponse: UltrareviewPreflightResponse = {
      action: 'blocked',
      billing_note: null,
    }
    respondOnce(serverResponse)

    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })

    expect(result).not.toBeNull()
    expect(result?.action).toBe('blocked')
  })

  test('returns null on schema mismatch (invalid action value)', async () => {
    respondOnce({ action: 'unknown_action', billing_note: null })

    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })

    expect(result).toBeNull()
  })

  test('returns null on network error (no response)', async () => {
    rejectOnce(makeAxiosError('ECONNREFUSED'))

    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })

    expect(result).toBeNull()
  })

  test('returns null on 401 Unauthorized', async () => {
    rejectOnce(makeAxiosError('Unauthorized', 401))

    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })

    expect(result).toBeNull()
  })

  test('returns null on 403 Forbidden', async () => {
    rejectOnce(makeAxiosError('Forbidden', 403))

    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })

    expect(result).toBeNull()
  })

  test('returns null on 5xx server error', async () => {
    rejectOnce(makeAxiosError('Internal Server Error', 500))

    const result = await fetchUltrareviewPreflight({ repo: 'owner/repo' })

    expect(result).toBeNull()
  })

  test('passes pr_number to request body when provided', async () => {
    const sentBody = captureBodyOnce()

    const result = await fetchUltrareviewPreflight({
      repo: 'owner/repo',
      pr_number: 42,
    })

    expect(result?.action).toBe('proceed')
    expect(sentBody()?.pr_number).toBe(42)
  })

  test('passes confirm flag to request body when provided', async () => {
    const sentBody = captureBodyOnce()

    const result = await fetchUltrareviewPreflight({
      repo: 'owner/repo',
      confirm: true,
    })

    expect(result?.action).toBe('proceed')
    expect(sentBody()?.confirm).toBe(true)
  })
})

View File

@@ -93,7 +93,10 @@ import {
asSystemPrompt,
type SystemPrompt,
} from '../../utils/systemPromptType.js'
import { cloneDeep } from 'lodash-es'
import {
getBreakCacheMarkerPath,
getBreakCacheAlwaysPath,
} from '../../commands/break-cache/index.js'
import { tokenCountFromLastAPIResponse } from '../../utils/tokens.js'
import { getDynamicConfig_BLOCKS_ON_INIT } from '../analytics/growthbook.js'
import {
@@ -121,6 +124,7 @@ import {
getAfkModeHeaderLatched,
getCacheEditingHeaderLatched,
getFastModeHeaderLatched,
getLastApiCompletionTimestamp,
getPromptCache1hAllowlist,
getPromptCache1hEligible,
getSessionId,
@@ -250,6 +254,7 @@ import {
type NonNullableUsage,
} from './logging.js'
import {
CACHE_TTL_1HOUR_MS,
checkResponseForCacheBreak,
recordPromptState,
} from './promptCacheBreakDetection.js'
@@ -507,30 +512,10 @@ export function getAPIMetadata() {
}
}
const deviceId = getOrCreateUserID()
// Third-party API providers (DeepSeek, etc.) validate user_id against
// ^[a-zA-Z0-9_-]+$ which rejects JSON strings containing {, ", :, etc.
// When using a non-Anthropic base URL, send only the device_id (hex string).
const baseUrl = process.env.ANTHROPIC_BASE_URL
const isThirdParty =
baseUrl &&
(() => {
try {
return new URL(baseUrl).host !== 'api.anthropic.com'
} catch {
return false
}
})()
if (isThirdParty) {
return { user_id: deviceId }
}
return {
user_id: jsonStringify({
...extra,
device_id: deviceId,
device_id: getOrCreateUserID(),
// Only include OAuth account UUID when actively using OAuth authentication
account_uuid: getOauthAccountInfo()?.accountUuid ?? '',
session_id: getSessionId(),
@@ -1441,12 +1426,39 @@ async function* queryModel(
].filter(Boolean),
)
// ── Break-cache integration ──
// If a one-time break-cache marker exists, or always-mode is on, append a
// unique ephemeral nonce comment to the system prompt so the prefix-cache
// hash changes for this request, forcing a cache miss.
{
const { existsSync, unlinkSync } = await import('node:fs')
const { randomUUID } = await import('node:crypto')
const onceMarker = getBreakCacheMarkerPath()
const alwaysFlag = getBreakCacheAlwaysPath()
const shouldBreak = existsSync(onceMarker) || existsSync(alwaysFlag)
if (shouldBreak) {
const nonce = randomUUID()
systemPrompt = asSystemPrompt([
...systemPrompt,
`<!-- cache-break nonce: ${nonce} -->`,
])
// Only delete the once marker; the always flag persists until /break-cache off
if (existsSync(onceMarker)) {
try {
unlinkSync(onceMarker)
} catch {
/* best-effort */
}
}
}
}
// Prepend system prompt block for easy API identification
logAPIPrefix(systemPrompt)
const enablePromptCaching =
options.enablePromptCaching ?? getPromptCachingEnabled(options.model)
let system = buildSystemPromptBlocks(systemPrompt, enablePromptCaching, {
const system = buildSystemPromptBlocks(systemPrompt, enablePromptCaching, {
skipGlobalCacheForSystemPrompt: needsToolBasedCacheMarker,
querySource: options.querySource,
})
@@ -1466,7 +1478,7 @@ async function* queryModel(
model: advisorModel,
} as unknown as BetaToolUnion)
}
let allTools = [...toolSchemas, ...extraToolSchemas]
const allTools = [...toolSchemas, ...extraToolSchemas]
const isFastMode =
isFastModeEnabled() &&
@@ -1590,39 +1602,6 @@ async function* queryModel(
const consumedCacheEdits = cachedMCEnabled ? consumePendingCacheEdits() : null
const consumedPinnedEdits = cachedMCEnabled ? getPinnedCacheEdits() : []
// ---------------------------------------------------------------------------
// Serialization boundary: deep-clone heavy data so the closure below captures
// independent copies, not references to the originals. After this point the
// original variables (messagesForAPI, system, allTools) are nulled out so
// they can be GC'd even while the generator/closure is still alive (during
// long streaming responses or retry backoff).
// ---------------------------------------------------------------------------
const frozenMessages = addCacheBreakpoints(
messagesForAPI,
enablePromptCaching,
options.querySource,
cachedMCEnabled &&
getAPIProvider() === 'firstParty' &&
options.querySource === 'repl_main_thread',
consumedCacheEdits as any,
consumedPinnedEdits as any,
options.skipCacheWrite,
)
const frozenSystem = cloneDeep(system)
const frozenTools = cloneDeep(allTools)
// Pre-compute scalars that post-streaming code needs, so messagesForAPI
// can be released before streaming starts.
const preMessagesCount = messagesForAPI.length
const preMessagesTokenCount = tokenCountFromLastAPIResponse(messagesForAPI)
// Release originals for GC — the frozen* copies and pre-computed scalars
// are now the only references to this data inside the closure.
// After null-out, all downstream code uses frozen* or pre-computed scalars.
messagesForAPI = null!
system = null!
allTools = null!
// Capture the betas sent in the last API request, including the ones that
// were dynamically added, so we can log and send it to telemetry.
let lastRequestBetas: string[] | undefined
@@ -1725,6 +1704,9 @@ async function* queryModel(
clearAllThinking: false,
})
const enablePromptCaching =
options.enablePromptCaching ?? getPromptCachingEnabled(retryContext.model)
// Fast mode: header is latched session-stable (cache-safe), but
// `speed='fast'` stays dynamic so cooldown still suppresses the actual
// fast-mode request without changing the cache key.
@@ -1755,10 +1737,13 @@ async function* queryModel(
}
}
// Cache editing beta: header is latched session-stable.
// The useCachedMC gate (cache_edits body behavior) is baked into
// frozenMessages at the serialization boundary above, so this block
// only controls the beta header.
// Cache editing beta: header is latched session-stable; useCachedMC
// (controls cache_edits body behavior) stays live so edits stop when
// the feature disables but the header doesn't flip.
const useCachedMC =
cachedMCEnabled &&
getAPIProvider() === 'firstParty' &&
options.querySource === 'repl_main_thread'
if (
cacheEditingHeaderLatched &&
cacheEditingBetaHeader &&
@@ -1787,9 +1772,17 @@ async function* queryModel(
return {
model: normalizeModelStringForAPI(options.model),
messages: frozenMessages,
system: frozenSystem,
tools: frozenTools,
messages: addCacheBreakpoints(
messagesForAPI,
enablePromptCaching,
options.querySource,
useCachedMC,
consumedCacheEdits as any,
consumedPinnedEdits as any,
options.skipCacheWrite,
),
system,
tools: allTools,
tool_choice: options.toolChoice,
...(useBetas && { betas: filteredBetas }),
metadata: getAPIMetadata(),
@@ -1849,9 +1842,6 @@ async function* queryModel(
let ttftMs = 0
let partialMessage: BetaMessage | undefined
const contentBlocks: (BetaContentBlock | ConnectorTextBlock)[] = []
// Accumulate streaming deltas in arrays to avoid O(n²) string concatenation.
// Joined and assigned to contentBlock fields at content_block_stop.
const streamingDeltas = new Map<number, string[]>()
let usage: NonNullableUsage = EMPTY_USAGE
let costUSD = 0
let stopReason: BetaStopReason | null = null
@@ -2138,8 +2128,6 @@ async function* queryModel(
}
break
}
// Initialize delta accumulator for this content block
streamingDeltas.set(part.index, [])
break
case 'content_block_delta': {
const contentBlock = contentBlocks[part.index]
@@ -2169,9 +2157,8 @@ async function* queryModel(
})
throw new Error('Content block is not a connector_text block')
}
streamingDeltas
.get(part.index)
?.push(delta.connector_text as string)
;(contentBlock as { connector_text: string }).connector_text +=
delta.connector_text
} else {
switch (delta.type) {
case 'citations_delta':
@@ -2201,9 +2188,7 @@ async function* queryModel(
})
throw new Error('Content block input is not a string')
}
streamingDeltas
.get(part.index)
?.push(delta.partial_json as string)
contentBlock.input += delta.partial_json
break
case 'text_delta':
if (contentBlock.type !== 'text') {
@@ -2217,7 +2202,7 @@ async function* queryModel(
})
throw new Error('Content block is not a text block')
}
streamingDeltas.get(part.index)?.push(delta.text!)
;(contentBlock as { text: string }).text += delta.text
break
case 'signature_delta':
if (
@@ -2252,7 +2237,8 @@ async function* queryModel(
})
throw new Error('Content block is not a thinking block')
}
streamingDeltas.get(part.index)?.push(delta.thinking!)
;(contentBlock as { thinking: string }).thinking +=
delta.thinking
break
}
}
@@ -2284,32 +2270,6 @@ async function* queryModel(
})
throw new Error('Message not found')
}
// Join accumulated streaming deltas into the contentBlock fields
// to avoid O(n²) string concatenation during streaming.
const deltas = streamingDeltas.get(part.index)
if (deltas && deltas.length > 0) {
const joined = deltas.join('')
switch (contentBlock.type) {
case 'text':
;(contentBlock as { text: string }).text = joined
break
case 'thinking':
;(contentBlock as { thinking: string }).thinking = joined
break
case 'tool_use':
case 'server_tool_use':
contentBlock.input = joined
break
default:
if ((contentBlock.type as string) === 'connector_text') {
;(
contentBlock as { connector_text: string }
).connector_text = joined
}
break
}
streamingDeltas.delete(part.index)
}
const m: AssistantMessage = {
message: {
...partialMessage,
@@ -2864,8 +2824,8 @@ async function* queryModel(
logAPIError({
error,
model: errorModel,
messageCount: preMessagesCount,
messageTokens: preMessagesTokenCount,
messageCount: messagesForAPI.length,
messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
durationMs: Date.now() - start,
durationMsIncludingRetries: Date.now() - startIncludingRetries,
attempt: attemptNumber,
@@ -2886,10 +2846,7 @@ async function* queryModel(
yield getAssistantMessageFromError(error, errorModel, {
messages,
messagesForAPI: frozenMessages as unknown as (
| UserMessage
| AssistantMessage
)[],
messagesForAPI,
})
releaseStreamResources()
return
@@ -2923,8 +2880,8 @@ async function* queryModel(
logAPIError({
error,
model: errorModel,
messageCount: preMessagesCount,
messageTokens: preMessagesTokenCount,
messageCount: messagesForAPI.length,
messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
durationMs: Date.now() - start,
durationMsIncludingRetries: Date.now() - startIncludingRetries,
attempt: attemptNumber,
@@ -2947,10 +2904,7 @@ async function* queryModel(
yield getAssistantMessageFromError(error, errorModel, {
messages,
messagesForAPI: frozenMessages as unknown as (
| UserMessage
| AssistantMessage
)[],
messagesForAPI,
})
releaseStreamResources()
return
@@ -3006,19 +2960,14 @@ async function* queryModel(
// Precompute scalars so the fire-and-forget .then() closure doesn't pin the
// full messagesForAPI array (the entire conversation up to the context window
// limit) until getToolPermissionContext() resolves.
// Note: messagesForAPI was nulled above (serialization boundary), so we use
// the pre-computed scalars captured before the null-out.
const logMessageCount = preMessagesCount
const logMessageTokens = preMessagesTokenCount
const logMessageCount = messagesForAPI.length
const logMessageTokens = tokenCountFromLastAPIResponse(messagesForAPI)
// Record LLM observation in Langfuse (no-op if not configured)
recordLLMObservation(options.langfuseTrace ?? null, {
model: resolvedModel,
provider: getAPIProvider(),
input: convertMessagesToLangfuse(
frozenMessages as Parameters<typeof convertMessagesToLangfuse>[0],
systemPrompt,
),
input: convertMessagesToLangfuse(messagesForAPI, systemPrompt),
output: convertOutputToLangfuse(newMessages),
usage: {
input_tokens: usage.input_tokens,

View File

@@ -0,0 +1,81 @@
import axios from 'axios'
import z from 'zod/v4'
import { getOauthConfig } from '../../constants/oauth.js'
import { logForDebugging } from '../../utils/debug.js'
import { getOAuthHeaders, prepareApiRequest } from '../../utils/teleport/api.js'
/**
* Zod schema for the /v1/ultrareview/preflight response.
* Based on binary-extracted schema: vq.object({action: vq.enum([...]), billing_note: ...})
*
* - `action` must be one of 'proceed', 'confirm' or 'blocked'; any other value
*   fails validation.
* - `billing_note` may be a string, null, or absent altogether.
*/
const UltrareviewPreflightSchema = z.object({
action: z.enum(['proceed', 'confirm', 'blocked']),
billing_note: z.string().nullable().optional(),
})
/** Validated preflight response, inferred directly from the schema above. */
export type UltrareviewPreflightResponse = z.infer<
typeof UltrareviewPreflightSchema
>
/**
* Arguments accepted by fetchUltrareviewPreflight. Only `repo` is required;
* each optional field is added to the request body only when it is not
* undefined.
*/
export type UltrareviewPreflightArgs = {
// Repository to review. NOTE(review): tests pass 'owner/repo' — confirm the expected format.
repo: string
// Pull request number, sent as `pr_number` when provided.
pr_number?: number
// Pull request URL, sent as `pr_url` when provided.
pr_url?: string
// Set to true once the user has already acknowledged the billing dialog
// (or passed --confirm on the CLI).
confirm?: boolean
}
/**
 * Server-side launch gate: POST /v1/ultrareview/preflight.
 *
 * Sends `repo` plus whichever of `pr_number`, `pr_url` and `confirm` are not
 * undefined, authenticated with the OAuth headers and the organization UUID
 * obtained from prepareApiRequest(). The request times out after 10 seconds.
 *
 * Pass `confirm: true` when the user has already acknowledged the billing
 * dialog (or passed --confirm on the CLI); this skips the server-side confirm
 * prompt and yields a direct proceed/blocked answer.
 *
 * @param args - Repository and optional pull-request / confirmation details.
 * @returns The validated response (proceed / confirm / blocked), or `null` on
 *   any failure — network error, auth error or schema mismatch. Callers must
 *   treat `null` as "fallback to direct launch" to preserve existing behavior.
 *   This function never throws; failures are only logged via logForDebugging.
 */
export async function fetchUltrareviewPreflight(
  args: UltrareviewPreflightArgs,
): Promise<UltrareviewPreflightResponse | null> {
  try {
    const { accessToken, orgUUID } = await prepareApiRequest()

    // Optional fields are left out of the payload entirely when undefined.
    const payload: Record<string, unknown> = {
      repo: args.repo,
      ...(args.pr_number !== undefined && { pr_number: args.pr_number }),
      ...(args.pr_url !== undefined && { pr_url: args.pr_url }),
      ...(args.confirm !== undefined && { confirm: args.confirm }),
    }

    const endpoint = `${getOauthConfig().BASE_API_URL}/v1/ultrareview/preflight`
    const requestConfig = {
      headers: {
        ...getOAuthHeaders(accessToken),
        'x-organization-uuid': orgUUID,
      },
      timeout: 10000,
    }

    const response = await axios.post(endpoint, payload, requestConfig)

    // Validate the server payload before handing it to callers.
    const validation = UltrareviewPreflightSchema.safeParse(response.data)
    if (validation.success) {
      return validation.data
    }

    logForDebugging(
      `fetchUltrareviewPreflight: schema mismatch — ${validation.error.message}`,
    )
    return null
  } catch (error) {
    // Any failure (request preparation, HTTP, network) degrades to null.
    logForDebugging(`fetchUltrareviewPreflight failed: ${error}`)
    return null
  }
}