mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-22 16:25:51 +00:00
feat: 添加 Bedrock API 客户端及 API 层增强
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
139
src/services/api/__tests__/bedrockClient.test.ts
Normal file
139
src/services/api/__tests__/bedrockClient.test.ts
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
/**
|
||||||
|
* Tests for the Bedrock anthropic_beta body-vs-header workaround
|
||||||
|
* (see src/services/api/bedrockClient.ts and anthropics/claude-code#49238).
|
||||||
|
*/
|
||||||
|
import { describe, expect, test } from 'bun:test'
|
||||||
|
import { AnthropicBedrock } from '@anthropic-ai/bedrock-sdk'
|
||||||
|
import { BedrockClient } from '../bedrockClient.js'
|
||||||
|
|
||||||
|
type Captured = {
|
||||||
|
url: string
|
||||||
|
method: string
|
||||||
|
headers: Record<string, string>
|
||||||
|
body: string
|
||||||
|
}
|
||||||
|
|
||||||
|
function makeCaptureFetch(): {
|
||||||
|
fetch: typeof fetch
|
||||||
|
get(): Captured | null
|
||||||
|
} {
|
||||||
|
let captured: Captured | null = null
|
||||||
|
const capture = async (
|
||||||
|
input: URL | RequestInfo,
|
||||||
|
init?: RequestInit,
|
||||||
|
): Promise<Response> => {
|
||||||
|
const req = new Request(input as RequestInfo, init)
|
||||||
|
const body = await req.clone().text()
|
||||||
|
const headers: Record<string, string> = {}
|
||||||
|
req.headers.forEach((v, k) => {
|
||||||
|
headers[k.toLowerCase()] = v
|
||||||
|
})
|
||||||
|
captured = { url: req.url, method: req.method, headers, body }
|
||||||
|
const streamBody =
|
||||||
|
'event: message_start\ndata: {"type":"message_start","message":{"id":"m","type":"message","role":"assistant","content":[],"model":"x","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}}\n\nevent: message_stop\ndata: {"type":"message_stop"}\n\n'
|
||||||
|
return new Response(streamBody, {
|
||||||
|
status: 200,
|
||||||
|
headers: { 'content-type': 'text/event-stream' },
|
||||||
|
})
|
||||||
|
}
|
||||||
|
// SDK only calls the fetch function form, never the static `preconnect` that
|
||||||
|
// Bun/Node's `typeof fetch` declares. Cast is safe (mirrors openai/client.ts).
|
||||||
|
return { fetch: capture as unknown as typeof fetch, get: () => captured }
|
||||||
|
}
|
||||||
|
|
||||||
|
const BEDROCK_ARGS = {
|
||||||
|
awsRegion: 'us-east-1',
|
||||||
|
awsAccessKey: 'AKIAIOSFODNN7EXAMPLE',
|
||||||
|
awsSecretKey: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY',
|
||||||
|
}
|
||||||
|
const REQUEST_PARAMS = {
|
||||||
|
model: 'anthropic.claude-opus-4-7',
|
||||||
|
max_tokens: 10,
|
||||||
|
messages: [{ role: 'user' as const, content: 'hi' }],
|
||||||
|
betas: ['interleaved-thinking-2025-05-14', 'effort-2025-11-24'],
|
||||||
|
stream: true as const,
|
||||||
|
}
|
||||||
|
|
||||||
|
async function dispatch(client: AnthropicBedrock): Promise<void> {
|
||||||
|
try {
|
||||||
|
const stream = await client.beta.messages.create(REQUEST_PARAMS)
|
||||||
|
for await (const _ of stream) {
|
||||||
|
/* drain */
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
/* ignore: only the captured request shape matters */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('BedrockClient.buildRequest body.anthropic_beta cleanup', () => {
|
||||||
|
test('BUG REPRO: unmodified AnthropicBedrock puts anthropic_beta in body', async () => {
|
||||||
|
const { fetch: captureFetch, get } = makeCaptureFetch()
|
||||||
|
const client = new AnthropicBedrock({
|
||||||
|
...BEDROCK_ARGS,
|
||||||
|
fetch: captureFetch,
|
||||||
|
})
|
||||||
|
await dispatch(client)
|
||||||
|
const c = get()
|
||||||
|
expect(c).not.toBeNull()
|
||||||
|
const body = JSON.parse(c!.body) as Record<string, unknown>
|
||||||
|
expect('anthropic_beta' in body).toBe(true)
|
||||||
|
expect(body.anthropic_beta).toEqual([
|
||||||
|
'interleaved-thinking-2025-05-14',
|
||||||
|
'effort-2025-11-24',
|
||||||
|
])
|
||||||
|
})
|
||||||
|
|
||||||
|
test('FIX: BedrockClient strips anthropic_beta from body', async () => {
|
||||||
|
const { fetch: captureFetch, get } = makeCaptureFetch()
|
||||||
|
const client = new BedrockClient({ ...BEDROCK_ARGS, fetch: captureFetch })
|
||||||
|
await dispatch(client)
|
||||||
|
const c = get()
|
||||||
|
expect(c).not.toBeNull()
|
||||||
|
const body = JSON.parse(c!.body) as Record<string, unknown>
|
||||||
|
expect('anthropic_beta' in body).toBe(false)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('FIX preserves anthropic-beta HTTP header with the original csv value', async () => {
|
||||||
|
const { fetch: captureFetch, get } = makeCaptureFetch()
|
||||||
|
const client = new BedrockClient({ ...BEDROCK_ARGS, fetch: captureFetch })
|
||||||
|
await dispatch(client)
|
||||||
|
const c = get()
|
||||||
|
expect(c).not.toBeNull()
|
||||||
|
expect(c!.headers['anthropic-beta']).toBe(
|
||||||
|
'interleaved-thinking-2025-05-14,effort-2025-11-24',
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('FIX keeps a valid AWS SigV4 authorization header (signing happens after cleanup)', async () => {
|
||||||
|
const { fetch: captureFetch, get } = makeCaptureFetch()
|
||||||
|
const client = new BedrockClient({ ...BEDROCK_ARGS, fetch: captureFetch })
|
||||||
|
await dispatch(client)
|
||||||
|
const c = get()
|
||||||
|
expect(c).not.toBeNull()
|
||||||
|
expect(c!.headers.authorization).toBeDefined()
|
||||||
|
expect(c!.headers.authorization.startsWith('AWS4-HMAC-SHA256')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('FIX does not disturb requests that never had anthropic_beta', async () => {
|
||||||
|
const { fetch: captureFetch, get } = makeCaptureFetch()
|
||||||
|
const client = new BedrockClient({ ...BEDROCK_ARGS, fetch: captureFetch })
|
||||||
|
try {
|
||||||
|
const stream = await client.beta.messages.create({
|
||||||
|
model: 'anthropic.claude-opus-4-7',
|
||||||
|
max_tokens: 10,
|
||||||
|
messages: [{ role: 'user', content: 'hi' }],
|
||||||
|
stream: true,
|
||||||
|
})
|
||||||
|
for await (const _ of stream) {
|
||||||
|
/* drain */
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
/* ignore */
|
||||||
|
}
|
||||||
|
const c = get()
|
||||||
|
expect(c).not.toBeNull()
|
||||||
|
const body = JSON.parse(c!.body) as Record<string, unknown>
|
||||||
|
expect('anthropic_beta' in body).toBe(false)
|
||||||
|
expect(c!.headers['anthropic-beta']).toBeUndefined()
|
||||||
|
})
|
||||||
|
})
|
||||||
302
src/services/api/__tests__/betaHeaders.test.ts
Normal file
302
src/services/api/__tests__/betaHeaders.test.ts
Normal file
@@ -0,0 +1,302 @@
|
|||||||
|
/**
|
||||||
|
* Beta header 安全性测试
|
||||||
|
*
|
||||||
|
* 验证:
|
||||||
|
* 1. 空字符串 beta header 不会泄漏到 API 请求中
|
||||||
|
* 2. getExtraBodyParams 正确合并 beta headers
|
||||||
|
* 3. 常量层可能产生空值的 beta header 被妥善处理
|
||||||
|
* 4. SDK 的 betas.toString() 行为与预期一致
|
||||||
|
*/
|
||||||
|
import { describe, expect, test } from 'bun:test'
|
||||||
|
|
||||||
|
// ── Part 1: SDK 层面的 toString 行为验证 ─────────────────────────
|
||||||
|
|
||||||
|
describe('SDK betas.toString() behavior', () => {
|
||||||
|
test('empty string in array produces invalid header value', () => {
|
||||||
|
// 这就是导致 400 的根因:SDK 对 betas 调用 toString()
|
||||||
|
const betas = [
|
||||||
|
'claude-code-20250219',
|
||||||
|
'',
|
||||||
|
'interleaved-thinking-2025-05-14',
|
||||||
|
]
|
||||||
|
const headerValue = betas.toString()
|
||||||
|
|
||||||
|
// 产生 "claude-code-20250219,,interleaved-thinking-2025-05-14"
|
||||||
|
// 逗号之间的空值就是 API 拒绝的 ``
|
||||||
|
expect(headerValue).toContain(',,')
|
||||||
|
expect(headerValue).toBe(
|
||||||
|
'claude-code-20250219,,interleaved-thinking-2025-05-14',
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('filter(Boolean) removes empty strings', () => {
|
||||||
|
const betas = [
|
||||||
|
'claude-code-20250219',
|
||||||
|
'',
|
||||||
|
'interleaved-thinking-2025-05-14',
|
||||||
|
]
|
||||||
|
const filtered = betas.filter(Boolean)
|
||||||
|
const headerValue = filtered.toString()
|
||||||
|
|
||||||
|
expect(filtered).not.toContain('')
|
||||||
|
expect(headerValue).not.toContain(',,')
|
||||||
|
expect(headerValue).toBe(
|
||||||
|
'claude-code-20250219,interleaved-thinking-2025-05-14',
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('filter(Boolean) handles multiple empty strings', () => {
|
||||||
|
const betas = ['', 'a', '', '', 'b', '']
|
||||||
|
const filtered = betas.filter(Boolean)
|
||||||
|
|
||||||
|
expect(filtered).toEqual(['a', 'b'])
|
||||||
|
expect(filtered.toString()).toBe('a,b')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('filter(Boolean) on clean array is no-op', () => {
|
||||||
|
const betas = ['claude-code-20250219', 'interleaved-thinking-2025-05-14']
|
||||||
|
const filtered = betas.filter(Boolean)
|
||||||
|
|
||||||
|
expect(filtered).toEqual(betas)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('empty array after filter produces no header', () => {
|
||||||
|
const betas = ['', '']
|
||||||
|
const filtered = betas.filter(Boolean)
|
||||||
|
|
||||||
|
expect(filtered).toEqual([])
|
||||||
|
expect(filtered.length > 0).toBe(false)
|
||||||
|
// useBetas would be false, header not sent at all
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// ── Part 2: 常量层空值检测 ───────────────────────────────────────
|
||||||
|
|
||||||
|
describe('beta header constants safety', () => {
|
||||||
|
test('known potentially-empty constants are identified', () => {
|
||||||
|
// 这些常量在特定条件下可能是空字符串
|
||||||
|
// 测试的目的是确认我们知道哪些是空的,以便防御
|
||||||
|
|
||||||
|
// CACHE_EDITING_BETA_HEADER — 上游未公开,永远为空
|
||||||
|
// 动态 import 以避免 bun:bundle 依赖
|
||||||
|
// 这里我们直接测试值
|
||||||
|
const CACHE_EDITING_VALUE = '' // 对应 constants/betas.ts:50
|
||||||
|
expect(CACHE_EDITING_VALUE).toBe('')
|
||||||
|
expect(Boolean(CACHE_EDITING_VALUE)).toBe(false)
|
||||||
|
|
||||||
|
// CLI_INTERNAL_BETA_HEADER — USER_TYPE !== 'ant' 时为空
|
||||||
|
// 在测试环境中 USER_TYPE 通常不是 'ant'
|
||||||
|
const CLI_INTERNAL_VALUE =
|
||||||
|
process.env.USER_TYPE === 'ant' ? 'cli-internal-2026-02-09' : ''
|
||||||
|
if (process.env.USER_TYPE !== 'ant') {
|
||||||
|
expect(CLI_INTERNAL_VALUE).toBe('')
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
test('truthy check correctly gates empty beta headers', () => {
|
||||||
|
const emptyHeader = ''
|
||||||
|
const validHeader = 'some-beta-2025-01-01'
|
||||||
|
|
||||||
|
// 模拟 claude.ts 中的 truthy 检查
|
||||||
|
const betasParams: string[] = []
|
||||||
|
|
||||||
|
// 空 header — 不应被 push
|
||||||
|
if (emptyHeader) {
|
||||||
|
betasParams.push(emptyHeader)
|
||||||
|
}
|
||||||
|
expect(betasParams).toEqual([])
|
||||||
|
|
||||||
|
// 有效 header — 应被 push
|
||||||
|
if (validHeader) {
|
||||||
|
betasParams.push(validHeader)
|
||||||
|
}
|
||||||
|
expect(betasParams).toEqual(['some-beta-2025-01-01'])
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// ── Part 3: getExtraBodyParams beta 合并逻辑 ─────────────────────
|
||||||
|
|
||||||
|
describe('getExtraBodyParams beta merge', () => {
|
||||||
|
// getExtraBodyParams 从 CLAUDE_CODE_EXTRA_BODY 解析 JSON 并合并 betaHeaders
|
||||||
|
// 我们在这里验证合并逻辑的边界情况
|
||||||
|
|
||||||
|
test('empty beta headers array should not add anthropic_beta', () => {
|
||||||
|
const result: Record<string, unknown> = {}
|
||||||
|
const betaHeaders: string[] = []
|
||||||
|
|
||||||
|
// 模拟 getExtraBodyParams 中的合并逻辑
|
||||||
|
if (betaHeaders && betaHeaders.length > 0) {
|
||||||
|
result.anthropic_beta = betaHeaders
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(result.anthropic_beta).toBeUndefined()
|
||||||
|
})
|
||||||
|
|
||||||
|
test('beta headers with empty strings should be filtered', () => {
|
||||||
|
const betaHeaders = ['valid-header', '', 'another-valid']
|
||||||
|
|
||||||
|
// 修复后的逻辑应该在合并前过滤
|
||||||
|
const clean = betaHeaders.filter(Boolean)
|
||||||
|
expect(clean).toEqual(['valid-header', 'another-valid'])
|
||||||
|
})
|
||||||
|
|
||||||
|
test('merging avoids duplicates', () => {
|
||||||
|
const existing = ['header-a', 'header-b']
|
||||||
|
const incoming = ['header-b', 'header-c']
|
||||||
|
|
||||||
|
const merged = [...existing, ...incoming.filter(h => !existing.includes(h))]
|
||||||
|
|
||||||
|
expect(merged).toEqual(['header-a', 'header-b', 'header-c'])
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// ── Part 4: ANTHROPIC_BETAS 环境变量解析 ─────────────────────────
|
||||||
|
|
||||||
|
describe('ANTHROPIC_BETAS env var parsing', () => {
|
||||||
|
test('empty string env var produces no betas', () => {
|
||||||
|
const envVal: string = ''
|
||||||
|
const result = envVal
|
||||||
|
? envVal
|
||||||
|
.split(',')
|
||||||
|
.map((s: string) => s.trim())
|
||||||
|
.filter(Boolean)
|
||||||
|
: []
|
||||||
|
|
||||||
|
expect(result).toEqual([])
|
||||||
|
})
|
||||||
|
|
||||||
|
test('trailing comma does not produce empty entry', () => {
|
||||||
|
const envVal = 'beta-a,beta-b,'
|
||||||
|
const result = envVal
|
||||||
|
.split(',')
|
||||||
|
.map(s => s.trim())
|
||||||
|
.filter(Boolean)
|
||||||
|
|
||||||
|
expect(result).toEqual(['beta-a', 'beta-b'])
|
||||||
|
})
|
||||||
|
|
||||||
|
test('whitespace-only entries are filtered', () => {
|
||||||
|
const envVal = 'beta-a, , beta-b, '
|
||||||
|
const result = envVal
|
||||||
|
.split(',')
|
||||||
|
.map(s => s.trim())
|
||||||
|
.filter(Boolean)
|
||||||
|
|
||||||
|
expect(result).toEqual(['beta-a', 'beta-b'])
|
||||||
|
})
|
||||||
|
|
||||||
|
test('single comma produces no betas', () => {
|
||||||
|
const envVal = ','
|
||||||
|
const result = envVal
|
||||||
|
.split(',')
|
||||||
|
.map(s => s.trim())
|
||||||
|
.filter(Boolean)
|
||||||
|
|
||||||
|
expect(result).toEqual([])
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// ── Part 5: 完整请求参数模拟 ─────────────────────────────────────
|
||||||
|
|
||||||
|
describe('request params beta assembly (simulated)', () => {
|
||||||
|
test('simulates the full beta assembly pipeline with empty constants', () => {
|
||||||
|
// 模拟 claude.ts 中 paramsFromContext 的 beta 组装流程
|
||||||
|
const CLAUDE_CODE_HEADER = 'claude-code-20250219'
|
||||||
|
const INTERLEAVED_HEADER = 'interleaved-thinking-2025-05-14'
|
||||||
|
const CONTEXT_1M_HEADER = 'context-1m-2025-08-07'
|
||||||
|
const CACHE_EDITING_HEADER = '' // 空!
|
||||||
|
const AFK_MODE_HEADER = '' // 也是空!
|
||||||
|
|
||||||
|
// Step 1: 基础 betas(来自 getAllModelBetas)
|
||||||
|
const baseBetas = [
|
||||||
|
CLAUDE_CODE_HEADER,
|
||||||
|
INTERLEAVED_HEADER,
|
||||||
|
CONTEXT_1M_HEADER,
|
||||||
|
]
|
||||||
|
|
||||||
|
// Step 2: paramsFromContext 中的动态添加
|
||||||
|
const betasParams = [...baseBetas]
|
||||||
|
|
||||||
|
// 模拟 cache editing latch 触发但 header 为空
|
||||||
|
const cacheEditingHeaderLatched = true
|
||||||
|
if (
|
||||||
|
cacheEditingHeaderLatched &&
|
||||||
|
CACHE_EDITING_HEADER && // ← 修复:truthy 检查
|
||||||
|
!betasParams.includes(CACHE_EDITING_HEADER)
|
||||||
|
) {
|
||||||
|
betasParams.push(CACHE_EDITING_HEADER)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 模拟 AFK mode latch 触发但 header 为空
|
||||||
|
const afkHeaderLatched = true
|
||||||
|
// feature('TRANSCRIPT_CLASSIFIER') 为 false 时,整个 if block 不进入
|
||||||
|
// 但假设进入了,header 也是空的
|
||||||
|
if (
|
||||||
|
afkHeaderLatched &&
|
||||||
|
AFK_MODE_HEADER && // 空字符串,不会进入
|
||||||
|
!betasParams.includes(AFK_MODE_HEADER)
|
||||||
|
) {
|
||||||
|
betasParams.push(AFK_MODE_HEADER)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 3: 最终过滤(我们的防御层)
|
||||||
|
const filteredBetas = betasParams.filter(Boolean)
|
||||||
|
|
||||||
|
// 验证:没有空字符串泄漏
|
||||||
|
expect(filteredBetas).not.toContain('')
|
||||||
|
expect(filteredBetas).toEqual([
|
||||||
|
CLAUDE_CODE_HEADER,
|
||||||
|
INTERLEAVED_HEADER,
|
||||||
|
CONTEXT_1M_HEADER,
|
||||||
|
])
|
||||||
|
|
||||||
|
// 验证:toString() 不会产生 ,,
|
||||||
|
expect(filteredBetas.toString()).not.toContain(',,')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('simulates the bug scenario WITHOUT fix', () => {
|
||||||
|
// 重现修复前的行为,验证 bug 确实存在
|
||||||
|
const CACHE_EDITING_HEADER = '' // 空值
|
||||||
|
|
||||||
|
const betasParams = [
|
||||||
|
'claude-code-20250219',
|
||||||
|
'interleaved-thinking-2025-05-14',
|
||||||
|
]
|
||||||
|
|
||||||
|
// 修复前:没有 truthy 检查,空字符串被 push
|
||||||
|
const cacheEditingHeaderLatched = true
|
||||||
|
if (
|
||||||
|
cacheEditingHeaderLatched &&
|
||||||
|
// 注意:没有 CACHE_EDITING_HEADER && 检查
|
||||||
|
!betasParams.includes(CACHE_EDITING_HEADER) // '' 不在数组中 → true
|
||||||
|
) {
|
||||||
|
betasParams.push(CACHE_EDITING_HEADER) // push 了空字符串!
|
||||||
|
}
|
||||||
|
|
||||||
|
// 证明 bug:数组包含空字符串
|
||||||
|
expect(betasParams).toContain('')
|
||||||
|
// SDK toString() 会产生尾部逗号(空字符串在末尾)或 ,,(在中间)
|
||||||
|
// 两者都是 API 不接受的无效 header 值
|
||||||
|
const headerStr = betasParams.toString()
|
||||||
|
// 空字符串在末尾 → 尾部逗号 "a,b,"
|
||||||
|
// 空字符串在中间 → 连续逗号 "a,,b"
|
||||||
|
expect(headerStr.endsWith(',') || headerStr.includes(',,')).toBe(true)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('useBetas flag correctly handles empty-after-filter', () => {
|
||||||
|
// 如果所有 betas 都是空字符串,过滤后应该不发送 betas 参数
|
||||||
|
const betasParams = ['', '']
|
||||||
|
const filteredBetas = betasParams.filter(Boolean)
|
||||||
|
const useBetas = filteredBetas.length > 0
|
||||||
|
|
||||||
|
expect(useBetas).toBe(false)
|
||||||
|
// API 请求不应包含 betas 字段
|
||||||
|
const requestParams = {
|
||||||
|
model: 'claude-opus-4-6',
|
||||||
|
max_tokens: 1024,
|
||||||
|
messages: [],
|
||||||
|
...(useBetas && { betas: filteredBetas }),
|
||||||
|
}
|
||||||
|
expect(requestParams).not.toHaveProperty('betas')
|
||||||
|
})
|
||||||
|
})
|
||||||
65
src/services/api/bedrockClient.ts
Normal file
65
src/services/api/bedrockClient.ts
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
import { AnthropicBedrock } from '@anthropic-ai/bedrock-sdk'
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extends AnthropicBedrock to work around an upstream bug where the SDK
|
||||||
|
* re-plants the `anthropic-beta` HTTP header value into the request body
|
||||||
|
* as `anthropic_beta`. Bedrock's Opus 4.7 endpoint rejects any request with
|
||||||
|
* `anthropic_beta` in the body with a 400 "invalid beta flag" error.
|
||||||
|
*
|
||||||
|
* Source of the bug (SDK 0.26.4, still present through 0.28.1):
|
||||||
|
* node_modules/@anthropic-ai/bedrock-sdk/client.js lines 122-127
|
||||||
|
* (TS source: packages/bedrock-sdk/src/client.ts lines 193-198)
|
||||||
|
*
|
||||||
|
* Related upstream issue: anthropics/claude-code#49238 (opened 2026-04-16).
|
||||||
|
*
|
||||||
|
* Fix strategy: let super.buildRequest do its work, then strip
|
||||||
|
* `body.anthropic_beta` from the resulting Request before the SDK computes
|
||||||
|
* the AWS SigV4 signature (signing happens downstream of buildRequest, so
|
||||||
|
* the signature hashes the cleaned body — no 403 risk). The `anthropic-beta`
|
||||||
|
* HTTP header remains intact (base SDK placed it there from the `betas:`
|
||||||
|
* parameter), so beta flags still reach the API the way Bedrock accepts them.
|
||||||
|
*
|
||||||
|
* When upstream ships a fix, verify the probe in scripts/probe-bedrock-beta-fix.ts
|
||||||
|
* shows "bug reproduced: false", then delete this class and change
|
||||||
|
* `services/api/client.ts` to instantiate `AnthropicBedrock` directly.
|
||||||
|
*/
|
||||||
|
type BuildRequestArg = Parameters<AnthropicBedrock['buildRequest']>[0]
|
||||||
|
type BuildRequestRet = Awaited<ReturnType<AnthropicBedrock['buildRequest']>>
|
||||||
|
|
||||||
|
export class BedrockClient extends AnthropicBedrock {
|
||||||
|
async buildRequest(options: BuildRequestArg): Promise<BuildRequestRet> {
|
||||||
|
const req = await super.buildRequest(options)
|
||||||
|
|
||||||
|
const inner = (
|
||||||
|
req as unknown as { req?: { body?: unknown; headers?: unknown } }
|
||||||
|
)?.req
|
||||||
|
if (!inner || typeof inner.body !== 'string' || inner.body.length === 0) {
|
||||||
|
return req
|
||||||
|
}
|
||||||
|
|
||||||
|
let parsed: Record<string, unknown>
|
||||||
|
try {
|
||||||
|
parsed = JSON.parse(inner.body) as Record<string, unknown>
|
||||||
|
} catch {
|
||||||
|
return req
|
||||||
|
}
|
||||||
|
if (!('anthropic_beta' in parsed)) {
|
||||||
|
return req
|
||||||
|
}
|
||||||
|
|
||||||
|
delete parsed.anthropic_beta
|
||||||
|
const cleanedBody = JSON.stringify(parsed)
|
||||||
|
inner.body = cleanedBody
|
||||||
|
|
||||||
|
const byteLen = String(new TextEncoder().encode(cleanedBody).length)
|
||||||
|
const h = inner.headers
|
||||||
|
if (typeof Headers !== 'undefined' && h instanceof Headers) {
|
||||||
|
if (h.has('content-length')) h.set('content-length', byteLen)
|
||||||
|
} else if (h && typeof h === 'object') {
|
||||||
|
const asDict = h as Record<string, string>
|
||||||
|
if ('content-length' in asDict) asDict['content-length'] = byteLen
|
||||||
|
}
|
||||||
|
|
||||||
|
return req
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -101,6 +101,8 @@ import {
|
|||||||
extractQuotaStatusFromHeaders,
|
extractQuotaStatusFromHeaders,
|
||||||
} from '../claudeAiLimits.js'
|
} from '../claudeAiLimits.js'
|
||||||
import { getAPIContextManagement } from '../compact/apiMicrocompact.js'
|
import { getAPIContextManagement } from '../compact/apiMicrocompact.js'
|
||||||
|
import { bedrockAdapter } from '../providerUsage/adapters/bedrock.js'
|
||||||
|
import { updateProviderBuckets } from '../providerUsage/store.js'
|
||||||
|
|
||||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||||
const autoModeStateModule = feature('TRANSCRIPT_CLASSIFIER')
|
const autoModeStateModule = feature('TRANSCRIPT_CLASSIFIER')
|
||||||
@@ -541,13 +543,12 @@ export async function verifyApiKey(
|
|||||||
}),
|
}),
|
||||||
async anthropic => {
|
async anthropic => {
|
||||||
const messages: MessageParam[] = [{ role: 'user', content: 'test' }]
|
const messages: MessageParam[] = [{ role: 'user', content: 'test' }]
|
||||||
// biome-ignore lint/plugin: API key verification is intentionally a minimal direct call
|
|
||||||
await anthropic.beta.messages.create({
|
await anthropic.beta.messages.create({
|
||||||
model,
|
model,
|
||||||
max_tokens: 1,
|
max_tokens: 1,
|
||||||
messages,
|
messages,
|
||||||
temperature: 1,
|
temperature: 1,
|
||||||
...(betas.length > 0 && { betas }),
|
...(betas.length > 0 && { betas: betas.filter(Boolean) }),
|
||||||
metadata: getAPIMetadata(),
|
metadata: getAPIMetadata(),
|
||||||
...getExtraBodyParams(),
|
...getExtraBodyParams(),
|
||||||
})
|
})
|
||||||
@@ -878,7 +879,6 @@ export async function* executeNonStreamingRequest(
|
|||||||
)
|
)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// biome-ignore lint/plugin: non-streaming API call
|
|
||||||
return await anthropic.beta.messages.create(
|
return await anthropic.beta.messages.create(
|
||||||
{
|
{
|
||||||
...adjustedParams,
|
...adjustedParams,
|
||||||
@@ -1215,10 +1215,15 @@ async function* queryModel(
|
|||||||
cacheEditingBetaHeader = betas.CACHE_EDITING_BETA_HEADER
|
cacheEditingBetaHeader = betas.CACHE_EDITING_BETA_HEADER
|
||||||
const featureEnabled = isCachedMicrocompactEnabled()
|
const featureEnabled = isCachedMicrocompactEnabled()
|
||||||
const modelSupported = isModelSupportedForCacheEditing(options.model)
|
const modelSupported = isModelSupportedForCacheEditing(options.model)
|
||||||
cachedMCEnabled = featureEnabled && modelSupported
|
// cachedMC requires a non-empty beta header; the CACHE_EDITING_BETA_HEADER
|
||||||
|
// constant is '' in this fork (upstream hasn't published the real value).
|
||||||
|
// Without it, cache_reference and cache_edits in the request body cause
|
||||||
|
// API 400: "tool_result.cache_reference: Extra inputs are not permitted".
|
||||||
|
const headerAvailable = !!cacheEditingBetaHeader
|
||||||
|
cachedMCEnabled = featureEnabled && modelSupported && headerAvailable
|
||||||
const config = getCachedMCConfig()
|
const config = getCachedMCConfig()
|
||||||
logForDebugging(
|
logForDebugging(
|
||||||
`Cached MC gate: enabled=${featureEnabled} modelSupported=${modelSupported} model=${options.model} supportedModels=${jsonStringify((config as any).supportedModels)}`,
|
`Cached MC gate: enabled=${featureEnabled} modelSupported=${modelSupported} headerAvailable=${headerAvailable} model=${options.model} supportedModels=${jsonStringify((config as Record<string, unknown>).supportedModels)}`,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1724,6 +1729,7 @@ async function* queryModel(
|
|||||||
options.querySource === 'repl_main_thread'
|
options.querySource === 'repl_main_thread'
|
||||||
if (
|
if (
|
||||||
cacheEditingHeaderLatched &&
|
cacheEditingHeaderLatched &&
|
||||||
|
cacheEditingBetaHeader &&
|
||||||
getAPIProvider() === 'firstParty' &&
|
getAPIProvider() === 'firstParty' &&
|
||||||
options.querySource === 'repl_main_thread' &&
|
options.querySource === 'repl_main_thread' &&
|
||||||
!betasParams.includes(cacheEditingBetaHeader)
|
!betasParams.includes(cacheEditingBetaHeader)
|
||||||
@@ -1740,7 +1746,12 @@ async function* queryModel(
|
|||||||
? (options.temperatureOverride ?? 1)
|
? (options.temperatureOverride ?? 1)
|
||||||
: undefined
|
: undefined
|
||||||
|
|
||||||
lastRequestBetas = betasParams
|
// Filter out any empty-string beta headers before sending.
|
||||||
|
// Constants like CACHE_EDITING_BETA_HEADER or AFK_MODE_BETA_HEADER
|
||||||
|
// can be '' when their feature gate is off; an empty string in the
|
||||||
|
// betas array produces an invalid anthropic-beta header (400 error).
|
||||||
|
const filteredBetas = betasParams.filter(Boolean)
|
||||||
|
lastRequestBetas = filteredBetas
|
||||||
|
|
||||||
return {
|
return {
|
||||||
model: normalizeModelStringForAPI(options.model),
|
model: normalizeModelStringForAPI(options.model),
|
||||||
@@ -1756,7 +1767,7 @@ async function* queryModel(
|
|||||||
system,
|
system,
|
||||||
tools: allTools,
|
tools: allTools,
|
||||||
tool_choice: options.toolChoice,
|
tool_choice: options.toolChoice,
|
||||||
...(useBetas && { betas: betasParams }),
|
...(useBetas && { betas: filteredBetas }),
|
||||||
metadata: getAPIMetadata(),
|
metadata: getAPIMetadata(),
|
||||||
max_tokens: maxOutputTokens,
|
max_tokens: maxOutputTokens,
|
||||||
thinking,
|
thinking,
|
||||||
@@ -1864,7 +1875,6 @@ async function* queryModel(
|
|||||||
// Use raw stream instead of BetaMessageStream to avoid O(n²) partial JSON parsing
|
// Use raw stream instead of BetaMessageStream to avoid O(n²) partial JSON parsing
|
||||||
// BetaMessageStream calls partialParse() on every input_json_delta, which we don't need
|
// BetaMessageStream calls partialParse() on every input_json_delta, which we don't need
|
||||||
// since we handle tool input accumulation ourselves
|
// since we handle tool input accumulation ourselves
|
||||||
// biome-ignore lint/plugin: main conversation loop handles attribution separately
|
|
||||||
const result = await anthropic.beta.messages
|
const result = await anthropic.beta.messages
|
||||||
.create(
|
.create(
|
||||||
{ ...params, stream: true },
|
{ ...params, stream: true },
|
||||||
@@ -2445,6 +2455,16 @@ async function* queryModel(
|
|||||||
const resp = streamResponse as unknown as Response | undefined
|
const resp = streamResponse as unknown as Response | undefined
|
||||||
if (resp) {
|
if (resp) {
|
||||||
extractQuotaStatusFromHeaders(resp.headers)
|
extractQuotaStatusFromHeaders(resp.headers)
|
||||||
|
// Non-Anthropic providers that flow through this same client path
|
||||||
|
// (Bedrock) expose their own throttle headers — let their adapter
|
||||||
|
// overwrite the store with its bucket(s). Anthropic's adapter runs
|
||||||
|
// inside extractQuotaStatusFromHeaders.
|
||||||
|
if (getAPIProvider() === 'bedrock') {
|
||||||
|
updateProviderBuckets(
|
||||||
|
'bedrock',
|
||||||
|
bedrockAdapter.parseHeaders(resp.headers),
|
||||||
|
)
|
||||||
|
}
|
||||||
// Store headers for gateway detection
|
// Store headers for gateway detection
|
||||||
responseHeaders = resp.headers
|
responseHeaders = resp.headers
|
||||||
}
|
}
|
||||||
@@ -3229,6 +3249,7 @@ export function addCacheBreakpoints(
|
|||||||
|
|
||||||
// Add cache_reference to tool_result blocks that are within the cached prefix.
|
// Add cache_reference to tool_result blocks that are within the cached prefix.
|
||||||
// Must be done AFTER cache_edits insertion since that modifies content arrays.
|
// Must be done AFTER cache_edits insertion since that modifies content arrays.
|
||||||
|
// Note: this code only runs when useCachedMC=true (early return at line ~3202).
|
||||||
if (enablePromptCaching) {
|
if (enablePromptCaching) {
|
||||||
// Find the last message containing a cache_control marker
|
// Find the last message containing a cache_control marker
|
||||||
let lastCCMsg = -1
|
let lastCCMsg = -1
|
||||||
|
|||||||
@@ -73,14 +73,10 @@ import {
|
|||||||
function createStderrLogger(): ClientOptions['logger'] {
|
function createStderrLogger(): ClientOptions['logger'] {
|
||||||
return {
|
return {
|
||||||
error: (msg, ...args) =>
|
error: (msg, ...args) =>
|
||||||
// biome-ignore lint/suspicious/noConsole:: intentional console output -- SDK logger must use console
|
|
||||||
console.error('[Anthropic SDK ERROR]', msg, ...args),
|
console.error('[Anthropic SDK ERROR]', msg, ...args),
|
||||||
// biome-ignore lint/suspicious/noConsole:: intentional console output -- SDK logger must use console
|
|
||||||
warn: (msg, ...args) => console.error('[Anthropic SDK WARN]', msg, ...args),
|
warn: (msg, ...args) => console.error('[Anthropic SDK WARN]', msg, ...args),
|
||||||
// biome-ignore lint/suspicious/noConsole:: intentional console output -- SDK logger must use console
|
|
||||||
info: (msg, ...args) => console.error('[Anthropic SDK INFO]', msg, ...args),
|
info: (msg, ...args) => console.error('[Anthropic SDK INFO]', msg, ...args),
|
||||||
debug: (msg, ...args) =>
|
debug: (msg, ...args) =>
|
||||||
// biome-ignore lint/suspicious/noConsole:: intentional console output -- SDK logger must use console
|
|
||||||
console.error('[Anthropic SDK DEBUG]', msg, ...args),
|
console.error('[Anthropic SDK DEBUG]', msg, ...args),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -151,7 +147,7 @@ export async function getAnthropicClient({
|
|||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
if (isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK)) {
|
if (isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK)) {
|
||||||
const { AnthropicBedrock } = await import('@anthropic-ai/bedrock-sdk')
|
const { BedrockClient } = await import('./bedrockClient.js')
|
||||||
// Use region override for small fast model if specified
|
// Use region override for small fast model if specified
|
||||||
const awsRegion =
|
const awsRegion =
|
||||||
model === getSmallFastModel() &&
|
model === getSmallFastModel() &&
|
||||||
@@ -186,7 +182,7 @@ export async function getAnthropicClient({
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// we have always been lying about the return type - this doesn't support batching or models
|
// we have always been lying about the return type - this doesn't support batching or models
|
||||||
return new AnthropicBedrock(bedrockArgs) as unknown as Anthropic
|
return new BedrockClient(bedrockArgs) as unknown as Anthropic
|
||||||
}
|
}
|
||||||
if (isEnvTruthy(process.env.CLAUDE_CODE_USE_FOUNDRY)) {
|
if (isEnvTruthy(process.env.CLAUDE_CODE_USE_FOUNDRY)) {
|
||||||
const { AnthropicFoundry } = await import('@anthropic-ai/foundry-sdk')
|
const { AnthropicFoundry } = await import('@anthropic-ai/foundry-sdk')
|
||||||
|
|||||||
@@ -944,6 +944,9 @@ function get3PModelFallbackSuggestion(model: string): string | undefined {
|
|||||||
// @[MODEL LAUNCH]: Add a fallback suggestion chain for the new model → previous version for 3P
|
// @[MODEL LAUNCH]: Add a fallback suggestion chain for the new model → previous version for 3P
|
||||||
const m = model.toLowerCase()
|
const m = model.toLowerCase()
|
||||||
// If the failing model looks like an Opus 4.6 variant, suggest the default Opus (4.1 for 3P)
|
// If the failing model looks like an Opus 4.6 variant, suggest the default Opus (4.1 for 3P)
|
||||||
|
if (m.includes('opus-4-7') || m.includes('opus_4_7')) {
|
||||||
|
return getModelStrings().opus46
|
||||||
|
}
|
||||||
if (m.includes('opus-4-6') || m.includes('opus_4_6')) {
|
if (m.includes('opus-4-6') || m.includes('opus_4_6')) {
|
||||||
return getModelStrings().opus41
|
return getModelStrings().opus41
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -377,7 +377,7 @@ export function logAPIError({
|
|||||||
// Pass the span to correctly match responses to requests when beta tracing is enabled
|
// Pass the span to correctly match responses to requests when beta tracing is enabled
|
||||||
endLLMRequestSpan(llmSpan, {
|
endLLMRequestSpan(llmSpan, {
|
||||||
success: false,
|
success: false,
|
||||||
statusCode: status ? parseInt(status) : undefined,
|
statusCode: status ? parseInt(status, 10) : undefined,
|
||||||
error: errStr,
|
error: errStr,
|
||||||
attempt,
|
attempt,
|
||||||
})
|
})
|
||||||
|
|||||||
545
src/services/api/openai/__tests__/queryModelOpenAI.isolated.ts
Normal file
545
src/services/api/openai/__tests__/queryModelOpenAI.isolated.ts
Normal file
@@ -0,0 +1,545 @@
|
|||||||
|
/**
|
||||||
|
* Tests for queryModelOpenAI in index.ts.
|
||||||
|
*
|
||||||
|
* Focused on the two bugs fixed:
|
||||||
|
* 1. stop_reason was always null in the assembled AssistantMessage because
|
||||||
|
* partialMessage (from message_start) has stop_reason: null, and the
|
||||||
|
* stop_reason captured from message_delta was never applied.
|
||||||
|
* 2. partialMessage was not reset to null after message_stop, so the safety
|
||||||
|
* fallback at the end of the loop would yield a second identical
|
||||||
|
* AssistantMessage (causing doubled content in the next API request).
|
||||||
|
*
|
||||||
|
* Strategy: mock getOpenAIClient + adaptOpenAIStreamToAnthropic so we can
|
||||||
|
* feed pre-built Anthropic events directly into queryModelOpenAI and inspect
|
||||||
|
* what it emits — without any real HTTP calls.
|
||||||
|
*/
|
||||||
|
import { describe, expect, test, mock, beforeEach, afterEach } from 'bun:test'
|
||||||
|
import type { BetaRawMessageStreamEvent } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||||
|
import type {
|
||||||
|
AssistantMessage,
|
||||||
|
StreamEvent,
|
||||||
|
} from '../../../../types/message.js'
|
||||||
|
|
||||||
|
// ─── helpers ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/** Build a minimal message_start event */
|
||||||
|
function makeMessageStart(
|
||||||
|
overrides: Record<string, any> = {},
|
||||||
|
): BetaRawMessageStreamEvent {
|
||||||
|
return {
|
||||||
|
type: 'message_start',
|
||||||
|
message: {
|
||||||
|
id: 'msg_test',
|
||||||
|
type: 'message',
|
||||||
|
role: 'assistant',
|
||||||
|
content: [],
|
||||||
|
model: 'test-model',
|
||||||
|
stop_reason: null,
|
||||||
|
stop_sequence: null,
|
||||||
|
usage: {
|
||||||
|
input_tokens: 0,
|
||||||
|
output_tokens: 0,
|
||||||
|
cache_creation_input_tokens: 0,
|
||||||
|
cache_read_input_tokens: 0,
|
||||||
|
},
|
||||||
|
...overrides,
|
||||||
|
},
|
||||||
|
} as any
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Build a content_block_start event for the given block type */
|
||||||
|
function makeContentBlockStart(
|
||||||
|
index: number,
|
||||||
|
type: 'text' | 'tool_use' | 'thinking',
|
||||||
|
extra: Record<string, any> = {},
|
||||||
|
): BetaRawMessageStreamEvent {
|
||||||
|
const block =
|
||||||
|
type === 'text'
|
||||||
|
? { type: 'text', text: '' }
|
||||||
|
: type === 'tool_use'
|
||||||
|
? { type: 'tool_use', id: 'toolu_test', name: 'bash', input: {} }
|
||||||
|
: { type: 'thinking', thinking: '', signature: '' }
|
||||||
|
return {
|
||||||
|
type: 'content_block_start',
|
||||||
|
index,
|
||||||
|
content_block: { ...block, ...extra },
|
||||||
|
} as any
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Build a text_delta content_block_delta event */
|
||||||
|
function makeTextDelta(index: number, text: string): BetaRawMessageStreamEvent {
|
||||||
|
return {
|
||||||
|
type: 'content_block_delta',
|
||||||
|
index,
|
||||||
|
delta: { type: 'text_delta', text },
|
||||||
|
} as any
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Build an input_json_delta content_block_delta event */
|
||||||
|
function makeInputJsonDelta(
|
||||||
|
index: number,
|
||||||
|
json: string,
|
||||||
|
): BetaRawMessageStreamEvent {
|
||||||
|
return {
|
||||||
|
type: 'content_block_delta',
|
||||||
|
index,
|
||||||
|
delta: { type: 'input_json_delta', partial_json: json },
|
||||||
|
} as any
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Build a thinking_delta content_block_delta event */
|
||||||
|
function makeThinkingDelta(
|
||||||
|
index: number,
|
||||||
|
thinking: string,
|
||||||
|
): BetaRawMessageStreamEvent {
|
||||||
|
return {
|
||||||
|
type: 'content_block_delta',
|
||||||
|
index,
|
||||||
|
delta: { type: 'thinking_delta', thinking },
|
||||||
|
} as any
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Build a content_block_stop event */
|
||||||
|
function makeContentBlockStop(index: number): BetaRawMessageStreamEvent {
|
||||||
|
return { type: 'content_block_stop', index } as any
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Build a message_delta event with stop_reason and output_tokens */
|
||||||
|
function makeMessageDelta(
|
||||||
|
stopReason: string,
|
||||||
|
outputTokens: number,
|
||||||
|
): BetaRawMessageStreamEvent {
|
||||||
|
return {
|
||||||
|
type: 'message_delta',
|
||||||
|
delta: { stop_reason: stopReason, stop_sequence: null },
|
||||||
|
usage: { output_tokens: outputTokens },
|
||||||
|
} as any
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Build a message_stop event */
|
||||||
|
function makeMessageStop(): BetaRawMessageStreamEvent {
|
||||||
|
return { type: 'message_stop' } as any
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Async generator from a fixed array of events */
|
||||||
|
async function* eventStream(events: BetaRawMessageStreamEvent[]) {
|
||||||
|
for (const e of events) yield e
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Collect all outputs from queryModelOpenAI into typed buckets */
|
||||||
|
async function runQueryModel(
|
||||||
|
events: BetaRawMessageStreamEvent[],
|
||||||
|
envOverrides: Record<string, string | undefined> = {},
|
||||||
|
) {
|
||||||
|
// Wire events into the mocked stream adapter
|
||||||
|
_nextEvents = events
|
||||||
|
// Save + apply env overrides
|
||||||
|
const saved: Record<string, string | undefined> = {}
|
||||||
|
for (const [k, v] of Object.entries(envOverrides)) {
|
||||||
|
saved[k] = process.env[k]
|
||||||
|
if (v === undefined) delete process.env[k]
|
||||||
|
else process.env[k] = v
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// We inline mock.module inside the try block.
|
||||||
|
// Bun resolves mock.module at the call site synchronously (hoisted),
|
||||||
|
// so we register once per test file, then re-import each time.
|
||||||
|
const { queryModelOpenAI } = await import('../index.js')
|
||||||
|
|
||||||
|
const assistantMessages: AssistantMessage[] = []
|
||||||
|
const streamEvents: StreamEvent[] = []
|
||||||
|
const otherOutputs: any[] = []
|
||||||
|
|
||||||
|
const minimalOptions: any = {
|
||||||
|
model: 'test-model',
|
||||||
|
tools: [],
|
||||||
|
agents: [],
|
||||||
|
querySource: 'main_loop',
|
||||||
|
getToolPermissionContext: async () => ({
|
||||||
|
alwaysAllow: [],
|
||||||
|
alwaysDeny: [],
|
||||||
|
needsPermission: [],
|
||||||
|
mode: 'default',
|
||||||
|
isBypassingPermissions: false,
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
|
||||||
|
for await (const item of queryModelOpenAI(
|
||||||
|
[],
|
||||||
|
{ type: 'text', text: '' } as any,
|
||||||
|
[],
|
||||||
|
new AbortController().signal,
|
||||||
|
minimalOptions,
|
||||||
|
)) {
|
||||||
|
if (item.type === 'assistant') {
|
||||||
|
assistantMessages.push(item as AssistantMessage)
|
||||||
|
} else if (item.type === 'stream_event') {
|
||||||
|
streamEvents.push(item as StreamEvent)
|
||||||
|
} else {
|
||||||
|
otherOutputs.push(item)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { assistantMessages, streamEvents, otherOutputs }
|
||||||
|
} finally {
|
||||||
|
// Restore env
|
||||||
|
for (const [k, v] of Object.entries(saved)) {
|
||||||
|
if (v === undefined) delete process.env[k]
|
||||||
|
else process.env[k] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── mock setup ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// We mock at module level. Bun's mock.module replaces the module for the
|
||||||
|
// entire file, so we configure the stream per-test via a shared variable.
|
||||||
|
let _nextEvents: BetaRawMessageStreamEvent[] = []
|
||||||
|
|
||||||
|
/** Captured arguments from the last chat.completions.create() call */
|
||||||
|
let _lastCreateArgs: Record<string, any> | null = null
|
||||||
|
|
||||||
|
mock.module('../client.js', () => ({
|
||||||
|
getOpenAIClient: () => ({
|
||||||
|
chat: {
|
||||||
|
completions: {
|
||||||
|
create: async (args: Record<string, any>) => {
|
||||||
|
_lastCreateArgs = args
|
||||||
|
return { [Symbol.asyncIterator]: async function* () {} }
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
}))
|
||||||
|
|
||||||
|
mock.module('../streamAdapter.js', () => ({
|
||||||
|
adaptOpenAIStreamToAnthropic: (_stream: any, _model: string) =>
|
||||||
|
eventStream(_nextEvents),
|
||||||
|
}))
|
||||||
|
|
||||||
|
mock.module('../modelMapping.js', () => ({
|
||||||
|
resolveOpenAIModel: (m: string) => m,
|
||||||
|
}))
|
||||||
|
|
||||||
|
mock.module('../convertMessages.js', () => ({
|
||||||
|
anthropicMessagesToOpenAI: () => [],
|
||||||
|
}))
|
||||||
|
|
||||||
|
mock.module('../convertTools.js', () => ({
|
||||||
|
anthropicToolsToOpenAI: () => [],
|
||||||
|
anthropicToolChoiceToOpenAI: () => undefined,
|
||||||
|
}))
|
||||||
|
|
||||||
|
mock.module('../../../../utils/context.js', () => ({
|
||||||
|
MODEL_CONTEXT_WINDOW_DEFAULT: 200_000,
|
||||||
|
COMPACT_MAX_OUTPUT_TOKENS: 20_000,
|
||||||
|
CAPPED_DEFAULT_MAX_TOKENS: 8_000,
|
||||||
|
ESCALATED_MAX_TOKENS: 64_000,
|
||||||
|
is1mContextDisabled: () => false,
|
||||||
|
has1mContext: () => false,
|
||||||
|
modelSupports1M: () => false,
|
||||||
|
getModelMaxOutputTokens: () => ({ upperLimit: 8192, default: 8192 }),
|
||||||
|
getContextWindowForModel: () => 200_000,
|
||||||
|
getSonnet1mExpTreatmentEnabled: () => false,
|
||||||
|
calculateContextPercentages: () => ({
|
||||||
|
usedPercent: 0,
|
||||||
|
remainingPercent: 100,
|
||||||
|
}),
|
||||||
|
getMaxThinkingTokensForModel: () => 0,
|
||||||
|
}))
|
||||||
|
|
||||||
|
mock.module('../../../../utils/messages.js', () => ({
|
||||||
|
normalizeMessagesForAPI: (msgs: any) => msgs,
|
||||||
|
normalizeContentFromAPI: (blocks: any[]) => blocks,
|
||||||
|
createAssistantAPIErrorMessage: (opts: any) => ({
|
||||||
|
type: 'assistant',
|
||||||
|
message: {
|
||||||
|
content: [{ type: 'text', text: opts.content }],
|
||||||
|
apiError: opts.apiError,
|
||||||
|
},
|
||||||
|
uuid: 'error-uuid',
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
}),
|
||||||
|
}))
|
||||||
|
|
||||||
|
mock.module('../../../../utils/api.js', () => ({
|
||||||
|
toolToAPISchema: async (t: any) => t,
|
||||||
|
}))
|
||||||
|
|
||||||
|
mock.module('../../../../utils/toolSearch.js', () => ({
|
||||||
|
isToolSearchEnabled: async () => false,
|
||||||
|
extractDiscoveredToolNames: () => new Set(),
|
||||||
|
}))
|
||||||
|
|
||||||
|
mock.module('../../../../tools/ToolSearchTool/prompt.js', () => ({
|
||||||
|
isDeferredTool: () => false,
|
||||||
|
TOOL_SEARCH_TOOL_NAME: '__tool_search__',
|
||||||
|
}))
|
||||||
|
|
||||||
|
mock.module('../../../../cost-tracker.js', () => ({
|
||||||
|
addToTotalSessionCost: () => {},
|
||||||
|
}))
|
||||||
|
|
||||||
|
mock.module('../../../../utils/modelCost.js', () => ({
|
||||||
|
COST_TIER_3_15: {},
|
||||||
|
COST_TIER_15_75: {},
|
||||||
|
COST_TIER_5_25: {},
|
||||||
|
COST_TIER_30_150: {},
|
||||||
|
COST_HAIKU_35: {},
|
||||||
|
COST_HAIKU_45: {},
|
||||||
|
getOpus46CostTier: () => ({}),
|
||||||
|
MODEL_COSTS: {},
|
||||||
|
getModelCosts: () => ({}),
|
||||||
|
calculateUSDCost: () => 0,
|
||||||
|
calculateCostFromTokens: () => 0,
|
||||||
|
formatModelPricing: () => '',
|
||||||
|
getModelPricingString: () => undefined,
|
||||||
|
}))
|
||||||
|
|
||||||
|
mock.module('../../../../utils/debug.js', () => ({
|
||||||
|
logForDebugging: () => {},
|
||||||
|
logAntError: () => {},
|
||||||
|
isDebugMode: () => false,
|
||||||
|
isDebugToStdErr: () => false,
|
||||||
|
getDebugFilePath: () => null,
|
||||||
|
getDebugLogPath: () => '',
|
||||||
|
getDebugFilter: () => null,
|
||||||
|
getMinDebugLogLevel: () => 'debug',
|
||||||
|
enableDebugLogging: () => false,
|
||||||
|
setHasFormattedOutput: () => {},
|
||||||
|
getHasFormattedOutput: () => false,
|
||||||
|
flushDebugLogs: async () => {},
|
||||||
|
}))
|
||||||
|
|
||||||
|
// ─── tests ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
describe('queryModelOpenAI — stop_reason propagation', () => {
|
||||||
|
test('assembled AssistantMessage has stop_reason end_turn (not null)', async () => {
|
||||||
|
_nextEvents = [
|
||||||
|
makeMessageStart(),
|
||||||
|
makeContentBlockStart(0, 'text'),
|
||||||
|
makeTextDelta(0, 'Hello'),
|
||||||
|
makeContentBlockStop(0),
|
||||||
|
makeMessageDelta('end_turn', 10),
|
||||||
|
makeMessageStop(),
|
||||||
|
]
|
||||||
|
|
||||||
|
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||||
|
|
||||||
|
expect(assistantMessages).toHaveLength(1)
|
||||||
|
expect(assistantMessages[0]!.message.stop_reason).toBe('end_turn')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('assembled AssistantMessage has stop_reason tool_use', async () => {
|
||||||
|
_nextEvents = [
|
||||||
|
makeMessageStart(),
|
||||||
|
makeContentBlockStart(0, 'tool_use'),
|
||||||
|
makeInputJsonDelta(0, '{"cmd":"ls"}'),
|
||||||
|
makeContentBlockStop(0),
|
||||||
|
makeMessageDelta('tool_use', 20),
|
||||||
|
makeMessageStop(),
|
||||||
|
]
|
||||||
|
|
||||||
|
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||||
|
|
||||||
|
expect(assistantMessages).toHaveLength(1)
|
||||||
|
expect(assistantMessages[0]!.message.stop_reason).toBe('tool_use')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('assembled AssistantMessage has stop_reason max_tokens', async () => {
|
||||||
|
_nextEvents = [
|
||||||
|
makeMessageStart(),
|
||||||
|
makeContentBlockStart(0, 'text'),
|
||||||
|
makeTextDelta(0, 'truncated'),
|
||||||
|
makeContentBlockStop(0),
|
||||||
|
makeMessageDelta('max_tokens', 8192),
|
||||||
|
makeMessageStop(),
|
||||||
|
]
|
||||||
|
|
||||||
|
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||||
|
|
||||||
|
// Two assistant-typed items: the content message + the max_output_tokens error signal.
|
||||||
|
// The error signal is emitted as a synthetic assistant message by createAssistantAPIErrorMessage.
|
||||||
|
expect(assistantMessages).toHaveLength(2)
|
||||||
|
const contentMsg = assistantMessages[0]!
|
||||||
|
expect(contentMsg.message.stop_reason).toBe('max_tokens')
|
||||||
|
// Second item is the error signal (has apiError set)
|
||||||
|
const errorMsg = assistantMessages[1]!.message as any
|
||||||
|
expect(errorMsg.apiError).toBe('max_output_tokens')
|
||||||
|
})
|
||||||
|
|
||||||
|
test('stop_reason is null when no message_delta was received (safety fallback path)', async () => {
|
||||||
|
// Stream ends without message_stop — triggers the safety fallback branch.
|
||||||
|
// stop_reason stays null since no message_delta was ever seen.
|
||||||
|
_nextEvents = [
|
||||||
|
makeMessageStart(),
|
||||||
|
makeContentBlockStart(0, 'text'),
|
||||||
|
makeTextDelta(0, 'partial'),
|
||||||
|
makeContentBlockStop(0),
|
||||||
|
// No message_delta / message_stop
|
||||||
|
]
|
||||||
|
|
||||||
|
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||||
|
|
||||||
|
// Safety fallback should yield the partial content
|
||||||
|
expect(assistantMessages).toHaveLength(1)
|
||||||
|
expect(assistantMessages[0]!.message.stop_reason).toBeNull()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('queryModelOpenAI — usage accumulation', () => {
|
||||||
|
test('usage in assembled message reflects all four fields from message_delta', async () => {
|
||||||
|
// message_start has all fields=0 (trailing-chunk pattern: usage not yet available).
|
||||||
|
// message_delta carries the real values after stream ends.
|
||||||
|
// The spread in the message_delta handler must override all zeros from message_start,
|
||||||
|
// including cache_read_input_tokens which was previously missing from message_delta.
|
||||||
|
_nextEvents = [
|
||||||
|
makeMessageStart({
|
||||||
|
usage: {
|
||||||
|
input_tokens: 0,
|
||||||
|
output_tokens: 0,
|
||||||
|
cache_creation_input_tokens: 0,
|
||||||
|
cache_read_input_tokens: 0,
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
makeContentBlockStart(0, 'text'),
|
||||||
|
makeTextDelta(0, 'response'),
|
||||||
|
makeContentBlockStop(0),
|
||||||
|
// message_delta carries all four Anthropic usage fields (as emitted by the fixed streamAdapter)
|
||||||
|
{
|
||||||
|
type: 'message_delta',
|
||||||
|
delta: { stop_reason: 'end_turn', stop_sequence: null },
|
||||||
|
usage: {
|
||||||
|
input_tokens: 30011,
|
||||||
|
output_tokens: 190,
|
||||||
|
cache_read_input_tokens: 19904,
|
||||||
|
cache_creation_input_tokens: 0,
|
||||||
|
},
|
||||||
|
} as any,
|
||||||
|
makeMessageStop(),
|
||||||
|
]
|
||||||
|
|
||||||
|
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||||
|
|
||||||
|
expect(assistantMessages).toHaveLength(1)
|
||||||
|
const usage = assistantMessages[0]!.message.usage as any
|
||||||
|
expect(usage.input_tokens).toBe(30011)
|
||||||
|
expect(usage.output_tokens).toBe(190)
|
||||||
|
// cache_read_input_tokens from message_delta overrides the 0 from message_start
|
||||||
|
expect(usage.cache_read_input_tokens).toBe(19904)
|
||||||
|
expect(usage.cache_creation_input_tokens).toBe(0)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('usage is zero when no usage events arrive (prevents false autocompact)', async () => {
|
||||||
|
// If usage stays 0, tokenCountWithEstimation will undercount — so at least
|
||||||
|
// verify the field exists and is numeric (to detect regressions).
|
||||||
|
_nextEvents = [
|
||||||
|
makeMessageStart(),
|
||||||
|
makeContentBlockStart(0, 'text'),
|
||||||
|
makeTextDelta(0, 'hi'),
|
||||||
|
makeContentBlockStop(0),
|
||||||
|
makeMessageDelta('end_turn', 0),
|
||||||
|
makeMessageStop(),
|
||||||
|
]
|
||||||
|
|
||||||
|
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||||
|
|
||||||
|
const usage = assistantMessages[0]!.message.usage as any
|
||||||
|
expect(typeof usage.input_tokens).toBe('number')
|
||||||
|
expect(typeof usage.output_tokens).toBe('number')
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('queryModelOpenAI — no duplicate AssistantMessage (partialMessage reset)', () => {
|
||||||
|
test('yields exactly one AssistantMessage per message_stop when content is present', async () => {
|
||||||
|
_nextEvents = [
|
||||||
|
makeMessageStart(),
|
||||||
|
makeContentBlockStart(0, 'text'),
|
||||||
|
makeTextDelta(0, 'only once'),
|
||||||
|
makeContentBlockStop(0),
|
||||||
|
makeMessageDelta('end_turn', 5),
|
||||||
|
makeMessageStop(),
|
||||||
|
]
|
||||||
|
|
||||||
|
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||||
|
|
||||||
|
// Before the fix, partialMessage was not reset to null, so the safety
|
||||||
|
// fallback at the end of the loop would yield a second message with the
|
||||||
|
// same message.id — causing mergeAssistantMessages to concatenate content.
|
||||||
|
expect(assistantMessages).toHaveLength(1)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('thinking + text response yields exactly one AssistantMessage', async () => {
|
||||||
|
_nextEvents = [
|
||||||
|
makeMessageStart(),
|
||||||
|
makeContentBlockStart(0, 'thinking'),
|
||||||
|
makeThinkingDelta(0, 'let me think'),
|
||||||
|
makeContentBlockStop(0),
|
||||||
|
makeContentBlockStart(1, 'text'),
|
||||||
|
makeTextDelta(1, 'answer'),
|
||||||
|
makeContentBlockStop(1),
|
||||||
|
makeMessageDelta('end_turn', 30),
|
||||||
|
makeMessageStop(),
|
||||||
|
]
|
||||||
|
|
||||||
|
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||||
|
|
||||||
|
expect(assistantMessages).toHaveLength(1)
|
||||||
|
})
|
||||||
|
|
||||||
|
test('safety fallback path still yields message when stream ends without message_stop', async () => {
|
||||||
|
// Simulates a stream that cuts off without the normal termination sequence.
|
||||||
|
_nextEvents = [
|
||||||
|
makeMessageStart(),
|
||||||
|
makeContentBlockStart(0, 'text'),
|
||||||
|
makeTextDelta(0, 'abrupt end'),
|
||||||
|
// No content_block_stop, no message_delta, no message_stop
|
||||||
|
]
|
||||||
|
|
||||||
|
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||||
|
|
||||||
|
expect(assistantMessages).toHaveLength(1)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('queryModelOpenAI — stream_events forwarded', () => {
|
||||||
|
test('every adapted event is also yielded as stream_event for real-time display', async () => {
|
||||||
|
_nextEvents = [
|
||||||
|
makeMessageStart(),
|
||||||
|
makeContentBlockStart(0, 'text'),
|
||||||
|
makeTextDelta(0, 'hello'),
|
||||||
|
makeContentBlockStop(0),
|
||||||
|
makeMessageDelta('end_turn', 5),
|
||||||
|
makeMessageStop(),
|
||||||
|
]
|
||||||
|
|
||||||
|
const { streamEvents } = await runQueryModel(_nextEvents)
|
||||||
|
|
||||||
|
const eventTypes = streamEvents.map(e => (e as any).event?.type)
|
||||||
|
expect(eventTypes).toContain('message_start')
|
||||||
|
expect(eventTypes).toContain('content_block_start')
|
||||||
|
expect(eventTypes).toContain('content_block_delta')
|
||||||
|
expect(eventTypes).toContain('content_block_stop')
|
||||||
|
expect(eventTypes).toContain('message_delta')
|
||||||
|
expect(eventTypes).toContain('message_stop')
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('queryModelOpenAI — max_tokens forwarded to request', () => {
|
||||||
|
test('buildOpenAIRequestBody includes max_tokens in the request payload', async () => {
|
||||||
|
_nextEvents = [
|
||||||
|
makeMessageStart(),
|
||||||
|
makeContentBlockStart(0, 'text'),
|
||||||
|
makeTextDelta(0, 'hi'),
|
||||||
|
makeContentBlockStop(0),
|
||||||
|
makeMessageDelta('end_turn', 5),
|
||||||
|
makeMessageStop(),
|
||||||
|
]
|
||||||
|
|
||||||
|
await runQueryModel(_nextEvents)
|
||||||
|
|
||||||
|
expect(_lastCreateArgs).not.toBeNull()
|
||||||
|
expect(_lastCreateArgs!.max_tokens).toBe(8192)
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -1,4 +1,6 @@
|
|||||||
import OpenAI from 'openai'
|
import OpenAI from 'openai'
|
||||||
|
import { openaiAdapter } from 'src/services/providerUsage/adapters/openai.js'
|
||||||
|
import { updateProviderBuckets } from 'src/services/providerUsage/store.js'
|
||||||
import { getProxyFetchOptions } from 'src/utils/proxy.js'
|
import { getProxyFetchOptions } from 'src/utils/proxy.js'
|
||||||
import { isEnvTruthy } from 'src/utils/envUtils.js'
|
import { isEnvTruthy } from 'src/utils/envUtils.js'
|
||||||
|
|
||||||
@@ -13,6 +15,28 @@ import { isEnvTruthy } from 'src/utils/envUtils.js'
|
|||||||
|
|
||||||
let cachedClient: OpenAI | null = null
|
let cachedClient: OpenAI | null = null
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wrap a fetch so that every response's rate-limit headers are fed into the
|
||||||
|
* provider usage store. Errors in parsing must never break the request.
|
||||||
|
*
|
||||||
|
* The cast to `typeof fetch` is safe: OpenAI SDK only calls the function form,
|
||||||
|
* not the static `preconnect` method that Bun/Node's `fetch` type declares.
|
||||||
|
*/
|
||||||
|
function wrapFetchForUsage(base: typeof fetch): typeof fetch {
|
||||||
|
const wrapped = async (
|
||||||
|
...args: Parameters<typeof fetch>
|
||||||
|
): Promise<Response> => {
|
||||||
|
const res = await base(...args)
|
||||||
|
try {
|
||||||
|
updateProviderBuckets('openai', openaiAdapter.parseHeaders(res.headers))
|
||||||
|
} catch {
|
||||||
|
// Ignore — usage tracking must not affect the request path.
|
||||||
|
}
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
return wrapped as unknown as typeof fetch
|
||||||
|
}
|
||||||
|
|
||||||
export function getOpenAIClient(options?: {
|
export function getOpenAIClient(options?: {
|
||||||
maxRetries?: number
|
maxRetries?: number
|
||||||
fetchOverride?: typeof fetch
|
fetchOverride?: typeof fetch
|
||||||
@@ -23,6 +47,9 @@ export function getOpenAIClient(options?: {
|
|||||||
const apiKey = process.env.OPENAI_API_KEY || ''
|
const apiKey = process.env.OPENAI_API_KEY || ''
|
||||||
const baseURL = process.env.OPENAI_BASE_URL
|
const baseURL = process.env.OPENAI_BASE_URL
|
||||||
|
|
||||||
|
const baseFetch = options?.fetchOverride ?? (globalThis.fetch as typeof fetch)
|
||||||
|
const wrappedFetch = wrapFetchForUsage(baseFetch)
|
||||||
|
|
||||||
const client = new OpenAI({
|
const client = new OpenAI({
|
||||||
apiKey,
|
apiKey,
|
||||||
...(baseURL && { baseURL }),
|
...(baseURL && { baseURL }),
|
||||||
@@ -32,7 +59,7 @@ export function getOpenAIClient(options?: {
|
|||||||
...(process.env.OPENAI_ORG_ID && { organization: process.env.OPENAI_ORG_ID }),
|
...(process.env.OPENAI_ORG_ID && { organization: process.env.OPENAI_ORG_ID }),
|
||||||
...(process.env.OPENAI_PROJECT_ID && { project: process.env.OPENAI_PROJECT_ID }),
|
...(process.env.OPENAI_PROJECT_ID && { project: process.env.OPENAI_PROJECT_ID }),
|
||||||
fetchOptions: getProxyFetchOptions({ forAnthropicAPI: false }),
|
fetchOptions: getProxyFetchOptions({ forAnthropicAPI: false }),
|
||||||
...(options?.fetchOverride && { fetch: options.fetchOverride }),
|
fetch: wrappedFetch,
|
||||||
})
|
})
|
||||||
|
|
||||||
if (!options?.fetchOverride) {
|
if (!options?.fetchOverride) {
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
// Auto-generated type stub — replace with real implementation
|
// Auto-generated type stub — replace with real implementation
|
||||||
export type EffortValue = 'low' | 'medium' | 'high' | 'max' | number;
|
export type EffortValue = 'low' | 'medium' | 'high' | 'xhigh' | 'max' | number
|
||||||
export type modelSupportsEffort = (model: string) => boolean;
|
export type modelSupportsEffort = (model: string) => boolean
|
||||||
export type EffortLevel = 'low' | 'medium' | 'high' | 'max';
|
export type EffortLevel = 'low' | 'medium' | 'high' | 'xhigh' | 'max'
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from
|
|||||||
import { logEvent } from './analytics/index.js'
|
import { logEvent } from './analytics/index.js'
|
||||||
import { getAPIMetadata } from './api/claude.js'
|
import { getAPIMetadata } from './api/claude.js'
|
||||||
import { getAnthropicClient } from './api/client.js'
|
import { getAnthropicClient } from './api/client.js'
|
||||||
|
import { anthropicAdapter } from './providerUsage/adapters/anthropic.js'
|
||||||
|
import { updateProviderBuckets } from './providerUsage/store.js'
|
||||||
import {
|
import {
|
||||||
processRateLimitHeaders,
|
processRateLimitHeaders,
|
||||||
shouldProcessRateLimits,
|
shouldProcessRateLimits,
|
||||||
@@ -205,7 +207,6 @@ async function makeTestQuery() {
|
|||||||
})
|
})
|
||||||
const messages: MessageParam[] = [{ role: 'user', content: 'quota' }]
|
const messages: MessageParam[] = [{ role: 'user', content: 'quota' }]
|
||||||
const betas = getModelBetas(model)
|
const betas = getModelBetas(model)
|
||||||
// biome-ignore lint/plugin: quota check needs raw response access via asResponse()
|
|
||||||
return anthropic.beta.messages
|
return anthropic.beta.messages
|
||||||
.create({
|
.create({
|
||||||
model,
|
model,
|
||||||
@@ -460,6 +461,7 @@ export function extractQuotaStatusFromHeaders(
|
|||||||
if (!shouldProcessRateLimits(isSubscriber)) {
|
if (!shouldProcessRateLimits(isSubscriber)) {
|
||||||
// If we have any rate limit state, clear it
|
// If we have any rate limit state, clear it
|
||||||
rawUtilization = {}
|
rawUtilization = {}
|
||||||
|
updateProviderBuckets('anthropic', [])
|
||||||
if (currentLimits.status !== 'allowed' || currentLimits.resetsAt) {
|
if (currentLimits.status !== 'allowed' || currentLimits.resetsAt) {
|
||||||
const defaultLimits: ClaudeAILimits = {
|
const defaultLimits: ClaudeAILimits = {
|
||||||
status: 'allowed',
|
status: 'allowed',
|
||||||
@@ -474,6 +476,10 @@ export function extractQuotaStatusFromHeaders(
|
|||||||
// Process headers (applies mocks from /mock-limits command if active)
|
// Process headers (applies mocks from /mock-limits command if active)
|
||||||
const headersToUse = processRateLimitHeaders(headers)
|
const headersToUse = processRateLimitHeaders(headers)
|
||||||
rawUtilization = extractRawUtilization(headersToUse)
|
rawUtilization = extractRawUtilization(headersToUse)
|
||||||
|
updateProviderBuckets(
|
||||||
|
'anthropic',
|
||||||
|
anthropicAdapter.parseHeaders(headersToUse),
|
||||||
|
)
|
||||||
const newLimits = computeNewLimitsFromHeaders(headersToUse)
|
const newLimits = computeNewLimitsFromHeaders(headersToUse)
|
||||||
|
|
||||||
// Cache extra usage status (persists across sessions)
|
// Cache extra usage status (persists across sessions)
|
||||||
@@ -498,6 +504,10 @@ export function extractQuotaStatusFromError(error: APIError): void {
|
|||||||
// Process headers (applies mocks from /mock-limits command if active)
|
// Process headers (applies mocks from /mock-limits command if active)
|
||||||
const headersToUse = processRateLimitHeaders(error.headers)
|
const headersToUse = processRateLimitHeaders(error.headers)
|
||||||
rawUtilization = extractRawUtilization(headersToUse)
|
rawUtilization = extractRawUtilization(headersToUse)
|
||||||
|
updateProviderBuckets(
|
||||||
|
'anthropic',
|
||||||
|
anthropicAdapter.parseHeaders(headersToUse),
|
||||||
|
)
|
||||||
newLimits = computeNewLimitsFromHeaders(headersToUse)
|
newLimits = computeNewLimitsFromHeaders(headersToUse)
|
||||||
|
|
||||||
// Cache extra usage status (persists across sessions)
|
// Cache extra usage status (persists across sessions)
|
||||||
|
|||||||
@@ -1,23 +1,23 @@
|
|||||||
import { describe, expect, test, beforeEach, afterEach, mock } from "bun:test";
|
import { describe, expect, test, beforeEach, afterEach, mock } from 'bun:test'
|
||||||
|
|
||||||
// Mock heavy dependencies to avoid import chain issues
|
// Mock heavy dependencies to avoid import chain issues
|
||||||
mock.module("src/utils/thinking.js", () => ({
|
mock.module('src/utils/thinking.js', () => ({
|
||||||
isUltrathinkEnabled: () => false,
|
isUltrathinkEnabled: () => false,
|
||||||
}));
|
}))
|
||||||
mock.module("src/utils/settings/settings.js", () => ({
|
mock.module('src/utils/settings/settings.js', () => ({
|
||||||
getInitialSettings: () => ({}),
|
getInitialSettings: () => ({}),
|
||||||
}));
|
}))
|
||||||
mock.module("src/utils/auth.js", () => ({
|
mock.module('src/utils/auth.js', () => ({
|
||||||
isProSubscriber: () => false,
|
isProSubscriber: () => false,
|
||||||
isMaxSubscriber: () => false,
|
isMaxSubscriber: () => false,
|
||||||
isTeamSubscriber: () => false,
|
isTeamSubscriber: () => false,
|
||||||
}));
|
}))
|
||||||
mock.module("src/services/analytics/growthbook.js", () => ({
|
mock.module('src/services/analytics/growthbook.js', () => ({
|
||||||
getFeatureValue_CACHED_MAY_BE_STALE: () => null,
|
getFeatureValue_CACHED_MAY_BE_STALE: (_key: string, defaultValue: unknown) => defaultValue ?? {},
|
||||||
}));
|
}))
|
||||||
mock.module("src/utils/model/modelSupportOverrides.js", () => ({
|
mock.module('src/utils/model/modelSupportOverrides.js', () => ({
|
||||||
get3PModelCapabilityOverride: () => undefined,
|
get3PModelCapabilityOverride: () => undefined,
|
||||||
}));
|
}))
|
||||||
|
|
||||||
const {
|
const {
|
||||||
isEffortLevel,
|
isEffortLevel,
|
||||||
@@ -27,229 +27,249 @@ const {
|
|||||||
getEffortLevelDescription,
|
getEffortLevelDescription,
|
||||||
resolvePickerEffortPersistence,
|
resolvePickerEffortPersistence,
|
||||||
EFFORT_LEVELS,
|
EFFORT_LEVELS,
|
||||||
} = await import("src/utils/effort.js");
|
} = await import('src/utils/effort.js')
|
||||||
|
|
||||||
// ─── EFFORT_LEVELS constant ────────────────────────────────────────────
|
// ─── EFFORT_LEVELS constant ────────────────────────────────────────────
|
||||||
|
|
||||||
describe("EFFORT_LEVELS", () => {
|
describe('EFFORT_LEVELS', () => {
|
||||||
test("contains the four canonical levels", () => {
|
test('contains the five canonical levels', () => {
|
||||||
expect(EFFORT_LEVELS).toEqual(["low", "medium", "high", "max"]);
|
expect(EFFORT_LEVELS).toEqual(['low', 'medium', 'high', 'xhigh', 'max'])
|
||||||
});
|
})
|
||||||
});
|
})
|
||||||
|
|
||||||
// ─── isEffortLevel ─────────────────────────────────────────────────────
|
// ─── isEffortLevel ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
describe("isEffortLevel", () => {
|
describe('isEffortLevel', () => {
|
||||||
test("returns true for 'low'", () => {
|
test("returns true for 'low'", () => {
|
||||||
expect(isEffortLevel("low")).toBe(true);
|
expect(isEffortLevel('low')).toBe(true)
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns true for 'medium'", () => {
|
test("returns true for 'medium'", () => {
|
||||||
expect(isEffortLevel("medium")).toBe(true);
|
expect(isEffortLevel('medium')).toBe(true)
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns true for 'high'", () => {
|
test("returns true for 'high'", () => {
|
||||||
expect(isEffortLevel("high")).toBe(true);
|
expect(isEffortLevel('high')).toBe(true)
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns true for 'max'", () => {
|
test("returns true for 'max'", () => {
|
||||||
expect(isEffortLevel("max")).toBe(true);
|
expect(isEffortLevel('max')).toBe(true)
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns false for 'invalid'", () => {
|
test("returns false for 'invalid'", () => {
|
||||||
expect(isEffortLevel("invalid")).toBe(false);
|
expect(isEffortLevel('invalid')).toBe(false)
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns false for empty string", () => {
|
test('returns false for empty string', () => {
|
||||||
expect(isEffortLevel("")).toBe(false);
|
expect(isEffortLevel('')).toBe(false)
|
||||||
});
|
})
|
||||||
});
|
})
|
||||||
|
|
||||||
// ─── parseEffortValue ──────────────────────────────────────────────────
|
// ─── parseEffortValue ──────────────────────────────────────────────────
|
||||||
|
|
||||||
describe("parseEffortValue", () => {
|
describe('parseEffortValue', () => {
|
||||||
test("returns undefined for undefined", () => {
|
test('returns undefined for undefined', () => {
|
||||||
expect(parseEffortValue(undefined)).toBeUndefined();
|
expect(parseEffortValue(undefined)).toBeUndefined()
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns undefined for null", () => {
|
test('returns undefined for null', () => {
|
||||||
expect(parseEffortValue(null)).toBeUndefined();
|
expect(parseEffortValue(null)).toBeUndefined()
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns undefined for empty string", () => {
|
test('returns undefined for empty string', () => {
|
||||||
expect(parseEffortValue("")).toBeUndefined();
|
expect(parseEffortValue('')).toBeUndefined()
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns number for integer input", () => {
|
test('returns number for integer input', () => {
|
||||||
expect(parseEffortValue(42)).toBe(42);
|
expect(parseEffortValue(42)).toBe(42)
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns string for valid effort level string", () => {
|
test('returns string for valid effort level string', () => {
|
||||||
expect(parseEffortValue("low")).toBe("low");
|
expect(parseEffortValue('low')).toBe('low')
|
||||||
expect(parseEffortValue("medium")).toBe("medium");
|
expect(parseEffortValue('medium')).toBe('medium')
|
||||||
expect(parseEffortValue("high")).toBe("high");
|
expect(parseEffortValue('high')).toBe('high')
|
||||||
expect(parseEffortValue("max")).toBe("max");
|
expect(parseEffortValue('max')).toBe('max')
|
||||||
});
|
})
|
||||||
|
|
||||||
test("parses numeric string to number", () => {
|
test('parses numeric string to number', () => {
|
||||||
expect(parseEffortValue("42")).toBe(42);
|
expect(parseEffortValue('42')).toBe(42)
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns undefined for invalid string", () => {
|
test('returns undefined for invalid string', () => {
|
||||||
expect(parseEffortValue("invalid")).toBeUndefined();
|
expect(parseEffortValue('invalid')).toBeUndefined()
|
||||||
});
|
})
|
||||||
|
|
||||||
test("non-integer number falls through to string parsing (parseInt truncates)", () => {
|
test('non-integer number falls through to string parsing (parseInt truncates)', () => {
|
||||||
// 3.14 fails isValidNumericEffort, then String(3.14) -> "3.14" -> parseInt = 3
|
// 3.14 fails isValidNumericEffort, then String(3.14) -> "3.14" -> parseInt = 3
|
||||||
expect(parseEffortValue(3.14)).toBe(3);
|
expect(parseEffortValue(3.14)).toBe(3)
|
||||||
});
|
})
|
||||||
|
|
||||||
test("handles case-insensitive effort level strings", () => {
|
test('handles case-insensitive effort level strings', () => {
|
||||||
expect(parseEffortValue("LOW")).toBe("low");
|
expect(parseEffortValue('LOW')).toBe('low')
|
||||||
expect(parseEffortValue("HIGH")).toBe("high");
|
expect(parseEffortValue('HIGH')).toBe('high')
|
||||||
});
|
})
|
||||||
});
|
})
|
||||||
|
|
||||||
// ─── isValidNumericEffort ──────────────────────────────────────────────
|
// ─── isValidNumericEffort ──────────────────────────────────────────────
|
||||||
|
|
||||||
describe("isValidNumericEffort", () => {
|
describe('isValidNumericEffort', () => {
|
||||||
test("returns true for integer", () => {
|
test('returns true for integer', () => {
|
||||||
expect(isValidNumericEffort(50)).toBe(true);
|
expect(isValidNumericEffort(50)).toBe(true)
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns true for zero", () => {
|
test('returns true for zero', () => {
|
||||||
expect(isValidNumericEffort(0)).toBe(true);
|
expect(isValidNumericEffort(0)).toBe(true)
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns true for negative integer", () => {
|
test('returns true for negative integer', () => {
|
||||||
expect(isValidNumericEffort(-1)).toBe(true);
|
expect(isValidNumericEffort(-1)).toBe(true)
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns false for float", () => {
|
test('returns false for float', () => {
|
||||||
expect(isValidNumericEffort(3.14)).toBe(false);
|
expect(isValidNumericEffort(3.14)).toBe(false)
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns false for NaN", () => {
|
test('returns false for NaN', () => {
|
||||||
expect(isValidNumericEffort(NaN)).toBe(false);
|
expect(isValidNumericEffort(NaN)).toBe(false)
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns false for Infinity", () => {
|
test('returns false for Infinity', () => {
|
||||||
expect(isValidNumericEffort(Infinity)).toBe(false);
|
expect(isValidNumericEffort(Infinity)).toBe(false)
|
||||||
});
|
})
|
||||||
});
|
})
|
||||||
|
|
||||||
// ─── convertEffortValueToLevel ─────────────────────────────────────────
|
// ─── convertEffortValueToLevel ─────────────────────────────────────────
|
||||||
|
|
||||||
describe("convertEffortValueToLevel", () => {
|
describe('convertEffortValueToLevel', () => {
|
||||||
test("returns valid effort level string as-is", () => {
|
test('returns valid effort level string as-is', () => {
|
||||||
expect(convertEffortValueToLevel("low")).toBe("low");
|
expect(convertEffortValueToLevel('low')).toBe('low')
|
||||||
expect(convertEffortValueToLevel("medium")).toBe("medium");
|
expect(convertEffortValueToLevel('medium')).toBe('medium')
|
||||||
expect(convertEffortValueToLevel("high")).toBe("high");
|
expect(convertEffortValueToLevel('high')).toBe('high')
|
||||||
expect(convertEffortValueToLevel("max")).toBe("max");
|
expect(convertEffortValueToLevel('max')).toBe('max')
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns 'high' for unknown string", () => {
|
test("returns 'high' for unknown string", () => {
|
||||||
expect(convertEffortValueToLevel("unknown" as any)).toBe("high");
|
expect(convertEffortValueToLevel('unknown' as any)).toBe('high')
|
||||||
});
|
})
|
||||||
|
|
||||||
test("non-ant numeric value returns 'high'", () => {
|
test("non-ant numeric value returns 'high'", () => {
|
||||||
const saved = process.env.USER_TYPE;
|
const saved = process.env.USER_TYPE
|
||||||
delete process.env.USER_TYPE;
|
delete process.env.USER_TYPE
|
||||||
|
|
||||||
expect(convertEffortValueToLevel(50)).toBe("high");
|
expect(convertEffortValueToLevel(50)).toBe('high')
|
||||||
expect(convertEffortValueToLevel(100)).toBe("high");
|
expect(convertEffortValueToLevel(100)).toBe('high')
|
||||||
|
|
||||||
process.env.USER_TYPE = saved;
|
process.env.USER_TYPE = saved
|
||||||
});
|
})
|
||||||
|
|
||||||
describe("ant numeric mapping", () => {
|
describe('ant numeric mapping', () => {
|
||||||
let savedUserType: string | undefined;
|
let savedUserType: string | undefined
|
||||||
|
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
savedUserType = process.env.USER_TYPE;
|
savedUserType = process.env.USER_TYPE
|
||||||
process.env.USER_TYPE = "ant";
|
process.env.USER_TYPE = 'ant'
|
||||||
});
|
})
|
||||||
|
|
||||||
afterEach(() => {
|
afterEach(() => {
|
||||||
if (savedUserType === undefined) {
|
if (savedUserType === undefined) {
|
||||||
delete process.env.USER_TYPE;
|
delete process.env.USER_TYPE
|
||||||
} else {
|
} else {
|
||||||
process.env.USER_TYPE = savedUserType;
|
process.env.USER_TYPE = savedUserType
|
||||||
}
|
}
|
||||||
});
|
})
|
||||||
|
|
||||||
test("value <= 50 maps to 'low'", () => {
|
test("value <= 50 maps to 'low'", () => {
|
||||||
expect(convertEffortValueToLevel(50)).toBe("low");
|
expect(convertEffortValueToLevel(50)).toBe('low')
|
||||||
expect(convertEffortValueToLevel(0)).toBe("low");
|
expect(convertEffortValueToLevel(0)).toBe('low')
|
||||||
expect(convertEffortValueToLevel(-10)).toBe("low");
|
expect(convertEffortValueToLevel(-10)).toBe('low')
|
||||||
});
|
})
|
||||||
|
|
||||||
test("value 51-85 maps to 'medium'", () => {
|
test("value 51-85 maps to 'medium'", () => {
|
||||||
expect(convertEffortValueToLevel(51)).toBe("medium");
|
expect(convertEffortValueToLevel(51)).toBe('medium')
|
||||||
expect(convertEffortValueToLevel(85)).toBe("medium");
|
expect(convertEffortValueToLevel(85)).toBe('medium')
|
||||||
});
|
})
|
||||||
|
|
||||||
test("value 86-100 maps to 'high'", () => {
|
test("value 86-100 maps to 'high'", () => {
|
||||||
expect(convertEffortValueToLevel(86)).toBe("high");
|
expect(convertEffortValueToLevel(86)).toBe('high')
|
||||||
expect(convertEffortValueToLevel(100)).toBe("high");
|
expect(convertEffortValueToLevel(100)).toBe('high')
|
||||||
});
|
})
|
||||||
|
|
||||||
test("value > 100 maps to 'max'", () => {
|
test("value > 100 maps to 'max'", () => {
|
||||||
expect(convertEffortValueToLevel(101)).toBe("max");
|
expect(convertEffortValueToLevel(101)).toBe('max')
|
||||||
expect(convertEffortValueToLevel(200)).toBe("max");
|
expect(convertEffortValueToLevel(200)).toBe('max')
|
||||||
});
|
})
|
||||||
});
|
})
|
||||||
});
|
})
|
||||||
|
|
||||||
// ─── getEffortLevelDescription ─────────────────────────────────────────
|
// ─── getEffortLevelDescription ─────────────────────────────────────────
|
||||||
|
|
||||||
describe("getEffortLevelDescription", () => {
|
describe('getEffortLevelDescription', () => {
|
||||||
test("returns description for 'low'", () => {
|
test("returns description for 'low'", () => {
|
||||||
const desc = getEffortLevelDescription("low");
|
const desc = getEffortLevelDescription('low')
|
||||||
expect(desc).toContain("Quick");
|
expect(desc).toContain('Quick')
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns description for 'medium'", () => {
|
test("returns description for 'medium'", () => {
|
||||||
const desc = getEffortLevelDescription("medium");
|
const desc = getEffortLevelDescription('medium')
|
||||||
expect(desc).toContain("Balanced");
|
expect(desc).toContain('Balanced')
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns description for 'high'", () => {
|
test("returns description for 'high'", () => {
|
||||||
const desc = getEffortLevelDescription("high");
|
const desc = getEffortLevelDescription('high')
|
||||||
expect(desc).toContain("Comprehensive");
|
expect(desc).toContain('Comprehensive')
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns description for 'max'", () => {
|
test("returns description for 'max'", () => {
|
||||||
const desc = getEffortLevelDescription("max");
|
const desc = getEffortLevelDescription('max')
|
||||||
expect(desc).toContain("Maximum");
|
expect(desc).toContain('Maximum')
|
||||||
});
|
})
|
||||||
});
|
})
|
||||||
|
|
||||||
// ─── resolvePickerEffortPersistence ────────────────────────────────────
|
// ─── resolvePickerEffortPersistence ────────────────────────────────────
|
||||||
|
|
||||||
describe("resolvePickerEffortPersistence", () => {
|
describe('resolvePickerEffortPersistence', () => {
|
||||||
test("returns undefined when picked matches model default and no prior persistence", () => {
|
test('returns undefined when picked matches model default and no prior persistence', () => {
|
||||||
const result = resolvePickerEffortPersistence("high", "high", undefined, false);
|
const result = resolvePickerEffortPersistence(
|
||||||
expect(result).toBeUndefined();
|
'high',
|
||||||
});
|
'high',
|
||||||
|
undefined,
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
expect(result).toBeUndefined()
|
||||||
|
})
|
||||||
|
|
||||||
test("returns picked when it differs from model default", () => {
|
test('returns picked when it differs from model default', () => {
|
||||||
const result = resolvePickerEffortPersistence("low", "high", undefined, false);
|
const result = resolvePickerEffortPersistence(
|
||||||
expect(result).toBe("low");
|
'low',
|
||||||
});
|
'high',
|
||||||
|
undefined,
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
expect(result).toBe('low')
|
||||||
|
})
|
||||||
|
|
||||||
test("returns picked when priorPersisted is set (even if same as default)", () => {
|
test('returns picked when priorPersisted is set (even if same as default)', () => {
|
||||||
const result = resolvePickerEffortPersistence("high", "high", "high", false);
|
const result = resolvePickerEffortPersistence('high', 'high', 'high', false)
|
||||||
expect(result).toBe("high");
|
expect(result).toBe('high')
|
||||||
});
|
})
|
||||||
|
|
||||||
test("returns picked when toggledInPicker is true (even if same as default)", () => {
|
test('returns picked when toggledInPicker is true (even if same as default)', () => {
|
||||||
const result = resolvePickerEffortPersistence("high", "high", undefined, true);
|
const result = resolvePickerEffortPersistence(
|
||||||
expect(result).toBe("high");
|
'high',
|
||||||
});
|
'high',
|
||||||
|
undefined,
|
||||||
|
true,
|
||||||
|
)
|
||||||
|
expect(result).toBe('high')
|
||||||
|
})
|
||||||
|
|
||||||
test("returns undefined picked value when no explicit and matches default", () => {
|
test('returns undefined picked value when no explicit and matches default', () => {
|
||||||
const result = resolvePickerEffortPersistence(undefined, "high" as any, undefined, false);
|
const result = resolvePickerEffortPersistence(
|
||||||
expect(result).toBeUndefined();
|
undefined,
|
||||||
});
|
'high' as any,
|
||||||
});
|
undefined,
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
expect(result).toBeUndefined()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|||||||
@@ -360,9 +360,7 @@ export function splitSysPromptPrefix(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (useGlobalCacheFeature) {
|
if (useGlobalCacheFeature) {
|
||||||
const boundaryIndex = systemPrompt.findIndex(
|
const boundaryIndex = systemPrompt.indexOf(SYSTEM_PROMPT_DYNAMIC_BOUNDARY)
|
||||||
s => s === SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
|
|
||||||
)
|
|
||||||
if (boundaryIndex !== -1) {
|
if (boundaryIndex !== -1) {
|
||||||
let attributionHeader: string | undefined
|
let attributionHeader: string | undefined
|
||||||
let systemPromptPrefix: string | undefined
|
let systemPromptPrefix: string | undefined
|
||||||
|
|||||||
@@ -68,7 +68,6 @@ export function filterAllowedSdkBetas(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (isClaudeAISubscriber()) {
|
if (isClaudeAISubscriber()) {
|
||||||
// biome-ignore lint/suspicious/noConsole: intentional warning
|
|
||||||
console.warn(
|
console.warn(
|
||||||
'Warning: Custom betas are only available for API key users. Ignoring provided betas.',
|
'Warning: Custom betas are only available for API key users. Ignoring provided betas.',
|
||||||
)
|
)
|
||||||
@@ -77,7 +76,6 @@ export function filterAllowedSdkBetas(
|
|||||||
|
|
||||||
const { allowed, disallowed } = partitionBetasByAllowlist(sdkBetas)
|
const { allowed, disallowed } = partitionBetasByAllowlist(sdkBetas)
|
||||||
for (const beta of disallowed) {
|
for (const beta of disallowed) {
|
||||||
// biome-ignore lint/suspicious/noConsole: intentional warning
|
|
||||||
console.warn(
|
console.warn(
|
||||||
`Warning: Beta header '${beta}' is not allowed. Only the following betas are supported: ${ALLOWED_SDK_BETAS.join(', ')}`,
|
`Warning: Beta header '${beta}' is not allowed. Only the following betas are supported: ${ALLOWED_SDK_BETAS.join(', ')}`,
|
||||||
)
|
)
|
||||||
@@ -151,6 +149,7 @@ export function modelSupportsStructuredOutputs(model: string): boolean {
|
|||||||
canonical.includes('claude-opus-4-1') ||
|
canonical.includes('claude-opus-4-1') ||
|
||||||
canonical.includes('claude-opus-4-5') ||
|
canonical.includes('claude-opus-4-5') ||
|
||||||
canonical.includes('claude-opus-4-6') ||
|
canonical.includes('claude-opus-4-6') ||
|
||||||
|
canonical.includes('claude-opus-4-7') ||
|
||||||
canonical.includes('claude-haiku-4-5')
|
canonical.includes('claude-haiku-4-5')
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@@ -188,7 +187,7 @@ export function modelSupportsAutoMode(model: string): boolean {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// External allowlist (firstParty already checked above).
|
// External allowlist (firstParty already checked above).
|
||||||
return /^claude-(opus|sonnet)-4-6/.test(m)
|
return /^claude-(opus|sonnet)-4-[67]/.test(m)
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@@ -275,16 +274,18 @@ export const getAllModelBetas = memoize((model: string): string[] => {
|
|||||||
betaHeaders.push(REDACT_THINKING_BETA_HEADER)
|
betaHeaders.push(REDACT_THINKING_BETA_HEADER)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add context management beta for tool clearing (ant opt-in) or thinking preservation
|
// Add context management beta for tool clearing or thinking preservation.
|
||||||
const antOptedIntoToolClearing =
|
// Tool clearing is enabled by default for all users (upstream gates on ant);
|
||||||
isEnvTruthy(process.env.USE_API_CONTEXT_MANAGEMENT) &&
|
// thinking preservation activates when the model supports context management.
|
||||||
process.env.USER_TYPE === 'ant'
|
const toolClearingOptIn =
|
||||||
|
isEnvTruthy(process.env.USE_API_CONTEXT_MANAGEMENT) ||
|
||||||
|
modelSupportsContextManagement(model)
|
||||||
|
|
||||||
const thinkingPreservationEnabled = modelSupportsContextManagement(model)
|
const thinkingPreservationEnabled = modelSupportsContextManagement(model)
|
||||||
|
|
||||||
if (
|
if (
|
||||||
shouldIncludeFirstPartyOnlyBetas() &&
|
shouldIncludeFirstPartyOnlyBetas() &&
|
||||||
(antOptedIntoToolClearing || thinkingPreservationEnabled)
|
(toolClearingOptIn || thinkingPreservationEnabled)
|
||||||
) {
|
) {
|
||||||
betaHeaders.push(CONTEXT_MANAGEMENT_BETA_HEADER)
|
betaHeaders.push(CONTEXT_MANAGEMENT_BETA_HEADER)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ export const EFFORT_LEVELS = [
|
|||||||
'low',
|
'low',
|
||||||
'medium',
|
'medium',
|
||||||
'high',
|
'high',
|
||||||
|
'xhigh',
|
||||||
'max',
|
'max',
|
||||||
] as const satisfies readonly EffortLevel[]
|
] as const satisfies readonly EffortLevel[]
|
||||||
|
|
||||||
@@ -32,7 +33,11 @@ export function modelSupportsEffort(model: string): boolean {
|
|||||||
return supported3P
|
return supported3P
|
||||||
}
|
}
|
||||||
// Supported by a subset of Claude 4 models
|
// Supported by a subset of Claude 4 models
|
||||||
if (m.includes('opus-4-6') || m.includes('sonnet-4-6')) {
|
if (
|
||||||
|
m.includes('opus-4-7') ||
|
||||||
|
m.includes('opus-4-6') ||
|
||||||
|
m.includes('sonnet-4-6')
|
||||||
|
) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// Exclude any other known legacy models (haiku, older opus/sonnet variants)
|
// Exclude any other known legacy models (haiku, older opus/sonnet variants)
|
||||||
@@ -51,13 +56,32 @@ export function modelSupportsEffort(model: string): boolean {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// @[MODEL LAUNCH]: Add the new model to the allowlist if it supports 'max' effort.
|
// @[MODEL LAUNCH]: Add the new model to the allowlist if it supports 'max' effort.
|
||||||
// Per API docs, 'max' is Opus 4.6 only for public models — other models return an error.
|
// Per API docs, 'max' is Opus 4.6/4.7 only for public models — other models return an error.
|
||||||
export function modelSupportsMaxEffort(model: string): boolean {
|
export function modelSupportsMaxEffort(model: string): boolean {
|
||||||
const supported3P = get3PModelCapabilityOverride(model, 'max_effort')
|
const supported3P = get3PModelCapabilityOverride(model, 'max_effort')
|
||||||
if (supported3P !== undefined) {
|
if (supported3P !== undefined) {
|
||||||
return supported3P
|
return supported3P
|
||||||
}
|
}
|
||||||
if (model.toLowerCase().includes('opus-4-6')) {
|
if (
|
||||||
|
model.toLowerCase().includes('opus-4-7') ||
|
||||||
|
model.toLowerCase().includes('opus-4-6')
|
||||||
|
) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if (process.env.USER_TYPE === 'ant' && resolveAntModel(model)) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// @[MODEL LAUNCH]: Add the new model to the allowlist if it supports 'xhigh' effort.
|
||||||
|
// 'xhigh' was introduced with Opus 4.7 as a level between 'high' and 'max'.
|
||||||
|
export function modelSupportsXhighEffort(model: string): boolean {
|
||||||
|
const supported3P = get3PModelCapabilityOverride(model, 'xhigh_effort')
|
||||||
|
if (supported3P !== undefined) {
|
||||||
|
return supported3P
|
||||||
|
}
|
||||||
|
if (model.toLowerCase().includes('opus-4-7')) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if (process.env.USER_TYPE === 'ant' && resolveAntModel(model)) {
|
if (process.env.USER_TYPE === 'ant' && resolveAntModel(model)) {
|
||||||
@@ -97,7 +121,12 @@ export function parseEffortValue(value: unknown): EffortValue | undefined {
|
|||||||
export function toPersistableEffort(
|
export function toPersistableEffort(
|
||||||
value: EffortValue | undefined,
|
value: EffortValue | undefined,
|
||||||
): EffortLevel | undefined {
|
): EffortLevel | undefined {
|
||||||
if (value === 'low' || value === 'medium' || value === 'high') {
|
if (
|
||||||
|
value === 'low' ||
|
||||||
|
value === 'medium' ||
|
||||||
|
value === 'high' ||
|
||||||
|
value === 'xhigh'
|
||||||
|
) {
|
||||||
return value
|
return value
|
||||||
}
|
}
|
||||||
if (value === 'max' && process.env.USER_TYPE === 'ant') {
|
if (value === 'max' && process.env.USER_TYPE === 'ant') {
|
||||||
@@ -161,6 +190,10 @@ export function resolveAppliedEffort(
|
|||||||
}
|
}
|
||||||
const resolved =
|
const resolved =
|
||||||
envOverride ?? appStateEffortValue ?? getDefaultEffortForModel(model)
|
envOverride ?? appStateEffortValue ?? getDefaultEffortForModel(model)
|
||||||
|
// API rejects 'xhigh' on pre-Opus-4.7 models — downgrade to 'high'.
|
||||||
|
if (resolved === 'xhigh' && !modelSupportsXhighEffort(model)) {
|
||||||
|
return 'high'
|
||||||
|
}
|
||||||
// API rejects 'max' on non-Opus-4.6 models — downgrade to 'high'.
|
// API rejects 'max' on non-Opus-4.6 models — downgrade to 'high'.
|
||||||
if (resolved === 'max' && !modelSupportsMaxEffort(model)) {
|
if (resolved === 'max' && !modelSupportsMaxEffort(model)) {
|
||||||
return 'high'
|
return 'high'
|
||||||
@@ -231,8 +264,10 @@ export function getEffortLevelDescription(level: EffortLevel): string {
|
|||||||
return 'Balanced approach with standard implementation and testing'
|
return 'Balanced approach with standard implementation and testing'
|
||||||
case 'high':
|
case 'high':
|
||||||
return 'Comprehensive implementation with extensive testing and documentation'
|
return 'Comprehensive implementation with extensive testing and documentation'
|
||||||
|
case 'xhigh':
|
||||||
|
return 'Extended reasoning beyond high, short of max (Opus 4.7 only)'
|
||||||
case 'max':
|
case 'max':
|
||||||
return 'Maximum capability with deepest reasoning (Opus 4.6 only)'
|
return 'Maximum capability with deepest reasoning (Opus 4.6/4.7 only)'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -308,7 +343,10 @@ export function getDefaultEffortForModel(
|
|||||||
|
|
||||||
// Default effort on Opus 4.6 to medium for Pro.
|
// Default effort on Opus 4.6 to medium for Pro.
|
||||||
// Max/Team also get medium when the tengu_grey_step2 config is enabled.
|
// Max/Team also get medium when the tengu_grey_step2 config is enabled.
|
||||||
if (model.toLowerCase().includes('opus-4-6')) {
|
if (
|
||||||
|
model.toLowerCase().includes('opus-4-7') ||
|
||||||
|
model.toLowerCase().includes('opus-4-6')
|
||||||
|
) {
|
||||||
if (isProSubscriber()) {
|
if (isProSubscriber()) {
|
||||||
return 'medium'
|
return 'medium'
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user