feat: 添加 Bedrock API 客户端及 API 层增强

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
unraid
2026-04-22 22:38:09 +08:00
parent 59f8675fa3
commit be97a0b010
15 changed files with 1362 additions and 197 deletions

View File

@@ -101,6 +101,8 @@ import {
extractQuotaStatusFromHeaders,
} from '../claudeAiLimits.js'
import { getAPIContextManagement } from '../compact/apiMicrocompact.js'
import { bedrockAdapter } from '../providerUsage/adapters/bedrock.js'
import { updateProviderBuckets } from '../providerUsage/store.js'
/* eslint-disable @typescript-eslint/no-require-imports */
const autoModeStateModule = feature('TRANSCRIPT_CLASSIFIER')
@@ -541,13 +543,12 @@ export async function verifyApiKey(
}),
async anthropic => {
const messages: MessageParam[] = [{ role: 'user', content: 'test' }]
// biome-ignore lint/plugin: API key verification is intentionally a minimal direct call
await anthropic.beta.messages.create({
model,
max_tokens: 1,
messages,
temperature: 1,
...(betas.length > 0 && { betas }),
...(betas.length > 0 && { betas: betas.filter(Boolean) }),
metadata: getAPIMetadata(),
...getExtraBodyParams(),
})
@@ -878,7 +879,6 @@ export async function* executeNonStreamingRequest(
)
try {
// biome-ignore lint/plugin: non-streaming API call
return await anthropic.beta.messages.create(
{
...adjustedParams,
@@ -1215,10 +1215,15 @@ async function* queryModel(
cacheEditingBetaHeader = betas.CACHE_EDITING_BETA_HEADER
const featureEnabled = isCachedMicrocompactEnabled()
const modelSupported = isModelSupportedForCacheEditing(options.model)
cachedMCEnabled = featureEnabled && modelSupported
// cachedMC requires a non-empty beta header; the CACHE_EDITING_BETA_HEADER
// constant is '' in this fork (upstream hasn't published the real value).
// Without it, cache_reference and cache_edits in the request body cause
// API 400: "tool_result.cache_reference: Extra inputs are not permitted".
const headerAvailable = !!cacheEditingBetaHeader
cachedMCEnabled = featureEnabled && modelSupported && headerAvailable
const config = getCachedMCConfig()
logForDebugging(
`Cached MC gate: enabled=${featureEnabled} modelSupported=${modelSupported} model=${options.model} supportedModels=${jsonStringify((config as any).supportedModels)}`,
`Cached MC gate: enabled=${featureEnabled} modelSupported=${modelSupported} headerAvailable=${headerAvailable} model=${options.model} supportedModels=${jsonStringify((config as Record<string, unknown>).supportedModels)}`,
)
}
@@ -1724,6 +1729,7 @@ async function* queryModel(
options.querySource === 'repl_main_thread'
if (
cacheEditingHeaderLatched &&
cacheEditingBetaHeader &&
getAPIProvider() === 'firstParty' &&
options.querySource === 'repl_main_thread' &&
!betasParams.includes(cacheEditingBetaHeader)
@@ -1740,7 +1746,12 @@ async function* queryModel(
? (options.temperatureOverride ?? 1)
: undefined
lastRequestBetas = betasParams
// Filter out any empty-string beta headers before sending.
// Constants like CACHE_EDITING_BETA_HEADER or AFK_MODE_BETA_HEADER
// can be '' when their feature gate is off; an empty string in the
// betas array produces an invalid anthropic-beta header (400 error).
const filteredBetas = betasParams.filter(Boolean)
lastRequestBetas = filteredBetas
return {
model: normalizeModelStringForAPI(options.model),
@@ -1756,7 +1767,7 @@ async function* queryModel(
system,
tools: allTools,
tool_choice: options.toolChoice,
...(useBetas && { betas: betasParams }),
...(useBetas && { betas: filteredBetas }),
metadata: getAPIMetadata(),
max_tokens: maxOutputTokens,
thinking,
@@ -1864,7 +1875,6 @@ async function* queryModel(
// Use the raw stream instead of BetaMessageStream to avoid O(n²) partial JSON parsing.
// BetaMessageStream calls partialParse() on every input_json_delta, which we don't need
// because we handle tool input accumulation ourselves.
// biome-ignore lint/plugin: main conversation loop handles attribution separately
const result = await anthropic.beta.messages
.create(
{ ...params, stream: true },
@@ -2445,6 +2455,16 @@ async function* queryModel(
const resp = streamResponse as unknown as Response | undefined
if (resp) {
extractQuotaStatusFromHeaders(resp.headers)
// Non-Anthropic providers that flow through this same client path
// (Bedrock) expose their own throttle headers — let their adapter
// overwrite the store with its bucket(s). Anthropic's adapter runs
// inside extractQuotaStatusFromHeaders.
if (getAPIProvider() === 'bedrock') {
updateProviderBuckets(
'bedrock',
bedrockAdapter.parseHeaders(resp.headers),
)
}
// Store headers for gateway detection
responseHeaders = resp.headers
}
@@ -3229,6 +3249,7 @@ export function addCacheBreakpoints(
// Add cache_reference to tool_result blocks that are within the cached prefix.
// Must be done AFTER cache_edits insertion since that modifies content arrays.
// Note: this code only runs when useCachedMC=true (early return at line ~3202).
if (enablePromptCaching) {
// Find the last message containing a cache_control marker
let lastCCMsg = -1