mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-15 12:55:51 +00:00
Squash-merge of feat/autofix-pr-test (69 commits) onto upstream/main with -X ours strategy (upstream as authoritative for content conflicts). Key features brought in from fork: - LocalMemoryRecall + VaultHttpFetch tools (end-to-end wired) - /local-memory, /local-vault, /memory-stores, /skill-store interactive panels - /agents-platform, /schedule, /vault command scaffolding - /login: switch / replace / remove of workspace API key - statusline refactor (built-in status row, /statusline as info command) - autofix-pr command + workflow Conflict resolutions (upstream-wins): - 10 .js command stubs kept from upstream (alongside fork's .ts implementations) - src/components/BuiltinStatusLine.tsx accepted upstream's deletion (fork's wire-up references in StatusLine.tsx will be cleaned up next) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
187 lines
7.3 KiB
TypeScript
187 lines
7.3 KiB
TypeScript
/**
|
||
* Scrubbing functions for VaultHttpFetchTool.
|
||
*
|
||
* The cardinal rule: NO secret-derived string ever leaves this tool's
|
||
* boundary in any field that would land in tool_result, jsonl, transcript
|
||
* search, telemetry, or compact summaries. The scrub layer applies to:
|
||
* - response body (server might echo Authorization)
|
||
* - response headers (Authorization / X-Api-Key / Set-Cookie)
|
||
* - axios error messages (axios.AxiosError.config can carry the request
|
||
* headers — including the Authorization we just sent)
|
||
*
|
||
* Strategy: build all "derived forms" of the secret BEFORE the request, then
|
||
* apply scrubAllSecretForms to every byte that crosses the tool boundary.
|
||
*
|
||
* Derived forms covered:
|
||
* - raw secret value
|
||
* - 'Bearer <secret>'
|
||
* - <secret> base64-encoded (for Basic-style payloads)
|
||
* - 'Basic <base64>' full header value
|
||
*
|
||
* Custom auth_header_name puts the raw secret as the header value, which is
|
||
* already covered by the raw-secret form.
|
||
*/
|
||
|
||
const REDACTED = '[REDACTED]'
|
||
|
||
const SENSITIVE_HEADER_NAMES = new Set([
|
||
'authorization',
|
||
'x-api-key',
|
||
'cookie',
|
||
'set-cookie',
|
||
'proxy-authorization',
|
||
'www-authenticate',
|
||
])
|
||
|
||
/**
|
||
* Minimum secret length for scrubbing the RAW form. Below this threshold,
|
||
* scrubbing causes pathological output amplification — e.g. a 1-char
|
||
* secret 'X' on a 1MB body that happens to contain many X chars produces
|
||
* ~10MB of [REDACTED].
|
||
*
|
||
* 4 chars is below any realistic secret (API tokens, OAuth tokens, JWTs,
|
||
* passwords are all >>4). The vault store should reject sub-4-char values
|
||
* at write time, but this is defense-in-depth at scrub time.
|
||
*/
|
||
const MIN_SCRUB_LENGTH = 4
|
||
|
||
/**
|
||
* Minimum secret length for scrubbing the BASE64-derived forms.
|
||
*
|
||
* M3 fix (codecov-100 audit #6): a 4-char secret has a 7-8 char base64
|
||
* representation that is short enough to collide with naturally-occurring
|
||
* tokens in the response body (`x4Kp` → `eDRLcA==`, which can match
|
||
* unrelated short identifiers). Raw + Bearer forms are still scrubbed
|
||
* for short secrets because their substring match is much more specific
|
||
* (e.g. `Bearer x4Kp` is unlikely to collide). For base64 forms we wait
|
||
* until the secret is >= 8 chars (yielding >= 12 base64 chars), which is
|
||
* the OWASP minimum for a credential and is well clear of incidental
|
||
* collisions. This is a TIGHTER scrub for short secrets, not looser:
|
||
* we still scrub the raw secret value itself.
|
||
*/
|
||
const MIN_SCRUB_BASE64_LENGTH = 8
|
||
|
||
/**
|
||
* Compute every form the secret could appear in across response body /
|
||
* headers / error message.
|
||
*
|
||
* L7 fix: returns `[]` (empty) when secret is shorter than MIN_SCRUB_LENGTH
|
||
* — scrubbing a too-short pattern is worse than not scrubbing. Caller
|
||
* should guard `if (secret && secret.length >= MIN_SCRUB_LENGTH)` before
|
||
* trusting the result is non-empty. The previous JSDoc claimed "always
|
||
* non-empty" which was inaccurate.
|
||
*
|
||
* M3 fix (codecov-100 audit #6): for short secrets (4-7 chars) we omit
|
||
* the bare-base64 form because its 7-8 char encoding is short enough to
|
||
* collide with unrelated tokens in the response body and produce
|
||
* spurious [REDACTED] markers. We still emit raw + Bearer + Basic-base64
|
||
* because those have a longer/more-specific match shape.
|
||
*
|
||
* Returned forms are sorted longest-first so callers don't need to re-sort.
|
||
*/
|
||
export function buildDerivedSecretForms(secret: string): readonly string[] {
|
||
if (!secret || secret.length < MIN_SCRUB_LENGTH) return []
|
||
const base64 = Buffer.from(secret, 'utf8').toString('base64')
|
||
// Pre-sorted longest-first (Basic > Bearer > base64 > raw, generally)
|
||
// so callers don't pay the sort cost on every scrub call.
|
||
if (secret.length < MIN_SCRUB_BASE64_LENGTH) {
|
||
// M3 fix: omit the bare-base64 form for short secrets (collision risk).
|
||
// The Basic-prefixed form keeps base64 content in the scrub list but
|
||
// anchored on the literal "Basic " prefix so collisions with random
|
||
// 8-char tokens in the body are vanishingly unlikely.
|
||
return [`Basic ${base64}`, `Bearer ${secret}`, secret]
|
||
}
|
||
return [`Basic ${base64}`, `Bearer ${secret}`, base64, secret]
|
||
}
|
||
|
||
/**
|
||
* Replace every occurrence of any derived secret form in `s` with [REDACTED].
|
||
*
|
||
* M7 fix: forms array is pre-sorted longest-first by buildDerivedSecretForms,
|
||
* so we no longer allocate a sorted copy on every call. Also added a
|
||
* `s.length >= form.length` fast-path before `includes()` to skip
|
||
* impossible-match work, and the `includes()` check itself is the fast path
|
||
* that lets us skip the split/join allocation for clean bodies.
|
||
*/
|
||
export function scrubAllSecretForms(
|
||
s: string,
|
||
forms: readonly string[],
|
||
): string {
|
||
if (!s || forms.length === 0) return s
|
||
let out = s
|
||
for (const form of forms) {
|
||
if (form.length > 0 && out.length >= form.length && out.includes(form)) {
|
||
out = out.split(form).join(REDACTED)
|
||
}
|
||
}
|
||
return out
|
||
}
|
||
|
||
/**
|
||
* Sanitize response headers: redact sensitive header names entirely, and
|
||
* scrub any remaining headers' values for secret echo.
|
||
*/
|
||
export function scrubResponseHeaders(
|
||
headers: unknown,
|
||
forms: readonly string[],
|
||
): Record<string, string> {
|
||
const out: Record<string, string> = {}
|
||
if (!headers || typeof headers !== 'object') return out
|
||
for (const [key, value] of Object.entries(
|
||
headers as Record<string, unknown>,
|
||
)) {
|
||
const lname = key.toLowerCase()
|
||
if (SENSITIVE_HEADER_NAMES.has(lname)) {
|
||
out[key] = REDACTED
|
||
continue
|
||
}
|
||
const sv = Array.isArray(value)
|
||
? value.map(v => String(v ?? '')).join(', ')
|
||
: String(value ?? '')
|
||
out[key] = scrubAllSecretForms(sv, forms)
|
||
}
|
||
return out
|
||
}
|
||
|
||
/**
|
||
* Truncate a string to at most `maxBytes` UTF-8 bytes, returning a value that
|
||
* is still valid UTF-8 (no half-encoded code points).
|
||
*
|
||
* H1 fix (codecov-100 audit): the previous code used `String#slice(0, 80)`
|
||
* which counts UTF-16 *code units*. With multi-byte UTF-8 (CJK, emoji,
|
||
* combining marks) an 80-char slice can balloon to 240+ bytes — violating
|
||
* the analytics field's byte-cap contract. We walk the byte buffer and
|
||
* back off to the start of the last complete UTF-8 code point. (We also
|
||
* walk back any combining-mark continuation bytes that depend on a
|
||
* just-truncated lead byte; this is handled implicitly by the
|
||
* leading-byte check since UTF-8 continuation bytes are 0b10xxxxxx.)
|
||
*
|
||
* Empty / null-ish inputs return ''.
|
||
*/
|
||
export function truncateToBytes(input: string, maxBytes: number): string {
|
||
if (!input || maxBytes <= 0) return ''
|
||
const buf = Buffer.from(input, 'utf8')
|
||
if (buf.length <= maxBytes) return input
|
||
// Walk back from maxBytes until we land on a code-point boundary.
|
||
// UTF-8 continuation bytes match 10xxxxxx (0x80–0xBF). A code-point
|
||
// boundary is any byte that does NOT match that mask.
|
||
let end = maxBytes
|
||
while (end > 0 && (buf[end]! & 0xc0) === 0x80) {
|
||
end--
|
||
}
|
||
return buf.subarray(0, end).toString('utf8')
|
||
}
|
||
|
||
/**
|
||
* Convert an axios / fetch error into a safe summary string. NEVER stringify
|
||
* the raw error: axios.AxiosError carries .config.headers which contains the
|
||
* Authorization we just sent. Build a synthetic message and scrub it.
|
||
*/
|
||
export function scrubAxiosError(e: unknown, forms: readonly string[]): string {
|
||
if (e instanceof Error) {
|
||
const msg = scrubAllSecretForms(e.message, forms)
|
||
return `Request failed: ${msg}`
|
||
}
|
||
return 'Request failed (unknown error)'
|
||
}
|