mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-17 13:55:50 +00:00
Feat/integrate lint preview (#285)
* feat: 适配 zed acp 协议 * docs: 完善 acp 文档 * feat: integrate feature branches + daemon/job 命令层级化 + 跨平台后台引擎 Cherry-picked from origin/lint/preview (637c908), excluding lint-only changes. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: correct detectMimeFromBase64 to decode raw bytes from base64 Cherry-picked from origin/lint/preview (ee36954). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: daemon 子进程 spawn 跨平台修复 + CliLaunchSpec 集中化重构 Cherry-picked from origin/lint/preview (c5f52cd), excluding lint-only formatting changes. - 新建 src/utils/cliLaunch.ts: 集中化 CLI 子进程启动层 - 修复 --daemon-worker=kind 等号格式解析 - 修复 daemon/bg fast path 缺少 setShellIfWindows() - 修复 checkPathExists 用 existsSync 替代 execSync('dir') - 7 个 spawn 站点迁移到 CliLaunchSpec Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: merge tsconfig.base.json into tsconfig.json with full compiler options The cherry-pick from637c908dropped jsx/strict/etc settings when removing tsconfig.base.json. This commit restores them in a single tsconfig.json. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: merge tsconfig.base.json into tsconfig.json with full compiler options The cherry-pick from637c908dropped jsx/strict/etc settings when removing tsconfig.base.json. This commit restores them in a single tsconfig.json. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -466,6 +466,7 @@ const LOCAL_GATE_DEFAULTS: Record<string, unknown> = {
|
||||
tengu_birch_trellis: true, // Tree-sitter bash security analysis
|
||||
tengu_collage_kaleidoscope: true, // macOS clipboard image reading
|
||||
tengu_compact_cache_prefix: true, // Reuse prompt cache during compaction
|
||||
tengu_kairos_assistant: true, // KAIROS assistant mode activation
|
||||
tengu_kairos_cron_durable: true, // Persistent cron tasks
|
||||
tengu_attribution_header: true, // API request attribution header
|
||||
tengu_slate_prism: true, // Agent progress summaries
|
||||
@@ -830,6 +831,16 @@ export function getFeatureValue_CACHED_MAY_BE_STALE<T>(
|
||||
return localDefault !== undefined ? (localDefault as T) : defaultValue
|
||||
}
|
||||
|
||||
// LOCAL_GATE_DEFAULTS take priority over remote values and disk cache.
|
||||
// In fork/self-hosted deployments, the GrowthBook server may push false
|
||||
// for gates we intentionally enable. Local defaults represent the
|
||||
// project's intentional configuration and override everything except
|
||||
// env/config overrides (which are explicit user intent).
|
||||
const localDefault = getLocalGateDefault(feature)
|
||||
if (localDefault !== undefined) {
|
||||
return localDefault as T
|
||||
}
|
||||
|
||||
// Log experiment exposure if data is available, otherwise defer until after init
|
||||
if (experimentDataByFeature.has(feature)) {
|
||||
logExposureForFeature(feature)
|
||||
@@ -838,10 +849,6 @@ export function getFeatureValue_CACHED_MAY_BE_STALE<T>(
|
||||
}
|
||||
|
||||
// In-memory payload is authoritative once processRemoteEvalPayload has run.
|
||||
// Disk is also fresh by then (syncRemoteEvalToDisk runs synchronously inside
|
||||
// init), so this is correctness-equivalent to the disk read below — but it
|
||||
// skips the config JSON parse and is what onGrowthBookRefresh subscribers
|
||||
// depend on to read fresh values the instant they're notified.
|
||||
if (remoteEvalFeatureValues.has(feature)) {
|
||||
return remoteEvalFeatureValues.get(feature) as T
|
||||
}
|
||||
@@ -853,14 +860,9 @@ export function getFeatureValue_CACHED_MAY_BE_STALE<T>(
|
||||
return cached as T
|
||||
}
|
||||
} catch {
|
||||
// Config not yet initialized — fall through to local gate defaults
|
||||
// Config not yet initialized — fall through to defaultValue
|
||||
}
|
||||
// Disk cache miss (or config not initialized) — use local gate defaults
|
||||
// before falling back to the caller's defaultValue. This covers:
|
||||
// 1. GrowthBook "enabled" but never connected (caches empty)
|
||||
// 2. Config not yet initialized (early in startup)
|
||||
const localDefault = getLocalGateDefault(feature)
|
||||
return localDefault !== undefined ? (localDefault as T) : defaultValue
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
487
src/services/api/openai/__tests__/queryModelOpenAI.isolated.ts
Normal file
487
src/services/api/openai/__tests__/queryModelOpenAI.isolated.ts
Normal file
@@ -0,0 +1,487 @@
|
||||
/**
|
||||
* Tests for queryModelOpenAI in index.ts.
|
||||
*
|
||||
* Focused on the two bugs fixed:
|
||||
* 1. stop_reason was always null in the assembled AssistantMessage because
|
||||
* partialMessage (from message_start) has stop_reason: null, and the
|
||||
* stop_reason captured from message_delta was never applied.
|
||||
* 2. partialMessage was not reset to null after message_stop, so the safety
|
||||
* fallback at the end of the loop would yield a second identical
|
||||
* AssistantMessage (causing doubled content in the next API request).
|
||||
*
|
||||
* Strategy: mock getOpenAIClient + adaptOpenAIStreamToAnthropic so we can
|
||||
* feed pre-built Anthropic events directly into queryModelOpenAI and inspect
|
||||
* what it emits — without any real HTTP calls.
|
||||
*/
|
||||
import { describe, expect, test, mock, beforeEach, afterEach } from 'bun:test'
|
||||
import type { BetaRawMessageStreamEvent } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
|
||||
import type { AssistantMessage, StreamEvent } from '../../../../types/message.js'
|
||||
|
||||
// ─── helpers ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Build a minimal message_start event */
|
||||
function makeMessageStart(overrides: Record<string, any> = {}): BetaRawMessageStreamEvent {
|
||||
return {
|
||||
type: 'message_start',
|
||||
message: {
|
||||
id: 'msg_test',
|
||||
type: 'message',
|
||||
role: 'assistant',
|
||||
content: [],
|
||||
model: 'test-model',
|
||||
stop_reason: null,
|
||||
stop_sequence: null,
|
||||
usage: { input_tokens: 0, output_tokens: 0, cache_creation_input_tokens: 0, cache_read_input_tokens: 0 },
|
||||
...overrides,
|
||||
},
|
||||
} as any
|
||||
}
|
||||
|
||||
/** Build a content_block_start event for the given block type */
|
||||
function makeContentBlockStart(index: number, type: 'text' | 'tool_use' | 'thinking', extra: Record<string, any> = {}): BetaRawMessageStreamEvent {
|
||||
const block =
|
||||
type === 'text'
|
||||
? { type: 'text', text: '' }
|
||||
: type === 'tool_use'
|
||||
? { type: 'tool_use', id: 'toolu_test', name: 'bash', input: {} }
|
||||
: { type: 'thinking', thinking: '', signature: '' }
|
||||
return { type: 'content_block_start', index, content_block: { ...block, ...extra } } as any
|
||||
}
|
||||
|
||||
/** Build a text_delta content_block_delta event */
|
||||
function makeTextDelta(index: number, text: string): BetaRawMessageStreamEvent {
|
||||
return { type: 'content_block_delta', index, delta: { type: 'text_delta', text } } as any
|
||||
}
|
||||
|
||||
/** Build an input_json_delta content_block_delta event */
|
||||
function makeInputJsonDelta(index: number, json: string): BetaRawMessageStreamEvent {
|
||||
return { type: 'content_block_delta', index, delta: { type: 'input_json_delta', partial_json: json } } as any
|
||||
}
|
||||
|
||||
/** Build a thinking_delta content_block_delta event */
|
||||
function makeThinkingDelta(index: number, thinking: string): BetaRawMessageStreamEvent {
|
||||
return { type: 'content_block_delta', index, delta: { type: 'thinking_delta', thinking } } as any
|
||||
}
|
||||
|
||||
/** Build a content_block_stop event */
|
||||
function makeContentBlockStop(index: number): BetaRawMessageStreamEvent {
|
||||
return { type: 'content_block_stop', index } as any
|
||||
}
|
||||
|
||||
/** Build a message_delta event with stop_reason and output_tokens */
|
||||
function makeMessageDelta(stopReason: string, outputTokens: number): BetaRawMessageStreamEvent {
|
||||
return {
|
||||
type: 'message_delta',
|
||||
delta: { stop_reason: stopReason, stop_sequence: null },
|
||||
usage: { output_tokens: outputTokens },
|
||||
} as any
|
||||
}
|
||||
|
||||
/** Build a message_stop event */
|
||||
function makeMessageStop(): BetaRawMessageStreamEvent {
|
||||
return { type: 'message_stop' } as any
|
||||
}
|
||||
|
||||
/** Async generator from a fixed array of events */
|
||||
async function* eventStream(events: BetaRawMessageStreamEvent[]) {
|
||||
for (const e of events) yield e
|
||||
}
|
||||
|
||||
/** Collect all outputs from queryModelOpenAI into typed buckets */
|
||||
async function runQueryModel(
|
||||
events: BetaRawMessageStreamEvent[],
|
||||
envOverrides: Record<string, string | undefined> = {},
|
||||
) {
|
||||
// Wire events into the mocked stream adapter
|
||||
_nextEvents = events
|
||||
// Save + apply env overrides
|
||||
const saved: Record<string, string | undefined> = {}
|
||||
for (const [k, v] of Object.entries(envOverrides)) {
|
||||
saved[k] = process.env[k]
|
||||
if (v === undefined) delete process.env[k]
|
||||
else process.env[k] = v
|
||||
}
|
||||
|
||||
try {
|
||||
// We inline mock.module inside the try block.
|
||||
// Bun resolves mock.module at the call site synchronously (hoisted),
|
||||
// so we register once per test file, then re-import each time.
|
||||
const { queryModelOpenAI } = await import('../index.js')
|
||||
|
||||
const assistantMessages: AssistantMessage[] = []
|
||||
const streamEvents: StreamEvent[] = []
|
||||
const otherOutputs: any[] = []
|
||||
|
||||
const minimalOptions: any = {
|
||||
model: 'test-model',
|
||||
tools: [],
|
||||
agents: [],
|
||||
querySource: 'main_loop',
|
||||
getToolPermissionContext: async () => ({
|
||||
alwaysAllow: [],
|
||||
alwaysDeny: [],
|
||||
needsPermission: [],
|
||||
mode: 'default',
|
||||
isBypassingPermissions: false,
|
||||
}),
|
||||
}
|
||||
|
||||
for await (const item of queryModelOpenAI(
|
||||
[],
|
||||
{ type: 'text', text: '' } as any,
|
||||
[],
|
||||
new AbortController().signal,
|
||||
minimalOptions,
|
||||
)) {
|
||||
if (item.type === 'assistant') {
|
||||
assistantMessages.push(item as AssistantMessage)
|
||||
} else if (item.type === 'stream_event') {
|
||||
streamEvents.push(item as StreamEvent)
|
||||
} else {
|
||||
otherOutputs.push(item)
|
||||
}
|
||||
}
|
||||
|
||||
return { assistantMessages, streamEvents, otherOutputs }
|
||||
} finally {
|
||||
// Restore env
|
||||
for (const [k, v] of Object.entries(saved)) {
|
||||
if (v === undefined) delete process.env[k]
|
||||
else process.env[k] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ─── mock setup ──────────────────────────────────────────────────────────────
|
||||
|
||||
// We mock at module level. Bun's mock.module replaces the module for the
|
||||
// entire file, so we configure the stream per-test via a shared variable.
|
||||
let _nextEvents: BetaRawMessageStreamEvent[] = []
|
||||
|
||||
/** Captured arguments from the last chat.completions.create() call */
|
||||
let _lastCreateArgs: Record<string, any> | null = null
|
||||
|
||||
mock.module('../client.js', () => ({
|
||||
getOpenAIClient: () => ({
|
||||
chat: {
|
||||
completions: {
|
||||
create: async (args: Record<string, any>) => {
|
||||
_lastCreateArgs = args
|
||||
return { [Symbol.asyncIterator]: async function* () {} }
|
||||
},
|
||||
},
|
||||
},
|
||||
}),
|
||||
}))
|
||||
|
||||
mock.module('../streamAdapter.js', () => ({
|
||||
adaptOpenAIStreamToAnthropic: (_stream: any, _model: string) => eventStream(_nextEvents),
|
||||
}))
|
||||
|
||||
mock.module('../modelMapping.js', () => ({
|
||||
resolveOpenAIModel: (m: string) => m,
|
||||
}))
|
||||
|
||||
mock.module('../convertMessages.js', () => ({
|
||||
anthropicMessagesToOpenAI: () => [],
|
||||
}))
|
||||
|
||||
mock.module('../convertTools.js', () => ({
|
||||
anthropicToolsToOpenAI: () => [],
|
||||
anthropicToolChoiceToOpenAI: () => undefined,
|
||||
}))
|
||||
|
||||
mock.module('../../../../utils/context.js', () => ({
|
||||
MODEL_CONTEXT_WINDOW_DEFAULT: 200_000,
|
||||
COMPACT_MAX_OUTPUT_TOKENS: 20_000,
|
||||
CAPPED_DEFAULT_MAX_TOKENS: 8_000,
|
||||
ESCALATED_MAX_TOKENS: 64_000,
|
||||
is1mContextDisabled: () => false,
|
||||
has1mContext: () => false,
|
||||
modelSupports1M: () => false,
|
||||
getModelMaxOutputTokens: () => ({ upperLimit: 8192, default: 8192 }),
|
||||
getContextWindowForModel: () => 200_000,
|
||||
getSonnet1mExpTreatmentEnabled: () => false,
|
||||
calculateContextPercentages: () => ({ usedPercent: 0, remainingPercent: 100 }),
|
||||
getMaxThinkingTokensForModel: () => 0,
|
||||
}))
|
||||
|
||||
mock.module('../../../../utils/messages.js', () => ({
|
||||
normalizeMessagesForAPI: (msgs: any) => msgs,
|
||||
normalizeContentFromAPI: (blocks: any[]) => blocks,
|
||||
createAssistantAPIErrorMessage: (opts: any) => ({
|
||||
type: 'assistant',
|
||||
message: { content: [{ type: 'text', text: opts.content }], apiError: opts.apiError },
|
||||
uuid: 'error-uuid',
|
||||
timestamp: new Date().toISOString(),
|
||||
}),
|
||||
}))
|
||||
|
||||
mock.module('../../../../utils/api.js', () => ({
|
||||
toolToAPISchema: async (t: any) => t,
|
||||
}))
|
||||
|
||||
mock.module('../../../../utils/toolSearch.js', () => ({
|
||||
isToolSearchEnabled: async () => false,
|
||||
extractDiscoveredToolNames: () => new Set(),
|
||||
}))
|
||||
|
||||
mock.module('../../../../tools/ToolSearchTool/prompt.js', () => ({
|
||||
isDeferredTool: () => false,
|
||||
TOOL_SEARCH_TOOL_NAME: '__tool_search__',
|
||||
}))
|
||||
|
||||
mock.module('../../../../cost-tracker.js', () => ({
|
||||
addToTotalSessionCost: () => {},
|
||||
}))
|
||||
|
||||
mock.module('../../../../utils/modelCost.js', () => ({
|
||||
COST_TIER_3_15: {},
|
||||
COST_TIER_15_75: {},
|
||||
COST_TIER_5_25: {},
|
||||
COST_TIER_30_150: {},
|
||||
COST_HAIKU_35: {},
|
||||
COST_HAIKU_45: {},
|
||||
getOpus46CostTier: () => ({}),
|
||||
MODEL_COSTS: {},
|
||||
getModelCosts: () => ({}),
|
||||
calculateUSDCost: () => 0,
|
||||
calculateCostFromTokens: () => 0,
|
||||
formatModelPricing: () => '',
|
||||
getModelPricingString: () => undefined,
|
||||
}))
|
||||
|
||||
mock.module('../../../../utils/debug.js', () => ({
|
||||
logForDebugging: () => {},
|
||||
logAntError: () => {},
|
||||
isDebugMode: () => false,
|
||||
isDebugToStdErr: () => false,
|
||||
getDebugFilePath: () => null,
|
||||
getDebugLogPath: () => '',
|
||||
getDebugFilter: () => null,
|
||||
getMinDebugLogLevel: () => 'debug',
|
||||
enableDebugLogging: () => false,
|
||||
setHasFormattedOutput: () => {},
|
||||
getHasFormattedOutput: () => false,
|
||||
flushDebugLogs: async () => {},
|
||||
}))
|
||||
|
||||
// ─── tests ───────────────────────────────────────────────────────────────────
|
||||
|
||||
describe('queryModelOpenAI — stop_reason propagation', () => {
|
||||
test('assembled AssistantMessage has stop_reason end_turn (not null)', async () => {
|
||||
_nextEvents = [
|
||||
makeMessageStart(),
|
||||
makeContentBlockStart(0, 'text'),
|
||||
makeTextDelta(0, 'Hello'),
|
||||
makeContentBlockStop(0),
|
||||
makeMessageDelta('end_turn', 10),
|
||||
makeMessageStop(),
|
||||
]
|
||||
|
||||
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||
|
||||
expect(assistantMessages).toHaveLength(1)
|
||||
expect(assistantMessages[0]!.message.stop_reason).toBe('end_turn')
|
||||
})
|
||||
|
||||
test('assembled AssistantMessage has stop_reason tool_use', async () => {
|
||||
_nextEvents = [
|
||||
makeMessageStart(),
|
||||
makeContentBlockStart(0, 'tool_use'),
|
||||
makeInputJsonDelta(0, '{"cmd":"ls"}'),
|
||||
makeContentBlockStop(0),
|
||||
makeMessageDelta('tool_use', 20),
|
||||
makeMessageStop(),
|
||||
]
|
||||
|
||||
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||
|
||||
expect(assistantMessages).toHaveLength(1)
|
||||
expect(assistantMessages[0]!.message.stop_reason).toBe('tool_use')
|
||||
})
|
||||
|
||||
test('assembled AssistantMessage has stop_reason max_tokens', async () => {
|
||||
_nextEvents = [
|
||||
makeMessageStart(),
|
||||
makeContentBlockStart(0, 'text'),
|
||||
makeTextDelta(0, 'truncated'),
|
||||
makeContentBlockStop(0),
|
||||
makeMessageDelta('max_tokens', 8192),
|
||||
makeMessageStop(),
|
||||
]
|
||||
|
||||
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||
|
||||
// Two assistant-typed items: the content message + the max_output_tokens error signal.
|
||||
// The error signal is emitted as a synthetic assistant message by createAssistantAPIErrorMessage.
|
||||
expect(assistantMessages).toHaveLength(2)
|
||||
const contentMsg = assistantMessages[0]!
|
||||
expect(contentMsg.message.stop_reason).toBe('max_tokens')
|
||||
// Second item is the error signal (has apiError set)
|
||||
const errorMsg = assistantMessages[1]!.message as any
|
||||
expect(errorMsg.apiError).toBe('max_output_tokens')
|
||||
})
|
||||
|
||||
test('stop_reason is null when no message_delta was received (safety fallback path)', async () => {
|
||||
// Stream ends without message_stop — triggers the safety fallback branch.
|
||||
// stop_reason stays null since no message_delta was ever seen.
|
||||
_nextEvents = [
|
||||
makeMessageStart(),
|
||||
makeContentBlockStart(0, 'text'),
|
||||
makeTextDelta(0, 'partial'),
|
||||
makeContentBlockStop(0),
|
||||
// No message_delta / message_stop
|
||||
]
|
||||
|
||||
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||
|
||||
// Safety fallback should yield the partial content
|
||||
expect(assistantMessages).toHaveLength(1)
|
||||
expect(assistantMessages[0]!.message.stop_reason).toBeNull()
|
||||
})
|
||||
})
|
||||
|
||||
describe('queryModelOpenAI — usage accumulation', () => {
|
||||
test('usage in assembled message reflects all four fields from message_delta', async () => {
|
||||
// message_start has all fields=0 (trailing-chunk pattern: usage not yet available).
|
||||
// message_delta carries the real values after stream ends.
|
||||
// The spread in the message_delta handler must override all zeros from message_start,
|
||||
// including cache_read_input_tokens which was previously missing from message_delta.
|
||||
_nextEvents = [
|
||||
makeMessageStart({ usage: { input_tokens: 0, output_tokens: 0, cache_creation_input_tokens: 0, cache_read_input_tokens: 0 } }),
|
||||
makeContentBlockStart(0, 'text'),
|
||||
makeTextDelta(0, 'response'),
|
||||
makeContentBlockStop(0),
|
||||
// message_delta carries all four Anthropic usage fields (as emitted by the fixed streamAdapter)
|
||||
{
|
||||
type: 'message_delta',
|
||||
delta: { stop_reason: 'end_turn', stop_sequence: null },
|
||||
usage: { input_tokens: 30011, output_tokens: 190, cache_read_input_tokens: 19904, cache_creation_input_tokens: 0 },
|
||||
} as any,
|
||||
makeMessageStop(),
|
||||
]
|
||||
|
||||
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||
|
||||
expect(assistantMessages).toHaveLength(1)
|
||||
const usage = assistantMessages[0]!.message.usage as any
|
||||
expect(usage.input_tokens).toBe(30011)
|
||||
expect(usage.output_tokens).toBe(190)
|
||||
// cache_read_input_tokens from message_delta overrides the 0 from message_start
|
||||
expect(usage.cache_read_input_tokens).toBe(19904)
|
||||
expect(usage.cache_creation_input_tokens).toBe(0)
|
||||
})
|
||||
|
||||
test('usage is zero when no usage events arrive (prevents false autocompact)', async () => {
|
||||
// If usage stays 0, tokenCountWithEstimation will undercount — so at least
|
||||
// verify the field exists and is numeric (to detect regressions).
|
||||
_nextEvents = [
|
||||
makeMessageStart(),
|
||||
makeContentBlockStart(0, 'text'),
|
||||
makeTextDelta(0, 'hi'),
|
||||
makeContentBlockStop(0),
|
||||
makeMessageDelta('end_turn', 0),
|
||||
makeMessageStop(),
|
||||
]
|
||||
|
||||
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||
|
||||
const usage = assistantMessages[0]!.message.usage as any
|
||||
expect(typeof usage.input_tokens).toBe('number')
|
||||
expect(typeof usage.output_tokens).toBe('number')
|
||||
})
|
||||
})
|
||||
|
||||
describe('queryModelOpenAI — no duplicate AssistantMessage (partialMessage reset)', () => {
|
||||
test('yields exactly one AssistantMessage per message_stop when content is present', async () => {
|
||||
_nextEvents = [
|
||||
makeMessageStart(),
|
||||
makeContentBlockStart(0, 'text'),
|
||||
makeTextDelta(0, 'only once'),
|
||||
makeContentBlockStop(0),
|
||||
makeMessageDelta('end_turn', 5),
|
||||
makeMessageStop(),
|
||||
]
|
||||
|
||||
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||
|
||||
// Before the fix, partialMessage was not reset to null, so the safety
|
||||
// fallback at the end of the loop would yield a second message with the
|
||||
// same message.id — causing mergeAssistantMessages to concatenate content.
|
||||
expect(assistantMessages).toHaveLength(1)
|
||||
})
|
||||
|
||||
test('thinking + text response yields exactly one AssistantMessage', async () => {
|
||||
_nextEvents = [
|
||||
makeMessageStart(),
|
||||
makeContentBlockStart(0, 'thinking'),
|
||||
makeThinkingDelta(0, 'let me think'),
|
||||
makeContentBlockStop(0),
|
||||
makeContentBlockStart(1, 'text'),
|
||||
makeTextDelta(1, 'answer'),
|
||||
makeContentBlockStop(1),
|
||||
makeMessageDelta('end_turn', 30),
|
||||
makeMessageStop(),
|
||||
]
|
||||
|
||||
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||
|
||||
expect(assistantMessages).toHaveLength(1)
|
||||
})
|
||||
|
||||
test('safety fallback path still yields message when stream ends without message_stop', async () => {
|
||||
// Simulates a stream that cuts off without the normal termination sequence.
|
||||
_nextEvents = [
|
||||
makeMessageStart(),
|
||||
makeContentBlockStart(0, 'text'),
|
||||
makeTextDelta(0, 'abrupt end'),
|
||||
// No content_block_stop, no message_delta, no message_stop
|
||||
]
|
||||
|
||||
const { assistantMessages } = await runQueryModel(_nextEvents)
|
||||
|
||||
expect(assistantMessages).toHaveLength(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe('queryModelOpenAI — stream_events forwarded', () => {
|
||||
test('every adapted event is also yielded as stream_event for real-time display', async () => {
|
||||
_nextEvents = [
|
||||
makeMessageStart(),
|
||||
makeContentBlockStart(0, 'text'),
|
||||
makeTextDelta(0, 'hello'),
|
||||
makeContentBlockStop(0),
|
||||
makeMessageDelta('end_turn', 5),
|
||||
makeMessageStop(),
|
||||
]
|
||||
|
||||
const { streamEvents } = await runQueryModel(_nextEvents)
|
||||
|
||||
const eventTypes = streamEvents.map(e => (e as any).event?.type)
|
||||
expect(eventTypes).toContain('message_start')
|
||||
expect(eventTypes).toContain('content_block_start')
|
||||
expect(eventTypes).toContain('content_block_delta')
|
||||
expect(eventTypes).toContain('content_block_stop')
|
||||
expect(eventTypes).toContain('message_delta')
|
||||
expect(eventTypes).toContain('message_stop')
|
||||
})
|
||||
})
|
||||
|
||||
describe('queryModelOpenAI — max_tokens forwarded to request', () => {
|
||||
test('buildOpenAIRequestBody includes max_tokens in the request payload', async () => {
|
||||
_nextEvents = [
|
||||
makeMessageStart(),
|
||||
makeContentBlockStart(0, 'text'),
|
||||
makeTextDelta(0, 'hi'),
|
||||
makeContentBlockStop(0),
|
||||
makeMessageDelta('end_turn', 5),
|
||||
makeMessageStop(),
|
||||
]
|
||||
|
||||
await runQueryModel(_nextEvents)
|
||||
|
||||
expect(_lastCreateArgs).not.toBeNull()
|
||||
expect(_lastCreateArgs!.max_tokens).toBe(8192)
|
||||
})
|
||||
})
|
||||
@@ -1,6 +1,21 @@
|
||||
import { describe, expect, test } from 'bun:test'
|
||||
import { adaptOpenAIStreamToAnthropic } from '../streamAdapter.js'
|
||||
import type { ChatCompletionChunk } from 'openai/resources/chat/completions/completions.mjs'
|
||||
import { join, dirname } from 'path'
|
||||
import { fileURLToPath } from 'url'
|
||||
import { readFileSync, writeFileSync, mkdirSync } from 'fs'
|
||||
import { tmpdir } from 'os'
|
||||
|
||||
// Guard against mock pollution from queryModelOpenAI.test.ts which replaces
|
||||
// ../streamAdapter.js process-wide via mock.module (bun has no un-mock API).
|
||||
// We copy the source to a unique temp path so the import bypasses bun's
|
||||
// module mock cache completely.
|
||||
const _testDir = dirname(fileURLToPath(import.meta.url))
|
||||
const _realSource = readFileSync(join(_testDir, '..', 'streamAdapter.ts'), 'utf-8')
|
||||
const _tempDir = join(tmpdir(), `stream-adapter-test-${Date.now()}`)
|
||||
mkdirSync(_tempDir, { recursive: true })
|
||||
const _tempFile = join(_tempDir, 'streamAdapter.ts')
|
||||
writeFileSync(_tempFile, _realSource, 'utf-8')
|
||||
const { adaptOpenAIStreamToAnthropic } = await import(_tempFile)
|
||||
|
||||
/** Helper to create a mock async iterable from chunk array */
|
||||
function mockStream(chunks: ChatCompletionChunk[]): AsyncIterable<ChatCompletionChunk> {
|
||||
@@ -31,6 +46,11 @@ function makeChunk(overrides: Partial<ChatCompletionChunk> & any = {}): ChatComp
|
||||
|
||||
/** Collect all emitted Anthropic events from the stream adapter for assertion */
|
||||
async function collectEvents(chunks: ChatCompletionChunk[]) {
|
||||
const realModuleUrl = new URL(
|
||||
`../streamAdapter.js?real=${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
||||
import.meta.url,
|
||||
).href
|
||||
const { adaptOpenAIStreamToAnthropic } = await import(realModuleUrl)
|
||||
const events: any[] = []
|
||||
for await (const event of adaptOpenAIStreamToAnthropic(mockStream(chunks), 'gpt-4o')) {
|
||||
events.push(event)
|
||||
|
||||
@@ -8,6 +8,7 @@ import {
|
||||
} from '../utils/messages.js'
|
||||
import { getSmallFastModel } from '../utils/model/model.js'
|
||||
import { asSystemPrompt } from '../utils/systemPromptType.js'
|
||||
import { getResolvedLanguage } from '../utils/language.js'
|
||||
import { queryModelWithoutStreaming } from './api/claude.js'
|
||||
import { getSessionMemoryContent } from './SessionMemory/sessionMemoryUtils.js'
|
||||
|
||||
@@ -15,11 +16,18 @@ import { getSessionMemoryContent } from './SessionMemory/sessionMemoryUtils.js'
|
||||
// large sessions. 30 messages ≈ ~15 exchanges, plenty for "where we left off."
|
||||
const RECENT_MESSAGE_WINDOW = 30
|
||||
|
||||
const PROMPT_EN =
|
||||
'The user stepped away and is coming back. Write exactly 1-3 short sentences. Start by stating the high-level task — what they are building or debugging, not implementation details. Next: the concrete next step. Skip status reports and commit recaps.'
|
||||
|
||||
const PROMPT_ZH =
|
||||
'用户离开后回来了。用中文写 1-3 句话。先说明用户在做什么(高层目标,不是实现细节),然后说明下一步具体操作。不要写状态报告或提交总结。'
|
||||
|
||||
function buildAwaySummaryPrompt(memory: string | null): string {
|
||||
const memoryBlock = memory
|
||||
? `Session memory (broader context):\n${memory}\n\n`
|
||||
: ''
|
||||
return `${memoryBlock}The user stepped away and is coming back. Write exactly 1-3 short sentences. Start by stating the high-level task — what they are building or debugging, not implementation details. Next: the concrete next step. Skip status reports and commit recaps.`
|
||||
const prompt = getResolvedLanguage() === 'zh' ? PROMPT_ZH : PROMPT_EN
|
||||
return `${memoryBlock}${prompt}`
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
702
src/services/langfuse/__tests__/langfuse.isolated.ts
Normal file
702
src/services/langfuse/__tests__/langfuse.isolated.ts
Normal file
@@ -0,0 +1,702 @@
|
||||
import { mock, describe, test, expect, beforeEach } from 'bun:test'
|
||||
|
||||
// Mock @langfuse/otel before any imports
|
||||
const mockForceFlush = mock(() => Promise.resolve())
|
||||
const mockShutdown = mock(() => Promise.resolve())
|
||||
|
||||
mock.module('@langfuse/otel', () => ({
|
||||
LangfuseSpanProcessor: class MockLangfuseSpanProcessor {
|
||||
forceFlush = mockForceFlush
|
||||
shutdown = mockShutdown
|
||||
onStart = mock(() => {})
|
||||
onEnd = mock(() => {})
|
||||
},
|
||||
}))
|
||||
|
||||
// Mock @opentelemetry/sdk-trace-base
|
||||
mock.module('@opentelemetry/sdk-trace-base', () => ({
|
||||
BasicTracerProvider: class MockBasicTracerProvider {
|
||||
constructor(_opts?: unknown) {}
|
||||
},
|
||||
}))
|
||||
|
||||
// Mock @langfuse/tracing
|
||||
const mockChildUpdate = mock(() => {})
|
||||
const mockChildEnd = mock(() => {})
|
||||
const mockRootUpdate = mock(() => {})
|
||||
const mockRootEnd = mock(() => {})
|
||||
|
||||
// Mock LangfuseOtelSpanAttributes (re-exported from @langfuse/core)
|
||||
const mockLangfuseOtelSpanAttributes: Record<string, string> = {
|
||||
TRACE_SESSION_ID: 'session.id',
|
||||
TRACE_USER_ID: 'user.id',
|
||||
OBSERVATION_TYPE: 'observation.type',
|
||||
OBSERVATION_INPUT: 'observation.input',
|
||||
OBSERVATION_OUTPUT: 'observation.output',
|
||||
OBSERVATION_MODEL: 'observation.model',
|
||||
OBSERVATION_COMPLETION_START_TIME: 'observation.completionStartTime',
|
||||
OBSERVATION_USAGE_DETAILS: 'observation.usageDetails',
|
||||
}
|
||||
|
||||
const mockSpanContext = {
|
||||
traceId: 'test-trace-id',
|
||||
spanId: 'test-span-id',
|
||||
traceFlags: 1,
|
||||
}
|
||||
const mockSetAttribute = mock(() => {})
|
||||
|
||||
// Child observation mock (returned by startObservation for tools/generations)
|
||||
const mockStartObservation = mock(() => ({
|
||||
id: 'test-span-id',
|
||||
traceId: 'test-trace-id',
|
||||
type: 'span',
|
||||
otelSpan: {
|
||||
spanContext: () => mockSpanContext,
|
||||
setAttribute: mockSetAttribute,
|
||||
},
|
||||
update: mockRootUpdate,
|
||||
end: mockRootEnd,
|
||||
}))
|
||||
const mockSetLangfuseTracerProvider = mock(() => {})
|
||||
|
||||
mock.module('@langfuse/tracing', () => ({
|
||||
startObservation: mockStartObservation,
|
||||
LangfuseOtelSpanAttributes: mockLangfuseOtelSpanAttributes,
|
||||
propagateAttributes: mock((_params: unknown, fn?: () => void) => fn?.()),
|
||||
setLangfuseTracerProvider: mockSetLangfuseTracerProvider,
|
||||
}))
|
||||
|
||||
// Mock debug logger
|
||||
mock.module('src/utils/debug.js', () => ({
|
||||
logForDebugging: mock(() => {}),
|
||||
logAntError: mock(() => {}),
|
||||
isDebugToStdErr: () => false,
|
||||
isDebugMode: () => false,
|
||||
getDebugLogPath: () => '/tmp/debug.log',
|
||||
}))
|
||||
|
||||
// Mock user module to avoid heavy dependency chain (execa, config, cwd, env, etc.)
|
||||
mock.module('src/utils/user.js', () => ({
|
||||
getCoreUserData: () => ({
|
||||
email: 'test@example.com',
|
||||
deviceId: 'test-device',
|
||||
}),
|
||||
getUserDataForLogging: () => ({}),
|
||||
}))
|
||||
|
||||
describe('Langfuse integration', () => {
|
||||
beforeEach(() => {
|
||||
// Reset env
|
||||
process.env.HOME = '/Users/testuser'
|
||||
delete process.env.LANGFUSE_PUBLIC_KEY
|
||||
delete process.env.LANGFUSE_SECRET_KEY
|
||||
delete process.env.LANGFUSE_BASE_URL
|
||||
delete process.env.LANGFUSE_USER_ID
|
||||
mockStartObservation.mockClear()
|
||||
mockRootUpdate.mockClear()
|
||||
mockRootEnd.mockClear()
|
||||
mockForceFlush.mockClear()
|
||||
mockShutdown.mockClear()
|
||||
mockSetAttribute.mockClear()
|
||||
})
|
||||
|
||||
// ── sanitize tests ──────────────────────────────────────────────────────────
|
||||
|
||||
describe('sanitizeToolInput', () => {
|
||||
test('replaces home dir in file_path', async () => {
|
||||
const { sanitizeToolInput } = await import('../sanitize.js')
|
||||
const home = process.env.HOME ?? '/Users/testuser'
|
||||
const result = sanitizeToolInput('FileReadTool', {
|
||||
file_path: `${home}/project/file.ts`,
|
||||
}) as Record<string, string>
|
||||
expect(result.file_path).toBe('~/project/file.ts')
|
||||
})
|
||||
|
||||
test('redacts sensitive keys', async () => {
|
||||
const { sanitizeToolInput } = await import('../sanitize.js')
|
||||
const result = sanitizeToolInput('MCPTool', {
|
||||
api_key: 'secret123',
|
||||
token: 'abc',
|
||||
}) as Record<string, string>
|
||||
expect(result.api_key).toBe('[REDACTED]')
|
||||
expect(result.token).toBe('[REDACTED]')
|
||||
})
|
||||
|
||||
test('returns non-object input unchanged', async () => {
|
||||
const { sanitizeToolInput } = await import('../sanitize.js')
|
||||
expect(sanitizeToolInput('BashTool', 'raw string')).toBe('raw string')
|
||||
expect(sanitizeToolInput('BashTool', null)).toBe(null)
|
||||
})
|
||||
})
|
||||
|
||||
describe('sanitizeToolOutput', () => {
|
||||
test('redacts FileReadTool output', async () => {
|
||||
const { sanitizeToolOutput } = await import('../sanitize.js')
|
||||
const result = sanitizeToolOutput('FileReadTool', 'file content here')
|
||||
expect(result).toBe('[file content redacted, 17 chars]')
|
||||
})
|
||||
|
||||
test('redacts FileWriteTool output', async () => {
|
||||
const { sanitizeToolOutput } = await import('../sanitize.js')
|
||||
const result = sanitizeToolOutput('FileWriteTool', 'written content')
|
||||
expect(result).toBe('[file content redacted, 15 chars]')
|
||||
})
|
||||
|
||||
test('truncates BashTool output over 500 chars', async () => {
|
||||
const { sanitizeToolOutput } = await import('../sanitize.js')
|
||||
const longOutput = 'x'.repeat(600)
|
||||
const result = sanitizeToolOutput('BashTool', longOutput)
|
||||
expect(result).toContain('[truncated]')
|
||||
expect(result.length).toBeLessThan(600)
|
||||
})
|
||||
|
||||
test('does not truncate BashTool output under 500 chars', async () => {
|
||||
const { sanitizeToolOutput } = await import('../sanitize.js')
|
||||
const shortOutput = 'hello world'
|
||||
expect(sanitizeToolOutput('BashTool', shortOutput)).toBe('hello world')
|
||||
})
|
||||
|
||||
test('redacts ConfigTool output', async () => {
|
||||
const { sanitizeToolOutput } = await import('../sanitize.js')
|
||||
const result = sanitizeToolOutput('ConfigTool', 'config data')
|
||||
expect(result).toBe('[ConfigTool output redacted, 11 chars]')
|
||||
})
|
||||
|
||||
test('redacts MCPTool output', async () => {
|
||||
const { sanitizeToolOutput } = await import('../sanitize.js')
|
||||
const result = sanitizeToolOutput('MCPTool', 'mcp data')
|
||||
expect(result).toBe('[MCPTool output redacted, 8 chars]')
|
||||
})
|
||||
})
|
||||
|
||||
describe('sanitizeGlobal', () => {
|
||||
test('replaces home dir in strings', async () => {
|
||||
const { sanitizeGlobal } = await import('../sanitize.js')
|
||||
const home = process.env.HOME ?? '/Users/testuser'
|
||||
expect(sanitizeGlobal(`path: ${home}/file`)).toBe('path: ~/file')
|
||||
})
|
||||
|
||||
test('recursively sanitizes nested objects', async () => {
|
||||
const { sanitizeGlobal } = await import('../sanitize.js')
|
||||
const result = sanitizeGlobal({
|
||||
nested: { api_key: 'secret', name: 'test' },
|
||||
}) as Record<string, Record<string, string>>
|
||||
expect(result.nested.api_key).toBe('[REDACTED]')
|
||||
expect(result.nested.name).toBe('test')
|
||||
})
|
||||
|
||||
test('returns non-string/object values unchanged', async () => {
|
||||
const { sanitizeGlobal } = await import('../sanitize.js')
|
||||
expect(sanitizeGlobal(42)).toBe(42)
|
||||
expect(sanitizeGlobal(true)).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
// ── client tests ────────────────────────────────────────────────────────────
|
||||
|
||||
describe('isLangfuseEnabled', () => {
|
||||
test('returns false when keys not configured', async () => {
|
||||
const { isLangfuseEnabled } = await import('../client.js')
|
||||
expect(isLangfuseEnabled()).toBe(false)
|
||||
})
|
||||
|
||||
test('returns true when both keys are set', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
const { isLangfuseEnabled } = await import('../client.js')
|
||||
expect(isLangfuseEnabled()).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('initLangfuse', () => {
|
||||
test('returns false when keys not configured', async () => {
|
||||
const { initLangfuse } = await import('../client.js')
|
||||
expect(initLangfuse()).toBe(false)
|
||||
})
|
||||
|
||||
test('returns true when keys are configured', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
const { isLangfuseEnabled } = await import('../client.js')
|
||||
expect(isLangfuseEnabled()).toBe(true)
|
||||
})
|
||||
|
||||
test('is idempotent — multiple calls do not re-initialize', async () => {
|
||||
const { initLangfuse } = await import('../client.js')
|
||||
expect(() => {
|
||||
initLangfuse()
|
||||
initLangfuse()
|
||||
}).not.toThrow()
|
||||
})
|
||||
})
|
||||
|
||||
describe('shutdownLangfuse', () => {
|
||||
test('calls forceFlush and shutdown on processor', async () => {
|
||||
const { shutdownLangfuse } = await import('../client.js')
|
||||
await expect(shutdownLangfuse()).resolves.toBeUndefined()
|
||||
})
|
||||
})
|
||||
|
||||
// ── tracing tests ───────────────────────────────────────────────────────────
|
||||
|
||||
describe('createTrace', () => {
|
||||
test('returns null when langfuse not enabled', async () => {
|
||||
const { createTrace } = await import('../tracing.js')
|
||||
const span = createTrace({
|
||||
sessionId: 's1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
expect(span).toBeNull()
|
||||
})
|
||||
|
||||
test('creates root span when enabled', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
const { createTrace } = await import('../tracing.js')
|
||||
const span = createTrace({
|
||||
sessionId: 's1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
input: [],
|
||||
})
|
||||
expect(span).not.toBeNull()
|
||||
expect(mockStartObservation).toHaveBeenCalledWith(
|
||||
'agent-run',
|
||||
expect.objectContaining({
|
||||
metadata: expect.objectContaining({
|
||||
provider: 'firstParty',
|
||||
model: 'claude-3',
|
||||
agentType: 'main',
|
||||
}),
|
||||
}),
|
||||
{ asType: 'agent' },
|
||||
)
|
||||
// Should set session.id attribute
|
||||
expect(mockSetAttribute).toHaveBeenCalledWith('session.id', 's1')
|
||||
})
|
||||
})
|
||||
|
||||
describe('recordLLMObservation', () => {
|
||||
test('no-ops when rootSpan is null', async () => {
|
||||
const { recordLLMObservation } = await import('../tracing.js')
|
||||
recordLLMObservation(null, {
|
||||
model: 'm',
|
||||
provider: 'firstParty',
|
||||
input: [],
|
||||
output: [],
|
||||
usage: { input_tokens: 10, output_tokens: 5 },
|
||||
})
|
||||
expect(mockStartObservation).toHaveBeenCalledTimes(0)
|
||||
})
|
||||
|
||||
test('records generation child observation via global startObservation', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
const { createTrace, recordLLMObservation } = await import(
|
||||
'../tracing.js'
|
||||
)
|
||||
const span = createTrace({
|
||||
sessionId: 's1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
mockStartObservation.mockClear()
|
||||
mockRootUpdate.mockClear()
|
||||
mockRootEnd.mockClear()
|
||||
recordLLMObservation(span, {
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
input: [{ role: 'user', content: 'hello' }],
|
||||
output: [{ role: 'assistant', content: 'hi' }],
|
||||
usage: { input_tokens: 10, output_tokens: 5 },
|
||||
})
|
||||
// Should call the global startObservation with asType: 'generation' and parentSpanContext
|
||||
expect(mockStartObservation).toHaveBeenCalledWith(
|
||||
'ChatAnthropic',
|
||||
expect.objectContaining({
|
||||
model: 'claude-3',
|
||||
}),
|
||||
expect.objectContaining({
|
||||
asType: 'generation',
|
||||
parentSpanContext: mockSpanContext,
|
||||
}),
|
||||
)
|
||||
expect(mockRootUpdate).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
usageDetails: { input: 10, output: 5 },
|
||||
}),
|
||||
)
|
||||
expect(mockRootEnd).toHaveBeenCalled()
|
||||
})
|
||||
})
|
||||
|
||||
describe('recordToolObservation', () => {
|
||||
test('no-ops when rootSpan is null', async () => {
|
||||
const { recordToolObservation } = await import('../tracing.js')
|
||||
recordToolObservation(null, {
|
||||
toolName: 'BashTool',
|
||||
toolUseId: 'id1',
|
||||
input: {},
|
||||
output: 'out',
|
||||
})
|
||||
})
|
||||
|
||||
test('records tool child observation via global startObservation', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
const { createTrace, recordToolObservation } = await import(
|
||||
'../tracing.js'
|
||||
)
|
||||
const span = createTrace({
|
||||
sessionId: 's1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
mockStartObservation.mockClear()
|
||||
mockRootUpdate.mockClear()
|
||||
mockRootEnd.mockClear()
|
||||
recordToolObservation(span, {
|
||||
toolName: 'BashTool',
|
||||
toolUseId: 'tu-1',
|
||||
input: { command: 'ls' },
|
||||
output: 'file.ts',
|
||||
})
|
||||
// Should call the global startObservation with asType: 'tool' and parentSpanContext
|
||||
expect(mockStartObservation).toHaveBeenCalledWith(
|
||||
'BashTool',
|
||||
expect.objectContaining({
|
||||
input: expect.any(Object),
|
||||
}),
|
||||
expect.objectContaining({
|
||||
asType: 'tool',
|
||||
parentSpanContext: mockSpanContext,
|
||||
}),
|
||||
)
|
||||
expect(mockRootUpdate).toHaveBeenCalled()
|
||||
expect(mockRootEnd).toHaveBeenCalled()
|
||||
})
|
||||
|
||||
test('passes startTime to global startObservation', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
const { createTrace, recordToolObservation } = await import(
|
||||
'../tracing.js'
|
||||
)
|
||||
const span = createTrace({
|
||||
sessionId: 's1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
mockStartObservation.mockClear()
|
||||
const startTime = new Date('2026-01-01T00:00:00Z')
|
||||
recordToolObservation(span, {
|
||||
toolName: 'BashTool',
|
||||
toolUseId: 'tu-2',
|
||||
input: {},
|
||||
output: 'out',
|
||||
startTime,
|
||||
})
|
||||
expect(mockStartObservation).toHaveBeenCalledWith(
|
||||
'BashTool',
|
||||
expect.any(Object),
|
||||
expect.objectContaining({
|
||||
startTime,
|
||||
parentSpanContext: mockSpanContext,
|
||||
}),
|
||||
)
|
||||
})
|
||||
|
||||
test('sanitizes FileReadTool output', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
const { createTrace, recordToolObservation } = await import(
|
||||
'../tracing.js'
|
||||
)
|
||||
const span = createTrace({
|
||||
sessionId: 's1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
mockRootUpdate.mockClear()
|
||||
recordToolObservation(span, {
|
||||
toolName: 'FileReadTool',
|
||||
toolUseId: 'tu-2',
|
||||
input: { file_path: '/tmp/file.ts' },
|
||||
output: 'file content here',
|
||||
})
|
||||
expect(mockRootUpdate).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
output: '[file content redacted, 17 chars]',
|
||||
}),
|
||||
)
|
||||
})
|
||||
|
||||
test('sets ERROR level for error observations', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
const { createTrace, recordToolObservation } = await import(
|
||||
'../tracing.js'
|
||||
)
|
||||
const span = createTrace({
|
||||
sessionId: 's1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
mockRootUpdate.mockClear()
|
||||
recordToolObservation(span, {
|
||||
toolName: 'BashTool',
|
||||
toolUseId: 'tu-3',
|
||||
input: {},
|
||||
output: 'error occurred',
|
||||
isError: true,
|
||||
})
|
||||
expect(mockRootUpdate).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ level: 'ERROR' }),
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
describe('endTrace', () => {
|
||||
test('no-ops when rootSpan is null', async () => {
|
||||
const { endTrace } = await import('../tracing.js')
|
||||
endTrace(null)
|
||||
expect(mockRootEnd).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
test('calls span.end()', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
const { createTrace, endTrace } = await import('../tracing.js')
|
||||
const span = createTrace({
|
||||
sessionId: 's1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
mockRootEnd.mockClear()
|
||||
endTrace(span)
|
||||
expect(mockRootEnd).toHaveBeenCalled()
|
||||
})
|
||||
|
||||
test('calls span.update() with output when provided', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
const { createTrace, endTrace } = await import('../tracing.js')
|
||||
const span = createTrace({
|
||||
sessionId: 's1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
mockRootUpdate.mockClear()
|
||||
mockRootEnd.mockClear()
|
||||
endTrace(span, 'final output')
|
||||
expect(mockRootUpdate).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ output: 'final output' }),
|
||||
)
|
||||
expect(mockRootEnd).toHaveBeenCalled()
|
||||
})
|
||||
})
|
||||
|
||||
describe('createSubagentTrace', () => {
|
||||
test('returns null when langfuse not enabled', async () => {
|
||||
const { createSubagentTrace } = await import('../tracing.js')
|
||||
const span = createSubagentTrace({
|
||||
sessionId: 's1',
|
||||
agentType: 'Explore',
|
||||
agentId: 'agent-1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
expect(span).toBeNull()
|
||||
})
|
||||
|
||||
test('creates trace with agentType and agentId metadata', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
const { createSubagentTrace } = await import('../tracing.js')
|
||||
const span = createSubagentTrace({
|
||||
sessionId: 's1',
|
||||
agentType: 'Explore',
|
||||
agentId: 'agent-1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
input: [{ role: 'user', content: 'search for X' }],
|
||||
})
|
||||
expect(span).not.toBeNull()
|
||||
expect(mockStartObservation).toHaveBeenCalledWith(
|
||||
'agent:Explore',
|
||||
expect.objectContaining({
|
||||
metadata: expect.objectContaining({
|
||||
agentType: 'Explore',
|
||||
agentId: 'agent-1',
|
||||
provider: 'firstParty',
|
||||
model: 'claude-3',
|
||||
}),
|
||||
}),
|
||||
{ asType: 'agent' },
|
||||
)
|
||||
// Verify session.id attribute is set
|
||||
expect(mockSetAttribute).toHaveBeenCalledWith('session.id', 's1')
|
||||
})
|
||||
|
||||
test('returns null on SDK error', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
mockStartObservation.mockImplementationOnce(() => {
|
||||
throw new Error('SDK error')
|
||||
})
|
||||
const { createSubagentTrace } = await import('../tracing.js')
|
||||
const span = createSubagentTrace({
|
||||
sessionId: 's1',
|
||||
agentType: 'Plan',
|
||||
agentId: 'agent-2',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
expect(span).toBeNull()
|
||||
})
|
||||
})
|
||||
|
||||
describe('createTrace with querySource', () => {
|
||||
test('includes querySource in metadata', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
const { createTrace } = await import('../tracing.js')
|
||||
const span = createTrace({
|
||||
sessionId: 's1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
querySource: 'user',
|
||||
})
|
||||
expect(span).not.toBeNull()
|
||||
expect(mockStartObservation).toHaveBeenCalledWith(
|
||||
'agent-run:user',
|
||||
expect.objectContaining({
|
||||
metadata: expect.objectContaining({
|
||||
agentType: 'main',
|
||||
querySource: 'user',
|
||||
}),
|
||||
}),
|
||||
{ asType: 'agent' },
|
||||
)
|
||||
})
|
||||
|
||||
test('omits querySource when not provided', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
mockStartObservation.mockClear()
|
||||
const { createTrace } = await import('../tracing.js')
|
||||
createTrace({
|
||||
sessionId: 's1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
const calls = mockStartObservation.mock.calls as unknown[][]
|
||||
const secondArg = calls[0]?.[1] as Record<string, unknown> | undefined
|
||||
const metadata = (secondArg?.metadata ?? {}) as Record<string, unknown>
|
||||
expect(metadata).not.toHaveProperty('querySource')
|
||||
})
|
||||
})
|
||||
|
||||
describe('nested agent scenario', () => {
|
||||
test('sub-agent trace shares sessionId with parent', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
const { createTrace, createSubagentTrace } = await import('../tracing.js')
|
||||
mockSetAttribute.mockClear()
|
||||
|
||||
// Create parent trace
|
||||
const parentSpan = createTrace({
|
||||
sessionId: 'shared-session',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
|
||||
// Create sub-agent trace with same sessionId
|
||||
const subSpan = createSubagentTrace({
|
||||
sessionId: 'shared-session',
|
||||
agentType: 'Explore',
|
||||
agentId: 'agent-explore-1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
|
||||
expect(parentSpan).not.toBeNull()
|
||||
expect(subSpan).not.toBeNull()
|
||||
|
||||
// Both should have set session.id attribute
|
||||
const sessionAttributeCalls = mockSetAttribute.mock.calls.filter(
|
||||
(call: unknown[]) =>
|
||||
Array.isArray(call) &&
|
||||
call[0] === 'session.id' &&
|
||||
call[1] === 'shared-session',
|
||||
)
|
||||
expect(sessionAttributeCalls.length).toBeGreaterThanOrEqual(2)
|
||||
})
|
||||
|
||||
test('query reuses passed langfuseTrace instead of creating new one', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
const { createSubagentTrace } = await import('../tracing.js')
|
||||
|
||||
const subTrace = createSubagentTrace({
|
||||
sessionId: 's1',
|
||||
agentType: 'Explore',
|
||||
agentId: 'agent-1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
expect(subTrace).not.toBeNull()
|
||||
|
||||
// Simulate query.ts logic: if langfuseTrace already set, don't create new one
|
||||
const ownsTrace = false
|
||||
const langfuseTrace = subTrace
|
||||
|
||||
expect(ownsTrace).toBe(false)
|
||||
expect(langfuseTrace).toBe(subTrace)
|
||||
})
|
||||
})
|
||||
|
||||
describe('SDK exceptions do not affect main flow', () => {
|
||||
test('createTrace returns null on SDK error', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
mockStartObservation.mockImplementationOnce(() => {
|
||||
throw new Error('SDK error')
|
||||
})
|
||||
const { createTrace } = await import('../tracing.js')
|
||||
const span = createTrace({
|
||||
sessionId: 's1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
expect(span).toBeNull()
|
||||
})
|
||||
|
||||
test('recordLLMObservation silently fails on SDK error', async () => {
|
||||
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
|
||||
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
|
||||
const { createTrace, recordLLMObservation } = await import(
|
||||
'../tracing.js'
|
||||
)
|
||||
const span = createTrace({
|
||||
sessionId: 's1',
|
||||
model: 'claude-3',
|
||||
provider: 'firstParty',
|
||||
})
|
||||
// The next call to startObservation (for the generation) will throw
|
||||
mockStartObservation.mockImplementationOnce(() => {
|
||||
throw new Error('SDK error')
|
||||
})
|
||||
expect(() =>
|
||||
recordLLMObservation(span, {
|
||||
model: 'm',
|
||||
provider: 'firstParty',
|
||||
input: [],
|
||||
output: [],
|
||||
usage: { input_tokens: 1, output_tokens: 1 },
|
||||
}),
|
||||
).not.toThrow()
|
||||
})
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user