mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-22 08:15:53 +00:00
refactor: 简化/复用/防御 — 清理 PR #386 审计发现
简化 (S1, S2): - src/cli/print.ts: 抽出 dispatchHeadlessCronCommand 本地 helper,把 cron 三个入口(onFire / onFireTask agent / onFireTask 非-agent)共享的 「dedup-claim → input-close-recheck → onSuccess」管线集中到一处, 避免三个分支在「claim 与 dispatch 之间发生 inputClosed」的处理上漂移。 enqueueAndRun 再抽出来,使两个非-agent 分支共用一个 onSuccess 回调。 约 -55 行重复模板。 - src/utils/autonomyPersistence.ts: 新增 retainActiveFirst<T> 泛型 helper —— active 记录无条件保留(不参与 cap),inactive 按 timestamp desc 填满剩余预算;统一 selectPersistedAutonomyRuns / Flows 的两阶段 排序语义。 - src/utils/autonomyRuns.ts、autonomyFlows.ts: 改用 retainActiveFirst, 删掉重复的内联两阶段排序逻辑。 复用 (R1, review #8): - tests/mocks/file-system.ts: 新增 readTempFile / tempPathExists 两个 Bun.file 包装,补齐 Node fs.readFileSync / existsSync 在测试里的 Bun-only 等价物。 - src/utils/__tests__/autonomyRuns.test.ts: 把全部 Node fs/path 导入 (existsSync, readFileSync, mkdir, writeFile, path.join/resolve)替换为 tests/mocks/file-system 的共享 helper + node:path(带 node: 前缀)。 不再有 6 处 mkdir + writeFile 模板,统一用 writeTempFile(自带 mkdir-p)。 解决 review #8 (Major) 的 Bun-only 运行时契约违反。 防御 (D1, OOM 早期信号): - src/services/compact/postCompactCleanup.ts: 在 void import().then() 末尾 补 .catch(logError)。当前 attributionHooks 是 stub,但当真实现被恢复 且 sweepFileContentCache 抛错时,这个 .catch 阻止它变成 unhandled rejection(函数返回值是 void,调用者无从观察异步失败)。 - src/utils/autonomyRuns.ts: 给 active runs 加 100 条软上限 + 一次性 warn。selectPersistedAutonomyRuns 仍然永不淘汰 active 记录,但跨过 阈值时 logError 一次,作为 finalize-leak 早期信号——避免 active 无限 增长悄悄使 AUTONOMY_RUNS_MAX 失效。
This commit is contained in:
156
src/cli/print.ts
156
src/cli/print.ts
@@ -2819,97 +2819,89 @@ function runHeadlessStreaming(
|
|||||||
let cronScheduler: import('../utils/cronScheduler.js').CronScheduler | null =
|
let cronScheduler: import('../utils/cronScheduler.js').CronScheduler | null =
|
||||||
null
|
null
|
||||||
if (cronGate.isKairosCronEnabled()) {
|
if (cronGate.isKairosCronEnabled()) {
|
||||||
|
// Shared dedup-claim → input-close-recheck → onSuccess pipeline for the
|
||||||
|
// three cron entry points (legacy onFire, onFireTask agent, onFireTask
|
||||||
|
// non-agent). Centralizing the cancel-on-late-shutdown contract here keeps
|
||||||
|
// the three branches from drifting on what happens between claim and
|
||||||
|
// dispatch. onSuccess receives the claimed QueuedCommand and decides
|
||||||
|
// whether to enqueue it (normal path) or mark the run failed (agent path).
|
||||||
|
const dispatchHeadlessCronCommand = (params: {
|
||||||
|
basePrompt: string
|
||||||
|
sourceId: string
|
||||||
|
sourceLabel: string
|
||||||
|
logSuffix: string
|
||||||
|
onSuccess: (command: QueuedCommand) => void | Promise<void>
|
||||||
|
}): void => {
|
||||||
|
if (inputClosed) return
|
||||||
|
void (async () => {
|
||||||
|
const command = await createAutonomyQueuedPromptIfNoActiveSource({
|
||||||
|
basePrompt: params.basePrompt,
|
||||||
|
trigger: 'scheduled-task',
|
||||||
|
currentDir: cwd(),
|
||||||
|
sourceId: params.sourceId,
|
||||||
|
sourceLabel: params.sourceLabel,
|
||||||
|
workload: WORKLOAD_CRON,
|
||||||
|
shouldCreate: () => !inputClosed,
|
||||||
|
})
|
||||||
|
if (!command) return
|
||||||
|
if (inputClosed) {
|
||||||
|
await cancelQueuedAutonomyCommands({ commands: [command] })
|
||||||
|
return
|
||||||
|
}
|
||||||
|
await params.onSuccess(command)
|
||||||
|
})().catch(error => {
|
||||||
|
logError(error)
|
||||||
|
logForDebugging(
|
||||||
|
`[ScheduledTasks] failed to enqueue headless task${params.logSuffix}: ${error}`,
|
||||||
|
{ level: 'error' },
|
||||||
|
)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
const enqueueAndRun = (command: QueuedCommand): void => {
|
||||||
|
enqueue({
|
||||||
|
...command,
|
||||||
|
uuid: randomUUID(),
|
||||||
|
})
|
||||||
|
void run()
|
||||||
|
}
|
||||||
|
|
||||||
cronScheduler = cronSchedulerModule.createCronScheduler({
|
cronScheduler = cronSchedulerModule.createCronScheduler({
|
||||||
onFire: prompt => {
|
onFire: prompt => {
|
||||||
if (inputClosed) return
|
// Legacy KAIROS-style entries: the prompt text is what uniquely
|
||||||
void (async () => {
|
// identifies the cron entry, so it doubles as both source id and
|
||||||
// Use the prompt itself as the dedup source: legacy KAIROS-style
|
// source label for dedup.
|
||||||
// cron entries fire the same prompt repeatedly, and without a
|
dispatchHeadlessCronCommand({
|
||||||
// dedicated task id the prompt text is what uniquely identifies
|
basePrompt: prompt,
|
||||||
// the entry. Without source-dedup, repeated fires would stack
|
sourceId: prompt,
|
||||||
// additional runs while an earlier one is still active. Match the
|
sourceLabel: prompt,
|
||||||
// onFireTask branch below to keep the two paths consistent.
|
logSuffix: '',
|
||||||
const command = await createAutonomyQueuedPromptIfNoActiveSource({
|
onSuccess: enqueueAndRun,
|
||||||
basePrompt: prompt,
|
|
||||||
trigger: 'scheduled-task',
|
|
||||||
currentDir: cwd(),
|
|
||||||
sourceId: prompt,
|
|
||||||
sourceLabel: prompt,
|
|
||||||
workload: WORKLOAD_CRON,
|
|
||||||
shouldCreate: () => !inputClosed,
|
|
||||||
})
|
|
||||||
if (!command) return
|
|
||||||
if (inputClosed) {
|
|
||||||
await cancelQueuedAutonomyCommands({ commands: [command] })
|
|
||||||
return
|
|
||||||
}
|
|
||||||
enqueue({
|
|
||||||
...command,
|
|
||||||
uuid: randomUUID(),
|
|
||||||
})
|
|
||||||
void run()
|
|
||||||
})().catch(error => {
|
|
||||||
logError(error)
|
|
||||||
logForDebugging(
|
|
||||||
`[ScheduledTasks] failed to enqueue headless task: ${error}`,
|
|
||||||
{
|
|
||||||
level: 'error',
|
|
||||||
},
|
|
||||||
)
|
|
||||||
})
|
})
|
||||||
},
|
},
|
||||||
onFireTask: task => {
|
onFireTask: task => {
|
||||||
if (inputClosed) return
|
if (task.agentId) {
|
||||||
void (async () => {
|
dispatchHeadlessCronCommand({
|
||||||
if (task.agentId) {
|
|
||||||
const command = await createAutonomyQueuedPromptIfNoActiveSource({
|
|
||||||
basePrompt: task.prompt,
|
|
||||||
trigger: 'scheduled-task',
|
|
||||||
currentDir: cwd(),
|
|
||||||
sourceId: task.id,
|
|
||||||
sourceLabel: task.prompt,
|
|
||||||
workload: WORKLOAD_CRON,
|
|
||||||
shouldCreate: () => !inputClosed,
|
|
||||||
})
|
|
||||||
if (!command) return
|
|
||||||
if (inputClosed) {
|
|
||||||
await cancelQueuedAutonomyCommands({ commands: [command] })
|
|
||||||
return
|
|
||||||
}
|
|
||||||
await markAutonomyRunFailed(
|
|
||||||
command.autonomy!.runId,
|
|
||||||
`No teammate runtime available for scheduled task owner ${task.agentId} in headless mode.`,
|
|
||||||
command.autonomy!.rootDir,
|
|
||||||
)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
const command = await createAutonomyQueuedPromptIfNoActiveSource({
|
|
||||||
basePrompt: task.prompt,
|
basePrompt: task.prompt,
|
||||||
trigger: 'scheduled-task',
|
|
||||||
currentDir: cwd(),
|
|
||||||
sourceId: task.id,
|
sourceId: task.id,
|
||||||
sourceLabel: task.prompt,
|
sourceLabel: task.prompt,
|
||||||
workload: WORKLOAD_CRON,
|
logSuffix: ` ${task.id}`,
|
||||||
shouldCreate: () => !inputClosed,
|
onSuccess: async command => {
|
||||||
})
|
await markAutonomyRunFailed(
|
||||||
if (!command) return
|
command.autonomy!.runId,
|
||||||
if (inputClosed) {
|
`No teammate runtime available for scheduled task owner ${task.agentId} in headless mode.`,
|
||||||
await cancelQueuedAutonomyCommands({ commands: [command] })
|
command.autonomy!.rootDir,
|
||||||
return
|
)
|
||||||
}
|
|
||||||
enqueue({
|
|
||||||
...command,
|
|
||||||
uuid: randomUUID(),
|
|
||||||
})
|
|
||||||
void run()
|
|
||||||
})().catch(error => {
|
|
||||||
logError(error)
|
|
||||||
logForDebugging(
|
|
||||||
`[ScheduledTasks] failed to enqueue headless task ${task.id}: ${error}`,
|
|
||||||
{
|
|
||||||
level: 'error',
|
|
||||||
},
|
},
|
||||||
)
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
dispatchHeadlessCronCommand({
|
||||||
|
basePrompt: task.prompt,
|
||||||
|
sourceId: task.id,
|
||||||
|
sourceLabel: task.prompt,
|
||||||
|
logSuffix: ` ${task.id}`,
|
||||||
|
onSuccess: enqueueAndRun,
|
||||||
})
|
})
|
||||||
},
|
},
|
||||||
isLoading: () => running || inputClosed,
|
isLoading: () => running || inputClosed,
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import { getUserContext } from '../../context.js'
|
|||||||
import { clearSpeculativeChecks } from '@claude-code-best/builtin-tools/tools/BashTool/bashPermissions.js'
|
import { clearSpeculativeChecks } from '@claude-code-best/builtin-tools/tools/BashTool/bashPermissions.js'
|
||||||
import { clearClassifierApprovals } from '../../utils/classifierApprovals.js'
|
import { clearClassifierApprovals } from '../../utils/classifierApprovals.js'
|
||||||
import { resetGetMemoryFilesCache } from '../../utils/claudemd.js'
|
import { resetGetMemoryFilesCache } from '../../utils/claudemd.js'
|
||||||
|
import { logError } from '../../utils/log.js'
|
||||||
import { clearSessionMessagesCache } from '../../utils/sessionStorage.js'
|
import { clearSessionMessagesCache } from '../../utils/sessionStorage.js'
|
||||||
import { clearBetaTracingState } from '../../utils/telemetry/betaSessionTracing.js'
|
import { clearBetaTracingState } from '../../utils/telemetry/betaSessionTracing.js'
|
||||||
import { resetMicrocompactState } from './microCompact.js'
|
import { resetMicrocompactState } from './microCompact.js'
|
||||||
@@ -75,9 +76,16 @@ export function runPostCompactCleanup(querySource?: QuerySource): void {
|
|||||||
// (REPL post-compact handler, /compact command, autoCompact) finish their
|
// (REPL post-compact handler, /compact command, autoCompact) finish their
|
||||||
// own state transitions without an extra microtask round-trip — the sweep
|
// own state transitions without an extra microtask round-trip — the sweep
|
||||||
// catches up on the next event-loop tick.
|
// catches up on the next event-loop tick.
|
||||||
void import('../../utils/attributionHooks.js').then(m =>
|
//
|
||||||
m.sweepFileContentCache(),
|
// The .catch is required even though the current attributionHooks.ts is a
|
||||||
)
|
// no-op stub: without it, a future restored sweepFileContentCache that
|
||||||
|
// throws would surface as an unhandled promise rejection from a function
|
||||||
|
// whose synchronous signature gives callers no way to observe it.
|
||||||
|
void import('../../utils/attributionHooks.js')
|
||||||
|
.then(m => m.sweepFileContentCache())
|
||||||
|
.catch(error => {
|
||||||
|
logError(error)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
clearSessionMessagesCache()
|
clearSessionMessagesCache()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,5 @@
|
|||||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
||||||
import { existsSync, readFileSync } from 'fs'
|
import { join, resolve as resolvePath } from 'node:path'
|
||||||
import { mkdir, writeFile } from 'fs/promises'
|
|
||||||
import { join, resolve as resolvePath } from 'path'
|
|
||||||
import {
|
import {
|
||||||
resetStateForTests,
|
resetStateForTests,
|
||||||
setCwdState,
|
setCwdState,
|
||||||
@@ -42,11 +40,14 @@ import {
|
|||||||
cleanupTempDir,
|
cleanupTempDir,
|
||||||
createTempDir,
|
createTempDir,
|
||||||
createTempSubdir,
|
createTempSubdir,
|
||||||
|
readTempFile,
|
||||||
|
tempPathExists,
|
||||||
writeTempFile,
|
writeTempFile,
|
||||||
} from '../../../tests/mocks/file-system'
|
} from '../../../tests/mocks/file-system'
|
||||||
|
|
||||||
const AGENTS_REL = join(AUTONOMY_DIR, 'AGENTS.md')
|
const AGENTS_REL = join(AUTONOMY_DIR, 'AGENTS.md')
|
||||||
const HEARTBEAT_REL = join(AUTONOMY_DIR, 'HEARTBEAT.md')
|
const HEARTBEAT_REL = join(AUTONOMY_DIR, 'HEARTBEAT.md')
|
||||||
|
const RUNS_REL = join(AUTONOMY_DIR, 'runs.json')
|
||||||
|
|
||||||
let tempDir = ''
|
let tempDir = ''
|
||||||
|
|
||||||
@@ -349,9 +350,9 @@ describe('autonomyRuns', () => {
|
|||||||
)
|
)
|
||||||
|
|
||||||
// Seed an active queued run for cron-1 so the next dedup attempt skips.
|
// Seed an active queued run for cron-1 so the next dedup attempt skips.
|
||||||
await mkdir(join(tempDir, AUTONOMY_DIR), { recursive: true })
|
await writeTempFile(
|
||||||
await writeFile(
|
tempDir,
|
||||||
resolveAutonomyRunsPath(tempDir),
|
RUNS_REL,
|
||||||
`${JSON.stringify(
|
`${JSON.stringify(
|
||||||
{
|
{
|
||||||
runs: [
|
runs: [
|
||||||
@@ -373,7 +374,6 @@ describe('autonomyRuns', () => {
|
|||||||
null,
|
null,
|
||||||
2,
|
2,
|
||||||
)}\n`,
|
)}\n`,
|
||||||
'utf-8',
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const skipped = await createAutonomyQueuedPromptIfNoActiveSource({
|
const skipped = await createAutonomyQueuedPromptIfNoActiveSource({
|
||||||
@@ -400,9 +400,9 @@ describe('autonomyRuns', () => {
|
|||||||
})
|
})
|
||||||
|
|
||||||
test('createAutonomyQueuedPromptIfNoActiveSource recovers stale active runs from dead owner processes', async () => {
|
test('createAutonomyQueuedPromptIfNoActiveSource recovers stale active runs from dead owner processes', async () => {
|
||||||
await mkdir(join(tempDir, AUTONOMY_DIR), { recursive: true })
|
await writeTempFile(
|
||||||
await writeFile(
|
tempDir,
|
||||||
resolveAutonomyRunsPath(tempDir),
|
RUNS_REL,
|
||||||
`${JSON.stringify(
|
`${JSON.stringify(
|
||||||
{
|
{
|
||||||
runs: [
|
runs: [
|
||||||
@@ -426,7 +426,6 @@ describe('autonomyRuns', () => {
|
|||||||
null,
|
null,
|
||||||
2,
|
2,
|
||||||
)}\n`,
|
)}\n`,
|
||||||
'utf-8',
|
|
||||||
)
|
)
|
||||||
|
|
||||||
await expect(
|
await expect(
|
||||||
@@ -483,7 +482,7 @@ describe('autonomyRuns', () => {
|
|||||||
await markAutonomyRunRunning(runId, tempDir, 100)
|
await markAutonomyRunRunning(runId, tempDir, 100)
|
||||||
|
|
||||||
const runsPath = resolveAutonomyRunsPath(tempDir)
|
const runsPath = resolveAutonomyRunsPath(tempDir)
|
||||||
const file = JSON.parse(readFileSync(runsPath, 'utf-8')) as {
|
const file = JSON.parse(await readTempFile(runsPath)) as {
|
||||||
runs: Array<Record<string, unknown>>
|
runs: Array<Record<string, unknown>>
|
||||||
}
|
}
|
||||||
file.runs = file.runs.map(run =>
|
file.runs = file.runs.map(run =>
|
||||||
@@ -491,7 +490,7 @@ describe('autonomyRuns', () => {
|
|||||||
? { ...run, ownerProcessId: 2_147_483_647 }
|
? { ...run, ownerProcessId: 2_147_483_647 }
|
||||||
: run,
|
: run,
|
||||||
)
|
)
|
||||||
await writeFile(runsPath, `${JSON.stringify(file, null, 2)}\n`, 'utf-8')
|
await writeTempFile(tempDir, RUNS_REL, `${JSON.stringify(file, null, 2)}\n`)
|
||||||
|
|
||||||
const replacement = await createAutonomyQueuedPromptIfNoActiveSource({
|
const replacement = await createAutonomyQueuedPromptIfNoActiveSource({
|
||||||
basePrompt: 'replacement prompt',
|
basePrompt: 'replacement prompt',
|
||||||
@@ -615,11 +614,10 @@ describe('autonomyRuns', () => {
|
|||||||
endedAt: 2_000 + index,
|
endedAt: 2_000 + index,
|
||||||
})),
|
})),
|
||||||
]
|
]
|
||||||
await mkdir(join(tempDir, AUTONOMY_DIR), { recursive: true })
|
await writeTempFile(
|
||||||
await writeFile(
|
tempDir,
|
||||||
resolveAutonomyRunsPath(tempDir),
|
RUNS_REL,
|
||||||
`${JSON.stringify({ runs }, null, 2)}\n`,
|
`${JSON.stringify({ runs }, null, 2)}\n`,
|
||||||
'utf-8',
|
|
||||||
)
|
)
|
||||||
|
|
||||||
await createAutonomyRun({
|
await createAutonomyRun({
|
||||||
@@ -637,10 +635,9 @@ describe('autonomyRuns', () => {
|
|||||||
})
|
})
|
||||||
|
|
||||||
test('listAutonomyRuns keeps older persisted records by normalizing missing runtime and owner metadata', async () => {
|
test('listAutonomyRuns keeps older persisted records by normalizing missing runtime and owner metadata', async () => {
|
||||||
const runsPath = resolveAutonomyRunsPath(tempDir)
|
await writeTempFile(
|
||||||
await mkdir(join(tempDir, '.claude', 'autonomy'), { recursive: true })
|
tempDir,
|
||||||
await writeFile(
|
RUNS_REL,
|
||||||
runsPath,
|
|
||||||
`${JSON.stringify(
|
`${JSON.stringify(
|
||||||
{
|
{
|
||||||
runs: [
|
runs: [
|
||||||
@@ -657,7 +654,6 @@ describe('autonomyRuns', () => {
|
|||||||
null,
|
null,
|
||||||
2,
|
2,
|
||||||
)}\n`,
|
)}\n`,
|
||||||
'utf-8',
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const [legacy] = await listAutonomyRuns(tempDir)
|
const [legacy] = await listAutonomyRuns(tempDir)
|
||||||
@@ -832,7 +828,7 @@ describe('autonomyRuns', () => {
|
|||||||
expect(recovered!.autonomy?.flowId).toBe(flow!.flowId)
|
expect(recovered!.autonomy?.flowId).toBe(flow!.flowId)
|
||||||
})
|
})
|
||||||
|
|
||||||
test('STALE_ACTIVE_RUN_ERROR_PREFIX stays in sync with HEARTBEAT.md stale-recovery-health task', () => {
|
test('STALE_ACTIVE_RUN_ERROR_PREFIX stays in sync with HEARTBEAT.md stale-recovery-health task', async () => {
|
||||||
// The HEARTBEAT.md stale-recovery-health task prompt embeds this prefix
|
// The HEARTBEAT.md stale-recovery-health task prompt embeds this prefix
|
||||||
// as a literal string. Changing the constant without updating the
|
// as a literal string. Changing the constant without updating the
|
||||||
// heartbeat prompt would silently break the monitor — this test fails
|
// heartbeat prompt would silently break the monitor — this test fails
|
||||||
@@ -846,12 +842,12 @@ describe('autonomyRuns', () => {
|
|||||||
'autonomy',
|
'autonomy',
|
||||||
'HEARTBEAT.md',
|
'HEARTBEAT.md',
|
||||||
)
|
)
|
||||||
if (!existsSync(heartbeatPath)) {
|
if (!(await tempPathExists(heartbeatPath))) {
|
||||||
// .claude/ may be absent in some checkout layouts (e.g., shallow clone
|
// .claude/ may be absent in some checkout layouts (e.g., shallow clone
|
||||||
// for npm pack). Skip rather than fail in that case.
|
// for npm pack). Skip rather than fail in that case.
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
const content = readFileSync(heartbeatPath, 'utf8')
|
const content = await readTempFile(heartbeatPath)
|
||||||
expect(content).toContain(STALE_ACTIVE_RUN_ERROR_PREFIX)
|
expect(content).toContain(STALE_ACTIVE_RUN_ERROR_PREFIX)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -3,7 +3,10 @@ import { mkdir, writeFile } from 'fs/promises'
|
|||||||
import { dirname, join, resolve } from 'path'
|
import { dirname, join, resolve } from 'path'
|
||||||
import { getProjectRoot } from '../bootstrap/state.js'
|
import { getProjectRoot } from '../bootstrap/state.js'
|
||||||
import { AUTONOMY_DIR, type AutonomyTriggerKind } from './autonomyAuthority.js'
|
import { AUTONOMY_DIR, type AutonomyTriggerKind } from './autonomyAuthority.js'
|
||||||
import { withAutonomyPersistenceLock } from './autonomyPersistence.js'
|
import {
|
||||||
|
retainActiveFirst,
|
||||||
|
withAutonomyPersistenceLock,
|
||||||
|
} from './autonomyPersistence.js'
|
||||||
import { getFsImplementation } from './fsOperations.js'
|
import { getFsImplementation } from './fsOperations.js'
|
||||||
|
|
||||||
const AUTONOMY_FLOWS_MAX = 100
|
const AUTONOMY_FLOWS_MAX = 100
|
||||||
@@ -170,26 +173,12 @@ function isManagedFlowStatusActive(status: AutonomyFlowStatus): boolean {
|
|||||||
function selectPersistedAutonomyFlows(
|
function selectPersistedAutonomyFlows(
|
||||||
flows: AutonomyFlowRecord[],
|
flows: AutonomyFlowRecord[],
|
||||||
): AutonomyFlowRecord[] {
|
): AutonomyFlowRecord[] {
|
||||||
// Two-phase sort. Phase 1: priority sort (active flows first, then by
|
return retainActiveFirst(
|
||||||
// updatedAt desc) selects the AUTONOMY_FLOWS_MAX most-relevant records to
|
flows.map(cloneFlowRecord),
|
||||||
// retain — active flows are guaranteed a slot before any inactive flow is
|
flow => isManagedFlowStatusActive(flow.status),
|
||||||
// considered. Phase 2: re-sort the retained slice by updatedAt desc only,
|
flow => flow.updatedAt,
|
||||||
// so the persisted file is in plain reverse-chronological order regardless
|
AUTONOMY_FLOWS_MAX,
|
||||||
// of activity status. Listings/UI consume the persisted order directly.
|
)
|
||||||
const retained = flows
|
|
||||||
.slice()
|
|
||||||
.map(cloneFlowRecord)
|
|
||||||
.sort((left, right) => {
|
|
||||||
const leftActive = isManagedFlowStatusActive(left.status)
|
|
||||||
const rightActive = isManagedFlowStatusActive(right.status)
|
|
||||||
if (leftActive !== rightActive) {
|
|
||||||
return leftActive ? -1 : 1
|
|
||||||
}
|
|
||||||
return right.updatedAt - left.updatedAt
|
|
||||||
})
|
|
||||||
.slice(0, AUTONOMY_FLOWS_MAX)
|
|
||||||
|
|
||||||
return retained.sort((left, right) => right.updatedAt - left.updatedAt)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function defaultFlowSource(params: {
|
function defaultFlowSource(params: {
|
||||||
|
|||||||
@@ -4,6 +4,33 @@ import { lock } from './lockfile.js'
|
|||||||
|
|
||||||
const persistenceLocks = new Map<string, Promise<void>>()
|
const persistenceLocks = new Map<string, Promise<void>>()
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Two-phase persistence retention. Active records (queued/running, etc.) are
|
||||||
|
* always kept — capping them risks evicting in-flight work; that responsibility
|
||||||
|
* lives in caller-side leak detection. Inactive (terminal) records are ranked
|
||||||
|
* by `getTimestamp` desc and capped to fill the remaining budget below `max`.
|
||||||
|
*
|
||||||
|
* Returned list is sorted by `getTimestamp` desc regardless of activity, so
|
||||||
|
* the persisted file is plain reverse-chronological order — listings/UI can
|
||||||
|
* consume it directly without re-sorting.
|
||||||
|
*/
|
||||||
|
export function retainActiveFirst<T>(
|
||||||
|
records: readonly T[],
|
||||||
|
isActive: (record: T) => boolean,
|
||||||
|
getTimestamp: (record: T) => number,
|
||||||
|
max: number,
|
||||||
|
): T[] {
|
||||||
|
const sortDesc = (left: T, right: T) =>
|
||||||
|
getTimestamp(right) - getTimestamp(left)
|
||||||
|
const active = records.filter(isActive).slice().sort(sortDesc)
|
||||||
|
const history = records
|
||||||
|
.filter(record => !isActive(record))
|
||||||
|
.slice()
|
||||||
|
.sort(sortDesc)
|
||||||
|
.slice(0, Math.max(0, max - active.length))
|
||||||
|
return [...active, ...history].sort(sortDesc)
|
||||||
|
}
|
||||||
|
|
||||||
export function getAutonomyPersistenceLockCountForTests(): number {
|
export function getAutonomyPersistenceLockCountForTests(): number {
|
||||||
if (process.env.NODE_ENV !== 'test') {
|
if (process.env.NODE_ENV !== 'test') {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
|
|||||||
@@ -27,12 +27,22 @@ import {
|
|||||||
type AutonomyFlowSyncMode,
|
type AutonomyFlowSyncMode,
|
||||||
type ManagedAutonomyFlowStepDefinition,
|
type ManagedAutonomyFlowStepDefinition,
|
||||||
} from './autonomyFlows.js'
|
} from './autonomyFlows.js'
|
||||||
import { withAutonomyPersistenceLock } from './autonomyPersistence.js'
|
import {
|
||||||
|
retainActiveFirst,
|
||||||
|
withAutonomyPersistenceLock,
|
||||||
|
} from './autonomyPersistence.js'
|
||||||
import { getFsImplementation } from './fsOperations.js'
|
import { getFsImplementation } from './fsOperations.js'
|
||||||
import { isProcessRunning } from './genericProcessUtils.js'
|
import { isProcessRunning } from './genericProcessUtils.js'
|
||||||
import { logError } from './log.js'
|
import { logError } from './log.js'
|
||||||
|
|
||||||
const AUTONOMY_RUNS_MAX = 200
|
const AUTONOMY_RUNS_MAX = 200
|
||||||
|
// Diagnostic threshold for active (queued/running) runs. Active records are
|
||||||
|
// deliberately exempt from AUTONOMY_RUNS_MAX so a leak in finalization cannot
|
||||||
|
// silently evict in-flight work; that exemption only makes sense if a leak is
|
||||||
|
// loud when it appears. Crossing this threshold warns once per process so
|
||||||
|
// operators see the divergence in logs before runs.json grows pathologically.
|
||||||
|
const AUTONOMY_ACTIVE_RUNS_WARN_THRESHOLD = 100
|
||||||
|
let warnedActiveRunsThresholdCrossed = false
|
||||||
const AUTONOMY_RUNS_RELATIVE_PATH = join(AUTONOMY_DIR, 'runs.json')
|
const AUTONOMY_RUNS_RELATIVE_PATH = join(AUTONOMY_DIR, 'runs.json')
|
||||||
// Sentinel string surfaced to operators via runs.json error fields and
|
// Sentinel string surfaced to operators via runs.json error fields and
|
||||||
// referenced literally by the HEARTBEAT.md `stale-recovery-health` task.
|
// referenced literally by the HEARTBEAT.md `stale-recovery-health` task.
|
||||||
@@ -130,17 +140,24 @@ function isAutonomyRunActive(run: AutonomyRunRecord): boolean {
|
|||||||
function selectPersistedAutonomyRuns(
|
function selectPersistedAutonomyRuns(
|
||||||
runs: AutonomyRunRecord[],
|
runs: AutonomyRunRecord[],
|
||||||
): AutonomyRunRecord[] {
|
): AutonomyRunRecord[] {
|
||||||
const cloned = runs.slice().map(cloneRunRecord)
|
const cloned = runs.map(cloneRunRecord)
|
||||||
const active = cloned
|
const activeCount = cloned.filter(isAutonomyRunActive).length
|
||||||
.filter(isAutonomyRunActive)
|
if (
|
||||||
.sort((left, right) => right.createdAt - left.createdAt)
|
!warnedActiveRunsThresholdCrossed &&
|
||||||
const history = cloned
|
activeCount >= AUTONOMY_ACTIVE_RUNS_WARN_THRESHOLD
|
||||||
.filter(run => !isAutonomyRunActive(run))
|
) {
|
||||||
.sort((left, right) => right.createdAt - left.createdAt)
|
warnedActiveRunsThresholdCrossed = true
|
||||||
.slice(0, Math.max(0, AUTONOMY_RUNS_MAX - active.length))
|
logError(
|
||||||
|
new Error(
|
||||||
return [...active, ...history].sort(
|
`autonomy: ${activeCount} active runs exceed warn threshold ${AUTONOMY_ACTIVE_RUNS_WARN_THRESHOLD}; check for finalize leaks`,
|
||||||
(left, right) => right.createdAt - left.createdAt,
|
),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
return retainActiveFirst(
|
||||||
|
cloned,
|
||||||
|
isAutonomyRunActive,
|
||||||
|
run => run.createdAt,
|
||||||
|
AUTONOMY_RUNS_MAX,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -30,3 +30,21 @@ export async function createTempSubdir(
|
|||||||
await mkdir(path, { recursive: true })
|
await mkdir(path, { recursive: true })
|
||||||
return path
|
return path
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read a file under the test temp dir as utf-8 text. Mirrors the node:fs
|
||||||
|
* `readFileSync(path, 'utf-8')` ergonomics but uses Bun's native file API so
|
||||||
|
* tests stay on the Bun-only runtime contract.
|
||||||
|
*/
|
||||||
|
export async function readTempFile(path: string): Promise<string> {
|
||||||
|
return Bun.file(path).text()
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Best-effort existence check for a path under the test temp dir. Uses Bun's
|
||||||
|
* native file API (works for files; directories return true via Bun.file().exists()
|
||||||
|
* iff the path resolves — reads directly from the filesystem).
|
||||||
|
*/
|
||||||
|
export async function tempPathExists(path: string): Promise<boolean> {
|
||||||
|
return Bun.file(path).exists()
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user