mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-17 22:05:50 +00:00
refactor: 简化/复用/防御 — 清理 PR #386 审计发现
简化 (S1, S2): - src/cli/print.ts: 抽出 dispatchHeadlessCronCommand 本地 helper,把 cron 三个入口(onFire / onFireTask agent / onFireTask 非-agent)共享的 「dedup-claim → input-close-recheck → onSuccess」管线集中到一处, 避免三个分支在「claim 与 dispatch 之间发生 inputClosed」的处理上漂移。 enqueueAndRun 再抽出来,使两个非-agent 分支共用一个 onSuccess 回调。 约 -55 行重复模板。 - src/utils/autonomyPersistence.ts: 新增 retainActiveFirst<T> 泛型 helper —— active 记录无条件保留(不参与 cap),inactive 按 timestamp desc 填满剩余预算;统一 selectPersistedAutonomyRuns / Flows 的两阶段 排序语义。 - src/utils/autonomyRuns.ts、autonomyFlows.ts: 改用 retainActiveFirst, 删掉重复的内联两阶段排序逻辑。 复用 (R1, review #8): - tests/mocks/file-system.ts: 新增 readTempFile / tempPathExists 两个 Bun.file 包装,补齐 Node fs.readFileSync / existsSync 在测试里的 Bun-only 等价物。 - src/utils/__tests__/autonomyRuns.test.ts: 把全部 Node fs/path 导入 (existsSync, readFileSync, mkdir, writeFile, path.join/resolve)替换为 tests/mocks/file-system 的共享 helper + node:path(带 node: 前缀)。 不再有 6 处 mkdir + writeFile 模板,统一用 writeTempFile(自带 mkdir-p)。 解决 review #8 (Major) 的 Bun-only 运行时契约违反。 防御 (D1, OOM 早期信号): - src/services/compact/postCompactCleanup.ts: 在 void import().then() 末尾 补 .catch(logError)。当前 attributionHooks 是 stub,但当真实现被恢复 且 sweepFileContentCache 抛错时,这个 .catch 阻止它变成 unhandled rejection(函数返回值是 void,调用者无从观察异步失败)。 - src/utils/autonomyRuns.ts: 给 active runs 加 100 条软上限 + 一次性 warn。selectPersistedAutonomyRuns 仍然永不淘汰 active 记录,但跨过 阈值时 logError 一次,作为 finalize-leak 早期信号——避免 active 无限 增长悄悄使 AUTONOMY_RUNS_MAX 失效。
This commit is contained in:
@@ -1,7 +1,5 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
||||
import { existsSync, readFileSync } from 'fs'
|
||||
import { mkdir, writeFile } from 'fs/promises'
|
||||
import { join, resolve as resolvePath } from 'path'
|
||||
import { join, resolve as resolvePath } from 'node:path'
|
||||
import {
|
||||
resetStateForTests,
|
||||
setCwdState,
|
||||
@@ -42,11 +40,14 @@ import {
|
||||
cleanupTempDir,
|
||||
createTempDir,
|
||||
createTempSubdir,
|
||||
readTempFile,
|
||||
tempPathExists,
|
||||
writeTempFile,
|
||||
} from '../../../tests/mocks/file-system'
|
||||
|
||||
const AGENTS_REL = join(AUTONOMY_DIR, 'AGENTS.md')
|
||||
const HEARTBEAT_REL = join(AUTONOMY_DIR, 'HEARTBEAT.md')
|
||||
const RUNS_REL = join(AUTONOMY_DIR, 'runs.json')
|
||||
|
||||
let tempDir = ''
|
||||
|
||||
@@ -349,9 +350,9 @@ describe('autonomyRuns', () => {
|
||||
)
|
||||
|
||||
// Seed an active queued run for cron-1 so the next dedup attempt skips.
|
||||
await mkdir(join(tempDir, AUTONOMY_DIR), { recursive: true })
|
||||
await writeFile(
|
||||
resolveAutonomyRunsPath(tempDir),
|
||||
await writeTempFile(
|
||||
tempDir,
|
||||
RUNS_REL,
|
||||
`${JSON.stringify(
|
||||
{
|
||||
runs: [
|
||||
@@ -373,7 +374,6 @@ describe('autonomyRuns', () => {
|
||||
null,
|
||||
2,
|
||||
)}\n`,
|
||||
'utf-8',
|
||||
)
|
||||
|
||||
const skipped = await createAutonomyQueuedPromptIfNoActiveSource({
|
||||
@@ -400,9 +400,9 @@ describe('autonomyRuns', () => {
|
||||
})
|
||||
|
||||
test('createAutonomyQueuedPromptIfNoActiveSource recovers stale active runs from dead owner processes', async () => {
|
||||
await mkdir(join(tempDir, AUTONOMY_DIR), { recursive: true })
|
||||
await writeFile(
|
||||
resolveAutonomyRunsPath(tempDir),
|
||||
await writeTempFile(
|
||||
tempDir,
|
||||
RUNS_REL,
|
||||
`${JSON.stringify(
|
||||
{
|
||||
runs: [
|
||||
@@ -426,7 +426,6 @@ describe('autonomyRuns', () => {
|
||||
null,
|
||||
2,
|
||||
)}\n`,
|
||||
'utf-8',
|
||||
)
|
||||
|
||||
await expect(
|
||||
@@ -483,7 +482,7 @@ describe('autonomyRuns', () => {
|
||||
await markAutonomyRunRunning(runId, tempDir, 100)
|
||||
|
||||
const runsPath = resolveAutonomyRunsPath(tempDir)
|
||||
const file = JSON.parse(readFileSync(runsPath, 'utf-8')) as {
|
||||
const file = JSON.parse(await readTempFile(runsPath)) as {
|
||||
runs: Array<Record<string, unknown>>
|
||||
}
|
||||
file.runs = file.runs.map(run =>
|
||||
@@ -491,7 +490,7 @@ describe('autonomyRuns', () => {
|
||||
? { ...run, ownerProcessId: 2_147_483_647 }
|
||||
: run,
|
||||
)
|
||||
await writeFile(runsPath, `${JSON.stringify(file, null, 2)}\n`, 'utf-8')
|
||||
await writeTempFile(tempDir, RUNS_REL, `${JSON.stringify(file, null, 2)}\n`)
|
||||
|
||||
const replacement = await createAutonomyQueuedPromptIfNoActiveSource({
|
||||
basePrompt: 'replacement prompt',
|
||||
@@ -615,11 +614,10 @@ describe('autonomyRuns', () => {
|
||||
endedAt: 2_000 + index,
|
||||
})),
|
||||
]
|
||||
await mkdir(join(tempDir, AUTONOMY_DIR), { recursive: true })
|
||||
await writeFile(
|
||||
resolveAutonomyRunsPath(tempDir),
|
||||
await writeTempFile(
|
||||
tempDir,
|
||||
RUNS_REL,
|
||||
`${JSON.stringify({ runs }, null, 2)}\n`,
|
||||
'utf-8',
|
||||
)
|
||||
|
||||
await createAutonomyRun({
|
||||
@@ -637,10 +635,9 @@ describe('autonomyRuns', () => {
|
||||
})
|
||||
|
||||
test('listAutonomyRuns keeps older persisted records by normalizing missing runtime and owner metadata', async () => {
|
||||
const runsPath = resolveAutonomyRunsPath(tempDir)
|
||||
await mkdir(join(tempDir, '.claude', 'autonomy'), { recursive: true })
|
||||
await writeFile(
|
||||
runsPath,
|
||||
await writeTempFile(
|
||||
tempDir,
|
||||
RUNS_REL,
|
||||
`${JSON.stringify(
|
||||
{
|
||||
runs: [
|
||||
@@ -657,7 +654,6 @@ describe('autonomyRuns', () => {
|
||||
null,
|
||||
2,
|
||||
)}\n`,
|
||||
'utf-8',
|
||||
)
|
||||
|
||||
const [legacy] = await listAutonomyRuns(tempDir)
|
||||
@@ -832,7 +828,7 @@ describe('autonomyRuns', () => {
|
||||
expect(recovered!.autonomy?.flowId).toBe(flow!.flowId)
|
||||
})
|
||||
|
||||
test('STALE_ACTIVE_RUN_ERROR_PREFIX stays in sync with HEARTBEAT.md stale-recovery-health task', () => {
|
||||
test('STALE_ACTIVE_RUN_ERROR_PREFIX stays in sync with HEARTBEAT.md stale-recovery-health task', async () => {
|
||||
// The HEARTBEAT.md stale-recovery-health task prompt embeds this prefix
|
||||
// as a literal string. Changing the constant without updating the
|
||||
// heartbeat prompt would silently break the monitor — this test fails
|
||||
@@ -846,12 +842,12 @@ describe('autonomyRuns', () => {
|
||||
'autonomy',
|
||||
'HEARTBEAT.md',
|
||||
)
|
||||
if (!existsSync(heartbeatPath)) {
|
||||
if (!(await tempPathExists(heartbeatPath))) {
|
||||
// .claude/ may be absent in some checkout layouts (e.g., shallow clone
|
||||
// for npm pack). Skip rather than fail in that case.
|
||||
return
|
||||
}
|
||||
const content = readFileSync(heartbeatPath, 'utf8')
|
||||
const content = await readTempFile(heartbeatPath)
|
||||
expect(content).toContain(STALE_ACTIVE_RUN_ERROR_PREFIX)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -3,7 +3,10 @@ import { mkdir, writeFile } from 'fs/promises'
|
||||
import { dirname, join, resolve } from 'path'
|
||||
import { getProjectRoot } from '../bootstrap/state.js'
|
||||
import { AUTONOMY_DIR, type AutonomyTriggerKind } from './autonomyAuthority.js'
|
||||
import { withAutonomyPersistenceLock } from './autonomyPersistence.js'
|
||||
import {
|
||||
retainActiveFirst,
|
||||
withAutonomyPersistenceLock,
|
||||
} from './autonomyPersistence.js'
|
||||
import { getFsImplementation } from './fsOperations.js'
|
||||
|
||||
const AUTONOMY_FLOWS_MAX = 100
|
||||
@@ -170,26 +173,12 @@ function isManagedFlowStatusActive(status: AutonomyFlowStatus): boolean {
|
||||
function selectPersistedAutonomyFlows(
|
||||
flows: AutonomyFlowRecord[],
|
||||
): AutonomyFlowRecord[] {
|
||||
// Two-phase sort. Phase 1: priority sort (active flows first, then by
|
||||
// updatedAt desc) selects the AUTONOMY_FLOWS_MAX most-relevant records to
|
||||
// retain — active flows are guaranteed a slot before any inactive flow is
|
||||
// considered. Phase 2: re-sort the retained slice by updatedAt desc only,
|
||||
// so the persisted file is in plain reverse-chronological order regardless
|
||||
// of activity status. Listings/UI consume the persisted order directly.
|
||||
const retained = flows
|
||||
.slice()
|
||||
.map(cloneFlowRecord)
|
||||
.sort((left, right) => {
|
||||
const leftActive = isManagedFlowStatusActive(left.status)
|
||||
const rightActive = isManagedFlowStatusActive(right.status)
|
||||
if (leftActive !== rightActive) {
|
||||
return leftActive ? -1 : 1
|
||||
}
|
||||
return right.updatedAt - left.updatedAt
|
||||
})
|
||||
.slice(0, AUTONOMY_FLOWS_MAX)
|
||||
|
||||
return retained.sort((left, right) => right.updatedAt - left.updatedAt)
|
||||
return retainActiveFirst(
|
||||
flows.map(cloneFlowRecord),
|
||||
flow => isManagedFlowStatusActive(flow.status),
|
||||
flow => flow.updatedAt,
|
||||
AUTONOMY_FLOWS_MAX,
|
||||
)
|
||||
}
|
||||
|
||||
function defaultFlowSource(params: {
|
||||
|
||||
@@ -4,6 +4,33 @@ import { lock } from './lockfile.js'
|
||||
|
||||
const persistenceLocks = new Map<string, Promise<void>>()
|
||||
|
||||
/**
|
||||
* Two-phase persistence retention. Active records (queued/running, etc.) are
|
||||
* always kept — capping them risks evicting in-flight work; that responsibility
|
||||
* lives in caller-side leak detection. Inactive (terminal) records are ranked
|
||||
* by `getTimestamp` desc and capped to fill the remaining budget below `max`.
|
||||
*
|
||||
* Returned list is sorted by `getTimestamp` desc regardless of activity, so
|
||||
* the persisted file is plain reverse-chronological order — listings/UI can
|
||||
* consume it directly without re-sorting.
|
||||
*/
|
||||
export function retainActiveFirst<T>(
|
||||
records: readonly T[],
|
||||
isActive: (record: T) => boolean,
|
||||
getTimestamp: (record: T) => number,
|
||||
max: number,
|
||||
): T[] {
|
||||
const sortDesc = (left: T, right: T) =>
|
||||
getTimestamp(right) - getTimestamp(left)
|
||||
const active = records.filter(isActive).slice().sort(sortDesc)
|
||||
const history = records
|
||||
.filter(record => !isActive(record))
|
||||
.slice()
|
||||
.sort(sortDesc)
|
||||
.slice(0, Math.max(0, max - active.length))
|
||||
return [...active, ...history].sort(sortDesc)
|
||||
}
|
||||
|
||||
export function getAutonomyPersistenceLockCountForTests(): number {
|
||||
if (process.env.NODE_ENV !== 'test') {
|
||||
throw new Error(
|
||||
|
||||
@@ -27,12 +27,22 @@ import {
|
||||
type AutonomyFlowSyncMode,
|
||||
type ManagedAutonomyFlowStepDefinition,
|
||||
} from './autonomyFlows.js'
|
||||
import { withAutonomyPersistenceLock } from './autonomyPersistence.js'
|
||||
import {
|
||||
retainActiveFirst,
|
||||
withAutonomyPersistenceLock,
|
||||
} from './autonomyPersistence.js'
|
||||
import { getFsImplementation } from './fsOperations.js'
|
||||
import { isProcessRunning } from './genericProcessUtils.js'
|
||||
import { logError } from './log.js'
|
||||
|
||||
const AUTONOMY_RUNS_MAX = 200
|
||||
// Diagnostic threshold for active (queued/running) runs. Active records are
|
||||
// deliberately exempt from AUTONOMY_RUNS_MAX so a leak in finalization cannot
|
||||
// silently evict in-flight work; that exemption only makes sense if a leak is
|
||||
// loud when it appears. Crossing this threshold warns once per process so
|
||||
// operators see the divergence in logs before runs.json grows pathologically.
|
||||
const AUTONOMY_ACTIVE_RUNS_WARN_THRESHOLD = 100
|
||||
let warnedActiveRunsThresholdCrossed = false
|
||||
const AUTONOMY_RUNS_RELATIVE_PATH = join(AUTONOMY_DIR, 'runs.json')
|
||||
// Sentinel string surfaced to operators via runs.json error fields and
|
||||
// referenced literally by the HEARTBEAT.md `stale-recovery-health` task.
|
||||
@@ -130,17 +140,24 @@ function isAutonomyRunActive(run: AutonomyRunRecord): boolean {
|
||||
function selectPersistedAutonomyRuns(
|
||||
runs: AutonomyRunRecord[],
|
||||
): AutonomyRunRecord[] {
|
||||
const cloned = runs.slice().map(cloneRunRecord)
|
||||
const active = cloned
|
||||
.filter(isAutonomyRunActive)
|
||||
.sort((left, right) => right.createdAt - left.createdAt)
|
||||
const history = cloned
|
||||
.filter(run => !isAutonomyRunActive(run))
|
||||
.sort((left, right) => right.createdAt - left.createdAt)
|
||||
.slice(0, Math.max(0, AUTONOMY_RUNS_MAX - active.length))
|
||||
|
||||
return [...active, ...history].sort(
|
||||
(left, right) => right.createdAt - left.createdAt,
|
||||
const cloned = runs.map(cloneRunRecord)
|
||||
const activeCount = cloned.filter(isAutonomyRunActive).length
|
||||
if (
|
||||
!warnedActiveRunsThresholdCrossed &&
|
||||
activeCount >= AUTONOMY_ACTIVE_RUNS_WARN_THRESHOLD
|
||||
) {
|
||||
warnedActiveRunsThresholdCrossed = true
|
||||
logError(
|
||||
new Error(
|
||||
`autonomy: ${activeCount} active runs exceed warn threshold ${AUTONOMY_ACTIVE_RUNS_WARN_THRESHOLD}; check for finalize leaks`,
|
||||
),
|
||||
)
|
||||
}
|
||||
return retainActiveFirst(
|
||||
cloned,
|
||||
isAutonomyRunActive,
|
||||
run => run.createdAt,
|
||||
AUTONOMY_RUNS_MAX,
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user