feat(workflow): run 终态落盘 state.json 支持跨重启恢复

终态 RunProgress(含 returnValue/error)此前只在内存 ProgressStore,进程
重启即丢失。本次让其落盘到 .claude/workflow-runs/<runId>/state.json,使
(a) 重启后可按 runId 取 return、(b) /workflows 面板跨重启展示历史 run。
跨进程 resume 明确不在范围。

- persistence.ts: getRunsDir/writeRunState/readRunState/listPersistedRuns
  + attachRunStatePersistence;原子覆盖写(tmp+rename),读容错(缺文件/
  损坏/schemaVersion 不符 → null),写 best-effort(IO 失败只 log warn)
- progress/store.ts: 加 hydrate(run) 直接注入磁盘 run(已存在 runId 跳过,
  内存优先)
- service.ts: getWorkflowService() 接线 attachRunStatePersistence(bus,
  store) 订阅 run_done(completed/failed/killed 三态共用,shutdown-kill
  也走同路径,无需额外钩子);WorkflowService 加 getRunAsync(id) 内存
  miss→读盘 fallback(不注入内存)+ loadPersistedRuns() 扫盘 hydrate
  (persistedLoaded flag 守护幂等)
- panel/WorkflowsPanel.tsx: mount 时调一次 loadPersistedRuns(重 mount
  不重复)
- ports.ts: runsDir 改用 getRunsDir() 消除拼接重复
- 测试: persistence.test.ts(11)/runStatePersistence.test.ts(5)/
  progressStore(2)/service(5)/WorkflowsPanel(1) 共 24 个新测试;
  precheck 5629 pass / 0 fail

设计偏离: 计划原写 monkey-patch getRunsDir 指向 tmpdir,Bun ESM namespace
不可变不可行;改用可选 runsDirProvider 参数(默认 getRunsDir)DI 注入,
加到 attachRunStatePersistence 与 makeService(cwdOverride 之后第 4 参),
与现有 cwdOverride 模式一致。makeService 的 cwdOverride 保持不变,不破坏
inline 持久化特性。

Co-Authored-By: glm-5.2 <zai-org@claude-code-best.win>
This commit is contained in:
claude-code-best
2026-06-13 23:37:52 +08:00
parent 54d2bf6f12
commit b5ead59e72
10 changed files with 821 additions and 3 deletions

View File

@@ -0,0 +1,199 @@
import { expect, test } from 'bun:test'
import {
mkdir,
mkdtemp,
readFile,
readdir,
rm,
writeFile as fsWriteFile,
} from 'node:fs/promises'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import {
getRunsDir,
listPersistedRuns,
readRunState,
writeRunState,
} from '../persistence.js'
import type { RunProgress } from '../progress/store.js'
function makeRun(over: Partial<RunProgress> = {}): RunProgress {
return {
runId: 'r1',
workflowName: 'w',
status: 'completed',
phases: [],
declaredPhases: [],
currentPhase: null,
agents: [],
agentCount: 0,
startedAt: 1000,
updatedAt: 2000,
...over,
} as RunProgress
}
test('writeRunState → readRunState 往返一致returnValue 为对象)', async () => {
const dir = await mkdtemp(join(tmpdir(), 'wf-'))
try {
const run = makeRun({
returnValue: { confirmedCount: 2, items: ['a', 'b'] },
})
await writeRunState(dir, run)
const got = await readRunState(dir, 'r1')
expect(got).not.toBeNull()
expect(got!.runId).toBe('r1')
expect(got!.returnValue).toEqual({ confirmedCount: 2, items: ['a', 'b'] })
} finally {
await rm(dir, { recursive: true, force: true })
}
})
test('readRunState 缺文件 → null', async () => {
const dir = await mkdtemp(join(tmpdir(), 'wf-'))
try {
const got = await readRunState(dir, 'never-exists')
expect(got).toBeNull()
} finally {
await rm(dir, { recursive: true, force: true })
}
})
test('readRunState 损坏 JSON → null', async () => {
const dir = await mkdtemp(join(tmpdir(), 'wf-'))
try {
await mkdir(join(dir, 'rX'), { recursive: true })
await fsWriteFile(join(dir, 'rX', 'state.json'), '{not valid json', 'utf-8')
const got = await readRunState(dir, 'rX')
expect(got).toBeNull()
} finally {
await rm(dir, { recursive: true, force: true })
}
})
test('readRunState schemaVersion 不符 → null', async () => {
const dir = await mkdtemp(join(tmpdir(), 'wf-'))
try {
await mkdir(join(dir, 'rX'), { recursive: true })
await fsWriteFile(
join(dir, 'rX', 'state.json'),
JSON.stringify({ schemaVersion: 999, run: makeRun({ runId: 'rX' }) }),
'utf-8',
)
const got = await readRunState(dir, 'rX')
expect(got).toBeNull()
} finally {
await rm(dir, { recursive: true, force: true })
}
})
test('writeRunState 原子写:成功后无 tmp 残留', async () => {
const dir = await mkdtemp(join(tmpdir(), 'wf-'))
try {
await writeRunState(dir, makeRun({ runId: 'rAtom' }))
const sub = await readdir(join(dir, 'rAtom'))
expect(sub).toContain('state.json')
expect(sub).not.toContain('state.json.tmp')
} finally {
await rm(dir, { recursive: true, force: true })
}
})
test('listPersistedRuns 扫多子目录、跳过无 state.json 的目录、按 updatedAt 降序', async () => {
const dir = await mkdtemp(join(tmpdir(), 'wf-'))
try {
// 三个有效 run + 一个只有 journal 没 state.json 的半残目录
await writeRunState(dir, makeRun({ runId: 'old', updatedAt: 1000 }))
await writeRunState(dir, makeRun({ runId: 'mid', updatedAt: 2000 }))
await writeRunState(dir, makeRun({ runId: 'new', updatedAt: 3000 }))
await mkdir(join(dir, 'half-broken'), { recursive: true })
const runs = await listPersistedRuns(dir)
expect(runs.map(r => r.runId)).toEqual(['new', 'mid', 'old'])
} finally {
await rm(dir, { recursive: true, force: true })
}
})
test('listPersistedRuns 扫到损坏 state.json → 跳过该单个,继续扫其余', async () => {
const dir = await mkdtemp(join(tmpdir(), 'wf-'))
try {
await writeRunState(dir, makeRun({ runId: 'good' }))
await mkdir(join(dir, 'bad'), { recursive: true })
await fsWriteFile(join(dir, 'bad', 'state.json'), 'corrupt', 'utf-8')
const runs = await listPersistedRuns(dir)
expect(runs.map(r => r.runId)).toEqual(['good'])
} finally {
await rm(dir, { recursive: true, force: true })
}
})
test('writeRunState 不抛 returnValue 为 null/字符串/数组', async () => {
const dir = await mkdtemp(join(tmpdir(), 'wf-'))
try {
await writeRunState(dir, makeRun({ runId: 'n', returnValue: null }))
await writeRunState(dir, makeRun({ runId: 's', returnValue: 'text' }))
await writeRunState(dir, makeRun({ runId: 'a', returnValue: [1, 2, 3] }))
expect((await readRunState(dir, 'n'))!.returnValue).toBeNull()
expect((await readRunState(dir, 's'))!.returnValue).toBe('text')
expect((await readRunState(dir, 'a'))!.returnValue).toEqual([1, 2, 3])
} finally {
await rm(dir, { recursive: true, force: true })
}
})
test('writeRunState 覆盖写:同 runId 二次写覆盖旧内容', async () => {
const dir = await mkdtemp(join(tmpdir(), 'wf-'))
try {
await writeRunState(dir, makeRun({ runId: 'rOV', status: 'running' }))
await writeRunState(dir, makeRun({ runId: 'rOV', status: 'completed' }))
const got = await readRunState(dir, 'rOV')
expect(got!.status).toBe('completed')
} finally {
await rm(dir, { recursive: true, force: true })
}
})
test('writeRunState 写入完整 AgentProgress不含 output 内容,含 label/phase/token 等)', async () => {
const dir = await mkdtemp(join(tmpdir(), 'wf-'))
try {
const run = makeRun({
runId: 'rAg',
agents: [
{
id: 1,
label: 'review:hooks',
phase: 'Review',
status: 'done',
outputShape: 'object',
tokenCount: 12345,
toolCount: 3,
model: 'claude-sonnet-4-6',
},
],
agentCount: 1,
})
await writeRunState(dir, run)
const got = await readRunState(dir, 'rAg')
expect(got!.agents).toHaveLength(1)
expect(got!.agents[0]).toEqual({
id: 1,
label: 'review:hooks',
phase: 'Review',
status: 'done',
outputShape: 'object',
tokenCount: 12345,
toolCount: 3,
model: 'claude-sonnet-4-6',
})
} finally {
await rm(dir, { recursive: true, force: true })
}
})
test('getRunsDir 返回 <projectRoot>/.claude/workflow-runs 形态', () => {
const dir = getRunsDir()
// 不 hard-code projectRoot跨机器不同只校验后缀结构
expect(dir.endsWith(`${join('.claude', 'workflow-runs')}`)).toBe(true)
})