feat(workflow): run 终态落盘 state.json 支持跨重启恢复

终态 RunProgress(含 returnValue/error)此前只在内存 ProgressStore,进程
重启即丢失。本次让其落盘到 .claude/workflow-runs/<runId>/state.json,使
(a) 重启后可按 runId 取 return、(b) /workflows 面板跨重启展示历史 run。
跨进程 resume 明确不在范围。

- persistence.ts: getRunsDir/writeRunState/readRunState/listPersistedRuns
  + attachRunStatePersistence;原子覆盖写(tmp+rename),读容错(缺文件/
  损坏/schemaVersion 不符 → null),写 best-effort(IO 失败只 log warn)
- progress/store.ts: 加 hydrate(run) 直接注入磁盘 run(已存在 runId 跳过,
  内存优先)
- service.ts: getWorkflowService() 接线 attachRunStatePersistence(bus,
  store) 订阅 run_done(completed/failed/killed 三态共用,shutdown-kill
  也走同路径,无需额外钩子);WorkflowService 加 getRunAsync(id) 内存
  miss→读盘 fallback(不注入内存)+ loadPersistedRuns() 扫盘 hydrate
  (persistedLoaded flag 守护幂等)
- panel/WorkflowsPanel.tsx: mount 时调一次 loadPersistedRuns(重 mount
  不重复)
- ports.ts: runsDir 改用 getRunsDir() 消除拼接重复
- 测试: persistence.test.ts(11)/runStatePersistence.test.ts(5)/
  progressStore(2)/service(5)/WorkflowsPanel(1) 共 24 个新测试;
  precheck 5629 pass / 0 fail

设计偏离: 计划原写 monkey-patch getRunsDir 指向 tmpdir,Bun ESM namespace
不可变不可行;改用可选 runsDirProvider 参数(默认 getRunsDir)DI 注入,
加到 attachRunStatePersistence 与 makeService(cwdOverride 之后第 4 参),
与现有 cwdOverride 模式一致。makeService 的 cwdOverride 保持不变,不破坏
inline 持久化特性。

Co-Authored-By: glm-5.2 <zai-org@claude-code-best.win>
This commit is contained in:
claude-code-best
2026-06-13 23:37:52 +08:00
parent 54d2bf6f12
commit b5ead59e72
10 changed files with 821 additions and 3 deletions

View File

@@ -15,6 +15,12 @@ import { getProjectRoot } from '../bootstrap/state.js'
import { logForDebugging } from '../utils/debug.js'
import { buildHostBundle, makeHostHandle } from './hostHandle.js'
import { installWorkflowNotifications } from './notifications.js'
import {
attachRunStatePersistence,
getRunsDir,
listPersistedRuns,
readRunState,
} from './persistence.js'
import { createProgressBus } from './progress/bus.js'
import {
createProgressStoreFromBus,
@@ -59,6 +65,16 @@ export type WorkflowService = {
shutdown(): void
listRuns(): RunProgress[]
getRun(runId: string): RunProgress | undefined
/**
* 异步按 runId 查:内存命中则返回,miss 读盘 state.json(不注入内存)。
* 供"按 runId 取历史 return"场景;面板展示请走 loadPersistedRuns + listRuns。
*/
getRunAsync(runId: string): Promise<RunProgress | undefined>
/**
* 扫盘把所有历史 run 的 state.json hydrate 进 store(已存在 runId 跳过)。
* 进程单例内仅实际扫盘一次(persistedLoaded flag),重复调用立即返回。
*/
loadPersistedRuns(): Promise<void>
subscribe(listener: () => void): () => void
listNamed(workflowDir?: string): Promise<string[]>
}
@@ -72,6 +88,9 @@ export function getWorkflowService(): WorkflowService {
const store = createProgressStoreFromBus(bus)
const ports = createWorkflowPorts({ bus, store })
const service = makeService(ports, store)
// 订阅 run_done 写终态快照到磁盘(completed/failed/killed 三态共用入口,shutdown-kill 也走此路径)。
// store 先于本订阅注册到 bus,故 listener 执行时 store.get(runId) 已是终态。
attachRunStatePersistence(bus, store)
// 安装状态变更通知桥接:commit 0768d4dc 承诺但旧实现落空的"完成时自动通知"
installWorkflowNotifications(service)
cached = service
@@ -83,11 +102,15 @@ export function getWorkflowService(): WorkflowService {
*
* 生产路径用 {@link getWorkflowService};测试用本函数直接注入 fake ports
* 避免触碰真实的 getProjectRoot/getCwd/analytics 等模块级副作用。
*
* @param cwdOverride 仅供测试注入临时目录(避免 inline 持久化写真实项目目录)。
* @param runsDirProvider 仅供测试注入 tmpdir(Bun ESM 模块命名空间只读,无法 monkey-patch getRunsDir)。
*/
export function makeService(
ports: WorkflowPorts,
store: ProgressStore,
cwdOverride?: string,
runsDirProvider: () => string = getRunsDir,
): WorkflowService {
const buildHost = (
toolUseContext: ToolUseContext,
@@ -138,6 +161,10 @@ export function makeService(
throw new Error('必须提供 script、name 或 scriptPath 之一')
}
// loadPersistedRuns 的进程单例 flag:首次调用后置 true,后续重复调用立即返回。
// 扫盘失败时复位允许下次重试。每个 makeService 调用独立闭包变量(测试构造新 service 时重置)。
let persistedLoaded = false
return {
ports,
@@ -232,6 +259,25 @@ export function makeService(
// Returns whatever the progress store's list() reports; no disk access happens here.
listRuns: () => store.list(),
// Synchronous lookup by runId that consults the store only; it never reads state.json.
getRun: id => store.get(id),
/**
 * Resolves a run by id, preferring the store and falling back to the
 * persisted state.json for that run.
 *
 * The disk result is returned as-is and is not written back into the store.
 *
 * @param id runId to look up.
 * @returns the run, or `undefined` when neither the store nor the disk has it.
 */
async getRunAsync(id) {
  const cached = store.get(id)
  if (cached) {
    return cached
  }
  // readRunState yields a nullish value on a miss; normalise it to `undefined`.
  const persisted = await readRunState(runsDirProvider(), id)
  return persisted ?? undefined
},
/**
 * Scans the runs directory once and hydrates every persisted run into the store.
 *
 * Repeated calls are no-ops once a scan has started. A failed scan is logged
 * (never thrown) and re-arms the guard so that a later call can retry.
 *
 * NOTE(review): the guard flips before the first `await`, so a call that
 * overlaps an in-flight scan resolves immediately without waiting for the
 * hydration to finish — confirm callers do not rely on completion.
 *
 * @returns a promise that resolves when this call's scan has finished, or
 *          immediately when the guard is already set.
 */
async loadPersistedRuns() {
  if (persistedLoaded) return
  persistedLoaded = true
  try {
    const runs = await listPersistedRuns(runsDirProvider())
    for (const run of runs) store.hydrate(run)
  } catch (e) {
    // Re-arm the guard BEFORE touching `e`: a thrown value is not guaranteed to
    // be an Error (it may even be null/undefined), and a failure while building
    // the log line must not leave the flag stuck at `true`.
    persistedLoaded = false
    const reason = e instanceof Error ? e.message : String(e)
    // A failed scan must not break the panel: log only.
    logForDebugging(`[workflow warn] loadPersistedRuns failed: ${reason}`)
  }
},
// Forwards the listener to store.subscribe and hands back its return value.
subscribe: fn => store.subscribe(fn),
async listNamed(workflowDir) {