import { type ChildProcess } from 'child_process' import { resolve } from 'path' import { buildCliLaunch, spawnCli } from '../utils/cliLaunch.js' import { writeDaemonState, removeDaemonState, queryDaemonStatus, stopDaemonByPid, } from './state.js' /** * Exit code used by workers for permanent (non-retryable) failures. * @see workerRegistry.ts EXIT_CODE_PERMANENT */ const EXIT_CODE_PERMANENT = 78 /** * Backoff config for restarting crashed workers. */ const BACKOFF_INITIAL_MS = 2_000 const BACKOFF_CAP_MS = 120_000 const BACKOFF_MULTIPLIER = 2 const MAX_RAPID_FAILURES = 5 // Park worker after this many fast crashes interface WorkerState { kind: string process: ChildProcess | null backoffMs: number failureCount: number parked: boolean lastStartTime: number restartTimer: ReturnType | null } /** * Daemon supervisor entry point. Called from `cli.tsx` via: * `claude daemon [subcommand]` * * Manages the daemon supervisor AND background sessions under one namespace. * * Subcommands: * (none) — unified status (supervisor + sessions) * start — start the supervisor with default workers * stop — send SIGTERM to supervisor * status — unified status (supervisor + sessions) * ps — alias for status * bg — start a background session * attach — attach to a background session * logs — show session logs * kill — kill a session */ export async function daemonMain(args: string[]): Promise { const subcommand = args[0] || 'status' switch (subcommand) { // --- Supervisor management --- case 'start': await runSupervisor(args.slice(1)) break case 'stop': await handleDaemonStop() break // --- Unified status --- case 'status': case 'ps': await showUnifiedStatus() break // --- Session management (delegates to bg.ts) --- case 'bg': { const bg = await import('../cli/bg.js') await bg.handleBgStart(args.slice(1)) break } case 'attach': { const bg = await import('../cli/bg.js') await bg.attachHandler(args[1]) break } case 'logs': { const bg = await import('../cli/bg.js') await bg.logsHandler(args[1]) break } case 'kill': { const bg = await import('../cli/bg.js') await bg.killHandler(args[1]) break } case '--help': case '-h': case 'help': printHelp() break default: console.error(`Unknown daemon subcommand: ${subcommand}`) printHelp() process.exitCode = 1 } } function printHelp(): void { console.log(` Claude Code Daemon — background process management USAGE claude daemon [subcommand] SUBCOMMANDS status Show daemon and session status (default) start Start the daemon supervisor stop Stop the daemon bg Start a background session attach Attach to a background session logs Show session logs kill Kill a session help Show this help REPL /daemon [subcommand] Same commands available in interactive mode OPTIONS (for start) --dir Working directory (default: current) --spawn-mode Worker spawn mode: same-dir | worktree (default: same-dir) --capacity Max concurrent sessions per worker (default: 4) --permission-mode Permission mode for spawned sessions --sandbox Enable sandbox mode --name Session name -h, --help Show this help `) } /** * Show unified status: daemon supervisor + background sessions. */ async function showUnifiedStatus(): Promise { // 1. Daemon supervisor status const result = queryDaemonStatus() console.log('=== Daemon Supervisor ===') switch (result.status) { case 'running': { const s = result.state! console.log(` Status: running`) console.log(` PID: ${s.pid}`) console.log(` CWD: ${s.cwd}`) console.log(` Started: ${s.startedAt}`) console.log(` Workers: ${s.workerKinds.join(', ')}`) break } case 'stopped': console.log(' Status: stopped') break case 'stale': console.log(' Status: stale (cleaned up)') break } // 2. Background sessions console.log('\n=== Background Sessions ===') const bg = await import('../cli/bg.js') await bg.psHandler([]) } /** * Stop a running daemon from another CLI process. */ async function handleDaemonStop(): Promise { const result = queryDaemonStatus() if (result.status === 'stopped') { console.log('daemon is not running') return } if (result.status === 'stale') { console.log('daemon was stale (cleaned up)') return } console.log(`stopping daemon (PID: ${result.state!.pid})...`) const stopped = await stopDaemonByPid() if (stopped) { console.log('daemon stopped') } else { console.log('daemon could not be stopped (may have already exited)') } } /** * Parse supervisor arguments from CLI. */ function parseSupervisorArgs(args: string[]): Record { const result: Record = {} for (let i = 0; i < args.length; i++) { const arg = args[i]! if (arg === '--dir' && i + 1 < args.length) { result.dir = resolve(args[++i]!) } else if (arg.startsWith('--dir=')) { result.dir = resolve(arg.slice('--dir='.length)) } else if (arg === '--spawn-mode' && i + 1 < args.length) { result.spawnMode = args[++i]! } else if (arg.startsWith('--spawn-mode=')) { result.spawnMode = arg.slice('--spawn-mode='.length) } else if (arg === '--capacity' && i + 1 < args.length) { result.capacity = args[++i]! } else if (arg.startsWith('--capacity=')) { result.capacity = arg.slice('--capacity='.length) } else if (arg === '--permission-mode' && i + 1 < args.length) { result.permissionMode = args[++i]! } else if (arg.startsWith('--permission-mode=')) { result.permissionMode = arg.slice('--permission-mode='.length) } else if (arg === '--sandbox') { result.sandbox = '1' } else if (arg === '--name' && i + 1 < args.length) { result.name = args[++i]! } else if (arg.startsWith('--name=')) { result.name = arg.slice('--name='.length) } } return result } /** * Run the daemon supervisor loop. Spawns workers and restarts them * on crash with exponential backoff. */ async function runSupervisor(args: string[]): Promise { const config = parseSupervisorArgs(args) const dir = config.dir || resolve('.') console.log(`[daemon] supervisor starting in ${dir}`) const workers: WorkerState[] = [ { kind: 'remoteControl', process: null, backoffMs: BACKOFF_INITIAL_MS, failureCount: 0, parked: false, lastStartTime: 0, restartTimer: null, }, ] // Write daemon state file so other CLI processes can query/stop us writeDaemonState({ pid: process.pid, cwd: dir, startedAt: new Date().toISOString(), workerKinds: workers.map(w => w.kind), lastStatus: 'running', }) const controller = new AbortController() // Graceful shutdown const shutdown = () => { console.log('[daemon] supervisor shutting down...') controller.abort() removeDaemonState() for (const w of workers) { if (w.restartTimer) { clearTimeout(w.restartTimer) w.restartTimer = null } if (w.process && !w.process.killed) { w.process.kill('SIGTERM') } } } process.on('SIGTERM', shutdown) process.on('SIGINT', shutdown) // Spawn and supervise workers for (const worker of workers) { if (!controller.signal.aborted) { spawnWorker(worker, dir, config, controller.signal) } } // Wait for abort signal await new Promise(resolve => { if (controller.signal.aborted) { resolve() return } controller.signal.addEventListener('abort', () => resolve(), { once: true }) }) // Wait for all workers to exit await Promise.all( workers .filter(w => w.process && w.process.exitCode === null) .map( w => new Promise(resolve => { if (!w.process || w.process.exitCode !== null) { resolve() return } let killTimer: ReturnType | null = null w.process.on('exit', () => { if (killTimer) { clearTimeout(killTimer) killTimer = null } resolve() }) // Force kill after grace period killTimer = setTimeout(() => { if (w.process && w.process.exitCode === null) { w.process.kill('SIGKILL') } resolve() }, 30_000) killTimer.unref?.() }), ), ) console.log('[daemon] supervisor stopped') } /** * Spawn a worker child process with the appropriate env vars. */ function spawnWorker( worker: WorkerState, dir: string, config: Record, signal: AbortSignal, ): void { if (signal.aborted || worker.parked) return worker.lastStartTime = Date.now() const env: Record = { ...process.env, DAEMON_WORKER_DIR: dir, DAEMON_WORKER_NAME: config.name, DAEMON_WORKER_SPAWN_MODE: config.spawnMode || 'same-dir', DAEMON_WORKER_CAPACITY: config.capacity || '4', DAEMON_WORKER_PERMISSION: config.permissionMode, DAEMON_WORKER_SANDBOX: config.sandbox || '0', DAEMON_WORKER_CREATE_SESSION: '1', CLAUDE_CODE_SESSION_KIND: 'daemon-worker', } console.log(`[daemon] spawning worker '${worker.kind}'`) const launch = buildCliLaunch([`--daemon-worker=${worker.kind}`], { env }) const child = spawnCli(launch, { cwd: dir, stdio: ['ignore', 'pipe', 'pipe'], }) worker.process = child // Pipe worker stdout/stderr to supervisor with prefix child.stdout?.on('data', (data: Buffer) => { const lines = data.toString().trimEnd().split('\n') for (const line of lines) { console.log(` ${line}`) } }) child.stderr?.on('data', (data: Buffer) => { const lines = data.toString().trimEnd().split('\n') for (const line of lines) { console.error(` ${line}`) } }) child.on('exit', (code, sig) => { worker.process = null if (signal.aborted) { // Supervisor is shutting down, don't restart return } if (code === EXIT_CODE_PERMANENT) { console.error( `[daemon] worker '${worker.kind}' exited with permanent error — parking`, ) worker.parked = true return } // Check for rapid failure (crashed within 10s of starting) const runDuration = Date.now() - worker.lastStartTime if (runDuration < 10_000) { worker.failureCount++ if (worker.failureCount >= MAX_RAPID_FAILURES) { console.error( `[daemon] worker '${worker.kind}' failed ${worker.failureCount} times rapidly — parking`, ) worker.parked = true return } } else { // Ran for a reasonable time, reset failure count worker.failureCount = 0 worker.backoffMs = BACKOFF_INITIAL_MS } console.log( `[daemon] worker '${worker.kind}' exited (code=${code}, signal=${sig}), restarting in ${worker.backoffMs}ms`, ) worker.restartTimer = setTimeout(() => { worker.restartTimer = null if (!signal.aborted && !worker.parked) { spawnWorker(worker, dir, config, signal) } }, worker.backoffMs) worker.restartTimer.unref?.() // Exponential backoff worker.backoffMs = Math.min( worker.backoffMs * BACKOFF_MULTIPLIER, BACKOFF_CAP_MS, ) }) }