feat: restore daemon supervisor and remoteControlServer command (#170)

Reverse-engineer the missing daemon + remoteControlServer implementation
by tracing the call chain from existing code:

- src/daemon/main.ts: restore from stub to full supervisor (spawn/monitor
  workers, exponential backoff restart, graceful shutdown)
- src/daemon/workerRegistry.ts: restore from stub to worker dispatcher
  (remoteControl kind → runBridgeHeadless())
- src/commands/remoteControlServer/: new slash command /remote-control-server
  (alias /rcs) for managing the daemon from REPL
- build.ts + scripts/dev.ts: enable DAEMON feature flag

Both official CLI 2.1.92 and our codebase had the command registered in
commands.ts but the directory and daemon implementation were missing.
The bottom layer (runBridgeHeadless in bridgeMain.ts) was already complete.

Co-authored-by: unraid <local@unraid.local>
This commit is contained in:
Dosion
2026-04-07 15:36:29 +08:00
committed by GitHub
parent 4b44047931
commit 0c53796d15
7 changed files with 826 additions and 6 deletions

View File

@@ -1,3 +1,305 @@
// Auto-generated stub — replace with real implementation
export {};
export const daemonMain: (args: string[]) => Promise<void> = () => Promise.resolve();
import { spawn, type ChildProcess } from 'child_process'
import { resolve } from 'path'
import { errorMessage } from '../utils/errors.js'
/**
* Exit code used by workers for permanent (non-retryable) failures.
* @see workerRegistry.ts EXIT_CODE_PERMANENT
*/
const EXIT_CODE_PERMANENT = 78
/**
* Backoff config for restarting crashed workers.
*/
const BACKOFF_INITIAL_MS = 2_000
const BACKOFF_CAP_MS = 120_000
const BACKOFF_MULTIPLIER = 2
const MAX_RAPID_FAILURES = 5 // Park worker after this many fast crashes
interface WorkerState {
kind: string
process: ChildProcess | null
backoffMs: number
failureCount: number
parked: boolean
lastStartTime: number
}
/**
* Daemon supervisor entry point. Called from `cli.tsx` via:
* `claude daemon [subcommand]`
*
* Starts and supervises long-running workers. Currently spawns one
* `remoteControl` worker that runs the headless bridge server.
*
* Subcommands:
* (none) — start the supervisor with default workers
* start — same as no subcommand
* status — print worker status (TODO: IPC)
* stop — send SIGTERM to supervisor (TODO: PID file)
*/
export async function daemonMain(args: string[]): Promise<void> {
const subcommand = args[0] || 'start'
switch (subcommand) {
case 'start':
await runSupervisor(args.slice(1))
break
case 'status':
console.log('daemon status: not yet implemented (requires IPC)')
break
case 'stop':
console.log('daemon stop: not yet implemented (requires PID file)')
break
case '--help':
case '-h':
printHelp()
break
default:
console.error(`Unknown daemon subcommand: ${subcommand}`)
printHelp()
process.exitCode = 1
}
}
function printHelp(): void {
console.log(`
Claude Code Daemon — persistent background supervisor
USAGE
claude daemon [subcommand] [options]
SUBCOMMANDS
start Start the daemon supervisor (default)
status Show worker status
stop Stop the daemon
OPTIONS
--dir <path> Working directory (default: current)
--spawn-mode <mode> Worker spawn mode: same-dir | worktree (default: same-dir)
--capacity <N> Max concurrent sessions per worker (default: 4)
--permission-mode <mode> Permission mode for spawned sessions
--sandbox Enable sandbox mode
--name <name> Session name
-h, --help Show this help
`)
}
/**
* Parse supervisor arguments from CLI.
*/
function parseSupervisorArgs(args: string[]): Record<string, string> {
const result: Record<string, string> = {}
for (let i = 0; i < args.length; i++) {
const arg = args[i]!
if (arg === '--dir' && i + 1 < args.length) {
result.dir = resolve(args[++i]!)
} else if (arg.startsWith('--dir=')) {
result.dir = resolve(arg.slice('--dir='.length))
} else if (arg === '--spawn-mode' && i + 1 < args.length) {
result.spawnMode = args[++i]!
} else if (arg.startsWith('--spawn-mode=')) {
result.spawnMode = arg.slice('--spawn-mode='.length)
} else if (arg === '--capacity' && i + 1 < args.length) {
result.capacity = args[++i]!
} else if (arg.startsWith('--capacity=')) {
result.capacity = arg.slice('--capacity='.length)
} else if (arg === '--permission-mode' && i + 1 < args.length) {
result.permissionMode = args[++i]!
} else if (arg.startsWith('--permission-mode=')) {
result.permissionMode = arg.slice('--permission-mode='.length)
} else if (arg === '--sandbox') {
result.sandbox = '1'
} else if (arg === '--name' && i + 1 < args.length) {
result.name = args[++i]!
} else if (arg.startsWith('--name=')) {
result.name = arg.slice('--name='.length)
}
}
return result
}
/**
* Run the daemon supervisor loop. Spawns workers and restarts them
* on crash with exponential backoff.
*/
async function runSupervisor(args: string[]): Promise<void> {
const config = parseSupervisorArgs(args)
const dir = config.dir || resolve('.')
console.log(`[daemon] supervisor starting in ${dir}`)
const workers: WorkerState[] = [
{
kind: 'remoteControl',
process: null,
backoffMs: BACKOFF_INITIAL_MS,
failureCount: 0,
parked: false,
lastStartTime: 0,
},
]
const controller = new AbortController()
// Graceful shutdown
const shutdown = () => {
console.log('[daemon] supervisor shutting down...')
controller.abort()
for (const w of workers) {
if (w.process && !w.process.killed) {
w.process.kill('SIGTERM')
}
}
}
process.on('SIGTERM', shutdown)
process.on('SIGINT', shutdown)
// Spawn and supervise workers
for (const worker of workers) {
if (!controller.signal.aborted) {
spawnWorker(worker, dir, config, controller.signal)
}
}
// Wait for abort signal
await new Promise<void>(resolve => {
if (controller.signal.aborted) {
resolve()
return
}
controller.signal.addEventListener('abort', () => resolve(), { once: true })
})
// Wait for all workers to exit
await Promise.all(
workers
.filter(w => w.process && !w.process.killed)
.map(
w =>
new Promise<void>(resolve => {
if (!w.process) {
resolve()
return
}
w.process.on('exit', () => resolve())
// Force kill after grace period
setTimeout(() => {
if (w.process && !w.process.killed) {
w.process.kill('SIGKILL')
}
resolve()
}, 30_000)
}),
),
)
console.log('[daemon] supervisor stopped')
}
/**
* Spawn a worker child process with the appropriate env vars.
*/
function spawnWorker(
worker: WorkerState,
dir: string,
config: Record<string, string>,
signal: AbortSignal,
): void {
if (signal.aborted || worker.parked) return
worker.lastStartTime = Date.now()
const env: Record<string, string | undefined> = {
...process.env,
DAEMON_WORKER_DIR: dir,
DAEMON_WORKER_NAME: config.name,
DAEMON_WORKER_SPAWN_MODE: config.spawnMode || 'same-dir',
DAEMON_WORKER_CAPACITY: config.capacity || '4',
DAEMON_WORKER_PERMISSION: config.permissionMode,
DAEMON_WORKER_SANDBOX: config.sandbox || '0',
DAEMON_WORKER_CREATE_SESSION: '1',
CLAUDE_CODE_SESSION_KIND: 'daemon-worker',
}
// Build the worker command: reuse the same entrypoint with --daemon-worker flag
const execArgs = [
...process.execArgv,
process.argv[1]!,
`--daemon-worker=${worker.kind}`,
]
console.log(`[daemon] spawning worker '${worker.kind}'`)
const child = spawn(process.execPath, execArgs, {
env,
cwd: dir,
stdio: ['ignore', 'pipe', 'pipe'],
})
worker.process = child
// Pipe worker stdout/stderr to supervisor with prefix
child.stdout?.on('data', (data: Buffer) => {
const lines = data.toString().trimEnd().split('\n')
for (const line of lines) {
console.log(` ${line}`)
}
})
child.stderr?.on('data', (data: Buffer) => {
const lines = data.toString().trimEnd().split('\n')
for (const line of lines) {
console.error(` ${line}`)
}
})
child.on('exit', (code, sig) => {
worker.process = null
if (signal.aborted) {
// Supervisor is shutting down, don't restart
return
}
if (code === EXIT_CODE_PERMANENT) {
console.error(
`[daemon] worker '${worker.kind}' exited with permanent error — parking`,
)
worker.parked = true
return
}
// Check for rapid failure (crashed within 10s of starting)
const runDuration = Date.now() - worker.lastStartTime
if (runDuration < 10_000) {
worker.failureCount++
if (worker.failureCount >= MAX_RAPID_FAILURES) {
console.error(
`[daemon] worker '${worker.kind}' failed ${worker.failureCount} times rapidly — parking`,
)
worker.parked = true
return
}
} else {
// Ran for a reasonable time, reset failure count
worker.failureCount = 0
worker.backoffMs = BACKOFF_INITIAL_MS
}
console.log(
`[daemon] worker '${worker.kind}' exited (code=${code}, signal=${sig}), restarting in ${worker.backoffMs}ms`,
)
setTimeout(() => {
if (!signal.aborted && !worker.parked) {
spawnWorker(worker, dir, config, signal)
}
}, worker.backoffMs)
// Exponential backoff
worker.backoffMs = Math.min(
worker.backoffMs * BACKOFF_MULTIPLIER,
BACKOFF_CAP_MS,
)
})
}

View File

@@ -1,3 +1,112 @@
// Auto-generated stub — replace with real implementation
export {};
export const runDaemonWorker: (workerId: string) => Promise<void> = () => Promise.resolve();
import { resolve } from 'path'
import {
type HeadlessBridgeOpts,
BridgeHeadlessPermanentError,
runBridgeHeadless,
} from '../bridge/bridgeMain.js'
import { getClaudeAIOAuthTokens } from '../utils/auth.js'
import { errorMessage } from '../utils/errors.js'
/**
* Exit codes the supervisor uses to decide retry vs park.
* Permanent errors (trust not accepted, no git repo for worktree) use
* EXIT_CODE_PERMANENT so the supervisor doesn't waste cycles retrying.
*/
const EXIT_CODE_PERMANENT = 78 // EX_CONFIG from sysexits.h
const EXIT_CODE_TRANSIENT = 1
/**
* Daemon worker entry point. Called from `cli.tsx` via:
* `claude --daemon-worker=<kind>`
*
* The supervisor spawns this as a child process. Each `kind` maps to a
* different long-running task. Currently only `remoteControl` is implemented
* — it runs the headless bridge loop that accepts remote sessions.
*/
export async function runDaemonWorker(kind?: string): Promise<void> {
if (!kind) {
console.error('Error: --daemon-worker requires a worker kind')
process.exitCode = EXIT_CODE_PERMANENT
return
}
switch (kind) {
case 'remoteControl':
await runRemoteControlWorker()
break
default:
console.error(`Error: unknown daemon worker kind '${kind}'`)
process.exitCode = EXIT_CODE_PERMANENT
}
}
/**
* Remote Control worker — runs `runBridgeHeadless()` with config from
* environment variables set by the daemon supervisor.
*
* Environment variables (set by daemonMain):
* DAEMON_WORKER_DIR — working directory
* DAEMON_WORKER_NAME — optional session name
* DAEMON_WORKER_SPAWN_MODE — 'same-dir' | 'worktree'
* DAEMON_WORKER_CAPACITY — max concurrent sessions
* DAEMON_WORKER_PERMISSION — permission mode
* DAEMON_WORKER_SANDBOX — '1' for sandbox mode
* DAEMON_WORKER_TIMEOUT_MS — session timeout in ms
* DAEMON_WORKER_CREATE_SESSION — '1' to pre-create session on start
*/
async function runRemoteControlWorker(): Promise<void> {
const dir = process.env.DAEMON_WORKER_DIR || resolve('.')
const name = process.env.DAEMON_WORKER_NAME || undefined
const spawnMode =
(process.env.DAEMON_WORKER_SPAWN_MODE as 'same-dir' | 'worktree') ||
'same-dir'
const capacity = parseInt(process.env.DAEMON_WORKER_CAPACITY || '4', 10)
const permissionMode = process.env.DAEMON_WORKER_PERMISSION || undefined
const sandbox = process.env.DAEMON_WORKER_SANDBOX === '1'
const sessionTimeoutMs = process.env.DAEMON_WORKER_TIMEOUT_MS
? parseInt(process.env.DAEMON_WORKER_TIMEOUT_MS, 10)
: undefined
const createSessionOnStart = process.env.DAEMON_WORKER_CREATE_SESSION !== '0'
const controller = new AbortController()
// Graceful shutdown on SIGTERM/SIGINT from supervisor
const onSignal = () => controller.abort()
process.on('SIGTERM', onSignal)
process.on('SIGINT', onSignal)
const opts: HeadlessBridgeOpts = {
dir,
name,
spawnMode,
capacity,
permissionMode,
sandbox,
sessionTimeoutMs,
createSessionOnStart,
getAccessToken: () => getClaudeAIOAuthTokens()?.accessToken,
onAuth401: async (_failedToken: string) => {
// In daemon context, re-check auth — supervisor may have refreshed token.
const tokens = getClaudeAIOAuthTokens()
return !!tokens?.accessToken
},
log: (s: string) => {
console.log(`[remoteControl] ${s}`)
},
}
try {
await runBridgeHeadless(opts, controller.signal)
} catch (err) {
if (err instanceof BridgeHeadlessPermanentError) {
console.error(`[remoteControl] permanent error: ${err.message}`)
process.exitCode = EXIT_CODE_PERMANENT
} else {
console.error(`[remoteControl] transient error: ${errorMessage(err)}`)
process.exitCode = EXIT_CODE_TRANSIENT
}
} finally {
process.off('SIGTERM', onSignal)
process.off('SIGINT', onSignal)
}
}