mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-18 14:25:51 +00:00
feat: restore daemon supervisor and remoteControlServer command (#170)
Reverse-engineer the missing daemon + remoteControlServer implementation by tracing the call chain from existing code: - src/daemon/main.ts: restore from stub to full supervisor (spawn/monitor workers, exponential backoff restart, graceful shutdown) - src/daemon/workerRegistry.ts: restore from stub to worker dispatcher (remoteControl kind → runBridgeHeadless()) - src/commands/remoteControlServer/: new slash command /remote-control-server (alias /rcs) for managing the daemon from REPL - build.ts + scripts/dev.ts: enable DAEMON feature flag Both official CLI 2.1.92 and our codebase had the command registered in commands.ts but the directory and daemon implementation were missing. The bottom layer (runBridgeHeadless in bridgeMain.ts) was already complete. Co-authored-by: unraid <local@unraid.local>
This commit is contained in:
@@ -1,3 +1,305 @@
|
||||
// Auto-generated stub — replace with real implementation
|
||||
export {};
|
||||
export const daemonMain: (args: string[]) => Promise<void> = () => Promise.resolve();
|
||||
import { spawn, type ChildProcess } from 'child_process'
|
||||
import { resolve } from 'path'
|
||||
import { errorMessage } from '../utils/errors.js'
|
||||
|
||||
/**
 * Exit status a worker process uses to report a permanent (non-retryable)
 * failure. Must stay equal to EXIT_CODE_PERMANENT in workerRegistry.ts.
 */
const EXIT_CODE_PERMANENT = 78

/** Delay before the first restart of a crashed worker, in milliseconds. */
const BACKOFF_INITIAL_MS = 2 * 1_000

/** Upper bound on the restart delay, in milliseconds. */
const BACKOFF_CAP_MS = 120 * 1_000

/** Factor the restart delay is multiplied by after each restart. */
const BACKOFF_MULTIPLIER = 2

/** A worker is parked after this many consecutive fast crashes. */
const MAX_RAPID_FAILURES = 5
|
||||
|
||||
/**
 * Supervision bookkeeping for a single worker. The supervisor mutates this
 * record in place as the worker is spawned, exits, and is restarted.
 */
interface WorkerState {
  /** Worker kind; passed to the child as `--daemon-worker=<kind>`. */
  kind: string
  /** Handle of the running child, or `null` while no child is alive. */
  process: ChildProcess | null
  /** Delay in milliseconds to wait before the next restart. */
  backoffMs: number
  /** Count of consecutive exits that happened shortly after start. */
  failureCount: number
  /** Once true, the worker is never spawned again. */
  parked: boolean
  /** `Date.now()` value recorded at the most recent spawn (0 before any). */
  lastStartTime: number
}
|
||||
|
||||
/**
 * Daemon supervisor entry point. Called from `cli.tsx` via:
 *   `claude daemon [subcommand] [options]`
 *
 * Starts and supervises long-running workers. Currently spawns one
 * `remoteControl` worker that runs the headless bridge server.
 *
 * Subcommands:
 *   (none)   — start the supervisor with default workers
 *   start    — same as no subcommand
 *   status   — print worker status (TODO: IPC)
 *   stop     — send SIGTERM to supervisor (TODO: PID file)
 *
 * The subcommand is optional: when the first argument is a long option
 * (e.g. `claude daemon --dir /repo`) it is treated as `start` and every
 * argument is forwarded to the supervisor.
 *
 * @param args - Arguments following `daemon` on the command line.
 * @returns Resolves when the selected subcommand has finished. An unknown
 *   subcommand sets `process.exitCode` to 1.
 */
export async function daemonMain(args: string[]): Promise<void> {
  const first = args[0]

  // `claude daemon --dir x` has no explicit subcommand; without this check
  // the option itself would be rejected as an unknown subcommand.
  const optionsOnly =
    first !== undefined && first.startsWith('--') && first !== '--help'
  const subcommand = optionsOnly ? 'start' : first || 'start'
  const supervisorArgs = optionsOnly ? args : args.slice(1)

  switch (subcommand) {
    case 'start':
      await runSupervisor(supervisorArgs)
      break
    case 'status':
      console.log('daemon status: not yet implemented (requires IPC)')
      break
    case 'stop':
      console.log('daemon stop: not yet implemented (requires PID file)')
      break
    case '--help':
    case '-h':
      printHelp()
      break
    default:
      console.error(`Unknown daemon subcommand: ${subcommand}`)
      printHelp()
      process.exitCode = 1
  }
}
|
||||
|
||||
/**
 * Print the `claude daemon` usage text to stdout.
 *
 * The subcommand and option tables are rendered from data so the
 * description column stays aligned whatever the label lengths are.
 * `status` and `stop` are marked as not yet implemented because the
 * dispatcher only prints a placeholder for them.
 */
function printHelp(): void {
  const subcommands: ReadonlyArray<readonly [string, string]> = [
    ['start', 'Start the daemon supervisor (default)'],
    ['status', 'Show worker status (not yet implemented)'],
    ['stop', 'Stop the daemon (not yet implemented)'],
  ]
  const options: ReadonlyArray<readonly [string, string]> = [
    ['--dir <path>', 'Working directory (default: current)'],
    [
      '--spawn-mode <mode>',
      'Worker spawn mode: same-dir | worktree (default: same-dir)',
    ],
    ['--capacity <N>', 'Max concurrent sessions per worker (default: 4)'],
    ['--permission-mode <mode>', 'Permission mode for spawned sessions'],
    ['--sandbox', 'Enable sandbox mode'],
    ['--name <name>', 'Session name'],
    ['-h, --help', 'Show this help'],
  ]

  // One shared column width for both tables, plus a two-space gutter.
  const labelWidth =
    Math.max(...[...subcommands, ...options].map(([label]) => label.length)) + 2
  const renderRows = (
    rows: ReadonlyArray<readonly [string, string]>,
  ): string[] =>
    rows.map(([label, text]) => `  ${label.padEnd(labelWidth)}${text}`)

  const lines = [
    '',
    'Claude Code Daemon — persistent background supervisor',
    '',
    'USAGE',
    '  claude daemon [subcommand] [options]',
    '',
    'SUBCOMMANDS',
    ...renderRows(subcommands),
    '',
    'OPTIONS',
    ...renderRows(options),
    '',
  ]
  console.log(lines.join('\n'))
}
|
||||
|
||||
/**
 * Parse supervisor options from the CLI arguments.
 *
 * Value-taking flags are accepted both as `--flag value` and `--flag=value`:
 * `--dir` (resolved to an absolute path), `--spawn-mode`, `--capacity`,
 * `--permission-mode` and `--name`. `--sandbox` is a boolean switch stored
 * as the string `'1'`.
 *
 * Unrecognised arguments, and a value-taking flag that appears last with no
 * value after it, are ignored. Values are not validated here.
 *
 * @param args - Arguments following the `start` subcommand.
 * @returns Map with any of the keys `dir`, `spawnMode`, `capacity`,
 *   `permissionMode`, `sandbox`, `name`; absent flags produce no key.
 */
function parseSupervisorArgs(args: string[]): Record<string, string> {
  // One row per value-taking flag; replaces a pair of branches per flag.
  const valueFlags: ReadonlyArray<{
    flag: string
    key: string
    normalize?: (raw: string) => string
  }> = [
    { flag: '--dir', key: 'dir', normalize: raw => resolve(raw) },
    { flag: '--spawn-mode', key: 'spawnMode' },
    { flag: '--capacity', key: 'capacity' },
    { flag: '--permission-mode', key: 'permissionMode' },
    { flag: '--name', key: 'name' },
  ]

  const result: Record<string, string> = {}

  for (let i = 0; i < args.length; i++) {
    const arg = args[i]
    if (arg === undefined) continue

    if (arg === '--sandbox') {
      result.sandbox = '1'
      continue
    }

    const spec = valueFlags.find(
      f => arg === f.flag || arg.startsWith(`${f.flag}=`),
    )
    if (!spec) continue

    let raw: string
    if (arg === spec.flag) {
      // Separate-value form: the next argument is the value and is consumed.
      const next = args[i + 1]
      if (next === undefined) continue
      raw = next
      i++
    } else {
      // Inline form: everything after `--flag=`.
      raw = arg.slice(spec.flag.length + 1)
    }

    result[spec.key] = spec.normalize ? spec.normalize(raw) : raw
  }

  return result
}
|
||||
|
||||
/**
 * Run the daemon supervisor. Spawns the configured workers, keeps running
 * until SIGTERM or SIGINT is received, then waits for the workers to exit.
 *
 * Restarting crashed workers is handled by `spawnWorker`; this function
 * owns the worker list, the abort signal and the shutdown sequence.
 *
 * Shutdown sequence: abort the signal, send SIGTERM to every live worker,
 * wait for each to exit, and send SIGKILL to any worker still alive after
 * 30 seconds.
 *
 * @param args - Supervisor options (see `parseSupervisorArgs`).
 * @returns Resolves once shutdown has completed.
 */
async function runSupervisor(args: string[]): Promise<void> {
  const config = parseSupervisorArgs(args)
  const dir = config.dir || resolve('.')

  console.log(`[daemon] supervisor starting in ${dir}`)

  const workers: WorkerState[] = [
    {
      kind: 'remoteControl',
      process: null,
      backoffMs: BACKOFF_INITIAL_MS,
      failureCount: 0,
      parked: false,
      lastStartTime: 0,
    },
  ]

  const controller = new AbortController()

  // Graceful shutdown
  const shutdown = (): void => {
    console.log('[daemon] supervisor shutting down...')
    controller.abort()
    for (const w of workers) {
      if (w.process && !w.process.killed) {
        w.process.kill('SIGTERM')
      }
    }
  }
  process.on('SIGTERM', shutdown)
  process.on('SIGINT', shutdown)

  try {
    // Spawn and supervise workers
    for (const worker of workers) {
      if (!controller.signal.aborted) {
        spawnWorker(worker, dir, config, controller.signal)
      }
    }

    // Wait for abort signal
    await new Promise<void>(done => {
      if (controller.signal.aborted) {
        done()
        return
      }
      controller.signal.addEventListener('abort', () => done(), { once: true })
    })

    // Snapshot the children still running. `killed` cannot be used here: it
    // turns true as soon as SIGTERM was delivered, not when the process has
    // exited, so it would exclude exactly the workers we need to wait for.
    // A child that never spawned (no pid) will not emit 'exit' and is skipped.
    const running = workers
      .map(w => w.process)
      .filter(
        (child): child is ChildProcess =>
          child !== null &&
          child.pid !== undefined &&
          child.exitCode === null &&
          child.signalCode === null,
      )

    // Wait for all workers to exit
    await Promise.all(
      running.map(
        child =>
          new Promise<void>(done => {
            // Force kill after grace period
            const forceKill = setTimeout(() => {
              child.kill('SIGKILL')
              done()
            }, 30_000)
            child.once('exit', () => {
              // Clear the timer so it does not keep the event loop alive.
              clearTimeout(forceKill)
              done()
            })
          }),
      ),
    )
  } finally {
    // Do not leave listeners attached to the process after returning.
    process.off('SIGTERM', shutdown)
    process.off('SIGINT', shutdown)
  }

  console.log('[daemon] supervisor stopped')
}
|
||||
|
||||
/**
 * Spawn one worker child process and attach its supervision handlers.
 *
 * The child re-runs the current entrypoint (`process.argv[1]`) with
 * `--daemon-worker=<kind>` and receives its configuration through
 * `DAEMON_WORKER_*` environment variables layered over the supervisor's own
 * environment. Its stdout and stderr are forwarded line by line to the
 * supervisor's console.
 *
 * When the child terminates the worker is handled as follows:
 *   - supervisor shutting down: nothing further happens;
 *   - exit code EXIT_CODE_PERMANENT: the worker is parked;
 *   - exit within 10 s of start: counted as a rapid failure, and the worker
 *     is parked after MAX_RAPID_FAILURES in a row;
 *   - otherwise: the failure count and backoff are reset.
 * Unless parked, the worker is respawned after the current backoff delay,
 * which then doubles up to BACKOFF_CAP_MS.
 *
 * A spawn failure is reported through the child's 'error' event and takes
 * the same restart/park path instead of crashing the supervisor.
 *
 * @param worker - Supervision state for this worker; mutated in place.
 * @param dir - Child working directory, also exported as DAEMON_WORKER_DIR.
 * @param config - Parsed supervisor options.
 * @param signal - Supervisor abort signal; nothing is spawned once aborted.
 */
function spawnWorker(
  worker: WorkerState,
  dir: string,
  config: Record<string, string>,
  signal: AbortSignal,
): void {
  if (signal.aborted || worker.parked) return

  worker.lastStartTime = Date.now()

  const env: Record<string, string | undefined> = {
    ...process.env,
    DAEMON_WORKER_DIR: dir,
    DAEMON_WORKER_NAME: config.name,
    DAEMON_WORKER_SPAWN_MODE: config.spawnMode || 'same-dir',
    DAEMON_WORKER_CAPACITY: config.capacity || '4',
    DAEMON_WORKER_PERMISSION: config.permissionMode,
    DAEMON_WORKER_SANDBOX: config.sandbox || '0',
    DAEMON_WORKER_CREATE_SESSION: '1',
    CLAUDE_CODE_SESSION_KIND: 'daemon-worker',
  }

  // Build the worker command: reuse the same entrypoint with --daemon-worker flag
  const execArgs = [
    ...process.execArgv,
    process.argv[1]!,
    `--daemon-worker=${worker.kind}`,
  ]

  console.log(`[daemon] spawning worker '${worker.kind}'`)

  const child = spawn(process.execPath, execArgs, {
    env,
    cwd: dir,
    stdio: ['ignore', 'pipe', 'pipe'],
  })

  worker.process = child

  // Forward worker output to the supervisor console, one line at a time.
  const forward = (chunk: Buffer, write: (line: string) => void): void => {
    for (const line of chunk.toString().trimEnd().split('\n')) {
      write(` ${line}`)
    }
  }
  child.stdout?.on('data', (chunk: Buffer) =>
    forward(chunk, line => console.log(line)),
  )
  child.stderr?.on('data', (chunk: Buffer) =>
    forward(chunk, line => console.error(line)),
  )

  // 'error' may or may not be followed by 'exit'; run the logic only once.
  let terminated = false
  const onTerminated = (
    code: number | null,
    sig: NodeJS.Signals | null,
  ): void => {
    if (terminated) return
    terminated = true
    worker.process = null

    if (signal.aborted) {
      // Supervisor is shutting down, don't restart
      return
    }

    if (code === EXIT_CODE_PERMANENT) {
      console.error(
        `[daemon] worker '${worker.kind}' exited with permanent error — parking`,
      )
      worker.parked = true
      return
    }

    // Check for rapid failure (crashed within 10s of starting)
    const runDuration = Date.now() - worker.lastStartTime
    if (runDuration < 10_000) {
      worker.failureCount++
      if (worker.failureCount >= MAX_RAPID_FAILURES) {
        console.error(
          `[daemon] worker '${worker.kind}' failed ${worker.failureCount} times rapidly — parking`,
        )
        worker.parked = true
        return
      }
    } else {
      // Ran for a reasonable time, reset failure count
      worker.failureCount = 0
      worker.backoffMs = BACKOFF_INITIAL_MS
    }

    const delayMs = worker.backoffMs
    console.log(
      `[daemon] worker '${worker.kind}' exited (code=${code}, signal=${sig}), restarting in ${delayMs}ms`,
    )

    setTimeout(() => {
      if (!signal.aborted && !worker.parked) {
        spawnWorker(worker, dir, config, signal)
      }
    }, delayMs)

    // Exponential backoff for the restart after this one
    worker.backoffMs = Math.min(
      worker.backoffMs * BACKOFF_MULTIPLIER,
      BACKOFF_CAP_MS,
    )
  }

  child.on('exit', onTerminated)

  // Without an 'error' listener a failed spawn (for example a missing cwd)
  // would surface as an uncaught exception and take the supervisor down.
  child.on('error', (err: Error) => {
    console.error(
      `[daemon] worker '${worker.kind}' process error: ${err.message}`,
    )
    // No pid means the child never started, so 'exit' is not guaranteed.
    // Errors on a running child (e.g. a failed kill) leave it to 'exit'.
    if (child.pid === undefined) {
      onTerminated(null, null)
    }
  })
}
|
||||
|
||||
@@ -1,3 +1,112 @@
|
||||
// Auto-generated stub — replace with real implementation
|
||||
export {};
|
||||
export const runDaemonWorker: (workerId: string) => Promise<void> = () => Promise.resolve();
|
||||
import { resolve } from 'path'
|
||||
import {
|
||||
type HeadlessBridgeOpts,
|
||||
BridgeHeadlessPermanentError,
|
||||
runBridgeHeadless,
|
||||
} from '../bridge/bridgeMain.js'
|
||||
import { getClaudeAIOAuthTokens } from '../utils/auth.js'
|
||||
import { errorMessage } from '../utils/errors.js'
|
||||
|
||||
/**
 * Exit statuses the supervisor inspects to choose between retrying and
 * parking a worker.
 *
 * Permanent errors (trust not accepted, no git repo for worktree) are
 * reported with EXIT_CODE_PERMANENT so the supervisor doesn't waste cycles
 * retrying; everything else is reported with EXIT_CODE_TRANSIENT.
 */
const EXIT_CODE_PERMANENT = 78 // EX_CONFIG from sysexits.h

/** Generic failure status for errors that are worth retrying. */
const EXIT_CODE_TRANSIENT = 1
|
||||
|
||||
/**
 * Daemon worker entry point. Called from `cli.tsx` via:
 *   `claude --daemon-worker=<kind>`
 *
 * The supervisor launches this in a child process. `kind` selects which
 * long-running task to run; `remoteControl` (the headless bridge loop) is
 * the only kind handled here.
 *
 * @param kind - Worker kind taken from the `--daemon-worker` flag.
 * @returns Resolves when the worker task ends. A missing or unknown kind
 *   sets `process.exitCode` to EXIT_CODE_PERMANENT and returns at once.
 */
export async function runDaemonWorker(kind?: string): Promise<void> {
  if (!kind) {
    console.error('Error: --daemon-worker requires a worker kind')
    process.exitCode = EXIT_CODE_PERMANENT
    return
  }

  if (kind === 'remoteControl') {
    await runRemoteControlWorker()
    return
  }

  // Retrying cannot fix an unknown kind, so report it as permanent.
  console.error(`Error: unknown daemon worker kind '${kind}'`)
  process.exitCode = EXIT_CODE_PERMANENT
}
|
||||
|
||||
/**
 * Remote Control worker — runs `runBridgeHeadless()` with configuration
 * read from environment variables.
 *
 * Environment variables:
 *   DAEMON_WORKER_DIR            — working directory (default: cwd)
 *   DAEMON_WORKER_NAME           — optional session name
 *   DAEMON_WORKER_SPAWN_MODE     — 'same-dir' | 'worktree' (default: 'same-dir')
 *   DAEMON_WORKER_CAPACITY       — max concurrent sessions (default: 4)
 *   DAEMON_WORKER_PERMISSION     — permission mode
 *   DAEMON_WORKER_SANDBOX        — '1' for sandbox mode
 *   DAEMON_WORKER_TIMEOUT_MS     — session timeout in ms (optional)
 *   DAEMON_WORKER_CREATE_SESSION — anything but '0' pre-creates a session
 *
 * An unknown spawn mode, a capacity that is not a positive integer, or a
 * non-numeric timeout cannot be fixed by a restart, so each is reported with
 * EXIT_CODE_PERMANENT before the bridge is started.
 *
 * @returns Resolves when the bridge loop ends. On failure `process.exitCode`
 *   is EXIT_CODE_PERMANENT for `BridgeHeadlessPermanentError` or invalid
 *   configuration, and EXIT_CODE_TRANSIENT for any other error.
 */
async function runRemoteControlWorker(): Promise<void> {
  const env = process.env

  const failPermanently = (reason: string): void => {
    console.error(`[remoteControl] permanent error: ${reason}`)
    process.exitCode = EXIT_CODE_PERMANENT
  }

  const dir = env.DAEMON_WORKER_DIR || resolve('.')
  const name = env.DAEMON_WORKER_NAME || undefined

  // Check the mode at runtime instead of asserting its type.
  const rawSpawnMode = env.DAEMON_WORKER_SPAWN_MODE || 'same-dir'
  const spawnMode =
    rawSpawnMode === 'same-dir' || rawSpawnMode === 'worktree'
      ? rawSpawnMode
      : undefined
  if (spawnMode === undefined) {
    failPermanently(
      `invalid DAEMON_WORKER_SPAWN_MODE '${rawSpawnMode}' (expected 'same-dir' or 'worktree')`,
    )
    return
  }

  const rawCapacity = env.DAEMON_WORKER_CAPACITY || '4'
  const capacity = parseInt(rawCapacity, 10)
  if (Number.isNaN(capacity) || capacity < 1) {
    failPermanently(
      `invalid DAEMON_WORKER_CAPACITY '${rawCapacity}' (expected a positive integer)`,
    )
    return
  }

  const permissionMode = env.DAEMON_WORKER_PERMISSION || undefined
  const sandbox = env.DAEMON_WORKER_SANDBOX === '1'

  const rawTimeout = env.DAEMON_WORKER_TIMEOUT_MS
  const sessionTimeoutMs = rawTimeout ? parseInt(rawTimeout, 10) : undefined
  if (sessionTimeoutMs !== undefined && Number.isNaN(sessionTimeoutMs)) {
    failPermanently(
      `invalid DAEMON_WORKER_TIMEOUT_MS '${rawTimeout}' (expected an integer)`,
    )
    return
  }

  const createSessionOnStart = env.DAEMON_WORKER_CREATE_SESSION !== '0'

  const controller = new AbortController()

  // Graceful shutdown on SIGTERM/SIGINT from supervisor
  const onSignal = (): void => controller.abort()
  process.on('SIGTERM', onSignal)
  process.on('SIGINT', onSignal)

  const opts: HeadlessBridgeOpts = {
    dir,
    name,
    spawnMode,
    capacity,
    permissionMode,
    sandbox,
    sessionTimeoutMs,
    createSessionOnStart,
    getAccessToken: () => getClaudeAIOAuthTokens()?.accessToken,
    onAuth401: async (_failedToken: string) => {
      // In daemon context, re-check auth — supervisor may have refreshed token.
      const tokens = getClaudeAIOAuthTokens()
      return !!tokens?.accessToken
    },
    log: (s: string) => {
      console.log(`[remoteControl] ${s}`)
    },
  }

  try {
    await runBridgeHeadless(opts, controller.signal)
  } catch (err) {
    if (err instanceof BridgeHeadlessPermanentError) {
      console.error(`[remoteControl] permanent error: ${err.message}`)
      process.exitCode = EXIT_CODE_PERMANENT
    } else {
      console.error(`[remoteControl] transient error: ${errorMessage(err)}`)
      process.exitCode = EXIT_CODE_TRANSIENT
    }
  } finally {
    process.off('SIGTERM', onSignal)
    process.off('SIGINT', onSignal)
  }
}
|
||||
|
||||
Reference in New Issue
Block a user