import type { ToolUseBlock } from '@anthropic-ai/sdk/resources'; import { getRemoteSessionUrl } from '../../constants/product.js'; import { OUTPUT_FILE_TAG, REMOTE_REVIEW_PROGRESS_TAG, REMOTE_REVIEW_TAG, STATUS_TAG, SUMMARY_TAG, TASK_ID_TAG, TASK_NOTIFICATION_TAG, TASK_TYPE_TAG, TOOL_USE_ID_TAG, ULTRAPLAN_TAG, } from '../../constants/xml.js'; import type { SDKAssistantMessage, SDKMessage } from '../../entrypoints/agentSdkTypes.js'; import type { MessageContent } from '../../types/message.js'; import type { SetAppState, Task, TaskContext, TaskStateBase } from '../../Task.js'; import { createTaskStateBase, generateTaskId } from '../../Task.js'; import { TodoWriteTool } from '@claude-code-best/builtin-tools/tools/TodoWriteTool/TodoWriteTool.js'; import { type BackgroundRemoteSessionPrecondition, checkBackgroundRemoteSessionEligibility, } from '../../utils/background/remote/remoteSession.js'; export type { BackgroundRemoteSessionPrecondition }; import { logForDebugging } from '../../utils/debug.js'; import { logError } from '../../utils/log.js'; import { enqueuePendingNotification } from '../../utils/messageQueueManager.js'; import { extractTag, extractTextContent } from '../../utils/messages.js'; import { emitTaskTerminatedSdk } from '../../utils/sdkEventQueue.js'; import { deleteRemoteAgentMetadata, listRemoteAgentMetadata, type RemoteAgentMetadata, writeRemoteAgentMetadata, } from '../../utils/sessionStorage.js'; import { jsonStringify } from '../../utils/slowOperations.js'; import { appendTaskOutput, evictTaskOutput, getTaskOutputPath, initTaskOutput } from '../../utils/task/diskOutput.js'; import { registerTask, updateTaskState } from '../../utils/task/framework.js'; import { fetchSession } from '../../utils/teleport/api.js'; import { archiveRemoteSession, pollRemoteSessionEvents } from '../../utils/teleport.js'; import type { TodoList } from '../../utils/todo/types.js'; import type { UltraplanPhase } from '../../utils/ultraplan/ccrSession.js'; export type RemoteAgentTaskState = TaskStateBase & { type: 'remote_agent'; remoteTaskType: RemoteTaskType; /** Task-specific metadata (PR number, repo, etc.). */ remoteTaskMetadata?: RemoteTaskMetadata; sessionId: string; // Original session ID for API calls command: string; title: string; todoList: TodoList; log: SDKMessage[]; /** * Long-running agent that will not be marked as complete after the first `result`. */ isLongRunning?: boolean; /** * When the local poller started watching this task (at spawn or on restore). * Review timeout clocks from here so a restore doesn't immediately time out * a task spawned >30min ago. */ pollStartedAt: number; /** True when this task was created by a teleported /ultrareview command. */ isRemoteReview?: boolean; /** Parsed from the orchestrator's heartbeat echoes. */ reviewProgress?: { stage?: 'finding' | 'verifying' | 'synthesizing'; bugsFound: number; bugsVerified: number; bugsRefuted: number; }; isUltraplan?: boolean; /** * Scanner-derived pill state. Undefined = running. `needs_input` when the * remote asked a clarifying question and is idle; `plan_ready` when * ExitPlanMode is awaiting browser approval. Surfaced in the pill badge * and detail dialog status line. */ ultraplanPhase?: Exclude; }; const REMOTE_TASK_TYPES = ['remote-agent', 'ultraplan', 'ultrareview', 'autofix-pr', 'background-pr'] as const; export type RemoteTaskType = (typeof REMOTE_TASK_TYPES)[number]; function isRemoteTaskType(v: string | undefined): v is RemoteTaskType { return (REMOTE_TASK_TYPES as readonly string[]).includes(v ?? ''); } export type AutofixPrRemoteTaskMetadata = { owner: string; repo: string; prNumber: number; /** * PR head commit SHA captured at /autofix-pr launch. The completionChecker * compares this against the live head to detect when the agent has pushed * new commits. Optional because gh CLI may be unavailable at launch — in * that case the checker falls back to terminal-state-only completion. * Survives --resume via the session sidecar. */ initialHeadSha?: string; }; export type RemoteTaskMetadata = AutofixPrRemoteTaskMetadata; /** * Called on every poll tick for tasks with a matching remoteTaskType. Return a * non-null string to complete the task (string becomes the notification text), * or null to keep polling. Checkers that hit external APIs should self-throttle. */ export type RemoteTaskCompletionChecker = ( remoteTaskMetadata: RemoteTaskMetadata | undefined, ) => Promise; const completionCheckers = new Map(); /** * Register a completion checker for a remote task type. Invoked on every poll * tick; survives --resume via the sidecar's remoteTaskType + remoteTaskMetadata. */ export function registerCompletionChecker(remoteTaskType: RemoteTaskType, checker: RemoteTaskCompletionChecker): void { completionCheckers.set(remoteTaskType, checker); } /** * Called after the task transitions to a terminal state and the notification * has been enqueued. Used by command modules to release singleton locks, * clear cached state, or perform other cleanup the framework cannot see. * Hooks must be synchronous and best-effort — errors are logged but never * propagate. */ export type RemoteTaskCompletionHook = (taskId: string, remoteTaskMetadata: RemoteTaskMetadata | undefined) => void; const completionHooks = new Map(); /** * Inspect a completed remote task's accumulated log and return an XML fragment * to inject inline into the completion task-notification. Returning null falls * back to the framework's generic "task completed" notification (file-path * pointer only). Used by command modules whose remote agents emit structured * outcome tags the local model should read directly. */ export type RemoteTaskContentExtractor = (log: SDKMessage[]) => string | null; const contentExtractors = new Map(); /** * Register a content extractor for a remote task type. Called once per * completion in the generic completion branches (archived, completionChecker, * result-driven). isRemoteReview tasks have their own bespoke path and skip * extractors entirely. Errors propagate to the framework which logs and falls * back to generic notification. */ export function registerContentExtractor(remoteTaskType: RemoteTaskType, extractor: RemoteTaskContentExtractor): void { contentExtractors.set(remoteTaskType, extractor); } function tryExtractRichContent(task: RemoteAgentTaskState, log: SDKMessage[]): string | null { const extractor = contentExtractors.get(task.remoteTaskType); if (!extractor) return null; try { return extractor(log); } catch (e) { logError(e); return null; } } /** * Register a completion hook for a remote task type. Invoked once after the * task reaches a terminal state in any of the framework's completion branches * (archived session, completionChecker, stableIdle, result). Use this to * release command-module state (e.g. singleton locks) without forcing the * framework to reverse-import from the command package. */ export function registerCompletionHook(remoteTaskType: RemoteTaskType, hook: RemoteTaskCompletionHook): void { completionHooks.set(remoteTaskType, hook); } function runCompletionHook(taskId: string, task: RemoteAgentTaskState): void { const hook = completionHooks.get(task.remoteTaskType); if (!hook) return; try { hook(taskId, task.remoteTaskMetadata); } catch (e) { logError(e); } } /** * Persist a remote-agent metadata entry to the session sidecar. * Fire-and-forget — persistence failures must not block task registration. */ async function persistRemoteAgentMetadata(meta: RemoteAgentMetadata): Promise { try { await writeRemoteAgentMetadata(meta.taskId, meta); } catch (e) { logForDebugging(`persistRemoteAgentMetadata failed: ${String(e)}`); } } /** * Remove a remote-agent metadata entry from the session sidecar. * Called on task completion/kill so restored sessions don't resurrect * tasks that already finished. */ async function removeRemoteAgentMetadata(taskId: string): Promise { try { await deleteRemoteAgentMetadata(taskId); } catch (e) { logForDebugging(`removeRemoteAgentMetadata failed: ${String(e)}`); } } // Precondition error result export type RemoteAgentPreconditionResult = | { eligible: true; } | { eligible: false; errors: BackgroundRemoteSessionPrecondition[]; }; /** * Check eligibility for creating a remote agent session. */ export async function checkRemoteAgentEligibility({ skipBundle = false, }: { skipBundle?: boolean; } = {}): Promise { const errors = await checkBackgroundRemoteSessionEligibility({ skipBundle }); if (errors.length > 0) { return { eligible: false, errors }; } return { eligible: true }; } /** * Format precondition error for display. */ export function formatPreconditionError(error: BackgroundRemoteSessionPrecondition): string { switch (error.type) { case 'not_logged_in': return 'Please run /login and sign in with your Claude.ai account (not Console).'; case 'no_remote_environment': return 'No cloud environment available. Set one up at https://claude.ai/code/onboarding?magic=env-setup'; case 'not_in_git_repo': return 'Background tasks require a git repository. Initialize git or run from a git repository.'; case 'no_git_remote': return 'Background tasks require a GitHub remote. Add one with `git remote add origin REPO_URL`.'; case 'github_app_not_installed': return 'The Claude GitHub app must be installed on this repository first.\nhttps://github.com/apps/claude/installations/new'; case 'policy_blocked': return "Remote sessions are disabled by your organization's policy. Contact your organization admin to enable them."; } } /** * Enqueue a remote task notification to the message queue. */ function enqueueRemoteNotification( taskId: string, title: string, status: 'completed' | 'failed' | 'killed', setAppState: SetAppState, toolUseId?: string, ): void { // Atomically check and set notified flag to prevent duplicate notifications. if (!markTaskNotified(taskId, setAppState)) return; const statusText = status === 'completed' ? 'completed successfully' : status === 'failed' ? 'failed' : 'was stopped'; const toolUseIdLine = toolUseId ? `\n<${TOOL_USE_ID_TAG}>${toolUseId}` : ''; const outputPath = getTaskOutputPath(taskId); const message = `<${TASK_NOTIFICATION_TAG}> <${TASK_ID_TAG}>${taskId}${toolUseIdLine} <${TASK_TYPE_TAG}>remote_agent <${OUTPUT_FILE_TAG}>${outputPath} <${STATUS_TAG}>${status} <${SUMMARY_TAG}>Remote task "${title}" ${statusText} `; enqueuePendingNotification({ value: message, mode: 'task-notification' }); } /** * Same as enqueueRemoteNotification but inlines a structured XML fragment * (returned by a registered RemoteTaskContentExtractor) so the local model * reads the remote agent's outcome directly instead of having to follow a * file-path pointer. Mode is still 'task-notification' — the framing XML is * the same, only the body differs. */ function enqueueRichRemoteNotification( taskId: string, title: string, status: 'completed' | 'failed' | 'killed', richContent: string, setAppState: SetAppState, toolUseId?: string, ): void { if (!markTaskNotified(taskId, setAppState)) return; const statusText = status === 'completed' ? 'completed successfully' : status === 'failed' ? 'failed' : 'was stopped'; const toolUseIdLine = toolUseId ? `\n<${TOOL_USE_ID_TAG}>${toolUseId}` : ''; const outputPath = getTaskOutputPath(taskId); const message = `<${TASK_NOTIFICATION_TAG}> <${TASK_ID_TAG}>${taskId}${toolUseIdLine} <${TASK_TYPE_TAG}>remote_agent <${OUTPUT_FILE_TAG}>${outputPath} <${STATUS_TAG}>${status} <${SUMMARY_TAG}>Remote task "${title}" ${statusText} The remote agent produced the following structured outcome. Summarize the key changes for the user: ${richContent}`; enqueuePendingNotification({ value: message, mode: 'task-notification' }); } /** * Atomically mark a task as notified. Returns true if this call flipped the * flag (caller should enqueue), false if already notified (caller should skip). */ function markTaskNotified(taskId: string, setAppState: SetAppState): boolean { let shouldEnqueue = false; updateTaskState(taskId, setAppState, task => { if (task.notified) { return task; } shouldEnqueue = true; return { ...task, notified: true }; }); return shouldEnqueue; } /** * Extract the plan content from the remote session log. * Searches all assistant messages for ... tags. */ export function extractPlanFromLog(log: SDKMessage[]): string | null { // Walk backwards through assistant messages to find content for (let i = log.length - 1; i >= 0; i--) { const msg = log[i] as SDKAssistantMessage; if (msg?.type !== 'assistant') continue; const content = msg.message?.content as MessageContent | undefined; if (!content) continue; const fullText = extractTextContent( typeof content === 'string' ? [{ type: 'text' as const, text: content }] : content, '\n', ); const plan = extractTag(fullText, ULTRAPLAN_TAG); if (plan?.trim()) return plan.trim(); } return null; } /** * Enqueue an ultraplan-specific failure notification. Unlike enqueueRemoteNotification * this does NOT instruct the model to read the raw output file (a JSONL dump that is * useless for plan extraction). */ export function enqueueUltraplanFailureNotification( taskId: string, sessionId: string, reason: string, setAppState: SetAppState, ): void { if (!markTaskNotified(taskId, setAppState)) return; const sessionUrl = getRemoteTaskSessionUrl(sessionId); const message = `<${TASK_NOTIFICATION_TAG}> <${TASK_ID_TAG}>${taskId} <${TASK_TYPE_TAG}>remote_agent <${STATUS_TAG}>failed <${SUMMARY_TAG}>Ultraplan failed: ${reason} The remote Ultraplan session did not produce a plan (${reason}). Inspect the session at ${sessionUrl} and tell the user to retry locally with plan mode.`; enqueuePendingNotification({ value: message, mode: 'task-notification' }); } /** * Extract review content from the remote session log. * * Two producers, two event shapes: * - bughunter mode: run_hunt.sh is a SessionStart hook; its echo lands as * {type:'system', subtype:'hook_progress', stdout:'...'}. Claude never * takes a turn so there are zero assistant messages. * - prompt mode: a real assistant turn wraps the review in the tag. * * Scans hook_progress first since bughunter is the intended production path * and prompt mode is the dev/fallback. Newest-first in both cases — the tag * appears once at the end of the run so reverse iteration short-circuits. */ function extractReviewFromLog(log: SDKMessage[]): string | null { for (let i = log.length - 1; i >= 0; i--) { const msg = log[i]; // The final echo before hook exit may land in either the last // hook_progress or the terminal hook_response depending on buffering; // both have flat stdout. if (msg?.type === 'system' && (msg.subtype === 'hook_progress' || msg.subtype === 'hook_response')) { const tagged = extractTag(msg.stdout as string, REMOTE_REVIEW_TAG); if (tagged?.trim()) return tagged.trim(); } } for (let i = log.length - 1; i >= 0; i--) { const msg = log[i]; if (msg?.type !== 'assistant') continue; const content = (msg as SDKAssistantMessage).message?.content as MessageContent | undefined; if (!content) continue; const fullText = extractTextContent( typeof content === 'string' ? [{ type: 'text' as const, text: content }] : content, '\n', ); const tagged = extractTag(fullText, REMOTE_REVIEW_TAG); if (tagged?.trim()) return tagged.trim(); } // Hook-stdout concat fallback: a single echo should land in one event, but // large JSON payloads can flush across two if the pipe buffer fills // mid-write. Per-message scan above misses a tag split across events. const hookStdout = log .filter(msg => msg.type === 'system' && (msg.subtype === 'hook_progress' || msg.subtype === 'hook_response')) .map(msg => msg.stdout as string) .join(''); const hookTagged = extractTag(hookStdout, REMOTE_REVIEW_TAG); if (hookTagged?.trim()) return hookTagged.trim(); // Fallback: concatenate all assistant text in chronological order. const allText = log .filter((msg): msg is SDKAssistantMessage => msg.type === 'assistant') .map(msg => { const content = msg.message?.content as MessageContent | undefined; if (!content) return ''; return extractTextContent( typeof content === 'string' ? [{ type: 'text' as const, text: content }] : content, '\n', ); }) .join('\n') .trim(); return allText || null; } /** * Tag-only variant of extractReviewFromLog for delta scanning. * * Returns non-null ONLY when an explicit tag is found. * Unlike extractReviewFromLog, this does NOT fall back to concatenated * assistant text. This is critical for the delta scan: in prompt mode, * early untagged assistant messages (e.g. "I'm analyzing the diff...") * would trigger the fallback and prematurely set cachedReviewContent, * completing the review before the actual tagged output arrives. */ function extractReviewTagFromLog(log: SDKMessage[]): string | null { // hook_progress / hook_response per-message scan (bughunter path) for (let i = log.length - 1; i >= 0; i--) { const msg = log[i]; if (msg?.type === 'system' && (msg.subtype === 'hook_progress' || msg.subtype === 'hook_response')) { const tagged = extractTag(msg.stdout as string, REMOTE_REVIEW_TAG); if (tagged?.trim()) return tagged.trim(); } } // assistant text per-message scan (prompt mode) for (let i = log.length - 1; i >= 0; i--) { const msg = log[i]; if (msg?.type !== 'assistant') continue; const content = (msg as SDKAssistantMessage).message?.content as MessageContent | undefined; if (!content) continue; const fullText = extractTextContent( typeof content === 'string' ? [{ type: 'text' as const, text: content }] : content, '\n', ); const tagged = extractTag(fullText, REMOTE_REVIEW_TAG); if (tagged?.trim()) return tagged.trim(); } // Hook-stdout concat fallback for split tags const hookStdout = log .filter(msg => msg.type === 'system' && (msg.subtype === 'hook_progress' || msg.subtype === 'hook_response')) .map(msg => msg.stdout as string) .join(''); const hookTagged = extractTag(hookStdout, REMOTE_REVIEW_TAG); if (hookTagged?.trim()) return hookTagged.trim(); return null; } /** * Enqueue a remote-review completion notification. Injects the review text * directly into the message queue so the local model receives it on the next * turn — no file indirection, no mode change. Session is kept alive so the * claude.ai URL stays a durable record the user can revisit; TTL handles cleanup. */ function enqueueRemoteReviewNotification(taskId: string, reviewContent: string, setAppState: SetAppState): void { if (!markTaskNotified(taskId, setAppState)) return; const message = `<${TASK_NOTIFICATION_TAG}> <${TASK_ID_TAG}>${taskId} <${TASK_TYPE_TAG}>remote_agent <${STATUS_TAG}>completed <${SUMMARY_TAG}>Remote review completed The remote review produced the following findings: ${reviewContent}`; enqueuePendingNotification({ value: message, mode: 'task-notification' }); } /** * Enqueue a remote-review failure notification. */ function enqueueRemoteReviewFailureNotification(taskId: string, reason: string, setAppState: SetAppState): void { if (!markTaskNotified(taskId, setAppState)) return; const message = `<${TASK_NOTIFICATION_TAG}> <${TASK_ID_TAG}>${taskId} <${TASK_TYPE_TAG}>remote_agent <${STATUS_TAG}>failed <${SUMMARY_TAG}>Remote review failed: ${reason} Remote review did not produce output (${reason}). Tell the user to retry /ultrareview, or use /review for a local review instead.`; enqueuePendingNotification({ value: message, mode: 'task-notification' }); } /** * Extract todo list from SDK messages (finds last TodoWrite tool use). */ function extractTodoListFromLog(log: SDKMessage[]): TodoList { const todoListMessage = log.findLast( (msg): msg is SDKAssistantMessage => msg.type === 'assistant' && Array.isArray((msg as SDKAssistantMessage).message?.content) && (((msg as SDKAssistantMessage).message?.content ?? []) as Array<{ type: string; name?: string }>).some( block => block.type === 'tool_use' && block.name === TodoWriteTool.name, ), ); if (!todoListMessage) { return []; } const contentBlocks = (todoListMessage.message?.content ?? []) as Array<{ type: string; name?: string; input?: unknown; }>; const input = contentBlocks.find( (block): block is ToolUseBlock => block.type === 'tool_use' && block.name === TodoWriteTool.name, )?.input; if (!input) { return []; } const parsedInput = TodoWriteTool.inputSchema.safeParse(input); if (!parsedInput.success) { return []; } return parsedInput.data.todos; } /** * Register a remote agent task in the unified task framework. * Bundles task ID generation, output init, state creation, registration, and polling. * Callers remain responsible for custom pre-registration logic (git dialogs, transcript upload, teleport options). */ export function registerRemoteAgentTask(options: { remoteTaskType: RemoteTaskType; session: { id: string; title: string }; command: string; context: TaskContext; toolUseId?: string; isRemoteReview?: boolean; isUltraplan?: boolean; isLongRunning?: boolean; remoteTaskMetadata?: RemoteTaskMetadata; }): { taskId: string; sessionId: string; cleanup: () => void; } { const { remoteTaskType, session, command, context, toolUseId, isRemoteReview, isUltraplan, isLongRunning, remoteTaskMetadata, } = options; const taskId = generateTaskId('remote_agent'); // Create the output file before registering the task. // RemoteAgentTask uses appendTaskOutput() (not TaskOutput), so // the file must exist for readers before any output arrives. void initTaskOutput(taskId); const taskState: RemoteAgentTaskState = { ...createTaskStateBase(taskId, 'remote_agent', session.title, toolUseId), type: 'remote_agent', remoteTaskType, status: 'running', sessionId: session.id, command, title: session.title, todoList: [], log: [], isRemoteReview, isUltraplan, isLongRunning, pollStartedAt: Date.now(), remoteTaskMetadata, }; registerTask(taskState, context.setAppState); // Persist identity to the session sidecar so --resume can reconnect to // still-running remote sessions. Status is not stored — it's fetched // fresh from CCR on restore. void persistRemoteAgentMetadata({ taskId, remoteTaskType, sessionId: session.id, title: session.title, command, spawnedAt: Date.now(), toolUseId, isUltraplan, isRemoteReview, isLongRunning, remoteTaskMetadata, }); // Ultraplan lifecycle is owned by startDetachedPoll in ultraplan.tsx. Generic // polling still runs so session.log populates for the detail view's progress // counts; the result-lookup guard below prevents early completion. // TODO(#23985): fold ExitPlanModeScanner into this poller, drop startDetachedPoll. const stopPolling = startRemoteSessionPolling(taskId, context); return { taskId, sessionId: session.id, cleanup: stopPolling, }; } /** * Restore remote-agent tasks from the session sidecar on --resume. * * Scans remote-agents/, fetches live CCR status for each, reconstructs * RemoteAgentTaskState into AppState.tasks, and restarts polling for sessions * still running. Sessions that are archived or 404 have their sidecar file * removed. Must run after switchSession() so getSessionId() points at the * resumed session's sidecar directory. */ export async function restoreRemoteAgentTasks(context: TaskContext): Promise { try { await restoreRemoteAgentTasksImpl(context); } catch (e) { logForDebugging(`restoreRemoteAgentTasks failed: ${String(e)}`); } } async function restoreRemoteAgentTasksImpl(context: TaskContext): Promise { const persisted = await listRemoteAgentMetadata(); if (persisted.length === 0) return; for (const meta of persisted) { let remoteStatus: string; try { const session = await fetchSession(meta.sessionId); remoteStatus = session.session_status; } catch (e) { // Only 404 means the CCR session is truly gone. Auth errors (401, // missing OAuth token) are recoverable via /login — the remote // session is still running. fetchSession throws plain Error for all // 4xx (validateStatus treats <500 as success), so isTransientNetworkError // can't distinguish them; match the 404 message instead. if (e instanceof Error && e.message.startsWith('Session not found:')) { logForDebugging(`restoreRemoteAgentTasks: dropping ${meta.taskId} (404: ${String(e)})`); void removeRemoteAgentMetadata(meta.taskId); } else { logForDebugging(`restoreRemoteAgentTasks: skipping ${meta.taskId} (recoverable: ${String(e)})`); } continue; } if (remoteStatus === 'archived') { // Session ended while the local client was offline. Don't resurrect. void removeRemoteAgentMetadata(meta.taskId); continue; } const taskState: RemoteAgentTaskState = { ...createTaskStateBase(meta.taskId, 'remote_agent', meta.title, meta.toolUseId), type: 'remote_agent', remoteTaskType: isRemoteTaskType(meta.remoteTaskType) ? meta.remoteTaskType : 'remote-agent', status: 'running', sessionId: meta.sessionId, command: meta.command, title: meta.title, todoList: [], log: [], isRemoteReview: meta.isRemoteReview, isUltraplan: meta.isUltraplan, isLongRunning: meta.isLongRunning, startTime: meta.spawnedAt, pollStartedAt: Date.now(), remoteTaskMetadata: meta.remoteTaskMetadata as RemoteTaskMetadata | undefined, }; registerTask(taskState, context.setAppState); void initTaskOutput(meta.taskId); startRemoteSessionPolling(meta.taskId, context); } } /** * Start polling for remote session updates. * Returns a cleanup function to stop polling. */ function startRemoteSessionPolling(taskId: string, context: TaskContext): () => void { let isRunning = true; const POLL_INTERVAL_MS = 1000; const REMOTE_REVIEW_TIMEOUT_MS = 30 * 60 * 1000; // Remote sessions flip to 'idle' between tool turns. With 100+ rapid // turns, a 1s poll WILL catch a transient idle mid-run. Require stable // idle (no log growth for N consecutive polls) before believing it. const STABLE_IDLE_POLLS = 5; let consecutiveIdlePolls = 0; let lastEventId: string | null = null; let accumulatedLog: SDKMessage[] = []; // Cached across ticks so we don't re-scan the full log. Tag appears once // at end of run; scanning only the delta (response.newEvents) is O(new). let cachedReviewContent: string | null = null; const poll = async (): Promise => { if (!isRunning) return; try { const appState = context.getAppState(); const task = appState.tasks?.[taskId] as RemoteAgentTaskState | undefined; if (!task || task.status !== 'running') { // Task was killed externally (TaskStopTool) or already terminal. // Session left alive so the claude.ai URL stays valid — the run_hunt.sh // post_stage() calls land as assistant events there, and the user may // want to revisit them after closing the terminal. TTL reaps it. return; } const response = await pollRemoteSessionEvents(task.sessionId, lastEventId); lastEventId = response.lastEventId; const logGrew = response.newEvents.length > 0; if (logGrew) { accumulatedLog = [...accumulatedLog, ...response.newEvents]; const deltaText = response.newEvents .map(msg => { if (msg.type === 'assistant') { const content = (msg as SDKAssistantMessage).message?.content; if (!content || typeof content === 'string') return ''; return (content as Array<{ type: string; text?: string }>) .filter(block => block.type === 'text') .map(block => ('text' in block ? block.text : '')) .join('\n'); } return jsonStringify(msg); }) .join('\n'); if (deltaText) { appendTaskOutput(taskId, deltaText + '\n'); } } if (response.sessionStatus === 'archived') { updateTaskState(taskId, context.setAppState, t => t.status === 'running' ? { ...t, status: 'completed', endTime: Date.now() } : t, ); const richContent = tryExtractRichContent(task, accumulatedLog); if (richContent) { enqueueRichRemoteNotification( taskId, task.title, 'completed', richContent, context.setAppState, task.toolUseId, ); } else { enqueueRemoteNotification(taskId, task.title, 'completed', context.setAppState, task.toolUseId); } void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); runCompletionHook(taskId, task); return; } const checker = completionCheckers.get(task.remoteTaskType); if (checker) { const completionResult = await checker(task.remoteTaskMetadata); if (completionResult !== null) { updateTaskState(taskId, context.setAppState, t => t.status === 'running' ? { ...t, status: 'completed', endTime: Date.now() } : t, ); const richContent = tryExtractRichContent(task, accumulatedLog); if (richContent) { enqueueRichRemoteNotification( taskId, completionResult, 'completed', richContent, context.setAppState, task.toolUseId, ); } else { enqueueRemoteNotification(taskId, completionResult, 'completed', context.setAppState, task.toolUseId); } void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); runCompletionHook(taskId, task); return; } } // Ultraplan: result(success) fires after every CCR turn, so it must not // drive completion — startDetachedPoll owns that via ExitPlanMode scan. // Long-running monitors (autofix-pr) emit result per notification cycle, // so the same skip applies. const result = task.isUltraplan || task.isLongRunning ? undefined : accumulatedLog.findLast(msg => msg.type === 'result'); // For remote-review: in hook_progress stdout is the // bughunter path's completion signal. Scan only the delta to stay O(new); // tag appears once at end of run so we won't miss it across ticks. // For the failure signal, debounce idle: remote sessions briefly flip // to 'idle' between every tool turn, so a single idle observation means // nothing. Require STABLE_IDLE_POLLS consecutive idle polls with no log // growth. if (task.isRemoteReview && logGrew && cachedReviewContent === null) { cachedReviewContent = extractReviewTagFromLog(response.newEvents); } // Parse live progress counts from the orchestrator's heartbeat echoes. // hook_progress stdout is cumulative (every echo since hook start), so // each event contains all progress tags. Grab the LAST occurrence — // extractTag returns the first match which would always be the earliest // value (0/0). let newProgress: RemoteAgentTaskState['reviewProgress']; if (task.isRemoteReview && logGrew) { const open = `<${REMOTE_REVIEW_PROGRESS_TAG}>`; const close = ``; for (const ev of response.newEvents) { if (ev.type === 'system' && (ev.subtype === 'hook_progress' || ev.subtype === 'hook_response')) { const s = ev.stdout as string; const closeAt = s.lastIndexOf(close); const openAt = closeAt === -1 ? -1 : s.lastIndexOf(open, closeAt); if (openAt !== -1 && closeAt > openAt) { try { const p = JSON.parse(s.slice(openAt + open.length, closeAt)) as { stage?: 'finding' | 'verifying' | 'synthesizing'; bugs_found?: number; bugs_verified?: number; bugs_refuted?: number; }; newProgress = { stage: p.stage, bugsFound: p.bugs_found ?? 0, bugsVerified: p.bugs_verified ?? 0, bugsRefuted: p.bugs_refuted ?? 0, }; } catch { // ignore malformed progress } } } } } // Hook events count as output only for remote-review — bughunter's // SessionStart hook produces zero assistant turns so stableIdle would // never arm without this. const hasAnyOutput = accumulatedLog.some( msg => msg.type === 'assistant' || (task.isRemoteReview && msg.type === 'system' && (msg.subtype === 'hook_progress' || msg.subtype === 'hook_response')), ); if (response.sessionStatus === 'idle' && !logGrew && hasAnyOutput) { consecutiveIdlePolls++; } else { consecutiveIdlePolls = 0; } const stableIdle = consecutiveIdlePolls >= STABLE_IDLE_POLLS; // stableIdle is a prompt-mode completion signal (Claude stops writing // → session idles → done). In bughunter mode the session is "idle" the // entire time the SessionStart hook runs; the previous guard checked // hasAssistantEvents as a prompt-mode proxy, but post_stage() now // writes assistant events in bughunter mode too, so that check // misfires between heartbeats. Presence of a SessionStart hook event // is the discriminator — bughunter mode always has one (run_hunt.sh), // prompt mode never does — and it arrives before the kickoff // post_stage so there's no race. When the hook is running, only the // tag or the 30min timeout complete the task. // Filtering on hook_event avoids a (theoretical) non-SessionStart hook // in prompt mode from blocking stableIdle — the code_review container // only registers SessionStart, but the 30min-hang failure mode is // worth defending against. const hasSessionStartHook = accumulatedLog.some( m => m.type === 'system' && (m.subtype === 'hook_started' || m.subtype === 'hook_progress' || m.subtype === 'hook_response') && (m as { hook_event?: string }).hook_event === 'SessionStart', ); const hasAssistantEvents = accumulatedLog.some(m => m.type === 'assistant'); const sessionDone = task.isRemoteReview && (cachedReviewContent !== null || (!hasSessionStartHook && stableIdle && hasAssistantEvents)); const reviewTimedOut = task.isRemoteReview && Date.now() - task.pollStartedAt > REMOTE_REVIEW_TIMEOUT_MS; const newStatus = result ? result.subtype === 'success' ? ('completed' as const) : ('failed' as const) : sessionDone || reviewTimedOut ? ('completed' as const) : accumulatedLog.length > 0 ? ('running' as const) : ('starting' as const); // Update task state. Guard against terminal states — if stopTask raced // while pollRemoteSessionEvents was in-flight (status set to 'killed', // notified set to true), bail without overwriting status or proceeding to // side effects (notification, permission-mode flip). let raceTerminated = false; updateTaskState(taskId, context.setAppState, prevTask => { if (prevTask.status !== 'running') { raceTerminated = true; return prevTask; } // No log growth and status unchanged → nothing to report. Return // same ref so updateTaskState skips the spread and 18 s.tasks // subscribers (REPL, Spinner, PromptInput, ...) don't re-render. // newProgress only arrives via log growth (heartbeat echo is a // hook_progress event), so !logGrew already covers no-update. const statusUnchanged = newStatus === 'running' || newStatus === 'starting'; if (!logGrew && statusUnchanged) { return prevTask; } return { ...prevTask, status: newStatus === 'starting' ? 'running' : newStatus, log: accumulatedLog, // Only re-scan for TodoWrite when log grew — log is append-only, // so no growth means no new tool_use blocks. Avoids findLast + // some + find + safeParse every second when idle. todoList: logGrew ? extractTodoListFromLog(accumulatedLog) : prevTask.todoList, reviewProgress: newProgress ?? prevTask.reviewProgress, endTime: result || sessionDone || reviewTimedOut ? Date.now() : undefined, }; }); if (raceTerminated) return; // Send notification if task completed or timed out if (result || sessionDone || reviewTimedOut) { const finalStatus = result && result.subtype !== 'success' ? 'failed' : 'completed'; // For remote-review tasks: inject the review text directly into the // message queue. No mode change, no file indirection — the local model // just sees the review appear as a task-notification on its next turn. // Session kept alive — run_hunt.sh's post_stage() has already written // the formatted findings as an assistant event, so the claude.ai URL // stays a durable record the user can revisit. TTL handles cleanup. if (task.isRemoteReview) { // cachedReviewContent hit the tag in the delta scan. Full-log scan // catches the stableIdle path where the tag arrived in an earlier // tick but the delta scan wasn't wired yet (first poll after resume). const reviewContent = cachedReviewContent ?? extractReviewFromLog(accumulatedLog); if (reviewContent && finalStatus === 'completed') { enqueueRemoteReviewNotification(taskId, reviewContent, context.setAppState); void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); runCompletionHook(taskId, task); return; // Stop polling } // No output or remote error — mark failed with a review-specific message. updateTaskState(taskId, context.setAppState, t => ({ ...t, status: 'failed', })); const reason = result && result.subtype !== 'success' ? 'remote session returned an error' : reviewTimedOut && !sessionDone ? 'remote session exceeded 30 minutes' : 'no review output — orchestrator may have exited early'; enqueueRemoteReviewFailureNotification(taskId, reason, context.setAppState); void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); runCompletionHook(taskId, task); return; // Stop polling } // finalStatus is 'completed' | 'failed' on this path — kill is a // separate code path (RemoteAgentTask.kill) and never reaches here. const richContent = tryExtractRichContent(task, accumulatedLog); if (richContent) { enqueueRichRemoteNotification( taskId, task.title, finalStatus, richContent, context.setAppState, task.toolUseId, ); } else { enqueueRemoteNotification(taskId, task.title, finalStatus, context.setAppState, task.toolUseId); } void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); runCompletionHook(taskId, task); return; // Stop polling } } catch (error) { logError(error); // Reset so an API error doesn't let non-consecutive idle polls accumulate. consecutiveIdlePolls = 0; // Check review timeout even when the API call fails — without this, // persistent API errors skip the timeout check and poll forever. try { const appState = context.getAppState(); const task = appState.tasks?.[taskId] as RemoteAgentTaskState | undefined; if ( task?.isRemoteReview && task.status === 'running' && Date.now() - task.pollStartedAt > REMOTE_REVIEW_TIMEOUT_MS ) { updateTaskState(taskId, context.setAppState, t => ({ ...t, status: 'failed', endTime: Date.now(), })); enqueueRemoteReviewFailureNotification(taskId, 'remote session exceeded 30 minutes', context.setAppState); void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); return; // Stop polling } } catch { // Best effort — if getAppState fails, continue polling } } // Continue polling if (isRunning) { setTimeout(poll, POLL_INTERVAL_MS); } }; // Start polling void poll(); // Return cleanup function return () => { isRunning = false; }; } /** * RemoteAgentTask - Handles remote Claude.ai session execution. * * Replaces the BackgroundRemoteSession implementation from: * - src/utils/background/remote/remoteSession.ts * - src/components/tasks/BackgroundTaskStatus.tsx (polling logic) */ export const RemoteAgentTask: Task = { name: 'RemoteAgentTask', type: 'remote_agent', async kill(taskId, setAppState) { let toolUseId: string | undefined; let description: string | undefined; let sessionId: string | undefined; let killed = false; updateTaskState(taskId, setAppState, task => { if (task.status !== 'running') { return task; } toolUseId = task.toolUseId; description = task.description; sessionId = task.sessionId; killed = true; return { ...task, status: 'killed', notified: true, endTime: Date.now(), }; }); // Close the task_started bookend for SDK consumers. The poll loop's // early-return when status!=='running' won't emit a notification. if (killed) { emitTaskTerminatedSdk(taskId, 'stopped', { toolUseId, summary: description, }); // Archive the remote session so it stops consuming cloud resources. if (sessionId) { void archiveRemoteSession(sessionId).catch(e => logForDebugging(`RemoteAgentTask archive failed: ${String(e)}`), ); } } void evictTaskOutput(taskId); void removeRemoteAgentMetadata(taskId); logForDebugging(`RemoteAgentTask ${taskId} killed, archiving session ${sessionId ?? 'unknown'}`); }, }; /** * Get the session URL for a remote task. */ export function getRemoteTaskSessionUrl(sessionId: string): string { return getRemoteSessionUrl(sessionId, process.env.SESSION_INGRESS_URL); }