claude-code/src/tasks/RemoteAgentTask/RemoteAgentTask.tsx

import type { ToolUseBlock } from '@anthropic-ai/sdk/resources';
import { getRemoteSessionUrl } from '../../constants/product.js';
import {
  OUTPUT_FILE_TAG,
  REMOTE_REVIEW_PROGRESS_TAG,
  REMOTE_REVIEW_TAG,
  STATUS_TAG,
  SUMMARY_TAG,
  TASK_ID_TAG,
  TASK_NOTIFICATION_TAG,
  TASK_TYPE_TAG,
  TOOL_USE_ID_TAG,
  ULTRAPLAN_TAG,
} from '../../constants/xml.js';
import type { SDKAssistantMessage, SDKMessage } from '../../entrypoints/agentSdkTypes.js';
import type { MessageContent } from '../../types/message.js';
import type { SetAppState, Task, TaskContext, TaskStateBase } from '../../Task.js';
import { createTaskStateBase, generateTaskId } from '../../Task.js';
import { TodoWriteTool } from '@claude-code-best/builtin-tools/tools/TodoWriteTool/TodoWriteTool.js';
import {
  type BackgroundRemoteSessionPrecondition,
  checkBackgroundRemoteSessionEligibility,
} from '../../utils/background/remote/remoteSession.js';
export type { BackgroundRemoteSessionPrecondition };
import { logForDebugging } from '../../utils/debug.js';
import { logError } from '../../utils/log.js';
import { enqueuePendingNotification } from '../../utils/messageQueueManager.js';
import { extractTag, extractTextContent } from '../../utils/messages.js';
import { emitTaskTerminatedSdk } from '../../utils/sdkEventQueue.js';
import {
  deleteRemoteAgentMetadata,
  listRemoteAgentMetadata,
  type RemoteAgentMetadata,
  writeRemoteAgentMetadata,
} from '../../utils/sessionStorage.js';
import { jsonStringify } from '../../utils/slowOperations.js';
import { appendTaskOutput, evictTaskOutput, getTaskOutputPath, initTaskOutput } from '../../utils/task/diskOutput.js';
import { registerTask, updateTaskState } from '../../utils/task/framework.js';
import { fetchSession } from '../../utils/teleport/api.js';
import { archiveRemoteSession, pollRemoteSessionEvents } from '../../utils/teleport.js';
import type { TodoList } from '../../utils/todo/types.js';
import type { UltraplanPhase } from '../../utils/ultraplan/ccrSession.js';

export type RemoteAgentTaskState = TaskStateBase & {
  type: 'remote_agent';
  remoteTaskType: RemoteTaskType;
  /** Task-specific metadata (PR number, repo, etc.). */
  remoteTaskMetadata?: RemoteTaskMetadata;
  sessionId: string; // Original session ID for API calls
  command: string;
  title: string;
  todoList: TodoList;
  log: SDKMessage[];
  /**
   * Long-running agent that will not be marked as complete after the first `result`.
   */
  isLongRunning?: boolean;
  /**
   * When the local poller started watching this task (at spawn or on restore).
   * Review timeout clocks from here so a restore doesn't immediately time out
   * a task spawned >30min ago.
   */
  pollStartedAt: number;
  /** True when this task was created by a teleported /ultrareview command. */
  isRemoteReview?: boolean;
  /** Parsed from the orchestrator's <remote-review-progress> heartbeat echoes. */
  reviewProgress?: {
    stage?: 'finding' | 'verifying' | 'synthesizing';
    bugsFound: number;
    bugsVerified: number;
    bugsRefuted: number;
  };
  isUltraplan?: boolean;
  /**
   * Scanner-derived pill state. Undefined = running. `needs_input` when the
   * remote asked a clarifying question and is idle; `plan_ready` when
   * ExitPlanMode is awaiting browser approval. Surfaced in the pill badge
   * and detail dialog status line.
   */
  ultraplanPhase?: Exclude<UltraplanPhase, 'running'>;
};

const REMOTE_TASK_TYPES = ['remote-agent', 'ultraplan', 'ultrareview', 'autofix-pr', 'background-pr'] as const;
export type RemoteTaskType = (typeof REMOTE_TASK_TYPES)[number];

function isRemoteTaskType(v: string | undefined): v is RemoteTaskType {
  return (REMOTE_TASK_TYPES as readonly string[]).includes(v ?? '');
}

export type AutofixPrRemoteTaskMetadata = {
  owner: string;
  repo: string;
  prNumber: number;
  /**
   * PR head commit SHA captured at /autofix-pr launch. The completionChecker
   * compares this against the live head to detect when the agent has pushed
   * new commits. Optional because gh CLI may be unavailable at launch — in
   * that case the checker falls back to terminal-state-only completion.
   * Survives --resume via the session sidecar.
   */
  initialHeadSha?: string;
};

export type RemoteTaskMetadata = AutofixPrRemoteTaskMetadata;

/**
 * Called on every poll tick for tasks with a matching remoteTaskType. Return a
 * non-null string to complete the task (string becomes the notification text),
 * or null to keep polling. Checkers that hit external APIs should self-throttle.
 */
export type RemoteTaskCompletionChecker = (
  remoteTaskMetadata: RemoteTaskMetadata | undefined,
) => Promise<string | null>;

const completionCheckers = new Map<RemoteTaskType, RemoteTaskCompletionChecker>();

/**
 * Register a completion checker for a remote task type. Invoked on every poll
 * tick; survives --resume via the sidecar's remoteTaskType + remoteTaskMetadata.
 */
export function registerCompletionChecker(remoteTaskType: RemoteTaskType, checker: RemoteTaskCompletionChecker): void {
  completionCheckers.set(remoteTaskType, checker);
}

/**
 * Called after the task transitions to a terminal state and the notification
 * has been enqueued. Used by command modules to release singleton locks,
 * clear cached state, or perform other cleanup the framework cannot see.
 * Hooks must be synchronous and best-effort — errors are logged but never
 * propagate.
 */
export type RemoteTaskCompletionHook = (taskId: string, remoteTaskMetadata: RemoteTaskMetadata | undefined) => void;

const completionHooks = new Map<RemoteTaskType, RemoteTaskCompletionHook>();

/**
 * Inspect a completed remote task's accumulated log and return an XML fragment
 * to inject inline into the completion task-notification. Returning null falls
 * back to the framework's generic "task completed" notification (file-path
 * pointer only). Used by command modules whose remote agents emit structured
 * outcome tags the local model should read directly.
 */
export type RemoteTaskContentExtractor = (log: SDKMessage[]) => string | null;

const contentExtractors = new Map<RemoteTaskType, RemoteTaskContentExtractor>();

/**
 * Register a content extractor for a remote task type. Called once per
 * completion in the generic completion branches (archived, completionChecker,
 * result-driven). isRemoteReview tasks have their own bespoke path and skip
 * extractors entirely. Errors propagate to the framework which logs and falls
 * back to generic notification.
 */
export function registerContentExtractor(remoteTaskType: RemoteTaskType, extractor: RemoteTaskContentExtractor): void {
  contentExtractors.set(remoteTaskType, extractor);
}

function tryExtractRichContent(task: RemoteAgentTaskState, log: SDKMessage[]): string | null {
  const extractor = contentExtractors.get(task.remoteTaskType);
  if (!extractor) return null;
  try {
    return extractor(log);
  } catch (e) {
    logError(e);
    return null;
  }
}

/**
 * Register a completion hook for a remote task type. Invoked once after the
 * task reaches a terminal state in any of the framework's completion branches
 * (archived session, completionChecker, stableIdle, result). Use this to
 * release command-module state (e.g. singleton locks) without forcing the
 * framework to reverse-import from the command package.
 */
export function registerCompletionHook(remoteTaskType: RemoteTaskType, hook: RemoteTaskCompletionHook): void {
  completionHooks.set(remoteTaskType, hook);
}

function runCompletionHook(taskId: string, task: RemoteAgentTaskState): void {
  const hook = completionHooks.get(task.remoteTaskType);
  if (!hook) return;
  try {
    hook(taskId, task.remoteTaskMetadata);
  } catch (e) {
    logError(e);
  }
}

/**
 * Persist a remote-agent metadata entry to the session sidecar.
 * Fire-and-forget — persistence failures must not block task registration.
 */
async function persistRemoteAgentMetadata(meta: RemoteAgentMetadata): Promise<void> {
  try {
    await writeRemoteAgentMetadata(meta.taskId, meta);
  } catch (e) {
    logForDebugging(`persistRemoteAgentMetadata failed: ${String(e)}`);
  }
}

/**
 * Remove a remote-agent metadata entry from the session sidecar.
 * Called on task completion/kill so restored sessions don't resurrect
 * tasks that already finished.
 */
async function removeRemoteAgentMetadata(taskId: string): Promise<void> {
  try {
    await deleteRemoteAgentMetadata(taskId);
  } catch (e) {
    logForDebugging(`removeRemoteAgentMetadata failed: ${String(e)}`);
  }
}

// Precondition error result
export type RemoteAgentPreconditionResult =
  | {
      eligible: true;
    }
  | {
      eligible: false;
      errors: BackgroundRemoteSessionPrecondition[];
    };

/**
 * Check eligibility for creating a remote agent session.
 */
export async function checkRemoteAgentEligibility({
  skipBundle = false,
}: {
  skipBundle?: boolean;
} = {}): Promise<RemoteAgentPreconditionResult> {
  const errors = await checkBackgroundRemoteSessionEligibility({ skipBundle });
  if (errors.length > 0) {
    return { eligible: false, errors };
  }
  return { eligible: true };
}

/**
 * Format precondition error for display.
 */
export function formatPreconditionError(error: BackgroundRemoteSessionPrecondition): string {
  switch (error.type) {
    case 'not_logged_in':
      return 'Please run /login and sign in with your Claude.ai account (not Console).';
    case 'no_remote_environment':
      return 'No cloud environment available. Set one up at https://claude.ai/code/onboarding?magic=env-setup';
    case 'not_in_git_repo':
      return 'Background tasks require a git repository. Initialize git or run from a git repository.';
    case 'no_git_remote':
      return 'Background tasks require a GitHub remote. Add one with `git remote add origin REPO_URL`.';
    case 'github_app_not_installed':
      return 'The Claude GitHub app must be installed on this repository first.\nhttps://github.com/apps/claude/installations/new';
    case 'policy_blocked':
      return "Remote sessions are disabled by your organization's policy. Contact your organization admin to enable them.";
  }
}

/**
 * Enqueue a remote task notification to the message queue.
 */
function enqueueRemoteNotification(
  taskId: string,
  title: string,
  status: 'completed' | 'failed' | 'killed',
  setAppState: SetAppState,
  toolUseId?: string,
): void {
  // Atomically check and set notified flag to prevent duplicate notifications.
  if (!markTaskNotified(taskId, setAppState)) return;

  const statusText = status === 'completed' ? 'completed successfully' : status === 'failed' ? 'failed' : 'was stopped';

  const toolUseIdLine = toolUseId ? `\n<${TOOL_USE_ID_TAG}>${toolUseId}</${TOOL_USE_ID_TAG}>` : '';

  const outputPath = getTaskOutputPath(taskId);
  const message = `<${TASK_NOTIFICATION_TAG}>
<${TASK_ID_TAG}>${taskId}</${TASK_ID_TAG}>${toolUseIdLine}
<${TASK_TYPE_TAG}>remote_agent</${TASK_TYPE_TAG}>
<${OUTPUT_FILE_TAG}>${outputPath}</${OUTPUT_FILE_TAG}>
<${STATUS_TAG}>${status}</${STATUS_TAG}>
<${SUMMARY_TAG}>Remote task "${title}" ${statusText}</${SUMMARY_TAG}>
</${TASK_NOTIFICATION_TAG}>`;

  enqueuePendingNotification({ value: message, mode: 'task-notification' });
}

/**
 * Same as enqueueRemoteNotification but inlines a structured XML fragment
 * (returned by a registered RemoteTaskContentExtractor) so the local model
 * reads the remote agent's outcome directly instead of having to follow a
 * file-path pointer. Mode is still 'task-notification' — the framing XML is
 * the same, only the body differs.
 */
function enqueueRichRemoteNotification(
  taskId: string,
  title: string,
  status: 'completed' | 'failed' | 'killed',
  richContent: string,
  setAppState: SetAppState,
  toolUseId?: string,
): void {
  if (!markTaskNotified(taskId, setAppState)) return;

  const statusText = status === 'completed' ? 'completed successfully' : status === 'failed' ? 'failed' : 'was stopped';
  const toolUseIdLine = toolUseId ? `\n<${TOOL_USE_ID_TAG}>${toolUseId}</${TOOL_USE_ID_TAG}>` : '';
  const outputPath = getTaskOutputPath(taskId);

  const message = `<${TASK_NOTIFICATION_TAG}>
<${TASK_ID_TAG}>${taskId}</${TASK_ID_TAG}>${toolUseIdLine}
<${TASK_TYPE_TAG}>remote_agent</${TASK_TYPE_TAG}>
<${OUTPUT_FILE_TAG}>${outputPath}</${OUTPUT_FILE_TAG}>
<${STATUS_TAG}>${status}</${STATUS_TAG}>
<${SUMMARY_TAG}>Remote task "${title}" ${statusText}</${SUMMARY_TAG}>
</${TASK_NOTIFICATION_TAG}>
The remote agent produced the following structured outcome. Summarize the key changes for the user:

${richContent}`;

  enqueuePendingNotification({ value: message, mode: 'task-notification' });
}

/**
 * Atomically mark a task as notified. Returns true if this call flipped the
 * flag (caller should enqueue), false if already notified (caller should skip).
 */
function markTaskNotified(taskId: string, setAppState: SetAppState): boolean {
  let shouldEnqueue = false;
  updateTaskState(taskId, setAppState, task => {
    if (task.notified) {
      return task;
    }
    shouldEnqueue = true;
    return { ...task, notified: true };
  });
  return shouldEnqueue;
}

/**
 * Extract the plan content from the remote session log.
 * Searches all assistant messages for <ultraplan>...</ultraplan> tags.
 */
export function extractPlanFromLog(log: SDKMessage[]): string | null {
  // Walk backwards through assistant messages to find <ultraplan> content
  for (let i = log.length - 1; i >= 0; i--) {
    const msg = log[i] as SDKAssistantMessage;
    if (msg?.type !== 'assistant') continue;
    const content = msg.message?.content as MessageContent | undefined;
    if (!content) continue;
    const fullText = extractTextContent(
      typeof content === 'string' ? [{ type: 'text' as const, text: content }] : content,
      '\n',
    );
    const plan = extractTag(fullText, ULTRAPLAN_TAG);
    if (plan?.trim()) return plan.trim();
  }
  return null;
}

/**
 * Enqueue an ultraplan-specific failure notification. Unlike enqueueRemoteNotification
 * this does NOT instruct the model to read the raw output file (a JSONL dump that is
 * useless for plan extraction).
 */
export function enqueueUltraplanFailureNotification(
  taskId: string,
  sessionId: string,
  reason: string,
  setAppState: SetAppState,
): void {
  if (!markTaskNotified(taskId, setAppState)) return;

  const sessionUrl = getRemoteTaskSessionUrl(sessionId);
  const message = `<${TASK_NOTIFICATION_TAG}>
<${TASK_ID_TAG}>${taskId}</${TASK_ID_TAG}>
<${TASK_TYPE_TAG}>remote_agent</${TASK_TYPE_TAG}>
<${STATUS_TAG}>failed</${STATUS_TAG}>
<${SUMMARY_TAG}>Ultraplan failed: ${reason}</${SUMMARY_TAG}>
</${TASK_NOTIFICATION_TAG}>
The remote Ultraplan session did not produce a plan (${reason}). Inspect the session at ${sessionUrl} and tell the user to retry locally with plan mode.`;

  enqueuePendingNotification({ value: message, mode: 'task-notification' });
}

/**
 * Extract review content from the remote session log.
 *
 * Two producers, two event shapes:
 * - bughunter mode: run_hunt.sh is a SessionStart hook; its echo lands as
 *   {type:'system', subtype:'hook_progress', stdout:'...'}. Claude never
 *   takes a turn so there are zero assistant messages.
 * - prompt mode: a real assistant turn wraps the review in the tag.
 *
 * Scans hook_progress first since bughunter is the intended production path
 * and prompt mode is the dev/fallback. Newest-first in both cases — the tag
 * appears once at the end of the run so reverse iteration short-circuits.
 */
function extractReviewFromLog(log: SDKMessage[]): string | null {
  for (let i = log.length - 1; i >= 0; i--) {
    const msg = log[i];
    // The final echo before hook exit may land in either the last
    // hook_progress or the terminal hook_response depending on buffering;
    // both have flat stdout.
    if (msg?.type === 'system' && (msg.subtype === 'hook_progress' || msg.subtype === 'hook_response')) {
      const tagged = extractTag(msg.stdout as string, REMOTE_REVIEW_TAG);
      if (tagged?.trim()) return tagged.trim();
    }
  }

  for (let i = log.length - 1; i >= 0; i--) {
    const msg = log[i];
    if (msg?.type !== 'assistant') continue;
    const content = (msg as SDKAssistantMessage).message?.content as MessageContent | undefined;
    if (!content) continue;
    const fullText = extractTextContent(
      typeof content === 'string' ? [{ type: 'text' as const, text: content }] : content,
      '\n',
    );
    const tagged = extractTag(fullText, REMOTE_REVIEW_TAG);
    if (tagged?.trim()) return tagged.trim();
  }

  // Hook-stdout concat fallback: a single echo should land in one event, but
  // large JSON payloads can flush across two if the pipe buffer fills
  // mid-write. Per-message scan above misses a tag split across events.
  const hookStdout = log
    .filter(msg => msg.type === 'system' && (msg.subtype === 'hook_progress' || msg.subtype === 'hook_response'))
    .map(msg => msg.stdout as string)
    .join('');
  const hookTagged = extractTag(hookStdout, REMOTE_REVIEW_TAG);
  if (hookTagged?.trim()) return hookTagged.trim();

  // Fallback: concatenate all assistant text in chronological order.
  const allText = log
    .filter((msg): msg is SDKAssistantMessage => msg.type === 'assistant')
    .map(msg => {
      const content = msg.message?.content as MessageContent | undefined;
      if (!content) return '';
      return extractTextContent(
        typeof content === 'string' ? [{ type: 'text' as const, text: content }] : content,
        '\n',
      );
    })
    .join('\n')
    .trim();

  return allText || null;
}

/**
 * Tag-only variant of extractReviewFromLog for delta scanning.
 *
 * Returns non-null ONLY when an explicit <remote-review> tag is found.
 * Unlike extractReviewFromLog, this does NOT fall back to concatenated
 * assistant text. This is critical for the delta scan: in prompt mode,
 * early untagged assistant messages (e.g. "I'm analyzing the diff...")
 * would trigger the fallback and prematurely set cachedReviewContent,
 * completing the review before the actual tagged output arrives.
 */
function extractReviewTagFromLog(log: SDKMessage[]): string | null {
  // hook_progress / hook_response per-message scan (bughunter path)
  for (let i = log.length - 1; i >= 0; i--) {
    const msg = log[i];
    if (msg?.type === 'system' && (msg.subtype === 'hook_progress' || msg.subtype === 'hook_response')) {
      const tagged = extractTag(msg.stdout as string, REMOTE_REVIEW_TAG);
      if (tagged?.trim()) return tagged.trim();
    }
  }

  // assistant text per-message scan (prompt mode)
  for (let i = log.length - 1; i >= 0; i--) {
    const msg = log[i];
    if (msg?.type !== 'assistant') continue;
    const content = (msg as SDKAssistantMessage).message?.content as MessageContent | undefined;
    if (!content) continue;
    const fullText = extractTextContent(
      typeof content === 'string' ? [{ type: 'text' as const, text: content }] : content,
      '\n',
    );
    const tagged = extractTag(fullText, REMOTE_REVIEW_TAG);
    if (tagged?.trim()) return tagged.trim();
  }

  // Hook-stdout concat fallback for split tags
  const hookStdout = log
    .filter(msg => msg.type === 'system' && (msg.subtype === 'hook_progress' || msg.subtype === 'hook_response'))
    .map(msg => msg.stdout as string)
    .join('');
  const hookTagged = extractTag(hookStdout, REMOTE_REVIEW_TAG);
  if (hookTagged?.trim()) return hookTagged.trim();

  return null;
}

/**
 * Enqueue a remote-review completion notification. Injects the review text
 * directly into the message queue so the local model receives it on the next
 * turn — no file indirection, no mode change. Session is kept alive so the
 * claude.ai URL stays a durable record the user can revisit; TTL handles cleanup.
 */
function enqueueRemoteReviewNotification(taskId: string, reviewContent: string, setAppState: SetAppState): void {
  if (!markTaskNotified(taskId, setAppState)) return;

  const message = `<${TASK_NOTIFICATION_TAG}>
<${TASK_ID_TAG}>${taskId}</${TASK_ID_TAG}>
<${TASK_TYPE_TAG}>remote_agent</${TASK_TYPE_TAG}>
<${STATUS_TAG}>completed</${STATUS_TAG}>
<${SUMMARY_TAG}>Remote review completed</${SUMMARY_TAG}>
</${TASK_NOTIFICATION_TAG}>
The remote review produced the following findings:

${reviewContent}`;

  enqueuePendingNotification({ value: message, mode: 'task-notification' });
}

/**
 * Enqueue a remote-review failure notification.
 */
function enqueueRemoteReviewFailureNotification(taskId: string, reason: string, setAppState: SetAppState): void {
  if (!markTaskNotified(taskId, setAppState)) return;

  const message = `<${TASK_NOTIFICATION_TAG}>
<${TASK_ID_TAG}>${taskId}</${TASK_ID_TAG}>
<${TASK_TYPE_TAG}>remote_agent</${TASK_TYPE_TAG}>
<${STATUS_TAG}>failed</${STATUS_TAG}>
<${SUMMARY_TAG}>Remote review failed: ${reason}</${SUMMARY_TAG}>
</${TASK_NOTIFICATION_TAG}>
Remote review did not produce output (${reason}). Tell the user to retry /ultrareview, or use /review for a local review instead.`;

  enqueuePendingNotification({ value: message, mode: 'task-notification' });
}

/**
 * Extract todo list from SDK messages (finds last TodoWrite tool use).
 */
function extractTodoListFromLog(log: SDKMessage[]): TodoList {
  const todoListMessage = log.findLast(
    (msg): msg is SDKAssistantMessage =>
      msg.type === 'assistant' &&
      Array.isArray((msg as SDKAssistantMessage).message?.content) &&
      (((msg as SDKAssistantMessage).message?.content ?? []) as Array<{ type: string; name?: string }>).some(
        block => block.type === 'tool_use' && block.name === TodoWriteTool.name,
      ),
  );
  if (!todoListMessage) {
    return [];
  }

  const contentBlocks = (todoListMessage.message?.content ?? []) as Array<{
    type: string;
    name?: string;
    input?: unknown;
  }>;
  const input = contentBlocks.find(
    (block): block is ToolUseBlock => block.type === 'tool_use' && block.name === TodoWriteTool.name,
  )?.input;
  if (!input) {
    return [];
  }

  const parsedInput = TodoWriteTool.inputSchema.safeParse(input);
  if (!parsedInput.success) {
    return [];
  }

  return parsedInput.data.todos;
}

/**
 * Register a remote agent task in the unified task framework.
 * Bundles task ID generation, output init, state creation, registration, and polling.
 * Callers remain responsible for custom pre-registration logic (git dialogs, transcript upload, teleport options).
 */
export function registerRemoteAgentTask(options: {
  remoteTaskType: RemoteTaskType;
  session: { id: string; title: string };
  command: string;
  context: TaskContext;
  toolUseId?: string;
  isRemoteReview?: boolean;
  isUltraplan?: boolean;
  isLongRunning?: boolean;
  remoteTaskMetadata?: RemoteTaskMetadata;
}): {
  taskId: string;
  sessionId: string;
  cleanup: () => void;
} {
  const {
    remoteTaskType,
    session,
    command,
    context,
    toolUseId,
    isRemoteReview,
    isUltraplan,
    isLongRunning,
    remoteTaskMetadata,
  } = options;
  const taskId = generateTaskId('remote_agent');

  // Create the output file before registering the task.
  // RemoteAgentTask uses appendTaskOutput() (not TaskOutput), so
  // the file must exist for readers before any output arrives.
  void initTaskOutput(taskId);

  const taskState: RemoteAgentTaskState = {
    ...createTaskStateBase(taskId, 'remote_agent', session.title, toolUseId),
    type: 'remote_agent',
    remoteTaskType,
    status: 'running',
    sessionId: session.id,
    command,
    title: session.title,
    todoList: [],
    log: [],
    isRemoteReview,
    isUltraplan,
    isLongRunning,
    pollStartedAt: Date.now(),
    remoteTaskMetadata,
  };

  registerTask(taskState, context.setAppState);

  // Persist identity to the session sidecar so --resume can reconnect to
  // still-running remote sessions. Status is not stored — it's fetched
  // fresh from CCR on restore.
  void persistRemoteAgentMetadata({
    taskId,
    remoteTaskType,
    sessionId: session.id,
    title: session.title,
    command,
    spawnedAt: Date.now(),
    toolUseId,
    isUltraplan,
    isRemoteReview,
    isLongRunning,
    remoteTaskMetadata,
  });

  // Ultraplan lifecycle is owned by startDetachedPoll in ultraplan.tsx. Generic
  // polling still runs so session.log populates for the detail view's progress
  // counts; the result-lookup guard below prevents early completion.
  // TODO(#23985): fold ExitPlanModeScanner into this poller, drop startDetachedPoll.
  const stopPolling = startRemoteSessionPolling(taskId, context);

  return {
    taskId,
    sessionId: session.id,
    cleanup: stopPolling,
  };
}

/**
 * Restore remote-agent tasks from the session sidecar on --resume.
 *
 * Scans remote-agents/, fetches live CCR status for each, reconstructs
 * RemoteAgentTaskState into AppState.tasks, and restarts polling for sessions
 * still running. Sessions that are archived or 404 have their sidecar file
 * removed. Must run after switchSession() so getSessionId() points at the
 * resumed session's sidecar directory.
 */
export async function restoreRemoteAgentTasks(context: TaskContext): Promise<void> {
  try {
    await restoreRemoteAgentTasksImpl(context);
  } catch (e) {
    logForDebugging(`restoreRemoteAgentTasks failed: ${String(e)}`);
  }
}

async function restoreRemoteAgentTasksImpl(context: TaskContext): Promise<void> {
  const persisted = await listRemoteAgentMetadata();
  if (persisted.length === 0) return;

  for (const meta of persisted) {
    let remoteStatus: string;
    try {
      const session = await fetchSession(meta.sessionId);
      remoteStatus = session.session_status;
    } catch (e) {
      // Only 404 means the CCR session is truly gone. Auth errors (401,
      // missing OAuth token) are recoverable via /login — the remote
      // session is still running. fetchSession throws plain Error for all
      // 4xx (validateStatus treats <500 as success), so isTransientNetworkError
      // can't distinguish them; match the 404 message instead.
      if (e instanceof Error && e.message.startsWith('Session not found:')) {
        logForDebugging(`restoreRemoteAgentTasks: dropping ${meta.taskId} (404: ${String(e)})`);
        void removeRemoteAgentMetadata(meta.taskId);
      } else {
        logForDebugging(`restoreRemoteAgentTasks: skipping ${meta.taskId} (recoverable: ${String(e)})`);
      }
      continue;
    }

    if (remoteStatus === 'archived') {
      // Session ended while the local client was offline. Don't resurrect.
      void removeRemoteAgentMetadata(meta.taskId);
      continue;
    }

    const taskState: RemoteAgentTaskState = {
      ...createTaskStateBase(meta.taskId, 'remote_agent', meta.title, meta.toolUseId),
      type: 'remote_agent',
      remoteTaskType: isRemoteTaskType(meta.remoteTaskType) ? meta.remoteTaskType : 'remote-agent',
      status: 'running',
      sessionId: meta.sessionId,
      command: meta.command,
      title: meta.title,
      todoList: [],
      log: [],
      isRemoteReview: meta.isRemoteReview,
      isUltraplan: meta.isUltraplan,
      isLongRunning: meta.isLongRunning,
      startTime: meta.spawnedAt,
      pollStartedAt: Date.now(),
      remoteTaskMetadata: meta.remoteTaskMetadata as RemoteTaskMetadata | undefined,
    };

    registerTask(taskState, context.setAppState);
    void initTaskOutput(meta.taskId);
    startRemoteSessionPolling(meta.taskId, context);
  }
}

/**
 * Start polling for remote session updates.
 * Returns a cleanup function to stop polling.
 */
function startRemoteSessionPolling(taskId: string, context: TaskContext): () => void {
  let isRunning = true;
  const POLL_INTERVAL_MS = 1000;
  const REMOTE_REVIEW_TIMEOUT_MS = 30 * 60 * 1000;
  // Remote sessions flip to 'idle' between tool turns. With 100+ rapid
  // turns, a 1s poll WILL catch a transient idle mid-run. Require stable
  // idle (no log growth for N consecutive polls) before believing it.
  const STABLE_IDLE_POLLS = 5;
  let consecutiveIdlePolls = 0;
  let lastEventId: string | null = null;
  let accumulatedLog: SDKMessage[] = [];
  // Cached across ticks so we don't re-scan the full log. Tag appears once
  // at end of run; scanning only the delta (response.newEvents) is O(new).
  let cachedReviewContent: string | null = null;

  const poll = async (): Promise<void> => {
    if (!isRunning) return;

    try {
      const appState = context.getAppState();
      const task = appState.tasks?.[taskId] as RemoteAgentTaskState | undefined;
      if (!task || task.status !== 'running') {
        // Task was killed externally (TaskStopTool) or already terminal.
        // Session left alive so the claude.ai URL stays valid — the run_hunt.sh
        // post_stage() calls land as assistant events there, and the user may
        // want to revisit them after closing the terminal. TTL reaps it.
        return;
      }

      const response = await pollRemoteSessionEvents(task.sessionId, lastEventId);
      lastEventId = response.lastEventId;
      const logGrew = response.newEvents.length > 0;
      if (logGrew) {
        accumulatedLog = [...accumulatedLog, ...response.newEvents];
        const deltaText = response.newEvents
          .map(msg => {
            if (msg.type === 'assistant') {
              const content = (msg as SDKAssistantMessage).message?.content;
              if (!content || typeof content === 'string') return '';
              return (content as Array<{ type: string; text?: string }>)
                .filter(block => block.type === 'text')
                .map(block => ('text' in block ? block.text : ''))
                .join('\n');
            }
            return jsonStringify(msg);
          })
          .join('\n');
        if (deltaText) {
          appendTaskOutput(taskId, deltaText + '\n');
        }
      }

      if (response.sessionStatus === 'archived') {
        updateTaskState<RemoteAgentTaskState>(taskId, context.setAppState, t =>
          t.status === 'running' ? { ...t, status: 'completed', endTime: Date.now() } : t,
        );
        const richContent = tryExtractRichContent(task, accumulatedLog);
        if (richContent) {
          enqueueRichRemoteNotification(
            taskId,
            task.title,
            'completed',
            richContent,
            context.setAppState,
            task.toolUseId,
          );
        } else {
          enqueueRemoteNotification(taskId, task.title, 'completed', context.setAppState, task.toolUseId);
        }
        void evictTaskOutput(taskId);
        void removeRemoteAgentMetadata(taskId);
        runCompletionHook(taskId, task);
        return;
      }

      const checker = completionCheckers.get(task.remoteTaskType);
      if (checker) {
        const completionResult = await checker(task.remoteTaskMetadata);
        if (completionResult !== null) {
          updateTaskState<RemoteAgentTaskState>(taskId, context.setAppState, t =>
            t.status === 'running' ? { ...t, status: 'completed', endTime: Date.now() } : t,
          );
          const richContent = tryExtractRichContent(task, accumulatedLog);
          if (richContent) {
            enqueueRichRemoteNotification(
              taskId,
              completionResult,
              'completed',
              richContent,
              context.setAppState,
              task.toolUseId,
            );
          } else {
            enqueueRemoteNotification(taskId, completionResult, 'completed', context.setAppState, task.toolUseId);
          }
          void evictTaskOutput(taskId);
          void removeRemoteAgentMetadata(taskId);
          runCompletionHook(taskId, task);
          return;
        }
      }

      // Ultraplan: result(success) fires after every CCR turn, so it must not
      // drive completion — startDetachedPoll owns that via ExitPlanMode scan.
      // Long-running monitors (autofix-pr) emit result per notification cycle,
      // so the same skip applies.
      const result =
        task.isUltraplan || task.isLongRunning ? undefined : accumulatedLog.findLast(msg => msg.type === 'result');

      // For remote-review: <remote-review> in hook_progress stdout is the
      // bughunter path's completion signal. Scan only the delta to stay O(new);
      // tag appears once at end of run so we won't miss it across ticks.
      // For the failure signal, debounce idle: remote sessions briefly flip
      // to 'idle' between every tool turn, so a single idle observation means
      // nothing. Require STABLE_IDLE_POLLS consecutive idle polls with no log
      // growth.
      if (task.isRemoteReview && logGrew && cachedReviewContent === null) {
        cachedReviewContent = extractReviewTagFromLog(response.newEvents);
      }
      // Parse live progress counts from the orchestrator's heartbeat echoes.
      // hook_progress stdout is cumulative (every echo since hook start), so
      // each event contains all progress tags. Grab the LAST occurrence —
      // extractTag returns the first match which would always be the earliest
      // value (0/0).
      let newProgress: RemoteAgentTaskState['reviewProgress'];
      if (task.isRemoteReview && logGrew) {
        const open = `<${REMOTE_REVIEW_PROGRESS_TAG}>`;
        const close = `</${REMOTE_REVIEW_PROGRESS_TAG}>`;
        for (const ev of response.newEvents) {
          if (ev.type === 'system' && (ev.subtype === 'hook_progress' || ev.subtype === 'hook_response')) {
            const s = ev.stdout as string;
            const closeAt = s.lastIndexOf(close);
            const openAt = closeAt === -1 ? -1 : s.lastIndexOf(open, closeAt);
            if (openAt !== -1 && closeAt > openAt) {
              try {
                const p = JSON.parse(s.slice(openAt + open.length, closeAt)) as {
                  stage?: 'finding' | 'verifying' | 'synthesizing';
                  bugs_found?: number;
                  bugs_verified?: number;
                  bugs_refuted?: number;
                };
                newProgress = {
                  stage: p.stage,
                  bugsFound: p.bugs_found ?? 0,
                  bugsVerified: p.bugs_verified ?? 0,
                  bugsRefuted: p.bugs_refuted ?? 0,
                };
              } catch {
                // ignore malformed progress
              }
            }
          }
        }
      }
      // Hook events count as output only for remote-review — bughunter's
      // SessionStart hook produces zero assistant turns so stableIdle would
      // never arm without this.
      const hasAnyOutput = accumulatedLog.some(
        msg =>
          msg.type === 'assistant' ||
          (task.isRemoteReview &&
            msg.type === 'system' &&
            (msg.subtype === 'hook_progress' || msg.subtype === 'hook_response')),
      );
      if (response.sessionStatus === 'idle' && !logGrew && hasAnyOutput) {
        consecutiveIdlePolls++;
      } else {
        consecutiveIdlePolls = 0;
      }
      const stableIdle = consecutiveIdlePolls >= STABLE_IDLE_POLLS;
      // stableIdle is a prompt-mode completion signal (Claude stops writing
      // → session idles → done). In bughunter mode the session is "idle" the
      // entire time the SessionStart hook runs; the previous guard checked
      // hasAssistantEvents as a prompt-mode proxy, but post_stage() now
      // writes assistant events in bughunter mode too, so that check
      // misfires between heartbeats. Presence of a SessionStart hook event
      // is the discriminator — bughunter mode always has one (run_hunt.sh),
      // prompt mode never does — and it arrives before the kickoff
      // post_stage so there's no race. When the hook is running, only the
      // <remote-review> tag or the 30min timeout complete the task.
      // Filtering on hook_event avoids a (theoretical) non-SessionStart hook
      // in prompt mode from blocking stableIdle — the code_review container
      // only registers SessionStart, but the 30min-hang failure mode is
      // worth defending against.
      const hasSessionStartHook = accumulatedLog.some(
        m =>
          m.type === 'system' &&
          (m.subtype === 'hook_started' || m.subtype === 'hook_progress' || m.subtype === 'hook_response') &&
          (m as { hook_event?: string }).hook_event === 'SessionStart',
      );
      const hasAssistantEvents = accumulatedLog.some(m => m.type === 'assistant');
      const sessionDone =
        task.isRemoteReview &&
        (cachedReviewContent !== null || (!hasSessionStartHook && stableIdle && hasAssistantEvents));
      const reviewTimedOut = task.isRemoteReview && Date.now() - task.pollStartedAt > REMOTE_REVIEW_TIMEOUT_MS;
      const newStatus = result
        ? result.subtype === 'success'
          ? ('completed' as const)
          : ('failed' as const)
        : sessionDone || reviewTimedOut
          ? ('completed' as const)
          : accumulatedLog.length > 0
            ? ('running' as const)
            : ('starting' as const);

      // Update task state. Guard against terminal states — if stopTask raced
      // while pollRemoteSessionEvents was in-flight (status set to 'killed',
      // notified set to true), bail without overwriting status or proceeding to
      // side effects (notification, permission-mode flip).
      let raceTerminated = false;
      updateTaskState<RemoteAgentTaskState>(taskId, context.setAppState, prevTask => {
        if (prevTask.status !== 'running') {
          raceTerminated = true;
          return prevTask;
        }
        // No log growth and status unchanged → nothing to report. Return
        // same ref so updateTaskState skips the spread and 18 s.tasks
        // subscribers (REPL, Spinner, PromptInput, ...) don't re-render.
        // newProgress only arrives via log growth (heartbeat echo is a
        // hook_progress event), so !logGrew already covers no-update.
        const statusUnchanged = newStatus === 'running' || newStatus === 'starting';
        if (!logGrew && statusUnchanged) {
          return prevTask;
        }
        return {
          ...prevTask,
          status: newStatus === 'starting' ? 'running' : newStatus,
          log: accumulatedLog,
          // Only re-scan for TodoWrite when log grew — log is append-only,
          // so no growth means no new tool_use blocks. Avoids findLast +
          // some + find + safeParse every second when idle.
          todoList: logGrew ? extractTodoListFromLog(accumulatedLog) : prevTask.todoList,
          reviewProgress: newProgress ?? prevTask.reviewProgress,
          endTime: result || sessionDone || reviewTimedOut ? Date.now() : undefined,
        };
      });
      if (raceTerminated) return;

      // Send notification if task completed or timed out
      if (result || sessionDone || reviewTimedOut) {
        const finalStatus = result && result.subtype !== 'success' ? 'failed' : 'completed';

        // For remote-review tasks: inject the review text directly into the
        // message queue. No mode change, no file indirection — the local model
        // just sees the review appear as a task-notification on its next turn.
        // Session kept alive — run_hunt.sh's post_stage() has already written
        // the formatted findings as an assistant event, so the claude.ai URL
        // stays a durable record the user can revisit. TTL handles cleanup.
        if (task.isRemoteReview) {
          // cachedReviewContent hit the tag in the delta scan. Full-log scan
          // catches the stableIdle path where the tag arrived in an earlier
          // tick but the delta scan wasn't wired yet (first poll after resume).
          const reviewContent = cachedReviewContent ?? extractReviewFromLog(accumulatedLog);
          if (reviewContent && finalStatus === 'completed') {
            enqueueRemoteReviewNotification(taskId, reviewContent, context.setAppState);
            void evictTaskOutput(taskId);
            void removeRemoteAgentMetadata(taskId);
            runCompletionHook(taskId, task);
            return; // Stop polling
          }

          // No output or remote error — mark failed with a review-specific message.
          updateTaskState(taskId, context.setAppState, t => ({
            ...t,
            status: 'failed',
          }));
          const reason =
            result && result.subtype !== 'success'
              ? 'remote session returned an error'
              : reviewTimedOut && !sessionDone
                ? 'remote session exceeded 30 minutes'
                : 'no review output — orchestrator may have exited early';
          enqueueRemoteReviewFailureNotification(taskId, reason, context.setAppState);
          void evictTaskOutput(taskId);
          void removeRemoteAgentMetadata(taskId);
          runCompletionHook(taskId, task);
          return; // Stop polling
        }

        // finalStatus is 'completed' | 'failed' on this path — kill is a
        // separate code path (RemoteAgentTask.kill) and never reaches here.
        const richContent = tryExtractRichContent(task, accumulatedLog);
        if (richContent) {
          enqueueRichRemoteNotification(
            taskId,
            task.title,
            finalStatus,
            richContent,
            context.setAppState,
            task.toolUseId,
          );
        } else {
          enqueueRemoteNotification(taskId, task.title, finalStatus, context.setAppState, task.toolUseId);
        }
        void evictTaskOutput(taskId);
        void removeRemoteAgentMetadata(taskId);
        runCompletionHook(taskId, task);
        return; // Stop polling
      }
    } catch (error) {
      logError(error);
      // Reset so an API error doesn't let non-consecutive idle polls accumulate.
      consecutiveIdlePolls = 0;

      // Check review timeout even when the API call fails — without this,
      // persistent API errors skip the timeout check and poll forever.
      try {
        const appState = context.getAppState();
        const task = appState.tasks?.[taskId] as RemoteAgentTaskState | undefined;
        if (
          task?.isRemoteReview &&
          task.status === 'running' &&
          Date.now() - task.pollStartedAt > REMOTE_REVIEW_TIMEOUT_MS
        ) {
          updateTaskState(taskId, context.setAppState, t => ({
            ...t,
            status: 'failed',
            endTime: Date.now(),
          }));
          enqueueRemoteReviewFailureNotification(taskId, 'remote session exceeded 30 minutes', context.setAppState);
          void evictTaskOutput(taskId);
          void removeRemoteAgentMetadata(taskId);
          return; // Stop polling
        }
      } catch {
        // Best effort — if getAppState fails, continue polling
      }
    }

    // Continue polling
    if (isRunning) {
      setTimeout(poll, POLL_INTERVAL_MS);
    }
  };

  // Start polling
  void poll();

  // Return cleanup function
  return () => {
    isRunning = false;
  };
}

/**
 * RemoteAgentTask - Handles remote Claude.ai session execution.
 *
 * Replaces the BackgroundRemoteSession implementation from:
 * - src/utils/background/remote/remoteSession.ts
 * - src/components/tasks/BackgroundTaskStatus.tsx (polling logic)
 */
export const RemoteAgentTask: Task = {
  name: 'RemoteAgentTask',
  type: 'remote_agent',
  async kill(taskId, setAppState) {
    let toolUseId: string | undefined;
    let description: string | undefined;
    let sessionId: string | undefined;
    let killed = false;
    updateTaskState<RemoteAgentTaskState>(taskId, setAppState, task => {
      if (task.status !== 'running') {
        return task;
      }
      toolUseId = task.toolUseId;
      description = task.description;
      sessionId = task.sessionId;
      killed = true;
      return {
        ...task,
        status: 'killed',
        notified: true,
        endTime: Date.now(),
      };
    });

    // Close the task_started bookend for SDK consumers. The poll loop's
    // early-return when status!=='running' won't emit a notification.
    if (killed) {
      emitTaskTerminatedSdk(taskId, 'stopped', {
        toolUseId,
        summary: description,
      });
      // Archive the remote session so it stops consuming cloud resources.
      if (sessionId) {
        void archiveRemoteSession(sessionId).catch(e =>
          logForDebugging(`RemoteAgentTask archive failed: ${String(e)}`),
        );
      }
    }

    void evictTaskOutput(taskId);
    void removeRemoteAgentMetadata(taskId);
    logForDebugging(`RemoteAgentTask ${taskId} killed, archiving session ${sessionId ?? 'unknown'}`);
  },
};

/**
 * Get the session URL for a remote task.
 */
export function getRemoteTaskSessionUrl(sessionId: string): string {
  return getRemoteSessionUrl(sessionId, process.env.SESSION_INGRESS_URL);
}