/**
 * MCP server factory + session-context binder.
 *
 * Two entry points:
 *
 * `bindSessionContext` — the wrapper closure. Takes a `ComputerUseSessionContext`
 * (getters + callbacks backed by host session state), returns a dispatcher.
 * Reusable by both the MCP CallTool handler here AND Cowork's
 * `InternalServerDefinition.handleToolCall` (which doesn't go through MCP).
 * This replaces the duplicated wrapper closures in apps/desktop/…/serverDef.ts
 * and the Claude Code CLI's CU host wrapper — both did the same thing: build
 * `ComputerUseOverrides` fresh from getters, call `handleToolCall`, stash
 * screenshot, merge permissions.
 *
 * `createComputerUseMcpServer` — the Server object. When `context` is provided,
 * the CallTool handler is real (uses `bindSessionContext`). When not, it's the
 * legacy stub that returns a not-wired error. The tool-schema ListTools handler
 * is the same either way.
 */

import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
import {
  CallToolRequestSchema,
  ListToolsRequestSchema,
} from "@modelcontextprotocol/sdk/types.js";

import type { ScreenshotResult } from "./executor.js";
import type { CuCallToolResult } from "./toolCalls.js";
import {
  defersLockAcquire,
  handleToolCall,
  resetMouseButtonHeld,
} from "./toolCalls.js";
import { buildComputerUseTools } from "./tools.js";
import type {
  AppGrant,
  ComputerUseHostAdapter,
  ComputerUseOverrides,
  ComputerUseSessionContext,
  CoordinateMode,
  CuGrantFlags,
  CuPermissionResponse,
} from "./types.js";
import { DEFAULT_GRANT_FLAGS } from "./types.js";

const DEFAULT_LOCK_HELD_MESSAGE =
  "Another Claude session is currently using the computer. Wait for that " +
  "session to finish, or find a non-computer-use approach.";

/** First argument of the host's `onPermissionRequest` callback. */
type PermissionRequest = Parameters<
  NonNullable<ComputerUseSessionContext["onPermissionRequest"]>
>[0];

/** First argument of the host's `onTeachPermissionRequest` callback. */
type TeachPermissionRequest = Parameters<
  NonNullable<ComputerUseSessionContext["onTeachPermissionRequest"]>
>[0];

/**
 * Build the error result returned when another session holds the
 * computer-use lock.
 *
 * @param text Message shown to the caller.
 * @returns An `isError` result tagged with `error_kind: "cu_lock_held"`.
 */
function lockHeldResult(text: string): CuCallToolResult {
  return {
    content: [{ type: "text", text }],
    isError: true,
    telemetry: { error_kind: "cu_lock_held" },
  };
}

/**
 * Dedupe `granted` into `existing` on bundleId, spread truthy-only flags over
 * defaults+existing. Truthy-only: a subsequent `request_access` that doesn't
 * request clipboard can't revoke an earlier clipboard grant — revocation lives
 * in a Settings page, not here.
 *
 * Same merge both hosts implemented independently today.
 *
 * @param existing Apps already granted; never mutated, order preserved.
 * @param existingFlags Flags already granted.
 * @param response The permission response to fold in.
 * @returns The merged app list (existing first, then newly granted apps in
 *   response order, one entry per bundleId) and the merged flags.
 */
function mergePermissionResponse(
  existing: readonly AppGrant[],
  existingFlags: CuGrantFlags,
  response: CuPermissionResponse,
): { apps: AppGrant[]; flags: CuGrantFlags } {
  const seen = new Set(existing.map((a) => a.bundleId));
  const apps: AppGrant[] = [...existing];
  for (const grant of response.granted) {
    // Track ids as we append so a bundleId repeated *within* `granted` is
    // only added once, not just ids that were already in `existing`.
    if (seen.has(grant.bundleId)) continue;
    seen.add(grant.bundleId);
    apps.push(grant);
  }

  // Only flags explicitly set to `true` are carried over; `false`/absent
  // entries must not overwrite an earlier grant.
  const truthyFlags = Object.fromEntries(
    Object.entries(response.flags).filter(([, v]) => v === true),
  );
  const flags: CuGrantFlags = {
    ...DEFAULT_GRANT_FLAGS,
    ...existingFlags,
    ...truthyFlags,
  };
  return { apps, flags };
}

/**
 * Bind session state to a reusable dispatcher. The returned function is the
 * wrapper closure: async lock gate → build overrides fresh → `handleToolCall`
 * → stash screenshot. The result is returned as-is, including the piggybacked
 * `screenshot`/`telemetry` fields; the MCP CallTool handler in
 * `createComputerUseMcpServer` strips those before replying.
 *
 * The last-screenshot blob is held in a closure cell here (not on `ctx`), so
 * hosts don't need to guarantee `ctx` object identity across calls — they just
 * need to hold onto the returned dispatcher. Cowork caches per
 * `InternalServerContext` in a WeakMap; the CLI host constructs once at server
 * creation.
 *
 * @param adapter Host adapter; supplies `logger` and `serverName` and is
 *   forwarded to `handleToolCall`.
 * @param coordinateMode Coordinate mode forwarded in the overrides.
 * @param ctx Session getters and callbacks, read fresh on every call.
 * @returns A dispatcher `(name, args) => Promise<CuCallToolResult>`.
 */
export function bindSessionContext(
  adapter: ComputerUseHostAdapter,
  coordinateMode: CoordinateMode,
  ctx: ComputerUseSessionContext,
): (name: string, args: unknown) => Promise<CuCallToolResult> {
  const { logger, serverName } = adapter;

  // Screenshot blob persists here across calls — NOT on `ctx`. Hosts hold
  // onto the returned dispatcher; that's the identity that matters.
  let lastScreenshot: ScreenshotResult | undefined;

  // NOTE(review): the `Promise<CuPermissionResponse>` return types below are
  // inferred from the responses being passed to `mergePermissionResponse` —
  // confirm against the callback types declared in ./types.js.
  const wrapPermission = ctx.onPermissionRequest
    ? async (
        req: PermissionRequest,
        signal: AbortSignal,
      ): Promise<CuPermissionResponse> => {
        // Read through `ctx` at call time (not captured) so the host callback
        // keeps its receiver; presence was checked when the wrapper was built.
        const response = await ctx.onPermissionRequest!(req, signal);
        const { apps, flags } = mergePermissionResponse(
          ctx.getAllowedApps(),
          ctx.getGrantFlags(),
          response,
        );
        logger.debug(
          `[${serverName}] permission result: granted=${response.granted.length} denied=${response.denied.length}`,
        );
        ctx.onAllowedAppsChanged?.(apps, flags);
        return response;
      }
    : undefined;

  const wrapTeachPermission = ctx.onTeachPermissionRequest
    ? async (
        req: TeachPermissionRequest,
        signal: AbortSignal,
      ): Promise<CuPermissionResponse> => {
        const response = await ctx.onTeachPermissionRequest!(req, signal);
        logger.debug(
          `[${serverName}] teach permission result: granted=${response.granted.length} denied=${response.denied.length}`,
        );
        // Teach doesn't request grant flags — preserve existing.
        const { apps } = mergePermissionResponse(
          ctx.getAllowedApps(),
          ctx.getGrantFlags(),
          response,
        );
        ctx.onAllowedAppsChanged?.(apps, {
          ...DEFAULT_GRANT_FLAGS,
          ...ctx.getGrantFlags(),
        });
        return response;
      }
    : undefined;

  return async (name, args) => {
    // ─── Async lock gate ─────────────────────────────────────────────────
    // Replaces the sync Gate-3 in `handleToolCall` — we pass
    // `checkCuLock: undefined` below so it no-ops. Hosts with
    // cross-process locks (O_EXCL file) await the real primitive here
    // instead of pre-computing + feeding a fake sync result.
    if (ctx.checkCuLock) {
      const lock = await ctx.checkCuLock();
      if (lock.holder !== undefined && !lock.isSelf) {
        return lockHeldResult(
          ctx.formatLockHeldMessage?.(lock.holder) ?? DEFAULT_LOCK_HELD_MESSAGE,
        );
      }
      if (lock.holder === undefined && !defersLockAcquire(name)) {
        await ctx.acquireCuLock?.();
        // Re-check: the awaits above yield the microtask queue, so another
        // session's check+acquire can interleave with ours. Hosts where
        // acquire is a no-op when already held (Cowork's CuLockManager) give
        // no signal that we lost — verify we're now the holder before
        // proceeding. The CLI's O_EXCL file lock would surface this as a
        // throw from acquire instead; this re-check is a belt-and-suspenders
        // for that path too.
        const recheck = await ctx.checkCuLock();
        if (recheck.holder !== undefined && !recheck.isSelf) {
          return lockHeldResult(
            ctx.formatLockHeldMessage?.(recheck.holder) ??
              DEFAULT_LOCK_HELD_MESSAGE,
          );
        }
        // Fresh holder → any prior session's mouseButtonHeld is stale.
        // Mirrors what Gate-3 does on the acquire branch. After the
        // re-check so we only clear module state when we actually won.
        resetMouseButtonHeld();
      }
    }

    // ─── Build overrides fresh ───────────────────────────────────────────
    // Blob-first; dims-fallback with base64:"" when the closure cell is
    // unset (cross-respawn). scaleCoord reads dims; pixelCompare sees "" →
    // isEmpty → skip.
    const dimsFallback = lastScreenshot
      ? undefined
      : ctx.getLastScreenshotDims?.();

    // Per-call AbortController for dialog dismissal. Aborted in `finally` —
    // if handleToolCall finishes (MCP timeout, throw) before the user
    // answers, the host's dialog handler sees the abort and tears down.
    const dialogAbort = new AbortController();

    const overrides: ComputerUseOverrides = {
      allowedApps: [...ctx.getAllowedApps()],
      grantFlags: ctx.getGrantFlags(),
      userDeniedBundleIds: ctx.getUserDeniedBundleIds(),
      coordinateMode,
      selectedDisplayId: ctx.getSelectedDisplayId(),
      displayPinnedByModel: ctx.getDisplayPinnedByModel?.(),
      displayResolvedForApps: ctx.getDisplayResolvedForApps?.(),
      lastScreenshot:
        lastScreenshot ??
        (dimsFallback ? { ...dimsFallback, base64: "" } : undefined),
      onPermissionRequest: wrapPermission
        ? (req) => wrapPermission(req, dialogAbort.signal)
        : undefined,
      onTeachPermissionRequest: wrapTeachPermission
        ? (req) => wrapTeachPermission(req, dialogAbort.signal)
        : undefined,
      onAppsHidden: ctx.onAppsHidden,
      getClipboardStash: ctx.getClipboardStash,
      onClipboardStashChanged: ctx.onClipboardStashChanged,
      onResolvedDisplayUpdated: ctx.onResolvedDisplayUpdated,
      onDisplayPinned: ctx.onDisplayPinned,
      onDisplayResolvedForApps: ctx.onDisplayResolvedForApps,
      onTeachModeActivated: ctx.onTeachModeActivated,
      onTeachStep: ctx.onTeachStep,
      onTeachWorking: ctx.onTeachWorking,
      getTeachModeActive: ctx.getTeachModeActive,
      // Undefined → handleToolCall's sync Gate-3 no-ops. The async gate
      // above already ran.
      checkCuLock: undefined,
      acquireCuLock: undefined,
      isAborted: ctx.isAborted,
    };

    logger.debug(
      `[${serverName}] tool=${name} allowedApps=${overrides.allowedApps.length} coordMode=${coordinateMode}`,
    );

    // ─── Dispatch ────────────────────────────────────────────────────────
    try {
      const result = await handleToolCall(adapter, name, args, overrides);

      if (result.screenshot) {
        // Keep the full blob in the closure cell; hand the host only the
        // dims (everything except `base64`).
        lastScreenshot = result.screenshot;
        const { base64: _blob, ...dims } = result.screenshot;
        logger.debug(`[${serverName}] screenshot dims: ${JSON.stringify(dims)}`);
        ctx.onScreenshotCaptured?.(dims);
      }

      return result;
    } finally {
      dialogAbort.abort();
    }
  };
}

/**
 * Create the computer-use MCP `Server`.
 *
 * ListTools returns the tools built from the executor's capabilities, or an
 * empty list while `adapter.isDisabled()` is true. CallTool is wired to
 * `bindSessionContext` when `context` is given; otherwise it is a stub that
 * logs a warning and returns a not-wired error.
 *
 * @param adapter Host adapter; supplies `serverName`, `logger`, `executor`
 *   and `isDisabled()`.
 * @param coordinateMode Coordinate mode used for the tool schemas and dispatch.
 * @param context Optional session context; omit for the legacy stub handler.
 * @returns The configured `Server` (not yet connected to a transport).
 */
export function createComputerUseMcpServer(
  adapter: ComputerUseHostAdapter,
  coordinateMode: CoordinateMode,
  context?: ComputerUseSessionContext,
): Server {
  const { serverName, logger } = adapter;

  const server = new Server(
    { name: serverName, version: "0.1.3" },
    { capabilities: { tools: {}, logging: {} } },
  );

  const tools = buildComputerUseTools(
    adapter.executor.capabilities,
    coordinateMode,
  );

  server.setRequestHandler(ListToolsRequestSchema, async () =>
    adapter.isDisabled() ? { tools: [] } : { tools },
  );

  if (context) {
    const dispatch = bindSessionContext(adapter, coordinateMode, context);
    server.setRequestHandler(
      CallToolRequestSchema,
      async (request): Promise<CallToolResult> => {
        // `screenshot` and `telemetry` are piggybacked for in-process hosts;
        // drop them so only the MCP-shaped result goes over the wire.
        const { screenshot: _s, telemetry: _t, ...result } = await dispatch(
          request.params.name,
          request.params.arguments ?? {},
        );
        return result;
      },
    );
    return server;
  }

  // Legacy: no context → stub handler. Reached only if something calls the
  // server over MCP transport WITHOUT going through a binder (a wiring
  // regression). Clear error instead of silent failure.
  server.setRequestHandler(
    CallToolRequestSchema,
    async (request): Promise<CallToolResult> => {
      logger.warn(
        `[${serverName}] tool call "${request.params.name}" reached the stub handler — no session context bound. Per-session state unavailable.`,
      );
      return {
        content: [
          {
            type: "text",
            text: "This computer-use server instance is not wired to a session. Per-session app permissions are not available on this code path.",
          },
        ],
        isError: true,
      };
    },
  );

  return server;
}