diff --git a/packages/@ant/claude-for-chrome-mcp/package.json b/packages/@ant/claude-for-chrome-mcp/package.json index deefa08f8..11b14267d 100644 --- a/packages/@ant/claude-for-chrome-mcp/package.json +++ b/packages/@ant/claude-for-chrome-mcp/package.json @@ -1,8 +1,8 @@ { - "name": "@ant/claude-for-chrome-mcp", - "version": "1.0.0", - "private": true, - "type": "module", - "main": "./src/index.ts", - "types": "./src/index.ts" + "name": "@ant/claude-for-chrome-mcp", + "version": "1.0.0", + "private": true, + "type": "module", + "main": "./src/index.ts", + "types": "./src/index.ts" } diff --git a/packages/@ant/claude-for-chrome-mcp/src/browserTools.ts b/packages/@ant/claude-for-chrome-mcp/src/browserTools.ts index a1aba77ab..92323a3e8 100644 --- a/packages/@ant/claude-for-chrome-mcp/src/browserTools.ts +++ b/packages/@ant/claude-for-chrome-mcp/src/browserTools.ts @@ -1,546 +1,546 @@ export const BROWSER_TOOLS = [ { - name: "javascript_tool", + name: 'javascript_tool', description: "Execute JavaScript code in the context of the current page. The code runs in the page's context and can interact with the DOM, window object, and page variables. Returns the result of the last expression or any thrown errors. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.", inputSchema: { - type: "object", + type: 'object', properties: { action: { - type: "string", + type: 'string', description: "Must be set to 'javascript_exec'", }, text: { - type: "string", + type: 'string', description: "The JavaScript code to execute. The code will be evaluated in the page context. The result of the last expression will be returned automatically. Do NOT use 'return' statements - just write the expression you want to evaluate (e.g., 'window.myData.value' not 'return window.myData.value'). You can access and modify the DOM, call page functions, and interact with page variables.", }, tabId: { - type: "number", + type: 'number', description: "Tab ID to execute the code in. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", }, }, - required: ["action", "text", "tabId"], + required: ['action', 'text', 'tabId'], }, }, { - name: "read_page", + name: 'read_page', description: "Get an accessibility tree representation of elements on the page. By default returns all elements including non-visible ones. Output is limited to 50000 characters by default. If the output exceeds this limit, you will receive an error asking you to specify a smaller depth or focus on a specific element using ref_id. Optionally filter for only interactive elements. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.", inputSchema: { - type: "object", + type: 'object', properties: { filter: { - type: "string", - enum: ["interactive", "all"], + type: 'string', + enum: ['interactive', 'all'], description: 'Filter elements: "interactive" for buttons/links/inputs only, "all" for all elements including non-visible ones (default: all elements)', }, tabId: { - type: "number", + type: 'number', description: "Tab ID to read from. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", }, depth: { - type: "number", + type: 'number', description: - "Maximum depth of the tree to traverse (default: 15). Use a smaller depth if output is too large.", + 'Maximum depth of the tree to traverse (default: 15). Use a smaller depth if output is too large.', }, ref_id: { - type: "string", + type: 'string', description: - "Reference ID of a parent element to read. Will return the specified element and all its children. Use this to focus on a specific part of the page when output is too large.", + 'Reference ID of a parent element to read. Will return the specified element and all its children. Use this to focus on a specific part of the page when output is too large.', }, max_chars: { - type: "number", + type: 'number', description: - "Maximum characters for output (default: 50000). Set to a higher value if your client can handle large outputs.", + 'Maximum characters for output (default: 50000). Set to a higher value if your client can handle large outputs.', }, }, - required: ["tabId"], + required: ['tabId'], }, }, { - name: "find", + name: 'find', description: 'Find elements on the page using natural language. Can search for elements by their purpose (e.g., "search bar", "login button") or by text content (e.g., "organic mango product"). Returns up to 20 matching elements with references that can be used with other tools. If more than 20 matches exist, you\'ll be notified to use a more specific query. If you don\'t have a valid tab ID, use tabs_context_mcp first to get available tabs.', inputSchema: { - type: "object", + type: 'object', properties: { query: { - type: "string", + type: 'string', description: 'Natural language description of what to find (e.g., "search bar", "add to cart button", "product title containing organic")', }, tabId: { - type: "number", + type: 'number', description: "Tab ID to search in. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", }, }, - required: ["query", "tabId"], + required: ['query', 'tabId'], }, }, { - name: "form_input", + name: 'form_input', description: "Set values in form elements using element reference ID from the read_page tool. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.", inputSchema: { - type: "object", + type: 'object', properties: { ref: { - type: "string", + type: 'string', description: 'Element reference ID from the read_page tool (e.g., "ref_1", "ref_2")', }, value: { - type: ["string", "boolean", "number"], + type: ['string', 'boolean', 'number'], description: - "The value to set. For checkboxes use boolean, for selects use option value or text, for other inputs use appropriate string/number", + 'The value to set. For checkboxes use boolean, for selects use option value or text, for other inputs use appropriate string/number', }, tabId: { - type: "number", + type: 'number', description: "Tab ID to set form value in. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", }, }, - required: ["ref", "value", "tabId"], + required: ['ref', 'value', 'tabId'], }, }, { - name: "computer", + name: 'computer', description: `Use a mouse and keyboard to interact with a web browser, and take screenshots. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.\n* Whenever you intend to click on an element like an icon, you should consult a screenshot to determine the coordinates of the element before moving the cursor.\n* If you tried clicking on a program or link but it failed to load, even after waiting, try adjusting your click location so that the tip of the cursor visually falls on the element that you want to click.\n* Make sure to click any buttons, links, icons, etc with the cursor tip in the center of the element. Don't click boxes on their edges unless asked.`, inputSchema: { - type: "object", + type: 'object', properties: { action: { - type: "string", + type: 'string', enum: [ - "left_click", - "right_click", - "type", - "screenshot", - "wait", - "scroll", - "key", - "left_click_drag", - "double_click", - "triple_click", - "zoom", - "scroll_to", - "hover", + 'left_click', + 'right_click', + 'type', + 'screenshot', + 'wait', + 'scroll', + 'key', + 'left_click_drag', + 'double_click', + 'triple_click', + 'zoom', + 'scroll_to', + 'hover', ], description: - "The action to perform:\n* `left_click`: Click the left mouse button at the specified coordinates.\n* `right_click`: Click the right mouse button at the specified coordinates to open context menus.\n* `double_click`: Double-click the left mouse button at the specified coordinates.\n* `triple_click`: Triple-click the left mouse button at the specified coordinates.\n* `type`: Type a string of text.\n* `screenshot`: Take a screenshot of the screen.\n* `wait`: Wait for a specified number of seconds.\n* `scroll`: Scroll up, down, left, or right at the specified coordinates.\n* `key`: Press a specific keyboard key.\n* `left_click_drag`: Drag from start_coordinate to coordinate.\n* `zoom`: Take a screenshot of a specific region for closer inspection.\n* `scroll_to`: Scroll an element into view using its element reference ID from read_page or find tools.\n* `hover`: Move the mouse cursor to the specified coordinates or element without clicking. Useful for revealing tooltips, dropdown menus, or triggering hover states.", + 'The action to perform:\n* `left_click`: Click the left mouse button at the specified coordinates.\n* `right_click`: Click the right mouse button at the specified coordinates to open context menus.\n* `double_click`: Double-click the left mouse button at the specified coordinates.\n* `triple_click`: Triple-click the left mouse button at the specified coordinates.\n* `type`: Type a string of text.\n* `screenshot`: Take a screenshot of the screen.\n* `wait`: Wait for a specified number of seconds.\n* `scroll`: Scroll up, down, left, or right at the specified coordinates.\n* `key`: Press a specific keyboard key.\n* `left_click_drag`: Drag from start_coordinate to coordinate.\n* `zoom`: Take a screenshot of a specific region for closer inspection.\n* `scroll_to`: Scroll an element into view using its element reference ID from read_page or find tools.\n* `hover`: Move the mouse cursor to the specified coordinates or element without clicking. Useful for revealing tooltips, dropdown menus, or triggering hover states.', }, coordinate: { - type: "array", - items: { type: "number" }, + type: 'array', + items: { type: 'number' }, minItems: 2, maxItems: 2, description: - "(x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates. Required for `left_click`, `right_click`, `double_click`, `triple_click`, and `scroll`. For `left_click_drag`, this is the end position.", + '(x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates. Required for `left_click`, `right_click`, `double_click`, `triple_click`, and `scroll`. For `left_click_drag`, this is the end position.', }, text: { - type: "string", + type: 'string', description: 'The text to type (for `type` action) or the key(s) to press (for `key` action). For `key` action: Provide space-separated keys (e.g., "Backspace Backspace Delete"). Supports keyboard shortcuts using the platform\'s modifier key (use "cmd" on Mac, "ctrl" on Windows/Linux, e.g., "cmd+a" or "ctrl+a" for select all).', }, duration: { - type: "number", + type: 'number', minimum: 0, maximum: 30, description: - "The number of seconds to wait. Required for `wait`. Maximum 30 seconds.", + 'The number of seconds to wait. Required for `wait`. Maximum 30 seconds.', }, scroll_direction: { - type: "string", - enum: ["up", "down", "left", "right"], - description: "The direction to scroll. Required for `scroll`.", + type: 'string', + enum: ['up', 'down', 'left', 'right'], + description: 'The direction to scroll. Required for `scroll`.', }, scroll_amount: { - type: "number", + type: 'number', minimum: 1, maximum: 10, description: - "The number of scroll wheel ticks. Optional for `scroll`, defaults to 3.", + 'The number of scroll wheel ticks. Optional for `scroll`, defaults to 3.', }, start_coordinate: { - type: "array", - items: { type: "number" }, + type: 'array', + items: { type: 'number' }, minItems: 2, maxItems: 2, description: - "(x, y): The starting coordinates for `left_click_drag`.", + '(x, y): The starting coordinates for `left_click_drag`.', }, region: { - type: "array", - items: { type: "number" }, + type: 'array', + items: { type: 'number' }, minItems: 4, maxItems: 4, description: - "(x0, y0, x1, y1): The rectangular region to capture for `zoom`. Coordinates define a rectangle from top-left (x0, y0) to bottom-right (x1, y1) in pixels from the viewport origin. Required for `zoom` action. Useful for inspecting small UI elements like icons, buttons, or text.", + '(x0, y0, x1, y1): The rectangular region to capture for `zoom`. Coordinates define a rectangle from top-left (x0, y0) to bottom-right (x1, y1) in pixels from the viewport origin. Required for `zoom` action. Useful for inspecting small UI elements like icons, buttons, or text.', }, repeat: { - type: "number", + type: 'number', minimum: 1, maximum: 100, description: - "Number of times to repeat the key sequence. Only applicable for `key` action. Must be a positive integer between 1 and 100. Default is 1. Useful for navigation tasks like pressing arrow keys multiple times.", + 'Number of times to repeat the key sequence. Only applicable for `key` action. Must be a positive integer between 1 and 100. Default is 1. Useful for navigation tasks like pressing arrow keys multiple times.', }, ref: { - type: "string", + type: 'string', description: 'Element reference ID from read_page or find tools (e.g., "ref_1", "ref_2"). Required for `scroll_to` action. Can be used as alternative to `coordinate` for click actions.', }, modifiers: { - type: "string", + type: 'string', description: 'Modifier keys for click actions. Supports: "ctrl", "shift", "alt", "cmd" (or "meta"), "win" (or "windows"). Can be combined with "+" (e.g., "ctrl+shift", "cmd+alt"). Optional.', }, tabId: { - type: "number", + type: 'number', description: "Tab ID to execute the action on. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", }, }, - required: ["action", "tabId"], + required: ['action', 'tabId'], }, }, { - name: "navigate", + name: 'navigate', description: "Navigate to a URL, or go forward/back in browser history. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.", inputSchema: { - type: "object", + type: 'object', properties: { url: { - type: "string", + type: 'string', description: 'The URL to navigate to. Can be provided with or without protocol (defaults to https://). Use "forward" to go forward in history or "back" to go back in history.', }, tabId: { - type: "number", + type: 'number', description: "Tab ID to navigate. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", }, }, - required: ["url", "tabId"], + required: ['url', 'tabId'], }, }, { - name: "resize_window", + name: 'resize_window', description: "Resize the current browser window to specified dimensions. Useful for testing responsive designs or setting up specific screen sizes. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.", inputSchema: { - type: "object", + type: 'object', properties: { width: { - type: "number", - description: "Target window width in pixels", + type: 'number', + description: 'Target window width in pixels', }, height: { - type: "number", - description: "Target window height in pixels", + type: 'number', + description: 'Target window height in pixels', }, tabId: { - type: "number", + type: 'number', description: "Tab ID to get the window for. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", }, }, - required: ["width", "height", "tabId"], + required: ['width', 'height', 'tabId'], }, }, { - name: "gif_creator", + name: 'gif_creator', description: "Manage GIF recording and export for browser automation sessions. Control when to start/stop recording browser actions (clicks, scrolls, navigation), then export as an animated GIF with visual overlays (click indicators, action labels, progress bar, watermark). All operations are scoped to the tab's group. When starting recording, take a screenshot immediately after to capture the initial state as the first frame. When stopping recording, take a screenshot immediately before to capture the final state as the last frame. For export, either provide 'coordinate' to drag/drop upload to a page element, or set 'download: true' to download the GIF.", inputSchema: { - type: "object", + type: 'object', properties: { action: { - type: "string", - enum: ["start_recording", "stop_recording", "export", "clear"], + type: 'string', + enum: ['start_recording', 'stop_recording', 'export', 'clear'], description: "Action to perform: 'start_recording' (begin capturing), 'stop_recording' (stop capturing but keep frames), 'export' (generate and export GIF), 'clear' (discard frames)", }, tabId: { - type: "number", + type: 'number', description: - "Tab ID to identify which tab group this operation applies to", + 'Tab ID to identify which tab group this operation applies to', }, download: { - type: "boolean", + type: 'boolean', description: "Always set this to true for the 'export' action only. This causes the gif to be downloaded in the browser.", }, filename: { - type: "string", + type: 'string', description: "Optional filename for exported GIF (default: 'recording-[timestamp].gif'). For 'export' action only.", }, options: { - type: "object", + type: 'object', description: "Optional GIF enhancement options for 'export' action. Properties: showClickIndicators (bool), showDragPaths (bool), showActionLabels (bool), showProgressBar (bool), showWatermark (bool), quality (number 1-30). All default to true except quality (default: 10).", properties: { showClickIndicators: { - type: "boolean", + type: 'boolean', description: - "Show orange circles at click locations (default: true)", + 'Show orange circles at click locations (default: true)', }, showDragPaths: { - type: "boolean", - description: "Show red arrows for drag actions (default: true)", + type: 'boolean', + description: 'Show red arrows for drag actions (default: true)', }, showActionLabels: { - type: "boolean", + type: 'boolean', description: - "Show black labels describing actions (default: true)", + 'Show black labels describing actions (default: true)', }, showProgressBar: { - type: "boolean", - description: "Show orange progress bar at bottom (default: true)", + type: 'boolean', + description: 'Show orange progress bar at bottom (default: true)', }, showWatermark: { - type: "boolean", - description: "Show Claude logo watermark (default: true)", + type: 'boolean', + description: 'Show Claude logo watermark (default: true)', }, quality: { - type: "number", + type: 'number', description: - "GIF compression quality, 1-30 (lower = better quality, slower encoding). Default: 10", + 'GIF compression quality, 1-30 (lower = better quality, slower encoding). Default: 10', }, }, }, }, - required: ["action", "tabId"], + required: ['action', 'tabId'], }, }, { - name: "upload_image", + name: 'upload_image', description: - "Upload a previously captured screenshot or user-uploaded image to a file input or drag & drop target. Supports two approaches: (1) ref - for targeting specific elements, especially hidden file inputs, (2) coordinate - for drag & drop to visible locations like Google Docs. Provide either ref or coordinate, not both.", + 'Upload a previously captured screenshot or user-uploaded image to a file input or drag & drop target. Supports two approaches: (1) ref - for targeting specific elements, especially hidden file inputs, (2) coordinate - for drag & drop to visible locations like Google Docs. Provide either ref or coordinate, not both.', inputSchema: { - type: "object", + type: 'object', properties: { imageId: { - type: "string", + type: 'string', description: "ID of a previously captured screenshot (from the computer tool's screenshot action) or a user-uploaded image", }, ref: { - type: "string", + type: 'string', description: 'Element reference ID from read_page or find tools (e.g., "ref_1", "ref_2"). Use this for file inputs (especially hidden ones) or specific elements. Provide either ref or coordinate, not both.', }, coordinate: { - type: "array", + type: 'array', items: { - type: "number", + type: 'number', }, description: - "Viewport coordinates [x, y] for drag & drop to a visible location. Use this for drag & drop targets like Google Docs. Provide either ref or coordinate, not both.", + 'Viewport coordinates [x, y] for drag & drop to a visible location. Use this for drag & drop targets like Google Docs. Provide either ref or coordinate, not both.', }, tabId: { - type: "number", + type: 'number', description: - "Tab ID where the target element is located. This is where the image will be uploaded to.", + 'Tab ID where the target element is located. This is where the image will be uploaded to.', }, filename: { - type: "string", + type: 'string', description: 'Optional filename for the uploaded file (default: "image.png")', }, }, - required: ["imageId", "tabId"], + required: ['imageId', 'tabId'], }, }, { - name: "get_page_text", + name: 'get_page_text', description: "Extract raw text content from the page, prioritizing article content. Ideal for reading articles, blog posts, or other text-heavy pages. Returns plain text without HTML formatting. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.", inputSchema: { - type: "object", + type: 'object', properties: { tabId: { - type: "number", + type: 'number', description: "Tab ID to extract text from. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", }, }, - required: ["tabId"], + required: ['tabId'], }, }, { - name: "tabs_context_mcp", - title: "Tabs Context", + name: 'tabs_context_mcp', + title: 'Tabs Context', description: - "Get context information about the current MCP tab group. Returns all tab IDs inside the group if it exists. CRITICAL: You must get the context at least once before using other browser automation tools so you know what tabs exist. Each new conversation should create its own new tab (using tabs_create_mcp) rather than reusing existing tabs, unless the user explicitly asks to use an existing tab.", + 'Get context information about the current MCP tab group. Returns all tab IDs inside the group if it exists. CRITICAL: You must get the context at least once before using other browser automation tools so you know what tabs exist. Each new conversation should create its own new tab (using tabs_create_mcp) rather than reusing existing tabs, unless the user explicitly asks to use an existing tab.', inputSchema: { - type: "object", + type: 'object', properties: { createIfEmpty: { - type: "boolean", + type: 'boolean', description: - "Creates a new MCP tab group if none exists, creates a new Window with a new tab group containing an empty tab (which can be used for this conversation). If a MCP tab group already exists, this parameter has no effect.", + 'Creates a new MCP tab group if none exists, creates a new Window with a new tab group containing an empty tab (which can be used for this conversation). If a MCP tab group already exists, this parameter has no effect.', }, }, required: [], }, }, { - name: "tabs_create_mcp", - title: "Tabs Create", + name: 'tabs_create_mcp', + title: 'Tabs Create', description: - "Creates a new empty tab in the MCP tab group. CRITICAL: You must get the context using tabs_context_mcp at least once before using other browser automation tools so you know what tabs exist.", + 'Creates a new empty tab in the MCP tab group. CRITICAL: You must get the context using tabs_context_mcp at least once before using other browser automation tools so you know what tabs exist.', inputSchema: { - type: "object", + type: 'object', properties: {}, required: [], }, }, { - name: "update_plan", + name: 'update_plan', description: - "Present a plan to the user for approval before taking actions. The user will see the domains you intend to visit and your approach. Once approved, you can proceed with actions on the approved domains without additional permission prompts.", + 'Present a plan to the user for approval before taking actions. The user will see the domains you intend to visit and your approach. Once approved, you can proceed with actions on the approved domains without additional permission prompts.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { domains: { - type: "array" as const, - items: { type: "string" as const }, + type: 'array' as const, + items: { type: 'string' as const }, description: "List of domains you will visit (e.g., ['github.com', 'stackoverflow.com']). These domains will be approved for the session when the user accepts the plan.", }, approach: { - type: "array" as const, - items: { type: "string" as const }, + type: 'array' as const, + items: { type: 'string' as const }, description: - "High-level description of what you will do. Focus on outcomes and key actions, not implementation details. Be concise - aim for 3-7 items.", + 'High-level description of what you will do. Focus on outcomes and key actions, not implementation details. Be concise - aim for 3-7 items.', }, }, - required: ["domains", "approach"], + required: ['domains', 'approach'], }, }, { - name: "read_console_messages", + name: 'read_console_messages', description: "Read browser console messages (console.log, console.error, console.warn, etc.) from a specific tab. Useful for debugging JavaScript errors, viewing application logs, or understanding what's happening in the browser console. Returns console messages from the current domain only. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs. IMPORTANT: Always provide a pattern to filter messages - without a pattern, you may get too many irrelevant messages.", inputSchema: { - type: "object", + type: 'object', properties: { tabId: { - type: "number", + type: 'number', description: "Tab ID to read console messages from. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", }, onlyErrors: { - type: "boolean", + type: 'boolean', description: - "If true, only return error and exception messages. Default is false (return all message types).", + 'If true, only return error and exception messages. Default is false (return all message types).', }, clear: { - type: "boolean", + type: 'boolean', description: - "If true, clear the console messages after reading to avoid duplicates on subsequent calls. Default is false.", + 'If true, clear the console messages after reading to avoid duplicates on subsequent calls. Default is false.', }, pattern: { - type: "string", + type: 'string', description: "Regex pattern to filter console messages. Only messages matching this pattern will be returned (e.g., 'error|warning' to find errors and warnings, 'MyApp' to filter app-specific logs). You should always provide a pattern to avoid getting too many irrelevant messages.", }, limit: { - type: "number", + type: 'number', description: - "Maximum number of messages to return. Defaults to 100. Increase only if you need more results.", + 'Maximum number of messages to return. Defaults to 100. Increase only if you need more results.', }, }, - required: ["tabId"], + required: ['tabId'], }, }, { - name: "read_network_requests", + name: 'read_network_requests', description: "Read HTTP network requests (XHR, Fetch, documents, images, etc.) from a specific tab. Useful for debugging API calls, monitoring network activity, or understanding what requests a page is making. Returns all network requests made by the current page, including cross-origin requests. Requests are automatically cleared when the page navigates to a different domain. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.", inputSchema: { - type: "object", + type: 'object', properties: { tabId: { - type: "number", + type: 'number', description: "Tab ID to read network requests from. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", }, urlPattern: { - type: "string", + type: 'string', description: "Optional URL pattern to filter requests. Only requests whose URL contains this string will be returned (e.g., '/api/' to filter API calls, 'example.com' to filter by domain).", }, clear: { - type: "boolean", + type: 'boolean', description: - "If true, clear the network requests after reading to avoid duplicates on subsequent calls. Default is false.", + 'If true, clear the network requests after reading to avoid duplicates on subsequent calls. Default is false.', }, limit: { - type: "number", + type: 'number', description: - "Maximum number of requests to return. Defaults to 100. Increase only if you need more results.", + 'Maximum number of requests to return. Defaults to 100. Increase only if you need more results.', }, }, - required: ["tabId"], + required: ['tabId'], }, }, { - name: "shortcuts_list", + name: 'shortcuts_list', description: - "List all available shortcuts and workflows (shortcuts and workflows are interchangeable). Returns shortcuts with their commands, descriptions, and whether they are workflows. Use shortcuts_execute to run a shortcut or workflow.", + 'List all available shortcuts and workflows (shortcuts and workflows are interchangeable). Returns shortcuts with their commands, descriptions, and whether they are workflows. Use shortcuts_execute to run a shortcut or workflow.', inputSchema: { - type: "object", + type: 'object', properties: { tabId: { - type: "number", + type: 'number', description: "Tab ID to list shortcuts from. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", }, }, - required: ["tabId"], + required: ['tabId'], }, }, { - name: "shortcuts_execute", + name: 'shortcuts_execute', description: - "Execute a shortcut or workflow by running it in a new sidepanel window using the current tab (shortcuts and workflows are interchangeable). Use shortcuts_list first to see available shortcuts. This starts the execution and returns immediately - it does not wait for completion.", + 'Execute a shortcut or workflow by running it in a new sidepanel window using the current tab (shortcuts and workflows are interchangeable). Use shortcuts_list first to see available shortcuts. This starts the execution and returns immediately - it does not wait for completion.', inputSchema: { - type: "object", + type: 'object', properties: { tabId: { - type: "number", + type: 'number', description: "Tab ID to execute the shortcut on. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", }, shortcutId: { - type: "string", - description: "The ID of the shortcut to execute", + type: 'string', + description: 'The ID of the shortcut to execute', }, command: { - type: "string", + type: 'string', description: "The command name of the shortcut to execute (e.g., 'debug', 'summarize'). Do not include the leading slash.", }, }, - required: ["tabId"], + required: ['tabId'], }, }, { - name: "switch_browser", + name: 'switch_browser', description: "Switch which Chrome browser is used for browser automation. Call this when the user wants to connect to a different Chrome browser. Broadcasts a connection request to all Chrome browsers with the extension installed — the user clicks 'Connect' in the desired browser.", inputSchema: { - type: "object", + type: 'object', properties: {}, required: [], }, }, -]; +] diff --git a/packages/@ant/claude-for-chrome-mcp/src/index.ts b/packages/@ant/claude-for-chrome-mcp/src/index.ts index 68b694069..c6d82f73f 100644 --- a/packages/@ant/claude-for-chrome-mcp/src/index.ts +++ b/packages/@ant/claude-for-chrome-mcp/src/index.ts @@ -1,10 +1,10 @@ -export { BridgeClient, createBridgeClient } from "./bridgeClient.js"; -export { BROWSER_TOOLS } from "./browserTools.js"; +export { BridgeClient, createBridgeClient } from './bridgeClient.js' +export { BROWSER_TOOLS } from './browserTools.js' export { createChromeSocketClient, createClaudeForChromeMcpServer, -} from "./mcpServer.js"; -export { localPlatformLabel } from "./types.js"; +} from './mcpServer.js' +export { localPlatformLabel } from './types.js' export type { BridgeConfig, ChromeExtensionInfo, @@ -12,4 +12,4 @@ export type { Logger, PermissionMode, SocketClient, -} from "./types.js"; +} from './types.js' diff --git a/packages/@ant/claude-for-chrome-mcp/src/mcpServer.ts b/packages/@ant/claude-for-chrome-mcp/src/mcpServer.ts index 40b1812a5..ed2a366fc 100644 --- a/packages/@ant/claude-for-chrome-mcp/src/mcpServer.ts +++ b/packages/@ant/claude-for-chrome-mcp/src/mcpServer.ts @@ -1,16 +1,16 @@ -import { Server } from "@modelcontextprotocol/sdk/server/index.js"; -import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; +import { Server } from '@modelcontextprotocol/sdk/server/index.js' +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js' import { CallToolRequestSchema, ListToolsRequestSchema, -} from "@modelcontextprotocol/sdk/types.js"; +} from '@modelcontextprotocol/sdk/types.js' -import { createBridgeClient } from "./bridgeClient.js"; -import { BROWSER_TOOLS } from "./browserTools.js"; -import { createMcpSocketClient } from "./mcpSocketClient.js"; -import { createMcpSocketPool } from "./mcpSocketPool.js"; -import { handleToolCall } from "./toolCalls.js"; -import type { ClaudeForChromeContext, SocketClient } from "./types.js"; +import { createBridgeClient } from './bridgeClient.js' +import { BROWSER_TOOLS } from './browserTools.js' +import { createMcpSocketClient } from './mcpSocketClient.js' +import { createMcpSocketPool } from './mcpSocketPool.js' +import { handleToolCall } from './toolCalls.js' +import type { ClaudeForChromeContext, SocketClient } from './types.js' /** * Create the socket/bridge client for the Chrome extension MCP server. @@ -24,23 +24,22 @@ export function createChromeSocketClient( ? createBridgeClient(context) : context.getSocketPaths ? createMcpSocketPool(context) - : createMcpSocketClient(context); + : createMcpSocketClient(context) } export function createClaudeForChromeMcpServer( context: ClaudeForChromeContext, existingSocketClient?: SocketClient, ): Server { - const { serverName, logger } = context; + const { serverName, logger } = context // Choose transport: bridge (WebSocket) > socket pool (multi-profile) > single socket. - const socketClient = - existingSocketClient ?? createChromeSocketClient(context); + const socketClient = existingSocketClient ?? createChromeSocketClient(context) const server = new Server( { name: serverName, - version: "1.0.0", + version: '1.0.0', }, { capabilities: { @@ -48,49 +47,49 @@ export function createClaudeForChromeMcpServer( logging: {}, }, }, - ); + ) server.setRequestHandler(ListToolsRequestSchema, async () => { if (context.isDisabled?.()) { - return { tools: [] }; + return { tools: [] } } return { tools: context.bridgeConfig ? BROWSER_TOOLS - : BROWSER_TOOLS.filter((t) => t.name !== "switch_browser"), - }; - }); + : BROWSER_TOOLS.filter(t => t.name !== 'switch_browser'), + } + }) server.setRequestHandler( CallToolRequestSchema, async (request): Promise => { - logger.info(`[${serverName}] Executing tool: ${request.params.name}`); + logger.info(`[${serverName}] Executing tool: ${request.params.name}`) return handleToolCall( context, socketClient, request.params.name, request.params.arguments || {}, - ); + ) }, - ); + ) - socketClient.setNotificationHandler((notification) => { + socketClient.setNotificationHandler(notification => { logger.info( `[${serverName}] Forwarding MCP notification: ${notification.method}`, - ); + ) server .notification({ method: notification.method, params: notification.params, }) - .catch((error) => { + .catch(error => { // Server may not be connected yet (e.g., during startup or after disconnect) logger.info( `[${serverName}] Failed to forward MCP notification: ${error.message}`, - ); - }); - }); + ) + }) + }) - return server; + return server } diff --git a/packages/@ant/claude-for-chrome-mcp/src/mcpSocketPool.ts b/packages/@ant/claude-for-chrome-mcp/src/mcpSocketPool.ts index 9329fb6fa..3456b42f6 100644 --- a/packages/@ant/claude-for-chrome-mcp/src/mcpSocketPool.ts +++ b/packages/@ant/claude-for-chrome-mcp/src/mcpSocketPool.ts @@ -1,13 +1,13 @@ import { createMcpSocketClient, SocketConnectionError, -} from "./mcpSocketClient.js"; -import type { McpSocketClient } from "./mcpSocketClient.js"; +} from './mcpSocketClient.js' +import type { McpSocketClient } from './mcpSocketClient.js' import type { ClaudeForChromeContext, PermissionMode, PermissionOverrides, -} from "./types.js"; +} from './types.js' /** * Manages connections to multiple Chrome native host sockets (one per Chrome profile). @@ -18,26 +18,29 @@ import type { * built from tabs_context_mcp responses. */ export class McpSocketPool { - private clients: Map = new Map(); - private tabRoutes: Map = new Map(); - private context: ClaudeForChromeContext; + private clients: Map = new Map() + private tabRoutes: Map = new Map() + private context: ClaudeForChromeContext private notificationHandler: - | ((notification: { method: string; params?: Record }) => void) - | null = null; + | ((notification: { + method: string + params?: Record + }) => void) + | null = null constructor(context: ClaudeForChromeContext) { - this.context = context; + this.context = context } public setNotificationHandler( handler: (notification: { - method: string; - params?: Record; + method: string + params?: Record }) => void, ): void { - this.notificationHandler = handler; + this.notificationHandler = handler for (const client of this.clients.values()) { - client.setNotificationHandler(handler); + client.setNotificationHandler(handler) } } @@ -45,32 +48,30 @@ export class McpSocketPool { * Discover available sockets and ensure at least one is connected. */ public async ensureConnected(): Promise { - const { logger, serverName } = this.context; + const { logger, serverName } = this.context - this.refreshClients(); + this.refreshClients() // Try to connect any disconnected clients - const connectPromises: Promise[] = []; + const connectPromises: Promise[] = [] for (const client of this.clients.values()) { if (!client.isConnected()) { - connectPromises.push( - client.ensureConnected().catch(() => false), - ); + connectPromises.push(client.ensureConnected().catch(() => false)) } } if (connectPromises.length > 0) { - await Promise.all(connectPromises); + await Promise.all(connectPromises) } - const connectedCount = this.getConnectedClients().length; + const connectedCount = this.getConnectedClients().length if (connectedCount === 0) { - logger.info(`[${serverName}] No connected sockets in pool`); - return false; + logger.info(`[${serverName}] No connected sockets in pool`) + return false } - logger.info(`[${serverName}] Socket pool: ${connectedCount} connected`); - return true; + logger.info(`[${serverName}] Socket pool: ${connectedCount} connected`) + return true } /** @@ -82,57 +83,57 @@ export class McpSocketPool { args: Record, _permissionOverrides?: PermissionOverrides, ): Promise { - if (name === "tabs_context_mcp") { - return this.callTabsContext(args); + if (name === 'tabs_context_mcp') { + return this.callTabsContext(args) } // Route by tabId if present - const tabId = args.tabId as number | undefined; + const tabId = args.tabId as number | undefined if (tabId !== undefined) { - const socketPath = this.tabRoutes.get(tabId); + const socketPath = this.tabRoutes.get(tabId) if (socketPath) { - const client = this.clients.get(socketPath); + const client = this.clients.get(socketPath) if (client?.isConnected()) { - return client.callTool(name, args); + return client.callTool(name, args) } } // Tab route not found or client disconnected — fall through to any connected } // Fallback: use first connected client - const connected = this.getConnectedClients(); + const connected = this.getConnectedClients() if (connected.length === 0) { throw new SocketConnectionError( `[${this.context.serverName}] No connected sockets available`, - ); + ) } - return connected[0]!.callTool(name, args); + return connected[0]!.callTool(name, args) } public async setPermissionMode( mode: PermissionMode, allowedDomains?: string[], ): Promise { - const connected = this.getConnectedClients(); + const connected = this.getConnectedClients() await Promise.all( - connected.map((client) => client.setPermissionMode(mode, allowedDomains)), - ); + connected.map(client => client.setPermissionMode(mode, allowedDomains)), + ) } public isConnected(): boolean { - return this.getConnectedClients().length > 0; + return this.getConnectedClients().length > 0 } public disconnect(): void { for (const client of this.clients.values()) { - client.disconnect(); + client.disconnect() } - this.clients.clear(); - this.tabRoutes.clear(); + this.clients.clear() + this.tabRoutes.clear() } private getConnectedClients(): McpSocketClient[] { - return [...this.clients.values()].filter((c) => c.isConnected()); + return [...this.clients.values()].filter(c => c.isConnected()) } /** @@ -142,173 +143,173 @@ export class McpSocketPool { private async callTabsContext( args: Record, ): Promise { - const { logger, serverName } = this.context; - const connected = this.getConnectedClients(); + const { logger, serverName } = this.context + const connected = this.getConnectedClients() if (connected.length === 0) { throw new SocketConnectionError( `[${serverName}] No connected sockets available`, - ); + ) } // If only one client, skip merging overhead if (connected.length === 1) { - const result = await connected[0]!.callTool("tabs_context_mcp", args); - this.updateTabRoutes(result, this.getSocketPathForClient(connected[0]!)); - return result; + const result = await connected[0]!.callTool('tabs_context_mcp', args) + this.updateTabRoutes(result, this.getSocketPathForClient(connected[0]!)) + return result } // Query all connected clients in parallel const results = await Promise.allSettled( - connected.map(async (client) => { - const result = await client.callTool("tabs_context_mcp", args); - const socketPath = this.getSocketPathForClient(client); - return { result, socketPath }; + connected.map(async client => { + const result = await client.callTool('tabs_context_mcp', args) + const socketPath = this.getSocketPathForClient(client) + return { result, socketPath } }), - ); + ) // Merge tab results - const mergedTabs: unknown[] = []; - this.tabRoutes.clear(); + const mergedTabs: unknown[] = [] + this.tabRoutes.clear() for (const settledResult of results) { - if (settledResult.status !== "fulfilled") { + if (settledResult.status !== 'fulfilled') { logger.info( `[${serverName}] tabs_context_mcp failed on one socket: ${settledResult.reason}`, - ); - continue; + ) + continue } - const { result, socketPath } = settledResult.value; - this.updateTabRoutes(result, socketPath); + const { result, socketPath } = settledResult.value + this.updateTabRoutes(result, socketPath) - const tabs = this.extractTabs(result); + const tabs = this.extractTabs(result) if (tabs) { - mergedTabs.push(...tabs); + mergedTabs.push(...tabs) } } // Return merged result in the same format as the extension response if (mergedTabs.length > 0) { const tabListText = mergedTabs - .map((t) => { - const tab = t as { tabId: number; title: string; url: string }; - return ` • tabId ${tab.tabId}: "${tab.title}" (${tab.url})`; + .map(t => { + const tab = t as { tabId: number; title: string; url: string } + return ` • tabId ${tab.tabId}: "${tab.title}" (${tab.url})` }) - .join("\n"); + .join('\n') return { result: { content: [ { - type: "text", + type: 'text', text: JSON.stringify({ availableTabs: mergedTabs }), }, { - type: "text", + type: 'text', text: `\n\nTab Context:\n- Available tabs:\n${tabListText}`, }, ], }, - }; + } } // Fallback: return first successful result as-is for (const settledResult of results) { - if (settledResult.status === "fulfilled") { - return settledResult.value.result; + if (settledResult.status === 'fulfilled') { + return settledResult.value.result } } throw new SocketConnectionError( `[${serverName}] All sockets failed for tabs_context_mcp`, - ); + ) } /** * Extract tab objects from a tool response to update routing table. */ private updateTabRoutes(result: unknown, socketPath: string): void { - const tabs = this.extractTabs(result); - if (!tabs) return; + const tabs = this.extractTabs(result) + if (!tabs) return for (const tab of tabs) { - if (typeof tab === "object" && tab !== null && "tabId" in tab) { - const tabId = (tab as { tabId: number }).tabId; - this.tabRoutes.set(tabId, socketPath); + if (typeof tab === 'object' && tab !== null && 'tabId' in tab) { + const tabId = (tab as { tabId: number }).tabId + this.tabRoutes.set(tabId, socketPath) } } } private extractTabs(result: unknown): unknown[] | null { - if (!result || typeof result !== "object") return null; + if (!result || typeof result !== 'object') return null // Response format: { result: { content: [{ type: "text", text: "{\"availableTabs\":[...],\"tabGroupId\":...}" }] } } const asResponse = result as { - result?: { content?: Array<{ type: string; text?: string }> }; - }; - const content = asResponse.result?.content; - if (!content || !Array.isArray(content)) return null; + result?: { content?: Array<{ type: string; text?: string }> } + } + const content = asResponse.result?.content + if (!content || !Array.isArray(content)) return null for (const item of content) { - if (item.type === "text" && item.text) { + if (item.type === 'text' && item.text) { try { - const parsed = JSON.parse(item.text); - if (Array.isArray(parsed)) return parsed; + const parsed = JSON.parse(item.text) + if (Array.isArray(parsed)) return parsed // Handle { availableTabs: [...] } format if (parsed && Array.isArray(parsed.availableTabs)) { - return parsed.availableTabs; + return parsed.availableTabs } } catch { // Not JSON, skip } } } - return null; + return null } private getSocketPathForClient(client: McpSocketClient): string { for (const [path, c] of this.clients.entries()) { - if (c === client) return path; + if (c === client) return path } - return ""; + return '' } /** * Scan for available sockets and create/remove clients as needed. */ private refreshClients(): void { - const socketPaths = this.getAvailableSocketPaths(); - const { logger, serverName } = this.context; + const socketPaths = this.getAvailableSocketPaths() + const { logger, serverName } = this.context // Add new clients for newly discovered sockets for (const path of socketPaths) { if (!this.clients.has(path)) { - logger.info(`[${serverName}] Adding socket to pool: ${path}`); + logger.info(`[${serverName}] Adding socket to pool: ${path}`) const clientContext: ClaudeForChromeContext = { ...this.context, socketPath: path, getSocketPath: undefined, getSocketPaths: undefined, - }; - const client = createMcpSocketClient(clientContext); - client.disableAutoReconnect = true; - if (this.notificationHandler) { - client.setNotificationHandler(this.notificationHandler); } - this.clients.set(path, client); + const client = createMcpSocketClient(clientContext) + client.disableAutoReconnect = true + if (this.notificationHandler) { + client.setNotificationHandler(this.notificationHandler) + } + this.clients.set(path, client) } } // Remove clients for sockets that no longer exist for (const [path, client] of this.clients.entries()) { if (!socketPaths.includes(path)) { - logger.info(`[${serverName}] Removing stale socket from pool: ${path}`); - client.disconnect(); - this.clients.delete(path); + logger.info(`[${serverName}] Removing stale socket from pool: ${path}`) + client.disconnect() + this.clients.delete(path) for (const [tabId, socketPath] of this.tabRoutes.entries()) { if (socketPath === path) { - this.tabRoutes.delete(tabId); + this.tabRoutes.delete(tabId) } } } @@ -316,12 +317,12 @@ export class McpSocketPool { } private getAvailableSocketPaths(): string[] { - return this.context.getSocketPaths?.() ?? []; + return this.context.getSocketPaths?.() ?? [] } } export function createMcpSocketPool( context: ClaudeForChromeContext, ): McpSocketPool { - return new McpSocketPool(context); + return new McpSocketPool(context) } diff --git a/packages/@ant/claude-for-chrome-mcp/src/toolCalls.ts b/packages/@ant/claude-for-chrome-mcp/src/toolCalls.ts index 65bd8d178..587428d40 100644 --- a/packages/@ant/claude-for-chrome-mcp/src/toolCalls.ts +++ b/packages/@ant/claude-for-chrome-mcp/src/toolCalls.ts @@ -1,12 +1,12 @@ -import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js' -import { SocketConnectionError } from "./mcpSocketClient.js"; +import { SocketConnectionError } from './mcpSocketClient.js' import type { ClaudeForChromeContext, PermissionMode, PermissionOverrides, SocketClient, -} from "./types.js"; +} from './types.js' export const handleToolCall = async ( context: ClaudeForChromeContext, @@ -16,21 +16,21 @@ export const handleToolCall = async ( permissionOverrides?: PermissionOverrides, ): Promise => { // Handle permission mode changes locally (not forwarded to extension) - if (name === "set_permission_mode") { - return handleSetPermissionMode(socketClient, args); + if (name === 'set_permission_mode') { + return handleSetPermissionMode(socketClient, args) } // Handle switch_browser outside the normal tool call flow (manages its own connection) - if (name === "switch_browser") { - return handleSwitchBrowser(context, socketClient); + if (name === 'switch_browser') { + return handleSwitchBrowser(context, socketClient) } try { - const isConnected = await socketClient.ensureConnected(); + const isConnected = await socketClient.ensureConnected() context.logger.silly( `[${context.serverName}] Server is connected: ${isConnected}. Received tool call: ${name} with args: ${JSON.stringify(args)}.`, - ); + ) if (isConnected) { return await handleToolCallConnected( @@ -39,28 +39,28 @@ export const handleToolCall = async ( name, args, permissionOverrides, - ); + ) } - return handleToolCallDisconnected(context); + return handleToolCallDisconnected(context) } catch (error) { - context.logger.info(`[${context.serverName}] Error calling tool:`, error); + context.logger.info(`[${context.serverName}] Error calling tool:`, error) if (error instanceof SocketConnectionError) { - return handleToolCallDisconnected(context); + return handleToolCallDisconnected(context) } return { content: [ { - type: "text", + type: 'text', text: `Error calling tool, please try again. : ${error instanceof Error ? error.message : String(error)}`, }, ], isError: true, - }; + } } -}; +} async function handleToolCallConnected( context: ClaudeForChromeContext, @@ -69,119 +69,119 @@ async function handleToolCallConnected( args: Record, permissionOverrides?: PermissionOverrides, ): Promise { - const response = await socketClient.callTool(name, args, permissionOverrides); + const response = await socketClient.callTool(name, args, permissionOverrides) context.logger.silly( `[${context.serverName}] Received result from socket bridge: ${JSON.stringify(response)}`, - ); + ) if (response === null || response === undefined) { return { - content: [{ type: "text", text: "Tool execution completed" }], - }; + content: [{ type: 'text', text: 'Tool execution completed' }], + } } // Response will have either result or error field const { result, error } = response as { - result?: { content: unknown[] | string }; - error?: { content: unknown[] | string }; - }; + result?: { content: unknown[] | string } + error?: { content: unknown[] | string } + } // Determine which field has the content and whether it's an error - const contentData = error || result; - const isError = !!error; + const contentData = error || result + const isError = !!error if (!contentData) { return { - content: [{ type: "text", text: "Tool execution completed" }], - }; + content: [{ type: 'text', text: 'Tool execution completed' }], + } } if (isError && isAuthenticationError(contentData.content)) { - context.onAuthenticationError(); + context.onAuthenticationError() } - const { content } = contentData; + const { content } = contentData if (content && Array.isArray(content)) { if (isError) { return { content: content.map((item: unknown) => { - if (typeof item === "object" && item !== null && "type" in item) { - return item; + if (typeof item === 'object' && item !== null && 'type' in item) { + return item } - return { type: "text", text: String(item) }; + return { type: 'text', text: String(item) } }), isError: true, - } as CallToolResult; + } as CallToolResult } const convertedContent = content.map((item: unknown) => { if ( - typeof item === "object" && + typeof item === 'object' && item !== null && - "type" in item && - "source" in item + 'type' in item && + 'source' in item ) { - const typedItem = item; + const typedItem = item if ( - typedItem.type === "image" && - typeof typedItem.source === "object" && + typedItem.type === 'image' && + typeof typedItem.source === 'object' && typedItem.source !== null && - "data" in typedItem.source + 'data' in typedItem.source ) { return { - type: "image", + type: 'image', data: typedItem.source.data, mimeType: - "media_type" in typedItem.source - ? typedItem.source.media_type || "image/png" - : "image/png", - }; + 'media_type' in typedItem.source + ? typedItem.source.media_type || 'image/png' + : 'image/png', + } } } - if (typeof item === "object" && item !== null && "type" in item) { - return item; + if (typeof item === 'object' && item !== null && 'type' in item) { + return item } - return { type: "text", text: String(item) }; - }); + return { type: 'text', text: String(item) } + }) return { content: convertedContent, isError, - } as CallToolResult; + } as CallToolResult } // Handle string content - if (typeof content === "string") { + if (typeof content === 'string') { return { - content: [{ type: "text", text: content }], + content: [{ type: 'text', text: content }], isError, - } as CallToolResult; + } as CallToolResult } // Fallback for unexpected result format context.logger.warn( `[${context.serverName}] Unexpected result format from socket bridge`, response, - ); + ) return { - content: [{ type: "text", text: JSON.stringify(response) }], + content: [{ type: 'text', text: JSON.stringify(response) }], isError, - }; + } } function handleToolCallDisconnected( context: ClaudeForChromeContext, ): CallToolResult { - const text = context.onToolCallDisconnected(); + const text = context.onToolCallDisconnected() return { - content: [{ type: "text", text }], - }; + content: [{ type: 'text', text }], + } } /** @@ -194,28 +194,28 @@ async function handleSetPermissionMode( ): Promise { // Validate permission mode at runtime const validModes = [ - "ask", - "skip_all_permission_checks", - "follow_a_plan", - ] as const; - const mode = args.mode as string | undefined; + 'ask', + 'skip_all_permission_checks', + 'follow_a_plan', + ] as const + const mode = args.mode as string | undefined const permissionMode: PermissionMode = mode && validModes.includes(mode as PermissionMode) ? (mode as PermissionMode) - : "ask"; + : 'ask' if (socketClient.setPermissionMode) { await socketClient.setPermissionMode( permissionMode, args.allowed_domains as string[] | undefined, - ); + ) } return { content: [ - { type: "text", text: `Permission mode set to: ${permissionMode}` }, + { type: 'text', text: `Permission mode set to: ${permissionMode}` }, ], - }; + } } /** @@ -230,50 +230,50 @@ async function handleSwitchBrowser( return { content: [ { - type: "text", - text: "Browser switching is only available with bridge connections.", + type: 'text', + text: 'Browser switching is only available with bridge connections.', }, ], isError: true, - }; + } } - const isConnected = await socketClient.ensureConnected(); + const isConnected = await socketClient.ensureConnected() if (!isConnected) { - return handleToolCallDisconnected(context); + return handleToolCallDisconnected(context) } - const result = (await socketClient.switchBrowser?.()) ?? null; + const result = (await socketClient.switchBrowser?.()) ?? null - if (result === "no_other_browsers") { + if (result === 'no_other_browsers') { return { content: [ { - type: "text", - text: "No other browsers available to switch to. Open Chrome with the Claude extension in another browser to switch.", + type: 'text', + text: 'No other browsers available to switch to. Open Chrome with the Claude extension in another browser to switch.', }, ], isError: true, - }; + } } if (result) { return { content: [ - { type: "text", text: `Connected to browser "${result.name}".` }, + { type: 'text', text: `Connected to browser "${result.name}".` }, ], - }; + } } return { content: [ { - type: "text", - text: "No browser responded within the timeout. Make sure Chrome is open with the Claude extension installed, then try again.", + type: 'text', + text: 'No browser responded within the timeout. Make sure Chrome is open with the Claude extension installed, then try again.', }, ], isError: true, - }; + } } /** @@ -282,20 +282,20 @@ async function handleSwitchBrowser( function isAuthenticationError(content: unknown[] | string): boolean { const errorText = Array.isArray(content) ? content - .map((item) => { - if (typeof item === "string") return item; + .map(item => { + if (typeof item === 'string') return item if ( - typeof item === "object" && + typeof item === 'object' && item !== null && - "text" in item && - typeof item.text === "string" + 'text' in item && + typeof item.text === 'string' ) { - return item.text; + return item.text } - return ""; + return '' }) - .join(" ") - : String(content); + .join(' ') + : String(content) - return errorText.toLowerCase().includes("re-authenticated"); + return errorText.toLowerCase().includes('re-authenticated') } diff --git a/packages/@ant/claude-for-chrome-mcp/src/types.ts b/packages/@ant/claude-for-chrome-mcp/src/types.ts index 0334882b9..67927e002 100644 --- a/packages/@ant/claude-for-chrome-mcp/src/types.ts +++ b/packages/@ant/claude-for-chrome-mcp/src/types.ts @@ -1,64 +1,64 @@ export interface Logger { - info: (message: string, ...args: unknown[]) => void; - error: (message: string, ...args: unknown[]) => void; - warn: (message: string, ...args: unknown[]) => void; - debug: (message: string, ...args: unknown[]) => void; - silly: (message: string, ...args: unknown[]) => void; + info: (message: string, ...args: unknown[]) => void + error: (message: string, ...args: unknown[]) => void + warn: (message: string, ...args: unknown[]) => void + debug: (message: string, ...args: unknown[]) => void + silly: (message: string, ...args: unknown[]) => void } export type PermissionMode = - | "ask" - | "skip_all_permission_checks" - | "follow_a_plan"; + | 'ask' + | 'skip_all_permission_checks' + | 'follow_a_plan' export interface BridgeConfig { /** Bridge WebSocket base URL (e.g., wss://bridge.claudeusercontent.com) */ - url: string; + url: string /** Returns the user's account UUID for the connection path */ - getUserId: () => Promise; + getUserId: () => Promise /** Returns a valid OAuth token for bridge authentication */ - getOAuthToken: () => Promise; + getOAuthToken: () => Promise /** Optional dev user ID for local development (bypasses OAuth) */ - devUserId?: string; + devUserId?: string } /** Metadata about a connected Chrome extension instance. */ export interface ChromeExtensionInfo { - deviceId: string; - osPlatform?: string; - connectedAt: number; - name?: string; + deviceId: string + osPlatform?: string + connectedAt: number + name?: string } export interface ClaudeForChromeContext { - serverName: string; - logger: Logger; - socketPath: string; + serverName: string + logger: Logger + socketPath: string // Optional dynamic resolver for socket path. When provided, called on each // connection attempt to handle runtime conditions (e.g., TMPDIR mismatch). - getSocketPath?: () => string; + getSocketPath?: () => string // Optional resolver returning all available socket paths (for multi-profile support). // When provided, a socket pool connects to all sockets and routes by tab ID. - getSocketPaths?: () => string[]; - clientTypeId: string; // "desktop" | "claude-code" - onToolCallDisconnected: () => string; - onAuthenticationError: () => void; - isDisabled?: () => boolean; + getSocketPaths?: () => string[] + clientTypeId: string // "desktop" | "claude-code" + onToolCallDisconnected: () => string + onAuthenticationError: () => void + isDisabled?: () => boolean /** Bridge WebSocket configuration. When provided, uses bridge instead of socket. */ - bridgeConfig?: BridgeConfig; + bridgeConfig?: BridgeConfig /** If set, permission mode is sent to the extension immediately on bridge connection. */ - initialPermissionMode?: PermissionMode; + initialPermissionMode?: PermissionMode /** Optional callback to track telemetry events for bridge connections */ trackEvent?: ( eventName: K, metadata: Record | null, - ) => void; + ) => void /** Called when user pairs with an extension via the browser pairing flow. */ - onExtensionPaired?: (deviceId: string, name: string) => void; + onExtensionPaired?: (deviceId: string, name: string) => void /** Returns the previously paired deviceId, if any. */ - getPersistedDeviceId?: () => string | undefined; + getPersistedDeviceId?: () => string | undefined /** Called when a remote extension is auto-selected (only option available). */ - onRemoteExtensionWarning?: (ext: ChromeExtensionInfo) => void; + onRemoteExtensionWarning?: (ext: ChromeExtensionInfo) => void } /** @@ -66,69 +66,69 @@ export interface ClaudeForChromeContext { * via navigator.userAgentData.platform. */ export function localPlatformLabel(): string { - return process.platform === "darwin" - ? "macOS" - : process.platform === "win32" - ? "Windows" - : "Linux"; + return process.platform === 'darwin' + ? 'macOS' + : process.platform === 'win32' + ? 'Windows' + : 'Linux' } /** Permission request forwarded from the extension to the desktop for user approval. */ export interface BridgePermissionRequest { /** Links to the pending tool_call */ - toolUseId: string; + toolUseId: string /** Unique ID for this permission request */ - requestId: string; + requestId: string /** Tool type, e.g. "navigate", "click", "execute_javascript" */ - toolType: string; + toolType: string /** The URL/domain context */ - url: string; + url: string /** Additional action data (click coordinates, text, etc.) */ - actionData?: Record; + actionData?: Record } /** Desktop response to a bridge permission request. */ export interface BridgePermissionResponse { - requestId: string; - allowed: boolean; + requestId: string + allowed: boolean } /** Per-call permission overrides, allowing each session to use its own permission state. */ export interface PermissionOverrides { - permissionMode: PermissionMode; - allowedDomains?: string[]; + permissionMode: PermissionMode + allowedDomains?: string[] /** Callback invoked when the extension requests user permission via the bridge. */ - onPermissionRequest?: (request: BridgePermissionRequest) => Promise; + onPermissionRequest?: (request: BridgePermissionRequest) => Promise } /** Shared interface for McpSocketClient and McpSocketPool */ export interface SocketClient { - ensureConnected(): Promise; + ensureConnected(): Promise callTool( name: string, args: Record, permissionOverrides?: PermissionOverrides, - ): Promise; - isConnected(): boolean; - disconnect(): void; + ): Promise + isConnected(): boolean + disconnect(): void setNotificationHandler( handler: (notification: { - method: string; - params?: Record; + method: string + params?: Record }) => void, - ): void; + ): void /** Set permission mode for the current session. Only effective on BridgeClient. */ setPermissionMode?( mode: PermissionMode, allowedDomains?: string[], - ): Promise; + ): Promise /** Switch to a different browser. Only available on BridgeClient. */ switchBrowser?(): Promise< | { - deviceId: string; - name: string; + deviceId: string + name: string } - | "no_other_browsers" + | 'no_other_browsers' | null - >; + > } diff --git a/packages/@ant/computer-use-input/package.json b/packages/@ant/computer-use-input/package.json index 9a25c089a..20c442249 100644 --- a/packages/@ant/computer-use-input/package.json +++ b/packages/@ant/computer-use-input/package.json @@ -1,7 +1,7 @@ { - "name": "@ant/computer-use-input", - "version": "1.0.0", - "private": true, - "main": "./src/index.ts", - "types": "./src/index.ts" + "name": "@ant/computer-use-input", + "version": "1.0.0", + "private": true, + "main": "./src/index.ts", + "types": "./src/index.ts" } diff --git a/packages/@ant/computer-use-input/src/backends/darwin.ts b/packages/@ant/computer-use-input/src/backends/darwin.ts index 37af38cff..f3e272f1d 100644 --- a/packages/@ant/computer-use-input/src/backends/darwin.ts +++ b/packages/@ant/computer-use-input/src/backends/darwin.ts @@ -12,19 +12,46 @@ import type { FrontmostAppInfo, InputBackend } from '../types.js' const execFileAsync = promisify(execFile) const KEY_MAP: Record = { - return: 36, enter: 36, tab: 48, space: 49, delete: 51, backspace: 51, - escape: 53, esc: 53, - left: 123, right: 124, down: 125, up: 126, - f1: 122, f2: 120, f3: 99, f4: 118, f5: 96, f6: 97, - f7: 98, f8: 100, f9: 101, f10: 109, f11: 103, f12: 111, - home: 115, end: 119, pageup: 116, pagedown: 121, + return: 36, + enter: 36, + tab: 48, + space: 49, + delete: 51, + backspace: 51, + escape: 53, + esc: 53, + left: 123, + right: 124, + down: 125, + up: 126, + f1: 122, + f2: 120, + f3: 99, + f4: 118, + f5: 96, + f6: 97, + f7: 98, + f8: 100, + f9: 101, + f10: 109, + f11: 103, + f12: 111, + home: 115, + end: 119, + pageup: 116, + pagedown: 121, } const MODIFIER_MAP: Record = { - command: 'command down', cmd: 'command down', meta: 'command down', super: 'command down', + command: 'command down', + cmd: 'command down', + meta: 'command down', + super: 'command down', shift: 'shift down', - option: 'option down', alt: 'option down', - control: 'control down', ctrl: 'control down', + option: 'option down', + alt: 'option down', + control: 'control down', + ctrl: 'control down', } async function osascript(script: string): Promise { @@ -35,13 +62,23 @@ async function osascript(script: string): Promise { } async function jxa(script: string): Promise { - const { stdout } = await execFileAsync('osascript', ['-l', 'JavaScript', '-e', script], { - encoding: 'utf-8', - }) + const { stdout } = await execFileAsync( + 'osascript', + ['-l', 'JavaScript', '-e', script], + { + encoding: 'utf-8', + }, + ) return stdout.trim() } -function buildMouseJxa(eventType: string, x: number, y: number, btn: number, clickState?: number): string { +function buildMouseJxa( + eventType: string, + x: number, + y: number, + btn: number, + clickState?: number, +): string { let script = `ObjC.import("CoreGraphics"); var p = $.CGPointMake(${x},${y}); var e = $.CGEventCreateMouseEvent(null, $.${eventType}, p, ${btn});` if (clickState !== undefined) { script += ` $.CGEventSetIntegerValueField(e, $.kCGMouseEventClickState, ${clickState});` @@ -61,11 +98,13 @@ export const key: InputBackend['key'] = async (keyName, action) => { if (keyCode !== undefined) { await osascript(`tell application "System Events" to key code ${keyCode}`) } else { - await osascript(`tell application "System Events" to keystroke "${keyName.length === 1 ? keyName : lower}"`) + await osascript( + `tell application "System Events" to keystroke "${keyName.length === 1 ? keyName : lower}"`, + ) } } -export const keys: InputBackend['keys'] = async (parts) => { +export const keys: InputBackend['keys'] = async parts => { const modifiers: string[] = [] let finalKey: string | null = null for (const part of parts) { @@ -78,23 +117,43 @@ export const keys: InputBackend['keys'] = async (parts) => { const keyCode = KEY_MAP[lower] const modStr = modifiers.length > 0 ? ` using {${modifiers.join(', ')}}` : '' if (keyCode !== undefined) { - await osascript(`tell application "System Events" to key code ${keyCode}${modStr}`) + await osascript( + `tell application "System Events" to key code ${keyCode}${modStr}`, + ) } else { - await osascript(`tell application "System Events" to keystroke "${finalKey.length === 1 ? finalKey : lower}"${modStr}`) + await osascript( + `tell application "System Events" to keystroke "${finalKey.length === 1 ? finalKey : lower}"${modStr}`, + ) } } export const mouseLocation: InputBackend['mouseLocation'] = async () => { - const result = await jxa('ObjC.import("CoreGraphics"); var e = $.CGEventCreate(null); var p = $.CGEventGetLocation(e); p.x + "," + p.y') + const result = await jxa( + 'ObjC.import("CoreGraphics"); var e = $.CGEventCreate(null); var p = $.CGEventGetLocation(e); p.x + "," + p.y', + ) const [xStr, yStr] = result.split(',') return { x: Math.round(Number(xStr)), y: Math.round(Number(yStr)) } } -export const mouseButton: InputBackend['mouseButton'] = async (button, action, count) => { +export const mouseButton: InputBackend['mouseButton'] = async ( + button, + action, + count, +) => { const pos = await mouseLocation() const btn = button === 'left' ? 0 : button === 'right' ? 1 : 2 - const downType = btn === 0 ? 'kCGEventLeftMouseDown' : btn === 1 ? 'kCGEventRightMouseDown' : 'kCGEventOtherMouseDown' - const upType = btn === 0 ? 'kCGEventLeftMouseUp' : btn === 1 ? 'kCGEventRightMouseUp' : 'kCGEventOtherMouseUp' + const downType = + btn === 0 + ? 'kCGEventLeftMouseDown' + : btn === 1 + ? 'kCGEventRightMouseDown' + : 'kCGEventOtherMouseDown' + const upType = + btn === 0 + ? 'kCGEventLeftMouseUp' + : btn === 1 + ? 'kCGEventRightMouseUp' + : 'kCGEventOtherMouseUp' if (action === 'click') { for (let i = 0; i < (count ?? 1); i++) { @@ -108,28 +167,39 @@ export const mouseButton: InputBackend['mouseButton'] = async (button, action, c } } -export const mouseScroll: InputBackend['mouseScroll'] = async (amount, direction) => { - const script = direction === 'vertical' - ? `ObjC.import("CoreGraphics"); var e = $.CGEventCreateScrollWheelEvent(null, 0, 1, ${amount}); $.CGEventPost($.kCGHIDEventTap, e);` - : `ObjC.import("CoreGraphics"); var e = $.CGEventCreateScrollWheelEvent(null, 0, 2, 0, ${amount}); $.CGEventPost($.kCGHIDEventTap, e);` +export const mouseScroll: InputBackend['mouseScroll'] = async ( + amount, + direction, +) => { + const script = + direction === 'vertical' + ? `ObjC.import("CoreGraphics"); var e = $.CGEventCreateScrollWheelEvent(null, 0, 1, ${amount}); $.CGEventPost($.kCGHIDEventTap, e);` + : `ObjC.import("CoreGraphics"); var e = $.CGEventCreateScrollWheelEvent(null, 0, 2, 0, ${amount}); $.CGEventPost($.kCGHIDEventTap, e);` await jxa(script) } -export const typeText: InputBackend['typeText'] = async (text) => { +export const typeText: InputBackend['typeText'] = async text => { const escaped = text.replace(/\\/g, '\\\\').replace(/"/g, '\\"') await osascript(`tell application "System Events" to keystroke "${escaped}"`) } export const getFrontmostAppInfo: InputBackend['getFrontmostAppInfo'] = () => { try { - const output = execFileSync('osascript', ['-e', ` + const output = execFileSync( + 'osascript', + [ + '-e', + ` tell application "System Events" set frontApp to first application process whose frontmost is true set appName to name of frontApp set bundleId to bundle identifier of frontApp return bundleId & "|" & appName end tell - `], { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'ignore'] }).trim() + `, + ], + { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'ignore'] }, + ).trim() if (!output || !output.includes('|')) return null const [bundleId, appName] = output.split('|', 2) return { bundleId: bundleId!, appName: appName! } diff --git a/packages/@ant/computer-use-input/src/backends/linux.ts b/packages/@ant/computer-use-input/src/backends/linux.ts index 60d27347a..c9045d283 100644 --- a/packages/@ant/computer-use-input/src/backends/linux.ts +++ b/packages/@ant/computer-use-input/src/backends/linux.ts @@ -32,23 +32,75 @@ async function runAsync(cmd: string[]): Promise { // --------------------------------------------------------------------------- const KEY_MAP: Record = { - return: 'Return', enter: 'Return', tab: 'Tab', space: 'space', - backspace: 'BackSpace', delete: 'Delete', escape: 'Escape', esc: 'Escape', - left: 'Left', up: 'Up', right: 'Right', down: 'Down', - home: 'Home', end: 'End', pageup: 'Prior', pagedown: 'Next', - f1: 'F1', f2: 'F2', f3: 'F3', f4: 'F4', f5: 'F5', f6: 'F6', - f7: 'F7', f8: 'F8', f9: 'F9', f10: 'F10', f11: 'F11', f12: 'F12', - shift: 'shift', lshift: 'shift', rshift: 'shift', - control: 'ctrl', ctrl: 'ctrl', lcontrol: 'ctrl', rcontrol: 'ctrl', - alt: 'alt', option: 'alt', lalt: 'alt', ralt: 'alt', - win: 'super', meta: 'super', command: 'super', cmd: 'super', super: 'super', - insert: 'Insert', printscreen: 'Print', pause: 'Pause', - numlock: 'Num_Lock', capslock: 'Caps_Lock', scrolllock: 'Scroll_Lock', + return: 'Return', + enter: 'Return', + tab: 'Tab', + space: 'space', + backspace: 'BackSpace', + delete: 'Delete', + escape: 'Escape', + esc: 'Escape', + left: 'Left', + up: 'Up', + right: 'Right', + down: 'Down', + home: 'Home', + end: 'End', + pageup: 'Prior', + pagedown: 'Next', + f1: 'F1', + f2: 'F2', + f3: 'F3', + f4: 'F4', + f5: 'F5', + f6: 'F6', + f7: 'F7', + f8: 'F8', + f9: 'F9', + f10: 'F10', + f11: 'F11', + f12: 'F12', + shift: 'shift', + lshift: 'shift', + rshift: 'shift', + control: 'ctrl', + ctrl: 'ctrl', + lcontrol: 'ctrl', + rcontrol: 'ctrl', + alt: 'alt', + option: 'alt', + lalt: 'alt', + ralt: 'alt', + win: 'super', + meta: 'super', + command: 'super', + cmd: 'super', + super: 'super', + insert: 'Insert', + printscreen: 'Print', + pause: 'Pause', + numlock: 'Num_Lock', + capslock: 'Caps_Lock', + scrolllock: 'Scroll_Lock', } const MODIFIER_KEYS = new Set([ - 'shift', 'lshift', 'rshift', 'control', 'ctrl', 'lcontrol', 'rcontrol', - 'alt', 'option', 'lalt', 'ralt', 'win', 'meta', 'command', 'cmd', 'super', + 'shift', + 'lshift', + 'rshift', + 'control', + 'ctrl', + 'lcontrol', + 'rcontrol', + 'alt', + 'option', + 'lalt', + 'ralt', + 'win', + 'meta', + 'command', + 'cmd', + 'super', ]) function mapKey(name: string): string { @@ -68,7 +120,13 @@ function mouseButtonNum(button: 'left' | 'right' | 'middle'): string { // --------------------------------------------------------------------------- export const moveMouse: InputBackend['moveMouse'] = async (x, y, _animated) => { - run(['xdotool', 'mousemove', '--sync', String(Math.round(x)), String(Math.round(y))]) + run([ + 'xdotool', + 'mousemove', + '--sync', + String(Math.round(x)), + String(Math.round(y)), + ]) } export const mouseLocation: InputBackend['mouseLocation'] = async () => { @@ -82,7 +140,11 @@ export const mouseLocation: InputBackend['mouseLocation'] = async () => { } } -export const mouseButton: InputBackend['mouseButton'] = async (button, action, count) => { +export const mouseButton: InputBackend['mouseButton'] = async ( + button, + action, + count, +) => { const btn = mouseButtonNum(button) if (action === 'click') { const n = count ?? 1 @@ -94,7 +156,10 @@ export const mouseButton: InputBackend['mouseButton'] = async (button, action, c } } -export const mouseScroll: InputBackend['mouseScroll'] = async (amount, direction) => { +export const mouseScroll: InputBackend['mouseScroll'] = async ( + amount, + direction, +) => { // xdotool click 4=scroll up, 5=scroll down, 6=scroll left, 7=scroll right // Positive amount = down/right, negative = up/left if (direction === 'vertical') { @@ -121,7 +186,7 @@ export const key: InputBackend['key'] = async (keyName, action) => { } } -export const keys: InputBackend['keys'] = async (parts) => { +export const keys: InputBackend['keys'] = async parts => { // xdotool key accepts "modifier+modifier+key" format const modifiers: string[] = [] let finalKey: string | null = null @@ -139,7 +204,7 @@ export const keys: InputBackend['keys'] = async (parts) => { run(['xdotool', 'key', combo]) } -export const typeText: InputBackend['typeText'] = async (text) => { +export const typeText: InputBackend['typeText'] = async text => { run(['xdotool', 'type', '--delay', '12', text]) } @@ -157,16 +222,23 @@ export const getFrontmostAppInfo: InputBackend['getFrontmostAppInfo'] = () => { let exePath = '' try { exePath = run(['readlink', '-f', `/proc/${pid}/exe`]) - } catch { /* ignore */ } + } catch { + /* ignore */ + } // Read the process name from /proc/comm let appName = '' try { appName = run(['cat', `/proc/${pid}/comm`]) - } catch { /* ignore */ } + } catch { + /* ignore */ + } if (!exePath && !appName) return null - return { bundleId: exePath || `/proc/${pid}/exe`, appName: appName || 'unknown' } + return { + bundleId: exePath || `/proc/${pid}/exe`, + appName: appName || 'unknown', + } } catch { return null } diff --git a/packages/@ant/computer-use-input/src/backends/win32.ts b/packages/@ant/computer-use-input/src/backends/win32.ts index 08900be56..d031ad77d 100644 --- a/packages/@ant/computer-use-input/src/backends/win32.ts +++ b/packages/@ant/computer-use-input/src/backends/win32.ts @@ -92,43 +92,112 @@ public class CuWin32 { // --------------------------------------------------------------------------- const VK_MAP: Record = { - return: 0x0D, enter: 0x0D, tab: 0x09, space: 0x20, - backspace: 0x08, delete: 0x2E, escape: 0x1B, esc: 0x1B, - left: 0x25, up: 0x26, right: 0x27, down: 0x28, - home: 0x24, end: 0x23, pageup: 0x21, pagedown: 0x22, - f1: 0x70, f2: 0x71, f3: 0x72, f4: 0x73, f5: 0x74, f6: 0x75, - f7: 0x76, f8: 0x77, f9: 0x78, f10: 0x79, f11: 0x7A, f12: 0x7B, - shift: 0xA0, lshift: 0xA0, rshift: 0xA1, - control: 0xA2, ctrl: 0xA2, lcontrol: 0xA2, rcontrol: 0xA3, - alt: 0xA4, option: 0xA4, lalt: 0xA4, ralt: 0xA5, - win: 0x5B, meta: 0x5B, command: 0x5B, cmd: 0x5B, super: 0x5B, - insert: 0x2D, printscreen: 0x2C, pause: 0x13, - numlock: 0x90, capslock: 0x14, scrolllock: 0x91, + return: 0x0d, + enter: 0x0d, + tab: 0x09, + space: 0x20, + backspace: 0x08, + delete: 0x2e, + escape: 0x1b, + esc: 0x1b, + left: 0x25, + up: 0x26, + right: 0x27, + down: 0x28, + home: 0x24, + end: 0x23, + pageup: 0x21, + pagedown: 0x22, + f1: 0x70, + f2: 0x71, + f3: 0x72, + f4: 0x73, + f5: 0x74, + f6: 0x75, + f7: 0x76, + f8: 0x77, + f9: 0x78, + f10: 0x79, + f11: 0x7a, + f12: 0x7b, + shift: 0xa0, + lshift: 0xa0, + rshift: 0xa1, + control: 0xa2, + ctrl: 0xa2, + lcontrol: 0xa2, + rcontrol: 0xa3, + alt: 0xa4, + option: 0xa4, + lalt: 0xa4, + ralt: 0xa5, + win: 0x5b, + meta: 0x5b, + command: 0x5b, + cmd: 0x5b, + super: 0x5b, + insert: 0x2d, + printscreen: 0x2c, + pause: 0x13, + numlock: 0x90, + capslock: 0x14, + scrolllock: 0x91, } -const MODIFIER_KEYS = new Set(['shift', 'lshift', 'rshift', 'control', 'ctrl', 'lcontrol', 'rcontrol', 'alt', 'option', 'lalt', 'ralt', 'win', 'meta', 'command', 'cmd', 'super']) +const MODIFIER_KEYS = new Set([ + 'shift', + 'lshift', + 'rshift', + 'control', + 'ctrl', + 'lcontrol', + 'rcontrol', + 'alt', + 'option', + 'lalt', + 'ralt', + 'win', + 'meta', + 'command', + 'cmd', + 'super', +]) // --------------------------------------------------------------------------- // Implementation // --------------------------------------------------------------------------- export const moveMouse: InputBackend['moveMouse'] = async (x, y, _animated) => { - ps(`${WIN32_TYPES}; [CuWin32]::SetCursorPos(${Math.round(x)}, ${Math.round(y)}) | Out-Null`) + ps( + `${WIN32_TYPES}; [CuWin32]::SetCursorPos(${Math.round(x)}, ${Math.round(y)}) | Out-Null`, + ) } export const mouseLocation: InputBackend['mouseLocation'] = async () => { - const out = ps(`${WIN32_TYPES}; $p = New-Object CuWin32+POINT; [CuWin32]::GetCursorPos([ref]$p) | Out-Null; "$($p.X),$($p.Y)"`) + const out = ps( + `${WIN32_TYPES}; $p = New-Object CuWin32+POINT; [CuWin32]::GetCursorPos([ref]$p) | Out-Null; "$($p.X),$($p.Y)"`, + ) const [xStr, yStr] = out.split(',') return { x: Number(xStr), y: Number(yStr) } } -export const mouseButton: InputBackend['mouseButton'] = async (button, action, count) => { - const downFlag = button === 'left' ? 'MOUSEEVENTF_LEFTDOWN' - : button === 'right' ? 'MOUSEEVENTF_RIGHTDOWN' - : 'MOUSEEVENTF_MIDDLEDOWN' - const upFlag = button === 'left' ? 'MOUSEEVENTF_LEFTUP' - : button === 'right' ? 'MOUSEEVENTF_RIGHTUP' - : 'MOUSEEVENTF_MIDDLEUP' +export const mouseButton: InputBackend['mouseButton'] = async ( + button, + action, + count, +) => { + const downFlag = + button === 'left' + ? 'MOUSEEVENTF_LEFTDOWN' + : button === 'right' + ? 'MOUSEEVENTF_RIGHTDOWN' + : 'MOUSEEVENTF_MIDDLEDOWN' + const upFlag = + button === 'left' + ? 'MOUSEEVENTF_LEFTUP' + : button === 'right' + ? 'MOUSEEVENTF_RIGHTUP' + : 'MOUSEEVENTF_MIDDLEUP' if (action === 'click') { const n = count ?? 1 @@ -136,17 +205,29 @@ export const mouseButton: InputBackend['mouseButton'] = async (button, action, c for (let i = 0; i < n; i++) { clicks += `$i.mi.dwFlags=[CuWin32]::${downFlag}; [CuWin32]::SendInput(1, @($i), [Runtime.InteropServices.Marshal]::SizeOf($i)) | Out-Null; $i.mi.dwFlags=[CuWin32]::${upFlag}; [CuWin32]::SendInput(1, @($i), [Runtime.InteropServices.Marshal]::SizeOf($i)) | Out-Null; ` } - ps(`${WIN32_TYPES}; $i = New-Object CuWin32+INPUT; $i.type=[CuWin32]::INPUT_MOUSE; ${clicks}`) + ps( + `${WIN32_TYPES}; $i = New-Object CuWin32+INPUT; $i.type=[CuWin32]::INPUT_MOUSE; ${clicks}`, + ) } else if (action === 'press') { - ps(`${WIN32_TYPES}; $i = New-Object CuWin32+INPUT; $i.type=[CuWin32]::INPUT_MOUSE; $i.mi.dwFlags=[CuWin32]::${downFlag}; [CuWin32]::SendInput(1, @($i), [Runtime.InteropServices.Marshal]::SizeOf($i)) | Out-Null`) + ps( + `${WIN32_TYPES}; $i = New-Object CuWin32+INPUT; $i.type=[CuWin32]::INPUT_MOUSE; $i.mi.dwFlags=[CuWin32]::${downFlag}; [CuWin32]::SendInput(1, @($i), [Runtime.InteropServices.Marshal]::SizeOf($i)) | Out-Null`, + ) } else { - ps(`${WIN32_TYPES}; $i = New-Object CuWin32+INPUT; $i.type=[CuWin32]::INPUT_MOUSE; $i.mi.dwFlags=[CuWin32]::${upFlag}; [CuWin32]::SendInput(1, @($i), [Runtime.InteropServices.Marshal]::SizeOf($i)) | Out-Null`) + ps( + `${WIN32_TYPES}; $i = New-Object CuWin32+INPUT; $i.type=[CuWin32]::INPUT_MOUSE; $i.mi.dwFlags=[CuWin32]::${upFlag}; [CuWin32]::SendInput(1, @($i), [Runtime.InteropServices.Marshal]::SizeOf($i)) | Out-Null`, + ) } } -export const mouseScroll: InputBackend['mouseScroll'] = async (amount, direction) => { - const flag = direction === 'vertical' ? 'MOUSEEVENTF_WHEEL' : 'MOUSEEVENTF_HWHEEL' - ps(`${WIN32_TYPES}; $i = New-Object CuWin32+INPUT; $i.type=[CuWin32]::INPUT_MOUSE; $i.mi.dwFlags=[CuWin32]::${flag}; $i.mi.mouseData=${amount * 120}; [CuWin32]::SendInput(1, @($i), [Runtime.InteropServices.Marshal]::SizeOf($i)) | Out-Null`) +export const mouseScroll: InputBackend['mouseScroll'] = async ( + amount, + direction, +) => { + const flag = + direction === 'vertical' ? 'MOUSEEVENTF_WHEEL' : 'MOUSEEVENTF_HWHEEL' + ps( + `${WIN32_TYPES}; $i = New-Object CuWin32+INPUT; $i.type=[CuWin32]::INPUT_MOUSE; $i.mi.dwFlags=[CuWin32]::${flag}; $i.mi.mouseData=${amount * 120}; [CuWin32]::SendInput(1, @($i), [Runtime.InteropServices.Marshal]::SizeOf($i)) | Out-Null`, + ) } export const key: InputBackend['key'] = async (keyName, action) => { @@ -154,15 +235,19 @@ export const key: InputBackend['key'] = async (keyName, action) => { const vk = VK_MAP[lower] const flags = action === 'release' ? '2' : '0' if (vk !== undefined) { - ps(`${WIN32_TYPES}; [CuWin32]::keybd_event(${vk}, 0, ${flags}, [UIntPtr]::Zero)`) + ps( + `${WIN32_TYPES}; [CuWin32]::keybd_event(${vk}, 0, ${flags}, [UIntPtr]::Zero)`, + ) } else if (keyName.length === 1) { // Single character — use VkKeyScan to resolve const charCode = keyName.charCodeAt(0) - ps(`${WIN32_TYPES}; $vk = [CuWin32]::VkKeyScan([char]${charCode}) -band 0xFF; [CuWin32]::keybd_event([byte]$vk, 0, ${flags}, [UIntPtr]::Zero)`) + ps( + `${WIN32_TYPES}; $vk = [CuWin32]::VkKeyScan([char]${charCode}) -band 0xFF; [CuWin32]::keybd_event([byte]$vk, 0, ${flags}, [UIntPtr]::Zero)`, + ) } } -export const keys: InputBackend['keys'] = async (parts) => { +export const keys: InputBackend['keys'] = async parts => { const modifiers: number[] = [] let finalKey: string | null = null @@ -196,9 +281,11 @@ export const keys: InputBackend['keys'] = async (parts) => { ps(script) } -export const typeText: InputBackend['typeText'] = async (text) => { +export const typeText: InputBackend['typeText'] = async text => { const escaped = text.replace(/'/g, "''") - ps(`Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${escaped}')`) + ps( + `Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${escaped}')`, + ) } export const getFrontmostAppInfo: InputBackend['getFrontmostAppInfo'] = () => { diff --git a/packages/@ant/computer-use-input/src/index.ts b/packages/@ant/computer-use-input/src/index.ts index 22cb9bf96..78e8107a1 100644 --- a/packages/@ant/computer-use-input/src/index.ts +++ b/packages/@ant/computer-use-input/src/index.ts @@ -15,8 +15,15 @@ export interface InputBackend { key(key: string, action: 'press' | 'release'): Promise keys(parts: string[]): Promise mouseLocation(): Promise<{ x: number; y: number }> - mouseButton(button: 'left' | 'right' | 'middle', action: 'click' | 'press' | 'release', count?: number): Promise - mouseScroll(amount: number, direction: 'vertical' | 'horizontal'): Promise + mouseButton( + button: 'left' | 'right' | 'middle', + action: 'click' | 'press' | 'release', + count?: number, + ): Promise + mouseScroll( + amount: number, + direction: 'vertical' | 'horizontal', + ): Promise typeText(text: string): Promise getFrontmostAppInfo(): FrontmostAppInfo | null } @@ -60,5 +67,7 @@ export class ComputerUseInputAPI { declare isSupported: true } -interface ComputerUseInputUnsupported { isSupported: false } +interface ComputerUseInputUnsupported { + isSupported: false +} export type ComputerUseInput = ComputerUseInputAPI | ComputerUseInputUnsupported diff --git a/packages/@ant/computer-use-input/src/types.ts b/packages/@ant/computer-use-input/src/types.ts index ec80708b9..446145f78 100644 --- a/packages/@ant/computer-use-input/src/types.ts +++ b/packages/@ant/computer-use-input/src/types.ts @@ -1,5 +1,5 @@ export interface FrontmostAppInfo { - bundleId: string // macOS: bundle ID, Windows: exe path + bundleId: string // macOS: bundle ID, Windows: exe path appName: string } @@ -13,7 +13,10 @@ export interface InputBackend { action: 'click' | 'press' | 'release', count?: number, ): Promise - mouseScroll(amount: number, direction: 'vertical' | 'horizontal'): Promise + mouseScroll( + amount: number, + direction: 'vertical' | 'horizontal', + ): Promise typeText(text: string): Promise getFrontmostAppInfo(): FrontmostAppInfo | null } diff --git a/packages/@ant/computer-use-mcp/package.json b/packages/@ant/computer-use-mcp/package.json index 00c8f927d..93939f47b 100644 --- a/packages/@ant/computer-use-mcp/package.json +++ b/packages/@ant/computer-use-mcp/package.json @@ -1,13 +1,13 @@ { - "name": "@ant/computer-use-mcp", - "version": "1.0.0", - "private": true, - "type": "module", - "main": "./src/index.ts", - "types": "./src/index.ts", - "exports": { - ".": "./src/index.ts", - "./sentinelApps": "./src/sentinelApps.ts", - "./types": "./src/types.ts" - } + "name": "@ant/computer-use-mcp", + "version": "1.0.0", + "private": true, + "type": "module", + "main": "./src/index.ts", + "types": "./src/index.ts", + "exports": { + ".": "./src/index.ts", + "./sentinelApps": "./src/sentinelApps.ts", + "./types": "./src/types.ts" + } } diff --git a/packages/@ant/computer-use-mcp/src/deniedApps.ts b/packages/@ant/computer-use-mcp/src/deniedApps.ts index 92f14e0b1..cb01095aa 100644 --- a/packages/@ant/computer-use-mcp/src/deniedApps.ts +++ b/packages/@ant/computer-use-mcp/src/deniedApps.ts @@ -31,7 +31,7 @@ * duplicated as a string literal below rather than imported. */ -export type DeniedCategory = "browser" | "terminal" | "trading"; +export type DeniedCategory = 'browser' | 'terminal' | 'trading' /** * Map a category to its hardcoded tier. Return-type is the string-literal @@ -44,54 +44,54 @@ export type DeniedCategory = "browser" | "terminal" | "trading"; */ export function categoryToTier( category: DeniedCategory | null, -): "read" | "click" | "full" { - if (category === "browser" || category === "trading") return "read"; - if (category === "terminal") return "click"; - return "full"; +): 'read' | 'click' | 'full' { + if (category === 'browser' || category === 'trading') return 'read' + if (category === 'terminal') return 'click' + return 'full' } // ─── Bundle-ID deny sets (macOS) ───────────────────────────────────────── const BROWSER_BUNDLE_IDS: ReadonlySet = new Set([ // Apple - "com.apple.Safari", - "com.apple.SafariTechnologyPreview", + 'com.apple.Safari', + 'com.apple.SafariTechnologyPreview', // Google - "com.google.Chrome", - "com.google.Chrome.beta", - "com.google.Chrome.dev", - "com.google.Chrome.canary", + 'com.google.Chrome', + 'com.google.Chrome.beta', + 'com.google.Chrome.dev', + 'com.google.Chrome.canary', // Microsoft - "com.microsoft.edgemac", - "com.microsoft.edgemac.Beta", - "com.microsoft.edgemac.Dev", - "com.microsoft.edgemac.Canary", + 'com.microsoft.edgemac', + 'com.microsoft.edgemac.Beta', + 'com.microsoft.edgemac.Dev', + 'com.microsoft.edgemac.Canary', // Mozilla - "org.mozilla.firefox", - "org.mozilla.firefoxdeveloperedition", - "org.mozilla.nightly", + 'org.mozilla.firefox', + 'org.mozilla.firefoxdeveloperedition', + 'org.mozilla.nightly', // Chromium-based - "org.chromium.Chromium", - "com.brave.Browser", - "com.brave.Browser.beta", - "com.brave.Browser.nightly", - "com.operasoftware.Opera", - "com.operasoftware.OperaGX", - "com.operasoftware.OperaDeveloper", - "com.vivaldi.Vivaldi", + 'org.chromium.Chromium', + 'com.brave.Browser', + 'com.brave.Browser.beta', + 'com.brave.Browser.nightly', + 'com.operasoftware.Opera', + 'com.operasoftware.OperaGX', + 'com.operasoftware.OperaDeveloper', + 'com.vivaldi.Vivaldi', // The Browser Company - "company.thebrowser.Browser", // Arc - "company.thebrowser.dia", // Dia (agentic) + 'company.thebrowser.Browser', // Arc + 'company.thebrowser.dia', // Dia (agentic) // Privacy-focused - "org.torproject.torbrowser", - "com.duckduckgo.macos.browser", - "ru.yandex.desktop.yandex-browser", + 'org.torproject.torbrowser', + 'com.duckduckgo.macos.browser', + 'ru.yandex.desktop.yandex-browser', // Agentic / AI browsers — newer entrants with LLM integrations - "ai.perplexity.comet", - "com.sigmaos.sigmaos.macos", // SigmaOS + 'ai.perplexity.comet', + 'com.sigmaos.sigmaos.macos', // SigmaOS // Webkit-based misc - "com.kagi.kagimacOS", // Orion -]); + 'com.kagi.kagimacOS', // Orion +]) /** * Terminals + IDEs with integrated terminals. Supersets @@ -101,66 +101,66 @@ const BROWSER_BUNDLE_IDS: ReadonlySet = new Set([ */ const TERMINAL_BUNDLE_IDS: ReadonlySet = new Set([ // Dedicated terminals - "com.apple.Terminal", - "com.googlecode.iterm2", - "dev.warp.Warp-Stable", - "dev.warp.Warp-Beta", - "com.github.wez.wezterm", - "org.alacritty", - "io.alacritty", // pre-v0.11.0 (renamed 2022-07) — kept for legacy installs - "net.kovidgoyal.kitty", - "co.zeit.hyper", - "com.mitchellh.ghostty", - "org.tabby", - "com.termius-dmg.mac", // Termius + 'com.apple.Terminal', + 'com.googlecode.iterm2', + 'dev.warp.Warp-Stable', + 'dev.warp.Warp-Beta', + 'com.github.wez.wezterm', + 'org.alacritty', + 'io.alacritty', // pre-v0.11.0 (renamed 2022-07) — kept for legacy installs + 'net.kovidgoyal.kitty', + 'co.zeit.hyper', + 'com.mitchellh.ghostty', + 'org.tabby', + 'com.termius-dmg.mac', // Termius // IDEs with integrated terminals — we can't distinguish "type in the // editor" from "type in the integrated terminal" via screenshot+click. // VS Code family - "com.microsoft.VSCode", - "com.microsoft.VSCodeInsiders", - "com.vscodium", // VSCodium - "com.todesktop.230313mzl4w4u92", // Cursor - "com.exafunction.windsurf", // Windsurf / Codeium - "dev.zed.Zed", - "dev.zed.Zed-Preview", + 'com.microsoft.VSCode', + 'com.microsoft.VSCodeInsiders', + 'com.vscodium', // VSCodium + 'com.todesktop.230313mzl4w4u92', // Cursor + 'com.exafunction.windsurf', // Windsurf / Codeium + 'dev.zed.Zed', + 'dev.zed.Zed-Preview', // JetBrains family (all have integrated terminals) - "com.jetbrains.intellij", - "com.jetbrains.intellij.ce", - "com.jetbrains.pycharm", - "com.jetbrains.pycharm.ce", - "com.jetbrains.WebStorm", - "com.jetbrains.CLion", - "com.jetbrains.goland", - "com.jetbrains.rubymine", - "com.jetbrains.PhpStorm", - "com.jetbrains.datagrip", - "com.jetbrains.rider", - "com.jetbrains.AppCode", - "com.jetbrains.rustrover", - "com.jetbrains.fleet", - "com.google.android.studio", // Android Studio (JetBrains-based) + 'com.jetbrains.intellij', + 'com.jetbrains.intellij.ce', + 'com.jetbrains.pycharm', + 'com.jetbrains.pycharm.ce', + 'com.jetbrains.WebStorm', + 'com.jetbrains.CLion', + 'com.jetbrains.goland', + 'com.jetbrains.rubymine', + 'com.jetbrains.PhpStorm', + 'com.jetbrains.datagrip', + 'com.jetbrains.rider', + 'com.jetbrains.AppCode', + 'com.jetbrains.rustrover', + 'com.jetbrains.fleet', + 'com.google.android.studio', // Android Studio (JetBrains-based) // Other IDEs - "com.axosoft.gitkraken", // GitKraken has an integrated terminal panel. Also keeps the "kraken" trading-substring from miscategorizing it — bundle-ID wins. - "com.sublimetext.4", - "com.sublimetext.3", - "org.vim.MacVim", - "com.neovim.neovim", - "org.gnu.Emacs", + 'com.axosoft.gitkraken', // GitKraken has an integrated terminal panel. Also keeps the "kraken" trading-substring from miscategorizing it — bundle-ID wins. + 'com.sublimetext.4', + 'com.sublimetext.3', + 'org.vim.MacVim', + 'com.neovim.neovim', + 'org.gnu.Emacs', // Xcode's previous carve-out (full tier for Interface Builder / simulator) // was reversed — at tier "click" IB and simulator taps still work (both are // plain clicks) while the integrated terminal is blocked from keyboard input. - "com.apple.dt.Xcode", - "org.eclipse.platform.ide", - "org.netbeans.ide", - "com.microsoft.visual-studio", // Visual Studio for Mac + 'com.apple.dt.Xcode', + 'org.eclipse.platform.ide', + 'org.netbeans.ide', + 'com.microsoft.visual-studio', // Visual Studio for Mac // AppleScript/automation execution surfaces — same threat as terminals: // type(script) → key("cmd+r") runs arbitrary code. Added after #28011 // removed the osascript MCP server, making CU the only tool-call route // to AppleScript. - "com.apple.ScriptEditor2", - "com.apple.Automator", - "com.apple.shortcuts", -]); + 'com.apple.ScriptEditor2', + 'com.apple.Automator', + 'com.apple.shortcuts', +]) /** * Trading / crypto platforms — granted at tier `"read"` so the agent can see @@ -178,29 +178,29 @@ const TERMINAL_BUNDLE_IDS: ReadonlySet = new Set([ const TRADING_BUNDLE_IDS: ReadonlySet = new Set([ // Verified via Homebrew quit/zap stanzas + mdls + electron-builder source. // Trading - "com.webull.desktop.v1", // Webull (direct download, Qt) - "com.webull.trade.mac.v1", // Webull (Mac App Store) - "com.tastytrade.desktop", - "com.tradingview.tradingviewapp.desktop", - "com.fidelity.activetrader", // Fidelity Trader+ (new) - "com.fmr.activetrader", // Fidelity Active Trader Pro (legacy) + 'com.webull.desktop.v1', // Webull (direct download, Qt) + 'com.webull.trade.mac.v1', // Webull (Mac App Store) + 'com.tastytrade.desktop', + 'com.tradingview.tradingviewapp.desktop', + 'com.fidelity.activetrader', // Fidelity Trader+ (new) + 'com.fmr.activetrader', // Fidelity Active Trader Pro (legacy) // Interactive Brokers TWS — install4j wrapper; Homebrew quit stanza is // authoritative for this exact value but install4j IDs can drift across // major versions — name-substring "trader workstation" is the fallback. - "com.install4j.5889-6375-8446-2021", + 'com.install4j.5889-6375-8446-2021', // Crypto - "com.binance.BinanceDesktop", - "com.electron.exodus", + 'com.binance.BinanceDesktop', + 'com.electron.exodus', // Electrum uses PyInstaller with bundle_identifier=None → defaults to // org.pythonmac.unspecified.. Confirmed in spesmilo/electrum // source + Homebrew zap. IntuneBrew's "org.electrum.electrum" is a fork. - "org.pythonmac.unspecified.Electrum", - "com.ledger.live", - "io.trezor.TrezorSuite", + 'org.pythonmac.unspecified.Electrum', + 'com.ledger.live', + 'io.trezor.TrezorSuite', // No native macOS app (name-substring only): Schwab, E*TRADE, TradeStation, // Robinhood, NinjaTrader, Coinbase, Kraken, Bloomberg. thinkorswim // install4j ID drifts per-install — substring safer. -]); +]) // ─── Policy-deny (not a tier — cannot be granted at all) ───────────────── // @@ -215,78 +215,78 @@ const TRADING_BUNDLE_IDS: ReadonlySet = new Set([ const POLICY_DENIED_BUNDLE_IDS: ReadonlySet = new Set([ // Verified via Homebrew quit/zap + mdls /System/Applications + IntuneBrew. // Apple built-ins - "com.apple.TV", - "com.apple.Music", - "com.apple.iBooksX", - "com.apple.podcasts", + 'com.apple.TV', + 'com.apple.Music', + 'com.apple.iBooksX', + 'com.apple.podcasts', // Music - "com.spotify.client", - "com.amazon.music", - "com.tidal.desktop", - "com.deezer.deezer-desktop", - "com.pandora.desktop", - "com.electron.pocket-casts", // direct-download Electron wrapper - "au.com.shiftyjelly.PocketCasts", // Mac App Store + 'com.spotify.client', + 'com.amazon.music', + 'com.tidal.desktop', + 'com.deezer.deezer-desktop', + 'com.pandora.desktop', + 'com.electron.pocket-casts', // direct-download Electron wrapper + 'au.com.shiftyjelly.PocketCasts', // Mac App Store // Video - "tv.plex.desktop", - "tv.plex.htpc", - "tv.plex.plexamp", - "com.amazon.aiv.AIVApp", // Prime Video (iOS-on-Apple-Silicon) + 'tv.plex.desktop', + 'tv.plex.htpc', + 'tv.plex.plexamp', + 'com.amazon.aiv.AIVApp', // Prime Video (iOS-on-Apple-Silicon) // Ebooks - "net.kovidgoyal.calibre", - "com.amazon.Kindle", // legacy desktop, discontinued - "com.amazon.Lassen", // current Mac App Store (iOS-on-Mac) - "com.kobo.desktop.Kobo", + 'net.kovidgoyal.calibre', + 'com.amazon.Kindle', // legacy desktop, discontinued + 'com.amazon.Lassen', // current Mac App Store (iOS-on-Mac) + 'com.kobo.desktop.Kobo', // No native macOS app (name-substring only): Netflix, Disney+, Hulu, // HBO Max, Peacock, Paramount+, YouTube, Crunchyroll, Tubi, Vudu, // Audible, Reddit, NYTimes. Their iOS apps don't opt into iPad-on-Mac. -]); +]) const POLICY_DENIED_NAME_SUBSTRINGS: readonly string[] = [ // Video streaming - "netflix", - "disney+", - "hulu", - "prime video", - "apple tv", - "peacock", - "paramount+", + 'netflix', + 'disney+', + 'hulu', + 'prime video', + 'apple tv', + 'peacock', + 'paramount+', // "plex" is too generic — would match "Perplexity". Covered by // tv.plex.* bundle IDs on macOS. - "tubi", - "crunchyroll", - "vudu", + 'tubi', + 'crunchyroll', + 'vudu', // E-readers / audiobooks - "kindle", - "apple books", - "kobo", - "play books", - "calibre", - "libby", - "readium", - "audible", - "libro.fm", - "speechify", + 'kindle', + 'apple books', + 'kobo', + 'play books', + 'calibre', + 'libby', + 'readium', + 'audible', + 'libro.fm', + 'speechify', // Music - "spotify", - "apple music", - "amazon music", - "youtube music", - "tidal", - "deezer", - "pandora", - "pocket casts", + 'spotify', + 'apple music', + 'amazon music', + 'youtube music', + 'tidal', + 'deezer', + 'pandora', + 'pocket casts', // Publisher / social apps (from the same blocklist tab) - "naver", - "reddit", - "sony music", - "vegas pro", - "pitchfork", - "economist", - "nytimes", + 'naver', + 'reddit', + 'sony music', + 'vegas pro', + 'pitchfork', + 'economist', + 'nytimes', // Skipped (too generic for substring matching — need bundle ID): // HBO Max / Max, YouTube (non-Music), Nook, Sony Catalyst, Wired -]; +] /** * Policy-level auto-deny. Unlike `userDeniedBundleIds` (per-user Settings @@ -298,19 +298,19 @@ export function isPolicyDenied( bundleId: string | undefined, displayName: string, ): boolean { - if (bundleId && POLICY_DENIED_BUNDLE_IDS.has(bundleId)) return true; - const lower = displayName.toLowerCase(); + if (bundleId && POLICY_DENIED_BUNDLE_IDS.has(bundleId)) return true + const lower = displayName.toLowerCase() for (const sub of POLICY_DENIED_NAME_SUBSTRINGS) { - if (lower.includes(sub)) return true; + if (lower.includes(sub)) return true } - return false; + return false } export function getDeniedCategory(bundleId: string): DeniedCategory | null { - if (BROWSER_BUNDLE_IDS.has(bundleId)) return "browser"; - if (TERMINAL_BUNDLE_IDS.has(bundleId)) return "terminal"; - if (TRADING_BUNDLE_IDS.has(bundleId)) return "trading"; - return null; + if (BROWSER_BUNDLE_IDS.has(bundleId)) return 'browser' + if (TERMINAL_BUNDLE_IDS.has(bundleId)) return 'terminal' + if (TRADING_BUNDLE_IDS.has(bundleId)) return 'trading' + return null } // ─── Display-name fallback (cross-platform) ────────────────────────────── @@ -325,160 +325,160 @@ export function getDeniedCategory(bundleId: string): DeniedCategory | null { * first match, but groupings are by category for readability). */ const BROWSER_NAME_SUBSTRINGS: readonly string[] = [ - "safari", - "chrome", - "firefox", - "microsoft edge", - "brave", - "opera", - "vivaldi", - "chromium", + 'safari', + 'chrome', + 'firefox', + 'microsoft edge', + 'brave', + 'opera', + 'vivaldi', + 'chromium', // Arc/Dia: the canonical display name is just "Arc"/"Dia" — too short for // substring matching (false-positives: "Arcade", "Diagram"). Covered by // bundle ID on macOS. The "... browser" entries below catch natural-language // phrasings ("the arc browser") but NOT the canonical short name. - "arc browser", - "tor browser", - "duckduckgo", - "yandex", - "orion browser", + 'arc browser', + 'tor browser', + 'duckduckgo', + 'yandex', + 'orion browser', // Agentic / AI browsers - "comet", // Perplexity's browser — "Comet" substring risks false positives + 'comet', // Perplexity's browser — "Comet" substring risks false positives // but leaving for now; "comet" in an app name is rare - "sigmaos", - "dia browser", -]; + 'sigmaos', + 'dia browser', +] const TERMINAL_NAME_SUBSTRINGS: readonly string[] = [ // macOS / cross-platform terminals - "terminal", // catches Terminal, Windows Terminal (NOT iTerm — separate entry) - "iterm", - "wezterm", - "alacritty", - "kitty", - "ghostty", - "tabby", - "termius", + 'terminal', // catches Terminal, Windows Terminal (NOT iTerm — separate entry) + 'iterm', + 'wezterm', + 'alacritty', + 'kitty', + 'ghostty', + 'tabby', + 'termius', // AppleScript runners — see bundle-ID comment above. "shortcuts" is too // generic for substring matching (many apps have "shortcuts" in the name); // covered by bundle ID only, like warp/hyper. - "script editor", - "automator", + 'script editor', + 'automator', // NOTE: "warp" and "hyper" are too generic for substring matching — // they'd false-positive on "Warpaint" or "Hyperion". Covered by bundle ID // (dev.warp.Warp-Stable, co.zeit.hyper) for macOS; Windows exe-name // matching can be added when Windows CU ships. // Windows shells (activate when the darwin gate lifts) - "powershell", - "cmd.exe", - "command prompt", - "git bash", - "conemu", - "cmder", + 'powershell', + 'cmd.exe', + 'command prompt', + 'git bash', + 'conemu', + 'cmder', // IDEs (VS Code family) - "visual studio code", - "visual studio", // catches VS for Mac + Windows - "vscode", - "vs code", - "vscodium", - "cursor", // Cursor IDE — "cursor" is generic but IDE is the only common app - "windsurf", + 'visual studio code', + 'visual studio', // catches VS for Mac + Windows + 'vscode', + 'vs code', + 'vscodium', + 'cursor', // Cursor IDE — "cursor" is generic but IDE is the only common app + 'windsurf', // Zed: display name is just "Zed" — too short for substring matching // (false-positives). Covered by bundle ID (dev.zed.Zed) on macOS. // IDEs (JetBrains family) - "intellij", - "pycharm", - "webstorm", - "clion", - "goland", - "rubymine", - "phpstorm", - "datagrip", - "rider", - "appcode", - "rustrover", - "fleet", - "android studio", + 'intellij', + 'pycharm', + 'webstorm', + 'clion', + 'goland', + 'rubymine', + 'phpstorm', + 'datagrip', + 'rider', + 'appcode', + 'rustrover', + 'fleet', + 'android studio', // Other IDEs - "sublime text", - "macvim", - "neovim", - "emacs", - "xcode", - "eclipse", - "netbeans", -]; + 'sublime text', + 'macvim', + 'neovim', + 'emacs', + 'xcode', + 'eclipse', + 'netbeans', +] const TRADING_NAME_SUBSTRINGS: readonly string[] = [ // Trading — brokerage apps. Sourced from the ACP CU-apps blocklist xlsx // ("Read Only" tab). Name-substring safe for proper nouns below; generic // names (IG, Delta, HTX) are skipped and need bundle-ID matching once // verified. - "bloomberg", - "ameritrade", - "thinkorswim", - "schwab", - "fidelity", - "e*trade", - "interactive brokers", - "trader workstation", // Interactive Brokers TWS - "tradestation", - "webull", - "robinhood", - "tastytrade", - "ninjatrader", - "tradingview", - "moomoo", - "tradezero", - "prorealtime", - "plus500", - "saxotrader", - "oanda", - "metatrader", - "forex.com", - "avaoptions", - "ctrader", - "jforex", - "iq option", - "olymp trade", - "binomo", - "pocket option", - "raceoption", - "expertoption", - "quotex", - "naga", - "morgan stanley", - "ubs neo", - "eikon", // Thomson Reuters / LSEG Workspace + 'bloomberg', + 'ameritrade', + 'thinkorswim', + 'schwab', + 'fidelity', + 'e*trade', + 'interactive brokers', + 'trader workstation', // Interactive Brokers TWS + 'tradestation', + 'webull', + 'robinhood', + 'tastytrade', + 'ninjatrader', + 'tradingview', + 'moomoo', + 'tradezero', + 'prorealtime', + 'plus500', + 'saxotrader', + 'oanda', + 'metatrader', + 'forex.com', + 'avaoptions', + 'ctrader', + 'jforex', + 'iq option', + 'olymp trade', + 'binomo', + 'pocket option', + 'raceoption', + 'expertoption', + 'quotex', + 'naga', + 'morgan stanley', + 'ubs neo', + 'eikon', // Thomson Reuters / LSEG Workspace // Crypto — exchanges, wallets, portfolio trackers - "coinbase", - "kraken", - "binance", - "okx", - "bybit", + 'coinbase', + 'kraken', + 'binance', + 'okx', + 'bybit', // "gate.io" is too generic — the ".io" TLD suffix is common in app names // (e.g., "Draw.io"). Needs bundle-ID matching once verified. - "phemex", - "stormgain", - "crypto.com", + 'phemex', + 'stormgain', + 'crypto.com', // "exodus" is too generic — it's a common noun and would match unrelated // apps/games. Needs bundle-ID matching once verified. - "electrum", - "ledger live", - "trezor", - "guarda", - "atomic wallet", - "bitpay", - "bisq", - "koinly", - "cointracker", - "blockfi", - "stripe cli", + 'electrum', + 'ledger live', + 'trezor', + 'guarda', + 'atomic wallet', + 'bitpay', + 'bisq', + 'koinly', + 'cointracker', + 'blockfi', + 'stripe cli', // Crypto games / metaverse (same trade-execution risk model) - "decentraland", - "axie infinity", - "gods unchained", -]; + 'decentraland', + 'axie infinity', + 'gods unchained', +] /** * Display-name substring match. Called when bundle-ID resolution returned @@ -491,20 +491,20 @@ const TRADING_NAME_SUBSTRINGS: readonly string[] = [ export function getDeniedCategoryByDisplayName( name: string, ): DeniedCategory | null { - const lower = name.toLowerCase(); + const lower = name.toLowerCase() // Trading first — proper-noun-only set, most specific. "Bloomberg Terminal" // contains "terminal" and would miscategorize if TERMINAL_NAME_SUBSTRINGS // ran first. for (const sub of TRADING_NAME_SUBSTRINGS) { - if (lower.includes(sub)) return "trading"; + if (lower.includes(sub)) return 'trading' } for (const sub of BROWSER_NAME_SUBSTRINGS) { - if (lower.includes(sub)) return "browser"; + if (lower.includes(sub)) return 'browser' } for (const sub of TERMINAL_NAME_SUBSTRINGS) { - if (lower.includes(sub)) return "terminal"; + if (lower.includes(sub)) return 'terminal' } - return null; + return null } /** @@ -520,10 +520,10 @@ export function getDeniedCategoryForApp( displayName: string, ): DeniedCategory | null { if (bundleId) { - const byId = getDeniedCategory(bundleId); - if (byId) return byId; + const byId = getDeniedCategory(bundleId) + if (byId) return byId } - return getDeniedCategoryByDisplayName(displayName); + return getDeniedCategoryByDisplayName(displayName) } /** @@ -537,8 +537,8 @@ export function getDeniedCategoryForApp( export function getDefaultTierForApp( bundleId: string | undefined, displayName: string, -): "read" | "click" | "full" { - return categoryToTier(getDeniedCategoryForApp(bundleId, displayName)); +): 'read' | 'click' | 'full' { + return categoryToTier(getDeniedCategoryForApp(bundleId, displayName)) } export const _test = { @@ -550,4 +550,4 @@ export const _test = { TERMINAL_NAME_SUBSTRINGS, TRADING_NAME_SUBSTRINGS, POLICY_DENIED_NAME_SUBSTRINGS, -}; +} diff --git a/packages/@ant/computer-use-mcp/src/executor.ts b/packages/@ant/computer-use-mcp/src/executor.ts index 5b070298b..15df0706f 100644 --- a/packages/@ant/computer-use-mcp/src/executor.ts +++ b/packages/@ant/computer-use-mcp/src/executor.ts @@ -116,9 +116,17 @@ export interface ComputerExecutor { // ── Window management (Windows only, optional) ────────────────────────── /** Perform a window management action on the bound window. Win32 API only — no global shortcuts. */ - manageWindow?(action: string, opts?: { x?: number; y?: number; width?: number; height?: number }): Promise + manageWindow?( + action: string, + opts?: { x?: number; y?: number; width?: number; height?: number }, + ): Promise /** Get the current window rect of the bound window */ - getWindowRect?(): Promise<{ x: number; y: number; width: number; height: number } | null> + getWindowRect?(): Promise<{ + x: number + y: number + width: number + height: number + } | null> // ── Element-targeted actions (Windows UIA, optional) ──────────────────── /** Open terminal and launch an agent CLI */ @@ -129,17 +137,32 @@ export interface ComputerExecutor { workingDirectory?: string }): Promise<{ hwnd: string; title: string; launched: boolean } | null> /** Bind to a window by hwnd/title/pid. Returns bound window info or null. */ - bindToWindow?(query: { hwnd?: string; title?: string; pid?: number }): Promise<{ hwnd: string; title: string; pid: number } | null> + bindToWindow?(query: { + hwnd?: string + title?: string + pid?: number + }): Promise<{ hwnd: string; title: string; pid: number } | null> /** Unbind from the current window */ unbindFromWindow?(): Promise /** Cheap binding-state check for window-targeted routing decisions. */ hasBoundWindow?(): Promise /** Get current binding status */ - getBindingStatus?(): Promise<{ bound: boolean; hwnd?: string; title?: string; pid?: number; rect?: { x: number; y: number; width: number; height: number } } | null> + getBindingStatus?(): Promise<{ + bound: boolean + hwnd?: string + title?: string + pid?: number + rect?: { x: number; y: number; width: number; height: number } + } | null> /** List all visible windows */ - listVisibleWindows?(): Promise> + listVisibleWindows?(): Promise< + Array<{ hwnd: string; pid: number; title: string }> + > /** Control the status indicator overlay */ - statusIndicator?(action: 'show' | 'hide' | 'status', message?: string): Promise<{ active: boolean; message?: string }> + statusIndicator?( + action: 'show' | 'hide' | 'status', + message?: string, + ): Promise<{ active: boolean; message?: string }> /** Virtual keyboard — send keys/text/combos to bound window only */ virtualKeyboard?(opts: { action: 'type' | 'combo' | 'press' | 'release' | 'hold' @@ -149,12 +172,26 @@ export interface ComputerExecutor { }): Promise /** Virtual mouse — click/move/drag on bound window only */ virtualMouse?(opts: { - action: 'click' | 'double_click' | 'right_click' | 'move' | 'drag' | 'down' | 'up' - x: number; y: number - startX?: number; startY?: number + action: + | 'click' + | 'double_click' + | 'right_click' + | 'move' + | 'drag' + | 'down' + | 'up' + x: number + y: number + startX?: number + startY?: number }): Promise /** Mouse wheel scroll at client coordinates (works on Excel, browsers, modern UI) */ - mouseWheel?(x: number, y: number, delta: number, horizontal?: boolean): Promise + mouseWheel?( + x: number, + y: number, + delta: number, + horizontal?: boolean, + ): Promise /** Activate the bound window (foreground + click to focus) */ activateWindow?(clickX?: number, clickY?: number): Promise /** Handle a terminal prompt (yes/no/select/type + enter) */ @@ -165,7 +202,14 @@ export interface ComputerExecutor { text?: string }): Promise /** Click an element by name/role/automationId via UI Automation */ - clickElement?(query: { name?: string; role?: string; automationId?: string }): Promise + clickElement?(query: { + name?: string + role?: string + automationId?: string + }): Promise /** Type text into an element by name/role/automationId via UI Automation ValuePattern */ - typeIntoElement?(query: { name?: string; role?: string; automationId?: string }, text: string): Promise + typeIntoElement?( + query: { name?: string; role?: string; automationId?: string }, + text: string, + ): Promise } diff --git a/packages/@ant/computer-use-mcp/src/imageResize.ts b/packages/@ant/computer-use-mcp/src/imageResize.ts index fc529714c..8d25ded85 100644 --- a/packages/@ant/computer-use-mcp/src/imageResize.ts +++ b/packages/@ant/computer-use-mcp/src/imageResize.ts @@ -13,9 +13,9 @@ */ export interface ResizeParams { - pxPerToken: number; - maxTargetPx: number; - maxTargetTokens: number; + pxPerToken: number + maxTargetPx: number + maxTargetTokens: number } /** @@ -27,11 +27,11 @@ export const API_RESIZE_PARAMS: ResizeParams = { pxPerToken: 28, maxTargetPx: 1568, maxTargetTokens: 1568, -}; +} /** ceil(px / pxPerToken). Matches resize.rs:74-76 (which uses integer ceil-div). */ export function nTokensForPx(px: number, pxPerToken: number): number { - return Math.floor((px - 1) / pxPerToken) + 1; + return Math.floor((px - 1) / pxPerToken) + 1 } function nTokensForImg( @@ -39,7 +39,7 @@ function nTokensForImg( height: number, pxPerToken: number, ): number { - return nTokensForPx(width, pxPerToken) * nTokensForPx(height, pxPerToken); + return nTokensForPx(width, pxPerToken) * nTokensForPx(height, pxPerToken) } /** @@ -62,47 +62,47 @@ export function targetImageSize( height: number, params: ResizeParams, ): [number, number] { - const { pxPerToken, maxTargetPx, maxTargetTokens } = params; + const { pxPerToken, maxTargetPx, maxTargetTokens } = params if ( width <= maxTargetPx && height <= maxTargetPx && nTokensForImg(width, height, pxPerToken) <= maxTargetTokens ) { - return [width, height]; + return [width, height] } // Normalize to landscape for the search; transpose result back. if (height > width) { - const [w, h] = targetImageSize(height, width, params); - return [h, w]; + const [w, h] = targetImageSize(height, width, params) + return [h, w] } - const aspectRatio = width / height; + const aspectRatio = width / height // Loop invariant: lowerBoundWidth is always valid, upperBoundWidth is // always invalid. ~12 iterations for a 4000px image. - let upperBoundWidth = width; - let lowerBoundWidth = 1; + let upperBoundWidth = width + let lowerBoundWidth = 1 for (;;) { if (lowerBoundWidth + 1 === upperBoundWidth) { return [ lowerBoundWidth, Math.max(Math.round(lowerBoundWidth / aspectRatio), 1), - ]; + ] } - const middleWidth = Math.floor((lowerBoundWidth + upperBoundWidth) / 2); - const middleHeight = Math.max(Math.round(middleWidth / aspectRatio), 1); + const middleWidth = Math.floor((lowerBoundWidth + upperBoundWidth) / 2) + const middleHeight = Math.max(Math.round(middleWidth / aspectRatio), 1) if ( middleWidth <= maxTargetPx && nTokensForImg(middleWidth, middleHeight, pxPerToken) <= maxTargetTokens ) { - lowerBoundWidth = middleWidth; + lowerBoundWidth = middleWidth } else { - upperBoundWidth = middleWidth; + upperBoundWidth = middleWidth } } } diff --git a/packages/@ant/computer-use-mcp/src/index.ts b/packages/@ant/computer-use-mcp/src/index.ts index 1e012cb2d..d55e026c6 100644 --- a/packages/@ant/computer-use-mcp/src/index.ts +++ b/packages/@ant/computer-use-mcp/src/index.ts @@ -6,7 +6,7 @@ export type { ResolvePrepareCaptureResult, RunningApp, ScreenshotResult, -} from "./executor.js"; +} from './executor.js' export type { AppGrant, @@ -25,15 +25,15 @@ export type { ScreenshotDims, TeachStepRequest, TeachStepResult, -} from "./types.js"; +} from './types.js' -export { DEFAULT_GRANT_FLAGS } from "./types.js"; +export { DEFAULT_GRANT_FLAGS } from './types.js' export { SENTINEL_BUNDLE_IDS, getSentinelCategory, -} from "./sentinelApps.js"; -export type { SentinelCategory } from "./sentinelApps.js"; +} from './sentinelApps.js' +export type { SentinelCategory } from './sentinelApps.js' export { categoryToTier, @@ -42,28 +42,28 @@ export { getDeniedCategoryByDisplayName, getDeniedCategoryForApp, isPolicyDenied, -} from "./deniedApps.js"; -export type { DeniedCategory } from "./deniedApps.js"; +} from './deniedApps.js' +export type { DeniedCategory } from './deniedApps.js' -export { isSystemKeyCombo, normalizeKeySequence } from "./keyBlocklist.js"; +export { isSystemKeyCombo, normalizeKeySequence } from './keyBlocklist.js' -export { ALL_SUB_GATES_OFF, ALL_SUB_GATES_ON } from "./subGates.js"; +export { ALL_SUB_GATES_OFF, ALL_SUB_GATES_ON } from './subGates.js' -export { API_RESIZE_PARAMS, targetImageSize } from "./imageResize.js"; -export type { ResizeParams } from "./imageResize.js"; +export { API_RESIZE_PARAMS, targetImageSize } from './imageResize.js' +export type { ResizeParams } from './imageResize.js' -export { defersLockAcquire, handleToolCall } from "./toolCalls.js"; +export { defersLockAcquire, handleToolCall } from './toolCalls.js' export type { CuCallTelemetry, CuCallToolResult, CuErrorKind, -} from "./toolCalls.js"; +} from './toolCalls.js' -export { bindSessionContext, createComputerUseMcpServer } from "./mcpServer.js"; -export { buildComputerUseTools } from "./tools.js"; +export { bindSessionContext, createComputerUseMcpServer } from './mcpServer.js' +export { buildComputerUseTools } from './tools.js' export { comparePixelAtLocation, validateClickTarget, -} from "./pixelCompare.js"; -export type { CropRawPatchFn, PixelCompareResult } from "./pixelCompare.js"; +} from './pixelCompare.js' +export type { CropRawPatchFn, PixelCompareResult } from './pixelCompare.js' diff --git a/packages/@ant/computer-use-mcp/src/keyBlocklist.ts b/packages/@ant/computer-use-mcp/src/keyBlocklist.ts index 1373e1506..b530dbaa4 100644 --- a/packages/@ant/computer-use-mcp/src/keyBlocklist.ts +++ b/packages/@ant/computer-use-mcp/src/keyBlocklist.ts @@ -21,32 +21,32 @@ */ const CANONICAL_MODIFIER: Readonly> = { // Key::Meta — "meta"|"super"|"command"|"cmd"|"windows"|"win" - meta: "meta", - super: "meta", - command: "meta", - cmd: "meta", - windows: "meta", - win: "meta", + meta: 'meta', + super: 'meta', + command: 'meta', + cmd: 'meta', + windows: 'meta', + win: 'meta', // Key::Control + LControl + RControl - ctrl: "ctrl", - control: "ctrl", - lctrl: "ctrl", - lcontrol: "ctrl", - rctrl: "ctrl", - rcontrol: "ctrl", + ctrl: 'ctrl', + control: 'ctrl', + lctrl: 'ctrl', + lcontrol: 'ctrl', + rctrl: 'ctrl', + rcontrol: 'ctrl', // Key::Shift + LShift + RShift - shift: "shift", - lshift: "shift", - rshift: "shift", + shift: 'shift', + lshift: 'shift', + rshift: 'shift', // Key::Alt and Key::Option — distinct Rust variants but same keycode on // darwin (kVK_Option). Collapse: cmd+alt+escape and cmd+option+escape // both Force Quit. - alt: "alt", - option: "alt", -}; + alt: 'alt', + option: 'alt', +} /** Sort order for canonicals. ctrl < alt < shift < meta. */ -const MODIFIER_ORDER = ["ctrl", "alt", "shift", "meta"]; +const MODIFIER_ORDER = ['ctrl', 'alt', 'shift', 'meta'] /** * Canonical-form entries only. Every modifier must be a CANONICAL_MODIFIER @@ -54,21 +54,21 @@ const MODIFIER_ORDER = ["ctrl", "alt", "shift", "meta"]; * The self-consistency test enforces this. */ const BLOCKED_DARWIN = new Set([ - "meta+q", // Cmd+Q — quit frontmost app - "shift+meta+q", // Cmd+Shift+Q — log out - "alt+meta+escape", // Cmd+Option+Esc — Force Quit dialog - "meta+tab", // Cmd+Tab — app switcher - "meta+space", // Cmd+Space — Spotlight - "ctrl+meta+q", // Ctrl+Cmd+Q — lock screen -]); + 'meta+q', // Cmd+Q — quit frontmost app + 'shift+meta+q', // Cmd+Shift+Q — log out + 'alt+meta+escape', // Cmd+Option+Esc — Force Quit dialog + 'meta+tab', // Cmd+Tab — app switcher + 'meta+space', // Cmd+Space — Spotlight + 'ctrl+meta+q', // Ctrl+Cmd+Q — lock screen +]) const BLOCKED_WIN32 = new Set([ - "ctrl+alt+delete", // Secure Attention Sequence - "alt+f4", // close window - "alt+tab", // window switcher - "meta+l", // Win+L — lock - "meta+d", // Win+D — show desktop -]); + 'ctrl+alt+delete', // Secure Attention Sequence + 'alt+f4', // close window + 'alt+tab', // window switcher + 'meta+l', // Win+L — lock + 'meta+d', // Win+D — show desktop +]) /** * Partition into sorted-canonical modifiers and non-modifier keys. @@ -78,25 +78,25 @@ const BLOCKED_WIN32 = new Set([ function partitionKeys(seq: string): { mods: string[]; keys: string[] } { const parts = seq .toLowerCase() - .split("+") - .map((p) => p.trim()) - .filter(Boolean); - const mods: string[] = []; - const keys: string[] = []; + .split('+') + .map(p => p.trim()) + .filter(Boolean) + const mods: string[] = [] + const keys: string[] = [] for (const p of parts) { - const canonical = CANONICAL_MODIFIER[p]; + const canonical = CANONICAL_MODIFIER[p] if (canonical !== undefined) { - mods.push(canonical); + mods.push(canonical) } else { - keys.push(p); + keys.push(p) } } // Dedupe: "cmd+command+q" → "meta+q", not "meta+meta+q". - const uniqueMods = [...new Set(mods)]; + const uniqueMods = [...new Set(mods)] uniqueMods.sort( (a, b) => MODIFIER_ORDER.indexOf(a) - MODIFIER_ORDER.indexOf(b), - ); - return { mods: uniqueMods, keys }; + ) + return { mods: uniqueMods, keys } } /** @@ -104,8 +104,8 @@ function partitionKeys(seq: string): { mods: string[]; keys: string[] } { * canonical, dedupe, sort modifiers, non-modifiers last. */ export function normalizeKeySequence(seq: string): string { - const { mods, keys } = partitionKeys(seq); - return [...mods, ...keys].join("+"); + const { mods, keys } = partitionKeys(seq) + return [...mods, ...keys].join('+') } /** @@ -123,26 +123,26 @@ export function normalizeKeySequence(seq: string): string { */ export function isSystemKeyCombo( seq: string, - platform: "darwin" | "win32", + platform: 'darwin' | 'win32', ): boolean { - const blocklist = platform === "darwin" ? BLOCKED_DARWIN : BLOCKED_WIN32; - const { mods, keys } = partitionKeys(seq); - const prefix = mods.length > 0 ? mods.join("+") + "+" : ""; + const blocklist = platform === 'darwin' ? BLOCKED_DARWIN : BLOCKED_WIN32 + const { mods, keys } = partitionKeys(seq) + const prefix = mods.length > 0 ? mods.join('+') + '+' : '' // No non-modifier keys (e.g. "cmd+shift" as click-modifiers) — check the // whole thing. Never matches (no blocklist entry is modifier-only) but // keeps the contract simple: every call reaches a .has(). if (keys.length === 0) { - return blocklist.has(mods.join("+")); + return blocklist.has(mods.join('+')) } // mods + each key. Any hit blocks the whole sequence. for (const key of keys) { if (blocklist.has(prefix + key)) { - return true; + return true } } - return false; + return false } export const _test = { @@ -150,4 +150,4 @@ export const _test = { BLOCKED_DARWIN, BLOCKED_WIN32, MODIFIER_ORDER, -}; +} diff --git a/packages/@ant/computer-use-mcp/src/mcpServer.ts b/packages/@ant/computer-use-mcp/src/mcpServer.ts index 4b1f0ca24..912ae2b54 100644 --- a/packages/@ant/computer-use-mcp/src/mcpServer.ts +++ b/packages/@ant/computer-use-mcp/src/mcpServer.ts @@ -17,21 +17,21 @@ * is the same either way. */ -import { Server } from "@modelcontextprotocol/sdk/server/index.js"; -import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; +import { Server } from '@modelcontextprotocol/sdk/server/index.js' +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js' import { CallToolRequestSchema, ListToolsRequestSchema, -} from "@modelcontextprotocol/sdk/types.js"; +} from '@modelcontextprotocol/sdk/types.js' -import type { ScreenshotResult } from "./executor.js"; -import type { CuCallToolResult } from "./toolCalls.js"; +import type { ScreenshotResult } from './executor.js' +import type { CuCallToolResult } from './toolCalls.js' import { defersLockAcquire, handleToolCall, resetMouseButtonHeld, -} from "./toolCalls.js"; -import { buildComputerUseTools } from "./tools.js"; +} from './toolCalls.js' +import { buildComputerUseTools } from './tools.js' import type { AppGrant, ComputerUseHostAdapter, @@ -40,12 +40,12 @@ import type { CoordinateMode, CuGrantFlags, CuPermissionResponse, -} from "./types.js"; -import { DEFAULT_GRANT_FLAGS } from "./types.js"; +} from './types.js' +import { DEFAULT_GRANT_FLAGS } from './types.js' const DEFAULT_LOCK_HELD_MESSAGE = - "Another Claude session is currently using the computer. Wait for that " + - "session to finish, or find a non-computer-use approach."; + 'Another Claude session is currently using the computer. Wait for that ' + + 'session to finish, or find a non-computer-use approach.' /** * Dedupe `granted` into `existing` on bundleId, spread truthy-only flags over @@ -60,20 +60,20 @@ function mergePermissionResponse( existingFlags: CuGrantFlags, response: CuPermissionResponse, ): { apps: AppGrant[]; flags: CuGrantFlags } { - const seen = new Set(existing.map((a) => a.bundleId)); + const seen = new Set(existing.map(a => a.bundleId)) const apps = [ ...existing, - ...response.granted.filter((g) => !seen.has(g.bundleId)), - ]; + ...response.granted.filter(g => !seen.has(g.bundleId)), + ] const truthyFlags = Object.fromEntries( Object.entries(response.flags).filter(([, v]) => v === true), - ); + ) const flags: CuGrantFlags = { ...DEFAULT_GRANT_FLAGS, ...existingFlags, ...truthyFlags, - }; - return { apps, flags }; + } + return { apps, flags } } /** @@ -91,53 +91,53 @@ export function bindSessionContext( coordinateMode: CoordinateMode, ctx: ComputerUseSessionContext, ): (name: string, args: unknown) => Promise { - const { logger, serverName } = adapter; + const { logger, serverName } = adapter // Screenshot blob persists here across calls — NOT on `ctx`. Hosts hold // onto the returned dispatcher; that's the identity that matters. - let lastScreenshot: ScreenshotResult | undefined; + let lastScreenshot: ScreenshotResult | undefined const wrapPermission = ctx.onPermissionRequest ? async ( req: Parameters>[0], signal: AbortSignal, ): Promise => { - const response = await ctx.onPermissionRequest!(req, signal); + const response = await ctx.onPermissionRequest!(req, signal) const { apps, flags } = mergePermissionResponse( ctx.getAllowedApps(), ctx.getGrantFlags(), response, - ); + ) logger.debug( `[${serverName}] permission result: granted=${response.granted.length} denied=${response.denied.length}`, - ); - ctx.onAllowedAppsChanged?.(apps, flags); - return response; + ) + ctx.onAllowedAppsChanged?.(apps, flags) + return response } - : undefined; + : undefined const wrapTeachPermission = ctx.onTeachPermissionRequest ? async ( req: Parameters>[0], signal: AbortSignal, ): Promise => { - const response = await ctx.onTeachPermissionRequest!(req, signal); + const response = await ctx.onTeachPermissionRequest!(req, signal) logger.debug( `[${serverName}] teach permission result: granted=${response.granted.length} denied=${response.denied.length}`, - ); + ) // Teach doesn't request grant flags — preserve existing. const { apps } = mergePermissionResponse( ctx.getAllowedApps(), ctx.getGrantFlags(), response, - ); + ) ctx.onAllowedAppsChanged?.(apps, { ...DEFAULT_GRANT_FLAGS, ...ctx.getGrantFlags(), - }); - return response; + }) + return response } - : undefined; + : undefined return async (name, args) => { // ─── Async lock gate ───────────────────────────────────────────────── @@ -146,18 +146,18 @@ export function bindSessionContext( // cross-process locks (O_EXCL file) await the real primitive here // instead of pre-computing + feeding a fake sync result. if (ctx.checkCuLock) { - const lock = await ctx.checkCuLock(); + const lock = await ctx.checkCuLock() if (lock.holder !== undefined && !lock.isSelf) { const text = - ctx.formatLockHeldMessage?.(lock.holder) ?? DEFAULT_LOCK_HELD_MESSAGE; + ctx.formatLockHeldMessage?.(lock.holder) ?? DEFAULT_LOCK_HELD_MESSAGE return { - content: [{ type: "text", text }], + content: [{ type: 'text', text }], isError: true, - telemetry: { error_kind: "cu_lock_held" }, - }; + telemetry: { error_kind: 'cu_lock_held' }, + } } if (lock.holder === undefined && !defersLockAcquire(name)) { - await ctx.acquireCuLock?.(); + await ctx.acquireCuLock?.() // Re-check: the awaits above yield the microtask queue, so another // session's check+acquire can interleave with ours. Hosts where // acquire is a no-op when already held (Cowork's CuLockManager) give @@ -165,21 +165,21 @@ export function bindSessionContext( // proceeding. The CLI's O_EXCL file lock would surface this as a throw from // acquire instead; this re-check is a belt-and-suspenders for that // path too. - const recheck = await ctx.checkCuLock(); + const recheck = await ctx.checkCuLock() if (recheck.holder !== undefined && !recheck.isSelf) { const text = ctx.formatLockHeldMessage?.(recheck.holder) ?? - DEFAULT_LOCK_HELD_MESSAGE; + DEFAULT_LOCK_HELD_MESSAGE return { - content: [{ type: "text", text }], + content: [{ type: 'text', text }], isError: true, - telemetry: { error_kind: "cu_lock_held" }, - }; + telemetry: { error_kind: 'cu_lock_held' }, + } } // Fresh holder → any prior session's mouseButtonHeld is stale. // Mirrors what Gate-3 does on the acquire branch. After the // re-check so we only clear module state when we actually won. - resetMouseButtonHeld(); + resetMouseButtonHeld() } } @@ -189,12 +189,12 @@ export function bindSessionContext( // isEmpty → skip. const dimsFallback = lastScreenshot ? undefined - : ctx.getLastScreenshotDims?.(); + : ctx.getLastScreenshotDims?.() // Per-call AbortController for dialog dismissal. Aborted in `finally` — // if handleToolCall finishes (MCP timeout, throw) before the user // answers, the host's dialog handler sees the abort and tears down. - const dialogAbort = new AbortController(); + const dialogAbort = new AbortController() const overrides: ComputerUseOverrides = { allowedApps: [...ctx.getAllowedApps()], @@ -206,12 +206,12 @@ export function bindSessionContext( displayResolvedForApps: ctx.getDisplayResolvedForApps?.(), lastScreenshot: lastScreenshot ?? - (dimsFallback ? { ...dimsFallback, base64: "" } : undefined), + (dimsFallback ? { ...dimsFallback, base64: '' } : undefined), onPermissionRequest: wrapPermission - ? (req) => wrapPermission(req, dialogAbort.signal) + ? req => wrapPermission(req, dialogAbort.signal) : undefined, onTeachPermissionRequest: wrapTeachPermission - ? (req) => wrapTeachPermission(req, dialogAbort.signal) + ? req => wrapTeachPermission(req, dialogAbort.signal) : undefined, onAppsHidden: ctx.onAppsHidden, getClipboardStash: ctx.getClipboardStash, @@ -228,28 +228,28 @@ export function bindSessionContext( checkCuLock: undefined, acquireCuLock: undefined, isAborted: ctx.isAborted, - }; + } logger.debug( `[${serverName}] tool=${name} allowedApps=${overrides.allowedApps.length} coordMode=${coordinateMode}`, - ); + ) // ─── Dispatch ──────────────────────────────────────────────────────── try { - const result = await handleToolCall(adapter, name, args, overrides); + const result = await handleToolCall(adapter, name, args, overrides) if (result.screenshot) { - lastScreenshot = result.screenshot; - const { base64: _blob, ...dims } = result.screenshot; - logger.debug(`[${serverName}] screenshot dims: ${JSON.stringify(dims)}`); - ctx.onScreenshotCaptured?.(dims); + lastScreenshot = result.screenshot + const { base64: _blob, ...dims } = result.screenshot + logger.debug(`[${serverName}] screenshot dims: ${JSON.stringify(dims)}`) + ctx.onScreenshotCaptured?.(dims) } - return result; + return result } finally { - dialogAbort.abort(); + dialogAbort.abort() } - }; + } } export function createComputerUseMcpServer( @@ -257,35 +257,36 @@ export function createComputerUseMcpServer( coordinateMode: CoordinateMode, context?: ComputerUseSessionContext, ): Server { - const { serverName, logger } = adapter; + const { serverName, logger } = adapter const server = new Server( - { name: serverName, version: "0.1.3" }, + { name: serverName, version: '0.1.3' }, { capabilities: { tools: {}, logging: {} } }, - ); + ) const tools = buildComputerUseTools( adapter.executor.capabilities, coordinateMode, - ); + ) server.setRequestHandler(ListToolsRequestSchema, async () => adapter.isDisabled() ? { tools: [] } : { tools }, - ); + ) if (context) { - const dispatch = bindSessionContext(adapter, coordinateMode, context); + const dispatch = bindSessionContext(adapter, coordinateMode, context) server.setRequestHandler( CallToolRequestSchema, async (request): Promise => { - const { screenshot: _s, telemetry: _t, ...result } = await dispatch( - request.params.name, - request.params.arguments ?? {}, - ); - return result; + const { + screenshot: _s, + telemetry: _t, + ...result + } = await dispatch(request.params.name, request.params.arguments ?? {}) + return result }, - ); - return server; + ) + return server } // Legacy: no context → stub handler. Reached only if something calls the @@ -296,18 +297,18 @@ export function createComputerUseMcpServer( async (request): Promise => { logger.warn( `[${serverName}] tool call "${request.params.name}" reached the stub handler — no session context bound. Per-session state unavailable.`, - ); + ) return { content: [ { - type: "text", - text: "This computer-use server instance is not wired to a session. Per-session app permissions are not available on this code path.", + type: 'text', + text: 'This computer-use server instance is not wired to a session. Per-session app permissions are not available on this code path.', }, ], isError: true, - }; + } }, - ); + ) - return server; + return server } diff --git a/packages/@ant/computer-use-mcp/src/pixelCompare.ts b/packages/@ant/computer-use-mcp/src/pixelCompare.ts index 05153f602..b8f0efae3 100644 --- a/packages/@ant/computer-use-mcp/src/pixelCompare.ts +++ b/packages/@ant/computer-use-mcp/src/pixelCompare.ts @@ -19,28 +19,28 @@ * this package never imports it — the crop is a function parameter. */ -import type { ScreenshotResult } from "./executor.js"; -import type { Logger } from "./types.js"; +import type { ScreenshotResult } from './executor.js' +import type { Logger } from './types.js' /** Injected by the host. See `ComputerUseHostAdapter.cropRawPatch`. */ export type CropRawPatchFn = ( jpegBase64: string, rect: { x: number; y: number; width: number; height: number }, -) => Buffer | null; +) => Buffer | null /** 9×9 is empirically the sweet spot — large enough to catch a tooltip * appearing, small enough to not false-positive on surrounding animation. **/ -const DEFAULT_GRID_SIZE = 9; +const DEFAULT_GRID_SIZE = 9 export interface PixelCompareResult { /** true → click may proceed. false → patch changed, abort the click. */ - valid: boolean; + valid: boolean /** true → validation did not run (cold start, sub-gate off, or internal * error). The caller MUST treat this identically to `valid: true`. */ - skipped: boolean; + skipped: boolean /** Populated when valid === false. Returned to the model verbatim. */ - warning?: string; + warning?: string } /** @@ -57,22 +57,22 @@ function computeCropRect( yPercent: number, gridSize: number, ): { x: number; y: number; width: number; height: number } | null { - if (!imgW || !imgH) return null; + if (!imgW || !imgH) return null - const clampedX = Math.max(0, Math.min(100, xPercent)); - const clampedY = Math.max(0, Math.min(100, yPercent)); + const clampedX = Math.max(0, Math.min(100, xPercent)) + const clampedY = Math.max(0, Math.min(100, yPercent)) - const centerX = Math.round((clampedX / 100.0) * imgW); - const centerY = Math.round((clampedY / 100.0) * imgH); + const centerX = Math.round((clampedX / 100.0) * imgW) + const centerY = Math.round((clampedY / 100.0) * imgH) - const halfGrid = Math.floor(gridSize / 2); - const cropX = Math.max(0, centerX - halfGrid); - const cropY = Math.max(0, centerY - halfGrid); - const cropW = Math.min(gridSize, imgW - cropX); - const cropH = Math.min(gridSize, imgH - cropY); - if (cropW <= 0 || cropH <= 0) return null; + const halfGrid = Math.floor(gridSize / 2) + const cropX = Math.max(0, centerX - halfGrid) + const cropY = Math.max(0, centerY - halfGrid) + const cropW = Math.min(gridSize, imgW - cropX) + const cropH = Math.min(gridSize, imgH - cropY) + if (cropW <= 0 || cropH <= 0) return null - return { x: cropX, y: cropY, width: cropW, height: cropH }; + return { x: cropX, y: cropY, width: cropW, height: cropH } } /** @@ -98,17 +98,17 @@ export function comparePixelAtLocation( xPercent, yPercent, gridSize, - ); - if (!rect) return false; + ) + if (!rect) return false - const patch1 = crop(lastScreenshot.base64, rect); - const patch2 = crop(freshScreenshot.base64, rect); - if (!patch1 || !patch2) return false; + const patch1 = crop(lastScreenshot.base64, rect) + const patch2 = crop(freshScreenshot.base64, rect) + if (!patch1 || !patch2) return false // Direct buffer equality. Note: nativeImage.toBitmap() gives BGRA, sharp's // .raw() gave RGB. // Doesn't matter — we're comparing two same-format buffers for equality. - return patch1.equals(patch2); + return patch1.equals(patch2) } /** @@ -135,13 +135,13 @@ export async function validateClickTarget( gridSize: number = DEFAULT_GRID_SIZE, ): Promise { if (!lastScreenshot) { - return { valid: true, skipped: true }; + return { valid: true, skipped: true } } try { - const fresh = await takeFreshScreenshot(); + const fresh = await takeFreshScreenshot() if (!fresh) { - return { valid: true, skipped: true }; + return { valid: true, skipped: true } } const pixelsMatch = comparePixelAtLocation( @@ -151,21 +151,21 @@ export async function validateClickTarget( xPercent, yPercent, gridSize, - ); + ) if (pixelsMatch) { - return { valid: true, skipped: false }; + return { valid: true, skipped: false } } return { valid: false, skipped: false, warning: - "Screen content at the target location changed since the last screenshot. Take a new screenshot before clicking.", - }; + 'Screen content at the target location changed since the last screenshot. Take a new screenshot before clicking.', + } } catch (err) { // Skip validation on technical errors, execute action anyway. // Battle-tested: validation failure must never block the click. - logger.debug("[pixelCompare] validation error, skipping", err); - return { valid: true, skipped: true }; + logger.debug('[pixelCompare] validation error, skipping', err) + return { valid: true, skipped: true } } } diff --git a/packages/@ant/computer-use-mcp/src/sentinelApps.ts b/packages/@ant/computer-use-mcp/src/sentinelApps.ts index 0d26de600..206bdf4e5 100644 --- a/packages/@ant/computer-use-mcp/src/sentinelApps.ts +++ b/packages/@ant/computer-use-mcp/src/sentinelApps.ts @@ -11,33 +11,33 @@ /** These apps can execute arbitrary shell commands. */ const SHELL_ACCESS_BUNDLE_IDS = new Set([ - "com.apple.Terminal", - "com.googlecode.iterm2", - "com.microsoft.VSCode", - "dev.warp.Warp-Stable", - "com.github.wez.wezterm", - "io.alacritty", - "net.kovidgoyal.kitty", - "com.jetbrains.intellij", - "com.jetbrains.pycharm", -]); + 'com.apple.Terminal', + 'com.googlecode.iterm2', + 'com.microsoft.VSCode', + 'dev.warp.Warp-Stable', + 'com.github.wez.wezterm', + 'io.alacritty', + 'net.kovidgoyal.kitty', + 'com.jetbrains.intellij', + 'com.jetbrains.pycharm', +]) /** Finder in the allowlist ≈ browse + open-any-file. */ -const FILESYSTEM_ACCESS_BUNDLE_IDS = new Set(["com.apple.finder"]); +const FILESYSTEM_ACCESS_BUNDLE_IDS = new Set(['com.apple.finder']) -const SYSTEM_SETTINGS_BUNDLE_IDS = new Set(["com.apple.systempreferences"]); +const SYSTEM_SETTINGS_BUNDLE_IDS = new Set(['com.apple.systempreferences']) export const SENTINEL_BUNDLE_IDS: ReadonlySet = new Set([ ...SHELL_ACCESS_BUNDLE_IDS, ...FILESYSTEM_ACCESS_BUNDLE_IDS, ...SYSTEM_SETTINGS_BUNDLE_IDS, -]); +]) -export type SentinelCategory = "shell" | "filesystem" | "system_settings"; +export type SentinelCategory = 'shell' | 'filesystem' | 'system_settings' export function getSentinelCategory(bundleId: string): SentinelCategory | null { - if (SHELL_ACCESS_BUNDLE_IDS.has(bundleId)) return "shell"; - if (FILESYSTEM_ACCESS_BUNDLE_IDS.has(bundleId)) return "filesystem"; - if (SYSTEM_SETTINGS_BUNDLE_IDS.has(bundleId)) return "system_settings"; - return null; + if (SHELL_ACCESS_BUNDLE_IDS.has(bundleId)) return 'shell' + if (FILESYSTEM_ACCESS_BUNDLE_IDS.has(bundleId)) return 'filesystem' + if (SYSTEM_SETTINGS_BUNDLE_IDS.has(bundleId)) return 'system_settings' + return null } diff --git a/packages/@ant/computer-use-mcp/src/toolCalls.ts b/packages/@ant/computer-use-mcp/src/toolCalls.ts index 415ee6ecc..e42130d40 100644 --- a/packages/@ant/computer-use-mcp/src/toolCalls.ts +++ b/packages/@ant/computer-use-mcp/src/toolCalls.ts @@ -34,8 +34,8 @@ * c. Pixel-validation staleness check (sub-gated). */ -import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; -import { randomUUID } from "node:crypto"; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js' +import { randomUUID } from 'node:crypto' /** Detect actual image MIME type from base64 data by decoding the magic bytes. */ function detectMimeFromBase64(b64: string): string { @@ -44,27 +44,39 @@ function detectMimeFromBase64(b64: string): string { // JPEG: FF D8 FF // RIFF+WEBP: "RIFF" at 0..3 + "WEBP" at 8..11 // GIF: "GIF" at 0..2 - const raw = Buffer.from(b64.slice(0, 16), "base64"); - if (raw[0] === 0x89 && raw[1] === 0x50 && raw[2] === 0x4e && raw[3] === 0x47) return "image/png"; - if (raw[0] === 0xff && raw[1] === 0xd8 && raw[2] === 0xff) return "image/jpeg"; + const raw = Buffer.from(b64.slice(0, 16), 'base64') + if (raw[0] === 0x89 && raw[1] === 0x50 && raw[2] === 0x4e && raw[3] === 0x47) + return 'image/png' + if (raw[0] === 0xff && raw[1] === 0xd8 && raw[2] === 0xff) return 'image/jpeg' if ( - raw[0] === 0x52 && raw[1] === 0x49 && raw[2] === 0x46 && raw[3] === 0x46 && // RIFF - raw[8] === 0x57 && raw[9] === 0x45 && raw[10] === 0x42 && raw[11] === 0x50 // WEBP - ) return "image/webp"; - if (raw[0] === 0x47 && raw[1] === 0x49 && raw[2] === 0x46) return "image/gif"; - return "image/png"; + raw[0] === 0x52 && + raw[1] === 0x49 && + raw[2] === 0x46 && + raw[3] === 0x46 && // RIFF + raw[8] === 0x57 && + raw[9] === 0x45 && + raw[10] === 0x42 && + raw[11] === 0x50 // WEBP + ) + return 'image/webp' + if (raw[0] === 0x47 && raw[1] === 0x49 && raw[2] === 0x46) return 'image/gif' + return 'image/png' } -import { getDefaultTierForApp, getDeniedCategoryForApp, isPolicyDenied } from "./deniedApps.js"; +import { + getDefaultTierForApp, + getDeniedCategoryForApp, + isPolicyDenied, +} from './deniedApps.js' import type { ComputerExecutor, DisplayGeometry, InstalledApp, ScreenshotResult, -} from "./executor.js"; -import { isSystemKeyCombo } from "./keyBlocklist.js"; -import { validateClickTarget } from "./pixelCompare.js"; -import { SENTINEL_BUNDLE_IDS } from "./sentinelApps.js"; +} from './executor.js' +import { isSystemKeyCombo } from './keyBlocklist.js' +import { validateClickTarget } from './pixelCompare.js' +import { SENTINEL_BUNDLE_IDS } from './sentinelApps.js' import type { AppGrant, ComputerUseHostAdapter, @@ -78,37 +90,37 @@ import type { Logger, ResolvedAppRequest, TeachStepRequest, -} from "./types.js"; +} from './types.js' /** * Finder is never hidden by the hide loop (hiding Finder kills the Desktop), * so it's always a valid frontmost. */ -const FINDER_BUNDLE_ID = "com.apple.finder"; +const FINDER_BUNDLE_ID = 'com.apple.finder' /** * Categorical error classes for the cu_tool_call telemetry event. Never * free text — error messages may contain file paths / app content (PII). */ export type CuErrorKind = - | "allowlist_empty" - | "tcc_not_granted" - | "cu_lock_held" - | "teach_mode_conflict" - | "teach_mode_not_active" - | "executor_threw" - | "capture_failed" - | "app_denied" // no longer emitted (tiered model replaced hard-deny); kept for schema compat - | "bad_args" // malformed tool args (type/shape/range/unknown value) - | "app_not_granted" // target app not in session allowlist (distinct from allowlist_empty) - | "tier_insufficient" // app in allowlist but at a tier too low for the action - | "feature_unavailable" // tool callable but session not wired for it - | "state_conflict" // wrong state for action (call sequence, mouse already held) - | "grant_flag_required" // action needs a grant flag (systemKeyCombos, clipboard*) from request_access - | "display_error" // display enumeration failed (platform) - | "launch_failed" // failed to launch an external process (e.g. terminal) - | "element_not_found" // UI element not found (e.g. window, automation element) - | "other"; + | 'allowlist_empty' + | 'tcc_not_granted' + | 'cu_lock_held' + | 'teach_mode_conflict' + | 'teach_mode_not_active' + | 'executor_threw' + | 'capture_failed' + | 'app_denied' // no longer emitted (tiered model replaced hard-deny); kept for schema compat + | 'bad_args' // malformed tool args (type/shape/range/unknown value) + | 'app_not_granted' // target app not in session allowlist (distinct from allowlist_empty) + | 'tier_insufficient' // app in allowlist but at a tier too low for the action + | 'feature_unavailable' // tool callable but session not wired for it + | 'state_conflict' // wrong state for action (call sequence, mouse already held) + | 'grant_flag_required' // action needs a grant flag (systemKeyCombos, clipboard*) from request_access + | 'display_error' // display enumeration failed (platform) + | 'launch_failed' // failed to launch an external process (e.g. terminal) + | 'element_not_found' // UI element not found (e.g. window, automation element) + | 'other' /** * Telemetry payload piggybacked on the result — populated by handlers, @@ -118,15 +130,15 @@ export type CuErrorKind = export interface CuCallTelemetry { /** request_access / request_teach_access: apps NEWLY granted in THIS call * (does NOT include idempotent re-grants of already-allowed apps). */ - granted_count?: number; + granted_count?: number /** request_access / request_teach_access: apps denied in THIS call */ - denied_count?: number; + denied_count?: number /** request_access / request_teach_access: apps safety-denied (browser) this call */ - denied_browser_count?: number; + denied_browser_count?: number /** request_access / request_teach_access: apps safety-denied (terminal) this call */ - denied_terminal_count?: number; + denied_terminal_count?: number /** Categorical error class (only set when isError) */ - error_kind?: CuErrorKind; + error_kind?: CuErrorKind } /** @@ -136,10 +148,10 @@ export interface CuCallTelemetry { * field — the host wrapper strips it before returning to the SDK. */ export type CuCallToolResult = CallToolResult & { - screenshot?: ScreenshotResult; + screenshot?: ScreenshotResult /** Piggybacked telemetry — stripped by the host wrapper before SDK return. */ - telemetry?: CuCallTelemetry; -}; + telemetry?: CuCallTelemetry +} // --------------------------------------------------------------------------- // Small result helpers (mirror of chrome-mcp's inline `{content, isError}`) @@ -147,21 +159,21 @@ export type CuCallToolResult = CallToolResult & { function errorResult(text: string, errorKind?: CuErrorKind): CuCallToolResult { return { - content: [{ type: "text", text }], + content: [{ type: 'text', text }], isError: true, telemetry: errorKind ? { error_kind: errorKind } : undefined, - }; + } } function okText(text: string): CuCallToolResult { - return { content: [{ type: "text", text }] }; + return { content: [{ type: 'text', text }] } } function okJson(obj: unknown, telemetry?: CuCallTelemetry): CuCallToolResult { return { - content: [{ type: "text", text: JSON.stringify(obj) }], + content: [{ type: 'text', text: JSON.stringify(obj) }], telemetry, - }; + } } // --------------------------------------------------------------------------- @@ -169,32 +181,32 @@ function okJson(obj: unknown, telemetry?: CuCallTelemetry): CuCallToolResult { // --------------------------------------------------------------------------- function asRecord(args: unknown): Record { - if (typeof args === "object" && args !== null) { - return args as Record; + if (typeof args === 'object' && args !== null) { + return args as Record } - return {}; + return {} } function requireNumber( args: Record, key: string, ): number | Error { - const v = args[key]; - if (typeof v !== "number" || !Number.isFinite(v)) { - return new Error(`"${key}" must be a finite number.`); + const v = args[key] + if (typeof v !== 'number' || !Number.isFinite(v)) { + return new Error(`"${key}" must be a finite number.`) } - return v; + return v } function requireString( args: Record, key: string, ): string | Error { - const v = args[key]; - if (typeof v !== "string") { - return new Error(`"${key}" must be a string.`); + const v = args[key] + if (typeof v !== 'string') { + return new Error(`"${key}" must be a string.`) } - return v; + return v } /** @@ -203,20 +215,20 @@ function requireString( */ function extractCoordinate( args: Record, - paramName: string = "coordinate", + paramName: string = 'coordinate', ): [number, number] | Error { - const coord = args[paramName]; + const coord = args[paramName] if (coord === undefined) { - return new Error(`${paramName} is required`); + return new Error(`${paramName} is required`) } if (!Array.isArray(coord) || coord.length !== 2) { - return new Error(`${paramName} must be an array of length 2`); + return new Error(`${paramName} must be an array of length 2`) } - const [x, y] = coord; - if (typeof x !== "number" || typeof y !== "number" || x < 0 || y < 0) { - return new Error(`${paramName} must be a tuple of non-negative numbers`); + const [x, y] = coord + if (typeof x !== 'number' || typeof y !== 'number' || x < 0 || y < 0) { + return new Error(`${paramName} must be a tuple of non-negative numbers`) } - return [x, y]; + return [x, y] } // --------------------------------------------------------------------------- @@ -245,12 +257,12 @@ function scaleCoord( lastScreenshot: ScreenshotResult | undefined, logger: Logger, ): { x: number; y: number } { - if (mode === "normalized_0_100") { + if (mode === 'normalized_0_100') { // Origin offset targets the selected display in virtual-screen space. return { x: Math.round((rawX / 100) * display.width) + display.originX, y: Math.round((rawY / 100) * display.height) + display.originY, - }; + } } // mode === "pixels": model sent image-space pixel coords. @@ -268,19 +280,19 @@ function scaleCoord( Math.round( rawY * (lastScreenshot.displayHeight / lastScreenshot.height), ) + lastScreenshot.originY, - }; + } } // Cold start: model sent pixel coords without having taken a screenshot. // Degenerate — fall back to the old /sf behavior and warn. logger.warn( - "[computer-use] pixels-mode coordinate received with no prior screenshot; " + - "falling back to /scaleFactor. Click may be off if downsample is active.", - ); + '[computer-use] pixels-mode coordinate received with no prior screenshot; ' + + 'falling back to /scaleFactor. Click may be off if downsample is active.', + ) return { x: Math.round(rawX / display.scaleFactor) + display.originX, y: Math.round(rawY / display.scaleFactor) + display.originY, - }; + } } /** @@ -301,21 +313,21 @@ function coordToPercentageForPixelCompare( mode: CoordinateMode, lastScreenshot: ScreenshotResult | undefined, ): { xPct: number; yPct: number } { - if (mode === "normalized_0_100") { + if (mode === 'normalized_0_100') { // Unchanged — already a percentage. - return { xPct: rawX, yPct: rawY }; + return { xPct: rawX, yPct: rawY } } // mode === "pixels" if (!lastScreenshot) { // validateClickTarget at pixelCompare.ts:141-143 already skips when // lastScreenshot is undefined, so this return value never reaches a crop. - return { xPct: 0, yPct: 0 }; + return { xPct: 0, yPct: 0 } } return { xPct: (rawX / lastScreenshot.width) * 100, yPct: (rawY / lastScreenshot.height) * 100, - }; + } } // --------------------------------------------------------------------------- @@ -339,28 +351,28 @@ function coordToPercentageForPixelCompare( * click-tier apps; `scroll` is the tier-"click" way to scroll. * - `"keyboard"` — type, key, hold_key. Requires tier `"full"`. */ -type CuActionKind = "mouse_position" | "mouse" | "mouse_full" | "keyboard"; +type CuActionKind = 'mouse_position' | 'mouse' | 'mouse_full' | 'keyboard' function tierSatisfies( grantTier: CuAppPermTier | undefined, actionKind: CuActionKind, ): boolean { - const tier = grantTier ?? "full"; - if (actionKind === "mouse_position") return true; - if (actionKind === "keyboard" || actionKind === "mouse_full") { - return tier === "full"; + const tier = grantTier ?? 'full' + if (actionKind === 'mouse_position') return true + if (actionKind === 'keyboard' || actionKind === 'mouse_full') { + return tier === 'full' } // mouse - return tier === "click" || tier === "full"; + return tier === 'click' || tier === 'full' } // Appended to every tier_insufficient error. The model may try to route // around the gate (osascript, System Events, cliclick via Bash) — this // closes that door explicitly. Leading space so it concatenates cleanly. const TIER_ANTI_SUBVERSION = - " Do not attempt to work around this restriction — never use AppleScript, " + - "System Events, shell commands, or any other method to send clicks or " + - "keystrokes to this app."; + ' Do not attempt to work around this restriction — never use AppleScript, ' + + 'System Events, shell commands, or any other method to send clicks or ' + + 'keystrokes to this app.' // --------------------------------------------------------------------------- // Clipboard guard — stash+clear while a click-tier app is frontmost @@ -391,29 +403,29 @@ async function syncClipboardStash( overrides: ComputerUseOverrides, frontmostIsClickTier: boolean, ): Promise { - const current = overrides.getClipboardStash?.(); + const current = overrides.getClipboardStash?.() if (!frontmostIsClickTier) { // Restore + clear. Idempotent — if nothing is stashed, no-op. - if (current === undefined) return; + if (current === undefined) return try { - await adapter.executor.writeClipboard(current); + await adapter.executor.writeClipboard(current) // Clear only after a successful write — a transient pasteboard // failure must not irrecoverably drop the stash. - overrides.onClipboardStashChanged?.(undefined); + overrides.onClipboardStashChanged?.(undefined) } catch { // Best effort — stash held, next non-click action retries. } - return; + return } // Stash the user's clipboard on FIRST entry to click-tier only. if (current === undefined) { try { - const read = await adapter.executor.readClipboard(); - overrides.onClipboardStashChanged?.(read); + const read = await adapter.executor.readClipboard() + overrides.onClipboardStashChanged?.(read) } catch { // readClipboard failed — use empty sentinel so we don't retry the stash // on the next action; restore becomes a harmless writeClipboard(""). - overrides.onClipboardStashChanged?.(""); + overrides.onClipboardStashChanged?.('') } } // Re-clear on EVERY click-tier action, not just the first. Defeats the @@ -422,7 +434,7 @@ async function syncClipboardStash( // Paste button — the next action's clear clobbers the agent's write // before the click lands. try { - await adapter.executor.writeClipboard(""); + await adapter.executor.writeClipboard('') } catch { // Transient pasteboard failure. The tier-"click" right-click/modifier // block still holds; this is a net, not a promise. @@ -444,13 +456,13 @@ async function runInputActionGates( // ALL grant tiers stay visible — visibility is the baseline (tier "read"). if (subGates.hideBeforeAction) { const hidden = await adapter.executor.prepareForAction( - overrides.allowedApps.map((a) => a.bundleId), + overrides.allowedApps.map(a => a.bundleId), overrides.selectedDisplayId, - ); + ) // Empty-check so we don't spam the callback on every action when nothing // was hidden (the common case after the first action of a turn). if (hidden.length > 0) { - overrides.onAppsHidden?.(hidden); + overrides.onAppsHidden?.(hidden) } } @@ -460,21 +472,21 @@ async function runInputActionGates( // when the platform's screenshotFiltering is 'none' (no per-app filtering, // meaning no hide/defocus, meaning frontmost is meaningless). if (adapter.executor.capabilities.screenshotFiltering === 'none') { - return null; // pass — non-macOS platform, frontmost irrelevant + return null // pass — non-macOS platform, frontmost irrelevant } // Frontmost gate. Check FRESH on every call. - const frontmost = await adapter.executor.getFrontmostApp(); + const frontmost = await adapter.executor.getFrontmostApp() const tierByBundleId = new Map( - overrides.allowedApps.map((a) => [a.bundleId, a.tier] as const), - ); + overrides.allowedApps.map(a => [a.bundleId, a.tier] as const), + ) // After handleToolCall's tier backfill, every grant has a concrete tier — // .get() returning undefined means the app is not in the allowlist at all. const frontmostTier = frontmost ? tierByBundleId.get(frontmost.bundleId) - : undefined; + : undefined // Clipboard guard. Per-action, not per-tool-call — runs for every sub-action // inside computer_batch and teach_step/teach_batch, so clicking into a @@ -483,52 +495,53 @@ async function runInputActionGates( // list_granted_applications), `wait`, and the teach_step blocking-dialog // phase don't trigger a sync — only input actions do. if (subGates.clipboardGuard) { - await syncClipboardStash(adapter, overrides, frontmostTier === "click"); + await syncClipboardStash(adapter, overrides, frontmostTier === 'click') } if (!frontmost) { // No frontmost app (rare — login window?). Let it through; the click // will land somewhere and PixelCompare catches staleness. - return null; + return null } - const { hostBundleId } = adapter.executor.capabilities; + const { hostBundleId } = adapter.executor.capabilities if (frontmostTier !== undefined) { - if (tierSatisfies(frontmostTier, actionKind)) return null; + if (tierSatisfies(frontmostTier, actionKind)) return null // In the allowlist but tier doesn't cover this action. Tailor the // guidance to the actual tier — at "read", suggesting left_click or Bash // is wrong (nothing is allowed; use Chrome MCP). At "click", the // mouse_full/keyboard-specific messages apply. - if (frontmostTier === "read") { + if (frontmostTier === 'read') { // tier "read" is not category-unique (browser AND trading map to it) — // re-look-up so the CiC hint only shows for actual browsers. const isBrowser = getDeniedCategoryForApp(frontmost.bundleId, frontmost.displayName) === - "browser"; + 'browser' return errorResult( `"${frontmost.displayName}" is granted at tier "read" — ` + `visible in screenshots only, no clicks or typing.` + (isBrowser - ? " Use the Claude-in-Chrome MCP for browser interaction (tools " + - "named `mcp__Claude_in_Chrome__*`; load via ToolSearch if " + - "deferred)." - : " No interaction is permitted; ask the user to take any " + - "actions in this app themselves.") + + ? ' Use the Claude-in-Chrome MCP for browser interaction (tools ' + + 'named `mcp__Claude_in_Chrome__*`; load via ToolSearch if ' + + 'deferred).' + : ' No interaction is permitted; ask the user to take any ' + + 'actions in this app themselves.') + TIER_ANTI_SUBVERSION, - "tier_insufficient", - ); + 'tier_insufficient', + ) } // frontmostTier === "click" (tier === "full" would have passed tierSatisfies) - if (actionKind === "keyboard") { + if (actionKind === 'keyboard') { return errorResult( `"${frontmost.displayName}" is granted at tier "click" — ` + `typing, key presses, and paste require tier "full". The keys ` + `would go to this app's text fields or integrated terminal. To ` + `type into a different app, click it first to bring it forward. ` + - `For shell commands, use the Bash tool.` + TIER_ANTI_SUBVERSION, - "tier_insufficient", - ); + `For shell commands, use the Bash tool.` + + TIER_ANTI_SUBVERSION, + 'tier_insufficient', + ) } // actionKind === "mouse_full" ("mouse" and "mouse_position" pass at "click") return errorResult( @@ -536,26 +549,27 @@ async function runInputActionGates( `right-click, middle-click, and clicks with modifier keys require ` + `tier "full". Right-click opens a context menu with Paste/Cut, and ` + `modifier chords fire as keystrokes before the click. Plain ` + - `left_click is allowed here.` + TIER_ANTI_SUBVERSION, - "tier_insufficient", - ); + `left_click is allowed here.` + + TIER_ANTI_SUBVERSION, + 'tier_insufficient', + ) } // Finder is never-hide, always allowed. - if (frontmost.bundleId === FINDER_BUNDLE_ID) return null; + if (frontmost.bundleId === FINDER_BUNDLE_ID) return null if (frontmost.bundleId === hostBundleId) { - if (actionKind !== "keyboard") { + if (actionKind !== 'keyboard') { // mouse and mouse_full are both click events — click-through works. // We're click-through (executor's withClickThrough). Pass. - return null; + return null } // Keyboard safety net — defocus (prepareForAction step B) should have // moved us off. If we're still here, typing would go to our chat box. return errorResult( "Claude's own window still has keyboard focus. This should not happen " + - "after the pre-action defocus. Click on the target application first.", - "state_conflict", - ); + 'after the pre-action defocus. Click on the target application first.', + 'state_conflict', + ) } // Non-allowlisted, non-us, non-Finder. RARE after the hide loop — means @@ -564,8 +578,8 @@ async function runInputActionGates( `"${frontmost.displayName}" is not in the allowed applications and is ` + `currently in front. Take a new screenshot — it may have appeared ` + `since your last one.`, - "app_not_granted", - ); + 'app_not_granted', + ) } /** @@ -594,19 +608,19 @@ async function runHitTestGate( // SendMessage with window-relative coordinates. Hit-test against the // real screen is meaningless. if (adapter.executor.capabilities.screenshotFiltering === 'none') { - return null; + return null } - const target = await adapter.executor.appUnderPoint(x, y); - if (!target) return null; // desktop / nothing under point / platform no-op + const target = await adapter.executor.appUnderPoint(x, y) + if (!target) return null // desktop / nothing under point / platform no-op // Finder (desktop, file dialogs) is always clickable — same exemption as // runInputActionGates. Our own overlay is filtered by Swift (pid != self). - if (target.bundleId === FINDER_BUNDLE_ID) return null; + if (target.bundleId === FINDER_BUNDLE_ID) return null const tierByBundleId = new Map( - overrides.allowedApps.map((a) => [a.bundleId, a.tier] as const), - ); + overrides.allowedApps.map(a => [a.bundleId, a.tier] as const), + ) if (!tierByBundleId.has(target.bundleId)) { // Not in the allowlist at all. The frontmost check would catch this if @@ -617,47 +631,48 @@ async function runHitTestGate( `Click at these coordinates would land on "${target.displayName}", ` + `which is not in the allowed applications. Take a fresh screenshot ` + `to see the current window layout.`, - "app_not_granted", - ); + 'app_not_granted', + ) } - const targetTier = tierByBundleId.get(target.bundleId); + const targetTier = tierByBundleId.get(target.bundleId) // Frontmost-based sync (runInputActionGates) misses the case where // the click lands on a NON-FRONTMOST click-tier window. Re-sync by // the hit-test target's tier — if target is click-tier, stash+clear // before the click lands, regardless of what's frontmost. - if (subGates.clipboardGuard && targetTier === "click") { - await syncClipboardStash(adapter, overrides, true); + if (subGates.clipboardGuard && targetTier === 'click') { + await syncClipboardStash(adapter, overrides, true) } - if (tierSatisfies(targetTier, actionKind)) return null; + if (tierSatisfies(targetTier, actionKind)) return null // Target is in the allowlist but tier doesn't cover this action. // runHitTestGate is only called with mouse/mouse_full (keyboard routes to // frontmost, not window-under-cursor). The branch above catches // mouse_full ∧ click; the only remaining fall-through is tier "read". - if (actionKind === "mouse_full" && targetTier === "click") { + if (actionKind === 'mouse_full' && targetTier === 'click') { return errorResult( `Click at these coordinates would land on "${target.displayName}", ` + `which is granted at tier "click" — right-click, middle-click, and ` + `clicks with modifier keys require tier "full" (they can Paste via ` + `the context menu or fire modifier-chord keystrokes). Plain ` + - `left_click is allowed here.` + TIER_ANTI_SUBVERSION, - "tier_insufficient", - ); + `left_click is allowed here.` + + TIER_ANTI_SUBVERSION, + 'tier_insufficient', + ) } const isBrowser = - getDeniedCategoryForApp(target.bundleId, target.displayName) === "browser"; + getDeniedCategoryForApp(target.bundleId, target.displayName) === 'browser' return errorResult( `Click at these coordinates would land on "${target.displayName}", ` + `which is granted at tier "read" (screenshots only, no interaction). ` + (isBrowser - ? "Use the Claude-in-Chrome MCP for browser interaction." - : "Ask the user to take any actions in this app themselves.") + + ? 'Use the Claude-in-Chrome MCP for browser interaction.' + : 'Ask the user to take any actions in this app themselves.') + TIER_ANTI_SUBVERSION, - "tier_insufficient", - ); + 'tier_insufficient', + ) } // --------------------------------------------------------------------------- @@ -668,35 +683,35 @@ async function runHitTestGate( * §6 item 9 — screenshot retry on implausibly-small buffer. Battle-tested * threshold (1024 bytes). We retry exactly once. */ -const MIN_SCREENSHOT_BYTES = 1024; +const MIN_SCREENSHOT_BYTES = 1024 function decodedByteLength(base64: string): number { // 3 bytes per 4 chars, minus padding. Good enough for a threshold check. - const padding = base64.endsWith("==") ? 2 : base64.endsWith("=") ? 1 : 0; - return Math.floor((base64.length * 3) / 4) - padding; + const padding = base64.endsWith('==') ? 2 : base64.endsWith('=') ? 1 : 0 + return Math.floor((base64.length * 3) / 4) - padding } async function takeScreenshotWithRetry( executor: ComputerExecutor, allowedBundleIds: string[], - logger: ComputerUseHostAdapter["logger"], + logger: ComputerUseHostAdapter['logger'], displayId?: number, ): Promise { - let shot = await executor.screenshot({ allowedBundleIds, displayId }); + let shot = await executor.screenshot({ allowedBundleIds, displayId }) if (decodedByteLength(shot.base64) < MIN_SCREENSHOT_BYTES) { logger.warn( `[computer-use] screenshot implausibly small (${decodedByteLength(shot.base64)} bytes decoded), retrying once`, - ); - shot = await executor.screenshot({ allowedBundleIds, displayId }); + ) + shot = await executor.screenshot({ allowedBundleIds, displayId }) } - return shot; + return shot } // --------------------------------------------------------------------------- // Grapheme iteration — §6 item 7, ported from the Vercept acquisition // --------------------------------------------------------------------------- -const INTER_GRAPHEME_SLEEP_MS = 8; // §6 item 4 — 125 Hz USB polling +const INTER_GRAPHEME_SLEEP_MS = 8 // §6 item 4 — 125 Hz USB polling function segmentGraphemes(text: string): string[] { try { @@ -706,23 +721,23 @@ function segmentGraphemes(text: string): string[] { Intl as typeof Intl & { Segmenter?: new ( locale?: string, - options?: { granularity: "grapheme" | "word" | "sentence" }, - ) => { segment: (s: string) => Iterable<{ segment: string }> }; + options?: { granularity: 'grapheme' | 'word' | 'sentence' }, + ) => { segment: (s: string) => Iterable<{ segment: string }> } } - ).Segmenter; - if (typeof Segmenter === "function") { - const seg = new Segmenter(undefined, { granularity: "grapheme" }); - return Array.from(seg.segment(text), (s) => s.segment); + ).Segmenter + if (typeof Segmenter === 'function') { + const seg = new Segmenter(undefined, { granularity: 'grapheme' }) + return Array.from(seg.segment(text), s => s.segment) } } catch { // fall through } // Code-point iteration. Keeps surrogate pairs together but splits ZWJ. - return Array.from(text); + return Array.from(text) } function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); + return new Promise(r => setTimeout(r, ms)) } /** @@ -731,9 +746,9 @@ function sleep(ms: number): Promise { */ function parseKeyChord(text: string): string[] { return text - .split("+") - .map((s) => s.trim()) - .filter(Boolean); + .split('+') + .map(s => s.trim()) + .filter(Boolean) } // --------------------------------------------------------------------------- @@ -750,18 +765,18 @@ function parseKeyChord(text: string): string[] { * tool calls, but the lock enforces at-most-one-session-uses-CU so they * can't. The per-turn reset is the correctness boundary. */ -let mouseButtonHeld = false; +let mouseButtonHeld = false /** Whether mouse_move occurred between left_mouse_down and left_mouse_up. * When false at mouseUp, the decomposed sequence is a click-release (not a * drop) — hit-test at "mouse", not "mouse_full". */ -let mouseMoved = false; +let mouseMoved = false /** Clears the cross-call drag flags. Called from Gate-3 on lock-acquire and * from `bindSessionContext` in mcpServer.ts — a fresh lock holder must not * inherit a prior session's mid-drag state. */ export function resetMouseButtonHeld(): void { - mouseButtonHeld = false; - mouseMoved = false; + mouseButtonHeld = false + mouseMoved = false } /** If a left_mouse_down set the OS button without a matching left_mouse_up @@ -770,10 +785,10 @@ export function resetMouseButtonHeld(): void { async function releaseHeldMouse( adapter: ComputerUseHostAdapter, ): Promise { - if (!mouseButtonHeld) return; - await adapter.executor.mouseUp(); - mouseButtonHeld = false; - mouseMoved = false; + if (!mouseButtonHeld) return + await adapter.executor.mouseUp() + mouseButtonHeld = false + mouseMoved = false } /** @@ -791,9 +806,8 @@ async function releaseHeldMouse( */ export function defersLockAcquire(toolName: string): boolean { return ( - toolName === "request_access" || - toolName === "list_granted_applications" - ); + toolName === 'request_access' || toolName === 'list_granted_applications' + ) } // --------------------------------------------------------------------------- @@ -802,10 +816,10 @@ export function defersLockAcquire(toolName: string): boolean { /** Reverse-DNS-ish: contains at least one dot, no spaces, no slashes. Lets * raw bundle IDs pass through resolution. */ -const REVERSE_DNS_RE = /^[A-Za-z0-9][\w.-]*\.[A-Za-z0-9][\w.-]*$/; +const REVERSE_DNS_RE = /^[A-Za-z0-9][\w.-]*\.[A-Za-z0-9][\w.-]*$/ function looksLikeBundleId(s: string): boolean { - return REVERSE_DNS_RE.test(s) && !s.includes(" "); + return REVERSE_DNS_RE.test(s) && !s.includes(' ') } function resolveRequestedApps( @@ -813,42 +827,45 @@ function resolveRequestedApps( installed: InstalledApp[], alreadyGrantedBundleIds: ReadonlySet, ): ResolvedAppRequest[] { - const byLowerDisplayName = new Map(); - const byBundleId = new Map(); + const byLowerDisplayName = new Map() + const byBundleId = new Map() for (const app of installed) { - byBundleId.set(app.bundleId, app); + byBundleId.set(app.bundleId, app) // Last write wins on collisions. Ambiguous-name handling (multiple // candidates in the dialog) is plan-documented but deferred — the // InstalledApps enumerator dedupes by bundle ID, so true display-name // collisions are rare. TODO(chicago, post-P1): surface all candidates. - byLowerDisplayName.set(app.displayName.toLowerCase(), app); + byLowerDisplayName.set(app.displayName.toLowerCase(), app) } return requestedNames.map((requested): ResolvedAppRequest => { - let resolved: InstalledApp | undefined; + let resolved: InstalledApp | undefined if (looksLikeBundleId(requested)) { - resolved = byBundleId.get(requested); + resolved = byBundleId.get(requested) } if (!resolved) { - resolved = byLowerDisplayName.get(requested.toLowerCase()); + resolved = byLowerDisplayName.get(requested.toLowerCase()) } // Windows fuzzy matching: strip .exe suffix, try substring match if (!resolved) { - const clean = requested.toLowerCase().replace(/\.exe$/, '').trim(); + const clean = requested + .toLowerCase() + .replace(/\.exe$/, '') + .trim() // Try: "chrome" matches "Google Chrome", "notepad" matches "Notepad" for (const [name, app] of byLowerDisplayName) { if (name.includes(clean) || clean.includes(name)) { - resolved = app; - break; + resolved = app + break } } } - const bundleId = resolved?.bundleId; + const bundleId = resolved?.bundleId // When unresolved AND the requested string looks like a bundle ID, use it // directly for tier lookup (e.g. "company.thebrowser.Browser" with Arc not // installed — the reverse-DNS string won't match any display-name substring). const bundleIdCandidate = - bundleId ?? (looksLikeBundleId(requested) ? requested : undefined); + bundleId ?? (looksLikeBundleId(requested) ? requested : undefined) return { requestedName: requested, resolved, @@ -858,8 +875,8 @@ function resolveRequestedApps( bundleIdCandidate, resolved?.displayName ?? requested, ), - }; - }); + } + }) } // --------------------------------------------------------------------------- @@ -874,9 +891,9 @@ async function handleRequestAccess( ): Promise { if (!overrides.onPermissionRequest) { return errorResult( - "This session was not wired with a permission handler. Computer control is not available here.", - "feature_unavailable", - ); + 'This session was not wired with a permission handler. Computer control is not available here.', + 'feature_unavailable', + ) } // Teach mode hides the main window; permission dialogs render in that @@ -885,13 +902,13 @@ async function handleRequestAccess( // mode, request access, then re-enter. if (overrides.getTeachModeActive?.()) { return errorResult( - "Cannot request additional permissions during teach mode — the permission dialog would be hidden. End teach mode (finish the tour or let the turn complete), then call request_access, then start a new tour.", - "teach_mode_conflict", - ); + 'Cannot request additional permissions during teach mode — the permission dialog would be hidden. End teach mode (finish the tour or let the turn complete), then call request_access, then start a new tour.', + 'teach_mode_conflict', + ) } - const reason = requireString(args, "reason"); - if (reason instanceof Error) return errorResult(reason.message, "bad_args"); + const reason = requireString(args, 'reason') + if (reason instanceof Error) return errorResult(reason.message, 'bad_args') // TCC-ungranted branch. The renderer shows a toggle panel INSTEAD OF the // app list when `tccState` is present on the request, so we skip app @@ -908,8 +925,8 @@ async function handleRequestAccess( requestedFlags: {}, screenshotFiltering: adapter.executor.capabilities.screenshotFiltering, tccState, - }; - await overrides.onPermissionRequest(req); + } + await overrides.onPermissionRequest(req) // Re-check: the user may have granted in System Settings while the // dialog was up. The `tccState` arg is a pre-dialog snapshot — reading @@ -917,42 +934,46 @@ async function handleRequestAccess( // granted, and the model waits for confirmation instead of retrying. // The renderer's TCC panel already live-polls (computerUseTccStore); // this is the same re-check on the tool-result side. - const recheck = await adapter.ensureOsPermissions(); + const recheck = await adapter.ensureOsPermissions() if (recheck.granted) { return errorResult( - "macOS Accessibility and Screen Recording are now both granted. " + - "Call request_access again immediately — the next call will show " + - "the app selection list.", - ); + 'macOS Accessibility and Screen Recording are now both granted. ' + + 'Call request_access again immediately — the next call will show ' + + 'the app selection list.', + ) } - const perms = recheck as { granted: false; accessibility: boolean; screenRecording: boolean }; - const missing: string[] = []; - if (!perms.accessibility) missing.push("Accessibility"); - if (!perms.screenRecording) missing.push("Screen Recording"); + const perms = recheck as { + granted: false + accessibility: boolean + screenRecording: boolean + } + const missing: string[] = [] + if (!perms.accessibility) missing.push('Accessibility') + if (!perms.screenRecording) missing.push('Screen Recording') return errorResult( - `macOS ${missing.join(" and ")} permission(s) not yet granted. ` + + `macOS ${missing.join(' and ')} permission(s) not yet granted. ` + `The permission panel has been shown. Once the user grants the ` + `missing permission(s), call request_access again.`, - "tcc_not_granted", - ); + 'tcc_not_granted', + ) } - const rawApps = args.apps; - if (!Array.isArray(rawApps) || !rawApps.every((a) => typeof a === "string")) { - return errorResult('"apps" must be an array of strings.', "bad_args"); + const rawApps = args.apps + if (!Array.isArray(rawApps) || !rawApps.every(a => typeof a === 'string')) { + return errorResult('"apps" must be an array of strings.', 'bad_args') } - const apps = rawApps as string[]; + const apps = rawApps as string[] - const requestedFlags: Partial = {}; - if (typeof args.clipboardRead === "boolean") { - requestedFlags.clipboardRead = args.clipboardRead; + const requestedFlags: Partial = {} + if (typeof args.clipboardRead === 'boolean') { + requestedFlags.clipboardRead = args.clipboardRead } - if (typeof args.clipboardWrite === "boolean") { - requestedFlags.clipboardWrite = args.clipboardWrite; + if (typeof args.clipboardWrite === 'boolean') { + requestedFlags.clipboardWrite = args.clipboardWrite } - if (typeof args.systemKeyCombos === "boolean") { - requestedFlags.systemKeyCombos = args.systemKeyCombos; + if (typeof args.systemKeyCombos === 'boolean') { + requestedFlags.systemKeyCombos = args.systemKeyCombos } const { @@ -968,14 +989,14 @@ async function handleRequestAccess( overrides.allowedApps, new Set(overrides.userDeniedBundleIds), overrides.selectedDisplayId, - ); + ) - let dialogGranted: AppGrant[] = []; + let dialogGranted: AppGrant[] = [] let dialogDenied: Array<{ - bundleId: string; - reason: "user_denied" | "not_installed"; - }> = []; - let dialogFlags: CuGrantFlags = overrides.grantFlags; + bundleId: string + reason: 'user_denied' | 'not_installed' + }> = [] + let dialogFlags: CuGrantFlags = overrides.grantFlags if (needDialog.length > 0 || Object.keys(requestedFlags).length > 0) { const req: CuPermissionRequest = { @@ -989,35 +1010,35 @@ async function handleRequestAccess( willHide, autoUnhideEnabled: adapter.getAutoUnhideEnabled(), }), - }; - const response = await overrides.onPermissionRequest(req); - dialogGranted = response.granted; - dialogDenied = response.denied; - dialogFlags = response.flags; + } + const response = await overrides.onPermissionRequest(req) + dialogGranted = response.granted + dialogDenied = response.denied + dialogFlags = response.flags } // Do NOT return display geometry or coordinateMode. See COORDINATES.md // ("Never give the model a number that invites rescaling"). scaleCoord // already transforms server-side; the coordinate convention is baked into // the tool param descriptions at server-construction time. - const allGranted = [...skipDialogGrants, ...dialogGranted]; + const allGranted = [...skipDialogGrants, ...dialogGranted] // Filter tieredApps to what was actually granted — if the user unchecked // Chrome in the dialog, don't explain Chrome's tier. - const grantedBundleIds = new Set(allGranted.map((g) => g.bundleId)); - const grantedTieredApps = tieredApps.filter((t) => + const grantedBundleIds = new Set(allGranted.map(g => g.bundleId)) + const grantedTieredApps = tieredApps.filter(t => grantedBundleIds.has(t.bundleId), - ); + ) // Best-effort — grants are already persisted by wrappedPermissionHandler; // a listDisplays/findWindowDisplays failure (monitor hot-unplug, NAPI // error) must not tank the grant response. Same discipline as // buildMonitorNote's listDisplays try/catch. - let windowLocations: Awaited> = []; + let windowLocations: Awaited> = [] try { - windowLocations = await buildWindowLocations(adapter, allGranted); + windowLocations = await buildWindowLocations(adapter, allGranted) } catch (e) { adapter.logger.warn( `[computer-use] buildWindowLocations failed: ${String(e)}`, - ); + ) } return okJson( { @@ -1057,7 +1078,7 @@ async function handleRequestAccess( denied_count: dialogDenied.length, ...tierAssignmentTelemetry(grantedTieredApps), }, - ); + ) } /** @@ -1070,35 +1091,35 @@ async function buildWindowLocations( granted: AppGrant[], ): Promise< Array<{ - bundleId: string; - displayName: string; - displays: Array<{ id: number; label?: string; isPrimary?: boolean }>; + bundleId: string + displayName: string + displays: Array<{ id: number; label?: string; isPrimary?: boolean }> }> > { - if (granted.length === 0) return []; + if (granted.length === 0) return [] - const displays = await adapter.executor.listDisplays(); - if (displays.length <= 1) return []; + const displays = await adapter.executor.listDisplays() + if (displays.length <= 1) return [] - const grantedBundleIds = granted.map((g) => g.bundleId); - const windowLocs = await adapter.executor.findWindowDisplays(grantedBundleIds); - const displayById = new Map(displays.map((d) => [d.displayId, d])); - const idsByBundle = new Map(windowLocs.map((w) => [w.bundleId, w.displayIds])); + const grantedBundleIds = granted.map(g => g.bundleId) + const windowLocs = await adapter.executor.findWindowDisplays(grantedBundleIds) + const displayById = new Map(displays.map(d => [d.displayId, d])) + const idsByBundle = new Map(windowLocs.map(w => [w.bundleId, w.displayIds])) - const out = []; + const out = [] for (const g of granted) { - const displayIds = idsByBundle.get(g.bundleId); - if (!displayIds || displayIds.length === 0) continue; + const displayIds = idsByBundle.get(g.bundleId) + if (!displayIds || displayIds.length === 0) continue out.push({ bundleId: g.bundleId, displayName: g.displayName, - displays: displayIds.map((id) => { - const d = displayById.get(id); - return { id, label: d?.label, isPrimary: d?.isPrimary }; + displays: displayIds.map(id => { + const d = displayById.get(id) + return { id, label: d?.label, isPrimary: d?.isPrimary } }), - }); + }) } - return out; + return out } /** @@ -1115,25 +1136,25 @@ async function buildWindowLocations( /** An app assigned a restricted tier (not `"full"`). Used to build the * guidance message telling the model what it can/can't do. */ interface TieredApp { - bundleId: string; - displayName: string; + bundleId: string + displayName: string /** Never `"full"` — only restricted tiers are collected. */ - tier: "read" | "click"; + tier: 'read' | 'click' } interface AccessRequestParts { - needDialog: ResolvedAppRequest[]; - skipDialogGrants: AppGrant[]; - willHide: Array<{ bundleId: string; displayName: string }>; + needDialog: ResolvedAppRequest[] + skipDialogGrants: AppGrant[] + willHide: Array<{ bundleId: string; displayName: string }> /** Resolved apps with `proposedTier !== "full"` — for the guidance text. * Unresolved apps are omitted (they go to `denied` with `not_installed`). */ - tieredApps: TieredApp[]; + tieredApps: TieredApp[] /** Apps stripped by the user's Settings auto-deny list. Surfaced in the * response with guidance; never reach the dialog. */ - userDenied: Array<{ requestedName: string; displayName: string }>; + userDenied: Array<{ requestedName: string; displayName: string }> /** Apps stripped by the baked-in policy blocklist (streaming/music/ebooks, * etc. — `deniedApps.isPolicyDenied`). Precedence over userDenied. */ - policyDenied: Array<{ requestedName: string; displayName: string }>; + policyDenied: Array<{ requestedName: string; displayName: string }> } async function buildAccessRequest( @@ -1143,22 +1164,21 @@ async function buildAccessRequest( userDeniedBundleIds: ReadonlySet, selectedDisplayId?: number, ): Promise { - const alreadyGranted = new Set(allowedApps.map((g) => g.bundleId)); - const installed = await adapter.executor.listInstalledApps(); - const resolved = resolveRequestedApps(apps, installed, alreadyGranted); + const alreadyGranted = new Set(allowedApps.map(g => g.bundleId)) + const installed = await adapter.executor.listInstalledApps() + const resolved = resolveRequestedApps(apps, installed, alreadyGranted) // Policy-level auto-deny (baked-in, not user-configurable). Stripped // before userDenied — checks bundle ID AND display name (covers // unresolved requests). Precedence: policy > user setting > tier. - const policyDenied: Array<{ requestedName: string; displayName: string }> = - []; - const afterPolicy: typeof resolved = []; + const policyDenied: Array<{ requestedName: string; displayName: string }> = [] + const afterPolicy: typeof resolved = [] for (const r of resolved) { - const displayName = r.resolved?.displayName ?? r.requestedName; + const displayName = r.resolved?.displayName ?? r.requestedName if (isPolicyDenied(r.resolved?.bundleId, displayName)) { - policyDenied.push({ requestedName: r.requestedName, displayName }); + policyDenied.push({ requestedName: r.requestedName, displayName }) } else { - afterPolicy.push(r); + afterPolicy.push(r) } } @@ -1168,16 +1188,16 @@ async function buildAccessRequest( // Bundle-ID match only (the Settings UI picks from installed apps, which // always have a bundle ID). Unresolved requests pass through to the tier // system; the user can't preemptively deny an app that isn't installed. - const userDenied: Array<{ requestedName: string; displayName: string }> = []; - const surviving: typeof afterPolicy = []; + const userDenied: Array<{ requestedName: string; displayName: string }> = [] + const surviving: typeof afterPolicy = [] for (const r of afterPolicy) { if (r.resolved && userDeniedBundleIds.has(r.resolved.bundleId)) { userDenied.push({ requestedName: r.requestedName, displayName: r.resolved.displayName, - }); + }) } else { - surviving.push(r); + surviving.push(r) } } @@ -1185,21 +1205,21 @@ async function buildAccessRequest( // Unresolved apps with a restricted tier (e.g. model asks for "Chrome" but // it's not installed) are omitted — they'll end up in the `denied` list // with reason "not_installed" and the model will see that instead. - const tieredApps: TieredApp[] = []; + const tieredApps: TieredApp[] = [] for (const r of surviving) { - if (r.proposedTier === "full" || !r.resolved) continue; + if (r.proposedTier === 'full' || !r.resolved) continue tieredApps.push({ bundleId: r.resolved.bundleId, displayName: r.resolved.displayName, tier: r.proposedTier, - }); + }) } // Idempotence: apps that are already granted skip the dialog and are // merged into the `granted` response. Existing grants keep their tier // (which may differ from the current proposedTier if policy changed). - const skipDialog = surviving.filter((r) => r.alreadyGranted); - const needDialog = surviving.filter((r) => !r.alreadyGranted); + const skipDialog = surviving.filter(r => r.alreadyGranted) + const needDialog = surviving.filter(r => !r.alreadyGranted) // Populate icons only for what the dialog will actually show. Sequential // awaits are fine — the Swift module is cached (listInstalledApps above @@ -1207,25 +1227,25 @@ async function buildAccessRequest( // memoizes by path. Failures leave iconDataUrl undefined; renderer falls // back to a grey box. for (const r of needDialog) { - if (!r.resolved) continue; + if (!r.resolved) continue try { r.resolved.iconDataUrl = await adapter.executor.getAppIcon( r.resolved.path, - ); + ) } catch { // leave undefined } } - const now = Date.now(); + const now = Date.now() const skipDialogGrants: AppGrant[] = skipDialog - .filter((r) => r.resolved) - .map((r) => { + .filter(r => r.resolved) + .map(r => { // Reuse the existing grant (preserving grantedAt + tier) rather than // synthesizing a new one — keeps Settings-page "Granted 3m ago" honest. const existing = allowedApps.find( - (g) => g.bundleId === r.resolved!.bundleId, - ); + g => g.bundleId === r.resolved!.bundleId, + ) return ( existing ?? { bundleId: r.resolved!.bundleId, @@ -1233,20 +1253,20 @@ async function buildAccessRequest( grantedAt: now, tier: r.proposedTier, } - ); - }); + ) + }) // Preview what will be hidden if the user approves exactly the requested // set plus what they already have. All tiers are visible, so everything // resolved goes in the exempt set. const exemptForPreview = [ - ...allowedApps.map((a) => a.bundleId), - ...surviving.filter((r) => r.resolved).map((r) => r.resolved!.bundleId), - ]; + ...allowedApps.map(a => a.bundleId), + ...surviving.filter(r => r.resolved).map(r => r.resolved!.bundleId), + ] const willHide = await adapter.executor.previewHideSet( exemptForPreview, selectedDisplayId, - ); + ) return { needDialog, @@ -1255,7 +1275,7 @@ async function buildAccessRequest( tieredApps, userDenied, policyDenied, - }; + } } /** @@ -1267,60 +1287,60 @@ function buildTierGuidanceMessage(tiered: TieredApp[]): string { // tier "read" is not category-unique — split so browsers get the CiC hint // and trading platforms get "ask the user" instead. const readBrowsers = tiered.filter( - (t) => - t.tier === "read" && - getDeniedCategoryForApp(t.bundleId, t.displayName) === "browser", - ); + t => + t.tier === 'read' && + getDeniedCategoryForApp(t.bundleId, t.displayName) === 'browser', + ) const readOther = tiered.filter( - (t) => - t.tier === "read" && - getDeniedCategoryForApp(t.bundleId, t.displayName) !== "browser", - ); - const clickTier = tiered.filter((t) => t.tier === "click"); + t => + t.tier === 'read' && + getDeniedCategoryForApp(t.bundleId, t.displayName) !== 'browser', + ) + const clickTier = tiered.filter(t => t.tier === 'click') - const parts: string[] = []; + const parts: string[] = [] if (readBrowsers.length > 0) { - const names = readBrowsers.map((b) => `"${b.displayName}"`).join(", "); + const names = readBrowsers.map(b => `"${b.displayName}"`).join(', ') parts.push( - `${names} ${readBrowsers.length === 1 ? "is a browser" : "are browsers"} — ` + + `${names} ${readBrowsers.length === 1 ? 'is a browser' : 'are browsers'} — ` + `granted at tier "read" (visible in screenshots only; no clicks or ` + `typing). You can read what's on screen but cannot navigate, click, ` + - `or type into ${readBrowsers.length === 1 ? "it" : "them"}. For browser ` + + `or type into ${readBrowsers.length === 1 ? 'it' : 'them'}. For browser ` + `interaction, use the Claude-in-Chrome MCP (tools named ` + `\`mcp__Claude_in_Chrome__*\`; load via ToolSearch if deferred).`, - ); + ) } if (readOther.length > 0) { - const names = readOther.map((t) => `"${t.displayName}"`).join(", "); + const names = readOther.map(t => `"${t.displayName}"`).join(', ') parts.push( - `${names} ${readOther.length === 1 ? "is" : "are"} granted at tier ` + + `${names} ${readOther.length === 1 ? 'is' : 'are'} granted at tier ` + `"read" (visible in screenshots only; no clicks or typing). You can ` + `read what's on screen but cannot interact. Ask the user to take any ` + - `actions in ${readOther.length === 1 ? "this app" : "these apps"} ` + + `actions in ${readOther.length === 1 ? 'this app' : 'these apps'} ` + `themselves.`, - ); + ) } if (clickTier.length > 0) { - const names = clickTier.map((t) => `"${t.displayName}"`).join(", "); + const names = clickTier.map(t => `"${t.displayName}"`).join(', ') parts.push( - `${names} ${clickTier.length === 1 ? "has" : "have"} terminal or IDE ` + + `${names} ${clickTier.length === 1 ? 'has' : 'have'} terminal or IDE ` + `capabilities — granted at tier "click" (visible + plain left-click ` + `only; NO typing, key presses, right-click, modifier-clicks, or ` + `drag-drop). You can click buttons and scroll output, but ` + - `${clickTier.length === 1 ? "its" : "their"} integrated terminal and ` + + `${clickTier.length === 1 ? 'its' : 'their'} integrated terminal and ` + `editor are off-limits to keyboard input. Right-click (context-menu ` + - `Paste) and dragging text onto ${clickTier.length === 1 ? "it" : "them"} ` + + `Paste) and dragging text onto ${clickTier.length === 1 ? 'it' : 'them'} ` + `require tier "full". For shell commands, use the Bash tool.`, - ); + ) } - if (parts.length === 0) return ""; + if (parts.length === 0) return '' // Same anti-subversion clause the gate errors carry — said upfront so the // model doesn't reach for osascript/cliclick after seeing "no clicks/typing". - return parts.join("\n\n") + TIER_ANTI_SUBVERSION; + return parts.join('\n\n') + TIER_ANTI_SUBVERSION } /** @@ -1332,16 +1352,16 @@ function buildTierGuidanceMessage(tiered: TieredApp[]): string { function buildUserDeniedGuidance( userDenied: Array<{ requestedName: string; displayName: string }>, ): string { - const names = userDenied.map((d) => `"${d.displayName}"`).join(", "); - const one = userDenied.length === 1; + const names = userDenied.map(d => `"${d.displayName}"`).join(', ') + const one = userDenied.length === 1 return ( - `${names} ${one ? "is" : "are"} in the user's auto-deny list ` + + `${names} ${one ? 'is' : 'are'} in the user's auto-deny list ` + `(Settings → Desktop app (General) → Computer Use → Denied apps). ` + `Requests for ` + - `${one ? "this app" : "these apps"} are automatically denied. If you need access for ` + - `this task, ask the user to remove ${one ? "it" : "them"} from their ` + + `${one ? 'this app' : 'these apps'} are automatically denied. If you need access for ` + + `this task, ask the user to remove ${one ? 'it' : 'them'} from their ` + `deny list in Settings — you cannot request this through the tool.` - ); + ) } /** @@ -1352,17 +1372,17 @@ function buildUserDeniedGuidance( function buildPolicyDeniedGuidance( policyDenied: Array<{ requestedName: string; displayName: string }>, ): string { - const names = policyDenied.map((d) => `"${d.displayName}"`).join(", "); - const one = policyDenied.length === 1; + const names = policyDenied.map(d => `"${d.displayName}"`).join(', ') + const one = policyDenied.length === 1 return ( - `${names} ${one ? "is" : "are"} blocked by policy for computer use. ` + - `Requests for ${one ? "this app" : "these apps"} are automatically ` + + `${names} ${one ? 'is' : 'are'} blocked by policy for computer use. ` + + `Requests for ${one ? 'this app' : 'these apps'} are automatically ` + `denied regardless of what the user has approved. There is no Settings ` + `override. Inform the user that you cannot access ` + - `${one ? "this app" : "these apps"} and suggest an alternative ` + + `${one ? 'this app' : 'these apps'} and suggest an alternative ` + `approach if one exists. Do not try to directly subvert this block ` + `regardless of the user's request.` - ); + ) } /** @@ -1371,16 +1391,16 @@ function buildPolicyDeniedGuidance( */ function tierAssignmentTelemetry( tiered: TieredApp[], -): Pick { +): Pick { // `denied_browser_count` now counts ALL tier-"read" grants (browsers + // trading). The field name was already legacy-only before trading existed // (dashboards read it as "non-full tier"), so no new column. - const browserCount = tiered.filter((t) => t.tier === "read").length; - const terminalCount = tiered.filter((t) => t.tier === "click").length; + const browserCount = tiered.filter(t => t.tier === 'read').length + const terminalCount = tiered.filter(t => t.tier === 'click').length return { ...(browserCount > 0 && { denied_browser_count: browserCount }), ...(terminalCount > 0 && { denied_terminal_count: terminalCount }), - }; + } } /** @@ -1402,9 +1422,9 @@ async function handleRequestTeachAccess( ): Promise { if (!overrides.onTeachPermissionRequest) { return errorResult( - "Teach mode is not available in this session.", - "feature_unavailable", - ); + 'Teach mode is not available in this session.', + 'feature_unavailable', + ) } // Same as handleRequestAccess above — the dialog renders in the hidden @@ -1413,13 +1433,13 @@ async function handleRequestTeachAccess( // mid-session to add more apps" and this uses the same grant model. if (overrides.getTeachModeActive?.()) { return errorResult( - "Teach mode is already active. To add more apps, end the current tour first, then call request_teach_access again with the full app list.", - "teach_mode_conflict", - ); + 'Teach mode is already active. To add more apps, end the current tour first, then call request_teach_access again with the full app list.', + 'teach_mode_conflict', + ) } - const reason = requireString(args, "reason"); - if (reason instanceof Error) return errorResult(reason.message, "bad_args"); + const reason = requireString(args, 'reason') + if (reason instanceof Error) return errorResult(reason.message, 'bad_args') // TCC-ungranted branch — identical to handleRequestAccess's. The renderer // shows the same TCC toggle panel regardless of which request tool got here. @@ -1430,37 +1450,41 @@ async function handleRequestTeachAccess( apps: [], screenshotFiltering: adapter.executor.capabilities.screenshotFiltering, tccState, - }; - await overrides.onTeachPermissionRequest(req); + } + await overrides.onTeachPermissionRequest(req) // Same re-check as handleRequestAccess — user may have granted while the // dialog was up, and the pre-dialog snapshot would mislead the model. - const recheck = await adapter.ensureOsPermissions(); + const recheck = await adapter.ensureOsPermissions() if (recheck.granted) { return errorResult( - "macOS Accessibility and Screen Recording are now both granted. " + - "Call request_teach_access again immediately — the next call will " + - "show the app selection list.", - ); + 'macOS Accessibility and Screen Recording are now both granted. ' + + 'Call request_teach_access again immediately — the next call will ' + + 'show the app selection list.', + ) } - const perms = recheck as { granted: false; accessibility: boolean; screenRecording: boolean }; - const missing: string[] = []; - if (!perms.accessibility) missing.push("Accessibility"); - if (!perms.screenRecording) missing.push("Screen Recording"); + const perms = recheck as { + granted: false + accessibility: boolean + screenRecording: boolean + } + const missing: string[] = [] + if (!perms.accessibility) missing.push('Accessibility') + if (!perms.screenRecording) missing.push('Screen Recording') return errorResult( - `macOS ${missing.join(" and ")} permission(s) not yet granted. ` + + `macOS ${missing.join(' and ')} permission(s) not yet granted. ` + `The permission panel has been shown. Once the user grants the ` + `missing permission(s), call request_teach_access again.`, - "tcc_not_granted", - ); + 'tcc_not_granted', + ) } - const rawApps = args.apps; - if (!Array.isArray(rawApps) || !rawApps.every((a) => typeof a === "string")) { - return errorResult('"apps" must be an array of strings.', "bad_args"); + const rawApps = args.apps + if (!Array.isArray(rawApps) || !rawApps.every(a => typeof a === 'string')) { + return errorResult('"apps" must be an array of strings.', 'bad_args') } - const apps = rawApps as string[]; + const apps = rawApps as string[] const { needDialog, @@ -1475,7 +1499,7 @@ async function handleRequestTeachAccess( overrides.allowedApps, new Set(overrides.userDeniedBundleIds), overrides.selectedDisplayId, - ); + ) // All requested apps were user-denied (or unresolvable) and none pre-granted // — skip the dialog entirely. Without this, onTeachPermissionRequest fires @@ -1505,7 +1529,7 @@ async function handleRequestTeachAccess( screenshotFiltering: adapter.executor.capabilities.screenshotFiltering, }, { granted_count: 0, denied_count: 0 }, - ); + ) } const req: CuTeachPermissionRequest = { @@ -1517,25 +1541,25 @@ async function handleRequestTeachAccess( willHide, autoUnhideEnabled: adapter.getAutoUnhideEnabled(), }), - }; - const response = await overrides.onTeachPermissionRequest(req); + } + const response = await overrides.onTeachPermissionRequest(req) - const granted = [...skipDialogGrants, ...response.granted]; + const granted = [...skipDialogGrants, ...response.granted] // Gate on explicit dialog consent, NOT on merged grant length. // skipDialogGrants are pre-existing idempotent app grants — they don't // imply the user said yes to THIS dialog. Without the userConsented // check, Deny would still activate teach mode whenever any requested // app was previously granted (worst case: needDialog=[] → Allow and // Deny payloads are structurally identical). - const teachModeActive = response.userConsented === true && granted.length > 0; + const teachModeActive = response.userConsented === true && granted.length > 0 if (teachModeActive) { - overrides.onTeachModeActivated?.(); + overrides.onTeachModeActivated?.() } - const grantedBundleIds = new Set(granted.map((g) => g.bundleId)); - const grantedTieredApps = tieredApps.filter((t) => + const grantedBundleIds = new Set(granted.map(g => g.bundleId)) + const grantedTieredApps = tieredApps.filter(t => grantedBundleIds.has(t.bundleId), - ); + ) return okJson( { @@ -1566,7 +1590,7 @@ async function handleRequestTeachAccess( denied_count: response.denied.length, ...tierAssignmentTelemetry(grantedTieredApps), }, - ); + ) } // --------------------------------------------------------------------------- @@ -1575,10 +1599,10 @@ async function handleRequestTeachAccess( /** A fully-validated teach step, anchor already scaled to logical points. */ interface ValidatedTeachStep { - explanation: string; - nextPreview: string; - anchorLogical: TeachStepRequest["anchorLogical"]; - actions: Array>; + explanation: string + nextPreview: string + anchorLogical: TeachStepRequest['anchorLogical'] + actions: Array> } /** @@ -1600,55 +1624,53 @@ async function validateTeachStepArgs( overrides: ComputerUseOverrides, label: string, ): Promise { - const explanation = requireString(raw, "explanation"); + const explanation = requireString(raw, 'explanation') if (explanation instanceof Error) { - return new Error(`${label}: ${explanation.message}`); + return new Error(`${label}: ${explanation.message}`) } - const nextPreview = requireString(raw, "next_preview"); + const nextPreview = requireString(raw, 'next_preview') if (nextPreview instanceof Error) { - return new Error(`${label}: ${nextPreview.message}`); + return new Error(`${label}: ${nextPreview.message}`) } - const actions = raw.actions; + const actions = raw.actions if (!Array.isArray(actions)) { - return new Error( - `${label}: "actions" must be an array (empty is allowed).`, - ); + return new Error(`${label}: "actions" must be an array (empty is allowed).`) } for (const [i, act] of actions.entries()) { - if (typeof act !== "object" || act === null) { - return new Error(`${label}: actions[${i}] must be an object`); + if (typeof act !== 'object' || act === null) { + return new Error(`${label}: actions[${i}] must be an object`) } - const action = (act as Record).action; - if (typeof action !== "string") { - return new Error(`${label}: actions[${i}].action must be a string`); + const action = (act as Record).action + if (typeof action !== 'string') { + return new Error(`${label}: actions[${i}].action must be a string`) } if (!BATCHABLE_ACTIONS.has(action)) { return new Error( `${label}: actions[${i}].action="${action}" is not allowed. ` + - `Allowed: ${[...BATCHABLE_ACTIONS].join(", ")}.`, - ); + `Allowed: ${[...BATCHABLE_ACTIONS].join(', ')}.`, + ) } } - let anchorLogical: TeachStepRequest["anchorLogical"]; + let anchorLogical: TeachStepRequest['anchorLogical'] if (raw.anchor !== undefined) { - const anchor = raw.anchor; + const anchor = raw.anchor if ( !Array.isArray(anchor) || anchor.length !== 2 || - typeof anchor[0] !== "number" || - typeof anchor[1] !== "number" || + typeof anchor[0] !== 'number' || + typeof anchor[1] !== 'number' || !Number.isFinite(anchor[0]) || !Number.isFinite(anchor[1]) ) { return new Error( `${label}: "anchor" must be a [x, y] number tuple or omitted.`, - ); + ) } const display = await adapter.executor.getDisplaySize( overrides.selectedDisplayId, - ); + ) anchorLogical = scaleCoord( anchor[0], anchor[1], @@ -1656,7 +1678,7 @@ async function validateTeachStepArgs( display, overrides.lastScreenshot, adapter.logger, - ); + ) } return { @@ -1664,23 +1686,23 @@ async function validateTeachStepArgs( nextPreview, anchorLogical, actions: actions as Array>, - }; + } } /** Outcome of showing one tooltip + running its actions. */ type TeachStepOutcome = - | { kind: "exit" } - | { kind: "ok"; results: BatchActionResult[] } + | { kind: 'exit' } + | { kind: 'ok'; results: BatchActionResult[] } | { - kind: "action_error"; - executed: number; - failed: BatchActionResult; - remaining: number; + kind: 'action_error' + executed: number + failed: BatchActionResult + remaining: number /** The inner action's telemetry (error_kind), forwarded so the * caller can pass it to okJson and keep cu_tool_call accurate * when the failure happened inside a batch. */ - telemetry: CuCallTelemetry | undefined; - }; + telemetry: CuCallTelemetry | undefined + } /** * Show the tooltip, block for Next/Exit, run actions on Next. @@ -1706,30 +1728,30 @@ async function executeTeachStep( explanation: step.explanation, nextPreview: step.nextPreview, anchorLogical: step.anchorLogical, - }); + }) - if (stepResult.action === "exit") { + if (stepResult.action === 'exit') { // The host's Exit handler also calls stopSession, so the turn is // already unwinding. Caller decides what to return for the transcript. // A PREVIOUS step's left_mouse_down may have left the OS button held. - await releaseHeldMouse(adapter); - return { kind: "exit" }; + await releaseHeldMouse(adapter) + return { kind: 'exit' } } // Next clicked. Flip overlay to spinner before we start driving. - overrides.onTeachWorking?.(); + overrides.onTeachWorking?.() if (step.actions.length === 0) { - return { kind: "ok", results: [] }; + return { kind: 'ok', results: [] } } if (subGates.hideBeforeAction) { const hidden = await adapter.executor.prepareForAction( - overrides.allowedApps.map((a) => a.bundleId), + overrides.allowedApps.map(a => a.bundleId), overrides.selectedDisplayId, - ); + ) if (hidden.length > 0) { - overrides.onAppsHidden?.(hidden); + overrides.onAppsHidden?.(hidden) } } @@ -1740,20 +1762,20 @@ async function executeTeachStep( // Anchors are pre-computed against the display at batch start. // A mid-batch resolver switch would break tooltip positioning. autoTargetDisplay: false, - }; + } - const results: BatchActionResult[] = []; + const results: BatchActionResult[] = [] for (const [i, act] of step.actions.entries()) { // Same abort check as handleComputerBatch — Exit calls stopSession so // this IS the exit path, just caught mid-dispatch instead of at the // onTeachStep await above. Callers already handle { kind: "exit" }. if (overrides.isAborted?.()) { - await releaseHeldMouse(adapter); - return { kind: "exit" }; + await releaseHeldMouse(adapter) + return { kind: 'exit' } } // Same inter-step settle as handleComputerBatch. - if (i > 0) await sleep(10); - const action = act.action as string; + if (i > 0) await sleep(10) + const action = act.action as string // Drop mid-step screenshot piggyback — same invariant as computer_batch. // Click coords stay anchored to the screenshot the model took BEFORE @@ -1764,25 +1786,25 @@ async function executeTeachStep( adapter, overrides, stepSubGates, - ); + ) - const text = firstTextContent(inner); - const result = { action, ok: !inner.isError, output: text }; - results.push(result); + const text = firstTextContent(inner) + const result = { action, ok: !inner.isError, output: text } + results.push(result) if (inner.isError) { - await releaseHeldMouse(adapter); + await releaseHeldMouse(adapter) return { - kind: "action_error", + kind: 'action_error', executed: results.length - 1, failed: result, remaining: step.actions.length - results.length, telemetry: inner.telemetry, - }; + } } } - return { kind: "ok", results }; + return { kind: 'ok', results } } /** @@ -1801,16 +1823,16 @@ async function appendTeachScreenshot( overrides: ComputerUseOverrides, subGates: CuSubGates, ): Promise { - const shotResult = await handleScreenshot(adapter, overrides, subGates); + const shotResult = await handleScreenshot(adapter, overrides, subGates) if (shotResult.isError) { // Hide+screenshot failed (rare — e.g. SCContentFilter error). Don't // tank the step; just omit the image. Model will call screenshot // itself and see the real error. - return okJson(resultJson); + return okJson(resultJson) } return { content: [ - { type: "text", text: JSON.stringify(resultJson) }, + { type: 'text', text: JSON.stringify(resultJson) }, // handleScreenshot's content is [maybeMonitorNote, maybeHiddenNote, // image]. Spread all — both notes are useful context and the model // expects them alongside screenshots. @@ -1818,7 +1840,7 @@ async function appendTeachScreenshot( ], // For serverDef.ts to stash. Next teach_step.anchor scales against this. screenshot: shotResult.screenshot, - }; + } } /** @@ -1833,25 +1855,25 @@ async function handleTeachStep( ): Promise { if (!overrides.onTeachStep) { return errorResult( - "Teach mode is not active. Call request_teach_access first.", - "teach_mode_not_active", - ); + 'Teach mode is not active. Call request_teach_access first.', + 'teach_mode_not_active', + ) } const step = await validateTeachStepArgs( args, adapter, overrides, - "teach_step", - ); - if (step instanceof Error) return errorResult(step.message, "bad_args"); + 'teach_step', + ) + if (step instanceof Error) return errorResult(step.message, 'bad_args') - const outcome = await executeTeachStep(step, adapter, overrides, subGates); + const outcome = await executeTeachStep(step, adapter, overrides, subGates) - if (outcome.kind === "exit") { - return okJson({ exited: true }); + if (outcome.kind === 'exit') { + return okJson({ exited: true }) } - if (outcome.kind === "action_error") { + if (outcome.kind === 'action_error') { return okJson( { executed: outcome.executed, @@ -1859,20 +1881,20 @@ async function handleTeachStep( remaining: outcome.remaining, }, outcome.telemetry, - ); + ) } // ok. No screenshot for empty actions — screen didn't change, model's // existing screenshot is still accurate. if (step.actions.length === 0) { - return okJson({ executed: 0, results: [] }); + return okJson({ executed: 0, results: [] }) } return appendTeachScreenshot( { executed: outcome.results.length, results: outcome.results }, adapter, overrides, subGates, - ); + ) } /** @@ -1901,40 +1923,40 @@ async function handleTeachBatch( ): Promise { if (!overrides.onTeachStep) { return errorResult( - "Teach mode is not active. Call request_teach_access first.", - "teach_mode_not_active", - ); + 'Teach mode is not active. Call request_teach_access first.', + 'teach_mode_not_active', + ) } - const rawSteps = args.steps; + const rawSteps = args.steps if (!Array.isArray(rawSteps) || rawSteps.length < 1) { - return errorResult('"steps" must be a non-empty array.', "bad_args"); + return errorResult('"steps" must be a non-empty array.', 'bad_args') } // Validate upfront — fail fast before showing any tooltip. - const steps: ValidatedTeachStep[] = []; + const steps: ValidatedTeachStep[] = [] for (const [i, raw] of rawSteps.entries()) { - if (typeof raw !== "object" || raw === null) { - return errorResult(`steps[${i}] must be an object`, "bad_args"); + if (typeof raw !== 'object' || raw === null) { + return errorResult(`steps[${i}] must be an object`, 'bad_args') } const v = await validateTeachStepArgs( raw as Record, adapter, overrides, `steps[${i}]`, - ); - if (v instanceof Error) return errorResult(v.message, "bad_args"); - steps.push(v); + ) + if (v instanceof Error) return errorResult(v.message, 'bad_args') + steps.push(v) } - const allResults: BatchActionResult[][] = []; + const allResults: BatchActionResult[][] = [] for (const [i, step] of steps.entries()) { - const outcome = await executeTeachStep(step, adapter, overrides, subGates); + const outcome = await executeTeachStep(step, adapter, overrides, subGates) - if (outcome.kind === "exit") { - return okJson({ exited: true, stepsCompleted: i }); + if (outcome.kind === 'exit') { + return okJson({ exited: true, stepsCompleted: i }) } - if (outcome.kind === "action_error") { + if (outcome.kind === 'action_error') { return okJson( { stepsCompleted: i, @@ -1945,18 +1967,18 @@ async function handleTeachBatch( results: allResults, }, outcome.telemetry, - ); + ) } - allResults.push(outcome.results); + allResults.push(outcome.results) } // Final screenshot only if any step ran actions (screen changed). - const screenChanged = steps.some((s) => s.actions.length > 0); - const resultJson = { stepsCompleted: steps.length, results: allResults }; + const screenChanged = steps.some(s => s.actions.length > 0) + const resultJson = { stepsCompleted: steps.length, results: allResults } if (!screenChanged) { - return okJson(resultJson); + return okJson(resultJson) } - return appendTeachScreenshot(resultJson, adapter, overrides, subGates); + return appendTeachScreenshot(resultJson, adapter, overrides, subGates) } /** @@ -1968,18 +1990,18 @@ async function buildHiddenNote( adapter: ComputerUseHostAdapter, hiddenSinceLastSeen: string[], ): Promise { - if (hiddenSinceLastSeen.length === 0) return undefined; - const running = await adapter.executor.listRunningApps(); - const nameOf = new Map(running.map((a) => [a.bundleId, a.displayName])); - const names = hiddenSinceLastSeen.map((id) => nameOf.get(id) ?? id); - const list = names.map((n) => `"${n}"`).join(", "); - const one = names.length === 1; + if (hiddenSinceLastSeen.length === 0) return undefined + const running = await adapter.executor.listRunningApps() + const nameOf = new Map(running.map(a => [a.bundleId, a.displayName])) + const names = hiddenSinceLastSeen.map(id => nameOf.get(id) ?? id) + const list = names.map(n => `"${n}"`).join(', ') + const one = names.length === 1 return ( - `${list} ${one ? "was" : "were"} open and got hidden before this screenshot ` + + `${list} ${one ? 'was' : 'were'} open and got hidden before this screenshot ` + `(not in the session allowlist). If a previous action was meant to open ` + - `${one ? "it" : "one of them"}, that's why you don't see it — call ` + - `request_access to add ${one ? "it" : "them"} to the allowlist.` - ); + `${one ? 'it' : 'one of them'}, that's why you don't see it — call ` + + `request_access to add ${one ? 'it' : 'them'} to the allowlist.` + ) } /** @@ -1996,16 +2018,16 @@ function uniqueDisplayLabels( // NSScreen.screens iteration order — same label always maps to same // physical display across buildMonitorNote → switch_display round-trip, // even if display configuration reorders between the two calls. - const sorted = [...displays].sort((a, b) => a.displayId - b.displayId); - const counts = new Map(); - const out = new Map(); + const sorted = [...displays].sort((a, b) => a.displayId - b.displayId) + const counts = new Map() + const out = new Map() for (const d of sorted) { - const base = d.label ?? `display ${d.displayId}`; - const n = (counts.get(base) ?? 0) + 1; - counts.set(base, n); - out.set(d.displayId, n === 1 ? base : `${base} (${n})`); + const base = d.label ?? `display ${d.displayId}` + const n = (counts.get(base) ?? 0) + 1 + counts.set(base, n) + out.set(d.displayId, n === 1 ? base : `${base} (${n})`) } - return out; + return out } /** @@ -2025,45 +2047,45 @@ async function buildMonitorNote( ): Promise { // listDisplays failure (e.g. Swift returns zero screens during monitor // hot-unplug) must not tank the screenshot — this note is optional context. - let displays; + let displays try { - displays = await adapter.executor.listDisplays(); + displays = await adapter.executor.listDisplays() } catch (e) { - adapter.logger.warn(`[computer-use] listDisplays failed: ${String(e)}`); - return undefined; + adapter.logger.warn(`[computer-use] listDisplays failed: ${String(e)}`) + return undefined } - if (displays.length < 2) return undefined; + if (displays.length < 2) return undefined - const labels = uniqueDisplayLabels(displays); - const nameOf = (id: number): string => labels.get(id) ?? `display ${id}`; + const labels = uniqueDisplayLabels(displays) + const nameOf = (id: number): string => labels.get(id) ?? `display ${id}` - const current = nameOf(shotDisplayId); + const current = nameOf(shotDisplayId) const others = displays - .filter((d) => d.displayId !== shotDisplayId) - .map((d) => nameOf(d.displayId)); + .filter(d => d.displayId !== shotDisplayId) + .map(d => nameOf(d.displayId)) const switchHint = canSwitchDisplay - ? " Use switch_display to capture a different monitor." - : ""; + ? ' Use switch_display to capture a different monitor.' + : '' const othersList = others.length > 0 - ? ` Other attached monitors: ${others.map((n) => `"${n}"`).join(", ")}.` + + ? ` Other attached monitors: ${others.map(n => `"${n}"`).join(', ')}.` + switchHint - : ""; + : '' // 0 is kCGNullDirectDisplay (sentinel from old sessions persisted // pre-multimon) — treat same as undefined. if (lastDisplayId === undefined || lastDisplayId === 0) { - return `This screenshot was taken on monitor "${current}".` + othersList; + return `This screenshot was taken on monitor "${current}".` + othersList } if (lastDisplayId !== shotDisplayId) { - const prev = nameOf(lastDisplayId); + const prev = nameOf(lastDisplayId) return ( `This screenshot was taken on monitor "${current}", which is different ` + `from your previous screenshot (taken on "${prev}").` + othersList - ); + ) } - return undefined; + return undefined } async function handleScreenshot( @@ -2074,9 +2096,9 @@ async function handleScreenshot( // §2 — empty allowlist → tool error, no screenshot. if (overrides.allowedApps.length === 0) { return errorResult( - "No applications are granted for this session. Call request_access first.", - "allowlist_empty", - ); + 'No applications are granted for this session. Call request_access first.', + 'allowlist_empty', + ) } // Atomic resolve→prepare→capture (one Swift call, no scheduler gap). @@ -2087,10 +2109,10 @@ async function handleScreenshot( // Otherwise sticky display: only auto-resolve when the allowed-app // set has changed since the display was last resolved. Prevents the // resolver yanking the display on every screenshot. - const allowedBundleIds = overrides.allowedApps.map((a) => a.bundleId); - const currentAppSetKey = allowedBundleIds.slice().sort().join(","); - const appSetChanged = currentAppSetKey !== overrides.displayResolvedForApps; - const autoResolve = !overrides.displayPinnedByModel && appSetChanged; + const allowedBundleIds = overrides.allowedApps.map(a => a.bundleId) + const currentAppSetKey = allowedBundleIds.slice().sort().join(',') + const appSetChanged = currentAppSetKey !== overrides.displayResolvedForApps + const autoResolve = !overrides.displayPinnedByModel && appSetChanged const result = await adapter.executor.resolvePrepareCapture({ allowedBundleIds, @@ -2100,7 +2122,7 @@ async function handleScreenshot( // atomic path honors the same toggle the non-atomic path checks // at the prepareForAction call site. doHide: subGates.hideBeforeAction, - }); + }) // Non-atomic path's takeScreenshotWithRetry has a MIN_SCREENSHOT_BYTES // check + retry. The atomic call is expensive (resolve+prepare+capture), @@ -2113,7 +2135,7 @@ async function handleScreenshot( ) { adapter.logger.warn( `[computer-use] resolvePrepareCapture result implausibly small (${decodedByteLength(result.base64)} bytes decoded) — possible transient display state`, - ); + ) } // Resolver picked a different display than the session had selected @@ -2123,34 +2145,34 @@ async function handleScreenshot( if (result.displayId !== overrides.selectedDisplayId) { adapter.logger.debug( `[computer-use] resolver: preferred=${overrides.selectedDisplayId} resolved=${result.displayId}`, - ); - overrides.onResolvedDisplayUpdated?.(result.displayId); + ) + overrides.onResolvedDisplayUpdated?.(result.displayId) } // Record the app set this display was resolved for, so the next // screenshot skips auto-resolve until the set changes again. Gated on // autoResolve (not just appSetChanged) — when pinned, we didn't // actually resolve, so don't update the key. if (autoResolve) { - overrides.onDisplayResolvedForApps?.(currentAppSetKey); + overrides.onDisplayResolvedForApps?.(currentAppSetKey) } // Report hidden apps only when the model has already seen the screen. - let hiddenSinceLastSeen: string[] = []; + let hiddenSinceLastSeen: string[] = [] if (overrides.lastScreenshot !== undefined) { - hiddenSinceLastSeen = result.hidden; + hiddenSinceLastSeen = result.hidden } if (result.hidden.length > 0) { - overrides.onAppsHidden?.(result.hidden); + overrides.onAppsHidden?.(result.hidden) } // Partial-success case: hide succeeded, capture failed (SCK perm // revoked mid-session). onAppsHidden fired above so auto-unhide will // restore hidden apps at turn end. Now surface the error to the model. if (result.captureError !== undefined) { - return errorResult(result.captureError, "capture_failed"); + return errorResult(result.captureError, 'capture_failed') } - const hiddenNote = await buildHiddenNote(adapter, hiddenSinceLastSeen); + const hiddenNote = await buildHiddenNote(adapter, hiddenSinceLastSeen) // Cherry-pick — don't spread `result` (would leak resolver fields into lastScreenshot). const shot: ScreenshotResult = { @@ -2162,41 +2184,48 @@ async function handleScreenshot( displayId: result.displayId, originX: result.originX, originY: result.originY, - }; + } const monitorNote = await buildMonitorNote( adapter, shot.displayId ?? 0, overrides.lastScreenshot?.displayId, overrides.onDisplayPinned !== undefined, - ); + ) return { content: [ - ...(monitorNote ? [{ type: "text" as const, text: monitorNote }] : []), - ...(hiddenNote ? [{ type: "text" as const, text: hiddenNote }] : []), + ...(monitorNote ? [{ type: 'text' as const, text: monitorNote }] : []), + ...(hiddenNote ? [{ type: 'text' as const, text: hiddenNote }] : []), // Accessibility snapshot: structured GUI element tree (Windows bound-window mode) - ...(shot.accessibilityText ? [{ type: "text" as const, text: `GUI elements in this window:\n${shot.accessibilityText}` }] : []), + ...(shot.accessibilityText + ? [ + { + type: 'text' as const, + text: `GUI elements in this window:\n${shot.accessibilityText}`, + }, + ] + : []), { - type: "image", + type: 'image', data: shot.base64, mimeType: detectMimeFromBase64(shot.base64), }, ], screenshot: shot, - }; + } } // Same hide+defocus sequence as input actions. Screenshot needs hide too // — if a non-allowlisted app is on top, SCContentFilter would composite it // out, but the pixels BELOW it are what the model would see, and those are // NOT what's actually there. Hiding first makes the screenshot TRUE. - let hiddenSinceLastSeen: string[] = []; + let hiddenSinceLastSeen: string[] = [] if (subGates.hideBeforeAction) { const hidden = await adapter.executor.prepareForAction( - overrides.allowedApps.map((a) => a.bundleId), + overrides.allowedApps.map(a => a.bundleId), overrides.selectedDisplayId, - ); + ) // "Something appeared since the model last looked." Report whenever: // (a) prepare hid something AND // (b) the model has ALREADY SEEN the screen (lastScreenshot is set). @@ -2216,45 +2245,52 @@ async function handleScreenshot( // Rare, and "Safari appeared" is at worst mild noise — far better than // the false-negative of never explaining why the file vanished. if (overrides.lastScreenshot !== undefined) { - hiddenSinceLastSeen = hidden; + hiddenSinceLastSeen = hidden } if (hidden.length > 0) { - overrides.onAppsHidden?.(hidden); + overrides.onAppsHidden?.(hidden) } } - const allowedBundleIds = overrides.allowedApps.map((g) => g.bundleId); + const allowedBundleIds = overrides.allowedApps.map(g => g.bundleId) const shot = await takeScreenshotWithRetry( adapter.executor, allowedBundleIds, adapter.logger, overrides.selectedDisplayId, - ); + ) - const hiddenNote = await buildHiddenNote(adapter, hiddenSinceLastSeen); + const hiddenNote = await buildHiddenNote(adapter, hiddenSinceLastSeen) const monitorNote = await buildMonitorNote( adapter, shot.displayId ?? 0, overrides.lastScreenshot?.displayId, overrides.onDisplayPinned !== undefined, - ); + ) return { content: [ - ...(monitorNote ? [{ type: "text" as const, text: monitorNote }] : []), - ...(hiddenNote ? [{ type: "text" as const, text: hiddenNote }] : []), + ...(monitorNote ? [{ type: 'text' as const, text: monitorNote }] : []), + ...(hiddenNote ? [{ type: 'text' as const, text: hiddenNote }] : []), // Accessibility snapshot: structured GUI element tree (Windows bound-window mode) - ...(shot.accessibilityText ? [{ type: "text" as const, text: `GUI elements in this window:\n${shot.accessibilityText}` }] : []), + ...(shot.accessibilityText + ? [ + { + type: 'text' as const, + text: `GUI elements in this window:\n${shot.accessibilityText}`, + }, + ] + : []), { - type: "image", + type: 'image', data: shot.base64, mimeType: detectMimeFromBase64(shot.base64), }, ], // Piggybacked for serverDef.ts to stash on InternalServerContext. screenshot: shot, - }; + } } /** @@ -2273,63 +2309,66 @@ async function handleZoom( ): Promise { // region: [x0, y0, x1, y1] in IMAGE-PX of lastScreenshot — same space the // model reads click coords from. - const region = args.region; + const region = args.region if (!Array.isArray(region) || region.length !== 4) { return errorResult( - "region must be an array of length 4: [x0, y0, x1, y1]", - "bad_args", - ); + 'region must be an array of length 4: [x0, y0, x1, y1]', + 'bad_args', + ) } - const [x0, y0, x1, y1] = region; - if (![x0, y0, x1, y1].every((v) => typeof v === "number" && v >= 0)) { - return errorResult( - "region values must be non-negative numbers", - "bad_args", - ); + const [x0, y0, x1, y1] = region + if (![x0, y0, x1, y1].every(v => typeof v === 'number' && v >= 0)) { + return errorResult('region values must be non-negative numbers', 'bad_args') } if (x1 <= x0) - return errorResult("region x1 must be greater than x0", "bad_args"); + return errorResult('region x1 must be greater than x0', 'bad_args') if (y1 <= y0) - return errorResult("region y1 must be greater than y0", "bad_args"); + return errorResult('region y1 must be greater than y0', 'bad_args') - const last = overrides.lastScreenshot; + const last = overrides.lastScreenshot if (!last) { return errorResult( - "take a screenshot before zooming (region coords are relative to it)", - "state_conflict", - ); + 'take a screenshot before zooming (region coords are relative to it)', + 'state_conflict', + ) } if (x1 > last.width || y1 > last.height) { return errorResult( `region exceeds screenshot bounds (${last.width}×${last.height})`, - "bad_args", - ); + 'bad_args', + ) } // image-px → logical-pt. Same ratio as scaleCoord (:198-199) — // displayWidth / width, not 1/scaleFactor. The ratio is folded. - const ratioX = last.displayWidth / last.width; - const ratioY = last.displayHeight / last.height; + const ratioX = last.displayWidth / last.width + const ratioY = last.displayHeight / last.height const regionLogical = { x: x0 * ratioX, y: y0 * ratioY, w: (x1 - x0) * ratioX, h: (y1 - y0) * ratioY, - }; + } - const allowedIds = overrides.allowedApps.map((g) => g.bundleId); + const allowedIds = overrides.allowedApps.map(g => g.bundleId) // Crop from the same display as lastScreenshot so the zoom region // matches the image the model is reading coords from. const zoomed = await adapter.executor.zoom( regionLogical, allowedIds, last.displayId, - ); + ) // Return the image. NO `.screenshot` piggyback — this is the invariant. return { - content: [{ type: "image", data: zoomed.base64, mimeType: detectMimeFromBase64(zoomed.base64) }], - }; + content: [ + { + type: 'image', + data: zoomed.base64, + mimeType: detectMimeFromBase64(zoomed.base64), + }, + ], + } } /** Shared handler for all five click variants. */ @@ -2338,7 +2377,7 @@ async function handleClickVariant( args: Record, overrides: ComputerUseOverrides, subGates: CuSubGates, - button: "left" | "right" | "middle", + button: 'left' | 'right' | 'middle', count: 1 | 2 | 3, ): Promise { // A prior left_mouse_down may have set mouseButtonHeld without a matching @@ -2350,21 +2389,21 @@ async function handleClickVariant( // click-tier and read-tier windows. Release first so click() gets a clean // slate. if (mouseButtonHeld) { - await adapter.executor.mouseUp(); - mouseButtonHeld = false; - mouseMoved = false; + await adapter.executor.mouseUp() + mouseButtonHeld = false + mouseMoved = false } - const coord = extractCoordinate(args); - if (coord instanceof Error) return errorResult(coord.message, "bad_args"); - const [rawX, rawY] = coord; + const coord = extractCoordinate(args) + if (coord instanceof Error) return errorResult(coord.message, 'bad_args') + const [rawX, rawY] = coord // left_click(coordinate=[x,y], text="shift") — hold modifiers // during the click. Same chord parsing as the key tool. - let modifiers: string[] | undefined; + let modifiers: string[] | undefined if (args.text !== undefined) { - if (typeof args.text !== "string") { - return errorResult("text must be a string", "bad_args"); + if (typeof args.text !== 'string') { + return errorResult('text must be a string', 'bad_args') } // Same gate as handleKey/handleHoldKey. withModifiers presses each name // via native.key(m, "press") — a non-modifier like "q" in text="cmd+q" @@ -2375,33 +2414,33 @@ async function handleClickVariant( ) { return errorResult( `The modifier chord "${args.text}" would fire a system shortcut. ` + - "Request the systemKeyCombos grant flag via request_access, or use " + - "only modifier keys (shift, ctrl, alt, cmd) in the text parameter.", - "grant_flag_required", - ); + 'Request the systemKeyCombos grant flag via request_access, or use ' + + 'only modifier keys (shift, ctrl, alt, cmd) in the text parameter.', + 'grant_flag_required', + ) } - modifiers = parseKeyChord(args.text); + modifiers = parseKeyChord(args.text) } // Right/middle-click and any click with a modifier chord escalate to // keyboard-equivalent input at tier "click" (context-menu Paste, chord // keystrokes). Compute once, pass to both gates. const clickActionKind: CuActionKind = - button !== "left" || (modifiers !== undefined && modifiers.length > 0) - ? "mouse_full" - : "mouse"; + button !== 'left' || (modifiers !== undefined && modifiers.length > 0) + ? 'mouse_full' + : 'mouse' const gate = await runInputActionGates( adapter, overrides, subGates, clickActionKind, - ); - if (gate) return gate; + ) + if (gate) return gate const display = await adapter.executor.getDisplaySize( overrides.selectedDisplayId, - ); + ) // §6 item P — pixel-validation staleness check. Sub-gated. // Runs AFTER the gates (no point validating if we're about to refuse @@ -2412,7 +2451,7 @@ async function handleClickVariant( rawY, overrides.coordinateMode, overrides.lastScreenshot, - ); + ) const validation = await validateClickTarget( adapter.cropRawPatch, overrides.lastScreenshot, @@ -2421,23 +2460,23 @@ async function handleClickVariant( async () => { // The fresh screenshot for validation uses the SAME allow-set as // the model's last screenshot did, so we compare like with like. - const allowedIds = overrides.allowedApps.map((g) => g.bundleId); + const allowedIds = overrides.allowedApps.map(g => g.bundleId) try { // Fresh shot must match lastScreenshot's display, not the current // selection — pixel-compare is against the model's last image. return await adapter.executor.screenshot({ allowedBundleIds: allowedIds, displayId: overrides.lastScreenshot?.displayId, - }); + }) } catch { - return null; + return null } }, adapter.logger, - ); + ) if (!validation.valid && validation.warning) { // Warning result — model told to re-screenshot. - return okText(validation.warning); + return okText(validation.warning) } } @@ -2448,7 +2487,7 @@ async function handleClickVariant( display, overrides.lastScreenshot, adapter.logger, - ); + ) const hitGate = await runHitTestGate( adapter, @@ -2457,11 +2496,11 @@ async function handleClickVariant( x, y, clickActionKind, - ); - if (hitGate) return hitGate; + ) + if (hitGate) return hitGate - await adapter.executor.click(x, y, button, count, modifiers); - return okText("Clicked."); + await adapter.executor.click(x, y, button, count, modifiers) + return okText('Clicked.') } async function handleType( @@ -2470,28 +2509,28 @@ async function handleType( overrides: ComputerUseOverrides, subGates: CuSubGates, ): Promise { - const text = requireString(args, "text"); - if (text instanceof Error) return errorResult(text.message, "bad_args"); + const text = requireString(args, 'text') + if (text instanceof Error) return errorResult(text.message, 'bad_args') const gate = await runInputActionGates( adapter, overrides, subGates, - "keyboard", - ); - if (gate) return gate; + 'keyboard', + ) + if (gate) return gate // §6 item 3 — clipboard-paste fast path for multi-line. Sub-gated AND // requires clipboardWrite grant. The save/restore + read-back-verify // lives in the EXECUTOR (task #5), not here. Here we just route. const viaClipboard = - text.includes("\n") && + text.includes('\n') && overrides.grantFlags.clipboardWrite && - subGates.clipboardPasteMultiline; + subGates.clipboardPasteMultiline if (viaClipboard) { - await adapter.executor.type(text, { viaClipboard: true }); - return okText("Typed (via clipboard)."); + await adapter.executor.type(text, { viaClipboard: true }) + return okText('Typed (via clipboard).') } // §6 item 7 — grapheme-cluster iteration. Prevents ZWJ emoji → �. @@ -2505,7 +2544,7 @@ async function handleType( // 2. Unicode text-insertion of '\n' is not a Return key press. URL bars // and terminals ignore it; the model's intent (submit/execute) is lost. // CRLF (\r\n) is one grapheme cluster (UAX #29 GB3), so check for it too. - const graphemes = segmentGraphemes(text); + const graphemes = segmentGraphemes(text) for (const [i, g] of graphemes.entries()) { // Same abort check as handleComputerBatch. At 8ms/grapheme a 50-char // type() runs ~400ms; this is where an in-flight batch actually @@ -2513,18 +2552,18 @@ async function handleType( if (overrides.isAborted?.()) { return errorResult( `Typing aborted after ${i} of ${graphemes.length} graphemes (user interrupt).`, - ); + ) } - await sleep(INTER_GRAPHEME_SLEEP_MS); - if (g === "\n" || g === "\r" || g === "\r\n") { - await adapter.executor.key("return"); - } else if (g === "\t") { - await adapter.executor.key("tab"); + await sleep(INTER_GRAPHEME_SLEEP_MS) + if (g === '\n' || g === '\r' || g === '\r\n') { + await adapter.executor.key('return') + } else if (g === '\t') { + await adapter.executor.key('tab') } else { - await adapter.executor.type(g, { viaClipboard: false }); + await adapter.executor.type(g, { viaClipboard: false }) } } - return okText(`Typed ${graphemes.length} grapheme(s).`); + return okText(`Typed ${graphemes.length} grapheme(s).`) } async function handleKey( @@ -2533,24 +2572,24 @@ async function handleKey( overrides: ComputerUseOverrides, subGates: CuSubGates, ): Promise { - const keySequence = requireString(args, "text"); + const keySequence = requireString(args, 'text') if (keySequence instanceof Error) - return errorResult("text is required", "bad_args"); + return errorResult('text is required', 'bad_args') // Cap 100, error strings match. - let repeat: number | undefined; + let repeat: number | undefined if (args.repeat !== undefined) { if ( - typeof args.repeat !== "number" || + typeof args.repeat !== 'number' || !Number.isInteger(args.repeat) || args.repeat < 1 ) { - return errorResult("repeat must be a positive integer", "bad_args"); + return errorResult('repeat must be a positive integer', 'bad_args') } if (args.repeat > 100) { - return errorResult("repeat exceeds maximum of 100", "bad_args"); + return errorResult('repeat exceeds maximum of 100', 'bad_args') } - repeat = args.repeat; + repeat = args.repeat } // §2 — blocklist check BEFORE gates. A blocked combo with an ungranted @@ -2562,20 +2601,20 @@ async function handleKey( ) { return errorResult( `"${keySequence}" is a system-level shortcut. Request the \`systemKeyCombos\` grant via request_access to use it.`, - "grant_flag_required", - ); + 'grant_flag_required', + ) } const gate = await runInputActionGates( adapter, overrides, subGates, - "keyboard", - ); - if (gate) return gate; + 'keyboard', + ) + if (gate) return gate - await adapter.executor.key(keySequence, repeat); - return okText("Key pressed."); + await adapter.executor.key(keySequence, repeat) + return okText('Key pressed.') } async function handleScroll( @@ -2584,36 +2623,36 @@ async function handleScroll( overrides: ComputerUseOverrides, subGates: CuSubGates, ): Promise { - const coord = extractCoordinate(args); - if (coord instanceof Error) return errorResult(coord.message, "bad_args"); - const [rawX, rawY] = coord; + const coord = extractCoordinate(args) + if (coord instanceof Error) return errorResult(coord.message, 'bad_args') + const [rawX, rawY] = coord // Uses scroll_direction + scroll_amount. // Map to our dx/dy executor interface. - const dir = args.scroll_direction; - if (dir !== "up" && dir !== "down" && dir !== "left" && dir !== "right") { + const dir = args.scroll_direction + if (dir !== 'up' && dir !== 'down' && dir !== 'left' && dir !== 'right') { return errorResult( "scroll_direction must be 'up', 'down', 'left', or 'right'", - "bad_args", - ); + 'bad_args', + ) } - const amount = args.scroll_amount; - if (typeof amount !== "number" || !Number.isInteger(amount) || amount < 0) { - return errorResult("scroll_amount must be a non-negative int", "bad_args"); + const amount = args.scroll_amount + if (typeof amount !== 'number' || !Number.isInteger(amount) || amount < 0) { + return errorResult('scroll_amount must be a non-negative int', 'bad_args') } if (amount > 100) { - return errorResult("scroll_amount exceeds maximum of 100", "bad_args"); + return errorResult('scroll_amount exceeds maximum of 100', 'bad_args') } // up → dy = -amount; down → dy = +amount; left → dx = -amount; right → dx = +amount. - const dx = dir === "left" ? -amount : dir === "right" ? amount : 0; - const dy = dir === "up" ? -amount : dir === "down" ? amount : 0; + const dx = dir === 'left' ? -amount : dir === 'right' ? amount : 0 + const dy = dir === 'up' ? -amount : dir === 'down' ? amount : 0 - const gate = await runInputActionGates(adapter, overrides, subGates, "mouse"); - if (gate) return gate; + const gate = await runInputActionGates(adapter, overrides, subGates, 'mouse') + if (gate) return gate const display = await adapter.executor.getDisplaySize( overrides.selectedDisplayId, - ); + ) const { x, y } = scaleCoord( rawX, rawY, @@ -2621,7 +2660,7 @@ async function handleScroll( display, overrides.lastScreenshot, adapter.logger, - ); + ) // When the button is held, executor.scroll's internal moveMouse generates // a leftMouseDragged event (enigo reads NSEvent.pressedMouseButtons) — @@ -2635,13 +2674,13 @@ async function handleScroll( subGates, x, y, - mouseButtonHeld ? "mouse_full" : "mouse", - ); - if (hitGate) return hitGate; - if (mouseButtonHeld) mouseMoved = true; + mouseButtonHeld ? 'mouse_full' : 'mouse', + ) + if (hitGate) return hitGate + if (mouseButtonHeld) mouseMoved = true - await adapter.executor.scroll(x, y, dx, dy); - return okText("Scrolled."); + await adapter.executor.scroll(x, y, dx, dy) + return okText('Scrolled.') } async function handleDrag( @@ -2656,34 +2695,34 @@ async function handleDrag( // the handleClickVariant clear above. Release first so drag() gets a // clean slate. if (mouseButtonHeld) { - await adapter.executor.mouseUp(); - mouseButtonHeld = false; - mouseMoved = false; + await adapter.executor.mouseUp() + mouseButtonHeld = false + mouseMoved = false } // `coordinate` is the END point // (required). `start_coordinate` is OPTIONAL — when omitted, drag from // current cursor position. - const endCoord = extractCoordinate(args, "coordinate"); + const endCoord = extractCoordinate(args, 'coordinate') if (endCoord instanceof Error) - return errorResult(endCoord.message, "bad_args"); - const rawTo = endCoord; + return errorResult(endCoord.message, 'bad_args') + const rawTo = endCoord - let rawFrom: [number, number] | undefined; + let rawFrom: [number, number] | undefined if (args.start_coordinate !== undefined) { - const startCoord = extractCoordinate(args, "start_coordinate"); + const startCoord = extractCoordinate(args, 'start_coordinate') if (startCoord instanceof Error) - return errorResult(startCoord.message, "bad_args"); - rawFrom = startCoord; + return errorResult(startCoord.message, 'bad_args') + rawFrom = startCoord } // else: rawFrom stays undefined → executor drags from current cursor. - const gate = await runInputActionGates(adapter, overrides, subGates, "mouse"); - if (gate) return gate; + const gate = await runInputActionGates(adapter, overrides, subGates, 'mouse') + if (gate) return gate const display = await adapter.executor.getDisplaySize( overrides.selectedDisplayId, - ); + ) const from = rawFrom === undefined ? undefined @@ -2694,7 +2733,7 @@ async function handleDrag( display, overrides.lastScreenshot, adapter.logger, - ); + ) const to = scaleCoord( rawTo[0], rawTo[1], @@ -2702,7 +2741,7 @@ async function handleDrag( display, overrides.lastScreenshot, adapter.logger, - ); + ) // Check both drag endpoints. `from` is where the mouseDown happens (picks // up), `to` is where mouseUp happens (drops). When start_coordinate is @@ -2713,28 +2752,28 @@ async function handleDrag( // The `to` endpoint uses "mouse_full" (not "mouse"): dropping text onto a // terminal inserts it as if typed (macOS text drag-drop). Same threat as // right-click→Paste. `from` stays "mouse" — picking up is a read. - const fromPoint = from ?? (await adapter.executor.getCursorPosition()); + const fromPoint = from ?? (await adapter.executor.getCursorPosition()) const fromGate = await runHitTestGate( adapter, overrides, subGates, fromPoint.x, fromPoint.y, - "mouse", - ); - if (fromGate) return fromGate; + 'mouse', + ) + if (fromGate) return fromGate const toGate = await runHitTestGate( adapter, overrides, subGates, to.x, to.y, - "mouse_full", - ); - if (toGate) return toGate; + 'mouse_full', + ) + if (toGate) return toGate - await adapter.executor.drag(from, to); - return okText("Dragged."); + await adapter.executor.drag(from, to) + return okText('Dragged.') } async function handleMoveMouse( @@ -2743,27 +2782,27 @@ async function handleMoveMouse( overrides: ComputerUseOverrides, subGates: CuSubGates, ): Promise { - const coord = extractCoordinate(args); - if (coord instanceof Error) return errorResult(coord.message, "bad_args"); - const [rawX, rawY] = coord; + const coord = extractCoordinate(args) + if (coord instanceof Error) return errorResult(coord.message, 'bad_args') + const [rawX, rawY] = coord // When the button is held, moveMouse generates leftMouseDragged events on // the window under the cursor — that's interaction, not positioning. // Upgrade to "mouse" and hit-test the destination. When the button is NOT // held: pure positioning, passes at any tier, no hit-test (mouseDown/Up // hit-test the cursor to close the mouse_move→left_mouse_down decomposition). - const actionKind: CuActionKind = mouseButtonHeld ? "mouse" : "mouse_position"; + const actionKind: CuActionKind = mouseButtonHeld ? 'mouse' : 'mouse_position' const gate = await runInputActionGates( adapter, overrides, subGates, actionKind, - ); - if (gate) return gate; + ) + if (gate) return gate const display = await adapter.executor.getDisplaySize( overrides.selectedDisplayId, - ); + ) const { x, y } = scaleCoord( rawX, rawY, @@ -2771,7 +2810,7 @@ async function handleMoveMouse( display, overrides.lastScreenshot, adapter.logger, - ); + ) if (mouseButtonHeld) { // "mouse_full" — same as left_click_drag's to-endpoint. Dragging onto a @@ -2783,14 +2822,14 @@ async function handleMoveMouse( subGates, x, y, - "mouse_full", - ); - if (hitGate) return hitGate; + 'mouse_full', + ) + if (hitGate) return hitGate } - await adapter.executor.moveMouse(x, y); - if (mouseButtonHeld) mouseMoved = true; - return okText("Moved."); + await adapter.executor.moveMouse(x, y) + if (mouseButtonHeld) mouseMoved = true + return okText('Moved.') } async function handleOpenApplication( @@ -2798,38 +2837,38 @@ async function handleOpenApplication( args: Record, overrides: ComputerUseOverrides, ): Promise { - const app = requireString(args, "app"); - if (app instanceof Error) return errorResult(app.message, "bad_args"); + const app = requireString(args, 'app') + if (app instanceof Error) return errorResult(app.message, 'bad_args') // Resolve display-name → bundle ID. Same logic as request_access. - const allowed = new Set(overrides.allowedApps.map((g) => g.bundleId)); - let targetBundleId: string | undefined; + const allowed = new Set(overrides.allowedApps.map(g => g.bundleId)) + let targetBundleId: string | undefined if (looksLikeBundleId(app) && allowed.has(app)) { - targetBundleId = app; + targetBundleId = app } else { // Try display name → bundle ID, but ONLY against the allowlist itself. // Avoids paying the listInstalledApps() cost on the hot path and is // arguably more correct: if the user granted "Slack", the model asking // to open "Slack" should match THAT grant. const match = overrides.allowedApps.find( - (g) => g.displayName.toLowerCase() === app.toLowerCase(), - ); - targetBundleId = match?.bundleId; + g => g.displayName.toLowerCase() === app.toLowerCase(), + ) + targetBundleId = match?.bundleId } if (!targetBundleId || !allowed.has(targetBundleId)) { return errorResult( `"${app}" is not granted for this session. Call request_access first.`, - "app_not_granted", - ); + 'app_not_granted', + ) } // open_application works at any tier — bringing an app forward is exactly // what tier "read" enables (you need it on screen to screenshot it). The // tier gates on click/type catch any follow-up interaction. - await adapter.executor.openApp(targetBundleId); + await adapter.executor.openApp(targetBundleId) // On multi-monitor setups, macOS may place the opened window on a monitor // the resolver won't pick (e.g. Claude + another allowed app are co-located @@ -2837,9 +2876,9 @@ async function handleOpenApplication( // clicking on dock icons. Single-monitor → no hint. listDisplays failure is // non-fatal — the hint is advisory. if (overrides.onDisplayPinned !== undefined) { - let displayCount = 1; + let displayCount = 1 try { - displayCount = (await adapter.executor.listDisplays()).length; + displayCount = (await adapter.executor.listDisplays()).length } catch { // hint skipped } @@ -2847,11 +2886,11 @@ async function handleOpenApplication( return okText( `Opened "${app}". If it isn't visible in the next screenshot, it may ` + `have opened on a different monitor — use switch_display to check.`, - ); + ) } } - return okText(`Opened "${app}".`); + return okText(`Opened "${app}".`) } async function handleVirtualMouse( @@ -2859,37 +2898,55 @@ async function handleVirtualMouse( args: Record, ): Promise { if (!adapter.executor.virtualMouse) { - return errorResult("virtual_mouse is only available on Windows with a bound window.", "feature_unavailable"); + return errorResult( + 'virtual_mouse is only available on Windows with a bound window.', + 'feature_unavailable', + ) } - const action = requireString(args, "action"); - if (action instanceof Error) return errorResult(action.message, "bad_args"); - const coord = args.coordinate; + const action = requireString(args, 'action') + if (action instanceof Error) return errorResult(action.message, 'bad_args') + const coord = args.coordinate if (!Array.isArray(coord) || coord.length < 2) { - return errorResult("coordinate [x, y] is required.", "bad_args"); + return errorResult('coordinate [x, y] is required.', 'bad_args') } - const validActions = new Set(["click", "double_click", "right_click", "move", "drag", "down", "up"]); + const validActions = new Set([ + 'click', + 'double_click', + 'right_click', + 'move', + 'drag', + 'down', + 'up', + ]) if (!validActions.has(action)) { - return errorResult(`Invalid action "${action}". Valid: ${[...validActions].join(", ")}`, "bad_args"); + return errorResult( + `Invalid action "${action}". Valid: ${[...validActions].join(', ')}`, + 'bad_args', + ) } - const startCoord = Array.isArray(args.start_coordinate) ? args.start_coordinate : undefined; + const startCoord = Array.isArray(args.start_coordinate) + ? args.start_coordinate + : undefined const ok = await adapter.executor.virtualMouse({ action: action as any, - x: coord[0], y: coord[1], - startX: startCoord?.[0], startY: startCoord?.[1], - }); + x: coord[0], + y: coord[1], + startX: startCoord?.[0], + startY: startCoord?.[1], + }) if (!ok) { - return errorResult("No window is currently bound.", "bad_args"); + return errorResult('No window is currently bound.', 'bad_args') } const desc: Record = { click: `Click at (${coord[0]},${coord[1]})`, double_click: `Double-click at (${coord[0]},${coord[1]})`, right_click: `Right-click at (${coord[0]},${coord[1]})`, move: `Moved to (${coord[0]},${coord[1]})`, - drag: `Dragged ${startCoord ? `(${startCoord[0]},${startCoord[1]})` : "current"} → (${coord[0]},${coord[1]})`, + drag: `Dragged ${startCoord ? `(${startCoord[0]},${startCoord[1]})` : 'current'} → (${coord[0]},${coord[1]})`, down: `Button down at (${coord[0]},${coord[1]})`, up: `Button up at (${coord[0]},${coord[1]})`, - }; - return okText(desc[action] ?? action); + } + return okText(desc[action] ?? action) } async function handleVirtualKeyboard( @@ -2897,41 +2954,50 @@ async function handleVirtualKeyboard( args: Record, ): Promise { if (!adapter.executor.virtualKeyboard) { - return errorResult("virtual_keyboard is only available on Windows with a bound window.", "feature_unavailable"); + return errorResult( + 'virtual_keyboard is only available on Windows with a bound window.', + 'feature_unavailable', + ) } - const action = requireString(args, "action"); - if (action instanceof Error) return errorResult(action.message, "bad_args"); - const text = requireString(args, "text"); - if (text instanceof Error) return errorResult(text.message, "bad_args"); + const action = requireString(args, 'action') + if (action instanceof Error) return errorResult(action.message, 'bad_args') + const text = requireString(args, 'text') + if (text instanceof Error) return errorResult(text.message, 'bad_args') - const validActions = new Set(["type", "combo", "press", "release", "hold"]); + const validActions = new Set(['type', 'combo', 'press', 'release', 'hold']) if (!validActions.has(action)) { - return errorResult(`Invalid action "${action}". Valid: ${[...validActions].join(", ")}`, "bad_args"); + return errorResult( + `Invalid action "${action}". Valid: ${[...validActions].join(', ')}`, + 'bad_args', + ) } - const duration = typeof args.duration === "number" ? args.duration : undefined; - const repeat = typeof args.repeat === "number" ? args.repeat : undefined; + const duration = typeof args.duration === 'number' ? args.duration : undefined + const repeat = typeof args.repeat === 'number' ? args.repeat : undefined const ok = await adapter.executor.virtualKeyboard({ action: action as any, text, duration, repeat, - }); + }) if (!ok) { - return errorResult("No window is currently bound. Use open_application or bind_window first.", "bad_args"); + return errorResult( + 'No window is currently bound. Use open_application or bind_window first.', + 'bad_args', + ) } const desc: Record = { - type: `Typed "${text.length > 40 ? text.slice(0, 40) + "..." : text}"`, + type: `Typed "${text.length > 40 ? text.slice(0, 40) + '...' : text}"`, combo: `Sent ${text}`, press: `Pressed ${text} (holding)`, release: `Released ${text}`, hold: `Held ${text} for ${duration ?? 1}s`, - }; + } - return okText(`${desc[action]}${repeat && repeat > 1 ? ` ×${repeat}` : ""}`); + return okText(`${desc[action]}${repeat && repeat > 1 ? ` ×${repeat}` : ''}`) } async function handleStatusIndicator( @@ -2939,25 +3005,35 @@ async function handleStatusIndicator( args: Record, ): Promise { if (!adapter.executor.statusIndicator) { - return errorResult("status_indicator is only available on Windows.", "feature_unavailable"); + return errorResult( + 'status_indicator is only available on Windows.', + 'feature_unavailable', + ) } - const action = requireString(args, "action"); - if (action instanceof Error) return errorResult(action.message, "bad_args"); - if (!["show", "hide", "status"].includes(action)) { - return errorResult(`Invalid action "${action}". Valid: show, hide, status.`, "bad_args"); + const action = requireString(args, 'action') + if (action instanceof Error) return errorResult(action.message, 'bad_args') + if (!['show', 'hide', 'status'].includes(action)) { + return errorResult( + `Invalid action "${action}". Valid: show, hide, status.`, + 'bad_args', + ) } - const message = typeof args.message === "string" ? args.message : undefined; - if (action === "show" && !message) { - return errorResult("'show' requires a message parameter.", "bad_args"); + const message = typeof args.message === 'string' ? args.message : undefined + if (action === 'show' && !message) { + return errorResult("'show' requires a message parameter.", 'bad_args') } - const result = await adapter.executor.statusIndicator(action as any, message); - if (action === "status") { - return okText(result.active ? "Indicator is active on the bound window." : "Indicator is not active (no window bound)."); + const result = await adapter.executor.statusIndicator(action as any, message) + if (action === 'status') { + return okText( + result.active + ? 'Indicator is active on the bound window.' + : 'Indicator is not active (no window bound).', + ) } - if (action === "show") { - return okText(`Indicator showing: "${message}"`); + if (action === 'show') { + return okText(`Indicator showing: "${message}"`) } - return okText("Indicator hidden."); + return okText('Indicator hidden.') } async function handleMouseWheel( @@ -2965,24 +3041,38 @@ async function handleMouseWheel( args: Record, ): Promise { if (!adapter.executor.mouseWheel) { - return errorResult("mouse_wheel is only available on Windows with a bound window.", "feature_unavailable"); + return errorResult( + 'mouse_wheel is only available on Windows with a bound window.', + 'feature_unavailable', + ) } - const coord = args.coordinate; + const coord = args.coordinate if (!Array.isArray(coord) || coord.length < 2) { - return errorResult("coordinate must be [x, y] array.", "bad_args"); + return errorResult('coordinate must be [x, y] array.', 'bad_args') } - const delta = typeof args.delta === "number" ? args.delta : undefined; + const delta = typeof args.delta === 'number' ? args.delta : undefined if (delta === undefined) { - return errorResult("delta is required (positive=up, negative=down).", "bad_args"); + return errorResult( + 'delta is required (positive=up, negative=down).', + 'bad_args', + ) } - const horizontal = args.direction === "horizontal"; - const ok = await adapter.executor.mouseWheel(coord[0], coord[1], delta, horizontal); + const horizontal = args.direction === 'horizontal' + const ok = await adapter.executor.mouseWheel( + coord[0], + coord[1], + delta, + horizontal, + ) if (!ok) { - return errorResult("No window is currently bound. Use open_application or bind_window first.", "bad_args"); + return errorResult( + 'No window is currently bound. Use open_application or bind_window first.', + 'bad_args', + ) } return okText( - `Mouse wheel: ${horizontal ? "horizontal" : "vertical"} scroll ${delta > 0 ? "up" : "down"} ${Math.abs(delta)} click(s) at (${coord[0]},${coord[1]}).`, - ); + `Mouse wheel: ${horizontal ? 'horizontal' : 'vertical'} scroll ${delta > 0 ? 'up' : 'down'} ${Math.abs(delta)} click(s) at (${coord[0]},${coord[1]}).`, + ) } async function handleActivateWindow( @@ -2990,15 +3080,21 @@ async function handleActivateWindow( args: Record, ): Promise { if (!adapter.executor.activateWindow) { - return errorResult("activate_window is only available on Windows with a bound window.", "feature_unavailable"); + return errorResult( + 'activate_window is only available on Windows with a bound window.', + 'feature_unavailable', + ) } - const clickX = typeof args.click_x === "number" ? args.click_x : undefined; - const clickY = typeof args.click_y === "number" ? args.click_y : undefined; - const ok = await adapter.executor.activateWindow(clickX, clickY); + const clickX = typeof args.click_x === 'number' ? args.click_x : undefined + const clickY = typeof args.click_y === 'number' ? args.click_y : undefined + const ok = await adapter.executor.activateWindow(clickX, clickY) if (!ok) { - return errorResult("No window is currently bound. Use open_application or bind_window first.", "bad_args"); + return errorResult( + 'No window is currently bound. Use open_application or bind_window first.', + 'bad_args', + ) } - return okText("Window activated and focused. Ready for input."); + return okText('Window activated and focused. Ready for input.') } async function handlePromptRespond( @@ -3006,44 +3102,60 @@ async function handlePromptRespond( args: Record, ): Promise { if (!adapter.executor.respondToPrompt) { - return errorResult("prompt_respond is only available on Windows with a bound window.", "feature_unavailable"); + return errorResult( + 'prompt_respond is only available on Windows with a bound window.', + 'feature_unavailable', + ) } - const responseType = requireString(args, "response_type"); - if (responseType instanceof Error) return errorResult(responseType.message, "bad_args"); + const responseType = requireString(args, 'response_type') + if (responseType instanceof Error) + return errorResult(responseType.message, 'bad_args') - const validTypes = new Set(["yes", "no", "enter", "escape", "select", "type"]); + const validTypes = new Set(['yes', 'no', 'enter', 'escape', 'select', 'type']) if (!validTypes.has(responseType)) { - return errorResult(`Invalid response_type "${responseType}". Valid: ${[...validTypes].join(", ")}`, "bad_args"); + return errorResult( + `Invalid response_type "${responseType}". Valid: ${[...validTypes].join(', ')}`, + 'bad_args', + ) } - if (responseType === "select" && typeof args.arrow_count !== "number") { - return errorResult("'select' requires arrow_count parameter.", "bad_args"); + if (responseType === 'select' && typeof args.arrow_count !== 'number') { + return errorResult("'select' requires arrow_count parameter.", 'bad_args') } - if (responseType === "type" && typeof args.text !== "string") { - return errorResult("'type' requires text parameter.", "bad_args"); + if (responseType === 'type' && typeof args.text !== 'string') { + return errorResult("'type' requires text parameter.", 'bad_args') } const ok = await adapter.executor.respondToPrompt({ responseType: responseType as any, - arrowDirection: typeof args.arrow_direction === "string" ? args.arrow_direction as any : undefined, - arrowCount: typeof args.arrow_count === "number" ? args.arrow_count : undefined, - text: typeof args.text === "string" ? args.text : undefined, - }); + arrowDirection: + typeof args.arrow_direction === 'string' + ? (args.arrow_direction as any) + : undefined, + arrowCount: + typeof args.arrow_count === 'number' ? args.arrow_count : undefined, + text: typeof args.text === 'string' ? args.text : undefined, + }) if (!ok) { - return errorResult("No window is currently bound. Use open_application or bind_window first.", "bad_args"); + return errorResult( + 'No window is currently bound. Use open_application or bind_window first.', + 'bad_args', + ) } const descriptions: Record = { yes: "Sent 'y' + Enter.", no: "Sent 'n' + Enter.", - enter: "Sent Enter.", - escape: "Sent Escape.", - select: `Navigated ${args.arrow_direction ?? "down"} ${args.arrow_count ?? 1} time(s) + Enter.`, + enter: 'Sent Enter.', + escape: 'Sent Escape.', + select: `Navigated ${args.arrow_direction ?? 'down'} ${args.arrow_count ?? 1} time(s) + Enter.`, type: `Typed "${args.text}" + Enter.`, - }; + } - return okText(`Prompt responded: ${descriptions[responseType] ?? responseType}. Take a screenshot to verify.`); + return okText( + `Prompt responded: ${descriptions[responseType] ?? responseType}. Take a screenshot to verify.`, + ) } async function handleOpenTerminal( @@ -3051,113 +3163,151 @@ async function handleOpenTerminal( args: Record, ): Promise { if (!adapter.executor.openTerminal) { - return errorResult("open_terminal is only available on Windows.", "feature_unavailable"); + return errorResult( + 'open_terminal is only available on Windows.', + 'feature_unavailable', + ) } - const agent = requireString(args, "agent"); - if (agent instanceof Error) return errorResult(agent.message, "bad_args"); + const agent = requireString(args, 'agent') + if (agent instanceof Error) return errorResult(agent.message, 'bad_args') - const validAgents = new Set(["claude", "codex", "gemini", "custom"]); + const validAgents = new Set(['claude', 'codex', 'gemini', 'custom']) if (!validAgents.has(agent)) { - return errorResult(`Invalid agent "${agent}". Valid: claude, codex, gemini, custom.`, "bad_args"); + return errorResult( + `Invalid agent "${agent}". Valid: claude, codex, gemini, custom.`, + 'bad_args', + ) } - if (agent === "custom" && typeof args.command !== "string") { - return errorResult("agent='custom' requires 'command' parameter.", "bad_args"); + if (agent === 'custom' && typeof args.command !== 'string') { + return errorResult( + "agent='custom' requires 'command' parameter.", + 'bad_args', + ) } const result = await adapter.executor.openTerminal({ agent: agent as any, - command: typeof args.command === "string" ? args.command : undefined, - terminal: typeof args.terminal === "string" ? args.terminal as any : undefined, - workingDirectory: typeof args.working_directory === "string" ? args.working_directory : undefined, - }); + command: typeof args.command === 'string' ? args.command : undefined, + terminal: + typeof args.terminal === 'string' ? (args.terminal as any) : undefined, + workingDirectory: + typeof args.working_directory === 'string' + ? args.working_directory + : undefined, + }) if (!result) { return errorResult( - "Failed to open terminal. Windows Terminal (wt.exe) may not be installed.", - "launch_failed", - ); + 'Failed to open terminal. Windows Terminal (wt.exe) may not be installed.', + 'launch_failed', + ) } if (!result.launched) { return okText( `Terminal opened (hwnd=${result.hwnd}, "${result.title}") but no command was sent. Window is now bound.`, - ); + ) } const agentNames: Record = { - claude: "Claude Code", codex: "Codex", gemini: "Gemini", + claude: 'Claude Code', + codex: 'Codex', + gemini: 'Gemini', custom: args.command as string, - }; + } return okText( `Terminal opened and ${agentNames[agent] ?? agent} launched.\n` + - `Window: hwnd=${result.hwnd} "${result.title}"\n` + - `Command: '${agent === "custom" ? args.command : agent}' + Enter\n` + - `Status: bound to this terminal. Take a screenshot to verify the agent started.`, - ); + `Window: hwnd=${result.hwnd} "${result.title}"\n` + + `Command: '${agent === 'custom' ? args.command : agent}' + Enter\n` + + `Status: bound to this terminal. Take a screenshot to verify the agent started.`, + ) } async function handleBindWindow( adapter: ComputerUseHostAdapter, args: Record, ): Promise { - const action = requireString(args, "action"); - if (action instanceof Error) return errorResult(action.message, "bad_args"); + const action = requireString(args, 'action') + if (action instanceof Error) return errorResult(action.message, 'bad_args') switch (action) { - case "list": { + case 'list': { if (!adapter.executor.listVisibleWindows) { - return errorResult("bind_window is only available on Windows.", "feature_unavailable"); + return errorResult( + 'bind_window is only available on Windows.', + 'feature_unavailable', + ) } - const windows = await adapter.executor.listVisibleWindows(); - if (windows.length === 0) return okText("No visible windows found."); - const lines = windows.map( - (w) => `hwnd=${w.hwnd} pid=${w.pid} "${w.title}"`, - ); - return okText(`Visible windows (${windows.length}):\n${lines.join("\n")}`); + const windows = await adapter.executor.listVisibleWindows() + if (windows.length === 0) return okText('No visible windows found.') + const lines = windows.map(w => `hwnd=${w.hwnd} pid=${w.pid} "${w.title}"`) + return okText(`Visible windows (${windows.length}):\n${lines.join('\n')}`) } - case "status": { + case 'status': { if (!adapter.executor.getBindingStatus) { - return errorResult("bind_window is only available on Windows.", "feature_unavailable"); + return errorResult( + 'bind_window is only available on Windows.', + 'feature_unavailable', + ) } - const status = await adapter.executor.getBindingStatus(); + const status = await adapter.executor.getBindingStatus() if (!status || !status.bound) { - return okText("No window is currently bound. Use bind_window(action='list') to see available windows, then bind_window(action='bind', title='...') to bind."); + return okText( + "No window is currently bound. Use bind_window(action='list') to see available windows, then bind_window(action='bind', title='...') to bind.", + ) } - let text = `Bound to: hwnd=${status.hwnd}`; - if (status.title) text += ` "${status.title}"`; - if (status.pid) text += ` pid=${status.pid}`; - if (status.rect) text += ` rect=(${status.rect.x},${status.rect.y} ${status.rect.width}x${status.rect.height})`; - return okText(text); + let text = `Bound to: hwnd=${status.hwnd}` + if (status.title) text += ` "${status.title}"` + if (status.pid) text += ` pid=${status.pid}` + if (status.rect) + text += ` rect=(${status.rect.x},${status.rect.y} ${status.rect.width}x${status.rect.height})` + return okText(text) } - case "bind": { + case 'bind': { if (!adapter.executor.bindToWindow) { - return errorResult("bind_window is only available on Windows.", "feature_unavailable"); + return errorResult( + 'bind_window is only available on Windows.', + 'feature_unavailable', + ) } - const title = typeof args.title === "string" ? args.title : undefined; - const hwnd = typeof args.hwnd === "string" ? args.hwnd : undefined; - const pid = typeof args.pid === "number" ? args.pid : undefined; + const title = typeof args.title === 'string' ? args.title : undefined + const hwnd = typeof args.hwnd === 'string' ? args.hwnd : undefined + const pid = typeof args.pid === 'number' ? args.pid : undefined if (!title && !hwnd && !pid) { - return errorResult("Specify at least one of: title, hwnd, or pid.", "bad_args"); + return errorResult( + 'Specify at least one of: title, hwnd, or pid.', + 'bad_args', + ) } - const result = await adapter.executor.bindToWindow({ hwnd, title, pid }); + const result = await adapter.executor.bindToWindow({ hwnd, title, pid }) if (!result) { return errorResult( - `No window found matching: ${[title && `title="${title}"`, hwnd && `hwnd=${hwnd}`, pid && `pid=${pid}`].filter(Boolean).join(", ")}. Use bind_window(action='list') to see available windows.`, - "element_not_found", - ); + `No window found matching: ${[title && `title="${title}"`, hwnd && `hwnd=${hwnd}`, pid && `pid=${pid}`].filter(Boolean).join(', ')}. Use bind_window(action='list') to see available windows.`, + 'element_not_found', + ) } - return okText(`Bound to window: hwnd=${result.hwnd} pid=${result.pid} "${result.title}". All subsequent screenshot/click/type operations target this window.`); + return okText( + `Bound to window: hwnd=${result.hwnd} pid=${result.pid} "${result.title}". All subsequent screenshot/click/type operations target this window.`, + ) } - case "unbind": { + case 'unbind': { if (!adapter.executor.unbindFromWindow) { - return errorResult("bind_window is only available on Windows.", "feature_unavailable"); + return errorResult( + 'bind_window is only available on Windows.', + 'feature_unavailable', + ) } - await adapter.executor.unbindFromWindow(); - return okText("Window binding released. Operations now target the full screen."); + await adapter.executor.unbindFromWindow() + return okText( + 'Window binding released. Operations now target the full screen.', + ) } default: - return errorResult(`Unknown bind_window action "${action}". Valid: list, bind, unbind, status.`, "bad_args"); + return errorResult( + `Unknown bind_window action "${action}". Valid: list, bind, unbind, status.`, + 'bad_args', + ) } } @@ -3167,24 +3317,30 @@ async function handleClickElement( ): Promise { if (!adapter.executor.clickElement) { return errorResult( - "click_element is only available on Windows with a bound window.", - "feature_unavailable", - ); + 'click_element is only available on Windows with a bound window.', + 'feature_unavailable', + ) } - const name = typeof args.name === "string" ? args.name : undefined; - const role = typeof args.role === "string" ? args.role : undefined; - const automationId = typeof args.automationId === "string" ? args.automationId : undefined; + const name = typeof args.name === 'string' ? args.name : undefined + const role = typeof args.role === 'string' ? args.role : undefined + const automationId = + typeof args.automationId === 'string' ? args.automationId : undefined if (!name && !role && !automationId) { - return errorResult("At least one of name, role, or automationId is required.", "bad_args"); + return errorResult( + 'At least one of name, role, or automationId is required.', + 'bad_args', + ) } - const ok = await adapter.executor.clickElement({ name, role, automationId }); + const ok = await adapter.executor.clickElement({ name, role, automationId }) if (!ok) { return errorResult( - `Element not found: ${[name && `name="${name}"`, role && `role=${role}`, automationId && `id=${automationId}`].filter(Boolean).join(", ")}. Take a screenshot to see current GUI elements.`, - "element_not_found", - ); + `Element not found: ${[name && `name="${name}"`, role && `role=${role}`, automationId && `id=${automationId}`].filter(Boolean).join(', ')}. Take a screenshot to see current GUI elements.`, + 'element_not_found', + ) } - return okText(`Clicked element: ${[name && `"${name}"`, role, automationId].filter(Boolean).join(" ")}`); + return okText( + `Clicked element: ${[name && `"${name}"`, role, automationId].filter(Boolean).join(' ')}`, + ) } async function handleTypeIntoElement( @@ -3193,102 +3349,128 @@ async function handleTypeIntoElement( ): Promise { if (!adapter.executor.typeIntoElement) { return errorResult( - "type_into_element is only available on Windows with a bound window.", - "feature_unavailable", - ); + 'type_into_element is only available on Windows with a bound window.', + 'feature_unavailable', + ) } - const text = requireString(args, "text"); - if (text instanceof Error) return errorResult(text.message, "bad_args"); - const name = typeof args.name === "string" ? args.name : undefined; - const role = typeof args.role === "string" ? args.role : undefined; - const automationId = typeof args.automationId === "string" ? args.automationId : undefined; - const ok = await adapter.executor.typeIntoElement({ name, role, automationId }, text); + const text = requireString(args, 'text') + if (text instanceof Error) return errorResult(text.message, 'bad_args') + const name = typeof args.name === 'string' ? args.name : undefined + const role = typeof args.role === 'string' ? args.role : undefined + const automationId = + typeof args.automationId === 'string' ? args.automationId : undefined + const ok = await adapter.executor.typeIntoElement( + { name, role, automationId }, + text, + ) if (!ok) { return errorResult( - `Could not type into element: ${[name && `name="${name}"`, role && `role=${role}`, automationId && `id=${automationId}`].filter(Boolean).join(", ")}. The element was not found or doesn't support text input.`, - "element_not_found", - ); + `Could not type into element: ${[name && `name="${name}"`, role && `role=${role}`, automationId && `id=${automationId}`].filter(Boolean).join(', ')}. The element was not found or doesn't support text input.`, + 'element_not_found', + ) } - return okText(`Typed ${text.length} chars into: ${[name && `"${name}"`, role, automationId].filter(Boolean).join(" ")}`); + return okText( + `Typed ${text.length} chars into: ${[name && `"${name}"`, role, automationId].filter(Boolean).join(' ')}`, + ) } async function handleWindowManagement( adapter: ComputerUseHostAdapter, args: Record, ): Promise { - const action = requireString(args, "action"); - if (action instanceof Error) return errorResult(action.message, "bad_args"); + const action = requireString(args, 'action') + if (action instanceof Error) return errorResult(action.message, 'bad_args') const VALID_ACTIONS = new Set([ - "minimize", "maximize", "restore", "close", "focus", "move_offscreen", "move_resize", "get_rect", - ]); + 'minimize', + 'maximize', + 'restore', + 'close', + 'focus', + 'move_offscreen', + 'move_resize', + 'get_rect', + ]) if (!VALID_ACTIONS.has(action)) { return errorResult( - `Unknown window_management action "${action}". Valid: ${[...VALID_ACTIONS].join(", ")}`, - "bad_args", - ); + `Unknown window_management action "${action}". Valid: ${[...VALID_ACTIONS].join(', ')}`, + 'bad_args', + ) } if (!adapter.executor.manageWindow) { return errorResult( - "window_management is only available on Windows with a bound window.", - "feature_unavailable", - ); + 'window_management is only available on Windows with a bound window.', + 'feature_unavailable', + ) } // get_rect: just return the current window position and size - if (action === "get_rect") { + if (action === 'get_rect') { if (!adapter.executor.getWindowRect) { - return errorResult("getWindowRect not available.", "feature_unavailable"); + return errorResult('getWindowRect not available.', 'feature_unavailable') } - const rect = await adapter.executor.getWindowRect(); + const rect = await adapter.executor.getWindowRect() if (!rect) { - return errorResult("No window is currently bound. Call open_application first.", "bad_args"); + return errorResult( + 'No window is currently bound. Call open_application first.', + 'bad_args', + ) } return okText( `Window rect: x=${rect.x}, y=${rect.y}, width=${rect.width}, height=${rect.height}`, - ); + ) } // move_resize: requires x, y (width/height optional) - if (action === "move_resize") { - const x = typeof args.x === "number" ? args.x : undefined; - const y = typeof args.y === "number" ? args.y : undefined; + if (action === 'move_resize') { + const x = typeof args.x === 'number' ? args.x : undefined + const y = typeof args.y === 'number' ? args.y : undefined if (x === undefined || y === undefined) { - return errorResult("move_resize requires x and y parameters.", "bad_args"); + return errorResult('move_resize requires x and y parameters.', 'bad_args') } - const width = typeof args.width === "number" ? args.width : undefined; - const height = typeof args.height === "number" ? args.height : undefined; - const ok = await adapter.executor.manageWindow(action, { x, y, width, height }); + const width = typeof args.width === 'number' ? args.width : undefined + const height = typeof args.height === 'number' ? args.height : undefined + const ok = await adapter.executor.manageWindow(action, { + x, + y, + width, + height, + }) if (!ok) { - return errorResult("No window is currently bound. Call open_application first.", "bad_args"); + return errorResult( + 'No window is currently bound. Call open_application first.', + 'bad_args', + ) } return okText( width && height ? `Moved window to (${x}, ${y}) and resized to ${width}×${height}.` : `Moved window to (${x}, ${y}).`, - ); + ) } // All other actions: minimize, maximize, restore, close, focus, move_offscreen - const ok = await adapter.executor.manageWindow(action); + const ok = await adapter.executor.manageWindow(action) if (!ok) { return errorResult( - "No window is currently bound. Call open_application first.", - "bad_args", - ); + 'No window is currently bound. Call open_application first.', + 'bad_args', + ) } const descriptions: Record = { - minimize: "Window minimized (ShowWindow SW_MINIMIZE).", - maximize: "Window maximized (ShowWindow SW_MAXIMIZE).", - restore: "Window restored (ShowWindow SW_RESTORE).", - close: "Window closed (SendMessage WM_CLOSE). The window binding has been released.", - focus: "Window brought to front (SetForegroundWindow).", - move_offscreen: "Window moved offscreen (-32000,-32000). Still usable via SendMessage/PrintWindow.", - }; + minimize: 'Window minimized (ShowWindow SW_MINIMIZE).', + maximize: 'Window maximized (ShowWindow SW_MAXIMIZE).', + restore: 'Window restored (ShowWindow SW_RESTORE).', + close: + 'Window closed (SendMessage WM_CLOSE). The window binding has been released.', + focus: 'Window brought to front (SetForegroundWindow).', + move_offscreen: + 'Window moved offscreen (-32000,-32000). Still usable via SendMessage/PrintWindow.', + } - return okText(descriptions[action] ?? `Action "${action}" completed.`); + return okText(descriptions[action] ?? `Action "${action}" completed.`) } async function handleSwitchDisplay( @@ -3296,61 +3478,61 @@ async function handleSwitchDisplay( args: Record, overrides: ComputerUseOverrides, ): Promise { - const display = requireString(args, "display"); - if (display instanceof Error) return errorResult(display.message, "bad_args"); + const display = requireString(args, 'display') + if (display instanceof Error) return errorResult(display.message, 'bad_args') if (!overrides.onDisplayPinned) { return errorResult( - "Display switching is not available in this session.", - "feature_unavailable", - ); + 'Display switching is not available in this session.', + 'feature_unavailable', + ) } - if (display.toLowerCase() === "auto") { - overrides.onDisplayPinned(undefined); + if (display.toLowerCase() === 'auto') { + overrides.onDisplayPinned(undefined) return okText( - "Returned to automatic monitor selection. Call screenshot to continue.", - ); + 'Returned to automatic monitor selection. Call screenshot to continue.', + ) } // Resolve label → displayId fresh. Same source buildMonitorNote reads, // so whatever name the model saw in a screenshot note resolves here. - let displays; + let displays try { - displays = await adapter.executor.listDisplays(); + displays = await adapter.executor.listDisplays() } catch (e) { return errorResult( `Failed to enumerate displays: ${String(e)}`, - "display_error", - ); + 'display_error', + ) } if (displays.length < 2) { return errorResult( - "Only one monitor is connected. There is nothing to switch to.", - "bad_args", - ); + 'Only one monitor is connected. There is nothing to switch to.', + 'bad_args', + ) } - const labels = uniqueDisplayLabels(displays); - const wanted = display.toLowerCase(); + const labels = uniqueDisplayLabels(displays) + const wanted = display.toLowerCase() const target = displays.find( - (d) => labels.get(d.displayId)?.toLowerCase() === wanted, - ); + d => labels.get(d.displayId)?.toLowerCase() === wanted, + ) if (!target) { const available = displays - .map((d) => `"${labels.get(d.displayId)}"`) - .join(", "); + .map(d => `"${labels.get(d.displayId)}"`) + .join(', ') return errorResult( `No monitor named "${display}" is connected. Available monitors: ${available}.`, - "bad_args", - ); + 'bad_args', + ) } - overrides.onDisplayPinned(target.displayId); + overrides.onDisplayPinned(target.displayId) return okText( `Switched to monitor "${labels.get(target.displayId)}". Call screenshot to see it.`, - ); + ) } function handleListGrantedApplications( @@ -3359,7 +3541,7 @@ function handleListGrantedApplications( return okJson({ allowedApps: overrides.allowedApps, grantFlags: overrides.grantFlags, - }); + }) } async function handleReadClipboard( @@ -3369,29 +3551,29 @@ async function handleReadClipboard( ): Promise { if (!overrides.grantFlags.clipboardRead) { return errorResult( - "Clipboard read is not granted. Request `clipboardRead` via request_access.", - "grant_flag_required", - ); + 'Clipboard read is not granted. Request `clipboardRead` via request_access.', + 'grant_flag_required', + ) } // read_clipboard doesn't route through runInputActionGates — sync here so // reading after clicking into a click-tier app sees the cleared clipboard // (same as what the app's own Paste would see). if (subGates.clipboardGuard) { - const frontmost = await adapter.executor.getFrontmostApp(); + const frontmost = await adapter.executor.getFrontmostApp() const tierByBundleId = new Map( - overrides.allowedApps.map((a) => [a.bundleId, a.tier] as const), - ); + overrides.allowedApps.map(a => [a.bundleId, a.tier] as const), + ) const frontmostTier = frontmost ? tierByBundleId.get(frontmost.bundleId) - : undefined; - await syncClipboardStash(adapter, overrides, frontmostTier === "click"); + : undefined + await syncClipboardStash(adapter, overrides, frontmostTier === 'click') } // clipboardGuard may have stashed+cleared — read the actual (possibly // empty) clipboard. The agent sees what the app would see. - const text = await adapter.executor.readClipboard(); - return okJson({ text }); + const text = await adapter.executor.readClipboard() + return okJson({ text }) } async function handleWriteClipboard( @@ -3402,28 +3584,28 @@ async function handleWriteClipboard( ): Promise { if (!overrides.grantFlags.clipboardWrite) { return errorResult( - "Clipboard write is not granted. Request `clipboardWrite` via request_access.", - "grant_flag_required", - ); + 'Clipboard write is not granted. Request `clipboardWrite` via request_access.', + 'grant_flag_required', + ) } - const text = requireString(args, "text"); - if (text instanceof Error) return errorResult(text.message, "bad_args"); + const text = requireString(args, 'text') + if (text instanceof Error) return errorResult(text.message, 'bad_args') if (subGates.clipboardGuard) { - const frontmost = await adapter.executor.getFrontmostApp(); + const frontmost = await adapter.executor.getFrontmostApp() const tierByBundleId = new Map( - overrides.allowedApps.map((a) => [a.bundleId, a.tier] as const), - ); + overrides.allowedApps.map(a => [a.bundleId, a.tier] as const), + ) const frontmostTier = frontmost ? tierByBundleId.get(frontmost.bundleId) - : undefined; + : undefined // Defense-in-depth for the clipboardGuard bypass: write_clipboard + // left_click on a click-tier app's UI Paste button. The re-clear in // syncClipboardStash already defeats it (the next action clobbers the // write), but rejecting here gives the agent a clear signal instead of // silently voiding its write. - if (frontmost && frontmostTier === "click") { + if (frontmost && frontmostTier === 'click') { return errorResult( `"${frontmost.displayName}" is a tier-"click" app and currently ` + `frontmost. write_clipboard is blocked because the next action ` + @@ -3431,18 +3613,18 @@ async function handleWriteClipboard( `app cannot be used to inject text. Bring a tier-"full" app ` + `forward before writing to the clipboard.` + TIER_ANTI_SUBVERSION, - "tier_insufficient", - ); + 'tier_insufficient', + ) } // write_clipboard doesn't route through runInputActionGates — sync here // so clicking away from a click-tier app then writing restores the user's // stash before the agent's text lands. - await syncClipboardStash(adapter, overrides, frontmostTier === "click"); + await syncClipboardStash(adapter, overrides, frontmostTier === 'click') } - await adapter.executor.writeClipboard(text); - return okText("Clipboard written."); + await adapter.executor.writeClipboard(text) + return okText('Clipboard written.') } /** @@ -3453,21 +3635,21 @@ async function handleWriteClipboard( async function handleWait( args: Record, ): Promise { - const duration = args.duration; - if (typeof duration !== "number" || !Number.isFinite(duration)) { - return errorResult("duration must be a number", "bad_args"); + const duration = args.duration + if (typeof duration !== 'number' || !Number.isFinite(duration)) { + return errorResult('duration must be a number', 'bad_args') } if (duration < 0) { - return errorResult("duration must be non-negative", "bad_args"); + return errorResult('duration must be non-negative', 'bad_args') } if (duration > 100) { return errorResult( - "duration is too long. Duration is in seconds.", - "bad_args", - ); + 'duration is too long. Duration is in seconds.', + 'bad_args', + ) } - await sleep(duration * 1000); - return okText(`Waited ${duration}s.`); + await sleep(duration * 1000) + return okText(`Waited ${duration}s.`) } /** @@ -3485,13 +3667,13 @@ async function handleCursorPosition( adapter: ComputerUseHostAdapter, overrides: ComputerUseOverrides, ): Promise { - const logical = await adapter.executor.getCursorPosition(); - const shot = overrides.lastScreenshot; + const logical = await adapter.executor.getCursorPosition() + const shot = overrides.lastScreenshot if (shot) { // Inverse of scaleCoord: subtract capture-time origin to go from // virtual-screen to display-relative before the image-px transform. - const localX = logical.x - shot.originX; - const localY = logical.y - shot.originY; + const localX = logical.x - shot.originX + const localY = logical.y - shot.originY // Cursor off the captured display (multi-monitor): local coords go // negative or exceed display dims. Return logical_points + hint rather // than garbage image-px. @@ -3504,20 +3686,20 @@ async function handleCursorPosition( return okJson({ x: logical.x, y: logical.y, - coordinateSpace: "logical_points", - note: "cursor is on a different monitor than your last screenshot; take a fresh screenshot", - }); + coordinateSpace: 'logical_points', + note: 'cursor is on a different monitor than your last screenshot; take a fresh screenshot', + }) } - const x = Math.round(localX * (shot.width / shot.displayWidth)); - const y = Math.round(localY * (shot.height / shot.displayHeight)); - return okJson({ x, y, coordinateSpace: "image_pixels" }); + const x = Math.round(localX * (shot.width / shot.displayWidth)) + const y = Math.round(localY * (shot.height / shot.displayHeight)) + return okJson({ x, y, coordinateSpace: 'image_pixels' }) } return okJson({ x: logical.x, y: logical.y, - coordinateSpace: "logical_points", - note: "take a screenshot first for image-pixel coordinates", - }); + coordinateSpace: 'logical_points', + note: 'take a screenshot first for image-pixel coordinates', + }) } /** @@ -3532,21 +3714,21 @@ async function handleHoldKey( overrides: ComputerUseOverrides, subGates: CuSubGates, ): Promise { - const text = requireString(args, "text"); - if (text instanceof Error) return errorResult(text.message, "bad_args"); + const text = requireString(args, 'text') + if (text instanceof Error) return errorResult(text.message, 'bad_args') - const duration = args.duration; - if (typeof duration !== "number" || !Number.isFinite(duration)) { - return errorResult("duration must be a number", "bad_args"); + const duration = args.duration + if (typeof duration !== 'number' || !Number.isFinite(duration)) { + return errorResult('duration must be a number', 'bad_args') } if (duration < 0) { - return errorResult("duration must be non-negative", "bad_args"); + return errorResult('duration must be non-negative', 'bad_args') } if (duration > 100) { return errorResult( - "duration is too long. Duration is in seconds.", - "bad_args", - ); + 'duration is too long. Duration is in seconds.', + 'bad_args', + ) } // Blocklist check BEFORE gates — same reasoning as handleKey. Holding @@ -3557,21 +3739,21 @@ async function handleHoldKey( ) { return errorResult( `"${text}" is a system-level shortcut. Request the \`systemKeyCombos\` grant via request_access to use it.`, - "grant_flag_required", - ); + 'grant_flag_required', + ) } const gate = await runInputActionGates( adapter, overrides, subGates, - "keyboard", - ); - if (gate) return gate; + 'keyboard', + ) + if (gate) return gate - const keyNames = parseKeyChord(text); - await adapter.executor.holdKey(keyNames, duration * 1000); - return okText("Key held."); + const keyNames = parseKeyChord(text) + await adapter.executor.holdKey(keyNames, duration * 1000) + return okText('Key held.') } /** @@ -3585,34 +3767,34 @@ async function handleLeftMouseDown( ): Promise { if (mouseButtonHeld) { return errorResult( - "mouse button already held, call left_mouse_up first", - "state_conflict", - ); + 'mouse button already held, call left_mouse_up first', + 'state_conflict', + ) } - const gate = await runInputActionGates(adapter, overrides, subGates, "mouse"); - if (gate) return gate; + const gate = await runInputActionGates(adapter, overrides, subGates, 'mouse') + if (gate) return gate // macOS routes mouseDown to the window under the cursor, not the frontmost // app. Without this hit-test, mouse_move (positioning, passes at any tier) // + left_mouse_down decomposes a click that lands on a tier-"read" window // overlapping a tier-"full" frontmost app — bypassing runHitTestGate's // whole purpose. All three are batchable, so the bypass is atomic. - const cursor = await adapter.executor.getCursorPosition(); + const cursor = await adapter.executor.getCursorPosition() const hitGate = await runHitTestGate( adapter, overrides, subGates, cursor.x, cursor.y, - "mouse", - ); - if (hitGate) return hitGate; + 'mouse', + ) + if (hitGate) return hitGate - await adapter.executor.mouseDown(); - mouseButtonHeld = true; - mouseMoved = false; - return okText("Mouse button pressed."); + await adapter.executor.mouseDown() + mouseButtonHeld = true + mouseMoved = false + return okText('Mouse button pressed.') } /** @@ -3637,14 +3819,14 @@ async function handleLeftMouseUp( const releaseFirst = async ( err: CuCallToolResult, ): Promise => { - await adapter.executor.mouseUp(); - mouseButtonHeld = false; - mouseMoved = false; - return err; - }; + await adapter.executor.mouseUp() + mouseButtonHeld = false + mouseMoved = false + return err + } - const gate = await runInputActionGates(adapter, overrides, subGates, "mouse"); - if (gate) return releaseFirst(gate); + const gate = await runInputActionGates(adapter, overrides, subGates, 'mouse') + if (gate) return releaseFirst(gate) // When the cursor moved since mouseDown, this is a drop (text-injection // vector) — hit-test at "mouse_full" same as left_click_drag's `to`. When @@ -3653,21 +3835,21 @@ async function handleLeftMouseUp( // click on a click-tier app fails here while the atomic left_click works, // and releaseFirst fires mouseUp anyway so the OS sees a complete click // while the model gets a misleading error. - const cursor = await adapter.executor.getCursorPosition(); + const cursor = await adapter.executor.getCursorPosition() const hitGate = await runHitTestGate( adapter, overrides, subGates, cursor.x, cursor.y, - mouseMoved ? "mouse_full" : "mouse", - ); - if (hitGate) return releaseFirst(hitGate); + mouseMoved ? 'mouse_full' : 'mouse', + ) + if (hitGate) return releaseFirst(hitGate) - await adapter.executor.mouseUp(); - mouseButtonHeld = false; - mouseMoved = false; - return okText("Mouse button released."); + await adapter.executor.mouseUp() + mouseButtonHeld = false + mouseMoved = false + return okText('Mouse button released.') } // --------------------------------------------------------------------------- @@ -3680,28 +3862,28 @@ async function handleLeftMouseUp( * security model). */ const BATCHABLE_ACTIONS: ReadonlySet = new Set([ - "key", - "type", - "mouse_move", - "left_click", - "left_click_drag", - "right_click", - "middle_click", - "double_click", - "triple_click", - "scroll", - "hold_key", - "screenshot", - "cursor_position", - "left_mouse_down", - "left_mouse_up", - "wait", -]); + 'key', + 'type', + 'mouse_move', + 'left_click', + 'left_click_drag', + 'right_click', + 'middle_click', + 'double_click', + 'triple_click', + 'scroll', + 'hold_key', + 'screenshot', + 'cursor_position', + 'left_mouse_down', + 'left_mouse_up', + 'wait', +]) interface BatchActionResult { - action: string; - ok: boolean; - output: string; + action: string + ok: boolean + output: string } /** @@ -3740,25 +3922,25 @@ async function handleComputerBatch( overrides: ComputerUseOverrides, subGates: CuSubGates, ): Promise { - const actions = args.actions; + const actions = args.actions if (!Array.isArray(actions) || actions.length === 0) { - return errorResult("actions must be a non-empty array", "bad_args"); + return errorResult('actions must be a non-empty array', 'bad_args') } for (const [i, act] of actions.entries()) { - if (typeof act !== "object" || act === null) { - return errorResult(`actions[${i}] must be an object`, "bad_args"); + if (typeof act !== 'object' || act === null) { + return errorResult(`actions[${i}] must be an object`, 'bad_args') } - const action = (act as Record).action; - if (typeof action !== "string") { - return errorResult(`actions[${i}].action must be a string`, "bad_args"); + const action = (act as Record).action + if (typeof action !== 'string') { + return errorResult(`actions[${i}].action must be a string`, 'bad_args') } if (!BATCHABLE_ACTIONS.has(action)) { return errorResult( `actions[${i}].action="${action}" is not allowed in a batch. ` + - `Allowed: ${[...BATCHABLE_ACTIONS].join(", ")}.`, - "bad_args", - ); + `Allowed: ${[...BATCHABLE_ACTIONS].join(', ')}.`, + 'bad_args', + ) } } @@ -3766,11 +3948,11 @@ async function handleComputerBatch( // hideBeforeAction:false. if (subGates.hideBeforeAction) { const hidden = await adapter.executor.prepareForAction( - overrides.allowedApps.map((a) => a.bundleId), + overrides.allowedApps.map(a => a.bundleId), overrides.selectedDisplayId, - ); + ) if (hidden.length > 0) { - overrides.onAppsHidden?.(hidden); + overrides.onAppsHidden?.(hidden) } } @@ -3785,28 +3967,28 @@ async function handleComputerBatch( // resolver switch would make that screenshot inconsistent with // earlier clicks' lastScreenshot-based scaleCoord targeting. autoTargetDisplay: false, - }; + } - const results: BatchActionResult[] = []; + const results: BatchActionResult[] = [] for (const [i, act] of actions.entries()) { // Overlay Stop → host's stopSession → lifecycleState leaves "running" // synchronously before query.interrupt(). The SDK abort tears down the // host's await but not this loop — without this check the remaining // actions fire into a dead session. if (overrides.isAborted?.()) { - await releaseHeldMouse(adapter); + await releaseHeldMouse(adapter) return errorResult( `Batch aborted after ${results.length} of ${actions.length} actions (user interrupt).`, - ); + ) } // Small inter-step settle. Synthetic CGEvents post instantly; some apps // need a tick to process step N's input before step N+1 lands (e.g. a // click opening a menu before the next click targets a menu item). - if (i > 0) await sleep(10); + if (i > 0) await sleep(10) - const actionArgs = act as Record; - const action = actionArgs.action as string; + const actionArgs = act as Record + const action = actionArgs.action as string // Drop mid-batch screenshot piggyback (strip .screenshot). Click coords // stay anchored to the pre-batch lastScreenshot. @@ -3816,11 +3998,11 @@ async function handleComputerBatch( adapter, overrides, batchSubGates, - ); + ) - const text = firstTextContent(inner); - const result = { action, ok: !inner.isError, output: text }; - results.push(result); + const text = firstTextContent(inner) + const result = { action, ok: !inner.isError, output: text } + results.push(result) if (inner.isError) { // Stop-on-first-error. Return everything so far + the error. @@ -3830,7 +4012,7 @@ async function handleComputerBatch( // Release held mouse: the error may be a mid-grapheme abort in // handleType, or a frontmost gate, landing between mouse_down and // mouse_up. - await releaseHeldMouse(adapter); + await releaseHeldMouse(adapter) return okJson( { completed: results.slice(0, -1), @@ -3838,16 +4020,16 @@ async function handleComputerBatch( remaining: actions.length - results.length, }, inner.telemetry, - ); + ) } } - return okJson({ completed: results }); + return okJson({ completed: results }) } function firstTextContent(r: CuCallToolResult): string { - const first = r.content[0]; - return first && first.type === "text" ? first.text : ""; + const first = r.content[0] + return first && first.type === 'text' ? first.text : '' } /** @@ -3869,158 +4051,206 @@ async function dispatchAction( const hasBoundWindow = (await adapter.executor.hasBoundWindow?.()) === true && adapter.executor.virtualMouse && - adapter.executor.virtualKeyboard; + adapter.executor.virtualKeyboard if (hasBoundWindow) { - const coord = Array.isArray(a.coordinate) ? a.coordinate as number[] : undefined; + const coord = Array.isArray(a.coordinate) + ? (a.coordinate as number[]) + : undefined switch (name) { - case "left_click": - if (coord) return handleVirtualMouse(adapter, { action: "click", coordinate: coord }); - break; - case "double_click": - if (coord) return handleVirtualMouse(adapter, { action: "double_click", coordinate: coord }); - break; - case "right_click": - if (coord) return handleVirtualMouse(adapter, { action: "right_click", coordinate: coord }); - break; - case "mouse_move": - if (coord) return handleVirtualMouse(adapter, { action: "move", coordinate: coord }); - break; - case "left_click_drag": - if (coord) return handleVirtualMouse(adapter, { - action: "drag", coordinate: coord, - start_coordinate: Array.isArray(a.start_coordinate) ? a.start_coordinate : undefined, - }); - break; - case "left_mouse_down": - if (coord) return handleVirtualMouse(adapter, { action: "down", coordinate: coord }); - break; - case "left_mouse_up": - if (coord) return handleVirtualMouse(adapter, { action: "up", coordinate: coord }); - break; - case "type": - if (typeof a.text === "string") return handleVirtualKeyboard(adapter, { action: "type", text: a.text }); - break; - case "key": - if (typeof a.text === "string") return handleVirtualKeyboard(adapter, { action: "combo", text: a.text, repeat: a.repeat }); - break; - case "hold_key": - if (typeof a.text === "string") return handleVirtualKeyboard(adapter, { - action: "hold", text: a.text, - duration: typeof a.duration === "number" ? a.duration : 1, - }); - break; - case "scroll": - if (coord) return handleMouseWheel(adapter, { - coordinate: coord, - delta: a.scroll_direction === "up" ? (a.scroll_amount ?? 3) : -(a.scroll_amount ?? 3), - direction: (a.scroll_direction === "left" || a.scroll_direction === "right") ? "horizontal" : "vertical", - }); - break; + case 'left_click': + if (coord) + return handleVirtualMouse(adapter, { + action: 'click', + coordinate: coord, + }) + break + case 'double_click': + if (coord) + return handleVirtualMouse(adapter, { + action: 'double_click', + coordinate: coord, + }) + break + case 'right_click': + if (coord) + return handleVirtualMouse(adapter, { + action: 'right_click', + coordinate: coord, + }) + break + case 'mouse_move': + if (coord) + return handleVirtualMouse(adapter, { + action: 'move', + coordinate: coord, + }) + break + case 'left_click_drag': + if (coord) + return handleVirtualMouse(adapter, { + action: 'drag', + coordinate: coord, + start_coordinate: Array.isArray(a.start_coordinate) + ? a.start_coordinate + : undefined, + }) + break + case 'left_mouse_down': + if (coord) + return handleVirtualMouse(adapter, { + action: 'down', + coordinate: coord, + }) + break + case 'left_mouse_up': + if (coord) + return handleVirtualMouse(adapter, { + action: 'up', + coordinate: coord, + }) + break + case 'type': + if (typeof a.text === 'string') + return handleVirtualKeyboard(adapter, { + action: 'type', + text: a.text, + }) + break + case 'key': + if (typeof a.text === 'string') + return handleVirtualKeyboard(adapter, { + action: 'combo', + text: a.text, + repeat: a.repeat, + }) + break + case 'hold_key': + if (typeof a.text === 'string') + return handleVirtualKeyboard(adapter, { + action: 'hold', + text: a.text, + duration: typeof a.duration === 'number' ? a.duration : 1, + }) + break + case 'scroll': + if (coord) + return handleMouseWheel(adapter, { + coordinate: coord, + delta: + a.scroll_direction === 'up' + ? (a.scroll_amount ?? 3) + : -(a.scroll_amount ?? 3), + direction: + a.scroll_direction === 'left' || a.scroll_direction === 'right' + ? 'horizontal' + : 'vertical', + }) + break // screenshot, zoom, wait, cursor_position — not rerouted, pass through } } // ── Standard dispatch (unbound or tools not rerouted above) ──────── switch (name) { - case "screenshot": - return handleScreenshot(adapter, overrides, subGates); + case 'screenshot': + return handleScreenshot(adapter, overrides, subGates) - case "zoom": - return handleZoom(adapter, a, overrides); + case 'zoom': + return handleZoom(adapter, a, overrides) - case "left_click": - return handleClickVariant(adapter, a, overrides, subGates, "left", 1); - case "double_click": - return handleClickVariant(adapter, a, overrides, subGates, "left", 2); - case "triple_click": - return handleClickVariant(adapter, a, overrides, subGates, "left", 3); - case "right_click": - return handleClickVariant(adapter, a, overrides, subGates, "right", 1); - case "middle_click": - return handleClickVariant(adapter, a, overrides, subGates, "middle", 1); + case 'left_click': + return handleClickVariant(adapter, a, overrides, subGates, 'left', 1) + case 'double_click': + return handleClickVariant(adapter, a, overrides, subGates, 'left', 2) + case 'triple_click': + return handleClickVariant(adapter, a, overrides, subGates, 'left', 3) + case 'right_click': + return handleClickVariant(adapter, a, overrides, subGates, 'right', 1) + case 'middle_click': + return handleClickVariant(adapter, a, overrides, subGates, 'middle', 1) - case "type": - return handleType(adapter, a, overrides, subGates); + case 'type': + return handleType(adapter, a, overrides, subGates) - case "key": - return handleKey(adapter, a, overrides, subGates); + case 'key': + return handleKey(adapter, a, overrides, subGates) - case "scroll": - return handleScroll(adapter, a, overrides, subGates); + case 'scroll': + return handleScroll(adapter, a, overrides, subGates) - case "left_click_drag": - return handleDrag(adapter, a, overrides, subGates); + case 'left_click_drag': + return handleDrag(adapter, a, overrides, subGates) - case "mouse_move": - return handleMoveMouse(adapter, a, overrides, subGates); + case 'mouse_move': + return handleMoveMouse(adapter, a, overrides, subGates) - case "wait": - return handleWait(a); + case 'wait': + return handleWait(a) - case "cursor_position": - return handleCursorPosition(adapter, overrides); + case 'cursor_position': + return handleCursorPosition(adapter, overrides) - case "hold_key": - return handleHoldKey(adapter, a, overrides, subGates); + case 'hold_key': + return handleHoldKey(adapter, a, overrides, subGates) - case "left_mouse_down": - return handleLeftMouseDown(adapter, overrides, subGates); + case 'left_mouse_down': + return handleLeftMouseDown(adapter, overrides, subGates) - case "left_mouse_up": - return handleLeftMouseUp(adapter, overrides, subGates); + case 'left_mouse_up': + return handleLeftMouseUp(adapter, overrides, subGates) - case "open_application": - return handleOpenApplication(adapter, a, overrides); + case 'open_application': + return handleOpenApplication(adapter, a, overrides) - case "window_management": - return handleWindowManagement(adapter, a); + case 'window_management': + return handleWindowManagement(adapter, a) - case "click_element": - return handleClickElement(adapter, a); + case 'click_element': + return handleClickElement(adapter, a) - case "type_into_element": - return handleTypeIntoElement(adapter, a); + case 'type_into_element': + return handleTypeIntoElement(adapter, a) - case "open_terminal": - return handleOpenTerminal(adapter, a); + case 'open_terminal': + return handleOpenTerminal(adapter, a) - case "bind_window": - return handleBindWindow(adapter, a); + case 'bind_window': + return handleBindWindow(adapter, a) - case "virtual_mouse": - return handleVirtualMouse(adapter, a); + case 'virtual_mouse': + return handleVirtualMouse(adapter, a) - case "virtual_keyboard": - return handleVirtualKeyboard(adapter, a); + case 'virtual_keyboard': + return handleVirtualKeyboard(adapter, a) - case "status_indicator": - return handleStatusIndicator(adapter, a); + case 'status_indicator': + return handleStatusIndicator(adapter, a) - case "mouse_wheel": - return handleMouseWheel(adapter, a); + case 'mouse_wheel': + return handleMouseWheel(adapter, a) - case "activate_window": - return handleActivateWindow(adapter, a); + case 'activate_window': + return handleActivateWindow(adapter, a) - case "prompt_respond": - return handlePromptRespond(adapter, a); + case 'prompt_respond': + return handlePromptRespond(adapter, a) - case "switch_display": - return handleSwitchDisplay(adapter, a, overrides); + case 'switch_display': + return handleSwitchDisplay(adapter, a, overrides) - case "list_granted_applications": - return handleListGrantedApplications(overrides); + case 'list_granted_applications': + return handleListGrantedApplications(overrides) - case "read_clipboard": - return handleReadClipboard(adapter, overrides, subGates); + case 'read_clipboard': + return handleReadClipboard(adapter, overrides, subGates) - case "write_clipboard": - return handleWriteClipboard(adapter, a, overrides, subGates); + case 'write_clipboard': + return handleWriteClipboard(adapter, a, overrides, subGates) - case "computer_batch": - return handleComputerBatch(adapter, a, overrides, subGates); + case 'computer_batch': + return handleComputerBatch(adapter, a, overrides, subGates) default: - return errorResult(`Unknown tool "${name}".`, "bad_args"); + return errorResult(`Unknown tool "${name}".`, 'bad_args') } } @@ -4034,7 +4264,7 @@ export async function handleToolCall( args: unknown, rawOverrides: ComputerUseOverrides, ): Promise { - const { logger, serverName } = adapter; + const { logger, serverName } = adapter // Normalize the allowlist before any gate runs: // @@ -4056,9 +4286,9 @@ export async function handleToolCall( // // `.some()` guard keeps the hot path (empty deny list, no legacy grants) // zero-alloc. - const userDeniedSet = new Set(rawOverrides.userDeniedBundleIds); + const userDeniedSet = new Set(rawOverrides.userDeniedBundleIds) const overrides: ComputerUseOverrides = rawOverrides.allowedApps.some( - (a) => + a => a.tier === undefined || userDeniedSet.has(a.bundleId) || isPolicyDenied(a.bundleId, a.displayName), @@ -4066,22 +4296,22 @@ export async function handleToolCall( ? { ...rawOverrides, allowedApps: rawOverrides.allowedApps - .filter((a) => !userDeniedSet.has(a.bundleId)) - .filter((a) => !isPolicyDenied(a.bundleId, a.displayName)) - .map((a) => + .filter(a => !userDeniedSet.has(a.bundleId)) + .filter(a => !isPolicyDenied(a.bundleId, a.displayName)) + .map(a => a.tier !== undefined ? a : { ...a, tier: getDefaultTierForApp(a.bundleId, a.displayName) }, ), } - : rawOverrides; + : rawOverrides // ─── Gate 1: kill switch ───────────────────────────────────────────── if (adapter.isDisabled()) { return errorResult( - "Computer control is disabled in Settings. Enable it and try again.", - "other", - ); + 'Computer control is disabled in Settings. Enable it and try again.', + 'other', + ) } // ─── Gate 2: TCC ───────────────────────────────────────────────────── @@ -4089,24 +4319,34 @@ export async function handleToolCall( // no relaunch. `request_access` is exempted: it threads the ungranted // state through to the renderer, which shows a TCC toggle panel instead // of the app list. Every other tool short-circuits here. - const osPerms = await adapter.ensureOsPermissions(); - let tccState: - | { accessibility: boolean; screenRecording: boolean } - | undefined; + const osPerms = await adapter.ensureOsPermissions() + let tccState: { accessibility: boolean; screenRecording: boolean } | undefined if (!osPerms.granted) { // Both request_* tools thread tccState through to the renderer's // TCC toggle panel. Every other tool short-circuits. - if (name !== "request_access" && name !== "request_teach_access") { + if (name !== 'request_access' && name !== 'request_teach_access') { return errorResult( - "Accessibility and Screen Recording permissions are required. " + - "Call request_access to show the permission panel.", - "tcc_not_granted", - ); + 'Accessibility and Screen Recording permissions are required. ' + + 'Call request_access to show the permission panel.', + 'tcc_not_granted', + ) } tccState = { - accessibility: (osPerms as { granted: false; accessibility: boolean; screenRecording: boolean }).accessibility, - screenRecording: (osPerms as { granted: false; accessibility: boolean; screenRecording: boolean }).screenRecording, - }; + accessibility: ( + osPerms as { + granted: false + accessibility: boolean + screenRecording: boolean + } + ).accessibility, + screenRecording: ( + osPerms as { + granted: false + accessibility: boolean + screenRecording: boolean + } + ).screenRecording, + } } // ─── Gate 3: global CU lock ────────────────────────────────────────── @@ -4134,28 +4374,28 @@ export async function handleToolCall( // Cowork (LAM) and CCD (LSM) wire checkCuLock via the shared cuLock // singleton. When undefined (tests/future hosts), no gate — absence of // the mechanism ≠ locked out. - const deferAcquire = defersLockAcquire(name); - const lock = overrides.checkCuLock?.(); + const deferAcquire = defersLockAcquire(name) + const lock = overrides.checkCuLock?.() if (lock) { if (lock.holder !== undefined && !lock.isSelf) { return errorResult( - "Another Claude session is currently using the computer. Wait for " + - "the user to acknowledge it is finished (stop button in the Claude " + - "window), or find a non-computer-use approach if one is readily " + - "apparent.", - "cu_lock_held", - ); + 'Another Claude session is currently using the computer. Wait for ' + + 'the user to acknowledge it is finished (stop button in the Claude ' + + 'window), or find a non-computer-use approach if one is readily ' + + 'apparent.', + 'cu_lock_held', + ) } if (lock.holder === undefined && !deferAcquire) { // Acquire. Emits cuLockChanged → overlay shows. Idempotent — if // someone else acquired between check and here (won't happen on a // single-threaded event loop, but defensive), this is a no-op. - overrides.acquireCuLock?.(); + overrides.acquireCuLock?.() // Fresh lock holder → any prior session's mouseButtonHeld is stale // (e.g. overlay stop mid-drag). Clear it so this session doesn't get // a spurious "already held" error. resetMouseButtonHeld is file-local; // this is the one non-test callsite. - resetMouseButtonHeld(); + resetMouseButtonHeld() } // lock.isSelf → already held by us, proceed. // lock.holder === undefined && deferAcquire → @@ -4164,7 +4404,7 @@ export async function handleToolCall( // Sub-gates read FRESH every call so a GrowthBook flip takes effect // mid-session (plan §3). - const subGates = adapter.getSubGates(); + const subGates = adapter.getSubGates() // Clipboard guard runs per-action inside runInputActionGates + inline in // handleReadClipboard/handleWriteClipboard. NOT here — per-tool-call sync @@ -4172,11 +4412,11 @@ export async function handleToolCall( // fire during deferAcquire tools / `wait` / teach_step's blocking-dialog // phase where no input is happening. - const a = asRecord(args); + const a = asRecord(args) logger.silly( `[${serverName}] tool=${name} args=${JSON.stringify(a).slice(0, 200)}`, - ); + ) // ─── Fail-closed dispatch ──────────────────────────────────────────── // ANY exception below → tool error, executor never left in a half-called @@ -4186,28 +4426,28 @@ export async function handleToolCall( // dispatchAction never sees them (not batchable). // teach_step: blocking UI tool, also not batchable; needs subGates for // its action-execution phase. - if (name === "request_access") { - return await handleRequestAccess(adapter, a, overrides, tccState); + if (name === 'request_access') { + return await handleRequestAccess(adapter, a, overrides, tccState) } - if (name === "request_teach_access") { - return await handleRequestTeachAccess(adapter, a, overrides, tccState); + if (name === 'request_teach_access') { + return await handleRequestTeachAccess(adapter, a, overrides, tccState) } - if (name === "teach_step") { - return await handleTeachStep(adapter, a, overrides, subGates); + if (name === 'teach_step') { + return await handleTeachStep(adapter, a, overrides, subGates) } - if (name === "teach_batch") { - return await handleTeachBatch(adapter, a, overrides, subGates); + if (name === 'teach_batch') { + return await handleTeachBatch(adapter, a, overrides, subGates) } - return await dispatchAction(name, a, adapter, overrides, subGates); + return await dispatchAction(name, a, adapter, overrides, subGates) } catch (err) { // Fail-closed. If the gate machinery itself throws (e.g. // getFrontmostApp() rejects), the executor has NOT been called yet for // the gated tools — the gates run before the executor in every handler. // For ungated tools, the executor may have been mid-call; that's fine — // the result is still a tool error, never an implicit success. - const msg = err instanceof Error ? err.message : String(err); - logger.error(`[${serverName}] tool=${name} threw: ${msg}`, err); - return errorResult(`Tool "${name}" failed: ${msg}`, "executor_threw"); + const msg = err instanceof Error ? err.message : String(err) + logger.error(`[${serverName}] tool=${name} threw: ${msg}`, err) + return errorResult(`Tool "${name}" failed: ${msg}`, 'executor_threw') } } @@ -4227,4 +4467,4 @@ export const _test = { buildMonitorNote, handleSwitchDisplay, uniqueDisplayLabels, -}; +} diff --git a/packages/@ant/computer-use-mcp/src/tools.ts b/packages/@ant/computer-use-mcp/src/tools.ts index 1904700d8..ba31fbc47 100644 --- a/packages/@ant/computer-use-mcp/src/tools.ts +++ b/packages/@ant/computer-use-mcp/src/tools.ts @@ -10,26 +10,26 @@ * `scaleCoord` — both must agree or clicks land in the wrong space. */ -import type { Tool } from "@modelcontextprotocol/sdk/types.js"; +import type { Tool } from '@modelcontextprotocol/sdk/types.js' -import type { CoordinateMode } from "./types.js"; +import type { CoordinateMode } from './types.js' // See packages/desktop/computer-use-mcp/COORDINATES.md before touching any // model-facing coordinate text. Chrome's browserTools.ts:143 is the reference // phrasing — "pixels from the left edge", no geometry, no number to do math with. const COORD_DESC: Record = { pixels: { - x: "Horizontal pixel position read directly from the most recent screenshot image, measured from the left edge. The server handles all scaling.", - y: "Vertical pixel position read directly from the most recent screenshot image, measured from the top edge. The server handles all scaling.", + x: 'Horizontal pixel position read directly from the most recent screenshot image, measured from the left edge. The server handles all scaling.', + y: 'Vertical pixel position read directly from the most recent screenshot image, measured from the top edge. The server handles all scaling.', }, normalized_0_100: { - x: "Horizontal position as a percentage of screen width, 0.0–100.0 (0 = left edge, 100 = right edge).", - y: "Vertical position as a percentage of screen height, 0.0–100.0 (0 = top edge, 100 = bottom edge).", + x: 'Horizontal position as a percentage of screen width, 0.0–100.0 (0 = left edge, 100 = right edge).', + y: 'Vertical position as a percentage of screen height, 0.0–100.0 (0 = top edge, 100 = bottom edge).', }, -}; +} const FRONTMOST_GATE_DESC = - "The frontmost application must be in the session allowlist at the time of this call, or this tool returns an error and does nothing."; + 'The frontmost application must be in the session allowlist at the time of this call, or this tool returns an error and does nothing.' /** * Item schema for the `actions` array in `computer_batch`, `teach_step`, and @@ -38,69 +38,69 @@ const FRONTMOST_GATE_DESC = * in toolCalls.ts. */ const BATCH_ACTION_ITEM_SCHEMA = { - type: "object", + type: 'object', properties: { action: { - type: "string", + type: 'string', enum: [ - "key", - "type", - "mouse_move", - "left_click", - "left_click_drag", - "right_click", - "middle_click", - "double_click", - "triple_click", - "scroll", - "hold_key", - "screenshot", - "cursor_position", - "left_mouse_down", - "left_mouse_up", - "wait", + 'key', + 'type', + 'mouse_move', + 'left_click', + 'left_click_drag', + 'right_click', + 'middle_click', + 'double_click', + 'triple_click', + 'scroll', + 'hold_key', + 'screenshot', + 'cursor_position', + 'left_mouse_down', + 'left_mouse_up', + 'wait', ], - description: "The action to perform.", + description: 'The action to perform.', }, coordinate: { - type: "array", - items: { type: "number" }, + type: 'array', + items: { type: 'number' }, minItems: 2, maxItems: 2, description: - "(x, y) for click/mouse_move/scroll/left_click_drag end point.", + '(x, y) for click/mouse_move/scroll/left_click_drag end point.', }, start_coordinate: { - type: "array", - items: { type: "number" }, + type: 'array', + items: { type: 'number' }, minItems: 2, maxItems: 2, description: - "(x, y) drag start — left_click_drag only. Omit to drag from current cursor.", + '(x, y) drag start — left_click_drag only. Omit to drag from current cursor.', }, text: { - type: "string", + type: 'string', description: - "For type: the text. For key/hold_key: the chord string. For click/scroll: modifier keys to hold.", + 'For type: the text. For key/hold_key: the chord string. For click/scroll: modifier keys to hold.', }, scroll_direction: { - type: "string", - enum: ["up", "down", "left", "right"], + type: 'string', + enum: ['up', 'down', 'left', 'right'], }, - scroll_amount: { type: "integer", minimum: 0, maximum: 100 }, + scroll_amount: { type: 'integer', minimum: 0, maximum: 100 }, duration: { - type: "number", - description: "Seconds (0–100). For hold_key/wait.", + type: 'number', + description: 'Seconds (0–100). For hold_key/wait.', }, repeat: { - type: "integer", + type: 'integer', minimum: 1, maximum: 100, - description: "For key: repeat count.", + description: 'For key: repeat count.', }, }, - required: ["action"], -}; + required: ['action'], +} /** * Build the tool list. Parameterized by capabilities and coordinate mode so @@ -117,98 +117,98 @@ const BATCH_ACTION_ITEM_SCHEMA = { */ export function buildComputerUseTools( caps: { - screenshotFiltering: "native" | "none"; - platform: "darwin" | "win32" | "linux"; + screenshotFiltering: 'native' | 'none' + platform: 'darwin' | 'win32' | 'linux' /** Include request_teach_access + teach_step. Read once at server construction. */ - teachMode?: boolean; + teachMode?: boolean }, coordinateMode: CoordinateMode, installedAppNames?: string[], ): Tool[] { - const coord = COORD_DESC[coordinateMode]; + const coord = COORD_DESC[coordinateMode] // Shared hint suffix for BOTH request_access and request_teach_access — // they use the same resolveRequestedApps path, so the model should get // the same enumeration for both. const installedAppsHint = installedAppNames && installedAppNames.length > 0 - ? ` Available applications on this machine: ${installedAppNames.join(", ")}.` - : ""; + ? ` Available applications on this machine: ${installedAppNames.join(', ')}.` + : '' // [x, y]` tuple — param shape for all // click/move/scroll tools. const coordinateTuple = { - type: "array", - items: { type: "number" }, + type: 'array', + items: { type: 'number' }, minItems: 2, maxItems: 2, description: `(x, y): ${coord.x}`, - }; + } // Modifier hold during click. Shared across all 5 click variants. const clickModifierText = { - type: "string", + type: 'string', description: 'Modifier keys to hold during the click (e.g. "shift", "ctrl+shift"). Supports the same syntax as the key tool.', - }; + } const screenshotDesc = - caps.screenshotFiltering === "native" - ? "Take a screenshot of the primary display. Applications not in the session allowlist are excluded at the compositor level — only granted apps and the desktop are visible." - : "Take a screenshot of the primary display. On this platform, screenshots are NOT filtered — all open windows are visible. Input actions targeting apps not in the session allowlist are rejected."; + caps.screenshotFiltering === 'native' + ? 'Take a screenshot of the primary display. Applications not in the session allowlist are excluded at the compositor level — only granted apps and the desktop are visible.' + : 'Take a screenshot of the primary display. On this platform, screenshots are NOT filtered — all open windows are visible. Input actions targeting apps not in the session allowlist are rejected.' return [ { - name: "request_access", + name: 'request_access', description: - "Request user permission to control a set of applications for this session. Must be called before any other tool in this server. " + - "The user sees a single dialog listing all requested apps and either allows the whole set or denies it. " + - "Call this again mid-session to add more apps; previously granted apps remain granted. " + - "Returns the granted apps, denied apps, and screenshot filtering capability.", + 'Request user permission to control a set of applications for this session. Must be called before any other tool in this server. ' + + 'The user sees a single dialog listing all requested apps and either allows the whole set or denies it. ' + + 'Call this again mid-session to add more apps; previously granted apps remain granted. ' + + 'Returns the granted apps, denied apps, and screenshot filtering capability.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { apps: { - type: "array", - items: { type: "string" }, + type: 'array', + items: { type: 'string' }, description: - "Application display names (e.g. \"Slack\", \"Calendar\") or bundle identifiers (e.g. \"com.tinyspeck.slackmacgap\"). Display names are resolved case-insensitively against installed apps." + + 'Application display names (e.g. "Slack", "Calendar") or bundle identifiers (e.g. "com.tinyspeck.slackmacgap"). Display names are resolved case-insensitively against installed apps.' + installedAppsHint, }, reason: { - type: "string", + type: 'string', description: - "One-sentence explanation shown to the user in the approval dialog. Explain the task, not the mechanism.", + 'One-sentence explanation shown to the user in the approval dialog. Explain the task, not the mechanism.', }, clipboardRead: { - type: "boolean", + type: 'boolean', description: "Also request permission to read the user's clipboard (separate checkbox in the dialog).", }, clipboardWrite: { - type: "boolean", + type: 'boolean', description: "Also request permission to write the user's clipboard. When granted, multi-line `type` calls use the clipboard fast path.", }, systemKeyCombos: { - type: "boolean", + type: 'boolean', description: - "Also request permission to send system-level key combos (quit app, switch app, lock screen). Without this, those specific combos are blocked.", + 'Also request permission to send system-level key combos (quit app, switch app, lock screen). Without this, those specific combos are blocked.', }, }, - required: ["apps", "reason"], + required: ['apps', 'reason'], }, }, { - name: "screenshot", + name: 'screenshot', description: screenshotDesc + - " Returns an error if the allowlist is empty. The returned image is what subsequent click coordinates are relative to.", + ' Returns an error if the allowlist is empty. The returned image is what subsequent click coordinates are relative to.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { save_to_disk: { - type: "boolean", + type: 'boolean', description: "Save the image to disk so it can be attached to a message for the user. Returns the saved path in the tool result. Only set this when you intend to share the image — screenshots you're just looking at don't need saving.", }, @@ -218,159 +218,160 @@ export function buildComputerUseTools( }, { - name: "zoom", + name: 'zoom', description: - "Take a higher-resolution screenshot of a specific region of the last full-screen screenshot. Use this liberally to inspect small text, button labels, or fine UI details that are hard to read in the downsampled full-screen image. " + - "IMPORTANT: Coordinates in subsequent click calls always refer to the full-screen screenshot, never the zoomed image. This tool is read-only for inspecting detail.", + 'Take a higher-resolution screenshot of a specific region of the last full-screen screenshot. Use this liberally to inspect small text, button labels, or fine UI details that are hard to read in the downsampled full-screen image. ' + + 'IMPORTANT: Coordinates in subsequent click calls always refer to the full-screen screenshot, never the zoomed image. This tool is read-only for inspecting detail.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { region: { - type: "array", - items: { type: "integer" }, + type: 'array', + items: { type: 'integer' }, minItems: 4, maxItems: 4, description: - "(x0, y0, x1, y1): Rectangle to zoom into, in the coordinate space of the most recent full-screen screenshot. x0,y0 = top-left, x1,y1 = bottom-right.", + '(x0, y0, x1, y1): Rectangle to zoom into, in the coordinate space of the most recent full-screen screenshot. x0,y0 = top-left, x1,y1 = bottom-right.', }, save_to_disk: { - type: "boolean", + type: 'boolean', description: - "Save the image to disk so it can be attached to a message for the user. Returns the saved path in the tool result. Only set this when you intend to share the image.", + 'Save the image to disk so it can be attached to a message for the user. Returns the saved path in the tool result. Only set this when you intend to share the image.', }, }, - required: ["region"], + required: ['region'], }, }, { - name: "left_click", + name: 'left_click', description: `Left-click at the given coordinates. ${FRONTMOST_GATE_DESC}`, inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { coordinate: coordinateTuple, text: clickModifierText, }, - required: ["coordinate"], + required: ['coordinate'], }, }, { - name: "double_click", + name: 'double_click', description: `Double-click at the given coordinates. Selects a word in most text editors. ${FRONTMOST_GATE_DESC}`, inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { coordinate: coordinateTuple, text: clickModifierText, }, - required: ["coordinate"], + required: ['coordinate'], }, }, { - name: "triple_click", + name: 'triple_click', description: `Triple-click at the given coordinates. Selects a line in most text editors. ${FRONTMOST_GATE_DESC}`, inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { coordinate: coordinateTuple, text: clickModifierText, }, - required: ["coordinate"], + required: ['coordinate'], }, }, { - name: "right_click", + name: 'right_click', description: `Right-click at the given coordinates. Opens a context menu in most applications. ${FRONTMOST_GATE_DESC}`, inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { coordinate: coordinateTuple, text: clickModifierText, }, - required: ["coordinate"], + required: ['coordinate'], }, }, { - name: "middle_click", + name: 'middle_click', description: `Middle-click (scroll-wheel click) at the given coordinates. ${FRONTMOST_GATE_DESC}`, inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { coordinate: coordinateTuple, text: clickModifierText, }, - required: ["coordinate"], + required: ['coordinate'], }, }, { - name: "type", + name: 'type', description: `Type text into whatever currently has keyboard focus. ${FRONTMOST_GATE_DESC} Newlines are supported. For keyboard shortcuts use \`key\` instead.`, inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { - text: { type: "string", description: "Text to type." }, + text: { type: 'string', description: 'Text to type.' }, }, - required: ["text"], + required: ['text'], }, }, { - name: "key", + name: 'key', description: `Press a key or key combination (e.g. "return", "escape", "cmd+a", "ctrl+shift+tab"). ${FRONTMOST_GATE_DESC} ` + - "System-level combos (quit app, switch app, lock screen) require the `systemKeyCombos` grant — without it they return an error. All other combos work.", + 'System-level combos (quit app, switch app, lock screen) require the `systemKeyCombos` grant — without it they return an error. All other combos work.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { text: { - type: "string", + type: 'string', description: 'Modifiers joined with "+", e.g. "cmd+shift+a".', }, repeat: { - type: "integer", + type: 'integer', minimum: 1, maximum: 100, - description: "Number of times to repeat the key press. Default is 1.", + description: + 'Number of times to repeat the key press. Default is 1.', }, }, - required: ["text"], + required: ['text'], }, }, { - name: "scroll", + name: 'scroll', description: `Scroll at the given coordinates. ${FRONTMOST_GATE_DESC}`, inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { coordinate: coordinateTuple, scroll_direction: { - type: "string", - enum: ["up", "down", "left", "right"], - description: "Direction to scroll.", + type: 'string', + enum: ['up', 'down', 'left', 'right'], + description: 'Direction to scroll.', }, scroll_amount: { - type: "integer", + type: 'integer', minimum: 0, maximum: 100, - description: "Number of scroll ticks.", + description: 'Number of scroll ticks.', }, }, - required: ["coordinate", "scroll_direction", "scroll_amount"], + required: ['coordinate', 'scroll_direction', 'scroll_amount'], }, }, { - name: "left_click_drag", + name: 'left_click_drag', description: `Press, move to target, and release. ${FRONTMOST_GATE_DESC}`, inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { coordinate: { ...coordinateTuple, @@ -381,540 +382,613 @@ export function buildComputerUseTools( description: `(x, y) start point. If omitted, drags from the current cursor position. ${coord.x}`, }, }, - required: ["coordinate"], + required: ['coordinate'], }, }, { - name: "mouse_move", + name: 'mouse_move', description: `Move the mouse cursor without clicking. Useful for triggering hover states. ${FRONTMOST_GATE_DESC}`, inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { coordinate: coordinateTuple, }, - required: ["coordinate"], + required: ['coordinate'], }, }, { - name: "open_application", + name: 'open_application', description: - "Bring an application to the front, launching it if necessary. The target application must already be in the session allowlist — call request_access first.", + 'Bring an application to the front, launching it if necessary. The target application must already be in the session allowlist — call request_access first.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { app: { - type: "string", + type: 'string', description: - "Display name (e.g. \"Slack\") or bundle identifier (e.g. \"com.tinyspeck.slackmacgap\").", + 'Display name (e.g. "Slack") or bundle identifier (e.g. "com.tinyspeck.slackmacgap").', }, }, - required: ["app"], + required: ['app'], }, }, // Window management — Win32 API targeted at bound HWND, no global shortcuts. // Only available on Windows when a window is bound via open_application. - ...(caps.platform === 'win32' ? [{ - name: "window_management", - description: - "Manage the bound application window via Win32 API calls (ShowWindow, SetWindowPos, SendMessage). " + - "All operations target the bound HWND directly — NO global shortcuts (Win+Down, Alt+F4, etc.). " + - "The window must have been opened via open_application first. " + - "Actions: minimize (hide to taskbar), maximize (fill screen), restore (undo min/max), " + - "close (graceful WM_CLOSE), focus (bring to front), move_offscreen (move to -32000,-32000 for background operation). " + - "Use move_resize to reposition or resize the window to specific coordinates.", - inputSchema: { - type: "object" as const, - properties: { - action: { - type: "string", - enum: ["minimize", "maximize", "restore", "close", "focus", "move_offscreen", "move_resize", "get_rect"], + ...(caps.platform === 'win32' + ? [ + { + name: 'window_management', description: - "minimize: ShowWindow(SW_MINIMIZE). " + - "maximize: ShowWindow(SW_MAXIMIZE). " + - "restore: ShowWindow(SW_RESTORE) — undo minimize or maximize. " + - "close: SendMessage(WM_CLOSE) — graceful close. " + - "focus: SetForegroundWindow + BringWindowToTop. " + - "move_offscreen: SetWindowPos(-32000,-32000) — keeps window usable by SendMessage/PrintWindow but invisible. " + - "move_resize: SetWindowPos to specific x,y,width,height. " + - "get_rect: GetWindowRect — returns current position and size.", - }, - x: { type: "integer", description: "X position for move_resize." }, - y: { type: "integer", description: "Y position for move_resize." }, - width: { type: "integer", description: "Width for move_resize." }, - height: { type: "integer", description: "Height for move_resize." }, - }, - required: ["action"], - }, - } as Tool, - { - name: "click_element", - description: - "Click a GUI element by its accessible name, role, or automationId — no pixel coordinates needed. " + - "Uses Windows UI Automation to find the element and InvokePattern to click it. " + - "Prefer this over left_click when the element name is visible in the accessibility snapshot. " + - "Falls back to BoundingRect center-click if InvokePattern is not supported.", - inputSchema: { - type: "object" as const, - properties: { - name: { - type: "string", - description: "Accessible name of the element (e.g. \"Save\", \"File\", \"Search...\"). Case-insensitive partial match.", - }, - role: { - type: "string", - description: "Control type (e.g. \"Button\", \"MenuItem\", \"Edit\", \"Link\"). Optional — narrows the search.", - }, - automationId: { - type: "string", - description: "Exact automationId from the accessibility snapshot. Most precise selector.", - }, - }, - required: [], - }, - } as Tool, - { - name: "type_into_element", - description: - "Type text into a named GUI element using Windows UI Automation ValuePattern. " + - "Finds the element by name/role/automationId, then sets its value directly — " + - "no need to click first or use pixel coordinates. Works on Edit, ComboBox, and other value-holding controls.", - inputSchema: { - type: "object" as const, - properties: { - name: { type: "string", description: "Accessible name of the target element." }, - role: { type: "string", description: "Control type (optional, e.g. \"Edit\")." }, - automationId: { type: "string", description: "Exact automationId." }, - text: { type: "string", description: "Text to type/set into the element." }, - }, - required: ["text"], - }, - } as Tool, - { - name: "open_terminal", - description: - "Open a new terminal window and launch an AI agent CLI. " + - "This is a workflow tool that automates: open terminal → type startup command → press Enter → wait → verify. " + - "Supported agents: claude (runs 'claude'), codex (runs 'codex'), gemini (runs 'gemini'), " + - "or any custom command. After launching, the tool binds to the new terminal window " + - "and takes a screenshot to verify the agent started successfully. " + - "Use this when the user says: 'open Claude Code', 'start a Codex terminal', 'launch Gemini', etc.", - inputSchema: { - type: "object" as const, - properties: { - agent: { - type: "string", - enum: ["claude", "codex", "gemini", "custom"], + 'Manage the bound application window via Win32 API calls (ShowWindow, SetWindowPos, SendMessage). ' + + 'All operations target the bound HWND directly — NO global shortcuts (Win+Down, Alt+F4, etc.). ' + + 'The window must have been opened via open_application first. ' + + 'Actions: minimize (hide to taskbar), maximize (fill screen), restore (undo min/max), ' + + 'close (graceful WM_CLOSE), focus (bring to front), move_offscreen (move to -32000,-32000 for background operation). ' + + 'Use move_resize to reposition or resize the window to specific coordinates.', + inputSchema: { + type: 'object' as const, + properties: { + action: { + type: 'string', + enum: [ + 'minimize', + 'maximize', + 'restore', + 'close', + 'focus', + 'move_offscreen', + 'move_resize', + 'get_rect', + ], + description: + 'minimize: ShowWindow(SW_MINIMIZE). ' + + 'maximize: ShowWindow(SW_MAXIMIZE). ' + + 'restore: ShowWindow(SW_RESTORE) — undo minimize or maximize. ' + + 'close: SendMessage(WM_CLOSE) — graceful close. ' + + 'focus: SetForegroundWindow + BringWindowToTop. ' + + 'move_offscreen: SetWindowPos(-32000,-32000) — keeps window usable by SendMessage/PrintWindow but invisible. ' + + 'move_resize: SetWindowPos to specific x,y,width,height. ' + + 'get_rect: GetWindowRect — returns current position and size.', + }, + x: { + type: 'integer', + description: 'X position for move_resize.', + }, + y: { + type: 'integer', + description: 'Y position for move_resize.', + }, + width: { + type: 'integer', + description: 'Width for move_resize.', + }, + height: { + type: 'integer', + description: 'Height for move_resize.', + }, + }, + required: ['action'], + }, + } as Tool, + { + name: 'click_element', description: - "Which agent to launch. " + - "claude: runs 'claude' command. " + - "codex: runs 'codex' command. " + - "gemini: runs 'gemini' command. " + - "custom: runs the command specified in 'command' parameter.", - }, - command: { - type: "string", - description: "Custom command to run in the terminal. Only used when agent='custom'. Example: 'python app.py'", - }, - terminal: { - type: "string", - enum: ["wt", "powershell", "cmd"], - description: "Which terminal to open. Default: 'wt' (Windows Terminal). 'powershell' for PowerShell window, 'cmd' for Command Prompt.", - }, - working_directory: { - type: "string", - description: "Working directory for the terminal. If omitted, uses current directory.", - }, - }, - required: ["agent"], - }, - } as Tool, - { - name: "bind_window", - description: - "Bind to a specific window for all subsequent operations (screenshot, click, type, etc.). " + - "Once bound, screenshots capture only that window via PrintWindow, and all input goes through SendMessageW — " + - "no cursor movement, no focus steal, no interference with the user's desktop. " + - "Actions: bind (by title, hwnd, or pid), unbind (release binding), status (show current binding), list (show all visible windows). " + - "Use 'list' first to see available windows, then 'bind' with a title or hwnd. " + - "open_application auto-binds the launched window, but use this tool to bind to already-running windows or switch between windows.", - inputSchema: { - type: "object" as const, - properties: { - action: { - type: "string", - enum: ["bind", "unbind", "status", "list"], + 'Click a GUI element by its accessible name, role, or automationId — no pixel coordinates needed. ' + + 'Uses Windows UI Automation to find the element and InvokePattern to click it. ' + + 'Prefer this over left_click when the element name is visible in the accessibility snapshot. ' + + 'Falls back to BoundingRect center-click if InvokePattern is not supported.', + inputSchema: { + type: 'object' as const, + properties: { + name: { + type: 'string', + description: + 'Accessible name of the element (e.g. "Save", "File", "Search..."). Case-insensitive partial match.', + }, + role: { + type: 'string', + description: + 'Control type (e.g. "Button", "MenuItem", "Edit", "Link"). Optional — narrows the search.', + }, + automationId: { + type: 'string', + description: + 'Exact automationId from the accessibility snapshot. Most precise selector.', + }, + }, + required: [], + }, + } as Tool, + { + name: 'type_into_element', description: - "bind: Bind to a window (specify title, hwnd, or pid). " + - "unbind: Release the current binding, return to full-screen mode. " + - "status: Show the currently bound window (hwnd, title, rect). " + - "list: List all visible windows with hwnd, pid, and title.", - }, - title: { - type: "string", - description: "Window title to search for (partial match, case-insensitive). For 'bind' action.", - }, - hwnd: { - type: "string", - description: "Exact window handle from 'list' output. For 'bind' action.", - }, - pid: { - type: "integer", - description: "Process ID to find window for. For 'bind' action.", - }, - }, - required: ["action"], - }, - } as Tool, - { - name: "activate_window", - description: - "Activate the bound window: bring it to foreground, click to ensure keyboard focus, " + - "and optionally send an initial key sequence. Use this before any input operations to guarantee " + - "the window is ready to receive keyboard/mouse events. " + - "Combines SetForegroundWindow + BringWindowToTop + SendMessage(WM_LBUTTONDOWN) in one call.", - inputSchema: { - type: "object" as const, - properties: { - click_x: { type: "integer", description: "X coordinate to click after activation (client-area). If omitted, clicks center of window." }, - click_y: { type: "integer", description: "Y coordinate to click after activation (client-area). If omitted, clicks center of window." }, - }, - required: [], - }, - } as Tool, - { - name: "prompt_respond", - description: - "Handle interactive CLI/terminal prompts (Yes/No, selection menus, confirmations). " + - "Sends a sequence of key events to the bound window to navigate and confirm a prompt. " + - "This is a convenience wrapper around bound-window keyboard input for common prompt flows. " + - "Typical flows: " + - "1) Yes/No prompt → send 'y' or 'n' + Enter. " + - "2) Arrow-key selection menu → send arrow_down/arrow_up N times + Enter. " + - "3) Text input prompt → type the response + Enter. " + - "After responding, take a screenshot to verify the result.", - inputSchema: { - type: "object" as const, - properties: { - response_type: { - type: "string", - enum: ["yes", "no", "enter", "escape", "select", "type"], + 'Type text into a named GUI element using Windows UI Automation ValuePattern. ' + + 'Finds the element by name/role/automationId, then sets its value directly — ' + + 'no need to click first or use pixel coordinates. Works on Edit, ComboBox, and other value-holding controls.', + inputSchema: { + type: 'object' as const, + properties: { + name: { + type: 'string', + description: 'Accessible name of the target element.', + }, + role: { + type: 'string', + description: 'Control type (optional, e.g. "Edit").', + }, + automationId: { + type: 'string', + description: 'Exact automationId.', + }, + text: { + type: 'string', + description: 'Text to type/set into the element.', + }, + }, + required: ['text'], + }, + } as Tool, + { + name: 'open_terminal', description: - "yes: send 'y' + Enter. " + - "no: send 'n' + Enter. " + - "enter: send Enter only. " + - "escape: send Escape (cancel). " + - "select: use arrow keys to navigate to an option, then Enter. Requires 'arrow_count'. " + - "type: type custom text then Enter. Requires 'text'.", - }, - arrow_direction: { - type: "string", - enum: ["up", "down"], - description: "Arrow key direction for 'select' type. Default: 'down'.", - }, - arrow_count: { - type: "integer", - description: "Number of arrow key presses for 'select' type. Default: 1.", - minimum: 0, - maximum: 50, - }, - text: { - type: "string", - description: "Text to type for 'type' response_type.", - }, - }, - required: ["response_type"], - }, - } as Tool, - { - name: "status_indicator", - description: - "Control the visual status indicator overlay on the bound window. " + - "The indicator is a small floating label at the bottom of the window that shows what Computer Use is doing. " + - "It auto-shows during click/type/key/scroll operations, but you can also send custom messages. " + - "Actions: show (display a custom message), hide (dismiss), status (check if active).", - inputSchema: { - type: "object" as const, - properties: { - action: { - type: "string", - enum: ["show", "hide", "status"], - description: "show: display a custom message on the indicator. hide: dismiss the indicator. status: check if indicator is active.", - }, - message: { - type: "string", - description: "Custom message to display (for 'show' action). Supports emoji. Auto-fades after 2 seconds.", - }, - }, - required: ["action"], - }, - } as Tool, - { - name: "virtual_keyboard", - description: - "Send keyboard input directly to the bound window via SendMessageW — independent of the physical keyboard. " + - "The user can keep typing on their own keyboard without interference. " + - "Supports: single keys, key combinations (Ctrl+S, Alt+F4), text input, and hold-key operations. " + - "All input targets the bound HWND only — no global keyboard events.", - inputSchema: { - type: "object" as const, - properties: { - action: { - type: "string", - enum: ["type", "combo", "press", "release", "hold"], + 'Open a new terminal window and launch an AI agent CLI. ' + + 'This is a workflow tool that automates: open terminal → type startup command → press Enter → wait → verify. ' + + "Supported agents: claude (runs 'claude'), codex (runs 'codex'), gemini (runs 'gemini'), " + + 'or any custom command. After launching, the tool binds to the new terminal window ' + + 'and takes a screenshot to verify the agent started successfully. ' + + "Use this when the user says: 'open Claude Code', 'start a Codex terminal', 'launch Gemini', etc.", + inputSchema: { + type: 'object' as const, + properties: { + agent: { + type: 'string', + enum: ['claude', 'codex', 'gemini', 'custom'], + description: + 'Which agent to launch. ' + + "claude: runs 'claude' command. " + + "codex: runs 'codex' command. " + + "gemini: runs 'gemini' command. " + + "custom: runs the command specified in 'command' parameter.", + }, + command: { + type: 'string', + description: + "Custom command to run in the terminal. Only used when agent='custom'. Example: 'python app.py'", + }, + terminal: { + type: 'string', + enum: ['wt', 'powershell', 'cmd'], + description: + "Which terminal to open. Default: 'wt' (Windows Terminal). 'powershell' for PowerShell window, 'cmd' for Command Prompt.", + }, + working_directory: { + type: 'string', + description: + 'Working directory for the terminal. If omitted, uses current directory.', + }, + }, + required: ['agent'], + }, + } as Tool, + { + name: 'bind_window', description: - "type: Send text string via WM_CHAR (Unicode, supports Chinese/emoji). " + - "combo: Send a key combination like ctrl+s, alt+f4, ctrl+shift+a (press all, release in reverse). " + - "press: Press a key down and hold it (pair with 'release'). " + - "release: Release a previously pressed key. " + - "hold: Press key(s) for a duration then release.", - }, - text: { - type: "string", - description: "For 'type': the text to input. For 'combo': key combination string (e.g. 'ctrl+s', 'alt+tab', 'ctrl+shift+a'). For 'press'/'release': single key name (e.g. 'shift', 'ctrl', 'a').", - }, - duration: { - type: "number", - description: "For 'hold': seconds to hold the key(s) before releasing. Default: 1.", - }, - repeat: { - type: "integer", - description: "Number of times to repeat the action. Default: 1.", - minimum: 1, - maximum: 100, - }, - }, - required: ["action", "text"], - }, - } as Tool, - { - name: "virtual_mouse", - description: - "Control a virtual mouse on the bound window via SendMessageW — independent of the physical mouse. " + - "The user's real cursor stays free. All operations target the bound HWND only.", - inputSchema: { - type: "object" as const, - properties: { - action: { - type: "string", - enum: ["click", "double_click", "right_click", "move", "drag", "down", "up"], + 'Bind to a specific window for all subsequent operations (screenshot, click, type, etc.). ' + + 'Once bound, screenshots capture only that window via PrintWindow, and all input goes through SendMessageW — ' + + "no cursor movement, no focus steal, no interference with the user's desktop. " + + 'Actions: bind (by title, hwnd, or pid), unbind (release binding), status (show current binding), list (show all visible windows). ' + + "Use 'list' first to see available windows, then 'bind' with a title or hwnd. " + + 'open_application auto-binds the launched window, but use this tool to bind to already-running windows or switch between windows.', + inputSchema: { + type: 'object' as const, + properties: { + action: { + type: 'string', + enum: ['bind', 'unbind', 'status', 'list'], + description: + 'bind: Bind to a window (specify title, hwnd, or pid). ' + + 'unbind: Release the current binding, return to full-screen mode. ' + + 'status: Show the currently bound window (hwnd, title, rect). ' + + 'list: List all visible windows with hwnd, pid, and title.', + }, + title: { + type: 'string', + description: + "Window title to search for (partial match, case-insensitive). For 'bind' action.", + }, + hwnd: { + type: 'string', + description: + "Exact window handle from 'list' output. For 'bind' action.", + }, + pid: { + type: 'integer', + description: + "Process ID to find window for. For 'bind' action.", + }, + }, + required: ['action'], + }, + } as Tool, + { + name: 'activate_window', description: - "click: left-click at coordinate. " + - "double_click: double left-click. " + - "right_click: right-click. " + - "move: move virtual cursor (visual only, no click). " + - "drag: press at start, move to end, release. Requires coordinate (end) and start_coordinate. " + - "down: press left button at coordinate (hold). " + - "up: release left button at coordinate.", - }, - coordinate: { - type: "array", - items: { type: "number" }, - minItems: 2, - maxItems: 2, - description: "(x, y) client-area coordinate on the bound window.", - }, - start_coordinate: { - type: "array", - items: { type: "number" }, - minItems: 2, - maxItems: 2, - description: "(x, y) start point for drag. If omitted, drags from current virtual cursor position.", - }, - }, - required: ["action", "coordinate"], - }, - } as Tool, - { - name: "mouse_wheel", - description: - "Scroll inside the bound window using mouse wheel (WM_MOUSEWHEEL / WM_MOUSEHWHEEL). " + - "Unlike the generic 'scroll' tool which uses WM_VSCROLL (only works on scrollbar controls), " + - "mouse_wheel simulates the physical mouse wheel and works on Excel spreadsheets, web pages, " + - "code editors, PDF viewers, and any modern UI. " + - "Specify the click point within the window where the scroll should occur — " + - "this determines which panel/pane/element receives the scroll.", - inputSchema: { - type: "object" as const, - properties: { - coordinate: { - type: "array", - items: { type: "number" }, - minItems: 2, - maxItems: 2, - description: "(x, y) client-area coordinate where the scroll should occur. Determines which element receives the scroll.", - }, - delta: { - type: "integer", - description: "Scroll amount in 'clicks'. Positive = scroll up, negative = scroll down. Each click = 3 lines typically. Use -3 to -5 for page-like scrolling.", - }, - direction: { - type: "string", - enum: ["vertical", "horizontal"], - description: "Scroll direction. Default: 'vertical'. Use 'horizontal' for side-scrolling (e.g. wide Excel sheets, timeline views).", - }, - }, - required: ["coordinate", "delta"], - }, - } as Tool, - ] : []), + 'Activate the bound window: bring it to foreground, click to ensure keyboard focus, ' + + 'and optionally send an initial key sequence. Use this before any input operations to guarantee ' + + 'the window is ready to receive keyboard/mouse events. ' + + 'Combines SetForegroundWindow + BringWindowToTop + SendMessage(WM_LBUTTONDOWN) in one call.', + inputSchema: { + type: 'object' as const, + properties: { + click_x: { + type: 'integer', + description: + 'X coordinate to click after activation (client-area). If omitted, clicks center of window.', + }, + click_y: { + type: 'integer', + description: + 'Y coordinate to click after activation (client-area). If omitted, clicks center of window.', + }, + }, + required: [], + }, + } as Tool, + { + name: 'prompt_respond', + description: + 'Handle interactive CLI/terminal prompts (Yes/No, selection menus, confirmations). ' + + 'Sends a sequence of key events to the bound window to navigate and confirm a prompt. ' + + 'This is a convenience wrapper around bound-window keyboard input for common prompt flows. ' + + 'Typical flows: ' + + "1) Yes/No prompt → send 'y' or 'n' + Enter. " + + '2) Arrow-key selection menu → send arrow_down/arrow_up N times + Enter. ' + + '3) Text input prompt → type the response + Enter. ' + + 'After responding, take a screenshot to verify the result.', + inputSchema: { + type: 'object' as const, + properties: { + response_type: { + type: 'string', + enum: ['yes', 'no', 'enter', 'escape', 'select', 'type'], + description: + "yes: send 'y' + Enter. " + + "no: send 'n' + Enter. " + + 'enter: send Enter only. ' + + 'escape: send Escape (cancel). ' + + "select: use arrow keys to navigate to an option, then Enter. Requires 'arrow_count'. " + + "type: type custom text then Enter. Requires 'text'.", + }, + arrow_direction: { + type: 'string', + enum: ['up', 'down'], + description: + "Arrow key direction for 'select' type. Default: 'down'.", + }, + arrow_count: { + type: 'integer', + description: + "Number of arrow key presses for 'select' type. Default: 1.", + minimum: 0, + maximum: 50, + }, + text: { + type: 'string', + description: "Text to type for 'type' response_type.", + }, + }, + required: ['response_type'], + }, + } as Tool, + { + name: 'status_indicator', + description: + 'Control the visual status indicator overlay on the bound window. ' + + 'The indicator is a small floating label at the bottom of the window that shows what Computer Use is doing. ' + + 'It auto-shows during click/type/key/scroll operations, but you can also send custom messages. ' + + 'Actions: show (display a custom message), hide (dismiss), status (check if active).', + inputSchema: { + type: 'object' as const, + properties: { + action: { + type: 'string', + enum: ['show', 'hide', 'status'], + description: + 'show: display a custom message on the indicator. hide: dismiss the indicator. status: check if indicator is active.', + }, + message: { + type: 'string', + description: + "Custom message to display (for 'show' action). Supports emoji. Auto-fades after 2 seconds.", + }, + }, + required: ['action'], + }, + } as Tool, + { + name: 'virtual_keyboard', + description: + 'Send keyboard input directly to the bound window via SendMessageW — independent of the physical keyboard. ' + + 'The user can keep typing on their own keyboard without interference. ' + + 'Supports: single keys, key combinations (Ctrl+S, Alt+F4), text input, and hold-key operations. ' + + 'All input targets the bound HWND only — no global keyboard events.', + inputSchema: { + type: 'object' as const, + properties: { + action: { + type: 'string', + enum: ['type', 'combo', 'press', 'release', 'hold'], + description: + 'type: Send text string via WM_CHAR (Unicode, supports Chinese/emoji). ' + + 'combo: Send a key combination like ctrl+s, alt+f4, ctrl+shift+a (press all, release in reverse). ' + + "press: Press a key down and hold it (pair with 'release'). " + + 'release: Release a previously pressed key. ' + + 'hold: Press key(s) for a duration then release.', + }, + text: { + type: 'string', + description: + "For 'type': the text to input. For 'combo': key combination string (e.g. 'ctrl+s', 'alt+tab', 'ctrl+shift+a'). For 'press'/'release': single key name (e.g. 'shift', 'ctrl', 'a').", + }, + duration: { + type: 'number', + description: + "For 'hold': seconds to hold the key(s) before releasing. Default: 1.", + }, + repeat: { + type: 'integer', + description: + 'Number of times to repeat the action. Default: 1.', + minimum: 1, + maximum: 100, + }, + }, + required: ['action', 'text'], + }, + } as Tool, + { + name: 'virtual_mouse', + description: + 'Control a virtual mouse on the bound window via SendMessageW — independent of the physical mouse. ' + + "The user's real cursor stays free. All operations target the bound HWND only.", + inputSchema: { + type: 'object' as const, + properties: { + action: { + type: 'string', + enum: [ + 'click', + 'double_click', + 'right_click', + 'move', + 'drag', + 'down', + 'up', + ], + description: + 'click: left-click at coordinate. ' + + 'double_click: double left-click. ' + + 'right_click: right-click. ' + + 'move: move virtual cursor (visual only, no click). ' + + 'drag: press at start, move to end, release. Requires coordinate (end) and start_coordinate. ' + + 'down: press left button at coordinate (hold). ' + + 'up: release left button at coordinate.', + }, + coordinate: { + type: 'array', + items: { type: 'number' }, + minItems: 2, + maxItems: 2, + description: + '(x, y) client-area coordinate on the bound window.', + }, + start_coordinate: { + type: 'array', + items: { type: 'number' }, + minItems: 2, + maxItems: 2, + description: + '(x, y) start point for drag. If omitted, drags from current virtual cursor position.', + }, + }, + required: ['action', 'coordinate'], + }, + } as Tool, + { + name: 'mouse_wheel', + description: + 'Scroll inside the bound window using mouse wheel (WM_MOUSEWHEEL / WM_MOUSEHWHEEL). ' + + "Unlike the generic 'scroll' tool which uses WM_VSCROLL (only works on scrollbar controls), " + + 'mouse_wheel simulates the physical mouse wheel and works on Excel spreadsheets, web pages, ' + + 'code editors, PDF viewers, and any modern UI. ' + + 'Specify the click point within the window where the scroll should occur — ' + + 'this determines which panel/pane/element receives the scroll.', + inputSchema: { + type: 'object' as const, + properties: { + coordinate: { + type: 'array', + items: { type: 'number' }, + minItems: 2, + maxItems: 2, + description: + '(x, y) client-area coordinate where the scroll should occur. Determines which element receives the scroll.', + }, + delta: { + type: 'integer', + description: + "Scroll amount in 'clicks'. Positive = scroll up, negative = scroll down. Each click = 3 lines typically. Use -3 to -5 for page-like scrolling.", + }, + direction: { + type: 'string', + enum: ['vertical', 'horizontal'], + description: + "Scroll direction. Default: 'vertical'. Use 'horizontal' for side-scrolling (e.g. wide Excel sheets, timeline views).", + }, + }, + required: ['coordinate', 'delta'], + }, + } as Tool, + ] + : []), { - name: "switch_display", + name: 'switch_display', description: - "Switch which monitor subsequent screenshots capture. Use this when the " + - "application you need is on a different monitor than the one shown. " + - "The screenshot tool tells you which monitor it captured and lists " + - "other attached monitors by name — pass one of those names here. " + - "After switching, call screenshot to see the new monitor. " + + 'Switch which monitor subsequent screenshots capture. Use this when the ' + + 'application you need is on a different monitor than the one shown. ' + + 'The screenshot tool tells you which monitor it captured and lists ' + + 'other attached monitors by name — pass one of those names here. ' + + 'After switching, call screenshot to see the new monitor. ' + 'Pass "auto" to return to automatic monitor selection.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { display: { - type: "string", + type: 'string', description: 'Monitor name from the screenshot note (e.g. "Built-in Retina Display", ' + '"LG UltraFine"), or "auto" to re-enable automatic selection.', }, }, - required: ["display"], + required: ['display'], }, }, { - name: "list_granted_applications", + name: 'list_granted_applications', description: - "List the applications currently in the session allowlist, plus the active grant flags and coordinate mode. No side effects.", + 'List the applications currently in the session allowlist, plus the active grant flags and coordinate mode. No side effects.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: {}, required: [], }, }, { - name: "read_clipboard", + name: 'read_clipboard', description: - "Read the current clipboard contents as text. Requires the `clipboardRead` grant.", + 'Read the current clipboard contents as text. Requires the `clipboardRead` grant.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: {}, required: [], }, }, { - name: "write_clipboard", + name: 'write_clipboard', description: - "Write text to the clipboard. Requires the `clipboardWrite` grant.", + 'Write text to the clipboard. Requires the `clipboardWrite` grant.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { - text: { type: "string" }, + text: { type: 'string' }, }, - required: ["text"], + required: ['text'], }, }, { - name: "wait", - description: "Wait for a specified duration.", + name: 'wait', + description: 'Wait for a specified duration.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { duration: { - type: "number", - description: "Duration in seconds (0–100).", + type: 'number', + description: 'Duration in seconds (0–100).', }, }, - required: ["duration"], + required: ['duration'], }, }, { - name: "cursor_position", + name: 'cursor_position', description: - "Get the current mouse cursor position. Returns image-pixel coordinates relative to the most recent screenshot, or logical points if no screenshot has been taken.", + 'Get the current mouse cursor position. Returns image-pixel coordinates relative to the most recent screenshot, or logical points if no screenshot has been taken.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: {}, required: [], }, }, { - name: "hold_key", + name: 'hold_key', description: `Press and hold a key or key combination for the specified duration, then release. ${FRONTMOST_GATE_DESC} ` + - "System-level combos require the `systemKeyCombos` grant.", + 'System-level combos require the `systemKeyCombos` grant.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { text: { - type: "string", + type: 'string', description: 'Key or chord to hold, e.g. "space", "shift+down".', }, duration: { - type: "number", - description: "Duration in seconds (0–100).", + type: 'number', + description: 'Duration in seconds (0–100).', }, }, - required: ["text", "duration"], + required: ['text', 'duration'], }, }, { - name: "left_mouse_down", + name: 'left_mouse_down', description: `Press the left mouse button at the current cursor position and leave it held. ${FRONTMOST_GATE_DESC} ` + - "Use mouse_move first to position the cursor. Call left_mouse_up to release. Errors if the button is already held.", + 'Use mouse_move first to position the cursor. Call left_mouse_up to release. Errors if the button is already held.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: {}, required: [], }, }, { - name: "left_mouse_up", + name: 'left_mouse_up', description: `Release the left mouse button at the current cursor position. ${FRONTMOST_GATE_DESC} ` + - "Pairs with left_mouse_down. Safe to call even if the button is not currently held.", + 'Pairs with left_mouse_down. Safe to call even if the button is not currently held.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: {}, required: [], }, }, { - name: "computer_batch", + name: 'computer_batch', description: - "Execute a sequence of actions in ONE tool call. Each individual tool call requires a model→API round trip (seconds); " + - "batching a predictable sequence eliminates all but one. Use this whenever you can predict the outcome of several actions ahead — " + - "e.g. click a field, type into it, press Return. Actions execute sequentially and stop on the first error. " + + 'Execute a sequence of actions in ONE tool call. Each individual tool call requires a model→API round trip (seconds); ' + + 'batching a predictable sequence eliminates all but one. Use this whenever you can predict the outcome of several actions ahead — ' + + 'e.g. click a field, type into it, press Return. Actions execute sequentially and stop on the first error. ' + `${FRONTMOST_GATE_DESC} The frontmost check runs before EACH action inside the batch — if an action opens a non-allowed app, the next action's gate fires and the batch stops there. ` + - "Mid-batch screenshot actions are allowed for inspection but coordinates in subsequent clicks always refer to the PRE-BATCH full-screen screenshot.", + 'Mid-batch screenshot actions are allowed for inspection but coordinates in subsequent clicks always refer to the PRE-BATCH full-screen screenshot.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { actions: { - type: "array", + type: 'array', minItems: 1, items: BATCH_ACTION_ITEM_SCHEMA, description: 'List of actions. Example: [{"action":"left_click","coordinate":[100,200]},{"action":"type","text":"hello"},{"action":"key","text":"Return"}]', }, }, - required: ["actions"], + required: ['actions'], }, }, ...(caps.teachMode ? buildTeachTools(coord, installedAppsHint) : []), - ]; + ] } /** @@ -932,122 +1006,122 @@ function buildTeachTools( // items). Depends on coord, so it lives inside this factory. const teachStepProperties = { explanation: { - type: "string", + type: 'string', description: - "Tooltip body text. Explain what the user is looking at and why it matters. " + - "This is the ONLY place the user sees your words — be complete but concise.", + 'Tooltip body text. Explain what the user is looking at and why it matters. ' + + 'This is the ONLY place the user sees your words — be complete but concise.', }, next_preview: { - type: "string", + type: 'string', description: - "One line describing exactly what will happen when the user clicks Next. " + + 'One line describing exactly what will happen when the user clicks Next. ' + 'Example: "Next: I\'ll click Create Bucket and type the name." ' + - "Shown below the explanation in a smaller font.", + 'Shown below the explanation in a smaller font.', }, anchor: { - type: "array", - items: { type: "number" }, + type: 'array', + items: { type: 'number' }, minItems: 2, maxItems: 2, description: `(x, y) — where the tooltip arrow points. ${coord.x} ` + - "Omit to center the tooltip with no arrow (for general-context steps).", + 'Omit to center the tooltip with no arrow (for general-context steps).', }, actions: { - type: "array", + type: 'array', // Empty allowed — "read this, click Next" steps. items: BATCH_ACTION_ITEM_SCHEMA, description: - "Actions to execute when the user clicks Next. Same item schema as computer_batch.actions. " + - "Empty array is valid for purely explanatory steps. Actions run sequentially and stop on first error.", + 'Actions to execute when the user clicks Next. Same item schema as computer_batch.actions. ' + + 'Empty array is valid for purely explanatory steps. Actions run sequentially and stop on first error.', }, - } as const; + } as const return [ { - name: "request_teach_access", + name: 'request_teach_access', description: - "Request permission to guide the user through a task step-by-step with on-screen tooltips. " + - "Use this INSTEAD OF request_access when the user wants to LEARN how to do something " + + 'Request permission to guide the user through a task step-by-step with on-screen tooltips. ' + + 'Use this INSTEAD OF request_access when the user wants to LEARN how to do something ' + '(phrases like "teach me", "walk me through", "show me how", "help me learn"). ' + - "On approval the main Claude window hides and a fullscreen tooltip overlay appears. " + - "You then call teach_step repeatedly; each call shows one tooltip and waits for the user to click Next. " + - "Same app-allowlist semantics as request_access, but no clipboard/system-key flags. " + - "Teach mode ends automatically when your turn ends.", + 'On approval the main Claude window hides and a fullscreen tooltip overlay appears. ' + + 'You then call teach_step repeatedly; each call shows one tooltip and waits for the user to click Next. ' + + 'Same app-allowlist semantics as request_access, but no clipboard/system-key flags. ' + + 'Teach mode ends automatically when your turn ends.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { apps: { - type: "array", - items: { type: "string" }, + type: 'array', + items: { type: 'string' }, description: 'Application display names (e.g. "Slack", "Calendar") or bundle identifiers. Resolved case-insensitively against installed apps.' + installedAppsHint, }, reason: { - type: "string", + type: 'string', description: 'What you will be teaching. Shown in the approval dialog as "Claude wants to guide you through {reason}". Keep it short and task-focused.', }, }, - required: ["apps", "reason"], + required: ['apps', 'reason'], }, }, { - name: "teach_step", + name: 'teach_step', description: - "Show one guided-tour tooltip and wait for the user to click Next. On Next, execute the actions, " + - "take a fresh screenshot, and return both — you do NOT need a separate screenshot call between steps. " + - "The returned image shows the state after your actions ran; anchor the next teach_step against it. " + - "IMPORTANT — the user only sees the tooltip during teach mode. Put ALL narration in `explanation`. " + - "Text you emit outside teach_step calls is NOT visible until teach mode ends. " + + 'Show one guided-tour tooltip and wait for the user to click Next. On Next, execute the actions, ' + + 'take a fresh screenshot, and return both — you do NOT need a separate screenshot call between steps. ' + + 'The returned image shows the state after your actions ran; anchor the next teach_step against it. ' + + 'IMPORTANT — the user only sees the tooltip during teach mode. Put ALL narration in `explanation`. ' + + 'Text you emit outside teach_step calls is NOT visible until teach mode ends. ' + "Pack as many actions as possible into each step's `actions` array — the user waits through " + - "the whole round trip between clicks, so one step that fills a form beats five steps that fill one field each. " + - "Returns {exited:true} if the user clicks Exit — do not call teach_step again after that. " + - "Take an initial screenshot before your FIRST teach_step to anchor it.", + 'the whole round trip between clicks, so one step that fills a form beats five steps that fill one field each. ' + + 'Returns {exited:true} if the user clicks Exit — do not call teach_step again after that. ' + + 'Take an initial screenshot before your FIRST teach_step to anchor it.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: teachStepProperties, - required: ["explanation", "next_preview", "actions"], + required: ['explanation', 'next_preview', 'actions'], }, }, { - name: "teach_batch", + name: 'teach_batch', description: - "Queue multiple teach steps in one tool call. Parallels computer_batch: " + - "N steps → one model↔API round trip instead of N. Each step still shows a tooltip " + + 'Queue multiple teach steps in one tool call. Parallels computer_batch: ' + + 'N steps → one model↔API round trip instead of N. Each step still shows a tooltip ' + "and waits for the user's Next click, but YOU aren't waiting for a round trip between steps. " + - "You can call teach_batch multiple times in one tour — treat each batch as one predictable " + - "SEGMENT (typically: all the steps on one page). The returned screenshot shows the state " + + 'You can call teach_batch multiple times in one tour — treat each batch as one predictable ' + + 'SEGMENT (typically: all the steps on one page). The returned screenshot shows the state ' + "after the batch's final actions; anchor the NEXT teach_batch against it. " + - "WITHIN a batch, all anchors and click coordinates refer to the PRE-BATCH screenshot " + - "(same invariant as computer_batch) — for steps 2+ in a batch, either omit anchor " + + 'WITHIN a batch, all anchors and click coordinates refer to the PRE-BATCH screenshot ' + + '(same invariant as computer_batch) — for steps 2+ in a batch, either omit anchor ' + "(centered tooltip) or target elements you know won't have moved. " + - "Good pattern: batch 5 tooltips on page A (last step navigates) → read returned screenshot → " + - "batch 3 tooltips on page B → done. " + - "Returns {exited:true, stepsCompleted:N} if the user clicks Exit — do NOT call again after that; " + - "{stepsCompleted, stepFailed, ...} if an action errors mid-batch; " + - "otherwise {stepsCompleted, results:[...]} plus a final screenshot. " + - "Fall back to individual teach_step calls when you need to react to each intermediate screenshot.", + 'Good pattern: batch 5 tooltips on page A (last step navigates) → read returned screenshot → ' + + 'batch 3 tooltips on page B → done. ' + + 'Returns {exited:true, stepsCompleted:N} if the user clicks Exit — do NOT call again after that; ' + + '{stepsCompleted, stepFailed, ...} if an action errors mid-batch; ' + + 'otherwise {stepsCompleted, results:[...]} plus a final screenshot. ' + + 'Fall back to individual teach_step calls when you need to react to each intermediate screenshot.', inputSchema: { - type: "object" as const, + type: 'object' as const, properties: { steps: { - type: "array", + type: 'array', minItems: 1, items: { - type: "object", + type: 'object', properties: teachStepProperties, - required: ["explanation", "next_preview", "actions"], + required: ['explanation', 'next_preview', 'actions'], }, description: - "Ordered steps. Validated upfront — a typo in step 5 errors before any tooltip shows.", + 'Ordered steps. Validated upfront — a typo in step 5 errors before any tooltip shows.', }, }, - required: ["steps"], + required: ['steps'], }, }, - ]; + ] } diff --git a/packages/@ant/computer-use-mcp/src/types.ts b/packages/@ant/computer-use-mcp/src/types.ts index 656f795dc..03725c9f6 100644 --- a/packages/@ant/computer-use-mcp/src/types.ts +++ b/packages/@ant/computer-use-mcp/src/types.ts @@ -2,19 +2,19 @@ import type { ComputerExecutor, InstalledApp, ScreenshotResult, -} from "./executor.js"; +} from './executor.js' /** `ScreenshotResult` without the base64 blob. The shape hosts persist for * cross-respawn `scaleCoord` survival. */ -export type ScreenshotDims = Omit; +export type ScreenshotDims = Omit /** Shape mirrors claude-for-chrome-mcp/src/types.ts:1-7 */ export interface Logger { - info: (message: string, ...args: unknown[]) => void; - error: (message: string, ...args: unknown[]) => void; - warn: (message: string, ...args: unknown[]) => void; - debug: (message: string, ...args: unknown[]) => void; - silly: (message: string, ...args: unknown[]) => void; + info: (message: string, ...args: unknown[]) => void + error: (message: string, ...args: unknown[]) => void + warn: (message: string, ...args: unknown[]) => void + debug: (message: string, ...args: unknown[]) => void + silly: (message: string, ...args: unknown[]) => void } /** @@ -35,7 +35,7 @@ export interface Logger { * Enforced in `runInputActionGates` via the frontmost-app check: keyboard * actions require `"full"`, mouse actions require `"click"` or higher. */ -export type CuAppPermTier = "read" | "click" | "full"; +export type CuAppPermTier = 'read' | 'click' | 'full' /** * A single app the user has approved for the current session. Session-scoped @@ -45,32 +45,32 @@ export type CuAppPermTier = "read" | "click" | "full"; * scope. */ export interface AppGrant { - bundleId: string; - displayName: string; + bundleId: string + displayName: string /** Epoch ms. For Settings-page display ("Granted 3m ago"). */ - grantedAt: number; + grantedAt: number /** Undefined → `"full"` (back-compat for pre-tier grants persisted in * session state). */ - tier?: CuAppPermTier; + tier?: CuAppPermTier } /** Orthogonal to the app allowlist. */ export interface CuGrantFlags { - clipboardRead: boolean; - clipboardWrite: boolean; + clipboardRead: boolean + clipboardWrite: boolean /** * When false, the `key` tool rejects combos in `keyBlocklist.ts` * (cmd+q, cmd+tab, cmd+space, cmd+shift+q, ctrl+alt+delete). All other * key sequences work regardless. */ - systemKeyCombos: boolean; + systemKeyCombos: boolean } export const DEFAULT_GRANT_FLAGS: CuGrantFlags = { clipboardRead: false, clipboardWrite: false, systemKeyCombos: false, -}; +} /** * Host picks via GrowthBook JSON feature `chicago_coordinate_mode`, baked @@ -78,7 +78,7 @@ export const DEFAULT_GRANT_FLAGS: CuGrantFlags = { * ONE convention and never learns the other exists. `normalized_0_100` * sidesteps the Retina scaleFactor bug class entirely. */ -export type CoordinateMode = "pixels" | "normalized_0_100"; +export type CoordinateMode = 'pixels' | 'normalized_0_100' /** * Independent kill switches for subtle/risky ported behaviors. Read from @@ -86,28 +86,28 @@ export type CoordinateMode = "pixels" | "normalized_0_100"; */ export interface CuSubGates { /** 9×9 exact-byte staleness guard before click. */ - pixelValidation: boolean; + pixelValidation: boolean /** Route `type("foo\nbar")` through clipboard instead of keystroke-by-keystroke. */ - clipboardPasteMultiline: boolean; + clipboardPasteMultiline: boolean /** * Ease-out-cubic mouse glide at 60fps, distance-proportional duration * (2000 px/sec, capped at 0.5s). Adds up to ~0.5s latency * per click. When off, cursor teleports instantly. */ - mouseAnimation: boolean; + mouseAnimation: boolean /** * Pre-action sequence: hide non-allowlisted apps, then defocus us (from the * Vercept acquisition). When off, the * frontmost gate fires in the normal case and the model gets stuck — this * is the A/B-test-the-old-broken-behavior switch. */ - hideBeforeAction: boolean; + hideBeforeAction: boolean /** * Auto-resolve the target display before each screenshot when the * selected display has no allowed-app windows. When on, `handleScreenshot` * uses the atomic Swift path; off → sticks with `selectedDisplayId`. */ - autoTargetDisplay: boolean; + autoTargetDisplay: boolean /** * Stash+clear the clipboard while a tier-"click" app is frontmost. * Closes the gap where a click-tier terminal/IDE has a UI Paste button @@ -115,7 +115,7 @@ export interface CuSubGates { * keyboard block can be routed around by clicking Paste. Restored when * a non-"click" app becomes frontmost, or at turn end. */ - clipboardGuard: boolean; + clipboardGuard: boolean } // ---------------------------------------------------------------------------- @@ -125,17 +125,17 @@ export interface CuSubGates { /** One entry per app the model asked for, after name → bundle ID resolution. */ export interface ResolvedAppRequest { /** What the model asked for (e.g. "Slack", "com.tinyspeck.slackmacgap"). */ - requestedName: string; + requestedName: string /** The resolved InstalledApp if found, else undefined (shown greyed in the UI). */ - resolved?: InstalledApp; + resolved?: InstalledApp /** Shell-access-equivalent bundle IDs get a UI warning. See sentinelApps.ts. */ - isSentinel: boolean; + isSentinel: boolean /** Already in the allowlist → skip the checkbox, return in `granted` immediately. */ - alreadyGranted: boolean; + alreadyGranted: boolean /** Hardcoded tier for this app (browser→"read", terminal→"click", else "full"). * The dialog displays this read-only; the renderer passes it through * verbatim in the AppGrant. */ - proposedTier: CuAppPermTier; + proposedTier: CuAppPermTier } /** @@ -145,18 +145,18 @@ export interface ResolvedAppRequest { * change needed. */ export interface CuPermissionRequest { - requestId: string; + requestId: string /** Model-provided reason string. Shown prominently in the approval UI. */ - reason: string; - apps: ResolvedAppRequest[]; + reason: string + apps: ResolvedAppRequest[] /** What the model asked for. User can toggle independently of apps. */ - requestedFlags: Partial; + requestedFlags: Partial /** * For the "On Windows, Claude can see all apps..." footnote. Taken from * `executor.capabilities.screenshotFiltering` so the renderer doesn't * need to know about platforms. */ - screenshotFiltering: "native" | "none"; + screenshotFiltering: 'native' | 'none' /** * Present only when TCC permissions are NOT yet granted. When present, * the renderer shows a TCC toggle panel (two rows: Accessibility, Screen @@ -166,9 +166,9 @@ export interface CuPermissionRequest { * restart after granting Screen Recording — we don't. */ tccState?: { - accessibility: boolean; - screenRecording: boolean; - }; + accessibility: boolean + screenRecording: boolean + } /** * Apps with windows on the CU display that aren't in the requested * allowlist. These will be hidden the first time Claude takes an action. @@ -176,13 +176,13 @@ export interface CuPermissionRequest { * user clicks Allow, but it's a preview, not a contract. Absent when * empty so the renderer can skip the section cleanly. */ - willHide?: Array<{ bundleId: string; displayName: string }>; + willHide?: Array<{ bundleId: string; displayName: string }> /** * `chicagoAutoUnhide` app preference at request time. The renderer picks * between "...then restored when Claude is done" and "...will be hidden" * copy. Absent when `willHide` is absent (same condition). */ - autoUnhideEnabled?: boolean; + autoUnhideEnabled?: boolean } /** @@ -191,10 +191,10 @@ export interface CuPermissionRequest { * LocalAgentModeSessionManager.ts:2794). */ export interface CuPermissionResponse { - granted: AppGrant[]; + granted: AppGrant[] /** Bundle IDs the user unchecked, or apps that weren't installed. */ - denied: Array<{ bundleId: string; reason: "user_denied" | "not_installed" }>; - flags: CuGrantFlags; + denied: Array<{ bundleId: string; reason: 'user_denied' | 'not_installed' }> + flags: CuGrantFlags /** * Whether the user clicked Allow in THIS dialog. Only set by the * teach-mode handler — regular request_access doesn't need it (the @@ -205,7 +205,7 @@ export interface CuPermissionResponse { * them apart without this. Undefined → legacy/regular path, do not * gate on it. */ - userConsented?: boolean; + userConsented?: boolean } // ---------------------------------------------------------------------------- @@ -218,9 +218,9 @@ export interface CuPermissionResponse { * No Electron imports in this package — the host injects everything. */ export interface ComputerUseHostAdapter { - serverName: string; - logger: Logger; - executor: ComputerExecutor; + serverName: string + logger: Logger + executor: ComputerExecutor /** * TCC state check — Accessibility + Screen Recording on macOS. Pure check, @@ -231,23 +231,23 @@ export interface ComputerUseHostAdapter { ensureOsPermissions(): Promise< | { granted: true } | { granted: false; accessibility: boolean; screenRecording: boolean } - >; + > /** The Settings-page kill switch (`chicagoEnabled` app preference). */ - isDisabled(): boolean; + isDisabled(): boolean /** * The `chicagoAutoUnhide` app preference. Consumed by `buildAccessRequest` * to populate `CuPermissionRequest.autoUnhideEnabled` so the renderer's * "will be hidden" copy can say "then restored" only when true. */ - getAutoUnhideEnabled(): boolean; + getAutoUnhideEnabled(): boolean /** * Sub-gates re-read on every tool call so GrowthBook flips take effect * mid-session without restart. */ - getSubGates(): CuSubGates; + getSubGates(): CuSubGates /** * JPEG decode + crop + raw pixel bytes, for the PixelCompare staleness guard. @@ -261,7 +261,7 @@ export interface ComputerUseHostAdapter { cropRawPatch( jpegBase64: string, rect: { x: number; y: number; width: number; height: number }, - ): Buffer | null; + ): Buffer | null } // ---------------------------------------------------------------------------- @@ -286,18 +286,18 @@ export interface ComputerUseHostAdapter { export interface ComputerUseSessionContext { // ── Read state fresh per call ────────────────────────────────────── - getAllowedApps(): readonly AppGrant[]; - getGrantFlags(): CuGrantFlags; + getAllowedApps(): readonly AppGrant[] + getGrantFlags(): CuGrantFlags /** Per-user auto-deny list (Settings page). Empty array = none. */ - getUserDeniedBundleIds(): readonly string[]; - getSelectedDisplayId(): number | undefined; - getDisplayPinnedByModel?(): boolean; - getDisplayResolvedForApps?(): string | undefined; - getTeachModeActive?(): boolean; + getUserDeniedBundleIds(): readonly string[] + getSelectedDisplayId(): number | undefined + getDisplayPinnedByModel?(): boolean + getDisplayResolvedForApps?(): string | undefined + getTeachModeActive?(): boolean /** Dims-only fallback when `lastScreenshot` is unset (cross-respawn). * `bindSessionContext` reconstructs `{...dims, base64: ""}` so scaleCoord * works and pixelCompare correctly skips. */ - getLastScreenshotDims?(): ScreenshotDims | undefined; + getLastScreenshotDims?(): ScreenshotDims | undefined // ── Write-back callbacks ─────────────────────────────────────────── @@ -307,46 +307,46 @@ export interface ComputerUseSessionContext { onPermissionRequest?( req: CuPermissionRequest, signal: AbortSignal, - ): Promise; + ): Promise /** Teach-mode sibling of `onPermissionRequest`. */ onTeachPermissionRequest?( req: CuTeachPermissionRequest, signal: AbortSignal, - ): Promise; + ): Promise /** Called by `bindSessionContext` after merging a permission response into * the allowlist (dedupe on bundleId, truthy-only flag spread). Host * persists for resume survival. */ - onAllowedAppsChanged?(apps: readonly AppGrant[], flags: CuGrantFlags): void; - onAppsHidden?(bundleIds: string[]): void; + onAllowedAppsChanged?(apps: readonly AppGrant[], flags: CuGrantFlags): void + onAppsHidden?(bundleIds: string[]): void /** Reads the session's clipboardGuard stash. undefined → no stash held. */ - getClipboardStash?(): string | undefined; + getClipboardStash?(): string | undefined /** Writes the clipboardGuard stash. undefined clears it. */ - onClipboardStashChanged?(stash: string | undefined): void; - onResolvedDisplayUpdated?(displayId: number): void; - onDisplayPinned?(displayId: number | undefined): void; - onDisplayResolvedForApps?(sortedBundleIdsKey: string): void; + onClipboardStashChanged?(stash: string | undefined): void + onResolvedDisplayUpdated?(displayId: number): void + onDisplayPinned?(displayId: number | undefined): void + onDisplayResolvedForApps?(sortedBundleIdsKey: string): void /** Called after each screenshot. Host persists for respawn survival. */ - onScreenshotCaptured?(dims: ScreenshotDims): void; - onTeachModeActivated?(): void; - onTeachStep?(req: TeachStepRequest): Promise; - onTeachWorking?(): void; + onScreenshotCaptured?(dims: ScreenshotDims): void + onTeachModeActivated?(): void + onTeachStep?(req: TeachStepRequest): Promise + onTeachWorking?(): void // ── Lock (async) ─────────────────────────────────────────────────── /** At most one session uses CU at a time. Awaited by `bindSessionContext` * before dispatch. Undefined → no lock gating (proceed). */ - checkCuLock?(): Promise<{ holder: string | undefined; isSelf: boolean }>; + checkCuLock?(): Promise<{ holder: string | undefined; isSelf: boolean }> /** Take the lock. Called when `checkCuLock` returned `holder: undefined` * on a non-deferring tool. Host emits enter-CU signals here. */ - acquireCuLock?(): Promise; + acquireCuLock?(): Promise /** Host-specific lock-held error text. Default is the package's generic * message. The CLI host includes the holder session-ID prefix. */ - formatLockHeldMessage?(holder: string): string; + formatLockHeldMessage?(holder: string): string /** User-abort signal. Passed through to `ComputerUseOverrides.isAborted` * for the mid-loop checks in handleComputerBatch / handleType. See that * field for semantics. */ - isAborted?(): boolean; + isAborted?(): boolean } // ---------------------------------------------------------------------------- @@ -360,9 +360,9 @@ export interface ComputerUseSessionContext { * store, not the server. */ export interface ComputerUseOverrides { - allowedApps: AppGrant[]; - grantFlags: CuGrantFlags; - coordinateMode: CoordinateMode; + allowedApps: AppGrant[] + grantFlags: CuGrantFlags + coordinateMode: CoordinateMode /** * User-configured auto-deny list (Settings → Desktop app → Computer Use). @@ -376,14 +376,14 @@ export interface ComputerUseOverrides { * not session state). Contrast with `allowedApps` which is per-session. * Empty array = no user-configured denies (the default). */ - userDeniedBundleIds: readonly string[]; + userDeniedBundleIds: readonly string[] /** * Display CU operates on; read fresh per call. `scaleCoord` uses the * `originX/Y` snapshotted in `lastScreenshot`, so mid-session switches * only affect the NEXT screenshot/prepare call. */ - selectedDisplayId?: number; + selectedDisplayId?: number /** * The `request_access` tool handler calls this and awaits. The wrapper @@ -395,14 +395,16 @@ export interface ComputerUseOverrides { * Undefined when the session wasn't wired with a permission handler (e.g. * a future headless mode). `request_access` returns a tool error in that case. */ - onPermissionRequest?: (req: CuPermissionRequest) => Promise; + onPermissionRequest?: ( + req: CuPermissionRequest, + ) => Promise /** * For the pixel-validation staleness guard. The model's-last-screenshot, * stashed by serverDef.ts after each `screenshot` tool call. Undefined on * cold start → pixel validation skipped (click proceeds). */ - lastScreenshot?: ScreenshotResult; + lastScreenshot?: ScreenshotResult /** * Fired after every `prepareForAction` with the bundle IDs it just hid. @@ -416,7 +418,7 @@ export interface ComputerUseOverrides { * Undefined when the session wasn't wired with a tracker — unhide just * doesn't happen. */ - onAppsHidden?: (bundleIds: string[]) => void; + onAppsHidden?: (bundleIds: string[]) => void /** * Reads the clipboardGuard stash from session state. `undefined` means no @@ -424,7 +426,7 @@ export interface ComputerUseOverrides { * and clears on restore. Sibling of the `cuHiddenDuringTurn` getter pattern * — state lives on the host's session, not module-level here. */ - getClipboardStash?: () => string | undefined; + getClipboardStash?: () => string | undefined /** * Writes the clipboardGuard stash to session state. `undefined` clears. @@ -433,7 +435,7 @@ export interface ComputerUseOverrides { * directly and restores via Electron's `clipboard.writeText` (no nest-only * import surface). */ - onClipboardStashChanged?: (stash: string | undefined) => void; + onClipboardStashChanged?: (stash: string | undefined) => void /** * Write the resolver's picked display back to session so teach overlay @@ -442,7 +444,7 @@ export interface ComputerUseOverrides { * `resolvePrepareCapture`'s pick differs from `selectedDisplayId`. * Fire-and-forget. */ - onResolvedDisplayUpdated?: (displayId: number) => void; + onResolvedDisplayUpdated?: (displayId: number) => void /** * Set when the model explicitly picked a display via `switch_display`. @@ -453,7 +455,7 @@ export interface ComputerUseOverrides { * overrides any `selectedDisplayId` whenever an allowed app shares the * host's monitor. */ - displayPinnedByModel?: boolean; + displayPinnedByModel?: boolean /** * Write the model's explicit display pick to session. `displayId: @@ -461,7 +463,7 @@ export interface ComputerUseOverrides { * Sibling of `onResolvedDisplayUpdated` but also sets the pin flag — * the two are semantically distinct (resolver-picked vs model-picked). */ - onDisplayPinned?: (displayId: number | undefined) => void; + onDisplayPinned?: (displayId: number | undefined) => void /** * Sorted comma-joined bundle-ID set the display was last auto-resolved @@ -470,14 +472,14 @@ export interface ComputerUseOverrides { * doesn't yank the display on every screenshot, only when the app set * has changed since the last resolve (or manual switch). */ - displayResolvedForApps?: string; + displayResolvedForApps?: string /** * Records which app set the current display selection was made for. Fired * alongside `onResolvedDisplayUpdated` when the resolver picks, so the next * screenshot sees a matching set and skips auto-resolve. */ - onDisplayResolvedForApps?: (sortedBundleIdsKey: string) => void; + onDisplayResolvedForApps?: (sortedBundleIdsKey: string) => void /** * Global CU lock — at most one session actively uses CU at a time. Checked @@ -494,7 +496,7 @@ export interface ComputerUseOverrides { * The host manages release (on session idle/stop/archive) — this package * never releases. */ - checkCuLock?: () => { holder: string | undefined; isSelf: boolean }; + checkCuLock?: () => { holder: string | undefined; isSelf: boolean } /** * Take the lock for this session. `handleToolCall` calls this exactly once @@ -502,7 +504,7 @@ export interface ComputerUseOverrides { * undefined. No-op if already held (defensive — the check should have * short-circuited). Host emits an event the overlay listens to. */ - acquireCuLock?: () => void; + acquireCuLock?: () => void /** * User-abort signal. Checked mid-iteration inside `handleComputerBatch` @@ -513,7 +515,7 @@ export interface ComputerUseOverrides { * Undefined → never aborts (e.g. unwired host). Live per-check read — * same lazy-getter pattern as `checkCuLock`. */ - isAborted?: () => boolean; + isAborted?: () => boolean // ── Teach mode ─────────────────────────────────────────────────────── // Wired only when the host's teachModeEnabled gate is on. All five @@ -529,7 +531,7 @@ export interface ComputerUseOverrides { */ onTeachPermissionRequest?: ( req: CuTeachPermissionRequest, - ) => Promise; + ) => Promise /** * Called by `handleRequestTeachAccess` after the user approves and at least @@ -538,7 +540,7 @@ export interface ComputerUseOverrides { * fullscreen overlay. Cleared by the host on turn end (`transitionTo("idle")`) * alongside the CU lock release. */ - onTeachModeActivated?: () => void; + onTeachModeActivated?: () => void /** * Read by `handleRequestAccess` and `handleRequestTeachAccess` to @@ -549,7 +551,7 @@ export interface ComputerUseOverrides { * (not a boolean field) because teach mode state lives on the session, * not on this per-call overrides object. */ - getTeachModeActive?: () => boolean; + getTeachModeActive?: () => boolean /** * Called by `handleTeachStep` with the scaled anchor + text. Host stores @@ -562,7 +564,7 @@ export interface ComputerUseOverrides { * Same blocking-promise pattern as `onPermissionRequest`, but resolved by * the teach overlay's own preload (not the main renderer's tool-approval UI). */ - onTeachStep?: (req: TeachStepRequest) => Promise; + onTeachStep?: (req: TeachStepRequest) => Promise /** * Called immediately after `onTeachStep` resolves with "next", before @@ -571,7 +573,7 @@ export interface ComputerUseOverrides { * notch). The next `onTeachStep` call replaces the spinner with the new * tooltip content. */ - onTeachWorking?: () => void; + onTeachWorking?: () => void } // ---------------------------------------------------------------------------- @@ -590,13 +592,13 @@ export interface ComputerUseOverrides { * CSS coords match. */ export interface TeachStepRequest { - explanation: string; - nextPreview: string; + explanation: string + nextPreview: string /** Full-display logical points. Undefined → overlay centers the tooltip, hides the arrow. */ - anchorLogical?: { x: number; y: number }; + anchorLogical?: { x: number; y: number } } -export type TeachStepResult = { action: "next" } | { action: "exit" }; +export type TeachStepResult = { action: 'next' } | { action: 'exit' } /** * Payload for the renderer's ComputerUseTeachApproval dialog. Rides through @@ -606,17 +608,17 @@ export type TeachStepResult = { action: "next" } | { action: "exit" }; * fields it doesn't render (no grant-flag checkboxes in teach mode). */ export interface CuTeachPermissionRequest { - requestId: string; + requestId: string /** Model-provided reason. Shown in the dialog headline ("guide you through {reason}"). */ - reason: string; - apps: ResolvedAppRequest[]; - screenshotFiltering: "native" | "none"; + reason: string + apps: ResolvedAppRequest[] + screenshotFiltering: 'native' | 'none' /** Present only when TCC is ungranted — same semantics as `CuPermissionRequest.tccState`. */ tccState?: { - accessibility: boolean; - screenRecording: boolean; - }; - willHide?: Array<{ bundleId: string; displayName: string }>; + accessibility: boolean + screenRecording: boolean + } + willHide?: Array<{ bundleId: string; displayName: string }> /** Same semantics as `CuPermissionRequest.autoUnhideEnabled`. */ - autoUnhideEnabled?: boolean; + autoUnhideEnabled?: boolean } diff --git a/packages/@ant/computer-use-swift/package.json b/packages/@ant/computer-use-swift/package.json index 3c4a83b73..6472a35c3 100644 --- a/packages/@ant/computer-use-swift/package.json +++ b/packages/@ant/computer-use-swift/package.json @@ -1,8 +1,8 @@ { - "name": "@ant/computer-use-swift", - "version": "1.0.0", - "private": true, - "type": "module", - "main": "./src/index.ts", - "types": "./src/index.ts" + "name": "@ant/computer-use-swift", + "version": "1.0.0", + "private": true, + "type": "module", + "main": "./src/index.ts", + "types": "./src/index.ts" } diff --git a/packages/@ant/computer-use-swift/src/backends/darwin.ts b/packages/@ant/computer-use-swift/src/backends/darwin.ts index f0fad85af..4a5276c9a 100644 --- a/packages/@ant/computer-use-swift/src/backends/darwin.ts +++ b/packages/@ant/computer-use-swift/src/backends/darwin.ts @@ -9,9 +9,17 @@ import { readFileSync, unlinkSync } from 'fs' import { tmpdir } from 'os' import { join } from 'path' import type { - AppInfo, AppsAPI, DisplayAPI, DisplayGeometry, InstalledApp, - PrepareDisplayResult, RunningApp, ScreenshotAPI, ScreenshotResult, - SwiftBackend, WindowDisplayInfo, + AppInfo, + AppsAPI, + DisplayAPI, + DisplayGeometry, + InstalledApp, + PrepareDisplayResult, + RunningApp, + ScreenshotAPI, + ScreenshotResult, + SwiftBackend, + WindowDisplayInfo, } from '../types.js' export type { @@ -32,7 +40,8 @@ export type { function jxaSync(script: string): string { const result = Bun.spawnSync({ cmd: ['osascript', '-l', 'JavaScript', '-e', script], - stdout: 'pipe', stderr: 'pipe', + stdout: 'pipe', + stderr: 'pipe', }) return new TextDecoder().decode(result.stdout).trim() } @@ -40,14 +49,16 @@ function jxaSync(script: string): string { function osascriptSync(script: string): string { const result = Bun.spawnSync({ cmd: ['osascript', '-e', script], - stdout: 'pipe', stderr: 'pipe', + stdout: 'pipe', + stderr: 'pipe', }) return new TextDecoder().decode(result.stdout).trim() } async function osascript(script: string): Promise { const proc = Bun.spawn(['osascript', '-e', script], { - stdout: 'pipe', stderr: 'pipe', + stdout: 'pipe', + stderr: 'pipe', }) const text = await new Response(proc.stdout).text() await proc.exited @@ -56,7 +67,8 @@ async function osascript(script: string): Promise { async function jxa(script: string): Promise { const proc = Bun.spawn(['osascript', '-l', 'JavaScript', '-e', script], { - stdout: 'pipe', stderr: 'pipe', + stdout: 'pipe', + stderr: 'pipe', }) const text = await new Response(proc.stdout).text() await proc.exited @@ -101,8 +113,10 @@ export const display: DisplayAPI = { JSON.stringify(result); `) return (JSON.parse(raw) as DisplayGeometry[]).map(d => ({ - width: Number(d.width), height: Number(d.height), - scaleFactor: Number(d.scaleFactor), displayId: Number(d.displayId), + width: Number(d.width), + height: Number(d.height), + scaleFactor: Number(d.scaleFactor), + displayId: Number(d.displayId), })) } catch { try { @@ -126,8 +140,10 @@ export const display: DisplayAPI = { JSON.stringify(result); `) return (JSON.parse(raw) as DisplayGeometry[]).map(d => ({ - width: Number(d.width), height: Number(d.height), - scaleFactor: Number(d.scaleFactor), displayId: Number(d.displayId), + width: Number(d.width), + height: Number(d.height), + scaleFactor: Number(d.scaleFactor), + displayId: Number(d.displayId), })) } catch { return [{ width: 1920, height: 1080, scaleFactor: 2, displayId: 1 }] @@ -177,9 +193,15 @@ export const apps: AppsAPI = { const dirs = ['/Applications', '~/Applications', '/System/Applications'] const allApps: InstalledApp[] = [] for (const dir of dirs) { - const expanded = dir.startsWith('~') ? join(process.env.HOME ?? '~', dir.slice(1)) : dir + const expanded = dir.startsWith('~') + ? join(process.env.HOME ?? '~', dir.slice(1)) + : dir const proc = Bun.spawn( - ['bash', '-c', `for f in "${expanded}"/*.app; do [ -d "$f" ] || continue; bid=$(mdls -name kMDItemCFBundleIdentifier "$f" 2>/dev/null | sed 's/.*= "//;s/"//'); name=$(basename "$f" .app); echo "$f|$name|$bid"; done`], + [ + 'bash', + '-c', + `for f in "${expanded}"/*.app; do [ -d "$f" ] || continue; bid=$(mdls -name kMDItemCFBundleIdentifier "$f" 2>/dev/null | sed 's/.*= "//;s/"//'); name=$(basename "$f" .app); echo "$f|$name|$bid"; done`, + ], { stdout: 'pipe', stderr: 'pipe' }, ) const text = await new Response(proc.stdout).text() @@ -245,10 +267,13 @@ export const apps: AppsAPI = { // ScreenshotAPI // --------------------------------------------------------------------------- -async function captureScreenToBase64(args: string[]): Promise<{ base64: string; width: number; height: number }> { +async function captureScreenToBase64( + args: string[], +): Promise<{ base64: string; width: number; height: number }> { const tmpFile = join(tmpdir(), `cu-screenshot-${Date.now()}.png`) const proc = Bun.spawn(['screencapture', ...args, tmpFile], { - stdout: 'pipe', stderr: 'pipe', + stdout: 'pipe', + stderr: 'pipe', }) await proc.exited try { @@ -258,18 +283,36 @@ async function captureScreenToBase64(args: string[]): Promise<{ base64: string; const height = buf.readUInt32BE(20) return { base64, width, height } } finally { - try { unlinkSync(tmpFile) } catch {} + try { + unlinkSync(tmpFile) + } catch {} } } export const screenshot: ScreenshotAPI = { - async captureExcluding(_allowedBundleIds, _quality, _targetW, _targetH, displayId) { + async captureExcluding( + _allowedBundleIds, + _quality, + _targetW, + _targetH, + displayId, + ) { const args = ['-x'] if (displayId !== undefined) args.push('-D', String(displayId)) return captureScreenToBase64(args) }, - async captureRegion(_allowedBundleIds, x, y, w, h, _outW, _outH, _quality, displayId) { + async captureRegion( + _allowedBundleIds, + x, + y, + w, + h, + _outW, + _outH, + _quality, + displayId, + ) { const args = ['-x', '-R', `${x},${y},${w},${h}`] if (displayId !== undefined) args.push('-D', String(displayId)) return captureScreenToBase64(args) diff --git a/packages/@ant/computer-use-swift/src/backends/linux.ts b/packages/@ant/computer-use-swift/src/backends/linux.ts index da63efea9..56062e1d2 100644 --- a/packages/@ant/computer-use-swift/src/backends/linux.ts +++ b/packages/@ant/computer-use-swift/src/backends/linux.ts @@ -8,9 +8,17 @@ */ import type { - AppInfo, AppsAPI, DisplayAPI, DisplayGeometry, InstalledApp, - PrepareDisplayResult, RunningApp, ScreenshotAPI, ScreenshotResult, - SwiftBackend, WindowDisplayInfo, + AppInfo, + AppsAPI, + DisplayAPI, + DisplayGeometry, + InstalledApp, + PrepareDisplayResult, + RunningApp, + ScreenshotAPI, + ScreenshotResult, + SwiftBackend, + WindowDisplayInfo, } from '../types.js' // --------------------------------------------------------------------------- @@ -34,7 +42,11 @@ async function runAsync(cmd: string[]): Promise { } function commandExists(name: string): boolean { - const result = Bun.spawnSync({ cmd: ['which', name], stdout: 'pipe', stderr: 'pipe' }) + const result = Bun.spawnSync({ + cmd: ['which', name], + stdout: 'pipe', + stderr: 'pipe', + }) return result.exitCode === 0 } @@ -85,7 +97,11 @@ export const display: DisplayAPI = { // --------------------------------------------------------------------------- export const apps: AppsAPI = { - async prepareDisplay(_allowlistBundleIds, _surrogateHost, _displayId): Promise { + async prepareDisplay( + _allowlistBundleIds, + _surrogateHost, + _displayId, + ): Promise { return { activated: '', hidden: [] } }, @@ -100,7 +116,15 @@ export const apps: AppsAPI = { async appUnderPoint(x, y): Promise { try { // Move mouse to point, get window under cursor - const out = run(['xdotool', 'mousemove', '--sync', String(x), String(y), 'getmouselocation', '--shell']) + const out = run([ + 'xdotool', + 'mousemove', + '--sync', + String(x), + String(y), + 'getmouselocation', + '--shell', + ]) const windowMatch = out.match(/WINDOW=(\d+)/) if (!windowMatch) return null @@ -109,10 +133,18 @@ export const apps: AppsAPI = { if (!pidStr) return null let exePath = '' - try { exePath = run(['readlink', '-f', `/proc/${pidStr}/exe`]) } catch { /* ignore */ } + try { + exePath = run(['readlink', '-f', `/proc/${pidStr}/exe`]) + } catch { + /* ignore */ + } let appName = '' - try { appName = run(['cat', `/proc/${pidStr}/comm`]) } catch { /* ignore */ } + try { + appName = run(['cat', `/proc/${pidStr}/comm`]) + } catch { + /* ignore */ + } if (!exePath && !appName) return null return { bundleId: exePath || pidStr!, displayName: appName || 'unknown' } @@ -124,14 +156,20 @@ export const apps: AppsAPI = { async listInstalled(): Promise { try { // Read .desktop files from standard locations - const dirs = ['/usr/share/applications', '/usr/local/share/applications', `${process.env.HOME}/.local/share/applications`] + const dirs = [ + '/usr/share/applications', + '/usr/local/share/applications', + `${process.env.HOME}/.local/share/applications`, + ] const apps: InstalledApp[] = [] for (const dir of dirs) { let files: string try { files = run(['find', dir, '-name', '*.desktop', '-maxdepth', '1']) - } catch { continue } + } catch { + continue + } for (const filepath of files.split('\n').filter(Boolean)) { try { @@ -146,11 +184,14 @@ export const apps: AppsAPI = { if (!name) continue apps.push({ - bundleId: filepath.split('/').pop()?.replace('.desktop', '') ?? '', + bundleId: + filepath.split('/').pop()?.replace('.desktop', '') ?? '', displayName: name, path: exec.split(/\s+/)[0] ?? '', }) - } catch { /* skip unreadable files */ } + } catch { + /* skip unreadable files */ + } } } @@ -177,9 +218,17 @@ export const apps: AppsAPI = { if (!pid || pid === '0') continue let exePath = '' - try { exePath = run(['readlink', '-f', `/proc/${pid}/exe`]) } catch { /* ignore */ } + try { + exePath = run(['readlink', '-f', `/proc/${pid}/exe`]) + } catch { + /* ignore */ + } let appName = '' - try { appName = run(['cat', `/proc/${pid}/comm`]) } catch { /* ignore */ } + try { + appName = run(['cat', `/proc/${pid}/comm`]) + } catch { + /* ignore */ + } if (appName) { apps.push({ bundleId: exePath || pid, displayName: appName }) @@ -187,11 +236,13 @@ export const apps: AppsAPI = { } // Deduplicate by bundleId const seen = new Set() - return apps.filter(a => { - if (seen.has(a.bundleId)) return false - seen.add(a.bundleId) - return true - }).slice(0, 50) + return apps + .filter(a => { + if (seen.has(a.bundleId)) return false + seen.add(a.bundleId) + return true + }) + .slice(0, 50) } // Fallback: ps with visible processes @@ -217,7 +268,9 @@ export const apps: AppsAPI = { await runAsync(['gtk-launch', desktopName]) return } - } catch { /* fall through */ } + } catch { + /* fall through */ + } await runAsync(['xdg-open', name]) }, @@ -232,7 +285,9 @@ export const apps: AppsAPI = { // Try xdotool windowactivate with search by name await runAsync(['xdotool', 'search', '--name', id, 'windowactivate']) } - } catch { /* ignore failures for individual windows */ } + } catch { + /* ignore failures for individual windows */ + } } }, } @@ -244,7 +299,13 @@ export const apps: AppsAPI = { const SCREENSHOT_PATH = '/tmp/cu-screenshot.png' export const screenshot: ScreenshotAPI = { - async captureExcluding(_allowedBundleIds, _quality, _targetW, _targetH, _displayId): Promise { + async captureExcluding( + _allowedBundleIds, + _quality, + _targetW, + _targetH, + _displayId, + ): Promise { try { await runAsync(['scrot', '-o', SCREENSHOT_PATH]) @@ -261,10 +322,26 @@ export const screenshot: ScreenshotAPI = { } }, - async captureRegion(_allowedBundleIds, x, y, w, h, _outW, _outH, _quality, _displayId): Promise { + async captureRegion( + _allowedBundleIds, + x, + y, + w, + h, + _outW, + _outH, + _quality, + _displayId, + ): Promise { try { // scrot -a x,y,w,h captures a specific region - await runAsync(['scrot', '-a', `${x},${y},${w},${h}`, '-o', SCREENSHOT_PATH]) + await runAsync([ + 'scrot', + '-a', + `${x},${y},${w},${h}`, + '-o', + SCREENSHOT_PATH, + ]) const file = Bun.file(SCREENSHOT_PATH) const buffer = await file.arrayBuffer() diff --git a/packages/@ant/computer-use-swift/src/backends/win32.ts b/packages/@ant/computer-use-swift/src/backends/win32.ts index 70f006bf1..47aa51853 100644 --- a/packages/@ant/computer-use-swift/src/backends/win32.ts +++ b/packages/@ant/computer-use-swift/src/backends/win32.ts @@ -6,13 +6,24 @@ */ import type { - AppInfo, AppsAPI, DisplayAPI, DisplayGeometry, InstalledApp, - PrepareDisplayResult, RunningApp, ScreenshotAPI, ScreenshotResult, - SwiftBackend, WindowDisplayInfo, + AppInfo, + AppsAPI, + DisplayAPI, + DisplayGeometry, + InstalledApp, + PrepareDisplayResult, + RunningApp, + ScreenshotAPI, + ScreenshotResult, + SwiftBackend, + WindowDisplayInfo, } from '../types.js' import { listWindows } from 'src/utils/computerUse/win32/windowEnum.js' -import { captureWindow, captureWindowByHwnd } from 'src/utils/computerUse/win32/windowCapture.js' +import { + captureWindow, + captureWindowByHwnd, +} from 'src/utils/computerUse/win32/windowCapture.js' // --------------------------------------------------------------------------- // PowerShell helper @@ -63,15 +74,18 @@ foreach ($s in [System.Windows.Forms.Screen]::AllScreens) { } $result -join "|" `) - return raw.split('|').filter(Boolean).map(entry => { - const [w, h, id, primary] = entry.split(',') - return { - width: Number(w), - height: Number(h), - scaleFactor: 1, // Windows DPI scaling handled at system level - displayId: Number(id), - } - }) + return raw + .split('|') + .filter(Boolean) + .map(entry => { + const [w, h, id, primary] = entry.split(',') + return { + width: Number(w), + height: Number(h), + scaleFactor: 1, // Windows DPI scaling handled at system level + displayId: Number(id), + } + }) } catch { return [{ width: 1920, height: 1080, scaleFactor: 1, displayId: 0 }] } @@ -139,14 +153,17 @@ foreach ($p in $paths) { } $apps | Select-Object -Unique | Select-Object -First 200 `) - return raw.split('\n').filter(Boolean).map(line => { - const [name, path, id] = line.split('|', 3) - return { - bundleId: id ?? name ?? '', - displayName: name ?? '', - path: path ?? '', - } - }) + return raw + .split('\n') + .filter(Boolean) + .map(line => { + const [name, path, id] = line.split('|', 3) + return { + bundleId: id ?? name ?? '', + displayName: name ?? '', + path: path ?? '', + } + }) } catch { return [] } @@ -204,7 +221,13 @@ if ($proc) { [WinShow]::ShowWindow($proc.MainWindowHandle, 9) | Out-Null; [WinSh // --------------------------------------------------------------------------- export const screenshot: ScreenshotAPI = { - async captureExcluding(_allowedBundleIds, _quality, _targetW, _targetH, displayId) { + async captureExcluding( + _allowedBundleIds, + _quality, + _targetW, + _targetH, + displayId, + ) { const raw = await psAsync(` Add-Type -AssemblyName System.Windows.Forms Add-Type -AssemblyName System.Drawing @@ -229,7 +252,17 @@ $ms.Dispose() return { base64, width, height } }, - async captureRegion(_allowedBundleIds, x, y, w, h, _outW, _outH, _quality, _displayId) { + async captureRegion( + _allowedBundleIds, + x, + y, + w, + h, + _outW, + _outH, + _quality, + _displayId, + ) { const raw = await psAsync(` Add-Type -AssemblyName System.Windows.Forms Add-Type -AssemblyName System.Drawing diff --git a/packages/@ant/computer-use-swift/src/index.ts b/packages/@ant/computer-use-swift/src/index.ts index 28a1780cd..f63984345 100644 --- a/packages/@ant/computer-use-swift/src/index.ts +++ b/packages/@ant/computer-use-swift/src/index.ts @@ -37,25 +37,52 @@ const backend = loadBackend() export class ComputerUseAPI { apps = backend?.apps ?? { - async prepareDisplay() { return { activated: '', hidden: [] } }, - async previewHideSet() { return [] }, - async findWindowDisplays(ids: string[]) { return ids.map((b: string) => ({ bundleId: b, displayIds: [] as number[] })) }, - async appUnderPoint() { return null }, - async listInstalled() { return [] }, - iconDataUrl() { return null }, - listRunning() { return [] }, - async open() { throw new Error('@ant/computer-use-swift: macOS only') }, + async prepareDisplay() { + return { activated: '', hidden: [] } + }, + async previewHideSet() { + return [] + }, + async findWindowDisplays(ids: string[]) { + return ids.map((b: string) => ({ + bundleId: b, + displayIds: [] as number[], + })) + }, + async appUnderPoint() { + return null + }, + async listInstalled() { + return [] + }, + iconDataUrl() { + return null + }, + listRunning() { + return [] + }, + async open() { + throw new Error('@ant/computer-use-swift: macOS only') + }, async unhide() {}, } display = backend?.display ?? { - getSize() { throw new Error('@ant/computer-use-swift: macOS only') }, - listAll() { throw new Error('@ant/computer-use-swift: macOS only') }, + getSize() { + throw new Error('@ant/computer-use-swift: macOS only') + }, + listAll() { + throw new Error('@ant/computer-use-swift: macOS only') + }, } screenshot = backend?.screenshot ?? { - async captureExcluding() { throw new Error('@ant/computer-use-swift: macOS only') }, - async captureRegion() { throw new Error('@ant/computer-use-swift: macOS only') }, + async captureExcluding() { + throw new Error('@ant/computer-use-swift: macOS only') + }, + async captureRegion() { + throw new Error('@ant/computer-use-swift: macOS only') + }, } async resolvePrepareCapture( @@ -66,6 +93,12 @@ export class ComputerUseAPI { targetH: number, displayId?: number, ): Promise { - return this.screenshot.captureExcluding(allowedBundleIds, quality, targetW, targetH, displayId) + return this.screenshot.captureExcluding( + allowedBundleIds, + quality, + targetW, + targetH, + displayId, + ) } } diff --git a/packages/@ant/computer-use-swift/src/types.ts b/packages/@ant/computer-use-swift/src/types.ts index 67b3cba11..ba26e591f 100644 --- a/packages/@ant/computer-use-swift/src/types.ts +++ b/packages/@ant/computer-use-swift/src/types.ts @@ -55,7 +55,11 @@ export interface DisplayAPI { } export interface AppsAPI { - prepareDisplay(allowlistBundleIds: string[], surrogateHost: string, displayId?: number): Promise + prepareDisplay( + allowlistBundleIds: string[], + surrogateHost: string, + displayId?: number, + ): Promise previewHideSet(bundleIds: string[], displayId?: number): Promise findWindowDisplays(bundleIds: string[]): Promise appUnderPoint(x: number, y: number): Promise @@ -68,13 +72,22 @@ export interface AppsAPI { export interface ScreenshotAPI { captureExcluding( - allowedBundleIds: string[], quality: number, - targetW: number, targetH: number, displayId?: number, + allowedBundleIds: string[], + quality: number, + targetW: number, + targetH: number, + displayId?: number, ): Promise captureRegion( allowedBundleIds: string[], - x: number, y: number, w: number, h: number, - outW: number, outH: number, quality: number, displayId?: number, + x: number, + y: number, + w: number, + h: number, + outW: number, + outH: number, + quality: number, + displayId?: number, ): Promise captureWindowTarget(titleOrHwnd: string | number): ScreenshotResult | null } diff --git a/packages/@ant/ink/src/components/AlternateScreen.tsx b/packages/@ant/ink/src/components/AlternateScreen.tsx index 2e07a4cc7..403f25ce7 100644 --- a/packages/@ant/ink/src/components/AlternateScreen.tsx +++ b/packages/@ant/ink/src/components/AlternateScreen.tsx @@ -1,23 +1,19 @@ -import React, { - type PropsWithChildren, - useContext, - useInsertionEffect, -} from 'react' -import instances from '../core/instances.js' +import React, { type PropsWithChildren, useContext, useInsertionEffect } from 'react'; +import instances from '../core/instances.js'; import { DISABLE_MOUSE_TRACKING, ENABLE_MOUSE_TRACKING, ENTER_ALT_SCREEN, EXIT_ALT_SCREEN, -} from '../core/termio/dec.js' -import { TerminalWriteContext } from '../hooks/useTerminalNotification.js' -import Box from './Box.js' -import { TerminalSizeContext } from './TerminalSizeContext.js' +} from '../core/termio/dec.js'; +import { TerminalWriteContext } from '../hooks/useTerminalNotification.js'; +import Box from './Box.js'; +import { TerminalSizeContext } from './TerminalSizeContext.js'; type Props = PropsWithChildren<{ /** Enable SGR mouse tracking (wheel + click/drag). Default true. */ - mouseTracking?: boolean -}> + mouseTracking?: boolean; +}>; /** * Run children in the terminal's alternate screen buffer, constrained to @@ -39,12 +35,9 @@ type Props = PropsWithChildren<{ * from scrolling content) and so signal-exit cleanup can exit the alt * screen if the component's own unmount doesn't run. */ -export function AlternateScreen({ - children, - mouseTracking = true, -}: Props): React.ReactNode { - const size = useContext(TerminalSizeContext) - const writeRaw = useContext(TerminalWriteContext) +export function AlternateScreen({ children, mouseTracking = true }: Props): React.ReactNode { + const size = useContext(TerminalSizeContext); + const writeRaw = useContext(TerminalWriteContext); // useInsertionEffect (not useLayoutEffect): react-reconciler calls // resetAfterCommit between the mutation and layout commit phases, and @@ -57,31 +50,22 @@ export function AlternateScreen({ // Cleanup timing is unchanged: both insertion and layout effect cleanup // run in the mutation phase on unmount, before resetAfterCommit. useInsertionEffect(() => { - const ink = instances.get(process.stdout) - if (!writeRaw) return + const ink = instances.get(process.stdout); + if (!writeRaw) return; - writeRaw( - ENTER_ALT_SCREEN + - '\x1b[2J\x1b[H' + - (mouseTracking ? ENABLE_MOUSE_TRACKING : ''), - ) - ink?.setAltScreenActive(true, mouseTracking) + writeRaw(ENTER_ALT_SCREEN + '\x1b[2J\x1b[H' + (mouseTracking ? ENABLE_MOUSE_TRACKING : '')); + ink?.setAltScreenActive(true, mouseTracking); return () => { - ink?.setAltScreenActive(false) - ink?.clearTextSelection() - writeRaw((mouseTracking ? DISABLE_MOUSE_TRACKING : '') + EXIT_ALT_SCREEN) - } - }, [writeRaw, mouseTracking]) + ink?.setAltScreenActive(false); + ink?.clearTextSelection(); + writeRaw((mouseTracking ? DISABLE_MOUSE_TRACKING : '') + EXIT_ALT_SCREEN); + }; + }, [writeRaw, mouseTracking]); return ( - + {children} - ) + ); } diff --git a/packages/@ant/ink/src/components/App.tsx b/packages/@ant/ink/src/components/App.tsx index 543cd359b..1ba33fdd7 100644 --- a/packages/@ant/ink/src/components/App.tsx +++ b/packages/@ant/ink/src/components/App.tsx @@ -1,14 +1,14 @@ -import React, { PureComponent, type ReactNode } from 'react' +import React, { PureComponent, type ReactNode } from 'react'; // Business-layer callbacks — replaced with inline defaults so this package // has zero dependencies on business code. The business layer can inject // implementations via AppCallbacks when needed. type AppCallbacks = { - updateLastInteractionTime?: () => void - stopCapturingEarlyInput?: () => void - isMouseClicksDisabled?: () => boolean - logError?: (error: unknown) => void - logForDebugging?: (message: string, opts?: { level?: string }) => void -} + updateLastInteractionTime?: () => void; + stopCapturingEarlyInput?: () => void; + isMouseClicksDisabled?: () => boolean; + logError?: (error: unknown) => void; + logForDebugging?: (message: string, opts?: { level?: string }) => void; +}; /** Default no-op / safe-default implementations */ const defaultCallbacks: Required = { @@ -17,46 +17,34 @@ const defaultCallbacks: Required = { isMouseClicksDisabled: () => false, logError: (error: unknown) => console.error(error), logForDebugging: (_message: string, _opts?: { level?: string }) => {}, -} +}; /** * Override the default no-op callbacks. Call this from the business layer * (e.g. src/ink.tsx) before mounting . */ export function setAppCallbacks(cb: AppCallbacks): void { - Object.assign(defaultCallbacks, cb) + Object.assign(defaultCallbacks, cb); } function isEnvTruthy(value: string | undefined): boolean { - return value === '1' || value === 'true' + return value === '1' || value === 'true'; } -import { EventEmitter } from '../core/events/emitter.js' -import { InputEvent } from '../core/events/input-event.js' -import { TerminalFocusEvent } from '../core/events/terminal-focus-event.js' +import { EventEmitter } from '../core/events/emitter.js'; +import { InputEvent } from '../core/events/input-event.js'; +import { TerminalFocusEvent } from '../core/events/terminal-focus-event.js'; import { INITIAL_STATE, type ParsedInput, type ParsedKey, type ParsedMouse, parseMultipleKeypresses, -} from '../core/parse-keypress.js' -import reconciler from '../core/reconciler.js' -import { - finishSelection, - hasSelection, - type SelectionState, - startSelection, -} from '../core/selection.js' -import { - isXtermJs, - setXtversionName, - supportsExtendedKeys, -} from '../core/terminal.js' -import { - getTerminalFocused, - setTerminalFocused, -} from '../core/terminal-focus-state.js' -import { TerminalQuerier, xtversion } from '../core/terminal-querier.js' +} from '../core/parse-keypress.js'; +import reconciler from '../core/reconciler.js'; +import { finishSelection, hasSelection, type SelectionState, startSelection } from '../core/selection.js'; +import { isXtermJs, setXtversionName, supportsExtendedKeys } from '../core/terminal.js'; +import { getTerminalFocused, setTerminalFocused } from '../core/terminal-focus-state.js'; +import { TerminalQuerier, xtversion } from '../core/terminal-querier.js'; import { DISABLE_KITTY_KEYBOARD, DISABLE_MODIFY_OTHER_KEYS, @@ -64,155 +52,145 @@ import { ENABLE_MODIFY_OTHER_KEYS, FOCUS_IN, FOCUS_OUT, -} from '../core/termio/csi.js' -import { - DBP, - DFE, - DISABLE_MOUSE_TRACKING, - EBP, - EFE, - HIDE_CURSOR, - SHOW_CURSOR, -} from '../core/termio/dec.js' -import AppContext from './AppContext.js' -import { ClockProvider } from './ClockContext.js' -import CursorDeclarationContext, { - type CursorDeclarationSetter, -} from './CursorDeclarationContext.js' -import ErrorOverview from './ErrorOverview.js' -import StdinContext from './StdinContext.js' -import { TerminalFocusProvider } from './TerminalFocusContext.js' -import { TerminalSizeContext } from './TerminalSizeContext.js' +} from '../core/termio/csi.js'; +import { DBP, DFE, DISABLE_MOUSE_TRACKING, EBP, EFE, HIDE_CURSOR, SHOW_CURSOR } from '../core/termio/dec.js'; +import AppContext from './AppContext.js'; +import { ClockProvider } from './ClockContext.js'; +import CursorDeclarationContext, { type CursorDeclarationSetter } from './CursorDeclarationContext.js'; +import ErrorOverview from './ErrorOverview.js'; +import StdinContext from './StdinContext.js'; +import { TerminalFocusProvider } from './TerminalFocusContext.js'; +import { TerminalSizeContext } from './TerminalSizeContext.js'; // Platforms that support Unix-style process suspension (SIGSTOP/SIGCONT) -const SUPPORTS_SUSPEND = process.platform !== 'win32' +const SUPPORTS_SUSPEND = process.platform !== 'win32'; // After this many milliseconds of stdin silence, the next chunk triggers // a terminal mode re-assert (mouse tracking). Catches tmux detach→attach, // ssh reconnect, and laptop wake — the terminal resets DEC private modes // but no signal reaches us. 5s is well above normal inter-keystroke gaps // but short enough that the first scroll after reattach works. -const STDIN_RESUME_GAP_MS = 5000 +const STDIN_RESUME_GAP_MS = 5000; type Props = { - readonly children: ReactNode - readonly stdin: NodeJS.ReadStream - readonly stdout: NodeJS.WriteStream - readonly stderr: NodeJS.WriteStream - readonly exitOnCtrlC: boolean - readonly onExit: (error?: Error) => void - readonly terminalColumns: number - readonly terminalRows: number + readonly children: ReactNode; + readonly stdin: NodeJS.ReadStream; + readonly stdout: NodeJS.WriteStream; + readonly stderr: NodeJS.WriteStream; + readonly exitOnCtrlC: boolean; + readonly onExit: (error?: Error) => void; + readonly terminalColumns: number; + readonly terminalRows: number; // Text selection state. App mutates this directly from mouse events // and calls onSelectionChange to trigger a repaint. Mouse events only // arrive when (or similar) enables mouse tracking, // so the handler is always wired but dormant until tracking is on. - readonly selection: SelectionState - readonly onSelectionChange: () => void + readonly selection: SelectionState; + readonly onSelectionChange: () => void; // Dispatch a click at (col, row) — hit-tests the DOM tree and bubbles // onClick handlers. Returns true if a DOM handler consumed the click. // No-op (returns false) outside fullscreen mode (Ink.dispatchClick // gates on altScreenActive). - readonly onClickAt: (col: number, row: number) => boolean + readonly onClickAt: (col: number, row: number) => boolean; // Dispatch hover (onMouseEnter/onMouseLeave) as the pointer moves over // DOM elements. Called for mode-1003 motion events with no button held. // No-op outside fullscreen (Ink.dispatchHover gates on altScreenActive). - readonly onHoverAt: (col: number, row: number) => void + readonly onHoverAt: (col: number, row: number) => void; // Look up the OSC 8 hyperlink at (col, row) synchronously at click // time. Returns the URL or undefined. The browser-open is deferred by // MULTI_CLICK_TIMEOUT_MS so double-click can cancel it. - readonly getHyperlinkAt: (col: number, row: number) => string | undefined + readonly getHyperlinkAt: (col: number, row: number) => string | undefined; // Open a hyperlink URL in the browser. Called after the timer fires. - readonly onOpenHyperlink: (url: string) => void + readonly onOpenHyperlink: (url: string) => void; // Called on double/triple-click PRESS at (col, row). count=2 selects // the word under the cursor; count=3 selects the line. Ink reads the // screen buffer to find word/line boundaries and mutates selection, // setting isDragging=true so a subsequent drag extends by word/line. - readonly onMultiClick: (col: number, row: number, count: 2 | 3) => void + readonly onMultiClick: (col: number, row: number, count: 2 | 3) => void; // Called on drag-motion. Mode-aware: char mode updates focus to the // exact cell; word/line mode snaps to word/line boundaries. Needs // screen-buffer access (word boundaries) so lives on Ink, not here. - readonly onSelectionDrag: (col: number, row: number) => void + readonly onSelectionDrag: (col: number, row: number) => void; // Called when stdin data arrives after a >STDIN_RESUME_GAP_MS gap. // Ink re-asserts terminal modes: extended key reporting, and (when in // fullscreen) re-enters alt-screen + mouse tracking. Idempotent on the // terminal side. Optional so testing.tsx doesn't need to stub it. - readonly onStdinResume?: () => void + readonly onStdinResume?: () => void; // Receives the declared native-cursor position from useDeclaredCursor // so ink.tsx can park the terminal cursor there after each frame. // Enables IME composition at the input caret and lets screen readers / // magnifiers track the input. Optional so testing.tsx doesn't stub it. - readonly onCursorDeclaration?: CursorDeclarationSetter + readonly onCursorDeclaration?: CursorDeclarationSetter; // Dispatch a keyboard event through the DOM tree. Called for each // parsed key alongside the legacy EventEmitter path. - readonly dispatchKeyboardEvent: (parsedKey: ParsedKey) => void -} + readonly dispatchKeyboardEvent: (parsedKey: ParsedKey) => void; +}; // Multi-click detection thresholds. 500ms is the macOS default; a small // position tolerance allows for trackpad jitter between clicks. -const MULTI_CLICK_TIMEOUT_MS = 500 -const MULTI_CLICK_DISTANCE = 1 +const MULTI_CLICK_TIMEOUT_MS = 500; +const MULTI_CLICK_DISTANCE = 1; type State = { - readonly error?: Error -} + readonly error?: Error; +}; // Root component for all Ink apps // It renders stdin and stdout contexts, so that children can access them if needed // It also handles Ctrl+C exiting and cursor visibility export default class App extends PureComponent { - static displayName = 'InternalApp' + static displayName = 'InternalApp'; static getDerivedStateFromError(error: Error) { - return { error } + return { error }; } override state = { error: undefined, - } + }; // Count how many components enabled raw mode to avoid disabling // raw mode until all components don't need it anymore - rawModeEnabledCount = 0 + rawModeEnabledCount = 0; - internal_eventEmitter = new EventEmitter() - keyParseState = INITIAL_STATE + internal_eventEmitter = new EventEmitter(); + keyParseState = INITIAL_STATE; // Timer for flushing incomplete escape sequences - incompleteEscapeTimer: NodeJS.Timeout | null = null + incompleteEscapeTimer: NodeJS.Timeout | null = null; // Timeout durations for incomplete sequences (ms) - readonly NORMAL_TIMEOUT = 50 // Short timeout for regular esc sequences - readonly PASTE_TIMEOUT = 500 // Longer timeout for paste operations + readonly NORMAL_TIMEOUT = 50; // Short timeout for regular esc sequences + readonly PASTE_TIMEOUT = 500; // Longer timeout for paste operations // Terminal query/response dispatch. Responses arrive on stdin (parsed // out by parse-keypress) and are routed to pending promise resolvers. - querier = new TerminalQuerier(this.props.stdout) + querier = new TerminalQuerier(this.props.stdout); // Multi-click tracking for double/triple-click text selection. A click // within MULTI_CLICK_TIMEOUT_MS and MULTI_CLICK_DISTANCE of the previous // click increments clickCount; otherwise it resets to 1. - lastClickTime = 0 - lastClickCol = -1 - lastClickRow = -1 - clickCount = 0 + lastClickTime = 0; + lastClickCol = -1; + lastClickRow = -1; + clickCount = 0; // Deferred hyperlink-open timer — cancelled if a second click arrives // within MULTI_CLICK_TIMEOUT_MS (so double-clicking a hyperlink selects // the word without also opening the browser). DOM onClick dispatch is // NOT deferred — it returns true from onClickAt and skips this timer. - pendingHyperlinkTimer: ReturnType | null = null + pendingHyperlinkTimer: ReturnType | null = null; // Last mode-1003 motion position. Terminals already dedupe to cell // granularity but this also lets us skip dispatchHover entirely on // repeat events (drag-then-release at same cell, etc.). - lastHoverCol = -1 - lastHoverRow = -1 + lastHoverCol = -1; + lastHoverRow = -1; // Timestamp of last stdin chunk. Used to detect long gaps (tmux attach, // ssh reconnect, laptop wake) and trigger terminal mode re-assert. // Initialized to now so startup doesn't false-trigger. - lastStdinTime = Date.now() + lastStdinTime = Date.now(); // Determines if TTY is supported on the provided stdin isRawModeSupported(): boolean { - return this.props.stdin.isTTY + return this.props.stdin.isTTY; } override render() { @@ -242,56 +220,47 @@ export default class App extends PureComponent { > - {})} - > - {this.state.error ? ( - - ) : ( - this.props.children - )} + {})}> + {this.state.error ? : this.props.children} - ) + ); } override componentDidMount() { // In accessibility mode, keep the native cursor visible for screen magnifiers and other tools - if ( - this.props.stdout.isTTY && - !isEnvTruthy(process.env.CLAUDE_CODE_ACCESSIBILITY) - ) { - this.props.stdout.write(HIDE_CURSOR) + if (this.props.stdout.isTTY && !isEnvTruthy(process.env.CLAUDE_CODE_ACCESSIBILITY)) { + this.props.stdout.write(HIDE_CURSOR); } } override componentWillUnmount() { if (this.props.stdout.isTTY) { - this.props.stdout.write(SHOW_CURSOR) + this.props.stdout.write(SHOW_CURSOR); } // Clear any pending timers if (this.incompleteEscapeTimer) { - clearTimeout(this.incompleteEscapeTimer) - this.incompleteEscapeTimer = null + clearTimeout(this.incompleteEscapeTimer); + this.incompleteEscapeTimer = null; } if (this.pendingHyperlinkTimer) { - clearTimeout(this.pendingHyperlinkTimer) - this.pendingHyperlinkTimer = null + clearTimeout(this.pendingHyperlinkTimer); + this.pendingHyperlinkTimer = null; } // ignore calling setRawMode on an handle stdin it cannot be called if (this.isRawModeSupported()) { - this.handleSetRawMode(false) + this.handleSetRawMode(false); } else { // Even when raw mode was never enabled (e.g. non-TTY stdin on // Windows Node.js), ensure stdin is unref'd so the process can // exit. earlyInput may have called ref() before Ink mounted. try { - this.props.stdin.unref() + this.props.stdin.unref(); } catch { // stdin may already be destroyed } @@ -299,25 +268,25 @@ export default class App extends PureComponent { } override componentDidCatch(error: Error) { - this.handleExit(error) + this.handleExit(error); } handleSetRawMode = (isEnabled: boolean): void => { - const { stdin } = this.props + const { stdin } = this.props; if (!this.isRawModeSupported()) { if (stdin === process.stdin) { throw new Error( 'Raw mode is not supported on the current process.stdin, which Ink uses as input stream by default.\nRead about how to prevent this error on https://github.com/vadimdemedes/ink/#israwmodesupported', - ) + ); } else { throw new Error( 'Raw mode is not supported on the stdin provided to Ink.\nRead about how to prevent this error on https://github.com/vadimdemedes/ink/#israwmodesupported', - ) + ); } } - stdin.setEncoding('utf8') + stdin.setEncoding('utf8'); if (isEnabled) { // Ensure raw mode is enabled only once @@ -326,34 +295,34 @@ export default class App extends PureComponent { // Both use the same stdin 'readable' + read() pattern, so they can't // coexist -- the early capture handler would drain stdin before ours // can see it. The buffered text is preserved for REPL.tsx via consumeEarlyInput(). - defaultCallbacks.stopCapturingEarlyInput() + defaultCallbacks.stopCapturingEarlyInput(); // Safety net: remove any pre-existing readable listeners that aren't // ours. In builds where setAppCallbacks() was never called, the early // input capture's readableHandler remains attached and would consume // all stdin data before our handleReadable sees it. - const existingListeners = stdin.listeners('readable') + const existingListeners = stdin.listeners('readable'); for (const listener of existingListeners) { if (listener !== this.handleReadable) { - stdin.removeListener('readable', listener as any) + stdin.removeListener('readable', listener as any); } } - stdin.ref() - stdin.setRawMode(true) - stdin.addListener('readable', this.handleReadable) + stdin.ref(); + stdin.setRawMode(true); + stdin.addListener('readable', this.handleReadable); // Enable bracketed paste mode - this.props.stdout.write(EBP) + this.props.stdout.write(EBP); // Enable terminal focus reporting (DECSET 1004) - this.props.stdout.write(EFE) + this.props.stdout.write(EFE); // Enable extended key reporting so ctrl+shift+ is // distinguishable from ctrl+. We write both the kitty stack // push (CSI >1u) and xterm modifyOtherKeys level 2 (CSI >4;2m) — // terminals honor whichever they implement (tmux only accepts the // latter). if (supportsExtendedKeys()) { - this.props.stdout.write(ENABLE_KITTY_KEYBOARD) - this.props.stdout.write(ENABLE_MODIFY_OTHER_KEYS) + this.props.stdout.write(ENABLE_KITTY_KEYBOARD); + this.props.stdout.write(ENABLE_MODIFY_OTHER_KEYS); } // Probe terminal identity. XTVERSION survives SSH (query/reply goes // through the pty), unlike TERM_PROGRAM. Used for wheel-scroll base @@ -364,22 +333,19 @@ export default class App extends PureComponent { // init sequence completes — avoids interleaving with alt-screen/mouse // tracking enable writes that may happen in the same render cycle. setImmediate(() => { - void Promise.all([ - this.querier.send(xtversion()), - this.querier.flush(), - ]).then(([r]) => { + void Promise.all([this.querier.send(xtversion()), this.querier.flush()]).then(([r]) => { if (r) { - setXtversionName(r.name) - defaultCallbacks.logForDebugging(`XTVERSION: terminal identified as "${r.name}"`) + setXtversionName(r.name); + defaultCallbacks.logForDebugging(`XTVERSION: terminal identified as "${r.name}"`); } else { - defaultCallbacks.logForDebugging('XTVERSION: no reply (terminal ignored query)') + defaultCallbacks.logForDebugging('XTVERSION: no reply (terminal ignored query)'); } - }) - }) + }); + }); } - this.rawModeEnabledCount++ - return + this.rawModeEnabledCount++; + return; } // Disable raw mode only when no components left that are using it @@ -389,31 +355,31 @@ export default class App extends PureComponent { // If the old tree had more useInput hooks than the new tree, the old // cleanup over-decrements the count to 0 even though the new tree has // active listeners. Detect this and fix the count instead of disabling. - const activeListeners = this.internal_eventEmitter.listenerCount('input') + const activeListeners = this.internal_eventEmitter.listenerCount('input'); if (activeListeners > 0) { - this.rawModeEnabledCount = activeListeners - return + this.rawModeEnabledCount = activeListeners; + return; } - this.props.stdout.write(DISABLE_MODIFY_OTHER_KEYS) - this.props.stdout.write(DISABLE_KITTY_KEYBOARD) + this.props.stdout.write(DISABLE_MODIFY_OTHER_KEYS); + this.props.stdout.write(DISABLE_KITTY_KEYBOARD); // Disable terminal focus reporting (DECSET 1004) - this.props.stdout.write(DFE) + this.props.stdout.write(DFE); // Disable bracketed paste mode - this.props.stdout.write(DBP) - stdin.setRawMode(false) - stdin.removeListener('readable', this.handleReadable) - stdin.unref() + this.props.stdout.write(DBP); + stdin.setRawMode(false); + stdin.removeListener('readable', this.handleReadable); + stdin.unref(); } - } + }; // Helper to flush incomplete escape sequences flushIncomplete = (): void => { // Clear the timer reference - this.incompleteEscapeTimer = null + this.incompleteEscapeTimer = null; // Only proceed if we have incomplete sequences - if (!this.keyParseState.incomplete) return + if (!this.keyParseState.incomplete) return; // Fullscreen: if stdin has data waiting, it's almost certainly the // continuation of the buffered sequence (e.g. `[<64;74;16M` after a @@ -424,23 +390,20 @@ export default class App extends PureComponent { // drain stdin next and clear this timer. Prevents both the spurious // Escape key and the lost scroll event. if (this.props.stdin.readableLength > 0) { - this.incompleteEscapeTimer = setTimeout( - this.flushIncomplete, - this.NORMAL_TIMEOUT, - ) - return + this.incompleteEscapeTimer = setTimeout(this.flushIncomplete, this.NORMAL_TIMEOUT); + return; } // Process incomplete as a flush operation (input=null) // This reuses all existing parsing logic - this.processInput(null) - } + this.processInput(null); + }; // Process input through the parser and handle the results processInput = (input: string | Buffer | null): void => { // Parse input using our state machine - const [keys, newState] = parseMultipleKeypresses(this.keyParseState, input) - this.keyParseState = newState + const [keys, newState] = parseMultipleKeypresses(this.keyParseState, input); + this.keyParseState = newState; // Process ALL keys in a SINGLE discreteUpdates call to prevent // "Maximum update depth exceeded" error when many keys arrive at once @@ -448,106 +411,94 @@ export default class App extends PureComponent { // This batches all state updates from handleInput and all useInput // listeners together within one high-priority update context. if (keys.length > 0) { - reconciler.discreteUpdates( - processKeysInBatch, - this, - keys, - undefined, - undefined, - ) + reconciler.discreteUpdates(processKeysInBatch, this, keys, undefined, undefined); } // If we have incomplete escape sequences, set a timer to flush them if (this.keyParseState.incomplete) { // Cancel any existing timer first if (this.incompleteEscapeTimer) { - clearTimeout(this.incompleteEscapeTimer) + clearTimeout(this.incompleteEscapeTimer); } this.incompleteEscapeTimer = setTimeout( this.flushIncomplete, - this.keyParseState.mode === 'IN_PASTE' - ? this.PASTE_TIMEOUT - : this.NORMAL_TIMEOUT, - ) + this.keyParseState.mode === 'IN_PASTE' ? this.PASTE_TIMEOUT : this.NORMAL_TIMEOUT, + ); } - } + }; handleReadable = (): void => { // Detect long stdin gaps (tmux attach, ssh reconnect, laptop wake). // The terminal may have reset DEC private modes; re-assert mouse // tracking. Checked before the read loop so one Date.now() covers // all chunks in this readable event. - const now = Date.now() + const now = Date.now(); if (now - this.lastStdinTime > STDIN_RESUME_GAP_MS) { - this.props.onStdinResume?.() + this.props.onStdinResume?.(); } - this.lastStdinTime = now + this.lastStdinTime = now; try { - let chunk + let chunk; while ((chunk = this.props.stdin.read() as string | null) !== null) { // Process the input chunk - this.processInput(chunk) + this.processInput(chunk); } } catch (error) { // In Bun, an uncaught throw inside a stream 'readable' handler can // permanently wedge the stream: data stays buffered and 'readable' // never re-emits. Catching here ensures the stream stays healthy so // subsequent keystrokes are still delivered. - defaultCallbacks.logError(error) + defaultCallbacks.logError(error); // Re-attach the listener in case the exception detached it. // Bun may remove the listener after an error; without this, // the session freezes permanently (stdin reader dead, event loop alive). - const { stdin } = this.props - if ( - this.rawModeEnabledCount > 0 && - !stdin.listeners('readable').includes(this.handleReadable) - ) { - defaultCallbacks.logForDebugging( - 'handleReadable: re-attaching stdin readable listener after error recovery', - { level: 'warn' }, - ) - stdin.addListener('readable', this.handleReadable) + const { stdin } = this.props; + if (this.rawModeEnabledCount > 0 && !stdin.listeners('readable').includes(this.handleReadable)) { + defaultCallbacks.logForDebugging('handleReadable: re-attaching stdin readable listener after error recovery', { + level: 'warn', + }); + stdin.addListener('readable', this.handleReadable); } } - } + }; handleInput = (input: string | undefined): void => { // Exit on Ctrl+C if (input === '\x03' && this.props.exitOnCtrlC) { - this.handleExit() + this.handleExit(); } // Note: Ctrl+Z (suspend) is now handled in processKeysInBatch using the // parsed key to support both raw (\x1a) and CSI u format from Kitty // keyboard protocol terminals (Ghostty, iTerm2, kitty, WezTerm) - } + }; handleExit = (error?: Error): void => { if (this.isRawModeSupported()) { - this.handleSetRawMode(false) + this.handleSetRawMode(false); } - this.props.onExit(error) - } + this.props.onExit(error); + }; handleTerminalFocus = (isFocused: boolean): void => { // setTerminalFocused notifies subscribers: TerminalFocusProvider (context) // and Clock (interval speed) — no App setState needed. - setTerminalFocused(isFocused) - } + setTerminalFocused(isFocused); + }; handleSuspend = (): void => { if (!this.isRawModeSupported()) { - return + return; } // Store the exact raw mode count to restore it properly - const rawModeCountBeforeSuspend = this.rawModeEnabledCount + const rawModeCountBeforeSuspend = this.rawModeEnabledCount; // Completely disable raw mode before suspending while (this.rawModeEnabledCount > 0) { - this.handleSetRawMode(false) + this.handleSetRawMode(false); } // Show cursor, disable focus reporting, and disable mouse tracking @@ -556,49 +507,44 @@ export default class App extends PureComponent { // it, SGR mouse sequences would appear as garbled text at the // shell prompt while suspended. if (this.props.stdout.isTTY) { - this.props.stdout.write(SHOW_CURSOR + DFE + DISABLE_MOUSE_TRACKING) + this.props.stdout.write(SHOW_CURSOR + DFE + DISABLE_MOUSE_TRACKING); } // Emit suspend event for Claude Code to handle. Mostly just has a notification - this.internal_eventEmitter.emit('suspend') + this.internal_eventEmitter.emit('suspend'); // Set up resume handler const resumeHandler = () => { // Restore raw mode to exact previous state for (let i = 0; i < rawModeCountBeforeSuspend; i++) { if (this.isRawModeSupported()) { - this.handleSetRawMode(true) + this.handleSetRawMode(true); } } // Hide cursor (unless in accessibility mode) and re-enable focus reporting after resuming if (this.props.stdout.isTTY) { if (!isEnvTruthy(process.env.CLAUDE_CODE_ACCESSIBILITY)) { - this.props.stdout.write(HIDE_CURSOR) + this.props.stdout.write(HIDE_CURSOR); } // Re-enable focus reporting to restore terminal state - this.props.stdout.write(EFE) + this.props.stdout.write(EFE); } // Emit resume event for Claude Code to handle - this.internal_eventEmitter.emit('resume') + this.internal_eventEmitter.emit('resume'); - process.removeListener('SIGCONT', resumeHandler) - } + process.removeListener('SIGCONT', resumeHandler); + }; - process.on('SIGCONT', resumeHandler) - process.kill(process.pid, 'SIGSTOP') - } + process.on('SIGCONT', resumeHandler); + process.kill(process.pid, 'SIGSTOP'); + }; } // Helper to process all keys within a single discrete update context. // discreteUpdates expects (fn, a, b, c, d) -> fn(a, b, c, d) -function processKeysInBatch( - app: App, - items: ParsedInput[], - _unused1: undefined, - _unused2: undefined, -): void { +function processKeysInBatch(app: App, items: ParsedInput[], _unused1: undefined, _unused2: undefined): void { // Update interaction time for notification timeout tracking. // This is called from the central input handler to avoid having multiple // stdin listeners that can cause race conditions and dropped input. @@ -606,75 +552,70 @@ function processKeysInBatch( // Mode-1003 no-button motion is also excluded — passive cursor drift is // not engagement (would suppress idle notifications + defer housekeeping). if ( - items.some( - i => - i.kind === 'key' || - (i.kind === 'mouse' && - !((i.button & 0x20) !== 0 && (i.button & 0x03) === 3)), - ) + items.some(i => i.kind === 'key' || (i.kind === 'mouse' && !((i.button & 0x20) !== 0 && (i.button & 0x03) === 3))) ) { - defaultCallbacks.updateLastInteractionTime() + defaultCallbacks.updateLastInteractionTime(); } for (const item of items) { // Terminal responses (DECRPM, DA1, OSC replies, etc.) are not user // input — route them to the querier to resolve pending promises. if (item.kind === 'response') { - app.querier.onResponse(item.response) - continue + app.querier.onResponse(item.response); + continue; } // Mouse click/drag events update selection state (fullscreen only). // Terminal sends 1-indexed col/row; convert to 0-indexed for the // screen buffer. Button bit 0x20 = drag (motion while button held). if (item.kind === 'mouse') { - handleMouseEvent(app, item) - continue + handleMouseEvent(app, item); + continue; } - const sequence = item.sequence + const sequence = item.sequence; // Handle terminal focus events (DECSET 1004) if (sequence === FOCUS_IN) { - app.handleTerminalFocus(true) - const event = new TerminalFocusEvent('terminalfocus') - app.internal_eventEmitter.emit('terminalfocus', event) - continue + app.handleTerminalFocus(true); + const event = new TerminalFocusEvent('terminalfocus'); + app.internal_eventEmitter.emit('terminalfocus', event); + continue; } if (sequence === FOCUS_OUT) { - app.handleTerminalFocus(false) + app.handleTerminalFocus(false); // Defensive: if we lost the release event (mouse released outside // terminal window — some emulators drop it rather than capturing the // pointer), focus-out is the next observable signal that the drag is // over. Without this, drag-to-scroll's timer runs until the scroll // boundary is hit. if (app.props.selection.isDragging) { - finishSelection(app.props.selection) - app.props.onSelectionChange() + finishSelection(app.props.selection); + app.props.onSelectionChange(); } - const event = new TerminalFocusEvent('terminalblur') - app.internal_eventEmitter.emit('terminalblur', event) - continue + const event = new TerminalFocusEvent('terminalblur'); + app.internal_eventEmitter.emit('terminalblur', event); + continue; } // Failsafe: if we receive input, the terminal must be focused if (!getTerminalFocused()) { - setTerminalFocused(true) + setTerminalFocused(true); } // Handle Ctrl+Z (suspend) using parsed key to support both raw (\x1a) and // CSI u format (\x1b[122;5u) from Kitty keyboard protocol terminals if (item.name === 'z' && item.ctrl && SUPPORTS_SUSPEND) { - app.handleSuspend() - continue + app.handleSuspend(); + continue; } - app.handleInput(sequence) - const event = new InputEvent(item) - app.internal_eventEmitter.emit('input', event) + app.handleInput(sequence); + const event = new InputEvent(item); + app.internal_eventEmitter.emit('input', event); // Also dispatch through the DOM tree so onKeyDown handlers fire. - app.props.dispatchKeyboardEvent(item) + app.props.dispatchKeyboardEvent(item); } } @@ -682,13 +623,13 @@ function processKeysInBatch( export function handleMouseEvent(app: App, m: ParsedMouse): void { // Allow disabling click handling while keeping wheel scroll (which goes // through the keybinding system as 'wheelup'/'wheeldown', not here). - if (defaultCallbacks.isMouseClicksDisabled()) return + if (defaultCallbacks.isMouseClicksDisabled()) return; - const sel = app.props.selection + const sel = app.props.selection; // Terminal coords are 1-indexed; screen buffer is 0-indexed - const col = m.col - 1 - const row = m.row - 1 - const baseButton = m.button & 0x03 + const col = m.col - 1; + const row = m.row - 1; + const baseButton = m.button & 0x03; if (m.action === 'press') { if ((m.button & 0x20) !== 0 && baseButton === 3) { @@ -702,25 +643,25 @@ export function handleMouseEvent(app: App, m: ParsedMouse): void { // past the edge, came back" — and tmux drops focus events unless // `focus-events on` is set, so this is the more reliable signal. if (sel.isDragging) { - finishSelection(sel) - app.props.onSelectionChange() + finishSelection(sel); + app.props.onSelectionChange(); } - if (col === app.lastHoverCol && row === app.lastHoverRow) return - app.lastHoverCol = col - app.lastHoverRow = row - app.props.onHoverAt(col, row) - return + if (col === app.lastHoverCol && row === app.lastHoverRow) return; + app.lastHoverCol = col; + app.lastHoverRow = row; + app.props.onHoverAt(col, row); + return; } if (baseButton !== 0) { // Non-left press breaks the multi-click chain. - app.clickCount = 0 - return + app.clickCount = 0; + return; } if ((m.button & 0x20) !== 0) { // Drag motion: mode-aware extension (char/word/line). onSelectionDrag // calls notifySelectionChange internally — no extra onSelectionChange. - app.props.onSelectionDrag(col, row) - return + app.props.onSelectionDrag(col, row); + return; } // Lost-release fallback for mode-1002-only terminals: a fresh press // while isDragging=true means the previous release was dropped (cursor @@ -728,43 +669,43 @@ export function handleMouseEvent(app: App, m: ParsedMouse): void { // before startSelection/onMultiClick clobbers it. Mode-1003 terminals // hit the no-button-motion recovery above instead, so this is rare. if (sel.isDragging) { - finishSelection(sel) - app.props.onSelectionChange() + finishSelection(sel); + app.props.onSelectionChange(); } // Fresh left press. Detect multi-click HERE (not on release) so the // word/line highlight appears immediately and a subsequent drag can // extend by word/line like native macOS. Previously detected on // release, which meant (a) visible latency before the word highlights // and (b) double-click+drag fell through to char-mode selection. - const now = Date.now() + const now = Date.now(); const nearLast = now - app.lastClickTime < MULTI_CLICK_TIMEOUT_MS && Math.abs(col - app.lastClickCol) <= MULTI_CLICK_DISTANCE && - Math.abs(row - app.lastClickRow) <= MULTI_CLICK_DISTANCE - app.clickCount = nearLast ? app.clickCount + 1 : 1 - app.lastClickTime = now - app.lastClickCol = col - app.lastClickRow = row + Math.abs(row - app.lastClickRow) <= MULTI_CLICK_DISTANCE; + app.clickCount = nearLast ? app.clickCount + 1 : 1; + app.lastClickTime = now; + app.lastClickCol = col; + app.lastClickRow = row; if (app.clickCount >= 2) { // Cancel any pending hyperlink-open from the first click — this is // a double-click, not a single-click on a link. if (app.pendingHyperlinkTimer) { - clearTimeout(app.pendingHyperlinkTimer) - app.pendingHyperlinkTimer = null + clearTimeout(app.pendingHyperlinkTimer); + app.pendingHyperlinkTimer = null; } // Cap at 3 (line select) for quadruple+ clicks. - const count = app.clickCount === 2 ? 2 : 3 - app.props.onMultiClick(col, row, count) - return + const count = app.clickCount === 2 ? 2 : 3; + app.props.onMultiClick(col, row, count); + return; } - startSelection(sel, col, row) + startSelection(sel, col, row); // SGR bit 0x08 = alt (xterm.js wires altKey here, not metaKey — see // comment at the hyperlink-open guard below). On macOS xterm.js, // receiving alt means macOptionClickForcesSelection is OFF (otherwise // xterm.js would have consumed the event for native selection). - sel.lastPressHadAlt = (m.button & 0x08) !== 0 - app.props.onSelectionChange() - return + sel.lastPressHadAlt = (m.button & 0x08) !== 0; + app.props.onSelectionChange(); + return; } // Release: end the drag even for non-zero button codes. Some terminals @@ -774,12 +715,12 @@ export function handleMouseEvent(app: App, m: ParsedMouse): void { // scroll boundary. Only act on non-left releases when we ARE dragging // (so an unrelated middle/right click-release doesn't touch selection). if (baseButton !== 0) { - if (!sel.isDragging) return - finishSelection(sel) - app.props.onSelectionChange() - return + if (!sel.isDragging) return; + finishSelection(sel); + app.props.onSelectionChange(); + return; } - finishSelection(sel) + finishSelection(sel); // NOTE: unlike the old release-based detection we do NOT reset clickCount // on release-after-drag. This aligns with NSEvent.clickCount semantics: // an intervening drag doesn't break the click chain. Practical upside: @@ -800,7 +741,7 @@ export function handleMouseEvent(app: App, m: ParsedMouse): void { // Resolve the hyperlink URL synchronously while the screen buffer // still reflects what the user clicked — deferring only the // browser-open so double-click can cancel it. - const url = app.props.getHyperlinkAt(col, row) + const url = app.props.getHyperlinkAt(col, row); // xterm.js (VS Code, Cursor, Windsurf, etc.) has its own OSC 8 link // handler that fires on Cmd+click *without consuming the mouse event* // (Linkifier._handleMouseUp calls link.activate() but never @@ -816,19 +757,19 @@ export function handleMouseEvent(app: App, m: ParsedMouse): void { // Clear any prior pending timer — clicking a second link // supersedes the first (only the latest click opens). if (app.pendingHyperlinkTimer) { - clearTimeout(app.pendingHyperlinkTimer) + clearTimeout(app.pendingHyperlinkTimer); } app.pendingHyperlinkTimer = setTimeout( (app, url) => { - app.pendingHyperlinkTimer = null - app.props.onOpenHyperlink(url) + app.pendingHyperlinkTimer = null; + app.props.onOpenHyperlink(url); }, MULTI_CLICK_TIMEOUT_MS, app, url, - ) + ); } } } - app.props.onSelectionChange() + app.props.onSelectionChange(); } diff --git a/packages/@ant/ink/src/components/Box.tsx b/packages/@ant/ink/src/components/Box.tsx index 895f82578..9b871d1c0 100644 --- a/packages/@ant/ink/src/components/Box.tsx +++ b/packages/@ant/ink/src/components/Box.tsx @@ -1,48 +1,48 @@ -import React, { type PropsWithChildren, type Ref } from 'react' -import type { Except } from 'type-fest' -import type { DOMElement } from '../core/dom.js' -import type { ClickEvent } from '../core/events/click-event.js' -import type { FocusEvent } from '../core/events/focus-event.js' -import type { KeyboardEvent } from '../core/events/keyboard-event.js' -import type { Styles } from '../core/styles.js' -import * as warn from '../core/warn.js' +import React, { type PropsWithChildren, type Ref } from 'react'; +import type { Except } from 'type-fest'; +import type { DOMElement } from '../core/dom.js'; +import type { ClickEvent } from '../core/events/click-event.js'; +import type { FocusEvent } from '../core/events/focus-event.js'; +import type { KeyboardEvent } from '../core/events/keyboard-event.js'; +import type { Styles } from '../core/styles.js'; +import * as warn from '../core/warn.js'; export type Props = Except & { - ref?: Ref + ref?: Ref; /** * Tab order index. Nodes with `tabIndex >= 0` participate in * Tab/Shift+Tab cycling; `-1` means programmatically focusable only. */ - tabIndex?: number + tabIndex?: number; /** * Focus this element when it mounts. Like the HTML `autofocus` * attribute — the FocusManager calls `focus(node)` during the * reconciler's `commitMount` phase. */ - autoFocus?: boolean + autoFocus?: boolean; /** * Fired on left-button click (press + release without drag). Only works * inside `` where mouse tracking is enabled — no-op * otherwise. The event bubbles from the deepest hit Box up through * ancestors; call `event.stopImmediatePropagation()` to stop bubbling. */ - onClick?: (event: ClickEvent) => void - onFocus?: (event: FocusEvent) => void - onFocusCapture?: (event: FocusEvent) => void - onBlur?: (event: FocusEvent) => void - onBlurCapture?: (event: FocusEvent) => void - onKeyDown?: (event: KeyboardEvent) => void - onKeyDownCapture?: (event: KeyboardEvent) => void + onClick?: (event: ClickEvent) => void; + onFocus?: (event: FocusEvent) => void; + onFocusCapture?: (event: FocusEvent) => void; + onBlur?: (event: FocusEvent) => void; + onBlurCapture?: (event: FocusEvent) => void; + onKeyDown?: (event: KeyboardEvent) => void; + onKeyDownCapture?: (event: KeyboardEvent) => void; /** * Fired when the mouse moves into this Box's rendered rect. Like DOM * `mouseenter`, does NOT bubble — moving between children does not * re-fire on the parent. Only works inside `` where * mode-1003 mouse tracking is enabled. */ - onMouseEnter?: () => void + onMouseEnter?: () => void; /** Fired when the mouse moves out of this Box's rendered rect. */ - onMouseLeave?: () => void -} + onMouseLeave?: () => void; +}; /** * `` is an essential Ink component to build your layout. It's like `
` in the browser. @@ -68,23 +68,23 @@ function Box({ ...style }: PropsWithChildren): React.ReactNode { // Warn if spacing values are not integers to prevent fractional layout dimensions - warn.ifNotInteger(style.margin, 'margin') - warn.ifNotInteger(style.marginX, 'marginX') - warn.ifNotInteger(style.marginY, 'marginY') - warn.ifNotInteger(style.marginTop, 'marginTop') - warn.ifNotInteger(style.marginBottom, 'marginBottom') - warn.ifNotInteger(style.marginLeft, 'marginLeft') - warn.ifNotInteger(style.marginRight, 'marginRight') - warn.ifNotInteger(style.padding, 'padding') - warn.ifNotInteger(style.paddingX, 'paddingX') - warn.ifNotInteger(style.paddingY, 'paddingY') - warn.ifNotInteger(style.paddingTop, 'paddingTop') - warn.ifNotInteger(style.paddingBottom, 'paddingBottom') - warn.ifNotInteger(style.paddingLeft, 'paddingLeft') - warn.ifNotInteger(style.paddingRight, 'paddingRight') - warn.ifNotInteger(style.gap, 'gap') - warn.ifNotInteger(style.columnGap, 'columnGap') - warn.ifNotInteger(style.rowGap, 'rowGap') + warn.ifNotInteger(style.margin, 'margin'); + warn.ifNotInteger(style.marginX, 'marginX'); + warn.ifNotInteger(style.marginY, 'marginY'); + warn.ifNotInteger(style.marginTop, 'marginTop'); + warn.ifNotInteger(style.marginBottom, 'marginBottom'); + warn.ifNotInteger(style.marginLeft, 'marginLeft'); + warn.ifNotInteger(style.marginRight, 'marginRight'); + warn.ifNotInteger(style.padding, 'padding'); + warn.ifNotInteger(style.paddingX, 'paddingX'); + warn.ifNotInteger(style.paddingY, 'paddingY'); + warn.ifNotInteger(style.paddingTop, 'paddingTop'); + warn.ifNotInteger(style.paddingBottom, 'paddingBottom'); + warn.ifNotInteger(style.paddingLeft, 'paddingLeft'); + warn.ifNotInteger(style.paddingRight, 'paddingRight'); + warn.ifNotInteger(style.gap, 'gap'); + warn.ifNotInteger(style.columnGap, 'columnGap'); + warn.ifNotInteger(style.rowGap, 'rowGap'); return ( {children} - ) + ); } -export default Box +export default Box; diff --git a/packages/@ant/ink/src/components/Button.tsx b/packages/@ant/ink/src/components/Button.tsx index 487c38e13..e0943db5f 100644 --- a/packages/@ant/ink/src/components/Button.tsx +++ b/packages/@ant/ink/src/components/Button.tsx @@ -1,39 +1,33 @@ -import React, { - type Ref, - useCallback, - useEffect, - useRef, - useState, -} from 'react' -import type { Except } from 'type-fest' -import type { DOMElement } from '../core/dom.js' -import type { ClickEvent } from '../core/events/click-event.js' -import type { FocusEvent } from '../core/events/focus-event.js' -import type { KeyboardEvent } from '../core/events/keyboard-event.js' -import type { Styles } from '../core/styles.js' -import Box from './Box.js' +import React, { type Ref, useCallback, useEffect, useRef, useState } from 'react'; +import type { Except } from 'type-fest'; +import type { DOMElement } from '../core/dom.js'; +import type { ClickEvent } from '../core/events/click-event.js'; +import type { FocusEvent } from '../core/events/focus-event.js'; +import type { KeyboardEvent } from '../core/events/keyboard-event.js'; +import type { Styles } from '../core/styles.js'; +import Box from './Box.js'; type ButtonState = { - focused: boolean - hovered: boolean - active: boolean -} + focused: boolean; + hovered: boolean; + active: boolean; +}; export type Props = Except & { - ref?: Ref + ref?: Ref; /** * Called when the button is activated via Enter, Space, or click. */ - onAction: () => void + onAction: () => void; /** * Tab order index. Defaults to 0 (in tab order). * Set to -1 for programmatically focusable only. */ - tabIndex?: number + tabIndex?: number; /** * Focus this button when it mounts. */ - autoFocus?: boolean + autoFocus?: boolean; /** * Render prop receiving the interactive state. Use this to * style children based on focus/hover/active — Button itself @@ -41,64 +35,53 @@ export type Props = Except & { * * If not provided, children render as-is (no state-dependent styling). */ - children: ((state: ButtonState) => React.ReactNode) | React.ReactNode -} + children: ((state: ButtonState) => React.ReactNode) | React.ReactNode; +}; -function Button({ - onAction, - tabIndex = 0, - autoFocus, - children, - ref, - ...style -}: Props): React.ReactNode { - const [isFocused, setIsFocused] = useState(false) - const [isHovered, setIsHovered] = useState(false) - const [isActive, setIsActive] = useState(false) +function Button({ onAction, tabIndex = 0, autoFocus, children, ref, ...style }: Props): React.ReactNode { + const [isFocused, setIsFocused] = useState(false); + const [isHovered, setIsHovered] = useState(false); + const [isActive, setIsActive] = useState(false); - const activeTimer = useRef | null>(null) + const activeTimer = useRef | null>(null); useEffect(() => { return () => { - if (activeTimer.current) clearTimeout(activeTimer.current) - } - }, []) + if (activeTimer.current) clearTimeout(activeTimer.current); + }; + }, []); const handleKeyDown = useCallback( (e: KeyboardEvent) => { if (e.key === 'return' || e.key === ' ') { - e.preventDefault() - setIsActive(true) - onAction() - if (activeTimer.current) clearTimeout(activeTimer.current) - activeTimer.current = setTimeout( - setter => setter(false), - 100, - setIsActive, - ) + e.preventDefault(); + setIsActive(true); + onAction(); + if (activeTimer.current) clearTimeout(activeTimer.current); + activeTimer.current = setTimeout(setter => setter(false), 100, setIsActive); } }, [onAction], - ) + ); const handleClick = useCallback( (_e: ClickEvent) => { - onAction() + onAction(); }, [onAction], - ) + ); - const handleFocus = useCallback((_e: FocusEvent) => setIsFocused(true), []) - const handleBlur = useCallback((_e: FocusEvent) => setIsFocused(false), []) - const handleMouseEnter = useCallback(() => setIsHovered(true), []) - const handleMouseLeave = useCallback(() => setIsHovered(false), []) + const handleFocus = useCallback((_e: FocusEvent) => setIsFocused(true), []); + const handleBlur = useCallback((_e: FocusEvent) => setIsFocused(false), []); + const handleMouseEnter = useCallback(() => setIsHovered(true), []); + const handleMouseLeave = useCallback(() => setIsHovered(false), []); const state: ButtonState = { focused: isFocused, hovered: isHovered, active: isActive, - } - const content = typeof children === 'function' ? children(state) : children + }; + const content = typeof children === 'function' ? children(state) : children; return ( {content} - ) + ); } -export default Button -export type { ButtonState } +export default Button; +export type { ButtonState }; diff --git a/packages/@ant/ink/src/components/ClockContext.tsx b/packages/@ant/ink/src/components/ClockContext.tsx index 2822a84ba..9e876e206 100644 --- a/packages/@ant/ink/src/components/ClockContext.tsx +++ b/packages/@ant/ink/src/components/ClockContext.tsx @@ -1,99 +1,93 @@ -import React, { createContext, useEffect, useState } from 'react' -import { FRAME_INTERVAL_MS } from '../core/constants.js' -import { useTerminalFocus } from '../hooks/use-terminal-focus.js' +import React, { createContext, useEffect, useState } from 'react'; +import { FRAME_INTERVAL_MS } from '../core/constants.js'; +import { useTerminalFocus } from '../hooks/use-terminal-focus.js'; export type Clock = { - subscribe: (onChange: () => void, keepAlive: boolean) => () => void - now: () => number - setTickInterval: (ms: number) => void -} + subscribe: (onChange: () => void, keepAlive: boolean) => () => void; + now: () => number; + setTickInterval: (ms: number) => void; +}; export function createClock(tickIntervalMs: number): Clock { - const subscribers = new Map<() => void, boolean>() - let interval: ReturnType | null = null - let currentTickIntervalMs = tickIntervalMs - let startTime = 0 + const subscribers = new Map<() => void, boolean>(); + let interval: ReturnType | null = null; + let currentTickIntervalMs = tickIntervalMs; + let startTime = 0; // Snapshot of the current tick's time, ensuring all subscribers in the same // tick see the same value (keeps animations synchronized) - let tickTime = 0 + let tickTime = 0; function tick(): void { - tickTime = Date.now() - startTime + tickTime = Date.now() - startTime; for (const onChange of subscribers.keys()) { - onChange() + onChange(); } } function updateInterval(): void { - const anyKeepAlive = [...subscribers.values()].some(Boolean) + const anyKeepAlive = [...subscribers.values()].some(Boolean); if (anyKeepAlive) { if (interval) { - clearInterval(interval) - interval = null + clearInterval(interval); + interval = null; } if (startTime === 0) { - startTime = Date.now() + startTime = Date.now(); } - interval = setInterval(tick, currentTickIntervalMs) + interval = setInterval(tick, currentTickIntervalMs); } else if (interval) { - clearInterval(interval) - interval = null + clearInterval(interval); + interval = null; } } return { subscribe(onChange, keepAlive) { - subscribers.set(onChange, keepAlive) - updateInterval() + subscribers.set(onChange, keepAlive); + updateInterval(); return () => { - subscribers.delete(onChange) - updateInterval() - } + subscribers.delete(onChange); + updateInterval(); + }; }, now() { if (startTime === 0) { - startTime = Date.now() + startTime = Date.now(); } // When the clock interval is running, return the synchronized tickTime // so all subscribers in the same tick see the same value. // When paused (no keepAlive subscribers), return real-time to avoid // returning a stale tickTime from the last tick before the pause. if (interval && tickTime) { - return tickTime + return tickTime; } - return Date.now() - startTime + return Date.now() - startTime; }, setTickInterval(ms) { - if (ms === currentTickIntervalMs) return - currentTickIntervalMs = ms - updateInterval() + if (ms === currentTickIntervalMs) return; + currentTickIntervalMs = ms; + updateInterval(); }, - } + }; } -export const ClockContext = createContext(null) +export const ClockContext = createContext(null); -const BLURRED_TICK_INTERVAL_MS = FRAME_INTERVAL_MS * 2 +const BLURRED_TICK_INTERVAL_MS = FRAME_INTERVAL_MS * 2; // Own component so App.tsx doesn't re-render when the clock is created. // The clock value is stable (created once via useState), so the provider // never causes consumer re-renders on its own. -export function ClockProvider({ - children, -}: { - children: React.ReactNode -}): React.ReactNode { - const [clock] = useState(() => createClock(FRAME_INTERVAL_MS)) - const focused = useTerminalFocus() +export function ClockProvider({ children }: { children: React.ReactNode }): React.ReactNode { + const [clock] = useState(() => createClock(FRAME_INTERVAL_MS)); + const focused = useTerminalFocus(); useEffect(() => { - clock.setTickInterval( - focused ? FRAME_INTERVAL_MS : BLURRED_TICK_INTERVAL_MS, - ) - }, [clock, focused]) + clock.setTickInterval(focused ? FRAME_INTERVAL_MS : BLURRED_TICK_INTERVAL_MS); + }, [clock, focused]); - return {children} + return {children}; } diff --git a/packages/@ant/ink/src/components/ErrorOverview.tsx b/packages/@ant/ink/src/components/ErrorOverview.tsx index 3effc4217..5f215536b 100644 --- a/packages/@ant/ink/src/components/ErrorOverview.tsx +++ b/packages/@ant/ink/src/components/ErrorOverview.tsx @@ -1,48 +1,48 @@ -import codeExcerpt, { type CodeExcerpt } from 'code-excerpt' -import { readFileSync } from 'fs' -import React from 'react' -import StackUtils from 'stack-utils' -import Box from './Box.js' -import Text from './Text.js' +import codeExcerpt, { type CodeExcerpt } from 'code-excerpt'; +import { readFileSync } from 'fs'; +import React from 'react'; +import StackUtils from 'stack-utils'; +import Box from './Box.js'; +import Text from './Text.js'; /* eslint-disable custom-rules/no-process-cwd -- stack trace file:// paths are relative to the real OS cwd, not the virtual cwd */ // Error's source file is reported as file:///home/user/file.js // This function removes the file://[cwd] part const cleanupPath = (path: string | undefined): string | undefined => { - return path?.replace(`file://${process.cwd()}/`, '') -} + return path?.replace(`file://${process.cwd()}/`, ''); +}; -let stackUtils: StackUtils | undefined +let stackUtils: StackUtils | undefined; function getStackUtils(): StackUtils { return (stackUtils ??= new StackUtils({ cwd: process.cwd(), internals: StackUtils.nodeInternals(), - })) + })); } /* eslint-enable custom-rules/no-process-cwd */ type Props = { - readonly error: Error -} + readonly error: Error; +}; export default function ErrorOverview({ error }: Props) { - const stack = error.stack ? error.stack.split('\n').slice(1) : undefined - const origin = stack ? getStackUtils().parseLine(stack[0]!) : undefined - const filePath = cleanupPath(origin?.file) - let excerpt: CodeExcerpt[] | undefined - let lineWidth = 0 + const stack = error.stack ? error.stack.split('\n').slice(1) : undefined; + const origin = stack ? getStackUtils().parseLine(stack[0]!) : undefined; + const filePath = cleanupPath(origin?.file); + let excerpt: CodeExcerpt[] | undefined; + let lineWidth = 0; if (filePath && origin?.line) { try { // eslint-disable-next-line custom-rules/no-sync-fs -- sync render path; error overlay can't go async without suspense restructuring - const sourceCode = readFileSync(filePath, 'utf8') - excerpt = codeExcerpt(sourceCode, origin.line) + const sourceCode = readFileSync(filePath, 'utf8'); + excerpt = codeExcerpt(sourceCode, origin.line); if (excerpt) { for (const { line } of excerpt) { - lineWidth = Math.max(lineWidth, String(line).length) + lineWidth = Math.max(lineWidth, String(line).length); } } } catch { @@ -76,9 +76,7 @@ export default function ErrorOverview({ error }: Props) { {String(line).padStart(lineWidth, ' ')}: @@ -103,7 +101,7 @@ export default function ErrorOverview({ error }: Props) { .split('\n') .slice(1) .map(line => { - const parsedLine = getStackUtils().parseLine(line) + const parsedLine = getStackUtils().parseLine(line); // If the line from the stack cannot be parsed, we print out the unparsed line. if (!parsedLine) { @@ -112,7 +110,7 @@ export default function ErrorOverview({ error }: Props) { - {line} - ) + ); } return ( @@ -121,14 +119,13 @@ export default function ErrorOverview({ error }: Props) { {parsedLine.function} {' '} - ({cleanupPath(parsedLine.file) ?? ''}:{parsedLine.line}: - {parsedLine.column}) + ({cleanupPath(parsedLine.file) ?? ''}:{parsedLine.line}:{parsedLine.column}) - ) + ); })} )} - ) + ); } diff --git a/packages/@ant/ink/src/components/Link.tsx b/packages/@ant/ink/src/components/Link.tsx index c3ad1e2f3..f06d2ea10 100644 --- a/packages/@ant/ink/src/components/Link.tsx +++ b/packages/@ant/ink/src/components/Link.tsx @@ -1,21 +1,17 @@ -import type { ReactNode } from 'react' -import React from 'react' -import { supportsHyperlinks } from '../core/supports-hyperlinks.js' -import Text from './Text.js' +import type { ReactNode } from 'react'; +import React from 'react'; +import { supportsHyperlinks } from '../core/supports-hyperlinks.js'; +import Text from './Text.js'; export type Props = { - readonly children?: ReactNode - readonly url: string - readonly fallback?: ReactNode -} + readonly children?: ReactNode; + readonly url: string; + readonly fallback?: ReactNode; +}; -export default function Link({ - children, - url, - fallback, -}: Props): React.ReactNode { +export default function Link({ children, url, fallback }: Props): React.ReactNode { // Use children if provided, otherwise display the URL - const content = children ?? url + const content = children ?? url; if (supportsHyperlinks()) { // Wrap in Text to ensure we're in a text context @@ -24,8 +20,8 @@ export default function Link({ {content} - ) + ); } - return {fallback ?? content} + return {fallback ?? content}; } diff --git a/packages/@ant/ink/src/components/Newline.tsx b/packages/@ant/ink/src/components/Newline.tsx index b8d6a88a2..9340685f9 100644 --- a/packages/@ant/ink/src/components/Newline.tsx +++ b/packages/@ant/ink/src/components/Newline.tsx @@ -1,4 +1,4 @@ -import React from 'react' +import React from 'react'; export type Props = { /** @@ -6,12 +6,12 @@ export type Props = { * * @default 1 */ - readonly count?: number -} + readonly count?: number; +}; /** * Adds one or more newline (\n) characters. Must be used within components. */ export default function Newline({ count = 1 }: Props) { - return {'\n'.repeat(count)} + return {'\n'.repeat(count)}; } diff --git a/packages/@ant/ink/src/components/NoSelect.tsx b/packages/@ant/ink/src/components/NoSelect.tsx index 882097608..790b77225 100644 --- a/packages/@ant/ink/src/components/NoSelect.tsx +++ b/packages/@ant/ink/src/components/NoSelect.tsx @@ -1,5 +1,5 @@ -import React, { type PropsWithChildren } from 'react' -import Box, { type Props as BoxProps } from './Box.js' +import React, { type PropsWithChildren } from 'react'; +import Box, { type Props as BoxProps } from './Box.js'; type Props = Omit & { /** @@ -11,8 +11,8 @@ type Props = Omit & { * * @default false */ - fromLeftEdge?: boolean -} + fromLeftEdge?: boolean; +}; /** * Marks its contents as non-selectable in fullscreen text selection. @@ -32,14 +32,10 @@ type Props = Omit & { * tracking). No-op in the main-screen scrollback render where the * terminal's native selection is used instead. */ -export function NoSelect({ - children, - fromLeftEdge, - ...boxProps -}: PropsWithChildren): React.ReactNode { +export function NoSelect({ children, fromLeftEdge, ...boxProps }: PropsWithChildren): React.ReactNode { return ( {children} - ) + ); } diff --git a/packages/@ant/ink/src/components/RawAnsi.tsx b/packages/@ant/ink/src/components/RawAnsi.tsx index a1a23ab4b..ac548101b 100644 --- a/packages/@ant/ink/src/components/RawAnsi.tsx +++ b/packages/@ant/ink/src/components/RawAnsi.tsx @@ -1,14 +1,14 @@ -import React from 'react' +import React from 'react'; type Props = { /** * Pre-rendered ANSI lines. Each element must be exactly one terminal row * (already wrapped to `width` by the producer) with ANSI escape codes inline. */ - lines: string[] + lines: string[]; /** Column width the producer wrapped to. Sent to Yoga as the fixed leaf width. */ - width: number -} + width: number; +}; /** * Bypass the → React tree → Yoga → squash → re-serialize roundtrip for @@ -27,13 +27,7 @@ type Props = { */ export function RawAnsi({ lines, width }: Props): React.ReactNode { if (lines.length === 0) { - return null + return null; } - return ( - - ) + return ; } diff --git a/packages/@ant/ink/src/components/ScrollBox.tsx b/packages/@ant/ink/src/components/ScrollBox.tsx index 371ab0ab0..3e14f9389 100644 --- a/packages/@ant/ink/src/components/ScrollBox.tsx +++ b/packages/@ant/ink/src/components/ScrollBox.tsx @@ -1,20 +1,14 @@ -import React, { - type PropsWithChildren, - type Ref, - useImperativeHandle, - useRef, - useState, -} from 'react' -import type { Except } from 'type-fest' -import type { DOMElement } from '../core/dom.js' -import { markDirty, scheduleRenderFrom } from '../core/dom.js' -import { markCommitStart } from '../core/reconciler.js' -import type { Styles } from '../core/styles.js' -import Box from './Box.js' +import React, { type PropsWithChildren, type Ref, useImperativeHandle, useRef, useState } from 'react'; +import type { Except } from 'type-fest'; +import type { DOMElement } from '../core/dom.js'; +import { markDirty, scheduleRenderFrom } from '../core/dom.js'; +import { markCommitStart } from '../core/reconciler.js'; +import type { Styles } from '../core/styles.js'; +import Box from './Box.js'; export type ScrollBoxHandle = { - scrollTo: (y: number) => void - scrollBy: (dy: number) => void + scrollTo: (y: number) => void; + scrollBy: (dy: number) => void; /** * Scroll so `el`'s top is at the viewport top (plus `offset`). Unlike * scrollTo which bakes a number that's stale by the time the throttled @@ -22,24 +16,24 @@ export type ScrollBoxHandle = { * render-node-to-output reads `el.yogaNode.getComputedTop()` in the * SAME Yoga pass that computes scrollHeight. Deterministic. One-shot. */ - scrollToElement: (el: DOMElement, offset?: number) => void - scrollToBottom: () => void - getScrollTop: () => number - getPendingDelta: () => number - getScrollHeight: () => number + scrollToElement: (el: DOMElement, offset?: number) => void; + scrollToBottom: () => void; + getScrollTop: () => number; + getPendingDelta: () => number; + getScrollHeight: () => number; /** * Like getScrollHeight, but reads Yoga directly instead of the cached * value written by render-node-to-output (throttled, up to 16ms stale). * Use when you need a fresh value in useLayoutEffect after a React commit * that grew content. Slightly more expensive (native Yoga call). */ - getFreshScrollHeight: () => number - getViewportHeight: () => number + getFreshScrollHeight: () => number; + getViewportHeight: () => number; /** * Absolute screen-buffer row of the first visible content line (inside * padding). Used for drag-to-scroll edge detection. */ - getViewportTop: () => number + getViewportTop: () => number; /** * True when scroll is pinned to the bottom. Set by scrollToBottom, the * initial stickyScroll attribute, and by the renderer when positional @@ -47,14 +41,14 @@ export type ScrollBoxHandle = { * scrollTo/scrollBy. Stable signal for "at bottom" that doesn't depend on * layout values (unlike scrollTop+viewportH >= scrollHeight). */ - isSticky: () => boolean + isSticky: () => boolean; /** * Subscribe to imperative scroll changes (scrollTo/scrollBy/scrollToBottom). * Does NOT fire for stickyScroll updates done by the Ink renderer — those * happen during Ink's render phase after React has committed. Callers that * care about the sticky case should treat "at bottom" as a fallback. */ - subscribe: (listener: () => void) => () => void + subscribe: (listener: () => void) => () => void; /** * Set the render-time scrollTop clamp to the currently-mounted children's * coverage span. Called by useVirtualScroll after computing its range; @@ -63,20 +57,17 @@ export type ScrollBoxHandle = { * content instead of blank spacer. Pass undefined to disable (sticky, * cold start). */ - setClampBounds: (min: number | undefined, max: number | undefined) => void -} + setClampBounds: (min: number | undefined, max: number | undefined) => void; +}; -export type ScrollBoxProps = Except< - Styles, - 'textWrap' | 'overflow' | 'overflowX' | 'overflowY' -> & { - ref?: Ref +export type ScrollBoxProps = Except & { + ref?: Ref; /** * When true, automatically pins scroll position to the bottom when content * grows. Unset manually via scrollTo/scrollBy to break the stickiness. */ - stickyScroll?: boolean -} + stickyScroll?: boolean; +}; /** * A Box with `overflow: scroll` and an imperative scroll API. @@ -88,13 +79,8 @@ export type ScrollBoxProps = Except< * * Works best inside a fullscreen (constrained-height root) Ink tree. */ -function ScrollBox({ - children, - ref, - stickyScroll, - ...style -}: PropsWithChildren): React.ReactNode { - const domRef = useRef(null) +function ScrollBox({ children, ref, stickyScroll, ...style }: PropsWithChildren): React.ReactNode { + const domRef = useRef(null); // scrollTo/scrollBy bypass React: they mutate scrollTop on the DOM node, // mark it dirty, and call the root's throttled scheduleRender directly. // The Ink renderer reads scrollTop from the node — no React state needed, @@ -103,113 +89,109 @@ function ScrollBox({ // render — otherwise scheduleRender's leading edge fires on the FIRST // event before subsequent events mutate scrollTop. scrollToBottom still // forces a React render: sticky is attribute-observed, no DOM-only path. - const [, forceRender] = useState(0) - const listenersRef = useRef(new Set<() => void>()) - const renderQueuedRef = useRef(false) + const [, forceRender] = useState(0); + const listenersRef = useRef(new Set<() => void>()); + const renderQueuedRef = useRef(false); const notify = () => { - for (const l of listenersRef.current) l() - } + for (const l of listenersRef.current) l(); + }; function scrollMutated(el: DOMElement): void { // Signal background intervals (IDE poll, LSP poll, GCS fetch, orphan // check) to skip their next tick — they compete for the event loop and // contributed to 1402ms max frame gaps during scroll drain. // noop — injected by business layer via onScrollActivity callback - markDirty(el) - markCommitStart() - notify() - if (renderQueuedRef.current) return - renderQueuedRef.current = true + markDirty(el); + markCommitStart(); + notify(); + if (renderQueuedRef.current) return; + renderQueuedRef.current = true; queueMicrotask(() => { - renderQueuedRef.current = false - scheduleRenderFrom(el) - }) + renderQueuedRef.current = false; + scheduleRenderFrom(el); + }); } useImperativeHandle( ref, (): ScrollBoxHandle => ({ scrollTo(y: number) { - const el = domRef.current - if (!el) return + const el = domRef.current; + if (!el) return; // Explicit false overrides the DOM attribute so manual scroll // breaks stickiness. Render code checks ?? precedence. - el.stickyScroll = false - el.pendingScrollDelta = undefined - el.scrollAnchor = undefined - el.scrollTop = Math.max(0, Math.floor(y)) - scrollMutated(el) + el.stickyScroll = false; + el.pendingScrollDelta = undefined; + el.scrollAnchor = undefined; + el.scrollTop = Math.max(0, Math.floor(y)); + scrollMutated(el); }, scrollToElement(el: DOMElement, offset = 0) { - const box = domRef.current - if (!box) return - box.stickyScroll = false - box.pendingScrollDelta = undefined - box.scrollAnchor = { el, offset } - scrollMutated(box) + const box = domRef.current; + if (!box) return; + box.stickyScroll = false; + box.pendingScrollDelta = undefined; + box.scrollAnchor = { el, offset }; + scrollMutated(box); }, scrollBy(dy: number) { - const el = domRef.current - if (!el) return - el.stickyScroll = false + const el = domRef.current; + if (!el) return; + el.stickyScroll = false; // Wheel input cancels any in-flight anchor seek — user override. - el.scrollAnchor = undefined + el.scrollAnchor = undefined; // Accumulate in pendingScrollDelta; renderer drains it at a capped // rate so fast flicks show intermediate frames. Pure accumulator: // scroll-up followed by scroll-down naturally cancels. - el.pendingScrollDelta = (el.pendingScrollDelta ?? 0) + Math.floor(dy) - scrollMutated(el) + el.pendingScrollDelta = (el.pendingScrollDelta ?? 0) + Math.floor(dy); + scrollMutated(el); }, scrollToBottom() { - const el = domRef.current - if (!el) return - el.pendingScrollDelta = undefined - el.stickyScroll = true - markDirty(el) - notify() - forceRender(n => n + 1) + const el = domRef.current; + if (!el) return; + el.pendingScrollDelta = undefined; + el.stickyScroll = true; + markDirty(el); + notify(); + forceRender(n => n + 1); }, getScrollTop() { - return domRef.current?.scrollTop ?? 0 + return domRef.current?.scrollTop ?? 0; }, getPendingDelta() { // Accumulated-but-not-yet-drained delta. useVirtualScroll needs // this to mount the union [committed, committed+pending] range — // otherwise intermediate drain frames find no children (blank). - return domRef.current?.pendingScrollDelta ?? 0 + return domRef.current?.pendingScrollDelta ?? 0; }, getScrollHeight() { - return domRef.current?.scrollHeight ?? 0 + return domRef.current?.scrollHeight ?? 0; }, getFreshScrollHeight() { - const content = domRef.current?.childNodes[0] as DOMElement | undefined - return ( - content?.yogaNode?.getComputedHeight() ?? - domRef.current?.scrollHeight ?? - 0 - ) + const content = domRef.current?.childNodes[0] as DOMElement | undefined; + return content?.yogaNode?.getComputedHeight() ?? domRef.current?.scrollHeight ?? 0; }, getViewportHeight() { - return domRef.current?.scrollViewportHeight ?? 0 + return domRef.current?.scrollViewportHeight ?? 0; }, getViewportTop() { - return domRef.current?.scrollViewportTop ?? 0 + return domRef.current?.scrollViewportTop ?? 0; }, isSticky() { - const el = domRef.current - if (!el) return false - return el.stickyScroll ?? Boolean(el.attributes['stickyScroll']) + const el = domRef.current; + if (!el) return false; + return el.stickyScroll ?? Boolean(el.attributes['stickyScroll']); }, subscribe(listener: () => void) { - listenersRef.current.add(listener) - return () => listenersRef.current.delete(listener) + listenersRef.current.add(listener); + return () => listenersRef.current.delete(listener); }, setClampBounds(min, max) { - const el = domRef.current - if (!el) return - el.scrollClampMin = min - el.scrollClampMax = max + const el = domRef.current; + if (!el) return; + el.scrollClampMin = min; + el.scrollClampMax = max; }, }), // notify/scrollMutated are inline (no useCallback) but only close over @@ -217,7 +199,7 @@ function ScrollBox({ // every render (which re-registers the ref = churn). // eslint-disable-next-line react-hooks/exhaustive-deps [], - ) + ); // Structure: outer viewport (overflow:scroll, constrained height) > // inner content (flexGrow:1, flexShrink:0 — fills at least the viewport @@ -233,8 +215,8 @@ function ScrollBox({ return ( { - domRef.current = el - if (el) el.scrollTop ??= 0 + domRef.current = el; + if (el) el.scrollTop ??= 0; }} style={{ flexWrap: 'nowrap', @@ -251,7 +233,7 @@ function ScrollBox({ {children} - ) + ); } -export default ScrollBox +export default ScrollBox; diff --git a/packages/@ant/ink/src/components/Spacer.tsx b/packages/@ant/ink/src/components/Spacer.tsx index eb55fa9e4..749f9ad0a 100644 --- a/packages/@ant/ink/src/components/Spacer.tsx +++ b/packages/@ant/ink/src/components/Spacer.tsx @@ -1,10 +1,10 @@ -import React from 'react' -import Box from './Box.js' +import React from 'react'; +import Box from './Box.js'; /** * A flexible space that expands along the major axis of its containing layout. * It's useful as a shortcut for filling all the available spaces between elements. */ export default function Spacer() { - return + return ; } diff --git a/packages/@ant/ink/src/components/TerminalFocusContext.tsx b/packages/@ant/ink/src/components/TerminalFocusContext.tsx index e1fca2563..27fa3dfb8 100644 --- a/packages/@ant/ink/src/components/TerminalFocusContext.tsx +++ b/packages/@ant/ink/src/components/TerminalFocusContext.tsx @@ -1,53 +1,36 @@ -import React, { createContext, useMemo, useSyncExternalStore } from 'react' +import React, { createContext, useMemo, useSyncExternalStore } from 'react'; import { getTerminalFocused, getTerminalFocusState, subscribeTerminalFocus, type TerminalFocusState, -} from '../core/terminal-focus-state.js' +} from '../core/terminal-focus-state.js'; -export type { TerminalFocusState } +export type { TerminalFocusState }; export type TerminalFocusContextProps = { - readonly isTerminalFocused: boolean - readonly terminalFocusState: TerminalFocusState -} + readonly isTerminalFocused: boolean; + readonly terminalFocusState: TerminalFocusState; +}; const TerminalFocusContext = createContext({ isTerminalFocused: true, terminalFocusState: 'unknown', -}) +}); // eslint-disable-next-line custom-rules/no-top-level-side-effects -TerminalFocusContext.displayName = 'TerminalFocusContext' +TerminalFocusContext.displayName = 'TerminalFocusContext'; // Separate component so App.tsx doesn't re-render on focus changes. // Children are a stable prop reference, so they don't re-render either — // only components that consume the context will re-render. -export function TerminalFocusProvider({ - children, -}: { - children: React.ReactNode -}): React.ReactNode { - const isTerminalFocused = useSyncExternalStore( - subscribeTerminalFocus, - getTerminalFocused, - ) - const terminalFocusState = useSyncExternalStore( - subscribeTerminalFocus, - getTerminalFocusState, - ) +export function TerminalFocusProvider({ children }: { children: React.ReactNode }): React.ReactNode { + const isTerminalFocused = useSyncExternalStore(subscribeTerminalFocus, getTerminalFocused); + const terminalFocusState = useSyncExternalStore(subscribeTerminalFocus, getTerminalFocusState); - const value = useMemo( - () => ({ isTerminalFocused, terminalFocusState }), - [isTerminalFocused, terminalFocusState], - ) + const value = useMemo(() => ({ isTerminalFocused, terminalFocusState }), [isTerminalFocused, terminalFocusState]); - return ( - - {children} - - ) + return {children}; } -export default TerminalFocusContext +export default TerminalFocusContext; diff --git a/packages/@ant/ink/src/components/TerminalSizeContext.tsx b/packages/@ant/ink/src/components/TerminalSizeContext.tsx index cdf139c57..bf5a19d2b 100644 --- a/packages/@ant/ink/src/components/TerminalSizeContext.tsx +++ b/packages/@ant/ink/src/components/TerminalSizeContext.tsx @@ -1,8 +1,8 @@ -import { createContext } from 'react' +import { createContext } from 'react'; export type TerminalSize = { - columns: number - rows: number -} + columns: number; + rows: number; +}; -export const TerminalSizeContext = createContext(null) +export const TerminalSizeContext = createContext(null); diff --git a/packages/@ant/ink/src/components/Text.tsx b/packages/@ant/ink/src/components/Text.tsx index 620881450..6039c28e1 100644 --- a/packages/@ant/ink/src/components/Text.tsx +++ b/packages/@ant/ink/src/components/Text.tsx @@ -1,58 +1,55 @@ -import type { ReactNode } from 'react' -import React from 'react' -import type { Color, Styles, TextStyles } from '../core/styles.js' +import type { ReactNode } from 'react'; +import React from 'react'; +import type { Color, Styles, TextStyles } from '../core/styles.js'; type BaseProps = { /** * Change text color. Accepts a raw color value (rgb, hex, ansi). */ - readonly color?: Color + readonly color?: Color; /** * Same as `color`, but for background. */ - readonly backgroundColor?: Color + readonly backgroundColor?: Color; /** * Make the text italic. */ - readonly italic?: boolean + readonly italic?: boolean; /** * Make the text underlined. */ - readonly underline?: boolean + readonly underline?: boolean; /** * Make the text crossed with a line. */ - readonly strikethrough?: boolean + readonly strikethrough?: boolean; /** * Inverse background and foreground colors. */ - readonly inverse?: boolean + readonly inverse?: boolean; /** * This property tells Ink to wrap or truncate text if its width is larger than container. * If `wrap` is passed (by default), Ink will wrap text and split it into multiple lines. * If `truncate-*` is passed, Ink will truncate text instead, which will result in one line of text with the rest cut off. */ - readonly wrap?: Styles['textWrap'] + readonly wrap?: Styles['textWrap']; - readonly children?: ReactNode -} + readonly children?: ReactNode; +}; /** * Bold and dim are mutually exclusive in terminals. * This type ensures you can use one or the other, but not both. */ -type WeightProps = - | { bold?: never; dim?: never } - | { bold: boolean; dim?: never } - | { dim: boolean; bold?: never } +type WeightProps = { bold?: never; dim?: never } | { bold: boolean; dim?: never } | { dim: boolean; bold?: never }; -export type Props = BaseProps & WeightProps +export type Props = BaseProps & WeightProps; const memoizedStylesForWrap: Record, Styles> = { wrap: { @@ -103,7 +100,7 @@ const memoizedStylesForWrap: Record, Styles> = { flexDirection: 'row', textWrap: 'truncate-start', }, -} as const +} as const; /** * This component can display text, and change its style to make it colorful, bold, underline, italic or strikethrough. @@ -121,7 +118,7 @@ export default function Text({ children, }: Props): React.ReactNode { if (children === undefined || children === null) { - return null + return null; } // Build textStyles object with only the properties that are set @@ -134,11 +131,11 @@ export default function Text({ ...(underline && { underline }), ...(strikethrough && { strikethrough }), ...(inverse && { inverse }), - } + }; return ( {children} - ) + ); } diff --git a/packages/@ant/ink/src/core/Ansi.tsx b/packages/@ant/ink/src/core/Ansi.tsx index a8a0999a0..1df502cd2 100644 --- a/packages/@ant/ink/src/core/Ansi.tsx +++ b/packages/@ant/ink/src/core/Ansi.tsx @@ -1,31 +1,26 @@ -import React from 'react' -import Link from '../components/Link.js' -import Text from '../components/Text.js' -import type { Color } from './styles.js' -import { - type NamedColor, - Parser, - type Color as TermioColor, - type TextStyle, -} from './termio.js' +import React from 'react'; +import Link from '../components/Link.js'; +import Text from '../components/Text.js'; +import type { Color } from './styles.js'; +import { type NamedColor, Parser, type Color as TermioColor, type TextStyle } from './termio.js'; type Props = { - children: string + children: string; /** When true, force all text to be rendered with dim styling */ - dimColor?: boolean -} + dimColor?: boolean; +}; type SpanProps = { - color?: Color - backgroundColor?: Color - dim?: boolean - bold?: boolean - italic?: boolean - underline?: boolean - strikethrough?: boolean - inverse?: boolean - hyperlink?: string -} + color?: Color; + backgroundColor?: Color; + dim?: boolean; + bold?: boolean; + italic?: boolean; + underline?: boolean; + strikethrough?: boolean; + inverse?: boolean; + hyperlink?: string; +}; /** * Component that parses ANSI escape codes and renders them using Text components. @@ -35,43 +30,32 @@ type SpanProps = { * * Memoized to prevent re-renders when parent changes but children string is the same. */ -export const Ansi = React.memo(function Ansi({ - children, - dimColor, -}: Props): React.ReactNode { +export const Ansi = React.memo(function Ansi({ children, dimColor }: Props): React.ReactNode { if (typeof children !== 'string') { - return dimColor ? ( - {String(children)} - ) : ( - {String(children)} - ) + return dimColor ? {String(children)} : {String(children)}; } if (children === '') { - return null + return null; } - const spans = parseToSpans(children) + const spans = parseToSpans(children); if (spans.length === 0) { - return null + return null; } if (spans.length === 1 && !hasAnyProps(spans[0]!.props)) { - return dimColor ? ( - {spans[0]!.text} - ) : ( - {spans[0]!.text} - ) + return dimColor ? {spans[0]!.text} : {spans[0]!.text}; } const content = spans.map((span, i) => { - const hyperlink = span.props.hyperlink + const hyperlink = span.props.hyperlink; // When dimColor is forced, override the span's dim prop if (dimColor) { - span.props.dim = true + span.props.dim = true; } - const hasTextProps = hasAnyTextProps(span.props) + const hasTextProps = hasAnyTextProps(span.props); if (hyperlink) { return hasTextProps ? ( @@ -93,7 +77,7 @@ export const Ansi = React.memo(function Ansi({ {span.text} - ) + ); } return hasTextProps ? ( @@ -112,79 +96,79 @@ export const Ansi = React.memo(function Ansi({ ) : ( span.text - ) - }) + ); + }); - return dimColor ? {content} : {content} -}) + return dimColor ? {content} : {content}; +}); type Span = { - text: string - props: SpanProps -} + text: string; + props: SpanProps; +}; /** * Parse an ANSI string into spans using the termio parser. */ function parseToSpans(input: string): Span[] { - const parser = new Parser() - const actions = parser.feed(input) - const spans: Span[] = [] + const parser = new Parser(); + const actions = parser.feed(input); + const spans: Span[] = []; - let currentHyperlink: string | undefined + let currentHyperlink: string | undefined; for (const action of actions) { if (action.type === 'link') { if (action.action.type === 'start') { - currentHyperlink = action.action.url + currentHyperlink = action.action.url; } else { - currentHyperlink = undefined + currentHyperlink = undefined; } - continue + continue; } if (action.type === 'text') { - const text = action.graphemes.map(g => g.value).join('') - if (!text) continue + const text = action.graphemes.map(g => g.value).join(''); + if (!text) continue; - const props = textStyleToSpanProps(action.style) + const props = textStyleToSpanProps(action.style); if (currentHyperlink) { - props.hyperlink = currentHyperlink + props.hyperlink = currentHyperlink; } // Try to merge with previous span if props match - const lastSpan = spans[spans.length - 1] + const lastSpan = spans[spans.length - 1]; if (lastSpan && propsEqual(lastSpan.props, props)) { - lastSpan.text += text + lastSpan.text += text; } else { - spans.push({ text, props }) + spans.push({ text, props }); } } } - return spans + return spans; } /** * Convert termio's TextStyle to SpanProps. */ function textStyleToSpanProps(style: TextStyle): SpanProps { - const props: SpanProps = {} + const props: SpanProps = {}; - if (style.bold) props.bold = true - if (style.dim) props.dim = true - if (style.italic) props.italic = true - if (style.underline !== 'none') props.underline = true - if (style.strikethrough) props.strikethrough = true - if (style.inverse) props.inverse = true + if (style.bold) props.bold = true; + if (style.dim) props.dim = true; + if (style.italic) props.italic = true; + if (style.underline !== 'none') props.underline = true; + if (style.strikethrough) props.strikethrough = true; + if (style.inverse) props.inverse = true; - const fgColor = colorToString(style.fg) - if (fgColor) props.color = fgColor + const fgColor = colorToString(style.fg); + if (fgColor) props.color = fgColor; - const bgColor = colorToString(style.bg) - if (bgColor) props.backgroundColor = bgColor + const bgColor = colorToString(style.bg); + if (bgColor) props.backgroundColor = bgColor; - return props + return props; } // Map termio named colors to the ansi: format @@ -205,7 +189,7 @@ const NAMED_COLOR_MAP: Record = { brightMagenta: 'ansi:magentaBright', brightCyan: 'ansi:cyanBright', brightWhite: 'ansi:whiteBright', -} +}; /** * Convert termio's Color to the string format used by Ink. @@ -213,13 +197,13 @@ const NAMED_COLOR_MAP: Record = { function colorToString(color: TermioColor): Color | undefined { switch (color.type) { case 'named': - return NAMED_COLOR_MAP[color.name] as Color + return NAMED_COLOR_MAP[color.name] as Color; case 'indexed': - return `ansi256(${color.index})` as Color + return `ansi256(${color.index})` as Color; case 'rgb': - return `rgb(${color.r},${color.g},${color.b})` as Color + return `rgb(${color.r},${color.g},${color.b})` as Color; case 'default': - return undefined + return undefined; } } @@ -237,7 +221,7 @@ function propsEqual(a: SpanProps, b: SpanProps): boolean { a.strikethrough === b.strikethrough && a.inverse === b.inverse && a.hyperlink === b.hyperlink - ) + ); } function hasAnyProps(props: SpanProps): boolean { @@ -251,7 +235,7 @@ function hasAnyProps(props: SpanProps): boolean { props.strikethrough === true || props.inverse === true || props.hyperlink !== undefined - ) + ); } function hasAnyTextProps(props: SpanProps): boolean { @@ -264,18 +248,18 @@ function hasAnyTextProps(props: SpanProps): boolean { props.underline === true || props.strikethrough === true || props.inverse === true - ) + ); } // Text style props without weight (bold/dim) - these are handled separately type BaseTextStyleProps = { - color?: Color - backgroundColor?: Color - italic?: boolean - underline?: boolean - strikethrough?: boolean - inverse?: boolean -} + color?: Color; + backgroundColor?: Color; + italic?: boolean; + underline?: boolean; + strikethrough?: boolean; + inverse?: boolean; +}; // Wrapper component that handles bold/dim mutual exclusivity for Text function StyledText({ @@ -284,9 +268,9 @@ function StyledText({ children, ...rest }: BaseTextStyleProps & { - bold?: boolean - dim?: boolean - children: string + bold?: boolean; + dim?: boolean; + children: string; }): React.ReactNode { // dim takes precedence over bold when both are set (terminals treat them as mutually exclusive) if (dim) { @@ -294,14 +278,14 @@ function StyledText({ {children} - ) + ); } if (bold) { return ( {children} - ) + ); } - return {children} + return {children}; } diff --git a/packages/@ant/ink/src/core/bidi.ts b/packages/@ant/ink/src/core/bidi.ts index 07502e550..b8818ccdd 100644 --- a/packages/@ant/ink/src/core/bidi.ts +++ b/packages/@ant/ink/src/core/bidi.ts @@ -17,8 +17,16 @@ import bidiFactory from 'bidi-js' type BidiInstance = { - getEmbeddingLevels: (text: string, defaultDirection?: string) => { paragraphLevel: number; levels: Uint8Array } - getReorderSegments: (text: string, embeddingLevels: { paragraphLevel: number; levels: Uint8Array }, start?: number, end?: number) => [number, number][] + getEmbeddingLevels: ( + text: string, + defaultDirection?: string, + ) => { paragraphLevel: number; levels: Uint8Array } + getReorderSegments: ( + text: string, + embeddingLevels: { paragraphLevel: number; levels: Uint8Array }, + start?: number, + end?: number, + ) => [number, number][] getVisualOrder: (reorderSegments: [number, number][]) => number[] } diff --git a/packages/@ant/ink/src/core/cursor.ts b/packages/@ant/ink/src/core/cursor.ts index dc58a0eec..2e1c4005e 100644 --- a/packages/@ant/ink/src/core/cursor.ts +++ b/packages/@ant/ink/src/core/cursor.ts @@ -1,2 +1,2 @@ // Auto-generated stub — replace with real implementation -export type Cursor = any; +export type Cursor = any diff --git a/packages/@ant/ink/src/core/devtools.ts b/packages/@ant/ink/src/core/devtools.ts index 655d5da82..64c2cfba7 100644 --- a/packages/@ant/ink/src/core/devtools.ts +++ b/packages/@ant/ink/src/core/devtools.ts @@ -1,2 +1,2 @@ // Auto-generated stub — replace with real implementation -export {}; +export {} diff --git a/packages/@ant/ink/src/core/events/mouse-action-event.ts b/packages/@ant/ink/src/core/events/mouse-action-event.ts index b13d40dda..6b5523656 100644 --- a/packages/@ant/ink/src/core/events/mouse-action-event.ts +++ b/packages/@ant/ink/src/core/events/mouse-action-event.ts @@ -37,7 +37,9 @@ export class MouseActionEvent extends Event { /** Recompute local coords relative to the target Box. */ prepareForTarget(target: EventTarget): void { - const dom = target as unknown as { yogaNode?: { getComputedLeft?(): number; getComputedTop?(): number } } + const dom = target as unknown as { + yogaNode?: { getComputedLeft?(): number; getComputedTop?(): number } + } this.localCol = this.col - (dom.yogaNode?.getComputedLeft?.() ?? 0) this.localRow = this.row - (dom.yogaNode?.getComputedTop?.() ?? 0) } diff --git a/packages/@ant/ink/src/core/events/paste-event.ts b/packages/@ant/ink/src/core/events/paste-event.ts index 14136e76a..d933430f8 100644 --- a/packages/@ant/ink/src/core/events/paste-event.ts +++ b/packages/@ant/ink/src/core/events/paste-event.ts @@ -1,2 +1,2 @@ // Auto-generated stub — replace with real implementation -export type PasteEvent = any; +export type PasteEvent = any diff --git a/packages/@ant/ink/src/core/events/resize-event.ts b/packages/@ant/ink/src/core/events/resize-event.ts index 99d596988..bae2915bd 100644 --- a/packages/@ant/ink/src/core/events/resize-event.ts +++ b/packages/@ant/ink/src/core/events/resize-event.ts @@ -1,2 +1,2 @@ // Auto-generated stub — replace with real implementation -export type ResizeEvent = any; +export type ResizeEvent = any diff --git a/packages/@ant/ink/src/core/termio/osc.ts b/packages/@ant/ink/src/core/termio/osc.ts index f5e6f3712..81456808d 100644 --- a/packages/@ant/ink/src/core/termio/osc.ts +++ b/packages/@ant/ink/src/core/termio/osc.ts @@ -14,9 +14,18 @@ function execFileNoThrow( ): Promise<{ code: number; stdout: string; stderr: string }> { return new Promise(resolve => { const { input, timeout } = options - const proc = nodeExecFile(command, args, { timeout }, (error, stdout, stderr) => { - resolve({ code: error ? 1 : 0, stdout: stdout ?? '', stderr: stderr ?? '' }) - }) + const proc = nodeExecFile( + command, + args, + { timeout }, + (error, stdout, stderr) => { + resolve({ + code: error ? 1 : 0, + stdout: stdout ?? '', + stderr: stderr ?? '', + }) + }, + ) if (input && proc.stdin) { proc.stdin.write(input) proc.stdin.end() diff --git a/packages/@ant/ink/src/core/utils/sliceAnsi.ts b/packages/@ant/ink/src/core/utils/sliceAnsi.ts index 42abe7ac7..a75369fc4 100644 --- a/packages/@ant/ink/src/core/utils/sliceAnsi.ts +++ b/packages/@ant/ink/src/core/utils/sliceAnsi.ts @@ -49,7 +49,13 @@ export default function sliceAnsi( // pass start/end in display cells (via stringWidth), so position must // track the same units. const width = - token.type === 'ansi' ? 0 : token.type === 'char' ? (token.fullWidth ? 2 : stringWidth(token.value)) : 0 + token.type === 'ansi' + ? 0 + : token.type === 'char' + ? token.fullWidth + ? 2 + : stringWidth(token.value) + : 0 // Break AFTER trailing zero-width marks — a combining mark attaches to // the preceding base char, so "भा" (भ + ा, 1 display cell) sliced at diff --git a/packages/@ant/ink/src/hooks/useSearchInput.ts b/packages/@ant/ink/src/hooks/useSearchInput.ts index 943d0a8a7..d865dd603 100644 --- a/packages/@ant/ink/src/hooks/useSearchInput.ts +++ b/packages/@ant/ink/src/hooks/useSearchInput.ts @@ -110,7 +110,9 @@ export function useSearchInput({ if (e.key === 'delete') { e.preventDefault() if (cursorOffset < query.length) { - setQueryState(query.slice(0, cursorOffset) + query.slice(cursorOffset + 1)) + setQueryState( + query.slice(0, cursorOffset) + query.slice(cursorOffset + 1), + ) } return } @@ -159,7 +161,9 @@ export function useSearchInput({ return } if (cursorOffset < query.length) { - setQueryState(query.slice(0, cursorOffset) + query.slice(cursorOffset + 1)) + setQueryState( + query.slice(0, cursorOffset) + query.slice(cursorOffset + 1), + ) } return } @@ -207,7 +211,9 @@ export function useSearchInput({ // Regular character input if (e.key.length >= 1 && !UNHANDLED_SPECIAL_KEYS.has(e.key)) { e.preventDefault() - setQueryState(query.slice(0, cursorOffset) + e.key + query.slice(cursorOffset)) + setQueryState( + query.slice(0, cursorOffset) + e.key + query.slice(cursorOffset), + ) setCursorOffset(cursorOffset + 1) } } diff --git a/packages/@ant/ink/src/hooks/useTerminalNotification.ts b/packages/@ant/ink/src/hooks/useTerminalNotification.ts index 083a94ad0..d45e68687 100644 --- a/packages/@ant/ink/src/hooks/useTerminalNotification.ts +++ b/packages/@ant/ink/src/hooks/useTerminalNotification.ts @@ -1,7 +1,16 @@ import { createContext, useCallback, useContext, useMemo } from 'react' -import { isProgressReportingAvailable, type Progress } from '../core/terminal.js' +import { + isProgressReportingAvailable, + type Progress, +} from '../core/terminal.js' import { BEL } from '../core/termio/ansi.js' -import { ITERM2, OSC, osc, PROGRESS, wrapForMultiplexer } from '../core/termio/osc.js' +import { + ITERM2, + OSC, + osc, + PROGRESS, + wrapForMultiplexer, +} from '../core/termio/osc.js' type WriteRaw = (data: string) => void diff --git a/packages/@ant/ink/src/index.ts b/packages/@ant/ink/src/index.ts index 0911185bc..4cfffc8e1 100644 --- a/packages/@ant/ink/src/index.ts +++ b/packages/@ant/ink/src/index.ts @@ -10,13 +10,16 @@ // ============================================================ // Core API (render/createRoot) // ============================================================ -export { default as wrappedRender, renderSync, createRoot } from './core/root.js' +export { + default as wrappedRender, + renderSync, + createRoot, +} from './core/root.js' export type { RenderOptions, Instance, Root } from './core/root.js' export * from './theme/theme-types.js' // InkCore class export { default as Ink } from './core/ink.js' - // ============================================================ // Keybindings // ============================================================ @@ -68,8 +71,21 @@ export type { // ============================================================ // Core types // ============================================================ -export type { DOMElement, TextNode, ElementNames, DOMNodeAttribute } from './core/dom.js' -export type { Styles, TextStyles, Color, RGBColor, HexColor, Ansi256Color, AnsiColor } from './core/styles.js' +export type { + DOMElement, + TextNode, + ElementNames, + DOMNodeAttribute, +} from './core/dom.js' +export type { + Styles, + TextStyles, + Color, + RGBColor, + HexColor, + Ansi256Color, + AnsiColor, +} from './core/styles.js' export type { Key } from './core/events/input-event.js' export type { FlickerReason, FrameEvent } from './core/frame.js' export type { MatchPosition } from './core/render-to-screen.js' @@ -83,7 +99,10 @@ export { ClickEvent } from './core/events/click-event.js' export { EventEmitter } from './core/events/emitter.js' export { Event } from './core/events/event.js' export { InputEvent } from './core/events/input-event.js' -export { TerminalFocusEvent, type TerminalFocusEventType } from './core/events/terminal-focus-event.js' +export { + TerminalFocusEvent, + type TerminalFocusEventType, +} from './core/events/terminal-focus-event.js' export { KeyboardEvent } from './core/events/keyboard-event.js' export { FocusEvent } from './core/events/focus-event.js' export { FocusManager } from './core/focus.js' @@ -92,17 +111,53 @@ export { stringWidth } from './core/stringWidth.js' export { default as wrapText } from './core/wrap-text.js' export { default as measureElement } from './core/measure-element.js' export { supportsTabStatus } from './core/termio/osc.js' -export { setClipboard, getClipboardPath, CLEAR_ITERM2_PROGRESS, CLEAR_TAB_STATUS, CLEAR_TERMINAL_TITLE, wrapForMultiplexer } from './core/termio/osc.js' -export { DISABLE_KITTY_KEYBOARD, DISABLE_MODIFY_OTHER_KEYS } from './core/termio/csi.js' -export { SHOW_CURSOR, DBP, DFE, DISABLE_MOUSE_TRACKING, EXIT_ALT_SCREEN, HIDE_CURSOR, ENTER_ALT_SCREEN, ENABLE_MOUSE_TRACKING } from './core/termio/dec.js' +export { + setClipboard, + getClipboardPath, + CLEAR_ITERM2_PROGRESS, + CLEAR_TAB_STATUS, + CLEAR_TERMINAL_TITLE, + wrapForMultiplexer, +} from './core/termio/osc.js' +export { + DISABLE_KITTY_KEYBOARD, + DISABLE_MODIFY_OTHER_KEYS, +} from './core/termio/csi.js' +export { + SHOW_CURSOR, + DBP, + DFE, + DISABLE_MOUSE_TRACKING, + EXIT_ALT_SCREEN, + HIDE_CURSOR, + ENTER_ALT_SCREEN, + ENABLE_MOUSE_TRACKING, +} from './core/termio/dec.js' export { default as instances } from './core/instances.js' -export { default as renderBorder, type BorderTextOptions } from './core/render-border.js' -export { isSynchronizedOutputSupported, isXtermJs, hasCursorUpViewportYankBug, writeDiffToTerminal } from './core/terminal.js' -export { colorize, applyColor, applyTextStyles, type ColorType } from './core/colorize.js' +export { + default as renderBorder, + type BorderTextOptions, +} from './core/render-border.js' +export { + isSynchronizedOutputSupported, + isXtermJs, + hasCursorUpViewportYankBug, + writeDiffToTerminal, +} from './core/terminal.js' +export { + colorize, + applyColor, + applyTextStyles, + type ColorType, +} from './core/colorize.js' export { wrapAnsi } from './core/wrapAnsi.js' export { default as styles } from './core/styles.js' export { clamp } from './core/layout/geometry.js' -export { getTerminalFocusState, getTerminalFocused, subscribeTerminalFocus } from './core/terminal-focus-state.js' +export { + getTerminalFocusState, + getTerminalFocused, + subscribeTerminalFocus, +} from './core/terminal-focus-state.js' export { supportsHyperlinks } from './core/supports-hyperlinks.js' // ============================================================ @@ -112,7 +167,11 @@ export { default as BaseBox } from './components/Box.js' export type { Props as BaseBoxProps } from './components/Box.js' export { default as BaseText } from './components/Text.js' export type { Props as BaseTextProps } from './components/Text.js' -export { default as Button, type ButtonState, type Props as ButtonProps } from './components/Button.js' +export { + default as Button, + type ButtonState, + type Props as ButtonProps, +} from './components/Button.js' export { default as Link } from './components/Link.js' export type { Props as LinkProps } from './components/Link.js' export { default as Newline } from './components/Newline.js' @@ -120,13 +179,19 @@ export type { Props as NewlineProps } from './components/Newline.js' export { default as Spacer } from './components/Spacer.js' export { NoSelect } from './components/NoSelect.js' export { RawAnsi } from './components/RawAnsi.js' -export { default as ScrollBox, type ScrollBoxHandle } from './components/ScrollBox.js' +export { + default as ScrollBox, + type ScrollBoxHandle, +} from './components/ScrollBox.js' export { AlternateScreen } from './components/AlternateScreen.js' // App types export type { Props as AppProps } from './components/AppContext.js' export type { Props as StdinProps } from './components/StdinContext.js' -export { TerminalSizeContext, type TerminalSize } from './components/TerminalSizeContext.js' +export { + TerminalSizeContext, + type TerminalSize, +} from './components/TerminalSizeContext.js' // ============================================================ // Hooks @@ -140,14 +205,21 @@ export { default as useStdin } from './hooks/use-stdin.js' export { useTerminalSize } from './hooks/useTerminalSize.js' export { useTimeout } from './hooks/useTimeout.js' export { useMinDisplayTime } from './hooks/useMinDisplayTime.js' -export { useDoublePress, DOUBLE_PRESS_TIMEOUT_MS } from './hooks/useDoublePress.js' +export { + useDoublePress, + DOUBLE_PRESS_TIMEOUT_MS, +} from './hooks/useDoublePress.js' export { useTabStatus, type TabStatusKind } from './hooks/use-tab-status.js' export { useTerminalFocus } from './hooks/use-terminal-focus.js' export { useTerminalTitle } from './hooks/use-terminal-title.js' export { useTerminalViewport } from './hooks/use-terminal-viewport.js' export { useSearchHighlight } from './hooks/use-search-highlight.js' export { useDeclaredCursor } from './hooks/use-declared-cursor.js' -export { TerminalWriteProvider, useTerminalNotification, type TerminalNotification } from './hooks/useTerminalNotification.js' +export { + TerminalWriteProvider, + useTerminalNotification, + type TerminalNotification, +} from './hooks/useTerminalNotification.js' // ============================================================ // Theme (Layer 3) diff --git a/packages/@ant/ink/src/keybindings/KeybindingContext.tsx b/packages/@ant/ink/src/keybindings/KeybindingContext.tsx index 8cd56a408..4365f3cdb 100644 --- a/packages/@ant/ink/src/keybindings/KeybindingContext.tsx +++ b/packages/@ant/ink/src/keybindings/KeybindingContext.tsx @@ -1,84 +1,63 @@ -import React, { - createContext, - type RefObject, - useContext, - useLayoutEffect, - useMemo, -} from 'react' -import type { Key } from '../core/events/input-event.js' -import { - type ChordResolveResult, - getBindingDisplayText, - resolveKeyWithChordState, -} from './resolver.js' -import type { - KeybindingContextName, - ParsedBinding, - ParsedKeystroke, -} from './types.js' +import React, { createContext, type RefObject, useContext, useLayoutEffect, useMemo } from 'react'; +import type { Key } from '../core/events/input-event.js'; +import { type ChordResolveResult, getBindingDisplayText, resolveKeyWithChordState } from './resolver.js'; +import type { KeybindingContextName, ParsedBinding, ParsedKeystroke } from './types.js'; /** Handler registration for action callbacks */ type HandlerRegistration = { - action: string - context: KeybindingContextName - handler: () => void -} + action: string; + context: KeybindingContextName; + handler: () => void; +}; type KeybindingContextValue = { /** Resolve a key input to an action name (with chord support) */ - resolve: ( - input: string, - key: Key, - activeContexts: KeybindingContextName[], - ) => ChordResolveResult + resolve: (input: string, key: Key, activeContexts: KeybindingContextName[]) => ChordResolveResult; /** Update the pending chord state */ - setPendingChord: (pending: ParsedKeystroke[] | null) => void + setPendingChord: (pending: ParsedKeystroke[] | null) => void; /** Get display text for an action (e.g., "ctrl+t") */ - getDisplayText: ( - action: string, - context: KeybindingContextName, - ) => string | undefined + getDisplayText: (action: string, context: KeybindingContextName) => string | undefined; /** All parsed bindings (for help display) */ - bindings: ParsedBinding[] + bindings: ParsedBinding[]; /** Current pending chord keystrokes (null if not in a chord) */ - pendingChord: ParsedKeystroke[] | null + pendingChord: ParsedKeystroke[] | null; /** Currently active keybinding contexts (for priority resolution) */ - activeContexts: Set + activeContexts: Set; /** Register a context as active (call on mount) */ - registerActiveContext: (context: KeybindingContextName) => void + registerActiveContext: (context: KeybindingContextName) => void; /** Unregister a context (call on unmount) */ - unregisterActiveContext: (context: KeybindingContextName) => void + unregisterActiveContext: (context: KeybindingContextName) => void; /** Register a handler for an action (used by useKeybinding) */ - registerHandler: (registration: HandlerRegistration) => () => void + registerHandler: (registration: HandlerRegistration) => () => void; /** Invoke all handlers for an action (used by ChordInterceptor) */ - invokeAction: (action: string) => boolean -} + invokeAction: (action: string) => boolean; +}; -const KeybindingContext = createContext(null) +const KeybindingContext = createContext(null); type ProviderProps = { - bindings: ParsedBinding[] + bindings: ParsedBinding[]; /** Ref for immediate access to pending chord (avoids React state delay) */ - pendingChordRef: RefObject + pendingChordRef: RefObject; /** State value for re-renders (UI updates) */ - pendingChord: ParsedKeystroke[] | null - setPendingChord: (pending: ParsedKeystroke[] | null) => void - activeContexts: Set - registerActiveContext: (context: KeybindingContextName) => void - unregisterActiveContext: (context: KeybindingContextName) => void + pendingChord: ParsedKeystroke[] | null; + setPendingChord: (pending: ParsedKeystroke[] | null) => void; + activeContexts: Set; + registerActiveContext: (context: KeybindingContextName) => void; + unregisterActiveContext: (context: KeybindingContextName) => void; /** Ref to handler registry (used by ChordInterceptor) */ - handlerRegistryRef: RefObject>> - children: React.ReactNode -} + handlerRegistryRef: RefObject>>; + children: React.ReactNode; +}; export function KeybindingProvider({ bindings, @@ -93,60 +72,54 @@ export function KeybindingProvider({ }: ProviderProps): React.ReactNode { const value = useMemo(() => { const getDisplay = (action: string, context: KeybindingContextName) => - getBindingDisplayText(action, context, bindings) + getBindingDisplayText(action, context, bindings); // Register a handler for an action const registerHandler = (registration: HandlerRegistration) => { - const registry = handlerRegistryRef.current - if (!registry) return () => {} + const registry = handlerRegistryRef.current; + if (!registry) return () => {}; if (!registry.has(registration.action)) { - registry.set(registration.action, new Set()) + registry.set(registration.action, new Set()); } - registry.get(registration.action)!.add(registration) + registry.get(registration.action)!.add(registration); // Return unregister function return () => { - const handlers = registry.get(registration.action) + const handlers = registry.get(registration.action); if (handlers) { - handlers.delete(registration) + handlers.delete(registration); if (handlers.size === 0) { - registry.delete(registration.action) + registry.delete(registration.action); } } - } - } + }; + }; // Invoke all handlers for an action const invokeAction = (action: string): boolean => { - const registry = handlerRegistryRef.current - if (!registry) return false + const registry = handlerRegistryRef.current; + if (!registry) return false; - const handlers = registry.get(action) - if (!handlers || handlers.size === 0) return false + const handlers = registry.get(action); + if (!handlers || handlers.size === 0) return false; // Find handlers whose context is active for (const registration of handlers) { if (activeContexts.has(registration.context)) { - registration.handler() - return true + registration.handler(); + return true; } } - return false - } + return false; + }; return { // Use ref for immediate access to pending chord, avoiding React state delay // This is critical for chord sequences where the second key might be pressed // before React re-renders with the updated pendingChord state resolve: (input, key, contexts) => - resolveKeyWithChordState( - input, - key, - contexts, - bindings, - pendingChordRef.current, - ), + resolveKeyWithChordState(input, key, contexts, bindings, pendingChordRef.current), setPendingChord, getDisplayText: getDisplay, bindings, @@ -156,7 +129,7 @@ export function KeybindingProvider({ unregisterActiveContext, registerHandler, invokeAction, - } + }; }, [ bindings, pendingChordRef, @@ -166,23 +139,17 @@ export function KeybindingProvider({ registerActiveContext, unregisterActiveContext, handlerRegistryRef, - ]) + ]); - return ( - - {children} - - ) + return {children}; } export function useKeybindingContext(): KeybindingContextValue { - const ctx = useContext(KeybindingContext) + const ctx = useContext(KeybindingContext); if (!ctx) { - throw new Error( - 'useKeybindingContext must be used within KeybindingProvider', - ) + throw new Error('useKeybindingContext must be used within KeybindingProvider'); } - return ctx + return ctx; } /** @@ -190,7 +157,7 @@ export function useKeybindingContext(): KeybindingContextValue { * Useful for components that may render before provider is available. */ export function useOptionalKeybindingContext(): KeybindingContextValue | null { - return useContext(KeybindingContext) + return useContext(KeybindingContext); } /** @@ -208,18 +175,15 @@ export function useOptionalKeybindingContext(): KeybindingContextValue | null { * } * ``` */ -export function useRegisterKeybindingContext( - context: KeybindingContextName, - isActive: boolean = true, -): void { - const keybindingContext = useOptionalKeybindingContext() +export function useRegisterKeybindingContext(context: KeybindingContextName, isActive: boolean = true): void { + const keybindingContext = useOptionalKeybindingContext(); useLayoutEffect(() => { - if (!keybindingContext || !isActive) return + if (!keybindingContext || !isActive) return; - keybindingContext.registerActiveContext(context) + keybindingContext.registerActiveContext(context); return () => { - keybindingContext.unregisterActiveContext(context) - } - }, [context, keybindingContext, isActive]) + keybindingContext.unregisterActiveContext(context); + }; + }, [context, keybindingContext, isActive]); } diff --git a/packages/@ant/ink/src/keybindings/KeybindingSetup.tsx b/packages/@ant/ink/src/keybindings/KeybindingSetup.tsx index 91c047de8..2a6b69d21 100644 --- a/packages/@ant/ink/src/keybindings/KeybindingSetup.tsx +++ b/packages/@ant/ink/src/keybindings/KeybindingSetup.tsx @@ -5,49 +5,47 @@ * wrapper. App-specific dependencies (binding loading, change subscription, * warning display, debug logging) are injected via props. */ -import React, { useCallback, useEffect, useRef, useState } from 'react' -import type { InputEvent } from '../core/events/input-event.js' +import React, { useCallback, useEffect, useRef, useState } from 'react'; +import type { InputEvent } from '../core/events/input-event.js'; // ChordInterceptor intentionally uses useInput to intercept all keystrokes before // other handlers process them - this is required for chord sequence support // eslint-disable-next-line custom-rules/prefer-use-keybindings -import useInput from '../hooks/use-input.js' -import type { Key } from '../core/events/input-event.js' -import { KeybindingProvider } from './KeybindingContext.js' -import { resolveKeyWithChordState } from './resolver.js' +import useInput from '../hooks/use-input.js'; +import type { Key } from '../core/events/input-event.js'; +import { KeybindingProvider } from './KeybindingContext.js'; +import { resolveKeyWithChordState } from './resolver.js'; import type { KeybindingContextName, KeybindingsLoadResult, ParsedBinding, ParsedKeystroke, KeybindingWarning, -} from './types.js' +} from './types.js'; /** * Timeout for chord sequences in milliseconds. * If the user doesn't complete the chord within this time, it's cancelled. */ -const CHORD_TIMEOUT_MS = 1000 +const CHORD_TIMEOUT_MS = 1000; export type KeybindingSetupProps = { - children: React.ReactNode + children: React.ReactNode; /** Load bindings synchronously for initial render */ - loadBindings: () => KeybindingsLoadResult + loadBindings: () => KeybindingsLoadResult; /** Subscribe to binding changes; return an unsubscribe function */ - subscribeToChanges: ( - callback: (result: KeybindingsLoadResult) => void, - ) => () => void + subscribeToChanges: (callback: (result: KeybindingsLoadResult) => void) => () => void; /** Initialize any file watcher (idempotent). Called once on mount. */ - initWatcher?: () => void | Promise + initWatcher?: () => void | Promise; /** Optional callback when warnings are emitted (initial load or reload) */ - onWarnings?: (warnings: KeybindingWarning[], isReload: boolean) => void + onWarnings?: (warnings: KeybindingWarning[], isReload: boolean) => void; /** Optional debug logger */ - onDebugLog?: (message: string) => void -} + onDebugLog?: (message: string) => void; +}; export function KeybindingSetup({ children, @@ -59,115 +57,105 @@ export function KeybindingSetup({ }: KeybindingSetupProps): React.ReactNode { // Load bindings synchronously for initial render const [loadResult, setLoadResult] = useState(() => { - const result = loadBindings() + const result = loadBindings(); onDebugLog?.( `[keybindings] KeybindingSetup initialized with ${result.bindings.length} bindings, ${result.warnings.length} warnings`, - ) - return result - }) + ); + return result; + }); - const { bindings, warnings } = loadResult + const { bindings, warnings } = loadResult; // Track if this is a reload (not initial load) - const [isReload, setIsReload] = useState(false) + const [isReload, setIsReload] = useState(false); // Notify about warnings useEffect(() => { - onWarnings?.(warnings, isReload) - }, [warnings, isReload, onWarnings]) + onWarnings?.(warnings, isReload); + }, [warnings, isReload, onWarnings]); // Chord state management - use ref for immediate access, state for re-renders - const pendingChordRef = useRef(null) - const [pendingChord, setPendingChordState] = useState< - ParsedKeystroke[] | null - >(null) - const chordTimeoutRef = useRef(null) + const pendingChordRef = useRef(null); + const [pendingChord, setPendingChordState] = useState(null); + const chordTimeoutRef = useRef(null); // Handler registry for action callbacks (used by ChordInterceptor to invoke handlers) const handlerRegistryRef = useRef( new Map< string, Set<{ - action: string - context: KeybindingContextName - handler: () => void + action: string; + context: KeybindingContextName; + handler: () => void; }> >(), - ) + ); // Active context tracking for keybinding priority resolution - const activeContextsRef = useRef>(new Set()) + const activeContextsRef = useRef>(new Set()); - const registerActiveContext = useCallback( - (context: KeybindingContextName) => { - activeContextsRef.current.add(context) - }, - [], - ) + const registerActiveContext = useCallback((context: KeybindingContextName) => { + activeContextsRef.current.add(context); + }, []); - const unregisterActiveContext = useCallback( - (context: KeybindingContextName) => { - activeContextsRef.current.delete(context) - }, - [], - ) + const unregisterActiveContext = useCallback((context: KeybindingContextName) => { + activeContextsRef.current.delete(context); + }, []); // Clear chord timeout when component unmounts or chord changes const clearChordTimeout = useCallback(() => { if (chordTimeoutRef.current) { - clearTimeout(chordTimeoutRef.current) - chordTimeoutRef.current = null + clearTimeout(chordTimeoutRef.current); + chordTimeoutRef.current = null; } - }, []) + }, []); // Wrapper for setPendingChord that manages timeout and syncs ref+state const setPendingChord = useCallback( (pending: ParsedKeystroke[] | null) => { - clearChordTimeout() + clearChordTimeout(); if (pending !== null) { // Set timeout to cancel chord if not completed chordTimeoutRef.current = setTimeout( (pendingChordRef, setPendingChordState) => { - onDebugLog?.('[keybindings] Chord timeout - cancelling') - pendingChordRef.current = null - setPendingChordState(null) + onDebugLog?.('[keybindings] Chord timeout - cancelling'); + pendingChordRef.current = null; + setPendingChordState(null); }, CHORD_TIMEOUT_MS, pendingChordRef, setPendingChordState, - ) + ); } // Update ref immediately for synchronous access in resolve() - pendingChordRef.current = pending + pendingChordRef.current = pending; // Update state to trigger re-renders for UI updates - setPendingChordState(pending) + setPendingChordState(pending); }, [clearChordTimeout, onDebugLog], - ) + ); useEffect(() => { // Initialize file watcher (idempotent - only runs once) - void initWatcher?.() + void initWatcher?.(); // Subscribe to changes const unsubscribe = subscribeToChanges(result => { // Any callback invocation is a reload since initial load happens // synchronously in useState, not via this subscription - setIsReload(true) + setIsReload(true); - setLoadResult(result) - onDebugLog?.( - `[keybindings] Reloaded: ${result.bindings.length} bindings, ${result.warnings.length} warnings`, - ) - }) + setLoadResult(result); + onDebugLog?.(`[keybindings] Reloaded: ${result.bindings.length} bindings, ${result.warnings.length} warnings`); + }); return () => { - unsubscribe() - clearChordTimeout() - } - }, [subscribeToChanges, initWatcher, clearChordTimeout, onDebugLog]) + unsubscribe(); + clearChordTimeout(); + }; + }, [subscribeToChanges, initWatcher, clearChordTimeout, onDebugLog]); return ( {children} - ) + ); } /** @@ -203,10 +191,10 @@ export function KeybindingSetup({ * system could recognize it as completing a chord. */ type HandlerRegistration = { - action: string - context: KeybindingContextName - handler: () => void -} + action: string; + context: KeybindingContextName; + handler: () => void; +}; function ChordInterceptor({ bindings, @@ -215,11 +203,11 @@ function ChordInterceptor({ activeContexts, handlerRegistryRef, }: { - bindings: ParsedBinding[] - pendingChordRef: React.RefObject - setPendingChord: (pending: ParsedKeystroke[] | null) => void - activeContexts: Set - handlerRegistryRef: React.RefObject>> + bindings: ParsedBinding[]; + pendingChordRef: React.RefObject; + setPendingChord: (pending: ParsedKeystroke[] | null) => void; + activeContexts: Set; + handlerRegistryRef: React.RefObject>>; }): null { const handleInput = useCallback( (input: string, key: Key, event: InputEvent) => { @@ -228,94 +216,78 @@ function ChordInterceptor({ // here. Skip the registry scan. Mid-chord wheel still falls through so // scrolling cancels the pending chord like any other non-matching key. if ((key.wheelUp || key.wheelDown) && pendingChordRef.current === null) { - return + return; } // Build context list from registered handlers + activeContexts + Global - const registry = handlerRegistryRef.current - const handlerContexts = new Set() + const registry = handlerRegistryRef.current; + const handlerContexts = new Set(); if (registry) { for (const handlers of registry.values()) { for (const registration of handlers) { - handlerContexts.add(registration.context) + handlerContexts.add(registration.context); } } } - const contexts: KeybindingContextName[] = [ - ...handlerContexts, - ...activeContexts, - 'Global', - ] + const contexts: KeybindingContextName[] = [...handlerContexts, ...activeContexts, 'Global']; // Track whether we're completing a chord (pending was non-null) - const wasInChord = pendingChordRef.current !== null + const wasInChord = pendingChordRef.current !== null; // Check if this keystroke is part of a chord sequence - const result = resolveKeyWithChordState( - input, - key, - contexts, - bindings, - pendingChordRef.current, - ) + const result = resolveKeyWithChordState(input, key, contexts, bindings, pendingChordRef.current); switch (result.type) { case 'chord_started': // This key starts a chord - store pending state and stop propagation - setPendingChord(result.pending) - event.stopImmediatePropagation() - break + setPendingChord(result.pending); + event.stopImmediatePropagation(); + break; case 'match': { // Clear pending state - setPendingChord(null) + setPendingChord(null); // Only invoke handlers and stop propagation for chord completions // (multi-keystroke sequences). Single-keystroke matches should propagate // to per-hook handlers to avoid interfering with other input handling. if (wasInChord) { - const contextsSet = new Set(contexts) + const contextsSet = new Set(contexts); if (registry) { - const handlers = registry.get(result.action) + const handlers = registry.get(result.action); if (handlers && handlers.size > 0) { for (const registration of handlers) { if (contextsSet.has(registration.context)) { - registration.handler() - event.stopImmediatePropagation() - break + registration.handler(); + event.stopImmediatePropagation(); + break; } } } } } - break + break; } case 'chord_cancelled': - setPendingChord(null) - event.stopImmediatePropagation() - break + setPendingChord(null); + event.stopImmediatePropagation(); + break; case 'unbound': - setPendingChord(null) - event.stopImmediatePropagation() - break + setPendingChord(null); + event.stopImmediatePropagation(); + break; case 'none': // No chord involvement - let other handlers process - break + break; } }, - [ - bindings, - pendingChordRef, - setPendingChord, - activeContexts, - handlerRegistryRef, - ], - ) + [bindings, pendingChordRef, setPendingChord, activeContexts, handlerRegistryRef], + ); - useInput(handleInput) + useInput(handleInput); - return null + return null; } diff --git a/packages/@ant/ink/src/theme/Byline.tsx b/packages/@ant/ink/src/theme/Byline.tsx index bb34e52fc..ca6630530 100644 --- a/packages/@ant/ink/src/theme/Byline.tsx +++ b/packages/@ant/ink/src/theme/Byline.tsx @@ -1,10 +1,10 @@ -import React, { Children, isValidElement } from 'react' -import { Text } from '../index.js' +import React, { Children, isValidElement } from 'react'; +import { Text } from '../index.js'; type Props = { /** The items to join with a middot separator */ - children: React.ReactNode -} + children: React.ReactNode; +}; /** * Joins children with a middot separator (" · ") for inline metadata display. @@ -36,22 +36,20 @@ type Props = { */ export function Byline({ children }: Props): React.ReactNode { // Children.toArray already filters out null, undefined, and booleans - const validChildren = Children.toArray(children) + const validChildren = Children.toArray(children); if (validChildren.length === 0) { - return null + return null; } return ( <> {validChildren.map((child, index) => ( - + {index > 0 && · } {child} ))} - ) + ); } diff --git a/packages/@ant/ink/src/theme/ConfigurableShortcutHint.tsx b/packages/@ant/ink/src/theme/ConfigurableShortcutHint.tsx index 85911d387..683fb45fb 100644 --- a/packages/@ant/ink/src/theme/ConfigurableShortcutHint.tsx +++ b/packages/@ant/ink/src/theme/ConfigurableShortcutHint.tsx @@ -6,30 +6,18 @@ * internal theme components. */ -import React from 'react' -import { KeyboardShortcutHint } from './KeyboardShortcutHint.js' +import React from 'react'; +import { KeyboardShortcutHint } from './KeyboardShortcutHint.js'; type Props = { - action: string - context: string - fallback: string - description: string - parens?: boolean - bold?: boolean -} + action: string; + context: string; + fallback: string; + description: string; + parens?: boolean; + bold?: boolean; +}; -export function ConfigurableShortcutHint({ - fallback, - description, - parens, - bold, -}: Props): React.ReactNode { - return ( - - ) +export function ConfigurableShortcutHint({ fallback, description, parens, bold }: Props): React.ReactNode { + return ; } diff --git a/packages/@ant/ink/src/theme/Dialog.tsx b/packages/@ant/ink/src/theme/Dialog.tsx index caf6166a0..4a396cead 100644 --- a/packages/@ant/ink/src/theme/Dialog.tsx +++ b/packages/@ant/ink/src/theme/Dialog.tsx @@ -1,26 +1,23 @@ -import React from 'react' -import { - type ExitState, - useExitOnCtrlCDWithKeybindings, -} from '../hooks/useExitOnCtrlCD.js' -import { Box, Text } from '../index.js' -import { useKeybinding } from '../keybindings/useKeybinding.js' -import type { Theme } from './theme-types.js' -import { ConfigurableShortcutHint } from './ConfigurableShortcutHint.js' -import { Byline } from './Byline.js' -import { KeyboardShortcutHint } from './KeyboardShortcutHint.js' -import { Pane } from './Pane.js' +import React from 'react'; +import { type ExitState, useExitOnCtrlCDWithKeybindings } from '../hooks/useExitOnCtrlCD.js'; +import { Box, Text } from '../index.js'; +import { useKeybinding } from '../keybindings/useKeybinding.js'; +import type { Theme } from './theme-types.js'; +import { ConfigurableShortcutHint } from './ConfigurableShortcutHint.js'; +import { Byline } from './Byline.js'; +import { KeyboardShortcutHint } from './KeyboardShortcutHint.js'; +import { Pane } from './Pane.js'; type DialogProps = { - title: React.ReactNode - subtitle?: React.ReactNode - children: React.ReactNode - onCancel: () => void - color?: keyof Theme - hideInputGuide?: boolean - hideBorder?: boolean + title: React.ReactNode; + subtitle?: React.ReactNode; + children: React.ReactNode; + onCancel: () => void; + color?: keyof Theme; + hideInputGuide?: boolean; + hideBorder?: boolean; /** Custom input guide content. Receives exitState for Ctrl+C/D pending display. */ - inputGuide?: (exitState: ExitState) => React.ReactNode + inputGuide?: (exitState: ExitState) => React.ReactNode; /** * Controls whether Dialog's built-in confirm:no (Esc/n) and app:exit/interrupt * (Ctrl-C/D) keybindings are active. Set to `false` while an embedded text @@ -28,8 +25,8 @@ type DialogProps = { * consumed by Dialog. TextInput has its own ctrl+c/d handlers (cancel on * press, delete-forward on ctrl+d with text). Defaults to `true`. */ - isCancelActive?: boolean -} + isCancelActive?: boolean; +}; export function Dialog({ title, @@ -42,11 +39,7 @@ export function Dialog({ inputGuide, isCancelActive = true, }: DialogProps): React.ReactNode { - const exitState = useExitOnCtrlCDWithKeybindings( - undefined, - undefined, - isCancelActive, - ) + const exitState = useExitOnCtrlCDWithKeybindings(undefined, undefined, isCancelActive); // Use configurable keybinding for ESC to cancel. // isCancelActive lets consumers (e.g. ElicitationDialog) disable this while @@ -55,21 +48,16 @@ export function Dialog({ useKeybinding('confirm:no', onCancel, { context: 'Confirmation', isActive: isCancelActive, - }) + }); const defaultInputGuide = exitState.pending ? ( Press {exitState.keyName} again to exit ) : ( - + - ) + ); const content = ( <> @@ -90,11 +78,11 @@ export function Dialog({ )} - ) + ); if (hideBorder) { - return content + return content; } - return {content} + return {content}; } diff --git a/packages/@ant/ink/src/theme/Divider.tsx b/packages/@ant/ink/src/theme/Divider.tsx index 077546a75..1792a1fdf 100644 --- a/packages/@ant/ink/src/theme/Divider.tsx +++ b/packages/@ant/ink/src/theme/Divider.tsx @@ -1,33 +1,33 @@ -import React from 'react' -import { useTerminalSize } from '../hooks/useTerminalSize.js' -import { stringWidth } from '../core/stringWidth.js' -import { Ansi, Text } from '../index.js' -import type { Theme } from './theme-types.js' +import React from 'react'; +import { useTerminalSize } from '../hooks/useTerminalSize.js'; +import { stringWidth } from '../core/stringWidth.js'; +import { Ansi, Text } from '../index.js'; +import type { Theme } from './theme-types.js'; type DividerProps = { /** * Width of the divider in characters. * Defaults to terminal width. */ - width?: number + width?: number; /** * Theme color for the divider. * If not provided, dimColor is used. */ - color?: keyof Theme + color?: keyof Theme; /** * Character to use for the divider line. * @default '─' */ - char?: string + char?: string; /** * Padding to subtract from the width (e.g., for indentation). * @default 0 */ - padding?: number + padding?: number; /** * Title shown in the middle of the divider. @@ -37,8 +37,8 @@ type DividerProps = { * // ─────────── Title ─────────── * */ - title?: string -} + title?: string; +}; /** * A horizontal divider line. @@ -63,21 +63,15 @@ type DividerProps = { * // With centered title * */ -export function Divider({ - width, - color, - char = '─', - padding = 0, - title, -}: DividerProps): React.ReactNode { - const { columns: terminalWidth } = useTerminalSize() - const effectiveWidth = Math.max(0, (width ?? terminalWidth) - padding) +export function Divider({ width, color, char = '─', padding = 0, title }: DividerProps): React.ReactNode { + const { columns: terminalWidth } = useTerminalSize(); + const effectiveWidth = Math.max(0, (width ?? terminalWidth) - padding); if (title) { - const titleWidth = stringWidth(title) + 2 // +2 for spaces around title - const sideWidth = Math.max(0, effectiveWidth - titleWidth) - const leftWidth = Math.floor(sideWidth / 2) - const rightWidth = sideWidth - leftWidth + const titleWidth = stringWidth(title) + 2; // +2 for spaces around title + const sideWidth = Math.max(0, effectiveWidth - titleWidth); + const leftWidth = Math.floor(sideWidth / 2); + const rightWidth = sideWidth - leftWidth; return ( {char.repeat(leftWidth)}{' '} @@ -86,12 +80,12 @@ export function Divider({ {' '} {char.repeat(rightWidth)} - ) + ); } return ( {char.repeat(effectiveWidth)} - ) + ); } diff --git a/packages/@ant/ink/src/theme/FuzzyPicker.tsx b/packages/@ant/ink/src/theme/FuzzyPicker.tsx index 642c09e7d..86ce0287b 100644 --- a/packages/@ant/ink/src/theme/FuzzyPicker.tsx +++ b/packages/@ant/ink/src/theme/FuzzyPicker.tsx @@ -1,72 +1,72 @@ -import * as React from 'react' -import { useEffect, useState } from 'react' -import { useSearchInput } from '../hooks/useSearchInput.js' -import { useTerminalSize } from '../hooks/useTerminalSize.js' -import type { KeyboardEvent } from '../core/events/keyboard-event.js' -import { clamp } from '../core/layout/geometry.js' -import { Box, Text, useTerminalFocus } from '../index.js' -import { SearchBox } from './SearchBox.js' -import { Byline } from './Byline.js' -import { KeyboardShortcutHint } from './KeyboardShortcutHint.js' -import { ListItem } from './ListItem.js' -import { Pane } from './Pane.js' +import * as React from 'react'; +import { useEffect, useState } from 'react'; +import { useSearchInput } from '../hooks/useSearchInput.js'; +import { useTerminalSize } from '../hooks/useTerminalSize.js'; +import type { KeyboardEvent } from '../core/events/keyboard-event.js'; +import { clamp } from '../core/layout/geometry.js'; +import { Box, Text, useTerminalFocus } from '../index.js'; +import { SearchBox } from './SearchBox.js'; +import { Byline } from './Byline.js'; +import { KeyboardShortcutHint } from './KeyboardShortcutHint.js'; +import { ListItem } from './ListItem.js'; +import { Pane } from './Pane.js'; type PickerAction = { /** Hint label shown in the byline, e.g. "mention" → "Tab to mention". */ - action: string - handler: (item: T) => void -} + action: string; + handler: (item: T) => void; +}; type Props = { - title: string - placeholder?: string - initialQuery?: string - items: readonly T[] - getKey: (item: T) => string + title: string; + placeholder?: string; + initialQuery?: string; + items: readonly T[]; + getKey: (item: T) => string; /** Keep to one line — preview handles overflow. */ - renderItem: (item: T, isFocused: boolean) => React.ReactNode - renderPreview?: (item: T) => React.ReactNode + renderItem: (item: T, isFocused: boolean) => React.ReactNode; + renderPreview?: (item: T) => React.ReactNode; /** 'right' keeps hints stable (no bounce), but needs width. */ - previewPosition?: 'bottom' | 'right' - visibleCount?: number + previewPosition?: 'bottom' | 'right'; + visibleCount?: number; /** * 'up' puts items[0] at the bottom next to the input (atuin-style). Arrows * always match screen direction — ↑ walks visually up regardless. */ - direction?: 'down' | 'up' + direction?: 'down' | 'up'; /** Caller owns filtering: re-filter on each call and pass new items. */ - onQueryChange: (query: string) => void + onQueryChange: (query: string) => void; /** Enter key. Primary action. */ - onSelect: (item: T) => void + onSelect: (item: T) => void; /** * Tab key. If provided, Tab no longer aliases Enter — it gets its own * handler and hint. Shift+Tab falls through to this if onShiftTab is unset. */ - onTab?: PickerAction + onTab?: PickerAction; /** Shift+Tab key. Gets its own hint. */ - onShiftTab?: PickerAction + onShiftTab?: PickerAction; /** * Fires when the focused item changes (via arrows or when items reset). * Useful for async preview loading — keeps I/O out of renderPreview. */ - onFocus?: (item: T | undefined) => void - onCancel: () => void + onFocus?: (item: T | undefined) => void; + onCancel: () => void; /** Shown when items is empty. Caller bakes loading/searching state into this. */ - emptyMessage?: string | ((query: string) => string) + emptyMessage?: string | ((query: string) => string); /** * Status line below the list, e.g. "500+ matches" or "42 matches…". * Caller decides when to show it — pass undefined to hide. */ - matchLabel?: string - selectAction?: string - extraHints?: React.ReactNode -} + matchLabel?: string; + selectAction?: string; + extraHints?: React.ReactNode; +}; -const DEFAULT_VISIBLE = 8 +const DEFAULT_VISIBLE = 8; // Pane (paddingTop + Divider) + title + 3 gaps + SearchBox (rounded border = 3 // rows) + hints. matchLabel adds +1 when present, accounted for separately. -const CHROME_ROWS = 10 -const MIN_VISIBLE = 2 +const CHROME_ROWS = 10; +const MIN_VISIBLE = 2; export function FuzzyPicker({ title, @@ -90,25 +90,22 @@ export function FuzzyPicker({ selectAction = 'select', extraHints, }: Props): React.ReactNode { - const isTerminalFocused = useTerminalFocus() - const { rows, columns } = useTerminalSize() - const [focusedIndex, setFocusedIndex] = useState(0) + const isTerminalFocused = useTerminalFocus(); + const { rows, columns } = useTerminalSize(); + const [focusedIndex, setFocusedIndex] = useState(0); // Cap visibleCount so the picker never exceeds the terminal height. When it // overflows, each re-render (arrow key, ctrl+p) mis-positions the cursor-up // by the overflow amount and a previously-drawn line flashes blank. - const visibleCount = Math.max( - MIN_VISIBLE, - Math.min(requestedVisible, rows - CHROME_ROWS - (matchLabel ? 1 : 0)), - ) + const visibleCount = Math.max(MIN_VISIBLE, Math.min(requestedVisible, rows - CHROME_ROWS - (matchLabel ? 1 : 0))); // Full hint row with onTab+onShiftTab is ~100 chars and wraps inconsistently // below that. Compact mode drops shift+tab and shortens labels. - const compact = columns < 120 + const compact = columns < 120; const step = (delta: 1 | -1) => { - setFocusedIndex(i => clamp(i + delta, 0, items.length - 1)) - } + setFocusedIndex(i => clamp(i + delta, 0, items.length - 1)); + }; // onKeyDown fires after useSearchInput's useInput, so onExit must be a // no-op — return/downArrow are handled by handleKeyDown below. onCancel @@ -120,67 +117,62 @@ export function FuzzyPicker({ onCancel, initialQuery, backspaceExitsOnEmpty: false, - }) + }); const handleKeyDown = (e: KeyboardEvent) => { if (e.key === 'up' || (e.ctrl && e.key === 'p')) { - e.preventDefault() - e.stopImmediatePropagation() - step(direction === 'up' ? 1 : -1) - return + e.preventDefault(); + e.stopImmediatePropagation(); + step(direction === 'up' ? 1 : -1); + return; } if (e.key === 'down' || (e.ctrl && e.key === 'n')) { - e.preventDefault() - e.stopImmediatePropagation() - step(direction === 'up' ? -1 : 1) - return + e.preventDefault(); + e.stopImmediatePropagation(); + step(direction === 'up' ? -1 : 1); + return; } if (e.key === 'return') { - e.preventDefault() - e.stopImmediatePropagation() - const selected = items[focusedIndex] - if (selected) onSelect(selected) - return + e.preventDefault(); + e.stopImmediatePropagation(); + const selected = items[focusedIndex]; + if (selected) onSelect(selected); + return; } if (e.key === 'tab') { - e.preventDefault() - e.stopImmediatePropagation() - const selected = items[focusedIndex] - if (!selected) return - const tabAction = e.shift ? (onShiftTab ?? onTab) : onTab + e.preventDefault(); + e.stopImmediatePropagation(); + const selected = items[focusedIndex]; + if (!selected) return; + const tabAction = e.shift ? (onShiftTab ?? onTab) : onTab; if (tabAction) { - tabAction.handler(selected) + tabAction.handler(selected); } else { - onSelect(selected) + onSelect(selected); } } - } + }; useEffect(() => { - onQueryChange(query) - setFocusedIndex(0) + onQueryChange(query); + setFocusedIndex(0); // eslint-disable-next-line react-hooks/exhaustive-deps - }, [query]) + }, [query]); useEffect(() => { - setFocusedIndex(i => clamp(i, 0, items.length - 1)) - }, [items.length]) + setFocusedIndex(i => clamp(i, 0, items.length - 1)); + }, [items.length]); - const focused = items[focusedIndex] + const focused = items[focusedIndex]; useEffect(() => { - onFocus?.(focused) + onFocus?.(focused); // eslint-disable-next-line react-hooks/exhaustive-deps - }, [focused]) + }, [focused]); - const windowStart = clamp( - focusedIndex - visibleCount + 1, - 0, - items.length - visibleCount, - ) - const visible = items.slice(windowStart, windowStart + visibleCount) + const windowStart = clamp(focusedIndex - visibleCount + 1, 0, items.length - visibleCount); + const visible = items.slice(windowStart, windowStart + visibleCount); - const emptyText = - typeof emptyMessage === 'function' ? emptyMessage(query) : emptyMessage + const emptyText = typeof emptyMessage === 'function' ? emptyMessage(query) : emptyMessage; const searchBox = ( ({ isFocused isTerminalFocused={isTerminalFocused} /> - ) + ); const listBlock = ( ({ renderItem={renderItem} emptyText={emptyText} /> - ) + ); const preview = renderPreview && focused ? ( {renderPreview(focused)} - ) : null + ) : null; // Structure must not depend on preview truthiness — when focused goes // undefined (e.g. delete clears matches), switching row→fragment would // change both layout AND gap count, bouncing the searchBox below. const listGroup = renderPreview && previewPosition === 'right' ? ( - + {listBlock} {matchLabel && {matchLabel}} @@ -238,18 +226,12 @@ export function FuzzyPicker({ {matchLabel && {matchLabel}} {preview} - ) + ); - const inputAbove = direction !== 'up' + const inputAbove = direction !== 'up'; return ( - + {title} @@ -258,42 +240,26 @@ export function FuzzyPicker({ {!inputAbove && searchBox} - - - {onTab && ( - - )} - {onShiftTab && !compact && ( - - )} + + + {onTab && } + {onShiftTab && !compact && } {extraHints} - ) + ); } -type ListProps = Pick< - Props, - 'visibleCount' | 'direction' | 'getKey' | 'renderItem' -> & { - visible: readonly T[] - windowStart: number - total: number - focusedIndex: number - emptyText: string -} +type ListProps = Pick, 'visibleCount' | 'direction' | 'getKey' | 'renderItem'> & { + visible: readonly T[]; + windowStart: number; + total: number; + focusedIndex: number; + emptyText: string; +}; function List({ visible, @@ -311,15 +277,14 @@ function List({ {emptyText} - ) + ); } const rows = visible.map((item, i) => { - const actualIndex = windowStart + i - const isFocused = actualIndex === focusedIndex - const atLowEdge = i === 0 && windowStart > 0 - const atHighEdge = - i === visible.length - 1 && windowStart + visibleCount! < total + const actualIndex = windowStart + i; + const isFocused = actualIndex === focusedIndex; + const atLowEdge = i === 0 && windowStart > 0; + const atHighEdge = i === visible.length - 1 && windowStart + visibleCount! < total; return ( ({ > {renderItem(item, isFocused)} - ) - }) + ); + }); return ( - + {rows} - ) + ); } function firstWord(s: string): string { - const i = s.indexOf(' ') - return i === -1 ? s : s.slice(0, i) + const i = s.indexOf(' '); + return i === -1 ? s : s.slice(0, i); } diff --git a/packages/@ant/ink/src/theme/KeyboardShortcutHint.tsx b/packages/@ant/ink/src/theme/KeyboardShortcutHint.tsx index 30d4d2ccc..a3f0a52c6 100644 --- a/packages/@ant/ink/src/theme/KeyboardShortcutHint.tsx +++ b/packages/@ant/ink/src/theme/KeyboardShortcutHint.tsx @@ -1,16 +1,16 @@ -import React from 'react' -import Text from '../components/Text.js' +import React from 'react'; +import Text from '../components/Text.js'; type Props = { /** The key or chord to display (e.g., "ctrl+o", "Enter", "↑/↓") */ - shortcut: string + shortcut: string; /** The action the key performs (e.g., "expand", "select", "navigate") */ - action: string + action: string; /** Whether to wrap the hint in parentheses. Default: false */ - parens?: boolean + parens?: boolean; /** Whether to render the shortcut in bold. Default: false */ - bold?: boolean -} + bold?: boolean; +}; /** * Renders a keyboard shortcut hint like "ctrl+o to expand" or "(tab to toggle)" @@ -35,24 +35,19 @@ type Props = { * * */ -export function KeyboardShortcutHint({ - shortcut, - action, - parens = false, - bold = false, -}: Props): React.ReactNode { - const shortcutText = bold ? {shortcut} : shortcut +export function KeyboardShortcutHint({ shortcut, action, parens = false, bold = false }: Props): React.ReactNode { + const shortcutText = bold ? {shortcut} : shortcut; if (parens) { return ( ({shortcutText} to {action}) - ) + ); } return ( {shortcutText} to {action} - ) + ); } diff --git a/packages/@ant/ink/src/theme/ListItem.tsx b/packages/@ant/ink/src/theme/ListItem.tsx index b74619de1..b1b45a9b5 100644 --- a/packages/@ant/ink/src/theme/ListItem.tsx +++ b/packages/@ant/ink/src/theme/ListItem.tsx @@ -1,44 +1,44 @@ -import figures from 'figures' -import type { ReactNode } from 'react' -import React from 'react' -import { useDeclaredCursor } from '../hooks/use-declared-cursor.js' -import { Box, Text } from '../index.js' +import figures from 'figures'; +import type { ReactNode } from 'react'; +import React from 'react'; +import { useDeclaredCursor } from '../hooks/use-declared-cursor.js'; +import { Box, Text } from '../index.js'; type ListItemProps = { /** * Whether this item is currently focused (keyboard selection). * Shows the pointer indicator (❯) when true. */ - isFocused: boolean + isFocused: boolean; /** * Whether this item is selected (chosen/checked). * Shows the checkmark indicator (✓) when true. * @default false */ - isSelected?: boolean + isSelected?: boolean; /** * The content to display for this item. */ - children: ReactNode + children: ReactNode; /** * Optional description text displayed below the main content. */ - description?: string + description?: string; /** * Show a down arrow indicator instead of pointer (for scroll hints). * Only applies when not focused. */ - showScrollDown?: boolean + showScrollDown?: boolean; /** * Show an up arrow indicator instead of pointer (for scroll hints). * Only applies when not focused. */ - showScrollUp?: boolean + showScrollUp?: boolean; /** * Whether to apply automatic styling to the children based on focus/selection state. @@ -46,21 +46,21 @@ type ListItemProps = { * - When false: children are rendered as-is, allowing custom styling * @default true */ - styled?: boolean + styled?: boolean; /** * Whether this item is disabled. Disabled items show dimmed text and no indicators. * @default false */ - disabled?: boolean + disabled?: boolean; /** * Whether this ListItem should declare the terminal cursor position. * Set false when a child (e.g. BaseTextInput) declares its own cursor. * @default true */ - declareCursor?: boolean -} + declareCursor?: boolean; +}; /** * A list item component for selection UIs (dropdowns, multi-selects, menus). @@ -115,46 +115,46 @@ export function ListItem({ // Determine which indicator to show function renderIndicator(): ReactNode { if (disabled) { - return + return ; } if (isFocused) { - return {figures.pointer} + return {figures.pointer}; } if (showScrollDown) { - return {figures.arrowDown} + return {figures.arrowDown}; } if (showScrollUp) { - return {figures.arrowUp} + return {figures.arrowUp}; } - return + return ; } // Determine text color based on state function getTextColor(): 'success' | 'suggestion' | 'inactive' | undefined { if (disabled) { - return 'inactive' + return 'inactive'; } if (!styled) { - return undefined + return undefined; } if (isSelected) { - return 'success' + return 'success'; } if (isFocused) { - return 'suggestion' + return 'suggestion'; } - return undefined + return undefined; } - const textColor = getTextColor() + const textColor = getTextColor(); // Park the native terminal cursor on the pointer indicator so screen // readers / magnifiers track the focused item. (0,0) is the top-left of @@ -163,7 +163,7 @@ export function ListItem({ line: 0, column: 0, active: isFocused && !disabled && declareCursor !== false, - }) + }); return ( @@ -184,5 +184,5 @@ export function ListItem({ )} - ) + ); } diff --git a/packages/@ant/ink/src/theme/LoadingState.tsx b/packages/@ant/ink/src/theme/LoadingState.tsx index ec1459cee..0357d8648 100644 --- a/packages/@ant/ink/src/theme/LoadingState.tsx +++ b/packages/@ant/ink/src/theme/LoadingState.tsx @@ -1,30 +1,30 @@ -import React from 'react' -import { Box, Text } from '../index.js' -import { Spinner } from './Spinner.js' +import React from 'react'; +import { Box, Text } from '../index.js'; +import { Spinner } from './Spinner.js'; type LoadingStateProps = { /** * The loading message to display next to the spinner. */ - message: string + message: string; /** * Display the message in bold. * @default false */ - bold?: boolean + bold?: boolean; /** * Display the message in dimmed color. * @default false */ - dimColor?: boolean + dimColor?: boolean; /** * Optional subtitle displayed below the main message. */ - subtitle?: string -} + subtitle?: string; +}; /** * A spinner with loading message for async operations. @@ -62,5 +62,5 @@ export function LoadingState({ {subtitle && {subtitle}} - ) + ); } diff --git a/packages/@ant/ink/src/theme/Pane.tsx b/packages/@ant/ink/src/theme/Pane.tsx index d6868faec..f2117d2ae 100644 --- a/packages/@ant/ink/src/theme/Pane.tsx +++ b/packages/@ant/ink/src/theme/Pane.tsx @@ -1,16 +1,16 @@ -import React from 'react' -import { useIsInsideModal } from './modalContext.js' -import { Box } from '../index.js' -import type { Theme } from './theme-types.js' -import { Divider } from './Divider.js' +import React from 'react'; +import { useIsInsideModal } from './modalContext.js'; +import { Box } from '../index.js'; +import type { Theme } from './theme-types.js'; +import { Divider } from './Divider.js'; type PaneProps = { - children: React.ReactNode + children: React.ReactNode; /** * Theme color for the top border line. */ - color?: keyof Theme -} + color?: keyof Theme; +}; /** * A pane — a region of the terminal that appears below the REPL prompt, @@ -44,7 +44,7 @@ export function Pane({ children, color }: PaneProps): React.ReactNode { {children} - ) + ); } return ( @@ -53,5 +53,5 @@ export function Pane({ children, color }: PaneProps): React.ReactNode { {children} - ) + ); } diff --git a/packages/@ant/ink/src/theme/ProgressBar.tsx b/packages/@ant/ink/src/theme/ProgressBar.tsx index 1d5c1f674..5425731fc 100644 --- a/packages/@ant/ink/src/theme/ProgressBar.tsx +++ b/packages/@ant/ink/src/theme/ProgressBar.tsx @@ -1,48 +1,43 @@ -import React from 'react' -import { Text } from '../index.js' -import type { Theme } from './theme-types.js' +import React from 'react'; +import { Text } from '../index.js'; +import type { Theme } from './theme-types.js'; type Props = { /** * How much progress to display, between 0 and 1 inclusive */ - ratio: number // [0, 1] + ratio: number; // [0, 1] /** * How many characters wide to draw the progress bar */ - width: number // how many characters wide + width: number; // how many characters wide /** * Optional color for the filled portion of the bar */ - fillColor?: keyof Theme + fillColor?: keyof Theme; /** * Optional color for the empty portion of the bar */ - emptyColor?: keyof Theme -} + emptyColor?: keyof Theme; +}; -const BLOCKS = [' ', '▏', '▎', '▍', '▌', '▋', '▊', '▉', '█'] +const BLOCKS = [' ', '▏', '▎', '▍', '▌', '▋', '▊', '▉', '█']; -export function ProgressBar({ - ratio: inputRatio, - width, - fillColor, - emptyColor, -}: Props): React.ReactNode { - const ratio = Math.min(1, Math.max(0, inputRatio)) - const whole = Math.floor(ratio * width) - const segments = [BLOCKS[BLOCKS.length - 1]!.repeat(whole)] +export function ProgressBar({ ratio: inputRatio, width, fillColor, emptyColor }: Props): React.ReactNode { + const ratio = Math.min(1, Math.max(0, inputRatio)); + const whole = Math.floor(ratio * width); + const segments = [BLOCKS[BLOCKS.length - 1]!.repeat(whole)]; if (whole < width) { - const remainder = ratio * width - whole - const middle = Math.floor(remainder * BLOCKS.length) - segments.push(BLOCKS[middle]!) + const remainder = ratio * width - whole; + const middle = Math.floor(remainder * BLOCKS.length); + segments.push(BLOCKS[middle]!); - const empty = width - whole - 1 + const empty = width - whole - 1; if (empty > 0) { - segments.push(BLOCKS[0]!.repeat(empty)) + segments.push(BLOCKS[0]!.repeat(empty)); } } @@ -50,5 +45,5 @@ export function ProgressBar({ {segments.join('')} - ) + ); } diff --git a/packages/@ant/ink/src/theme/Ratchet.tsx b/packages/@ant/ink/src/theme/Ratchet.tsx index dbb80c3f4..5ca371b24 100644 --- a/packages/@ant/ink/src/theme/Ratchet.tsx +++ b/packages/@ant/ink/src/theme/Ratchet.tsx @@ -1,39 +1,39 @@ -import React, { useCallback, useLayoutEffect, useRef, useState } from 'react' -import { useTerminalSize } from '../hooks/useTerminalSize.js' -import { useTerminalViewport } from '../hooks/use-terminal-viewport.js' -import { Box, type DOMElement, measureElement } from '../index.js' +import React, { useCallback, useLayoutEffect, useRef, useState } from 'react'; +import { useTerminalSize } from '../hooks/useTerminalSize.js'; +import { useTerminalViewport } from '../hooks/use-terminal-viewport.js'; +import { Box, type DOMElement, measureElement } from '../index.js'; type Props = { - children: React.ReactNode - lock?: 'always' | 'offscreen' -} + children: React.ReactNode; + lock?: 'always' | 'offscreen'; +}; export function Ratchet({ children, lock = 'always' }: Props): React.ReactNode { - const [viewportRef, { isVisible }] = useTerminalViewport() - const { rows } = useTerminalSize() - const innerRef = useRef(null) - const maxHeight = useRef(0) - const [minHeight, setMinHeight] = useState(0) + const [viewportRef, { isVisible }] = useTerminalViewport(); + const { rows } = useTerminalSize(); + const innerRef = useRef(null); + const maxHeight = useRef(0); + const [minHeight, setMinHeight] = useState(0); const outerRef = useCallback( (el: DOMElement | null) => { - viewportRef(el) + viewportRef(el); }, [viewportRef], - ) + ); - const engaged = lock === 'always' || !isVisible + const engaged = lock === 'always' || !isVisible; useLayoutEffect(() => { if (!innerRef.current) { - return + return; } - const { height } = measureElement(innerRef.current) + const { height } = measureElement(innerRef.current); if (height > maxHeight.current) { - maxHeight.current = Math.min(height, rows) - setMinHeight(maxHeight.current) + maxHeight.current = Math.min(height, rows); + setMinHeight(maxHeight.current); } - }) + }); return ( @@ -41,5 +41,5 @@ export function Ratchet({ children, lock = 'always' }: Props): React.ReactNode { {children} - ) + ); } diff --git a/packages/@ant/ink/src/theme/SearchBox.tsx b/packages/@ant/ink/src/theme/SearchBox.tsx index bf716be9e..ca75c4519 100644 --- a/packages/@ant/ink/src/theme/SearchBox.tsx +++ b/packages/@ant/ink/src/theme/SearchBox.tsx @@ -1,16 +1,16 @@ -import React from 'react' -import { Box, Text } from '../index.js' +import React from 'react'; +import { Box, Text } from '../index.js'; type Props = { - query: string - placeholder?: string - isFocused: boolean - isTerminalFocused: boolean - prefix?: string - width?: number | string - cursorOffset?: number - borderless?: boolean -} + query: string; + placeholder?: string; + isFocused: boolean; + isTerminalFocused: boolean; + prefix?: string; + width?: number | string; + cursorOffset?: number; + borderless?: boolean; +}; export function SearchBox({ query, @@ -22,7 +22,7 @@ export function SearchBox({ cursorOffset, borderless = false, }: Props): React.ReactNode { - const offset = cursorOffset ?? query.length + const offset = cursorOffset ?? query.length; return ( {query.slice(0, offset)} - - {offset < query.length ? query[offset] : ' '} - - {offset < query.length && ( - {query.slice(offset + 1)} - )} + {offset < query.length ? query[offset] : ' '} + {offset < query.length && {query.slice(offset + 1)}} ) : ( {query} @@ -67,5 +63,5 @@ export function SearchBox({ )} - ) + ); } diff --git a/packages/@ant/ink/src/theme/Spinner.tsx b/packages/@ant/ink/src/theme/Spinner.tsx index 2d85ec08c..8abd81aec 100644 --- a/packages/@ant/ink/src/theme/Spinner.tsx +++ b/packages/@ant/ink/src/theme/Spinner.tsx @@ -1,20 +1,20 @@ -import React, { useState, useEffect } from 'react' -import { Text } from '../index.js' +import React, { useState, useEffect } from 'react'; +import { Text } from '../index.js'; -const FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'] +const FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']; /** * A simple animated spinner for loading states. */ export function Spinner(): React.ReactNode { - const [frame, setFrame] = useState(0) + const [frame, setFrame] = useState(0); useEffect(() => { const timer = setInterval(() => { - setFrame(f => (f + 1) % FRAMES.length) - }, 80) - return () => clearInterval(timer) - }, []) + setFrame(f => (f + 1) % FRAMES.length); + }, 80); + return () => clearInterval(timer); + }, []); - return {FRAMES[frame]} + return {FRAMES[frame]}; } diff --git a/packages/@ant/ink/src/theme/StatusIcon.tsx b/packages/@ant/ink/src/theme/StatusIcon.tsx index 943d6be25..e9ed546d1 100644 --- a/packages/@ant/ink/src/theme/StatusIcon.tsx +++ b/packages/@ant/ink/src/theme/StatusIcon.tsx @@ -1,8 +1,8 @@ -import figures from 'figures' -import React from 'react' -import { Text } from '../index.js' +import figures from 'figures'; +import React from 'react'; +import { Text } from '../index.js'; -type Status = 'success' | 'error' | 'warning' | 'info' | 'pending' | 'loading' +type Status = 'success' | 'error' | 'warning' | 'info' | 'pending' | 'loading'; type Props = { /** @@ -15,19 +15,19 @@ type Props = { * - `pending`: Dimmed circle (○) * - `loading`: Dimmed ellipsis (…) */ - status: Status + status: Status; /** * Include a trailing space after the icon. Useful when followed by text. * @default false */ - withSpace?: boolean -} + withSpace?: boolean; +}; const STATUS_CONFIG: Record< Status, { - icon: string - color: 'success' | 'error' | 'warning' | 'suggestion' | undefined + icon: string; + color: 'success' | 'error' | 'warning' | 'suggestion' | undefined; } > = { success: { icon: figures.tick, color: 'success' }, @@ -36,7 +36,7 @@ const STATUS_CONFIG: Record< info: { icon: figures.info, color: 'suggestion' }, pending: { icon: figures.circle, color: undefined }, loading: { icon: '…', color: undefined }, -} +}; /** * Renders a status indicator icon with appropriate color. @@ -56,16 +56,13 @@ const STATUS_CONFIG: Record< * Waiting for response * */ -export function StatusIcon({ - status, - withSpace = false, -}: Props): React.ReactNode { - const config = STATUS_CONFIG[status] +export function StatusIcon({ status, withSpace = false }: Props): React.ReactNode { + const config = STATUS_CONFIG[status]; return ( {config.icon} {withSpace && ' '} - ) + ); } diff --git a/packages/@ant/ink/src/theme/Tabs.tsx b/packages/@ant/ink/src/theme/Tabs.tsx index df49f4619..250398b7f 100644 --- a/packages/@ant/ink/src/theme/Tabs.tsx +++ b/packages/@ant/ink/src/theme/Tabs.tsx @@ -1,37 +1,28 @@ -import React, { - createContext, - useCallback, - useContext, - useEffect, - useState, -} from 'react' -import { - useIsInsideModal, - useModalScrollRef, -} from './modalContext.js' -import { useTerminalSize } from '../hooks/useTerminalSize.js' -import ScrollBox from '../components/ScrollBox.js' -import type { KeyboardEvent } from '../core/events/keyboard-event.js' -import { stringWidth } from '../core/stringWidth.js' -import { Box, Text } from '../index.js' -import { useKeybindings } from '../keybindings/useKeybinding.js' -import type { Theme } from './theme-types.js' +import React, { createContext, useCallback, useContext, useEffect, useState } from 'react'; +import { useIsInsideModal, useModalScrollRef } from './modalContext.js'; +import { useTerminalSize } from '../hooks/useTerminalSize.js'; +import ScrollBox from '../components/ScrollBox.js'; +import type { KeyboardEvent } from '../core/events/keyboard-event.js'; +import { stringWidth } from '../core/stringWidth.js'; +import { Box, Text } from '../index.js'; +import { useKeybindings } from '../keybindings/useKeybinding.js'; +import type { Theme } from './theme-types.js'; type TabsProps = { - children: Array> - title?: string - color?: keyof Theme - defaultTab?: string - hidden?: boolean - useFullWidth?: boolean + children: Array>; + title?: string; + color?: keyof Theme; + defaultTab?: string; + hidden?: boolean; + useFullWidth?: boolean; /** Controlled mode: current selected tab id/title */ - selectedTab?: string + selectedTab?: string; /** Controlled mode: callback when tab changes */ - onTabChange?: (tabId: string) => void + onTabChange?: (tabId: string) => void; /** Optional banner to display below tabs header */ - banner?: React.ReactNode + banner?: React.ReactNode; /** Disable keyboard navigation (e.g. when a child component handles arrow keys) */ - disableNavigation?: boolean + disableNavigation?: boolean; /** * Initial focus state for the tab header row. Defaults to true (header * focused, nav always works). Keep the default for Select/list content — @@ -40,29 +31,29 @@ type TabsProps = { * content actually binds left/right/tab (e.g. enum cycling), and show a * "↑ tabs" footer hint — without it tabs look broken. */ - initialHeaderFocused?: boolean + initialHeaderFocused?: boolean; /** * Fixed height for the content area. When set, all tabs render within the * same height (overflow hidden) so switching tabs doesn't cause layout * shifts. Shorter tabs get whitespace; taller tabs are clipped. */ - contentHeight?: number + contentHeight?: number; /** * Let Tab/←/→ switch tabs from focused content. Opt-in since some * content uses those keys; pass a reactive boolean to cede them when * needed. Switching from content focuses the header. */ - navFromContent?: boolean -} + navFromContent?: boolean; +}; type TabsContextValue = { - selectedTab: string | undefined - width: number | undefined - headerFocused: boolean - focusHeader: () => void - blurHeader: () => void - registerOptIn: () => () => void -} + selectedTab: string | undefined; + width: number | undefined; + headerFocused: boolean; + focusHeader: () => void; + blurHeader: () => void; + registerOptIn: () => () => void; +}; const TabsContext = createContext({ selectedTab: undefined, @@ -73,7 +64,7 @@ const TabsContext = createContext({ focusHeader: () => {}, blurHeader: () => {}, registerOptIn: () => () => {}, -}) +}); export function Tabs({ title, @@ -90,64 +81,51 @@ export function Tabs({ contentHeight, navFromContent = false, }: TabsProps): React.ReactNode { - const { columns: terminalWidth } = useTerminalSize() - const tabs = children.map(child => [ - child.props.id ?? child.props.title, - child.props.title, - ]) - const defaultTabIndex = defaultTab - ? tabs.findIndex(tab => defaultTab === tab[0]) - : 0 + const { columns: terminalWidth } = useTerminalSize(); + const tabs = children.map(child => [child.props.id ?? child.props.title, child.props.title]); + const defaultTabIndex = defaultTab ? tabs.findIndex(tab => defaultTab === tab[0]) : 0; // Support both controlled and uncontrolled modes - const isControlled = controlledSelectedTab !== undefined - const [internalSelectedTab, setInternalSelectedTab] = useState( - defaultTabIndex !== -1 ? defaultTabIndex : 0, - ) + const isControlled = controlledSelectedTab !== undefined; + const [internalSelectedTab, setInternalSelectedTab] = useState(defaultTabIndex !== -1 ? defaultTabIndex : 0); // In controlled mode, find the index of the controlled tab - const controlledTabIndex = isControlled - ? tabs.findIndex(tab => tab[0] === controlledSelectedTab) - : -1 - const selectedTabIndex = isControlled - ? controlledTabIndex !== -1 - ? controlledTabIndex - : 0 - : internalSelectedTab + const controlledTabIndex = isControlled ? tabs.findIndex(tab => tab[0] === controlledSelectedTab) : -1; + const selectedTabIndex = isControlled ? (controlledTabIndex !== -1 ? controlledTabIndex : 0) : internalSelectedTab; - const modalScrollRef = useModalScrollRef() + const modalScrollRef = useModalScrollRef(); // Header focus: left/right/tab only switch tabs when the header row is // focused. Children with interactive content call focusHeader() (via // useTabHeaderFocus) on up-arrow to hand focus back here; down-arrow // returns it. Tabs that never call the hook see no behavior change — // initialHeaderFocused defaults to true so nav always works. - const [headerFocused, setHeaderFocused] = useState(initialHeaderFocused) - const focusHeader = useCallback(() => setHeaderFocused(true), []) - const blurHeader = useCallback(() => setHeaderFocused(false), []) + const [headerFocused, setHeaderFocused] = useState(initialHeaderFocused); + const focusHeader = useCallback(() => setHeaderFocused(true), []); + const blurHeader = useCallback(() => setHeaderFocused(false), []); // Count of mounted children using useTabHeaderFocus(). Down-arrow blur and // the ↓ hint only engage when at least one child has opted in — otherwise // pressing down on a legacy tab would strand the user with nav disabled. - const [optInCount, setOptInCount] = useState(0) + const [optInCount, setOptInCount] = useState(0); const registerOptIn = useCallback(() => { - setOptInCount(n => n + 1) - return () => setOptInCount(n => n - 1) - }, []) - const optedIn = optInCount > 0 + setOptInCount(n => n + 1); + return () => setOptInCount(n => n - 1); + }, []); + const optedIn = optInCount > 0; const handleTabChange = (offset: number) => { - const newIndex = (selectedTabIndex + tabs.length + offset) % tabs.length - const newTabId = tabs[newIndex]?.[0] + const newIndex = (selectedTabIndex + tabs.length + offset) % tabs.length; + const newTabId = tabs[newIndex]?.[0]; if (isControlled && onTabChange && newTabId) { - onTabChange(newTabId) + onTabChange(newTabId); } else { - setInternalSelectedTab(newIndex) + setInternalSelectedTab(newIndex); } // Tab switching is a header action — stay focused so the user can keep // cycling. The newly mounted tab can blur via its own interaction. - setHeaderFocused(true) - } + setHeaderFocused(true); + }; useKeybindings( { @@ -158,54 +136,49 @@ export function Tabs({ context: 'Tabs', isActive: !hidden && !disableNavigation && headerFocused, }, - ) + ); // When the header is focused, down-arrow returns focus to content. Only // active when the selected tab has opted in via useTabHeaderFocus() — // legacy tabs have nowhere to return focus to. const handleKeyDown = (e: KeyboardEvent) => { - if (!headerFocused || !optedIn || hidden) return + if (!headerFocused || !optedIn || hidden) return; if (e.key === 'down') { - e.preventDefault() - setHeaderFocused(false) + e.preventDefault(); + setHeaderFocused(false); } - } + }; // Opt-in: same tabs:next/previous actions, active from content. Focuses // the header so subsequent presses cycle via the handler above. useKeybindings( { 'tabs:next': () => { - handleTabChange(1) - setHeaderFocused(true) + handleTabChange(1); + setHeaderFocused(true); }, 'tabs:previous': () => { - handleTabChange(-1) - setHeaderFocused(true) + handleTabChange(-1); + setHeaderFocused(true); }, }, { context: 'Tabs', - isActive: - navFromContent && - !headerFocused && - optedIn && - !hidden && - !disableNavigation, + isActive: navFromContent && !headerFocused && optedIn && !hidden && !disableNavigation, }, - ) + ); // Calculate spacing to fill the available width. No keyboard hint in the // header row — content footers own hints (see useTabHeaderFocus docs). - const titleWidth = title ? stringWidth(title) + 1 : 0 // +1 for gap + const titleWidth = title ? stringWidth(title) + 1 : 0; // +1 for gap const tabsWidth = tabs.reduce( (sum, [, tabTitle]) => sum + (tabTitle ? stringWidth(tabTitle) : 0) + 2 + 1, // +2 for padding, +1 for gap 0, - ) - const usedWidth = titleWidth + tabsWidth - const spacerWidth = useFullWidth ? Math.max(0, terminalWidth - usedWidth) : 0 + ); + const usedWidth = titleWidth + tabsWidth; + const spacerWidth = useFullWidth ? Math.max(0, terminalWidth - usedWidth) : 0; - const contentWidth = useFullWidth ? terminalWidth : undefined + const contentWidth = useFullWidth ? terminalWidth : undefined; return ( {!hidden && ( - + {title !== undefined && ( {title} )} {tabs.map(([id, title], i) => { - const isCurrent = selectedTabIndex === i - const hasColorCursor = color && isCurrent && headerFocused + const isCurrent = selectedTabIndex === i; + const hasColorCursor = color && isCurrent && headerFocused; return ( - ) + ); })} {spacerWidth > 0 && {' '.repeat(spacerWidth)}} @@ -267,12 +236,7 @@ export function Tabs({ // ModalContext. Keyed by selectedTabIndex → remounts on tab // switch, resetting scrollTop to 0 without scrollTo() timing games. - + {children} @@ -288,32 +252,32 @@ export function Tabs({ )} - ) + ); } type TabProps = { - title: string - id?: string - children: React.ReactNode -} + title: string; + id?: string; + children: React.ReactNode; +}; export function Tab({ title, id, children }: TabProps): React.ReactNode { - const { selectedTab, width } = useContext(TabsContext) - const insideModal = useIsInsideModal() + const { selectedTab, width } = useContext(TabsContext); + const insideModal = useIsInsideModal(); if (selectedTab !== (id ?? title)) { - return null + return null; } return ( {children} - ) + ); } export function useTabsWidth(): number | undefined { - const { width } = useContext(TabsContext) - return width + const { width } = useContext(TabsContext); + return width; } /** @@ -328,12 +292,11 @@ export function useTabsWidth(): number | undefined { * when the Select renders. */ export function useTabHeaderFocus(): { - headerFocused: boolean - focusHeader: () => void - blurHeader: () => void + headerFocused: boolean; + focusHeader: () => void; + blurHeader: () => void; } { - const { headerFocused, focusHeader, blurHeader, registerOptIn } = - useContext(TabsContext) - useEffect(registerOptIn, [registerOptIn]) - return { headerFocused, focusHeader, blurHeader } + const { headerFocused, focusHeader, blurHeader, registerOptIn } = useContext(TabsContext); + useEffect(registerOptIn, [registerOptIn]); + return { headerFocused, focusHeader, blurHeader }; } diff --git a/packages/@ant/ink/src/theme/ThemeProvider.tsx b/packages/@ant/ink/src/theme/ThemeProvider.tsx index bfd6a4307..1d56dc284 100644 --- a/packages/@ant/ink/src/theme/ThemeProvider.tsx +++ b/packages/@ant/ink/src/theme/ThemeProvider.tsx @@ -1,44 +1,38 @@ -import { feature } from 'bun:bundle' -import React, { - createContext, - useContext, - useEffect, - useMemo, - useState, -} from 'react' -import useStdin from '../hooks/use-stdin.js' -import { getSystemThemeName, type SystemTheme } from './systemTheme.js' -import type { ThemeName, ThemeSetting } from './theme-types.js' +import { feature } from 'bun:bundle'; +import React, { createContext, useContext, useEffect, useMemo, useState } from 'react'; +import useStdin from '../hooks/use-stdin.js'; +import { getSystemThemeName, type SystemTheme } from './systemTheme.js'; +import type { ThemeName, ThemeSetting } from './theme-types.js'; // -- Config persistence injection -- // Business layer provides these via setThemeConfigCallbacks(). // Defaults read/write from a simple module-level store. -let _loadTheme: () => ThemeSetting = () => 'dark' -let _saveTheme: (setting: ThemeSetting) => void = () => {} +let _loadTheme: () => ThemeSetting = () => 'dark'; +let _saveTheme: (setting: ThemeSetting) => void = () => {}; /** Inject config persistence from the business layer. Call once at startup. */ export function setThemeConfigCallbacks(opts: { - loadTheme: () => ThemeSetting - saveTheme: (setting: ThemeSetting) => void + loadTheme: () => ThemeSetting; + saveTheme: (setting: ThemeSetting) => void; }): void { - _loadTheme = opts.loadTheme - _saveTheme = opts.saveTheme + _loadTheme = opts.loadTheme; + _saveTheme = opts.saveTheme; } type ThemeContextValue = { /** The saved user preference. May be 'auto'. */ - themeSetting: ThemeSetting - setThemeSetting: (setting: ThemeSetting) => void - setPreviewTheme: (setting: ThemeSetting) => void - savePreview: () => void - cancelPreview: () => void + themeSetting: ThemeSetting; + setThemeSetting: (setting: ThemeSetting) => void; + setPreviewTheme: (setting: ThemeSetting) => void; + savePreview: () => void; + cancelPreview: () => void; /** The resolved theme to render with. Never 'auto'. */ - currentTheme: ThemeName -} + currentTheme: ThemeName; +}; // Non-'auto' default so useTheme() works without a provider (tests, tooling). -const DEFAULT_THEME: ThemeName = 'dark' +const DEFAULT_THEME: ThemeName = 'dark'; const ThemeContext = createContext({ themeSetting: DEFAULT_THEME, @@ -47,105 +41,96 @@ const ThemeContext = createContext({ savePreview: () => {}, cancelPreview: () => {}, currentTheme: DEFAULT_THEME, -}) +}); type Props = { - children: React.ReactNode - initialState?: ThemeSetting - onThemeSave?: (setting: ThemeSetting) => void -} + children: React.ReactNode; + initialState?: ThemeSetting; + onThemeSave?: (setting: ThemeSetting) => void; +}; function defaultInitialTheme(): ThemeSetting { - return _loadTheme() + return _loadTheme(); } function defaultSaveTheme(setting: ThemeSetting): void { - _saveTheme(setting) + _saveTheme(setting); } -export function ThemeProvider({ - children, - initialState, - onThemeSave = defaultSaveTheme, -}: Props) { - const [themeSetting, setThemeSetting] = useState( - initialState ?? defaultInitialTheme, - ) - const [previewTheme, setPreviewTheme] = useState(null) +export function ThemeProvider({ children, initialState, onThemeSave = defaultSaveTheme }: Props) { + const [themeSetting, setThemeSetting] = useState(initialState ?? defaultInitialTheme); + const [previewTheme, setPreviewTheme] = useState(null); // Track terminal theme for 'auto' resolution. Seeds from $COLORFGBG (or // 'dark' if unset); the OSC 11 watcher corrects it on first poll. const [systemTheme, setSystemTheme] = useState(() => (initialState ?? themeSetting) === 'auto' ? getSystemThemeName() : 'dark', - ) + ); // The setting currently in effect (preview wins while picker is open) - const activeSetting = previewTheme ?? themeSetting + const activeSetting = previewTheme ?? themeSetting; - const { internal_querier } = useStdin() + const { internal_querier } = useStdin(); // Watch for live terminal theme changes while 'auto' is active. // Positive feature() pattern so the watcher import is dead-code-eliminated // in external builds. useEffect(() => { if (feature('AUTO_THEME')) { - if (activeSetting !== 'auto' || !internal_querier) return - let cleanup: (() => void) | undefined - let cancelled = false - void import('../../utils/systemThemeWatcher.js').then( - ({ watchSystemTheme }) => { - if (cancelled) return - cleanup = watchSystemTheme(internal_querier, setSystemTheme) - }, - ) + if (activeSetting !== 'auto' || !internal_querier) return; + let cleanup: (() => void) | undefined; + let cancelled = false; + void import('../../utils/systemThemeWatcher.js').then(({ watchSystemTheme }) => { + if (cancelled) return; + cleanup = watchSystemTheme(internal_querier, setSystemTheme); + }); return () => { - cancelled = true - cleanup?.() - } + cancelled = true; + cleanup?.(); + }; } - }, [activeSetting, internal_querier]) + }, [activeSetting, internal_querier]); - const currentTheme: ThemeName = - activeSetting === 'auto' ? systemTheme : activeSetting + const currentTheme: ThemeName = activeSetting === 'auto' ? systemTheme : activeSetting; const value = useMemo( () => ({ themeSetting, setThemeSetting: (newSetting: ThemeSetting) => { - setThemeSetting(newSetting) - setPreviewTheme(null) + setThemeSetting(newSetting); + setPreviewTheme(null); // Switching to 'auto' restarts the watcher (activeSetting dep), whose // first poll fires immediately. Seed from the cache so the OSC // round-trip doesn't flash the wrong palette. if (newSetting === 'auto') { - setSystemTheme(getSystemThemeName()) + setSystemTheme(getSystemThemeName()); } - onThemeSave?.(newSetting) + onThemeSave?.(newSetting); }, setPreviewTheme: (newSetting: ThemeSetting) => { - setPreviewTheme(newSetting) + setPreviewTheme(newSetting); if (newSetting === 'auto') { - setSystemTheme(getSystemThemeName()) + setSystemTheme(getSystemThemeName()); } }, savePreview: () => { if (previewTheme !== null) { - setThemeSetting(previewTheme) - setPreviewTheme(null) - onThemeSave?.(previewTheme) + setThemeSetting(previewTheme); + setPreviewTheme(null); + onThemeSave?.(previewTheme); } }, cancelPreview: () => { if (previewTheme !== null) { - setPreviewTheme(null) + setPreviewTheme(null); } }, currentTheme, }), [themeSetting, previewTheme, currentTheme, onThemeSave], - ) + ); - return {children} + return {children}; } /** @@ -153,8 +138,8 @@ export function ThemeProvider({ * accepts any ThemeSetting (including 'auto'). */ export function useTheme(): [ThemeName, (setting: ThemeSetting) => void] { - const { currentTheme, setThemeSetting } = useContext(ThemeContext) - return [currentTheme, setThemeSetting] + const { currentTheme, setThemeSetting } = useContext(ThemeContext); + return [currentTheme, setThemeSetting]; } /** @@ -162,11 +147,10 @@ export function useTheme(): [ThemeName, (setting: ThemeSetting) => void] { * needs to show 'auto' as a distinct choice (e.g., ThemePicker). */ export function useThemeSetting(): ThemeSetting { - return useContext(ThemeContext).themeSetting + return useContext(ThemeContext).themeSetting; } export function usePreviewTheme() { - const { setPreviewTheme, savePreview, cancelPreview } = - useContext(ThemeContext) - return { setPreviewTheme, savePreview, cancelPreview } + const { setPreviewTheme, savePreview, cancelPreview } = useContext(ThemeContext); + return { setPreviewTheme, savePreview, cancelPreview }; } diff --git a/packages/@ant/ink/src/theme/ThemedBox.tsx b/packages/@ant/ink/src/theme/ThemedBox.tsx index 46aadeab9..f7220020f 100644 --- a/packages/@ant/ink/src/theme/ThemedBox.tsx +++ b/packages/@ant/ink/src/theme/ThemedBox.tsx @@ -1,22 +1,22 @@ -import React, { type PropsWithChildren, type Ref } from 'react' -import Box from '../components/Box.js' -import type { DOMElement } from '../core/dom.js' -import type { ClickEvent } from '../core/events/click-event.js' -import type { FocusEvent } from '../core/events/focus-event.js' -import type { KeyboardEvent } from '../core/events/keyboard-event.js' -import type { Color, Styles } from '../core/styles.js' -import { getTheme, type Theme } from './theme-types.js' -import { useTheme } from './ThemeProvider.js' +import React, { type PropsWithChildren, type Ref } from 'react'; +import Box from '../components/Box.js'; +import type { DOMElement } from '../core/dom.js'; +import type { ClickEvent } from '../core/events/click-event.js'; +import type { FocusEvent } from '../core/events/focus-event.js'; +import type { KeyboardEvent } from '../core/events/keyboard-event.js'; +import type { Color, Styles } from '../core/styles.js'; +import { getTheme, type Theme } from './theme-types.js'; +import { useTheme } from './ThemeProvider.js'; // Color props that accept theme keys type ThemedColorProps = { - readonly borderColor?: keyof Theme | Color - readonly borderTopColor?: keyof Theme | Color - readonly borderBottomColor?: keyof Theme | Color - readonly borderLeftColor?: keyof Theme | Color - readonly borderRightColor?: keyof Theme | Color - readonly backgroundColor?: keyof Theme | Color -} + readonly borderColor?: keyof Theme | Color; + readonly borderTopColor?: keyof Theme | Color; + readonly borderBottomColor?: keyof Theme | Color; + readonly borderLeftColor?: keyof Theme | Color; + readonly borderRightColor?: keyof Theme | Color; + readonly backgroundColor?: keyof Theme | Color; +}; // Base Styles without color props (they'll be overridden) type BaseStylesWithoutColors = Omit< @@ -28,43 +28,35 @@ type BaseStylesWithoutColors = Omit< | 'borderLeftColor' | 'borderRightColor' | 'backgroundColor' -> +>; export type Props = BaseStylesWithoutColors & ThemedColorProps & { - ref?: Ref - tabIndex?: number - autoFocus?: boolean - onClick?: (event: ClickEvent) => void - onFocus?: (event: FocusEvent) => void - onFocusCapture?: (event: FocusEvent) => void - onBlur?: (event: FocusEvent) => void - onBlurCapture?: (event: FocusEvent) => void - onKeyDown?: (event: KeyboardEvent) => void - onKeyDownCapture?: (event: KeyboardEvent) => void - onMouseEnter?: () => void - onMouseLeave?: () => void - } + ref?: Ref; + tabIndex?: number; + autoFocus?: boolean; + onClick?: (event: ClickEvent) => void; + onFocus?: (event: FocusEvent) => void; + onFocusCapture?: (event: FocusEvent) => void; + onBlur?: (event: FocusEvent) => void; + onBlurCapture?: (event: FocusEvent) => void; + onKeyDown?: (event: KeyboardEvent) => void; + onKeyDownCapture?: (event: KeyboardEvent) => void; + onMouseEnter?: () => void; + onMouseLeave?: () => void; + }; /** * Resolves a color value that may be a theme key to a raw Color. */ -function resolveColor( - color: keyof Theme | Color | undefined, - theme: Theme, -): Color | undefined { - if (!color) return undefined +function resolveColor(color: keyof Theme | Color | undefined, theme: Theme): Color | undefined { + if (!color) return undefined; // Check if it's a raw color (starts with rgb(, #, ansi256(, or ansi:) - if ( - color.startsWith('rgb(') || - color.startsWith('#') || - color.startsWith('ansi256(') || - color.startsWith('ansi:') - ) { - return color as Color + if (color.startsWith('rgb(') || color.startsWith('#') || color.startsWith('ansi256(') || color.startsWith('ansi:')) { + return color as Color; } // It's a theme key - resolve it - return theme[color as keyof Theme] as Color + return theme[color as keyof Theme] as Color; } /** @@ -82,16 +74,16 @@ function ThemedBox({ ref, ...rest }: PropsWithChildren): React.ReactNode { - const [themeName] = useTheme() - const theme = getTheme(themeName) + const [themeName] = useTheme(); + const theme = getTheme(themeName); // Resolve theme keys to raw colors - const resolvedBorderColor = resolveColor(borderColor, theme) - const resolvedBorderTopColor = resolveColor(borderTopColor, theme) - const resolvedBorderBottomColor = resolveColor(borderBottomColor, theme) - const resolvedBorderLeftColor = resolveColor(borderLeftColor, theme) - const resolvedBorderRightColor = resolveColor(borderRightColor, theme) - const resolvedBackgroundColor = resolveColor(backgroundColor, theme) + const resolvedBorderColor = resolveColor(borderColor, theme); + const resolvedBorderTopColor = resolveColor(borderTopColor, theme); + const resolvedBorderBottomColor = resolveColor(borderBottomColor, theme); + const resolvedBorderLeftColor = resolveColor(borderLeftColor, theme); + const resolvedBorderRightColor = resolveColor(borderRightColor, theme); + const resolvedBackgroundColor = resolveColor(backgroundColor, theme); return ( {children} - ) + ); } -export default ThemedBox +export default ThemedBox; diff --git a/packages/@ant/ink/src/theme/ThemedText.tsx b/packages/@ant/ink/src/theme/ThemedText.tsx index 35e884ca9..68ff863ae 100644 --- a/packages/@ant/ink/src/theme/ThemedText.tsx +++ b/packages/@ant/ink/src/theme/ThemedText.tsx @@ -1,87 +1,77 @@ -import type { ReactNode } from 'react' -import React, { useContext } from 'react' -import Text from '../components/Text.js' -import type { Color, Styles } from '../core/styles.js' -import { getTheme, type Theme } from './theme-types.js' -import { useTheme } from './ThemeProvider.js' +import type { ReactNode } from 'react'; +import React, { useContext } from 'react'; +import Text from '../components/Text.js'; +import type { Color, Styles } from '../core/styles.js'; +import { getTheme, type Theme } from './theme-types.js'; +import { useTheme } from './ThemeProvider.js'; /** Colors uncolored ThemedText in the subtree. Precedence: explicit `color` > * this > dimColor. Crosses Box boundaries (Ink's style cascade doesn't). */ -export const TextHoverColorContext = React.createContext< - keyof Theme | undefined ->(undefined) +export const TextHoverColorContext = React.createContext(undefined); export type Props = { /** * Change text color. Accepts a theme key or raw color value. */ - readonly color?: keyof Theme | Color + readonly color?: keyof Theme | Color; /** * Same as `color`, but for background. Must be a theme key. */ - readonly backgroundColor?: keyof Theme + readonly backgroundColor?: keyof Theme; /** * Dim the color using the theme's inactive color. * This is compatible with bold (unlike ANSI dim). */ - readonly dimColor?: boolean + readonly dimColor?: boolean; /** * Make the text bold. */ - readonly bold?: boolean + readonly bold?: boolean; /** * Make the text italic. */ - readonly italic?: boolean + readonly italic?: boolean; /** * Make the text underlined. */ - readonly underline?: boolean + readonly underline?: boolean; /** * Make the text crossed with a line. */ - readonly strikethrough?: boolean + readonly strikethrough?: boolean; /** * Inverse background and foreground colors. */ - readonly inverse?: boolean + readonly inverse?: boolean; /** * This property tells Ink to wrap or truncate text if its width is larger than container. * If `wrap` is passed (by default), Ink will wrap text and split it into multiple lines. * If `truncate-*` is passed, Ink will truncate text instead, which will result in one line of text with the rest cut off. */ - readonly wrap?: Styles['textWrap'] + readonly wrap?: Styles['textWrap']; - readonly children?: ReactNode -} + readonly children?: ReactNode; +}; /** * Resolves a color value that may be a theme key to a raw Color. */ -function resolveColor( - color: keyof Theme | Color | undefined, - theme: Theme, -): Color | undefined { - if (!color) return undefined +function resolveColor(color: keyof Theme | Color | undefined, theme: Theme): Color | undefined { + if (!color) return undefined; // Check if it's a raw color (starts with rgb(, #, ansi256(, or ansi:) - if ( - color.startsWith('rgb(') || - color.startsWith('#') || - color.startsWith('ansi256(') || - color.startsWith('ansi:') - ) { - return color as Color + if (color.startsWith('rgb(') || color.startsWith('#') || color.startsWith('ansi256(') || color.startsWith('ansi:')) { + return color as Color; } // It's a theme key - resolve it - return theme[color as keyof Theme] as Color + return theme[color as keyof Theme] as Color; } /** @@ -100,9 +90,9 @@ export default function ThemedText({ wrap = 'wrap', children, }: Props): React.ReactNode { - const [themeName] = useTheme() - const theme = getTheme(themeName) - const hoverColor = useContext(TextHoverColorContext) + const [themeName] = useTheme(); + const theme = getTheme(themeName); + const hoverColor = useContext(TextHoverColorContext); // Resolve theme keys to raw colors const resolvedColor = @@ -110,10 +100,8 @@ export default function ThemedText({ ? resolveColor(hoverColor, theme) : dimColor ? (theme.inactive as Color) - : resolveColor(color, theme) - const resolvedBackgroundColor = backgroundColor - ? (theme[backgroundColor] as Color) - : undefined + : resolveColor(color, theme); + const resolvedBackgroundColor = backgroundColor ? (theme[backgroundColor] as Color) : undefined; return ( {children} - ) + ); } diff --git a/packages/@ant/ink/src/types/ink-elements.d.ts b/packages/@ant/ink/src/types/ink-elements.d.ts index e439ae56b..b0943fa79 100644 --- a/packages/@ant/ink/src/types/ink-elements.d.ts +++ b/packages/@ant/ink/src/types/ink-elements.d.ts @@ -1,49 +1,49 @@ // Type declarations for custom Ink JSX elements // Note: The detailed prop types are defined in ink-jsx.d.ts via React module augmentation. // This file provides the global JSX namespace fallback declarations. -import type { ReactNode, Ref } from 'react'; -import type { ClickEvent } from '../core/events/click-event.js'; -import type { FocusEvent } from '../core/events/focus-event.js'; -import type { KeyboardEvent } from '../core/events/keyboard-event.js'; -import type { Styles, TextStyles } from '../core/styles.js'; -import type { DOMElement } from '../core/dom.js'; +import type { ReactNode, Ref } from 'react' +import type { ClickEvent } from '../core/events/click-event.js' +import type { FocusEvent } from '../core/events/focus-event.js' +import type { KeyboardEvent } from '../core/events/keyboard-event.js' +import type { Styles, TextStyles } from '../core/styles.js' +import type { DOMElement } from '../core/dom.js' declare global { namespace JSX { interface IntrinsicElements { 'ink-box': { - ref?: Ref; - tabIndex?: number; - autoFocus?: boolean; - onClick?: (event: ClickEvent) => void; - onFocus?: (event: FocusEvent) => void; - onFocusCapture?: (event: FocusEvent) => void; - onBlur?: (event: FocusEvent) => void; - onBlurCapture?: (event: FocusEvent) => void; - onMouseEnter?: () => void; - onMouseLeave?: () => void; - onKeyDown?: (event: KeyboardEvent) => void; - onKeyDownCapture?: (event: KeyboardEvent) => void; - style?: Styles; - stickyScroll?: boolean; - children?: ReactNode; - }; + ref?: Ref + tabIndex?: number + autoFocus?: boolean + onClick?: (event: ClickEvent) => void + onFocus?: (event: FocusEvent) => void + onFocusCapture?: (event: FocusEvent) => void + onBlur?: (event: FocusEvent) => void + onBlurCapture?: (event: FocusEvent) => void + onMouseEnter?: () => void + onMouseLeave?: () => void + onKeyDown?: (event: KeyboardEvent) => void + onKeyDownCapture?: (event: KeyboardEvent) => void + style?: Styles + stickyScroll?: boolean + children?: ReactNode + } 'ink-text': { - style?: Styles; - textStyles?: TextStyles; - children?: ReactNode; - }; + style?: Styles + textStyles?: TextStyles + children?: ReactNode + } 'ink-link': { - href?: string; - children?: ReactNode; - }; + href?: string + children?: ReactNode + } 'ink-raw-ansi': { - rawText?: string; - rawWidth?: number; - rawHeight?: number; - }; + rawText?: string + rawWidth?: number + rawHeight?: number + } } } } -export {}; +export {} diff --git a/packages/@ant/ink/src/types/ink-jsx.d.ts b/packages/@ant/ink/src/types/ink-jsx.d.ts index 456be9399..11800b03e 100644 --- a/packages/@ant/ink/src/types/ink-jsx.d.ts +++ b/packages/@ant/ink/src/types/ink-jsx.d.ts @@ -8,47 +8,47 @@ * This file must be a module (have an import/export) for `declare module` * augmentation to work correctly. */ -import type { ReactNode, Ref } from 'react'; -import type { ClickEvent } from '../core/events/click-event.js'; -import type { FocusEvent } from '../core/events/focus-event.js'; -import type { KeyboardEvent } from '../core/events/keyboard-event.js'; -import type { Styles, TextStyles } from '../core/styles.js'; -import type { DOMElement } from '../core/dom.js'; +import type { ReactNode, Ref } from 'react' +import type { ClickEvent } from '../core/events/click-event.js' +import type { FocusEvent } from '../core/events/focus-event.js' +import type { KeyboardEvent } from '../core/events/keyboard-event.js' +import type { Styles, TextStyles } from '../core/styles.js' +import type { DOMElement } from '../core/dom.js' declare module 'react' { namespace JSX { interface IntrinsicElements { 'ink-box': { - ref?: Ref; - tabIndex?: number; - autoFocus?: boolean; - onClick?: (event: ClickEvent) => void; - onFocus?: (event: FocusEvent) => void; - onFocusCapture?: (event: FocusEvent) => void; - onBlur?: (event: FocusEvent) => void; - onBlurCapture?: (event: FocusEvent) => void; - onMouseEnter?: () => void; - onMouseLeave?: () => void; - onKeyDown?: (event: KeyboardEvent) => void; - onKeyDownCapture?: (event: KeyboardEvent) => void; - style?: Styles; - stickyScroll?: boolean; - children?: ReactNode; - }; + ref?: Ref + tabIndex?: number + autoFocus?: boolean + onClick?: (event: ClickEvent) => void + onFocus?: (event: FocusEvent) => void + onFocusCapture?: (event: FocusEvent) => void + onBlur?: (event: FocusEvent) => void + onBlurCapture?: (event: FocusEvent) => void + onMouseEnter?: () => void + onMouseLeave?: () => void + onKeyDown?: (event: KeyboardEvent) => void + onKeyDownCapture?: (event: KeyboardEvent) => void + style?: Styles + stickyScroll?: boolean + children?: ReactNode + } 'ink-text': { - style?: Styles; - textStyles?: TextStyles; - children?: ReactNode; - }; + style?: Styles + textStyles?: TextStyles + children?: ReactNode + } 'ink-link': { - href?: string; - children?: ReactNode; - }; + href?: string + children?: ReactNode + } 'ink-raw-ansi': { - rawText?: string; - rawWidth?: number; - rawHeight?: number; - }; + rawText?: string + rawWidth?: number + rawHeight?: number + } } } } diff --git a/packages/@ant/model-provider/src/index.ts b/packages/@ant/model-provider/src/index.ts index a4acf428c..6f2b1a56c 100644 --- a/packages/@ant/model-provider/src/index.ts +++ b/packages/@ant/model-provider/src/index.ts @@ -29,7 +29,10 @@ export { resolveGeminiModel } from './providers/gemini/modelMapping.js' // Gemini provider utilities export { anthropicMessagesToGemini } from './providers/gemini/convertMessages.js' -export { anthropicToolsToGemini, anthropicToolChoiceToGemini } from './providers/gemini/convertTools.js' +export { + anthropicToolsToGemini, + anthropicToolChoiceToGemini, +} from './providers/gemini/convertTools.js' export { adaptGeminiStreamToAnthropic } from './providers/gemini/streamAdapter.js' export { GEMINI_THOUGHT_SIGNATURE_FIELD, @@ -59,5 +62,8 @@ export { // Shared OpenAI conversion utilities export { anthropicMessagesToOpenAI } from './shared/openaiConvertMessages.js' export type { ConvertMessagesOptions } from './shared/openaiConvertMessages.js' -export { anthropicToolsToOpenAI, anthropicToolChoiceToOpenAI } from './shared/openaiConvertTools.js' +export { + anthropicToolsToOpenAI, + anthropicToolChoiceToOpenAI, +} from './shared/openaiConvertTools.js' export { adaptOpenAIStreamToAnthropic } from './shared/openaiStreamAdapter.js' diff --git a/packages/@ant/model-provider/src/providers/gemini/__tests__/convertMessages.test.ts b/packages/@ant/model-provider/src/providers/gemini/__tests__/convertMessages.test.ts index ea86c841f..39ba31cb6 100644 --- a/packages/@ant/model-provider/src/providers/gemini/__tests__/convertMessages.test.ts +++ b/packages/@ant/model-provider/src/providers/gemini/__tests__/convertMessages.test.ts @@ -1,8 +1,5 @@ import { describe, expect, test } from 'bun:test' -import type { - AssistantMessage, - UserMessage, -} from '../../../types/message.js' +import type { AssistantMessage, UserMessage } from '../../../types/message.js' import { anthropicMessagesToGemini } from '../convertMessages.js' function makeUserMsg(content: string | any[]): UserMessage { @@ -23,10 +20,9 @@ function makeAssistantMsg(content: string | any[]): AssistantMessage { describe('anthropicMessagesToGemini', () => { test('converts system prompt to systemInstruction', () => { - const result = anthropicMessagesToGemini( - [makeUserMsg('hello')], - ['You are helpful.'] as any, - ) + const result = anthropicMessagesToGemini([makeUserMsg('hello')], [ + 'You are helpful.', + ] as any) expect(result.systemInstruction).toEqual({ parts: [{ text: 'You are helpful.' }], @@ -202,17 +198,19 @@ describe('anthropicMessagesToGemini', () => { test('converts base64 image to inlineData', () => { const result = anthropicMessagesToGemini( - [makeUserMsg([ - { type: 'text', text: 'describe this' }, - { - type: 'image', - source: { - type: 'base64', - media_type: 'image/png', - data: 'iVBORw0KGgo=', + [ + makeUserMsg([ + { type: 'text', text: 'describe this' }, + { + type: 'image', + source: { + type: 'base64', + media_type: 'image/png', + data: 'iVBORw0KGgo=', + }, }, - }, - ])], + ]), + ], [] as any, ) expect(result.contents).toEqual([ @@ -228,15 +226,17 @@ describe('anthropicMessagesToGemini', () => { test('converts url image to text fallback', () => { const result = anthropicMessagesToGemini( - [makeUserMsg([ - { - type: 'image', - source: { - type: 'url', - url: 'https://example.com/img.png', + [ + makeUserMsg([ + { + type: 'image', + source: { + type: 'url', + url: 'https://example.com/img.png', + }, }, - }, - ])], + ]), + ], [] as any, ) expect(result.contents).toEqual([ @@ -249,15 +249,17 @@ describe('anthropicMessagesToGemini', () => { test('defaults to image/png when media_type is missing', () => { const result = anthropicMessagesToGemini( - [makeUserMsg([ - { - type: 'image', - source: { - type: 'base64', - data: 'ABC123', + [ + makeUserMsg([ + { + type: 'image', + source: { + type: 'base64', + data: 'ABC123', + }, }, - }, - ])], + ]), + ], [] as any, ) expect(result.contents[0].parts[0]).toEqual({ diff --git a/packages/@ant/model-provider/src/providers/gemini/__tests__/convertTools.test.ts b/packages/@ant/model-provider/src/providers/gemini/__tests__/convertTools.test.ts index 999f362cd..8aae1c20f 100644 --- a/packages/@ant/model-provider/src/providers/gemini/__tests__/convertTools.test.ts +++ b/packages/@ant/model-provider/src/providers/gemini/__tests__/convertTools.test.ts @@ -120,11 +120,11 @@ describe('anthropicToolChoiceToGemini', () => { }) test('maps explicit tool choice', () => { - expect( - anthropicToolChoiceToGemini({ type: 'tool', name: 'bash' }), - ).toEqual({ - mode: 'ANY', - allowedFunctionNames: ['bash'], - }) + expect(anthropicToolChoiceToGemini({ type: 'tool', name: 'bash' })).toEqual( + { + mode: 'ANY', + allowedFunctionNames: ['bash'], + }, + ) }) }) diff --git a/packages/@ant/model-provider/src/providers/gemini/__tests__/streamAdapter.test.ts b/packages/@ant/model-provider/src/providers/gemini/__tests__/streamAdapter.test.ts index d7b42229f..2ac8836d8 100644 --- a/packages/@ant/model-provider/src/providers/gemini/__tests__/streamAdapter.test.ts +++ b/packages/@ant/model-provider/src/providers/gemini/__tests__/streamAdapter.test.ts @@ -57,7 +57,8 @@ describe('adaptGeminiStreamToAnthropic', () => { const textDeltas = events.filter( event => - event.type === 'content_block_delta' && event.delta.type === 'text_delta', + event.type === 'content_block_delta' && + event.delta.type === 'text_delta', ) expect(events[0].type).toBe('message_start') @@ -92,7 +93,9 @@ describe('adaptGeminiStreamToAnthropic', () => { }, ]) - const blockStart = events.find(event => event.type === 'content_block_start') + const blockStart = events.find( + event => event.type === 'content_block_start', + ) expect(blockStart.content_block.type).toBe('thinking') const signatureDelta = events.find( @@ -125,7 +128,9 @@ describe('adaptGeminiStreamToAnthropic', () => { }, ]) - const blockStart = events.find(event => event.type === 'content_block_start') + const blockStart = events.find( + event => event.type === 'content_block_start', + ) expect(blockStart.content_block.type).toBe('tool_use') expect(blockStart.content_block.name).toBe('bash') diff --git a/packages/@ant/model-provider/src/providers/gemini/convertMessages.ts b/packages/@ant/model-provider/src/providers/gemini/convertMessages.ts index 4b7acdb62..f648a8760 100644 --- a/packages/@ant/model-provider/src/providers/gemini/convertMessages.ts +++ b/packages/@ant/model-provider/src/providers/gemini/convertMessages.ts @@ -93,7 +93,10 @@ function convertInternalUserMessage( return { role: 'user', parts: content.flatMap(block => - convertUserContentBlockToGeminiParts(block as unknown as string | Record, toolNamesById), + convertUserContentBlockToGeminiParts( + block as unknown as string | Record, + toolNamesById, + ), ), } } @@ -115,7 +118,8 @@ function convertUserContentBlockToGeminiParts( return [ { functionResponse: { - name: toolNamesById.get(toolResult.tool_use_id) ?? toolResult.tool_use_id, + name: + toolNamesById.get(toolResult.tool_use_id) ?? toolResult.tool_use_id, response: toolResultToResponseObject(toolResult), }, }, @@ -170,7 +174,9 @@ function convertInternalAssistantMessage(msg: AssistantMessage): GeminiContent { parts.push( ...createTextGeminiParts( block.text, - getGeminiThoughtSignature(block as unknown as Record), + getGeminiThoughtSignature( + block as unknown as Record, + ), ), ) continue @@ -194,8 +200,12 @@ function convertInternalAssistantMessage(msg: AssistantMessage): GeminiContent { name: toolUse.name, args: normalizeToolUseInput(toolUse.input), }, - ...(getGeminiThoughtSignature(block as unknown as Record) && { - thoughtSignature: getGeminiThoughtSignature(block as unknown as Record), + ...(getGeminiThoughtSignature( + block as unknown as Record, + ) && { + thoughtSignature: getGeminiThoughtSignature( + block as unknown as Record, + ), }), }) } @@ -255,12 +265,10 @@ function toolResultToResponseObject( block: BetaToolResultBlockParam, ): Record { const result = normalizeToolResultContent(block.content) - if ( - result && - typeof result === 'object' && - !Array.isArray(result) - ) { - return block.is_error ? { ...(result as Record), is_error: true } : result as Record + if (result && typeof result === 'object' && !Array.isArray(result)) { + return block.is_error + ? { ...(result as Record), is_error: true } + : (result as Record) } return { @@ -299,7 +307,9 @@ function normalizeToolResultContent(content: unknown): unknown { return content ?? '' } -function getGeminiThoughtSignature(block: Record): string | undefined { +function getGeminiThoughtSignature( + block: Record, +): string | undefined { const signature = block[GEMINI_THOUGHT_SIGNATURE_FIELD] return typeof signature === 'string' && signature.length > 0 ? signature diff --git a/packages/@ant/model-provider/src/providers/gemini/convertTools.ts b/packages/@ant/model-provider/src/providers/gemini/convertTools.ts index 7f6fc82c5..0473174c6 100644 --- a/packages/@ant/model-provider/src/providers/gemini/convertTools.ts +++ b/packages/@ant/model-provider/src/providers/gemini/convertTools.ts @@ -1,8 +1,5 @@ import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs' -import type { - GeminiFunctionCallingConfig, - GeminiTool, -} from './types.js' +import type { GeminiFunctionCallingConfig, GeminiTool } from './types.js' const GEMINI_JSON_SCHEMA_TYPES = new Set([ 'string', @@ -34,7 +31,9 @@ function normalizeGeminiJsonSchemaType( return undefined } -function inferGeminiJsonSchemaTypeFromValue(value: unknown): string | undefined { +function inferGeminiJsonSchemaTypeFromValue( + value: unknown, +): string | undefined { if (value === null) return 'null' if (Array.isArray(value)) return 'array' if (typeof value === 'string') return 'string' @@ -97,9 +96,7 @@ function sanitizeGeminiJsonSchemaArray( return sanitized.length > 0 ? sanitized : undefined } -function sanitizeGeminiJsonSchema( - schema: unknown, -): Record { +function sanitizeGeminiJsonSchema(schema: unknown): Record { if (!schema || typeof schema !== 'object' || Array.isArray(schema)) { return {} } @@ -236,17 +233,20 @@ export function anthropicToolsToGemini(tools: BetaToolUnion[]): GeminiTool[] { const functionDeclarations = tools .filter(tool => { const toolType = (tool as unknown as { type?: string }).type - return tool.type === 'custom' || !('type' in tool) || toolType !== 'server' + return ( + tool.type === 'custom' || !('type' in tool) || toolType !== 'server' + ) }) .map(tool => { const anyTool = tool as unknown as Record const name = (anyTool.name as string) || '' const description = (anyTool.description as string) || '' - const inputSchema = - (anyTool.input_schema as Record | undefined) ?? { - type: 'object', - properties: {}, - } + const inputSchema = (anyTool.input_schema as + | Record + | undefined) ?? { + type: 'object', + properties: {}, + } return { name, @@ -255,9 +255,7 @@ export function anthropicToolsToGemini(tools: BetaToolUnion[]): GeminiTool[] { } }) - return functionDeclarations.length > 0 - ? [{ functionDeclarations }] - : [] + return functionDeclarations.length > 0 ? [{ functionDeclarations }] : [] } export function anthropicToolChoiceToGemini( diff --git a/packages/@ant/model-provider/src/providers/gemini/streamAdapter.ts b/packages/@ant/model-provider/src/providers/gemini/streamAdapter.ts index d40980e04..9095b6da0 100644 --- a/packages/@ant/model-provider/src/providers/gemini/streamAdapter.ts +++ b/packages/@ant/model-provider/src/providers/gemini/streamAdapter.ts @@ -10,9 +10,8 @@ export async function* adaptGeminiStreamToAnthropic( let started = false let stopped = false let nextContentIndex = 0 - let openTextLikeBlock: - | { index: number; type: 'text' | 'thinking' } - | null = null + let openTextLikeBlock: { index: number; type: 'text' | 'thinking' } | null = + null let sawToolUse = false let finishReason: string | undefined let inputTokens = 0 @@ -85,7 +84,10 @@ export async function* adaptGeminiStreamToAnthropic( } as BetaRawMessageStreamEvent } - if (part.functionCall.args && Object.keys(part.functionCall.args).length > 0) { + if ( + part.functionCall.args && + Object.keys(part.functionCall.args).length > 0 + ) { yield { type: 'content_block_delta', index: toolIndex, @@ -213,9 +215,7 @@ export async function* adaptGeminiStreamToAnthropic( } } -function getTextLikeBlockType( - part: GeminiPart, -): 'text' | 'thinking' | null { +function getTextLikeBlockType(part: GeminiPart): 'text' | 'thinking' | null { if (typeof part.text !== 'string') { return null } diff --git a/packages/@ant/model-provider/src/providers/grok/__tests__/modelMapping.test.ts b/packages/@ant/model-provider/src/providers/grok/__tests__/modelMapping.test.ts index 168f236fa..84253dac4 100644 --- a/packages/@ant/model-provider/src/providers/grok/__tests__/modelMapping.test.ts +++ b/packages/@ant/model-provider/src/providers/grok/__tests__/modelMapping.test.ts @@ -33,11 +33,14 @@ describe('resolveGrokModel', () => { }) test('maps haiku models to grok-3-mini-fast', () => { - expect(resolveGrokModel('claude-haiku-4-5-20251001')).toBe('grok-3-mini-fast') + expect(resolveGrokModel('claude-haiku-4-5-20251001')).toBe( + 'grok-3-mini-fast', + ) }) test('GROK_MODEL_MAP overrides family mapping', () => { - process.env.GROK_MODEL_MAP = '{"opus":"grok-4","sonnet":"grok-3","haiku":"grok-mini"}' + process.env.GROK_MODEL_MAP = + '{"opus":"grok-4","sonnet":"grok-3","haiku":"grok-mini"}' expect(resolveGrokModel('claude-opus-4-6')).toBe('grok-4') expect(resolveGrokModel('claude-sonnet-4-6')).toBe('grok-3') expect(resolveGrokModel('claude-haiku-4-5-20251001')).toBe('grok-mini') @@ -62,6 +65,8 @@ describe('resolveGrokModel', () => { }) test('falls back to family default for unlisted model', () => { - expect(resolveGrokModel('claude-opus-99-20300101')).toBe('grok-4.20-reasoning') + expect(resolveGrokModel('claude-opus-99-20300101')).toBe( + 'grok-4.20-reasoning', + ) }) }) diff --git a/packages/@ant/model-provider/src/shared/__tests__/openaiConvertMessages.test.ts b/packages/@ant/model-provider/src/shared/__tests__/openaiConvertMessages.test.ts index 974849af9..2b1733372 100644 --- a/packages/@ant/model-provider/src/shared/__tests__/openaiConvertMessages.test.ts +++ b/packages/@ant/model-provider/src/shared/__tests__/openaiConvertMessages.test.ts @@ -131,7 +131,13 @@ describe('anthropicMessagesToOpenAI', () => { ], [] as any, ) - expect(result).toEqual([{ role: 'assistant', content: 'visible response', reasoning_content: 'internal thoughts...' }] as any) + expect(result).toEqual([ + { + role: 'assistant', + content: 'visible response', + reasoning_content: 'internal thoughts...', + }, + ] as any) }) test('handles full conversation with tools', () => { @@ -487,10 +493,19 @@ describe('DeepSeek thinking mode (enableThinking)', () => { [ makeUserMsg('run ls'), makeAssistantMsg([ - { type: 'tool_use' as const, id: 'toolu_1', name: 'bash', input: { command: 'ls' } }, + { + type: 'tool_use' as const, + id: 'toolu_1', + name: 'bash', + input: { command: 'ls' }, + }, ]), makeUserMsg([ - { type: 'tool_result' as const, tool_use_id: 'toolu_1', content: 'file.txt' }, + { + type: 'tool_result' as const, + tool_use_id: 'toolu_1', + content: 'file.txt', + }, { type: 'text' as const, text: 'looks good' }, ]), ], @@ -499,7 +514,10 @@ describe('DeepSeek thinking mode (enableThinking)', () => { // Find the tool message and the user text message const toolIdx = result.findIndex(m => m.role === 'tool') const userTextIdx = result.findIndex( - m => m.role === 'user' && typeof m.content === 'string' && m.content.includes('looks good'), + m => + m.role === 'user' && + typeof m.content === 'string' && + m.content.includes('looks good'), ) expect(toolIdx).toBeGreaterThanOrEqual(0) expect(userTextIdx).toBeGreaterThanOrEqual(0) @@ -512,15 +530,26 @@ describe('DeepSeek thinking mode (enableThinking)', () => { [ makeUserMsg('do something'), makeAssistantMsg([ - { type: 'tool_use' as const, id: 'toolu_2', name: 'bash', input: { command: 'pwd' } }, + { + type: 'tool_use' as const, + id: 'toolu_2', + name: 'bash', + input: { command: 'pwd' }, + }, ]), makeUserMsg([ - { type: 'tool_result' as const, tool_use_id: 'toolu_2', content: '/home/user' }, + { + type: 'tool_result' as const, + tool_use_id: 'toolu_2', + content: '/home/user', + }, ]), ], [] as any, ) - const assistantIdx = result.findIndex(m => m.role === 'assistant' && (m as any).tool_calls) + const assistantIdx = result.findIndex( + m => m.role === 'assistant' && (m as any).tool_calls, + ) const toolIdx = result.findIndex(m => m.role === 'tool') expect(assistantIdx).toBeGreaterThanOrEqual(0) expect(toolIdx).toBe(assistantIdx + 1) diff --git a/packages/@ant/model-provider/src/shared/__tests__/openaiConvertTools.test.ts b/packages/@ant/model-provider/src/shared/__tests__/openaiConvertTools.test.ts index dbe6455e1..5ffe69397 100644 --- a/packages/@ant/model-provider/src/shared/__tests__/openaiConvertTools.test.ts +++ b/packages/@ant/model-provider/src/shared/__tests__/openaiConvertTools.test.ts @@ -1,5 +1,8 @@ import { describe, expect, test } from 'bun:test' -import { anthropicToolsToOpenAI, anthropicToolChoiceToOpenAI } from '../openaiConvertTools.js' +import { + anthropicToolsToOpenAI, + anthropicToolChoiceToOpenAI, +} from '../openaiConvertTools.js' describe('anthropicToolsToOpenAI', () => { test('converts basic tool', () => { diff --git a/packages/@ant/model-provider/src/shared/__tests__/openaiStreamAdapter.test.ts b/packages/@ant/model-provider/src/shared/__tests__/openaiStreamAdapter.test.ts index 24d83b8d6..ba29ab7f5 100644 --- a/packages/@ant/model-provider/src/shared/__tests__/openaiStreamAdapter.test.ts +++ b/packages/@ant/model-provider/src/shared/__tests__/openaiStreamAdapter.test.ts @@ -3,7 +3,9 @@ import type { ChatCompletionChunk } from 'openai/resources/chat/completions/comp import { adaptOpenAIStreamToAnthropic } from '../openaiStreamAdapter.js' /** Helper to create a mock async iterable from chunk array */ -function mockStream(chunks: ChatCompletionChunk[]): AsyncIterable { +function mockStream( + chunks: ChatCompletionChunk[], +): AsyncIterable { return { [Symbol.asyncIterator]() { let i = 0 @@ -18,7 +20,9 @@ function mockStream(chunks: ChatCompletionChunk[]): AsyncIterable & any = {}): ChatCompletionChunk { +function makeChunk( + overrides: Partial & any = {}, +): ChatCompletionChunk { return { id: 'chatcmpl-test', object: 'chat.completion.chunk', @@ -32,7 +36,10 @@ function makeChunk(overrides: Partial & any = {}): ChatComp /** Collect all emitted Anthropic events from the stream adapter for assertion */ async function collectEvents(chunks: ChatCompletionChunk[]) { const events: any[] = [] - for await (const event of adaptOpenAIStreamToAnthropic(mockStream(chunks), 'gpt-4o')) { + for await (const event of adaptOpenAIStreamToAnthropic( + mockStream(chunks), + 'gpt-4o', + )) { events.push(event) } return events @@ -42,25 +49,31 @@ describe('adaptOpenAIStreamToAnthropic', () => { test('emits message_start on first chunk', async () => { const events = await collectEvents([ makeChunk({ - choices: [{ - index: 0, - delta: { role: 'assistant', content: '' }, - finish_reason: null, - }], + choices: [ + { + index: 0, + delta: { role: 'assistant', content: '' }, + finish_reason: null, + }, + ], }), makeChunk({ - choices: [{ - index: 0, - delta: { content: 'hello' }, - finish_reason: null, - }], + choices: [ + { + index: 0, + delta: { content: 'hello' }, + finish_reason: null, + }, + ], }), makeChunk({ - choices: [{ - index: 0, - delta: {}, - finish_reason: 'stop', - }], + choices: [ + { + index: 0, + delta: {}, + finish_reason: 'stop', + }, + ], usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }, }), ]) @@ -73,10 +86,14 @@ describe('adaptOpenAIStreamToAnthropic', () => { test('converts text content stream', async () => { const events = await collectEvents([ makeChunk({ - choices: [{ index: 0, delta: { content: 'Hello' }, finish_reason: null }], + choices: [ + { index: 0, delta: { content: 'Hello' }, finish_reason: null }, + ], }), makeChunk({ - choices: [{ index: 0, delta: { content: ' world' }, finish_reason: null }], + choices: [ + { index: 0, delta: { content: ' world' }, finish_reason: null }, + ], }), makeChunk({ choices: [{ index: 0, delta: {}, finish_reason: 'stop' }], @@ -91,7 +108,9 @@ describe('adaptOpenAIStreamToAnthropic', () => { expect(types).toContain('message_delta') expect(types).toContain('message_stop') - const textDeltas = events.filter(e => e.type === 'content_block_delta') as any[] + const textDeltas = events.filter( + e => e.type === 'content_block_delta', + ) as any[] expect(textDeltas[0].delta.text).toBe('Hello') expect(textDeltas[1].delta.text).toBe(' world') }) @@ -99,42 +118,54 @@ describe('adaptOpenAIStreamToAnthropic', () => { test('converts tool_calls stream', async () => { const events = await collectEvents([ makeChunk({ - choices: [{ - index: 0, - delta: { - tool_calls: [{ - index: 0, - id: 'call_abc', - type: 'function', - function: { name: 'bash', arguments: '' }, - }], + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: 0, + id: 'call_abc', + type: 'function', + function: { name: 'bash', arguments: '' }, + }, + ], + }, + finish_reason: null, }, - finish_reason: null, - }], + ], }), makeChunk({ - choices: [{ - index: 0, - delta: { - tool_calls: [{ - index: 0, - function: { arguments: '{"comm' }, - }], + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: 0, + function: { arguments: '{"comm' }, + }, + ], + }, + finish_reason: null, }, - finish_reason: null, - }], + ], }), makeChunk({ - choices: [{ - index: 0, - delta: { - tool_calls: [{ - index: 0, - function: { arguments: 'and":"ls"}' }, - }], + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: 0, + function: { arguments: 'and":"ls"}' }, + }, + ], + }, + finish_reason: null, }, - finish_reason: null, - }], + ], }), makeChunk({ choices: [{ index: 0, delta: {}, finish_reason: 'tool_calls' }], @@ -146,7 +177,8 @@ describe('adaptOpenAIStreamToAnthropic', () => { expect(blockStart.content_block.name).toBe('bash') const jsonDeltas = events.filter( - e => e.type === 'content_block_delta' && e.delta.type === 'input_json_delta', + e => + e.type === 'content_block_delta' && e.delta.type === 'input_json_delta', ) as any[] const fullArgs = jsonDeltas.map(d => d.delta.partial_json).join('') expect(fullArgs).toBe('{"command":"ls"}') @@ -171,13 +203,21 @@ describe('adaptOpenAIStreamToAnthropic', () => { // return finish_reason "stop" when they actually made tool calls. const events = await collectEvents([ makeChunk({ - choices: [{ - index: 0, - delta: { - tool_calls: [{ index: 0, id: 'call_1', function: { name: 'bash', arguments: '{"cmd":"ls"}' } }], + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: 0, + id: 'call_1', + function: { name: 'bash', arguments: '{"cmd":"ls"}' }, + }, + ], + }, + finish_reason: null, }, - finish_reason: null, - }], + ], }), makeChunk({ choices: [{ index: 0, delta: {}, finish_reason: 'stop' }], @@ -191,13 +231,21 @@ describe('adaptOpenAIStreamToAnthropic', () => { test('maps finish_reason tool_calls to tool_use', async () => { const events = await collectEvents([ makeChunk({ - choices: [{ - index: 0, - delta: { - tool_calls: [{ index: 0, id: 'call_1', function: { name: 'bash', arguments: '{}' } }], + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: 0, + id: 'call_1', + function: { name: 'bash', arguments: '{}' }, + }, + ], + }, + finish_reason: null, }, - finish_reason: null, - }], + ], }), makeChunk({ choices: [{ index: 0, delta: {}, finish_reason: 'tool_calls' }], @@ -211,7 +259,9 @@ describe('adaptOpenAIStreamToAnthropic', () => { test('maps finish_reason length to max_tokens', async () => { const events = await collectEvents([ makeChunk({ - choices: [{ index: 0, delta: { content: 'truncated' }, finish_reason: null }], + choices: [ + { index: 0, delta: { content: 'truncated' }, finish_reason: null }, + ], }), makeChunk({ choices: [{ index: 0, delta: {}, finish_reason: 'length' }], @@ -225,23 +275,35 @@ describe('adaptOpenAIStreamToAnthropic', () => { test('handles mixed text and tool_calls', async () => { const events = await collectEvents([ makeChunk({ - choices: [{ index: 0, delta: { content: 'Thinking...' }, finish_reason: null }], + choices: [ + { index: 0, delta: { content: 'Thinking...' }, finish_reason: null }, + ], }), makeChunk({ - choices: [{ - index: 0, - delta: { - tool_calls: [{ index: 0, id: 'call_1', function: { name: 'grep', arguments: '{"p":"test"}' } }], + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: 0, + id: 'call_1', + function: { name: 'grep', arguments: '{"p":"test"}' }, + }, + ], + }, + finish_reason: null, }, - finish_reason: null, - }], + ], }), makeChunk({ choices: [{ index: 0, delta: {}, finish_reason: 'tool_calls' }], }), ]) - const blockStarts = events.filter(e => e.type === 'content_block_start') as any[] + const blockStarts = events.filter( + e => e.type === 'content_block_start', + ) as any[] expect(blockStarts.length).toBe(2) expect(blockStarts[0].content_block.type).toBe('text') expect(blockStarts[1].content_block.type).toBe('tool_use') @@ -252,18 +314,22 @@ describe('thinking support (reasoning_content)', () => { test('converts reasoning_content to thinking block', async () => { const events = await collectEvents([ makeChunk({ - choices: [{ - index: 0, - delta: { reasoning_content: 'Let me analyze this...' }, - finish_reason: null, - }], + choices: [ + { + index: 0, + delta: { reasoning_content: 'Let me analyze this...' }, + finish_reason: null, + }, + ], }), makeChunk({ - choices: [{ - index: 0, - delta: { reasoning_content: ' step by step.' }, - finish_reason: null, - }], + choices: [ + { + index: 0, + delta: { reasoning_content: ' step by step.' }, + finish_reason: null, + }, + ], }), makeChunk({ choices: [{ index: 0, delta: {}, finish_reason: 'stop' }], @@ -277,7 +343,8 @@ describe('thinking support (reasoning_content)', () => { // Should have thinking_delta events const thinkingDeltas = events.filter( - e => e.type === 'content_block_delta' && e.delta.type === 'thinking_delta', + e => + e.type === 'content_block_delta' && e.delta.type === 'thinking_delta', ) as any[] expect(thinkingDeltas.length).toBe(2) expect(thinkingDeltas[0].delta.thinking).toBe('Let me analyze this...') @@ -287,18 +354,22 @@ describe('thinking support (reasoning_content)', () => { test('converts reasoning then content (DeepSeek-style)', async () => { const events = await collectEvents([ makeChunk({ - choices: [{ - index: 0, - delta: { reasoning_content: 'Thinking about the answer...' }, - finish_reason: null, - }], + choices: [ + { + index: 0, + delta: { reasoning_content: 'Thinking about the answer...' }, + finish_reason: null, + }, + ], }), makeChunk({ - choices: [{ - index: 0, - delta: { content: 'Here is my answer.' }, - finish_reason: null, - }], + choices: [ + { + index: 0, + delta: { content: 'Here is my answer.' }, + finish_reason: null, + }, + ], }), makeChunk({ choices: [{ index: 0, delta: {}, finish_reason: 'stop' }], @@ -306,13 +377,17 @@ describe('thinking support (reasoning_content)', () => { ]) // Should have two content blocks: thinking + text - const blockStarts = events.filter(e => e.type === 'content_block_start') as any[] + const blockStarts = events.filter( + e => e.type === 'content_block_start', + ) as any[] expect(blockStarts.length).toBe(2) expect(blockStarts[0].content_block.type).toBe('thinking') expect(blockStarts[1].content_block.type).toBe('text') // Thinking block should be closed before text block starts - const blockStops = events.filter(e => e.type === 'content_block_stop') as any[] + const blockStops = events.filter( + e => e.type === 'content_block_stop', + ) as any[] expect(blockStops[0].index).toBe(0) // thinking block closed at index 0 expect(blockStarts[1].index).toBe(1) // text block starts at index 1 @@ -326,27 +401,39 @@ describe('thinking support (reasoning_content)', () => { test('handles reasoning then tool_calls', async () => { const events = await collectEvents([ makeChunk({ - choices: [{ - index: 0, - delta: { reasoning_content: 'I need to run a command.' }, - finish_reason: null, - }], + choices: [ + { + index: 0, + delta: { reasoning_content: 'I need to run a command.' }, + finish_reason: null, + }, + ], }), makeChunk({ - choices: [{ - index: 0, - delta: { - tool_calls: [{ index: 0, id: 'call_1', function: { name: 'bash', arguments: '{"c":"ls"}' } }], + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: 0, + id: 'call_1', + function: { name: 'bash', arguments: '{"c":"ls"}' }, + }, + ], + }, + finish_reason: null, }, - finish_reason: null, - }], + ], }), makeChunk({ choices: [{ index: 0, delta: {}, finish_reason: 'tool_calls' }], }), ]) - const blockStarts = events.filter(e => e.type === 'content_block_start') as any[] + const blockStarts = events.filter( + e => e.type === 'content_block_start', + ) as any[] expect(blockStarts.length).toBe(2) expect(blockStarts[0].content_block.type).toBe('thinking') expect(blockStarts[1].content_block.type).toBe('tool_use') @@ -355,25 +442,31 @@ describe('thinking support (reasoning_content)', () => { test('thinking block index is 0, text block index is 1', async () => { const events = await collectEvents([ makeChunk({ - choices: [{ - index: 0, - delta: { reasoning_content: 'reason' }, - finish_reason: null, - }], + choices: [ + { + index: 0, + delta: { reasoning_content: 'reason' }, + finish_reason: null, + }, + ], }), makeChunk({ - choices: [{ - index: 0, - delta: { content: 'answer' }, - finish_reason: null, - }], + choices: [ + { + index: 0, + delta: { content: 'answer' }, + finish_reason: null, + }, + ], }), makeChunk({ choices: [{ index: 0, delta: {}, finish_reason: 'stop' }], }), ]) - const blockStarts = events.filter(e => e.type === 'content_block_start') as any[] + const blockStarts = events.filter( + e => e.type === 'content_block_start', + ) as any[] expect(blockStarts[0].index).toBe(0) expect(blockStarts[1].index).toBe(1) }) @@ -383,11 +476,13 @@ describe('prompt caching support', () => { test('maps cached_tokens to cache_read_input_tokens', async () => { const events = await collectEvents([ makeChunk({ - choices: [{ - index: 0, - delta: { content: 'hi' }, - finish_reason: null, - }], + choices: [ + { + index: 0, + delta: { content: 'hi' }, + finish_reason: null, + }, + ], usage: { prompt_tokens: 1000, completion_tokens: 0, @@ -463,7 +558,9 @@ describe('prompt caching support', () => { // emitted before the trailing chunk and always has input_tokens=0. const events = await collectEvents([ makeChunk({ - choices: [{ index: 0, delta: { content: 'hello' }, finish_reason: null }], + choices: [ + { index: 0, delta: { content: 'hello' }, finish_reason: null }, + ], }), // finish_reason chunk — usage not yet available makeChunk({ @@ -493,14 +590,20 @@ describe('prompt caching support', () => { // the autocompact threshold (~33k), so compaction never fires. const events = await collectEvents([ makeChunk({ - choices: [{ index: 0, delta: { content: 'answer' }, finish_reason: null }], + choices: [ + { index: 0, delta: { content: 'answer' }, finish_reason: null }, + ], }), makeChunk({ choices: [{ index: 0, delta: {}, finish_reason: 'stop' }], }), makeChunk({ choices: [], - usage: { prompt_tokens: 800, completion_tokens: 200, total_tokens: 1000 }, + usage: { + prompt_tokens: 800, + completion_tokens: 200, + total_tokens: 1000, + }, }), ]) @@ -514,13 +617,21 @@ describe('prompt caching support', () => { // when the model made tool calls and usage arrives in a trailing chunk. const events = await collectEvents([ makeChunk({ - choices: [{ - index: 0, - delta: { - tool_calls: [{ index: 0, id: 'call_x', function: { name: 'bash', arguments: '{"cmd":"ls"}' } }], + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: 0, + id: 'call_x', + function: { name: 'bash', arguments: '{"cmd":"ls"}' }, + }, + ], + }, + finish_reason: null, }, - finish_reason: null, - }], + ], }), makeChunk({ choices: [{ index: 0, delta: {}, finish_reason: 'tool_calls' }], @@ -540,9 +651,14 @@ describe('prompt caching support', () => { test('message_delta always comes before message_stop', async () => { // Verifies event ordering is preserved after deferring to post-loop emission. const events = await collectEvents([ - makeChunk({ choices: [{ index: 0, delta: { content: 'x' }, finish_reason: null }] }), + makeChunk({ + choices: [{ index: 0, delta: { content: 'x' }, finish_reason: null }], + }), makeChunk({ choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] }), - makeChunk({ choices: [], usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 } }), + makeChunk({ + choices: [], + usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }, + }), ]) const types = events.map(e => e.type) @@ -561,7 +677,9 @@ describe('prompt caching support', () => { // queryModelOpenAI's spread — even though cachedTokens was captured internally. const events = await collectEvents([ makeChunk({ - choices: [{ index: 0, delta: { content: 'answer' }, finish_reason: null }], + choices: [ + { index: 0, delta: { content: 'answer' }, finish_reason: null }, + ], }), makeChunk({ choices: [{ index: 0, delta: {}, finish_reason: 'stop' }], @@ -638,7 +756,9 @@ describe('prompt caching support', () => { // Some endpoints send usage in the finish_reason chunk instead of a trailing chunk. const events = await collectEvents([ makeChunk({ - choices: [{ index: 0, delta: { content: 'result' }, finish_reason: null }], + choices: [ + { index: 0, delta: { content: 'result' }, finish_reason: null }, + ], }), makeChunk({ choices: [{ index: 0, delta: {}, finish_reason: 'stop' }], diff --git a/packages/@ant/model-provider/src/types/message.ts b/packages/@ant/model-provider/src/types/message.ts index 1f6f15832..fe738d32c 100644 --- a/packages/@ant/model-provider/src/types/message.ts +++ b/packages/@ant/model-provider/src/types/message.ts @@ -13,7 +13,14 @@ import type { BetaUsage } from '@anthropic-ai/sdk/resources/beta/messages/messag * Individual message subtypes (UserMessage, AssistantMessage, etc.) extend * this with narrower `type` literals and additional fields. */ -export type MessageType = 'user' | 'assistant' | 'system' | 'attachment' | 'progress' | 'grouped_tool_use' | 'collapsed_read_search' +export type MessageType = + | 'user' + | 'assistant' + | 'system' + | 'attachment' + | 'progress' + | 'grouped_tool_use' + | 'collapsed_read_search' /** A single content element inside message.content arrays. */ export type ContentItem = ContentBlockParam | ContentBlock @@ -34,7 +41,14 @@ export type Message = { isCompactSummary?: boolean toolUseResult?: unknown isVisibleInTranscriptOnly?: boolean - attachment?: { type: string; toolUseID?: string; [key: string]: unknown; addedNames: string[]; addedLines: string[]; removedNames: string[] } + attachment?: { + type: string + toolUseID?: string + [key: string]: unknown + addedNames: string[] + addedLines: string[] + removedNames: string[] + } message?: { role?: string id?: string @@ -49,8 +63,12 @@ export type AssistantMessage = Message & { type: 'assistant' message: NonNullable } -export type AttachmentMessage = Message & { type: 'attachment'; attachment: T } -export type ProgressMessage = Message & { type: 'progress'; data: T } +export type AttachmentMessage = + Message & { type: 'attachment'; attachment: T } +export type ProgressMessage = Message & { + type: 'progress' + data: T +} export type SystemLocalCommandMessage = Message & { type: 'system' } export type SystemMessage = Message & { type: 'system' } export type UserMessage = Message & {