mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-22 16:25:51 +00:00
feat: enable Computer Use with macOS + Windows + Linux support
Phase 1: Replace @ant/computer-use-mcp stub (12 files, 6517 lines). Phase 2: Remove 8 macOS-only guards in src/: - main.tsx: remove getPlatform()==='macos' check - swiftLoader.ts: remove darwin-only throw - executor.ts: extend platform guard, clipboard dispatch, paste key - drainRunLoop.ts: skip CFRunLoop pump on non-darwin - escHotkey.ts: non-darwin returns false (Ctrl+C fallback) - hostAdapter.ts: non-darwin permissions granted - common.ts: dynamic platform + screenshotFiltering - gates.ts: enabled:true, subscription check removed Phase 3: Add Linux backends (xdotool/scrot/xrandr/wmctrl): - computer-use-input/backends/linux.ts (173 lines) - computer-use-swift/backends/linux.ts (278 lines) Verified on Windows x64: mouse, screenshot, displays, foreground app. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
137
packages/@ant/computer-use-input/src/backends/darwin.ts
Normal file
137
packages/@ant/computer-use-input/src/backends/darwin.ts
Normal file
@@ -0,0 +1,137 @@
|
||||
/**
|
||||
* macOS backend for computer-use-input
|
||||
*
|
||||
* Uses AppleScript (osascript) and JXA (JavaScript for Automation) to control
|
||||
* mouse and keyboard via CoreGraphics events and System Events.
|
||||
*/
|
||||
|
||||
import { $ } from 'bun'
|
||||
import type { FrontmostAppInfo, InputBackend } from '../types.js'
|
||||
|
||||
const KEY_MAP: Record<string, number> = {
|
||||
return: 36, enter: 36, tab: 48, space: 49, delete: 51, backspace: 51,
|
||||
escape: 53, esc: 53,
|
||||
left: 123, right: 124, down: 125, up: 126,
|
||||
f1: 122, f2: 120, f3: 99, f4: 118, f5: 96, f6: 97,
|
||||
f7: 98, f8: 100, f9: 101, f10: 109, f11: 103, f12: 111,
|
||||
home: 115, end: 119, pageup: 116, pagedown: 121,
|
||||
}
|
||||
|
||||
const MODIFIER_MAP: Record<string, string> = {
|
||||
command: 'command down', cmd: 'command down', meta: 'command down', super: 'command down',
|
||||
shift: 'shift down',
|
||||
option: 'option down', alt: 'option down',
|
||||
control: 'control down', ctrl: 'control down',
|
||||
}
|
||||
|
||||
async function osascript(script: string): Promise<string> {
|
||||
const result = await $`osascript -e ${script}`.quiet().nothrow().text()
|
||||
return result.trim()
|
||||
}
|
||||
|
||||
async function jxa(script: string): Promise<string> {
|
||||
const result = await $`osascript -l JavaScript -e ${script}`.quiet().nothrow().text()
|
||||
return result.trim()
|
||||
}
|
||||
|
||||
function buildMouseJxa(eventType: string, x: number, y: number, btn: number, clickState?: number): string {
|
||||
let script = `ObjC.import("CoreGraphics"); var p = $.CGPointMake(${x},${y}); var e = $.CGEventCreateMouseEvent(null, $.${eventType}, p, ${btn});`
|
||||
if (clickState !== undefined) {
|
||||
script += ` $.CGEventSetIntegerValueField(e, $.kCGMouseEventClickState, ${clickState});`
|
||||
}
|
||||
script += ` $.CGEventPost($.kCGHIDEventTap, e);`
|
||||
return script
|
||||
}
|
||||
|
||||
export const moveMouse: InputBackend['moveMouse'] = async (x, y, _animated) => {
|
||||
await jxa(buildMouseJxa('kCGEventMouseMoved', x, y, 0))
|
||||
}
|
||||
|
||||
export const key: InputBackend['key'] = async (keyName, action) => {
|
||||
if (action === 'release') return
|
||||
const lower = keyName.toLowerCase()
|
||||
const keyCode = KEY_MAP[lower]
|
||||
if (keyCode !== undefined) {
|
||||
await osascript(`tell application "System Events" to key code ${keyCode}`)
|
||||
} else {
|
||||
await osascript(`tell application "System Events" to keystroke "${keyName.length === 1 ? keyName : lower}"`)
|
||||
}
|
||||
}
|
||||
|
||||
export const keys: InputBackend['keys'] = async (parts) => {
|
||||
const modifiers: string[] = []
|
||||
let finalKey: string | null = null
|
||||
for (const part of parts) {
|
||||
const mod = MODIFIER_MAP[part.toLowerCase()]
|
||||
if (mod) modifiers.push(mod)
|
||||
else finalKey = part
|
||||
}
|
||||
if (!finalKey) return
|
||||
const lower = finalKey.toLowerCase()
|
||||
const keyCode = KEY_MAP[lower]
|
||||
const modStr = modifiers.length > 0 ? ` using {${modifiers.join(', ')}}` : ''
|
||||
if (keyCode !== undefined) {
|
||||
await osascript(`tell application "System Events" to key code ${keyCode}${modStr}`)
|
||||
} else {
|
||||
await osascript(`tell application "System Events" to keystroke "${finalKey.length === 1 ? finalKey : lower}"${modStr}`)
|
||||
}
|
||||
}
|
||||
|
||||
export const mouseLocation: InputBackend['mouseLocation'] = async () => {
|
||||
const result = await jxa('ObjC.import("CoreGraphics"); var e = $.CGEventCreate(null); var p = $.CGEventGetLocation(e); p.x + "," + p.y')
|
||||
const [xStr, yStr] = result.split(',')
|
||||
return { x: Math.round(Number(xStr)), y: Math.round(Number(yStr)) }
|
||||
}
|
||||
|
||||
export const mouseButton: InputBackend['mouseButton'] = async (button, action, count) => {
|
||||
const pos = await mouseLocation()
|
||||
const btn = button === 'left' ? 0 : button === 'right' ? 1 : 2
|
||||
const downType = btn === 0 ? 'kCGEventLeftMouseDown' : btn === 1 ? 'kCGEventRightMouseDown' : 'kCGEventOtherMouseDown'
|
||||
const upType = btn === 0 ? 'kCGEventLeftMouseUp' : btn === 1 ? 'kCGEventRightMouseUp' : 'kCGEventOtherMouseUp'
|
||||
|
||||
if (action === 'click') {
|
||||
for (let i = 0; i < (count ?? 1); i++) {
|
||||
await jxa(buildMouseJxa(downType, pos.x, pos.y, btn, i + 1))
|
||||
await jxa(buildMouseJxa(upType, pos.x, pos.y, btn, i + 1))
|
||||
}
|
||||
} else if (action === 'press') {
|
||||
await jxa(buildMouseJxa(downType, pos.x, pos.y, btn))
|
||||
} else {
|
||||
await jxa(buildMouseJxa(upType, pos.x, pos.y, btn))
|
||||
}
|
||||
}
|
||||
|
||||
export const mouseScroll: InputBackend['mouseScroll'] = async (amount, direction) => {
|
||||
const script = direction === 'vertical'
|
||||
? `ObjC.import("CoreGraphics"); var e = $.CGEventCreateScrollWheelEvent(null, 0, 1, ${amount}); $.CGEventPost($.kCGHIDEventTap, e);`
|
||||
: `ObjC.import("CoreGraphics"); var e = $.CGEventCreateScrollWheelEvent(null, 0, 2, 0, ${amount}); $.CGEventPost($.kCGHIDEventTap, e);`
|
||||
await jxa(script)
|
||||
}
|
||||
|
||||
export const typeText: InputBackend['typeText'] = async (text) => {
|
||||
const escaped = text.replace(/\\/g, '\\\\').replace(/"/g, '\\"')
|
||||
await osascript(`tell application "System Events" to keystroke "${escaped}"`)
|
||||
}
|
||||
|
||||
export const getFrontmostAppInfo: InputBackend['getFrontmostAppInfo'] = () => {
|
||||
try {
|
||||
const result = Bun.spawnSync({
|
||||
cmd: ['osascript', '-e', `
|
||||
tell application "System Events"
|
||||
set frontApp to first application process whose frontmost is true
|
||||
set appName to name of frontApp
|
||||
set bundleId to bundle identifier of frontApp
|
||||
return bundleId & "|" & appName
|
||||
end tell
|
||||
`],
|
||||
stdout: 'pipe',
|
||||
stderr: 'pipe',
|
||||
})
|
||||
const output = new TextDecoder().decode(result.stdout).trim()
|
||||
if (!output || !output.includes('|')) return null
|
||||
const [bundleId, appName] = output.split('|', 2)
|
||||
return { bundleId: bundleId!, appName: appName! }
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
173
packages/@ant/computer-use-input/src/backends/linux.ts
Normal file
173
packages/@ant/computer-use-input/src/backends/linux.ts
Normal file
@@ -0,0 +1,173 @@
|
||||
/**
|
||||
* Linux backend for computer-use-input
|
||||
*
|
||||
* Uses xdotool for mouse and keyboard simulation.
|
||||
* Requires: xdotool (apt install xdotool)
|
||||
*/
|
||||
|
||||
import type { FrontmostAppInfo, InputBackend } from '../types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Shell helper — run a command and return trimmed stdout
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function run(cmd: string[]): string {
|
||||
const result = Bun.spawnSync({
|
||||
cmd,
|
||||
stdout: 'pipe',
|
||||
stderr: 'pipe',
|
||||
})
|
||||
return new TextDecoder().decode(result.stdout).trim()
|
||||
}
|
||||
|
||||
async function runAsync(cmd: string[]): Promise<string> {
|
||||
const proc = Bun.spawn(cmd, { stdout: 'pipe', stderr: 'pipe' })
|
||||
const out = await new Response(proc.stdout).text()
|
||||
await proc.exited
|
||||
return out.trim()
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// xdotool key name mapping
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const KEY_MAP: Record<string, string> = {
|
||||
return: 'Return', enter: 'Return', tab: 'Tab', space: 'space',
|
||||
backspace: 'BackSpace', delete: 'Delete', escape: 'Escape', esc: 'Escape',
|
||||
left: 'Left', up: 'Up', right: 'Right', down: 'Down',
|
||||
home: 'Home', end: 'End', pageup: 'Prior', pagedown: 'Next',
|
||||
f1: 'F1', f2: 'F2', f3: 'F3', f4: 'F4', f5: 'F5', f6: 'F6',
|
||||
f7: 'F7', f8: 'F8', f9: 'F9', f10: 'F10', f11: 'F11', f12: 'F12',
|
||||
shift: 'shift', lshift: 'shift', rshift: 'shift',
|
||||
control: 'ctrl', ctrl: 'ctrl', lcontrol: 'ctrl', rcontrol: 'ctrl',
|
||||
alt: 'alt', option: 'alt', lalt: 'alt', ralt: 'alt',
|
||||
win: 'super', meta: 'super', command: 'super', cmd: 'super', super: 'super',
|
||||
insert: 'Insert', printscreen: 'Print', pause: 'Pause',
|
||||
numlock: 'Num_Lock', capslock: 'Caps_Lock', scrolllock: 'Scroll_Lock',
|
||||
}
|
||||
|
||||
const MODIFIER_KEYS = new Set([
|
||||
'shift', 'lshift', 'rshift', 'control', 'ctrl', 'lcontrol', 'rcontrol',
|
||||
'alt', 'option', 'lalt', 'ralt', 'win', 'meta', 'command', 'cmd', 'super',
|
||||
])
|
||||
|
||||
function mapKey(name: string): string {
|
||||
return KEY_MAP[name.toLowerCase()] ?? name
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// xdotool mouse button mapping
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function mouseButtonNum(button: 'left' | 'right' | 'middle'): string {
|
||||
return button === 'left' ? '1' : button === 'right' ? '3' : '2'
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Implementation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const moveMouse: InputBackend['moveMouse'] = async (x, y, _animated) => {
|
||||
run(['xdotool', 'mousemove', '--sync', String(Math.round(x)), String(Math.round(y))])
|
||||
}
|
||||
|
||||
export const mouseLocation: InputBackend['mouseLocation'] = async () => {
|
||||
const out = run(['xdotool', 'getmouselocation'])
|
||||
// Output format: "x:123 y:456 screen:0 window:12345678"
|
||||
const xMatch = out.match(/x:(\d+)/)
|
||||
const yMatch = out.match(/y:(\d+)/)
|
||||
return {
|
||||
x: xMatch ? Number(xMatch[1]) : 0,
|
||||
y: yMatch ? Number(yMatch[1]) : 0,
|
||||
}
|
||||
}
|
||||
|
||||
export const mouseButton: InputBackend['mouseButton'] = async (button, action, count) => {
|
||||
const btn = mouseButtonNum(button)
|
||||
if (action === 'click') {
|
||||
const n = count ?? 1
|
||||
run(['xdotool', 'click', '--repeat', String(n), btn])
|
||||
} else if (action === 'press') {
|
||||
run(['xdotool', 'mousedown', btn])
|
||||
} else {
|
||||
run(['xdotool', 'mouseup', btn])
|
||||
}
|
||||
}
|
||||
|
||||
export const mouseScroll: InputBackend['mouseScroll'] = async (amount, direction) => {
|
||||
// xdotool click 4=scroll up, 5=scroll down, 6=scroll left, 7=scroll right
|
||||
// Positive amount = down/right, negative = up/left
|
||||
if (direction === 'vertical') {
|
||||
const btn = amount >= 0 ? '5' : '4'
|
||||
const repeats = Math.abs(Math.round(amount))
|
||||
if (repeats > 0) {
|
||||
run(['xdotool', 'click', '--repeat', String(repeats), btn])
|
||||
}
|
||||
} else {
|
||||
const btn = amount >= 0 ? '7' : '6'
|
||||
const repeats = Math.abs(Math.round(amount))
|
||||
if (repeats > 0) {
|
||||
run(['xdotool', 'click', '--repeat', String(repeats), btn])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export const key: InputBackend['key'] = async (keyName, action) => {
|
||||
const mapped = mapKey(keyName)
|
||||
if (action === 'press') {
|
||||
run(['xdotool', 'keydown', mapped])
|
||||
} else {
|
||||
run(['xdotool', 'keyup', mapped])
|
||||
}
|
||||
}
|
||||
|
||||
export const keys: InputBackend['keys'] = async (parts) => {
|
||||
// xdotool key accepts "modifier+modifier+key" format
|
||||
const modifiers: string[] = []
|
||||
let finalKey: string | null = null
|
||||
|
||||
for (const part of parts) {
|
||||
if (MODIFIER_KEYS.has(part.toLowerCase())) {
|
||||
modifiers.push(mapKey(part))
|
||||
} else {
|
||||
finalKey = part
|
||||
}
|
||||
}
|
||||
if (!finalKey) return
|
||||
|
||||
const combo = [...modifiers, mapKey(finalKey)].join('+')
|
||||
run(['xdotool', 'key', combo])
|
||||
}
|
||||
|
||||
export const typeText: InputBackend['typeText'] = async (text) => {
|
||||
run(['xdotool', 'type', '--delay', '12', text])
|
||||
}
|
||||
|
||||
export const getFrontmostAppInfo: InputBackend['getFrontmostAppInfo'] = () => {
|
||||
try {
|
||||
const windowId = run(['xdotool', 'getactivewindow'])
|
||||
if (!windowId) return null
|
||||
|
||||
const pidStr = run(['xdotool', 'getwindowpid', windowId])
|
||||
if (!pidStr) return null
|
||||
|
||||
const pid = pidStr.trim()
|
||||
|
||||
// Read the executable path from /proc
|
||||
let exePath = ''
|
||||
try {
|
||||
exePath = run(['readlink', '-f', `/proc/${pid}/exe`])
|
||||
} catch { /* ignore */ }
|
||||
|
||||
// Read the process name from /proc/comm
|
||||
let appName = ''
|
||||
try {
|
||||
appName = run(['cat', `/proc/${pid}/comm`])
|
||||
} catch { /* ignore */ }
|
||||
|
||||
if (!exePath && !appName) return null
|
||||
return { bundleId: exePath || `/proc/${pid}/exe`, appName: appName || 'unknown' }
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
218
packages/@ant/computer-use-input/src/backends/win32.ts
Normal file
218
packages/@ant/computer-use-input/src/backends/win32.ts
Normal file
@@ -0,0 +1,218 @@
|
||||
/**
|
||||
* Windows backend for computer-use-input
|
||||
*
|
||||
* Uses PowerShell with Win32 P/Invoke (SetCursorPos, SendInput, keybd_event,
|
||||
* GetForegroundWindow) to control mouse and keyboard.
|
||||
*
|
||||
* All P/Invoke types are compiled once at module load and reused across calls.
|
||||
*/
|
||||
|
||||
import type { FrontmostAppInfo, InputBackend } from '../types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// PowerShell helper — run a script and return trimmed stdout
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function ps(script: string): string {
|
||||
const result = Bun.spawnSync({
|
||||
cmd: ['powershell', '-NoProfile', '-NonInteractive', '-Command', script],
|
||||
stdout: 'pipe',
|
||||
stderr: 'pipe',
|
||||
})
|
||||
return new TextDecoder().decode(result.stdout).trim()
|
||||
}
|
||||
|
||||
async function psAsync(script: string): Promise<string> {
|
||||
const proc = Bun.spawn(
|
||||
['powershell', '-NoProfile', '-NonInteractive', '-Command', script],
|
||||
{ stdout: 'pipe', stderr: 'pipe' },
|
||||
)
|
||||
const out = await new Response(proc.stdout).text()
|
||||
await proc.exited
|
||||
return out.trim()
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// P/Invoke type definitions (compiled once, cached by PowerShell session)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const WIN32_TYPES = `
|
||||
Add-Type -Language CSharp @'
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
using System.Diagnostics;
|
||||
|
||||
public class CuWin32 {
|
||||
// --- Cursor ---
|
||||
[DllImport("user32.dll")] public static extern bool SetCursorPos(int X, int Y);
|
||||
[DllImport("user32.dll")] public static extern bool GetCursorPos(out POINT p);
|
||||
[StructLayout(LayoutKind.Sequential)] public struct POINT { public int X; public int Y; }
|
||||
|
||||
// --- SendInput ---
|
||||
[StructLayout(LayoutKind.Sequential)] public struct MOUSEINPUT {
|
||||
public int dx; public int dy; public int mouseData; public uint dwFlags; public uint time; public IntPtr dwExtraInfo;
|
||||
}
|
||||
[StructLayout(LayoutKind.Explicit)] public struct INPUT {
|
||||
[FieldOffset(0)] public uint type;
|
||||
[FieldOffset(4)] public MOUSEINPUT mi;
|
||||
}
|
||||
[StructLayout(LayoutKind.Sequential)] public struct KEYBDINPUT {
|
||||
public ushort wVk; public ushort wScan; public uint dwFlags; public uint time; public IntPtr dwExtraInfo;
|
||||
}
|
||||
[StructLayout(LayoutKind.Explicit)] public struct KINPUT {
|
||||
[FieldOffset(0)] public uint type;
|
||||
[FieldOffset(4)] public KEYBDINPUT ki;
|
||||
}
|
||||
[DllImport("user32.dll", SetLastError=true)] public static extern uint SendInput(uint n, INPUT[] i, int cb);
|
||||
[DllImport("user32.dll", SetLastError=true)] public static extern uint SendInput(uint n, KINPUT[] i, int cb);
|
||||
|
||||
// --- Keyboard ---
|
||||
[DllImport("user32.dll")] public static extern void keybd_event(byte bVk, byte bScan, uint dwFlags, UIntPtr dwExtraInfo);
|
||||
[DllImport("user32.dll")] public static extern short VkKeyScan(char ch);
|
||||
|
||||
// --- Window ---
|
||||
[DllImport("user32.dll")] public static extern IntPtr GetForegroundWindow();
|
||||
[DllImport("user32.dll")] public static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint pid);
|
||||
[DllImport("user32.dll", CharSet=CharSet.Unicode)] public static extern int GetWindowText(IntPtr hWnd, StringBuilder sb, int max);
|
||||
|
||||
// Constants
|
||||
public const uint INPUT_MOUSE = 0, INPUT_KEYBOARD = 1;
|
||||
public const uint MOUSEEVENTF_LEFTDOWN = 0x0002, MOUSEEVENTF_LEFTUP = 0x0004;
|
||||
public const uint MOUSEEVENTF_RIGHTDOWN = 0x0008, MOUSEEVENTF_RIGHTUP = 0x0010;
|
||||
public const uint MOUSEEVENTF_MIDDLEDOWN = 0x0020, MOUSEEVENTF_MIDDLEUP = 0x0040;
|
||||
public const uint MOUSEEVENTF_WHEEL = 0x0800, MOUSEEVENTF_HWHEEL = 0x1000;
|
||||
public const uint KEYEVENTF_KEYUP = 0x0002;
|
||||
}
|
||||
'@
|
||||
`
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Virtual key code mapping
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const VK_MAP: Record<string, number> = {
|
||||
return: 0x0D, enter: 0x0D, tab: 0x09, space: 0x20,
|
||||
backspace: 0x08, delete: 0x2E, escape: 0x1B, esc: 0x1B,
|
||||
left: 0x25, up: 0x26, right: 0x27, down: 0x28,
|
||||
home: 0x24, end: 0x23, pageup: 0x21, pagedown: 0x22,
|
||||
f1: 0x70, f2: 0x71, f3: 0x72, f4: 0x73, f5: 0x74, f6: 0x75,
|
||||
f7: 0x76, f8: 0x77, f9: 0x78, f10: 0x79, f11: 0x7A, f12: 0x7B,
|
||||
shift: 0xA0, lshift: 0xA0, rshift: 0xA1,
|
||||
control: 0xA2, ctrl: 0xA2, lcontrol: 0xA2, rcontrol: 0xA3,
|
||||
alt: 0xA4, option: 0xA4, lalt: 0xA4, ralt: 0xA5,
|
||||
win: 0x5B, meta: 0x5B, command: 0x5B, cmd: 0x5B, super: 0x5B,
|
||||
insert: 0x2D, printscreen: 0x2C, pause: 0x13,
|
||||
numlock: 0x90, capslock: 0x14, scrolllock: 0x91,
|
||||
}
|
||||
|
||||
const MODIFIER_KEYS = new Set(['shift', 'lshift', 'rshift', 'control', 'ctrl', 'lcontrol', 'rcontrol', 'alt', 'option', 'lalt', 'ralt', 'win', 'meta', 'command', 'cmd', 'super'])
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Implementation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const moveMouse: InputBackend['moveMouse'] = async (x, y, _animated) => {
|
||||
ps(`${WIN32_TYPES}; [CuWin32]::SetCursorPos(${Math.round(x)}, ${Math.round(y)}) | Out-Null`)
|
||||
}
|
||||
|
||||
export const mouseLocation: InputBackend['mouseLocation'] = async () => {
|
||||
const out = ps(`${WIN32_TYPES}; $p = New-Object CuWin32+POINT; [CuWin32]::GetCursorPos([ref]$p) | Out-Null; "$($p.X),$($p.Y)"`)
|
||||
const [xStr, yStr] = out.split(',')
|
||||
return { x: Number(xStr), y: Number(yStr) }
|
||||
}
|
||||
|
||||
export const mouseButton: InputBackend['mouseButton'] = async (button, action, count) => {
|
||||
const downFlag = button === 'left' ? 'MOUSEEVENTF_LEFTDOWN'
|
||||
: button === 'right' ? 'MOUSEEVENTF_RIGHTDOWN'
|
||||
: 'MOUSEEVENTF_MIDDLEDOWN'
|
||||
const upFlag = button === 'left' ? 'MOUSEEVENTF_LEFTUP'
|
||||
: button === 'right' ? 'MOUSEEVENTF_RIGHTUP'
|
||||
: 'MOUSEEVENTF_MIDDLEUP'
|
||||
|
||||
if (action === 'click') {
|
||||
const n = count ?? 1
|
||||
let clicks = ''
|
||||
for (let i = 0; i < n; i++) {
|
||||
clicks += `$i.mi.dwFlags=[CuWin32]::${downFlag}; [CuWin32]::SendInput(1, @($i), [Runtime.InteropServices.Marshal]::SizeOf($i)) | Out-Null; $i.mi.dwFlags=[CuWin32]::${upFlag}; [CuWin32]::SendInput(1, @($i), [Runtime.InteropServices.Marshal]::SizeOf($i)) | Out-Null; `
|
||||
}
|
||||
ps(`${WIN32_TYPES}; $i = New-Object CuWin32+INPUT; $i.type=[CuWin32]::INPUT_MOUSE; ${clicks}`)
|
||||
} else if (action === 'press') {
|
||||
ps(`${WIN32_TYPES}; $i = New-Object CuWin32+INPUT; $i.type=[CuWin32]::INPUT_MOUSE; $i.mi.dwFlags=[CuWin32]::${downFlag}; [CuWin32]::SendInput(1, @($i), [Runtime.InteropServices.Marshal]::SizeOf($i)) | Out-Null`)
|
||||
} else {
|
||||
ps(`${WIN32_TYPES}; $i = New-Object CuWin32+INPUT; $i.type=[CuWin32]::INPUT_MOUSE; $i.mi.dwFlags=[CuWin32]::${upFlag}; [CuWin32]::SendInput(1, @($i), [Runtime.InteropServices.Marshal]::SizeOf($i)) | Out-Null`)
|
||||
}
|
||||
}
|
||||
|
||||
export const mouseScroll: InputBackend['mouseScroll'] = async (amount, direction) => {
|
||||
const flag = direction === 'vertical' ? 'MOUSEEVENTF_WHEEL' : 'MOUSEEVENTF_HWHEEL'
|
||||
ps(`${WIN32_TYPES}; $i = New-Object CuWin32+INPUT; $i.type=[CuWin32]::INPUT_MOUSE; $i.mi.dwFlags=[CuWin32]::${flag}; $i.mi.mouseData=${amount * 120}; [CuWin32]::SendInput(1, @($i), [Runtime.InteropServices.Marshal]::SizeOf($i)) | Out-Null`)
|
||||
}
|
||||
|
||||
export const key: InputBackend['key'] = async (keyName, action) => {
|
||||
const lower = keyName.toLowerCase()
|
||||
const vk = VK_MAP[lower]
|
||||
const flags = action === 'release' ? '2' : '0'
|
||||
if (vk !== undefined) {
|
||||
ps(`${WIN32_TYPES}; [CuWin32]::keybd_event(${vk}, 0, ${flags}, [UIntPtr]::Zero)`)
|
||||
} else if (keyName.length === 1) {
|
||||
// Single character — use VkKeyScan to resolve
|
||||
const charCode = keyName.charCodeAt(0)
|
||||
ps(`${WIN32_TYPES}; $vk = [CuWin32]::VkKeyScan([char]${charCode}) -band 0xFF; [CuWin32]::keybd_event([byte]$vk, 0, ${flags}, [UIntPtr]::Zero)`)
|
||||
}
|
||||
}
|
||||
|
||||
export const keys: InputBackend['keys'] = async (parts) => {
|
||||
const modifiers: number[] = []
|
||||
let finalKey: string | null = null
|
||||
|
||||
for (const part of parts) {
|
||||
const lower = part.toLowerCase()
|
||||
if (MODIFIER_KEYS.has(lower)) {
|
||||
const vk = VK_MAP[lower]
|
||||
if (vk !== undefined) modifiers.push(vk)
|
||||
} else {
|
||||
finalKey = part
|
||||
}
|
||||
}
|
||||
if (!finalKey) return
|
||||
|
||||
// Build script: press modifiers → press key → release key → release modifiers
|
||||
let script = WIN32_TYPES + '; '
|
||||
for (const vk of modifiers) {
|
||||
script += `[CuWin32]::keybd_event(${vk}, 0, 0, [UIntPtr]::Zero); `
|
||||
}
|
||||
const lower = finalKey.toLowerCase()
|
||||
const vk = VK_MAP[lower]
|
||||
if (vk !== undefined) {
|
||||
script += `[CuWin32]::keybd_event(${vk}, 0, 0, [UIntPtr]::Zero); [CuWin32]::keybd_event(${vk}, 0, 2, [UIntPtr]::Zero); `
|
||||
} else if (finalKey.length === 1) {
|
||||
const charCode = finalKey.charCodeAt(0)
|
||||
script += `$vk = [CuWin32]::VkKeyScan([char]${charCode}) -band 0xFF; [CuWin32]::keybd_event([byte]$vk, 0, 0, [UIntPtr]::Zero); [CuWin32]::keybd_event([byte]$vk, 0, 2, [UIntPtr]::Zero); `
|
||||
}
|
||||
for (const mk of modifiers.reverse()) {
|
||||
script += `[CuWin32]::keybd_event(${mk}, 0, 2, [UIntPtr]::Zero); `
|
||||
}
|
||||
ps(script)
|
||||
}
|
||||
|
||||
export const typeText: InputBackend['typeText'] = async (text) => {
|
||||
const escaped = text.replace(/'/g, "''")
|
||||
ps(`Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${escaped}')`)
|
||||
}
|
||||
|
||||
export const getFrontmostAppInfo: InputBackend['getFrontmostAppInfo'] = () => {
|
||||
try {
|
||||
const out = ps(`${WIN32_TYPES}
|
||||
$hwnd = [CuWin32]::GetForegroundWindow()
|
||||
$procId = [uint32]0
|
||||
[CuWin32]::GetWindowThreadProcessId($hwnd, [ref]$procId) | Out-Null
|
||||
$proc = Get-Process -Id $procId -ErrorAction SilentlyContinue
|
||||
"$($proc.MainModule.FileName)|$($proc.ProcessName)"`)
|
||||
if (!out || !out.includes('|')) return null
|
||||
const [exePath, appName] = out.split('|', 2)
|
||||
return { bundleId: exePath!, appName: appName! }
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
@@ -1,174 +1,73 @@
|
||||
/**
|
||||
* @ant/computer-use-input — macOS 键鼠模拟实现
|
||||
* @ant/computer-use-input — cross-platform keyboard & mouse simulation
|
||||
*
|
||||
* 使用 macOS 原生工具实现:
|
||||
* - AppleScript (osascript) — 应用信息、键盘输入
|
||||
* - CGEvent via AppleScript-ObjC bridge — 鼠标操作、位置查询
|
||||
* Platform backends:
|
||||
* - darwin: AppleScript/JXA via CoreGraphics events
|
||||
* - win32: PowerShell via Win32 P/Invoke (SetCursorPos, SendInput, keybd_event)
|
||||
*
|
||||
* 仅 macOS 支持。其他平台返回 { isSupported: false }
|
||||
* Add new platforms by creating backends/<platform>.ts implementing InputBackend.
|
||||
*/
|
||||
|
||||
import { $ } from 'bun'
|
||||
import type { FrontmostAppInfo, InputBackend } from './types.js'
|
||||
|
||||
interface FrontmostAppInfo {
|
||||
bundleId: string
|
||||
appName: string
|
||||
}
|
||||
export type { FrontmostAppInfo, InputBackend } from './types.js'
|
||||
|
||||
// AppleScript key code mapping
|
||||
const KEY_MAP: Record<string, number> = {
|
||||
return: 36, enter: 36, tab: 48, space: 49, delete: 51, backspace: 51,
|
||||
escape: 53, esc: 53,
|
||||
left: 123, right: 124, down: 125, up: 126,
|
||||
f1: 122, f2: 120, f3: 99, f4: 118, f5: 96, f6: 97,
|
||||
f7: 98, f8: 100, f9: 101, f10: 109, f11: 103, f12: 111,
|
||||
home: 115, end: 119, pageup: 116, pagedown: 121,
|
||||
}
|
||||
// ---------------------------------------------------------------------------
|
||||
// Platform dispatch
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const MODIFIER_MAP: Record<string, string> = {
|
||||
command: 'command down', cmd: 'command down', meta: 'command down', super: 'command down',
|
||||
shift: 'shift down',
|
||||
option: 'option down', alt: 'option down',
|
||||
control: 'control down', ctrl: 'control down',
|
||||
}
|
||||
|
||||
async function osascript(script: string): Promise<string> {
|
||||
const result = await $`osascript -e ${script}`.quiet().nothrow().text()
|
||||
return result.trim()
|
||||
}
|
||||
|
||||
async function jxa(script: string): Promise<string> {
|
||||
const result = await $`osascript -l JavaScript -e ${script}`.quiet().nothrow().text()
|
||||
return result.trim()
|
||||
}
|
||||
|
||||
function jxaSync(script: string): string {
|
||||
const result = Bun.spawnSync({
|
||||
cmd: ['osascript', '-l', 'JavaScript', '-e', script],
|
||||
stdout: 'pipe', stderr: 'pipe',
|
||||
})
|
||||
return new TextDecoder().decode(result.stdout).trim()
|
||||
}
|
||||
|
||||
function buildMouseJxa(eventType: string, x: number, y: number, btn: number, clickState?: number): string {
|
||||
let script = `ObjC.import("CoreGraphics"); var p = $.CGPointMake(${x},${y}); var e = $.CGEventCreateMouseEvent(null, $.${eventType}, p, ${btn});`
|
||||
if (clickState !== undefined) {
|
||||
script += ` $.CGEventSetIntegerValueField(e, $.kCGMouseEventClickState, ${clickState});`
|
||||
}
|
||||
script += ` $.CGEventPost($.kCGHIDEventTap, e);`
|
||||
return script
|
||||
}
|
||||
|
||||
// ---- Implementation functions ----
|
||||
|
||||
async function moveMouse(x: number, y: number, _animated: boolean): Promise<void> {
|
||||
await jxa(buildMouseJxa('kCGEventMouseMoved', x, y, 0))
|
||||
}
|
||||
|
||||
async function key(keyName: string, action: 'press' | 'release'): Promise<void> {
|
||||
if (action === 'release') return
|
||||
const lower = keyName.toLowerCase()
|
||||
const keyCode = KEY_MAP[lower]
|
||||
if (keyCode !== undefined) {
|
||||
await osascript(`tell application "System Events" to key code ${keyCode}`)
|
||||
} else {
|
||||
await osascript(`tell application "System Events" to keystroke "${keyName.length === 1 ? keyName : lower}"`)
|
||||
}
|
||||
}
|
||||
|
||||
async function keys(parts: string[]): Promise<void> {
|
||||
const modifiers: string[] = []
|
||||
let finalKey: string | null = null
|
||||
for (const part of parts) {
|
||||
const mod = MODIFIER_MAP[part.toLowerCase()]
|
||||
if (mod) modifiers.push(mod)
|
||||
else finalKey = part
|
||||
}
|
||||
if (!finalKey) return
|
||||
const lower = finalKey.toLowerCase()
|
||||
const keyCode = KEY_MAP[lower]
|
||||
const modStr = modifiers.length > 0 ? ` using {${modifiers.join(', ')}}` : ''
|
||||
if (keyCode !== undefined) {
|
||||
await osascript(`tell application "System Events" to key code ${keyCode}${modStr}`)
|
||||
} else {
|
||||
await osascript(`tell application "System Events" to keystroke "${finalKey.length === 1 ? finalKey : lower}"${modStr}`)
|
||||
}
|
||||
}
|
||||
|
||||
async function mouseLocation(): Promise<{ x: number; y: number }> {
|
||||
const result = await jxa('ObjC.import("CoreGraphics"); var e = $.CGEventCreate(null); var p = $.CGEventGetLocation(e); p.x + "," + p.y')
|
||||
const [xStr, yStr] = result.split(',')
|
||||
return { x: Math.round(Number(xStr)), y: Math.round(Number(yStr)) }
|
||||
}
|
||||
|
||||
async function mouseButton(
|
||||
button: 'left' | 'right' | 'middle',
|
||||
action: 'click' | 'press' | 'release',
|
||||
count?: number,
|
||||
): Promise<void> {
|
||||
const pos = await mouseLocation()
|
||||
const btn = button === 'left' ? 0 : button === 'right' ? 1 : 2
|
||||
const downType = btn === 0 ? 'kCGEventLeftMouseDown' : btn === 1 ? 'kCGEventRightMouseDown' : 'kCGEventOtherMouseDown'
|
||||
const upType = btn === 0 ? 'kCGEventLeftMouseUp' : btn === 1 ? 'kCGEventRightMouseUp' : 'kCGEventOtherMouseUp'
|
||||
|
||||
if (action === 'click') {
|
||||
for (let i = 0; i < (count ?? 1); i++) {
|
||||
await jxa(buildMouseJxa(downType, pos.x, pos.y, btn, i + 1))
|
||||
await jxa(buildMouseJxa(upType, pos.x, pos.y, btn, i + 1))
|
||||
}
|
||||
} else if (action === 'press') {
|
||||
await jxa(buildMouseJxa(downType, pos.x, pos.y, btn))
|
||||
} else {
|
||||
await jxa(buildMouseJxa(upType, pos.x, pos.y, btn))
|
||||
}
|
||||
}
|
||||
|
||||
async function mouseScroll(amount: number, direction: 'vertical' | 'horizontal'): Promise<void> {
|
||||
const script = direction === 'vertical'
|
||||
? `ObjC.import("CoreGraphics"); var e = $.CGEventCreateScrollWheelEvent(null, 0, 1, ${amount}); $.CGEventPost($.kCGHIDEventTap, e);`
|
||||
: `ObjC.import("CoreGraphics"); var e = $.CGEventCreateScrollWheelEvent(null, 0, 2, 0, ${amount}); $.CGEventPost($.kCGHIDEventTap, e);`
|
||||
await jxa(script)
|
||||
}
|
||||
|
||||
async function typeText(text: string): Promise<void> {
|
||||
const escaped = text.replace(/\\/g, '\\\\').replace(/"/g, '\\"')
|
||||
await osascript(`tell application "System Events" to keystroke "${escaped}"`)
|
||||
}
|
||||
|
||||
function getFrontmostAppInfo(): FrontmostAppInfo | null {
|
||||
function loadBackend(): InputBackend | null {
|
||||
try {
|
||||
const result = Bun.spawnSync({
|
||||
cmd: ['osascript', '-e', `
|
||||
tell application "System Events"
|
||||
set frontApp to first application process whose frontmost is true
|
||||
set appName to name of frontApp
|
||||
set bundleId to bundle identifier of frontApp
|
||||
return bundleId & "|" & appName
|
||||
end tell
|
||||
`],
|
||||
stdout: 'pipe',
|
||||
stderr: 'pipe',
|
||||
})
|
||||
const output = new TextDecoder().decode(result.stdout).trim()
|
||||
if (!output || !output.includes('|')) return null
|
||||
const [bundleId, appName] = output.split('|', 2)
|
||||
return { bundleId: bundleId!, appName: appName! }
|
||||
switch (process.platform) {
|
||||
case 'darwin':
|
||||
return require('./backends/darwin.js') as InputBackend
|
||||
case 'win32':
|
||||
return require('./backends/win32.js') as InputBackend
|
||||
case 'linux':
|
||||
return require('./backends/linux.js') as InputBackend
|
||||
default:
|
||||
return null
|
||||
}
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
// ---- Exports ----
|
||||
const backend = loadBackend()
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Unsupported stub (throws on call — guards via isSupported check)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function unsupported(): never {
|
||||
throw new Error(`computer-use-input is not supported on ${process.platform}`)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public API — matches the original export surface
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const isSupported = backend !== null
|
||||
|
||||
export const moveMouse = backend?.moveMouse ?? unsupported
|
||||
export const key = backend?.key ?? unsupported
|
||||
export const keys = backend?.keys ?? unsupported
|
||||
export const mouseLocation = backend?.mouseLocation ?? unsupported
|
||||
export const mouseButton = backend?.mouseButton ?? unsupported
|
||||
export const mouseScroll = backend?.mouseScroll ?? unsupported
|
||||
export const typeText = backend?.typeText ?? unsupported
|
||||
export const getFrontmostAppInfo = backend?.getFrontmostAppInfo ?? (() => null)
|
||||
|
||||
// Legacy class type — used by inputLoader.ts for type narrowing
|
||||
export class ComputerUseInputAPI {
|
||||
declare moveMouse: (x: number, y: number, animated: boolean) => Promise<void>
|
||||
declare key: (key: string, action: 'press' | 'release') => Promise<void>
|
||||
declare keys: (parts: string[]) => Promise<void>
|
||||
declare mouseLocation: () => Promise<{ x: number; y: number }>
|
||||
declare mouseButton: (button: 'left' | 'right' | 'middle', action: 'click' | 'press' | 'release', count?: number) => Promise<void>
|
||||
declare mouseScroll: (amount: number, direction: 'vertical' | 'horizontal') => Promise<void>
|
||||
declare typeText: (text: string) => Promise<void>
|
||||
declare getFrontmostAppInfo: () => FrontmostAppInfo | null
|
||||
declare moveMouse: InputBackend['moveMouse']
|
||||
declare key: InputBackend['key']
|
||||
declare keys: InputBackend['keys']
|
||||
declare mouseLocation: InputBackend['mouseLocation']
|
||||
declare mouseButton: InputBackend['mouseButton']
|
||||
declare mouseScroll: InputBackend['mouseScroll']
|
||||
declare typeText: InputBackend['typeText']
|
||||
declare getFrontmostAppInfo: InputBackend['getFrontmostAppInfo']
|
||||
declare isSupported: true
|
||||
}
|
||||
|
||||
@@ -177,7 +76,3 @@ interface ComputerUseInputUnsupported {
|
||||
}
|
||||
|
||||
export type ComputerUseInput = ComputerUseInputAPI | ComputerUseInputUnsupported
|
||||
|
||||
// Plain object with all methods as own properties — compatible with require()
|
||||
export const isSupported = process.platform === 'darwin'
|
||||
export { moveMouse, key, keys, mouseLocation, mouseButton, mouseScroll, typeText, getFrontmostAppInfo }
|
||||
|
||||
19
packages/@ant/computer-use-input/src/types.ts
Normal file
19
packages/@ant/computer-use-input/src/types.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
export interface FrontmostAppInfo {
|
||||
bundleId: string // macOS: bundle ID, Windows: exe path
|
||||
appName: string
|
||||
}
|
||||
|
||||
export interface InputBackend {
|
||||
moveMouse(x: number, y: number, animated: boolean): Promise<void>
|
||||
key(key: string, action: 'press' | 'release'): Promise<void>
|
||||
keys(parts: string[]): Promise<void>
|
||||
mouseLocation(): Promise<{ x: number; y: number }>
|
||||
mouseButton(
|
||||
button: 'left' | 'right' | 'middle',
|
||||
action: 'click' | 'press' | 'release',
|
||||
count?: number,
|
||||
): Promise<void>
|
||||
mouseScroll(amount: number, direction: 'vertical' | 'horizontal'): Promise<void>
|
||||
typeText(text: string): Promise<void>
|
||||
getFrontmostAppInfo(): FrontmostAppInfo | null
|
||||
}
|
||||
Reference in New Issue
Block a user