feat: enable Computer Use with macOS + Windows + Linux support

Phase 1: Replace @ant/computer-use-mcp stub (12 files, 6517 lines).

Phase 2: Remove 8 macOS-only guards in src/:
- main.tsx: remove getPlatform()==='macos' check
- swiftLoader.ts: remove darwin-only throw
- executor.ts: extend platform guard, clipboard dispatch, paste key
- drainRunLoop.ts: skip CFRunLoop pump on non-darwin
- escHotkey.ts: non-darwin returns false (Ctrl+C fallback)
- hostAdapter.ts: non-darwin permissions granted
- common.ts: dynamic platform + screenshotFiltering
- gates.ts: enabled:true, subscription check removed

Phase 3: Add Linux backends (xdotool/scrot/xrandr/wmctrl):
- computer-use-input/backends/linux.ts (173 lines)
- computer-use-swift/backends/linux.ts (278 lines)

Verified on Windows x64: mouse, screenshot, displays, foreground app.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
unraid
2026-04-03 22:33:00 +08:00
parent 465e9f01c6
commit e3264a1691
34 changed files with 8291 additions and 750 deletions

View File

@@ -1,174 +1,73 @@
/**
* @ant/computer-use-input — macOS 键鼠模拟实现
* @ant/computer-use-input — cross-platform keyboard & mouse simulation
*
* 使用 macOS 原生工具实现:
* - AppleScript (osascript) — 应用信息、键盘输入
* - CGEvent via AppleScript-ObjC bridge — 鼠标操作、位置查询
* Platform backends:
* - darwin: AppleScript/JXA driving CoreGraphics events
* - win32: PowerShell via Win32 P/Invoke (SetCursorPos, SendInput, keybd_event)
* - linux: command-line tools (e.g. xdotool) — see backends/linux.ts
*
* 仅 macOS 支持。其他平台返回 { isSupported: false }
* Add new platforms by creating backends/<platform>.ts implementing InputBackend.
*/
import { $ } from 'bun'
import type { FrontmostAppInfo, InputBackend } from './types.js'
/** Identity of the frontmost application, as returned by getFrontmostAppInfo. */
interface FrontmostAppInfo {
/** Bundle identifier of the frontmost application process. */
bundleId: string
/** Name of the frontmost application process. */
appName: string
}
export type { FrontmostAppInfo, InputBackend } from './types.js'
/**
 * Named keys mapped to the numeric values used with the AppleScript
 * `key code` command. Lookups are performed with lower-cased names, and
 * several aliases intentionally share one code.
 */
const KEY_MAP: Record<string, number> = {
  // Whitespace / editing
  return: 36,
  enter: 36,
  tab: 48,
  space: 49,
  delete: 51,
  backspace: 51,
  escape: 53,
  esc: 53,

  // Arrow keys
  left: 123,
  right: 124,
  down: 125,
  up: 126,

  // Function keys
  f1: 122,
  f2: 120,
  f3: 99,
  f4: 118,
  f5: 96,
  f6: 97,
  f7: 98,
  f8: 100,
  f9: 101,
  f10: 109,
  f11: 103,
  f12: 111,

  // Navigation
  home: 115,
  end: 119,
  pageup: 116,
  pagedown: 121,
}
// ---------------------------------------------------------------------------
// Platform dispatch
// ---------------------------------------------------------------------------
/**
 * Modifier names (looked up lower-cased) mapped to the AppleScript
 * `using {...}` clause entry they produce. Aliases share one entry.
 */
const MODIFIER_MAP: Record<string, string> = {
  // Command
  command: 'command down',
  cmd: 'command down',
  meta: 'command down',
  super: 'command down',

  // Shift
  shift: 'shift down',

  // Option
  option: 'option down',
  alt: 'option down',

  // Control
  control: 'control down',
  ctrl: 'control down',
}
/**
 * Runs an AppleScript snippet with `osascript -e` and resolves with its
 * trimmed text output.
 *
 * The command is run with `.quiet().nothrow()`, so — per Bun Shell's naming —
 * a failing script is not expected to reject; callers only ever see the text.
 *
 * @param script - AppleScript source passed as the `-e` argument.
 * @returns Trimmed output text of the command.
 */
async function osascript(script: string): Promise<string> {
  const command = $`osascript -e ${script}`.quiet().nothrow()
  const output = await command.text()
  return output.trim()
}
/**
 * Runs a JavaScript-for-Automation snippet with `osascript -l JavaScript -e`
 * and resolves with its trimmed text output.
 *
 * Like `osascript`, the command uses `.quiet().nothrow()`, so failures are
 * not surfaced as rejections.
 *
 * @param script - JXA source passed as the `-e` argument.
 * @returns Trimmed output text of the command.
 */
async function jxa(script: string): Promise<string> {
  const command = $`osascript -l JavaScript -e ${script}`.quiet().nothrow()
  const output = await command.text()
  return output.trim()
}
/**
 * Synchronous counterpart of `jxa`: spawns `osascript -l JavaScript -e`
 * with `Bun.spawnSync` and returns the decoded, trimmed stdout.
 *
 * stderr is piped but never read, and the exit status is not inspected.
 *
 * @param script - JXA source passed as the `-e` argument.
 * @returns Trimmed stdout of the process.
 */
function jxaSync(script: string): string {
  const { stdout } = Bun.spawnSync({
    cmd: ['osascript', '-l', 'JavaScript', '-e', script],
    stdout: 'pipe',
    stderr: 'pipe',
  })
  const decoder = new TextDecoder()
  return decoder.decode(stdout).trim()
}
/**
 * Builds the JXA source that creates and posts one CoreGraphics mouse event.
 *
 * @param eventType - Name of the CoreGraphics event constant (for example
 *   `kCGEventMouseMoved`); it is emitted as `$.<eventType>`.
 * @param x - X coordinate passed to `CGPointMake`.
 * @param y - Y coordinate passed to `CGPointMake`.
 * @param btn - Mouse button number passed to `CGEventCreateMouseEvent`.
 * @param clickState - When provided (including `0`), the event's
 *   `kCGMouseEventClickState` field is set to this value before posting.
 * @returns The script text, with statements separated by single spaces.
 */
function buildMouseJxa(eventType: string, x: number, y: number, btn: number, clickState?: number): string {
  const statements = [
    `ObjC.import("CoreGraphics"); var p = $.CGPointMake(${x},${y}); var e = $.CGEventCreateMouseEvent(null, $.${eventType}, p, ${btn});`,
  ]
  if (clickState !== undefined) {
    statements.push(`$.CGEventSetIntegerValueField(e, $.kCGMouseEventClickState, ${clickState});`)
  }
  statements.push(`$.CGEventPost($.kCGHIDEventTap, e);`)
  return statements.join(' ')
}
// ---- Implementation functions ----
/**
 * Moves the pointer by posting a `kCGEventMouseMoved` event at the given
 * coordinates.
 *
 * @param x - Target X coordinate.
 * @param y - Target Y coordinate.
 * @param _animated - Accepted for interface compatibility; not used here.
 */
async function moveMouse(x: number, y: number, _animated: boolean): Promise<void> {
  const script = buildMouseJxa('kCGEventMouseMoved', x, y, 0)
  await jxa(script)
}
/**
 * Sends a single key through System Events.
 *
 * Names present in `KEY_MAP` (matched case-insensitively) are sent with
 * `key code <n>`. Anything else is sent as a `keystroke` string: a single
 * character keeps its original case, a longer name is lower-cased.
 *
 * @param keyName - Key name or literal character.
 * @param action - Only `'press'` sends anything; `'release'` returns
 *   immediately (presumably because the AppleScript commands used here send
 *   a complete stroke — confirm against callers).
 */
async function key(keyName: string, action: 'press' | 'release'): Promise<void> {
  if (action === 'release') return

  const lower = keyName.toLowerCase()
  const keyCode = KEY_MAP[lower]
  if (keyCode !== undefined) {
    await osascript(`tell application "System Events" to key code ${keyCode}`)
    return
  }

  const literal = keyName.length === 1 ? keyName : lower
  // Escape backslashes first, then double quotes, so keys such as `"` or `\`
  // stay inside the AppleScript string literal instead of producing a script
  // that fails to parse (the same escaping typeText applies).
  const escaped = literal.replace(/\\/g, '\\\\').replace(/"/g, '\\"')
  await osascript(`tell application "System Events" to keystroke "${escaped}"`)
}
/**
 * Sends a key combination (for example `['cmd', 'shift', 'a']`) through
 * System Events.
 *
 * Each part that matches `MODIFIER_MAP` (case-insensitively) becomes an entry
 * of the AppleScript `using {...}` clause. The last part that is not a
 * modifier is the key that gets sent; if there is none, nothing is sent.
 * The key itself is resolved like in `key`: `KEY_MAP` names use `key code`,
 * anything else uses `keystroke`.
 *
 * @param parts - Modifier names and one key name, in any order.
 */
async function keys(parts: string[]): Promise<void> {
  const modifiers: string[] = []
  let finalKey: string | null = null
  for (const part of parts) {
    const mod = MODIFIER_MAP[part.toLowerCase()]
    if (mod) modifiers.push(mod)
    else finalKey = part
  }
  if (!finalKey) return

  const modStr = modifiers.length > 0 ? ` using {${modifiers.join(', ')}}` : ''
  const lower = finalKey.toLowerCase()
  const keyCode = KEY_MAP[lower]
  if (keyCode !== undefined) {
    await osascript(`tell application "System Events" to key code ${keyCode}${modStr}`)
    return
  }

  const literal = finalKey.length === 1 ? finalKey : lower
  // Escape backslashes first, then double quotes, so a combination such as
  // shift + `"` cannot terminate the AppleScript string literal early (the
  // same escaping typeText applies).
  const escaped = literal.replace(/\\/g, '\\\\').replace(/"/g, '\\"')
  await osascript(`tell application "System Events" to keystroke "${escaped}"${modStr}`)
}
/**
 * Reads the current pointer position via `CGEventGetLocation`.
 *
 * The JXA snippet prints `"<x>,<y>"`; both parts are converted with `Number`
 * and rounded. The output is not validated, so an unexpected or empty result
 * is converted as-is.
 *
 * @returns The pointer coordinates rounded to integers.
 */
async function mouseLocation(): Promise<{ x: number; y: number }> {
  const raw = await jxa('ObjC.import("CoreGraphics"); var e = $.CGEventCreate(null); var p = $.CGEventGetLocation(e); p.x + "," + p.y')
  const fields = raw.split(',')
  const x = Math.round(Number(fields[0]))
  const y = Math.round(Number(fields[1]))
  return { x, y }
}
/**
 * Clicks, presses or releases a mouse button at the pointer's current
 * position.
 *
 * @param button - `'left'` and `'right'` map to buttons 0 and 1; any other
 *   value is treated as the middle button (2, "Other" events).
 * @param action - `'click'` posts down+up pairs, `'press'` posts only the
 *   down event, anything else posts only the up event.
 * @param count - Number of down+up pairs for `'click'` (default 1). The
 *   n-th pair carries click state n so multi-clicks are reported as such.
 */
async function mouseButton(
  button: 'left' | 'right' | 'middle',
  action: 'click' | 'press' | 'release',
  count?: number,
): Promise<void> {
  const { x, y } = await mouseLocation()

  let btn: number
  let family: string
  switch (button) {
    case 'left':
      btn = 0
      family = 'Left'
      break
    case 'right':
      btn = 1
      family = 'Right'
      break
    default:
      btn = 2
      family = 'Other'
  }
  const downType = `kCGEvent${family}MouseDown`
  const upType = `kCGEvent${family}MouseUp`

  if (action === 'press') {
    await jxa(buildMouseJxa(downType, x, y, btn))
    return
  }
  if (action !== 'click') {
    await jxa(buildMouseJxa(upType, x, y, btn))
    return
  }

  const clicks = count ?? 1
  for (let i = 0; i < clicks; i++) {
    const clickState = i + 1
    await jxa(buildMouseJxa(downType, x, y, btn, clickState))
    await jxa(buildMouseJxa(upType, x, y, btn, clickState))
  }
}
/**
 * Posts a CoreGraphics scroll-wheel event.
 *
 * For `'vertical'` the event is created with one wheel carrying `amount`;
 * for any other direction it is created with two wheels, the first set to 0
 * and the second carrying `amount`.
 *
 * @param amount - Scroll delta placed into the event as-is.
 * @param direction - Axis to scroll along.
 */
async function mouseScroll(amount: number, direction: 'vertical' | 'horizontal'): Promise<void> {
  // Wheel count followed by the per-wheel deltas.
  const wheelArgs = direction === 'vertical' ? `1, ${amount}` : `2, 0, ${amount}`
  await jxa(
    `ObjC.import("CoreGraphics"); var e = $.CGEventCreateScrollWheelEvent(null, 0, ${wheelArgs}); $.CGEventPost($.kCGHIDEventTap, e);`,
  )
}
/**
 * Types a string through the System Events `keystroke` command.
 *
 * @param text - Text to type. Backslashes and double quotes are
 *   backslash-escaped so the text fits inside an AppleScript string literal.
 */
async function typeText(text: string): Promise<void> {
  // One pass: prefix every backslash or double quote with a backslash.
  const escaped = text.replace(/[\\"]/g, '\\$&')
  await osascript(`tell application "System Events" to keystroke "${escaped}"`)
}
// NOTE(review): two definitions are interleaved in this span — the
// osascript-based getFrontmostAppInfo body and the platform switch of
// loadBackend. This looks like diff residue; confirm which lines belong to
// the current file before relying on it.
/**
 * Asks System Events (through `osascript`) for the frontmost application
 * process and returns its bundle identifier and name, or null when the
 * command output is empty or does not contain the `|` separator.
 */
function getFrontmostAppInfo(): FrontmostAppInfo | null {
/**
 * Selects the input backend module for `process.platform`.
 *
 * Returns the required module for darwin, win32 and linux, and null for any
 * other platform or when anything inside the try block throws.
 */
function loadBackend(): InputBackend | null {
try {
// The AppleScript prints "<bundleId>|<appName>" for the frontmost process.
const result = Bun.spawnSync({
cmd: ['osascript', '-e', `
tell application "System Events"
set frontApp to first application process whose frontmost is true
set appName to name of frontApp
set bundleId to bundle identifier of frontApp
return bundleId & "|" & appName
end tell
`],
stdout: 'pipe',
stderr: 'pipe',
})
const output = new TextDecoder().decode(result.stdout).trim()
// Anything without the separator is treated as "no frontmost app".
if (!output || !output.includes('|')) return null
// The split limit of 2 keeps only the first two `|`-separated fields.
const [bundleId, appName] = output.split('|', 2)
return { bundleId: bundleId!, appName: appName! }
// require() sits inside each case, so only the module for the current
// platform is loaded.
switch (process.platform) {
case 'darwin':
return require('./backends/darwin.js') as InputBackend
case 'win32':
return require('./backends/win32.js') as InputBackend
case 'linux':
return require('./backends/linux.js') as InputBackend
default:
return null
}
} catch {
// Any failure is reported as null rather than thrown.
return null
}
}
// ---- Exports ----
/** Backend resolved once when this module is evaluated; null when loadBackend() returned none. */
const backend = loadBackend()
// ---------------------------------------------------------------------------
// Unsupported stub — throws when called; callers are expected to check isSupported first
// ---------------------------------------------------------------------------
/**
 * Fallback used in place of a backend function when no backend is available.
 *
 * @throws Error naming the current `process.platform`, on every call.
 */
function unsupported(): never {
  const platform = process.platform
  throw new Error(`computer-use-input is not supported on ${platform}`)
}
// ---------------------------------------------------------------------------
// Public API — matches the original export surface
// ---------------------------------------------------------------------------
/** True when a backend was loaded for the current platform. */
export const isSupported = backend !== null
// Each function below is taken from the loaded backend. When there is no
// backend (or the backend does not provide the member), the export is
// `unsupported`, which throws when called.
export const moveMouse = backend?.moveMouse ?? unsupported
export const key = backend?.key ?? unsupported
export const keys = backend?.keys ?? unsupported
export const mouseLocation = backend?.mouseLocation ?? unsupported
export const mouseButton = backend?.mouseButton ?? unsupported
export const mouseScroll = backend?.mouseScroll ?? unsupported
export const typeText = backend?.typeText ?? unsupported
// Unlike the exports above, the fallback here returns null instead of throwing.
export const getFrontmostAppInfo = backend?.getFrontmostAppInfo ?? (() => null)
// Legacy class type — used by inputLoader.ts for type narrowing
/**
 * Describes the shape of the input API when it is supported. Every member is
 * a `declare` field, so the class only contributes types.
 *
 * NOTE(review): each method is declared twice below — first with an explicit
 * signature, then as an indexed `InputBackend[...]` type. This looks like
 * diff residue; confirm which set the current file keeps.
 */
export class ComputerUseInputAPI {
// Explicit signatures.
declare moveMouse: (x: number, y: number, animated: boolean) => Promise<void>
declare key: (key: string, action: 'press' | 'release') => Promise<void>
declare keys: (parts: string[]) => Promise<void>
declare mouseLocation: () => Promise<{ x: number; y: number }>
declare mouseButton: (button: 'left' | 'right' | 'middle', action: 'click' | 'press' | 'release', count?: number) => Promise<void>
declare mouseScroll: (amount: number, direction: 'vertical' | 'horizontal') => Promise<void>
declare typeText: (text: string) => Promise<void>
declare getFrontmostAppInfo: () => FrontmostAppInfo | null
// The same members, typed by reference to the InputBackend interface.
declare moveMouse: InputBackend['moveMouse']
declare key: InputBackend['key']
declare keys: InputBackend['keys']
declare mouseLocation: InputBackend['mouseLocation']
declare mouseButton: InputBackend['mouseButton']
declare mouseScroll: InputBackend['mouseScroll']
declare typeText: InputBackend['typeText']
declare getFrontmostAppInfo: InputBackend['getFrontmostAppInfo']
// Literal `true` marks this variant of the ComputerUseInput union.
declare isSupported: true
}
@@ -177,7 +76,3 @@ interface ComputerUseInputUnsupported {
}
/**
 * The module's API type: either the full ComputerUseInputAPI or
 * ComputerUseInputUnsupported (presumably the shape used when isSupported is
 * false — its definition is not fully visible here).
 */
export type ComputerUseInput = ComputerUseInputAPI | ComputerUseInputUnsupported
// Plain object with all methods as own properties — compatible with require()
/** True only when the process is running on darwin. */
export const isSupported = process.platform === 'darwin'
// Exports the implementation functions defined in this module by name.
export { moveMouse, key, keys, mouseLocation, mouseButton, mouseScroll, typeText, getFrontmostAppInfo }