feat: enable Computer Use with macOS + Windows + Linux support

Phase 1: Replace @ant/computer-use-mcp stub (12 files, 6517 lines).

Phase 2: Remove 8 macOS-only guards in src/:
- main.tsx: remove getPlatform()==='macos' check
- swiftLoader.ts: remove darwin-only throw
- executor.ts: extend platform guard, clipboard dispatch, paste key
- drainRunLoop.ts: skip CFRunLoop pump on non-darwin
- escHotkey.ts: non-darwin returns false (Ctrl+C fallback)
- hostAdapter.ts: non-darwin permissions granted
- common.ts: dynamic platform + screenshotFiltering
- gates.ts: enabled:true, subscription check removed

Phase 3: Add Linux backends (xdotool/scrot/xrandr/wmctrl):
- computer-use-input/backends/linux.ts (173 lines)
- computer-use-swift/backends/linux.ts (278 lines)

Verified on Windows x64: mouse, screenshot, displays, foreground app.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
unraid
2026-04-03 22:33:00 +08:00
parent 465e9f01c6
commit e3264a1691
34 changed files with 8291 additions and 750 deletions

View File

@@ -0,0 +1,258 @@
/**
* macOS backend for computer-use-swift
*
* Uses AppleScript/JXA/screencapture for display info, app management,
* and screenshots.
*/
import { readFileSync, unlinkSync } from 'fs'
import { tmpdir } from 'os'
import { join } from 'path'
import type {
AppInfo, AppsAPI, DisplayAPI, DisplayGeometry, InstalledApp,
PrepareDisplayResult, RunningApp, ScreenshotAPI, ScreenshotResult,
SwiftBackend, WindowDisplayInfo,
} from '../types.js'
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Synchronously evaluates a JXA (JavaScript for Automation) snippet with
 * `osascript -l JavaScript`.
 *
 * Only stdout is consumed: stderr is piped but never read and the exit status
 * is not inspected, so a failing script just yields whatever (possibly empty)
 * text it printed.
 *
 * @param script JXA source handed to osascript verbatim through `-e`.
 * @returns The child's stdout decoded with a default `TextDecoder`, trimmed.
 */
function jxaSync(script: string): string {
  const argv = ['osascript', '-l', 'JavaScript', '-e', script]
  const { stdout } = Bun.spawnSync({ cmd: argv, stdout: 'pipe', stderr: 'pipe' })
  const decoder = new TextDecoder()
  return decoder.decode(stdout).trim()
}
/**
 * Synchronously runs an AppleScript snippet with `osascript -e`.
 *
 * Only stdout is consumed: stderr is piped but never read and the exit status
 * is not inspected.
 *
 * @param script AppleScript source handed to osascript verbatim.
 * @returns The child's stdout decoded with a default `TextDecoder`, trimmed.
 */
function osascriptSync(script: string): string {
  const argv = ['osascript', '-e', script]
  const { stdout } = Bun.spawnSync({ cmd: argv, stdout: 'pipe', stderr: 'pipe' })
  const decoder = new TextDecoder()
  return decoder.decode(stdout).trim()
}
/**
 * Runs an AppleScript snippet with `osascript -e` without blocking.
 *
 * stdout is read to completion first, then the child's exit is awaited.
 * stderr is piped but never read and the exit status is not inspected.
 *
 * @param script AppleScript source handed to osascript verbatim.
 * @returns The child's stdout as text, trimmed.
 */
async function osascript(script: string): Promise<string> {
  const argv = ['osascript', '-e', script]
  const child = Bun.spawn(argv, { stdout: 'pipe', stderr: 'pipe' })
  const captured = await new Response(child.stdout).text()
  await child.exited
  return captured.trim()
}
/**
 * Evaluates a JXA (JavaScript for Automation) snippet with
 * `osascript -l JavaScript` without blocking.
 *
 * stdout is read to completion first, then the child's exit is awaited.
 * stderr is piped but never read and the exit status is not inspected.
 *
 * @param script JXA source handed to osascript verbatim through `-e`.
 * @returns The child's stdout as text, trimmed.
 */
async function jxa(script: string): Promise<string> {
  const argv = ['osascript', '-l', 'JavaScript', '-e', script]
  const child = Bun.spawn(argv, { stdout: 'pipe', stderr: 'pipe' })
  const captured = await new Response(child.stdout).text()
  await child.exited
  return captured.trim()
}
// ---------------------------------------------------------------------------
// DisplayAPI
// ---------------------------------------------------------------------------
/**
 * JXA script that enumerates displays through CoreGraphics and prints a JSON
 * array of `{width, height, scaleFactor, displayId}` objects. The script text
 * is kept flush-left so the string sent to osascript is unchanged.
 */
const CORE_GRAPHICS_DISPLAY_SCRIPT = `
ObjC.import("CoreGraphics");
var displays = $.CGDisplayCopyAllDisplayModes ? [] : [];
var active = $.CGGetActiveDisplayList(10, null, Ref());
var countRef = Ref();
$.CGGetActiveDisplayList(0, null, countRef);
var count = countRef[0];
var idBuf = Ref();
$.CGGetActiveDisplayList(count, idBuf, countRef);
var result = [];
for (var i = 0; i < count; i++) {
var did = idBuf[i];
var w = $.CGDisplayPixelsWide(did);
var h = $.CGDisplayPixelsHigh(did);
var mode = $.CGDisplayCopyDisplayMode(did);
var pw = $.CGDisplayModeGetPixelWidth(mode);
var sf = pw > 0 && w > 0 ? pw / w : 2;
result.push({width: w, height: h, scaleFactor: sf, displayId: did});
}
JSON.stringify(result);
`

/**
 * JXA script that enumerates displays through AppKit's `NSScreen` and prints
 * the same JSON shape as the CoreGraphics script. Used as the second attempt.
 */
const APPKIT_DISPLAY_SCRIPT = `
ObjC.import("AppKit");
var screens = $.NSScreen.screens;
var result = [];
for (var i = 0; i < screens.count; i++) {
var s = screens.objectAtIndex(i);
var frame = s.frame;
var desc = s.deviceDescription;
var screenNumber = desc.objectForKey($("NSScreenNumber")).intValue;
var backingFactor = s.backingScaleFactor;
result.push({
width: Math.round(frame.size.width),
height: Math.round(frame.size.height),
scaleFactor: backingFactor,
displayId: screenNumber
});
}
JSON.stringify(result);
`

/** Builds a fresh copy of the geometry reported when no display can be queried. */
function fallbackDisplay(): DisplayGeometry {
  return { width: 1920, height: 1080, scaleFactor: 2, displayId: 1 }
}

/**
 * Converts the JSON printed by a display-enumeration script into geometry
 * records, coercing every field to a number.
 *
 * @param raw Text printed by the script.
 * @returns One record per display, in the order the script listed them.
 * @throws If `raw` is not JSON, is not a non-empty array, or an entry has a
 *   non-finite field or a non-positive width/height/scale factor. Throwing
 *   lets the caller move on to its next strategy instead of returning junk.
 */
function parseDisplayList(raw: string): DisplayGeometry[] {
  const parsed: unknown = JSON.parse(raw)
  if (!Array.isArray(parsed) || parsed.length === 0) {
    throw new Error('display query returned no displays')
  }
  return parsed.map((entry): DisplayGeometry => {
    const width = Number(entry?.width)
    const height = Number(entry?.height)
    const scaleFactor = Number(entry?.scaleFactor)
    const displayId = Number(entry?.displayId)
    const usable =
      Number.isFinite(displayId) &&
      [width, height, scaleFactor].every(v => Number.isFinite(v) && v > 0)
    if (!usable) {
      throw new Error('display query returned a malformed entry')
    }
    return { width, height, scaleFactor, displayId }
  })
}

/**
 * Display queries for macOS, answered by running JXA scripts via osascript.
 */
export const display: DisplayAPI = {
  /**
   * Returns the geometry of one display.
   *
   * @param displayId Display to look up. When omitted, or when no listed
   *   display has this id, the first listed display is returned instead.
   * @returns The matching display, else the first display, else the
   *   1920x1080 @2x fallback.
   */
  getSize(displayId?: number): DisplayGeometry {
    // Go through `display` rather than `this` so the method still works when
    // it is destructured or passed around detached from the object.
    const all = display.listAll()
    const match =
      displayId === undefined ? undefined : all.find(d => d.displayId === displayId)
    return match ?? all[0] ?? fallbackDisplay()
  },

  /**
   * Lists displays, trying the CoreGraphics script first and the AppKit
   * script second. A strategy counts as failed when it throws or when its
   * output does not parse into a non-empty list of valid entries.
   *
   * @returns At least one entry; a single fallback entry when both scripts fail.
   */
  listAll(): DisplayGeometry[] {
    for (const script of [CORE_GRAPHICS_DISPLAY_SCRIPT, APPKIT_DISPLAY_SCRIPT]) {
      try {
        return parseDisplayList(jxaSync(script))
      } catch {
        // Fall through to the next strategy.
      }
    }
    return [fallbackDisplay()]
  },
}
// ---------------------------------------------------------------------------
// AppsAPI
// ---------------------------------------------------------------------------
/**
 * Escapes a value so it can sit inside a double-quoted AppleScript string
 * literal: every backslash and double quote gets a leading backslash.
 * Without this, an identifier containing `"` would end the literal early and
 * the remainder would be interpreted as AppleScript code.
 */
function quoteAppleScriptString(value: string): string {
  return String(value).replace(/[\\"]/g, '\\$&')
}

/**
 * Application management for macOS, implemented with AppleScript/JXA run
 * through osascript. Several members are placeholders that return fixed
 * values; each is marked below.
 */
export const apps: AppsAPI = {
  /** Placeholder: ignores its arguments and reports that nothing was activated or hidden. */
  async prepareDisplay(_allowlistBundleIds, _surrogateHost, _displayId) {
    return { activated: '', hidden: [] }
  },

  /** Placeholder: ignores its arguments and reports an empty hide set. */
  async previewHideSet(_bundleIds, _displayId) {
    return []
  },

  /** Placeholder: reports every requested bundle id as being on display 1. */
  async findWindowDisplays(bundleIds) {
    return bundleIds.map(bundleId => ({ bundleId, displayIds: [1] }))
  },

  /**
   * Reports the frontmost application.
   *
   * NOTE(review): the script builds a CGPoint from the coordinates but never
   * uses it, so the result does not depend on `_x`/`_y`.
   *
   * @returns The parsed `{bundleId, displayName}` object, or `null` when the
   *   script fails or prints something that is not JSON.
   */
  async appUnderPoint(_x, _y) {
    try {
      // Number() keeps anything other than a numeric literal out of the script source.
      const result = await jxa(`
ObjC.import("CoreGraphics");
ObjC.import("AppKit");
var pt = $.CGPointMake(${Number(_x)}, ${Number(_y)});
var app = $.NSWorkspace.sharedWorkspace.frontmostApplication;
JSON.stringify({bundleId: app.bundleIdentifier.js, displayName: app.localizedName.js});
`)
      return JSON.parse(result)
    } catch {
      return null
    }
  },

  /**
   * Lists the `.app` entries found directly in the startup disk's
   * "Applications" folder.
   *
   * NOTE(review): `bundleId` is synthesized as `com.app.<lower-cased name>`;
   * it is not read from the application bundle, so it presumably will not
   * match the real identifier that `open`/`unhide` address — confirm callers
   * only use it as an opaque key.
   *
   * @returns One record per application, or `[]` when the script fails.
   */
  async listInstalled() {
    try {
      const result = await osascript(`
tell application "System Events"
set appList to ""
repeat with appFile in (every file of folder "Applications" of startup disk whose name ends with ".app")
set appPath to POSIX path of (appFile as alias)
set appName to name of appFile
set appList to appList & appPath & "|" & appName & "\\n"
end repeat
return appList
end tell
`)
      // Each non-empty output line has the form "<posix path>|<file name>".
      return result.split('\n').filter(Boolean).map(line => {
        const [path, name] = line.split('|', 2)
        const displayName = (name ?? '').replace(/\.app$/, '')
        return {
          bundleId: `com.app.${displayName.toLowerCase().replace(/\s+/g, '-')}`,
          displayName,
          path: path ?? '',
        }
      })
    } catch {
      return []
    }
  },

  /** Placeholder: icons are not available from this backend. */
  iconDataUrl(_path) {
    return null
  },

  /**
   * Lists non-background application processes known to System Events.
   *
   * @returns The `{bundleId, displayName}` records printed by the script, or
   *   `[]` when the script fails or its output is not a JSON array.
   */
  listRunning() {
    try {
      const raw = jxaSync(`
var apps = Application("System Events").applicationProcesses.whose({backgroundOnly: false});
var result = [];
for (var i = 0; i < apps.length; i++) {
try {
var a = apps[i];
result.push({bundleId: a.bundleIdentifier(), displayName: a.name()});
} catch(e) {}
}
JSON.stringify(result);
`)
      const parsed: unknown = JSON.parse(raw)
      return Array.isArray(parsed) ? parsed : []
    } catch {
      return []
    }
  },

  /**
   * Activates the application with the given bundle identifier.
   * The identifier is escaped before being placed in the script.
   */
  async open(bundleId) {
    await osascript(`tell application id "${quoteAppleScriptString(bundleId)}" to activate`)
  },

  /**
   * Makes each listed application's process visible, one osascript call per
   * bundle identifier, in order. Identifiers are escaped before being placed
   * in the script.
   */
  async unhide(bundleIds) {
    for (const bundleId of bundleIds) {
      await osascript(`
tell application "System Events"
set visible of application process (name of application process whose bundle identifier is "${quoteAppleScriptString(bundleId)}") to true
end tell
`)
    }
  },
}
// ---------------------------------------------------------------------------
// ScreenshotAPI
// ---------------------------------------------------------------------------
/**
 * Runs `screencapture` into a temporary PNG file and returns the image as
 * base64 together with its pixel dimensions.
 *
 * The temporary file name combines the process id, a timestamp and a random
 * suffix so overlapping captures never share a path. The file is removed
 * whether or not the capture succeeds.
 *
 * @param args Arguments placed between `screencapture` and the output path.
 * @returns The file's bytes as base64, plus the width and height read from
 *   the PNG IHDR chunk (big-endian 32-bit values at byte offsets 16 and 20).
 * @throws If no file was produced (the message carries the exit code and any
 *   stderr output) or if the file does not start with a PNG header.
 */
async function captureScreenToBase64(args: string[]): Promise<{ base64: string; width: number; height: number }> {
  const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`
  const tmpFile = join(tmpdir(), `cu-screenshot-${uniqueSuffix}.png`)
  try {
    const proc = Bun.spawn(['screencapture', ...args, tmpFile], {
      stdout: 'pipe', stderr: 'pipe',
    })
    // Drain stderr while waiting so its text is available for diagnostics.
    const [exitCode, stderrText] = await Promise.all([
      proc.exited,
      new Response(proc.stderr).text(),
    ])

    let buf: Buffer
    try {
      buf = readFileSync(tmpFile)
    } catch {
      const detail = stderrText.trim()
      throw new Error(
        `screencapture produced no image (exit code ${exitCode})${detail ? `: ${detail}` : ''}`,
      )
    }

    // 8-byte PNG signature + 4-byte length + "IHDR" + width + height = 24 bytes.
    const pngSignature = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])
    if (buf.length < 24 || !buf.subarray(0, 8).equals(pngSignature)) {
      throw new Error('screencapture did not produce a valid PNG file')
    }

    return {
      base64: buf.toString('base64'),
      width: buf.readUInt32BE(16),
      height: buf.readUInt32BE(20),
    }
  } finally {
    try {
      unlinkSync(tmpFile)
    } catch {
      // The file was never created; nothing to clean up.
    }
  }
}
/**
 * Screenshot capture for macOS, delegating to `captureScreenToBase64`.
 * Both methods always pass `-x`; the allow-list, quality and output-size
 * parameters are accepted but not used.
 */
export const screenshot: ScreenshotAPI = {
  /**
   * Captures a screen, adding `-D <displayId>` when a display id is given.
   */
  async captureExcluding(_allowedBundleIds, _quality, _targetW, _targetH, displayId) {
    const displaySelector = displayId === undefined ? [] : ['-D', String(displayId)]
    return captureScreenToBase64(['-x', ...displaySelector])
  },

  /**
   * Captures the rectangle `x,y,w,h` via `-R`, adding `-D <displayId>` after
   * it when a display id is given.
   */
  async captureRegion(_allowedBundleIds, x, y, w, h, _outW, _outH, _quality, displayId) {
    const rect = `${x},${y},${w},${h}`
    const displaySelector = displayId === undefined ? [] : ['-D', String(displayId)]
    return captureScreenToBase64(['-x', '-R', rect, ...displaySelector])
  },
}