feat: enable Computer Use with macOS + Windows + Linux support

Phase 1: Replace @ant/computer-use-mcp stub (12 files, 6517 lines).

Phase 2: Remove 8 macOS-only guards in src/:
- main.tsx: remove getPlatform()==='macos' check
- swiftLoader.ts: remove darwin-only throw
- executor.ts: extend platform guard, clipboard dispatch, paste key
- drainRunLoop.ts: skip CFRunLoop pump on non-darwin
- escHotkey.ts: non-darwin returns false (Ctrl+C fallback)
- hostAdapter.ts: non-darwin permissions granted
- common.ts: dynamic platform + screenshotFiltering
- gates.ts: enabled:true, subscription check removed

Phase 3: Add Linux backends (xdotool/scrot/xrandr/wmctrl):
- computer-use-input/backends/linux.ts (173 lines)
- computer-use-swift/backends/linux.ts (278 lines)

Verified on Windows x64: mouse, screenshot, displays, foreground app.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
unraid
2026-04-03 22:33:00 +08:00
parent 465e9f01c6
commit e3264a1691
34 changed files with 8291 additions and 750 deletions

View File

@@ -0,0 +1,111 @@
/**
 * Geometry of a single display: its identifier, size, scale factor and
 * origin.
 *
 * NOTE(review): the units (logical points vs. physical pixels) and the
 * coordinate space of the origin are not established in this file — confirm
 * against the platform backends.
 */
export interface DisplayGeometry {
  /** Identifier of the display this geometry describes. */
  displayId: number

  /** Origin of the display (presumably within a shared desktop space). */
  originX: number
  originY: number

  /** Size of the display. */
  width: number
  height: number

  /** Scale factor of the display (presumably physical-to-logical ratio). */
  scaleFactor: number
}
/**
 * Result of a screen capture: the encoded image plus the geometry it was
 * taken against.
 *
 * NOTE(review): the image format behind `base64` and the exact relationship
 * between `width`/`height` and `displayWidth`/`displayHeight` are not
 * visible here — presumably image size vs. display size; confirm.
 */
export interface ScreenshotResult {
  /** Base64-encoded image data. */
  base64: string

  /** Dimensions reported for the captured image. */
  width: number
  height: number

  /** Dimensions reported for the display that was captured. */
  displayWidth: number
  displayHeight: number

  /** Origin reported for the captured display. */
  originX: number
  originY: number

  /** Display the capture belongs to; may be omitted. */
  displayId?: number
}
/**
 * Identity of the application that is currently frontmost.
 */
export interface FrontmostApp {
  /**
   * Application identifier (a bundle ID on macOS; the equivalent used on
   * other platforms is not visible here).
   */
  bundleId: string

  /** Human-readable application name. */
  displayName: string
}
/**
 * An application installed on the host.
 */
export interface InstalledApp {
  /** Application identifier. */
  bundleId: string

  /** Human-readable application name. */
  displayName: string

  /** Filesystem path associated with the application. */
  path: string

  /** Optional icon, presumably encoded as a data URL — confirm with producers. */
  iconDataUrl?: string
}
/**
 * An application that is currently running on the host.
 */
export interface RunningApp {
  /** Application identifier. */
  bundleId: string

  /** Human-readable application name. */
  displayName: string

  /** Process ID, when one is reported. */
  pid?: number
}
/**
 * Result of `ComputerExecutor.resolvePrepareCapture`: a screenshot plus
 * bookkeeping about what happened before the capture.
 */
export interface ResolvePrepareCaptureResult extends ScreenshotResult {
  /**
   * Display the capture was taken from. Redeclared here as required; it is
   * optional on the base `ScreenshotResult`.
   */
  displayId: number

  /** Presumably the identifiers of apps that were hidden — confirm with implementations. */
  hidden: string[]

  /** Presumably the identifier of an app that was activated, if any — confirm. */
  activated?: string
}
/**
 * Static capabilities advertised by a {@link ComputerExecutor}.
 */
export interface ComputerExecutorCapabilities {
  /**
   * Whether screenshot filtering is done natively by the executor
   * (`'native'`) or is not available (`'none'`).
   */
  screenshotFiltering: 'native' | 'none'

  /**
   * Host operating system. The literals match Node's `process.platform`
   * values. `'linux'` is included because the executor ships Linux backends
   * alongside the macOS and Windows ones; without it a Linux executor could
   * not describe itself without a type assertion.
   */
  platform: 'darwin' | 'win32' | 'linux'

  /** Identifier of the host application itself. */
  hostBundleId: string
}
/**
 * Platform-agnostic contract for driving the host computer: display
 * discovery, screen capture, keyboard/mouse input, clipboard access and
 * application management. Every operation is asynchronous.
 *
 * NOTE(review): coordinate spaces, units and error behaviour are defined by
 * the concrete backends and are not visible in this declaration.
 */
export interface ComputerExecutor {
  /** Capabilities advertised by this executor. */
  capabilities: ComputerExecutorCapabilities

  // --- Preparation ---------------------------------------------------------

  /**
   * Prepares the desktop for an action given the allow-listed apps.
   * Presumably resolves to the identifiers of apps that were hidden — confirm.
   */
  prepareForAction(
    allowlistBundleIds: string[],
    displayId?: number,
  ): Promise<string[]>

  /**
   * Reports apps as `{ bundleId, displayName }` pairs for the given allow-list.
   * Presumably a dry-run of what `prepareForAction` would hide — confirm.
   */
  previewHideSet(
    allowlistBundleIds: string[],
    displayId?: number,
  ): Promise<{ bundleId: string; displayName: string }[]>

  // --- Displays ------------------------------------------------------------

  /** Resolves the geometry of one display (`displayId` is optional). */
  getDisplaySize(displayId?: number): Promise<DisplayGeometry>

  /** Resolves the geometry of the available displays. */
  listDisplays(): Promise<DisplayGeometry[]>

  /** For each requested app identifier, resolves an associated list of display IDs. */
  findWindowDisplays(
    bundleIds: string[],
  ): Promise<{ bundleId: string; displayIds: number[] }[]>

  // --- Capture -------------------------------------------------------------

  /**
   * Combined resolve / prepare / capture step; see
   * `ResolvePrepareCaptureResult` for the returned bookkeeping.
   */
  resolvePrepareCapture(opts: {
    allowedBundleIds: string[]
    preferredDisplayId?: number
    autoResolve: boolean
    doHide?: boolean
  }): Promise<ResolvePrepareCaptureResult>

  /** Captures a screenshot. */
  screenshot(opts: {
    allowedBundleIds: string[]
    displayId?: number
  }): Promise<ScreenshotResult>

  /** Captures the given region and resolves the encoded image with its size. */
  zoom(
    regionLogical: { x: number; y: number; w: number; h: number },
    allowedBundleIds: string[],
    displayId?: number,
  ): Promise<{ base64: string; width: number; height: number }>

  // --- Keyboard ------------------------------------------------------------

  /** Sends a key sequence, optionally with a repeat count. */
  key(keySequence: string, repeat?: number): Promise<void>

  /** Holds the named keys for `durationMs` milliseconds. */
  holdKey(keyNames: string[], durationMs: number): Promise<void>

  /** Types text; `viaClipboard` selects clipboard-based entry. */
  type(text: string, opts: { viaClipboard: boolean }): Promise<void>

  // --- Clipboard -----------------------------------------------------------

  /** Resolves the clipboard contents as text. */
  readClipboard(): Promise<string>

  /** Writes text to the clipboard. */
  writeClipboard(text: string): Promise<void>

  // --- Pointer -------------------------------------------------------------

  /** Moves the pointer to `(x, y)`. */
  moveMouse(x: number, y: number): Promise<void>

  /**
   * Clicks at `(x, y)` with the given button, `count` times (1-3),
   * optionally with modifier keys.
   */
  click(
    x: number,
    y: number,
    button: 'left' | 'right' | 'middle',
    count: 1 | 2 | 3,
    modifiers?: string[],
  ): Promise<void>

  /** Mouse-button press without release (which button is not specified here). */
  mouseDown(): Promise<void>

  /** Mouse-button release counterpart to `mouseDown`. */
  mouseUp(): Promise<void>

  /** Resolves the current pointer position. */
  getCursorPosition(): Promise<{ x: number; y: number }>

  /**
   * Drags to `to`. `from` may be `undefined`; presumably the drag then
   * starts at the current pointer position — confirm with implementations.
   */
  drag(
    from: { x: number; y: number } | undefined,
    to: { x: number; y: number },
  ): Promise<void>

  /** Scrolls at `(x, y)` by the deltas `(dx, dy)`. */
  scroll(x: number, y: number, dx: number, dy: number): Promise<void>

  // --- Applications --------------------------------------------------------

  /** Resolves the frontmost app, or `null`. */
  getFrontmostApp(): Promise<FrontmostApp | null>

  /** Resolves the app at `(x, y)`, or `null`. */
  appUnderPoint(
    x: number,
    y: number,
  ): Promise<{ bundleId: string; displayName: string } | null>

  /** Resolves the installed applications. */
  listInstalledApps(): Promise<InstalledApp[]>

  /** Resolves an icon string for the given path, or `undefined`. */
  getAppIcon(path: string): Promise<string | undefined>

  /** Resolves the running applications. */
  listRunningApps(): Promise<RunningApp[]>

  /** Opens the app with the given identifier. */
  openApp(bundleId: string): Promise<void>
}