mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-23 08:45:50 +00:00
feat: enable Computer Use with macOS + Windows + Linux support
Phase 1: Replace @ant/computer-use-mcp stub (12 files, 6517 lines). Phase 2: Remove 8 macOS-only guards in src/: - main.tsx: remove getPlatform()==='macos' check - swiftLoader.ts: remove darwin-only throw - executor.ts: extend platform guard, clipboard dispatch, paste key - drainRunLoop.ts: skip CFRunLoop pump on non-darwin - escHotkey.ts: non-darwin returns false (Ctrl+C fallback) - hostAdapter.ts: non-darwin permissions granted - common.ts: dynamic platform + screenshotFiltering - gates.ts: enabled:true, subscription check removed Phase 3: Add Linux backends (xdotool/scrot/xrandr/wmctrl): - computer-use-input/backends/linux.ts (173 lines) - computer-use-swift/backends/linux.ts (278 lines) Verified on Windows x64: mouse, screenshot, displays, foreground app. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
258
packages/@ant/computer-use-swift/src/backends/darwin.ts
Normal file
258
packages/@ant/computer-use-swift/src/backends/darwin.ts
Normal file
@@ -0,0 +1,258 @@
|
||||
/**
|
||||
* macOS backend for computer-use-swift
|
||||
*
|
||||
* Uses AppleScript/JXA/screencapture for display info, app management,
|
||||
* and screenshots.
|
||||
*/
|
||||
|
||||
import { readFileSync, unlinkSync } from 'fs'
|
||||
import { tmpdir } from 'os'
|
||||
import { join } from 'path'
|
||||
import type {
|
||||
AppInfo, AppsAPI, DisplayAPI, DisplayGeometry, InstalledApp,
|
||||
PrepareDisplayResult, RunningApp, ScreenshotAPI, ScreenshotResult,
|
||||
SwiftBackend, WindowDisplayInfo,
|
||||
} from '../types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Runs a JXA (JavaScript for Automation) script through `osascript` and
 * blocks until the process has finished.
 *
 * Neither the exit code nor stderr is inspected, so a failing script simply
 * yields whatever (usually empty) text it wrote to stdout.
 *
 * @param script JXA source handed to `osascript -l JavaScript -e`.
 * @returns The script's stdout, decoded and trimmed.
 */
function jxaSync(script: string): string {
  const argv = ['osascript', '-l', 'JavaScript', '-e', script]
  const { stdout } = Bun.spawnSync({ cmd: argv, stdout: 'pipe', stderr: 'pipe' })
  const decoder = new TextDecoder()
  return decoder.decode(stdout).trim()
}
|
||||
|
||||
/**
 * Runs an AppleScript snippet through `osascript` and blocks until the
 * process has finished.
 *
 * Neither the exit code nor stderr is inspected.
 *
 * @param script AppleScript source handed to `osascript -e`.
 * @returns The script's stdout, decoded and trimmed.
 */
function osascriptSync(script: string): string {
  const argv = ['osascript', '-e', script]
  const { stdout } = Bun.spawnSync({ cmd: argv, stdout: 'pipe', stderr: 'pipe' })
  const decoder = new TextDecoder()
  return decoder.decode(stdout).trim()
}
|
||||
|
||||
/**
 * Runs an AppleScript snippet through `osascript` without blocking the
 * event loop.
 *
 * stdout is drained first and the process exit is awaited afterwards; the
 * exit code and stderr are not inspected.
 *
 * @param script AppleScript source handed to `osascript -e`.
 * @returns The script's stdout, trimmed.
 */
async function osascript(script: string): Promise<string> {
  const child = Bun.spawn(['osascript', '-e', script], { stdout: 'pipe', stderr: 'pipe' })
  const output = await new Response(child.stdout).text()
  await child.exited
  return output.trim()
}
|
||||
|
||||
/**
 * Runs a JXA (JavaScript for Automation) script through `osascript` without
 * blocking the event loop.
 *
 * stdout is drained first and the process exit is awaited afterwards; the
 * exit code and stderr are not inspected.
 *
 * @param script JXA source handed to `osascript -l JavaScript -e`.
 * @returns The script's stdout, trimmed.
 */
async function jxa(script: string): Promise<string> {
  const child = Bun.spawn(['osascript', '-l', 'JavaScript', '-e', script], {
    stdout: 'pipe',
    stderr: 'pipe',
  })
  const output = await new Response(child.stdout).text()
  await child.exited
  return output.trim()
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DisplayAPI
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Geometry reported when no display could be queried at all. */
const FALLBACK_DISPLAY: DisplayGeometry = { width: 1920, height: 1080, scaleFactor: 2, displayId: 1 }

/** JXA script listing the active displays through CoreGraphics. */
const CORE_GRAPHICS_DISPLAYS_SCRIPT = `
  ObjC.import("CoreGraphics");
  var countRef = Ref();
  $.CGGetActiveDisplayList(0, null, countRef);
  var count = countRef[0];
  var idBuf = Ref();
  $.CGGetActiveDisplayList(count, idBuf, countRef);
  var result = [];
  for (var i = 0; i < count; i++) {
    var did = idBuf[i];
    var w = $.CGDisplayPixelsWide(did);
    var h = $.CGDisplayPixelsHigh(did);
    var mode = $.CGDisplayCopyDisplayMode(did);
    var pw = $.CGDisplayModeGetPixelWidth(mode);
    var sf = pw > 0 && w > 0 ? pw / w : 2;
    result.push({width: w, height: h, scaleFactor: sf, displayId: did});
  }
  JSON.stringify(result);
`

/** JXA script listing the screens through AppKit (used when CoreGraphics yields nothing). */
const APPKIT_DISPLAYS_SCRIPT = `
  ObjC.import("AppKit");
  var screens = $.NSScreen.screens;
  var result = [];
  for (var i = 0; i < screens.count; i++) {
    var s = screens.objectAtIndex(i);
    var frame = s.frame;
    var desc = s.deviceDescription;
    var screenNumber = desc.objectForKey($("NSScreenNumber")).intValue;
    var backingFactor = s.backingScaleFactor;
    result.push({
      width: Math.round(frame.size.width),
      height: Math.round(frame.size.height),
      scaleFactor: backingFactor,
      displayId: screenNumber
    });
  }
  JSON.stringify(result);
`

/**
 * Parses the JSON emitted by one of the display scripts.
 *
 * Entries without a positive width/height or a finite display id are dropped;
 * an unusable scale factor defaults to 2 (the same default the CoreGraphics
 * script applies). Any malformed input yields an empty list.
 */
function parseDisplayList(raw: string): DisplayGeometry[] {
  let parsed: unknown
  try {
    parsed = JSON.parse(raw)
  } catch {
    return []
  }
  if (!Array.isArray(parsed)) return []

  const displays: DisplayGeometry[] = []
  for (const entry of parsed) {
    if (typeof entry !== 'object' || entry === null) continue
    const fields = entry as Record<string, unknown>
    const width = Number(fields.width)
    const height = Number(fields.height)
    const displayId = Number(fields.displayId)
    const scaleFactor = Number(fields.scaleFactor)
    if (!(width > 0) || !(height > 0)) continue
    if (!Number.isFinite(width) || !Number.isFinite(height) || !Number.isFinite(displayId)) continue
    displays.push({
      width,
      height,
      scaleFactor: Number.isFinite(scaleFactor) && scaleFactor > 0 ? scaleFactor : 2,
      displayId,
    })
  }
  return displays
}

/**
 * Display information for macOS, gathered by running JXA scripts through
 * `osascript`.
 */
export const display: DisplayAPI = {
  /**
   * Returns the geometry of one display.
   *
   * @param displayId Id to look up; when omitted or unknown the first listed
   *   display is returned.
   * @returns The matching geometry, else the first display, else a
   *   1920x1080 @2x placeholder.
   */
  getSize(displayId?: number): DisplayGeometry {
    const all = this.listAll()
    if (displayId !== undefined) {
      const found = all.find(d => d.displayId === displayId)
      if (found) return found
    }
    return all[0] ?? { ...FALLBACK_DISPLAY }
  },

  /**
   * Lists the displays, trying CoreGraphics first and AppKit second.
   *
   * A strategy is considered failed when the script cannot be spawned or
   * when it produces no usable entry (empty stdout, invalid JSON, `[]`).
   *
   * @returns At least one geometry; a single 1920x1080 @2x placeholder when
   *   both strategies fail.
   */
  listAll(): DisplayGeometry[] {
    for (const script of [CORE_GRAPHICS_DISPLAYS_SCRIPT, APPKIT_DISPLAYS_SCRIPT]) {
      try {
        const displays = parseDisplayList(jxaSync(script))
        if (displays.length > 0) return displays
      } catch {
        // osascript could not be spawned; try the next strategy.
      }
    }
    return [{ ...FALLBACK_DISPLAY }]
  },
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// AppsAPI
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Escapes text so it can be embedded inside a double-quoted AppleScript
 * string literal without terminating it.
 */
function escapeAppleScriptString(value: string): string {
  return value.replace(/\\/g, '\\\\').replace(/"/g, '\\"')
}

/**
 * Application management for macOS, implemented with AppleScript / JXA
 * scripts run through `osascript`.
 */
export const apps: AppsAPI = {
  /** Not implemented on this backend: reports that nothing was activated or hidden. */
  async prepareDisplay(_allowlistBundleIds, _surrogateHost, _displayId) {
    return { activated: '', hidden: [] }
  },

  /** Not implemented on this backend: always reports an empty hide set. */
  async previewHideSet(_bundleIds, _displayId) {
    return []
  },

  /**
   * Placeholder: every bundle id is reported on display id 1.
   * NOTE(review): ids returned by `display.listAll()` are not necessarily 1 —
   * confirm what callers expect here.
   */
  async findWindowDisplays(bundleIds) {
    return bundleIds.map(bundleId => ({ bundleId, displayIds: [1] }))
  },

  /**
   * Finds the application owning the front-most normal window (layer 0) that
   * contains the given point. When no window matches, or the window list
   * cannot be read, the frontmost application is reported instead (the
   * previous behaviour of this method).
   *
   * NOTE(review): assumes the point is in global display coordinates with a
   * top-left origin, the space CGWindow bounds use — confirm against callers.
   *
   * @returns The app's bundle id and display name, or `null` when the
   *   coordinates are not finite numbers or the script yields nothing usable.
   */
  async appUnderPoint(_x, _y) {
    const px = Number(_x)
    const py = Number(_y)
    // Only finite numbers are interpolated into the script below.
    if (!Number.isFinite(px) || !Number.isFinite(py)) return null
    try {
      const result = await jxa(`
        ObjC.import("CoreGraphics");
        ObjC.import("AppKit");
        var px = ${px};
        var py = ${py};
        var app = null;
        try {
          var options = $.kCGWindowListOptionOnScreenOnly | $.kCGWindowListExcludeDesktopElements;
          var wins = ObjC.deepUnwrap(ObjC.castRefToObject($.CGWindowListCopyWindowInfo(options, $.kCGNullWindowID)));
          for (var i = 0; i < wins.length; i++) {
            var w = wins[i];
            var b = w.kCGWindowBounds;
            if (w.kCGWindowLayer !== 0 || !b) continue;
            if (px >= b.X && px < b.X + b.Width && py >= b.Y && py < b.Y + b.Height) {
              app = $.NSRunningApplication.runningApplicationWithProcessIdentifier(w.kCGWindowOwnerPID);
              break;
            }
          }
        } catch (e) {
          app = null;
        }
        if (!app || app.isNil()) app = $.NSWorkspace.sharedWorkspace.frontmostApplication;
        JSON.stringify({bundleId: app.bundleIdentifier.js, displayName: app.localizedName.js});
      `)
      const parsed: unknown = JSON.parse(result)
      if (typeof parsed !== 'object' || parsed === null) return null
      const { bundleId, displayName } = parsed as { bundleId?: unknown; displayName?: unknown }
      if (typeof bundleId !== 'string' || bundleId === '') return null
      return { bundleId, displayName: typeof displayName === 'string' ? displayName : bundleId }
    } catch {
      return null
    }
  },

  /**
   * Lists the `.app` bundles directly inside `/Applications`.
   *
   * First tries a JXA script that reads each bundle's real identifier via
   * `NSBundle`, so the ids can be passed to `open`. If that yields nothing,
   * falls back to a System Events listing whose bundle ids are synthesised
   * as `com.app.<lower-cased-name>` and are NOT real bundle identifiers.
   *
   * @returns The installed apps, or `[]` when both strategies fail.
   */
  async listInstalled() {
    try {
      const raw = await jxa(`
        ObjC.import("Foundation");
        var dir = "/Applications";
        var names = ObjC.deepUnwrap($.NSFileManager.defaultManager.contentsOfDirectoryAtPathError(dir, null)) || [];
        var result = [];
        for (var i = 0; i < names.length; i++) {
          try {
            var name = String(names[i]);
            if (!/\\.app$/.test(name)) continue;
            var path = dir + "/" + name;
            var id = $.NSBundle.bundleWithPath(path).bundleIdentifier.js;
            if (typeof id === "string" && id) {
              result.push({bundleId: id, displayName: name.replace(/\\.app$/, ""), path: path});
            }
          } catch (e) {}
        }
        JSON.stringify(result);
      `)
      const parsed: unknown = JSON.parse(raw)
      if (Array.isArray(parsed)) {
        const found = parsed.flatMap(entry => {
          if (typeof entry !== 'object' || entry === null) return []
          const { bundleId, displayName, path } = entry as Record<string, unknown>
          if (typeof bundleId !== 'string' || typeof displayName !== 'string' || typeof path !== 'string') return []
          return [{ bundleId, displayName, path }]
        })
        if (found.length > 0) return found
      }
    } catch {
      // Fall through to the System Events listing below.
    }

    try {
      const result = await osascript(`
        tell application "System Events"
          set appList to ""
          repeat with appFile in (every file of folder "Applications" of startup disk whose name ends with ".app")
            set appPath to POSIX path of (appFile as alias)
            set appName to name of appFile
            set appList to appList & appPath & "|" & appName & "\\n"
          end repeat
          return appList
        end tell
      `)
      return result.split('\n').filter(Boolean).map(line => {
        const [path, name] = line.split('|', 2)
        const displayName = (name ?? '').replace(/\.app$/, '')
        return {
          // Synthetic id: only a stable label, not a real bundle identifier.
          bundleId: `com.app.${displayName.toLowerCase().replace(/\s+/g, '-')}`,
          displayName,
          path: path ?? '',
        }
      })
    } catch {
      return []
    }
  },

  /** Not implemented on this backend: no icon is ever produced. */
  iconDataUrl(_path) {
    return null
  },

  /**
   * Lists the non-background application processes known to System Events.
   * Entries for which no bundle identifier string is reported are skipped.
   *
   * @returns The running apps, or `[]` when the script fails.
   */
  listRunning() {
    try {
      const raw = jxaSync(`
        var apps = Application("System Events").applicationProcesses.whose({backgroundOnly: false});
        var result = [];
        for (var i = 0; i < apps.length; i++) {
          try {
            var a = apps[i];
            result.push({bundleId: a.bundleIdentifier(), displayName: a.name()});
          } catch(e) {}
        }
        JSON.stringify(result);
      `)
      const parsed: unknown = JSON.parse(raw)
      if (!Array.isArray(parsed)) return []
      return parsed.flatMap(entry => {
        if (typeof entry !== 'object' || entry === null) return []
        const { bundleId, displayName } = entry as Record<string, unknown>
        if (typeof bundleId !== 'string') return []
        return [{ bundleId, displayName: typeof displayName === 'string' ? displayName : bundleId }]
      })
    } catch {
      return []
    }
  },

  /**
   * Activates (launching if needed) the application with the given bundle id.
   * The id is escaped before being embedded in the AppleScript source.
   */
  async open(bundleId) {
    await osascript(`tell application id "${escapeAppleScriptString(bundleId)}" to activate`)
  },

  /**
   * Makes the processes with the given bundle ids visible again, one
   * `osascript` run per id. Failures are not reported.
   */
  async unhide(bundleIds) {
    for (const bundleId of bundleIds) {
      await osascript(`
        tell application "System Events"
          set visible of (first application process whose bundle identifier is "${escapeAppleScriptString(bundleId)}") to true
        end tell
      `)
    }
  },
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ScreenshotAPI
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** The eight bytes every PNG file starts with. */
const PNG_SIGNATURE = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]

/**
 * Runs `screencapture` into a temporary PNG file and returns its contents.
 *
 * The temp file name combines time, pid and a random suffix so that captures
 * started in the same millisecond do not overwrite each other; the file is
 * removed again whether or not reading succeeds. Width and height are read
 * from the PNG IHDR chunk (big-endian u32 at byte offsets 16 and 20).
 *
 * @param args Extra `screencapture` arguments placed before the output path.
 * @returns The image as base64 plus its pixel dimensions.
 * @throws If the output file is missing or is not a PNG (for example when
 *   `screencapture` failed).
 */
async function captureScreenToBase64(args: string[]): Promise<{ base64: string; width: number; height: number }> {
  const uniqueSuffix = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2)}`
  const tmpFile = join(tmpdir(), `cu-screenshot-${uniqueSuffix}.png`)
  // Output is never read, so do not leave pipes open for the child to fill.
  const proc = Bun.spawn(['screencapture', ...args, tmpFile], {
    stdout: 'ignore', stderr: 'ignore',
  })
  const exitCode = await proc.exited
  try {
    const buf = readFileSync(tmpFile)
    const looksLikePng = buf.length >= 24 && PNG_SIGNATURE.every((byte, i) => buf[i] === byte)
    if (!looksLikePng) {
      throw new Error(`screencapture did not produce a PNG image (exit code ${exitCode})`)
    }
    return {
      base64: buf.toString('base64'),
      width: buf.readUInt32BE(16),
      height: buf.readUInt32BE(20),
    }
  } finally {
    try { unlinkSync(tmpFile) } catch { /* file was never created */ }
  }
}
|
||||
|
||||
/**
 * Builds the `-D` argument for `screencapture`.
 *
 * `screencapture -D` takes a 1-based display index (1 = main display), not a
 * display id, so the id is translated to its position in `display.listAll()`.
 * A value that is not a known id but is a valid 1-based index is passed
 * through unchanged; anything else selects no display (default capture).
 *
 * NOTE(review): assumes `screencapture` numbers displays in the same order
 * `display.listAll()` reports them — confirm on a multi-monitor machine.
 */
function screencaptureDisplayArgs(displayId: number | undefined): string[] {
  if (displayId === undefined) return []
  const knownIds = display.listAll().map(d => d.displayId)
  const position = knownIds.indexOf(displayId)
  if (position >= 0) return ['-D', String(position + 1)]
  if (Number.isInteger(displayId) && displayId >= 1 && displayId <= knownIds.length) {
    return ['-D', String(displayId)]
  }
  return []
}

/**
 * Screenshots for macOS via the `screencapture` command-line tool.
 *
 * The allow-list, quality and target/output size parameters are accepted for
 * interface compatibility but are not used by this backend.
 */
export const screenshot: ScreenshotAPI = {
  /** Captures a whole display silently (`-x`), optionally a specific one. */
  async captureExcluding(_allowedBundleIds, _quality, _targetW, _targetH, displayId) {
    return captureScreenToBase64(['-x', ...screencaptureDisplayArgs(displayId)])
  },

  /** Captures the rectangle `x,y,w,h` silently (`-x -R`), optionally on a specific display. */
  async captureRegion(_allowedBundleIds, x, y, w, h, _outW, _outH, _quality, displayId) {
    return captureScreenToBase64(['-x', '-R', `${x},${y},${w},${h}`, ...screencaptureDisplayArgs(displayId)])
  },
}
|
||||
278
packages/@ant/computer-use-swift/src/backends/linux.ts
Normal file
278
packages/@ant/computer-use-swift/src/backends/linux.ts
Normal file
@@ -0,0 +1,278 @@
|
||||
/**
|
||||
* Linux backend for computer-use-swift
|
||||
*
|
||||
* Uses xrandr for display info, scrot for screenshots,
|
||||
* wmctrl/xdotool for window management, and xdg-open for launching apps.
|
||||
*
|
||||
* Requires: xrandr, scrot, xdotool, wmctrl (optional)
|
||||
*/
|
||||
|
||||
import type {
|
||||
AppInfo, AppsAPI, DisplayAPI, DisplayGeometry, InstalledApp,
|
||||
PrepareDisplayResult, RunningApp, ScreenshotAPI, ScreenshotResult,
|
||||
SwiftBackend, WindowDisplayInfo,
|
||||
} from '../types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Shell helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Runs a command synchronously and returns what it printed.
 *
 * The exit code and stderr are not inspected.
 *
 * @param cmd Executable followed by its arguments.
 * @returns The command's stdout, decoded and trimmed.
 */
function run(cmd: string[]): string {
  const { stdout } = Bun.spawnSync({ cmd, stdout: 'pipe', stderr: 'pipe' })
  const decoder = new TextDecoder()
  return decoder.decode(stdout).trim()
}
|
||||
|
||||
/**
 * Runs a command without blocking the event loop and returns what it printed.
 *
 * stdout is drained first and the process exit is awaited afterwards; the
 * exit code and stderr are not inspected.
 *
 * @param cmd Executable followed by its arguments.
 * @returns The command's stdout, trimmed.
 */
async function runAsync(cmd: string[]): Promise<string> {
  const child = Bun.spawn(cmd, { stdout: 'pipe', stderr: 'pipe' })
  const output = await new Response(child.stdout).text()
  await child.exited
  return output.trim()
}
|
||||
|
||||
/**
 * Reports whether `which` can resolve the given command name.
 *
 * @param name Command to look up.
 * @returns `true` when `which <name>` exits with status 0.
 */
function commandExists(name: string): boolean {
  const { exitCode } = Bun.spawnSync({
    cmd: ['which', name],
    stdout: 'pipe',
    stderr: 'pipe',
  })
  return exitCode === 0
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DisplayAPI
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Display information for Linux, parsed from `xrandr --query`.
 */
export const display: DisplayAPI = {
  /**
   * Returns the geometry of one display.
   *
   * @param displayId Id to look up; when omitted or unknown the first listed
   *   display is returned.
   * @returns The matching geometry, else the first display, else a
   *   1920x1080 @1x placeholder.
   */
  getSize(displayId?: number): DisplayGeometry {
    const displays = this.listAll()
    const requested = displayId === undefined
      ? undefined
      : displays.find(candidate => candidate.displayId === displayId)
    return requested ?? displays[0] ?? { width: 1920, height: 1080, scaleFactor: 1, displayId: 0 }
  },

  /**
   * Lists the connected outputs that currently have a mode set.
   *
   * Display ids are the 0-based positions of the outputs in the xrandr
   * listing; the scale factor is always reported as 1.
   *
   * @returns At least one geometry; a single 1920x1080 placeholder when
   *   xrandr cannot be run or reports no active output.
   */
  listAll(): DisplayGeometry[] {
    const placeholder = (): DisplayGeometry[] => [{ width: 1920, height: 1080, scaleFactor: 1, displayId: 0 }]
    try {
      const xrandrOutput = run(['xrandr', '--query'])

      // Match lines like: "HDMI-1 connected 1920x1080+0+0" or "eDP-1 connected primary 2560x1440+0+0"
      const connectedOutput = /^\S+\s+connected\s+(?:primary\s+)?(\d+)x(\d+)\+\d+\+\d+/gm
      const displays = Array.from(
        xrandrOutput.matchAll(connectedOutput),
        (found, position): DisplayGeometry => ({
          width: Number(found[1]),
          height: Number(found[2]),
          scaleFactor: 1,
          displayId: position,
        }),
      )

      return displays.length > 0 ? displays : placeholder()
    } catch {
      return placeholder()
    }
  },
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// AppsAPI
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Looks up the executable path and short command name of a process through
 * `/proc`. Either field is an empty string when it cannot be read.
 *
 * @param pid Process id; must consist of digits only.
 */
function describeProcess(pid: string): { exePath: string; appName: string } {
  let exePath = ''
  try { exePath = run(['readlink', '-f', `/proc/${pid}/exe`]) } catch { /* ignore */ }
  let appName = ''
  try { appName = run(['cat', `/proc/${pid}/comm`]) } catch { /* ignore */ }
  return { exePath, appName }
}

/** Extracts the pointer position from `xdotool getmouselocation --shell` output. */
function readMouseLocation(shellOutput: string): { x: string; y: string } | null {
  const x = shellOutput.match(/^X=(-?\d+)$/m)?.[1]
  const y = shellOutput.match(/^Y=(-?\d+)$/m)?.[1]
  return x !== undefined && y !== undefined ? { x, y } : null
}

/**
 * Application management for Linux (X11), implemented with xdotool, wmctrl,
 * `.desktop` files and `/proc`.
 *
 * "Bundle ids" on this backend are `.desktop` base names (installed apps) or
 * executable paths / pids (running apps).
 */
export const apps: AppsAPI = {
  /** Not implemented on this backend: reports that nothing was activated or hidden. */
  async prepareDisplay(_allowlistBundleIds, _surrogateHost, _displayId): Promise<PrepareDisplayResult> {
    return { activated: '', hidden: [] }
  },

  /** Not implemented on this backend: always reports an empty hide set. */
  async previewHideSet(_bundleIds, _displayId): Promise<AppInfo[]> {
    return []
  },

  /** Placeholder: every bundle id is reported on display 0. */
  async findWindowDisplays(bundleIds): Promise<WindowDisplayInfo[]> {
    return bundleIds.map(bundleId => ({ bundleId, displayIds: [0] }))
  },

  /**
   * Identifies the application owning the window at the given point.
   *
   * xdotool can only report the window under the pointer, so the pointer is
   * moved to the point, queried, and then moved back to where it was.
   *
   * @returns The owning process (executable path as id, `comm` as name), or
   *   `null` when the coordinates are not finite or nothing can be resolved.
   */
  async appUnderPoint(x, y): Promise<AppInfo | null> {
    const px = Math.round(Number(x))
    const py = Math.round(Number(y))
    if (!Number.isFinite(px) || !Number.isFinite(py)) return null
    try {
      // Remember the pointer position so the probe has no lasting side effect.
      const previous = readMouseLocation(run(['xdotool', 'getmouselocation', '--shell']))
      try {
        // Move mouse to point, get window under cursor
        const out = run(['xdotool', 'mousemove', '--sync', String(px), String(py), 'getmouselocation', '--shell'])
        const windowId = out.match(/WINDOW=(\d+)/)?.[1]
        if (!windowId) return null

        const pidStr = run(['xdotool', 'getwindowpid', windowId])
        // Anything but a plain pid (empty output, error text) is rejected
        // before it is used to build a /proc path.
        if (!/^\d+$/.test(pidStr)) return null

        const { exePath, appName } = describeProcess(pidStr)
        if (!exePath && !appName) return null
        return { bundleId: exePath || pidStr, displayName: appName || 'unknown' }
      } finally {
        if (previous) {
          try { run(['xdotool', 'mousemove', previous.x, previous.y]) } catch { /* best effort */ }
        }
      }
    } catch {
      return null
    }
  },

  /**
   * Lists applications described by `.desktop` files in the standard
   * system and per-user directories (at most 200 entries).
   *
   * Entries with `NoDisplay=true` or without a `Name=` are skipped. The
   * bundle id is the file's base name without the `.desktop` suffix; the
   * path is the first word of the `Exec=` line.
   */
  async listInstalled(): Promise<InstalledApp[]> {
    const limit = 200
    try {
      // Read .desktop files from standard locations
      const home = process.env.HOME
      const dirs = [
        '/usr/share/applications',
        '/usr/local/share/applications',
        ...(home ? [`${home}/.local/share/applications`] : []),
      ]
      const found: InstalledApp[] = []

      for (const dir of dirs) {
        if (found.length >= limit) break
        let files: string
        try {
          // -maxdepth is a global option and must precede tests such as -name.
          files = run(['find', dir, '-maxdepth', '1', '-name', '*.desktop'])
        } catch { continue }

        for (const filepath of files.split('\n').filter(Boolean)) {
          // Stop spawning `cat` once the cap is reached.
          if (found.length >= limit) break
          try {
            const content = run(['cat', filepath])
            if (/^NoDisplay=true$/m.test(content)) continue

            const name = content.match(/^Name=(.+)$/m)?.[1] ?? ''
            if (!name) continue
            const exec = content.match(/^Exec=(.+)$/m)?.[1] ?? ''

            found.push({
              bundleId: (filepath.split('/').pop() ?? '').replace(/\.desktop$/, ''),
              displayName: name,
              path: exec.split(/\s+/)[0] ?? '',
            })
          } catch { /* skip unreadable files */ }
        }
      }

      return found
    } catch {
      return []
    }
  },

  /** Not implemented on this backend: no icon is ever produced. */
  iconDataUrl(_path): string | null {
    return null
  },

  /**
   * Lists running applications.
   *
   * With wmctrl available, the owners of the managed windows are returned
   * (deduplicated, at most 50). Otherwise the first 50 rows of `ps` are
   * returned with the pid as id.
   */
  listRunning(): RunningApp[] {
    try {
      // Try wmctrl first
      if (commandExists('wmctrl')) {
        const raw = run(['wmctrl', '-l', '-p'])
        const byId = new Map<string, RunningApp>()
        for (const line of raw.split('\n').filter(Boolean)) {
          // wmctrl format: "0x04000003  0 12345  hostname Window Title"
          const pid = line.split(/\s+/)[2]
          if (!pid || !/^\d+$/.test(pid) || pid === '0') continue

          const { exePath, appName } = describeProcess(pid)
          if (!appName) continue
          const bundleId = exePath || pid
          // Deduplicate by bundleId, keeping the first occurrence.
          if (!byId.has(bundleId)) byId.set(bundleId, { bundleId, displayName: appName })
        }
        return [...byId.values()].slice(0, 50)
      }

      // Fallback: ps with visible processes
      const raw = run(['ps', '-eo', 'pid,comm', '--no-headers'])
      const listed: RunningApp[] = []
      for (const line of raw.split('\n').filter(Boolean).slice(0, 50)) {
        const match = line.trim().match(/^(\d+)\s+(.+)$/)
        if (match) {
          listed.push({ bundleId: match[1]!, displayName: match[2]! })
        }
      }
      return listed
    } catch {
      return []
    }
  },

  /**
   * Launches an application.
   *
   * `gtk-launch` is tried first with the name as a `.desktop` id; when it is
   * missing or exits with a non-zero status, `xdg-open` is used instead.
   */
  async open(name): Promise<void> {
    try {
      const desktopName = name.endsWith('.desktop') ? name : `${name}.desktop`
      if (commandExists('gtk-launch')) {
        // Spawned directly because the exit status decides whether to fall back.
        const launcher = Bun.spawn(['gtk-launch', desktopName], { stdout: 'ignore', stderr: 'ignore' })
        if ((await launcher.exited) === 0) return
      }
    } catch { /* fall through */ }

    await runAsync(['xdg-open', name])
  },

  /**
   * Raises the given windows. Ids starting with `0x` are treated as window
   * ids for wmctrl; anything else is used as a window-name search for
   * xdotool. Failures for individual entries are ignored.
   */
  async unhide(bundleIds): Promise<void> {
    for (const id of bundleIds) {
      try {
        if (commandExists('wmctrl') && id.startsWith('0x')) {
          // Window ID — use wmctrl
          await runAsync(['wmctrl', '-i', '-R', id])
        } else {
          // Try xdotool windowactivate with search by name
          await runAsync(['xdotool', 'search', '--name', id, 'windowactivate'])
        }
      } catch { /* ignore failures for individual windows */ }
    }
  },
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ScreenshotAPI
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Base path from which per-capture temporary file names are derived. */
const SCREENSHOT_PATH = '/tmp/cu-screenshot.png'

/** Counter keeping temporary file names unique within this process. */
let screenshotSequence = 0

/** Returns a fresh temp file path so overlapping captures never share a file. */
function nextScreenshotPath(): string {
  screenshotSequence += 1
  const suffix = `-${process.pid}-${Date.now()}-${screenshotSequence}.png`
  return SCREENSHOT_PATH.replace(/\.png$/, suffix)
}

/**
 * Reads width and height from a PNG's IHDR chunk (big-endian u32 at byte
 * offsets 16 and 20). Returns `null` when the bytes are not a PNG.
 */
function readPngSize(bytes: Buffer): { width: number; height: number } | null {
  const signature = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]
  if (bytes.length < 24 || !signature.every((byte, i) => bytes[i] === byte)) return null
  return { width: bytes.readUInt32BE(16), height: bytes.readUInt32BE(20) }
}

/**
 * Runs `scrot` into a private temp file, reads it back and removes it again.
 *
 * @param scrotArgs Arguments placed before `-o <file>`.
 * @returns The image as base64 and, when it is a valid PNG, its real size.
 * @throws If the output file cannot be read or is empty.
 */
async function captureWithScrot(
  scrotArgs: string[],
): Promise<{ base64: string; size: { width: number; height: number } | null }> {
  const path = nextScreenshotPath()
  try {
    await runAsync(['scrot', ...scrotArgs, '-o', path])
    const bytes = Buffer.from(await Bun.file(path).arrayBuffer())
    if (bytes.length === 0) throw new Error('scrot produced an empty file')
    return { base64: bytes.toString('base64'), size: readPngSize(bytes) }
  } finally {
    // Never leave the capture behind; a stale file must not be mistaken for a new one.
    try { await runAsync(['rm', '-f', path]) } catch { /* best effort */ }
  }
}

/**
 * Screenshots for Linux (X11) via `scrot`.
 *
 * The allow-list, quality, target/output size and display id parameters do
 * not influence what is captured. Any failure yields an empty result
 * (`base64: ''`, zero size) rather than an exception.
 */
export const screenshot: ScreenshotAPI = {
  /**
   * Captures the screen. The reported size is that of the captured image;
   * the display geometry is only used when the image header is unreadable.
   */
  async captureExcluding(_allowedBundleIds, _quality, _targetW, _targetH, _displayId): Promise<ScreenshotResult> {
    try {
      const shot = await captureWithScrot([])
      const size = shot.size ?? display.getSize(_displayId)
      return { base64: shot.base64, width: size.width, height: size.height }
    } catch {
      return { base64: '', width: 0, height: 0 }
    }
  },

  /**
   * Captures the rectangle `x,y,w,h`. The reported size is that of the
   * captured image; the requested `w`/`h` are only used when the image
   * header is unreadable.
   */
  async captureRegion(_allowedBundleIds, x, y, w, h, _outW, _outH, _quality, _displayId): Promise<ScreenshotResult> {
    try {
      // scrot -a x,y,w,h captures a specific region
      const shot = await captureWithScrot(['-a', `${x},${y},${w},${h}`])
      const size = shot.size ?? { width: w, height: h }
      return { base64: shot.base64, width: size.width, height: size.height }
    } catch {
      return { base64: '', width: 0, height: 0 }
    }
  },
}
|
||||
249
packages/@ant/computer-use-swift/src/backends/win32.ts
Normal file
249
packages/@ant/computer-use-swift/src/backends/win32.ts
Normal file
@@ -0,0 +1,249 @@
|
||||
/**
|
||||
* Windows backend for computer-use-swift
|
||||
*
|
||||
* Uses PowerShell with .NET System.Drawing / System.Windows.Forms for
|
||||
* screenshots and Win32 P/Invoke for window/process management.
|
||||
*/
|
||||
|
||||
import type {
|
||||
AppInfo, AppsAPI, DisplayAPI, DisplayGeometry, InstalledApp,
|
||||
PrepareDisplayResult, RunningApp, ScreenshotAPI, ScreenshotResult,
|
||||
SwiftBackend, WindowDisplayInfo,
|
||||
} from '../types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// PowerShell helper
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Runs a PowerShell script synchronously (`-NoProfile -NonInteractive`).
 *
 * The exit code and stderr are not inspected.
 *
 * @param script PowerShell source passed via `-Command`.
 * @returns The script's stdout, decoded and trimmed.
 */
function ps(script: string): string {
  const argv = ['powershell', '-NoProfile', '-NonInteractive', '-Command', script]
  const { stdout } = Bun.spawnSync({ cmd: argv, stdout: 'pipe', stderr: 'pipe' })
  const decoder = new TextDecoder()
  return decoder.decode(stdout).trim()
}
|
||||
|
||||
/**
 * Runs a PowerShell script (`-NoProfile -NonInteractive`) without blocking
 * the event loop.
 *
 * stdout is drained first and the process exit is awaited afterwards; the
 * exit code and stderr are not inspected.
 *
 * @param script PowerShell source passed via `-Command`.
 * @returns The script's stdout, trimmed.
 */
async function psAsync(script: string): Promise<string> {
  const argv = ['powershell', '-NoProfile', '-NonInteractive', '-Command', script]
  const child = Bun.spawn(argv, { stdout: 'pipe', stderr: 'pipe' })
  const output = await new Response(child.stdout).text()
  await child.exited
  return output.trim()
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DisplayAPI
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Display information for Windows, read from
 * `System.Windows.Forms.Screen.AllScreens` through PowerShell.
 */
export const display: DisplayAPI = {
  /**
   * Returns the geometry of one display.
   *
   * @param displayId Id to look up; when omitted or unknown the first listed
   *   display is returned.
   * @returns The matching geometry, else the first display, else a
   *   1920x1080 @1x placeholder.
   */
  getSize(displayId?: number): DisplayGeometry {
    const all = this.listAll()
    if (displayId !== undefined) {
      const found = all.find(d => d.displayId === displayId)
      if (found) return found
    }
    return all[0] ?? { width: 1920, height: 1080, scaleFactor: 1, displayId: 0 }
  },

  /**
   * Lists the screens. Display ids are the 0-based positions in
   * `AllScreens`; the scale factor is always reported as 1.
   *
   * @returns At least one geometry; a single 1920x1080 placeholder when
   *   PowerShell cannot be run or prints nothing usable.
   */
  listAll(): DisplayGeometry[] {
    const placeholder: DisplayGeometry = { width: 1920, height: 1080, scaleFactor: 1, displayId: 0 }
    try {
      const raw = ps(`
        Add-Type -AssemblyName System.Windows.Forms
        $result = @()
        $idx = 0
        foreach ($s in [System.Windows.Forms.Screen]::AllScreens) {
          $result += "$($s.Bounds.Width),$($s.Bounds.Height),$idx"
          $idx++
        }
        $result -join "|"
      `)
      const displays: DisplayGeometry[] = []
      for (const entry of raw.split('|')) {
        // Fields beyond the third are ignored.
        const [width, height, displayId] = entry.trim().split(',').map(Number)
        if (width === undefined || height === undefined || displayId === undefined) continue
        if (!(width > 0) || !(height > 0) || !Number.isInteger(displayId)) continue
        displays.push({
          width,
          height,
          scaleFactor: 1, // Windows DPI scaling handled at system level
          displayId,
        })
      }
      // ps() does not throw when PowerShell fails; it just prints nothing.
      return displays.length > 0 ? displays : [placeholder]
    } catch {
      return [placeholder]
    }
  },
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// AppsAPI
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Joins PowerShell source lines. Used for scripts containing here-strings,
 * whose closing `'@` must start at the first column of its line.
 */
function psLines(lines: string[]): string {
  return lines.join('\n')
}

/** Splits PowerShell output into non-empty lines, accepting CRLF and LF. */
function outputLines(raw: string): string[] {
  return raw.split(/\r?\n/).filter(Boolean)
}

/**
 * Application management for Windows, implemented with PowerShell and
 * user32.dll P/Invoke.
 *
 * "Bundle ids" on this backend are executable paths (running apps) or
 * uninstall registry key names (installed apps).
 */
export const apps: AppsAPI = {
  /** Not implemented on this backend: reports that nothing was activated or hidden. */
  async prepareDisplay(_allowlistBundleIds, _surrogateHost, _displayId) {
    return { activated: '', hidden: [] }
  },

  /** Not implemented on this backend: always reports an empty hide set. */
  async previewHideSet(_bundleIds, _displayId) {
    return []
  },

  /** Placeholder: every bundle id is reported on display 0. */
  async findWindowDisplays(bundleIds) {
    return bundleIds.map(bundleId => ({ bundleId, displayIds: [0] }))
  },

  /**
   * Identifies the process owning the window at the given screen point via
   * `WindowFromPoint` / `GetWindowThreadProcessId`.
   *
   * @returns The process (executable path as id, process name as display
   *   name), or `null` when the coordinates are not finite or no process
   *   could be resolved.
   */
  async appUnderPoint(_x, _y) {
    const px = Math.round(Number(_x))
    const py = Math.round(Number(_y))
    // POINT fields are ints; only finite integers are interpolated below.
    if (!Number.isFinite(px) || !Number.isFinite(py)) return null
    try {
      const out = ps(psLines([
        "Add-Type @'",
        'using System;',
        'using System.Runtime.InteropServices;',
        'public class WinPt {',
        '  [StructLayout(LayoutKind.Sequential)] public struct POINT { public int X; public int Y; }',
        '  [DllImport("user32.dll")] public static extern IntPtr WindowFromPoint(POINT p);',
        '  [DllImport("user32.dll")] public static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint pid);',
        '}',
        "'@",
        '$pt = New-Object WinPt+POINT',
        `$pt.X = ${px}; $pt.Y = ${py}`,
        '$hwnd = [WinPt]::WindowFromPoint($pt)',
        // $PID is a read-only automatic variable (the PowerShell host's own
        // process id), so the out-parameter needs a different name.
        '$ownerPid = [uint32]0',
        '[WinPt]::GetWindowThreadProcessId($hwnd, [ref]$ownerPid) | Out-Null',
        '$proc = Get-Process -Id $ownerPid -ErrorAction SilentlyContinue',
        'if ($proc) { "$($proc.MainModule.FileName)|$($proc.ProcessName)" }',
      ]))
      if (!out || !out.includes('|')) return null
      const [exePath = '', name = ''] = out.split('|', 2)
      if (!exePath && !name) return null
      return { bundleId: exePath || name, displayName: name || exePath }
    } catch {
      return null
    }
  },

  /**
   * Lists installed applications from the Uninstall registry keys
   * (machine-wide, WOW6432Node and per-user), at most 200 entries.
   */
  async listInstalled() {
    try {
      const raw = await psAsync(`
        $apps = @()
        $paths = @(
          'HKLM:\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\*',
          'HKLM:\\SOFTWARE\\WOW6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\*',
          'HKCU:\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\*'
        )
        foreach ($p in $paths) {
          Get-ItemProperty $p -ErrorAction SilentlyContinue | Where-Object { $_.DisplayName } | ForEach-Object {
            $apps += "$($_.DisplayName)|$($_.InstallLocation)|$($_.PSChildName)"
          }
        }
        $apps | Select-Object -Unique | Select-Object -First 200
      `)
      return outputLines(raw).map(line => {
        const [name, path, id] = line.split('|', 3)
        return {
          bundleId: id ?? name ?? '',
          displayName: name ?? '',
          path: path ?? '',
        }
      })
    } catch {
      return []
    }
  },

  /** Not implemented on this backend: no icon is ever produced. */
  iconDataUrl(_path) {
    return null
  },

  /**
   * Lists processes that own a titled main window (at most 50), using the
   * executable path as id and the process name as display name.
   */
  listRunning() {
    try {
      const raw = ps(`Get-Process | Where-Object { $_.MainWindowTitle -ne '' } | Select-Object -First 50 | ForEach-Object { "$($_.MainModule.FileName)|$($_.ProcessName)" }`)
      return outputLines(raw).map(line => {
        const [exePath, name] = line.split('|', 2)
        return { bundleId: exePath ?? '', displayName: name ?? '' }
      })
    } catch {
      return []
    }
  },

  /**
   * Starts an application with `Start-Process`.
   *
   * @param name Executable path or anything else `Start-Process` can resolve.
   */
  async open(name) {
    // On Windows, name is the exe path (bundleId) or process name.
    // Try exe path first, fall back to process name lookup.
    const escaped = name.replace(/'/g, "''")
    await psAsync(`
      if (Test-Path '${escaped}') {
        Start-Process '${escaped}'
      } else {
        Start-Process -FilePath '${escaped}' -ErrorAction SilentlyContinue
      }`)
  },

  /**
   * Restores and brings to the foreground the main window of each given
   * application.
   *
   * Each entry may be an executable path or a process name; it is reduced
   * to the process name `Get-Process -Name` expects (base name without
   * `.exe`) and single-quote escaped before being embedded in the script.
   */
  async unhide(bundleIds) {
    // Windows: bring window to foreground
    for (const name of bundleIds) {
      const processName = (name.split(/[\\/]/).pop() ?? name).replace(/\.exe$/i, '')
      if (!processName) continue
      const quoted = processName.replace(/'/g, "''")
      await psAsync(psLines([
        "Add-Type @'",
        'using System;',
        'using System.Runtime.InteropServices;',
        'public class WinShow {',
        '  [DllImport("user32.dll")] public static extern bool ShowWindow(IntPtr hWnd, int nCmd);',
        '  [DllImport("user32.dll")] public static extern bool SetForegroundWindow(IntPtr hWnd);',
        '}',
        "'@",
        // Prefer a process that actually owns a window.
        `$proc = Get-Process -Name '${quoted}' -ErrorAction SilentlyContinue | Where-Object { $_.MainWindowHandle -ne 0 } | Select-Object -First 1`,
        // 9 = SW_RESTORE
        'if ($proc) { [WinShow]::ShowWindow($proc.MainWindowHandle, 9) | Out-Null; [WinShow]::SetForegroundWindow($proc.MainWindowHandle) | Out-Null }',
      ]))
    }
  },
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ScreenshotAPI
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Parses the `"<width>,<height>,<base64>"` line printed by the capture
 * scripts. Anything else (empty output, error text) yields an empty result
 * with zero size instead of garbage values.
 */
function parseCaptureOutput(raw: string): ScreenshotResult {
  const match = /^(\d+),(\d+),([A-Za-z0-9+/]+={0,2})$/.exec(raw.trim())
  if (!match) return { base64: '', width: 0, height: 0 }
  const [, width = '0', height = '0', base64 = ''] = match
  return { base64, width: Number(width), height: Number(height) }
}

/**
 * Screenshots for Windows via PowerShell and `System.Drawing`
 * (`Graphics.CopyFromScreen`, encoded as PNG).
 *
 * The allow-list, quality and target/output size parameters are accepted for
 * interface compatibility but are not used by this backend. A failed capture
 * yields an empty result (`base64: ''`, zero size).
 */
export const screenshot: ScreenshotAPI = {
  /**
   * Captures a whole screen.
   *
   * @param displayId 0-based index into `AllScreens`; when omitted, invalid
   *   or out of range, the primary screen is captured.
   */
  async captureExcluding(_allowedBundleIds, _quality, _targetW, _targetH, displayId) {
    const index = typeof displayId === 'number' && Number.isInteger(displayId) && displayId >= 0 ? displayId : -1
    const raw = await psAsync(`
      Add-Type -AssemblyName System.Windows.Forms
      Add-Type -AssemblyName System.Drawing
      $all = [System.Windows.Forms.Screen]::AllScreens
      $screen = if (${index} -ge 0 -and ${index} -lt $all.Length) { $all[${index}] } else { [System.Windows.Forms.Screen]::PrimaryScreen }
      $bounds = $screen.Bounds
      $bmp = New-Object System.Drawing.Bitmap($bounds.Width, $bounds.Height)
      $g = [System.Drawing.Graphics]::FromImage($bmp)
      $g.CopyFromScreen($bounds.Location, [System.Drawing.Point]::Empty, $bounds.Size)
      $g.Dispose()
      $ms = New-Object System.IO.MemoryStream
      $bmp.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png)
      $bmp.Dispose()
      $bytes = $ms.ToArray()
      $ms.Dispose()
      "$($bounds.Width),$($bounds.Height)," + [Convert]::ToBase64String($bytes)
    `)
    return parseCaptureOutput(raw)
  },

  /**
   * Captures the rectangle `x,y,w,h` (screen coordinates). Coordinates and
   * sizes are rounded to integers; a non-finite value or a non-positive
   * size yields an empty result without running PowerShell.
   */
  async captureRegion(_allowedBundleIds, x, y, w, h, _outW, _outH, _quality, _displayId) {
    const [left, top, width, height] = [x, y, w, h].map(value => Math.round(Number(value)))
    if (left === undefined || top === undefined || width === undefined || height === undefined) {
      return { base64: '', width: 0, height: 0 }
    }
    if (![left, top, width, height].every(Number.isFinite) || width <= 0 || height <= 0) {
      return { base64: '', width: 0, height: 0 }
    }
    const raw = await psAsync(`
      Add-Type -AssemblyName System.Windows.Forms
      Add-Type -AssemblyName System.Drawing
      $bmp = New-Object System.Drawing.Bitmap(${width}, ${height})
      $g = [System.Drawing.Graphics]::FromImage($bmp)
      $g.CopyFromScreen(${left}, ${top}, 0, 0, (New-Object System.Drawing.Size(${width}, ${height})))
      $g.Dispose()
      $ms = New-Object System.IO.MemoryStream
      $bmp.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png)
      $bmp.Dispose()
      $bytes = $ms.ToArray()
      $ms.Dispose()
      "${width},${height}," + [Convert]::ToBase64String($bytes)
    `)
    return parseCaptureOutput(raw)
  },
}
|
||||
@@ -1,377 +1,84 @@
|
||||
/**
|
||||
* @ant/computer-use-swift — macOS 实现
|
||||
* @ant/computer-use-swift — cross-platform display, apps, and screenshot API
|
||||
*
|
||||
* 用 AppleScript/JXA/screencapture 替代原始 Swift 原生模块。
|
||||
* 提供显示器信息、应用管理、截图等功能。
|
||||
* Platform backends:
|
||||
* - darwin: AppleScript/JXA + screencapture
|
||||
* - win32: PowerShell + System.Drawing + Win32 P/Invoke
|
||||
*
|
||||
* 仅 macOS 支持。
|
||||
* Add new platforms by creating backends/<platform>.ts implementing SwiftBackend.
|
||||
*/
|
||||
|
||||
import { readFileSync, unlinkSync } from 'fs'
|
||||
import { tmpdir } from 'os'
|
||||
import { join } from 'path'
|
||||
// Re-export all types
|
||||
export type {
|
||||
DisplayGeometry,
|
||||
PrepareDisplayResult,
|
||||
AppInfo,
|
||||
InstalledApp,
|
||||
RunningApp,
|
||||
ScreenshotResult,
|
||||
ResolvePrepareCaptureResult,
|
||||
WindowDisplayInfo,
|
||||
DisplayAPI,
|
||||
AppsAPI,
|
||||
ScreenshotAPI,
|
||||
SwiftBackend,
|
||||
} from './types.js'
|
||||
|
||||
import type { ResolvePrepareCaptureResult, SwiftBackend } from './types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types (exported for callers)
|
||||
// Platform dispatch
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface DisplayGeometry {
|
||||
width: number
|
||||
height: number
|
||||
scaleFactor: number
|
||||
displayId: number
|
||||
}
|
||||
|
||||
export interface PrepareDisplayResult {
|
||||
activated: string
|
||||
hidden: string[]
|
||||
}
|
||||
|
||||
export interface AppInfo {
|
||||
bundleId: string
|
||||
displayName: string
|
||||
}
|
||||
|
||||
export interface InstalledApp {
|
||||
bundleId: string
|
||||
displayName: string
|
||||
path: string
|
||||
iconDataUrl?: string
|
||||
}
|
||||
|
||||
export interface RunningApp {
|
||||
bundleId: string
|
||||
displayName: string
|
||||
}
|
||||
|
||||
export interface ScreenshotResult {
|
||||
base64: string
|
||||
width: number
|
||||
height: number
|
||||
}
|
||||
|
||||
export interface ResolvePrepareCaptureResult {
|
||||
base64: string
|
||||
width: number
|
||||
height: number
|
||||
}
|
||||
|
||||
/**
 * Associates an application with the displays it has windows on, as returned
 * by `AppsAPI.findWindowDisplays`.
 */
export interface WindowDisplayInfo {
  /** Bundle identifier of the application. */
  bundleId: string
  /** Identifiers of the displays associated with the application. */
  displayIds: number[]
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Synchronously runs a JXA (JavaScript for Automation) script via `osascript`.
 *
 * The exit code and stderr are not inspected; only stdout is returned.
 *
 * @param script - Source passed to `osascript -l JavaScript -e`.
 * @returns The script's stdout, decoded and trimmed.
 */
function jxaSync(script: string): string {
  const { stdout } = Bun.spawnSync({
    cmd: ['osascript', '-l', 'JavaScript', '-e', script],
    stdout: 'pipe',
    stderr: 'pipe',
  })
  const decoder = new TextDecoder()
  return decoder.decode(stdout).trim()
}
|
||||
|
||||
/**
 * Synchronously runs an AppleScript snippet via `osascript`.
 *
 * The exit code and stderr are not inspected; only stdout is returned.
 *
 * @param script - Source passed to `osascript -e`.
 * @returns The script's stdout, decoded and trimmed.
 */
function osascriptSync(script: string): string {
  const { stdout } = Bun.spawnSync({
    cmd: ['osascript', '-e', script],
    stdout: 'pipe',
    stderr: 'pipe',
  })
  const decoder = new TextDecoder()
  return decoder.decode(stdout).trim()
}
|
||||
|
||||
/**
 * Runs an AppleScript snippet via `osascript` without blocking.
 *
 * Stdout is read to completion, then the process exit is awaited. The exit
 * code and stderr are not inspected.
 *
 * @param script - Source passed to `osascript -e`.
 * @returns The script's stdout, trimmed.
 */
async function osascript(script: string): Promise<string> {
  const child = Bun.spawn(['osascript', '-e', script], {
    stdout: 'pipe',
    stderr: 'pipe',
  })
  const output = await new Response(child.stdout).text()
  await child.exited
  return output.trim()
}
|
||||
|
||||
/**
 * Runs a JXA (JavaScript for Automation) script via `osascript` without blocking.
 *
 * Stdout is read to completion, then the process exit is awaited. The exit
 * code and stderr are not inspected.
 *
 * @param script - Source passed to `osascript -l JavaScript -e`.
 * @returns The script's stdout, trimmed.
 */
async function jxa(script: string): Promise<string> {
  const child = Bun.spawn(['osascript', '-l', 'JavaScript', '-e', script], {
    stdout: 'pipe',
    stderr: 'pipe',
  })
  const output = await new Response(child.stdout).text()
  await child.exited
  return output.trim()
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DisplayAPI
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Queries about the attached displays. */
interface DisplayAPI {
  /**
   * Returns the geometry of one display.
   *
   * @param displayId - Display to look up; optional.
   */
  getSize(displayId?: number): DisplayGeometry

  /** Returns the geometry of every display. */
  listAll(): DisplayGeometry[]
}
|
||||
|
||||
/**
 * macOS display queries implemented with JXA scripts run through `jxaSync`.
 */
const displayAPI: DisplayAPI = {
  /**
   * Returns the display whose `displayId` matches, otherwise the first listed
   * display, otherwise a hard-coded 1920x1080 @2x placeholder.
   */
  getSize(displayId?: number): DisplayGeometry {
    const displays = this.listAll()
    const match =
      displayId === undefined
        ? undefined
        : displays.find(candidate => candidate.displayId === displayId)
    if (match) return match
    return displays[0] ?? { width: 1920, height: 1080, scaleFactor: 2, displayId: 1 }
  },

  /**
   * Lists displays via CoreGraphics; if that fails (including unparsable
   * output), retries via `NSScreen`; if that fails too, returns a single
   * 1920x1080 @2x placeholder.
   */
  listAll(): DisplayGeometry[] {
    // Parses the script's JSON output and coerces every field to a number.
    const parseDisplays = (json: string): DisplayGeometry[] =>
      (JSON.parse(json) as DisplayGeometry[]).map(entry => ({
        width: Number(entry.width),
        height: Number(entry.height),
        scaleFactor: Number(entry.scaleFactor),
        displayId: Number(entry.displayId),
      }))

    try {
      // NOTE(review): the `displays` and `active` variables in this script are
      // never read afterwards.
      // scaleFactor = display mode pixel width / CGDisplayPixelsWide, or 2
      // when either value is not positive.
      return parseDisplays(jxaSync(`
        ObjC.import("CoreGraphics");
        var displays = $.CGDisplayCopyAllDisplayModes ? [] : [];
        var active = $.CGGetActiveDisplayList(10, null, Ref());
        var countRef = Ref();
        $.CGGetActiveDisplayList(0, null, countRef);
        var count = countRef[0];
        var idBuf = Ref();
        $.CGGetActiveDisplayList(count, idBuf, countRef);
        var result = [];
        for (var i = 0; i < count; i++) {
          var did = idBuf[i];
          var w = $.CGDisplayPixelsWide(did);
          var h = $.CGDisplayPixelsHigh(did);
          var mode = $.CGDisplayCopyDisplayMode(did);
          var pw = $.CGDisplayModeGetPixelWidth(mode);
          var sf = pw > 0 && w > 0 ? pw / w : 2;
          result.push({width: w, height: h, scaleFactor: sf, displayId: did});
        }
        JSON.stringify(result);
      `))
    } catch {
      // Fall through to the NSScreen-based script below.
    }

    try {
      // Fallback: use NSScreen via JXA
      return parseDisplays(jxaSync(`
        ObjC.import("AppKit");
        var screens = $.NSScreen.screens;
        var result = [];
        for (var i = 0; i < screens.count; i++) {
          var s = screens.objectAtIndex(i);
          var frame = s.frame;
          var desc = s.deviceDescription;
          var screenNumber = desc.objectForKey($("NSScreenNumber")).intValue;
          var backingFactor = s.backingScaleFactor;
          result.push({
            width: Math.round(frame.size.width),
            height: Math.round(frame.size.height),
            scaleFactor: backingFactor,
            displayId: screenNumber
          });
        }
        JSON.stringify(result);
      `))
    } catch {
      return [{ width: 1920, height: 1080, scaleFactor: 2, displayId: 1 }]
    }
  },
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// AppsAPI
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Application management operations.
 *
 * NOTE(review): several descriptions below are inferred from method and
 * parameter names — confirm against callers.
 */
interface AppsAPI {
  /** Prepares a display for capture; reports what was activated and hidden. */
  prepareDisplay(
    allowlistBundleIds: string[],
    surrogateHost: string,
    displayId?: number,
  ): Promise<PrepareDisplayResult>

  /** Presumably previews which applications `prepareDisplay` would hide — confirm. */
  previewHideSet(bundleIds: string[], displayId?: number): Promise<AppInfo[]>

  /** Maps each bundle ID to the displays associated with that application. */
  findWindowDisplays(bundleIds: string[]): Promise<WindowDisplayInfo[]>

  /** Resolves the application at a screen coordinate, or `null`. */
  appUnderPoint(x: number, y: number): Promise<AppInfo | null>

  /** Lists installed applications. */
  listInstalled(): Promise<InstalledApp[]>

  /** Returns an icon for the application at `path`, or `null`. */
  iconDataUrl(path: string): string | null

  /** Lists running applications (synchronous). */
  listRunning(): RunningApp[]

  /** Opens / activates the application with the given bundle ID. */
  open(bundleId: string): Promise<void>

  /** Makes the given applications visible again. */
  unhide(bundleIds: string[]): Promise<void>
}
|
||||
|
||||
/**
 * Escapes `value` for embedding between double quotes in AppleScript source.
 * Backslashes are escaped first so the backslashes added for quotes are not
 * doubled again.
 */
function escapeAppleScriptString(value: string): string {
  return value.replace(/\\/g, '\\\\').replace(/"/g, '\\"')
}

/**
 * macOS application management implemented with AppleScript / JXA run through
 * `osascript`.
 *
 * Several operations are placeholders: `prepareDisplay` and `previewHideSet`
 * do nothing, `findWindowDisplays` assumes display 1, `appUnderPoint` reports
 * the frontmost application, and `iconDataUrl` always returns `null`.
 */
const appsAPI: AppsAPI = {
  /** Placeholder: hides and activates nothing. */
  async prepareDisplay(
    _allowlistBundleIds: string[],
    _surrogateHost: string,
    _displayId?: number,
  ): Promise<PrepareDisplayResult> {
    return { activated: '', hidden: [] }
  },

  /** Placeholder: reports that nothing would be hidden. */
  async previewHideSet(
    _bundleIds: string[],
    _displayId?: number,
  ): Promise<AppInfo[]> {
    return []
  },

  /** Reports every requested application as being on display 1. */
  async findWindowDisplays(bundleIds: string[]): Promise<WindowDisplayInfo[]> {
    // Each running app is assumed to be on display 1
    return bundleIds.map(bundleId => ({ bundleId, displayIds: [1] }))
  },

  /**
   * Returns the frontmost application; the point is built in the script but
   * not used for hit testing.
   *
   * @returns The frontmost app, or `null` when the script output cannot be parsed.
   */
  async appUnderPoint(_x: number, _y: number): Promise<AppInfo | null> {
    try {
      // Number() guarantees that only a numeric literal (or NaN/Infinity) is
      // spliced into the script, even if a caller passes a non-number at runtime.
      const result = await jxa(`
        ObjC.import("CoreGraphics");
        ObjC.import("AppKit");
        var pt = $.CGPointMake(${Number(_x)}, ${Number(_y)});
        // Get frontmost app as a fallback
        var app = $.NSWorkspace.sharedWorkspace.frontmostApplication;
        JSON.stringify({bundleId: app.bundleIdentifier.js, displayName: app.localizedName.js});
      `)
      return JSON.parse(result)
    } catch {
      return null
    }
  },

  /**
   * Lists `.app` entries directly inside `/Applications` on the startup disk.
   *
   * The script emits one `path|name` line per application. `bundleId` is not
   * read from the bundle; it is synthesised as `com.app.<lower-cased-name>`.
   *
   * @returns The discovered applications, or `[]` if anything throws.
   */
  async listInstalled(): Promise<InstalledApp[]> {
    try {
      const result = await osascript(`
        tell application "System Events"
          set appList to ""
          repeat with appFile in (every file of folder "Applications" of startup disk whose name ends with ".app")
            set appPath to POSIX path of (appFile as alias)
            set appName to name of appFile
            set appList to appList & appPath & "|" & appName & "\\n"
          end repeat
          return appList
        end tell
      `)
      return result.split('\n').filter(Boolean).map(line => {
        const [path, name] = line.split('|', 2)
        // Derive bundleId from Info.plist would be ideal, but use path-based fallback
        const displayName = (name ?? '').replace(/\.app$/, '')
        return {
          bundleId: `com.app.${displayName.toLowerCase().replace(/\s+/g, '-')}`,
          displayName,
          path: path ?? '',
        }
      })
    } catch {
      return []
    }
  },

  /** Placeholder: icons are not extracted. */
  iconDataUrl(_path: string): string | null {
    return null
  },

  /**
   * Lists non-background application processes via System Events.
   *
   * @returns The running applications, or `[]` when the script output cannot be parsed.
   */
  listRunning(): RunningApp[] {
    try {
      const raw = jxaSync(`
        var apps = Application("System Events").applicationProcesses.whose({backgroundOnly: false});
        var result = [];
        for (var i = 0; i < apps.length; i++) {
          try {
            var a = apps[i];
            result.push({bundleId: a.bundleIdentifier(), displayName: a.name()});
          } catch(e) {}
        }
        JSON.stringify(result);
      `)
      return JSON.parse(raw)
    } catch {
      return []
    }
  },

  /**
   * Activates the application with the given bundle ID.
   *
   * The ID is escaped before being placed in the script so a `"` or `\` in
   * it cannot terminate the string literal and inject AppleScript.
   */
  async open(bundleId: string): Promise<void> {
    await osascript(`tell application id "${escapeAppleScriptString(bundleId)}" to activate`)
  },

  /**
   * Sets `visible` to true for each application process, one at a time.
   * Bundle IDs are escaped for the same reason as in `open`.
   */
  async unhide(bundleIds: string[]): Promise<void> {
    for (const bundleId of bundleIds) {
      await osascript(`
        tell application "System Events"
          set visible of application process (name of application process whose bundle identifier is "${escapeAppleScriptString(bundleId)}") to true
        end tell
      `)
    }
  },
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ScreenshotAPI
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Screen capture operations.
 *
 * NOTE(review): parameter semantics are inferred from their names. The macOS
 * implementation in this file ignores `allowedBundleIds`, `quality` and the
 * target/output size arguments.
 */
interface ScreenshotAPI {
  /** Captures a whole display. */
  captureExcluding(
    allowedBundleIds: string[],
    quality: number,
    targetW: number,
    targetH: number,
    displayId?: number,
  ): Promise<ScreenshotResult>

  /** Captures the rectangle `x, y, w, h`. */
  captureRegion(
    allowedBundleIds: string[],
    x: number,
    y: number,
    w: number,
    h: number,
    outW: number,
    outH: number,
    quality: number,
    displayId?: number,
  ): Promise<ScreenshotResult>
}
|
||||
|
||||
/**
 * Runs `screencapture` into a temporary PNG and returns the file
 * base64-encoded, with the dimensions read from the PNG header.
 *
 * The temporary file is removed afterwards whether or not reading succeeded.
 *
 * @param args - Extra `screencapture` arguments placed before the output path.
 * @returns The encoded image and its pixel dimensions.
 * @throws If the output file cannot be read or is too short to hold a PNG header.
 */
async function captureScreenToBase64(args: string[]): Promise<{ base64: string; width: number; height: number }> {
  // Date.now() alone collides when two captures start within the same
  // millisecond; add the pid and a random suffix to keep paths distinct.
  const unique = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`
  const tmpFile = join(tmpdir(), `cu-screenshot-${unique}.png`)
  const proc = Bun.spawn(['screencapture', ...args, tmpFile], {
    stdout: 'pipe', stderr: 'pipe',
  })
  await proc.exited

  try {
    const buf = readFileSync(tmpFile)
    // 8-byte signature + 4-byte chunk length + "IHDR" + width + height = 24 bytes.
    if (buf.length < 24) {
      throw new Error('screencapture produced a file too short to contain a PNG header')
    }
    const base64 = buf.toString('base64')
    // Parse PNG header for dimensions (bytes 16-23)
    const width = buf.readUInt32BE(16)
    const height = buf.readUInt32BE(20)
    return { base64, width, height }
  } finally {
    try { unlinkSync(tmpFile) } catch {}
  }
}

/**
 * Loads the backend module matching `process.platform`.
 *
 * @returns The backend for darwin, win32 or linux; `null` for any other
 *   platform or when loading the module throws.
 */
function loadBackend(): SwiftBackend | null {
  try {
    switch (process.platform) {
      case 'darwin':
        return require('./backends/darwin.js') as SwiftBackend
      case 'win32':
        return require('./backends/win32.js') as SwiftBackend
      case 'linux':
        return require('./backends/linux.js') as SwiftBackend
      default:
        return null
    }
  } catch {
    return null
  }
}
|
||||
|
||||
/**
 * macOS screenshots taken with the `screencapture` command-line tool.
 *
 * Parameters prefixed with `_` are accepted for interface compatibility but
 * not used.
 *
 * NOTE(review): `displayId` is forwarded verbatim to `screencapture -D`.
 * Confirm that callers pass the value that flag expects rather than a
 * CoreGraphics display ID.
 */
const screenshotAPI: ScreenshotAPI = {
  /** Captures a full display (`-x` suppresses the capture sound). */
  async captureExcluding(
    _allowedBundleIds: string[],
    _quality: number,
    _targetW: number,
    _targetH: number,
    displayId?: number,
  ): Promise<ScreenshotResult> {
    const displayArgs = displayId === undefined ? [] : ['-D', String(displayId)]
    return captureScreenToBase64(['-x', ...displayArgs])
  },

  /** Captures the rectangle `x,y,w,h` via `-R`. */
  async captureRegion(
    _allowedBundleIds: string[],
    x: number, y: number, w: number, h: number,
    _outW: number, _outH: number, _quality: number,
    displayId?: number,
  ): Promise<ScreenshotResult> {
    const rect = `${x},${y},${w},${h}`
    const displayArgs = displayId === undefined ? [] : ['-D', String(displayId)]
    return captureScreenToBase64(['-x', '-R', rect, ...displayArgs])
  },
}
|
||||
// Backend for the current platform, resolved once when this module is evaluated; null when loadBackend() finds none.
const backend = loadBackend()
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ComputerUseAPI — Main export
|
||||
// ComputerUseAPI — Main export (preserves original class interface)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class ComputerUseAPI {
|
||||
apps: AppsAPI = appsAPI
|
||||
display: DisplayAPI = displayAPI
|
||||
screenshot: ScreenshotAPI = screenshotAPI
|
||||
// When no backend is loaded (unsupported platform), all APIs are no-op stubs.
|
||||
// These stubs should never be reached in practice — callers check isSupported
|
||||
// or the feature gate before invoking.
|
||||
|
||||
apps = backend?.apps ?? {
|
||||
async prepareDisplay() { return { activated: '', hidden: [] } },
|
||||
async previewHideSet() { return [] },
|
||||
async findWindowDisplays(ids: string[]) { return ids.map(b => ({ bundleId: b, displayIds: [] as number[] })) },
|
||||
async appUnderPoint() { return null },
|
||||
async listInstalled() { return [] },
|
||||
iconDataUrl() { return null },
|
||||
listRunning() { return [] },
|
||||
async open() { throw new Error('computer-use-swift: no backend for this platform') },
|
||||
async unhide() {},
|
||||
}
|
||||
|
||||
display = backend?.display ?? {
|
||||
getSize() { throw new Error('computer-use-swift: no backend for this platform') },
|
||||
listAll() { throw new Error('computer-use-swift: no backend for this platform') },
|
||||
}
|
||||
|
||||
screenshot = backend?.screenshot ?? {
|
||||
async captureExcluding() { throw new Error('computer-use-swift: no backend for this platform') },
|
||||
async captureRegion() { throw new Error('computer-use-swift: no backend for this platform') },
|
||||
}
|
||||
|
||||
async resolvePrepareCapture(
|
||||
allowedBundleIds: string[],
|
||||
|
||||
80
packages/@ant/computer-use-swift/src/types.ts
Normal file
80
packages/@ant/computer-use-swift/src/types.ts
Normal file
@@ -0,0 +1,80 @@
|
||||
/**
 * Size and identity of a single display, as reported by a platform backend.
 *
 * NOTE(review): whether `width`/`height` are logical or physical pixels is
 * backend-defined — confirm per backend.
 */
export interface DisplayGeometry {
  /** Horizontal size of the display. */
  width: number
  /** Vertical size of the display. */
  height: number
  /** Scale factor relating `width`/`height` to backing pixels. */
  scaleFactor: number
  /** Identifier accepted by the `displayId` parameters of the backend APIs. */
  displayId: number
}
|
||||
|
||||
/**
 * Result of {@link AppsAPI.prepareDisplay}.
 *
 * NOTE(review): field meanings are inferred from their names — confirm.
 */
export interface PrepareDisplayResult {
  /** Presumably the identifier of the application that was activated. */
  activated: string
  /** Presumably the identifiers of the applications that were hidden. */
  hidden: string[]
}
|
||||
|
||||
/** Minimal identification of an application. */
export interface AppInfo {
  /** Identifier of the application (a bundle identifier on macOS). */
  bundleId: string
  /** Human-readable application name. */
  displayName: string
}
|
||||
|
||||
/** An installed application, as returned by {@link AppsAPI.listInstalled}. */
export interface InstalledApp {
  /** Identifier of the application. */
  bundleId: string
  /** Human-readable application name. */
  displayName: string
  /** Filesystem path of the application. */
  path: string
  /** Optional icon; presumably a `data:` URL — confirm against callers. */
  iconDataUrl?: string
}
|
||||
|
||||
/** A running application, as returned by {@link AppsAPI.listRunning}. */
export interface RunningApp {
  /** Identifier of the running application. */
  bundleId: string
  /** Human-readable name of the running application. */
  displayName: string
}
|
||||
|
||||
/** An encoded screenshot together with its dimensions. */
export interface ScreenshotResult {
  /** Image data, base64-encoded. */
  base64: string
  /** Image width in pixels. */
  width: number
  /** Image height in pixels. */
  height: number
}
|
||||
|
||||
/**
 * An encoded capture with its dimensions; structurally identical to
 * {@link ScreenshotResult}.
 *
 * NOTE(review): presumably returned by `ComputerUseAPI.resolvePrepareCapture` — confirm.
 */
export interface ResolvePrepareCaptureResult {
  /** Image data, base64-encoded. */
  base64: string
  /** Image width in pixels. */
  width: number
  /** Image height in pixels. */
  height: number
}
|
||||
|
||||
/**
 * Associates an application with displays, as returned by
 * {@link AppsAPI.findWindowDisplays}.
 */
export interface WindowDisplayInfo {
  /** Identifier of the application. */
  bundleId: string
  /** Identifiers of the displays associated with the application. */
  displayIds: number[]
}
|
||||
|
||||
/** Display queries every platform backend must provide. */
export interface DisplayAPI {
  /**
   * Returns the geometry of one display.
   *
   * @param displayId - Display to look up; optional.
   */
  getSize(displayId?: number): DisplayGeometry

  /** Returns the geometry of every display. */
  listAll(): DisplayGeometry[]
}
|
||||
|
||||
/**
 * Application management operations every platform backend must provide.
 *
 * NOTE(review): several descriptions below are inferred from method and
 * parameter names — confirm against callers.
 */
export interface AppsAPI {
  /** Prepares a display for capture; reports what was activated and hidden. */
  prepareDisplay(
    allowlistBundleIds: string[],
    surrogateHost: string,
    displayId?: number,
  ): Promise<PrepareDisplayResult>

  /** Presumably previews which applications `prepareDisplay` would hide — confirm. */
  previewHideSet(bundleIds: string[], displayId?: number): Promise<AppInfo[]>

  /** Maps each identifier to the displays associated with that application. */
  findWindowDisplays(bundleIds: string[]): Promise<WindowDisplayInfo[]>

  /** Resolves the application at a screen coordinate, or `null`. */
  appUnderPoint(x: number, y: number): Promise<AppInfo | null>

  /** Lists installed applications. */
  listInstalled(): Promise<InstalledApp[]>

  /** Returns an icon for the application at `path`, or `null`. */
  iconDataUrl(path: string): string | null

  /** Lists running applications (synchronous). */
  listRunning(): RunningApp[]

  /** Opens / activates the application with the given identifier. */
  open(bundleId: string): Promise<void>

  /** Makes the given applications visible again. */
  unhide(bundleIds: string[]): Promise<void>
}
|
||||
|
||||
/**
 * Screen capture operations every platform backend must provide.
 *
 * NOTE(review): parameter semantics (allow-list filtering, quality, output
 * scaling) are inferred from their names — confirm per backend.
 */
export interface ScreenshotAPI {
  /** Captures a whole display. */
  captureExcluding(
    allowedBundleIds: string[],
    quality: number,
    targetW: number,
    targetH: number,
    displayId?: number,
  ): Promise<ScreenshotResult>

  /** Captures the rectangle `x, y, w, h`. */
  captureRegion(
    allowedBundleIds: string[],
    x: number,
    y: number,
    w: number,
    h: number,
    outW: number,
    outH: number,
    quality: number,
    displayId?: number,
  ): Promise<ScreenshotResult>
}
|
||||
|
||||
/**
 * The three API groups a platform backend module exposes.
 */
export interface SwiftBackend {
  /** Display geometry queries. */
  display: DisplayAPI
  /** Application management. */
  apps: AppsAPI
  /** Screen capture. */
  screenshot: ScreenshotAPI
}
|
||||
Reference in New Issue
Block a user