feat: Computer Use — Windows 跨平台支持 + GUI 无障碍增强 + Python Bridge

三平台 Computer Use (macOS + Windows + Linux),Windows 专项增强。

- MCP server: toolCalls/tools/executor/mcpServer 等 12 个文件完整实现
- 平台抽象层: platforms/{win32,darwin,linux}.ts
- 跨平台 executor: executorCrossPlatform.ts
- CHICAGO_MCP + VOICE_MODE feature flags 启用

- windowMessage.ts: SendMessageW (WM_CHAR Unicode + 剪贴板粘贴)
- windowBorder.ts: 4 叠加窗口边框 (30fps 跟踪)
- uiAutomation.ts: UI Automation 元素树/点击/写值
- accessibilitySnapshot.ts: 无障碍快照 → 模型感知 GUI
- bridge.py + bridgeClient.ts: Python 长驻进程 (替代 per-call PS)

- window_management: min/max/restore/close/focus (Win32 API)
- click_element / type_into_element: 按名称操作 (无需坐标)
- 截图自动附带 Accessibility Snapshot

- 17 种方法, stdin/stdout JSON 通信
- 窗口枚举 1.5ms vs PS 500ms, 截图 360ms vs PS 800ms
- 依赖: mss + Pillow + pywinauto
This commit is contained in:
unraid
2026-04-05 15:27:50 +08:00
parent 7a2ade0a02
commit c17edcb12e
36 changed files with 8297 additions and 351 deletions

View File

@@ -1,33 +1,30 @@
/**
* @ant/computer-use-input — cross-platform keyboard & mouse simulation
* @ant/computer-use-input — macOS keyboard & mouse simulation (enigo)
*
* Platform backends:
* - darwin: AppleScript/JXA via CoreGraphics events
* - win32: PowerShell via Win32 P/Invoke (SetCursorPos, SendInput, keybd_event)
*
* Add new platforms by creating backends/<platform>.ts implementing InputBackend.
* This package wraps the macOS-only native enigo .node module.
* For Windows/Linux, use src/utils/computerUse/platforms/ instead.
*/
import type { FrontmostAppInfo, InputBackend } from './types.js'
/**
 * Identity of the application reported as frontmost by an input backend.
 */
export interface FrontmostAppInfo {
  /** Display name of the application. */
  appName: string;
  /** NOTE(review): presumably the macOS bundle identifier — confirm against the backend. */
  bundleId: string;
}
export type { FrontmostAppInfo, InputBackend } from './types.js'
// ---------------------------------------------------------------------------
// Platform dispatch
// ---------------------------------------------------------------------------
/**
 * Contract every keyboard & mouse simulation backend must satisfy.
 *
 * All input operations are asynchronous; only `getFrontmostAppInfo` is
 * synchronous.
 */
export interface InputBackend {
  // --- Mouse ---------------------------------------------------------------

  /**
   * Moves the pointer to (`x`, `y`).
   * NOTE(review): `animated` presumably requests a smoothed move and the
   * coordinate space is backend-defined — confirm against the implementation.
   */
  moveMouse(x: number, y: number, animated: boolean): Promise<void>;

  /** Resolves with the current pointer position. */
  mouseLocation(): Promise<{ x: number; y: number }>;

  /**
   * Performs `action` with the given mouse `button`.
   * `count` is optional. NOTE(review): presumably the click multiplicity
   * (double-click etc.) — confirm.
   */
  mouseButton(
    button: 'left' | 'right' | 'middle',
    action: 'click' | 'press' | 'release',
    count?: number,
  ): Promise<void>;

  /** Scrolls by `amount` along the vertical or horizontal axis. */
  mouseScroll(amount: number, direction: 'vertical' | 'horizontal'): Promise<void>;

  // --- Keyboard ------------------------------------------------------------

  /** Presses or releases a single key. */
  key(key: string, action: 'press' | 'release'): Promise<void>;

  /**
   * Sends several keys at once.
   * NOTE(review): presumably a chord such as modifier + key — confirm.
   */
  keys(parts: string[]): Promise<void>;

  /** Types `text`. */
  typeText(text: string): Promise<void>;

  // --- Application state ---------------------------------------------------

  /** Returns the frontmost application's info, or `null`. */
  getFrontmostAppInfo(): FrontmostAppInfo | null;
}
function loadBackend(): InputBackend | null {
if (process.platform !== 'darwin') return null
try {
switch (process.platform) {
case 'darwin':
return require('./backends/darwin.js') as InputBackend
case 'win32':
return require('./backends/win32.js') as InputBackend
case 'linux':
return require('./backends/linux.js') as InputBackend
default:
return null
}
return require('./backends/darwin.js') as InputBackend
} catch {
return null
}
@@ -35,30 +32,16 @@ function loadBackend(): InputBackend | null {
const backend = loadBackend()
// ---------------------------------------------------------------------------
// Unsupported stub (throws on call — guards via isSupported check)
// ---------------------------------------------------------------------------
/**
 * Placeholder used in place of a backend method when no backend is loaded.
 *
 * @throws Error always; the message names the current `process.platform`.
 */
function unsupported(): never {
  const message = `computer-use-input is not supported on ${process.platform}`
  throw new Error(message)
}
// ---------------------------------------------------------------------------
// Public API — matches the original export surface
// ---------------------------------------------------------------------------
// True only when `loadBackend()` returned a backend (i.e. `backend` is not null).
export const isSupported = backend !== null
// NOTE(review): `moveMouse` … `typeText` are each declared twice below. This
// looks like the pre-change and post-change lines of a diff shown together;
// only one set can exist in a real module — confirm which side applies.
//
// Variant with fallback: when `backend` is null each export is `unsupported`,
// which throws as soon as it is called.
export const moveMouse = backend?.moveMouse ?? unsupported
export const key = backend?.key ?? unsupported
export const keys = backend?.keys ?? unsupported
export const mouseLocation = backend?.mouseLocation ?? unsupported
export const mouseButton = backend?.mouseButton ?? unsupported
export const mouseScroll = backend?.mouseScroll ?? unsupported
export const typeText = backend?.typeText ?? unsupported
// Variant without fallback: when `backend` is null each export is `undefined`,
// so callers must check before invoking.
export const moveMouse = backend?.moveMouse
export const key = backend?.key
export const keys = backend?.keys
export const mouseLocation = backend?.mouseLocation
export const mouseButton = backend?.mouseButton
export const mouseScroll = backend?.mouseScroll
export const typeText = backend?.typeText
// With no backend this resolves to a function that returns null rather than throwing.
export const getFrontmostAppInfo = backend?.getFrontmostAppInfo ?? (() => null)
// Legacy class type — used by inputLoader.ts for type narrowing
export class ComputerUseInputAPI {
declare moveMouse: InputBackend['moveMouse']
declare key: InputBackend['key']
@@ -71,8 +54,5 @@ export class ComputerUseInputAPI {
declare isSupported: true
}
// Shape of the module when no backend is available: only the `isSupported: false` flag.
// NOTE(review): the interface appears twice (multi-line and single-line form);
// this looks like the old and new sides of a diff shown together — confirm.
interface ComputerUseInputUnsupported {
isSupported: false
}
interface ComputerUseInputUnsupported { isSupported: false }
// Union of the supported API (`isSupported: true`) and the unsupported stub
// (`isSupported: false`); checking `isSupported` narrows to one of the two.
export type ComputerUseInput = ComputerUseInputAPI | ComputerUseInputUnsupported