feat: Computer Use — Windows 跨平台支持 + GUI 无障碍增强 + Python Bridge

三平台 Computer Use (macOS + Windows + Linux),并针对 Windows 做专项增强。

- MCP server: toolCalls/tools/executor/mcpServer 等 12 文件完整实现
- 平台抽象层: platforms/{win32,darwin,linux}.ts
- 跨平台 executor: executorCrossPlatform.ts
- CHICAGO_MCP + VOICE_MODE feature flags 启用

- windowMessage.ts: SendMessageW (WM_CHAR Unicode + 剪贴板粘贴)
- windowBorder.ts: 4 叠加窗口边框 (30fps 跟踪)
- uiAutomation.ts: UI Automation 元素树/点击/写值
- accessibilitySnapshot.ts: 无障碍快照 → 模型感知 GUI
- bridge.py + bridgeClient.ts: Python 长驻进程 (替代 per-call PS)

- window_management: min/max/restore/close/focus (Win32 API)
- click_element / type_into_element: 按名称操作 (无需坐标)
- 截图自动附带 Accessibility Snapshot

- Python Bridge (bridge.py): 17 种方法, 通过 stdin/stdout 以 JSON 通信
- 窗口枚举 1.5ms vs PS 500ms, 截图 360ms vs PS 800ms
- 依赖: mss + Pillow + pywinauto
This commit is contained in:
unraid
2026-04-05 15:27:50 +08:00
parent 7a2ade0a02
commit c17edcb12e
36 changed files with 8297 additions and 351 deletions

View File

@@ -0,0 +1,41 @@
/**
* Platform dispatcher for Computer Use.
*
* Loads the correct platform backend based on `process.platform`.
* Each backend implements the same unified interface.
*/
import type { InputPlatform, ScreenshotPlatform, DisplayPlatform, AppsPlatform, WindowManagementPlatform } from './types.js'
/**
 * The set of capability backends exposed by a single platform implementation.
 *
 * Every member is typed by a contract declared in `./types.js`; this interface
 * only groups them so a caller can obtain all of them from one object.
 */
export interface Platform {
  /** Backend typed by the `AppsPlatform` contract. */
  apps: AppsPlatform
  /** Backend typed by the `DisplayPlatform` contract. */
  display: DisplayPlatform
  /** Backend typed by the `InputPlatform` contract. */
  input: InputPlatform
  /** Backend typed by the `ScreenshotPlatform` contract. */
  screenshot: ScreenshotPlatform
  /**
   * Backend typed by the `WindowManagementPlatform` contract.
   * Optional: a platform object may leave this undefined, so callers must
   * check for its presence before using it.
   */
  windowManagement?: WindowManagementPlatform
}
/** Memoized backend; stays undefined until the first successful load. */
let cached: Platform | undefined

/**
 * Returns the Computer Use backend for the current operating system.
 *
 * The backend module is required lazily on the first call and the resulting
 * `platform` export is memoized, so later calls return the same object without
 * touching the module loader again.
 *
 * @returns The `platform` export of `./darwin.js`, `./win32.js` or
 *   `./linux.js`, chosen by `process.platform`.
 * @throws Error when `process.platform` is none of `darwin`, `win32`, `linux`.
 */
export function loadPlatform(): Platform {
  if (!cached) {
    const os = process.platform
    // Keep each require() argument a string literal so the specifier stays
    // statically visible to whatever resolves these modules.
    if (os === 'darwin') {
      cached = require('./darwin.js').platform
    } else if (os === 'win32') {
      cached = require('./win32.js').platform
    } else if (os === 'linux') {
      cached = require('./linux.js').platform
    } else {
      throw new Error(`Computer Use not supported on ${os}`)
    }
  }
  // require() is untyped, so the compiler cannot prove the assignment above
  // produced a Platform; the assertion mirrors that trust in the backend.
  return cached!
}
export type { InputPlatform, ScreenshotPlatform, DisplayPlatform, AppsPlatform, WindowManagementPlatform } from './types.js'
export type { WindowHandle, ScreenshotResult, DisplayInfo, InstalledApp, FrontmostAppInfo, WindowAction } from './types.js'