feat: Computer Use — Windows 跨平台支持 + GUI 无障碍增强 + Python Bridge

支持三平台 Computer Use (macOS + Windows + Linux)，并针对 Windows 做专项增强。

- MCP server: toolCalls/tools/executor/mcpServer 等 12 文件完整实现
- 平台抽象层: platforms/{win32,darwin,linux}.ts
- 跨平台 executor: executorCrossPlatform.ts
- CHICAGO_MCP + VOICE_MODE feature flags 启用

- windowMessage.ts: SendMessageW (WM_CHAR Unicode + 剪贴板粘贴)
- windowBorder.ts: 4 叠加窗口边框 (30fps 跟踪)
- uiAutomation.ts: UI Automation 元素树/点击/写值
- accessibilitySnapshot.ts: 无障碍快照 → 模型感知 GUI
- bridge.py + bridgeClient.ts: Python 长驻进程 (替代 per-call PS)

- window_management: min/max/restore/close/focus (Win32 API)
- click_element / type_into_element: 按名称操作 (无需坐标)
- 截图自动附带 Accessibility Snapshot

- 17 种方法, stdin/stdout JSON 通信
- 窗口枚举 1.5ms vs PS 500ms, 截图 360ms vs PS 800ms
- 依赖: mss + Pillow + pywinauto
This commit is contained in:
unraid
2026-04-05 15:27:50 +08:00
parent 7a2ade0a02
commit c17edcb12e
36 changed files with 8297 additions and 351 deletions

View File

@@ -5,6 +5,8 @@
* value setting, and hit-testing via PowerShell + System.Windows.Automation.
*/
import { ps } from './shared.js'
export interface UIElement {
name: string
controlType: string // Button, Edit, Text, List, Window, etc.
@@ -15,6 +17,48 @@ export interface UIElement {
children?: UIElement[]
}
/**
 * Allow-list of control type names accepted for `query.controlType`.
 *
 * `findElement` rejects any name outside this set before splicing it into the
 * PowerShell expression `[System.Windows.Automation.ControlType]::<name>`, so
 * only these exact (case-sensitive) identifiers ever reach the script text.
 */
const VALID_CONTROL_TYPES: Set<string> = new Set<string>([
  // B–E
  'Button', 'Calendar', 'CheckBox', 'ComboBox', 'Custom',
  'DataGrid', 'DataItem', 'Document', 'Edit',
  // G–P
  'Group', 'Header', 'HeaderItem', 'Hyperlink', 'Image',
  'List', 'ListItem', 'Menu', 'MenuBar', 'MenuItem',
  'Pane', 'ProgressBar',
  // R–S
  'RadioButton', 'ScrollBar', 'Separator', 'Slider', 'Spinner',
  'SplitButton', 'StatusBar',
  // T–W
  'Tab', 'TabItem', 'Table', 'Text', 'Thumb', 'TitleBar',
  'ToolBar', 'ToolTip', 'Tree', 'TreeItem', 'Window',
])
// ---------------------------------------------------------------------------
// Helper
// ---------------------------------------------------------------------------
@@ -25,15 +69,6 @@ Add-Type -AssemblyName UIAutomationTypes
Add-Type -AssemblyName WindowsBase
`
/**
 * Run a PowerShell script synchronously and return what it printed.
 *
 * The script is passed via `-Command` to a `powershell` process started with
 * `-NoProfile -NonInteractive`. Only stdout is returned (decoded and
 * trimmed); stderr is piped but never read, and the exit code is not checked.
 *
 * @param script PowerShell source text to execute.
 * @returns Trimmed stdout of the process.
 */
function ps(script: string): string {
  const argv = ['powershell', '-NoProfile', '-NonInteractive', '-Command', script]
  const { stdout } = Bun.spawnSync({
    cmd: argv,
    stdout: 'pipe',
    stderr: 'pipe',
  })
  const text = new TextDecoder().decode(stdout)
  return text.trim()
}
function parseJsonSafe<T>(raw: string, fallback: T): T {
try {
if (!raw) return fallback
@@ -143,6 +178,9 @@ export function findElement(
)
}
if (query.controlType) {
if (!VALID_CONTROL_TYPES.has(query.controlType)) {
return null // Invalid control type
}
const v = query.controlType.replace(/'/g, "''")
conditions.push(
`[System.Windows.Automation.PropertyCondition]::new([System.Windows.Automation.AutomationElement]::ControlTypeProperty, [System.Windows.Automation.ControlType]::${v})`,
@@ -204,7 +242,10 @@ $obj | ConvertTo-Json -Compress
/**
* Click an element by its automationId using InvokePattern.
*/
export function clickElement(windowTitle: string, automationId: string): boolean {
export function clickElement(
windowTitle: string,
automationId: string,
): boolean {
const escapedTitle = windowTitle.replace(/'/g, "''")
const escapedId = automationId.replace(/'/g, "''")
@@ -237,7 +278,11 @@ try {
/**
* Set the value of an element by its automationId using ValuePattern.
*/
export function setValue(windowTitle: string, automationId: string, value: string): boolean {
export function setValue(
windowTitle: string,
automationId: string,
value: string,
): boolean {
const escapedTitle = windowTitle.replace(/'/g, "''")
const escapedId = automationId.replace(/'/g, "''")
const escapedValue = value.replace(/'/g, "''")