Files
claude-code/src/utils/computerUse/win32/uiAutomation.ts
unraid c17edcb12e feat: Computer Use — Windows 跨平台支持 + GUI 无障碍增强 + Python Bridge
三平台 Computer Use (macOS + Windows + Linux),Windows 专项增强。

- MCP server: toolCalls/tools/executor/mcpServer 等 12 文件完整实现
- 平台抽象层: platforms/{win32,darwin,linux}.ts
- 跨平台 executor: executorCrossPlatform.ts
- CHICAGO_MCP + VOICE_MODE feature flags 启用

- windowMessage.ts: SendMessageW (WM_CHAR Unicode + 剪贴板粘贴)
- windowBorder.ts: 4 叠加窗口边框 (30fps 跟踪)
- uiAutomation.ts: UI Automation 元素树/点击/写值
- accessibilitySnapshot.ts: 无障碍快照 → 模型感知 GUI
- bridge.py + bridgeClient.ts: Python 长驻进程 (替代 per-call PS)

- window_management: min/max/restore/close/focus (Win32 API)
- click_element / type_into_element: 按名称操作 (无需坐标)
- 截图自动附带 Accessibility Snapshot

- 17 种方法, stdin/stdout JSON 通信
- 窗口枚举 1.5ms vs PS 500ms, 截图 360ms vs PS 800ms
- 依赖: mss + Pillow + pywinauto
2026-04-05 15:47:20 +08:00

354 lines
9.3 KiB
TypeScript

/**
* Windows UI Automation module
*
* Provides UI element tree inspection, element lookup, programmatic click,
* value setting, and hit-testing via PowerShell + System.Windows.Automation.
*/
import { ps } from './shared.js'
export interface UIElement {
name: string
controlType: string // Button, Edit, Text, List, Window, etc.
automationId: string
boundingRect: { x: number; y: number; w: number; h: number }
isEnabled: boolean
value?: string
children?: UIElement[]
}
const VALID_CONTROL_TYPES = new Set([
'Button',
'Calendar',
'CheckBox',
'ComboBox',
'Custom',
'DataGrid',
'DataItem',
'Document',
'Edit',
'Group',
'Header',
'HeaderItem',
'Hyperlink',
'Image',
'List',
'ListItem',
'Menu',
'MenuBar',
'MenuItem',
'Pane',
'ProgressBar',
'RadioButton',
'ScrollBar',
'Separator',
'Slider',
'Spinner',
'SplitButton',
'StatusBar',
'Tab',
'TabItem',
'Table',
'Text',
'Thumb',
'TitleBar',
'ToolBar',
'ToolTip',
'Tree',
'TreeItem',
'Window',
])
// ---------------------------------------------------------------------------
// Helper
// ---------------------------------------------------------------------------
const UIA_ASSEMBLIES = `
Add-Type -AssemblyName UIAutomationClient
Add-Type -AssemblyName UIAutomationTypes
Add-Type -AssemblyName WindowsBase
`
function parseJsonSafe<T>(raw: string, fallback: T): T {
try {
if (!raw) return fallback
return JSON.parse(raw) as T
} catch {
return fallback
}
}
// PowerShell snippet that finds a window by exact or partial title match.
// Assumes $title is already set in the calling script.
const PS_FIND_WINDOW = `
$root = [System.Windows.Automation.AutomationElement]::RootElement
$window = $root.FindFirst(
[System.Windows.Automation.TreeScope]::Children,
[System.Windows.Automation.PropertyCondition]::new(
[System.Windows.Automation.AutomationElement]::NameProperty, $title))
if ($window -eq $null) {
$all = $root.FindAll(
[System.Windows.Automation.TreeScope]::Children,
[System.Windows.Automation.Condition]::TrueCondition)
foreach ($el in $all) {
if ($el.Current.Name -and $el.Current.Name.Contains($title)) {
$window = $el
break
}
}
}
`
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/**
* Get the UI element tree of a window, up to `depth` levels deep (default 3).
*/
export function getUITree(windowTitle: string, depth: number = 3): UIElement[] {
const escapedTitle = windowTitle.replace(/'/g, "''")
const script = `
${UIA_ASSEMBLIES}
$title = '${escapedTitle}'
${PS_FIND_WINDOW}
if ($window -eq $null) {
Write-Output '[]'
exit
}
function Get-UIChildren($parent, $currentDepth, $maxDepth) {
if ($currentDepth -ge $maxDepth) { return @() }
$children = $parent.FindAll(
[System.Windows.Automation.TreeScope]::Children,
[System.Windows.Automation.Condition]::TrueCondition)
$result = @()
foreach ($el in $children) {
$rect = $el.Current.BoundingRectangle
$obj = @{
name = [string]$el.Current.Name
controlType = $el.Current.ControlType.ProgrammaticName -replace 'ControlType\\.', ''
automationId = [string]$el.Current.AutomationId
boundingRect = @{
x = [int]$rect.X
y = [int]$rect.Y
w = [int]$rect.Width
h = [int]$rect.Height
}
isEnabled = $el.Current.IsEnabled
}
try {
$vp = $el.GetCurrentPattern([System.Windows.Automation.ValuePattern]::Pattern)
if ($vp -ne $null) { $obj['value'] = $vp.Current.Value }
} catch {}
$sub = Get-UIChildren $el ($currentDepth + 1) $maxDepth
if ($sub.Count -gt 0) { $obj['children'] = $sub }
$result += $obj
}
return $result
}
$tree = Get-UIChildren $window 0 ${depth}
if ($tree -eq $null -or $tree.Count -eq 0) {
Write-Output '[]'
} else {
$tree | ConvertTo-Json -Depth 20 -Compress
}
`
const raw = ps(script)
const parsed = parseJsonSafe<UIElement | UIElement[]>(raw, [])
return Array.isArray(parsed) ? parsed : [parsed]
}
/**
* Find a single element inside a window matching the given query fields.
*/
export function findElement(
windowTitle: string,
query: { name?: string; controlType?: string; automationId?: string },
): UIElement | null {
const escapedTitle = windowTitle.replace(/'/g, "''")
// Build conditions array
const conditions: string[] = []
if (query.name) {
const v = query.name.replace(/'/g, "''")
conditions.push(
`[System.Windows.Automation.PropertyCondition]::new([System.Windows.Automation.AutomationElement]::NameProperty, '${v}')`,
)
}
if (query.controlType) {
if (!VALID_CONTROL_TYPES.has(query.controlType)) {
return null // Invalid control type
}
const v = query.controlType.replace(/'/g, "''")
conditions.push(
`[System.Windows.Automation.PropertyCondition]::new([System.Windows.Automation.AutomationElement]::ControlTypeProperty, [System.Windows.Automation.ControlType]::${v})`,
)
}
if (query.automationId) {
const v = query.automationId.replace(/'/g, "''")
conditions.push(
`[System.Windows.Automation.PropertyCondition]::new([System.Windows.Automation.AutomationElement]::AutomationIdProperty, '${v}')`,
)
}
if (conditions.length === 0) return null
let conditionExpr: string
if (conditions.length === 1) {
conditionExpr = conditions[0]
} else {
conditionExpr = `[System.Windows.Automation.AndCondition]::new(@(${conditions.join(', ')}))`
}
const script = `
${UIA_ASSEMBLIES}
$title = '${escapedTitle}'
${PS_FIND_WINDOW}
if ($window -eq $null) {
Write-Output 'null'
exit
}
$cond = ${conditionExpr}
$el = $window.FindFirst([System.Windows.Automation.TreeScope]::Descendants, $cond)
if ($el -eq $null) {
Write-Output 'null'
exit
}
$rect = $el.Current.BoundingRectangle
$obj = @{
name = [string]$el.Current.Name
controlType = $el.Current.ControlType.ProgrammaticName -replace 'ControlType\\.', ''
automationId = [string]$el.Current.AutomationId
boundingRect = @{
x = [int]$rect.X
y = [int]$rect.Y
w = [int]$rect.Width
h = [int]$rect.Height
}
isEnabled = $el.Current.IsEnabled
}
try {
$vp = $el.GetCurrentPattern([System.Windows.Automation.ValuePattern]::Pattern)
if ($vp -ne $null) { $obj['value'] = $vp.Current.Value }
} catch {}
$obj | ConvertTo-Json -Compress
`
const raw = ps(script)
return parseJsonSafe<UIElement | null>(raw, null)
}
/**
* Click an element by its automationId using InvokePattern.
*/
export function clickElement(
windowTitle: string,
automationId: string,
): boolean {
const escapedTitle = windowTitle.replace(/'/g, "''")
const escapedId = automationId.replace(/'/g, "''")
const script = `
${UIA_ASSEMBLIES}
$title = '${escapedTitle}'
${PS_FIND_WINDOW}
if ($window -eq $null) {
Write-Output 'false'
exit
}
$cond = [System.Windows.Automation.PropertyCondition]::new(
[System.Windows.Automation.AutomationElement]::AutomationIdProperty, '${escapedId}')
$el = $window.FindFirst([System.Windows.Automation.TreeScope]::Descendants, $cond)
if ($el -eq $null) {
Write-Output 'false'
exit
}
try {
$ip = $el.GetCurrentPattern([System.Windows.Automation.InvokePattern]::Pattern)
$ip.Invoke()
Write-Output 'true'
} catch {
Write-Output 'false'
}
`
return ps(script) === 'true'
}
/**
* Set the value of an element by its automationId using ValuePattern.
*/
export function setValue(
windowTitle: string,
automationId: string,
value: string,
): boolean {
const escapedTitle = windowTitle.replace(/'/g, "''")
const escapedId = automationId.replace(/'/g, "''")
const escapedValue = value.replace(/'/g, "''")
const script = `
${UIA_ASSEMBLIES}
$title = '${escapedTitle}'
${PS_FIND_WINDOW}
if ($window -eq $null) {
Write-Output 'false'
exit
}
$cond = [System.Windows.Automation.PropertyCondition]::new(
[System.Windows.Automation.AutomationElement]::AutomationIdProperty, '${escapedId}')
$el = $window.FindFirst([System.Windows.Automation.TreeScope]::Descendants, $cond)
if ($el -eq $null) {
Write-Output 'false'
exit
}
try {
$vp = $el.GetCurrentPattern([System.Windows.Automation.ValuePattern]::Pattern)
$vp.SetValue('${escapedValue}')
Write-Output 'true'
} catch {
Write-Output 'false'
}
`
return ps(script) === 'true'
}
/**
* Get the UI element at a specific screen coordinate.
*/
export function elementAtPoint(x: number, y: number): UIElement | null {
const script = `
${UIA_ASSEMBLIES}
try {
$point = [System.Windows.Point]::new(${x}, ${y})
$el = [System.Windows.Automation.AutomationElement]::FromPoint($point)
if ($el -eq $null) {
Write-Output 'null'
exit
}
$rect = $el.Current.BoundingRectangle
$obj = @{
name = [string]$el.Current.Name
controlType = $el.Current.ControlType.ProgrammaticName -replace 'ControlType\\.', ''
automationId = [string]$el.Current.AutomationId
boundingRect = @{
x = [int]$rect.X
y = [int]$rect.Y
w = [int]$rect.Width
h = [int]$rect.Height
}
isEnabled = $el.Current.IsEnabled
}
try {
$vp = $el.GetCurrentPattern([System.Windows.Automation.ValuePattern]::Pattern)
if ($vp -ne $null) { $obj['value'] = $vp.Current.Value }
} catch {}
$obj | ConvertTo-Json -Compress
} catch {
Write-Output 'null'
}
`
const raw = ps(script)
return parseJsonSafe<UIElement | null>(raw, null)
}