mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-15 12:55:51 +00:00
三平台 Computer Use (macOS + Windows + Linux),Windows 专项增强。
- MCP server: toolCalls/tools/executor/mcpServer 等 12 文件完整实现
- 平台抽象层: platforms/{win32,darwin,linux}.ts
- 跨平台 executor: executorCrossPlatform.ts
- CHICAGO_MCP + VOICE_MODE feature flags 启用
- windowMessage.ts: SendMessageW (WM_CHAR Unicode + 剪贴板粘贴)
- windowBorder.ts: 4 叠加窗口边框 (30fps 跟踪)
- uiAutomation.ts: UI Automation 元素树/点击/写值
- accessibilitySnapshot.ts: 无障碍快照 → 模型感知 GUI
- bridge.py + bridgeClient.ts: Python 长驻进程 (替代 per-call PS)
- window_management: min/max/restore/close/focus (Win32 API)
- click_element / type_into_element: 按名称操作 (无需坐标)
- 截图自动附带 Accessibility Snapshot
- 17 种方法, stdin/stdout JSON 通信
- 窗口枚举 1.5ms vs PS 500ms, 截图 360ms vs PS 800ms
- 依赖: mss + Pillow + pywinauto
226 lines
7.0 KiB
TypeScript
226 lines
7.0 KiB
TypeScript
/**
 * Accessibility Snapshot — captures the UI Automation tree of a window
 * and formats it as compact, model-friendly text.
 *
 * Sent alongside screenshots so the model has BOTH visual + structural
 * understanding of the GUI. This enables:
 * - Knowing exact element names, types, and positions
 * - Using click_element/type_into_element by name instead of pixel coords
 * - Understanding disabled/enabled state, current values
 *
 * By default only interactive elements (buttons, edits, menus, links, etc.)
 * and the containers that hold them are included, to keep the token count
 * low (~200-500 tokens for typical windows).
 */
|
|
|
|
import { validateHwnd, ps } from './shared.js'
|
|
|
|
/** One element of a captured UI Automation tree. */
export interface AccessibilityNode {
  /** Control type with the `ControlType.` prefix removed (Button, Edit, MenuItem, CheckBox, ...). */
  role: string
  /** Visible text / accessible name; empty when the element has none. */
  name: string
  /** UI Automation `AutomationId`; empty when the element has none. */
  automationId: string
  /** Bounding rectangle of the element, rounded to integers. */
  bounds: {
    x: number
    y: number
    w: number
    h: number
  }
  /** Whether the element reported itself as enabled. */
  enabled: boolean
  /** Current text value (for Edit/ComboBox); absent when the element exposes none. */
  value?: string
  /** Child elements that were kept by the capture, if any. */
  children?: AccessibilityNode[]
}
|
|
|
|
/** Result of a single accessibility capture of one window. */
export interface AccessibilitySnapshot {
  /** Compact, indented text rendering of `nodes`, intended for the model. */
  text: string
  /** Structured tree (for element-targeted actions). */
  nodes: AccessibilityNode[]
  /** Capture time as returned by `Date.now()`. */
  timestamp: number
}
|
|
|
|
/**
 * Capture the accessibility tree of a window, returning only interactive
 * and visible elements. Uses Windows UI Automation (crosses process
 * boundaries) by running a generated PowerShell script through `ps` and
 * parsing the JSON it prints.
 *
 * Elements whose bounding rectangle has no area, or whose X coordinate is
 * below -10000, are skipped. A node is kept when its control type passes the
 * interactive filter or when at least one of its descendants was kept.
 *
 * @param hwnd - Window handle as string; passed through `validateHwnd` before
 *   being embedded in the script
 * @param maxDepth - Maximum tree depth (default 4). Truncated to an integer
 *   and clamped to 0..49 before being embedded in the script; non-finite
 *   values fall back to the default
 * @param interactiveOnly - Only include interactive elements (default true)
 * @returns The snapshot, or `null` when no element was captured or the
 *   capture/parse step failed
 */
export function captureAccessibilitySnapshot(
  hwnd: string,
  maxDepth: number = 4,
  interactiveOnly: boolean = true,
): AccessibilitySnapshot | null {
  hwnd = validateHwnd(hwnd)

  // maxDepth is spliced into the script text, so it must be a plain integer:
  // NaN/Infinity/fractions would otherwise produce a broken script.
  // 49 is the deepest tree whose JSON still fits ConvertTo-Json's -Depth
  // limit of 100 (each tree level adds two JSON levels: node + child array).
  const depth = Number.isFinite(maxDepth)
    ? Math.min(Math.max(Math.trunc(maxDepth), 0), 49)
    : 4
  // Never go below the historical 20 so the default script is unchanged.
  const jsonDepth = Math.max(20, 2 * depth + 2)

  const filterClause = interactiveOnly
    ? `
    # Interactive control types only
    $interactiveTypes = @(
      'Button','Edit','ComboBox','CheckBox','RadioButton',
      'MenuItem','Menu','MenuBar','Link','Slider','Spinner',
      'Tab','TabItem','List','ListItem','Tree','TreeItem',
      'DataGrid','DataItem','Document','ScrollBar','ToolBar',
      'SplitButton','ToggleButton','Hyperlink'
    )
    function Is-Interactive($ct) {
      $typeName = $ct -replace 'ControlType\\.', ''
      return $interactiveTypes -contains $typeName
    }`
    : `
    function Is-Interactive($ct) { return $true }`

  const script = `
Add-Type -AssemblyName UIAutomationClient
Add-Type -AssemblyName UIAutomationTypes
Add-Type -AssemblyName WindowsBase
${filterClause}

function Get-Tree($el, $depth, $maxDepth) {
  if ($depth -ge $maxDepth) { return @() }
  $result = @()
  $children = $el.FindAll(
    [System.Windows.Automation.TreeScope]::Children,
    [System.Windows.Automation.Condition]::TrueCondition)
  foreach ($child in $children) {
    $ct = $child.Current.ControlType.ProgrammaticName
    $typeName = $ct -replace 'ControlType\\.', ''
    $name = [string]$child.Current.Name
    $autoId = [string]$child.Current.AutomationId
    $rect = $child.Current.BoundingRectangle
    $enabled = $child.Current.IsEnabled

    # Skip invisible/offscreen elements
    if ($rect.Width -le 0 -or $rect.Height -le 0) { continue }
    if ($rect.X -lt -10000) { continue }

    $val = $null
    try {
      $vp = $child.GetCurrentPattern([System.Windows.Automation.ValuePattern]::Pattern)
      if ($vp -ne $null) { $val = $vp.Current.Value }
    } catch {}

    $isInteractive = Is-Interactive $ct
    $sub = Get-Tree $child ($depth + 1) $maxDepth

    if ($isInteractive -or $sub.Count -gt 0) {
      $node = @{
        role = $typeName
        name = $name
        id = $autoId
        x = [int]$rect.X; y = [int]$rect.Y
        w = [int]$rect.Width; h = [int]$rect.Height
        on = $enabled
      }
      if ($val -ne $null -and $val -ne '') { $node['v'] = $val }
      if ($sub.Count -gt 0) { $node['c'] = $sub }
      $result += $node
    }
  }
  return $result
}

try {
  $root = [System.Windows.Automation.AutomationElement]::FromHandle([IntPtr]::new([long]${hwnd}))
  if ($root -eq $null) { Write-Output '[]'; exit }
  $tree = Get-Tree $root 0 ${depth}
  if ($tree -eq $null -or $tree.Count -eq 0) {
    Write-Output '[]'
  } else {
    $tree | ConvertTo-Json -Depth ${jsonDepth} -Compress
  }
} catch {
  Write-Output '[]'
}
`

  try {
    const raw = ps(script)
    if (!raw || raw === '[]') return null

    // A single top-level element is serialised as a bare object, not an array.
    const parsed = JSON.parse(raw)
    const nodes: AccessibilityNode[] = Array.isArray(parsed)
      ? parsed.map(parseNode)
      : [parseNode(parsed)]

    // Covers an empty array that slipped past the literal '[]' check above
    // (for example because the output carried surrounding whitespace).
    if (nodes.length === 0) return null

    return { text: formatForModel(nodes), nodes, timestamp: Date.now() }
  } catch {
    // Best effort: a failed capture or unparsable output means "no snapshot".
    return null
  }
}
|
|
|
|
/**
 * Convert one raw node of the capture script's JSON output into an
 * `AccessibilityNode`, expanding the short keys (`id`, `x`/`y`/`w`/`h`, `on`,
 * `v`, `c`) used on the wire.
 *
 * The input comes straight from `JSON.parse`, so nothing about its shape is
 * trusted: anything that is not an object yields an empty node, and fields of
 * the wrong type fall back to '' / 0 instead of leaking through.
 *
 * @param raw - A parsed JSON value describing one element
 * @returns The normalised node; `enabled` is true unless `on` is exactly `false`
 */
function parseNode(raw: unknown): AccessibilityNode {
  const src: Record<string, unknown> =
    raw !== null && typeof raw === 'object'
      ? (raw as Record<string, unknown>)
      : {}

  const text = (v: unknown): string =>
    typeof v === 'string' ? v : typeof v === 'number' ? String(v) : ''
  const num = (v: unknown): number =>
    typeof v === 'number' && Number.isFinite(v) ? v : 0

  // `c` is an array for several children and a bare object for exactly one.
  // Anything else (missing, empty array, or a stringified placeholder left by
  // JSON depth truncation) means "no children".
  const kids = src.c
  let children: AccessibilityNode[] | undefined
  if (Array.isArray(kids)) {
    if (kids.length > 0) children = kids.map(kid => parseNode(kid))
  } else if (kids !== null && typeof kids === 'object') {
    children = [parseNode(kids)]
  }

  const rawValue = src.v
  const value =
    typeof rawValue === 'string'
      ? rawValue
      : typeof rawValue === 'number' || typeof rawValue === 'boolean'
        ? String(rawValue)
        : undefined

  return {
    role: text(src.role),
    name: text(src.name),
    automationId: text(src.id),
    bounds: { x: num(src.x), y: num(src.y), w: num(src.w), h: num(src.h) },
    enabled: src.on !== false,
    value,
    children,
  }
}
|
|
|
|
/**
 * Format the accessibility tree as compact text for the model: one line per
 * element, children indented one level deeper than their parent.
 *
 * Each line is `[role] "name" (x,y wxh)`, followed by ` DISABLED` for
 * disabled elements, ` value="..."` when a value is present and ` id=...`
 * when an automation id is present. The name is omitted when empty, and
 * enabled elements carry no state marker.
 *
 * Example output:
 *   [Button] "Save" (120,50 80x30)
 *   [Edit] (200,80 400x25) value="hello world" id=textBox1
 *   [MenuItem] "File" (10,0 40x25) DISABLED
 *
 * @param nodes - Nodes to render at this level
 * @param indent - Current nesting level (default 0)
 * @returns The rendered lines joined with '\n'
 */
function formatForModel(
  nodes: AccessibilityNode[],
  indent: number = 0,
): string {
  // Names and values (multi-line edits especially) may contain line breaks,
  // which would break the one-element-per-line layout.
  const oneLine = (s: string): string => s.replace(/\s*[\r\n]+\s*/g, ' ')

  const lines: string[] = []
  const pad = ' '.repeat(indent)

  for (const node of nodes) {
    let line = `${pad}[${node.role}]`
    if (node.name) line += ` "${truncate(oneLine(node.name), 40)}"`
    line += ` (${node.bounds.x},${node.bounds.y} ${node.bounds.w}x${node.bounds.h})`
    if (!node.enabled) line += ' DISABLED'
    if (node.value) line += ` value="${truncate(oneLine(node.value), 30)}"`
    if (node.automationId) line += ` id=${node.automationId}`
    lines.push(line)

    // An empty child list would otherwise contribute a blank line.
    if (node.children && node.children.length > 0) {
      lines.push(formatForModel(node.children, indent + 1))
    }
  }

  return lines.join('\n')
}
|
|
|
|
/**
 * Shorten `s` to at most `max` UTF-16 code units, replacing the removed tail
 * with a single '…'.
 *
 * @param s - Text to shorten
 * @param max - Maximum length of the result, ellipsis included
 * @returns `s` unchanged when it already fits; '' when `max` is below 1
 *   (not even the ellipsis fits); otherwise the shortened text
 */
function truncate(s: string, max: number): string {
  if (s.length <= max) return s

  const limit = Math.floor(max)
  // Also catches NaN. Without this guard a non-positive limit would reach
  // slice() as a negative end index and keep almost the whole string.
  if (!(limit >= 1)) return ''

  let end = limit - 1
  if (end > 0) {
    // Do not cut between the two halves of a surrogate pair: a dangling high
    // surrogate would render as a replacement character.
    const last = s.charCodeAt(end - 1)
    if (last >= 0xd800 && last <= 0xdbff) end -= 1
  }
  return s.slice(0, end) + '…'
}
|
|
|
|
/**
 * Find an element in the accessibility tree by name, role, and/or
 * automationId. All criteria that are provided must match.
 *
 * - `name` is compared case-insensitively. An element whose name equals the
 *   query is preferred; only when no such element exists anywhere in the tree
 *   does a substring match count. This keeps a query like "OK" from landing
 *   on an earlier "Bookmarks".
 * - `role` is compared case-insensitively and must match exactly.
 * - `automationId` must match exactly (case-sensitive).
 *
 * Within each pass the tree is walked depth-first, parents before children,
 * and the first match is returned.
 *
 * @param nodes - Root nodes to search
 * @param query - Search criteria; empty or missing fields are ignored
 * @returns The matching node, or `null` when nothing matches or the query
 *   has no criteria
 */
export function findNodeInSnapshot(
  nodes: AccessibilityNode[],
  query: { name?: string; role?: string; automationId?: string },
): AccessibilityNode | null {
  // Normalise the query once instead of once per visited node.
  const wantedName = query.name ? query.name.toLowerCase() : ''
  const wantedRole = query.role ? query.role.toLowerCase() : ''
  const wantedId = query.automationId ? query.automationId : ''
  if (!wantedName && !wantedRole && !wantedId) return null

  const search = (
    level: AccessibilityNode[],
    exactName: boolean,
  ): AccessibilityNode | null => {
    for (const node of level) {
      let nameOk = true
      if (wantedName) {
        const candidate = node.name.toLowerCase()
        nameOk = exactName
          ? candidate === wantedName
          : candidate.includes(wantedName)
      }
      const roleOk = !wantedRole || node.role.toLowerCase() === wantedRole
      const idOk = !wantedId || node.automationId === wantedId
      if (nameOk && roleOk && idOk) return node

      if (node.children) {
        const hit = search(node.children, exactName)
        if (hit) return hit
      }
    }
    return null
  }

  // The exact-name pass only makes sense when a name was requested.
  const exact = wantedName ? search(nodes, true) : null
  return exact || search(nodes, false)
}
|