feat: enable Computer Use with macOS + Windows + Linux support

Phase 1: Replace @ant/computer-use-mcp stub (12 files, 6517 lines).

Phase 2: Remove 8 macOS-only guards in src/:
- main.tsx: remove getPlatform()==='macos' check
- swiftLoader.ts: remove darwin-only throw
- executor.ts: extend platform guard, clipboard dispatch, paste key
- drainRunLoop.ts: skip CFRunLoop pump on non-darwin
- escHotkey.ts: non-darwin returns false (Ctrl+C fallback)
- hostAdapter.ts: non-darwin permissions granted
- common.ts: dynamic platform + screenshotFiltering
- gates.ts: enabled:true, subscription check removed

Phase 3: Add Linux backends (xdotool/scrot/xrandr/wmctrl):
- computer-use-input/backends/linux.ts (173 lines)
- computer-use-swift/backends/linux.ts (278 lines)

Verified on Windows x64: mouse, screenshot, displays, foreground app.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
unraid
2026-04-03 22:33:00 +08:00
parent 465e9f01c6
commit e3264a1691
34 changed files with 8291 additions and 750 deletions

View File

@@ -0,0 +1,108 @@
/**
* Port of the API's image transcoder target-size algorithm. Pre-sizing
* screenshots to this function's output means the API's early-return fires
* (tokens ≤ max) and the image is NOT resized server-side — so the model
* sees exactly the dimensions in `ScreenshotResult.width/height` and
* `scaleCoord` stays coherent.
*
* Rust reference: api/api/image_transcoder/rust_transcoder/src/utils/resize.rs
* Sibling TS port: apps/claude-browser-use/src/utils/imageResize.ts (identical
* algorithm, lives in the Chrome extension tree — not a shared package).
*
* See COORDINATES.md for why this matters for click accuracy.
*/
export interface ResizeParams {
pxPerToken: number;
maxTargetPx: number;
maxTargetTokens: number;
}
/**
* Production defaults — match `resize.rs:160-164` and Chrome's
* `CDPService.ts:638-642`. Vision encoder uses 28px tiles; 1568 is both
* the long-edge cap (56 tiles) AND the token budget.
*/
export const API_RESIZE_PARAMS: ResizeParams = {
pxPerToken: 28,
maxTargetPx: 1568,
maxTargetTokens: 1568,
};
/** ceil(px / pxPerToken). Matches resize.rs:74-76 (which uses integer ceil-div). */
export function nTokensForPx(px: number, pxPerToken: number): number {
return Math.floor((px - 1) / pxPerToken) + 1;
}
function nTokensForImg(
width: number,
height: number,
pxPerToken: number,
): number {
return nTokensForPx(width, pxPerToken) * nTokensForPx(height, pxPerToken);
}
/**
* Binary-search along the width dimension for the largest image that:
* - preserves the input aspect ratio
* - has long edge ≤ maxTargetPx
* - has ceil(w/pxPerToken) × ceil(h/pxPerToken) ≤ maxTargetTokens
*
* Returns [width, height]. No-op if input already satisfies all three.
*
* The long-edge constraint alone (what we used to use) is insufficient on
* squarer-than-16:9 displays: 1568×1014 (MBP 16" AR) is 56×37 = 2072 tokens,
* over budget, and gets server-resized to 1372×887 — model then clicks in
* 1372-space but scaleCoord assumed 1568-space → ~14% coord error.
*
* Matches resize.rs:91-155 exactly (verified against its test vectors).
*/
export function targetImageSize(
width: number,
height: number,
params: ResizeParams,
): [number, number] {
const { pxPerToken, maxTargetPx, maxTargetTokens } = params;
if (
width <= maxTargetPx &&
height <= maxTargetPx &&
nTokensForImg(width, height, pxPerToken) <= maxTargetTokens
) {
return [width, height];
}
// Normalize to landscape for the search; transpose result back.
if (height > width) {
const [w, h] = targetImageSize(height, width, params);
return [h, w];
}
const aspectRatio = width / height;
// Loop invariant: lowerBoundWidth is always valid, upperBoundWidth is
// always invalid. ~12 iterations for a 4000px image.
let upperBoundWidth = width;
let lowerBoundWidth = 1;
for (;;) {
if (lowerBoundWidth + 1 === upperBoundWidth) {
return [
lowerBoundWidth,
Math.max(Math.round(lowerBoundWidth / aspectRatio), 1),
];
}
const middleWidth = Math.floor((lowerBoundWidth + upperBoundWidth) / 2);
const middleHeight = Math.max(Math.round(middleWidth / aspectRatio), 1);
if (
middleWidth <= maxTargetPx &&
nTokensForImg(middleWidth, middleHeight, pxPerToken) <= maxTargetTokens
) {
lowerBoundWidth = middleWidth;
} else {
upperBoundWidth = middleWidth;
}
}
}