Files
claude-code/packages/@ant/computer-use-mcp/src/imageResize.ts
unraid e3264a1691 feat: enable Computer Use with macOS + Windows + Linux support
Phase 1: Replace @ant/computer-use-mcp stub (12 files, 6517 lines).

Phase 2: Remove 8 macOS-only guards in src/:
- main.tsx: remove getPlatform()==='macos' check
- swiftLoader.ts: remove darwin-only throw
- executor.ts: extend platform guard, clipboard dispatch, paste key
- drainRunLoop.ts: skip CFRunLoop pump on non-darwin
- escHotkey.ts: non-darwin returns false (Ctrl+C fallback)
- hostAdapter.ts: non-darwin permissions granted
- common.ts: dynamic platform + screenshotFiltering
- gates.ts: enabled:true, subscription check removed

Phase 3: Add Linux backends (xdotool/scrot/xrandr/wmctrl):
- computer-use-input/backends/linux.ts (173 lines)
- computer-use-swift/backends/linux.ts (278 lines)

Verified on Windows x64: mouse, screenshot, displays, foreground app.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-03 23:17:14 +08:00

109 lines
3.4 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Port of the API's image transcoder target-size algorithm. Pre-sizing
* screenshots to this function's output means the API's early-return fires
* (tokens ≤ max) and the image is NOT resized server-side — so the model
* sees exactly the dimensions in `ScreenshotResult.width/height` and
* `scaleCoord` stays coherent.
*
* Rust reference: api/api/image_transcoder/rust_transcoder/src/utils/resize.rs
* Sibling TS port: apps/claude-browser-use/src/utils/imageResize.ts (identical
* algorithm, lives in the Chrome extension tree — not a shared package).
*
* See COORDINATES.md for why this matters for click accuracy.
*/
/** Tunables for the target-size computation performed by `targetImageSize`. */
export interface ResizeParams {
  /** Pixel edge length covered by one token; the divisor in ceil(px / pxPerToken). */
  pxPerToken: number;
  /** Upper limit, in pixels, on the longer edge of the resized image. */
  maxTargetPx: number;
  /** Upper limit on ceil(w / pxPerToken) × ceil(h / pxPerToken). */
  maxTargetTokens: number;
}
/**
 * Resize parameters used in production. They mirror `resize.rs:160-164` and
 * Chrome's `CDPService.ts:638-642`.
 *
 * The vision encoder works on 28px tiles. The value 1568 serves double duty:
 * it is the long-edge cap in pixels (56 tiles × 28px) and also the token
 * budget.
 */
export const API_RESIZE_PARAMS: ResizeParams = {
  // Edge length of one encoder tile, in pixels.
  pxPerToken: 28,
  // Long-edge cap: 56 tiles of 28px.
  maxTargetPx: 1568,
  // Token budget; numerically equal to the long-edge cap.
  maxTargetTokens: 1568,
};
/**
 * Number of tokens needed to cover `px` pixels along a single axis.
 *
 * Written as floor((px - 1) / pxPerToken) + 1, the integer ceil-div form used
 * by resize.rs:74-76. For integer `px` this equals ceil(px / pxPerToken); for
 * fractional `px` the two can differ, so the form is kept as-is.
 *
 * @param px - Length of the axis in pixels.
 * @param pxPerToken - Pixels covered by one token along an axis.
 * @returns The token count for that axis.
 */
export function nTokensForPx(px: number, pxPerToken: number): number {
  const tilesBeforeLastPixel = Math.floor((px - 1) / pxPerToken);
  return tilesBeforeLastPixel + 1;
}
/**
 * Token count of a width × height image: the per-axis token counts from
 * `nTokensForPx`, multiplied together.
 *
 * @param width - Image width in pixels.
 * @param height - Image height in pixels.
 * @param pxPerToken - Pixels covered by one token along an axis.
 * @returns Tokens across × tokens down.
 */
function nTokensForImg(
  width: number,
  height: number,
  pxPerToken: number,
): number {
  const tokensAcross = nTokensForPx(width, pxPerToken);
  const tokensDown = nTokensForPx(height, pxPerToken);
  return tokensAcross * tokensDown;
}
/**
 * Binary-search along the width dimension for the largest image that:
 *   - preserves the input aspect ratio
 *   - has long edge ≤ maxTargetPx
 *   - has ceil(w/pxPerToken) × ceil(h/pxPerToken) ≤ maxTargetTokens
 *
 * No-op if the input already satisfies all three.
 *
 * The long-edge constraint alone (what we used to use) is insufficient on
 * squarer-than-16:9 displays: 1568×1014 (MBP 16" AR) is 56×37 = 2072 tokens,
 * over budget, and gets server-resized to 1372×887 — model then clicks in
 * 1372-space but scaleCoord assumed 1568-space → ~14% coord error.
 *
 * For integer dimensions the search probes exactly the same widths as the
 * algorithm documented as matching resize.rs:91-155. Because this port takes
 * `number` rather than an integer type, it additionally guarantees
 * termination: a fractional long edge (e.g. 1568.5) or a 1px-wide search
 * range can no longer leave the bounds permanently more or less than one
 * apart, and non-finite dimensions are rejected instead of looping forever
 * or yielding NaN.
 *
 * @param width - Source width in pixels.
 * @param height - Source height in pixels.
 * @param params - Tile size, long-edge cap and token budget.
 * @returns `[width, height]` of the target size. The input is returned
 *   unchanged when it already fits; otherwise the searched width is an
 *   integer and the height is rounded from the aspect ratio (minimum 1).
 * @throws RangeError if the input does not already fit and `width` or
 *   `height` is NaN or ±Infinity.
 */
export function targetImageSize(
  width: number,
  height: number,
  params: ResizeParams,
): [number, number] {
  const { pxPerToken, maxTargetPx, maxTargetTokens } = params;

  if (
    width <= maxTargetPx &&
    height <= maxTargetPx &&
    nTokensForImg(width, height, pxPerToken) <= maxTargetTokens
  ) {
    return [width, height];
  }

  // Checked only after the early return so inputs it already accepted keep
  // their result. Beyond this point a non-finite dimension would make the
  // search spin forever (NaN/Infinity width) or produce a NaN height.
  if (!Number.isFinite(width) || !Number.isFinite(height)) {
    throw new RangeError(
      `targetImageSize: width and height must be finite numbers (got width=${width}, height=${height})`,
    );
  }

  // Normalize to landscape for the search; transpose result back.
  if (height > width) {
    const [w, h] = targetImageSize(height, width, params);
    return [h, w];
  }

  const aspectRatio = width / height;

  // Loop invariant: lowerBoundWidth is treated as valid, upperBoundWidth as
  // invalid (this presumes params admit a 1px-wide image). ~12 iterations for
  // a 4000px image.
  //
  // The upper bound is rounded up so both bounds are integers: every probed
  // width is then an integer strictly below the source width, and the gap is
  // guaranteed to shrink to at most 1. For integer widths ceil is a no-op.
  let upperBoundWidth = Math.ceil(width);
  let lowerBoundWidth = 1;

  // `> 1` rather than `lower + 1 !== upper`: also stops when the range starts
  // out empty (source width ≤ 1), where the equality form never fires.
  while (upperBoundWidth - lowerBoundWidth > 1) {
    const middleWidth = Math.floor((lowerBoundWidth + upperBoundWidth) / 2);
    const middleHeight = Math.max(Math.round(middleWidth / aspectRatio), 1);
    if (
      middleWidth <= maxTargetPx &&
      nTokensForImg(middleWidth, middleHeight, pxPerToken) <= maxTargetTokens
    ) {
      lowerBoundWidth = middleWidth;
    } else {
      upperBoundWidth = middleWidth;
    }
  }

  return [
    lowerBoundWidth,
    Math.max(Math.round(lowerBoundWidth / aspectRatio), 1),
  ];
}