mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-23 08:45:50 +00:00
feat: enable Computer Use with macOS + Windows + Linux support
Phase 1: Replace @ant/computer-use-mcp stub (12 files, 6517 lines). Phase 2: Remove 8 macOS-only guards in src/: - main.tsx: remove getPlatform()==='macos' check - swiftLoader.ts: remove darwin-only throw - executor.ts: extend platform guard, clipboard dispatch, paste key - drainRunLoop.ts: skip CFRunLoop pump on non-darwin - escHotkey.ts: non-darwin returns false (Ctrl+C fallback) - hostAdapter.ts: non-darwin permissions granted - common.ts: dynamic platform + screenshotFiltering - gates.ts: enabled:true, subscription check removed Phase 3: Add Linux backends (xdotool/scrot/xrandr/wmctrl): - computer-use-input/backends/linux.ts (173 lines) - computer-use-swift/backends/linux.ts (278 lines) Verified on Windows x64: mouse, screenshot, displays, foreground app. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
171
packages/@ant/computer-use-mcp/src/pixelCompare.ts
Normal file
171
packages/@ant/computer-use-mcp/src/pixelCompare.ts
Normal file
@@ -0,0 +1,171 @@
|
||||
/**
 * Staleness guard ported from the Vercept acquisition.
 *
 * Compares the model's last-seen screenshot against a fresh-right-now
 * screenshot at the click target, so the model never clicks pixels it hasn't
 * seen. If the 9×9 patch around the target differs, the click is aborted and
 * the model is told to re-screenshot. This is NOT a popup detector.
 *
 * Semantics preserved exactly:
 *   - Skip on no `lastScreenshot` (cold start) — click proceeds.
 *   - Skip on any internal error (crop throws, screenshot fails, etc.) —
 *     click proceeds. Validation failure must never block the action.
 *   - 9×9 exact byte equality on raw pixel bytes. No fuzzing, no tolerance.
 *   - Compare in percentage coords so Retina scale doesn't matter.
 *
 * JPEG decode + crop is INJECTED via `ComputerUseHostAdapter.cropRawPatch`.
 * The original used `sharp` (LGPL, native `.node` addon); we inject Electron's
 * `nativeImage` (Chromium decoders, BSD, nothing to bundle) from the host, so
 * this package never imports it — the crop is a function parameter.
 */
|
||||
|
||||
import type { ScreenshotResult } from "./executor.js";
|
||||
import type { Logger } from "./types.js";
|
||||
|
||||
/**
 * Host-supplied decode-and-crop callback (see
 * `ComputerUseHostAdapter.cropRawPatch`).
 *
 * @param jpegBase64 Base64-encoded screenshot image to decode.
 * @param rect Region to extract, in the image's pixel coordinates.
 * @returns The raw pixel bytes of the requested region, or `null` when the
 *   host could not produce a patch (for example, a decode failure).
 */
export type CropRawPatchFn = (
  jpegBase64: string,
  rect: { x: number; y: number; width: number; height: number },
) => Buffer | null;
|
||||
|
||||
/**
 * 9×9 is empirically the sweet spot — large enough to catch a tooltip
 * appearing, small enough to not false-positive on surrounding animation.
 */
|
||||
// Side length, in pixels, of the square patch that gets compared; this is the
// default `gridSize` for comparePixelAtLocation and validateClickTarget.
const DEFAULT_GRID_SIZE = 9;
|
||||
|
||||
/** Outcome of a click-target staleness check. */
export interface PixelCompareResult {
  /**
   * `true`: the click may go ahead.
   * `false`: the patch changed, so the click must be aborted.
   */
  valid: boolean;
  /**
   * `true` when no comparison actually took place (cold start, sub-gate off,
   * or an internal error). Callers MUST handle this exactly like
   * `valid: true`.
   */
  skipped: boolean;
  /** Set only when `valid === false`; handed back to the model verbatim. */
  warning?: string;
}
|
||||
|
||||
/**
 * Compute the crop rect for a patch centered on (xPercent, yPercent).
 *
 * Dimensions come from ScreenshotResult.width/height (physical pixels). Both
 * screenshots have the same dimensions (same display, consecutive captures),
 * so the rect is the same for both.
 *
 * Percentages outside [0, 100] are clamped. Near the top/left edge the rect
 * is anchored at 0 and keeps up to `gridSize` pixels; near the bottom/right
 * edge it is trimmed to whatever remains inside the image.
 *
 * @param imgW Image width in pixels.
 * @param imgH Image height in pixels.
 * @param xPercent Horizontal target position, 0–100.
 * @param yPercent Vertical target position, 0–100.
 * @param gridSize Side length of the square patch in pixels.
 * @returns The rect to crop, or `null` when no usable rect exists: a zero,
 *   NaN or infinite image dimension, a NaN coordinate or grid size, or a
 *   patch that ends up with no area.
 */
function computeCropRect(
  imgW: number,
  imgH: number,
  xPercent: number,
  yPercent: number,
  gridSize: number,
): { x: number; y: number; width: number; height: number } | null {
  // Zero or NaN dimensions: there is no image to crop from.
  if (!imgW || !imgH) return null;
  // Infinite dimensions would turn the width/height arithmetic into NaN.
  if (!Number.isFinite(imgW) || !Number.isFinite(imgH)) return null;
  // NaN passes straight through Math.max/Math.min, and `NaN <= 0` is false,
  // so without this guard a NaN input would end up inside the returned rect.
  // Infinite percentages are fine: the clamp below pins them to 0 or 100.
  if (
    Number.isNaN(xPercent) ||
    Number.isNaN(yPercent) ||
    Number.isNaN(gridSize)
  ) {
    return null;
  }

  const clampedX = Math.max(0, Math.min(100, xPercent));
  const clampedY = Math.max(0, Math.min(100, yPercent));

  const centerX = Math.round((clampedX / 100.0) * imgW);
  const centerY = Math.round((clampedY / 100.0) * imgH);

  const halfGrid = Math.floor(gridSize / 2);
  const cropX = Math.max(0, centerX - halfGrid);
  const cropY = Math.max(0, centerY - halfGrid);
  const cropW = Math.min(gridSize, imgW - cropX);
  const cropH = Math.min(gridSize, imgH - cropY);
  if (cropW <= 0 || cropH <= 0) return null;

  return { x: cropX, y: cropY, width: cropW, height: cropH };
}
|
||||
|
||||
/**
 * Check whether one patch location holds identical pixels in two screenshots.
 *
 * The rect is derived from the fresh screenshot's dimensions and then applied
 * to both images through the injected `crop` callback.
 *
 * @param crop Host-injected decode-and-crop callback.
 * @param lastScreenshot Screenshot the model last saw.
 * @param freshScreenshot Screenshot captured just now.
 * @param xPercent Horizontal target position, 0–100.
 * @param yPercent Vertical target position, 0–100.
 * @param gridSize Side length of the compared patch in pixels.
 * @returns `true` only when both patches were produced and their raw bytes
 *   are identical. `false` on any difference, and also when no rect could be
 *   computed or either crop came back empty — a `false` result alone does not
 *   tell those cases apart.
 */
export function comparePixelAtLocation(
  crop: CropRawPatchFn,
  lastScreenshot: ScreenshotResult,
  freshScreenshot: ScreenshotResult,
  xPercent: number,
  yPercent: number,
  gridSize: number = DEFAULT_GRID_SIZE,
): boolean {
  // Same display for both captures; trust the fresh one's dimensions, since
  // they are less likely to be stale than the last screenshot's.
  const { width, height } = freshScreenshot;
  const patchRect = computeCropRect(width, height, xPercent, yPercent, gridSize);
  if (!patchRect) {
    return false;
  }

  // Both crops are requested before either result is inspected.
  const stalePatch = crop(lastScreenshot.base64, patchRect);
  const livePatch = crop(freshScreenshot.base64, patchRect);

  // Plain byte equality. The channel layout (nativeImage.toBitmap() yields
  // BGRA, sharp's .raw() yielded RGB) is irrelevant because both buffers come
  // from the same crop function and therefore share one format.
  if (stalePatch && livePatch) {
    return stalePatch.equals(livePatch);
  }
  return false;
}
|
||||
|
||||
/**
 * Battle-tested click-target validation ported from the Vercept acquisition,
 * with the fresh-screenshot capture delegated to the caller (we don't have
 * a global `SystemActions.takeScreenshot()` — the executor is injected).
 *
 * Skip conditions (any of these → `{ valid: true, skipped: true }`):
 *   - `lastScreenshot` is undefined (cold start).
 *   - `takeFreshScreenshot()` throws or returns null.
 *   - Fewer than two patches could be produced: the injected crop function
 *     returned null (decode failure), or no crop rect could be computed so
 *     the crop function was never invoked.
 *   - Any other exception.
 *
 * The caller decides whether to invoke this at all (sub-gate check lives
 * in toolCalls.ts, not here).
 *
 * @param crop Host-injected decode-and-crop callback.
 * @param lastScreenshot Screenshot the model last saw, if any.
 * @param xPercent Horizontal click target, 0–100.
 * @param yPercent Vertical click target, 0–100.
 * @param takeFreshScreenshot Captures the current screen; may resolve to null.
 * @param logger Receives a debug entry when validation is skipped because of
 *   an exception.
 * @param gridSize Side length of the compared patch in pixels.
 * @returns `valid: false` with a `warning` only when both patches were
 *   obtained and their bytes differ; otherwise `valid: true`.
 */
export async function validateClickTarget(
  crop: CropRawPatchFn,
  lastScreenshot: ScreenshotResult | undefined,
  xPercent: number,
  yPercent: number,
  takeFreshScreenshot: () => Promise<ScreenshotResult | null>,
  logger: Logger,
  gridSize: number = DEFAULT_GRID_SIZE,
): Promise<PixelCompareResult> {
  if (!lastScreenshot) {
    return { valid: true, skipped: true };
  }

  try {
    const fresh = await takeFreshScreenshot();
    if (!fresh) {
      return { valid: true, skipped: true };
    }

    // comparePixelAtLocation reports "patch differs" and "could not compare"
    // with the same `false`. Count the patches the host actually produced so
    // the two outcomes can be told apart below.
    let patchesProduced = 0;
    const countingCrop: CropRawPatchFn = (jpegBase64, rect) => {
      const patch = crop(jpegBase64, rect);
      if (patch) patchesProduced += 1;
      return patch;
    };

    const pixelsMatch = comparePixelAtLocation(
      countingCrop,
      lastScreenshot,
      fresh,
      xPercent,
      yPercent,
      gridSize,
    );

    if (pixelsMatch) {
      return { valid: true, skipped: false };
    }

    // A real comparison needs both patches. With fewer than two, the `false`
    // above means validation could not run — and a validation failure must
    // never block the click.
    if (patchesProduced < 2) {
      return { valid: true, skipped: true };
    }

    return {
      valid: false,
      skipped: false,
      warning:
        "Screen content at the target location changed since the last screenshot. Take a new screenshot before clicking.",
    };
  } catch (err) {
    // Skip validation on technical errors, execute action anyway.
    // Battle-tested: validation failure must never block the click.
    logger.debug("[pixelCompare] validation error, skipping", err);
    return { valid: true, skipped: true };
  }
}
|
||||
Reference in New Issue
Block a user