Merge branch 'pr/amDosion/92'

This commit is contained in:
claude-code-best
2026-04-04 00:12:15 +08:00
11 changed files with 318 additions and 136 deletions

View File

@@ -1,5 +1,35 @@
# DEV-LOG
## Enable Voice Mode / VOICE_MODE (2026-04-03)
恢复 `/voice` 语音输入功能。`src/` 下所有 voice 相关源码已与官方一致0 行差异),问题出在:① `VOICE_MODE` 编译开关未开,命令不显示;② `audio-capture-napi` 是 SoX 子进程 stubWindows 不支持),缺少官方原生 `.node` 二进制。
**新增文件:**
| 文件 | 说明 |
|------|------|
| `vendor/audio-capture/{platform}/audio-capture.node` | 6 个平台的原生音频二进制cpal来自参考项目 |
| `vendor/audio-capture-src/index.ts` | 原生模块加载器(按 `${arch}-${platform}` 动态 require `.node` |
**修改文件:**
| 文件 | 变更 |
|------|------|
| `packages/audio-capture-napi/src/index.ts` | SoX 子进程 stub → 原生 `.node` 加载器(含 `process.cwd()` workspace 路径 fallback |
| `scripts/dev.ts` | `DEFAULT_FEATURES``"VOICE_MODE"` |
| `build.ts` | `DEFAULT_BUILD_FEATURES``"VOICE_MODE"` |
| `docs/features/voice-mode.md` | 追加恢复计划章节(第八节) |
**验证结果:**
- `isNativeAudioAvailable()``true`Windows x64 原生 `.node` 加载成功)
- `feature('VOICE_MODE')``ENABLED`
- `bun run build` → voice 代码编入产物
**运行时前置条件:** claude.ai OAuth 登录 + 麦克风权限
---
## Enable Claude in Chrome MCP (2026-04-03)
恢复 Chrome 浏览器控制功能。`src/` 下所有 claudeInChrome 相关源码已与官方一致0 行差异),问题出在 `@ant/claude-for-chrome-mcp` 包是 6 行 stub返回空工具列表和 null server

View File

@@ -10,7 +10,7 @@ rmSync(outdir, { recursive: true, force: true });
// Default features that match the official CLI build.
// Additional features can be enabled via FEATURE_<NAME>=1 env vars.
const DEFAULT_BUILD_FEATURES = ["AGENT_TRIGGERS_REMOTE"];
const DEFAULT_BUILD_FEATURES = ["AGENT_TRIGGERS_REMOTE", "VOICE_MODE"];
// Collect FEATURE_* env vars → Bun.build features
const envFeatures = Object.keys(process.env)

View File

@@ -1,151 +1,152 @@
// audio-capture-napi: cross-platform audio capture using SoX (rec) on macOS
// and arecord (ALSA) on Linux. Replaces the original cpal-based native module.
import { type ChildProcess, spawn, spawnSync } from 'child_process'
// ─── State ───────────────────────────────────────────────────────────
let recordingProcess: ChildProcess | null = null
let availabilityCache: boolean | null = null
// ─── Helpers ─────────────────────────────────────────────────────────
function commandExists(cmd: string): boolean {
const result = spawnSync(cmd, ['--version'], {
stdio: 'ignore',
timeout: 3000,
})
return result.error === undefined
type AudioCaptureNapi = {
startRecording(
onData: (data: Buffer) => void,
onEnd: () => void,
): boolean
stopRecording(): void
isRecording(): boolean
startPlayback(sampleRate: number, channels: number): boolean
writePlaybackData(data: Buffer): void
stopPlayback(): void
isPlaying(): boolean
// TCC microphone authorization status (macOS only):
// 0 = notDetermined, 1 = restricted, 2 = denied, 3 = authorized.
// Linux: always returns 3 (authorized) — no system-level microphone permission API.
// Windows: returns 3 (authorized) if registry key absent or allowed,
// 2 (denied) if microphone access is explicitly denied.
microphoneAuthorizationStatus?(): number
}
// ─── Public API ──────────────────────────────────────────────────────
let cachedModule: AudioCaptureNapi | null = null
let loadAttempted = false
/**
* Check whether a supported audio recording command is available.
* Returns true if `rec` (SoX) is found on macOS, or `arecord` (ALSA) on Linux.
* Windows is not supported and always returns false.
*/
export function isNativeAudioAvailable(): boolean {
if (availabilityCache !== null) {
return availabilityCache
function loadModule(): AudioCaptureNapi | null {
if (loadAttempted) {
return cachedModule
}
loadAttempted = true
// Supported platforms: macOS (darwin), Linux, Windows (win32)
const platform = process.platform
if (platform !== 'darwin' && platform !== 'linux' && platform !== 'win32') {
return null
}
if (process.platform === 'win32') {
availabilityCache = false
return false
}
if (process.platform === 'darwin') {
// macOS: use SoX rec
availabilityCache = commandExists('rec')
return availabilityCache
}
if (process.platform === 'linux') {
// Linux: prefer arecord, fall back to rec
availabilityCache = commandExists('arecord') || commandExists('rec')
return availabilityCache
}
availabilityCache = false
return false
}
/**
* Check whether a recording is currently in progress.
*/
export function isNativeRecordingActive(): boolean {
return recordingProcess !== null && !recordingProcess.killed
}
/**
* Stop the active recording process, if any.
*/
export function stopNativeRecording(): void {
if (recordingProcess) {
const proc = recordingProcess
recordingProcess = null
if (!proc.killed) {
proc.kill('SIGTERM')
// Candidate 1: native-embed path (bun compile). AUDIO_CAPTURE_NODE_PATH is
// defined at build time in build-with-plugins.ts for native builds only — the
// define resolves it to the static literal "../../audio-capture.node" so bun
// compile can rewrite it to /$bunfs/root/audio-capture.node. MUST stay a
// direct require(env var) — bun cannot analyze require(variable) from a loop.
if (process.env.AUDIO_CAPTURE_NODE_PATH) {
try {
// eslint-disable-next-line @typescript-eslint/no-require-imports
cachedModule = require(
process.env.AUDIO_CAPTURE_NODE_PATH,
) as AudioCaptureNapi
return cachedModule
} catch {
// fall through to runtime fallbacks below
}
}
// Candidates 2-4: npm-install, dev/source, and workspace layouts.
// In bundled output, require() resolves relative to cli.js at the package root.
// In dev, it resolves relative to this file. When loaded from a workspace
// package (packages/audio-capture-napi/src/), we need an absolute path fallback.
const platformDir = `${process.arch}-${platform}`
const fallbacks = [
`./vendor/audio-capture/${platformDir}/audio-capture.node`,
`../audio-capture/${platformDir}/audio-capture.node`,
`${process.cwd()}/vendor/audio-capture/${platformDir}/audio-capture.node`,
]
for (const p of fallbacks) {
try {
// eslint-disable-next-line @typescript-eslint/no-require-imports
cachedModule = require(p) as AudioCaptureNapi
return cachedModule
} catch {
// try next
}
}
return null
}
export function isNativeAudioAvailable(): boolean {
return loadModule() !== null
}
/**
* Start recording audio. Raw PCM data (16kHz, 16-bit signed, mono) is
* streamed via the onData callback. onEnd is called when recording stops
* (either from silence detection or process termination).
*
* Returns true if recording started successfully, false otherwise.
*/
export function startNativeRecording(
onData: (data: Buffer) => void,
onEnd: () => void,
): boolean {
// Don't start if already recording
if (isNativeRecordingActive()) {
stopNativeRecording()
}
if (!isNativeAudioAvailable()) {
const mod = loadModule()
if (!mod) {
return false
}
let child: ChildProcess
if (process.platform === 'darwin' || (process.platform === 'linux' && commandExists('rec'))) {
// Use SoX rec: output raw PCM 16kHz 16-bit signed mono to stdout
child = spawn(
'rec',
[
'-q', // quiet
'--buffer',
'1024', // small buffer for low latency
'-t', 'raw', // raw PCM output
'-r', '16000', // 16kHz sample rate
'-e', 'signed', // signed integer encoding
'-b', '16', // 16-bit
'-c', '1', // mono
'-', // output to stdout
],
{ stdio: ['pipe', 'pipe', 'pipe'] },
)
} else if (process.platform === 'linux' && commandExists('arecord')) {
// Use arecord: output raw PCM 16kHz 16-bit signed LE mono to stdout
child = spawn(
'arecord',
[
'-f', 'S16_LE', // signed 16-bit little-endian
'-r', '16000', // 16kHz sample rate
'-c', '1', // mono
'-t', 'raw', // raw PCM, no header
'-q', // quiet
'-', // output to stdout
],
{ stdio: ['pipe', 'pipe', 'pipe'] },
)
} else {
return false
}
recordingProcess = child
child.stdout?.on('data', (chunk: Buffer) => {
onData(chunk)
})
// Consume stderr to prevent backpressure
child.stderr?.on('data', () => {})
child.on('close', () => {
recordingProcess = null
onEnd()
})
child.on('error', () => {
recordingProcess = null
onEnd()
})
return true
return mod.startRecording(onData, onEnd)
}
export function stopNativeRecording(): void {
const mod = loadModule()
if (!mod) {
return
}
mod.stopRecording()
}
export function isNativeRecordingActive(): boolean {
const mod = loadModule()
if (!mod) {
return false
}
return mod.isRecording()
}
export function startNativePlayback(
sampleRate: number,
channels: number,
): boolean {
const mod = loadModule()
if (!mod) {
return false
}
return mod.startPlayback(sampleRate, channels)
}
export function writeNativePlaybackData(data: Buffer): void {
const mod = loadModule()
if (!mod) {
return
}
mod.writePlaybackData(data)
}
export function stopNativePlayback(): void {
const mod = loadModule()
if (!mod) {
return
}
mod.stopPlayback()
}
export function isNativePlaying(): boolean {
const mod = loadModule()
if (!mod) {
return false
}
return mod.isPlaying()
}
// Returns the microphone authorization status.
// On macOS, returns the TCC status: 0=notDetermined, 1=restricted, 2=denied, 3=authorized.
// On Linux, always returns 3 (authorized) — no system-level mic permission API.
// On Windows, returns 3 (authorized) if registry key absent or allowed, 2 (denied) if explicitly denied.
// Returns 0 (notDetermined) if the native module is unavailable.
export function microphoneAuthorizationStatus(): number {
const mod = loadModule()
if (!mod || !mod.microphoneAuthorizationStatus) {
return 0
}
return mod.microphoneAuthorizationStatus()
}

View File

@@ -15,7 +15,7 @@ const defineArgs = Object.entries(defines).flatMap(([k, v]) => [
// Bun --feature flags: enable feature() gates at runtime.
// Default features enabled in dev mode.
const DEFAULT_FEATURES = ["BUDDY", "TRANSCRIPT_CLASSIFIER", "BRIDGE_MODE", "AGENT_TRIGGERS_REMOTE"];
const DEFAULT_FEATURES = ["BUDDY", "TRANSCRIPT_CLASSIFIER", "BRIDGE_MODE", "AGENT_TRIGGERS_REMOTE", "VOICE_MODE"];
// Any env var matching FEATURE_<NAME>=1 will also enable that feature.
// e.g. FEATURE_PROACTIVE=1 bun run dev

151
vendor/audio-capture-src/index.ts vendored Normal file
View File

@@ -0,0 +1,151 @@
type AudioCaptureNapi = {
startRecording(
onData: (data: Buffer) => void,
onEnd: () => void,
): boolean
stopRecording(): void
isRecording(): boolean
startPlayback(sampleRate: number, channels: number): boolean
writePlaybackData(data: Buffer): void
stopPlayback(): void
isPlaying(): boolean
// TCC microphone authorization status (macOS only):
// 0 = notDetermined, 1 = restricted, 2 = denied, 3 = authorized.
// Linux: always returns 3 (authorized) — no system-level microphone permission API.
// Windows: returns 3 (authorized) if registry key absent or allowed,
// 2 (denied) if microphone access is explicitly denied.
microphoneAuthorizationStatus?(): number
}
let cachedModule: AudioCaptureNapi | null = null
let loadAttempted = false
function loadModule(): AudioCaptureNapi | null {
if (loadAttempted) {
return cachedModule
}
loadAttempted = true
// Supported platforms: macOS (darwin), Linux, Windows (win32)
const platform = process.platform
if (platform !== 'darwin' && platform !== 'linux' && platform !== 'win32') {
return null
}
// Candidate 1: native-embed path (bun compile). AUDIO_CAPTURE_NODE_PATH is
// defined at build time in build-with-plugins.ts for native builds only — the
// define resolves it to the static literal "../../audio-capture.node" so bun
// compile can rewrite it to /$bunfs/root/audio-capture.node. MUST stay a
// direct require(env var) — bun cannot analyze require(variable) from a loop.
if (process.env.AUDIO_CAPTURE_NODE_PATH) {
try {
// eslint-disable-next-line @typescript-eslint/no-require-imports
cachedModule = require(
process.env.AUDIO_CAPTURE_NODE_PATH,
) as AudioCaptureNapi
return cachedModule
} catch {
// fall through to runtime fallbacks below
}
}
// Candidates 2/3: npm-install and dev/source layouts. Dynamic require is
// fine here — in bundled output (node --target build) require() resolves at
// runtime relative to cli.js at the package root; in dev it resolves
// relative to this file (vendor/audio-capture-src/index.ts).
const platformDir = `${process.arch}-${platform}`
const fallbacks = [
`./vendor/audio-capture/${platformDir}/audio-capture.node`,
`../audio-capture/${platformDir}/audio-capture.node`,
]
for (const p of fallbacks) {
try {
// eslint-disable-next-line @typescript-eslint/no-require-imports
cachedModule = require(p) as AudioCaptureNapi
return cachedModule
} catch {
// try next
}
}
return null
}
export function isNativeAudioAvailable(): boolean {
return loadModule() !== null
}
export function startNativeRecording(
onData: (data: Buffer) => void,
onEnd: () => void,
): boolean {
const mod = loadModule()
if (!mod) {
return false
}
return mod.startRecording(onData, onEnd)
}
export function stopNativeRecording(): void {
const mod = loadModule()
if (!mod) {
return
}
mod.stopRecording()
}
export function isNativeRecordingActive(): boolean {
const mod = loadModule()
if (!mod) {
return false
}
return mod.isRecording()
}
export function startNativePlayback(
sampleRate: number,
channels: number,
): boolean {
const mod = loadModule()
if (!mod) {
return false
}
return mod.startPlayback(sampleRate, channels)
}
export function writeNativePlaybackData(data: Buffer): void {
const mod = loadModule()
if (!mod) {
return
}
mod.writePlaybackData(data)
}
export function stopNativePlayback(): void {
const mod = loadModule()
if (!mod) {
return
}
mod.stopPlayback()
}
export function isNativePlaying(): boolean {
const mod = loadModule()
if (!mod) {
return false
}
return mod.isPlaying()
}
// Returns the microphone authorization status.
// On macOS, returns the TCC status: 0=notDetermined, 1=restricted, 2=denied, 3=authorized.
// On Linux, always returns 3 (authorized) — no system-level mic permission API.
// On Windows, returns 3 (authorized) if registry key absent or allowed, 2 (denied) if explicitly denied.
// Returns 0 (notDetermined) if the native module is unavailable.
export function microphoneAuthorizationStatus(): number {
const mod = loadModule()
if (!mod || !mod.microphoneAuthorizationStatus) {
return 0
}
return mod.microphoneAuthorizationStatus()
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.