import { feature } from 'bun:bundle'
import * as React from 'react'
import { useCallback, useEffect, useMemo, useRef } from 'react'
import { useNotifications } from '../context/notifications.js'
import { useIsModalOverlayActive } from '../context/overlayContext.js'
import {
  useGetVoiceState,
  useSetVoiceState,
  useVoiceState,
} from '../context/voice.js'
// useInput is a backward-compat bridge until REPL wires handleKeyDown itself
// (see the TODO(onKeyDown-migration) notes further down).
import { KeyboardEvent, useInput } from '@anthropic/ink'
import { useOptionalKeybindingContext } from '../keybindings/KeybindingContext.js'
import { keystrokesEqual } from '../keybindings/resolver.js'
import type { ParsedKeystroke } from '../keybindings/types.js'
import { normalizeFullWidthSpace } from '../utils/stringUtils.js'
import { useVoiceEnabled } from './useVoiceEnabled.js'

// Dead code elimination: conditional import for voice input hook.
/* eslint-disable @typescript-eslint/no-require-imports */
// Capture the module namespace, not the function: spyOn() mutates the module
// object, so `voiceNs.useVoice(...)` resolves to the spy even if this module
// was loaded before the spy was installed (test ordering independence).
const voiceNs: { useVoice: typeof import('./useVoice.js').useVoice } = feature(
  'VOICE_MODE',
)
  ? require('./useVoice.js')
  : {
      // Inert stand-in used when the VOICE_MODE feature is compiled out:
      // always reports 'idle' and ignores key events.
      useVoice: ({
        enabled: _e,
      }: {
        onTranscript: (t: string) => void
        enabled: boolean
      }) => ({
        state: 'idle' as const,
        handleKeyEvent: (_fallbackMs?: number) => {},
      }),
    }
/* eslint-enable @typescript-eslint/no-require-imports */

// Maximum gap (ms) between key presses to count as held (auto-repeat).
// Terminal auto-repeat fires every 30-80ms; 120ms covers jitter while
// excluding normal typing speed (100-300ms between keystrokes).
const RAPID_KEY_GAP_MS = 120

// Fallback (ms) for modifier-combo first-press activation. Must match
// FIRST_PRESS_FALLBACK_MS in useVoice.ts. Covers the max OS initial
// key-repeat delay (~2s on macOS with slider at "Long") so holding a
// modifier combo doesn't fragment into two sessions when the first
// auto-repeat arrives after the default 600ms REPEAT_FALLBACK_MS.
const MODIFIER_FIRST_PRESS_FALLBACK_MS = 2000

// Number of rapid consecutive key events required to activate voice.
// Only applies to bare-char bindings (space, v, etc.) where a single press
// could be normal typing. Modifier combos activate on the first press.
const HOLD_THRESHOLD = 5

// Number of rapid key events to start showing warmup feedback.
const WARMUP_THRESHOLD = 2

// Match a KeyboardEvent against a ParsedKeystroke. Replaces the legacy
// matchesKeystroke(input, Key, ...) path which assumed useInput's raw
// `input` arg — KeyboardEvent.key holds normalized names (e.g. 'space',
// 'f9') that getKeyName() didn't handle, so modifier combos and f-keys
// silently failed to match after the onKeyDown migration (#23524).
//
// Returns true only when the (normalized) key name and every modifier flag
// agree with `target`.
function matchesKeyboardEvent(
  e: KeyboardEvent,
  target: ParsedKeystroke,
): boolean {
  // KeyboardEvent stores key names; ParsedKeystroke stores ' ' for space
  // and 'enter' for return (see parser.ts case 'space'/'return').
  const key =
    e.key === 'space'
      ? ' '
      : e.key === 'return'
        ? 'enter'
        : e.key.toLowerCase()
  if (key !== target.key) return false
  if (e.ctrl !== target.ctrl) return false
  if (e.shift !== target.shift) return false
  // KeyboardEvent.meta folds alt|option (terminal limitation — esc-prefix);
  // ParsedKeystroke has both alt and meta as aliases for the same thing.
  if (e.meta !== (target.alt || target.meta)) return false
  if (e.superKey !== target.super) return false
  return true
}

// Hardcoded default for when there's no KeybindingProvider at all (e.g.
// headless/test contexts). NOT used when the provider exists and the
// lookup returns null — that means the user null-unbound or reassigned
// space, and falling back to space would pick a dead or conflicting key.
const DEFAULT_VOICE_KEYSTROKE: ParsedKeystroke = {
  key: ' ',
  ctrl: false,
  alt: false,
  shift: false,
  meta: false,
  super: false,
}

type InsertTextHandle = {
  insert: (text: string) => void
  setInputWithCursor: (value: string, cursor: number) => void
  cursorOffset: number
}

// NOTE(review): the generic arguments below were reconstructed from how the
// values are used in this file (string input value, nullable insert handle)
// — confirm against the caller's declarations.
type UseVoiceIntegrationArgs = {
  setInputValueRaw: React.Dispatch<React.SetStateAction<string>>
  inputValueRef: React.RefObject<string>
  insertTextRef: React.RefObject<InsertTextHandle | null>
}

type InterimRange = { start: number; end: number }

type StripOpts = {
  // Which char to strip (the configured hold key). Defaults to space.
  char?: string
  // Capture the voice prefix/suffix anchor at the stripped position.
  anchor?: boolean
  // Minimum trailing count to leave behind — prevents stripping the
  // intentional warmup chars when defensively cleaning up leaks.
  floor?: number
}

type UseVoiceIntegrationResult = {
  // Returns the number of trailing chars remaining after stripping.
  stripTrailing: (maxStrip: number, opts?: StripOpts) => number
  // Undo the gap space and reset anchor refs after a failed voice activation.
  resetAnchor: () => void
  handleKeyEvent: (fallbackMs?: number) => void
  interimRange: InterimRange | null
}

/**
 * Bridges the voice hook and the prompt input: anchors the transcript at the
 * cursor, streams interim transcripts into the input, and writes the final
 * transcript when it arrives.
 *
 * @param setInputValueRaw - Fallback setter used when no insert handle is mounted.
 * @param inputValueRef - Ref holding the current input value.
 * @param insertTextRef - Ref to the input's insert handle (cursor-aware writes).
 * @returns Strip/reset helpers for the key handler, the voice key-event
 *   forwarder, and the range of interim text currently in the input.
 */
export function useVoiceIntegration({
  setInputValueRaw,
  inputValueRef,
  insertTextRef,
}: UseVoiceIntegrationArgs): UseVoiceIntegrationResult {
  const { addNotification } = useNotifications()

  // Tracks the input content before/after the cursor when voice starts,
  // so interim transcripts can be inserted at the cursor position without
  // clobbering surrounding user text.
  const voicePrefixRef = useRef<string | null>(null)
  const voiceSuffixRef = useRef<string>('')
  // Tracks the last input value this hook wrote (via anchor, interim effect,
  // or handleVoiceTranscript). If inputValueRef.current diverges, the user
  // submitted or edited — both write paths bail to avoid clobbering. This is
  // the only guard that correctly handles empty-prefix-empty-suffix: a
  // startsWith('')/endsWith('') check vacuously passes, and a length check
  // can't distinguish a cleared input from a never-set one.
  const lastSetInputRef = useRef<string | null>(null)

  // Strip trailing hold-key chars (and optionally capture the voice
  // anchor). Called during warmup (to clean up chars that leaked past
  // stopImmediatePropagation — listener order is not guaranteed) and
  // on activation (with anchor=true to capture the prefix/suffix around
  // the cursor for interim transcript placement). The caller passes the
  // exact count it expects to strip so pre-existing chars at the
  // boundary are preserved (e.g. the "v" in "hav" when hold-key is "v").
  // The floor option sets a minimum trailing count to leave behind
  // (during warmup this is the count we intentionally let through, so
  // defensive cleanup only removes leaks). Returns the number of
  // trailing chars remaining after stripping. When nothing changes, no
  // state update is performed.
  const stripTrailing = useCallback(
    (
      maxStrip: number,
      { char = ' ', anchor = false, floor = 0 }: StripOpts = {},
    ) => {
      const prev = inputValueRef.current
      const offset = insertTextRef.current?.cursorOffset ?? prev.length
      const beforeCursor = prev.slice(0, offset)
      const afterCursor = prev.slice(offset)
      // When the hold key is space, also count full-width spaces (U+3000)
      // that a CJK IME may have inserted for the same physical key.
      // U+3000 is BMP single-code-unit so indices align with beforeCursor.
      const scan =
        char === ' ' ? normalizeFullWidthSpace(beforeCursor) : beforeCursor
      let trailing = 0
      while (
        trailing < scan.length &&
        scan[scan.length - 1 - trailing] === char
      ) {
        trailing++
      }
      const stripCount = Math.max(0, Math.min(trailing - floor, maxStrip))
      const remaining = trailing - stripCount
      const stripped = beforeCursor.slice(0, beforeCursor.length - stripCount)
      // When anchoring with a non-space suffix, insert a gap space so the
      // waveform cursor sits on the gap instead of covering the first
      // suffix letter. The interim transcript effect maintains this same
      // structure (prefix + leading + interim + trailing + suffix), so
      // the gap is seamless once transcript text arrives.
      // Always overwrite on anchor — if a prior activation failed to start
      // voice (voiceState stayed 'idle'), the cleanup effect didn't fire and
      // the old anchor is stale. anchor=true is only passed on the single
      // activation call, never during recording, so overwrite is safe.
      let gap = ''
      if (anchor) {
        voicePrefixRef.current = stripped
        voiceSuffixRef.current = afterCursor
        if (afterCursor.length > 0 && !/^\s/.test(afterCursor)) {
          gap = ' '
        }
      }
      const newValue = stripped + gap + afterCursor
      if (anchor) lastSetInputRef.current = newValue
      if (newValue === prev && stripCount === 0) return remaining
      if (insertTextRef.current) {
        insertTextRef.current.setInputWithCursor(newValue, stripped.length)
      } else {
        setInputValueRaw(newValue)
      }
      return remaining
    },
    [setInputValueRaw, inputValueRef, insertTextRef],
  )

  // Undo the gap space inserted by stripTrailing(..., {anchor:true}) and
  // reset the voice prefix/suffix refs. Called when voice activation fails
  // (voiceState stays 'idle' after voiceHandleKeyEvent), so the cleanup
  // effect (voiceState useEffect below) — which only fires on voiceState
  // transitions — can't reach the stale anchor. Without this, the gap space
  // and stale refs persist in the input.
  const resetAnchor = useCallback(() => {
    const prefix = voicePrefixRef.current
    if (prefix === null) return
    const suffix = voiceSuffixRef.current
    voicePrefixRef.current = null
    voiceSuffixRef.current = ''
    const restored = prefix + suffix
    if (insertTextRef.current) {
      insertTextRef.current.setInputWithCursor(restored, prefix.length)
    } else {
      setInputValueRaw(restored)
    }
  }, [setInputValueRaw, insertTextRef])

  // Voice state selectors. useVoiceEnabled = user intent (settings) +
  // auth + GB kill-switch, with the auth half memoized on authVersion so
  // render loops never hit a cold keychain spawn.
  const voiceEnabled = feature('VOICE_MODE') ? useVoiceEnabled() : false
  const voiceState = feature('VOICE_MODE')
    ? useVoiceState(s => s.voiceState)
    : ('idle' as const)
  const voiceInterimTranscript = feature('VOICE_MODE')
    ? useVoiceState(s => s.voiceInterimTranscript)
    : ''

  // Set the voice anchor for focus mode (where recording starts via terminal
  // focus, not key hold). Key-hold sets the anchor in stripTrailing.
  useEffect(() => {
    if (!feature('VOICE_MODE')) return
    if (voiceState === 'recording' && voicePrefixRef.current === null) {
      const input = inputValueRef.current
      const offset = insertTextRef.current?.cursorOffset ?? input.length
      voicePrefixRef.current = input.slice(0, offset)
      voiceSuffixRef.current = input.slice(offset)
      lastSetInputRef.current = input
    }
    if (voiceState === 'idle') {
      voicePrefixRef.current = null
      voiceSuffixRef.current = ''
      lastSetInputRef.current = null
    }
  }, [voiceState, inputValueRef, insertTextRef])

  // Live-update the prompt input with the interim transcript as voice
  // transcribes speech. The prefix (user-typed text before the cursor) is
  // preserved and the transcript is inserted between prefix and suffix.
  useEffect(() => {
    if (!feature('VOICE_MODE')) return
    if (voicePrefixRef.current === null) return
    const prefix = voicePrefixRef.current
    const suffix = voiceSuffixRef.current
    // Submit race: if the input isn't what this hook last set it to, the
    // user submitted (clearing it) or edited it. voicePrefixRef is only
    // cleared on voiceState→idle, so it's still set during the 'processing'
    // window between CloseStream and WS close — this catches refined
    // TranscriptText arriving then and re-filling a cleared input.
    if (inputValueRef.current !== lastSetInputRef.current) return
    const needsSpace =
      prefix.length > 0 &&
      !/\s$/.test(prefix) &&
      voiceInterimTranscript.length > 0
    // Don't gate on voiceInterimTranscript.length -- when interim clears to ''
    // after handleVoiceTranscript sets the final text, the trailing space
    // between prefix and suffix must still be preserved.
    const needsTrailingSpace = suffix.length > 0 && !/^\s/.test(suffix)
    const leadingSpace = needsSpace ? ' ' : ''
    const trailingSpace = needsTrailingSpace ? ' ' : ''
    const newValue =
      prefix + leadingSpace + voiceInterimTranscript + trailingSpace + suffix
    // Position cursor after the transcribed text (before suffix)
    const cursorPos =
      prefix.length + leadingSpace.length + voiceInterimTranscript.length
    if (insertTextRef.current) {
      insertTextRef.current.setInputWithCursor(newValue, cursorPos)
    } else {
      setInputValueRaw(newValue)
    }
    lastSetInputRef.current = newValue
  }, [voiceInterimTranscript, setInputValueRaw, inputValueRef, insertTextRef])

  const handleVoiceTranscript = useCallback(
    (text: string) => {
      if (!feature('VOICE_MODE')) return
      const prefix = voicePrefixRef.current
      // No voice anchor — voice was reset (or never started). Nothing to do.
      if (prefix === null) return
      const suffix = voiceSuffixRef.current
      // Submit race: finishRecording() → user presses Enter (input cleared)
      // → WebSocket close → this callback fires with stale prefix/suffix.
      // If the input isn't what this hook last set (via the interim effect
      // or anchor), the user submitted or edited — don't re-fill. Comparing
      // against `text.length` would false-positive when the final is longer
      // than the interim (ASR routinely adds punctuation/corrections).
      if (inputValueRef.current !== lastSetInputRef.current) return
      const needsSpace =
        prefix.length > 0 && !/\s$/.test(prefix) && text.length > 0
      const needsTrailingSpace =
        suffix.length > 0 && !/^\s/.test(suffix) && text.length > 0
      const leadingSpace = needsSpace ? ' ' : ''
      const trailingSpace = needsTrailingSpace ? ' ' : ''
      const newInput = prefix + leadingSpace + text + trailingSpace + suffix
      // Position cursor after the transcribed text (before suffix)
      const cursorPos = prefix.length + leadingSpace.length + text.length
      if (insertTextRef.current) {
        insertTextRef.current.setInputWithCursor(newInput, cursorPos)
      } else {
        setInputValueRaw(newInput)
      }
      lastSetInputRef.current = newInput
      // Update the prefix to include this chunk so focus mode can continue
      // appending subsequent transcripts after it.
      voicePrefixRef.current = prefix + leadingSpace + text
    },
    [setInputValueRaw, inputValueRef, insertTextRef],
  )

  const voice = voiceNs.useVoice({
    onTranscript: handleVoiceTranscript,
    onError: (message: string) => {
      addNotification({
        key: 'voice-error',
        text: message,
        color: 'error',
        priority: 'immediate',
        timeoutMs: 10_000,
      })
    },
    enabled: voiceEnabled,
    focusMode: false,
  })

  // Compute the character range of interim (not-yet-finalized) transcript
  // text in the input value, so the UI can dim it.
  const interimRange = useMemo((): InterimRange | null => {
    if (!feature('VOICE_MODE')) return null
    if (voicePrefixRef.current === null) return null
    if (voiceInterimTranscript.length === 0) return null
    const prefix = voicePrefixRef.current
    const needsSpace =
      prefix.length > 0 &&
      !/\s$/.test(prefix) &&
      voiceInterimTranscript.length > 0
    const start = prefix.length + (needsSpace ? 1 : 0)
    const end = start + voiceInterimTranscript.length
    return { start, end }
  }, [voiceInterimTranscript])

  return {
    stripTrailing,
    resetAnchor,
    handleKeyEvent: voice.handleKeyEvent,
    interimRange,
  }
}

/**
 * Component that handles hold-to-talk voice activation.
 *
 * The activation key is configurable via keybindings (voice:pushToTalk,
 * default: space). Hold detection depends on OS auto-repeat delivering a
 * stream of events at 30-80ms intervals. Two binding types work:
 *
 * **Modifier + letter (meta+k, ctrl+x, alt+v):** Cleanest. Activates on
 * the first press — a modifier combo is unambiguous intent (can't be
 * typed accidentally), so no hold threshold applies. The letter part
 * auto-repeats while held, feeding release detection in useVoice.ts.
 * No flow-through, no stripping.
 *
 * **Bare chars (space, v, x):** Require HOLD_THRESHOLD rapid presses to
 * activate (a single space could be normal typing). The first
 * WARMUP_THRESHOLD presses flow into the input so a single press types
 * normally. Past that, rapid presses are swallowed; on activation the
 * flow-through chars are stripped. Binding "v" doesn't make "v"
 * untypable — normal typing (>120ms between keystrokes) flows through;
 * only rapid auto-repeat from a held key triggers activation.
 *
 * Known broken: modifier+space (NUL → parsed as ctrl+backtick), chords
 * (discrete sequences, no hold). Validation warns on these.
 */
export function useVoiceKeybindingHandler({
  voiceHandleKeyEvent,
  stripTrailing,
  resetAnchor,
  isActive,
}: {
  voiceHandleKeyEvent: (fallbackMs?: number) => void
  stripTrailing: (maxStrip: number, opts?: StripOpts) => number
  resetAnchor: () => void
  isActive: boolean
}): { handleKeyDown: (e: KeyboardEvent) => void } {
  const getVoiceState = useGetVoiceState()
  const setVoiceState = useSetVoiceState()
  const keybindingContext = useOptionalKeybindingContext()
  const isModalOverlayActive = useIsModalOverlayActive()
  const voiceEnabled = feature('VOICE_MODE') ? useVoiceEnabled() : false
  const voiceState = feature('VOICE_MODE')
    ? useVoiceState(s => s.voiceState)
    : 'idle'

  // Find the configured key for voice:pushToTalk from keybinding context.
  // Forward iteration with last-wins (matching the resolver): if a later
  // Chat binding overrides the same chord with null or a different
  // action, the voice binding is discarded and null is returned — the
  // user explicitly disabled hold-to-talk via binding override, so
  // don't second-guess them with a fallback. The DEFAULT is only used
  // when there's no provider at all. Context filter is required — space
  // is also bound in Settings/Confirmation/Plugin (select:accept etc.);
  // without the filter those would null out the default.
  const voiceKeystroke = useMemo((): ParsedKeystroke | null => {
    if (!keybindingContext) return DEFAULT_VOICE_KEYSTROKE
    let result: ParsedKeystroke | null = null
    for (const binding of keybindingContext.bindings) {
      if (binding.context !== 'Chat') continue
      if (binding.chord.length !== 1) continue
      const ks = binding.chord[0]
      if (!ks) continue
      if (binding.action === 'voice:pushToTalk') {
        result = ks
      } else if (result !== null && keystrokesEqual(ks, result)) {
        // A later binding overrides this chord (null unbind or reassignment)
        result = null
      }
    }
    return result
  }, [keybindingContext])

  // If the binding is a bare (unmodified) single printable char, terminal
  // auto-repeat may batch N keystrokes into one input event (e.g. "vvv"),
  // and the char flows into the text input — we need flow-through + strip.
  // Modifier combos (meta+k, ctrl+x) also auto-repeat (the letter part
  // repeats) but don't insert text, so they're swallowed from the first
  // press with no stripping needed. matchesKeyboardEvent handles those.
  const bareChar =
    voiceKeystroke !== null &&
    voiceKeystroke.key.length === 1 &&
    !voiceKeystroke.ctrl &&
    !voiceKeystroke.alt &&
    !voiceKeystroke.shift &&
    !voiceKeystroke.meta &&
    !voiceKeystroke.super
      ? voiceKeystroke.key
      : null

  const rapidCountRef = useRef(0)
  // How many rapid chars we intentionally let through to the text
  // input (the first WARMUP_THRESHOLD). The activation strip removes
  // up to this many + the activation event's potential leak. For the
  // default (space) this is precise — pre-existing trailing spaces are
  // rare. For letter bindings (validation warns) this may over-strip
  // one pre-existing char if the input already ended in the bound
  // letter (e.g. "hav" + hold "v" → "ha"). We don't track that
  // boundary — it's best-effort and the warning says so.
  const charsInInputRef = useRef(0)
  // Trailing-char count remaining after the activation strip — these
  // belong to the user's anchored prefix and must be preserved during
  // recording's defensive leak cleanup.
  const recordingFloorRef = useRef(0)
  // True when the current recording was started by key-hold (not focus).
  // Used to avoid swallowing keypresses during focus-mode recording.
  const isHoldActiveRef = useRef(false)
  const resetTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null)

  // Reset hold state as soon as we leave 'recording'. The physical hold
  // ends when key-repeat stops (state → 'processing'); keeping the ref
  // set through 'processing' swallows new space presses the user types
  // while the transcript finalizes.
  useEffect(() => {
    if (voiceState !== 'recording') {
      isHoldActiveRef.current = false
      rapidCountRef.current = 0
      charsInInputRef.current = 0
      recordingFloorRef.current = 0
      setVoiceState(prev => {
        if (!prev.voiceWarmingUp) return prev
        return { ...prev, voiceWarmingUp: false }
      })
    }
  }, [voiceState, setVoiceState])

  const handleKeyDown = (e: KeyboardEvent): void => {
    if (!voiceEnabled) return

    // PromptInput is not a valid transcript target — let the hold key
    // flow through instead of swallowing it into stale refs (#33556).
    // Two distinct unmount/unfocus paths (both needed):
    //   - !isActive: local-jsx command hid PromptInput (shouldHidePromptInput)
    //     without registering an overlay — e.g. /install-github-app,
    //     /plugin. Mirrors CommandKeybindingHandlers' isActive gate.
    //   - isModalOverlayActive: overlay (permission dialog, Select with
    //     onCancel) has focus; PromptInput is mounted but focus=false.
    if (!isActive || isModalOverlayActive) return

    // null means the user overrode the default (null-unbind/reassign) —
    // hold-to-talk is disabled via binding. To toggle the feature
    // itself, use /voice.
    if (voiceKeystroke === null) return

    // Match the configured key. Bare chars match by content (handles
    // batched auto-repeat like "vvv") with a modifier reject so e.g.
    // ctrl+v doesn't trip a "v" binding. Modifier combos go through
    // matchesKeyboardEvent (one event per repeat, no batching).
    let repeatCount: number
    if (bareChar !== null) {
      // Reject every modifier a bare binding excludes. superKey is included
      // so the reject agrees with matchesKeyboardEvent and with the
      // `!voiceKeystroke.super` condition that defines bareChar.
      if (e.ctrl || e.meta || e.shift || e.superKey) return
      // When bound to space, also accept U+3000 (full-width space) —
      // CJK IMEs emit it for the same physical key.
      const normalized =
        bareChar === ' ' ? normalizeFullWidthSpace(e.key) : e.key
      // Fast-path: normal typing (any char that isn't the bound one)
      // bails here without allocating. The repeat() check only matters
      // for batched auto-repeat (input.length > 1) which is rare.
      if (normalized[0] !== bareChar) return
      if (
        normalized.length > 1 &&
        normalized !== bareChar.repeat(normalized.length)
      )
        return
      repeatCount = normalized.length
    } else {
      if (!matchesKeyboardEvent(e, voiceKeystroke)) return
      repeatCount = 1
    }

    // Guard: only swallow keypresses when recording was triggered by
    // key-hold. Focus-mode recording also sets voiceState to 'recording',
    // but keypresses should flow through normally (voiceHandleKeyEvent
    // returns early for focus-triggered sessions). We also check voiceState
    // from the store so that if voiceHandleKeyEvent() fails to transition
    // state (module not loaded, stream unavailable) we don't permanently
    // swallow keypresses.
    const currentVoiceState = getVoiceState().voiceState
    if (isHoldActiveRef.current && currentVoiceState !== 'idle') {
      // Already recording — swallow continued keypresses and forward
      // to voice for release detection. For bare chars, defensively
      // strip in case the text input handler fired before this one
      // (listener order is not guaranteed). Modifier combos don't
      // insert text, so nothing to strip.
      e.stopImmediatePropagation()
      if (bareChar !== null) {
        stripTrailing(repeatCount, {
          char: bareChar,
          floor: recordingFloorRef.current,
        })
      }
      voiceHandleKeyEvent()
      return
    }

    // Non-hold recording (focus-mode) or processing is active.
    // Modifier combos must not re-activate: stripTrailing(0,{anchor:true})
    // would overwrite voicePrefixRef with interim text and duplicate the
    // transcript on the next interim update. Pre-#22144, a single tap
    // hit the warmup else-branch (swallow only). Bare chars flow through
    // unconditionally — user may be typing during focus-recording.
    if (currentVoiceState !== 'idle') {
      if (bareChar === null) e.stopImmediatePropagation()
      return
    }

    const countBefore = rapidCountRef.current
    rapidCountRef.current += repeatCount

    // ── Activation ────────────────────────────────────────────
    // Handled first so the warmup branch below does NOT also run
    // on this event — two strip calls in the same tick would both
    // read the stale inputValueRef and the second would under-strip.
    // Modifier combos activate on the first press — they can't be
    // typed accidentally, so the hold threshold (which exists to
    // distinguish typing a space from holding space) doesn't apply.
    if (bareChar === null || rapidCountRef.current >= HOLD_THRESHOLD) {
      e.stopImmediatePropagation()
      if (resetTimerRef.current) {
        clearTimeout(resetTimerRef.current)
        resetTimerRef.current = null
      }
      rapidCountRef.current = 0
      isHoldActiveRef.current = true
      setVoiceState(prev => {
        if (!prev.voiceWarmingUp) return prev
        return { ...prev, voiceWarmingUp: false }
      })
      if (bareChar !== null) {
        // Strip the intentional warmup chars plus this event's leak
        // (if text input fired first). Cap covers both; min(trailing)
        // handles the no-leak case. Anchor the voice prefix here.
        // The return value (remaining) becomes the floor for
        // recording-time leak cleanup.
        recordingFloorRef.current = stripTrailing(
          charsInInputRef.current + repeatCount,
          { char: bareChar, anchor: true },
        )
        charsInInputRef.current = 0
        voiceHandleKeyEvent()
      } else {
        // Modifier combo: nothing inserted, nothing to strip. Just
        // anchor the voice prefix at the current cursor position.
        // Longer fallback: this call is at t=0 (before auto-repeat),
        // so the gap to the next keypress is the OS initial repeat
        // *delay* (up to ~2s), not the repeat *rate* (~30-80ms).
        stripTrailing(0, { anchor: true })
        voiceHandleKeyEvent(MODIFIER_FIRST_PRESS_FALLBACK_MS)
      }
      // If voice failed to transition (module not loaded, stream
      // unavailable, stale enabled), clear the ref so a later
      // focus-mode recording doesn't inherit stale hold state
      // and swallow keypresses. Store is synchronous — the check is
      // immediate. The anchor set by stripTrailing above will
      // be overwritten on retry (anchor always overwrites now).
      if (getVoiceState().voiceState === 'idle') {
        isHoldActiveRef.current = false
        resetAnchor()
      }
      return
    }

    // ── Warmup (bare-char only; modifier combos activated above) ──
    // First WARMUP_THRESHOLD chars flow to the text input so normal
    // typing has zero latency (a single press types normally).
    // Subsequent rapid chars are swallowed so the input stays aligned
    // with the warmup UI. Strip defensively (listener order is not
    // guaranteed — text input may have already added the char). The
    // floor preserves the intentional warmup chars; the strip is a
    // no-op when nothing leaked. Check countBefore so the event that
    // crosses the threshold still flows through (terminal batching).
    if (countBefore >= WARMUP_THRESHOLD) {
      e.stopImmediatePropagation()
      stripTrailing(repeatCount, {
        char: bareChar,
        floor: charsInInputRef.current,
      })
    } else {
      charsInInputRef.current += repeatCount
    }

    // Show warmup feedback once we detect a hold pattern
    if (rapidCountRef.current >= WARMUP_THRESHOLD) {
      setVoiceState(prev => {
        if (prev.voiceWarmingUp) return prev
        return { ...prev, voiceWarmingUp: true }
      })
    }

    if (resetTimerRef.current) {
      clearTimeout(resetTimerRef.current)
    }
    // The refs and setter are passed as timer arguments rather than
    // captured, so the callback receives them as parameters when the
    // rapid-key window (RAPID_KEY_GAP_MS) elapses without another press.
    resetTimerRef.current = setTimeout(
      (resetTimerRef, rapidCountRef, charsInInputRef, setVoiceState) => {
        resetTimerRef.current = null
        rapidCountRef.current = 0
        charsInInputRef.current = 0
        setVoiceState(prev => {
          if (!prev.voiceWarmingUp) return prev
          return { ...prev, voiceWarmingUp: false }
        })
      },
      RAPID_KEY_GAP_MS,
      resetTimerRef,
      rapidCountRef,
      charsInInputRef,
      setVoiceState,
    )
  }

  // Backward-compat bridge: REPL.tsx doesn't yet wire handleKeyDown to an
  // onKeyDown handler of its own. Subscribe via useInput and adapt
  // InputEvent → KeyboardEvent until the consumer is migrated (separate PR).
  // TODO(onKeyDown-migration): remove once REPL passes handleKeyDown.
  useInput(
    (_input, _key, event) => {
      const kbEvent = new KeyboardEvent(event.keypress)
      handleKeyDown(kbEvent)
      // handleKeyDown stopped the adapter event, not the InputEvent the
      // emitter actually checks — forward it so the text input's useInput
      // listener is skipped and held spaces don't leak into the prompt.
      if (kbEvent.didStopImmediatePropagation()) {
        event.stopImmediatePropagation()
      }
    },
    { isActive },
  )

  return { handleKeyDown }
}

// TODO(onKeyDown-migration): temporary shim so existing JSX callers
// (<VoiceKeybindingHandler .../>) keep compiling. Remove once REPL.tsx
// wires handleKeyDown directly.
/**
 * Render-nothing component wrapper around useVoiceKeybindingHandler: it
 * runs the hook for its side effects and always returns null.
 */
export function VoiceKeybindingHandler(props: {
  voiceHandleKeyEvent: (fallbackMs?: number) => void
  stripTrailing: (maxStrip: number, opts?: StripOpts) => number
  resetAnchor: () => void
  isActive: boolean
}): null {
  useVoiceKeybindingHandler(props)
  return null
}