mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-15 12:55:51 +00:00
feat: 集成豆包 ASR 语音识别后端,支持 /voice doubao 切换 (#357)
* feat: 集成豆包 ASR 语音识别后端,支持 /voice doubao 切换 - 新增 src/services/doubaoSTT.ts 适配模块,将 doubaoime-asr 的 AsyncGenerator 协议适配为现有 VoiceStreamConnection 接口 - /voice doubao 启用豆包后端,/voice 使用默认 Anthropic 后端 - 后端选择持久化到 settings.json 的 voiceProvider 字段 - 豆包后端跳过 Anthropic OAuth 认证、语言限制和 Focus Mode - 豆包后端松手即出结果,跳过 processing 状态 - 凭证文件存放在 ~/.claude/tts/doubao/credentials.json - doubaoime-asr 作为 optionalDependencies 安装 - 移除 /voice 命令的 claude-ai 可用性限制,所有用户可用 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * docs: 更新 Voice Mode 文档,添加豆包 ASR 后端说明和致谢 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,17 +1,15 @@
|
||||
import type { Command } from '../../commands.js'
|
||||
import {
|
||||
isVoiceGrowthBookEnabled,
|
||||
isVoiceModeEnabled,
|
||||
isVoiceAvailable,
|
||||
} from '../../voice/voiceModeEnabled.js'
|
||||
|
||||
const voice = {
|
||||
type: 'local',
|
||||
name: 'voice',
|
||||
description: 'Toggle voice mode',
|
||||
availability: ['claude-ai'],
|
||||
isEnabled: () => isVoiceGrowthBookEnabled(),
|
||||
description: 'Toggle voice mode. Use /voice doubao for Doubao ASR backend',
|
||||
isEnabled: () => isVoiceAvailable(),
|
||||
get isHidden() {
|
||||
return !isVoiceModeEnabled()
|
||||
return !isVoiceAvailable()
|
||||
},
|
||||
supportsNonInteractive: false,
|
||||
load: () => import('./voice.js'),
|
||||
|
||||
@@ -2,29 +2,19 @@ import { normalizeLanguageForSTT } from '../../hooks/useVoice.js'
|
||||
import { getShortcutDisplay } from '../../keybindings/shortcutFormat.js'
|
||||
import { logEvent } from '../../services/analytics/index.js'
|
||||
import type { LocalCommandCall } from '../../types/command.js'
|
||||
import { isAnthropicAuthEnabled } from '../../utils/auth.js'
|
||||
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
|
||||
import { settingsChangeDetector } from '../../utils/settings/changeDetector.js'
|
||||
import {
|
||||
getInitialSettings,
|
||||
updateSettingsForSource,
|
||||
} from '../../utils/settings/settings.js'
|
||||
import { isVoiceModeEnabled } from '../../voice/voiceModeEnabled.js'
|
||||
import { isVoiceAvailable } from '../../voice/voiceModeEnabled.js'
|
||||
|
||||
const LANG_HINT_MAX_SHOWS = 2
|
||||
|
||||
export const call: LocalCommandCall = async () => {
|
||||
// Check auth and kill-switch before allowing voice mode
|
||||
if (!isVoiceModeEnabled()) {
|
||||
// Differentiate: OAuth-less users get an auth hint, everyone else
|
||||
// gets nothing (command shouldn't be reachable when the kill-switch is on).
|
||||
if (!isAnthropicAuthEnabled()) {
|
||||
return {
|
||||
type: 'text' as const,
|
||||
value:
|
||||
'Voice mode requires a Claude.ai account. Please run /login to sign in.',
|
||||
}
|
||||
}
|
||||
export const call: LocalCommandCall = async (args) => {
|
||||
// Check kill-switch before allowing voice mode
|
||||
if (!isVoiceAvailable()) {
|
||||
return {
|
||||
type: 'text' as const,
|
||||
value: 'Voice mode is not available.',
|
||||
@@ -33,6 +23,47 @@ export const call: LocalCommandCall = async () => {
|
||||
|
||||
const currentSettings = getInitialSettings()
|
||||
const isCurrentlyEnabled = currentSettings.voiceEnabled === true
|
||||
const providerArg = args?.trim().toLowerCase()
|
||||
|
||||
// Handle provider argument when already enabled — switch backend only
|
||||
if (isCurrentlyEnabled && providerArg === 'doubao') {
|
||||
const result = updateSettingsForSource('userSettings', {
|
||||
voiceProvider: 'doubao',
|
||||
})
|
||||
if (result.error) {
|
||||
return {
|
||||
type: 'text' as const,
|
||||
value:
|
||||
'Failed to update settings. Check your settings file for syntax errors.',
|
||||
}
|
||||
}
|
||||
settingsChangeDetector.notifyChange('userSettings')
|
||||
const key = getShortcutDisplay('voice:pushToTalk', 'Chat', 'Space')
|
||||
return {
|
||||
type: 'text' as const,
|
||||
value: `Voice mode switched to Doubao ASR. Hold ${key} to record.`,
|
||||
}
|
||||
}
|
||||
|
||||
// Handle provider argument when already enabled — switch to anthropic
|
||||
if (isCurrentlyEnabled && providerArg === 'anthropic') {
|
||||
const result = updateSettingsForSource('userSettings', {
|
||||
voiceProvider: 'anthropic',
|
||||
})
|
||||
if (result.error) {
|
||||
return {
|
||||
type: 'text' as const,
|
||||
value:
|
||||
'Failed to update settings. Check your settings file for syntax errors.',
|
||||
}
|
||||
}
|
||||
settingsChangeDetector.notifyChange('userSettings')
|
||||
const key = getShortcutDisplay('voice:pushToTalk', 'Chat', 'Space')
|
||||
return {
|
||||
type: 'text' as const,
|
||||
value: `Voice mode switched to Anthropic STT. Hold ${key} to record.`,
|
||||
}
|
||||
}
|
||||
|
||||
// Toggle OFF — no checks needed
|
||||
if (isCurrentlyEnabled) {
|
||||
@@ -54,7 +85,10 @@ export const call: LocalCommandCall = async () => {
|
||||
}
|
||||
}
|
||||
|
||||
// Toggle ON — run pre-flight checks first
|
||||
// Toggle ON — determine provider from argument or default
|
||||
const provider = providerArg === 'doubao' ? 'doubao' : 'anthropic'
|
||||
|
||||
// Run pre-flight checks
|
||||
const { isVoiceStreamAvailable } = await import(
|
||||
'../../services/voiceStreamSTT.js'
|
||||
)
|
||||
@@ -70,8 +104,8 @@ export const call: LocalCommandCall = async () => {
|
||||
}
|
||||
}
|
||||
|
||||
// Check for API key
|
||||
if (!isVoiceStreamAvailable()) {
|
||||
// Check for API key (only for Anthropic backend — Doubao uses its own credentials)
|
||||
if (provider !== 'doubao' && !isVoiceStreamAvailable()) {
|
||||
return {
|
||||
type: 'text' as const,
|
||||
value:
|
||||
@@ -111,8 +145,11 @@ export const call: LocalCommandCall = async () => {
|
||||
}
|
||||
}
|
||||
|
||||
// All checks passed — enable voice
|
||||
const result = updateSettingsForSource('userSettings', { voiceEnabled: true })
|
||||
// All checks passed — enable voice with provider
|
||||
const result = updateSettingsForSource('userSettings', {
|
||||
voiceEnabled: true,
|
||||
...(provider === 'doubao' ? { voiceProvider: 'doubao' } : {}),
|
||||
})
|
||||
if (result.error) {
|
||||
return {
|
||||
type: 'text' as const,
|
||||
@@ -123,28 +160,30 @@ export const call: LocalCommandCall = async () => {
|
||||
settingsChangeDetector.notifyChange('userSettings')
|
||||
logEvent('tengu_voice_toggled', { enabled: true })
|
||||
const key = getShortcutDisplay('voice:pushToTalk', 'Chat', 'Space')
|
||||
const stt = normalizeLanguageForSTT(currentSettings.language)
|
||||
const cfg = getGlobalConfig()
|
||||
// Reset the hint counter whenever the resolved STT language changes
|
||||
// (including first-ever enable, where lastLanguage is undefined).
|
||||
const langChanged = cfg.voiceLangHintLastLanguage !== stt.code
|
||||
const priorCount = langChanged ? 0 : (cfg.voiceLangHintShownCount ?? 0)
|
||||
const showHint = !stt.fellBackFrom && priorCount < LANG_HINT_MAX_SHOWS
|
||||
let langNote = ''
|
||||
if (stt.fellBackFrom) {
|
||||
langNote = ` Note: "${stt.fellBackFrom}" is not a supported dictation language; using English. Change it via /config.`
|
||||
} else if (showHint) {
|
||||
langNote = ` Dictation language: ${stt.code} (/config to change).`
|
||||
}
|
||||
if (langChanged || showHint) {
|
||||
saveGlobalConfig(prev => ({
|
||||
...prev,
|
||||
voiceLangHintShownCount: priorCount + (showHint ? 1 : 0),
|
||||
voiceLangHintLastLanguage: stt.code,
|
||||
}))
|
||||
const providerLabel = provider === 'doubao' ? 'Doubao ASR' : 'Anthropic'
|
||||
// Doubao backend handles all languages natively — skip language hints
|
||||
if (provider !== 'doubao') {
|
||||
const stt = normalizeLanguageForSTT(currentSettings.language)
|
||||
const cfg = getGlobalConfig()
|
||||
const langChanged = cfg.voiceLangHintLastLanguage !== stt.code
|
||||
const priorCount = langChanged ? 0 : (cfg.voiceLangHintShownCount ?? 0)
|
||||
const showHint = !stt.fellBackFrom && priorCount < LANG_HINT_MAX_SHOWS
|
||||
if (stt.fellBackFrom) {
|
||||
langNote = ` Note: "${stt.fellBackFrom}" is not a supported dictation language; using English. Change it via /config.`
|
||||
} else if (showHint) {
|
||||
langNote = ` Dictation language: ${stt.code} (/config to change).`
|
||||
}
|
||||
if (langChanged || showHint) {
|
||||
saveGlobalConfig(prev => ({
|
||||
...prev,
|
||||
voiceLangHintShownCount: priorCount + (showHint ? 1 : 0),
|
||||
voiceLangHintLastLanguage: stt.code,
|
||||
}))
|
||||
}
|
||||
}
|
||||
return {
|
||||
type: 'text' as const,
|
||||
value: `Voice mode enabled. Hold ${key} to record.${langNote}`,
|
||||
value: `Voice mode enabled (${providerLabel}). Hold ${key} to record.${langNote}`,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,6 +20,10 @@ import {
|
||||
isVoiceStreamAvailable,
|
||||
type VoiceStreamConnection,
|
||||
} from '../services/voiceStreamSTT.js'
|
||||
import {
|
||||
connectDoubaoStream,
|
||||
isDoubaoAvailableSync,
|
||||
} from '../services/doubaoSTT.js'
|
||||
import { logForDebugging } from '../utils/debug.js'
|
||||
import { toError } from '../utils/errors.js'
|
||||
import { getSystemLocaleLanguage } from '../utils/intl.js'
|
||||
@@ -27,6 +31,10 @@ import { logError } from '../utils/log.js'
|
||||
import { getInitialSettings } from '../utils/settings/settings.js'
|
||||
import { sleep } from '../utils/sleep.js'
|
||||
|
||||
/**
 * Reports whether the settings snapshot returned by getInitialSettings()
 * selects the Doubao speech-to-text backend.
 *
 * @returns true only when `voiceProvider` is exactly 'doubao'; an unset
 *   provider or any other value yields false.
 */
function isDoubaoProvider(): boolean {
  const { voiceProvider } = getInitialSettings()
  return voiceProvider === 'doubao'
}
|
||||
|
||||
// ─── Language normalization ─────────────────────────────────────────────
|
||||
|
||||
const DEFAULT_STT_LANGUAGE = 'en'
|
||||
@@ -574,7 +582,7 @@ export function useVoice({
|
||||
// stop when it loses focus. This enables a "multi-clauding army"
|
||||
// workflow where voice input follows window focus.
|
||||
useEffect(() => {
|
||||
if (!enabled || !focusMode) {
|
||||
if (!enabled || !focusMode || isDoubaoProvider()) {
|
||||
// Focus mode was disabled while a focus-driven recording was active —
|
||||
// stop the recording so it doesn't linger until the silence timer fires.
|
||||
if (focusTriggeredRef.current && stateRef.current === 'recording') {
|
||||
@@ -778,7 +786,11 @@ export function useVoice({
|
||||
|
||||
const attemptConnect = (keyterms: string[]): void => {
|
||||
const myAttemptGen = attemptGenRef.current
|
||||
void connectVoiceStream(
|
||||
// Select STT backend based on settings.voiceProvider
|
||||
const connectFn = isDoubaoProvider()
|
||||
? (cbs: Parameters<typeof connectDoubaoStream>[0], opts: Parameters<typeof connectDoubaoStream>[1]) => connectDoubaoStream(cbs, opts)
|
||||
: (cbs: Parameters<typeof connectVoiceStream>[0], opts: Parameters<typeof connectVoiceStream>[1]) => connectVoiceStream(cbs, opts)
|
||||
void connectFn(
|
||||
{
|
||||
onTranscript: (text: string, isFinal: boolean) => {
|
||||
if (isStale()) return
|
||||
@@ -1007,7 +1019,12 @@ export function useVoice({
|
||||
})
|
||||
}
|
||||
|
||||
void getVoiceKeyterms().then(attemptConnect)
|
||||
// Doubao backend doesn't use keyterms — skip the async fetch
|
||||
if (isDoubaoProvider()) {
|
||||
attemptConnect([])
|
||||
} else {
|
||||
void getVoiceKeyterms().then(attemptConnect)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Hold-to-talk handler ────────────────────────────────────────────
|
||||
@@ -1021,7 +1038,8 @@ export function useVoice({
|
||||
// delay of ~500ms on macOS).
|
||||
const handleKeyEvent = useCallback(
|
||||
(fallbackMs = REPEAT_FALLBACK_MS): void => {
|
||||
if (!enabled || !isVoiceStreamAvailable()) {
|
||||
const sttAvailable = isDoubaoProvider() ? isDoubaoAvailableSync() : isVoiceStreamAvailable()
|
||||
if (!enabled || !sttAvailable) {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -7,19 +7,22 @@ import {
|
||||
|
||||
/**
 * React hook: should voice mode be treated as enabled right now?
 *
 * Combines three inputs:
 *  - user intent: `settings.voiceEnabled === true`
 *  - auth: `hasVoiceAuth()`, memoized on `authVersion`. This is skipped for
 *    the Doubao backend, which does not go through the Anthropic auth check.
 *  - the GrowthBook gate: `isVoiceGrowthBookEnabled()`, evaluated on every
 *    call (never memoized) so a mid-session flip is picked up on the next
 *    render.
 *
 * Per the original notes, the auth half is the expensive one (a cold token
 * lookup can spawn a synchronous keychain read), which is why only it is
 * memoized, and `authVersion` is bumped on /login only.
 *
 * @returns true when the user enabled voice, the gate is open, and either the
 *   provider is 'doubao' or the Anthropic auth check passes.
 */
export function useVoiceEnabled(): boolean {
  // Every hook is invoked before any branching (Rules of Hooks), even though
  // `authed` is unused on the Doubao path.
  const wantsVoice = useAppState(s => s.settings.voiceEnabled === true)
  const provider = useAppState(s => s.settings.voiceProvider)
  const authVersion = useAppState(s => s.authVersion)
  // eslint-disable-next-line react-hooks/exhaustive-deps
  const authed = useMemo(hasVoiceAuth, [authVersion])

  // Doubao needs no Anthropic auth; every other provider does.
  const authSatisfied = provider === 'doubao' || authed
  return wantsVoice && authSatisfied && isVoiceGrowthBookEnabled()
}
|
||||
|
||||
230
src/services/doubaoSTT.ts
Normal file
230
src/services/doubaoSTT.ts
Normal file
@@ -0,0 +1,230 @@
|
||||
// Doubao (豆包) ASR speech-to-text adapter for voice mode.
|
||||
//
|
||||
// Wraps the doubaoime-asr npm package to expose the same interface as
|
||||
// voiceStreamSTT.ts. The doubao backend uses an AsyncGenerator-based
|
||||
// streaming protocol internally; this adapter bridges it to the
|
||||
// send/finalize/close pattern used by useVoice.ts.
|
||||
|
||||
import { homedir } from 'node:os'
|
||||
import type { ASRResponse } from 'doubaoime-asr'
|
||||
import type { FinalizeSource, VoiceStreamCallbacks, VoiceStreamConnection } from './voiceStreamSTT.js'
|
||||
import { logForDebugging } from '../utils/debug.js'
|
||||
import { logError } from '../utils/log.js'
|
||||
|
||||
// Re-export FinalizeSource so useVoice can import from either module
|
||||
export type { FinalizeSource } from './voiceStreamSTT.js'
|
||||
|
||||
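// Maximum time to wait for the generator to finish after the end-of-stream signal.
// NOTE(review): nothing in this module references this constant — finalize() resolves immediately. Wire it up or remove it.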
|
||||
const FINALIZE_SAFETY_TIMEOUT_MS = 5_000
|
||||
|
||||
// ─── AsyncIterable audio queue ─────────────────────────────────────────
|
||||
|
||||
/**
 * Push-based audio queue exposed as an `AsyncIterable<Uint8Array>`.
 *
 * Producer side:
 *  - `push(chunk)` delivers a chunk to a pending reader, or buffers it.
 *  - `push(null)` signals end-of-stream; chunks already buffered are still
 *    drained before readers see `done`.
 *  - `abort()` ends the stream immediately and discards buffered chunks.
 *
 * Consumer side:
 *  - `next()` yields buffered chunks in FIFO order, then waits for more.
 *  - `return()` is invoked by `for await` when the consumer stops early
 *    (break, return or throw). It aborts the queue so later `push()` calls
 *    become no-ops instead of buffering audio nobody will read.
 */
class AudioChunkQueue implements AsyncIterable<Uint8Array> {
  private readonly chunks: Uint8Array[] = []
  // Resolvers of next() calls that arrived while the buffer was empty. Kept as
  // a list so overlapping next() calls are each settled instead of the newest
  // overwriting the previous one.
  private readonly waiters: Array<(result: IteratorResult<Uint8Array>) => void> = []
  private done = false

  /**
   * Enqueue a chunk, or pass `null` to signal end-of-stream.
   * Ignored once the queue has ended or been aborted.
   */
  push(chunk: Uint8Array | null): void {
    if (this.done) return
    if (chunk === null) {
      this.done = true
      this.releaseWaiters()
      return
    }
    const waiter = this.waiters.shift()
    if (waiter) {
      waiter({ value: chunk, done: false })
    } else {
      this.chunks.push(chunk)
    }
  }

  /** End the stream now: drop buffered chunks and complete pending readers. */
  abort(): void {
    this.done = true
    this.chunks.length = 0
    this.releaseWaiters()
  }

  [Symbol.asyncIterator](): AsyncIterator<Uint8Array> {
    return {
      next: async (): Promise<IteratorResult<Uint8Array>> => {
        const chunk = this.chunks.shift()
        if (chunk !== undefined) {
          return { value: chunk, done: false }
        }
        if (this.done) {
          return { value: undefined, done: true }
        }
        return new Promise<IteratorResult<Uint8Array>>((resolve) => {
          this.waiters.push(resolve)
        })
      },
      return: async (): Promise<IteratorResult<Uint8Array>> => {
        this.abort()
        return { value: undefined, done: true }
      },
    }
  }

  /** Settle every pending next() with the end-of-stream result. */
  private releaseWaiters(): void {
    for (const waiter of this.waiters.splice(0)) {
      waiter({ value: undefined, done: true })
    }
  }
}
|
||||
|
||||
// ─── Availability ────────────────────────────────────────────────────────
|
||||
|
||||
// Cached result of the package probe: null until isDoubaoAvailable() has run.
let doubaoAvailable: boolean | null = null

/**
 * Probes whether the optional `doubaoime-asr` package can be imported.
 *
 * The first call attempts the dynamic import and caches the outcome; later
 * calls return the cached value without importing again.
 *
 * @returns true when the import succeeded, false when it threw.
 */
export async function isDoubaoAvailable(): Promise<boolean> {
  if (doubaoAvailable === null) {
    let loaded = true
    try {
      await import('doubaoime-asr')
    } catch {
      // Any import failure is treated as "not available".
      loaded = false
    }
    doubaoAvailable = loaded
  }
  return doubaoAvailable
}
|
||||
|
||||
/**
 * Synchronous availability check for callers that cannot await.
 *
 * Returns the cached result of isDoubaoAvailable() when that probe has already
 * run. When no result is cached yet it optimistically returns true; the real
 * import happens in connectDoubaoStream, which reports a missing package via
 * callbacks.onError.
 */
export function isDoubaoAvailableSync(): boolean {
  return doubaoAvailable ?? true
}
|
||||
|
||||
// ─── Connection ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Opens a Doubao ASR session and adapts it to the VoiceStreamConnection
 * interface (send / finalize / close / isConnected).
 *
 * Flow:
 *  1. Dynamically import `doubaoime-asr`; on failure report a fatal error and
 *     return null.
 *  2. Ensure credentials exist at ~/.claude/tts/doubao/credentials.json; on
 *     failure report a fatal error and return null.
 *  3. Fire `callbacks.onReady(connection)` right away, then consume the
 *     `transcribeRealtime` generator in the background, forwarding interim and
 *     final transcripts to `callbacks.onTranscript`.
 *
 * @param callbacks Receivers for ready / transcript / error / close events.
 * @param _options Accepted for signature parity with the other backend; unused.
 * @returns The connection, or null when the package or credentials are
 *   unavailable (in which case `callbacks.onError` has already been called
 *   with `{ fatal: true }`).
 */
export async function connectDoubaoStream(
  callbacks: VoiceStreamCallbacks,
  _options?: { language?: string },
): Promise<VoiceStreamConnection | null> {
  let doubaoAsr: typeof import('doubaoime-asr')
  try {
    doubaoAsr = await import('doubaoime-asr')
  } catch (err) {
    // Keep the underlying cause in the log; the user-facing message stays short.
    logError(new Error(`[doubao-asr] Failed to import doubaoime-asr package: ${String(err)}`))
    callbacks.onError('doubaoime-asr package is not installed. Install it with: bun add doubaoime-asr', { fatal: true })
    return null
  }

  const { transcribeRealtime, ASRConfig, ResponseType } = doubaoAsr

  const queue = new AudioChunkQueue()
  // finalized: no more audio is accepted (set by finalize() and close()).
  let finalized = false
  // closed: close() has run; no further callbacks may be delivered.
  let closed = false
  // streamEnded: the background consumer has finished, normally or with an error.
  let streamEnded = false

  const connection: VoiceStreamConnection = {
    send(audioChunk: Buffer): void {
      if (finalized) return
      // View over the same bytes — no copy.
      queue.push(new Uint8Array(audioChunk.buffer, audioChunk.byteOffset, audioChunk.byteLength))
    },
    finalize(): Promise<FinalizeSource> {
      if (finalized) return Promise.resolve<FinalizeSource>('ws_already_closed')
      finalized = true
      queue.push(null) // signal end-of-stream to the generator
      // Doubao returns FINAL_RESULT during recording — by the time the user
      // releases the key, the transcripts have already been delivered.
      // Resolve immediately so the UI skips the 'processing' state.
      logForDebugging('[doubao-asr] Finalize — resolving immediately')
      return Promise.resolve<FinalizeSource>('post_closestream_endpoint')
    },
    close(): void {
      // Idempotent: onClose fires at most once even if close() is called again.
      if (closed) return
      closed = true
      finalized = true
      queue.abort()
      callbacks.onClose()
    },
    isConnected(): boolean {
      return !closed && !streamEnded
    },
  }

  // Start the ASR session in the background
  const config = new ASRConfig({ credentialPath: `${homedir()}/.claude/tts/doubao/credentials.json` })

  // Ensure credentials are initialized (may auto-generate)
  try {
    await config.ensureCredentials()
  } catch (err) {
    logError(new Error(`[doubao-asr] Credential initialization failed: ${String(err)}`))
    callbacks.onError(`Doubao ASR 凭证初始化失败: ${String(err)}`, { fatal: true })
    return null
  }

  // Fire onReady immediately — the doubao backend accepts audio through the
  // queue and handles its own connection, so there is no handshake to wait
  // for. The caller needs onReady before it routes audio via connection.send().
  logForDebugging('[doubao-asr] Firing onReady immediately')
  callbacks.onReady(connection)

  // Consume the AsyncGenerator in the background
  void (async () => {
    try {
      const audioSource: AsyncIterable<Uint8Array> = queue
      const gen: AsyncGenerator<ASRResponse> = transcribeRealtime(audioSource, { config })

      for await (const resp of gen) {
        // After close() nothing more may reach the callbacks. Breaking out
        // also lets for-await call gen.return() so the session is torn down.
        if (closed) break

        // Once finalized, only final results and the session-finished marker
        // are still of interest; interim results and errors are dropped.
        if (finalized && resp.type !== ResponseType.FINAL_RESULT && resp.type !== ResponseType.SESSION_FINISHED) {
          continue
        }

        switch (resp.type) {
          case ResponseType.SESSION_STARTED:
            logForDebugging('[doubao-asr] Session started')
            break
          case ResponseType.VAD_START:
            logForDebugging('[doubao-asr] VAD detected speech start')
            break
          case ResponseType.INTERIM_RESULT:
            if (resp.text) {
              callbacks.onTranscript(resp.text, false)
            }
            break
          case ResponseType.FINAL_RESULT:
            if (resp.text) {
              callbacks.onTranscript(resp.text, true)
            }
            break
          case ResponseType.ERROR:
            // Only reachable while not finalized (see the filter above).
            logError(new Error(`[doubao-asr] Error: ${resp.errorMsg}`))
            callbacks.onError(resp.errorMsg || 'Doubao ASR 识别错误')
            break
          case ResponseType.SESSION_FINISHED:
            logForDebugging('[doubao-asr] Session finished')
            break
          default:
            break
        }
      }
    } catch (err) {
      logError(new Error(`[doubao-asr] Stream error: ${String(err)}`))
      if (!finalized) {
        callbacks.onError(`Doubao ASR 连接错误: ${String(err)}`)
      }
    } finally {
      streamEnded = true
    }
  })()

  return connection
}
|
||||
@@ -880,6 +880,10 @@ export const SettingsSchema = lazySchema(() =>
|
||||
.boolean()
|
||||
.optional()
|
||||
.describe('Enable voice mode (hold-to-talk dictation)'),
|
||||
voiceProvider: z
|
||||
.enum(['anthropic', 'doubao'])
|
||||
.optional()
|
||||
.describe('Voice STT backend: "anthropic" (default) or "doubao" (Doubao ASR)'),
|
||||
}
|
||||
: {}),
|
||||
...(feature('KAIROS')
|
||||
|
||||
@@ -23,7 +23,7 @@ function makeCommand(name: string, opts?: Partial<Command>): Command {
|
||||
type: 'local',
|
||||
handler: () => {},
|
||||
...opts,
|
||||
} as Command
|
||||
} as unknown as Command
|
||||
}
|
||||
|
||||
function makePromptCommand(
|
||||
@@ -37,7 +37,7 @@ function makePromptCommand(
|
||||
handler: () => {},
|
||||
source: 'userSettings',
|
||||
...opts,
|
||||
} as Command
|
||||
} as unknown as Command
|
||||
}
|
||||
|
||||
// ─── isCommandInput ───────────────────────────────────────────────────
|
||||
|
||||
@@ -44,11 +44,18 @@ export function hasVoiceAuth(): boolean {
|
||||
}
|
||||
|
||||
/**
 * Full runtime check for the Anthropic voice_stream backend.
 *
 * @returns true only when both hasVoiceAuth() and isVoiceGrowthBookEnabled()
 *   pass. The GrowthBook check is not evaluated when the auth check fails.
 */
export function isVoiceModeEnabled(): boolean {
  if (!hasVoiceAuth()) {
    return false
  }
  return isVoiceGrowthBookEnabled()
}
|
||||
|
||||
/**
 * Backend-agnostic check for whether voice mode may be activated.
 *
 * Unlike isVoiceModeEnabled(), this performs no Anthropic auth check, because
 * the Doubao backend does not require it. The result is exactly what
 * isVoiceGrowthBookEnabled() reports.
 *
 * @returns true when the GrowthBook gate allows voice mode.
 */
export function isVoiceAvailable(): boolean {
  const gateOpen = isVoiceGrowthBookEnabled()
  return gateOpen
}
|
||||
|
||||
Reference in New Issue
Block a user