mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-17 22:05:50 +00:00
feat: 集成豆包 ASR 语音识别后端,支持 /voice doubao 切换 (#357)
* feat: 集成豆包 ASR 语音识别后端,支持 /voice doubao 切换
  - 新增 src/services/doubaoSTT.ts 适配模块,将 doubaoime-asr 的 AsyncGenerator 协议适配为现有 VoiceStreamConnection 接口
  - /voice doubao 启用豆包后端,/voice 使用默认 Anthropic 后端
  - 后端选择持久化到 settings.json 的 voiceProvider 字段
  - 豆包后端跳过 Anthropic OAuth 认证、语言限制和 Focus Mode
  - 豆包后端松手即出结果,跳过 processing 状态
  - 凭证文件存放在 ~/.claude/tts/doubao/credentials.json
  - doubaoime-asr 作为 optionalDependencies 安装
  - 移除 /voice 命令的 claude-ai 可用性限制,所有用户可用
  Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
* docs: 更新 Voice Mode 文档,添加豆包 ASR 后端说明和致谢
  Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---------
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -20,6 +20,10 @@ import {
|
||||
isVoiceStreamAvailable,
|
||||
type VoiceStreamConnection,
|
||||
} from '../services/voiceStreamSTT.js'
|
||||
import {
|
||||
connectDoubaoStream,
|
||||
isDoubaoAvailableSync,
|
||||
} from '../services/doubaoSTT.js'
|
||||
import { logForDebugging } from '../utils/debug.js'
|
||||
import { toError } from '../utils/errors.js'
|
||||
import { getSystemLocaleLanguage } from '../utils/intl.js'
|
||||
@@ -27,6 +31,10 @@ import { logError } from '../utils/log.js'
|
||||
import { getInitialSettings } from '../utils/settings/settings.js'
|
||||
import { sleep } from '../utils/sleep.js'
|
||||
|
||||
/**
 * Reports whether the Doubao speech-to-text backend is selected.
 *
 * Reads `voiceProvider` from the object returned by `getInitialSettings()`
 * on every call and compares it against the literal `'doubao'`.
 *
 * @returns `true` only when `voiceProvider` is exactly `'doubao'`.
 */
function isDoubaoProvider(): boolean {
  const { voiceProvider } = getInitialSettings()
  return voiceProvider === 'doubao'
}
|
||||
|
||||
// ─── Language normalization ─────────────────────────────────────────────
|
||||
|
||||
const DEFAULT_STT_LANGUAGE = 'en'
|
||||
@@ -574,7 +582,7 @@ export function useVoice({
|
||||
// stop when it loses focus. This enables a "multi-clauding army"
|
||||
// workflow where voice input follows window focus.
|
||||
useEffect(() => {
|
||||
if (!enabled || !focusMode) {
|
||||
if (!enabled || !focusMode || isDoubaoProvider()) {
|
||||
// Focus mode was disabled while a focus-driven recording was active —
|
||||
// stop the recording so it doesn't linger until the silence timer fires.
|
||||
if (focusTriggeredRef.current && stateRef.current === 'recording') {
|
||||
@@ -778,7 +786,11 @@ export function useVoice({
|
||||
|
||||
const attemptConnect = (keyterms: string[]): void => {
|
||||
const myAttemptGen = attemptGenRef.current
|
||||
void connectVoiceStream(
|
||||
// Select STT backend based on settings.voiceProvider
|
||||
const connectFn = isDoubaoProvider()
|
||||
? (cbs: Parameters<typeof connectDoubaoStream>[0], opts: Parameters<typeof connectDoubaoStream>[1]) => connectDoubaoStream(cbs, opts)
|
||||
: (cbs: Parameters<typeof connectVoiceStream>[0], opts: Parameters<typeof connectVoiceStream>[1]) => connectVoiceStream(cbs, opts)
|
||||
void connectFn(
|
||||
{
|
||||
onTranscript: (text: string, isFinal: boolean) => {
|
||||
if (isStale()) return
|
||||
@@ -1007,7 +1019,12 @@ export function useVoice({
|
||||
})
|
||||
}
|
||||
|
||||
void getVoiceKeyterms().then(attemptConnect)
|
||||
// Doubao backend doesn't use keyterms — skip the async fetch
|
||||
if (isDoubaoProvider()) {
|
||||
attemptConnect([])
|
||||
} else {
|
||||
void getVoiceKeyterms().then(attemptConnect)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Hold-to-talk handler ────────────────────────────────────────────
|
||||
@@ -1021,7 +1038,8 @@ export function useVoice({
|
||||
// delay of ~500ms on macOS).
|
||||
const handleKeyEvent = useCallback(
|
||||
(fallbackMs = REPEAT_FALLBACK_MS): void => {
|
||||
if (!enabled || !isVoiceStreamAvailable()) {
|
||||
const sttAvailable = isDoubaoProvider() ? isDoubaoAvailableSync() : isVoiceStreamAvailable()
|
||||
if (!enabled || !sttAvailable) {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -7,19 +7,22 @@ import {
|
||||
|
||||
/**
 * Decides whether voice mode is active: the user's opt-in
 * (settings.voiceEnabled) gated by auth and the GB kill-switch.
 * When settings.voiceProvider is 'doubao' the auth gate is skipped, because
 * Doubao has its own credentials.
 *
 * Only the auth result is memoized, keyed on authVersion — it's the expensive
 * half (cold getClaudeAIOAuthTokens memoize → sync `security` spawn,
 * ~60ms/call, ~180ms total in profile v5 when token refresh cleared the cache
 * mid-session). The GB check is a cheap cached-map lookup and is kept outside
 * the memo so a mid-session kill-switch flip still takes effect on the next
 * render.
 *
 * authVersion bumps on /login only. Background token refresh leaves it alone
 * (user is still authed), so the auth memo stays correct without re-eval.
 *
 * @returns `true` when the user opted in, the auth gate passes (or is skipped
 *   for Doubao), and the GB check passes.
 */
export function useVoiceEnabled(): boolean {
  // Every hook below runs unconditionally and in a fixed order (Rules of
  // Hooks), even though the Doubao path never consults the auth result.
  const optedIn = useAppState(s => s.settings.voiceEnabled === true)
  const sttProvider = useAppState(s => s.settings.voiceProvider)
  const authVersion = useAppState(s => s.authVersion)
  // eslint-disable-next-line react-hooks/exhaustive-deps
  const hasAuth = useMemo(hasVoiceAuth, [authVersion])

  // Doubao bypasses the auth gate; every other provider needs it.
  const authSatisfied = sttProvider === 'doubao' || hasAuth

  // Short-circuit order matters: the GB lookup runs only once the opt-in and
  // auth gates have passed.
  return optedIn && authSatisfied && isVoiceGrowthBookEnabled()
}
|
||||
|
||||
Reference in New Issue
Block a user