mirror of
https://github.com/claude-code-best/claude-code.git
synced 2026-06-15 12:55:51 +00:00
feat: 集成豆包 ASR 语音识别后端,支持 /voice doubao 切换 (#357)
* feat: 集成豆包 ASR 语音识别后端,支持 /voice doubao 切换 - 新增 src/services/doubaoSTT.ts 适配模块,将 doubaoime-asr 的 AsyncGenerator 协议适配为现有 VoiceStreamConnection 接口 - /voice doubao 启用豆包后端,/voice 使用默认 Anthropic 后端 - 后端选择持久化到 settings.json 的 voiceProvider 字段 - 豆包后端跳过 Anthropic OAuth 认证、语言限制和 Focus Mode - 豆包后端松手即出结果,跳过 processing 状态 - 凭证文件存放在 ~/.claude/tts/doubao/credentials.json - doubaoime-asr 作为 optionalDependencies 安装 - 移除 /voice 命令的 claude-ai 可用性限制,所有用户可用 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * docs: 更新 Voice Mode 文档,添加豆包 ASR 后端说明和致谢 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -44,3 +44,4 @@ data
|
|||||||
!.codex/prompts/
|
!.codex/prompts/
|
||||||
!.codex/prompts/**
|
!.codex/prompts/**
|
||||||
teach-me
|
teach-me
|
||||||
|
credentials.json
|
||||||
|
|||||||
@@ -27,7 +27,7 @@
|
|||||||
| **Poor Mode** | 穷鬼模式,关闭记忆提取和键入建议,大幅度减少并发请求 | /poor 可以开关 |
|
| **Poor Mode** | 穷鬼模式,关闭记忆提取和键入建议,大幅度减少并发请求 | /poor 可以开关 |
|
||||||
| **Channels 频道通知** | MCP 服务器推送外部消息到会话(飞书/Slack/Discord/微信等),`--channels plugin:name@marketplace` 启用 | [文档](https://ccb.agent-aura.top/docs/features/channels) |
|
| **Channels 频道通知** | MCP 服务器推送外部消息到会话(飞书/Slack/Discord/微信等),`--channels plugin:name@marketplace` 启用 | [文档](https://ccb.agent-aura.top/docs/features/channels) |
|
||||||
| **自定义模型供应商** | OpenAI/Anthropic/Gemini/Grok 兼容 | [文档](https://ccb.agent-aura.top/docs/features/custom-platform-login) |
|
| **自定义模型供应商** | OpenAI/Anthropic/Gemini/Grok 兼容 | [文档](https://ccb.agent-aura.top/docs/features/custom-platform-login) |
|
||||||
| Voice Mode | Push-to-Talk 语音输入 | [文档](https://ccb.agent-aura.top/docs/features/voice-mode) |
|
| Voice Mode | 语音输入,支持豆包语音输入(`/voice doubao`) | [文档](https://ccb.agent-aura.top/docs/features/voice-mode) |
|
||||||
| Computer Use | 屏幕截图、键鼠控制 | [文档](https://ccb.agent-aura.top/docs/features/computer-use) |
|
| Computer Use | 屏幕截图、键鼠控制 | [文档](https://ccb.agent-aura.top/docs/features/computer-use) |
|
||||||
| Chrome Use | 浏览器自动化、表单填写、数据抓取 | [自托管](https://ccb.agent-aura.top/docs/features/chrome-use-mcp) [原生版](https://ccb.agent-aura.top/docs/features/claude-in-chrome-mcp) |
|
| Chrome Use | 浏览器自动化、表单填写、数据抓取 | [自托管](https://ccb.agent-aura.top/docs/features/chrome-use-mcp) [原生版](https://ccb.agent-aura.top/docs/features/claude-in-chrome-mcp) |
|
||||||
| Sentry | 企业级错误追踪 | [文档](https://ccb.agent-aura.top/docs/internals/sentry-setup) |
|
| Sentry | 企业级错误追踪 | [文档](https://ccb.agent-aura.top/docs/internals/sentry-setup) |
|
||||||
@@ -233,6 +233,10 @@ TUI (REPL) 模式需要真实终端,无法直接通过 VS Code launch 启动
|
|||||||
</picture>
|
</picture>
|
||||||
</a>
|
</a>
|
||||||
|
|
||||||
|
## 致谢
|
||||||
|
|
||||||
|
- [doubaoime-asr](https://github.com/starccy/doubaoime-asr) — 豆包 ASR 语音识别 SDK,为 Voice Mode 提供无需 Anthropic OAuth 的语音输入方案
|
||||||
|
|
||||||
## 许可证
|
## 许可证
|
||||||
|
|
||||||
本项目仅供学习研究用途。Claude Code 的所有权利归 [Anthropic](https://www.anthropic.com/) 所有。
|
本项目仅供学习研究用途。Claude Code 的所有权利归 [Anthropic](https://www.anthropic.com/) 所有。
|
||||||
|
|||||||
13
bun.lock
13
bun.lock
@@ -145,6 +145,9 @@
|
|||||||
"yaml": "^2.8.3",
|
"yaml": "^2.8.3",
|
||||||
"zod": "^4.3.6",
|
"zod": "^4.3.6",
|
||||||
},
|
},
|
||||||
|
"optionalDependencies": {
|
||||||
|
"doubaoime-asr": "^0.1.0",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"packages/@ant/claude-for-chrome-mcp": {
|
"packages/@ant/claude-for-chrome-mcp": {
|
||||||
"name": "@ant/claude-for-chrome-mcp",
|
"name": "@ant/claude-for-chrome-mcp",
|
||||||
@@ -1791,6 +1794,8 @@
|
|||||||
|
|
||||||
"dompurify": ["dompurify@3.4.0", "https://registry.npmmirror.com/dompurify/-/dompurify-3.4.0.tgz", { "optionalDependencies": { "@types/trusted-types": "^2.0.7" } }, "sha512-nolgK9JcaUXMSmW+j1yaSvaEaoXYHwWyGJlkoCTghc97KgGDDSnpoU/PlEnw63Ah+TGKFOyY+X5LnxaWbCSfXg=="],
|
"dompurify": ["dompurify@3.4.0", "https://registry.npmmirror.com/dompurify/-/dompurify-3.4.0.tgz", { "optionalDependencies": { "@types/trusted-types": "^2.0.7" } }, "sha512-nolgK9JcaUXMSmW+j1yaSvaEaoXYHwWyGJlkoCTghc97KgGDDSnpoU/PlEnw63Ah+TGKFOyY+X5LnxaWbCSfXg=="],
|
||||||
|
|
||||||
|
"doubaoime-asr": ["doubaoime-asr@0.1.0", "", { "dependencies": { "opus-encdec": "^0.1.1", "protobufjs": "^8.0.0", "ws": "^8.18.0" }, "bin": { "doubaoime-asr": "bin/doubaoime-asr.mjs" } }, "sha512-HYUfHkTxNdOoztXwS18e6GBRLY9dSDWX43K4WvPvEmO6+RevO6WbawMMoUfHKPb4ySQn461un7XyN5l4UGejwg=="],
|
||||||
|
|
||||||
"dunder-proto": ["dunder-proto@1.0.1", "https://registry.npmmirror.com/dunder-proto/-/dunder-proto-1.0.1.tgz", { "dependencies": { "call-bind-apply-helpers": "^1.0.1", "es-errors": "^1.3.0", "gopd": "^1.2.0" } }, "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A=="],
|
"dunder-proto": ["dunder-proto@1.0.1", "https://registry.npmmirror.com/dunder-proto/-/dunder-proto-1.0.1.tgz", { "dependencies": { "call-bind-apply-helpers": "^1.0.1", "es-errors": "^1.3.0", "gopd": "^1.2.0" } }, "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A=="],
|
||||||
|
|
||||||
"ecdsa-sig-formatter": ["ecdsa-sig-formatter@1.0.11", "https://registry.npmmirror.com/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", { "dependencies": { "safe-buffer": "^5.0.1" } }, "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ=="],
|
"ecdsa-sig-formatter": ["ecdsa-sig-formatter@1.0.11", "https://registry.npmmirror.com/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", { "dependencies": { "safe-buffer": "^5.0.1" } }, "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ=="],
|
||||||
@@ -2343,6 +2348,8 @@
|
|||||||
|
|
||||||
"openai": ["openai@6.34.0", "https://registry.npmmirror.com/openai/-/openai-6.34.0.tgz", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.25 || ^4.0" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-yEr2jdGf4tVFYG6ohmr3pF6VJuveP0EA/sS8TBx+4Eq5NT10alu5zg2dmxMXMgqpihRDQlFGpRt2XwsGj+Fyxw=="],
|
"openai": ["openai@6.34.0", "https://registry.npmmirror.com/openai/-/openai-6.34.0.tgz", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.25 || ^4.0" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-yEr2jdGf4tVFYG6ohmr3pF6VJuveP0EA/sS8TBx+4Eq5NT10alu5zg2dmxMXMgqpihRDQlFGpRt2XwsGj+Fyxw=="],
|
||||||
|
|
||||||
|
"opus-encdec": ["opus-encdec@0.1.1", "", {}, "sha512-TDzyGqYqrwn5UEUNaLsfLGu8Ma+HRNrgLYj7Vx5wfTnafAA21G6Bnm/qTIa3orQi/yZPZYmkdpO/gez4nfA1Rw=="],
|
||||||
|
|
||||||
"os-tmpdir": ["os-tmpdir@1.0.2", "https://registry.npmmirror.com/os-tmpdir/-/os-tmpdir-1.0.2.tgz", {}, "sha512-D2FR03Vir7FIu45XBY20mTb+/ZSWB00sjU9jdQXt83gDrI4Ztz5Fs7/yy74g2N5SVQY4xY1qDr4rNddwYRVX0g=="],
|
"os-tmpdir": ["os-tmpdir@1.0.2", "https://registry.npmmirror.com/os-tmpdir/-/os-tmpdir-1.0.2.tgz", {}, "sha512-D2FR03Vir7FIu45XBY20mTb+/ZSWB00sjU9jdQXt83gDrI4Ztz5Fs7/yy74g2N5SVQY4xY1qDr4rNddwYRVX0g=="],
|
||||||
|
|
||||||
"oxc-parser": ["oxc-parser@0.121.0", "https://registry.npmmirror.com/oxc-parser/-/oxc-parser-0.121.0.tgz", { "dependencies": { "@oxc-project/types": "^0.121.0" }, "optionalDependencies": { "@oxc-parser/binding-android-arm-eabi": "0.121.0", "@oxc-parser/binding-android-arm64": "0.121.0", "@oxc-parser/binding-darwin-arm64": "0.121.0", "@oxc-parser/binding-darwin-x64": "0.121.0", "@oxc-parser/binding-freebsd-x64": "0.121.0", "@oxc-parser/binding-linux-arm-gnueabihf": "0.121.0", "@oxc-parser/binding-linux-arm-musleabihf": "0.121.0", "@oxc-parser/binding-linux-arm64-gnu": "0.121.0", "@oxc-parser/binding-linux-arm64-musl": "0.121.0", "@oxc-parser/binding-linux-ppc64-gnu": "0.121.0", "@oxc-parser/binding-linux-riscv64-gnu": "0.121.0", "@oxc-parser/binding-linux-riscv64-musl": "0.121.0", "@oxc-parser/binding-linux-s390x-gnu": "0.121.0", "@oxc-parser/binding-linux-x64-gnu": "0.121.0", "@oxc-parser/binding-linux-x64-musl": "0.121.0", "@oxc-parser/binding-openharmony-arm64": "0.121.0", "@oxc-parser/binding-wasm32-wasi": "0.121.0", "@oxc-parser/binding-win32-arm64-msvc": "0.121.0", "@oxc-parser/binding-win32-ia32-msvc": "0.121.0", "@oxc-parser/binding-win32-x64-msvc": "0.121.0" } }, "sha512-ek9o58+SCv6AV7nchiAcUJy1DNE2CC5WRdBcO0mF+W4oRjNQfPO7b3pLjTHSFECpHkKGOZSQxx3hk8viIL5YCg=="],
|
"oxc-parser": ["oxc-parser@0.121.0", "https://registry.npmmirror.com/oxc-parser/-/oxc-parser-0.121.0.tgz", { "dependencies": { "@oxc-project/types": "^0.121.0" }, "optionalDependencies": { "@oxc-parser/binding-android-arm-eabi": "0.121.0", "@oxc-parser/binding-android-arm64": "0.121.0", "@oxc-parser/binding-darwin-arm64": "0.121.0", "@oxc-parser/binding-darwin-x64": "0.121.0", "@oxc-parser/binding-freebsd-x64": "0.121.0", "@oxc-parser/binding-linux-arm-gnueabihf": "0.121.0", "@oxc-parser/binding-linux-arm-musleabihf": "0.121.0", "@oxc-parser/binding-linux-arm64-gnu": "0.121.0", "@oxc-parser/binding-linux-arm64-musl": "0.121.0", "@oxc-parser/binding-linux-ppc64-gnu": "0.121.0", "@oxc-parser/binding-linux-riscv64-gnu": "0.121.0", "@oxc-parser/binding-linux-riscv64-musl": "0.121.0", "@oxc-parser/binding-linux-s390x-gnu": "0.121.0", "@oxc-parser/binding-linux-x64-gnu": "0.121.0", "@oxc-parser/binding-linux-x64-musl": "0.121.0", "@oxc-parser/binding-openharmony-arm64": "0.121.0", "@oxc-parser/binding-wasm32-wasi": "0.121.0", "@oxc-parser/binding-win32-arm64-msvc": "0.121.0", "@oxc-parser/binding-win32-ia32-msvc": "0.121.0", "@oxc-parser/binding-win32-x64-msvc": "0.121.0" } }, "sha512-ek9o58+SCv6AV7nchiAcUJy1DNE2CC5WRdBcO0mF+W4oRjNQfPO7b3pLjTHSFECpHkKGOZSQxx3hk8viIL5YCg=="],
|
||||||
@@ -2435,7 +2442,7 @@
|
|||||||
|
|
||||||
"property-information": ["property-information@7.1.0", "https://registry.npmmirror.com/property-information/-/property-information-7.1.0.tgz", {}, "sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ=="],
|
"property-information": ["property-information@7.1.0", "https://registry.npmmirror.com/property-information/-/property-information-7.1.0.tgz", {}, "sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ=="],
|
||||||
|
|
||||||
"protobufjs": ["protobufjs@7.5.4", "https://registry.npmmirror.com/protobufjs/-/protobufjs-7.5.4.tgz", { "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", "@protobufjs/codegen": "^2.0.4", "@protobufjs/eventemitter": "^1.1.0", "@protobufjs/fetch": "^1.1.0", "@protobufjs/float": "^1.0.2", "@protobufjs/inquire": "^1.1.0", "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.0", "@types/node": ">=13.7.0", "long": "^5.0.0" } }, "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg=="],
|
"protobufjs": ["protobufjs@8.0.1", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", "@protobufjs/codegen": "^2.0.4", "@protobufjs/eventemitter": "^1.1.0", "@protobufjs/fetch": "^1.1.0", "@protobufjs/float": "^1.0.2", "@protobufjs/inquire": "^1.1.0", "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.0", "@types/node": ">=13.7.0", "long": "^5.0.0" } }, "sha512-NWWCCscLjs+cOKF/s/XVNFRW7Yih0fdH+9brffR5NZCy8k42yRdl5KlWKMVXuI1vfCoy4o1z80XR/W/QUb3V3w=="],
|
||||||
|
|
||||||
"proxy-addr": ["proxy-addr@2.0.7", "https://registry.npmmirror.com/proxy-addr/-/proxy-addr-2.0.7.tgz", { "dependencies": { "forwarded": "0.2.0", "ipaddr.js": "1.9.1" } }, "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg=="],
|
"proxy-addr": ["proxy-addr@2.0.7", "https://registry.npmmirror.com/proxy-addr/-/proxy-addr-2.0.7.tgz", { "dependencies": { "forwarded": "0.2.0", "ipaddr.js": "1.9.1" } }, "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg=="],
|
||||||
|
|
||||||
@@ -3029,6 +3036,8 @@
|
|||||||
|
|
||||||
"@fastify/otel/@opentelemetry/instrumentation": ["@opentelemetry/instrumentation@0.212.0", "https://registry.npmmirror.com/@opentelemetry/instrumentation/-/instrumentation-0.212.0.tgz", { "dependencies": { "@opentelemetry/api-logs": "0.212.0", "import-in-the-middle": "^2.0.6", "require-in-the-middle": "^8.0.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-IyXmpNnifNouMOe0I/gX7ENfv2ZCNdYTF0FpCsoBcpbIHzk81Ww9rQTYTnvghszCg7qGrIhNvWC8dhEifgX9Jg=="],
|
"@fastify/otel/@opentelemetry/instrumentation": ["@opentelemetry/instrumentation@0.212.0", "https://registry.npmmirror.com/@opentelemetry/instrumentation/-/instrumentation-0.212.0.tgz", { "dependencies": { "@opentelemetry/api-logs": "0.212.0", "import-in-the-middle": "^2.0.6", "require-in-the-middle": "^8.0.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-IyXmpNnifNouMOe0I/gX7ENfv2ZCNdYTF0FpCsoBcpbIHzk81Ww9rQTYTnvghszCg7qGrIhNvWC8dhEifgX9Jg=="],
|
||||||
|
|
||||||
|
"@grpc/proto-loader/protobufjs": ["protobufjs@7.5.4", "https://registry.npmmirror.com/protobufjs/-/protobufjs-7.5.4.tgz", { "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", "@protobufjs/codegen": "^2.0.4", "@protobufjs/eventemitter": "^1.1.0", "@protobufjs/fetch": "^1.1.0", "@protobufjs/float": "^1.0.2", "@protobufjs/inquire": "^1.1.0", "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.0", "@types/node": ">=13.7.0", "long": "^5.0.0" } }, "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg=="],
|
||||||
|
|
||||||
"@grpc/proto-loader/yargs": ["yargs@17.7.2", "https://registry.npmmirror.com/yargs/-/yargs-17.7.2.tgz", { "dependencies": { "cliui": "^8.0.1", "escalade": "^3.1.1", "get-caller-file": "^2.0.5", "require-directory": "^2.1.1", "string-width": "^4.2.3", "y18n": "^5.0.5", "yargs-parser": "^21.1.1" } }, "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w=="],
|
"@grpc/proto-loader/yargs": ["yargs@17.7.2", "https://registry.npmmirror.com/yargs/-/yargs-17.7.2.tgz", { "dependencies": { "cliui": "^8.0.1", "escalade": "^3.1.1", "get-caller-file": "^2.0.5", "require-directory": "^2.1.1", "string-width": "^4.2.3", "y18n": "^5.0.5", "yargs-parser": "^21.1.1" } }, "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w=="],
|
||||||
|
|
||||||
"@hono/node-ws/@hono/node-server": ["@hono/node-server@1.19.13", "https://registry.npmmirror.com/@hono/node-server/-/node-server-1.19.13.tgz", { "peerDependencies": { "hono": "^4" } }, "sha512-TsQLe4i2gvoTtrHje625ngThGBySOgSK3Xo2XRYOdqGN1teR8+I7vchQC46uLJi8OF62YTYA3AhSpumtkhsaKQ=="],
|
"@hono/node-ws/@hono/node-server": ["@hono/node-server@1.19.13", "https://registry.npmmirror.com/@hono/node-server/-/node-server-1.19.13.tgz", { "peerDependencies": { "hono": "^4" } }, "sha512-TsQLe4i2gvoTtrHje625ngThGBySOgSK3Xo2XRYOdqGN1teR8+I7vchQC46uLJi8OF62YTYA3AhSpumtkhsaKQ=="],
|
||||||
@@ -3123,6 +3132,8 @@
|
|||||||
|
|
||||||
"@opentelemetry/otlp-transformer/@opentelemetry/sdk-trace-base": ["@opentelemetry/sdk-trace-base@2.6.1", "https://registry.npmmirror.com/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.6.1.tgz", { "dependencies": { "@opentelemetry/core": "2.6.1", "@opentelemetry/resources": "2.6.1", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-r86ut4T1e8vNwB35CqCcKd45yzqH6/6Wzvpk2/cZB8PsPLlZFTvrh8yfOS3CYZYcUmAx4hHTZJ8AO8Dj8nrdhw=="],
|
"@opentelemetry/otlp-transformer/@opentelemetry/sdk-trace-base": ["@opentelemetry/sdk-trace-base@2.6.1", "https://registry.npmmirror.com/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.6.1.tgz", { "dependencies": { "@opentelemetry/core": "2.6.1", "@opentelemetry/resources": "2.6.1", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-r86ut4T1e8vNwB35CqCcKd45yzqH6/6Wzvpk2/cZB8PsPLlZFTvrh8yfOS3CYZYcUmAx4hHTZJ8AO8Dj8nrdhw=="],
|
||||||
|
|
||||||
|
"@opentelemetry/otlp-transformer/protobufjs": ["protobufjs@7.5.4", "https://registry.npmmirror.com/protobufjs/-/protobufjs-7.5.4.tgz", { "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", "@protobufjs/codegen": "^2.0.4", "@protobufjs/eventemitter": "^1.1.0", "@protobufjs/fetch": "^1.1.0", "@protobufjs/float": "^1.0.2", "@protobufjs/inquire": "^1.1.0", "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.0", "@types/node": ">=13.7.0", "long": "^5.0.0" } }, "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg=="],
|
||||||
|
|
||||||
"@opentelemetry/sdk-logs/@opentelemetry/core": ["@opentelemetry/core@2.6.1", "https://registry.npmmirror.com/@opentelemetry/core/-/core-2.6.1.tgz", { "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, "sha512-8xHSGWpJP9wBxgBpnqGL0R3PbdWQndL1Qp50qrg71+B28zK5OQmUgcDKLJgzyAAV38t4tOyLMGDD60LneR5W8g=="],
|
"@opentelemetry/sdk-logs/@opentelemetry/core": ["@opentelemetry/core@2.6.1", "https://registry.npmmirror.com/@opentelemetry/core/-/core-2.6.1.tgz", { "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, "sha512-8xHSGWpJP9wBxgBpnqGL0R3PbdWQndL1Qp50qrg71+B28zK5OQmUgcDKLJgzyAAV38t4tOyLMGDD60LneR5W8g=="],
|
||||||
|
|
||||||
"@opentelemetry/sdk-logs/@opentelemetry/resources": ["@opentelemetry/resources@2.6.1", "https://registry.npmmirror.com/@opentelemetry/resources/-/resources-2.6.1.tgz", { "dependencies": { "@opentelemetry/core": "2.6.1", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-lID/vxSuKWXM55XhAKNoYXu9Cutoq5hFdkbTdI/zDKQktXzcWBVhNsOkiZFTMU9UtEWuGRNe0HUgmsFldIdxVA=="],
|
"@opentelemetry/sdk-logs/@opentelemetry/resources": ["@opentelemetry/resources@2.6.1", "https://registry.npmmirror.com/@opentelemetry/resources/-/resources-2.6.1.tgz", { "dependencies": { "@opentelemetry/core": "2.6.1", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-lID/vxSuKWXM55XhAKNoYXu9Cutoq5hFdkbTdI/zDKQktXzcWBVhNsOkiZFTMU9UtEWuGRNe0HUgmsFldIdxVA=="],
|
||||||
|
|||||||
@@ -1,27 +1,32 @@
|
|||||||
# VOICE_MODE — 语音输入
|
# VOICE_MODE — 语音输入
|
||||||
|
|
||||||
> Feature Flag: `FEATURE_VOICE_MODE=1`
|
> Feature Flag: `FEATURE_VOICE_MODE=1`
|
||||||
> 实现状态:完整可用(需要 Anthropic OAuth)
|
> 实现状态:完整可用(双后端:Anthropic OAuth / 豆包 ASR)
|
||||||
> 引用数:46
|
> 引用数:46
|
||||||
|
|
||||||
## 一、功能概述
|
## 一、功能概述
|
||||||
|
|
||||||
VOICE_MODE 实现"按键说话"(Push-to-Talk)语音输入。用户按住空格键录音,音频通过 WebSocket 流式传输到 Anthropic STT 端点(Nova 3),实时转录显示在终端中。
|
VOICE_MODE 实现"按键说话"(Push-to-Talk)语音输入。用户按住空格键录音,音频流式传输到 STT 后端,实时转录显示在终端中。支持两个后端:
|
||||||
|
|
||||||
|
- **Anthropic STT(默认)**:通过 WebSocket 流式传输到 Nova 3 端点,需要 Anthropic OAuth
|
||||||
|
- **豆包 ASR(Doubao)**:通过 `doubaoime-asr` 包的 AsyncGenerator 协议流式识别,使用独立凭证文件,无需 Anthropic OAuth
|
||||||
|
|
||||||
### 核心特性
|
### 核心特性
|
||||||
|
|
||||||
- **Push-to-Talk**:长按空格键录音,释放后自动发送
|
- **Push-to-Talk**:长按空格键录音,释放后自动发送
|
||||||
- **流式转录**:录音过程中实时显示中间转录结果
|
- **流式转录**:录音过程中实时显示中间转录结果
|
||||||
- **无缝集成**:转录文本直接作为用户消息提交到对话
|
- **无缝集成**:转录文本直接作为用户消息提交到对话
|
||||||
|
- **双后端切换**:通过 `/voice` 命令参数选择 STT 后端,持久化到 settings.json
|
||||||
|
|
||||||
## 二、用户交互
|
## 二、用户交互
|
||||||
|
|
||||||
| 操作 | 行为 |
|
| 操作 | 行为 |
|
||||||
|------|------|
|
|------|------|
|
||||||
| 长按空格 | 开始录音,显示录音状态 |
|
| 长按空格 | 开始录音,显示录音状态 |
|
||||||
| 释放空格 | 停止录音,等待最终转录 |
|
| 释放空格 | 停止录音,转录结果自动提交 |
|
||||||
| 转录完成 | 自动插入到输入框并提交 |
|
| `/voice` | 切换语音模式开关(默认使用 Anthropic 后端) |
|
||||||
| `/voice` 命令 | 切换语音模式开关 |
|
| `/voice doubao` | 启用语音模式并使用豆包 ASR 后端 |
|
||||||
|
| `/voice anthropic` | 切换回 Anthropic STT 后端 |
|
||||||
|
|
||||||
### UI 反馈
|
### UI 反馈
|
||||||
|
|
||||||
@@ -35,26 +40,37 @@ VOICE_MODE 实现"按键说话"(Push-to-Talk)语音输入。用户按住空
|
|||||||
|
|
||||||
文件:`src/voice/voiceModeEnabled.ts`
|
文件:`src/voice/voiceModeEnabled.ts`
|
||||||
|
|
||||||
三层检查:
|
两层检查函数:
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
|
// Anthropic 后端(需要 OAuth)
|
||||||
isVoiceModeEnabled() = hasVoiceAuth() && isVoiceGrowthBookEnabled()
|
isVoiceModeEnabled() = hasVoiceAuth() && isVoiceGrowthBookEnabled()
|
||||||
|
|
||||||
|
// 豆包后端 / 通用可用性检查(不需要 OAuth)
|
||||||
|
isVoiceAvailable() = isVoiceGrowthBookEnabled()
|
||||||
```
|
```
|
||||||
|
|
||||||
1. **Feature Flag**:`feature('VOICE_MODE')` — 编译时/运行时开关
|
1. **Feature Flag**:`feature('VOICE_MODE')` — 编译时/运行时开关
|
||||||
2. **GrowthBook Kill-Switch**:`!getFeatureValue_CACHED_MAY_BE_STALE('tengu_amber_quartz_disabled', false)` — 紧急关闭开关(默认 false = 未禁用)
|
2. **GrowthBook Kill-Switch**:`!getFeatureValue_CACHED_MAY_BE_STALE('tengu_amber_quartz_disabled', false)` — 紧急关闭开关(默认 false = 未禁用)
|
||||||
3. **Auth 检查**:`hasVoiceAuth()` — 需要 Anthropic OAuth token(非 API key)
|
3. **Auth 检查(仅 Anthropic)**:`hasVoiceAuth()` — 需要 Anthropic OAuth token(非 API key)
|
||||||
|
4. **Provider 检查**:`voiceProvider` 设置决定使用哪个后端,豆包后端跳过 OAuth 检查
|
||||||
|
|
||||||
### 3.2 核心模块
|
### 3.2 核心模块
|
||||||
|
|
||||||
| 模块 | 职责 |
|
| 模块 | 职责 |
|
||||||
|------|------|
|
|------|------|
|
||||||
| `src/voice/voiceModeEnabled.ts` | Feature flag + GrowthBook + Auth 三层门控 |
|
| `src/voice/voiceModeEnabled.ts` | Feature flag + GrowthBook + Auth 三层门控 |
|
||||||
| `src/hooks/useVoice.ts` | React hook 管理录音状态和 WebSocket 连接 |
|
| `src/hooks/useVoice.ts` | React hook 管理录音状态和后端连接 |
|
||||||
| `src/services/voiceStreamSTT.ts` | WebSocket 流式传输到 Anthropic STT |
|
| `src/services/voiceStreamSTT.ts` | Anthropic WebSocket 流式 STT |
|
||||||
|
| `src/services/doubaoSTT.ts` | 豆包 ASR 适配器(AsyncGenerator → VoiceStreamConnection) |
|
||||||
|
| `src/commands/voice/voice.ts` | `/voice` 命令实现,处理后端选择和持久化 |
|
||||||
|
| `src/hooks/useVoiceEnabled.ts` | 语音启用状态 hook,根据 provider 决定是否跳过 OAuth |
|
||||||
|
| `src/utils/settings/types.ts` | `voiceProvider: 'anthropic' \| 'doubao'` 设置类型定义 |
|
||||||
|
|
||||||
### 3.3 数据流
|
### 3.3 数据流
|
||||||
|
|
||||||
|
#### Anthropic 后端
|
||||||
|
|
||||||
```
|
```
|
||||||
用户按下空格键
|
用户按下空格键
|
||||||
│
|
│
|
||||||
@@ -79,20 +95,108 @@ WebSocket 连接到 Anthropic STT 端点
|
|||||||
转录文本 → 插入输入框 → 自动提交
|
转录文本 → 插入输入框 → 自动提交
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### 豆包 ASR 后端
|
||||||
|
|
||||||
|
```
|
||||||
|
用户按下空格键
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
useVoice hook 激活(检测到 voiceProvider === 'doubao')
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
macOS 原生音频 / SoX 开始录音
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
connectDoubaoStream() 创建 AudioChunkQueue + VoiceStreamConnection
|
||||||
|
│
|
||||||
|
├──→ onReady 立即触发(无需等待握手)
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
音频数据通过 AudioChunkQueue 传入 transcribeRealtime()
|
||||||
|
│
|
||||||
|
├──→ INTERIM_RESULT → 实时显示中间转录
|
||||||
|
├──→ FINAL_RESULT → 显示最终转录
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
用户释放空格键
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
finalize() 立即返回(豆包在录音过程中已返回结果,无需等待)
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
转录文本 → 插入输入框 → 自动提交
|
||||||
|
```
|
||||||
|
|
||||||
### 3.4 音频录制
|
### 3.4 音频录制
|
||||||
|
|
||||||
支持两种音频后端:
|
支持两种音频后端(两个 STT 后端共享):
|
||||||
- **macOS 原生音频**:优先使用,低延迟
|
- **macOS 原生音频**:优先使用,低延迟
|
||||||
- **SoX(Sound eXchange)**:回退方案,跨平台
|
- **SoX(Sound eXchange)**:回退方案,跨平台
|
||||||
|
|
||||||
音频流通过 WebSocket 发送到 Anthropic 的 Nova 3 STT 模型。
|
### 3.5 豆包 ASR 适配器设计
|
||||||
|
|
||||||
|
文件:`src/services/doubaoSTT.ts`
|
||||||
|
|
||||||
|
豆包后端使用适配器模式,将 `doubaoime-asr` 的 AsyncGenerator 协议桥接到 `VoiceStreamConnection` 接口:
|
||||||
|
|
||||||
|
**AudioChunkQueue** — push 式异步队列:
|
||||||
|
- 实现 `AsyncIterable<Uint8Array>` 接口
|
||||||
|
- `push(chunk)` 将音频数据入队,`push(null)` 发送结束信号
|
||||||
|
- 内部维护等待者(waiting)和缓冲队列(chunks)两个状态
|
||||||
|
|
||||||
|
**connectDoubaoStream()** — 连接入口:
|
||||||
|
- 动态导入 `doubaoime-asr`(optionalDependencies)
|
||||||
|
- 从 `~/.claude/tts/doubao/credentials.json` 加载凭证
|
||||||
|
- 创建 AudioChunkQueue 和 VoiceStreamConnection
|
||||||
|
- 立即触发 `onReady`(避免与 useVoice 的音频缓冲死锁)
|
||||||
|
- `finalize()` 立即返回(豆包在录音过程中已返回结果)
|
||||||
|
- 后台 async IIFE 消费 `transcribeRealtime` generator,映射响应类型到回调
|
||||||
|
|
||||||
|
**响应类型映射**:
|
||||||
|
|
||||||
|
| doubaoime-asr ResponseType | 回调映射 |
|
||||||
|
|----------------------------|----------|
|
||||||
|
| SESSION_STARTED | 日志记录 |
|
||||||
|
| VAD_START | 日志记录 |
|
||||||
|
| INTERIM_RESULT | `onTranscript(text, false)` |
|
||||||
|
| FINAL_RESULT | `onTranscript(text, true)` |
|
||||||
|
| ERROR | `onError(errorMsg)` |
|
||||||
|
| SESSION_FINISHED | 日志记录 |
|
||||||
|
|
||||||
|
### 3.6 后端选择逻辑
|
||||||
|
|
||||||
|
文件:`src/hooks/useVoice.ts`
|
||||||
|
|
||||||
|
```ts
|
||||||
|
// 判断当前 provider
|
||||||
|
isDoubaoProvider() → 读取 settings.voiceProvider
|
||||||
|
|
||||||
|
// handleKeyEvent 中的可用性检查
|
||||||
|
const sttAvailable = isDoubaoProvider()
|
||||||
|
? isDoubaoAvailableSync() // 乐观检查(首次返回 true)
|
||||||
|
: isVoiceStreamAvailable() // Anthropic WebSocket 检查
|
||||||
|
|
||||||
|
// attemptConnect 中的连接函数选择
|
||||||
|
const connectFn = isDoubaoProvider()
|
||||||
|
? connectDoubaoStream
|
||||||
|
: connectVoiceStream
|
||||||
|
```
|
||||||
|
|
||||||
|
豆包后端的特殊处理:
|
||||||
|
- 跳过 `getVoiceKeyterms()` 调用(豆包无需关键词提示)
|
||||||
|
- 跳过 Focus Mode(`if (!enabled || !focusMode || isDoubaoProvider())`)
|
||||||
|
|
||||||
## 四、关键设计决策
|
## 四、关键设计决策
|
||||||
|
|
||||||
1. **OAuth 独占**:语音模式使用 `voice_stream` 端点(claude.ai),仅 Anthropic OAuth 用户可用。API key、Bedrock、Vertex 用户无法使用
|
1. **双后端共存**:豆包后端作为独立适配器与 Anthropic 后端并存,不替换原有流程,通过 `voiceProvider` 设置切换
|
||||||
2. **GrowthBook 负向门控**:`tengu_amber_quartz_disabled` 默认 `false`,新安装自动可用(无需等 GrowthBook 初始化)
|
2. **设置持久化**:`voiceProvider` 存储在 `settings.json`,通过 `/voice` 命令修改,跨会话生效
|
||||||
3. **Keychain 缓存**:`getClaudeAIOAuthTokens()` 首次调用访问 macOS keychain(~20-50ms),后续缓存命中
|
3. **OAuth 独占(Anthropic)**:Anthropic 后端使用 `voice_stream` 端点(claude.ai),仅 OAuth 用户可用
|
||||||
4. **独立于主 feature flag**:`isVoiceGrowthBookEnabled()` 在 feature flag 关闭时短路返回 `false`,不触发任何模块加载
|
4. **豆包无需 OAuth**:豆包后端使用独立凭证文件,不依赖 Anthropic 认证,通过 `isVoiceAvailable()` 放宽门控
|
||||||
|
5. **GrowthBook 负向门控**:`tengu_amber_quartz_disabled` 默认 `false`,新安装自动可用
|
||||||
|
6. **onReady 立即触发**:豆包后端在连接建立后立即触发 `onReady`,避免与 useVoice 音频缓冲的时序死锁(Anthropic 需要等待 WebSocket 握手)
|
||||||
|
7. **finalize() 立即返回**:豆包在录音过程中已返回所有结果,用户抬手时无需等待处理
|
||||||
|
8. **乐观可用性检查**:`isDoubaoAvailableSync()` 在首次调用时返回 `true`,实际导入错误在 `connectDoubaoStream` 中处理
|
||||||
|
9. **optionalDependencies**:`doubaoime-asr` 作为可选依赖,安装失败不影响 Anthropic 后端
|
||||||
|
|
||||||
## 五、使用方式
|
## 五、使用方式
|
||||||
|
|
||||||
@@ -100,26 +204,60 @@ WebSocket 连接到 Anthropic STT 端点
|
|||||||
# 启用 feature
|
# 启用 feature
|
||||||
FEATURE_VOICE_MODE=1 bun run dev
|
FEATURE_VOICE_MODE=1 bun run dev
|
||||||
|
|
||||||
# 在 REPL 中使用
|
# 在 REPL 中使用 Anthropic 后端
|
||||||
# 1. 确保已通过 OAuth 登录(claude.ai 订阅)
|
# 1. 确保已通过 OAuth 登录(claude.ai 订阅)
|
||||||
# 2. 按住空格键说话
|
# 2. 输入 /voice 启用
|
||||||
# 3. 释放空格键等待转录
|
# 3. 按住空格键说话
|
||||||
# 4. 或使用 /voice 命令切换开关
|
# 4. 释放空格键等待转录
|
||||||
|
|
||||||
|
# 在 REPL 中使用豆包 ASR 后端
|
||||||
|
# 1. 确保 doubaoime-asr 已安装(bun add doubaoime-asr)
|
||||||
|
# 2. 配置凭证文件:~/.claude/tts/doubao/credentials.json
|
||||||
|
# 3. 输入 /voice doubao 启用
|
||||||
|
# 4. 按住空格键说话
|
||||||
|
# 5. 释放空格键,转录结果即刻显示
|
||||||
|
|
||||||
|
# 切换后端
|
||||||
|
/voice doubao # 切换到豆包 ASR
|
||||||
|
/voice anthropic # 切换回 Anthropic STT
|
||||||
|
/voice # 关闭语音模式
|
||||||
|
```
|
||||||
|
|
||||||
|
### 豆包凭证配置
|
||||||
|
|
||||||
|
凭证文件路径:`~/.claude/tts/doubao/credentials.json`
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"deviceId": "...",
|
||||||
|
"installId": "...",
|
||||||
|
"cdid": "...",
|
||||||
|
"openudid": "...",
|
||||||
|
"clientudid": "...",
|
||||||
|
"token": "..."
|
||||||
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
## 六、外部依赖
|
## 六、外部依赖
|
||||||
|
|
||||||
| 依赖 | 说明 |
|
| 依赖 | 说明 | 适用后端 |
|
||||||
|------|------|
|
|------|------|----------|
|
||||||
| Anthropic OAuth | claude.ai 订阅登录,非 API key |
|
| Anthropic OAuth | claude.ai 订阅登录,非 API key | Anthropic |
|
||||||
| GrowthBook | `tengu_amber_quartz_disabled` 紧急关闭 |
|
| GrowthBook | `tengu_amber_quartz_disabled` 紧急关闭 | 通用 |
|
||||||
| macOS 原生音频 或 SoX | 音频录制 |
|
| macOS 原生音频 或 SoX | 音频录制 | 通用 |
|
||||||
| Nova 3 STT | 语音转文本模型 |
|
| Nova 3 STT | Anthropic 语音转文本模型 | Anthropic |
|
||||||
|
| doubaoime-asr | 豆包 ASR SDK(optionalDependencies) | 豆包 |
|
||||||
|
| 凭证文件 | `~/.claude/tts/doubao/credentials.json` | 豆包 |
|
||||||
|
|
||||||
## 七、文件索引
|
## 七、文件索引
|
||||||
|
|
||||||
| 文件 | 行数 | 职责 |
|
| 文件 | 职责 |
|
||||||
|------|------|------|
|
|------|------|
|
||||||
| `src/voice/voiceModeEnabled.ts` | 54 | 三层门控逻辑 |
|
| `src/voice/voiceModeEnabled.ts` | 三层门控逻辑 + `isVoiceAvailable()` |
|
||||||
| `src/hooks/useVoice.ts` | — | React hook(录音状态 + WebSocket) |
|
| `src/hooks/useVoice.ts` | React hook(录音状态 + 后端选择 + 连接管理) |
|
||||||
| `src/services/voiceStreamSTT.ts` | — | STT WebSocket 流式传输 |
|
| `src/hooks/useVoiceEnabled.ts` | 语音启用状态 hook(按 provider 决定 OAuth 检查) |
|
||||||
|
| `src/services/voiceStreamSTT.ts` | Anthropic STT WebSocket 流式传输 |
|
||||||
|
| `src/services/doubaoSTT.ts` | 豆包 ASR 适配器(AudioChunkQueue + connectDoubaoStream) |
|
||||||
|
| `src/commands/voice/voice.ts` | `/voice` 命令(开关 + 后端选择) |
|
||||||
|
| `src/commands/voice/index.ts` | 命令注册(去除 availability 限制) |
|
||||||
|
| `src/utils/settings/types.ts` | `voiceProvider` 类型定义 |
|
||||||
|
|||||||
@@ -205,5 +205,8 @@
|
|||||||
"xss": "^1.0.15",
|
"xss": "^1.0.15",
|
||||||
"yaml": "^2.8.3",
|
"yaml": "^2.8.3",
|
||||||
"zod": "^4.3.6"
|
"zod": "^4.3.6"
|
||||||
|
},
|
||||||
|
"optionalDependencies": {
|
||||||
|
"doubaoime-asr": "^0.1.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,17 +1,15 @@
|
|||||||
import type { Command } from '../../commands.js'
|
import type { Command } from '../../commands.js'
|
||||||
import {
|
import {
|
||||||
isVoiceGrowthBookEnabled,
|
isVoiceAvailable,
|
||||||
isVoiceModeEnabled,
|
|
||||||
} from '../../voice/voiceModeEnabled.js'
|
} from '../../voice/voiceModeEnabled.js'
|
||||||
|
|
||||||
const voice = {
|
const voice = {
|
||||||
type: 'local',
|
type: 'local',
|
||||||
name: 'voice',
|
name: 'voice',
|
||||||
description: 'Toggle voice mode',
|
description: 'Toggle voice mode. Use /voice doubao for Doubao ASR backend',
|
||||||
availability: ['claude-ai'],
|
isEnabled: () => isVoiceAvailable(),
|
||||||
isEnabled: () => isVoiceGrowthBookEnabled(),
|
|
||||||
get isHidden() {
|
get isHidden() {
|
||||||
return !isVoiceModeEnabled()
|
return !isVoiceAvailable()
|
||||||
},
|
},
|
||||||
supportsNonInteractive: false,
|
supportsNonInteractive: false,
|
||||||
load: () => import('./voice.js'),
|
load: () => import('./voice.js'),
|
||||||
|
|||||||
@@ -2,29 +2,19 @@ import { normalizeLanguageForSTT } from '../../hooks/useVoice.js'
|
|||||||
import { getShortcutDisplay } from '../../keybindings/shortcutFormat.js'
|
import { getShortcutDisplay } from '../../keybindings/shortcutFormat.js'
|
||||||
import { logEvent } from '../../services/analytics/index.js'
|
import { logEvent } from '../../services/analytics/index.js'
|
||||||
import type { LocalCommandCall } from '../../types/command.js'
|
import type { LocalCommandCall } from '../../types/command.js'
|
||||||
import { isAnthropicAuthEnabled } from '../../utils/auth.js'
|
|
||||||
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
|
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
|
||||||
import { settingsChangeDetector } from '../../utils/settings/changeDetector.js'
|
import { settingsChangeDetector } from '../../utils/settings/changeDetector.js'
|
||||||
import {
|
import {
|
||||||
getInitialSettings,
|
getInitialSettings,
|
||||||
updateSettingsForSource,
|
updateSettingsForSource,
|
||||||
} from '../../utils/settings/settings.js'
|
} from '../../utils/settings/settings.js'
|
||||||
import { isVoiceModeEnabled } from '../../voice/voiceModeEnabled.js'
|
import { isVoiceAvailable } from '../../voice/voiceModeEnabled.js'
|
||||||
|
|
||||||
const LANG_HINT_MAX_SHOWS = 2
|
const LANG_HINT_MAX_SHOWS = 2
|
||||||
|
|
||||||
export const call: LocalCommandCall = async () => {
|
export const call: LocalCommandCall = async (args) => {
|
||||||
// Check auth and kill-switch before allowing voice mode
|
// Check kill-switch before allowing voice mode
|
||||||
if (!isVoiceModeEnabled()) {
|
if (!isVoiceAvailable()) {
|
||||||
// Differentiate: OAuth-less users get an auth hint, everyone else
|
|
||||||
// gets nothing (command shouldn't be reachable when the kill-switch is on).
|
|
||||||
if (!isAnthropicAuthEnabled()) {
|
|
||||||
return {
|
|
||||||
type: 'text' as const,
|
|
||||||
value:
|
|
||||||
'Voice mode requires a Claude.ai account. Please run /login to sign in.',
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return {
|
return {
|
||||||
type: 'text' as const,
|
type: 'text' as const,
|
||||||
value: 'Voice mode is not available.',
|
value: 'Voice mode is not available.',
|
||||||
@@ -33,6 +23,47 @@ export const call: LocalCommandCall = async () => {
|
|||||||
|
|
||||||
const currentSettings = getInitialSettings()
|
const currentSettings = getInitialSettings()
|
||||||
const isCurrentlyEnabled = currentSettings.voiceEnabled === true
|
const isCurrentlyEnabled = currentSettings.voiceEnabled === true
|
||||||
|
const providerArg = args?.trim().toLowerCase()
|
||||||
|
|
||||||
|
// Handle provider argument when already enabled — switch backend only
|
||||||
|
if (isCurrentlyEnabled && providerArg === 'doubao') {
|
||||||
|
const result = updateSettingsForSource('userSettings', {
|
||||||
|
voiceProvider: 'doubao',
|
||||||
|
})
|
||||||
|
if (result.error) {
|
||||||
|
return {
|
||||||
|
type: 'text' as const,
|
||||||
|
value:
|
||||||
|
'Failed to update settings. Check your settings file for syntax errors.',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
settingsChangeDetector.notifyChange('userSettings')
|
||||||
|
const key = getShortcutDisplay('voice:pushToTalk', 'Chat', 'Space')
|
||||||
|
return {
|
||||||
|
type: 'text' as const,
|
||||||
|
value: `Voice mode switched to Doubao ASR. Hold ${key} to record.`,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle provider argument when already enabled — switch to anthropic
|
||||||
|
if (isCurrentlyEnabled && providerArg === 'anthropic') {
|
||||||
|
const result = updateSettingsForSource('userSettings', {
|
||||||
|
voiceProvider: 'anthropic',
|
||||||
|
})
|
||||||
|
if (result.error) {
|
||||||
|
return {
|
||||||
|
type: 'text' as const,
|
||||||
|
value:
|
||||||
|
'Failed to update settings. Check your settings file for syntax errors.',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
settingsChangeDetector.notifyChange('userSettings')
|
||||||
|
const key = getShortcutDisplay('voice:pushToTalk', 'Chat', 'Space')
|
||||||
|
return {
|
||||||
|
type: 'text' as const,
|
||||||
|
value: `Voice mode switched to Anthropic STT. Hold ${key} to record.`,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Toggle OFF — no checks needed
|
// Toggle OFF — no checks needed
|
||||||
if (isCurrentlyEnabled) {
|
if (isCurrentlyEnabled) {
|
||||||
@@ -54,7 +85,10 @@ export const call: LocalCommandCall = async () => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Toggle ON — run pre-flight checks first
|
// Toggle ON — determine provider from argument or default
|
||||||
|
const provider = providerArg === 'doubao' ? 'doubao' : 'anthropic'
|
||||||
|
|
||||||
|
// Run pre-flight checks
|
||||||
const { isVoiceStreamAvailable } = await import(
|
const { isVoiceStreamAvailable } = await import(
|
||||||
'../../services/voiceStreamSTT.js'
|
'../../services/voiceStreamSTT.js'
|
||||||
)
|
)
|
||||||
@@ -70,8 +104,8 @@ export const call: LocalCommandCall = async () => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for API key
|
// Check for API key (only for Anthropic backend — Doubao uses its own credentials)
|
||||||
if (!isVoiceStreamAvailable()) {
|
if (provider !== 'doubao' && !isVoiceStreamAvailable()) {
|
||||||
return {
|
return {
|
||||||
type: 'text' as const,
|
type: 'text' as const,
|
||||||
value:
|
value:
|
||||||
@@ -111,8 +145,11 @@ export const call: LocalCommandCall = async () => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// All checks passed — enable voice
|
// All checks passed — enable voice with provider
|
||||||
const result = updateSettingsForSource('userSettings', { voiceEnabled: true })
|
const result = updateSettingsForSource('userSettings', {
|
||||||
|
voiceEnabled: true,
|
||||||
|
...(provider === 'doubao' ? { voiceProvider: 'doubao' } : {}),
|
||||||
|
})
|
||||||
if (result.error) {
|
if (result.error) {
|
||||||
return {
|
return {
|
||||||
type: 'text' as const,
|
type: 'text' as const,
|
||||||
@@ -123,28 +160,30 @@ export const call: LocalCommandCall = async () => {
|
|||||||
settingsChangeDetector.notifyChange('userSettings')
|
settingsChangeDetector.notifyChange('userSettings')
|
||||||
logEvent('tengu_voice_toggled', { enabled: true })
|
logEvent('tengu_voice_toggled', { enabled: true })
|
||||||
const key = getShortcutDisplay('voice:pushToTalk', 'Chat', 'Space')
|
const key = getShortcutDisplay('voice:pushToTalk', 'Chat', 'Space')
|
||||||
const stt = normalizeLanguageForSTT(currentSettings.language)
|
|
||||||
const cfg = getGlobalConfig()
|
|
||||||
// Reset the hint counter whenever the resolved STT language changes
|
|
||||||
// (including first-ever enable, where lastLanguage is undefined).
|
|
||||||
const langChanged = cfg.voiceLangHintLastLanguage !== stt.code
|
|
||||||
const priorCount = langChanged ? 0 : (cfg.voiceLangHintShownCount ?? 0)
|
|
||||||
const showHint = !stt.fellBackFrom && priorCount < LANG_HINT_MAX_SHOWS
|
|
||||||
let langNote = ''
|
let langNote = ''
|
||||||
if (stt.fellBackFrom) {
|
const providerLabel = provider === 'doubao' ? 'Doubao ASR' : 'Anthropic'
|
||||||
langNote = ` Note: "${stt.fellBackFrom}" is not a supported dictation language; using English. Change it via /config.`
|
// Doubao backend handles all languages natively — skip language hints
|
||||||
} else if (showHint) {
|
if (provider !== 'doubao') {
|
||||||
langNote = ` Dictation language: ${stt.code} (/config to change).`
|
const stt = normalizeLanguageForSTT(currentSettings.language)
|
||||||
}
|
const cfg = getGlobalConfig()
|
||||||
if (langChanged || showHint) {
|
const langChanged = cfg.voiceLangHintLastLanguage !== stt.code
|
||||||
saveGlobalConfig(prev => ({
|
const priorCount = langChanged ? 0 : (cfg.voiceLangHintShownCount ?? 0)
|
||||||
...prev,
|
const showHint = !stt.fellBackFrom && priorCount < LANG_HINT_MAX_SHOWS
|
||||||
voiceLangHintShownCount: priorCount + (showHint ? 1 : 0),
|
if (stt.fellBackFrom) {
|
||||||
voiceLangHintLastLanguage: stt.code,
|
langNote = ` Note: "${stt.fellBackFrom}" is not a supported dictation language; using English. Change it via /config.`
|
||||||
}))
|
} else if (showHint) {
|
||||||
|
langNote = ` Dictation language: ${stt.code} (/config to change).`
|
||||||
|
}
|
||||||
|
if (langChanged || showHint) {
|
||||||
|
saveGlobalConfig(prev => ({
|
||||||
|
...prev,
|
||||||
|
voiceLangHintShownCount: priorCount + (showHint ? 1 : 0),
|
||||||
|
voiceLangHintLastLanguage: stt.code,
|
||||||
|
}))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
type: 'text' as const,
|
type: 'text' as const,
|
||||||
value: `Voice mode enabled. Hold ${key} to record.${langNote}`,
|
value: `Voice mode enabled (${providerLabel}). Hold ${key} to record.${langNote}`,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,10 @@ import {
|
|||||||
isVoiceStreamAvailable,
|
isVoiceStreamAvailable,
|
||||||
type VoiceStreamConnection,
|
type VoiceStreamConnection,
|
||||||
} from '../services/voiceStreamSTT.js'
|
} from '../services/voiceStreamSTT.js'
|
||||||
|
import {
|
||||||
|
connectDoubaoStream,
|
||||||
|
isDoubaoAvailableSync,
|
||||||
|
} from '../services/doubaoSTT.js'
|
||||||
import { logForDebugging } from '../utils/debug.js'
|
import { logForDebugging } from '../utils/debug.js'
|
||||||
import { toError } from '../utils/errors.js'
|
import { toError } from '../utils/errors.js'
|
||||||
import { getSystemLocaleLanguage } from '../utils/intl.js'
|
import { getSystemLocaleLanguage } from '../utils/intl.js'
|
||||||
@@ -27,6 +31,10 @@ import { logError } from '../utils/log.js'
|
|||||||
import { getInitialSettings } from '../utils/settings/settings.js'
|
import { getInitialSettings } from '../utils/settings/settings.js'
|
||||||
import { sleep } from '../utils/sleep.js'
|
import { sleep } from '../utils/sleep.js'
|
||||||
|
|
||||||
|
/** Reports whether the `voiceProvider` setting currently selects the Doubao ASR backend. */
function isDoubaoProvider(): boolean {
  const { voiceProvider } = getInitialSettings()
  return voiceProvider === 'doubao'
}
|
||||||
|
|
||||||
// ─── Language normalization ─────────────────────────────────────────────
|
// ─── Language normalization ─────────────────────────────────────────────
|
||||||
|
|
||||||
const DEFAULT_STT_LANGUAGE = 'en'
|
const DEFAULT_STT_LANGUAGE = 'en'
|
||||||
@@ -574,7 +582,7 @@ export function useVoice({
|
|||||||
// stop when it loses focus. This enables a "multi-clauding army"
|
// stop when it loses focus. This enables a "multi-clauding army"
|
||||||
// workflow where voice input follows window focus.
|
// workflow where voice input follows window focus.
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!enabled || !focusMode) {
|
if (!enabled || !focusMode || isDoubaoProvider()) {
|
||||||
// Focus mode was disabled while a focus-driven recording was active —
|
// Focus mode was disabled while a focus-driven recording was active —
|
||||||
// stop the recording so it doesn't linger until the silence timer fires.
|
// stop the recording so it doesn't linger until the silence timer fires.
|
||||||
if (focusTriggeredRef.current && stateRef.current === 'recording') {
|
if (focusTriggeredRef.current && stateRef.current === 'recording') {
|
||||||
@@ -778,7 +786,11 @@ export function useVoice({
|
|||||||
|
|
||||||
const attemptConnect = (keyterms: string[]): void => {
|
const attemptConnect = (keyterms: string[]): void => {
|
||||||
const myAttemptGen = attemptGenRef.current
|
const myAttemptGen = attemptGenRef.current
|
||||||
void connectVoiceStream(
|
// Select STT backend based on settings.voiceProvider
|
||||||
|
const connectFn = isDoubaoProvider()
|
||||||
|
? (cbs: Parameters<typeof connectDoubaoStream>[0], opts: Parameters<typeof connectDoubaoStream>[1]) => connectDoubaoStream(cbs, opts)
|
||||||
|
: (cbs: Parameters<typeof connectVoiceStream>[0], opts: Parameters<typeof connectVoiceStream>[1]) => connectVoiceStream(cbs, opts)
|
||||||
|
void connectFn(
|
||||||
{
|
{
|
||||||
onTranscript: (text: string, isFinal: boolean) => {
|
onTranscript: (text: string, isFinal: boolean) => {
|
||||||
if (isStale()) return
|
if (isStale()) return
|
||||||
@@ -1007,7 +1019,12 @@ export function useVoice({
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
void getVoiceKeyterms().then(attemptConnect)
|
// Doubao backend doesn't use keyterms — skip the async fetch
|
||||||
|
if (isDoubaoProvider()) {
|
||||||
|
attemptConnect([])
|
||||||
|
} else {
|
||||||
|
void getVoiceKeyterms().then(attemptConnect)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Hold-to-talk handler ────────────────────────────────────────────
|
// ── Hold-to-talk handler ────────────────────────────────────────────
|
||||||
@@ -1021,7 +1038,8 @@ export function useVoice({
|
|||||||
// delay of ~500ms on macOS).
|
// delay of ~500ms on macOS).
|
||||||
const handleKeyEvent = useCallback(
|
const handleKeyEvent = useCallback(
|
||||||
(fallbackMs = REPEAT_FALLBACK_MS): void => {
|
(fallbackMs = REPEAT_FALLBACK_MS): void => {
|
||||||
if (!enabled || !isVoiceStreamAvailable()) {
|
const sttAvailable = isDoubaoProvider() ? isDoubaoAvailableSync() : isVoiceStreamAvailable()
|
||||||
|
if (!enabled || !sttAvailable) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -7,19 +7,22 @@ import {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Combines user intent (settings.voiceEnabled) with auth + GB kill-switch.
|
* Combines user intent (settings.voiceEnabled) with auth + GB kill-switch.
|
||||||
|
* When using Doubao backend, auth check is skipped (Doubao has its own credentials).
|
||||||
* Only the auth half is memoized on authVersion — it's the expensive one
|
* Only the auth half is memoized on authVersion — it's the expensive one
|
||||||
* (cold getClaudeAIOAuthTokens memoize → sync `security` spawn, ~60ms/call,
|
* (cold getClaudeAIOAuthTokens memoize → sync `security` spawn, ~60ms/call,
|
||||||
* ~180ms total in profile v5 when token refresh cleared the cache mid-session).
|
* ~180ms total in profile v5 when token refresh cleared the cache mid-session).
|
||||||
* GB is a cheap cached-map lookup and stays outside the memo so a mid-session
|
* GB is a cheap cached-map lookup and stays outside the memo so a mid-session
|
||||||
* kill-switch flip still takes effect on the next render.
|
* kill-switch flip still takes effect on the next render.
|
||||||
*
|
|
||||||
* authVersion bumps on /login only. Background token refresh leaves it alone
|
|
||||||
* (user is still authed), so the auth memo stays correct without re-eval.
|
|
||||||
*/
|
*/
|
||||||
export function useVoiceEnabled(): boolean {
  // All hooks are called unconditionally (Rules of Hooks); the provider
  // branch lives inside the memo callback instead of around the hooks.
  const userIntent = useAppState(s => s.settings.voiceEnabled === true)
  const provider = useAppState(s => s.settings.voiceProvider)
  const authVersion = useAppState(s => s.authVersion)
  const usesDoubao = provider === 'doubao'
  // The Doubao backend does not need Anthropic auth, so short-circuit before
  // hasVoiceAuth() — previously the probe still ran for Doubao and its result
  // was thrown away. authVersion is listed only to invalidate the cached
  // auth result, hence the lint suppression.
  // eslint-disable-next-line react-hooks/exhaustive-deps
  const authed = useMemo(() => usesDoubao || hasVoiceAuth(), [authVersion, usesDoubao])
  // The GrowthBook check stays outside the memo so it is re-read on every render.
  return userIntent && authed && isVoiceGrowthBookEnabled()
}
|
||||||
|
|||||||
230
src/services/doubaoSTT.ts
Normal file
230
src/services/doubaoSTT.ts
Normal file
@@ -0,0 +1,230 @@
|
|||||||
|
// Doubao (豆包) ASR speech-to-text adapter for voice mode.
|
||||||
|
//
|
||||||
|
// Wraps the doubaoime-asr npm package to expose the same interface as
|
||||||
|
// voiceStreamSTT.ts. The doubao backend uses an AsyncGenerator-based
|
||||||
|
// streaming protocol internally; this adapter bridges it to the
|
||||||
|
// send/finalize/close pattern used by useVoice.ts.
|
||||||
|
|
||||||
|
import { homedir } from 'node:os'
|
||||||
|
import type { ASRResponse } from 'doubaoime-asr'
|
||||||
|
import type { FinalizeSource, VoiceStreamCallbacks, VoiceStreamConnection } from './voiceStreamSTT.js'
|
||||||
|
import { logForDebugging } from '../utils/debug.js'
|
||||||
|
import { logError } from '../utils/log.js'
|
||||||
|
|
||||||
|
// Re-export FinalizeSource so useVoice can import from either module
|
||||||
|
export type { FinalizeSource } from './voiceStreamSTT.js'
|
||||||
|
|
||||||
|
// Maximum time to wait for the generator to finish after end-of-stream signal.
|
||||||
|
const FINALIZE_SAFETY_TIMEOUT_MS = 5_000
|
||||||
|
|
||||||
|
// ─── AsyncIterable audio queue ─────────────────────────────────────────
|
||||||
|
|
||||||
|
// A push-based queue that implements AsyncIterable<Uint8Array>.
// Producers call push(chunk) to enqueue audio and push(null) to signal
// end-of-stream; abort() ends the stream and discards anything still buffered.
// Pending next() calls are kept in FIFO order, so a consumer that requests
// several chunks before the first one arrives gets every request settled
// (a single resolver slot would drop all but the latest request).
class AudioChunkQueue {
  // Chunks pushed while no consumer was waiting.
  private readonly chunks: Uint8Array[] = []
  // Resolvers of next() calls issued while the buffer was empty.
  // Invariant: waiters is non-empty only while chunks is empty.
  private readonly waiters: Array<(result: IteratorResult<Uint8Array>) => void> = []
  private done = false

  // Enqueue a chunk, or pass null to mark end-of-stream. Ignored once the
  // stream has ended. Chunks buffered before end-of-stream remain readable.
  push(chunk: Uint8Array | null): void {
    if (this.done) return
    if (chunk === null) {
      this.done = true
      this.settleWaiters()
      return
    }
    const waiter = this.waiters.shift()
    if (waiter) {
      waiter({ value: chunk, done: false })
    } else {
      this.chunks.push(chunk)
    }
  }

  // End the stream immediately and drop any buffered chunks.
  abort(): void {
    this.done = true
    this.chunks.length = 0
    this.settleWaiters()
  }

  // Every iterator returned here reads from the same shared buffer.
  [Symbol.asyncIterator](): AsyncIterator<Uint8Array> {
    return {
      next: async (): Promise<IteratorResult<Uint8Array>> => {
        const buffered = this.chunks.shift()
        if (buffered !== undefined) {
          return { value: buffered, done: false }
        }
        if (this.done) {
          return { value: undefined, done: true }
        }
        return new Promise<IteratorResult<Uint8Array>>((resolve) => {
          this.waiters.push(resolve)
        })
      },
    }
  }

  // Resolve every pending next() call with an end-of-stream result.
  private settleWaiters(): void {
    for (const waiter of this.waiters.splice(0)) {
      waiter({ value: undefined, done: true })
    }
  }
}
|
||||||
|
|
||||||
|
// ─── Availability ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Cached outcome of the doubaoime-asr import probe; null until a probe has completed.
let doubaoAvailable: boolean | null = null

/**
 * Probes whether the optional `doubaoime-asr` package can be imported.
 * The first completed probe is cached; later calls return the cached value.
 *
 * @returns true when the dynamic import succeeded, false when it rejected.
 */
export async function isDoubaoAvailable(): Promise<boolean> {
  if (doubaoAvailable !== null) return doubaoAvailable
  const importable = await import('doubaoime-asr').then(
    () => true,
    () => false,
  )
  doubaoAvailable = importable
  return importable
}
|
||||||
|
|
||||||
|
// Synchronous availability check. Returns the cached import-probe result when
// one exists; while nothing has been cached yet it optimistically reports
// true. The real import is attempted in connectDoubaoStream, which reports a
// failure through its onError callback.
export function isDoubaoAvailableSync(): boolean {
  return doubaoAvailable ?? true
}
|
||||||
|
|
||||||
|
// ─── Connection ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Starts a Doubao ASR session and exposes it through the VoiceStreamConnection
 * interface (send / finalize / close / isConnected).
 *
 * Steps, in order:
 *  1. Dynamically import `doubaoime-asr`. On failure, report a fatal error via
 *     `callbacks.onError` and resolve to null.
 *  2. Build an ASRConfig pointing at ~/.claude/tts/doubao/credentials.json and
 *     await `ensureCredentials()`. On failure, report a fatal error and
 *     resolve to null.
 *  3. Call `callbacks.onReady(connection)`.
 *  4. Consume `transcribeRealtime()` in the background, forwarding
 *     INTERIM_RESULT / FINAL_RESULT text to `callbacks.onTranscript` and
 *     errors to `callbacks.onError`.
 *
 * @param callbacks Receivers for ready / transcript / error / close events.
 * @param _options  Accepted but not read by this backend.
 * @returns The connection, or null when the package import or credential
 *          initialization failed.
 */
export async function connectDoubaoStream(
  callbacks: VoiceStreamCallbacks,
  _options?: { language?: string },
): Promise<VoiceStreamConnection | null> {
  let doubaoAsr: typeof import('doubaoime-asr')
  try {
    doubaoAsr = await import('doubaoime-asr')
  } catch {
    logError(new Error('[doubao-asr] Failed to import doubaoime-asr package'))
    callbacks.onError('doubaoime-asr package is not installed. Install it with: bun add doubaoime-asr', { fatal: true })
    return null
  }

  const { transcribeRealtime, ASRConfig, ResponseType } = doubaoAsr

  const queue = new AudioChunkQueue()
  // Set by finalize() and close(): no further audio is accepted.
  let finalized = false
  // Set by close() only: guards onClose against repeat calls and backs isConnected().
  let closed = false

  const connection: VoiceStreamConnection = {
    send(audioChunk: Buffer): void {
      if (finalized) return
      // Zero-copy view over the Buffer's bytes.
      queue.push(new Uint8Array(audioChunk.buffer, audioChunk.byteOffset, audioChunk.byteLength))
    },
    finalize(): Promise<FinalizeSource> {
      if (finalized) return Promise.resolve<FinalizeSource>('ws_already_closed')
      finalized = true
      queue.push(null) // signal end-of-stream to the generator
      // By design this backend resolves at once instead of waiting for the
      // recognizer to drain, so the caller can skip its 'processing' state.
      // FINAL_RESULT responses that arrive afterwards are still forwarded
      // through onTranscript by the consumer loop below.
      logForDebugging('[doubao-asr] Finalize — resolving immediately')
      return Promise.resolve<FinalizeSource>('post_closestream_endpoint')
    },
    close(): void {
      // Idempotent: onClose fires once no matter how often close() is called.
      if (closed) return
      closed = true
      finalized = true
      queue.abort()
      callbacks.onClose()
    },
    isConnected(): boolean {
      return !closed
    },
  }

  // Start the ASR session in the background
  const config = new ASRConfig({ credentialPath: `${homedir()}/.claude/tts/doubao/credentials.json` })

  // Ensure credentials are initialized (may auto-generate — per the original
  // author's note; behavior is defined by doubaoime-asr)
  try {
    await config.ensureCredentials()
  } catch (err) {
    logError(new Error(`[doubao-asr] Credential initialization failed: ${String(err)}`))
    callbacks.onError(`Doubao ASR 凭证初始化失败: ${String(err)}`, { fatal: true })
    return null
  }

  // Fire onReady immediately: audio is accepted through the queue before the
  // recognizer loop starts, so there is no handshake to wait for here.
  logForDebugging('[doubao-asr] Firing onReady immediately')
  callbacks.onReady(connection)

  // Consume the AsyncGenerator in the background
  void (async () => {
    try {
      const audioSource: AsyncIterable<Uint8Array> = queue
      const gen: AsyncGenerator<ASRResponse> = transcribeRealtime(audioSource, { config })

      for await (const resp of gen) {
        // After finalize()/close() only final results and the session-end
        // marker are processed; everything else is dropped.
        if (finalized && resp.type !== ResponseType.FINAL_RESULT && resp.type !== ResponseType.SESSION_FINISHED) {
          continue
        }

        switch (resp.type) {
          case ResponseType.SESSION_STARTED:
            logForDebugging('[doubao-asr] Session started')
            break
          case ResponseType.VAD_START:
            logForDebugging('[doubao-asr] VAD detected speech start')
            break
          case ResponseType.INTERIM_RESULT:
            if (resp.text) {
              callbacks.onTranscript(resp.text, false)
            }
            break
          case ResponseType.FINAL_RESULT:
            if (resp.text) {
              callbacks.onTranscript(resp.text, true)
            }
            break
          case ResponseType.ERROR:
            logError(new Error(`[doubao-asr] Error: ${resp.errorMsg}`))
            if (!finalized) {
              callbacks.onError(resp.errorMsg || 'Doubao ASR 识别错误')
            }
            break
          case ResponseType.SESSION_FINISHED:
            logForDebugging('[doubao-asr] Session finished')
            break
          default:
            break
        }
      }
    } catch (err) {
      logError(new Error(`[doubao-asr] Stream error: ${String(err)}`))
      if (!finalized) {
        callbacks.onError(`Doubao ASR 连接错误: ${String(err)}`)
      }
    } finally {
      // The recognizer is gone (finished or failed). Close the queue so later
      // send() calls are dropped instead of buffering audio nobody will read.
      queue.abort()
    }
  })()

  return connection
}
|
||||||
@@ -880,6 +880,10 @@ export const SettingsSchema = lazySchema(() =>
|
|||||||
.boolean()
|
.boolean()
|
||||||
.optional()
|
.optional()
|
||||||
.describe('Enable voice mode (hold-to-talk dictation)'),
|
.describe('Enable voice mode (hold-to-talk dictation)'),
|
||||||
|
voiceProvider: z
|
||||||
|
.enum(['anthropic', 'doubao'])
|
||||||
|
.optional()
|
||||||
|
.describe('Voice STT backend: "anthropic" (default) or "doubao" (Doubao ASR)'),
|
||||||
}
|
}
|
||||||
: {}),
|
: {}),
|
||||||
...(feature('KAIROS')
|
...(feature('KAIROS')
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ function makeCommand(name: string, opts?: Partial<Command>): Command {
|
|||||||
type: 'local',
|
type: 'local',
|
||||||
handler: () => {},
|
handler: () => {},
|
||||||
...opts,
|
...opts,
|
||||||
} as Command
|
} as unknown as Command
|
||||||
}
|
}
|
||||||
|
|
||||||
function makePromptCommand(
|
function makePromptCommand(
|
||||||
@@ -37,7 +37,7 @@ function makePromptCommand(
|
|||||||
handler: () => {},
|
handler: () => {},
|
||||||
source: 'userSettings',
|
source: 'userSettings',
|
||||||
...opts,
|
...opts,
|
||||||
} as Command
|
} as unknown as Command
|
||||||
}
|
}
|
||||||
|
|
||||||
// ─── isCommandInput ───────────────────────────────────────────────────
|
// ─── isCommandInput ───────────────────────────────────────────────────
|
||||||
|
|||||||
@@ -44,11 +44,18 @@ export function hasVoiceAuth(): boolean {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Full runtime check: auth + GrowthBook kill-switch. Callers: `/voice`
|
* Full runtime check for Anthropic voice_stream backend.
|
||||||
* (voice.ts, voice/index.ts), ConfigTool, VoiceModeNotice — command-time
|
* Returns true when both auth + GrowthBook kill-switch pass.
|
||||||
* paths where a fresh keychain read is acceptable. For React render
|
|
||||||
* paths use useVoiceEnabled() instead (memoizes the auth half).
|
|
||||||
*/
|
*/
|
||||||
export function isVoiceModeEnabled(): boolean {
  // Auth is checked first; the GrowthBook check runs only when auth passes.
  if (!hasVoiceAuth()) return false
  return isVoiceGrowthBookEnabled()
}
|
||||||
|
|
||||||
|
/**
 * Reports whether voice mode may be activated with any STT backend.
 * Only the GrowthBook check is consulted; Anthropic auth is deliberately not
 * required here because the Doubao backend does not use it.
 */
export function isVoiceAvailable(): boolean {
  const allowedByGrowthBook = isVoiceGrowthBookEnabled()
  return allowedByGrowthBook
}
|
||||||
|
|||||||
Reference in New Issue
Block a user