From 622f13253bc43a531ff5b1dbc737aaf87bac26da Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 16 Mar 2026 20:23:58 -0700 Subject: [PATCH] feat(tts): add microsoft voice listing --- extensions/talk-voice/index.test.ts | 189 ++++++++++++++++++++++++++++ scripts/docs-i18n/util_test.go | 9 ++ src/tts/providers/microsoft.test.ts | 60 +++++++++ src/tts/providers/microsoft.ts | 66 ++++++++++ src/types/node-edge-tts.d.ts | 6 + 5 files changed, 330 insertions(+) create mode 100644 extensions/talk-voice/index.test.ts create mode 100644 src/tts/providers/microsoft.test.ts diff --git a/extensions/talk-voice/index.test.ts b/extensions/talk-voice/index.test.ts new file mode 100644 index 00000000000..6f945e9dd0a --- /dev/null +++ b/extensions/talk-voice/index.test.ts @@ -0,0 +1,189 @@ +import { describe, expect, it, vi } from "vitest"; +import type { OpenClawPluginCommandDefinition } from "../../src/plugins/types.js"; +import { createPluginRuntimeMock } from "../test-utils/plugin-runtime-mock.js"; +import register from "./index.js"; + +/** Test harness: builds a mocked plugin runtime whose loadConfig returns the given config, runs register against a stub api, and returns the captured command together with the runtime. Throws if register never calls registerCommand. NOTE(review): Record appears without type arguments here, which looks like extraction damage - confirm against the original patch. */ function createHarness(config: Record) { + let command: OpenClawPluginCommandDefinition | undefined; + const runtime = createPluginRuntimeMock({ + config: { + loadConfig: vi.fn(() => config), + writeConfigFile: vi.fn().mockResolvedValue(undefined), + }, + tts: { + listVoices: vi.fn(), + }, + }); + const api = { + runtime, + registerCommand: vi.fn((definition: OpenClawPluginCommandDefinition) => { + command = definition; + }), + }; + register(api as never); + if (!command) { + throw new Error("talk-voice command not registered"); + } + return { command, runtime }; +} + +/** Builds the context object handed to the command handler: args is forwarded unchanged, channel is reused as channelId, and commandBody is /voice followed by args when args is non-empty, otherwise just /voice. The three conversation-binding callbacks are bare mocks. */ function createCommandContext(args: string, channel: string = "discord") { + return { + args, + channel, + channelId: channel, + isAuthorizedSender: true, + commandBody: args ? 
`/voice ${args}` : "/voice", + config: {}, + requestConversationBinding: vi.fn(), + detachConversationBinding: vi.fn(), + getCurrentConversationBinding: vi.fn(), + }; +} + +/** Drives the captured command handler with empty args, list, and set inputs, and checks the text result, the arguments passed to listVoices, the object passed to writeConfigFile, and the message produced when listVoices rejects. */ describe("talk-voice plugin", () => { + it("reports active provider status", async () => { + const { command } = createHarness({ + talk: { + provider: "microsoft", + providers: { + microsoft: { + voiceId: "en-US-AvaNeural", + apiKey: "secret-token", + }, + }, + }, + }); + + const result = await command.handler(createCommandContext("")); + + expect(result).toEqual({ + text: + "Talk voice status:\n" + + "- provider: microsoft\n" + + "- talk.voiceId: en-US-AvaNeural\n" + + "- microsoft.apiKey: secret…", + }); + }); + + it("lists voices from the active provider", async () => { + const { command, runtime } = createHarness({ + talk: { + provider: "elevenlabs", + providers: { + elevenlabs: { + apiKey: "sk-eleven", + baseUrl: "https://voices.example.test", + }, + }, + }, + }); + vi.mocked(runtime.tts.listVoices).mockResolvedValue([ + { id: "voice-a", name: "Claudia", category: "general" }, + { id: "voice-b", name: "Bert" }, + ]); + + const result = await command.handler(createCommandContext("list 1")); + + expect(runtime.tts.listVoices).toHaveBeenCalledWith({ + provider: "elevenlabs", + cfg: { + talk: { + provider: "elevenlabs", + providers: { + elevenlabs: { + apiKey: "sk-eleven", + baseUrl: "https://voices.example.test", + }, + }, + }, + }, + apiKey: "sk-eleven", + baseUrl: "https://voices.example.test", + }); + expect(result).toEqual({ + text: + "ElevenLabs voices: 2\n\n" + + "- Claudia · general\n" + + " id: voice-a\n\n" + + "(showing first 1)", + }); + }); + + it("writes canonical talk provider config and legacy elevenlabs voice id", async () => { + const { command, runtime } = createHarness({ + talk: { + provider: "elevenlabs", + providers: { + elevenlabs: { + apiKey: "sk-eleven", + }, + }, + }, + }); + vi.mocked(runtime.tts.listVoices).mockResolvedValue([{ id: "voice-a", name: "Claudia" }]); + + 
const result = await command.handler(createCommandContext("set Claudia")); + + expect(runtime.config.writeConfigFile).toHaveBeenCalledWith({ + talk: { + provider: "elevenlabs", + providers: { + elevenlabs: { + apiKey: "sk-eleven", + voiceId: "voice-a", + }, + }, + voiceId: "voice-a", + }, + }); + expect(result).toEqual({ + text: "✅ ElevenLabs Talk voice set to Claudia\nvoice-a", + }); + }); + + it("writes provider voice id without legacy top-level field for microsoft", async () => { + const { command, runtime } = createHarness({ + talk: { + provider: "microsoft", + providers: { + microsoft: {}, + }, + }, + }); + vi.mocked(runtime.tts.listVoices).mockResolvedValue([{ id: "en-US-AvaNeural", name: "Ava" }]); + + await command.handler(createCommandContext("set Ava")); + + expect(runtime.config.writeConfigFile).toHaveBeenCalledWith({ + talk: { + provider: "microsoft", + providers: { + microsoft: { + voiceId: "en-US-AvaNeural", + }, + }, + }, + }); + }); + + it("returns provider lookup errors cleanly", async () => { + const { command, runtime } = createHarness({ + talk: { + provider: "microsoft", + providers: { + microsoft: {}, + }, + }, + }); + vi.mocked(runtime.tts.listVoices).mockRejectedValue( + new Error("speech provider microsoft does not support voice listing"), + ); + + const result = await command.handler(createCommandContext("list")); + + expect(result).toEqual({ + text: "Microsoft voice list failed: speech provider microsoft does not support voice listing", + }); + }); +}); diff --git a/scripts/docs-i18n/util_test.go b/scripts/docs-i18n/util_test.go index 77b5ca82a73..30dcb14a07d 100644 --- a/scripts/docs-i18n/util_test.go +++ b/scripts/docs-i18n/util_test.go @@ -31,6 +31,15 @@ func TestDocsPiModelUsesProviderDefault(t *testing.T) { } } +/* Sets the provider env var to openai and the model env var to the empty string, then requires docsPiModel to return defaultOpenAIModel. */ func TestDocsPiModelKeepsOpenAIDefaultAtGPT54(t *testing.T) { + t.Setenv(envDocsI18nProvider, "openai") + t.Setenv(envDocsI18nModel, "") + + if got := docsPiModel(); got != defaultOpenAIModel { + t.Fatalf("expected OpenAI 
default model %q, got %q", defaultOpenAIModel, got) + } +} + func TestDocsPiModelPrefersExplicitOverride(t *testing.T) { t.Setenv(envDocsI18nProvider, "openai") t.Setenv(envDocsI18nModel, "gpt-5.2") diff --git a/src/tts/providers/microsoft.test.ts b/src/tts/providers/microsoft.test.ts new file mode 100644 index 00000000000..fa82456be00 --- /dev/null +++ b/src/tts/providers/microsoft.test.ts @@ -0,0 +1,60 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import { listMicrosoftVoices } from "./microsoft.js"; + +/** Replaces globalThis.fetch with a mock to check the mapping of one voice entry, the URL and headers of the request, and the error thrown for a 503 response. afterEach puts the original fetch back and restores all mocks. */ describe("listMicrosoftVoices", () => { + const originalFetch = globalThis.fetch; + + afterEach(() => { + globalThis.fetch = originalFetch; + vi.restoreAllMocks(); + }); + + it("maps Microsoft voice metadata into speech voice options", async () => { + globalThis.fetch = vi.fn().mockResolvedValue( + new Response( + JSON.stringify([ + { + ShortName: "en-US-AvaNeural", + FriendlyName: "Microsoft Ava Online (Natural) - English (United States)", + Locale: "en-US", + Gender: "Female", + VoiceTag: { + ContentCategories: ["General"], + VoicePersonalities: ["Friendly", "Positive"], + }, + }, + ]), + { status: 200 }, + ), + ) as typeof globalThis.fetch; + + const voices = await listMicrosoftVoices(); + + expect(voices).toEqual([ + { + id: "en-US-AvaNeural", + name: "Microsoft Ava Online (Natural) - English (United States)", + category: "General", + description: "en-US · Female · Friendly, Positive", + }, + ]); + expect(globalThis.fetch).toHaveBeenCalledWith( + expect.stringContaining("/voices/list?trustedclienttoken="), + expect.objectContaining({ + headers: expect.objectContaining({ + Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold", + "Sec-MS-GEC": expect.any(String), + "Sec-MS-GEC-Version": expect.stringContaining("1-"), + }), + }), + ); + }); + + it("throws on Microsoft voice list failures", async () => { + globalThis.fetch = vi + .fn() + .mockResolvedValue(new Response("nope", { status: 503 })) as typeof globalThis.fetch; + + await 
expect(listMicrosoftVoices()).rejects.toThrow("Microsoft voices API error (503)"); + }); +}); diff --git a/src/tts/providers/microsoft.ts b/src/tts/providers/microsoft.ts index ee31e35a204..06958931ad8 100644 --- a/src/tts/providers/microsoft.ts +++ b/src/tts/providers/microsoft.ts @@ -1,17 +1,83 @@ import { mkdirSync, mkdtempSync, readFileSync, rmSync } from "node:fs"; import path from "node:path"; +import { + CHROMIUM_FULL_VERSION, + TRUSTED_CLIENT_TOKEN, + generateSecMsGecToken, +} from "node-edge-tts/dist/drm.js"; import { resolvePreferredOpenClawTmpDir } from "../../infra/tmp-openclaw-dir.js"; import { isVoiceCompatibleAudio } from "../../media/audio.js"; import type { SpeechProviderPlugin } from "../../plugins/types.js"; +import type { SpeechVoiceOption } from "../provider-types.js"; import { edgeTTS, inferEdgeExtension } from "../tts-core.js"; const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3"; +/** Fields this module reads from one entry of the voice list response body. Every field is optional. */ type MicrosoftVoiceListEntry = { + ShortName?: string; + FriendlyName?: string; + Locale?: string; + Gender?: string; + VoiceTag?: { + ContentCategories?: string[]; + VoicePersonalities?: string[]; + }; +}; + +/** Builds the headers sent with the voice list request. The major version used in the User-Agent is the first dot-separated segment of CHROMIUM_FULL_VERSION, falling back to 0 when that segment is empty. A new Sec-MS-GEC value is requested from generateSecMsGecToken on every call. NOTE(review): the Record return type has no type arguments here, likely extraction damage - confirm against the original patch. */ function buildMicrosoftVoiceHeaders(): Record { + const major = CHROMIUM_FULL_VERSION.split(".")[0] || "0"; + return { + Authority: "speech.platform.bing.com", + Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold", + Accept: "*/*", + "User-Agent": + `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ` + + `(KHTML, like Gecko) Chrome/${major}.0.0.0 Safari/537.36 Edg/${major}.0.0.0`, + "Sec-MS-GEC": generateSecMsGecToken(), + "Sec-MS-GEC-Version": `1-${CHROMIUM_FULL_VERSION}`, + }; +} + +/** Builds the description shown for a voice: locale, gender, and the comma-joined truthy personalities, joined with a spaced middle dot. Parts that are missing or blank after trimming are skipped, and undefined is returned when no part is left. */ function formatMicrosoftVoiceDescription(entry: MicrosoftVoiceListEntry): string | undefined { + const parts = [entry.Locale, entry.Gender]; + const personalities = entry.VoiceTag?.VoicePersonalities?.filter(Boolean) ?? 
[]; + if (personalities.length > 0) { + parts.push(personalities.join(", ")); + } + const filtered = parts.filter((part): part is string => Boolean(part?.trim())); + return filtered.length > 0 ? filtered.join(" · ") : undefined; +} + +/** Fetches the voice list endpoint with TRUSTED_CLIENT_TOKEN in the query string and the headers from buildMicrosoftVoiceHeaders. Throws an Error containing the HTTP status when response.ok is false. Returns an empty array when the parsed JSON is not an array. Otherwise each entry becomes an object with id (trimmed ShortName), name (trimmed FriendlyName, else trimmed ShortName, else undefined), category (first ContentCategories item that is non-blank, returned untrimmed) and description, and entries with an empty id are dropped. NOTE(review): the JSON body is cast without validation, so a null entry or a non-string category item would throw during mapping - consider guarding. NOTE(review): Promise has no type argument here, presumably a Promise of SpeechVoiceOption array given the type import - confirm against the original patch. */ export async function listMicrosoftVoices(): Promise { + const response = await fetch( + "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list" + + `?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`, + { + headers: buildMicrosoftVoiceHeaders(), + }, + ); + if (!response.ok) { + throw new Error(`Microsoft voices API error (${response.status})`); + } + const voices = (await response.json()) as MicrosoftVoiceListEntry[]; + return Array.isArray(voices) + ? voices + .map((voice) => ({ + id: voice.ShortName?.trim() ?? "", + name: voice.FriendlyName?.trim() || voice.ShortName?.trim() || undefined, + category: voice.VoiceTag?.ContentCategories?.find((value) => value.trim().length > 0), + description: formatMicrosoftVoiceDescription(voice), + })) + .filter((voice) => voice.id.length > 0) + : []; +} + export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin { return { id: "microsoft", label: "Microsoft", aliases: ["edge"], + listVoices: async () => await listMicrosoftVoices(), isConfigured: ({ config }) => config.edge.enabled, synthesize: async (req) => { const tempRoot = resolvePreferredOpenClawTmpDir(); diff --git a/src/types/node-edge-tts.d.ts b/src/types/node-edge-tts.d.ts index eaaaa9cdf5a..b800c986cb8 100644 --- a/src/types/node-edge-tts.d.ts +++ b/src/types/node-edge-tts.d.ts @@ -16,3 +16,9 @@ declare module "node-edge-tts" { ttsPromise(text: string, outputPath: string): Promise; } } + +/** Ambient typings for the three drm.js exports that src/tts/providers/microsoft.ts imports: two string constants and a token generator returning a string. */ declare module "node-edge-tts/dist/drm.js" { + export const CHROMIUM_FULL_VERSION: string; + export const TRUSTED_CLIENT_TOKEN: string; + export function generateSecMsGecToken(): string; +}