feat(tts): add microsoft voice listing

This commit is contained in:
Peter Steinberger 2026-03-16 20:23:58 -07:00
parent a71c61122d
commit 622f13253b
No known key found for this signature in database
5 changed files with 330 additions and 0 deletions

View File

@ -0,0 +1,189 @@
import { describe, expect, it, vi } from "vitest";
import type { OpenClawPluginCommandDefinition } from "../../src/plugins/types.js";
import { createPluginRuntimeMock } from "../test-utils/plugin-runtime-mock.js";
import register from "./index.js";
/**
 * Runs the plugin's `register` entry point against a mocked runtime and
 * captures the command definition it registers.
 *
 * @param config - Value returned by the mocked `runtime.config.loadConfig`.
 * @returns The captured command definition together with the mocked runtime.
 * @throws Error when `register` never calls `registerCommand`.
 */
function createHarness(config: Record<string, unknown>) {
  const runtime = createPluginRuntimeMock({
    config: {
      loadConfig: vi.fn(() => config),
      writeConfigFile: vi.fn().mockResolvedValue(undefined),
    },
    tts: { listVoices: vi.fn() },
  });

  // Filled in by the registerCommand mock below; the latest registration wins.
  let captured: OpenClawPluginCommandDefinition | undefined;
  const registerCommand = vi.fn((definition: OpenClawPluginCommandDefinition) => {
    captured = definition;
  });

  register({ runtime, registerCommand } as never);

  if (!captured) {
    throw new Error("talk-voice command not registered");
  }
  return { command: captured, runtime };
}
/**
 * Builds the context object passed to the command handler in these tests.
 *
 * @param args - Raw argument string following `/voice`; may be empty.
 * @param channel - Channel name, also reused as `channelId`.
 * @returns A context with an authorized sender, an empty config and mocked
 *   conversation-binding callbacks.
 */
function createCommandContext(args: string, channel: string = "discord") {
  // With no arguments the command body is the bare command name.
  let commandBody = "/voice";
  if (args) {
    commandBody = `/voice ${args}`;
  }

  return {
    args,
    channel,
    channelId: channel,
    isAuthorizedSender: true,
    commandBody,
    config: {},
    requestConversationBinding: vi.fn(),
    detachConversationBinding: vi.fn(),
    getCurrentConversationBinding: vi.fn(),
  };
}
describe("talk-voice plugin", () => {
  /**
   * Returns a fresh talk config whose only provider entry is the active one.
   * A new object is built on every call so inputs and expectations never
   * share references.
   */
  const talkConfigFor = (provider: string, settings: Record<string, unknown>) => ({
    talk: {
      provider,
      providers: { [provider]: settings },
    },
  });

  it("reports active provider status", async () => {
    const harness = createHarness(
      talkConfigFor("microsoft", {
        voiceId: "en-US-AvaNeural",
        apiKey: "secret-token",
      }),
    );

    const reply = await harness.command.handler(createCommandContext(""));

    const expectedText = [
      "Talk voice status:\n",
      "- provider: microsoft\n",
      "- talk.voiceId: en-US-AvaNeural\n",
      "- microsoft.apiKey: secret…",
    ].join("");
    expect(reply).toEqual({ text: expectedText });
  });

  it("lists voices from the active provider", async () => {
    const elevenLabsSettings = () => ({
      apiKey: "sk-eleven",
      baseUrl: "https://voices.example.test",
    });
    const harness = createHarness(talkConfigFor("elevenlabs", elevenLabsSettings()));
    const listVoices = vi.mocked(harness.runtime.tts.listVoices);
    listVoices.mockResolvedValue([
      { id: "voice-a", name: "Claudia", category: "general" },
      { id: "voice-b", name: "Bert" },
    ]);

    const reply = await harness.command.handler(createCommandContext("list 1"));

    // The provider credentials are forwarded alongside the full config.
    expect(harness.runtime.tts.listVoices).toHaveBeenCalledWith({
      provider: "elevenlabs",
      cfg: talkConfigFor("elevenlabs", elevenLabsSettings()),
      apiKey: "sk-eleven",
      baseUrl: "https://voices.example.test",
    });
    const expectedText = [
      "ElevenLabs voices: 2\n\n",
      "- Claudia · general\n",
      " id: voice-a\n\n",
      "(showing first 1)",
    ].join("");
    expect(reply).toEqual({ text: expectedText });
  });

  it("writes canonical talk provider config and legacy elevenlabs voice id", async () => {
    const harness = createHarness(talkConfigFor("elevenlabs", { apiKey: "sk-eleven" }));
    const listVoices = vi.mocked(harness.runtime.tts.listVoices);
    listVoices.mockResolvedValue([{ id: "voice-a", name: "Claudia" }]);

    const reply = await harness.command.handler(createCommandContext("set Claudia"));

    // The voice id is written both under the provider and at the top level.
    expect(harness.runtime.config.writeConfigFile).toHaveBeenCalledWith({
      talk: {
        provider: "elevenlabs",
        providers: {
          elevenlabs: {
            apiKey: "sk-eleven",
            voiceId: "voice-a",
          },
        },
        voiceId: "voice-a",
      },
    });
    expect(reply).toEqual({
      text: "✅ ElevenLabs Talk voice set to Claudia\nvoice-a",
    });
  });

  it("writes provider voice id without legacy top-level field for microsoft", async () => {
    const harness = createHarness(talkConfigFor("microsoft", {}));
    const listVoices = vi.mocked(harness.runtime.tts.listVoices);
    listVoices.mockResolvedValue([{ id: "en-US-AvaNeural", name: "Ava" }]);

    await harness.command.handler(createCommandContext("set Ava"));

    expect(harness.runtime.config.writeConfigFile).toHaveBeenCalledWith(
      talkConfigFor("microsoft", { voiceId: "en-US-AvaNeural" }),
    );
  });

  it("returns provider lookup errors cleanly", async () => {
    const harness = createHarness(talkConfigFor("microsoft", {}));
    const listVoices = vi.mocked(harness.runtime.tts.listVoices);
    listVoices.mockRejectedValue(
      new Error("speech provider microsoft does not support voice listing"),
    );

    const reply = await harness.command.handler(createCommandContext("list"));

    expect(reply).toEqual({
      text: "Microsoft voice list failed: speech provider microsoft does not support voice listing",
    });
  });
});

View File

@ -31,6 +31,15 @@ func TestDocsPiModelUsesProviderDefault(t *testing.T) {
} }
} }
// TestDocsPiModelKeepsOpenAIDefaultAtGPT54 checks that docsPiModel returns
// defaultOpenAIModel when the provider env var is "openai" and the model
// override env var is set to the empty string.
func TestDocsPiModelKeepsOpenAIDefaultAtGPT54(t *testing.T) {
	t.Setenv(envDocsI18nProvider, "openai")
	t.Setenv(envDocsI18nModel, "")

	got := docsPiModel()
	if got == defaultOpenAIModel {
		return
	}
	t.Fatalf("expected OpenAI default model %q, got %q", defaultOpenAIModel, got)
}
func TestDocsPiModelPrefersExplicitOverride(t *testing.T) { func TestDocsPiModelPrefersExplicitOverride(t *testing.T) {
t.Setenv(envDocsI18nProvider, "openai") t.Setenv(envDocsI18nProvider, "openai")
t.Setenv(envDocsI18nModel, "gpt-5.2") t.Setenv(envDocsI18nModel, "gpt-5.2")

View File

@ -0,0 +1,60 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { listMicrosoftVoices } from "./microsoft.js";
describe("listMicrosoftVoices", () => {
  const realFetch = globalThis.fetch;

  /** Replaces the global fetch with a mock that resolves to `response`. */
  const stubFetch = (response: Response): void => {
    globalThis.fetch = vi.fn().mockResolvedValue(response) as typeof globalThis.fetch;
  };

  afterEach(() => {
    // Undo the per-test fetch stub before restoring other mocks.
    globalThis.fetch = realFetch;
    vi.restoreAllMocks();
  });

  it("maps Microsoft voice metadata into speech voice options", async () => {
    const payload = [
      {
        ShortName: "en-US-AvaNeural",
        FriendlyName: "Microsoft Ava Online (Natural) - English (United States)",
        Locale: "en-US",
        Gender: "Female",
        VoiceTag: {
          ContentCategories: ["General"],
          VoicePersonalities: ["Friendly", "Positive"],
        },
      },
    ];
    stubFetch(new Response(JSON.stringify(payload), { status: 200 }));

    const result = await listMicrosoftVoices();

    expect(result).toEqual([
      {
        id: "en-US-AvaNeural",
        name: "Microsoft Ava Online (Natural) - English (United States)",
        category: "General",
        description: "en-US · Female · Friendly, Positive",
      },
    ]);

    // The request must target the voice list endpoint with the DRM headers.
    const expectedHeaders = expect.objectContaining({
      Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
      "Sec-MS-GEC": expect.any(String),
      "Sec-MS-GEC-Version": expect.stringContaining("1-"),
    });
    expect(globalThis.fetch).toHaveBeenCalledWith(
      expect.stringContaining("/voices/list?trustedclienttoken="),
      expect.objectContaining({ headers: expectedHeaders }),
    );
  });

  it("throws on Microsoft voice list failures", async () => {
    stubFetch(new Response("nope", { status: 503 }));

    const pending = listMicrosoftVoices();

    await expect(pending).rejects.toThrow("Microsoft voices API error (503)");
  });
});

View File

@ -1,17 +1,83 @@
import { mkdirSync, mkdtempSync, readFileSync, rmSync } from "node:fs"; import { mkdirSync, mkdtempSync, readFileSync, rmSync } from "node:fs";
import path from "node:path"; import path from "node:path";
import {
CHROMIUM_FULL_VERSION,
TRUSTED_CLIENT_TOKEN,
generateSecMsGecToken,
} from "node-edge-tts/dist/drm.js";
import { resolvePreferredOpenClawTmpDir } from "../../infra/tmp-openclaw-dir.js"; import { resolvePreferredOpenClawTmpDir } from "../../infra/tmp-openclaw-dir.js";
import { isVoiceCompatibleAudio } from "../../media/audio.js"; import { isVoiceCompatibleAudio } from "../../media/audio.js";
import type { SpeechProviderPlugin } from "../../plugins/types.js"; import type { SpeechProviderPlugin } from "../../plugins/types.js";
import type { SpeechVoiceOption } from "../provider-types.js";
import { edgeTTS, inferEdgeExtension } from "../tts-core.js"; import { edgeTTS, inferEdgeExtension } from "../tts-core.js";
const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3"; const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3";
/**
 * One entry of the Microsoft voice list response, limited to the fields this
 * module reads. All fields are optional, so consumers must handle missing
 * values.
 */
type MicrosoftVoiceListEntry = {
/** Voice identifier; trimmed and used as the option `id`. Entries without it are dropped. */
ShortName?: string;
/** Preferred display name; `ShortName` is the fallback when this is blank. */
FriendlyName?: string;
/** Locale tag; first part of the generated description. */
Locale?: string;
/** Voice gender label; second part of the generated description. */
Gender?: string;
VoiceTag?: {
/** The first non-blank value becomes the option `category`. */
ContentCategories?: string[];
/** Joined with ", " as the last part of the generated description. */
VoicePersonalities?: string[];
};
};
/**
 * Assembles the request headers sent with the voice list request.
 *
 * The Chrome/Edge versions in the `User-Agent` use the first dot-separated
 * segment of `CHROMIUM_FULL_VERSION` (falling back to "0" when it is empty),
 * and a fresh `Sec-MS-GEC` token is generated on every call.
 *
 * @returns A plain header-name to header-value map.
 */
function buildMicrosoftVoiceHeaders(): Record<string, string> {
  const [leadingSegment] = CHROMIUM_FULL_VERSION.split(".");
  const majorVersion = leadingSegment || "0";
  const browserVersion = `${majorVersion}.0.0.0`;

  const userAgent = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "AppleWebKit/537.36 (KHTML, like Gecko)",
    `Chrome/${browserVersion}`,
    "Safari/537.36",
    `Edg/${browserVersion}`,
  ].join(" ");

  const headers: Record<string, string> = {
    Authority: "speech.platform.bing.com",
    Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
    Accept: "*/*",
    "User-Agent": userAgent,
    "Sec-MS-GEC": generateSecMsGecToken(),
    "Sec-MS-GEC-Version": `1-${CHROMIUM_FULL_VERSION}`,
  };
  return headers;
}
/**
 * Builds the human-readable description for a voice in the form
 * `Locale · Gender · Personality, Personality`.
 *
 * Every part is trimmed before use. Blank values, and values that are not
 * strings at runtime, are skipped so they cannot leave stray whitespace or
 * dangling separators in the result.
 *
 * @param entry - Raw voice list entry.
 * @returns The joined description, or `undefined` when no usable part remains.
 */
function formatMicrosoftVoiceDescription(entry: MicrosoftVoiceListEntry): string | undefined {
  // The entry originates from unvalidated JSON, so check types at runtime
  // instead of trusting the declared shape.
  const clean = (value: unknown): string => (typeof value === "string" ? value.trim() : "");

  const rawPersonalities: unknown = entry.VoiceTag?.VoicePersonalities;
  const personalities = Array.isArray(rawPersonalities)
    ? rawPersonalities.map(clean).filter((value) => value.length > 0)
    : [];

  const parts = [clean(entry.Locale), clean(entry.Gender), personalities.join(", ")].filter(
    (part) => part.length > 0,
  );
  return parts.length > 0 ? parts.join(" · ") : undefined;
}
/**
 * Fetches the Microsoft read-aloud voice catalog and maps it to speech voice
 * options.
 *
 * The response body is treated as untrusted: a payload that is not an array
 * yields an empty list, non-object entries are skipped, and entries without a
 * non-blank string `ShortName` are dropped. `FriendlyName` is used as the
 * display name, falling back to the voice id.
 *
 * @returns One option per usable voice entry, in response order.
 * @throws Error `Microsoft voices API error (<status>)` when the response
 *   status is not OK. Failures raised by `fetch` or by JSON parsing propagate
 *   unchanged.
 */
export async function listMicrosoftVoices(): Promise<SpeechVoiceOption[]> {
  const response = await fetch(
    "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list" +
      `?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`,
    {
      headers: buildMicrosoftVoiceHeaders(),
    },
  );
  if (!response.ok) {
    throw new Error(`Microsoft voices API error (${response.status})`);
  }

  const payload: unknown = await response.json();
  if (!Array.isArray(payload)) {
    return [];
  }

  const asTrimmedString = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";

  const options: SpeechVoiceOption[] = [];
  for (const raw of payload as unknown[]) {
    // Guard against null or primitive entries before reading any property.
    if (typeof raw !== "object" || raw === null) {
      continue;
    }
    const entry = raw as MicrosoftVoiceListEntry;

    const id = asTrimmedString(entry.ShortName);
    if (id.length === 0) {
      continue;
    }

    const rawCategories: unknown = entry.VoiceTag?.ContentCategories;
    const category = Array.isArray(rawCategories)
      ? rawCategories.map(asTrimmedString).find((value) => value.length > 0)
      : undefined;

    options.push({
      id,
      name: asTrimmedString(entry.FriendlyName) || id,
      category,
      description: formatMicrosoftVoiceDescription(entry),
    });
  }
  return options;
}
export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin { export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin {
return { return {
id: "microsoft", id: "microsoft",
label: "Microsoft", label: "Microsoft",
aliases: ["edge"], aliases: ["edge"],
listVoices: async () => await listMicrosoftVoices(),
isConfigured: ({ config }) => config.edge.enabled, isConfigured: ({ config }) => config.edge.enabled,
synthesize: async (req) => { synthesize: async (req) => {
const tempRoot = resolvePreferredOpenClawTmpDir(); const tempRoot = resolvePreferredOpenClawTmpDir();

View File

@ -16,3 +16,9 @@ declare module "node-edge-tts" {
ttsPromise(text: string, outputPath: string): Promise<void>; ttsPromise(text: string, outputPath: string): Promise<void>;
} }
} }
/**
 * Ambient typings for the deep import `node-edge-tts/dist/drm.js`.
 * NOTE(review): these shapes are declared by hand; confirm them against the
 * installed package version when upgrading `node-edge-tts`.
 */
declare module "node-edge-tts/dist/drm.js" {
/** Version string; presumably dot-separated with the major version first — TODO confirm. */
export const CHROMIUM_FULL_VERSION: string;
/** Token string exported by the library; its meaning is not visible from this declaration. */
export const TRUSTED_CLIENT_TOKEN: string;
/** Returns a token string; how it is generated is defined by the library and not visible here. */
export function generateSecMsGecToken(): string;
}