feat(tts): add microsoft voice listing
This commit is contained in:
parent
a71c61122d
commit
622f13253b
189
extensions/talk-voice/index.test.ts
Normal file
189
extensions/talk-voice/index.test.ts
Normal file
@ -0,0 +1,189 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawPluginCommandDefinition } from "../../src/plugins/types.js";
|
||||
import { createPluginRuntimeMock } from "../test-utils/plugin-runtime-mock.js";
|
||||
import register from "./index.js";
|
||||
|
||||
/**
 * Registers the talk-voice plugin against a mocked plugin runtime and returns
 * the command definition the plugin registered, together with that runtime.
 *
 * @param config - Object returned by the mocked `runtime.config.loadConfig()`.
 * @returns The most recently registered command and the mocked runtime.
 * @throws Error when the plugin did not register any command.
 */
function createHarness(config: Record<string, unknown>) {
  // Every definition passed to registerCommand is collected; the last one wins.
  const registered: OpenClawPluginCommandDefinition[] = [];

  const configMock = {
    loadConfig: vi.fn(() => config),
    writeConfigFile: vi.fn().mockResolvedValue(undefined),
  };
  const ttsMock = {
    listVoices: vi.fn(),
  };
  const runtime = createPluginRuntimeMock({ config: configMock, tts: ttsMock });

  const registerCommand = vi.fn((definition: OpenClawPluginCommandDefinition) => {
    registered.push(definition);
  });

  // The plugin only receives the subset of the API these tests exercise.
  register({ runtime, registerCommand } as never);

  const command = registered[registered.length - 1];
  if (!command) {
    throw new Error("talk-voice command not registered");
  }
  return { command, runtime };
}
|
||||
|
||||
/**
 * Builds the context object handed to the `/voice` command handler in tests.
 *
 * @param args - Raw argument string following `/voice`; may be empty.
 * @param channel - Value used for both `channel` and `channelId`.
 * @returns A context with an authorized sender, an empty config and mocked
 *   conversation-binding callbacks.
 */
function createCommandContext(args: string, channel: string = "discord") {
  // An empty argument string yields the bare command without a trailing space.
  let commandBody = "/voice";
  if (args) {
    commandBody = `/voice ${args}`;
  }

  const context = {
    args,
    channel,
    channelId: channel,
    isAuthorizedSender: true,
    commandBody,
    config: {},
    requestConversationBinding: vi.fn(),
    detachConversationBinding: vi.fn(),
    getCurrentConversationBinding: vi.fn(),
  };
  return context;
}
|
||||
|
||||
// Behavioural tests for the `/voice` command registered by the talk-voice plugin.
describe("talk-voice plugin", () => {
  // Fresh config objects are built per use so expectations never share
  // references with the objects handed to the command handler.
  const elevenLabsListConfig = () => ({
    talk: {
      provider: "elevenlabs",
      providers: {
        elevenlabs: {
          apiKey: "sk-eleven",
          baseUrl: "https://voices.example.test",
        },
      },
    },
  });

  const emptyMicrosoftConfig = () => ({
    talk: {
      provider: "microsoft",
      providers: {
        microsoft: {},
      },
    },
  });

  it("reports active provider status", async () => {
    const { command } = createHarness({
      talk: {
        provider: "microsoft",
        providers: {
          microsoft: {
            voiceId: "en-US-AvaNeural",
            apiKey: "secret-token",
          },
        },
      },
    });

    const reply = await command.handler(createCommandContext(""));

    const expectedLines = [
      "Talk voice status:",
      "- provider: microsoft",
      "- talk.voiceId: en-US-AvaNeural",
      "- microsoft.apiKey: secret…",
    ];
    expect(reply).toEqual({ text: expectedLines.join("\n") });
  });

  it("lists voices from the active provider", async () => {
    const { command, runtime } = createHarness(elevenLabsListConfig());
    const availableVoices = [
      { id: "voice-a", name: "Claudia", category: "general" },
      { id: "voice-b", name: "Bert" },
    ];
    vi.mocked(runtime.tts.listVoices).mockResolvedValue(availableVoices);

    const reply = await command.handler(createCommandContext("list 1"));

    expect(runtime.tts.listVoices).toHaveBeenCalledWith({
      provider: "elevenlabs",
      cfg: elevenLabsListConfig(),
      apiKey: "sk-eleven",
      baseUrl: "https://voices.example.test",
    });

    const expectedLines = [
      "ElevenLabs voices: 2",
      "",
      "- Claudia · general",
      " id: voice-a",
      "",
      "(showing first 1)",
    ];
    expect(reply).toEqual({ text: expectedLines.join("\n") });
  });

  it("writes canonical talk provider config and legacy elevenlabs voice id", async () => {
    const { command, runtime } = createHarness({
      talk: {
        provider: "elevenlabs",
        providers: {
          elevenlabs: {
            apiKey: "sk-eleven",
          },
        },
      },
    });
    vi.mocked(runtime.tts.listVoices).mockResolvedValue([{ id: "voice-a", name: "Claudia" }]);

    const reply = await command.handler(createCommandContext("set Claudia"));

    // The voice id is expected under the provider entry and at the top level of `talk`.
    const expectedWrite = {
      talk: {
        provider: "elevenlabs",
        providers: {
          elevenlabs: {
            apiKey: "sk-eleven",
            voiceId: "voice-a",
          },
        },
        voiceId: "voice-a",
      },
    };
    expect(runtime.config.writeConfigFile).toHaveBeenCalledWith(expectedWrite);
    expect(reply).toEqual({
      text: "✅ ElevenLabs Talk voice set to Claudia\nvoice-a",
    });
  });

  it("writes provider voice id without legacy top-level field for microsoft", async () => {
    const { command, runtime } = createHarness(emptyMicrosoftConfig());
    vi.mocked(runtime.tts.listVoices).mockResolvedValue([{ id: "en-US-AvaNeural", name: "Ava" }]);

    await command.handler(createCommandContext("set Ava"));

    // Only the provider entry carries the voice id here; no `talk.voiceId`.
    const expectedWrite = {
      talk: {
        provider: "microsoft",
        providers: {
          microsoft: {
            voiceId: "en-US-AvaNeural",
          },
        },
      },
    };
    expect(runtime.config.writeConfigFile).toHaveBeenCalledWith(expectedWrite);
  });

  it("returns provider lookup errors cleanly", async () => {
    const { command, runtime } = createHarness(emptyMicrosoftConfig());
    const lookupFailure = new Error("speech provider microsoft does not support voice listing");
    vi.mocked(runtime.tts.listVoices).mockRejectedValue(lookupFailure);

    const reply = await command.handler(createCommandContext("list"));

    expect(reply).toEqual({
      text: "Microsoft voice list failed: speech provider microsoft does not support voice listing",
    });
  });
});
|
||||
@ -31,6 +31,15 @@ func TestDocsPiModelUsesProviderDefault(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDocsPiModelKeepsOpenAIDefaultAtGPT54(t *testing.T) {
|
||||
t.Setenv(envDocsI18nProvider, "openai")
|
||||
t.Setenv(envDocsI18nModel, "")
|
||||
|
||||
if got := docsPiModel(); got != defaultOpenAIModel {
|
||||
t.Fatalf("expected OpenAI default model %q, got %q", defaultOpenAIModel, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDocsPiModelPrefersExplicitOverride(t *testing.T) {
|
||||
t.Setenv(envDocsI18nProvider, "openai")
|
||||
t.Setenv(envDocsI18nModel, "gpt-5.2")
|
||||
|
||||
60
src/tts/providers/microsoft.test.ts
Normal file
60
src/tts/providers/microsoft.test.ts
Normal file
@ -0,0 +1,60 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { listMicrosoftVoices } from "./microsoft.js";
|
||||
|
||||
// Unit tests for listMicrosoftVoices with the global fetch replaced by a mock.
describe("listMicrosoftVoices", () => {
  const originalFetch = globalThis.fetch;

  /** Replaces the global fetch with a mock that resolves to one canned Response. */
  function stubFetch(body: string, status: number): void {
    const canned = new Response(body, { status });
    globalThis.fetch = vi.fn().mockResolvedValue(canned) as typeof globalThis.fetch;
  }

  afterEach(() => {
    // Put the real fetch back before the next case installs its own stub.
    globalThis.fetch = originalFetch;
    vi.restoreAllMocks();
  });

  it("maps Microsoft voice metadata into speech voice options", async () => {
    const payload = [
      {
        ShortName: "en-US-AvaNeural",
        FriendlyName: "Microsoft Ava Online (Natural) - English (United States)",
        Locale: "en-US",
        Gender: "Female",
        VoiceTag: {
          ContentCategories: ["General"],
          VoicePersonalities: ["Friendly", "Positive"],
        },
      },
    ];
    stubFetch(JSON.stringify(payload), 200);

    const voices = await listMicrosoftVoices();

    const expectedVoice = {
      id: "en-US-AvaNeural",
      name: "Microsoft Ava Online (Natural) - English (United States)",
      category: "General",
      description: "en-US · Female · Friendly, Positive",
    };
    expect(voices).toEqual([expectedVoice]);

    const expectedHeaders = expect.objectContaining({
      Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
      "Sec-MS-GEC": expect.any(String),
      "Sec-MS-GEC-Version": expect.stringContaining("1-"),
    });
    expect(globalThis.fetch).toHaveBeenCalledWith(
      expect.stringContaining("/voices/list?trustedclienttoken="),
      expect.objectContaining({ headers: expectedHeaders }),
    );
  });

  it("throws on Microsoft voice list failures", async () => {
    stubFetch("nope", 503);

    const pending = listMicrosoftVoices();

    await expect(pending).rejects.toThrow("Microsoft voices API error (503)");
  });
});
|
||||
@ -1,17 +1,83 @@
|
||||
import { mkdirSync, mkdtempSync, readFileSync, rmSync } from "node:fs";
|
||||
import path from "node:path";
|
||||
import {
|
||||
CHROMIUM_FULL_VERSION,
|
||||
TRUSTED_CLIENT_TOKEN,
|
||||
generateSecMsGecToken,
|
||||
} from "node-edge-tts/dist/drm.js";
|
||||
import { resolvePreferredOpenClawTmpDir } from "../../infra/tmp-openclaw-dir.js";
|
||||
import { isVoiceCompatibleAudio } from "../../media/audio.js";
|
||||
import type { SpeechProviderPlugin } from "../../plugins/types.js";
|
||||
import type { SpeechVoiceOption } from "../provider-types.js";
|
||||
import { edgeTTS, inferEdgeExtension } from "../tts-core.js";
|
||||
|
||||
const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3";
|
||||
|
||||
/**
 * One element of the JSON array returned by the Microsoft voice list endpoint,
 * limited to the fields this module reads. Every field is optional.
 */
type MicrosoftVoiceListEntry = {
  /** Used as the voice id (for example "en-US-AvaNeural"). */
  ShortName?: string;
  /** Preferred display name; callers fall back to ShortName when absent. */
  FriendlyName?: string;
  /** Locale text shown in the voice description. */
  Locale?: string;
  /** Gender text shown in the voice description. */
  Gender?: string;
  /** Optional tagging metadata attached to the voice. */
  VoiceTag?: {
    /** The first non-blank value is used as the voice category. */
    ContentCategories?: Array<string>;
    /** Joined with ", " inside the voice description. */
    VoicePersonalities?: Array<string>;
  };
};
|
||||
|
||||
/**
 * Builds the HTTP headers sent with the Microsoft voice list request.
 *
 * The User-Agent is assembled from the major component of
 * CHROMIUM_FULL_VERSION ("0" when that component is empty), and the
 * "Sec-MS-GEC" value comes from a fresh generateSecMsGecToken() call.
 *
 * @returns A new header map on every call.
 */
function buildMicrosoftVoiceHeaders(): Record<string, string> {
  const [leadingSegment] = CHROMIUM_FULL_VERSION.split(".");
  const chromiumMajor = leadingSegment || "0";

  const userAgent =
    `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ` +
    `(KHTML, like Gecko) Chrome/${chromiumMajor}.0.0.0 Safari/537.36 Edg/${chromiumMajor}.0.0.0`;

  const headers: Record<string, string> = {};
  headers["Authority"] = "speech.platform.bing.com";
  headers["Origin"] = "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold";
  headers["Accept"] = "*/*";
  headers["User-Agent"] = userAgent;
  headers["Sec-MS-GEC"] = generateSecMsGecToken();
  headers["Sec-MS-GEC-Version"] = `1-${CHROMIUM_FULL_VERSION}`;
  return headers;
}
|
||||
|
||||
/**
 * Builds the one-line description for a voice: locale, gender and
 * personalities joined with " · ", with personalities joined by ", ".
 *
 * Each piece is trimmed before use, so padded values do not leak stray
 * whitespace into the output and whitespace-only values are dropped. Values
 * that are not strings (the entry originates from parsed JSON) are ignored.
 *
 * @param entry - Voice metadata entry to describe.
 * @returns The description, or `undefined` when no piece has visible text.
 */
function formatMicrosoftVoiceDescription(entry: MicrosoftVoiceListEntry): string | undefined {
  // Runtime data may not match the declared type, so check before trimming.
  const clean = (value: unknown): string => (typeof value === "string" ? value.trim() : "");

  const rawPersonalities: unknown = entry.VoiceTag?.VoicePersonalities;
  const personalities = (Array.isArray(rawPersonalities) ? rawPersonalities : [])
    .map((value) => clean(value))
    .filter((value) => value.length > 0);

  const parts = [clean(entry.Locale), clean(entry.Gender), personalities.join(", ")].filter(
    (part) => part.length > 0,
  );
  return parts.length > 0 ? parts.join(" · ") : undefined;
}
|
||||
|
||||
/** Returns the trimmed text, or `undefined` when the value is not a string or is blank. */
function cleanMicrosoftVoiceText(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}

/** Returns the non-blank, trimmed strings of a list; anything that is not an array yields `[]`. */
function cleanMicrosoftVoiceTextList(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  return value
    .map((item) => cleanMicrosoftVoiceText(item))
    .filter((item): item is string => item !== undefined);
}

/** Converts one raw payload element into a well-typed entry, or `undefined` when it is not an object. */
function normalizeMicrosoftVoiceEntry(raw: unknown): MicrosoftVoiceListEntry | undefined {
  if (typeof raw !== "object" || raw === null) {
    return undefined;
  }
  const fields = raw as Record<string, unknown>;
  const rawTag = fields.VoiceTag;
  const tag: Record<string, unknown> =
    typeof rawTag === "object" && rawTag !== null ? (rawTag as Record<string, unknown>) : {};
  return {
    ShortName: cleanMicrosoftVoiceText(fields.ShortName),
    FriendlyName: cleanMicrosoftVoiceText(fields.FriendlyName),
    Locale: cleanMicrosoftVoiceText(fields.Locale),
    Gender: cleanMicrosoftVoiceText(fields.Gender),
    VoiceTag: {
      ContentCategories: cleanMicrosoftVoiceTextList(tag.ContentCategories),
      VoicePersonalities: cleanMicrosoftVoiceTextList(tag.VoicePersonalities),
    },
  };
}

/**
 * Fetches the Microsoft voice catalogue and maps it to speech voice options.
 *
 * The response body is treated as untrusted JSON: elements that are not
 * objects, and fields that are not strings, are ignored rather than causing a
 * TypeError. Entries without a usable `ShortName` are dropped. All text is
 * trimmed, including the category.
 *
 * @returns One option per usable voice, in payload order; an empty list when
 *   the payload is not an array.
 * @throws Error `Microsoft voices API error (<status>)` when the HTTP response
 *   is not OK. Network and JSON parsing failures from `fetch` propagate as-is.
 */
export async function listMicrosoftVoices(): Promise<SpeechVoiceOption[]> {
  const response = await fetch(
    "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list" +
      `?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`,
    {
      headers: buildMicrosoftVoiceHeaders(),
    },
  );
  if (!response.ok) {
    throw new Error(`Microsoft voices API error (${response.status})`);
  }

  const payload: unknown = await response.json();
  if (!Array.isArray(payload)) {
    return [];
  }

  const voices: SpeechVoiceOption[] = [];
  for (const raw of payload) {
    const entry = normalizeMicrosoftVoiceEntry(raw);
    const id = entry?.ShortName;
    if (!entry || !id) {
      // Not an object, or no usable identifier: nothing to offer for this element.
      continue;
    }
    voices.push({
      id,
      // Fall back to the id so every listed voice has a display name.
      name: entry.FriendlyName || id,
      category: entry.VoiceTag?.ContentCategories?.[0],
      description: formatMicrosoftVoiceDescription(entry),
    });
  }
  return voices;
}
|
||||
|
||||
export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin {
|
||||
return {
|
||||
id: "microsoft",
|
||||
label: "Microsoft",
|
||||
aliases: ["edge"],
|
||||
listVoices: async () => await listMicrosoftVoices(),
|
||||
isConfigured: ({ config }) => config.edge.enabled,
|
||||
synthesize: async (req) => {
|
||||
const tempRoot = resolvePreferredOpenClawTmpDir();
|
||||
|
||||
6
src/types/node-edge-tts.d.ts
vendored
6
src/types/node-edge-tts.d.ts
vendored
@ -16,3 +16,9 @@ declare module "node-edge-tts" {
|
||||
ttsPromise(text: string, outputPath: string): Promise<void>;
|
||||
}
|
||||
}
|
||||
|
||||
// Ambient typings for the subset of "node-edge-tts/dist/drm.js" that this
// project imports.
declare module "node-edge-tts/dist/drm.js" {
  /** Dotted version string; consumers read its first segment as the major version. */
  export const CHROMIUM_FULL_VERSION: string;

  /** Token appended as the `trustedclienttoken` query parameter. */
  export const TRUSTED_CLIENT_TOKEN: string;

  /** Produces the value sent in the "Sec-MS-GEC" request header. */
  export function generateSecMsGecToken(): string;
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user