feat(tts): add microsoft voice listing
This commit is contained in:
parent
a71c61122d
commit
622f13253b
189
extensions/talk-voice/index.test.ts
Normal file
189
extensions/talk-voice/index.test.ts
Normal file
@ -0,0 +1,189 @@
|
|||||||
|
import { describe, expect, it, vi } from "vitest";
|
||||||
|
import type { OpenClawPluginCommandDefinition } from "../../src/plugins/types.js";
|
||||||
|
import { createPluginRuntimeMock } from "../test-utils/plugin-runtime-mock.js";
|
||||||
|
import register from "./index.js";
|
||||||
|
|
||||||
|
/**
 * Registers the talk-voice plugin against a stubbed plugin API and hands back
 * what the tests need to drive it.
 *
 * @param config - Object returned by the mocked `runtime.config.loadConfig`.
 * @returns The command definition the plugin registered and the mocked runtime.
 * @throws Error when the plugin never calls `registerCommand`.
 */
function createHarness(config: Record<string, unknown>) {
  // Holder object so the registration callback can publish the definition.
  const registered: { definition?: OpenClawPluginCommandDefinition } = {};

  const loadConfig = vi.fn(() => config);
  const writeConfigFile = vi.fn().mockResolvedValue(undefined);
  const listVoices = vi.fn();
  const runtime = createPluginRuntimeMock({
    config: { loadConfig, writeConfigFile },
    tts: { listVoices },
  });

  const registerCommand = vi.fn((definition: OpenClawPluginCommandDefinition) => {
    registered.definition = definition;
  });

  // Only `runtime` and `registerCommand` are supplied, hence the `never` cast.
  register({ runtime, registerCommand } as never);

  const command = registered.definition;
  if (!command) {
    throw new Error("talk-voice command not registered");
  }
  return { command, runtime };
}
|
||||||
|
|
||||||
|
/**
 * Builds the context object passed to the command handler in these tests.
 *
 * @param args - Argument string following `/voice`; empty means the bare command.
 * @param channel - Value used for both `channel` and `channelId`.
 * @returns A context with an authorized sender, an empty `config`, and mocked
 *   conversation-binding callbacks.
 */
function createCommandContext(args: string, channel: string = "discord") {
  let commandBody = "/voice";
  if (args) {
    commandBody = `/voice ${args}`;
  }

  const context = {
    args,
    channel,
    channelId: channel,
    isAuthorizedSender: true,
    commandBody,
    config: {},
    requestConversationBinding: vi.fn(),
    detachConversationBinding: vi.fn(),
    getCurrentConversationBinding: vi.fn(),
  };
  return context;
}
|
||||||
|
|
||||||
|
describe("talk-voice plugin", () => {
  /**
   * Returns a fresh config object whose active talk provider carries the
   * given settings. A new object is built on every call so expectations never
   * share a reference with the config handed to the harness.
   */
  const talkConfig = (provider: string, settings: Record<string, unknown>) => ({
    talk: {
      provider,
      providers: { [provider]: settings },
    },
  });

  /** Wraps newline-joined lines in the `{ text }` shape the assertions compare against. */
  const reply = (...lines: string[]) => ({ text: lines.join("\n") });

  it("reports active provider status", async () => {
    const { command } = createHarness(
      talkConfig("microsoft", { voiceId: "en-US-AvaNeural", apiKey: "secret-token" }),
    );

    const result = await command.handler(createCommandContext(""));

    expect(result).toEqual(
      reply(
        "Talk voice status:",
        "- provider: microsoft",
        "- talk.voiceId: en-US-AvaNeural",
        "- microsoft.apiKey: secret…",
      ),
    );
  });

  it("lists voices from the active provider", async () => {
    const apiKey = "sk-eleven";
    const baseUrl = "https://voices.example.test";
    const { command, runtime } = createHarness(talkConfig("elevenlabs", { apiKey, baseUrl }));
    vi.mocked(runtime.tts.listVoices).mockResolvedValue([
      { id: "voice-a", name: "Claudia", category: "general" },
      { id: "voice-b", name: "Bert" },
    ]);

    const result = await command.handler(createCommandContext("list 1"));

    // The provider receives the whole config plus the resolved credentials.
    expect(runtime.tts.listVoices).toHaveBeenCalledWith({
      provider: "elevenlabs",
      cfg: talkConfig("elevenlabs", { apiKey, baseUrl }),
      apiKey,
      baseUrl,
    });
    expect(result).toEqual(
      reply(
        "ElevenLabs voices: 2",
        "",
        "- Claudia · general",
        " id: voice-a",
        "",
        "(showing first 1)",
      ),
    );
  });

  it("writes canonical talk provider config and legacy elevenlabs voice id", async () => {
    const { command, runtime } = createHarness(talkConfig("elevenlabs", { apiKey: "sk-eleven" }));
    vi.mocked(runtime.tts.listVoices).mockResolvedValue([{ id: "voice-a", name: "Claudia" }]);

    const result = await command.handler(createCommandContext("set Claudia"));

    // Voice id is expected under the provider entry and at the top level of `talk`.
    const expectedTalk = {
      ...talkConfig("elevenlabs", { apiKey: "sk-eleven", voiceId: "voice-a" }).talk,
      voiceId: "voice-a",
    };
    expect(runtime.config.writeConfigFile).toHaveBeenCalledWith({ talk: expectedTalk });
    expect(result).toEqual(reply("✅ ElevenLabs Talk voice set to Claudia", "voice-a"));
  });

  it("writes provider voice id without legacy top-level field for microsoft", async () => {
    const { command, runtime } = createHarness(talkConfig("microsoft", {}));
    vi.mocked(runtime.tts.listVoices).mockResolvedValue([{ id: "en-US-AvaNeural", name: "Ava" }]);

    await command.handler(createCommandContext("set Ava"));

    expect(runtime.config.writeConfigFile).toHaveBeenCalledWith(
      talkConfig("microsoft", { voiceId: "en-US-AvaNeural" }),
    );
  });

  it("returns provider lookup errors cleanly", async () => {
    const { command, runtime } = createHarness(talkConfig("microsoft", {}));
    const failure = new Error("speech provider microsoft does not support voice listing");
    vi.mocked(runtime.tts.listVoices).mockRejectedValue(failure);

    const result = await command.handler(createCommandContext("list"));

    expect(result).toEqual(
      reply("Microsoft voice list failed: speech provider microsoft does not support voice listing"),
    );
  });
});
|
||||||
@ -31,6 +31,15 @@ func TestDocsPiModelUsesProviderDefault(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDocsPiModelKeepsOpenAIDefaultAtGPT54(t *testing.T) {
|
||||||
|
t.Setenv(envDocsI18nProvider, "openai")
|
||||||
|
t.Setenv(envDocsI18nModel, "")
|
||||||
|
|
||||||
|
if got := docsPiModel(); got != defaultOpenAIModel {
|
||||||
|
t.Fatalf("expected OpenAI default model %q, got %q", defaultOpenAIModel, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestDocsPiModelPrefersExplicitOverride(t *testing.T) {
|
func TestDocsPiModelPrefersExplicitOverride(t *testing.T) {
|
||||||
t.Setenv(envDocsI18nProvider, "openai")
|
t.Setenv(envDocsI18nProvider, "openai")
|
||||||
t.Setenv(envDocsI18nModel, "gpt-5.2")
|
t.Setenv(envDocsI18nModel, "gpt-5.2")
|
||||||
|
|||||||
60
src/tts/providers/microsoft.test.ts
Normal file
60
src/tts/providers/microsoft.test.ts
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||||
|
import { listMicrosoftVoices } from "./microsoft.js";
|
||||||
|
|
||||||
|
describe("listMicrosoftVoices", () => {
  // Captured once so every test can put the real implementation back.
  const originalFetch = globalThis.fetch;

  /** Replaces the global fetch with a mock resolving to the given body and status. */
  const stubFetch = (body: string, status: number) => {
    const mock = vi.fn().mockResolvedValue(new Response(body, { status }));
    globalThis.fetch = mock as typeof globalThis.fetch;
  };

  afterEach(() => {
    globalThis.fetch = originalFetch;
    vi.restoreAllMocks();
  });

  it("maps Microsoft voice metadata into speech voice options", async () => {
    const payload = [
      {
        ShortName: "en-US-AvaNeural",
        FriendlyName: "Microsoft Ava Online (Natural) - English (United States)",
        Locale: "en-US",
        Gender: "Female",
        VoiceTag: {
          ContentCategories: ["General"],
          VoicePersonalities: ["Friendly", "Positive"],
        },
      },
    ];
    stubFetch(JSON.stringify(payload), 200);

    const voices = await listMicrosoftVoices();

    expect(voices).toEqual([
      {
        id: "en-US-AvaNeural",
        name: "Microsoft Ava Online (Natural) - English (United States)",
        category: "General",
        description: "en-US · Female · Friendly, Positive",
      },
    ]);

    // The request must target the voice-list endpoint and carry the DRM headers.
    const expectedHeaders = expect.objectContaining({
      Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
      "Sec-MS-GEC": expect.any(String),
      "Sec-MS-GEC-Version": expect.stringContaining("1-"),
    });
    expect(globalThis.fetch).toHaveBeenCalledWith(
      expect.stringContaining("/voices/list?trustedclienttoken="),
      expect.objectContaining({ headers: expectedHeaders }),
    );
  });

  it("throws on Microsoft voice list failures", async () => {
    stubFetch("nope", 503);

    await expect(listMicrosoftVoices()).rejects.toThrow("Microsoft voices API error (503)");
  });
});
|
||||||
@ -1,17 +1,83 @@
|
|||||||
import { mkdirSync, mkdtempSync, readFileSync, rmSync } from "node:fs";
|
import { mkdirSync, mkdtempSync, readFileSync, rmSync } from "node:fs";
|
||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
|
import {
|
||||||
|
CHROMIUM_FULL_VERSION,
|
||||||
|
TRUSTED_CLIENT_TOKEN,
|
||||||
|
generateSecMsGecToken,
|
||||||
|
} from "node-edge-tts/dist/drm.js";
|
||||||
import { resolvePreferredOpenClawTmpDir } from "../../infra/tmp-openclaw-dir.js";
|
import { resolvePreferredOpenClawTmpDir } from "../../infra/tmp-openclaw-dir.js";
|
||||||
import { isVoiceCompatibleAudio } from "../../media/audio.js";
|
import { isVoiceCompatibleAudio } from "../../media/audio.js";
|
||||||
import type { SpeechProviderPlugin } from "../../plugins/types.js";
|
import type { SpeechProviderPlugin } from "../../plugins/types.js";
|
||||||
|
import type { SpeechVoiceOption } from "../provider-types.js";
|
||||||
import { edgeTTS, inferEdgeExtension } from "../tts-core.js";
|
import { edgeTTS, inferEdgeExtension } from "../tts-core.js";
|
||||||
|
|
||||||
const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3";
|
const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3";
|
||||||
|
|
||||||
|
/**
 * One element of the JSON array returned by the Microsoft voice-list endpoint,
 * limited to the fields this module reads. Every field is optional because
 * the payload is not validated before use.
 */
type MicrosoftVoiceListEntry = Partial<{
  /** Short voice identifier (for example `en-US-AvaNeural`); used as the voice id. */
  ShortName: string;
  /** Display name; preferred over `ShortName` for the voice name. */
  FriendlyName: string;
  /** Locale tag shown in the voice description. */
  Locale: string;
  /** Gender label shown in the voice description. */
  Gender: string;
  /** Extra tags attached to the voice. */
  VoiceTag: Partial<{
    /** Content categories; the first non-blank one becomes the voice category. */
    ContentCategories: string[];
    /** Personality labels appended to the voice description. */
    VoicePersonalities: string[];
  }>;
}>;
|
||||||
|
|
||||||
|
/**
 * Assembles the request headers sent to the Microsoft voice-list endpoint.
 *
 * The `User-Agent` advertises the major component of `CHROMIUM_FULL_VERSION`
 * (falling back to `"0"` when that component is empty), and a new
 * `Sec-MS-GEC` token is generated on every call.
 *
 * @returns A fresh header map for a single request.
 */
function buildMicrosoftVoiceHeaders(): Record<string, string> {
  const [leadingSegment] = CHROMIUM_FULL_VERSION.split(".");
  const chromiumMajor = leadingSegment || "0";
  const userAgent =
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
    `(KHTML, like Gecko) Chrome/${chromiumMajor}.0.0.0 Safari/537.36 Edg/${chromiumMajor}.0.0.0`;

  const headers: Record<string, string> = {
    Authority: "speech.platform.bing.com",
    Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
    Accept: "*/*",
    "User-Agent": userAgent,
    "Sec-MS-GEC": generateSecMsGecToken(),
    "Sec-MS-GEC-Version": `1-${CHROMIUM_FULL_VERSION}`,
  };
  return headers;
}
|
||||||
|
|
||||||
|
/**
 * Builds the one-line description shown next to a Microsoft voice.
 *
 * The result is `Locale · Gender · personalities`, where personalities are
 * comma-separated. Every segment is trimmed, and blank or non-string segments
 * are dropped, so stray whitespace in the payload never reaches the output.
 *
 * @param entry - Raw voice entry from the voice-list payload.
 * @returns The joined description, or `undefined` when no segment has content.
 */
function formatMicrosoftVoiceDescription(entry: MicrosoftVoiceListEntry): string | undefined {
  // The payload is unvalidated JSON, so tolerate anything that is not a string.
  const clean = (value: unknown): string => (typeof value === "string" ? value.trim() : "");

  const rawPersonalities: unknown = entry.VoiceTag?.VoicePersonalities;
  const personalities = Array.isArray(rawPersonalities)
    ? rawPersonalities.map((value) => clean(value)).filter((value) => value.length > 0)
    : [];

  const parts = [clean(entry.Locale), clean(entry.Gender), personalities.join(", ")].filter(
    (part) => part.length > 0,
  );
  return parts.length > 0 ? parts.join(" · ") : undefined;
}
|
||||||
|
|
||||||
|
/**
 * Fetches the Microsoft voice catalogue and maps it to speech voice options.
 *
 * The response body is treated as untrusted JSON: a non-array payload yields
 * an empty list, and entries that are not objects or have no usable
 * `ShortName` are skipped rather than throwing.
 *
 * @returns One option per usable voice, with `id` taken from `ShortName`,
 *   `name` from `FriendlyName` (falling back to the id), `category` from the
 *   first non-blank content category, and a formatted `description`.
 * @throws Error `Microsoft voices API error (<status>)` when the response is
 *   not OK.
 */
export async function listMicrosoftVoices(): Promise<SpeechVoiceOption[]> {
  const response = await fetch(
    "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list" +
      `?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`,
    {
      headers: buildMicrosoftVoiceHeaders(),
    },
  );
  if (!response.ok) {
    throw new Error(`Microsoft voices API error (${response.status})`);
  }

  const payload: unknown = await response.json();
  if (!Array.isArray(payload)) {
    return [];
  }

  const trimmed = (value: unknown): string => (typeof value === "string" ? value.trim() : "");

  const voices: SpeechVoiceOption[] = [];
  for (const raw of payload) {
    // Skip null or primitive entries instead of crashing on property access.
    if (raw === null || typeof raw !== "object") {
      continue;
    }
    const voice = raw as MicrosoftVoiceListEntry;

    const id = trimmed(voice.ShortName);
    if (id.length === 0) {
      continue;
    }

    const rawCategories: unknown = voice.VoiceTag?.ContentCategories;
    const category = Array.isArray(rawCategories)
      ? rawCategories.map((value) => trimmed(value)).find((value) => value.length > 0)
      : undefined;

    voices.push({
      id,
      name: trimmed(voice.FriendlyName) || id,
      category,
      description: formatMicrosoftVoiceDescription(voice),
    });
  }
  return voices;
}
|
||||||
|
|
||||||
export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin {
|
export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin {
|
||||||
return {
|
return {
|
||||||
id: "microsoft",
|
id: "microsoft",
|
||||||
label: "Microsoft",
|
label: "Microsoft",
|
||||||
aliases: ["edge"],
|
aliases: ["edge"],
|
||||||
|
listVoices: async () => await listMicrosoftVoices(),
|
||||||
isConfigured: ({ config }) => config.edge.enabled,
|
isConfigured: ({ config }) => config.edge.enabled,
|
||||||
synthesize: async (req) => {
|
synthesize: async (req) => {
|
||||||
const tempRoot = resolvePreferredOpenClawTmpDir();
|
const tempRoot = resolvePreferredOpenClawTmpDir();
|
||||||
|
|||||||
6
src/types/node-edge-tts.d.ts
vendored
6
src/types/node-edge-tts.d.ts
vendored
@ -16,3 +16,9 @@ declare module "node-edge-tts" {
|
|||||||
ttsPromise(text: string, outputPath: string): Promise<void>;
|
ttsPromise(text: string, outputPath: string): Promise<void>;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Ambient typings for the `node-edge-tts/dist/drm.js` deep import, covering
 * only the three members the Microsoft speech provider pulls in.
 */
declare module "node-edge-tts/dist/drm.js" {
  /** Returns the value sent as the `Sec-MS-GEC` request header. */
  export function generateSecMsGecToken(): string;
  /** Value appended to requests as the `trustedclienttoken` query parameter. */
  export const TRUSTED_CLIENT_TOKEN: string;
  /** Dotted Chromium version string; its first component is used as the major version. */
  export const CHROMIUM_FULL_VERSION: string;
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user