From 57f1ab1fca1a5b7442afb237c076bdc8488200f4 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 16 Mar 2026 20:27:25 -0700 Subject: [PATCH] feat(tts): enrich speech voice metadata --- extensions/talk-voice/index.test.ts | 33 +++++++++++++++++++ extensions/talk-voice/index.ts | 18 ++++++++++ .../contracts/registry.contract.test.ts | 14 ++++++++ src/tts/provider-types.ts | 3 ++ src/tts/providers/microsoft.test.ts | 5 ++- src/tts/providers/microsoft.ts | 12 +++---- 6 files changed, 78 insertions(+), 7 deletions(-) diff --git a/extensions/talk-voice/index.test.ts b/extensions/talk-voice/index.test.ts index 6f945e9dd0a..2d0a991aa47 100644 --- a/extensions/talk-voice/index.test.ts +++ b/extensions/talk-voice/index.test.ts @@ -110,6 +110,39 @@ describe("talk-voice plugin", () => { }); }); + it("surfaces richer provider voice metadata when available", async () => { + const { command, runtime } = createHarness({ + talk: { + provider: "microsoft", + providers: { + microsoft: {}, + }, + }, + }); + vi.mocked(runtime.tts.listVoices).mockResolvedValue([ + { + id: "en-US-AvaNeural", + name: "Ava", + category: "General", + locale: "en-US", + gender: "Female", + personalities: ["Friendly", "Positive"], + description: "Friendly, Positive", + }, + ]); + + const result = await command.handler(createCommandContext("list")); + + expect(result).toEqual({ + text: + "Microsoft voices: 1\n\n" + + "- Ava · General\n" + + " id: en-US-AvaNeural\n" + + " meta: en-US · Female · Friendly, Positive\n" + + " note: Friendly, Positive", + }); + }); + it("writes canonical talk provider config and legacy elevenlabs voice id", async () => { const { command, runtime } = createHarness({ talk: { diff --git a/extensions/talk-voice/index.ts b/extensions/talk-voice/index.ts index 3c8ee3ba09e..8f698262e3e 100644 --- a/extensions/talk-voice/index.ts +++ b/extensions/talk-voice/index.ts @@ -31,6 +31,16 @@ function resolveProviderLabel(providerId: string): string { } } +function formatVoiceMeta(voice: SpeechVoiceOption): string | undefined { + const parts = [voice.locale, voice.gender]; + const personalities = voice.personalities?.filter((value) => value.trim().length > 0) ?? []; + if (personalities.length > 0) { + parts.push(personalities.join(", ")); + } + const filtered = parts.filter((part): part is string => Boolean(part?.trim())); + return filtered.length > 0 ? filtered.join(" · ") : undefined; +} + function formatVoiceList(voices: SpeechVoiceOption[], limit: number, providerId: string): string { const sliced = voices.slice(0, Math.max(1, Math.min(limit, 50))); const lines: string[] = []; @@ -42,6 +52,14 @@ function formatVoiceList(voices: SpeechVoiceOption[], limit: number, providerId: const meta = category ? ` · ${category}` : ""; lines.push(`- ${name}${meta}`); lines.push(` id: ${v.id}`); + const details = formatVoiceMeta(v); + if (details) { + lines.push(` meta: ${details}`); + } + const description = (v.description ?? "").trim(); + if (description) { + lines.push(` note: ${description}`); + } } if (voices.length > sliced.length) { lines.push(""); diff --git a/src/plugins/contracts/registry.contract.test.ts b/src/plugins/contracts/registry.contract.test.ts index cf728b9a91b..48da6c3d9a1 100644 --- a/src/plugins/contracts/registry.contract.test.ts +++ b/src/plugins/contracts/registry.contract.test.ts @@ -27,6 +27,14 @@ function findSpeechProviderIdsForPlugin(pluginId: string) { .toSorted((left, right) => left.localeCompare(right)); } +function findSpeechProviderForPlugin(pluginId: string) { + const entry = speechProviderContractRegistry.find((candidate) => candidate.pluginId === pluginId); + if (!entry) { + throw new Error(`speech provider contract missing for ${pluginId}`); + } + return entry.provider; +} + function findRegistrationForPlugin(pluginId: string) { const entry = pluginRegistrationContractRegistry.find( (candidate) => candidate.pluginId === pluginId, @@ -97,4 +105,10 @@ describe("plugin contract registry", () => { speechProviderIds: ["microsoft"], }); }); + + it("keeps bundled speech voice-list support explicit", () => { + expect(findSpeechProviderForPlugin("openai").listVoices).toEqual(expect.any(Function)); + expect(findSpeechProviderForPlugin("elevenlabs").listVoices).toEqual(expect.any(Function)); + expect(findSpeechProviderForPlugin("microsoft").listVoices).toEqual(expect.any(Function)); + }); }); diff --git a/src/tts/provider-types.ts b/src/tts/provider-types.ts index be0a083127d..c0640b63614 100644 --- a/src/tts/provider-types.ts +++ b/src/tts/provider-types.ts @@ -42,6 +42,9 @@ export type SpeechVoiceOption = { name?: string; category?: string; description?: string; + locale?: string; + gender?: string; + personalities?: string[]; }; export type SpeechListVoicesRequest = { diff --git a/src/tts/providers/microsoft.test.ts b/src/tts/providers/microsoft.test.ts index fa82456be00..f78e09f70e4 100644 --- a/src/tts/providers/microsoft.test.ts +++ b/src/tts/providers/microsoft.test.ts @@ -35,7 +35,10 @@ describe("listMicrosoftVoices", () => { id: "en-US-AvaNeural", name: "Microsoft Ava Online (Natural) - English (United States)", category: "General", - description: "en-US · Female · Friendly, Positive", + description: "Friendly, Positive", + locale: "en-US", + gender: "Female", + personalities: ["Friendly", "Positive"], }, ]); expect(globalThis.fetch).toHaveBeenCalledWith( diff --git a/src/tts/providers/microsoft.ts b/src/tts/providers/microsoft.ts index 06958931ad8..fef369740cb 100644 --- a/src/tts/providers/microsoft.ts +++ b/src/tts/providers/microsoft.ts @@ -39,13 +39,8 @@ function buildMicrosoftVoiceHeaders(): Record { } function formatMicrosoftVoiceDescription(entry: MicrosoftVoiceListEntry): string | undefined { - const parts = [entry.Locale, entry.Gender]; const personalities = entry.VoiceTag?.VoicePersonalities?.filter(Boolean) ?? []; - if (personalities.length > 0) { - parts.push(personalities.join(", ")); - } - const filtered = parts.filter((part): part is string => Boolean(part?.trim())); - return filtered.length > 0 ? filtered.join(" · ") : undefined; + return personalities.length > 0 ? personalities.join(", ") : undefined; } export async function listMicrosoftVoices(): Promise { @@ -67,6 +62,11 @@ export async function listMicrosoftVoices(): Promise { name: voice.FriendlyName?.trim() || voice.ShortName?.trim() || undefined, category: voice.VoiceTag?.ContentCategories?.find((value) => value.trim().length > 0), description: formatMicrosoftVoiceDescription(voice), + locale: voice.Locale?.trim() || undefined, + gender: voice.Gender?.trim() || undefined, + personalities: voice.VoiceTag?.VoicePersonalities?.filter( + (value): value is string => value.trim().length > 0, + ), })) .filter((voice) => voice.id.length > 0) : [];