Merge def16119667670b39037cea088ec82feb774153c into 598f1826d8b2bc969aace2c6459824737667218c
This commit is contained in:
commit
c38e2aaea1
@ -93,6 +93,16 @@ export type TtsConfig = {
|
|||||||
proxy?: string;
|
proxy?: string;
|
||||||
timeoutMs?: number;
|
timeoutMs?: number;
|
||||||
};
|
};
|
||||||
|
/** MiniMax speech configuration. */
|
||||||
|
minimax?: {
|
||||||
|
apiKey?: SecretInput;
|
||||||
|
baseUrl?: string;
|
||||||
|
model?: string;
|
||||||
|
voiceId?: string;
|
||||||
|
speed?: number;
|
||||||
|
volume?: number;
|
||||||
|
pitch?: number;
|
||||||
|
};
|
||||||
/** Optional path for local TTS user preferences JSON. */
|
/** Optional path for local TTS user preferences JSON. */
|
||||||
prefsPath?: string;
|
prefsPath?: string;
|
||||||
/** Hard cap for text sent to TTS (chars). */
|
/** Hard cap for text sent to TTS (chars). */
|
||||||
|
|||||||
@ -392,6 +392,18 @@ const TtsMicrosoftConfigSchema = z
|
|||||||
})
|
})
|
||||||
.strict()
|
.strict()
|
||||||
.optional();
|
.optional();
|
||||||
|
/**
 * Validation schema for the `minimax` section of the TTS config.
 *
 * The numeric bounds mirror the limits MiniMax documents for `voice_setting` on the
 * `t2a_v2` endpoint, so that out-of-range values are rejected when the config is
 * loaded instead of surfacing later as an API error on the first synthesis request.
 */
const TtsMiniMaxConfigSchema = z
  .object({
    apiKey: SecretInputSchema.optional().register(sensitive),
    baseUrl: z.string().optional(),
    model: z.string().optional(),
    voiceId: z.string().optional(),
    // `voice_setting.speed`: 0.5 to 2 inclusive.
    speed: z.number().min(0.5).max(2).optional(),
    // `voice_setting.vol`: greater than 0, up to 10 (0 itself is not accepted by the API).
    volume: z.number().gt(0).max(10).optional(),
    // `voice_setting.pitch`: whole semitone steps from -12 to 12.
    pitch: z.number().int().min(-12).max(12).optional(),
  })
  .strict()
  .optional();
|
||||||
export const TtsConfigSchema = z
|
export const TtsConfigSchema = z
|
||||||
.object({
|
.object({
|
||||||
auto: TtsAutoSchema.optional(),
|
auto: TtsAutoSchema.optional(),
|
||||||
@ -447,6 +459,7 @@ export const TtsConfigSchema = z
|
|||||||
.optional(),
|
.optional(),
|
||||||
edge: TtsMicrosoftConfigSchema,
|
edge: TtsMicrosoftConfigSchema,
|
||||||
microsoft: TtsMicrosoftConfigSchema,
|
microsoft: TtsMicrosoftConfigSchema,
|
||||||
|
minimax: TtsMiniMaxConfigSchema,
|
||||||
prefsPath: z.string().optional(),
|
prefsPath: z.string().optional(),
|
||||||
maxTextLength: z.number().int().min(1).optional(),
|
maxTextLength: z.number().int().min(1).optional(),
|
||||||
timeoutMs: z.number().int().min(1000).max(120000).optional(),
|
timeoutMs: z.number().int().min(1000).max(120000).optional(),
|
||||||
|
|||||||
@ -5,12 +5,14 @@ import type { SpeechProviderPlugin } from "../plugins/types.js";
|
|||||||
import type { SpeechProviderId } from "./provider-types.js";
|
import type { SpeechProviderId } from "./provider-types.js";
|
||||||
import { buildElevenLabsSpeechProvider } from "./providers/elevenlabs.js";
|
import { buildElevenLabsSpeechProvider } from "./providers/elevenlabs.js";
|
||||||
import { buildMicrosoftSpeechProvider } from "./providers/microsoft.js";
|
import { buildMicrosoftSpeechProvider } from "./providers/microsoft.js";
|
||||||
|
import { buildMiniMaxSpeechProvider } from "./providers/minimax.js";
|
||||||
import { buildOpenAISpeechProvider } from "./providers/openai.js";
|
import { buildOpenAISpeechProvider } from "./providers/openai.js";
|
||||||
|
|
||||||
const BUILTIN_SPEECH_PROVIDER_BUILDERS = [
|
const BUILTIN_SPEECH_PROVIDER_BUILDERS = [
|
||||||
buildOpenAISpeechProvider,
|
buildOpenAISpeechProvider,
|
||||||
buildElevenLabsSpeechProvider,
|
buildElevenLabsSpeechProvider,
|
||||||
buildMicrosoftSpeechProvider,
|
buildMicrosoftSpeechProvider,
|
||||||
|
buildMiniMaxSpeechProvider,
|
||||||
] as const satisfies readonly (() => SpeechProviderPlugin)[];
|
] as const satisfies readonly (() => SpeechProviderPlugin)[];
|
||||||
|
|
||||||
function trimToUndefined(value: string | undefined): string | undefined {
|
function trimToUndefined(value: string | undefined): string | undefined {
|
||||||
|
|||||||
157
src/tts/providers/minimax.ts
Normal file
157
src/tts/providers/minimax.ts
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
import type { SpeechProviderPlugin } from "../../plugins/types.js";
|
||||||
|
import type { SpeechVoiceOption } from "../provider-types.js";
|
||||||
|
|
||||||
|
/**
 * Model identifiers advertised by the MiniMax speech provider.
 * The speech-02 family is published as `speech-02-hd` and `speech-02-turbo`;
 * there is no bare `speech-02` model id.
 */
const MINIMAX_TTS_MODELS = [
  "speech-01-turbo",
  "speech-01-hd",
  "speech-02-hd",
  "speech-02-turbo",
] as const;

/**
 * Voice ids offered as presets when listing voices.
 * NOTE(review): only some of these ids were checked against the MiniMax system
 * voice catalogue — confirm the full list before relying on it.
 */
const MINIMAX_VOICE_IDS = [
  "female-shaonv",
  "male-baijia",
  "male-yunyang",
  "female-tianmei",
  "male-john",
  "female-emma",
] as const;

/** API origin used when no `baseUrl` is configured. */
const DEFAULT_MINIMAX_BASE_URL = "https://api.minimaxi.com";
/** Model used when the config does not name one. */
const DEFAULT_MINIMAX_MODEL = "speech-01-turbo";
/** Voice used when the config does not name one. */
const DEFAULT_MINIMAX_VOICE = "female-shaonv";
|
||||||
|
|
||||||
|
/**
 * Resolve the API origin used for MiniMax requests.
 *
 * Surrounding whitespace and any run of trailing slashes are removed. When no value
 * was supplied, or nothing is left after cleaning, the default base URL is returned.
 */
function normalizeMiniMaxBaseUrl(baseUrl: string | undefined): string {
  const withoutTrailingSlashes = baseUrl?.trim().replace(/\/+$/, "");
  if (!withoutTrailingSlashes) {
    return DEFAULT_MINIMAX_BASE_URL;
  }
  return withoutTrailingSlashes;
}
|
||||||
|
|
||||||
|
/** Fields this module reads from a parsed MiniMax `t2a_v2` response envelope. */
type MiniMaxT2AResponse = {
  data?: { audio?: unknown } | null;
  base_resp?: { status_code?: unknown; status_msg?: unknown } | null;
};

/**
 * Extract the audio bytes from a parsed `t2a_v2` response envelope.
 *
 * @throws Error when the envelope reports a non-zero `base_resp.status_code`, or when
 *   `data.audio` is missing, empty, or not a well-formed hex string.
 */
function decodeMiniMaxAudio(payload: unknown): Buffer {
  if (typeof payload !== "object" || payload === null) {
    throw new Error("MiniMax TTS API returned an unexpected response body");
  }
  const { data, base_resp: baseResp } = payload as MiniMaxT2AResponse;

  // MiniMax reports API-level failures (bad key, quota, invalid voice, ...) inside the
  // envelope, typically alongside an HTTP 200, so `response.ok` alone is not enough.
  const statusCode = baseResp?.status_code;
  if (typeof statusCode === "number" && statusCode !== 0) {
    const statusMessage =
      typeof baseResp?.status_msg === "string" ? baseResp.status_msg : "Unknown error";
    throw new Error(`MiniMax TTS API error (${statusCode}): ${statusMessage}`);
  }

  const hexAudio = data?.audio;
  if (typeof hexAudio !== "string" || hexAudio.length === 0) {
    throw new Error("MiniMax TTS API returned no audio data");
  }
  // Buffer.from(..., "hex") silently truncates at the first invalid character, so
  // validate up front rather than hand back a partial clip.
  if (hexAudio.length % 2 !== 0 || !/^[0-9a-f]+$/i.test(hexAudio)) {
    throw new Error("MiniMax TTS API returned malformed audio data");
  }
  return Buffer.from(hexAudio, "hex");
}

/**
 * Synthesize speech through the MiniMax `t2a_v2` endpoint and return the audio bytes.
 *
 * The request is non-streaming and asks for hex output; the hex-encoded `data.audio`
 * field of the JSON response is decoded into a Buffer.
 *
 * @param params.text Text to synthesize.
 * @param params.apiKey Bearer token sent in the `Authorization` header.
 * @param params.baseUrl API origin; trailing slashes are ignored and a default is used when omitted.
 * @param params.model Model id (defaults to the module default model).
 * @param params.voiceId Voice id (defaults to the module default voice).
 * @param params.speed Speech rate, sent rounded to two decimals (default 1.0).
 * @param params.volume Volume, sent rounded to two decimals (default 1.0).
 * @param params.pitch Pitch shift, sent rounded to the nearest integer (default 0).
 * @param params.timeoutMs Abort the request, including reading the body, after this many ms (default 30000).
 * @returns The decoded audio bytes.
 * @throws Error on a non-2xx HTTP status, an API-level error in `base_resp`, or a
 *   missing/malformed audio payload. A timeout rejects with the abort error raised by `fetch`.
 */
export async function minimaxTTS(params: {
  text: string;
  apiKey: string;
  baseUrl?: string;
  model?: string;
  voiceId?: string;
  speed?: number;
  volume?: number;
  pitch?: number;
  timeoutMs?: number;
}): Promise<Buffer> {
  const {
    text,
    apiKey,
    baseUrl,
    model = DEFAULT_MINIMAX_MODEL,
    voiceId = DEFAULT_MINIMAX_VOICE,
    speed = 1.0,
    volume = 1.0,
    pitch = 0,
    timeoutMs = 30_000,
  } = params;

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), timeoutMs);

  try {
    const response = await fetch(`${normalizeMiniMaxBaseUrl(baseUrl)}/v1/t2a_v2`, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model,
        text,
        // Be explicit about the response shape this function decodes.
        stream: false,
        output_format: "hex",
        voice_setting: {
          voice_id: voiceId,
          speed: Math.round(speed * 100) / 100,
          vol: Math.round(volume * 100) / 100,
          // The API takes pitch as a whole number.
          pitch: Math.round(pitch),
        },
      }),
      signal: controller.signal,
    });

    if (!response.ok) {
      const error = await response.text().catch(() => "Unknown error");
      throw new Error(`MiniMax TTS API error (${response.status}): ${error}`);
    }

    let payload: unknown;
    try {
      payload = await response.json();
    } catch (error: unknown) {
      // Keep the abort error intact so callers can still recognise a timeout.
      if (controller.signal.aborted) {
        throw error;
      }
      throw new Error("MiniMax TTS API returned a non-JSON response body");
    }

    return decodeMiniMaxAudio(payload);
  } finally {
    clearTimeout(timeout);
  }
}
|
||||||
|
|
||||||
|
/**
 * List the built-in MiniMax voice presets.
 *
 * No request is made to MiniMax: the result is derived from the static preset ids.
 * Each display name is the id with dashes turned into spaces and every word
 * capitalized. Voice ids outside this list can still be set through configuration.
 */
export async function listMiniMaxVoices(): Promise<SpeechVoiceOption[]> {
  const voices: SpeechVoiceOption[] = [];
  for (const presetId of MINIMAX_VOICE_IDS) {
    const spaced = presetId.replace(/-/g, " ");
    const displayName = spaced.replace(/\b\w/g, (firstLetter) => firstLetter.toUpperCase());
    voices.push({ id: presetId, name: displayName });
  }
  return voices;
}
|
||||||
|
|
||||||
|
/** The request fields the MiniMax provider reads when synthesizing. */
type MiniMaxSynthesisRequest = {
  text: string;
  config: {
    timeoutMs?: number;
    minimax?: {
      apiKey?: string;
      baseUrl?: string;
      model?: string;
      voiceId?: string;
      speed?: number;
      volume?: number;
      pitch?: number;
    };
  };
};

/**
 * Resolve the API key (config first, then `MINIMAX_API_KEY`) and run one synthesis
 * request with the configured MiniMax settings. Shared by both synthesis entry points.
 *
 * @throws Error("MiniMax API key missing") when neither source provides a key.
 */
async function synthesizeWithMiniMaxConfig(req: MiniMaxSynthesisRequest): Promise<Buffer> {
  const settings = req.config.minimax;
  const apiKey = settings?.apiKey || process.env.MINIMAX_API_KEY;
  if (!apiKey) {
    throw new Error("MiniMax API key missing");
  }
  return minimaxTTS({
    text: req.text,
    apiKey,
    baseUrl: settings?.baseUrl,
    model: settings?.model ?? DEFAULT_MINIMAX_MODEL,
    voiceId: settings?.voiceId ?? DEFAULT_MINIMAX_VOICE,
    speed: settings?.speed,
    volume: settings?.volume,
    pitch: settings?.pitch,
    timeoutMs: req.config.timeoutMs,
  });
}

/**
 * Build the MiniMax speech provider plugin.
 *
 * The provider counts as configured when an API key is present in the config or in
 * the `MINIMAX_API_KEY` environment variable. Both synthesis entry points return MP3.
 */
export function buildMiniMaxSpeechProvider(): SpeechProviderPlugin {
  return {
    id: "minimax",
    label: "MiniMax",
    models: MINIMAX_TTS_MODELS,
    // Voices come from the static preset list; the request argument is not used.
    listVoices: async (_req) => {
      return listMiniMaxVoices();
    },
    isConfigured: ({ config }) =>
      Boolean(config.minimax?.apiKey || process.env.MINIMAX_API_KEY),
    synthesize: async (req) => {
      const audioBuffer = await synthesizeWithMiniMaxConfig(req);
      return {
        audioBuffer,
        outputFormat: "mp3",
        fileExtension: ".mp3",
        // NOTE(review): MP3 is reported as voice-note compatible whenever that target is
        // requested — confirm the voice-note channels actually accept MP3.
        voiceCompatible: req.target === "voice-note",
      };
    },
    synthesizeTelephony: async (req) => {
      // No telephony-specific format is requested from MiniMax; the MP3 is returned
      // as-is. NOTE(review): the original comments say the voice-call audio pipeline
      // converts it to PCM/Opus — confirm against that consumer.
      const audioBuffer = await synthesizeWithMiniMaxConfig(req);
      return {
        audioBuffer,
        outputFormat: "mp3",
        // No `audio_setting.sample_rate` is sent, so the audio uses the API default,
        // which MiniMax documents as 32000 Hz.
        sampleRate: 32000,
      };
    },
  };
}
|
||||||
@ -130,6 +130,15 @@ export type ResolvedTtsConfig = {
|
|||||||
proxy?: string;
|
proxy?: string;
|
||||||
timeoutMs?: number;
|
timeoutMs?: number;
|
||||||
};
|
};
|
||||||
|
minimax: {
|
||||||
|
apiKey?: string;
|
||||||
|
baseUrl: string;
|
||||||
|
model: string;
|
||||||
|
voiceId: string;
|
||||||
|
speed?: number;
|
||||||
|
volume?: number;
|
||||||
|
pitch?: number;
|
||||||
|
};
|
||||||
prefsPath?: string;
|
prefsPath?: string;
|
||||||
maxTextLength: number;
|
maxTextLength: number;
|
||||||
timeoutMs: number;
|
timeoutMs: number;
|
||||||
@ -337,6 +346,18 @@ export function resolveTtsConfig(cfg: OpenClawConfig): ResolvedTtsConfig {
|
|||||||
proxy: rawMicrosoft.proxy?.trim() || undefined,
|
proxy: rawMicrosoft.proxy?.trim() || undefined,
|
||||||
timeoutMs: rawMicrosoft.timeoutMs,
|
timeoutMs: rawMicrosoft.timeoutMs,
|
||||||
},
|
},
|
||||||
|
minimax: {
|
||||||
|
apiKey: normalizeResolvedSecretInputString({
|
||||||
|
value: raw.minimax?.apiKey,
|
||||||
|
path: "messages.tts.minimax.apiKey",
|
||||||
|
}),
|
||||||
|
baseUrl: (raw.minimax?.baseUrl?.trim() || "https://api.minimaxi.com").replace(/\/+$/, ""),
|
||||||
|
model: raw.minimax?.model || "speech-01-turbo",
|
||||||
|
voiceId: raw.minimax?.voiceId || "female-shaonv",
|
||||||
|
speed: raw.minimax?.speed,
|
||||||
|
volume: raw.minimax?.volume,
|
||||||
|
pitch: raw.minimax?.pitch,
|
||||||
|
},
|
||||||
prefsPath: raw.prefsPath,
|
prefsPath: raw.prefsPath,
|
||||||
maxTextLength: raw.maxTextLength ?? DEFAULT_MAX_TEXT_LENGTH,
|
maxTextLength: raw.maxTextLength ?? DEFAULT_MAX_TEXT_LENGTH,
|
||||||
timeoutMs: raw.timeoutMs ?? DEFAULT_TIMEOUT_MS,
|
timeoutMs: raw.timeoutMs ?? DEFAULT_TIMEOUT_MS,
|
||||||
@ -476,6 +497,9 @@ export function getTtsProvider(config: ResolvedTtsConfig, prefsPath: string): Tt
|
|||||||
if (resolveTtsApiKey(config, "elevenlabs")) {
|
if (resolveTtsApiKey(config, "elevenlabs")) {
|
||||||
return "elevenlabs";
|
return "elevenlabs";
|
||||||
}
|
}
|
||||||
|
if (resolveTtsApiKey(config, "minimax")) {
|
||||||
|
return "minimax";
|
||||||
|
}
|
||||||
return "microsoft";
|
return "microsoft";
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -544,10 +568,13 @@ export function resolveTtsApiKey(
|
|||||||
if (normalizedProvider === "openai") {
|
if (normalizedProvider === "openai") {
|
||||||
return config.openai.apiKey || process.env.OPENAI_API_KEY;
|
return config.openai.apiKey || process.env.OPENAI_API_KEY;
|
||||||
}
|
}
|
||||||
|
if (normalizedProvider === "minimax") {
|
||||||
|
return config.minimax.apiKey || process.env.MINIMAX_API_KEY;
|
||||||
|
}
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
export const TTS_PROVIDERS = ["openai", "elevenlabs", "microsoft"] as const;
|
export const TTS_PROVIDERS = ["openai", "elevenlabs", "microsoft", "minimax"] as const;
|
||||||
|
|
||||||
export function resolveTtsProviderOrder(primary: TtsProvider, cfg?: OpenClawConfig): TtsProvider[] {
|
export function resolveTtsProviderOrder(primary: TtsProvider, cfg?: OpenClawConfig): TtsProvider[] {
|
||||||
const normalizedPrimary = normalizeSpeechProviderId(primary) ?? primary;
|
const normalizedPrimary = normalizeSpeechProviderId(primary) ?? primary;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user