Merge def16119667670b39037cea088ec82feb774153c into 598f1826d8b2bc969aace2c6459824737667218c
This commit is contained in:
commit
c38e2aaea1
@ -93,6 +93,16 @@ export type TtsConfig = {
|
||||
proxy?: string;
|
||||
timeoutMs?: number;
|
||||
};
|
||||
/** MiniMax speech configuration. */
|
||||
minimax?: {
|
||||
apiKey?: SecretInput;
|
||||
baseUrl?: string;
|
||||
model?: string;
|
||||
voiceId?: string;
|
||||
speed?: number;
|
||||
volume?: number;
|
||||
pitch?: number;
|
||||
};
|
||||
/** Optional path for local TTS user preferences JSON. */
|
||||
prefsPath?: string;
|
||||
/** Hard cap for text sent to TTS (chars). */
|
||||
|
||||
@ -392,6 +392,18 @@ const TtsMicrosoftConfigSchema = z
|
||||
})
|
||||
.strict()
|
||||
.optional();
|
||||
/**
 * Validation schema for the optional `minimax` section of the TTS config.
 *
 * `.strict()` rejects unknown keys, and the whole section may be omitted.
 *
 * Numeric bounds are aligned with the `voice_setting` ranges in the MiniMax
 * T2A v2 API reference: speed in [0.5, 2], vol strictly positive, and pitch a
 * whole number in [-12, 12]. Values outside those ranges would pass local
 * validation only to be rejected by the remote API.
 */
const TtsMiniMaxConfigSchema = z
  .object({
    // Registered with the `sensitive` registry (presumably so the key is
    // redacted wherever the schema is introspected — confirm against the registry).
    apiKey: SecretInputSchema.optional().register(sensitive),
    baseUrl: z.string().optional(),
    model: z.string().optional(),
    voiceId: z.string().optional(),
    speed: z.number().min(0.5).max(2).optional(),
    // A volume of exactly 0 is not a valid MiniMax value; the existing upper cap of 2 is kept.
    volume: z.number().gt(0).max(2).optional(),
    // MiniMax documents pitch as an integer offset in [-12, 12].
    pitch: z.number().int().min(-12).max(12).optional(),
  })
  .strict()
  .optional();
|
||||
export const TtsConfigSchema = z
|
||||
.object({
|
||||
auto: TtsAutoSchema.optional(),
|
||||
@ -447,6 +459,7 @@ export const TtsConfigSchema = z
|
||||
.optional(),
|
||||
edge: TtsMicrosoftConfigSchema,
|
||||
microsoft: TtsMicrosoftConfigSchema,
|
||||
minimax: TtsMiniMaxConfigSchema,
|
||||
prefsPath: z.string().optional(),
|
||||
maxTextLength: z.number().int().min(1).optional(),
|
||||
timeoutMs: z.number().int().min(1000).max(120000).optional(),
|
||||
|
||||
@ -5,12 +5,14 @@ import type { SpeechProviderPlugin } from "../plugins/types.js";
|
||||
import type { SpeechProviderId } from "./provider-types.js";
|
||||
import { buildElevenLabsSpeechProvider } from "./providers/elevenlabs.js";
|
||||
import { buildMicrosoftSpeechProvider } from "./providers/microsoft.js";
|
||||
import { buildMiniMaxSpeechProvider } from "./providers/minimax.js";
|
||||
import { buildOpenAISpeechProvider } from "./providers/openai.js";
|
||||
|
||||
/**
 * Factories for the built-in speech providers, in declaration order.
 *
 * `as const` preserves the tuple shape, while `satisfies` checks that every
 * entry is a zero-argument factory returning a `SpeechProviderPlugin`.
 */
const BUILTIN_SPEECH_PROVIDER_BUILDERS = [
  buildOpenAISpeechProvider,
  buildElevenLabsSpeechProvider,
  buildMicrosoftSpeechProvider,
  buildMiniMaxSpeechProvider,
] as const satisfies readonly (() => SpeechProviderPlugin)[];
|
||||
|
||||
function trimToUndefined(value: string | undefined): string | undefined {
|
||||
|
||||
157
src/tts/providers/minimax.ts
Normal file
157
src/tts/providers/minimax.ts
Normal file
@ -0,0 +1,157 @@
|
||||
import type { SpeechProviderPlugin } from "../../plugins/types.js";
|
||||
import type { SpeechVoiceOption } from "../provider-types.js";
|
||||
|
||||
/**
 * Model identifiers advertised by the MiniMax speech provider.
 *
 * NOTE(review): "speech-02" has no `-hd`/`-turbo` suffix unlike its siblings —
 * confirm it is a model name the MiniMax API actually accepts.
 */
const MINIMAX_TTS_MODELS = [
  "speech-01-turbo",
  "speech-01-hd",
  "speech-02-hd",
  "speech-02",
] as const;

/** Hard-coded selection of popular MiniMax voice IDs offered as presets. */
const MINIMAX_VOICE_IDS = [
  "female-shaonv",
  "male-baijia",
  "male-yunyang",
  "female-tianmei",
  "male-john",
  "female-emma",
] as const;

/** API origin used when no base URL is supplied. */
const DEFAULT_MINIMAX_BASE_URL = "https://api.minimaxi.com";
/** Model used when none is supplied. */
const DEFAULT_MINIMAX_MODEL = "speech-01-turbo";
/** Voice used when none is supplied. */
const DEFAULT_MINIMAX_VOICE = "female-shaonv";
|
||||
|
||||
/**
 * Produces the API origin to send requests to.
 *
 * Surrounding whitespace and any run of trailing slashes are removed so that a
 * path can be appended with a single `/`. When the input is missing or becomes
 * empty after that clean-up, the default MiniMax base URL is used instead.
 *
 * @param baseUrl - User-supplied base URL, if any.
 * @returns The cleaned base URL, or the default when nothing usable was given.
 */
function normalizeMiniMaxBaseUrl(baseUrl: string | undefined): string {
  const withoutTrailingSlashes = baseUrl?.trim().replace(/\/+$/, "");
  // `||` (not `??`) on purpose: an empty string must also fall back to the default.
  return withoutTrailingSlashes || DEFAULT_MINIMAX_BASE_URL;
}
|
||||
|
||||
/**
 * Extracts the audio bytes from a MiniMax T2A v2 JSON response body.
 *
 * The endpoint replies with an envelope of the form
 * `{ data: { audio: "<hex>" }, base_resp: { status_code, status_msg } }`.
 * API-level failures are reported through a non-zero `base_resp.status_code`,
 * so they are turned into errors here.
 */
function decodeMiniMaxAudioEnvelope(payload: Buffer): Buffer {
  const asRecord = (value: unknown): Record<string, unknown> | undefined =>
    typeof value === "object" && value !== null ? (value as Record<string, unknown>) : undefined;

  let parsed: unknown;
  try {
    parsed = JSON.parse(payload.toString("utf8"));
  } catch {
    throw new Error("MiniMax TTS API error: response body is not valid JSON");
  }

  const envelope = asRecord(parsed);
  const status = asRecord(envelope?.["base_resp"]);
  const statusCode = status?.["status_code"];
  if (typeof statusCode === "number" && statusCode !== 0) {
    const statusMsg = status?.["status_msg"];
    const detail = typeof statusMsg === "string" ? statusMsg : "Unknown error";
    throw new Error(`MiniMax TTS API error (${statusCode}): ${detail}`);
  }

  const hex = asRecord(envelope?.["data"])?.["audio"];
  if (typeof hex !== "string" || hex.length === 0) {
    throw new Error("MiniMax TTS API error: response contained no audio data");
  }

  // Buffer.from(..., "hex") silently stops at the first invalid character, so
  // verify that the whole string was consumed.
  const audio = Buffer.from(hex, "hex");
  if (audio.length * 2 !== hex.length) {
    throw new Error("MiniMax TTS API error: audio payload is not valid hex");
  }
  return audio;
}

/**
 * Synthesizes speech through the MiniMax T2A v2 HTTP endpoint.
 *
 * Issues `POST {baseUrl}/v1/t2a_v2` with a bearer token and resolves with the
 * audio bytes. A JSON response is decoded from its hex-encoded `data.audio`
 * field; any other response body is returned unchanged, which keeps gateways
 * that already return raw audio working.
 *
 * @param params.text - Text to synthesize.
 * @param params.apiKey - MiniMax API key sent as a bearer token.
 * @param params.baseUrl - API origin; defaults to the MiniMax base URL.
 * @param params.model - Model identifier; defaults to the default MiniMax model.
 * @param params.voiceId - Voice identifier; defaults to the default MiniMax voice.
 * @param params.speed - Speech rate, rounded to two decimals; defaults to 1.0.
 * @param params.volume - Volume, rounded to two decimals; defaults to 1.0.
 * @param params.pitch - Pitch offset, rounded to a whole number; defaults to 0.
 * @param params.timeoutMs - Time after which the request is aborted; defaults to 30 000 ms.
 * @returns The synthesized audio bytes.
 * @throws Error when the HTTP status is not OK, when the API reports a non-zero
 *   `base_resp.status_code`, or when a JSON response carries no decodable audio.
 *   A timeout surfaces as the rejection produced by the aborted `fetch`.
 */
export async function minimaxTTS(params: {
  text: string;
  apiKey: string;
  baseUrl?: string;
  model?: string;
  voiceId?: string;
  speed?: number;
  volume?: number;
  pitch?: number;
  timeoutMs?: number;
}): Promise<Buffer> {
  const {
    text,
    apiKey,
    baseUrl,
    model = DEFAULT_MINIMAX_MODEL,
    voiceId = DEFAULT_MINIMAX_VOICE,
    speed = 1.0,
    volume = 1.0,
    pitch = 0,
    timeoutMs = 30_000,
  } = params;

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), timeoutMs);

  try {
    const response = await fetch(`${normalizeMiniMaxBaseUrl(baseUrl)}/v1/t2a_v2`, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model,
        text,
        voice_setting: {
          voice_id: voiceId,
          speed: Math.round(speed * 100) / 100,
          vol: Math.round(volume * 100) / 100,
          // MiniMax documents pitch as an integer; whole values pass through unchanged.
          pitch: Math.round(pitch),
        },
      }),
      signal: controller.signal,
    });

    if (!response.ok) {
      const error = await response.text().catch(() => "Unknown error");
      throw new Error(`MiniMax TTS API error (${response.status}): ${error}`);
    }

    const payload = Buffer.from(await response.arrayBuffer());
    const contentType = (response.headers.get("content-type") ?? "").toLowerCase();
    // Treat the body as the JSON envelope when the server says so, or when no
    // audio content type is declared and the body starts with "{".
    const isJsonEnvelope =
      contentType.includes("json") || (!contentType.startsWith("audio/") && payload[0] === 0x7b);

    return isJsonEnvelope ? decodeMiniMaxAudioEnvelope(payload) : payload;
  } finally {
    // Always disarm the abort timer, whether the request succeeded or threw.
    clearTimeout(timeout);
  }
}
|
||||
|
||||
/**
 * Lists the built-in MiniMax voice presets as selectable options.
 *
 * No remote lookup is performed: the result is derived from the hard-coded
 * preset list. Users can still configure custom voice IDs from their MiniMax
 * dashboard; those simply do not appear here.
 *
 * @returns One option per preset, with a display name derived from the ID
 *   (for example "female-shaonv" becomes "Female Shaonv").
 */
export async function listMiniMaxVoices(): Promise<SpeechVoiceOption[]> {
  // Hyphens become spaces, then the first character of every word is upper-cased.
  const toDisplayName = (voiceId: string): string =>
    voiceId.replace(/-/g, " ").replace(/\b\w/g, (c) => c.toUpperCase());

  return MINIMAX_VOICE_IDS.map((voiceId) => ({
    id: voiceId,
    name: toDisplayName(voiceId),
  }));
}
|
||||
|
||||
/**
 * Builds the MiniMax speech provider plugin.
 *
 * The API key is taken from the `minimax.apiKey` config value, falling back to
 * the `MINIMAX_API_KEY` environment variable. Both synthesis entry points send
 * the same request and always report the result as mp3.
 *
 * @returns A plugin exposing voice listing, a configuration check, and the
 *   regular and telephony synthesis entry points.
 */
export function buildMiniMaxSpeechProvider(): SpeechProviderPlugin {
  /** The subset of the MiniMax config section read when synthesizing. */
  type MiniMaxSettings = {
    apiKey?: string;
    baseUrl?: string;
    model?: string;
    voiceId?: string;
    speed?: number;
    volume?: number;
    pitch?: number;
  };

  /** Resolves the API key and performs the MiniMax request shared by both entry points. */
  const renderSpeech = async (
    text: string,
    settings: MiniMaxSettings | undefined,
    timeoutMs: number | undefined,
  ): Promise<Buffer> => {
    const apiKey = settings?.apiKey || process.env.MINIMAX_API_KEY;
    if (!apiKey) {
      throw new Error("MiniMax API key missing");
    }
    return minimaxTTS({
      text,
      apiKey,
      baseUrl: settings?.baseUrl,
      model: settings?.model ?? DEFAULT_MINIMAX_MODEL,
      voiceId: settings?.voiceId ?? DEFAULT_MINIMAX_VOICE,
      speed: settings?.speed,
      volume: settings?.volume,
      pitch: settings?.pitch,
      timeoutMs,
    });
  };

  return {
    id: "minimax",
    label: "MiniMax",
    models: MINIMAX_TTS_MODELS,
    listVoices: async (_req) => {
      return listMiniMaxVoices();
    },
    isConfigured: ({ config }) =>
      Boolean(config.minimax?.apiKey || process.env.MINIMAX_API_KEY),
    synthesize: async (req) => {
      const audioBuffer = await renderSpeech(req.text, req.config.minimax, req.config.timeoutMs);
      return {
        audioBuffer,
        outputFormat: "mp3",
        fileExtension: ".mp3",
        // NOTE(review): the output is always mp3 — confirm voice-note targets accept it.
        voiceCompatible: req.target === "voice-note",
      };
    },
    synthesizeTelephony: async (req) => {
      // MiniMax doesn't natively support telephony formats.
      // For Discord voice, the MP3 would need converting to PCM/Opus;
      // that is handled by the voice-call extension's audio pipeline.
      const audioBuffer = await renderSpeech(req.text, req.config.minimax, req.config.timeoutMs);
      return {
        audioBuffer,
        outputFormat: "mp3",
        // NOTE(review): no sample rate is requested from MiniMax — confirm 24000
        // matches what the API actually returns for this request.
        sampleRate: 24000,
      };
    },
  };
}
|
||||
@ -130,6 +130,15 @@ export type ResolvedTtsConfig = {
|
||||
proxy?: string;
|
||||
timeoutMs?: number;
|
||||
};
|
||||
minimax: {
|
||||
apiKey?: string;
|
||||
baseUrl: string;
|
||||
model: string;
|
||||
voiceId: string;
|
||||
speed?: number;
|
||||
volume?: number;
|
||||
pitch?: number;
|
||||
};
|
||||
prefsPath?: string;
|
||||
maxTextLength: number;
|
||||
timeoutMs: number;
|
||||
@ -337,6 +346,18 @@ export function resolveTtsConfig(cfg: OpenClawConfig): ResolvedTtsConfig {
|
||||
proxy: rawMicrosoft.proxy?.trim() || undefined,
|
||||
timeoutMs: rawMicrosoft.timeoutMs,
|
||||
},
|
||||
minimax: {
|
||||
apiKey: normalizeResolvedSecretInputString({
|
||||
value: raw.minimax?.apiKey,
|
||||
path: "messages.tts.minimax.apiKey",
|
||||
}),
|
||||
baseUrl: (raw.minimax?.baseUrl?.trim() || "https://api.minimaxi.com").replace(/\/+$/, ""),
|
||||
model: raw.minimax?.model || "speech-01-turbo",
|
||||
voiceId: raw.minimax?.voiceId || "female-shaonv",
|
||||
speed: raw.minimax?.speed,
|
||||
volume: raw.minimax?.volume,
|
||||
pitch: raw.minimax?.pitch,
|
||||
},
|
||||
prefsPath: raw.prefsPath,
|
||||
maxTextLength: raw.maxTextLength ?? DEFAULT_MAX_TEXT_LENGTH,
|
||||
timeoutMs: raw.timeoutMs ?? DEFAULT_TIMEOUT_MS,
|
||||
@ -476,6 +497,9 @@ export function getTtsProvider(config: ResolvedTtsConfig, prefsPath: string): Tt
|
||||
if (resolveTtsApiKey(config, "elevenlabs")) {
|
||||
return "elevenlabs";
|
||||
}
|
||||
if (resolveTtsApiKey(config, "minimax")) {
|
||||
return "minimax";
|
||||
}
|
||||
return "microsoft";
|
||||
}
|
||||
|
||||
@ -544,10 +568,13 @@ export function resolveTtsApiKey(
|
||||
if (normalizedProvider === "openai") {
|
||||
return config.openai.apiKey || process.env.OPENAI_API_KEY;
|
||||
}
|
||||
if (normalizedProvider === "minimax") {
|
||||
return config.minimax.apiKey || process.env.MINIMAX_API_KEY;
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/** Identifiers of every supported TTS provider. */
export const TTS_PROVIDERS = ["openai", "elevenlabs", "microsoft", "minimax"] as const;
|
||||
|
||||
export function resolveTtsProviderOrder(primary: TtsProvider, cfg?: OpenClawConfig): TtsProvider[] {
|
||||
const normalizedPrimary = normalizeSpeechProviderId(primary) ?? primary;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user