Merge def16119667670b39037cea088ec82feb774153c into 598f1826d8b2bc969aace2c6459824737667218c

This commit is contained in:
LongZhou 2026-03-20 21:26:51 -07:00 committed by GitHub
commit c38e2aaea1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 210 additions and 1 deletions

View File

@ -93,6 +93,16 @@ export type TtsConfig = {
proxy?: string;
timeoutMs?: number;
};
/** MiniMax speech configuration. */
minimax?: {
apiKey?: SecretInput;
baseUrl?: string;
model?: string;
voiceId?: string;
speed?: number;
volume?: number;
pitch?: number;
};
/** Optional path for local TTS user preferences JSON. */
prefsPath?: string;
/** Hard cap for text sent to TTS (chars). */

View File

@ -392,6 +392,18 @@ const TtsMicrosoftConfigSchema = z
})
.strict()
.optional();
/**
 * Validation schema for the optional `minimax` section of the TTS config.
 *
 * Every field is optional, and `.strict()` rejects keys that are not listed here.
 * NOTE(review): the numeric ranges below are enforced locally only — confirm
 * they match the limits the MiniMax API actually accepts for `voice_setting`.
 */
const TtsMiniMaxConfigSchema = z
.object({
// Secret input registered in the `sensitive` registry — presumably so it is
// redacted when the config is displayed; verify against the registry's users.
apiKey: SecretInputSchema.optional().register(sensitive),
baseUrl: z.string().optional(),
model: z.string().optional(),
voiceId: z.string().optional(),
// Accepted range: 0.5 to 2 (inclusive).
speed: z.number().min(0.5).max(2).optional(),
// Accepted range: 0 to 2 (inclusive).
volume: z.number().min(0).max(2).optional(),
// Accepted range: -24 to 24 (inclusive); fractional values pass validation.
pitch: z.number().min(-24).max(24).optional(),
})
.strict()
.optional();
export const TtsConfigSchema = z
.object({
auto: TtsAutoSchema.optional(),
@ -447,6 +459,7 @@ export const TtsConfigSchema = z
.optional(),
edge: TtsMicrosoftConfigSchema,
microsoft: TtsMicrosoftConfigSchema,
minimax: TtsMiniMaxConfigSchema,
prefsPath: z.string().optional(),
maxTextLength: z.number().int().min(1).optional(),
timeoutMs: z.number().int().min(1000).max(120000).optional(),

View File

@ -5,12 +5,14 @@ import type { SpeechProviderPlugin } from "../plugins/types.js";
import type { SpeechProviderId } from "./provider-types.js";
import { buildElevenLabsSpeechProvider } from "./providers/elevenlabs.js";
import { buildMicrosoftSpeechProvider } from "./providers/microsoft.js";
import { buildMiniMaxSpeechProvider } from "./providers/minimax.js";
import { buildOpenAISpeechProvider } from "./providers/openai.js";
/**
 * Factory functions for the speech providers bundled with this module, in
 * declaration order. `as const satisfies …` keeps the tuple's literal shape
 * while checking that each entry is a zero-argument function returning a
 * `SpeechProviderPlugin`.
 */
const BUILTIN_SPEECH_PROVIDER_BUILDERS = [
buildOpenAISpeechProvider,
buildElevenLabsSpeechProvider,
buildMicrosoftSpeechProvider,
buildMiniMaxSpeechProvider,
] as const satisfies readonly (() => SpeechProviderPlugin)[];
function trimToUndefined(value: string | undefined): string | undefined {

View File

@ -0,0 +1,157 @@
import type { SpeechProviderPlugin } from "../../plugins/types.js";
import type { SpeechVoiceOption } from "../provider-types.js";
/**
 * Model ids advertised by the MiniMax provider (exposed as the plugin's `models`).
 * NOTE(review): verify each id against MiniMax's published model list — in
 * particular `speech-02`, which has no `-hd`/`-turbo` suffix unlike the others.
 */
const MINIMAX_TTS_MODELS = [
"speech-01-turbo",
"speech-01-hd",
"speech-02-hd",
"speech-02",
] as const;
// Static set of MiniMax voice ids offered by `listMiniMaxVoices`.
// NOTE(review): these ids are hard-coded — confirm they exist for the target
// MiniMax account/region before relying on them.
const MINIMAX_VOICE_IDS = [
"female-shaonv",
"male-baijia",
"male-yunyang",
"female-tianmei",
"male-john",
"female-emma",
] as const;
// Defaults applied when the corresponding config value is absent.
// NOTE(review): the same three literals are repeated in the `resolveTtsConfig`
// hunk of this change; keep both places in sync if any default changes.
const DEFAULT_MINIMAX_BASE_URL = "https://api.minimaxi.com";
const DEFAULT_MINIMAX_MODEL = "speech-01-turbo";
const DEFAULT_MINIMAX_VOICE = "female-shaonv";
/**
 * Produces the base URL used for MiniMax requests.
 *
 * Surrounding whitespace and any run of trailing slashes are removed; when
 * nothing is left (or no value was supplied) the default base URL is returned.
 *
 * @param baseUrl Configured base URL, possibly undefined or blank.
 * @returns A non-empty base URL without trailing slashes.
 */
function normalizeMiniMaxBaseUrl(baseUrl: string | undefined): string {
  const withoutTrailingSlashes = (baseUrl ?? "").trim().replace(/\/+$/, "");
  if (withoutTrailingSlashes === "") {
    return DEFAULT_MINIMAX_BASE_URL;
  }
  return withoutTrailingSlashes;
}
/**
 * Synthesizes speech through the MiniMax T2A v2 HTTP endpoint and returns the
 * decoded audio bytes.
 *
 * The non-streaming endpoint answers with a JSON envelope rather than raw
 * audio: the audio is a hex string in `data.audio`, and failures may be
 * reported with HTTP 200 plus a non-zero `base_resp.status_code`. Both cases
 * are handled here so callers receive either playable bytes or a descriptive
 * error, never the JSON text itself.
 *
 * @param params.text Text to synthesize.
 * @param params.apiKey Bearer token sent in the `Authorization` header.
 * @param params.baseUrl API origin; normalized, defaults to the MiniMax default.
 * @param params.model Model id; defaults to `DEFAULT_MINIMAX_MODEL`.
 * @param params.voiceId Voice id; defaults to `DEFAULT_MINIMAX_VOICE`.
 * @param params.speed Speech rate, rounded to two decimals (default 1.0).
 * @param params.volume Volume, rounded to two decimals (default 1.0).
 * @param params.pitch Pitch shift, rounded to an integer (default 0).
 * @param params.timeoutMs Abort the request after this many ms (default 30000).
 * @param params.sampleRate Requested MP3 sample rate in Hz (default 24000).
 * @returns The MP3 audio bytes decoded from the response envelope.
 * @throws Error when the HTTP status is not 2xx, the body is not JSON, the API
 *   reports a non-zero status code, or the audio payload is missing/malformed.
 *   A timeout surfaces as the abort error raised by `fetch`.
 */
export async function minimaxTTS(params: {
  text: string;
  apiKey: string;
  baseUrl?: string;
  model?: string;
  voiceId?: string;
  speed?: number;
  volume?: number;
  pitch?: number;
  timeoutMs?: number;
  sampleRate?: number;
}): Promise<Buffer> {
  const {
    text,
    apiKey,
    baseUrl,
    model = DEFAULT_MINIMAX_MODEL,
    voiceId = DEFAULT_MINIMAX_VOICE,
    speed = 1.0,
    volume = 1.0,
    pitch = 0,
    timeoutMs = 30_000,
    sampleRate = 24_000,
  } = params;

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await fetch(`${normalizeMiniMaxBaseUrl(baseUrl)}/v1/t2a_v2`, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model,
        text,
        stream: false,
        voice_setting: {
          voice_id: voiceId,
          speed: Math.round(speed * 100) / 100,
          vol: Math.round(volume * 100) / 100,
          // The API takes an integer pitch; fractional config values are rounded.
          pitch: Math.round(pitch),
        },
        // Pin the container and sample rate so the bytes match what callers
        // declare instead of depending on server-side defaults.
        audio_setting: {
          format: "mp3",
          sample_rate: sampleRate,
        },
      }),
      signal: controller.signal,
    });

    if (!response.ok) {
      const error = await response.text().catch(() => "Unknown error");
      throw new Error(`MiniMax TTS API error (${response.status}): ${error}`);
    }

    // Read the body while the abort timer is still armed so a stalled download
    // is also covered by the timeout.
    const rawBody = await response.text();
    let parsed: unknown;
    try {
      parsed = JSON.parse(rawBody);
    } catch {
      throw new Error("MiniMax TTS API returned a non-JSON response");
    }
    const envelope = (typeof parsed === "object" && parsed !== null ? parsed : {}) as {
      base_resp?: { status_code?: unknown; status_msg?: unknown } | null;
      data?: { audio?: unknown } | null;
    };

    // API-level failures arrive with HTTP 200 and a non-zero status code.
    const statusCode = envelope.base_resp?.status_code;
    if (typeof statusCode === "number" && statusCode !== 0) {
      const statusMsg = envelope.base_resp?.status_msg;
      const detail = typeof statusMsg === "string" && statusMsg !== "" ? statusMsg : "Unknown error";
      throw new Error(`MiniMax TTS API error (${statusCode}): ${detail}`);
    }

    const audioHex = envelope.data?.audio;
    if (typeof audioHex !== "string" || audioHex.length === 0) {
      throw new Error("MiniMax TTS API response did not include audio data");
    }
    const audio = Buffer.from(audioHex, "hex");
    // Buffer.from silently stops at the first invalid hex digit; detect that.
    if (audio.length * 2 !== audioHex.length) {
      throw new Error("MiniMax TTS API returned malformed hex audio data");
    }
    return audio;
  } finally {
    clearTimeout(timeout);
  }
}
/**
 * Lists the voices offered for MiniMax.
 *
 * The result is built from the static `MINIMAX_VOICE_IDS` table; no remote
 * call is made. Each display name is derived from the id by turning hyphens
 * into spaces and upper-casing the first character of every word
 * (`female-shaonv` becomes `Female Shaonv`).
 *
 * @returns One voice option per known voice id, in table order.
 */
export async function listMiniMaxVoices(): Promise<SpeechVoiceOption[]> {
  const voices: SpeechVoiceOption[] = [];
  for (const id of MINIMAX_VOICE_IDS) {
    const spaced = id.replace(/-/g, " ");
    const name = spaced.replace(/\b\w/g, (letter) => letter.toUpperCase());
    voices.push({ id, name });
  }
  return voices;
}
/**
 * Creates the MiniMax speech provider plugin.
 *
 * The provider counts as configured when an API key is present either in
 * `config.minimax.apiKey` or in the `MINIMAX_API_KEY` environment variable.
 * Both synthesis entry points send the same request through `minimaxTTS` and
 * differ only in the metadata attached to the returned audio.
 *
 * @returns A plugin with id `minimax`.
 */
export function buildMiniMaxSpeechProvider(): SpeechProviderPlugin {
  /** Structural view of the request fields this provider reads. */
  type MiniMaxSynthesisRequest = {
    text: string;
    config: {
      timeoutMs?: number;
      minimax?: {
        apiKey?: string;
        baseUrl?: string;
        model?: string;
        voiceId?: string;
        speed?: number;
        volume?: number;
        pitch?: number;
      };
    };
  };

  /** Config key first, then the environment variable. */
  const lookupApiKey = (config: MiniMaxSynthesisRequest["config"]): string | undefined =>
    config.minimax?.apiKey || process.env.MINIMAX_API_KEY;

  /** Shared request path for both synthesis entry points. */
  const requestAudio = async ({ text, config }: MiniMaxSynthesisRequest): Promise<Buffer> => {
    const apiKey = lookupApiKey(config);
    if (!apiKey) {
      throw new Error("MiniMax API key missing");
    }
    const settings = config.minimax;
    return minimaxTTS({
      text,
      apiKey,
      baseUrl: settings?.baseUrl,
      model: settings?.model ?? DEFAULT_MINIMAX_MODEL,
      voiceId: settings?.voiceId ?? DEFAULT_MINIMAX_VOICE,
      speed: settings?.speed,
      volume: settings?.volume,
      pitch: settings?.pitch,
      timeoutMs: config.timeoutMs,
    });
  };

  return {
    id: "minimax",
    label: "MiniMax",
    models: MINIMAX_TTS_MODELS,
    listVoices: async (_req) => listMiniMaxVoices(),
    isConfigured: ({ config }) => Boolean(lookupApiKey(config)),
    synthesize: async (req) => {
      const audioBuffer = await requestAudio(req);
      const isVoiceNote = req.target === "voice-note";
      return {
        audioBuffer,
        outputFormat: "mp3",
        fileExtension: ".mp3",
        voiceCompatible: isVoiceNote,
      };
    },
    synthesizeTelephony: async (req) => {
      // The telephony path returns the same payload as `synthesize`, labelled
      // "mp3"; no telephony-specific format is requested here.
      // NOTE(review): conversion to PCM/Opus is presumably done downstream by
      // the voice-call audio pipeline — confirm against its consumer.
      const audioBuffer = await requestAudio(req);
      return {
        audioBuffer,
        outputFormat: "mp3",
        // NOTE(review): declared rate; confirm it matches the audio MiniMax returns.
        sampleRate: 24000,
      };
    },
  };
}

View File

@ -130,6 +130,15 @@ export type ResolvedTtsConfig = {
proxy?: string;
timeoutMs?: number;
};
minimax: {
apiKey?: string;
baseUrl: string;
model: string;
voiceId: string;
speed?: number;
volume?: number;
pitch?: number;
};
prefsPath?: string;
maxTextLength: number;
timeoutMs: number;
@ -337,6 +346,18 @@ export function resolveTtsConfig(cfg: OpenClawConfig): ResolvedTtsConfig {
proxy: rawMicrosoft.proxy?.trim() || undefined,
timeoutMs: rawMicrosoft.timeoutMs,
},
minimax: {
apiKey: normalizeResolvedSecretInputString({
value: raw.minimax?.apiKey,
path: "messages.tts.minimax.apiKey",
}),
baseUrl: (raw.minimax?.baseUrl?.trim() || "https://api.minimaxi.com").replace(/\/+$/, ""),
model: raw.minimax?.model || "speech-01-turbo",
voiceId: raw.minimax?.voiceId || "female-shaonv",
speed: raw.minimax?.speed,
volume: raw.minimax?.volume,
pitch: raw.minimax?.pitch,
},
prefsPath: raw.prefsPath,
maxTextLength: raw.maxTextLength ?? DEFAULT_MAX_TEXT_LENGTH,
timeoutMs: raw.timeoutMs ?? DEFAULT_TIMEOUT_MS,
@ -476,6 +497,9 @@ export function getTtsProvider(config: ResolvedTtsConfig, prefsPath: string): Tt
if (resolveTtsApiKey(config, "elevenlabs")) {
return "elevenlabs";
}
if (resolveTtsApiKey(config, "minimax")) {
return "minimax";
}
return "microsoft";
}
@ -544,10 +568,13 @@ export function resolveTtsApiKey(
if (normalizedProvider === "openai") {
return config.openai.apiKey || process.env.OPENAI_API_KEY;
}
if (normalizedProvider === "minimax") {
return config.minimax.apiKey || process.env.MINIMAX_API_KEY;
}
return undefined;
}
export const TTS_PROVIDERS = ["openai", "elevenlabs", "microsoft"] as const;
/** TTS provider ids, including `minimax`; `as const` keeps each id as a literal type. */
export const TTS_PROVIDERS = ["openai", "elevenlabs", "microsoft", "minimax"] as const;
export function resolveTtsProviderOrder(primary: TtsProvider, cfg?: OpenClawConfig): TtsProvider[] {
const normalizedPrimary = normalizeSpeechProviderId(primary) ?? primary;