From 5041507da770d4ca6a454586a9d339a96fab88a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hanabi=20=28=E8=8A=B1=E7=81=AB=29?=
 <hanabi_openclaw@example.com>
Date: Wed, 18 Mar 2026 23:25:31 +0800
Subject: [PATCH 1/2] feat(tts): add MiniMax TTS provider support

Add MiniMax as a new TTS provider option for OpenClaw voice synthesis.

Features:
- Support for MiniMax Speech API (speech-01-turbo, speech-01-hd, speech-02, speech-02-hd)
- Configurable voice_id, speed, volume, and pitch parameters
- Support for both audio-file and voice-note synthesis targets
- Telephony synthesis support (for Discord voice channel integration)
- Environment variable support via MINIMAX_API_KEY

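Configuration example (`messages.tts.minimax`; every field is optional):

    {
      "apiKey": "<MiniMax API key, or set MINIMAX_API_KEY instead>",
      "model": "speech-01-turbo",
      "voiceId": "female-shaonv",
      "speed": 1.0,
      "volume": 1.0,
      "pitch": 0
    }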

This enables Discord voice channel users to use MiniMax's high-quality
Chinese TTS voices for text-to-speech playback.
---
 src/config/types.tts.ts       |  10 ++
 src/config/zod-schema.core.ts |  13 +++
 src/tts/provider-registry.ts  |   2 +
 src/tts/providers/minimax.ts  | 170 ++++++++++++++++++++++++++++++++++
 src/tts/tts.ts                |  26 +++++-
 5 files changed, 220 insertions(+), 1 deletion(-)
 create mode 100644 src/tts/providers/minimax.ts

diff --git a/src/config/types.tts.ts b/src/config/types.tts.ts
index 4703f43ae12..448d1871111 100644
--- a/src/config/types.tts.ts
+++ b/src/config/types.tts.ts
@@ -93,6 +93,16 @@ export type TtsConfig = {
     proxy?: string;
     timeoutMs?: number;
   };
+  /** MiniMax speech configuration. */
+  minimax?: {
+    apiKey?: SecretInput;
+    baseUrl?: string;
+    model?: string;
+    voiceId?: string;
+    speed?: number;
+    volume?: number;
+    pitch?: number;
+  };
   /** Optional path for local TTS user preferences JSON. */
   prefsPath?: string;
   /** Hard cap for text sent to TTS (chars). */
diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts
index 22c589c8490..fe38f2840c1 100644
--- a/src/config/zod-schema.core.ts
+++ b/src/config/zod-schema.core.ts
@@ -392,6 +392,18 @@ const TtsMicrosoftConfigSchema = z
   })
   .strict()
   .optional();
+const TtsMiniMaxConfigSchema = z
+  .object({
+    apiKey: SecretInputSchema.optional().register(sensitive),
+    baseUrl: z.string().optional(),
+    model: z.string().optional(),
+    voiceId: z.string().optional(),
+    speed: z.number().min(0.5).max(2).optional(),
+    volume: z.number().min(0).max(2).optional(),
+    pitch: z.number().min(-24).max(24).optional(),
+  })
+  .strict()
+  .optional();
 export const TtsConfigSchema = z
   .object({
     auto: TtsAutoSchema.optional(),
@@ -447,6 +459,7 @@ export const TtsConfigSchema = z
       .optional(),
     edge: TtsMicrosoftConfigSchema,
     microsoft: TtsMicrosoftConfigSchema,
+    minimax: TtsMiniMaxConfigSchema,
     prefsPath: z.string().optional(),
     maxTextLength: z.number().int().min(1).optional(),
     timeoutMs: z.number().int().min(1000).max(120000).optional(),
diff --git a/src/tts/provider-registry.ts b/src/tts/provider-registry.ts
index d1462880a99..9285b8da136 100644
--- a/src/tts/provider-registry.ts
+++ b/src/tts/provider-registry.ts
@@ -5,12 +5,14 @@ import type { SpeechProviderPlugin } from "../plugins/types.js";
 import type { SpeechProviderId } from "./provider-types.js";
 import { buildElevenLabsSpeechProvider } from "./providers/elevenlabs.js";
 import { buildMicrosoftSpeechProvider } from "./providers/microsoft.js";
+import { buildMiniMaxSpeechProvider } from "./providers/minimax.js";
 import { buildOpenAISpeechProvider } from "./providers/openai.js";
 
 const BUILTIN_SPEECH_PROVIDER_BUILDERS = [
   buildOpenAISpeechProvider,
   buildElevenLabsSpeechProvider,
   buildMicrosoftSpeechProvider,
+  buildMiniMaxSpeechProvider,
 ] as const satisfies readonly (() => SpeechProviderPlugin)[];
 
 function trimToUndefined(value: string | undefined): string | undefined {
diff --git a/src/tts/providers/minimax.ts b/src/tts/providers/minimax.ts
new file mode 100644
index 00000000000..658056eb25c
--- /dev/null
+++ b/src/tts/providers/minimax.ts
@@ -0,0 +1,170 @@
+import type { SpeechProviderPlugin } from "../../plugins/types.js";
+import type { SpeechVoiceOption } from "../provider-types.js";
+
+const MINIMAX_TTS_MODELS = [
+  "speech-01-turbo",
+  "speech-01-hd",
+  "speech-02-hd",
+  "speech-02",
+] as const;
+
+// Popular MiniMax voice IDs
+const MINIMAX_VOICE_IDS = [
+  "female-shaonv",
+  "male-baijia",
+  "male-yunyang",
+  "female-tianmei",
+  "male-john",
+  "female-emma",
+] as const;
+
+const DEFAULT_MINIMAX_BASE_URL = "https://api.minimaxi.com";
+const DEFAULT_MINIMAX_MODEL = "speech-01-turbo";
+const DEFAULT_MINIMAX_VOICE = "female-shaonv";
+
+function normalizeMiniMaxBaseUrl(baseUrl: string | undefined): string {
+  const trimmed = baseUrl?.trim();
+  return trimmed?.replace(/\/+$/, "") || DEFAULT_MINIMAX_BASE_URL;
+}
+
+/**
+ * Synthesize `text` via MiniMax `/v1/t2a_v2` and return the decoded audio bytes. The reply is
+ * a JSON envelope: hex audio in `data.audio`, API status in `base_resp` (errors may be HTTP 200).
+ * @throws Error on a non-2xx status, a non-zero `base_resp.status_code`, or missing audio.
+ */
+export async function minimaxTTS(params: {
+  text: string;
+  apiKey: string;
+  baseUrl?: string;
+  model?: string;
+  voiceId?: string;
+  speed?: number;
+  volume?: number;
+  pitch?: number;
+  timeoutMs?: number;
+}): Promise<Buffer> {
+  const { text, apiKey, baseUrl, timeoutMs = 30_000 } = params;
+  const { model = DEFAULT_MINIMAX_MODEL, voiceId = DEFAULT_MINIMAX_VOICE } = params;
+  const { speed = 1.0, volume = 1.0, pitch = 0 } = params;
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), timeoutMs);
+  try {
+    const response = await fetch(`${normalizeMiniMaxBaseUrl(baseUrl)}/v1/t2a_v2`, {
+      method: "POST",
+      headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
+      body: JSON.stringify({
+        model,
+        text,
+        voice_setting: {
+          voice_id: voiceId,
+          speed: Math.round(speed * 100) / 100,
+          vol: Math.round(volume * 100) / 100,
+          pitch,
+        },
+      }),
+      signal: controller.signal,
+    });
+    if (!response.ok) {
+      const error = await response.text().catch(() => "Unknown error");
+      throw new Error(`MiniMax TTS API error (${response.status}): ${error}`);
+    }
+    const payload = (await response.json()) as {
+      data?: { audio?: string };
+      base_resp?: { status_code?: number; status_msg?: string };
+    } | null;
+    const code = payload?.base_resp?.status_code ?? 0;
+    const audioHex = payload?.data?.audio;
+    if (code !== 0 || !audioHex) {
+      const reason = payload?.base_resp?.status_msg ?? "no audio returned";
+      throw new Error(`MiniMax TTS API error (${code}): ${reason}`);
+    }
+    return Buffer.from(audioHex, "hex");
+  } finally {
+    clearTimeout(timeout);
+  }
+}
+
+export async function listMiniMaxVoices(params: {
+  apiKey: string;
+  baseUrl?: string;
+}): Promise<SpeechVoiceOption[]> {
+  // MiniMax doesn't have a public list voices API, so we return common voices
+  // Users can use custom voice IDs from their MiniMax dashboard
+  return MINIMAX_VOICE_IDS.map((voiceId) => ({
+    id: voiceId,
+    name: voiceId.replace(/-/g, " ").replace(/\b\w/g, (c) => c.toUpperCase()),
+  }));
+}
+
+export function buildMiniMaxSpeechProvider(): SpeechProviderPlugin {
+  return {
+    id: "minimax",
+    label: "MiniMax",
+    models: MINIMAX_TTS_MODELS,
+    listVoices: async (req) => {
+      const apiKey =
+        req.apiKey ||
+        req.config?.minimax.apiKey ||
+        process.env.MINIMAX_API_KEY;
+      if (!apiKey) {
+        throw new Error("MiniMax API key missing");
+      }
+      return listMiniMaxVoices({
+        apiKey,
+        baseUrl: req.baseUrl ?? req.config?.minimax.baseUrl,
+      });
+    },
+    isConfigured: ({ config }) =>
+      Boolean(config.minimax?.apiKey || process.env.MINIMAX_API_KEY),
+    synthesize: async (req) => {
+      const apiKey =
+        req.config.minimax?.apiKey || process.env.MINIMAX_API_KEY;
+      if (!apiKey) {
+        throw new Error("MiniMax API key missing");
+      }
+      const audioBuffer = await minimaxTTS({
+        text: req.text,
+        apiKey,
+        baseUrl: req.config.minimax?.baseUrl,
+        model: req.config.minimax?.model ?? DEFAULT_MINIMAX_MODEL,
+        voiceId: req.config.minimax?.voiceId ?? DEFAULT_MINIMAX_VOICE,
+        speed: req.config.minimax?.speed,
+        volume: req.config.minimax?.volume,
+        pitch: req.config.minimax?.pitch,
+        timeoutMs: req.config.timeoutMs,
+      });
+      return {
+        audioBuffer,
+        outputFormat: "mp3",
+        fileExtension: ".mp3",
+        voiceCompatible: req.target === "voice-note",
+      };
+    },
+    synthesizeTelephony: async (req) => {
+      // MiniMax doesn't natively support telephony formats
+      // For Discord voice, we'd need to convert MP3 to PCM/Opus
+      // This is handled by the voice-call extension's audio pipeline
+      const apiKey =
+        req.config.minimax?.apiKey || process.env.MINIMAX_API_KEY;
+      if (!apiKey) {
+        throw new Error("MiniMax API key missing");
+      }
+      const audioBuffer = await minimaxTTS({
+        text: req.text,
+        apiKey,
+        baseUrl: req.config.minimax?.baseUrl,
+        model: req.config.minimax?.model ?? DEFAULT_MINIMAX_MODEL,
+        voiceId: req.config.minimax?.voiceId ?? DEFAULT_MINIMAX_VOICE,
+        speed: req.config.minimax?.speed,
+        volume: req.config.minimax?.volume,
+        pitch: req.config.minimax?.pitch,
+        timeoutMs: req.config.timeoutMs,
+      });
+      return {
+        audioBuffer,
+        outputFormat: "mp3",
+        sampleRate: 24000, // MiniMax default sample rate
+      };
+    },
+  };
+}
diff --git a/src/tts/tts.ts b/src/tts/tts.ts
index 7d48dfb8e07..7a6a8e5d976 100644
--- a/src/tts/tts.ts
+++ b/src/tts/tts.ts
@@ -129,6 +129,15 @@ export type ResolvedTtsConfig = {
     proxy?: string;
     timeoutMs?: number;
   };
+  minimax: {
+    apiKey?: string;
+    baseUrl: string;
+    model: string;
+    voiceId: string;
+    speed?: number;
+    volume?: number;
+    pitch?: number;
+  };
   prefsPath?: string;
   maxTextLength: number;
   timeoutMs: number;
@@ -319,6 +328,18 @@ export function resolveTtsConfig(cfg: OpenClawConfig): ResolvedTtsConfig {
       proxy: rawMicrosoft.proxy?.trim() || undefined,
       timeoutMs: rawMicrosoft.timeoutMs,
     },
+    minimax: {
+      apiKey: normalizeResolvedSecretInputString({
+        value: raw.minimax?.apiKey,
+        path: "messages.tts.minimax.apiKey",
+      }),
+      baseUrl: (raw.minimax?.baseUrl?.trim() || "https://api.minimaxi.com").replace(/\/+$/, ""),
+      model: raw.minimax?.model || "speech-01-turbo",
+      voiceId: raw.minimax?.voiceId || "female-shaonv",
+      speed: raw.minimax?.speed,
+      volume: raw.minimax?.volume,
+      pitch: raw.minimax?.pitch,
+    },
     prefsPath: raw.prefsPath,
     maxTextLength: raw.maxTextLength ?? DEFAULT_MAX_TEXT_LENGTH,
     timeoutMs: raw.timeoutMs ?? DEFAULT_TIMEOUT_MS,
@@ -526,10 +547,13 @@ export function resolveTtsApiKey(
   if (normalizedProvider === "openai") {
     return config.openai.apiKey || process.env.OPENAI_API_KEY;
   }
+  if (normalizedProvider === "minimax") {
+    return config.minimax.apiKey || process.env.MINIMAX_API_KEY;
+  }
   return undefined;
 }
 
-export const TTS_PROVIDERS = ["openai", "elevenlabs", "microsoft"] as const;
+export const TTS_PROVIDERS = ["openai", "elevenlabs", "microsoft", "minimax"] as const;
 
 export function resolveTtsProviderOrder(primary: TtsProvider, cfg?: OpenClawConfig): TtsProvider[] {
   const normalizedPrimary = normalizeSpeechProviderId(primary) ?? primary;

From def16119667670b39037cea088ec82feb774153c Mon Sep 17 00:00:00 2001
From: ViccRondo <pancnlz@hotmail.com>
Date: Thu, 19 Mar 2026 18:48:53 +0800
Subject: [PATCH 2/2] fix(tts): add MiniMax to auto-detection chain and fix
 listVoices

- Add MiniMax to getTtsProvider() auto-detection, after elevenlabs
- Drop the listMiniMaxVoices params entirely (the voice list is static, so no API call is made)
- Remove unnecessary API key check from listVoices callback

Fixes PR feedback from greptile-apps and Codex reviewers
---
 src/tts/providers/minimax.ts | 19 +++----------------
 src/tts/tts.ts               |  3 +++
 2 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/src/tts/providers/minimax.ts b/src/tts/providers/minimax.ts
index 658056eb25c..948acca6b93 100644
--- a/src/tts/providers/minimax.ts
+++ b/src/tts/providers/minimax.ts
@@ -84,10 +84,7 @@ export async function minimaxTTS(params: {
   }
 }
 
-export async function listMiniMaxVoices(params: {
-  apiKey: string;
-  baseUrl?: string;
-}): Promise<SpeechVoiceOption[]> {
+export async function listMiniMaxVoices(): Promise<SpeechVoiceOption[]> {
   // MiniMax doesn't have a public list voices API, so we return common voices
   // Users can use custom voice IDs from their MiniMax dashboard
   return MINIMAX_VOICE_IDS.map((voiceId) => ({
@@ -101,18 +98,8 @@ export function buildMiniMaxSpeechProvider(): SpeechProviderPlugin {
     id: "minimax",
     label: "MiniMax",
     models: MINIMAX_TTS_MODELS,
-    listVoices: async (req) => {
-      const apiKey =
-        req.apiKey ||
-        req.config?.minimax.apiKey ||
-        process.env.MINIMAX_API_KEY;
-      if (!apiKey) {
-        throw new Error("MiniMax API key missing");
-      }
-      return listMiniMaxVoices({
-        apiKey,
-        baseUrl: req.baseUrl ?? req.config?.minimax.baseUrl,
-      });
+    listVoices: async (_req) => {
+      return listMiniMaxVoices();
     },
     isConfigured: ({ config }) =>
       Boolean(config.minimax?.apiKey || process.env.MINIMAX_API_KEY),
diff --git a/src/tts/tts.ts b/src/tts/tts.ts
index 7a6a8e5d976..5e23d779bec 100644
--- a/src/tts/tts.ts
+++ b/src/tts/tts.ts
@@ -479,6 +479,9 @@ export function getTtsProvider(config: ResolvedTtsConfig, prefsPath: string): Tt
   if (resolveTtsApiKey(config, "elevenlabs")) {
     return "elevenlabs";
   }
+  if (resolveTtsApiKey(config, "minimax")) {
+    return "minimax";
+  }
   return "microsoft";
 }