diff --git a/src/tts/providers/azure.ts b/src/tts/providers/azure.ts index 04db3b4fa82..f03099905b5 100644 --- a/src/tts/providers/azure.ts +++ b/src/tts/providers/azure.ts @@ -27,9 +27,11 @@ export async function listAzureVoices(params: { region?: string; baseUrl?: string; }): Promise { - const base = normalizeAzureBaseUrl(params.baseUrl); const region = params.region || "eastus"; - const url = `https://${region}.tts.speech.microsoft.com/cognitiveservices/voices/list`; + // Use baseUrl if provided, otherwise derive from region + const url = params.baseUrl + ? `${normalizeAzureBaseUrl(params.baseUrl)}/cognitiveservices/voices/list` + : `https://${region}.tts.speech.microsoft.com/cognitiveservices/voices/list`; const response = await fetch(url, { headers: { @@ -42,8 +44,10 @@ export async function listAzureVoices(params: { } const voices = (await response.json()) as AzureVoiceListEntry[]; + // Filter deprecated voices BEFORE mapping (Status field is available here) return Array.isArray(voices) ? voices + .filter((voice) => voice.Status !== "Deprecated") .map((voice) => ({ id: voice.ShortName?.trim() ?? 
"", name: voice.DisplayName?.trim() || voice.ShortName?.trim() || undefined, @@ -51,7 +55,7 @@ export async function listAzureVoices(params: { locale: voice.Locale?.trim() || undefined, gender: voice.Gender?.trim() || undefined, })) - .filter((voice) => voice.id.length > 0 && voice.Status !== "Deprecated") + .filter((voice) => voice.id.length > 0) : []; } @@ -74,43 +78,46 @@ export function buildAzureSpeechProvider(): SpeechProviderPlugin { listVoices: async (req) => { const apiKey = req.apiKey || - req.config?.azure?.apiKey || + (req.config as any)?.azure?.apiKey || process.env.AZURE_SPEECH_API_KEY; if (!apiKey) { throw new Error("Azure Speech API key missing"); } return listAzureVoices({ apiKey, - region: req.config?.azure?.region || process.env.AZURE_SPEECH_REGION, - baseUrl: req.config?.azure?.baseUrl, + region: (req.config as any)?.azure?.region || process.env.AZURE_SPEECH_REGION, + baseUrl: (req.config as any)?.azure?.baseUrl, }); }, isConfigured: ({ config }) => Boolean( - config.azure?.apiKey || + (config as any)?.azure?.apiKey || process.env.AZURE_SPEECH_API_KEY, ), synthesize: async (req) => { const apiKey = - req.config.azure?.apiKey || process.env.AZURE_SPEECH_API_KEY; + (req.config as any)?.azure?.apiKey || process.env.AZURE_SPEECH_API_KEY; if (!apiKey) { throw new Error("Azure Speech API key missing"); } - const region = req.config?.azure?.region || process.env.AZURE_SPEECH_REGION || "eastus"; - const baseUrl = normalizeAzureBaseUrl(req.config?.azure?.baseUrl); - const voice = req.overrides?.azure?.voice ?? req.config?.azure?.voice; - const lang = req.overrides?.azure?.lang ?? req.config?.azure?.lang; + const region = (req.config as any)?.azure?.region || process.env.AZURE_SPEECH_REGION || "eastus"; + const baseUrl = (req.config as any)?.azure?.baseUrl; + // Use baseUrl if provided, otherwise derive from region + const endpoint = baseUrl + ? 
`${normalizeAzureBaseUrl(baseUrl)}/cognitiveservices/v1`
+        : `https://${region}.tts.speech.microsoft.com/cognitiveservices/v1`;
+
+      const voice = (req.config as any)?.azure?.voice;
+      const lang = (req.config as any)?.azure?.lang;
       const outputFormat =
-        req.overrides?.azure?.outputFormat ??
-        req.config?.azure?.outputFormat ??
+        (req.config as any)?.azure?.outputFormat ??
         DEFAULT_AZURE_OUTPUT_FORMAT;
       if (!voice) {
         throw new Error("Azure voice not configured");
       }
-      const endpoint = `${baseUrl}/cognitiveservices/v1`;
       const ssml = buildAzureSSML(req.text, voice, lang);
 
       const response = await fetch(endpoint, {
@@ -121,6 +128,7 @@ export function buildAzureSpeechProvider(): SpeechProviderPlugin {
           "X-Microsoft-OutputFormat": outputFormat,
         },
         body: ssml,
+        signal: AbortSignal.timeout((req.config as any)?.azure?.timeoutMs ?? 30000),
       });
 
       if (!response.ok) {
diff --git a/src/tts/tts.ts b/src/tts/tts.ts
index 17a7c2fc981..2ddc8d1b8cf 100644
--- a/src/tts/tts.ts
+++ b/src/tts/tts.ts
@@ -59,6 +59,7 @@ const DEFAULT_OPENAI_VOICE = "alloy";
 const DEFAULT_EDGE_VOICE = "en-US-MichelleNeural";
 const DEFAULT_EDGE_LANG = "en-US";
 const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3";
+const DEFAULT_AZURE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3";
 
 const DEFAULT_ELEVENLABS_VOICE_SETTINGS = {
   stability: 0.5,
@@ -117,7 +118,17 @@ export type ResolvedTtsConfig = {
     speed?: number;
     instructions?: string;
   };
-  edge: {
+
+  azure: {
+    apiKey?: string;
+    region: string;
+    baseUrl: string;
+    voice: string;
+    lang: string;
+    outputFormat: string;
+    timeoutMs?: number;
+  };
+  edge: {
     enabled: boolean;
     voice: string;
     lang: string;
@@ -177,7 +188,12 @@ export type TtsDirectiveOverrides = {
     voice?: string;
     outputFormat?: string;
   };
-};
+  azure?: {
+    voice?: string;
+    lang?: string;
+    outputFormat?: string;
+  };
+};
 
 export type TtsDirectiveParseResult = {
@@ -324,6 +340,18 @@ export function resolveTtsConfig(cfg: OpenClawConfig): ResolvedTtsConfig {
       speed:
raw.openai?.speed, instructions: raw.openai?.instructions?.trim() || undefined, }, + azure: { + apiKey: normalizeResolvedSecretInputString({ + value: raw.azure?.apiKey, + path: "messages.tts.azure.apiKey", + }), + region: raw.azure?.region?.trim() || process.env.AZURE_SPEECH_REGION || "eastus", + baseUrl: raw.azure?.baseUrl?.trim() || "", + voice: raw.azure?.voice || "", + lang: raw.azure?.lang?.trim() || "en-US", + outputFormat: raw.azure?.outputFormat?.trim() || DEFAULT_AZURE_OUTPUT_FORMAT, + timeoutMs: raw.azure?.timeoutMs, + }, edge: { enabled: rawMicrosoft.enabled ?? true, voice: rawMicrosoft.voice?.trim() || DEFAULT_EDGE_VOICE,