From b36e456b09160b98779826302961c98b0ff09d9d Mon Sep 17 00:00:00 2001 From: Lakshya Agarwal Date: Fri, 20 Mar 2026 01:06:26 -0400 Subject: [PATCH 01/11] feat: add Tavily as a bundled web search plugin with search and extract tools (#49200) Merged via squash. Prepared head SHA: ece9226e886004f1e0536dd5de3ddc2946fc118c Co-authored-by: lakshyaag-tavily <266572148+lakshyaag-tavily@users.noreply.github.com> Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Reviewed-by: @gumadeiras --- .github/labeler.yml | 4 + CHANGELOG.md | 1 + docs/docs.json | 1 + .../reference/secretref-credential-surface.md | 1 + ...tref-user-supplied-credentials-matrix.json | 7 + docs/tools/index.md | 2 +- docs/tools/tavily.md | 125 ++++++++ docs/tools/web.md | 44 ++- extensions/brave/openclaw.plugin.json | 3 + extensions/firecrawl/openclaw.plugin.json | 3 + extensions/perplexity/openclaw.plugin.json | 3 + extensions/tavily/index.test.ts | 41 +++ extensions/tavily/index.ts | 15 + extensions/tavily/openclaw.plugin.json | 37 +++ extensions/tavily/package.json | 12 + extensions/tavily/skills/tavily/SKILL.md | 94 ++++++ extensions/tavily/src/config.ts | 71 +++++ extensions/tavily/src/tavily-client.ts | 286 ++++++++++++++++++ .../tavily/src/tavily-extract-tool.test.ts | 53 ++++ extensions/tavily/src/tavily-extract-tool.ts | 74 +++++ .../tavily/src/tavily-search-provider.ts | 76 +++++ extensions/tavily/src/tavily-search-tool.ts | 81 +++++ pnpm-lock.yaml | 2 + src/agents/tools/web-search.ts | 112 +------ src/commands/onboard-search.test.ts | 121 +++++++- src/commands/onboard-search.ts | 11 +- src/config/config.web-search-provider.test.ts | 68 +++++ ...undled-provider-auth-env-vars.generated.ts | 4 + .../bundled-provider-auth-env-vars.test.ts | 7 + src/plugins/bundled-web-search.test.ts | 1 + src/plugins/bundled-web-search.ts | 15 + .../contracts/registry.contract.test.ts | 9 + src/plugins/contracts/registry.ts | 3 +- src/plugins/web-search-providers.test.ts | 5 + 
src/secrets/provider-env-vars.test.ts | 22 +- src/secrets/target-registry-data.ts | 11 + src/web-search/runtime.test.ts | 77 +++++ 37 files changed, 1378 insertions(+), 124 deletions(-) create mode 100644 docs/tools/tavily.md create mode 100644 extensions/tavily/index.test.ts create mode 100644 extensions/tavily/index.ts create mode 100644 extensions/tavily/openclaw.plugin.json create mode 100644 extensions/tavily/package.json create mode 100644 extensions/tavily/skills/tavily/SKILL.md create mode 100644 extensions/tavily/src/config.ts create mode 100644 extensions/tavily/src/tavily-client.ts create mode 100644 extensions/tavily/src/tavily-extract-tool.test.ts create mode 100644 extensions/tavily/src/tavily-extract-tool.ts create mode 100644 extensions/tavily/src/tavily-search-provider.ts create mode 100644 extensions/tavily/src/tavily-search-tool.ts diff --git a/.github/labeler.yml b/.github/labeler.yml index 4ee43d5e6fa..67a74985465 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -293,6 +293,10 @@ - changed-files: - any-glob-to-any-file: - "extensions/synthetic/**" +"extensions: tavily": + - changed-files: + - any-glob-to-any-file: + - "extensions/tavily/**" "extensions: talk-voice": - changed-files: - any-glob-to-any-file: diff --git a/CHANGELOG.md b/CHANGELOG.md index e0c87b836a9..37ff9e33f36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ Docs: https://docs.openclaw.ai - Plugins/Xiaomi: switch the bundled Xiaomi provider to the `/v1` OpenAI-compatible endpoint and add MiMo V2 Pro plus MiMo V2 Omni to the built-in catalog. (#49214) thanks @DJjjjhao. - Plugins/Matrix: add `allowBots` room policy so configured Matrix bot accounts can talk to each other, with optional mention-only gating. Thanks @gumadeiras. - Plugins/Matrix: add per-account `allowPrivateNetwork` opt-in for private/internal homeservers, while keeping public cleartext homeservers blocked. Thanks @gumadeiras. 
+- Web tools/Tavily: add Tavily as a bundled web-search provider with dedicated `tavily_search` and `tavily_extract` tools, using canonical plugin-owned config under `plugins.entries.tavily.config.webSearch.*`. (#49200) thanks @lakshyaag-tavily. ### Fixes diff --git a/docs/docs.json b/docs/docs.json index bd7d01fc43b..a941bec2601 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -1031,6 +1031,7 @@ "tools/exec", "tools/exec-approvals", "tools/firecrawl", + "tools/tavily", "tools/llm-task", "tools/lobster", "tools/loop-detection", diff --git a/docs/reference/secretref-credential-surface.md b/docs/reference/secretref-credential-surface.md index 39420e335bf..d0a11bc68ef 100644 --- a/docs/reference/secretref-credential-surface.md +++ b/docs/reference/secretref-credential-surface.md @@ -38,6 +38,7 @@ Scope intent: - `plugins.entries.moonshot.config.webSearch.apiKey` - `plugins.entries.perplexity.config.webSearch.apiKey` - `plugins.entries.firecrawl.config.webSearch.apiKey` +- `plugins.entries.tavily.config.webSearch.apiKey` - `tools.web.search.apiKey` - `tools.web.search.gemini.apiKey` - `tools.web.search.grok.apiKey` diff --git a/docs/reference/secretref-user-supplied-credentials-matrix.json b/docs/reference/secretref-user-supplied-credentials-matrix.json index d4706e40304..cca7bb38c4b 100644 --- a/docs/reference/secretref-user-supplied-credentials-matrix.json +++ b/docs/reference/secretref-user-supplied-credentials-matrix.json @@ -551,6 +551,13 @@ "path": "tools.web.search.perplexity.apiKey", "secretShape": "secret_input", "optIn": true + }, + { + "id": "plugins.entries.tavily.config.webSearch.apiKey", + "configFile": "openclaw.json", + "path": "plugins.entries.tavily.config.webSearch.apiKey", + "secretShape": "secret_input", + "optIn": true } ] } diff --git a/docs/tools/index.md b/docs/tools/index.md index 55e52bf46da..91297e5775c 100644 --- a/docs/tools/index.md +++ b/docs/tools/index.md @@ -256,7 +256,7 @@ Enable with `tools.loopDetection.enabled: true` (default 
is `false`). ### `web_search` -Search the web using Brave, Firecrawl, Gemini, Grok, Kimi, or Perplexity. +Search the web using Brave, Firecrawl, Gemini, Grok, Kimi, Perplexity, or Tavily. Core parameters: diff --git a/docs/tools/tavily.md b/docs/tools/tavily.md new file mode 100644 index 00000000000..dcf7ce4c1ad --- /dev/null +++ b/docs/tools/tavily.md @@ -0,0 +1,125 @@ +--- +summary: "Tavily search and extract tools" +read_when: + - You want Tavily-backed web search + - You need a Tavily API key + - You want Tavily as a web_search provider + - You want content extraction from URLs +title: "Tavily" +--- + +# Tavily + +OpenClaw can use **Tavily** in two ways: + +- as the `web_search` provider +- as explicit plugin tools: `tavily_search` and `tavily_extract` + +Tavily is a search API designed for AI applications, returning structured results +optimized for LLM consumption. It supports configurable search depth, topic +filtering, domain filters, AI-generated answer summaries, and content extraction +from URLs (including JavaScript-rendered pages). + +## Get an API key + +1. Create a Tavily account at [tavily.com](https://tavily.com/). +2. Generate an API key in the dashboard. +3. Store it in config or set `TAVILY_API_KEY` in the gateway environment. + +## Configure Tavily search + +```json5 +{ + plugins: { + entries: { + tavily: { + enabled: true, + config: { + webSearch: { + apiKey: "tvly-...", // optional if TAVILY_API_KEY is set + baseUrl: "https://api.tavily.com", + }, + }, + }, + }, + }, + tools: { + web: { + search: { + provider: "tavily", + }, + }, + }, +} +``` + +Notes: + +- Choosing Tavily in onboarding or `openclaw configure --section web` enables + the bundled Tavily plugin automatically. +- Store Tavily config under `plugins.entries.tavily.config.webSearch.*`. +- `web_search` with Tavily supports `query` and `count` (up to 20 results). +- For Tavily-specific controls like `search_depth`, `topic`, `include_answer`, + or domain filters, use `tavily_search`. 
+ +## Tavily plugin tools + +### `tavily_search` + +Use this when you want Tavily-specific search controls instead of generic +`web_search`. + +| Parameter | Description | +| ----------------- | --------------------------------------------------------------------- | +| `query` | Search query string (keep under 400 characters) | +| `search_depth` | `basic` (default, balanced) or `advanced` (highest relevance, slower) | +| `topic` | `general` (default), `news` (real-time updates), or `finance` | +| `max_results` | Number of results, 1-20 (default: 5) | +| `include_answer` | Include an AI-generated answer summary (default: false) | +| `time_range` | Filter by recency: `day`, `week`, `month`, or `year` | +| `include_domains` | Array of domains to restrict results to | +| `exclude_domains` | Array of domains to exclude from results | + +**Search depth:** + +| Depth | Speed | Relevance | Best for | +| ---------- | ------ | --------- | ----------------------------------- | +| `basic` | Faster | High | General-purpose queries (default) | +| `advanced` | Slower | Highest | Precision, specific facts, research | + +### `tavily_extract` + +Use this to extract clean content from one or more URLs. Handles +JavaScript-rendered pages and supports query-focused chunking for targeted +extraction. 
+ +| Parameter | Description | +| ------------------- | ---------------------------------------------------------- | +| `urls` | Array of URLs to extract (1-20 per request) | +| `query` | Rerank extracted chunks by relevance to this query | +| `extract_depth` | `basic` (default, fast) or `advanced` (for JS-heavy pages) | +| `chunks_per_source` | Chunks per URL, 1-5 (requires `query`) | +| `include_images` | Include image URLs in results (default: false) | + +**Extract depth:** + +| Depth | When to use | +| ---------- | ----------------------------------------- | +| `basic` | Simple pages - try this first | +| `advanced` | JS-rendered SPAs, dynamic content, tables | + +Tips: + +- Max 20 URLs per request. Batch larger lists into multiple calls. +- Use `query` + `chunks_per_source` to get only relevant content instead of full pages. +- Try `basic` first; fall back to `advanced` if content is missing or incomplete. + +## Choosing the right tool + +| Need | Tool | +| ------------------------------------ | ---------------- | +| Quick web search, no special options | `web_search` | +| Search with depth, topic, AI answers | `tavily_search` | +| Extract content from specific URLs | `tavily_extract` | + +See [Web tools](/tools/web) for the full web tool setup and provider comparison. diff --git a/docs/tools/web.md b/docs/tools/web.md index 313e709c32f..8d5b6bff5f1 100644 --- a/docs/tools/web.md +++ b/docs/tools/web.md @@ -1,5 +1,5 @@ --- -summary: "Web search + fetch tools (Brave, Firecrawl, Gemini, Grok, Kimi, and Perplexity providers)" +summary: "Web search + fetch tools (Brave, Firecrawl, Gemini, Grok, Kimi, Perplexity, and Tavily providers)" read_when: - You want to enable web_search or web_fetch - You need provider API key setup @@ -11,7 +11,7 @@ title: "Web Tools" OpenClaw ships two lightweight web tools: -- `web_search` — Search the web using Brave Search API, Firecrawl Search, Gemini with Google Search grounding, Grok, Kimi, or Perplexity Search API. 
+- `web_search` — Search the web using Brave Search API, Firecrawl Search, Gemini with Google Search grounding, Grok, Kimi, Perplexity Search API, or Tavily Search API. - `web_fetch` — HTTP fetch + readable extraction (HTML → markdown/text). These are **not** browser automation. For JS-heavy sites or logins, use the @@ -25,8 +25,9 @@ These are **not** browser automation. For JS-heavy sites or logins, use the (HTML → markdown/text). It does **not** execute JavaScript. - `web_fetch` is enabled by default (unless explicitly disabled). - The bundled Firecrawl plugin also adds `firecrawl_search` and `firecrawl_scrape` when enabled. +- The bundled Tavily plugin also adds `tavily_search` and `tavily_extract` when enabled. -See [Brave Search setup](/tools/brave-search) and [Perplexity Search setup](/tools/perplexity-search) for provider-specific details. +See [Brave Search setup](/tools/brave-search), [Perplexity Search setup](/tools/perplexity-search), and [Tavily Search setup](/tools/tavily) for provider-specific details. ## Choosing a search provider @@ -38,6 +39,7 @@ See [Brave Search setup](/tools/brave-search) and [Perplexity Search setup](/too | **Grok** | AI-synthesized answers + citations | — | Uses xAI web-grounded responses | `XAI_API_KEY` | | **Kimi** | AI-synthesized answers + citations | — | Uses Moonshot web search | `KIMI_API_KEY` / `MOONSHOT_API_KEY` | | **Perplexity Search API** | Structured results with snippets | `country`, `language`, time, `domain_filter` | Supports content extraction controls; OpenRouter uses Sonar compatibility path | `PERPLEXITY_API_KEY` / `OPENROUTER_API_KEY` | +| **Tavily Search API** | Structured results with snippets | Use `tavily_search` for Tavily-specific search options | Search depth, topic filtering, AI answers, URL extraction via `tavily_extract` | `TAVILY_API_KEY` | ### Auto-detection @@ -49,6 +51,7 @@ The table above is alphabetical. If no `provider` is explicitly set, runtime aut 4. 
**Kimi** — `KIMI_API_KEY` / `MOONSHOT_API_KEY` env var or `plugins.entries.moonshot.config.webSearch.apiKey` 5. **Perplexity** — `PERPLEXITY_API_KEY`, `OPENROUTER_API_KEY`, or `plugins.entries.perplexity.config.webSearch.apiKey` 6. **Firecrawl** — `FIRECRAWL_API_KEY` env var or `plugins.entries.firecrawl.config.webSearch.apiKey` +7. **Tavily** — `TAVILY_API_KEY` env var or `plugins.entries.tavily.config.webSearch.apiKey` If no keys are found, it falls back to Brave (you'll get a missing-key error prompting you to configure one). @@ -97,6 +100,7 @@ See [Perplexity Search API Docs](https://docs.perplexity.ai/guides/search-quicks - Grok: `plugins.entries.xai.config.webSearch.apiKey` - Kimi: `plugins.entries.moonshot.config.webSearch.apiKey` - Perplexity: `plugins.entries.perplexity.config.webSearch.apiKey` +- Tavily: `plugins.entries.tavily.config.webSearch.apiKey` All of these fields also support SecretRef objects. @@ -108,6 +112,7 @@ All of these fields also support SecretRef objects. - Grok: `XAI_API_KEY` - Kimi: `KIMI_API_KEY` or `MOONSHOT_API_KEY` - Perplexity: `PERPLEXITY_API_KEY` or `OPENROUTER_API_KEY` +- Tavily: `TAVILY_API_KEY` For a gateway install, put these in `~/.openclaw/.env` (or your service environment). See [Env vars](/help/faq#how-does-openclaw-load-environment-variables). @@ -176,6 +181,36 @@ For a gateway install, put these in `~/.openclaw/.env` (or your service environm When you choose Firecrawl in onboarding or `openclaw configure --section web`, OpenClaw enables the bundled Firecrawl plugin automatically so `web_search`, `firecrawl_search`, and `firecrawl_scrape` are all available. 
+**Tavily Search:** + +```json5 +{ + plugins: { + entries: { + tavily: { + enabled: true, + config: { + webSearch: { + apiKey: "tvly-...", // optional if TAVILY_API_KEY is set + baseUrl: "https://api.tavily.com", + }, + }, + }, + }, + }, + tools: { + web: { + search: { + enabled: true, + provider: "tavily", + }, + }, + }, +} +``` + +When you choose Tavily in onboarding or `openclaw configure --section web`, OpenClaw enables the bundled Tavily plugin automatically so `web_search`, `tavily_search`, and `tavily_extract` are all available. + **Brave LLM Context mode:** ```json5 @@ -326,6 +361,7 @@ Search the web using your configured provider. - **Grok**: `XAI_API_KEY` or `plugins.entries.xai.config.webSearch.apiKey` - **Kimi**: `KIMI_API_KEY`, `MOONSHOT_API_KEY`, or `plugins.entries.moonshot.config.webSearch.apiKey` - **Perplexity**: `PERPLEXITY_API_KEY`, `OPENROUTER_API_KEY`, or `plugins.entries.perplexity.config.webSearch.apiKey` + - **Tavily**: `TAVILY_API_KEY` or `plugins.entries.tavily.config.webSearch.apiKey` - All provider key fields above support SecretRef objects. ### Config @@ -369,6 +405,8 @@ If you set `plugins.entries.perplexity.config.webSearch.baseUrl` / `model`, use Firecrawl `web_search` supports `query` and `count`. For Firecrawl-specific controls like `sources`, `categories`, result scraping, or scrape timeout, use `firecrawl_search` from the bundled Firecrawl plugin. +Tavily `web_search` supports `query` and `count` (up to 20 results). For Tavily-specific controls like `search_depth`, `topic`, `include_answer`, or domain filters, use `tavily_search` from the bundled Tavily plugin. For URL content extraction, use `tavily_extract`. See [Tavily](/tools/tavily) for details. 
+ **Examples:** ```javascript diff --git a/extensions/brave/openclaw.plugin.json b/extensions/brave/openclaw.plugin.json index 2077f174d62..791a413ec66 100644 --- a/extensions/brave/openclaw.plugin.json +++ b/extensions/brave/openclaw.plugin.json @@ -1,5 +1,8 @@ { "id": "brave", + "providerAuthEnvVars": { + "brave": ["BRAVE_API_KEY"] + }, "uiHints": { "webSearch.apiKey": { "label": "Brave Search API Key", diff --git a/extensions/firecrawl/openclaw.plugin.json b/extensions/firecrawl/openclaw.plugin.json index e9c50c589d2..adbe2a2a9c8 100644 --- a/extensions/firecrawl/openclaw.plugin.json +++ b/extensions/firecrawl/openclaw.plugin.json @@ -1,5 +1,8 @@ { "id": "firecrawl", + "providerAuthEnvVars": { + "firecrawl": ["FIRECRAWL_API_KEY"] + }, "uiHints": { "webSearch.apiKey": { "label": "Firecrawl Search API Key", diff --git a/extensions/perplexity/openclaw.plugin.json b/extensions/perplexity/openclaw.plugin.json index 89c7a0fb902..32567c76cb2 100644 --- a/extensions/perplexity/openclaw.plugin.json +++ b/extensions/perplexity/openclaw.plugin.json @@ -1,5 +1,8 @@ { "id": "perplexity", + "providerAuthEnvVars": { + "perplexity": ["PERPLEXITY_API_KEY", "OPENROUTER_API_KEY"] + }, "uiHints": { "webSearch.apiKey": { "label": "Perplexity API Key", diff --git a/extensions/tavily/index.test.ts b/extensions/tavily/index.test.ts new file mode 100644 index 00000000000..5b71aeb6f7b --- /dev/null +++ b/extensions/tavily/index.test.ts @@ -0,0 +1,41 @@ +import { describe, expect, it } from "vitest"; +import plugin from "./index.js"; + +describe("tavily plugin", () => { + it("exports a valid plugin entry with correct id and name", () => { + expect(plugin.id).toBe("tavily"); + expect(plugin.name).toBe("Tavily Plugin"); + expect(typeof plugin.register).toBe("function"); + }); + + it("registers web search provider and two tools", () => { + const registrations: { + webSearchProviders: unknown[]; + tools: unknown[]; + } = { webSearchProviders: [], tools: [] }; + + const mockApi = { + 
registerWebSearchProvider(provider: unknown) { + registrations.webSearchProviders.push(provider); + }, + registerTool(tool: unknown) { + registrations.tools.push(tool); + }, + config: {}, + }; + + plugin.register(mockApi as never); + + expect(registrations.webSearchProviders).toHaveLength(1); + expect(registrations.tools).toHaveLength(2); + + const provider = registrations.webSearchProviders[0] as Record; + expect(provider.id).toBe("tavily"); + expect(provider.autoDetectOrder).toBe(70); + expect(provider.envVars).toEqual(["TAVILY_API_KEY"]); + + const toolNames = registrations.tools.map((t) => (t as Record).name); + expect(toolNames).toContain("tavily_search"); + expect(toolNames).toContain("tavily_extract"); + }); +}); diff --git a/extensions/tavily/index.ts b/extensions/tavily/index.ts new file mode 100644 index 00000000000..f35fda3129d --- /dev/null +++ b/extensions/tavily/index.ts @@ -0,0 +1,15 @@ +import { definePluginEntry, type AnyAgentTool } from "openclaw/plugin-sdk/core"; +import { createTavilyExtractTool } from "./src/tavily-extract-tool.js"; +import { createTavilyWebSearchProvider } from "./src/tavily-search-provider.js"; +import { createTavilySearchTool } from "./src/tavily-search-tool.js"; + +export default definePluginEntry({ + id: "tavily", + name: "Tavily Plugin", + description: "Bundled Tavily search and extract plugin", + register(api) { + api.registerWebSearchProvider(createTavilyWebSearchProvider()); + api.registerTool(createTavilySearchTool(api) as AnyAgentTool); + api.registerTool(createTavilyExtractTool(api) as AnyAgentTool); + }, +}); diff --git a/extensions/tavily/openclaw.plugin.json b/extensions/tavily/openclaw.plugin.json new file mode 100644 index 00000000000..9ed930bfe63 --- /dev/null +++ b/extensions/tavily/openclaw.plugin.json @@ -0,0 +1,37 @@ +{ + "id": "tavily", + "skills": ["./skills"], + "providerAuthEnvVars": { + "tavily": ["TAVILY_API_KEY"] + }, + "uiHints": { + "webSearch.apiKey": { + "label": "Tavily API Key", + "help": 
"Tavily API key for web search and extraction (fallback: TAVILY_API_KEY env var).", + "sensitive": true, + "placeholder": "tvly-..." + }, + "webSearch.baseUrl": { + "label": "Tavily Base URL", + "help": "Tavily API base URL override." + } + }, + "configSchema": { + "type": "object", + "additionalProperties": false, + "properties": { + "webSearch": { + "type": "object", + "additionalProperties": false, + "properties": { + "apiKey": { + "type": ["string", "object"] + }, + "baseUrl": { + "type": "string" + } + } + } + } + } +} diff --git a/extensions/tavily/package.json b/extensions/tavily/package.json new file mode 100644 index 00000000000..3d693a6ca38 --- /dev/null +++ b/extensions/tavily/package.json @@ -0,0 +1,12 @@ +{ + "name": "@openclaw/tavily-plugin", + "version": "2026.3.17", + "private": true, + "description": "OpenClaw Tavily plugin", + "type": "module", + "openclaw": { + "extensions": [ + "./index.ts" + ] + } +} diff --git a/extensions/tavily/skills/tavily/SKILL.md b/extensions/tavily/skills/tavily/SKILL.md new file mode 100644 index 00000000000..4026537362a --- /dev/null +++ b/extensions/tavily/skills/tavily/SKILL.md @@ -0,0 +1,94 @@ +--- +name: tavily +description: Tavily web search, content extraction, and research tools. +metadata: + { "openclaw": { "emoji": "🔍", "requires": { "config": ["plugins.entries.tavily.enabled"] } } } +--- + +# Tavily Tools + +## When to use which tool + +| Need | Tool | When | +| ---------------------------- | ---------------- | ------------------------------------------------------------- | +| Quick web search | `web_search` | Basic queries, no special options needed | +| Search with advanced options | `tavily_search` | Need depth, topic, domain filters, time ranges, or AI answers | +| Extract content from URLs | `tavily_extract` | Have specific URLs, need their content | + +## web_search + +Tavily powers this automatically when selected as the search provider. 
Use for +straightforward queries where you don't need Tavily-specific options. + +| Parameter | Description | +| --------- | ------------------------ | +| `query` | Search query string | +| `count` | Number of results (1-20) | + +## tavily_search + +Use when you need fine-grained control over search behavior. + +| Parameter | Description | +| ----------------- | --------------------------------------------------------------------- | +| `query` | Search query string (keep under 400 characters) | +| `search_depth` | `basic` (default, balanced) or `advanced` (highest relevance, slower) | +| `topic` | `general` (default), `news` (real-time updates), or `finance` | +| `max_results` | Number of results, 1-20 (default: 5) | +| `include_answer` | Include an AI-generated answer summary (default: false) | +| `time_range` | Filter by recency: `day`, `week`, `month`, or `year` | +| `include_domains` | Array of domains to restrict results to | +| `exclude_domains` | Array of domains to exclude from results | + +### Search depth + +| Depth | Speed | Relevance | Best for | +| ---------- | ------ | --------- | -------------------------------------------- | +| `basic` | Faster | High | General-purpose queries (default) | +| `advanced` | Slower | Highest | Precision, specific facts, detailed research | + +### Tips + +- **Keep queries under 400 characters** — think search query, not prompt. +- **Break complex queries into sub-queries** for better results. +- **Use `include_domains`** to focus on trusted sources. +- **Use `time_range`** for recent information (news, current events). +- **Use `include_answer`** when you need a quick synthesized answer. + +## tavily_extract + +Use when you have specific URLs and need their content. Handles JavaScript-rendered +pages and returns clean markdown. Supports query-focused chunking for targeted +extraction. 
+ +| Parameter | Description | +| ------------------- | ------------------------------------------------------------------ | +| `urls` | Array of URLs to extract (1-20 per request) | +| `query` | Rerank extracted chunks by relevance to this query | +| `extract_depth` | `basic` (default, fast) or `advanced` (for JS-heavy pages, tables) | +| `chunks_per_source` | Chunks per URL, 1-5 (requires `query`) | +| `include_images` | Include image URLs in results (default: false) | + +### Extract depth + +| Depth | When to use | +| ---------- | ----------------------------------------------------------- | +| `basic` | Simple pages — try this first | +| `advanced` | JS-rendered SPAs, dynamic content, tables, embedded content | + +### Tips + +- **Max 20 URLs per request** — batch larger lists into multiple calls. +- **Use `query` + `chunks_per_source`** to get only relevant content instead of full pages. +- **Try `basic` first**, fall back to `advanced` if content is missing or incomplete. +- If `tavily_search` results already contain the snippets you need, skip the extract step. + +## Choosing the right workflow + +Follow this escalation pattern — start simple, escalate only when needed: + +1. **`web_search`** — Quick lookup, no special options needed. +2. **`tavily_search`** — Need depth control, topic filtering, domain filters, time ranges, or AI answers. +3. **`tavily_extract`** — Have specific URLs, need their full content or targeted chunks. + +Combine search + extract when you need to find pages first, then get their full content. 
diff --git a/extensions/tavily/src/config.ts b/extensions/tavily/src/config.ts new file mode 100644 index 00000000000..752a721d17c --- /dev/null +++ b/extensions/tavily/src/config.ts @@ -0,0 +1,71 @@ +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; +import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/config-runtime"; +import { normalizeSecretInput } from "openclaw/plugin-sdk/provider-auth"; + +export const DEFAULT_TAVILY_BASE_URL = "https://api.tavily.com"; +export const DEFAULT_TAVILY_SEARCH_TIMEOUT_SECONDS = 30; +export const DEFAULT_TAVILY_EXTRACT_TIMEOUT_SECONDS = 60; + +type TavilySearchConfig = + | { + apiKey?: unknown; + baseUrl?: string; + } + | undefined; + +type PluginEntryConfig = { + webSearch?: { + apiKey?: unknown; + baseUrl?: string; + }; +}; + +export function resolveTavilySearchConfig(cfg?: OpenClawConfig): TavilySearchConfig { + const pluginConfig = cfg?.plugins?.entries?.tavily?.config as PluginEntryConfig; + const pluginWebSearch = pluginConfig?.webSearch; + if (pluginWebSearch && typeof pluginWebSearch === "object" && !Array.isArray(pluginWebSearch)) { + return pluginWebSearch; + } + return undefined; +} + +function normalizeConfiguredSecret(value: unknown, path: string): string | undefined { + return normalizeSecretInput( + normalizeResolvedSecretInputString({ + value, + path, + }), + ); +} + +export function resolveTavilyApiKey(cfg?: OpenClawConfig): string | undefined { + const search = resolveTavilySearchConfig(cfg); + return ( + normalizeConfiguredSecret(search?.apiKey, "plugins.entries.tavily.config.webSearch.apiKey") || + normalizeSecretInput(process.env.TAVILY_API_KEY) || + undefined + ); +} + +export function resolveTavilyBaseUrl(cfg?: OpenClawConfig): string { + const search = resolveTavilySearchConfig(cfg); + const configured = + (typeof search?.baseUrl === "string" ? 
search.baseUrl.trim() : "") || + normalizeSecretInput(process.env.TAVILY_BASE_URL) || + ""; + return configured || DEFAULT_TAVILY_BASE_URL; +} + +export function resolveTavilySearchTimeoutSeconds(override?: number): number { + if (typeof override === "number" && Number.isFinite(override) && override > 0) { + return Math.floor(override); + } + return DEFAULT_TAVILY_SEARCH_TIMEOUT_SECONDS; +} + +export function resolveTavilyExtractTimeoutSeconds(override?: number): number { + if (typeof override === "number" && Number.isFinite(override) && override > 0) { + return Math.floor(override); + } + return DEFAULT_TAVILY_EXTRACT_TIMEOUT_SECONDS; +} diff --git a/extensions/tavily/src/tavily-client.ts b/extensions/tavily/src/tavily-client.ts new file mode 100644 index 00000000000..8308f8b8772 --- /dev/null +++ b/extensions/tavily/src/tavily-client.ts @@ -0,0 +1,286 @@ +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; +import { withTrustedWebToolsEndpoint } from "openclaw/plugin-sdk/provider-web-search"; +import { + DEFAULT_CACHE_TTL_MINUTES, + normalizeCacheKey, + readCache, + readResponseText, + resolveCacheTtlMs, + writeCache, +} from "openclaw/plugin-sdk/provider-web-search"; +import { wrapExternalContent, wrapWebContent } from "openclaw/plugin-sdk/security-runtime"; +import { + DEFAULT_TAVILY_BASE_URL, + resolveTavilyApiKey, + resolveTavilyBaseUrl, + resolveTavilyExtractTimeoutSeconds, + resolveTavilySearchTimeoutSeconds, +} from "./config.js"; + +const SEARCH_CACHE = new Map< + string, + { value: Record; expiresAt: number; insertedAt: number } +>(); +const EXTRACT_CACHE = new Map< + string, + { value: Record; expiresAt: number; insertedAt: number } +>(); +const DEFAULT_SEARCH_COUNT = 5; +const DEFAULT_ERROR_MAX_BYTES = 64_000; + +export type TavilySearchParams = { + cfg?: OpenClawConfig; + query: string; + searchDepth?: string; + topic?: string; + maxResults?: number; + includeAnswer?: boolean; + timeRange?: string; + includeDomains?: string[]; + 
excludeDomains?: string[]; + timeoutSeconds?: number; +}; + +export type TavilyExtractParams = { + cfg?: OpenClawConfig; + urls: string[]; + query?: string; + extractDepth?: string; + chunksPerSource?: number; + includeImages?: boolean; + timeoutSeconds?: number; +}; + +function resolveEndpoint(baseUrl: string, pathname: string): string { + const trimmed = baseUrl.trim(); + if (!trimmed) { + return `${DEFAULT_TAVILY_BASE_URL}${pathname}`; + } + try { + const url = new URL(trimmed); + // Always append the endpoint pathname to the base URL path, + // supporting both bare hosts and reverse-proxy path prefixes. + url.pathname = url.pathname.replace(/\/$/, "") + pathname; + return url.toString(); + } catch { + return `${DEFAULT_TAVILY_BASE_URL}${pathname}`; + } +} + +async function postTavilyJson(params: { + baseUrl: string; + pathname: string; + apiKey: string; + body: Record; + timeoutSeconds: number; + errorLabel: string; +}): Promise> { + const endpoint = resolveEndpoint(params.baseUrl, params.pathname); + return await withTrustedWebToolsEndpoint( + { + url: endpoint, + timeoutSeconds: params.timeoutSeconds, + init: { + method: "POST", + headers: { + Accept: "application/json", + Authorization: `Bearer ${params.apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(params.body), + }, + }, + async ({ response }) => { + if (!response.ok) { + const detail = await readResponseText(response, { maxBytes: DEFAULT_ERROR_MAX_BYTES }); + throw new Error( + `${params.errorLabel} API error (${response.status}): ${detail.text || response.statusText}`, + ); + } + return (await response.json()) as Record; + }, + ); +} + +export async function runTavilySearch( + params: TavilySearchParams, +): Promise> { + const apiKey = resolveTavilyApiKey(params.cfg); + if (!apiKey) { + throw new Error( + "web_search (tavily) needs a Tavily API key. 
Set TAVILY_API_KEY in the Gateway environment, or configure plugins.entries.tavily.config.webSearch.apiKey.", + ); + } + const count = + typeof params.maxResults === "number" && Number.isFinite(params.maxResults) + ? Math.max(1, Math.min(20, Math.floor(params.maxResults))) + : DEFAULT_SEARCH_COUNT; + const timeoutSeconds = resolveTavilySearchTimeoutSeconds(params.timeoutSeconds); + const baseUrl = resolveTavilyBaseUrl(params.cfg); + + const cacheKey = normalizeCacheKey( + JSON.stringify({ + type: "tavily-search", + q: params.query, + count, + baseUrl, + searchDepth: params.searchDepth, + topic: params.topic, + includeAnswer: params.includeAnswer, + timeRange: params.timeRange, + includeDomains: params.includeDomains, + excludeDomains: params.excludeDomains, + }), + ); + const cached = readCache(SEARCH_CACHE, cacheKey); + if (cached) { + return { ...cached.value, cached: true }; + } + + const body: Record = { + query: params.query, + max_results: count, + }; + if (params.searchDepth) body.search_depth = params.searchDepth; + if (params.topic) body.topic = params.topic; + if (params.includeAnswer) body.include_answer = true; + if (params.timeRange) body.time_range = params.timeRange; + if (params.includeDomains?.length) body.include_domains = params.includeDomains; + if (params.excludeDomains?.length) body.exclude_domains = params.excludeDomains; + + const start = Date.now(); + const payload = await postTavilyJson({ + baseUrl, + pathname: "/search", + apiKey, + body, + timeoutSeconds, + errorLabel: "Tavily Search", + }); + + const rawResults = Array.isArray(payload.results) ? payload.results : []; + const results = rawResults.map((r: Record) => ({ + title: typeof r.title === "string" ? wrapWebContent(r.title, "web_search") : "", + url: typeof r.url === "string" ? r.url : "", + snippet: typeof r.content === "string" ? wrapWebContent(r.content, "web_search") : "", + score: typeof r.score === "number" ? r.score : undefined, + ...(typeof r.published_date === "string" ? 
{ published: r.published_date } : {}), + })); + + const result: Record = { + query: params.query, + provider: "tavily", + count: results.length, + tookMs: Date.now() - start, + externalContent: { + untrusted: true, + source: "web_search", + provider: "tavily", + wrapped: true, + }, + results, + }; + if (typeof payload.answer === "string" && payload.answer) { + result.answer = wrapWebContent(payload.answer, "web_search"); + } + + writeCache( + SEARCH_CACHE, + cacheKey, + result, + resolveCacheTtlMs(undefined, DEFAULT_CACHE_TTL_MINUTES), + ); + return result; +} + +export async function runTavilyExtract( + params: TavilyExtractParams, +): Promise> { + const apiKey = resolveTavilyApiKey(params.cfg); + if (!apiKey) { + throw new Error( + "tavily_extract needs a Tavily API key. Set TAVILY_API_KEY in the Gateway environment, or configure plugins.entries.tavily.config.webSearch.apiKey.", + ); + } + const baseUrl = resolveTavilyBaseUrl(params.cfg); + const timeoutSeconds = resolveTavilyExtractTimeoutSeconds(params.timeoutSeconds); + + const cacheKey = normalizeCacheKey( + JSON.stringify({ + type: "tavily-extract", + urls: params.urls, + baseUrl, + query: params.query, + extractDepth: params.extractDepth, + chunksPerSource: params.chunksPerSource, + includeImages: params.includeImages, + }), + ); + const cached = readCache(EXTRACT_CACHE, cacheKey); + if (cached) { + return { ...cached.value, cached: true }; + } + + const body: Record = { urls: params.urls }; + if (params.query) body.query = params.query; + if (params.extractDepth) body.extract_depth = params.extractDepth; + if (params.chunksPerSource) body.chunks_per_source = params.chunksPerSource; + if (params.includeImages) body.include_images = true; + + const start = Date.now(); + const payload = await postTavilyJson({ + baseUrl, + pathname: "/extract", + apiKey, + body, + timeoutSeconds, + errorLabel: "Tavily Extract", + }); + + const rawResults = Array.isArray(payload.results) ? 
payload.results : []; + const results = rawResults.map((r: Record) => ({ + url: typeof r.url === "string" ? r.url : "", + rawContent: + typeof r.raw_content === "string" + ? wrapExternalContent(r.raw_content, { source: "web_fetch", includeWarning: false }) + : "", + ...(typeof r.content === "string" + ? { content: wrapExternalContent(r.content, { source: "web_fetch", includeWarning: false }) } + : {}), + ...(Array.isArray(r.images) + ? { + images: (r.images as string[]).map((img) => + wrapExternalContent(String(img), { source: "web_fetch", includeWarning: false }), + ), + } + : {}), + })); + + const failedResults = Array.isArray(payload.failed_results) ? payload.failed_results : []; + + const result: Record = { + provider: "tavily", + count: results.length, + tookMs: Date.now() - start, + externalContent: { + untrusted: true, + source: "web_fetch", + provider: "tavily", + wrapped: true, + }, + results, + ...(failedResults.length > 0 ? { failedResults } : {}), + }; + + writeCache( + EXTRACT_CACHE, + cacheKey, + result, + resolveCacheTtlMs(undefined, DEFAULT_CACHE_TTL_MINUTES), + ); + return result; +} + +export const __testing = { + postTavilyJson, +}; diff --git a/extensions/tavily/src/tavily-extract-tool.test.ts b/extensions/tavily/src/tavily-extract-tool.test.ts new file mode 100644 index 00000000000..f571e196d0b --- /dev/null +++ b/extensions/tavily/src/tavily-extract-tool.test.ts @@ -0,0 +1,53 @@ +import type { OpenClawPluginApi } from "openclaw/plugin-sdk/plugin-runtime"; +import { beforeEach, describe, expect, it, vi } from "vitest"; + +vi.mock("./tavily-client.js", () => ({ + runTavilyExtract: vi.fn(async (params: unknown) => ({ ok: true, params })), +})); + +import { runTavilyExtract } from "./tavily-client.js"; +import { createTavilyExtractTool } from "./tavily-extract-tool.js"; + +function fakeApi(): OpenClawPluginApi { + return { + config: {}, + } as OpenClawPluginApi; +} + +describe("tavily_extract", () => { + beforeEach(() => { + vi.clearAllMocks(); + 
}); + + it("rejects chunks_per_source without query", async () => { + const tool = createTavilyExtractTool(fakeApi()); + + await expect( + tool.execute("id", { + urls: ["https://example.com"], + chunks_per_source: 2, + }), + ).rejects.toThrow("tavily_extract requires query when chunks_per_source is set."); + + expect(runTavilyExtract).not.toHaveBeenCalled(); + }); + + it("forwards query-scoped chunking when query is provided", async () => { + const tool = createTavilyExtractTool(fakeApi()); + + await tool.execute("id", { + urls: ["https://example.com"], + query: "pricing", + chunks_per_source: 2, + }); + + expect(runTavilyExtract).toHaveBeenCalledWith( + expect.objectContaining({ + cfg: {}, + urls: ["https://example.com"], + query: "pricing", + chunksPerSource: 2, + }), + ); + }); +}); diff --git a/extensions/tavily/src/tavily-extract-tool.ts b/extensions/tavily/src/tavily-extract-tool.ts new file mode 100644 index 00000000000..1a3c381fc64 --- /dev/null +++ b/extensions/tavily/src/tavily-extract-tool.ts @@ -0,0 +1,74 @@ +import { Type } from "@sinclair/typebox"; +import { optionalStringEnum } from "openclaw/plugin-sdk/agent-runtime"; +import { jsonResult, readNumberParam, readStringParam } from "openclaw/plugin-sdk/agent-runtime"; +import type { OpenClawPluginApi } from "openclaw/plugin-sdk/plugin-runtime"; +import { runTavilyExtract } from "./tavily-client.js"; + +const TavilyExtractToolSchema = Type.Object( + { + urls: Type.Array(Type.String(), { + description: "One or more URLs to extract content from (max 20).", + minItems: 1, + maxItems: 20, + }), + query: Type.Optional( + Type.String({ + description: "Rerank extracted chunks by relevance to this query.", + }), + ), + extract_depth: optionalStringEnum(["basic", "advanced"] as const, { + description: '"basic" (default) or "advanced" (for JS-heavy pages).', + }), + chunks_per_source: Type.Optional( + Type.Number({ + description: "Chunks per URL (1-5, requires query).", + minimum: 1, + maximum: 5, + }), + ), + 
include_images: Type.Optional( + Type.Boolean({ + description: "Include image URLs in extraction results.", + }), + ), + }, + { additionalProperties: false }, +); + +export function createTavilyExtractTool(api: OpenClawPluginApi) { + return { + name: "tavily_extract", + label: "Tavily Extract", + description: + "Extract clean content from one or more URLs using Tavily. Handles JS-rendered pages. Supports query-focused chunking.", + parameters: TavilyExtractToolSchema, + execute: async (_toolCallId: string, rawParams: Record<string, unknown>) => { + const urls = Array.isArray(rawParams.urls) + ? (rawParams.urls as string[]).filter(Boolean) + : []; + if (urls.length === 0) { + throw new Error("tavily_extract requires at least one URL."); + } + const query = readStringParam(rawParams, "query") || undefined; + const extractDepth = readStringParam(rawParams, "extract_depth") || undefined; + const chunksPerSource = readNumberParam(rawParams, "chunks_per_source", { + integer: true, + }); + if (chunksPerSource !== undefined && !query) { + throw new Error("tavily_extract requires query when chunks_per_source is set."); + } + const includeImages = rawParams.include_images === true; + + return jsonResult( + await runTavilyExtract({ + cfg: api.config, + urls, + query, + extractDepth, + chunksPerSource, + includeImages, + }), + ); + }, + }; +} diff --git a/extensions/tavily/src/tavily-search-provider.ts b/extensions/tavily/src/tavily-search-provider.ts new file mode 100644 index 00000000000..2ad33362353 --- /dev/null +++ b/extensions/tavily/src/tavily-search-provider.ts @@ -0,0 +1,76 @@ +import { Type } from "@sinclair/typebox"; +import { + enablePluginInConfig, + resolveProviderWebSearchPluginConfig, + setProviderWebSearchPluginConfigValue, + type WebSearchProviderPlugin, +} from "openclaw/plugin-sdk/provider-web-search"; +import { runTavilySearch } from "./tavily-client.js"; + +const GenericTavilySearchSchema = Type.Object( + { + query: Type.String({ description: "Search query string." 
}), + count: Type.Optional( + Type.Number({ + description: "Number of results to return (1-20).", + minimum: 1, + maximum: 20, + }), + ), + }, + { additionalProperties: false }, +); + +function getScopedCredentialValue(searchConfig?: Record<string, unknown>): unknown { + const scoped = searchConfig?.tavily; + if (!scoped || typeof scoped !== "object" || Array.isArray(scoped)) { + return undefined; + } + return (scoped as Record<string, unknown>).apiKey; +} + +function setScopedCredentialValue( + searchConfigTarget: Record<string, unknown>, + value: unknown, +): void { + const scoped = searchConfigTarget.tavily; + if (!scoped || typeof scoped !== "object" || Array.isArray(scoped)) { + searchConfigTarget.tavily = { apiKey: value }; + return; + } + (scoped as Record<string, unknown>).apiKey = value; +} + +export function createTavilyWebSearchProvider(): WebSearchProviderPlugin { + return { + id: "tavily", + label: "Tavily Search", + hint: "Structured results with domain filters and AI answer summaries", + envVars: ["TAVILY_API_KEY"], + placeholder: "tvly-...", + signupUrl: "https://tavily.com/", + docsUrl: "https://docs.openclaw.ai/tools/tavily", + autoDetectOrder: 70, + credentialPath: "plugins.entries.tavily.config.webSearch.apiKey", + inactiveSecretPaths: ["plugins.entries.tavily.config.webSearch.apiKey"], + getCredentialValue: getScopedCredentialValue, + setCredentialValue: setScopedCredentialValue, + getConfiguredCredentialValue: (config) => + resolveProviderWebSearchPluginConfig(config, "tavily")?.apiKey, + setConfiguredCredentialValue: (configTarget, value) => { + setProviderWebSearchPluginConfigValue(configTarget, "tavily", "apiKey", value); + }, + applySelectionConfig: (config) => enablePluginInConfig(config, "tavily").config, + createTool: (ctx) => ({ + description: + "Search the web using Tavily. Returns structured results with snippets. 
Use tavily_search for Tavily-specific options like search depth, topic filtering, or AI answers.", + parameters: GenericTavilySearchSchema, + execute: async (args) => + await runTavilySearch({ + cfg: ctx.config, + query: typeof args.query === "string" ? args.query : "", + maxResults: typeof args.count === "number" ? args.count : undefined, + }), + }), + }; +} diff --git a/extensions/tavily/src/tavily-search-tool.ts b/extensions/tavily/src/tavily-search-tool.ts new file mode 100644 index 00000000000..1d925973fe0 --- /dev/null +++ b/extensions/tavily/src/tavily-search-tool.ts @@ -0,0 +1,81 @@ +import { Type } from "@sinclair/typebox"; +import { optionalStringEnum } from "openclaw/plugin-sdk/agent-runtime"; +import { jsonResult, readNumberParam, readStringParam } from "openclaw/plugin-sdk/agent-runtime"; +import type { OpenClawPluginApi } from "openclaw/plugin-sdk/plugin-runtime"; +import { runTavilySearch } from "./tavily-client.js"; + +const TavilySearchToolSchema = Type.Object( + { + query: Type.String({ description: "Search query string." 
}), + search_depth: optionalStringEnum(["basic", "advanced"] as const, { + description: 'Search depth: "basic" (default, faster) or "advanced" (more thorough).', + }), + topic: optionalStringEnum(["general", "news", "finance"] as const, { + description: 'Search topic: "general" (default), "news", or "finance".', + }), + max_results: Type.Optional( + Type.Number({ + description: "Number of results to return (1-20).", + minimum: 1, + maximum: 20, + }), + ), + include_answer: Type.Optional( + Type.Boolean({ + description: "Include an AI-generated answer summary (default: false).", + }), + ), + time_range: optionalStringEnum(["day", "week", "month", "year"] as const, { + description: "Filter results by recency: 'day', 'week', 'month', or 'year'.", + }), + include_domains: Type.Optional( + Type.Array(Type.String(), { + description: "Only include results from these domains.", + }), + ), + exclude_domains: Type.Optional( + Type.Array(Type.String(), { + description: "Exclude results from these domains.", + }), + ), + }, + { additionalProperties: false }, +); + +export function createTavilySearchTool(api: OpenClawPluginApi) { + return { + name: "tavily_search", + label: "Tavily Search", + description: + "Search the web using Tavily Search API. Supports search depth, topic filtering, domain filters, time ranges, and AI answer summaries.", + parameters: TavilySearchToolSchema, + execute: async (_toolCallId: string, rawParams: Record<string, unknown>) => { + const query = readStringParam(rawParams, "query", { required: true }); + const searchDepth = readStringParam(rawParams, "search_depth") || undefined; + const topic = readStringParam(rawParams, "topic") || undefined; + const maxResults = readNumberParam(rawParams, "max_results", { integer: true }); + const includeAnswer = rawParams.include_answer === true; + const timeRange = readStringParam(rawParams, "time_range") || undefined; + const includeDomains = Array.isArray(rawParams.include_domains) + ? 
(rawParams.include_domains as string[]).filter(Boolean) + : undefined; + const excludeDomains = Array.isArray(rawParams.exclude_domains) + ? (rawParams.exclude_domains as string[]).filter(Boolean) + : undefined; + + return jsonResult( + await runTavilySearch({ + cfg: api.config, + query, + searchDepth, + topic, + maxResults, + includeAnswer, + timeRange, + includeDomains: includeDomains?.length ? includeDomains : undefined, + excludeDomains: excludeDomains?.length ? excludeDomains : undefined, + }), + ); + }, + }; +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f0d503f2346..f821a4aa3c4 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -519,6 +519,8 @@ importers: extensions/synthetic: {} + extensions/tavily: {} + extensions/telegram: dependencies: '@grammyjs/runner': diff --git a/src/agents/tools/web-search.ts b/src/agents/tools/web-search.ts index 151cfc4e6c4..11955d4a9b0 100644 --- a/src/agents/tools/web-search.ts +++ b/src/agents/tools/web-search.ts @@ -1,123 +1,35 @@ import type { OpenClawConfig } from "../../config/config.js"; -import { normalizeResolvedSecretInputString } from "../../config/types.secrets.js"; -import { logVerbose } from "../../globals.js"; -import type { PluginWebSearchProviderEntry } from "../../plugins/types.js"; -import { resolvePluginWebSearchProviders } from "../../plugins/web-search-providers.js"; import type { RuntimeWebSearchMetadata } from "../../secrets/runtime-web-tools.types.js"; -import { normalizeSecretInput } from "../../utils/normalize-secret-input.js"; +import { + resolveWebSearchDefinition, + resolveWebSearchProviderId, +} from "../../web-search/runtime.js"; import type { AnyAgentTool } from "./common.js"; import { jsonResult } from "./common.js"; import { SEARCH_CACHE } from "./web-search-provider-common.js"; -import { - resolveSearchConfig, - resolveSearchEnabled, - type WebSearchConfig, -} from "./web-search-provider-config.js"; - -function readProviderEnvValue(envVars: string[]): string | undefined { - for (const 
envVar of envVars) { - const value = normalizeSecretInput(process.env[envVar]); - if (value) { - return value; - } - } - return undefined; -} - -function hasProviderCredential( - provider: PluginWebSearchProviderEntry, - search: WebSearchConfig | undefined, -): boolean { - const rawValue = provider.getCredentialValue(search as Record | undefined); - const fromConfig = normalizeSecretInput( - normalizeResolvedSecretInputString({ - value: rawValue, - path: provider.credentialPath, - }), - ); - return Boolean(fromConfig || readProviderEnvValue(provider.envVars)); -} - -function resolveSearchProvider(search?: WebSearchConfig): string { - const providers = resolvePluginWebSearchProviders({ - bundledAllowlistCompat: true, - }); - const raw = - search && "provider" in search && typeof search.provider === "string" - ? search.provider.trim().toLowerCase() - : ""; - - if (raw) { - const explicit = providers.find((provider) => provider.id === raw); - if (explicit) { - return explicit.id; - } - } - - if (!raw) { - for (const provider of providers) { - if (!hasProviderCredential(provider, search)) { - continue; - } - logVerbose( - `web_search: no provider configured, auto-detected "${provider.id}" from available API keys`, - ); - return provider.id; - } - } - - return providers[0]?.id ?? ""; -} export function createWebSearchTool(options?: { config?: OpenClawConfig; sandboxed?: boolean; runtimeWebSearch?: RuntimeWebSearchMetadata; }): AnyAgentTool | null { - const search = resolveSearchConfig(options?.config); - if (!resolveSearchEnabled({ search, sandboxed: options?.sandboxed })) { - return null; - } - - const providers = resolvePluginWebSearchProviders({ - config: options?.config, - bundledAllowlistCompat: true, - }); - if (providers.length === 0) { - return null; - } - - const providerId = - options?.runtimeWebSearch?.selectedProvider ?? - options?.runtimeWebSearch?.providerConfigured ?? 
- resolveSearchProvider(search); - const provider = - providers.find((entry) => entry.id === providerId) ?? - providers.find((entry) => entry.id === resolveSearchProvider(search)) ?? - providers[0]; - if (!provider) { - return null; - } - - const definition = provider.createTool({ - config: options?.config, - searchConfig: search as Record | undefined, - runtimeMetadata: options?.runtimeWebSearch, - }); - if (!definition) { + const resolved = resolveWebSearchDefinition(options); + if (!resolved) { return null; } return { label: "Web Search", name: "web_search", - description: definition.description, - parameters: definition.parameters, - execute: async (_toolCallId, args) => jsonResult(await definition.execute(args)), + description: resolved.definition.description, + parameters: resolved.definition.parameters, + execute: async (_toolCallId, args) => jsonResult(await resolved.definition.execute(args)), }; } export const __testing = { SEARCH_CACHE, - resolveSearchProvider, + resolveSearchProvider: ( + search?: NonNullable["web"]>["search"], + ) => resolveWebSearchProviderId({ search }), }; diff --git a/src/commands/onboard-search.test.ts b/src/commands/onboard-search.test.ts index 00bfd6382a6..c15fdefcf72 100644 --- a/src/commands/onboard-search.test.ts +++ b/src/commands/onboard-search.test.ts @@ -48,6 +48,15 @@ function createPerplexityConfig(apiKey: string, enabled?: boolean): OpenClawConf }; } +function pluginWebSearchApiKey(config: OpenClawConfig, pluginId: string): unknown { + const entry = ( + config.plugins?.entries as + | Record + | undefined + )?.[pluginId]; + return entry?.config?.webSearch?.apiKey; +} + async function runBlankPerplexityKeyEntry( apiKey: string, enabled?: boolean, @@ -88,8 +97,9 @@ describe("setupSearch", () => { }); const result = await setupSearch(cfg, runtime, prompter); expect(result.tools?.web?.search?.provider).toBe("perplexity"); - expect(result.tools?.web?.search?.perplexity?.apiKey).toBe("pplx-test-key"); + 
expect(pluginWebSearchApiKey(result, "perplexity")).toBe("pplx-test-key"); expect(result.tools?.web?.search?.enabled).toBe(true); + expect(result.plugins?.entries?.perplexity?.enabled).toBe(true); }); it("sets provider and key for brave", async () => { @@ -101,7 +111,8 @@ describe("setupSearch", () => { const result = await setupSearch(cfg, runtime, prompter); expect(result.tools?.web?.search?.provider).toBe("brave"); expect(result.tools?.web?.search?.enabled).toBe(true); - expect(result.tools?.web?.search?.apiKey).toBe("BSA-test-key"); + expect(pluginWebSearchApiKey(result, "brave")).toBe("BSA-test-key"); + expect(result.plugins?.entries?.brave?.enabled).toBe(true); }); it("sets provider and key for gemini", async () => { @@ -113,7 +124,8 @@ describe("setupSearch", () => { const result = await setupSearch(cfg, runtime, prompter); expect(result.tools?.web?.search?.provider).toBe("gemini"); expect(result.tools?.web?.search?.enabled).toBe(true); - expect(result.tools?.web?.search?.gemini?.apiKey).toBe("AIza-test"); + expect(pluginWebSearchApiKey(result, "google")).toBe("AIza-test"); + expect(result.plugins?.entries?.google?.enabled).toBe(true); }); it("sets provider and key for firecrawl and enables the plugin", async () => { @@ -125,7 +137,7 @@ describe("setupSearch", () => { const result = await setupSearch(cfg, runtime, prompter); expect(result.tools?.web?.search?.provider).toBe("firecrawl"); expect(result.tools?.web?.search?.enabled).toBe(true); - expect(result.tools?.web?.search?.firecrawl?.apiKey).toBe("fc-test-key"); + expect(pluginWebSearchApiKey(result, "firecrawl")).toBe("fc-test-key"); expect(result.plugins?.entries?.firecrawl?.enabled).toBe(true); }); @@ -150,7 +162,21 @@ describe("setupSearch", () => { const result = await setupSearch(cfg, runtime, prompter); expect(result.tools?.web?.search?.provider).toBe("kimi"); expect(result.tools?.web?.search?.enabled).toBe(true); - expect(result.tools?.web?.search?.kimi?.apiKey).toBe("sk-moonshot"); + 
expect(pluginWebSearchApiKey(result, "moonshot")).toBe("sk-moonshot"); + expect(result.plugins?.entries?.moonshot?.enabled).toBe(true); + }); + + it("sets provider and key for tavily and enables the plugin", async () => { + const cfg: OpenClawConfig = {}; + const { prompter } = createPrompter({ + selectValue: "tavily", + textValue: "tvly-test-key", + }); + const result = await setupSearch(cfg, runtime, prompter); + expect(result.tools?.web?.search?.provider).toBe("tavily"); + expect(result.tools?.web?.search?.enabled).toBe(true); + expect(pluginWebSearchApiKey(result, "tavily")).toBe("tvly-test-key"); + expect(result.plugins?.entries?.tavily?.enabled).toBe(true); }); it("shows missing-key note when no key is provided and no env var", async () => { @@ -198,7 +224,7 @@ describe("setupSearch", () => { "stored-pplx-key", // pragma: allowlist secret ); expect(result.tools?.web?.search?.provider).toBe("perplexity"); - expect(result.tools?.web?.search?.perplexity?.apiKey).toBe("stored-pplx-key"); + expect(pluginWebSearchApiKey(result, "perplexity")).toBe("stored-pplx-key"); expect(result.tools?.web?.search?.enabled).toBe(true); expect(prompter.text).not.toHaveBeenCalled(); }); @@ -209,11 +235,43 @@ describe("setupSearch", () => { false, ); expect(result.tools?.web?.search?.provider).toBe("perplexity"); - expect(result.tools?.web?.search?.perplexity?.apiKey).toBe("stored-pplx-key"); + expect(pluginWebSearchApiKey(result, "perplexity")).toBe("stored-pplx-key"); expect(result.tools?.web?.search?.enabled).toBe(false); expect(prompter.text).not.toHaveBeenCalled(); }); + it("quickstart skips key prompt when canonical plugin config key exists", async () => { + const cfg: OpenClawConfig = { + tools: { + web: { + search: { + provider: "tavily", + }, + }, + }, + plugins: { + entries: { + tavily: { + enabled: true, + config: { + webSearch: { + apiKey: "tvly-existing-key", + }, + }, + }, + }, + }, + }; + const { prompter } = createPrompter({ selectValue: "tavily" }); + const result = 
await setupSearch(cfg, runtime, prompter, { + quickstartDefaults: true, + }); + expect(result.tools?.web?.search?.provider).toBe("tavily"); + expect(pluginWebSearchApiKey(result, "tavily")).toBe("tvly-existing-key"); + expect(result.tools?.web?.search?.enabled).toBe(true); + expect(prompter.text).not.toHaveBeenCalled(); + }); + it("quickstart falls through to key prompt when no key and no env var", async () => { const original = process.env.XAI_API_KEY; delete process.env.XAI_API_KEY; @@ -268,7 +326,7 @@ describe("setupSearch", () => { secretInputMode: "ref", // pragma: allowlist secret }); expect(result.tools?.web?.search?.provider).toBe("perplexity"); - expect(result.tools?.web?.search?.perplexity?.apiKey).toEqual({ + expect(pluginWebSearchApiKey(result, "perplexity")).toEqual({ source: "env", provider: "default", id: "PERPLEXITY_API_KEY", // pragma: allowlist secret @@ -299,7 +357,7 @@ describe("setupSearch", () => { const result = await setupSearch(cfg, runtime, prompter, { secretInputMode: "ref", // pragma: allowlist secret }); - expect(result.tools?.web?.search?.perplexity?.apiKey).toEqual({ + expect(pluginWebSearchApiKey(result, "perplexity")).toEqual({ source: "env", provider: "default", id: "OPENROUTER_API_KEY", // pragma: allowlist secret @@ -326,14 +384,41 @@ describe("setupSearch", () => { secretInputMode: "ref", // pragma: allowlist secret }); expect(result.tools?.web?.search?.provider).toBe("brave"); - expect(result.tools?.web?.search?.apiKey).toEqual({ + expect(pluginWebSearchApiKey(result, "brave")).toEqual({ source: "env", provider: "default", id: "BRAVE_API_KEY", }); + expect(result.plugins?.entries?.brave?.enabled).toBe(true); expect(prompter.text).not.toHaveBeenCalled(); }); + it("stores env-backed SecretRef when secretInputMode=ref for tavily", async () => { + const original = process.env.TAVILY_API_KEY; + delete process.env.TAVILY_API_KEY; + const cfg: OpenClawConfig = {}; + try { + const { prompter } = createPrompter({ selectValue: "tavily" 
}); + const result = await setupSearch(cfg, runtime, prompter, { + secretInputMode: "ref", // pragma: allowlist secret + }); + expect(result.tools?.web?.search?.provider).toBe("tavily"); + expect(pluginWebSearchApiKey(result, "tavily")).toEqual({ + source: "env", + provider: "default", + id: "TAVILY_API_KEY", + }); + expect(result.plugins?.entries?.tavily?.enabled).toBe(true); + expect(prompter.text).not.toHaveBeenCalled(); + } finally { + if (original === undefined) { + delete process.env.TAVILY_API_KEY; + } else { + process.env.TAVILY_API_KEY = original; + } + } + }); + it("stores plaintext key when secretInputMode is unset", async () => { const cfg: OpenClawConfig = {}; const { prompter } = createPrompter({ @@ -341,12 +426,20 @@ describe("setupSearch", () => { textValue: "BSA-plain", }); const result = await setupSearch(cfg, runtime, prompter); - expect(result.tools?.web?.search?.apiKey).toBe("BSA-plain"); + expect(pluginWebSearchApiKey(result, "brave")).toBe("BSA-plain"); }); - it("exports all 6 providers in SEARCH_PROVIDER_OPTIONS", () => { - expect(SEARCH_PROVIDER_OPTIONS).toHaveLength(6); + it("exports all 7 providers in SEARCH_PROVIDER_OPTIONS", () => { + expect(SEARCH_PROVIDER_OPTIONS).toHaveLength(7); const values = SEARCH_PROVIDER_OPTIONS.map((e) => e.value); - expect(values).toEqual(["brave", "gemini", "grok", "kimi", "perplexity", "firecrawl"]); + expect(values).toEqual([ + "brave", + "gemini", + "grok", + "kimi", + "perplexity", + "firecrawl", + "tavily", + ]); }); }); diff --git a/src/commands/onboard-search.ts b/src/commands/onboard-search.ts index 566362f9f03..0d414017c31 100644 --- a/src/commands/onboard-search.ts +++ b/src/commands/onboard-search.ts @@ -53,7 +53,10 @@ function rawKeyValue(config: OpenClawConfig, provider: SearchProvider): unknown config, bundledAllowlistCompat: true, }).find((candidate) => candidate.id === provider); - return entry?.getCredentialValue(search as Record | undefined); + return ( + 
entry?.getConfiguredCredentialValue?.(config) ?? + entry?.getCredentialValue(search as Record | undefined) + ); } /** Returns the plaintext key string, or undefined for SecretRefs/missing. */ @@ -104,7 +107,7 @@ export function applySearchKey( bundledAllowlistCompat: true, }).find((candidate) => candidate.id === provider); const search: MutableSearchConfig = { ...config.tools?.web?.search, provider, enabled: true }; - if (providerEntry) { + if (providerEntry && !providerEntry.setConfiguredCredentialValue) { providerEntry.setCredentialValue(search, key); } const nextBase: OpenClawConfig = { @@ -114,7 +117,9 @@ export function applySearchKey( web: { ...config.tools?.web, search }, }, }; - return providerEntry?.applySelectionConfig?.(nextBase) ?? nextBase; + const next = providerEntry?.applySelectionConfig?.(nextBase) ?? nextBase; + providerEntry?.setConfiguredCredentialValue?.(next, key); + return next; } function applyProviderOnly(config: OpenClawConfig, provider: SearchProvider): OpenClawConfig { diff --git a/src/config/config.web-search-provider.test.ts b/src/config/config.web-search-provider.test.ts index 85ce1c2700a..d89d913fcba 100644 --- a/src/config/config.web-search-provider.test.ts +++ b/src/config/config.web-search-provider.test.ts @@ -59,6 +59,13 @@ vi.mock("../plugins/web-search-providers.js", () => { getCredentialValue: getScoped("perplexity"), getConfiguredCredentialValue: getConfigured("perplexity"), }, + { + id: "tavily", + envVars: ["TAVILY_API_KEY"], + credentialPath: "plugins.entries.tavily.config.webSearch.apiKey", + getCredentialValue: getScoped("tavily"), + getConfiguredCredentialValue: getConfigured("tavily"), + }, ], }; }); @@ -66,6 +73,17 @@ vi.mock("../plugins/web-search-providers.js", () => { const { __testing } = await import("../agents/tools/web-search.js"); const { resolveSearchProvider } = __testing; +function pluginWebSearchApiKey( + config: Record | undefined, + pluginId: string, +): unknown { + return ( + config?.plugins as + | { 
entries?: Record } + | undefined + )?.entries?.[pluginId]?.config?.webSearch?.apiKey; +} + describe("web search provider config", () => { it("accepts perplexity provider and config", () => { const res = validateConfigObjectWithPlugins( @@ -113,6 +131,50 @@ describe("web search provider config", () => { expect(res.ok).toBe(true); }); + it("accepts tavily provider config on the plugin-owned path", () => { + const res = validateConfigObjectWithPlugins( + buildWebSearchProviderConfig({ + enabled: true, + provider: "tavily", + providerConfig: { + apiKey: { + source: "env", + provider: "default", + id: "TAVILY_API_KEY", + }, + baseUrl: "https://api.tavily.com", + }, + }), + ); + + expect(res.ok).toBe(true); + }); + + it("does not migrate the nonexistent legacy Tavily scoped config", () => { + const res = validateConfigObjectWithPlugins({ + tools: { + web: { + search: { + provider: "tavily", + tavily: { + apiKey: "tvly-test-key", + }, + }, + }, + }, + }); + + expect(res.ok).toBe(true); + if (!res.ok) { + return; + } + expect(res.config.tools?.web?.search?.provider).toBe("tavily"); + expect((res.config.tools?.web?.search as Record | undefined)?.tavily).toBe( + undefined, + ); + expect(pluginWebSearchApiKey(res.config as Record, "tavily")).toBe(undefined); + }); + it("accepts gemini provider with no extra config", () => { const res = validateConfigObjectWithPlugins( buildWebSearchProviderConfig({ @@ -161,6 +223,7 @@ describe("web search provider auto-detection", () => { delete process.env.MOONSHOT_API_KEY; delete process.env.PERPLEXITY_API_KEY; delete process.env.OPENROUTER_API_KEY; + delete process.env.TAVILY_API_KEY; delete process.env.XAI_API_KEY; delete process.env.KIMI_API_KEY; delete process.env.MOONSHOT_API_KEY; @@ -185,6 +248,11 @@ describe("web search provider auto-detection", () => { expect(resolveSearchProvider({})).toBe("gemini"); }); + it("auto-detects tavily when only TAVILY_API_KEY is set", () => { + process.env.TAVILY_API_KEY = "tvly-test-key"; // pragma: 
allowlist secret + expect(resolveSearchProvider({})).toBe("tavily"); + }); + it("auto-detects firecrawl when only FIRECRAWL_API_KEY is set", () => { process.env.FIRECRAWL_API_KEY = "fc-test-key"; // pragma: allowlist secret expect(resolveSearchProvider({})).toBe("firecrawl"); diff --git a/src/plugins/bundled-provider-auth-env-vars.generated.ts b/src/plugins/bundled-provider-auth-env-vars.generated.ts index 416036b28ea..80ebcedc2b9 100644 --- a/src/plugins/bundled-provider-auth-env-vars.generated.ts +++ b/src/plugins/bundled-provider-auth-env-vars.generated.ts @@ -2,10 +2,12 @@ export const BUNDLED_PROVIDER_AUTH_ENV_VAR_CANDIDATES = { anthropic: ["ANTHROPIC_OAUTH_TOKEN", "ANTHROPIC_API_KEY"], + brave: ["BRAVE_API_KEY"], byteplus: ["BYTEPLUS_API_KEY"], chutes: ["CHUTES_API_KEY", "CHUTES_OAUTH_TOKEN"], "cloudflare-ai-gateway": ["CLOUDFLARE_AI_GATEWAY_API_KEY"], fal: ["FAL_KEY"], + firecrawl: ["FIRECRAWL_API_KEY"], "github-copilot": ["COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"], google: ["GEMINI_API_KEY", "GOOGLE_API_KEY"], huggingface: ["HUGGINGFACE_HUB_TOKEN", "HF_TOKEN"], @@ -23,10 +25,12 @@ export const BUNDLED_PROVIDER_AUTH_ENV_VAR_CANDIDATES = { opencode: ["OPENCODE_API_KEY", "OPENCODE_ZEN_API_KEY"], "opencode-go": ["OPENCODE_API_KEY", "OPENCODE_ZEN_API_KEY"], openrouter: ["OPENROUTER_API_KEY"], + perplexity: ["PERPLEXITY_API_KEY", "OPENROUTER_API_KEY"], qianfan: ["QIANFAN_API_KEY"], "qwen-portal": ["QWEN_OAUTH_TOKEN", "QWEN_PORTAL_API_KEY"], sglang: ["SGLANG_API_KEY"], synthetic: ["SYNTHETIC_API_KEY"], + tavily: ["TAVILY_API_KEY"], together: ["TOGETHER_API_KEY"], venice: ["VENICE_API_KEY"], "vercel-ai-gateway": ["AI_GATEWAY_API_KEY"], diff --git a/src/plugins/bundled-provider-auth-env-vars.test.ts b/src/plugins/bundled-provider-auth-env-vars.test.ts index a41b60d7b6d..bf0d481834b 100644 --- a/src/plugins/bundled-provider-auth-env-vars.test.ts +++ b/src/plugins/bundled-provider-auth-env-vars.test.ts @@ -31,15 +31,22 @@ describe("bundled provider auth env 
vars", () => { }); it("reads bundled provider auth env vars from plugin manifests", () => { + expect(BUNDLED_PROVIDER_AUTH_ENV_VAR_CANDIDATES.brave).toEqual(["BRAVE_API_KEY"]); + expect(BUNDLED_PROVIDER_AUTH_ENV_VAR_CANDIDATES.firecrawl).toEqual(["FIRECRAWL_API_KEY"]); expect(BUNDLED_PROVIDER_AUTH_ENV_VAR_CANDIDATES["github-copilot"]).toEqual([ "COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN", ]); + expect(BUNDLED_PROVIDER_AUTH_ENV_VAR_CANDIDATES.perplexity).toEqual([ + "PERPLEXITY_API_KEY", + "OPENROUTER_API_KEY", + ]); expect(BUNDLED_PROVIDER_AUTH_ENV_VAR_CANDIDATES["qwen-portal"]).toEqual([ "QWEN_OAUTH_TOKEN", "QWEN_PORTAL_API_KEY", ]); + expect(BUNDLED_PROVIDER_AUTH_ENV_VAR_CANDIDATES.tavily).toEqual(["TAVILY_API_KEY"]); expect(BUNDLED_PROVIDER_AUTH_ENV_VAR_CANDIDATES["minimax-portal"]).toEqual([ "MINIMAX_OAUTH_TOKEN", "MINIMAX_API_KEY", diff --git a/src/plugins/bundled-web-search.test.ts b/src/plugins/bundled-web-search.test.ts index 921bd66868e..b8d5c6142ad 100644 --- a/src/plugins/bundled-web-search.test.ts +++ b/src/plugins/bundled-web-search.test.ts @@ -71,6 +71,7 @@ describe("bundled web search metadata", () => { "google", "moonshot", "perplexity", + "tavily", "xai", ]); }); diff --git a/src/plugins/bundled-web-search.ts b/src/plugins/bundled-web-search.ts index d1f2ce342f8..4b9594caaf8 100644 --- a/src/plugins/bundled-web-search.ts +++ b/src/plugins/bundled-web-search.ts @@ -191,6 +191,21 @@ const BUNDLED_WEB_SEARCH_PROVIDER_DESCRIPTORS = [ credentialScope: { kind: "scoped", key: "firecrawl" }, applySelectionConfig: (config) => enablePluginInConfig(config, "firecrawl").config, }, + { + pluginId: "tavily", + id: "tavily", + label: "Tavily Search", + hint: "Structured results with domain filters and AI answer summaries", + envVars: ["TAVILY_API_KEY"], + placeholder: "tvly-...", + signupUrl: "https://tavily.com/", + docsUrl: "https://docs.openclaw.ai/tools/tavily", + autoDetectOrder: 70, + credentialPath: "plugins.entries.tavily.config.webSearch.apiKey", 
+ inactiveSecretPaths: ["plugins.entries.tavily.config.webSearch.apiKey"], + credentialScope: { kind: "scoped", key: "tavily" }, + applySelectionConfig: (config) => enablePluginInConfig(config, "tavily").config, + }, ] as const satisfies ReadonlyArray; export const BUNDLED_WEB_SEARCH_PLUGIN_IDS = [ diff --git a/src/plugins/contracts/registry.contract.test.ts b/src/plugins/contracts/registry.contract.test.ts index a5214106d52..f2cfd9e1392 100644 --- a/src/plugins/contracts/registry.contract.test.ts +++ b/src/plugins/contracts/registry.contract.test.ts @@ -146,6 +146,7 @@ describe("plugin contract registry", () => { expect(findWebSearchIdsForPlugin("google")).toEqual(["gemini"]); expect(findWebSearchIdsForPlugin("moonshot")).toEqual(["kimi"]); expect(findWebSearchIdsForPlugin("perplexity")).toEqual(["perplexity"]); + expect(findWebSearchIdsForPlugin("tavily")).toEqual(["tavily"]); expect(findWebSearchIdsForPlugin("xai")).toEqual(["grok"]); }); @@ -183,6 +184,14 @@ describe("plugin contract registry", () => { webSearchProviderIds: ["firecrawl"], toolNames: ["firecrawl_search", "firecrawl_scrape"], }); + expect(findRegistrationForPlugin("tavily")).toMatchObject({ + providerIds: [], + speechProviderIds: [], + mediaUnderstandingProviderIds: [], + imageGenerationProviderIds: [], + webSearchProviderIds: ["tavily"], + toolNames: ["tavily_search", "tavily_extract"], + }); }); it("tracks speech registrations on bundled provider plugins", () => { diff --git a/src/plugins/contracts/registry.ts b/src/plugins/contracts/registry.ts index 60d6f96dc3d..cde5b8e8e2d 100644 --- a/src/plugins/contracts/registry.ts +++ b/src/plugins/contracts/registry.ts @@ -29,6 +29,7 @@ import qianfanPlugin from "../../../extensions/qianfan/index.js"; import qwenPortalAuthPlugin from "../../../extensions/qwen-portal-auth/index.js"; import sglangPlugin from "../../../extensions/sglang/index.js"; import syntheticPlugin from "../../../extensions/synthetic/index.js"; +import tavilyPlugin from 
"../../../extensions/tavily/index.js"; import togetherPlugin from "../../../extensions/together/index.js"; import venicePlugin from "../../../extensions/venice/index.js"; import vercelAiGatewayPlugin from "../../../extensions/vercel-ai-gateway/index.js"; @@ -84,9 +85,9 @@ const bundledWebSearchPlugins: Array ({ @@ -96,6 +97,7 @@ describe("resolvePluginWebSearchProviders", () => { "moonshot:kimi", "perplexity:perplexity", "firecrawl:firecrawl", + "tavily:tavily", ]); expect(providers.map((provider) => provider.credentialPath)).toEqual([ "plugins.entries.brave.config.webSearch.apiKey", @@ -104,6 +106,7 @@ describe("resolvePluginWebSearchProviders", () => { "plugins.entries.moonshot.config.webSearch.apiKey", "plugins.entries.perplexity.config.webSearch.apiKey", "plugins.entries.firecrawl.config.webSearch.apiKey", + "plugins.entries.tavily.config.webSearch.apiKey", ]); expect(providers.find((provider) => provider.id === "firecrawl")?.applySelectionConfig).toEqual( expect.any(Function), @@ -130,6 +133,7 @@ describe("resolvePluginWebSearchProviders", () => { "moonshot", "perplexity", "firecrawl", + "tavily", ]); }); @@ -183,6 +187,7 @@ describe("resolvePluginWebSearchProviders", () => { "moonshot:kimi", "perplexity:perplexity", "firecrawl:firecrawl", + "tavily:tavily", ]); expect(loadOpenClawPluginsMock).not.toHaveBeenCalled(); }); diff --git a/src/secrets/provider-env-vars.test.ts b/src/secrets/provider-env-vars.test.ts index 6405d322e2f..63d12fd6c0e 100644 --- a/src/secrets/provider-env-vars.test.ts +++ b/src/secrets/provider-env-vars.test.ts @@ -8,10 +8,28 @@ import { describe("provider env vars", () => { it("keeps the auth scrub list broader than the global secret env list", () => { expect(listKnownProviderAuthEnvVarNames()).toEqual( - expect.arrayContaining(["GITHUB_TOKEN", "GH_TOKEN", "ANTHROPIC_OAUTH_TOKEN"]), + expect.arrayContaining([ + "GITHUB_TOKEN", + "GH_TOKEN", + "ANTHROPIC_OAUTH_TOKEN", + "BRAVE_API_KEY", + "FIRECRAWL_API_KEY", + "PERPLEXITY_API_KEY", + 
"OPENROUTER_API_KEY", + "TAVILY_API_KEY", + ]), ); expect(listKnownSecretEnvVarNames()).toEqual( - expect.arrayContaining(["GITHUB_TOKEN", "GH_TOKEN", "ANTHROPIC_OAUTH_TOKEN"]), + expect.arrayContaining([ + "GITHUB_TOKEN", + "GH_TOKEN", + "ANTHROPIC_OAUTH_TOKEN", + "BRAVE_API_KEY", + "FIRECRAWL_API_KEY", + "PERPLEXITY_API_KEY", + "OPENROUTER_API_KEY", + "TAVILY_API_KEY", + ]), ); expect(listKnownProviderAuthEnvVarNames()).toEqual( expect.arrayContaining(["MINIMAX_CODE_PLAN_KEY"]), diff --git a/src/secrets/target-registry-data.ts b/src/secrets/target-registry-data.ts index 30aa096004b..7d1a7854867 100644 --- a/src/secrets/target-registry-data.ts +++ b/src/secrets/target-registry-data.ts @@ -843,6 +843,17 @@ const SECRET_TARGET_REGISTRY: SecretTargetRegistryEntry[] = [ includeInConfigure: true, includeInAudit: true, }, + { + id: "plugins.entries.tavily.config.webSearch.apiKey", + targetType: "plugins.entries.tavily.config.webSearch.apiKey", + configFile: "openclaw.json", + pathPattern: "plugins.entries.tavily.config.webSearch.apiKey", + secretShape: SECRET_INPUT_SHAPE, + expectedResolvedValue: "string", + includeInPlan: true, + includeInConfigure: true, + includeInAudit: true, + }, ]; export { SECRET_TARGET_REGISTRY }; diff --git a/src/web-search/runtime.test.ts b/src/web-search/runtime.test.ts index 72d1e4ad3f3..ab5a59ca993 100644 --- a/src/web-search/runtime.test.ts +++ b/src/web-search/runtime.test.ts @@ -1,8 +1,15 @@ import { afterEach, describe, expect, it } from "vitest"; +import type { OpenClawConfig } from "../config/config.js"; import { createEmptyPluginRegistry } from "../plugins/registry.js"; import { setActivePluginRegistry } from "../plugins/runtime.js"; import { runWebSearch } from "./runtime.js"; +type TestPluginWebSearchConfig = { + webSearch?: { + apiKey?: unknown; + }; +}; + describe("web search runtime", () => { afterEach(() => { setActivePluginRegistry(createEmptyPluginRegistry()); @@ -44,4 +51,74 @@ describe("web search runtime", () => { result: 
{ query: "hello", ok: true }, }); }); + + it("auto-detects a provider from canonical plugin-owned credentials", async () => { + const registry = createEmptyPluginRegistry(); + registry.webSearchProviders.push({ + pluginId: "custom-search", + pluginName: "Custom Search", + provider: { + id: "custom", + label: "Custom Search", + hint: "Custom runtime provider", + envVars: ["CUSTOM_SEARCH_API_KEY"], + placeholder: "custom-...", + signupUrl: "https://example.com/signup", + credentialPath: "plugins.entries.custom-search.config.webSearch.apiKey", + autoDetectOrder: 1, + getCredentialValue: () => undefined, + setCredentialValue: () => {}, + getConfiguredCredentialValue: (config) => { + const pluginConfig = config?.plugins?.entries?.["custom-search"]?.config as + | TestPluginWebSearchConfig + | undefined; + return pluginConfig?.webSearch?.apiKey; + }, + setConfiguredCredentialValue: (configTarget, value) => { + configTarget.plugins = { + ...configTarget.plugins, + entries: { + ...configTarget.plugins?.entries, + "custom-search": { + enabled: true, + config: { webSearch: { apiKey: value } }, + }, + }, + }; + }, + createTool: () => ({ + description: "custom", + parameters: {}, + execute: async (args) => ({ ...args, ok: true }), + }), + }, + source: "test", + }); + setActivePluginRegistry(registry); + + const config: OpenClawConfig = { + plugins: { + entries: { + "custom-search": { + enabled: true, + config: { + webSearch: { + apiKey: "custom-config-key", + }, + }, + }, + }, + }, + }; + + await expect( + runWebSearch({ + config, + args: { query: "hello" }, + }), + ).resolves.toEqual({ + provider: "custom", + result: { query: "hello", ok: true }, + }); + }); }); From 84ee6fbb76b5b255c6e84ea834d4b2a9562b33d6 Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Fri, 20 Mar 2026 10:26:24 +0530 Subject: [PATCH 02/11] feat(tts): add in-memory speech synthesis --- src/tts/providers/microsoft.ts | 1 + src/tts/providers/openai.ts | 2 +- src/tts/tts.ts | 64 
+++++++++++++++++++++++++++------- 3 files changed, 54 insertions(+), 13 deletions(-) diff --git a/src/tts/providers/microsoft.ts b/src/tts/providers/microsoft.ts index fef369740cb..ba2511e4de6 100644 --- a/src/tts/providers/microsoft.ts +++ b/src/tts/providers/microsoft.ts @@ -96,6 +96,7 @@ export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin { outputPath, config: { ...req.config.edge, + voice: req.overrides?.microsoft?.voice ?? req.config.edge.voice, outputFormat: format, }, timeoutMs: req.config.timeoutMs, diff --git a/src/tts/providers/openai.ts b/src/tts/providers/openai.ts index 9f96e9ea6e9..01e5997e85c 100644 --- a/src/tts/providers/openai.ts +++ b/src/tts/providers/openai.ts @@ -21,7 +21,7 @@ export function buildOpenAISpeechProvider(): SpeechProviderPlugin { baseUrl: req.config.openai.baseUrl, model: req.overrides?.openai?.model ?? req.config.openai.model, voice: req.overrides?.openai?.voice ?? req.config.openai.voice, - speed: req.config.openai.speed, + speed: req.overrides?.openai?.speed ?? 
req.config.openai.speed, instructions: req.config.openai.instructions, responseFormat, timeoutMs: req.config.timeoutMs, diff --git a/src/tts/tts.ts b/src/tts/tts.ts index 0a5aa81126e..c64dda83909 100644 --- a/src/tts/tts.ts +++ b/src/tts/tts.ts @@ -162,6 +162,7 @@ export type TtsDirectiveOverrides = { openai?: { voice?: string; model?: string; + speed?: number; }; elevenlabs?: { voiceId?: string; @@ -171,6 +172,9 @@ export type TtsDirectiveOverrides = { languageCode?: string; voiceSettings?: Partial; }; + microsoft?: { + voice?: string; + }; }; export type TtsDirectiveParseResult = { @@ -191,6 +195,17 @@ export type TtsResult = { voiceCompatible?: boolean; }; +export type TtsSynthesisResult = { + success: boolean; + audioBuffer?: Buffer; + error?: string; + latencyMs?: number; + provider?: string; + outputFormat?: string; + voiceCompatible?: boolean; + fileExtension?: string; +}; + export type TtsTelephonyResult = { success: boolean; audioBuffer?: Buffer; @@ -601,6 +616,7 @@ function resolveTtsRequestSetup(params: { cfg: OpenClawConfig; prefsPath?: string; providerOverride?: TtsProvider; + disableFallback?: boolean; }): | { config: ResolvedTtsConfig; @@ -621,7 +637,7 @@ function resolveTtsRequestSetup(params: { const provider = normalizeSpeechProviderId(params.providerOverride) ?? userProvider; return { config, - providers: resolveTtsProviderOrder(provider, params.cfg), + providers: params.disableFallback ? [provider] : resolveTtsProviderOrder(provider, params.cfg), }; } @@ -631,12 +647,44 @@ export async function textToSpeech(params: { prefsPath?: string; channel?: string; overrides?: TtsDirectiveOverrides; + disableFallback?: boolean; }): Promise { + const synthesis = await synthesizeSpeech(params); + if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) { + return buildTtsFailureResult([synthesis.error ?? 
"TTS conversion failed"]); + } + + const tempRoot = resolvePreferredOpenClawTmpDir(); + mkdirSync(tempRoot, { recursive: true, mode: 0o700 }); + const tempDir = mkdtempSync(path.join(tempRoot, "tts-")); + const audioPath = path.join(tempDir, `voice-${Date.now()}${synthesis.fileExtension}`); + writeFileSync(audioPath, synthesis.audioBuffer); + scheduleCleanup(tempDir); + + return { + success: true, + audioPath, + latencyMs: synthesis.latencyMs, + provider: synthesis.provider, + outputFormat: synthesis.outputFormat, + voiceCompatible: synthesis.voiceCompatible, + }; +} + +export async function synthesizeSpeech(params: { + text: string; + cfg: OpenClawConfig; + prefsPath?: string; + channel?: string; + overrides?: TtsDirectiveOverrides; + disableFallback?: boolean; +}): Promise { const setup = resolveTtsRequestSetup({ text: params.text, cfg: params.cfg, prefsPath: params.prefsPath, providerOverride: params.overrides?.provider, + disableFallback: params.disableFallback, }); if ("error" in setup) { return { success: false, error: setup.error }; @@ -667,22 +715,14 @@ export async function textToSpeech(params: { target, overrides: params.overrides, }); - const latencyMs = Date.now() - providerStart; - - const tempRoot = resolvePreferredOpenClawTmpDir(); - mkdirSync(tempRoot, { recursive: true, mode: 0o700 }); - const tempDir = mkdtempSync(path.join(tempRoot, "tts-")); - const audioPath = path.join(tempDir, `voice-${Date.now()}${synthesis.fileExtension}`); - writeFileSync(audioPath, synthesis.audioBuffer); - scheduleCleanup(tempDir); - return { success: true, - audioPath, - latencyMs, + audioBuffer: synthesis.audioBuffer, + latencyMs: Date.now() - providerStart, provider, outputFormat: synthesis.outputFormat, voiceCompatible: synthesis.voiceCompatible, + fileExtension: synthesis.fileExtension, }; } catch (err) { errors.push(formatTtsProviderError(provider, err)); From 4ac355babbeffdf133c46f77352829ad23e38eda Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Fri, 20 Mar 2026 
10:27:05 +0530 Subject: [PATCH 03/11] feat(gateway): add talk speak rpc --- src/gateway/method-scopes.ts | 1 + src/gateway/protocol/index.ts | 10 + src/gateway/protocol/schema/channels.ts | 29 ++ .../protocol/schema/protocol-schemas.ts | 4 + src/gateway/protocol/schema/types.ts | 2 + src/gateway/server-methods-list.ts | 1 + src/gateway/server-methods/talk.ts | 335 +++++++++++++++++- src/gateway/server.talk-config.test.ts | 67 +++- 8 files changed, 447 insertions(+), 2 deletions(-) diff --git a/src/gateway/method-scopes.ts b/src/gateway/method-scopes.ts index c31ff30db7b..f3a969301bf 100644 --- a/src/gateway/method-scopes.ts +++ b/src/gateway/method-scopes.ts @@ -98,6 +98,7 @@ const METHOD_SCOPE_GROUPS: Record = { "agent.wait", "wake", "talk.mode", + "talk.speak", "tts.enable", "tts.disable", "tts.convert", diff --git a/src/gateway/protocol/index.ts b/src/gateway/protocol/index.ts index 408e3239cc1..408074d44e4 100644 --- a/src/gateway/protocol/index.ts +++ b/src/gateway/protocol/index.ts @@ -48,6 +48,10 @@ import { TalkConfigParamsSchema, type TalkConfigResult, TalkConfigResultSchema, + type TalkSpeakParams, + TalkSpeakParamsSchema, + type TalkSpeakResult, + TalkSpeakResultSchema, type ChannelsStatusParams, ChannelsStatusParamsSchema, type ChannelsStatusResult, @@ -375,6 +379,8 @@ export const validateWizardStatusParams = ajv.compile(Wizard export const validateTalkModeParams = ajv.compile(TalkModeParamsSchema); export const validateTalkConfigParams = ajv.compile(TalkConfigParamsSchema); export const validateTalkConfigResult = ajv.compile(TalkConfigResultSchema); +export const validateTalkSpeakParams = ajv.compile(TalkSpeakParamsSchema); +export const validateTalkSpeakResult = ajv.compile(TalkSpeakResultSchema); export const validateChannelsStatusParams = ajv.compile( ChannelsStatusParamsSchema, ); @@ -540,6 +546,8 @@ export { WizardStatusResultSchema, TalkConfigParamsSchema, TalkConfigResultSchema, + TalkSpeakParamsSchema, + TalkSpeakResultSchema, 
ChannelsStatusParamsSchema, ChannelsStatusResultSchema, ChannelsLogoutParamsSchema, @@ -629,6 +637,8 @@ export type { WizardStatusResult, TalkConfigParams, TalkConfigResult, + TalkSpeakParams, + TalkSpeakResult, TalkModeParams, ChannelsStatusParams, ChannelsStatusResult, diff --git a/src/gateway/protocol/schema/channels.ts b/src/gateway/protocol/schema/channels.ts index 041318897ac..923432c7ac8 100644 --- a/src/gateway/protocol/schema/channels.ts +++ b/src/gateway/protocol/schema/channels.ts @@ -16,6 +16,23 @@ export const TalkConfigParamsSchema = Type.Object( { additionalProperties: false }, ); +export const TalkSpeakParamsSchema = Type.Object( + { + text: NonEmptyString, + voiceId: Type.Optional(Type.String()), + modelId: Type.Optional(Type.String()), + speed: Type.Optional(Type.Number()), + stability: Type.Optional(Type.Number()), + similarity: Type.Optional(Type.Number()), + style: Type.Optional(Type.Number()), + speakerBoost: Type.Optional(Type.Boolean()), + seed: Type.Optional(Type.Integer({ minimum: 0 })), + normalize: Type.Optional(Type.String()), + language: Type.Optional(Type.String()), + }, + { additionalProperties: false }, +); + const talkProviderFieldSchemas = { voiceId: Type.Optional(Type.String()), voiceAliases: Type.Optional(Type.Record(Type.String(), Type.String())), @@ -85,6 +102,18 @@ export const TalkConfigResultSchema = Type.Object( { additionalProperties: false }, ); +export const TalkSpeakResultSchema = Type.Object( + { + audioBase64: NonEmptyString, + provider: NonEmptyString, + outputFormat: Type.Optional(Type.String()), + voiceCompatible: Type.Optional(Type.Boolean()), + mimeType: Type.Optional(Type.String()), + fileExtension: Type.Optional(Type.String()), + }, + { additionalProperties: false }, +); + export const ChannelsStatusParamsSchema = Type.Object( { probe: Type.Optional(Type.Boolean()), diff --git a/src/gateway/protocol/schema/protocol-schemas.ts b/src/gateway/protocol/schema/protocol-schemas.ts index 60636e3eb5f..cf14fc44610 
100644 --- a/src/gateway/protocol/schema/protocol-schemas.ts +++ b/src/gateway/protocol/schema/protocol-schemas.ts @@ -44,6 +44,8 @@ import { ChannelsLogoutParamsSchema, TalkConfigParamsSchema, TalkConfigResultSchema, + TalkSpeakParamsSchema, + TalkSpeakResultSchema, ChannelsStatusParamsSchema, ChannelsStatusResultSchema, TalkModeParamsSchema, @@ -238,6 +240,8 @@ export const ProtocolSchemas = { TalkModeParams: TalkModeParamsSchema, TalkConfigParams: TalkConfigParamsSchema, TalkConfigResult: TalkConfigResultSchema, + TalkSpeakParams: TalkSpeakParamsSchema, + TalkSpeakResult: TalkSpeakResultSchema, ChannelsStatusParams: ChannelsStatusParamsSchema, ChannelsStatusResult: ChannelsStatusResultSchema, ChannelsLogoutParams: ChannelsLogoutParamsSchema, diff --git a/src/gateway/protocol/schema/types.ts b/src/gateway/protocol/schema/types.ts index 58ddb142cd5..d74c08ad10b 100644 --- a/src/gateway/protocol/schema/types.ts +++ b/src/gateway/protocol/schema/types.ts @@ -70,6 +70,8 @@ export type WizardStatusResult = SchemaType<"WizardStatusResult">; export type TalkModeParams = SchemaType<"TalkModeParams">; export type TalkConfigParams = SchemaType<"TalkConfigParams">; export type TalkConfigResult = SchemaType<"TalkConfigResult">; +export type TalkSpeakParams = SchemaType<"TalkSpeakParams">; +export type TalkSpeakResult = SchemaType<"TalkSpeakResult">; export type ChannelsStatusParams = SchemaType<"ChannelsStatusParams">; export type ChannelsStatusResult = SchemaType<"ChannelsStatusResult">; export type ChannelsLogoutParams = SchemaType<"ChannelsLogoutParams">; diff --git a/src/gateway/server-methods-list.ts b/src/gateway/server-methods-list.ts index b4de49f1198..e930f8b0517 100644 --- a/src/gateway/server-methods-list.ts +++ b/src/gateway/server-methods-list.ts @@ -34,6 +34,7 @@ const BASE_METHODS = [ "wizard.cancel", "wizard.status", "talk.config", + "talk.speak", "talk.mode", "models.list", "tools.catalog", diff --git a/src/gateway/server-methods/talk.ts 
b/src/gateway/server-methods/talk.ts index 693f3447537..33cb6d7f116 100644 --- a/src/gateway/server-methods/talk.ts +++ b/src/gateway/server-methods/talk.ts @@ -1,23 +1,297 @@ import { readConfigFileSnapshot } from "../../config/config.js"; import { redactConfigObject } from "../../config/redact-snapshot.js"; -import { buildTalkConfigResponse } from "../../config/talk.js"; +import { buildTalkConfigResponse, resolveActiveTalkProviderConfig } from "../../config/talk.js"; +import type { TalkProviderConfig } from "../../config/types.gateway.js"; +import type { OpenClawConfig, TtsConfig } from "../../config/types.js"; +import { normalizeSpeechProviderId } from "../../tts/provider-registry.js"; +import { synthesizeSpeech, type TtsDirectiveOverrides } from "../../tts/tts.js"; import { ErrorCodes, errorShape, formatValidationErrors, validateTalkConfigParams, validateTalkModeParams, + validateTalkSpeakParams, } from "../protocol/index.js"; +import { formatForLog } from "../ws-log.js"; import type { GatewayRequestHandlers } from "./types.js"; const ADMIN_SCOPE = "operator.admin"; const TALK_SECRETS_SCOPE = "operator.talk.secrets"; +type ElevenLabsVoiceSettings = NonNullable["voiceSettings"]>; function canReadTalkSecrets(client: { connect?: { scopes?: string[] } } | null): boolean { const scopes = Array.isArray(client?.connect?.scopes) ? client.connect.scopes : []; return scopes.includes(ADMIN_SCOPE) || scopes.includes(TALK_SECRETS_SCOPE); } +function trimString(value: unknown): string | undefined { + if (typeof value !== "string") { + return undefined; + } + const trimmed = value.trim(); + return trimmed.length > 0 ? trimmed : undefined; +} + +function finiteNumber(value: unknown): number | undefined { + return typeof value === "number" && Number.isFinite(value) ? value : undefined; +} + +function optionalBoolean(value: unknown): boolean | undefined { + return typeof value === "boolean" ? 
value : undefined; +} + +function plainObject(value: unknown): Record | undefined { + return typeof value === "object" && value !== null && !Array.isArray(value) + ? (value as Record) + : undefined; +} + +function normalizeTextNormalization(value: unknown): "auto" | "on" | "off" | undefined { + const normalized = trimString(value)?.toLowerCase(); + return normalized === "auto" || normalized === "on" || normalized === "off" + ? normalized + : undefined; +} + +function normalizeAliasKey(value: string): string { + return value.trim().toLowerCase(); +} + +function resolveTalkVoiceId( + providerConfig: TalkProviderConfig, + requested: string | undefined, +): string | undefined { + if (!requested) { + return undefined; + } + const aliases = providerConfig.voiceAliases; + if (!aliases) { + return requested; + } + return aliases[normalizeAliasKey(requested)] ?? requested; +} + +function readTalkVoiceSettings( + providerConfig: TalkProviderConfig, +): ElevenLabsVoiceSettings | undefined { + const source = plainObject(providerConfig.voiceSettings); + if (!source) { + return undefined; + } + const stability = finiteNumber(source.stability); + const similarityBoost = finiteNumber(source.similarityBoost); + const style = finiteNumber(source.style); + const useSpeakerBoost = optionalBoolean(source.useSpeakerBoost); + const speed = finiteNumber(source.speed); + const voiceSettings = { + ...(stability == null ? {} : { stability }), + ...(similarityBoost == null ? {} : { similarityBoost }), + ...(style == null ? {} : { style }), + ...(useSpeakerBoost == null ? {} : { useSpeakerBoost }), + ...(speed == null ? {} : { speed }), + }; + return Object.keys(voiceSettings).length > 0 ? 
voiceSettings : undefined; +} + +function buildTalkTtsConfig( + config: OpenClawConfig, +): + | { cfg: OpenClawConfig; provider: string; providerConfig: TalkProviderConfig } + | { error: string } { + const resolved = resolveActiveTalkProviderConfig(config.talk); + const provider = normalizeSpeechProviderId(resolved?.provider); + if (!resolved || !provider) { + return { error: "talk.speak unavailable: talk provider not configured" }; + } + + const baseTts = config.messages?.tts ?? {}; + const providerConfig = resolved.config; + const talkTts: TtsConfig = { + ...baseTts, + auto: "always", + provider, + }; + + if (provider === "elevenlabs") { + talkTts.elevenlabs = { + ...baseTts.elevenlabs, + ...(providerConfig.apiKey === undefined ? {} : { apiKey: providerConfig.apiKey }), + ...(trimString(providerConfig.baseUrl) == null + ? {} + : { baseUrl: trimString(providerConfig.baseUrl) }), + ...(trimString(providerConfig.voiceId) == null + ? {} + : { voiceId: trimString(providerConfig.voiceId) }), + ...(trimString(providerConfig.modelId) == null + ? {} + : { modelId: trimString(providerConfig.modelId) }), + ...(finiteNumber(providerConfig.seed) == null + ? {} + : { seed: finiteNumber(providerConfig.seed) }), + ...(normalizeTextNormalization(providerConfig.applyTextNormalization) == null + ? {} + : { + applyTextNormalization: normalizeTextNormalization( + providerConfig.applyTextNormalization, + ), + }), + ...(trimString(providerConfig.languageCode) == null + ? {} + : { languageCode: trimString(providerConfig.languageCode) }), + ...(readTalkVoiceSettings(providerConfig) == null + ? {} + : { voiceSettings: readTalkVoiceSettings(providerConfig) }), + }; + } else if (provider === "openai") { + talkTts.openai = { + ...baseTts.openai, + ...(providerConfig.apiKey === undefined ? {} : { apiKey: providerConfig.apiKey }), + ...(trimString(providerConfig.baseUrl) == null + ? {} + : { baseUrl: trimString(providerConfig.baseUrl) }), + ...(trimString(providerConfig.modelId) == null + ? 
{} + : { model: trimString(providerConfig.modelId) }), + ...(trimString(providerConfig.voiceId) == null + ? {} + : { voice: trimString(providerConfig.voiceId) }), + ...(finiteNumber(providerConfig.speed) == null + ? {} + : { speed: finiteNumber(providerConfig.speed) }), + ...(trimString(providerConfig.instructions) == null + ? {} + : { instructions: trimString(providerConfig.instructions) }), + }; + } else if (provider === "microsoft") { + talkTts.microsoft = { + ...baseTts.microsoft, + enabled: true, + ...(trimString(providerConfig.voiceId) == null + ? {} + : { voice: trimString(providerConfig.voiceId) }), + ...(trimString(providerConfig.languageCode) == null + ? {} + : { lang: trimString(providerConfig.languageCode) }), + ...(trimString(providerConfig.outputFormat) == null + ? {} + : { outputFormat: trimString(providerConfig.outputFormat) }), + ...(trimString(providerConfig.pitch) == null + ? {} + : { pitch: trimString(providerConfig.pitch) }), + ...(trimString(providerConfig.rate) == null ? {} : { rate: trimString(providerConfig.rate) }), + ...(trimString(providerConfig.volume) == null + ? {} + : { volume: trimString(providerConfig.volume) }), + ...(trimString(providerConfig.proxy) == null + ? {} + : { proxy: trimString(providerConfig.proxy) }), + ...(finiteNumber(providerConfig.timeoutMs) == null + ? 
{} + : { timeoutMs: finiteNumber(providerConfig.timeoutMs) }), + }; + } else { + return { error: `talk.speak unavailable: unsupported talk provider '${resolved.provider}'` }; + } + + return { + provider, + providerConfig, + cfg: { + ...config, + messages: { + ...config.messages, + tts: talkTts, + }, + }, + }; +} + +function buildTalkSpeakOverrides( + provider: string, + providerConfig: TalkProviderConfig, + params: Record, +): TtsDirectiveOverrides { + const voiceId = resolveTalkVoiceId(providerConfig, trimString(params.voiceId)); + const modelId = trimString(params.modelId); + const speed = finiteNumber(params.speed); + const seed = finiteNumber(params.seed); + const normalize = normalizeTextNormalization(params.normalize); + const language = trimString(params.language)?.toLowerCase(); + const overrides: TtsDirectiveOverrides = { provider }; + + if (provider === "elevenlabs") { + const voiceSettings = { + ...(speed == null ? {} : { speed }), + ...(finiteNumber(params.stability) == null + ? {} + : { stability: finiteNumber(params.stability) }), + ...(finiteNumber(params.similarity) == null + ? {} + : { similarityBoost: finiteNumber(params.similarity) }), + ...(finiteNumber(params.style) == null ? {} : { style: finiteNumber(params.style) }), + ...(optionalBoolean(params.speakerBoost) == null + ? {} + : { useSpeakerBoost: optionalBoolean(params.speakerBoost) }), + }; + overrides.elevenlabs = { + ...(voiceId == null ? {} : { voiceId }), + ...(modelId == null ? {} : { modelId }), + ...(seed == null ? {} : { seed }), + ...(normalize == null ? {} : { applyTextNormalization: normalize }), + ...(language == null ? {} : { languageCode: language }), + ...(Object.keys(voiceSettings).length === 0 ? {} : { voiceSettings }), + }; + return overrides; + } + + if (provider === "openai") { + overrides.openai = { + ...(voiceId == null ? {} : { voice: voiceId }), + ...(modelId == null ? {} : { model: modelId }), + ...(speed == null ? 
{} : { speed }), + }; + return overrides; + } + + if (provider === "microsoft") { + overrides.microsoft = voiceId == null ? undefined : { voice: voiceId }; + } + + return overrides; +} + +function inferMimeType( + outputFormat: string | undefined, + fileExtension: string | undefined, +): string | undefined { + const normalizedOutput = outputFormat?.trim().toLowerCase(); + const normalizedExtension = fileExtension?.trim().toLowerCase(); + if ( + normalizedOutput === "mp3" || + normalizedOutput?.startsWith("mp3_") || + normalizedOutput?.endsWith("-mp3") || + normalizedExtension === ".mp3" + ) { + return "audio/mpeg"; + } + if ( + normalizedOutput === "opus" || + normalizedOutput?.startsWith("opus_") || + normalizedExtension === ".opus" || + normalizedExtension === ".ogg" + ) { + return "audio/ogg"; + } + if (normalizedOutput?.endsWith("-wav") || normalizedExtension === ".wav") { + return "audio/wav"; + } + if (normalizedOutput?.endsWith("-webm") || normalizedExtension === ".webm") { + return "audio/webm"; + } + return undefined; +} + export const talkHandlers: GatewayRequestHandlers = { "talk.config": async ({ params, respond, client }) => { if (!validateTalkConfigParams(params)) { @@ -65,6 +339,65 @@ export const talkHandlers: GatewayRequestHandlers = { respond(true, { config: configPayload }, undefined); }, + "talk.speak": async ({ params, respond }) => { + if (!validateTalkSpeakParams(params)) { + respond( + false, + undefined, + errorShape( + ErrorCodes.INVALID_REQUEST, + `invalid talk.speak params: ${formatValidationErrors(validateTalkSpeakParams.errors)}`, + ), + ); + return; + } + + const text = trimString((params as { text?: unknown }).text); + if (!text) { + respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, "talk.speak requires text")); + return; + } + + try { + const snapshot = await readConfigFileSnapshot(); + const setup = buildTalkTtsConfig(snapshot.config); + if ("error" in setup) { + respond(false, undefined, 
errorShape(ErrorCodes.UNAVAILABLE, setup.error)); + return; + } + + const overrides = buildTalkSpeakOverrides(setup.provider, setup.providerConfig, params); + const result = await synthesizeSpeech({ + text, + cfg: setup.cfg, + overrides, + disableFallback: true, + }); + if (!result.success || !result.audioBuffer) { + respond( + false, + undefined, + errorShape(ErrorCodes.UNAVAILABLE, result.error ?? "talk synthesis failed"), + ); + return; + } + + respond( + true, + { + audioBase64: result.audioBuffer.toString("base64"), + provider: result.provider ?? setup.provider, + outputFormat: result.outputFormat, + voiceCompatible: result.voiceCompatible, + mimeType: inferMimeType(result.outputFormat, result.fileExtension), + fileExtension: result.fileExtension, + }, + undefined, + ); + } catch (err) { + respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, formatForLog(err))); + } + }, "talk.mode": ({ params, respond, context, client, isWebchatConnect }) => { if (client && isWebchatConnect(client.connect) && !context.hasConnectedMobileNode()) { respond( diff --git a/src/gateway/server.talk-config.test.ts b/src/gateway/server.talk-config.test.ts index a47addbb0e0..eb2925db158 100644 --- a/src/gateway/server.talk-config.test.ts +++ b/src/gateway/server.talk-config.test.ts @@ -1,6 +1,6 @@ import os from "node:os"; import path from "node:path"; -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; import { loadOrCreateDeviceIdentity, publicKeyRawBase64UrlFromPem, @@ -41,6 +41,13 @@ type TalkConfigPayload = { }; }; type TalkConfig = NonNullable["talk"]>; +type TalkSpeakPayload = { + audioBase64?: string; + provider?: string; + outputFormat?: string; + mimeType?: string; + fileExtension?: string; +}; const TALK_CONFIG_DEVICE_PATH = path.join( os.tmpdir(), `openclaw-talk-config-device-${process.pid}.json`, @@ -95,6 +102,10 @@ async function fetchTalkConfig( return rpcReq(ws, "talk.config", params ?? 
{}); } +async function fetchTalkSpeak(ws: GatewaySocket, params: Record) { + return rpcReq(ws, "talk.speak", params); +} + function expectElevenLabsTalkConfig( talk: TalkConfig | undefined, expected: { @@ -236,4 +247,58 @@ describe("gateway talk.config", () => { }); }); }); + + it("synthesizes talk audio via the active talk provider", async () => { + const { writeConfigFile } = await import("../config/config.js"); + await writeConfigFile({ + talk: { + provider: "openai", + providers: { + openai: { + apiKey: "openai-talk-key", // pragma: allowlist secret + voiceId: "alloy", + modelId: "gpt-4o-mini-tts", + }, + }, + }, + }); + + const originalFetch = globalThis.fetch; + const requestInits: RequestInit[] = []; + const fetchMock = vi.fn(async (_input: RequestInfo | URL, init?: RequestInit) => { + if (init) { + requestInits.push(init); + } + return new Response(new Uint8Array([1, 2, 3]), { status: 200 }); + }); + globalThis.fetch = fetchMock as typeof fetch; + + try { + await withServer(async (ws) => { + await connectOperator(ws, ["operator.read", "operator.write"]); + const res = await fetchTalkSpeak(ws, { + text: "Hello from talk mode.", + voiceId: "nova", + modelId: "tts-1", + speed: 1.25, + }); + expect(res.ok).toBe(true); + expect(res.payload?.provider).toBe("openai"); + expect(res.payload?.outputFormat).toBe("mp3"); + expect(res.payload?.mimeType).toBe("audio/mpeg"); + expect(res.payload?.fileExtension).toBe(".mp3"); + expect(res.payload?.audioBase64).toBe(Buffer.from([1, 2, 3]).toString("base64")); + }); + + expect(fetchMock).toHaveBeenCalled(); + const requestInit = requestInits.find((init) => typeof init.body === "string"); + expect(requestInit).toBeDefined(); + const body = JSON.parse(requestInit?.body as string) as Record; + expect(body.model).toBe("tts-1"); + expect(body.voice).toBe("nova"); + expect(body.speed).toBe(1.25); + } finally { + globalThis.fetch = originalFetch; + } + }); }); From f7fe75a68bb28ed2cf8631264991d52f20e219b0 Mon Sep 17 00:00:00 2001 
From: Ayaan Zaidi Date: Fri, 20 Mar 2026 10:27:48 +0530 Subject: [PATCH 04/11] refactor(android): simplify talk config parsing --- .../app/voice/TalkModeGatewayConfig.kt | 119 +---------------- .../app/voice/TalkModeConfigContractTest.kt | 100 --------------- .../app/voice/TalkModeConfigParsingTest.kt | 120 ++---------------- 3 files changed, 15 insertions(+), 324 deletions(-) delete mode 100644 apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeConfigContractTest.kt diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeGatewayConfig.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeGatewayConfig.kt index 58208acc0bb..d0545b2baf0 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeGatewayConfig.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeGatewayConfig.kt @@ -4,116 +4,23 @@ import ai.openclaw.app.normalizeMainKey import kotlinx.serialization.json.JsonElement import kotlinx.serialization.json.JsonObject import kotlinx.serialization.json.JsonPrimitive -import kotlinx.serialization.json.buildJsonObject import kotlinx.serialization.json.booleanOrNull import kotlinx.serialization.json.contentOrNull -internal data class TalkProviderConfigSelection( - val provider: String, - val config: JsonObject, - val normalizedPayload: Boolean, -) - internal data class TalkModeGatewayConfigState( - val activeProvider: String, - val normalizedPayload: Boolean, - val missingResolvedPayload: Boolean, val mainSessionKey: String, - val defaultVoiceId: String?, - val voiceAliases: Map, - val defaultModelId: String, - val defaultOutputFormat: String, - val apiKey: String?, val interruptOnSpeech: Boolean?, val silenceTimeoutMs: Long, ) internal object TalkModeGatewayConfigParser { - private const val defaultTalkProvider = "elevenlabs" - - fun parse( - config: JsonObject?, - defaultProvider: String, - defaultModelIdFallback: String, - defaultOutputFormatFallback: String, - envVoice: String?, - 
sagVoice: String?, - envKey: String?, - ): TalkModeGatewayConfigState { + fun parse(config: JsonObject?): TalkModeGatewayConfigState { val talk = config?.get("talk").asObjectOrNull() - val selection = selectTalkProviderConfig(talk) - val activeProvider = selection?.provider ?: defaultProvider - val activeConfig = selection?.config val sessionCfg = config?.get("session").asObjectOrNull() - val mainKey = normalizeMainKey(sessionCfg?.get("mainKey").asStringOrNull()) - val voice = activeConfig?.get("voiceId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } - val aliases = - activeConfig?.get("voiceAliases").asObjectOrNull()?.entries?.mapNotNull { (key, value) -> - val id = value.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } ?: return@mapNotNull null - normalizeTalkAliasKey(key).takeIf { it.isNotEmpty() }?.let { it to id } - }?.toMap().orEmpty() - val model = activeConfig?.get("modelId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } - val outputFormat = - activeConfig?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } - val key = activeConfig?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } - val interrupt = talk?.get("interruptOnSpeech")?.asBooleanOrNull() - val silenceTimeoutMs = resolvedSilenceTimeoutMs(talk) - return TalkModeGatewayConfigState( - activeProvider = activeProvider, - normalizedPayload = selection?.normalizedPayload == true, - missingResolvedPayload = talk != null && selection == null, - mainSessionKey = mainKey, - defaultVoiceId = - if (activeProvider == defaultProvider) { - voice ?: envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() } - } else { - voice - }, - voiceAliases = aliases, - defaultModelId = model ?: defaultModelIdFallback, - defaultOutputFormat = outputFormat ?: defaultOutputFormatFallback, - apiKey = key ?: envKey?.takeIf { it.isNotEmpty() }, - interruptOnSpeech = interrupt, - silenceTimeoutMs = silenceTimeoutMs, - ) - } - - fun fallback( - 
defaultProvider: String, - defaultModelIdFallback: String, - defaultOutputFormatFallback: String, - envVoice: String?, - sagVoice: String?, - envKey: String?, - ): TalkModeGatewayConfigState = - TalkModeGatewayConfigState( - activeProvider = defaultProvider, - normalizedPayload = false, - missingResolvedPayload = false, - mainSessionKey = "main", - defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }, - voiceAliases = emptyMap(), - defaultModelId = defaultModelIdFallback, - defaultOutputFormat = defaultOutputFormatFallback, - apiKey = envKey?.takeIf { it.isNotEmpty() }, - interruptOnSpeech = null, - silenceTimeoutMs = TalkDefaults.defaultSilenceTimeoutMs, - ) - - fun selectTalkProviderConfig(talk: JsonObject?): TalkProviderConfigSelection? { - if (talk == null) return null - selectResolvedTalkProviderConfig(talk)?.let { return it } - val rawProvider = talk["provider"].asStringOrNull() - val rawProviders = talk["providers"].asObjectOrNull() - val hasNormalizedPayload = rawProvider != null || rawProviders != null - if (hasNormalizedPayload) { - return null - } - return TalkProviderConfigSelection( - provider = defaultTalkProvider, - config = talk, - normalizedPayload = false, + mainSessionKey = normalizeMainKey(sessionCfg?.get("mainKey").asStringOrNull()), + interruptOnSpeech = talk?.get("interruptOnSpeech").asBooleanOrNull(), + silenceTimeoutMs = resolvedSilenceTimeoutMs(talk), ) } @@ -127,26 +34,8 @@ internal object TalkModeGatewayConfigParser { } return timeout.toLong() } - - private fun selectResolvedTalkProviderConfig(talk: JsonObject): TalkProviderConfigSelection? 
{ - val resolved = talk["resolved"].asObjectOrNull() ?: return null - val providerId = normalizeTalkProviderId(resolved["provider"].asStringOrNull()) ?: return null - return TalkProviderConfigSelection( - provider = providerId, - config = resolved["config"].asObjectOrNull() ?: buildJsonObject {}, - normalizedPayload = true, - ) - } - - private fun normalizeTalkProviderId(raw: String?): String? { - val trimmed = raw?.trim()?.lowercase().orEmpty() - return trimmed.takeIf { it.isNotEmpty() } - } } -private fun normalizeTalkAliasKey(value: String): String = - value.trim().lowercase() - private fun JsonElement?.asStringOrNull(): String? = this?.let { element -> element as? JsonPrimitive diff --git a/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeConfigContractTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeConfigContractTest.kt deleted file mode 100644 index ca9be8b1280..00000000000 --- a/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeConfigContractTest.kt +++ /dev/null @@ -1,100 +0,0 @@ -package ai.openclaw.app.voice - -import java.io.File -import kotlinx.serialization.SerialName -import kotlinx.serialization.Serializable -import kotlinx.serialization.json.Json -import kotlinx.serialization.json.JsonObject -import kotlinx.serialization.json.JsonPrimitive -import org.junit.Assert.assertEquals -import org.junit.Assert.assertNotNull -import org.junit.Assert.assertNull -import org.junit.Test - -@Serializable -private data class TalkConfigContractFixture( - @SerialName("selectionCases") val selectionCases: List, - @SerialName("timeoutCases") val timeoutCases: List, -) { - @Serializable - data class SelectionCase( - val id: String, - val defaultProvider: String, - val payloadValid: Boolean, - val expectedSelection: ExpectedSelection? = null, - val talk: JsonObject, - ) - - @Serializable - data class ExpectedSelection( - val provider: String, - val normalizedPayload: Boolean, - val voiceId: String? 
= null, - val apiKey: String? = null, - ) - - @Serializable - data class TimeoutCase( - val id: String, - val fallback: Long, - val expectedTimeoutMs: Long, - val talk: JsonObject, - ) -} - -class TalkModeConfigContractTest { - private val json = Json { ignoreUnknownKeys = true } - - @Test - fun selectionFixtures() { - for (fixture in loadFixtures().selectionCases) { - val selection = TalkModeGatewayConfigParser.selectTalkProviderConfig(fixture.talk) - val expected = fixture.expectedSelection - if (expected == null) { - assertNull(fixture.id, selection) - continue - } - assertNotNull(fixture.id, selection) - assertEquals(fixture.id, expected.provider, selection?.provider) - assertEquals(fixture.id, expected.normalizedPayload, selection?.normalizedPayload) - assertEquals( - fixture.id, - expected.voiceId, - (selection?.config?.get("voiceId") as? JsonPrimitive)?.content, - ) - assertEquals( - fixture.id, - expected.apiKey, - (selection?.config?.get("apiKey") as? JsonPrimitive)?.content, - ) - assertEquals(fixture.id, true, fixture.payloadValid) - } - } - - @Test - fun timeoutFixtures() { - for (fixture in loadFixtures().timeoutCases) { - val timeout = TalkModeGatewayConfigParser.resolvedSilenceTimeoutMs(fixture.talk) - assertEquals(fixture.id, fixture.expectedTimeoutMs, timeout) - assertEquals(fixture.id, TalkDefaults.defaultSilenceTimeoutMs, fixture.fallback) - } - } - - private fun loadFixtures(): TalkConfigContractFixture { - val fixturePath = findFixtureFile() - return json.decodeFromString(File(fixturePath).readText()) - } - - private fun findFixtureFile(): String { - val startDir = System.getProperty("user.dir") ?: error("user.dir unavailable") - var current = File(startDir).absoluteFile - while (true) { - val candidate = File(current, "test-fixtures/talk-config-contract.json") - if (candidate.exists()) { - return candidate.absolutePath - } - current = current.parentFile ?: break - } - error("talk-config-contract.json not found from $startDir") - } -} diff 
--git a/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeConfigParsingTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeConfigParsingTest.kt index e9c46231961..79f0cb94074 100644 --- a/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeConfigParsingTest.kt +++ b/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeConfigParsingTest.kt @@ -2,135 +2,37 @@ package ai.openclaw.app.voice import kotlinx.serialization.json.Json import kotlinx.serialization.json.buildJsonObject -import kotlinx.serialization.json.jsonPrimitive import kotlinx.serialization.json.jsonObject import kotlinx.serialization.json.put import org.junit.Assert.assertEquals -import org.junit.Assert.assertNotNull -import org.junit.Assert.assertTrue import org.junit.Test class TalkModeConfigParsingTest { private val json = Json { ignoreUnknownKeys = true } @Test - fun prefersCanonicalResolvedTalkProviderPayload() { - val talk = + fun readsMainSessionKeyAndInterruptFlag() { + val config = json.parseToJsonElement( """ { - "resolved": { - "provider": "elevenlabs", - "config": { - "voiceId": "voice-resolved" - } + "talk": { + "interruptOnSpeech": true, + "silenceTimeoutMs": 1800 }, - "provider": "elevenlabs", - "providers": { - "elevenlabs": { - "voiceId": "voice-normalized" - } + "session": { + "mainKey": "voice-main" } } """.trimIndent(), ) .jsonObject - val selection = TalkModeGatewayConfigParser.selectTalkProviderConfig(talk) - assertNotNull(selection) - assertEquals("elevenlabs", selection?.provider) - assertTrue(selection?.normalizedPayload == true) - assertEquals("voice-resolved", selection?.config?.get("voiceId")?.jsonPrimitive?.content) - } + val parsed = TalkModeGatewayConfigParser.parse(config) - @Test - fun prefersNormalizedTalkProviderPayload() { - val talk = - json.parseToJsonElement( - """ - { - "provider": "elevenlabs", - "providers": { - "elevenlabs": { - "voiceId": "voice-normalized" - } - }, - "voiceId": "voice-legacy" - } - 
""".trimIndent(), - ) - .jsonObject - - val selection = TalkModeGatewayConfigParser.selectTalkProviderConfig(talk) - assertEquals(null, selection) - } - - @Test - fun rejectsNormalizedTalkProviderPayloadWhenProviderMissingFromProviders() { - val talk = - json.parseToJsonElement( - """ - { - "provider": "acme", - "providers": { - "elevenlabs": { - "voiceId": "voice-normalized" - } - } - } - """.trimIndent(), - ) - .jsonObject - - val selection = TalkModeGatewayConfigParser.selectTalkProviderConfig(talk) - assertEquals(null, selection) - } - - @Test - fun rejectsNormalizedTalkProviderPayloadWhenProviderIsAmbiguous() { - val talk = - json.parseToJsonElement( - """ - { - "providers": { - "acme": { - "voiceId": "voice-acme" - }, - "elevenlabs": { - "voiceId": "voice-normalized" - } - } - } - """.trimIndent(), - ) - .jsonObject - - val selection = TalkModeGatewayConfigParser.selectTalkProviderConfig(talk) - assertEquals(null, selection) - } - - @Test - fun fallsBackToLegacyTalkFieldsWhenNormalizedPayloadMissing() { - val legacyApiKey = "legacy-key" // pragma: allowlist secret - val talk = - buildJsonObject { - put("voiceId", "voice-legacy") - put("apiKey", legacyApiKey) // pragma: allowlist secret - } - - val selection = TalkModeGatewayConfigParser.selectTalkProviderConfig(talk) - assertNotNull(selection) - assertEquals("elevenlabs", selection?.provider) - assertTrue(selection?.normalizedPayload == false) - assertEquals("voice-legacy", selection?.config?.get("voiceId")?.jsonPrimitive?.content) - assertEquals("legacy-key", selection?.config?.get("apiKey")?.jsonPrimitive?.content) - } - - @Test - fun readsConfiguredSilenceTimeoutMs() { - val talk = buildJsonObject { put("silenceTimeoutMs", 1500) } - - assertEquals(1500L, TalkModeGatewayConfigParser.resolvedSilenceTimeoutMs(talk)) + assertEquals("voice-main", parsed.mainSessionKey) + assertEquals(true, parsed.interruptOnSpeech) + assertEquals(1800L, parsed.silenceTimeoutMs) } @Test From 
e3afaca1a61de4a821518024599fee0c9dcff228 Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Fri, 20 Mar 2026 10:28:28 +0530 Subject: [PATCH 05/11] refactor(android): route talk playback through gateway --- .../ai/openclaw/app/voice/TalkModeManager.kt | 943 ++---------------- 1 file changed, 106 insertions(+), 837 deletions(-) diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt index 70b6113fc35..4ba2c2ef043 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt @@ -6,9 +6,7 @@ import android.content.Intent import android.content.pm.PackageManager import android.media.AudioAttributes import android.media.AudioFocusRequest -import android.media.AudioFormat import android.media.AudioManager -import android.media.AudioTrack import android.media.MediaPlayer import android.os.Bundle import android.os.Handler @@ -17,16 +15,12 @@ import android.os.SystemClock import android.speech.RecognitionListener import android.speech.RecognizerIntent import android.speech.SpeechRecognizer -import android.speech.tts.TextToSpeech -import android.speech.tts.UtteranceProgressListener +import android.util.Base64 import android.util.Log import androidx.core.content.ContextCompat import ai.openclaw.app.gateway.GatewaySession import ai.openclaw.app.isCanonicalMainSessionKey -import ai.openclaw.app.normalizeMainKey import java.io.File -import java.net.HttpURLConnection -import java.net.URL import java.util.UUID import java.util.concurrent.atomic.AtomicLong import kotlinx.coroutines.CancellationException @@ -46,7 +40,6 @@ import kotlinx.serialization.json.JsonElement import kotlinx.serialization.json.JsonObject import kotlinx.serialization.json.JsonPrimitive import kotlinx.serialization.json.buildJsonObject -import kotlin.math.max class TalkModeManager( private val context: Context, @@ 
-57,9 +50,6 @@ class TalkModeManager( ) { companion object { private const val tag = "TalkMode" - private const val defaultModelIdFallback = "eleven_v3" - private const val defaultOutputFormatFallback = "pcm_24000" - private const val defaultTalkProvider = "elevenlabs" private const val listenWatchdogMs = 12_000L private const val chatFinalWaitWithSubscribeMs = 45_000L private const val chatFinalWaitWithoutSubscribeMs = 6_000L @@ -84,9 +74,6 @@ class TalkModeManager( private val _lastAssistantText = MutableStateFlow(null) val lastAssistantText: StateFlow = _lastAssistantText - private val _usingFallbackTts = MutableStateFlow(false) - val usingFallbackTts: StateFlow = _usingFallbackTts - private var recognizer: SpeechRecognizer? = null private var restartJob: Job? = null private var stopRequested = false @@ -99,21 +86,11 @@ class TalkModeManager( private var lastSpokenText: String? = null private var lastInterruptedAtSeconds: Double? = null - private var defaultVoiceId: String? = null private var currentVoiceId: String? = null - private var fallbackVoiceId: String? = null - private var defaultModelId: String? = null private var currentModelId: String? = null - private var defaultOutputFormat: String? = null - private var apiKey: String? = null - private var voiceAliases: Map = emptyMap() // Interrupt-on-speech is disabled by default: starting a SpeechRecognizer during - // TTS creates an audio session conflict on OxygenOS/OnePlus that causes AudioTrack - // write to return 0 and MediaPlayer to error. Can be enabled via gateway talk config. - private var activeProviderIsElevenLabs: Boolean = true + // TTS creates an audio session conflict on some OEMs. Can be enabled via gateway talk config. private var interruptOnSpeech: Boolean = false - private var voiceOverrideActive = false - private var modelOverrideActive = false private var mainSessionKey: String = "main" @Volatile private var pendingRunId: String? 
= null @@ -128,14 +105,8 @@ class TalkModeManager( private var ttsJob: Job? = null private var player: MediaPlayer? = null - private var streamingSource: StreamingMediaDataSource? = null - private var pcmTrack: AudioTrack? = null - @Volatile private var pcmStopRequested = false @Volatile private var finalizeInFlight = false private var listenWatchdogJob: Job? = null - private var systemTts: TextToSpeech? = null - private var systemTtsPending: CompletableDeferred? = null - private var systemTtsPendingId: String? = null private var audioFocusRequest: AudioFocusRequest? = null private val audioFocusListener = AudioManager.OnAudioFocusChangeListener { focusChange -> @@ -208,118 +179,6 @@ class TalkModeManager( /** When true, play TTS for all final chat responses (even ones we didn't initiate). */ @Volatile var ttsOnAllResponses = false - // Streaming TTS: active session keyed by runId - private var streamingTts: ElevenLabsStreamingTts? = null - private var streamingFullText: String = "" - @Volatile private var lastHandledStreamingRunId: String? = null - private var drainingTts: ElevenLabsStreamingTts? = null - - private fun stopActiveStreamingTts() { - streamingTts?.stop() - streamingTts = null - drainingTts?.stop() - drainingTts = null - streamingFullText = "" - } - - /** Handle agent stream events — only speak assistant text, not tool calls or thinking. */ - private fun handleAgentStreamEvent(payloadJson: String?) { - if (payloadJson.isNullOrBlank()) return - val payload = try { - json.parseToJsonElement(payloadJson).asObjectOrNull() - } catch (_: Throwable) { null } ?: return - - // Only speak events for the active session — prevents TTS leaking from - // concurrent sessions/channels (privacy + correctness). 
- val eventSession = payload["sessionKey"]?.asStringOrNull() - val activeSession = mainSessionKey.ifBlank { "main" } - if (eventSession != null && eventSession != activeSession) return - - val stream = payload["stream"]?.asStringOrNull() ?: return - if (stream != "assistant") return // Only speak assistant text - val data = payload["data"]?.asObjectOrNull() ?: return - if (data["type"]?.asStringOrNull() == "thinking") return // Skip thinking tokens - val text = data["text"]?.asStringOrNull()?.trim() ?: return - if (text.isEmpty()) return - if (!playbackEnabled) { - stopActiveStreamingTts() - return - } - - // Start streaming session if not already active - if (streamingTts == null) { - if (!activeProviderIsElevenLabs) return // Non-ElevenLabs provider — skip streaming TTS - val voiceId = currentVoiceId ?: defaultVoiceId - val apiKey = this.apiKey - if (voiceId == null || apiKey == null) { - Log.w(tag, "streaming TTS: missing voiceId or apiKey") - return - } - val modelId = currentModelId ?: defaultModelId ?: "" - val streamModel = if (ElevenLabsStreamingTts.supportsStreaming(modelId)) { - modelId - } else { - "eleven_flash_v2_5" - } - val tts = ElevenLabsStreamingTts( - scope = scope, - voiceId = voiceId, - apiKey = apiKey, - modelId = streamModel, - outputFormat = "pcm_24000", - sampleRate = 24000, - ) - streamingTts = tts - streamingFullText = "" - _isSpeaking.value = true - _statusText.value = "Speaking…" - tts.start() - Log.d(tag, "streaming TTS started for agent assistant text") - lastHandledStreamingRunId = null // will be set on final - } - - val accepted = streamingTts?.sendText(text) ?: false - if (!accepted && streamingTts != null) { - Log.d(tag, "text diverged, restarting streaming TTS") - streamingTts?.stop() - streamingTts = null - // Restart with the new text - val voiceId2 = currentVoiceId ?: defaultVoiceId - val apiKey2 = this.apiKey - if (voiceId2 != null && apiKey2 != null) { - val modelId2 = currentModelId ?: defaultModelId ?: "" - val 
streamModel2 = if (ElevenLabsStreamingTts.supportsStreaming(modelId2)) modelId2 else "eleven_flash_v2_5" - val newTts = ElevenLabsStreamingTts( - scope = scope, voiceId = voiceId2, apiKey = apiKey2, - modelId = streamModel2, outputFormat = "pcm_24000", sampleRate = 24000, - ) - streamingTts = newTts - streamingFullText = text - newTts.start() - newTts.sendText(streamingFullText) - Log.d(tag, "streaming TTS restarted with new text") - } - } - } - - /** Called when chat final/error/aborted arrives — finish any active streaming TTS. */ - private fun finishStreamingTts() { - streamingFullText = "" - val tts = streamingTts ?: return - // Null out immediately so the next response creates a fresh TTS instance. - // The drain coroutine below holds a reference to this instance for cleanup. - streamingTts = null - drainingTts = tts - tts.finish() - scope.launch { - delay(500) - while (tts.isPlaying.value) { delay(200) } - if (drainingTts === tts) drainingTts = null - _isSpeaking.value = false - _statusText.value = "Ready" - } - } - fun playTtsForText(text: String) { val playbackToken = playbackGeneration.incrementAndGet() ttsJob?.cancel() @@ -338,7 +197,6 @@ class TalkModeManager( Log.d(tag, "gateway event: $event") } if (event == "agent" && ttsOnAllResponses) { - handleAgentStreamEvent(payloadJson) return } if (event != "chat") return @@ -362,27 +220,10 @@ class TalkModeManager( // Otherwise, if ttsOnAllResponses, finish streaming TTS on terminal events. val pending = pendingRunId if (pending == null || runId != pending) { - if (ttsOnAllResponses && state in listOf("final", "error", "aborted")) { - // Skip if we already handled TTS for this run (multiple final events - // can arrive on different threads for the same run). 
- if (lastHandledStreamingRunId == runId) { - if (pending == null || runId != pending) return - } - lastHandledStreamingRunId = runId - val stts = streamingTts - if (stts != null) { - // Finish streaming and let the drain coroutine handle playback completion. - // Don’t check hasReceivedAudio synchronously — audio may still be in flight - // from the WebSocket (EOS was just sent). The drain coroutine in finishStreamingTts - // waits for playback to complete; if ElevenLabs truly fails, the user just won’t - // hear anything (silent failure is better than double-speaking with system TTS). - finishStreamingTts() - } else if (state == "final") { - // No streaming was active — fall back to non-streaming - val text = extractTextFromChatEventMessage(obj["message"]) - if (!text.isNullOrBlank()) { - playTtsForText(text) - } + if (ttsOnAllResponses && state == "final") { + val text = extractTextFromChatEventMessage(obj["message"]) + if (!text.isNullOrBlank()) { + playTtsForText(text) } } if (pending == null || runId != pending) return @@ -419,7 +260,6 @@ class TalkModeManager( playbackEnabled = enabled if (!enabled) { playbackGeneration.incrementAndGet() - stopActiveStreamingTts() stopSpeaking() } } @@ -485,7 +325,6 @@ class TalkModeManager( _isListening.value = false _statusText.value = "Off" stopSpeaking() - _usingFallbackTts.value = false chatSubscribedSessionKey = null pendingRunId = null pendingFinal?.cancel() @@ -500,10 +339,6 @@ class TalkModeManager( recognizer?.destroy() recognizer = null } - systemTts?.stop() - systemTtsPending?.cancel() - systemTtsPending = null - systemTtsPendingId = null } private fun startListeningInternal(markListening: Boolean) { @@ -813,59 +648,19 @@ class TalkModeManager( _lastAssistantText.value = cleaned val requestedVoice = directive?.voiceId?.trim()?.takeIf { it.isNotEmpty() } - val resolvedVoice = TalkModeVoiceResolver.resolveVoiceAlias(requestedVoice, voiceAliases) - if (requestedVoice != null && resolvedVoice == null) { - Log.w(tag, 
"unknown voice alias: $requestedVoice") - } if (directive?.voiceId != null) { if (directive.once != true) { - currentVoiceId = resolvedVoice - voiceOverrideActive = true + currentVoiceId = requestedVoice } } if (directive?.modelId != null) { if (directive.once != true) { - currentModelId = directive.modelId - modelOverrideActive = true + currentModelId = directive.modelId?.trim()?.takeIf { it.isNotEmpty() } } } ensurePlaybackActive(playbackToken) - val apiKey = - apiKey?.trim()?.takeIf { it.isNotEmpty() } - ?: System.getenv("ELEVENLABS_API_KEY")?.trim() - val preferredVoice = resolvedVoice ?: currentVoiceId ?: defaultVoiceId - val resolvedPlaybackVoice = - if (!apiKey.isNullOrEmpty()) { - try { - TalkModeVoiceResolver.resolveVoiceId( - preferred = preferredVoice, - fallbackVoiceId = fallbackVoiceId, - defaultVoiceId = defaultVoiceId, - currentVoiceId = currentVoiceId, - voiceOverrideActive = voiceOverrideActive, - listVoices = { TalkModeVoiceResolver.listVoices(apiKey, json) }, - ) - } catch (err: Throwable) { - Log.w(tag, "list voices failed: ${err.message ?: err::class.simpleName}") - null - } - } else { - null - } - resolvedPlaybackVoice?.let { resolved -> - fallbackVoiceId = resolved.fallbackVoiceId - defaultVoiceId = resolved.defaultVoiceId - currentVoiceId = resolved.currentVoiceId - resolved.selectedVoiceName?.let { name -> - resolved.voiceId?.let { voiceId -> - Log.d(tag, "default voice selected $name ($voiceId)") - } - } - } - val voiceId = resolvedPlaybackVoice?.voiceId - _statusText.value = "Speaking…" _isSpeaking.value = true lastSpokenText = cleaned @@ -873,210 +668,99 @@ class TalkModeManager( requestAudioFocusForTts() try { - val canUseElevenLabs = !voiceId.isNullOrBlank() && !apiKey.isNullOrEmpty() - if (!canUseElevenLabs) { - if (voiceId.isNullOrBlank()) { - Log.w(tag, "missing voiceId; falling back to system voice") - } - if (apiKey.isNullOrEmpty()) { - Log.w(tag, "missing ELEVENLABS_API_KEY; falling back to system voice") - } - 
ensurePlaybackActive(playbackToken) - _usingFallbackTts.value = true - _statusText.value = "Speaking (System)…" - speakWithSystemTts(cleaned, playbackToken) - } else { - _usingFallbackTts.value = false - val ttsStarted = SystemClock.elapsedRealtime() - val modelId = directive?.modelId ?: currentModelId ?: defaultModelId - val request = - ElevenLabsRequest( - text = cleaned, - modelId = modelId, - outputFormat = - TalkModeRuntime.validatedOutputFormat(directive?.outputFormat ?: defaultOutputFormat), - speed = TalkModeRuntime.resolveSpeed(directive?.speed, directive?.rateWpm), - stability = TalkModeRuntime.validatedStability(directive?.stability, modelId), - similarity = TalkModeRuntime.validatedUnit(directive?.similarity), - style = TalkModeRuntime.validatedUnit(directive?.style), - speakerBoost = directive?.speakerBoost, - seed = TalkModeRuntime.validatedSeed(directive?.seed), - normalize = TalkModeRuntime.validatedNormalize(directive?.normalize), - language = TalkModeRuntime.validatedLanguage(directive?.language), - latencyTier = TalkModeRuntime.validatedLatencyTier(directive?.latencyTier), - ) - streamAndPlay(voiceId = voiceId!!, apiKey = apiKey!!, request = request, playbackToken = playbackToken) - Log.d(tag, "elevenlabs stream ok durMs=${SystemClock.elapsedRealtime() - ttsStarted}") - } + val ttsStarted = SystemClock.elapsedRealtime() + val speech = requestTalkSpeak(cleaned, directive) + playGatewaySpeech(speech, playbackToken) + Log.d(tag, "talk.speak ok durMs=${SystemClock.elapsedRealtime() - ttsStarted} provider=${speech.provider}") } catch (err: Throwable) { if (isPlaybackCancelled(err, playbackToken)) { Log.d(tag, "assistant speech cancelled") return } - Log.w(tag, "speak failed: ${err.message ?: err::class.simpleName}; falling back to system voice") - try { - ensurePlaybackActive(playbackToken) - _usingFallbackTts.value = true - _statusText.value = "Speaking (System)…" - speakWithSystemTts(cleaned, playbackToken) - } catch (fallbackErr: Throwable) { - if 
(isPlaybackCancelled(fallbackErr, playbackToken)) { - Log.d(tag, "assistant fallback speech cancelled") - return - } - _statusText.value = "Speak failed: ${fallbackErr.message ?: fallbackErr::class.simpleName}" - Log.w(tag, "system voice failed: ${fallbackErr.message ?: fallbackErr::class.simpleName}") - } + _statusText.value = "Speak failed: ${err.message ?: err::class.simpleName}" + Log.w(tag, "talk.speak failed: ${err.message ?: err::class.simpleName}") } finally { _isSpeaking.value = false } } - private suspend fun streamAndPlay( - voiceId: String, - apiKey: String, - request: ElevenLabsRequest, - playbackToken: Long, - ) { + private data class GatewayTalkSpeech( + val audioBase64: String, + val provider: String, + val outputFormat: String?, + val mimeType: String?, + val fileExtension: String?, + ) + + private suspend fun requestTalkSpeak(text: String, directive: TalkDirective?): GatewayTalkSpeech { + val modelId = + directive?.modelId?.trim()?.takeIf { it.isNotEmpty() } ?: currentModelId?.trim()?.takeIf { it.isNotEmpty() } + val voiceId = + directive?.voiceId?.trim()?.takeIf { it.isNotEmpty() } ?: currentVoiceId?.trim()?.takeIf { it.isNotEmpty() } + val params = + buildJsonObject { + put("text", JsonPrimitive(text)) + voiceId?.let { put("voiceId", JsonPrimitive(it)) } + modelId?.let { put("modelId", JsonPrimitive(it)) } + TalkModeRuntime.resolveSpeed(directive?.speed, directive?.rateWpm)?.let { + put("speed", JsonPrimitive(it)) + } + TalkModeRuntime.validatedStability(directive?.stability, modelId)?.let { + put("stability", JsonPrimitive(it)) + } + TalkModeRuntime.validatedUnit(directive?.similarity)?.let { + put("similarity", JsonPrimitive(it)) + } + TalkModeRuntime.validatedUnit(directive?.style)?.let { + put("style", JsonPrimitive(it)) + } + directive?.speakerBoost?.let { put("speakerBoost", JsonPrimitive(it)) } + TalkModeRuntime.validatedSeed(directive?.seed)?.let { put("seed", JsonPrimitive(it)) } + 
TalkModeRuntime.validatedNormalize(directive?.normalize)?.let { + put("normalize", JsonPrimitive(it)) + } + TalkModeRuntime.validatedLanguage(directive?.language)?.let { + put("language", JsonPrimitive(it)) + } + } + val res = session.request("talk.speak", params.toString()) + val root = json.parseToJsonElement(res).asObjectOrNull() ?: error("talk.speak returned invalid JSON") + val audioBase64 = root["audioBase64"].asStringOrNull()?.trim().orEmpty() + val provider = root["provider"].asStringOrNull()?.trim().orEmpty() + if (audioBase64.isEmpty()) { + error("talk.speak missing audioBase64") + } + if (provider.isEmpty()) { + error("talk.speak missing provider") + } + return GatewayTalkSpeech( + audioBase64 = audioBase64, + provider = provider, + outputFormat = root["outputFormat"].asStringOrNull()?.trim(), + mimeType = root["mimeType"].asStringOrNull()?.trim(), + fileExtension = root["fileExtension"].asStringOrNull()?.trim(), + ) + } + + private suspend fun playGatewaySpeech(speech: GatewayTalkSpeech, playbackToken: Long) { ensurePlaybackActive(playbackToken) stopSpeaking(resetInterrupt = false) ensurePlaybackActive(playbackToken) - pcmStopRequested = false - val pcmSampleRate = TalkModeRuntime.parsePcmSampleRate(request.outputFormat) - if (pcmSampleRate != null) { + val audioBytes = try { - streamAndPlayPcm( - voiceId = voiceId, - apiKey = apiKey, - request = request, - sampleRate = pcmSampleRate, - playbackToken = playbackToken, - ) - return - } catch (err: Throwable) { - if (isPlaybackCancelled(err, playbackToken) || pcmStopRequested) return - Log.w(tag, "pcm playback failed; falling back to mp3: ${err.message ?: err::class.simpleName}") + Base64.decode(speech.audioBase64, Base64.DEFAULT) + } catch (err: IllegalArgumentException) { + throw IllegalStateException("talk.speak returned invalid audio", err) } - } - - // When falling back from PCM, rewrite format to MP3 and download to file. 
- // File-based playback avoids custom DataSource races and is reliable across OEMs. - val mp3Request = if (request.outputFormat?.startsWith("pcm_") == true) { - request.copy(outputFormat = "mp3_44100_128") - } else { - request - } - streamAndPlayMp3(voiceId = voiceId, apiKey = apiKey, request = mp3Request, playbackToken = playbackToken) - } - - private suspend fun streamAndPlayMp3( - voiceId: String, - apiKey: String, - request: ElevenLabsRequest, - playbackToken: Long, - ) { - val dataSource = StreamingMediaDataSource() - streamingSource = dataSource - - val player = MediaPlayer() - this.player = player - - val prepared = CompletableDeferred() - val finished = CompletableDeferred() - - player.setAudioAttributes( - AudioAttributes.Builder() - .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) - .setUsage(AudioAttributes.USAGE_MEDIA) - .build(), - ) - player.setOnPreparedListener { - it.start() - prepared.complete(Unit) - } - player.setOnCompletionListener { - finished.complete(Unit) - } - player.setOnErrorListener { _, _, _ -> - finished.completeExceptionally(IllegalStateException("MediaPlayer error")) - true - } - - player.setDataSource(dataSource) - withContext(Dispatchers.Main) { - player.prepareAsync() - } - - val fetchError = CompletableDeferred() - val fetchJob = - scope.launch(Dispatchers.IO) { - try { - streamTts(voiceId = voiceId, apiKey = apiKey, request = request, sink = dataSource, playbackToken = playbackToken) - fetchError.complete(null) - } catch (err: Throwable) { - dataSource.fail() - fetchError.complete(err) + val suffix = resolveGatewayAudioSuffix(speech) + val tempFile = + withContext(Dispatchers.IO) { + File.createTempFile("tts_", suffix, context.cacheDir).apply { + writeBytes(audioBytes) } } - - Log.d(tag, "play start") - try { - ensurePlaybackActive(playbackToken) - prepared.await() - ensurePlaybackActive(playbackToken) - finished.await() - ensurePlaybackActive(playbackToken) - fetchError.await()?.let { throw it } - } finally { - 
fetchJob.cancel() - cleanupPlayer() - } - Log.d(tag, "play done") - } - - /** - * Download ElevenLabs audio to a temp file, then play from disk via MediaPlayer. - * Simpler and more reliable than streaming: avoids custom DataSource races and - * AudioTrack underrun issues on OxygenOS/OnePlus. - */ - private suspend fun streamAndPlayViaFile(voiceId: String, apiKey: String, request: ElevenLabsRequest) { - val tempFile = withContext(Dispatchers.IO) { - val file = File.createTempFile("tts_", ".mp3", context.cacheDir) - val conn = openTtsConnection(voiceId = voiceId, apiKey = apiKey, request = request) - try { - val payload = buildRequestPayload(request) - conn.outputStream.use { it.write(payload.toByteArray()) } - val code = conn.responseCode - if (code >= 400) { - val body = conn.errorStream?.readBytes()?.toString(Charsets.UTF_8) ?: "" - file.delete() - throw IllegalStateException("ElevenLabs failed: $code $body") - } - Log.d(tag, "elevenlabs http code=$code voiceId=$voiceId format=${request.outputFormat}") - // Manual loop so cancellation is honoured on every chunk. - // input.copyTo() is a single blocking call with no yield points; if the - // coroutine is cancelled mid-download the entire response would finish - // before cancellation was observed. 
- conn.inputStream.use { input -> - file.outputStream().use { out -> - val buf = ByteArray(8192) - var n: Int - while (input.read(buf).also { n = it } != -1) { - ensureActive() - out.write(buf, 0, n) - } - } - } - } catch (err: Throwable) { - file.delete() - throw err - } finally { - conn.disconnect() - } - file - } try { val player = MediaPlayer() this.player = player @@ -1094,181 +778,45 @@ class TalkModeManager( } player.setDataSource(tempFile.absolutePath) withContext(Dispatchers.IO) { player.prepare() } - Log.d(tag, "file play start bytes=${tempFile.length()}") + ensurePlaybackActive(playbackToken) player.start() finished.await() - Log.d(tag, "file play done") + ensurePlaybackActive(playbackToken) } finally { - try { cleanupPlayer() } catch (_: Throwable) {} + try { + cleanupPlayer() + } catch (_: Throwable) {} tempFile.delete() } } - private suspend fun streamAndPlayPcm( - voiceId: String, - apiKey: String, - request: ElevenLabsRequest, - sampleRate: Int, - playbackToken: Long, - ) { - ensurePlaybackActive(playbackToken) - val minBuffer = - AudioTrack.getMinBufferSize( - sampleRate, - AudioFormat.CHANNEL_OUT_MONO, - AudioFormat.ENCODING_PCM_16BIT, - ) - if (minBuffer <= 0) { - throw IllegalStateException("AudioTrack buffer size invalid: $minBuffer") + private fun resolveGatewayAudioSuffix(speech: GatewayTalkSpeech): String { + val extension = speech.fileExtension?.trim() + if (!extension.isNullOrEmpty()) { + return if (extension.startsWith(".")) extension else ".$extension" } - - val bufferSize = max(minBuffer * 2, 8 * 1024) - val track = - AudioTrack( - AudioAttributes.Builder() - .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) - .setUsage(AudioAttributes.USAGE_MEDIA) - .build(), - AudioFormat.Builder() - .setSampleRate(sampleRate) - .setChannelMask(AudioFormat.CHANNEL_OUT_MONO) - .setEncoding(AudioFormat.ENCODING_PCM_16BIT) - .build(), - bufferSize, - AudioTrack.MODE_STREAM, - AudioManager.AUDIO_SESSION_ID_GENERATE, - ) - if (track.state != 
AudioTrack.STATE_INITIALIZED) { - track.release() - throw IllegalStateException("AudioTrack init failed") - } - pcmTrack = track - // Don't call track.play() yet — start the track only when the first audio - // chunk arrives from ElevenLabs (see streamPcm). OxygenOS/OnePlus kills an - // AudioTrack that underruns (no data written) for ~1+ seconds, causing - // write() to return 0. Deferring play() until first data avoids the underrun. - - Log.d(tag, "pcm play start sampleRate=$sampleRate bufferSize=$bufferSize") - try { - streamPcm(voiceId = voiceId, apiKey = apiKey, request = request, track = track, playbackToken = playbackToken) - } finally { - cleanupPcmTrack() - } - Log.d(tag, "pcm play done") + val mimeType = speech.mimeType?.trim()?.lowercase() + if (mimeType == "audio/mpeg") return ".mp3" + if (mimeType == "audio/ogg") return ".ogg" + if (mimeType == "audio/wav") return ".wav" + if (mimeType == "audio/webm") return ".webm" + val outputFormat = speech.outputFormat?.trim()?.lowercase().orEmpty() + if (outputFormat == "mp3" || outputFormat.startsWith("mp3_") || outputFormat.endsWith("-mp3")) return ".mp3" + if (outputFormat == "opus" || outputFormat.startsWith("opus_")) return ".ogg" + if (outputFormat.endsWith("-wav")) return ".wav" + if (outputFormat.endsWith("-webm")) return ".webm" + return ".audio" } - private suspend fun speakWithSystemTts(text: String, playbackToken: Long) { - val trimmed = text.trim() - if (trimmed.isEmpty()) return - ensurePlaybackActive(playbackToken) - val ok = ensureSystemTts() - if (!ok) { - throw IllegalStateException("system TTS unavailable") - } - ensurePlaybackActive(playbackToken) - - val tts = systemTts ?: throw IllegalStateException("system TTS unavailable") - val utteranceId = "talk-${UUID.randomUUID()}" - val deferred = CompletableDeferred() - systemTtsPending?.cancel() - systemTtsPending = deferred - systemTtsPendingId = utteranceId - - withContext(Dispatchers.Main) { - ensurePlaybackActive(playbackToken) - val params = 
Bundle() - tts.speak(trimmed, TextToSpeech.QUEUE_FLUSH, params, utteranceId) - } - - withContext(Dispatchers.IO) { - try { - kotlinx.coroutines.withTimeout(180_000) { deferred.await() } - } catch (err: Throwable) { - throw err - } - ensurePlaybackActive(playbackToken) - } - } - - private suspend fun ensureSystemTts(): Boolean { - if (systemTts != null) return true - return withContext(Dispatchers.Main) { - val deferred = CompletableDeferred() - val tts = - try { - TextToSpeech(context) { status -> - deferred.complete(status == TextToSpeech.SUCCESS) - } - } catch (_: Throwable) { - deferred.complete(false) - null - } - if (tts == null) return@withContext false - - tts.setOnUtteranceProgressListener( - object : UtteranceProgressListener() { - override fun onStart(utteranceId: String?) {} - - override fun onDone(utteranceId: String?) { - if (utteranceId == null) return - if (utteranceId != systemTtsPendingId) return - systemTtsPending?.complete(Unit) - systemTtsPending = null - systemTtsPendingId = null - } - - @Suppress("OVERRIDE_DEPRECATION") - @Deprecated("Deprecated in Java") - override fun onError(utteranceId: String?) { - if (utteranceId == null) return - if (utteranceId != systemTtsPendingId) return - systemTtsPending?.completeExceptionally(IllegalStateException("system TTS error")) - systemTtsPending = null - systemTtsPendingId = null - } - - override fun onError(utteranceId: String?, errorCode: Int) { - if (utteranceId == null) return - if (utteranceId != systemTtsPendingId) return - systemTtsPending?.completeExceptionally(IllegalStateException("system TTS error $errorCode")) - systemTtsPending = null - systemTtsPendingId = null - } - }, - ) - - val ok = - try { - deferred.await() - } catch (_: Throwable) { - false - } - if (ok) { - systemTts = tts - } else { - tts.shutdown() - } - ok - } - } - - /** Stop any active TTS immediately — call when user taps mic to barge in. 
*/ fun stopTts() { - stopActiveStreamingTts() stopSpeaking(resetInterrupt = true) _isSpeaking.value = false _statusText.value = "Listening" } private fun stopSpeaking(resetInterrupt: Boolean = true) { - pcmStopRequested = true if (!_isSpeaking.value) { cleanupPlayer() - cleanupPcmTrack() - systemTts?.stop() - systemTtsPending?.cancel() - systemTtsPending = null - systemTtsPendingId = null abandonAudioFocus() return } @@ -1277,11 +825,6 @@ class TalkModeManager( lastInterruptedAtSeconds = currentMs / 1000.0 } cleanupPlayer() - cleanupPcmTrack() - systemTts?.stop() - systemTtsPending?.cancel() - systemTtsPending = null - systemTtsPendingId = null _isSpeaking.value = false abandonAudioFocus() } @@ -1325,22 +868,6 @@ class TalkModeManager( player?.stop() player?.release() player = null - streamingSource?.close() - streamingSource = null - } - - private fun cleanupPcmTrack() { - val track = pcmTrack ?: return - try { - track.pause() - track.flush() - track.stop() - } catch (_: Throwable) { - // ignore cleanup errors - } finally { - track.release() - } - pcmTrack = null } private fun shouldInterrupt(transcript: String): Boolean { @@ -1369,71 +896,18 @@ class TalkModeManager( } private suspend fun reloadConfig() { - val envVoice = System.getenv("ELEVENLABS_VOICE_ID")?.trim() - val sagVoice = System.getenv("SAG_VOICE_ID")?.trim() - val envKey = System.getenv("ELEVENLABS_API_KEY")?.trim() try { - val res = session.request("talk.config", """{"includeSecrets":true}""") + val res = session.request("talk.config", "{}") val root = json.parseToJsonElement(res).asObjectOrNull() - val parsed = - TalkModeGatewayConfigParser.parse( - config = root?.get("config").asObjectOrNull(), - defaultProvider = defaultTalkProvider, - defaultModelIdFallback = defaultModelIdFallback, - defaultOutputFormatFallback = defaultOutputFormatFallback, - envVoice = envVoice, - sagVoice = sagVoice, - envKey = envKey, - ) - if (parsed.missingResolvedPayload) { - Log.w(tag, "talk config ignored: normalized 
payload missing talk.resolved") - } - + val parsed = TalkModeGatewayConfigParser.parse(root?.get("config").asObjectOrNull()) if (!isCanonicalMainSessionKey(mainSessionKey)) { mainSessionKey = parsed.mainSessionKey } - defaultVoiceId = parsed.defaultVoiceId - voiceAliases = parsed.voiceAliases - if (!voiceOverrideActive) currentVoiceId = defaultVoiceId - defaultModelId = parsed.defaultModelId - if (!modelOverrideActive) currentModelId = defaultModelId - defaultOutputFormat = parsed.defaultOutputFormat - apiKey = parsed.apiKey silenceWindowMs = parsed.silenceTimeoutMs - Log.d( - tag, - "reloadConfig apiKey=${if (apiKey != null) "set" else "null"} voiceId=$defaultVoiceId silenceTimeoutMs=${parsed.silenceTimeoutMs}", - ) - if (parsed.interruptOnSpeech != null) interruptOnSpeech = parsed.interruptOnSpeech - activeProviderIsElevenLabs = parsed.activeProvider == defaultTalkProvider - if (!activeProviderIsElevenLabs) { - // Clear ElevenLabs credentials so playAssistant won't attempt ElevenLabs calls - apiKey = null - defaultVoiceId = null - if (!voiceOverrideActive) currentVoiceId = null - Log.w(tag, "talk provider ${parsed.activeProvider} unsupported; using system voice fallback") - } else if (parsed.normalizedPayload) { - Log.d(tag, "talk config provider=elevenlabs") - } + parsed.interruptOnSpeech?.let { interruptOnSpeech = it } configLoaded = true } catch (_: Throwable) { - val fallback = - TalkModeGatewayConfigParser.fallback( - defaultProvider = defaultTalkProvider, - defaultModelIdFallback = defaultModelIdFallback, - defaultOutputFormatFallback = defaultOutputFormatFallback, - envVoice = envVoice, - sagVoice = sagVoice, - envKey = envKey, - ) - silenceWindowMs = fallback.silenceTimeoutMs - defaultVoiceId = fallback.defaultVoiceId - defaultModelId = fallback.defaultModelId - if (!modelOverrideActive) currentModelId = defaultModelId - apiKey = fallback.apiKey - voiceAliases = fallback.voiceAliases - defaultOutputFormat = fallback.defaultOutputFormat - // Keep config 
load retryable after transient fetch failures. + silenceWindowMs = TalkDefaults.defaultSilenceTimeoutMs configLoaded = false } } @@ -1443,189 +917,6 @@ class TalkModeManager( return obj["runId"].asStringOrNull() } - private suspend fun streamTts( - voiceId: String, - apiKey: String, - request: ElevenLabsRequest, - sink: StreamingMediaDataSource, - playbackToken: Long, - ) { - withContext(Dispatchers.IO) { - ensurePlaybackActive(playbackToken) - val conn = openTtsConnection(voiceId = voiceId, apiKey = apiKey, request = request) - try { - val payload = buildRequestPayload(request) - conn.outputStream.use { it.write(payload.toByteArray()) } - - val code = conn.responseCode - Log.d(tag, "elevenlabs http code=$code voiceId=$voiceId format=${request.outputFormat} keyLen=${apiKey.length}") - if (code >= 400) { - val message = conn.errorStream?.readBytes()?.toString(Charsets.UTF_8) ?: "" - Log.w(tag, "elevenlabs error code=$code voiceId=$voiceId body=$message") - sink.fail() - throw IllegalStateException("ElevenLabs failed: $code $message") - } - - val buffer = ByteArray(8 * 1024) - conn.inputStream.use { input -> - while (true) { - ensurePlaybackActive(playbackToken) - val read = input.read(buffer) - if (read <= 0) break - ensurePlaybackActive(playbackToken) - sink.append(buffer.copyOf(read)) - } - } - sink.finish() - } finally { - conn.disconnect() - } - } - } - - private suspend fun streamPcm( - voiceId: String, - apiKey: String, - request: ElevenLabsRequest, - track: AudioTrack, - playbackToken: Long, - ) { - withContext(Dispatchers.IO) { - ensurePlaybackActive(playbackToken) - val conn = openTtsConnection(voiceId = voiceId, apiKey = apiKey, request = request) - try { - val payload = buildRequestPayload(request) - conn.outputStream.use { it.write(payload.toByteArray()) } - - val code = conn.responseCode - if (code >= 400) { - val message = conn.errorStream?.readBytes()?.toString(Charsets.UTF_8) ?: "" - throw IllegalStateException("ElevenLabs failed: $code $message") - 
} - - var totalBytesWritten = 0L - var trackStarted = false - val buffer = ByteArray(8 * 1024) - conn.inputStream.use { input -> - while (true) { - if (pcmStopRequested || isPlaybackCancelled(null, playbackToken)) return@withContext - val read = input.read(buffer) - if (read <= 0) break - // Start the AudioTrack only when the first chunk is ready — avoids - // the ~1.4s underrun window while ElevenLabs prepares audio. - // OxygenOS kills a track that underruns for >1s (write() returns 0). - if (!trackStarted) { - track.play() - trackStarted = true - } - var offset = 0 - while (offset < read) { - if (pcmStopRequested || isPlaybackCancelled(null, playbackToken)) return@withContext - val wrote = - try { - track.write(buffer, offset, read - offset) - } catch (err: Throwable) { - if (pcmStopRequested || isPlaybackCancelled(err, playbackToken)) return@withContext - throw err - } - if (wrote <= 0) { - if (pcmStopRequested || isPlaybackCancelled(null, playbackToken)) return@withContext - throw IllegalStateException("AudioTrack write failed: $wrote") - } - offset += wrote - } - } - } - } finally { - conn.disconnect() - } - } - } - - private suspend fun waitForPcmDrain(track: AudioTrack, totalFrames: Long, sampleRate: Int) { - if (totalFrames <= 0) return - withContext(Dispatchers.IO) { - val drainDeadline = SystemClock.elapsedRealtime() + 15_000 - while (!pcmStopRequested && SystemClock.elapsedRealtime() < drainDeadline) { - val played = track.playbackHeadPosition.toLong().and(0xFFFFFFFFL) - if (played >= totalFrames) break - val remainingFrames = totalFrames - played - val sleepMs = ((remainingFrames * 1000L) / sampleRate.toLong()).coerceIn(12L, 120L) - delay(sleepMs) - } - } - } - - private fun openTtsConnection( - voiceId: String, - apiKey: String, - request: ElevenLabsRequest, - ): HttpURLConnection { - val baseUrl = "https://api.elevenlabs.io/v1/text-to-speech/$voiceId/stream" - val latencyTier = request.latencyTier - val url = - if (latencyTier != null) { - 
URL("$baseUrl?optimize_streaming_latency=$latencyTier") - } else { - URL(baseUrl) - } - val conn = url.openConnection() as HttpURLConnection - conn.requestMethod = "POST" - conn.connectTimeout = 30_000 - conn.readTimeout = 30_000 - conn.setRequestProperty("Content-Type", "application/json") - conn.setRequestProperty("Accept", resolveAcceptHeader(request.outputFormat)) - conn.setRequestProperty("xi-api-key", apiKey) - conn.doOutput = true - return conn - } - - private fun resolveAcceptHeader(outputFormat: String?): String { - val normalized = outputFormat?.trim()?.lowercase().orEmpty() - return if (normalized.startsWith("pcm_")) "audio/pcm" else "audio/mpeg" - } - - private fun buildRequestPayload(request: ElevenLabsRequest): String { - val voiceSettingsEntries = - buildJsonObject { - request.speed?.let { put("speed", JsonPrimitive(it)) } - request.stability?.let { put("stability", JsonPrimitive(it)) } - request.similarity?.let { put("similarity_boost", JsonPrimitive(it)) } - request.style?.let { put("style", JsonPrimitive(it)) } - request.speakerBoost?.let { put("use_speaker_boost", JsonPrimitive(it)) } - } - - val payload = - buildJsonObject { - put("text", JsonPrimitive(request.text)) - request.modelId?.takeIf { it.isNotEmpty() }?.let { put("model_id", JsonPrimitive(it)) } - request.outputFormat?.takeIf { it.isNotEmpty() }?.let { put("output_format", JsonPrimitive(it)) } - request.seed?.let { put("seed", JsonPrimitive(it)) } - request.normalize?.let { put("apply_text_normalization", JsonPrimitive(it)) } - request.language?.let { put("language_code", JsonPrimitive(it)) } - if (voiceSettingsEntries.isNotEmpty()) { - put("voice_settings", voiceSettingsEntries) - } - } - - return payload.toString() - } - - private data class ElevenLabsRequest( - val text: String, - val modelId: String?, - val outputFormat: String?, - val speed: Double?, - val stability: Double?, - val similarity: Double?, - val style: Double?, - val speakerBoost: Boolean?, - val seed: Long?, - val 
normalize: String?, - val language: String?, - val latencyTier: Int?, - ) - private object TalkModeRuntime { fun resolveSpeed(speed: Double?, rateWpm: Int?): Double? { if (rateWpm != null && rateWpm > 0) { @@ -1673,28 +964,6 @@ class TalkModeManager( return normalized } - fun validatedOutputFormat(value: String?): String? { - val trimmed = value?.trim()?.lowercase() ?: return null - if (trimmed.isEmpty()) return null - if (trimmed.startsWith("mp3_")) return trimmed - return if (parsePcmSampleRate(trimmed) != null) trimmed else null - } - - fun validatedLatencyTier(value: Int?): Int? { - if (value == null) return null - if (value < 0 || value > 4) return null - return value - } - - fun parsePcmSampleRate(value: String?): Int? { - val trimmed = value?.trim()?.lowercase() ?: return null - if (!trimmed.startsWith("pcm_")) return null - val suffix = trimmed.removePrefix("pcm_") - val digits = suffix.takeWhile { it.isDigit() } - val rate = digits.toIntOrNull() ?: return null - return if (rate in setOf(16000, 22050, 24000, 44100)) rate else null - } - fun isMessageTimestampAfter(timestamp: Double, sinceSeconds: Double): Boolean { val sinceMs = sinceSeconds * 1000 return if (timestamp > 10_000_000_000) { From 4386a0ace8ada00f88dd0688b5023e93afe94ea2 Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Fri, 20 Mar 2026 10:29:06 +0530 Subject: [PATCH 06/11] refactor(android): remove legacy elevenlabs talk stack --- .../app/voice/ElevenLabsStreamingTts.kt | 338 ------------------ .../app/voice/StreamingMediaDataSource.kt | 98 ----- .../app/voice/TalkModeVoiceResolver.kt | 122 ------- .../app/voice/TalkModeVoiceResolverTest.kt | 92 ----- 4 files changed, 650 deletions(-) delete mode 100644 apps/android/app/src/main/java/ai/openclaw/app/voice/ElevenLabsStreamingTts.kt delete mode 100644 apps/android/app/src/main/java/ai/openclaw/app/voice/StreamingMediaDataSource.kt delete mode 100644 apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeVoiceResolver.kt delete mode 100644 
apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeVoiceResolverTest.kt diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/ElevenLabsStreamingTts.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/ElevenLabsStreamingTts.kt deleted file mode 100644 index ff13cf73911..00000000000 --- a/apps/android/app/src/main/java/ai/openclaw/app/voice/ElevenLabsStreamingTts.kt +++ /dev/null @@ -1,338 +0,0 @@ -package ai.openclaw.app.voice - -import android.media.AudioAttributes -import android.media.AudioFormat -import android.media.AudioManager -import android.media.AudioTrack -import android.util.Base64 -import android.util.Log -import kotlinx.coroutines.* -import kotlinx.coroutines.flow.MutableStateFlow -import kotlinx.coroutines.flow.StateFlow -import okhttp3.* -import org.json.JSONObject -import kotlin.math.max - -/** - * Streams text chunks to ElevenLabs WebSocket API and plays audio in real-time. - * - * Usage: - * 1. Create instance with voice/API config - * 2. Call [start] to open WebSocket + AudioTrack - * 3. Call [sendText] with incremental text chunks as they arrive - * 4. Call [finish] when the full response is ready (sends EOS to ElevenLabs) - * 5. Call [stop] to cancel/cleanup at any time - * - * Audio playback begins as soon as the first audio chunk arrives from ElevenLabs, - * typically within ~100ms of the first text chunk for eleven_flash_v2_5. - * - * Note: eleven_v3 does NOT support WebSocket streaming. Use eleven_flash_v2_5 - * or eleven_flash_v2 for lowest latency. 
- */ -class ElevenLabsStreamingTts( - private val scope: CoroutineScope, - private val voiceId: String, - private val apiKey: String, - private val modelId: String = "eleven_flash_v2_5", - private val outputFormat: String = "pcm_24000", - private val sampleRate: Int = 24000, -) { - companion object { - private const val TAG = "ElevenLabsStreamTTS" - private const val BASE_URL = "wss://api.elevenlabs.io/v1/text-to-speech" - - /** Models that support WebSocket input streaming */ - val STREAMING_MODELS = setOf( - "eleven_flash_v2_5", - "eleven_flash_v2", - "eleven_multilingual_v2", - "eleven_turbo_v2_5", - "eleven_turbo_v2", - "eleven_monolingual_v1", - ) - - fun supportsStreaming(modelId: String): Boolean = modelId in STREAMING_MODELS - } - - private val _isPlaying = MutableStateFlow(false) - val isPlaying: StateFlow = _isPlaying - - private var webSocket: WebSocket? = null - private var audioTrack: AudioTrack? = null - private var trackStarted = false - private var client: OkHttpClient? = null - @Volatile private var stopped = false - @Volatile private var finished = false - @Volatile var hasReceivedAudio = false - private set - private var drainJob: Job? = null - - // Track text already sent so we only send incremental chunks - private var sentTextLength = 0 - @Volatile private var wsReady = false - private val pendingText = mutableListOf() - - /** - * Open the WebSocket connection and prepare AudioTrack. - * Must be called before [sendText]. 
- */ - fun start() { - stopped = false - finished = false - hasReceivedAudio = false - sentTextLength = 0 - trackStarted = false - wsReady = false - sentFullText = "" - synchronized(pendingText) { pendingText.clear() } - - // Prepare AudioTrack - val minBuffer = AudioTrack.getMinBufferSize( - sampleRate, - AudioFormat.CHANNEL_OUT_MONO, - AudioFormat.ENCODING_PCM_16BIT, - ) - val bufferSize = max(minBuffer * 2, 8 * 1024) - val track = AudioTrack( - AudioAttributes.Builder() - .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) - .setUsage(AudioAttributes.USAGE_MEDIA) - .build(), - AudioFormat.Builder() - .setSampleRate(sampleRate) - .setChannelMask(AudioFormat.CHANNEL_OUT_MONO) - .setEncoding(AudioFormat.ENCODING_PCM_16BIT) - .build(), - bufferSize, - AudioTrack.MODE_STREAM, - AudioManager.AUDIO_SESSION_ID_GENERATE, - ) - if (track.state != AudioTrack.STATE_INITIALIZED) { - track.release() - Log.e(TAG, "AudioTrack init failed") - return - } - audioTrack = track - _isPlaying.value = true - - // Open WebSocket - val url = "$BASE_URL/$voiceId/stream-input?model_id=$modelId&output_format=$outputFormat" - val okClient = OkHttpClient.Builder() - .readTimeout(30, java.util.concurrent.TimeUnit.SECONDS) - .writeTimeout(10, java.util.concurrent.TimeUnit.SECONDS) - .build() - client = okClient - - val request = Request.Builder() - .url(url) - .header("xi-api-key", apiKey) - .build() - - webSocket = okClient.newWebSocket(request, object : WebSocketListener() { - override fun onOpen(webSocket: WebSocket, response: Response) { - Log.d(TAG, "WebSocket connected") - // Send initial config with voice settings - val config = JSONObject().apply { - put("text", " ") - put("voice_settings", JSONObject().apply { - put("stability", 0.5) - put("similarity_boost", 0.8) - put("use_speaker_boost", false) - }) - put("generation_config", JSONObject().apply { - put("chunk_length_schedule", org.json.JSONArray(listOf(120, 160, 250, 290))) - }) - } - webSocket.send(config.toString()) - wsReady = 
true - // Flush any text that was queued before WebSocket was ready - synchronized(pendingText) { - for (queued in pendingText) { - val msg = JSONObject().apply { put("text", queued) } - webSocket.send(msg.toString()) - Log.d(TAG, "flushed queued chunk: ${queued.length} chars") - } - pendingText.clear() - } - // Send deferred EOS if finish() was called before WebSocket was ready - if (finished) { - val eos = JSONObject().apply { put("text", "") } - webSocket.send(eos.toString()) - Log.d(TAG, "sent deferred EOS") - } - } - - override fun onMessage(webSocket: WebSocket, text: String) { - if (stopped) return - try { - val json = JSONObject(text) - val audio = json.optString("audio", "") - if (audio.isNotEmpty()) { - val pcmBytes = Base64.decode(audio, Base64.DEFAULT) - writeToTrack(pcmBytes) - } - } catch (e: Exception) { - Log.e(TAG, "Error parsing WebSocket message: ${e.message}") - } - } - - override fun onFailure(webSocket: WebSocket, t: Throwable, response: Response?) { - Log.e(TAG, "WebSocket error: ${t.message}") - stopped = true - cleanup() - } - - override fun onClosed(webSocket: WebSocket, code: Int, reason: String) { - Log.d(TAG, "WebSocket closed: $code $reason") - // Wait for AudioTrack to finish playing buffered audio, then cleanup - drainJob = scope.launch(Dispatchers.IO) { - drainAudioTrack() - cleanup() - } - } - }) - } - - /** - * Send incremental text. Call with the full accumulated text so far — - * only the new portion (since last send) will be transmitted. - */ - // Track the full text we've sent so we can detect replacement vs append - private var sentFullText = "" - - /** - // If we already sent a superset of this text, it's just a stale/out-of-order - // event from a different thread — not a real divergence. Ignore it. - if (sentFullText.startsWith(fullText)) return true - * Returns true if text was accepted, false if text diverged (caller should restart). 
- */ - @Synchronized - fun sendText(fullText: String): Boolean { - if (stopped) return false - if (finished) return true // Already finishing — not a diverge, don't restart - - // Detect text replacement: if the new text doesn't start with what we already sent, - // the stream has diverged (e.g., tool call interrupted and text was replaced). - if (sentFullText.isNotEmpty() && !fullText.startsWith(sentFullText)) { - // If we already sent a superset of this text, it's just a stale/out-of-order - // event from a different thread — not a real divergence. Ignore it. - if (sentFullText.startsWith(fullText)) return true - Log.d(TAG, "text diverged — sent='${sentFullText.take(60)}' new='${fullText.take(60)}'") - return false - } - - if (fullText.length > sentTextLength) { - val newText = fullText.substring(sentTextLength) - sentTextLength = fullText.length - sentFullText = fullText - - val ws = webSocket - if (ws != null && wsReady) { - val msg = JSONObject().apply { put("text", newText) } - ws.send(msg.toString()) - Log.d(TAG, "sent chunk: ${newText.length} chars") - } else { - // Queue if WebSocket not connected yet (ws null = still connecting, wsReady false = handshake pending) - synchronized(pendingText) { pendingText.add(newText) } - Log.d(TAG, "queued chunk: ${newText.length} chars (ws not ready)") - } - } - return true - } - - /** - * Signal that no more text is coming. Sends EOS to ElevenLabs. - * The WebSocket will close after generating remaining audio. - */ - @Synchronized - fun finish() { - if (stopped || finished) return - finished = true - val ws = webSocket - if (ws != null && wsReady) { - // Send empty text to signal end of stream - val eos = JSONObject().apply { put("text", "") } - ws.send(eos.toString()) - Log.d(TAG, "sent EOS") - } - // else: WebSocket not ready yet; onOpen will send EOS after flushing queued text - } - - /** - * Immediately stop playback and close everything. 
- */ - fun stop() { - stopped = true - finished = true - drainJob?.cancel() - drainJob = null - webSocket?.cancel() - webSocket = null - val track = audioTrack - audioTrack = null - if (track != null) { - try { - track.pause() - track.flush() - track.release() - } catch (_: Throwable) {} - } - _isPlaying.value = false - client?.dispatcher?.executorService?.shutdown() - client = null - } - - private fun writeToTrack(pcmBytes: ByteArray) { - val track = audioTrack ?: return - if (stopped) return - - // Start playback on first audio chunk — avoids underrun - if (!trackStarted) { - track.play() - trackStarted = true - hasReceivedAudio = true - Log.d(TAG, "AudioTrack started on first chunk") - } - - var offset = 0 - while (offset < pcmBytes.size && !stopped) { - val wrote = track.write(pcmBytes, offset, pcmBytes.size - offset) - if (wrote <= 0) { - if (stopped) return - Log.w(TAG, "AudioTrack write returned $wrote") - break - } - offset += wrote - } - } - - private fun drainAudioTrack() { - if (stopped) return - // Wait up to 10s for audio to finish playing - val deadline = System.currentTimeMillis() + 10_000 - while (!stopped && System.currentTimeMillis() < deadline) { - // Check if track is still playing - val track = audioTrack ?: return - if (track.playState != AudioTrack.PLAYSTATE_PLAYING) return - try { - Thread.sleep(100) - } catch (_: InterruptedException) { - return - } - } - } - - private fun cleanup() { - val track = audioTrack - audioTrack = null - if (track != null) { - try { - track.stop() - track.release() - } catch (_: Throwable) {} - } - _isPlaying.value = false - client?.dispatcher?.executorService?.shutdown() - client = null - } -} diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/StreamingMediaDataSource.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/StreamingMediaDataSource.kt deleted file mode 100644 index 90bbd81b8bd..00000000000 --- a/apps/android/app/src/main/java/ai/openclaw/app/voice/StreamingMediaDataSource.kt +++ 
/dev/null @@ -1,98 +0,0 @@ -package ai.openclaw.app.voice - -import android.media.MediaDataSource -import kotlin.math.min - -internal class StreamingMediaDataSource : MediaDataSource() { - private data class Chunk(val start: Long, val data: ByteArray) - - private val lock = Object() - private val chunks = ArrayList() - private var totalSize: Long = 0 - private var closed = false - private var finished = false - private var lastReadIndex = 0 - - fun append(data: ByteArray) { - if (data.isEmpty()) return - synchronized(lock) { - if (closed || finished) return - val chunk = Chunk(totalSize, data) - chunks.add(chunk) - totalSize += data.size.toLong() - lock.notifyAll() - } - } - - fun finish() { - synchronized(lock) { - if (closed) return - finished = true - lock.notifyAll() - } - } - - fun fail() { - synchronized(lock) { - closed = true - lock.notifyAll() - } - } - - override fun readAt(position: Long, buffer: ByteArray, offset: Int, size: Int): Int { - if (position < 0) return -1 - synchronized(lock) { - while (!closed && !finished && position >= totalSize) { - lock.wait() - } - if (closed) return -1 - if (position >= totalSize && finished) return -1 - - val available = (totalSize - position).toInt() - val toRead = min(size, available) - var remaining = toRead - var destOffset = offset - var pos = position - - var index = findChunkIndex(pos) - while (remaining > 0 && index < chunks.size) { - val chunk = chunks[index] - val inChunkOffset = (pos - chunk.start).toInt() - if (inChunkOffset >= chunk.data.size) { - index++ - continue - } - val copyLen = min(remaining, chunk.data.size - inChunkOffset) - System.arraycopy(chunk.data, inChunkOffset, buffer, destOffset, copyLen) - remaining -= copyLen - destOffset += copyLen - pos += copyLen - if (inChunkOffset + copyLen >= chunk.data.size) { - index++ - } - } - - return toRead - remaining - } - } - - override fun getSize(): Long = -1 - - override fun close() { - synchronized(lock) { - closed = true - lock.notifyAll() - } - } - 
- private fun findChunkIndex(position: Long): Int { - var index = lastReadIndex - while (index < chunks.size) { - val chunk = chunks[index] - if (position < chunk.start + chunk.data.size) break - index++ - } - lastReadIndex = index - return index - } -} diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeVoiceResolver.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeVoiceResolver.kt deleted file mode 100644 index 7ada19e166b..00000000000 --- a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeVoiceResolver.kt +++ /dev/null @@ -1,122 +0,0 @@ -package ai.openclaw.app.voice - -import java.net.HttpURLConnection -import java.net.URL -import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.withContext -import kotlinx.serialization.json.Json -import kotlinx.serialization.json.JsonArray -import kotlinx.serialization.json.JsonElement -import kotlinx.serialization.json.JsonObject -import kotlinx.serialization.json.JsonPrimitive - -internal data class ElevenLabsVoice(val voiceId: String, val name: String?) - -internal data class TalkModeResolvedVoice( - val voiceId: String?, - val fallbackVoiceId: String?, - val defaultVoiceId: String?, - val currentVoiceId: String?, - val selectedVoiceName: String? = null, -) - -internal object TalkModeVoiceResolver { - fun resolveVoiceAlias(value: String?, voiceAliases: Map): String? 
{ - val trimmed = value?.trim().orEmpty() - if (trimmed.isEmpty()) return null - val normalized = normalizeAliasKey(trimmed) - voiceAliases[normalized]?.let { return it } - if (voiceAliases.values.any { it.equals(trimmed, ignoreCase = true) }) return trimmed - return if (isLikelyVoiceId(trimmed)) trimmed else null - } - - suspend fun resolveVoiceId( - preferred: String?, - fallbackVoiceId: String?, - defaultVoiceId: String?, - currentVoiceId: String?, - voiceOverrideActive: Boolean, - listVoices: suspend () -> List, - ): TalkModeResolvedVoice { - val trimmed = preferred?.trim().orEmpty() - if (trimmed.isNotEmpty()) { - return TalkModeResolvedVoice( - voiceId = trimmed, - fallbackVoiceId = fallbackVoiceId, - defaultVoiceId = defaultVoiceId, - currentVoiceId = currentVoiceId, - ) - } - if (!fallbackVoiceId.isNullOrBlank()) { - return TalkModeResolvedVoice( - voiceId = fallbackVoiceId, - fallbackVoiceId = fallbackVoiceId, - defaultVoiceId = defaultVoiceId, - currentVoiceId = currentVoiceId, - ) - } - - val first = listVoices().firstOrNull() - if (first == null) { - return TalkModeResolvedVoice( - voiceId = null, - fallbackVoiceId = fallbackVoiceId, - defaultVoiceId = defaultVoiceId, - currentVoiceId = currentVoiceId, - ) - } - - return TalkModeResolvedVoice( - voiceId = first.voiceId, - fallbackVoiceId = first.voiceId, - defaultVoiceId = if (defaultVoiceId.isNullOrBlank()) first.voiceId else defaultVoiceId, - currentVoiceId = if (voiceOverrideActive) currentVoiceId else first.voiceId, - selectedVoiceName = first.name, - ) - } - - suspend fun listVoices(apiKey: String, json: Json): List { - return withContext(Dispatchers.IO) { - val url = URL("https://api.elevenlabs.io/v1/voices") - val conn = url.openConnection() as HttpURLConnection - try { - conn.requestMethod = "GET" - conn.connectTimeout = 15_000 - conn.readTimeout = 15_000 - conn.setRequestProperty("xi-api-key", apiKey) - - val code = conn.responseCode - val stream = if (code >= 400) conn.errorStream else 
conn.inputStream - val data = stream?.use { it.readBytes() } ?: byteArrayOf() - if (code >= 400) { - val message = data.toString(Charsets.UTF_8) - throw IllegalStateException("ElevenLabs voices failed: $code $message") - } - - val root = json.parseToJsonElement(data.toString(Charsets.UTF_8)).asObjectOrNull() - val voices = (root?.get("voices") as? JsonArray) ?: JsonArray(emptyList()) - voices.mapNotNull { entry -> - val obj = entry.asObjectOrNull() ?: return@mapNotNull null - val voiceId = obj["voice_id"].asStringOrNull() ?: return@mapNotNull null - val name = obj["name"].asStringOrNull() - ElevenLabsVoice(voiceId, name) - } - } finally { - conn.disconnect() - } - } - } - - private fun isLikelyVoiceId(value: String): Boolean { - if (value.length < 10) return false - return value.all { it.isLetterOrDigit() || it == '-' || it == '_' } - } - - private fun normalizeAliasKey(value: String): String = - value.trim().lowercase() -} - -private fun JsonElement?.asObjectOrNull(): JsonObject? = this as? JsonObject - -private fun JsonElement?.asStringOrNull(): String? = - (this as? 
JsonPrimitive)?.takeIf { it.isString }?.content diff --git a/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeVoiceResolverTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeVoiceResolverTest.kt deleted file mode 100644 index 5cd46895d42..00000000000 --- a/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeVoiceResolverTest.kt +++ /dev/null @@ -1,92 +0,0 @@ -package ai.openclaw.app.voice - -import kotlinx.coroutines.runBlocking -import org.junit.Assert.assertEquals -import org.junit.Assert.assertNull -import org.junit.Test - -class TalkModeVoiceResolverTest { - @Test - fun resolvesVoiceAliasCaseInsensitively() { - val resolved = - TalkModeVoiceResolver.resolveVoiceAlias( - " Clawd ", - mapOf("clawd" to "voice-123"), - ) - - assertEquals("voice-123", resolved) - } - - @Test - fun acceptsDirectVoiceIds() { - val resolved = TalkModeVoiceResolver.resolveVoiceAlias("21m00Tcm4TlvDq8ikWAM", emptyMap()) - - assertEquals("21m00Tcm4TlvDq8ikWAM", resolved) - } - - @Test - fun rejectsUnknownAliases() { - val resolved = TalkModeVoiceResolver.resolveVoiceAlias("nickname", emptyMap()) - - assertNull(resolved) - } - - @Test - fun reusesCachedFallbackVoiceBeforeFetchingCatalog() = - runBlocking { - var fetchCount = 0 - - val resolved = - TalkModeVoiceResolver.resolveVoiceId( - preferred = null, - fallbackVoiceId = "cached-voice", - defaultVoiceId = null, - currentVoiceId = null, - voiceOverrideActive = false, - listVoices = { - fetchCount += 1 - emptyList() - }, - ) - - assertEquals("cached-voice", resolved.voiceId) - assertEquals(0, fetchCount) - } - - @Test - fun seedsDefaultVoiceFromCatalogWhenNeeded() = - runBlocking { - val resolved = - TalkModeVoiceResolver.resolveVoiceId( - preferred = null, - fallbackVoiceId = null, - defaultVoiceId = null, - currentVoiceId = null, - voiceOverrideActive = false, - listVoices = { listOf(ElevenLabsVoice("voice-1", "First")) }, - ) - - assertEquals("voice-1", resolved.voiceId) - assertEquals("voice-1", 
resolved.fallbackVoiceId) - assertEquals("voice-1", resolved.defaultVoiceId) - assertEquals("voice-1", resolved.currentVoiceId) - assertEquals("First", resolved.selectedVoiceName) - } - - @Test - fun preservesCurrentVoiceWhenOverrideIsActive() = - runBlocking { - val resolved = - TalkModeVoiceResolver.resolveVoiceId( - preferred = null, - fallbackVoiceId = null, - defaultVoiceId = null, - currentVoiceId = null, - voiceOverrideActive = true, - listVoices = { listOf(ElevenLabsVoice("voice-1", "First")) }, - ) - - assertEquals("voice-1", resolved.voiceId) - assertNull(resolved.currentVoiceId) - } -} From 4a0341ed035cae117ee560def33a74e87dd036ef Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Fri, 20 Mar 2026 10:45:32 +0530 Subject: [PATCH 07/11] fix(review): address talk cleanup feedback --- .../ai/openclaw/app/voice/TalkModeManager.kt | 7 +- src/gateway/server-methods/talk.ts | 99 +++++++------------ 2 files changed, 39 insertions(+), 67 deletions(-) diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt index 4ba2c2ef043..be62498e24e 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt @@ -756,12 +756,9 @@ class TalkModeManager( } val suffix = resolveGatewayAudioSuffix(speech) val tempFile = - withContext(Dispatchers.IO) { - File.createTempFile("tts_", suffix, context.cacheDir).apply { - writeBytes(audioBytes) - } - } + withContext(Dispatchers.IO) { File.createTempFile("tts_", suffix, context.cacheDir) } try { + withContext(Dispatchers.IO) { tempFile.writeBytes(audioBytes) } val player = MediaPlayer() this.player = player val finished = CompletableDeferred() diff --git a/src/gateway/server-methods/talk.ts b/src/gateway/server-methods/talk.ts index 33cb6d7f116..85f78e91b6a 100644 --- a/src/gateway/server-methods/talk.ts +++ 
b/src/gateway/server-methods/talk.ts @@ -112,83 +112,58 @@ function buildTalkTtsConfig( auto: "always", provider, }; + const baseUrl = trimString(providerConfig.baseUrl); + const voiceId = trimString(providerConfig.voiceId); + const modelId = trimString(providerConfig.modelId); + const languageCode = trimString(providerConfig.languageCode); if (provider === "elevenlabs") { + const seed = finiteNumber(providerConfig.seed); + const applyTextNormalization = normalizeTextNormalization( + providerConfig.applyTextNormalization, + ); + const voiceSettings = readTalkVoiceSettings(providerConfig); talkTts.elevenlabs = { ...baseTts.elevenlabs, ...(providerConfig.apiKey === undefined ? {} : { apiKey: providerConfig.apiKey }), - ...(trimString(providerConfig.baseUrl) == null - ? {} - : { baseUrl: trimString(providerConfig.baseUrl) }), - ...(trimString(providerConfig.voiceId) == null - ? {} - : { voiceId: trimString(providerConfig.voiceId) }), - ...(trimString(providerConfig.modelId) == null - ? {} - : { modelId: trimString(providerConfig.modelId) }), - ...(finiteNumber(providerConfig.seed) == null - ? {} - : { seed: finiteNumber(providerConfig.seed) }), - ...(normalizeTextNormalization(providerConfig.applyTextNormalization) == null - ? {} - : { - applyTextNormalization: normalizeTextNormalization( - providerConfig.applyTextNormalization, - ), - }), - ...(trimString(providerConfig.languageCode) == null - ? {} - : { languageCode: trimString(providerConfig.languageCode) }), - ...(readTalkVoiceSettings(providerConfig) == null - ? {} - : { voiceSettings: readTalkVoiceSettings(providerConfig) }), + ...(baseUrl == null ? {} : { baseUrl }), + ...(voiceId == null ? {} : { voiceId }), + ...(modelId == null ? {} : { modelId }), + ...(seed == null ? {} : { seed }), + ...(applyTextNormalization == null ? {} : { applyTextNormalization }), + ...(languageCode == null ? {} : { languageCode }), + ...(voiceSettings == null ? 
{} : { voiceSettings }), }; } else if (provider === "openai") { + const speed = finiteNumber(providerConfig.speed); + const instructions = trimString(providerConfig.instructions); talkTts.openai = { ...baseTts.openai, ...(providerConfig.apiKey === undefined ? {} : { apiKey: providerConfig.apiKey }), - ...(trimString(providerConfig.baseUrl) == null - ? {} - : { baseUrl: trimString(providerConfig.baseUrl) }), - ...(trimString(providerConfig.modelId) == null - ? {} - : { model: trimString(providerConfig.modelId) }), - ...(trimString(providerConfig.voiceId) == null - ? {} - : { voice: trimString(providerConfig.voiceId) }), - ...(finiteNumber(providerConfig.speed) == null - ? {} - : { speed: finiteNumber(providerConfig.speed) }), - ...(trimString(providerConfig.instructions) == null - ? {} - : { instructions: trimString(providerConfig.instructions) }), + ...(baseUrl == null ? {} : { baseUrl }), + ...(modelId == null ? {} : { model: modelId }), + ...(voiceId == null ? {} : { voice: voiceId }), + ...(speed == null ? {} : { speed }), + ...(instructions == null ? {} : { instructions }), }; } else if (provider === "microsoft") { + const outputFormat = trimString(providerConfig.outputFormat); + const pitch = trimString(providerConfig.pitch); + const rate = trimString(providerConfig.rate); + const volume = trimString(providerConfig.volume); + const proxy = trimString(providerConfig.proxy); + const timeoutMs = finiteNumber(providerConfig.timeoutMs); talkTts.microsoft = { ...baseTts.microsoft, enabled: true, - ...(trimString(providerConfig.voiceId) == null - ? {} - : { voice: trimString(providerConfig.voiceId) }), - ...(trimString(providerConfig.languageCode) == null - ? {} - : { lang: trimString(providerConfig.languageCode) }), - ...(trimString(providerConfig.outputFormat) == null - ? {} - : { outputFormat: trimString(providerConfig.outputFormat) }), - ...(trimString(providerConfig.pitch) == null - ? 
{} - : { pitch: trimString(providerConfig.pitch) }), - ...(trimString(providerConfig.rate) == null ? {} : { rate: trimString(providerConfig.rate) }), - ...(trimString(providerConfig.volume) == null - ? {} - : { volume: trimString(providerConfig.volume) }), - ...(trimString(providerConfig.proxy) == null - ? {} - : { proxy: trimString(providerConfig.proxy) }), - ...(finiteNumber(providerConfig.timeoutMs) == null - ? {} - : { timeoutMs: finiteNumber(providerConfig.timeoutMs) }), + ...(voiceId == null ? {} : { voice: voiceId }), + ...(languageCode == null ? {} : { lang: languageCode }), + ...(outputFormat == null ? {} : { outputFormat }), + ...(pitch == null ? {} : { pitch }), + ...(rate == null ? {} : { rate }), + ...(volume == null ? {} : { volume }), + ...(proxy == null ? {} : { proxy }), + ...(timeoutMs == null ? {} : { timeoutMs }), }; } else { return { error: `talk.speak unavailable: unsupported talk provider '${resolved.provider}'` }; From 47e412bd0b2bd81ad02613a8ec7ed41228c82bcb Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Fri, 20 Mar 2026 10:51:29 +0530 Subject: [PATCH 08/11] fix(review): preserve talk directive overrides --- .../ai/openclaw/app/voice/TalkModeManager.kt | 3 ++ src/gateway/protocol/schema/channels.ts | 1 + src/gateway/server-methods/talk.ts | 15 +++++- src/gateway/server.talk-config.test.ts | 47 +++++++++++++++++++ src/tts/providers/elevenlabs.ts | 4 +- src/tts/providers/microsoft.ts | 2 +- src/tts/tts.ts | 2 + 7 files changed, 70 insertions(+), 4 deletions(-) diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt index be62498e24e..d4433d72a9c 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt @@ -723,6 +723,9 @@ class TalkModeManager( TalkModeRuntime.validatedLanguage(directive?.language)?.let { put("language", 
JsonPrimitive(it)) } + directive?.outputFormat?.trim()?.takeIf { it.isNotEmpty() }?.let { + put("outputFormat", JsonPrimitive(it)) + } } val res = session.request("talk.speak", params.toString()) val root = json.parseToJsonElement(res).asObjectOrNull() ?: error("talk.speak returned invalid JSON") diff --git a/src/gateway/protocol/schema/channels.ts b/src/gateway/protocol/schema/channels.ts index 923432c7ac8..52f5ad597bc 100644 --- a/src/gateway/protocol/schema/channels.ts +++ b/src/gateway/protocol/schema/channels.ts @@ -21,6 +21,7 @@ export const TalkSpeakParamsSchema = Type.Object( text: NonEmptyString, voiceId: Type.Optional(Type.String()), modelId: Type.Optional(Type.String()), + outputFormat: Type.Optional(Type.String()), speed: Type.Optional(Type.Number()), stability: Type.Optional(Type.Number()), similarity: Type.Optional(Type.Number()), diff --git a/src/gateway/server-methods/talk.ts b/src/gateway/server-methods/talk.ts index 85f78e91b6a..acbede0b33d 100644 --- a/src/gateway/server-methods/talk.ts +++ b/src/gateway/server-methods/talk.ts @@ -69,7 +69,13 @@ function resolveTalkVoiceId( if (!aliases) { return requested; } - return aliases[normalizeAliasKey(requested)] ?? requested; + const normalizedRequested = normalizeAliasKey(requested); + for (const [alias, voiceId] of Object.entries(aliases)) { + if (normalizeAliasKey(alias) === normalizedRequested) { + return voiceId; + } + } + return requested; } function readTalkVoiceSettings( @@ -189,6 +195,7 @@ function buildTalkSpeakOverrides( ): TtsDirectiveOverrides { const voiceId = resolveTalkVoiceId(providerConfig, trimString(params.voiceId)); const modelId = trimString(params.modelId); + const outputFormat = trimString(params.outputFormat); const speed = finiteNumber(params.speed); const seed = finiteNumber(params.seed); const normalize = normalizeTextNormalization(params.normalize); @@ -212,6 +219,7 @@ function buildTalkSpeakOverrides( overrides.elevenlabs = { ...(voiceId == null ? 
{} : { voiceId }), ...(modelId == null ? {} : { modelId }), + ...(outputFormat == null ? {} : { outputFormat }), ...(seed == null ? {} : { seed }), ...(normalize == null ? {} : { applyTextNormalization: normalize }), ...(language == null ? {} : { languageCode: language }), @@ -230,7 +238,10 @@ function buildTalkSpeakOverrides( } if (provider === "microsoft") { - overrides.microsoft = voiceId == null ? undefined : { voice: voiceId }; + overrides.microsoft = { + ...(voiceId == null ? {} : { voice: voiceId }), + ...(outputFormat == null ? {} : { outputFormat }), + }; } return overrides; diff --git a/src/gateway/server.talk-config.test.ts b/src/gateway/server.talk-config.test.ts index eb2925db158..6433445795f 100644 --- a/src/gateway/server.talk-config.test.ts +++ b/src/gateway/server.talk-config.test.ts @@ -301,4 +301,51 @@ describe("gateway talk.config", () => { globalThis.fetch = originalFetch; } }); + + it("resolves talk voice aliases case-insensitively and forwards output format", async () => { + const { writeConfigFile } = await import("../config/config.js"); + await writeConfigFile({ + talk: { + provider: "elevenlabs", + providers: { + elevenlabs: { + apiKey: "elevenlabs-talk-key", // pragma: allowlist secret + voiceId: "voice-default", + voiceAliases: { + Clawd: "EXAVITQu4vr4xnSDxMaL", + }, + }, + }, + }, + }); + + const originalFetch = globalThis.fetch; + let fetchUrl: string | undefined; + const fetchMock = vi.fn(async (input: RequestInfo | URL) => { + fetchUrl = typeof input === "string" ? input : input instanceof URL ? 
input.href : input.url; + return new Response(new Uint8Array([4, 5, 6]), { status: 200 }); + }); + globalThis.fetch = fetchMock as typeof fetch; + + try { + await withServer(async (ws) => { + await connectOperator(ws, ["operator.read", "operator.write"]); + const res = await fetchTalkSpeak(ws, { + text: "Hello from talk mode.", + voiceId: "clawd", + outputFormat: "pcm_44100", + }); + expect(res.ok).toBe(true); + expect(res.payload?.provider).toBe("elevenlabs"); + expect(res.payload?.outputFormat).toBe("pcm_44100"); + expect(res.payload?.audioBase64).toBe(Buffer.from([4, 5, 6]).toString("base64")); + }); + + expect(fetchMock).toHaveBeenCalled(); + expect(fetchUrl).toContain("/v1/text-to-speech/EXAVITQu4vr4xnSDxMaL"); + expect(fetchUrl).toContain("output_format=pcm_44100"); + } finally { + globalThis.fetch = originalFetch; + } + }); }); diff --git a/src/tts/providers/elevenlabs.ts b/src/tts/providers/elevenlabs.ts index c22425926bf..99097fc42f3 100644 --- a/src/tts/providers/elevenlabs.ts +++ b/src/tts/providers/elevenlabs.ts @@ -72,7 +72,9 @@ export function buildElevenLabsSpeechProvider(): SpeechProviderPlugin { if (!apiKey) { throw new Error("ElevenLabs API key missing"); } - const outputFormat = req.target === "voice-note" ? "opus_48000_64" : "mp3_44100_128"; + const outputFormat = + req.overrides?.elevenlabs?.outputFormat ?? + (req.target === "voice-note" ? 
"opus_48000_64" : "mp3_44100_128"); const audioBuffer = await elevenLabsTTS({ text: req.text, apiKey, diff --git a/src/tts/providers/microsoft.ts b/src/tts/providers/microsoft.ts index ba2511e4de6..f6c5aa8c379 100644 --- a/src/tts/providers/microsoft.ts +++ b/src/tts/providers/microsoft.ts @@ -83,7 +83,7 @@ export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin { const tempRoot = resolvePreferredOpenClawTmpDir(); mkdirSync(tempRoot, { recursive: true, mode: 0o700 }); const tempDir = mkdtempSync(path.join(tempRoot, "tts-microsoft-")); - let outputFormat = req.config.edge.outputFormat; + let outputFormat = req.overrides?.microsoft?.outputFormat ?? req.config.edge.outputFormat; const fallbackOutputFormat = outputFormat !== DEFAULT_EDGE_OUTPUT_FORMAT ? DEFAULT_EDGE_OUTPUT_FORMAT : undefined; diff --git a/src/tts/tts.ts b/src/tts/tts.ts index c64dda83909..17a7c2fc981 100644 --- a/src/tts/tts.ts +++ b/src/tts/tts.ts @@ -167,6 +167,7 @@ export type TtsDirectiveOverrides = { elevenlabs?: { voiceId?: string; modelId?: string; + outputFormat?: string; seed?: number; applyTextNormalization?: "auto" | "on" | "off"; languageCode?: string; @@ -174,6 +175,7 @@ export type TtsDirectiveOverrides = { }; microsoft?: { voice?: string; + outputFormat?: string; }; }; From 61965e500f93b039d21b9dbca34b320ed23dc704 Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Fri, 20 Mar 2026 10:56:18 +0530 Subject: [PATCH 09/11] fix: route Android Talk synthesis through the gateway (#50849) --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 37ff9e33f36..553fab9d3a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ Docs: https://docs.openclaw.ai - Plugins/context engines: expose `delegateCompactionToRuntime(...)` on the public plugin SDK, refactor the legacy engine to use the shared helper, and clarify `ownsCompaction` delegation semantics for non-owning engines. (#49061) Thanks @jalehman. 
- Plugins/MiniMax: add MiniMax-M2.7 and MiniMax-M2.7-highspeed models and update the default model from M2.5 to M2.7. (#49691) Thanks @liyuan97. - Plugins/Xiaomi: switch the bundled Xiaomi provider to the `/v1` OpenAI-compatible endpoint and add MiMo V2 Pro plus MiMo V2 Omni to the built-in catalog. (#49214) thanks @DJjjjhao. +- Android/Talk: move Talk speech synthesis behind gateway `talk.speak`, keep Talk secrets on the gateway, and switch Android playback to final-response audio instead of device-local ElevenLabs streaming. (#50849) - Plugins/Matrix: add `allowBots` room policy so configured Matrix bot accounts can talk to each other, with optional mention-only gating. Thanks @gumadeiras. - Plugins/Matrix: add per-account `allowPrivateNetwork` opt-in for private/internal homeservers, while keeping public cleartext homeservers blocked. Thanks @gumadeiras. - Web tools/Tavily: add Tavily as a bundled web-search provider with dedicated `tavily_search` and `tavily_extract` tools, using canonical plugin-owned config under `plugins.entries.tavily.config.webSearch.*`. (#49200) thanks @lakshyaag-tavily. 
From 2afd65741cdaa4808f43b11a0947a8f1fe6fe257 Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Fri, 20 Mar 2026 11:07:13 +0530 Subject: [PATCH 10/11] fix: preserve talk provider and speaking state --- .../ai/openclaw/app/voice/TalkModeManager.kt | 2 +- src/gateway/server-methods/talk.ts | 2 - src/gateway/server.talk-config.test.ts | 52 +++++++++++++++++++ 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt index d4433d72a9c..2a82588b46b 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt @@ -748,7 +748,7 @@ class TalkModeManager( private suspend fun playGatewaySpeech(speech: GatewayTalkSpeech, playbackToken: Long) { ensurePlaybackActive(playbackToken) - stopSpeaking(resetInterrupt = false) + cleanupPlayer() ensurePlaybackActive(playbackToken) val audioBytes = diff --git a/src/gateway/server-methods/talk.ts b/src/gateway/server-methods/talk.ts index acbede0b33d..3930dc4c4ca 100644 --- a/src/gateway/server-methods/talk.ts +++ b/src/gateway/server-methods/talk.ts @@ -171,8 +171,6 @@ function buildTalkTtsConfig( ...(proxy == null ? {} : { proxy }), ...(timeoutMs == null ? 
{} : { timeoutMs }), }; - } else { - return { error: `talk.speak unavailable: unsupported talk provider '${resolved.provider}'` }; } return { diff --git a/src/gateway/server.talk-config.test.ts b/src/gateway/server.talk-config.test.ts index 6433445795f..1dccbfab5c6 100644 --- a/src/gateway/server.talk-config.test.ts +++ b/src/gateway/server.talk-config.test.ts @@ -6,6 +6,8 @@ import { publicKeyRawBase64UrlFromPem, signDevicePayload, } from "../infra/device-identity.js"; +import { createEmptyPluginRegistry } from "../plugins/registry-empty.js"; +import { getActivePluginRegistry, setActivePluginRegistry } from "../plugins/runtime.js"; import { withEnvAsync } from "../test-utils/env.js"; import { buildDeviceAuthPayload } from "./device-auth.js"; import { validateTalkConfigResult } from "./protocol/index.js"; @@ -348,4 +350,54 @@ describe("gateway talk.config", () => { globalThis.fetch = originalFetch; } }); + + it("allows extension speech providers through talk.speak", async () => { + const { writeConfigFile } = await import("../config/config.js"); + await writeConfigFile({ + talk: { + provider: "acme", + providers: { + acme: { + voiceId: "plugin-voice", + }, + }, + }, + }); + + const previousRegistry = getActivePluginRegistry() ?? 
createEmptyPluginRegistry(); + setActivePluginRegistry({ + ...createEmptyPluginRegistry(), + speechProviders: [ + { + pluginId: "acme-plugin", + source: "test", + provider: { + id: "acme", + label: "Acme Speech", + isConfigured: () => true, + synthesize: async () => ({ + audioBuffer: Buffer.from([7, 8, 9]), + outputFormat: "mp3", + fileExtension: ".mp3", + voiceCompatible: false, + }), + }, + }, + ], + }); + + try { + await withServer(async (ws) => { + await connectOperator(ws, ["operator.read", "operator.write"]); + const res = await fetchTalkSpeak(ws, { + text: "Hello from plugin talk mode.", + }); + expect(res.ok).toBe(true); + expect(res.payload?.provider).toBe("acme"); + expect(res.payload?.audioBase64).toBe(Buffer.from([7, 8, 9]).toString("base64")); + }); + } finally { + setActivePluginRegistry(previousRegistry); + } + }); }); From a73e517ae3b8fc1f6c1ab48c2a98274eb36accb9 Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Fri, 20 Mar 2026 11:12:53 +0530 Subject: [PATCH 11/11] build(protocol): regenerate swift talk models --- .../OpenClawProtocol/GatewayModels.swift | 92 +++++++++++++++++++ .../OpenClawProtocol/GatewayModels.swift | 92 +++++++++++++++++++ 2 files changed, 184 insertions(+) diff --git a/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift b/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift index 6f97c9bf9f1..0b1d7b13e01 100644 --- a/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift +++ b/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift @@ -2012,6 +2012,98 @@ public struct TalkConfigResult: Codable, Sendable { } } +public struct TalkSpeakParams: Codable, Sendable { + public let text: String + public let voiceid: String? + public let modelid: String? + public let outputformat: String? + public let speed: Double? + public let stability: Double? + public let similarity: Double? + public let style: Double? + public let speakerboost: Bool? + public let seed: Int? + public let normalize: String? + public let language: String? 
+ + public init( + text: String, + voiceid: String?, + modelid: String?, + outputformat: String?, + speed: Double?, + stability: Double?, + similarity: Double?, + style: Double?, + speakerboost: Bool?, + seed: Int?, + normalize: String?, + language: String?) + { + self.text = text + self.voiceid = voiceid + self.modelid = modelid + self.outputformat = outputformat + self.speed = speed + self.stability = stability + self.similarity = similarity + self.style = style + self.speakerboost = speakerboost + self.seed = seed + self.normalize = normalize + self.language = language + } + + private enum CodingKeys: String, CodingKey { + case text + case voiceid = "voiceId" + case modelid = "modelId" + case outputformat = "outputFormat" + case speed + case stability + case similarity + case style + case speakerboost = "speakerBoost" + case seed + case normalize + case language + } +} + +public struct TalkSpeakResult: Codable, Sendable { + public let audiobase64: String + public let provider: String + public let outputformat: String? + public let voicecompatible: Bool? + public let mimetype: String? + public let fileextension: String? + + public init( + audiobase64: String, + provider: String, + outputformat: String?, + voicecompatible: Bool?, + mimetype: String?, + fileextension: String?) + { + self.audiobase64 = audiobase64 + self.provider = provider + self.outputformat = outputformat + self.voicecompatible = voicecompatible + self.mimetype = mimetype + self.fileextension = fileextension + } + + private enum CodingKeys: String, CodingKey { + case audiobase64 = "audioBase64" + case provider + case outputformat = "outputFormat" + case voicecompatible = "voiceCompatible" + case mimetype = "mimeType" + case fileextension = "fileExtension" + } +} + public struct ChannelsStatusParams: Codable, Sendable { public let probe: Bool? public let timeoutms: Int? 
diff --git a/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift b/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift index 6f97c9bf9f1..0b1d7b13e01 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift @@ -2012,6 +2012,98 @@ public struct TalkConfigResult: Codable, Sendable { } } +public struct TalkSpeakParams: Codable, Sendable { + public let text: String + public let voiceid: String? + public let modelid: String? + public let outputformat: String? + public let speed: Double? + public let stability: Double? + public let similarity: Double? + public let style: Double? + public let speakerboost: Bool? + public let seed: Int? + public let normalize: String? + public let language: String? + + public init( + text: String, + voiceid: String?, + modelid: String?, + outputformat: String?, + speed: Double?, + stability: Double?, + similarity: Double?, + style: Double?, + speakerboost: Bool?, + seed: Int?, + normalize: String?, + language: String?) + { + self.text = text + self.voiceid = voiceid + self.modelid = modelid + self.outputformat = outputformat + self.speed = speed + self.stability = stability + self.similarity = similarity + self.style = style + self.speakerboost = speakerboost + self.seed = seed + self.normalize = normalize + self.language = language + } + + private enum CodingKeys: String, CodingKey { + case text + case voiceid = "voiceId" + case modelid = "modelId" + case outputformat = "outputFormat" + case speed + case stability + case similarity + case style + case speakerboost = "speakerBoost" + case seed + case normalize + case language + } +} + +public struct TalkSpeakResult: Codable, Sendable { + public let audiobase64: String + public let provider: String + public let outputformat: String? + public let voicecompatible: Bool? + public let mimetype: String? + public let fileextension: String? 
+ + public init( + audiobase64: String, + provider: String, + outputformat: String?, + voicecompatible: Bool?, + mimetype: String?, + fileextension: String?) + { + self.audiobase64 = audiobase64 + self.provider = provider + self.outputformat = outputformat + self.voicecompatible = voicecompatible + self.mimetype = mimetype + self.fileextension = fileextension + } + + private enum CodingKeys: String, CodingKey { + case audiobase64 = "audioBase64" + case provider + case outputformat = "outputFormat" + case voicecompatible = "voiceCompatible" + case mimetype = "mimeType" + case fileextension = "fileExtension" + } +} + public struct ChannelsStatusParams: Codable, Sendable { public let probe: Bool? public let timeoutms: Int?