From 089c8bc65e4bf914bdd633d6ecbff0550b530359 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 19 Mar 2026 10:42:46 -0700 Subject: [PATCH] =?UTF-8?q?docs:=20Phase=203=20IA=20restructure=20?= =?UTF-8?q?=E2=80=94=20move=20pi=20to=20Reference,=20merge=20Models=20grou?= =?UTF-8?q?ps,=20move=20install/node=20to=20Install,=20move=20prose=20to?= =?UTF-8?q?=20Skills,=20migrate=20brave-search/perplexity/tts=20into=20too?= =?UTF-8?q?ls/?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/docs.json | 47 +-- docs/providers/perplexity-provider.md | 2 +- docs/tools/brave-search.md | 93 ++++++ docs/tools/perplexity-search.md | 174 +++++++++++ docs/tools/slash-commands.md | 2 +- docs/tools/tts.md | 406 ++++++++++++++++++++++++++ docs/tools/web.md | 2 +- 7 files changed, 700 insertions(+), 26 deletions(-) create mode 100644 docs/tools/brave-search.md create mode 100644 docs/tools/perplexity-search.md create mode 100644 docs/tools/tts.md diff --git a/docs/docs.json b/docs/docs.json index 772a8a476cd..1b19bc564b2 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -51,6 +51,18 @@ "source": "/start/quickstart", "destination": "/start/getting-started" }, + { + "source": "/brave-search", + "destination": "/tools/brave-search" + }, + { + "source": "/perplexity", + "destination": "/tools/perplexity-search" + }, + { + "source": "/tts", + "destination": "/tools/tts" + }, { "source": "/messages", "destination": "/concepts/messages" @@ -864,7 +876,7 @@ "groups": [ { "group": "Install overview", - "pages": ["install/index", "install/installer"] + "pages": ["install/index", "install/installer", "install/node"] }, { "group": "Other install methods", @@ -956,7 +968,6 @@ { "group": "Fundamentals", "pages": [ - "pi", "concepts/architecture", "concepts/agent", "concepts/agent-loop", @@ -964,13 +975,10 @@ "concepts/context", "concepts/context-engine", "concepts/agent-workspace", - "concepts/oauth" + "concepts/oauth", + "start/bootstrapping" ] }, - 
{ - "group": "Bootstrapping", - "pages": ["start/bootstrapping"] - }, { "group": "Sessions and memory", "pages": [ @@ -1007,7 +1015,7 @@ "group": "Built-in tools", "pages": [ "tools/apply-patch", - "brave-search", + "tools/brave-search", "tools/btw", "tools/diffs", "tools/elevated", @@ -1018,7 +1026,7 @@ "tools/lobster", "tools/loop-detection", "tools/pdf", - "perplexity", + "tools/perplexity-search", "tools/reactions", "tools/thinking", "tools/web" @@ -1050,7 +1058,8 @@ "tools/skills", "tools/skills-config", "tools/clawhub", - "tools/plugin" + "tools/plugin", + "prose" ] }, { @@ -1064,8 +1073,7 @@ "plugins/zalouser", "plugins/manifest", "plugins/agent-tools", - "tools/capability-cookbook", - "prose" + "tools/capability-cookbook" ] }, { @@ -1093,7 +1101,7 @@ "nodes/talk", "nodes/voicewake", "nodes/location-command", - "tts" + "tools/tts" ] } ] @@ -1106,12 +1114,8 @@ "pages": ["providers/index", "providers/models"] }, { - "group": "Model concepts", - "pages": ["concepts/models"] - }, - { - "group": "Configuration", - "pages": ["concepts/model-providers", "concepts/model-failover"] + "group": "Concepts and configuration", + "pages": ["concepts/models", "concepts/model-providers", "concepts/model-failover"] }, { "group": "Providers", @@ -1351,6 +1355,7 @@ { "group": "Technical reference", "pages": [ + "pi", "reference/wizard", "reference/token-use", "reference/secretref-credential-surface", @@ -1402,10 +1407,6 @@ "diagnostics/flags" ] }, - { - "group": "Node runtime", - "pages": ["install/node"] - }, { "group": "Compaction internals", "pages": ["reference/session-management-compaction"] diff --git a/docs/providers/perplexity-provider.md b/docs/providers/perplexity-provider.md index 63880385353..c93475efdd3 100644 --- a/docs/providers/perplexity-provider.md +++ b/docs/providers/perplexity-provider.md @@ -13,7 +13,7 @@ Search API or Perplexity Sonar via OpenRouter. This page covers the Perplexity **provider** setup. 
For the Perplexity -**tool** (how the agent uses it), see [Perplexity tool](/perplexity). +**tool** (how the agent uses it), see [Perplexity tool](/tools/perplexity-search). - Type: web search provider (not a model provider) diff --git a/docs/tools/brave-search.md b/docs/tools/brave-search.md new file mode 100644 index 00000000000..12cd78c358f --- /dev/null +++ b/docs/tools/brave-search.md @@ -0,0 +1,93 @@ +--- +summary: "Brave Search API setup for web_search" +read_when: + - You want to use Brave Search for web_search + - You need a BRAVE_API_KEY or plan details +title: "Brave Search" +--- + +# Brave Search API + +OpenClaw supports Brave Search API as a `web_search` provider. + +## Get an API key + +1. Create a Brave Search API account at [https://brave.com/search/api/](https://brave.com/search/api/) +2. In the dashboard, choose the **Search** plan and generate an API key. +3. Store the key in config or set `BRAVE_API_KEY` in the Gateway environment. + +## Config example + +```json5 +{ + plugins: { + entries: { + brave: { + config: { + webSearch: { + apiKey: "BRAVE_API_KEY_HERE", + }, + }, + }, + }, + }, + tools: { + web: { + search: { + provider: "brave", + maxResults: 5, + timeoutSeconds: 30, + }, + }, + }, +} +``` + +Provider-specific Brave search settings now live under `plugins.entries.brave.config.webSearch.*`. +Legacy `tools.web.search.apiKey` still loads through the compatibility shim, but it is no longer the canonical config path. 
+ +## Tool parameters + +| Parameter | Description | +| ------------- | ------------------------------------------------------------------- | +| `query` | Search query (required) | +| `count` | Number of results to return (1-10, default: 5) | +| `country` | 2-letter ISO country code (e.g., "US", "DE") | +| `language` | ISO 639-1 language code for search results (e.g., "en", "de", "fr") | +| `ui_lang` | ISO language code for UI elements | +| `freshness` | Time filter: `day` (24h), `week`, `month`, or `year` | +| `date_after` | Only results published after this date (YYYY-MM-DD) | +| `date_before` | Only results published before this date (YYYY-MM-DD) | + +**Examples:** + +```javascript +// Country and language-specific search +await web_search({ + query: "renewable energy", + country: "DE", + language: "de", +}); + +// Recent results (past week) +await web_search({ + query: "AI news", + freshness: "week", +}); + +// Date range search +await web_search({ + query: "AI developments", + date_after: "2024-01-01", + date_before: "2024-06-30", +}); +``` + +## Notes + +- OpenClaw uses the Brave **Search** plan. If you have a legacy subscription (e.g. the original Free plan with 2,000 queries/month), it remains valid but does not include newer features like LLM Context or higher rate limits. +- Each Brave plan includes **\$5/month in free credit** (renewing). The Search plan costs \$5 per 1,000 requests, so the credit covers 1,000 queries/month. Set your usage limit in the Brave dashboard to avoid unexpected charges. See the [Brave API portal](https://brave.com/search/api/) for current plans. +- The Search plan includes the LLM Context endpoint and AI inference rights. Storing results to train or tune models requires a plan with explicit storage rights. See the Brave [Terms of Service](https://api-dashboard.search.brave.com/terms-of-service). +- Results are cached for 15 minutes by default (configurable via `cacheTtlMinutes`). 
+ +See [Web tools](/tools/web) for the full web_search configuration. diff --git a/docs/tools/perplexity-search.md b/docs/tools/perplexity-search.md new file mode 100644 index 00000000000..3ad4c50c3f7 --- /dev/null +++ b/docs/tools/perplexity-search.md @@ -0,0 +1,174 @@ +--- +summary: "Perplexity Search API and Sonar/OpenRouter compatibility for web_search" +read_when: + - You want to use Perplexity Search for web search + - You need PERPLEXITY_API_KEY or OPENROUTER_API_KEY setup +title: "Perplexity Search" +--- + +# Perplexity Search API + +OpenClaw supports Perplexity Search API as a `web_search` provider. +It returns structured results with `title`, `url`, and `snippet` fields. + +For compatibility, OpenClaw also supports legacy Perplexity Sonar/OpenRouter setups. +If you use `OPENROUTER_API_KEY`, an `sk-or-...` key in `plugins.entries.perplexity.config.webSearch.apiKey`, or set `plugins.entries.perplexity.config.webSearch.baseUrl` / `model`, the provider switches to the chat-completions path and returns AI-synthesized answers with citations instead of structured Search API results. + +## Getting a Perplexity API key + +1. Create a Perplexity account at [perplexity.ai/settings/api](https://www.perplexity.ai/settings/api) +2. Generate an API key in the dashboard +3. Store the key in config or set `PERPLEXITY_API_KEY` in the Gateway environment. + +## OpenRouter compatibility + +If you were already using OpenRouter for Perplexity Sonar, keep `provider: "perplexity"` and set `OPENROUTER_API_KEY` in the Gateway environment, or store an `sk-or-...` key in `plugins.entries.perplexity.config.webSearch.apiKey`. 
+ +Optional compatibility controls: + +- `plugins.entries.perplexity.config.webSearch.baseUrl` +- `plugins.entries.perplexity.config.webSearch.model` + +## Config examples + +### Native Perplexity Search API + +```json5 +{ + plugins: { + entries: { + perplexity: { + config: { + webSearch: { + apiKey: "pplx-...", + }, + }, + }, + }, + }, + tools: { + web: { + search: { + provider: "perplexity", + }, + }, + }, +} +``` + +### OpenRouter / Sonar compatibility + +```json5 +{ + plugins: { + entries: { + perplexity: { + config: { + webSearch: { + apiKey: "sk-or-...", + baseUrl: "https://openrouter.ai/api/v1", + model: "perplexity/sonar-pro", + }, + }, + }, + }, + }, + tools: { + web: { + search: { + provider: "perplexity", + }, + }, + }, +} +``` + +## Where to set the key + +**Via config:** run `openclaw configure --section web`. It stores the key in +`~/.openclaw/openclaw.json` under `plugins.entries.perplexity.config.webSearch.apiKey`. +That field also accepts SecretRef objects. + +**Via environment:** set `PERPLEXITY_API_KEY` or `OPENROUTER_API_KEY` +in the Gateway process environment. For a gateway install, put it in +`~/.openclaw/.env` (or your service environment). See [Env vars](/help/faq#how-does-openclaw-load-environment-variables). + +If `provider: "perplexity"` is configured and the Perplexity key SecretRef is unresolved with no env fallback, startup/reload fails fast. + +## Tool parameters + +These parameters apply to the native Perplexity Search API path. 
+ +| Parameter | Description | +| --------------------- | ---------------------------------------------------- | +| `query` | Search query (required) | +| `count` | Number of results to return (1-10, default: 5) | +| `country` | 2-letter ISO country code (e.g., "US", "DE") | +| `language` | ISO 639-1 language code (e.g., "en", "de", "fr") | +| `freshness` | Time filter: `day` (24h), `week`, `month`, or `year` | +| `date_after` | Only results published after this date (YYYY-MM-DD) | +| `date_before` | Only results published before this date (YYYY-MM-DD) | +| `domain_filter` | Domain allowlist/denylist array (max 20) | +| `max_tokens` | Total content budget (default: 25000, max: 1000000) | +| `max_tokens_per_page` | Per-page token limit (default: 2048) | + +For the legacy Sonar/OpenRouter compatibility path, only `query` and `freshness` are supported. +Search API-only filters such as `country`, `language`, `date_after`, `date_before`, `domain_filter`, `max_tokens`, and `max_tokens_per_page` return explicit errors. 
+ +**Examples:** + +```javascript +// Country and language-specific search +await web_search({ + query: "renewable energy", + country: "DE", + language: "de", +}); + +// Recent results (past week) +await web_search({ + query: "AI news", + freshness: "week", +}); + +// Date range search +await web_search({ + query: "AI developments", + date_after: "2024-01-01", + date_before: "2024-06-30", +}); + +// Domain filtering (allowlist) +await web_search({ + query: "climate research", + domain_filter: ["nature.com", "science.org", ".edu"], +}); + +// Domain filtering (denylist - prefix with -) +await web_search({ + query: "product reviews", + domain_filter: ["-reddit.com", "-pinterest.com"], +}); + +// More content extraction +await web_search({ + query: "detailed AI research", + max_tokens: 50000, + max_tokens_per_page: 4096, +}); +``` + +### Domain filter rules + +- Maximum 20 domains per filter +- Cannot mix allowlist and denylist in the same request +- Use `-` prefix for denylist entries (e.g., `["-reddit.com"]`) + +## Notes + +- Perplexity Search API returns structured web search results (`title`, `url`, `snippet`) +- OpenRouter or explicit `plugins.entries.perplexity.config.webSearch.baseUrl` / `model` switches Perplexity back to Sonar chat completions for compatibility +- Results are cached for 15 minutes by default (configurable via `cacheTtlMinutes`) + +See [Web tools](/tools/web) for the full web_search configuration. +See [Perplexity Search API docs](https://docs.perplexity.ai/docs/search/quickstart) for more details. 
diff --git a/docs/tools/slash-commands.md b/docs/tools/slash-commands.md index c62612d312b..0910931b660 100644 --- a/docs/tools/slash-commands.md +++ b/docs/tools/slash-commands.md @@ -98,7 +98,7 @@ Text + native (when enabled): - `/plugins list|show|get|enable|disable` (inspect discovered plugins and toggle enablement, owner-only for writes; requires `commands.plugins: true`) - `/debug show|set|unset|reset` (runtime overrides, owner-only; requires `commands.debug: true`) - `/usage off|tokens|full|cost` (per-response usage footer or local cost summary) -- `/tts off|always|inbound|tagged|status|provider|limit|summary|audio` (control TTS; see [/tts](/tts)) +- `/tts off|always|inbound|tagged|status|provider|limit|summary|audio` (control TTS; see [/tts](/tools/tts)) - Discord: native command is `/voice` (Discord reserves `/tts`); text `/tts` still works. - `/stop` - `/restart` diff --git a/docs/tools/tts.md b/docs/tools/tts.md new file mode 100644 index 00000000000..4fe0da77e0a --- /dev/null +++ b/docs/tools/tts.md @@ -0,0 +1,406 @@ +--- +summary: "Text-to-speech (TTS) for outbound replies" +read_when: + - Enabling text-to-speech for replies + - Configuring TTS providers or limits + - Using /tts commands +title: "Text-to-Speech" +--- + +# Text-to-speech (TTS) + +OpenClaw can convert outbound replies into audio using ElevenLabs, Microsoft, or OpenAI. +It works anywhere OpenClaw can send audio; Telegram gets a round voice-note bubble. + +## Supported services + +- **ElevenLabs** (primary or fallback provider) +- **Microsoft** (primary or fallback provider; current bundled implementation uses `node-edge-tts`, default when no API keys) +- **OpenAI** (primary or fallback provider; also used for summaries) + +### Microsoft speech notes + +The bundled Microsoft speech provider currently uses Microsoft Edge's online +neural TTS service via the `node-edge-tts` library. It's a hosted service (not +local), uses Microsoft endpoints, and does not require an API key. 
+`node-edge-tts` exposes speech configuration options and output formats, but +not all options are supported by the service. Legacy config and directive input +using `edge` still works and is normalized to `microsoft`. + +Because this path is a public web service without a published SLA or quota, +treat it as best-effort. If you need guaranteed limits and support, use OpenAI +or ElevenLabs. + +## Optional keys + +If you want OpenAI or ElevenLabs: + +- `ELEVENLABS_API_KEY` (or `XI_API_KEY`) +- `OPENAI_API_KEY` + +Microsoft speech does **not** require an API key. If no API keys are found, +OpenClaw defaults to Microsoft (unless disabled via +`messages.tts.microsoft.enabled=false` or `messages.tts.edge.enabled=false`). + +If multiple providers are configured, the selected provider is used first and the others are fallback options. +Auto-summary uses the configured `summaryModel` (or `agents.defaults.model.primary`), +so that provider must also be authenticated if you enable summaries. + +## Service links + +- [OpenAI Text-to-Speech guide](https://platform.openai.com/docs/guides/text-to-speech) +- [OpenAI Audio API reference](https://platform.openai.com/docs/api-reference/audio) +- [ElevenLabs Text to Speech](https://elevenlabs.io/docs/api-reference/text-to-speech) +- [ElevenLabs Authentication](https://elevenlabs.io/docs/api-reference/authentication) +- [node-edge-tts](https://github.com/SchneeHertz/node-edge-tts) +- [Microsoft Speech output formats](https://learn.microsoft.com/azure/ai-services/speech-service/rest-text-to-speech#audio-outputs) + +## Is it enabled by default? + +No. Auto‑TTS is **off** by default. Enable it in config with +`messages.tts.auto` or per session with `/tts always` (alias: `/tts on`). + +Microsoft speech **is** enabled by default once TTS is on, and is used automatically +when no OpenAI or ElevenLabs API keys are available. + +## Config + +TTS config lives under `messages.tts` in `openclaw.json`. 
+Full schema is in [Gateway configuration](/gateway/configuration). + +### Minimal config (enable + provider) + +```json5 +{ + messages: { + tts: { + auto: "always", + provider: "elevenlabs", + }, + }, +} +``` + +### OpenAI primary with ElevenLabs fallback + +```json5 +{ + messages: { + tts: { + auto: "always", + provider: "openai", + summaryModel: "openai/gpt-4.1-mini", + modelOverrides: { + enabled: true, + }, + openai: { + apiKey: "openai_api_key", + baseUrl: "https://api.openai.com/v1", + model: "gpt-4o-mini-tts", + voice: "alloy", + }, + elevenlabs: { + apiKey: "elevenlabs_api_key", + baseUrl: "https://api.elevenlabs.io", + voiceId: "voice_id", + modelId: "eleven_multilingual_v2", + seed: 42, + applyTextNormalization: "auto", + languageCode: "en", + voiceSettings: { + stability: 0.5, + similarityBoost: 0.75, + style: 0.0, + useSpeakerBoost: true, + speed: 1.0, + }, + }, + }, + }, +} +``` + +### Microsoft primary (no API key) + +```json5 +{ + messages: { + tts: { + auto: "always", + provider: "microsoft", + microsoft: { + enabled: true, + voice: "en-US-MichelleNeural", + lang: "en-US", + outputFormat: "audio-24khz-48kbitrate-mono-mp3", + rate: "+10%", + pitch: "-5%", + }, + }, + }, +} +``` + +### Disable Microsoft speech + +```json5 +{ + messages: { + tts: { + microsoft: { + enabled: false, + }, + }, + }, +} +``` + +### Custom limits + prefs path + +```json5 +{ + messages: { + tts: { + auto: "always", + maxTextLength: 4000, + timeoutMs: 30000, + prefsPath: "~/.openclaw/settings/tts.json", + }, + }, +} +``` + +### Only reply with audio after an inbound voice note + +```json5 +{ + messages: { + tts: { + auto: "inbound", + }, + }, +} +``` + +### Disable auto-summary for long replies + +```json5 +{ + messages: { + tts: { + auto: "always", + }, + }, +} +``` + +Then run: + +``` +/tts summary off +``` + +### Notes on fields + +- `auto`: auto‑TTS mode (`off`, `always`, `inbound`, `tagged`). + - `inbound` only sends audio after an inbound voice note. 
+ - `tagged` only sends audio when the reply includes `[[tts]]` tags. +- `enabled`: legacy toggle (doctor migrates this to `auto`). +- `mode`: `"final"` (default) or `"all"` (includes tool/block replies). +- `provider`: speech provider id such as `"elevenlabs"`, `"microsoft"`, or `"openai"` (fallback is automatic). +- If `provider` is **unset**, OpenClaw prefers `openai` (if key), then `elevenlabs` (if key), + otherwise `microsoft`. +- Legacy `provider: "edge"` still works and is normalized to `microsoft`. +- `summaryModel`: optional cheap model for auto-summary; defaults to `agents.defaults.model.primary`. + - Accepts `provider/model` or a configured model alias. +- `modelOverrides`: allow the model to emit TTS directives (on by default). + - `allowProvider` defaults to `false` (provider switching is opt-in). +- `maxTextLength`: hard cap for TTS input (chars). `/tts audio` fails if exceeded. +- `timeoutMs`: request timeout (ms). +- `prefsPath`: override the local prefs JSON path (provider/limit/summary). +- `apiKey` values fall back to env vars (`ELEVENLABS_API_KEY`/`XI_API_KEY`, `OPENAI_API_KEY`). +- `elevenlabs.baseUrl`: override ElevenLabs API base URL. +- `openai.baseUrl`: override the OpenAI TTS endpoint. + - Resolution order: `messages.tts.openai.baseUrl` -> `OPENAI_TTS_BASE_URL` -> `https://api.openai.com/v1` + - Non-default values are treated as OpenAI-compatible TTS endpoints, so custom model and voice names are accepted. +- `elevenlabs.voiceSettings`: + - `stability`, `similarityBoost`, `style`: `0..1` + - `useSpeakerBoost`: `true|false` + - `speed`: `0.5..2.0` (1.0 = normal) +- `elevenlabs.applyTextNormalization`: `auto|on|off` +- `elevenlabs.languageCode`: 2-letter ISO 639-1 (e.g. `en`, `de`) +- `elevenlabs.seed`: integer `0..4294967295` (best-effort determinism) +- `microsoft.enabled`: allow Microsoft speech usage (default `true`; no API key). +- `microsoft.voice`: Microsoft neural voice name (e.g. `en-US-MichelleNeural`). 
+- `microsoft.lang`: language code (e.g. `en-US`). +- `microsoft.outputFormat`: Microsoft output format (e.g. `audio-24khz-48kbitrate-mono-mp3`). + - See Microsoft Speech output formats for valid values; not all formats are supported by the bundled Edge-backed transport. +- `microsoft.rate` / `microsoft.pitch` / `microsoft.volume`: percent strings (e.g. `+10%`, `-5%`). +- `microsoft.saveSubtitles`: write JSON subtitles alongside the audio file. +- `microsoft.proxy`: proxy URL for Microsoft speech requests. +- `microsoft.timeoutMs`: request timeout override (ms). +- `edge.*`: legacy alias for the same Microsoft settings. + +## Model-driven overrides (default on) + +By default, the model **can** emit TTS directives for a single reply. +When `messages.tts.auto` is `tagged`, these directives are required to trigger audio. + +When enabled, the model can emit `[[tts:...]]` directives to override the voice +for a single reply, plus an optional `[[tts:text]]...[[/tts:text]]` block to +provide expressive tags (laughter, singing cues, etc) that should only appear in +the audio. + +`provider=...` directives are ignored unless `modelOverrides.allowProvider: true`. + +Example reply payload: + +``` +Here you go. 
+ +[[tts:voiceId=pMsXgVXv3BLzUgSXRplE model=eleven_v3 speed=1.1]] +[[tts:text]](laughs) Read the song once more.[[/tts:text]] +``` + +Available directive keys (when enabled): + +- `provider` (registered speech provider id, for example `openai`, `elevenlabs`, or `microsoft`; requires `allowProvider: true`) +- `voice` (OpenAI voice) or `voiceId` (ElevenLabs) +- `model` (OpenAI TTS model or ElevenLabs model id) +- `stability`, `similarityBoost`, `style`, `speed`, `useSpeakerBoost` +- `applyTextNormalization` (`auto|on|off`) +- `languageCode` (ISO 639-1) +- `seed` + +Disable all model overrides: + +```json5 +{ + messages: { + tts: { + modelOverrides: { + enabled: false, + }, + }, + }, +} +``` + +Optional allowlist (enable provider switching while keeping other knobs configurable): + +```json5 +{ + messages: { + tts: { + modelOverrides: { + enabled: true, + allowProvider: true, + allowSeed: false, + }, + }, + }, +} +``` + +## Per-user preferences + +Slash commands write local overrides to `prefsPath` (default: +`~/.openclaw/settings/tts.json`, override with `OPENCLAW_TTS_PREFS` or +`messages.tts.prefsPath`). + +Stored fields: + +- `enabled` +- `provider` +- `maxLength` (summary threshold; default 1500 chars) +- `summarize` (default `true`) + +These override `messages.tts.*` for that host. + +## Output formats (fixed) + +- **Telegram**: Opus voice note (`opus_48000_64` from ElevenLabs, `opus` from OpenAI). + - 48kHz / 64kbps is a good voice-note tradeoff and required for the round bubble. +- **Other channels**: MP3 (`mp3_44100_128` from ElevenLabs, `mp3` from OpenAI). + - 44.1kHz / 128kbps is the default balance for speech clarity. +- **Microsoft**: uses `microsoft.outputFormat` (default `audio-24khz-48kbitrate-mono-mp3`). + - The bundled transport accepts an `outputFormat`, but not all formats are available from the service. + - Output format values follow Microsoft Speech output formats (including Ogg/WebM Opus). 
+ - Telegram `sendVoice` accepts OGG/MP3/M4A; use OpenAI/ElevenLabs if you need + guaranteed Opus voice notes. + - If the configured Microsoft output format fails, OpenClaw retries with MP3. + +OpenAI/ElevenLabs formats are fixed; Telegram expects Opus for voice-note UX. + +## Auto-TTS behavior + +When enabled, OpenClaw: + +- skips TTS if the reply already contains media or a `MEDIA:` directive. +- skips very short replies (< 10 chars). +- summarizes long replies when enabled using `agents.defaults.model.primary` (or `summaryModel`). +- attaches the generated audio to the reply. + +If the reply exceeds `maxLength` and summary is off (or no API key for the +summary model), audio +is skipped and the normal text reply is sent. + +## Flow diagram + +``` +Reply -> TTS enabled? + no -> send text + yes -> has media / MEDIA: / short? + yes -> send text + no -> length > limit? + no -> TTS -> attach audio + yes -> summary enabled? + no -> send text + yes -> summarize (summaryModel or agents.defaults.model.primary) + -> TTS -> attach audio +``` + +## Slash command usage + +There is a single command: `/tts`. +See [Slash commands](/tools/slash-commands) for enablement details. + +Discord note: `/tts` is a built-in Discord command, so OpenClaw registers +`/voice` as the native command there. Text `/tts ...` still works. + +``` +/tts off +/tts always +/tts inbound +/tts tagged +/tts status +/tts provider openai +/tts limit 2000 +/tts summary off +/tts audio Hello from OpenClaw +``` + +Notes: + +- Commands require an authorized sender (allowlist/owner rules still apply). +- `commands.text` or native command registration must be enabled. +- `off|always|inbound|tagged` are per‑session toggles (`/tts on` is an alias for `/tts always`). +- `limit` and `summary` are stored in local prefs, not the main config. +- `/tts audio` generates a one-off audio reply (does not toggle TTS on). 
+ +## Agent tool + +The `tts` tool converts text to speech and returns a `MEDIA:` path. When the +result is Telegram-compatible, the tool includes `[[audio_as_voice]]` so +Telegram sends a voice bubble. + +## Gateway RPC + +Gateway methods: + +- `tts.status` +- `tts.enable` +- `tts.disable` +- `tts.convert` +- `tts.setProvider` +- `tts.providers` diff --git a/docs/tools/web.md b/docs/tools/web.md index 0e30c6c9c7c..313e709c32f 100644 --- a/docs/tools/web.md +++ b/docs/tools/web.md @@ -26,7 +26,7 @@ These are **not** browser automation. For JS-heavy sites or logins, use the - `web_fetch` is enabled by default (unless explicitly disabled). - The bundled Firecrawl plugin also adds `firecrawl_search` and `firecrawl_scrape` when enabled. -See [Brave Search setup](/brave-search) and [Perplexity Search setup](/perplexity) for provider-specific details. +See [Brave Search setup](/tools/brave-search) and [Perplexity Search setup](/tools/perplexity-search) for provider-specific details. ## Choosing a search provider