From 725741486fe1338cf372c1e3b401d868ed3d52da Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 15 Feb 2026 03:41:01 +0100 Subject: [PATCH] fix(discord): harden voice message media loading --- CHANGELOG.md | 1 + src/agents/tools/discord-actions-messaging.ts | 9 +-- src/discord/send.outbound.ts | 75 ++++++++++++++----- .../send.voice-message.security.test.ts | 24 ++++++ src/discord/voice-message.ts | 14 +++- 5 files changed, 96 insertions(+), 27 deletions(-) create mode 100644 src/discord/send.voice-message.security.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index e96dbb1067c..5b1521b323b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai - Cron: prevent `cron list`/`cron status` from silently skipping past-due recurring jobs by using maintenance recompute semantics. (#16156) Thanks @zerone0x. - Cron: repair missing/corrupt `nextRunAtMs` for the updated job without globally recomputing unrelated due jobs during `cron update`. (#15750) - Discord: prefer gateway guild id when logging inbound messages so cached-miss guilds do not appear as `guild=dm`. Thanks @thewilloftheshadow. +- Discord/Security: harden voice message media loading (SSRF + allowed-local-root checks) so tool-supplied paths/URLs cannot be used to probe internal URLs or read arbitrary local files. - TUI: use available terminal width for session name display in searchable select lists. (#16238) Thanks @robbyczgw-cla. - TUI: refactor searchable select list description layout and add regression coverage for ANSI-highlight width bounds. - TUI/Gateway: resolve local gateway target URL from `gateway.bind` mode (tailnet/lan) instead of hardcoded localhost so `openclaw tui` connects when gateway is non-loopback. (#16299) Thanks @cortexuvula. diff --git a/src/agents/tools/discord-actions-messaging.ts b/src/agents/tools/discord-actions-messaging.ts index c650c27faf8..54f5e970bf1 100644 --- a/src/agents/tools/discord-actions-messaging.ts +++ b/src/agents/tools/discord-actions-messaging.ts @@ -23,6 +23,7 @@ import { } from "../../discord/send.js"; import { resolveDiscordChannelId } from "../../discord/targets.js"; import { withNormalizedTimestamp } from "../date-time.js"; +import { assertMediaNotDataUrl } from "../sandbox-paths.js"; import { type ActionGate, jsonResult, @@ -247,7 +248,7 @@ export async function handleDiscordMessagingAction( if (asVoice) { if (!mediaUrl) { throw new Error( - "Voice messages require a local media file path (mediaUrl, path, or filePath).", + "Voice messages require a media file reference (mediaUrl, path, or filePath).", ); } if (content && content.trim()) { @@ -255,11 +256,7 @@ export async function handleDiscordMessagingAction( "Voice messages cannot include text content (Discord limitation). Remove the content parameter.", ); } - if (mediaUrl.startsWith("http://") || mediaUrl.startsWith("https://")) { - throw new Error( - "Voice messages require a local file path, not a URL. Download the file first.", - ); - } + assertMediaNotDataUrl(mediaUrl); const result = await sendVoiceMessageDiscord(to, mediaUrl, { ...(accountId ? { accountId } : {}), replyTo, diff --git a/src/discord/send.outbound.ts b/src/discord/send.outbound.ts index 20d7567d472..cbbadcc2f20 100644 --- a/src/discord/send.outbound.ts +++ b/src/discord/send.outbound.ts @@ -1,7 +1,9 @@ import type { RequestClient } from "@buape/carbon"; import type { APIChannel } from "discord-api-types/v10"; import { ChannelType, Routes } from "discord-api-types/v10"; +import crypto from "node:crypto"; import fs from "node:fs/promises"; +import path from "node:path"; import type { RetryConfig } from "../infra/retry.js"; import type { PollInput } from "../polls.js"; import type { DiscordSendResult } from "./send.types.js"; @@ -9,7 +11,11 @@ import { resolveChunkMode } from "../auto-reply/chunk.js"; import { loadConfig } from "../config/config.js"; import { resolveMarkdownTableMode } from "../config/markdown-tables.js"; import { recordChannelActivity } from "../infra/channel-activity.js"; +import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js"; import { convertMarkdownTables } from "../markdown/tables.js"; +import { maxBytesForKind } from "../media/constants.js"; +import { extensionForMime } from "../media/mime.js"; +import { loadWebMediaRaw } from "../web/media.js"; import { resolveDiscordAccount } from "./accounts.js"; import { buildDiscordSendError, @@ -306,6 +312,19 @@ type VoiceMessageOpts = { silent?: boolean; }; +async function materializeVoiceMessageInput(mediaUrl: string): Promise<{ filePath: string }> { + // Security: reuse the standard media loader so we apply SSRF guards + allowed-local-root checks. + // Then write to a private temp file so ffmpeg/ffprobe never sees the original URL/path string. + const media = await loadWebMediaRaw(mediaUrl, maxBytesForKind("audio")); + const extFromName = media.fileName ? path.extname(media.fileName) : ""; + const extFromMime = media.contentType ? extensionForMime(media.contentType) : ""; + const ext = extFromName || extFromMime || ".bin"; + const tempDir = resolvePreferredOpenClawTmpDir(); + const filePath = path.join(tempDir, `voice-src-${crypto.randomUUID()}${ext}`); + await fs.writeFile(filePath, media.buffer, { mode: 0o600 }); + return { filePath }; +} + /** * Send a voice message to Discord. * @@ -321,19 +340,31 @@ export async function sendVoiceMessageDiscord( audioPath: string, opts: VoiceMessageOpts = {}, ): Promise { - const cfg = loadConfig(); - const accountInfo = resolveDiscordAccount({ - cfg, - accountId: opts.accountId, - }); - const { token, rest, request } = createDiscordClient(opts, cfg); - const recipient = await parseAndResolveRecipient(to, opts.accountId); - const { channelId } = await resolveChannelId(rest, recipient, request); - - // Convert to OGG/Opus if needed - const { path: oggPath, cleanup } = await ensureOggOpus(audioPath); + const { filePath: localInputPath } = await materializeVoiceMessageInput(audioPath); + let oggPath: string | null = null; + let oggCleanup = false; + let token: string | undefined; + let rest: RequestClient | undefined; + let channelId: string | undefined; try { + const cfg = loadConfig(); + const accountInfo = resolveDiscordAccount({ + cfg, + accountId: opts.accountId, + }); + const client = createDiscordClient(opts, cfg); + token = client.token; + rest = client.rest; + const request = client.request; + const recipient = await parseAndResolveRecipient(to, opts.accountId); + channelId = (await resolveChannelId(rest, recipient, request)).channelId; + + // Convert to OGG/Opus if needed + const ogg = await ensureOggOpus(localInputPath); + oggPath = ogg.path; + oggCleanup = ogg.cleanup; + // Get voice message metadata (duration and waveform) const metadata = await getVoiceMessageMetadata(oggPath); @@ -362,20 +393,28 @@ export async function sendVoiceMessageDiscord( channelId: String(result.channel_id ?? channelId), }; } catch (err) { - throw await buildDiscordSendError(err, { - channelId, - rest, - token, - hasMedia: true, - }); + if (channelId && rest && token) { + throw await buildDiscordSendError(err, { + channelId, + rest, + token, + hasMedia: true, + }); + } + throw err; } finally { // Clean up temporary OGG file if we created one - if (cleanup) { + if (oggCleanup && oggPath) { try { await fs.unlink(oggPath); } catch { // Ignore cleanup errors } } + try { + await fs.unlink(localInputPath); + } catch { + // Ignore cleanup errors + } } } diff --git a/src/discord/send.voice-message.security.test.ts b/src/discord/send.voice-message.security.test.ts new file mode 100644 index 00000000000..9651f57c5ae --- /dev/null +++ b/src/discord/send.voice-message.security.test.ts @@ -0,0 +1,24 @@ +import path from "node:path"; +import { describe, expect, it } from "vitest"; +import { sendVoiceMessageDiscord } from "./send.js"; + +describe("sendVoiceMessageDiscord - media hardening", () => { + it("rejects local paths outside allowed media roots (prevents local file exfiltration)", async () => { + const candidate = path.join(process.cwd(), "package.json"); + await expect(sendVoiceMessageDiscord("channel:123", candidate)).rejects.toThrow( + /Local media path is not under an allowed directory/, + ); + }); + + it("blocks SSRF targets when given a private-network URL", async () => { + await expect( + sendVoiceMessageDiscord("channel:123", "http://127.0.0.1/voice.ogg"), + ).rejects.toThrow(/Failed to fetch media|Blocked/); + }); + + it("does not allow non-http URL schemes to reach ffmpeg/ffprobe", async () => { + await expect( + sendVoiceMessageDiscord("channel:123", "rtsp://example.com/voice.ogg"), + ).rejects.toThrow(/Local media path is not under an allowed directory|ENOENT|no such file/i); + }); +}); diff --git a/src/discord/voice-message.ts b/src/discord/voice-message.ts index a7e7c0014c5..b5e289d6775 100644 --- a/src/discord/voice-message.ts +++ b/src/discord/voice-message.ts @@ -14,10 +14,10 @@ import type { RequestClient } from "@buape/carbon"; import { execFile } from "node:child_process"; import crypto from "node:crypto"; import fs from "node:fs/promises"; -import os from "node:os"; import path from "node:path"; import { promisify } from "node:util"; import type { RetryRunner } from "../infra/retry-policy.js"; +import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js"; const execFileAsync = promisify(execFile); @@ -73,7 +73,7 @@ export async function generateWaveform(filePath: string): Promise { * Generate waveform by extracting raw PCM data and sampling amplitudes */ async function generateWaveformFromPcm(filePath: string): Promise { - const tempDir = os.tmpdir(); + const tempDir = resolvePreferredOpenClawTmpDir(); const tempPcm = path.join(tempDir, `waveform-${crypto.randomUUID()}.raw`); try { @@ -148,6 +148,14 @@ function generatePlaceholderWaveform(): string { * Returns path to the OGG file (may be same as input if already OGG/Opus) */ export async function ensureOggOpus(filePath: string): Promise<{ path: string; cleanup: boolean }> { + const trimmed = filePath.trim(); + // Defense-in-depth: callers should never hand ffmpeg/ffprobe a URL/protocol path. + if (/^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed)) { + throw new Error( + `Voice message conversion requires a local file path; received a URL/protocol source: ${trimmed}`, + ); + } + const ext = path.extname(filePath).toLowerCase(); // Check if already OGG @@ -174,7 +182,7 @@ export async function ensureOggOpus(filePath: string): Promise<{ path: string; c } // Convert to OGG/Opus - const tempDir = os.tmpdir(); + const tempDir = resolvePreferredOpenClawTmpDir(); const outputPath = path.join(tempDir, `voice-${crypto.randomUUID()}.ogg`); await execFileAsync("ffmpeg", [