From 520d2129becdb79d463341fade34d339f43ea973 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 13 Mar 2026 00:44:11 +0000 Subject: [PATCH] fix(security): persist voice-call replay protection --- extensions/voice-call/src/manager.test.ts | 20 +++++ extensions/voice-call/src/manager.ts | 27 ++++++ extensions/voice-call/src/replay-ledger.ts | 93 +++++++++++++++++++++ extensions/voice-call/src/webhook.ts | 47 +++++++++-- src/agents/bash-tools.exec.path.e2e.test.ts | 17 ++++ src/agents/bash-tools.exec.ts | 7 +- 6 files changed, 199 insertions(+), 12 deletions(-) create mode 100644 extensions/voice-call/src/replay-ledger.ts diff --git a/extensions/voice-call/src/manager.test.ts b/extensions/voice-call/src/manager.test.ts index 3ffe9b040a4..8649e6b57d0 100644 --- a/extensions/voice-call/src/manager.test.ts +++ b/extensions/voice-call/src/manager.test.ts @@ -261,4 +261,24 @@ describe("CallManager", () => { expect(manager.getCallByProviderCallId("provider-exact")).toBeDefined(); }); + + it("persists the webhook replay ledger across restarts", () => { + const config = VoiceCallConfigSchema.parse({ + enabled: true, + provider: "plivo", + fromNumber: "+15550000000", + }); + + const now = Date.now(); + const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`); + const manager = new CallManager(config, storePath); + manager.initialize(new FakeProvider(), "https://example.com/voice/webhook"); + manager.rememberWebhookReplay("plivo:replay-key", 60_000, now); + + const restarted = new CallManager(config, storePath); + restarted.initialize(new FakeProvider(), "https://example.com/voice/webhook"); + + expect(restarted.isRecentWebhookReplay("plivo:replay-key", now + 1)).toBe(true); + expect(restarted.isRecentWebhookReplay("plivo:replay-key", now + 60_001)).toBe(false); + }); }); diff --git a/extensions/voice-call/src/manager.ts b/extensions/voice-call/src/manager.ts index 3b3a5b7c061..f3e9e08f968 100644 --- a/extensions/voice-call/src/manager.ts +++ b/extensions/voice-call/src/manager.ts @@ -15,6 +15,7 @@ import { speakInitialMessage as speakInitialMessageWithContext, } from "./manager/outbound.js"; import { getCallHistoryFromStore, loadActiveCallsFromStore } from "./manager/store.js"; +import { loadReplayLedger, persistReplayLedgerEntry, pruneReplayLedger } from "./replay-ledger.js"; import { resolveUserPath } from "./utils.js"; function resolveDefaultStoreBase(config: VoiceCallConfig, storePath?: string): string { @@ -43,6 +44,7 @@ export class CallManager { private providerCallIdMap = new Map(); private processedEventIds = new Set(); private rejectedProviderCallIds = new Set(); + private webhookReplayLedger = new Map(); private provider: VoiceCallProvider | null = null; private config: VoiceCallConfig; private storePath: string; @@ -76,6 +78,7 @@ export class CallManager { this.providerCallIdMap = persisted.providerCallIdMap; this.processedEventIds = persisted.processedEventIds; this.rejectedProviderCallIds = persisted.rejectedProviderCallIds; + this.webhookReplayLedger = loadReplayLedger(this.storePath); } /** @@ -198,6 +201,30 @@ export class CallManager { return Array.from(this.activeCalls.values()); } + isRecentWebhookReplay(key: string, now = Date.now()): boolean { + this.pruneWebhookReplayLedger(now); + const expiresAt = this.webhookReplayLedger.get(key); + return typeof expiresAt === "number" && expiresAt > now; + } + + rememberWebhookReplay(key: string, ttlMs: number, now = Date.now()): void { + if (!Number.isFinite(ttlMs) || ttlMs <= 0) { + return; + } + this.pruneWebhookReplayLedger(now); + const expiresAt = now + ttlMs; + const previous = this.webhookReplayLedger.get(key); + if (typeof previous === "number" && previous >= expiresAt) { + return; + } + this.webhookReplayLedger.set(key, expiresAt); + persistReplayLedgerEntry(this.storePath, { key, expiresAt }); + } + + private pruneWebhookReplayLedger(now = Date.now()): void { + pruneReplayLedger(this.webhookReplayLedger, now); + } + /** * Get call history (from persisted logs). */ diff --git a/extensions/voice-call/src/replay-ledger.ts b/extensions/voice-call/src/replay-ledger.ts new file mode 100644 index 00000000000..65ad0b57d8a --- /dev/null +++ b/extensions/voice-call/src/replay-ledger.ts @@ -0,0 +1,93 @@ +import crypto from "node:crypto"; +import fs from "node:fs"; +import path from "node:path"; +import type { ProviderName, WebhookContext } from "./types.js"; + +const REPLAY_LEDGER_FILE = "webhook-replay.jsonl"; + +type ReplayLedgerRecord = { + key: string; + expiresAt: number; +}; + +export function buildWebhookReplayKey(params: { + provider: ProviderName; + ctx: WebhookContext; +}): string { + const url = new URL(params.ctx.url); + const sortedQuery = Array.from(url.searchParams.entries()) + .sort(([aKey, aValue], [bKey, bValue]) => { + if (aKey === bKey) { + return aValue.localeCompare(bValue); + } + return aKey.localeCompare(bKey); + }) + .map(([key, value]) => `${key}=${value}`) + .join("&"); + const digest = crypto + .createHash("sha256") + .update(params.provider) + .update("\n") + .update(params.ctx.method) + .update("\n") + .update(url.pathname) + .update("\n") + .update(sortedQuery) + .update("\n") + .update(params.ctx.rawBody) + .digest("hex"); + return `${params.provider}:${digest}`; +} + +export function loadReplayLedger(storePath: string, now = Date.now()): Map { + const logPath = path.join(storePath, REPLAY_LEDGER_FILE); + if (!fs.existsSync(logPath)) { + return new Map(); + } + + const ledger = new Map(); + const lines = fs.readFileSync(logPath, "utf-8").split("\n"); + for (const line of lines) { + if (!line.trim()) { + continue; + } + try { + const record = JSON.parse(line) as ReplayLedgerRecord; + if ( + typeof record.key !== "string" || + !record.key || + typeof record.expiresAt !== "number" || + !Number.isFinite(record.expiresAt) || + record.expiresAt <= now + ) { + continue; + } + ledger.set(record.key, record.expiresAt); + } catch { + // Ignore malformed lines. + } + } + + return ledger; +} + +export function pruneReplayLedger(ledger: Map, now = Date.now()): void { + for (const [key, expiresAt] of ledger) { + if (expiresAt <= now) { + ledger.delete(key); + } + } +} + +export function persistReplayLedgerEntry( + storePath: string, + params: { key: string; expiresAt: number }, +): void { + const logPath = path.join(storePath, REPLAY_LEDGER_FILE); + const line = `${JSON.stringify(params)}\n`; + try { + fs.appendFileSync(logPath, line); + } catch (err) { + console.error("[voice-call] Failed to persist replay ledger entry:", err); + } +} diff --git a/extensions/voice-call/src/webhook.ts b/extensions/voice-call/src/webhook.ts index 79ecc843cd4..2a13bb6d38e 100644 --- a/extensions/voice-call/src/webhook.ts +++ b/extensions/voice-call/src/webhook.ts @@ -15,8 +15,10 @@ import type { TwilioProvider } from "./providers/twilio.js"; import type { NormalizedEvent, WebhookContext } from "./types.js"; import { MediaStreamHandler } from "./media-stream.js"; import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js"; +import { buildWebhookReplayKey } from "./replay-ledger.js"; const MAX_WEBHOOK_BODY_BYTES = 1024 * 1024; +const WEBHOOK_REPLAY_TTL_MS = 15 * 60_000; /** * HTTP server for receiving voice call webhooks from providers. @@ -281,9 +283,25 @@ export class VoiceCallWebhookServer { return; } + const replayKey = buildWebhookReplayKey({ + provider: this.provider.name, + ctx, + }); + if (this.manager.isRecentWebhookReplay(replayKey)) { + console.warn(`[voice-call] Dropping replayed ${this.provider.name} webhook request`); + const replayResult = this.provider.parseWebhookEvent(ctx); + this.sendProviderResponse(res, replayResult); + return; + } + // Parse events const result = this.provider.parseWebhookEvent(ctx); + if ((result.statusCode ?? 200) >= 400) { + this.sendProviderResponse(res, result); + return; + } + // Process each event for (const event of result.events) { try { @@ -293,16 +311,10 @@ export class VoiceCallWebhookServer { } } + this.manager.rememberWebhookReplay(replayKey, WEBHOOK_REPLAY_TTL_MS); + // Send response - res.statusCode = result.statusCode || 200; - - if (result.providerResponseHeaders) { - for (const [key, value] of Object.entries(result.providerResponseHeaders)) { - res.setHeader(key, value); - } - } - - res.end(result.providerResponseBody || "OK"); + this.sendProviderResponse(res, result); } /** @@ -316,6 +328,23 @@ export class VoiceCallWebhookServer { return readRequestBodyWithLimit(req, { maxBytes, timeoutMs }); } + private sendProviderResponse( + res: http.ServerResponse, + result: { + providerResponseBody?: string; + providerResponseHeaders?: Record; + statusCode?: number; + }, + ): void { + res.statusCode = result.statusCode || 200; + if (result.providerResponseHeaders) { + for (const [key, value] of Object.entries(result.providerResponseHeaders)) { + res.setHeader(key, value); + } + } + res.end(result.providerResponseBody || "OK"); + } + /** * Handle auto-response for inbound calls using the agent system. * Supports tool calling for richer voice interactions. diff --git a/src/agents/bash-tools.exec.path.e2e.test.ts b/src/agents/bash-tools.exec.path.e2e.test.ts index 2002970735a..92916712ed2 100644 --- a/src/agents/bash-tools.exec.path.e2e.test.ts +++ b/src/agents/bash-tools.exec.path.e2e.test.ts @@ -108,6 +108,23 @@ describe("exec PATH login shell merge", () => { expect(shellPathMock).not.toHaveBeenCalled(); }); + + it("throws security violation on sandbox fallback when env.PATH is provided", async () => { + if (isWin) { + return; + } + process.env.PATH = "/usr/bin"; + + const { createExecTool } = await import("./bash-tools.exec.js"); + const tool = createExecTool({ host: "sandbox", security: "full", ask: "off" }); + + await expect( + tool.execute("call1", { + command: "echo ok", + env: { PATH: "/explicit/bin" }, + }), + ).rejects.toThrow(/Security Violation: Custom 'PATH' variable is forbidden/); + }); }); describe("exec host env validation", () => { diff --git a/src/agents/bash-tools.exec.ts b/src/agents/bash-tools.exec.ts index b9a7e83b28a..3239b94067e 100644 --- a/src/agents/bash-tools.exec.ts +++ b/src/agents/bash-tools.exec.ts @@ -293,10 +293,11 @@ export function createExecTool( } const baseEnv = coerceEnv(process.env); + const isHostExecution = !sandbox || host === "gateway" || host === "node"; - // Logic: Sandbox gets raw env. Host (gateway/node) must pass validation. - // We validate BEFORE merging to prevent any dangerous vars from entering the stream. - if (host !== "sandbox" && params.env) { + // When sandboxing is disabled, host=sandbox still executes on the gateway host. + // Validate dangerous env overrides on every real host execution path. + if (isHostExecution && params.env) { validateHostEnv(params.env); }