From fa267d6966a1065e58ec21966ed96180790c2523 Mon Sep 17 00:00:00 2001 From: Forrest Blount Date: Wed, 11 Mar 2026 21:17:39 +0000 Subject: [PATCH] voice-call: add one-time nonce to realtime WebSocket upgrade path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue a UUID stream token in buildTwiMLPayload and embed it in the stream WebSocket URL (?token=UUID). handleWebSocketUpgrade validates and consumes the token before accepting the upgrade — connections without a valid token receive 401 and the socket is destroyed. The token is only issued after verifyWebhook passes on the initial POST, so the authentication chain is: Twilio HMAC signature check → TwiML with nonce → WS upgrade validates nonce. Prevents unauthenticated connections from triggering OpenAI Realtime API sessions. Tokens expire after 30 s; expired entries are evicted the next time a token is issued. Co-Authored-By: Claude Sonnet 4.6 --- .../src/webhook/realtime-handler.test.ts | 56 ++++++++++++++++++- .../src/webhook/realtime-handler.ts | 48 ++++++++++++++-- 2 files changed, 98 insertions(+), 6 deletions(-) diff --git a/extensions/voice-call/src/webhook/realtime-handler.test.ts b/extensions/voice-call/src/webhook/realtime-handler.test.ts index f4ad179d441..755b21cc7cb 100644 --- a/extensions/voice-call/src/webhook/realtime-handler.test.ts +++ b/extensions/voice-call/src/webhook/realtime-handler.test.ts @@ -5,6 +5,12 @@ import type { CallRecord } from "../types.js"; import type { VoiceCallProvider } from "../providers/base.js"; import { RealtimeCallHandler } from "./realtime-handler.js"; +/** Extract the stream token from a TwiML body string. */ +function extractStreamToken(twiml: string): string | null { + const match = twiml.match(/\?token=([^"&\s]+)/); + return match?.[1] ?? 
null; +} + // Minimal realtime config used across tests const baseRealtimeConfig = { enabled: true, @@ -93,7 +99,7 @@ describe("RealtimeCallHandler", () => { expect(payload.headers?.["Content-Type"]).toBe("text/xml"); expect(payload.body).toContain(""); expect(payload.body).toContain(" { @@ -106,7 +112,53 @@ describe("RealtimeCallHandler", () => { const req = makeRequest("/voice/webhook", ""); const payload = handler.buildTwiMLPayload(req); - expect(payload.body).toContain("wss://localhost:8443/voice/stream/realtime"); + expect(payload.body).toContain("wss://localhost:8443/voice/stream/realtime?token="); + }); + + it("embeds a unique token on each call", () => { + const handler = new RealtimeCallHandler( + baseRealtimeConfig, + makeManager(), + makeProvider(), + null, + ); + const req = makeRequest("/voice/webhook", "host.example.com"); + const token1 = extractStreamToken(handler.buildTwiMLPayload(req).body); + const token2 = extractStreamToken(handler.buildTwiMLPayload(req).body); + expect(token1).toBeTruthy(); + expect(token2).toBeTruthy(); + expect(token1).not.toBe(token2); + }); + }); + + // --------------------------------------------------------------------------- + // Stream token (nonce) validation + // --------------------------------------------------------------------------- + + describe("stream token (nonce)", () => { + it("issueStreamToken + consumeStreamToken: valid token accepted once then rejected", () => { + const handler = new RealtimeCallHandler( + baseRealtimeConfig, + makeManager(), + makeProvider(), + null, + ); + const issue = (handler as unknown as { issueStreamToken: () => string }).issueStreamToken; + const consume = (handler as unknown as { consumeStreamToken: (t: string) => boolean }).consumeStreamToken; + const token = issue.call(handler); + expect(consume.call(handler, token)).toBe(true); + expect(consume.call(handler, token)).toBe(false); + }); + + it("rejects unknown tokens", () => { + const handler = new RealtimeCallHandler( + 
baseRealtimeConfig, + makeManager(), + makeProvider(), + null, + ); + const consume = (handler as unknown as { consumeStreamToken: (t: string) => boolean }).consumeStreamToken; + expect(consume.call(handler, "not-a-real-token")).toBe(false); }); }); diff --git a/extensions/voice-call/src/webhook/realtime-handler.ts b/extensions/voice-call/src/webhook/realtime-handler.ts index d459e100aa7..25635a30172 100644 --- a/extensions/voice-call/src/webhook/realtime-handler.ts +++ b/extensions/voice-call/src/webhook/realtime-handler.ts @@ -1,4 +1,5 @@ import http from "node:http"; +import { randomUUID } from "node:crypto"; import type { Duplex } from "node:stream"; import { type WebSocket, Server as WebSocketServer } from "ws"; import type { VoiceCallRealtimeConfig } from "../config.js"; @@ -23,8 +24,13 @@ export type ToolHandlerFn = (args: unknown, callId: string) => Promise; * - Register each call with CallManager (appears in voice status/history) * - Route tool calls to registered handlers (Phase 5 tool framework) */ +/** How long (ms) a stream token remains valid after TwiML is issued. */ +const STREAM_TOKEN_TTL_MS = 30_000; + export class RealtimeCallHandler { private toolHandlers = new Map(); + /** One-time tokens issued per TwiML response; consumed on WS upgrade. */ + private pendingStreamTokens = new Map(); constructor( private config: VoiceCallRealtimeConfig, @@ -37,9 +43,22 @@ export class RealtimeCallHandler { /** * Handle a WebSocket upgrade request from Twilio for a realtime media stream. - * Called from VoiceCallWebhookServer's upgrade handler when isRealtimeMode() is true. + * Called from VoiceCallWebhookServer's upgrade handler when isRealtimeWebSocketUpgrade() is true. + * + * Validates the one-time stream token embedded in the URL by buildTwiMLPayload before + * accepting the upgrade. This ensures the WS connection was preceded by a properly + * Twilio-signed POST webhook — the token is only issued after verifyWebhook passes. 
*/ handleWebSocketUpgrade(request: http.IncomingMessage, socket: Duplex, head: Buffer): void { + const url = new URL(request.url ?? "/", "wss://localhost"); + const token = url.searchParams.get("token"); + if (!token || !this.consumeStreamToken(token)) { + console.warn("[voice-call] Rejecting WS upgrade: missing or invalid stream token"); + socket.write("HTTP/1.1 401 Unauthorized\r\n\r\n"); + socket.destroy(); + return; + } + const wss = new WebSocketServer({ noServer: true }); wss.handleUpgrade(request, socket, head, (ws) => { let bridge: OpenAIRealtimeVoiceBridge | null = null; @@ -81,12 +100,14 @@ export class RealtimeCallHandler { /** * Build the TwiML response payload for a realtime call. * The WebSocket URL is derived from the incoming request host so no hostname - * is hardcoded. + * is hardcoded. A one-time stream token is embedded in the URL and validated + * by handleWebSocketUpgrade to prevent unauthenticated WS connections. */ buildTwiMLPayload(req: http.IncomingMessage): WebhookResponsePayload { const host = req.headers.host || "localhost:8443"; - const wsUrl = `wss://${host}/voice/stream/realtime`; - console.log(`[voice-call] Returning realtime TwiML with WebSocket: ${wsUrl}`); + const token = this.issueStreamToken(); + const wsUrl = `wss://${host}/voice/stream/realtime?token=${token}`; + console.log(`[voice-call] Returning realtime TwiML with WebSocket: wss://${host}/voice/stream/realtime`); const twiml = ` @@ -116,6 +137,25 @@ export class RealtimeCallHandler { // Private // --------------------------------------------------------------------------- + /** Generate a single-use stream token valid for STREAM_TOKEN_TTL_MS. 
*/ + private issueStreamToken(): string { + const token = randomUUID(); + this.pendingStreamTokens.set(token, Date.now() + STREAM_TOKEN_TTL_MS); + // Evict expired tokens to prevent unbounded growth if calls are abandoned + for (const [t, expiry] of this.pendingStreamTokens) { + if (Date.now() > expiry) this.pendingStreamTokens.delete(t); + } + return token; + } + + /** Consume a stream token. Returns true if valid and not yet used. */ + private consumeStreamToken(token: string): boolean { + const expiry = this.pendingStreamTokens.get(token); + if (expiry === undefined) return false; + this.pendingStreamTokens.delete(token); + return Date.now() <= expiry; + } + /** * Create and start the OpenAI Realtime bridge for a single call session. * Registers the call with CallManager so it appears in status/history.