From 18fc208ee53de464e194ac5d06e0b003da3e2c3b Mon Sep 17 00:00:00 2001 From: Forrest Blount Date: Wed, 11 Mar 2026 21:02:13 +0000 Subject: [PATCH] voice-call: webhook correctness fixes for realtime path - Export WebhookResponsePayload from webhook.ts; import in realtime-handler.ts to remove the duplicate local definition - Rename isRealtimeMode -> isRealtimeWebSocketUpgrade to clarify it is only used for WS upgrade routing, not HTTP POST routing - Move realtime TwiML intercept to after verifyWebhook so inbound calls are authenticated against the provider signature before the handler responds Co-Authored-By: Claude Sonnet 4.6 --- extensions/voice-call/src/webhook.ts | 25 ++++++++----------- .../src/webhook/realtime-handler.ts | 7 +----- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/extensions/voice-call/src/webhook.ts b/extensions/voice-call/src/webhook.ts index 44ab5df373c..a8c3bf0c4e1 100644 --- a/extensions/voice-call/src/webhook.ts +++ b/extensions/voice-call/src/webhook.ts @@ -19,7 +19,7 @@ import type { RealtimeCallHandler } from "./webhook/realtime-handler.js"; const MAX_WEBHOOK_BODY_BYTES = 1024 * 1024; -type WebhookResponsePayload = { +export type WebhookResponsePayload = { statusCode: number; body: string; headers?: Record; @@ -244,7 +244,7 @@ export class VoiceCallWebhookServer { if (this.realtimeHandler || this.mediaStreamHandler) { this.server.on("upgrade", (request, socket, head) => { // Realtime voice takes precedence when the path matches - if (this.realtimeHandler && this.isRealtimeMode(request)) { + if (this.realtimeHandler && this.isRealtimeWebSocketUpgrade(request)) { console.log("[voice-call] WebSocket upgrade for realtime voice"); this.realtimeHandler.handleWebSocketUpgrade(request, socket, head); return; @@ -359,7 +359,7 @@ export class VoiceCallWebhookServer { * Returns true for WebSocket upgrade paths that belong to the realtime handler. * Used only for upgrade routing — not for the inbound HTTP webhook POST. 
 */ - private isRealtimeMode(req: http.IncomingMessage): boolean { + private isRealtimeWebSocketUpgrade(req: http.IncomingMessage): boolean { return (req.url ?? "/").includes("/realtime"); } @@ -367,17 +367,6 @@ export class VoiceCallWebhookServer { req: http.IncomingMessage, webhookPath: string, ): Promise<WebhookResponsePayload> { - // Realtime mode: whenever the realtime handler is active, ALL inbound calls - // use it. The handler returns TwiML so Twilio opens a - // WebSocket to the /voice/stream/realtime path, which is routed back here - // via the upgrade handler's isRealtimeMode() check. - if (this.realtimeHandler && req.method === "POST") { - const url = buildRequestUrl(req.url, req.headers.host); - if (this.isWebhookPathMatch(url.pathname, webhookPath)) { - return this.realtimeHandler.buildTwiMLPayload(req); - } - } - const url = buildRequestUrl(req.url, req.headers.host); if (url.pathname === "/voice/hold-music") { @@ -432,6 +421,14 @@ export class VoiceCallWebhookServer { return { statusCode: 401, body: "Unauthorized" }; } + // Realtime mode: return TwiML after verification so + // the request is still authenticated against the provider's signature. + // The WebSocket that Twilio opens in response is routed via the upgrade + // handler's isRealtimeWebSocketUpgrade() check. 
+ if (this.realtimeHandler) { + return this.realtimeHandler.buildTwiMLPayload(req); + } + const parsed = this.provider.parseWebhookEvent(ctx, { verifiedRequestKey: verification.verifiedRequestKey, }); diff --git a/extensions/voice-call/src/webhook/realtime-handler.ts b/extensions/voice-call/src/webhook/realtime-handler.ts index 0d409abdd17..4fe0e071e84 100644 --- a/extensions/voice-call/src/webhook/realtime-handler.ts +++ b/extensions/voice-call/src/webhook/realtime-handler.ts @@ -10,15 +10,10 @@ import { } from "../providers/openai-realtime-voice.js"; import type { VoiceCallProvider } from "../providers/base.js"; import type { NormalizedEvent } from "../types.js"; +import type { WebhookResponsePayload } from "../webhook.js"; export type ToolHandlerFn = (args: unknown, callId: string) => Promise; -type WebhookResponsePayload = { - statusCode: number; - body: string; - headers?: Record; -}; - /** * Handles inbound voice calls bridged directly to the OpenAI Realtime API. *