voice-call: add one-time nonce to realtime WebSocket upgrade path
Issue a UUID stream token in buildTwiMLPayload and embed it in the <Stream> URL (?token=<uuid>). handleWebSocketUpgrade validates and consumes the token before accepting the upgrade — connections without a valid token receive 401 and the socket is destroyed. The token is only issued after verifyWebhook passes on the initial POST, so the authentication chain is: Twilio HMAC signature check → TwiML with nonce → WS upgrade validates nonce. Prevents unauthenticated connections from triggering OpenAI Realtime API sessions. Tokens expire after 30 s; expired entries are evicted on next issue. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
6327d64070
commit
fa267d6966
@ -5,6 +5,12 @@ import type { CallRecord } from "../types.js";
|
||||
import type { VoiceCallProvider } from "../providers/base.js";
|
||||
import { RealtimeCallHandler } from "./realtime-handler.js";
|
||||
|
||||
/**
 * Extract the one-time stream token from a TwiML body string.
 *
 * Looks for the first `?token=` query parameter and captures the value up to
 * the next delimiter. The value stops at either kind of attribute quote, `&`
 * (which also covers an XML-escaped `&amp;`), angle brackets, or whitespace,
 * so the surrounding markup is never swallowed into the token regardless of
 * how the URL is quoted.
 *
 * @param twiml - Raw TwiML/XML text to search.
 * @returns The token value, or `null` when no non-empty `?token=` value exists.
 */
function extractStreamToken(twiml: string): string | null {
  const match = /\?token=([^"'&<>\s]+)/.exec(twiml);
  return match?.[1] ?? null;
}
|
||||
|
||||
// Minimal realtime config used across tests
|
||||
const baseRealtimeConfig = {
|
||||
enabled: true,
|
||||
@ -93,7 +99,7 @@ describe("RealtimeCallHandler", () => {
|
||||
expect(payload.headers?.["Content-Type"]).toBe("text/xml");
|
||||
expect(payload.body).toContain("<Connect>");
|
||||
expect(payload.body).toContain("<Stream");
|
||||
expect(payload.body).toContain('url="wss://gateway.ts.net/voice/stream/realtime"');
|
||||
expect(payload.body).toContain("wss://gateway.ts.net/voice/stream/realtime?token=");
|
||||
});
|
||||
|
||||
it("falls back to localhost when no host header is present", () => {
|
||||
@ -106,7 +112,53 @@ describe("RealtimeCallHandler", () => {
|
||||
const req = makeRequest("/voice/webhook", "");
|
||||
const payload = handler.buildTwiMLPayload(req);
|
||||
|
||||
expect(payload.body).toContain("wss://localhost:8443/voice/stream/realtime");
|
||||
expect(payload.body).toContain("wss://localhost:8443/voice/stream/realtime?token=");
|
||||
});
|
||||
|
||||
it("embeds a unique token on each call", () => {
  const handler = new RealtimeCallHandler(baseRealtimeConfig, makeManager(), makeProvider(), null);
  const req = makeRequest("/voice/webhook", "host.example.com");

  // Each TwiML payload built for the same request must carry its own nonce.
  const nextToken = (): string | null => extractStreamToken(handler.buildTwiMLPayload(req).body);
  const first = nextToken();
  const second = nextToken();

  expect(first).toBeTruthy();
  expect(second).toBeTruthy();
  expect(first).not.toBe(second);
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Stream token (nonce) validation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("stream token (nonce)", () => {
  /** Shape of the handler's private token methods, used for white-box testing only. */
  type StreamTokenApi = {
    issueStreamToken(): string;
    consumeStreamToken(token: string): boolean;
  };

  /**
   * Build a fresh handler and view it through its private token methods.
   * The double cast is confined to this one place; the methods are called on
   * the handler itself, so `this` is bound exactly as in production.
   */
  function makeTokenApi(): StreamTokenApi {
    const handler = new RealtimeCallHandler(
      baseRealtimeConfig,
      makeManager(),
      makeProvider(),
      null,
    );
    return handler as unknown as StreamTokenApi;
  }

  it("issueStreamToken + consumeStreamToken: valid token accepted once then rejected", () => {
    const api = makeTokenApi();
    const token = api.issueStreamToken();
    expect(api.consumeStreamToken(token)).toBe(true);
    expect(api.consumeStreamToken(token)).toBe(false);
  });

  it("rejects unknown tokens", () => {
    expect(makeTokenApi().consumeStreamToken("not-a-real-token")).toBe(false);
  });

  it("rejects a token presented after its TTL has elapsed", () => {
    const api = makeTokenApi();
    const token = api.issueStreamToken();

    // Move the clock past the 30 s token TTL without sleeping. Date.now is
    // patched by hand and always restored so other tests are unaffected.
    const realNow = Date.now;
    Date.now = () => realNow() + 60_000;
    try {
      expect(api.consumeStreamToken(token)).toBe(false);
    } finally {
      Date.now = realNow;
    }

    // The expired token was still consumed, so it stays rejected afterwards.
    expect(api.consumeStreamToken(token)).toBe(false);
  });
});
|
||||
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
import http from "node:http";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import type { Duplex } from "node:stream";
|
||||
import { type WebSocket, Server as WebSocketServer } from "ws";
|
||||
import type { VoiceCallRealtimeConfig } from "../config.js";
|
||||
@ -23,8 +24,13 @@ export type ToolHandlerFn = (args: unknown, callId: string) => Promise<unknown>;
|
||||
* - Register each call with CallManager (appears in voice status/history)
|
||||
* - Route tool calls to registered handlers (Phase 5 tool framework)
|
||||
*/
|
||||
/** How long (ms) a stream token remains valid after TwiML is issued. */
|
||||
const STREAM_TOKEN_TTL_MS = 30_000;
|
||||
|
||||
export class RealtimeCallHandler {
|
||||
private toolHandlers = new Map<string, ToolHandlerFn>();
|
||||
/** One-time tokens issued per TwiML response; consumed on WS upgrade. */
|
||||
private pendingStreamTokens = new Map<string, number>();
|
||||
|
||||
constructor(
|
||||
private config: VoiceCallRealtimeConfig,
|
||||
@ -37,9 +43,22 @@ export class RealtimeCallHandler {
|
||||
|
||||
/**
|
||||
* Handle a WebSocket upgrade request from Twilio for a realtime media stream.
|
||||
* Called from VoiceCallWebhookServer's upgrade handler when isRealtimeMode() is true.
|
||||
* Called from VoiceCallWebhookServer's upgrade handler when isRealtimeWebSocketUpgrade() is true.
|
||||
*
|
||||
* Validates the one-time stream token embedded in the URL by buildTwiMLPayload before
|
||||
* accepting the upgrade. This ensures the WS connection was preceded by a properly
|
||||
* Twilio-signed POST webhook — the token is only issued after verifyWebhook passes.
|
||||
*/
|
||||
handleWebSocketUpgrade(request: http.IncomingMessage, socket: Duplex, head: Buffer): void {
|
||||
const url = new URL(request.url ?? "/", "wss://localhost");
|
||||
const token = url.searchParams.get("token");
|
||||
if (!token || !this.consumeStreamToken(token)) {
|
||||
console.warn("[voice-call] Rejecting WS upgrade: missing or invalid stream token");
|
||||
socket.write("HTTP/1.1 401 Unauthorized\r\n\r\n");
|
||||
socket.destroy();
|
||||
return;
|
||||
}
|
||||
|
||||
const wss = new WebSocketServer({ noServer: true });
|
||||
wss.handleUpgrade(request, socket, head, (ws) => {
|
||||
let bridge: OpenAIRealtimeVoiceBridge | null = null;
|
||||
@ -81,12 +100,14 @@ export class RealtimeCallHandler {
|
||||
/**
|
||||
* Build the TwiML <Connect><Stream> response payload for a realtime call.
|
||||
* The WebSocket URL is derived from the incoming request host so no hostname
|
||||
* is hardcoded.
|
||||
* is hardcoded. A one-time stream token is embedded in the URL and validated
|
||||
* by handleWebSocketUpgrade to prevent unauthenticated WS connections.
|
||||
*/
|
||||
buildTwiMLPayload(req: http.IncomingMessage): WebhookResponsePayload {
|
||||
const host = req.headers.host || "localhost:8443";
|
||||
const wsUrl = `wss://${host}/voice/stream/realtime`;
|
||||
console.log(`[voice-call] Returning realtime TwiML with WebSocket: ${wsUrl}`);
|
||||
const token = this.issueStreamToken();
|
||||
const wsUrl = `wss://${host}/voice/stream/realtime?token=${token}`;
|
||||
console.log(`[voice-call] Returning realtime TwiML with WebSocket: wss://${host}/voice/stream/realtime`);
|
||||
const twiml = `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Response>
|
||||
<Connect>
|
||||
@ -116,6 +137,25 @@ export class RealtimeCallHandler {
|
||||
// Private
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Generate a single-use stream token valid for STREAM_TOKEN_TTL_MS.
 *
 * The token is a random UUID recorded in `pendingStreamTokens` together with
 * its absolute expiry time (ms since epoch). Each call also evicts entries
 * whose expiry has passed, so tokens for abandoned calls do not accumulate.
 *
 * @returns The newly issued token.
 */
private issueStreamToken(): string {
  // Read the clock once so the sweep and the new expiry share one timestamp.
  const now = Date.now();

  // Sweep before inserting: the fresh token never needs to be visited.
  // Deleting entries from a Map while iterating it is well-defined.
  for (const [pending, expiresAt] of this.pendingStreamTokens) {
    if (now > expiresAt) this.pendingStreamTokens.delete(pending);
  }

  const token = randomUUID();
  this.pendingStreamTokens.set(token, now + STREAM_TOKEN_TTL_MS);
  return token;
}
|
||||
|
||||
/**
 * Consume a stream token.
 *
 * A known token is removed from the pending set whether or not it has
 * expired, so no token can ever be presented twice.
 *
 * @param token - Token value to validate and consume.
 * @returns `true` only if the token was pending and its expiry has not passed.
 */
private consumeStreamToken(token: string): boolean {
  const expiresAt = this.pendingStreamTokens.get(token);
  if (expiresAt !== undefined) {
    // Remove first, then judge freshness: expired tokens are consumed too.
    this.pendingStreamTokens.delete(token);
    const stillValid = Date.now() <= expiresAt;
    return stillValid;
  }
  return false;
}
|
||||
|
||||
/**
|
||||
* Create and start the OpenAI Realtime bridge for a single call session.
|
||||
* Registers the call with CallManager so it appears in status/history.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user