diff --git a/extensions/voice-call/src/config.test.ts b/extensions/voice-call/src/config.test.ts index 1b12e9e84c5..899a12af5a7 100644 --- a/extensions/voice-call/src/config.test.ts +++ b/extensions/voice-call/src/config.test.ts @@ -3,6 +3,7 @@ import { validateProviderConfig, normalizeVoiceCallConfig, resolveVoiceCallConfig, + VoiceCallRealtimeConfigSchema, type VoiceCallConfig, } from "./config.js"; import { createVoiceCallBaseConfig } from "./test-fixtures.js"; @@ -216,3 +217,115 @@ describe("normalizeVoiceCallConfig", () => { expect(normalized.tts?.elevenlabs?.voiceSettings).toEqual({ speed: 1.1 }); }); }); + +describe("VoiceCallRealtimeConfigSchema", () => { + it("defaults to disabled with empty tools array", () => { + const config = VoiceCallRealtimeConfigSchema.parse({}); + expect(config.enabled).toBe(false); + expect(config.tools).toEqual([]); + }); + + it("accepts all valid Realtime API voice names", () => { + const voices = ["alloy", "ash", "ballad", "cedar", "coral", "echo", "marin", "sage", "shimmer", "verse"]; + for (const voice of voices) { + expect(() => VoiceCallRealtimeConfigSchema.parse({ voice })).not.toThrow(); + } + }); + + it("rejects voice names that are not in the Realtime API (e.g. nova, fable, onyx)", () => { + for (const voice of ["nova", "fable", "onyx"]) { + expect(() => VoiceCallRealtimeConfigSchema.parse({ voice })).toThrow(); + } + }); + + it("normalizeVoiceCallConfig propagates realtime sub-config", () => { + const normalized = normalizeVoiceCallConfig({ + enabled: true, + provider: "mock", + realtime: { enabled: true, voice: "marin", instructions: "Be helpful." 
}, + }); + expect(normalized.realtime.enabled).toBe(true); + expect(normalized.realtime.voice).toBe("marin"); + expect(normalized.realtime.instructions).toBe("Be helpful."); + expect(normalized.realtime.tools).toEqual([]); + }); +}); + +describe("resolveVoiceCallConfig — realtime env vars", () => { + const originalEnv = { ...process.env }; + + afterEach(() => { + process.env = { ...originalEnv }; + }); + + it("auto-enables realtime from REALTIME_VOICE_ENABLED=true", () => { + process.env.REALTIME_VOICE_ENABLED = "true"; + const resolved = resolveVoiceCallConfig(createVoiceCallBaseConfig()); + expect(resolved.realtime.enabled).toBe(true); + }); + + it("does not auto-enable when REALTIME_VOICE_ENABLED is absent or not 'true'", () => { + delete process.env.REALTIME_VOICE_ENABLED; + expect(resolveVoiceCallConfig(createVoiceCallBaseConfig()).realtime.enabled).toBe(false); + + process.env.REALTIME_VOICE_ENABLED = "false"; + expect(resolveVoiceCallConfig(createVoiceCallBaseConfig()).realtime.enabled).toBe(false); + }); + + it("resolves model, voice, instructions, temperature from env vars", () => { + process.env.REALTIME_VOICE_MODEL = "gpt-4o-realtime-preview"; + process.env.REALTIME_VOICE_VOICE = "ash"; + process.env.REALTIME_VOICE_INSTRUCTIONS = "You are helpful."; + process.env.REALTIME_VOICE_TEMPERATURE = "0.8"; + const resolved = resolveVoiceCallConfig(createVoiceCallBaseConfig()); + expect(resolved.realtime.model).toBe("gpt-4o-realtime-preview"); + expect(resolved.realtime.voice).toBe("ash"); + expect(resolved.realtime.instructions).toBe("You are helpful."); + expect(resolved.realtime.temperature).toBeCloseTo(0.8); + }); + + it("resolves vadThreshold and silenceDurationMs from env vars", () => { + process.env.VAD_THRESHOLD = "0.7"; + process.env.SILENCE_DURATION_MS = "1200"; + const resolved = resolveVoiceCallConfig(createVoiceCallBaseConfig()); + expect(resolved.realtime.vadThreshold).toBeCloseTo(0.7); + expect(resolved.realtime.silenceDurationMs).toBe(1200); + }); 
+ + it("config values take precedence over env vars", () => { + process.env.REALTIME_VOICE_VOICE = "ash"; + const base = createVoiceCallBaseConfig(); + base.realtime = { enabled: false, voice: "coral", tools: [] }; + const resolved = resolveVoiceCallConfig(base); + expect(resolved.realtime.voice).toBe("coral"); + }); +}); + +describe("validateProviderConfig — realtime mode", () => { + it("rejects realtime.enabled when inboundPolicy is 'disabled'", () => { + const config = createVoiceCallBaseConfig({ provider: "mock" }); + config.realtime = { enabled: true, tools: [] }; + // inboundPolicy defaults to "disabled" in createVoiceCallBaseConfig + const result = validateProviderConfig(config); + expect(result.valid).toBe(false); + expect(result.errors.some((e) => e.includes("inboundPolicy"))).toBe(true); + }); + + it("passes when realtime.enabled with inboundPolicy 'open'", () => { + const config = createVoiceCallBaseConfig({ provider: "mock" }); + config.inboundPolicy = "open"; + config.realtime = { enabled: true, tools: [] }; + const result = validateProviderConfig(config); + expect(result.errors.some((e) => e.includes("inboundPolicy"))).toBe(false); + }); + + it("rejects when both realtime.enabled and streaming.enabled are true", () => { + const config = createVoiceCallBaseConfig({ provider: "mock" }); + config.inboundPolicy = "open"; + config.realtime = { enabled: true, tools: [] }; + config.streaming = { ...config.streaming, enabled: true }; + const result = validateProviderConfig(config); + expect(result.valid).toBe(false); + expect(result.errors.some((e) => e.includes("streaming"))).toBe(true); + }); +}); diff --git a/extensions/voice-call/src/webhook/realtime-handler.test.ts b/extensions/voice-call/src/webhook/realtime-handler.test.ts new file mode 100644 index 00000000000..f4ad179d441 --- /dev/null +++ b/extensions/voice-call/src/webhook/realtime-handler.test.ts @@ -0,0 +1,274 @@ +import http from "node:http"; +import { afterEach, beforeEach, describe, expect, 
it, vi } from "vitest"; +import type { CallManager } from "../manager.js"; +import type { CallRecord } from "../types.js"; +import type { VoiceCallProvider } from "../providers/base.js"; +import { RealtimeCallHandler } from "./realtime-handler.js"; + +// Minimal realtime config used across tests +const baseRealtimeConfig = { + enabled: true, + voice: "ash" as const, + tools: [] as never[], +}; + +// Fake CallRecord for manager stubs +function makeCallRecord(overrides: Partial<CallRecord> = {}): CallRecord { + return { + callId: "call-rt-1", + providerCallId: "CA_test", + provider: "twilio", + direction: "inbound", + state: "answered", + from: "+15550001234", + to: "+15550005678", + startedAt: Date.now(), + transcript: [], + processedEventIds: [], + metadata: { + initialMessage: "Hello! How can I help you today?", + }, + ...overrides, + }; +} + +function makeManager(record?: CallRecord): CallManager { + const storedRecord = record ?? makeCallRecord(); + return { + processEvent: vi.fn(), + getCallByProviderCallId: vi.fn(() => storedRecord), + getCall: vi.fn(() => storedRecord), + } as unknown as CallManager; +} + +function makeProvider(): VoiceCallProvider { + return { + name: "twilio", + verifyWebhook: vi.fn(() => ({ ok: true, verifiedRequestKey: "mock:key" })), + parseWebhookEvent: vi.fn(() => ({ events: [] })), + initiateCall: vi.fn(async () => ({ providerCallId: "CA_test", status: "initiated" as const })), + hangupCall: vi.fn(async () => {}), + playTts: vi.fn(async () => {}), + startListening: vi.fn(async () => {}), + stopListening: vi.fn(async () => {}), + getCallStatus: vi.fn(async () => ({ status: "in-progress" as const, isTerminal: false })), + }; +} + +function makeRequest(url: string, host = "example.ts.net"): http.IncomingMessage { + const req = new http.IncomingMessage(null as never); + req.url = url; + req.method = "POST"; + req.headers = { host }; + return req; +} + +describe("RealtimeCallHandler", () => { + let originalEnv: NodeJS.ProcessEnv; + + beforeEach(() => 
{ + originalEnv = { ...process.env }; + }); + + afterEach(() => { + process.env = { ...originalEnv }; + }); + + // --------------------------------------------------------------------------- + // buildTwiMLPayload + // --------------------------------------------------------------------------- + + describe("buildTwiMLPayload", () => { + it("returns TwiML with wss URL derived from request host", () => { + const handler = new RealtimeCallHandler( + baseRealtimeConfig, + makeManager(), + makeProvider(), + null, + ); + const req = makeRequest("/voice/webhook", "gateway.ts.net"); + const payload = handler.buildTwiMLPayload(req); + + expect(payload.statusCode).toBe(200); + expect(payload.headers?.["Content-Type"]).toBe("text/xml"); + expect(payload.body).toContain(""); + expect(payload.body).toContain(" { + const handler = new RealtimeCallHandler( + baseRealtimeConfig, + makeManager(), + makeProvider(), + null, + ); + const req = makeRequest("/voice/webhook", ""); + const payload = handler.buildTwiMLPayload(req); + + expect(payload.body).toContain("wss://localhost:8443/voice/stream/realtime"); + }); + }); + + // --------------------------------------------------------------------------- + // registerCallInManager — greeting suppression + // --------------------------------------------------------------------------- + + describe("registerCallInManager (via handleCall)", () => { + it("clears metadata.initialMessage so the inboundGreeting TTS path is skipped", () => { + const callRecord = makeCallRecord({ + metadata: { initialMessage: "Hello from config!" 
}, + }); + const manager = makeManager(callRecord); + + const handler = new RealtimeCallHandler( + baseRealtimeConfig, + manager, + makeProvider(), + null, + ); + + // Access private method via type assertion for unit testing + (handler as unknown as { registerCallInManager: (sid: string) => string }) + .registerCallInManager("CA_test"); + + // call.initiated + call.answered should both have been emitted + expect(vi.mocked(manager.processEvent)).toHaveBeenCalledTimes(2); + const eventTypes = vi.mocked(manager.processEvent).mock.calls.map( + ([e]) => (e as { type: string }).type, + ); + expect(eventTypes).toEqual(["call.initiated", "call.answered"]); + + // initialMessage must be cleared before call.answered fires + expect(callRecord.metadata?.initialMessage).toBeUndefined(); + }); + + it("returns callId from the manager-created call record", () => { + const callRecord = makeCallRecord({ callId: "manager-gen-id" }); + const manager = makeManager(callRecord); + + const handler = new RealtimeCallHandler( + baseRealtimeConfig, + manager, + makeProvider(), + null, + ); + + const result = (handler as unknown as { registerCallInManager: (sid: string) => string }) + .registerCallInManager("CA_test"); + + expect(result).toBe("manager-gen-id"); + }); + + it("falls back to providerCallId when manager has no record", () => { + const manager = { + processEvent: vi.fn(), + getCallByProviderCallId: vi.fn(() => undefined), + } as unknown as CallManager; + + const handler = new RealtimeCallHandler( + baseRealtimeConfig, + manager, + makeProvider(), + null, + ); + + const result = (handler as unknown as { registerCallInManager: (sid: string) => string }) + .registerCallInManager("CA_fallback"); + + expect(result).toBe("CA_fallback"); + }); + }); + + // --------------------------------------------------------------------------- + // Tool handler framework + // --------------------------------------------------------------------------- + + describe("registerToolHandler", () => { + 
it("routes tool calls to registered handlers and returns their result", async () => { + const handler = new RealtimeCallHandler( + baseRealtimeConfig, + makeManager(), + makeProvider(), + null, + ); + + handler.registerToolHandler("get_time", async () => ({ utc: "2026-03-10T00:00:00Z" })); + + const fakeSubmit = vi.fn(); + const fakeBridge = { submitToolResult: fakeSubmit } as never; + + await ( + handler as unknown as { + executeToolCall: ( + bridge: never, + callId: string, + bridgeCallId: string, + name: string, + args: unknown, + ) => Promise<void>; + } + ).executeToolCall(fakeBridge, "call-1", "bridge-call-1", "get_time", {}); + + expect(fakeSubmit).toHaveBeenCalledWith("bridge-call-1", { utc: "2026-03-10T00:00:00Z" }); + }); + + it("returns an error result for unregistered tool names", async () => { + const handler = new RealtimeCallHandler( + baseRealtimeConfig, + makeManager(), + makeProvider(), + null, + ); + + const fakeSubmit = vi.fn(); + const fakeBridge = { submitToolResult: fakeSubmit } as never; + + await ( + handler as unknown as { + executeToolCall: ( + bridge: never, + callId: string, + bridgeCallId: string, + name: string, + args: unknown, + ) => Promise<void>; + } + ).executeToolCall(fakeBridge, "call-1", "bridge-call-1", "unknown_tool", {}); + + expect(fakeSubmit).toHaveBeenCalledWith("bridge-call-1", { + error: 'Tool "unknown_tool" not available', + }); + }); + + it("returns an error result when a handler throws", async () => { + const handler = new RealtimeCallHandler( + baseRealtimeConfig, + makeManager(), + makeProvider(), + null, + ); + + handler.registerToolHandler("boom", async () => { + throw new Error("handler blew up"); + }); + + const fakeSubmit = vi.fn(); + const fakeBridge = { submitToolResult: fakeSubmit } as never; + + await ( + handler as unknown as { + executeToolCall: ( + bridge: never, + callId: string, + bridgeCallId: string, + name: string, + args: unknown, + ) => Promise<void>; + } + ).executeToolCall(fakeBridge, "call-1", "bridge-call-1", 
"boom", {}); + + expect(fakeSubmit).toHaveBeenCalledWith("bridge-call-1", { error: "handler blew up" }); + }); + }); +});