voice-call: fix tsgo type errors and formatting
- Import WebSocketServer by named export (not Server alias) from ws
- Add input_audio_transcription to RealtimeSessionUpdate interface
- Fix z.record() call to pass explicit key schema (zod strict compat)
- Cast DeepPartial tools to RealtimeToolConfig[] in normalizeVoiceCallConfig
- Update consumeStreamToken test casts to reflect new nullable return type
- Apply oxfmt formatting across voice-call extension files

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
1ea1695edc
commit
70bd1ffe92
@ -186,16 +186,16 @@ Realtime mode routes inbound calls directly to the [OpenAI Realtime API](https:/
|
||||
|
||||
```json5
|
||||
{
|
||||
inboundPolicy: "open", // required: realtime needs inbound calls enabled
|
||||
inboundPolicy: "open", // required: realtime needs inbound calls enabled
|
||||
|
||||
realtime: {
|
||||
enabled: true,
|
||||
voice: "alloy", // Realtime API voices: alloy, ash, ballad, cedar, coral,
|
||||
// echo, marin, sage, shimmer, verse
|
||||
voice: "alloy", // Realtime API voices: alloy, ash, ballad, cedar, coral,
|
||||
// echo, marin, sage, shimmer, verse
|
||||
instructions: "You are a helpful assistant.",
|
||||
model: "gpt-4o-mini-realtime-preview", // optional, this is the default
|
||||
temperature: 0.8, // 0–2, optional
|
||||
vadThreshold: 0.5, // voice activity detection sensitivity, 0–1, optional
|
||||
model: "gpt-4o-mini-realtime-preview", // optional, this is the default
|
||||
temperature: 0.8, // 0–2, optional
|
||||
vadThreshold: 0.5, // voice activity detection sensitivity, 0–1, optional
|
||||
silenceDurationMs: 500, // ms of silence before end-of-turn, optional
|
||||
},
|
||||
}
|
||||
@ -205,15 +205,15 @@ Realtime mode routes inbound calls directly to the [OpenAI Realtime API](https:/
|
||||
|
||||
All `realtime.*` fields can be set via environment variables (config takes precedence):
|
||||
|
||||
| Env var | Config field |
|
||||
|---|---|
|
||||
| `REALTIME_VOICE_ENABLED=true` | `realtime.enabled` |
|
||||
| `REALTIME_VOICE_MODEL` | `realtime.model` |
|
||||
| `REALTIME_VOICE_VOICE` | `realtime.voice` |
|
||||
| `REALTIME_VOICE_INSTRUCTIONS` | `realtime.instructions` |
|
||||
| `REALTIME_VOICE_TEMPERATURE` | `realtime.temperature` |
|
||||
| `VAD_THRESHOLD` | `realtime.vadThreshold` |
|
||||
| `SILENCE_DURATION_MS` | `realtime.silenceDurationMs` |
|
||||
| Env var | Config field |
|
||||
| ----------------------------- | ---------------------------- |
|
||||
| `REALTIME_VOICE_ENABLED=true` | `realtime.enabled` |
|
||||
| `REALTIME_VOICE_MODEL` | `realtime.model` |
|
||||
| `REALTIME_VOICE_VOICE` | `realtime.voice` |
|
||||
| `REALTIME_VOICE_INSTRUCTIONS` | `realtime.instructions` |
|
||||
| `REALTIME_VOICE_TEMPERATURE` | `realtime.temperature` |
|
||||
| `VAD_THRESHOLD` | `realtime.vadThreshold` |
|
||||
| `SILENCE_DURATION_MS` | `realtime.silenceDurationMs` |
|
||||
|
||||
### How it works
|
||||
|
||||
|
||||
@ -427,7 +427,18 @@
|
||||
},
|
||||
"voice": {
|
||||
"type": "string",
|
||||
"enum": ["alloy", "ash", "ballad", "cedar", "coral", "echo", "marin", "sage", "shimmer", "verse"]
|
||||
"enum": [
|
||||
"alloy",
|
||||
"ash",
|
||||
"ballad",
|
||||
"cedar",
|
||||
"coral",
|
||||
"echo",
|
||||
"marin",
|
||||
"sage",
|
||||
"shimmer",
|
||||
"verse"
|
||||
]
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string"
|
||||
|
||||
@ -226,7 +226,18 @@ describe("VoiceCallRealtimeConfigSchema", () => {
|
||||
});
|
||||
|
||||
it("accepts all valid Realtime API voice names", () => {
|
||||
const voices = ["alloy", "ash", "ballad", "cedar", "coral", "echo", "marin", "sage", "shimmer", "verse"];
|
||||
const voices = [
|
||||
"alloy",
|
||||
"ash",
|
||||
"ballad",
|
||||
"cedar",
|
||||
"coral",
|
||||
"echo",
|
||||
"marin",
|
||||
"sage",
|
||||
"shimmer",
|
||||
"verse",
|
||||
];
|
||||
for (const voice of voices) {
|
||||
expect(() => VoiceCallRealtimeConfigSchema.parse({ voice })).not.toThrow();
|
||||
}
|
||||
|
||||
@ -215,7 +215,7 @@ export const RealtimeToolSchema = z
|
||||
description: z.string(),
|
||||
parameters: z.object({
|
||||
type: z.literal("object"),
|
||||
properties: z.record(z.unknown()),
|
||||
properties: z.record(z.string(), z.unknown()),
|
||||
required: z.array(z.string()).optional(),
|
||||
}),
|
||||
})
|
||||
@ -230,7 +230,18 @@ export const VoiceCallRealtimeConfigSchema = z
|
||||
model: z.string().optional(),
|
||||
/** Voice for AI speech output (env: REALTIME_VOICE_VOICE) */
|
||||
voice: z
|
||||
.enum(["alloy", "ash", "ballad", "cedar", "coral", "echo", "marin", "sage", "shimmer", "verse"])
|
||||
.enum([
|
||||
"alloy",
|
||||
"ash",
|
||||
"ballad",
|
||||
"cedar",
|
||||
"coral",
|
||||
"echo",
|
||||
"marin",
|
||||
"sage",
|
||||
"shimmer",
|
||||
"verse",
|
||||
])
|
||||
.optional(),
|
||||
/** System instructions / persona (env: REALTIME_VOICE_INSTRUCTIONS) */
|
||||
instructions: z.string().optional(),
|
||||
@ -453,7 +464,10 @@ export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCal
|
||||
realtime: {
|
||||
...defaults.realtime,
|
||||
...config.realtime,
|
||||
tools: config.realtime?.tools ?? defaults.realtime.tools,
|
||||
// Cast: DeepPartial makes tool fields appear optional in the input type,
|
||||
// but Zod validates the full shape before it reaches here.
|
||||
tools:
|
||||
(config.realtime?.tools as RealtimeToolConfig[] | undefined) ?? defaults.realtime.tools,
|
||||
},
|
||||
stt: { ...defaults.stt, ...config.stt },
|
||||
tts: normalizeVoiceCallTtsConfig(defaults.tts, config.tts),
|
||||
@ -519,7 +533,8 @@ export function resolveVoiceCallConfig(config: VoiceCallConfigInput): VoiceCallC
|
||||
resolved.realtime.model = resolved.realtime.model ?? process.env.REALTIME_VOICE_MODEL;
|
||||
resolved.realtime.voice =
|
||||
(resolved.realtime.voice ??
|
||||
(process.env.REALTIME_VOICE_VOICE as VoiceCallRealtimeConfig["voice"])) || undefined;
|
||||
(process.env.REALTIME_VOICE_VOICE as VoiceCallRealtimeConfig["voice"])) ||
|
||||
undefined;
|
||||
resolved.realtime.instructions =
|
||||
resolved.realtime.instructions ?? process.env.REALTIME_VOICE_INSTRUCTIONS;
|
||||
if (resolved.realtime.temperature == null && process.env.REALTIME_VOICE_TEMPERATURE) {
|
||||
@ -605,8 +620,8 @@ export function validateProviderConfig(config: VoiceCallConfig): {
|
||||
// "open" or "allowlist" are the correct choices when realtime.enabled = true.
|
||||
if (config.realtime?.enabled && config.inboundPolicy === "disabled") {
|
||||
errors.push(
|
||||
"plugins.entries.voice-call.config.inboundPolicy must not be \"disabled\" when realtime.enabled is true " +
|
||||
"(use \"open\" or \"allowlist\" — realtime calls are answered before policy can reject them)",
|
||||
'plugins.entries.voice-call.config.inboundPolicy must not be "disabled" when realtime.enabled is true ' +
|
||||
'(use "open" or "allowlist" — realtime calls are answered before policy can reject them)',
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@ -450,9 +450,7 @@ export class OpenAIRealtimeVoiceBridge {
|
||||
private async attemptReconnect(): Promise<void> {
|
||||
if (this.intentionallyClosed) return;
|
||||
|
||||
if (
|
||||
this.reconnectAttempts >= OpenAIRealtimeVoiceBridge.MAX_RECONNECT_ATTEMPTS
|
||||
) {
|
||||
if (this.reconnectAttempts >= OpenAIRealtimeVoiceBridge.MAX_RECONNECT_ATTEMPTS) {
|
||||
const err = new Error(
|
||||
`[RealtimeVoice] Max reconnect attempts (${OpenAIRealtimeVoiceBridge.MAX_RECONNECT_ATTEMPTS}) exceeded`,
|
||||
);
|
||||
@ -464,8 +462,7 @@ export class OpenAIRealtimeVoiceBridge {
|
||||
|
||||
this.reconnectAttempts++;
|
||||
const delay =
|
||||
OpenAIRealtimeVoiceBridge.BASE_RECONNECT_DELAY_MS *
|
||||
2 ** (this.reconnectAttempts - 1);
|
||||
OpenAIRealtimeVoiceBridge.BASE_RECONNECT_DELAY_MS * 2 ** (this.reconnectAttempts - 1);
|
||||
|
||||
console.log(
|
||||
`[RealtimeVoice] Reconnecting (${this.reconnectAttempts}/${OpenAIRealtimeVoiceBridge.MAX_RECONNECT_ATTEMPTS}) in ${delay}ms...`,
|
||||
@ -870,6 +867,7 @@ interface RealtimeSessionUpdate {
|
||||
create_response: boolean;
|
||||
};
|
||||
temperature: number;
|
||||
input_audio_transcription?: { model: string };
|
||||
tools?: RealtimeTool[];
|
||||
tool_choice?: string;
|
||||
};
|
||||
|
||||
@ -11,8 +11,8 @@ import type { TelephonyTtsRuntime } from "./telephony-tts.js";
|
||||
import { createTelephonyTtsProvider } from "./telephony-tts.js";
|
||||
import { startTunnel, type TunnelResult } from "./tunnel.js";
|
||||
import { VoiceCallWebhookServer } from "./webhook.js";
|
||||
import { cleanupTailscaleExposure, setupTailscaleExposure } from "./webhook/tailscale.js";
|
||||
import { RealtimeCallHandler } from "./webhook/realtime-handler.js";
|
||||
import { cleanupTailscaleExposure, setupTailscaleExposure } from "./webhook/tailscale.js";
|
||||
|
||||
export type VoiceCallRuntime = {
|
||||
config: VoiceCallConfig;
|
||||
|
||||
@ -14,8 +14,8 @@ import type { VoiceCallProvider } from "./providers/base.js";
|
||||
import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js";
|
||||
import type { TwilioProvider } from "./providers/twilio.js";
|
||||
import type { NormalizedEvent, WebhookContext } from "./types.js";
|
||||
import { startStaleCallReaper } from "./webhook/stale-call-reaper.js";
|
||||
import type { RealtimeCallHandler } from "./webhook/realtime-handler.js";
|
||||
import { startStaleCallReaper } from "./webhook/stale-call-reaper.js";
|
||||
|
||||
const MAX_WEBHOOK_BODY_BYTES = 1024 * 1024;
|
||||
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
import http from "node:http";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { CallManager } from "../manager.js";
|
||||
import type { CallRecord } from "../types.js";
|
||||
import type { VoiceCallProvider } from "../providers/base.js";
|
||||
import type { CallRecord } from "../types.js";
|
||||
import { RealtimeCallHandler } from "./realtime-handler.js";
|
||||
|
||||
/** Extract the stream token from a TwiML body string. */
|
||||
@ -144,10 +144,14 @@ describe("RealtimeCallHandler", () => {
|
||||
null,
|
||||
);
|
||||
const issue = (handler as unknown as { issueStreamToken: () => string }).issueStreamToken;
|
||||
const consume = (handler as unknown as { consumeStreamToken: (t: string) => boolean }).consumeStreamToken;
|
||||
const consume = (
|
||||
handler as unknown as {
|
||||
consumeStreamToken: (t: string) => { from?: string; to?: string } | null;
|
||||
}
|
||||
).consumeStreamToken;
|
||||
const token = issue.call(handler);
|
||||
expect(consume.call(handler, token)).toBe(true);
|
||||
expect(consume.call(handler, token)).toBe(false);
|
||||
expect(consume.call(handler, token)).not.toBeNull();
|
||||
expect(consume.call(handler, token)).toBeNull();
|
||||
});
|
||||
|
||||
it("rejects unknown tokens", () => {
|
||||
@ -157,8 +161,12 @@ describe("RealtimeCallHandler", () => {
|
||||
makeProvider(),
|
||||
null,
|
||||
);
|
||||
const consume = (handler as unknown as { consumeStreamToken: (t: string) => boolean }).consumeStreamToken;
|
||||
expect(consume.call(handler, "not-a-real-token")).toBe(false);
|
||||
const consume = (
|
||||
handler as unknown as {
|
||||
consumeStreamToken: (t: string) => { from?: string; to?: string } | null;
|
||||
}
|
||||
).consumeStreamToken;
|
||||
expect(consume.call(handler, "not-a-real-token")).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
@ -173,22 +181,18 @@ describe("RealtimeCallHandler", () => {
|
||||
});
|
||||
const manager = makeManager(callRecord);
|
||||
|
||||
const handler = new RealtimeCallHandler(
|
||||
baseRealtimeConfig,
|
||||
manager,
|
||||
makeProvider(),
|
||||
null,
|
||||
);
|
||||
const handler = new RealtimeCallHandler(baseRealtimeConfig, manager, makeProvider(), null);
|
||||
|
||||
// Access private method via type assertion for unit testing
|
||||
(handler as unknown as { registerCallInManager: (sid: string) => string })
|
||||
.registerCallInManager("CA_test");
|
||||
(
|
||||
handler as unknown as { registerCallInManager: (sid: string) => string }
|
||||
).registerCallInManager("CA_test");
|
||||
|
||||
// call.initiated + call.answered should both have been emitted
|
||||
expect(vi.mocked(manager.processEvent)).toHaveBeenCalledTimes(2);
|
||||
const eventTypes = vi.mocked(manager.processEvent).mock.calls.map(
|
||||
([e]) => (e as { type: string }).type,
|
||||
);
|
||||
const eventTypes = vi
|
||||
.mocked(manager.processEvent)
|
||||
.mock.calls.map(([e]) => (e as { type: string }).type);
|
||||
expect(eventTypes).toEqual(["call.initiated", "call.answered"]);
|
||||
|
||||
// initialMessage must be cleared before call.answered fires
|
||||
@ -199,15 +203,11 @@ describe("RealtimeCallHandler", () => {
|
||||
const callRecord = makeCallRecord({ callId: "manager-gen-id" });
|
||||
const manager = makeManager(callRecord);
|
||||
|
||||
const handler = new RealtimeCallHandler(
|
||||
baseRealtimeConfig,
|
||||
manager,
|
||||
makeProvider(),
|
||||
null,
|
||||
);
|
||||
const handler = new RealtimeCallHandler(baseRealtimeConfig, manager, makeProvider(), null);
|
||||
|
||||
const result = (handler as unknown as { registerCallInManager: (sid: string) => string })
|
||||
.registerCallInManager("CA_test");
|
||||
const result = (
|
||||
handler as unknown as { registerCallInManager: (sid: string) => string }
|
||||
).registerCallInManager("CA_test");
|
||||
|
||||
expect(result).toBe("manager-gen-id");
|
||||
});
|
||||
@ -218,15 +218,11 @@ describe("RealtimeCallHandler", () => {
|
||||
getCallByProviderCallId: vi.fn(() => undefined),
|
||||
} as unknown as CallManager;
|
||||
|
||||
const handler = new RealtimeCallHandler(
|
||||
baseRealtimeConfig,
|
||||
manager,
|
||||
makeProvider(),
|
||||
null,
|
||||
);
|
||||
const handler = new RealtimeCallHandler(baseRealtimeConfig, manager, makeProvider(), null);
|
||||
|
||||
const result = (handler as unknown as { registerCallInManager: (sid: string) => string })
|
||||
.registerCallInManager("CA_fallback");
|
||||
const result = (
|
||||
handler as unknown as { registerCallInManager: (sid: string) => string }
|
||||
).registerCallInManager("CA_fallback");
|
||||
|
||||
expect(result).toBe("CA_fallback");
|
||||
});
|
||||
|
||||
@ -1,15 +1,15 @@
|
||||
import http from "node:http";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import http from "node:http";
|
||||
import type { Duplex } from "node:stream";
|
||||
import { type WebSocket, Server as WebSocketServer } from "ws";
|
||||
import WebSocket, { WebSocketServer } from "ws";
|
||||
import type { VoiceCallRealtimeConfig } from "../config.js";
|
||||
import type { CoreConfig } from "../core-bridge.js";
|
||||
import type { CallManager } from "../manager.js";
|
||||
import type { VoiceCallProvider } from "../providers/base.js";
|
||||
import {
|
||||
OpenAIRealtimeVoiceBridge,
|
||||
type RealtimeTool,
|
||||
} from "../providers/openai-realtime-voice.js";
|
||||
import type { VoiceCallProvider } from "../providers/base.js";
|
||||
import type { NormalizedEvent } from "../types.js";
|
||||
import type { WebhookResponsePayload } from "../webhook.js";
|
||||
|
||||
@ -124,7 +124,9 @@ export class RealtimeCallHandler {
|
||||
to: params?.get("To") ?? undefined,
|
||||
});
|
||||
const wsUrl = `wss://${host}/voice/stream/realtime?token=${token}`;
|
||||
console.log(`[voice-call] Returning realtime TwiML with WebSocket: wss://${host}/voice/stream/realtime`);
|
||||
console.log(
|
||||
`[voice-call] Returning realtime TwiML with WebSocket: wss://${host}/voice/stream/realtime`,
|
||||
);
|
||||
const twiml = `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Response>
|
||||
<Connect>
|
||||
@ -186,13 +188,17 @@ export class RealtimeCallHandler {
|
||||
): OpenAIRealtimeVoiceBridge | null {
|
||||
const apiKey = this.openaiApiKey ?? process.env.OPENAI_API_KEY;
|
||||
if (!apiKey) {
|
||||
console.error("[voice-call] No OpenAI API key for realtime call (set streaming.openaiApiKey or OPENAI_API_KEY)");
|
||||
console.error(
|
||||
"[voice-call] No OpenAI API key for realtime call (set streaming.openaiApiKey or OPENAI_API_KEY)",
|
||||
);
|
||||
ws.close(1011, "No API key");
|
||||
return null;
|
||||
}
|
||||
|
||||
const callId = this.registerCallInManager(callSid, callerMeta);
|
||||
console.log(`[voice-call] Realtime call: streamSid=${streamSid}, callSid=${callSid}, callId=${callId}`);
|
||||
console.log(
|
||||
`[voice-call] Realtime call: streamSid=${streamSid}, callSid=${callSid}, callId=${callId}`,
|
||||
);
|
||||
|
||||
// Declare as null first so closures can capture the reference before bridge is created.
|
||||
// By the time any callback fires, bridge will be fully assigned.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user