refactor: normalize voice-call runtime defaults
This commit is contained in:
parent
5759b93dda
commit
3087893ef9
@ -1,5 +1,10 @@
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { validateProviderConfig, resolveVoiceCallConfig, type VoiceCallConfig } from "./config.js";
|
||||
import {
|
||||
validateProviderConfig,
|
||||
normalizeVoiceCallConfig,
|
||||
resolveVoiceCallConfig,
|
||||
type VoiceCallConfig,
|
||||
} from "./config.js";
|
||||
import { createVoiceCallBaseConfig } from "./test-fixtures.js";
|
||||
|
||||
function createBaseConfig(provider: "telnyx" | "twilio" | "plivo" | "mock"): VoiceCallConfig {
|
||||
@ -166,3 +171,22 @@ describe("validateProviderConfig", () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("normalizeVoiceCallConfig", () => {
  it("fills nested runtime defaults from a partial config boundary", () => {
    // Only `streaming.streamPath` is customised; every other asserted value
    // must come from the defaults filled in by the normalizer.
    const { serve, streaming, tunnel, webhookSecurity } = normalizeVoiceCallConfig({
      enabled: true,
      provider: "mock",
      streaming: {
        enabled: true,
        streamPath: "/custom-stream",
      },
    });

    expect(serve.path).toBe("/voice/webhook");
    expect(streaming.streamPath).toBe("/custom-stream");
    expect(streaming.sttModel).toBe("gpt-4o-transcribe");
    expect(tunnel.provider).toBe("none");
    expect(webhookSecurity.allowedHosts).toEqual([]);
  });
});
|
||||
|
||||
@ -350,17 +350,53 @@ export const VoiceCallConfigSchema = z
|
||||
.strict();
|
||||
|
||||
export type VoiceCallConfig = z.infer<typeof VoiceCallConfigSchema>;
|
||||
/**
 * Recursively marks every property of `T` as optional.
 *
 * - Array types stay arrays, with the element type made deeply partial.
 * - Other object types get each property made optional and processed
 *   recursively.
 * - Everything else (primitives) is returned unchanged.
 */
type DeepPartial<T> = T extends (infer Item)[]
  ? DeepPartial<Item>[]
  : T extends object
    ? { [Key in keyof T]?: DeepPartial<T[Key]> }
    : T;

/** Input shape accepted at the config boundary: any subset of `VoiceCallConfig`, at any depth. */
export type VoiceCallConfigInput = DeepPartial<VoiceCallConfig>;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Configuration Helpers
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
/**
 * Baseline configuration: whatever the schema yields when it parses an empty
 * object. Computed once at module load and never handed out directly.
 */
const DEFAULT_VOICE_CALL_CONFIG = VoiceCallConfigSchema.parse({});

/**
 * Produces an independent deep copy of the baseline configuration.
 *
 * @returns A structured clone of `DEFAULT_VOICE_CALL_CONFIG`, so the caller can
 *   modify the result without touching the shared module-level constant.
 */
function cloneDefaultVoiceCallConfig(): VoiceCallConfig {
  const freshDefaults = structuredClone(DEFAULT_VOICE_CALL_CONFIG);
  return freshDefaults;
}
|
||||
|
||||
/**
 * Shallow-merges `overrides` onto `base`, ignoring override keys whose value is
 * `undefined` so that an explicitly-undefined field cannot erase a default.
 */
function mergeDefinedFields<T extends object>(base: T, overrides: object | undefined): T {
  const defined = Object.fromEntries(
    Object.entries(overrides ?? {}).filter(([, value]) => value !== undefined),
  );
  return { ...base, ...defined };
}

/**
 * Expands a partial voice-call config into a complete one by layering it over
 * a fresh copy of the schema defaults.
 *
 * Top-level fields are merged shallowly. The `outbound`, `serve`, `tailscale`,
 * `tunnel`, `webhookSecurity`, `streaming` and `stt` sections are additionally
 * merged one level deep, so supplying a single nested field keeps that
 * section's remaining defaults. Array fields (`allowFrom`,
 * `webhookSecurity.allowedHosts`, `webhookSecurity.trustedProxyIPs`) and `tts`
 * are taken whole from the input when present, otherwise from the defaults.
 *
 * Fields explicitly set to `undefined` are treated as absent. The input is
 * deep-copied first, so the result shares no objects or arrays with `config`
 * and can be modified without affecting the caller's object.
 *
 * @param config Partial configuration; must be structured-cloneable data.
 * @returns A fully populated configuration object.
 */
export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCallConfig {
  const defaults = cloneDefaultVoiceCallConfig();
  // Copy the input so sections that are passed through untouched, and the
  // arrays below, are not aliased between the caller's object and the result.
  const input = structuredClone(config);

  return {
    ...mergeDefinedFields(defaults, input),
    allowFrom: input.allowFrom ?? defaults.allowFrom,
    outbound: mergeDefinedFields(defaults.outbound, input.outbound),
    serve: mergeDefinedFields(defaults.serve, input.serve),
    tailscale: mergeDefinedFields(defaults.tailscale, input.tailscale),
    tunnel: mergeDefinedFields(defaults.tunnel, input.tunnel),
    webhookSecurity: {
      ...mergeDefinedFields(defaults.webhookSecurity, input.webhookSecurity),
      allowedHosts: input.webhookSecurity?.allowedHosts ?? defaults.webhookSecurity.allowedHosts,
      trustedProxyIPs:
        input.webhookSecurity?.trustedProxyIPs ?? defaults.webhookSecurity.trustedProxyIPs,
    },
    streaming: mergeDefinedFields(defaults.streaming, input.streaming),
    stt: mergeDefinedFields(defaults.stt, input.stt),
    tts: input.tts ?? defaults.tts,
  };
}
|
||||
|
||||
/**
|
||||
* Resolves the configuration by merging environment variables into missing fields.
|
||||
* Returns a new configuration object with environment variables applied.
|
||||
*/
|
||||
export function resolveVoiceCallConfig(config: VoiceCallConfig): VoiceCallConfig {
|
||||
const resolved = JSON.parse(JSON.stringify(config)) as VoiceCallConfig;
|
||||
export function resolveVoiceCallConfig(config: VoiceCallConfigInput): VoiceCallConfig {
|
||||
const resolved = normalizeVoiceCallConfig(config);
|
||||
|
||||
// Telnyx
|
||||
if (resolved.provider === "telnyx") {
|
||||
@ -405,7 +441,7 @@ export function resolveVoiceCallConfig(config: VoiceCallConfig): VoiceCallConfig
|
||||
resolved.webhookSecurity.trustForwardingHeaders ?? false;
|
||||
resolved.webhookSecurity.trustedProxyIPs = resolved.webhookSecurity.trustedProxyIPs ?? [];
|
||||
|
||||
return resolved;
|
||||
return normalizeVoiceCallConfig(resolved);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -3,6 +3,8 @@ import type { OpenAITTSConfig } from "./tts-openai.js";
|
||||
import { OpenAITTSProvider } from "./tts-openai.js";
|
||||
|
||||
/**
 * Shape the tests use to read back the settings a provider instance resolved
 * at construction time.
 */
type ProviderInternals = {
  /** Resolved TTS model name. */
  model: string;
  /** Resolved voice name. */
  voice: string;
  /** Resolved playback speed. */
  speed: number;
};
|
||||
|
||||
@ -27,4 +29,15 @@ describe("OpenAITTSProvider constructor defaults", () => {
|
||||
|
||||
expect(provider.speed).toBe(1.0);
|
||||
});
|
||||
|
||||
it("treats blank model and voice overrides as unset", () => {
  // Whitespace-only and empty overrides should fall back to the built-in defaults.
  const { model, voice } = readProviderInternals({
    apiKey: "sk-test", // pragma: allowlist secret
    model: " ",
    voice: "",
  });

  expect(model).toBe("gpt-4o-mini-tts");
  expect(voice).toBe("coral");
});
|
||||
});
|
||||
|
||||
@ -66,6 +66,11 @@ export const OPENAI_TTS_VOICES = [
|
||||
|
||||
export type OpenAITTSVoice = (typeof OPENAI_TTS_VOICES)[number];
|
||||
|
||||
/**
 * Strips surrounding whitespace from an optional string.
 *
 * @param value Candidate string, possibly missing.
 * @returns The trimmed string, or `undefined` when `value` is missing, empty,
 *   or contains only whitespace.
 */
function trimToUndefined(value: string | undefined): string | undefined {
  // `||` maps both "no value" and the empty post-trim string to undefined.
  return value?.trim() || undefined;
}
|
||||
|
||||
/**
|
||||
* OpenAI TTS Provider for generating speech audio.
|
||||
*/
|
||||
@ -77,13 +82,14 @@ export class OpenAITTSProvider {
|
||||
private instructions?: string;
|
||||
|
||||
constructor(config: OpenAITTSConfig = {}) {
|
||||
this.apiKey = config.apiKey || process.env.OPENAI_API_KEY || "";
|
||||
this.apiKey =
|
||||
trimToUndefined(config.apiKey) ?? trimToUndefined(process.env.OPENAI_API_KEY) ?? "";
|
||||
// Default to gpt-4o-mini-tts for intelligent realtime applications
|
||||
this.model = config.model || "gpt-4o-mini-tts";
|
||||
this.model = trimToUndefined(config.model) ?? "gpt-4o-mini-tts";
|
||||
// Default to coral - good balance of quality and natural tone
|
||||
this.voice = (config.voice as OpenAITTSVoice) || "coral";
|
||||
this.voice = (trimToUndefined(config.voice) as OpenAITTSVoice | undefined) ?? "coral";
|
||||
this.speed = config.speed ?? 1.0;
|
||||
this.instructions = config.instructions;
|
||||
this.instructions = trimToUndefined(config.instructions);
|
||||
|
||||
if (!this.apiKey) {
|
||||
throw new Error("OpenAI API key required (set OPENAI_API_KEY or pass apiKey)");
|
||||
@ -105,7 +111,7 @@ export class OpenAITTSProvider {
|
||||
};
|
||||
|
||||
// Add instructions if using gpt-4o-mini-tts model
|
||||
const effectiveInstructions = instructions || this.instructions;
|
||||
const effectiveInstructions = trimToUndefined(instructions) ?? this.instructions;
|
||||
if (effectiveInstructions && this.model.includes("gpt-4o-mini-tts")) {
|
||||
body.instructions = effectiveInstructions;
|
||||
}
|
||||
|
||||
@ -274,6 +274,32 @@ describe("VoiceCallWebhookServer replay handling", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("VoiceCallWebhookServer response normalization", () => {
  it("preserves explicit empty provider response bodies", async () => {
    // Provider stub that answers with 204 and an intentionally empty body.
    const emptyBodyProvider: VoiceCallProvider = {
      ...provider,
      parseWebhookEvent: () => ({ events: [], statusCode: 204, providerResponseBody: "" }),
    };
    const { manager } = createManager([]);
    const server = new VoiceCallWebhookServer(
      createConfig({ serve: { port: 0, bind: "127.0.0.1", path: "/voice/webhook" } }),
      manager,
      emptyBodyProvider,
    );

    try {
      const baseUrl = await server.start();
      const res = await postWebhookForm(server, baseUrl, "CallSid=CA123&SpeechResult=hello");

      expect(res.status).toBe(204);
      expect(await res.text()).toBe("");
    } finally {
      // Always release the listening socket, even when an expectation fails.
      await server.stop();
    }
  });
});
|
||||
|
||||
describe("VoiceCallWebhookServer start idempotency", () => {
|
||||
it("returns existing URL when start() is called twice without stop()", async () => {
|
||||
const { manager } = createManager([]);
|
||||
|
||||
@ -5,7 +5,7 @@ import {
|
||||
readRequestBodyWithLimit,
|
||||
requestBodyErrorToText,
|
||||
} from "openclaw/plugin-sdk/voice-call";
|
||||
import type { VoiceCallConfig } from "./config.js";
|
||||
import { normalizeVoiceCallConfig, type VoiceCallConfig } from "./config.js";
|
||||
import type { CoreConfig } from "./core-bridge.js";
|
||||
import type { CallManager } from "./manager.js";
|
||||
import type { MediaStreamConfig } from "./media-stream.js";
|
||||
@ -24,6 +24,26 @@ type WebhookResponsePayload = {
|
||||
headers?: Record<string, string>;
|
||||
};
|
||||
|
||||
/**
 * Builds an absolute `http:` URL for an incoming request from its request
 * target and `Host` header.
 *
 * @param requestUrl Request target (path and query); `/` is used when undefined.
 * @param requestHost Value of the `Host` header, if any.
 * @param fallbackHost Host to use when `requestHost` is missing, empty, or
 *   whitespace-only.
 * @returns The parsed URL.
 * @throws TypeError if the combination still does not form a valid URL
 *   (for example, a `Host` value that is not a valid authority).
 */
function buildRequestUrl(
  requestUrl: string | undefined,
  requestHost: string | undefined,
  fallbackHost = "localhost",
): URL {
  // An empty or blank Host header would produce the base "http://", which the
  // URL constructor rejects; treat it the same as a missing header.
  const host = requestHost?.trim() || fallbackHost;
  return new URL(requestUrl ?? "/", `http://${host}`);
}
|
||||
|
||||
/**
 * Converts a provider's parsed webhook result into the HTTP response payload.
 *
 * Defaults apply only when a field is `undefined` or `null`: the status code
 * falls back to 200 and the body to "OK". An explicitly empty body ("") is
 * passed through as-is.
 *
 * @param parsed Status, headers and body reported by the provider.
 * @returns Payload with `statusCode`, `headers` and `body` populated.
 */
function normalizeWebhookResponse(parsed: {
  statusCode?: number;
  providerResponseHeaders?: Record<string, string>;
  providerResponseBody?: string;
}): WebhookResponsePayload {
  const { statusCode, providerResponseHeaders: headers, providerResponseBody } = parsed;
  const body = providerResponseBody ?? "OK";
  return { statusCode: statusCode ?? 200, headers, body };
}
|
||||
|
||||
/**
|
||||
* HTTP server for receiving voice call webhooks from providers.
|
||||
* Supports WebSocket upgrades for media streams when streaming is enabled.
|
||||
@ -46,13 +66,13 @@ export class VoiceCallWebhookServer {
|
||||
provider: VoiceCallProvider,
|
||||
coreConfig?: CoreConfig,
|
||||
) {
|
||||
this.config = config;
|
||||
this.config = normalizeVoiceCallConfig(config);
|
||||
this.manager = manager;
|
||||
this.provider = provider;
|
||||
this.coreConfig = coreConfig ?? null;
|
||||
|
||||
// Initialize media stream handler if streaming is enabled
|
||||
if (config.streaming?.enabled) {
|
||||
if (this.config.streaming.enabled) {
|
||||
this.initializeMediaStreaming();
|
||||
}
|
||||
}
|
||||
@ -68,7 +88,8 @@ export class VoiceCallWebhookServer {
|
||||
* Initialize media streaming with OpenAI Realtime STT.
|
||||
*/
|
||||
private initializeMediaStreaming(): void {
|
||||
const apiKey = this.config.streaming?.openaiApiKey || process.env.OPENAI_API_KEY;
|
||||
const streaming = this.config.streaming;
|
||||
const apiKey = streaming.openaiApiKey ?? process.env.OPENAI_API_KEY;
|
||||
|
||||
if (!apiKey) {
|
||||
console.warn("[voice-call] Streaming enabled but no OpenAI API key found");
|
||||
@ -77,17 +98,17 @@ export class VoiceCallWebhookServer {
|
||||
|
||||
const sttProvider = new OpenAIRealtimeSTTProvider({
|
||||
apiKey,
|
||||
model: this.config.streaming?.sttModel,
|
||||
silenceDurationMs: this.config.streaming?.silenceDurationMs,
|
||||
vadThreshold: this.config.streaming?.vadThreshold,
|
||||
model: streaming.sttModel,
|
||||
silenceDurationMs: streaming.silenceDurationMs,
|
||||
vadThreshold: streaming.vadThreshold,
|
||||
});
|
||||
|
||||
const streamConfig: MediaStreamConfig = {
|
||||
sttProvider,
|
||||
preStartTimeoutMs: this.config.streaming?.preStartTimeoutMs,
|
||||
maxPendingConnections: this.config.streaming?.maxPendingConnections,
|
||||
maxPendingConnectionsPerIp: this.config.streaming?.maxPendingConnectionsPerIp,
|
||||
maxConnections: this.config.streaming?.maxConnections,
|
||||
preStartTimeoutMs: streaming.preStartTimeoutMs,
|
||||
maxPendingConnections: streaming.maxPendingConnections,
|
||||
maxPendingConnectionsPerIp: streaming.maxPendingConnectionsPerIp,
|
||||
maxConnections: streaming.maxConnections,
|
||||
shouldAcceptStream: ({ callId, token }) => {
|
||||
const call = this.manager.getCallByProviderCallId(callId);
|
||||
if (!call) {
|
||||
@ -190,7 +211,7 @@ export class VoiceCallWebhookServer {
|
||||
*/
|
||||
async start(): Promise<string> {
|
||||
const { port, bind, path: webhookPath } = this.config.serve;
|
||||
const streamPath = this.config.streaming?.streamPath || "/voice/stream";
|
||||
const streamPath = this.config.streaming.streamPath;
|
||||
|
||||
// Guard: if a server is already listening, return the existing URL.
|
||||
// This prevents EADDRINUSE when start() is called more than once on the
|
||||
@ -280,8 +301,7 @@ export class VoiceCallWebhookServer {
|
||||
|
||||
private getUpgradePathname(request: http.IncomingMessage): string | null {
|
||||
try {
|
||||
const host = request.headers.host || "localhost";
|
||||
return new URL(request.url || "/", `http://${host}`).pathname;
|
||||
return buildRequestUrl(request.url, request.headers.host).pathname;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
@ -322,7 +342,7 @@ export class VoiceCallWebhookServer {
|
||||
req: http.IncomingMessage,
|
||||
webhookPath: string,
|
||||
): Promise<WebhookResponsePayload> {
|
||||
const url = new URL(req.url || "/", `http://${req.headers.host}`);
|
||||
const url = buildRequestUrl(req.url, req.headers.host);
|
||||
|
||||
if (url.pathname === "/voice/hold-music") {
|
||||
return {
|
||||
@ -360,7 +380,7 @@ export class VoiceCallWebhookServer {
|
||||
const ctx: WebhookContext = {
|
||||
headers: req.headers as Record<string, string | string[] | undefined>,
|
||||
rawBody: body,
|
||||
url: `http://${req.headers.host}${req.url}`,
|
||||
url: url.toString(),
|
||||
method: "POST",
|
||||
query: Object.fromEntries(url.searchParams),
|
||||
remoteAddress: req.socket.remoteAddress ?? undefined,
|
||||
@ -386,11 +406,7 @@ export class VoiceCallWebhookServer {
|
||||
this.processParsedEvents(parsed.events);
|
||||
}
|
||||
|
||||
return {
|
||||
statusCode: parsed.statusCode || 200,
|
||||
headers: parsed.providerResponseHeaders,
|
||||
body: parsed.providerResponseBody || "OK",
|
||||
};
|
||||
return normalizeWebhookResponse(parsed);
|
||||
}
|
||||
|
||||
private processParsedEvents(events: NormalizedEvent[]): void {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user