refactor: normalize voice-call runtime defaults

This commit is contained in:
Peter Steinberger 2026-03-08 02:49:40 +00:00
parent 5759b93dda
commit 3087893ef9
6 changed files with 151 additions and 30 deletions

View File

@ -1,5 +1,10 @@
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { validateProviderConfig, resolveVoiceCallConfig, type VoiceCallConfig } from "./config.js";
import {
validateProviderConfig,
normalizeVoiceCallConfig,
resolveVoiceCallConfig,
type VoiceCallConfig,
} from "./config.js";
import { createVoiceCallBaseConfig } from "./test-fixtures.js";
function createBaseConfig(provider: "telnyx" | "twilio" | "plivo" | "mock"): VoiceCallConfig {
@ -166,3 +171,22 @@ describe("validateProviderConfig", () => {
});
});
});
describe("normalizeVoiceCallConfig", () => {
  it("fills nested runtime defaults from a partial config boundary", () => {
    // Only `streaming` is partially provided; every other section is omitted.
    const result = normalizeVoiceCallConfig({
      enabled: true,
      provider: "mock",
      streaming: {
        enabled: true,
        streamPath: "/custom-stream",
      },
    });

    // Omitted sections are filled from schema defaults.
    expect(result.serve.path).toBe("/voice/webhook");
    // Explicit override survives while sibling keys fall back to defaults.
    expect(result.streaming.streamPath).toBe("/custom-stream");
    expect(result.streaming.sttModel).toBe("gpt-4o-transcribe");
    expect(result.tunnel.provider).toBe("none");
    expect(result.webhookSecurity.allowedHosts).toEqual([]);
  });
});

View File

@ -350,17 +350,53 @@ export const VoiceCallConfigSchema = z
.strict();
export type VoiceCallConfig = z.infer<typeof VoiceCallConfigSchema>;
/**
 * Recursively marks every property of T as optional so callers can supply
 * partially-specified nested config sections. Array elements are made
 * deep-partial as well, but the array itself keeps its array shape.
 */
type DeepPartial<T> =
T extends Array<infer U>
? DeepPartial<U>[]
: T extends object
? { [K in keyof T]?: DeepPartial<T[K]> }
: T;
/** Deeply-optional form of VoiceCallConfig accepted at config entry points. */
export type VoiceCallConfigInput = DeepPartial<VoiceCallConfig>;
// -----------------------------------------------------------------------------
// Configuration Helpers
// -----------------------------------------------------------------------------
// Baseline config: parsing an empty object through the schema applies every
// schema-level default exactly once, at module load.
const DEFAULT_VOICE_CALL_CONFIG = VoiceCallConfigSchema.parse({});
// Returns a fresh deep copy of the defaults so callers can mutate the result
// without corrupting the shared DEFAULT_VOICE_CALL_CONFIG instance.
function cloneDefaultVoiceCallConfig(): VoiceCallConfig {
return structuredClone(DEFAULT_VOICE_CALL_CONFIG);
}
/**
 * Expands a deep-partial config into a complete VoiceCallConfig by layering
 * the caller's values over a fresh copy of the schema defaults.
 *
 * Merge semantics (order matters):
 * - Top-level spread of `config` first, then explicit per-section merges, so
 *   nested sections are shallow-merged rather than wholesale-replaced.
 * - The `?? defaults.*` re-fills guard against own-properties that are
 *   explicitly `undefined` (a plain spread would let those clobber defaults).
 * - Nested merges are shallow (one level deep per section).
 */
export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCallConfig {
const defaults = cloneDefaultVoiceCallConfig();
return {
...defaults,
...config,
// Array: take the caller's array wholesale, or fall back to the default.
allowFrom: config.allowFrom ?? defaults.allowFrom,
outbound: { ...defaults.outbound, ...config.outbound },
serve: { ...defaults.serve, ...config.serve },
tailscale: { ...defaults.tailscale, ...config.tailscale },
tunnel: { ...defaults.tunnel, ...config.tunnel },
webhookSecurity: {
...defaults.webhookSecurity,
...config.webhookSecurity,
// Arrays inside the section need explicit ?? fallbacks: the spread above
// would leave them undefined when the caller passed them as undefined.
allowedHosts: config.webhookSecurity?.allowedHosts ?? defaults.webhookSecurity.allowedHosts,
trustedProxyIPs:
config.webhookSecurity?.trustedProxyIPs ?? defaults.webhookSecurity.trustedProxyIPs,
},
streaming: { ...defaults.streaming, ...config.streaming },
stt: { ...defaults.stt, ...config.stt },
// tts is taken as-is (no shallow merge) — caller value wins wholesale.
tts: config.tts ?? defaults.tts,
};
}
/**
* Resolves the configuration by merging environment variables into missing fields.
* Returns a new configuration object with environment variables applied.
*/
export function resolveVoiceCallConfig(config: VoiceCallConfig): VoiceCallConfig {
const resolved = JSON.parse(JSON.stringify(config)) as VoiceCallConfig;
export function resolveVoiceCallConfig(config: VoiceCallConfigInput): VoiceCallConfig {
const resolved = normalizeVoiceCallConfig(config);
// Telnyx
if (resolved.provider === "telnyx") {
@ -405,7 +441,7 @@ export function resolveVoiceCallConfig(config: VoiceCallConfig): VoiceCallConfig
resolved.webhookSecurity.trustForwardingHeaders ?? false;
resolved.webhookSecurity.trustedProxyIPs = resolved.webhookSecurity.trustedProxyIPs ?? [];
return resolved;
return normalizeVoiceCallConfig(resolved);
}
/**

View File

@ -3,6 +3,8 @@ import type { OpenAITTSConfig } from "./tts-openai.js";
import { OpenAITTSProvider } from "./tts-openai.js";
// Shape of the private fields these tests read off an OpenAITTSProvider
// instance (accessed via the readProviderInternals test helper).
type ProviderInternals = {
model: string;
voice: string;
speed: number;
};
@ -27,4 +29,15 @@ describe("OpenAITTSProvider constructor defaults", () => {
expect(provider.speed).toBe(1.0);
});
it("treats blank model and voice overrides as unset", () => {
  // Whitespace-only / empty-string overrides should be ignored in favor of
  // the constructor defaults.
  const internals = readProviderInternals({
    apiKey: "sk-test", // pragma: allowlist secret
    model: " ",
    voice: "",
  });
  expect(internals.model).toBe("gpt-4o-mini-tts");
  expect(internals.voice).toBe("coral");
});
});

View File

@ -66,6 +66,11 @@ export const OPENAI_TTS_VOICES = [
export type OpenAITTSVoice = (typeof OPENAI_TTS_VOICES)[number];
/**
 * Collapses `undefined` and whitespace-only strings to `undefined`; otherwise
 * returns the trimmed string. Lets blank config values behave as "unset".
 */
function trimToUndefined(value: string | undefined): string | undefined {
  if (value === undefined) {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
/**
* OpenAI TTS Provider for generating speech audio.
*/
@ -77,13 +82,14 @@ export class OpenAITTSProvider {
private instructions?: string;
constructor(config: OpenAITTSConfig = {}) {
this.apiKey = config.apiKey || process.env.OPENAI_API_KEY || "";
this.apiKey =
trimToUndefined(config.apiKey) ?? trimToUndefined(process.env.OPENAI_API_KEY) ?? "";
// Default to gpt-4o-mini-tts for intelligent realtime applications
this.model = config.model || "gpt-4o-mini-tts";
this.model = trimToUndefined(config.model) ?? "gpt-4o-mini-tts";
// Default to coral - good balance of quality and natural tone
this.voice = (config.voice as OpenAITTSVoice) || "coral";
this.voice = (trimToUndefined(config.voice) as OpenAITTSVoice | undefined) ?? "coral";
this.speed = config.speed ?? 1.0;
this.instructions = config.instructions;
this.instructions = trimToUndefined(config.instructions);
if (!this.apiKey) {
throw new Error("OpenAI API key required (set OPENAI_API_KEY or pass apiKey)");
@ -105,7 +111,7 @@ export class OpenAITTSProvider {
};
// Add instructions if using gpt-4o-mini-tts model
const effectiveInstructions = instructions || this.instructions;
const effectiveInstructions = trimToUndefined(instructions) ?? this.instructions;
if (effectiveInstructions && this.model.includes("gpt-4o-mini-tts")) {
body.instructions = effectiveInstructions;
}

View File

@ -274,6 +274,32 @@ describe("VoiceCallWebhookServer replay handling", () => {
});
});
describe("VoiceCallWebhookServer response normalization", () => {
  it("preserves explicit empty provider response bodies", async () => {
    // Provider stub that answers with 204 and a deliberately empty body.
    const emptyBodyProvider: VoiceCallProvider = {
      ...provider,
      parseWebhookEvent: () => ({
        events: [],
        statusCode: 204,
        providerResponseBody: "",
      }),
    };
    const { manager } = createManager([]);
    const config = createConfig({ serve: { port: 0, bind: "127.0.0.1", path: "/voice/webhook" } });
    const server = new VoiceCallWebhookServer(config, manager, emptyBodyProvider);
    try {
      const baseUrl = await server.start();
      const response = await postWebhookForm(server, baseUrl, "CallSid=CA123&SpeechResult=hello");
      // The empty string must survive normalization rather than become "OK".
      expect(response.status).toBe(204);
      expect(await response.text()).toBe("");
    } finally {
      await server.stop();
    }
  });
});
describe("VoiceCallWebhookServer start idempotency", () => {
it("returns existing URL when start() is called twice without stop()", async () => {
const { manager } = createManager([]);

View File

@ -5,7 +5,7 @@ import {
readRequestBodyWithLimit,
requestBodyErrorToText,
} from "openclaw/plugin-sdk/voice-call";
import type { VoiceCallConfig } from "./config.js";
import { normalizeVoiceCallConfig, type VoiceCallConfig } from "./config.js";
import type { CoreConfig } from "./core-bridge.js";
import type { CallManager } from "./manager.js";
import type { MediaStreamConfig } from "./media-stream.js";
@ -24,6 +24,26 @@ type WebhookResponsePayload = {
headers?: Record<string, string>;
};
/**
 * Builds an absolute URL for an incoming HTTP request, defaulting a missing
 * request path to "/" and a missing Host header to `fallbackHost`.
 * May throw (like the URL constructor) on a malformed host — callers catch.
 */
function buildRequestUrl(
  requestUrl: string | undefined,
  requestHost: string | undefined,
  fallbackHost = "localhost",
): URL {
  const path = requestUrl ?? "/";
  const host = requestHost ?? fallbackHost;
  return new URL(path, `http://${host}`);
}
/**
 * Maps a provider's parsed webhook result onto the HTTP response payload,
 * defaulting the status to 200 and the body to "OK". An explicitly empty
 * body ("") is preserved rather than replaced with the default.
 */
function normalizeWebhookResponse(parsed: {
  statusCode?: number;
  providerResponseHeaders?: Record<string, string>;
  providerResponseBody?: string;
}): WebhookResponsePayload {
  const { statusCode, providerResponseHeaders, providerResponseBody } = parsed;
  return {
    statusCode: statusCode ?? 200,
    headers: providerResponseHeaders,
    body: providerResponseBody ?? "OK",
  };
}
/**
* HTTP server for receiving voice call webhooks from providers.
* Supports WebSocket upgrades for media streams when streaming is enabled.
@ -46,13 +66,13 @@ export class VoiceCallWebhookServer {
provider: VoiceCallProvider,
coreConfig?: CoreConfig,
) {
this.config = config;
this.config = normalizeVoiceCallConfig(config);
this.manager = manager;
this.provider = provider;
this.coreConfig = coreConfig ?? null;
// Initialize media stream handler if streaming is enabled
if (config.streaming?.enabled) {
if (this.config.streaming.enabled) {
this.initializeMediaStreaming();
}
}
@ -68,7 +88,8 @@ export class VoiceCallWebhookServer {
* Initialize media streaming with OpenAI Realtime STT.
*/
private initializeMediaStreaming(): void {
const apiKey = this.config.streaming?.openaiApiKey || process.env.OPENAI_API_KEY;
const streaming = this.config.streaming;
const apiKey = streaming.openaiApiKey ?? process.env.OPENAI_API_KEY;
if (!apiKey) {
console.warn("[voice-call] Streaming enabled but no OpenAI API key found");
@ -77,17 +98,17 @@ export class VoiceCallWebhookServer {
const sttProvider = new OpenAIRealtimeSTTProvider({
apiKey,
model: this.config.streaming?.sttModel,
silenceDurationMs: this.config.streaming?.silenceDurationMs,
vadThreshold: this.config.streaming?.vadThreshold,
model: streaming.sttModel,
silenceDurationMs: streaming.silenceDurationMs,
vadThreshold: streaming.vadThreshold,
});
const streamConfig: MediaStreamConfig = {
sttProvider,
preStartTimeoutMs: this.config.streaming?.preStartTimeoutMs,
maxPendingConnections: this.config.streaming?.maxPendingConnections,
maxPendingConnectionsPerIp: this.config.streaming?.maxPendingConnectionsPerIp,
maxConnections: this.config.streaming?.maxConnections,
preStartTimeoutMs: streaming.preStartTimeoutMs,
maxPendingConnections: streaming.maxPendingConnections,
maxPendingConnectionsPerIp: streaming.maxPendingConnectionsPerIp,
maxConnections: streaming.maxConnections,
shouldAcceptStream: ({ callId, token }) => {
const call = this.manager.getCallByProviderCallId(callId);
if (!call) {
@ -190,7 +211,7 @@ export class VoiceCallWebhookServer {
*/
async start(): Promise<string> {
const { port, bind, path: webhookPath } = this.config.serve;
const streamPath = this.config.streaming?.streamPath || "/voice/stream";
const streamPath = this.config.streaming.streamPath;
// Guard: if a server is already listening, return the existing URL.
// This prevents EADDRINUSE when start() is called more than once on the
@ -280,8 +301,7 @@ export class VoiceCallWebhookServer {
private getUpgradePathname(request: http.IncomingMessage): string | null {
try {
const host = request.headers.host || "localhost";
return new URL(request.url || "/", `http://${host}`).pathname;
return buildRequestUrl(request.url, request.headers.host).pathname;
} catch {
return null;
}
@ -322,7 +342,7 @@ export class VoiceCallWebhookServer {
req: http.IncomingMessage,
webhookPath: string,
): Promise<WebhookResponsePayload> {
const url = new URL(req.url || "/", `http://${req.headers.host}`);
const url = buildRequestUrl(req.url, req.headers.host);
if (url.pathname === "/voice/hold-music") {
return {
@ -360,7 +380,7 @@ export class VoiceCallWebhookServer {
const ctx: WebhookContext = {
headers: req.headers as Record<string, string | string[] | undefined>,
rawBody: body,
url: `http://${req.headers.host}${req.url}`,
url: url.toString(),
method: "POST",
query: Object.fromEntries(url.searchParams),
remoteAddress: req.socket.remoteAddress ?? undefined,
@ -386,11 +406,7 @@ export class VoiceCallWebhookServer {
this.processParsedEvents(parsed.events);
}
return {
statusCode: parsed.statusCode || 200,
headers: parsed.providerResponseHeaders,
body: parsed.providerResponseBody || "OK",
};
return normalizeWebhookResponse(parsed);
}
private processParsedEvents(events: NormalizedEvent[]): void {