/**
 * Voice call response generator - uses the embedded Pi agent for tool support.
 * Routes voice responses through the same agent infrastructure as messaging.
 */
|
|
|
|
import crypto from "node:crypto";
|
|
import type { SessionEntry } from "../api.js";
|
|
import type { VoiceCallConfig } from "./config.js";
|
|
import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
|
|
|
|
/**
 * Inputs required to generate one voice response turn.
 */
export type VoiceResponseParams = {
  /** Voice call config (model/prompt/timeout overrides for voice). */
  voiceConfig: VoiceCallConfig;
  /** Core OpenClaw config. */
  coreConfig: CoreConfig;
  /** Injected host agent runtime (session store, path resolution, agent runner). */
  agentRuntime: CoreAgentDeps;
  /** Call ID for session tracking; becomes part of the per-run ID. */
  callId: string;
  /** Caller's phone number; normalized (digits only) to key the session. */
  from: string;
  /** Conversation transcript of prior turns, oldest first. */
  transcript: Array<{ speaker: "user" | "bot"; text: string }>;
  /** Latest user message to respond to. */
  userMessage: string;
};
|
|
|
|
/**
 * Outcome of a voice response generation attempt.
 */
export type VoiceResponseResult = {
  // Spoken reply text; null when the agent produced no usable text,
  // the run was aborted, or generation failed.
  text: string | null;
  // Human-readable failure reason, set only on error/abort paths.
  error?: string;
};
|
|
|
|
/**
|
|
* Generate a voice response using the embedded Pi agent with full tool support.
|
|
* Uses the same agent infrastructure as messaging for consistent behavior.
|
|
*/
|
|
export async function generateVoiceResponse(
|
|
params: VoiceResponseParams,
|
|
): Promise<VoiceResponseResult> {
|
|
const { voiceConfig, callId, from, transcript, userMessage, coreConfig, agentRuntime } = params;
|
|
|
|
if (!coreConfig) {
|
|
return { text: null, error: "Core config unavailable for voice response" };
|
|
}
|
|
const cfg = coreConfig;
|
|
|
|
// Build voice-specific session key based on phone number
|
|
const normalizedPhone = from.replace(/\D/g, "");
|
|
const sessionKey = `voice:${normalizedPhone}`;
|
|
const agentId = "main";
|
|
|
|
// Resolve paths
|
|
const storePath = agentRuntime.session.resolveStorePath(cfg.session?.store, { agentId });
|
|
const agentDir = agentRuntime.resolveAgentDir(cfg, agentId);
|
|
const workspaceDir = agentRuntime.resolveAgentWorkspaceDir(cfg, agentId);
|
|
|
|
// Ensure workspace exists
|
|
await agentRuntime.ensureAgentWorkspace({ dir: workspaceDir });
|
|
|
|
// Load or create session entry
|
|
const sessionStore = agentRuntime.session.loadSessionStore(storePath);
|
|
const now = Date.now();
|
|
let sessionEntry = sessionStore[sessionKey] as SessionEntry | undefined;
|
|
|
|
if (!sessionEntry) {
|
|
sessionEntry = {
|
|
sessionId: crypto.randomUUID(),
|
|
updatedAt: now,
|
|
};
|
|
sessionStore[sessionKey] = sessionEntry;
|
|
await agentRuntime.session.saveSessionStore(storePath, sessionStore);
|
|
}
|
|
|
|
const sessionId = sessionEntry.sessionId;
|
|
const sessionFile = agentRuntime.session.resolveSessionFilePath(sessionId, sessionEntry, {
|
|
agentId,
|
|
});
|
|
|
|
// Resolve model from config
|
|
const modelRef =
|
|
voiceConfig.responseModel || `${agentRuntime.defaults.provider}/${agentRuntime.defaults.model}`;
|
|
const slashIndex = modelRef.indexOf("/");
|
|
const provider =
|
|
slashIndex === -1 ? agentRuntime.defaults.provider : modelRef.slice(0, slashIndex);
|
|
const model = slashIndex === -1 ? modelRef : modelRef.slice(slashIndex + 1);
|
|
|
|
// Resolve thinking level
|
|
const thinkLevel = agentRuntime.resolveThinkingDefault({ cfg, provider, model });
|
|
|
|
// Resolve agent identity for personalized prompt
|
|
const identity = agentRuntime.resolveAgentIdentity(cfg, agentId);
|
|
const agentName = identity?.name?.trim() || "assistant";
|
|
|
|
// Build system prompt with conversation history
|
|
const basePrompt =
|
|
voiceConfig.responseSystemPrompt ??
|
|
`You are ${agentName}, a helpful voice assistant on a phone call. Keep responses brief and conversational (1-2 sentences max). Be natural and friendly. The caller's phone number is ${from}. You have access to tools - use them when helpful.`;
|
|
|
|
let extraSystemPrompt = basePrompt;
|
|
if (transcript.length > 0) {
|
|
const history = transcript
|
|
.map((entry) => `${entry.speaker === "bot" ? "You" : "Caller"}: ${entry.text}`)
|
|
.join("\n");
|
|
extraSystemPrompt = `${basePrompt}\n\nConversation so far:\n${history}`;
|
|
}
|
|
|
|
// Resolve timeout
|
|
const timeoutMs = voiceConfig.responseTimeoutMs ?? agentRuntime.resolveAgentTimeoutMs({ cfg });
|
|
const runId = `voice:${callId}:${Date.now()}`;
|
|
|
|
try {
|
|
const result = await agentRuntime.runEmbeddedPiAgent({
|
|
sessionId,
|
|
sessionKey,
|
|
messageProvider: "voice",
|
|
sessionFile,
|
|
workspaceDir,
|
|
config: cfg,
|
|
prompt: userMessage,
|
|
provider,
|
|
model,
|
|
thinkLevel,
|
|
verboseLevel: "off",
|
|
timeoutMs,
|
|
runId,
|
|
lane: "voice",
|
|
extraSystemPrompt,
|
|
agentDir,
|
|
});
|
|
|
|
// Extract text from payloads
|
|
const texts = (result.payloads ?? [])
|
|
.filter((p) => p.text && !p.isError)
|
|
.map((p) => p.text?.trim())
|
|
.filter(Boolean);
|
|
|
|
const text = texts.join(" ") || null;
|
|
|
|
if (!text && result.meta?.aborted) {
|
|
return { text: null, error: "Response generation was aborted" };
|
|
}
|
|
|
|
return { text };
|
|
} catch (err) {
|
|
console.error(`[voice-call] Response generation failed:`, err);
|
|
return { text: null, error: String(err) };
|
|
}
|
|
}
|