diff --git a/extensions/discord/src/voice/manager.e2e.test.ts b/extensions/discord/src/voice/manager.e2e.test.ts index 17d21ff7414..73c6f249021 100644 --- a/extensions/discord/src/voice/manager.e2e.test.ts +++ b/extensions/discord/src/voice/manager.e2e.test.ts @@ -8,10 +8,7 @@ const { createAudioPlayerMock, resolveAgentRouteMock, agentCommandMock, - buildProviderRegistryMock, - createMediaAttachmentCacheMock, - normalizeMediaAttachmentsMock, - runCapabilityMock, + transcribeAudioFileMock, } = vi.hoisted(() => { type EventHandler = (...args: unknown[]) => unknown; type MockConnection = { @@ -68,14 +65,7 @@ const { })), resolveAgentRouteMock: vi.fn(() => ({ agentId: "agent-1", sessionKey: "discord:g1:c1" })), agentCommandMock: vi.fn(async (_opts?: unknown, _runtime?: unknown) => ({ payloads: [] })), - buildProviderRegistryMock: vi.fn(() => ({})), - createMediaAttachmentCacheMock: vi.fn(() => ({ - cleanup: vi.fn(async () => undefined), - })), - normalizeMediaAttachmentsMock: vi.fn(() => [{ kind: "audio", path: "/tmp/test.wav" }]), - runCapabilityMock: vi.fn(async () => ({ - outputs: [{ kind: "audio.transcription", text: "hello from voice" }], - })), + transcribeAudioFileMock: vi.fn(async () => ({ text: "hello from voice" })), }; }); @@ -103,11 +93,8 @@ vi.mock("../../../../src/commands/agent.js", () => ({ agentCommandFromIngress: agentCommandMock, })); -vi.mock("../../../../src/media-understanding/runner.js", () => ({ - buildProviderRegistry: buildProviderRegistryMock, - createMediaAttachmentCache: createMediaAttachmentCacheMock, - normalizeMediaAttachments: normalizeMediaAttachmentsMock, - runCapability: runCapabilityMock, +vi.mock("../../../../src/media-understanding/runtime.js", () => ({ + transcribeAudioFile: transcribeAudioFileMock, })); let managerModule: typeof import("./manager.js"); @@ -149,15 +136,8 @@ describe("DiscordVoiceManager", () => { resolveAgentRouteMock.mockClear(); agentCommandMock.mockReset(); agentCommandMock.mockResolvedValue({ payloads: [] }); - buildProviderRegistryMock.mockReset(); - buildProviderRegistryMock.mockReturnValue({}); - createMediaAttachmentCacheMock.mockClear(); - normalizeMediaAttachmentsMock.mockReset(); - normalizeMediaAttachmentsMock.mockReturnValue([{ kind: "audio", path: "/tmp/test.wav" }]); - runCapabilityMock.mockReset(); - runCapabilityMock.mockResolvedValue({ - outputs: [{ kind: "audio.transcription", text: "hello from voice" }], - }); + transcribeAudioFileMock.mockReset(); + transcribeAudioFileMock.mockResolvedValue({ text: "hello from voice" }); }); const createManager = ( diff --git a/extensions/discord/src/voice/manager.ts b/extensions/discord/src/voice/manager.ts index 90c6c3bb1e6..a9f8d0fd721 100644 --- a/extensions/discord/src/voice/manager.ts +++ b/extensions/discord/src/voice/manager.ts @@ -17,7 +17,6 @@ import { type VoiceConnection, } from "@discordjs/voice"; import { resolveAgentDir } from "../../../../src/agents/agent-scope.js"; -import type { MsgContext } from "../../../../src/auto-reply/templating.js"; import { agentCommandFromIngress } from "../../../../src/commands/agent.js"; import type { OpenClawConfig } from "../../../../src/config/config.js"; import { isDangerousNameMatchingEnabled } from "../../../../src/config/dangerous-name-matching.js"; @@ -26,12 +25,7 @@ import { logVerbose, shouldLogVerbose } from "../../../../src/globals.js"; import { formatErrorMessage } from "../../../../src/infra/errors.js"; import { resolvePreferredOpenClawTmpDir } from "../../../../src/infra/tmp-openclaw-dir.js"; import { createSubsystemLogger } from "../../../../src/logging/subsystem.js"; -import { - buildProviderRegistry, - createMediaAttachmentCache, - normalizeMediaAttachments, - runCapability, -} from "../../../../src/media-understanding/runner.js"; +import { transcribeAudioFile } from "../../../../src/media-understanding/runtime.js"; import { resolveAgentRoute } from "../../../../src/routing/resolve-route.js"; import type { RuntimeEnv } from "../../../../src/runtime.js"; import { parseTtsDirectives } from "../../../../src/tts/tts-core.js"; @@ -236,33 +230,13 @@ async function transcribeAudio(params: { agentId: string; filePath: string; }): Promise { - const ctx: MsgContext = { - MediaPath: params.filePath, - MediaType: "audio/wav", - }; - const attachments = normalizeMediaAttachments(ctx); - if (attachments.length === 0) { - return undefined; - } - const cache = createMediaAttachmentCache(attachments); - const providerRegistry = buildProviderRegistry(); - try { - const result = await runCapability({ - capability: "audio", - cfg: params.cfg, - ctx, - attachments: cache, - media: attachments, - agentDir: resolveAgentDir(params.cfg, params.agentId), - providerRegistry, - config: params.cfg.tools?.media?.audio, - }); - const output = result.outputs.find((entry) => entry.kind === "audio.transcription"); - const text = output?.text?.trim(); - return text || undefined; - } finally { - await cache.cleanup(); - } + const result = await transcribeAudioFile({ + cfg: params.cfg, + filePath: params.filePath, + mime: "audio/wav", + agentDir: resolveAgentDir(params.cfg, params.agentId), + }); + return result.text?.trim() || undefined; } export class DiscordVoiceManager { diff --git a/extensions/whatsapp/src/setup-surface.ts b/extensions/whatsapp/src/setup-surface.ts index 50a28d419cb..47e84de6860 100644 --- a/extensions/whatsapp/src/setup-surface.ts +++ b/extensions/whatsapp/src/setup-surface.ts @@ -9,6 +9,7 @@ import { pathExists, splitSetupEntries, setSetupChannelEnabled, + type DmPolicy, type OpenClawConfig, } from "../../../src/plugin-sdk-internal/setup.js"; import type { ChannelSetupWizard } from "../../../src/plugin-sdk-internal/setup.js"; diff --git a/extensions/zalo/src/channel.runtime.ts b/extensions/zalo/src/channel.runtime.ts index fc4488b5be8..a376d52b94e 100644 --- a/extensions/zalo/src/channel.runtime.ts +++ b/extensions/zalo/src/channel.runtime.ts @@ -41,7 +41,9 @@ export async function probeZaloAccount(params: { export async function startZaloGatewayAccount( ctx: Parameters< - NonNullable["startAccount"] + NonNullable< + NonNullable["startAccount"] + > >[0], ) { const account = ctx.account; diff --git a/src/plugins/contracts/registry.ts b/src/plugins/contracts/registry.ts index 14dbb17262c..3c5cc8935c9 100644 --- a/src/plugins/contracts/registry.ts +++ b/src/plugins/contracts/registry.ts @@ -47,26 +47,20 @@ type RegistrablePlugin = { register: (api: ReturnType["api"]) => void; }; -type ProviderContractEntry = { +type CapabilityContractEntry = { pluginId: string; - provider: ProviderPlugin; + provider: T; }; -type WebSearchProviderContractEntry = { - pluginId: string; - provider: WebSearchProviderPlugin; +type ProviderContractEntry = CapabilityContractEntry; + +type WebSearchProviderContractEntry = CapabilityContractEntry & { credentialValue: unknown; }; -type SpeechProviderContractEntry = { - pluginId: string; - provider: SpeechProviderPlugin; -}; - -type MediaUnderstandingProviderContractEntry = { - pluginId: string; - provider: MediaUnderstandingProviderPlugin; -}; +type SpeechProviderContractEntry = CapabilityContractEntry; +type MediaUnderstandingProviderContractEntry = + CapabilityContractEntry; type PluginRegistrationContractEntry = { pluginId: string; @@ -138,15 +132,23 @@ function captureRegistrations(plugin: RegistrablePlugin) { return captured; } -export const providerContractRegistry: ProviderContractEntry[] = bundledProviderPlugins.flatMap( - (plugin) => { +function buildCapabilityContractRegistry(params: { + plugins: RegistrablePlugin[]; + select: (captured: ReturnType) => T[]; +}): CapabilityContractEntry[] { + return params.plugins.flatMap((plugin) => { const captured = captureRegistrations(plugin); - return captured.providers.map((provider) => ({ + return params.select(captured).map((provider) => ({ pluginId: plugin.id, provider, })); - }, -); + }); +} + +export const providerContractRegistry: ProviderContractEntry[] = buildCapabilityContractRegistry({ + plugins: bundledProviderPlugins, + select: (captured) => captured.providers, +}); export const webSearchProviderContractRegistry: WebSearchProviderContractEntry[] = bundledWebSearchPlugins.flatMap((plugin) => { @@ -159,21 +161,15 @@ export const webSearchProviderContractRegistry: WebSearchProviderContractEntry[] }); export const speechProviderContractRegistry: SpeechProviderContractEntry[] = - bundledSpeechPlugins.flatMap((plugin) => { - const captured = captureRegistrations(plugin); - return captured.speechProviders.map((provider) => ({ - pluginId: plugin.id, - provider, - })); + buildCapabilityContractRegistry({ + plugins: bundledSpeechPlugins, + select: (captured) => captured.speechProviders, }); export const mediaUnderstandingProviderContractRegistry: MediaUnderstandingProviderContractEntry[] = - bundledMediaUnderstandingPlugins.flatMap((plugin) => { - const captured = captureRegistrations(plugin); - return captured.mediaUnderstandingProviders.map((provider) => ({ - pluginId: plugin.id, - provider, - })); + buildCapabilityContractRegistry({ + plugins: bundledMediaUnderstandingPlugins, + select: (captured) => captured.mediaUnderstandingProviders, }); const bundledPluginRegistrationList = [ diff --git a/src/plugins/registry.ts b/src/plugins/registry.ts index 6ec51d889fc..c81c2253e0a 100644 --- a/src/plugins/registry.ts +++ b/src/plugins/registry.ts @@ -104,29 +104,20 @@ export type PluginProviderRegistration = { rootDir?: string; }; -export type PluginWebSearchProviderRegistration = { +type PluginOwnedProviderRegistration = { pluginId: string; pluginName?: string; - provider: WebSearchProviderPlugin; + provider: T; source: string; rootDir?: string; }; -export type PluginSpeechProviderRegistration = { - pluginId: string; - pluginName?: string; - provider: SpeechProviderPlugin; - source: string; - rootDir?: string; -}; - -export type PluginMediaUnderstandingProviderRegistration = { - pluginId: string; - pluginName?: string; - provider: MediaUnderstandingProviderPlugin; - source: string; - rootDir?: string; -}; +export type PluginSpeechProviderRegistration = + PluginOwnedProviderRegistration; +export type PluginMediaUnderstandingProviderRegistration = + PluginOwnedProviderRegistration; +export type PluginWebSearchProviderRegistration = + PluginOwnedProviderRegistration; export type PluginHookRegistration = { pluginId: string; @@ -576,13 +567,7 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) { const registerUniqueProviderLike = < T extends { id: string }, - R extends { - pluginId: string; - pluginName?: string; - provider: T; - source: string; - rootDir?: string; - }, + R extends PluginOwnedProviderRegistration, >(params: { record: PluginRecord; provider: T; diff --git a/src/plugins/runtime/index.test.ts b/src/plugins/runtime/index.test.ts index dfca1cfaf4a..9f7613881a5 100644 --- a/src/plugins/runtime/index.test.ts +++ b/src/plugins/runtime/index.test.ts @@ -55,6 +55,14 @@ describe("plugin runtime command execution", () => { expect(runtime.events.onSessionTranscriptUpdate).toBe(onSessionTranscriptUpdate); }); + it("exposes runtime.mediaUnderstanding helpers and keeps stt as an alias", () => { + const runtime = createPluginRuntime(); + expect(typeof runtime.mediaUnderstanding.runFile).toBe("function"); + expect(typeof runtime.mediaUnderstanding.describeImageFile).toBe("function"); + expect(typeof runtime.mediaUnderstanding.describeVideoFile).toBe("function"); + expect(runtime.mediaUnderstanding.transcribeAudioFile).toBe(runtime.stt.transcribeAudioFile); + }); + it("exposes runtime.system.requestHeartbeatNow", () => { const runtime = createPluginRuntime(); expect(runtime.system.requestHeartbeatNow).toBe(requestHeartbeatNow); diff --git a/vitest.e2e.config.ts b/vitest.e2e.config.ts index b70d8c8eedb..67e7cada10e 100644 --- a/vitest.e2e.config.ts +++ b/vitest.e2e.config.ts @@ -26,7 +26,7 @@ export default defineConfig({ pool: "forks", maxWorkers: e2eWorkers, silent: !verboseE2E, - include: ["test/**/*.e2e.test.ts", "src/**/*.e2e.test.ts"], + include: ["test/**/*.e2e.test.ts", "src/**/*.e2e.test.ts", "extensions/**/*.e2e.test.ts"], exclude, }, });