feat(plugins): tighten media runtime integration

This commit is contained in:
Peter Steinberger 2026-03-16 21:13:38 -07:00
parent 45cb02b1dd
commit f4fa84aea7
No known key found for this signature in database
8 changed files with 63 additions and 117 deletions

View File

@ -8,10 +8,7 @@ const {
createAudioPlayerMock, createAudioPlayerMock,
resolveAgentRouteMock, resolveAgentRouteMock,
agentCommandMock, agentCommandMock,
buildProviderRegistryMock, transcribeAudioFileMock,
createMediaAttachmentCacheMock,
normalizeMediaAttachmentsMock,
runCapabilityMock,
} = vi.hoisted(() => { } = vi.hoisted(() => {
type EventHandler = (...args: unknown[]) => unknown; type EventHandler = (...args: unknown[]) => unknown;
type MockConnection = { type MockConnection = {
@ -68,14 +65,7 @@ const {
})), })),
resolveAgentRouteMock: vi.fn(() => ({ agentId: "agent-1", sessionKey: "discord:g1:c1" })), resolveAgentRouteMock: vi.fn(() => ({ agentId: "agent-1", sessionKey: "discord:g1:c1" })),
agentCommandMock: vi.fn(async (_opts?: unknown, _runtime?: unknown) => ({ payloads: [] })), agentCommandMock: vi.fn(async (_opts?: unknown, _runtime?: unknown) => ({ payloads: [] })),
buildProviderRegistryMock: vi.fn(() => ({})), transcribeAudioFileMock: vi.fn(async () => ({ text: "hello from voice" })),
createMediaAttachmentCacheMock: vi.fn(() => ({
cleanup: vi.fn(async () => undefined),
})),
normalizeMediaAttachmentsMock: vi.fn(() => [{ kind: "audio", path: "/tmp/test.wav" }]),
runCapabilityMock: vi.fn(async () => ({
outputs: [{ kind: "audio.transcription", text: "hello from voice" }],
})),
}; };
}); });
@ -103,11 +93,8 @@ vi.mock("../../../../src/commands/agent.js", () => ({
agentCommandFromIngress: agentCommandMock, agentCommandFromIngress: agentCommandMock,
})); }));
vi.mock("../../../../src/media-understanding/runner.js", () => ({ vi.mock("../../../../src/media-understanding/runtime.js", () => ({
buildProviderRegistry: buildProviderRegistryMock, transcribeAudioFile: transcribeAudioFileMock,
createMediaAttachmentCache: createMediaAttachmentCacheMock,
normalizeMediaAttachments: normalizeMediaAttachmentsMock,
runCapability: runCapabilityMock,
})); }));
let managerModule: typeof import("./manager.js"); let managerModule: typeof import("./manager.js");
@ -149,15 +136,8 @@ describe("DiscordVoiceManager", () => {
resolveAgentRouteMock.mockClear(); resolveAgentRouteMock.mockClear();
agentCommandMock.mockReset(); agentCommandMock.mockReset();
agentCommandMock.mockResolvedValue({ payloads: [] }); agentCommandMock.mockResolvedValue({ payloads: [] });
buildProviderRegistryMock.mockReset(); transcribeAudioFileMock.mockReset();
buildProviderRegistryMock.mockReturnValue({}); transcribeAudioFileMock.mockResolvedValue({ text: "hello from voice" });
createMediaAttachmentCacheMock.mockClear();
normalizeMediaAttachmentsMock.mockReset();
normalizeMediaAttachmentsMock.mockReturnValue([{ kind: "audio", path: "/tmp/test.wav" }]);
runCapabilityMock.mockReset();
runCapabilityMock.mockResolvedValue({
outputs: [{ kind: "audio.transcription", text: "hello from voice" }],
});
}); });
const createManager = ( const createManager = (

View File

@ -17,7 +17,6 @@ import {
type VoiceConnection, type VoiceConnection,
} from "@discordjs/voice"; } from "@discordjs/voice";
import { resolveAgentDir } from "../../../../src/agents/agent-scope.js"; import { resolveAgentDir } from "../../../../src/agents/agent-scope.js";
import type { MsgContext } from "../../../../src/auto-reply/templating.js";
import { agentCommandFromIngress } from "../../../../src/commands/agent.js"; import { agentCommandFromIngress } from "../../../../src/commands/agent.js";
import type { OpenClawConfig } from "../../../../src/config/config.js"; import type { OpenClawConfig } from "../../../../src/config/config.js";
import { isDangerousNameMatchingEnabled } from "../../../../src/config/dangerous-name-matching.js"; import { isDangerousNameMatchingEnabled } from "../../../../src/config/dangerous-name-matching.js";
@ -26,12 +25,7 @@ import { logVerbose, shouldLogVerbose } from "../../../../src/globals.js";
import { formatErrorMessage } from "../../../../src/infra/errors.js"; import { formatErrorMessage } from "../../../../src/infra/errors.js";
import { resolvePreferredOpenClawTmpDir } from "../../../../src/infra/tmp-openclaw-dir.js"; import { resolvePreferredOpenClawTmpDir } from "../../../../src/infra/tmp-openclaw-dir.js";
import { createSubsystemLogger } from "../../../../src/logging/subsystem.js"; import { createSubsystemLogger } from "../../../../src/logging/subsystem.js";
import { import { transcribeAudioFile } from "../../../../src/media-understanding/runtime.js";
buildProviderRegistry,
createMediaAttachmentCache,
normalizeMediaAttachments,
runCapability,
} from "../../../../src/media-understanding/runner.js";
import { resolveAgentRoute } from "../../../../src/routing/resolve-route.js"; import { resolveAgentRoute } from "../../../../src/routing/resolve-route.js";
import type { RuntimeEnv } from "../../../../src/runtime.js"; import type { RuntimeEnv } from "../../../../src/runtime.js";
import { parseTtsDirectives } from "../../../../src/tts/tts-core.js"; import { parseTtsDirectives } from "../../../../src/tts/tts-core.js";
@ -236,33 +230,13 @@ async function transcribeAudio(params: {
agentId: string; agentId: string;
filePath: string; filePath: string;
}): Promise<string | undefined> { }): Promise<string | undefined> {
const ctx: MsgContext = { const result = await transcribeAudioFile({
MediaPath: params.filePath, cfg: params.cfg,
MediaType: "audio/wav", filePath: params.filePath,
}; mime: "audio/wav",
const attachments = normalizeMediaAttachments(ctx); agentDir: resolveAgentDir(params.cfg, params.agentId),
if (attachments.length === 0) { });
return undefined; return result.text?.trim() || undefined;
}
const cache = createMediaAttachmentCache(attachments);
const providerRegistry = buildProviderRegistry();
try {
const result = await runCapability({
capability: "audio",
cfg: params.cfg,
ctx,
attachments: cache,
media: attachments,
agentDir: resolveAgentDir(params.cfg, params.agentId),
providerRegistry,
config: params.cfg.tools?.media?.audio,
});
const output = result.outputs.find((entry) => entry.kind === "audio.transcription");
const text = output?.text?.trim();
return text || undefined;
} finally {
await cache.cleanup();
}
} }
export class DiscordVoiceManager { export class DiscordVoiceManager {

View File

@ -9,6 +9,7 @@ import {
pathExists, pathExists,
splitSetupEntries, splitSetupEntries,
setSetupChannelEnabled, setSetupChannelEnabled,
type DmPolicy,
type OpenClawConfig, type OpenClawConfig,
} from "../../../src/plugin-sdk-internal/setup.js"; } from "../../../src/plugin-sdk-internal/setup.js";
import type { ChannelSetupWizard } from "../../../src/plugin-sdk-internal/setup.js"; import type { ChannelSetupWizard } from "../../../src/plugin-sdk-internal/setup.js";

View File

@ -41,7 +41,9 @@ export async function probeZaloAccount(params: {
export async function startZaloGatewayAccount( export async function startZaloGatewayAccount(
ctx: Parameters< ctx: Parameters<
NonNullable<import("openclaw/plugin-sdk/zalo").ChannelPlugin["gateway"]>["startAccount"] NonNullable<
NonNullable<import("openclaw/plugin-sdk/zalo").ChannelPlugin["gateway"]>["startAccount"]
>
>[0], >[0],
) { ) {
const account = ctx.account; const account = ctx.account;

View File

@ -47,26 +47,20 @@ type RegistrablePlugin = {
register: (api: ReturnType<typeof createCapturedPluginRegistration>["api"]) => void; register: (api: ReturnType<typeof createCapturedPluginRegistration>["api"]) => void;
}; };
type ProviderContractEntry = { type CapabilityContractEntry<T> = {
pluginId: string; pluginId: string;
provider: ProviderPlugin; provider: T;
}; };
type WebSearchProviderContractEntry = { type ProviderContractEntry = CapabilityContractEntry<ProviderPlugin>;
pluginId: string;
provider: WebSearchProviderPlugin; type WebSearchProviderContractEntry = CapabilityContractEntry<WebSearchProviderPlugin> & {
credentialValue: unknown; credentialValue: unknown;
}; };
type SpeechProviderContractEntry = { type SpeechProviderContractEntry = CapabilityContractEntry<SpeechProviderPlugin>;
pluginId: string; type MediaUnderstandingProviderContractEntry =
provider: SpeechProviderPlugin; CapabilityContractEntry<MediaUnderstandingProviderPlugin>;
};
type MediaUnderstandingProviderContractEntry = {
pluginId: string;
provider: MediaUnderstandingProviderPlugin;
};
type PluginRegistrationContractEntry = { type PluginRegistrationContractEntry = {
pluginId: string; pluginId: string;
@ -138,15 +132,23 @@ function captureRegistrations(plugin: RegistrablePlugin) {
return captured; return captured;
} }
export const providerContractRegistry: ProviderContractEntry[] = bundledProviderPlugins.flatMap( function buildCapabilityContractRegistry<T>(params: {
(plugin) => { plugins: RegistrablePlugin[];
select: (captured: ReturnType<typeof createCapturedPluginRegistration>) => T[];
}): CapabilityContractEntry<T>[] {
return params.plugins.flatMap((plugin) => {
const captured = captureRegistrations(plugin); const captured = captureRegistrations(plugin);
return captured.providers.map((provider) => ({ return params.select(captured).map((provider) => ({
pluginId: plugin.id, pluginId: plugin.id,
provider, provider,
})); }));
}, });
); }
export const providerContractRegistry: ProviderContractEntry[] = buildCapabilityContractRegistry({
plugins: bundledProviderPlugins,
select: (captured) => captured.providers,
});
export const webSearchProviderContractRegistry: WebSearchProviderContractEntry[] = export const webSearchProviderContractRegistry: WebSearchProviderContractEntry[] =
bundledWebSearchPlugins.flatMap((plugin) => { bundledWebSearchPlugins.flatMap((plugin) => {
@ -159,21 +161,15 @@ export const webSearchProviderContractRegistry: WebSearchProviderContractEntry[]
}); });
export const speechProviderContractRegistry: SpeechProviderContractEntry[] = export const speechProviderContractRegistry: SpeechProviderContractEntry[] =
bundledSpeechPlugins.flatMap((plugin) => { buildCapabilityContractRegistry({
const captured = captureRegistrations(plugin); plugins: bundledSpeechPlugins,
return captured.speechProviders.map((provider) => ({ select: (captured) => captured.speechProviders,
pluginId: plugin.id,
provider,
}));
}); });
export const mediaUnderstandingProviderContractRegistry: MediaUnderstandingProviderContractEntry[] = export const mediaUnderstandingProviderContractRegistry: MediaUnderstandingProviderContractEntry[] =
bundledMediaUnderstandingPlugins.flatMap((plugin) => { buildCapabilityContractRegistry({
const captured = captureRegistrations(plugin); plugins: bundledMediaUnderstandingPlugins,
return captured.mediaUnderstandingProviders.map((provider) => ({ select: (captured) => captured.mediaUnderstandingProviders,
pluginId: plugin.id,
provider,
}));
}); });
const bundledPluginRegistrationList = [ const bundledPluginRegistrationList = [

View File

@ -104,29 +104,20 @@ export type PluginProviderRegistration = {
rootDir?: string; rootDir?: string;
}; };
export type PluginWebSearchProviderRegistration = { type PluginOwnedProviderRegistration<T extends { id: string }> = {
pluginId: string; pluginId: string;
pluginName?: string; pluginName?: string;
provider: WebSearchProviderPlugin; provider: T;
source: string; source: string;
rootDir?: string; rootDir?: string;
}; };
export type PluginSpeechProviderRegistration = { export type PluginSpeechProviderRegistration =
pluginId: string; PluginOwnedProviderRegistration<SpeechProviderPlugin>;
pluginName?: string; export type PluginMediaUnderstandingProviderRegistration =
provider: SpeechProviderPlugin; PluginOwnedProviderRegistration<MediaUnderstandingProviderPlugin>;
source: string; export type PluginWebSearchProviderRegistration =
rootDir?: string; PluginOwnedProviderRegistration<WebSearchProviderPlugin>;
};
export type PluginMediaUnderstandingProviderRegistration = {
pluginId: string;
pluginName?: string;
provider: MediaUnderstandingProviderPlugin;
source: string;
rootDir?: string;
};
export type PluginHookRegistration = { export type PluginHookRegistration = {
pluginId: string; pluginId: string;
@ -576,13 +567,7 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) {
const registerUniqueProviderLike = < const registerUniqueProviderLike = <
T extends { id: string }, T extends { id: string },
R extends { R extends PluginOwnedProviderRegistration<T>,
pluginId: string;
pluginName?: string;
provider: T;
source: string;
rootDir?: string;
},
>(params: { >(params: {
record: PluginRecord; record: PluginRecord;
provider: T; provider: T;

View File

@ -55,6 +55,14 @@ describe("plugin runtime command execution", () => {
expect(runtime.events.onSessionTranscriptUpdate).toBe(onSessionTranscriptUpdate); expect(runtime.events.onSessionTranscriptUpdate).toBe(onSessionTranscriptUpdate);
}); });
it("exposes runtime.mediaUnderstanding helpers and keeps stt as an alias", () => {
const runtime = createPluginRuntime();
expect(typeof runtime.mediaUnderstanding.runFile).toBe("function");
expect(typeof runtime.mediaUnderstanding.describeImageFile).toBe("function");
expect(typeof runtime.mediaUnderstanding.describeVideoFile).toBe("function");
expect(runtime.mediaUnderstanding.transcribeAudioFile).toBe(runtime.stt.transcribeAudioFile);
});
it("exposes runtime.system.requestHeartbeatNow", () => { it("exposes runtime.system.requestHeartbeatNow", () => {
const runtime = createPluginRuntime(); const runtime = createPluginRuntime();
expect(runtime.system.requestHeartbeatNow).toBe(requestHeartbeatNow); expect(runtime.system.requestHeartbeatNow).toBe(requestHeartbeatNow);

View File

@ -26,7 +26,7 @@ export default defineConfig({
pool: "forks", pool: "forks",
maxWorkers: e2eWorkers, maxWorkers: e2eWorkers,
silent: !verboseE2E, silent: !verboseE2E,
include: ["test/**/*.e2e.test.ts", "src/**/*.e2e.test.ts"], include: ["test/**/*.e2e.test.ts", "src/**/*.e2e.test.ts", "extensions/**/*.e2e.test.ts"],
exclude, exclude,
}, },
}); });