diff --git a/src/extension-host/media-runtime-api.test.ts b/src/extension-host/media-runtime-api.test.ts new file mode 100644 index 00000000000..04e4171ab74 --- /dev/null +++ b/src/extension-host/media-runtime-api.test.ts @@ -0,0 +1,140 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { + buildExtensionHostMediaProviderRegistry, + normalizeExtensionHostMediaAttachments, + resolveExtensionHostAutoImageModel, + resolveExtensionHostMediaAttachmentLocalRoots, + runExtensionHostMediaApiCapability, +} from "./media-runtime-api.js"; + +vi.mock("./media-runtime-auto.js", () => ({ + clearMediaUnderstandingBinaryCacheForTests: vi.fn(), + resolveAutoImageModel: vi.fn(), +})); + +vi.mock("./media-runtime-orchestration.js", () => ({ + runCapability: vi.fn(), +})); + +vi.mock("./media-runtime-registry.js", () => ({ + buildExtensionHostMediaUnderstandingRegistry: vi.fn(), +})); + +vi.mock("../media/inbound-path-policy.js", () => ({ + mergeInboundPathRoots: vi.fn(), + resolveIMessageAttachmentRoots: vi.fn(), +})); + +vi.mock("../media/local-roots.js", () => ({ + getDefaultMediaLocalRoots: vi.fn(), +})); + +vi.mock("../media-understanding/attachments.js", () => ({ + MediaAttachmentCache: class MediaAttachmentCache { + constructor( + readonly attachments: unknown[], + readonly options?: unknown, + ) {} + }, + normalizeAttachments: vi.fn(), +})); + +describe("media-runtime-api", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("delegates provider-registry construction to the host-owned registry", async () => { + const registryModule = await import("./media-runtime-registry.js"); + const registry = new Map(); + vi.mocked(registryModule.buildExtensionHostMediaUnderstandingRegistry).mockReturnValue( + registry, + ); + + expect(buildExtensionHostMediaProviderRegistry({ openai: {} as never })).toBe(registry); + expect(registryModule.buildExtensionHostMediaUnderstandingRegistry).toHaveBeenCalledWith({ + openai: {} as never, + }); + }); + + it("resolves local roots through the host-owned inbound-path policy", async () => { + const localRootsModule = await import("../media/local-roots.js"); + const inboundPolicyModule = await import("../media/inbound-path-policy.js"); + + vi.mocked(localRootsModule.getDefaultMediaLocalRoots).mockReturnValue(["/tmp/openclaw"]); + vi.mocked(inboundPolicyModule.resolveIMessageAttachmentRoots).mockReturnValue(["/messages"]); + vi.mocked(inboundPolicyModule.mergeInboundPathRoots).mockReturnValue([ + "/tmp/openclaw", + "/messages", + ]); + + const roots = resolveExtensionHostMediaAttachmentLocalRoots({ + cfg: { channels: { imessage: {} } } as never, + ctx: { AccountId: "primary" } as never, + }); + + expect(roots).toEqual(["/tmp/openclaw", "/messages"]); + expect(inboundPolicyModule.resolveIMessageAttachmentRoots).toHaveBeenCalledWith({ + cfg: { channels: { imessage: {} } }, + accountId: "primary", + }); + }); + + it("injects the default registry when resolving the auto image model", async () => { + const registryModule = await import("./media-runtime-registry.js"); + const autoModule = await import("./media-runtime-auto.js"); + const registry = new Map(); + + vi.mocked(registryModule.buildExtensionHostMediaUnderstandingRegistry).mockReturnValue( + registry, + ); + vi.mocked(autoModule.resolveAutoImageModel).mockResolvedValue({ + provider: "openai", + model: "gpt-4.1", + }); + + await expect( + resolveExtensionHostAutoImageModel({ + cfg: {} as never, + }), + ).resolves.toEqual({ + provider: "openai", + model: "gpt-4.1", + }); + + expect(autoModule.resolveAutoImageModel).toHaveBeenCalledWith({ + cfg: {}, + providerRegistry: registry, + }); + }); + + it("delegates top-level capability execution to the host-owned orchestration", async () => { + const orchestrationModule = await import("./media-runtime-orchestration.js"); + const attachments = { cleanup: vi.fn() } as never; + const media = [{ kind: "image" }] as never; + const providerRegistry = new Map() as never; + const result = { outputs: [], decision: { capability: "image", outcome: "skipped" } } as never; + + vi.mocked(orchestrationModule.runCapability).mockResolvedValue(result); + + await expect( + runExtensionHostMediaApiCapability({ + capability: "image", + cfg: {} as never, + ctx: {} as never, + attachments, + media, + providerRegistry, + }), + ).resolves.toBe(result); + }); + + it("delegates attachment normalization to the shared media attachment helper", async () => { + const attachmentsModule = await import("../media-understanding/attachments.js"); + vi.mocked(attachmentsModule.normalizeAttachments).mockReturnValue([{ kind: "audio" }] as never); + + expect(normalizeExtensionHostMediaAttachments({ MediaPath: "/tmp/test.wav" } as never)).toEqual( + [{ kind: "audio" }], + ); + }); +}); diff --git a/src/extension-host/media-runtime-api.ts b/src/extension-host/media-runtime-api.ts new file mode 100644 index 00000000000..2f108981f6b --- /dev/null +++ b/src/extension-host/media-runtime-api.ts @@ -0,0 +1,95 @@ +import type { MsgContext } from "../auto-reply/templating.js"; +import type { OpenClawConfig } from "../config/config.js"; +import type { MediaUnderstandingConfig } from "../config/types.tools.js"; +import { + MediaAttachmentCache, + type MediaAttachmentCacheOptions, + normalizeAttachments, +} from "../media-understanding/attachments.js"; +import type { + MediaAttachment, + MediaUnderstandingCapability, + MediaUnderstandingProvider, +} from "../media-understanding/types.js"; +import { + mergeInboundPathRoots, + resolveIMessageAttachmentRoots, +} from "../media/inbound-path-policy.js"; +import { getDefaultMediaLocalRoots } from "../media/local-roots.js"; +import { + clearMediaUnderstandingBinaryCacheForTests as clearExtensionHostMediaUnderstandingBinaryCacheForTests, + resolveAutoImageModel as resolveExtensionHostMediaRuntimeAutoImageModel, + type ActiveMediaModel, +} from "./media-runtime-auto.js"; +import { + runCapability as runExtensionHostMediaCapability, + type RunCapabilityResult, +} from "./media-runtime-orchestration.js"; +import { + buildExtensionHostMediaUnderstandingRegistry, + type ExtensionHostMediaUnderstandingProviderRegistry, +} from "./media-runtime-registry.js"; + +type ProviderRegistry = ExtensionHostMediaUnderstandingProviderRegistry; + +export type { ActiveMediaModel, RunCapabilityResult }; +export type ExtensionHostMediaProviderRegistry = ProviderRegistry; + +export function buildExtensionHostMediaProviderRegistry( + overrides?: Record, +): ProviderRegistry { + return buildExtensionHostMediaUnderstandingRegistry(overrides); +} + +export function normalizeExtensionHostMediaAttachments(ctx: MsgContext): MediaAttachment[] { + return normalizeAttachments(ctx); +} + +export function resolveExtensionHostMediaAttachmentLocalRoots(params: { + cfg: OpenClawConfig; + ctx: MsgContext; +}): readonly string[] { + return mergeInboundPathRoots( + getDefaultMediaLocalRoots(), + resolveIMessageAttachmentRoots({ + cfg: params.cfg, + accountId: params.ctx.AccountId, + }), + ); +} + +export function createExtensionHostMediaAttachmentCache( + attachments: MediaAttachment[], + options?: MediaAttachmentCacheOptions, +): MediaAttachmentCache { + return new MediaAttachmentCache(attachments, options); +} + +export function clearExtensionHostMediaBinaryCacheForTests(): void { + clearExtensionHostMediaUnderstandingBinaryCacheForTests(); +} + +export async function resolveExtensionHostAutoImageModel(params: { + cfg: OpenClawConfig; + agentDir?: string; + activeModel?: ActiveMediaModel; +}): Promise { + return await resolveExtensionHostMediaRuntimeAutoImageModel({ + ...params, + providerRegistry: buildExtensionHostMediaProviderRegistry(), + }); +} + +export async function runExtensionHostMediaApiCapability(params: { + capability: MediaUnderstandingCapability; + cfg: OpenClawConfig; + ctx: MsgContext; + attachments: MediaAttachmentCache; + media: MediaAttachment[]; + agentDir?: string; + providerRegistry: ProviderRegistry; + config?: MediaUnderstandingConfig; + activeModel?: ActiveMediaModel; +}): Promise { + return await runExtensionHostMediaCapability(params); +} diff --git a/src/extension-host/media-runtime-entrypoints.ts b/src/extension-host/media-runtime-entrypoints.ts new file mode 100644 index 00000000000..becd4d667ac --- /dev/null +++ b/src/extension-host/media-runtime-entrypoints.ts @@ -0,0 +1,42 @@ +import type { MsgContext } from "../auto-reply/templating.js"; +import type { OpenClawConfig } from "../config/config.js"; +import type { + MediaUnderstandingConfig, + MediaUnderstandingModelConfig, +} from "../config/types.tools.js"; +import type { MediaAttachmentCache } from "../media-understanding/attachments.js"; +import type { + MediaUnderstandingCapability, + MediaUnderstandingOutput, + MediaUnderstandingProvider, +} from "../media-understanding/types.js"; + +export type ExtensionHostMediaProviderRegistry = Map; + +export async function runExtensionHostMediaProviderEntry(params: { + capability: MediaUnderstandingCapability; + entry: MediaUnderstandingModelConfig; + cfg: OpenClawConfig; + ctx: MsgContext; + attachmentIndex: number; + cache: MediaAttachmentCache; + agentDir?: string; + providerRegistry: ExtensionHostMediaProviderRegistry; + config?: MediaUnderstandingConfig; +}): Promise { + const runtime = await import("./media-runtime-execution.js"); + return runtime.runProviderEntry(params); +} + +export async function runExtensionHostMediaCliEntry(params: { + capability: MediaUnderstandingCapability; + entry: MediaUnderstandingModelConfig; + cfg: OpenClawConfig; + ctx: MsgContext; + attachmentIndex: number; + cache: MediaAttachmentCache; + config?: MediaUnderstandingConfig; +}): Promise { + const runtime = await import("./media-runtime-execution.js"); + return runtime.runCliEntry(params); +} diff --git a/src/extension-host/media-runtime-orchestration.ts b/src/extension-host/media-runtime-orchestration.ts index 5bc6f4ce47b..63c8876410f 100644 --- a/src/extension-host/media-runtime-orchestration.ts +++ b/src/extension-host/media-runtime-orchestration.ts @@ -12,7 +12,6 @@ import type { import { logVerbose, shouldLogVerbose } from "../globals.js"; import { MediaAttachmentCache, selectAttachments } from "../media-understanding/attachments.js"; import { isMediaUnderstandingSkipError } from "../media-understanding/errors.js"; -import { runCliEntry, runProviderEntry } from "../media-understanding/runner.entries.js"; import type { MediaAttachment, MediaUnderstandingCapability, @@ -24,6 +23,10 @@ import type { import { resolveAutoEntries, type ActiveMediaModel } from "./media-runtime-auto.js"; import { resolveModelEntries, resolveScopeDecision } from "./media-runtime-config.js"; import { buildModelDecision, formatDecisionSummary } from "./media-runtime-decision.js"; +import { + runExtensionHostMediaCliEntry, + runExtensionHostMediaProviderEntry, +} from "./media-runtime-entrypoints.js"; type ProviderRegistry = Map; @@ -53,7 +56,7 @@ async function runAttachmentEntries(params: { try { const result = entryType === "cli" - ? await runCliEntry({ + ? await runExtensionHostMediaCliEntry({ capability, entry, cfg: params.cfg, @@ -62,7 +65,7 @@ async function runAttachmentEntries(params: { cache: params.cache, config: params.config, }) - : await runProviderEntry({ + : await runExtensionHostMediaProviderEntry({ capability, entry, cfg: params.cfg, diff --git a/src/media-understanding/runner.entries.ts b/src/media-understanding/runner.entries.ts index 0c8a079290c..7d649e9574a 100644 --- a/src/media-understanding/runner.entries.ts +++ b/src/media-understanding/runner.entries.ts @@ -1,42 +1,9 @@ -import type { MsgContext } from "../auto-reply/templating.js"; -import type { OpenClawConfig } from "../config/config.js"; -import type { - MediaUnderstandingConfig, - MediaUnderstandingModelConfig, -} from "../config/types.tools.js"; export { buildModelDecision, formatDecisionSummary, } from "../extension-host/media-runtime-decision.js"; -import type { MediaAttachmentCache } from "./attachments.js"; -import type { MediaUnderstandingCapability, MediaUnderstandingOutput } from "./types.js"; - -export type ProviderRegistry = Map; - -export async function runProviderEntry(params: { - capability: MediaUnderstandingCapability; - entry: MediaUnderstandingModelConfig; - cfg: OpenClawConfig; - ctx: MsgContext; - attachmentIndex: number; - cache: MediaAttachmentCache; - agentDir?: string; - providerRegistry: ProviderRegistry; - config?: MediaUnderstandingConfig; -}): Promise { - const runtime = await import("../extension-host/media-runtime-execution.js"); - return runtime.runProviderEntry(params); -} - -export async function runCliEntry(params: { - capability: MediaUnderstandingCapability; - entry: MediaUnderstandingModelConfig; - cfg: OpenClawConfig; - ctx: MsgContext; - attachmentIndex: number; - cache: MediaAttachmentCache; - config?: MediaUnderstandingConfig; -}): Promise { - const runtime = await import("../extension-host/media-runtime-execution.js"); - return runtime.runCliEntry(params); -} +export { + runExtensionHostMediaCliEntry as runCliEntry, + runExtensionHostMediaProviderEntry as runProviderEntry, + type ExtensionHostMediaProviderRegistry as ProviderRegistry, +} from "../extension-host/media-runtime-entrypoints.js"; diff --git a/src/media-understanding/runner.ts b/src/media-understanding/runner.ts index 4cf2a923e9c..0858f73b5c0 100644 --- a/src/media-understanding/runner.ts +++ b/src/media-understanding/runner.ts @@ -1,94 +1,17 @@ -import type { MsgContext } from "../auto-reply/templating.js"; -import type { OpenClawConfig } from "../config/config.js"; -import type { MediaUnderstandingConfig } from "../config/types.tools.js"; -import { - clearMediaUnderstandingBinaryCacheForTests as clearExtensionHostMediaUnderstandingBinaryCacheForTests, - resolveAutoImageModel as resolveExtensionHostAutoImageModel, +export { + buildExtensionHostMediaProviderRegistry as buildProviderRegistry, + clearExtensionHostMediaBinaryCacheForTests as clearMediaUnderstandingBinaryCacheForTests, + createExtensionHostMediaAttachmentCache as createMediaAttachmentCache, + normalizeExtensionHostMediaAttachments as normalizeMediaAttachments, + resolveExtensionHostAutoImageModel as resolveAutoImageModel, + resolveExtensionHostMediaAttachmentLocalRoots as resolveMediaAttachmentLocalRoots, + runExtensionHostMediaApiCapability as runCapability, type ActiveMediaModel, -} from "../extension-host/media-runtime-auto.js"; -import { - runCapability as runExtensionHostMediaCapability, + type ExtensionHostMediaProviderRegistry as ProviderRegistry, type RunCapabilityResult, -} from "../extension-host/media-runtime-orchestration.js"; -import { - buildExtensionHostMediaUnderstandingRegistry, - type ExtensionHostMediaUnderstandingProviderRegistry, -} from "../extension-host/media-runtime-registry.js"; -import { - mergeInboundPathRoots, - resolveIMessageAttachmentRoots, -} from "../media/inbound-path-policy.js"; -import { getDefaultMediaLocalRoots } from "../media/local-roots.js"; -import { - MediaAttachmentCache, - type MediaAttachmentCacheOptions, - normalizeAttachments, -} from "./attachments.js"; -import type { +} from "../extension-host/media-runtime-api.js"; +export type { MediaAttachment, MediaUnderstandingCapability, MediaUnderstandingProvider, } from "./types.js"; - -type ProviderRegistry = ExtensionHostMediaUnderstandingProviderRegistry; - -export type { ActiveMediaModel, RunCapabilityResult }; - -export function buildProviderRegistry( - overrides?: Record, -): ProviderRegistry { - return buildExtensionHostMediaUnderstandingRegistry(overrides); -} - -export function normalizeMediaAttachments(ctx: MsgContext): MediaAttachment[] { - return normalizeAttachments(ctx); -} - -export function resolveMediaAttachmentLocalRoots(params: { - cfg: OpenClawConfig; - ctx: MsgContext; -}): readonly string[] { - return mergeInboundPathRoots( - getDefaultMediaLocalRoots(), - resolveIMessageAttachmentRoots({ - cfg: params.cfg, - accountId: params.ctx.AccountId, - }), - ); -} - -export function createMediaAttachmentCache( - attachments: MediaAttachment[], - options?: MediaAttachmentCacheOptions, -): MediaAttachmentCache { - return new MediaAttachmentCache(attachments, options); -} - -export function clearMediaUnderstandingBinaryCacheForTests(): void { - clearExtensionHostMediaUnderstandingBinaryCacheForTests(); -} - -export async function resolveAutoImageModel(params: { - cfg: OpenClawConfig; - agentDir?: string; - activeModel?: ActiveMediaModel; -}): Promise { - return await resolveExtensionHostAutoImageModel({ - ...params, - providerRegistry: buildProviderRegistry(), - }); -} - -export async function runCapability(params: { - capability: MediaUnderstandingCapability; - cfg: OpenClawConfig; - ctx: MsgContext; - attachments: MediaAttachmentCache; - media: MediaAttachment[]; - agentDir?: string; - providerRegistry: ProviderRegistry; - config?: MediaUnderstandingConfig; - activeModel?: ActiveMediaModel; -}): Promise { - return await runExtensionHostMediaCapability(params); -}