From 740c9dfeb9f44c617f05825d05583c749bef8d33 Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Sun, 15 Mar 2026 19:14:03 +0000 Subject: [PATCH] Media: extract runtime provider registry --- .../media-runtime-registry.test.ts | 47 ++++++++++++ src/extension-host/media-runtime-registry.ts | 73 +++++++++++++++++++ src/media-understanding/providers/index.ts | 69 ++---------------- src/media-understanding/resolve.ts | 6 +- src/media-understanding/runner.entries.ts | 18 +++-- src/media-understanding/runner.ts | 18 ++--- 6 files changed, 151 insertions(+), 80 deletions(-) create mode 100644 src/extension-host/media-runtime-registry.test.ts create mode 100644 src/extension-host/media-runtime-registry.ts diff --git a/src/extension-host/media-runtime-registry.test.ts b/src/extension-host/media-runtime-registry.test.ts new file mode 100644 index 00000000000..b99867873b9 --- /dev/null +++ b/src/extension-host/media-runtime-registry.test.ts @@ -0,0 +1,47 @@ +import { describe, expect, it } from "vitest"; +import { + buildExtensionHostMediaUnderstandingRegistry, + getExtensionHostMediaUnderstandingProvider, + normalizeExtensionHostMediaProviderId, +} from "./media-runtime-registry.js"; + +describe("extension host media runtime registry", () => { + it("registers built-in providers", () => { + const registry = buildExtensionHostMediaUnderstandingRegistry(); + const provider = getExtensionHostMediaUnderstandingProvider("mistral", registry); + + expect(provider?.id).toBe("mistral"); + expect(provider?.capabilities).toEqual(["audio"]); + }); + + it("keeps media-specific provider normalization", () => { + expect(normalizeExtensionHostMediaProviderId("gemini")).toBe("google"); + }); + + it("merges overrides onto built-in providers", () => { + const registry = buildExtensionHostMediaUnderstandingRegistry({ + openai: { + id: "openai", + capabilities: ["image"], + }, + }); + + const provider = getExtensionHostMediaUnderstandingProvider("openai", registry); + 
expect(provider?.id).toBe("openai"); + expect(provider?.capabilities).toEqual(["image"]); + expect(provider?.describeImage).toBeTypeOf("function"); + }); + + it("adds brand new providers", () => { + const registry = buildExtensionHostMediaUnderstandingRegistry({ + custom: { + id: "custom", + capabilities: ["audio"], + }, + }); + + const provider = getExtensionHostMediaUnderstandingProvider("custom", registry); + expect(provider?.id).toBe("custom"); + expect(provider?.capabilities).toEqual(["audio"]); + }); +}); diff --git a/src/extension-host/media-runtime-registry.ts b/src/extension-host/media-runtime-registry.ts new file mode 100644 index 00000000000..7b6479dab61 --- /dev/null +++ b/src/extension-host/media-runtime-registry.ts @@ -0,0 +1,73 @@ +import { normalizeProviderId } from "../agents/provider-id.js"; +import { anthropicProvider } from "../media-understanding/providers/anthropic/index.js"; +import { deepgramProvider } from "../media-understanding/providers/deepgram/index.js"; +import { googleProvider } from "../media-understanding/providers/google/index.js"; +import { groqProvider } from "../media-understanding/providers/groq/index.js"; +import { + minimaxPortalProvider, + minimaxProvider, +} from "../media-understanding/providers/minimax/index.js"; +import { mistralProvider } from "../media-understanding/providers/mistral/index.js"; +import { moonshotProvider } from "../media-understanding/providers/moonshot/index.js"; +import { openaiProvider } from "../media-understanding/providers/openai/index.js"; +import { zaiProvider } from "../media-understanding/providers/zai/index.js"; +import type { MediaUnderstandingProvider } from "../media-understanding/types.js"; + +const EXTENSION_HOST_MEDIA_PROVIDERS: readonly MediaUnderstandingProvider[] = [ + groqProvider, + openaiProvider, + googleProvider, + anthropicProvider, + minimaxProvider, + minimaxPortalProvider, + moonshotProvider, + mistralProvider, + zaiProvider, + deepgramProvider, +]; + +export type 
ExtensionHostMediaUnderstandingProviderRegistry = Map< + string, + MediaUnderstandingProvider +>; + +export function normalizeExtensionHostMediaProviderId(id: string): string { + const normalized = normalizeProviderId(id); + if (normalized === "gemini") { + return "google"; + } + return normalized; +} + +export function buildExtensionHostMediaUnderstandingRegistry( + overrides?: Record<string, MediaUnderstandingProvider>, +): ExtensionHostMediaUnderstandingProviderRegistry { + const registry: ExtensionHostMediaUnderstandingProviderRegistry = new Map(); + for (const provider of EXTENSION_HOST_MEDIA_PROVIDERS) { + registry.set(normalizeExtensionHostMediaProviderId(provider.id), provider); + } + if (!overrides) { + return registry; + } + + for (const [key, provider] of Object.entries(overrides)) { + const normalizedKey = normalizeExtensionHostMediaProviderId(key); + const existing = registry.get(normalizedKey); + const merged = existing + ? { + ...existing, + ...provider, + capabilities: provider.capabilities ?? existing.capabilities, + } + : provider; + registry.set(normalizedKey, merged); + } + return registry; +} + +export function getExtensionHostMediaUnderstandingProvider( + id: string, + registry: ExtensionHostMediaUnderstandingProviderRegistry, +): MediaUnderstandingProvider | undefined { + return registry.get(normalizeExtensionHostMediaProviderId(id)); +} diff --git a/src/media-understanding/providers/index.ts b/src/media-understanding/providers/index.ts index 0ceaa78fd80..6e13d5871de 100644 --- a/src/media-understanding/providers/index.ts +++ b/src/media-understanding/providers/index.ts @@ -1,63 +1,6 @@ -import { normalizeProviderId } from "../../agents/model-selection.js"; -import type { MediaUnderstandingProvider } from "../types.js"; -import { anthropicProvider } from "./anthropic/index.js"; -import { deepgramProvider } from "./deepgram/index.js"; -import { googleProvider } from "./google/index.js"; -import { groqProvider } from "./groq/index.js"; -import { minimaxPortalProvider, 
minimaxProvider } from "./minimax/index.js"; -import { mistralProvider } from "./mistral/index.js"; -import { moonshotProvider } from "./moonshot/index.js"; -import { openaiProvider } from "./openai/index.js"; -import { zaiProvider } from "./zai/index.js"; - -const PROVIDERS: MediaUnderstandingProvider[] = [ - groqProvider, - openaiProvider, - googleProvider, - anthropicProvider, - minimaxProvider, - minimaxPortalProvider, - moonshotProvider, - mistralProvider, - zaiProvider, - deepgramProvider, -]; - -export function normalizeMediaProviderId(id: string): string { - const normalized = normalizeProviderId(id); - if (normalized === "gemini") { - return "google"; - } - return normalized; -} - -export function buildMediaUnderstandingRegistry( - overrides?: Record<string, MediaUnderstandingProvider>, -): Map<string, MediaUnderstandingProvider> { - const registry = new Map(); - for (const provider of PROVIDERS) { - registry.set(normalizeMediaProviderId(provider.id), provider); - } - if (overrides) { - for (const [key, provider] of Object.entries(overrides)) { - const normalizedKey = normalizeMediaProviderId(key); - const existing = registry.get(normalizedKey); - const merged = existing - ? { - ...existing, - ...provider, - capabilities: provider.capabilities ?? 
existing.capabilities, - } - : provider; - registry.set(normalizedKey, merged); - } - } - return registry; -} - -export function getMediaUnderstandingProvider( - id: string, - registry: Map<string, MediaUnderstandingProvider>, -): MediaUnderstandingProvider | undefined { - return registry.get(normalizeMediaProviderId(id)); -} +export { + buildExtensionHostMediaUnderstandingRegistry as buildMediaUnderstandingRegistry, + getExtensionHostMediaUnderstandingProvider as getMediaUnderstandingProvider, + normalizeExtensionHostMediaProviderId as normalizeMediaProviderId, + type ExtensionHostMediaUnderstandingProviderRegistry as MediaUnderstandingProviderRegistry, +} from "../../extension-host/media-runtime-registry.js"; diff --git a/src/media-understanding/resolve.ts b/src/media-understanding/resolve.ts index 824f5603c9e..bc852bb71ec 100644 --- a/src/media-understanding/resolve.ts +++ b/src/media-understanding/resolve.ts @@ -5,6 +5,7 @@ import type { MediaUnderstandingModelConfig, MediaUnderstandingScopeConfig, } from "../config/types.tools.js"; +import { normalizeExtensionHostMediaProviderId } from "../extension-host/media-runtime-registry.js"; import { logVerbose, shouldLogVerbose } from "../globals.js"; import { DEFAULT_MAX_BYTES, @@ -12,7 +13,6 @@ import { DEFAULT_MEDIA_CONCURRENCY, DEFAULT_PROMPT, } from "./defaults.js"; -import { normalizeMediaProviderId } from "./providers/index.js"; import { normalizeMediaUnderstandingChatType, resolveMediaUnderstandingScope } from "./scope.js"; import type { MediaUnderstandingCapability } from "./types.js"; @@ -91,7 +91,7 @@ function resolveEntryCapabilities(params: { if (entryType === "cli") { return undefined; } - const providerId = normalizeMediaProviderId(params.entry.provider ?? 
""); if (!providerId) { return undefined; } @@ -169,7 +169,7 @@ export function resolveEntriesWithActiveFallback(params: { if (!activeProviderRaw) { return entries; } - const activeProvider = normalizeMediaProviderId(activeProviderRaw); + const activeProvider = normalizeExtensionHostMediaProviderId(activeProviderRaw); if (!activeProvider) { return entries; } diff --git a/src/media-understanding/runner.entries.ts b/src/media-understanding/runner.entries.ts index cdd9468c4a7..1a104714bb8 100644 --- a/src/media-understanding/runner.entries.ts +++ b/src/media-understanding/runner.entries.ts @@ -12,6 +12,10 @@ import type { MediaUnderstandingConfig, MediaUnderstandingModelConfig, } from "../config/types.tools.js"; +import { + getExtensionHostMediaUnderstandingProvider, + normalizeExtensionHostMediaProviderId, +} from "../extension-host/media-runtime-registry.js"; import { logVerbose, shouldLogVerbose } from "../globals.js"; import { resolveProxyFetchFromEnv } from "../infra/net/proxy-fetch.js"; import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js"; @@ -27,7 +31,6 @@ import { MediaUnderstandingSkipError } from "./errors.js"; import { fileExists } from "./fs.js"; import { extractGeminiResponse } from "./output-extract.js"; import { describeImageWithModel } from "./providers/image.js"; -import { getMediaUnderstandingProvider, normalizeMediaProviderId } from "./providers/index.js"; import { resolveMaxBytes, resolveMaxChars, resolvePrompt, resolveTimeoutMs } from "./resolve.js"; import type { MediaUnderstandingCapability, @@ -302,7 +305,9 @@ export function buildModelDecision(params: { }; } const providerIdRaw = params.entry.provider?.trim(); - const providerId = providerIdRaw ? normalizeMediaProviderId(providerIdRaw) : undefined; + const providerId = providerIdRaw + ? normalizeExtensionHostMediaProviderId(providerIdRaw) + : undefined; return { type: "provider", provider: providerId ?? 
providerIdRaw, @@ -429,7 +434,7 @@ export async function runProviderEntry(params: { if (!providerIdRaw) { throw new Error(`Provider entry missing provider for ${capability}`); } - const providerId = normalizeMediaProviderId(providerIdRaw); + const providerId = normalizeExtensionHostMediaProviderId(providerIdRaw); const { maxBytes, maxChars, timeoutMs, prompt } = resolveEntryRunOptions({ capability, entry, @@ -450,7 +455,10 @@ export async function runProviderEntry(params: { maxBytes, timeoutMs, }); - const provider = getMediaUnderstandingProvider(providerId, params.providerRegistry); + const provider = getExtensionHostMediaUnderstandingProvider( + providerId, + params.providerRegistry, + ); const imageInput = { buffer: media.buffer, fileName: media.fileName, @@ -475,7 +483,7 @@ export async function runProviderEntry(params: { }; } - const provider = getMediaUnderstandingProvider(providerId, params.providerRegistry); + const provider = getExtensionHostMediaUnderstandingProvider(providerId, params.providerRegistry); if (!provider) { throw new Error(`Media provider not available: ${providerId}`); } diff --git a/src/media-understanding/runner.ts b/src/media-understanding/runner.ts index c2ffe584448..f4b9e09d13c 100644 --- a/src/media-understanding/runner.ts +++ b/src/media-understanding/runner.ts @@ -18,6 +18,11 @@ import type { MediaUnderstandingConfig, MediaUnderstandingModelConfig, } from "../config/types.tools.js"; +import { + buildExtensionHostMediaUnderstandingRegistry, + getExtensionHostMediaUnderstandingProvider, + normalizeExtensionHostMediaProviderId, +} from "../extension-host/media-runtime-registry.js"; import { logVerbose, shouldLogVerbose } from "../globals.js"; import { mergeInboundPathRoots, @@ -40,11 +45,6 @@ import { import { isMediaUnderstandingSkipError } from "./errors.js"; import { fileExists } from "./fs.js"; import { extractGeminiResponse } from "./output-extract.js"; -import { - buildMediaUnderstandingRegistry, - getMediaUnderstandingProvider, 
- normalizeMediaProviderId, -} from "./providers/index.js"; import { resolveModelEntries, resolveScopeDecision } from "./resolve.js"; import { buildModelDecision, @@ -76,7 +76,7 @@ export type RunCapabilityResult = { export function buildProviderRegistry( overrides?: Record<string, MediaUnderstandingProvider>, ): ProviderRegistry { - return buildMediaUnderstandingRegistry(overrides); + return buildExtensionHostMediaUnderstandingRegistry(overrides); } export function normalizeMediaAttachments(ctx: MsgContext): MediaAttachment[] { @@ -349,7 +349,7 @@ async function resolveKeyEntry(params: { providerId: string, model?: string, ): Promise<MediaUnderstandingModelConfig | null> => { - const provider = getMediaUnderstandingProvider(providerId, providerRegistry); + const provider = getExtensionHostMediaUnderstandingProvider(providerId, providerRegistry); if (!provider) { return null; } @@ -536,11 +536,11 @@ async function resolveActiveModelEntry(params: { if (!activeProviderRaw) { return null; } - const providerId = normalizeMediaProviderId(activeProviderRaw); + const providerId = normalizeExtensionHostMediaProviderId(activeProviderRaw); if (!providerId) { return null; } - const provider = getMediaUnderstandingProvider(providerId, params.providerRegistry); + const provider = getExtensionHostMediaUnderstandingProvider(providerId, params.providerRegistry); if (!provider) { return null; }