From 864386c89c12a9a8a3f76d433685e5c64e2166c1 Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Sun, 15 Mar 2026 21:08:53 +0000 Subject: [PATCH] Runtime: adopt backend catalog consumers --- src/commands/doctor-memory-search.test.ts | 10 ++ src/commands/doctor-memory-search.ts | 21 ++- src/extension-host/media-runtime-auto.test.ts | 66 ++++++++- src/extension-host/media-runtime-auto.ts | 28 ++-- src/extension-host/media-runtime-execution.ts | 10 +- .../runtime-backend-catalog.test.ts | 16 +++ src/extension-host/runtime-backend-catalog.ts | 129 ++++++++++++++++-- 7 files changed, 253 insertions(+), 27 deletions(-) diff --git a/src/commands/doctor-memory-search.test.ts b/src/commands/doctor-memory-search.test.ts index 0c01c1c7688..3d939b2b295 100644 --- a/src/commands/doctor-memory-search.test.ts +++ b/src/commands/doctor-memory-search.test.ts @@ -8,6 +8,9 @@ const resolveAgentDir = vi.hoisted(() => vi.fn(() => "/tmp/agent-default")); const resolveMemorySearchConfig = vi.hoisted(() => vi.fn()); const resolveApiKeyForProvider = vi.hoisted(() => vi.fn()); const resolveMemoryBackendConfig = vi.hoisted(() => vi.fn()); +const listExtensionHostEmbeddingRemoteRuntimeBackendIds = vi.hoisted(() => + vi.fn(() => ["openai", "gemini", "voyage", "mistral"]), +); vi.mock("../terminal/note.js", () => ({ note, @@ -30,6 +33,10 @@ vi.mock("../memory/backend-config.js", () => ({ resolveMemoryBackendConfig, })); +vi.mock("../extension-host/runtime-backend-catalog.js", () => ({ + listExtensionHostEmbeddingRemoteRuntimeBackendIds, +})); + import { noteMemorySearchHealth } from "./doctor-memory-search.js"; import { detectLegacyWorkspaceDirs } from "./doctor-workspace.js"; @@ -58,6 +65,7 @@ describe("noteMemorySearchHealth", () => { resolveApiKeyForProvider.mockRejectedValue(new Error("missing key")); resolveMemoryBackendConfig.mockReset(); resolveMemoryBackendConfig.mockReturnValue({ backend: "builtin", citations: "auto" }); + listExtensionHostEmbeddingRemoteRuntimeBackendIds.mockClear(); }); it("does not warn when local provider is set with no explicit modelPath (default model fallback)", async () => { @@ -264,6 +272,7 @@ describe("noteMemorySearchHealth", () => { expect(note).toHaveBeenCalledTimes(1); const message = String(note.mock.calls[0]?.[0] ?? ""); expect(message).toContain("openclaw configure --section model"); + expect(message).toContain("OPENAI_API_KEY, GEMINI_API_KEY, VOYAGE_API_KEY, or MISTRAL_API_KEY"); }); it("still warns in auto mode when only ollama credentials exist", async () => { @@ -289,6 +298,7 @@ describe("noteMemorySearchHealth", () => { const providerCalls = resolveApiKeyForProvider.mock.calls as Array<[{ provider: string }]>; const providersChecked = providerCalls.map(([arg]) => arg.provider); expect(providersChecked).toEqual(["openai", "google", "voyage", "mistral"]); + expect(listExtensionHostEmbeddingRemoteRuntimeBackendIds).toHaveBeenCalledTimes(1); }); }); diff --git a/src/commands/doctor-memory-search.ts b/src/commands/doctor-memory-search.ts index 4bafe3f244c..1466855283a 100644 --- a/src/commands/doctor-memory-search.ts +++ b/src/commands/doctor-memory-search.ts @@ -5,6 +5,7 @@ import { resolveApiKeyForProvider } from "../agents/model-auth.js"; import { formatCliCommand } from "../cli/command-format.js"; import type { OpenClawConfig } from "../config/config.js"; import { DEFAULT_LOCAL_EMBEDDING_MODEL } from "../extension-host/embedding-runtime.js"; +import { listExtensionHostEmbeddingRemoteRuntimeBackendIds } from "../extension-host/runtime-backend-catalog.js"; import { resolveMemoryBackendConfig } from "../memory/backend-config.js"; import { hasConfiguredMemorySecretInput } from "../memory/secret-input.js"; import { note } from "../terminal/note.js"; @@ -118,7 +119,8 @@ export async function noteMemorySearchHealth( if (hasLocalEmbeddings(resolved.local)) { return; } - for (const provider of ["openai", "gemini", "voyage", "mistral"] as const) { + const autoRemoteProviders = listExtensionHostEmbeddingRemoteRuntimeBackendIds(); + for (const provider of autoRemoteProviders) { if (hasRemoteApiKey || (await hasApiKeyForProvider(provider, cfg, agentDir))) { return; } @@ -144,7 +146,7 @@ export async function noteMemorySearchHealth( gatewayProbeWarning ? gatewayProbeWarning : null, "", "Fix (pick one):", - "- Set OPENAI_API_KEY, GEMINI_API_KEY, VOYAGE_API_KEY, or MISTRAL_API_KEY in your environment", + `- Set ${formatProviderEnvVarList(autoRemoteProviders.map(providerEnvVar))} in your environment`, `- Configure credentials: ${formatCliCommand("openclaw configure --section model")}`, `- For local embeddings: configure agents.defaults.memorySearch.provider and local model path`, `- To disable: ${formatCliCommand("openclaw config set agents.defaults.memorySearch.enabled false")}`, @@ -214,6 +216,21 @@ function providerEnvVar(provider: string): string { } } +function formatProviderEnvVarList(envVars: readonly string[]): string { + if (envVars.length === 0) { + return "an embedding provider API key"; + } + if (envVars.length === 1) { + return envVars[0] ?? "an embedding provider API key"; + } + if (envVars.length === 2) { + return `${envVars[0]} or ${envVars[1]}`; + } + const head = envVars.slice(0, -1).join(", "); + const tail = envVars.at(-1); + return tail ? `${head}, or ${tail}` : head; +} + function buildGatewayProbeWarning( probe: | { diff --git a/src/extension-host/media-runtime-auto.test.ts b/src/extension-host/media-runtime-auto.test.ts index e33897e8431..cb6ec36f6fa 100644 --- a/src/extension-host/media-runtime-auto.test.ts +++ b/src/extension-host/media-runtime-auto.test.ts @@ -1,8 +1,15 @@ -import { describe, expect, it } from "vitest"; +import { beforeEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../config/config.js"; -import { DEFAULT_IMAGE_MODELS } from "../media-understanding/defaults.js"; -import { resolveAutoImageModel } from "./media-runtime-auto.js"; import { buildExtensionHostMediaUnderstandingRegistry } from "./media-runtime-registry.js"; +import { resolveExtensionHostMediaRuntimeDefaultModel } from "./runtime-backend-catalog.js"; + +const resolveApiKeyForProvider = vi.hoisted(() => vi.fn()); + +vi.mock("../agents/model-auth.js", () => ({ + resolveApiKeyForProvider, +})); + +import { resolveAutoImageModel } from "./media-runtime-auto.js"; function createImageCfg(): OpenClawConfig { return { @@ -18,6 +25,22 @@ function createImageCfg(): OpenClawConfig { } describe("media runtime auto image model", () => { + beforeEach(() => { + resolveApiKeyForProvider.mockReset(); + resolveApiKeyForProvider.mockImplementation( + async ({ provider, cfg }: { provider: string; cfg: OpenClawConfig }) => { + if (cfg.models?.providers?.[provider]) { + return { + apiKey: "test-key", + source: "config", + mode: "api-key", + }; + } + throw new Error("missing key"); + }, + ); + }); + it("keeps a valid active image model", async () => { const result = await resolveAutoImageModel({ cfg: createImageCfg(), @@ -46,7 +69,42 @@ describe("media runtime auto image model", () => { expect(result).toEqual({ provider: "openai", - model: DEFAULT_IMAGE_MODELS.openai, + model: resolveExtensionHostMediaRuntimeDefaultModel({ + capability: "image", + backendId: "openai", + }), + }); + }); + + it("keeps catalog image provider ordering when multiple keyed providers are available", async () => { + const result = await resolveAutoImageModel({ + cfg: { + models: { + providers: { + anthropic: { + apiKey: "anthropic-test-key", + models: [], + }, + google: { + apiKey: "google-test-key", + models: [], + }, + }, + }, + } as unknown as OpenClawConfig, + providerRegistry: buildExtensionHostMediaUnderstandingRegistry(), + activeModel: { + provider: "missing-provider", + model: "ignored", + }, + }); + + expect(result).toEqual({ + provider: "anthropic", + model: resolveExtensionHostMediaRuntimeDefaultModel({ + capability: "image", + backendId: "anthropic", + }), }); }); }); diff --git a/src/extension-host/media-runtime-auto.ts b/src/extension-host/media-runtime-auto.ts index 508666f14eb..a9a62a7c384 100644 --- a/src/extension-host/media-runtime-auto.ts +++ b/src/extension-host/media-runtime-auto.ts @@ -14,16 +14,14 @@ import { normalizeExtensionHostMediaProviderId, type ExtensionHostMediaUnderstandingProviderRegistry, } from "../extension-host/media-runtime-registry.js"; -import { - AUTO_AUDIO_KEY_PROVIDERS, - AUTO_IMAGE_KEY_PROVIDERS, - AUTO_VIDEO_KEY_PROVIDERS, - DEFAULT_IMAGE_MODELS, -} from "../media-understanding/defaults.js"; import { fileExists } from "../media-understanding/fs.js"; import { extractGeminiResponse } from "../media-understanding/output-extract.js"; import type { MediaUnderstandingCapability } from "../media-understanding/types.js"; import { runExec } from "../process/exec.js"; +import { + listExtensionHostMediaAutoRuntimeBackendIds, + resolveExtensionHostMediaRuntimeDefaultModel, +} from "./runtime-backend-catalog.js"; export type ActiveMediaModel = { provider: string; @@ -351,8 +349,11 @@ async function resolveKeyEntry(params: { return activeEntry; } } - for (const providerId of AUTO_IMAGE_KEY_PROVIDERS) { - const model = DEFAULT_IMAGE_MODELS[providerId]; + for (const providerId of listExtensionHostMediaAutoRuntimeBackendIds("image")) { + const model = resolveExtensionHostMediaRuntimeDefaultModel({ + capability: "image", + backendId: providerId, + }); const entry = await checkProvider(providerId, model); if (entry) { return entry; @@ -369,7 +370,7 @@ async function resolveKeyEntry(params: { return activeEntry; } } - for (const providerId of AUTO_VIDEO_KEY_PROVIDERS) { + for (const providerId of listExtensionHostMediaAutoRuntimeBackendIds("video")) { const entry = await checkProvider(providerId, undefined); if (entry) { return entry; @@ -385,7 +386,7 @@ async function resolveKeyEntry(params: { return activeEntry; } } - for (const providerId of AUTO_AUDIO_KEY_PROVIDERS) { + for (const providerId of listExtensionHostMediaAutoRuntimeBackendIds("audio")) { const entry = await checkProvider(providerId, undefined); if (entry) { return entry; @@ -471,7 +472,12 @@ export async function resolveAutoImageModel(params: { if (!provider) { return null; } - const model = entry.model ?? DEFAULT_IMAGE_MODELS[provider]; + const model = + entry.model ?? + resolveExtensionHostMediaRuntimeDefaultModel({ + capability: "image", + backendId: provider, + }); if (!model) { return null; } diff --git a/src/extension-host/media-runtime-execution.ts b/src/extension-host/media-runtime-execution.ts index 6162ec037f8..2d0af83f6f5 100644 --- a/src/extension-host/media-runtime-execution.ts +++ b/src/extension-host/media-runtime-execution.ts @@ -22,7 +22,6 @@ import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js"; import { MediaAttachmentCache } from "../media-understanding/attachments.js"; import { CLI_OUTPUT_MAX_BUFFER, - DEFAULT_AUDIO_MODELS, DEFAULT_TIMEOUT_SECONDS, MIN_AUDIO_FILE_BYTES, } from "../media-understanding/defaults.js"; @@ -42,6 +41,7 @@ import { resolvePrompt, resolveTimeoutMs, } from "./media-runtime-config.js"; +import { resolveExtensionHostMediaRuntimeDefaultModel } from "./runtime-backend-catalog.js"; export type ProviderRegistry = Map; @@ -466,7 +466,13 @@ export async function runProviderEntry(params: { config: params.config, entry, }); - const model = entry.model?.trim() || DEFAULT_AUDIO_MODELS[providerId] || entry.model; + const model = + entry.model?.trim() || + resolveExtensionHostMediaRuntimeDefaultModel({ + capability: "audio", + backendId: providerId, + }) || + entry.model; const result = await executeWithApiKeyRotation({ provider: providerId, apiKeys, diff --git a/src/extension-host/runtime-backend-catalog.test.ts b/src/extension-host/runtime-backend-catalog.test.ts index 8c3f565e8c3..7f30bc31bfc 100644 --- a/src/extension-host/runtime-backend-catalog.test.ts +++ b/src/extension-host/runtime-backend-catalog.test.ts @@ -89,6 +89,16 @@ describe("runtime-backend-catalog", () => { ]), ); expect(entries.find((entry) => entry.backendId === "google")?.selectorKeys).toContain("gemini"); + expect(catalog.listExtensionHostMediaAutoRuntimeBackendIds("image")).toEqual([ + "openai", + "google", + ]); + expect( + catalog.resolveExtensionHostMediaRuntimeDefaultModel({ + capability: "image", + backendId: "openai", + }), + ).toBe("gpt-5-mini"); }); it("publishes TTS backends with telephony capability metadata", async () => { @@ -122,5 +132,11 @@ describe("runtime-backend-catalog", () => { subsystemId: "tts", backendId: "edge", }); + expect(catalog.listExtensionHostEmbeddingRemoteRuntimeBackendIds()).toEqual([ + "openai", + "gemini", + "voyage", + "mistral", + ]); }); }); diff --git a/src/extension-host/runtime-backend-catalog.ts b/src/extension-host/runtime-backend-catalog.ts index e821a375fca..f1d0e31b878 100644 --- a/src/extension-host/runtime-backend-catalog.ts +++ b/src/extension-host/runtime-backend-catalog.ts @@ -1,4 +1,11 @@ import type { TtsProvider } from "../config/types.tts.js"; +import { + AUTO_AUDIO_KEY_PROVIDERS, + AUTO_IMAGE_KEY_PROVIDERS, + AUTO_VIDEO_KEY_PROVIDERS, + DEFAULT_AUDIO_MODELS, + DEFAULT_IMAGE_MODELS, +} from "../media-understanding/defaults.js"; import type { MediaUnderstandingCapability } from "../media-understanding/types.js"; import { EXTENSION_HOST_REMOTE_EMBEDDING_PROVIDER_IDS } from "./embedding-runtime-registry.js"; import type { EmbeddingProviderId } from "./embedding-runtime-types.js"; @@ -31,12 +38,26 @@ export type ExtensionHostRuntimeBackendCatalogEntry = { metadata?: Record; }; +type ExtensionHostMediaRuntimeSubsystemId = Extract< + ExtensionHostRuntimeBackendSubsystemId, + "media.audio" | "media.image" | "media.video" +>; + const EXTENSION_HOST_EMBEDDING_BACKEND_IDS = [ "local", ...EXTENSION_HOST_REMOTE_EMBEDDING_PROVIDER_IDS, "ollama", ] as const satisfies readonly EmbeddingProviderId[]; +const EXTENSION_HOST_MEDIA_AUTO_PROVIDER_IDS: Record< + MediaUnderstandingCapability, + readonly string[] +> = { + audio: AUTO_AUDIO_KEY_PROVIDERS, + image: AUTO_IMAGE_KEY_PROVIDERS, + video: AUTO_VIDEO_KEY_PROVIDERS, +}; + function buildRuntimeBackendCatalogId( subsystemId: ExtensionHostRuntimeBackendSubsystemId, backendId: string, @@ -64,6 +85,44 @@ function buildMediaSelectorKeys(providerId: string): readonly string[] { return normalized === providerId ? [providerId] : [providerId, normalized]; } +function buildExtensionHostMediaRuntimeProviderIds( + capability: MediaUnderstandingCapability, +): readonly string[] { + const registry = buildExtensionHostMediaUnderstandingRegistry(); + const ordered: string[] = []; + const seen = new Set(); + const pushProvider = (providerId: string) => { + const normalized = normalizeExtensionHostMediaProviderId(providerId); + const provider = registry.get(normalized); + if (!provider || seen.has(normalized) || !(provider.capabilities ?? []).includes(capability)) { + return; + } + seen.add(normalized); + ordered.push(normalized); + }; + + for (const providerId of EXTENSION_HOST_MEDIA_AUTO_PROVIDER_IDS[capability]) { + pushProvider(providerId); + } + for (const provider of registry.values()) { + pushProvider(provider.id); + } + return ordered; +} + +function resolveExtensionHostMediaRuntimeDefaultModelFromDefaults(params: { + capability: MediaUnderstandingCapability; + backendId: string; +}): string | undefined { + if (params.capability === "audio") { + return DEFAULT_AUDIO_MODELS[params.backendId]; + } + if (params.capability === "image") { + return DEFAULT_IMAGE_MODELS[params.backendId]; + } + return undefined; +} + export function listExtensionHostEmbeddingRuntimeBackendCatalogEntries(): readonly ExtensionHostRuntimeBackendCatalogEntry[] { return EXTENSION_HOST_EMBEDDING_BACKEND_IDS.map((backendId, defaultRank) => ({ id: buildRuntimeBackendCatalogId("embedding", backendId), @@ -81,29 +140,68 @@ export function listExtensionHostEmbeddingRuntimeBackendCatalogEntries(): readon })); } +export function listExtensionHostEmbeddingRemoteRuntimeBackendIds(): readonly EmbeddingProviderId[] { + return listExtensionHostEmbeddingRuntimeBackendCatalogEntries() + .filter((entry) => entry.backendId !== "local" && entry.metadata?.autoSelectable === true) + .map((entry) => entry.backendId as EmbeddingProviderId); +} + export function listExtensionHostMediaRuntimeBackendCatalogEntries(): readonly ExtensionHostRuntimeBackendCatalogEntry[] { - const registry = buildExtensionHostMediaUnderstandingRegistry(); const entries: ExtensionHostRuntimeBackendCatalogEntry[] = []; - let defaultRank = 0; - for (const provider of registry.values()) { - for (const capability of provider.capabilities ?? []) { - const subsystemId = mapMediaCapabilityToSubsystem(capability); + const registry = buildExtensionHostMediaUnderstandingRegistry(); + for (const capability of ["audio", "image", "video"] as const) { + const providerIds = buildExtensionHostMediaRuntimeProviderIds(capability); + for (const [defaultRank, providerId] of providerIds.entries()) { + const provider = registry.get(providerId); + if (!provider) { + continue; + } + const defaultModel = resolveExtensionHostMediaRuntimeDefaultModelFromDefaults({ + capability, + backendId: providerId, + }); entries.push({ - id: buildRuntimeBackendCatalogId(subsystemId, provider.id), + id: buildRuntimeBackendCatalogId(mapMediaCapabilityToSubsystem(capability), provider.id), family: EXTENSION_HOST_RUNTIME_BACKEND_FAMILY, - subsystemId, + subsystemId: mapMediaCapabilityToSubsystem(capability), backendId: provider.id, source: "builtin", defaultRank, selectorKeys: buildMediaSelectorKeys(provider.id), capabilities: [capability], + metadata: { + autoSelectable: EXTENSION_HOST_MEDIA_AUTO_PROVIDER_IDS[capability].includes(provider.id), + ...(defaultModel ? { defaultModel } : {}), + }, }); } - defaultRank += 1; } return entries; } +export function listExtensionHostMediaAutoRuntimeBackendIds( + capability: MediaUnderstandingCapability, +): readonly string[] { + const subsystemId = mapMediaCapabilityToSubsystem(capability); + return listExtensionHostMediaRuntimeBackendCatalogEntries() + .filter((entry) => entry.subsystemId === subsystemId && entry.metadata?.autoSelectable === true) + .toSorted((left, right) => left.defaultRank - right.defaultRank) + .map((entry) => entry.backendId); +} + +export function resolveExtensionHostMediaRuntimeDefaultModel(params: { + capability: MediaUnderstandingCapability; + backendId: string; +}): string | undefined { + const subsystemId = mapMediaCapabilityToSubsystem(params.capability); + const entry = listExtensionHostMediaRuntimeBackendCatalogEntries().find( + (candidate) => + candidate.subsystemId === subsystemId && candidate.backendId === params.backendId, + ); + const defaultModel = entry?.metadata?.defaultModel; + return typeof defaultModel === "string" ? defaultModel : undefined; +} + export function listExtensionHostTtsRuntimeBackendCatalogEntries(): readonly ExtensionHostRuntimeBackendCatalogEntry[] { return listExtensionHostTtsRuntimeProviders().map((provider, defaultRank) => ({ id: buildRuntimeBackendCatalogId("tts", provider.id), @@ -128,6 +226,21 @@ export function listExtensionHostTtsRuntimeBackendIds(): readonly TtsProvider[] ); } +export function listExtensionHostRuntimeBackendIdsForSubsystem( + subsystemId: ExtensionHostRuntimeBackendSubsystemId, +): readonly string[] { + return listExtensionHostRuntimeBackendCatalogEntries() + .filter((entry) => entry.subsystemId === subsystemId) + .toSorted((left, right) => left.defaultRank - right.defaultRank) + .map((entry) => entry.backendId); +} + +export function listExtensionHostMediaRuntimeBackendIds( + subsystemId: ExtensionHostMediaRuntimeSubsystemId, +): readonly string[] { + return listExtensionHostRuntimeBackendIdsForSubsystem(subsystemId); +} + export function listExtensionHostRuntimeBackendCatalogEntries(): readonly ExtensionHostRuntimeBackendCatalogEntry[] { return [ ...listExtensionHostEmbeddingRuntimeBackendCatalogEntries(),