fix(media-understanding): auto-register image capability for config providers with image input (#51392)

This commit is contained in:
xydt-610 2026-03-21 12:30:05 +08:00
parent 598f1826d8
commit 93540b5af9
2 changed files with 50 additions and 0 deletions

View File

@ -60,4 +60,28 @@ describe("media-understanding provider registry", () => {
expect(provider?.id).toBe("google");
});
// Regression test for #51392: a config-declared provider is auto-registered for image
// understanding only when at least one of its models lists "image" among its inputs.
it("auto-registers media-understanding for config providers with image-capable models (#51392)", () => {
  // One provider with a vision-capable model, one with a text-only model.
  const providers = {
    glm: { models: [{ id: "glm-4.6v", input: ["text", "image"] }] },
    textOnly: { models: [{ id: "text-model", input: ["text"] }] },
  };
  const cfg = { models: { providers } } as never;

  const registry = buildMediaUnderstandingRegistry(undefined, cfg);
  const visionEntry = getMediaUnderstandingProvider("glm", registry);
  const plainEntry = getMediaUnderstandingProvider("textOnly", registry);

  // The image-capable provider is registered with image-only capabilities and both describe hooks.
  expect(visionEntry?.id).toBe("glm");
  expect(visionEntry?.capabilities).toEqual(["image"]);
  expect(visionEntry?.describeImage).toBeDefined();
  expect(visionEntry?.describeImages).toBeDefined();

  // The text-only provider must not be registered at all.
  expect(plainEntry).toBeUndefined();
});
});

View File

@ -5,6 +5,10 @@ import { getActivePluginRegistry } from "../../plugins/runtime.js";
import type { MediaUnderstandingProvider } from "../types.js";
import { deepgramProvider } from "./deepgram/index.js";
import { groqProvider } from "./groq/index.js";
import {
describeImageWithModel,
describeImagesWithModel,
} from "./image.js";
// Media-understanding providers bundled with this module: Groq and Deepgram.
// NOTE(review): presumably the seed list for buildMediaUnderstandingRegistry — the consuming
// code is outside this view; confirm.
const PROVIDERS: MediaUnderstandingProvider[] = [groqProvider, deepgramProvider];
@ -48,6 +52,28 @@ export function buildMediaUnderstandingRegistry(
// Merge every media-understanding provider listed by the plugin registry into `registry`.
// The `?? []` fallback makes this loop a no-op when the plugin registry or its provider
// list is absent.
for (const entry of pluginRegistry?.mediaUnderstandingProviders ?? []) {
mergeProviderIntoRegistry(registry, entry.provider);
}
// Auto-register media-understanding for config providers with image-capable models (#51392).
// A provider declared under `cfg.models.providers` that has at least one model whose `input`
// list contains "image" gets a synthetic image-only provider backed by the generic
// describeImageWithModel / describeImagesWithModel helpers.
const configProviders = cfg?.models?.providers;
if (configProviders && typeof configProviders === "object") {
  for (const [providerKey, providerCfg] of Object.entries(configProviders)) {
    // Ignore empty or whitespace-only keys (Object.entries keys are always strings).
    if (!providerKey.trim()) continue;
    const normalizedKey = normalizeMediaProviderId(providerKey);
    // An entry already registered under this id takes precedence over the synthetic one.
    if (registry.has(normalizedKey)) continue;
    // The provider config is only loosely typed at this point, so tolerate a missing or
    // non-array `models` value instead of letting `.some` throw and abort registry construction.
    const rawModels = (providerCfg as { models?: Array<{ input?: string[] }> })?.models;
    const models = Array.isArray(rawModels) ? rawModels : [];
    const hasImageModel = models.some(
      (m) => Array.isArray(m?.input) && m.input.includes("image"),
    );
    if (hasImageModel) {
      const autoProvider: MediaUnderstandingProvider = {
        id: normalizedKey,
        capabilities: ["image"],
        describeImage: describeImageWithModel,
        describeImages: describeImagesWithModel,
      };
      mergeProviderIntoRegistry(registry, autoProvider);
    }
  }
}
if (overrides) {
for (const [key, provider] of Object.entries(overrides)) {
const normalizedKey = normalizeMediaProviderId(key);