Merge c7738ab9fbf67faaa5877dd1b6ccde767dbe061b into d78e13f545136fcbba1feceecc5e0485a06c33a6
This commit is contained in:
commit
1c49fa8501
@ -114,6 +114,7 @@ describe("model-selection", () => {
|
||||
expect(normalizeProviderId("qwen")).toBe("qwen-portal");
|
||||
expect(normalizeProviderId("kimi-code")).toBe("kimi");
|
||||
expect(normalizeProviderId("kimi-coding")).toBe("kimi");
|
||||
expect(normalizeProviderId("nvidia-api")).toBe("nvidia");
|
||||
expect(normalizeProviderId("bedrock")).toBe("amazon-bedrock");
|
||||
expect(normalizeProviderId("aws-bedrock")).toBe("amazon-bedrock");
|
||||
expect(normalizeProviderId("amazon-bedrock")).toBe("amazon-bedrock");
|
||||
@ -251,6 +252,12 @@ describe("model-selection", () => {
|
||||
defaultProvider: "anthropic",
|
||||
expected: { provider: "openai", model: "gpt-5.3-codex-codex" },
|
||||
},
|
||||
{
|
||||
name: "normalizes nvidia-api provider alias to nvidia and preserves nested model id",
|
||||
variants: ["nvidia-api/meta/llama-3.2-90b-vision-instruct"],
|
||||
defaultProvider: "openai",
|
||||
expected: { provider: "nvidia", model: "meta/llama-3.2-90b-vision-instruct" },
|
||||
},
|
||||
{
|
||||
name: "normalizes gemini 3.1 flash-lite ids for google-vertex",
|
||||
variants: ["google-vertex/gemini-3.1-flash-lite", "gemini-3.1-flash-lite"],
|
||||
|
||||
@ -15,6 +15,9 @@ export function normalizeProviderId(provider: string): string {
|
||||
if (normalized === "kimi" || normalized === "kimi-code" || normalized === "kimi-coding") {
|
||||
return "kimi";
|
||||
}
|
||||
if (normalized === "nvidia-api") {
|
||||
return "nvidia";
|
||||
}
|
||||
if (normalized === "bedrock" || normalized === "aws-bedrock") {
|
||||
return "amazon-bedrock";
|
||||
}
|
||||
|
||||
@ -883,3 +883,119 @@ describe("image tool response validation", () => {
|
||||
expect(text).toBe("hello");
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * Regression coverage for #33185: when the configured image model belongs to a
 * custom provider ("vllm" here), the image tool is expected to delegate to the
 * generic describeImageWithModel / describeImagesWithModel helpers.
 *
 * Each case installs a spy on the real providers/image.js module and removes it
 * in a `finally` block, so a failing assertion cannot leak the mocked
 * implementation into later tests.
 */
describe("image tool custom provider fallback (#33185)", () => {
  const pngB64 = ONE_PIXEL_PNG_B64;
  const priorFetch = global.fetch;
  // Helper defined elsewhere in this file; it receives the original fetch and
  // the env var names this suite wants reset (exact hooks not visible here).
  registerImageToolEnvReset(priorFetch, [
    "OPENAI_API_KEY",
    "ANTHROPIC_API_KEY",
    "COPILOT_GITHUB_TOKEN",
    "GH_TOKEN",
    "GITHUB_TOKEN",
  ]);

  /** Writes the single vllm API-key auth profile both cases rely on. */
  const writeVllmAuthProfile = (agentDir: Parameters<typeof writeAuthProfiles>[0]) =>
    writeAuthProfiles(agentDir, {
      version: 1,
      profiles: {
        "vllm:default": { type: "api_key", provider: "vllm", key: "sk-vllm-test" },
      },
    });

  /** Builds a config whose primary and image models both point at vllm/Qwen3.5. */
  const makeVllmImageConfig = (): OpenClawConfig => ({
    agents: {
      defaults: {
        model: { primary: "vllm/Qwen3.5" },
        imageModel: { primary: "vllm/Qwen3.5" },
      },
    },
    models: {
      providers: {
        vllm: {
          baseUrl: "http://127.0.0.1:1234/v1",
          models: [makeModelDefinition("Qwen3.5", ["text", "image"])],
        },
      },
    },
  });

  it("falls back to describeImageWithModel for single image when no media-understanding provider is registered", async () => {
    await withTempAgentDir(async (agentDir) => {
      await writeVllmAuthProfile(agentDir);
      const tool = createRequiredImageTool({ config: makeVllmImageConfig(), agentDir });

      // Mock the fallback function at module level.
      const spy = vi
        .spyOn(
          await import("../../media-understanding/providers/image.js"),
          "describeImageWithModel",
        )
        .mockResolvedValue({ text: "custom fallback ok", model: "Qwen3.5" });

      try {
        const res = await tool.execute("t1", {
          prompt: "Describe the image.",
          image: `data:image/png;base64,${pngB64}`,
        });

        expect(spy).toHaveBeenCalledWith(
          expect.objectContaining({
            provider: "vllm",
            model: "Qwen3.5",
            prompt: "Describe the image.",
          }),
        );
        const text =
          (res.content?.find((b: { type: string }) => b.type === "text") as { text?: string })
            ?.text ?? "";
        expect(text).toBe("custom fallback ok");
      } finally {
        // Always undo the module-level spy, even when an assertion above throws.
        spy.mockRestore();
      }
    });
  });

  it("falls back to describeImagesWithModel for multiple images when no media-understanding provider is registered", async () => {
    await withTempAgentDir(async (agentDir) => {
      await writeVllmAuthProfile(agentDir);
      const tool = createRequiredImageTool({ config: makeVllmImageConfig(), agentDir });

      const spy = vi
        .spyOn(
          await import("../../media-understanding/providers/image.js"),
          "describeImagesWithModel",
        )
        .mockResolvedValue({ text: "Image 1:\nfirst\n\nImage 2:\nsecond", model: "Qwen3.5" });

      try {
        // Only the arguments forwarded to the spy are asserted; the tool result is not inspected.
        await tool.execute("t1", {
          prompt: "Compare these images.",
          images: [`data:image/png;base64,${pngB64}`, `data:image/png;base64,${pngB64}`],
        });

        expect(spy).toHaveBeenCalledWith(
          expect.objectContaining({
            provider: "vllm",
            model: "Qwen3.5",
            prompt: "Compare these images.",
          }),
        );
        // Optional chaining keeps this a readable assertion failure rather than
        // a TypeError if the spy was never invoked.
        expect(spy.mock.calls[0]?.[0]?.images).toHaveLength(2);
      } finally {
        // Always undo the module-level spy, even when an assertion above throws.
        spy.mockRestore();
      }
    });
  });
});
|
||||
|
||||
@ -1,5 +1,9 @@
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import {
|
||||
describeImageWithModel,
|
||||
describeImagesWithModel,
|
||||
} from "../../media-understanding/providers/image.js";
|
||||
import { getMediaUnderstandingProvider } from "../../media-understanding/providers/index.js";
|
||||
import { buildProviderRegistry } from "../../media-understanding/runner.js";
|
||||
import { loadWebMedia } from "../../media/web-media.js";
|
||||
@ -146,9 +150,44 @@ async function runImagePrompt(params: {
|
||||
modelOverride: params.modelOverride,
|
||||
run: async (provider, modelId) => {
|
||||
const imageProvider = getMediaUnderstandingProvider(provider, providerRegistry);
|
||||
|
||||
// When no media-understanding provider is registered (custom/self-hosted
|
||||
// providers like vllm, nvidia-api, iflow), fall back to the generic
|
||||
// model-based image description — same pattern as runner.entries.ts.
|
||||
if (!imageProvider) {
|
||||
throw new Error(`No media-understanding provider registered for ${provider}`);
|
||||
if (params.images.length > 1) {
|
||||
const described = await describeImagesWithModel({
|
||||
images: params.images.map((image, index) => ({
|
||||
buffer: image.buffer,
|
||||
fileName: `image-${index + 1}`,
|
||||
mime: image.mimeType,
|
||||
})),
|
||||
provider,
|
||||
model: modelId,
|
||||
prompt: params.prompt,
|
||||
maxTokens: resolveImageToolMaxTokens(undefined),
|
||||
timeoutMs: 30_000,
|
||||
cfg: providerCfg,
|
||||
agentDir: params.agentDir,
|
||||
});
|
||||
return { text: described.text, provider, model: described.model ?? modelId };
|
||||
}
|
||||
const image = params.images[0];
|
||||
const described = await describeImageWithModel({
|
||||
buffer: image.buffer,
|
||||
fileName: "image-1",
|
||||
mime: image.mimeType,
|
||||
provider,
|
||||
model: modelId,
|
||||
prompt: params.prompt,
|
||||
maxTokens: resolveImageToolMaxTokens(undefined),
|
||||
timeoutMs: 30_000,
|
||||
cfg: providerCfg,
|
||||
agentDir: params.agentDir,
|
||||
});
|
||||
return { text: described.text, provider, model: described.model ?? modelId };
|
||||
}
|
||||
|
||||
if (params.images.length > 1 && imageProvider.describeImages) {
|
||||
const described = await imageProvider.describeImages({
|
||||
images: params.images.map((image, index) => ({
|
||||
|
||||
@ -17,6 +17,7 @@ const hoisted = vi.hoisted(() => ({
|
||||
requireApiKeyMock: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? ""),
|
||||
setRuntimeApiKeyMock: vi.fn(),
|
||||
discoverModelsMock: vi.fn(),
|
||||
resolveModelWithRegistryMock: vi.fn(),
|
||||
}));
|
||||
const {
|
||||
completeMock,
|
||||
@ -27,6 +28,7 @@ const {
|
||||
requireApiKeyMock,
|
||||
setRuntimeApiKeyMock,
|
||||
discoverModelsMock,
|
||||
resolveModelWithRegistryMock,
|
||||
} = hoisted;
|
||||
|
||||
vi.mock("@mariozechner/pi-ai", async (importOriginal) => {
|
||||
@ -62,19 +64,22 @@ vi.mock("../../agents/pi-model-discovery-runtime.js", () => ({
|
||||
discoverModels: discoverModelsMock,
|
||||
}));
|
||||
|
||||
vi.mock("../../agents/pi-embedded-runner/model.js", () => ({
|
||||
resolveModelWithRegistry: resolveModelWithRegistryMock,
|
||||
}));
|
||||
|
||||
const { describeImageWithModel } = await import("./image.js");
|
||||
|
||||
describe("describeImageWithModel", () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
minimaxUnderstandImageMock.mockResolvedValue("portal ok");
|
||||
discoverModelsMock.mockReturnValue({
|
||||
find: vi.fn(() => ({
|
||||
provider: "minimax-portal",
|
||||
id: "MiniMax-VL-01",
|
||||
input: ["text", "image"],
|
||||
baseUrl: "https://api.minimax.io/anthropic",
|
||||
})),
|
||||
discoverModelsMock.mockReturnValue({ find: vi.fn(() => null) });
|
||||
resolveModelWithRegistryMock.mockReturnValue({
|
||||
provider: "minimax-portal",
|
||||
id: "MiniMax-VL-01",
|
||||
input: ["text", "image"],
|
||||
baseUrl: "https://api.minimax.io/anthropic",
|
||||
});
|
||||
});
|
||||
|
||||
@ -109,13 +114,11 @@ describe("describeImageWithModel", () => {
|
||||
});
|
||||
|
||||
it("uses generic completion for non-canonical minimax-portal image models", async () => {
|
||||
discoverModelsMock.mockReturnValue({
|
||||
find: vi.fn(() => ({
|
||||
provider: "minimax-portal",
|
||||
id: "custom-vision",
|
||||
input: ["text", "image"],
|
||||
baseUrl: "https://api.minimax.io/anthropic",
|
||||
})),
|
||||
resolveModelWithRegistryMock.mockReturnValue({
|
||||
provider: "minimax-portal",
|
||||
id: "custom-vision",
|
||||
input: ["text", "image"],
|
||||
baseUrl: "https://api.minimax.io/anthropic",
|
||||
});
|
||||
completeMock.mockResolvedValue({
|
||||
role: "assistant",
|
||||
@ -148,17 +151,12 @@ describe("describeImageWithModel", () => {
|
||||
});
|
||||
|
||||
it("normalizes deprecated google flash ids before lookup and keeps profile auth selection", async () => {
|
||||
const findMock = vi.fn((provider: string, modelId: string) => {
|
||||
expect(provider).toBe("google");
|
||||
expect(modelId).toBe("gemini-3-flash-preview");
|
||||
return {
|
||||
provider: "google",
|
||||
id: "gemini-3-flash-preview",
|
||||
input: ["text", "image"],
|
||||
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
|
||||
};
|
||||
resolveModelWithRegistryMock.mockReturnValue({
|
||||
provider: "google",
|
||||
id: "gemini-3-flash-preview",
|
||||
input: ["text", "image"],
|
||||
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
|
||||
});
|
||||
discoverModelsMock.mockReturnValue({ find: findMock });
|
||||
completeMock.mockResolvedValue({
|
||||
role: "assistant",
|
||||
api: "google-generative-ai",
|
||||
@ -186,7 +184,9 @@ describe("describeImageWithModel", () => {
|
||||
text: "flash ok",
|
||||
model: "gemini-3-flash-preview",
|
||||
});
|
||||
expect(findMock).toHaveBeenCalledOnce();
|
||||
expect(resolveModelWithRegistryMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ provider: "google", modelId: "gemini-3-flash-preview" }),
|
||||
);
|
||||
expect(getApiKeyForModelMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
profileId: "google:default",
|
||||
@ -196,17 +196,12 @@ describe("describeImageWithModel", () => {
|
||||
});
|
||||
|
||||
it("normalizes gemini 3.1 flash-lite ids before lookup and keeps profile auth selection", async () => {
|
||||
const findMock = vi.fn((provider: string, modelId: string) => {
|
||||
expect(provider).toBe("google");
|
||||
expect(modelId).toBe("gemini-3.1-flash-lite-preview");
|
||||
return {
|
||||
provider: "google",
|
||||
id: "gemini-3.1-flash-lite-preview",
|
||||
input: ["text", "image"],
|
||||
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
|
||||
};
|
||||
resolveModelWithRegistryMock.mockReturnValue({
|
||||
provider: "google",
|
||||
id: "gemini-3.1-flash-lite-preview",
|
||||
input: ["text", "image"],
|
||||
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
|
||||
});
|
||||
discoverModelsMock.mockReturnValue({ find: findMock });
|
||||
completeMock.mockResolvedValue({
|
||||
role: "assistant",
|
||||
api: "google-generative-ai",
|
||||
@ -234,7 +229,9 @@ describe("describeImageWithModel", () => {
|
||||
text: "flash lite ok",
|
||||
model: "gemini-3.1-flash-lite-preview",
|
||||
});
|
||||
expect(findMock).toHaveBeenCalledOnce();
|
||||
expect(resolveModelWithRegistryMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ provider: "google", modelId: "gemini-3.1-flash-lite-preview" }),
|
||||
);
|
||||
expect(getApiKeyForModelMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
profileId: "google:default",
|
||||
@ -242,4 +239,164 @@ describe("describeImageWithModel", () => {
|
||||
);
|
||||
expect(setRuntimeApiKeyMock).toHaveBeenCalledWith("google", "oauth-test");
|
||||
});
|
||||
|
||||
it("resolves custom provider image models via config fallback when not in registry (#33185)", async () => {
  const vllmBaseUrl = "http://127.0.0.1:1234/v1";

  // The user's config declares the model under a provider-prefixed id and
  // explicitly lists image input.
  const configuredModel = {
    id: "vllm/Qwen3.5",
    name: "Qwen3.5",
    reasoning: false,
    input: ["image", "text"] as Array<"text" | "image">,
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 128000,
    maxTokens: 8192,
  };
  const cfg = {
    models: {
      providers: {
        vllm: {
          baseUrl: vllmBaseUrl,
          apiKey: "vllm-local", // pragma: allowlist secret
          api: "openai-completions" as const,
          models: [configuredModel],
        },
      },
    },
  };

  // The mocked resolver hands back a text-only ad-hoc model, standing in for
  // the case where id matching missed the provider-prefixed config entry.
  resolveModelWithRegistryMock.mockReturnValue({
    provider: "vllm",
    id: "Qwen3.5",
    api: "openai-completions",
    baseUrl: vllmBaseUrl,
    input: ["text"],
    contextWindow: 128000,
    maxTokens: 8192,
  });
  completeMock.mockResolvedValue({
    role: "assistant",
    api: "openai-completions",
    provider: "vllm",
    model: "Qwen3.5",
    stopReason: "stop",
    timestamp: Date.now(),
    content: [{ type: "text", text: "custom vision ok" }],
  });

  const described = await describeImageWithModel({
    cfg,
    agentDir: "/tmp/openclaw-agent",
    provider: "vllm",
    model: "Qwen3.5",
    buffer: Buffer.from("png-bytes"),
    fileName: "image.png",
    mime: "image/png",
    prompt: "Describe the image.",
    timeoutMs: 1000,
  });

  expect(described).toEqual({ text: "custom vision ok", model: "Qwen3.5" });
  expect(resolveModelWithRegistryMock).toHaveBeenCalledWith(
    expect.objectContaining({ provider: "vllm", modelId: "Qwen3.5" }),
  );
  expect(completeMock).toHaveBeenCalledOnce();
});
|
||||
|
||||
it("prefers exact provider alias over normalized lookup for config fallback (#33185)", async () => {
  // Scenario: the caller passes provider "nvidia-api" while the mocked resolver
  // reports the canonical "nvidia". The config holds both keys, and only the
  // "nvidia-api" block declares an image-capable model, so the call succeeds
  // only if the exact params.provider key is consulted for the definition.
  const nvidiaEndpoint = {
    baseUrl: "https://integrate.api.nvidia.com/v1",
    apiKey: "nvidia-key", // pragma: allowlist secret
    api: "openai-completions" as const,
  };
  const cfg = {
    models: {
      providers: {
        nvidia: { ...nvidiaEndpoint, models: [] },
        "nvidia-api": {
          ...nvidiaEndpoint,
          models: [
            {
              id: "nvidia-api/meta-llama",
              name: "meta-llama",
              reasoning: false,
              input: ["image", "text"] as Array<"text" | "image">,
              cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
              contextWindow: 128000,
              maxTokens: 4096,
            },
          ],
        },
      },
    },
  };

  // Text-only model under the normalized provider name, as returned by the mock.
  resolveModelWithRegistryMock.mockReturnValue({
    provider: "nvidia",
    id: "meta-llama",
    api: "openai-completions",
    baseUrl: "https://integrate.api.nvidia.com/v1",
    input: ["text"],
    contextWindow: 128000,
    maxTokens: 4096,
  });
  completeMock.mockResolvedValue({
    role: "assistant",
    api: "openai-completions",
    provider: "nvidia",
    model: "meta-llama",
    stopReason: "stop",
    timestamp: Date.now(),
    content: [{ type: "text", text: "nvidia vision ok" }],
  });

  const described = await describeImageWithModel({
    cfg,
    agentDir: "/tmp/openclaw-agent",
    provider: "nvidia-api",
    model: "meta-llama",
    buffer: Buffer.from("png-bytes"),
    fileName: "image.png",
    mime: "image/png",
    prompt: "Describe the image.",
    timeoutMs: 1000,
  });

  expect(described).toEqual({ text: "nvidia vision ok", model: "meta-llama" });
  expect(completeMock).toHaveBeenCalledOnce();
});
|
||||
|
||||
it("throws Unknown model when custom provider model is not resolvable at all (#33185)", async () => {
  // The mocked resolver finds nothing, and the empty config offers no fallback.
  resolveModelWithRegistryMock.mockReturnValue(undefined);

  const pending = describeImageWithModel({
    cfg: {},
    agentDir: "/tmp/openclaw-agent",
    provider: "nonexistent",
    model: "fake-model",
    buffer: Buffer.from("png-bytes"),
    fileName: "image.png",
    mime: "image/png",
    prompt: "Describe the image.",
    timeoutMs: 1000,
  });

  await expect(pending).rejects.toThrow("Unknown model: nonexistent/fake-model");
});
|
||||
});
|
||||
|
||||
@ -6,8 +6,9 @@ import {
|
||||
requireApiKey,
|
||||
resolveApiKeyForProvider,
|
||||
} from "../../agents/model-auth.js";
|
||||
import { normalizeModelRef } from "../../agents/model-selection.js";
|
||||
import { findNormalizedProviderValue, normalizeModelRef } from "../../agents/model-selection.js";
|
||||
import { ensureOpenClawModelsJson } from "../../agents/models-config.js";
|
||||
import { resolveModelWithRegistry } from "../../agents/pi-embedded-runner/model.js";
|
||||
import { coerceImageAssistantText } from "../../agents/tools/image-tool.helpers.js";
|
||||
import type {
|
||||
ImageDescriptionRequest,
|
||||
@ -49,10 +50,51 @@ async function resolveImageRuntime(params: {
|
||||
const authStorage = discoverAuthStorage(params.agentDir);
|
||||
const modelRegistry = discoverModels(authStorage, params.agentDir);
|
||||
const resolvedRef = normalizeModelRef(params.provider, params.model);
|
||||
const model = modelRegistry.find(resolvedRef.provider, resolvedRef.model) as Model<Api> | null;
|
||||
|
||||
// Use the full model resolution stack (registry → inline config → plugin →
|
||||
// ad-hoc provider config) instead of bare modelRegistry.find(), which misses
|
||||
// user-configured custom provider models (e.g. vllm, nvidia-api, iflow).
|
||||
let model: Model<Api> | null =
|
||||
resolveModelWithRegistry({
|
||||
provider: resolvedRef.provider,
|
||||
modelId: resolvedRef.model,
|
||||
modelRegistry,
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
}) ?? null;
|
||||
|
||||
if (!model) {
|
||||
throw new Error(`Unknown model: ${resolvedRef.provider}/${resolvedRef.model}`);
|
||||
}
|
||||
|
||||
// When the model was resolved via the ad-hoc provider config fallback, the
|
||||
// input field defaults to ["text"] because the config model lookup uses exact
|
||||
// ID matching which can miss provider-prefixed IDs (e.g. "vllm/Qwen3.5" in
|
||||
// config vs "Qwen3.5" after model ref parsing). Check the user's configured
|
||||
// model definition for explicit image support so the tool works correctly.
|
||||
// We prefer the exact params.provider key first so that configs containing
|
||||
// both an alias (e.g. "nvidia-api") and the canonical name ("nvidia") resolve
|
||||
// to the correct block — findNormalizedProviderValue would pick whichever
|
||||
// entry normalizes first, which may be the wrong one.
|
||||
if (!model.input?.includes("image")) {
|
||||
const providers = params.cfg?.models?.providers;
|
||||
const providerConfig =
|
||||
providers?.[params.provider] ?? findNormalizedProviderValue(providers, resolvedRef.provider);
|
||||
const configuredModel = providerConfig?.models?.find(
|
||||
(m) =>
|
||||
m.id === resolvedRef.model ||
|
||||
m.id === `${params.provider}/${resolvedRef.model}` ||
|
||||
m.id === `${resolvedRef.provider}/${resolvedRef.model}`,
|
||||
);
|
||||
if (configuredModel?.input?.includes("image")) {
|
||||
model = {
|
||||
...model,
|
||||
input: configuredModel.input,
|
||||
...(configuredModel.api ? { api: configuredModel.api } : {}),
|
||||
} as Model<Api>;
|
||||
}
|
||||
}
|
||||
|
||||
if (!model.input?.includes("image")) {
|
||||
throw new Error(`Model does not support images: ${params.provider}/${params.model}`);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user