From 86231b5dbf718c2dff60e8baba6a623bb004a3a8 Mon Sep 17 00:00:00 2001 From: JaiminBhojani Date: Fri, 20 Mar 2026 23:00:19 +0530 Subject: [PATCH] =?UTF-8?q?fix:=20address=20PR=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20test=20coverage,=20formatting,=20and=20config=20fal?= =?UTF-8?q?lback?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix oxfmt formatting issue in image.ts (CI check failure) - Add toHaveBeenCalledWith assertions to Google model normalization tests to verify normalized model IDs are passed to resolveModelWithRegistry - Copy api field from configured model in config fallback, not just input, so custom providers with explicit api configs route through correct adapter - Add dedicated tests for !imageProvider fallback in image-tool.test.ts covering both single-image and multi-image branches - Fix type errors in test configs (missing reasoning/cost fields) Co-Authored-By: Claude Opus 4.6 --- src/agents/tools/image-tool.test.ts | 116 ++++++++++++++++++ .../providers/image.test.ts | 12 +- src/media-understanding/providers/image.ts | 10 +- 3 files changed, 131 insertions(+), 7 deletions(-) diff --git a/src/agents/tools/image-tool.test.ts b/src/agents/tools/image-tool.test.ts index c48a705dc01..b2dae2dd14e 100644 --- a/src/agents/tools/image-tool.test.ts +++ b/src/agents/tools/image-tool.test.ts @@ -883,3 +883,119 @@ describe("image tool response validation", () => { expect(text).toBe("hello"); }); }); + +describe("image tool custom provider fallback (#33185)", () => { + const pngB64 = ONE_PIXEL_PNG_B64; + const priorFetch = global.fetch; + registerImageToolEnvReset(priorFetch, [ + "OPENAI_API_KEY", + "ANTHROPIC_API_KEY", + "COPILOT_GITHUB_TOKEN", + "GH_TOKEN", + "GITHUB_TOKEN", + ]); + + it("falls back to describeImageWithModel for single image when no media-understanding provider is registered", async () => { + await withTempAgentDir(async (agentDir) => { + await writeAuthProfiles(agentDir, { + version: 1, + profiles: { + "vllm:default": { type: "api_key", provider: "vllm", key: "sk-vllm-test" }, + }, + }); + const cfg: OpenClawConfig = { + agents: { + defaults: { + model: { primary: "vllm/Qwen3.5" }, + imageModel: { primary: "vllm/Qwen3.5" }, + }, + }, + models: { + providers: { + vllm: { + baseUrl: "http://127.0.0.1:1234/v1", + models: [makeModelDefinition("Qwen3.5", ["text", "image"])], + }, + }, + }, + }; + const tool = createRequiredImageTool({ config: cfg, agentDir }); + + // Mock the fallback function at module level + const spy = vi + .spyOn( + await import("../../media-understanding/providers/image.js"), + "describeImageWithModel", + ) + .mockResolvedValue({ text: "custom fallback ok", model: "Qwen3.5" }); + + const res = await tool.execute("t1", { + prompt: "Describe the image.", + image: `data:image/png;base64,${pngB64}`, + }); + + expect(spy).toHaveBeenCalledWith( + expect.objectContaining({ + provider: "vllm", + model: "Qwen3.5", + prompt: "Describe the image.", + }), + ); + const text = + (res.content?.find((b: { type: string }) => b.type === "text") as { text?: string }) + ?.text ?? ""; + expect(text).toBe("custom fallback ok"); + spy.mockRestore(); + }); + }); + + it("falls back to describeImagesWithModel for multiple images when no media-understanding provider is registered", async () => { + await withTempAgentDir(async (agentDir) => { + await writeAuthProfiles(agentDir, { + version: 1, + profiles: { + "vllm:default": { type: "api_key", provider: "vllm", key: "sk-vllm-test" }, + }, + }); + const cfg: OpenClawConfig = { + agents: { + defaults: { + model: { primary: "vllm/Qwen3.5" }, + imageModel: { primary: "vllm/Qwen3.5" }, + }, + }, + models: { + providers: { + vllm: { + baseUrl: "http://127.0.0.1:1234/v1", + models: [makeModelDefinition("Qwen3.5", ["text", "image"])], + }, + }, + }, + }; + const tool = createRequiredImageTool({ config: cfg, agentDir }); + + const spy = vi + .spyOn( + await import("../../media-understanding/providers/image.js"), + "describeImagesWithModel", + ) + .mockResolvedValue({ text: "Image 1:\nfirst\n\nImage 2:\nsecond", model: "Qwen3.5" }); + + const res = await tool.execute("t1", { + prompt: "Compare these images.", + images: [`data:image/png;base64,${pngB64}`, `data:image/png;base64,${pngB64}`], + }); + + expect(spy).toHaveBeenCalledWith( + expect.objectContaining({ + provider: "vllm", + model: "Qwen3.5", + prompt: "Compare these images.", + }), + ); + expect(spy.mock.calls[0][0].images).toHaveLength(2); + spy.mockRestore(); + }); + }); +}); diff --git a/src/media-understanding/providers/image.test.ts b/src/media-understanding/providers/image.test.ts index 96bd467ce84..d78361f23bd 100644 --- a/src/media-understanding/providers/image.test.ts +++ b/src/media-understanding/providers/image.test.ts @@ -168,7 +168,9 @@ describe("describeImageWithModel", () => { text: "flash ok", model: "gemini-3-flash-preview", }); - expect(resolveModelWithRegistryMock).toHaveBeenCalledOnce(); + expect(resolveModelWithRegistryMock).toHaveBeenCalledWith( + expect.objectContaining({ provider: "google", modelId: "gemini-3-flash-preview" }), + ); expect(getApiKeyForModelMock).toHaveBeenCalledWith( expect.objectContaining({ profileId: "google:default", @@ -211,7 +213,9 @@ describe("describeImageWithModel", () => { text: "flash lite ok", model: "gemini-3.1-flash-lite-preview", }); - expect(resolveModelWithRegistryMock).toHaveBeenCalledOnce(); + expect(resolveModelWithRegistryMock).toHaveBeenCalledWith( + expect.objectContaining({ provider: "google", modelId: "gemini-3.1-flash-lite-preview" }), + ); expect(getApiKeyForModelMock).toHaveBeenCalledWith( expect.objectContaining({ profileId: "google:default", @@ -253,7 +257,9 @@ describe("describeImageWithModel", () => { { id: "vllm/Qwen3.5", name: "Qwen3.5", - input: ["image", "text"] as string[], + reasoning: false, + input: ["image", "text"] as Array<"text" | "image">, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, contextWindow: 128000, maxTokens: 8192, }, diff --git a/src/media-understanding/providers/image.ts b/src/media-understanding/providers/image.ts index 85fb754fdaf..630b9644515 100644 --- a/src/media-understanding/providers/image.ts +++ b/src/media-understanding/providers/image.ts @@ -77,12 +77,14 @@ async function resolveImageRuntime(params: { resolvedRef.provider, ); const configuredModel = providerConfig?.models?.find( - (m) => - m.id === resolvedRef.model || - m.id === `${resolvedRef.provider}/${resolvedRef.model}`, + (m) => m.id === resolvedRef.model || m.id === `${resolvedRef.provider}/${resolvedRef.model}`, ); if (configuredModel?.input?.includes("image")) { - model = { ...model, input: configuredModel.input } as Model; + model = { + ...model, + input: configuredModel.input, + ...(configuredModel.api ? { api: configuredModel.api } : {}), + } as Model; } }