From 0a59973284143d8aacd197641e67ca225169dcab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BF=BB=E5=BD=B9?= Date: Fri, 13 Mar 2026 13:30:16 +0800 Subject: [PATCH 1/8] fix(image): respect imageModel config for OpenRouter vision models Fix OpenRouter models being hardcoded with `input: ["text"]` which caused image tool validation to fail even when `imageModel.primary` was correctly configured to an OpenRouter vision model. Changes: - Check provider config for model input capabilities before falling back - Add heuristic detection of vision models by model ID patterns - Propagate configured model metadata (reasoning, cost, context window) Fixes #44648 --- src/agents/pi-embedded-runner/model.test.ts | 126 ++++++++++++++++++-- src/agents/pi-embedded-runner/model.ts | 76 ++++++++++++ src/agents/tools/image-tool.test.ts | 67 +++++++++++ 3 files changed, 262 insertions(+), 7 deletions(-) diff --git a/src/agents/pi-embedded-runner/model.test.ts b/src/agents/pi-embedded-runner/model.test.ts index 47da838cc6a..c4b4ae8d841 100644 --- a/src/agents/pi-embedded-runner/model.test.ts +++ b/src/agents/pi-embedded-runner/model.test.ts @@ -458,16 +458,24 @@ describe("resolveModel", () => { }); }); - it("falls back to text-only when OpenRouter API cache is empty", () => { + it("falls back to heuristic vision detection when OpenRouter API cache is empty", () => { mockGetOpenRouterModelCapabilities.mockReturnValue(undefined); - const result = resolveModel("openrouter", "openrouter/healer-alpha", "/tmp/agent"); - - expect(result.error).toBeUndefined(); - expect(result.model).toMatchObject({ + // Vision model detected by heuristic + const visionResult = resolveModel("openrouter", "openai/gpt-4o", "/tmp/agent"); + expect(visionResult.error).toBeUndefined(); + expect(visionResult.model).toMatchObject({ provider: "openrouter", - id: "openrouter/healer-alpha", - reasoning: false, + id: "openai/gpt-4o", + input: ["text", "image"], + }); + + // Non-vision model defaults to text-only + const textResult = resolveModel("openrouter", "deepseek/deepseek-chat", "/tmp/agent"); + expect(textResult.error).toBeUndefined(); + expect(textResult.model).toMatchObject({ + provider: "openrouter", + id: "deepseek/deepseek-chat", input: ["text"], }); }); @@ -535,6 +543,110 @@ describe("resolveModel", () => { }); }); + it("resolves OpenRouter vision models with image input based on configured model", () => { + const cfg = { + models: { + providers: { + openrouter: { + baseUrl: "https://openrouter.ai/api/v1", + api: "openai-completions", + models: [ + { + ...makeModel("anthropic/claude-opus-4-6"), + input: ["text", "image"], + }, + ], + }, + }, + }, + } as OpenClawConfig; + + const result = resolveModel("openrouter", "anthropic/claude-opus-4-6", "/tmp/agent", cfg); + + expect(result.error).toBeUndefined(); + expect(result.model).toMatchObject({ + provider: "openrouter", + id: "anthropic/claude-opus-4-6", + input: ["text", "image"], + }); + }); + + it("resolves OpenRouter vision models by model ID pattern heuristic", () => { + // Without explicit config, vision models are detected by ID pattern + mockGetOpenRouterModelCapabilities.mockReturnValue(undefined); + const visionModelIds = [ + "openai/gpt-4o", + "openai/gpt-4-turbo", + "anthropic/claude-3-opus", + "anthropic/claude-sonnet-4-5", + "google/gemini-1.5-pro", + "google/gemini-flash-2.0", + "qwen/qwen2-vl-72b", + "mistralai/pixtral-large", + "meta-llama/llama-3.2-90b-vision", + ]; + + for (const modelId of visionModelIds) { + const result = resolveModel("openrouter", modelId, 
"/tmp/agent"); + + expect(result.error).toBeUndefined(); + expect(result.model?.input).toContain("image"); + expect(result.model?.input).toContain("text"); + } + }); + + it("resolves OpenRouter text-only models without image input", () => { + // Models without vision patterns default to text-only + mockGetOpenRouterModelCapabilities.mockReturnValue(undefined); + const textOnlyModelIds = [ + "deepseek/deepseek-chat", + "meta-llama/llama-3.1-70b-instruct", + "mistralai/mistral-large", + ]; + + for (const modelId of textOnlyModelIds) { + const result = resolveModel("openrouter", modelId, "/tmp/agent"); + + expect(result.error).toBeUndefined(); + expect(result.model?.input).toEqual(["text"]); + } + }); + + it("uses explicitly configured imageModel.primary with OpenRouter (#44648)", () => { + // Regression test: when imageModel.primary is configured to an OpenRouter vision model, + // the model should resolve with image input capability + const cfg = { + agents: { + defaults: { + imageModel: { primary: "openrouter/openai/gpt-4o" }, + }, + }, + models: { + providers: { + openrouter: { + baseUrl: "https://openrouter.ai/api/v1", + api: "openai-completions", + models: [ + { + ...makeModel("openai/gpt-4o"), + input: ["text", "image"], + }, + ], + }, + }, + }, + } as unknown as OpenClawConfig; + + const result = resolveModel("openrouter", "openai/gpt-4o", "/tmp/agent", cfg); + + expect(result.error).toBeUndefined(); + expect(result.model).toMatchObject({ + provider: "openrouter", + id: "openai/gpt-4o", + input: ["text", "image"], + }); + }); + it("prefers configured provider api metadata over discovered registry model", () => { mockDiscoveredModel({ provider: "onehub", diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts index 5bf97a683d0..d8cee25dfba 100644 --- a/src/agents/pi-embedded-runner/model.ts +++ b/src/agents/pi-embedded-runner/model.ts @@ -33,6 +33,47 @@ type InlineProviderConfig = { headers?: unknown; }; +/** + * Heuristic to detect vision-capable models based on their model ID. + * OpenRouter models often follow naming patterns that indicate vision support. 
+ */ +function isLikelyVisionModel(modelId: string): boolean { + const lower = modelId.toLowerCase(); + // Common vision model patterns: + // - Models with "vision", "vl", or "visual" in the name + // - Claude 3+ models (claude-3-*, claude-opus-4-*, claude-sonnet-4-*) except claude-2 + // - GPT-4 vision variants (gpt-4-vision, gpt-4o, gpt-4-turbo) + // - Gemini models (gemini-1.5-*, gemini-2-*, gemini-pro-vision) + // - Llama vision models (llava, llama-3.2-*-vision) + // - Qwen vision models (qwen-vl, qwen2-vl) + // - Pixtral models + const visionPatterns = [ + /vision/, + /\bvl\b/, // "vl" as a separate segment (e.g., qwen-vl, MiniMax-VL-01) + /-vl-/, // "vl" as a segment (e.g., qwen2-vl-72b) + /visual/, + /claude-3/, + /claude-opus-4/, + /claude-sonnet-4/, + /claude-haiku-4/, + /gpt-4o/, + /gpt-4-turbo/, + /gpt-4-vision/, + /gpt-5/, + /gemini-1\.5/, + /gemini-2/, + /gemini-pro-vision/, + /gemini-flash/, + /llava/, + /llama-3\.2.*vision/, + /pixtral/, + /qwen-vl/, + /qwen2-vl/, + /qwen2\.5-vl/, + ]; + return visionPatterns.some((pattern) => pattern.test(lower)); +} + function sanitizeModelHeaders( headers: unknown, opts?: { stripSecretRefMarkers?: boolean }, @@ -248,6 +289,7 @@ export function resolveModelWithRegistry(params: { } const { provider, modelId, cfg, modelRegistry, agentDir } = params; + const normalizedProvider = normalizeProviderId(provider); const providerConfig = resolveConfiguredProviderConfig(cfg, provider); const pluginDynamicModel = runProviderDynamicModel({ provider, @@ -270,6 +312,40 @@ export function resolveModelWithRegistry(params: { }); } + // OpenRouter is a pass-through proxy - any model ID available on OpenRouter + // should work without being pre-registered in the local catalog. + // This fallback uses heuristics when the plugin-based capability lookup returns nothing. + if (normalizedProvider === "openrouter") { + // Check if the provider config specifies input capabilities for this model + const configuredModel = providerConfig?.models?.find((candidate) => candidate.id === modelId); + const configuredInput = configuredModel?.input; + // Use configured input if available, otherwise detect vision models by ID pattern + const resolvedInput: Array<"text" | "image"> = + Array.isArray(configuredInput) && configuredInput.length > 0 + ? configuredInput.filter((item): item is "text" | "image" => item === "text" || item === "image") + : isLikelyVisionModel(modelId) + ? ["text", "image"] + : ["text"]; + return normalizeResolvedModel({ + provider, + cfg, + agentDir, + model: { + id: modelId, + name: modelId, + api: "openai-completions", + provider, + baseUrl: "https://openrouter.ai/api/v1", + reasoning: configuredModel?.reasoning ?? false, + input: resolvedInput, + cost: configuredModel?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: configuredModel?.contextWindow ?? DEFAULT_CONTEXT_TOKENS, + // Align with OPENROUTER_DEFAULT_MAX_TOKENS in models-config.providers.ts + maxTokens: configuredModel?.maxTokens ?? 
8192, + } as Model, + }); + } + const configuredModel = providerConfig?.models?.find((candidate) => candidate.id === modelId); const providerHeaders = sanitizeModelHeaders(providerConfig?.headers, { stripSecretRefMarkers: true, diff --git a/src/agents/tools/image-tool.test.ts b/src/agents/tools/image-tool.test.ts index bcec7f32de7..b855081d3cd 100644 --- a/src/agents/tools/image-tool.test.ts +++ b/src/agents/tools/image-tool.test.ts @@ -465,6 +465,73 @@ describe("image tool implicit imageModel config", () => { }); }); + it("uses explicitly configured openrouter imageModel.primary with image input (#44648)", async () => { + await withTempAgentDir(async (agentDir) => { + vi.stubEnv("OPENROUTER_API_KEY", "openrouter-test"); + const fetch = stubOpenAiCompletionsOkFetch("ok openrouter"); + const cfg: OpenClawConfig = { + agents: { + defaults: { + model: { primary: "openrouter/deepseek/deepseek-chat" }, + imageModel: { primary: "openrouter/openai/gpt-4o" }, + }, + }, + models: { + providers: { + openrouter: { + api: "openai-completions", + baseUrl: "https://openrouter.ai/api/v1", + models: [ + makeModelDefinition("openai/gpt-4o", ["text", "image"]), + makeModelDefinition("deepseek/deepseek-chat", ["text"]), + ], + }, + }, + }, + }; + + const tool = requireImageTool(createImageTool({ config: cfg, agentDir })); + const result = await tool.execute("t1", { + prompt: "Describe this image.", + image: `data:image/png;base64,${ONE_PIXEL_PNG_B64}`, + }); + + expect(fetch).toHaveBeenCalledTimes(1); + const [url] = fetch.mock.calls[0] as [unknown]; + expect(String(url)).toBe("https://openrouter.ai/api/v1/chat/completions"); + expect(result.content).toEqual( + expect.arrayContaining([expect.objectContaining({ type: "text", text: "ok openrouter" })]), + ); + }); + }); + + it("resolves OpenRouter vision models by ID pattern heuristic (#44648)", async () => { + await withTempAgentDir(async (agentDir) => { + vi.stubEnv("OPENROUTER_API_KEY", "openrouter-test"); + const fetch = stubOpenAiCompletionsOkFetch("ok vision"); + // No explicit model config - relies on ID pattern detection + const cfg: OpenClawConfig = { + agents: { + defaults: { + model: { primary: "openrouter/some-text-model" }, + imageModel: { primary: "openrouter/anthropic/claude-3-opus" }, + }, + }, + }; + + const tool = requireImageTool(createImageTool({ config: cfg, agentDir })); + const result = await tool.execute("t1", { + prompt: "Describe this image.", + image: `data:image/png;base64,${ONE_PIXEL_PNG_B64}`, + }); + + expect(fetch).toHaveBeenCalledTimes(1); + expect(result.content).toEqual( + expect.arrayContaining([expect.objectContaining({ type: "text", text: "ok vision" })]), + ); + }); + }); + it("exposes an Anthropic-safe image schema without union keywords", async () => { await withMinimaxImageToolFromTempAgentDir(async (tool) => { const violations = findSchemaUnionKeywords(tool.parameters, "image.parameters"); From 7f3c2a50e6d2943486c39e00363a8af46b73dac0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BF=BB=E5=BD=B9?= Date: Fri, 13 Mar 2026 14:51:25 +0800 Subject: [PATCH 2/8] fix(image): address review feedback - remove redundant regex and dead code --- src/agents/pi-embedded-runner/model.ts | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts index d8cee25dfba..58e08c9160b 100644 --- a/src/agents/pi-embedded-runner/model.ts +++ b/src/agents/pi-embedded-runner/model.ts @@ -49,8 +49,7 @@ function 
isLikelyVisionModel(modelId: string): boolean { // - Pixtral models const visionPatterns = [ /vision/, - /\bvl\b/, // "vl" as a separate segment (e.g., qwen-vl, MiniMax-VL-01) - /-vl-/, // "vl" as a segment (e.g., qwen2-vl-72b) + /\bvl\b/, // "vl" as a separate segment (e.g., qwen-vl, qwen2-vl-72b, MiniMax-VL-01) /visual/, /claude-3/, /claude-opus-4/, @@ -315,17 +314,9 @@ export function resolveModelWithRegistry(params: { // OpenRouter is a pass-through proxy - any model ID available on OpenRouter // should work without being pre-registered in the local catalog. // This fallback uses heuristics when the plugin-based capability lookup returns nothing. + // Note: configured models with provider-level `api` return early via inlineMatch, + // so we rely on heuristic detection for vision support here. if (normalizedProvider === "openrouter") { - // Check if the provider config specifies input capabilities for this model - const configuredModel = providerConfig?.models?.find((candidate) => candidate.id === modelId); - const configuredInput = configuredModel?.input; - // Use configured input if available, otherwise detect vision models by ID pattern - const resolvedInput: Array<"text" | "image"> = - Array.isArray(configuredInput) && configuredInput.length > 0 - ? configuredInput.filter((item): item is "text" | "image" => item === "text" || item === "image") - : isLikelyVisionModel(modelId) - ? ["text", "image"] - : ["text"]; return normalizeResolvedModel({ provider, cfg, @@ -336,12 +327,12 @@ export function resolveModelWithRegistry(params: { api: "openai-completions", provider, baseUrl: "https://openrouter.ai/api/v1", - reasoning: configuredModel?.reasoning ?? false, - input: resolvedInput, - cost: configuredModel?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: configuredModel?.contextWindow ?? DEFAULT_CONTEXT_TOKENS, + reasoning: false, + input: isLikelyVisionModel(modelId) ? ["text", "image"] : ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: DEFAULT_CONTEXT_TOKENS, // Align with OPENROUTER_DEFAULT_MAX_TOKENS in models-config.providers.ts - maxTokens: configuredModel?.maxTokens ?? 8192, + maxTokens: 8192, } as Model, }); } From 8b135f3b45e66703064c98df923d675aa400172f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BF=BB=E5=BD=B9?= Date: Fri, 13 Mar 2026 15:01:50 +0800 Subject: [PATCH 3/8] fix(image): address review feedback - remove redundant regex and dead code - Remove redundant /qwen-vl/, /qwen2-vl/, /qwen2\.5-vl/ patterns since /\bvl\b/ already matches these models via word boundaries - Add clarifying comment for the non-OpenRouter provider fallback path, explaining that OpenRouter returns early using isLikelyVisionModel --- src/agents/pi-embedded-runner/model.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts index 58e08c9160b..f7ebbc8177a 100644 --- a/src/agents/pi-embedded-runner/model.ts +++ b/src/agents/pi-embedded-runner/model.ts @@ -66,9 +66,6 @@ function isLikelyVisionModel(modelId: string): boolean { /llava/, /llama-3\.2.*vision/, /pixtral/, - /qwen-vl/, - /qwen2-vl/, - /qwen2\.5-vl/, ]; return visionPatterns.some((pattern) => pattern.test(lower)); } @@ -337,6 +334,8 @@ export function resolveModelWithRegistry(params: { }); } + // Fallback for non-OpenRouter providers with custom providerConfig or mock models. + // OpenRouter returns early above using isLikelyVisionModel heuristic. 
const configuredModel = providerConfig?.models?.find((candidate) => candidate.id === modelId); const providerHeaders = sanitizeModelHeaders(providerConfig?.headers, { stripSecretRefMarkers: true, From 997343bc3f92854255849c541aa222e4ac70dcd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BF=BB=E5=BD=B9?= Date: Fri, 13 Mar 2026 16:51:17 +0800 Subject: [PATCH 4/8] fix(image): honor configured input and remove broad gpt-5 heuristic --- src/agents/pi-embedded-runner/model.ts | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts index f7ebbc8177a..74dcc296955 100644 --- a/src/agents/pi-embedded-runner/model.ts +++ b/src/agents/pi-embedded-runner/model.ts @@ -58,7 +58,6 @@ function isLikelyVisionModel(modelId: string): boolean { /gpt-4o/, /gpt-4-turbo/, /gpt-4-vision/, - /gpt-5/, /gemini-1\.5/, /gemini-2/, /gemini-pro-vision/, @@ -312,8 +311,20 @@ export function resolveModelWithRegistry(params: { // should work without being pre-registered in the local catalog. // This fallback uses heuristics when the plugin-based capability lookup returns nothing. // Note: configured models with provider-level `api` return early via inlineMatch, - // so we rely on heuristic detection for vision support here. + // so we rely on heuristic detection for vision support here, unless explicitly configured. if (normalizedProvider === "openrouter") { + // Honor explicitly configured input from providerConfig.models before applying heuristic. + const configuredOpenRouterModel = providerConfig?.models?.find( + (candidate) => candidate.id === modelId, + ); + const resolvedInput: Array<"text" | "image"> = configuredOpenRouterModel?.input + ? (configuredOpenRouterModel.input.filter( + (item) => item === "text" || item === "image", + ) as Array<"text" | "image">) + : isLikelyVisionModel(modelId) + ? ["text", "image"] + : ["text"]; + return normalizeResolvedModel({ provider, cfg, @@ -324,12 +335,12 @@ export function resolveModelWithRegistry(params: { api: "openai-completions", provider, baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: isLikelyVisionModel(modelId) ? ["text", "image"] : ["text"], + reasoning: configuredOpenRouterModel?.reasoning ?? false, + input: resolvedInput, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: DEFAULT_CONTEXT_TOKENS, + contextWindow: configuredOpenRouterModel?.contextWindow ?? DEFAULT_CONTEXT_TOKENS, // Align with OPENROUTER_DEFAULT_MAX_TOKENS in models-config.providers.ts - maxTokens: 8192, + maxTokens: configuredOpenRouterModel?.maxTokens ?? 8192, } as Model, }); } From 29616c33f34e888bccf9a0fe9dc0eb87cb266abe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BF=BB=E5=BD=B9?= Date: Fri, 13 Mar 2026 19:06:53 +0800 Subject: [PATCH 5/8] fix(image): remove unnecessary type assertion --- src/agents/pi-embedded-runner/model.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts index 74dcc296955..af49646d93c 100644 --- a/src/agents/pi-embedded-runner/model.ts +++ b/src/agents/pi-embedded-runner/model.ts @@ -318,9 +318,9 @@ export function resolveModelWithRegistry(params: { (candidate) => candidate.id === modelId, ); const resolvedInput: Array<"text" | "image"> = configuredOpenRouterModel?.input - ? (configuredOpenRouterModel.input.filter( + ? 
configuredOpenRouterModel.input.filter( (item) => item === "text" || item === "image", - ) as Array<"text" | "image">) + ) : isLikelyVisionModel(modelId) ? ["text", "image"] : ["text"]; From 2355b0603ce7e9708e421b634d161105469982cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BF=BB=E5=BD=B9?= Date: Fri, 13 Mar 2026 20:05:08 +0800 Subject: [PATCH 6/8] style: fix formatting in model.ts --- src/agents/pi-embedded-runner/model.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts index af49646d93c..30f2dede5ff 100644 --- a/src/agents/pi-embedded-runner/model.ts +++ b/src/agents/pi-embedded-runner/model.ts @@ -318,9 +318,7 @@ export function resolveModelWithRegistry(params: { (candidate) => candidate.id === modelId, ); const resolvedInput: Array<"text" | "image"> = configuredOpenRouterModel?.input - ? configuredOpenRouterModel.input.filter( - (item) => item === "text" || item === "image", - ) + ? configuredOpenRouterModel.input.filter((item) => item === "text" || item === "image") : isLikelyVisionModel(modelId) ? ["text", "image"] : ["text"]; From e39a8515f2dbeb0e380015bcb0793c7b62ab9b8f Mon Sep 17 00:00:00 2001 From: Jerry-Xin Date: Mon, 16 Mar 2026 15:28:40 +0800 Subject: [PATCH 7/8] fix(image): preserve provider config overrides in OpenRouter fallback Honor providerConfig.baseUrl, providerConfig.api, and headers in the OpenRouter fallback path instead of hardcoding defaults. This ensures custom OpenRouter gateways/proxies and auth headers are preserved when plugin-based capability lookup returns nothing. --- src/agents/pi-embedded-runner/model.ts | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts index 30f2dede5ff..35e692bc769 100644 --- a/src/agents/pi-embedded-runner/model.ts +++ b/src/agents/pi-embedded-runner/model.ts @@ -323,6 +323,13 @@ export function resolveModelWithRegistry(params: { ? ["text", "image"] : ["text"]; + const providerHeaders = sanitizeModelHeaders(providerConfig?.headers, { + stripSecretRefMarkers: true, + }); + const modelHeaders = sanitizeModelHeaders(configuredOpenRouterModel?.headers, { + stripSecretRefMarkers: true, + }); + return normalizeResolvedModel({ provider, cfg, @@ -330,15 +337,18 @@ export function resolveModelWithRegistry(params: { model: { id: modelId, name: modelId, - api: "openai-completions", + api: configuredOpenRouterModel?.api ?? providerConfig?.api ?? "openai-completions", provider, - baseUrl: "https://openrouter.ai/api/v1", + baseUrl: providerConfig?.baseUrl ?? "https://openrouter.ai/api/v1", reasoning: configuredOpenRouterModel?.reasoning ?? false, input: resolvedInput, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, contextWindow: configuredOpenRouterModel?.contextWindow ?? DEFAULT_CONTEXT_TOKENS, // Align with OPENROUTER_DEFAULT_MAX_TOKENS in models-config.providers.ts maxTokens: configuredOpenRouterModel?.maxTokens ?? 8192, + ...(providerHeaders || modelHeaders + ? { headers: { ...providerHeaders, ...modelHeaders } } + : {}), } as Model, }); } From 2ce93382dd96dae787e426d7d097a809ceeeac5b Mon Sep 17 00:00:00 2001 From: Jerry-Xin Date: Mon, 16 Mar 2026 16:07:26 +0800 Subject: [PATCH 8/8] fix(image): fall back to text when configured OpenRouter input list is empty When a model has an explicitly configured but empty input array, the filter returns an empty array, leaving the model with no supported modalities. 
Apply the same empty-array guard used by the non-OpenRouter fallback
path, falling back to the isLikelyVisionModel heuristic (and ultimately
to ["text"]) when the filtered result is empty.
---
 src/agents/pi-embedded-runner/model.ts | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts
index 35e692bc769..544690a3b34 100644
--- a/src/agents/pi-embedded-runner/model.ts
+++ b/src/agents/pi-embedded-runner/model.ts
@@ -317,11 +317,15 @@ export function resolveModelWithRegistry(params: {
     const configuredOpenRouterModel = providerConfig?.models?.find(
       (candidate) => candidate.id === modelId,
     );
-    const resolvedInput: Array<"text" | "image"> = configuredOpenRouterModel?.input
+    const configuredInput = configuredOpenRouterModel?.input
       ? configuredOpenRouterModel.input.filter((item) => item === "text" || item === "image")
-      : isLikelyVisionModel(modelId)
-        ? ["text", "image"]
-        : ["text"];
+      : undefined;
+    const resolvedInput: Array<"text" | "image"> =
+      configuredInput && configuredInput.length > 0
+        ? configuredInput
+        : isLikelyVisionModel(modelId)
+          ? ["text", "image"]
+          : ["text"];
 
     const providerHeaders = sanitizeModelHeaders(providerConfig?.headers, {
       stripSecretRefMarkers: true,