Merge 2ce93382dd96dae787e426d7d097a809ceeeac5b into 8a05c05596ca9ba0735dafd8e359885de4c2c969
This commit is contained in:
commit
354344da53
@ -459,16 +459,24 @@ describe("resolveModel", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("falls back to text-only when OpenRouter API cache is empty", () => {
|
||||
it("falls back to heuristic vision detection when OpenRouter API cache is empty", () => {
|
||||
mockGetOpenRouterModelCapabilities.mockReturnValue(undefined);
|
||||
|
||||
const result = resolveModel("openrouter", "openrouter/healer-alpha", "/tmp/agent");
|
||||
|
||||
expect(result.error).toBeUndefined();
|
||||
expect(result.model).toMatchObject({
|
||||
// Vision model detected by heuristic
|
||||
const visionResult = resolveModel("openrouter", "openai/gpt-4o", "/tmp/agent");
|
||||
expect(visionResult.error).toBeUndefined();
|
||||
expect(visionResult.model).toMatchObject({
|
||||
provider: "openrouter",
|
||||
id: "openrouter/healer-alpha",
|
||||
reasoning: false,
|
||||
id: "openai/gpt-4o",
|
||||
input: ["text", "image"],
|
||||
});
|
||||
|
||||
// Non-vision model defaults to text-only
|
||||
const textResult = resolveModel("openrouter", "deepseek/deepseek-chat", "/tmp/agent");
|
||||
expect(textResult.error).toBeUndefined();
|
||||
expect(textResult.model).toMatchObject({
|
||||
provider: "openrouter",
|
||||
id: "deepseek/deepseek-chat",
|
||||
input: ["text"],
|
||||
});
|
||||
});
|
||||
@ -536,6 +544,110 @@ describe("resolveModel", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// An OpenRouter model whose provider config explicitly declares image input
// must resolve with that configured input list.
it("resolves OpenRouter vision models with image input based on configured model", () => {
  const modelId = "anthropic/claude-opus-4-6";
  const cfg = {
    models: {
      providers: {
        openrouter: {
          baseUrl: "https://openrouter.ai/api/v1",
          api: "openai-completions",
          models: [{ ...makeModel(modelId), input: ["text", "image"] }],
        },
      },
    },
  } as OpenClawConfig;

  const { error, model } = resolveModel("openrouter", modelId, "/tmp/agent", cfg);

  expect(error).toBeUndefined();
  expect(model).toMatchObject({
    provider: "openrouter",
    id: modelId,
    input: ["text", "image"],
  });
});
|
||||
|
||||
// With no explicit config and the capability lookup mocked to return
// undefined, these IDs are expected to resolve with both text and image input.
it("resolves OpenRouter vision models by model ID pattern heuristic", () => {
  mockGetOpenRouterModelCapabilities.mockReturnValue(undefined);

  const idsExpectedToAcceptImages = [
    "openai/gpt-4o",
    "openai/gpt-4-turbo",
    "anthropic/claude-3-opus",
    "anthropic/claude-sonnet-4-5",
    "google/gemini-1.5-pro",
    "google/gemini-flash-2.0",
    "qwen/qwen2-vl-72b",
    "mistralai/pixtral-large",
    "meta-llama/llama-3.2-90b-vision",
  ];

  idsExpectedToAcceptImages.forEach((id) => {
    const { error, model } = resolveModel("openrouter", id, "/tmp/agent");

    expect(error).toBeUndefined();
    expect(model?.input).toContain("image");
    expect(model?.input).toContain("text");
  });
});
|
||||
|
||||
// IDs that carry no vision naming pattern are expected to resolve as text-only
// when the capability lookup is mocked to return undefined.
it("resolves OpenRouter text-only models without image input", () => {
  mockGetOpenRouterModelCapabilities.mockReturnValue(undefined);

  const idsExpectedToBeTextOnly = [
    "deepseek/deepseek-chat",
    "meta-llama/llama-3.1-70b-instruct",
    "mistralai/mistral-large",
  ];

  idsExpectedToBeTextOnly.forEach((id) => {
    const { error, model } = resolveModel("openrouter", id, "/tmp/agent");

    expect(error).toBeUndefined();
    expect(model?.input).toEqual(["text"]);
  });
});
|
||||
|
||||
// Regression test for #44648: an OpenRouter vision model set as
// imageModel.primary and listed in the provider config must resolve with
// image input capability.
it("uses explicitly configured imageModel.primary with OpenRouter (#44648)", () => {
  const modelId = "openai/gpt-4o";
  const cfg = {
    agents: {
      defaults: {
        imageModel: { primary: "openrouter/openai/gpt-4o" },
      },
    },
    models: {
      providers: {
        openrouter: {
          baseUrl: "https://openrouter.ai/api/v1",
          api: "openai-completions",
          models: [{ ...makeModel(modelId), input: ["text", "image"] }],
        },
      },
    },
  } as unknown as OpenClawConfig;

  const { error, model } = resolveModel("openrouter", modelId, "/tmp/agent", cfg);

  expect(error).toBeUndefined();
  expect(model).toMatchObject({
    provider: "openrouter",
    id: modelId,
    input: ["text", "image"],
  });
});
|
||||
|
||||
it("prefers configured provider api metadata over discovered registry model", () => {
|
||||
mockDiscoveredModel({
|
||||
provider: "onehub",
|
||||
|
||||
@ -33,6 +33,42 @@ type InlineProviderConfig = {
|
||||
headers?: unknown;
|
||||
};
|
||||
|
||||
/**
 * Patterns tested against a lower-cased model ID to guess whether the model
 * accepts image input. Hoisted to module scope so the list is built once
 * rather than on every call.
 */
const VISION_MODEL_ID_PATTERNS: readonly RegExp[] = [
  // Generic markers anywhere in the name.
  /vision/,
  // "vl" as its own segment: not directly adjacent to another letter. Digits,
  // "_" and punctuation count as separators, so qwen-vl, qwen2-vl-72b,
  // minimax-vl-01, deepseek-vl2 and qwen_vl all match, while words that merely
  // contain the letters "vl" do not.
  /(?<![a-z])vl(?![a-z])/,
  /visual/,
  // Claude 3 and Claude 4 family names.
  /claude-3/,
  /claude-opus-4/,
  /claude-sonnet-4/,
  /claude-haiku-4/,
  // GPT-4 vision variants.
  /gpt-4o/,
  /gpt-4-turbo/,
  /gpt-4-vision/,
  // Gemini models.
  /gemini-1\.5/,
  /gemini-2/,
  /gemini-pro-vision/,
  /gemini-flash/,
  // LLaVA and Llama 3.2 vision models.
  /llava/,
  /llama-3\.2.*vision/,
  // Pixtral models.
  /pixtral/,
];

/**
 * Heuristic to detect vision-capable models based on their model ID.
 * OpenRouter models often follow naming patterns that indicate vision support.
 *
 * Matching is case-insensitive: the ID is lower-cased before being tested.
 *
 * @param modelId - Model identifier, e.g. "openai/gpt-4o".
 * @returns `true` when the ID matches at least one known vision naming
 *   pattern, `false` otherwise.
 */
function isLikelyVisionModel(modelId: string): boolean {
  const lower = modelId.toLowerCase();
  return VISION_MODEL_ID_PATTERNS.some((pattern) => pattern.test(lower));
}
|
||||
|
||||
function sanitizeModelHeaders(
|
||||
headers: unknown,
|
||||
opts?: { stripSecretRefMarkers?: boolean },
|
||||
@ -248,6 +284,7 @@ export function resolveModelWithRegistry(params: {
|
||||
}
|
||||
|
||||
const { provider, modelId, cfg, modelRegistry, agentDir } = params;
|
||||
const normalizedProvider = normalizeProviderId(provider);
|
||||
const providerConfig = resolveConfiguredProviderConfig(cfg, provider);
|
||||
const pluginDynamicModel = runProviderDynamicModel({
|
||||
provider,
|
||||
@ -270,6 +307,58 @@ export function resolveModelWithRegistry(params: {
|
||||
});
|
||||
}
|
||||
|
||||
// OpenRouter is a pass-through proxy - any model ID available on OpenRouter
|
||||
// should work without being pre-registered in the local catalog.
|
||||
// This fallback uses heuristics when the plugin-based capability lookup returns nothing.
|
||||
// Note: configured models with provider-level `api` return early via inlineMatch,
|
||||
// so we rely on heuristic detection for vision support here, unless explicitly configured.
|
||||
if (normalizedProvider === "openrouter") {
|
||||
// Honor explicitly configured input from providerConfig.models before applying heuristic.
|
||||
const configuredOpenRouterModel = providerConfig?.models?.find(
|
||||
(candidate) => candidate.id === modelId,
|
||||
);
|
||||
const configuredInput = configuredOpenRouterModel?.input
|
||||
? configuredOpenRouterModel.input.filter((item) => item === "text" || item === "image")
|
||||
: undefined;
|
||||
const resolvedInput: Array<"text" | "image"> =
|
||||
configuredInput && configuredInput.length > 0
|
||||
? configuredInput
|
||||
: isLikelyVisionModel(modelId)
|
||||
? ["text", "image"]
|
||||
: ["text"];
|
||||
|
||||
const providerHeaders = sanitizeModelHeaders(providerConfig?.headers, {
|
||||
stripSecretRefMarkers: true,
|
||||
});
|
||||
const modelHeaders = sanitizeModelHeaders(configuredOpenRouterModel?.headers, {
|
||||
stripSecretRefMarkers: true,
|
||||
});
|
||||
|
||||
return normalizeResolvedModel({
|
||||
provider,
|
||||
cfg,
|
||||
agentDir,
|
||||
model: {
|
||||
id: modelId,
|
||||
name: modelId,
|
||||
api: configuredOpenRouterModel?.api ?? providerConfig?.api ?? "openai-completions",
|
||||
provider,
|
||||
baseUrl: providerConfig?.baseUrl ?? "https://openrouter.ai/api/v1",
|
||||
reasoning: configuredOpenRouterModel?.reasoning ?? false,
|
||||
input: resolvedInput,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: configuredOpenRouterModel?.contextWindow ?? DEFAULT_CONTEXT_TOKENS,
|
||||
// Align with OPENROUTER_DEFAULT_MAX_TOKENS in models-config.providers.ts
|
||||
maxTokens: configuredOpenRouterModel?.maxTokens ?? 8192,
|
||||
...(providerHeaders || modelHeaders
|
||||
? { headers: { ...providerHeaders, ...modelHeaders } }
|
||||
: {}),
|
||||
} as Model<Api>,
|
||||
});
|
||||
}
|
||||
|
||||
// Fallback for non-OpenRouter providers with custom providerConfig or mock models.
|
||||
// OpenRouter returns early above using isLikelyVisionModel heuristic.
|
||||
const configuredModel = providerConfig?.models?.find((candidate) => candidate.id === modelId);
|
||||
const providerHeaders = sanitizeModelHeaders(providerConfig?.headers, {
|
||||
stripSecretRefMarkers: true,
|
||||
|
||||
@ -466,6 +466,73 @@ describe("image tool implicit imageModel config", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Regression test for #44648: with an OpenRouter vision model configured as
// imageModel.primary, the image tool should issue exactly one request to the
// OpenRouter chat completions endpoint and surface the stubbed reply text.
it("uses explicitly configured openrouter imageModel.primary with image input (#44648)", async () => {
  await withTempAgentDir(async (agentDir) => {
    vi.stubEnv("OPENROUTER_API_KEY", "openrouter-test");
    const fetchMock = stubOpenAiCompletionsOkFetch("ok openrouter");

    const cfg: OpenClawConfig = {
      agents: {
        defaults: {
          model: { primary: "openrouter/deepseek/deepseek-chat" },
          imageModel: { primary: "openrouter/openai/gpt-4o" },
        },
      },
      models: {
        providers: {
          openrouter: {
            api: "openai-completions",
            baseUrl: "https://openrouter.ai/api/v1",
            models: [
              makeModelDefinition("openai/gpt-4o", ["text", "image"]),
              makeModelDefinition("deepseek/deepseek-chat", ["text"]),
            ],
          },
        },
      },
    };

    const imageTool = requireImageTool(createImageTool({ config: cfg, agentDir }));
    const outcome = await imageTool.execute("t1", {
      prompt: "Describe this image.",
      image: `data:image/png;base64,${ONE_PIXEL_PNG_B64}`,
    });

    expect(fetchMock).toHaveBeenCalledTimes(1);
    const firstCallArgs = fetchMock.mock.calls[0] as [unknown];
    const requestedUrl = firstCallArgs[0];
    expect(String(requestedUrl)).toBe("https://openrouter.ai/api/v1/chat/completions");

    const expectedTextPart = expect.objectContaining({ type: "text", text: "ok openrouter" });
    expect(outcome.content).toEqual(expect.arrayContaining([expectedTextPart]));
  });
});
|
||||
|
||||
// Regression test for #44648: with no provider model config at all, the image
// tool should still issue one request for an OpenRouter image model whose ID
// follows a known vision naming pattern.
it("resolves OpenRouter vision models by ID pattern heuristic (#44648)", async () => {
  await withTempAgentDir(async (agentDir) => {
    vi.stubEnv("OPENROUTER_API_KEY", "openrouter-test");
    const fetchMock = stubOpenAiCompletionsOkFetch("ok vision");

    // Only agent defaults are set; no explicit model definitions are provided.
    const cfg: OpenClawConfig = {
      agents: {
        defaults: {
          model: { primary: "openrouter/some-text-model" },
          imageModel: { primary: "openrouter/anthropic/claude-3-opus" },
        },
      },
    };

    const imageTool = requireImageTool(createImageTool({ config: cfg, agentDir }));
    const outcome = await imageTool.execute("t1", {
      prompt: "Describe this image.",
      image: `data:image/png;base64,${ONE_PIXEL_PNG_B64}`,
    });

    expect(fetchMock).toHaveBeenCalledTimes(1);

    const expectedTextPart = expect.objectContaining({ type: "text", text: "ok vision" });
    expect(outcome.content).toEqual(expect.arrayContaining([expectedTextPart]));
  });
});
|
||||
|
||||
it("exposes an Anthropic-safe image schema without union keywords", async () => {
|
||||
await withMinimaxImageToolFromTempAgentDir(async (tool) => {
|
||||
const violations = findSchemaUnionKeywords(tool.parameters, "image.parameters");
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user