diff --git a/.github/labeler.yml b/.github/labeler.yml index 67a74985465..dccc4db9c5b 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -325,3 +325,7 @@ - changed-files: - any-glob-to-any-file: - "extensions/fal/**" +"extensions: deepinfra": + - changed-files: + - any-glob-to-any-file: + - "extensions/deepinfra/**" diff --git a/CHANGELOG.md b/CHANGELOG.md index c2e2f7521ac..759cbfb10d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ Docs: https://docs.openclaw.ai - Plugins/Matrix: add `allowBots` room policy so configured Matrix bot accounts can talk to each other, with optional mention-only gating. Thanks @gumadeiras. - Plugins/Matrix: add per-account `allowPrivateNetwork` opt-in for private/internal homeservers, while keeping public cleartext homeservers blocked. Thanks @gumadeiras. - Web tools/Tavily: add Tavily as a bundled web-search provider with dedicated `tavily_search` and `tavily_extract` tools, using canonical plugin-owned config under `plugins.entries.tavily.config.webSearch.*`. (#49200) thanks @lakshyaag-tavily. +- Plugins/DeepInfra: add DeepInfra as a bundled LLM provider with API-key auth, dynamic model discovery, and default-on extension wiring. (#48088) Thanks @ats3v. - Docs/plugins: add the community DingTalk plugin listing to the docs catalog. (#29913) Thanks @sliverp. - Docs/plugins: add the community QQbot plugin listing to the docs catalog. (#29898) Thanks @sliverp. - Plugins/context engines: pass the embedded runner `modelId` into context-engine `assemble()` so plugins can adapt context formatting per model. (#47437) thanks @jscianna. 
diff --git a/appcast.xml b/appcast.xml index c1919972b22..bf80ac55964 100644 --- a/appcast.xml +++ b/appcast.xml @@ -245,4 +245,4 @@ - \ No newline at end of file + diff --git a/docs/.generated/config-baseline.json b/docs/.generated/config-baseline.json index de52713cc21..4ad27124118 100644 --- a/docs/.generated/config-baseline.json +++ b/docs/.generated/config-baseline.json @@ -47020,6 +47020,127 @@ "help": "Explicitly allows this plugin to request provider/model overrides in background subagent runs. Keep false unless the plugin is trusted to steer model selection.", "hasChildren": false }, + { + "path": "plugins.entries.deepinfra", + "kind": "plugin", + "type": "object", + "required": false, + "deprecated": false, + "sensitive": false, + "tags": [ + "advanced" + ], + "label": "@openclaw/deepinfra-provider", + "help": "OpenClaw DeepInfra provider plugin (plugin: deepinfra)", + "hasChildren": true + }, + { + "path": "plugins.entries.deepinfra.config", + "kind": "plugin", + "type": "object", + "required": false, + "deprecated": false, + "sensitive": false, + "tags": [ + "advanced" + ], + "label": "@openclaw/deepinfra-provider Config", + "help": "Plugin-defined config payload for deepinfra.", + "hasChildren": false + }, + { + "path": "plugins.entries.deepinfra.enabled", + "kind": "plugin", + "type": "boolean", + "required": false, + "deprecated": false, + "sensitive": false, + "tags": [ + "advanced" + ], + "label": "Enable @openclaw/deepinfra-provider", + "hasChildren": false + }, + { + "path": "plugins.entries.deepinfra.hooks", + "kind": "plugin", + "type": "object", + "required": false, + "deprecated": false, + "sensitive": false, + "tags": [ + "advanced" + ], + "label": "Plugin Hook Policy", + "help": "Per-plugin typed hook policy controls for core-enforced safety gates. 
Use this to constrain high-impact hook categories without disabling the entire plugin.", + "hasChildren": true + }, + { + "path": "plugins.entries.deepinfra.hooks.allowPromptInjection", + "kind": "plugin", + "type": "boolean", + "required": false, + "deprecated": false, + "sensitive": false, + "tags": [ + "access" + ], + "label": "Allow Prompt Injection Hooks", + "help": "Controls whether this plugin may mutate prompts through typed hooks. Set false to block `before_prompt_build` and ignore prompt-mutating fields from legacy `before_agent_start`, while preserving legacy `modelOverride` and `providerOverride` behavior.", + "hasChildren": false + }, + { + "path": "plugins.entries.deepinfra.subagent", + "kind": "plugin", + "type": "object", + "required": false, + "deprecated": false, + "sensitive": false, + "tags": [ + "advanced" + ], + "label": "Plugin Subagent Policy", + "help": "Per-plugin subagent runtime controls for model override trust and allowlists. Keep this unset unless a plugin must explicitly steer subagent model selection.", + "hasChildren": true + }, + { + "path": "plugins.entries.deepinfra.subagent.allowedModels", + "kind": "plugin", + "type": "array", + "required": false, + "deprecated": false, + "sensitive": false, + "tags": [ + "access" + ], + "label": "Plugin Subagent Allowed Models", + "help": "Allowed override targets for trusted plugin subagent runs as canonical \"provider/model\" refs. 
Use \"*\" only when you intentionally allow any model.", + "hasChildren": true + }, + { + "path": "plugins.entries.deepinfra.subagent.allowedModels.*", + "kind": "plugin", + "type": "string", + "required": false, + "deprecated": false, + "sensitive": false, + "tags": [], + "hasChildren": false + }, + { + "path": "plugins.entries.deepinfra.subagent.allowModelOverride", + "kind": "plugin", + "type": "boolean", + "required": false, + "deprecated": false, + "sensitive": false, + "tags": [ + "access" + ], + "label": "Allow Plugin Subagent Model Override", + "help": "Explicitly allows this plugin to request provider/model overrides in background subagent runs. Keep false unless the plugin is trusted to steer model selection.", + "hasChildren": false + }, { "path": "plugins.entries.device-pair", "kind": "plugin", diff --git a/docs/.generated/config-baseline.jsonl b/docs/.generated/config-baseline.jsonl index 85f12a83a8c..808a29d7b02 100644 --- a/docs/.generated/config-baseline.jsonl +++ b/docs/.generated/config-baseline.jsonl @@ -1,4 +1,4 @@ -{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5549} +{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5558} {"recordType":"path","path":"acp","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"ACP","help":"ACP runtime controls for enabling dispatch, selecting backends, constraining allowed agent targets, and tuning streamed turn projection behavior.","hasChildren":true} {"recordType":"path","path":"acp.allowedAgents","kind":"core","type":"array","required":false,"deprecated":false,"sensitive":false,"tags":["access"],"label":"ACP Allowed Agents","help":"Allowlist of ACP target agent ids permitted for ACP runtime sessions. 
Empty means no additional allowlist restriction.","hasChildren":true} {"recordType":"path","path":"acp.allowedAgents.*","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false} @@ -4155,6 +4155,15 @@ {"recordType":"path","path":"plugins.entries.copilot-proxy.subagent.allowedModels","kind":"plugin","type":"array","required":false,"deprecated":false,"sensitive":false,"tags":["access"],"label":"Plugin Subagent Allowed Models","help":"Allowed override targets for trusted plugin subagent runs as canonical \"provider/model\" refs. Use \"*\" only when you intentionally allow any model.","hasChildren":true} {"recordType":"path","path":"plugins.entries.copilot-proxy.subagent.allowedModels.*","kind":"plugin","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false} {"recordType":"path","path":"plugins.entries.copilot-proxy.subagent.allowModelOverride","kind":"plugin","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":["access"],"label":"Allow Plugin Subagent Model Override","help":"Explicitly allows this plugin to request provider/model overrides in background subagent runs. 
Keep false unless the plugin is trusted to steer model selection.","hasChildren":false} +{"recordType":"path","path":"plugins.entries.deepinfra","kind":"plugin","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"@openclaw/deepinfra-provider","help":"OpenClaw DeepInfra provider plugin (plugin: deepinfra)","hasChildren":true} +{"recordType":"path","path":"plugins.entries.deepinfra.config","kind":"plugin","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"@openclaw/deepinfra-provider Config","help":"Plugin-defined config payload for deepinfra.","hasChildren":false} +{"recordType":"path","path":"plugins.entries.deepinfra.enabled","kind":"plugin","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"Enable @openclaw/deepinfra-provider","hasChildren":false} +{"recordType":"path","path":"plugins.entries.deepinfra.hooks","kind":"plugin","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"Plugin Hook Policy","help":"Per-plugin typed hook policy controls for core-enforced safety gates. Use this to constrain high-impact hook categories without disabling the entire plugin.","hasChildren":true} +{"recordType":"path","path":"plugins.entries.deepinfra.hooks.allowPromptInjection","kind":"plugin","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":["access"],"label":"Allow Prompt Injection Hooks","help":"Controls whether this plugin may mutate prompts through typed hooks. 
Set false to block `before_prompt_build` and ignore prompt-mutating fields from legacy `before_agent_start`, while preserving legacy `modelOverride` and `providerOverride` behavior.","hasChildren":false} +{"recordType":"path","path":"plugins.entries.deepinfra.subagent","kind":"plugin","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"Plugin Subagent Policy","help":"Per-plugin subagent runtime controls for model override trust and allowlists. Keep this unset unless a plugin must explicitly steer subagent model selection.","hasChildren":true} +{"recordType":"path","path":"plugins.entries.deepinfra.subagent.allowedModels","kind":"plugin","type":"array","required":false,"deprecated":false,"sensitive":false,"tags":["access"],"label":"Plugin Subagent Allowed Models","help":"Allowed override targets for trusted plugin subagent runs as canonical \"provider/model\" refs. Use \"*\" only when you intentionally allow any model.","hasChildren":true} +{"recordType":"path","path":"plugins.entries.deepinfra.subagent.allowedModels.*","kind":"plugin","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false} +{"recordType":"path","path":"plugins.entries.deepinfra.subagent.allowModelOverride","kind":"plugin","type":"boolean","required":false,"deprecated":false,"sensitive":false,"tags":["access"],"label":"Allow Plugin Subagent Model Override","help":"Explicitly allows this plugin to request provider/model overrides in background subagent runs. Keep false unless the plugin is trusted to steer model selection.","hasChildren":false} {"recordType":"path","path":"plugins.entries.device-pair","kind":"plugin","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"Device Pairing","help":"Generate setup codes and approve device pairing requests. 
(plugin: device-pair)","hasChildren":true} {"recordType":"path","path":"plugins.entries.device-pair.config","kind":"plugin","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"Device Pairing Config","help":"Plugin-defined config payload for device-pair.","hasChildren":true} {"recordType":"path","path":"plugins.entries.device-pair.config.publicUrl","kind":"plugin","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"Gateway URL","help":"Public WebSocket URL used for /pair setup codes (ws/wss or http/https).","hasChildren":false} diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md index ebcf7e49290..69ecef519f4 100644 --- a/docs/concepts/model-providers.md +++ b/docs/concepts/model-providers.md @@ -252,6 +252,16 @@ OpenClaw ships with the pi‑ai catalog. These providers require **no** See [/providers/kilocode](/providers/kilocode) for setup details. +### DeepInfra + +- Provider: `deepinfra` +- Auth: `DEEPINFRA_API_KEY` +- Example model: `deepinfra/openai/gpt-oss-120b` +- CLI: `openclaw onboard --deepinfra-api-key <key>` +- Base URL: `https://api.deepinfra.com/v1/openai/` + +See [/providers/deepinfra](/providers/deepinfra) for setup details.
+ ### Other bundled provider plugins - OpenRouter: `openrouter` (`OPENROUTER_API_KEY`) diff --git a/docs/docs.json b/docs/docs.json index be9fa476ea7..ad446d45372 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -1136,6 +1136,7 @@ "providers/cloudflare-ai-gateway", "providers/claude-max-api-proxy", "providers/deepgram", + "providers/deepinfra", "providers/github-copilot", "providers/google", "providers/groq", diff --git a/docs/providers/deepinfra.md b/docs/providers/deepinfra.md new file mode 100644 index 00000000000..15996885ace --- /dev/null +++ b/docs/providers/deepinfra.md @@ -0,0 +1,62 @@ +--- +summary: "Use DeepInfra's unified API to access the most popular open source models in OpenClaw" +read_when: + - You want a single API key for the top open source LLMs + - You want to run models via DeepInfra's API in OpenClaw +--- + +# DeepInfra + +DeepInfra provides a **unified API** that routes requests to the most popular open source models behind a single +endpoint and API key. It is OpenAI-compatible, so most OpenAI SDKs work by switching the base URL. + +## Getting an API key + +1. Go to [https://deepinfra.com/](https://deepinfra.com/) +2. Sign in or create an account +3. Navigate to Dashboard / Keys and generate a new API key or use the auto-created one + +## CLI setup + +```bash +openclaw onboard --deepinfra-api-key <key> +``` + +Or set the environment variable: + +```bash +export DEEPINFRA_API_KEY="<key>" # pragma: allowlist secret +``` + +## Config snippet + +```json5 +{ + env: { DEEPINFRA_API_KEY: "<key>" }, // pragma: allowlist secret + agents: { + defaults: { + model: { primary: "deepinfra/openai/gpt-oss-120b" }, + }, + }, +} +``` + +## Available models + +OpenClaw dynamically discovers available DeepInfra models at startup. Use +`/models deepinfra` to see the full list of models available with your account.
+ +Any model available on [DeepInfra.com](https://deepinfra.com/) can be used with the `deepinfra/` prefix: + +``` +deepinfra/MiniMaxAI/MiniMax-M2.5 +deepinfra/zai-org/GLM-5 +deepinfra/moonshotai/Kimi-K2.5 +...and many more +``` + +## Notes + +- Model refs are `deepinfra/<org>/<model>` (e.g., `deepinfra/Qwen/Qwen3-Max`). +- Default model: `deepinfra/openai/gpt-oss-120b` +- Base URL: `https://api.deepinfra.com/v1/openai/` diff --git a/docs/providers/index.md b/docs/providers/index.md index 93ccdf27635..b031e637ca4 100644 --- a/docs/providers/index.md +++ b/docs/providers/index.md @@ -29,6 +29,7 @@ Looking for chat channel docs (WhatsApp/Telegram/Discord/Slack/Mattermost (plugi - [Amazon Bedrock](/providers/bedrock) - [Anthropic (API + Claude Code CLI)](/providers/anthropic) - [Cloudflare AI Gateway](/providers/cloudflare-ai-gateway) +- [DeepInfra](/providers/deepinfra) - [GLM models](/providers/glm) - [Google (Gemini)](/providers/google) - [Groq (LPU inference)](/providers/groq) diff --git a/extensions/deepinfra/index.ts b/extensions/deepinfra/index.ts new file mode 100644 index 00000000000..b4b7c5f4d86 --- /dev/null +++ b/extensions/deepinfra/index.ts @@ -0,0 +1,84 @@ +import { definePluginEntry } from "openclaw/plugin-sdk/core"; +import { createProviderApiKeyAuthMethod } from "openclaw/plugin-sdk/provider-auth"; +import { buildSingleProviderApiKeyCatalog } from "openclaw/plugin-sdk/provider-catalog"; +import { + createDeepInfraSystemCacheWrapper, + createDeepInfraWrapper, + isProxyReasoningUnsupported, +} from "openclaw/plugin-sdk/provider-stream"; +import { applyDeepInfraConfig, DEEPINFRA_DEFAULT_MODEL_REF } from "./onboard.js"; +import { buildDeepInfraProviderWithDiscovery } from "./provider-catalog.js"; + +const PROVIDER_ID = "deepinfra"; + +const DEEPINFRA_CACHE_TTL_MODEL_PREFIXES = [ + "anthropic/", + "moonshot/", + "moonshotai/", + "zai/", + "zai-org/", +] as const; + +function isDeepInfraCacheTtlModel(modelId: string): boolean { + return
DEEPINFRA_CACHE_TTL_MODEL_PREFIXES.some((prefix) => modelId.startsWith(prefix)); +} + +export default definePluginEntry({ + id: PROVIDER_ID, + name: "DeepInfra Provider", + description: "Bundled DeepInfra provider plugin", + register(api) { + api.registerProvider({ + id: PROVIDER_ID, + label: "DeepInfra", + docsPath: "/providers/deepinfra", + envVars: ["DEEPINFRA_API_KEY"], + auth: [ + createProviderApiKeyAuthMethod({ + providerId: PROVIDER_ID, + methodId: "api-key", + label: "DeepInfra API key", + hint: "Unified API for open source models", + optionKey: "deepinfraApiKey", + flagName: "--deepinfra-api-key", + envVar: "DEEPINFRA_API_KEY", + promptMessage: "Enter DeepInfra API key", + defaultModel: DEEPINFRA_DEFAULT_MODEL_REF, + expectedProviders: ["deepinfra"], + applyConfig: (cfg) => applyDeepInfraConfig(cfg), + wizard: { + choiceId: "deepinfra-api-key", + choiceLabel: "DeepInfra API key", + groupId: "deepinfra", + groupLabel: "DeepInfra", + groupHint: "Unified API for open source models", + }, + }), + ], + catalog: { + order: "simple", + run: (ctx) => + buildSingleProviderApiKeyCatalog({ + ctx, + providerId: PROVIDER_ID, + buildProvider: buildDeepInfraProviderWithDiscovery, + }), + }, + capabilities: { + openAiCompatTurnValidation: false, + geminiThoughtSignatureSanitization: true, + geminiThoughtSignatureModelHints: ["gemini"], + dropThinkingBlockModelHints: ["claude"], + }, + wrapStreamFn: (ctx) => { + const thinkingLevel = isProxyReasoningUnsupported(ctx.modelId) + ? 
undefined + : ctx.thinkingLevel; + let streamFn = createDeepInfraWrapper(ctx.streamFn, thinkingLevel); + streamFn = createDeepInfraSystemCacheWrapper(streamFn); + return streamFn; + }, + isCacheTtlEligible: (ctx) => isDeepInfraCacheTtlModel(ctx.modelId), + }); + }, +}); diff --git a/extensions/deepinfra/onboard.ts b/extensions/deepinfra/onboard.ts new file mode 100644 index 00000000000..b19a7d6b1a2 --- /dev/null +++ b/extensions/deepinfra/onboard.ts @@ -0,0 +1,36 @@ +import { + DEEPINFRA_BASE_URL, + DEEPINFRA_DEFAULT_MODEL_REF, +} from "openclaw/plugin-sdk/provider-models"; +import { + applyAgentDefaultModelPrimary, + type OpenClawConfig, +} from "openclaw/plugin-sdk/provider-onboard"; + +export { DEEPINFRA_BASE_URL, DEEPINFRA_DEFAULT_MODEL_REF }; + +export function applyDeepInfraProviderConfig(cfg: OpenClawConfig): OpenClawConfig { + const models = { ...cfg.agents?.defaults?.models }; + models[DEEPINFRA_DEFAULT_MODEL_REF] = { + ...models[DEEPINFRA_DEFAULT_MODEL_REF], + alias: models[DEEPINFRA_DEFAULT_MODEL_REF]?.alias ?? 
"DeepInfra", + }; + + return { + ...cfg, + agents: { + ...cfg.agents, + defaults: { + ...cfg.agents?.defaults, + models, + }, + }, + }; +} + +export function applyDeepInfraConfig(cfg: OpenClawConfig): OpenClawConfig { + return applyAgentDefaultModelPrimary( + applyDeepInfraProviderConfig(cfg), + DEEPINFRA_DEFAULT_MODEL_REF, + ); +} diff --git a/extensions/deepinfra/openclaw.plugin.json b/extensions/deepinfra/openclaw.plugin.json new file mode 100644 index 00000000000..cac903f6973 --- /dev/null +++ b/extensions/deepinfra/openclaw.plugin.json @@ -0,0 +1,28 @@ +{ + "id": "deepinfra", + "providers": ["deepinfra"], + "providerAuthEnvVars": { + "deepinfra": ["DEEPINFRA_API_KEY"] + }, + "providerAuthChoices": [ + { + "provider": "deepinfra", + "method": "api-key", + "choiceId": "deepinfra-api-key", + "choiceLabel": "DeepInfra API key", + "choiceHint": "Unified API for open source models", + "groupId": "deepinfra", + "groupLabel": "DeepInfra", + "groupHint": "Unified API for open source models", + "optionKey": "deepinfraApiKey", + "cliFlag": "--deepinfra-api-key", + "cliOption": "--deepinfra-api-key ", + "cliDescription": "DeepInfra API key" + } + ], + "configSchema": { + "type": "object", + "additionalProperties": false, + "properties": {} + } +} diff --git a/extensions/deepinfra/package.json b/extensions/deepinfra/package.json new file mode 100644 index 00000000000..67915591d64 --- /dev/null +++ b/extensions/deepinfra/package.json @@ -0,0 +1,12 @@ +{ + "name": "@openclaw/deepinfra-provider", + "version": "2026.3.14", + "private": true, + "description": "OpenClaw DeepInfra provider plugin", + "type": "module", + "openclaw": { + "extensions": [ + "./index.ts" + ] + } +} diff --git a/extensions/deepinfra/provider-catalog.ts b/extensions/deepinfra/provider-catalog.ts new file mode 100644 index 00000000000..6e3ba6c12b8 --- /dev/null +++ b/extensions/deepinfra/provider-catalog.ts @@ -0,0 +1,14 @@ +import { + type ModelProviderConfig, + discoverDeepInfraModels, + 
DEEPINFRA_BASE_URL, +} from "openclaw/plugin-sdk/provider-models"; + +export async function buildDeepInfraProviderWithDiscovery(): Promise { + const models = await discoverDeepInfraModels(); + return { + baseUrl: DEEPINFRA_BASE_URL, + api: "openai-completions", + models, + }; +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7f438d0a2e3..4f91c8991fa 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -261,6 +261,8 @@ importers: extensions/copilot-proxy: {} + extensions/deepinfra: {} + extensions/diagnostics-otel: dependencies: '@opentelemetry/api': diff --git a/src/agents/deepinfra-models.test.ts b/src/agents/deepinfra-models.test.ts new file mode 100644 index 00000000000..e400322896c --- /dev/null +++ b/src/agents/deepinfra-models.test.ts @@ -0,0 +1,277 @@ +import { describe, expect, it, vi } from "vitest"; +import { discoverDeepInfraModels, DEEPINFRA_MODELS_URL } from "./deepinfra-models.js"; + +// discoverDeepInfraModels checks for VITEST env and returns static catalog, +// so we need to temporarily unset it to test the fetch path. 
+ +function makeModelEntry(overrides: Record = {}) { + return { + id: "openai/gpt-oss-120b", + object: "model", + owned_by: "deepinfra", + metadata: { + description: "A powerful model", + context_length: 131072, + max_tokens: 131072, + pricing: { + input_tokens: 3.0, + output_tokens: 15.0, + cache_read_tokens: 0.3, + }, + tags: ["vision", "reasoning_effort", "prompt_cache", "reasoning"], + }, + ...overrides, + }; +} + +function makeTextOnlyEntry(overrides: Record = {}) { + return makeModelEntry({ + id: "minimaxai/minimax-m2.5", + metadata: { + description: "Text only model", + context_length: 196608, + max_tokens: 196608, + pricing: { + input_tokens: 1.0, + output_tokens: 2.0, + }, + tags: [], + }, + ...overrides, + }); +} + +async function withFetchPathTest( + mockFetch: ReturnType, + runAssertions: () => Promise, +) { + const origNodeEnv = process.env.NODE_ENV; + const origVitest = process.env.VITEST; + delete process.env.NODE_ENV; + delete process.env.VITEST; + + vi.stubGlobal("fetch", mockFetch); + + try { + await runAssertions(); + } finally { + if (origNodeEnv === undefined) { + delete process.env.NODE_ENV; + } else { + process.env.NODE_ENV = origNodeEnv; + } + if (origVitest === undefined) { + delete process.env.VITEST; + } else { + process.env.VITEST = origVitest; + } + vi.unstubAllGlobals(); + } +} + +describe("discoverDeepInfraModels", () => { + it("returns static catalog in test environment", async () => { + const models = await discoverDeepInfraModels(); + expect(models.length).toBeGreaterThan(0); + expect(models.some((m) => m.id === "openai/gpt-oss-120b")).toBe(true); + }); + + it("static catalog has correct defaults for default model", async () => { + const models = await discoverDeepInfraModels(); + const defaultModel = models.find((m) => m.id === "openai/gpt-oss-120b"); + expect(defaultModel).toBeDefined(); + expect(defaultModel?.name).toBe("gpt-oss-120b"); + expect(defaultModel?.reasoning).toBe(true); + 
expect(defaultModel?.input).toEqual(["text"]); + expect(defaultModel?.contextWindow).toBe(131072); + expect(defaultModel?.maxTokens).toBe(131072); + expect(defaultModel?.cost).toEqual({ input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }); + }); +}); + +describe("discoverDeepInfraModels (fetch path)", () => { + it("fetches from the correct URL with Accept header", async () => { + const mockFetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ data: [makeModelEntry()] }), + }); + await withFetchPathTest(mockFetch, async () => { + await discoverDeepInfraModels(); + expect(mockFetch).toHaveBeenCalledWith( + DEEPINFRA_MODELS_URL, + expect.objectContaining({ + headers: { Accept: "application/json" }, + }), + ); + }); + }); + + it("parses model pricing correctly", async () => { + const mockFetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ data: [makeModelEntry()] }), + }); + await withFetchPathTest(mockFetch, async () => { + const models = await discoverDeepInfraModels(); + const model = models.find((m) => m.id === "openai/gpt-oss-120b"); + expect(model).toBeDefined(); + expect(model?.cost.input).toBeCloseTo(3.0); + expect(model?.cost.output).toBeCloseTo(15.0); + expect(model?.cost.cacheRead).toBeCloseTo(0.3); + expect(model?.cost.cacheWrite).toBe(0); + }); + }); + + it("detects vision models with image modality", async () => { + const mockFetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ data: [makeModelEntry()] }), + }); + await withFetchPathTest(mockFetch, async () => { + const models = await discoverDeepInfraModels(); + const model = models.find((m) => m.id === "openai/gpt-oss-120b"); + expect(model?.input).toEqual(["text", "image"]); + }); + }); + + it("detects text-only models without image modality", async () => { + const mockFetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ data: [makeTextOnlyEntry()] }), + }); + await withFetchPathTest(mockFetch, 
async () => { + const models = await discoverDeepInfraModels(); + const model = models.find((m) => m.id === "minimaxai/minimax-m2.5"); + expect(model?.input).toEqual(["text"]); + }); + }); + + it("detects reasoning models via reasoning_effort tag", async () => { + const mockFetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ data: [makeModelEntry(), makeTextOnlyEntry()] }), + }); + await withFetchPathTest(mockFetch, async () => { + const models = await discoverDeepInfraModels(); + expect(models.find((m) => m.id === "openai/gpt-oss-120b")?.reasoning).toBe(true); + expect(models.find((m) => m.id === "minimaxai/minimax-m2.5")?.reasoning).toBe(false); + }); + }); + + it("uses defaults when context_length and max_tokens are missing", async () => { + const entryNoLimits = makeModelEntry({ + id: "some/model", + metadata: { + pricing: { input_tokens: 1, output_tokens: 2 }, + tags: [], + }, + }); + const mockFetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ data: [entryNoLimits] }), + }); + await withFetchPathTest(mockFetch, async () => { + const models = await discoverDeepInfraModels(); + const model = models.find((m) => m.id === "some/model"); + expect(model?.contextWindow).toBe(128000); + expect(model?.maxTokens).toBe(8192); + }); + }); + + it("uses zero cost when pricing fields are missing", async () => { + const entryNoPricing = makeModelEntry({ + id: "some/free-model", + metadata: { + context_length: 32000, + max_tokens: 4096, + tags: [], + }, + }); + const mockFetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ data: [entryNoPricing] }), + }); + await withFetchPathTest(mockFetch, async () => { + const models = await discoverDeepInfraModels(); + const model = models.find((m) => m.id === "some/free-model"); + expect(model?.cost).toEqual({ input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }); + }); + }); + + it("skips models with null metadata (embeddings, image-gen, etc.)", async () => 
{ + const embeddingEntry = { id: "BAAI/bge-m3", object: "model", metadata: null }; + const mockFetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ data: [embeddingEntry, makeModelEntry()] }), + }); + await withFetchPathTest(mockFetch, async () => { + const models = await discoverDeepInfraModels(); + expect(models.some((m) => m.id === "BAAI/bge-m3")).toBe(false); + expect(models.some((m) => m.id === "openai/gpt-oss-120b")).toBe(true); + }); + }); + + it("deduplicates models with the same id", async () => { + const mockFetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ data: [makeModelEntry(), makeModelEntry()] }), + }); + await withFetchPathTest(mockFetch, async () => { + const models = await discoverDeepInfraModels(); + const matches = models.filter((m) => m.id === "openai/gpt-oss-120b"); + expect(matches.length).toBe(1); + }); + }); + + it("falls back to static catalog on network error", async () => { + const mockFetch = vi.fn().mockRejectedValue(new Error("network error")); + await withFetchPathTest(mockFetch, async () => { + const models = await discoverDeepInfraModels(); + expect(models.length).toBeGreaterThan(0); + expect(models.some((m) => m.id === "openai/gpt-oss-120b")).toBe(true); + }); + }); + + it("falls back to static catalog on HTTP error", async () => { + const mockFetch = vi.fn().mockResolvedValue({ + ok: false, + status: 500, + }); + await withFetchPathTest(mockFetch, async () => { + const models = await discoverDeepInfraModels(); + expect(models.length).toBeGreaterThan(0); + expect(models.some((m) => m.id === "openai/gpt-oss-120b")).toBe(true); + }); + }); + + it("falls back to static catalog when response has empty data array", async () => { + const mockFetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ data: [] }), + }); + await withFetchPathTest(mockFetch, async () => { + const models = await discoverDeepInfraModels(); + 
expect(models.length).toBeGreaterThan(0); + expect(models.some((m) => m.id === "openai/gpt-oss-120b")).toBe(true); + }); + }); + + it("falls back to static catalog when all entries have null metadata", async () => { + const mockFetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + data: [ + { id: "BAAI/bge-m3", metadata: null }, + { id: "stabilityai/sdxl", metadata: null }, + ], + }), + }); + await withFetchPathTest(mockFetch, async () => { + const models = await discoverDeepInfraModels(); + expect(models.length).toBeGreaterThan(0); + // Falls back to static catalog + expect(models.some((m) => m.id === "openai/gpt-oss-120b")).toBe(true); + }); + }); +}); diff --git a/src/agents/deepinfra-models.ts b/src/agents/deepinfra-models.ts new file mode 100644 index 00000000000..571367c28bf --- /dev/null +++ b/src/agents/deepinfra-models.ts @@ -0,0 +1,156 @@ +import type { ModelDefinitionConfig } from "../config/types.js"; +import { createSubsystemLogger } from "../logging/subsystem.js"; +import { + DEEPINFRA_BASE_URL, + DEEPINFRA_DEFAULT_CONTEXT_WINDOW, + DEEPINFRA_DEFAULT_COST, + DEEPINFRA_DEFAULT_MAX_TOKENS, + DEEPINFRA_MODEL_CATALOG, +} from "../providers/deepinfra-shared.js"; + +const log = createSubsystemLogger("deepinfra-models"); + +export const DEEPINFRA_MODELS_URL = `${DEEPINFRA_BASE_URL}models`; + +const DISCOVERY_TIMEOUT_MS = 5000; + +// --------------------------------------------------------------------------- +// API response types (DeepInfra OpenAI-compatible /models schema) +// --------------------------------------------------------------------------- + +interface DeepInfraModelPricing { + input_tokens?: number; + output_tokens?: number; + cache_read_tokens?: number; +} + +interface DeepInfraModelMetadata { + description?: string; + context_length?: number; + max_tokens?: number; + pricing?: DeepInfraModelPricing; + /** e.g. 
["vision", "reasoning_effort", "prompt_cache", "reasoning"] */ + tags?: string[]; +} + +interface DeepInfraModelEntry { + id: string; + object?: string; + owned_by?: string; + metadata: DeepInfraModelMetadata | null; +} + +interface DeepInfraModelsResponse { + data: DeepInfraModelEntry[]; +} + +// --------------------------------------------------------------------------- +// Model parsing +// --------------------------------------------------------------------------- + +function parseModality(metadata: DeepInfraModelMetadata): Array<"text" | "image"> { + const hasVision = metadata.tags?.includes("vision") ?? false; + return hasVision ? ["text", "image"] : ["text"]; +} + +function parseReasoning(metadata: DeepInfraModelMetadata): boolean { + return ( + (metadata.tags?.includes("reasoning_effort") || metadata.tags?.includes("reasoning")) ?? false + ); +} + +function toModelDefinition(entry: DeepInfraModelEntry): ModelDefinitionConfig { + // metadata is guaranteed non-null at call site + const meta = entry.metadata!; + return { + id: entry.id, + name: entry.id, + reasoning: parseReasoning(meta), + input: parseModality(meta), + cost: { + input: meta.pricing?.input_tokens ?? 0, + output: meta.pricing?.output_tokens ?? 0, + cacheRead: meta.pricing?.cache_read_tokens ?? 0, + cacheWrite: 0, + }, + contextWindow: meta.context_length ?? DEEPINFRA_DEFAULT_CONTEXT_WINDOW, + maxTokens: meta.max_tokens ?? DEEPINFRA_DEFAULT_MAX_TOKENS, + }; +} + +// --------------------------------------------------------------------------- +// Static fallback +// --------------------------------------------------------------------------- + +export function buildStaticCatalog(): ModelDefinitionConfig[] { + return DEEPINFRA_MODEL_CATALOG.map((model) => ({ + id: model.id, + name: model.name, + reasoning: model.reasoning, + input: model.input, + cost: DEEPINFRA_DEFAULT_COST, + contextWindow: model.contextWindow ?? DEEPINFRA_DEFAULT_CONTEXT_WINDOW, + maxTokens: model.maxTokens ?? 
DEEPINFRA_DEFAULT_MAX_TOKENS, + })); +} + +// --------------------------------------------------------------------------- +// Discovery +// --------------------------------------------------------------------------- + +/** + * Discover models from the DeepInfra API with fallback to static catalog. + * Skips models with null metadata (embeddings, image-gen, etc.). + */ +export async function discoverDeepInfraModels(): Promise { + // Skip API discovery in test environment + if (process.env.NODE_ENV === "test" || process.env.VITEST) { + return buildStaticCatalog(); + } + + try { + const response = await fetch(DEEPINFRA_MODELS_URL, { + headers: { Accept: "application/json" }, + signal: AbortSignal.timeout(DISCOVERY_TIMEOUT_MS), + }); + + if (!response.ok) { + log.warn(`Failed to discover models: HTTP ${response.status}, using static catalog`); + return buildStaticCatalog(); + } + + const data = (await response.json()) as DeepInfraModelsResponse; + if (!Array.isArray(data.data) || data.data.length === 0) { + log.warn("No models found from DeepInfra API, using static catalog"); + return buildStaticCatalog(); + } + + const models: ModelDefinitionConfig[] = []; + const discoveredIds = new Set(); + + for (const entry of data.data) { + if (!entry || typeof entry !== "object") { + continue; + } + const id = typeof entry.id === "string" ? entry.id.trim() : ""; + if (!id || discoveredIds.has(id)) { + continue; + } + // Skip non-completion models (embeddings, image-gen, etc.) + if (entry.metadata === null) { + continue; + } + try { + models.push(toModelDefinition(entry)); + discoveredIds.add(id); + } catch (e) { + log.warn(`Skipping malformed model entry "${id}": ${String(e)}`); + } + } + + return models.length > 0 ? 
models : buildStaticCatalog(); + } catch (error) { + log.warn(`Discovery failed: ${String(error)}, using static catalog`); + return buildStaticCatalog(); + } +} diff --git a/src/agents/models-config.e2e-harness.ts b/src/agents/models-config.e2e-harness.ts index bd01edc86be..5190bb070fc 100644 --- a/src/agents/models-config.e2e-harness.ts +++ b/src/agents/models-config.e2e-harness.ts @@ -109,6 +109,7 @@ export const MODELS_CONFIG_IMPLICIT_ENV_VARS = [ "VOLCANO_ENGINE_API_KEY", "BYTEPLUS_API_KEY", "KILOCODE_API_KEY", + "DEEPINFRA_API_KEY", "KIMI_API_KEY", "KIMICODE_API_KEY", "GEMINI_API_KEY", diff --git a/src/agents/models-config.providers.deepinfra.test.ts b/src/agents/models-config.providers.deepinfra.test.ts new file mode 100644 index 00000000000..d3787dd03d8 --- /dev/null +++ b/src/agents/models-config.providers.deepinfra.test.ts @@ -0,0 +1,64 @@ +import { mkdtempSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { describe, expect, it } from "vitest"; +import { captureEnv } from "../test-utils/env.ts"; +import { buildStaticCatalog } from "./deepinfra-models.ts"; +import { resolveImplicitProvidersForTest } from "./models-config.e2e-harness.ts"; + +const DEEPINFRA_MODEL_IDS = [ + "openai/gpt-oss-120b", + "MiniMaxAI/MiniMax-M2.5", + "zai-org/GLM-5", + "moonshotai/Kimi-K2.5", +]; + +describe("DeepInfra implicit provider", () => { + it("should include deepinfra when DEEPINFRA_API_KEY is configured", async () => { + const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-")); + const envSnapshot = captureEnv(["DEEPINFRA_API_KEY"]); + process.env.DEEPINFRA_API_KEY = "test-key"; // pragma: allowlist secret + + try { + const providers = await resolveImplicitProvidersForTest({ agentDir }); + expect(providers?.deepinfra).toBeDefined(); + expect(providers?.deepinfra?.models?.length).toBeGreaterThan(0); + } finally { + envSnapshot.restore(); + } + }); + + it("should not include deepinfra when no API key is configured", 
async () => { + const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-")); + const envSnapshot = captureEnv(["DEEPINFRA_API_KEY"]); + delete process.env.DEEPINFRA_API_KEY; + + try { + const providers = await resolveImplicitProvidersForTest({ agentDir }); + expect(providers?.deepinfra).toBeUndefined(); + } finally { + envSnapshot.restore(); + } + }); + + it("should build deepinfra provider with correct configuration", () => { + const models = buildStaticCatalog(); + expect(models).toBeDefined(); + expect(models.length).toBeGreaterThan(0); + }); + + it("should include the default deepinfra model", () => { + const models = buildStaticCatalog(); + const modelIds = models.map((m) => m.id); + expect(modelIds).toContain("openai/gpt-oss-120b"); + }); + + it("should include the static fallback catalog", () => { + const models = buildStaticCatalog(); + const modelIds = models.map((m) => m.id); + for (const modelId of DEEPINFRA_MODEL_IDS) { + expect(modelIds).toContain(modelId); + } + expect(models).toHaveLength(DEEPINFRA_MODEL_IDS.length); + }); +}); diff --git a/src/agents/models-config.providers.discovery.ts b/src/agents/models-config.providers.discovery.ts index b138c4853d1..ee06a272ee9 100644 --- a/src/agents/models-config.providers.discovery.ts +++ b/src/agents/models-config.providers.discovery.ts @@ -19,6 +19,7 @@ import { SGLANG_DEFAULT_BASE_URL, SGLANG_PROVIDER_LABEL } from "./sglang-default import { VLLM_DEFAULT_BASE_URL, VLLM_PROVIDER_LABEL } from "./vllm-defaults.js"; export { buildHuggingfaceProvider } from "../../extensions/huggingface/provider-catalog.js"; export { buildKilocodeProviderWithDiscovery } from "../../extensions/kilocode/provider-catalog.js"; +export { buildDeepInfraProviderWithDiscovery } from "../../extensions/deepinfra/provider-catalog.js"; export { buildVeniceProvider } from "../../extensions/venice/provider-catalog.js"; export { buildVercelAiGatewayProvider } from "../../extensions/vercel-ai-gateway/provider-catalog.js"; diff --git 
a/src/agents/pi-embedded-runner/cache-ttl.test.ts b/src/agents/pi-embedded-runner/cache-ttl.test.ts index f5ff8be2827..a806309a240 100644 --- a/src/agents/pi-embedded-runner/cache-ttl.test.ts +++ b/src/agents/pi-embedded-runner/cache-ttl.test.ts @@ -15,6 +15,11 @@ vi.mock("../../plugins/provider-runtime.js", () => ({ params.context.modelId.startsWith(prefix), ); } + if (params.context.provider === "deepinfra") { + return ["anthropic/", "moonshot/", "moonshotai/", "zai/", "zai-org/"].some((prefix) => + params.context.modelId.startsWith(prefix), + ); + } return undefined; }, })); @@ -29,6 +34,8 @@ describe("isCacheTtlEligibleProvider", () => { it("allows moonshot and zai providers", () => { expect(isCacheTtlEligibleProvider("moonshot", "kimi-k2.5")).toBe(true); expect(isCacheTtlEligibleProvider("zai", "glm-5")).toBe(true); + expect(isCacheTtlEligibleProvider("deepinfra", "zai-org/glm-5")).toBe(true); + expect(isCacheTtlEligibleProvider("deepinfra", "moonshotai/kimi-k2.5")).toBe(true); }); it("is case-insensitive for native providers", () => { @@ -46,5 +53,6 @@ describe("isCacheTtlEligibleProvider", () => { it("rejects unsupported providers and models", () => { expect(isCacheTtlEligibleProvider("openai", "gpt-4o")).toBe(false); expect(isCacheTtlEligibleProvider("openrouter", "openai/gpt-4o")).toBe(false); + expect(isCacheTtlEligibleProvider("deepinfra", "openai/gpt-4o")).toBe(false); }); }); diff --git a/src/agents/pi-embedded-runner/deepinfra.test.ts b/src/agents/pi-embedded-runner/deepinfra.test.ts new file mode 100644 index 00000000000..7c8a19a9a67 --- /dev/null +++ b/src/agents/pi-embedded-runner/deepinfra.test.ts @@ -0,0 +1,21 @@ +import { describe, expect, it } from "vitest"; +import { isCacheTtlEligibleProvider } from "./cache-ttl.js"; + +describe("deepinfra cache-ttl eligibility", () => { + it("is eligible when model starts with zai", () => { + expect(isCacheTtlEligibleProvider("deepinfra", "zai-org/glm-5")).toBe(true); + }); + + it("is eligible when model 
starts with moonshot", () => { + expect(isCacheTtlEligibleProvider("deepinfra", "moonshotai/kimi-k2.5")).toBe(true); + }); + + it("is not eligible for other models on deepinfra", () => { + expect(isCacheTtlEligibleProvider("deepinfra", "openai/gpt-oss-120b")).toBe(false); + }); + + it("is case-insensitive for provider name", () => { + expect(isCacheTtlEligibleProvider("DeepInfra", "moonshotai/kimi-k2.5")).toBe(true); + expect(isCacheTtlEligibleProvider("DEEPINFRA", "Moonshotai/kimi-k2.5")).toBe(true); + }); +}); diff --git a/src/agents/pi-embedded-runner/extra-params.deepinfra-cache-control.test.ts b/src/agents/pi-embedded-runner/extra-params.deepinfra-cache-control.test.ts new file mode 100644 index 00000000000..789af3df4b4 --- /dev/null +++ b/src/agents/pi-embedded-runner/extra-params.deepinfra-cache-control.test.ts @@ -0,0 +1,92 @@ +import type { Model } from "@mariozechner/pi-ai"; +import { describe, expect, it } from "vitest"; +import { runExtraParamsCase } from "./extra-params.test-support.js"; + +type StreamPayload = { + messages: Array<{ + role: string; + content: unknown; + }>; +}; + +function runDeepInfraPayload(payload: StreamPayload, modelId: string) { + runExtraParamsCase({ + cfg: { + plugins: { + entries: { + deepinfra: { + enabled: true, + }, + }, + }, + }, + model: { + api: "openai-completions", + provider: "deepinfra", + id: modelId, + } as Model<"openai-completions">, + payload, + }); +} + +describe("extra-params: DeepInfra Anthropic cache_control", () => { + it("injects cache_control into system message for DeepInfra Anthropic models", () => { + const payload = { + messages: [ + { role: "system", content: "You are a helpful assistant." 
}, + { role: "user", content: "Hello" }, + ], + }; + + runDeepInfraPayload(payload, "anthropic/claude-opus-4-6"); + + expect(payload.messages[0].content).toEqual([ + { type: "text", text: "You are a helpful assistant.", cache_control: { type: "ephemeral" } }, + ]); + expect(payload.messages[1].content).toBe("Hello"); + }); + + it("adds cache_control to last content block when system message is already array", () => { + const payload = { + messages: [ + { + role: "system", + content: [ + { type: "text", text: "Part 1" }, + { type: "text", text: "Part 2" }, + ], + }, + ], + }; + + runDeepInfraPayload(payload, "anthropic/claude-opus-4-6"); + + const content = payload.messages[0].content as Array>; + expect(content[0]).toEqual({ type: "text", text: "Part 1" }); + expect(content[1]).toEqual({ + type: "text", + text: "Part 2", + cache_control: { type: "ephemeral" }, + }); + }); + + it("does not inject cache_control for DeepInfra non-Anthropic models", () => { + const payload = { + messages: [{ role: "system", content: "You are a helpful assistant." 
}], + }; + + runDeepInfraPayload(payload, "google/gemini-2.5-pro"); + + expect(payload.messages[0].content).toBe("You are a helpful assistant."); + }); + + it("leaves payload unchanged when no system message exists", () => { + const payload = { + messages: [{ role: "user", content: "Hello" }], + }; + + runDeepInfraPayload(payload, "anthropic/claude-opus-4-6"); + + expect(payload.messages[0].content).toBe("Hello"); + }); +}); diff --git a/src/agents/pi-embedded-runner/proxy-stream-wrappers.test.ts b/src/agents/pi-embedded-runner/proxy-stream-wrappers.test.ts index 487d90582ef..54a39a53aee 100644 --- a/src/agents/pi-embedded-runner/proxy-stream-wrappers.test.ts +++ b/src/agents/pi-embedded-runner/proxy-stream-wrappers.test.ts @@ -2,7 +2,7 @@ import type { StreamFn } from "@mariozechner/pi-agent-core"; import type { Context, Model } from "@mariozechner/pi-ai"; import { createAssistantMessageEventStream } from "@mariozechner/pi-ai"; import { describe, expect, it } from "vitest"; -import { createOpenRouterWrapper } from "./proxy-stream-wrappers.js"; +import { createDeepInfraWrapper, createOpenRouterWrapper } from "./proxy-stream-wrappers.js"; describe("proxy stream wrappers", () => { it("adds OpenRouter attribution headers to stream options", () => { @@ -35,4 +35,67 @@ describe("proxy stream wrappers", () => { }, ]); }); + + describe("createDeepInfraWrapper", () => { + function capturePayloads() { + const payloads: unknown[] = []; + const baseStreamFn: StreamFn = (_model, _context, options) => { + const payload = { model: "test" }; + options?.onPayload?.(payload, _model); + payloads.push(structuredClone(payload)); + return createAssistantMessageEventStream(); + }; + return { baseStreamFn, payloads }; + } + + const model = { + api: "openai-completions", + provider: "deepinfra", + id: "moonshotai/Kimi-K2.5", + } as Model<"openai-completions">; + const context: Context = { messages: [] }; + + it("injects reasoning effort when thinkingLevel is set", () => { + const { 
baseStreamFn, payloads } = capturePayloads(); + const wrapped = createDeepInfraWrapper(baseStreamFn, "high"); + void wrapped(model, context, {}); + + expect(payloads[0]).toEqual({ + model: "test", + reasoning: { effort: "high" }, + }); + }); + + it("maps 'off' to no reasoning field", () => { + const { baseStreamFn, payloads } = capturePayloads(); + const wrapped = createDeepInfraWrapper(baseStreamFn, "off"); + void wrapped(model, context, {}); + + expect(payloads[0]).toEqual({ model: "test" }); + }); + + it("does not inject reasoning when thinkingLevel is undefined", () => { + const { baseStreamFn, payloads } = capturePayloads(); + const wrapped = createDeepInfraWrapper(baseStreamFn, undefined); + void wrapped(model, context, {}); + + expect(payloads[0]).toEqual({ model: "test" }); + }); + + it("preserves existing onPayload callback", () => { + const { baseStreamFn } = capturePayloads(); + const wrapped = createDeepInfraWrapper(baseStreamFn, "low"); + const seen: unknown[] = []; + void wrapped(model, context, { + onPayload: (payload) => { + seen.push(structuredClone(payload)); + }, + }); + + expect(seen[0]).toEqual({ + model: "test", + reasoning: { effort: "low" }, + }); + }); + }); }); diff --git a/src/agents/pi-embedded-runner/proxy-stream-wrappers.ts b/src/agents/pi-embedded-runner/proxy-stream-wrappers.ts index cc5e7596050..70ea42bec41 100644 --- a/src/agents/pi-embedded-runner/proxy-stream-wrappers.ts +++ b/src/agents/pi-embedded-runner/proxy-stream-wrappers.ts @@ -15,6 +15,10 @@ function isOpenRouterAnthropicModel(provider: string, modelId: string): boolean return provider.toLowerCase() === "openrouter" && modelId.toLowerCase().startsWith("anthropic/"); } +function isDeepInfraAnthropicModel(provider: string, modelId: string): boolean { + return provider.toLowerCase() === "deepinfra" && modelId.toLowerCase().startsWith("anthropic/"); +} + function mapThinkingLevelToOpenRouterReasoningEffort( thinkingLevel: ThinkLevel, ): "none" | "minimal" | "low" | "medium" | 
"high" | "xhigh" { @@ -55,13 +59,16 @@ function normalizeProxyReasoningPayload(payload: unknown, thinkingLevel?: ThinkL } } -export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn { +export function createSystemCacheWrapper( + baseStreamFn: StreamFn | undefined, + isEligible: (provider: string, modelId: string) => boolean, +): StreamFn { const underlying = baseStreamFn ?? streamSimple; return (model, context, options) => { if ( typeof model.provider !== "string" || typeof model.id !== "string" || - !isOpenRouterAnthropicModel(model.provider, model.id) + !isEligible(model.provider, model.id) ) { return underlying(model, context, options); } @@ -94,6 +101,14 @@ export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | unde }; } +export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn { + return createSystemCacheWrapper(baseStreamFn, isOpenRouterAnthropicModel); +} + +export function createDeepInfraSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn { + return createSystemCacheWrapper(baseStreamFn, isDeepInfraAnthropicModel); +} + export function createOpenRouterWrapper( baseStreamFn: StreamFn | undefined, thinkingLevel?: ThinkLevel, @@ -120,6 +135,23 @@ export function isProxyReasoningUnsupported(modelId: string): boolean { return modelId.toLowerCase().startsWith("x-ai/"); } +export function createDeepInfraWrapper( + baseStreamFn: StreamFn | undefined, + thinkingLevel?: ThinkLevel, +): StreamFn { + const underlying = baseStreamFn ?? 
streamSimple; + return (model, context, options) => { + const onPayload = options?.onPayload; + return underlying(model, context, { + ...options, + onPayload: (payload) => { + normalizeProxyReasoningPayload(payload, thinkingLevel); + return onPayload?.(payload, model); + }, + }); + }; +} + export function createKilocodeWrapper( baseStreamFn: StreamFn | undefined, thinkingLevel?: ThinkLevel, diff --git a/src/agents/provider-capabilities.test.ts b/src/agents/provider-capabilities.test.ts index 09f19468776..1c03ea77210 100644 --- a/src/agents/provider-capabilities.test.ts +++ b/src/agents/provider-capabilities.test.ts @@ -17,6 +17,13 @@ const resolveProviderCapabilitiesWithPluginMock = vi.fn((params: { provider: str geminiThoughtSignatureSanitization: true, geminiThoughtSignatureModelHints: ["gemini"], }; + case "deepinfra": + return { + openAiCompatTurnValidation: false, + geminiThoughtSignatureSanitization: true, + geminiThoughtSignatureModelHints: ["gemini"], + dropThinkingBlockModelHints: ["claude"], + }; case "openai-codex": return { providerFamily: "openai", @@ -131,6 +138,12 @@ describe("resolveProviderCapabilities", () => { modelId: "gemini-2.0-flash", }), ).toBe(true); + expect( + shouldSanitizeGeminiThoughtSignaturesForModel({ + provider: "deepinfra", + modelId: "google/gemini-2.5-pro", + }), + ).toBe(true); expect( shouldSanitizeGeminiThoughtSignaturesForModel({ provider: "opencode-go", @@ -174,6 +187,12 @@ describe("resolveProviderCapabilities", () => { modelId: "claude-3.7-sonnet", }), ).toBe(true); + expect( + shouldDropThinkingBlocksForModel({ + provider: "deepinfra", + modelId: "anthropic/claude-3.5-sonnet", + }), + ).toBe(true); }); it("forwards config and workspace context to plugin capability lookup", () => { diff --git a/src/agents/transcript-policy.test.ts b/src/agents/transcript-policy.test.ts index 7409e7a4b12..3cd8aa82a05 100644 --- a/src/agents/transcript-policy.test.ts +++ b/src/agents/transcript-policy.test.ts @@ -158,6 +158,7 @@ 
describe("resolveTranscriptPolicy", () => { { provider: "openrouter", modelId: "google/gemini-2.5-pro-preview" }, { provider: "opencode", modelId: "google/gemini-2.5-flash" }, { provider: "kilocode", modelId: "gemini-2.0-flash" }, + { provider: "deepinfra", modelId: "google/gemini-2.5-pro" }, ])("sanitizes Gemini thought signatures for $provider routes", ({ provider, modelId }) => { const policy = resolveTranscriptPolicy({ provider, diff --git a/src/commands/onboard-auth.config-core.deepinfra.test.ts b/src/commands/onboard-auth.config-core.deepinfra.test.ts new file mode 100644 index 00000000000..4cf04c3cc48 --- /dev/null +++ b/src/commands/onboard-auth.config-core.deepinfra.test.ts @@ -0,0 +1,157 @@ +import { mkdtempSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { describe, expect, it } from "vitest"; +import { + applyDeepInfraProviderConfig, + applyDeepInfraConfig, +} from "../../extensions/deepinfra/onboard.js"; +import { resolveApiKeyForProvider, resolveEnvApiKey } from "../agents/model-auth.js"; +import type { OpenClawConfig } from "../config/config.js"; +import { resolveAgentModelPrimaryValue } from "../config/model-input.js"; +import { + DEEPINFRA_BASE_URL, + DEEPINFRA_DEFAULT_CONTEXT_WINDOW, + DEEPINFRA_DEFAULT_COST, + DEEPINFRA_DEFAULT_MODEL_ID, + DEEPINFRA_DEFAULT_MODEL_REF, + DEEPINFRA_DEFAULT_MAX_TOKENS, +} from "../providers/deepinfra-shared.js"; +import { captureEnv } from "../test-utils/env.js"; + +const emptyCfg: OpenClawConfig = {}; + +describe("DeepInfra provider config", () => { + describe("constants", () => { + it("DEEPINFRA_BASE_URL points to DeepInfra OpenAI-compatible endpoint", () => { + expect(DEEPINFRA_BASE_URL).toBe("https://api.deepinfra.com/v1/openai/"); + }); + + it("DEEPINFRA_DEFAULT_MODEL_REF includes provider prefix", () => { + expect(DEEPINFRA_DEFAULT_MODEL_REF).toBe(`deepinfra/${DEEPINFRA_DEFAULT_MODEL_ID}`); + }); + + it("DEEPINFRA_DEFAULT_MODEL_ID is openai/gpt-oss-120b", () 
=> { + expect(DEEPINFRA_DEFAULT_MODEL_ID).toBe("openai/gpt-oss-120b"); + }); + + it("DEEPINFRA_DEFAULT_CONTEXT_WINDOW is 128000", () => { + expect(DEEPINFRA_DEFAULT_CONTEXT_WINDOW).toBe(128000); + }); + + it("DEEPINFRA_DEFAULT_MAX_TOKENS is 8192", () => { + expect(DEEPINFRA_DEFAULT_MAX_TOKENS).toBe(8192); + }); + + it("DEEPINFRA_DEFAULT_COST has zero values", () => { + expect(DEEPINFRA_DEFAULT_COST).toEqual({ + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }); + }); + }); + + describe("applyDeepInfraProviderConfig", () => { + it("does not persist a provider block (discovery populates models at runtime)", () => { + const result = applyDeepInfraProviderConfig(emptyCfg); + expect(result.models?.providers?.deepinfra).toBeUndefined(); + }); + + it("sets DeepInfra alias in agent default models", () => { + const result = applyDeepInfraProviderConfig(emptyCfg); + const agentModel = result.agents?.defaults?.models?.[DEEPINFRA_DEFAULT_MODEL_REF]; + expect(agentModel).toBeDefined(); + expect(agentModel?.alias).toBe("DeepInfra"); + }); + + it("preserves existing alias if already set", () => { + const cfg: OpenClawConfig = { + agents: { + defaults: { + models: { + [DEEPINFRA_DEFAULT_MODEL_REF]: { alias: "My Custom Alias" }, + }, + }, + }, + }; + const result = applyDeepInfraProviderConfig(cfg); + const agentModel = result.agents?.defaults?.models?.[DEEPINFRA_DEFAULT_MODEL_REF]; + expect(agentModel?.alias).toBe("My Custom Alias"); + }); + + it("does not change the default model selection", () => { + const cfg: OpenClawConfig = { + agents: { + defaults: { + model: { primary: "openai/gpt-5" }, + }, + }, + }; + const result = applyDeepInfraProviderConfig(cfg); + expect(resolveAgentModelPrimaryValue(result.agents?.defaults?.model)).toBe("openai/gpt-5"); + }); + }); + + describe("applyDeepInfraConfig", () => { + it("sets deepinfra's default model as the config's default model", () => { + const result = applyDeepInfraConfig(emptyCfg); + 
expect(resolveAgentModelPrimaryValue(result.agents?.defaults?.model)).toBe( + DEEPINFRA_DEFAULT_MODEL_REF, + ); + }); + + it("does not persist a provider block (discovery populates models at runtime)", () => { + const result = applyDeepInfraConfig(emptyCfg); + expect(result.models?.providers?.deepinfra).toBeUndefined(); + }); + }); + + describe("env var resolution", () => { + it("resolves DEEPINFRA_API_KEY from env", () => { + const envSnapshot = captureEnv(["DEEPINFRA_API_KEY"]); + process.env.DEEPINFRA_API_KEY = "test-deepinfra-key"; // pragma: allowlist secret + + try { + const result = resolveEnvApiKey("deepinfra"); + expect(result).not.toBeNull(); + expect(result?.apiKey).toBe("test-deepinfra-key"); + expect(result?.source).toContain("DEEPINFRA_API_KEY"); + } finally { + envSnapshot.restore(); + } + }); + + it("returns null when DEEPINFRA_API_KEY is not set", () => { + const envSnapshot = captureEnv(["DEEPINFRA_API_KEY"]); + delete process.env.DEEPINFRA_API_KEY; + + try { + const result = resolveEnvApiKey("deepinfra"); + expect(result).toBeNull(); + } finally { + envSnapshot.restore(); + } + }); + + it("resolves the deepinfra api key via resolveApiKeyForProvider", async () => { + const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-")); + const envSnapshot = captureEnv(["DEEPINFRA_API_KEY"]); + process.env.DEEPINFRA_API_KEY = "deepinfra-provider-test-key"; // pragma: allowlist secret + + try { + const auth = await resolveApiKeyForProvider({ + provider: "deepinfra", + agentDir, + }); + + expect(auth.apiKey).toBe("deepinfra-provider-test-key"); + expect(auth.mode).toBe("api-key"); + expect(auth.source).toContain("DEEPINFRA_API_KEY"); + } finally { + envSnapshot.restore(); + } + }); + }); +}); diff --git a/src/commands/onboard-types.ts b/src/commands/onboard-types.ts index 832fae75448..6161fcafb00 100644 --- a/src/commands/onboard-types.ts +++ b/src/commands/onboard-types.ts @@ -14,6 +14,7 @@ export type BuiltInAuthChoice = | "openai-api-key" | 
"openrouter-api-key" | "kilocode-api-key" + | "deepinfra-api-key" | "litellm-api-key" | "ai-gateway-api-key" | "cloudflare-ai-gateway-api-key" @@ -62,6 +63,7 @@ export type BuiltInAuthChoiceGroupId = | "copilot" | "openrouter" | "kilocode" + | "deepinfra" | "litellm" | "ai-gateway" | "cloudflare-ai-gateway" @@ -119,6 +121,7 @@ export type OnboardOptions = { mistralApiKey?: string; openrouterApiKey?: string; kilocodeApiKey?: string; + deepinfraApiKey?: string; litellmApiKey?: string; aiGatewayApiKey?: string; cloudflareAiGatewayAccountId?: string; diff --git a/src/config/io.ts b/src/config/io.ts index fba17f253aa..214ac6210ec 100644 --- a/src/config/io.ts +++ b/src/config/io.ts @@ -71,6 +71,7 @@ const SHELL_ENV_EXPECTED_KEYS = [ "MODELSTUDIO_API_KEY", "SYNTHETIC_API_KEY", "KILOCODE_API_KEY", + "DEEPINFRA_API_KEY", "ELEVENLABS_API_KEY", "TELEGRAM_BOT_TOKEN", "DISCORD_BOT_TOKEN", diff --git a/src/plugin-sdk/provider-models.ts b/src/plugin-sdk/provider-models.ts index e38c02138bb..72da6116ffb 100644 --- a/src/plugin-sdk/provider-models.ts +++ b/src/plugin-sdk/provider-models.ts @@ -107,6 +107,8 @@ export { discoverVercelAiGatewayModels, VERCEL_AI_GATEWAY_BASE_URL, } from "../agents/vercel-ai-gateway.js"; +export { DEEPINFRA_BASE_URL, DEEPINFRA_DEFAULT_MODEL_REF } from "../providers/deepinfra-shared.js"; +export { discoverDeepInfraModels } from "../agents/deepinfra-models.js"; export function buildKilocodeModelDefinition(): ModelDefinitionConfig { return { diff --git a/src/plugin-sdk/provider-stream.ts b/src/plugin-sdk/provider-stream.ts index ced7c2d9d4c..74f9484e8e9 100644 --- a/src/plugin-sdk/provider-stream.ts +++ b/src/plugin-sdk/provider-stream.ts @@ -9,9 +9,12 @@ export { sanitizeGoogleThinkingPayload, } from "../agents/pi-embedded-runner/google-stream-wrappers.js"; export { + createDeepInfraSystemCacheWrapper, + createDeepInfraWrapper, createKilocodeWrapper, createOpenRouterSystemCacheWrapper, createOpenRouterWrapper, + createSystemCacheWrapper, 
isProxyReasoningUnsupported, } from "../agents/pi-embedded-runner/proxy-stream-wrappers.js"; export { diff --git a/src/plugins/bundled-provider-auth-env-vars.generated.ts b/src/plugins/bundled-provider-auth-env-vars.generated.ts index 80ebcedc2b9..116f83e16a7 100644 --- a/src/plugins/bundled-provider-auth-env-vars.generated.ts +++ b/src/plugins/bundled-provider-auth-env-vars.generated.ts @@ -6,6 +6,7 @@ export const BUNDLED_PROVIDER_AUTH_ENV_VAR_CANDIDATES = { byteplus: ["BYTEPLUS_API_KEY"], chutes: ["CHUTES_API_KEY", "CHUTES_OAUTH_TOKEN"], "cloudflare-ai-gateway": ["CLOUDFLARE_AI_GATEWAY_API_KEY"], + deepinfra: ["DEEPINFRA_API_KEY"], fal: ["FAL_KEY"], firecrawl: ["FIRECRAWL_API_KEY"], "github-copilot": ["COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"], diff --git a/src/plugins/config-state.ts b/src/plugins/config-state.ts index 986f038e4cd..725cac63849 100644 --- a/src/plugins/config-state.ts +++ b/src/plugins/config-state.ts @@ -33,6 +33,7 @@ export const BUNDLED_ENABLED_BY_DEFAULT = new Set([ "anthropic", "byteplus", "cloudflare-ai-gateway", + "deepinfra", "device-pair", "github-copilot", "google", diff --git a/src/plugins/contracts/registry.ts b/src/plugins/contracts/registry.ts index 0a419efebe1..6e6a2c69494 100644 --- a/src/plugins/contracts/registry.ts +++ b/src/plugins/contracts/registry.ts @@ -4,6 +4,7 @@ import byteplusPlugin from "../../../extensions/byteplus/index.js"; import chutesPlugin from "../../../extensions/chutes/index.js"; import cloudflareAiGatewayPlugin from "../../../extensions/cloudflare-ai-gateway/index.js"; import copilotProxyPlugin from "../../../extensions/copilot-proxy/index.js"; +import deepinfraPlugin from "../../../extensions/deepinfra/index.js"; import elevenLabsPlugin from "../../../extensions/elevenlabs/index.js"; import falPlugin from "../../../extensions/fal/index.js"; import githubCopilotPlugin from "../../../extensions/github-copilot/index.js"; @@ -353,6 +354,7 @@ const bundledProviderPlugins = dedupePlugins([ 
chutesPlugin, cloudflareAiGatewayPlugin, copilotProxyPlugin, + deepinfraPlugin, githubCopilotPlugin, falPlugin, googlePlugin, diff --git a/src/plugins/provider-auth-storage.ts b/src/plugins/provider-auth-storage.ts index d8e15115902..97ac8ea466f 100644 --- a/src/plugins/provider-auth-storage.ts +++ b/src/plugins/provider-auth-storage.ts @@ -1,6 +1,7 @@ import { resolveOpenClawAgentDir } from "../agents/agent-paths.js"; import { upsertAuthProfile } from "../agents/auth-profiles.js"; import type { SecretInput } from "../config/types.secrets.js"; +import { DEEPINFRA_DEFAULT_MODEL_REF } from "../providers/deepinfra-shared.js"; import { KILOCODE_DEFAULT_MODEL_REF } from "../providers/kilocode-shared.js"; import { buildApiKeyCredential, @@ -12,6 +13,7 @@ import { const resolveAuthAgentDir = (agentDir?: string) => agentDir ?? resolveOpenClawAgentDir(); export { KILOCODE_DEFAULT_MODEL_REF }; +export { DEEPINFRA_DEFAULT_MODEL_REF }; export { buildApiKeyCredential, type ApiKeyStorageOptions, @@ -250,6 +252,20 @@ export async function setOpencodeGoApiKey( await setSharedOpencodeApiKey(key, agentDir, options); } +// TODO: use this to reduce the code duplication a bit. 
+function setApiKey( + providerId: string, + key: SecretInput, + agentDir?: string, + options?: ApiKeyStorageOptions, +) { + upsertAuthProfile({ + profileId: `${providerId}:default`, + credential: buildApiKeyCredential(providerId, key, undefined, options), + agentDir: resolveAuthAgentDir(agentDir), + }); +} + async function setSharedOpencodeApiKey( key: SecretInput, agentDir?: string, @@ -343,3 +359,11 @@ export async function setKilocodeApiKey( agentDir: resolveAuthAgentDir(agentDir), }); } + +export async function setDeepInfraApiKey( + key: SecretInput, + agentDir?: string, + options?: ApiKeyStorageOptions, +) { + setApiKey("deepinfra", key, agentDir, options); +} diff --git a/src/providers/deepinfra-shared.ts b/src/providers/deepinfra-shared.ts new file mode 100644 index 00000000000..10c99998812 --- /dev/null +++ b/src/providers/deepinfra-shared.ts @@ -0,0 +1,62 @@ +export const DEEPINFRA_BASE_URL = "https://api.deepinfra.com/v1/openai/"; +export const DEEPINFRA_DEFAULT_MODEL_ID = "openai/gpt-oss-120b"; +export const DEEPINFRA_DEFAULT_MODEL_REF = `deepinfra/${DEEPINFRA_DEFAULT_MODEL_ID}`; +export type DeepInfraModelCatalogEntry = { + id: string; + name: string; + reasoning: boolean; + input: Array<"text" | "image">; + contextWindow: number; + maxTokens: number; +}; + +const DEEPINFRA_DEFAULT_MODEL_NAME = "gpt-oss-120b"; + +/** + * Static fallback catalog used by the sync onboarding path and as a + * fallback when dynamic model discovery from the gateway API fails. + * The full model list is fetched dynamically by {@link discoverDeepInfraModels} + * in `src/agents/deepinfra-models.ts`. 
+ */ +export const DEEPINFRA_MODEL_CATALOG: DeepInfraModelCatalogEntry[] = [ + { + id: DEEPINFRA_DEFAULT_MODEL_ID, + name: DEEPINFRA_DEFAULT_MODEL_NAME, + reasoning: true, + input: ["text"], + contextWindow: 131072, + maxTokens: 131072, + }, + { + id: "MiniMaxAI/MiniMax-M2.5", + name: "MiniMax M2.5", + reasoning: true, + input: ["text"], + contextWindow: 196608, + maxTokens: 196608, + }, + { + id: "zai-org/GLM-5", + name: "GLM 5", + reasoning: true, + input: ["text"], + contextWindow: 202752, + maxTokens: 202752, + }, + { + id: "moonshotai/Kimi-K2.5", + name: "Kimi K2.5", + reasoning: true, + input: ["text", "image"], + contextWindow: 262144, + maxTokens: 262144, + }, +]; +export const DEEPINFRA_DEFAULT_CONTEXT_WINDOW = 128000; +export const DEEPINFRA_DEFAULT_MAX_TOKENS = 8192; +export const DEEPINFRA_DEFAULT_COST = { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, +} as const;