diff --git a/.github/labeler.yml b/.github/labeler.yml index 67a74985465..dccc4db9c5b 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -325,3 +325,7 @@ - changed-files: - any-glob-to-any-file: - "extensions/fal/**" +"extensions: deepinfra": + - changed-files: + - any-glob-to-any-file: + - "extensions/deepinfra/**" diff --git a/extensions/deepinfra/index.ts b/extensions/deepinfra/index.ts index 08e0aa13b05..e0f19efe415 100644 --- a/extensions/deepinfra/index.ts +++ b/extensions/deepinfra/index.ts @@ -1,6 +1,7 @@ import { definePluginEntry } from "openclaw/plugin-sdk/core"; import { createProviderApiKeyAuthMethod } from "openclaw/plugin-sdk/provider-auth"; import { buildSingleProviderApiKeyCatalog } from "openclaw/plugin-sdk/provider-catalog"; +import { createDeepInfraSystemCacheWrapper } from "openclaw/plugin-sdk/provider-stream"; import { applyDeepInfraConfig, DEEPINFRA_DEFAULT_MODEL_REF } from "./onboard.js"; import { buildDeepInfraProviderWithDiscovery } from "./provider-catalog.js"; @@ -65,6 +66,7 @@ export default definePluginEntry({ geminiThoughtSignatureModelHints: ["gemini"], dropThinkingBlockModelHints: ["claude"], }, + wrapStreamFn: (ctx) => createDeepInfraSystemCacheWrapper(ctx.streamFn), isCacheTtlEligible: (ctx) => isDeepInfraCacheTtlModel(ctx.modelId), }); }, diff --git a/src/agents/pi-embedded-runner/extra-params.deepinfra-cache-control.test.ts b/src/agents/pi-embedded-runner/extra-params.deepinfra-cache-control.test.ts new file mode 100644 index 00000000000..789af3df4b4 --- /dev/null +++ b/src/agents/pi-embedded-runner/extra-params.deepinfra-cache-control.test.ts @@ -0,0 +1,92 @@ +import type { Model } from "@mariozechner/pi-ai"; +import { describe, expect, it } from "vitest"; +import { runExtraParamsCase } from "./extra-params.test-support.js"; + +type StreamPayload = { + messages: Array<{ + role: string; + content: unknown; + }>; +}; + +function runDeepInfraPayload(payload: StreamPayload, modelId: string) { + runExtraParamsCase({ + 
cfg: { + plugins: { + entries: { + deepinfra: { + enabled: true, + }, + }, + }, + }, + model: { + api: "openai-completions", + provider: "deepinfra", + id: modelId, + } as Model<"openai-completions">, + payload, + }); +} + +describe("extra-params: DeepInfra Anthropic cache_control", () => { + it("injects cache_control into system message for DeepInfra Anthropic models", () => { + const payload = { + messages: [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", content: "Hello" }, + ], + }; + + runDeepInfraPayload(payload, "anthropic/claude-opus-4-6"); + + expect(payload.messages[0].content).toEqual([ + { type: "text", text: "You are a helpful assistant.", cache_control: { type: "ephemeral" } }, + ]); + expect(payload.messages[1].content).toBe("Hello"); + }); + + it("adds cache_control to last content block when system message is already array", () => { + const payload = { + messages: [ + { + role: "system", + content: [ + { type: "text", text: "Part 1" }, + { type: "text", text: "Part 2" }, + ], + }, + ], + }; + + runDeepInfraPayload(payload, "anthropic/claude-opus-4-6"); + + const content = payload.messages[0].content as Array<Record<string, unknown>>; + expect(content[0]).toEqual({ type: "text", text: "Part 1" }); + expect(content[1]).toEqual({ + type: "text", + text: "Part 2", + cache_control: { type: "ephemeral" }, + }); + }); + + it("does not inject cache_control for DeepInfra non-Anthropic models", () => { + const payload = { + messages: [{ role: "system", content: "You are a helpful assistant." 
}], + }; + + runDeepInfraPayload(payload, "google/gemini-2.5-pro"); + + expect(payload.messages[0].content).toBe("You are a helpful assistant."); + }); + + it("leaves payload unchanged when no system message exists", () => { + const payload = { + messages: [{ role: "user", content: "Hello" }], + }; + + runDeepInfraPayload(payload, "anthropic/claude-opus-4-6"); + + expect(payload.messages[0].content).toBe("Hello"); + }); +}); diff --git a/src/agents/pi-embedded-runner/proxy-stream-wrappers.ts b/src/agents/pi-embedded-runner/proxy-stream-wrappers.ts index cc5e7596050..50568ba6efd 100644 --- a/src/agents/pi-embedded-runner/proxy-stream-wrappers.ts +++ b/src/agents/pi-embedded-runner/proxy-stream-wrappers.ts @@ -15,6 +15,10 @@ function isOpenRouterAnthropicModel(provider: string, modelId: string): boolean return provider.toLowerCase() === "openrouter" && modelId.toLowerCase().startsWith("anthropic/"); } +function isDeepInfraAnthropicModel(provider: string, modelId: string): boolean { + return provider.toLowerCase() === "deepinfra" && modelId.toLowerCase().startsWith("anthropic/"); +} + function mapThinkingLevelToOpenRouterReasoningEffort( thinkingLevel: ThinkLevel, ): "none" | "minimal" | "low" | "medium" | "high" | "xhigh" { @@ -55,13 +59,16 @@ function normalizeProxyReasoningPayload(payload: unknown, thinkingLevel?: ThinkL } } -export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn { +export function createSystemCacheWrapper( + baseStreamFn: StreamFn | undefined, + isEligible: (provider: string, modelId: string) => boolean, +): StreamFn { const underlying = baseStreamFn ?? 
streamSimple; return (model, context, options) => { if ( typeof model.provider !== "string" || typeof model.id !== "string" || - !isOpenRouterAnthropicModel(model.provider, model.id) + !isEligible(model.provider, model.id) ) { return underlying(model, context, options); } @@ -94,6 +101,14 @@ export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | unde }; } +export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn { + return createSystemCacheWrapper(baseStreamFn, isOpenRouterAnthropicModel); +} + +export function createDeepInfraSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn { + return createSystemCacheWrapper(baseStreamFn, isDeepInfraAnthropicModel); +} + export function createOpenRouterWrapper( baseStreamFn: StreamFn | undefined, thinkingLevel?: ThinkLevel, diff --git a/src/plugin-sdk/provider-stream.ts b/src/plugin-sdk/provider-stream.ts index ced7c2d9d4c..68df86e5116 100644 --- a/src/plugin-sdk/provider-stream.ts +++ b/src/plugin-sdk/provider-stream.ts @@ -9,9 +9,11 @@ export { sanitizeGoogleThinkingPayload, } from "../agents/pi-embedded-runner/google-stream-wrappers.js"; export { + createDeepInfraSystemCacheWrapper, createKilocodeWrapper, createOpenRouterSystemCacheWrapper, createOpenRouterWrapper, + createSystemCacheWrapper, isProxyReasoningUnsupported, } from "../agents/pi-embedded-runner/proxy-stream-wrappers.js"; export {