* fix(cache): inject cache_control into system prompt for OpenRouter Anthropic
Add onPayload wrapper that injects cache_control: { type: "ephemeral" }
into the system/developer message content for OpenRouter requests routed
to Anthropic models. The system prompt is typically ~18k tokens and was
being re-processed on every request without caching.
Fixes #15151
* Changelog: add OpenRouter note for #17473
---------
Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
This commit is contained in:
parent
66529c7aa5
commit
c52b2ad5c3
@ -28,6 +28,8 @@ Docs: https://docs.openclaw.ai
|
|||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
|
||||||
|
- OpenRouter/Anthropic: inject `cache_control` on system prompts for OpenRouter Anthropic models to improve prompt-cache reuse. (#17473) Thanks @rrenamed.
|
||||||
|
|
||||||
- Providers/OpenRouter: allow pass-through OpenRouter and Opencode model IDs in live model filtering so custom routed model IDs are treated as modern refs. (#14312) Thanks @Joly0.
|
- Providers/OpenRouter: allow pass-through OpenRouter and Opencode model IDs in live model filtering so custom routed model IDs are treated as modern refs. (#14312) Thanks @Joly0.
|
||||||
- Providers/OpenRouter: default reasoning to enabled when the selected model advertises `reasoning: true` and no session/directive override is set. (#22513) Thanks @zwffff.
|
- Providers/OpenRouter: default reasoning to enabled when the selected model advertises `reasoning: true` and no session/directive override is set. (#22513) Thanks @zwffff.
|
||||||
- Providers/OpenRouter: map `/think` levels to `reasoning.effort` in embedded runs while preserving explicit `reasoning.max_tokens` payloads. (#17236) Thanks @robbyczgw-cla.
|
- Providers/OpenRouter: map `/think` levels to `reasoning.effort` in embedded runs while preserving explicit `reasoning.max_tokens` payloads. (#17236) Thanks @robbyczgw-cla.
|
||||||
|
|||||||
@ -0,0 +1,99 @@
|
|||||||
|
import type { StreamFn } from "@mariozechner/pi-agent-core";
|
||||||
|
import type { Context, Model } from "@mariozechner/pi-ai";
|
||||||
|
import { AssistantMessageEventStream } from "@mariozechner/pi-ai";
|
||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
import { applyExtraParamsToAgent } from "./extra-params.js";
|
||||||
|
|
||||||
|
// Regression tests for the OpenRouter -> Anthropic system-prompt cache_control injection
// that applyExtraParamsToAgent installs on the agent's stream function.
describe("extra-params: OpenRouter Anthropic cache_control", () => {
  const ANTHROPIC_MODEL_ID = "anthropic/claude-opus-4-6";

  /**
   * Install the extra-params wrappers on a stub agent and invoke the resulting
   * stream function once. The stub base stream function hands `payload` to
   * `onPayload`, so any in-place mutation performed by the wrappers is
   * observable on `payload` after this returns.
   *
   * @param payload - Request payload object passed to `onPayload`.
   * @param modelId - OpenRouter model id used both for wrapper setup and for the model stub.
   */
  function runOpenRouterStream(payload: unknown, modelId: string): void {
    const baseStreamFn: StreamFn = (_model, _context, options) => {
      options?.onPayload?.(payload);
      return new AssistantMessageEventStream();
    };
    const agent = { streamFn: baseStreamFn };

    applyExtraParamsToAgent(agent, undefined, "openrouter", modelId);

    const model = {
      api: "openai-completions",
      provider: "openrouter",
      id: modelId,
    } as Model<"openai-completions">;
    const context: Context = { messages: [] };

    void agent.streamFn?.(model, context, {});
  }

  it("injects cache_control into system message for OpenRouter Anthropic models", () => {
    const payload = {
      messages: [
        { role: "system", content: "You are a helpful assistant." },
        { role: "user", content: "Hello" },
      ],
    };

    runOpenRouterStream(payload, ANTHROPIC_MODEL_ID);

    expect(payload.messages[0]?.content).toEqual([
      { type: "text", text: "You are a helpful assistant.", cache_control: { type: "ephemeral" } },
    ]);
    // Non-system messages must be left untouched.
    expect(payload.messages[1]?.content).toBe("Hello");
  });

  it("injects cache_control into developer message for OpenRouter Anthropic models", () => {
    const payload = {
      messages: [
        { role: "developer", content: "You are a helpful assistant." },
        { role: "user", content: "Hello" },
      ],
    };

    runOpenRouterStream(payload, ANTHROPIC_MODEL_ID);

    expect(payload.messages[0]?.content).toEqual([
      { type: "text", text: "You are a helpful assistant.", cache_control: { type: "ephemeral" } },
    ]);
    expect(payload.messages[1]?.content).toBe("Hello");
  });

  it("adds cache_control to last content block when system message is already array", () => {
    const payload = {
      messages: [
        {
          role: "system",
          content: [
            { type: "text", text: "Part 1" },
            { type: "text", text: "Part 2" },
          ],
        },
      ],
    };

    runOpenRouterStream(payload, ANTHROPIC_MODEL_ID);

    const content = payload.messages[0]?.content as Array<Record<string, unknown>>;
    // Only the final block carries the marker; earlier blocks are unchanged.
    expect(content[0]).toEqual({ type: "text", text: "Part 1" });
    expect(content[1]).toEqual({
      type: "text",
      text: "Part 2",
      cache_control: { type: "ephemeral" },
    });
  });

  it("does not inject cache_control for OpenRouter non-Anthropic models", () => {
    const payload = {
      messages: [{ role: "system", content: "You are a helpful assistant." }],
    };

    runOpenRouterStream(payload, "google/gemini-3-pro");

    expect(payload.messages[0]?.content).toBe("You are a helpful assistant.");
  });
});
|
||||||
@ -290,6 +290,59 @@ function createAnthropicBetaHeadersWrapper(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Report whether a provider/model pair is an Anthropic model reached through OpenRouter.
 *
 * Both comparisons are case-insensitive: the provider must equal "openrouter"
 * and the model id must start with the "anthropic/" prefix.
 *
 * @param provider - Provider identifier, e.g. "openrouter".
 * @param modelId - Model id as addressed on the provider, e.g. "anthropic/claude-opus-4-6".
 * @returns `true` only when both conditions hold.
 */
function isOpenRouterAnthropicModel(provider: string, modelId: string): boolean {
  return provider.toLowerCase() === "openrouter" && modelId.toLowerCase().startsWith("anthropic/");
}
|
||||||
|
|
||||||
|
/**
 * Minimal view of one entry in an outgoing request payload's `messages` array.
 *
 * Only the two fields read by the cache_control injection are declared.
 * `content` is kept as `unknown` and narrowed at the point of use, where it is
 * checked for being either a string or an array.
 */
type PayloadMessage = {
  role?: string;
  content?: unknown;
};
|
||||||
|
|
||||||
|
/**
 * Inject cache_control into the system message for OpenRouter Anthropic models.
 * OpenRouter passes through Anthropic's cache_control field — caching the system
 * prompt avoids re-processing it on every request.
 *
 * The returned stream function delegates unchanged to the underlying one unless
 * the model is an Anthropic model on OpenRouter. For those models it installs an
 * `onPayload` hook that mutates the payload in place before calling the
 * caller's own `onPayload` (if any):
 *
 * - a non-blank string `content` on a `system`/`developer` message becomes a
 *   single text block carrying `cache_control: { type: "ephemeral" }`;
 * - a non-empty array `content` gets the marker on its last block, unless that
 *   block already has a `cache_control` (a caller-supplied value is kept).
 *
 * @param baseStreamFn - Stream function to wrap; falls back to `streamSimple` when undefined.
 * @returns A stream function with the same signature as the wrapped one.
 */
function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    if (
      typeof model.provider !== "string" ||
      typeof model.id !== "string" ||
      !isOpenRouterAnthropicModel(model.provider, model.id)
    ) {
      return underlying(model, context, options);
    }

    const originalOnPayload = options?.onPayload;
    return underlying(model, context, {
      ...options,
      onPayload: (payload) => {
        const messages = (payload as Record<string, unknown> | null | undefined)?.messages;
        if (Array.isArray(messages)) {
          for (const msg of messages as Array<PayloadMessage | null | undefined>) {
            // Tolerate malformed entries instead of throwing on `msg.role`.
            if (!msg || typeof msg !== "object") {
              continue;
            }
            if (msg.role !== "system" && msg.role !== "developer") {
              continue;
            }
            if (typeof msg.content === "string") {
              // A blank prompt has nothing to cache, and a cache marker on an empty
              // text block is expected to be rejected upstream — leave it as-is.
              if (msg.content.trim().length === 0) {
                continue;
              }
              msg.content = [
                { type: "text", text: msg.content, cache_control: { type: "ephemeral" } },
              ];
            } else if (Array.isArray(msg.content) && msg.content.length > 0) {
              const last: unknown = msg.content[msg.content.length - 1];
              if (last && typeof last === "object") {
                const block = last as Record<string, unknown>;
                // Keep a marker the caller already set (it may carry extra options).
                if (block.cache_control == null) {
                  block.cache_control = { type: "ephemeral" };
                }
              }
            }
          }
        }
        // Run the caller's hook last so it observes the injected markers.
        originalOnPayload?.(payload);
      },
    });
  };
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Map OpenClaw's ThinkLevel to OpenRouter's reasoning.effort values.
|
* Map OpenClaw's ThinkLevel to OpenRouter's reasoning.effort values.
|
||||||
* "off" maps to "none"; all other levels pass through as-is.
|
* "off" maps to "none"; all other levels pass through as-is.
|
||||||
@ -426,6 +479,7 @@ export function applyExtraParamsToAgent(
|
|||||||
if (provider === "openrouter") {
|
if (provider === "openrouter") {
|
||||||
log.debug(`applying OpenRouter app attribution headers for ${provider}/${modelId}`);
|
log.debug(`applying OpenRouter app attribution headers for ${provider}/${modelId}`);
|
||||||
agent.streamFn = createOpenRouterWrapper(agent.streamFn, thinkingLevel);
|
agent.streamFn = createOpenRouterWrapper(agent.streamFn, thinkingLevel);
|
||||||
|
agent.streamFn = createOpenRouterSystemCacheWrapper(agent.streamFn);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable Z.AI tool_stream for real-time tool call streaming.
|
// Enable Z.AI tool_stream for real-time tool call streaming.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user