From 35aafd7ca8131c760f7b21f4510051491397e0da Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 12 Mar 2026 23:38:48 +0000 Subject: [PATCH] feat: add Anthropic fast mode support --- CHANGELOG.md | 1 + docs/concepts/model-providers.md | 1 + docs/providers/anthropic.md | 28 ++++++ docs/tools/thinking.md | 2 + src/agents/fast-mode.ts | 8 ++ ...i-embedded-runner-extraparams.live.test.ts | 77 ++++++++++++++++ .../pi-embedded-runner-extraparams.test.ts | 88 ++++++++++++++++++- .../anthropic-stream-wrappers.ts | 59 +++++++++++++ src/agents/pi-embedded-runner/extra-params.ts | 8 ++ 9 files changed, 271 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab0a4385a58..15dce06eec2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Docs: https://docs.openclaw.ai - Control UI/dashboard-v2: refresh the gateway dashboard with modular overview, chat, config, agent, and session views, plus a command palette, mobile bottom tabs, and richer chat tools like slash commands, search, export, and pinned messages. (#41503) Thanks @BunsDev. - Models/plugins: move Ollama, vLLM, and SGLang onto the provider-plugin architecture, with provider-owned onboarding, discovery, model-picker setup, and post-selection hooks so core provider wiring is more modular. - OpenAI/GPT-5.4 fast mode: add configurable session-level fast toggles across `/fast`, TUI, Control UI, and ACP, with per-model config defaults and OpenAI/Codex request shaping. +- Anthropic/Claude fast mode: map the shared `/fast` toggle and `params.fastMode` to direct Anthropic API-key `service_tier` requests, with live verification for both Anthropic and OpenAI fast-mode tiers. ### Fixes diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md index 3a081c29416..357ac82ec7a 100644 --- a/docs/concepts/model-providers.md +++ b/docs/concepts/model-providers.md @@ -62,6 +62,7 @@ OpenClaw ships with the pi‑ai catalog. These providers require **no** - Optional rotation: `ANTHROPIC_API_KEYS`, `ANTHROPIC_API_KEY_1`, `ANTHROPIC_API_KEY_2`, plus `OPENCLAW_LIVE_ANTHROPIC_KEY` (single override) - Example model: `anthropic/claude-opus-4-6` - CLI: `openclaw onboard --auth-choice token` (paste setup-token) or `openclaw models auth paste-token --provider anthropic` +- Direct API-key models support the shared `/fast` toggle and `params.fastMode`; OpenClaw maps that to Anthropic `service_tier` (`auto` vs `standard_only`) - Policy note: setup-token support is technical compatibility; Anthropic has blocked some subscription usage outside Claude Code in the past. Verify current Anthropic terms and decide based on your risk tolerance. - Recommendation: Anthropic API key auth is the safer, recommended path over subscription setup-token auth. diff --git a/docs/providers/anthropic.md b/docs/providers/anthropic.md index de974315273..8974bb2dd61 100644 --- a/docs/providers/anthropic.md +++ b/docs/providers/anthropic.md @@ -44,6 +44,34 @@ openclaw onboard --anthropic-api-key "$ANTHROPIC_API_KEY" - [Adaptive thinking](https://platform.claude.com/docs/en/build-with-claude/adaptive-thinking) - [Extended thinking](https://platform.claude.com/docs/en/build-with-claude/extended-thinking) +## Fast mode (Anthropic API) + +OpenClaw's shared `/fast` toggle also supports direct Anthropic API-key traffic. + +- `/fast on` maps to `service_tier: "auto"` +- `/fast off` maps to `service_tier: "standard_only"` +- Config default: + +```json5 +{ + agents: { + defaults: { + models: { + "anthropic/claude-sonnet-4-5": { + params: { fastMode: true }, + }, + }, + }, + }, +} +``` + +Important limits: + +- This is **API-key only**. Anthropic setup-token / OAuth auth does not honor OpenClaw fast-mode tier injection. +- OpenClaw only injects Anthropic service tiers for direct `api.anthropic.com` requests. If you route `anthropic/*` through a proxy or gateway, `/fast` leaves `service_tier` untouched. +- Anthropic reports the effective tier on the response under `usage.service_tier`. On accounts without Priority Tier capacity, `service_tier: "auto"` may still resolve to `standard`. + ## Prompt caching (Anthropic API) OpenClaw supports Anthropic's prompt caching feature. This is **API-only**; subscription auth does not honor cache settings. diff --git a/docs/tools/thinking.md b/docs/tools/thinking.md index 9fe989332f4..045911c92b2 100644 --- a/docs/tools/thinking.md +++ b/docs/tools/thinking.md @@ -54,6 +54,8 @@ title: "Thinking Levels" 4. Fallback: `off` - For `openai/*`, fast mode applies the OpenAI fast profile: `service_tier=priority` when supported, plus low reasoning effort and low text verbosity. - For `openai-codex/*`, fast mode applies the same low-latency profile on Codex Responses. OpenClaw keeps one shared `/fast` toggle across both auth paths. +- For direct `anthropic/*` API-key requests, fast mode maps to Anthropic service tiers: `/fast on` sets `service_tier=auto`, `/fast off` sets `service_tier=standard_only`. +- Anthropic fast mode is API-key only. OpenClaw skips Anthropic service-tier injection for Claude setup-token / OAuth auth and for non-Anthropic proxy base URLs. ## Verbose directives (/verbose or /v) diff --git a/src/agents/fast-mode.ts b/src/agents/fast-mode.ts index bae3d5d300a..3935eeae27b 100644 --- a/src/agents/fast-mode.ts +++ b/src/agents/fast-mode.ts @@ -7,6 +7,14 @@ export type FastModeState = { source: "session" | "config" | "default"; }; +export function resolveFastModeParam( + extraParams: Record | undefined, +): boolean | undefined { + return normalizeFastMode( + (extraParams?.fastMode ?? extraParams?.fast_mode) as string | boolean | null | undefined, + ); +} + function resolveConfiguredFastModeRaw(params: { cfg: OpenClawConfig | undefined; provider: string; diff --git a/src/agents/pi-embedded-runner-extraparams.live.test.ts b/src/agents/pi-embedded-runner-extraparams.live.test.ts index 4116476c71f..22ccccdcac6 100644 --- a/src/agents/pi-embedded-runner-extraparams.live.test.ts +++ b/src/agents/pi-embedded-runner-extraparams.live.test.ts @@ -6,12 +6,16 @@ import { isTruthyEnvValue } from "../infra/env.js"; import { applyExtraParamsToAgent } from "./pi-embedded-runner.js"; const OPENAI_KEY = process.env.OPENAI_API_KEY ?? ""; +const ANTHROPIC_KEY = process.env.ANTHROPIC_API_KEY ?? ""; const GEMINI_KEY = process.env.GEMINI_API_KEY ?? ""; const LIVE = isTruthyEnvValue(process.env.OPENAI_LIVE_TEST) || isTruthyEnvValue(process.env.LIVE); +const ANTHROPIC_LIVE = + isTruthyEnvValue(process.env.ANTHROPIC_LIVE_TEST) || isTruthyEnvValue(process.env.LIVE); const GEMINI_LIVE = isTruthyEnvValue(process.env.GEMINI_LIVE_TEST) || isTruthyEnvValue(process.env.LIVE); const describeLive = LIVE && OPENAI_KEY ? describe : describe.skip; +const describeAnthropicLive = ANTHROPIC_LIVE && ANTHROPIC_KEY ? describe : describe.skip; const describeGeminiLive = GEMINI_LIVE && GEMINI_KEY ? describe : describe.skip; describeLive("pi embedded extra params (live)", () => { @@ -65,6 +69,79 @@ describeLive("pi embedded extra params (live)", () => { // Should respect maxTokens from config (16) — allow a small buffer for provider rounding. expect(outputTokens ?? 0).toBeLessThanOrEqual(20); }, 30_000); + + it("verifies OpenAI fast-mode service_tier semantics against the live API", async () => { + const headers = { + "content-type": "application/json", + authorization: `Bearer ${OPENAI_KEY}`, + }; + + const runProbe = async (serviceTier: "default" | "priority") => { + const res = await fetch("https://api.openai.com/v1/responses", { + method: "POST", + headers, + body: JSON.stringify({ + model: "gpt-5.4", + input: "Reply with OK.", + max_output_tokens: 32, + service_tier: serviceTier, + }), + }); + const json = (await res.json()) as { + error?: { message?: string }; + service_tier?: string; + status?: string; + }; + expect(res.ok, json.error?.message ?? `HTTP ${res.status}`).toBe(true); + return json; + }; + + const standard = await runProbe("default"); + expect(standard.service_tier).toBe("default"); + expect(standard.status).toBe("completed"); + + const fast = await runProbe("priority"); + expect(fast.service_tier).toBe("priority"); + expect(fast.status).toBe("completed"); + }, 45_000); +}); + +describeAnthropicLive("pi embedded extra params (anthropic live)", () => { + it("verifies Anthropic fast-mode service_tier semantics against the live API", async () => { + const headers = { + "content-type": "application/json", + "x-api-key": ANTHROPIC_KEY, + "anthropic-version": "2023-06-01", + }; + + const runProbe = async (serviceTier: "auto" | "standard_only") => { + const res = await fetch("https://api.anthropic.com/v1/messages", { + method: "POST", + headers, + body: JSON.stringify({ + model: "claude-sonnet-4-5", + max_tokens: 32, + service_tier: serviceTier, + messages: [{ role: "user", content: "Reply with OK." }], + }), + }); + const json = (await res.json()) as { + error?: { message?: string }; + stop_reason?: string; + usage?: { service_tier?: string }; + }; + expect(res.ok, json.error?.message ?? `HTTP ${res.status}`).toBe(true); + return json; + }; + + const standard = await runProbe("standard_only"); + expect(standard.usage?.service_tier).toBe("standard"); + expect(standard.stop_reason).toBe("end_turn"); + + const fast = await runProbe("auto"); + expect(["standard", "priority"]).toContain(fast.usage?.service_tier); + expect(fast.stop_reason).toBe("end_turn"); + }, 45_000); }); describeGeminiLive("pi embedded extra params (gemini live)", () => { diff --git a/src/agents/pi-embedded-runner-extraparams.test.ts b/src/agents/pi-embedded-runner-extraparams.test.ts index 468d62f9911..7a29f30f9eb 100644 --- a/src/agents/pi-embedded-runner-extraparams.test.ts +++ b/src/agents/pi-embedded-runner-extraparams.test.ts @@ -201,7 +201,8 @@ describe("applyExtraParamsToAgent", () => { model: | Model<"openai-responses"> | Model<"openai-codex-responses"> - | Model<"openai-completions">; + | Model<"openai-completions"> + | Model<"anthropic-messages">; options?: SimpleStreamOptions; cfg?: Record; extraParamsOverride?: Record; @@ -1683,6 +1684,91 @@ describe("applyExtraParamsToAgent", () => { expect(payload.service_tier).toBe("default"); }); + it("injects service_tier=auto for Anthropic fast mode on direct API-key models", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "anthropic", + applyModelId: "claude-sonnet-4-5", + extraParamsOverride: { fastMode: true }, + model: { + api: "anthropic-messages", + provider: "anthropic", + id: "claude-sonnet-4-5", + baseUrl: "https://api.anthropic.com", + } as unknown as Model<"anthropic-messages">, + payload: {}, + }); + expect(payload.service_tier).toBe("auto"); + }); + + it("injects service_tier=standard_only for Anthropic fast mode off", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "anthropic", + applyModelId: "claude-sonnet-4-5", + extraParamsOverride: { fastMode: false }, + model: { + api: "anthropic-messages", + provider: "anthropic", + id: "claude-sonnet-4-5", + baseUrl: "https://api.anthropic.com", + } as unknown as Model<"anthropic-messages">, + payload: {}, + }); + expect(payload.service_tier).toBe("standard_only"); + }); + + it("preserves caller-provided Anthropic service_tier values", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "anthropic", + applyModelId: "claude-sonnet-4-5", + extraParamsOverride: { fastMode: true }, + model: { + api: "anthropic-messages", + provider: "anthropic", + id: "claude-sonnet-4-5", + baseUrl: "https://api.anthropic.com", + } as unknown as Model<"anthropic-messages">, + payload: { + service_tier: "standard_only", + }, + }); + expect(payload.service_tier).toBe("standard_only"); + }); + + it("does not inject Anthropic fast mode service_tier for OAuth auth", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "anthropic", + applyModelId: "claude-sonnet-4-5", + extraParamsOverride: { fastMode: true }, + model: { + api: "anthropic-messages", + provider: "anthropic", + id: "claude-sonnet-4-5", + baseUrl: "https://api.anthropic.com", + } as unknown as Model<"anthropic-messages">, + options: { + apiKey: "sk-ant-oat-test-token", + }, + payload: {}, + }); + expect(payload).not.toHaveProperty("service_tier"); + }); + + it("does not inject Anthropic fast mode service_tier for proxied base URLs", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "anthropic", + applyModelId: "claude-sonnet-4-5", + extraParamsOverride: { fastMode: true }, + model: { + api: "anthropic-messages", + provider: "anthropic", + id: "claude-sonnet-4-5", + baseUrl: "https://proxy.example.com/anthropic", + } as unknown as Model<"anthropic-messages">, + payload: {}, + }); + expect(payload).not.toHaveProperty("service_tier"); + }); + it("applies fast-mode defaults for openai-codex responses without service_tier", () => { const payload = runResponsesPayloadMutationCase({ applyProvider: "openai-codex", diff --git a/src/agents/pi-embedded-runner/anthropic-stream-wrappers.ts b/src/agents/pi-embedded-runner/anthropic-stream-wrappers.ts index df43d2570c7..efed941762d 100644 --- a/src/agents/pi-embedded-runner/anthropic-stream-wrappers.ts +++ b/src/agents/pi-embedded-runner/anthropic-stream-wrappers.ts @@ -1,5 +1,6 @@ import type { StreamFn } from "@mariozechner/pi-agent-core"; import { streamSimple } from "@mariozechner/pi-ai"; +import { resolveFastModeParam } from "../fast-mode.js"; import { requiresOpenAiCompatibleAnthropicToolPayload, usesOpenAiFunctionAnthropicToolSchema, @@ -18,6 +19,7 @@ const PI_AI_OAUTH_ANTHROPIC_BETAS = [ "oauth-2025-04-20", ...PI_AI_DEFAULT_ANTHROPIC_BETAS, ] as const; +type AnthropicServiceTier = "auto" | "standard_only"; type CacheRetention = "none" | "short" | "long"; @@ -53,6 +55,25 @@ function isAnthropicOAuthApiKey(apiKey: unknown): boolean { return typeof apiKey === "string" && apiKey.includes("sk-ant-oat"); } +function isAnthropicPublicApiBaseUrl(baseUrl: unknown): boolean { + if (baseUrl == null) { + return true; + } + if (typeof baseUrl !== "string" || !baseUrl.trim()) { + return true; + } + + try { + return new URL(baseUrl).hostname.toLowerCase() === "api.anthropic.com"; + } catch { + return baseUrl.toLowerCase().includes("api.anthropic.com"); + } +} + +function resolveAnthropicFastServiceTier(enabled: boolean): AnthropicServiceTier { + return enabled ? "auto" : "standard_only"; +} + function requiresAnthropicToolPayloadCompatibilityForModel(model: { api?: unknown; provider?: unknown; @@ -304,6 +325,44 @@ export function createAnthropicToolPayloadCompatibilityWrapper( }; } +export function createAnthropicFastModeWrapper( + baseStreamFn: StreamFn | undefined, + enabled: boolean, +): StreamFn { + const underlying = baseStreamFn ?? streamSimple; + const serviceTier = resolveAnthropicFastServiceTier(enabled); + return (model, context, options) => { + if ( + model.api !== "anthropic-messages" || + model.provider !== "anthropic" || + !isAnthropicPublicApiBaseUrl(model.baseUrl) || + isAnthropicOAuthApiKey(options?.apiKey) + ) { + return underlying(model, context, options); + } + + const originalOnPayload = options?.onPayload; + return underlying(model, context, { + ...options, + onPayload: (payload) => { + if (payload && typeof payload === "object") { + const payloadObj = payload as Record; + if (payloadObj.service_tier === undefined) { + payloadObj.service_tier = serviceTier; + } + } + return originalOnPayload?.(payload, model); + }, + }); + }; +} + +export function resolveAnthropicFastMode( + extraParams: Record | undefined, +): boolean | undefined { + return resolveFastModeParam(extraParams); +} + export function createBedrockNoCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn { const underlying = baseStreamFn ?? streamSimple; return (model, context, options) => diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts index 230b6f1c853..a9d5085e013 100644 --- a/src/agents/pi-embedded-runner/extra-params.ts +++ b/src/agents/pi-embedded-runner/extra-params.ts @@ -5,9 +5,11 @@ import type { ThinkLevel } from "../../auto-reply/thinking.js"; import type { OpenClawConfig } from "../../config/config.js"; import { createAnthropicBetaHeadersWrapper, + createAnthropicFastModeWrapper, createAnthropicToolPayloadCompatibilityWrapper, createBedrockNoCacheWrapper, isAnthropicBedrockModel, + resolveAnthropicFastMode, resolveAnthropicBetas, resolveCacheRetention, } from "./anthropic-stream-wrappers.js"; @@ -439,6 +441,12 @@ export function applyExtraParamsToAgent( // upstream model-ID heuristics for Gemini 3.1 variants. agent.streamFn = createGoogleThinkingPayloadWrapper(agent.streamFn, thinkingLevel); + const anthropicFastMode = resolveAnthropicFastMode(merged); + if (anthropicFastMode !== undefined) { + log.debug(`applying Anthropic fast mode=${anthropicFastMode} for ${provider}/${modelId}`); + agent.streamFn = createAnthropicFastModeWrapper(agent.streamFn, anthropicFastMode); + } + const openAIFastMode = resolveOpenAIFastMode(merged); if (openAIFastMode) { log.debug(`applying OpenAI fast mode for ${provider}/${modelId}`);