feat: add Anthropic fast mode support
This commit is contained in:
parent
52e2a7747a
commit
35aafd7ca8
@ -11,6 +11,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Control UI/dashboard-v2: refresh the gateway dashboard with modular overview, chat, config, agent, and session views, plus a command palette, mobile bottom tabs, and richer chat tools like slash commands, search, export, and pinned messages. (#41503) Thanks @BunsDev.
|
||||
- Models/plugins: move Ollama, vLLM, and SGLang onto the provider-plugin architecture, with provider-owned onboarding, discovery, model-picker setup, and post-selection hooks so core provider wiring is more modular.
|
||||
- OpenAI/GPT-5.4 fast mode: add configurable session-level fast toggles across `/fast`, TUI, Control UI, and ACP, with per-model config defaults and OpenAI/Codex request shaping.
|
||||
- Anthropic/Claude fast mode: map the shared `/fast` toggle and `params.fastMode` to direct Anthropic API-key `service_tier` requests, with live verification for both Anthropic and OpenAI fast-mode tiers.
|
||||
|
||||
### Fixes
|
||||
|
||||
|
||||
@ -62,6 +62,7 @@ OpenClaw ships with the pi‑ai catalog. These providers require **no**
|
||||
- Optional rotation: `ANTHROPIC_API_KEYS`, `ANTHROPIC_API_KEY_1`, `ANTHROPIC_API_KEY_2`, plus `OPENCLAW_LIVE_ANTHROPIC_KEY` (single override)
|
||||
- Example model: `anthropic/claude-opus-4-6`
|
||||
- CLI: `openclaw onboard --auth-choice token` (paste setup-token) or `openclaw models auth paste-token --provider anthropic`
|
||||
- Direct API-key models support the shared `/fast` toggle and `params.fastMode`; OpenClaw maps that to Anthropic `service_tier` (`auto` vs `standard_only`)
|
||||
- Policy note: setup-token support is technical compatibility; Anthropic has blocked some subscription usage outside Claude Code in the past. Verify current Anthropic terms and decide based on your risk tolerance.
|
||||
- Recommendation: Anthropic API key auth is the safer, recommended path over subscription setup-token auth.
|
||||
|
||||
|
||||
@ -44,6 +44,34 @@ openclaw onboard --anthropic-api-key "$ANTHROPIC_API_KEY"
|
||||
- [Adaptive thinking](https://platform.claude.com/docs/en/build-with-claude/adaptive-thinking)
|
||||
- [Extended thinking](https://platform.claude.com/docs/en/build-with-claude/extended-thinking)
|
||||
|
||||
## Fast mode (Anthropic API)
|
||||
|
||||
OpenClaw's shared `/fast` toggle also supports direct Anthropic API-key traffic.
|
||||
|
||||
- `/fast on` maps to `service_tier: "auto"`
|
||||
- `/fast off` maps to `service_tier: "standard_only"`
|
||||
- Config default:
|
||||
|
||||
```json5
|
||||
{
|
||||
agents: {
|
||||
defaults: {
|
||||
models: {
|
||||
"anthropic/claude-sonnet-4-5": {
|
||||
params: { fastMode: true },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
Important limits:
|
||||
|
||||
- This is **API-key only**. Anthropic setup-token / OAuth auth does not honor OpenClaw fast-mode tier injection.
|
||||
- OpenClaw only injects Anthropic service tiers for direct `api.anthropic.com` requests. If you route `anthropic/*` through a proxy or gateway, `/fast` leaves `service_tier` untouched.
|
||||
- Anthropic reports the effective tier on the response under `usage.service_tier`. On accounts without Priority Tier capacity, `service_tier: "auto"` may still resolve to `standard`.
|
||||
|
||||
## Prompt caching (Anthropic API)
|
||||
|
||||
OpenClaw supports Anthropic's prompt caching feature. This is **API-key only**; subscription auth does not honor cache settings.
|
||||
|
||||
@ -54,6 +54,8 @@ title: "Thinking Levels"
|
||||
4. Fallback: `off`
|
||||
- For `openai/*`, fast mode applies the OpenAI fast profile: `service_tier=priority` when supported, plus low reasoning effort and low text verbosity.
|
||||
- For `openai-codex/*`, fast mode applies the same low-latency profile on Codex Responses. OpenClaw keeps one shared `/fast` toggle across both auth paths.
|
||||
- For direct `anthropic/*` API-key requests, fast mode maps to Anthropic service tiers: `/fast on` sets `service_tier=auto`, `/fast off` sets `service_tier=standard_only`.
|
||||
- Anthropic fast mode is API-key only. OpenClaw skips Anthropic service-tier injection for Claude setup-token / OAuth auth and for non-Anthropic proxy base URLs.
|
||||
|
||||
## Verbose directives (/verbose or /v)
|
||||
|
||||
|
||||
@ -7,6 +7,14 @@ export type FastModeState = {
|
||||
source: "session" | "config" | "default";
|
||||
};
|
||||
|
||||
export function resolveFastModeParam(
|
||||
extraParams: Record<string, unknown> | undefined,
|
||||
): boolean | undefined {
|
||||
return normalizeFastMode(
|
||||
(extraParams?.fastMode ?? extraParams?.fast_mode) as string | boolean | null | undefined,
|
||||
);
|
||||
}
|
||||
|
||||
function resolveConfiguredFastModeRaw(params: {
|
||||
cfg: OpenClawConfig | undefined;
|
||||
provider: string;
|
||||
|
||||
@ -6,12 +6,16 @@ import { isTruthyEnvValue } from "../infra/env.js";
|
||||
import { applyExtraParamsToAgent } from "./pi-embedded-runner.js";
|
||||
|
||||
const OPENAI_KEY = process.env.OPENAI_API_KEY ?? "";
|
||||
const ANTHROPIC_KEY = process.env.ANTHROPIC_API_KEY ?? "";
|
||||
const GEMINI_KEY = process.env.GEMINI_API_KEY ?? "";
|
||||
const LIVE = isTruthyEnvValue(process.env.OPENAI_LIVE_TEST) || isTruthyEnvValue(process.env.LIVE);
|
||||
const ANTHROPIC_LIVE =
|
||||
isTruthyEnvValue(process.env.ANTHROPIC_LIVE_TEST) || isTruthyEnvValue(process.env.LIVE);
|
||||
const GEMINI_LIVE =
|
||||
isTruthyEnvValue(process.env.GEMINI_LIVE_TEST) || isTruthyEnvValue(process.env.LIVE);
|
||||
|
||||
const describeLive = LIVE && OPENAI_KEY ? describe : describe.skip;
|
||||
const describeAnthropicLive = ANTHROPIC_LIVE && ANTHROPIC_KEY ? describe : describe.skip;
|
||||
const describeGeminiLive = GEMINI_LIVE && GEMINI_KEY ? describe : describe.skip;
|
||||
|
||||
describeLive("pi embedded extra params (live)", () => {
|
||||
@ -65,6 +69,79 @@ describeLive("pi embedded extra params (live)", () => {
|
||||
// Should respect maxTokens from config (16) — allow a small buffer for provider rounding.
|
||||
expect(outputTokens ?? 0).toBeLessThanOrEqual(20);
|
||||
}, 30_000);
|
||||
|
||||
it("verifies OpenAI fast-mode service_tier semantics against the live API", async () => {
|
||||
const headers = {
|
||||
"content-type": "application/json",
|
||||
authorization: `Bearer ${OPENAI_KEY}`,
|
||||
};
|
||||
|
||||
const runProbe = async (serviceTier: "default" | "priority") => {
|
||||
const res = await fetch("https://api.openai.com/v1/responses", {
|
||||
method: "POST",
|
||||
headers,
|
||||
body: JSON.stringify({
|
||||
model: "gpt-5.4",
|
||||
input: "Reply with OK.",
|
||||
max_output_tokens: 32,
|
||||
service_tier: serviceTier,
|
||||
}),
|
||||
});
|
||||
const json = (await res.json()) as {
|
||||
error?: { message?: string };
|
||||
service_tier?: string;
|
||||
status?: string;
|
||||
};
|
||||
expect(res.ok, json.error?.message ?? `HTTP ${res.status}`).toBe(true);
|
||||
return json;
|
||||
};
|
||||
|
||||
const standard = await runProbe("default");
|
||||
expect(standard.service_tier).toBe("default");
|
||||
expect(standard.status).toBe("completed");
|
||||
|
||||
const fast = await runProbe("priority");
|
||||
expect(fast.service_tier).toBe("priority");
|
||||
expect(fast.status).toBe("completed");
|
||||
}, 45_000);
|
||||
});
|
||||
|
||||
describeAnthropicLive("pi embedded extra params (anthropic live)", () => {
|
||||
it("verifies Anthropic fast-mode service_tier semantics against the live API", async () => {
|
||||
const headers = {
|
||||
"content-type": "application/json",
|
||||
"x-api-key": ANTHROPIC_KEY,
|
||||
"anthropic-version": "2023-06-01",
|
||||
};
|
||||
|
||||
const runProbe = async (serviceTier: "auto" | "standard_only") => {
|
||||
const res = await fetch("https://api.anthropic.com/v1/messages", {
|
||||
method: "POST",
|
||||
headers,
|
||||
body: JSON.stringify({
|
||||
model: "claude-sonnet-4-5",
|
||||
max_tokens: 32,
|
||||
service_tier: serviceTier,
|
||||
messages: [{ role: "user", content: "Reply with OK." }],
|
||||
}),
|
||||
});
|
||||
const json = (await res.json()) as {
|
||||
error?: { message?: string };
|
||||
stop_reason?: string;
|
||||
usage?: { service_tier?: string };
|
||||
};
|
||||
expect(res.ok, json.error?.message ?? `HTTP ${res.status}`).toBe(true);
|
||||
return json;
|
||||
};
|
||||
|
||||
const standard = await runProbe("standard_only");
|
||||
expect(standard.usage?.service_tier).toBe("standard");
|
||||
expect(standard.stop_reason).toBe("end_turn");
|
||||
|
||||
const fast = await runProbe("auto");
|
||||
expect(["standard", "priority"]).toContain(fast.usage?.service_tier);
|
||||
expect(fast.stop_reason).toBe("end_turn");
|
||||
}, 45_000);
|
||||
});
|
||||
|
||||
describeGeminiLive("pi embedded extra params (gemini live)", () => {
|
||||
|
||||
@ -201,7 +201,8 @@ describe("applyExtraParamsToAgent", () => {
|
||||
model:
|
||||
| Model<"openai-responses">
|
||||
| Model<"openai-codex-responses">
|
||||
| Model<"openai-completions">;
|
||||
| Model<"openai-completions">
|
||||
| Model<"anthropic-messages">;
|
||||
options?: SimpleStreamOptions;
|
||||
cfg?: Record<string, unknown>;
|
||||
extraParamsOverride?: Record<string, unknown>;
|
||||
@ -1683,6 +1684,91 @@ describe("applyExtraParamsToAgent", () => {
|
||||
expect(payload.service_tier).toBe("default");
|
||||
});
|
||||
|
||||
it("injects service_tier=auto for Anthropic fast mode on direct API-key models", () => {
|
||||
const payload = runResponsesPayloadMutationCase({
|
||||
applyProvider: "anthropic",
|
||||
applyModelId: "claude-sonnet-4-5",
|
||||
extraParamsOverride: { fastMode: true },
|
||||
model: {
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
id: "claude-sonnet-4-5",
|
||||
baseUrl: "https://api.anthropic.com",
|
||||
} as unknown as Model<"anthropic-messages">,
|
||||
payload: {},
|
||||
});
|
||||
expect(payload.service_tier).toBe("auto");
|
||||
});
|
||||
|
||||
it("injects service_tier=standard_only for Anthropic fast mode off", () => {
|
||||
const payload = runResponsesPayloadMutationCase({
|
||||
applyProvider: "anthropic",
|
||||
applyModelId: "claude-sonnet-4-5",
|
||||
extraParamsOverride: { fastMode: false },
|
||||
model: {
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
id: "claude-sonnet-4-5",
|
||||
baseUrl: "https://api.anthropic.com",
|
||||
} as unknown as Model<"anthropic-messages">,
|
||||
payload: {},
|
||||
});
|
||||
expect(payload.service_tier).toBe("standard_only");
|
||||
});
|
||||
|
||||
it("preserves caller-provided Anthropic service_tier values", () => {
|
||||
const payload = runResponsesPayloadMutationCase({
|
||||
applyProvider: "anthropic",
|
||||
applyModelId: "claude-sonnet-4-5",
|
||||
extraParamsOverride: { fastMode: true },
|
||||
model: {
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
id: "claude-sonnet-4-5",
|
||||
baseUrl: "https://api.anthropic.com",
|
||||
} as unknown as Model<"anthropic-messages">,
|
||||
payload: {
|
||||
service_tier: "standard_only",
|
||||
},
|
||||
});
|
||||
expect(payload.service_tier).toBe("standard_only");
|
||||
});
|
||||
|
||||
it("does not inject Anthropic fast mode service_tier for OAuth auth", () => {
|
||||
const payload = runResponsesPayloadMutationCase({
|
||||
applyProvider: "anthropic",
|
||||
applyModelId: "claude-sonnet-4-5",
|
||||
extraParamsOverride: { fastMode: true },
|
||||
model: {
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
id: "claude-sonnet-4-5",
|
||||
baseUrl: "https://api.anthropic.com",
|
||||
} as unknown as Model<"anthropic-messages">,
|
||||
options: {
|
||||
apiKey: "sk-ant-oat-test-token",
|
||||
},
|
||||
payload: {},
|
||||
});
|
||||
expect(payload).not.toHaveProperty("service_tier");
|
||||
});
|
||||
|
||||
it("does not inject Anthropic fast mode service_tier for proxied base URLs", () => {
|
||||
const payload = runResponsesPayloadMutationCase({
|
||||
applyProvider: "anthropic",
|
||||
applyModelId: "claude-sonnet-4-5",
|
||||
extraParamsOverride: { fastMode: true },
|
||||
model: {
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
id: "claude-sonnet-4-5",
|
||||
baseUrl: "https://proxy.example.com/anthropic",
|
||||
} as unknown as Model<"anthropic-messages">,
|
||||
payload: {},
|
||||
});
|
||||
expect(payload).not.toHaveProperty("service_tier");
|
||||
});
|
||||
|
||||
it("applies fast-mode defaults for openai-codex responses without service_tier", () => {
|
||||
const payload = runResponsesPayloadMutationCase({
|
||||
applyProvider: "openai-codex",
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
import type { StreamFn } from "@mariozechner/pi-agent-core";
|
||||
import { streamSimple } from "@mariozechner/pi-ai";
|
||||
import { resolveFastModeParam } from "../fast-mode.js";
|
||||
import {
|
||||
requiresOpenAiCompatibleAnthropicToolPayload,
|
||||
usesOpenAiFunctionAnthropicToolSchema,
|
||||
@ -18,6 +19,7 @@ const PI_AI_OAUTH_ANTHROPIC_BETAS = [
|
||||
"oauth-2025-04-20",
|
||||
...PI_AI_DEFAULT_ANTHROPIC_BETAS,
|
||||
] as const;
|
||||
type AnthropicServiceTier = "auto" | "standard_only";
|
||||
|
||||
type CacheRetention = "none" | "short" | "long";
|
||||
|
||||
@ -53,6 +55,25 @@ function isAnthropicOAuthApiKey(apiKey: unknown): boolean {
|
||||
return typeof apiKey === "string" && apiKey.includes("sk-ant-oat");
|
||||
}
|
||||
|
||||
function isAnthropicPublicApiBaseUrl(baseUrl: unknown): boolean {
|
||||
if (baseUrl == null) {
|
||||
return true;
|
||||
}
|
||||
if (typeof baseUrl !== "string" || !baseUrl.trim()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
try {
|
||||
return new URL(baseUrl).hostname.toLowerCase() === "api.anthropic.com";
|
||||
} catch {
|
||||
return baseUrl.toLowerCase().includes("api.anthropic.com");
|
||||
}
|
||||
}
|
||||
|
||||
function resolveAnthropicFastServiceTier(enabled: boolean): AnthropicServiceTier {
|
||||
return enabled ? "auto" : "standard_only";
|
||||
}
|
||||
|
||||
function requiresAnthropicToolPayloadCompatibilityForModel(model: {
|
||||
api?: unknown;
|
||||
provider?: unknown;
|
||||
@ -304,6 +325,44 @@ export function createAnthropicToolPayloadCompatibilityWrapper(
|
||||
};
|
||||
}
|
||||
|
||||
export function createAnthropicFastModeWrapper(
|
||||
baseStreamFn: StreamFn | undefined,
|
||||
enabled: boolean,
|
||||
): StreamFn {
|
||||
const underlying = baseStreamFn ?? streamSimple;
|
||||
const serviceTier = resolveAnthropicFastServiceTier(enabled);
|
||||
return (model, context, options) => {
|
||||
if (
|
||||
model.api !== "anthropic-messages" ||
|
||||
model.provider !== "anthropic" ||
|
||||
!isAnthropicPublicApiBaseUrl(model.baseUrl) ||
|
||||
isAnthropicOAuthApiKey(options?.apiKey)
|
||||
) {
|
||||
return underlying(model, context, options);
|
||||
}
|
||||
|
||||
const originalOnPayload = options?.onPayload;
|
||||
return underlying(model, context, {
|
||||
...options,
|
||||
onPayload: (payload) => {
|
||||
if (payload && typeof payload === "object") {
|
||||
const payloadObj = payload as Record<string, unknown>;
|
||||
if (payloadObj.service_tier === undefined) {
|
||||
payloadObj.service_tier = serviceTier;
|
||||
}
|
||||
}
|
||||
return originalOnPayload?.(payload, model);
|
||||
},
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
export function resolveAnthropicFastMode(
|
||||
extraParams: Record<string, unknown> | undefined,
|
||||
): boolean | undefined {
|
||||
return resolveFastModeParam(extraParams);
|
||||
}
|
||||
|
||||
export function createBedrockNoCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
|
||||
const underlying = baseStreamFn ?? streamSimple;
|
||||
return (model, context, options) =>
|
||||
|
||||
@ -5,9 +5,11 @@ import type { ThinkLevel } from "../../auto-reply/thinking.js";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import {
|
||||
createAnthropicBetaHeadersWrapper,
|
||||
createAnthropicFastModeWrapper,
|
||||
createAnthropicToolPayloadCompatibilityWrapper,
|
||||
createBedrockNoCacheWrapper,
|
||||
isAnthropicBedrockModel,
|
||||
resolveAnthropicFastMode,
|
||||
resolveAnthropicBetas,
|
||||
resolveCacheRetention,
|
||||
} from "./anthropic-stream-wrappers.js";
|
||||
@ -439,6 +441,12 @@ export function applyExtraParamsToAgent(
|
||||
// upstream model-ID heuristics for Gemini 3.1 variants.
|
||||
agent.streamFn = createGoogleThinkingPayloadWrapper(agent.streamFn, thinkingLevel);
|
||||
|
||||
const anthropicFastMode = resolveAnthropicFastMode(merged);
|
||||
if (anthropicFastMode !== undefined) {
|
||||
log.debug(`applying Anthropic fast mode=${anthropicFastMode} for ${provider}/${modelId}`);
|
||||
agent.streamFn = createAnthropicFastModeWrapper(agent.streamFn, anthropicFastMode);
|
||||
}
|
||||
|
||||
const openAIFastMode = resolveOpenAIFastMode(merged);
|
||||
if (openAIFastMode) {
|
||||
log.debug(`applying OpenAI fast mode for ${provider}/${modelId}`);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user