diff --git a/src/config/types.agent-defaults.ts b/src/config/types.agent-defaults.ts index ecaaecb69b9..2573b1c1c40 100644 --- a/src/config/types.agent-defaults.ts +++ b/src/config/types.agent-defaults.ts @@ -262,6 +262,14 @@ export type AgentDefaultsConfig = { * per-heartbeat token cost by avoiding the full session transcript. */ isolatedSession?: boolean; + /** + * Maximum estimated cost (USD) for a single heartbeat run. If the + * estimated cost exceeds this value, the run is skipped before calling + * the model. Estimation uses approximate token count and model pricing. + * + * Default: undefined (no cap). + */ + maxCostPerRun?: number; /** * When enabled, deliver the model's reasoning payload for heartbeat runs (when available) * as a separate message prefixed with `Reasoning:` (same as `/reasoning on`). diff --git a/src/infra/heartbeat-runner.cost-cap.test.ts b/src/infra/heartbeat-runner.cost-cap.test.ts new file mode 100644 index 00000000000..53b74212fc8 --- /dev/null +++ b/src/infra/heartbeat-runner.cost-cap.test.ts @@ -0,0 +1,343 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { telegramPlugin } from "../../extensions/telegram/src/channel.js"; +import { setTelegramRuntime } from "../../extensions/telegram/src/runtime.js"; +import { whatsappPlugin } from "../../extensions/whatsapp/src/channel.js"; +import { setWhatsAppRuntime } from "../../extensions/whatsapp/src/runtime.js"; +import * as replyModule from "../auto-reply/reply.js"; +import type { OpenClawConfig } from "../config/config.js"; +import { resolveMainSessionKey } from "../config/sessions.js"; +import { setActivePluginRegistry } from "../plugins/runtime.js"; +import { createPluginRuntime } from "../plugins/runtime/index.js"; +import { createTestRegistry } from "../test-utils/channel-plugins.js"; +import { estimateRunCost, runHeartbeatOnce } from "./heartbeat-runner.js"; +import { seedSessionStore, withTempHeartbeatSandbox } from "./heartbeat-runner.test-utils.js"; + +// Avoid pulling optional runtime deps during isolated runs. +vi.mock("jiti", () => ({ createJiti: () => () => ({}) })); + +// --------------------------------------------------------------------------- +// Unit tests: estimateRunCost +// --------------------------------------------------------------------------- + +describe("estimateRunCost", () => { + it("estimates cost for a known model", () => { + // 4000 chars = ~1000 tokens. claude-opus-4 = $15/M input tokens = $0.015/1K + const cost = estimateRunCost("x".repeat(4000), "claude-opus-4-20260901"); + expect(cost).toBeCloseTo(0.015, 3); + }); + + it("estimates cost for a cheap model", () => { + // 4000 chars = ~1000 tokens. gemini-2.0-flash = $0.10/M = $0.0001/1K + const cost = estimateRunCost("x".repeat(4000), "gemini-2.0-flash"); + expect(cost).toBeCloseTo(0.0001, 5); + }); + + it("uses conservative fallback for unknown models", () => { + const cost = estimateRunCost("x".repeat(4000), "some-unknown-model-v9"); + expect(cost).toBeCloseTo(0.015, 3); + }); + + it("returns zero for empty prompt", () => { + const cost = estimateRunCost("", "claude-opus-4"); + expect(cost).toBe(0); + }); + + it("returns non-zero for single-char prompt", () => { + // ceil(1/4) = 1 token + const cost = estimateRunCost("a", "claude-opus-4"); + expect(cost).toBe(15 / 1_000_000); + }); + + it("handles large context (128K chars)", () => { + // 128000 chars = ~32000 tokens. claude-opus-4 = $15/M = $0.48 + const cost = estimateRunCost("x".repeat(128_000), "claude-opus-4"); + expect(cost).toBeCloseTo(0.48, 2); + }); + + it("is case-insensitive for model names", () => { + const lower = estimateRunCost("x".repeat(4000), "claude-opus-4"); + const upper = estimateRunCost("x".repeat(4000), "CLAUDE-OPUS-4"); + const mixed = estimateRunCost("x".repeat(4000), "Claude-Opus-4"); + expect(lower).toBe(upper); + expect(lower).toBe(mixed); + }); +}); + +describe("prefix matching ordering", () => { + it("gpt-4o matches gpt-4o pricing, not gpt-4", () => { + const cost4o = estimateRunCost("x".repeat(4000), "gpt-4o-2026-03-01"); + const cost4 = estimateRunCost("x".repeat(4000), "gpt-4-0613"); + expect(cost4o).toBeLessThan(cost4); + expect(cost4o).toBeCloseTo(0.0025, 4); + }); + + it("gpt-4-turbo matches gpt-4-turbo pricing, not gpt-4", () => { + const costTurbo = estimateRunCost("x".repeat(4000), "gpt-4-turbo-preview"); + const cost4 = estimateRunCost("x".repeat(4000), "gpt-4-0613"); + expect(costTurbo).toBeLessThan(cost4); + expect(costTurbo).toBeCloseTo(0.01, 4); + }); + + it("gpt-4 exact matches gpt-4 pricing", () => { + const cost = estimateRunCost("x".repeat(4000), "gpt-4-0613"); + expect(cost).toBeCloseTo(0.03, 4); + }); + + it("o1-mini matches o1-mini pricing, not o1", () => { + const costMini = estimateRunCost("x".repeat(4000), "o1-mini-2026-01-01"); + const costFull = estimateRunCost("x".repeat(4000), "o1-2026-01-01"); + expect(costMini).toBeLessThan(costFull); + expect(costMini).toBeCloseTo(0.003, 4); + }); + + it("o3-mini matches o3-mini pricing, not o3", () => { + const costMini = estimateRunCost("x".repeat(4000), "o3-mini"); + const costFull = estimateRunCost("x".repeat(4000), "o3-preview"); + expect(costMini).toBeLessThan(costFull); + expect(costMini).toBeCloseTo(0.0011, 4); + }); +}); + +describe("config catalog lookup", () => { + it("uses catalog pricing when model is defined in config", () => { + const cfg = { + models: { + providers: { + custom: { + models: [ + { + id: "my-cheap-model", + cost: { input: 0.5, output: 1, cacheRead: 0, cacheWrite: 0 }, + }, + ], + }, + }, + }, + } as unknown as import("../config/config.js").OpenClawConfig; + // 4000 chars = 1000 tokens. catalog says $0.50/M = $0.0005/1K + const cost = estimateRunCost("x".repeat(4000), "custom/my-cheap-model", cfg); + expect(cost).toBeCloseTo(0.0005, 5); + }); + + it("falls back to hardcoded table when model not in catalog", () => { + const cfg = { + models: { providers: {} }, + } as unknown as import("../config/config.js").OpenClawConfig; + const cost = estimateRunCost("x".repeat(4000), "claude-opus-4", cfg); + // Should still use hardcoded table: $15/M = $0.015/1K + expect(cost).toBeCloseTo(0.015, 3); + }); + + it("falls back to hardcoded table when cfg is undefined", () => { + const cost = estimateRunCost("x".repeat(4000), "claude-opus-4"); + expect(cost).toBeCloseTo(0.015, 3); + }); + + it("falls back to hardcoded table for bare model name without provider/", () => { + const cfg = { + models: { + providers: { + anthropic: { + models: [ + { + id: "claude-opus-4", + cost: { input: 15, output: 75, cacheRead: 0, cacheWrite: 0 }, + }, + ], + }, + }, + }, + } as unknown as import("../config/config.js").OpenClawConfig; + // Bare name "claude-opus-4" has no provider/ prefix, so parseModelRef + // returns provider="" which fails catalog lookup. Falls back to hardcoded. + const cost = estimateRunCost("x".repeat(4000), "claude-opus-4", cfg); + expect(cost).toBeCloseTo(0.015, 3); + }); + + it("uses zero cost.input from catalog for free models", () => { + const cfg = { + models: { + providers: { + custom: { + models: [ + { + id: "free-model", + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + }, + ], + }, + }, + }, + } as unknown as import("../config/config.js").OpenClawConfig; + const cost = estimateRunCost("x".repeat(4000), "custom/free-model", cfg); + expect(cost).toBe(0); // free model = $0 + }); + + it("uses zero cost.input from catalog (free/local model)", () => { + const cfg = { + models: { + providers: { + local: { + models: [ + { + id: "llama3", + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + }, + ], + }, + }, + }, + } as unknown as import("../config/config.js").OpenClawConfig; + const cost = estimateRunCost("x".repeat(4000), "local/llama3", cfg); + expect(cost).toBe(0); // free model = $0 + }); + + it("matches hardcoded table with provider-prefixed model name", () => { + // "openai/gpt-4o" should strip "openai/" and match "gpt-4o" in hardcoded table + const cost = estimateRunCost("x".repeat(4000), "openai/gpt-4o"); + expect(cost).toBeCloseTo(0.0025, 4); // gpt-4o = $2.5/M + }); + + it("matches hardcoded table with nested provider prefix", () => { + // "openrouter/anthropic/claude-opus-4" should strip to "claude-opus-4" + const cost = estimateRunCost("x".repeat(4000), "openrouter/anthropic/claude-opus-4"); + expect(cost).toBeCloseTo(0.015, 3); // claude-opus-4 = $15/M + }); + + it("ignores catalog entry with negative cost.input", () => { + const cfg = { + models: { + providers: { + custom: { + models: [ + { + id: "bad-model", + cost: { input: -5, output: 1, cacheRead: 0, cacheWrite: 0 }, + }, + ], + }, + }, + }, + } as unknown as import("../config/config.js").OpenClawConfig; + const cost = estimateRunCost("x".repeat(4000), "custom/bad-model", cfg); + expect(cost).toBeCloseTo(0.015, 3); // fallback + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests: runHeartbeatOnce with maxCostPerRun +// --------------------------------------------------------------------------- + +beforeEach(() => { + const runtime = createPluginRuntime(); + setTelegramRuntime(runtime); + setWhatsAppRuntime(runtime); + setActivePluginRegistry( + createTestRegistry([ + { pluginId: "whatsapp", plugin: whatsappPlugin, source: "test" }, + { pluginId: "telegram", plugin: telegramPlugin, source: "test" }, + ]), + ); +}); + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe("runHeartbeatOnce – maxCostPerRun", () => { + async function runWithCostCap(params: { maxCostPerRun?: number; model?: string }) { + return withTempHeartbeatSandbox( + async ({ tmpDir, storePath, replySpy }) => { + const cfg: OpenClawConfig = { + agents: { + defaults: { + workspace: tmpDir, + heartbeat: { + every: "5m", + target: "whatsapp", + model: params.model ?? "claude-opus-4", + maxCostPerRun: params.maxCostPerRun, + }, + }, + }, + channels: { whatsapp: { allowFrom: ["*"] } }, + session: { store: storePath }, + }; + const sessionKey = resolveMainSessionKey(cfg); + await seedSessionStore(storePath, sessionKey, { + lastChannel: "whatsapp", + lastProvider: "whatsapp", + lastTo: "+1555", + }); + + replySpy.mockResolvedValue({ text: "HEARTBEAT_OK" }); + + const result = await runHeartbeatOnce({ + cfg, + deps: { getQueueSize: () => 0, nowMs: () => 0 }, + }); + + // Capture spy state before withTempHeartbeatSandbox restores it in finally. + const replyCallCount = replySpy.mock.calls.length; + return { result, replyCallCount }; + }, + { prefix: "openclaw-hb-costcap-" }, + ); + } + + it("skips run when estimated cost exceeds maxCostPerRun", async () => { + const { result, replyCallCount } = await runWithCostCap({ maxCostPerRun: 0.0000001 }); + expect(result).toEqual({ status: "skipped", reason: "cost-cap-exceeded" }); + expect(replyCallCount).toBe(0); + }); + + it("proceeds when estimated cost is within maxCostPerRun", async () => { + const { result, replyCallCount } = await runWithCostCap({ maxCostPerRun: 100 }); + expect(result).toEqual(expect.objectContaining({ status: "ran" })); + expect(replyCallCount).toBe(1); + }); + + it("proceeds when maxCostPerRun is not set", async () => { + const { result, replyCallCount } = await runWithCostCap({ maxCostPerRun: undefined }); + expect(result).toEqual(expect.objectContaining({ status: "ran" })); + expect(replyCallCount).toBe(1); + }); + + it("skips all runs when maxCostPerRun is 0", async () => { + const { result, replyCallCount } = await runWithCostCap({ maxCostPerRun: 0 }); + expect(result).toEqual({ status: "skipped", reason: "cost-cap-exceeded" }); + expect(replyCallCount).toBe(0); + }); + + it("ignores negative maxCostPerRun (no cap)", async () => { + const { result, replyCallCount } = await runWithCostCap({ maxCostPerRun: -1 }); + expect(result).toEqual(expect.objectContaining({ status: "ran" })); + expect(replyCallCount).toBe(1); + }); + + it("ignores NaN maxCostPerRun (no cap)", async () => { + const { result, replyCallCount } = await runWithCostCap({ maxCostPerRun: NaN }); + expect(result).toEqual(expect.objectContaining({ status: "ran" })); + expect(replyCallCount).toBe(1); + }); + + it("ignores Infinity maxCostPerRun (no cap)", async () => { + const { result, replyCallCount } = await runWithCostCap({ maxCostPerRun: Infinity }); + expect(result).toEqual(expect.objectContaining({ status: "ran" })); + expect(replyCallCount).toBe(1); + }); + + it("skips cheap (non-free) model when maxCostPerRun is 0", async () => { + // gemini-2.0-flash costs $0.10/M input — non-zero — so with maxCostPerRun = 0 + // the condition (estimatedCost > 0) is true and the run is skipped. + // Note: a *truly* free model (cost.input = 0 in catalog) would NOT be skipped + // because 0 > 0 === false. That scenario is covered by the unit tests above. + const { result, replyCallCount } = await runWithCostCap({ + maxCostPerRun: 0, + model: "gemini-2.0-flash", // cheapest in table, but still > $0 for non-empty prompt + }); + // gemini-2.0-flash with HEARTBEAT.md context will have cost > 0, so it gets skipped + expect(result).toEqual({ status: "skipped", reason: "cost-cap-exceeded" }); + expect(replyCallCount).toBe(0); + }); +}); diff --git a/src/infra/heartbeat-runner.ts b/src/infra/heartbeat-runner.ts index 5e6ddcf07cf..c821436a955 100644 --- a/src/infra/heartbeat-runner.ts +++ b/src/infra/heartbeat-runner.ts @@ -11,6 +11,7 @@ import { } from "../agents/agent-scope.js"; import { appendCronStyleCurrentTimeLine } from "../agents/current-time.js"; import { resolveEffectiveMessagesConfig } from "../agents/identity.js"; +import { parseModelRef } from "../agents/model-selection.js"; import { DEFAULT_HEARTBEAT_FILENAME } from "../agents/workspace.js"; import { resolveHeartbeatReplyPayload } from "../auto-reply/heartbeat-reply-payload.js"; import { @@ -48,6 +49,7 @@ import { } from "../routing/session-key.js"; import { defaultRuntime, type RuntimeEnv } from "../runtime.js"; import { escapeRegExp } from "../utils.js"; +import { resolveModelCostConfig } from "../utils/usage-format.js"; import { formatErrorMessage, hasErrnoCode } from "./errors.js"; import { isWithinActiveHours } from "./heartbeat-active-hours.js"; import { @@ -717,6 +719,32 @@ export async function runHeartbeatOnce(opts: { bootstrapContextMode, } : { isHeartbeat: true, suppressToolErrorWarnings, bootstrapContextMode }; + + // Pre-call cost cap: estimate the cost of this run and skip if it + // exceeds maxCostPerRun. Estimation is approximate (chars / 4 for + // token count, hardcoded pricing table with conservative fallback). + const maxCostPerRun = heartbeat?.maxCostPerRun; + if (typeof maxCostPerRun === "number" && Number.isFinite(maxCostPerRun) && maxCostPerRun >= 0) { + const estimatedCost = estimateRunCost( + ctx.Body, + heartbeatModelOverride ?? resolveDefaultModelId(cfg), + cfg, + ); + if (estimatedCost > maxCostPerRun) { + log.warn("heartbeat: skipping run, estimated cost exceeds maxCostPerRun", { + estimatedCost: estimatedCost.toFixed(4), + maxCostPerRun, + model: heartbeatModelOverride ?? "default", + }); + emitHeartbeatEvent({ + status: "skipped", + reason: "cost-cap-exceeded", + durationMs: Date.now() - startedAt, + }); + return { status: "skipped", reason: "cost-cap-exceeded" }; + } + } + const replyResult = await getReplyFromConfig(ctx, replyOpts, cfg); const replyPayload = resolveHeartbeatReplyPayload(replyResult); const includeReasoning = heartbeat?.includeReasoning === true; @@ -1180,3 +1208,97 @@ export function startHeartbeatRunner(opts: { return { stop: cleanup, updateConfig }; } + +// --------------------------------------------------------------------------- +// Cost estimation for maxCostPerRun +// --------------------------------------------------------------------------- + +/** Per-input-token pricing (USD) for known model families. Conservative. */ +const MODEL_INPUT_PRICING: Record = { + // Anthropic + "claude-opus-4": 15 / 1_000_000, + "claude-sonnet-4": 3 / 1_000_000, + "claude-haiku": 0.25 / 1_000_000, + // OpenAI + "gpt-4o": 2.5 / 1_000_000, + "gpt-4-turbo": 10 / 1_000_000, + "gpt-4": 30 / 1_000_000, + "gpt-3.5": 0.5 / 1_000_000, + "o1-mini": 3 / 1_000_000, + o1: 15 / 1_000_000, + "o3-mini": 1.1 / 1_000_000, + o3: 10 / 1_000_000, + // Google + "gemini-1.5-pro": 3.5 / 1_000_000, + "gemini-2.0-flash": 0.1 / 1_000_000, +}; + +/** Conservative fallback: assumes expensive model if unknown. */ +const FALLBACK_PRICE_PER_TOKEN = 15 / 1_000_000; + +/** + * Sorted prefixes (longest first) to guarantee that "gpt-4o" matches + * before "gpt-4" and "gpt-4-turbo" matches before "gpt-4". + */ +const SORTED_PRICING_ENTRIES = Object.entries(MODEL_INPUT_PRICING).sort( + ([a], [b]) => b.length - a.length, +); + +/** + * Try the config model catalog first (exact match via resolveModelCostConfig), + * then fall back to prefix matching against the hardcoded table. + */ +function resolveInputPricePerToken(modelId: string, cfg?: OpenClawConfig): number { + if (cfg) { + const ref = parseModelRef(modelId, ""); + if (ref) { + const costConfig = resolveModelCostConfig({ + provider: ref.provider, + model: ref.model, + config: cfg, + }); + if (costConfig && Number.isFinite(costConfig.input) && costConfig.input >= 0) { + return costConfig.input / 1_000_000; + } + } + } + // Strip provider prefix (e.g. "openai/gpt-4o" or "openrouter/anthropic/claude-opus-4" + // -> "gpt-4o" / "claude-opus-4") for hardcoded table match. + const slash = modelId.lastIndexOf("/"); + const bareModel = slash !== -1 ? modelId.slice(slash + 1) : modelId; + const lower = bareModel.toLowerCase(); + for (const [prefix, price] of SORTED_PRICING_ENTRIES) { + if (lower.startsWith(prefix)) { + return price; + } + } + return FALLBACK_PRICE_PER_TOKEN; +} + +/** Approximate token count from character length (chars / 4). */ +function estimateTokenCount(text: string): number { + return Math.ceil(text.length / 4); +} + +/** + * Estimate input cost (USD) for a single run. Checks the config model + * catalog first, falls back to the hardcoded pricing table. + */ +export function estimateRunCost(promptBody: string, modelId: string, cfg?: OpenClawConfig): number { + return estimateTokenCount(promptBody) * resolveInputPricePerToken(modelId, cfg); +} + +/** Resolve the default model id from config. */ +function resolveDefaultModelId(cfg: OpenClawConfig): string { + const primary = cfg.agents?.defaults?.model; + if (typeof primary === "string") { + return primary; + } + if (primary && typeof primary === "object" && "primary" in primary) { + const p = (primary as Record).primary; + if (typeof p === "string") { + return p; + } + } + return "unknown"; +}