feat(heartbeat): add maxCostPerRun to cap embedded run cost

Adds a pre-call cost check in runHeartbeatOnce(). Estimated cost is
computed from context token count and model pricing before dispatching
to getReplyFromConfig(). If the estimate exceeds maxCostPerRun, the
run is skipped without invoking the model.

Fixes #49823
Ref #3181
This commit is contained in:
amabito 2026-03-18 23:23:05 +09:00
parent 4a44ca8f79
commit 907ff4ac8f
3 changed files with 313 additions and 0 deletions

View File

@ -262,6 +262,14 @@ export type AgentDefaultsConfig = {
* per-heartbeat token cost by avoiding the full session transcript.
*/
isolatedSession?: boolean;
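/**
* Maximum estimated cost (USD) for a single heartbeat run. If the
* estimated cost exceeds this value, the run is skipped before calling
* the model. The estimate is approximate and covers input tokens only:
* prompt length / 4 is used as the token count and priced from a
* built-in per-model table (unknown models are priced conservatively).
*
* A value of 0 skips every run with a non-empty prompt. Negative or
* non-finite values (NaN, Infinity) are ignored, i.e. no cap is applied.
*
* Default: undefined (no cap).
*/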
maxCostPerRun?: number;
/**
* When enabled, deliver the model's reasoning payload for heartbeat runs (when available)
* as a separate message prefixed with `Reasoning:` (same as `/reasoning on`).

View File

@ -0,0 +1,206 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { telegramPlugin } from "../../extensions/telegram/src/channel.js";
import { setTelegramRuntime } from "../../extensions/telegram/src/runtime.js";
import { whatsappPlugin } from "../../extensions/whatsapp/src/channel.js";
import { setWhatsAppRuntime } from "../../extensions/whatsapp/src/runtime.js";
import * as replyModule from "../auto-reply/reply.js";
import type { OpenClawConfig } from "../config/config.js";
import { resolveMainSessionKey } from "../config/sessions.js";
import { setActivePluginRegistry } from "../plugins/runtime.js";
import { createPluginRuntime } from "../plugins/runtime/index.js";
import { createTestRegistry } from "../test-utils/channel-plugins.js";
import { estimateRunCost, runHeartbeatOnce } from "./heartbeat-runner.js";
import { seedSessionStore, withTempHeartbeatSandbox } from "./heartbeat-runner.test-utils.js";
// Avoid pulling optional runtime deps during isolated runs.
vi.mock("jiti", () => ({ createJiti: () => () => ({}) }));
// ---------------------------------------------------------------------------
// Unit tests: estimateRunCost
// ---------------------------------------------------------------------------
describe("estimateRunCost", () => {
  // Shared fixture: 4000 chars is ~1000 tokens under the chars/4 heuristic.
  const thousandTokenPrompt = "x".repeat(4000);

  it("estimates cost for a known model", () => {
    // claude-opus-4 = $15/M input tokens, i.e. $0.015 per 1K tokens.
    expect(estimateRunCost(thousandTokenPrompt, "claude-opus-4-20260901")).toBeCloseTo(0.015, 3);
  });

  it("estimates cost for a cheap model", () => {
    // gemini-2.0-flash = $0.10/M input tokens, i.e. $0.0001 per 1K tokens.
    expect(estimateRunCost(thousandTokenPrompt, "gemini-2.0-flash")).toBeCloseTo(0.0001, 5);
  });

  it("uses conservative fallback for unknown models", () => {
    // Unknown ids are priced like the most expensive tier ($15/M).
    expect(estimateRunCost(thousandTokenPrompt, "some-unknown-model-v9")).toBeCloseTo(0.015, 3);
  });

  it("returns zero for empty prompt", () => {
    expect(estimateRunCost("", "claude-opus-4")).toBe(0);
  });

  it("returns non-zero for single-char prompt", () => {
    // ceil(1 / 4) rounds up to exactly one token.
    expect(estimateRunCost("a", "claude-opus-4")).toBe(15 / 1_000_000);
  });

  it("handles large context (128K chars)", () => {
    // 128000 chars = ~32000 tokens; at $15/M that is $0.48.
    const largePrompt = "x".repeat(128_000);
    expect(estimateRunCost(largePrompt, "claude-opus-4")).toBeCloseTo(0.48, 2);
  });

  it("is case-insensitive for model names", () => {
    const baseline = estimateRunCost(thousandTokenPrompt, "claude-opus-4");
    // Upper-case and mixed-case spellings must price identically to lower-case.
    for (const spelling of ["CLAUDE-OPUS-4", "Claude-Opus-4"]) {
      expect(baseline).toBe(estimateRunCost(thousandTokenPrompt, spelling));
    }
  });
});
describe("prefix matching ordering", () => {
  /** Cost of a ~1000-token (4000-char) prompt for the given model id. */
  const costFor = (modelId: string): number => estimateRunCost("x".repeat(4000), modelId);

  it("gpt-4o matches gpt-4o pricing, not gpt-4", () => {
    const specific = costFor("gpt-4o-2026-03-01");
    const generic = costFor("gpt-4-0613");
    expect(specific).toBeLessThan(generic);
    expect(specific).toBeCloseTo(0.0025, 4);
  });

  it("gpt-4-turbo matches gpt-4-turbo pricing, not gpt-4", () => {
    const specific = costFor("gpt-4-turbo-preview");
    const generic = costFor("gpt-4-0613");
    expect(specific).toBeLessThan(generic);
    expect(specific).toBeCloseTo(0.01, 4);
  });

  it("gpt-4 exact matches gpt-4 pricing", () => {
    expect(costFor("gpt-4-0613")).toBeCloseTo(0.03, 4);
  });

  it("o1-mini matches o1-mini pricing, not o1", () => {
    const mini = costFor("o1-mini-2026-01-01");
    const full = costFor("o1-2026-01-01");
    expect(mini).toBeLessThan(full);
    expect(mini).toBeCloseTo(0.003, 4);
  });

  it("o3-mini matches o3-mini pricing, not o3", () => {
    const mini = costFor("o3-mini");
    const full = costFor("o3-preview");
    expect(mini).toBeLessThan(full);
    expect(mini).toBeCloseTo(0.0011, 4);
  });
});
// ---------------------------------------------------------------------------
// Integration tests: runHeartbeatOnce with maxCostPerRun
// ---------------------------------------------------------------------------
// Before every test: hand one plugin runtime to both channel extensions and
// register the WhatsApp and Telegram plugins in the active registry.
beforeEach(() => {
  const pluginRuntime = createPluginRuntime();
  setTelegramRuntime(pluginRuntime);
  setWhatsAppRuntime(pluginRuntime);

  const registry = createTestRegistry([
    { pluginId: "whatsapp", plugin: whatsappPlugin, source: "test" },
    { pluginId: "telegram", plugin: telegramPlugin, source: "test" },
  ]);
  setActivePluginRegistry(registry);
});

// After every test: restore all vitest mocks/spies.
afterEach(() => {
  vi.restoreAllMocks();
});
describe("runHeartbeatOnce maxCostPerRun", () => {
  type CostCapParams = {
    maxCostPerRun?: number;
    model?: string;
  };

  /**
   * Runs a single heartbeat inside a temporary sandbox with the given cap and
   * reports the run result plus how many times the reply spy was invoked.
   */
  async function runWithCostCap(params: CostCapParams) {
    return withTempHeartbeatSandbox(
      async ({ tmpDir, storePath, replySpy }) => {
        const heartbeatModel = params.model ?? "claude-opus-4";
        const cfg: OpenClawConfig = {
          agents: {
            defaults: {
              workspace: tmpDir,
              heartbeat: {
                every: "5m",
                target: "whatsapp",
                model: heartbeatModel,
                maxCostPerRun: params.maxCostPerRun,
              },
            },
          },
          channels: { whatsapp: { allowFrom: ["*"] } },
          session: { store: storePath },
        };

        await seedSessionStore(storePath, resolveMainSessionKey(cfg), {
          lastChannel: "whatsapp",
          lastProvider: "whatsapp",
          lastTo: "+1555",
        });
        replySpy.mockResolvedValue({ text: "HEARTBEAT_OK" });

        const result = await runHeartbeatOnce({
          cfg,
          deps: { getQueueSize: () => 0, nowMs: () => 0 },
        });

        // Read the spy's call count here, inside the sandbox callback, because
        // withTempHeartbeatSandbox restores the spy in its finally block.
        return { result, replyCallCount: replySpy.mock.calls.length };
      },
      { prefix: "openclaw-hb-costcap-" },
    );
  }

  // One row per scenario, in execution order. `skips` says whether the cap is
  // expected to block the run before the model is called.
  const scenarios: Array<{ title: string; cap: number | undefined; skips: boolean }> = [
    { title: "skips run when estimated cost exceeds maxCostPerRun", cap: 0.0000001, skips: true },
    { title: "proceeds when estimated cost is within maxCostPerRun", cap: 100, skips: false },
    { title: "proceeds when maxCostPerRun is not set", cap: undefined, skips: false },
    { title: "skips all runs when maxCostPerRun is 0", cap: 0, skips: true },
    { title: "ignores negative maxCostPerRun (no cap)", cap: -1, skips: false },
    { title: "ignores NaN maxCostPerRun (no cap)", cap: NaN, skips: false },
    { title: "ignores Infinity maxCostPerRun (no cap)", cap: Infinity, skips: false },
  ];

  for (const { title, cap, skips } of scenarios) {
    it(title, async () => {
      const { result, replyCallCount } = await runWithCostCap({ maxCostPerRun: cap });
      if (skips) {
        expect(result).toEqual({ status: "skipped", reason: "cost-cap-exceeded" });
        expect(replyCallCount).toBe(0);
      } else {
        expect(result).toEqual(expect.objectContaining({ status: "ran" }));
        expect(replyCallCount).toBe(1);
      }
    });
  }
});

View File

@ -713,6 +713,31 @@ export async function runHeartbeatOnce(opts: {
bootstrapContextMode,
}
: { isHeartbeat: true, suppressToolErrorWarnings, bootstrapContextMode };
// Pre-call cost cap: estimate the cost of this run and skip if it
// exceeds maxCostPerRun. Estimation is approximate (chars / 4 for
// token count, hardcoded pricing table with conservative fallback).
const maxCostPerRun = heartbeat?.maxCostPerRun;
if (typeof maxCostPerRun === "number" && Number.isFinite(maxCostPerRun) && maxCostPerRun >= 0) {
const estimatedCost = estimateRunCost(
ctx.Body,
heartbeatModelOverride ?? resolveDefaultModelId(cfg),
);
if (estimatedCost > maxCostPerRun) {
log.warn("heartbeat: skipping run, estimated cost exceeds maxCostPerRun", {
estimatedCost: estimatedCost.toFixed(4),
maxCostPerRun,
model: heartbeatModelOverride ?? "default",
});
emitHeartbeatEvent({
status: "skipped",
reason: "cost-cap-exceeded",
durationMs: Date.now() - startedAt,
});
return { status: "skipped", reason: "cost-cap-exceeded" };
}
}
const replyResult = await getReplyFromConfig(ctx, replyOpts, cfg);
const replyPayload = resolveHeartbeatReplyPayload(replyResult);
const includeReasoning = heartbeat?.includeReasoning === true;
@ -1180,3 +1205,77 @@ export function startHeartbeatRunner(opts: {
return { stop: cleanup, updateConfig };
}
// ---------------------------------------------------------------------------
// Cost estimation for maxCostPerRun
// ---------------------------------------------------------------------------
/**
 * Per-input-token pricing (USD), keyed by lower-case model-family prefix.
 * Values are intended to be conservative; they feed an estimate, not billing.
 */
const MODEL_INPUT_PRICING: Record<string, number> = {
  // Anthropic
  "claude-opus-4": 15 / 1_000_000,
  "claude-sonnet-4": 3 / 1_000_000,
  "claude-haiku": 0.25 / 1_000_000,
  // OpenAI
  "gpt-4o": 2.5 / 1_000_000,
  "gpt-4-turbo": 10 / 1_000_000,
  "gpt-4": 30 / 1_000_000,
  "gpt-3.5": 0.5 / 1_000_000,
  "o1-mini": 3 / 1_000_000,
  "o1": 15 / 1_000_000,
  "o3-mini": 1.1 / 1_000_000,
  "o3": 10 / 1_000_000,
  // Google
  "gemini-1.5-pro": 3.5 / 1_000_000,
  "gemini-2.0-flash": 0.1 / 1_000_000,
};

/** Conservative fallback: assumes an expensive model when the id is unknown. */
const FALLBACK_PRICE_PER_TOKEN = 15 / 1_000_000;

/**
 * Pricing entries sorted by prefix length (longest first) so that "gpt-4o"
 * and "gpt-4-turbo" are tested before the shorter "gpt-4", and "o1-mini"
 * before "o1".
 */
const SORTED_PRICING_ENTRIES = Object.entries(MODEL_INPUT_PRICING).sort(
  ([a], [b]) => b.length - a.length,
);

/**
 * Returns the price of the longest pricing prefix that `candidate` starts
 * with, or `undefined` when no prefix matches. `candidate` must already be
 * lower-cased.
 */
function matchPricingPrefix(candidate: string): number | undefined {
  for (const [prefix, price] of SORTED_PRICING_ENTRIES) {
    if (candidate.startsWith(prefix)) {
      return price;
    }
  }
  return undefined;
}

/**
 * Match a model id against the pricing table.
 *
 * Matching is case-insensitive and longest-prefix-first, so
 * "claude-opus-4-20260901" resolves to the "claude-opus-4" entry.
 *
 * Model ids may carry one or more provider qualifiers separated by "/"
 * (e.g. "anthropic/claude-opus-4" or "openrouter/google/gemini-2.0-flash").
 * The full id is tried first; if nothing matches, leading "segment/" parts
 * are stripped one at a time and matching is retried. Without this, every
 * qualified id would be priced at the fallback rate and cheap models could
 * trip the cost cap spuriously.
 *
 * @param modelId Model identifier, optionally provider-qualified.
 * @returns USD per input token, or the conservative fallback when unknown.
 */
function resolveInputPricePerToken(modelId: string): number {
  let candidate = modelId.trim().toLowerCase();
  for (;;) {
    const price = matchPricingPrefix(candidate);
    if (price !== undefined) {
      return price;
    }
    const slash = candidate.indexOf("/");
    if (slash === -1) {
      return FALLBACK_PRICE_PER_TOKEN;
    }
    // Drop the leading "provider/" segment and try the remainder.
    candidate = candidate.slice(slash + 1);
  }
}

/** Approximate token count from character length (chars / 4, rounded up). */
function estimateTokenCount(text: string): number {
  return Math.ceil(text.length / 4);
}

/**
 * Estimate input cost (USD) for a single run.
 *
 * Only input tokens are counted; output tokens are not part of the estimate.
 *
 * @param promptBody Prompt text whose length drives the token estimate.
 * @param modelId Model identifier used to look up per-token pricing.
 * @returns Estimated cost in USD; 0 for an empty prompt.
 */
export function estimateRunCost(promptBody: string, modelId: string): number {
  return estimateTokenCount(promptBody) * resolveInputPricePerToken(modelId);
}
/**
 * Resolve the default model id from `cfg.agents.defaults.model`.
 *
 * The setting is accepted either as a plain string or as an object with a
 * string `primary` property. Any other shape yields the literal "unknown".
 */
function resolveDefaultModelId(cfg: OpenClawConfig): string {
  const modelSetting = cfg.agents?.defaults?.model;
  if (typeof modelSetting === "string") {
    return modelSetting;
  }
  // Object form: read `primary` only when the property is actually present.
  const nestedPrimary =
    modelSetting && typeof modelSetting === "object" && "primary" in modelSetting
      ? (modelSetting as Record<string, unknown>).primary
      : undefined;
  return typeof nestedPrimary === "string" ? nestedPrimary : "unknown";
}