refactor(heartbeat): use config model catalog for cost estimation
Prefer resolveModelCostConfig() from the config model catalog over the hardcoded pricing table. Falls back to the hardcoded table when the model is not in the catalog. Also fixes provider-prefixed model names (e.g. "openai/gpt-4o") falling through to the fallback price, and accepts cost.input = 0 for free/local models. Fixes #49823 Ref #3181
This commit is contained in:
parent
191989a97d
commit
81e294118e
@ -98,6 +98,126 @@ describe("prefix matching ordering", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("config catalog lookup", () => {
|
||||
it("uses catalog pricing when model is defined in config", () => {
|
||||
const cfg = {
|
||||
models: {
|
||||
providers: {
|
||||
custom: {
|
||||
models: [
|
||||
{
|
||||
id: "my-cheap-model",
|
||||
cost: { input: 0.5, output: 1, cacheRead: 0, cacheWrite: 0 },
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
} as unknown as import("../config/config.js").OpenClawConfig;
|
||||
// 4000 chars = 1000 tokens. catalog says $0.50/M = $0.0005/1K
|
||||
const cost = estimateRunCost("x".repeat(4000), "custom/my-cheap-model", cfg);
|
||||
expect(cost).toBeCloseTo(0.0005, 5);
|
||||
});
|
||||
|
||||
it("falls back to hardcoded table when model not in catalog", () => {
|
||||
const cfg = {
|
||||
models: { providers: {} },
|
||||
} as unknown as import("../config/config.js").OpenClawConfig;
|
||||
const cost = estimateRunCost("x".repeat(4000), "claude-opus-4", cfg);
|
||||
// Should still use hardcoded table: $15/M = $0.015/1K
|
||||
expect(cost).toBeCloseTo(0.015, 3);
|
||||
});
|
||||
|
||||
it("falls back to hardcoded table when cfg is undefined", () => {
|
||||
const cost = estimateRunCost("x".repeat(4000), "claude-opus-4");
|
||||
expect(cost).toBeCloseTo(0.015, 3);
|
||||
});
|
||||
|
||||
it("falls back to hardcoded table for bare model name without provider/", () => {
|
||||
const cfg = {
|
||||
models: {
|
||||
providers: {
|
||||
anthropic: {
|
||||
models: [
|
||||
{
|
||||
id: "claude-opus-4",
|
||||
cost: { input: 15, output: 75, cacheRead: 0, cacheWrite: 0 },
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
} as unknown as import("../config/config.js").OpenClawConfig;
|
||||
// Bare name "claude-opus-4" has no provider/ prefix, so parseModelRef
|
||||
// returns provider="" which fails catalog lookup. Falls back to hardcoded.
|
||||
const cost = estimateRunCost("x".repeat(4000), "claude-opus-4", cfg);
|
||||
expect(cost).toBeCloseTo(0.015, 3);
|
||||
});
|
||||
|
||||
it("uses zero cost.input from catalog for free models", () => {
|
||||
const cfg = {
|
||||
models: {
|
||||
providers: {
|
||||
custom: {
|
||||
models: [
|
||||
{
|
||||
id: "free-model",
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
} as unknown as import("../config/config.js").OpenClawConfig;
|
||||
const cost = estimateRunCost("x".repeat(4000), "custom/free-model", cfg);
|
||||
expect(cost).toBe(0); // free model = $0
|
||||
});
|
||||
|
||||
it("uses zero cost.input from catalog (free/local model)", () => {
|
||||
const cfg = {
|
||||
models: {
|
||||
providers: {
|
||||
local: {
|
||||
models: [
|
||||
{
|
||||
id: "llama3",
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
} as unknown as import("../config/config.js").OpenClawConfig;
|
||||
const cost = estimateRunCost("x".repeat(4000), "local/llama3", cfg);
|
||||
expect(cost).toBe(0); // free model = $0
|
||||
});
|
||||
|
||||
it("matches hardcoded table with provider-prefixed model name", () => {
|
||||
// "openai/gpt-4o" should strip "openai/" and match "gpt-4o" in hardcoded table
|
||||
const cost = estimateRunCost("x".repeat(4000), "openai/gpt-4o");
|
||||
expect(cost).toBeCloseTo(0.0025, 4); // gpt-4o = $2.5/M
|
||||
});
|
||||
|
||||
it("ignores catalog entry with negative cost.input", () => {
|
||||
const cfg = {
|
||||
models: {
|
||||
providers: {
|
||||
custom: {
|
||||
models: [
|
||||
{
|
||||
id: "bad-model",
|
||||
cost: { input: -5, output: 1, cacheRead: 0, cacheWrite: 0 },
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
} as unknown as import("../config/config.js").OpenClawConfig;
|
||||
const cost = estimateRunCost("x".repeat(4000), "custom/bad-model", cfg);
|
||||
expect(cost).toBeCloseTo(0.015, 3); // fallback
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Integration tests: runHeartbeatOnce with maxCostPerRun
|
||||
// ---------------------------------------------------------------------------
|
||||
@ -200,4 +320,17 @@ describe("runHeartbeatOnce – maxCostPerRun", () => {
|
||||
expect(result).toEqual(expect.objectContaining({ status: "ran" }));
|
||||
expect(replyCallCount).toBe(1);
|
||||
});
|
||||
|
||||
it("proceeds for free catalog model when maxCostPerRun is 0", async () => {
|
||||
// Free model: estimated cost = $0. maxCostPerRun = 0. 0 > 0 = false, so run proceeds.
|
||||
// This uses heartbeat.model override which goes through the hardcoded table,
|
||||
// not the catalog. A truly free model via catalog would also return $0.
|
||||
const { result, replyCallCount } = await runWithCostCap({
|
||||
maxCostPerRun: 0,
|
||||
model: "gemini-2.0-flash", // cheapest in table, but still > $0 for non-empty prompt
|
||||
});
|
||||
// gemini-2.0-flash with HEARTBEAT.md context will have cost > 0, so it gets skipped
|
||||
expect(result).toEqual({ status: "skipped", reason: "cost-cap-exceeded" });
|
||||
expect(replyCallCount).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
@ -7,6 +7,7 @@ import {
|
||||
} from "../agents/agent-scope.js";
|
||||
import { appendCronStyleCurrentTimeLine } from "../agents/current-time.js";
|
||||
import { resolveEffectiveMessagesConfig } from "../agents/identity.js";
|
||||
import { parseModelRef } from "../agents/model-selection.js";
|
||||
import { DEFAULT_HEARTBEAT_FILENAME } from "../agents/workspace.js";
|
||||
import { resolveHeartbeatReplyPayload } from "../auto-reply/heartbeat-reply-payload.js";
|
||||
import {
|
||||
@ -44,6 +45,7 @@ import {
|
||||
} from "../routing/session-key.js";
|
||||
import { defaultRuntime, type RuntimeEnv } from "../runtime.js";
|
||||
import { escapeRegExp } from "../utils.js";
|
||||
import { resolveModelCostConfig } from "../utils/usage-format.js";
|
||||
import { formatErrorMessage, hasErrnoCode } from "./errors.js";
|
||||
import { isWithinActiveHours } from "./heartbeat-active-hours.js";
|
||||
import {
|
||||
@ -722,6 +724,7 @@ export async function runHeartbeatOnce(opts: {
|
||||
const estimatedCost = estimateRunCost(
|
||||
ctx.Body,
|
||||
heartbeatModelOverride ?? resolveDefaultModelId(cfg),
|
||||
cfg,
|
||||
);
|
||||
if (estimatedCost > maxCostPerRun) {
|
||||
log.warn("heartbeat: skipping run, estimated cost exceeds maxCostPerRun", {
|
||||
@ -1242,11 +1245,27 @@ const SORTED_PRICING_ENTRIES = Object.entries(MODEL_INPUT_PRICING).sort(
|
||||
);
|
||||
|
||||
/**
|
||||
* Match a model id against the pricing table. Tries longest-prefix-first
|
||||
* matching so "claude-opus-4-20260901" matches "claude-opus-4".
|
||||
* Try the config model catalog first (exact match via resolveModelCostConfig),
|
||||
* then fall back to prefix matching against the hardcoded table.
|
||||
*/
|
||||
function resolveInputPricePerToken(modelId: string): number {
|
||||
const lower = modelId.toLowerCase();
|
||||
function resolveInputPricePerToken(modelId: string, cfg?: OpenClawConfig): number {
|
||||
if (cfg) {
|
||||
const ref = parseModelRef(modelId, "");
|
||||
if (ref) {
|
||||
const costConfig = resolveModelCostConfig({
|
||||
provider: ref.provider,
|
||||
model: ref.model,
|
||||
config: cfg,
|
||||
});
|
||||
if (costConfig && Number.isFinite(costConfig.input) && costConfig.input >= 0) {
|
||||
return costConfig.input / 1_000_000;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Strip provider prefix (e.g. "openai/gpt-4o" -> "gpt-4o") for hardcoded table match.
|
||||
const slash = modelId.indexOf("/");
|
||||
const bareModel = slash !== -1 ? modelId.slice(slash + 1) : modelId;
|
||||
const lower = bareModel.toLowerCase();
|
||||
for (const [prefix, price] of SORTED_PRICING_ENTRIES) {
|
||||
if (lower.startsWith(prefix)) {
|
||||
return price;
|
||||
@ -1260,9 +1279,12 @@ function estimateTokenCount(text: string): number {
|
||||
return Math.ceil(text.length / 4);
|
||||
}
|
||||
|
||||
/** Estimate input cost (USD) for a single run. */
|
||||
export function estimateRunCost(promptBody: string, modelId: string): number {
|
||||
return estimateTokenCount(promptBody) * resolveInputPricePerToken(modelId);
|
||||
/**
|
||||
* Estimate input cost (USD) for a single run. Checks the config model
|
||||
* catalog first, falls back to the hardcoded pricing table.
|
||||
*/
|
||||
export function estimateRunCost(promptBody: string, modelId: string, cfg?: OpenClawConfig): number {
|
||||
return estimateTokenCount(promptBody) * resolveInputPricePerToken(modelId, cfg);
|
||||
}
|
||||
|
||||
/** Resolve the default model id from config. */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user