fix: per-model cooldown scope + stepped backoff + user-facing rate-limit message
Combines ideas from PRs #45113, #31962, and #45763 to address three cooldown-related issues:

1. Stepped cooldown (30s → 1m → 5m cap) replaces the aggressive exponential formula (1m → 5m → 25m → 1h), which locked out providers for far longer than the actual API rate-limit window.
2. Per-model cooldown scoping: rate_limit cooldowns now record which model triggered them. When a different model on the same auth profile is requested, the cooldown is bypassed, so one model hitting a 429 no longer blocks all other models on the same provider.
3. FallbackSummaryError with soonest-expiry countdown: when all candidates are exhausted, the user sees a clear message such as '⚠️ Rate-limited — ready in ~28s' instead of a generic failure.

Files changed:
- types.ts: add cooldownReason/cooldownModel to ProfileUsageStats
- usage.ts: stepped formula, model-aware isProfileInCooldown, modelId threading through computeNextProfileUsageStats/markAuthProfileFailure
- model-fallback.ts: FallbackSummaryError class, model-aware availability check, soonestCooldownExpiry computation
- pi-embedded-runner/run.ts: thread modelId into failure recording
- agent-runner-execution.ts: buildCopilotCooldownMessage helper, rate-limit detection branch in error handler
- usage.test.ts: update expected cooldown value (60s → 30s)
This commit is contained in:
parent
c4a4050ce4
commit
b5913862ac
@ -51,6 +51,8 @@ export type AuthProfileFailureReason =
|
||||
export type ProfileUsageStats = {
|
||||
lastUsed?: number;
|
||||
cooldownUntil?: number;
|
||||
cooldownReason?: AuthProfileFailureReason;
|
||||
cooldownModel?: string;
|
||||
disabledUntil?: number;
|
||||
disabledReason?: AuthProfileFailureReason;
|
||||
errorCount?: number;
|
||||
|
||||
@ -621,8 +621,8 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
|
||||
errorCount: 3,
|
||||
lastFailureAt: now - 60_000,
|
||||
}),
|
||||
// errorCount resets → calculateAuthProfileCooldownMs(1) = 60_000
|
||||
expectedUntil: (now: number) => now + 60_000,
|
||||
// errorCount resets → calculateAuthProfileCooldownMs(1) = 30_000 (stepped: 30s → 1m → 5m)
|
||||
expectedUntil: (now: number) => now + 30_000,
|
||||
readUntil: (stats: WindowStats | undefined) => stats?.cooldownUntil,
|
||||
},
|
||||
{
|
||||
|
||||
@ -44,6 +44,7 @@ export function isProfileInCooldown(
|
||||
store: AuthProfileStore,
|
||||
profileId: string,
|
||||
now?: number,
|
||||
forModel?: string,
|
||||
): boolean {
|
||||
if (isAuthCooldownBypassedForProvider(store.profiles[profileId]?.provider)) {
|
||||
return false;
|
||||
@ -52,6 +53,16 @@ export function isProfileInCooldown(
|
||||
if (!stats) {
|
||||
return false;
|
||||
}
|
||||
// Model-aware bypass: if the cooldown was caused by a rate_limit on a
|
||||
// specific model and the caller is requesting a *different* model, allow it.
|
||||
if (
|
||||
forModel &&
|
||||
stats.cooldownReason === "rate_limit" &&
|
||||
stats.cooldownModel &&
|
||||
stats.cooldownModel !== forModel
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
const unusableUntil = resolveProfileUnusableUntil(stats);
|
||||
const ts = now ?? Date.now();
|
||||
return unusableUntil ? ts < unusableUntil : false;
|
||||
@ -212,6 +223,8 @@ export function clearExpiredCooldowns(store: AuthProfileStore, now?: number): bo
|
||||
|
||||
if (cooldownExpired) {
|
||||
stats.cooldownUntil = undefined;
|
||||
stats.cooldownReason = undefined;
|
||||
stats.cooldownModel = undefined;
|
||||
profileMutated = true;
|
||||
}
|
||||
if (disabledExpired) {
|
||||
@ -275,10 +288,9 @@ export async function markAuthProfileUsed(params: {
|
||||
|
||||
/**
 * Compute how long an auth profile stays in cooldown after a failure.
 *
 * Uses a stepped backoff keyed on the error count instead of an exponential
 * curve, so a profile is never locked out for more than five minutes:
 *
 * - 1 error (or fewer) → 30 seconds
 * - 2 errors           → 1 minute
 * - 3 or more errors   → 5 minutes (cap)
 *
 * @param errorCount - Number of recorded errors; values below 1 are treated as 1.
 * @returns The cooldown duration in milliseconds.
 */
export function calculateAuthProfileCooldownMs(errorCount: number): number {
  // Clamp so zero/negative counts land on the first step.
  const normalized = Math.max(1, errorCount);
  if (normalized <= 1) {
    return 30_000; // 30 seconds
  }
  if (normalized <= 2) {
    return 60_000; // 1 minute
  }
  return 5 * 60_000; // 5 minutes max
}
|
||||
|
||||
type ResolvedAuthCooldownConfig = {
|
||||
@ -366,6 +378,8 @@ function resetUsageStats(
|
||||
...existing,
|
||||
errorCount: 0,
|
||||
cooldownUntil: undefined,
|
||||
cooldownReason: undefined,
|
||||
cooldownModel: undefined,
|
||||
disabledUntil: undefined,
|
||||
disabledReason: undefined,
|
||||
failureCounts: undefined,
|
||||
@ -398,6 +412,7 @@ function computeNextProfileUsageStats(params: {
|
||||
now: number;
|
||||
reason: AuthProfileFailureReason;
|
||||
cfgResolved: ResolvedAuthCooldownConfig;
|
||||
modelId?: string;
|
||||
}): ProfileUsageStats {
|
||||
const windowMs = params.cfgResolved.failureWindowMs;
|
||||
const windowExpired =
|
||||
@ -451,6 +466,19 @@ function computeNextProfileUsageStats(params: {
|
||||
now: params.now,
|
||||
recomputedUntil: params.now + backoffMs,
|
||||
});
|
||||
// Preserve existing cooldown metadata if the cooldown window is still
|
||||
// active; otherwise record the new reason/model.
|
||||
const existingCooldownActive =
|
||||
typeof params.existing.cooldownUntil === "number" &&
|
||||
params.existing.cooldownUntil > params.now;
|
||||
if (existingCooldownActive) {
|
||||
updatedStats.cooldownReason = params.existing.cooldownReason;
|
||||
updatedStats.cooldownModel = params.existing.cooldownModel;
|
||||
} else {
|
||||
updatedStats.cooldownReason = params.reason;
|
||||
updatedStats.cooldownModel =
|
||||
params.reason === "rate_limit" ? params.modelId : undefined;
|
||||
}
|
||||
}
|
||||
|
||||
return updatedStats;
|
||||
@ -468,8 +496,9 @@ export async function markAuthProfileFailure(params: {
|
||||
cfg?: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
runId?: string;
|
||||
modelId?: string;
|
||||
}): Promise<void> {
|
||||
const { store, profileId, reason, agentDir, cfg, runId } = params;
|
||||
const { store, profileId, reason, agentDir, cfg, runId, modelId } = params;
|
||||
const profile = store.profiles[profileId];
|
||||
if (!profile || isAuthCooldownBypassedForProvider(profile.provider)) {
|
||||
return;
|
||||
@ -498,6 +527,7 @@ export async function markAuthProfileFailure(params: {
|
||||
now,
|
||||
reason,
|
||||
cfgResolved,
|
||||
modelId,
|
||||
});
|
||||
nextStats = computed;
|
||||
updateUsageStatsEntry(freshStore, profileId, () => computed);
|
||||
@ -536,6 +566,7 @@ export async function markAuthProfileFailure(params: {
|
||||
now,
|
||||
reason,
|
||||
cfgResolved,
|
||||
modelId,
|
||||
});
|
||||
nextStats = computed;
|
||||
updateUsageStatsEntry(store, profileId, () => computed);
|
||||
|
||||
@ -34,6 +34,32 @@ import { isLikelyContextOverflowError } from "./pi-embedded-helpers.js";
|
||||
|
||||
const log = createSubsystemLogger("model-fallback");
|
||||
|
||||
/**
 * Structured error thrown when all model fallback candidates have been
 * exhausted. Carries per-attempt details so callers can build informative
 * user-facing messages (e.g. "rate-limited, retry in 30 s").
 *
 * - `attempts` holds the attempt records passed to the constructor.
 * - `soonestCooldownExpiry` holds the value passed to the constructor: a
 *   number, or `null` when no expiry is known.
 * - The standard `cause` property is set only when a cause is supplied.
 */
export class FallbackSummaryError extends Error {
  readonly attempts: FallbackAttempt[];
  readonly soonestCooldownExpiry: number | null;

  /**
   * @param message - Summary text for the error.
   * @param attempts - The attempt records to expose on the error.
   * @param soonestCooldownExpiry - Soonest cooldown expiry, or `null` if unknown.
   * @param cause - Optional underlying error to chain via `Error.cause`.
   */
  constructor(
    message: string,
    attempts: FallbackAttempt[],
    soonestCooldownExpiry: number | null,
    cause?: Error,
  ) {
    // Pass the options bag only when there is a cause: `{ cause: undefined }`
    // would still install an own `cause` property (set to undefined) on the
    // error, which shows up in `"cause" in err` checks and inspected output.
    super(message, cause === undefined ? undefined : { cause });
    this.name = "FallbackSummaryError";
    this.attempts = attempts;
    this.soonestCooldownExpiry = soonestCooldownExpiry;
  }
}
|
||||
|
||||
/**
 * Type guard for {@link FallbackSummaryError}.
 *
 * @param err - Any caught value.
 * @returns `true` when `err` is an instance of `FallbackSummaryError`,
 *   narrowing its type for the caller; `false` otherwise.
 */
export function isFallbackSummaryError(err: unknown): err is FallbackSummaryError {
  if (err instanceof FallbackSummaryError) {
    return true;
  }
  return false;
}
|
||||
|
||||
export type ModelFallbackRunOptions = {
|
||||
allowTransientCooldownProbe?: boolean;
|
||||
};
|
||||
@ -189,17 +215,18 @@ function throwFallbackFailureSummary(params: {
|
||||
lastError: unknown;
|
||||
label: string;
|
||||
formatAttempt: (attempt: FallbackAttempt) => string;
|
||||
soonestCooldownExpiry?: number | null;
|
||||
}): never {
|
||||
if (params.attempts.length <= 1 && params.lastError) {
|
||||
throw params.lastError;
|
||||
}
|
||||
const summary =
|
||||
params.attempts.length > 0 ? params.attempts.map(params.formatAttempt).join(" | ") : "unknown";
|
||||
throw new Error(
|
||||
throw new FallbackSummaryError(
|
||||
`All ${params.label} failed (${params.attempts.length || params.candidates.length}): ${summary}`,
|
||||
{
|
||||
cause: params.lastError instanceof Error ? params.lastError : undefined,
|
||||
},
|
||||
params.attempts,
|
||||
params.soonestCooldownExpiry ?? null,
|
||||
params.lastError instanceof Error ? params.lastError : undefined,
|
||||
);
|
||||
}
|
||||
|
||||
@ -548,7 +575,7 @@ export async function runWithModelFallback<T>(params: {
|
||||
store: authStore,
|
||||
provider: candidate.provider,
|
||||
});
|
||||
const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
|
||||
const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id, undefined, candidate.model));
|
||||
|
||||
if (profileIds.length > 0 && !isAnyProfileAvailable) {
|
||||
// All profiles for this provider are in cooldown.
|
||||
@ -771,6 +798,19 @@ export async function runWithModelFallback<T>(params: {
|
||||
`${attempt.provider}/${attempt.model}: ${attempt.error}${
|
||||
attempt.reason ? ` (${attempt.reason})` : ""
|
||||
}`,
|
||||
soonestCooldownExpiry: (() => {
|
||||
if (!authStore) return null;
|
||||
const allProfileIds = new Set<string>();
|
||||
for (const c of candidates) {
|
||||
const ids = resolveAuthProfileOrder({
|
||||
cfg: params.cfg,
|
||||
store: authStore,
|
||||
provider: c.provider,
|
||||
});
|
||||
for (const id of ids) allProfileIds.add(id);
|
||||
}
|
||||
return getSoonestCooldownExpiry(authStore, [...allProfileIds]);
|
||||
})(),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@ -832,6 +832,7 @@ export async function runEmbeddedPiAgent(
|
||||
reason?: AuthProfileFailureReason | null;
|
||||
config?: RunEmbeddedPiAgentParams["config"];
|
||||
agentDir?: RunEmbeddedPiAgentParams["agentDir"];
|
||||
modelId?: string;
|
||||
}) => {
|
||||
const { profileId, reason } = failure;
|
||||
if (!profileId || !reason || reason === "timeout") {
|
||||
@ -844,6 +845,7 @@ export async function runEmbeddedPiAgent(
|
||||
cfg: params.config,
|
||||
agentDir,
|
||||
runId: params.runId,
|
||||
modelId: failure.modelId,
|
||||
});
|
||||
};
|
||||
const resolveAuthProfileFailureReason = (
|
||||
@ -1382,6 +1384,7 @@ export async function runEmbeddedPiAgent(
|
||||
await maybeMarkAuthProfileFailure({
|
||||
profileId: lastProfileId,
|
||||
reason: promptProfileFailureReason,
|
||||
modelId,
|
||||
});
|
||||
const promptFailoverFailure =
|
||||
promptFailoverReason !== null || isFailoverErrorMessage(errorText);
|
||||
@ -1523,6 +1526,7 @@ export async function runEmbeddedPiAgent(
|
||||
await maybeMarkAuthProfileFailure({
|
||||
profileId: lastProfileId,
|
||||
reason,
|
||||
modelId,
|
||||
});
|
||||
if (timedOut && !isProbeSession) {
|
||||
log.warn(`Profile ${lastProfileId} timed out. Trying next account...`);
|
||||
|
||||
@ -5,6 +5,7 @@ import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-bu
|
||||
import { runCliAgent } from "../../agents/cli-runner.js";
|
||||
import { getCliSessionId } from "../../agents/cli-session.js";
|
||||
import { runWithModelFallback } from "../../agents/model-fallback.js";
|
||||
import { isFallbackSummaryError } from "../../agents/model-fallback.js";
|
||||
import { isCliProvider } from "../../agents/model-selection.js";
|
||||
import {
|
||||
BILLING_ERROR_USER_MESSAGE,
|
||||
@ -12,6 +13,7 @@ import {
|
||||
isContextOverflowError,
|
||||
isBillingErrorMessage,
|
||||
isLikelyContextOverflowError,
|
||||
isRateLimitErrorMessage,
|
||||
isTransientHttpError,
|
||||
sanitizeUserFacingText,
|
||||
} from "../../agents/pi-embedded-helpers.js";
|
||||
@ -74,6 +76,26 @@ export type AgentRunLoopResult =
|
||||
}
|
||||
| { kind: "final"; payload: ReplyPayload };
|
||||
|
||||
/**
 * Build a human-friendly rate-limit message for the user.
 *
 * When `err` is a FallbackSummaryError whose `soonestCooldownExpiry` is a
 * finite timestamp later than `now`, the message includes a countdown:
 * seconds when at most 60 s remain, otherwise whole minutes (both rounded
 * up). In every other case a generic "try again in a few minutes" message
 * is returned.
 *
 * @param err - The caught value; may be anything.
 * @param now - Reference time in epoch milliseconds. Defaults to the current
 *   time; accepting it as a parameter keeps the countdown deterministic.
 * @returns The user-facing message text.
 */
function buildCopilotCooldownMessage(err: unknown, now: number = Date.now()): string {
  const genericMessage =
    "⚠️ All models are temporarily rate-limited. Please try again in a few minutes.";

  if (!isFallbackSummaryError(err)) {
    return genericMessage;
  }

  const expiry = err.soonestCooldownExpiry;
  // The clock is sampled once (via `now`) so the "is it in the future?" check
  // and the remaining-time arithmetic agree; sampling twice could let the
  // expiry pass in between and produce a "~0s" countdown.
  if (typeof expiry !== "number" || !Number.isFinite(expiry) || !(expiry > now)) {
    return genericMessage;
  }

  const secsLeft = Math.ceil((expiry - now) / 1000);
  if (secsLeft <= 60) {
    return `⚠️ Rate-limited — ready in ~${secsLeft}s. Please wait a moment.`;
  }
  const minsLeft = Math.ceil(secsLeft / 60);
  return `⚠️ Rate-limited — ready in ~${minsLeft} min. Please try again shortly.`;
}
|
||||
|
||||
export async function runAgentTurnWithFallback(params: {
|
||||
commandBody: string;
|
||||
followupRun: FollowupRun;
|
||||
@ -623,17 +645,21 @@ export async function runAgentTurnWithFallback(params: {
|
||||
}
|
||||
|
||||
defaultRuntime.error(`Embedded agent failed before reply: ${message}`);
|
||||
const isRateLimit =
|
||||
isRateLimitErrorMessage(message) || isFallbackSummaryError(err);
|
||||
const safeMessage = isTransientHttp
|
||||
? sanitizeUserFacingText(message, { errorContext: true })
|
||||
: message;
|
||||
const trimmedMessage = safeMessage.replace(/\.\s*$/, "");
|
||||
const fallbackText = isBilling
|
||||
? BILLING_ERROR_USER_MESSAGE
|
||||
: isContextOverflow
|
||||
? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
|
||||
: isRoleOrderingError
|
||||
? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
|
||||
: `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`;
|
||||
: isRateLimit
|
||||
? buildCopilotCooldownMessage(err)
|
||||
: isContextOverflow
|
||||
? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
|
||||
: isRoleOrderingError
|
||||
? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
|
||||
: `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`;
|
||||
|
||||
return {
|
||||
kind: "final",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user