Merge 6d58d2f381d74da1ffef86e47fdb1d93b03b1660 into 9fb78453e088cd7b553d7779faa0de5c83708e70

This commit is contained in:
kiranvk2011 2026-03-20 22:18:56 -07:00 committed by GitHub
commit f27d6e20b5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 346 additions and 32 deletions

View File

@ -0,0 +1 @@
- Agents/cooldowns: scope rate-limit cooldowns per model so one 429 no longer blocks every model on the same auth profile, replace the exponential 1 min → 1 h escalation with a stepped 30 s / 1 min / 5 min ladder, and surface a user-facing countdown message when all models are rate-limited. (#49834) Thanks @kiranvk-2011.

View File

@ -230,12 +230,12 @@ describe("markAuthProfileFailure", () => {
const stats = store.usageStats?.["anthropic:default"];
// Error count should reset to 1 (not escalate to 4) because the
// previous cooldown expired. Cooldown should be ~30s, not ~5 min.
expect(stats?.errorCount).toBe(1);
expect(stats?.failureCounts?.rate_limit).toBe(1);
const cooldownMs = (stats?.cooldownUntil ?? 0) - now;
// calculateAuthProfileCooldownMs(1) = 60_000 (1 minute)
expect(cooldownMs).toBeLessThan(120_000);
// calculateAuthProfileCooldownMs(1) = 30_000 (stepped: 30s → 1m → 5m)
expect(cooldownMs).toBeLessThan(60_000);
expect(cooldownMs).toBeGreaterThan(0);
} finally {
fs.rmSync(agentDir, { recursive: true, force: true });
@ -267,11 +267,11 @@ describe("markAuthProfileFailure", () => {
});
describe("calculateAuthProfileCooldownMs", () => {
it("applies exponential backoff with a 1h cap", () => {
expect(calculateAuthProfileCooldownMs(1)).toBe(60_000);
expect(calculateAuthProfileCooldownMs(2)).toBe(5 * 60_000);
expect(calculateAuthProfileCooldownMs(3)).toBe(25 * 60_000);
expect(calculateAuthProfileCooldownMs(4)).toBe(60 * 60_000);
expect(calculateAuthProfileCooldownMs(5)).toBe(60 * 60_000);
it("applies stepped backoff with a 5-min cap", () => {
expect(calculateAuthProfileCooldownMs(1)).toBe(30_000); // 30 seconds
expect(calculateAuthProfileCooldownMs(2)).toBe(60_000); // 1 minute
expect(calculateAuthProfileCooldownMs(3)).toBe(5 * 60_000); // 5 minutes
expect(calculateAuthProfileCooldownMs(4)).toBe(5 * 60_000); // 5 minutes (cap)
expect(calculateAuthProfileCooldownMs(5)).toBe(5 * 60_000); // 5 minutes (cap)
});
});

View File

@ -51,6 +51,8 @@ export type AuthProfileFailureReason =
export type ProfileUsageStats = {
lastUsed?: number;
cooldownUntil?: number;
cooldownReason?: AuthProfileFailureReason;
cooldownModel?: string;
disabledUntil?: number;
disabledReason?: AuthProfileFailureReason;
errorCount?: number;

View File

@ -132,6 +132,53 @@ describe("isProfileInCooldown", () => {
});
expect(isProfileInCooldown(store, "kilocode:default")).toBe(false);
});
// Model-scoped rate_limit cooldown: only the model that triggered the 429 is
// blocked; other models on the same profile may proceed.
it("returns false for a different model when cooldown is model-scoped (rate_limit)", () => {
const store = makeStore({
"github-copilot:github": {
cooldownUntil: Date.now() + 60_000,
cooldownReason: "rate_limit",
cooldownModel: "claude-sonnet-4.6",
},
});
// Different model bypasses the cooldown
expect(isProfileInCooldown(store, "github-copilot:github", undefined, "gpt-4.1")).toBe(false);
// Same model is still blocked
expect(
isProfileInCooldown(store, "github-copilot:github", undefined, "claude-sonnet-4.6"),
).toBe(true);
// No model specified — blocked (conservative)
expect(isProfileInCooldown(store, "github-copilot:github")).toBe(true);
});
// cooldownModel left undefined means the cooldown applies profile-wide: every
// model is blocked, no matter which model the caller asks about.
it("returns true for all models when cooldownModel is undefined (profile-wide)", () => {
const store = makeStore({
"github-copilot:github": {
cooldownUntil: Date.now() + 60_000,
cooldownReason: "rate_limit",
cooldownModel: undefined,
},
});
// Both a matching and a non-matching model name stay blocked.
expect(
isProfileInCooldown(store, "github-copilot:github", undefined, "claude-sonnet-4.6"),
).toBe(true);
expect(isProfileInCooldown(store, "github-copilot:github", undefined, "gpt-4.1")).toBe(true);
});
// An active disabledUntil window (e.g. billing) is profile-wide and must not
// be short-circuited by the per-model rate_limit bypass.
it("does not bypass model-scoped cooldown when disabledUntil is active", () => {
const store = makeStore({
"github-copilot:github": {
cooldownUntil: Date.now() + 60_000,
cooldownReason: "rate_limit",
cooldownModel: "claude-sonnet-4.6",
disabledUntil: Date.now() + 120_000,
disabledReason: "billing",
},
});
// Even though cooldownModel is for a different model, billing disable
// should keep the profile blocked for all models.
expect(isProfileInCooldown(store, "github-copilot:github", undefined, "gpt-4.1")).toBe(true);
});
});
describe("resolveProfilesUnavailableReason", () => {
@ -621,8 +668,8 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
errorCount: 3,
lastFailureAt: now - 60_000,
}),
// errorCount resets → calculateAuthProfileCooldownMs(1) = 60_000
expectedUntil: (now: number) => now + 60_000,
// errorCount resets → calculateAuthProfileCooldownMs(1) = 30_000 (stepped: 30s → 1m → 5m)
expectedUntil: (now: number) => now + 30_000,
readUntil: (stats: WindowStats | undefined) => stats?.cooldownUntil,
},
{
@ -675,3 +722,125 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
});
}
});
describe("markAuthProfileFailure — per-model cooldown metadata", () => {
  const profileId = "github-copilot:github";

  // Store pre-seeded with a copilot API-key profile plus the given usage stats.
  function copilotStore(usageStats: AuthProfileStore["usageStats"]): AuthProfileStore {
    const store = makeStore(usageStats);
    store.profiles[profileId] = {
      type: "api_key",
      provider: "github-copilot",
      key: "ghu_test",
    };
    return store;
  }

  // Record a rate_limit failure at a fixed timestamp using fake timers.
  async function failWithRateLimit(
    store: AuthProfileStore,
    now: number,
    modelId?: string,
  ): Promise<void> {
    vi.useFakeTimers();
    vi.setSystemTime(now);
    try {
      await markAuthProfileFailure({ store, profileId, reason: "rate_limit", modelId });
    } finally {
      vi.useRealTimers();
    }
  }

  it("records cooldownModel on first rate_limit failure", async () => {
    const store = copilotStore({});
    await failWithRateLimit(store, 1_000_000, "claude-sonnet-4.6");
    const stats = store.usageStats?.[profileId];
    expect(stats?.cooldownReason).toBe("rate_limit");
    expect(stats?.cooldownModel).toBe("claude-sonnet-4.6");
  });

  it("widens cooldownModel to undefined when a different model fails during active cooldown", async () => {
    const now = 1_000_000;
    const store = copilotStore({
      [profileId]: {
        cooldownUntil: now + 30_000,
        cooldownReason: "rate_limit",
        cooldownModel: "claude-sonnet-4.6",
        errorCount: 1,
        lastFailureAt: now - 1000,
      },
    });
    // A second model failing inside the active window widens scope to all models.
    await failWithRateLimit(store, now, "gpt-4.1");
    const stats = store.usageStats?.[profileId];
    expect(stats?.cooldownModel).toBeUndefined();
    expect(stats?.cooldownReason).toBe("rate_limit");
  });

  it("preserves cooldownModel when the same model fails again during active cooldown", async () => {
    const now = 1_000_000;
    const store = copilotStore({
      [profileId]: {
        cooldownUntil: now + 30_000,
        cooldownReason: "rate_limit",
        cooldownModel: "claude-sonnet-4.6",
        errorCount: 1,
        lastFailureAt: now - 1000,
      },
    });
    // Same model failing again keeps the narrow, model-scoped cooldown.
    await failWithRateLimit(store, now, "claude-sonnet-4.6");
    const stats = store.usageStats?.[profileId];
    expect(stats?.cooldownModel).toBe("claude-sonnet-4.6");
  });

  it("updates cooldownReason when auth failure occurs during active rate_limit window", async () => {
    const now = 1_000_000;
    const store = copilotStore({
      [profileId]: {
        cooldownUntil: now + 30_000,
        cooldownReason: "rate_limit",
        cooldownModel: "claude-sonnet-4.6",
        errorCount: 1,
        lastFailureAt: now - 1000,
      },
    });
    await markAuthProfileFailure({
      store,
      profileId,
      reason: "auth",
      modelId: "claude-opus-4.6",
    });
    const stats = store.usageStats?.[profileId];
    // Reason should update to the new failure type, not stay as rate_limit
    expect(stats?.cooldownReason).toBe("auth");
    // Model scope should be cleared — auth failures are profile-wide
    expect(stats?.cooldownModel).toBeUndefined();
  });

  it("clears cooldownModel when non-rate_limit failure hits same model during active window", async () => {
    const now = 1_000_000;
    const store = copilotStore({
      [profileId]: {
        cooldownUntil: now + 30_000,
        cooldownReason: "rate_limit",
        cooldownModel: "claude-sonnet-4.6",
        errorCount: 1,
        lastFailureAt: now - 1000,
      },
    });
    await markAuthProfileFailure({
      store,
      profileId,
      reason: "auth",
      modelId: "claude-sonnet-4.6",
    });
    const stats = store.usageStats?.[profileId];
    // Even same-model auth failure should clear model scope (auth is profile-wide)
    expect(stats?.cooldownReason).toBe("auth");
    expect(stats?.cooldownModel).toBeUndefined();
  });
});

View File

@ -44,6 +44,7 @@ export function isProfileInCooldown(
store: AuthProfileStore,
profileId: string,
now?: number,
forModel?: string,
): boolean {
if (isAuthCooldownBypassedForProvider(store.profiles[profileId]?.provider)) {
return false;
@ -52,6 +53,19 @@ export function isProfileInCooldown(
if (!stats) {
return false;
}
// Model-aware bypass: if the cooldown was caused by a rate_limit on a
// specific model and the caller is requesting a *different* model, allow it.
// We still honour any active billing/auth disable (`disabledUntil`) — those
// are profile-wide and must not be short-circuited by model scoping.
if (
forModel &&
stats.cooldownReason === "rate_limit" &&
stats.cooldownModel &&
stats.cooldownModel !== forModel &&
!isActiveUnusableWindow(stats.disabledUntil, now ?? Date.now())
) {
return false;
}
const unusableUntil = resolveProfileUnusableUntil(stats);
const ts = now ?? Date.now();
return unusableUntil ? ts < unusableUntil : false;
@ -212,6 +226,8 @@ export function clearExpiredCooldowns(store: AuthProfileStore, now?: number): bo
if (cooldownExpired) {
stats.cooldownUntil = undefined;
stats.cooldownReason = undefined;
stats.cooldownModel = undefined;
profileMutated = true;
}
if (disabledExpired) {
@ -275,10 +291,13 @@ export async function markAuthProfileUsed(params: {
/**
 * Stepped cooldown ladder for consecutive auth-profile failures within the
 * failure window: 30 s → 1 min → 5 min (capped at 5 min thereafter).
 *
 * @param errorCount consecutive failure count; values below 1 are clamped to 1.
 * @returns cooldown duration in milliseconds.
 */
export function calculateAuthProfileCooldownMs(errorCount: number): number {
  // Clamp so a stale/zeroed counter still yields the minimum cooldown.
  const normalized = Math.max(1, errorCount);
  if (normalized <= 1) {
    return 30_000; // 30 seconds
  }
  if (normalized <= 2) {
    return 60_000; // 1 minute
  }
  return 5 * 60_000; // 5 minutes max
}
type ResolvedAuthCooldownConfig = {
@ -366,6 +385,8 @@ function resetUsageStats(
...existing,
errorCount: 0,
cooldownUntil: undefined,
cooldownReason: undefined,
cooldownModel: undefined,
disabledUntil: undefined,
disabledReason: undefined,
failureCounts: undefined,
@ -398,6 +419,7 @@ function computeNextProfileUsageStats(params: {
now: number;
reason: AuthProfileFailureReason;
cfgResolved: ResolvedAuthCooldownConfig;
modelId?: string;
}): ProfileUsageStats {
const windowMs = params.cfgResolved.failureWindowMs;
const windowExpired =
@ -451,6 +473,36 @@ function computeNextProfileUsageStats(params: {
now: params.now,
recomputedUntil: params.now + backoffMs,
});
// Update cooldown metadata based on whether the window is still active
// and whether the same or a different model is failing.
const existingCooldownActive =
typeof params.existing.cooldownUntil === "number" &&
params.existing.cooldownUntil > params.now;
if (existingCooldownActive) {
// Always use the latest failure reason so that downstream consumers
// (e.g. isProfileInCooldown model-bypass) see the most recent signal.
// A non-rate_limit failure (auth, billing, …) is profile-wide, so
// upgrading from rate_limit → auth correctly blocks all models.
updatedStats.cooldownReason = params.reason;
// If a different model fails during an active window, widen the scope
// to all models (undefined) so neither model bypasses the cooldown.
if (
params.existing.cooldownModel &&
params.modelId &&
params.existing.cooldownModel !== params.modelId
) {
updatedStats.cooldownModel = undefined;
} else if (params.reason !== "rate_limit") {
// Non-rate-limit failures are profile-wide — clear model scope even
// when the same model fails, so that no model can bypass.
updatedStats.cooldownModel = undefined;
} else {
updatedStats.cooldownModel = params.existing.cooldownModel;
}
} else {
updatedStats.cooldownReason = params.reason;
updatedStats.cooldownModel = params.reason === "rate_limit" ? params.modelId : undefined;
}
}
return updatedStats;
@ -468,8 +520,9 @@ export async function markAuthProfileFailure(params: {
cfg?: OpenClawConfig;
agentDir?: string;
runId?: string;
modelId?: string;
}): Promise<void> {
const { store, profileId, reason, agentDir, cfg, runId } = params;
const { store, profileId, reason, agentDir, cfg, runId, modelId } = params;
const profile = store.profiles[profileId];
if (!profile || isAuthCooldownBypassedForProvider(profile.provider)) {
return;
@ -498,6 +551,7 @@ export async function markAuthProfileFailure(params: {
now,
reason,
cfgResolved,
modelId,
});
nextStats = computed;
updateUsageStatsEntry(freshStore, profileId, () => computed);
@ -536,6 +590,7 @@ export async function markAuthProfileFailure(params: {
now,
reason,
cfgResolved,
modelId,
});
nextStats = computed;
updateUsageStatsEntry(store, profileId, () => computed);
@ -552,8 +607,8 @@ export async function markAuthProfileFailure(params: {
}
/**
 * Mark a profile as transiently failed. Applies stepped backoff cooldown.
 * Cooldown times: 30s, 1min, 5min (capped).
* Uses store lock to avoid overwriting concurrent usage updates.
*/
export async function markAuthProfileCooldown(params: {

View File

@ -34,6 +34,32 @@ import { isLikelyContextOverflowError } from "./pi-embedded-helpers.js";
const log = createSubsystemLogger("model-fallback");
/**
 * Raised once every model fallback candidate has been tried and failed.
 * Exposes the individual attempt records and the earliest moment any
 * rate-limited profile becomes usable again, so callers can render an
 * informative countdown (e.g. "rate-limited, retry in 30 s").
 */
export class FallbackSummaryError extends Error {
  constructor(
    message: string,
    readonly attempts: FallbackAttempt[],
    readonly soonestCooldownExpiry: number | null,
    cause?: Error,
  ) {
    super(message, { cause });
    this.name = "FallbackSummaryError";
  }
}

/** Type guard narrowing an arbitrary thrown value to FallbackSummaryError. */
export function isFallbackSummaryError(err: unknown): err is FallbackSummaryError {
  return err instanceof FallbackSummaryError;
}
export type ModelFallbackRunOptions = {
allowTransientCooldownProbe?: boolean;
};
@ -189,17 +215,18 @@ function throwFallbackFailureSummary(params: {
lastError: unknown;
label: string;
formatAttempt: (attempt: FallbackAttempt) => string;
soonestCooldownExpiry?: number | null;
}): never {
if (params.attempts.length <= 1 && params.lastError) {
throw params.lastError;
}
const summary =
params.attempts.length > 0 ? params.attempts.map(params.formatAttempt).join(" | ") : "unknown";
throw new Error(
throw new FallbackSummaryError(
`All ${params.label} failed (${params.attempts.length || params.candidates.length}): ${summary}`,
{
cause: params.lastError instanceof Error ? params.lastError : undefined,
},
params.attempts,
params.soonestCooldownExpiry ?? null,
params.lastError instanceof Error ? params.lastError : undefined,
);
}
@ -548,7 +575,9 @@ export async function runWithModelFallback<T>(params: {
store: authStore,
provider: candidate.provider,
});
const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
const isAnyProfileAvailable = profileIds.some(
(id) => !isProfileInCooldown(authStore, id, undefined, candidate.model),
);
if (profileIds.length > 0 && !isAnyProfileAvailable) {
// All profiles for this provider are in cooldown.
@ -771,6 +800,23 @@ export async function runWithModelFallback<T>(params: {
`${attempt.provider}/${attempt.model}: ${attempt.error}${
attempt.reason ? ` (${attempt.reason})` : ""
}`,
soonestCooldownExpiry: (() => {
if (!authStore) {
return null;
}
const allProfileIds = new Set<string>();
for (const c of candidates) {
const ids = resolveAuthProfileOrder({
cfg: params.cfg,
store: authStore,
provider: c.provider,
});
for (const id of ids) {
allProfileIds.add(id);
}
}
return getSoonestCooldownExpiry(authStore, [...allProfileIds]);
})(),
});
}

View File

@ -717,7 +717,7 @@ export async function runEmbeddedPiAgent(
let nextIndex = profileIndex + 1;
while (nextIndex < profileCandidates.length) {
const candidate = profileCandidates[nextIndex];
if (candidate && isProfileInCooldown(authStore, candidate)) {
if (candidate && isProfileInCooldown(authStore, candidate, undefined, modelId)) {
nextIndex += 1;
continue;
}
@ -744,7 +744,9 @@ export async function runEmbeddedPiAgent(
);
const allAutoProfilesInCooldown =
autoProfileCandidates.length > 0 &&
autoProfileCandidates.every((candidate) => isProfileInCooldown(authStore, candidate));
autoProfileCandidates.every((candidate) =>
isProfileInCooldown(authStore, candidate, undefined, modelId),
);
const unavailableReason = allAutoProfilesInCooldown
? (resolveProfilesUnavailableReason({
store: authStore,
@ -763,7 +765,9 @@ export async function runEmbeddedPiAgent(
while (profileIndex < profileCandidates.length) {
const candidate = profileCandidates[profileIndex];
const inCooldown =
candidate && candidate !== lockedProfileId && isProfileInCooldown(authStore, candidate);
candidate &&
candidate !== lockedProfileId &&
isProfileInCooldown(authStore, candidate, undefined, modelId);
if (inCooldown) {
if (allowTransientCooldownProbe && !didTransientCooldownProbe) {
didTransientCooldownProbe = true;
@ -833,6 +837,7 @@ export async function runEmbeddedPiAgent(
reason?: AuthProfileFailureReason | null;
config?: RunEmbeddedPiAgentParams["config"];
agentDir?: RunEmbeddedPiAgentParams["agentDir"];
modelId?: string;
}) => {
const { profileId, reason } = failure;
if (!profileId || !reason || reason === "timeout") {
@ -845,6 +850,7 @@ export async function runEmbeddedPiAgent(
cfg: params.config,
agentDir,
runId: params.runId,
modelId: failure.modelId,
});
};
const resolveAuthProfileFailureReason = (
@ -1394,6 +1400,7 @@ export async function runEmbeddedPiAgent(
await maybeMarkAuthProfileFailure({
profileId: lastProfileId,
reason: promptProfileFailureReason,
modelId,
});
const promptFailoverFailure =
promptFailoverReason !== null || isFailoverErrorMessage(errorText);
@ -1535,6 +1542,7 @@ export async function runEmbeddedPiAgent(
await maybeMarkAuthProfileFailure({
profileId: lastProfileId,
reason,
modelId,
});
if (timedOut && !isProbeSession) {
log.warn(`Profile ${lastProfileId} timed out. Trying next account...`);

View File

@ -4,7 +4,7 @@ import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-pay
import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-budget.js";
import { runCliAgent } from "../../agents/cli-runner.js";
import { getCliSessionId } from "../../agents/cli-session.js";
import { runWithModelFallback } from "../../agents/model-fallback.js";
import { runWithModelFallback, isFallbackSummaryError } from "../../agents/model-fallback.js";
import { isCliProvider } from "../../agents/model-selection.js";
import {
BILLING_ERROR_USER_MESSAGE,
@ -12,6 +12,7 @@ import {
isContextOverflowError,
isBillingErrorMessage,
isLikelyContextOverflowError,
isRateLimitErrorMessage,
isTransientHttpError,
sanitizeUserFacingText,
} from "../../agents/pi-embedded-helpers.js";
@ -74,6 +75,26 @@ export type AgentRunLoopResult =
}
| { kind: "final"; payload: ReplyPayload };
/**
* Build a human-friendly rate-limit message from a FallbackSummaryError.
* Includes a countdown when the soonest cooldown expiry is known.
*/
/**
 * Build a human-friendly rate-limit message from a FallbackSummaryError.
 * Includes a countdown when the soonest cooldown expiry is known and still
 * in the future; otherwise falls back to a generic retry hint.
 *
 * @param err the error caught from the fallback chain (any thrown value).
 * @returns a user-facing message string.
 */
function buildCopilotCooldownMessage(err: unknown): string {
  const genericMessage =
    "⚠️ All models are temporarily rate-limited. Please try again in a few minutes.";
  if (!isFallbackSummaryError(err)) {
    return genericMessage;
  }
  const expiry = err.soonestCooldownExpiry;
  // Snapshot the clock once so the in-the-future check and the countdown
  // computation agree (previously two Date.now() calls could straddle expiry).
  const now = Date.now();
  if (typeof expiry !== "number" || expiry <= now) {
    return genericMessage;
  }
  const secsLeft = Math.ceil((expiry - now) / 1000);
  if (secsLeft <= 60) {
    return `⚠️ Rate-limited — ready in ~${secsLeft}s. Please wait a moment.`;
  }
  const minsLeft = Math.ceil(secsLeft / 60);
  return `⚠️ Rate-limited — ready in ~${minsLeft} min. Please try again shortly.`;
}
export async function runAgentTurnWithFallback(params: {
commandBody: string;
followupRun: FollowupRun;
@ -623,17 +644,29 @@ export async function runAgentTurnWithFallback(params: {
}
defaultRuntime.error(`Embedded agent failed before reply: ${message}`);
// Only classify as rate-limit when we have concrete evidence: either
// the error message itself is a rate-limit string, or the fallback
// chain exhaustion includes at least one rate_limit / overloaded attempt.
// Using `.some()` intentionally: when any attempt is rate-limited, the
// countdown message is more actionable than the generic failure text,
// even if other attempts failed for different reasons (auth, etc.).
const isRateLimit =
isRateLimitErrorMessage(message) ||
(isFallbackSummaryError(err) &&
err.attempts.some((a) => a.reason === "rate_limit" || a.reason === "overloaded"));
const safeMessage = isTransientHttp
? sanitizeUserFacingText(message, { errorContext: true })
: message;
const trimmedMessage = safeMessage.replace(/\.\s*$/, "");
const fallbackText = isBilling
? BILLING_ERROR_USER_MESSAGE
: isContextOverflow
? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
: isRoleOrderingError
? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
: `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`;
: isRateLimit
? buildCopilotCooldownMessage(err)
: isContextOverflow
? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
: isRoleOrderingError
? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
: `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`;
return {
kind: "final",