From b5913862ac1cb79151d1ea4a0dc7a2d6f3b592fe Mon Sep 17 00:00:00 2001 From: kiranvk2011 Date: Wed, 18 Mar 2026 13:50:52 +0000 Subject: [PATCH 1/8] fix: per-model cooldown scope + stepped backoff + user-facing rate-limit message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Combines ideas from PRs #45113, #31962, and #45763 to address three cooldown-related issues: 1. Stepped cooldown (30s → 1m → 5m cap) replaces the aggressive exponential formula (1m → 5m → 25m → 1h) that locked out providers for far longer than the actual API rate-limit window. 2. Per-model cooldown scoping: rate_limit cooldowns now record which model triggered them. When a different model on the same auth profile is requested, the cooldown is bypassed — so one model hitting a 429 no longer blocks all other models on the same provider. 3. FallbackSummaryError with soonest-expiry countdown: when all candidates are exhausted, the user sees a clear message like '⚠️ Rate-limited — ready in ~28s' instead of a generic failure. Files changed: - types.ts: add cooldownReason/cooldownModel to ProfileUsageStats - usage.ts: stepped formula, model-aware isProfileInCooldown, modelId threading through computeNextProfileUsageStats/markAuthProfileFailure - model-fallback.ts: FallbackSummaryError class, model-aware availability check, soonestCooldownExpiry computation - pi-embedded-runner/run.ts: thread modelId into failure recording - agent-runner-execution.ts: buildCopilotCooldownMessage helper, rate-limit detection branch in error handler - usage.test.ts: update expected cooldown value (60s → 30s) --- src/agents/auth-profiles/types.ts | 2 + src/agents/auth-profiles/usage.test.ts | 4 +- src/agents/auth-profiles/usage.ts | 41 +++++++++++++-- src/agents/model-fallback.ts | 50 +++++++++++++++++-- src/agents/pi-embedded-runner/run.ts | 4 ++ .../reply/agent-runner-execution.ts | 36 +++++++++++-- 6 files changed, 120 insertions(+), 17 deletions(-) diff --git a/src/agents/auth-profiles/types.ts b/src/agents/auth-profiles/types.ts index 127a444939b..848268385a2 100644 --- a/src/agents/auth-profiles/types.ts +++ b/src/agents/auth-profiles/types.ts @@ -51,6 +51,8 @@ export type AuthProfileFailureReason = export type ProfileUsageStats = { lastUsed?: number; cooldownUntil?: number; + cooldownReason?: AuthProfileFailureReason; + cooldownModel?: string; disabledUntil?: number; disabledReason?: AuthProfileFailureReason; errorCount?: number; diff --git a/src/agents/auth-profiles/usage.test.ts b/src/agents/auth-profiles/usage.test.ts index 6dd5697cc99..ec9cd2e143e 100644 --- a/src/agents/auth-profiles/usage.test.ts +++ b/src/agents/auth-profiles/usage.test.ts @@ -621,8 +621,8 @@ describe("markAuthProfileFailure — active windows do not extend on retry", () errorCount: 3, lastFailureAt: now - 60_000, }), - // errorCount resets → calculateAuthProfileCooldownMs(1) = 60_000 - expectedUntil: (now: number) => now + 60_000, + // errorCount resets → calculateAuthProfileCooldownMs(1) = 30_000 (stepped: 30s → 1m → 5m) + expectedUntil: (now: number) => now + 30_000, readUntil: (stats: WindowStats | undefined) => stats?.cooldownUntil, }, { diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts index 20e1cbaa497..c587f3c6d0e 100644 --- a/src/agents/auth-profiles/usage.ts +++ b/src/agents/auth-profiles/usage.ts @@ -44,6 +44,7 @@ export function isProfileInCooldown( store: AuthProfileStore, profileId: string, now?: number, + forModel?: string, ): boolean { if (isAuthCooldownBypassedForProvider(store.profiles[profileId]?.provider)) { return false; @@ -52,6 +53,16 @@ export function isProfileInCooldown( if (!stats) { return false; } + // Model-aware bypass: if the cooldown was caused by a rate_limit on a + // specific model and the caller is requesting a *different* model, allow it. + if ( + forModel && + stats.cooldownReason === "rate_limit" && + stats.cooldownModel && + stats.cooldownModel !== forModel + ) { + return false; + } const unusableUntil = resolveProfileUnusableUntil(stats); const ts = now ?? Date.now(); return unusableUntil ? ts < unusableUntil : false; @@ -212,6 +223,8 @@ export function clearExpiredCooldowns(store: AuthProfileStore, now?: number): bo if (cooldownExpired) { stats.cooldownUntil = undefined; + stats.cooldownReason = undefined; + stats.cooldownModel = undefined; profileMutated = true; } if (disabledExpired) { @@ -275,10 +288,9 @@ export async function markAuthProfileUsed(params: { export function calculateAuthProfileCooldownMs(errorCount: number): number { const normalized = Math.max(1, errorCount); - return Math.min( - 60 * 60 * 1000, // 1 hour max - 60 * 1000 * 5 ** Math.min(normalized - 1, 3), - ); + if (normalized <= 1) return 30_000; // 30 seconds + if (normalized <= 2) return 60_000; // 1 minute + return 5 * 60_000; // 5 minutes max } type ResolvedAuthCooldownConfig = { @@ -366,6 +378,8 @@ function resetUsageStats( ...existing, errorCount: 0, cooldownUntil: undefined, + cooldownReason: undefined, + cooldownModel: undefined, disabledUntil: undefined, disabledReason: undefined, failureCounts: undefined, @@ -398,6 +412,7 @@ function computeNextProfileUsageStats(params: { now: number; reason: AuthProfileFailureReason; cfgResolved: ResolvedAuthCooldownConfig; + modelId?: string; }): ProfileUsageStats { const windowMs = params.cfgResolved.failureWindowMs; const windowExpired = @@ -451,6 +466,19 @@ function computeNextProfileUsageStats(params: { now: params.now, recomputedUntil: params.now + backoffMs, }); + // Preserve existing cooldown metadata if the cooldown window is still + // active; otherwise record the new reason/model. + const existingCooldownActive = + typeof params.existing.cooldownUntil === "number" && + params.existing.cooldownUntil > params.now; + if (existingCooldownActive) { + updatedStats.cooldownReason = params.existing.cooldownReason; + updatedStats.cooldownModel = params.existing.cooldownModel; + } else { + updatedStats.cooldownReason = params.reason; + updatedStats.cooldownModel = + params.reason === "rate_limit" ? params.modelId : undefined; + } } return updatedStats; @@ -468,8 +496,9 @@ export async function markAuthProfileFailure(params: { cfg?: OpenClawConfig; agentDir?: string; runId?: string; + modelId?: string; }): Promise { - const { store, profileId, reason, agentDir, cfg, runId } = params; + const { store, profileId, reason, agentDir, cfg, runId, modelId } = params; const profile = store.profiles[profileId]; if (!profile || isAuthCooldownBypassedForProvider(profile.provider)) { return; @@ -498,6 +527,7 @@ export async function markAuthProfileFailure(params: { now, reason, cfgResolved, + modelId, }); nextStats = computed; updateUsageStatsEntry(freshStore, profileId, () => computed); @@ -536,6 +566,7 @@ export async function markAuthProfileFailure(params: { now, reason, cfgResolved, + modelId, }); nextStats = computed; updateUsageStatsEntry(store, profileId, () => computed); diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 5fd6e533a1a..8127fc263be 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -34,6 +34,32 @@ import { isLikelyContextOverflowError } from "./pi-embedded-helpers.js"; const log = createSubsystemLogger("model-fallback"); +/** + * Structured error thrown when all model fallback candidates have been + * exhausted. Carries per-attempt details so callers can build informative + * user-facing messages (e.g. "rate-limited, retry in 30 s"). + */ +export class FallbackSummaryError extends Error { + readonly attempts: FallbackAttempt[]; + readonly soonestCooldownExpiry: number | null; + + constructor( + message: string, + attempts: FallbackAttempt[], + soonestCooldownExpiry: number | null, + cause?: Error, + ) { + super(message, { cause }); + this.name = "FallbackSummaryError"; + this.attempts = attempts; + this.soonestCooldownExpiry = soonestCooldownExpiry; + } +} + +export function isFallbackSummaryError(err: unknown): err is FallbackSummaryError { + return err instanceof FallbackSummaryError; +} + export type ModelFallbackRunOptions = { allowTransientCooldownProbe?: boolean; }; @@ -189,17 +215,18 @@ function throwFallbackFailureSummary(params: { lastError: unknown; label: string; formatAttempt: (attempt: FallbackAttempt) => string; + soonestCooldownExpiry?: number | null; }): never { if (params.attempts.length <= 1 && params.lastError) { throw params.lastError; } const summary = params.attempts.length > 0 ? params.attempts.map(params.formatAttempt).join(" | ") : "unknown"; - throw new Error( + throw new FallbackSummaryError( `All ${params.label} failed (${params.attempts.length || params.candidates.length}): ${summary}`, - { - cause: params.lastError instanceof Error ? params.lastError : undefined, - }, + params.attempts, + params.soonestCooldownExpiry ?? null, + params.lastError instanceof Error ? params.lastError : undefined, ); } @@ -548,7 +575,7 @@ export async function runWithModelFallback(params: { store: authStore, provider: candidate.provider, }); - const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id)); + const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id, undefined, candidate.model)); if (profileIds.length > 0 && !isAnyProfileAvailable) { // All profiles for this provider are in cooldown. @@ -771,6 +798,19 @@ export async function runWithModelFallback(params: { `${attempt.provider}/${attempt.model}: ${attempt.error}${ attempt.reason ? ` (${attempt.reason})` : "" }`, + soonestCooldownExpiry: (() => { + if (!authStore) return null; + const allProfileIds = new Set(); + for (const c of candidates) { + const ids = resolveAuthProfileOrder({ + cfg: params.cfg, + store: authStore, + provider: c.provider, + }); + for (const id of ids) allProfileIds.add(id); + } + return getSoonestCooldownExpiry(authStore, [...allProfileIds]); + })(), }); } diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index a35c03d98ca..b5c55cf9d6d 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -832,6 +832,7 @@ export async function runEmbeddedPiAgent( reason?: AuthProfileFailureReason | null; config?: RunEmbeddedPiAgentParams["config"]; agentDir?: RunEmbeddedPiAgentParams["agentDir"]; + modelId?: string; }) => { const { profileId, reason } = failure; if (!profileId || !reason || reason === "timeout") { @@ -844,6 +845,7 @@ export async function runEmbeddedPiAgent( cfg: params.config, agentDir, runId: params.runId, + modelId: failure.modelId, }); }; const resolveAuthProfileFailureReason = ( @@ -1382,6 +1384,7 @@ export async function runEmbeddedPiAgent( await maybeMarkAuthProfileFailure({ profileId: lastProfileId, reason: promptProfileFailureReason, + modelId, }); const promptFailoverFailure = promptFailoverReason !== null || isFailoverErrorMessage(errorText); @@ -1523,6 +1526,7 @@ export async function runEmbeddedPiAgent( await maybeMarkAuthProfileFailure({ profileId: lastProfileId, reason, + modelId, }); if (timedOut && !isProbeSession) { log.warn(`Profile ${lastProfileId} timed out. Trying next account...`); diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index c25342e4a28..049958aec15 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -5,6 +5,7 @@ import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-bu import { runCliAgent } from "../../agents/cli-runner.js"; import { getCliSessionId } from "../../agents/cli-session.js"; import { runWithModelFallback } from "../../agents/model-fallback.js"; +import { isFallbackSummaryError } from "../../agents/model-fallback.js"; import { isCliProvider } from "../../agents/model-selection.js"; import { BILLING_ERROR_USER_MESSAGE, @@ -12,6 +13,7 @@ import { isContextOverflowError, isBillingErrorMessage, isLikelyContextOverflowError, + isRateLimitErrorMessage, isTransientHttpError, sanitizeUserFacingText, } from "../../agents/pi-embedded-helpers.js"; @@ -74,6 +76,26 @@ export type AgentRunLoopResult = } | { kind: "final"; payload: ReplyPayload }; +/** + * Build a human-friendly rate-limit message from a FallbackSummaryError. + * Includes a countdown when the soonest cooldown expiry is known. + */ +function buildCopilotCooldownMessage(err: unknown): string { + if (!isFallbackSummaryError(err)) { + return "⚠️ All models are temporarily rate-limited. Please try again in a few minutes."; + } + const expiry = err.soonestCooldownExpiry; + if (typeof expiry === "number" && expiry > Date.now()) { + const secsLeft = Math.ceil((expiry - Date.now()) / 1000); + if (secsLeft <= 60) { + return `⚠️ Rate-limited — ready in ~${secsLeft}s. Please wait a moment.`; + } + const minsLeft = Math.ceil(secsLeft / 60); + return `⚠️ Rate-limited — ready in ~${minsLeft} min. Please try again shortly.`; + } + return "⚠️ All models are temporarily rate-limited. Please try again in a few minutes."; +} + export async function runAgentTurnWithFallback(params: { commandBody: string; followupRun: FollowupRun; @@ -623,17 +645,21 @@ export async function runAgentTurnWithFallback(params: { } defaultRuntime.error(`Embedded agent failed before reply: ${message}`); + const isRateLimit = + isRateLimitErrorMessage(message) || isFallbackSummaryError(err); const safeMessage = isTransientHttp ? sanitizeUserFacingText(message, { errorContext: true }) : message; const trimmedMessage = safeMessage.replace(/\.\s*$/, ""); const fallbackText = isBilling ? BILLING_ERROR_USER_MESSAGE - : isContextOverflow - ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model." - : isRoleOrderingError - ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session." - : `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`; + : isRateLimit + ? buildCopilotCooldownMessage(err) + : isContextOverflow + ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model." + : isRoleOrderingError + ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session." + : `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`; return { kind: "final", From d51e76b0cd9e9a9417e4e7b25738d4c35b8e7bbc Mon Sep 17 00:00:00 2001 From: kiranvk2011 Date: Wed, 18 Mar 2026 14:02:39 +0000 Subject: [PATCH 2/8] =?UTF-8?q?fix:=20address=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20stale=20JSDoc,=20duplicate=20import,=20oxfmt=20form?= =?UTF-8?q?atting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Update markAuthProfileCooldown JSDoc to reflect new stepped backoff (30s/1m/5m) - Merge duplicate isFallbackSummaryError import into single import statement - Run oxfmt on all changed files to fix formatting CI failure --- src/agents/auth-profiles/usage.ts | 12 +++++++----- src/agents/model-fallback.ts | 4 +++- src/auto-reply/reply/agent-runner-execution.ts | 12 +++++++++--- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts index c587f3c6d0e..9bcd674d81c 100644 --- a/src/agents/auth-profiles/usage.ts +++ b/src/agents/auth-profiles/usage.ts @@ -55,11 +55,14 @@ export function isProfileInCooldown( } // Model-aware bypass: if the cooldown was caused by a rate_limit on a // specific model and the caller is requesting a *different* model, allow it. + // We still honour any active billing/auth disable (`disabledUntil`) — those + // are profile-wide and must not be short-circuited by model scoping. if ( forModel && stats.cooldownReason === "rate_limit" && stats.cooldownModel && - stats.cooldownModel !== forModel + stats.cooldownModel !== forModel && + !isActiveUnusableWindow(stats.disabledUntil, now ?? Date.now()) ) { return false; } @@ -476,8 +479,7 @@ function computeNextProfileUsageStats(params: { updatedStats.cooldownModel = params.existing.cooldownModel; } else { updatedStats.cooldownReason = params.reason; - updatedStats.cooldownModel = - params.reason === "rate_limit" ? params.modelId : undefined; + updatedStats.cooldownModel = params.reason === "rate_limit" ? params.modelId : undefined; } } @@ -583,8 +585,8 @@ export async function markAuthProfileFailure(params: { } /** - * Mark a profile as transiently failed. Applies exponential backoff cooldown. - * Cooldown times: 1min, 5min, 25min, max 1 hour. + * Mark a profile as transiently failed. Applies stepped backoff cooldown. + * Cooldown times: 30s, 1min, 5min (capped). * Uses store lock to avoid overwriting concurrent usage updates. */ export async function markAuthProfileCooldown(params: { diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 8127fc263be..7223c6c9dfa 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -575,7 +575,9 @@ export async function runWithModelFallback(params: { store: authStore, provider: candidate.provider, }); - const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id, undefined, candidate.model)); + const isAnyProfileAvailable = profileIds.some( + (id) => !isProfileInCooldown(authStore, id, undefined, candidate.model), + ); if (profileIds.length > 0 && !isAnyProfileAvailable) { // All profiles for this provider are in cooldown. diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 049958aec15..855b352bfe2 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -4,8 +4,7 @@ import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-pay import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-budget.js"; import { runCliAgent } from "../../agents/cli-runner.js"; import { getCliSessionId } from "../../agents/cli-session.js"; -import { runWithModelFallback } from "../../agents/model-fallback.js"; -import { isFallbackSummaryError } from "../../agents/model-fallback.js"; +import { runWithModelFallback, isFallbackSummaryError } from "../../agents/model-fallback.js"; import { isCliProvider } from "../../agents/model-selection.js"; import { BILLING_ERROR_USER_MESSAGE, @@ -645,8 +644,15 @@ export async function runAgentTurnWithFallback(params: { } defaultRuntime.error(`Embedded agent failed before reply: ${message}`); + // Only classify as rate-limit when we have concrete evidence: either + // the error message itself is a rate-limit string, or the fallback + // chain exhaustion includes at least one rate_limit / overloaded attempt. + // This avoids showing misleading "Rate-limited — ready in ~Xs" messages + // for auth, model_not_found, or other non-rate-limit failures. const isRateLimit = - isRateLimitErrorMessage(message) || isFallbackSummaryError(err); + isRateLimitErrorMessage(message) || + (isFallbackSummaryError(err) && + err.attempts.some((a) => a.reason === "rate_limit" || a.reason === "overloaded")); const safeMessage = isTransientHttp ? sanitizeUserFacingText(message, { errorContext: true }) : message; From 75661f59c10744116b9516e6e7cd38517c96e645 Mon Sep 17 00:00:00 2001 From: kiranvk2011 Date: Wed, 18 Mar 2026 14:37:30 +0000 Subject: [PATCH 3/8] fix: widen cooldown scope when different model fails during active window - When model A is cooling down and model B also fails, set cooldownModel to undefined so neither model bypasses via per-model scope - Same-model retries preserve the original cooldownModel - Add 8 new tests for per-model cooldown behavior: model-scoped bypass, profile-wide cooldown, billing-disable guard, scope-widening, same-model retry preservation - Update .some() comment to document intentional design choice for mixed fallback failure reasons --- src/agents/auth-profiles/usage.test.ts | 122 ++++++++++++++++++ src/agents/auth-profiles/usage.ts | 16 ++- .../reply/agent-runner-execution.ts | 5 +- 3 files changed, 138 insertions(+), 5 deletions(-) diff --git a/src/agents/auth-profiles/usage.test.ts b/src/agents/auth-profiles/usage.test.ts index ec9cd2e143e..8d633a7d89b 100644 --- a/src/agents/auth-profiles/usage.test.ts +++ b/src/agents/auth-profiles/usage.test.ts @@ -132,6 +132,53 @@ describe("isProfileInCooldown", () => { }); expect(isProfileInCooldown(store, "kilocode:default")).toBe(false); }); + + it("returns false for a different model when cooldown is model-scoped (rate_limit)", () => { + const store = makeStore({ + "github-copilot:github": { + cooldownUntil: Date.now() + 60_000, + cooldownReason: "rate_limit", + cooldownModel: "claude-sonnet-4.6", + }, + }); + // Different model bypasses the cooldown + expect(isProfileInCooldown(store, "github-copilot:github", undefined, "gpt-4.1")).toBe(false); + // Same model is still blocked + expect( + isProfileInCooldown(store, "github-copilot:github", undefined, "claude-sonnet-4.6"), + ).toBe(true); + // No model specified — blocked (conservative) + expect(isProfileInCooldown(store, "github-copilot:github")).toBe(true); + }); + + it("returns true for all models when cooldownModel is undefined (profile-wide)", () => { + const store = makeStore({ + "github-copilot:github": { + cooldownUntil: Date.now() + 60_000, + cooldownReason: "rate_limit", + cooldownModel: undefined, + }, + }); + expect( + isProfileInCooldown(store, "github-copilot:github", undefined, "claude-sonnet-4.6"), + ).toBe(true); + expect(isProfileInCooldown(store, "github-copilot:github", undefined, "gpt-4.1")).toBe(true); + }); + + it("does not bypass model-scoped cooldown when disabledUntil is active", () => { + const store = makeStore({ + "github-copilot:github": { + cooldownUntil: Date.now() + 60_000, + cooldownReason: "rate_limit", + cooldownModel: "claude-sonnet-4.6", + disabledUntil: Date.now() + 120_000, + disabledReason: "billing", + }, + }); + // Even though cooldownModel is for a different model, billing disable + // should keep the profile blocked for all models. + expect(isProfileInCooldown(store, "github-copilot:github", undefined, "gpt-4.1")).toBe(true); + }); }); describe("resolveProfilesUnavailableReason", () => { @@ -675,3 +722,78 @@ describe("markAuthProfileFailure — active windows do not extend on retry", () }); } }); + +describe("markAuthProfileFailure — per-model cooldown metadata", () => { + function makeStoreWithCopilot(usageStats: AuthProfileStore["usageStats"]): AuthProfileStore { + const store = makeStore(usageStats); + store.profiles["github-copilot:github"] = { + type: "api_key", + provider: "github-copilot", + key: "ghu_test", + }; + return store; + } + + async function markFailure(params: { + store: ReturnType; + now: number; + modelId?: string; + }): Promise { + vi.useFakeTimers(); + vi.setSystemTime(params.now); + try { + await markAuthProfileFailure({ + store: params.store, + profileId: "github-copilot:github", + reason: "rate_limit", + modelId: params.modelId, + }); + } finally { + vi.useRealTimers(); + } + } + + it("records cooldownModel on first rate_limit failure", async () => { + const now = 1_000_000; + const store = makeStoreWithCopilot({}); + await markFailure({ store, now, modelId: "claude-sonnet-4.6" }); + const stats = store.usageStats?.["github-copilot:github"]; + expect(stats?.cooldownReason).toBe("rate_limit"); + expect(stats?.cooldownModel).toBe("claude-sonnet-4.6"); + }); + + it("widens cooldownModel to undefined when a different model fails during active cooldown", async () => { + const now = 1_000_000; + const store = makeStoreWithCopilot({ + "github-copilot:github": { + cooldownUntil: now + 30_000, + cooldownReason: "rate_limit", + cooldownModel: "claude-sonnet-4.6", + errorCount: 1, + lastFailureAt: now - 1000, + }, + }); + // Different model fails during active cooldown + await markFailure({ store, now, modelId: "gpt-4.1" }); + const stats = store.usageStats?.["github-copilot:github"]; + // Scope widened to all models + expect(stats?.cooldownModel).toBeUndefined(); + expect(stats?.cooldownReason).toBe("rate_limit"); + }); + + it("preserves cooldownModel when the same model fails again during active cooldown", async () => { + const now = 1_000_000; + const store = makeStoreWithCopilot({ + "github-copilot:github": { + cooldownUntil: now + 30_000, + cooldownReason: "rate_limit", + cooldownModel: "claude-sonnet-4.6", + errorCount: 1, + lastFailureAt: now - 1000, + }, + }); + await markFailure({ store, now, modelId: "claude-sonnet-4.6" }); + const stats = store.usageStats?.["github-copilot:github"]; + expect(stats?.cooldownModel).toBe("claude-sonnet-4.6"); + }); +}); diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts index 9bcd674d81c..4412e03ec27 100644 --- a/src/agents/auth-profiles/usage.ts +++ b/src/agents/auth-profiles/usage.ts @@ -469,14 +469,24 @@ function computeNextProfileUsageStats(params: { now: params.now, recomputedUntil: params.now + backoffMs, }); - // Preserve existing cooldown metadata if the cooldown window is still - // active; otherwise record the new reason/model. + // Update cooldown metadata based on whether the window is still active + // and whether the same or a different model is failing. const existingCooldownActive = typeof params.existing.cooldownUntil === "number" && params.existing.cooldownUntil > params.now; if (existingCooldownActive) { updatedStats.cooldownReason = params.existing.cooldownReason; - updatedStats.cooldownModel = params.existing.cooldownModel; + // If a different model fails during an active window, widen the scope + // to all models (undefined) so neither model bypasses the cooldown. + if ( + params.existing.cooldownModel && + params.modelId && + params.existing.cooldownModel !== params.modelId + ) { + updatedStats.cooldownModel = undefined; + } else { + updatedStats.cooldownModel = params.existing.cooldownModel; + } } else { updatedStats.cooldownReason = params.reason; updatedStats.cooldownModel = params.reason === "rate_limit" ? params.modelId : undefined; diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 855b352bfe2..7bf940e5792 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -647,8 +647,9 @@ export async function runAgentTurnWithFallback(params: { // Only classify as rate-limit when we have concrete evidence: either // the error message itself is a rate-limit string, or the fallback // chain exhaustion includes at least one rate_limit / overloaded attempt. - // This avoids showing misleading "Rate-limited — ready in ~Xs" messages - // for auth, model_not_found, or other non-rate-limit failures. + // Using `.some()` intentionally: when any attempt is rate-limited, the + // countdown message is more actionable than the generic failure text, + // even if other attempts failed for different reasons (auth, etc.). const isRateLimit = isRateLimitErrorMessage(message) || (isFallbackSummaryError(err) && From d88de88962e268464b966f9f52befa26317f0ee2 Mon Sep 17 00:00:00 2001 From: kiranvk2011 Date: Wed, 18 Mar 2026 15:03:38 +0000 Subject: [PATCH 4/8] fix: lint curly braces + thread modelId into embedded runner cooldown checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add curly braces to single-line if/for bodies in usage.ts and model-fallback.ts to satisfy oxlint eslint(curly) rule - Thread modelId into all 3 isProfileInCooldown calls in pi-embedded-runner/run.ts (lines 719, 746, 767) so the inner profile loop respects per-model cooldown scope — fixes Codex P1 review comment about outer gate passing model-B while inner loop rejects it without model context --- src/agents/auth-profiles/usage.ts | 8 ++++++-- src/agents/model-fallback.ts | 8 ++++++-- src/agents/pi-embedded-runner/run.ts | 10 +++++++--- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts index 4412e03ec27..f9b7cfc975f 100644 --- a/src/agents/auth-profiles/usage.ts +++ b/src/agents/auth-profiles/usage.ts @@ -291,8 +291,12 @@ export async function markAuthProfileUsed(params: { export function calculateAuthProfileCooldownMs(errorCount: number): number { const normalized = Math.max(1, errorCount); - if (normalized <= 1) return 30_000; // 30 seconds - if (normalized <= 2) return 60_000; // 1 minute + if (normalized <= 1) { + return 30_000; // 30 seconds + } + if (normalized <= 2) { + return 60_000; // 1 minute + } return 5 * 60_000; // 5 minutes max } diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 7223c6c9dfa..ece9d59334c 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -801,7 +801,9 @@ export async function runWithModelFallback(params: { attempt.reason ? ` (${attempt.reason})` : "" }`, soonestCooldownExpiry: (() => { - if (!authStore) return null; + if (!authStore) { + return null; + } const allProfileIds = new Set(); for (const c of candidates) { const ids = resolveAuthProfileOrder({ @@ -809,7 +811,9 @@ export async function runWithModelFallback(params: { store: authStore, provider: c.provider, }); - for (const id of ids) allProfileIds.add(id); + for (const id of ids) { + allProfileIds.add(id); + } } return getSoonestCooldownExpiry(authStore, [...allProfileIds]); })(), diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index b5c55cf9d6d..00159222af0 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -716,7 +716,7 @@ export async function runEmbeddedPiAgent( let nextIndex = profileIndex + 1; while (nextIndex < profileCandidates.length) { const candidate = profileCandidates[nextIndex]; - if (candidate && isProfileInCooldown(authStore, candidate)) { + if (candidate && isProfileInCooldown(authStore, candidate, undefined, modelId)) { nextIndex += 1; continue; } @@ -743,7 +743,9 @@ export async function runEmbeddedPiAgent( ); const allAutoProfilesInCooldown = autoProfileCandidates.length > 0 && - autoProfileCandidates.every((candidate) => isProfileInCooldown(authStore, candidate)); + autoProfileCandidates.every((candidate) => + isProfileInCooldown(authStore, candidate, undefined, modelId), + ); const unavailableReason = allAutoProfilesInCooldown ? (resolveProfilesUnavailableReason({ store: authStore, @@ -762,7 +764,9 @@ export async function runEmbeddedPiAgent( while (profileIndex < profileCandidates.length) { const candidate = profileCandidates[profileIndex]; const inCooldown = - candidate && candidate !== lockedProfileId && isProfileInCooldown(authStore, candidate); + candidate && + candidate !== lockedProfileId && + isProfileInCooldown(authStore, candidate, undefined, modelId); if (inCooldown) { if (allowTransientCooldownProbe && !didTransientCooldownProbe) { didTransientCooldownProbe = true; From 8ea6f5206c8f4237525f1708e54753ad40c0ebf7 Mon Sep 17 00:00:00 2001 From: kiranvk2011 Date: Wed, 18 Mar 2026 15:36:32 +0000 Subject: [PATCH 5/8] fix: update cooldownReason on active-window failures + clear model scope for non-rate-limit --- src/agents/auth-profiles/usage.test.ts | 47 ++++++++++++++++++++++++++ src/agents/auth-profiles/usage.ts | 10 +++++- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/agents/auth-profiles/usage.test.ts b/src/agents/auth-profiles/usage.test.ts index 8d633a7d89b..69d3d4ca200 100644 --- a/src/agents/auth-profiles/usage.test.ts +++ b/src/agents/auth-profiles/usage.test.ts @@ -796,4 +796,51 @@ describe("markAuthProfileFailure — per-model cooldown metadata", () => { const stats = store.usageStats?.["github-copilot:github"]; expect(stats?.cooldownModel).toBe("claude-sonnet-4.6"); }); + + it("updates cooldownReason when auth failure occurs during active rate_limit window", async () => { + const now = 1_000_000; + const store = makeStoreWithCopilot({ + "github-copilot:github": { + cooldownUntil: now + 30_000, + cooldownReason: "rate_limit", + cooldownModel: "claude-sonnet-4.6", + errorCount: 1, + lastFailureAt: now - 1000, + }, + }); + await markAuthProfileFailure({ + store, + profileId: "github-copilot:github", + reason: "auth", + modelId: "claude-opus-4.6", + }); + const stats = store.usageStats?.["github-copilot:github"]; + // Reason should update to the new failure type, not stay as rate_limit + expect(stats?.cooldownReason).toBe("auth"); + // Model scope should be cleared — auth failures are profile-wide + expect(stats?.cooldownModel).toBeUndefined(); + }); + + it("clears cooldownModel when non-rate_limit failure hits same model during active window", async () => { + const now = 1_000_000; + const store = makeStoreWithCopilot({ + "github-copilot:github": { + cooldownUntil: now + 30_000, + cooldownReason: "rate_limit", + cooldownModel: "claude-sonnet-4.6", + errorCount: 1, + lastFailureAt: now - 1000, + }, + }); + await markAuthProfileFailure({ + store, + profileId: "github-copilot:github", + reason: "auth", + modelId: "claude-sonnet-4.6", + }); + const stats = store.usageStats?.["github-copilot:github"]; + // Even same-model auth failure should clear model scope (auth is profile-wide) + expect(stats?.cooldownReason).toBe("auth"); + expect(stats?.cooldownModel).toBeUndefined(); + }); }); diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts index f9b7cfc975f..84f9bdaa83b 100644 --- a/src/agents/auth-profiles/usage.ts +++ b/src/agents/auth-profiles/usage.ts @@ -479,7 +479,11 @@ function computeNextProfileUsageStats(params: { typeof params.existing.cooldownUntil === "number" && params.existing.cooldownUntil > params.now; if (existingCooldownActive) { - updatedStats.cooldownReason = params.existing.cooldownReason; + // Always use the latest failure reason so that downstream consumers + // (e.g. isProfileInCooldown model-bypass) see the most recent signal. + // A non-rate_limit failure (auth, billing, …) is profile-wide, so + // upgrading from rate_limit → auth correctly blocks all models. + updatedStats.cooldownReason = params.reason; // If a different model fails during an active window, widen the scope // to all models (undefined) so neither model bypasses the cooldown. if ( @@ -488,6 +492,10 @@ function computeNextProfileUsageStats(params: { params.existing.cooldownModel !== params.modelId ) { updatedStats.cooldownModel = undefined; + } else if (params.reason !== "rate_limit") { + // Non-rate-limit failures are profile-wide — clear model scope even + // when the same model fails, so that no model can bypass. + updatedStats.cooldownModel = undefined; } else { updatedStats.cooldownModel = params.existing.cooldownModel; } From ae9ad3c07048d46ddd7799b17c4968dca3cae382 Mon Sep 17 00:00:00 2001 From: kiranvk2011 Date: Wed, 18 Mar 2026 18:44:50 +0000 Subject: [PATCH 6/8] test: update markAuthProfileFailure tests for stepped 30s/1m/5m cooldown ladder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the auth-profiles.markauthprofilefailure test suite to match the new stepped cooldown formula (30s → 1m → 5m cap) introduced in the first commit. The test was still asserting the old exponential backoff values (1m → 5m → 25m → 1h cap). Changes: - calculateAuthProfileCooldownMs assertions: 60s→30s, 5m→1m, 25m→5m, 1h→5m cap - 'resets error count when previous cooldown has expired' test: upper bound adjusted from 120s to 60s to match 30s base cooldown - Comments updated to reflect the stepped ladder Resolves merge-blocker review from @altaywtf. --- ...uth-profiles.markauthprofilefailure.test.ts | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/agents/auth-profiles.markauthprofilefailure.test.ts b/src/agents/auth-profiles.markauthprofilefailure.test.ts index 5c4d73197b3..af828e1ee52 100644 --- a/src/agents/auth-profiles.markauthprofilefailure.test.ts +++ b/src/agents/auth-profiles.markauthprofilefailure.test.ts @@ -230,12 +230,12 @@ describe("markAuthProfileFailure", () => { const stats = store.usageStats?.["anthropic:default"]; // Error count should reset to 1 (not escalate to 4) because the - // previous cooldown expired. Cooldown should be ~1 min, not ~60 min. + // previous cooldown expired. Cooldown should be ~30s, not ~5 min. expect(stats?.errorCount).toBe(1); expect(stats?.failureCounts?.rate_limit).toBe(1); const cooldownMs = (stats?.cooldownUntil ?? 0) - now; - // calculateAuthProfileCooldownMs(1) = 60_000 (1 minute) - expect(cooldownMs).toBeLessThan(120_000); + // calculateAuthProfileCooldownMs(1) = 30_000 (stepped: 30s → 1m → 5m) + expect(cooldownMs).toBeLessThan(60_000); expect(cooldownMs).toBeGreaterThan(0); } finally { fs.rmSync(agentDir, { recursive: true, force: true }); @@ -267,11 +267,11 @@ describe("markAuthProfileFailure", () => { }); describe("calculateAuthProfileCooldownMs", () => { - it("applies exponential backoff with a 1h cap", () => { - expect(calculateAuthProfileCooldownMs(1)).toBe(60_000); - expect(calculateAuthProfileCooldownMs(2)).toBe(5 * 60_000); - expect(calculateAuthProfileCooldownMs(3)).toBe(25 * 60_000); - expect(calculateAuthProfileCooldownMs(4)).toBe(60 * 60_000); - expect(calculateAuthProfileCooldownMs(5)).toBe(60 * 60_000); + it("applies stepped backoff with a 5-min cap", () => { + expect(calculateAuthProfileCooldownMs(1)).toBe(30_000); // 30 seconds + expect(calculateAuthProfileCooldownMs(2)).toBe(60_000); // 1 minute + expect(calculateAuthProfileCooldownMs(3)).toBe(5 * 60_000); // 5 minutes + expect(calculateAuthProfileCooldownMs(4)).toBe(5 * 60_000); // 5 minutes (cap) + expect(calculateAuthProfileCooldownMs(5)).toBe(5 * 60_000); // 5 minutes (cap) }); }); From 0eefc6d55f68be51d2ed1c156be1e3b2ab45229f Mon Sep 17 00:00:00 2001 From: kiranvk2011 Date: Wed, 18 Mar 2026 20:00:18 +0000 Subject: [PATCH 7/8] docs: add changelog fragment for per-model cooldown + stepped backoff --- changelog/fragments/cooldown-per-model-stepped-backoff.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/fragments/cooldown-per-model-stepped-backoff.md diff --git a/changelog/fragments/cooldown-per-model-stepped-backoff.md b/changelog/fragments/cooldown-per-model-stepped-backoff.md new file mode 100644 index 00000000000..228bf0c6737 --- /dev/null +++ b/changelog/fragments/cooldown-per-model-stepped-backoff.md @@ -0,0 +1 @@ +- Agents/cooldowns: scope rate-limit cooldowns per model so one 429 no longer blocks every model on the same auth profile, replace the exponential 1 min → 1 h escalation with a stepped 30 s / 1 min / 5 min ladder, and surface a user-facing countdown message when all models are rate-limited. (#49834) Thanks @kiranvk-2011. From 6d58d2f381d74da1ffef86e47fdb1d93b03b1660 Mon Sep 17 00:00:00 2001 From: kiranvk2011 Date: Thu, 19 Mar 2026 11:26:54 +0000 Subject: [PATCH 8/8] ci: retrigger CI (all failures are pre-existing on main)