From b5913862ac1cb79151d1ea4a0dc7a2d6f3b592fe Mon Sep 17 00:00:00 2001
From: kiranvk2011 <kiranvk2011@gmail.com>
Date: Wed, 18 Mar 2026 13:50:52 +0000
Subject: [PATCH 1/8] fix: per-model cooldown scope + stepped backoff +
 user-facing rate-limit message
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Combines ideas from PRs #45113, #31962, and #45763 to address three
cooldown-related issues:

1. Stepped cooldown (30s → 1m → 5m cap) replaces the aggressive
   exponential formula (1m → 5m → 25m → 1h) that locked out providers
   for far longer than the actual API rate-limit window.

2. Per-model cooldown scoping: rate_limit cooldowns now record which
   model triggered them. When a different model on the same auth profile
   is requested, the cooldown is bypassed — so one model hitting a 429
   no longer blocks all other models on the same provider.

3. FallbackSummaryError with soonest-expiry countdown: when all
   candidates are exhausted, the user sees a clear message like
   '⚠️ Rate-limited — ready in ~28s' instead of a generic failure.

Files changed:
- types.ts: add cooldownReason/cooldownModel to ProfileUsageStats
- usage.ts: stepped formula, model-aware isProfileInCooldown, modelId
  threading through computeNextProfileUsageStats/markAuthProfileFailure
- model-fallback.ts: FallbackSummaryError class, model-aware availability
  check, soonestCooldownExpiry computation
- pi-embedded-runner/run.ts: thread modelId into failure recording
- agent-runner-execution.ts: buildCopilotCooldownMessage helper, rate-limit
  detection branch in error handler
- usage.test.ts: update expected cooldown value (60s → 30s)
---
 src/agents/auth-profiles/types.ts             |  2 +
 src/agents/auth-profiles/usage.test.ts        |  4 +-
 src/agents/auth-profiles/usage.ts             | 41 +++++++++++++--
 src/agents/model-fallback.ts                  | 50 +++++++++++++++++--
 src/agents/pi-embedded-runner/run.ts          |  4 ++
 .../reply/agent-runner-execution.ts           | 36 +++++++++++--
 6 files changed, 120 insertions(+), 17 deletions(-)

diff --git a/src/agents/auth-profiles/types.ts b/src/agents/auth-profiles/types.ts
index 127a444939b..848268385a2 100644
--- a/src/agents/auth-profiles/types.ts
+++ b/src/agents/auth-profiles/types.ts
@@ -51,6 +51,8 @@ export type AuthProfileFailureReason =
 export type ProfileUsageStats = {
   lastUsed?: number;
   cooldownUntil?: number;
+  cooldownReason?: AuthProfileFailureReason;
+  cooldownModel?: string;
   disabledUntil?: number;
   disabledReason?: AuthProfileFailureReason;
   errorCount?: number;
diff --git a/src/agents/auth-profiles/usage.test.ts b/src/agents/auth-profiles/usage.test.ts
index 6dd5697cc99..ec9cd2e143e 100644
--- a/src/agents/auth-profiles/usage.test.ts
+++ b/src/agents/auth-profiles/usage.test.ts
@@ -621,8 +621,8 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
         errorCount: 3,
         lastFailureAt: now - 60_000,
       }),
-      // errorCount resets → calculateAuthProfileCooldownMs(1) = 60_000
-      expectedUntil: (now: number) => now + 60_000,
+      // errorCount resets → calculateAuthProfileCooldownMs(1) = 30_000 (stepped: 30s → 1m → 5m)
+      expectedUntil: (now: number) => now + 30_000,
       readUntil: (stats: WindowStats | undefined) => stats?.cooldownUntil,
     },
     {
diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts
index 20e1cbaa497..c587f3c6d0e 100644
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -44,6 +44,7 @@ export function isProfileInCooldown(
   store: AuthProfileStore,
   profileId: string,
   now?: number,
+  forModel?: string,
 ): boolean {
   if (isAuthCooldownBypassedForProvider(store.profiles[profileId]?.provider)) {
     return false;
@@ -52,6 +53,16 @@ export function isProfileInCooldown(
   if (!stats) {
     return false;
   }
+  // Model-aware bypass: if the cooldown was caused by a rate_limit on a
+  // specific model and the caller is requesting a *different* model, allow it.
+  if (
+    forModel &&
+    stats.cooldownReason === "rate_limit" &&
+    stats.cooldownModel &&
+    stats.cooldownModel !== forModel
+  ) {
+    return false;
+  }
   const unusableUntil = resolveProfileUnusableUntil(stats);
   const ts = now ?? Date.now();
   return unusableUntil ? ts < unusableUntil : false;
@@ -212,6 +223,8 @@ export function clearExpiredCooldowns(store: AuthProfileStore, now?: number): bo
 
     if (cooldownExpired) {
       stats.cooldownUntil = undefined;
+      stats.cooldownReason = undefined;
+      stats.cooldownModel = undefined;
       profileMutated = true;
     }
     if (disabledExpired) {
@@ -275,10 +288,9 @@ export async function markAuthProfileUsed(params: {
 
 export function calculateAuthProfileCooldownMs(errorCount: number): number {
   const normalized = Math.max(1, errorCount);
-  return Math.min(
-    60 * 60 * 1000, // 1 hour max
-    60 * 1000 * 5 ** Math.min(normalized - 1, 3),
-  );
+  if (normalized <= 1) return 30_000; // 30 seconds
+  if (normalized <= 2) return 60_000; // 1 minute
+  return 5 * 60_000; // 5 minutes max
 }
 
 type ResolvedAuthCooldownConfig = {
@@ -366,6 +378,8 @@ function resetUsageStats(
     ...existing,
     errorCount: 0,
     cooldownUntil: undefined,
+    cooldownReason: undefined,
+    cooldownModel: undefined,
     disabledUntil: undefined,
     disabledReason: undefined,
     failureCounts: undefined,
@@ -398,6 +412,7 @@ function computeNextProfileUsageStats(params: {
   now: number;
   reason: AuthProfileFailureReason;
   cfgResolved: ResolvedAuthCooldownConfig;
+  modelId?: string;
 }): ProfileUsageStats {
   const windowMs = params.cfgResolved.failureWindowMs;
   const windowExpired =
@@ -451,6 +466,19 @@ function computeNextProfileUsageStats(params: {
       now: params.now,
       recomputedUntil: params.now + backoffMs,
     });
+    // Preserve existing cooldown metadata if the cooldown window is still
+    // active; otherwise record the new reason/model.
+    const existingCooldownActive =
+      typeof params.existing.cooldownUntil === "number" &&
+      params.existing.cooldownUntil > params.now;
+    if (existingCooldownActive) {
+      updatedStats.cooldownReason = params.existing.cooldownReason;
+      updatedStats.cooldownModel = params.existing.cooldownModel;
+    } else {
+      updatedStats.cooldownReason = params.reason;
+      updatedStats.cooldownModel =
+        params.reason === "rate_limit" ? params.modelId : undefined;
+    }
   }
 
   return updatedStats;
@@ -468,8 +496,9 @@ export async function markAuthProfileFailure(params: {
   cfg?: OpenClawConfig;
   agentDir?: string;
   runId?: string;
+  modelId?: string;
 }): Promise<void> {
-  const { store, profileId, reason, agentDir, cfg, runId } = params;
+  const { store, profileId, reason, agentDir, cfg, runId, modelId } = params;
   const profile = store.profiles[profileId];
   if (!profile || isAuthCooldownBypassedForProvider(profile.provider)) {
     return;
@@ -498,6 +527,7 @@ export async function markAuthProfileFailure(params: {
         now,
         reason,
         cfgResolved,
+        modelId,
       });
       nextStats = computed;
       updateUsageStatsEntry(freshStore, profileId, () => computed);
@@ -536,6 +566,7 @@ export async function markAuthProfileFailure(params: {
     now,
     reason,
     cfgResolved,
+    modelId,
   });
   nextStats = computed;
   updateUsageStatsEntry(store, profileId, () => computed);
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
index 5fd6e533a1a..8127fc263be 100644
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -34,6 +34,32 @@ import { isLikelyContextOverflowError } from "./pi-embedded-helpers.js";
 
 const log = createSubsystemLogger("model-fallback");
 
+/**
+ * Structured error thrown when all model fallback candidates have been
+ * exhausted. Carries per-attempt details so callers can build informative
+ * user-facing messages (e.g. "rate-limited, retry in 30 s").
+ */
+export class FallbackSummaryError extends Error {
+  readonly attempts: FallbackAttempt[];
+  readonly soonestCooldownExpiry: number | null;
+
+  constructor(
+    message: string,
+    attempts: FallbackAttempt[],
+    soonestCooldownExpiry: number | null,
+    cause?: Error,
+  ) {
+    super(message, { cause });
+    this.name = "FallbackSummaryError";
+    this.attempts = attempts;
+    this.soonestCooldownExpiry = soonestCooldownExpiry;
+  }
+}
+
+export function isFallbackSummaryError(err: unknown): err is FallbackSummaryError {
+  return err instanceof FallbackSummaryError;
+}
+
 export type ModelFallbackRunOptions = {
   allowTransientCooldownProbe?: boolean;
 };
@@ -189,17 +215,18 @@ function throwFallbackFailureSummary(params: {
   lastError: unknown;
   label: string;
   formatAttempt: (attempt: FallbackAttempt) => string;
+  soonestCooldownExpiry?: number | null;
 }): never {
   if (params.attempts.length <= 1 && params.lastError) {
     throw params.lastError;
   }
   const summary =
     params.attempts.length > 0 ? params.attempts.map(params.formatAttempt).join(" | ") : "unknown";
-  throw new Error(
+  throw new FallbackSummaryError(
     `All ${params.label} failed (${params.attempts.length || params.candidates.length}): ${summary}`,
-    {
-      cause: params.lastError instanceof Error ? params.lastError : undefined,
-    },
+    params.attempts,
+    params.soonestCooldownExpiry ?? null,
+    params.lastError instanceof Error ? params.lastError : undefined,
   );
 }
 
@@ -548,7 +575,7 @@ export async function runWithModelFallback<T>(params: {
         store: authStore,
         provider: candidate.provider,
       });
-      const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
+      const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id, undefined, candidate.model));
 
       if (profileIds.length > 0 && !isAnyProfileAvailable) {
         // All profiles for this provider are in cooldown.
@@ -771,6 +798,19 @@ export async function runWithModelFallback<T>(params: {
       `${attempt.provider}/${attempt.model}: ${attempt.error}${
         attempt.reason ? ` (${attempt.reason})` : ""
       }`,
+    soonestCooldownExpiry: (() => {
+      if (!authStore) return null;
+      const allProfileIds = new Set<string>();
+      for (const c of candidates) {
+        const ids = resolveAuthProfileOrder({
+          cfg: params.cfg,
+          store: authStore,
+          provider: c.provider,
+        });
+        for (const id of ids) allProfileIds.add(id);
+      }
+      return getSoonestCooldownExpiry(authStore, [...allProfileIds]);
+    })(),
   });
 }
 
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index a35c03d98ca..b5c55cf9d6d 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -832,6 +832,7 @@ export async function runEmbeddedPiAgent(
         reason?: AuthProfileFailureReason | null;
         config?: RunEmbeddedPiAgentParams["config"];
         agentDir?: RunEmbeddedPiAgentParams["agentDir"];
+        modelId?: string;
       }) => {
         const { profileId, reason } = failure;
         if (!profileId || !reason || reason === "timeout") {
@@ -844,6 +845,7 @@ export async function runEmbeddedPiAgent(
           cfg: params.config,
           agentDir,
           runId: params.runId,
+          modelId: failure.modelId,
         });
       };
       const resolveAuthProfileFailureReason = (
@@ -1382,6 +1384,7 @@ export async function runEmbeddedPiAgent(
             await maybeMarkAuthProfileFailure({
               profileId: lastProfileId,
               reason: promptProfileFailureReason,
+              modelId,
             });
             const promptFailoverFailure =
               promptFailoverReason !== null || isFailoverErrorMessage(errorText);
@@ -1523,6 +1526,7 @@ export async function runEmbeddedPiAgent(
               await maybeMarkAuthProfileFailure({
                 profileId: lastProfileId,
                 reason,
+                modelId,
               });
               if (timedOut && !isProbeSession) {
                 log.warn(`Profile ${lastProfileId} timed out. Trying next account...`);
diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts
index c25342e4a28..049958aec15 100644
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -5,6 +5,7 @@ import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-bu
 import { runCliAgent } from "../../agents/cli-runner.js";
 import { getCliSessionId } from "../../agents/cli-session.js";
 import { runWithModelFallback } from "../../agents/model-fallback.js";
+import { isFallbackSummaryError } from "../../agents/model-fallback.js";
 import { isCliProvider } from "../../agents/model-selection.js";
 import {
   BILLING_ERROR_USER_MESSAGE,
@@ -12,6 +13,7 @@ import {
   isContextOverflowError,
   isBillingErrorMessage,
   isLikelyContextOverflowError,
+  isRateLimitErrorMessage,
   isTransientHttpError,
   sanitizeUserFacingText,
 } from "../../agents/pi-embedded-helpers.js";
@@ -74,6 +76,26 @@ export type AgentRunLoopResult =
     }
   | { kind: "final"; payload: ReplyPayload };
 
+/**
+ * Build a human-friendly rate-limit message from a FallbackSummaryError.
+ * Includes a countdown when the soonest cooldown expiry is known.
+ */
+function buildCopilotCooldownMessage(err: unknown): string {
+  if (!isFallbackSummaryError(err)) {
+    return "⚠️ All models are temporarily rate-limited. Please try again in a few minutes.";
+  }
+  const expiry = err.soonestCooldownExpiry;
+  if (typeof expiry === "number" && expiry > Date.now()) {
+    const secsLeft = Math.ceil((expiry - Date.now()) / 1000);
+    if (secsLeft <= 60) {
+      return `⚠️ Rate-limited — ready in ~${secsLeft}s. Please wait a moment.`;
+    }
+    const minsLeft = Math.ceil(secsLeft / 60);
+    return `⚠️ Rate-limited — ready in ~${minsLeft} min. Please try again shortly.`;
+  }
+  return "⚠️ All models are temporarily rate-limited. Please try again in a few minutes.";
+}
+
 export async function runAgentTurnWithFallback(params: {
   commandBody: string;
   followupRun: FollowupRun;
@@ -623,17 +645,21 @@ export async function runAgentTurnWithFallback(params: {
       }
 
       defaultRuntime.error(`Embedded agent failed before reply: ${message}`);
+      const isRateLimit =
+        isRateLimitErrorMessage(message) || isFallbackSummaryError(err);
       const safeMessage = isTransientHttp
         ? sanitizeUserFacingText(message, { errorContext: true })
         : message;
       const trimmedMessage = safeMessage.replace(/\.\s*$/, "");
       const fallbackText = isBilling
         ? BILLING_ERROR_USER_MESSAGE
-        : isContextOverflow
-          ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
-          : isRoleOrderingError
-            ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
-            : `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`;
+        : isRateLimit
+          ? buildCopilotCooldownMessage(err)
+          : isContextOverflow
+            ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
+            : isRoleOrderingError
+              ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
+              : `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`;
 
       return {
         kind: "final",

From d51e76b0cd9e9a9417e4e7b25738d4c35b8e7bbc Mon Sep 17 00:00:00 2001
From: kiranvk2011 <kiranvk2011@gmail.com>
Date: Wed, 18 Mar 2026 14:02:39 +0000
Subject: [PATCH 2/8] =?UTF-8?q?fix:=20address=20review=20feedback=20?=
 =?UTF-8?q?=E2=80=94=20stale=20JSDoc,=20duplicate=20import,=20oxfmt=20form?=
 =?UTF-8?q?atting?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

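- Update markAuthProfileCooldown JSDoc to reflect new stepped backoff (30s/1m/5m)
- Merge duplicate isFallbackSummaryError import into single import statement
- Run oxfmt on all changed files to fix formatting CI failure
- Keep profile-wide billing/auth disables (disabledUntil) in force: the
  per-model rate_limit bypass in isProfileInCooldown no longer applies while
  a disable window is active
- Only classify a FallbackSummaryError as rate-limited when at least one
  attempt has reason rate_limit or overloaded, so other failures keep the
  generic error text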
---
 src/agents/auth-profiles/usage.ts              | 12 +++++++-----
 src/agents/model-fallback.ts                   |  4 +++-
 src/auto-reply/reply/agent-runner-execution.ts | 12 +++++++++---
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts
index c587f3c6d0e..9bcd674d81c 100644
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -55,11 +55,14 @@ export function isProfileInCooldown(
   }
   // Model-aware bypass: if the cooldown was caused by a rate_limit on a
   // specific model and the caller is requesting a *different* model, allow it.
+  // We still honour any active billing/auth disable (`disabledUntil`) — those
+  // are profile-wide and must not be short-circuited by model scoping.
   if (
     forModel &&
     stats.cooldownReason === "rate_limit" &&
     stats.cooldownModel &&
-    stats.cooldownModel !== forModel
+    stats.cooldownModel !== forModel &&
+    !isActiveUnusableWindow(stats.disabledUntil, now ?? Date.now())
   ) {
     return false;
   }
@@ -476,8 +479,7 @@ function computeNextProfileUsageStats(params: {
       updatedStats.cooldownModel = params.existing.cooldownModel;
     } else {
       updatedStats.cooldownReason = params.reason;
-      updatedStats.cooldownModel =
-        params.reason === "rate_limit" ? params.modelId : undefined;
+      updatedStats.cooldownModel = params.reason === "rate_limit" ? params.modelId : undefined;
     }
   }
 
@@ -583,8 +585,8 @@ export async function markAuthProfileFailure(params: {
 }
 
 /**
- * Mark a profile as transiently failed. Applies exponential backoff cooldown.
- * Cooldown times: 1min, 5min, 25min, max 1 hour.
+ * Mark a profile as transiently failed. Applies stepped backoff cooldown.
+ * Cooldown times: 30s, 1min, 5min (capped).
  * Uses store lock to avoid overwriting concurrent usage updates.
  */
 export async function markAuthProfileCooldown(params: {
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
index 8127fc263be..7223c6c9dfa 100644
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -575,7 +575,9 @@ export async function runWithModelFallback<T>(params: {
         store: authStore,
         provider: candidate.provider,
       });
-      const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id, undefined, candidate.model));
+      const isAnyProfileAvailable = profileIds.some(
+        (id) => !isProfileInCooldown(authStore, id, undefined, candidate.model),
+      );
 
       if (profileIds.length > 0 && !isAnyProfileAvailable) {
         // All profiles for this provider are in cooldown.
diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts
index 049958aec15..855b352bfe2 100644
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -4,8 +4,7 @@ import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-pay
 import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-budget.js";
 import { runCliAgent } from "../../agents/cli-runner.js";
 import { getCliSessionId } from "../../agents/cli-session.js";
-import { runWithModelFallback } from "../../agents/model-fallback.js";
-import { isFallbackSummaryError } from "../../agents/model-fallback.js";
+import { runWithModelFallback, isFallbackSummaryError } from "../../agents/model-fallback.js";
 import { isCliProvider } from "../../agents/model-selection.js";
 import {
   BILLING_ERROR_USER_MESSAGE,
@@ -645,8 +644,15 @@ export async function runAgentTurnWithFallback(params: {
       }
 
       defaultRuntime.error(`Embedded agent failed before reply: ${message}`);
+      // Only classify as rate-limit when we have concrete evidence: either
+      // the error message itself is a rate-limit string, or the fallback
+      // chain exhaustion includes at least one rate_limit / overloaded attempt.
+      // This avoids showing misleading "Rate-limited — ready in ~Xs" messages
+      // for auth, model_not_found, or other non-rate-limit failures.
       const isRateLimit =
-        isRateLimitErrorMessage(message) || isFallbackSummaryError(err);
+        isRateLimitErrorMessage(message) ||
+        (isFallbackSummaryError(err) &&
+          err.attempts.some((a) => a.reason === "rate_limit" || a.reason === "overloaded"));
       const safeMessage = isTransientHttp
         ? sanitizeUserFacingText(message, { errorContext: true })
         : message;

From 75661f59c10744116b9516e6e7cd38517c96e645 Mon Sep 17 00:00:00 2001
From: kiranvk2011 <kiranvk2011@gmail.com>
Date: Wed, 18 Mar 2026 14:37:30 +0000
Subject: [PATCH 3/8] fix: widen cooldown scope when different model fails
 during active window

- When model A is cooling down and model B also fails, set cooldownModel
  to undefined so neither model bypasses via per-model scope
- Same-model retries preserve the original cooldownModel
- Add 6 new tests for per-model cooldown behavior: model-scoped bypass,
  profile-wide cooldown, billing-disable guard, first-failure metadata
  recording, scope-widening, same-model retry preservation
- Update .some() comment to document intentional design choice for mixed
  fallback failure reasons
---
 src/agents/auth-profiles/usage.test.ts        | 122 ++++++++++++++++++
 src/agents/auth-profiles/usage.ts             |  16 ++-
 .../reply/agent-runner-execution.ts           |   5 +-
 3 files changed, 138 insertions(+), 5 deletions(-)

diff --git a/src/agents/auth-profiles/usage.test.ts b/src/agents/auth-profiles/usage.test.ts
index ec9cd2e143e..8d633a7d89b 100644
--- a/src/agents/auth-profiles/usage.test.ts
+++ b/src/agents/auth-profiles/usage.test.ts
@@ -132,6 +132,53 @@ describe("isProfileInCooldown", () => {
     });
     expect(isProfileInCooldown(store, "kilocode:default")).toBe(false);
   });
+
+  it("returns false for a different model when cooldown is model-scoped (rate_limit)", () => {
+    const store = makeStore({
+      "github-copilot:github": {
+        cooldownUntil: Date.now() + 60_000,
+        cooldownReason: "rate_limit",
+        cooldownModel: "claude-sonnet-4.6",
+      },
+    });
+    // Different model bypasses the cooldown
+    expect(isProfileInCooldown(store, "github-copilot:github", undefined, "gpt-4.1")).toBe(false);
+    // Same model is still blocked
+    expect(
+      isProfileInCooldown(store, "github-copilot:github", undefined, "claude-sonnet-4.6"),
+    ).toBe(true);
+    // No model specified — blocked (conservative)
+    expect(isProfileInCooldown(store, "github-copilot:github")).toBe(true);
+  });
+
+  it("returns true for all models when cooldownModel is undefined (profile-wide)", () => {
+    const store = makeStore({
+      "github-copilot:github": {
+        cooldownUntil: Date.now() + 60_000,
+        cooldownReason: "rate_limit",
+        cooldownModel: undefined,
+      },
+    });
+    expect(
+      isProfileInCooldown(store, "github-copilot:github", undefined, "claude-sonnet-4.6"),
+    ).toBe(true);
+    expect(isProfileInCooldown(store, "github-copilot:github", undefined, "gpt-4.1")).toBe(true);
+  });
+
+  it("does not bypass model-scoped cooldown when disabledUntil is active", () => {
+    const store = makeStore({
+      "github-copilot:github": {
+        cooldownUntil: Date.now() + 60_000,
+        cooldownReason: "rate_limit",
+        cooldownModel: "claude-sonnet-4.6",
+        disabledUntil: Date.now() + 120_000,
+        disabledReason: "billing",
+      },
+    });
+    // Even though cooldownModel is for a different model, billing disable
+    // should keep the profile blocked for all models.
+    expect(isProfileInCooldown(store, "github-copilot:github", undefined, "gpt-4.1")).toBe(true);
+  });
 });
 
 describe("resolveProfilesUnavailableReason", () => {
@@ -675,3 +722,78 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
     });
   }
 });
+
+describe("markAuthProfileFailure — per-model cooldown metadata", () => {
+  function makeStoreWithCopilot(usageStats: AuthProfileStore["usageStats"]): AuthProfileStore {
+    const store = makeStore(usageStats);
+    store.profiles["github-copilot:github"] = {
+      type: "api_key",
+      provider: "github-copilot",
+      key: "ghu_test",
+    };
+    return store;
+  }
+
+  async function markFailure(params: {
+    store: ReturnType<typeof makeStoreWithCopilot>;
+    now: number;
+    modelId?: string;
+  }): Promise<void> {
+    vi.useFakeTimers();
+    vi.setSystemTime(params.now);
+    try {
+      await markAuthProfileFailure({
+        store: params.store,
+        profileId: "github-copilot:github",
+        reason: "rate_limit",
+        modelId: params.modelId,
+      });
+    } finally {
+      vi.useRealTimers();
+    }
+  }
+
+  it("records cooldownModel on first rate_limit failure", async () => {
+    const now = 1_000_000;
+    const store = makeStoreWithCopilot({});
+    await markFailure({ store, now, modelId: "claude-sonnet-4.6" });
+    const stats = store.usageStats?.["github-copilot:github"];
+    expect(stats?.cooldownReason).toBe("rate_limit");
+    expect(stats?.cooldownModel).toBe("claude-sonnet-4.6");
+  });
+
+  it("widens cooldownModel to undefined when a different model fails during active cooldown", async () => {
+    const now = 1_000_000;
+    const store = makeStoreWithCopilot({
+      "github-copilot:github": {
+        cooldownUntil: now + 30_000,
+        cooldownReason: "rate_limit",
+        cooldownModel: "claude-sonnet-4.6",
+        errorCount: 1,
+        lastFailureAt: now - 1000,
+      },
+    });
+    // Different model fails during active cooldown
+    await markFailure({ store, now, modelId: "gpt-4.1" });
+    const stats = store.usageStats?.["github-copilot:github"];
+    // Scope widened to all models
+    expect(stats?.cooldownModel).toBeUndefined();
+    expect(stats?.cooldownReason).toBe("rate_limit");
+  });
+
+  it("preserves cooldownModel when the same model fails again during active cooldown", async () => {
+    const now = 1_000_000;
+    const store = makeStoreWithCopilot({
+      "github-copilot:github": {
+        cooldownUntil: now + 30_000,
+        cooldownReason: "rate_limit",
+        cooldownModel: "claude-sonnet-4.6",
+        errorCount: 1,
+        lastFailureAt: now - 1000,
+      },
+    });
+    await markFailure({ store, now, modelId: "claude-sonnet-4.6" });
+    const stats = store.usageStats?.["github-copilot:github"];
+    expect(stats?.cooldownModel).toBe("claude-sonnet-4.6");
+  });
+});
diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts
index 9bcd674d81c..4412e03ec27 100644
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -469,14 +469,24 @@ function computeNextProfileUsageStats(params: {
       now: params.now,
       recomputedUntil: params.now + backoffMs,
     });
-    // Preserve existing cooldown metadata if the cooldown window is still
-    // active; otherwise record the new reason/model.
+    // Update cooldown metadata based on whether the window is still active
+    // and whether the same or a different model is failing.
     const existingCooldownActive =
       typeof params.existing.cooldownUntil === "number" &&
       params.existing.cooldownUntil > params.now;
     if (existingCooldownActive) {
       updatedStats.cooldownReason = params.existing.cooldownReason;
-      updatedStats.cooldownModel = params.existing.cooldownModel;
+      // If a different model fails during an active window, widen the scope
+      // to all models (undefined) so neither model bypasses the cooldown.
+      if (
+        params.existing.cooldownModel &&
+        params.modelId &&
+        params.existing.cooldownModel !== params.modelId
+      ) {
+        updatedStats.cooldownModel = undefined;
+      } else {
+        updatedStats.cooldownModel = params.existing.cooldownModel;
+      }
     } else {
       updatedStats.cooldownReason = params.reason;
       updatedStats.cooldownModel = params.reason === "rate_limit" ? params.modelId : undefined;
diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts
index 855b352bfe2..7bf940e5792 100644
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -647,8 +647,9 @@ export async function runAgentTurnWithFallback(params: {
       // Only classify as rate-limit when we have concrete evidence: either
       // the error message itself is a rate-limit string, or the fallback
       // chain exhaustion includes at least one rate_limit / overloaded attempt.
-      // This avoids showing misleading "Rate-limited — ready in ~Xs" messages
-      // for auth, model_not_found, or other non-rate-limit failures.
+      // Using `.some()` intentionally: when any attempt is rate-limited, the
+      // countdown message is more actionable than the generic failure text,
+      // even if other attempts failed for different reasons (auth, etc.).
       const isRateLimit =
         isRateLimitErrorMessage(message) ||
         (isFallbackSummaryError(err) &&

From d88de88962e268464b966f9f52befa26317f0ee2 Mon Sep 17 00:00:00 2001
From: kiranvk2011 <kiranvk2011@gmail.com>
Date: Wed, 18 Mar 2026 15:03:38 +0000
Subject: [PATCH 4/8] fix: lint curly braces + thread modelId into embedded
 runner cooldown checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add curly braces to single-line if/for bodies in usage.ts and
  model-fallback.ts to satisfy oxlint eslint(curly) rule
- Thread modelId into all 3 isProfileInCooldown calls in
  pi-embedded-runner/run.ts (lines 719, 746, 767) so the inner
  profile loop respects per-model cooldown scope — fixes Codex P1
  review comment about outer gate passing model-B while inner loop
  rejects it without model context
---
 src/agents/auth-profiles/usage.ts    |  8 ++++++--
 src/agents/model-fallback.ts         |  8 ++++++--
 src/agents/pi-embedded-runner/run.ts | 10 +++++++---
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts
index 4412e03ec27..f9b7cfc975f 100644
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -291,8 +291,12 @@ export async function markAuthProfileUsed(params: {
 
 export function calculateAuthProfileCooldownMs(errorCount: number): number {
   const normalized = Math.max(1, errorCount);
-  if (normalized <= 1) return 30_000; // 30 seconds
-  if (normalized <= 2) return 60_000; // 1 minute
+  if (normalized <= 1) {
+    return 30_000; // 30 seconds
+  }
+  if (normalized <= 2) {
+    return 60_000; // 1 minute
+  }
   return 5 * 60_000; // 5 minutes max
 }
 
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
index 7223c6c9dfa..ece9d59334c 100644
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -801,7 +801,9 @@ export async function runWithModelFallback<T>(params: {
         attempt.reason ? ` (${attempt.reason})` : ""
       }`,
     soonestCooldownExpiry: (() => {
-      if (!authStore) return null;
+      if (!authStore) {
+        return null;
+      }
       const allProfileIds = new Set<string>();
       for (const c of candidates) {
         const ids = resolveAuthProfileOrder({
@@ -809,7 +811,9 @@ export async function runWithModelFallback<T>(params: {
           store: authStore,
           provider: c.provider,
         });
-        for (const id of ids) allProfileIds.add(id);
+        for (const id of ids) {
+          allProfileIds.add(id);
+        }
       }
       return getSoonestCooldownExpiry(authStore, [...allProfileIds]);
     })(),
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index b5c55cf9d6d..00159222af0 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -716,7 +716,7 @@ export async function runEmbeddedPiAgent(
         let nextIndex = profileIndex + 1;
         while (nextIndex < profileCandidates.length) {
           const candidate = profileCandidates[nextIndex];
-          if (candidate && isProfileInCooldown(authStore, candidate)) {
+          if (candidate && isProfileInCooldown(authStore, candidate, undefined, modelId)) {
             nextIndex += 1;
             continue;
           }
@@ -743,7 +743,9 @@ export async function runEmbeddedPiAgent(
         );
         const allAutoProfilesInCooldown =
           autoProfileCandidates.length > 0 &&
-          autoProfileCandidates.every((candidate) => isProfileInCooldown(authStore, candidate));
+          autoProfileCandidates.every((candidate) =>
+            isProfileInCooldown(authStore, candidate, undefined, modelId),
+          );
         const unavailableReason = allAutoProfilesInCooldown
           ? (resolveProfilesUnavailableReason({
               store: authStore,
@@ -762,7 +764,9 @@ export async function runEmbeddedPiAgent(
         while (profileIndex < profileCandidates.length) {
           const candidate = profileCandidates[profileIndex];
           const inCooldown =
-            candidate && candidate !== lockedProfileId && isProfileInCooldown(authStore, candidate);
+            candidate &&
+            candidate !== lockedProfileId &&
+            isProfileInCooldown(authStore, candidate, undefined, modelId);
           if (inCooldown) {
             if (allowTransientCooldownProbe && !didTransientCooldownProbe) {
               didTransientCooldownProbe = true;

From 8ea6f5206c8f4237525f1708e54753ad40c0ebf7 Mon Sep 17 00:00:00 2001
From: kiranvk2011 <kiranvk2011@gmail.com>
Date: Wed, 18 Mar 2026 15:36:32 +0000
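Subject: [PATCH 5/8] fix: update cooldownReason on active-window failures +
 clear model scope for non-rate-limit

When a failure is recorded while a cooldown window is still active,
store the latest failure reason instead of carrying over the reason
from the existing window, so consumers such as the isProfileInCooldown
model bypass see the most recent signal.

Non-rate_limit failures (auth, billing, ...) are profile-wide, so also
clear cooldownModel for them, even when the same model fails again, so
that no model can bypass the cooldown.

Add tests covering an auth failure during an active rate_limit window
for both a different model and the same model.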
---
 src/agents/auth-profiles/usage.test.ts | 47 ++++++++++++++++++++++++++
 src/agents/auth-profiles/usage.ts      | 10 +++++-
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/src/agents/auth-profiles/usage.test.ts b/src/agents/auth-profiles/usage.test.ts
index 8d633a7d89b..69d3d4ca200 100644
--- a/src/agents/auth-profiles/usage.test.ts
+++ b/src/agents/auth-profiles/usage.test.ts
@@ -796,4 +796,51 @@ describe("markAuthProfileFailure — per-model cooldown metadata", () => {
     const stats = store.usageStats?.["github-copilot:github"];
     expect(stats?.cooldownModel).toBe("claude-sonnet-4.6");
   });
+
+  it("updates cooldownReason when auth failure occurs during active rate_limit window", async () => {
+    const now = 1_000_000;
+    const store = makeStoreWithCopilot({
+      "github-copilot:github": {
+        cooldownUntil: now + 30_000,
+        cooldownReason: "rate_limit",
+        cooldownModel: "claude-sonnet-4.6",
+        errorCount: 1,
+        lastFailureAt: now - 1000,
+      },
+    });
+    await markAuthProfileFailure({
+      store,
+      profileId: "github-copilot:github",
+      reason: "auth",
+      modelId: "claude-opus-4.6",
+    });
+    const stats = store.usageStats?.["github-copilot:github"];
+    // Reason should update to the new failure type, not stay as rate_limit
+    expect(stats?.cooldownReason).toBe("auth");
+    // Model scope should be cleared — auth failures are profile-wide
+    expect(stats?.cooldownModel).toBeUndefined();
+  });
+
+  it("clears cooldownModel when non-rate_limit failure hits same model during active window", async () => {
+    const now = 1_000_000;
+    const store = makeStoreWithCopilot({
+      "github-copilot:github": {
+        cooldownUntil: now + 30_000,
+        cooldownReason: "rate_limit",
+        cooldownModel: "claude-sonnet-4.6",
+        errorCount: 1,
+        lastFailureAt: now - 1000,
+      },
+    });
+    await markAuthProfileFailure({
+      store,
+      profileId: "github-copilot:github",
+      reason: "auth",
+      modelId: "claude-sonnet-4.6",
+    });
+    const stats = store.usageStats?.["github-copilot:github"];
+    // Even same-model auth failure should clear model scope (auth is profile-wide)
+    expect(stats?.cooldownReason).toBe("auth");
+    expect(stats?.cooldownModel).toBeUndefined();
+  });
 });
diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts
index f9b7cfc975f..84f9bdaa83b 100644
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -479,7 +479,11 @@ function computeNextProfileUsageStats(params: {
       typeof params.existing.cooldownUntil === "number" &&
       params.existing.cooldownUntil > params.now;
     if (existingCooldownActive) {
-      updatedStats.cooldownReason = params.existing.cooldownReason;
+      // Always use the latest failure reason so that downstream consumers
+      // (e.g. isProfileInCooldown model-bypass) see the most recent signal.
+      // A non-rate_limit failure (auth, billing, …) is profile-wide, so
+      // upgrading from rate_limit → auth correctly blocks all models.
+      updatedStats.cooldownReason = params.reason;
       // If a different model fails during an active window, widen the scope
       // to all models (undefined) so neither model bypasses the cooldown.
       if (
@@ -488,6 +492,10 @@ function computeNextProfileUsageStats(params: {
         params.existing.cooldownModel !== params.modelId
       ) {
         updatedStats.cooldownModel = undefined;
+      } else if (params.reason !== "rate_limit") {
+        // Non-rate-limit failures are profile-wide — clear model scope even
+        // when the same model fails, so that no model can bypass.
+        updatedStats.cooldownModel = undefined;
       } else {
         updatedStats.cooldownModel = params.existing.cooldownModel;
       }

From ae9ad3c07048d46ddd7799b17c4968dca3cae382 Mon Sep 17 00:00:00 2001
From: kiranvk2011 <kiranvk2011@gmail.com>
Date: Wed, 18 Mar 2026 18:44:50 +0000
Subject: [PATCH 6/8] test: update markAuthProfileFailure tests for stepped
 30s/1m/5m cooldown ladder
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update the auth-profiles.markauthprofilefailure test suite to match the
new stepped cooldown formula (30s → 1m → 5m cap) introduced in the
first commit. The test was still asserting the old exponential backoff
values (1m → 5m → 25m → 1h cap).

Changes:
- calculateAuthProfileCooldownMs assertions: 60s→30s, 5m→1m, 25m→5m,
  1h→5m cap
- 'resets error count when previous cooldown has expired' test: upper
  bound adjusted from 120s to 60s to match 30s base cooldown
- Comments updated to reflect the stepped ladder

Resolves merge-blocker review from @altaywtf.
---
 ...uth-profiles.markauthprofilefailure.test.ts | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/agents/auth-profiles.markauthprofilefailure.test.ts b/src/agents/auth-profiles.markauthprofilefailure.test.ts
index 5c4d73197b3..af828e1ee52 100644
--- a/src/agents/auth-profiles.markauthprofilefailure.test.ts
+++ b/src/agents/auth-profiles.markauthprofilefailure.test.ts
@@ -230,12 +230,12 @@ describe("markAuthProfileFailure", () => {
 
       const stats = store.usageStats?.["anthropic:default"];
       // Error count should reset to 1 (not escalate to 4) because the
-      // previous cooldown expired. Cooldown should be ~1 min, not ~60 min.
+      // previous cooldown expired. Cooldown should be ~30s, not ~5 min.
       expect(stats?.errorCount).toBe(1);
       expect(stats?.failureCounts?.rate_limit).toBe(1);
       const cooldownMs = (stats?.cooldownUntil ?? 0) - now;
-      // calculateAuthProfileCooldownMs(1) = 60_000 (1 minute)
-      expect(cooldownMs).toBeLessThan(120_000);
+      // calculateAuthProfileCooldownMs(1) = 30_000 (stepped: 30s → 1m → 5m)
+      expect(cooldownMs).toBeLessThan(60_000);
       expect(cooldownMs).toBeGreaterThan(0);
     } finally {
       fs.rmSync(agentDir, { recursive: true, force: true });
@@ -267,11 +267,11 @@ describe("markAuthProfileFailure", () => {
 });
 
 describe("calculateAuthProfileCooldownMs", () => {
-  it("applies exponential backoff with a 1h cap", () => {
-    expect(calculateAuthProfileCooldownMs(1)).toBe(60_000);
-    expect(calculateAuthProfileCooldownMs(2)).toBe(5 * 60_000);
-    expect(calculateAuthProfileCooldownMs(3)).toBe(25 * 60_000);
-    expect(calculateAuthProfileCooldownMs(4)).toBe(60 * 60_000);
-    expect(calculateAuthProfileCooldownMs(5)).toBe(60 * 60_000);
+  it("applies stepped backoff with a 5-min cap", () => {
+    expect(calculateAuthProfileCooldownMs(1)).toBe(30_000); // 30 seconds
+    expect(calculateAuthProfileCooldownMs(2)).toBe(60_000); // 1 minute
+    expect(calculateAuthProfileCooldownMs(3)).toBe(5 * 60_000); // 5 minutes
+    expect(calculateAuthProfileCooldownMs(4)).toBe(5 * 60_000); // 5 minutes (cap)
+    expect(calculateAuthProfileCooldownMs(5)).toBe(5 * 60_000); // 5 minutes (cap)
   });
 });

From 0eefc6d55f68be51d2ed1c156be1e3b2ab45229f Mon Sep 17 00:00:00 2001
From: kiranvk2011 <kiranvk2011@gmail.com>
Date: Wed, 18 Mar 2026 20:00:18 +0000
Subject: [PATCH 7/8] docs: add changelog fragment for per-model cooldown +
 stepped backoff

---
 changelog/fragments/cooldown-per-model-stepped-backoff.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog/fragments/cooldown-per-model-stepped-backoff.md

diff --git a/changelog/fragments/cooldown-per-model-stepped-backoff.md b/changelog/fragments/cooldown-per-model-stepped-backoff.md
new file mode 100644
index 00000000000..228bf0c6737
--- /dev/null
+++ b/changelog/fragments/cooldown-per-model-stepped-backoff.md
@@ -0,0 +1 @@
+- Agents/cooldowns: scope rate-limit cooldowns per model so one 429 no longer blocks every model on the same auth profile, replace the exponential 1 min → 1 h escalation with a stepped 30 s / 1 min / 5 min ladder, and surface a user-facing countdown message when all models are rate-limited. (#49834) Thanks @kiranvk-2011.

From 6d58d2f381d74da1ffef86e47fdb1d93b03b1660 Mon Sep 17 00:00:00 2001
From: kiranvk2011 <kiranvk2011@gmail.com>
Date: Thu, 19 Mar 2026 11:26:54 +0000
Subject: [PATCH 8/8] ci: retrigger CI (all failures are pre-existing on main)