From b5913862ac1cb79151d1ea4a0dc7a2d6f3b592fe Mon Sep 17 00:00:00 2001
From: kiranvk2011 <kiranvk2011@gmail.com>
Date: Wed, 18 Mar 2026 13:50:52 +0000
Subject: [PATCH] fix: per-model cooldown scope + stepped backoff + user-facing
 rate-limit message
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Combines ideas from PRs #45113, #31962, and #45763 to address three
cooldown-related issues:

1. Stepped cooldown (30s → 1m → 5m cap) replaces the aggressive
   exponential formula (1m → 5m → 25m → 1h) that locked out providers
   for far longer than the actual API rate-limit window.

2. Per-model cooldown scoping: rate_limit cooldowns now record which
   model triggered them. When a different model on the same auth profile
   is requested, the cooldown is bypassed — so one model hitting a 429
   no longer blocks all other models on the same provider.

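3. FallbackSummaryError with soonest-expiry countdown: when all
   candidates are exhausted, the user sees a clear message like
   '⚠️ Rate-limited — ready in ~28s' instead of the generic
   'Agent failed before reply' error. When no cooldown expiry is known,
   a plain "try again in a few minutes" notice is shown instead.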

Files changed:
- types.ts: add cooldownReason/cooldownModel to ProfileUsageStats
- usage.ts: stepped formula, model-aware isProfileInCooldown, modelId
  threading through computeNextProfileUsageStats/markAuthProfileFailure,
  cooldownReason/cooldownModel cleared on cooldown expiry and stats reset
- model-fallback.ts: FallbackSummaryError class, model-aware availability
  check, soonestCooldownExpiry computation
- pi-embedded-runner/run.ts: thread modelId into failure recording
- agent-runner-execution.ts: buildCopilotCooldownMessage helper, rate-limit
  detection branch in error handler
- usage.test.ts: update expected cooldown value (60s → 30s)
---
 src/agents/auth-profiles/types.ts             |  2 +
 src/agents/auth-profiles/usage.test.ts        |  4 +-
 src/agents/auth-profiles/usage.ts             | 41 +++++++++++++--
 src/agents/model-fallback.ts                  | 50 +++++++++++++++++--
 src/agents/pi-embedded-runner/run.ts          |  4 ++
 .../reply/agent-runner-execution.ts           | 36 +++++++++++--
 6 files changed, 120 insertions(+), 17 deletions(-)

diff --git a/src/agents/auth-profiles/types.ts b/src/agents/auth-profiles/types.ts
index 127a444939b..848268385a2 100644
--- a/src/agents/auth-profiles/types.ts
+++ b/src/agents/auth-profiles/types.ts
@@ -51,6 +51,8 @@ export type AuthProfileFailureReason =
 export type ProfileUsageStats = {
   lastUsed?: number;
   cooldownUntil?: number;
+  cooldownReason?: AuthProfileFailureReason;
+  cooldownModel?: string;
   disabledUntil?: number;
   disabledReason?: AuthProfileFailureReason;
   errorCount?: number;
diff --git a/src/agents/auth-profiles/usage.test.ts b/src/agents/auth-profiles/usage.test.ts
index 6dd5697cc99..ec9cd2e143e 100644
--- a/src/agents/auth-profiles/usage.test.ts
+++ b/src/agents/auth-profiles/usage.test.ts
@@ -621,8 +621,8 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
         errorCount: 3,
         lastFailureAt: now - 60_000,
       }),
-      // errorCount resets → calculateAuthProfileCooldownMs(1) = 60_000
-      expectedUntil: (now: number) => now + 60_000,
+      // errorCount resets → calculateAuthProfileCooldownMs(1) = 30_000 (stepped: 30s → 1m → 5m)
+      expectedUntil: (now: number) => now + 30_000,
       readUntil: (stats: WindowStats | undefined) => stats?.cooldownUntil,
     },
     {
diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts
index 20e1cbaa497..c587f3c6d0e 100644
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -44,6 +44,7 @@ export function isProfileInCooldown(
   store: AuthProfileStore,
   profileId: string,
   now?: number,
+  forModel?: string,
 ): boolean {
   if (isAuthCooldownBypassedForProvider(store.profiles[profileId]?.provider)) {
     return false;
@@ -52,6 +53,16 @@ export function isProfileInCooldown(
   if (!stats) {
     return false;
   }
+  // Model-aware bypass: if the cooldown was caused by a rate_limit on a
+  // specific model and the caller is requesting a *different* model, allow it.
+  if (
+    forModel &&
+    stats.cooldownReason === "rate_limit" &&
+    stats.cooldownModel &&
+    stats.cooldownModel !== forModel
+  ) {
+    return false;
+  }
   const unusableUntil = resolveProfileUnusableUntil(stats);
   const ts = now ?? Date.now();
   return unusableUntil ? ts < unusableUntil : false;
@@ -212,6 +223,8 @@ export function clearExpiredCooldowns(store: AuthProfileStore, now?: number): bo
 
     if (cooldownExpired) {
       stats.cooldownUntil = undefined;
+      stats.cooldownReason = undefined;
+      stats.cooldownModel = undefined;
       profileMutated = true;
     }
     if (disabledExpired) {
@@ -275,10 +288,9 @@ export async function markAuthProfileUsed(params: {
 
 export function calculateAuthProfileCooldownMs(errorCount: number): number {
   const normalized = Math.max(1, errorCount);
-  return Math.min(
-    60 * 60 * 1000, // 1 hour max
-    60 * 1000 * 5 ** Math.min(normalized - 1, 3),
-  );
+  if (normalized <= 1) return 30_000; // 30 seconds
+  if (normalized <= 2) return 60_000; // 1 minute
+  return 5 * 60_000; // 5 minutes max
 }
 
 type ResolvedAuthCooldownConfig = {
@@ -366,6 +378,8 @@ function resetUsageStats(
     ...existing,
     errorCount: 0,
     cooldownUntil: undefined,
+    cooldownReason: undefined,
+    cooldownModel: undefined,
     disabledUntil: undefined,
     disabledReason: undefined,
     failureCounts: undefined,
@@ -398,6 +412,7 @@ function computeNextProfileUsageStats(params: {
   now: number;
   reason: AuthProfileFailureReason;
   cfgResolved: ResolvedAuthCooldownConfig;
+  modelId?: string;
 }): ProfileUsageStats {
   const windowMs = params.cfgResolved.failureWindowMs;
   const windowExpired =
@@ -451,6 +466,19 @@ function computeNextProfileUsageStats(params: {
       now: params.now,
       recomputedUntil: params.now + backoffMs,
     });
+    // Preserve existing cooldown metadata if the cooldown window is still
+    // active; otherwise record the new reason/model.
+    const existingCooldownActive =
+      typeof params.existing.cooldownUntil === "number" &&
+      params.existing.cooldownUntil > params.now;
+    if (existingCooldownActive) {
+      updatedStats.cooldownReason = params.existing.cooldownReason;
+      updatedStats.cooldownModel = params.existing.cooldownModel;
+    } else {
+      updatedStats.cooldownReason = params.reason;
+      updatedStats.cooldownModel =
+        params.reason === "rate_limit" ? params.modelId : undefined;
+    }
   }
 
   return updatedStats;
@@ -468,8 +496,9 @@ export async function markAuthProfileFailure(params: {
   cfg?: OpenClawConfig;
   agentDir?: string;
   runId?: string;
+  modelId?: string;
 }): Promise<void> {
-  const { store, profileId, reason, agentDir, cfg, runId } = params;
+  const { store, profileId, reason, agentDir, cfg, runId, modelId } = params;
   const profile = store.profiles[profileId];
   if (!profile || isAuthCooldownBypassedForProvider(profile.provider)) {
     return;
@@ -498,6 +527,7 @@ export async function markAuthProfileFailure(params: {
         now,
         reason,
         cfgResolved,
+        modelId,
       });
       nextStats = computed;
       updateUsageStatsEntry(freshStore, profileId, () => computed);
@@ -536,6 +566,7 @@ export async function markAuthProfileFailure(params: {
     now,
     reason,
     cfgResolved,
+    modelId,
   });
   nextStats = computed;
   updateUsageStatsEntry(store, profileId, () => computed);
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
index 5fd6e533a1a..8127fc263be 100644
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -34,6 +34,32 @@ import { isLikelyContextOverflowError } from "./pi-embedded-helpers.js";
 
 const log = createSubsystemLogger("model-fallback");
 
+/**
+ * Structured error thrown when all model fallback candidates have been
+ * exhausted. Carries per-attempt details so callers can build informative
+ * user-facing messages (e.g. "rate-limited, retry in 30 s").
+ */
+export class FallbackSummaryError extends Error {
+  readonly attempts: FallbackAttempt[];
+  readonly soonestCooldownExpiry: number | null;
+
+  constructor(
+    message: string,
+    attempts: FallbackAttempt[],
+    soonestCooldownExpiry: number | null,
+    cause?: Error,
+  ) {
+    super(message, { cause });
+    this.name = "FallbackSummaryError";
+    this.attempts = attempts;
+    this.soonestCooldownExpiry = soonestCooldownExpiry;
+  }
+}
+
+export function isFallbackSummaryError(err: unknown): err is FallbackSummaryError {
+  return err instanceof FallbackSummaryError;
+}
+
 export type ModelFallbackRunOptions = {
   allowTransientCooldownProbe?: boolean;
 };
@@ -189,17 +215,18 @@ function throwFallbackFailureSummary(params: {
   lastError: unknown;
   label: string;
   formatAttempt: (attempt: FallbackAttempt) => string;
+  soonestCooldownExpiry?: number | null;
 }): never {
   if (params.attempts.length <= 1 && params.lastError) {
     throw params.lastError;
   }
   const summary =
     params.attempts.length > 0 ? params.attempts.map(params.formatAttempt).join(" | ") : "unknown";
-  throw new Error(
+  throw new FallbackSummaryError(
     `All ${params.label} failed (${params.attempts.length || params.candidates.length}): ${summary}`,
-    {
-      cause: params.lastError instanceof Error ? params.lastError : undefined,
-    },
+    params.attempts,
+    params.soonestCooldownExpiry ?? null,
+    params.lastError instanceof Error ? params.lastError : undefined,
   );
 }
 
@@ -548,7 +575,7 @@ export async function runWithModelFallback<T>(params: {
         store: authStore,
         provider: candidate.provider,
       });
-      const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
+      const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id, undefined, candidate.model));
 
       if (profileIds.length > 0 && !isAnyProfileAvailable) {
         // All profiles for this provider are in cooldown.
@@ -771,6 +798,19 @@ export async function runWithModelFallback<T>(params: {
       `${attempt.provider}/${attempt.model}: ${attempt.error}${
         attempt.reason ? ` (${attempt.reason})` : ""
       }`,
+    soonestCooldownExpiry: (() => {
+      if (!authStore) return null;
+      const allProfileIds = new Set<string>();
+      for (const c of candidates) {
+        const ids = resolveAuthProfileOrder({
+          cfg: params.cfg,
+          store: authStore,
+          provider: c.provider,
+        });
+        for (const id of ids) allProfileIds.add(id);
+      }
+      return getSoonestCooldownExpiry(authStore, [...allProfileIds]);
+    })(),
   });
 }
 
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index a35c03d98ca..b5c55cf9d6d 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -832,6 +832,7 @@ export async function runEmbeddedPiAgent(
         reason?: AuthProfileFailureReason | null;
         config?: RunEmbeddedPiAgentParams["config"];
         agentDir?: RunEmbeddedPiAgentParams["agentDir"];
+        modelId?: string;
       }) => {
         const { profileId, reason } = failure;
         if (!profileId || !reason || reason === "timeout") {
@@ -844,6 +845,7 @@ export async function runEmbeddedPiAgent(
           cfg: params.config,
           agentDir,
           runId: params.runId,
+          modelId: failure.modelId,
         });
       };
       const resolveAuthProfileFailureReason = (
@@ -1382,6 +1384,7 @@ export async function runEmbeddedPiAgent(
             await maybeMarkAuthProfileFailure({
               profileId: lastProfileId,
               reason: promptProfileFailureReason,
+              modelId,
             });
             const promptFailoverFailure =
               promptFailoverReason !== null || isFailoverErrorMessage(errorText);
@@ -1523,6 +1526,7 @@ export async function runEmbeddedPiAgent(
               await maybeMarkAuthProfileFailure({
                 profileId: lastProfileId,
                 reason,
+                modelId,
               });
               if (timedOut && !isProbeSession) {
                 log.warn(`Profile ${lastProfileId} timed out. Trying next account...`);
diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts
index c25342e4a28..049958aec15 100644
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -5,6 +5,7 @@ import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-bu
 import { runCliAgent } from "../../agents/cli-runner.js";
 import { getCliSessionId } from "../../agents/cli-session.js";
 import { runWithModelFallback } from "../../agents/model-fallback.js";
+import { isFallbackSummaryError } from "../../agents/model-fallback.js";
 import { isCliProvider } from "../../agents/model-selection.js";
 import {
   BILLING_ERROR_USER_MESSAGE,
@@ -12,6 +13,7 @@ import {
   isContextOverflowError,
   isBillingErrorMessage,
   isLikelyContextOverflowError,
+  isRateLimitErrorMessage,
   isTransientHttpError,
   sanitizeUserFacingText,
 } from "../../agents/pi-embedded-helpers.js";
@@ -74,6 +76,26 @@ export type AgentRunLoopResult =
     }
   | { kind: "final"; payload: ReplyPayload };
 
+/**
+ * Build a human-friendly rate-limit message from a FallbackSummaryError.
+ * Includes a countdown when the soonest cooldown expiry is known.
+ */
+function buildCopilotCooldownMessage(err: unknown): string {
+  if (!isFallbackSummaryError(err)) {
+    return "⚠️ All models are temporarily rate-limited. Please try again in a few minutes.";
+  }
+  const expiry = err.soonestCooldownExpiry;
+  if (typeof expiry === "number" && expiry > Date.now()) {
+    const secsLeft = Math.ceil((expiry - Date.now()) / 1000);
+    if (secsLeft <= 60) {
+      return `⚠️ Rate-limited — ready in ~${secsLeft}s. Please wait a moment.`;
+    }
+    const minsLeft = Math.ceil(secsLeft / 60);
+    return `⚠️ Rate-limited — ready in ~${minsLeft} min. Please try again shortly.`;
+  }
+  return "⚠️ All models are temporarily rate-limited. Please try again in a few minutes.";
+}
+
 export async function runAgentTurnWithFallback(params: {
   commandBody: string;
   followupRun: FollowupRun;
@@ -623,17 +645,21 @@ export async function runAgentTurnWithFallback(params: {
       }
 
       defaultRuntime.error(`Embedded agent failed before reply: ${message}`);
+      const isRateLimit =
+        isRateLimitErrorMessage(message) || isFallbackSummaryError(err);
       const safeMessage = isTransientHttp
         ? sanitizeUserFacingText(message, { errorContext: true })
         : message;
       const trimmedMessage = safeMessage.replace(/\.\s*$/, "");
       const fallbackText = isBilling
         ? BILLING_ERROR_USER_MESSAGE
-        : isContextOverflow
-          ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
-          : isRoleOrderingError
-            ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
-            : `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`;
+        : isRateLimit
+          ? buildCopilotCooldownMessage(err)
+          : isContextOverflow
+            ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
+            : isRoleOrderingError
+              ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
+              : `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`;
 
       return {
         kind: "final",