Merge 6d58d2f381d74da1ffef86e47fdb1d93b03b1660 into 9fb78453e088cd7b553d7779faa0de5c83708e70

This commit is contained in:
kiranvk2011 2026-03-20 22:18:56 -07:00 committed by GitHub
commit f27d6e20b5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 346 additions and 32 deletions

View File

@ -0,0 +1 @@
- Agents/cooldowns: scope rate-limit cooldowns per model so one 429 no longer blocks every model on the same auth profile, replace the exponential 1 min → 1 h escalation with a stepped 30 s / 1 min / 5 min ladder, and surface a user-facing countdown message when all models are rate-limited. (#49834) Thanks @kiranvk-2011.

View File

@ -230,12 +230,12 @@ describe("markAuthProfileFailure", () => {
const stats = store.usageStats?.["anthropic:default"];
// Error count should reset to 1 (not escalate to 4) because the
// previous cooldown expired. Cooldown should be ~30s, not ~5 min.
expect(stats?.errorCount).toBe(1);
expect(stats?.failureCounts?.rate_limit).toBe(1);
const cooldownMs = (stats?.cooldownUntil ?? 0) - now;
// calculateAuthProfileCooldownMs(1) = 60_000 (1 minute)
expect(cooldownMs).toBeLessThan(120_000);
// calculateAuthProfileCooldownMs(1) = 30_000 (stepped: 30s → 1m → 5m)
expect(cooldownMs).toBeLessThan(60_000);
expect(cooldownMs).toBeGreaterThan(0);
} finally {
fs.rmSync(agentDir, { recursive: true, force: true });
@ -267,11 +267,11 @@ describe("markAuthProfileFailure", () => {
});
describe("calculateAuthProfileCooldownMs", () => {
it("applies exponential backoff with a 1h cap", () => {
expect(calculateAuthProfileCooldownMs(1)).toBe(60_000);
expect(calculateAuthProfileCooldownMs(2)).toBe(5 * 60_000);
expect(calculateAuthProfileCooldownMs(3)).toBe(25 * 60_000);
expect(calculateAuthProfileCooldownMs(4)).toBe(60 * 60_000);
expect(calculateAuthProfileCooldownMs(5)).toBe(60 * 60_000);
it("applies stepped backoff with a 5-min cap", () => {
expect(calculateAuthProfileCooldownMs(1)).toBe(30_000); // 30 seconds
expect(calculateAuthProfileCooldownMs(2)).toBe(60_000); // 1 minute
expect(calculateAuthProfileCooldownMs(3)).toBe(5 * 60_000); // 5 minutes
expect(calculateAuthProfileCooldownMs(4)).toBe(5 * 60_000); // 5 minutes (cap)
expect(calculateAuthProfileCooldownMs(5)).toBe(5 * 60_000); // 5 minutes (cap)
});
});

View File

@ -51,6 +51,8 @@ export type AuthProfileFailureReason =
export type ProfileUsageStats = {
lastUsed?: number;
cooldownUntil?: number;
cooldownReason?: AuthProfileFailureReason;
cooldownModel?: string;
disabledUntil?: number;
disabledReason?: AuthProfileFailureReason;
errorCount?: number;

View File

@ -132,6 +132,53 @@ describe("isProfileInCooldown", () => {
});
expect(isProfileInCooldown(store, "kilocode:default")).toBe(false);
});
// Model-scoped rate_limit cooldown: only the model that triggered the 429 is
// blocked; other models on the same profile may proceed.
it("returns false for a different model when cooldown is model-scoped (rate_limit)", () => {
const store = makeStore({
"github-copilot:github": {
cooldownUntil: Date.now() + 60_000,
cooldownReason: "rate_limit",
cooldownModel: "claude-sonnet-4.6",
},
});
// Different model bypasses the cooldown
expect(isProfileInCooldown(store, "github-copilot:github", undefined, "gpt-4.1")).toBe(false);
// Same model is still blocked
expect(
isProfileInCooldown(store, "github-copilot:github", undefined, "claude-sonnet-4.6"),
).toBe(true);
// No model specified — blocked (conservative)
expect(isProfileInCooldown(store, "github-copilot:github")).toBe(true);
});
// cooldownModel left undefined means the cooldown applies profile-wide: every
// model is blocked, no matter which model the caller asks about.
it("returns true for all models when cooldownModel is undefined (profile-wide)", () => {
const store = makeStore({
"github-copilot:github": {
cooldownUntil: Date.now() + 60_000,
cooldownReason: "rate_limit",
cooldownModel: undefined,
},
});
// Both a matching and a non-matching model name stay blocked.
expect(
isProfileInCooldown(store, "github-copilot:github", undefined, "claude-sonnet-4.6"),
).toBe(true);
expect(isProfileInCooldown(store, "github-copilot:github", undefined, "gpt-4.1")).toBe(true);
});
// An active disabledUntil window (e.g. billing) is profile-wide and must not
// be short-circuited by the per-model rate_limit bypass.
it("does not bypass model-scoped cooldown when disabledUntil is active", () => {
const store = makeStore({
"github-copilot:github": {
cooldownUntil: Date.now() + 60_000,
cooldownReason: "rate_limit",
cooldownModel: "claude-sonnet-4.6",
disabledUntil: Date.now() + 120_000,
disabledReason: "billing",
},
});
// Even though cooldownModel is for a different model, billing disable
// should keep the profile blocked for all models.
expect(isProfileInCooldown(store, "github-copilot:github", undefined, "gpt-4.1")).toBe(true);
});
});
describe("resolveProfilesUnavailableReason", () => {
@ -621,8 +668,8 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
errorCount: 3,
lastFailureAt: now - 60_000,
}),
// errorCount resets → calculateAuthProfileCooldownMs(1) = 60_000
expectedUntil: (now: number) => now + 60_000,
// errorCount resets → calculateAuthProfileCooldownMs(1) = 30_000 (stepped: 30s → 1m → 5m)
expectedUntil: (now: number) => now + 30_000,
readUntil: (stats: WindowStats | undefined) => stats?.cooldownUntil,
},
{
@ -675,3 +722,125 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
});
}
});
describe("markAuthProfileFailure — per-model cooldown metadata", () => {
  const profileId = "github-copilot:github";

  // Store pre-seeded with a copilot API-key profile plus the given usage stats.
  function copilotStore(usageStats: AuthProfileStore["usageStats"]): AuthProfileStore {
    const store = makeStore(usageStats);
    store.profiles[profileId] = {
      type: "api_key",
      provider: "github-copilot",
      key: "ghu_test",
    };
    return store;
  }

  // Record a rate_limit failure at a fixed timestamp using fake timers.
  async function failWithRateLimit(
    store: AuthProfileStore,
    now: number,
    modelId?: string,
  ): Promise<void> {
    vi.useFakeTimers();
    vi.setSystemTime(now);
    try {
      await markAuthProfileFailure({ store, profileId, reason: "rate_limit", modelId });
    } finally {
      vi.useRealTimers();
    }
  }

  it("records cooldownModel on first rate_limit failure", async () => {
    const store = copilotStore({});
    await failWithRateLimit(store, 1_000_000, "claude-sonnet-4.6");
    const stats = store.usageStats?.[profileId];
    expect(stats?.cooldownReason).toBe("rate_limit");
    expect(stats?.cooldownModel).toBe("claude-sonnet-4.6");
  });

  it("widens cooldownModel to undefined when a different model fails during active cooldown", async () => {
    const now = 1_000_000;
    const store = copilotStore({
      [profileId]: {
        cooldownUntil: now + 30_000,
        cooldownReason: "rate_limit",
        cooldownModel: "claude-sonnet-4.6",
        errorCount: 1,
        lastFailureAt: now - 1000,
      },
    });
    // A second model failing inside the active window widens scope to all models.
    await failWithRateLimit(store, now, "gpt-4.1");
    const stats = store.usageStats?.[profileId];
    expect(stats?.cooldownModel).toBeUndefined();
    expect(stats?.cooldownReason).toBe("rate_limit");
  });

  it("preserves cooldownModel when the same model fails again during active cooldown", async () => {
    const now = 1_000_000;
    const store = copilotStore({
      [profileId]: {
        cooldownUntil: now + 30_000,
        cooldownReason: "rate_limit",
        cooldownModel: "claude-sonnet-4.6",
        errorCount: 1,
        lastFailureAt: now - 1000,
      },
    });
    // Same model failing again keeps the narrow, model-scoped cooldown.
    await failWithRateLimit(store, now, "claude-sonnet-4.6");
    const stats = store.usageStats?.[profileId];
    expect(stats?.cooldownModel).toBe("claude-sonnet-4.6");
  });

  it("updates cooldownReason when auth failure occurs during active rate_limit window", async () => {
    const now = 1_000_000;
    const store = copilotStore({
      [profileId]: {
        cooldownUntil: now + 30_000,
        cooldownReason: "rate_limit",
        cooldownModel: "claude-sonnet-4.6",
        errorCount: 1,
        lastFailureAt: now - 1000,
      },
    });
    await markAuthProfileFailure({
      store,
      profileId,
      reason: "auth",
      modelId: "claude-opus-4.6",
    });
    const stats = store.usageStats?.[profileId];
    // Reason should update to the new failure type, not stay as rate_limit
    expect(stats?.cooldownReason).toBe("auth");
    // Model scope should be cleared — auth failures are profile-wide
    expect(stats?.cooldownModel).toBeUndefined();
  });

  it("clears cooldownModel when non-rate_limit failure hits same model during active window", async () => {
    const now = 1_000_000;
    const store = copilotStore({
      [profileId]: {
        cooldownUntil: now + 30_000,
        cooldownReason: "rate_limit",
        cooldownModel: "claude-sonnet-4.6",
        errorCount: 1,
        lastFailureAt: now - 1000,
      },
    });
    await markAuthProfileFailure({
      store,
      profileId,
      reason: "auth",
      modelId: "claude-sonnet-4.6",
    });
    const stats = store.usageStats?.[profileId];
    // Even same-model auth failure should clear model scope (auth is profile-wide)
    expect(stats?.cooldownReason).toBe("auth");
    expect(stats?.cooldownModel).toBeUndefined();
  });
});

View File

@ -44,6 +44,7 @@ export function isProfileInCooldown(
store: AuthProfileStore,
profileId: string,
now?: number,
forModel?: string,
): boolean {
if (isAuthCooldownBypassedForProvider(store.profiles[profileId]?.provider)) {
return false;
@ -52,6 +53,19 @@ export function isProfileInCooldown(
if (!stats) {
return false;
}
// Model-aware bypass: if the cooldown was caused by a rate_limit on a
// specific model and the caller is requesting a *different* model, allow it.
// We still honour any active billing/auth disable (`disabledUntil`) — those
// are profile-wide and must not be short-circuited by model scoping.
if (
forModel &&
stats.cooldownReason === "rate_limit" &&
stats.cooldownModel &&
stats.cooldownModel !== forModel &&
!isActiveUnusableWindow(stats.disabledUntil, now ?? Date.now())
) {
return false;
}
const unusableUntil = resolveProfileUnusableUntil(stats);
const ts = now ?? Date.now();
return unusableUntil ? ts < unusableUntil : false;
@ -212,6 +226,8 @@ export function clearExpiredCooldowns(store: AuthProfileStore, now?: number): bo
if (cooldownExpired) {
stats.cooldownUntil = undefined;
stats.cooldownReason = undefined;
stats.cooldownModel = undefined;
profileMutated = true;
}
if (disabledExpired) {
@ -275,10 +291,13 @@ export async function markAuthProfileUsed(params: {
/**
 * Stepped cooldown ladder for consecutive auth-profile failures within the
 * failure window: 30 s → 1 min → 5 min (capped at 5 min thereafter).
 *
 * @param errorCount consecutive failure count; values below 1 are clamped to 1.
 * @returns cooldown duration in milliseconds.
 */
export function calculateAuthProfileCooldownMs(errorCount: number): number {
  // Clamp so a stale/zeroed counter still yields the minimum cooldown.
  const normalized = Math.max(1, errorCount);
  if (normalized <= 1) {
    return 30_000; // 30 seconds
  }
  if (normalized <= 2) {
    return 60_000; // 1 minute
  }
  return 5 * 60_000; // 5 minutes max
}
type ResolvedAuthCooldownConfig = {
@ -366,6 +385,8 @@ function resetUsageStats(
...existing,
errorCount: 0,
cooldownUntil: undefined,
cooldownReason: undefined,
cooldownModel: undefined,
disabledUntil: undefined,
disabledReason: undefined,
failureCounts: undefined,
@ -398,6 +419,7 @@ function computeNextProfileUsageStats(params: {
now: number;
reason: AuthProfileFailureReason;
cfgResolved: ResolvedAuthCooldownConfig;
modelId?: string;
}): ProfileUsageStats {
const windowMs = params.cfgResolved.failureWindowMs;
const windowExpired =
@ -451,6 +473,36 @@ function computeNextProfileUsageStats(params: {
now: params.now,
recomputedUntil: params.now + backoffMs,
});
// Update cooldown metadata based on whether the window is still active
// and whether the same or a different model is failing.
const existingCooldownActive =
typeof params.existing.cooldownUntil === "number" &&
params.existing.cooldownUntil > params.now;
if (existingCooldownActive) {
// Always use the latest failure reason so that downstream consumers
// (e.g. isProfileInCooldown model-bypass) see the most recent signal.
// A non-rate_limit failure (auth, billing, …) is profile-wide, so
// upgrading from rate_limit → auth correctly blocks all models.
updatedStats.cooldownReason = params.reason;
// If a different model fails during an active window, widen the scope
// to all models (undefined) so neither model bypasses the cooldown.
if (
params.existing.cooldownModel &&
params.modelId &&
params.existing.cooldownModel !== params.modelId
) {
updatedStats.cooldownModel = undefined;
} else if (params.reason !== "rate_limit") {
// Non-rate-limit failures are profile-wide — clear model scope even
// when the same model fails, so that no model can bypass.
updatedStats.cooldownModel = undefined;
} else {
updatedStats.cooldownModel = params.existing.cooldownModel;
}
} else {
updatedStats.cooldownReason = params.reason;
updatedStats.cooldownModel = params.reason === "rate_limit" ? params.modelId : undefined;
}
}
return updatedStats;
@ -468,8 +520,9 @@ export async function markAuthProfileFailure(params: {
cfg?: OpenClawConfig;
agentDir?: string;
runId?: string;
modelId?: string;
}): Promise<void> {
const { store, profileId, reason, agentDir, cfg, runId } = params;
const { store, profileId, reason, agentDir, cfg, runId, modelId } = params;
const profile = store.profiles[profileId];
if (!profile || isAuthCooldownBypassedForProvider(profile.provider)) {
return;
@ -498,6 +551,7 @@ export async function markAuthProfileFailure(params: {
now,
reason,
cfgResolved,
modelId,
});
nextStats = computed;
updateUsageStatsEntry(freshStore, profileId, () => computed);
@ -536,6 +590,7 @@ export async function markAuthProfileFailure(params: {
now,
reason,
cfgResolved,
modelId,
});
nextStats = computed;
updateUsageStatsEntry(store, profileId, () => computed);
@ -552,8 +607,8 @@ export async function markAuthProfileFailure(params: {
}
/**
 * Mark a profile as transiently failed. Applies stepped backoff cooldown.
 * Cooldown times: 30s, 1min, 5min (capped).
* Uses store lock to avoid overwriting concurrent usage updates.
*/
export async function markAuthProfileCooldown(params: {

View File

@ -34,6 +34,32 @@ import { isLikelyContextOverflowError } from "./pi-embedded-helpers.js";
const log = createSubsystemLogger("model-fallback");
/**
 * Raised once every model fallback candidate has been tried and failed.
 * Exposes the individual attempt records and the earliest moment any
 * rate-limited profile becomes usable again, so callers can render an
 * informative countdown (e.g. "rate-limited, retry in 30 s").
 */
export class FallbackSummaryError extends Error {
  constructor(
    message: string,
    readonly attempts: FallbackAttempt[],
    readonly soonestCooldownExpiry: number | null,
    cause?: Error,
  ) {
    super(message, { cause });
    this.name = "FallbackSummaryError";
  }
}

/** Type guard narrowing an arbitrary thrown value to FallbackSummaryError. */
export function isFallbackSummaryError(err: unknown): err is FallbackSummaryError {
  return err instanceof FallbackSummaryError;
}
export type ModelFallbackRunOptions = {
allowTransientCooldownProbe?: boolean;
};
@ -189,17 +215,18 @@ function throwFallbackFailureSummary(params: {
lastError: unknown;
label: string;
formatAttempt: (attempt: FallbackAttempt) => string;
soonestCooldownExpiry?: number | null;
}): never {
if (params.attempts.length <= 1 && params.lastError) {
throw params.lastError;
}
const summary =
params.attempts.length > 0 ? params.attempts.map(params.formatAttempt).join(" | ") : "unknown";
throw new Error(
throw new FallbackSummaryError(
`All ${params.label} failed (${params.attempts.length || params.candidates.length}): ${summary}`,
{
cause: params.lastError instanceof Error ? params.lastError : undefined,
},
params.attempts,
params.soonestCooldownExpiry ?? null,
params.lastError instanceof Error ? params.lastError : undefined,
);
}
@ -548,7 +575,9 @@ export async function runWithModelFallback<T>(params: {
store: authStore,
provider: candidate.provider,
});
const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
const isAnyProfileAvailable = profileIds.some(
(id) => !isProfileInCooldown(authStore, id, undefined, candidate.model),
);
if (profileIds.length > 0 && !isAnyProfileAvailable) {
// All profiles for this provider are in cooldown.
@ -771,6 +800,23 @@ export async function runWithModelFallback<T>(params: {
`${attempt.provider}/${attempt.model}: ${attempt.error}${
attempt.reason ? ` (${attempt.reason})` : ""
}`,
soonestCooldownExpiry: (() => {
if (!authStore) {
return null;
}
const allProfileIds = new Set<string>();
for (const c of candidates) {
const ids = resolveAuthProfileOrder({
cfg: params.cfg,
store: authStore,
provider: c.provider,
});
for (const id of ids) {
allProfileIds.add(id);
}
}
return getSoonestCooldownExpiry(authStore, [...allProfileIds]);
})(),
});
}

View File

@ -717,7 +717,7 @@ export async function runEmbeddedPiAgent(
let nextIndex = profileIndex + 1;
while (nextIndex < profileCandidates.length) {
const candidate = profileCandidates[nextIndex];
if (candidate && isProfileInCooldown(authStore, candidate)) {
if (candidate && isProfileInCooldown(authStore, candidate, undefined, modelId)) {
nextIndex += 1;
continue;
}
@ -744,7 +744,9 @@ export async function runEmbeddedPiAgent(
);
const allAutoProfilesInCooldown =
autoProfileCandidates.length > 0 &&
autoProfileCandidates.every((candidate) => isProfileInCooldown(authStore, candidate));
autoProfileCandidates.every((candidate) =>
isProfileInCooldown(authStore, candidate, undefined, modelId),
);
const unavailableReason = allAutoProfilesInCooldown
? (resolveProfilesUnavailableReason({
store: authStore,
@ -763,7 +765,9 @@ export async function runEmbeddedPiAgent(
while (profileIndex < profileCandidates.length) {
const candidate = profileCandidates[profileIndex];
const inCooldown =
candidate && candidate !== lockedProfileId && isProfileInCooldown(authStore, candidate);
candidate &&
candidate !== lockedProfileId &&
isProfileInCooldown(authStore, candidate, undefined, modelId);
if (inCooldown) {
if (allowTransientCooldownProbe && !didTransientCooldownProbe) {
didTransientCooldownProbe = true;
@ -833,6 +837,7 @@ export async function runEmbeddedPiAgent(
reason?: AuthProfileFailureReason | null;
config?: RunEmbeddedPiAgentParams["config"];
agentDir?: RunEmbeddedPiAgentParams["agentDir"];
modelId?: string;
}) => {
const { profileId, reason } = failure;
if (!profileId || !reason || reason === "timeout") {
@ -845,6 +850,7 @@ export async function runEmbeddedPiAgent(
cfg: params.config,
agentDir,
runId: params.runId,
modelId: failure.modelId,
});
};
const resolveAuthProfileFailureReason = (
@ -1394,6 +1400,7 @@ export async function runEmbeddedPiAgent(
await maybeMarkAuthProfileFailure({
profileId: lastProfileId,
reason: promptProfileFailureReason,
modelId,
});
const promptFailoverFailure =
promptFailoverReason !== null || isFailoverErrorMessage(errorText);
@ -1535,6 +1542,7 @@ export async function runEmbeddedPiAgent(
await maybeMarkAuthProfileFailure({
profileId: lastProfileId,
reason,
modelId,
});
if (timedOut && !isProbeSession) {
log.warn(`Profile ${lastProfileId} timed out. Trying next account...`);

View File

@ -4,7 +4,7 @@ import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-pay
import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-budget.js";
import { runCliAgent } from "../../agents/cli-runner.js";
import { getCliSessionId } from "../../agents/cli-session.js";
import { runWithModelFallback } from "../../agents/model-fallback.js";
import { runWithModelFallback, isFallbackSummaryError } from "../../agents/model-fallback.js";
import { isCliProvider } from "../../agents/model-selection.js";
import {
BILLING_ERROR_USER_MESSAGE,
@ -12,6 +12,7 @@ import {
isContextOverflowError,
isBillingErrorMessage,
isLikelyContextOverflowError,
isRateLimitErrorMessage,
isTransientHttpError,
sanitizeUserFacingText,
} from "../../agents/pi-embedded-helpers.js";
@ -74,6 +75,26 @@ export type AgentRunLoopResult =
}
| { kind: "final"; payload: ReplyPayload };
/**
* Build a human-friendly rate-limit message from a FallbackSummaryError.
* Includes a countdown when the soonest cooldown expiry is known.
*/
/**
 * Build a human-friendly rate-limit message from a FallbackSummaryError.
 * Includes a countdown when the soonest cooldown expiry is known and still
 * in the future; otherwise falls back to a generic retry hint.
 *
 * @param err the error caught from the fallback chain (any thrown value).
 * @returns a user-facing message string.
 */
function buildCopilotCooldownMessage(err: unknown): string {
  const genericMessage =
    "⚠️ All models are temporarily rate-limited. Please try again in a few minutes.";
  if (!isFallbackSummaryError(err)) {
    return genericMessage;
  }
  const expiry = err.soonestCooldownExpiry;
  // Snapshot the clock once so the in-the-future check and the countdown
  // computation agree (previously two Date.now() calls could straddle expiry).
  const now = Date.now();
  if (typeof expiry !== "number" || expiry <= now) {
    return genericMessage;
  }
  const secsLeft = Math.ceil((expiry - now) / 1000);
  if (secsLeft <= 60) {
    return `⚠️ Rate-limited — ready in ~${secsLeft}s. Please wait a moment.`;
  }
  const minsLeft = Math.ceil(secsLeft / 60);
  return `⚠️ Rate-limited — ready in ~${minsLeft} min. Please try again shortly.`;
}
export async function runAgentTurnWithFallback(params: {
commandBody: string;
followupRun: FollowupRun;
@ -623,17 +644,29 @@ export async function runAgentTurnWithFallback(params: {
}
defaultRuntime.error(`Embedded agent failed before reply: ${message}`);
// Only classify as rate-limit when we have concrete evidence: either
// the error message itself is a rate-limit string, or the fallback
// chain exhaustion includes at least one rate_limit / overloaded attempt.
// Using `.some()` intentionally: when any attempt is rate-limited, the
// countdown message is more actionable than the generic failure text,
// even if other attempts failed for different reasons (auth, etc.).
const isRateLimit =
isRateLimitErrorMessage(message) ||
(isFallbackSummaryError(err) &&
err.attempts.some((a) => a.reason === "rate_limit" || a.reason === "overloaded"));
const safeMessage = isTransientHttp
? sanitizeUserFacingText(message, { errorContext: true })
: message;
const trimmedMessage = safeMessage.replace(/\.\s*$/, "");
const fallbackText = isBilling
? BILLING_ERROR_USER_MESSAGE
: isContextOverflow
? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
: isRoleOrderingError
? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
: `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`;
: isRateLimit
? buildCopilotCooldownMessage(err)
: isContextOverflow
? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
: isRoleOrderingError
? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
: `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`;
return {
kind: "final",