diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts
index c587f3c6d0e..9bcd674d81c 100644
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -55,11 +55,14 @@ export function isProfileInCooldown(
   }
   // Model-aware bypass: if the cooldown was caused by a rate_limit on a
   // specific model and the caller is requesting a *different* model, allow it.
+  // We still honour any active billing/auth disable (`disabledUntil`) — those
+  // are profile-wide and must not be short-circuited by model scoping.
   if (
     forModel &&
     stats.cooldownReason === "rate_limit" &&
     stats.cooldownModel &&
-    stats.cooldownModel !== forModel
+    stats.cooldownModel !== forModel &&
+    !isActiveUnusableWindow(stats.disabledUntil, now ?? Date.now())
   ) {
     return false;
   }
@@ -476,8 +479,7 @@ function computeNextProfileUsageStats(params: {
       updatedStats.cooldownModel = params.existing.cooldownModel;
     } else {
       updatedStats.cooldownReason = params.reason;
-      updatedStats.cooldownModel =
-        params.reason === "rate_limit" ? params.modelId : undefined;
+      updatedStats.cooldownModel = params.reason === "rate_limit" ? params.modelId : undefined;
     }
   }
 
@@ -583,8 +585,8 @@ export async function markAuthProfileFailure(params: {
 }
 
 /**
- * Mark a profile as transiently failed. Applies exponential backoff cooldown.
- * Cooldown times: 1min, 5min, 25min, max 1 hour.
+ * Mark a profile as transiently failed. Applies stepped backoff cooldown.
+ * Cooldown times: 30s, 1min, 5min (capped).
  * Uses store lock to avoid overwriting concurrent usage updates.
  */
 export async function markAuthProfileCooldown(params: {
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
index 8127fc263be..7223c6c9dfa 100644
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -575,7 +575,9 @@ export async function runWithModelFallback<T>(params: {
         store: authStore,
         provider: candidate.provider,
       });
-      const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id, undefined, candidate.model));
+      const isAnyProfileAvailable = profileIds.some(
+        (id) => !isProfileInCooldown(authStore, id, undefined, candidate.model),
+      );
 
       if (profileIds.length > 0 && !isAnyProfileAvailable) {
         // All profiles for this provider are in cooldown.
diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts
index 049958aec15..855b352bfe2 100644
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -4,8 +4,7 @@ import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-pay
 import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-budget.js";
 import { runCliAgent } from "../../agents/cli-runner.js";
 import { getCliSessionId } from "../../agents/cli-session.js";
-import { runWithModelFallback } from "../../agents/model-fallback.js";
-import { isFallbackSummaryError } from "../../agents/model-fallback.js";
+import { runWithModelFallback, isFallbackSummaryError } from "../../agents/model-fallback.js";
 import { isCliProvider } from "../../agents/model-selection.js";
 import {
   BILLING_ERROR_USER_MESSAGE,
@@ -645,8 +644,15 @@ export async function runAgentTurnWithFallback(params: {
       }
 
       defaultRuntime.error(`Embedded agent failed before reply: ${message}`);
+      // Only classify as rate-limit when we have concrete evidence: either
+      // the error message itself is a rate-limit string, or the fallback
+      // chain exhaustion includes at least one rate_limit / overloaded attempt.
+      // This avoids showing misleading "Rate-limited — ready in ~Xs" messages
+      // for auth, model_not_found, or other non-rate-limit failures.
       const isRateLimit =
-        isRateLimitErrorMessage(message) || isFallbackSummaryError(err);
+        isRateLimitErrorMessage(message) ||
+        (isFallbackSummaryError(err) &&
+          err.attempts.some((a) => a.reason === "rate_limit" || a.reason === "overloaded"));
       const safeMessage = isTransientHttp
         ? sanitizeUserFacingText(message, { errorContext: true })
         : message;