diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts index c587f3c6d0e..9bcd674d81c 100644 --- a/src/agents/auth-profiles/usage.ts +++ b/src/agents/auth-profiles/usage.ts @@ -55,11 +55,14 @@ export function isProfileInCooldown( } // Model-aware bypass: if the cooldown was caused by a rate_limit on a // specific model and the caller is requesting a *different* model, allow it. + // We still honour any active billing/auth disable (`disabledUntil`) — those + // are profile-wide and must not be short-circuited by model scoping. if ( forModel && stats.cooldownReason === "rate_limit" && stats.cooldownModel && - stats.cooldownModel !== forModel + stats.cooldownModel !== forModel && + !isActiveUnusableWindow(stats.disabledUntil, now ?? Date.now()) ) { return false; } @@ -476,8 +479,7 @@ function computeNextProfileUsageStats(params: { updatedStats.cooldownModel = params.existing.cooldownModel; } else { updatedStats.cooldownReason = params.reason; - updatedStats.cooldownModel = - params.reason === "rate_limit" ? params.modelId : undefined; + updatedStats.cooldownModel = params.reason === "rate_limit" ? params.modelId : undefined; } } @@ -583,8 +585,8 @@ export async function markAuthProfileFailure(params: { } /** - * Mark a profile as transiently failed. Applies exponential backoff cooldown. - * Cooldown times: 1min, 5min, 25min, max 1 hour. + * Mark a profile as transiently failed. Applies stepped backoff cooldown. + * Cooldown times: 30s, 1min, 5min (capped). * Uses store lock to avoid overwriting concurrent usage updates. */ export async function markAuthProfileCooldown(params: { diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 8127fc263be..7223c6c9dfa 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -575,7 +575,9 @@ export async function runWithModelFallback(params: { store: authStore, provider: candidate.provider, }); - const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id, undefined, candidate.model)); + const isAnyProfileAvailable = profileIds.some( + (id) => !isProfileInCooldown(authStore, id, undefined, candidate.model), + ); if (profileIds.length > 0 && !isAnyProfileAvailable) { // All profiles for this provider are in cooldown. diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 049958aec15..855b352bfe2 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -4,8 +4,7 @@ import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-pay import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-budget.js"; import { runCliAgent } from "../../agents/cli-runner.js"; import { getCliSessionId } from "../../agents/cli-session.js"; -import { runWithModelFallback } from "../../agents/model-fallback.js"; -import { isFallbackSummaryError } from "../../agents/model-fallback.js"; +import { runWithModelFallback, isFallbackSummaryError } from "../../agents/model-fallback.js"; import { isCliProvider } from "../../agents/model-selection.js"; import { BILLING_ERROR_USER_MESSAGE, @@ -645,8 +644,15 @@ export async function runAgentTurnWithFallback(params: { } defaultRuntime.error(`Embedded agent failed before reply: ${message}`); + // Only classify as rate-limit when we have concrete evidence: either + // the error message itself is a rate-limit string, or the fallback + // chain exhaustion includes at least one rate_limit / overloaded attempt. + // This avoids showing misleading "Rate-limited — ready in ~Xs" messages + // for auth, model_not_found, or other non-rate-limit failures. const isRateLimit = - isRateLimitErrorMessage(message) || isFallbackSummaryError(err); + isRateLimitErrorMessage(message) || + (isFallbackSummaryError(err) && + err.attempts.some((a) => a.reason === "rate_limit" || a.reason === "overloaded")); const safeMessage = isTransientHttp ? sanitizeUserFacingText(message, { errorContext: true }) : message;