fix(agents): tighten overload probe and backoff state

fix(agents): back off before overload failover
fix(agents): classify overloaded failures separately
2026-03-06 15:47:23 +03:00 · 2026-03-06 15:36:03 +03:00 · 2026-03-06 15:27:57 +03:00 · 2026-03-06 14:47:04 +03:00 · 2026-03-06 14:16:33 +03:00
10 changed files with 227 additions and 36 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -118,6 +118,7 @@ Docs: https://docs.openclaw.ai
 - Security/auth labels: remove token and API-key snippets from user-facing auth status labels so `/status` and `/models` do not expose credential fragments. (#33262) thanks @cu1ch3n.
 - Auth/credential semantics: align profile eligibility + probe diagnostics with SecretRef/expiry rules and harden browser download atomic writes. (#33733) thanks @joshavant.
 - Security/audit denyCommands guidance: suggest likely exact node command IDs for unknown `gateway.nodes.denyCommands` entries so ineffective denylist entries are easier to correct. (#29713) thanks @liquidhorizon88-bot.
+- Agents/overload failover handling: classify overloaded provider failures (HTTP 503/529 and provider "overloaded" payloads) separately from rate limits and timeouts, add a short backoff before overload retry/failover, and keep overloaded prompt/assistant failures out of auth-profile failure state so transient provider overloads do not poison later profile selection on the same provider.
 - Docs/security hardening guidance: document Docker `DOCKER-USER` + UFW policy and add cross-linking from Docker install docs for VPS/public-host setups. (#27613) thanks @dorukardahan.
 - Docs/security threat-model links: replace relative `.md` links with Mintlify-compatible root-relative routes in security docs to prevent broken internal navigation. (#27698) thanks @clawdoo.
 - Plugins/Update integrity drift: avoid false integrity drift prompts when updating npm-installed plugins from unpinned specs, while keeping drift checks for exact pinned versions. (#37179) Thanks @vincentkoc.
--- a/src/agents/failover-error.test.ts
+++ b/src/agents/failover-error.test.ts
@ -69,13 +69,13 @@ describe("failover-error", () => {
    // Keep the status-only path behavior-preserving and conservative.
    expect(resolveFailoverReasonFromError({ status: 500 })).toBeNull();
    expect(resolveFailoverReasonFromError({ status: 502 })).toBe("timeout");
-    expect(resolveFailoverReasonFromError({ status: 503 })).toBe("timeout");
+    expect(resolveFailoverReasonFromError({ status: 503 })).toBe("overloaded");
    expect(resolveFailoverReasonFromError({ status: 504 })).toBe("timeout");
    expect(resolveFailoverReasonFromError({ status: 521 })).toBeNull();
    expect(resolveFailoverReasonFromError({ status: 522 })).toBeNull();
    expect(resolveFailoverReasonFromError({ status: 523 })).toBeNull();
    expect(resolveFailoverReasonFromError({ status: 524 })).toBeNull();
-    expect(resolveFailoverReasonFromError({ status: 529 })).toBe("rate_limit");
+    expect(resolveFailoverReasonFromError({ status: 529 })).toBe("overloaded");
  });

  it("classifies documented provider error shapes at the error boundary", () => {
@ -90,7 +90,7 @@ describe("failover-error", () => {
        status: 529,
        message: ANTHROPIC_OVERLOADED_PAYLOAD,
      }),
-    ).toBe("rate_limit");
+    ).toBe("overloaded");
    expect(
      resolveFailoverReasonFromError({
        status: 429,
@ -114,7 +114,7 @@ describe("failover-error", () => {
        status: 503,
        message: BEDROCK_SERVICE_UNAVAILABLE_MESSAGE,
      }),
-    ).toBe("timeout");
+    ).toBe("overloaded");
    expect(
      resolveFailoverReasonFromError({
        status: 429,
@ -126,7 +126,7 @@ describe("failover-error", () => {
        status: 503,
        message: GROQ_SERVICE_UNAVAILABLE_MESSAGE,
      }),
-    ).toBe("timeout");
+    ).toBe("overloaded");
  });

  it("treats 400 insufficient_quota payloads as billing instead of format", () => {
@ -151,6 +151,14 @@ describe("failover-error", () => {
    ).toBe("rate_limit");
  });

+  it("treats overloaded provider payloads as overloaded", () => {
+    expect(
+      resolveFailoverReasonFromError({
+        message: ANTHROPIC_OVERLOADED_PAYLOAD,
+      }),
+    ).toBe("overloaded");
+  });
+
  it("keeps raw-text 402 weekly/monthly limit errors in billing", () => {
    expect(
      resolveFailoverReasonFromError({
@ -221,6 +229,10 @@ describe("failover-error", () => {
    expect(err?.model).toBe("claude-opus-4-5");
  });

+  it("maps overloaded to a 503 fallback status", () => {
+    expect(resolveFailoverStatus("overloaded")).toBe(503);
+  });
+
  it("coerces format errors with a 400 status", () => {
    const err = coerceToFailoverError("invalid request format", {
      provider: "google",
--- a/src/agents/failover-error.ts
+++ b/src/agents/failover-error.ts
@ -49,6 +49,8 @@ export function resolveFailoverStatus(reason: FailoverReason): number | undefine
      return 402;
    case "rate_limit":
      return 429;
+    case "overloaded":
+      return 503;
    case "auth":
      return 401;
    case "auth_permanent":
--- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
+++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
@ -509,12 +509,12 @@ describe("classifyFailoverReason", () => {
  it("classifies documented provider error messages", () => {
    expect(classifyFailoverReason(OPENAI_RATE_LIMIT_MESSAGE)).toBe("rate_limit");
    expect(classifyFailoverReason(GEMINI_RESOURCE_EXHAUSTED_MESSAGE)).toBe("rate_limit");
-    expect(classifyFailoverReason(ANTHROPIC_OVERLOADED_PAYLOAD)).toBe("rate_limit");
+    expect(classifyFailoverReason(ANTHROPIC_OVERLOADED_PAYLOAD)).toBe("overloaded");
    expect(classifyFailoverReason(OPENROUTER_CREDITS_MESSAGE)).toBe("billing");
    expect(classifyFailoverReason(TOGETHER_PAYMENT_REQUIRED_MESSAGE)).toBe("billing");
-    expect(classifyFailoverReason(TOGETHER_ENGINE_OVERLOADED_MESSAGE)).toBe("timeout");
+    expect(classifyFailoverReason(TOGETHER_ENGINE_OVERLOADED_MESSAGE)).toBe("overloaded");
    expect(classifyFailoverReason(GROQ_TOO_MANY_REQUESTS_MESSAGE)).toBe("rate_limit");
-    expect(classifyFailoverReason(GROQ_SERVICE_UNAVAILABLE_MESSAGE)).toBe("timeout");
+    expect(classifyFailoverReason(GROQ_SERVICE_UNAVAILABLE_MESSAGE)).toBe("overloaded");
  });

  it("classifies internal and compatibility error messages", () => {
@ -572,20 +572,20 @@ describe("classifyFailoverReason", () => {
      "rate_limit",
    );
  });
-  it("classifies provider high-demand / service-unavailable messages as rate_limit", () => {
+  it("classifies provider high-demand / service-unavailable messages as overloaded", () => {
    expect(
      classifyFailoverReason(
        "This model is currently experiencing high demand. Please try again later.",
      ),
-    ).toBe("rate_limit");
-    // "service unavailable" combined with overload/capacity indicator → rate_limit
+    ).toBe("overloaded");
+    // "service unavailable" combined with overload/capacity indicator → overloaded
    // (exercises the new regex — none of the standalone patterns match here)
-    expect(classifyFailoverReason("service unavailable due to capacity limits")).toBe("rate_limit");
+    expect(classifyFailoverReason("service unavailable due to capacity limits")).toBe("overloaded");
    expect(
      classifyFailoverReason(
        '{"error":{"code":503,"message":"The model is overloaded. Please try later","status":"UNAVAILABLE"}}',
      ),
-    ).toBe("rate_limit");
+    ).toBe("overloaded");
  });
  it("classifies bare 'service unavailable' as timeout instead of rate_limit (#32828)", () => {
    // A generic "service unavailable" from a proxy/CDN should stay retryable,
--- a/src/agents/pi-embedded-helpers/errors.ts
+++ b/src/agents/pi-embedded-helpers/errors.ts
@ -293,13 +293,11 @@ export function classifyFailoverReasonFromHttpStatus(
  if (status === 408) {
    return "timeout";
  }
-  // Keep the status-only path conservative and behavior-preserving.
-  // Message-path HTTP heuristics are broader and should not leak in here.
-  if (status === 502 || status === 503 || status === 504) {
+  if (status === 502 || status === 504) {
    return "timeout";
  }
-  if (status === 529) {
-    return "rate_limit";
+  if (status === 503 || status === 529) {
+    return "overloaded";
  }
  if (status === 400) {
    // Some providers return quota/balance errors under HTTP 400, so do not
@ -854,13 +852,6 @@ export function classifyFailoverReason(raw: string): FailoverReason | null {
  if (isModelNotFoundErrorMessage(raw)) {
    return "model_not_found";
  }
-  if (isTransientHttpError(raw)) {
-    // Treat transient 5xx provider failures as retryable transport issues.
-    return "timeout";
-  }
-  if (isJsonApiInternalServerError(raw)) {
-    return "timeout";
-  }
  if (isPeriodicUsageLimitErrorMessage(raw)) {
    return isBillingErrorMessage(raw) ? "billing" : "rate_limit";
  }
@ -868,7 +859,14 @@ export function classifyFailoverReason(raw: string): FailoverReason | null {
    return "rate_limit";
  }
  if (isOverloadedErrorMessage(raw)) {
-    return "rate_limit";
+    return "overloaded";
+  }
+  if (isTransientHttpError(raw)) {
+    // Treat transient 5xx provider failures as retryable transport issues.
+    return "timeout";
+  }
+  if (isJsonApiInternalServerError(raw)) {
+    return "timeout";
  }
  if (isCloudCodeAssistFormatError(raw)) {
    return "format";
--- a/src/agents/pi-embedded-helpers/types.ts
+++ b/src/agents/pi-embedded-helpers/types.ts
@ -5,6 +5,7 @@ export type FailoverReason =
  | "auth_permanent"
  | "format"
  | "rate_limit"
+  | "overloaded"
  | "billing"
  | "timeout"
  | "model_not_found"
--- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts
+++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts
@ -9,11 +9,28 @@ import type { EmbeddedRunAttemptResult } from "./pi-embedded-runner/run/types.js

 const runEmbeddedAttemptMock = vi.fn<(params: unknown) => Promise<EmbeddedRunAttemptResult>>();
 const resolveCopilotApiTokenMock = vi.fn();
+const { computeBackoffMock, sleepWithAbortMock } = vi.hoisted(() => ({
+  computeBackoffMock: vi.fn(
+    (
+      _policy: { initialMs: number; maxMs: number; factor: number; jitter: number },
+      _attempt: number,
+    ) => 321,
+  ),
+  sleepWithAbortMock: vi.fn(async (_ms: number, _abortSignal?: AbortSignal) => undefined),
+}));

 vi.mock("./pi-embedded-runner/run/attempt.js", () => ({
  runEmbeddedAttempt: (params: unknown) => runEmbeddedAttemptMock(params),
 }));

+vi.mock("../infra/backoff.js", () => ({
+  computeBackoff: (
+    policy: { initialMs: number; maxMs: number; factor: number; jitter: number },
+    attempt: number,
+  ) => computeBackoffMock(policy, attempt),
+  sleepWithAbort: (ms: number, abortSignal?: AbortSignal) => sleepWithAbortMock(ms, abortSignal),
+}));
+
 vi.mock("../providers/github-copilot-token.js", () => ({
  DEFAULT_COPILOT_API_BASE_URL: "https://api.individual.githubcopilot.com",
  resolveCopilotApiToken: (...args: unknown[]) => resolveCopilotApiTokenMock(...args),
@ -43,6 +60,8 @@ beforeEach(() => {
  vi.useRealTimers();
  runEmbeddedAttemptMock.mockClear();
  resolveCopilotApiTokenMock.mockReset();
+  computeBackoffMock.mockClear();
+  sleepWithAbortMock.mockClear();
 });

 const baseUsage = {
@ -252,6 +271,24 @@ const mockFailedThenSuccessfulAttempt = (errorMessage = "rate limit") => {
    );
 };

+const mockPromptErrorThenSuccessfulAttempt = (errorMessage: string) => {
+  runEmbeddedAttemptMock
+    .mockResolvedValueOnce(
+      makeAttempt({
+        promptError: new Error(errorMessage),
+      }),
+    )
+    .mockResolvedValueOnce(
+      makeAttempt({
+        assistantTexts: ["ok"],
+        lastAssistant: buildAssistant({
+          stopReason: "stop",
+          content: [{ type: "text", text: "ok" }],
+        }),
+      }),
+    );
+};
+
 async function runAutoPinnedOpenAiTurn(params: {
  agentDir: string;
  workspaceDir: string;
@ -320,6 +357,28 @@ async function runAutoPinnedRotationCase(params: {
  });
 }

+async function runAutoPinnedPromptErrorRotationCase(params: {
+  errorMessage: string;
+  sessionKey: string;
+  runId: string;
+}) {
+  runEmbeddedAttemptMock.mockClear();
+  return withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
+    await writeAuthStore(agentDir);
+    mockPromptErrorThenSuccessfulAttempt(params.errorMessage);
+    await runAutoPinnedOpenAiTurn({
+      agentDir,
+      workspaceDir,
+      sessionKey: params.sessionKey,
+      runId: params.runId,
+    });
+
+    expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
+    const usageStats = await readUsageStats(agentDir);
+    return { usageStats };
+  });
+}
+
 function mockSingleSuccessfulAttempt() {
  runEmbeddedAttemptMock.mockResolvedValueOnce(
    makeAttempt({
@ -639,13 +698,48 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
    expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
  });

-  it("rotates for overloaded prompt failures across auto-pinned profiles", async () => {
+  it("rotates for overloaded assistant failures across auto-pinned profiles", async () => {
    const { usageStats } = await runAutoPinnedRotationCase({
      errorMessage: '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}',
      sessionKey: "agent:test:overloaded-rotation",
      runId: "run:overloaded-rotation",
    });
    expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
+    expect(usageStats["openai:p1"]?.cooldownUntil).toBeUndefined();
+    expect(computeBackoffMock).toHaveBeenCalledTimes(1);
+    expect(computeBackoffMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        initialMs: 250,
+        maxMs: 1500,
+        factor: 2,
+        jitter: 0.2,
+      }),
+      1,
+    );
+    expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
+    expect(sleepWithAbortMock).toHaveBeenCalledWith(321, undefined);
+  });
+
+  it("rotates for overloaded prompt failures across auto-pinned profiles", async () => {
+    const { usageStats } = await runAutoPinnedPromptErrorRotationCase({
+      errorMessage: '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}',
+      sessionKey: "agent:test:overloaded-prompt-rotation",
+      runId: "run:overloaded-prompt-rotation",
+    });
+    expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
+    expect(usageStats["openai:p1"]?.cooldownUntil).toBeUndefined();
+    expect(computeBackoffMock).toHaveBeenCalledTimes(1);
+    expect(computeBackoffMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        initialMs: 250,
+        maxMs: 1500,
+        factor: 2,
+        jitter: 0.2,
+      }),
+      1,
+    );
+    expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
+    expect(sleepWithAbortMock).toHaveBeenCalledWith(321, undefined);
  });

  it("rotates on timeout without cooling down the timed-out profile", async () => {
@ -656,6 +750,8 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
    });
    expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
    expect(usageStats["openai:p1"]?.cooldownUntil).toBeUndefined();
+    expect(computeBackoffMock).not.toHaveBeenCalled();
+    expect(sleepWithAbortMock).not.toHaveBeenCalled();
  });

  it("rotates on bare service unavailable without cooling down the profile", async () => {
@ -668,6 +764,54 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
    expect(usageStats["openai:p1"]?.cooldownUntil).toBeUndefined();
  });

+  it("resets overload failover backoff after a successful turn", async () => {
+    await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
+      await writeAuthStore(agentDir);
+
+      mockFailedThenSuccessfulAttempt(
+        '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}',
+      );
+      await runAutoPinnedOpenAiTurn({
+        agentDir,
+        workspaceDir,
+        sessionKey: "agent:test:overloaded-backoff-reset-1",
+        runId: "run:overloaded-backoff-reset-1",
+      });
+
+      mockFailedThenSuccessfulAttempt(
+        '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}',
+      );
+      await runAutoPinnedOpenAiTurn({
+        agentDir,
+        workspaceDir,
+        sessionKey: "agent:test:overloaded-backoff-reset-2",
+        runId: "run:overloaded-backoff-reset-2",
+      });
+
+      expect(computeBackoffMock).toHaveBeenCalledTimes(2);
+      expect(computeBackoffMock).toHaveBeenNthCalledWith(
+        1,
+        expect.objectContaining({
+          initialMs: 250,
+          maxMs: 1500,
+          factor: 2,
+          jitter: 0.2,
+        }),
+        1,
+      );
+      expect(computeBackoffMock).toHaveBeenNthCalledWith(
+        2,
+        expect.objectContaining({
+          initialMs: 250,
+          maxMs: 1500,
+          factor: 2,
+          jitter: 0.2,
+        }),
+        1,
+      );
+    });
+  });
+
  it("does not rotate for compaction timeouts", async () => {
    await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
      await writeAuthStore(agentDir);
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@ -1,6 +1,7 @@
 import { randomBytes } from "node:crypto";
 import fs from "node:fs/promises";
 import type { ThinkLevel } from "../../auto-reply/thinking.js";
+import { computeBackoff, sleepWithAbort, type BackoffPolicy } from "../../infra/backoff.js";
 import { generateSecureToken } from "../../infra/secure-random.js";
 import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
 import type { PluginHookBeforeAgentStartResult } from "../../plugins/types.js";
@ -10,6 +11,7 @@ import { resolveOpenClawAgentDir } from "../agent-paths.js";
 import { hasConfiguredModelFallbacks } from "../agent-scope.js";
 import {
  isProfileInCooldown,
+  type AuthProfileFailureReason,
  markAuthProfileFailure,
  markAuthProfileGood,
  markAuthProfileUsed,
@ -76,6 +78,12 @@ type CopilotTokenState = {
 const COPILOT_REFRESH_MARGIN_MS = 5 * 60 * 1000;
 const COPILOT_REFRESH_RETRY_MS = 60 * 1000;
 const COPILOT_REFRESH_MIN_DELAY_MS = 5 * 1000;
+const OVERLOAD_FAILOVER_BACKOFF_POLICY: BackoffPolicy = {
+  initialMs: 250,
+  maxMs: 1_500,
+  factor: 2,
+  jitter: 0.2,
+};

 // Avoid Anthropic's refusal test token poisoning session transcripts.
 const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
@ -719,9 +727,10 @@ export async function runEmbeddedPiAgent(
      let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
      let autoCompactionCount = 0;
      let runLoopIterations = 0;
+      let overloadFailoverAttempts = 0;
      const maybeMarkAuthProfileFailure = async (failure: {
        profileId?: string;
-        reason?: Parameters<typeof markAuthProfileFailure>[0]["reason"] | null;
+        reason?: AuthProfileFailureReason | null;
        config?: RunEmbeddedPiAgentParams["config"];
        agentDir?: RunEmbeddedPiAgentParams["agentDir"];
      }) => {
@ -737,6 +746,22 @@ export async function runEmbeddedPiAgent(
          agentDir,
        });
      };
+      const resolveAuthProfileFailureReason = (
+        failoverReason: FailoverReason | null,
+      ): AuthProfileFailureReason | null => {
+        if (!failoverReason || failoverReason === "timeout" || failoverReason === "overloaded") {
+          return null;
+        }
+        return failoverReason;
+      };
+      const maybeBackoffBeforeOverloadFailover = async (reason: FailoverReason | null) => {
+        if (reason !== "overloaded") {
+          return;
+        }
+        overloadFailoverAttempts += 1;
+        const delayMs = computeBackoff(OVERLOAD_FAILOVER_BACKOFF_POLICY, overloadFailoverAttempts);
+        await sleepWithAbort(delayMs, params.abortSignal);
+      };
      try {
        let authRetryPending = false;
        // Hoisted so the retry-limit error path can use the most recent API total.
@ -1145,15 +1170,19 @@ export async function runEmbeddedPiAgent(
              };
            }
            const promptFailoverReason = classifyFailoverReason(errorText);
+            const promptProfileFailureReason =
+              resolveAuthProfileFailureReason(promptFailoverReason);
            await maybeMarkAuthProfileFailure({
              profileId: lastProfileId,
-              reason: promptFailoverReason,
+              reason: promptProfileFailureReason,
            });
+            const promptFailoverFailure = isFailoverErrorMessage(errorText);
            if (
-              isFailoverErrorMessage(errorText) &&
+              promptFailoverFailure &&
              promptFailoverReason !== "timeout" &&
              (await advanceAuthProfile())
            ) {
+              await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
              continue;
            }
            const fallbackThinking = pickFallbackThinkingLevel({
@ -1169,7 +1198,8 @@ export async function runEmbeddedPiAgent(
            }
            // FIX: Throw FailoverError for prompt errors when fallbacks configured
            // This enables model fallback for quota/rate limit errors during prompt submission
-            if (fallbackConfigured && isFailoverErrorMessage(errorText)) {
+            if (fallbackConfigured && promptFailoverFailure) {
+              await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
              throw new FailoverError(errorText, {
                reason: promptFailoverReason ?? "unknown",
                provider,
@ -1198,6 +1228,8 @@ export async function runEmbeddedPiAgent(
          const billingFailure = isBillingAssistantError(lastAssistant);
          const failoverFailure = isFailoverAssistantError(lastAssistant);
          const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
+          const assistantProfileFailureReason =
+            resolveAuthProfileFailureReason(assistantFailoverReason);
          const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
          const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? "");

@ -1237,10 +1269,7 @@ export async function runEmbeddedPiAgent(

          if (shouldRotate) {
            if (lastProfileId) {
-              const reason =
-                timedOut || assistantFailoverReason === "timeout"
-                  ? "timeout"
-                  : (assistantFailoverReason ?? "unknown");
+              const reason = timedOut ? "timeout" : assistantProfileFailureReason;
              // Skip cooldown for timeouts: a timeout is model/network-specific,
              // not an auth issue. Marking the profile would poison fallback models
              // on the same provider (e.g. gpt-5.3 timeout blocks gpt-5.2).
@ -1260,10 +1289,12 @@ export async function runEmbeddedPiAgent(

            const rotated = await advanceAuthProfile();
            if (rotated) {
+              await maybeBackoffBeforeOverloadFailover(assistantFailoverReason);
              continue;
            }

            if (fallbackConfigured) {
+              await maybeBackoffBeforeOverloadFailover(assistantFailoverReason);
              // Prefer formatted error message (user-friendly) over raw errorMessage
              const message =
                (lastAssistant
@ -1369,6 +1400,7 @@ export async function runEmbeddedPiAgent(
            `embedded run done: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - started} aborted=${aborted}`,
          );
          if (lastProfileId) {
+            overloadFailoverAttempts = 0;
            await markAuthProfileGood({
              store: authStore,
              provider,
--- a/src/commands/models/list.probe.test.ts
+++ b/src/commands/models/list.probe.test.ts
@ -9,6 +9,7 @@ describe("mapFailoverReasonToProbeStatus", () => {
  it("keeps existing failover reason mappings", () => {
    expect(mapFailoverReasonToProbeStatus("auth")).toBe("auth");
    expect(mapFailoverReasonToProbeStatus("rate_limit")).toBe("rate_limit");
+    expect(mapFailoverReasonToProbeStatus("overloaded")).toBe("rate_limit");
    expect(mapFailoverReasonToProbeStatus("billing")).toBe("billing");
    expect(mapFailoverReasonToProbeStatus("timeout")).toBe("timeout");
    expect(mapFailoverReasonToProbeStatus("format")).toBe("format");
--- a/src/commands/models/list.probe.ts
+++ b/src/commands/models/list.probe.ts
@ -106,7 +106,7 @@ export function mapFailoverReasonToProbeStatus(reason?: string | null): AuthProb
    // surface in the auth bucket instead of showing as unknown.
    return "auth";
  }
-  if (reason === "rate_limit") {
+  if (reason === "rate_limit" || reason === "overloaded") {
    return "rate_limit";
  }
  if (reason === "billing") {
Author	SHA1	Message	Date
Altay	890dbf523f	fix(agents): tighten overload probe and backoff state	2026-03-06 15:47:23 +03:00
Altay	c7148f1a66	fix(agents): back off before overload failover	2026-03-06 15:36:03 +03:00
Altay	fc07dee37e	fix(agents): classify overloaded failures separately	2026-03-06 15:27:57 +03:00
Altay	89240e1226	fix(agents): note overload auth-profile fallback fix	2026-03-06 14:47:04 +03:00
Altay	d389977be4	fix(agents): skip auth-profile failure on overload	2026-03-06 14:16:33 +03:00