refactor(agents): simplify 402 failover classifier

fix(agents): align raw and status-based 402 classification
agents: simplify 402 failover classifier
2026-03-08 02:00:11 +03:00 · 2026-03-08 02:00:11 +03:00 · 2026-03-08 02:00:10 +03:00 · 2026-03-08 02:00:10 +03:00 · 2026-03-08 01:59:55 +03:00
7 changed files with 334 additions and 26 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -299,6 +299,7 @@ Docs: https://docs.openclaw.ai
 - Agents/OpenAI-responses compatibility: strip unsupported `store` payload fields when `supportsStore=false` (including OpenAI-compatible non-OpenAI providers) while preserving server-compaction payload behavior. (#39219) Thanks @ademczuk.
 - Agents/model fallback visibility: warn when configured model IDs cannot be resolved and fallback is applied, with log-safe sanitization of model text to prevent control-sequence injection in warning output. (#39215) Thanks @ademczuk.
 - Outbound delivery replay safety: use two-phase delivery ACK markers (`.json` -> `.delivered` -> unlink) and startup marker cleanup so crash windows between send and cleanup do not replay already-delivered messages. (#38668) Thanks @Gundam98.
+- Agents/failover 402 recovery: keep temporary spend-limit `402` payloads retryable, preserve explicit insufficient-credit billing detection even in long provider payloads, and allow throttled billing-cooldown probes so single-provider setups can recover instead of staying locked out. (#38533) Thanks @xialonglee.

 ## 2026.3.2

--- a/src/agents/failover-error.test.ts
+++ b/src/agents/failover-error.test.ts
@@ -18,6 +18,8 @@ const GEMINI_RESOURCE_EXHAUSTED_MESSAGE =
  "RESOURCE_EXHAUSTED: Resource has been exhausted (e.g. check quota).";
 // OpenRouter 402 billing example: https://openrouter.ai/docs/api-reference/errors
 const OPENROUTER_CREDITS_MESSAGE = "Payment Required: insufficient credits";
+const TOGETHER_MONTHLY_SPEND_CAP_MESSAGE =
+  "The account associated with this API key has reached its maximum allowed monthly spending limit.";
 // Issue-backed Anthropic/OpenAI-compatible insufficient_quota payload under HTTP 400:
 // https://github.com/openclaw/openclaw/issues/23440
 const INSUFFICIENT_QUOTA_PAYLOAD =
@@ -182,6 +184,48 @@ describe("failover-error", () => {
    ).toBe("billing");
  });

+  it("keeps temporary 402 spend limits retryable without downgrading explicit billing", () => {
+    expect(
+      resolveFailoverReasonFromError({
+        status: 402,
+        message: "Monthly spend limit reached. Please visit your billing settings.",
+      }),
+    ).toBe("rate_limit");
+    expect(
+      resolveFailoverReasonFromError({
+        status: 402,
+        message: "Workspace spend limit reached. Contact your admin.",
+      }),
+    ).toBe("rate_limit");
+    expect(
+      resolveFailoverReasonFromError({
+        status: 402,
+        message: `${"x".repeat(520)} insufficient credits. Monthly spend limit reached.`,
+      }),
+    ).toBe("billing");
+    expect(
+      resolveFailoverReasonFromError({
+        status: 402,
+        message: TOGETHER_MONTHLY_SPEND_CAP_MESSAGE,
+      }),
+    ).toBe("billing");
+  });
+
+  it("keeps raw 402 wrappers aligned with status-split temporary spend limits", () => {
+    const message = "Monthly spend limit reached. Please visit your billing settings.";
+    expect(
+      resolveFailoverReasonFromError({
+        message: `402 Payment Required: ${message}`,
+      }),
+    ).toBe("rate_limit");
+    expect(
+      resolveFailoverReasonFromError({
+        status: 402,
+        message,
+      }),
+    ).toBe("rate_limit");
+  });
+
  it("infers format errors from error messages", () => {
    expect(
      resolveFailoverReasonFromError({
--- a/src/agents/model-fallback.probe.test.ts
+++ b/src/agents/model-fallback.probe.test.ts
@@ -345,4 +345,105 @@ describe("runWithModelFallback – probe logic", () => {
      allowTransientCooldownProbe: true,
    });
  });
+
+  it("probes billing-cooldowned primary when no fallback candidates exist", async () => {
+    const cfg = makeCfg({
+      agents: {
+        defaults: {
+          model: {
+            primary: "openai/gpt-4.1-mini",
+            fallbacks: [],
+          },
+        },
+      },
+    } as Partial<OpenClawConfig>);
+
+    // Billing cooldown far from expiry — would normally be skipped
+    const expiresIn30Min = NOW + 30 * 60 * 1000;
+    mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min);
+    mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
+
+    const run = vi.fn().mockResolvedValue("billing-recovered");
+
+    const result = await runWithModelFallback({
+      cfg,
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      fallbacksOverride: [],
+      run,
+    });
+
+    expect(result.result).toBe("billing-recovered");
+    expect(run).toHaveBeenCalledTimes(1);
+    expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
+      allowTransientCooldownProbe: true,
+    });
+  });
+
+  it("throttles billing probe for single-candidate at 30s intervals", async () => {
+    const cfg = makeCfg({
+      agents: {
+        defaults: {
+          model: {
+            primary: "openai/gpt-4.1-mini",
+            fallbacks: [],
+          },
+        },
+      },
+    } as Partial<OpenClawConfig>);
+
+    mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 30 * 60 * 1000);
+    mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
+
+    // Simulate a recent probe 10s ago
+    _probeThrottleInternals.lastProbeAttempt.set("openai", NOW - 10_000);
+
+    const run = vi.fn().mockResolvedValue("unreachable");
+
+    await expect(
+      runWithModelFallback({
+        cfg,
+        provider: "openai",
+        model: "gpt-4.1-mini",
+        fallbacksOverride: [],
+        run,
+      }),
+    ).rejects.toThrow("All models failed");
+
+    expect(run).not.toHaveBeenCalled();
+  });
+
+  it("probes billing-cooldowned primary with fallbacks when near cooldown expiry", async () => {
+    const cfg = makeCfg();
+    // Cooldown expires in 1 minute — within 2-min probe margin
+    const expiresIn1Min = NOW + 60 * 1000;
+    mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn1Min);
+    mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
+
+    const run = vi.fn().mockResolvedValue("billing-probe-ok");
+
+    const result = await runPrimaryCandidate(cfg, run);
+
+    expect(result.result).toBe("billing-probe-ok");
+    expect(run).toHaveBeenCalledTimes(1);
+    expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
+      allowTransientCooldownProbe: true,
+    });
+  });
+
+  it("skips billing-cooldowned primary with fallbacks when far from cooldown expiry", async () => {
+    const cfg = makeCfg();
+    const expiresIn30Min = NOW + 30 * 60 * 1000;
+    mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min);
+    mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
+
+    const run = vi.fn().mockResolvedValue("ok");
+
+    const result = await runPrimaryCandidate(cfg, run);
+
+    expect(result.result).toBe("ok");
+    expect(run).toHaveBeenCalledTimes(1);
+    expect(run).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5");
+    expect(result.attempts[0]?.reason).toBe("billing");
+  });
 });
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -419,11 +419,30 @@ function resolveCooldownDecision(params: {
      profileIds: params.profileIds,
      now: params.now,
    }) ?? "rate_limit";
-  const isPersistentIssue =
-    inferredReason === "auth" ||
-    inferredReason === "auth_permanent" ||
-    inferredReason === "billing";
-  if (isPersistentIssue) {
+  const isPersistentAuthIssue = inferredReason === "auth" || inferredReason === "auth_permanent";
+  if (isPersistentAuthIssue) {
+    return {
+      type: "skip",
+      reason: inferredReason,
+      error: `Provider ${params.candidate.provider} has ${inferredReason} issue (skipping all models)`,
+    };
+  }
+
+  // Billing is semi-persistent: the user may fix their balance, or a transient
+  // 402 might have been misclassified. Without fallback candidates, skipping is
+  // guaranteed failure so we attempt (throttled). With fallbacks, probe the
+  // primary when the standard probe schedule allows.
+  if (inferredReason === "billing") {
+    if (params.isPrimary) {
+      if (!params.hasFallbackCandidates) {
+        const lastProbe = lastProbeAttempt.get(params.probeThrottleKey) ?? 0;
+        if (params.now - lastProbe >= MIN_PROBE_INTERVAL_MS) {
+          return { type: "attempt", reason: inferredReason, markProbe: true };
+        }
+      } else if (shouldProbe) {
+        return { type: "attempt", reason: inferredReason, markProbe: true };
+      }
+    }
    return {
      type: "skip",
      reason: inferredReason,
@@ -518,7 +537,11 @@ export async function runWithModelFallback<T>(params: {
        if (decision.markProbe) {
          lastProbeAttempt.set(probeThrottleKey, now);
        }
-        if (decision.reason === "rate_limit" || decision.reason === "overloaded") {
+        if (
+          decision.reason === "rate_limit" ||
+          decision.reason === "overloaded" ||
+          decision.reason === "billing"
+        ) {
          runOptions = { allowTransientCooldownProbe: true };
        }
      }
--- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
+++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
@@ -1,6 +1,7 @@
 import { describe, expect, it } from "vitest";
 import {
  classifyFailoverReason,
+  classifyFailoverReasonFromHttpStatus,
  isAuthErrorMessage,
  isAuthPermanentErrorMessage,
  isBillingErrorMessage,
@@ -505,6 +506,73 @@ describe("image dimension errors", () => {
  });
 });

+describe("classifyFailoverReasonFromHttpStatus – 402 temporary limits", () => {
+  it("reclassifies periodic usage limits as rate_limit", () => {
+    const samples = [
+      "Monthly spend limit reached.",
+      "Weekly usage limit exhausted.",
+      "Daily limit reached, resets tomorrow.",
+    ];
+    for (const sample of samples) {
+      expect(classifyFailoverReasonFromHttpStatus(402, sample)).toBe("rate_limit");
+    }
+  });
+
+  it("reclassifies org/workspace spend limits as rate_limit", () => {
+    const samples = [
+      "Organization spending limit exceeded.",
+      "Workspace spend limit reached.",
+      "Organization limit exceeded for this billing period.",
+    ];
+    for (const sample of samples) {
+      expect(classifyFailoverReasonFromHttpStatus(402, sample)).toBe("rate_limit");
+    }
+  });
+
+  it("keeps 402 as billing when explicit billing signals are present", () => {
+    expect(
+      classifyFailoverReasonFromHttpStatus(
+        402,
+        "Your credit balance is too low. Monthly limit exceeded.",
+      ),
+    ).toBe("billing");
+    expect(
+      classifyFailoverReasonFromHttpStatus(
+        402,
+        "Insufficient credits. Organization limit reached.",
+      ),
+    ).toBe("billing");
+    expect(
+      classifyFailoverReasonFromHttpStatus(
+        402,
+        "The account associated with this API key has reached its maximum allowed monthly spending limit.",
+      ),
+    ).toBe("billing");
+  });
+
+  it("keeps long 402 payloads with explicit billing text as billing", () => {
+    const longBillingPayload = `${"x".repeat(520)} insufficient credits. Monthly spend limit reached.`;
+    expect(classifyFailoverReasonFromHttpStatus(402, longBillingPayload)).toBe("billing");
+  });
+
+  it("keeps 402 as billing without message or with generic message", () => {
+    expect(classifyFailoverReasonFromHttpStatus(402, undefined)).toBe("billing");
+    expect(classifyFailoverReasonFromHttpStatus(402, "")).toBe("billing");
+    expect(classifyFailoverReasonFromHttpStatus(402, "Payment required")).toBe("billing");
+  });
+
+  it("matches raw 402 wrappers and status-split payloads for the same message", () => {
+    const transientMessage = "Monthly spend limit reached. Please visit your billing settings.";
+    expect(classifyFailoverReason(`402 Payment Required: ${transientMessage}`)).toBe("rate_limit");
+    expect(classifyFailoverReasonFromHttpStatus(402, transientMessage)).toBe("rate_limit");
+
+    const billingMessage =
+      "The account associated with this API key has reached its maximum allowed monthly spending limit.";
+    expect(classifyFailoverReason(`402 Payment Required: ${billingMessage}`)).toBe("billing");
+    expect(classifyFailoverReasonFromHttpStatus(402, billingMessage)).toBe("billing");
+  });
+});
+
 describe("classifyFailoverReason", () => {
  it("classifies documented provider error messages", () => {
    expect(classifyFailoverReason(OPENAI_RATE_LIMIT_MESSAGE)).toBe("rate_limit");
--- a/src/agents/pi-embedded-helpers/errors.ts
+++ b/src/agents/pi-embedded-helpers/errors.ts
@@ -208,6 +208,89 @@ const HTTP_ERROR_HINTS = [
  "permission",
 ];

+type PaymentRequiredFailoverReason = Extract<FailoverReason, "billing" | "rate_limit">;
+
+const BILLING_402_HINTS = [
+  "insufficient credits",
+  "insufficient quota",
+  "credit balance",
+  "insufficient balance",
+  "plans & billing",
+  "add more credits",
+  "top up",
+] as const;
+
+const PERIODIC_402_HINTS = ["daily", "weekly", "monthly"] as const;
+const RETRYABLE_402_RETRY_HINTS = ["try again", "retry", "temporary", "cooldown"] as const;
+const RETRYABLE_402_LIMIT_HINTS = ["usage limit", "rate limit", "organization usage"] as const;
+const RETRYABLE_402_SCOPED_HINTS = ["organization", "workspace"] as const;
+const RETRYABLE_402_SCOPED_RESULT_HINTS = [
+  "billing period",
+  "exceeded",
+  "reached",
+  "exhausted",
+] as const;
+const RAW_402_MARKER_RE =
+  /["']?(?:status|code)["']?\s*[:=]\s*402\b|\bhttp\s*402\b|\berror(?:\s+code)?\s*[:=]?\s*402\b|\b(?:got|returned|received)\s+(?:a\s+)?402\b|^\s*402\s+payment required\b/i;
+const LEADING_402_WRAPPER_RE =
+  /^(?:error[:\s-]+)?(?:(?:http\s*)?402(?:\s+payment required)?|payment required)(?:[:\s-]+|$)/i;
+
+function includesAnyHint(text: string, hints: readonly string[]): boolean {
+  return hints.some((hint) => text.includes(hint));
+}
+
+function hasExplicit402BillingSignal(text: string): boolean {
+  return (
+    includesAnyHint(text, BILLING_402_HINTS) ||
+    text.includes("billing hard limit") ||
+    text.includes("hard limit reached") ||
+    (text.includes("maximum allowed") && text.includes("limit"))
+  );
+}
+
+function hasRetryable402TransientSignal(text: string): boolean {
+  const hasPeriodicHint = includesAnyHint(text, PERIODIC_402_HINTS);
+  const hasSpendLimit = text.includes("spend limit") || text.includes("spending limit");
+  const hasScopedHint = includesAnyHint(text, RETRYABLE_402_SCOPED_HINTS);
+  return (
+    (includesAnyHint(text, RETRYABLE_402_RETRY_HINTS) &&
+      includesAnyHint(text, RETRYABLE_402_LIMIT_HINTS)) ||
+    (hasPeriodicHint && (text.includes("usage limit") || hasSpendLimit)) ||
+    (hasPeriodicHint && text.includes("limit") && text.includes("reset")) ||
+    (hasScopedHint &&
+      text.includes("limit") &&
+      (hasSpendLimit || includesAnyHint(text, RETRYABLE_402_SCOPED_RESULT_HINTS)))
+  );
+}
+
+function normalize402Message(raw: string): string {
+  return raw.trim().toLowerCase().replace(LEADING_402_WRAPPER_RE, "").trim();
+}
+
+function classify402Message(message: string): PaymentRequiredFailoverReason {
+  const normalized = normalize402Message(message);
+  if (!normalized) {
+    return "billing";
+  }
+
+  if (hasExplicit402BillingSignal(normalized)) {
+    return "billing";
+  }
+
+  if (hasRetryable402TransientSignal(normalized)) {
+    return "rate_limit";
+  }
+
+  return "billing";
+}
+
+function classifyFailoverReasonFrom402Text(raw: string): PaymentRequiredFailoverReason | null {
+  if (!RAW_402_MARKER_RE.test(raw)) {
+    return null;
+  }
+  return classify402Message(raw);
+}
+
 function extractLeadingHttpStatus(raw: string): { code: number; rest: string } | null {
  const match = raw.match(HTTP_STATUS_CODE_PREFIX_RE);
  if (!match) {
@@ -261,25 +344,7 @@ export function classifyFailoverReasonFromHttpStatus(
  }

  if (status === 402) {
-    // Some providers (e.g. Anthropic Claude Max plan) surface temporary
-    // usage/rate-limit failures as HTTP 402. Use a narrow matcher for
-    // temporary limits to avoid misclassifying billing failures (#30484).
-    if (message) {
-      const lower = message.toLowerCase();
-      // Temporary usage limit signals: retry language + usage/limit terminology
-      const hasTemporarySignal =
-        (lower.includes("try again") ||
-          lower.includes("retry") ||
-          lower.includes("temporary") ||
-          lower.includes("cooldown")) &&
-        (lower.includes("usage limit") ||
-          lower.includes("rate limit") ||
-          lower.includes("organization usage"));
-      if (hasTemporarySignal) {
-        return "rate_limit";
-      }
-    }
-    return "billing";
+    return message ? classify402Message(message) : "billing";
  }
  if (status === 429) {
    return "rate_limit";
@@ -858,6 +923,10 @@ export function classifyFailoverReason(raw: string): FailoverReason | null {
  if (isModelNotFoundErrorMessage(raw)) {
    return "model_not_found";
  }
+  const reasonFrom402Text = classifyFailoverReasonFrom402Text(raw);
+  if (reasonFrom402Text) {
+    return reasonFrom402Text;
+  }
  if (isPeriodicUsageLimitErrorMessage(raw)) {
    return isBillingErrorMessage(raw) ? "billing" : "rate_limit";
  }
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -668,7 +668,9 @@ export async function runEmbeddedPiAgent(
        const allowTransientCooldownProbe =
          params.allowTransientCooldownProbe === true &&
          allAutoProfilesInCooldown &&
-          (unavailableReason === "rate_limit" || unavailableReason === "overloaded");
+          (unavailableReason === "rate_limit" ||
+            unavailableReason === "overloaded" ||
+            unavailableReason === "billing");
        let didTransientCooldownProbe = false;

        while (profileIndex < profileCandidates.length) {
Author	SHA1	Message	Date
Altay	75960a8af1	refactor(agents): simplify 402 failover classifier	2026-03-08 02:00:11 +03:00
Altay	d71051aa5f	fix(agents): align raw and status-based 402 classification	2026-03-08 02:00:11 +03:00
Altay	060c00b7ce	agents: simplify 402 failover classifier	2026-03-08 02:00:10 +03:00
Altay	2e1d48f2be	agents: tighten 402 billing guard	2026-03-08 02:00:10 +03:00
xialonglee	ea1143a287	fix(agents): broaden 402 temporary-limit detection and allow billing cooldown probes - Broaden classifyFailoverReasonFromHttpStatus to treat periodic usage limits and org/workspace spend limits as rate_limit instead of billing - Treat billing as semi-persistent in model-fallback: allow probes when no fallbacks exist (30s throttle) or when fallbacks exist (near expiry) - Add tests for new 402 classification and billing probe behavior	2026-03-08 01:59:55 +03:00