openclaw/src/agents/failover-error.test.ts

import { describe, expect, it } from "vitest";
import {
  coerceToFailoverError,
  describeFailoverError,
  isTimeoutError,
  resolveFailoverReasonFromError,
  resolveFailoverStatus,
} from "./failover-error.js";

/**
 * Specs for the helpers imported from `./failover-error.js`: classifying a
 * failover reason from a status, error code, or message; detecting timeouts;
 * coercing raw errors into a `FailoverError`; mapping a reason to a status;
 * and describing non-Error values.
 */
describe("failover-error", () => {
  // Short alias so the table-driven cases below stay compact.
  const classify = resolveFailoverReasonFromError;

  it("infers failover reason from HTTP status", () => {
    // Each row is [error-like input, expected reason]; rows run in order and
    // the first mismatch fails the test.
    const cases = [
      [{ status: 402 }, "billing"],
      [{ statusCode: "429" }, "rate_limit"],
      [{ status: 403 }, "auth"],
      [{ status: 408 }, "timeout"],
      [{ status: 400 }, "format"],
      // Transient server errors (502/503/504) should trigger failover as timeout.
      [{ status: 502 }, "timeout"],
      [{ status: 503 }, "timeout"],
      [{ status: 504 }, "timeout"],
    ] as const;

    for (const [input, expected] of cases) {
      expect(classify(input)).toBe(expected);
    }
  });

  it("infers format errors from error messages", () => {
    const message = "invalid request format: messages.1.content.1.tool_use.id";
    expect(classify({ message })).toBe("format");
  });

  it("infers timeout from common node error codes", () => {
    for (const code of ["ETIMEDOUT", "ECONNRESET"]) {
      expect(classify({ code })).toBe("timeout");
    }
  });

  it("infers timeout from abort stop-reason messages", () => {
    // Progressively shorter variants of the same abort stop-reason text.
    const abortMessages = ["Unhandled stop reason: abort", "stop reason: abort", "reason: abort"];
    for (const message of abortMessages) {
      expect(classify({ message })).toBe("timeout");
    }
  });

  it("treats AbortError reason=abort as timeout", () => {
    // Build an Error whose `name` is overridden and that carries an extra
    // `reason` field alongside the standard message.
    const abortFields = { name: "AbortError", reason: "reason: abort" };
    const abortError = Object.assign(new Error("aborted"), abortFields);
    expect(isTimeoutError(abortError)).toBe(true);
  });

  it("coerces failover-worthy errors into FailoverError with metadata", () => {
    const context = { provider: "anthropic", model: "claude-opus-4-5" };
    const failover = coerceToFailoverError("credit balance too low", context);

    expect(failover?.name).toBe("FailoverError");
    expect(failover?.reason).toBe("billing");
    expect(failover?.status).toBe(402);
    // The provider/model passed as context must be carried onto the result.
    expect(failover?.provider).toBe("anthropic");
    expect(failover?.model).toBe("claude-opus-4-5");
  });

  it("coerces format errors with a 400 status", () => {
    const context = { provider: "google", model: "cloud-code-assist" };
    const failover = coerceToFailoverError("invalid request format", context);

    expect(failover?.reason).toBe("format");
    expect(failover?.status).toBe(400);
  });

  it("401/403 with generic message still returns auth (backward compat)", () => {
    const genericAuthErrors = [
      { status: 401, message: "Unauthorized" },
      { status: 403, message: "Forbidden" },
    ];
    for (const authError of genericAuthErrors) {
      expect(classify(authError)).toBe("auth");
    }
  });

  it("401 with permanent auth message returns auth_permanent", () => {
    const invalidKeyError = { status: 401, message: "invalid_api_key" };
    expect(classify(invalidKeyError)).toBe("auth_permanent");
  });

  it("403 with revoked key message returns auth_permanent", () => {
    const revokedKeyError = { status: 403, message: "api key revoked" };
    expect(classify(revokedKeyError)).toBe("auth_permanent");
  });

  it("resolveFailoverStatus maps auth_permanent to 403", () => {
    const status = resolveFailoverStatus("auth_permanent");
    expect(status).toBe(403);
  });

  it("coerces permanent auth error with correct reason", () => {
    const rawError = { status: 401, message: "invalid_api_key" };
    const context = { provider: "anthropic", model: "claude-opus-4-6" };
    const failover = coerceToFailoverError(rawError, context);

    expect(failover?.reason).toBe("auth_permanent");
    expect(failover?.provider).toBe("anthropic");
  });

  it("describes non-Error values consistently", () => {
    // A bare number is neither an Error nor an error-like object.
    const { message, reason } = describeFailoverError(123);
    expect(message).toBe("123");
    expect(reason).toBeUndefined();
  });
});
refactor(agents): centralize failover normalization 2026-01-09 22:15:03 +01:00			`import { describe, expect, it } from "vitest";`
			`import {`
			`coerceToFailoverError,`
			`describeFailoverError,`
fix(failover): align abort timeout detection and regressions 2026-02-16 20:59:44 -05:00			`isTimeoutError,`
refactor(agents): centralize failover normalization 2026-01-09 22:15:03 +01:00			`resolveFailoverReasonFromError,`
fix(auth): distinguish revoked API keys from transient auth errors (#25754) Merged via /review-pr -> /prepare-pr -> /merge-pr. Prepared head SHA: 8f9c07a200644284e11adae76368adab40c5fa4e Co-authored-by: rrenamed <87486610+rrenamed@users.noreply.github.com> Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Reviewed-by: @gumadeiras 2026-02-26 02:47:16 +02:00			`resolveFailoverStatus,`
refactor(agents): centralize failover normalization 2026-01-09 22:15:03 +01:00			`} from "./failover-error.js";`

			`describe("failover-error", () => {`
			`it("infers failover reason from HTTP status", () => {`
			`expect(resolveFailoverReasonFromError({ status: 402 })).toBe("billing");`
chore: migrate to oxlint and oxfmt Co-authored-by: Christoph Nakazawa <christoph.pojer@gmail.com> 2026-01-14 14:31:43 +00:00			`expect(resolveFailoverReasonFromError({ statusCode: "429" })).toBe("rate_limit");`
refactor(agents): centralize failover normalization 2026-01-09 22:15:03 +01:00			`expect(resolveFailoverReasonFromError({ status: 403 })).toBe("auth");`
			`expect(resolveFailoverReasonFromError({ status: 408 })).toBe("timeout");`
fix: handle 400 status in failover to enable model fallback (#1879) 2026-02-09 09:12:06 +02:00			`expect(resolveFailoverReasonFromError({ status: 400 })).toBe("format");`
fix: treat HTTP 502/503/504 as failover-eligible (timeout reason) (#21017) * fix: treat HTTP 502/503/504 as failover-eligible (timeout reason) When a model API returns 502 Bad Gateway, 503 Service Unavailable, or 504 Gateway Timeout, the error object carries the status code directly. resolveFailoverReasonFromError() only checked 402/429/401/403/408/400, so 5xx server errors fell through to message-based classification which requires the status code to appear at the start of the error message. Many API SDKs (Google, Anthropic) set err.status = 503 without prefixing the message with '503', so the message classifier never matched and failover never triggered — the run retried the same broken model. Add 502/503/504 to the status-code branch, returning 'timeout' (matching the existing behavior of isTransientHttpError in the message classifier). Fixes #20999 * Changelog: add failover 502/503/504 note with credits * Failover: classify HTTP 504 as transient in message parser * Changelog: credit taw0002 and vincentkoc for failover fix --------- Co-authored-by: Vincent Koc <vincentkoc@ieee.org> 2026-02-23 01:01:57 -07:00			`// Transient server errors (502/503/504) should trigger failover as timeout.`
			`expect(resolveFailoverReasonFromError({ status: 502 })).toBe("timeout");`
fix: treat HTTP 503 as failover-eligible for LLM provider errors (#21086) * fix: treat HTTP 503 as failover-eligible for LLM provider errors When LLM SDKs wrap 503 responses, the leading "503" prefix is lost (e.g. Google Gemini returns "high demand" / "UNAVAILABLE" without a numeric prefix). The existing isTransientHttpError only matches messages starting with "503 ...", so these wrapped errors silently skip failover — no profile rotation, no model fallback. This patch closes that gap: - resolveFailoverReasonFromError: map HTTP status 503 → rate_limit (covers structured error objects with a status field) - ERROR_PATTERNS.overloaded: add /\b503\b/, "service unavailable", "high demand" (covers message-only classification when the leading status prefix is absent) Existing isTransientHttpError behavior is unchanged; these additions are complementary and only fire for errors that previously fell through unclassified. * fix: address review feedback — drop /\b503\b/ pattern, add test coverage - Remove `/\b503\b/` from ERROR_PATTERNS.overloaded to resolve the semantic inconsistency noted by reviewers: `isTransientHttpError` already handles messages prefixed with "503" (→ "timeout"), so a redundant overloaded pattern would classify the same class of errors differently depending on message formatting. - Keep "service unavailable" and "high demand" patterns — these are the real gap-fillers for SDK-rewritten messages that lack a numeric prefix. - Add test case for JSON-wrapped 503 error body containing "overloaded" to strengthen coverage. * fix: unify 503 classification — status 503 → timeout (consistent with isTransientHttpError) resolveFailoverReasonFromError previously mapped status 503 → "rate_limit", while the string-based isTransientHttpError mapped "503 ..." → "timeout". Align both paths: structured {status: 503} now also returns "timeout", matching the existing transient-error convention. Both reasons are failover-eligible, so runtime behavior is unchanged. 
--------- Co-authored-by: Vincent Koc <vincentkoc@ieee.org> 2026-02-20 04:45:09 +08:00			`expect(resolveFailoverReasonFromError({ status: 503 })).toBe("timeout");`
fix: treat HTTP 502/503/504 as failover-eligible (timeout reason) (#21017) * fix: treat HTTP 502/503/504 as failover-eligible (timeout reason) When a model API returns 502 Bad Gateway, 503 Service Unavailable, or 504 Gateway Timeout, the error object carries the status code directly. resolveFailoverReasonFromError() only checked 402/429/401/403/408/400, so 5xx server errors fell through to message-based classification which requires the status code to appear at the start of the error message. Many API SDKs (Google, Anthropic) set err.status = 503 without prefixing the message with '503', so the message classifier never matched and failover never triggered — the run retried the same broken model. Add 502/503/504 to the status-code branch, returning 'timeout' (matching the existing behavior of isTransientHttpError in the message classifier). Fixes #20999 * Changelog: add failover 502/503/504 note with credits * Failover: classify HTTP 504 as transient in message parser * Changelog: credit taw0002 and vincentkoc for failover fix --------- Co-authored-by: Vincent Koc <vincentkoc@ieee.org> 2026-02-23 01:01:57 -07:00			`expect(resolveFailoverReasonFromError({ status: 504 })).toBe("timeout");`
refactor(agents): centralize failover normalization 2026-01-09 22:15:03 +01:00			`});`

refactor: centralize failover error parsing 2026-01-10 01:25:01 +01:00			`it("infers format errors from error messages", () => {`
			`expect(`
			`resolveFailoverReasonFromError({`
			`message: "invalid request format: messages.1.content.1.tool_use.id",`
			`}),`
			`).toBe("format");`
			`});`

refactor(agents): centralize failover normalization 2026-01-09 22:15:03 +01:00			`it("infers timeout from common node error codes", () => {`
chore: migrate to oxlint and oxfmt Co-authored-by: Christoph Nakazawa <christoph.pojer@gmail.com> 2026-01-14 14:31:43 +00:00			`expect(resolveFailoverReasonFromError({ code: "ETIMEDOUT" })).toBe("timeout");`
			`expect(resolveFailoverReasonFromError({ code: "ECONNRESET" })).toBe("timeout");`
refactor(agents): centralize failover normalization 2026-01-09 22:15:03 +01:00			`});`

fix(failover): align abort timeout detection and regressions 2026-02-16 20:59:44 -05:00			`it("infers timeout from abort stop-reason messages", () => {`
			`expect(resolveFailoverReasonFromError({ message: "Unhandled stop reason: abort" })).toBe(`
			`"timeout",`
			`);`
			`expect(resolveFailoverReasonFromError({ message: "stop reason: abort" })).toBe("timeout");`
			`expect(resolveFailoverReasonFromError({ message: "reason: abort" })).toBe("timeout");`
			`});`

			`it("treats AbortError reason=abort as timeout", () => {`
			`const err = Object.assign(new Error("aborted"), {`
			`name: "AbortError",`
			`reason: "reason: abort",`
			`});`
			`expect(isTimeoutError(err)).toBe(true);`
			`});`

refactor(agents): centralize failover normalization 2026-01-09 22:15:03 +01:00			`it("coerces failover-worthy errors into FailoverError with metadata", () => {`
			`const err = coerceToFailoverError("credit balance too low", {`
			`provider: "anthropic",`
			`model: "claude-opus-4-5",`
			`});`
			`expect(err?.name).toBe("FailoverError");`
			`expect(err?.reason).toBe("billing");`
			`expect(err?.status).toBe(402);`
			`expect(err?.provider).toBe("anthropic");`
			`expect(err?.model).toBe("claude-opus-4-5");`
			`});`

refactor: centralize failover error parsing 2026-01-10 01:25:01 +01:00			`it("coerces format errors with a 400 status", () => {`
			`const err = coerceToFailoverError("invalid request format", {`
			`provider: "google",`
			`model: "cloud-code-assist",`
			`});`
			`expect(err?.reason).toBe("format");`
			`expect(err?.status).toBe(400);`
			`});`

fix(auth): distinguish revoked API keys from transient auth errors (#25754) Merged via /review-pr -> /prepare-pr -> /merge-pr. Prepared head SHA: 8f9c07a200644284e11adae76368adab40c5fa4e Co-authored-by: rrenamed <87486610+rrenamed@users.noreply.github.com> Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Reviewed-by: @gumadeiras 2026-02-26 02:47:16 +02:00			`it("401/403 with generic message still returns auth (backward compat)", () => {`
			`expect(resolveFailoverReasonFromError({ status: 401, message: "Unauthorized" })).toBe("auth");`
			`expect(resolveFailoverReasonFromError({ status: 403, message: "Forbidden" })).toBe("auth");`
			`});`

			`it("401 with permanent auth message returns auth_permanent", () => {`
			`expect(resolveFailoverReasonFromError({ status: 401, message: "invalid_api_key" })).toBe(`
			`"auth_permanent",`
			`);`
			`});`

			`it("403 with revoked key message returns auth_permanent", () => {`
			`expect(resolveFailoverReasonFromError({ status: 403, message: "api key revoked" })).toBe(`
			`"auth_permanent",`
			`);`
			`});`

			`it("resolveFailoverStatus maps auth_permanent to 403", () => {`
			`expect(resolveFailoverStatus("auth_permanent")).toBe(403);`
			`});`

			`it("coerces permanent auth error with correct reason", () => {`
			`const err = coerceToFailoverError(`
			`{ status: 401, message: "invalid_api_key" },`
			`{ provider: "anthropic", model: "claude-opus-4-6" },`
			`);`
			`expect(err?.reason).toBe("auth_permanent");`
			`expect(err?.provider).toBe("anthropic");`
			`});`

refactor(agents): centralize failover normalization 2026-01-09 22:15:03 +01:00			`it("describes non-Error values consistently", () => {`
			`const described = describeFailoverError(123);`
			`expect(described.message).toBe("123");`
			`expect(described.reason).toBeUndefined();`
			`});`
			`});`