2026-01-09 22:15:03 +01:00
|
|
|
import { describe, expect, it } from "vitest";
|
|
|
|
|
import {
|
|
|
|
|
coerceToFailoverError,
|
|
|
|
|
describeFailoverError,
|
2026-02-16 20:59:44 -05:00
|
|
|
isTimeoutError,
|
2026-01-09 22:15:03 +01:00
|
|
|
resolveFailoverReasonFromError,
|
2026-02-26 02:47:16 +02:00
|
|
|
resolveFailoverStatus,
|
2026-01-09 22:15:03 +01:00
|
|
|
} from "./failover-error.js";
|
|
|
|
|
|
|
|
|
|
describe("failover-error", () => {
|
|
|
|
|
it("infers failover reason from HTTP status", () => {
  // Each case pairs an error-like object with the failover reason it must resolve to.
  const cases = [
    { error: { status: 402 }, reason: "billing" },
    // The status may also arrive as a string under `statusCode`.
    { error: { statusCode: "429" }, reason: "rate_limit" },
    { error: { status: 403 }, reason: "auth" },
    { error: { status: 408 }, reason: "timeout" },
    { error: { status: 400 }, reason: "format" },
    // Transient server errors (502/503/504) should trigger failover as timeout.
    { error: { status: 502 }, reason: "timeout" },
    { error: { status: 503 }, reason: "timeout" },
    { error: { status: 504 }, reason: "timeout" },
  ];

  for (const { error, reason } of cases) {
    expect(resolveFailoverReasonFromError(error)).toBe(reason);
  }
});
|
|
|
|
|
|
2026-01-10 01:25:01 +01:00
|
|
|
it("infers format errors from error messages", () => {
  // No status field here: the reason has to come from the message text alone.
  const error = { message: "invalid request format: messages.1.content.1.tool_use.id" };

  const reason = resolveFailoverReasonFromError(error);

  expect(reason).toBe("format");
});
|
|
|
|
|
|
2026-01-09 22:15:03 +01:00
|
|
|
it("infers timeout from common node error codes", () => {
  // Both codes are expected to resolve to the "timeout" reason.
  const timeoutCodes = ["ETIMEDOUT", "ECONNRESET"];

  for (const code of timeoutCodes) {
    expect(resolveFailoverReasonFromError({ code })).toBe("timeout");
  }
});
|
|
|
|
|
|
2026-02-16 20:59:44 -05:00
|
|
|
it("infers timeout from abort stop-reason messages", () => {
  // Progressively shorter variants of the same abort message; all must resolve to "timeout".
  const abortMessages = ["Unhandled stop reason: abort", "stop reason: abort", "reason: abort"];

  for (const message of abortMessages) {
    expect(resolveFailoverReasonFromError({ message })).toBe("timeout");
  }
});
|
|
|
|
|
|
|
|
|
|
it("treats AbortError reason=abort as timeout", () => {
  // Build an Error carrying an AbortError name and an abort reason string.
  const abortProps = { name: "AbortError", reason: "reason: abort" };
  const abortError = Object.assign(new Error("aborted"), abortProps);

  const timedOut = isTimeoutError(abortError);

  expect(timedOut).toBe(true);
});
|
|
|
|
|
|
2026-01-09 22:15:03 +01:00
|
|
|
it("coerces failover-worthy errors into FailoverError with metadata", () => {
  // Provider/model context passed alongside the raw error string.
  const context = { provider: "anthropic", model: "claude-opus-4-5" };

  const failover = coerceToFailoverError("credit balance too low", context);

  // Classification derived from the message text.
  expect(failover?.name).toBe("FailoverError");
  expect(failover?.reason).toBe("billing");
  expect(failover?.status).toBe(402);
  // Context must be carried through onto the resulting error.
  expect(failover?.provider).toBe("anthropic");
  expect(failover?.model).toBe("claude-opus-4-5");
});
|
|
|
|
|
|
2026-01-10 01:25:01 +01:00
|
|
|
it("coerces format errors with a 400 status", () => {
  const context = { provider: "google", model: "cloud-code-assist" };

  const failover = coerceToFailoverError("invalid request format", context);

  // A format-classified message is expected to carry HTTP status 400.
  expect(failover?.reason).toBe("format");
  expect(failover?.status).toBe(400);
});
|
|
|
|
|
|
2026-02-26 02:47:16 +02:00
|
|
|
it("401/403 with generic message still returns auth (backward compat)", () => {
  // Generic auth messages must keep resolving to plain "auth".
  const genericAuthErrors = [
    { status: 401, message: "Unauthorized" },
    { status: 403, message: "Forbidden" },
  ];

  for (const error of genericAuthErrors) {
    expect(resolveFailoverReasonFromError(error)).toBe("auth");
  }
});
|
|
|
|
|
|
|
|
|
|
it("401 with permanent auth message returns auth_permanent", () => {
  // Same status as a generic 401, but the message names an invalid API key.
  const error = { status: 401, message: "invalid_api_key" };

  const reason = resolveFailoverReasonFromError(error);

  expect(reason).toBe("auth_permanent");
});
|
|
|
|
|
|
|
|
|
|
it("403 with revoked key message returns auth_permanent", () => {
  // Same status as a generic 403, but the message says the key was revoked.
  const error = { status: 403, message: "api key revoked" };

  const reason = resolveFailoverReasonFromError(error);

  expect(reason).toBe("auth_permanent");
});
|
|
|
|
|
|
|
|
|
|
it("resolveFailoverStatus maps auth_permanent to 403", () => {
  // Reverse direction: from a failover reason back to an HTTP status.
  const status = resolveFailoverStatus("auth_permanent");

  expect(status).toBe(403);
});
|
|
|
|
|
|
|
|
|
|
it("coerces permanent auth error with correct reason", () => {
  // Structured error input (status + message) rather than a bare string.
  const rawError = { status: 401, message: "invalid_api_key" };
  const context = { provider: "anthropic", model: "claude-opus-4-6" };

  const failover = coerceToFailoverError(rawError, context);

  expect(failover?.reason).toBe("auth_permanent");
  expect(failover?.provider).toBe("anthropic");
});
|
|
|
|
|
|
2026-01-09 22:15:03 +01:00
|
|
|
it("describes non-Error values consistently", () => {
  // A bare number is neither an Error nor an error-like object.
  const { message, reason } = describeFailoverError(123);

  // The value is expected to be stringified, with no failover reason attached.
  expect(message).toBe("123");
  expect(reason).toBeUndefined();
});
|
|
|
|
|
});
|