Compare commits

...

2 Commits

Author SHA1 Message Date
Val Alexander
b3b148bba1
fix: narrow 402 rate-limit matcher to prevent billing misclassification
The original implementation used isRateLimitErrorMessage(), which matches
phrases like 'quota exceeded' that legitimately appear in billing errors.

This commit replaces it with a narrow, 402-specific matcher that requires
BOTH retry language (try again/retry/temporary/cooldown) AND limit
terminology (usage limit/rate limit/organization usage).

Prevents misclassification of billing errors such as:
'HTTP 402: exceeded quota, please add credits' -> now classified as billing (not rate_limit)

Added regression test for the ambiguous case.
2026-03-06 03:36:11 -06:00
Xinhua Gu
92263fadc0 fix(failover): classify HTTP 402 as rate_limit when payload indicates usage limit (#30484)
Some providers (notably Anthropic Claude Max plan) surface temporary
usage/rate-limit failures as HTTP 402 instead of 429. Before this change,
all 402s were unconditionally mapped to 'billing', which produced a
misleading 'run out of credits' warning for Max plan users who simply
hit their usage window.

This follows the same pattern introduced for HTTP 400 in #36783: check
the error message for an explicit rate-limit signal before falling back
to the default status-code classification.

- classifyFailoverReasonFromHttpStatus now returns 'rate_limit' for 402
  when isRateLimitErrorMessage matches the payload text
- Added regression tests covering both the rate-limit and billing paths
  on 402
2026-03-05 23:28:21 +01:00
2 changed files with 39 additions and 0 deletions

View File

@@ -37,6 +37,27 @@ const GROQ_SERVICE_UNAVAILABLE_MESSAGE =
describe("failover-error", () => {
it("infers failover reason from HTTP status", () => {
expect(resolveFailoverReasonFromError({ status: 402 })).toBe("billing");
// Anthropic Claude Max plan surfaces rate limits as HTTP 402 (#30484)
expect(
resolveFailoverReasonFromError({
status: 402,
message: "HTTP 402: request reached organization usage limit, try again later",
}),
).toBe("rate_limit");
// Explicit billing messages on 402 stay classified as billing
expect(
resolveFailoverReasonFromError({
status: 402,
message: "insufficient credits — please top up your account",
}),
).toBe("billing");
// Ambiguous "quota exceeded" + billing signal → billing wins
expect(
resolveFailoverReasonFromError({
status: 402,
message: "HTTP 402: You have exceeded your current quota. Please add more credits.",
}),
).toBe("billing");
expect(resolveFailoverReasonFromError({ statusCode: "429" })).toBe("rate_limit");
expect(resolveFailoverReasonFromError({ status: 403 })).toBe("auth");
expect(resolveFailoverReasonFromError({ status: 408 })).toBe("timeout");

View File

@@ -260,6 +260,24 @@ export function classifyFailoverReasonFromHttpStatus(
}
if (status === 402) {
// Some providers (e.g. Anthropic Claude Max plan) surface temporary
// usage/rate-limit failures as HTTP 402. Use a narrow matcher for
// temporary limits to avoid misclassifying billing failures (#30484).
if (message) {
const lower = message.toLowerCase();
// Temporary usage limit signals: retry language + usage/limit terminology
const hasTemporarySignal =
(lower.includes("try again") ||
lower.includes("retry") ||
lower.includes("temporary") ||
lower.includes("cooldown")) &&
(lower.includes("usage limit") ||
lower.includes("rate limit") ||
lower.includes("organization usage"));
if (hasTemporarySignal) {
return "rate_limit";
}
}
return "billing";
}
if (status === 429) {