Merge d762b2a92e41e406b788bb6f5191b770985c6878 into 5e417b44e1540f528d2ae63e3e20229a902d1db2

This commit is contained in:
Ayush Ojha 2026-03-20 19:38:35 -07:00 committed by GitHub
commit 2e8389c976
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 96 additions and 12 deletions

View File

@ -853,11 +853,72 @@ describe("classifyFailoverReason", () => {
expect(classifyFailoverReason("key has been disabled")).toBe("auth_permanent");
expect(classifyFailoverReason("account has been deactivated")).toBe("auth_permanent");
});
it("classifies JSON api_error internal server failures as timeout", () => {
it("classifies JSON api_error with transient signal as timeout", () => {
expect(
classifyFailoverReason(
'{"type":"error","error":{"type":"api_error","message":"Internal server error"}}',
),
).toBe("timeout");
// MiniMax non-standard message
expect(
classifyFailoverReason('{"type":"api_error","message":"unknown error, 520 (1000)"}'),
).toBe("timeout");
// Overloaded variant
expect(
classifyFailoverReason(
'{"type":"error","error":{"type":"api_error","message":"Service temporarily unavailable"}}',
),
).toBe("timeout");
});
it("does not classify non-transient api_error payloads as timeout", () => {
// Context overflow - not transient
expect(
classifyFailoverReason(
'{"type":"error","error":{"type":"api_error","message":"Request size exceeds model context window"}}',
),
).not.toBe("timeout");
// Schema/validation error - not transient
expect(
classifyFailoverReason(
'{"type":"error","error":{"type":"api_error","message":"messages.1.content.1.tool_use.id should match pattern"}}',
),
).not.toBe("timeout");
// Generic unknown api_error without transient wording - should not be retried
expect(
classifyFailoverReason(
'{"type":"error","error":{"type":"api_error","message":"invalid input format"}}',
),
).not.toBe("timeout");
});
it("does not shadow billing errors that carry api_error type", () => {
// A provider may wrap a billing error in a JSON payload with "type":"api_error".
// The billing classifier must win over the broad api_error transient match.
expect(
classifyFailoverReason(
'{"type":"error","error":{"type":"api_error","message":"insufficient credits"}}',
),
).toBe("billing");
expect(
classifyFailoverReason(
'{"type":"error","error":{"type":"api_error","message":"Payment required"}}',
),
).toBe("billing");
});
it("does not shadow auth errors that carry api_error type", () => {
expect(
classifyFailoverReason(
'{"type":"error","error":{"type":"api_error","message":"invalid api key"}}',
),
).toBe("auth");
expect(
classifyFailoverReason(
'{"type":"error","error":{"type":"api_error","message":"unauthorized"}}',
),
).toBe("auth");
expect(
classifyFailoverReason(
'{"type":"error","error":{"type":"api_error","message":"permission_error"}}',
),
).toBe("auth_permanent");
});
});

View File

@ -670,14 +670,34 @@ export function isBillingAssistantError(msg: AssistantMessage | undefined): bool
return isBillingErrorMessage(msg.errorMessage ?? "");
}
// Transient signal patterns for api_error payloads. Only treat an api_error as
// retryable when the message text itself indicates a transient server issue.
// Non-transient api_error payloads (context overflow, validation/schema errors)
// must NOT be classified as timeout.
const API_ERROR_TRANSIENT_SIGNALS_RE =
/internal server error|overload|temporarily unavailable|service unavailable|unknown error|server error|bad gateway|gateway timeout|upstream error|backend error|try again later|temporarily.+unable/i;
function isJsonApiInternalServerError(raw: string): boolean {
if (!raw) {
return false;
}
const value = raw.toLowerCase();
// Anthropic often wraps transient 500s in JSON payloads like:
// Providers wrap transient 5xx errors in JSON payloads like:
// {"type":"error","error":{"type":"api_error","message":"Internal server error"}}
return value.includes('"type":"api_error"') && value.includes("internal server error");
// Non-standard providers (e.g. MiniMax) may use different message text:
// {"type":"api_error","message":"unknown error, 520 (1000)"}
if (!value.includes('"type":"api_error"')) {
return false;
}
// Billing and auth errors can also carry "type":"api_error". Exclude them so
// the more specific classifiers further down the chain handle them correctly.
if (isBillingErrorMessage(raw) || isAuthErrorMessage(raw) || isAuthPermanentErrorMessage(raw)) {
return false;
}
// Only match when the message contains a transient signal. api_error payloads
// with non-transient messages (e.g. context overflow, schema validation) should
// fall through to more specific classifiers or remain unclassified.
return API_ERROR_TRANSIENT_SIGNALS_RE.test(raw);
}
export function parseImageDimensionError(raw: string): {
@ -830,24 +850,27 @@ export function classifyFailoverReason(raw: string): FailoverReason | null {
// Treat remaining transient 5xx provider failures as retryable transport issues.
return "timeout";
}
if (isJsonApiInternalServerError(raw)) {
return "timeout";
}
if (isCloudCodeAssistFormatError(raw)) {
return "format";
}
// Billing and auth classifiers run before the broad isJsonApiInternalServerError
// check so that provider errors like {"type":"api_error","message":"insufficient
// balance"} are correctly classified as "billing"/"auth" rather than "timeout".
if (isBillingErrorMessage(raw)) {
return "billing";
}
if (isTimeoutErrorMessage(raw)) {
return "timeout";
}
if (isAuthPermanentErrorMessage(raw)) {
return "auth_permanent";
}
if (isAuthErrorMessage(raw)) {
return "auth";
}
if (isJsonApiInternalServerError(raw)) {
return "timeout";
}
if (isCloudCodeAssistFormatError(raw)) {
return "format";
}
if (isTimeoutErrorMessage(raw)) {
return "timeout";
}
return null;
}