fix: narrow api_error transient detection to require transient signal wording

This commit is contained in:
Ayush Ojha 2026-03-18 15:25:15 -07:00
parent 04755c2327
commit d762b2a92e
2 changed files with 42 additions and 2 deletions

View File

@ -853,12 +853,42 @@ describe("classifyFailoverReason", () => {
expect(classifyFailoverReason("key has been disabled")).toBe("auth_permanent");
expect(classifyFailoverReason("account has been deactivated")).toBe("auth_permanent");
});
it("classifies JSON api_error internal server failures as timeout", () => {
it("classifies JSON api_error with transient signal as timeout", () => {
// Every payload in this test is an api_error whose message carries transient
// wording, so each one is expected to classify as "timeout".
// Nested error envelope with an "Internal server error" message
expect(
classifyFailoverReason(
'{"type":"error","error":{"type":"api_error","message":"Internal server error"}}',
),
).toBe("timeout");
// MiniMax non-standard message (flat payload, no nested "error" object)
expect(
classifyFailoverReason('{"type":"api_error","message":"unknown error, 520 (1000)"}'),
).toBe("timeout");
// "Temporarily unavailable" variant (the message does not mention overload)
expect(
classifyFailoverReason(
'{"type":"error","error":{"type":"api_error","message":"Service temporarily unavailable"}}',
),
).toBe("timeout");
});
it("does not classify non-transient api_error payloads as timeout", () => {
// Every payload in this test is an api_error whose message lacks transient
// wording. The assertions only require that the result is not "timeout";
// they do not pin which classification (if any) is returned instead.
// Context overflow - not transient
expect(
classifyFailoverReason(
'{"type":"error","error":{"type":"api_error","message":"Request size exceeds model context window"}}',
),
).not.toBe("timeout");
// Schema/validation error - not transient
expect(
classifyFailoverReason(
'{"type":"error","error":{"type":"api_error","message":"messages.1.content.1.tool_use.id should match pattern"}}',
),
).not.toBe("timeout");
// Generic unknown api_error without transient wording - should not be retried
expect(
classifyFailoverReason(
'{"type":"error","error":{"type":"api_error","message":"invalid input format"}}',
),
).not.toBe("timeout");
});
it("does not shadow billing errors that carry api_error type", () => {
// A provider may wrap a billing error in a JSON payload with "type":"api_error".

View File

@ -848,6 +848,13 @@ export function isBillingAssistantError(msg: AssistantMessage | undefined): bool
return isBillingErrorMessage(msg.errorMessage ?? "");
}
// Transient-signal patterns for api_error payloads. An api_error is treated as
// retryable only when the tested text contains wording that points at a
// transient server-side issue. Non-transient api_error payloads (context
// overflow, validation/schema errors) must NOT be classified as timeout.
//
// Notes on the pattern:
// - It is case-insensitive (`i`) and unanchored, so any alternative may match
//   anywhere in the tested string.
// - It has no `g`/`y` flag, so `.test()` carries no `lastIndex` state from one
//   call to the next.
// - "internal server error" is also matched by the shorter "server error"
//   alternative, so it is redundant for matching purposes.
// - "unknown error" covers non-standard provider messages such as
//   "unknown error, 520 (1000)".
// - `temporarily.+unable` is greedy: it accepts any run of non-newline
//   characters between "temporarily" and a later "unable".
const API_ERROR_TRANSIENT_SIGNALS_RE =
/internal server error|overload|temporarily unavailable|service unavailable|unknown error|server error|bad gateway|gateway timeout|upstream error|backend error|try again later|temporarily.+unable/i;
function isJsonApiInternalServerError(raw: string): boolean {
if (!raw) {
return false;
@ -865,7 +872,10 @@ function isJsonApiInternalServerError(raw: string): boolean {
if (isBillingErrorMessage(raw) || isAuthErrorMessage(raw) || isAuthPermanentErrorMessage(raw)) {
return false;
}
return true;
// Only match when the raw payload contains a transient signal. api_error payloads
// with non-transient wording (e.g. context overflow, schema validation) should
// fall through to more specific classifiers or remain unclassified.
return API_ERROR_TRANSIENT_SIGNALS_RE.test(raw);
}
export function parseImageDimensionError(raw: string): {