diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 09262aa6769..13f3ea1d388 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -853,12 +853,42 @@ describe("classifyFailoverReason", () => { expect(classifyFailoverReason("key has been disabled")).toBe("auth_permanent"); expect(classifyFailoverReason("account has been deactivated")).toBe("auth_permanent"); }); - it("classifies JSON api_error internal server failures as timeout", () => { + it("classifies JSON api_error with transient signal as timeout", () => { expect( classifyFailoverReason( '{"type":"error","error":{"type":"api_error","message":"Internal server error"}}', ), ).toBe("timeout"); + // MiniMax non-standard message + expect( + classifyFailoverReason('{"type":"api_error","message":"unknown error, 520 (1000)"}'), + ).toBe("timeout"); + // Overloaded variant + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"Service temporarily unavailable"}}', + ), + ).toBe("timeout"); + }); + it("does not classify non-transient api_error payloads as timeout", () => { + // Context overflow - not transient + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"Request size exceeds model context window"}}', + ), + ).not.toBe("timeout"); + // Schema/validation error - not transient + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"messages.1.content.1.tool_use.id should match pattern"}}', + ), + ).not.toBe("timeout"); + // Generic unknown api_error without transient wording - should not be retried + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"invalid input format"}}', + ), + ).not.toBe("timeout"); }); it("does not shadow billing errors that carry api_error type", () => { // A provider may wrap a billing error in a JSON payload with "type":"api_error". diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index 10725427566..e978f8ea512 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -848,6 +848,13 @@ export function isBillingAssistantError(msg: AssistantMessage | undefined): bool return isBillingErrorMessage(msg.errorMessage ?? ""); } +// Transient signal patterns for api_error payloads. Only treat an api_error as +// retryable when the message text itself indicates a transient server issue. +// Non-transient api_error payloads (context overflow, validation/schema errors) +// must NOT be classified as timeout. +const API_ERROR_TRANSIENT_SIGNALS_RE = + /internal server error|overload|temporarily unavailable|service unavailable|unknown error|server error|bad gateway|gateway timeout|upstream error|backend error|try again later|temporarily.+unable/i; + function isJsonApiInternalServerError(raw: string): boolean { if (!raw) { return false; @@ -865,7 +872,10 @@ function isJsonApiInternalServerError(raw: string): boolean { if (isBillingErrorMessage(raw) || isAuthErrorMessage(raw) || isAuthPermanentErrorMessage(raw)) { return false; } - return true; + // Only match when the message contains a transient signal. api_error payloads + // with non-transient messages (e.g. context overflow, schema validation) should + // fall through to more specific classifiers or remain unclassified. + return API_ERROR_TRANSIENT_SIGNALS_RE.test(raw); } export function parseImageDimensionError(raw: string): {