diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 8c0a0b1994d..13f3ea1d388 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -853,11 +853,72 @@ describe("classifyFailoverReason", () => { expect(classifyFailoverReason("key has been disabled")).toBe("auth_permanent"); expect(classifyFailoverReason("account has been deactivated")).toBe("auth_permanent"); }); - it("classifies JSON api_error internal server failures as timeout", () => { + it("classifies JSON api_error with transient signal as timeout", () => { expect( classifyFailoverReason( '{"type":"error","error":{"type":"api_error","message":"Internal server error"}}', ), ).toBe("timeout"); + // MiniMax non-standard message + expect( + classifyFailoverReason('{"type":"api_error","message":"unknown error, 520 (1000)"}'), + ).toBe("timeout"); + // Overloaded variant + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"Service temporarily unavailable"}}', + ), + ).toBe("timeout"); + }); + it("does not classify non-transient api_error payloads as timeout", () => { + // Context overflow - not transient + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"Request size exceeds model context window"}}', + ), + ).not.toBe("timeout"); + // Schema/validation error - not transient + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"messages.1.content.1.tool_use.id should match pattern"}}', + ), + ).not.toBe("timeout"); + // Generic unknown api_error without transient wording - should not be retried + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"invalid input format"}}', + ), + ).not.toBe("timeout"); + }); + it("does not shadow billing errors that carry api_error type", () => { + // A provider may wrap a billing error in a JSON payload with "type":"api_error". + // The billing classifier must win over the broad api_error transient match. + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"insufficient credits"}}', + ), + ).toBe("billing"); + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"Payment required"}}', + ), + ).toBe("billing"); + }); + it("does not shadow auth errors that carry api_error type", () => { + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"invalid api key"}}', + ), + ).toBe("auth"); + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"unauthorized"}}', + ), + ).toBe("auth"); + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"permission_error"}}', + ), + ).toBe("auth_permanent"); }); }); diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index 7719ecb41a0..3019a184fcd 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -670,14 +670,34 @@ export function isBillingAssistantError(msg: AssistantMessage | undefined): bool return isBillingErrorMessage(msg.errorMessage ?? ""); } +// Transient signal patterns for api_error payloads. Only treat an api_error as +// retryable when the message text itself indicates a transient server issue. +// Non-transient api_error payloads (context overflow, validation/schema errors) +// must NOT be classified as timeout. +const API_ERROR_TRANSIENT_SIGNALS_RE = + /internal server error|overload|temporarily unavailable|service unavailable|unknown error|server error|bad gateway|gateway timeout|upstream error|backend error|try again later|temporarily.+unable/i; + function isJsonApiInternalServerError(raw: string): boolean { if (!raw) { return false; } const value = raw.toLowerCase(); - // Anthropic often wraps transient 500s in JSON payloads like: + // Providers wrap transient 5xx errors in JSON payloads like: // {"type":"error","error":{"type":"api_error","message":"Internal server error"}} - return value.includes('"type":"api_error"') && value.includes("internal server error"); + // Non-standard providers (e.g. MiniMax) may use different message text: + // {"type":"api_error","message":"unknown error, 520 (1000)"} + if (!value.includes('"type":"api_error"')) { + return false; + } + // Billing and auth errors can also carry "type":"api_error". Exclude them so + // the more specific classifiers further down the chain handle them correctly. + if (isBillingErrorMessage(raw) || isAuthErrorMessage(raw) || isAuthPermanentErrorMessage(raw)) { + return false; + } + // Only match when the message contains a transient signal. api_error payloads + // with non-transient messages (e.g. context overflow, schema validation) should + // fall through to more specific classifiers or remain unclassified. + return API_ERROR_TRANSIENT_SIGNALS_RE.test(raw); } export function parseImageDimensionError(raw: string): { @@ -830,24 +850,27 @@ export function classifyFailoverReason(raw: string): FailoverReason | null { // Treat remaining transient 5xx provider failures as retryable transport issues. return "timeout"; } - if (isJsonApiInternalServerError(raw)) { - return "timeout"; - } - if (isCloudCodeAssistFormatError(raw)) { - return "format"; - } + // Billing and auth classifiers run before the broad isJsonApiInternalServerError + // check so that provider errors like {"type":"api_error","message":"insufficient + // balance"} are correctly classified as "billing"/"auth" rather than "timeout". if (isBillingErrorMessage(raw)) { return "billing"; } - if (isTimeoutErrorMessage(raw)) { - return "timeout"; - } if (isAuthPermanentErrorMessage(raw)) { return "auth_permanent"; } if (isAuthErrorMessage(raw)) { return "auth"; } + if (isJsonApiInternalServerError(raw)) { + return "timeout"; + } + if (isCloudCodeAssistFormatError(raw)) { + return "format"; + } + if (isTimeoutErrorMessage(raw)) { + return "timeout"; + } return null; }