From 876566439e24236ea681d394512e03874d85f4de Mon Sep 17 00:00:00 2001 From: Ayush Ojha Date: Wed, 18 Mar 2026 01:18:32 -0700 Subject: [PATCH 1/3] fix: generalize api_error detection for fallback model triggering --- src/agents/pi-embedded-helpers/errors.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index 605cdd22118..ce03a8482ae 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -853,9 +853,12 @@ function isJsonApiInternalServerError(raw: string): boolean { return false; } const value = raw.toLowerCase(); - // Anthropic often wraps transient 500s in JSON payloads like: + // Providers wrap transient 5xx errors in JSON payloads like: // {"type":"error","error":{"type":"api_error","message":"Internal server error"}} - return value.includes('"type":"api_error"') && value.includes("internal server error"); + // Non-standard providers (e.g. MiniMax) may use different message text: + // {"type":"api_error","message":"unknown error, 520 (1000)"} + // Any api_error type indicates a provider-side failure regardless of message text. + return value.includes('"type":"api_error"'); } export function parseImageDimensionError(raw: string): { From 04755c2327dd465c08fec89d5124c3bf8bb0040e Mon Sep 17 00:00:00 2001 From: Ayush Ojha Date: Wed, 18 Mar 2026 01:31:33 -0700 Subject: [PATCH 2/3] fix: narrow api_error detection to exclude billing/auth patterns --- ...dded-helpers.isbillingerrormessage.test.ts | 31 ++++++++++++++++++ src/agents/pi-embedded-helpers/errors.ts | 32 ++++++++++++------- 2 files changed, 52 insertions(+), 11 deletions(-) diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 8c0a0b1994d..09262aa6769 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -860,4 +860,35 @@ describe("classifyFailoverReason", () => { ), ).toBe("timeout"); }); + it("does not shadow billing errors that carry api_error type", () => { + // A provider may wrap a billing error in a JSON payload with "type":"api_error". + // The billing classifier must win over the broad api_error transient match. + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"insufficient credits"}}', + ), + ).toBe("billing"); + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"Payment required"}}', + ), + ).toBe("billing"); + }); + it("does not shadow auth errors that carry api_error type", () => { + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"invalid api key"}}', + ), + ).toBe("auth"); + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"unauthorized"}}', + ), + ).toBe("auth"); + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"permission_error"}}', + ), + ).toBe("auth_permanent"); + }); }); diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index ce03a8482ae..10725427566 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -857,8 +857,15 @@ function isJsonApiInternalServerError(raw: string): boolean { // {"type":"error","error":{"type":"api_error","message":"Internal server error"}} // Non-standard providers (e.g. MiniMax) may use different message text: // {"type":"api_error","message":"unknown error, 520 (1000)"} - // Any api_error type indicates a provider-side failure regardless of message text. - return value.includes('"type":"api_error"'); + if (!value.includes('"type":"api_error"')) { + return false; + } + // Billing and auth errors can also carry "type":"api_error". Exclude them so + // the more specific classifiers further down the chain handle them correctly. + if (isBillingErrorMessage(raw) || isAuthErrorMessage(raw) || isAuthPermanentErrorMessage(raw)) { + return false; + } + return true; } export function parseImageDimensionError(raw: string): { @@ -1011,24 +1018,27 @@ export function classifyFailoverReason(raw: string): FailoverReason | null { // Treat remaining transient 5xx provider failures as retryable transport issues. return "timeout"; } - if (isJsonApiInternalServerError(raw)) { - return "timeout"; - } - if (isCloudCodeAssistFormatError(raw)) { - return "format"; - } + // Billing and auth classifiers run before the broad isJsonApiInternalServerError + // check so that provider errors like {"type":"api_error","message":"insufficient + // balance"} are correctly classified as "billing"/"auth" rather than "timeout". if (isBillingErrorMessage(raw)) { return "billing"; } - if (isTimeoutErrorMessage(raw)) { - return "timeout"; - } if (isAuthPermanentErrorMessage(raw)) { return "auth_permanent"; } if (isAuthErrorMessage(raw)) { return "auth"; } + if (isJsonApiInternalServerError(raw)) { + return "timeout"; + } + if (isCloudCodeAssistFormatError(raw)) { + return "format"; + } + if (isTimeoutErrorMessage(raw)) { + return "timeout"; + } return null; } From d762b2a92e41e406b788bb6f5191b770985c6878 Mon Sep 17 00:00:00 2001 From: Ayush Ojha Date: Wed, 18 Mar 2026 15:25:15 -0700 Subject: [PATCH 3/3] fix: narrow api_error transient detection to require transient signal wording --- ...dded-helpers.isbillingerrormessage.test.ts | 32 ++++++++++++++++++- src/agents/pi-embedded-helpers/errors.ts | 12 ++++++- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 09262aa6769..13f3ea1d388 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -853,12 +853,42 @@ describe("classifyFailoverReason", () => { expect(classifyFailoverReason("key has been disabled")).toBe("auth_permanent"); expect(classifyFailoverReason("account has been deactivated")).toBe("auth_permanent"); }); - it("classifies JSON api_error internal server failures as timeout", () => { + it("classifies JSON api_error with transient signal as timeout", () => { expect( classifyFailoverReason( '{"type":"error","error":{"type":"api_error","message":"Internal server error"}}', ), ).toBe("timeout"); + // MiniMax non-standard message + expect( + classifyFailoverReason('{"type":"api_error","message":"unknown error, 520 (1000)"}'), + ).toBe("timeout"); + // Overloaded variant + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"Service temporarily unavailable"}}', + ), + ).toBe("timeout"); + }); + it("does not classify non-transient api_error payloads as timeout", () => { + // Context overflow - not transient + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"Request size exceeds model context window"}}', + ), + ).not.toBe("timeout"); + // Schema/validation error - not transient + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"messages.1.content.1.tool_use.id should match pattern"}}', + ), + ).not.toBe("timeout"); + // Generic unknown api_error without transient wording - should not be retried + expect( + classifyFailoverReason( + '{"type":"error","error":{"type":"api_error","message":"invalid input format"}}', + ), + ).not.toBe("timeout"); }); it("does not shadow billing errors that carry api_error type", () => { // A provider may wrap a billing error in a JSON payload with "type":"api_error". diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index 10725427566..e978f8ea512 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -848,6 +848,13 @@ export function isBillingAssistantError(msg: AssistantMessage | undefined): bool return isBillingErrorMessage(msg.errorMessage ?? ""); } +// Transient signal patterns for api_error payloads. Only treat an api_error as +// retryable when the message text itself indicates a transient server issue. +// Non-transient api_error payloads (context overflow, validation/schema errors) +// must NOT be classified as timeout. +const API_ERROR_TRANSIENT_SIGNALS_RE = + /internal server error|overload|temporarily unavailable|service unavailable|unknown error|server error|bad gateway|gateway timeout|upstream error|backend error|try again later|temporarily.+unable/i; + function isJsonApiInternalServerError(raw: string): boolean { if (!raw) { return false; @@ -865,7 +872,10 @@ function isJsonApiInternalServerError(raw: string): boolean { if (isBillingErrorMessage(raw) || isAuthErrorMessage(raw) || isAuthPermanentErrorMessage(raw)) { return false; } - return true; + // Only match when the message contains a transient signal. api_error payloads + // with non-transient messages (e.g. context overflow, schema validation) should + // fall through to more specific classifiers or remain unclassified. + return API_ERROR_TRANSIENT_SIGNALS_RE.test(raw); } export function parseImageDimensionError(raw: string): {