From 1cad2605ddcbeb6a4aa3d76f562b9a8df6731e58 Mon Sep 17 00:00:00 2001 From: subaochen Date: Sat, 21 Mar 2026 11:22:03 +0800 Subject: [PATCH] fix: wire up fallbackOnErrors parameter and fix edge cases - Wire fallbackOnErrors through runFallbackCandidate and runFallbackAttempt - Use coerceToFailoverErrorWithConfig when fallbackOnErrors is provided - Fix "all" mode to use status >= 400 instead of fixed list - Fix "all" mode to not trigger fallback for non-HTTP errors - Update comments to match actual default status codes (include 504, 404) - Add fallbackOnErrors to runWithImageModelFallback for consistency Addresses review feedback from Greptile: - Critical: fallbackOnErrors parameter is now actually used - Logic: "all" now correctly covers all 4xx/5xx status codes - Logic: "all" no longer triggers fallback for unrelated runtime errors - Style: Comments now match actual implementation --- src/agents/failover-error.ts | 35 ++++++++------------------ src/agents/model-fallback.ts | 42 +++++++++++++++++++++++-------- src/config/types.agents-shared.ts | 12 ++++----- 3 files changed, 48 insertions(+), 41 deletions(-) diff --git a/src/agents/failover-error.ts b/src/agents/failover-error.ts index 67a9834ab98..ce6f3a3729c 100644 --- a/src/agents/failover-error.ts +++ b/src/agents/failover-error.ts @@ -339,18 +339,13 @@ export function coerceToFailoverError( */ const DEFAULT_FALLBACK_STATUS_CODES = [408, 429, 500, 502, 503, 504, 404]; -/** - * All HTTP status codes that could trigger fallback (including client errors). - */ -const ALL_FALLBACK_STATUS_CODES = [400, 401, 402, 403, 404, 405, 408, 410, 429, 500, 502, 503, 504]; - /** * Check if an error should trigger fallback based on the configured error codes. * * @param err - The error to check * @param fallbackOnErrors - Configuration for which errors should trigger fallback - * - "default": Use default behavior (server errors + rate limits) - * - "all": All errors trigger fallback + * - "default": Use default behavior (server errors + rate limits + timeout + not found) + * - "all": All HTTP errors (4xx and 5xx) trigger fallback * - number[]: Custom list of status codes * @returns true if the error should trigger fallback */ @@ -363,29 +358,21 @@ export function shouldTriggerFallback( // If no status code found, try to determine from error reason if (status === undefined) { const reason = resolveFailoverReasonFromError(err); - // Default behavior: non-null reason means it's a recognized failover error - if (fallbackOnErrors === undefined || fallbackOnErrors === "default") { - return reason !== null; - } - // For "all", any reason (including null) should trigger fallback - if (fallbackOnErrors === "all") { - return true; - } - // For custom codes, we can't determine without status - return false; + // For any mode, only trigger fallback if we have a recognized failover reason + // This prevents unrelated runtime errors (parse errors, filesystem errors) from triggering fallback + return reason !== null; } - // Determine which status codes to use - let allowedCodes: number[]; + // Determine if status code should trigger fallback if (fallbackOnErrors === undefined || fallbackOnErrors === "default") { - allowedCodes = DEFAULT_FALLBACK_STATUS_CODES; + return DEFAULT_FALLBACK_STATUS_CODES.includes(status); } else if (fallbackOnErrors === "all") { - allowedCodes = ALL_FALLBACK_STATUS_CODES; + // "all" means all HTTP errors (4xx and 5xx) + return status >= 400; } else { - allowedCodes = fallbackOnErrors; + // Custom list of status codes + return fallbackOnErrors.includes(status); } - - return allowedCodes.includes(status); } /** diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 4576982cba9..63225e6a2f9 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -16,7 +16,7 @@ import { import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js"; import { coerceToFailoverError, - // coerceToFailoverErrorWithConfig, // TODO: use in future implementation + coerceToFailoverErrorWithConfig, describeFailoverError, isFailoverError, isTimeoutError, @@ -132,6 +132,7 @@ async function runFallbackCandidate(params: { provider: string; model: string; options?: ModelFallbackRunOptions; + fallbackOnErrors?: FallbackOnErrorCodes; }): Promise<{ ok: true; result: T } | { ok: false; error: unknown }> { try { const result = params.options @@ -144,10 +145,16 @@ async function runFallbackCandidate(params: { } catch (err) { // Normalize abort-wrapped rate-limit errors (e.g. Google Vertex RESOURCE_EXHAUSTED) // so they become FailoverErrors and continue the fallback loop instead of aborting. - const normalizedFailover = coerceToFailoverError(err, { - provider: params.provider, - model: params.model, - }); + // Use config-aware error coercion if fallbackOnErrors is provided. + const normalizedFailover = params.fallbackOnErrors + ? coerceToFailoverErrorWithConfig(err, params.fallbackOnErrors, { + provider: params.provider, + model: params.model, + }) + : coerceToFailoverError(err, { + provider: params.provider, + model: params.model, + }); if (shouldRethrowAbort(err) && !normalizedFailover) { throw err; } @@ -161,12 +168,14 @@ async function runFallbackAttempt(params: { model: string; attempts: FallbackAttempt[]; options?: ModelFallbackRunOptions; + fallbackOnErrors?: FallbackOnErrorCodes; }): Promise<{ success: ModelFallbackRunResult } | { error: unknown }> { const runResult = await runFallbackCandidate({ run: params.run, provider: params.provider, model: params.model, options: params.options, + fallbackOnErrors: params.fallbackOnErrors, }); if (runResult.ok) { return { @@ -667,6 +676,7 @@ export async function runWithModelFallback(params: { ...candidate, attempts, options: runOptions, + fallbackOnErrors: params.fallbackOnErrors, }); if ("success" in attemptRun) { if (i > 0 || attempts.length > 0 || attemptedDuringCooldown) { @@ -715,11 +725,15 @@ export async function runWithModelFallback(params: { if (isLikelyContextOverflowError(errMessage)) { throw err; } - const normalized = - coerceToFailoverError(err, { - provider: candidate.provider, - model: candidate.model, - }) ?? err; + const normalized = params.fallbackOnErrors + ? coerceToFailoverErrorWithConfig(err, params.fallbackOnErrors, { + provider: candidate.provider, + model: candidate.model, + }) ?? err + : coerceToFailoverError(err, { + provider: candidate.provider, + model: candidate.model, + }) ?? err; // Even unrecognized errors should not abort the fallback loop when // there are remaining candidates. Only abort/context-overflow errors @@ -783,6 +797,7 @@ export async function runWithImageModelFallback(params: { modelOverride?: string; run: (provider: string, model: string) => Promise; onError?: ModelFallbackErrorHandler; + fallbackOnErrors?: FallbackOnErrorCodes; }): Promise> { const candidates = resolveImageFallbackCandidates({ cfg: params.cfg, @@ -800,7 +815,12 @@ export async function runWithImageModelFallback(params: { for (let i = 0; i < candidates.length; i += 1) { const candidate = candidates[i]; - const attemptRun = await runFallbackAttempt({ run: params.run, ...candidate, attempts }); + const attemptRun = await runFallbackAttempt({ + run: params.run, + ...candidate, + attempts, + fallbackOnErrors: params.fallbackOnErrors, + }); if ("success" in attemptRun) { return attemptRun.success; } diff --git a/src/config/types.agents-shared.ts b/src/config/types.agents-shared.ts index d8901ba7228..73dca0d6360 100644 --- a/src/config/types.agents-shared.ts +++ b/src/config/types.agents-shared.ts @@ -7,12 +7,12 @@ import type { /** * HTTP status codes that should trigger model fallback. - * Default behavior only triggers fallback on server errors (5xx) and rate limits (429). - * Users can extend this to include client errors like 400, 401, 403, etc. + * Default behavior triggers fallback on server errors, rate limits, timeouts, and not-found errors. + * Users can extend this to include all client errors with "all" or specify custom codes. */ export type FallbackOnErrorCodes = - | "all" // All errors trigger fallback - | "default" // Server errors + rate limits only (500, 502, 503, 429, 408) + | "all" // All HTTP errors (4xx and 5xx) trigger fallback + | "default" // Server errors (500, 502, 503, 504) + rate limits (429) + timeout (408) + not found (404) | number[]; // Custom list of HTTP status codes export type AgentModelConfig = @@ -24,8 +24,8 @@ export type AgentModelConfig = fallbacks?: string[]; /** * HTTP status codes that should trigger fallback to next model. - * - "default": Server errors (5xx) + rate limits (429) + timeout (408) [default] - * - "all": All errors trigger fallback (including 400, 401, 403, 404) + * - "default": Server errors (500, 502, 503, 504) + rate limits (429) + timeout (408) + not found (404) [default] + * - "all": All HTTP errors (4xx and 5xx) trigger fallback * - number[]: Custom list of status codes (e.g., [400, 401, 403, 429, 500, 502, 503]) * * @example