diff --git a/src/agents/failover-error.ts b/src/agents/failover-error.ts
index dd482310a2b..67a9834ab98 100644
--- a/src/agents/failover-error.ts
+++ b/src/agents/failover-error.ts
@@ -1,3 +1,4 @@
+import type { FallbackOnErrorCodes } from "../config/types.agents-shared.js";
 import { readErrorName } from "../infra/errors.js";
 import {
   classifyFailoverReason,
@@ -328,3 +329,87 @@ export function coerceToFailoverError(
     cause: err instanceof Error ? err : undefined,
   });
 }
+
+/**
+ * HTTP status codes that trigger fallback in "default" mode.
+ * - Not found: 404 (model may have been removed)
+ * - Timeouts: 408
+ * - Rate limits: 429
+ * - Server errors: 500, 502, 503, 504
+ */
+const DEFAULT_FALLBACK_STATUS_CODES: readonly number[] = [404, 408, 429, 500, 502, 503, 504];
+
+/**
+ * Decide whether an error should trigger fallback under the configured policy.
+ *
+ * @param err - The error to check
+ * @param fallbackOnErrors - Which errors should trigger fallback
+ *   - undefined / "default": the status is in DEFAULT_FALLBACK_STATUS_CODES, or,
+ *     when the error carries no status, it resolves to a failover reason
+ *   - "all": every error triggers fallback, with or without a status
+ *   - number[]: the status is in the list; errors without a status never match
+ * @returns true if the error should trigger fallback
+ */
+export function shouldTriggerFallback(
+  err: unknown,
+  fallbackOnErrors?: FallbackOnErrorCodes,
+): boolean {
+  // "all" is unconditional. Deciding it before looking at the status means no
+  // hand-maintained code list can let an unusual status (e.g. 422, 501) escape fallback.
+  if (fallbackOnErrors === "all") {
+    return true;
+  }
+
+  const status = getStatusCode(err);
+
+  if (fallbackOnErrors === undefined || fallbackOnErrors === "default") {
+    // Without a status, fall back only when the error resolves to a failover reason.
+    return status === undefined
+      ? resolveFailoverReasonFromError(err) !== null
+      : DEFAULT_FALLBACK_STATUS_CODES.includes(status);
+  }
+
+  // Custom list: an error without a status cannot match any configured code.
+  return status !== undefined && fallbackOnErrors.includes(status);
+}
+
+/**
+ * Coerce an error to FailoverError if the configuration says it should trigger fallback.
+ *
+ * @param err - The error to check
+ * @param fallbackOnErrors - Which errors should trigger fallback (see shouldTriggerFallback)
+ * @param context - Additional context (provider, model, profileId) for a newly created error
+ * @returns err itself when it is already a FailoverError, a new FailoverError wrapping it
+ *   otherwise, or null when the error should not trigger fallback
+ */
+export function coerceToFailoverErrorWithConfig(
+  err: unknown,
+  fallbackOnErrors: FallbackOnErrorCodes | undefined,
+  context?: {
+    provider?: string;
+    model?: string;
+    profileId?: string;
+  },
+): FailoverError | null {
+  // The configuration gate applies to every error, existing FailoverErrors included.
+  if (!shouldTriggerFallback(err, fallbackOnErrors)) {
+    return null;
+  }
+  if (isFailoverError(err)) {
+    return err;
+  }
+
+  // "all" or a custom code list can admit errors that have no failover reason;
+  // label those "unknown".
+  const reason: FailoverReason = resolveFailoverReasonFromError(err) ?? "unknown";
+
+  return new FailoverError(getErrorMessage(err) || String(err), {
+    reason,
+    provider: context?.provider,
+    model: context?.model,
+    profileId: context?.profileId,
+    status: getStatusCode(err),
+    code: getErrorCode(err),
+    cause: err instanceof Error ? err : undefined,
+  });
+}
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
index 5fd6e533a1a..4576982cba9 100644
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -3,6 +3,7 @@ import {
   resolveAgentModelFallbackValues,
   resolveAgentModelPrimaryValue,
 } from "../config/model-input.js";
+import type { FallbackOnErrorCodes } from "../config/types.agents-shared.js";
 import { createSubsystemLogger } from "../logging/subsystem.js";
 import { sanitizeForLog } from "../terminal/ansi.js";
 import {
@@ -15,6 +16,7 @@ import {
 import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js";
 import {
   coerceToFailoverError,
+  // coerceToFailoverErrorWithConfig, // TODO: import once fallbackOnErrors is honored here
   describeFailoverError,
   isFailoverError,
   isTimeoutError,
@@ -516,6 +518,11 @@ export async function runWithModelFallback(params: {
   agentDir?: string;
   /** Optional explicit fallbacks list; when provided (even empty), replaces agents.defaults.model.fallbacks. */
   fallbacksOverride?: string[];
+  /**
+   * Which errors should trigger fallback ("default", "all", or a status-code list).
+   * NOTE(review): nothing visible in this patch reads this option yet — confirm it is wired up.
+   */
+  fallbackOnErrors?: FallbackOnErrorCodes;
   run: ModelFallbackRunFn;
   onError?: ModelFallbackErrorHandler;
 }): Promise> {
diff --git a/src/config/types.agents-shared.ts b/src/config/types.agents-shared.ts
index 3351d9903c9..d8901ba7228 100644
--- a/src/config/types.agents-shared.ts
+++ b/src/config/types.agents-shared.ts
@@ -5,6 +5,16 @@ import type {
   SandboxSshSettings,
 } from "./types.sandbox.js";
 
+/**
+ * Selects which failed requests cause a fallback to the next configured model.
+ * The "default" set is 404, 408, 429, 500, 502, 503 and 504; "all" falls back on
+ * every error; a number[] restricts fallback to exactly the listed HTTP status codes.
+ */
+export type FallbackOnErrorCodes =
+  | "all" // Every error triggers fallback
+  | "default" // 404, 408, 429, 500, 502, 503, 504 (also used when unset)
+  | number[]; // Custom list of HTTP status codes
+
 export type AgentModelConfig =
   | string
   | {
@@ -12,6 +22,21 @@ export type AgentModelConfig =
       primary?: string;
       /** Per-agent model fallbacks (provider/model). */
       fallbacks?: string[];
+      /**
+       * Which errors should trigger fallback to the next model.
+       * - "default": 404, 408, 429, 500, 502, 503, 504 [used when omitted]
+       * - "all": every error triggers fallback (including 400, 401, 403, 404)
+       * - number[]: Custom list of status codes (e.g., [400, 401, 403, 429, 500, 502, 503])
+       *
+       * @example
+       * // Enable fallback on all client and server errors
+       * { primary: "openai/gpt-4", fallbacks: ["anthropic/claude-3"], fallbackOnErrors: "all" }
+       *
+       * @example
+       * // Custom error codes
+       * { primary: "openai/gpt-4", fallbacks: ["anthropic/claude-3"], fallbackOnErrors: [400, 429, 500, 502, 503] }
+       */
+      fallbackOnErrors?: FallbackOnErrorCodes;
     };
 
 export type AgentSandboxConfig = {