diff --git a/src/agents/agent-command.ts b/src/agents/agent-command.ts index 5db40b13a27..da2b3f9eac6 100644 --- a/src/agents/agent-command.ts +++ b/src/agents/agent-command.ts @@ -57,6 +57,7 @@ import { listAgentIds, resolveAgentDir, resolveEffectiveModelFallbacks, + resolveRunModelFallbackOnErrors, resolveSessionAgentId, resolveAgentSkillsFilter, resolveAgentWorkspaceDir, @@ -1177,6 +1178,11 @@ async function agentCommandInternal( runId, agentDir, fallbacksOverride: effectiveFallbacksOverride, + fallbackOnErrors: resolveRunModelFallbackOnErrors({ + cfg, + agentId: sessionAgentId, + sessionKey, + }), run: (providerOverride, modelOverride, runOptions) => { const isFallbackRetry = fallbackAttemptIndex > 0; fallbackAttemptIndex += 1; diff --git a/src/agents/agent-scope.ts b/src/agents/agent-scope.ts index 5425b033dca..6af1cada008 100644 --- a/src/agents/agent-scope.ts +++ b/src/agents/agent-scope.ts @@ -1,8 +1,12 @@ import fs from "node:fs"; import path from "node:path"; import type { OpenClawConfig } from "../config/config.js"; -import { resolveAgentModelFallbackValues } from "../config/model-input.js"; +import { + resolveAgentModelFallbackOnErrors, + resolveAgentModelFallbackValues, +} from "../config/model-input.js"; import { resolveStateDir } from "../config/paths.js"; +import type { FallbackOnErrorCodes } from "../config/types.agents-shared.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; import { DEFAULT_AGENT_ID, @@ -230,6 +234,29 @@ export function resolveRunModelFallbacksOverride(params: { ); } +export function resolveAgentModelFallbackOnErrorsOverride( + cfg: OpenClawConfig, + agentId: string, +): FallbackOnErrorCodes | undefined { + const raw = resolveAgentConfig(cfg, agentId)?.model; + return resolveAgentModelFallbackOnErrors(raw); +} + +export function resolveRunModelFallbackOnErrors(params: { + cfg: OpenClawConfig | undefined; + agentId?: string | null; + sessionKey?: string | null; +}): FallbackOnErrorCodes | undefined { + if (!params.cfg) { + return undefined; + } + const raw = resolveAgentConfig( + params.cfg, + resolveFallbackAgentId({ agentId: params.agentId, sessionKey: params.sessionKey }), + )?.model; + return resolveAgentModelFallbackOnErrors(raw); +} + export function hasConfiguredModelFallbacks(params: { cfg: OpenClawConfig | undefined; agentId?: string | null; diff --git a/src/agents/failover-error.ts b/src/agents/failover-error.ts index dd482310a2b..acf34bf3183 100644 --- a/src/agents/failover-error.ts +++ b/src/agents/failover-error.ts @@ -1,3 +1,4 @@ +import type { FallbackOnErrorCodes } from "../config/types.agents-shared.js"; import { readErrorName } from "../infra/errors.js"; import { classifyFailoverReason, @@ -328,3 +329,91 @@ export function coerceToFailoverError( cause: err instanceof Error ? err : undefined, }); } + +/** + * Check if an error should trigger fallback based on the configured error codes. + * + * For "default" or undefined, this delegates to resolveFailoverReasonFromError() which + * matches the original behavior (no regression). + * + * @param err - The error to check + * @param fallbackOnErrors - Configuration for which errors should trigger fallback + * - "default": Use original behavior (same as no config) - any recognized failover reason + * - "all": All HTTP errors (4xx and 5xx) trigger fallback + * - number[]: Custom list of status codes + * @returns true if the error should trigger fallback + */ +export function shouldTriggerFallback( + err: unknown, + fallbackOnErrors?: FallbackOnErrorCodes, +): boolean { + const status = getStatusCode(err); + const reason = resolveFailoverReasonFromError(err); + + // For "default" or undefined, match original behavior exactly + // This delegates to the existing reason classification logic + if (fallbackOnErrors === undefined || fallbackOnErrors === "default") { + return reason !== null; + } + + // For "all", check if HTTP error (4xx or 5xx) + // Also allow non-HTTP errors with recognized reasons + if (fallbackOnErrors === "all") { + return status !== undefined ? status >= 400 : reason !== null; + } + + // For custom array, check specific status codes only + // Ignore non-HTTP errors even if they have a recognized reason + return status !== undefined && new Set(fallbackOnErrors).has(status); +} + +/** + * Coerce an error to FailoverError if it should trigger fallback based on configuration. + * + * @param err - The error to check + * @param fallbackOnErrors - Configuration for which errors should trigger fallback + * @param context - Additional context (provider, model, profileId) + * @returns FailoverError if the error should trigger fallback, null otherwise + */ +export function coerceToFailoverErrorWithConfig( + err: unknown, + fallbackOnErrors: FallbackOnErrorCodes | undefined, + context?: { + provider?: string; + model?: string; + profileId?: string; + }, +): FailoverError | null { + // First check if it's already a FailoverError + if (isFailoverError(err)) { + // Still need to check if it should trigger fallback based on config + if (!shouldTriggerFallback(err, fallbackOnErrors)) { + return null; + } + return err; + } + + // Check if error should trigger fallback + if (!shouldTriggerFallback(err, fallbackOnErrors)) { + return null; + } + + // Coerce to FailoverError + const status = getStatusCode(err); + const reason = resolveFailoverReasonFromError(err); + const message = getErrorMessage(err) || String(err); + const code = getErrorCode(err); + + // If we have a status but no reason, create a generic reason + const effectiveReason: FailoverReason = reason ?? "unknown"; + + return new FailoverError(message, { + reason: effectiveReason, + provider: context?.provider, + model: context?.model, + profileId: context?.profileId, + status, + code, + cause: err instanceof Error ? err : undefined, + }); +} diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 5fd6e533a1a..e0fdcdd40a9 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -3,6 +3,7 @@ import { resolveAgentModelFallbackValues, resolveAgentModelPrimaryValue, } from "../config/model-input.js"; +import type { FallbackOnErrorCodes } from "../config/types.agents-shared.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; import { sanitizeForLog } from "../terminal/ansi.js"; import { @@ -15,6 +16,7 @@ import { import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js"; import { coerceToFailoverError, + coerceToFailoverErrorWithConfig, describeFailoverError, isFailoverError, isTimeoutError, @@ -130,6 +132,7 @@ async function runFallbackCandidate(params: { provider: string; model: string; options?: ModelFallbackRunOptions; + fallbackOnErrors?: FallbackOnErrorCodes; }): Promise<{ ok: true; result: T } | { ok: false; error: unknown }> { try { const result = params.options @@ -142,10 +145,16 @@ async function runFallbackCandidate(params: { } catch (err) { // Normalize abort-wrapped rate-limit errors (e.g. Google Vertex RESOURCE_EXHAUSTED) // so they become FailoverErrors and continue the fallback loop instead of aborting. - const normalizedFailover = coerceToFailoverError(err, { - provider: params.provider, - model: params.model, - }); + // Use config-aware error coercion if fallbackOnErrors is provided. + const normalizedFailover = params.fallbackOnErrors + ? coerceToFailoverErrorWithConfig(err, params.fallbackOnErrors, { + provider: params.provider, + model: params.model, + }) + : coerceToFailoverError(err, { + provider: params.provider, + model: params.model, + }); if (shouldRethrowAbort(err) && !normalizedFailover) { throw err; } @@ -159,12 +168,14 @@ async function runFallbackAttempt(params: { model: string; attempts: FallbackAttempt[]; options?: ModelFallbackRunOptions; + fallbackOnErrors?: FallbackOnErrorCodes; }): Promise<{ success: ModelFallbackRunResult } | { error: unknown }> { const runResult = await runFallbackCandidate({ run: params.run, provider: params.provider, model: params.model, options: params.options, + fallbackOnErrors: params.fallbackOnErrors, }); if (runResult.ok) { return { @@ -516,6 +527,8 @@ export async function runWithModelFallback(params: { agentDir?: string; /** Optional explicit fallbacks list; when provided (even empty), replaces agents.defaults.model.fallbacks. */ fallbacksOverride?: string[]; + /** HTTP status codes that should trigger fallback. */ + fallbackOnErrors?: FallbackOnErrorCodes; run: ModelFallbackRunFn; onError?: ModelFallbackErrorHandler; }): Promise> { @@ -663,6 +676,7 @@ export async function runWithModelFallback(params: { ...candidate, attempts, options: runOptions, + fallbackOnErrors: params.fallbackOnErrors, }); if ("success" in attemptRun) { if (i > 0 || attempts.length > 0 || attemptedDuringCooldown) { @@ -711,11 +725,15 @@ export async function runWithModelFallback(params: { if (isLikelyContextOverflowError(errMessage)) { throw err; } - const normalized = - coerceToFailoverError(err, { - provider: candidate.provider, - model: candidate.model, - }) ?? err; + const normalized = params.fallbackOnErrors + ? (coerceToFailoverErrorWithConfig(err, params.fallbackOnErrors, { + provider: candidate.provider, + model: candidate.model, + }) ?? err) + : (coerceToFailoverError(err, { + provider: candidate.provider, + model: candidate.model, + }) ?? err); // Even unrecognized errors should not abort the fallback loop when // there are remaining candidates. Only abort/context-overflow errors @@ -779,6 +797,7 @@ export async function runWithImageModelFallback(params: { modelOverride?: string; run: (provider: string, model: string) => Promise; onError?: ModelFallbackErrorHandler; + fallbackOnErrors?: FallbackOnErrorCodes; }): Promise> { const candidates = resolveImageFallbackCandidates({ cfg: params.cfg, @@ -796,7 +815,12 @@ export async function runWithImageModelFallback(params: { for (let i = 0; i < candidates.length; i += 1) { const candidate = candidates[i]; - const attemptRun = await runFallbackAttempt({ run: params.run, ...candidate, attempts }); + const attemptRun = await runFallbackAttempt({ + run: params.run, + ...candidate, + attempts, + fallbackOnErrors: params.fallbackOnErrors, + }); if ("success" in attemptRun) { return attemptRun.success; } diff --git a/src/auto-reply/reply/agent-runner-utils.ts b/src/auto-reply/reply/agent-runner-utils.ts index abf6322a287..4b3fd44013c 100644 --- a/src/auto-reply/reply/agent-runner-utils.ts +++ b/src/auto-reply/reply/agent-runner-utils.ts @@ -1,4 +1,7 @@ -import { resolveRunModelFallbacksOverride } from "../../agents/agent-scope.js"; +import { + resolveRunModelFallbacksOverride, + resolveRunModelFallbackOnErrors, +} from "../../agents/agent-scope.js"; import type { NormalizedUsage } from "../../agents/usage.js"; import { getChannelPlugin } from "../../channels/plugins/index.js"; import type { ChannelId, ChannelThreadingToolContext } from "../../channels/plugins/types.js"; @@ -165,6 +168,11 @@ export function resolveModelFallbackOptions(run: FollowupRun["run"]) { agentId: run.agentId, sessionKey: run.sessionKey, }), + fallbackOnErrors: resolveRunModelFallbackOnErrors({ + cfg: run.config, + agentId: run.agentId, + sessionKey: run.sessionKey, + }), }; } diff --git a/src/config/model-input.ts b/src/config/model-input.ts index 197947ab853..263dbcf86b1 100644 --- a/src/config/model-input.ts +++ b/src/config/model-input.ts @@ -1,4 +1,4 @@ -import type { AgentModelConfig } from "./types.agents-shared.js"; +import type { AgentModelConfig, FallbackOnErrorCodes } from "./types.agents-shared.js"; type AgentModelListLike = { primary?: string; @@ -24,6 +24,21 @@ export function resolveAgentModelFallbackValues(model?: AgentModelConfig): strin return Array.isArray(model.fallbacks) ? model.fallbacks : []; } +/** + * Resolve the fallbackOnErrors configuration from an AgentModelConfig. + * + * @param model - The agent model configuration + * @returns The fallbackOnErrors value ("all", "default", number[], or undefined) + */ +export function resolveAgentModelFallbackOnErrors( + model?: AgentModelConfig, +): FallbackOnErrorCodes | undefined { + if (!model || typeof model !== "object") { + return undefined; + } + return model.fallbackOnErrors; +} + export function toAgentModelListLike(model?: AgentModelConfig): AgentModelListLike | undefined { if (typeof model === "string") { const primary = model.trim(); diff --git a/src/config/types.agents-shared.ts b/src/config/types.agents-shared.ts index 3351d9903c9..73dca0d6360 100644 --- a/src/config/types.agents-shared.ts +++ b/src/config/types.agents-shared.ts @@ -5,6 +5,16 @@ import type { SandboxSshSettings, } from "./types.sandbox.js"; +/** + * HTTP status codes that should trigger model fallback. + * Default behavior triggers fallback on server errors, rate limits, timeouts, and not-found errors. + * Users can extend this to include all client errors with "all" or specify custom codes. + */ +export type FallbackOnErrorCodes = + | "all" // All HTTP errors (4xx and 5xx) trigger fallback + | "default" // Server errors (500, 502, 503, 504) + rate limits (429) + timeout (408) + not found (404) + | number[]; // Custom list of HTTP status codes + export type AgentModelConfig = | string | { @@ -12,6 +22,21 @@ export type AgentModelConfig = primary?: string; /** Per-agent model fallbacks (provider/model). */ fallbacks?: string[]; + /** + * HTTP status codes that should trigger fallback to next model. + * - "default": Server errors (500, 502, 503, 504) + rate limits (429) + timeout (408) + not found (404) [default] + * - "all": All HTTP errors (4xx and 5xx) trigger fallback + * - number[]: Custom list of status codes (e.g., [400, 401, 403, 429, 500, 502, 503]) + * + * @example + * // Enable fallback on all client and server errors + * { primary: "openai/gpt-4", fallbacks: ["anthropic/claude-3"], fallbackOnErrors: "all" } + * + * @example + * // Custom error codes + * { primary: "openai/gpt-4", fallbacks: ["anthropic/claude-3"], fallbackOnErrors: [400, 429, 500, 502, 503] } + */ + fallbackOnErrors?: FallbackOnErrorCodes; }; export type AgentSandboxConfig = { diff --git a/src/cron/isolated-agent/run.ts b/src/cron/isolated-agent/run.ts index 1a122f56864..4471bf0a7cd 100644 --- a/src/cron/isolated-agent/run.ts +++ b/src/cron/isolated-agent/run.ts @@ -5,6 +5,7 @@ import { resolveAgentModelFallbacksOverride, resolveAgentWorkspaceDir, resolveDefaultAgentId, + resolveRunModelFallbackOnErrors, } from "../../agents/agent-scope.js"; import { resolveSessionAuthProfileOverride } from "../../agents/auth-profiles/session-override.js"; import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-budget.js"; @@ -586,6 +587,10 @@ export async function runCronIsolatedAgentTurn(params: { agentDir, fallbacksOverride: payloadFallbacks ?? resolveAgentModelFallbacksOverride(params.cfg, agentId), + fallbackOnErrors: resolveRunModelFallbackOnErrors({ + cfg: cfgWithAgentDefaults, + agentId, + }), run: async (providerOverride, modelOverride, runOptions) => { if (abortSignal?.aborted) { throw new Error(abortReason()); diff --git a/test/fixtures/plugin-extension-import-boundary-inventory.json b/test/fixtures/plugin-extension-import-boundary-inventory.json index 0894fe0d5b5..ead171321f9 100644 --- a/test/fixtures/plugin-extension-import-boundary-inventory.json +++ b/test/fixtures/plugin-extension-import-boundary-inventory.json @@ -31,14 +31,6 @@ "resolvedPath": "extensions/imessage/runtime-api.js", "reason": "imports extension-owned file from src/plugins" }, - { - "file": "src/plugins/runtime/runtime-matrix.ts", - "line": 4, - "kind": "import", - "specifier": "../../../extensions/matrix/runtime-api.js", - "resolvedPath": "extensions/matrix/runtime-api.js", - "reason": "imports extension-owned file from src/plugins" - }, { "file": "src/plugins/runtime/runtime-slack-ops.runtime.ts", "line": 10,