Merge b46a671a9147f1658b5e4de94e1c9ffd416f0598 into 598f1826d8b2bc969aace2c6459824737667218c

2026-03-21 04:00:05 +00:00 · 2026-03-21 04:00:05 +00:00 · afe057040e
commit afe057040e
parent 598f1826d8 b46a671a91
9 changed files with 212 additions and 21 deletions
--- a/src/agents/agent-command.ts
+++ b/src/agents/agent-command.ts
@ -57,6 +57,7 @@ import {
  listAgentIds,
  resolveAgentDir,
  resolveEffectiveModelFallbacks,
+  resolveRunModelFallbackOnErrors,
  resolveSessionAgentId,
  resolveAgentSkillsFilter,
  resolveAgentWorkspaceDir,
@ -1177,6 +1178,11 @@ async function agentCommandInternal(
        runId,
        agentDir,
        fallbacksOverride: effectiveFallbacksOverride,
+        fallbackOnErrors: resolveRunModelFallbackOnErrors({
+          cfg,
+          agentId: sessionAgentId,
+          sessionKey,
+        }),
        run: (providerOverride, modelOverride, runOptions) => {
          const isFallbackRetry = fallbackAttemptIndex > 0;
          fallbackAttemptIndex += 1;
--- a/src/agents/agent-scope.ts
+++ b/src/agents/agent-scope.ts
@ -1,8 +1,12 @@
 import fs from "node:fs";
 import path from "node:path";
 import type { OpenClawConfig } from "../config/config.js";
-import { resolveAgentModelFallbackValues } from "../config/model-input.js";
+import {
+  resolveAgentModelFallbackOnErrors,
+  resolveAgentModelFallbackValues,
+} from "../config/model-input.js";
 import { resolveStateDir } from "../config/paths.js";
+import type { FallbackOnErrorCodes } from "../config/types.agents-shared.js";
 import { createSubsystemLogger } from "../logging/subsystem.js";
 import {
  DEFAULT_AGENT_ID,
@ -230,6 +234,29 @@ export function resolveRunModelFallbacksOverride(params: {
  );
 }

+export function resolveAgentModelFallbackOnErrorsOverride(
+  cfg: OpenClawConfig,
+  agentId: string,
+): FallbackOnErrorCodes | undefined {
+  const raw = resolveAgentConfig(cfg, agentId)?.model;
+  return resolveAgentModelFallbackOnErrors(raw);
+}
+
+export function resolveRunModelFallbackOnErrors(params: {
+  cfg: OpenClawConfig | undefined;
+  agentId?: string | null;
+  sessionKey?: string | null;
+}): FallbackOnErrorCodes | undefined {
+  if (!params.cfg) {
+    return undefined;
+  }
+  const raw = resolveAgentConfig(
+    params.cfg,
+    resolveFallbackAgentId({ agentId: params.agentId, sessionKey: params.sessionKey }),
+  )?.model;
+  return resolveAgentModelFallbackOnErrors(raw);
+}
+
 export function hasConfiguredModelFallbacks(params: {
  cfg: OpenClawConfig | undefined;
  agentId?: string | null;
--- a/src/agents/failover-error.ts
+++ b/src/agents/failover-error.ts
@ -1,3 +1,4 @@
+import type { FallbackOnErrorCodes } from "../config/types.agents-shared.js";
 import { readErrorName } from "../infra/errors.js";
 import {
  classifyFailoverReason,
@ -328,3 +329,91 @@ export function coerceToFailoverError(
    cause: err instanceof Error ? err : undefined,
  });
 }
+
+/**
+ * Check if an error should trigger fallback based on the configured error codes.
+ *
+ * For "default" or undefined, this delegates to resolveFailoverReasonFromError() which
+ * matches the original behavior (no regression).
+ *
+ * @param err - The error to check
+ * @param fallbackOnErrors - Configuration for which errors should trigger fallback
+ *   - "default": Use original behavior (same as no config) - any recognized failover reason
+ *   - "all": Any HTTP status >= 400; errors without a status need a recognized failover reason
+ *   - number[]: Only errors whose HTTP status is in the list; errors without a status never match
+ * @returns true if the error should trigger fallback
+ */
+export function shouldTriggerFallback(
+  err: unknown,
+  fallbackOnErrors?: FallbackOnErrorCodes,
+): boolean {
+  const status = getStatusCode(err);
+  const reason = resolveFailoverReasonFromError(err);
+
+  // For "default" or undefined, match original behavior exactly
+  // This delegates to the existing reason classification logic
+  if (fallbackOnErrors === undefined || fallbackOnErrors === "default") {
+    return reason !== null;
+  }
+
+  // For "all", any HTTP status >= 400 triggers fallback (no upper bound is applied).
+  // Errors without a status still trigger fallback when they have a recognized reason.
+  if (fallbackOnErrors === "all") {
+    return status !== undefined ? status >= 400 : reason !== null;
+  }
+
+  // For custom array, check specific status codes only
+  // Ignore non-HTTP errors even if they have a recognized reason
+  return status !== undefined && new Set(fallbackOnErrors).has(status);
+}
+
+/**
+ * Coerce an error to FailoverError if it should trigger fallback based on configuration.
+ *
+ * @param err - The error to check
+ * @param fallbackOnErrors - Configuration for which errors should trigger fallback
+ * @param context - Additional context (provider, model, profileId)
+ * @returns FailoverError if the error should trigger fallback, null otherwise
+ */
+export function coerceToFailoverErrorWithConfig(
+  err: unknown,
+  fallbackOnErrors: FallbackOnErrorCodes | undefined,
+  context?: {
+    provider?: string;
+    model?: string;
+    profileId?: string;
+  },
+): FailoverError | null {
+  // First check if it's already a FailoverError
+  if (isFailoverError(err)) {
+    // Still need to check if it should trigger fallback based on config
+    if (!shouldTriggerFallback(err, fallbackOnErrors)) {
+      return null;
+    }
+    return err;
+  }
+
+  // Check if error should trigger fallback
+  if (!shouldTriggerFallback(err, fallbackOnErrors)) {
+    return null;
+  }
+
+  // Coerce to FailoverError
+  const status = getStatusCode(err);
+  const reason = resolveFailoverReasonFromError(err);
+  const message = getErrorMessage(err) || String(err);
+  const code = getErrorCode(err);
+
+  // No recognized reason (the error matched by status code only): use the generic "unknown" reason
+  const effectiveReason: FailoverReason = reason ?? "unknown";
+
+  return new FailoverError(message, {
+    reason: effectiveReason,
+    provider: context?.provider,
+    model: context?.model,
+    profileId: context?.profileId,
+    status,
+    code,
+    cause: err instanceof Error ? err : undefined,
+  });
+}
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@ -3,6 +3,7 @@ import {
  resolveAgentModelFallbackValues,
  resolveAgentModelPrimaryValue,
 } from "../config/model-input.js";
+import type { FallbackOnErrorCodes } from "../config/types.agents-shared.js";
 import { createSubsystemLogger } from "../logging/subsystem.js";
 import { sanitizeForLog } from "../terminal/ansi.js";
 import {
@ -15,6 +16,7 @@ import {
 import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js";
 import {
  coerceToFailoverError,
+  coerceToFailoverErrorWithConfig,
  describeFailoverError,
  isFailoverError,
  isTimeoutError,
@ -130,6 +132,7 @@ async function runFallbackCandidate<T>(params: {
  provider: string;
  model: string;
  options?: ModelFallbackRunOptions;
+  fallbackOnErrors?: FallbackOnErrorCodes;
 }): Promise<{ ok: true; result: T } | { ok: false; error: unknown }> {
  try {
    const result = params.options
@ -142,10 +145,16 @@ async function runFallbackCandidate<T>(params: {
  } catch (err) {
    // Normalize abort-wrapped rate-limit errors (e.g. Google Vertex RESOURCE_EXHAUSTED)
    // so they become FailoverErrors and continue the fallback loop instead of aborting.
-    const normalizedFailover = coerceToFailoverError(err, {
-      provider: params.provider,
-      model: params.model,
-    });
+    // Use config-aware error coercion if fallbackOnErrors is provided.
+    const normalizedFailover = params.fallbackOnErrors
+      ? coerceToFailoverErrorWithConfig(err, params.fallbackOnErrors, {
+          provider: params.provider,
+          model: params.model,
+        })
+      : coerceToFailoverError(err, {
+          provider: params.provider,
+          model: params.model,
+        });
    if (shouldRethrowAbort(err) && !normalizedFailover) {
      throw err;
    }
@ -159,12 +168,14 @@ async function runFallbackAttempt<T>(params: {
  model: string;
  attempts: FallbackAttempt[];
  options?: ModelFallbackRunOptions;
+  fallbackOnErrors?: FallbackOnErrorCodes;
 }): Promise<{ success: ModelFallbackRunResult<T> } | { error: unknown }> {
  const runResult = await runFallbackCandidate({
    run: params.run,
    provider: params.provider,
    model: params.model,
    options: params.options,
+    fallbackOnErrors: params.fallbackOnErrors,
  });
  if (runResult.ok) {
    return {
@ -516,6 +527,8 @@ export async function runWithModelFallback<T>(params: {
  agentDir?: string;
  /** Optional explicit fallbacks list; when provided (even empty), replaces agents.defaults.model.fallbacks. */
  fallbacksOverride?: string[];
+  /** Which errors trigger fallback: "default", "all", or a list of HTTP status codes. Unset keeps the existing classification. */
+  fallbackOnErrors?: FallbackOnErrorCodes;
  run: ModelFallbackRunFn<T>;
  onError?: ModelFallbackErrorHandler;
 }): Promise<ModelFallbackRunResult<T>> {
@ -663,6 +676,7 @@ export async function runWithModelFallback<T>(params: {
      ...candidate,
      attempts,
      options: runOptions,
+      fallbackOnErrors: params.fallbackOnErrors,
    });
    if ("success" in attemptRun) {
      if (i > 0 || attempts.length > 0 || attemptedDuringCooldown) {
@ -711,11 +725,15 @@ export async function runWithModelFallback<T>(params: {
      if (isLikelyContextOverflowError(errMessage)) {
        throw err;
      }
-      const normalized =
-        coerceToFailoverError(err, {
-          provider: candidate.provider,
-          model: candidate.model,
-        }) ?? err;
+      const normalized = params.fallbackOnErrors
+        ? (coerceToFailoverErrorWithConfig(err, params.fallbackOnErrors, {
+            provider: candidate.provider,
+            model: candidate.model,
+          }) ?? err)
+        : (coerceToFailoverError(err, {
+            provider: candidate.provider,
+            model: candidate.model,
+          }) ?? err);

      // Even unrecognized errors should not abort the fallback loop when
      // there are remaining candidates.  Only abort/context-overflow errors
@ -779,6 +797,7 @@ export async function runWithImageModelFallback<T>(params: {
  modelOverride?: string;
  run: (provider: string, model: string) => Promise<T>;
  onError?: ModelFallbackErrorHandler;
+  fallbackOnErrors?: FallbackOnErrorCodes;
 }): Promise<ModelFallbackRunResult<T>> {
  const candidates = resolveImageFallbackCandidates({
    cfg: params.cfg,
@ -796,7 +815,12 @@ export async function runWithImageModelFallback<T>(params: {

  for (let i = 0; i < candidates.length; i += 1) {
    const candidate = candidates[i];
-    const attemptRun = await runFallbackAttempt({ run: params.run, ...candidate, attempts });
+    const attemptRun = await runFallbackAttempt({
+      run: params.run,
+      ...candidate,
+      attempts,
+      fallbackOnErrors: params.fallbackOnErrors,
+    });
    if ("success" in attemptRun) {
      return attemptRun.success;
    }
--- a/src/auto-reply/reply/agent-runner-utils.ts
+++ b/src/auto-reply/reply/agent-runner-utils.ts
@ -1,4 +1,7 @@
-import { resolveRunModelFallbacksOverride } from "../../agents/agent-scope.js";
+import {
+  resolveRunModelFallbacksOverride,
+  resolveRunModelFallbackOnErrors,
+} from "../../agents/agent-scope.js";
 import type { NormalizedUsage } from "../../agents/usage.js";
 import { getChannelPlugin } from "../../channels/plugins/index.js";
 import type { ChannelId, ChannelThreadingToolContext } from "../../channels/plugins/types.js";
@ -165,6 +168,11 @@ export function resolveModelFallbackOptions(run: FollowupRun["run"]) {
      agentId: run.agentId,
      sessionKey: run.sessionKey,
    }),
+    fallbackOnErrors: resolveRunModelFallbackOnErrors({
+      cfg: run.config,
+      agentId: run.agentId,
+      sessionKey: run.sessionKey,
+    }),
  };
 }

--- a/src/config/model-input.ts
+++ b/src/config/model-input.ts
@ -1,4 +1,4 @@
-import type { AgentModelConfig } from "./types.agents-shared.js";
+import type { AgentModelConfig, FallbackOnErrorCodes } from "./types.agents-shared.js";

 type AgentModelListLike = {
  primary?: string;
@ -24,6 +24,21 @@ export function resolveAgentModelFallbackValues(model?: AgentModelConfig): strin
  return Array.isArray(model.fallbacks) ? model.fallbacks : [];
 }

+/**
+ * Resolve the fallbackOnErrors configuration from an AgentModelConfig.
+ *
+ * @param model - The agent model configuration
+ * @returns The configured fallbackOnErrors value, or undefined when the model is unset or not an object (e.g. a plain model string)
+ */
+export function resolveAgentModelFallbackOnErrors(
+  model?: AgentModelConfig,
+): FallbackOnErrorCodes | undefined {
+  if (!model || typeof model !== "object") {
+    return undefined;
+  }
+  return model.fallbackOnErrors;
+}
+
 export function toAgentModelListLike(model?: AgentModelConfig): AgentModelListLike | undefined {
  if (typeof model === "string") {
    const primary = model.trim();
--- a/src/config/types.agents-shared.ts
+++ b/src/config/types.agents-shared.ts
@ -5,6 +5,16 @@ import type {
  SandboxSshSettings,
 } from "./types.sandbox.js";

+/**
+ * Selects which errors should trigger model fallback.
+ * Default behavior triggers fallback on any error with a recognized failover reason (typically server errors, rate limits, timeouts, and not-found errors).
+ * Users can extend this to all HTTP errors (4xx and 5xx) with "all" or restrict it to custom status codes.
+ */
+export type FallbackOnErrorCodes =
+  | "all" // All HTTP errors (4xx and 5xx) trigger fallback
+  | "default" // Server errors (500, 502, 503, 504) + rate limits (429) + timeout (408) + not found (404)
+  | number[]; // Custom list of HTTP status codes
+
 export type AgentModelConfig =
  | string
  | {
@ -12,6 +22,21 @@ export type AgentModelConfig =
      primary?: string;
      /** Per-agent model fallbacks (provider/model). */
      fallbacks?: string[];
+      /**
+       * HTTP status codes that should trigger fallback to next model.
+       * - "default": Any recognized failover reason, typically server errors (500, 502, 503, 504) + rate limits (429) + timeout (408) + not found (404) [default]
+       * - "all": All HTTP errors (4xx and 5xx) trigger fallback
+       * - number[]: Custom list of status codes (e.g., [400, 401, 403, 429, 500, 502, 503])
+       *
+       * @example
+       * // Enable fallback on all client and server errors
+       * { primary: "openai/gpt-4", fallbacks: ["anthropic/claude-3"], fallbackOnErrors: "all" }
+       *
+       * @example
+       * // Custom error codes
+       * { primary: "openai/gpt-4", fallbacks: ["anthropic/claude-3"], fallbackOnErrors: [400, 429, 500, 502, 503] }
+       */
+      fallbackOnErrors?: FallbackOnErrorCodes;
    };

 export type AgentSandboxConfig = {
--- a/src/cron/isolated-agent/run.ts
+++ b/src/cron/isolated-agent/run.ts
@ -5,6 +5,7 @@ import {
  resolveAgentModelFallbacksOverride,
  resolveAgentWorkspaceDir,
  resolveDefaultAgentId,
+  resolveRunModelFallbackOnErrors,
 } from "../../agents/agent-scope.js";
 import { resolveSessionAuthProfileOverride } from "../../agents/auth-profiles/session-override.js";
 import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-budget.js";
@ -586,6 +587,10 @@ export async function runCronIsolatedAgentTurn(params: {
        agentDir,
        fallbacksOverride:
          payloadFallbacks ?? resolveAgentModelFallbacksOverride(params.cfg, agentId),
+        fallbackOnErrors: resolveRunModelFallbackOnErrors({
+          cfg: cfgWithAgentDefaults,
+          agentId,
+        }),
        run: async (providerOverride, modelOverride, runOptions) => {
          if (abortSignal?.aborted) {
            throw new Error(abortReason());
--- a/test/fixtures/plugin-extension-import-boundary-inventory.json
+++ b/test/fixtures/plugin-extension-import-boundary-inventory.json
@ -31,14 +31,6 @@
    "resolvedPath": "extensions/imessage/runtime-api.js",
    "reason": "imports extension-owned file from src/plugins"
  },
-  {
-    "file": "src/plugins/runtime/runtime-matrix.ts",
-    "line": 4,
-    "kind": "import",
-    "specifier": "../../../extensions/matrix/runtime-api.js",
-    "resolvedPath": "extensions/matrix/runtime-api.js",
-    "reason": "imports extension-owned file from src/plugins"
-  },
  {
    "file": "src/plugins/runtime/runtime-slack-ops.runtime.ts",
    "line": 10,