// openclaw/src/agents/pi-embedded-runner/run.ts

import { randomBytes } from "node:crypto";
import fs from "node:fs/promises";
import type { ThinkLevel } from "../../auto-reply/thinking.js";
import {
  ensureContextEnginesInitialized,
  resolveContextEngine,
} from "../../context-engine/index.js";
import { computeBackoff, sleepWithAbort, type BackoffPolicy } from "../../infra/backoff.js";
import { generateSecureToken } from "../../infra/secure-random.js";
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
import type { PluginHookBeforeAgentStartResult } from "../../plugins/types.js";
import { enqueueCommandInLane } from "../../process/command-queue.js";
import { isMarkdownCapableMessageChannel } from "../../utils/message-channel.js";
import { resolveOpenClawAgentDir } from "../agent-paths.js";
import { hasConfiguredModelFallbacks } from "../agent-scope.js";
import {
  isProfileInCooldown,
  type AuthProfileFailureReason,
  markAuthProfileFailure,
  markAuthProfileGood,
  markAuthProfileUsed,
  resolveProfilesUnavailableReason,
} from "../auth-profiles.js";
import {
  CONTEXT_WINDOW_HARD_MIN_TOKENS,
  CONTEXT_WINDOW_WARN_BELOW_TOKENS,
  evaluateContextWindowGuard,
  resolveContextWindowInfo,
} from "../context-window-guard.js";
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
import { FailoverError, resolveFailoverStatus } from "../failover-error.js";
import {
  ensureAuthProfileStore,
  getApiKeyForModel,
  resolveAuthProfileOrder,
  type ResolvedProviderAuth,
} from "../model-auth.js";
import { normalizeProviderId } from "../model-selection.js";
import { ensureOpenClawModelsJson } from "../models-config.js";
import {
  formatBillingErrorMessage,
  classifyFailoverReason,
  formatAssistantErrorText,
  isAuthAssistantError,
  isBillingAssistantError,
  isCompactionFailureError,
  isLikelyContextOverflowError,
  isFailoverAssistantError,
  isFailoverErrorMessage,
  parseImageSizeError,
  parseImageDimensionError,
  isRateLimitAssistantError,
  isTimeoutErrorMessage,
  pickFallbackThinkingLevel,
  type FailoverReason,
} from "../pi-embedded-helpers.js";
import { ensureRuntimePluginsLoaded } from "../runtime-plugins.js";
import { derivePromptTokens, normalizeUsage, type UsageLike } from "../usage.js";
import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js";
import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
import { log } from "./logger.js";
import { resolveModel } from "./model.js";
import { runEmbeddedAttempt } from "./run/attempt.js";
import { createFailoverDecisionLogger } from "./run/failover-observation.js";
import type { RunEmbeddedPiAgentParams } from "./run/params.js";
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
import {
  truncateOversizedToolResultsInSession,
  sessionLikelyHasOversizedToolResults,
} from "./tool-result-truncation.js";
import type { EmbeddedPiAgentMeta, EmbeddedPiRunResult } from "./types.js";
import { describeUnknownError } from "./utils.js";

/**
 * Local alias for the provider auth record resolved for an auth-profile
 * candidate; the run loop stores what `getApiKeyForModel` returns under this name.
 */
type ApiKeyInfo = ResolvedProviderAuth;

/**
 * Mutable bookkeeping for the GitHub Copilot API token used during one run.
 * Only created when the resolved model's provider is "github-copilot".
 */
type CopilotTokenState = {
  /** GitHub token that is exchanged for a Copilot API token on each refresh. */
  githubToken: string;
  /** Expiry of the current Copilot API token; compared against `Date.now()`. */
  expiresAt: number;
  /** Handle of the pending scheduled refresh (or refresh-retry) timer, if any. */
  refreshTimer?: ReturnType<typeof setTimeout>;
  /** Refresh currently in progress; later callers await it instead of starting another. */
  refreshInFlight?: Promise<void>;
};

// The scheduled Copilot token refresh is aimed this long before the token's expiry.
const COPILOT_REFRESH_MARGIN_MS = 5 * 60 * 1000;
// Delay before the one follow-up attempt made after a scheduled refresh fails.
const COPILOT_REFRESH_RETRY_MS = 60 * 1000;
// Lower bound on the scheduled refresh delay (applied when the token is already
// inside the refresh margin).
const COPILOT_REFRESH_MIN_DELAY_MS = 5 * 1000;
// Keep overload pacing noticeable enough to avoid tight retry bursts, but short
// enough that fallback still feels responsive within a single turn.
// Passed to `computeBackoff` together with the running overload-failover attempt count.
const OVERLOAD_FAILOVER_BACKOFF_POLICY: BackoffPolicy = {
  initialMs: 250,
  maxMs: 1_500,
  factor: 2,
  jitter: 0.2,
};

// Anthropic's refusal test token must not end up in session transcripts, so
// prompts bound for that provider have it swapped for a readable placeholder.
const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)";

/**
 * Replaces every occurrence of the refusal trigger token in `prompt` with the
 * redacted placeholder.
 *
 * @param prompt - Raw prompt text.
 * @returns The prompt with all trigger tokens rewritten; text without the token
 *   comes back unchanged.
 */
function scrubAnthropicRefusalMagic(prompt: string): string {
  // Splitting on the token and re-joining with the placeholder rewrites all
  // occurrences; a prompt without the token yields a single segment.
  const segments = prompt.split(ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL);
  return segments.join(ANTHROPIC_MAGIC_STRING_REPLACEMENT);
}

/**
 * Running token counters, updated by `mergeUsageIntoAccumulator`.
 * `input`, `output`, `cacheRead`, `cacheWrite` and `total` are sums over every
 * merged usage report; the `last*` fields are overwritten on each merge and so
 * reflect only the most recent report.
 */
type UsageAccumulator = {
  input: number;
  output: number;
  cacheRead: number;
  cacheWrite: number;
  total: number;
  /** Cache fields from the most recent API call (not accumulated). */
  lastCacheRead: number;
  lastCacheWrite: number;
  /** Input tokens from the most recent API call (not accumulated). */
  lastInput: number;
};

/**
 * Creates a fresh accumulator with every counter at zero.
 *
 * @returns A new, independent object on each call.
 */
const createUsageAccumulator = (): UsageAccumulator => {
  const zeroed: UsageAccumulator = {
    // Summed counters.
    input: 0,
    output: 0,
    cacheRead: 0,
    cacheWrite: 0,
    total: 0,
    // Most-recent-call snapshot.
    lastCacheRead: 0,
    lastCacheWrite: 0,
    lastInput: 0,
  };
  return zeroed;
};

/**
 * Builds a diagnostic id of the form `ovf-<base36 timestamp>-<token>`, where the
 * timestamp is `Date.now()` and the token comes from `generateSecureToken(4)`.
 */
function createCompactionDiagId(): string {
  const stamp = Date.now().toString(36);
  const token = generateSecureToken(4);
  return `ovf-${stamp}-${token}`;
}

// Defensive cap on outer run-loop iterations, shared by every retry branch.
const BASE_RUN_RETRY_ITERATIONS = 24;
const RUN_RETRY_ITERATIONS_PER_PROFILE = 8;
const MIN_RUN_RETRY_ITERATIONS = 32;
const MAX_RUN_RETRY_ITERATIONS = 160;

/**
 * Computes the iteration cap for the outer run loop.
 *
 * The budget is a fixed base plus a per-profile allowance (at least one profile
 * is always counted), clamped to the [MIN, MAX] range.
 *
 * @param profileCandidateCount - Number of auth-profile candidates for the run.
 * @returns The maximum number of loop iterations allowed.
 */
function resolveMaxRunRetryIterations(profileCandidateCount: number): number {
  const countedProfiles = Math.max(1, profileCandidateCount);
  const budget = BASE_RUN_RETRY_ITERATIONS + countedProfiles * RUN_RETRY_ITERATIONS_PER_PROFILE;
  const raisedToFloor = Math.max(MIN_RUN_RETRY_ITERATIONS, budget);
  return Math.min(MAX_RUN_RETRY_ITERATIONS, raisedToFloor);
}

/**
 * Type guard: true when `usage` is present and at least one of its counters
 * (`input`, `output`, `cacheRead`, `cacheWrite`, `total`) is a finite number
 * greater than zero.
 */
const hasUsageValues = (
  usage: ReturnType<typeof normalizeUsage>,
): usage is NonNullable<ReturnType<typeof normalizeUsage>> => {
  if (!usage) {
    return false;
  }
  const counters = [usage.input, usage.output, usage.cacheRead, usage.cacheWrite, usage.total];
  for (const counter of counters) {
    // Missing, non-numeric, NaN, infinite, zero and negative counters do not count.
    if (typeof counter === "number" && Number.isFinite(counter) && counter > 0) {
      return true;
    }
  }
  return false;
};

/**
 * Folds one usage report into `target`.
 *
 * Reports without any positive counter are ignored entirely (nothing is
 * mutated). Otherwise the summed counters grow by the report's values and the
 * `last*` snapshot fields are overwritten with this report's values.
 *
 * @param target - Accumulator to mutate in place.
 * @param usage - Normalized usage for a single API call, possibly absent.
 */
const mergeUsageIntoAccumulator = (
  target: UsageAccumulator,
  usage: ReturnType<typeof normalizeUsage>,
) => {
  if (!hasUsageValues(usage)) {
    return;
  }
  const input = usage.input ?? 0;
  const output = usage.output ?? 0;
  const cacheRead = usage.cacheRead ?? 0;
  const cacheWrite = usage.cacheWrite ?? 0;

  target.input += input;
  target.output += output;
  target.cacheRead += cacheRead;
  target.cacheWrite += cacheWrite;
  // Prefer the reported total; otherwise derive it from the four components.
  target.total += usage.total ?? (input + output + cacheRead + cacheWrite);

  // Snapshot the latest call separately: every tool-call round-trip reports
  // cacheRead ≈ current context size, so the summed cache counters overstate
  // context size once there is more than one call.
  target.lastCacheRead = cacheRead;
  target.lastCacheWrite = cacheWrite;
  target.lastInput = input;
};

/**
 * Converts an accumulator into the usage shape reported for the run.
 *
 * Returns `undefined` when no summed counter is positive. Otherwise the prompt
 * side (`input`, `cacheRead`, `cacheWrite`) comes from the most recent call's
 * snapshot and `output` stays the accumulated sum. Summing cache reads across
 * N tool-call round-trips would report roughly N × context size.
 * See: https://github.com/openclaw/openclaw/issues/13698
 *
 * Zero-valued fields are reported as `undefined`.
 */
const toNormalizedUsage = (usage: UsageAccumulator) => {
  const summedCounters = [
    usage.input,
    usage.output,
    usage.cacheRead,
    usage.cacheWrite,
    usage.total,
  ];
  if (!summedCounters.some((count) => count > 0)) {
    return undefined;
  }

  const { lastInput, lastCacheRead, lastCacheWrite, output } = usage;
  const lastPromptTokens = lastInput + lastCacheRead + lastCacheWrite;
  // Falsy counts (0, NaN) are omitted rather than reported.
  const orUndefined = (count: number) => count || undefined;

  return {
    input: orUndefined(lastInput),
    output: orUndefined(output),
    cacheRead: orUndefined(lastCacheRead),
    cacheWrite: orUndefined(lastCacheWrite),
    total: orUndefined(lastPromptTokens + output),
  };
};

/**
 * Picks the provider/model pair to attribute an error to: the values reported
 * on the last assistant message when present, otherwise the configured ones.
 *
 * @param params.lastAssistant - Last assistant message, if any.
 * @param params.provider - Fallback provider id.
 * @param params.model - Fallback model id.
 */
function resolveActiveErrorContext(params: {
  lastAssistant: { provider?: string; model?: string } | undefined;
  provider: string;
  model: string;
}): { provider: string; model: string } {
  const reported = params.lastAssistant;
  const provider = reported?.provider ?? params.provider;
  const model = reported?.model ?? params.model;
  return { provider, model };
}

/**
 * Assembles `agentMeta` for error return paths while keeping the usage gathered
 * so far, so the session's token totals describe the real context size instead
 * of whatever the previous successful run recorded.
 *
 * `usage`, `lastCallUsage` and `promptTokens` are only included when they have
 * a truthy value.
 */
function buildErrorAgentMeta(params: {
  sessionId: string;
  provider: string;
  model: string;
  usageAccumulator: UsageAccumulator;
  lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
  lastAssistant?: { usage?: unknown } | null;
  /** API-reported total from the most recent call, mirroring the success path correction. */
  lastTurnTotal?: number;
}): EmbeddedPiAgentMeta {
  const { sessionId, provider, model, lastAssistant } = params;

  const usage = toNormalizedUsage(params.usageAccumulator);
  if (usage) {
    // Same correction as the success path: a positive API-reported total
    // replaces the total derived from accumulated counters.
    const apiTotal = params.lastTurnTotal;
    if (apiTotal && apiTotal > 0) {
      usage.total = apiTotal;
    }
  }

  const lastCallUsage = lastAssistant
    ? normalizeUsage(lastAssistant.usage as UsageLike)
    : undefined;
  const promptTokens = derivePromptTokens(params.lastRunPromptUsage);

  // Optional sections collapse to empty objects so absent data adds no keys.
  const usagePart = usage ? { usage } : {};
  const lastCallPart = lastCallUsage ? { lastCallUsage } : {};
  const promptPart = promptTokens ? { promptTokens } : {};

  return {
    sessionId,
    provider,
    model,
    ...usagePart,
    ...lastCallPart,
    ...promptPart,
  };
}

export async function runEmbeddedPiAgent(
  params: RunEmbeddedPiAgentParams,
): Promise<EmbeddedPiRunResult> {
  const sessionLane = resolveSessionLane(params.sessionKey?.trim() || params.sessionId);
  const globalLane = resolveGlobalLane(params.lane);
  const enqueueGlobal =
    params.enqueue ?? ((task, opts) => enqueueCommandInLane(globalLane, task, opts));
  const enqueueSession =
    params.enqueue ?? ((task, opts) => enqueueCommandInLane(sessionLane, task, opts));
  const channelHint = params.messageChannel ?? params.messageProvider;
  const resolvedToolResultFormat =
    params.toolResultFormat ??
    (channelHint
      ? isMarkdownCapableMessageChannel(channelHint)
        ? "markdown"
        : "plain"
      : "markdown");
  const isProbeSession = params.sessionId?.startsWith("probe-") ?? false;

  return enqueueSession(() =>
    enqueueGlobal(async () => {
      const started = Date.now();
      const workspaceResolution = resolveRunWorkspaceDir({
        workspaceDir: params.workspaceDir,
        sessionKey: params.sessionKey,
        agentId: params.agentId,
        config: params.config,
      });
      const resolvedWorkspace = workspaceResolution.workspaceDir;
      const redactedSessionId = redactRunIdentifier(params.sessionId);
      const redactedSessionKey = redactRunIdentifier(params.sessionKey);
      const redactedWorkspace = redactRunIdentifier(resolvedWorkspace);
      if (workspaceResolution.usedFallback) {
        log.warn(
          `[workspace-fallback] caller=runEmbeddedPiAgent reason=${workspaceResolution.fallbackReason} run=${params.runId} session=${redactedSessionId} sessionKey=${redactedSessionKey} agent=${workspaceResolution.agentId} workspace=${redactedWorkspace}`,
        );
      }
      ensureRuntimePluginsLoaded({
        config: params.config,
        workspaceDir: resolvedWorkspace,
      });
      const prevCwd = process.cwd();

      let provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
      let modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
      const agentDir = params.agentDir ?? resolveOpenClawAgentDir();
      const fallbackConfigured = hasConfiguredModelFallbacks({
        cfg: params.config,
        agentId: params.agentId,
        sessionKey: params.sessionKey,
      });
      await ensureOpenClawModelsJson(params.config, agentDir);

      // Run before_model_resolve hooks early so plugins can override the
      // provider/model before resolveModel().
      //
      // Legacy compatibility: before_agent_start is also checked for override
      // fields if present. New hook takes precedence when both are set.
      let modelResolveOverride: { providerOverride?: string; modelOverride?: string } | undefined;
      let legacyBeforeAgentStartResult: PluginHookBeforeAgentStartResult | undefined;
      const hookRunner = getGlobalHookRunner();
      const hookCtx = {
        agentId: workspaceResolution.agentId,
        sessionKey: params.sessionKey,
        sessionId: params.sessionId,
        workspaceDir: resolvedWorkspace,
        messageProvider: params.messageProvider ?? undefined,
        trigger: params.trigger,
        channelId: params.messageChannel ?? params.messageProvider ?? undefined,
      };
      if (hookRunner?.hasHooks("before_model_resolve")) {
        try {
          modelResolveOverride = await hookRunner.runBeforeModelResolve(
            { prompt: params.prompt },
            hookCtx,
          );
        } catch (hookErr) {
          log.warn(`before_model_resolve hook failed: ${String(hookErr)}`);
        }
      }
      if (hookRunner?.hasHooks("before_agent_start")) {
        try {
          legacyBeforeAgentStartResult = await hookRunner.runBeforeAgentStart(
            { prompt: params.prompt },
            hookCtx,
          );
          modelResolveOverride = {
            providerOverride:
              modelResolveOverride?.providerOverride ??
              legacyBeforeAgentStartResult?.providerOverride,
            modelOverride:
              modelResolveOverride?.modelOverride ?? legacyBeforeAgentStartResult?.modelOverride,
          };
        } catch (hookErr) {
          log.warn(
            `before_agent_start hook (legacy model resolve path) failed: ${String(hookErr)}`,
          );
        }
      }
      if (modelResolveOverride?.providerOverride) {
        provider = modelResolveOverride.providerOverride;
        log.info(`[hooks] provider overridden to ${provider}`);
      }
      if (modelResolveOverride?.modelOverride) {
        modelId = modelResolveOverride.modelOverride;
        log.info(`[hooks] model overridden to ${modelId}`);
      }

      const { model, error, authStorage, modelRegistry } = resolveModel(
        provider,
        modelId,
        agentDir,
        params.config,
      );
      if (!model) {
        throw new FailoverError(error ?? `Unknown model: ${provider}/${modelId}`, {
          reason: "model_not_found",
          provider,
          model: modelId,
        });
      }

      const ctxInfo = resolveContextWindowInfo({
        cfg: params.config,
        provider,
        modelId,
        modelContextWindow: model.contextWindow,
        defaultTokens: DEFAULT_CONTEXT_TOKENS,
      });
      // Apply contextTokens cap to model so pi-coding-agent's auto-compaction
      // threshold uses the effective limit, not the native context window.
      const effectiveModel =
        ctxInfo.tokens < (model.contextWindow ?? Infinity)
          ? { ...model, contextWindow: ctxInfo.tokens }
          : model;
      const ctxGuard = evaluateContextWindowGuard({
        info: ctxInfo,
        warnBelowTokens: CONTEXT_WINDOW_WARN_BELOW_TOKENS,
        hardMinTokens: CONTEXT_WINDOW_HARD_MIN_TOKENS,
      });
      if (ctxGuard.shouldWarn) {
        log.warn(
          `low context window: ${provider}/${modelId} ctx=${ctxGuard.tokens} (warn<${CONTEXT_WINDOW_WARN_BELOW_TOKENS}) source=${ctxGuard.source}`,
        );
      }
      if (ctxGuard.shouldBlock) {
        log.error(
          `blocked model (context window too small): ${provider}/${modelId} ctx=${ctxGuard.tokens} (min=${CONTEXT_WINDOW_HARD_MIN_TOKENS}) source=${ctxGuard.source}`,
        );
        throw new FailoverError(
          `Model context window too small (${ctxGuard.tokens} tokens). Minimum is ${CONTEXT_WINDOW_HARD_MIN_TOKENS}.`,
          { reason: "unknown", provider, model: modelId },
        );
      }

      const authStore = ensureAuthProfileStore(agentDir, { allowKeychainPrompt: false });
      const preferredProfileId = params.authProfileId?.trim();
      let lockedProfileId = params.authProfileIdSource === "user" ? preferredProfileId : undefined;
      if (lockedProfileId) {
        const lockedProfile = authStore.profiles[lockedProfileId];
        if (
          !lockedProfile ||
          normalizeProviderId(lockedProfile.provider) !== normalizeProviderId(provider)
        ) {
          lockedProfileId = undefined;
        }
      }
      const profileOrder = resolveAuthProfileOrder({
        cfg: params.config,
        store: authStore,
        provider,
        preferredProfile: preferredProfileId,
      });
      if (lockedProfileId && !profileOrder.includes(lockedProfileId)) {
        throw new Error(`Auth profile "${lockedProfileId}" is not configured for ${provider}.`);
      }
      const profileCandidates = lockedProfileId
        ? [lockedProfileId]
        : profileOrder.length > 0
          ? profileOrder
          : [undefined];
      let profileIndex = 0;

      const initialThinkLevel = params.thinkLevel ?? "off";
      let thinkLevel = initialThinkLevel;
      const attemptedThinking = new Set<ThinkLevel>();
      let apiKeyInfo: ApiKeyInfo | null = null;
      let lastProfileId: string | undefined;
      const copilotTokenState: CopilotTokenState | null =
        model.provider === "github-copilot" ? { githubToken: "", expiresAt: 0 } : null;
      let copilotRefreshCancelled = false;
      const hasCopilotGithubToken = () => Boolean(copilotTokenState?.githubToken.trim());

      const clearCopilotRefreshTimer = () => {
        if (!copilotTokenState?.refreshTimer) {
          return;
        }
        clearTimeout(copilotTokenState.refreshTimer);
        copilotTokenState.refreshTimer = undefined;
      };

      const stopCopilotRefreshTimer = () => {
        if (!copilotTokenState) {
          return;
        }
        copilotRefreshCancelled = true;
        clearCopilotRefreshTimer();
      };

      const refreshCopilotToken = async (reason: string): Promise<void> => {
        if (!copilotTokenState) {
          return;
        }
        if (copilotTokenState.refreshInFlight) {
          await copilotTokenState.refreshInFlight;
          return;
        }
        const { resolveCopilotApiToken } = await import("../../providers/github-copilot-token.js");
        copilotTokenState.refreshInFlight = (async () => {
          const githubToken = copilotTokenState.githubToken.trim();
          if (!githubToken) {
            throw new Error("Copilot refresh requires a GitHub token.");
          }
          log.debug(`Refreshing GitHub Copilot token (${reason})...`);
          const copilotToken = await resolveCopilotApiToken({
            githubToken,
          });
          authStorage.setRuntimeApiKey(model.provider, copilotToken.token);
          copilotTokenState.expiresAt = copilotToken.expiresAt;
          const remaining = copilotToken.expiresAt - Date.now();
          log.debug(
            `Copilot token refreshed; expires in ${Math.max(0, Math.floor(remaining / 1000))}s.`,
          );
        })()
          .catch((err) => {
            log.warn(`Copilot token refresh failed: ${describeUnknownError(err)}`);
            throw err;
          })
          .finally(() => {
            copilotTokenState.refreshInFlight = undefined;
          });
        await copilotTokenState.refreshInFlight;
      };

      const scheduleCopilotRefresh = (): void => {
        if (!copilotTokenState || copilotRefreshCancelled) {
          return;
        }
        if (!hasCopilotGithubToken()) {
          log.warn("Skipping Copilot refresh scheduling; GitHub token missing.");
          return;
        }
        clearCopilotRefreshTimer();
        const now = Date.now();
        const refreshAt = copilotTokenState.expiresAt - COPILOT_REFRESH_MARGIN_MS;
        const delayMs = Math.max(COPILOT_REFRESH_MIN_DELAY_MS, refreshAt - now);
        const timer = setTimeout(() => {
          if (copilotRefreshCancelled) {
            return;
          }
          refreshCopilotToken("scheduled")
            .then(() => scheduleCopilotRefresh())
            .catch(() => {
              if (copilotRefreshCancelled) {
                return;
              }
              const retryTimer = setTimeout(() => {
                if (copilotRefreshCancelled) {
                  return;
                }
                refreshCopilotToken("scheduled-retry")
                  .then(() => scheduleCopilotRefresh())
                  .catch(() => undefined);
              }, COPILOT_REFRESH_RETRY_MS);
              copilotTokenState.refreshTimer = retryTimer;
              if (copilotRefreshCancelled) {
                clearTimeout(retryTimer);
                copilotTokenState.refreshTimer = undefined;
              }
            });
        }, delayMs);
        copilotTokenState.refreshTimer = timer;
        if (copilotRefreshCancelled) {
          clearTimeout(timer);
          copilotTokenState.refreshTimer = undefined;
        }
      };

      const resolveAuthProfileFailoverReason = (params: {
        allInCooldown: boolean;
        message: string;
        profileIds?: Array<string | undefined>;
      }): FailoverReason => {
        if (params.allInCooldown) {
          const profileIds = (params.profileIds ?? profileCandidates).filter(
            (id): id is string => typeof id === "string" && id.length > 0,
          );
          return (
            resolveProfilesUnavailableReason({
              store: authStore,
              profileIds,
            }) ?? "rate_limit"
          );
        }
        const classified = classifyFailoverReason(params.message);
        return classified ?? "auth";
      };

      const throwAuthProfileFailover = (params: {
        allInCooldown: boolean;
        message?: string;
        error?: unknown;
      }): never => {
        const fallbackMessage = `No available auth profile for ${provider} (all in cooldown or unavailable).`;
        const message =
          params.message?.trim() ||
          (params.error ? describeUnknownError(params.error).trim() : "") ||
          fallbackMessage;
        const reason = resolveAuthProfileFailoverReason({
          allInCooldown: params.allInCooldown,
          message,
          profileIds: profileCandidates,
        });
        if (fallbackConfigured) {
          throw new FailoverError(message, {
            reason,
            provider,
            model: modelId,
            status: resolveFailoverStatus(reason),
            cause: params.error,
          });
        }
        if (params.error instanceof Error) {
          throw params.error;
        }
        throw new Error(message);
      };

      const resolveApiKeyForCandidate = async (candidate?: string) => {
        return getApiKeyForModel({
          model,
          cfg: params.config,
          profileId: candidate,
          store: authStore,
          agentDir,
        });
      };

      const applyApiKeyInfo = async (candidate?: string): Promise<void> => {
        apiKeyInfo = await resolveApiKeyForCandidate(candidate);
        const resolvedProfileId = apiKeyInfo.profileId ?? candidate;
        if (!apiKeyInfo.apiKey) {
          if (apiKeyInfo.mode !== "aws-sdk") {
            throw new Error(
              `No API key resolved for provider "${model.provider}" (auth mode: ${apiKeyInfo.mode}).`,
            );
          }
          lastProfileId = resolvedProfileId;
          return;
        }
        if (model.provider === "github-copilot") {
          const { resolveCopilotApiToken } =
            await import("../../providers/github-copilot-token.js");
          const copilotToken = await resolveCopilotApiToken({
            githubToken: apiKeyInfo.apiKey,
          });
          authStorage.setRuntimeApiKey(model.provider, copilotToken.token);
          if (copilotTokenState) {
            copilotTokenState.githubToken = apiKeyInfo.apiKey;
            copilotTokenState.expiresAt = copilotToken.expiresAt;
            scheduleCopilotRefresh();
          }
        } else {
          authStorage.setRuntimeApiKey(model.provider, apiKeyInfo.apiKey);
        }
        lastProfileId = apiKeyInfo.profileId;
      };

      const advanceAuthProfile = async (): Promise<boolean> => {
        if (lockedProfileId) {
          return false;
        }
        let nextIndex = profileIndex + 1;
        while (nextIndex < profileCandidates.length) {
          const candidate = profileCandidates[nextIndex];
          if (candidate && isProfileInCooldown(authStore, candidate)) {
            nextIndex += 1;
            continue;
          }
          try {
            await applyApiKeyInfo(candidate);
            profileIndex = nextIndex;
            thinkLevel = initialThinkLevel;
            attemptedThinking.clear();
            return true;
          } catch (err) {
            if (candidate && candidate === lockedProfileId) {
              throw err;
            }
            nextIndex += 1;
          }
        }
        return false;
      };

      try {
        const autoProfileCandidates = profileCandidates.filter(
          (candidate): candidate is string =>
            typeof candidate === "string" && candidate.length > 0 && candidate !== lockedProfileId,
        );
        const allAutoProfilesInCooldown =
          autoProfileCandidates.length > 0 &&
          autoProfileCandidates.every((candidate) => isProfileInCooldown(authStore, candidate));
        const unavailableReason = allAutoProfilesInCooldown
          ? (resolveProfilesUnavailableReason({
              store: authStore,
              profileIds: autoProfileCandidates,
            }) ?? "rate_limit")
          : null;
        const allowTransientCooldownProbe =
          params.allowTransientCooldownProbe === true &&
          allAutoProfilesInCooldown &&
          (unavailableReason === "rate_limit" ||
            unavailableReason === "overloaded" ||
            unavailableReason === "billing");
        let didTransientCooldownProbe = false;

        while (profileIndex < profileCandidates.length) {
          const candidate = profileCandidates[profileIndex];
          const inCooldown =
            candidate && candidate !== lockedProfileId && isProfileInCooldown(authStore, candidate);
          if (inCooldown) {
            if (allowTransientCooldownProbe && !didTransientCooldownProbe) {
              didTransientCooldownProbe = true;
              log.warn(
                `probing cooldowned auth profile for ${provider}/${modelId} due to ${unavailableReason ?? "transient"} unavailability`,
              );
            } else {
              profileIndex += 1;
              continue;
            }
          }
          await applyApiKeyInfo(profileCandidates[profileIndex]);
          break;
        }
        if (profileIndex >= profileCandidates.length) {
          throwAuthProfileFailover({ allInCooldown: true });
        }
      } catch (err) {
        if (err instanceof FailoverError) {
          throw err;
        }
        if (profileCandidates[profileIndex] === lockedProfileId) {
          throwAuthProfileFailover({ allInCooldown: false, error: err });
        }
        const advanced = await advanceAuthProfile();
        if (!advanced) {
          throwAuthProfileFailover({ allInCooldown: false, error: err });
        }
      }

      const maybeRefreshCopilotForAuthError = async (
        errorText: string,
        retried: boolean,
      ): Promise<boolean> => {
        if (!copilotTokenState || retried) {
          return false;
        }
        if (!isFailoverErrorMessage(errorText)) {
          return false;
        }
        if (classifyFailoverReason(errorText) !== "auth") {
          return false;
        }
        try {
          await refreshCopilotToken("auth-error");
          scheduleCopilotRefresh();
          return true;
        } catch {
          return false;
        }
      };

      const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
      const MAX_RUN_LOOP_ITERATIONS = resolveMaxRunRetryIterations(profileCandidates.length);
      let overflowCompactionAttempts = 0;
      let toolResultTruncationAttempted = false;
      let bootstrapPromptWarningSignaturesSeen =
        params.bootstrapPromptWarningSignaturesSeen ??
        (params.bootstrapPromptWarningSignature ? [params.bootstrapPromptWarningSignature] : []);
      const usageAccumulator = createUsageAccumulator();
      let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
      let autoCompactionCount = 0;
      let runLoopIterations = 0;
      let overloadFailoverAttempts = 0;
      const maybeMarkAuthProfileFailure = async (failure: {
        profileId?: string;
        reason?: AuthProfileFailureReason | null;
        config?: RunEmbeddedPiAgentParams["config"];
        agentDir?: RunEmbeddedPiAgentParams["agentDir"];
      }) => {
        const { profileId, reason } = failure;
        if (!profileId || !reason || reason === "timeout") {
          return;
        }
        await markAuthProfileFailure({
          store: authStore,
          profileId,
          reason,
          cfg: params.config,
          agentDir,
          runId: params.runId,
        });
      };
      const resolveAuthProfileFailureReason = (
        failoverReason: FailoverReason | null,
      ): AuthProfileFailureReason | null => {
        // Timeouts are transport/model-path failures, not auth health signals,
        // so they should not persist auth-profile failure state.
        if (!failoverReason || failoverReason === "timeout") {
          return null;
        }
        return failoverReason;
      };
      const maybeBackoffBeforeOverloadFailover = async (reason: FailoverReason | null) => {
        if (reason !== "overloaded") {
          return;
        }
        overloadFailoverAttempts += 1;
        const delayMs = computeBackoff(OVERLOAD_FAILOVER_BACKOFF_POLICY, overloadFailoverAttempts);
        log.warn(
          `overload backoff before failover for ${provider}/${modelId}: attempt=${overloadFailoverAttempts} delayMs=${delayMs}`,
        );
        try {
          await sleepWithAbort(delayMs, params.abortSignal);
        } catch (err) {
          if (params.abortSignal?.aborted) {
            const abortErr = new Error("Operation aborted", { cause: err });
            abortErr.name = "AbortError";
            throw abortErr;
          }
          throw err;
        }
      };
      // Resolve the context engine once and reuse across retries to avoid
      // repeated initialization/connection overhead per attempt.
      ensureContextEnginesInitialized();
      const contextEngine = await resolveContextEngine(params.config);
      try {
        let authRetryPending = false;
        // Hoisted so the retry-limit error path can use the most recent API total.
        let lastTurnTotal: number | undefined;
        while (true) {
          if (runLoopIterations >= MAX_RUN_LOOP_ITERATIONS) {
            const message =
              `Exceeded retry limit after ${runLoopIterations} attempts ` +
              `(max=${MAX_RUN_LOOP_ITERATIONS}).`;
            log.error(
              `[run-retry-limit] sessionKey=${params.sessionKey ?? params.sessionId} ` +
                `provider=${provider}/${modelId} attempts=${runLoopIterations} ` +
                `maxAttempts=${MAX_RUN_LOOP_ITERATIONS}`,
            );
            return {
              payloads: [
                {
                  text:
                    "Request failed after repeated internal retries. " +
                    "Please try again, or use /new to start a fresh session.",
                  isError: true,
                },
              ],
              meta: {
                durationMs: Date.now() - started,
                agentMeta: buildErrorAgentMeta({
                  sessionId: params.sessionId,
                  provider,
                  model: model.id,
                  usageAccumulator,
                  lastRunPromptUsage,
                  lastTurnTotal,
                }),
                error: { kind: "retry_limit", message },
              },
            };
          }
          runLoopIterations += 1;
          const copilotAuthRetry = authRetryPending;
          authRetryPending = false;
          attemptedThinking.add(thinkLevel);
          await fs.mkdir(resolvedWorkspace, { recursive: true });

          const prompt =
            provider === "anthropic" ? scrubAnthropicRefusalMagic(params.prompt) : params.prompt;

          const attempt = await runEmbeddedAttempt({
            sessionId: params.sessionId,
            sessionKey: params.sessionKey,
            trigger: params.trigger,
            messageChannel: params.messageChannel,
            messageProvider: params.messageProvider,
            agentAccountId: params.agentAccountId,
            messageTo: params.messageTo,
            messageThreadId: params.messageThreadId,
            groupId: params.groupId,
            groupChannel: params.groupChannel,
            groupSpace: params.groupSpace,
            spawnedBy: params.spawnedBy,
            senderId: params.senderId,
            senderName: params.senderName,
            senderUsername: params.senderUsername,
            senderE164: params.senderE164,
            senderIsOwner: params.senderIsOwner,
            currentChannelId: params.currentChannelId,
            currentThreadTs: params.currentThreadTs,
            currentMessageId: params.currentMessageId,
            replyToMode: params.replyToMode,
            hasRepliedRef: params.hasRepliedRef,
            sessionFile: params.sessionFile,
            workspaceDir: resolvedWorkspace,
            agentDir,
            config: params.config,
            contextEngine,
            contextTokenBudget: ctxInfo.tokens,
            skillsSnapshot: params.skillsSnapshot,
            prompt,
            images: params.images,
            disableTools: params.disableTools,
            provider,
            modelId,
            model: effectiveModel,
            authProfileId: lastProfileId,
            authProfileIdSource: lockedProfileId ? "user" : "auto",
            authStorage,
            modelRegistry,
            agentId: workspaceResolution.agentId,
            legacyBeforeAgentStartResult,
            thinkLevel,
            verboseLevel: params.verboseLevel,
            reasoningLevel: params.reasoningLevel,
            toolResultFormat: resolvedToolResultFormat,
            execOverrides: params.execOverrides,
            bashElevated: params.bashElevated,
            timeoutMs: params.timeoutMs,
            runId: params.runId,
            abortSignal: params.abortSignal,
            shouldEmitToolResult: params.shouldEmitToolResult,
            shouldEmitToolOutput: params.shouldEmitToolOutput,
            onPartialReply: params.onPartialReply,
            onAssistantMessageStart: params.onAssistantMessageStart,
            onBlockReply: params.onBlockReply,
            onBlockReplyFlush: params.onBlockReplyFlush,
            blockReplyBreak: params.blockReplyBreak,
            blockReplyChunking: params.blockReplyChunking,
            onReasoningStream: params.onReasoningStream,
            onReasoningEnd: params.onReasoningEnd,
            onToolResult: params.onToolResult,
            onAgentEvent: params.onAgentEvent,
            extraSystemPrompt: params.extraSystemPrompt,
            inputProvenance: params.inputProvenance,
            streamParams: params.streamParams,
            ownerNumbers: params.ownerNumbers,
            enforceFinalTag: params.enforceFinalTag,
            bootstrapPromptWarningSignaturesSeen,
            bootstrapPromptWarningSignature:
              bootstrapPromptWarningSignaturesSeen[bootstrapPromptWarningSignaturesSeen.length - 1],
          });

          const {
            aborted,
            promptError,
            timedOut,
            timedOutDuringCompaction,
            sessionIdUsed,
            lastAssistant,
          } = attempt;
          bootstrapPromptWarningSignaturesSeen =
            attempt.bootstrapPromptWarningSignaturesSeen ??
            (attempt.bootstrapPromptWarningSignature
              ? Array.from(
                  new Set([
                    ...bootstrapPromptWarningSignaturesSeen,
                    attempt.bootstrapPromptWarningSignature,
                  ]),
                )
              : bootstrapPromptWarningSignaturesSeen);
          const lastAssistantUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
          const attemptUsage = attempt.attemptUsage ?? lastAssistantUsage;
          mergeUsageIntoAccumulator(usageAccumulator, attemptUsage);
          // Keep prompt size from the latest model call so session totalTokens
          // reflects current context usage, not accumulated tool-loop usage.
          lastRunPromptUsage = lastAssistantUsage ?? attemptUsage;
          lastTurnTotal = lastAssistantUsage?.total ?? attemptUsage?.total;
          const attemptCompactionCount = Math.max(0, attempt.compactionCount ?? 0);
          autoCompactionCount += attemptCompactionCount;
          const activeErrorContext = resolveActiveErrorContext({
            lastAssistant,
            provider,
            model: modelId,
          });
          const formattedAssistantErrorText = lastAssistant
            ? formatAssistantErrorText(lastAssistant, {
                cfg: params.config,
                sessionKey: params.sessionKey ?? params.sessionId,
                provider: activeErrorContext.provider,
                model: activeErrorContext.model,
              })
            : undefined;
          const assistantErrorText =
            lastAssistant?.stopReason === "error"
              ? lastAssistant.errorMessage?.trim() || formattedAssistantErrorText
              : undefined;

          // Decide whether this attempt ended in a context overflow and record
          // which signal revealed it. Aborted attempts never count as overflows.
          const contextOverflowError = (() => {
            if (aborted) {
              return null;
            }
            if (!promptError) {
              // No prompt-side failure: fall back to the assistant's error text.
              return assistantErrorText && isLikelyContextOverflowError(assistantErrorText)
                ? { text: assistantErrorText, source: "assistantError" as const }
                : null;
            }
            // Prompt submission failed. If it is not an overflow, do not inspect
            // prior assistant errors from history for this attempt.
            const promptErrorText = describeUnknownError(promptError);
            return isLikelyContextOverflowError(promptErrorText)
              ? { text: promptErrorText, source: "promptError" as const }
              : null;
          })();

          if (contextOverflowError) {
            const overflowDiagId = createCompactionDiagId();
            const errorText = contextOverflowError.text;
            const msgCount = attempt.messagesSnapshot?.length ?? 0;
            log.warn(
              `[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
                `provider=${provider}/${modelId} source=${contextOverflowError.source} ` +
                `messages=${msgCount} sessionFile=${params.sessionFile} ` +
                `diagId=${overflowDiagId} compactionAttempts=${overflowCompactionAttempts} ` +
                `error=${errorText.slice(0, 200)}`,
            );
            const isCompactionFailure = isCompactionFailureError(errorText);
            const hadAttemptLevelCompaction = attemptCompactionCount > 0;
            // If this attempt already compacted (SDK auto-compaction), avoid immediately
            // running another explicit compaction for the same overflow trigger.
            if (
              !isCompactionFailure &&
              hadAttemptLevelCompaction &&
              overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
            ) {
              overflowCompactionAttempts++;
              log.warn(
                `context overflow persisted after in-attempt compaction (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); retrying prompt without additional compaction for ${provider}/${modelId}`,
              );
              continue;
            }
            // Attempt explicit overflow compaction only when this attempt did not
            // already auto-compact.
            if (
              !isCompactionFailure &&
              !hadAttemptLevelCompaction &&
              overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
            ) {
              if (log.isEnabled("debug")) {
                log.debug(
                  `[compaction-diag] decision diagId=${overflowDiagId} branch=compact ` +
                    `isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=unknown ` +
                    `attempt=${overflowCompactionAttempts + 1} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
                );
              }
              overflowCompactionAttempts++;
              log.warn(
                `context overflow detected (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); attempting auto-compaction for ${provider}/${modelId}`,
              );
              const compactResult = await contextEngine.compact({
                sessionId: params.sessionId,
                sessionFile: params.sessionFile,
                tokenBudget: ctxInfo.tokens,
                force: true,
                compactionTarget: "budget",
                runtimeContext: {
                  sessionKey: params.sessionKey,
                  messageChannel: params.messageChannel,
                  messageProvider: params.messageProvider,
                  agentAccountId: params.agentAccountId,
                  authProfileId: lastProfileId,
                  workspaceDir: resolvedWorkspace,
                  agentDir,
                  config: params.config,
                  skillsSnapshot: params.skillsSnapshot,
                  senderIsOwner: params.senderIsOwner,
                  provider,
                  model: modelId,
                  runId: params.runId,
                  thinkLevel,
                  reasoningLevel: params.reasoningLevel,
                  bashElevated: params.bashElevated,
                  extraSystemPrompt: params.extraSystemPrompt,
                  ownerNumbers: params.ownerNumbers,
                  trigger: "overflow",
                  diagId: overflowDiagId,
                  attempt: overflowCompactionAttempts,
                  maxAttempts: MAX_OVERFLOW_COMPACTION_ATTEMPTS,
                },
              });
              if (compactResult.compacted) {
                autoCompactionCount += 1;
                log.info(`auto-compaction succeeded for ${provider}/${modelId}; retrying prompt`);
                continue;
              }
              log.warn(
                `auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
              );
            }
            // Fallback: try truncating oversized tool results in the session.
            // This handles the case where a single tool result exceeds the
            // context window and compaction cannot reduce it further.
            if (!toolResultTruncationAttempted) {
              const contextWindowTokens = ctxInfo.tokens;
              const hasOversized = attempt.messagesSnapshot
                ? sessionLikelyHasOversizedToolResults({
                    messages: attempt.messagesSnapshot,
                    contextWindowTokens,
                  })
                : false;

              if (hasOversized) {
                if (log.isEnabled("debug")) {
                  log.debug(
                    `[compaction-diag] decision diagId=${overflowDiagId} branch=truncate_tool_results ` +
                      `isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=${hasOversized} ` +
                      `attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
                  );
                }
                toolResultTruncationAttempted = true;
                log.warn(
                  `[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
                    `(contextWindow=${contextWindowTokens} tokens)`,
                );
                const truncResult = await truncateOversizedToolResultsInSession({
                  sessionFile: params.sessionFile,
                  contextWindowTokens,
                  sessionId: params.sessionId,
                  sessionKey: params.sessionKey,
                });
                if (truncResult.truncated) {
                  log.info(
                    `[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
                  );
                  // Do NOT reset overflowCompactionAttempts here — the global cap must remain
                  // enforced across all iterations to prevent unbounded compaction cycles (OC-65).
                  continue;
                }
                log.warn(
                  `[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
                );
              } else if (log.isEnabled("debug")) {
                log.debug(
                  `[compaction-diag] decision diagId=${overflowDiagId} branch=give_up ` +
                    `isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=${hasOversized} ` +
                    `attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
                );
              }
            }
            if (
              (isCompactionFailure ||
                overflowCompactionAttempts >= MAX_OVERFLOW_COMPACTION_ATTEMPTS ||
                toolResultTruncationAttempted) &&
              log.isEnabled("debug")
            ) {
              log.debug(
                `[compaction-diag] decision diagId=${overflowDiagId} branch=give_up ` +
                  `isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=unknown ` +
                  `attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
              );
            }
            const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
            return {
              payloads: [
                {
                  text:
                    "Context overflow: prompt too large for the model. " +
                    "Try /reset (or /new) to start a fresh session, or use a larger-context model.",
                  isError: true,
                },
              ],
              meta: {
                durationMs: Date.now() - started,
                agentMeta: buildErrorAgentMeta({
                  sessionId: sessionIdUsed,
                  provider,
                  model: model.id,
                  usageAccumulator,
                  lastRunPromptUsage,
                  lastAssistant,
                  lastTurnTotal,
                }),
                systemPromptReport: attempt.systemPromptReport,
                error: { kind, message: errorText },
              },
            };
          }

          if (promptError && !aborted) {
            const errorText = describeUnknownError(promptError);
            if (await maybeRefreshCopilotForAuthError(errorText, copilotAuthRetry)) {
              authRetryPending = true;
              continue;
            }
            // Handle role ordering errors with a user-friendly message
            if (/incorrect role information|roles must alternate/i.test(errorText)) {
              return {
                payloads: [
                  {
                    text:
                      "Message ordering conflict - please try again. " +
                      "If this persists, use /new to start a fresh session.",
                    isError: true,
                  },
                ],
                meta: {
                  durationMs: Date.now() - started,
                  agentMeta: buildErrorAgentMeta({
                    sessionId: sessionIdUsed,
                    provider,
                    model: model.id,
                    usageAccumulator,
                    lastRunPromptUsage,
                    lastAssistant,
                    lastTurnTotal,
                  }),
                  systemPromptReport: attempt.systemPromptReport,
                  error: { kind: "role_ordering", message: errorText },
                },
              };
            }
            // Handle image size errors with a user-friendly message (no retry needed)
            const imageSizeError = parseImageSizeError(errorText);
            if (imageSizeError) {
              const maxMb = imageSizeError.maxMb;
              const maxMbLabel =
                typeof maxMb === "number" && Number.isFinite(maxMb) ? `${maxMb}` : null;
              const maxBytesHint = maxMbLabel ? ` (max ${maxMbLabel}MB)` : "";
              return {
                payloads: [
                  {
                    text:
                      `Image too large for the model${maxBytesHint}. ` +
                      "Please compress or resize the image and try again.",
                    isError: true,
                  },
                ],
                meta: {
                  durationMs: Date.now() - started,
                  agentMeta: buildErrorAgentMeta({
                    sessionId: sessionIdUsed,
                    provider,
                    model: model.id,
                    usageAccumulator,
                    lastRunPromptUsage,
                    lastAssistant,
                    lastTurnTotal,
                  }),
                  systemPromptReport: attempt.systemPromptReport,
                  error: { kind: "image_size", message: errorText },
                },
              };
            }
            const promptFailoverReason = classifyFailoverReason(errorText);
            const promptProfileFailureReason =
              resolveAuthProfileFailureReason(promptFailoverReason);
            await maybeMarkAuthProfileFailure({
              profileId: lastProfileId,
              reason: promptProfileFailureReason,
            });
            const promptFailoverFailure = isFailoverErrorMessage(errorText);
            // Capture the failing profile before auth-profile rotation mutates `lastProfileId`.
            const failedPromptProfileId = lastProfileId;
            const logPromptFailoverDecision = createFailoverDecisionLogger({
              stage: "prompt",
              runId: params.runId,
              rawError: errorText,
              failoverReason: promptFailoverReason,
              profileFailureReason: promptProfileFailureReason,
              provider,
              model: modelId,
              profileId: failedPromptProfileId,
              fallbackConfigured,
              aborted,
            });
            if (
              promptFailoverFailure &&
              promptFailoverReason !== "timeout" &&
              (await advanceAuthProfile())
            ) {
              logPromptFailoverDecision("rotate_profile");
              await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
              continue;
            }
            const fallbackThinking = pickFallbackThinkingLevel({
              message: errorText,
              attempted: attemptedThinking,
            });
            if (fallbackThinking) {
              log.warn(
                `unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
              );
              thinkLevel = fallbackThinking;
              continue;
            }
            // Throw FailoverError for prompt-side failover reasons when fallbacks
            // are configured so outer model fallback can continue on overload,
            // rate-limit, auth, or billing failures.
            if (fallbackConfigured && promptFailoverFailure) {
              const status = resolveFailoverStatus(promptFailoverReason ?? "unknown");
              logPromptFailoverDecision("fallback_model", { status });
              await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
              throw new FailoverError(errorText, {
                reason: promptFailoverReason ?? "unknown",
                provider,
                model: modelId,
                profileId: lastProfileId,
                status,
              });
            }
            if (promptFailoverFailure || promptFailoverReason) {
              logPromptFailoverDecision("surface_error");
            }
            throw promptError;
          }

          const fallbackThinking = pickFallbackThinkingLevel({
            message: lastAssistant?.errorMessage,
            attempted: attemptedThinking,
          });
          if (fallbackThinking && !aborted) {
            log.warn(
              `unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
            );
            thinkLevel = fallbackThinking;
            continue;
          }

          const authFailure = isAuthAssistantError(lastAssistant);
          const rateLimitFailure = isRateLimitAssistantError(lastAssistant);
          const billingFailure = isBillingAssistantError(lastAssistant);
          const failoverFailure = isFailoverAssistantError(lastAssistant);
          const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
          const assistantProfileFailureReason =
            resolveAuthProfileFailureReason(assistantFailoverReason);
          const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
          const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? "");
          // Capture the failing profile before auth-profile rotation mutates `lastProfileId`.
          const failedAssistantProfileId = lastProfileId;
          const logAssistantFailoverDecision = createFailoverDecisionLogger({
            stage: "assistant",
            runId: params.runId,
            rawError: lastAssistant?.errorMessage?.trim(),
            failoverReason: assistantFailoverReason,
            profileFailureReason: assistantProfileFailureReason,
            provider: activeErrorContext.provider,
            model: activeErrorContext.model,
            profileId: failedAssistantProfileId,
            fallbackConfigured,
            timedOut,
            aborted,
          });

          if (
            authFailure &&
            (await maybeRefreshCopilotForAuthError(
              lastAssistant?.errorMessage ?? "",
              copilotAuthRetry,
            ))
          ) {
            authRetryPending = true;
            continue;
          }
          if (imageDimensionError && lastProfileId) {
            const details = [
              imageDimensionError.messageIndex !== undefined
                ? `message=${imageDimensionError.messageIndex}`
                : null,
              imageDimensionError.contentIndex !== undefined
                ? `content=${imageDimensionError.contentIndex}`
                : null,
              imageDimensionError.maxDimensionPx !== undefined
                ? `limit=${imageDimensionError.maxDimensionPx}px`
                : null,
            ]
              .filter(Boolean)
              .join(" ");
            log.warn(
              `Profile ${lastProfileId} rejected image payload${details ? ` (${details})` : ""}.`,
            );
          }

          // Rotate on timeout to try another account/model path in this turn,
          // but exclude post-prompt compaction timeouts (model succeeded; no profile issue).
          const shouldRotate =
            (!aborted && failoverFailure) || (timedOut && !timedOutDuringCompaction);

          if (shouldRotate) {
            // Only report against a concrete auth profile; without one there is
            // nothing to mark and nothing profile-specific to log.
            if (lastProfileId) {
              // A timeout takes precedence over whatever reason was classified
              // from the assistant error message.
              const reason = timedOut ? "timeout" : assistantProfileFailureReason;
              // Skip cooldown for timeouts: a timeout is model/network-specific,
              // not an auth issue. Marking the profile would poison fallback models
              // on the same provider (e.g. gpt-5.3 timeout blocks gpt-5.2).
              // NOTE(review): the call below is still made with reason "timeout";
              // presumably maybeMarkAuthProfileFailure ignores that reason
              // internally — confirm against the helper's definition.
              await maybeMarkAuthProfileFailure({
                profileId: lastProfileId,
                reason,
              });
              // Probe sessions stay quiet about timeouts.
              if (timedOut && !isProbeSession) {
                log.warn(`Profile ${lastProfileId} timed out. Trying next account...`);
              }
              if (cloudCodeAssistFormatError) {
                log.warn(
                  `Profile ${lastProfileId} hit Cloud Code Assist format error. Tool calls will be sanitized on retry.`,
                );
              }
            }

            const rotated = await advanceAuthProfile();
            if (rotated) {
              logAssistantFailoverDecision("rotate_profile");
              await maybeBackoffBeforeOverloadFailover(assistantFailoverReason);
              continue;
            }

            if (fallbackConfigured) {
              await maybeBackoffBeforeOverloadFailover(assistantFailoverReason);
              // Prefer formatted error message (user-friendly) over raw errorMessage
              const message =
                (lastAssistant
                  ? formatAssistantErrorText(lastAssistant, {
                      cfg: params.config,
                      sessionKey: params.sessionKey ?? params.sessionId,
                      provider: activeErrorContext.provider,
                      model: activeErrorContext.model,
                    })
                  : undefined) ||
                lastAssistant?.errorMessage?.trim() ||
                (timedOut
                  ? "LLM request timed out."
                  : rateLimitFailure
                    ? "LLM request rate limited."
                    : billingFailure
                      ? formatBillingErrorMessage(
                          activeErrorContext.provider,
                          activeErrorContext.model,
                        )
                      : authFailure
                        ? "LLM request unauthorized."
                        : "LLM request failed.");
              const status =
                resolveFailoverStatus(assistantFailoverReason ?? "unknown") ??
                (isTimeoutErrorMessage(message) ? 408 : undefined);
              logAssistantFailoverDecision("fallback_model", { status });
              throw new FailoverError(message, {
                reason: assistantFailoverReason ?? "unknown",
                provider: activeErrorContext.provider,
                model: activeErrorContext.model,
                profileId: lastProfileId,
                status,
              });
            }
            logAssistantFailoverDecision("surface_error");
          }

          const usage = toNormalizedUsage(usageAccumulator);
          if (usage && lastTurnTotal && lastTurnTotal > 0) {
            usage.total = lastTurnTotal;
          }
          // Extract the last individual API call's usage for context-window
          // utilization display. The accumulated `usage` sums input tokens
          // across all calls (tool-use loops, compaction retries), which
          // overstates the actual context size. `lastCallUsage` reflects only
          // the final call, giving an accurate snapshot of current context.
          const lastCallUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
          const promptTokens = derivePromptTokens(lastRunPromptUsage);
          const agentMeta: EmbeddedPiAgentMeta = {
            sessionId: sessionIdUsed,
            provider: lastAssistant?.provider ?? provider,
            model: lastAssistant?.model ?? model.id,
            usage,
            lastCallUsage: lastCallUsage ?? undefined,
            promptTokens,
            compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined,
          };

          const payloads = buildEmbeddedRunPayloads({
            assistantTexts: attempt.assistantTexts,
            toolMetas: attempt.toolMetas,
            lastAssistant: attempt.lastAssistant,
            lastToolError: attempt.lastToolError,
            config: params.config,
            sessionKey: params.sessionKey ?? params.sessionId,
            provider: activeErrorContext.provider,
            model: activeErrorContext.model,
            verboseLevel: params.verboseLevel,
            reasoningLevel: params.reasoningLevel,
            toolResultFormat: resolvedToolResultFormat,
            suppressToolErrorWarnings: params.suppressToolErrorWarnings,
            inlineToolResultsAllowed: false,
            didSendViaMessagingTool: attempt.didSendViaMessagingTool,
            didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt,
          });

          // Timeout aborts can leave the run without any assistant payloads.
          // Emit an explicit timeout error instead of silently completing, so
          // callers do not lose the turn as an orphaned user message.
          if (timedOut && !timedOutDuringCompaction && payloads.length === 0) {
            return {
              payloads: [
                {
                  text:
                    "Request timed out before a response was generated. " +
                    "Please try again, or increase `agents.defaults.timeoutSeconds` in your config.",
                  isError: true,
                },
              ],
              meta: {
                durationMs: Date.now() - started,
                agentMeta,
                aborted,
                systemPromptReport: attempt.systemPromptReport,
              },
              didSendViaMessagingTool: attempt.didSendViaMessagingTool,
              didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt,
              messagingToolSentTexts: attempt.messagingToolSentTexts,
              messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
              messagingToolSentTargets: attempt.messagingToolSentTargets,
              successfulCronAdds: attempt.successfulCronAdds,
            };
          }

          log.debug(
            `embedded run done: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - started} aborted=${aborted}`,
          );
          if (lastProfileId) {
            await markAuthProfileGood({
              store: authStore,
              provider,
              profileId: lastProfileId,
              agentDir: params.agentDir,
            });
            await markAuthProfileUsed({
              store: authStore,
              profileId: lastProfileId,
              agentDir: params.agentDir,
            });
          }
          return {
            payloads: payloads.length ? payloads : undefined,
            meta: {
              durationMs: Date.now() - started,
              agentMeta,
              aborted,
              systemPromptReport: attempt.systemPromptReport,
              // Handle client tool calls (OpenResponses hosted tools)
              // Propagate the LLM stop reason so callers (lifecycle events,
              // ACP bridge) can distinguish end_turn from max_tokens.
              stopReason: attempt.clientToolCall
                ? "tool_calls"
                : (lastAssistant?.stopReason as string | undefined),
              pendingToolCalls: attempt.clientToolCall
                ? [
                    {
                      id: randomBytes(5).toString("hex").slice(0, 9),
                      name: attempt.clientToolCall.name,
                      arguments: JSON.stringify(attempt.clientToolCall.params),
                    },
                  ]
                : undefined,
            },
            didSendViaMessagingTool: attempt.didSendViaMessagingTool,
            didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt,
            messagingToolSentTexts: attempt.messagingToolSentTexts,
            messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
            messagingToolSentTargets: attempt.messagingToolSentTargets,
            successfulCronAdds: attempt.successfulCronAdds,
          };
        }
      } finally {
        await contextEngine.dispose?.();
        stopCopilotRefreshTimer();
        process.chdir(prevCwd);
      }
    }),
  );
}
-												fix: sanitize tool call IDs in agent loop for Mistral strict9 format (#23595) (#23698)

* fix: sanitize tool call IDs in agent loop for Mistral strict9 format (#23595)

Mistral requires tool call IDs to be exactly 9 alphanumeric characters
([a-zA-Z0-9]{9}). The existing sanitizeToolCallIdsForCloudCodeAssist
mechanism only ran on historical messages at attempt start via
sanitizeSessionHistory, but the pi-agent-core agent loop's internal
tool call → tool result cycles bypassed that path entirely.

Changes:
- Wrap streamFn (like dropThinkingBlocks) so every outbound request
  sees sanitized tool call IDs when the transcript policy requires it
- Replace call_${Date.now()} in pendingToolCalls with a 9-char hex ID
  generated from crypto.randomBytes
- Add Mistral tool call ID error pattern to ERROR_PATTERNS.format so
  the error is correctly classified for retry/rotation

* Changelog: document Mistral strict9 tool-call ID fix

---------

Co-authored-by: echoVic <AkiraVic@outlook.com>
Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
											
										
										
											2026-02-23 02:37:12 +08:00
+								import { randomBytes } from "node:crypto";
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								import fs from "node:fs/promises";
 								import type { ThinkLevel } from "../../auto-reply/thinking.js";
-												feature(context): extend plugin system to support custom context management (#22201)

* feat(context-engine): add ContextEngine interface and registry

Introduce the pluggable ContextEngine abstraction that allows external
plugins to register custom context management strategies.

- ContextEngine interface with lifecycle methods: bootstrap, ingest,
  ingestBatch, afterTurn, assemble, compact, prepareSubagentSpawn,
  onSubagentEnded, dispose
- Module-level singleton registry with registerContextEngine() and
  resolveContextEngine() (config-driven slot selection)
- LegacyContextEngine: pass-through implementation wrapping existing
  compaction behavior for 100% backward compatibility
- ensureContextEnginesInitialized() guard for safe one-time registration
- 19 tests covering contract, registry, resolution, and legacy parity

* feat(plugins): add context-engine slot and registerContextEngine API

Wire the ContextEngine abstraction into the plugin system so external
plugins can register context engines via the standard plugin API.

- Add 'context-engine' to PluginKind union type
- Add 'contextEngine' slot to PluginSlotsConfig (default: 'legacy')
- Wire registerContextEngine() through OpenClawPluginApi
- Export ContextEngine types from plugin-sdk for external consumers
- Restore proper slot-based resolution in registry

* feat(context-engine): wire ContextEngine into agent run lifecycle

Integrate the ContextEngine abstraction into the core agent run path:

- Resolve context engine once per run (reused across retries)
- Bootstrap: hydrate canonical store from session file on first run
- Assemble: route context assembly through pluggable engine
- Auto-compaction guard: disable built-in auto-compaction when
  the engine declares ownsCompaction (prevents double-compaction)
- AfterTurn: post-turn lifecycle hook for ingest + background
  compaction decisions
- Overflow compaction: route through contextEngine.compact()
- Dispose: clean up engine resources in finally block
- Notify context engine on subagent lifecycle events

Legacy engine: all lifecycle methods are pass-through/no-op, preserving
100% backward compatibility for users without a context engine plugin.

* feat(plugins): add scoped subagent methods and gateway request scope

Expose runtime.subagent.{run, waitForRun, getSession, deleteSession}
so external plugins can spawn sub-agent sessions without raw gateway
dispatch access.

Uses AsyncLocalStorage request-scope bridge to dispatch internally via
handleGatewayRequest with a synthetic operator client. Methods are only
available during gateway request handling.

- Symbol.for-backed global singleton for cross-module-reload safety
- Fallback gateway context for non-WS dispatch paths (Telegram/WhatsApp)
- Set gateway request scope for all handlers, not just plugin handlers
- 3 staleness tests for fallback context hardening

* feat(context-engine): route /compact and sessions.get through context engine

Wire the /compact command and sessions.get handler through the pluggable
ContextEngine interface.

- Thread tokenBudget and force parameters to context engine compact
- Route /compact through contextEngine.compact() when registered
- Wire sessions.get as runtime alias for plugin subagent dispatch
- Add .pebbles/ to .gitignore

* style: format with oxfmt 0.33.0

Fix duplicate import (ControlUiRootState in server.impl.ts) and
import ordering across all changed files.

* fix: update extension test mocks for context-engine types

Add missing subagent property to bluebubbles PluginRuntime mock.
Add missing registerContextEngine to lobster OpenClawPluginApi mock.

* fix(subagents): keep deferred delete cleanup retryable

* style: format run attempt for CI

* fix(rebase): remove duplicate embedded-run imports

* test: add missing gateway context mock export

* fix: pass resolved auth profile into afterTurn compaction

Ensure the embedded runner forwards resolved auth profile context into
legacy context-engine compaction params on the normal afterTurn path,
matching overflow compaction behavior. This allows downstream LCM
summarization to use the intended provider auth/profile consistently.

Also fix strict TS typing in external-link token dedupe and align an
attempt unit test reasoningLevel value with the current ReasoningLevel
enum.

Regeneration-Prompt: |
  We were debugging context-engine compaction where downstream summary
  calls were missing the right auth/profile context in normal afterTurn
  flow, while overflow compaction already propagated it. Preserve current
  behavior and keep changes additive: thread the resolved authProfileId
  through run -> attempt -> legacy compaction param builder without
  broad refactors.

  Add tests that prove the auth profile is included in afterTurn legacy
  params and that overflow compaction still passes it through run
  attempts. Keep existing APIs stable, and only adjust small type issues
  needed for strict compilation.

* fix: remove duplicate imports from rebase

* feat: add context-engine system prompt additions

* fix(rebase): dedupe attempt import declarations

* test: fix fetch mock typing in ollama autodiscovery

* fix(test): add registerContextEngine to diffs extension mock APIs

* test(windows): use path.delimiter in ios-team-id fixture PATH

* test(cron): add model formatting and precedence edge case tests

Covers:
- Provider/model string splitting (whitespace, nested paths, empty segments)
- Provider normalization (casing, aliases like bedrock→amazon-bedrock)
- Anthropic model alias normalization (opus-4.5→claude-opus-4-5)
- Precedence: job payload > session override > config default
- Sequential runs with different providers (CI flake regression pattern)
- forceNew session preserving stored model overrides
- Whitespace/empty model string edge cases
- Config model as string vs object format

* test(cron): fix model formatting test config types

* test(phone-control): add registerContextEngine to mock API

* fix: re-export ChannelKind from config-reload-plan

* fix: add subagent mock to plugin-runtime-mock test util

* docs: add changelog fragment for context engine PR #22201
											
										
										
											2026-03-06 05:31:59 -08:00
+								import {
 								  ensureContextEnginesInitialized,
 								  resolveContextEngine,
 								} from "../../context-engine/index.js";
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								import { computeBackoff, sleepWithAbort, type BackoffPolicy } from "../../infra/backoff.js";
-												refactor(security): unify secure id paths and guard weak patterns

											
										
										
											2026-02-22 10:14:55 +01:00
+								import { generateSecureToken } from "../../infra/secure-random.js";
-												feat(plugins): add modelOverride/providerOverride to before_agent_start hook

Enable plugins to override the model and provider for agent runs by
returning modelOverride/providerOverride from the before_agent_start
hook. The hook is now invoked early in run.ts (before resolveModel)
so overrides take effect. The result is passed to attempt.ts via
earlyHookResult to prevent double-firing.

This enables security-critical use cases like routing PII-containing
prompts to local models instead of cloud providers.

											
										
										
											2026-02-15 12:05:29 -05:00
+								import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
-												Plugins/Hooks: avoid duplicate before_agent_start executions

											
										
										
											2026-02-21 22:31:51 -08:00
+								import type { PluginHookBeforeAgentStartResult } from "../../plugins/types.js";
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								import { enqueueCommandInLane } from "../../process/command-queue.js";
-												fix: format verbose tool output by channel

											
										
										
											2026-01-17 10:17:57 +00:00
+								import { isMarkdownCapableMessageChannel } from "../../utils/message-channel.js";
-												refactor: rename to openclaw

											
										
										
											2026-01-30 03:15:10 +01:00
+								import { resolveOpenClawAgentDir } from "../agent-paths.js";
-												refactor(agents): centralize model fallback resolution

											
										
										
											2026-02-25 04:32:25 +00:00
+								import { hasConfiguredModelFallbacks } from "../agent-scope.js";
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								import {
-												fix(auth): skip auth profiles in cooldown during selection and rotation

Auth profiles in cooldown (due to rate limiting) were being attempted,
causing unnecessary retries and delays. This fix ensures:

1. Initial profile selection skips profiles in cooldown
2. Profile rotation (after failures) skips cooldown profiles
3. Clear error message when all profiles are unavailable

Tests added:
- Skips profiles in cooldown during initial selection
- Skips profiles in cooldown when rotating after failure

Fixes #1316

											
										
										
											2026-01-22 10:04:56 +01:00
+								  isProfileInCooldown,
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								  type AuthProfileFailureReason,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								  markAuthProfileFailure,
 								  markAuthProfileGood,
 								  markAuthProfileUsed,
-												Agents: infer auth-profile unavailable failover reason

											
										
										
											2026-02-22 16:10:24 -08:00
+								  resolveProfilesUnavailableReason,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								} from "../auth-profiles.js";
 								import {
 								  CONTEXT_WINDOW_HARD_MIN_TOKENS,
 								  CONTEXT_WINDOW_WARN_BELOW_TOKENS,
 								  evaluateContextWindowGuard,
 								  resolveContextWindowInfo,
 								} from "../context-window-guard.js";
-												chore: migrate to oxlint and oxfmt

Co-authored-by: Christoph Nakazawa <christoph.pojer@gmail.com>

											
										
										
											2026-01-14 14:31:43 +00:00
+								import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								import { FailoverError, resolveFailoverStatus } from "../failover-error.js";
 								import {
 								  ensureAuthProfileStore,
 								  getApiKeyForModel,
 								  resolveAuthProfileOrder,
-												refactor: add aws-sdk auth mode and tighten provider auth

											
										
										
											2026-01-20 07:53:25 +00:00
+								  type ResolvedProviderAuth,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								} from "../model-auth.js";
-												fix: normalize model override auth handling

											
										
										
											2026-01-21 06:00:16 +00:00
+								import { normalizeProviderId } from "../model-selection.js";
-												refactor: rename to openclaw

											
										
										
											2026-01-30 03:15:10 +01:00
+								import { ensureOpenClawModelsJson } from "../models-config.js";
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								import {
-												fix(providers): include provider name in billing error messages (#14697)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 774e0b660514d59fea48bda0e300e94b398f58e8
Co-authored-by: fagemx <117356295+fagemx@users.noreply.github.com>
Co-authored-by: shakkernerd <165377636+shakkernerd@users.noreply.github.com>
Reviewed-by: @shakkernerd

											
										
										
											2026-02-13 02:23:27 +08:00
+								  formatBillingErrorMessage,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								  classifyFailoverReason,
 								  formatAssistantErrorText,
 								  isAuthAssistantError,
-												fix(errors): show clear billing error instead of cryptic API response (#8391)

* fix(errors): return clear billing error message instead of cryptic raw error (#8136)

When an LLM API provider returns a credit/billing-related error (HTTP 402,
insufficient credits, low balance, etc.), OpenClaw now shows a clear,
actionable message instead of passing through the raw/cryptic error text:

  ⚠️ API provider returned a billing error — your API key has run out of
  credits or has an insufficient balance. Check your provider's billing
  dashboard and top up or switch to a different API key.

Changes:
- formatAssistantErrorText: detect billing errors via isBillingErrorMessage()
  and return a user-friendly message (placed before the generic HTTP/JSON
  error fallthrough)
- sanitizeUserFacingText: same billing detection for the sanitization path
- pi-embedded-runner/run.ts: add billingFailure detection in the profile
  exhaustion fallback, so the FailoverError message is billing-specific
- Added 3 new tests for credit balance, HTTP 402, and insufficient credits

* fix: extract billing error message to shared constant
											
										
										
											2026-02-05 17:58:43 -04:00
+								  isBillingAssistantError,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								  isCompactionFailureError,
-												fix(agents): stabilize overflow compaction retries and session context accounting (openclaw#14102) thanks @vpesh

Verified:
- CI checks for commit 86a7ecb45ebf0be61dce9261398000524fd9fab6
- Rebase conflict resolution for compatibility with latest main

Co-authored-by: vpesh <9496634+vpesh@users.noreply.github.com>

											
										
										
											2026-02-13 00:53:13 +01:00
+								  isLikelyContextOverflowError,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								  isFailoverAssistantError,
 								  isFailoverErrorMessage,
-												fix: avoid global image size regression

											
										
										
											2026-01-27 15:59:11 -06:00
+								  parseImageSizeError,
-												fix: sanitize oversized image payloads

											
										
										
											2026-01-18 15:19:25 +00:00
+								  parseImageDimensionError,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								  isRateLimitAssistantError,
 								  isTimeoutErrorMessage,
 								  pickFallbackThinkingLevel,
-												fix: trigger fallback on auth profile exhaustion

											
										
										
											2026-01-24 06:14:17 +00:00
+								  type FailoverReason,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								} from "../pi-embedded-helpers.js";
-												fix(agents): bootstrap runtime plugins before context-engine resolution

											
										
										
											2026-03-08 23:38:14 +00:00
+								import { ensureRuntimePluginsLoaded } from "../runtime-plugins.js";
-												fix(agents): stabilize overflow compaction retries and session context accounting (openclaw#14102) thanks @vpesh

Verified:
- CI checks for commit 86a7ecb45ebf0be61dce9261398000524fd9fab6
- Rebase conflict resolution for compatibility with latest main

Co-authored-by: vpesh <9496634+vpesh@users.noreply.github.com>

											
										
										
											2026-02-13 00:53:13 +01:00
+								import { derivePromptTokens, normalizeUsage, type UsageLike } from "../usage.js";
-												fix: guard resolveUserPath against undefined input (#10176)

* fix: guard resolveUserPath against undefined input

When subagent spawner omits workspaceDir, resolveUserPath receives
undefined and crashes on .trim().  Add a falsy guard that falls back
to process.cwd(), matching the behavior callers already expect.

Closes #10089

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: harden runner workspace fallback (#10176) (thanks @Yida-Dev)

* fix: harden workspace fallback scoping (#10176) (thanks @Yida-Dev)

* refactor: centralize workspace fallback classification and redaction (#10176) (thanks @Yida-Dev)

* test: remove explicit any from utils mock (#10176) (thanks @Yida-Dev)

* security: reject malformed agent session keys for workspace resolution (#10176) (thanks @Yida-Dev)

---------

Co-authored-by: Yida-Dev <reyifeijun@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Gustavo Madeira Santana <gumadeiras@gmail.com>
											
										
										
											2026-02-07 01:16:58 +07:00
+								import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js";
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
 								import { log } from "./logger.js";
 								import { resolveModel } from "./model.js";
 								import { runEmbeddedAttempt } from "./run/attempt.js";
-												Agents: add embedded error observations (#41336)

Merged via squash.

Prepared head SHA: 490004229862129ceb21939e382658714e23bd68
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
											
										
										
											2026-03-09 22:27:05 +03:00
+								import { createFailoverDecisionLogger } from "./run/failover-observation.js";
-												style: align formatting with oxfmt 0.33

											
										
										
											2026-02-18 01:34:35 +00:00
+								import type { RunEmbeddedPiAgentParams } from "./run/params.js";
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								import { buildEmbeddedRunPayloads } from "./run/payloads.js";
-												fix: recover from context overflow caused by oversized tool results (#11579)

* fix: gracefully handle oversized tool results causing context overflow

When a subagent reads a very large file or gets a huge tool result (e.g.,
gh pr diff on a massive PR), it can exceed the model's context window in
a single prompt. Auto-compaction can't help because there's no older
history to compact — just one giant tool result.

This adds two layers of defense:

1. Pre-emptive: Hard cap on tool result size (400K chars ≈ 100K tokens)
   applied in the session tool result guard before persistence. This
   prevents extremely large tool results from being stored in full,
   regardless of model context window size.

2. Recovery: When context overflow is detected and compaction fails,
   scan session messages for oversized tool results relative to the
   model's actual context window (30% max share). If found, truncate
   them in the session via branching (creating a new branch with
   truncated content) and retry the prompt.

The truncation preserves the beginning of the content (most useful for
understanding what was read) and appends a notice explaining the
truncation and suggesting offset/limit parameters for targeted reads.

Includes comprehensive tests for:
- Text truncation with newline-boundary awareness
- Context-window-proportional size calculation
- In-memory message truncation
- Oversized detection heuristics
- Guard-level size capping during persistence

* fix: prep fixes for tool result truncation PR (#11579) (thanks @tyler6204)
											
										
										
											2026-02-07 17:40:51 -08:00
+								import {
 								  truncateOversizedToolResultsInSession,
 								  sessionLikelyHasOversizedToolResults,
 								} from "./tool-result-truncation.js";
-												style: align formatting with oxfmt 0.33

											
										
										
											2026-02-18 01:34:35 +00:00
+								import type { EmbeddedPiAgentMeta, EmbeddedPiRunResult } from "./types.js";
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								import { describeUnknownError } from "./utils.js";
-												refactor: add aws-sdk auth mode and tighten provider auth

											
										
										
											2026-01-20 07:53:25 +00:00
 								/** Local alias for the `ResolvedProviderAuth` type imported from `../model-auth.js`. */
+								type ApiKeyInfo = ResolvedProviderAuth;
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
-												fix: refresh Copilot token before expiry and retry on auth errors

GitHub Copilot API tokens expire after ~30 minutes. When OpenClaw spawns
a long-running subagent using Copilot as the provider, the token would
expire mid-session with no recovery mechanism, causing 401 auth errors.

This commit adds:
- Periodic token refresh scheduled 5 minutes before expiry
- Auth error detection with automatic token refresh and single retry
- Proper timer cleanup on session shutdown to prevent leaks

The implementation uses a per-attempt retry flag to ensure each auth
error can trigger one refresh+retry cycle without creating infinite
retry loops.

🤖 AI-assisted: This fix was developed with GitHub Copilot CLI assistance.
Testing: Fully tested with 3 new unit tests covering auth retry, retry
reset, and timer cleanup scenarios. All 11 auth rotation tests pass.

											
										
										
											2026-02-04 09:13:59 -03:00
 								/**
 								 * State tracked for a GitHub Copilot token.
 								 * NOTE(review): the field notes below are inferred from names and types only;
 								 * confirm against the code that creates and updates this state.
 								 */
+								type CopilotTokenState = {
 								  // GitHub token string (presumably the credential used to obtain the Copilot API token; confirm).
 								  githubToken: string;
 								  // Expiry as a number; presumably epoch milliseconds (TODO confirm).
 								  expiresAt: number;
 								  // Handle returned by setTimeout; presumably the pending scheduled refresh, if any.
 								  refreshTimer?: ReturnType<typeof setTimeout>;
 								  // Promise for a refresh; presumably set while one is running so callers can await it (confirm).
 								  refreshInFlight?: Promise<void>;
 								};
 								// 5 minutes in milliseconds. NOTE(review): presumably how far ahead of token expiry a refresh is scheduled; confirm at the use site.
 								const COPILOT_REFRESH_MARGIN_MS = 5 * 60 * 1000;
 								// 60 seconds in milliseconds. NOTE(review): presumably the delay before retrying a failed refresh; confirm at the use site.
 								const COPILOT_REFRESH_RETRY_MS = 60 * 1000;
 								// 5 seconds in milliseconds. NOTE(review): presumably a lower bound on the scheduled refresh delay; confirm at the use site.
 								const COPILOT_REFRESH_MIN_DELAY_MS = 5 * 1000;
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								// Keep overload pacing noticeable enough to avoid tight retry bursts, but short
 								// enough that fallback still feels responsive within a single turn.
 								const OVERLOAD_FAILOVER_BACKOFF_POLICY: BackoffPolicy = {
 								  // NOTE(review): field meanings come from BackoffPolicy in ../../infra/backoff.js, which is not
 								  // visible here. Presumably: first delay (ms), delay cap (ms), growth multiplier, and jitter
 								  // fraction; confirm against that module.
 								  initialMs: 250,
 								  maxMs: 1_500,
 								  factor: 2,
 								  jitter: 0.2,
 								};
-												fix: refresh Copilot token before expiry and retry on auth errors

GitHub Copilot API tokens expire after ~30 minutes. When OpenClaw spawns
a long-running subagent using Copilot as the provider, the token would
expire mid-session with no recovery mechanism, causing 401 auth errors.

This commit adds:
- Periodic token refresh scheduled 5 minutes before expiry
- Auth error detection with automatic token refresh and single retry
- Proper timer cleanup on session shutdown to prevent leaks

The implementation uses a per-attempt retry flag to ensure each auth
error can trigger one refresh+retry cycle without creating infinite
retry loops.

🤖 AI-assisted: This fix was developed with GitHub Copilot CLI assistance.
Testing: Fully tested with 3 new unit tests covering auth retry, retry
reset, and timer cleanup scenarios. All 11 auth rotation tests pass.

											
										
										
											2026-02-04 09:13:59 -03:00
-												fix: guard anthropic refusal trigger

											
										
										
											2026-01-21 07:28:11 +00:00
+								// Avoid Anthropic's refusal test token poisoning session transcripts.
 								const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
 								const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)";
 								/**
 								 * Replaces every occurrence of ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL in `prompt`
 								 * with ANTHROPIC_MAGIC_STRING_REPLACEMENT.
 								 *
 								 * @param prompt - Text to scrub.
 								 * @returns The same string when the trigger is absent; otherwise a copy with all
 								 *   occurrences replaced.
 								 */
 								function scrubAnthropicRefusalMagic(prompt: string): string {
-												chore: Enable "curly" rule to avoid single-statement if confusion/errors.

											
										
										
											2026-01-31 16:19:20 +09:00
+								  if (!prompt.includes(ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL)) {
 								    return prompt;
 								  }
-												fix: guard anthropic refusal trigger

											
										
										
											2026-01-21 07:28:11 +00:00
+								  return prompt.replaceAll(
 								    ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL,
 								    ANTHROPIC_MAGIC_STRING_REPLACEMENT,
 								  );
 								}
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
 								/**
 								 * Token-usage counters. `input`, `output`, `cacheRead`, `cacheWrite` and `total`
 								 * are running totals; the `last*` fields hold values from the most recent API
 								 * call and are not summed.
 								 */
+								type UsageAccumulator = {
 								  input: number;
 								  output: number;
 								  cacheRead: number;
 								  cacheWrite: number;
 								  total: number;
-												fix: use last API call's cache tokens for context-size display (#13698) (#13805)

The UsageAccumulator sums cacheRead/cacheWrite across all API calls
within a single turn. With Anthropic prompt caching, each call reports
cacheRead ≈ current_context_size, so after N tool-call round-trips the
accumulated total becomes N × actual_context, which gets clamped to
contextWindow (200k) by deriveSessionTotalTokens().

Fix: track the most recent API call's cache fields separately and use
them in toNormalizedUsage() for context-size reporting. This makes
/status Context display accurate while preserving accumulated output
token counts.

Fixes #13698
Fixes #13782

Co-authored-by: akari-musubi <259925157+akari-musubi@users.noreply.github.com>
											
										
										
											2026-02-12 23:01:36 +09:00
+								  /** Cache fields from the most recent API call (not accumulated). */
 								  lastCacheRead: number;
 								  lastCacheWrite: number;
 								  lastInput: number;
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								};
 								/** Builds a fresh UsageAccumulator with every counter, including the `last*` fields, set to 0. */
 								const createUsageAccumulator = (): UsageAccumulator => ({
 								  input: 0,
 								  output: 0,
 								  cacheRead: 0,
 								  cacheWrite: 0,
 								  total: 0,
-												fix: use last API call's cache tokens for context-size display (#13698) (#13805)

The UsageAccumulator sums cacheRead/cacheWrite across all API calls
within a single turn. With Anthropic prompt caching, each call reports
cacheRead ≈ current_context_size, so after N tool-call round-trips the
accumulated total becomes N × actual_context, which gets clamped to
contextWindow (200k) by deriveSessionTotalTokens().

Fix: track the most recent API call's cache fields separately and use
them in toNormalizedUsage() for context-size reporting. This makes
/status Context display accurate while preserving accumulated output
token counts.

Fixes #13698
Fixes #13782

Co-authored-by: akari-musubi <259925157+akari-musubi@users.noreply.github.com>
											
										
										
											2026-02-12 23:01:36 +09:00
+								  lastCacheRead: 0,
 								  lastCacheWrite: 0,
 								  lastInput: 0,
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								});
-												feat: add pre-prompt context size diagnostic logging (openclaw#8930) thanks @Glucksberg

Verified:
- pnpm build
- pnpm check
- pnpm test

Co-authored-by: Glucksberg <80581902+Glucksberg@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>

											
										
										
											2026-02-13 19:54:22 -04:00
+								function createCompactionDiagId(): string {
-												refactor(security): unify secure id paths and guard weak patterns

											
										
										
											2026-02-22 10:14:55 +01:00
+								  return `ovf-${Date.now().toString(36)}-${generateSecureToken(4)}`;
-												feat: add pre-prompt context size diagnostic logging (openclaw#8930) thanks @Glucksberg

Verified:
- pnpm build
- pnpm check
- pnpm test

Co-authored-by: Glucksberg <80581902+Glucksberg@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>

											
										
										
											2026-02-13 19:54:22 -04:00
+								}
-												fix(agents): cap embedded runner retry loop

											
										
										
											2026-02-21 15:35:45 +01:00
+								// Defensive guard for the outer run loop across all retry branches.
-												fix(agents): raise dynamic retry cap budget

											
										
										
											2026-02-21 15:41:03 +01:00
+								const BASE_RUN_RETRY_ITERATIONS = 24;
 								const RUN_RETRY_ITERATIONS_PER_PROFILE = 8;
 								const MIN_RUN_RETRY_ITERATIONS = 32;
 								const MAX_RUN_RETRY_ITERATIONS = 160;
 								function resolveMaxRunRetryIterations(profileCandidateCount: number): number {
 								  const scaled =
 								    BASE_RUN_RETRY_ITERATIONS +
 								    Math.max(1, profileCandidateCount) * RUN_RETRY_ITERATIONS_PER_PROFILE;
 								  return Math.min(MAX_RUN_RETRY_ITERATIONS, Math.max(MIN_RUN_RETRY_ITERATIONS, scaled));
 								}
-												fix(agents): cap embedded runner retry loop

											
										
										
											2026-02-21 15:35:45 +01:00
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								const hasUsageValues = (
 								  usage: ReturnType<typeof normalizeUsage>,
 								): usage is NonNullable<ReturnType<typeof normalizeUsage>> =>
 								  !!usage &&
 								  [usage.input, usage.output, usage.cacheRead, usage.cacheWrite, usage.total].some(
 								    (value) => typeof value === "number" && Number.isFinite(value) && value > 0,
 								  );
 								/**
 								 * Folds the usage of one API call into a running accumulator (mutates `target`).
 								 *
 								 * Does nothing when `hasUsageValues(usage)` is false. Otherwise the
 								 * input/output/cacheRead/cacheWrite counters are summed, `total` grows by the
 								 * reported `usage.total` (or by the sum of the four fields when no total was
 								 * reported), and the `last*` fields are overwritten with this call's values.
 								 *
 								 * @param target Accumulator to update in place.
 								 * @param usage Normalized usage of the call being merged; may be nullish.
 								 */
 								const mergeUsageIntoAccumulator = (
 								  target: UsageAccumulator,
 								  usage: ReturnType<typeof normalizeUsage>,
 								) => {
 								  if (!hasUsageValues(usage)) {
 								    return;
 								  }
 								  const input = usage.input ?? 0;
 								  const output = usage.output ?? 0;
 								  const cacheRead = usage.cacheRead ?? 0;
 								  const cacheWrite = usage.cacheWrite ?? 0;
 								  target.input += input;
 								  target.output += output;
 								  target.cacheRead += cacheRead;
 								  target.cacheWrite += cacheWrite;
 								  // Prefer the reported total; fall back to the sum of the individual fields.
 								  target.total += usage.total ?? input + output + cacheRead + cacheWrite;
 								  // Track the most recent API call's cache fields for accurate context-size reporting.
 								  // Accumulated cache totals inflate context size when there are multiple tool-call round-trips,
 								  // since each call reports cacheRead ≈ current_context_size.
 								  target.lastCacheRead = cacheRead;
 								  target.lastCacheWrite = cacheWrite;
 								  target.lastInput = input;
 								};
 								/**
 								 * Converts an accumulator into a normalized usage record for reporting.
 								 *
 								 * Returns `undefined` when none of the accumulated counters
 								 * (input/output/cacheRead/cacheWrite/total) is positive. Otherwise the record
 								 * uses the LAST API call's input/cacheRead/cacheWrite together with the
 								 * accumulated output; any field whose value is 0 is reported as `undefined`.
 								 *
 								 * @param usage Accumulator to convert; it is not modified.
 								 * @returns The normalized usage record, or `undefined` if nothing was recorded.
 								 */
 								const toNormalizedUsage = (usage: UsageAccumulator) => {
 								  const hasUsage =
 								    usage.input > 0 ||
 								    usage.output > 0 ||
 								    usage.cacheRead > 0 ||
 								    usage.cacheWrite > 0 ||
 								    usage.total > 0;
 								  if (!hasUsage) {
 								    return undefined;
 								  }
 								  // Use the LAST API call's cache fields for context-size calculation.
 								  // The accumulated cacheRead/cacheWrite inflate context size because each tool-call
 								  // round-trip reports cacheRead ≈ current_context_size, and summing N calls gives
 								  // N × context_size which gets clamped to contextWindow (e.g. 200k).
 								  // See: https://github.com/openclaw/openclaw/issues/13698
 								  //
 								  // We use lastInput/lastCacheRead/lastCacheWrite (from the most recent API call) for
 								  // cache-related fields, but keep accumulated output (total generated text this turn).
 								  const lastPromptTokens = usage.lastInput + usage.lastCacheRead + usage.lastCacheWrite;
 								  return {
 								    input: usage.lastInput || undefined,
 								    output: usage.output || undefined,
 								    cacheRead: usage.lastCacheRead || undefined,
 								    cacheWrite: usage.lastCacheWrite || undefined,
 								    // Parenthesized for clarity: the sum is computed first, then 0 maps to undefined.
 								    total: (lastPromptTokens + usage.output) || undefined,
 								  };
 								};
-												fix: include provider and model name in billing error message (#20510)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 40dbdf62e8952dd6c5afcb9ce2a73199f3f532a6
Co-authored-by: echoVic <16428813+echoVic@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras

											
										
										
											2026-02-19 10:56:00 +08:00
+								function resolveActiveErrorContext(params: {
 								  lastAssistant: { provider?: string; model?: string } | undefined;
 								  provider: string;
 								  model: string;
 								}): { provider: string; model: string } {
 								  return {
 								    provider: params.lastAssistant?.provider ?? params.provider,
 								    model: params.lastAssistant?.model ?? params.model,
 								  };
 								}
-												agents: preserve totalTokens on request failure instead of using contextWindow (#34275)

Merged via squash.

Prepared head SHA: f9d111d0a79a07815d476356e98a28df3a0000ba
Co-authored-by: RealKai42 <44634134+RealKai42@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman

											
										
										
											2026-03-05 04:02:22 +08:00
+								/**
 								 * Build agentMeta for error return paths, preserving accumulated usage so that
 								 * session totalTokens reflects the actual context size rather than going stale.
 								 * Without this, error returns omit usage and the session keeps whatever
 								 * totalTokens was set by the previous successful run.
 								 */
 								function buildErrorAgentMeta(params: {
 								  sessionId: string;
 								  provider: string;
 								  model: string;
 								  usageAccumulator: UsageAccumulator;
 								  lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
 								  lastAssistant?: { usage?: unknown } | null;
 								  /** API-reported total from the most recent call, mirroring the success path correction. */
 								  lastTurnTotal?: number;
 								}): EmbeddedPiAgentMeta {
 								  const usage = toNormalizedUsage(params.usageAccumulator);
 								  // Apply the same lastTurnTotal correction the success path uses so
 								  // usage.total reflects the API-reported context size, not accumulated totals.
 								  if (usage && params.lastTurnTotal && params.lastTurnTotal > 0) {
 								    usage.total = params.lastTurnTotal;
 								  }
 								  const lastCallUsage = params.lastAssistant
 								    ? normalizeUsage(params.lastAssistant.usage as UsageLike)
 								    : undefined;
 								  const promptTokens = derivePromptTokens(params.lastRunPromptUsage);
 								  return {
 								    sessionId: params.sessionId,
 								    provider: params.provider,
 								    model: params.model,
 								    // Only include usage fields when we have actual data from prior API calls.
 								    ...(usage ? { usage } : {}),
 								    ...(lastCallUsage ? { lastCallUsage } : {}),
 								    ...(promptTokens ? { promptTokens } : {}),
 								  };
 								}
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								export async function runEmbeddedPiAgent(
 								  params: RunEmbeddedPiAgentParams,
 								): Promise<EmbeddedPiRunResult> {
-												chore: migrate to oxlint and oxfmt

Co-authored-by: Christoph Nakazawa <christoph.pojer@gmail.com>

											
										
										
											2026-01-14 14:31:43 +00:00
+								  const sessionLane = resolveSessionLane(params.sessionKey?.trim() || params.sessionId);
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								  const globalLane = resolveGlobalLane(params.lane);
 								  const enqueueGlobal =
-												chore: migrate to oxlint and oxfmt

Co-authored-by: Christoph Nakazawa <christoph.pojer@gmail.com>

											
										
										
											2026-01-14 14:31:43 +00:00
+								    params.enqueue ?? ((task, opts) => enqueueCommandInLane(globalLane, task, opts));
-												fix: stabilize embedded runner queueing

											
										
										
											2026-01-24 02:05:31 +00:00
+								  const enqueueSession =
 								    params.enqueue ?? ((task, opts) => enqueueCommandInLane(sessionLane, task, opts));
-												fix: format verbose tool output by channel

											
										
										
											2026-01-17 10:17:57 +00:00
+								  const channelHint = params.messageChannel ?? params.messageProvider;
 								  const resolvedToolResultFormat =
 								    params.toolResultFormat ??
 								    (channelHint
 								      ? isMarkdownCapableMessageChannel(channelHint)
 								        ? "markdown"
 								        : "plain"
 								      : "markdown");
-												fix: silence probe timeouts

											
										
										
											2026-01-24 00:04:53 +00:00
+								  const isProbeSession = params.sessionId?.startsWith("probe-") ?? false;
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
-												fix: stabilize embedded runner queueing

											
										
										
											2026-01-24 02:05:31 +00:00
+								  return enqueueSession(() =>
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								    enqueueGlobal(async () => {
 								      const started = Date.now();
-												fix: guard resolveUserPath against undefined input (#10176)

* fix: guard resolveUserPath against undefined input

When subagent spawner omits workspaceDir, resolveUserPath receives
undefined and crashes on .trim().  Add a falsy guard that falls back
to process.cwd(), matching the behavior callers already expect.

Closes #10089

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: harden runner workspace fallback (#10176) (thanks @Yida-Dev)

* fix: harden workspace fallback scoping (#10176) (thanks @Yida-Dev)

* refactor: centralize workspace fallback classification and redaction (#10176) (thanks @Yida-Dev)

* test: remove explicit any from utils mock (#10176) (thanks @Yida-Dev)

* security: reject malformed agent session keys for workspace resolution (#10176) (thanks @Yida-Dev)

---------

Co-authored-by: Yida-Dev <reyifeijun@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Gustavo Madeira Santana <gumadeiras@gmail.com>
											
										
										
											2026-02-07 01:16:58 +07:00
+								      const workspaceResolution = resolveRunWorkspaceDir({
 								        workspaceDir: params.workspaceDir,
 								        sessionKey: params.sessionKey,
 								        agentId: params.agentId,
 								        config: params.config,
 								      });
 								      const resolvedWorkspace = workspaceResolution.workspaceDir;
 								      const redactedSessionId = redactRunIdentifier(params.sessionId);
 								      const redactedSessionKey = redactRunIdentifier(params.sessionKey);
 								      const redactedWorkspace = redactRunIdentifier(resolvedWorkspace);
 								      if (workspaceResolution.usedFallback) {
 								        log.warn(
 								          `[workspace-fallback] caller=runEmbeddedPiAgent reason=${workspaceResolution.fallbackReason} run=${params.runId} session=${redactedSessionId} sessionKey=${redactedSessionKey} agent=${workspaceResolution.agentId} workspace=${redactedWorkspace}`,
 								        );
 								      }
-												fix(agents): bootstrap runtime plugins before context-engine resolution

											
										
										
											2026-03-08 23:38:14 +00:00
+								      ensureRuntimePluginsLoaded({
 								        config: params.config,
 								        workspaceDir: resolvedWorkspace,
 								      });
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								      const prevCwd = process.cwd();
-												feat(plugins): add modelOverride/providerOverride to before_agent_start hook

Enable plugins to override the model and provider for agent runs by
returning modelOverride/providerOverride from the before_agent_start
hook. The hook is now invoked early in run.ts (before resolveModel)
so overrides take effect. The result is passed to attempt.ts via
earlyHookResult to prevent double-firing.

This enables security-critical use cases like routing PII-containing
prompts to local models instead of cloud providers.

											
										
										
											2026-02-15 12:05:29 -05:00
+								      let provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
 								      let modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
-												refactor: rename to openclaw

											
										
										
											2026-01-30 03:15:10 +01:00
+								      const agentDir = params.agentDir ?? resolveOpenClawAgentDir();
-												refactor(agents): centralize model fallback resolution

											
										
										
											2026-02-25 04:32:25 +00:00
+								      const fallbackConfigured = hasConfiguredModelFallbacks({
 								        cfg: params.config,
 								        agentId: params.agentId,
 								        sessionKey: params.sessionKey,
 								      });
-												refactor: rename to openclaw

											
										
										
											2026-01-30 03:15:10 +01:00
+								      await ensureOpenClawModelsJson(params.config, agentDir);
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
-												refactor(plugins): split before-agent hooks by model and prompt phases

											
										
										
											2026-02-17 03:28:10 +01:00
+								      // Run before_model_resolve hooks early so plugins can override the
 								      // provider/model before resolveModel().
 								      //
 								      // Legacy compatibility: before_agent_start is also checked for override
 								      // fields if present. New hook takes precedence when both are set.
 								      let modelResolveOverride: { providerOverride?: string; modelOverride?: string } | undefined;
-												Plugins/Hooks: avoid duplicate before_agent_start executions

											
										
										
											2026-02-21 22:31:51 -08:00
+								      let legacyBeforeAgentStartResult: PluginHookBeforeAgentStartResult | undefined;
-												feat(plugins): add modelOverride/providerOverride to before_agent_start hook

Enable plugins to override the model and provider for agent runs by
returning modelOverride/providerOverride from the before_agent_start
hook. The hook is now invoked early in run.ts (before resolveModel)
so overrides take effect. The result is passed to attempt.ts via
earlyHookResult to prevent double-firing.

This enables security-critical use cases like routing PII-containing
prompts to local models instead of cloud providers.

											
										
										
											2026-02-15 12:05:29 -05:00
+								      const hookRunner = getGlobalHookRunner();
-												refactor(plugins): split before-agent hooks by model and prompt phases

											
										
										
											2026-02-17 03:28:10 +01:00
+								      const hookCtx = {
 								        agentId: workspaceResolution.agentId,
 								        sessionKey: params.sessionKey,
 								        sessionId: params.sessionId,
 								        workspaceDir: resolvedWorkspace,
 								        messageProvider: params.messageProvider ?? undefined,
-												feat(hooks): add trigger and channelId to plugin hook agent context (#28623)

* feat(hooks): add trigger and channelId to plugin hook agent context

Adds `trigger` and `channelId` fields to `PluginHookAgentContext` so
plugins can determine what initiated the agent run and which channel
it originated from, without session-key parsing or Redis bridging.

trigger values: "user", "heartbeat", "cron", "memory"
channelId values: "telegram", "discord", "whatsapp", etc.

Both fields are threaded through run.ts and attempt.ts hookCtx so all
hook phases receive them (before_model_resolve, before_prompt_build,
before_agent_start, llm_input, llm_output, agent_end).

channelId falls back from messageChannel to messageProvider when the
former is not set. followup-runner passes originatingChannel so queued
followup runs also carry channel context.

* docs(changelog): note hook context parity fix for #28623

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
											
										
										
											2026-03-03 11:39:20 +10:00
+								        trigger: params.trigger,
 								        channelId: params.messageChannel ?? params.messageProvider ?? undefined,
-												refactor(plugins): split before-agent hooks by model and prompt phases

											
										
										
											2026-02-17 03:28:10 +01:00
+								      };
 								      if (hookRunner?.hasHooks("before_model_resolve")) {
 								        try {
 								          modelResolveOverride = await hookRunner.runBeforeModelResolve(
 								            { prompt: params.prompt },
 								            hookCtx,
 								          );
 								        } catch (hookErr) {
 								          log.warn(`before_model_resolve hook failed: ${String(hookErr)}`);
 								        }
 								      }
-												feat(plugins): add modelOverride/providerOverride to before_agent_start hook

Enable plugins to override the model and provider for agent runs by
returning modelOverride/providerOverride from the before_agent_start
hook. The hook is now invoked early in run.ts (before resolveModel)
so overrides take effect. The result is passed to attempt.ts via
earlyHookResult to prevent double-firing.

This enables security-critical use cases like routing PII-containing
prompts to local models instead of cloud providers.

											
										
										
											2026-02-15 12:05:29 -05:00
+								      if (hookRunner?.hasHooks("before_agent_start")) {
 								        try {
-												Plugins/Hooks: avoid duplicate before_agent_start executions

											
										
										
											2026-02-21 22:31:51 -08:00
+								          legacyBeforeAgentStartResult = await hookRunner.runBeforeAgentStart(
-												feat(plugins): add modelOverride/providerOverride to before_agent_start hook

Enable plugins to override the model and provider for agent runs by
returning modelOverride/providerOverride from the before_agent_start
hook. The hook is now invoked early in run.ts (before resolveModel)
so overrides take effect. The result is passed to attempt.ts via
earlyHookResult to prevent double-firing.

This enables security-critical use cases like routing PII-containing
prompts to local models instead of cloud providers.

											
										
										
											2026-02-15 12:05:29 -05:00
+								            { prompt: params.prompt },
-												refactor(plugins): split before-agent hooks by model and prompt phases

											
										
										
											2026-02-17 03:28:10 +01:00
+								            hookCtx,
-												feat(plugins): add modelOverride/providerOverride to before_agent_start hook

Enable plugins to override the model and provider for agent runs by
returning modelOverride/providerOverride from the before_agent_start
hook. The hook is now invoked early in run.ts (before resolveModel)
so overrides take effect. The result is passed to attempt.ts via
earlyHookResult to prevent double-firing.

This enables security-critical use cases like routing PII-containing
prompts to local models instead of cloud providers.

											
										
										
											2026-02-15 12:05:29 -05:00
+								          );
-												refactor(plugins): split before-agent hooks by model and prompt phases

											
										
										
											2026-02-17 03:28:10 +01:00
+								          modelResolveOverride = {
 								            providerOverride:
-												Plugins/Hooks: avoid duplicate before_agent_start executions

											
										
										
											2026-02-21 22:31:51 -08:00
+								              modelResolveOverride?.providerOverride ??
 								              legacyBeforeAgentStartResult?.providerOverride,
 								            modelOverride:
 								              modelResolveOverride?.modelOverride ?? legacyBeforeAgentStartResult?.modelOverride,
-												refactor(plugins): split before-agent hooks by model and prompt phases

											
										
										
											2026-02-17 03:28:10 +01:00
+								          };
-												feat(plugins): add modelOverride/providerOverride to before_agent_start hook

Enable plugins to override the model and provider for agent runs by
returning modelOverride/providerOverride from the before_agent_start
hook. The hook is now invoked early in run.ts (before resolveModel)
so overrides take effect. The result is passed to attempt.ts via
earlyHookResult to prevent double-firing.

This enables security-critical use cases like routing PII-containing
prompts to local models instead of cloud providers.

											
										
										
											2026-02-15 12:05:29 -05:00
+								        } catch (hookErr) {
-												refactor(plugins): split before-agent hooks by model and prompt phases

											
										
										
											2026-02-17 03:28:10 +01:00
+								          log.warn(
 								            `before_agent_start hook (legacy model resolve path) failed: ${String(hookErr)}`,
 								          );
-												feat(plugins): add modelOverride/providerOverride to before_agent_start hook

Enable plugins to override the model and provider for agent runs by
returning modelOverride/providerOverride from the before_agent_start
hook. The hook is now invoked early in run.ts (before resolveModel)
so overrides take effect. The result is passed to attempt.ts via
earlyHookResult to prevent double-firing.

This enables security-critical use cases like routing PII-containing
prompts to local models instead of cloud providers.

											
										
										
											2026-02-15 12:05:29 -05:00
+								        }
 								      }
-												refactor(plugins): split before-agent hooks by model and prompt phases

											
										
										
											2026-02-17 03:28:10 +01:00
+								      if (modelResolveOverride?.providerOverride) {
 								        provider = modelResolveOverride.providerOverride;
 								        log.info(`[hooks] provider overridden to ${provider}`);
 								      }
 								      if (modelResolveOverride?.modelOverride) {
 								        modelId = modelResolveOverride.modelOverride;
 								        log.info(`[hooks] model overridden to ${modelId}`);
 								      }
-												feat(plugins): add modelOverride/providerOverride to before_agent_start hook

Enable plugins to override the model and provider for agent runs by
returning modelOverride/providerOverride from the before_agent_start
hook. The hook is now invoked early in run.ts (before resolveModel)
so overrides take effect. The result is passed to attempt.ts via
earlyHookResult to prevent double-firing.

This enables security-critical use cases like routing PII-containing
prompts to local models instead of cloud providers.

											
										
										
											2026-02-15 12:05:29 -05:00
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								      const { model, error, authStorage, modelRegistry } = resolveModel(
 								        provider,
 								        modelId,
 								        agentDir,
 								        params.config,
 								      );
 								      if (!model) {
-												fix(auth): bidirectional mode/type compat + sync OAuth to all agents (#12692)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 2dee8e1174e637e50d10bf7020f1de2990b804dc
Co-authored-by: mudrii <220262+mudrii@users.noreply.github.com>
Co-authored-by: obviyus <22031114+obviyus@users.noreply.github.com>
Reviewed-by: @obviyus

											
										
										
											2026-02-20 18:31:09 +08:00
+								        throw new FailoverError(error ?? `Unknown model: ${provider}/${modelId}`, {
 								          reason: "model_not_found",
 								          provider,
 								          model: modelId,
 								        });
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								      }
 								      const ctxInfo = resolveContextWindowInfo({
 								        cfg: params.config,
 								        provider,
 								        modelId,
 								        modelContextWindow: model.contextWindow,
 								        defaultTokens: DEFAULT_CONTEXT_TOKENS,
 								      });
-												fix(agents): apply contextTokens cap for compaction threshold (#39099)

Land #39099 by @MumuTW.

Co-authored-by: MumuTW <clothl47364@gmail.com>

											
										
										
											2026-03-07 19:18:48 +00:00
+								      // Apply contextTokens cap to model so pi-coding-agent's auto-compaction
 								      // threshold uses the effective limit, not the native context window.
 								      const effectiveModel =
 								        ctxInfo.tokens < (model.contextWindow ?? Infinity)
 								          ? { ...model, contextWindow: ctxInfo.tokens }
 								          : model;
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								      const ctxGuard = evaluateContextWindowGuard({
 								        info: ctxInfo,
 								        warnBelowTokens: CONTEXT_WINDOW_WARN_BELOW_TOKENS,
 								        hardMinTokens: CONTEXT_WINDOW_HARD_MIN_TOKENS,
 								      });
 								      if (ctxGuard.shouldWarn) {
 								        log.warn(
 								          `low context window: ${provider}/${modelId} ctx=${ctxGuard.tokens} (warn<${CONTEXT_WINDOW_WARN_BELOW_TOKENS}) source=${ctxGuard.source}`,
 								        );
 								      }
 								      if (ctxGuard.shouldBlock) {
 								        log.error(
 								          `blocked model (context window too small): ${provider}/${modelId} ctx=${ctxGuard.tokens} (min=${CONTEXT_WINDOW_HARD_MIN_TOKENS}) source=${ctxGuard.source}`,
 								        );
 								        throw new FailoverError(
 								          `Model context window too small (${ctxGuard.tokens} tokens). Minimum is ${CONTEXT_WINDOW_HARD_MIN_TOKENS}.`,
 								          { reason: "unknown", provider, model: modelId },
 								        );
 								      }
-												fix: avoid keychain prompts in embedded runner

											
										
										
											2026-01-18 04:18:58 +00:00
+								      const authStore = ensureAuthProfileStore(agentDir, { allowKeychainPrompt: false });
-												fix(auth): preserve auto-pin preference

Co-authored-by: Mykyta Bozhenko <21245729+cheeeee@users.noreply.github.com>

											
										
										
											2026-01-18 08:22:50 +00:00
+								      const preferredProfileId = params.authProfileId?.trim();
-												fix: normalize model override auth handling

											
										
										
											2026-01-21 06:00:16 +00:00
+								      let lockedProfileId = params.authProfileIdSource === "user" ? preferredProfileId : undefined;
 								      if (lockedProfileId) {
 								        const lockedProfile = authStore.profiles[lockedProfileId];
 								        if (
 								          !lockedProfile ||
 								          normalizeProviderId(lockedProfile.provider) !== normalizeProviderId(provider)
 								        ) {
 								          lockedProfileId = undefined;
 								        }
 								      }
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								      const profileOrder = resolveAuthProfileOrder({
 								        cfg: params.config,
 								        store: authStore,
 								        provider,
-												fix(auth): preserve auto-pin preference

Co-authored-by: Mykyta Bozhenko <21245729+cheeeee@users.noreply.github.com>

											
										
										
											2026-01-18 08:22:50 +00:00
+								        preferredProfile: preferredProfileId,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								      });
-												fix(auth): preserve auto-pin preference

Co-authored-by: Mykyta Bozhenko <21245729+cheeeee@users.noreply.github.com>

											
										
										
											2026-01-18 08:22:50 +00:00
+								      if (lockedProfileId && !profileOrder.includes(lockedProfileId)) {
 								        throw new Error(`Auth profile "${lockedProfileId}" is not configured for ${provider}.`);
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								      }
-												fix: honor user-pinned profiles and search ranking

											
										
										
											2026-01-23 03:05:01 +00:00
+								      const profileCandidates = lockedProfileId
 								        ? [lockedProfileId]
 								        : profileOrder.length > 0
 								          ? profileOrder
 								          : [undefined];
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								      let profileIndex = 0;
 								      const initialThinkLevel = params.thinkLevel ?? "off";
 								      let thinkLevel = initialThinkLevel;
 								      const attemptedThinking = new Set<ThinkLevel>();
 								      let apiKeyInfo: ApiKeyInfo | null = null;
 								      let lastProfileId: string | undefined;
-												fix: refresh Copilot token before expiry and retry on auth errors

GitHub Copilot API tokens expire after ~30 minutes. When OpenClaw spawns
a long-running subagent using Copilot as the provider, the token would
expire mid-session with no recovery mechanism, causing 401 auth errors.

This commit adds:
- Periodic token refresh scheduled 5 minutes before expiry
- Auth error detection with automatic token refresh and single retry
- Proper timer cleanup on session shutdown to prevent leaks

The implementation uses a per-attempt retry flag to ensure each auth
error can trigger one refresh+retry cycle without creating infinite
retry loops.

🤖 AI-assisted: This fix was developed with GitHub Copilot CLI assistance.
Testing: Fully tested with 3 new unit tests covering auth retry, retry
reset, and timer cleanup scenarios. All 11 auth rotation tests pass.

											
										
										
											2026-02-04 09:13:59 -03:00
+								      const copilotTokenState: CopilotTokenState | null =
 								        model.provider === "github-copilot" ? { githubToken: "", expiresAt: 0 } : null;
 								      let copilotRefreshCancelled = false;
 								      const hasCopilotGithubToken = () => Boolean(copilotTokenState?.githubToken.trim());
 								      // Cancels the pending Copilot refresh timer (if one is recorded) and forgets its handle.
 								      // No-op when Copilot state is absent or no timer is currently stored.
 								      const clearCopilotRefreshTimer = () => {
 								        const pendingTimer = copilotTokenState?.refreshTimer;
 								        if (copilotTokenState && pendingTimer) {
 								          clearTimeout(pendingTimer);
 								          copilotTokenState.refreshTimer = undefined;
 								        }
 								      };
 								      // Raises the cancellation flag checked by the refresh scheduler and drops any pending timer.
 								      // Does nothing when there is no Copilot token state.
 								      const stopCopilotRefreshTimer = () => {
 								        if (copilotTokenState) {
 								          copilotRefreshCancelled = true;
 								          clearCopilotRefreshTimer();
 								        }
 								      };
 								      // Exchanges the stored GitHub token for a fresh Copilot API token and installs it as the
 								      // runtime API key for the model's provider. Concurrent callers share one in-flight refresh.
 								      //
 								      // `reason` is only used in the debug log line.
 								      // Rejects when no GitHub token is stored or when the token exchange (or module load) fails;
 								      // every failure is logged at warn level before being rethrown.
 								      const refreshCopilotToken = async (reason: string): Promise<void> => {
 								        if (!copilotTokenState) {
 								          return;
 								        }
 								        if (copilotTokenState.refreshInFlight) {
 								          await copilotTokenState.refreshInFlight;
 								          return;
 								        }
 								        // Publish the in-flight promise in the same synchronous step as the check above.
 								        // The dynamic import therefore lives inside the promise: awaiting it before the
 								        // assignment would let a second caller slip past the check and start a duplicate
 								        // refresh, whose slot the first refresh's `finally` would then clear prematurely.
 								        copilotTokenState.refreshInFlight = (async () => {
 								          const { resolveCopilotApiToken } =
 								            await import("../../providers/github-copilot-token.js");
 								          const githubToken = copilotTokenState.githubToken.trim();
 								          if (!githubToken) {
 								            throw new Error("Copilot refresh requires a GitHub token.");
 								          }
 								          log.debug(`Refreshing GitHub Copilot token (${reason})...`);
 								          const copilotToken = await resolveCopilotApiToken({
 								            githubToken,
 								          });
 								          authStorage.setRuntimeApiKey(model.provider, copilotToken.token);
 								          copilotTokenState.expiresAt = copilotToken.expiresAt;
 								          const remaining = copilotToken.expiresAt - Date.now();
 								          log.debug(
 								            `Copilot token refreshed; expires in ${Math.max(0, Math.floor(remaining / 1000))}s.`,
 								          );
 								        })()
 								          .catch((err) => {
 								            log.warn(`Copilot token refresh failed: ${describeUnknownError(err)}`);
 								            throw err;
 								          })
 								          .finally(() => {
 								            // Free the slot so the next call starts a new refresh.
 								            copilotTokenState.refreshInFlight = undefined;
 								          });
 								        await copilotTokenState.refreshInFlight;
 								      };
 								      // Arms a one-shot timer that refreshes the Copilot token ahead of its recorded expiry and
 								      // then re-arms itself. A failed scheduled refresh gets exactly one delayed retry; if that
 								      // retry also fails, no further timer is armed here. Every step re-checks the cancel flag.
 								      const scheduleCopilotRefresh = (): void => {
 								        if (copilotRefreshCancelled || !copilotTokenState) {
 								          return;
 								        }
 								        if (!hasCopilotGithubToken()) {
 								          log.warn("Skipping Copilot refresh scheduling; GitHub token missing.");
 								          return;
 								        }
 								        clearCopilotRefreshTimer();
 								        const msUntilRefresh =
 								          copilotTokenState.expiresAt - COPILOT_REFRESH_MARGIN_MS - Date.now();
 								        // Record a freshly created timer, undoing it at once if cancellation already happened.
 								        const trackTimer = (handle: ReturnType<typeof setTimeout>): void => {
 								          copilotTokenState.refreshTimer = handle;
 								          if (copilotRefreshCancelled) {
 								            clearTimeout(handle);
 								            copilotTokenState.refreshTimer = undefined;
 								          }
 								        };
 								        const rearm = (): void => scheduleCopilotRefresh();
 								        // Second (and last) attempt after a failed scheduled refresh; failures are swallowed.
 								        const onRetryTimer = (): void => {
 								          if (copilotRefreshCancelled) {
 								            return;
 								          }
 								          refreshCopilotToken("scheduled-retry")
 								            .then(rearm)
 								            .catch(() => undefined);
 								        };
 								        const onScheduledFailure = (): void => {
 								          if (copilotRefreshCancelled) {
 								            return;
 								          }
 								          trackTimer(setTimeout(onRetryTimer, COPILOT_REFRESH_RETRY_MS));
 								        };
 								        const onScheduledTimer = (): void => {
 								          if (copilotRefreshCancelled) {
 								            return;
 								          }
 								          refreshCopilotToken("scheduled").then(rearm).catch(onScheduledFailure);
 								        };
 								        trackTimer(
 								          setTimeout(onScheduledTimer, Math.max(COPILOT_REFRESH_MIN_DELAY_MS, msUntilRefresh)),
 								        );
 								      };
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
-												fix: trigger fallback on auth profile exhaustion

											
										
										
											2026-01-24 06:14:17 +00:00
+								      const resolveAuthProfileFailoverReason = (params: {
 								        // Picks the FailoverReason to report when auth profile selection fails.
 								        // allInCooldown: when true the reason is derived from the auth store, not the message.
 								        allInCooldown: boolean;
 								        // message: error text passed to classifyFailoverReason on the non-cooldown path.
 								        message: string;
-												Agents: infer auth-profile unavailable failover reason

											
										
										
											2026-02-22 16:10:24 -08:00
+								        profileIds?: Array<string | undefined>;
-												fix: trigger fallback on auth profile exhaustion

											
										
										
											2026-01-24 06:14:17 +00:00
+								      }): FailoverReason => {
-												chore: Enable "curly" rule to avoid single-statement if confusion/errors.

											
										
										
											2026-01-31 16:19:20 +09:00
+								        if (params.allInCooldown) {
-												Agents: infer auth-profile unavailable failover reason

											
										
										
											2026-02-22 16:10:24 -08:00
+								          const profileIds = (params.profileIds ?? profileCandidates).filter(
 								            (id): id is string => typeof id === "string" && id.length > 0,
 								          );
 								          // Only non-empty string ids are passed on; the result defaults to "rate_limit"
 								          // when resolveProfilesUnavailableReason yields null or undefined.
 								          return (
 								            resolveProfilesUnavailableReason({
 								              store: authStore,
 								              profileIds,
 								            }) ?? "rate_limit"
 								          );
-												chore: Enable "curly" rule to avoid single-statement if confusion/errors.

											
										
										
											2026-01-31 16:19:20 +09:00
+								        }
-												fix: trigger fallback on auth profile exhaustion

											
										
										
											2026-01-24 06:14:17 +00:00
+								        const classified = classifyFailoverReason(params.message);
 								        // Messages the classifier does not recognise are reported as "auth".
 								        return classified ?? "auth";
 								      };
 								      // Always throws; used when no auth profile could be applied.
 								      // - With fallbackConfigured set: throws a FailoverError carrying the resolved reason,
 								      //   provider, model id, status and the original error as cause.
 								      // - Otherwise: rethrows params.error when it is an Error, else throws a plain Error.
 								      const throwAuthProfileFailover = (params: {
 								        allInCooldown: boolean;
 								        message?: string;
 								        error?: unknown;
 								      }): never => {
 								        const fallbackMessage = `No available auth profile for ${provider} (all in cooldown or unavailable).`;
 								        // Message precedence: explicit message, then the described error, then the fallback.
 								        const message =
 								          params.message?.trim() ||
 								          (params.error ? describeUnknownError(params.error).trim() : "") ||
 								          fallbackMessage;
 								        const reason = resolveAuthProfileFailoverReason({
 								          allInCooldown: params.allInCooldown,
 								          message,
-												Agents: infer auth-profile unavailable failover reason

											
										
										
											2026-02-22 16:10:24 -08:00
+								          profileIds: profileCandidates,
-												fix: trigger fallback on auth profile exhaustion

											
										
										
											2026-01-24 06:14:17 +00:00
+								        });
 								        if (fallbackConfigured) {
 								          throw new FailoverError(message, {
 								            reason,
 								            provider,
 								            model: modelId,
 								            status: resolveFailoverStatus(reason),
 								            cause: params.error,
 								          });
 								        }
-												chore: Enable "curly" rule to avoid single-statement if confusion/errors.

											
										
										
											2026-01-31 16:19:20 +09:00
+								        if (params.error instanceof Error) {
 								          // Without a configured fallback the original Error object is rethrown unchanged.
 								          throw params.error;
 								        }
-												fix: trigger fallback on auth profile exhaustion

											
										
										
											2026-01-24 06:14:17 +00:00
+								        throw new Error(message);
 								      };
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								      const resolveApiKeyForCandidate = async (candidate?: string) => {
 								        // Thin wrapper over getApiKeyForModel: forwards the resolved model, run config,
 								        // auth store and agent dir, with `candidate` (possibly undefined) as the profile id.
 								        return getApiKeyForModel({
 								          model,
 								          cfg: params.config,
 								          profileId: candidate,
 								          store: authStore,
-												feat: improve agent auth guidance

											
										
										
											2026-01-15 04:41:50 +00:00
+								          agentDir,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								        });
 								      };
 								      // Resolves credentials for `candidate`, stores them in `apiKeyInfo`, and installs the
 								      // resulting key as the runtime API key for the model's provider.
 								      // Throws when no API key is resolved and the auth mode is not "aws-sdk".
 								      // For the "github-copilot" provider the resolved key is passed to resolveCopilotApiToken
 								      // as the GitHub token, and the returned Copilot token is what gets installed.
 								      const applyApiKeyInfo = async (candidate?: string): Promise<void> => {
 								        apiKeyInfo = await resolveApiKeyForCandidate(candidate);
-												fix: honor user-pinned profiles and search ranking

											
										
										
											2026-01-23 03:05:01 +00:00
+								        const resolvedProfileId = apiKeyInfo.profileId ?? candidate;
-												refactor: add aws-sdk auth mode and tighten provider auth

											
										
										
											2026-01-20 07:53:25 +00:00
+								        if (!apiKeyInfo.apiKey) {
 								          // A missing key is accepted only in "aws-sdk" mode; nothing is written to authStorage.
 								          if (apiKeyInfo.mode !== "aws-sdk") {
 								            throw new Error(
 								              `No API key resolved for provider "${model.provider}" (auth mode: ${apiKeyInfo.mode}).`,
 								            );
 								          }
-												fix: honor user-pinned profiles and search ranking

											
										
										
											2026-01-23 03:05:01 +00:00
+								          lastProfileId = resolvedProfileId;
-												refactor: add aws-sdk auth mode and tighten provider auth

											
										
										
											2026-01-20 07:53:25 +00:00
+								          return;
 								        }
-												Revert "fix: improve GitHub Copilot integration"

This reverts commit 21a9b3b66f9b01851c36db0b683ad942cd23d668.

											
										
										
											2026-01-23 07:12:01 +00:00
+								        if (model.provider === "github-copilot") {
 								          const { resolveCopilotApiToken } =
 								            await import("../../providers/github-copilot-token.js");
 								          const copilotToken = await resolveCopilotApiToken({
 								            githubToken: apiKeyInfo.apiKey,
 								          });
 								          authStorage.setRuntimeApiKey(model.provider, copilotToken.token);
-												fix: refresh Copilot token before expiry and retry on auth errors

GitHub Copilot API tokens expire after ~30 minutes. When OpenClaw spawns
a long-running subagent using Copilot as the provider, the token would
expire mid-session with no recovery mechanism, causing 401 auth errors.

This commit adds:
- Periodic token refresh scheduled 5 minutes before expiry
- Auth error detection with automatic token refresh and single retry
- Proper timer cleanup on session shutdown to prevent leaks

The implementation uses a per-attempt retry flag to ensure each auth
error can trigger one refresh+retry cycle without creating infinite
retry loops.

🤖 AI-assisted: This fix was developed with GitHub Copilot CLI assistance.
Testing: Fully tested with 3 new unit tests covering auth retry, retry
reset, and timer cleanup scenarios. All 11 auth rotation tests pass.

											
										
										
											2026-02-04 09:13:59 -03:00
+								          if (copilotTokenState) {
 								            // Remember the GitHub token and expiry so later refreshes can re-run the exchange.
 								            copilotTokenState.githubToken = apiKeyInfo.apiKey;
 								            copilotTokenState.expiresAt = copilotToken.expiresAt;
 								            scheduleCopilotRefresh();
 								          }
-												Revert "fix: improve GitHub Copilot integration"

This reverts commit 21a9b3b66f9b01851c36db0b683ad942cd23d668.

											
										
										
											2026-01-23 07:12:01 +00:00
+								        } else {
 								          authStorage.setRuntimeApiKey(model.provider, apiKeyInfo.apiKey);
 								        }
 								        // NOTE(review): this path records apiKeyInfo.profileId, while the aws-sdk path above
 								        // records resolvedProfileId (which falls back to candidate) - confirm this is intended.
 								        lastProfileId = apiKeyInfo.profileId;
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								      };
 								      // Tries to switch to a later auth profile candidate after the current one.
 								      // Returns true once a later candidate has been applied via applyApiKeyInfo; on success it
 								      // also updates profileIndex, restores the initial think level and clears attemptedThinking.
 								      // Returns false when a profile is locked or no later candidate could be applied.
 								      const advanceAuthProfile = async (): Promise<boolean> => {
-												chore: Enable "curly" rule to avoid single-statement if confusion/errors.

											
										
										
											2026-01-31 16:19:20 +09:00
+								        if (lockedProfileId) {
 								          return false;
 								        }
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								        let nextIndex = profileIndex + 1;
 								        while (nextIndex < profileCandidates.length) {
 								          const candidate = profileCandidates[nextIndex];
-												fix(auth): skip auth profiles in cooldown during selection and rotation

Auth profiles in cooldown (due to rate limiting) were being attempted,
causing unnecessary retries and delays. This fix ensures:

1. Initial profile selection skips profiles in cooldown
2. Profile rotation (after failures) skips cooldown profiles
3. Clear error message when all profiles are unavailable

Tests added:
- Skips profiles in cooldown during initial selection
- Skips profiles in cooldown when rotating after failure

Fixes #1316

											
										
										
											2026-01-22 10:04:56 +01:00
+								          if (candidate && isProfileInCooldown(authStore, candidate)) {
 								            // Named candidates that are in cooldown are skipped without being tried.
 								            nextIndex += 1;
 								            continue;
 								          }
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								          try {
 								            await applyApiKeyInfo(candidate);
 								            profileIndex = nextIndex;
 								            thinkLevel = initialThinkLevel;
 								            attemptedThinking.clear();
 								            return true;
 								          } catch (err) {
-												chore: Enable "curly" rule to avoid single-statement if confusion/errors.

											
										
										
											2026-01-31 16:19:20 +09:00
+								            // NOTE(review): appears unreachable, since a truthy lockedProfileId already
 								            // returned false at the top of this function - confirm before relying on it.
 								            if (candidate && candidate === lockedProfileId) {
 								              throw err;
 								            }
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            nextIndex += 1;
 								          }
 								        }
 								        return false;
 								      };
 								      try {
-												fix(agents): honor explicit rate-limit cooldown probes in fallback runs

											
										
										
											2026-03-05 20:02:36 -08:00
+								        const autoProfileCandidates = profileCandidates.filter(
 								          (candidate): candidate is string =>
 								            typeof candidate === "string" && candidate.length > 0 && candidate !== lockedProfileId,
 								        );
 								        const allAutoProfilesInCooldown =
 								          autoProfileCandidates.length > 0 &&
 								          autoProfileCandidates.every((candidate) => isProfileInCooldown(authStore, candidate));
 								        const unavailableReason = allAutoProfilesInCooldown
 								          ? (resolveProfilesUnavailableReason({
 								              store: authStore,
 								              profileIds: autoProfileCandidates,
 								            }) ?? "rate_limit")
 								          : null;
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								        const allowTransientCooldownProbe =
 								          params.allowTransientCooldownProbe === true &&
-												fix(agents): honor explicit rate-limit cooldown probes in fallback runs

											
										
										
											2026-03-05 20:02:36 -08:00
+								          allAutoProfilesInCooldown &&
-												fix(agents): broaden 402 temporary-limit detection and allow billing cooldown probe (#38533)

Merged via squash.

Prepared head SHA: 282b9186c6f48fcdbf0c81c49f739e5e9ed2df23
Co-authored-by: xialonglee <22994703+xialonglee@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
											
										
										
											2026-03-08 01:27:01 -06:00
+								          (unavailableReason === "rate_limit" ||
 								            unavailableReason === "overloaded" ||
 								            unavailableReason === "billing");
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								        let didTransientCooldownProbe = false;
-												fix(agents): honor explicit rate-limit cooldown probes in fallback runs

											
										
										
											2026-03-05 20:02:36 -08:00
-												fix(auth): skip auth profiles in cooldown during selection and rotation

Auth profiles in cooldown (due to rate limiting) were being attempted,
causing unnecessary retries and delays. This fix ensures:

1. Initial profile selection skips profiles in cooldown
2. Profile rotation (after failures) skips cooldown profiles
3. Clear error message when all profiles are unavailable

Tests added:
- Skips profiles in cooldown during initial selection
- Skips profiles in cooldown when rotating after failure

Fixes #1316

											
										
										
											2026-01-22 10:04:56 +01:00
+								        while (profileIndex < profileCandidates.length) {
 								          const candidate = profileCandidates[profileIndex];
-												fix(agents): honor explicit rate-limit cooldown probes in fallback runs

											
										
										
											2026-03-05 20:02:36 -08:00
+								          const inCooldown =
 								            candidate && candidate !== lockedProfileId && isProfileInCooldown(authStore, candidate);
 								          if (inCooldown) {
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								            if (allowTransientCooldownProbe && !didTransientCooldownProbe) {
 								              didTransientCooldownProbe = true;
-												fix(agents): honor explicit rate-limit cooldown probes in fallback runs

											
										
										
											2026-03-05 20:02:36 -08:00
+								              log.warn(
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								                `probing cooldowned auth profile for ${provider}/${modelId} due to ${unavailableReason ?? "transient"} unavailability`,
-												fix(agents): honor explicit rate-limit cooldown probes in fallback runs

											
										
										
											2026-03-05 20:02:36 -08:00
+								              );
 								            } else {
 								              profileIndex += 1;
 								              continue;
 								            }
-												fix(auth): skip auth profiles in cooldown during selection and rotation

Auth profiles in cooldown (due to rate limiting) were being attempted,
causing unnecessary retries and delays. This fix ensures:

1. Initial profile selection skips profiles in cooldown
2. Profile rotation (after failures) skips cooldown profiles
3. Clear error message when all profiles are unavailable

Tests added:
- Skips profiles in cooldown during initial selection
- Skips profiles in cooldown when rotating after failure

Fixes #1316

											
										
										
											2026-01-22 10:04:56 +01:00
+								          }
 								          await applyApiKeyInfo(profileCandidates[profileIndex]);
 								          break;
 								        }
 								        if (profileIndex >= profileCandidates.length) {
-												fix: trigger fallback on auth profile exhaustion

											
										
										
											2026-01-24 06:14:17 +00:00
+								          throwAuthProfileFailover({ allInCooldown: true });
-												fix(auth): skip auth profiles in cooldown during selection and rotation

Auth profiles in cooldown (due to rate limiting) were being attempted,
causing unnecessary retries and delays. This fix ensures:

1. Initial profile selection skips profiles in cooldown
2. Profile rotation (after failures) skips cooldown profiles
3. Clear error message when all profiles are unavailable

Tests added:
- Skips profiles in cooldown during initial selection
- Skips profiles in cooldown when rotating after failure

Fixes #1316

											
										
										
											2026-01-22 10:04:56 +01:00
+								        }
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								      } catch (err) {
-												chore: Enable "curly" rule to avoid single-statement if confusion/errors.

											
										
										
											2026-01-31 16:19:20 +09:00
+								        if (err instanceof FailoverError) {
 								          throw err;
 								        }
-												fix: trigger fallback on auth profile exhaustion

											
										
										
											2026-01-24 06:14:17 +00:00
+								        if (profileCandidates[profileIndex] === lockedProfileId) {
 								          throwAuthProfileFailover({ allInCooldown: false, error: err });
 								        }
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								        const advanced = await advanceAuthProfile();
-												fix: trigger fallback on auth profile exhaustion

											
										
										
											2026-01-24 06:14:17 +00:00
+								        if (!advanced) {
 								          throwAuthProfileFailover({ allInCooldown: false, error: err });
 								        }
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								      }
-												fix: refresh Copilot token before expiry and retry on auth errors

GitHub Copilot API tokens expire after ~30 minutes. When OpenClaw spawns
a long-running subagent using Copilot as the provider, the token would
expire mid-session with no recovery mechanism, causing 401 auth errors.

This commit adds:
- Periodic token refresh scheduled 5 minutes before expiry
- Auth error detection with automatic token refresh and single retry
- Proper timer cleanup on session shutdown to prevent leaks

The implementation uses a per-attempt retry flag to ensure each auth
error can trigger one refresh+retry cycle without creating infinite
retry loops.

🤖 AI-assisted: This fix was developed with GitHub Copilot CLI assistance.
Testing: Fully tested with 3 new unit tests covering auth retry, retry
reset, and timer cleanup scenarios. All 11 auth rotation tests pass.

											
										
										
											2026-02-04 09:13:59 -03:00
+								      const maybeRefreshCopilotForAuthError = async (
 								        errorText: string,
 								        retried: boolean,
 								      ): Promise<boolean> => {
 								        // Resolves true only when the Copilot token was refreshed and the next
 								        // refresh was scheduled; every other path resolves false.
 								        const isAuthFailure = (): boolean =>
 								          isFailoverErrorMessage(errorText) && classifyFailoverReason(errorText) === "auth";
 								        // Skip when there is no Copilot token state, when `retried` is already
 								        // set, or when the error text is not an auth-classified failover error.
 								        if (!copilotTokenState || retried || !isAuthFailure()) {
 								          return false;
 								        }
 								        let refreshed = false;
 								        try {
 								          await refreshCopilotToken("auth-error");
 								          scheduleCopilotRefresh();
 								          refreshed = true;
 								        } catch {
 								          // A failed refresh is reported as "not refreshed" rather than rethrown.
 								        }
 								        return refreshed;
 								      };
-												fix: allow multiple compaction retries on context overflow (#8928)

Previously, overflowCompactionAttempted was a boolean flag set once, preventing
recovery when a single compaction wasn't enough. Change to a counter allowing up
to 3 attempts before giving up. Also add diagnostic logging on overflow events to
help debug early-overflow issues.

Fixes sessions that hit context overflow during long agentic turns with many tool
calls, where one compaction round isn't sufficient to bring context below limits.
											
										
										
											2026-02-05 17:58:37 -04:00
+								      const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
-												fix(agents): raise dynamic retry cap budget

											
										
										
											2026-02-21 15:41:03 +01:00
+								      const MAX_RUN_LOOP_ITERATIONS = resolveMaxRunRetryIterations(profileCandidates.length);
-												fix: allow multiple compaction retries on context overflow (#8928)

Previously, overflowCompactionAttempted was a boolean flag set once, preventing
recovery when a single compaction wasn't enough. Change to a counter allowing up
to 3 attempts before giving up. Also add diagnostic logging on overflow events to
help debug early-overflow issues.

Fixes sessions that hit context overflow during long agentic turns with many tool
calls, where one compaction round isn't sufficient to bring context below limits.
											
										
										
											2026-02-05 17:58:37 -04:00
+								      let overflowCompactionAttempts = 0;
-												fix: recover from context overflow caused by oversized tool results (#11579)

* fix: gracefully handle oversized tool results causing context overflow

When a subagent reads a very large file or gets a huge tool result (e.g.,
gh pr diff on a massive PR), it can exceed the model's context window in
a single prompt. Auto-compaction can't help because there's no older
history to compact — just one giant tool result.

This adds two layers of defense:

1. Pre-emptive: Hard cap on tool result size (400K chars ≈ 100K tokens)
   applied in the session tool result guard before persistence. This
   prevents extremely large tool results from being stored in full,
   regardless of model context window size.

2. Recovery: When context overflow is detected and compaction fails,
   scan session messages for oversized tool results relative to the
   model's actual context window (30% max share). If found, truncate
   them in the session via branching (creating a new branch with
   truncated content) and retry the prompt.

The truncation preserves the beginning of the content (most useful for
understanding what was read) and appends a notice explaining the
truncation and suggesting offset/limit parameters for targeted reads.

Includes comprehensive tests for:
- Text truncation with newline-boundary awareness
- Context-window-proportional size calculation
- In-memory message truncation
- Oversized detection heuristics
- Guard-level size capping during persistence

* fix: prep fixes for tool result truncation PR (#11579) (thanks @tyler6204)
											
										
										
											2026-02-07 17:40:51 -08:00
+								      let toolResultTruncationAttempted = false;
-												Agent: unify bootstrap truncation warning handling (#32769)

Merged via squash.

Prepared head SHA: 5d6d4ddfa620011e267d892b402751847d5ac0c3
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras

											
										
										
											2026-03-03 16:28:38 -05:00
+								      let bootstrapPromptWarningSignaturesSeen =
 								        params.bootstrapPromptWarningSignaturesSeen ??
 								        (params.bootstrapPromptWarningSignature ? [params.bootstrapPromptWarningSignature] : []);
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								      const usageAccumulator = createUsageAccumulator();
-												fix(agents): stabilize overflow compaction retries and session context accounting (openclaw#14102) thanks @vpesh

Verified:
- CI checks for commit 86a7ecb45ebf0be61dce9261398000524fd9fab6
- Rebase conflict resolution for compatibility with latest main

Co-authored-by: vpesh <9496634+vpesh@users.noreply.github.com>

											
										
										
											2026-02-13 00:53:13 +01:00
+								      let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								      let autoCompactionCount = 0;
-												fix(agents): cap embedded runner retry loop

											
										
										
											2026-02-21 15:35:45 +01:00
+								      let runLoopIterations = 0;
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								      let overloadFailoverAttempts = 0;
-												fix(agents): stop param shadowing in auth failure marker

											
										
										
											2026-02-22 21:00:17 +05:30
+								      const maybeMarkAuthProfileFailure = async (failure: {
-												refactor: dedupe auth-profile failure marking and rotation test setup

											
										
										
											2026-02-22 15:43:57 +01:00
+								        profileId?: string;
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								        reason?: AuthProfileFailureReason | null;
-												Agents: fix embedded auth-profile failure helper typing

											
										
										
											2026-02-22 15:03:34 +00:00
+								        config?: RunEmbeddedPiAgentParams["config"];
 								        agentDir?: RunEmbeddedPiAgentParams["agentDir"];
-												refactor: dedupe auth-profile failure marking and rotation test setup

											
										
										
											2026-02-22 15:43:57 +01:00
+								      }) => {
-												fix(agents): stop param shadowing in auth failure marker

											
										
										
											2026-02-22 21:00:17 +05:30
+								        const { profileId, reason } = failure;
-												refactor: dedupe auth-profile failure marking and rotation test setup

											
										
										
											2026-02-22 15:43:57 +01:00
+								        if (!profileId || !reason || reason === "timeout") {
 								          return;
 								        }
 								        await markAuthProfileFailure({
 								          store: authStore,
 								          profileId,
 								          reason,
 								          cfg: params.config,
-												fix(agents): stop param shadowing in auth failure marker

											
										
										
											2026-02-22 21:00:17 +05:30
+								          agentDir,
-												Agents: add fallback error observations (#41337)

Merged via squash.

Prepared head SHA: 852469c82ff28fb0e1be7f1019f5283e712c4283
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
											
										
										
											2026-03-10 01:12:10 +03:00
+								          runId: params.runId,
-												refactor: dedupe auth-profile failure marking and rotation test setup

											
										
										
											2026-02-22 15:43:57 +01:00
+								        });
 								      };
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								      const resolveAuthProfileFailureReason = (
 								        failoverReason: FailoverReason | null,
 								      ): AuthProfileFailureReason | null =>
 								        // Timeouts are transport/model-path failures, not auth health signals,
 								        // so they (like a missing reason) yield null and no auth-profile
 								        // failure state is persisted for them.
 								        failoverReason && failoverReason !== "timeout" ? failoverReason : null;
 								      const maybeBackoffBeforeOverloadFailover = async (reason: FailoverReason | null) => {
 								        // Only "overloaded" failures wait; any other reason is a no-op.
 								        if (reason === "overloaded") {
 								          // Each overload failover bumps the attempt counter that feeds the
 								          // backoff computation.
 								          overloadFailoverAttempts += 1;
 								          const waitMs = computeBackoff(
 								            OVERLOAD_FAILOVER_BACKOFF_POLICY,
 								            overloadFailoverAttempts,
 								          );
 								          log.warn(
 								            `overload backoff before failover for ${provider}/${modelId}: attempt=${overloadFailoverAttempts} delayMs=${waitMs}`,
 								          );
 								          try {
 								            await sleepWithAbort(waitMs, params.abortSignal);
 								          } catch (sleepErr) {
 								            // Sleep failures unrelated to the abort signal propagate unchanged.
 								            if (!params.abortSignal?.aborted) {
 								              throw sleepErr;
 								            }
 								            // When the signal is aborted, rethrow as an AbortError-named error
 								            // that keeps the original failure as its cause.
 								            const abortErr = new Error("Operation aborted", { cause: sleepErr });
 								            abortErr.name = "AbortError";
 								            throw abortErr;
 								          }
 								        }
 								      };
-												feature(context): extend plugin system to support custom context management (#22201)

* feat(context-engine): add ContextEngine interface and registry

Introduce the pluggable ContextEngine abstraction that allows external
plugins to register custom context management strategies.

- ContextEngine interface with lifecycle methods: bootstrap, ingest,
  ingestBatch, afterTurn, assemble, compact, prepareSubagentSpawn,
  onSubagentEnded, dispose
- Module-level singleton registry with registerContextEngine() and
  resolveContextEngine() (config-driven slot selection)
- LegacyContextEngine: pass-through implementation wrapping existing
  compaction behavior for 100% backward compatibility
- ensureContextEnginesInitialized() guard for safe one-time registration
- 19 tests covering contract, registry, resolution, and legacy parity

* feat(plugins): add context-engine slot and registerContextEngine API

Wire the ContextEngine abstraction into the plugin system so external
plugins can register context engines via the standard plugin API.

- Add 'context-engine' to PluginKind union type
- Add 'contextEngine' slot to PluginSlotsConfig (default: 'legacy')
- Wire registerContextEngine() through OpenClawPluginApi
- Export ContextEngine types from plugin-sdk for external consumers
- Restore proper slot-based resolution in registry

* feat(context-engine): wire ContextEngine into agent run lifecycle

Integrate the ContextEngine abstraction into the core agent run path:

- Resolve context engine once per run (reused across retries)
- Bootstrap: hydrate canonical store from session file on first run
- Assemble: route context assembly through pluggable engine
- Auto-compaction guard: disable built-in auto-compaction when
  the engine declares ownsCompaction (prevents double-compaction)
- AfterTurn: post-turn lifecycle hook for ingest + background
  compaction decisions
- Overflow compaction: route through contextEngine.compact()
- Dispose: clean up engine resources in finally block
- Notify context engine on subagent lifecycle events

Legacy engine: all lifecycle methods are pass-through/no-op, preserving
100% backward compatibility for users without a context engine plugin.

* feat(plugins): add scoped subagent methods and gateway request scope

Expose runtime.subagent.{run, waitForRun, getSession, deleteSession}
so external plugins can spawn sub-agent sessions without raw gateway
dispatch access.

Uses AsyncLocalStorage request-scope bridge to dispatch internally via
handleGatewayRequest with a synthetic operator client. Methods are only
available during gateway request handling.

- Symbol.for-backed global singleton for cross-module-reload safety
- Fallback gateway context for non-WS dispatch paths (Telegram/WhatsApp)
- Set gateway request scope for all handlers, not just plugin handlers
- 3 staleness tests for fallback context hardening

* feat(context-engine): route /compact and sessions.get through context engine

Wire the /compact command and sessions.get handler through the pluggable
ContextEngine interface.

- Thread tokenBudget and force parameters to context engine compact
- Route /compact through contextEngine.compact() when registered
- Wire sessions.get as runtime alias for plugin subagent dispatch
- Add .pebbles/ to .gitignore

* style: format with oxfmt 0.33.0

Fix duplicate import (ControlUiRootState in server.impl.ts) and
import ordering across all changed files.

* fix: update extension test mocks for context-engine types

Add missing subagent property to bluebubbles PluginRuntime mock.
Add missing registerContextEngine to lobster OpenClawPluginApi mock.

* fix(subagents): keep deferred delete cleanup retryable

* style: format run attempt for CI

* fix(rebase): remove duplicate embedded-run imports

* test: add missing gateway context mock export

* fix: pass resolved auth profile into afterTurn compaction

Ensure the embedded runner forwards resolved auth profile context into
legacy context-engine compaction params on the normal afterTurn path,
matching overflow compaction behavior. This allows downstream LCM
summarization to use the intended provider auth/profile consistently.

Also fix strict TS typing in external-link token dedupe and align an
attempt unit test reasoningLevel value with the current ReasoningLevel
enum.

Regeneration-Prompt: |
  We were debugging context-engine compaction where downstream summary
  calls were missing the right auth/profile context in normal afterTurn
  flow, while overflow compaction already propagated it. Preserve current
  behavior and keep changes additive: thread the resolved authProfileId
  through run -> attempt -> legacy compaction param builder without
  broad refactors.

  Add tests that prove the auth profile is included in afterTurn legacy
  params and that overflow compaction still passes it through run
  attempts. Keep existing APIs stable, and only adjust small type issues
  needed for strict compilation.

* fix: remove duplicate imports from rebase

* feat: add context-engine system prompt additions

* fix(rebase): dedupe attempt import declarations

* test: fix fetch mock typing in ollama autodiscovery

* fix(test): add registerContextEngine to diffs extension mock APIs

* test(windows): use path.delimiter in ios-team-id fixture PATH

* test(cron): add model formatting and precedence edge case tests

Covers:
- Provider/model string splitting (whitespace, nested paths, empty segments)
- Provider normalization (casing, aliases like bedrock→amazon-bedrock)
- Anthropic model alias normalization (opus-4.5→claude-opus-4-5)
- Precedence: job payload > session override > config default
- Sequential runs with different providers (CI flake regression pattern)
- forceNew session preserving stored model overrides
- Whitespace/empty model string edge cases
- Config model as string vs object format

* test(cron): fix model formatting test config types

* test(phone-control): add registerContextEngine to mock API

* fix: re-export ChannelKind from config-reload-plan

* fix: add subagent mock to plugin-runtime-mock test util

* docs: add changelog fragment for context engine PR #22201
											
										
										
											2026-03-06 05:31:59 -08:00
+								      // Resolve the context engine once and reuse across retries to avoid
 								      // repeated initialization/connection overhead per attempt.
 								      ensureContextEnginesInitialized();
 								      const contextEngine = await resolveContextEngine(params.config);
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								      try {
-												fix: refresh Copilot token before expiry and retry on auth errors

GitHub Copilot API tokens expire after ~30 minutes. When OpenClaw spawns
a long-running subagent using Copilot as the provider, the token would
expire mid-session with no recovery mechanism, causing 401 auth errors.

This commit adds:
- Periodic token refresh scheduled 5 minutes before expiry
- Auth error detection with automatic token refresh and single retry
- Proper timer cleanup on session shutdown to prevent leaks

The implementation uses a per-attempt retry flag to ensure each auth
error can trigger one refresh+retry cycle without creating infinite
retry loops.

🤖 AI-assisted: This fix was developed with GitHub Copilot CLI assistance.
Testing: Fully tested with 3 new unit tests covering auth retry, retry
reset, and timer cleanup scenarios. All 11 auth rotation tests pass.

											
										
										
											2026-02-04 09:13:59 -03:00
+								        let authRetryPending = false;
-												agents: preserve totalTokens on request failure instead of using contextWindow (#34275)

Merged via squash.

Prepared head SHA: f9d111d0a79a07815d476356e98a28df3a0000ba
Co-authored-by: RealKai42 <44634134+RealKai42@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman

											
										
										
											2026-03-05 04:02:22 +08:00
+								        // Hoisted so the retry-limit error path can use the most recent API total.
 								        let lastTurnTotal: number | undefined;
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								        while (true) {
-												fix(agents): cap embedded runner retry loop

											
										
										
											2026-02-21 15:35:45 +01:00
+								          if (runLoopIterations >= MAX_RUN_LOOP_ITERATIONS) {
-												fix(agents): raise dynamic retry cap budget

											
										
										
											2026-02-21 15:41:03 +01:00
+								            const message =
 								              `Exceeded retry limit after ${runLoopIterations} attempts ` +
 								              `(max=${MAX_RUN_LOOP_ITERATIONS}).`;
-												fix(agents): cap embedded runner retry loop

											
										
										
											2026-02-21 15:35:45 +01:00
+								            log.error(
 								              `[run-retry-limit] sessionKey=${params.sessionKey ?? params.sessionId} ` +
-												fix(agents): raise dynamic retry cap budget

											
										
										
											2026-02-21 15:41:03 +01:00
+								                `provider=${provider}/${modelId} attempts=${runLoopIterations} ` +
 								                `maxAttempts=${MAX_RUN_LOOP_ITERATIONS}`,
-												fix(agents): cap embedded runner retry loop

											
										
										
											2026-02-21 15:35:45 +01:00
+								            );
 								            return {
 								              payloads: [
 								                {
 								                  text:
 								                    "Request failed after repeated internal retries. " +
 								                    "Please try again, or use /new to start a fresh session.",
 								                  isError: true,
 								                },
 								              ],
 								              meta: {
 								                durationMs: Date.now() - started,
-												agents: preserve totalTokens on request failure instead of using contextWindow (#34275)

Merged via squash.

Prepared head SHA: f9d111d0a79a07815d476356e98a28df3a0000ba
Co-authored-by: RealKai42 <44634134+RealKai42@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman

											
										
										
											2026-03-05 04:02:22 +08:00
+								                agentMeta: buildErrorAgentMeta({
-												fix(agents): cap embedded runner retry loop

											
										
										
											2026-02-21 15:35:45 +01:00
+								                  sessionId: params.sessionId,
 								                  provider,
 								                  model: model.id,
-												agents: preserve totalTokens on request failure instead of using contextWindow (#34275)

Merged via squash.

Prepared head SHA: f9d111d0a79a07815d476356e98a28df3a0000ba
Co-authored-by: RealKai42 <44634134+RealKai42@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman

											
										
										
											2026-03-05 04:02:22 +08:00
+								                  usageAccumulator,
 								                  lastRunPromptUsage,
 								                  lastTurnTotal,
 								                }),
-												fix(agents): cap embedded runner retry loop

											
										
										
											2026-02-21 15:35:45 +01:00
+								                error: { kind: "retry_limit", message },
 								              },
 								            };
 								          }
 								          runLoopIterations += 1;
-												fix: refresh Copilot token before expiry and retry on auth errors

GitHub Copilot API tokens expire after ~30 minutes. When OpenClaw spawns
a long-running subagent using Copilot as the provider, the token would
expire mid-session with no recovery mechanism, causing 401 auth errors.

This commit adds:
- Periodic token refresh scheduled 5 minutes before expiry
- Auth error detection with automatic token refresh and single retry
- Proper timer cleanup on session shutdown to prevent leaks

The implementation uses a per-attempt retry flag to ensure each auth
error can trigger one refresh+retry cycle without creating infinite
retry loops.

🤖 AI-assisted: This fix was developed with GitHub Copilot CLI assistance.
Testing: Fully tested with 3 new unit tests covering auth retry, retry
reset, and timer cleanup scenarios. All 11 auth rotation tests pass.

											
										
										
											2026-02-04 09:13:59 -03:00
+								          const copilotAuthRetry = authRetryPending;
 								          authRetryPending = false;
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								          attemptedThinking.add(thinkLevel);
 								          await fs.mkdir(resolvedWorkspace, { recursive: true });
-												fix: guard anthropic refusal trigger

											
										
										
											2026-01-21 07:28:11 +00:00
+								          const prompt =
 								            provider === "anthropic" ? scrubAnthropicRefusalMagic(params.prompt) : params.prompt;
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								          const attempt = await runEmbeddedAttempt({
 								            sessionId: params.sessionId,
 								            sessionKey: params.sessionKey,
-												feat(hooks): add trigger and channelId to plugin hook agent context (#28623)

* feat(hooks): add trigger and channelId to plugin hook agent context

Adds `trigger` and `channelId` fields to `PluginHookAgentContext` so
plugins can determine what initiated the agent run and which channel
it originated from, without session-key parsing or Redis bridging.

trigger values: "user", "heartbeat", "cron", "memory"
channelId values: "telegram", "discord", "whatsapp", etc.

Both fields are threaded through run.ts and attempt.ts hookCtx so all
hook phases receive them (before_model_resolve, before_prompt_build,
before_agent_start, llm_input, llm_output, agent_end).

channelId falls back from messageChannel to messageProvider when the
former is not set. followup-runner passes originatingChannel so queued
followup runs also carry channel context.

* docs(changelog): note hook context parity fix for #28623

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
											
										
										
											2026-03-03 11:39:20 +10:00
+								            trigger: params.trigger,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            messageChannel: params.messageChannel,
 								            messageProvider: params.messageProvider,
 								            agentAccountId: params.agentAccountId,
-												fix: preserve subagent thread routing (#1241)

Thanks @gnarco.

Co-authored-by: gnarco <gnarco@users.noreply.github.com>

											
										
										
											2026-01-20 17:22:07 +00:00
+								            messageTo: params.messageTo,
 								            messageThreadId: params.messageThreadId,
-												Channels: add per-group tool policies

											
										
										
											2026-01-24 15:35:05 +13:00
+								            groupId: params.groupId,
 								            groupChannel: params.groupChannel,
 								            groupSpace: params.groupSpace,
-												fix: enforce group tool policy inheritance for subagents (#1557) (thanks @adam91holt)

											
										
										
											2026-01-24 05:49:23 +00:00
+								            spawnedBy: params.spawnedBy,
-												fix(pi-embedded-runner): propagate sender identity to fix Feishu doc create auto-grant (#32915)

Merged via squash.

Prepared head SHA: efb229307559ad37062b454da444567f5dca8a96
Co-authored-by: cszhouwei <1811726+cszhouwei@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman
											
										
										
											2026-03-07 06:31:15 +08:00
+								            senderId: params.senderId,
 								            senderName: params.senderName,
 								            senderUsername: params.senderUsername,
 								            senderE164: params.senderE164,
-												Security: owner-only tools + command auth hardening (#9202)

* Security: gate whatsapp_login by sender auth

* Security: treat undefined senderAuthorized as unauthorized (opt-in)

* fix: gate whatsapp_login to owner senders (#8768) (thanks @victormier)

* fix: add explicit owner allowlist for tools (#8768) (thanks @victormier)

* fix: normalize escaped newlines in send actions (#8768) (thanks @victormier)

---------

Co-authored-by: Victor Mier <victormier@gmail.com>
											
										
										
											2026-02-04 19:49:36 -05:00
+								            senderIsOwner: params.senderIsOwner,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            currentChannelId: params.currentChannelId,
 								            currentThreadTs: params.currentThreadTs,
-												fix(telegram): make reaction handling soft-fail and message-id resilient (#20236)

* Telegram: soft-fail reactions and fallback to inbound message id

* Telegram: soft-fail missing reaction message id

* Update CHANGELOG.md

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
											
										
										
											2026-02-23 23:25:14 +08:00
+								            currentMessageId: params.currentMessageId,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            replyToMode: params.replyToMode,
 								            hasRepliedRef: params.hasRepliedRef,
 								            sessionFile: params.sessionFile,
-												fix: guard resolveUserPath against undefined input (#10176)

* fix: guard resolveUserPath against undefined input

When the subagent spawner omits workspaceDir, resolveUserPath receives
undefined and crashes on .trim(). Add a falsy guard that falls back
to process.cwd(), matching the behavior callers already expect.

Closes #10089

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: harden runner workspace fallback (#10176) (thanks @Yida-Dev)

* fix: harden workspace fallback scoping (#10176) (thanks @Yida-Dev)

* refactor: centralize workspace fallback classification and redaction (#10176) (thanks @Yida-Dev)

* test: remove explicit any from utils mock (#10176) (thanks @Yida-Dev)

* security: reject malformed agent session keys for workspace resolution (#10176) (thanks @Yida-Dev)

---------

Co-authored-by: Yida-Dev <reyifeijun@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Gustavo Madeira Santana <gumadeiras@gmail.com>
											
										
										
											2026-02-07 01:16:58 +07:00
+								            workspaceDir: resolvedWorkspace,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            agentDir,
 								            config: params.config,
-												feature(context): extend plugin system to support custom context management (#22201)

* feat(context-engine): add ContextEngine interface and registry

Introduce the pluggable ContextEngine abstraction that allows external
plugins to register custom context management strategies.

- ContextEngine interface with lifecycle methods: bootstrap, ingest,
  ingestBatch, afterTurn, assemble, compact, prepareSubagentSpawn,
  onSubagentEnded, dispose
- Module-level singleton registry with registerContextEngine() and
  resolveContextEngine() (config-driven slot selection)
- LegacyContextEngine: pass-through implementation wrapping existing
  compaction behavior for 100% backward compatibility
- ensureContextEnginesInitialized() guard for safe one-time registration
- 19 tests covering contract, registry, resolution, and legacy parity

* feat(plugins): add context-engine slot and registerContextEngine API

Wire the ContextEngine abstraction into the plugin system so external
plugins can register context engines via the standard plugin API.

- Add 'context-engine' to PluginKind union type
- Add 'contextEngine' slot to PluginSlotsConfig (default: 'legacy')
- Wire registerContextEngine() through OpenClawPluginApi
- Export ContextEngine types from plugin-sdk for external consumers
- Restore proper slot-based resolution in registry

* feat(context-engine): wire ContextEngine into agent run lifecycle

Integrate the ContextEngine abstraction into the core agent run path:

- Resolve context engine once per run (reused across retries)
- Bootstrap: hydrate canonical store from session file on first run
- Assemble: route context assembly through pluggable engine
- Auto-compaction guard: disable built-in auto-compaction when
  the engine declares ownsCompaction (prevents double-compaction)
- AfterTurn: post-turn lifecycle hook for ingest + background
  compaction decisions
- Overflow compaction: route through contextEngine.compact()
- Dispose: clean up engine resources in finally block
- Notify context engine on subagent lifecycle events

Legacy engine: all lifecycle methods are pass-through/no-op, preserving
100% backward compatibility for users without a context engine plugin.

* feat(plugins): add scoped subagent methods and gateway request scope

Expose runtime.subagent.{run, waitForRun, getSession, deleteSession}
so external plugins can spawn sub-agent sessions without raw gateway
dispatch access.

Uses AsyncLocalStorage request-scope bridge to dispatch internally via
handleGatewayRequest with a synthetic operator client. Methods are only
available during gateway request handling.

- Symbol.for-backed global singleton for cross-module-reload safety
- Fallback gateway context for non-WS dispatch paths (Telegram/WhatsApp)
- Set gateway request scope for all handlers, not just plugin handlers
- 3 staleness tests for fallback context hardening

* feat(context-engine): route /compact and sessions.get through context engine

Wire the /compact command and sessions.get handler through the pluggable
ContextEngine interface.

- Thread tokenBudget and force parameters to context engine compact
- Route /compact through contextEngine.compact() when registered
- Wire sessions.get as runtime alias for plugin subagent dispatch
- Add .pebbles/ to .gitignore

* style: format with oxfmt 0.33.0

Fix duplicate import (ControlUiRootState in server.impl.ts) and
import ordering across all changed files.

* fix: update extension test mocks for context-engine types

Add missing subagent property to bluebubbles PluginRuntime mock.
Add missing registerContextEngine to lobster OpenClawPluginApi mock.

* fix(subagents): keep deferred delete cleanup retryable

* style: format run attempt for CI

* fix(rebase): remove duplicate embedded-run imports

* test: add missing gateway context mock export

* fix: pass resolved auth profile into afterTurn compaction

Ensure the embedded runner forwards resolved auth profile context into
legacy context-engine compaction params on the normal afterTurn path,
matching overflow compaction behavior. This allows downstream LCM
summarization to use the intended provider auth/profile consistently.

Also fix strict TS typing in external-link token dedupe and align an
attempt unit test reasoningLevel value with the current ReasoningLevel
enum.

Regeneration-Prompt: |
  We were debugging context-engine compaction where downstream summary
  calls were missing the right auth/profile context in normal afterTurn
  flow, while overflow compaction already propagated it. Preserve current
  behavior and keep changes additive: thread the resolved authProfileId
  through run -> attempt -> legacy compaction param builder without
  broad refactors.

  Add tests that prove the auth profile is included in afterTurn legacy
  params and that overflow compaction still passes it through run
  attempts. Keep existing APIs stable, and only adjust small type issues
  needed for strict compilation.

* fix: remove duplicate imports from rebase

* feat: add context-engine system prompt additions

* fix(rebase): dedupe attempt import declarations

* test: fix fetch mock typing in ollama autodiscovery

* fix(test): add registerContextEngine to diffs extension mock APIs

* test(windows): use path.delimiter in ios-team-id fixture PATH

* test(cron): add model formatting and precedence edge case tests

Covers:
- Provider/model string splitting (whitespace, nested paths, empty segments)
- Provider normalization (casing, aliases like bedrock→amazon-bedrock)
- Anthropic model alias normalization (opus-4.5→claude-opus-4-5)
- Precedence: job payload > session override > config default
- Sequential runs with different providers (CI flake regression pattern)
- forceNew session preserving stored model overrides
- Whitespace/empty model string edge cases
- Config model as string vs object format

* test(cron): fix model formatting test config types

* test(phone-control): add registerContextEngine to mock API

* fix: re-export ChannelKind from config-reload-plan

* fix: add subagent mock to plugin-runtime-mock test util

* docs: add changelog fragment for context engine PR #22201
											
										
										
											2026-03-06 05:31:59 -08:00
+								            contextEngine,
 								            contextTokenBudget: ctxInfo.tokens,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            skillsSnapshot: params.skillsSnapshot,
-												fix: guard anthropic refusal trigger

											
										
										
											2026-01-21 07:28:11 +00:00
+								            prompt,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            images: params.images,
-												fix: publish llm-task docs and harden tool

											
										
										
											2026-01-24 01:44:36 +00:00
+								            disableTools: params.disableTools,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            provider,
 								            modelId,
-												fix(agents): apply contextTokens cap for compaction threshold (#39099)

Land #39099 by @MumuTW.

Co-authored-by: MumuTW <clothl47364@gmail.com>

											
										
										
											2026-03-07 19:18:48 +00:00
+								            model: effectiveModel,
-												feature(context): extend plugin system to support custom context management (#22201)

* feat(context-engine): add ContextEngine interface and registry

Introduce the pluggable ContextEngine abstraction that allows external
plugins to register custom context management strategies.

- ContextEngine interface with lifecycle methods: bootstrap, ingest,
  ingestBatch, afterTurn, assemble, compact, prepareSubagentSpawn,
  onSubagentEnded, dispose
- Module-level singleton registry with registerContextEngine() and
  resolveContextEngine() (config-driven slot selection)
- LegacyContextEngine: pass-through implementation wrapping existing
  compaction behavior for 100% backward compatibility
- ensureContextEnginesInitialized() guard for safe one-time registration
- 19 tests covering contract, registry, resolution, and legacy parity

* feat(plugins): add context-engine slot and registerContextEngine API

Wire the ContextEngine abstraction into the plugin system so external
plugins can register context engines via the standard plugin API.

- Add 'context-engine' to PluginKind union type
- Add 'contextEngine' slot to PluginSlotsConfig (default: 'legacy')
- Wire registerContextEngine() through OpenClawPluginApi
- Export ContextEngine types from plugin-sdk for external consumers
- Restore proper slot-based resolution in registry

* feat(context-engine): wire ContextEngine into agent run lifecycle

Integrate the ContextEngine abstraction into the core agent run path:

- Resolve context engine once per run (reused across retries)
- Bootstrap: hydrate canonical store from session file on first run
- Assemble: route context assembly through pluggable engine
- Auto-compaction guard: disable built-in auto-compaction when
  the engine declares ownsCompaction (prevents double-compaction)
- AfterTurn: post-turn lifecycle hook for ingest + background
  compaction decisions
- Overflow compaction: route through contextEngine.compact()
- Dispose: clean up engine resources in finally block
- Notify context engine on subagent lifecycle events

Legacy engine: all lifecycle methods are pass-through/no-op, preserving
100% backward compatibility for users without a context engine plugin.

* feat(plugins): add scoped subagent methods and gateway request scope

Expose runtime.subagent.{run, waitForRun, getSession, deleteSession}
so external plugins can spawn sub-agent sessions without raw gateway
dispatch access.

Uses AsyncLocalStorage request-scope bridge to dispatch internally via
handleGatewayRequest with a synthetic operator client. Methods are only
available during gateway request handling.

- Symbol.for-backed global singleton for cross-module-reload safety
- Fallback gateway context for non-WS dispatch paths (Telegram/WhatsApp)
- Set gateway request scope for all handlers, not just plugin handlers
- 3 staleness tests for fallback context hardening

* feat(context-engine): route /compact and sessions.get through context engine

Wire the /compact command and sessions.get handler through the pluggable
ContextEngine interface.

- Thread tokenBudget and force parameters to context engine compact
- Route /compact through contextEngine.compact() when registered
- Wire sessions.get as runtime alias for plugin subagent dispatch
- Add .pebbles/ to .gitignore

* style: format with oxfmt 0.33.0

Fix duplicate import (ControlUiRootState in server.impl.ts) and
import ordering across all changed files.

* fix: update extension test mocks for context-engine types

Add missing subagent property to bluebubbles PluginRuntime mock.
Add missing registerContextEngine to lobster OpenClawPluginApi mock.

* fix(subagents): keep deferred delete cleanup retryable

* style: format run attempt for CI

* fix(rebase): remove duplicate embedded-run imports

* test: add missing gateway context mock export

* fix: pass resolved auth profile into afterTurn compaction

Ensure the embedded runner forwards resolved auth profile context into
legacy context-engine compaction params on the normal afterTurn path,
matching overflow compaction behavior. This allows downstream LCM
summarization to use the intended provider auth/profile consistently.

Also fix strict TS typing in external-link token dedupe and align an
attempt unit test reasoningLevel value with the current ReasoningLevel
enum.

Regeneration-Prompt: |
  We were debugging context-engine compaction where downstream summary
  calls were missing the right auth/profile context in normal afterTurn
  flow, while overflow compaction already propagated it. Preserve current
  behavior and keep changes additive: thread the resolved authProfileId
  through run -> attempt -> legacy compaction param builder without
  broad refactors.

  Add tests that prove the auth profile is included in afterTurn legacy
  params and that overflow compaction still passes it through run
  attempts. Keep existing APIs stable, and only adjust small type issues
  needed for strict compilation.

* fix: remove duplicate imports from rebase

* feat: add context-engine system prompt additions

* fix(rebase): dedupe attempt import declarations

* test: fix fetch mock typing in ollama autodiscovery

* fix(test): add registerContextEngine to diffs extension mock APIs

* test(windows): use path.delimiter in ios-team-id fixture PATH

* test(cron): add model formatting and precedence edge case tests

Covers:
- Provider/model string splitting (whitespace, nested paths, empty segments)
- Provider normalization (casing, aliases like bedrock→amazon-bedrock)
- Anthropic model alias normalization (opus-4.5→claude-opus-4-5)
- Precedence: job payload > session override > config default
- Sequential runs with different providers (CI flake regression pattern)
- forceNew session preserving stored model overrides
- Whitespace/empty model string edge cases
- Config model as string vs object format

* test(cron): fix model formatting test config types

* test(phone-control): add registerContextEngine to mock API

* fix: re-export ChannelKind from config-reload-plan

* fix: add subagent mock to plugin-runtime-mock test util

* docs: add changelog fragment for context engine PR #22201
											
										
										
											2026-03-06 05:31:59 -08:00
+								            authProfileId: lastProfileId,
 								            authProfileIdSource: lockedProfileId ? "user" : "auto",
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            authStorage,
 								            modelRegistry,
-												fix: guard resolveUserPath against undefined input (#10176)

* fix: guard resolveUserPath against undefined input

When the subagent spawner omits workspaceDir, resolveUserPath receives
undefined and crashes on .trim(). Add a falsy guard that falls back
to process.cwd(), matching the behavior callers already expect.

Closes #10089

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: harden runner workspace fallback (#10176) (thanks @Yida-Dev)

* fix: harden workspace fallback scoping (#10176) (thanks @Yida-Dev)

* refactor: centralize workspace fallback classification and redaction (#10176) (thanks @Yida-Dev)

* test: remove explicit any from utils mock (#10176) (thanks @Yida-Dev)

* security: reject malformed agent session keys for workspace resolution (#10176) (thanks @Yida-Dev)

---------

Co-authored-by: Yida-Dev <reyifeijun@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Gustavo Madeira Santana <gumadeiras@gmail.com>
											
										
										
											2026-02-07 01:16:58 +07:00
+								            agentId: workspaceResolution.agentId,
-												Plugins/Hooks: avoid duplicate before_agent_start executions

											
										
										
											2026-02-21 22:31:51 -08:00
+								            legacyBeforeAgentStartResult,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            thinkLevel,
 								            verboseLevel: params.verboseLevel,
 								            reasoningLevel: params.reasoningLevel,
-												fix: format verbose tool output by channel

											
										
										
											2026-01-17 10:17:57 +00:00
+								            toolResultFormat: resolvedToolResultFormat,
-												feat: add /exec session overrides

											
										
										
											2026-01-18 06:11:38 +00:00
+								            execOverrides: params.execOverrides,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            bashElevated: params.bashElevated,
 								            timeoutMs: params.timeoutMs,
 								            runId: params.runId,
 								            abortSignal: params.abortSignal,
 								            shouldEmitToolResult: params.shouldEmitToolResult,
-												feat: extend verbose tool feedback

											
										
										
											2026-01-17 05:33:27 +00:00
+								            shouldEmitToolOutput: params.shouldEmitToolOutput,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            onPartialReply: params.onPartialReply,
 								            onAssistantMessageStart: params.onAssistantMessageStart,
 								            onBlockReply: params.onBlockReply,
 								            onBlockReplyFlush: params.onBlockReplyFlush,
 								            blockReplyBreak: params.blockReplyBreak,
 								            blockReplyChunking: params.blockReplyChunking,
 								            onReasoningStream: params.onReasoningStream,
-												fix(telegram): fix streaming with extended-thinking models overwriting previous messages; also affects execution errors (#17973)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 34b52eead8faeb64995f61b9e7ab102711acd37b
Co-authored-by: Marvae <11957602+Marvae@users.noreply.github.com>
Co-authored-by: obviyus <22031114+obviyus@users.noreply.github.com>
Reviewed-by: @obviyus

											
										
										
											2026-02-16 21:24:34 +08:00
+								            onReasoningEnd: params.onReasoningEnd,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            onToolResult: params.onToolResult,
 								            onAgentEvent: params.onAgentEvent,
 								            extraSystemPrompt: params.extraSystemPrompt,
-												fix: preserve inter-session input provenance (thanks @anbecker)

											
										
										
											2026-02-13 02:01:53 +01:00
+								            inputProvenance: params.inputProvenance,
-												fix: expand /v1/responses inputs (#1229) (thanks @RyanLisse)

											
										
										
											2026-01-20 07:35:29 +00:00
+								            streamParams: params.streamParams,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            ownerNumbers: params.ownerNumbers,
 								            enforceFinalTag: params.enforceFinalTag,
-												Agent: unify bootstrap truncation warning handling (#32769)

Merged via squash.

Prepared head SHA: 5d6d4ddfa620011e267d892b402751847d5ac0c3
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras

											
										
										
											2026-03-03 16:28:38 -05:00
+								            bootstrapPromptWarningSignaturesSeen,
 								            bootstrapPromptWarningSignature:
 								              bootstrapPromptWarningSignaturesSeen[bootstrapPromptWarningSignaturesSeen.length - 1],
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								          });
-												fix(agent): prevent session lock deadlock on timeout during compaction (#9855)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 64a28900f183941a496a6fd5baaa9efcfb38f0f8
Co-authored-by: mverrilli <816450+mverrilli@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras

											
										
										
											2026-02-14 14:24:20 -05:00
+								          const {
 								            aborted,
 								            promptError,
 								            timedOut,
 								            timedOutDuringCompaction,
 								            sessionIdUsed,
 								            lastAssistant,
 								          } = attempt;
-												Agent: unify bootstrap truncation warning handling (#32769)

Merged via squash.

Prepared head SHA: 5d6d4ddfa620011e267d892b402751847d5ac0c3
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras

											
										
										
											2026-03-03 16:28:38 -05:00
+								          bootstrapPromptWarningSignaturesSeen =
 								            attempt.bootstrapPromptWarningSignaturesSeen ??
 								            (attempt.bootstrapPromptWarningSignature
 								              ? Array.from(
 								                  new Set([
 								                    ...bootstrapPromptWarningSignaturesSeen,
 								                    attempt.bootstrapPromptWarningSignature,
 								                  ]),
 								                )
 								              : bootstrapPromptWarningSignaturesSeen);
-												fix(agents): stabilize overflow compaction retries and session context accounting (openclaw#14102) thanks @vpesh

Verified:
- CI checks for commit 86a7ecb45ebf0be61dce9261398000524fd9fab6
- Rebase conflict resolution for compatibility with latest main

Co-authored-by: vpesh <9496634+vpesh@users.noreply.github.com>

											
										
										
											2026-02-13 00:53:13 +01:00
+								          const lastAssistantUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
 								          const attemptUsage = attempt.attemptUsage ?? lastAssistantUsage;
 								          mergeUsageIntoAccumulator(usageAccumulator, attemptUsage);
 								          // Keep prompt size from the latest model call so session totalTokens
 								          // reflects current context usage, not accumulated tool-loop usage.
 								          lastRunPromptUsage = lastAssistantUsage ?? attemptUsage;
-												agents: preserve totalTokens on request failure instead of using contextWindow (#34275)

Merged via squash.

Prepared head SHA: f9d111d0a79a07815d476356e98a28df3a0000ba
Co-authored-by: RealKai42 <44634134+RealKai42@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman

											
										
										
											2026-03-05 04:02:22 +08:00
+								          lastTurnTotal = lastAssistantUsage?.total ?? attemptUsage?.total;
-												Agents: add nested subagent orchestration controls and reduce subagent token waste (#14447)

* Agents: add subagent orchestration controls

* Agents: add subagent orchestration controls (WIP uncommitted changes)

* feat(subagents): add depth-based spawn gating for sub-sub-agents

* feat(subagents): tool policy, registry, and announce chain for nested agents

* feat(subagents): system prompt, docs, changelog for nested sub-agents

* fix(subagents): prevent model fallback override, show model during active runs, and block context overflow fallback

Bug 1: When a session has an explicit model override (e.g., gpt/openai-codex),
the fallback candidate logic in resolveFallbackCandidates silently appended the
global primary model (opus) as a backstop. On reinjection/steer with a transient
error, the session could fall back to opus which has a smaller context window
and crash. Fix: when storedModelOverride is set, pass fallbacksOverride ?? []
instead of undefined, preventing the implicit primary backstop.

Bug 2: Active subagents showed 'model n/a' in /subagents list because
resolveModelDisplay only read entry.model/modelProvider (populated after run
completes). Fix: fall back to modelOverride/providerOverride fields which are
populated at spawn time via sessions.patch.

Bug 3: Context overflow errors (prompt too long, context_length_exceeded) could
theoretically escape runEmbeddedPiAgent and be treated as failover candidates
in runWithModelFallback, causing a switch to a model with a smaller context
window. Fix: in runWithModelFallback, detect context overflow errors via
isLikelyContextOverflowError and rethrow them immediately instead of trying the
next model candidate.

* fix(subagents): track spawn depth in session store and fix announce routing for nested agents

* Fix compaction status tracking and dedupe overflow compaction triggers

* fix(subagents): enforce depth block via session store and implement cascade kill

* fix: inject group chat context into system prompt

* fix(subagents): always write model to session store at spawn time

* Preserve spawnDepth when agent handler rewrites session entry

* fix(subagents): suppress announce on steer-restart

* fix(subagents): fallback spawned session model to runtime default

* fix(subagents): enforce spawn depth when caller key resolves by sessionId

* feat(subagents): implement active-first ordering for numeric targets and enhance task display

- Added a test to verify that subagents with numeric targets follow an active-first list ordering.
- Updated `resolveSubagentTarget` to sort subagent runs based on active status and recent activity.
- Enhanced task display in command responses to prevent truncation of long task descriptions.
- Introduced new utility functions for compacting task text and managing subagent run states.

* fix(subagents): show model for active runs via run record fallback

When the spawned model matches the agent's default model, the session
store's override fields are intentionally cleared (isDefault: true).
The model/modelProvider fields are only populated after the run
completes. This left active subagents showing 'model n/a'.

Fix: store the resolved model on SubagentRunRecord at registration
time, and use it as a fallback in both display paths (subagents tool
and /subagents command) when the session store entry has no model info.

Changes:
- SubagentRunRecord: add optional model field
- registerSubagentRun: accept and persist model param
- sessions-spawn-tool: pass resolvedModel to registerSubagentRun
- subagents-tool: pass run record model as fallback to resolveModelDisplay
- commands-subagents: pass run record model as fallback to resolveModelDisplay

* feat(chat): implement session key resolution and reset on sidebar navigation

- Added functions to resolve the main session key and reset chat state when switching sessions from the sidebar.
- Updated the `renderTab` function to handle session key changes when navigating to the chat tab.
- Introduced a test to verify that the session resets to "main" when opening chat from the sidebar navigation.

* fix: subagent timeout=0 passthrough and fallback prompt duplication

Bug 1: runTimeoutSeconds=0 now means 'no timeout' instead of applying 600s default
- sessions-spawn-tool: default to undefined (not 0) when neither timeout param
  is provided; use != null check so explicit 0 passes through to gateway
- agent.ts: accept 0 as valid timeout (resolveAgentTimeoutMs already handles
  0 → MAX_SAFE_TIMEOUT_MS)

Bug 2: model fallback no longer re-injects the original prompt as a duplicate
- agent.ts: track fallback attempt index; on retries use a short continuation
  message instead of the full original prompt since the session file already
  contains it from the first attempt
- Also skip re-sending images on fallback retries (already in session)

* feat(subagents): truncate long task descriptions in subagents command output

- Introduced a new utility function to format task previews, limiting their length to improve readability.
- Updated the command handler to use the new formatting function, ensuring task descriptions are truncated appropriately.
- Adjusted related tests to verify that long task descriptions are now truncated in the output.

* refactor(subagents): update subagent registry path resolution and improve command output formatting

- Replaced direct import of STATE_DIR with a utility function to resolve the state directory dynamically.
- Enhanced the formatting of command output for active and recent subagents, adding separators for better readability.
- Updated related tests to reflect changes in command output structure.

* fix(subagent): default sessions_spawn to no timeout when runTimeoutSeconds omitted

The previous fix (75a791106) correctly handled the case where
runTimeoutSeconds was explicitly set to 0 ("no timeout"). However,
when models omit the parameter entirely (which is common since the
schema marks it as optional), runTimeoutSeconds resolved to undefined.

undefined flowed through the chain as:
  sessions_spawn → timeout: undefined (since undefined != null is false)
  → gateway agent handler → agentCommand opts.timeout: undefined
  → resolveAgentTimeoutMs({ overrideSeconds: undefined })
  → DEFAULT_AGENT_TIMEOUT_SECONDS (600s = 10 minutes)

This caused subagents to be killed at exactly 10 minutes even though
the user's intent (via TOOLS.md) was for subagents to run without a
timeout.

Fix: default runTimeoutSeconds to 0 (no timeout) when neither
runTimeoutSeconds nor timeoutSeconds is provided by the caller.
Subagent spawns are long-running by design and should not inherit the
600s agent-command default timeout.

* fix(subagent): accept timeout=0 in agent-via-gateway path (second 600s default)

* fix: thread timeout override through getReplyFromConfig dispatch path

getReplyFromConfig called resolveAgentTimeoutMs({ cfg }) with no override,
always falling back to the config default (600s). Add timeoutOverrideSeconds
to GetReplyOptions and pass it through as overrideSeconds so callers of the
dispatch chain can specify a custom timeout (0 = no timeout).

This complements the existing timeout threading in agentCommand and the
cron isolated-agent runner, which already pass overrideSeconds correctly.

* feat(model-fallback): normalize OpenAI Codex model references and enhance fallback handling

- Added normalization for OpenAI Codex model references, specifically converting "gpt-5.3-codex" to "openai-codex" before execution.
- Updated the `resolveFallbackCandidates` function to utilize the new normalization logic.
- Enhanced tests to verify the correct behavior of model normalization and fallback mechanisms.
- Introduced a new test case to ensure that the normalization process works as expected for various input formats.

* feat(tests): add unit tests for steer failure behavior in openclaw-tools

- Introduced a new test file to validate the behavior of subagents when steer replacement dispatch fails.
- Implemented tests to ensure that the announce behavior is restored correctly and that the suppression reason is cleared as expected.
- Enhanced the subagent registry with a new function to clear steer restart suppression.
- Updated related components to support the new test scenarios.

* fix(subagents): replace stop command with kill in slash commands and documentation

- Updated the `/subagents` command to replace `stop` with `kill` for consistency in controlling sub-agent runs.
- Modified related documentation to reflect the change in command usage.
- Removed legacy timeoutSeconds references from the sessions-spawn-tool schema and tests to streamline timeout handling.
- Enhanced tests to ensure correct behavior of the updated commands and their interactions.

* feat(tests): add unit tests for readLatestAssistantReply function

- Introduced a new test file for the `readLatestAssistantReply` function to validate its behavior with various message scenarios.
- Implemented tests to ensure the function correctly retrieves the latest assistant message and handles cases where the latest message has no text.
- Mocked the gateway call to simulate different message histories for comprehensive testing.

* feat(tests): enhance subagent kill-all cascade tests and announce formatting

- Added a new test to verify that the `kill-all` command cascades through ended parents to active descendants in subagents.
- Updated the subagent announce formatting tests to reflect changes in message structure, including the replacement of "Findings:" with "Result:" and the addition of new expectations for message content.
- Improved the handling of long findings and stats in the announce formatting logic to ensure concise output.
- Refactored related functions to enhance clarity and maintainability in the subagent registry and tools.

* refactor(subagent): update announce formatting and remove unused constants

- Modified the subagent announce formatting to replace "Findings:" with "Result:" and adjusted related expectations in tests.
- Removed constants for maximum announce findings characters and summary words, simplifying the announcement logic.
- Updated the handling of findings to retain full content instead of truncating, ensuring more informative outputs.
- Cleaned up unused imports in the commands-subagents file to enhance code clarity.

* feat(tests): enhance billing error handling in user-facing text

- Added tests to ensure that normal text mentioning billing plans is not rewritten, preserving user context.
- Updated the `isBillingErrorMessage` and `sanitizeUserFacingText` functions to improve handling of billing-related messages.
- Introduced new test cases for various scenarios involving billing messages to ensure accurate processing and output.
- Enhanced the subagent announce flow to correctly manage active descendant runs, preventing premature announcements.

* feat(subagent): enhance workflow guidance and auto-announcement clarity

- Added a new guideline in the subagent system prompt to emphasize trust in push-based completion, discouraging busy polling for status updates.
- Updated documentation to clarify that sub-agents will automatically announce their results, improving user understanding of the workflow.
- Enhanced tests to verify the new guidance on avoiding polling loops and to ensure the accuracy of the updated prompts.

* fix(cron): avoid announcing interim subagent spawn acks

* chore: clean post-rebase imports

* fix(cron): fall back to child replies when parent stays interim

* fix(subagents): make active-run guidance advisory

* fix(subagents): update announce flow to handle active descendants and enhance test coverage

- Modified the announce flow to defer announcements when active descendant runs are present, ensuring accurate status reporting.
- Updated tests to verify the new behavior, including scenarios where no fallback requester is available and ensuring proper handling of finished subagents.
- Enhanced the announce formatting to include an `expectFinal` flag for better clarity in the announcement process.

* fix(subagents): enhance announce flow and formatting for user updates

- Updated the announce flow to provide clearer instructions for user updates based on active subagent runs and requester context.
- Refactored the announcement logic to improve clarity and ensure internal context remains private.
- Enhanced tests to verify the new message expectations and formatting, including updated prompts for user-facing updates.
- Introduced a new function to build reply instructions based on session context, improving the overall announcement process.

* fix: resolve prep blockers and changelog placement (#14447) (thanks @tyler6204)

* fix: restore cron delivery-plan import after rebase (#14447) (thanks @tyler6204)

* fix: resolve test failures from rebase conflicts (#14447) (thanks @tyler6204)

* fix: apply formatting after rebase (#14447) (thanks @tyler6204)
											
										
										
											2026-02-14 22:03:45 -08:00
+								          const attemptCompactionCount = Math.max(0, attempt.compactionCount ?? 0);
 								          autoCompactionCount += attemptCompactionCount;
-												fix: include provider and model name in billing error message (#20510)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 40dbdf62e8952dd6c5afcb9ce2a73199f3f532a6
Co-authored-by: echoVic <16428813+echoVic@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras

											
										
										
											2026-02-19 10:56:00 +08:00
+								          const activeErrorContext = resolveActiveErrorContext({
 								            lastAssistant,
 								            provider,
 								            model: modelId,
 								          });
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

- Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
- Updated usage tests to include cases for derived session total tokens.
- Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								          const formattedAssistantErrorText = lastAssistant
 								            ? formatAssistantErrorText(lastAssistant, {
 								                cfg: params.config,
 								                sessionKey: params.sessionKey ?? params.sessionId,
-												fix: include provider and model name in billing error message (#20510)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 40dbdf62e8952dd6c5afcb9ce2a73199f3f532a6
Co-authored-by: echoVic <16428813+echoVic@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras

											
										
										
											2026-02-19 10:56:00 +08:00
+								                provider: activeErrorContext.provider,
 								                model: activeErrorContext.model,
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

- Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
- Updated usage tests to include cases for derived session total tokens.
- Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								              })
 								            : undefined;
 								          const assistantErrorText =
 								            lastAssistant?.stopReason === "error"
 								              ? lastAssistant.errorMessage?.trim() || formattedAssistantErrorText
 								              : undefined;
 								          const contextOverflowError = !aborted
 								            ? (() => {
 								                if (promptError) {
 								                  const errorText = describeUnknownError(promptError);
-												fix(agents): stabilize overflow compaction retries and session context accounting (openclaw#14102) thanks @vpesh

Verified:
- CI checks for commit 86a7ecb45ebf0be61dce9261398000524fd9fab6
- Rebase conflict resolution for compatibility with latest main

Co-authored-by: vpesh <9496634+vpesh@users.noreply.github.com>

											
										
										
											2026-02-13 00:53:13 +01:00
+								                  if (isLikelyContextOverflowError(errorText)) {
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

- Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
- Updated usage tests to include cases for derived session total tokens.
- Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								                    return { text: errorText, source: "promptError" as const };
 								                  }
 								                  // Prompt submission failed with a non-overflow error. Do not
 								                  // inspect prior assistant errors from history for this attempt.
 								                  return null;
 								                }
-												fix(agents): stabilize overflow compaction retries and session context accounting (openclaw#14102) thanks @vpesh

Verified:
- CI checks for commit 86a7ecb45ebf0be61dce9261398000524fd9fab6
- Rebase conflict resolution for compatibility with latest main

Co-authored-by: vpesh <9496634+vpesh@users.noreply.github.com>

											
										
										
											2026-02-13 00:53:13 +01:00
+								                if (assistantErrorText && isLikelyContextOverflowError(assistantErrorText)) {
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

- Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
- Updated usage tests to include cases for derived session total tokens.
- Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								                  return { text: assistantErrorText, source: "assistantError" as const };
 								                }
 								                return null;
 								              })()
 								            : null;
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

- Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
- Updated usage tests to include cases for derived session total tokens.
- Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								          if (contextOverflowError) {
-												feat: add pre-prompt context size diagnostic logging (openclaw#8930) thanks @Glucksberg

Verified:
- pnpm build
- pnpm check
- pnpm test

Co-authored-by: Glucksberg <80581902+Glucksberg@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>

											
										
										
											2026-02-13 19:54:22 -04:00
+								            const overflowDiagId = createCompactionDiagId();
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

- Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
- Updated usage tests to include cases for derived session total tokens.
- Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								            const errorText = contextOverflowError.text;
 								            const msgCount = attempt.messagesSnapshot?.length ?? 0;
 								            log.warn(
 								              `[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
 								                `provider=${provider}/${modelId} source=${contextOverflowError.source} ` +
 								                `messages=${msgCount} sessionFile=${params.sessionFile} ` +
-												feat: add pre-prompt context size diagnostic logging (openclaw#8930) thanks @Glucksberg

Verified:
- pnpm build
- pnpm check
- pnpm test

Co-authored-by: Glucksberg <80581902+Glucksberg@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>

											
										
										
											2026-02-13 19:54:22 -04:00
+								                `diagId=${overflowDiagId} compactionAttempts=${overflowCompactionAttempts} ` +
 								                `error=${errorText.slice(0, 200)}`,
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

- Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
- Updated usage tests to include cases for derived session total tokens.
- Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								            );
 								            const isCompactionFailure = isCompactionFailureError(errorText);
-												Agents: add nested subagent orchestration controls and reduce subagent token waste (#14447)

* Agents: add subagent orchestration controls

* Agents: add subagent orchestration controls (WIP uncommitted changes)

* feat(subagents): add depth-based spawn gating for sub-sub-agents

* feat(subagents): tool policy, registry, and announce chain for nested agents

* feat(subagents): system prompt, docs, changelog for nested sub-agents

* fix(subagents): prevent model fallback override, show model during active runs, and block context overflow fallback

Bug 1: When a session has an explicit model override (e.g., gpt/openai-codex),
the fallback candidate logic in resolveFallbackCandidates silently appended the
global primary model (opus) as a backstop. On reinjection/steer with a transient
error, the session could fall back to opus which has a smaller context window
and crash. Fix: when storedModelOverride is set, pass fallbacksOverride ?? []
instead of undefined, preventing the implicit primary backstop.

Bug 2: Active subagents showed 'model n/a' in /subagents list because
resolveModelDisplay only read entry.model/modelProvider (populated after run
completes). Fix: fall back to modelOverride/providerOverride fields which are
populated at spawn time via sessions.patch.

Bug 3: Context overflow errors (prompt too long, context_length_exceeded) could
theoretically escape runEmbeddedPiAgent and be treated as failover candidates
in runWithModelFallback, causing a switch to a model with a smaller context
window. Fix: in runWithModelFallback, detect context overflow errors via
isLikelyContextOverflowError and rethrow them immediately instead of trying the
next model candidate.

* fix(subagents): track spawn depth in session store and fix announce routing for nested agents

* Fix compaction status tracking and dedupe overflow compaction triggers

* fix(subagents): enforce depth block via session store and implement cascade kill

* fix: inject group chat context into system prompt

* fix(subagents): always write model to session store at spawn time

* Preserve spawnDepth when agent handler rewrites session entry

* fix(subagents): suppress announce on steer-restart

* fix(subagents): fallback spawned session model to runtime default

* fix(subagents): enforce spawn depth when caller key resolves by sessionId

* feat(subagents): implement active-first ordering for numeric targets and enhance task display

- Added a test to verify that subagents with numeric targets follow an active-first list ordering.
- Updated `resolveSubagentTarget` to sort subagent runs based on active status and recent activity.
- Enhanced task display in command responses to prevent truncation of long task descriptions.
- Introduced new utility functions for compacting task text and managing subagent run states.

* fix(subagents): show model for active runs via run record fallback

When the spawned model matches the agent's default model, the session
store's override fields are intentionally cleared (isDefault: true).
The model/modelProvider fields are only populated after the run
completes. This left active subagents showing 'model n/a'.

Fix: store the resolved model on SubagentRunRecord at registration
time, and use it as a fallback in both display paths (subagents tool
and /subagents command) when the session store entry has no model info.

Changes:
- SubagentRunRecord: add optional model field
- registerSubagentRun: accept and persist model param
- sessions-spawn-tool: pass resolvedModel to registerSubagentRun
- subagents-tool: pass run record model as fallback to resolveModelDisplay
- commands-subagents: pass run record model as fallback to resolveModelDisplay

* feat(chat): implement session key resolution and reset on sidebar navigation

- Added functions to resolve the main session key and reset chat state when switching sessions from the sidebar.
- Updated the `renderTab` function to handle session key changes when navigating to the chat tab.
- Introduced a test to verify that the session resets to "main" when opening chat from the sidebar navigation.

* fix: subagent timeout=0 passthrough and fallback prompt duplication

Bug 1: runTimeoutSeconds=0 now means 'no timeout' instead of applying the 600s default
- sessions-spawn-tool: default to undefined (not 0) when neither timeout param
  is provided; use != null check so explicit 0 passes through to gateway
- agent.ts: accept 0 as valid timeout (resolveAgentTimeoutMs already handles
  0 → MAX_SAFE_TIMEOUT_MS)

Bug 2: model fallback no longer re-injects the original prompt as a duplicate
- agent.ts: track fallback attempt index; on retries use a short continuation
  message instead of the full original prompt since the session file already
  contains it from the first attempt
- Also skip re-sending images on fallback retries (already in session)

* feat(subagents): truncate long task descriptions in subagents command output

- Introduced a new utility function to format task previews, limiting their length to improve readability.
- Updated the command handler to use the new formatting function, ensuring task descriptions are truncated appropriately.
- Adjusted related tests to verify that long task descriptions are now truncated in the output.

* refactor(subagents): update subagent registry path resolution and improve command output formatting

- Replaced direct import of STATE_DIR with a utility function to resolve the state directory dynamically.
- Enhanced the formatting of command output for active and recent subagents, adding separators for better readability.
- Updated related tests to reflect changes in command output structure.

* fix(subagent): default sessions_spawn to no timeout when runTimeoutSeconds omitted

The previous fix (75a791106) correctly handled the case where
runTimeoutSeconds was explicitly set to 0 ("no timeout"). However,
when models omit the parameter entirely (which is common since the
schema marks it as optional), runTimeoutSeconds resolved to undefined.

undefined flowed through the chain as:
  sessions_spawn → timeout: undefined (since undefined != null is false)
  → gateway agent handler → agentCommand opts.timeout: undefined
  → resolveAgentTimeoutMs({ overrideSeconds: undefined })
  → DEFAULT_AGENT_TIMEOUT_SECONDS (600s = 10 minutes)

This caused subagents to be killed at exactly 10 minutes even though
the user's intent (via TOOLS.md) was for subagents to run without a
timeout.

Fix: default runTimeoutSeconds to 0 (no timeout) when neither
runTimeoutSeconds nor timeoutSeconds is provided by the caller.
Subagent spawns are long-running by design and should not inherit the
600s agent-command default timeout.

* fix(subagent): accept timeout=0 in agent-via-gateway path (second 600s default)

* fix: thread timeout override through getReplyFromConfig dispatch path

getReplyFromConfig called resolveAgentTimeoutMs({ cfg }) with no override,
always falling back to the config default (600s). Add timeoutOverrideSeconds
to GetReplyOptions and pass it through as overrideSeconds so callers of the
dispatch chain can specify a custom timeout (0 = no timeout).

This complements the existing timeout threading in agentCommand and the
cron isolated-agent runner, which already pass overrideSeconds correctly.

* feat(model-fallback): normalize OpenAI Codex model references and enhance fallback handling

- Added normalization for OpenAI Codex model references, specifically converting "gpt-5.3-codex" to "openai-codex" before execution.
- Updated the `resolveFallbackCandidates` function to utilize the new normalization logic.
- Enhanced tests to verify the correct behavior of model normalization and fallback mechanisms.
- Introduced a new test case to ensure that the normalization process works as expected for various input formats.

* feat(tests): add unit tests for steer failure behavior in openclaw-tools

- Introduced a new test file to validate the behavior of subagents when steer replacement dispatch fails.
- Implemented tests to ensure that the announce behavior is restored correctly and that the suppression reason is cleared as expected.
- Enhanced the subagent registry with a new function to clear steer restart suppression.
- Updated related components to support the new test scenarios.

* fix(subagents): replace stop command with kill in slash commands and documentation

- Updated the `/subagents` command to replace `stop` with `kill` for consistency in controlling sub-agent runs.
- Modified related documentation to reflect the change in command usage.
- Removed legacy timeoutSeconds references from the sessions-spawn-tool schema and tests to streamline timeout handling.
- Enhanced tests to ensure correct behavior of the updated commands and their interactions.

* feat(tests): add unit tests for readLatestAssistantReply function

- Introduced a new test file for the `readLatestAssistantReply` function to validate its behavior with various message scenarios.
- Implemented tests to ensure the function correctly retrieves the latest assistant message and handles cases where the latest message has no text.
- Mocked the gateway call to simulate different message histories for comprehensive testing.

* feat(tests): enhance subagent kill-all cascade tests and announce formatting

- Added a new test to verify that the `kill-all` command cascades through ended parents to active descendants in subagents.
- Updated the subagent announce formatting tests to reflect changes in message structure, including the replacement of "Findings:" with "Result:" and the addition of new expectations for message content.
- Improved the handling of long findings and stats in the announce formatting logic to ensure concise output.
- Refactored related functions to enhance clarity and maintainability in the subagent registry and tools.

* refactor(subagent): update announce formatting and remove unused constants

- Modified the subagent announce formatting to replace "Findings:" with "Result:" and adjusted related expectations in tests.
- Removed constants for maximum announce findings characters and summary words, simplifying the announcement logic.
- Updated the handling of findings to retain full content instead of truncating, ensuring more informative outputs.
- Cleaned up unused imports in the commands-subagents file to enhance code clarity.

* feat(tests): enhance billing error handling in user-facing text

- Added tests to ensure that normal text mentioning billing plans is not rewritten, preserving user context.
- Updated the `isBillingErrorMessage` and `sanitizeUserFacingText` functions to improve handling of billing-related messages.
- Introduced new test cases for various scenarios involving billing messages to ensure accurate processing and output.
- Enhanced the subagent announce flow to correctly manage active descendant runs, preventing premature announcements.

* feat(subagent): enhance workflow guidance and auto-announcement clarity

- Added a new guideline in the subagent system prompt to emphasize trust in push-based completion, discouraging busy polling for status updates.
- Updated documentation to clarify that sub-agents will automatically announce their results, improving user understanding of the workflow.
- Enhanced tests to verify the new guidance on avoiding polling loops and to ensure the accuracy of the updated prompts.

* fix(cron): avoid announcing interim subagent spawn acks

* chore: clean post-rebase imports

* fix(cron): fall back to child replies when parent stays interim

* fix(subagents): make active-run guidance advisory

* fix(subagents): update announce flow to handle active descendants and enhance test coverage

- Modified the announce flow to defer announcements when active descendant runs are present, ensuring accurate status reporting.
- Updated tests to verify the new behavior, including scenarios where no fallback requester is available and ensuring proper handling of finished subagents.
- Enhanced the announce formatting to include an `expectFinal` flag for better clarity in the announcement process.

* fix(subagents): enhance announce flow and formatting for user updates

- Updated the announce flow to provide clearer instructions for user updates based on active subagent runs and requester context.
- Refactored the announcement logic to improve clarity and ensure internal context remains private.
- Enhanced tests to verify the new message expectations and formatting, including updated prompts for user-facing updates.
- Introduced a new function to build reply instructions based on session context, improving the overall announcement process.

* fix: resolve prep blockers and changelog placement (#14447) (thanks @tyler6204)

* fix: restore cron delivery-plan import after rebase (#14447) (thanks @tyler6204)

* fix: resolve test failures from rebase conflicts (#14447) (thanks @tyler6204)

* fix: apply formatting after rebase (#14447) (thanks @tyler6204)
											
										
										
											2026-02-14 22:03:45 -08:00
+								            const hadAttemptLevelCompaction = attemptCompactionCount > 0;
 								            // If this attempt already compacted (SDK auto-compaction), avoid immediately
 								            // running another explicit compaction for the same overflow trigger.
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

- Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
- Updated usage tests to include cases for derived session total tokens.
- Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								            if (
 								              !isCompactionFailure &&
-												Agents: add nested subagent orchestration controls and reduce subagent token waste (#14447)

* Agents: add subagent orchestration controls

* Agents: add subagent orchestration controls (WIP uncommitted changes)

* feat(subagents): add depth-based spawn gating for sub-sub-agents

* feat(subagents): tool policy, registry, and announce chain for nested agents

* feat(subagents): system prompt, docs, changelog for nested sub-agents

* fix(subagents): prevent model fallback override, show model during active runs, and block context overflow fallback

Bug 1: When a session has an explicit model override (e.g., gpt/openai-codex),
the fallback candidate logic in resolveFallbackCandidates silently appended the
global primary model (opus) as a backstop. On reinjection/steer with a transient
error, the session could fall back to opus which has a smaller context window
and crash. Fix: when storedModelOverride is set, pass fallbacksOverride ?? []
instead of undefined, preventing the implicit primary backstop.

Bug 2: Active subagents showed 'model n/a' in /subagents list because
resolveModelDisplay only read entry.model/modelProvider (populated after run
completes). Fix: fall back to modelOverride/providerOverride fields which are
populated at spawn time via sessions.patch.

Bug 3: Context overflow errors (prompt too long, context_length_exceeded) could
theoretically escape runEmbeddedPiAgent and be treated as failover candidates
in runWithModelFallback, causing a switch to a model with a smaller context
window. Fix: in runWithModelFallback, detect context overflow errors via
isLikelyContextOverflowError and rethrow them immediately instead of trying the
next model candidate.

* fix(subagents): track spawn depth in session store and fix announce routing for nested agents

* Fix compaction status tracking and dedupe overflow compaction triggers

* fix(subagents): enforce depth block via session store and implement cascade kill

* fix: inject group chat context into system prompt

* fix(subagents): always write model to session store at spawn time

* Preserve spawnDepth when agent handler rewrites session entry

* fix(subagents): suppress announce on steer-restart

* fix(subagents): fallback spawned session model to runtime default

* fix(subagents): enforce spawn depth when caller key resolves by sessionId

* feat(subagents): implement active-first ordering for numeric targets and enhance task display

- Added a test to verify that subagents with numeric targets follow an active-first list ordering.
- Updated `resolveSubagentTarget` to sort subagent runs based on active status and recent activity.
- Enhanced task display in command responses to prevent truncation of long task descriptions.
- Introduced new utility functions for compacting task text and managing subagent run states.

* fix(subagents): show model for active runs via run record fallback

When the spawned model matches the agent's default model, the session
store's override fields are intentionally cleared (isDefault: true).
The model/modelProvider fields are only populated after the run
completes. This left active subagents showing 'model n/a'.

Fix: store the resolved model on SubagentRunRecord at registration
time, and use it as a fallback in both display paths (subagents tool
and /subagents command) when the session store entry has no model info.

Changes:
- SubagentRunRecord: add optional model field
- registerSubagentRun: accept and persist model param
- sessions-spawn-tool: pass resolvedModel to registerSubagentRun
- subagents-tool: pass run record model as fallback to resolveModelDisplay
- commands-subagents: pass run record model as fallback to resolveModelDisplay

* feat(chat): implement session key resolution and reset on sidebar navigation

- Added functions to resolve the main session key and reset chat state when switching sessions from the sidebar.
- Updated the `renderTab` function to handle session key changes when navigating to the chat tab.
- Introduced a test to verify that the session resets to "main" when opening chat from the sidebar navigation.

* fix: subagent timeout=0 passthrough and fallback prompt duplication

Bug 1: runTimeoutSeconds=0 now means 'no timeout' instead of applying 600s default
- sessions-spawn-tool: default to undefined (not 0) when neither timeout param
  is provided; use != null check so explicit 0 passes through to gateway
- agent.ts: accept 0 as valid timeout (resolveAgentTimeoutMs already handles
  0 → MAX_SAFE_TIMEOUT_MS)

Bug 2: model fallback no longer re-injects the original prompt as a duplicate
- agent.ts: track fallback attempt index; on retries use a short continuation
  message instead of the full original prompt since the session file already
  contains it from the first attempt
- Also skip re-sending images on fallback retries (already in session)

* feat(subagents): truncate long task descriptions in subagents command output

- Introduced a new utility function to format task previews, limiting their length to improve readability.
- Updated the command handler to use the new formatting function, ensuring task descriptions are truncated appropriately.
- Adjusted related tests to verify that long task descriptions are now truncated in the output.

* refactor(subagents): update subagent registry path resolution and improve command output formatting

- Replaced direct import of STATE_DIR with a utility function to resolve the state directory dynamically.
- Enhanced the formatting of command output for active and recent subagents, adding separators for better readability.
- Updated related tests to reflect changes in command output structure.

* fix(subagent): default sessions_spawn to no timeout when runTimeoutSeconds omitted

The previous fix (75a791106) correctly handled the case where
runTimeoutSeconds was explicitly set to 0 ("no timeout"). However,
when models omit the parameter entirely (which is common since the
schema marks it as optional), runTimeoutSeconds resolved to undefined.

undefined flowed through the chain as:
  sessions_spawn → timeout: undefined (since undefined != null is false)
  → gateway agent handler → agentCommand opts.timeout: undefined
  → resolveAgentTimeoutMs({ overrideSeconds: undefined })
  → DEFAULT_AGENT_TIMEOUT_SECONDS (600s = 10 minutes)

This caused subagents to be killed at exactly 10 minutes even though
the user's intent (via TOOLS.md) was for subagents to run without a
timeout.

Fix: default runTimeoutSeconds to 0 (no timeout) when neither
runTimeoutSeconds nor timeoutSeconds is provided by the caller.
Subagent spawns are long-running by design and should not inherit the
600s agent-command default timeout.

* fix(subagent): accept timeout=0 in agent-via-gateway path (second 600s default)

* fix: thread timeout override through getReplyFromConfig dispatch path

getReplyFromConfig called resolveAgentTimeoutMs({ cfg }) with no override,
always falling back to the config default (600s). Add timeoutOverrideSeconds
to GetReplyOptions and pass it through as overrideSeconds so callers of the
dispatch chain can specify a custom timeout (0 = no timeout).

This complements the existing timeout threading in agentCommand and the
cron isolated-agent runner, which already pass overrideSeconds correctly.

* feat(model-fallback): normalize OpenAI Codex model references and enhance fallback handling

- Added normalization for OpenAI Codex model references, specifically converting "gpt-5.3-codex" to "openai-codex" before execution.
- Updated the `resolveFallbackCandidates` function to utilize the new normalization logic.
- Enhanced tests to verify the correct behavior of model normalization and fallback mechanisms.
- Introduced a new test case to ensure that the normalization process works as expected for various input formats.

* feat(tests): add unit tests for steer failure behavior in openclaw-tools

- Introduced a new test file to validate the behavior of subagents when steer replacement dispatch fails.
- Implemented tests to ensure that the announce behavior is restored correctly and that the suppression reason is cleared as expected.
- Enhanced the subagent registry with a new function to clear steer restart suppression.
- Updated related components to support the new test scenarios.

* fix(subagents): replace stop command with kill in slash commands and documentation

- Updated the `/subagents` command to replace `stop` with `kill` for consistency in controlling sub-agent runs.
- Modified related documentation to reflect the change in command usage.
- Removed legacy timeoutSeconds references from the sessions-spawn-tool schema and tests to streamline timeout handling.
- Enhanced tests to ensure correct behavior of the updated commands and their interactions.

* feat(tests): add unit tests for readLatestAssistantReply function

- Introduced a new test file for the `readLatestAssistantReply` function to validate its behavior with various message scenarios.
- Implemented tests to ensure the function correctly retrieves the latest assistant message and handles cases where the latest message has no text.
- Mocked the gateway call to simulate different message histories for comprehensive testing.

* feat(tests): enhance subagent kill-all cascade tests and announce formatting

- Added a new test to verify that the `kill-all` command cascades through ended parents to active descendants in subagents.
- Updated the subagent announce formatting tests to reflect changes in message structure, including the replacement of "Findings:" with "Result:" and the addition of new expectations for message content.
- Improved the handling of long findings and stats in the announce formatting logic to ensure concise output.
- Refactored related functions to enhance clarity and maintainability in the subagent registry and tools.

* refactor(subagent): update announce formatting and remove unused constants

- Modified the subagent announce formatting to replace "Findings:" with "Result:" and adjusted related expectations in tests.
- Removed constants for maximum announce findings characters and summary words, simplifying the announcement logic.
- Updated the handling of findings to retain full content instead of truncating, ensuring more informative outputs.
- Cleaned up unused imports in the commands-subagents file to enhance code clarity.

* feat(tests): enhance billing error handling in user-facing text

- Added tests to ensure that normal text mentioning billing plans is not rewritten, preserving user context.
- Updated the `isBillingErrorMessage` and `sanitizeUserFacingText` functions to improve handling of billing-related messages.
- Introduced new test cases for various scenarios involving billing messages to ensure accurate processing and output.
- Enhanced the subagent announce flow to correctly manage active descendant runs, preventing premature announcements.

* feat(subagent): enhance workflow guidance and auto-announcement clarity

- Added a new guideline in the subagent system prompt to emphasize trust in push-based completion, discouraging busy polling for status updates.
- Updated documentation to clarify that sub-agents will automatically announce their results, improving user understanding of the workflow.
- Enhanced tests to verify the new guidance on avoiding polling loops and to ensure the accuracy of the updated prompts.

* fix(cron): avoid announcing interim subagent spawn acks

* chore: clean post-rebase imports

* fix(cron): fall back to child replies when parent stays interim

* fix(subagents): make active-run guidance advisory

* fix(subagents): update announce flow to handle active descendants and enhance test coverage

- Modified the announce flow to defer announcements when active descendant runs are present, ensuring accurate status reporting.
- Updated tests to verify the new behavior, including scenarios where no fallback requester is available and ensuring proper handling of finished subagents.
- Enhanced the announce formatting to include an `expectFinal` flag for better clarity in the announcement process.

* fix(subagents): enhance announce flow and formatting for user updates

- Updated the announce flow to provide clearer instructions for user updates based on active subagent runs and requester context.
- Refactored the announcement logic to improve clarity and ensure internal context remains private.
- Enhanced tests to verify the new message expectations and formatting, including updated prompts for user-facing updates.
- Introduced a new function to build reply instructions based on session context, improving the overall announcement process.

* fix: resolve prep blockers and changelog placement (#14447) (thanks @tyler6204)

* fix: restore cron delivery-plan import after rebase (#14447) (thanks @tyler6204)

* fix: resolve test failures from rebase conflicts (#14447) (thanks @tyler6204)

* fix: apply formatting after rebase (#14447) (thanks @tyler6204)
											
										
										
											2026-02-14 22:03:45 -08:00
+								              hadAttemptLevelCompaction &&
 								              overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
 								            ) {
 								              overflowCompactionAttempts++;
 								              log.warn(
 								                `context overflow persisted after in-attempt compaction (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); retrying prompt without additional compaction for ${provider}/${modelId}`,
 								              );
 								              continue;
 								            }
 								            // Attempt explicit overflow compaction only when this attempt did not
 								            // already auto-compact.
 								            if (
 								              !isCompactionFailure &&
 								              !hadAttemptLevelCompaction &&
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								              overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
 								            ) {
-												feat: add pre-prompt context size diagnostic logging (openclaw#8930) thanks @Glucksberg

Verified:
- pnpm build
- pnpm check
- pnpm test

Co-authored-by: Glucksberg <80581902+Glucksberg@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>

											
										
										
											2026-02-13 19:54:22 -04:00
+								              if (log.isEnabled("debug")) {
 								                log.debug(
 								                  `[compaction-diag] decision diagId=${overflowDiagId} branch=compact ` +
 								                    `isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=unknown ` +
 								                    `attempt=${overflowCompactionAttempts + 1} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
 								                );
 								              }
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								              overflowCompactionAttempts++;
-												fix: allow multiple compaction retries on context overflow (#8928)

Previously, overflowCompactionAttempted was a boolean flag set once, preventing
recovery when a single compaction wasn't enough. Change to a counter allowing up
to 3 attempts before giving up. Also add diagnostic logging on overflow events to
help debug early-overflow issues.

Fixes sessions that hit context overflow during long agentic turns with many tool
calls, where one compaction round isn't sufficient to bring context below limits.
											
										
										
											2026-02-05 17:58:37 -04:00
+								              log.warn(
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								                `context overflow detected (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); attempting auto-compaction for ${provider}/${modelId}`,
-												fix: allow multiple compaction retries on context overflow (#8928)

Previously, overflowCompactionAttempted was a boolean flag set once, preventing
recovery when a single compaction wasn't enough. Change to a counter allowing up
to 3 attempts before giving up. Also add diagnostic logging on overflow events to
help debug early-overflow issues.

Fixes sessions that hit context overflow during long agentic turns with many tool
calls, where one compaction round isn't sufficient to bring context below limits.
											
										
										
											2026-02-05 17:58:37 -04:00
+								              );
-												feature(context): extend plugin system to support custom context management (#22201)

* feat(context-engine): add ContextEngine interface and registry

Introduce the pluggable ContextEngine abstraction that allows external
plugins to register custom context management strategies.

- ContextEngine interface with lifecycle methods: bootstrap, ingest,
  ingestBatch, afterTurn, assemble, compact, prepareSubagentSpawn,
  onSubagentEnded, dispose
- Module-level singleton registry with registerContextEngine() and
  resolveContextEngine() (config-driven slot selection)
- LegacyContextEngine: pass-through implementation wrapping existing
  compaction behavior for 100% backward compatibility
- ensureContextEnginesInitialized() guard for safe one-time registration
- 19 tests covering contract, registry, resolution, and legacy parity

* feat(plugins): add context-engine slot and registerContextEngine API

Wire the ContextEngine abstraction into the plugin system so external
plugins can register context engines via the standard plugin API.

- Add 'context-engine' to PluginKind union type
- Add 'contextEngine' slot to PluginSlotsConfig (default: 'legacy')
- Wire registerContextEngine() through OpenClawPluginApi
- Export ContextEngine types from plugin-sdk for external consumers
- Restore proper slot-based resolution in registry

* feat(context-engine): wire ContextEngine into agent run lifecycle

Integrate the ContextEngine abstraction into the core agent run path:

- Resolve context engine once per run (reused across retries)
- Bootstrap: hydrate canonical store from session file on first run
- Assemble: route context assembly through pluggable engine
- Auto-compaction guard: disable built-in auto-compaction when
  the engine declares ownsCompaction (prevents double-compaction)
- AfterTurn: post-turn lifecycle hook for ingest + background
  compaction decisions
- Overflow compaction: route through contextEngine.compact()
- Dispose: clean up engine resources in finally block
- Notify context engine on subagent lifecycle events

Legacy engine: all lifecycle methods are pass-through/no-op, preserving
100% backward compatibility for users without a context engine plugin.

* feat(plugins): add scoped subagent methods and gateway request scope

Expose runtime.subagent.{run, waitForRun, getSession, deleteSession}
so external plugins can spawn sub-agent sessions without raw gateway
dispatch access.

Uses AsyncLocalStorage request-scope bridge to dispatch internally via
handleGatewayRequest with a synthetic operator client. Methods are only
available during gateway request handling.

- Symbol.for-backed global singleton for cross-module-reload safety
- Fallback gateway context for non-WS dispatch paths (Telegram/WhatsApp)
- Set gateway request scope for all handlers, not just plugin handlers
- 3 staleness tests for fallback context hardening

* feat(context-engine): route /compact and sessions.get through context engine

Wire the /compact command and sessions.get handler through the pluggable
ContextEngine interface.

- Thread tokenBudget and force parameters to context engine compact
- Route /compact through contextEngine.compact() when registered
- Wire sessions.get as runtime alias for plugin subagent dispatch
- Add .pebbles/ to .gitignore

* style: format with oxfmt 0.33.0

Fix duplicate import (ControlUiRootState in server.impl.ts) and
import ordering across all changed files.

* fix: update extension test mocks for context-engine types

Add missing subagent property to bluebubbles PluginRuntime mock.
Add missing registerContextEngine to lobster OpenClawPluginApi mock.

* fix(subagents): keep deferred delete cleanup retryable

* style: format run attempt for CI

* fix(rebase): remove duplicate embedded-run imports

* test: add missing gateway context mock export

* fix: pass resolved auth profile into afterTurn compaction

Ensure the embedded runner forwards resolved auth profile context into
legacy context-engine compaction params on the normal afterTurn path,
matching overflow compaction behavior. This allows downstream LCM
summarization to use the intended provider auth/profile consistently.

Also fix strict TS typing in external-link token dedupe and align an
attempt unit test reasoningLevel value with the current ReasoningLevel
enum.

Regeneration-Prompt: |
  We were debugging context-engine compaction where downstream summary
  calls were missing the right auth/profile context in normal afterTurn
  flow, while overflow compaction already propagated it. Preserve current
  behavior and keep changes additive: thread the resolved authProfileId
  through run -> attempt -> legacy compaction param builder without
  broad refactors.

  Add tests that prove the auth profile is included in afterTurn legacy
  params and that overflow compaction still passes it through run
  attempts. Keep existing APIs stable, and only adjust small type issues
  needed for strict compilation.

* fix: remove duplicate imports from rebase

* feat: add context-engine system prompt additions

* fix(rebase): dedupe attempt import declarations

* test: fix fetch mock typing in ollama autodiscovery

* fix(test): add registerContextEngine to diffs extension mock APIs

* test(windows): use path.delimiter in ios-team-id fixture PATH

* test(cron): add model formatting and precedence edge case tests

Covers:
- Provider/model string splitting (whitespace, nested paths, empty segments)
- Provider normalization (casing, aliases like bedrock→amazon-bedrock)
- Anthropic model alias normalization (opus-4.5→claude-opus-4-5)
- Precedence: job payload > session override > config default
- Sequential runs with different providers (CI flake regression pattern)
- forceNew session preserving stored model overrides
- Whitespace/empty model string edge cases
- Config model as string vs object format

* test(cron): fix model formatting test config types

* test(phone-control): add registerContextEngine to mock API

* fix: re-export ChannelKind from config-reload-plan

* fix: add subagent mock to plugin-runtime-mock test util

* docs: add changelog fragment for context engine PR #22201
											
										
										
											2026-03-06 05:31:59 -08:00
+								              const compactResult = await contextEngine.compact({
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								                sessionId: params.sessionId,
 								                sessionFile: params.sessionFile,
-												feature(context): extend plugin system to support custom context management (#22201)

* feat(context-engine): add ContextEngine interface and registry

Introduce the pluggable ContextEngine abstraction that allows external
plugins to register custom context management strategies.

- ContextEngine interface with lifecycle methods: bootstrap, ingest,
  ingestBatch, afterTurn, assemble, compact, prepareSubagentSpawn,
  onSubagentEnded, dispose
- Module-level singleton registry with registerContextEngine() and
  resolveContextEngine() (config-driven slot selection)
- LegacyContextEngine: pass-through implementation wrapping existing
  compaction behavior for 100% backward compatibility
- ensureContextEnginesInitialized() guard for safe one-time registration
- 19 tests covering contract, registry, resolution, and legacy parity

* feat(plugins): add context-engine slot and registerContextEngine API

Wire the ContextEngine abstraction into the plugin system so external
plugins can register context engines via the standard plugin API.

- Add 'context-engine' to PluginKind union type
- Add 'contextEngine' slot to PluginSlotsConfig (default: 'legacy')
- Wire registerContextEngine() through OpenClawPluginApi
- Export ContextEngine types from plugin-sdk for external consumers
- Restore proper slot-based resolution in registry

* feat(context-engine): wire ContextEngine into agent run lifecycle

Integrate the ContextEngine abstraction into the core agent run path:

- Resolve context engine once per run (reused across retries)
- Bootstrap: hydrate canonical store from session file on first run
- Assemble: route context assembly through pluggable engine
- Auto-compaction guard: disable built-in auto-compaction when
  the engine declares ownsCompaction (prevents double-compaction)
- AfterTurn: post-turn lifecycle hook for ingest + background
  compaction decisions
- Overflow compaction: route through contextEngine.compact()
- Dispose: clean up engine resources in finally block
- Notify context engine on subagent lifecycle events

Legacy engine: all lifecycle methods are pass-through/no-op, preserving
100% backward compatibility for users without a context engine plugin.

* feat(plugins): add scoped subagent methods and gateway request scope

Expose runtime.subagent.{run, waitForRun, getSession, deleteSession}
so external plugins can spawn sub-agent sessions without raw gateway
dispatch access.

Uses AsyncLocalStorage request-scope bridge to dispatch internally via
handleGatewayRequest with a synthetic operator client. Methods are only
available during gateway request handling.

- Symbol.for-backed global singleton for cross-module-reload safety
- Fallback gateway context for non-WS dispatch paths (Telegram/WhatsApp)
- Set gateway request scope for all handlers, not just plugin handlers
- 3 staleness tests for fallback context hardening

* feat(context-engine): route /compact and sessions.get through context engine

Wire the /compact command and sessions.get handler through the pluggable
ContextEngine interface.

- Thread tokenBudget and force parameters to context engine compact
- Route /compact through contextEngine.compact() when registered
- Wire sessions.get as runtime alias for plugin subagent dispatch
- Add .pebbles/ to .gitignore

* style: format with oxfmt 0.33.0

Fix duplicate import (ControlUiRootState in server.impl.ts) and
import ordering across all changed files.

* fix: update extension test mocks for context-engine types

Add missing subagent property to bluebubbles PluginRuntime mock.
Add missing registerContextEngine to lobster OpenClawPluginApi mock.

* fix(subagents): keep deferred delete cleanup retryable

* style: format run attempt for CI

* fix(rebase): remove duplicate embedded-run imports

* test: add missing gateway context mock export

* fix: pass resolved auth profile into afterTurn compaction

Ensure the embedded runner forwards resolved auth profile context into
legacy context-engine compaction params on the normal afterTurn path,
matching overflow compaction behavior. This allows downstream LCM
summarization to use the intended provider auth/profile consistently.

Also fix strict TS typing in external-link token dedupe and align an
attempt unit test reasoningLevel value with the current ReasoningLevel
enum.

Regeneration-Prompt: |
  We were debugging context-engine compaction where downstream summary
  calls were missing the right auth/profile context in normal afterTurn
  flow, while overflow compaction already propagated it. Preserve current
  behavior and keep changes additive: thread the resolved authProfileId
  through run -> attempt -> legacy compaction param builder without
  broad refactors.

  Add tests that prove the auth profile is included in afterTurn legacy
  params and that overflow compaction still passes it through run
  attempts. Keep existing APIs stable, and only adjust small type issues
  needed for strict compilation.

* fix: remove duplicate imports from rebase

* feat: add context-engine system prompt additions

* fix(rebase): dedupe attempt import declarations

* test: fix fetch mock typing in ollama autodiscovery

* fix(test): add registerContextEngine to diffs extension mock APIs

* test(windows): use path.delimiter in ios-team-id fixture PATH

* test(cron): add model formatting and precedence edge case tests

Covers:
- Provider/model string splitting (whitespace, nested paths, empty segments)
- Provider normalization (casing, aliases like bedrock→amazon-bedrock)
- Anthropic model alias normalization (opus-4.5→claude-opus-4-5)
- Precedence: job payload > session override > config default
- Sequential runs with different providers (CI flake regression pattern)
- forceNew session preserving stored model overrides
- Whitespace/empty model string edge cases
- Config model as string vs object format

* test(cron): fix model formatting test config types

* test(phone-control): add registerContextEngine to mock API

* fix: re-export ChannelKind from config-reload-plan

* fix: add subagent mock to plugin-runtime-mock test util

* docs: add changelog fragment for context engine PR #22201
											
										
										
											2026-03-06 05:31:59 -08:00
+								                tokenBudget: ctxInfo.tokens,
 								                force: true,
 								                compactionTarget: "budget",
-												refactor: neutralize context engine runtime bridge

											
										
										
											2026-03-08 17:13:18 +00:00
+								                runtimeContext: {
-												feature(context): extend plugin system to support custom context management (#22201)

* feat(context-engine): add ContextEngine interface and registry

Introduce the pluggable ContextEngine abstraction that allows external
plugins to register custom context management strategies.

- ContextEngine interface with lifecycle methods: bootstrap, ingest,
  ingestBatch, afterTurn, assemble, compact, prepareSubagentSpawn,
  onSubagentEnded, dispose
- Module-level singleton registry with registerContextEngine() and
  resolveContextEngine() (config-driven slot selection)
- LegacyContextEngine: pass-through implementation wrapping existing
  compaction behavior for 100% backward compatibility
- ensureContextEnginesInitialized() guard for safe one-time registration
- 19 tests covering contract, registry, resolution, and legacy parity

* feat(plugins): add context-engine slot and registerContextEngine API

Wire the ContextEngine abstraction into the plugin system so external
plugins can register context engines via the standard plugin API.

- Add 'context-engine' to PluginKind union type
- Add 'contextEngine' slot to PluginSlotsConfig (default: 'legacy')
- Wire registerContextEngine() through OpenClawPluginApi
- Export ContextEngine types from plugin-sdk for external consumers
- Restore proper slot-based resolution in registry

* feat(context-engine): wire ContextEngine into agent run lifecycle

Integrate the ContextEngine abstraction into the core agent run path:

- Resolve context engine once per run (reused across retries)
- Bootstrap: hydrate canonical store from session file on first run
- Assemble: route context assembly through pluggable engine
- Auto-compaction guard: disable built-in auto-compaction when
  the engine declares ownsCompaction (prevents double-compaction)
- AfterTurn: post-turn lifecycle hook for ingest + background
  compaction decisions
- Overflow compaction: route through contextEngine.compact()
- Dispose: clean up engine resources in finally block
- Notify context engine on subagent lifecycle events

Legacy engine: all lifecycle methods are pass-through/no-op, preserving
100% backward compatibility for users without a context engine plugin.

* feat(plugins): add scoped subagent methods and gateway request scope

Expose runtime.subagent.{run, waitForRun, getSession, deleteSession}
so external plugins can spawn sub-agent sessions without raw gateway
dispatch access.

Uses AsyncLocalStorage request-scope bridge to dispatch internally via
handleGatewayRequest with a synthetic operator client. Methods are only
available during gateway request handling.

- Symbol.for-backed global singleton for cross-module-reload safety
- Fallback gateway context for non-WS dispatch paths (Telegram/WhatsApp)
- Set gateway request scope for all handlers, not just plugin handlers
- 3 staleness tests for fallback context hardening

* feat(context-engine): route /compact and sessions.get through context engine

Wire the /compact command and sessions.get handler through the pluggable
ContextEngine interface.

- Thread tokenBudget and force parameters to context engine compact
- Route /compact through contextEngine.compact() when registered
- Wire sessions.get as runtime alias for plugin subagent dispatch
- Add .pebbles/ to .gitignore

* style: format with oxfmt 0.33.0

Fix duplicate import (ControlUiRootState in server.impl.ts) and
import ordering across all changed files.

* fix: update extension test mocks for context-engine types

Add missing subagent property to bluebubbles PluginRuntime mock.
Add missing registerContextEngine to lobster OpenClawPluginApi mock.

* fix(subagents): keep deferred delete cleanup retryable

* style: format run attempt for CI

* fix(rebase): remove duplicate embedded-run imports

* test: add missing gateway context mock export

* fix: pass resolved auth profile into afterTurn compaction

Ensure the embedded runner forwards resolved auth profile context into
legacy context-engine compaction params on the normal afterTurn path,
matching overflow compaction behavior. This allows downstream LCM
summarization to use the intended provider auth/profile consistently.

Also fix strict TS typing in external-link token dedupe and align an
attempt unit test reasoningLevel value with the current ReasoningLevel
enum.

Regeneration-Prompt: |
  We were debugging context-engine compaction where downstream summary
  calls were missing the right auth/profile context in normal afterTurn
  flow, while overflow compaction already propagated it. Preserve current
  behavior and keep changes additive: thread the resolved authProfileId
  through run -> attempt -> legacy compaction param builder without
  broad refactors.

  Add tests that prove the auth profile is included in afterTurn legacy
  params and that overflow compaction still passes it through run
  attempts. Keep existing APIs stable, and only adjust small type issues
  needed for strict compilation.

* fix: remove duplicate imports from rebase

* feat: add context-engine system prompt additions

* fix(rebase): dedupe attempt import declarations

* test: fix fetch mock typing in ollama autodiscovery

* fix(test): add registerContextEngine to diffs extension mock APIs

* test(windows): use path.delimiter in ios-team-id fixture PATH

* test(cron): add model formatting and precedence edge case tests

Covers:
- Provider/model string splitting (whitespace, nested paths, empty segments)
- Provider normalization (casing, aliases like bedrock→amazon-bedrock)
- Anthropic model alias normalization (opus-4.5→claude-opus-4-5)
- Precedence: job payload > session override > config default
- Sequential runs with different providers (CI flake regression pattern)
- forceNew session preserving stored model overrides
- Whitespace/empty model string edge cases
- Config model as string vs object format

* test(cron): fix model formatting test config types

* test(phone-control): add registerContextEngine to mock API

* fix: re-export ChannelKind from config-reload-plan

* fix: add subagent mock to plugin-runtime-mock test util

* docs: add changelog fragment for context engine PR #22201
											
										
										
											2026-03-06 05:31:59 -08:00
+								                  sessionKey: params.sessionKey,
 								                  messageChannel: params.messageChannel,
 								                  messageProvider: params.messageProvider,
 								                  agentAccountId: params.agentAccountId,
 								                  authProfileId: lastProfileId,
 								                  workspaceDir: resolvedWorkspace,
 								                  agentDir,
 								                  config: params.config,
 								                  skillsSnapshot: params.skillsSnapshot,
 								                  senderIsOwner: params.senderIsOwner,
 								                  provider,
 								                  model: modelId,
 								                  runId: params.runId,
 								                  thinkLevel,
 								                  reasoningLevel: params.reasoningLevel,
 								                  bashElevated: params.bashElevated,
 								                  extraSystemPrompt: params.extraSystemPrompt,
 								                  ownerNumbers: params.ownerNumbers,
 								                  trigger: "overflow",
 								                  diagId: overflowDiagId,
 								                  attempt: overflowCompactionAttempts,
 								                  maxAttempts: MAX_OVERFLOW_COMPACTION_ATTEMPTS,
 								                },
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								              });
 								              if (compactResult.compacted) {
 								                autoCompactionCount += 1;
 								                log.info(`auto-compaction succeeded for ${provider}/${modelId}; retrying prompt`);
 								                continue;
 								              }
 								              log.warn(
 								                `auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
 								              );
 								            }
 								            // Fallback: try truncating oversized tool results in the session.
 								            // This handles the case where a single tool result exceeds the
 								            // context window and compaction cannot reduce it further.
 								            if (!toolResultTruncationAttempted) {
 								              const contextWindowTokens = ctxInfo.tokens;
 								              const hasOversized = attempt.messagesSnapshot
 								                ? sessionLikelyHasOversizedToolResults({
 								                    messages: attempt.messagesSnapshot,
 								                    contextWindowTokens,
 								                  })
 								                : false;
 								              if (hasOversized) {
-												feat: add pre-prompt context size diagnostic logging (openclaw#8930) thanks @Glucksberg

Verified:
- pnpm build
- pnpm check
- pnpm test

Co-authored-by: Glucksberg <80581902+Glucksberg@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>

											
										
										
											2026-02-13 19:54:22 -04:00
+								                if (log.isEnabled("debug")) {
 								                  log.debug(
 								                    `[compaction-diag] decision diagId=${overflowDiagId} branch=truncate_tool_results ` +
 								                      `isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=${hasOversized} ` +
 								                      `attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
 								                  );
 								                }
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								                toolResultTruncationAttempted = true;
-												fix: auto-compact on context overflow promptError before returning error (#1627)

* fix: detect Anthropic 'Request size exceeds model context window' as context overflow

Anthropic now returns 'Request size exceeds model context window' instead of
the previously detected 'prompt is too long' format. This new error message
was not recognized by isContextOverflowError(), causing auto-compaction to
NOT trigger. Users would see the raw error twice without any recovery attempt.

Changes:
- Add 'exceeds model context window' and 'request size exceeds' to
  isContextOverflowError() detection patterns
- Add tests that fail without the fix, verifying both the raw error
  string and the JSON-wrapped format from Anthropic's API
- Add test for formatAssistantErrorText to ensure the friendly
  'Context overflow' message is shown instead of the raw error

Note: The upstream pi-ai package (@mariozechner/pi-ai) also needs a fix
in its OVERFLOW_PATTERNS regex: /exceeds the context window/i should be
changed to /exceeds.*context window/i to match both 'the' and 'model'
variants for triggering auto-compaction retry.

* fix(tests): remove unused imports and helper from test files

Remove WorkspaceBootstrapFile references and _makeFile helper that were
incorrectly copied from another test file. These caused type errors and
were unrelated to the context overflow detection tests.

* fix: trigger auto-compaction on context overflow promptError

When the LLM rejects a request with a context overflow error that surfaces
as a promptError (thrown exception rather than streamed error), the existing
auto-compaction in pi-coding-agent never triggers. This happens because the
error bypasses the agent's message_end → agent_end → _checkCompaction path.

This fix adds a fallback compaction attempt directly in the run loop:
- Detects context overflow in promptError (excluding compaction_failure)
- Calls compactEmbeddedPiSessionDirect (bypassing lane queues since already in-lane)
- Retries the prompt after successful compaction
- Limits to one compaction attempt per run to prevent infinite loops

Fixes: context overflow errors shown to user without auto-compaction attempt

* style: format compact.ts and run.ts with oxfmt

* fix: tighten context overflow match (#1627) (thanks @rodrigouroz)

---------

Co-authored-by: Claude <claude@anthropic.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
											
										
										
											2026-01-24 19:09:24 -03:00
+								                log.warn(
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								                  `[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
 								                    `(contextWindow=${contextWindowTokens} tokens)`,
-												fix: auto-compact on context overflow promptError before returning error (#1627)

* fix: detect Anthropic 'Request size exceeds model context window' as context overflow

Anthropic now returns 'Request size exceeds model context window' instead of
the previously detected 'prompt is too long' format. This new error message
was not recognized by isContextOverflowError(), causing auto-compaction to
NOT trigger. Users would see the raw error twice without any recovery attempt.

Changes:
- Add 'exceeds model context window' and 'request size exceeds' to
  isContextOverflowError() detection patterns
- Add tests that fail without the fix, verifying both the raw error
  string and the JSON-wrapped format from Anthropic's API
- Add test for formatAssistantErrorText to ensure the friendly
  'Context overflow' message is shown instead of the raw error

Note: The upstream pi-ai package (@mariozechner/pi-ai) also needs a fix
in its OVERFLOW_PATTERNS regex: /exceeds the context window/i should be
changed to /exceeds.*context window/i to match both 'the' and 'model'
variants for triggering auto-compaction retry.

* fix(tests): remove unused imports and helper from test files

Remove WorkspaceBootstrapFile references and _makeFile helper that were
incorrectly copied from another test file. These caused type errors and
were unrelated to the context overflow detection tests.

* fix: trigger auto-compaction on context overflow promptError

When the LLM rejects a request with a context overflow error that surfaces
as a promptError (thrown exception rather than streamed error), the existing
auto-compaction in pi-coding-agent never triggers. This happens because the
error bypasses the agent's message_end → agent_end → _checkCompaction path.

This fix adds a fallback compaction attempt directly in the run loop:
- Detects context overflow in promptError (excluding compaction_failure)
- Calls compactEmbeddedPiSessionDirect (bypassing lane queues since already in-lane)
- Retries the prompt after successful compaction
- Limits to one compaction attempt per run to prevent infinite loops

Fixes: context overflow errors shown to user without auto-compaction attempt

* style: format compact.ts and run.ts with oxfmt

* fix: tighten context overflow match (#1627) (thanks @rodrigouroz)

---------

Co-authored-by: Claude <claude@anthropic.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
											
										
										
											2026-01-24 19:09:24 -03:00
+								                );
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								                const truncResult = await truncateOversizedToolResultsInSession({
 								                  sessionFile: params.sessionFile,
 								                  contextWindowTokens,
-												fix: auto-compact on context overflow promptError before returning error (#1627)

* fix: detect Anthropic 'Request size exceeds model context window' as context overflow

Anthropic now returns 'Request size exceeds model context window' instead of
the previously detected 'prompt is too long' format. This new error message
was not recognized by isContextOverflowError(), causing auto-compaction to
NOT trigger. Users would see the raw error twice without any recovery attempt.

Changes:
- Add 'exceeds model context window' and 'request size exceeds' to
  isContextOverflowError() detection patterns
- Add tests that fail without the fix, verifying both the raw error
  string and the JSON-wrapped format from Anthropic's API
- Add test for formatAssistantErrorText to ensure the friendly
  'Context overflow' message is shown instead of the raw error

Note: The upstream pi-ai package (@mariozechner/pi-ai) also needs a fix
in its OVERFLOW_PATTERNS regex: /exceeds the context window/i should be
changed to /exceeds.*context window/i to match both 'the' and 'model'
variants for triggering auto-compaction retry.

* fix(tests): remove unused imports and helper from test files

Remove WorkspaceBootstrapFile references and _makeFile helper that were
incorrectly copied from another test file. These caused type errors and
were unrelated to the context overflow detection tests.

* fix: trigger auto-compaction on context overflow promptError

When the LLM rejects a request with a context overflow error that surfaces
as a promptError (thrown exception rather than streamed error), the existing
auto-compaction in pi-coding-agent never triggers. This happens because the
error bypasses the agent's message_end → agent_end → _checkCompaction path.

This fix adds a fallback compaction attempt directly in the run loop:
- Detects context overflow in promptError (excluding compaction_failure)
- Calls compactEmbeddedPiSessionDirect (bypassing lane queues since already in-lane)
- Retries the prompt after successful compaction
- Limits to one compaction attempt per run to prevent infinite loops

Fixes: context overflow errors shown to user without auto-compaction attempt

* style: format compact.ts and run.ts with oxfmt

* fix: tighten context overflow match (#1627) (thanks @rodrigouroz)

---------

Co-authored-by: Claude <claude@anthropic.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
											
										
										
											2026-01-24 19:09:24 -03:00
+								                  sessionId: params.sessionId,
 								                  sessionKey: params.sessionKey,
 								                });
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								                if (truncResult.truncated) {
 								                  log.info(
 								                    `[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
 								                  );
-												fix(security): OC-65 prevent compaction counter reset to enforce context exhaustion limit — Aether AI Agent

Remove the `overflowCompactionAttempts = 0` reset inside the inner loop's
tool-result-truncation branch. The counter was being zeroed on each truncation
cycle, allowing prompt-injection attacks to bypass the MAX_OVERFLOW_COMPACTION_ATTEMPTS
guard and trigger unbounded auto-compaction, exhausting context window resources (DoS).

CWE-400 / GHSA-x2g4-7mj7-2hhj

											
										
										
											2026-02-18 15:27:57 +11:00
+								                  // Do NOT reset overflowCompactionAttempts here — the global cap must remain
 								                  // enforced across all iterations to prevent unbounded compaction cycles (OC-65).
-												fix: auto-compact on context overflow promptError before returning error (#1627)

* fix: detect Anthropic 'Request size exceeds model context window' as context overflow

Anthropic now returns 'Request size exceeds model context window' instead of
the previously detected 'prompt is too long' format. This new error message
was not recognized by isContextOverflowError(), causing auto-compaction to
NOT trigger. Users would see the raw error twice without any recovery attempt.

Changes:
- Add 'exceeds model context window' and 'request size exceeds' to
  isContextOverflowError() detection patterns
- Add tests that fail without the fix, verifying both the raw error
  string and the JSON-wrapped format from Anthropic's API
- Add test for formatAssistantErrorText to ensure the friendly
  'Context overflow' message is shown instead of the raw error

Note: The upstream pi-ai package (@mariozechner/pi-ai) also needs a fix
in its OVERFLOW_PATTERNS regex: /exceeds the context window/i should be
changed to /exceeds.*context window/i to match both 'the' and 'model'
variants for triggering auto-compaction retry.

* fix(tests): remove unused imports and helper from test files

Remove WorkspaceBootstrapFile references and _makeFile helper that were
incorrectly copied from another test file. These caused type errors and
were unrelated to the context overflow detection tests.

* fix: trigger auto-compaction on context overflow promptError

When the LLM rejects a request with a context overflow error that surfaces
as a promptError (thrown exception rather than streamed error), the existing
auto-compaction in pi-coding-agent never triggers. This happens because the
error bypasses the agent's message_end → agent_end → _checkCompaction path.

This fix adds a fallback compaction attempt directly in the run loop:
- Detects context overflow in promptError (excluding compaction_failure)
- Calls compactEmbeddedPiSessionDirect (bypassing lane queues since already in-lane)
- Retries the prompt after successful compaction
- Limits to one compaction attempt per run to prevent infinite loops

Fixes: context overflow errors shown to user without auto-compaction attempt

* style: format compact.ts and run.ts with oxfmt

* fix: tighten context overflow match (#1627) (thanks @rodrigouroz)

---------

Co-authored-by: Claude <claude@anthropic.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
											
										
										
											2026-01-24 19:09:24 -03:00
+								                  continue;
 								                }
 								                log.warn(
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								                  `[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
-												fix: auto-compact on context overflow promptError before returning error (#1627)

* fix: detect Anthropic 'Request size exceeds model context window' as context overflow

Anthropic now returns 'Request size exceeds model context window' instead of
the previously detected 'prompt is too long' format. This new error message
was not recognized by isContextOverflowError(), causing auto-compaction to
NOT trigger. Users would see the raw error twice without any recovery attempt.

Changes:
- Add 'exceeds model context window' and 'request size exceeds' to
  isContextOverflowError() detection patterns
- Add tests that fail without the fix, verifying both the raw error
  string and the JSON-wrapped format from Anthropic's API
- Add test for formatAssistantErrorText to ensure the friendly
  'Context overflow' message is shown instead of the raw error

Note: The upstream pi-ai package (@mariozechner/pi-ai) also needs a fix
in its OVERFLOW_PATTERNS regex: /exceeds the context window/i should be
changed to /exceeds.*context window/i to match both 'the' and 'model'
variants for triggering auto-compaction retry.

* fix(tests): remove unused imports and helper from test files

Remove WorkspaceBootstrapFile references and _makeFile helper that were
incorrectly copied from another test file. These caused type errors and
were unrelated to the context overflow detection tests.

* fix: trigger auto-compaction on context overflow promptError

When the LLM rejects a request with a context overflow error that surfaces
as a promptError (thrown exception rather than streamed error), the existing
auto-compaction in pi-coding-agent never triggers. This happens because the
error bypasses the agent's message_end → agent_end → _checkCompaction path.

This fix adds a fallback compaction attempt directly in the run loop:
- Detects context overflow in promptError (excluding compaction_failure)
- Calls compactEmbeddedPiSessionDirect (bypassing lane queues since already in-lane)
- Retries the prompt after successful compaction
- Limits to one compaction attempt per run to prevent infinite loops

Fixes: context overflow errors shown to user without auto-compaction attempt

* style: format compact.ts and run.ts with oxfmt

* fix: tighten context overflow match (#1627) (thanks @rodrigouroz)

---------

Co-authored-by: Claude <claude@anthropic.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
											
										
										
											2026-01-24 19:09:24 -03:00
+								                );
-												feat: add pre-prompt context size diagnostic logging (openclaw#8930) thanks @Glucksberg

Verified:
- pnpm build
- pnpm check
- pnpm test

Co-authored-by: Glucksberg <80581902+Glucksberg@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>

											
										
										
											2026-02-13 19:54:22 -04:00
+								              } else if (log.isEnabled("debug")) {
 								                log.debug(
 								                  `[compaction-diag] decision diagId=${overflowDiagId} branch=give_up ` +
 								                    `isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=${hasOversized} ` +
 								                    `attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
 								                );
-												fix: auto-compact on context overflow promptError before returning error (#1627)

* fix: detect Anthropic 'Request size exceeds model context window' as context overflow

Anthropic now returns 'Request size exceeds model context window' instead of
the previously detected 'prompt is too long' format. This new error message
was not recognized by isContextOverflowError(), causing auto-compaction to
NOT trigger. Users would see the raw error twice without any recovery attempt.

Changes:
- Add 'exceeds model context window' and 'request size exceeds' to
  isContextOverflowError() detection patterns
- Add tests that fail without the fix, verifying both the raw error
  string and the JSON-wrapped format from Anthropic's API
- Add test for formatAssistantErrorText to ensure the friendly
  'Context overflow' message is shown instead of the raw error

Note: The upstream pi-ai package (@mariozechner/pi-ai) also needs a fix
in its OVERFLOW_PATTERNS regex: /exceeds the context window/i should be
changed to /exceeds.*context window/i to match both 'the' and 'model'
variants for triggering auto-compaction retry.

* fix(tests): remove unused imports and helper from test files

Remove WorkspaceBootstrapFile references and _makeFile helper that were
incorrectly copied from another test file. These caused type errors and
were unrelated to the context overflow detection tests.

* fix: trigger auto-compaction on context overflow promptError

When the LLM rejects a request with a context overflow error that surfaces
as a promptError (thrown exception rather than streamed error), the existing
auto-compaction in pi-coding-agent never triggers. This happens because the
error bypasses the agent's message_end → agent_end → _checkCompaction path.

This fix adds a fallback compaction attempt directly in the run loop:
- Detects context overflow in promptError (excluding compaction_failure)
- Calls compactEmbeddedPiSessionDirect (bypassing lane queues since already in-lane)
- Retries the prompt after successful compaction
- Limits to one compaction attempt per run to prevent infinite loops

Fixes: context overflow errors shown to user without auto-compaction attempt

* style: format compact.ts and run.ts with oxfmt

* fix: tighten context overflow match (#1627) (thanks @rodrigouroz)

---------

Co-authored-by: Claude <claude@anthropic.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
											
										
										
											2026-01-24 19:09:24 -03:00
+								              }
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            }
-												feat: add pre-prompt context size diagnostic logging (openclaw#8930) thanks @Glucksberg

Verified:
- pnpm build
- pnpm check
- pnpm test

Co-authored-by: Glucksberg <80581902+Glucksberg@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>

											
										
										
											2026-02-13 19:54:22 -04:00
+								            if (
 								              (isCompactionFailure ||
 								                overflowCompactionAttempts >= MAX_OVERFLOW_COMPACTION_ATTEMPTS ||
 								                toolResultTruncationAttempted) &&
 								              log.isEnabled("debug")
 								            ) {
 								              log.debug(
 								                `[compaction-diag] decision diagId=${overflowDiagId} branch=give_up ` +
 								                  `isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=unknown ` +
 								                  `attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
 								              );
 								            }
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								            const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
 								            return {
 								              payloads: [
 								                {
 								                  text:
 								                    "Context overflow: prompt too large for the model. " +
-												fix: suggest /reset in context overflow error message (#12973)

* fix: suggest /reset in context overflow error message

When the context window overflows, the error message now suggests
using /reset to clear session history, giving users an actionable
recovery path instead of a dead-end error.

Closes #12940

Co-Authored-By: Claude <noreply@anthropic.com>

* fix: suggest /reset in context overflow error message (#12973) (thanks @RamiNoodle733)

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Rami Abdelrazzaq <RamiNoodle733@users.noreply.github.com>
											
										
										
											2026-02-09 20:44:37 -06:00
+								                    "Try /reset (or /new) to start a fresh session, or use a larger-context model.",
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								                  isError: true,
 								                },
 								              ],
 								              meta: {
 								                durationMs: Date.now() - started,
-												agents: preserve totalTokens on request failure instead of using contextWindow (#34275)

Merged via squash.

Prepared head SHA: f9d111d0a79a07815d476356e98a28df3a0000ba
Co-authored-by: RealKai42 <44634134+RealKai42@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman

											
										
										
											2026-03-05 04:02:22 +08:00
+								                agentMeta: buildErrorAgentMeta({
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								                  sessionId: sessionIdUsed,
 								                  provider,
 								                  model: model.id,
-												agents: preserve totalTokens on request failure instead of using contextWindow (#34275)

Merged via squash.

Prepared head SHA: f9d111d0a79a07815d476356e98a28df3a0000ba
Co-authored-by: RealKai42 <44634134+RealKai42@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman

											
										
										
											2026-03-05 04:02:22 +08:00
+								                  usageAccumulator,
 								                  lastRunPromptUsage,
 								                  lastAssistant,
 								                  lastTurnTotal,
 								                }),
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								                systemPromptReport: attempt.systemPromptReport,
 								                error: { kind, message: errorText },
 								              },
 								            };
 								          }
 								          if (promptError && !aborted) {
 								            const errorText = describeUnknownError(promptError);
-												fix: refresh Copilot token before expiry and retry on auth errors

GitHub Copilot API tokens expire after ~30 minutes. When OpenClaw spawns
a long-running subagent using Copilot as the provider, the token would
expire mid-session with no recovery mechanism, causing 401 auth errors.

This commit adds:
- Periodic token refresh scheduled 5 minutes before expiry
- Auth error detection with automatic token refresh and single retry
- Proper timer cleanup on session shutdown to prevent leaks

The implementation uses a per-attempt retry flag to ensure each auth
error can trigger one refresh+retry cycle without creating infinite
retry loops.

🤖 AI-assisted: This fix was developed with GitHub Copilot CLI assistance.
Testing: Fully tested with 3 new unit tests covering auth retry, retry
reset, and timer cleanup scenarios. All 11 auth rotation tests pass.

											
										
										
											2026-02-04 09:13:59 -03:00
+								            if (await maybeRefreshCopilotForAuthError(errorText, copilotAuthRetry)) {
 								              authRetryPending = true;
 								              continue;
 								            }
-												fix: sanitize user-facing errors and strip final tags

Co-authored-by: Drake Thomsen <drake.thomsen@example.com>

											
										
										
											2026-01-16 03:00:40 +00:00
+								            // Handle role ordering errors with a user-friendly message
 								            if (/incorrect role information|roles must alternate/i.test(errorText)) {
 								              return {
 								                payloads: [
 								                  {
 								                    text:
 								                      "Message ordering conflict - please try again. " +
 								                      "If this persists, use /new to start a fresh session.",
 								                    isError: true,
 								                  },
 								                ],
 								                meta: {
 								                  durationMs: Date.now() - started,
-												agents: preserve totalTokens on request failure instead of using contextWindow (#34275)

Merged via squash.

Prepared head SHA: f9d111d0a79a07815d476356e98a28df3a0000ba
Co-authored-by: RealKai42 <44634134+RealKai42@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman

											
										
										
											2026-03-05 04:02:22 +08:00
+								                  agentMeta: buildErrorAgentMeta({
-												fix: sanitize user-facing errors and strip final tags

Co-authored-by: Drake Thomsen <drake.thomsen@example.com>

											
										
										
											2026-01-16 03:00:40 +00:00
+								                    sessionId: sessionIdUsed,
 								                    provider,
 								                    model: model.id,
-												agents: preserve totalTokens on request failure instead of using contextWindow (#34275)

Merged via squash.

Prepared head SHA: f9d111d0a79a07815d476356e98a28df3a0000ba
Co-authored-by: RealKai42 <44634134+RealKai42@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman

											
										
										
											2026-03-05 04:02:22 +08:00
+								                    usageAccumulator,
 								                    lastRunPromptUsage,
 								                    lastAssistant,
 								                    lastTurnTotal,
 								                  }),
-												fix: reset sessions after role ordering conflicts

											
										
										
											2026-01-16 09:03:54 +00:00
+								                  systemPromptReport: attempt.systemPromptReport,
 								                  error: { kind: "role_ordering", message: errorText },
-												fix: sanitize user-facing errors and strip final tags

Co-authored-by: Drake Thomsen <drake.thomsen@example.com>

											
										
										
											2026-01-16 03:00:40 +00:00
+								                },
 								              };
 								            }
-												fix: prevent infinite retry loop for images exceeding 5MB

- Change MAX_IMAGE_BYTES from 6MB to 5MB to match Anthropic API limit
- Add isImageSizeError() to detect image size errors from API
- Handle image size errors with user-friendly message instead of retry
- Prevent failover for image size errors (not retriable)

Fixes #2271

											
										
										
											2026-01-27 22:21:51 +05:30
+								            // Handle image size errors with a user-friendly message (no retry needed)
-												fix: avoid global image size regression

											
										
										
											2026-01-27 15:59:11 -06:00
+								            const imageSizeError = parseImageSizeError(errorText);
 								            if (imageSizeError) {
 								              const maxMb = imageSizeError.maxMb;
 								              const maxMbLabel =
 								                typeof maxMb === "number" && Number.isFinite(maxMb) ? `${maxMb}` : null;
 								              const maxBytesHint = maxMbLabel ? ` (max ${maxMbLabel}MB)` : "";
-												fix: prevent infinite retry loop for images exceeding 5MB

- Change MAX_IMAGE_BYTES from 6MB to 5MB to match Anthropic API limit
- Add isImageSizeError() to detect image size errors from API
- Handle image size errors with user-friendly message instead of retry
- Prevent failover for image size errors (not retriable)

Fixes #2271

											
										
										
											2026-01-27 22:21:51 +05:30
+								              return {
 								                payloads: [
 								                  {
 								                    text:
-												fix: avoid global image size regression

											
										
										
											2026-01-27 15:59:11 -06:00
+								                      `Image too large for the model${maxBytesHint}. ` +
-												fix: prevent infinite retry loop for images exceeding 5MB

- Change MAX_IMAGE_BYTES from 6MB to 5MB to match Anthropic API limit
- Add isImageSizeError() to detect image size errors from API
- Handle image size errors with user-friendly message instead of retry
- Prevent failover for image size errors (not retriable)

Fixes #2271

											
										
										
											2026-01-27 22:21:51 +05:30
+								                      "Please compress or resize the image and try again.",
 								                    isError: true,
 								                  },
 								                ],
 								                meta: {
 								                  durationMs: Date.now() - started,
-												agents: preserve totalTokens on request failure instead of using contextWindow (#34275)

Merged via squash.

Prepared head SHA: f9d111d0a79a07815d476356e98a28df3a0000ba
Co-authored-by: RealKai42 <44634134+RealKai42@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman

											
										
										
											2026-03-05 04:02:22 +08:00
+								                  agentMeta: buildErrorAgentMeta({
-												fix: prevent infinite retry loop for images exceeding 5MB

- Change MAX_IMAGE_BYTES from 6MB to 5MB to match Anthropic API limit
- Add isImageSizeError() to detect image size errors from API
- Handle image size errors with user-friendly message instead of retry
- Prevent failover for image size errors (not retriable)

Fixes #2271

											
										
										
											2026-01-27 22:21:51 +05:30
+								                    sessionId: sessionIdUsed,
 								                    provider,
 								                    model: model.id,
-												agents: preserve totalTokens on request failure instead of using contextWindow (#34275)

Merged via squash.

Prepared head SHA: f9d111d0a79a07815d476356e98a28df3a0000ba
Co-authored-by: RealKai42 <44634134+RealKai42@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman

											
										
										
											2026-03-05 04:02:22 +08:00
+								                    usageAccumulator,
 								                    lastRunPromptUsage,
 								                    lastAssistant,
 								                    lastTurnTotal,
 								                  }),
-												fix: prevent infinite retry loop for images exceeding 5MB

- Change MAX_IMAGE_BYTES from 6MB to 5MB to match Anthropic API limit
- Add isImageSizeError() to detect image size errors from API
- Handle image size errors with user-friendly message instead of retry
- Prevent failover for image size errors (not retriable)

Fixes #2271

											
										
										
											2026-01-27 22:21:51 +05:30
+								                  systemPromptReport: attempt.systemPromptReport,
 								                  error: { kind: "image_size", message: errorText },
 								                },
 								              };
 								            }
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            const promptFailoverReason = classifyFailoverReason(errorText);
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								            const promptProfileFailureReason =
 								              resolveAuthProfileFailureReason(promptFailoverReason);
-												refactor: dedupe auth-profile failure marking and rotation test setup

											
										
										
											2026-02-22 15:43:57 +01:00
+								            await maybeMarkAuthProfileFailure({
 								              profileId: lastProfileId,
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								              reason: promptProfileFailureReason,
-												refactor: dedupe auth-profile failure marking and rotation test setup

											
										
										
											2026-02-22 15:43:57 +01:00
+								            });
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								            const promptFailoverFailure = isFailoverErrorMessage(errorText);
-												Agents: add embedded error observations (#41336)

Merged via squash.

Prepared head SHA: 490004229862129ceb21939e382658714e23bd68
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
											
										
										
											2026-03-09 22:27:05 +03:00
+								            // Capture the failing profile before auth-profile rotation mutates `lastProfileId`.
 								            const failedPromptProfileId = lastProfileId;
 								            const logPromptFailoverDecision = createFailoverDecisionLogger({
 								              stage: "prompt",
 								              runId: params.runId,
 								              rawError: errorText,
 								              failoverReason: promptFailoverReason,
 								              profileFailureReason: promptProfileFailureReason,
 								              provider,
 								              model: modelId,
 								              profileId: failedPromptProfileId,
 								              fallbackConfigured,
 								              aborted,
 								            });
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            if (
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								              promptFailoverFailure &&
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								              promptFailoverReason !== "timeout" &&
 								              (await advanceAuthProfile())
 								            ) {
-												Agents: add embedded error observations (#41336)

Merged via squash.

Prepared head SHA: 490004229862129ceb21939e382658714e23bd68
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
											
										
										
											2026-03-09 22:27:05 +03:00
+								              logPromptFailoverDecision("rotate_profile");
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								              await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								              continue;
 								            }
 								            const fallbackThinking = pickFallbackThinkingLevel({
 								              message: errorText,
 								              attempted: attemptedThinking,
 								            });
 								            if (fallbackThinking) {
 								              log.warn(
 								                `unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
 								              );
 								              thinkLevel = fallbackThinking;
 								              continue;
 								            }
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								            // Throw FailoverError for prompt-side failover reasons when fallbacks
 								            // are configured so outer model fallback can continue on overload,
 								            // rate-limit, auth, or billing failures.
 								            if (fallbackConfigured && promptFailoverFailure) {
-												Agents: add embedded error observations (#41336)

Merged via squash.

Prepared head SHA: 490004229862129ceb21939e382658714e23bd68
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
											
										
										
											2026-03-09 22:27:05 +03:00
+								              const status = resolveFailoverStatus(promptFailoverReason ?? "unknown");
 								              logPromptFailoverDecision("fallback_model", { status });
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								              await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
-												fix(agent): Enable model fallback for prompt-phase quota/rate limit errors

When a prompt submission fails with quota or rate limit errors, throw
FailoverError instead of the raw promptError. This enables the model
fallback system to try alternative models.

Previously, rate limit errors during the prompt phase (before streaming)
were thrown directly, bypassing fallback. Only response-phase errors
triggered model fallback.

Now checks if fallback models are configured and the error is failover-
eligible. If so, wraps in FailoverError to trigger the fallback chain.

											
										
										
											2026-01-18 01:29:48 +00:00
+								              throw new FailoverError(errorText, {
 								                reason: promptFailoverReason ?? "unknown",
 								                provider,
 								                model: modelId,
 								                profileId: lastProfileId,
-												Agents: add embedded error observations (#41336)

Merged via squash.

Prepared head SHA: 490004229862129ceb21939e382658714e23bd68
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
											
										
										
											2026-03-09 22:27:05 +03:00
+								                status,
-												fix(agent): Enable model fallback for prompt-phase quota/rate limit errors

When a prompt submission fails with quota or rate limit errors, throw
FailoverError instead of the raw promptError. This enables the model
fallback system to try alternative models.

Previously, rate limit errors during the prompt phase (before streaming)
were thrown directly, bypassing fallback. Only response-phase errors
triggered model fallback.

Now checks if fallback models are configured and the error is failover-
eligible. If so, wraps in FailoverError to trigger the fallback chain.

											
										
										
											2026-01-18 01:29:48 +00:00
+								              });
 								            }
-												Agents: add embedded error observations (#41336)

Merged via squash.

Prepared head SHA: 490004229862129ceb21939e382658714e23bd68
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
											
										
										
											2026-03-09 22:27:05 +03:00
+								            if (promptFailoverFailure || promptFailoverReason) {
 								              logPromptFailoverDecision("surface_error");
 								            }
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            throw promptError;
 								          }
 								          const fallbackThinking = pickFallbackThinkingLevel({
 								            message: lastAssistant?.errorMessage,
 								            attempted: attemptedThinking,
 								          });
 								          if (fallbackThinking && !aborted) {
 								            log.warn(
 								              `unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
 								            );
 								            thinkLevel = fallbackThinking;
 								            continue;
 								          }
 								          const authFailure = isAuthAssistantError(lastAssistant);
 								          const rateLimitFailure = isRateLimitAssistantError(lastAssistant);
-												fix(errors): show clear billing error instead of cryptic API response (#8391)

* fix(errors): return clear billing error message instead of cryptic raw error (#8136)

When an LLM API provider returns a credit/billing-related error (HTTP 402,
insufficient credits, low balance, etc.), OpenClaw now shows a clear,
actionable message instead of passing through the raw/cryptic error text:

  ⚠️ API provider returned a billing error — your API key has run out of
  credits or has an insufficient balance. Check your provider's billing
  dashboard and top up or switch to a different API key.

Changes:
- formatAssistantErrorText: detect billing errors via isBillingErrorMessage()
  and return a user-friendly message (placed before the generic HTTP/JSON
  error fallthrough)
- sanitizeUserFacingText: same billing detection for the sanitization path
- pi-embedded-runner/run.ts: add billingFailure detection in the profile
  exhaustion fallback, so the FailoverError message is billing-specific
- Added 3 new tests for credit balance, HTTP 402, and insufficient credits

* fix: extract billing error message to shared constant
											
										
										
											2026-02-05 17:58:43 -04:00
+								          const billingFailure = isBillingAssistantError(lastAssistant);
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								          const failoverFailure = isFailoverAssistantError(lastAssistant);
-												chore: migrate to oxlint and oxfmt

Co-authored-by: Christoph Nakazawa <christoph.pojer@gmail.com>

											
										
										
											2026-01-14 14:31:43 +00:00
+								          const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								          const assistantProfileFailureReason =
 								            resolveAuthProfileFailureReason(assistantFailoverReason);
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								          const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
-												fix: sanitize oversized image payloads

											
										
										
											2026-01-18 15:19:25 +00:00
+								          const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? "");
-												Agents: add embedded error observations (#41336)

Merged via squash.

Prepared head SHA: 490004229862129ceb21939e382658714e23bd68
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
											
										
										
											2026-03-09 22:27:05 +03:00
+								          // Capture the failing profile before auth-profile rotation mutates `lastProfileId`.
 								          const failedAssistantProfileId = lastProfileId;
 								          const logAssistantFailoverDecision = createFailoverDecisionLogger({
 								            stage: "assistant",
 								            runId: params.runId,
 								            rawError: lastAssistant?.errorMessage?.trim(),
 								            failoverReason: assistantFailoverReason,
 								            profileFailureReason: assistantProfileFailureReason,
 								            provider: activeErrorContext.provider,
 								            model: activeErrorContext.model,
 								            profileId: failedAssistantProfileId,
 								            fallbackConfigured,
 								            timedOut,
 								            aborted,
 								          });
-												fix: sanitize oversized image payloads

											
										
										
											2026-01-18 15:19:25 +00:00
-												fix: refresh Copilot token before expiry and retry on auth errors

GitHub Copilot API tokens expire after ~30 minutes. When OpenClaw spawns
a long-running subagent using Copilot as the provider, the token would
expire mid-session with no recovery mechanism, causing 401 auth errors.

This commit adds:
- Periodic token refresh scheduled 5 minutes before expiry
- Auth error detection with automatic token refresh and single retry
- Proper timer cleanup on session shutdown to prevent leaks

The implementation uses a per-attempt retry flag to ensure each auth
error can trigger one refresh+retry cycle without creating infinite
retry loops.

🤖 AI-assisted: This fix was developed with GitHub Copilot CLI assistance.
Testing: Fully tested with 3 new unit tests covering auth retry, retry
reset, and timer cleanup scenarios. All 11 auth rotation tests pass.

											
										
										
											2026-02-04 09:13:59 -03:00
+								          if (
 								            authFailure &&
 								            (await maybeRefreshCopilotForAuthError(
 								              lastAssistant?.errorMessage ?? "",
 								              copilotAuthRetry,
 								            ))
 								          ) {
 								            authRetryPending = true;
 								            continue;
 								          }
-												fix: sanitize oversized image payloads

											
										
										
											2026-01-18 15:19:25 +00:00
+								          if (imageDimensionError && lastProfileId) {
 								            const details = [
 								              imageDimensionError.messageIndex !== undefined
 								                ? `message=${imageDimensionError.messageIndex}`
 								                : null,
 								              imageDimensionError.contentIndex !== undefined
 								                ? `content=${imageDimensionError.contentIndex}`
 								                : null,
 								              imageDimensionError.maxDimensionPx !== undefined
 								                ? `limit=${imageDimensionError.maxDimensionPx}px`
 								                : null,
 								            ]
 								              .filter(Boolean)
 								              .join(" ");
 								            log.warn(
 								              `Profile ${lastProfileId} rejected image payload${details ? ` (${details})` : ""}.`,
 								            );
 								          }
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
-												fix: align timeout cooldown behavior docs/tests (#22622) (thanks @vageeshkumar)

											
										
										
											2026-02-22 15:33:40 +01:00
+								          // Rotate on timeout to try another account/model path in this turn,
 								          // but exclude post-prompt compaction timeouts (model succeeded; no profile issue).
-												fix(agent): prevent session lock deadlock on timeout during compaction (#9855)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 64a28900f183941a496a6fd5baaa9efcfb38f0f8
Co-authored-by: mverrilli <816450+mverrilli@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras

											
										
										
											2026-02-14 14:24:20 -05:00
+								          const shouldRotate =
 								            (!aborted && failoverFailure) || (timedOut && !timedOutDuringCompaction);
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
 								          if (shouldRotate) {
 								            if (lastProfileId) {
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								              const reason = timedOut ? "timeout" : assistantProfileFailureReason;
-												fix(agents): skip auth profile cooldown for timeout failures

A timeout is model/network-specific, not an auth issue. Marking the
auth profile as failed on timeout poisons fallback models on the same
provider (e.g. gpt-5.3 timeout would block gpt-5.2 via shared profile
cooldown). The prompt-phase path already guards against this; this
aligns the post-response timeout path to match.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-21 04:12:56 -08:00
+								              // Skip cooldown for timeouts: a timeout is model/network-specific,
 								              // not an auth issue. Marking the profile would poison fallback models
 								              // on the same provider (e.g. gpt-5.3 timeout blocks gpt-5.2).
-												refactor: dedupe auth-profile failure marking and rotation test setup

											
										
										
											2026-02-22 15:43:57 +01:00
+								              await maybeMarkAuthProfileFailure({
 								                profileId: lastProfileId,
 								                reason,
 								              });
-												fix: silence probe timeouts

											
										
										
											2026-01-24 00:04:53 +00:00
+								              if (timedOut && !isProbeSession) {
-												fix: align timeout cooldown behavior docs/tests (#22622) (thanks @vageeshkumar)

											
										
										
											2026-02-22 15:33:40 +01:00
+								                log.warn(`Profile ${lastProfileId} timed out. Trying next account...`);
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								              }
 								              if (cloudCodeAssistFormatError) {
 								                log.warn(
 								                  `Profile ${lastProfileId} hit Cloud Code Assist format error. Tool calls will be sanitized on retry.`,
 								                );
 								              }
 								            }
 								            const rotated = await advanceAuthProfile();
-												chore: Enable "curly" rule to avoid single-statement if confusion/errors.

											
										
										
											2026-01-31 16:19:20 +09:00
+								            if (rotated) {
-												Agents: add embedded error observations (#41336)

Merged via squash.

Prepared head SHA: 490004229862129ceb21939e382658714e23bd68
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
											
										
										
											2026-03-09 22:27:05 +03:00
+								              logAssistantFailoverDecision("rotate_profile");
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								              await maybeBackoffBeforeOverloadFailover(assistantFailoverReason);
-												chore: Enable "curly" rule to avoid single-statement if confusion/errors.

											
										
										
											2026-01-31 16:19:20 +09:00
+								              continue;
 								            }
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
 								            if (fallbackConfigured) {
-												fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
											
										
										
											2026-03-07 01:42:11 +03:00
+								              await maybeBackoffBeforeOverloadFailover(assistantFailoverReason);
-												fix: sanitize user-facing errors and strip final tags

Co-authored-by: Drake Thomsen <drake.thomsen@example.com>

											
										
										
											2026-01-16 03:00:40 +00:00
+								              // Prefer formatted error message (user-friendly) over raw errorMessage
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								              const message =
 								                (lastAssistant
 								                  ? formatAssistantErrorText(lastAssistant, {
 								                      cfg: params.config,
 								                      sessionKey: params.sessionKey ?? params.sessionId,
-												fix: include provider and model name in billing error message (#20510)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 40dbdf62e8952dd6c5afcb9ce2a73199f3f532a6
Co-authored-by: echoVic <16428813+echoVic@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras

											
										
										
											2026-02-19 10:56:00 +08:00
+								                      provider: activeErrorContext.provider,
 								                      model: activeErrorContext.model,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								                    })
-												fix: sanitize user-facing errors and strip final tags

Co-authored-by: Drake Thomsen <drake.thomsen@example.com>

											
										
										
											2026-01-16 03:00:40 +00:00
+								                  : undefined) ||
 								                lastAssistant?.errorMessage?.trim() ||
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								                (timedOut
 								                  ? "LLM request timed out."
 								                  : rateLimitFailure
 								                    ? "LLM request rate limited."
-												fix(errors): show clear billing error instead of cryptic API response (#8391)

* fix(errors): return clear billing error message instead of cryptic raw error (#8136)

When an LLM API provider returns a credit/billing-related error (HTTP 402,
insufficient credits, low balance, etc.), OpenClaw now shows a clear,
actionable message instead of passing through the raw/cryptic error text:

  ⚠️ API provider returned a billing error — your API key has run out of
  credits or has an insufficient balance. Check your provider's billing
  dashboard and top up or switch to a different API key.

Changes:
- formatAssistantErrorText: detect billing errors via isBillingErrorMessage()
  and return a user-friendly message (placed before the generic HTTP/JSON
  error fallthrough)
- sanitizeUserFacingText: same billing detection for the sanitization path
- pi-embedded-runner/run.ts: add billingFailure detection in the profile
  exhaustion fallback, so the FailoverError message is billing-specific
- Added 3 new tests for credit balance, HTTP 402, and insufficient credits

* fix: extract billing error message to shared constant
											
										
										
											2026-02-05 17:58:43 -04:00
+								                    : billingFailure
-												fix: include provider and model name in billing error message (#20510)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 40dbdf62e8952dd6c5afcb9ce2a73199f3f532a6
Co-authored-by: echoVic <16428813+echoVic@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras

											
										
										
											2026-02-19 10:56:00 +08:00
+								                      ? formatBillingErrorMessage(
 								                          activeErrorContext.provider,
 								                          activeErrorContext.model,
 								                        )
-												fix(errors): show clear billing error instead of cryptic API response (#8391)

* fix(errors): return clear billing error message instead of cryptic raw error (#8136)

When an LLM API provider returns a credit/billing-related error (HTTP 402,
insufficient credits, low balance, etc.), OpenClaw now shows a clear,
actionable message instead of passing through the raw/cryptic error text:

  ⚠️ API provider returned a billing error — your API key has run out of
  credits or has an insufficient balance. Check your provider's billing
  dashboard and top up or switch to a different API key.

Changes:
- formatAssistantErrorText: detect billing errors via isBillingErrorMessage()
  and return a user-friendly message (placed before the generic HTTP/JSON
  error fallthrough)
- sanitizeUserFacingText: same billing detection for the sanitization path
- pi-embedded-runner/run.ts: add billingFailure detection in the profile
  exhaustion fallback, so the FailoverError message is billing-specific
- Added 3 new tests for credit balance, HTTP 402, and insufficient credits

* fix: extract billing error message to shared constant
											
										
										
											2026-02-05 17:58:43 -04:00
+								                      : authFailure
 								                        ? "LLM request unauthorized."
 								                        : "LLM request failed.");
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								              const status =
 								                resolveFailoverStatus(assistantFailoverReason ?? "unknown") ??
 								                (isTimeoutErrorMessage(message) ? 408 : undefined);
-												Agents: add embedded error observations (#41336)

Merged via squash.

Prepared head SHA: 490004229862129ceb21939e382658714e23bd68
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
											
										
										
											2026-03-09 22:27:05 +03:00
+								              logAssistantFailoverDecision("fallback_model", { status });
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								              throw new FailoverError(message, {
 								                reason: assistantFailoverReason ?? "unknown",
-												fix: include provider and model name in billing error message (#20510)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 40dbdf62e8952dd6c5afcb9ce2a73199f3f532a6
Co-authored-by: echoVic <16428813+echoVic@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras

											
										
										
											2026-02-19 10:56:00 +08:00
+								                provider: activeErrorContext.provider,
 								                model: activeErrorContext.model,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								                profileId: lastProfileId,
 								                status,
 								              });
 								            }
-												Agents: add embedded error observations (#41336)

Merged via squash.

Prepared head SHA: 490004229862129ceb21939e382658714e23bd68
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
											
										
										
											2026-03-09 22:27:05 +03:00
+								            logAssistantFailoverDecision("surface_error");
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								          }
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								          const usage = toNormalizedUsage(usageAccumulator);
-												fix(agent): isolate last-turn total in token usage reporting (#17016)

recordAssistantUsage accumulated cacheRead across the entire multi-turn
run, and totalTokens was clamped to contextTokens. This caused
session_status to report 100% context usage regardless of actual load.

Changes:
- run.ts: capture lastTurnTotal from the most recent model call and
  inject it into the normalized usage before it reaches agentMeta.
- usage-reporting.test.ts: verify usage.total reflects current turn,
  not accumulated total.

Fixes #17016

											
										
										
											2026-02-16 13:44:22 +01:00
+								          if (usage && lastTurnTotal && lastTurnTotal > 0) {
 								            usage.total = lastTurnTotal;
 								          }
-												fix: update totalTokens after compaction using last-call usage (#15018)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 9214291bf7e9e62ba8661aa46b4739113794056a
Co-authored-by: shtse8 <8020099+shtse8@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras

											
										
										
											2026-02-12 23:02:30 +00:00
+								          // Extract the last individual API call's usage for context-window
 								          // utilization display. The accumulated `usage` sums input tokens
 								          // across all calls (tool-use loops, compaction retries), which
 								          // overstates the actual context size. `lastCallUsage` reflects only
 								          // the final call, giving an accurate snapshot of current context.
 								          const lastCallUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
-												fix(agents): stabilize overflow compaction retries and session context accounting (openclaw#14102) thanks @vpesh

Verified:
- CI checks for commit 86a7ecb45ebf0be61dce9261398000524fd9fab6
- Rebase conflict resolution for compatibility with latest main

Co-authored-by: vpesh <9496634+vpesh@users.noreply.github.com>

											
										
										
											2026-02-13 00:53:13 +01:00
+								          const promptTokens = derivePromptTokens(lastRunPromptUsage);
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								          const agentMeta: EmbeddedPiAgentMeta = {
 								            sessionId: sessionIdUsed,
 								            provider: lastAssistant?.provider ?? provider,
 								            model: lastAssistant?.model ?? model.id,
 								            usage,
-												fix: update totalTokens after compaction using last-call usage (#15018)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 9214291bf7e9e62ba8661aa46b4739113794056a
Co-authored-by: shtse8 <8020099+shtse8@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras

											
										
										
											2026-02-12 23:02:30 +00:00
+								            lastCallUsage: lastCallUsage ?? undefined,
-												fix(agents): stabilize overflow compaction retries and session context accounting (openclaw#14102) thanks @vpesh

Verified:
- CI checks for commit 86a7ecb45ebf0be61dce9261398000524fd9fab6
- Rebase conflict resolution for compatibility with latest main

Co-authored-by: vpesh <9496634+vpesh@users.noreply.github.com>

											
										
										
											2026-02-13 00:53:13 +01:00
+								            promptTokens,
-												fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
											
										
										
											2026-02-07 20:02:32 -08:00
+								            compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								          };
 								          const payloads = buildEmbeddedRunPayloads({
 								            assistantTexts: attempt.assistantTexts,
 								            toolMetas: attempt.toolMetas,
 								            lastAssistant: attempt.lastAssistant,
-												Agents: surface tool failures without assistant output

											
										
										
											2026-01-18 18:35:03 +05:30
+								            lastToolError: attempt.lastToolError,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            config: params.config,
 								            sessionKey: params.sessionKey ?? params.sessionId,
-												fix: include provider and model name in billing error message (#20510)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 40dbdf62e8952dd6c5afcb9ce2a73199f3f532a6
Co-authored-by: echoVic <16428813+echoVic@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras

											
										
										
											2026-02-19 10:56:00 +08:00
+								            provider: activeErrorContext.provider,
 								            model: activeErrorContext.model,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            verboseLevel: params.verboseLevel,
 								            reasoningLevel: params.reasoningLevel,
-												fix: format verbose tool output by channel

											
										
										
											2026-01-17 10:17:57 +00:00
+								            toolResultFormat: resolvedToolResultFormat,
-												Heartbeat: allow suppressing tool warnings (#18497)

* Heartbeat: allow suppressing tool warnings

* Changelog: note heartbeat tool-warning suppression
											
										
										
											2026-02-16 13:29:24 -06:00
+								            suppressToolErrorWarnings: params.suppressToolErrorWarnings,
-												fix: stop sending tool summaries to channels

											
										
										
											2026-01-25 11:54:20 +00:00
+								            inlineToolResultsAllowed: false,
-												Agents/Replies: scope done fallback to direct sessions

											
										
										
											2026-02-22 13:30:16 -08:00
+								            didSendViaMessagingTool: attempt.didSendViaMessagingTool,
-												Telegram: exec approvals for OpenCode/Codex (#37233)

Merged via squash.

Prepared head SHA: f2433790941841ade0efe6292ff4909b2edd6f18
Co-authored-by: huntharo <5617868+huntharo@users.noreply.github.com>
Co-authored-by: huntharo <5617868+huntharo@users.noreply.github.com>
Reviewed-by: @huntharo
											
										
										
											2026-03-09 23:04:35 -04:00
+								            didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								          });
-												fix (agents): return timeout reply on empty timed-out runs

											
										
										
											2026-02-14 20:32:45 -08:00
+								          // Timeout aborts can leave the run without any assistant payloads.
 								          // Emit an explicit timeout error instead of silently completing, so
 								          // callers do not lose the turn as an orphaned user message.
 								          if (timedOut && !timedOutDuringCompaction && payloads.length === 0) {
 								            return {
 								              payloads: [
 								                {
 								                  text:
 								                    "Request timed out before a response was generated. " +
 								                    "Please try again, or increase `agents.defaults.timeoutSeconds` in your config.",
 								                  isError: true,
 								                },
 								              ],
 								              meta: {
 								                durationMs: Date.now() - started,
 								                agentMeta,
 								                aborted,
 								                systemPromptReport: attempt.systemPromptReport,
 								              },
 								              didSendViaMessagingTool: attempt.didSendViaMessagingTool,
-												Telegram: exec approvals for OpenCode/Codex (#37233)

Merged via squash.

Prepared head SHA: f2433790941841ade0efe6292ff4909b2edd6f18
Co-authored-by: huntharo <5617868+huntharo@users.noreply.github.com>
Co-authored-by: huntharo <5617868+huntharo@users.noreply.github.com>
Reviewed-by: @huntharo
											
										
										
											2026-03-09 23:04:35 -04:00
+								              didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt,
-												fix (agents): return timeout reply on empty timed-out runs

											
										
										
											2026-02-14 20:32:45 -08:00
+								              messagingToolSentTexts: attempt.messagingToolSentTexts,
-												fix(discord): add media dedup production code for messaging tool pipeline

Wire media URL tracking through the embedded agent pipeline so that
media already sent via messaging tools is not delivered again by the
reply dispatcher.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-16 20:41:41 +01:00
+								              messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
-												fix (agents): return timeout reply on empty timed-out runs

											
										
										
											2026-02-14 20:32:45 -08:00
+								              messagingToolSentTargets: attempt.messagingToolSentTargets,
-												Agent: guard reminder promises behind cron scheduling

											
										
										
											2026-02-16 13:34:09 -08:00
+								              successfulCronAdds: attempt.successfulCronAdds,
-												fix (agents): return timeout reply on empty timed-out runs

											
										
										
											2026-02-14 20:32:45 -08:00
+								            };
 								          }
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								          log.debug(
 								            `embedded run done: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - started} aborted=${aborted}`,
 								          );
 								          if (lastProfileId) {
 								            await markAuthProfileGood({
 								              store: authStore,
 								              provider,
 								              profileId: lastProfileId,
-												fix: honor user-pinned profiles and search ranking

											
										
										
											2026-01-23 03:05:01 +00:00
+								              agentDir: params.agentDir,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            });
 								            await markAuthProfileUsed({
 								              store: authStore,
 								              profileId: lastProfileId,
-												fix: honor user-pinned profiles and search ranking

											
										
										
											2026-01-23 03:05:01 +00:00
+								              agentDir: params.agentDir,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            });
 								          }
 								          return {
 								            payloads: payloads.length ? payloads : undefined,
 								            meta: {
 								              durationMs: Date.now() - started,
 								              agentMeta,
 								              aborted,
-												feat: add /context prompt breakdown

											
										
										
											2026-01-15 01:06:19 +00:00
+								              systemPromptReport: attempt.systemPromptReport,
-												feat(gateway): implement OpenResponses /v1/responses endpoint phase 2

- Add input_image and input_file support with SSRF protection
- Add client-side tools (Hosted Tools) support
- Add turn-based tool flow with function_call_output handling
- Export buildAgentPrompt for testing

											
										
										
											2026-01-19 12:43:00 +01:00
+								              // Handle client tool calls (OpenResponses hosted tools)
-												fix(gateway): flush throttled delta before emitChatFinal (#24856)

* fix(gateway): flush throttled delta before emitChatFinal

The 150ms throttle in emitChatDelta can suppress the last text chunk
before emitChatFinal fires, causing streaming clients (e.g. ACP) to
receive truncated responses. The final event carries the complete text,
but clients that build responses incrementally from deltas miss the
tail end.

Flush one last unthrottled delta with the complete buffered text
immediately before sending the final event. This ensures all streaming
consumers have the full response without needing to reconcile deltas
against the final payload.

* fix(gateway): avoid duplicate delta flush when buffer unchanged

Track the text length at the time of the last broadcast. The flush in
emitChatFinal now only sends a delta if the buffer has grown since the
last broadcast, preventing duplicate sends when the final delta passed
the 150ms throttle and was already broadcast.

* fix(gateway): honor heartbeat suppression in final delta flush

* test(gateway): add final delta flush and dedupe coverage

* fix(gateway): skip final flush for silent lead fragments

* docs(changelog): note gateway final-delta flush fix credits

---------

Co-authored-by: Jonathan Taylor <visionik@pobox.com>
Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
											
										
										
											2026-03-02 23:45:46 -05:00
+								              // Propagate the LLM stop reason so callers (lifecycle events,
 								              // ACP bridge) can distinguish end_turn from max_tokens.
 								              stopReason: attempt.clientToolCall
 								                ? "tool_calls"
 								                : (lastAssistant?.stopReason as string | undefined),
-												feat(gateway): implement OpenResponses /v1/responses endpoint phase 2

- Add input_image and input_file support with SSRF protection
- Add client-side tools (Hosted Tools) support
- Add turn-based tool flow with function_call_output handling
- Export buildAgentPrompt for testing

											
										
										
											2026-01-19 12:43:00 +01:00
+								              pendingToolCalls: attempt.clientToolCall
 								                ? [
 								                    {
-												fix: sanitize tool call IDs in agent loop for Mistral strict9 format (#23595) (#23698)

* fix: sanitize tool call IDs in agent loop for Mistral strict9 format (#23595)

Mistral requires tool call IDs to be exactly 9 alphanumeric characters
([a-zA-Z0-9]{9}). The existing sanitizeToolCallIdsForCloudCodeAssist
mechanism only ran on historical messages at attempt start via
sanitizeSessionHistory, but the pi-agent-core agent loop's internal
tool call → tool result cycles bypassed that path entirely.

Changes:
- Wrap streamFn (like dropThinkingBlocks) so every outbound request
  sees sanitized tool call IDs when the transcript policy requires it
- Replace call_${Date.now()} in pendingToolCalls with a 9-char hex ID
  generated from crypto.randomBytes
- Add Mistral tool call ID error pattern to ERROR_PATTERNS.format so
  the error is correctly classified for retry/rotation

* Changelog: document Mistral strict9 tool-call ID fix

---------

Co-authored-by: echoVic <AkiraVic@outlook.com>
Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
											
										
										
											2026-02-23 02:37:12 +08:00
+								                      id: randomBytes(5).toString("hex").slice(0, 9),
-												feat(gateway): implement OpenResponses /v1/responses endpoint phase 2

- Add input_image and input_file support with SSRF protection
- Add client-side tools (Hosted Tools) support
- Add turn-based tool flow with function_call_output handling
- Export buildAgentPrompt for testing

											
										
										
											2026-01-19 12:43:00 +01:00
+								                      name: attempt.clientToolCall.name,
 								                      arguments: JSON.stringify(attempt.clientToolCall.params),
 								                    },
 								                  ]
 								                : undefined,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            },
 								            didSendViaMessagingTool: attempt.didSendViaMessagingTool,
-												Telegram: exec approvals for OpenCode/Codex (#37233)

Merged via squash.

Prepared head SHA: f2433790941841ade0efe6292ff4909b2edd6f18
Co-authored-by: huntharo <5617868+huntharo@users.noreply.github.com>
Reviewed-by: @huntharo
											
										
										
											2026-03-09 23:04:35 -04:00
+								            didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            messagingToolSentTexts: attempt.messagingToolSentTexts,
-												fix(discord): add media dedup production code for messaging tool pipeline

Wire media URL tracking through the embedded agent pipeline so that
media already sent via messaging tools is not delivered again by the
reply dispatcher.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-16 20:41:41 +01:00
+								            messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								            messagingToolSentTargets: attempt.messagingToolSentTargets,
-												Agent: guard reminder promises behind cron scheduling

											
										
										
											2026-02-16 13:34:09 -08:00
+								            successfulCronAdds: attempt.successfulCronAdds,
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								          };
 								        }
 								      } finally {
-												feature(context): extend plugin system to support custom context management (#22201)

* feat(context-engine): add ContextEngine interface and registry

Introduce the pluggable ContextEngine abstraction that allows external
plugins to register custom context management strategies.

- ContextEngine interface with lifecycle methods: bootstrap, ingest,
  ingestBatch, afterTurn, assemble, compact, prepareSubagentSpawn,
  onSubagentEnded, dispose
- Module-level singleton registry with registerContextEngine() and
  resolveContextEngine() (config-driven slot selection)
- LegacyContextEngine: pass-through implementation wrapping existing
  compaction behavior for 100% backward compatibility
- ensureContextEnginesInitialized() guard for safe one-time registration
- 19 tests covering contract, registry, resolution, and legacy parity

* feat(plugins): add context-engine slot and registerContextEngine API

Wire the ContextEngine abstraction into the plugin system so external
plugins can register context engines via the standard plugin API.

- Add 'context-engine' to PluginKind union type
- Add 'contextEngine' slot to PluginSlotsConfig (default: 'legacy')
- Wire registerContextEngine() through OpenClawPluginApi
- Export ContextEngine types from plugin-sdk for external consumers
- Restore proper slot-based resolution in registry

* feat(context-engine): wire ContextEngine into agent run lifecycle

Integrate the ContextEngine abstraction into the core agent run path:

- Resolve context engine once per run (reused across retries)
- Bootstrap: hydrate canonical store from session file on first run
- Assemble: route context assembly through pluggable engine
- Auto-compaction guard: disable built-in auto-compaction when
  the engine declares ownsCompaction (prevents double-compaction)
- AfterTurn: post-turn lifecycle hook for ingest + background
  compaction decisions
- Overflow compaction: route through contextEngine.compact()
- Dispose: clean up engine resources in finally block
- Notify context engine on subagent lifecycle events

Legacy engine: all lifecycle methods are pass-through/no-op, preserving
100% backward compatibility for users without a context engine plugin.

* feat(plugins): add scoped subagent methods and gateway request scope

Expose runtime.subagent.{run, waitForRun, getSession, deleteSession}
so external plugins can spawn sub-agent sessions without raw gateway
dispatch access.

Uses AsyncLocalStorage request-scope bridge to dispatch internally via
handleGatewayRequest with a synthetic operator client. Methods are only
available during gateway request handling.

- Symbol.for-backed global singleton for cross-module-reload safety
- Fallback gateway context for non-WS dispatch paths (Telegram/WhatsApp)
- Set gateway request scope for all handlers, not just plugin handlers
- 3 staleness tests for fallback context hardening

* feat(context-engine): route /compact and sessions.get through context engine

Wire the /compact command and sessions.get handler through the pluggable
ContextEngine interface.

- Thread tokenBudget and force parameters to context engine compact
- Route /compact through contextEngine.compact() when registered
- Wire sessions.get as runtime alias for plugin subagent dispatch
- Add .pebbles/ to .gitignore

* style: format with oxfmt 0.33.0

Fix duplicate import (ControlUiRootState in server.impl.ts) and
import ordering across all changed files.

* fix: update extension test mocks for context-engine types

Add missing subagent property to bluebubbles PluginRuntime mock.
Add missing registerContextEngine to lobster OpenClawPluginApi mock.

* fix(subagents): keep deferred delete cleanup retryable

* style: format run attempt for CI

* fix(rebase): remove duplicate embedded-run imports

* test: add missing gateway context mock export

* fix: pass resolved auth profile into afterTurn compaction

Ensure the embedded runner forwards resolved auth profile context into
legacy context-engine compaction params on the normal afterTurn path,
matching overflow compaction behavior. This allows downstream LCM
summarization to use the intended provider auth/profile consistently.

Also fix strict TS typing in external-link token dedupe and align an
attempt unit test reasoningLevel value with the current ReasoningLevel
enum.

Regeneration-Prompt: |
  We were debugging context-engine compaction where downstream summary
  calls were missing the right auth/profile context in normal afterTurn
  flow, while overflow compaction already propagated it. Preserve current
  behavior and keep changes additive: thread the resolved authProfileId
  through run -> attempt -> legacy compaction param builder without
  broad refactors.

  Add tests that prove the auth profile is included in afterTurn legacy
  params and that overflow compaction still passes it through run
  attempts. Keep existing APIs stable, and only adjust small type issues
  needed for strict compilation.

* fix: remove duplicate imports from rebase

* feat: add context-engine system prompt additions

* fix(rebase): dedupe attempt import declarations

* test: fix fetch mock typing in ollama autodiscovery

* fix(test): add registerContextEngine to diffs extension mock APIs

* test(windows): use path.delimiter in ios-team-id fixture PATH

* test(cron): add model formatting and precedence edge case tests

Covers:
- Provider/model string splitting (whitespace, nested paths, empty segments)
- Provider normalization (casing, aliases like bedrock→amazon-bedrock)
- Anthropic model alias normalization (opus-4.5→claude-opus-4-5)
- Precedence: job payload > session override > config default
- Sequential runs with different providers (CI flake regression pattern)
- forceNew session preserving stored model overrides
- Whitespace/empty model string edge cases
- Config model as string vs object format

* test(cron): fix model formatting test config types

* test(phone-control): add registerContextEngine to mock API

* fix: re-export ChannelKind from config-reload-plan

* fix: add subagent mock to plugin-runtime-mock test util

* docs: add changelog fragment for context engine PR #22201
											
										
										
											2026-03-06 05:31:59 -08:00
+								        await contextEngine.dispose?.();
-												fix: refresh Copilot token before expiry and retry on auth errors

GitHub Copilot API tokens expire after ~30 minutes. When OpenClaw spawns
a long-running subagent using Copilot as the provider, the token would
expire mid-session with no recovery mechanism, causing 401 auth errors.

This commit adds:
- Periodic token refresh scheduled 5 minutes before expiry
- Auth error detection with automatic token refresh and single retry
- Proper timer cleanup on session shutdown to prevent leaks

The implementation uses a per-attempt retry flag to ensure each auth
error can trigger one refresh+retry cycle without creating infinite
retry loops.

🤖 AI-assisted: This fix was developed with GitHub Copilot CLI assistance.
Testing: Fully tested with 3 new unit tests covering auth retry, retry
reset, and timer cleanup scenarios. All 11 auth rotation tests pass.

											
										
										
											2026-02-04 09:13:59 -03:00
+								        stopCopilotRefreshTimer();
-												refactor(src): split oversized modules

											
										
										
											2026-01-14 01:08:15 +00:00
+								        process.chdir(prevCwd);
 								      }
 								    }),
 								  );
 								}