import fs from "node:fs/promises";
import type { ThinkLevel } from "../../auto-reply/thinking.js";
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
import { enqueueCommandInLane } from "../../process/command-queue.js";
import { isMarkdownCapableMessageChannel } from "../../utils/message-channel.js";
import { resolveOpenClawAgentDir } from "../agent-paths.js";
import {
  isProfileInCooldown,
  markAuthProfileFailure,
  markAuthProfileGood,
  markAuthProfileUsed,
} from "../auth-profiles.js";
import {
  CONTEXT_WINDOW_HARD_MIN_TOKENS,
  CONTEXT_WINDOW_WARN_BELOW_TOKENS,
  evaluateContextWindowGuard,
  resolveContextWindowInfo,
} from "../context-window-guard.js";
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
import { FailoverError, resolveFailoverStatus } from "../failover-error.js";
import {
  ensureAuthProfileStore,
  getApiKeyForModel,
  resolveAuthProfileOrder,
  type ResolvedProviderAuth,
} from "../model-auth.js";
import { normalizeProviderId } from "../model-selection.js";
import { ensureOpenClawModelsJson } from "../models-config.js";
import {
  formatBillingErrorMessage,
  classifyFailoverReason,
  formatAssistantErrorText,
  isAuthAssistantError,
  isBillingAssistantError,
  isCompactionFailureError,
  isLikelyContextOverflowError,
  isFailoverAssistantError,
  isFailoverErrorMessage,
  parseImageSizeError,
  parseImageDimensionError,
  isRateLimitAssistantError,
  isTimeoutErrorMessage,
  pickFallbackThinkingLevel,
  type FailoverReason,
} from "../pi-embedded-helpers.js";
import { derivePromptTokens, normalizeUsage, type UsageLike } from "../usage.js";
import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js";
import { compactEmbeddedPiSessionDirect } from "./compact.js";
import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
import { log } from "./logger.js";
import { resolveModel } from "./model.js";
import { runEmbeddedAttempt } from "./run/attempt.js";
import type { RunEmbeddedPiAgentParams } from "./run/params.js";
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
import {
  truncateOversizedToolResultsInSession,
  sessionLikelyHasOversizedToolResults,
} from "./tool-result-truncation.js";
import type { EmbeddedPiAgentMeta, EmbeddedPiRunResult } from "./types.js";
import { describeUnknownError } from "./utils.js";

/** Auth resolution result for the selected model, as returned by `getApiKeyForModel`. */
type ApiKeyInfo = ResolvedProviderAuth;

// Avoid Anthropic's refusal test token poisoning session transcripts.
const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)";

/**
 * Replaces every occurrence of the Anthropic refusal trigger token in `prompt`
 * with a redacted placeholder.
 *
 * @param prompt - Raw prompt text.
 * @returns `prompt` unchanged when the token is absent, otherwise a copy with
 *   all occurrences replaced.
 */
function scrubAnthropicRefusalMagic(prompt: string): string {
  if (!prompt.includes(ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL)) {
    return prompt;
  }
  return prompt.replaceAll(
    ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL,
    ANTHROPIC_MAGIC_STRING_REPLACEMENT,
  );
}

/** Running token totals across attempts, plus a snapshot of the latest call. */
type UsageAccumulator = {
  input: number;
  output: number;
  cacheRead: number;
  cacheWrite: number;
  total: number;
  /** Cache fields from the most recent API call (not accumulated). */
  lastCacheRead: number;
  lastCacheWrite: number;
  lastInput: number;
};

/** Creates an accumulator with every counter set to zero. */
const createUsageAccumulator = (): UsageAccumulator => ({
  input: 0,
  output: 0,
  cacheRead: 0,
  cacheWrite: 0,
  total: 0,
  lastCacheRead: 0,
  lastCacheWrite: 0,
  lastInput: 0,
});

/**
 * Builds a diagnostic id of the form `ovf-<base36 timestamp>-<base36 random>`
 * used to correlate context-overflow log lines. Not guaranteed unique.
 */
function createCompactionDiagId(): string {
  return `ovf-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
}

/**
 * Type guard: true when `usage` is present and at least one of its
 * input/output/cacheRead/cacheWrite/total fields is a finite number > 0.
 */
const hasUsageValues = (
  usage: ReturnType<typeof normalizeUsage>,
): usage is NonNullable<ReturnType<typeof normalizeUsage>> =>
  !!usage &&
  [usage.input, usage.output, usage.cacheRead, usage.cacheWrite, usage.total].some(
    (value) => typeof value === "number" && Number.isFinite(value) && value > 0,
  );

/**
 * Adds `usage` to the running totals in `target` (mutating it) and records the
 * call's input/cache fields as the "last call" snapshot. Does nothing when
 * `usage` carries no positive values.
 *
 * @param target - Accumulator to mutate.
 * @param usage - Normalized usage for one attempt, possibly undefined/empty.
 */
const mergeUsageIntoAccumulator = (
  target: UsageAccumulator,
  usage: ReturnType<typeof normalizeUsage>,
) => {
  if (!hasUsageValues(usage)) {
    return;
  }
  target.input += usage.input ?? 0;
  target.output += usage.output ?? 0;
  target.cacheRead += usage.cacheRead ?? 0;
  target.cacheWrite += usage.cacheWrite ?? 0;
  // When the provider omits `total`, derive it from the individual fields.
  target.total +=
    usage.total ??
    (usage.input ?? 0) + (usage.output ?? 0) + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0);
  // Track the most recent API call's cache fields for accurate context-size reporting.
  // Accumulated cache totals inflate context size when there are multiple tool-call round-trips,
  // since each call reports cacheRead ≈ current_context_size.
  target.lastCacheRead = usage.cacheRead ?? 0;
  target.lastCacheWrite = usage.cacheWrite ?? 0;
  target.lastInput = usage.input ?? 0;
};

/**
 * Converts the accumulator into the usage shape reported in run metadata.
 *
 * @param usage - Accumulated usage for the run.
 * @returns `undefined` when nothing was accumulated; otherwise an object whose
 *   input/cache fields come from the last call, whose `output` is accumulated,
 *   and whose zero-valued fields are reported as `undefined`.
 */
const toNormalizedUsage = (usage: UsageAccumulator) => {
  const hasUsage =
    usage.input > 0 ||
    usage.output > 0 ||
    usage.cacheRead > 0 ||
    usage.cacheWrite > 0 ||
    usage.total > 0;
  if (!hasUsage) {
    return undefined;
  }
  // Use the LAST API call's cache fields for context-size calculation.
  // The accumulated cacheRead/cacheWrite inflate context size because each tool-call
  // round-trip reports cacheRead ≈ current_context_size, and summing N calls gives
  // N × context_size which gets clamped to contextWindow (e.g. 200k).
  // See: https://github.com/openclaw/openclaw/issues/13698
  //
  // We use lastInput/lastCacheRead/lastCacheWrite (from the most recent API call) for
  // cache-related fields, but keep accumulated output (total generated text this turn).
  const lastPromptTokens = usage.lastInput + usage.lastCacheRead + usage.lastCacheWrite;
  return {
    input: usage.lastInput || undefined,
    output: usage.output || undefined,
    cacheRead: usage.lastCacheRead || undefined,
    cacheWrite: usage.lastCacheWrite || undefined,
    total: lastPromptTokens + usage.output || undefined,
  };
};

/**
 * Runs one embedded Pi agent turn.
 *
 * The work is queued on the session lane and, inside that, on the global lane
 * (or on `params.enqueue` for both when the caller supplies it). The queued
 * task:
 *
 * 1. Resolves the workspace, provider and model; `before_model_resolve` and
 *    legacy `before_agent_start` hooks may override provider/model.
 * 2. Applies the context-window guard (warn, or block with a `FailoverError`).
 * 3. Selects an auth profile, skipping profiles that are in cooldown.
 * 4. Loops over `runEmbeddedAttempt`, retrying after context-overflow recovery
 *    (compaction, then tool-result truncation), auth-profile rotation, or a
 *    fallback thinking level.
 *
 * @param params - Run parameters (session, prompt, model/provider, callbacks).
 * @returns The run result: reply payloads plus metadata. Context overflow,
 *   role-ordering, image-size and empty-timeout conditions are returned as
 *   error payloads (`isError: true`) rather than thrown.
 * @throws FailoverError when the context window is below the hard minimum, or
 *   when fallbacks are configured and auth/prompt/assistant failures cannot be
 *   recovered by rotating profiles.
 * @throws Error when the model cannot be resolved, a user-locked auth profile
 *   is not configured for the provider, or a prompt error is not recoverable.
 */
export async function runEmbeddedPiAgent(
  params: RunEmbeddedPiAgentParams,
): Promise<EmbeddedPiRunResult> {
  const sessionLane = resolveSessionLane(params.sessionKey?.trim() || params.sessionId);
  const globalLane = resolveGlobalLane(params.lane);
  const enqueueGlobal =
    params.enqueue ?? ((task, opts) => enqueueCommandInLane(globalLane, task, opts));
  const enqueueSession =
    params.enqueue ?? ((task, opts) => enqueueCommandInLane(sessionLane, task, opts));
  const channelHint = params.messageChannel ?? params.messageProvider;
  // Explicit format wins; otherwise derive from the channel, defaulting to markdown.
  const resolvedToolResultFormat =
    params.toolResultFormat ??
    (channelHint
      ? isMarkdownCapableMessageChannel(channelHint)
        ? "markdown"
        : "plain"
      : "markdown");
  const isProbeSession = params.sessionId?.startsWith("probe-") ?? false;

  return enqueueSession(() =>
    enqueueGlobal(async () => {
      const started = Date.now();
      const workspaceResolution = resolveRunWorkspaceDir({
        workspaceDir: params.workspaceDir,
        sessionKey: params.sessionKey,
        agentId: params.agentId,
        config: params.config,
      });
      const resolvedWorkspace = workspaceResolution.workspaceDir;
      const redactedSessionId = redactRunIdentifier(params.sessionId);
      const redactedSessionKey = redactRunIdentifier(params.sessionKey);
      const redactedWorkspace = redactRunIdentifier(resolvedWorkspace);
      if (workspaceResolution.usedFallback) {
        log.warn(
          `[workspace-fallback] caller=runEmbeddedPiAgent reason=${workspaceResolution.fallbackReason} run=${params.runId} session=${redactedSessionId} sessionKey=${redactedSessionKey} agent=${workspaceResolution.agentId} workspace=${redactedWorkspace}`,
        );
      }
      // Captured so the `finally` below can restore the working directory.
      const prevCwd = process.cwd();

      let provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
      let modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
      const agentDir = params.agentDir ?? resolveOpenClawAgentDir();
      const fallbackConfigured =
        (params.config?.agents?.defaults?.model?.fallbacks?.length ?? 0) > 0;
      await ensureOpenClawModelsJson(params.config, agentDir);

      // Run before_model_resolve hooks early so plugins can override the
      // provider/model before resolveModel().
      //
      // Legacy compatibility: before_agent_start is also checked for override
      // fields if present. New hook takes precedence when both are set.
      let modelResolveOverride: { providerOverride?: string; modelOverride?: string } | undefined;
      const hookRunner = getGlobalHookRunner();
      const hookCtx = {
        agentId: workspaceResolution.agentId,
        sessionKey: params.sessionKey,
        sessionId: params.sessionId,
        workspaceDir: resolvedWorkspace,
        messageProvider: params.messageProvider ?? undefined,
      };
      if (hookRunner?.hasHooks("before_model_resolve")) {
        try {
          modelResolveOverride = await hookRunner.runBeforeModelResolve(
            { prompt: params.prompt },
            hookCtx,
          );
        } catch (hookErr) {
          // Hook failures are logged and ignored; the run continues without overrides.
          log.warn(`before_model_resolve hook failed: ${String(hookErr)}`);
        }
      }
      if (hookRunner?.hasHooks("before_agent_start")) {
        try {
          const legacyResult = await hookRunner.runBeforeAgentStart(
            { prompt: params.prompt },
            hookCtx,
          );
          modelResolveOverride = {
            providerOverride:
              modelResolveOverride?.providerOverride ?? legacyResult?.providerOverride,
            modelOverride: modelResolveOverride?.modelOverride ?? legacyResult?.modelOverride,
          };
        } catch (hookErr) {
          log.warn(
            `before_agent_start hook (legacy model resolve path) failed: ${String(hookErr)}`,
          );
        }
      }
      if (modelResolveOverride?.providerOverride) {
        provider = modelResolveOverride.providerOverride;
        log.info(`[hooks] provider overridden to ${provider}`);
      }
      if (modelResolveOverride?.modelOverride) {
        modelId = modelResolveOverride.modelOverride;
        log.info(`[hooks] model overridden to ${modelId}`);
      }

      const { model, error, authStorage, modelRegistry } = resolveModel(
        provider,
        modelId,
        agentDir,
        params.config,
      );
      if (!model) {
        throw new Error(error ?? `Unknown model: ${provider}/${modelId}`);
      }

      const ctxInfo = resolveContextWindowInfo({
        cfg: params.config,
        provider,
        modelId,
        modelContextWindow: model.contextWindow,
        defaultTokens: DEFAULT_CONTEXT_TOKENS,
      });
      const ctxGuard = evaluateContextWindowGuard({
        info: ctxInfo,
        warnBelowTokens: CONTEXT_WINDOW_WARN_BELOW_TOKENS,
        hardMinTokens: CONTEXT_WINDOW_HARD_MIN_TOKENS,
      });
      if (ctxGuard.shouldWarn) {
        log.warn(
          `low context window: ${provider}/${modelId} ctx=${ctxGuard.tokens} (warn<${CONTEXT_WINDOW_WARN_BELOW_TOKENS}) source=${ctxGuard.source}`,
        );
      }
      if (ctxGuard.shouldBlock) {
        log.error(
          `blocked model (context window too small): ${provider}/${modelId} ctx=${ctxGuard.tokens} (min=${CONTEXT_WINDOW_HARD_MIN_TOKENS}) source=${ctxGuard.source}`,
        );
        throw new FailoverError(
          `Model context window too small (${ctxGuard.tokens} tokens). Minimum is ${CONTEXT_WINDOW_HARD_MIN_TOKENS}.`,
          { reason: "unknown", provider, model: modelId },
        );
      }

      const authStore = ensureAuthProfileStore(agentDir, { allowKeychainPrompt: false });
      const preferredProfileId = params.authProfileId?.trim();
      // Only a user-chosen profile is "locked"; a lock disables profile rotation.
      let lockedProfileId = params.authProfileIdSource === "user" ? preferredProfileId : undefined;
      if (lockedProfileId) {
        const lockedProfile = authStore.profiles[lockedProfileId];
        if (
          !lockedProfile ||
          normalizeProviderId(lockedProfile.provider) !== normalizeProviderId(provider)
        ) {
          // Unknown profile or a profile for another provider: drop the lock.
          lockedProfileId = undefined;
        }
      }
      const profileOrder = resolveAuthProfileOrder({
        cfg: params.config,
        store: authStore,
        provider,
        preferredProfile: preferredProfileId,
      });
      if (lockedProfileId && !profileOrder.includes(lockedProfileId)) {
        throw new Error(`Auth profile "${lockedProfileId}" is not configured for ${provider}.`);
      }
      // `[undefined]` means "no stored profile": a single attempt with default auth resolution.
      const profileCandidates = lockedProfileId
        ? [lockedProfileId]
        : profileOrder.length > 0
          ? profileOrder
          : [undefined];
      let profileIndex = 0;

      const initialThinkLevel = params.thinkLevel ?? "off";
      let thinkLevel = initialThinkLevel;
      const attemptedThinking = new Set<ThinkLevel>();
      let apiKeyInfo: ApiKeyInfo | null = null;
      let lastProfileId: string | undefined;

      /** Maps an auth-profile failure to a failover reason ("rate_limit" when all are cooling down). */
      const resolveAuthProfileFailoverReason = (input: {
        allInCooldown: boolean;
        message: string;
      }): FailoverReason => {
        if (input.allInCooldown) {
          return "rate_limit";
        }
        const classified = classifyFailoverReason(input.message);
        return classified ?? "auth";
      };

      /**
       * Always throws: a `FailoverError` when model fallbacks are configured,
       * otherwise the original error (if it is an `Error`) or a new `Error`.
       */
      const throwAuthProfileFailover = (input: {
        allInCooldown: boolean;
        message?: string;
        error?: unknown;
      }): never => {
        const fallbackMessage = `No available auth profile for ${provider} (all in cooldown or unavailable).`;
        const message =
          input.message?.trim() ||
          (input.error ? describeUnknownError(input.error).trim() : "") ||
          fallbackMessage;
        const reason = resolveAuthProfileFailoverReason({
          allInCooldown: input.allInCooldown,
          message,
        });
        if (fallbackConfigured) {
          throw new FailoverError(message, {
            reason,
            provider,
            model: modelId,
            status: resolveFailoverStatus(reason),
            cause: input.error,
          });
        }
        if (input.error instanceof Error) {
          throw input.error;
        }
        throw new Error(message);
      };

      const resolveApiKeyForCandidate = async (candidate?: string) => {
        return getApiKeyForModel({
          model,
          cfg: params.config,
          profileId: candidate,
          store: authStore,
          agentDir,
        });
      };

      /**
       * Resolves credentials for `candidate` and installs them as the runtime
       * API key for the model's provider. Throws when no key is resolved and
       * the auth mode is not "aws-sdk".
       */
      const applyApiKeyInfo = async (candidate?: string): Promise<void> => {
        apiKeyInfo = await resolveApiKeyForCandidate(candidate);
        const resolvedProfileId = apiKeyInfo.profileId ?? candidate;
        if (!apiKeyInfo.apiKey) {
          if (apiKeyInfo.mode !== "aws-sdk") {
            throw new Error(
              `No API key resolved for provider "${model.provider}" (auth mode: ${apiKeyInfo.mode}).`,
            );
          }
          // aws-sdk mode has no API key to install; only remember the profile.
          lastProfileId = resolvedProfileId;
          return;
        }
        if (model.provider === "github-copilot") {
          // Copilot: the stored key is a GitHub token that is exchanged for a Copilot API token.
          const { resolveCopilotApiToken } =
            await import("../../providers/github-copilot-token.js");
          const copilotToken = await resolveCopilotApiToken({
            githubToken: apiKeyInfo.apiKey,
          });
          authStorage.setRuntimeApiKey(model.provider, copilotToken.token);
        } else {
          authStorage.setRuntimeApiKey(model.provider, apiKeyInfo.apiKey);
        }
        // NOTE(review): the aws-sdk path above records `resolvedProfileId` (falls back to
        // `candidate`), while this path records only `apiKeyInfo.profileId` — confirm the
        // asymmetry is intentional.
        lastProfileId = apiKeyInfo.profileId;
      };

      /**
       * Moves to the next usable auth profile after the current one.
       *
       * @returns `true` when a new profile was applied (thinking-level state is
       *   reset); `false` when the profile is locked or no candidate remains.
       */
      const advanceAuthProfile = async (): Promise<boolean> => {
        if (lockedProfileId) {
          return false;
        }
        let nextIndex = profileIndex + 1;
        while (nextIndex < profileCandidates.length) {
          const candidate = profileCandidates[nextIndex];
          if (candidate && isProfileInCooldown(authStore, candidate)) {
            nextIndex += 1;
            continue;
          }
          try {
            await applyApiKeyInfo(candidate);
            profileIndex = nextIndex;
            thinkLevel = initialThinkLevel;
            attemptedThinking.clear();
            return true;
          } catch (err) {
            if (candidate && candidate === lockedProfileId) {
              throw err;
            }
            // Credentials for this candidate could not be applied; try the next one.
            nextIndex += 1;
          }
        }
        return false;
      };

      // Initial profile selection: first candidate not in cooldown (a locked profile is
      // tried even when in cooldown).
      try {
        while (profileIndex < profileCandidates.length) {
          const candidate = profileCandidates[profileIndex];
          if (
            candidate &&
            candidate !== lockedProfileId &&
            isProfileInCooldown(authStore, candidate)
          ) {
            profileIndex += 1;
            continue;
          }
          await applyApiKeyInfo(profileCandidates[profileIndex]);
          break;
        }
        if (profileIndex >= profileCandidates.length) {
          throwAuthProfileFailover({ allInCooldown: true });
        }
      } catch (err) {
        if (err instanceof FailoverError) {
          throw err;
        }
        if (profileCandidates[profileIndex] === lockedProfileId) {
          throwAuthProfileFailover({ allInCooldown: false, error: err });
        }
        const advanced = await advanceAuthProfile();
        if (!advanced) {
          throwAuthProfileFailover({ allInCooldown: false, error: err });
        }
      }

      const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
      let overflowCompactionAttempts = 0;
      let toolResultTruncationAttempted = false;
      const usageAccumulator = createUsageAccumulator();
      let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
      let autoCompactionCount = 0;
      try {
        // Each iteration is one attempt; `continue` retries after a recovery step,
        // `return`/`throw` ends the run.
        while (true) {
          attemptedThinking.add(thinkLevel);
          await fs.mkdir(resolvedWorkspace, { recursive: true });

          const prompt =
            provider === "anthropic" ? scrubAnthropicRefusalMagic(params.prompt) : params.prompt;

          const attempt = await runEmbeddedAttempt({
            sessionId: params.sessionId,
            sessionKey: params.sessionKey,
            messageChannel: params.messageChannel,
            messageProvider: params.messageProvider,
            agentAccountId: params.agentAccountId,
            messageTo: params.messageTo,
            messageThreadId: params.messageThreadId,
            groupId: params.groupId,
            groupChannel: params.groupChannel,
            groupSpace: params.groupSpace,
            spawnedBy: params.spawnedBy,
            senderIsOwner: params.senderIsOwner,
            currentChannelId: params.currentChannelId,
            currentThreadTs: params.currentThreadTs,
            replyToMode: params.replyToMode,
            hasRepliedRef: params.hasRepliedRef,
            sessionFile: params.sessionFile,
            workspaceDir: resolvedWorkspace,
            agentDir,
            config: params.config,
            skillsSnapshot: params.skillsSnapshot,
            prompt,
            images: params.images,
            disableTools: params.disableTools,
            provider,
            modelId,
            model,
            authStorage,
            modelRegistry,
            agentId: workspaceResolution.agentId,
            thinkLevel,
            verboseLevel: params.verboseLevel,
            reasoningLevel: params.reasoningLevel,
            toolResultFormat: resolvedToolResultFormat,
            execOverrides: params.execOverrides,
            bashElevated: params.bashElevated,
            timeoutMs: params.timeoutMs,
            runId: params.runId,
            abortSignal: params.abortSignal,
            shouldEmitToolResult: params.shouldEmitToolResult,
            shouldEmitToolOutput: params.shouldEmitToolOutput,
            onPartialReply: params.onPartialReply,
            onAssistantMessageStart: params.onAssistantMessageStart,
            onBlockReply: params.onBlockReply,
            onBlockReplyFlush: params.onBlockReplyFlush,
            blockReplyBreak: params.blockReplyBreak,
            blockReplyChunking: params.blockReplyChunking,
            onReasoningStream: params.onReasoningStream,
            onReasoningEnd: params.onReasoningEnd,
            onToolResult: params.onToolResult,
            onAgentEvent: params.onAgentEvent,
            extraSystemPrompt: params.extraSystemPrompt,
            inputProvenance: params.inputProvenance,
            streamParams: params.streamParams,
            ownerNumbers: params.ownerNumbers,
            enforceFinalTag: params.enforceFinalTag,
          });

          const {
            aborted,
            promptError,
            timedOut,
            timedOutDuringCompaction,
            sessionIdUsed,
            lastAssistant,
          } = attempt;
          const lastAssistantUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
          const attemptUsage = attempt.attemptUsage ?? lastAssistantUsage;
          mergeUsageIntoAccumulator(usageAccumulator, attemptUsage);
          // Keep prompt size from the latest model call so session totalTokens
          // reflects current context usage, not accumulated tool-loop usage.
          lastRunPromptUsage = lastAssistantUsage ?? attemptUsage;
          const lastTurnTotal = lastAssistantUsage?.total ?? attemptUsage?.total;
          const attemptCompactionCount = Math.max(0, attempt.compactionCount ?? 0);
          autoCompactionCount += attemptCompactionCount;
          const formattedAssistantErrorText = lastAssistant
            ? formatAssistantErrorText(lastAssistant, {
                cfg: params.config,
                sessionKey: params.sessionKey ?? params.sessionId,
                provider,
              })
            : undefined;
          const assistantErrorText =
            lastAssistant?.stopReason === "error"
              ? lastAssistant.errorMessage?.trim() || formattedAssistantErrorText
              : undefined;

          const contextOverflowError = !aborted
            ? (() => {
                if (promptError) {
                  const errorText = describeUnknownError(promptError);
                  if (isLikelyContextOverflowError(errorText)) {
                    return { text: errorText, source: "promptError" as const };
                  }
                  // Prompt submission failed with a non-overflow error. Do not
                  // inspect prior assistant errors from history for this attempt.
                  return null;
                }
                if (assistantErrorText && isLikelyContextOverflowError(assistantErrorText)) {
                  return { text: assistantErrorText, source: "assistantError" as const };
                }
                return null;
              })()
            : null;

          if (contextOverflowError) {
            const overflowDiagId = createCompactionDiagId();
            const errorText = contextOverflowError.text;
            const msgCount = attempt.messagesSnapshot?.length ?? 0;
            log.warn(
              `[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
                `provider=${provider}/${modelId} source=${contextOverflowError.source} ` +
                `messages=${msgCount} sessionFile=${params.sessionFile} ` +
                `diagId=${overflowDiagId} compactionAttempts=${overflowCompactionAttempts} ` +
                `error=${errorText.slice(0, 200)}`,
            );
            const isCompactionFailure = isCompactionFailureError(errorText);
            const hadAttemptLevelCompaction = attemptCompactionCount > 0;
            // If this attempt already compacted (SDK auto-compaction), avoid immediately
            // running another explicit compaction for the same overflow trigger.
            if (
              !isCompactionFailure &&
              hadAttemptLevelCompaction &&
              overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
            ) {
              overflowCompactionAttempts++;
              log.warn(
                `context overflow persisted after in-attempt compaction (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); retrying prompt without additional compaction for ${provider}/${modelId}`,
              );
              continue;
            }
            // Attempt explicit overflow compaction only when this attempt did not
            // already auto-compact.
            if (
              !isCompactionFailure &&
              !hadAttemptLevelCompaction &&
              overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
            ) {
              if (log.isEnabled("debug")) {
                log.debug(
                  `[compaction-diag] decision diagId=${overflowDiagId} branch=compact ` +
                    `isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=unknown ` +
                    `attempt=${overflowCompactionAttempts + 1} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
                );
              }
              overflowCompactionAttempts++;
              log.warn(
                `context overflow detected (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); attempting auto-compaction for ${provider}/${modelId}`,
              );
              const compactResult = await compactEmbeddedPiSessionDirect({
                sessionId: params.sessionId,
                sessionKey: params.sessionKey,
                messageChannel: params.messageChannel,
                messageProvider: params.messageProvider,
                agentAccountId: params.agentAccountId,
                authProfileId: lastProfileId,
                sessionFile: params.sessionFile,
                workspaceDir: resolvedWorkspace,
                agentDir,
                config: params.config,
                skillsSnapshot: params.skillsSnapshot,
                senderIsOwner: params.senderIsOwner,
                provider,
                model: modelId,
                runId: params.runId,
                thinkLevel,
                reasoningLevel: params.reasoningLevel,
                bashElevated: params.bashElevated,
                extraSystemPrompt: params.extraSystemPrompt,
                ownerNumbers: params.ownerNumbers,
                trigger: "overflow",
                diagId: overflowDiagId,
                attempt: overflowCompactionAttempts,
                maxAttempts: MAX_OVERFLOW_COMPACTION_ATTEMPTS,
              });
              if (compactResult.compacted) {
                autoCompactionCount += 1;
                log.info(`auto-compaction succeeded for ${provider}/${modelId}; retrying prompt`);
                continue;
              }
              log.warn(
                `auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
              );
            }
            // Fallback: try truncating oversized tool results in the session.
            // This handles the case where a single tool result exceeds the
            // context window and compaction cannot reduce it further.
            if (!toolResultTruncationAttempted) {
              const contextWindowTokens = ctxInfo.tokens;
              const hasOversized = attempt.messagesSnapshot
                ? sessionLikelyHasOversizedToolResults({
                    messages: attempt.messagesSnapshot,
                    contextWindowTokens,
                  })
                : false;

              if (hasOversized) {
                if (log.isEnabled("debug")) {
                  log.debug(
                    `[compaction-diag] decision diagId=${overflowDiagId} branch=truncate_tool_results ` +
                      `isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=${hasOversized} ` +
                      `attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
                  );
                }
                // Truncation is tried at most once per run.
                toolResultTruncationAttempted = true;
                log.warn(
                  `[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
                    `(contextWindow=${contextWindowTokens} tokens)`,
                );
                const truncResult = await truncateOversizedToolResultsInSession({
                  sessionFile: params.sessionFile,
                  contextWindowTokens,
                  sessionId: params.sessionId,
                  sessionKey: params.sessionKey,
                });
                if (truncResult.truncated) {
                  log.info(
                    `[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
                  );
                  // Session is now smaller; allow compaction retries again.
                  overflowCompactionAttempts = 0;
                  continue;
                }
                log.warn(
                  `[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
                );
              } else if (log.isEnabled("debug")) {
                log.debug(
                  `[compaction-diag] decision diagId=${overflowDiagId} branch=give_up ` +
                    `isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=${hasOversized} ` +
                    `attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
                );
              }
            }
            if (
              (isCompactionFailure ||
                overflowCompactionAttempts >= MAX_OVERFLOW_COMPACTION_ATTEMPTS ||
                toolResultTruncationAttempted) &&
              log.isEnabled("debug")
            ) {
              log.debug(
                `[compaction-diag] decision diagId=${overflowDiagId} branch=give_up ` +
                  `isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=unknown ` +
                  `attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
              );
            }
            // All recovery options exhausted: report the overflow as an error payload.
            const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
            return {
              payloads: [
                {
                  text:
                    "Context overflow: prompt too large for the model. " +
                    "Try /reset (or /new) to start a fresh session, or use a larger-context model.",
                  isError: true,
                },
              ],
              meta: {
                durationMs: Date.now() - started,
                agentMeta: {
                  sessionId: sessionIdUsed,
                  provider,
                  model: model.id,
                },
                systemPromptReport: attempt.systemPromptReport,
                error: { kind, message: errorText },
              },
            };
          }

          if (promptError && !aborted) {
            const errorText = describeUnknownError(promptError);
            // Handle role ordering errors with a user-friendly message
            if (/incorrect role information|roles must alternate/i.test(errorText)) {
              return {
                payloads: [
                  {
                    text:
                      "Message ordering conflict - please try again. " +
                      "If this persists, use /new to start a fresh session.",
                    isError: true,
                  },
                ],
                meta: {
                  durationMs: Date.now() - started,
                  agentMeta: {
                    sessionId: sessionIdUsed,
                    provider,
                    model: model.id,
                  },
                  systemPromptReport: attempt.systemPromptReport,
                  error: { kind: "role_ordering", message: errorText },
                },
              };
            }
            // Handle image size errors with a user-friendly message (no retry needed)
            const imageSizeError = parseImageSizeError(errorText);
            if (imageSizeError) {
              const maxMb = imageSizeError.maxMb;
              const maxMbLabel =
                typeof maxMb === "number" && Number.isFinite(maxMb) ? `${maxMb}` : null;
              const maxBytesHint = maxMbLabel ? ` (max ${maxMbLabel}MB)` : "";
              return {
                payloads: [
                  {
                    text:
                      `Image too large for the model${maxBytesHint}. ` +
                      "Please compress or resize the image and try again.",
                    isError: true,
                  },
                ],
                meta: {
                  durationMs: Date.now() - started,
                  agentMeta: {
                    sessionId: sessionIdUsed,
                    provider,
                    model: model.id,
                  },
                  systemPromptReport: attempt.systemPromptReport,
                  error: { kind: "image_size", message: errorText },
                },
              };
            }
            const promptFailoverReason = classifyFailoverReason(errorText);
            if (promptFailoverReason && promptFailoverReason !== "timeout" && lastProfileId) {
              // NOTE(review): passes `params.agentDir` (possibly undefined) rather than the
              // resolved `agentDir` used to load `authStore` — confirm both target the same store.
              await markAuthProfileFailure({
                store: authStore,
                profileId: lastProfileId,
                reason: promptFailoverReason,
                cfg: params.config,
                agentDir: params.agentDir,
              });
            }
            if (
              isFailoverErrorMessage(errorText) &&
              promptFailoverReason !== "timeout" &&
              (await advanceAuthProfile())
            ) {
              continue;
            }
            const fallbackThinking = pickFallbackThinkingLevel({
              message: errorText,
              attempted: attemptedThinking,
            });
            if (fallbackThinking) {
              log.warn(
                `unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
              );
              thinkLevel = fallbackThinking;
              continue;
            }
            // FIX: Throw FailoverError for prompt errors when fallbacks configured
            // This enables model fallback for quota/rate limit errors during prompt submission
            if (fallbackConfigured && isFailoverErrorMessage(errorText)) {
              throw new FailoverError(errorText, {
                reason: promptFailoverReason ?? "unknown",
                provider,
                model: modelId,
                profileId: lastProfileId,
                status: resolveFailoverStatus(promptFailoverReason ?? "unknown"),
              });
            }
            throw promptError;
          }

          const fallbackThinking = pickFallbackThinkingLevel({
            message: lastAssistant?.errorMessage,
            attempted: attemptedThinking,
          });
          if (fallbackThinking && !aborted) {
            log.warn(
              `unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
            );
            thinkLevel = fallbackThinking;
            continue;
          }

          const authFailure = isAuthAssistantError(lastAssistant);
          const rateLimitFailure = isRateLimitAssistantError(lastAssistant);
          const billingFailure = isBillingAssistantError(lastAssistant);
          const failoverFailure = isFailoverAssistantError(lastAssistant);
          const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
          const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
          const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? "");

          if (imageDimensionError && lastProfileId) {
            const details = [
              imageDimensionError.messageIndex !== undefined
                ? `message=${imageDimensionError.messageIndex}`
                : null,
              imageDimensionError.contentIndex !== undefined
                ? `content=${imageDimensionError.contentIndex}`
                : null,
              imageDimensionError.maxDimensionPx !== undefined
                ? `limit=${imageDimensionError.maxDimensionPx}px`
                : null,
            ]
              .filter(Boolean)
              .join(" ");
            log.warn(
              `Profile ${lastProfileId} rejected image payload${details ? ` (${details})` : ""}.`,
            );
          }

          // Treat timeout as potential rate limit (Antigravity hangs on rate limit)
          // But exclude post-prompt compaction timeouts (model succeeded; no profile issue)
          const shouldRotate =
            (!aborted && failoverFailure) || (timedOut && !timedOutDuringCompaction);

          if (shouldRotate) {
            if (lastProfileId) {
              const reason =
                timedOut || assistantFailoverReason === "timeout"
                  ? "timeout"
                  : (assistantFailoverReason ?? "unknown");
              await markAuthProfileFailure({
                store: authStore,
                profileId: lastProfileId,
                reason,
                cfg: params.config,
                agentDir: params.agentDir,
              });
              if (timedOut && !isProbeSession) {
                log.warn(
                  `Profile ${lastProfileId} timed out (possible rate limit). Trying next account...`,
                );
              }
              if (cloudCodeAssistFormatError) {
                log.warn(
                  `Profile ${lastProfileId} hit Cloud Code Assist format error. Tool calls will be sanitized on retry.`,
                );
              }
            }

            const rotated = await advanceAuthProfile();
            if (rotated) {
              continue;
            }

            if (fallbackConfigured) {
              // Prefer formatted error message (user-friendly) over raw errorMessage
              const message =
                (lastAssistant
                  ? formatAssistantErrorText(lastAssistant, {
                      cfg: params.config,
                      sessionKey: params.sessionKey ?? params.sessionId,
                      provider,
                    })
                  : undefined) ||
                lastAssistant?.errorMessage?.trim() ||
                (timedOut
                  ? "LLM request timed out."
                  : rateLimitFailure
                    ? "LLM request rate limited."
                    : billingFailure
                      ? formatBillingErrorMessage(provider)
                      : authFailure
                        ? "LLM request unauthorized."
                        : "LLM request failed.");
              const status =
                resolveFailoverStatus(assistantFailoverReason ?? "unknown") ??
                (isTimeoutErrorMessage(message) ? 408 : undefined);
              throw new FailoverError(message, {
                reason: assistantFailoverReason ?? "unknown",
                provider,
                model: modelId,
                profileId: lastProfileId,
                status,
              });
            }
            // No rotation and no fallback configured: fall through and report this attempt's result.
          }

          const usage = toNormalizedUsage(usageAccumulator);
          if (usage && lastTurnTotal && lastTurnTotal > 0) {
            usage.total = lastTurnTotal;
          }
          // Extract the last individual API call's usage for context-window
          // utilization display. The accumulated `usage` sums input tokens
          // across all calls (tool-use loops, compaction retries), which
          // overstates the actual context size. `lastCallUsage` reflects only
          // the final call, giving an accurate snapshot of current context.
          const lastCallUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
          const promptTokens = derivePromptTokens(lastRunPromptUsage);
          const agentMeta: EmbeddedPiAgentMeta = {
            sessionId: sessionIdUsed,
            provider: lastAssistant?.provider ?? provider,
            model: lastAssistant?.model ?? model.id,
            usage,
            lastCallUsage: lastCallUsage ?? undefined,
            promptTokens,
            compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined,
          };

          const payloads = buildEmbeddedRunPayloads({
            assistantTexts: attempt.assistantTexts,
            toolMetas: attempt.toolMetas,
            lastAssistant: attempt.lastAssistant,
            lastToolError: attempt.lastToolError,
            config: params.config,
            sessionKey: params.sessionKey ?? params.sessionId,
            provider,
            verboseLevel: params.verboseLevel,
            reasoningLevel: params.reasoningLevel,
            toolResultFormat: resolvedToolResultFormat,
            suppressToolErrorWarnings: params.suppressToolErrorWarnings,
            inlineToolResultsAllowed: false,
          });

          // Timeout aborts can leave the run without any assistant payloads.
          // Emit an explicit timeout error instead of silently completing, so
          // callers do not lose the turn as an orphaned user message.
          if (timedOut && !timedOutDuringCompaction && payloads.length === 0) {
            return {
              payloads: [
                {
                  text:
                    "Request timed out before a response was generated. " +
                    "Please try again, or increase `agents.defaults.timeoutSeconds` in your config.",
                  isError: true,
                },
              ],
              meta: {
                durationMs: Date.now() - started,
                agentMeta,
                aborted,
                systemPromptReport: attempt.systemPromptReport,
              },
              didSendViaMessagingTool: attempt.didSendViaMessagingTool,
              messagingToolSentTexts: attempt.messagingToolSentTexts,
              messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
              messagingToolSentTargets: attempt.messagingToolSentTargets,
              successfulCronAdds: attempt.successfulCronAdds,
            };
          }

          log.debug(
            `embedded run done: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - started} aborted=${aborted}`,
          );
          if (lastProfileId) {
            await markAuthProfileGood({
              store: authStore,
              provider,
              profileId: lastProfileId,
              agentDir: params.agentDir,
            });
            await markAuthProfileUsed({
              store: authStore,
              profileId: lastProfileId,
              agentDir: params.agentDir,
            });
          }
          return {
            payloads: payloads.length ? payloads : undefined,
            meta: {
              durationMs: Date.now() - started,
              agentMeta,
              aborted,
              systemPromptReport: attempt.systemPromptReport,
              // Handle client tool calls (OpenResponses hosted tools)
              stopReason: attempt.clientToolCall ? "tool_calls" : undefined,
              pendingToolCalls: attempt.clientToolCall
                ? [
                    {
                      id: `call_${Date.now()}`,
                      name: attempt.clientToolCall.name,
                      arguments: JSON.stringify(attempt.clientToolCall.params),
                    },
                  ]
                : undefined,
            },
            didSendViaMessagingTool: attempt.didSendViaMessagingTool,
            messagingToolSentTexts: attempt.messagingToolSentTexts,
            messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
            messagingToolSentTargets: attempt.messagingToolSentTargets,
            successfulCronAdds: attempt.successfulCronAdds,
          };
        }
      } finally {
        // Restore the working directory captured before the attempt loop.
        process.chdir(prevCwd);
      }
    }),
  );
}