867 lines
34 KiB
TypeScript
Raw Normal View History

2026-01-14 01:08:15 +00:00
import fs from "node:fs/promises";
import type { ThinkLevel } from "../../auto-reply/thinking.js";
import type { RunEmbeddedPiAgentParams } from "./run/params.js";
import type { EmbeddedPiAgentMeta, EmbeddedPiRunResult } from "./types.js";
2026-01-14 01:08:15 +00:00
import { enqueueCommandInLane } from "../../process/command-queue.js";
import { isMarkdownCapableMessageChannel } from "../../utils/message-channel.js";
2026-01-30 03:15:10 +01:00
import { resolveOpenClawAgentDir } from "../agent-paths.js";
2026-01-14 01:08:15 +00:00
import {
isProfileInCooldown,
2026-01-14 01:08:15 +00:00
markAuthProfileFailure,
markAuthProfileGood,
markAuthProfileUsed,
} from "../auth-profiles.js";
import {
CONTEXT_WINDOW_HARD_MIN_TOKENS,
CONTEXT_WINDOW_WARN_BELOW_TOKENS,
evaluateContextWindowGuard,
resolveContextWindowInfo,
} from "../context-window-guard.js";
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
2026-01-14 01:08:15 +00:00
import { FailoverError, resolveFailoverStatus } from "../failover-error.js";
import {
ensureAuthProfileStore,
getApiKeyForModel,
resolveAuthProfileOrder,
type ResolvedProviderAuth,
2026-01-14 01:08:15 +00:00
} from "../model-auth.js";
import { normalizeProviderId } from "../model-selection.js";
2026-01-30 03:15:10 +01:00
import { ensureOpenClawModelsJson } from "../models-config.js";
2026-01-14 01:08:15 +00:00
import {
BILLING_ERROR_USER_MESSAGE,
2026-01-14 01:08:15 +00:00
classifyFailoverReason,
formatAssistantErrorText,
isAuthAssistantError,
isBillingAssistantError,
2026-01-14 01:08:15 +00:00
isCompactionFailureError,
isContextOverflowError,
isFailoverAssistantError,
isFailoverErrorMessage,
parseImageSizeError,
2026-01-18 15:19:25 +00:00
parseImageDimensionError,
2026-01-14 01:08:15 +00:00
isRateLimitAssistantError,
isTimeoutErrorMessage,
pickFallbackThinkingLevel,
type FailoverReason,
2026-01-14 01:08:15 +00:00
} from "../pi-embedded-helpers.js";
import { normalizeUsage, type UsageLike } from "../usage.js";
import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js";
fix: auto-compact on context overflow promptError before returning error (#1627) * fix: detect Anthropic 'Request size exceeds model context window' as context overflow Anthropic now returns 'Request size exceeds model context window' instead of the previously detected 'prompt is too long' format. This new error message was not recognized by isContextOverflowError(), causing auto-compaction to NOT trigger. Users would see the raw error twice without any recovery attempt. Changes: - Add 'exceeds model context window' and 'request size exceeds' to isContextOverflowError() detection patterns - Add tests that fail without the fix, verifying both the raw error string and the JSON-wrapped format from Anthropic's API - Add test for formatAssistantErrorText to ensure the friendly 'Context overflow' message is shown instead of the raw error Note: The upstream pi-ai package (@mariozechner/pi-ai) also needs a fix in its OVERFLOW_PATTERNS regex: /exceeds the context window/i should be changed to /exceeds.*context window/i to match both 'the' and 'model' variants for triggering auto-compaction retry. * fix(tests): remove unused imports and helper from test files Remove WorkspaceBootstrapFile references and _makeFile helper that were incorrectly copied from another test file. These caused type errors and were unrelated to the context overflow detection tests. * fix: trigger auto-compaction on context overflow promptError When the LLM rejects a request with a context overflow error that surfaces as a promptError (thrown exception rather than streamed error), the existing auto-compaction in pi-coding-agent never triggers. This happens because the error bypasses the agent's message_end → agent_end → _checkCompaction path. This fix adds a fallback compaction attempt directly in the run loop: - Detects context overflow in promptError (excluding compaction_failure) - Calls compactEmbeddedPiSessionDirect (bypassing lane queues since already in-lane) - Retries the prompt after successful compaction - Limits to one compaction attempt per run to prevent infinite loops Fixes: context overflow errors shown to user without auto-compaction attempt * style: format compact.ts and run.ts with oxfmt * fix: tighten context overflow match (#1627) (thanks @rodrigouroz) --------- Co-authored-by: Claude <claude@anthropic.com> Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-01-24 19:09:24 -03:00
import { compactEmbeddedPiSessionDirect } from "./compact.js";
2026-01-14 01:08:15 +00:00
import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
import { log } from "./logger.js";
import { resolveModel } from "./model.js";
import { runEmbeddedAttempt } from "./run/attempt.js";
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
fix: recover from context overflow caused by oversized tool results (#11579) * fix: gracefully handle oversized tool results causing context overflow When a subagent reads a very large file or gets a huge tool result (e.g., gh pr diff on a massive PR), it can exceed the model's context window in a single prompt. Auto-compaction can't help because there's no older history to compact — just one giant tool result. This adds two layers of defense: 1. Pre-emptive: Hard cap on tool result size (400K chars ≈ 100K tokens) applied in the session tool result guard before persistence. This prevents extremely large tool results from being stored in full, regardless of model context window size. 2. Recovery: When context overflow is detected and compaction fails, scan session messages for oversized tool results relative to the model's actual context window (30% max share). If found, truncate them in the session via branching (creating a new branch with truncated content) and retry the prompt. The truncation preserves the beginning of the content (most useful for understanding what was read) and appends a notice explaining the truncation and suggesting offset/limit parameters for targeted reads. Includes comprehensive tests for: - Text truncation with newline-boundary awareness - Context-window-proportional size calculation - In-memory message truncation - Oversized detection heuristics - Guard-level size capping during persistence * fix: prep fixes for tool result truncation PR (#11579) (thanks @tyler6204)
2026-02-07 17:40:51 -08:00
import {
truncateOversizedToolResultsInSession,
sessionLikelyHasOversizedToolResults,
} from "./tool-result-truncation.js";
2026-01-14 01:08:15 +00:00
import { describeUnknownError } from "./utils.js";
type ApiKeyInfo = ResolvedProviderAuth;
2026-01-14 01:08:15 +00:00
2026-01-21 07:28:11 +00:00
// Avoid Anthropic's refusal test token poisoning session transcripts.
const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)";
function scrubAnthropicRefusalMagic(prompt: string): string {
if (!prompt.includes(ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL)) {
return prompt;
}
2026-01-21 07:28:11 +00:00
return prompt.replaceAll(
ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL,
ANTHROPIC_MAGIC_STRING_REPLACEMENT,
);
}
type UsageAccumulator = {
input: number;
output: number;
cacheRead: number;
cacheWrite: number;
total: number;
};
const createUsageAccumulator = (): UsageAccumulator => ({
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
total: 0,
});
const hasUsageValues = (
usage: ReturnType<typeof normalizeUsage>,
): usage is NonNullable<ReturnType<typeof normalizeUsage>> =>
!!usage &&
[usage.input, usage.output, usage.cacheRead, usage.cacheWrite, usage.total].some(
(value) => typeof value === "number" && Number.isFinite(value) && value > 0,
);
const mergeUsageIntoAccumulator = (
target: UsageAccumulator,
usage: ReturnType<typeof normalizeUsage>,
) => {
if (!hasUsageValues(usage)) {
return;
}
target.input += usage.input ?? 0;
target.output += usage.output ?? 0;
target.cacheRead += usage.cacheRead ?? 0;
target.cacheWrite += usage.cacheWrite ?? 0;
target.total +=
usage.total ??
(usage.input ?? 0) + (usage.output ?? 0) + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0);
};
const toNormalizedUsage = (usage: UsageAccumulator) => {
const hasUsage =
usage.input > 0 ||
usage.output > 0 ||
usage.cacheRead > 0 ||
usage.cacheWrite > 0 ||
usage.total > 0;
if (!hasUsage) {
return undefined;
}
const derivedTotal = usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
return {
input: usage.input || undefined,
output: usage.output || undefined,
cacheRead: usage.cacheRead || undefined,
cacheWrite: usage.cacheWrite || undefined,
total: usage.total || derivedTotal || undefined,
};
};
2026-01-14 01:08:15 +00:00
export async function runEmbeddedPiAgent(
params: RunEmbeddedPiAgentParams,
): Promise<EmbeddedPiRunResult> {
const sessionLane = resolveSessionLane(params.sessionKey?.trim() || params.sessionId);
2026-01-14 01:08:15 +00:00
const globalLane = resolveGlobalLane(params.lane);
const enqueueGlobal =
params.enqueue ?? ((task, opts) => enqueueCommandInLane(globalLane, task, opts));
const enqueueSession =
params.enqueue ?? ((task, opts) => enqueueCommandInLane(sessionLane, task, opts));
const channelHint = params.messageChannel ?? params.messageProvider;
const resolvedToolResultFormat =
params.toolResultFormat ??
(channelHint
? isMarkdownCapableMessageChannel(channelHint)
? "markdown"
: "plain"
: "markdown");
2026-01-24 00:04:53 +00:00
const isProbeSession = params.sessionId?.startsWith("probe-") ?? false;
2026-01-14 01:08:15 +00:00
return enqueueSession(() =>
2026-01-14 01:08:15 +00:00
enqueueGlobal(async () => {
const started = Date.now();
const workspaceResolution = resolveRunWorkspaceDir({
workspaceDir: params.workspaceDir,
sessionKey: params.sessionKey,
agentId: params.agentId,
config: params.config,
});
const resolvedWorkspace = workspaceResolution.workspaceDir;
const redactedSessionId = redactRunIdentifier(params.sessionId);
const redactedSessionKey = redactRunIdentifier(params.sessionKey);
const redactedWorkspace = redactRunIdentifier(resolvedWorkspace);
if (workspaceResolution.usedFallback) {
log.warn(
`[workspace-fallback] caller=runEmbeddedPiAgent reason=${workspaceResolution.fallbackReason} run=${params.runId} session=${redactedSessionId} sessionKey=${redactedSessionKey} agent=${workspaceResolution.agentId} workspace=${redactedWorkspace}`,
);
}
2026-01-14 01:08:15 +00:00
const prevCwd = process.cwd();
const provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
2026-01-14 01:08:15 +00:00
const modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
2026-01-30 03:15:10 +01:00
const agentDir = params.agentDir ?? resolveOpenClawAgentDir();
const fallbackConfigured =
(params.config?.agents?.defaults?.model?.fallbacks?.length ?? 0) > 0;
2026-01-30 03:15:10 +01:00
await ensureOpenClawModelsJson(params.config, agentDir);
2026-01-14 01:08:15 +00:00
const { model, error, authStorage, modelRegistry } = resolveModel(
provider,
modelId,
agentDir,
params.config,
);
if (!model) {
throw new Error(error ?? `Unknown model: ${provider}/${modelId}`);
}
const ctxInfo = resolveContextWindowInfo({
cfg: params.config,
provider,
modelId,
modelContextWindow: model.contextWindow,
defaultTokens: DEFAULT_CONTEXT_TOKENS,
});
const ctxGuard = evaluateContextWindowGuard({
info: ctxInfo,
warnBelowTokens: CONTEXT_WINDOW_WARN_BELOW_TOKENS,
hardMinTokens: CONTEXT_WINDOW_HARD_MIN_TOKENS,
});
if (ctxGuard.shouldWarn) {
log.warn(
`low context window: ${provider}/${modelId} ctx=${ctxGuard.tokens} (warn<${CONTEXT_WINDOW_WARN_BELOW_TOKENS}) source=${ctxGuard.source}`,
);
}
if (ctxGuard.shouldBlock) {
log.error(
`blocked model (context window too small): ${provider}/${modelId} ctx=${ctxGuard.tokens} (min=${CONTEXT_WINDOW_HARD_MIN_TOKENS}) source=${ctxGuard.source}`,
);
throw new FailoverError(
`Model context window too small (${ctxGuard.tokens} tokens). Minimum is ${CONTEXT_WINDOW_HARD_MIN_TOKENS}.`,
{ reason: "unknown", provider, model: modelId },
);
}
const authStore = ensureAuthProfileStore(agentDir, { allowKeychainPrompt: false });
const preferredProfileId = params.authProfileId?.trim();
let lockedProfileId = params.authProfileIdSource === "user" ? preferredProfileId : undefined;
if (lockedProfileId) {
const lockedProfile = authStore.profiles[lockedProfileId];
if (
!lockedProfile ||
normalizeProviderId(lockedProfile.provider) !== normalizeProviderId(provider)
) {
lockedProfileId = undefined;
}
}
2026-01-14 01:08:15 +00:00
const profileOrder = resolveAuthProfileOrder({
cfg: params.config,
store: authStore,
provider,
preferredProfile: preferredProfileId,
2026-01-14 01:08:15 +00:00
});
if (lockedProfileId && !profileOrder.includes(lockedProfileId)) {
throw new Error(`Auth profile "${lockedProfileId}" is not configured for ${provider}.`);
2026-01-14 01:08:15 +00:00
}
const profileCandidates = lockedProfileId
? [lockedProfileId]
: profileOrder.length > 0
? profileOrder
: [undefined];
2026-01-14 01:08:15 +00:00
let profileIndex = 0;
const initialThinkLevel = params.thinkLevel ?? "off";
let thinkLevel = initialThinkLevel;
const attemptedThinking = new Set<ThinkLevel>();
let apiKeyInfo: ApiKeyInfo | null = null;
let lastProfileId: string | undefined;
const resolveAuthProfileFailoverReason = (params: {
allInCooldown: boolean;
message: string;
}): FailoverReason => {
if (params.allInCooldown) {
return "rate_limit";
}
const classified = classifyFailoverReason(params.message);
return classified ?? "auth";
};
const throwAuthProfileFailover = (params: {
allInCooldown: boolean;
message?: string;
error?: unknown;
}): never => {
const fallbackMessage = `No available auth profile for ${provider} (all in cooldown or unavailable).`;
const message =
params.message?.trim() ||
(params.error ? describeUnknownError(params.error).trim() : "") ||
fallbackMessage;
const reason = resolveAuthProfileFailoverReason({
allInCooldown: params.allInCooldown,
message,
});
if (fallbackConfigured) {
throw new FailoverError(message, {
reason,
provider,
model: modelId,
status: resolveFailoverStatus(reason),
cause: params.error,
});
}
if (params.error instanceof Error) {
throw params.error;
}
throw new Error(message);
};
2026-01-14 01:08:15 +00:00
const resolveApiKeyForCandidate = async (candidate?: string) => {
return getApiKeyForModel({
model,
cfg: params.config,
profileId: candidate,
store: authStore,
2026-01-15 04:41:50 +00:00
agentDir,
2026-01-14 01:08:15 +00:00
});
};
const applyApiKeyInfo = async (candidate?: string): Promise<void> => {
apiKeyInfo = await resolveApiKeyForCandidate(candidate);
const resolvedProfileId = apiKeyInfo.profileId ?? candidate;
if (!apiKeyInfo.apiKey) {
if (apiKeyInfo.mode !== "aws-sdk") {
throw new Error(
`No API key resolved for provider "${model.provider}" (auth mode: ${apiKeyInfo.mode}).`,
);
}
lastProfileId = resolvedProfileId;
return;
}
if (model.provider === "github-copilot") {
const { resolveCopilotApiToken } =
await import("../../providers/github-copilot-token.js");
const copilotToken = await resolveCopilotApiToken({
githubToken: apiKeyInfo.apiKey,
});
authStorage.setRuntimeApiKey(model.provider, copilotToken.token);
} else {
authStorage.setRuntimeApiKey(model.provider, apiKeyInfo.apiKey);
}
lastProfileId = apiKeyInfo.profileId;
2026-01-14 01:08:15 +00:00
};
const advanceAuthProfile = async (): Promise<boolean> => {
if (lockedProfileId) {
return false;
}
2026-01-14 01:08:15 +00:00
let nextIndex = profileIndex + 1;
while (nextIndex < profileCandidates.length) {
const candidate = profileCandidates[nextIndex];
if (candidate && isProfileInCooldown(authStore, candidate)) {
nextIndex += 1;
continue;
}
2026-01-14 01:08:15 +00:00
try {
await applyApiKeyInfo(candidate);
profileIndex = nextIndex;
thinkLevel = initialThinkLevel;
attemptedThinking.clear();
return true;
} catch (err) {
if (candidate && candidate === lockedProfileId) {
throw err;
}
2026-01-14 01:08:15 +00:00
nextIndex += 1;
}
}
return false;
};
try {
while (profileIndex < profileCandidates.length) {
const candidate = profileCandidates[profileIndex];
if (
candidate &&
candidate !== lockedProfileId &&
isProfileInCooldown(authStore, candidate)
) {
profileIndex += 1;
continue;
}
await applyApiKeyInfo(profileCandidates[profileIndex]);
break;
}
if (profileIndex >= profileCandidates.length) {
throwAuthProfileFailover({ allInCooldown: true });
}
2026-01-14 01:08:15 +00:00
} catch (err) {
if (err instanceof FailoverError) {
throw err;
}
if (profileCandidates[profileIndex] === lockedProfileId) {
throwAuthProfileFailover({ allInCooldown: false, error: err });
}
2026-01-14 01:08:15 +00:00
const advanced = await advanceAuthProfile();
if (!advanced) {
throwAuthProfileFailover({ allInCooldown: false, error: err });
}
2026-01-14 01:08:15 +00:00
}
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
let overflowCompactionAttempts = 0;
fix: recover from context overflow caused by oversized tool results (#11579) * fix: gracefully handle oversized tool results causing context overflow When a subagent reads a very large file or gets a huge tool result (e.g., gh pr diff on a massive PR), it can exceed the model's context window in a single prompt. Auto-compaction can't help because there's no older history to compact — just one giant tool result. This adds two layers of defense: 1. Pre-emptive: Hard cap on tool result size (400K chars ≈ 100K tokens) applied in the session tool result guard before persistence. This prevents extremely large tool results from being stored in full, regardless of model context window size. 2. Recovery: When context overflow is detected and compaction fails, scan session messages for oversized tool results relative to the model's actual context window (30% max share). If found, truncate them in the session via branching (creating a new branch with truncated content) and retry the prompt. The truncation preserves the beginning of the content (most useful for understanding what was read) and appends a notice explaining the truncation and suggesting offset/limit parameters for targeted reads. Includes comprehensive tests for: - Text truncation with newline-boundary awareness - Context-window-proportional size calculation - In-memory message truncation - Oversized detection heuristics - Guard-level size capping during persistence * fix: prep fixes for tool result truncation PR (#11579) (thanks @tyler6204)
2026-02-07 17:40:51 -08:00
let toolResultTruncationAttempted = false;
const usageAccumulator = createUsageAccumulator();
let autoCompactionCount = 0;
2026-01-14 01:08:15 +00:00
try {
while (true) {
attemptedThinking.add(thinkLevel);
await fs.mkdir(resolvedWorkspace, { recursive: true });
2026-01-21 07:28:11 +00:00
const prompt =
provider === "anthropic" ? scrubAnthropicRefusalMagic(params.prompt) : params.prompt;
2026-01-14 01:08:15 +00:00
const attempt = await runEmbeddedAttempt({
sessionId: params.sessionId,
sessionKey: params.sessionKey,
messageChannel: params.messageChannel,
messageProvider: params.messageProvider,
agentAccountId: params.agentAccountId,
messageTo: params.messageTo,
messageThreadId: params.messageThreadId,
2026-01-24 15:35:05 +13:00
groupId: params.groupId,
groupChannel: params.groupChannel,
groupSpace: params.groupSpace,
spawnedBy: params.spawnedBy,
senderIsOwner: params.senderIsOwner,
2026-01-14 01:08:15 +00:00
currentChannelId: params.currentChannelId,
currentThreadTs: params.currentThreadTs,
replyToMode: params.replyToMode,
hasRepliedRef: params.hasRepliedRef,
sessionFile: params.sessionFile,
workspaceDir: resolvedWorkspace,
2026-01-14 01:08:15 +00:00
agentDir,
config: params.config,
skillsSnapshot: params.skillsSnapshot,
2026-01-21 07:28:11 +00:00
prompt,
2026-01-14 01:08:15 +00:00
images: params.images,
disableTools: params.disableTools,
2026-01-14 01:08:15 +00:00
provider,
modelId,
model,
authStorage,
modelRegistry,
agentId: workspaceResolution.agentId,
2026-01-14 01:08:15 +00:00
thinkLevel,
verboseLevel: params.verboseLevel,
reasoningLevel: params.reasoningLevel,
toolResultFormat: resolvedToolResultFormat,
2026-01-18 06:11:38 +00:00
execOverrides: params.execOverrides,
2026-01-14 01:08:15 +00:00
bashElevated: params.bashElevated,
timeoutMs: params.timeoutMs,
runId: params.runId,
abortSignal: params.abortSignal,
shouldEmitToolResult: params.shouldEmitToolResult,
2026-01-17 05:33:27 +00:00
shouldEmitToolOutput: params.shouldEmitToolOutput,
2026-01-14 01:08:15 +00:00
onPartialReply: params.onPartialReply,
onAssistantMessageStart: params.onAssistantMessageStart,
onBlockReply: params.onBlockReply,
onBlockReplyFlush: params.onBlockReplyFlush,
blockReplyBreak: params.blockReplyBreak,
blockReplyChunking: params.blockReplyChunking,
onReasoningStream: params.onReasoningStream,
onToolResult: params.onToolResult,
onAgentEvent: params.onAgentEvent,
extraSystemPrompt: params.extraSystemPrompt,
streamParams: params.streamParams,
2026-01-14 01:08:15 +00:00
ownerNumbers: params.ownerNumbers,
enforceFinalTag: params.enforceFinalTag,
});
const { aborted, promptError, timedOut, sessionIdUsed, lastAssistant } = attempt;
mergeUsageIntoAccumulator(
usageAccumulator,
attempt.attemptUsage ?? normalizeUsage(lastAssistant?.usage as UsageLike),
);
autoCompactionCount += Math.max(0, attempt.compactionCount ?? 0);
const formattedAssistantErrorText = lastAssistant
? formatAssistantErrorText(lastAssistant, {
cfg: params.config,
sessionKey: params.sessionKey ?? params.sessionId,
})
: undefined;
const assistantErrorText =
lastAssistant?.stopReason === "error"
? lastAssistant.errorMessage?.trim() || formattedAssistantErrorText
: undefined;
const contextOverflowError = !aborted
? (() => {
if (promptError) {
const errorText = describeUnknownError(promptError);
if (isContextOverflowError(errorText)) {
return { text: errorText, source: "promptError" as const };
}
// Prompt submission failed with a non-overflow error. Do not
// inspect prior assistant errors from history for this attempt.
return null;
}
if (assistantErrorText && isContextOverflowError(assistantErrorText)) {
return { text: assistantErrorText, source: "assistantError" as const };
}
return null;
})()
: null;
2026-01-14 01:08:15 +00:00
if (contextOverflowError) {
const errorText = contextOverflowError.text;
const msgCount = attempt.messagesSnapshot?.length ?? 0;
log.warn(
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
`provider=${provider}/${modelId} source=${contextOverflowError.source} ` +
`messages=${msgCount} sessionFile=${params.sessionFile} ` +
`compactionAttempts=${overflowCompactionAttempts} error=${errorText.slice(0, 200)}`,
);
const isCompactionFailure = isCompactionFailureError(errorText);
// Attempt auto-compaction on context overflow (not compaction_failure)
if (
!isCompactionFailure &&
overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
) {
overflowCompactionAttempts++;
log.warn(
`context overflow detected (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); attempting auto-compaction for ${provider}/${modelId}`,
);
const compactResult = await compactEmbeddedPiSessionDirect({
sessionId: params.sessionId,
sessionKey: params.sessionKey,
messageChannel: params.messageChannel,
messageProvider: params.messageProvider,
agentAccountId: params.agentAccountId,
authProfileId: lastProfileId,
sessionFile: params.sessionFile,
workspaceDir: resolvedWorkspace,
agentDir,
config: params.config,
skillsSnapshot: params.skillsSnapshot,
senderIsOwner: params.senderIsOwner,
provider,
model: modelId,
thinkLevel,
reasoningLevel: params.reasoningLevel,
bashElevated: params.bashElevated,
extraSystemPrompt: params.extraSystemPrompt,
ownerNumbers: params.ownerNumbers,
});
if (compactResult.compacted) {
autoCompactionCount += 1;
log.info(`auto-compaction succeeded for ${provider}/${modelId}; retrying prompt`);
continue;
}
log.warn(
`auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
);
}
// Fallback: try truncating oversized tool results in the session.
// This handles the case where a single tool result exceeds the
// context window and compaction cannot reduce it further.
if (!toolResultTruncationAttempted) {
const contextWindowTokens = ctxInfo.tokens;
const hasOversized = attempt.messagesSnapshot
? sessionLikelyHasOversizedToolResults({
messages: attempt.messagesSnapshot,
contextWindowTokens,
})
: false;
if (hasOversized) {
toolResultTruncationAttempted = true;
fix: auto-compact on context overflow promptError before returning error (#1627) * fix: detect Anthropic 'Request size exceeds model context window' as context overflow Anthropic now returns 'Request size exceeds model context window' instead of the previously detected 'prompt is too long' format. This new error message was not recognized by isContextOverflowError(), causing auto-compaction to NOT trigger. Users would see the raw error twice without any recovery attempt. Changes: - Add 'exceeds model context window' and 'request size exceeds' to isContextOverflowError() detection patterns - Add tests that fail without the fix, verifying both the raw error string and the JSON-wrapped format from Anthropic's API - Add test for formatAssistantErrorText to ensure the friendly 'Context overflow' message is shown instead of the raw error Note: The upstream pi-ai package (@mariozechner/pi-ai) also needs a fix in its OVERFLOW_PATTERNS regex: /exceeds the context window/i should be changed to /exceeds.*context window/i to match both 'the' and 'model' variants for triggering auto-compaction retry. * fix(tests): remove unused imports and helper from test files Remove WorkspaceBootstrapFile references and _makeFile helper that were incorrectly copied from another test file. These caused type errors and were unrelated to the context overflow detection tests. * fix: trigger auto-compaction on context overflow promptError When the LLM rejects a request with a context overflow error that surfaces as a promptError (thrown exception rather than streamed error), the existing auto-compaction in pi-coding-agent never triggers. This happens because the error bypasses the agent's message_end → agent_end → _checkCompaction path. This fix adds a fallback compaction attempt directly in the run loop: - Detects context overflow in promptError (excluding compaction_failure) - Calls compactEmbeddedPiSessionDirect (bypassing lane queues since already in-lane) - Retries the prompt after successful compaction - Limits to one compaction attempt per run to prevent infinite loops Fixes: context overflow errors shown to user without auto-compaction attempt * style: format compact.ts and run.ts with oxfmt * fix: tighten context overflow match (#1627) (thanks @rodrigouroz) --------- Co-authored-by: Claude <claude@anthropic.com> Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-01-24 19:09:24 -03:00
log.warn(
`[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
`(contextWindow=${contextWindowTokens} tokens)`,
fix: auto-compact on context overflow promptError before returning error (#1627) * fix: detect Anthropic 'Request size exceeds model context window' as context overflow Anthropic now returns 'Request size exceeds model context window' instead of the previously detected 'prompt is too long' format. This new error message was not recognized by isContextOverflowError(), causing auto-compaction to NOT trigger. Users would see the raw error twice without any recovery attempt. Changes: - Add 'exceeds model context window' and 'request size exceeds' to isContextOverflowError() detection patterns - Add tests that fail without the fix, verifying both the raw error string and the JSON-wrapped format from Anthropic's API - Add test for formatAssistantErrorText to ensure the friendly 'Context overflow' message is shown instead of the raw error Note: The upstream pi-ai package (@mariozechner/pi-ai) also needs a fix in its OVERFLOW_PATTERNS regex: /exceeds the context window/i should be changed to /exceeds.*context window/i to match both 'the' and 'model' variants for triggering auto-compaction retry. * fix(tests): remove unused imports and helper from test files Remove WorkspaceBootstrapFile references and _makeFile helper that were incorrectly copied from another test file. These caused type errors and were unrelated to the context overflow detection tests. * fix: trigger auto-compaction on context overflow promptError When the LLM rejects a request with a context overflow error that surfaces as a promptError (thrown exception rather than streamed error), the existing auto-compaction in pi-coding-agent never triggers. This happens because the error bypasses the agent's message_end → agent_end → _checkCompaction path. This fix adds a fallback compaction attempt directly in the run loop: - Detects context overflow in promptError (excluding compaction_failure) - Calls compactEmbeddedPiSessionDirect (bypassing lane queues since already in-lane) - Retries the prompt after successful compaction - Limits to one compaction attempt per run to prevent infinite loops Fixes: context overflow errors shown to user without auto-compaction attempt * style: format compact.ts and run.ts with oxfmt * fix: tighten context overflow match (#1627) (thanks @rodrigouroz) --------- Co-authored-by: Claude <claude@anthropic.com> Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-01-24 19:09:24 -03:00
);
const truncResult = await truncateOversizedToolResultsInSession({
sessionFile: params.sessionFile,
contextWindowTokens,
fix: auto-compact on context overflow promptError before returning error (#1627) * fix: detect Anthropic 'Request size exceeds model context window' as context overflow Anthropic now returns 'Request size exceeds model context window' instead of the previously detected 'prompt is too long' format. This new error message was not recognized by isContextOverflowError(), causing auto-compaction to NOT trigger. Users would see the raw error twice without any recovery attempt. Changes: - Add 'exceeds model context window' and 'request size exceeds' to isContextOverflowError() detection patterns - Add tests that fail without the fix, verifying both the raw error string and the JSON-wrapped format from Anthropic's API - Add test for formatAssistantErrorText to ensure the friendly 'Context overflow' message is shown instead of the raw error Note: The upstream pi-ai package (@mariozechner/pi-ai) also needs a fix in its OVERFLOW_PATTERNS regex: /exceeds the context window/i should be changed to /exceeds.*context window/i to match both 'the' and 'model' variants for triggering auto-compaction retry. * fix(tests): remove unused imports and helper from test files Remove WorkspaceBootstrapFile references and _makeFile helper that were incorrectly copied from another test file. These caused type errors and were unrelated to the context overflow detection tests. * fix: trigger auto-compaction on context overflow promptError When the LLM rejects a request with a context overflow error that surfaces as a promptError (thrown exception rather than streamed error), the existing auto-compaction in pi-coding-agent never triggers. This happens because the error bypasses the agent's message_end → agent_end → _checkCompaction path. This fix adds a fallback compaction attempt directly in the run loop: - Detects context overflow in promptError (excluding compaction_failure) - Calls compactEmbeddedPiSessionDirect (bypassing lane queues since already in-lane) - Retries the prompt after successful compaction - Limits to one compaction attempt per run to prevent infinite loops Fixes: context overflow errors shown to user without auto-compaction attempt * style: format compact.ts and run.ts with oxfmt * fix: tighten context overflow match (#1627) (thanks @rodrigouroz) --------- Co-authored-by: Claude <claude@anthropic.com> Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-01-24 19:09:24 -03:00
sessionId: params.sessionId,
sessionKey: params.sessionKey,
});
if (truncResult.truncated) {
log.info(
`[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
);
// Session is now smaller; allow compaction retries again.
overflowCompactionAttempts = 0;
fix: auto-compact on context overflow promptError before returning error (#1627) * fix: detect Anthropic 'Request size exceeds model context window' as context overflow Anthropic now returns 'Request size exceeds model context window' instead of the previously detected 'prompt is too long' format. This new error message was not recognized by isContextOverflowError(), causing auto-compaction to NOT trigger. Users would see the raw error twice without any recovery attempt. Changes: - Add 'exceeds model context window' and 'request size exceeds' to isContextOverflowError() detection patterns - Add tests that fail without the fix, verifying both the raw error string and the JSON-wrapped format from Anthropic's API - Add test for formatAssistantErrorText to ensure the friendly 'Context overflow' message is shown instead of the raw error Note: The upstream pi-ai package (@mariozechner/pi-ai) also needs a fix in its OVERFLOW_PATTERNS regex: /exceeds the context window/i should be changed to /exceeds.*context window/i to match both 'the' and 'model' variants for triggering auto-compaction retry. * fix(tests): remove unused imports and helper from test files Remove WorkspaceBootstrapFile references and _makeFile helper that were incorrectly copied from another test file. These caused type errors and were unrelated to the context overflow detection tests. * fix: trigger auto-compaction on context overflow promptError When the LLM rejects a request with a context overflow error that surfaces as a promptError (thrown exception rather than streamed error), the existing auto-compaction in pi-coding-agent never triggers. This happens because the error bypasses the agent's message_end → agent_end → _checkCompaction path. This fix adds a fallback compaction attempt directly in the run loop: - Detects context overflow in promptError (excluding compaction_failure) - Calls compactEmbeddedPiSessionDirect (bypassing lane queues since already in-lane) - Retries the prompt after successful compaction - Limits to one compaction attempt per run to prevent infinite loops Fixes: context overflow errors shown to user without auto-compaction attempt * style: format compact.ts and run.ts with oxfmt * fix: tighten context overflow match (#1627) (thanks @rodrigouroz) --------- Co-authored-by: Claude <claude@anthropic.com> Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-01-24 19:09:24 -03:00
continue;
}
log.warn(
`[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
fix: auto-compact on context overflow promptError before returning error (#1627) * fix: detect Anthropic 'Request size exceeds model context window' as context overflow Anthropic now returns 'Request size exceeds model context window' instead of the previously detected 'prompt is too long' format. This new error message was not recognized by isContextOverflowError(), causing auto-compaction to NOT trigger. Users would see the raw error twice without any recovery attempt. Changes: - Add 'exceeds model context window' and 'request size exceeds' to isContextOverflowError() detection patterns - Add tests that fail without the fix, verifying both the raw error string and the JSON-wrapped format from Anthropic's API - Add test for formatAssistantErrorText to ensure the friendly 'Context overflow' message is shown instead of the raw error Note: The upstream pi-ai package (@mariozechner/pi-ai) also needs a fix in its OVERFLOW_PATTERNS regex: /exceeds the context window/i should be changed to /exceeds.*context window/i to match both 'the' and 'model' variants for triggering auto-compaction retry. * fix(tests): remove unused imports and helper from test files Remove WorkspaceBootstrapFile references and _makeFile helper that were incorrectly copied from another test file. These caused type errors and were unrelated to the context overflow detection tests. * fix: trigger auto-compaction on context overflow promptError When the LLM rejects a request with a context overflow error that surfaces as a promptError (thrown exception rather than streamed error), the existing auto-compaction in pi-coding-agent never triggers. This happens because the error bypasses the agent's message_end → agent_end → _checkCompaction path. This fix adds a fallback compaction attempt directly in the run loop: - Detects context overflow in promptError (excluding compaction_failure) - Calls compactEmbeddedPiSessionDirect (bypassing lane queues since already in-lane) - Retries the prompt after successful compaction - Limits to one compaction attempt per run to prevent infinite loops Fixes: context overflow errors shown to user without auto-compaction attempt * style: format compact.ts and run.ts with oxfmt * fix: tighten context overflow match (#1627) (thanks @rodrigouroz) --------- Co-authored-by: Claude <claude@anthropic.com> Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-01-24 19:09:24 -03:00
);
}
2026-01-14 01:08:15 +00:00
}
const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
return {
payloads: [
{
text:
"Context overflow: prompt too large for the model. " +
"Try again with less input or a larger-context model.",
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta: {
sessionId: sessionIdUsed,
provider,
model: model.id,
},
systemPromptReport: attempt.systemPromptReport,
error: { kind, message: errorText },
},
};
}
if (promptError && !aborted) {
const errorText = describeUnknownError(promptError);
// Handle role ordering errors with a user-friendly message
if (/incorrect role information|roles must alternate/i.test(errorText)) {
return {
payloads: [
{
text:
"Message ordering conflict - please try again. " +
"If this persists, use /new to start a fresh session.",
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta: {
sessionId: sessionIdUsed,
provider,
model: model.id,
},
systemPromptReport: attempt.systemPromptReport,
error: { kind: "role_ordering", message: errorText },
},
};
}
// Handle image size errors with a user-friendly message (no retry needed)
const imageSizeError = parseImageSizeError(errorText);
if (imageSizeError) {
const maxMb = imageSizeError.maxMb;
const maxMbLabel =
typeof maxMb === "number" && Number.isFinite(maxMb) ? `${maxMb}` : null;
const maxBytesHint = maxMbLabel ? ` (max ${maxMbLabel}MB)` : "";
return {
payloads: [
{
text:
`Image too large for the model${maxBytesHint}. ` +
"Please compress or resize the image and try again.",
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta: {
sessionId: sessionIdUsed,
provider,
model: model.id,
},
systemPromptReport: attempt.systemPromptReport,
error: { kind: "image_size", message: errorText },
},
};
}
2026-01-14 01:08:15 +00:00
const promptFailoverReason = classifyFailoverReason(errorText);
if (promptFailoverReason && promptFailoverReason !== "timeout" && lastProfileId) {
2026-01-14 01:08:15 +00:00
await markAuthProfileFailure({
store: authStore,
profileId: lastProfileId,
reason: promptFailoverReason,
cfg: params.config,
agentDir: params.agentDir,
});
}
if (
isFailoverErrorMessage(errorText) &&
promptFailoverReason !== "timeout" &&
(await advanceAuthProfile())
) {
continue;
}
const fallbackThinking = pickFallbackThinkingLevel({
message: errorText,
attempted: attemptedThinking,
});
if (fallbackThinking) {
log.warn(
`unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
);
thinkLevel = fallbackThinking;
continue;
}
// FIX: Throw FailoverError for prompt errors when fallbacks configured
// This enables model fallback for quota/rate limit errors during prompt submission
if (fallbackConfigured && isFailoverErrorMessage(errorText)) {
throw new FailoverError(errorText, {
reason: promptFailoverReason ?? "unknown",
provider,
model: modelId,
profileId: lastProfileId,
status: resolveFailoverStatus(promptFailoverReason ?? "unknown"),
});
}
2026-01-14 01:08:15 +00:00
throw promptError;
}
const fallbackThinking = pickFallbackThinkingLevel({
message: lastAssistant?.errorMessage,
attempted: attemptedThinking,
});
if (fallbackThinking && !aborted) {
log.warn(
`unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
);
thinkLevel = fallbackThinking;
continue;
}
const authFailure = isAuthAssistantError(lastAssistant);
const rateLimitFailure = isRateLimitAssistantError(lastAssistant);
const billingFailure = isBillingAssistantError(lastAssistant);
2026-01-14 01:08:15 +00:00
const failoverFailure = isFailoverAssistantError(lastAssistant);
const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
2026-01-14 01:08:15 +00:00
const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
2026-01-18 15:19:25 +00:00
const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? "");
if (imageDimensionError && lastProfileId) {
const details = [
imageDimensionError.messageIndex !== undefined
? `message=${imageDimensionError.messageIndex}`
: null,
imageDimensionError.contentIndex !== undefined
? `content=${imageDimensionError.contentIndex}`
: null,
imageDimensionError.maxDimensionPx !== undefined
? `limit=${imageDimensionError.maxDimensionPx}px`
: null,
]
.filter(Boolean)
.join(" ");
log.warn(
`Profile ${lastProfileId} rejected image payload${details ? ` (${details})` : ""}.`,
);
}
2026-01-14 01:08:15 +00:00
// Treat timeout as potential rate limit (Antigravity hangs on rate limit)
const shouldRotate = (!aborted && failoverFailure) || timedOut;
if (shouldRotate) {
if (lastProfileId) {
const reason =
timedOut || assistantFailoverReason === "timeout"
? "timeout"
: (assistantFailoverReason ?? "unknown");
await markAuthProfileFailure({
store: authStore,
profileId: lastProfileId,
reason,
cfg: params.config,
agentDir: params.agentDir,
});
2026-01-24 00:04:53 +00:00
if (timedOut && !isProbeSession) {
2026-01-14 01:08:15 +00:00
log.warn(
`Profile ${lastProfileId} timed out (possible rate limit). Trying next account...`,
);
}
if (cloudCodeAssistFormatError) {
log.warn(
`Profile ${lastProfileId} hit Cloud Code Assist format error. Tool calls will be sanitized on retry.`,
);
}
}
const rotated = await advanceAuthProfile();
if (rotated) {
continue;
}
2026-01-14 01:08:15 +00:00
if (fallbackConfigured) {
// Prefer formatted error message (user-friendly) over raw errorMessage
2026-01-14 01:08:15 +00:00
const message =
(lastAssistant
? formatAssistantErrorText(lastAssistant, {
cfg: params.config,
sessionKey: params.sessionKey ?? params.sessionId,
})
: undefined) ||
lastAssistant?.errorMessage?.trim() ||
2026-01-14 01:08:15 +00:00
(timedOut
? "LLM request timed out."
: rateLimitFailure
? "LLM request rate limited."
: billingFailure
? BILLING_ERROR_USER_MESSAGE
: authFailure
? "LLM request unauthorized."
: "LLM request failed.");
2026-01-14 01:08:15 +00:00
const status =
resolveFailoverStatus(assistantFailoverReason ?? "unknown") ??
(isTimeoutErrorMessage(message) ? 408 : undefined);
throw new FailoverError(message, {
reason: assistantFailoverReason ?? "unknown",
provider,
model: modelId,
profileId: lastProfileId,
status,
});
}
}
const usage = toNormalizedUsage(usageAccumulator);
2026-01-14 01:08:15 +00:00
const agentMeta: EmbeddedPiAgentMeta = {
sessionId: sessionIdUsed,
provider: lastAssistant?.provider ?? provider,
model: lastAssistant?.model ?? model.id,
usage,
compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined,
2026-01-14 01:08:15 +00:00
};
const payloads = buildEmbeddedRunPayloads({
assistantTexts: attempt.assistantTexts,
toolMetas: attempt.toolMetas,
lastAssistant: attempt.lastAssistant,
lastToolError: attempt.lastToolError,
2026-01-14 01:08:15 +00:00
config: params.config,
sessionKey: params.sessionKey ?? params.sessionId,
verboseLevel: params.verboseLevel,
reasoningLevel: params.reasoningLevel,
toolResultFormat: resolvedToolResultFormat,
inlineToolResultsAllowed: false,
2026-01-14 01:08:15 +00:00
});
log.debug(
`embedded run done: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - started} aborted=${aborted}`,
);
if (lastProfileId) {
await markAuthProfileGood({
store: authStore,
provider,
profileId: lastProfileId,
agentDir: params.agentDir,
2026-01-14 01:08:15 +00:00
});
await markAuthProfileUsed({
store: authStore,
profileId: lastProfileId,
agentDir: params.agentDir,
2026-01-14 01:08:15 +00:00
});
}
return {
payloads: payloads.length ? payloads : undefined,
meta: {
durationMs: Date.now() - started,
agentMeta,
aborted,
2026-01-15 01:06:19 +00:00
systemPromptReport: attempt.systemPromptReport,
// Handle client tool calls (OpenResponses hosted tools)
stopReason: attempt.clientToolCall ? "tool_calls" : undefined,
pendingToolCalls: attempt.clientToolCall
? [
{
id: `call_${Date.now()}`,
name: attempt.clientToolCall.name,
arguments: JSON.stringify(attempt.clientToolCall.params),
},
]
: undefined,
2026-01-14 01:08:15 +00:00
},
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
messagingToolSentTexts: attempt.messagingToolSentTexts,
messagingToolSentTargets: attempt.messagingToolSentTargets,
};
}
} finally {
process.chdir(prevCwd);
}
}),
);
}