From 957b8830821d6e7113f64ff68cfc6a81ec1787ca Mon Sep 17 00:00:00 2001 From: Vladimir Peshekhonov Date: Fri, 13 Feb 2026 00:53:13 +0100 Subject: [PATCH] fix(agents): stabilize overflow compaction retries and session context accounting (openclaw#14102) thanks @vpesh Verified: - CI checks for commit 86a7ecb45ebf0be61dce9261398000524fd9fab6 - Rebase conflict resolution for compatibility with latest main Co-authored-by: vpesh <9496634+vpesh@users.noreply.github.com> --- ...d-helpers.iscompactionfailureerror.test.ts | 1 + ...lpers.islikelycontextoverflowerror.test.ts | 2 + src/agents/pi-embedded-helpers/errors.ts | 17 +++- .../run.overflow-compaction.test.ts | 80 ++++++++++++++++++- src/agents/pi-embedded-runner/run.ts | 21 +++-- src/agents/pi-embedded-runner/types.ts | 1 + src/agents/usage.test.ts | 15 ++++ src/agents/usage.ts | 22 +++-- src/auto-reply/reply/agent-runner.ts | 2 + src/auto-reply/reply/followup-runner.ts | 2 + src/auto-reply/reply/session-usage.ts | 2 + src/commands/agent/session-store.ts | 2 + src/cron/isolated-agent/run.ts | 2 + 13 files changed, 148 insertions(+), 21 deletions(-) diff --git a/src/agents/pi-embedded-helpers.iscompactionfailureerror.test.ts b/src/agents/pi-embedded-helpers.iscompactionfailureerror.test.ts index 7158d19b990..6abcabba5bd 100644 --- a/src/agents/pi-embedded-helpers.iscompactionfailureerror.test.ts +++ b/src/agents/pi-embedded-helpers.iscompactionfailureerror.test.ts @@ -6,6 +6,7 @@ describe("isCompactionFailureError", () => { 'Context overflow: Summarization failed: 400 {"message":"prompt is too long"}', "auto-compaction failed due to context overflow", "Compaction failed: prompt is too long", + "Summarization failed: context window exceeded for this request", ]; for (const sample of samples) { expect(isCompactionFailureError(sample)).toBe(true); diff --git a/src/agents/pi-embedded-helpers.islikelycontextoverflowerror.test.ts b/src/agents/pi-embedded-helpers.islikelycontextoverflowerror.test.ts index 
148f3b95785..e9ff9e457c3 100644 --- a/src/agents/pi-embedded-helpers.islikelycontextoverflowerror.test.ts +++ b/src/agents/pi-embedded-helpers.islikelycontextoverflowerror.test.ts @@ -30,6 +30,8 @@ describe("isLikelyContextOverflowError", () => { "too many requests", "429 Too Many Requests", "exceeded your current quota", + "This request would exceed your account's rate limit", + "429 Too Many Requests: request exceeds rate limit", ]; for (const sample of samples) { expect(isLikelyContextOverflowError(sample)).toBe(false); diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index c9a16eb00ce..d4d0f34e40a 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -38,7 +38,9 @@ export function isContextOverflowError(errorMessage?: string): boolean { const CONTEXT_WINDOW_TOO_SMALL_RE = /context window.*(too small|minimum is)/i; const CONTEXT_OVERFLOW_HINT_RE = - /context.*overflow|context window.*(too (?:large|long)|exceed|over|limit|max(?:imum)?|requested|sent|tokens)|(?:prompt|request|input).*(too (?:large|long)|exceed|over|limit|max(?:imum)?)/i; + /context.*overflow|context window.*(too (?:large|long)|exceed|over|limit|max(?:imum)?|requested|sent|tokens)|prompt.*(too (?:large|long)|exceed|over|limit|max(?:imum)?)|(?:request|input).*(?:context|window|length|token).*(too (?:large|long)|exceed|over|limit|max(?:imum)?)/i; +const RATE_LIMIT_HINT_RE = + /rate limit|too many requests|requests per (?:minute|hour|day)|quota|throttl|429\b/i; export function isLikelyContextOverflowError(errorMessage?: string): boolean { if (!errorMessage) { @@ -56,6 +58,9 @@ export function isLikelyContextOverflowError(errorMessage?: string): boolean { if (isContextOverflowError(errorMessage)) { return true; } + if (RATE_LIMIT_HINT_RE.test(errorMessage)) { + return false; + } return CONTEXT_OVERFLOW_HINT_RE.test(errorMessage); } @@ -72,9 +77,13 @@ export function isCompactionFailureError(errorMessage?: 
string): boolean { if (!hasCompactionTerm) { return false; } - // For compaction failures, also accept "context overflow" without colon - // since the error message itself describes a compaction/summarization failure - return isContextOverflowError(errorMessage) || lower.includes("context overflow"); + // Treat any likely overflow shape as a compaction failure when compaction terms are present. + // Providers often vary wording (e.g. "context window exceeded") across APIs. + if (isLikelyContextOverflowError(errorMessage)) { + return true; + } + // Keep explicit fallback for bare "context overflow" strings. + return lower.includes("context overflow"); } const ERROR_PAYLOAD_PREFIX_RE = diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts index df85d888cf8..059ceb2c453 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts @@ -87,7 +87,21 @@ vi.mock("../failover-error.js", () => ({ })); vi.mock("../usage.js", () => ({ - normalizeUsage: vi.fn(() => undefined), + normalizeUsage: vi.fn((usage?: unknown) => + usage && typeof usage === "object" ? usage : undefined, + ), + derivePromptTokens: vi.fn( + (usage?: { input?: number; cacheRead?: number; cacheWrite?: number }) => { + if (!usage) { + return undefined; + } + const input = usage.input ?? 0; + const cacheRead = usage.cacheRead ?? 0; + const cacheWrite = usage.cacheWrite ?? 0; + const sum = input + cacheRead + cacheWrite; + return sum > 0 ? 
sum : undefined; + }, + ), hasNonzeroUsage: vi.fn(() => false), })); @@ -143,6 +157,18 @@ vi.mock("../pi-embedded-helpers.js", async () => { const lower = msg.toLowerCase(); return lower.includes("request_too_large") || lower.includes("request size exceeds"); }, + isLikelyContextOverflowError: (msg?: string) => { + if (!msg) { + return false; + } + const lower = msg.toLowerCase(); + return ( + lower.includes("request_too_large") || + lower.includes("request size exceeds") || + lower.includes("context window exceeded") || + lower.includes("prompt too large") + ); + }, isFailoverAssistantError: vi.fn(() => false), isFailoverErrorMessage: vi.fn(() => false), isAuthAssistantError: vi.fn(() => false), @@ -249,6 +275,31 @@ describe("overflow compaction in run loop", () => { expect(result.meta.error).toBeUndefined(); }); + it("retries after successful compaction on likely-overflow promptError variants", async () => { + const overflowHintError = new Error("Context window exceeded: requested 12000 tokens"); + + mockedRunEmbeddedAttempt + .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowHintError })) + .mockResolvedValueOnce(makeAttemptResult({ promptError: null })); + + mockedCompactDirect.mockResolvedValueOnce({ + ok: true, + compacted: true, + result: { + summary: "Compacted session", + firstKeptEntryId: "entry-6", + tokensBefore: 140000, + }, + }); + + const result = await runEmbeddedPiAgent(baseParams); + + expect(mockedCompactDirect).toHaveBeenCalledTimes(1); + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); + expect(log.warn).toHaveBeenCalledWith(expect.stringContaining("source=promptError")); + expect(result.meta.error).toBeUndefined(); + }); + it("returns error if compaction fails", async () => { const overflowError = new Error("request_too_large: Request size exceeds model context window"); @@ -433,4 +484,31 @@ describe("overflow compaction in run loop", () => { expect(mockedCompactDirect).not.toHaveBeenCalled(); 
expect(log.warn).not.toHaveBeenCalledWith(expect.stringContaining("source=assistantError")); }); + + it("sets promptTokens from the latest model call usage, not accumulated attempt usage", async () => { + mockedRunEmbeddedAttempt.mockResolvedValue( + makeAttemptResult({ + attemptUsage: { + input: 4_000, + cacheRead: 120_000, + cacheWrite: 0, + total: 124_000, + }, + lastAssistant: { + stopReason: "end_turn", + usage: { + input: 900, + cacheRead: 1_100, + cacheWrite: 0, + total: 2_000, + }, + } as EmbeddedRunAttemptResult["lastAssistant"], + }), + ); + + const result = await runEmbeddedPiAgent(baseParams); + + expect(result.meta.agentMeta?.usage?.input).toBe(4_000); + expect(result.meta.agentMeta?.promptTokens).toBe(2_000); + }); }); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index d56d188b5b2..467ddba5d96 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -34,7 +34,7 @@ import { isAuthAssistantError, isBillingAssistantError, isCompactionFailureError, - isContextOverflowError, + isLikelyContextOverflowError, isFailoverAssistantError, isFailoverErrorMessage, parseImageSizeError, @@ -44,7 +44,7 @@ import { pickFallbackThinkingLevel, type FailoverReason, } from "../pi-embedded-helpers.js"; -import { normalizeUsage, type UsageLike } from "../usage.js"; +import { derivePromptTokens, normalizeUsage, type UsageLike } from "../usage.js"; import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js"; import { compactEmbeddedPiSessionDirect } from "./compact.js"; import { resolveGlobalLane, resolveSessionLane } from "./lanes.js"; @@ -408,6 +408,7 @@ export async function runEmbeddedPiAgent( let overflowCompactionAttempts = 0; let toolResultTruncationAttempted = false; const usageAccumulator = createUsageAccumulator(); + let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined; let autoCompactionCount = 0; try { while (true) { @@ -475,10 +476,12 @@ export async function 
runEmbeddedPiAgent( }); const { aborted, promptError, timedOut, sessionIdUsed, lastAssistant } = attempt; - mergeUsageIntoAccumulator( - usageAccumulator, - attempt.attemptUsage ?? normalizeUsage(lastAssistant?.usage as UsageLike), - ); + const lastAssistantUsage = normalizeUsage(lastAssistant?.usage as UsageLike); + const attemptUsage = attempt.attemptUsage ?? lastAssistantUsage; + mergeUsageIntoAccumulator(usageAccumulator, attemptUsage); + // Keep prompt size from the latest model call so session totalTokens + // reflects current context usage, not accumulated tool-loop usage. + lastRunPromptUsage = lastAssistantUsage ?? attemptUsage; autoCompactionCount += Math.max(0, attempt.compactionCount ?? 0); const formattedAssistantErrorText = lastAssistant ? formatAssistantErrorText(lastAssistant, { @@ -496,14 +499,14 @@ export async function runEmbeddedPiAgent( ? (() => { if (promptError) { const errorText = describeUnknownError(promptError); - if (isContextOverflowError(errorText)) { + if (isLikelyContextOverflowError(errorText)) { return { text: errorText, source: "promptError" as const }; } // Prompt submission failed with a non-overflow error. Do not // inspect prior assistant errors from history for this attempt. return null; } - if (assistantErrorText && isContextOverflowError(assistantErrorText)) { + if (assistantErrorText && isLikelyContextOverflowError(assistantErrorText)) { return { text: assistantErrorText, source: "assistantError" as const }; } return null; @@ -826,12 +829,14 @@ export async function runEmbeddedPiAgent( // overstates the actual context size. `lastCallUsage` reflects only // the final call, giving an accurate snapshot of current context. const lastCallUsage = normalizeUsage(lastAssistant?.usage as UsageLike); + const promptTokens = derivePromptTokens(lastRunPromptUsage); const agentMeta: EmbeddedPiAgentMeta = { sessionId: sessionIdUsed, provider: lastAssistant?.provider ?? provider, model: lastAssistant?.model ?? 
model.id, usage, lastCallUsage: lastCallUsage ?? undefined, + promptTokens, compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined, }; diff --git a/src/agents/pi-embedded-runner/types.ts b/src/agents/pi-embedded-runner/types.ts index 2f845de6b06..4c1e2412082 100644 --- a/src/agents/pi-embedded-runner/types.ts +++ b/src/agents/pi-embedded-runner/types.ts @@ -6,6 +6,7 @@ export type EmbeddedPiAgentMeta = { provider: string; model: string; compactionCount?: number; + promptTokens?: number; usage?: { input?: number; output?: number; diff --git a/src/agents/usage.test.ts b/src/agents/usage.test.ts index 8743de718dc..02f24c22212 100644 --- a/src/agents/usage.test.ts +++ b/src/agents/usage.test.ts @@ -74,4 +74,19 @@ describe("normalizeUsage", () => { }), ).toBe(1_550); }); + + it("prefers explicit prompt token overrides", () => { + expect( + deriveSessionTotalTokens({ + usage: { + input: 1_200, + cacheRead: 300, + cacheWrite: 50, + total: 9_999, + }, + promptTokens: 65_000, + contextTokens: 200_000, + }), + ).toBe(65_000); + }); }); diff --git a/src/agents/usage.ts b/src/agents/usage.ts index 7367b99ff35..7e8a4f2ecc9 100644 --- a/src/agents/usage.ts +++ b/src/agents/usage.ts @@ -112,18 +112,24 @@ export function deriveSessionTotalTokens(params: { cacheWrite?: number; }; contextTokens?: number; + promptTokens?: number; }): number | undefined { + const promptOverride = params.promptTokens; + const hasPromptOverride = + typeof promptOverride === "number" && Number.isFinite(promptOverride) && promptOverride > 0; const usage = params.usage; - if (!usage) { + if (!usage && !hasPromptOverride) { return undefined; } - const input = usage.input ?? 0; - const promptTokens = derivePromptTokens({ - input: usage.input, - cacheRead: usage.cacheRead, - cacheWrite: usage.cacheWrite, - }); - let total = promptTokens ?? usage.total ?? input; + const input = usage?.input ?? 0; + const promptTokens = hasPromptOverride + ? 
promptOverride + : derivePromptTokens({ + input: usage?.input, + cacheRead: usage?.cacheRead, + cacheWrite: usage?.cacheWrite, + }); + let total = promptTokens ?? usage?.total ?? input; if (!(total > 0)) { return undefined; } diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index 9f0db997534..73a380e705c 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -371,6 +371,7 @@ export async function runReplyAgent(params: { } const usage = runResult.meta.agentMeta?.usage; + const promptTokens = runResult.meta.agentMeta?.promptTokens; const modelUsed = runResult.meta.agentMeta?.model ?? fallbackModel ?? defaultModel; const providerUsed = runResult.meta.agentMeta?.provider ?? fallbackProvider ?? followupRun.run.provider; @@ -388,6 +389,7 @@ export async function runReplyAgent(params: { sessionKey, usage, lastCallUsage: runResult.meta.agentMeta?.lastCallUsage, + promptTokens, modelUsed, providerUsed, contextTokensUsed, diff --git a/src/auto-reply/reply/followup-runner.ts b/src/auto-reply/reply/followup-runner.ts index eb8ce09fa86..cdc392369e6 100644 --- a/src/auto-reply/reply/followup-runner.ts +++ b/src/auto-reply/reply/followup-runner.ts @@ -194,6 +194,7 @@ export function createFollowupRunner(params: { } const usage = runResult.meta.agentMeta?.usage; + const promptTokens = runResult.meta.agentMeta?.promptTokens; const modelUsed = runResult.meta.agentMeta?.model ?? fallbackModel ?? defaultModel; const contextTokensUsed = agentCfgContextTokens ?? 
@@ -207,6 +208,7 @@ export function createFollowupRunner(params: { sessionKey, usage, lastCallUsage: runResult.meta.agentMeta?.lastCallUsage, + promptTokens, modelUsed, providerUsed: fallbackProvider, contextTokensUsed, diff --git a/src/auto-reply/reply/session-usage.ts b/src/auto-reply/reply/session-usage.ts index 2922564b71c..d5408870e37 100644 --- a/src/auto-reply/reply/session-usage.ts +++ b/src/auto-reply/reply/session-usage.ts @@ -25,6 +25,7 @@ export async function persistSessionUsageUpdate(params: { modelUsed?: string; providerUsed?: string; contextTokensUsed?: number; + promptTokens?: number; systemPromptReport?: SessionSystemPromptReport; cliSessionId?: string; logLabel?: string; @@ -56,6 +57,7 @@ export async function persistSessionUsageUpdate(params: { deriveSessionTotalTokens({ usage: usageForContext, contextTokens: resolvedContextTokens, + promptTokens: params.promptTokens, }) ?? input, modelProvider: params.providerUsed ?? entry.modelProvider, model: params.modelUsed ?? entry.model, diff --git a/src/commands/agent/session-store.ts b/src/commands/agent/session-store.ts index af0c24ae59b..48657bba197 100644 --- a/src/commands/agent/session-store.ts +++ b/src/commands/agent/session-store.ts @@ -37,6 +37,7 @@ export async function updateSessionStoreAfterAgentRun(params: { } = params; const usage = result.meta.agentMeta?.usage; + const promptTokens = result.meta.agentMeta?.promptTokens; const compactionsThisRun = Math.max(0, result.meta.agentMeta?.compactionCount ?? 0); const modelUsed = result.meta.agentMeta?.model ?? fallbackModel ?? defaultModel; const providerUsed = result.meta.agentMeta?.provider ?? fallbackProvider ?? defaultProvider; @@ -71,6 +72,7 @@ export async function updateSessionStoreAfterAgentRun(params: { deriveSessionTotalTokens({ usage, contextTokens, + promptTokens, }) ?? 
input; } if (compactionsThisRun > 0) { diff --git a/src/cron/isolated-agent/run.ts b/src/cron/isolated-agent/run.ts index 015ee6d511b..9029ae29f64 100644 --- a/src/cron/isolated-agent/run.ts +++ b/src/cron/isolated-agent/run.ts @@ -456,6 +456,7 @@ export async function runCronIsolatedAgentTurn(params: { // Update token+model fields in the session store. { const usage = runResult.meta.agentMeta?.usage; + const promptTokens = runResult.meta.agentMeta?.promptTokens; const modelUsed = runResult.meta.agentMeta?.model ?? fallbackModel ?? model; const providerUsed = runResult.meta.agentMeta?.provider ?? fallbackProvider ?? provider; const contextTokens = @@ -479,6 +480,7 @@ export async function runCronIsolatedAgentTurn(params: { deriveSessionTotalTokens({ usage, contextTokens, + promptTokens, }) ?? input; } await persistSessionEntry();