From d7a92284ff49a2ed12b994d17649ab46a353011a Mon Sep 17 00:00:00 2001 From: Joey Krug Date: Sat, 14 Mar 2026 13:13:20 -0400 Subject: [PATCH 1/8] fix: trigger compaction on LLM timeout with high context usage When the LLM times out and context usage is high (>65% of the context window), the system retries with the same oversized context, causing a death spiral of repeated timeouts. This adds timeout-triggered compaction: after a timeout with high context usage, compact the context before retrying to break the cycle. Also fixes missing applyLocalNoAuthHeaderOverride mock in overflow compaction test shared mocks. Co-Authored-By: Claude Opus 4.6 --- .../run.timeout-triggered-compaction.test.ts | 226 ++++++++++++++++++ src/agents/pi-embedded-runner/run.ts | 69 ++++++ 2 files changed, 295 insertions(+) create mode 100644 src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts diff --git a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts new file mode 100644 index 00000000000..65192a15a6b --- /dev/null +++ b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts @@ -0,0 +1,226 @@ +import { beforeAll, beforeEach, describe, expect, it } from "vitest"; +import { makeAttemptResult, makeCompactionSuccess } from "./run.overflow-compaction.fixture.js"; +import { + loadRunOverflowCompactionHarness, + mockedCoerceToFailoverError, + mockedDescribeFailoverError, + mockedGlobalHookRunner, + mockedResolveFailoverStatus, + mockedContextEngine, + mockedCompactDirect, + mockedRunEmbeddedAttempt, + resetRunOverflowCompactionHarnessMocks, + mockedSessionLikelyHasOversizedToolResults, + mockedTruncateOversizedToolResultsInSession, + overflowBaseRunParams, +} from "./run.overflow-compaction.harness.js"; + +let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent; + +describe("timeout-triggered compaction", () => { + beforeAll(async () => { + ({ 
runEmbeddedPiAgent } = await loadRunOverflowCompactionHarness()); + }); + + beforeEach(() => { + resetRunOverflowCompactionHarnessMocks(); + mockedRunEmbeddedAttempt.mockReset(); + mockedCompactDirect.mockReset(); + mockedCoerceToFailoverError.mockReset(); + mockedDescribeFailoverError.mockReset(); + mockedResolveFailoverStatus.mockReset(); + mockedSessionLikelyHasOversizedToolResults.mockReset(); + mockedTruncateOversizedToolResultsInSession.mockReset(); + mockedGlobalHookRunner.runBeforeAgentStart.mockReset(); + mockedGlobalHookRunner.runBeforeCompaction.mockReset(); + mockedGlobalHookRunner.runAfterCompaction.mockReset(); + mockedContextEngine.info.ownsCompaction = false; + mockedCompactDirect.mockResolvedValue({ + ok: false, + compacted: false, + reason: "nothing to compact", + }); + mockedCoerceToFailoverError.mockReturnValue(null); + mockedDescribeFailoverError.mockImplementation((err: unknown) => ({ + message: err instanceof Error ? err.message : String(err), + reason: undefined, + status: undefined, + code: undefined, + })); + mockedSessionLikelyHasOversizedToolResults.mockReturnValue(false); + mockedTruncateOversizedToolResultsInSession.mockResolvedValue({ + truncated: false, + truncatedCount: 0, + reason: "no oversized tool results", + }); + mockedGlobalHookRunner.hasHooks.mockImplementation(() => false); + }); + + it("attempts compaction when LLM times out with high context usage (>65%)", async () => { + // First attempt: timeout with high usage (150k / 200k = 75%) + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + lastAssistant: { + usage: { total: 150000 }, + } as never, + }), + ); + // Compaction succeeds + mockedCompactDirect.mockResolvedValueOnce( + makeCompactionSuccess({ + summary: "timeout recovery compaction", + tokensBefore: 150000, + tokensAfter: 80000, + }), + ); + // Retry after compaction succeeds + mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null })); + + const 
result = await runEmbeddedPiAgent(overflowBaseRunParams); + + expect(mockedCompactDirect).toHaveBeenCalledTimes(1); + expect(mockedCompactDirect).toHaveBeenCalledWith( + expect.objectContaining({ + sessionId: "test-session", + sessionFile: "/tmp/session.json", + tokenBudget: 200000, + force: true, + compactionTarget: "budget", + runtimeContext: expect.objectContaining({ + trigger: "timeout_recovery", + attempt: 1, + maxAttempts: 1, + }), + }), + ); + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); + expect(result.meta.error).toBeUndefined(); + }); + + it("retries the prompt after successful timeout compaction", async () => { + // First attempt: timeout with high usage + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + lastAssistant: { + usage: { total: 160000 }, + } as never, + }), + ); + // Compaction succeeds + mockedCompactDirect.mockResolvedValueOnce( + makeCompactionSuccess({ + summary: "compacted for timeout", + tokensBefore: 160000, + tokensAfter: 60000, + }), + ); + // Second attempt succeeds + mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null })); + + const result = await runEmbeddedPiAgent(overflowBaseRunParams); + + // Verify the loop continued (retry happened) + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); + expect(result.meta.error).toBeUndefined(); + }); + + it("falls through to normal handling when timeout compaction fails", async () => { + // Timeout with high usage + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + lastAssistant: { + usage: { total: 150000 }, + } as never, + }), + ); + // Compaction does not reduce context + mockedCompactDirect.mockResolvedValueOnce({ + ok: false, + compacted: false, + reason: "nothing to compact", + }); + + const result = await runEmbeddedPiAgent(overflowBaseRunParams); + + // Compaction was attempted but failed → falls through to timeout error payload + 
expect(mockedCompactDirect).toHaveBeenCalledTimes(1); + expect(result.payloads?.[0]?.isError).toBe(true); + expect(result.payloads?.[0]?.text).toContain("timed out"); + }); + + it("does not attempt compaction when context usage is low", async () => { + // Timeout with low usage (20k / 200k = 10%) + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + lastAssistant: { + usage: { total: 20000 }, + } as never, + }), + ); + + const result = await runEmbeddedPiAgent(overflowBaseRunParams); + + // No compaction attempt for low usage + expect(mockedCompactDirect).not.toHaveBeenCalled(); + expect(result.payloads?.[0]?.isError).toBe(true); + expect(result.payloads?.[0]?.text).toContain("timed out"); + }); + + it("does not attempt compaction when aborted", async () => { + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + aborted: true, + lastAssistant: { + usage: { total: 180000 }, + } as never, + }), + ); + + await runEmbeddedPiAgent(overflowBaseRunParams); + + expect(mockedCompactDirect).not.toHaveBeenCalled(); + }); + + it("does not attempt compaction when timedOutDuringCompaction is true", async () => { + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + timedOutDuringCompaction: true, + lastAssistant: { + usage: { total: 180000 }, + } as never, + }), + ); + + await runEmbeddedPiAgent(overflowBaseRunParams); + + // timedOutDuringCompaction skips timeout-triggered compaction + expect(mockedCompactDirect).not.toHaveBeenCalled(); + }); + + it("catches thrown errors from contextEngine.compact during timeout recovery", async () => { + // Timeout with high usage + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + lastAssistant: { + usage: { total: 150000 }, + } as never, + }), + ); + // Compaction throws + mockedCompactDirect.mockRejectedValueOnce(new Error("engine crashed")); + + const result = await 
runEmbeddedPiAgent(overflowBaseRunParams); + + // Should not crash — falls through to normal timeout handling + expect(mockedCompactDirect).toHaveBeenCalledTimes(1); + expect(result.payloads?.[0]?.isError).toBe(true); + expect(result.payloads?.[0]?.text).toContain("timed out"); + }); +}); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index a35c03d98ca..4936ebc6ce4 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1047,6 +1047,75 @@ export async function runEmbeddedPiAgent( ? lastAssistant.errorMessage?.trim() || formattedAssistantErrorText : undefined; + // ── Timeout-triggered compaction ────────────────────────────────── + // When the LLM times out with high context usage, compact before + // retrying to break the death spiral of repeated timeouts. + if (timedOut && !aborted && !timedOutDuringCompaction) { + const tokenUsedRatio = + lastTurnTotal != null && ctxInfo.tokens > 0 ? lastTurnTotal / ctxInfo.tokens : 0; + if ( + tokenUsedRatio > 0.65 || + (overflowCompactionAttempts === 0 && runLoopIterations > 1) + ) { + const timeoutDiagId = createCompactionDiagId(); + log.warn( + `[timeout-compaction] LLM timed out with high context usage (${Math.round(tokenUsedRatio * 100)}%); ` + + `attempting compaction before retry diagId=${timeoutDiagId}`, + ); + let timeoutCompactResult: Awaited>; + try { + timeoutCompactResult = await contextEngine.compact({ + sessionId: params.sessionId, + sessionKey: params.sessionKey, + sessionFile: params.sessionFile, + tokenBudget: ctxInfo.tokens, + force: true, + compactionTarget: "budget", + runtimeContext: { + sessionKey: params.sessionKey, + messageChannel: params.messageChannel, + messageProvider: params.messageProvider, + agentAccountId: params.agentAccountId, + authProfileId: lastProfileId, + workspaceDir: resolvedWorkspace, + agentDir, + config: params.config, + skillsSnapshot: params.skillsSnapshot, + senderIsOwner: params.senderIsOwner, + 
provider, + model: modelId, + runId: params.runId, + thinkLevel, + reasoningLevel: params.reasoningLevel, + bashElevated: params.bashElevated, + extraSystemPrompt: params.extraSystemPrompt, + ownerNumbers: params.ownerNumbers, + trigger: "timeout_recovery", + diagId: timeoutDiagId, + attempt: 1, + maxAttempts: 1, + }, + }); + } catch (compactErr) { + log.warn( + `[timeout-compaction] contextEngine.compact() threw during timeout recovery for ${provider}/${modelId}: ${String(compactErr)}`, + ); + timeoutCompactResult = { ok: false, compacted: false, reason: String(compactErr) }; + } + if (timeoutCompactResult.compacted) { + autoCompactionCount += 1; + log.info( + `[timeout-compaction] compaction succeeded for ${provider}/${modelId}; retrying prompt`, + ); + continue; + } else { + log.warn( + `[timeout-compaction] compaction did not reduce context for ${provider}/${modelId}; falling through to normal handling`, + ); + } + } + } + const contextOverflowError = !aborted ? (() => { if (promptError) { From 2ada811e9d713a7ce1e107a8337c67cd41c626c5 Mon Sep 17 00:00:00 2001 From: Joey Krug Date: Sat, 14 Mar 2026 14:50:37 -0400 Subject: [PATCH 2/8] Pi Runner: fix timeout compaction review feedback --- .../run.overflow-compaction.test.ts | 2 + .../run.timeout-triggered-compaction.test.ts | 73 +++++++++++++++ src/agents/pi-embedded-runner/run.ts | 93 ++++++++++--------- 3 files changed, 125 insertions(+), 43 deletions(-) diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts index 1f5f0b6de35..caaf175f85f 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts @@ -44,6 +44,7 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { mockedGlobalHookRunner.runBeforeAgentStart.mockReset(); mockedGlobalHookRunner.runBeforeCompaction.mockReset(); 
mockedGlobalHookRunner.runAfterCompaction.mockReset(); + mockedPickFallbackThinkingLevel.mockReset(); mockedContextEngine.info.ownsCompaction = false; mockedCompactDirect.mockResolvedValue({ ok: false, @@ -63,6 +64,7 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { truncatedCount: 0, reason: "no oversized tool results", }); + mockedPickFallbackThinkingLevel.mockReturnValue(undefined); mockedGlobalHookRunner.hasHooks.mockImplementation(() => false); }); diff --git a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts index 65192a15a6b..24b7720ed81 100644 --- a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts @@ -9,6 +9,7 @@ import { mockedContextEngine, mockedCompactDirect, mockedRunEmbeddedAttempt, + mockedPickFallbackThinkingLevel, resetRunOverflowCompactionHarnessMocks, mockedSessionLikelyHasOversizedToolResults, mockedTruncateOversizedToolResultsInSession, @@ -34,6 +35,7 @@ describe("timeout-triggered compaction", () => { mockedGlobalHookRunner.runBeforeAgentStart.mockReset(); mockedGlobalHookRunner.runBeforeCompaction.mockReset(); mockedGlobalHookRunner.runAfterCompaction.mockReset(); + mockedPickFallbackThinkingLevel.mockReset(); mockedContextEngine.info.ownsCompaction = false; mockedCompactDirect.mockResolvedValue({ ok: false, @@ -53,6 +55,7 @@ describe("timeout-triggered compaction", () => { truncatedCount: 0, reason: "no oversized tool results", }); + mockedPickFallbackThinkingLevel.mockReturnValue(undefined); mockedGlobalHookRunner.hasHooks.mockImplementation(() => false); }); @@ -170,6 +173,31 @@ describe("timeout-triggered compaction", () => { expect(result.payloads?.[0]?.text).toContain("timed out"); }); + it("does not attempt compaction for low-context timeouts on later retries", async () => { + 
mockedPickFallbackThinkingLevel.mockReturnValueOnce("low"); + mockedRunEmbeddedAttempt + .mockResolvedValueOnce( + makeAttemptResult({ + promptError: new Error("unsupported reasoning mode"), + }), + ) + .mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + lastAssistant: { + usage: { total: 20000 }, + } as never, + }), + ); + + const result = await runEmbeddedPiAgent(overflowBaseRunParams); + + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); + expect(mockedCompactDirect).not.toHaveBeenCalled(); + expect(result.payloads?.[0]?.isError).toBe(true); + expect(result.payloads?.[0]?.text).toContain("timed out"); + }); + it("does not attempt compaction when aborted", async () => { mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ @@ -223,4 +251,49 @@ describe("timeout-triggered compaction", () => { expect(result.payloads?.[0]?.isError).toBe(true); expect(result.payloads?.[0]?.text).toContain("timed out"); }); + + it("fires compaction hooks during timeout recovery for ownsCompaction engines", async () => { + mockedContextEngine.info.ownsCompaction = true; + mockedGlobalHookRunner.hasHooks.mockImplementation( + (hookName) => hookName === "before_compaction" || hookName === "after_compaction", + ); + mockedRunEmbeddedAttempt + .mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + lastAssistant: { + usage: { total: 160000 }, + } as never, + }), + ) + .mockResolvedValueOnce(makeAttemptResult({ promptError: null })); + mockedCompactDirect.mockResolvedValueOnce({ + ok: true, + compacted: true, + result: { + summary: "engine-owned timeout compaction", + tokensAfter: 70, + }, + }); + + await runEmbeddedPiAgent(overflowBaseRunParams); + + expect(mockedGlobalHookRunner.runBeforeCompaction).toHaveBeenCalledWith( + { messageCount: -1, sessionFile: "/tmp/session.json" }, + expect.objectContaining({ + sessionKey: "test-key", + }), + ); + expect(mockedGlobalHookRunner.runAfterCompaction).toHaveBeenCalledWith( + { + messageCount: -1, + 
compactedCount: -1, + tokenCount: 70, + sessionFile: "/tmp/session.json", + }, + expect.objectContaining({ + sessionKey: "test-key", + }), + ); + }); }); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 4936ebc6ce4..3309a7af1fb 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -881,6 +881,51 @@ export async function runEmbeddedPiAgent( ensureContextEnginesInitialized(); const contextEngine = await resolveContextEngine(params.config); try { + // When the engine owns compaction, compactEmbeddedPiSessionDirect is + // bypassed. Fire lifecycle hooks here so recovery paths still notify + // subscribers like memory extensions and usage trackers. + const runOwnsCompactionBeforeHook = async (reason: string) => { + if ( + contextEngine.info.ownsCompaction !== true || + !hookRunner?.hasHooks("before_compaction") + ) { + return; + } + try { + await hookRunner.runBeforeCompaction( + { messageCount: -1, sessionFile: params.sessionFile }, + hookCtx, + ); + } catch (hookErr) { + log.warn(`before_compaction hook failed during ${reason}: ${String(hookErr)}`); + } + }; + const runOwnsCompactionAfterHook = async ( + reason: string, + compactResult: Awaited>, + ) => { + if ( + contextEngine.info.ownsCompaction !== true || + !compactResult.ok || + !compactResult.compacted || + !hookRunner?.hasHooks("after_compaction") + ) { + return; + } + try { + await hookRunner.runAfterCompaction( + { + messageCount: -1, + compactedCount: -1, + tokenCount: compactResult.result?.tokensAfter, + sessionFile: params.sessionFile, + }, + hookCtx, + ); + } catch (hookErr) { + log.warn(`after_compaction hook failed during ${reason}: ${String(hookErr)}`); + } + }; let authRetryPending = false; // Hoisted so the retry-limit error path can use the most recent API total. 
let lastTurnTotal: number | undefined; @@ -1053,16 +1098,14 @@ export async function runEmbeddedPiAgent( if (timedOut && !aborted && !timedOutDuringCompaction) { const tokenUsedRatio = lastTurnTotal != null && ctxInfo.tokens > 0 ? lastTurnTotal / ctxInfo.tokens : 0; - if ( - tokenUsedRatio > 0.65 || - (overflowCompactionAttempts === 0 && runLoopIterations > 1) - ) { + if (tokenUsedRatio > 0.65) { const timeoutDiagId = createCompactionDiagId(); log.warn( `[timeout-compaction] LLM timed out with high context usage (${Math.round(tokenUsedRatio * 100)}%); ` + `attempting compaction before retry diagId=${timeoutDiagId}`, ); let timeoutCompactResult: Awaited>; + await runOwnsCompactionBeforeHook("timeout recovery"); try { timeoutCompactResult = await contextEngine.compact({ sessionId: params.sessionId, @@ -1102,6 +1145,7 @@ export async function runEmbeddedPiAgent( ); timeoutCompactResult = { ok: false, compacted: false, reason: String(compactErr) }; } + await runOwnsCompactionAfterHook("timeout recovery", timeoutCompactResult); if (timeoutCompactResult.compacted) { autoCompactionCount += 1; log.info( @@ -1181,24 +1225,7 @@ export async function runEmbeddedPiAgent( `context overflow detected (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); attempting auto-compaction for ${provider}/${modelId}`, ); let compactResult: Awaited>; - // When the engine owns compaction, hooks are not fired inside - // compactEmbeddedPiSessionDirect (which is bypassed). Fire them - // here so subscribers (memory extensions, usage trackers) are - // notified even on overflow-recovery compactions. - const overflowEngineOwnsCompaction = contextEngine.info.ownsCompaction === true; - const overflowHookRunner = overflowEngineOwnsCompaction ? 
hookRunner : null; - if (overflowHookRunner?.hasHooks("before_compaction")) { - try { - await overflowHookRunner.runBeforeCompaction( - { messageCount: -1, sessionFile: params.sessionFile }, - hookCtx, - ); - } catch (hookErr) { - log.warn( - `before_compaction hook failed during overflow recovery: ${String(hookErr)}`, - ); - } - } + await runOwnsCompactionBeforeHook("overflow recovery"); try { compactResult = await contextEngine.compact({ sessionId: params.sessionId, @@ -1250,27 +1277,7 @@ export async function runEmbeddedPiAgent( ); compactResult = { ok: false, compacted: false, reason: String(compactErr) }; } - if ( - compactResult.ok && - compactResult.compacted && - overflowHookRunner?.hasHooks("after_compaction") - ) { - try { - await overflowHookRunner.runAfterCompaction( - { - messageCount: -1, - compactedCount: -1, - tokenCount: compactResult.result?.tokensAfter, - sessionFile: params.sessionFile, - }, - hookCtx, - ); - } catch (hookErr) { - log.warn( - `after_compaction hook failed during overflow recovery: ${String(hookErr)}`, - ); - } - } + await runOwnsCompactionAfterHook("overflow recovery", compactResult); if (compactResult.compacted) { autoCompactionCount += 1; log.info(`auto-compaction succeeded for ${provider}/${modelId}; retrying prompt`); From 0fa1aea357841b8da7eb78ec83922ddab3a37e18 Mon Sep 17 00:00:00 2001 From: Joey Krug Date: Sat, 14 Mar 2026 16:13:10 -0400 Subject: [PATCH 3/8] fix: cap timeout-compaction retries to prevent rotation bypass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When timeout-triggered compaction succeeds, the run loop retries the same model via `continue`. Without a cap, repeated timeouts with successful compaction (e.g. force: true on long sessions) create an infinite compact→retry→timeout→compact loop that never falls through to the failover rotation path (shouldRotate). 
Add a timeoutCompactionAttempts counter (max 1) so after one successful timeout compaction, subsequent timeouts skip compaction and fall through to profile/model rotation. Co-Authored-By: Claude Opus 4.6 --- .../run.timeout-triggered-compaction.test.ts | 39 +++++++++++++++++++ src/agents/pi-embedded-runner/run.ts | 9 ++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts index 24b7720ed81..b92c31ba602 100644 --- a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts @@ -231,6 +231,45 @@ describe("timeout-triggered compaction", () => { expect(mockedCompactDirect).not.toHaveBeenCalled(); }); + it("falls through to failover rotation after max timeout compaction attempts", async () => { + // First attempt: timeout with high usage (150k / 200k = 75%) + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + lastAssistant: { + usage: { total: 150000 }, + } as never, + }), + ); + // Compaction succeeds on first timeout + mockedCompactDirect.mockResolvedValueOnce( + makeCompactionSuccess({ + summary: "timeout recovery compaction", + tokensBefore: 150000, + tokensAfter: 80000, + }), + ); + // Second attempt after compaction: also times out with high usage + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + lastAssistant: { + usage: { total: 140000 }, + } as never, + }), + ); + + const result = await runEmbeddedPiAgent(overflowBaseRunParams); + + // Compaction was only attempted once (first timeout); second timeout + // should NOT trigger compaction because the counter is exhausted. 
+ expect(mockedCompactDirect).toHaveBeenCalledTimes(1); + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); + // Falls through to timeout error payload (failover rotation path) + expect(result.payloads?.[0]?.isError).toBe(true); + expect(result.payloads?.[0]?.text).toContain("timed out"); + }); + it("catches thrown errors from contextEngine.compact during timeout recovery", async () => { // Timeout with high usage mockedRunEmbeddedAttempt.mockResolvedValueOnce( diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 3309a7af1fb..f3b47d7dbb5 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -815,6 +815,7 @@ export async function runEmbeddedPiAgent( } }; + const MAX_TIMEOUT_COMPACTION_ATTEMPTS = 1; const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3; const MAX_RUN_LOOP_ITERATIONS = resolveMaxRunRetryIterations(profileCandidates.length); let overflowCompactionAttempts = 0; @@ -827,6 +828,7 @@ export async function runEmbeddedPiAgent( let autoCompactionCount = 0; let runLoopIterations = 0; let overloadFailoverAttempts = 0; + let timeoutCompactionAttempts = 0; const maybeMarkAuthProfileFailure = async (failure: { profileId?: string; reason?: AuthProfileFailureReason | null; @@ -1098,7 +1100,11 @@ export async function runEmbeddedPiAgent( if (timedOut && !aborted && !timedOutDuringCompaction) { const tokenUsedRatio = lastTurnTotal != null && ctxInfo.tokens > 0 ? 
lastTurnTotal / ctxInfo.tokens : 0; - if (tokenUsedRatio > 0.65) { + if (timeoutCompactionAttempts >= MAX_TIMEOUT_COMPACTION_ATTEMPTS) { + log.warn( + `[timeout-compaction] already compacted ${timeoutCompactionAttempts} time(s) for timeouts; falling through to failover rotation`, + ); + } else if (tokenUsedRatio > 0.65) { const timeoutDiagId = createCompactionDiagId(); log.warn( `[timeout-compaction] LLM timed out with high context usage (${Math.round(tokenUsedRatio * 100)}%); ` + @@ -1148,6 +1154,7 @@ export async function runEmbeddedPiAgent( await runOwnsCompactionAfterHook("timeout recovery", timeoutCompactResult); if (timeoutCompactResult.compacted) { autoCompactionCount += 1; + timeoutCompactionAttempts += 1; log.info( `[timeout-compaction] compaction succeeded for ${provider}/${modelId}; retrying prompt`, ); From cedbcf5f76efcf30ae67c758c653dfc7795a04b0 Mon Sep 17 00:00:00 2001 From: Joey Krug Date: Sat, 14 Mar 2026 16:37:20 -0400 Subject: [PATCH 4/8] fix: use prompt tokens for timeout compaction ratio --- .../run.timeout-triggered-compaction.test.ts | 55 ++++++++++++------- src/agents/pi-embedded-runner/run.ts | 8 ++- 2 files changed, 43 insertions(+), 20 deletions(-) diff --git a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts index b92c31ba602..8fda6b771e4 100644 --- a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts @@ -59,13 +59,13 @@ describe("timeout-triggered compaction", () => { mockedGlobalHookRunner.hasHooks.mockImplementation(() => false); }); - it("attempts compaction when LLM times out with high context usage (>65%)", async () => { - // First attempt: timeout with high usage (150k / 200k = 75%) + it("attempts compaction when LLM times out with high prompt usage (>65%)", async () => { + // First attempt: timeout with high prompt usage (150k / 200k = 75%) 
mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 150000 }, + usage: { input: 150000, output: 20000, total: 170000 }, } as never, }), ); @@ -102,12 +102,12 @@ describe("timeout-triggered compaction", () => { }); it("retries the prompt after successful timeout compaction", async () => { - // First attempt: timeout with high usage + // First attempt: timeout with high prompt usage carried by cache reads mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 160000 }, + usage: { input: 20000, cacheRead: 120000, output: 15000, total: 155000 }, } as never, }), ); @@ -130,12 +130,12 @@ describe("timeout-triggered compaction", () => { }); it("falls through to normal handling when timeout compaction fails", async () => { - // Timeout with high usage + // Timeout with high prompt usage mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 150000 }, + usage: { input: 150000, output: 10000, total: 160000 }, } as never, }), ); @@ -155,12 +155,12 @@ describe("timeout-triggered compaction", () => { }); it("does not attempt compaction when context usage is low", async () => { - // Timeout with low usage (20k / 200k = 10%) + // Timeout with low prompt usage (20k / 200k = 10%) mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 20000 }, + usage: { input: 20000, output: 5000, total: 25000 }, } as never, }), ); @@ -185,7 +185,7 @@ describe("timeout-triggered compaction", () => { makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 20000 }, + usage: { input: 20000, output: 5000, total: 25000 }, } as never, }), ); @@ -204,7 +204,7 @@ describe("timeout-triggered compaction", () => { timedOut: true, aborted: true, lastAssistant: { - usage: { total: 180000 }, + usage: { input: 180000, output: 5000, total: 
185000 }, } as never, }), ); @@ -220,7 +220,7 @@ describe("timeout-triggered compaction", () => { timedOut: true, timedOutDuringCompaction: true, lastAssistant: { - usage: { total: 180000 }, + usage: { input: 180000, output: 5000, total: 185000 }, } as never, }), ); @@ -232,12 +232,12 @@ describe("timeout-triggered compaction", () => { }); it("falls through to failover rotation after max timeout compaction attempts", async () => { - // First attempt: timeout with high usage (150k / 200k = 75%) + // First attempt: timeout with high prompt usage (150k / 200k = 75%) mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 150000 }, + usage: { input: 150000, output: 10000, total: 160000 }, } as never, }), ); @@ -249,12 +249,12 @@ describe("timeout-triggered compaction", () => { tokensAfter: 80000, }), ); - // Second attempt after compaction: also times out with high usage + // Second attempt after compaction: also times out with high prompt usage mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 140000 }, + usage: { input: 140000, output: 12000, total: 152000 }, } as never, }), ); @@ -271,12 +271,12 @@ describe("timeout-triggered compaction", () => { }); it("catches thrown errors from contextEngine.compact during timeout recovery", async () => { - // Timeout with high usage + // Timeout with high prompt usage mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 150000 }, + usage: { input: 150000, output: 10000, total: 160000 }, } as never, }), ); @@ -301,7 +301,7 @@ describe("timeout-triggered compaction", () => { makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 160000 }, + usage: { input: 160000, output: 10000, total: 170000 }, } as never, }), ) @@ -335,4 +335,21 @@ describe("timeout-triggered compaction", () => { }), ); }); + + it("does not attempt 
compaction when only output tokens are high", async () => { + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + lastAssistant: { + usage: { input: 20000, output: 170000, total: 190000 }, + } as never, + }), + ); + + const result = await runEmbeddedPiAgent(overflowBaseRunParams); + + expect(mockedCompactDirect).not.toHaveBeenCalled(); + expect(result.payloads?.[0]?.isError).toBe(true); + expect(result.payloads?.[0]?.text).toContain("timed out"); + }); }); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index f3b47d7dbb5..6c589554dca 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1098,8 +1098,14 @@ export async function runEmbeddedPiAgent( // When the LLM times out with high context usage, compact before // retrying to break the death spiral of repeated timeouts. if (timedOut && !aborted && !timedOutDuringCompaction) { + // Only consider prompt-side tokens here. API totals include output + // tokens, which can make a long generation look like high context + // pressure even when the prompt itself was small. + const lastTurnPromptTokens = derivePromptTokens(lastRunPromptUsage); const tokenUsedRatio = - lastTurnTotal != null && ctxInfo.tokens > 0 ? lastTurnTotal / ctxInfo.tokens : 0; + lastTurnPromptTokens != null && ctxInfo.tokens > 0 + ? 
lastTurnPromptTokens / ctxInfo.tokens + : 0; if (timeoutCompactionAttempts >= MAX_TIMEOUT_COMPACTION_ATTEMPTS) { log.warn( `[timeout-compaction] already compacted ${timeoutCompactionAttempts} time(s) for timeouts; falling through to failover rotation`, From 92f298be06ba570da26b9ad66edc741503711015 Mon Sep 17 00:00:00 2001 From: Joey Krug Date: Sat, 14 Mar 2026 17:04:36 -0400 Subject: [PATCH 5/8] fix: always increment timeout-compaction counter and use prompt tokens for ratio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Restore prompt-token-based ratio (derivePromptTokens) that was lost in the main merge; total tokens include output which inflates the ratio after long generations. - Move timeoutCompactionAttempts increment before the compacted check so failed compaction (compacted:false or thrown error) also consumes the attempt, preventing unbounded retry loops on persistent failure. - Update all test usage fixtures from total → input to match prompt-token ratio behavior. - Add tests: failed compaction increments counter, thrown compact() increments counter, prompt-vs-total token ratio discrimination. 
Co-Authored-By: Claude Opus 4.6 --- .../run.timeout-triggered-compaction.test.ts | 81 ++++++++++++++----- src/agents/pi-embedded-runner/run.ts | 4 +- 2 files changed, 65 insertions(+), 20 deletions(-) diff --git a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts index 8fda6b771e4..c5d4ea5d1ad 100644 --- a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts @@ -6,8 +6,8 @@ import { mockedDescribeFailoverError, mockedGlobalHookRunner, mockedResolveFailoverStatus, - mockedContextEngine, mockedCompactDirect, + mockedContextEngine, mockedRunEmbeddedAttempt, mockedPickFallbackThinkingLevel, resetRunOverflowCompactionHarnessMocks, @@ -59,13 +59,13 @@ describe("timeout-triggered compaction", () => { mockedGlobalHookRunner.hasHooks.mockImplementation(() => false); }); - it("attempts compaction when LLM times out with high prompt usage (>65%)", async () => { + it("attempts compaction when LLM times out with high prompt token usage (>65%)", async () => { // First attempt: timeout with high prompt usage (150k / 200k = 75%) mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { input: 150000, output: 20000, total: 170000 }, + usage: { input: 150000 }, } as never, }), ); @@ -102,12 +102,12 @@ describe("timeout-triggered compaction", () => { }); it("retries the prompt after successful timeout compaction", async () => { - // First attempt: timeout with high prompt usage carried by cache reads + // First attempt: timeout with high prompt usage mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { input: 20000, cacheRead: 120000, output: 15000, total: 155000 }, + usage: { input: 160000 }, } as never, }), ); @@ -135,7 +135,7 @@ describe("timeout-triggered compaction", () => { 
makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { input: 150000, output: 10000, total: 160000 }, + usage: { input: 150000 }, } as never, }), ); @@ -154,13 +154,13 @@ describe("timeout-triggered compaction", () => { expect(result.payloads?.[0]?.text).toContain("timed out"); }); - it("does not attempt compaction when context usage is low", async () => { + it("does not attempt compaction when prompt token usage is low", async () => { // Timeout with low prompt usage (20k / 200k = 10%) mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { input: 20000, output: 5000, total: 25000 }, + usage: { input: 20000 }, } as never, }), ); @@ -185,7 +185,7 @@ describe("timeout-triggered compaction", () => { makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { input: 20000, output: 5000, total: 25000 }, + usage: { input: 20000 }, } as never, }), ); @@ -204,7 +204,7 @@ describe("timeout-triggered compaction", () => { timedOut: true, aborted: true, lastAssistant: { - usage: { input: 180000, output: 5000, total: 185000 }, + usage: { input: 180000 }, } as never, }), ); @@ -220,7 +220,7 @@ describe("timeout-triggered compaction", () => { timedOut: true, timedOutDuringCompaction: true, lastAssistant: { - usage: { input: 180000, output: 5000, total: 185000 }, + usage: { input: 180000 }, } as never, }), ); @@ -237,7 +237,7 @@ describe("timeout-triggered compaction", () => { makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { input: 150000, output: 10000, total: 160000 }, + usage: { input: 150000 }, } as never, }), ); @@ -249,12 +249,12 @@ describe("timeout-triggered compaction", () => { tokensAfter: 80000, }), ); - // Second attempt after compaction: also times out with high prompt usage + // Second attempt after compaction: also times out with high usage mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { input: 140000, output: 12000, 
total: 152000 }, + usage: { input: 140000 }, } as never, }), ); @@ -276,7 +276,7 @@ describe("timeout-triggered compaction", () => { makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { input: 150000, output: 10000, total: 160000 }, + usage: { input: 150000 }, } as never, }), ); @@ -301,7 +301,7 @@ describe("timeout-triggered compaction", () => { makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { input: 160000, output: 10000, total: 170000 }, + usage: { input: 160000 }, } as never, }), ) @@ -336,18 +336,63 @@ describe("timeout-triggered compaction", () => { ); }); - it("does not attempt compaction when only output tokens are high", async () => { + it("increments attempt counter even when compaction returns compacted:false", async () => { + // First timeout: high prompt usage, compaction fails (compacted:false) mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { input: 20000, output: 170000, total: 190000 }, + usage: { input: 150000 }, + } as never, + }), + ); + mockedCompactDirect.mockResolvedValueOnce({ + ok: false, + compacted: false, + reason: "nothing to compact", + }); + // The failed compaction falls through to timeout error; the runner + // returns with an error payload (no retry because compacted was false). 
+ const result = await runEmbeddedPiAgent(overflowBaseRunParams); + + expect(mockedCompactDirect).toHaveBeenCalledTimes(1); + expect(result.payloads?.[0]?.isError).toBe(true); + }); + + it("increments attempt counter when compact() throws, blocking subsequent attempts", async () => { + // First timeout: high prompt usage, compact() throws + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + lastAssistant: { + usage: { input: 150000 }, + } as never, + }), + ); + mockedCompactDirect.mockRejectedValueOnce(new Error("engine crashed")); + // Falls through to timeout error on first attempt + const result = await runEmbeddedPiAgent(overflowBaseRunParams); + + expect(mockedCompactDirect).toHaveBeenCalledTimes(1); + expect(result.payloads?.[0]?.isError).toBe(true); + }); + + it("uses prompt/input tokens for ratio, not total tokens", async () => { + // Timeout where total tokens are high (150k) but input/prompt tokens + // are low (20k / 200k = 10%). Should NOT trigger compaction because + // the ratio is based on prompt tokens, not total. 
+ mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + lastAssistant: { + usage: { input: 20000, total: 150000 }, } as never, }), ); const result = await runEmbeddedPiAgent(overflowBaseRunParams); + // Despite high total tokens, low prompt tokens mean no compaction expect(mockedCompactDirect).not.toHaveBeenCalled(); expect(result.payloads?.[0]?.isError).toBe(true); expect(result.payloads?.[0]?.text).toContain("timed out"); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 6c589554dca..e723c513384 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1113,7 +1113,7 @@ export async function runEmbeddedPiAgent( } else if (tokenUsedRatio > 0.65) { const timeoutDiagId = createCompactionDiagId(); log.warn( - `[timeout-compaction] LLM timed out with high context usage (${Math.round(tokenUsedRatio * 100)}%); ` + + `[timeout-compaction] LLM timed out with high prompt token usage (${Math.round(tokenUsedRatio * 100)}%); ` + `attempting compaction before retry diagId=${timeoutDiagId}`, ); let timeoutCompactResult: Awaited<ReturnType<typeof contextEngine.compact>>; @@ -1158,9 +1158,9 @@ export async function runEmbeddedPiAgent( timeoutCompactResult = { ok: false, compacted: false, reason: String(compactErr) }; } await runOwnsCompactionAfterHook("timeout recovery", timeoutCompactResult); + timeoutCompactionAttempts += 1; if (timeoutCompactResult.compacted) { autoCompactionCount += 1; - timeoutCompactionAttempts += 1; log.info( `[timeout-compaction] compaction succeeded for ${provider}/${modelId}; retrying prompt`, ); From d1b7e5ae9a92d53e97815305a1da8a99aeb29148 Mon Sep 17 00:00:00 2001 From: Joey Krug Date: Sat, 14 Mar 2026 18:07:18 -0400 Subject: [PATCH 6/8] fix: trigger timeout compaction for real timeouts --- .../run.timeout-triggered-compaction.test.ts | 16 +++++++++++++--- src/agents/pi-embedded-runner/run.ts | 9 +++++---- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git 
a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts index c5d4ea5d1ad..bccae68bfeb 100644 --- a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts @@ -198,7 +198,7 @@ describe("timeout-triggered compaction", () => { expect(result.payloads?.[0]?.text).toContain("timed out"); }); - it("does not attempt compaction when aborted", async () => { + it("still attempts compaction for timed-out attempts that set aborted", async () => { mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, @@ -208,10 +208,20 @@ describe("timeout-triggered compaction", () => { } as never, }), ); + mockedCompactDirect.mockResolvedValueOnce( + makeCompactionSuccess({ + summary: "timeout recovery compaction", + tokensBefore: 180000, + tokensAfter: 90000, + }), + ); + mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null })); - await runEmbeddedPiAgent(overflowBaseRunParams); + const result = await runEmbeddedPiAgent(overflowBaseRunParams); - expect(mockedCompactDirect).not.toHaveBeenCalled(); + expect(mockedCompactDirect).toHaveBeenCalledTimes(1); + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); + expect(result.meta.error).toBeUndefined(); }); it("does not attempt compaction when timedOutDuringCompaction is true", async () => { diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index e723c513384..1a39f383ff8 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1097,7 +1097,7 @@ export async function runEmbeddedPiAgent( // ── Timeout-triggered compaction ────────────────────────────────── // When the LLM times out with high context usage, compact before // retrying to break the death spiral of repeated timeouts. 
- if (timedOut && !aborted && !timedOutDuringCompaction) { + if (timedOut && !timedOutDuringCompaction) { // Only consider prompt-side tokens here. API totals include output // tokens, which can make a long generation look like high context // pressure even when the prompt itself was small. @@ -1112,12 +1112,14 @@ export async function runEmbeddedPiAgent( ); } else if (tokenUsedRatio > 0.65) { const timeoutDiagId = createCompactionDiagId(); + const nextTimeoutCompactionAttempt = timeoutCompactionAttempts + 1; log.warn( `[timeout-compaction] LLM timed out with high prompt token usage (${Math.round(tokenUsedRatio * 100)}%); ` + `attempting compaction before retry diagId=${timeoutDiagId}`, ); let timeoutCompactResult: Awaited<ReturnType<typeof contextEngine.compact>>; await runOwnsCompactionBeforeHook("timeout recovery"); + timeoutCompactionAttempts = nextTimeoutCompactionAttempt; try { timeoutCompactResult = await contextEngine.compact({ sessionId: params.sessionId, @@ -1147,8 +1149,8 @@ export async function runEmbeddedPiAgent( ownerNumbers: params.ownerNumbers, trigger: "timeout_recovery", diagId: timeoutDiagId, - attempt: 1, - maxAttempts: 1, + attempt: nextTimeoutCompactionAttempt, + maxAttempts: MAX_TIMEOUT_COMPACTION_ATTEMPTS, }, }); } catch (compactErr) { @@ -1158,7 +1160,6 @@ export async function runEmbeddedPiAgent( timeoutCompactResult = { ok: false, compacted: false, reason: String(compactErr) }; } await runOwnsCompactionAfterHook("timeout recovery", timeoutCompactResult); - timeoutCompactionAttempts += 1; if (timeoutCompactResult.compacted) { autoCompactionCount += 1; log.info( From b80bba8e12b1150ac0a065ff19d6dab85b6e551e Mon Sep 17 00:00:00 2001 From: Joey Krug Date: Sat, 14 Mar 2026 18:37:49 -0400 Subject: [PATCH 7/8] test: cover timeout compaction retry cap --- .../run.overflow-compaction.harness.ts | 28 ++++- .../run.timeout-triggered-compaction.test.ts | 117 ++++++++++++++---- src/agents/pi-embedded-runner/run.ts | 2 +- 3 files changed, 116 insertions(+), 31 deletions(-) diff --git 
a/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts index 9e7853ef7d5..b1664434d67 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts @@ -136,6 +136,15 @@ export const mockedIsLikelyContextOverflowError = vi.fn((msg?: string) => { export const mockedPickFallbackThinkingLevel = vi.fn<(params?: unknown) => ThinkLevel | null>( () => null, ); +export const mockedGetApiKeyForModel = vi.fn( + async ({ profileId }: { profileId?: string } = {}) => ({ + apiKey: "test-key", + profileId: profileId ?? "test-profile", + source: "test", + mode: "api-key" as const, + }), +); +export const mockedResolveAuthProfileOrder = vi.fn(() => [] as string[]); export const overflowBaseRunParams = { sessionId: "test-session", @@ -223,6 +232,17 @@ export function resetRunOverflowCompactionHarnessMocks(): void { }); mockedPickFallbackThinkingLevel.mockReset(); mockedPickFallbackThinkingLevel.mockReturnValue(null); + mockedGetApiKeyForModel.mockReset(); + mockedGetApiKeyForModel.mockImplementation( + async ({ profileId }: { profileId?: string } = {}) => ({ + apiKey: "test-key", + profileId: profileId ?? 
"test-profile", + source: "test", + mode: "api-key", + }), + ); + mockedResolveAuthProfileOrder.mockReset(); + mockedResolveAuthProfileOrder.mockReturnValue([]); } export async function loadRunOverflowCompactionHarness(): Promise<{ @@ -322,12 +342,8 @@ export async function loadRunOverflowCompactionHarness(): Promise<{ vi.doMock("../model-auth.js", () => ({ applyLocalNoAuthHeaderOverride: vi.fn((model: unknown) => model), ensureAuthProfileStore: vi.fn(() => ({})), - getApiKeyForModel: vi.fn(async () => ({ - apiKey: "test-key", - profileId: "test-profile", - source: "test", - })), - resolveAuthProfileOrder: vi.fn(() => []), + getApiKeyForModel: mockedGetApiKeyForModel, + resolveAuthProfileOrder: mockedResolveAuthProfileOrder, })); vi.doMock("../models-config.js", () => ({ diff --git a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts index bccae68bfeb..647d449ceca 100644 --- a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts @@ -9,7 +9,9 @@ import { mockedCompactDirect, mockedContextEngine, mockedRunEmbeddedAttempt, + mockedGetApiKeyForModel, mockedPickFallbackThinkingLevel, + mockedResolveAuthProfileOrder, resetRunOverflowCompactionHarnessMocks, mockedSessionLikelyHasOversizedToolResults, mockedTruncateOversizedToolResultsInSession, @@ -18,6 +20,16 @@ import { let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent; +const useTwoAuthProfiles = () => { + mockedResolveAuthProfileOrder.mockReturnValue(["profile-a", "profile-b"]); + mockedGetApiKeyForModel.mockImplementation(async ({ profileId } = {}) => ({ + apiKey: `test-key-${profileId ?? "profile-a"}`, + profileId: profileId ?? 
"profile-a", + source: "test", + mode: "api-key", + })); +}; + describe("timeout-triggered compaction", () => { beforeAll(async () => { ({ runEmbeddedPiAgent } = await loadRunOverflowCompactionHarness()); @@ -57,6 +69,13 @@ describe("timeout-triggered compaction", () => { }); mockedPickFallbackThinkingLevel.mockReturnValue(undefined); mockedGlobalHookRunner.hasHooks.mockImplementation(() => false); + mockedGetApiKeyForModel.mockImplementation(async ({ profileId }) => ({ + apiKey: "test-key", + profileId: profileId ?? "test-profile", + source: "test", + mode: "api-key", + })); + mockedResolveAuthProfileOrder.mockReturnValue([]); }); it("attempts compaction when LLM times out with high prompt token usage (>65%)", async () => { @@ -346,45 +365,95 @@ describe("timeout-triggered compaction", () => { ); }); - it("increments attempt counter even when compaction returns compacted:false", async () => { - // First timeout: high prompt usage, compaction fails (compacted:false) - mockedRunEmbeddedAttempt.mockResolvedValueOnce( - makeAttemptResult({ - timedOut: true, - lastAssistant: { - usage: { input: 150000 }, - } as never, - }), - ); + it("counts compacted:false timeout compactions against the retry cap across profile rotation", async () => { + useTwoAuthProfiles(); + mockedRunEmbeddedAttempt + .mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + aborted: true, + lastAssistant: { + usage: { input: 150000 }, + } as never, + }), + ) + .mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + aborted: true, + lastAssistant: { + usage: { input: 150000 }, + } as never, + }), + ); mockedCompactDirect.mockResolvedValueOnce({ ok: false, compacted: false, reason: "nothing to compact", }); - // The failed compaction falls through to timeout error; the runner - // returns with an error payload (no retry because compacted was false). 
+ const result = await runEmbeddedPiAgent(overflowBaseRunParams); expect(mockedCompactDirect).toHaveBeenCalledTimes(1); - expect(result.payloads?.[0]?.isError).toBe(true); - }); - - it("increments attempt counter when compact() throws, blocking subsequent attempts", async () => { - // First timeout: high prompt usage, compact() throws - mockedRunEmbeddedAttempt.mockResolvedValueOnce( - makeAttemptResult({ - timedOut: true, - lastAssistant: { - usage: { input: 150000 }, - } as never, + expect(mockedCompactDirect).toHaveBeenCalledWith( + expect.objectContaining({ + runtimeContext: expect.objectContaining({ + authProfileId: "profile-a", + attempt: 1, + maxAttempts: 1, + }), }), ); + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); + expect(mockedRunEmbeddedAttempt).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ authProfileId: "profile-a" }), + ); + expect(mockedRunEmbeddedAttempt).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ authProfileId: "profile-b" }), + ); + expect(result.payloads?.[0]?.isError).toBe(true); + expect(result.payloads?.[0]?.text).toContain("timed out"); + }); + + it("counts thrown timeout compactions against the retry cap across profile rotation", async () => { + useTwoAuthProfiles(); + mockedRunEmbeddedAttempt + .mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + aborted: true, + lastAssistant: { + usage: { input: 150000 }, + } as never, + }), + ) + .mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + aborted: true, + lastAssistant: { + usage: { input: 150000 }, + } as never, + }), + ); mockedCompactDirect.mockRejectedValueOnce(new Error("engine crashed")); - // Falls through to timeout error on first attempt + const result = await runEmbeddedPiAgent(overflowBaseRunParams); expect(mockedCompactDirect).toHaveBeenCalledTimes(1); + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); + expect(mockedRunEmbeddedAttempt).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ 
authProfileId: "profile-a" }), + ); + expect(mockedRunEmbeddedAttempt).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ authProfileId: "profile-b" }), + ); expect(result.payloads?.[0]?.isError).toBe(true); + expect(result.payloads?.[0]?.text).toContain("timed out"); }); it("uses prompt/input tokens for ratio, not total tokens", async () => { diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 1a39f383ff8..5c4c1cd1665 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1108,7 +1108,7 @@ export async function runEmbeddedPiAgent( : 0; if (timeoutCompactionAttempts >= MAX_TIMEOUT_COMPACTION_ATTEMPTS) { log.warn( - `[timeout-compaction] already compacted ${timeoutCompactionAttempts} time(s) for timeouts; falling through to failover rotation`, + `[timeout-compaction] already attempted timeout compaction ${timeoutCompactionAttempts} time(s); falling through to failover rotation`, ); } else if (tokenUsedRatio > 0.65) { const timeoutDiagId = createCompactionDiagId(); From 4db9a6d8d04b4c1858371cad3be8d277d944cf3f Mon Sep 17 00:00:00 2001 From: Joey Krug Date: Sat, 21 Mar 2026 00:56:36 -0400 Subject: [PATCH 8/8] fix: address codex review comments on #46417 - Increase MAX_TIMEOUT_COMPACTION_ATTEMPTS to 2 so timeout retries can reach failover rotation after consecutive failures - Increment timeoutCompactionAttempts before the attempt so failed compactions count toward the retry cap - Use dynamic counter for attempt/maxAttempts (consistent with overflow path) - Call runPostCompactionSideEffects after successful timeout compaction to run hooks that the normal compaction entrypoint performs - Add proper test mocking for compact.js import --- src/agents/pi-embedded-runner/compact.ts | 2 +- .../run.overflow-compaction.harness.ts | 7 ++ .../run.timeout-triggered-compaction.test.ts | 91 ++++++++++++++----- src/agents/pi-embedded-runner/run.ts | 15 ++- 4 files changed, 84 insertions(+), 31 
deletions(-) diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index d76a01ed5af..55ecb9881b4 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -360,7 +360,7 @@ function syncPostCompactionSessionMemory(params: { return Promise.resolve(); } -async function runPostCompactionSideEffects(params: { +export async function runPostCompactionSideEffects(params: { config?: OpenClawConfig; sessionKey?: string; sessionFile: string; diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts index b1664434d67..99363d4652c 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts @@ -62,6 +62,7 @@ export const mockedContextEngine = { export const mockedContextEngineCompact = mockedContextEngine.compact; export const mockedCompactDirect = mockedContextEngine.compact; +export const mockedRunPostCompactionSideEffects = vi.fn(async () => {}); export const mockedEnsureRuntimePluginsLoaded = vi.fn<(params?: unknown) => void>(); export const mockedPrepareProviderRuntimeAuth = vi.fn(async () => undefined); export const mockedRunEmbeddedAttempt = @@ -243,6 +244,8 @@ export function resetRunOverflowCompactionHarnessMocks(): void { ); mockedResolveAuthProfileOrder.mockReset(); mockedResolveAuthProfileOrder.mockReturnValue([]); + mockedRunPostCompactionSideEffects.mockReset(); + mockedRunPostCompactionSideEffects.mockResolvedValue(undefined); } export async function loadRunOverflowCompactionHarness(): Promise<{ @@ -408,6 +411,10 @@ export async function loadRunOverflowCompactionHarness(): Promise<{ sessionLikelyHasOversizedToolResults: mockedSessionLikelyHasOversizedToolResults, })); + vi.doMock("./compact.js", () => ({ + runPostCompactionSideEffects: mockedRunPostCompactionSideEffects, + })); + vi.doMock("./utils.js", 
() => ({ describeUnknownError: vi.fn((err: unknown) => { if (err instanceof Error) { diff --git a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts index 647d449ceca..d74b689e695 100644 --- a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts @@ -15,6 +15,7 @@ import { resetRunOverflowCompactionHarnessMocks, mockedSessionLikelyHasOversizedToolResults, mockedTruncateOversizedToolResultsInSession, + mockedRunPostCompactionSideEffects, overflowBaseRunParams, } from "./run.overflow-compaction.harness.js"; @@ -48,6 +49,8 @@ describe("timeout-triggered compaction", () => { mockedGlobalHookRunner.runBeforeCompaction.mockReset(); mockedGlobalHookRunner.runAfterCompaction.mockReset(); mockedPickFallbackThinkingLevel.mockReset(); + mockedRunPostCompactionSideEffects.mockReset(); + mockedRunPostCompactionSideEffects.mockResolvedValue(undefined); mockedContextEngine.info.ownsCompaction = false; mockedCompactDirect.mockResolvedValue({ ok: false, @@ -112,7 +115,7 @@ describe("timeout-triggered compaction", () => { runtimeContext: expect.objectContaining({ trigger: "timeout_recovery", attempt: 1, - maxAttempts: 1, + maxAttempts: 2, }), }), ); @@ -145,6 +148,13 @@ describe("timeout-triggered compaction", () => { // Verify the loop continued (retry happened) expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); + // Post-compaction side effects (transcript update, memory sync) should fire + expect(mockedRunPostCompactionSideEffects).toHaveBeenCalledTimes(1); + expect(mockedRunPostCompactionSideEffects).toHaveBeenCalledWith( + expect.objectContaining({ + sessionFile: "/tmp/session.json", + }), + ); expect(result.meta.error).toBeUndefined(); }); @@ -270,10 +280,10 @@ describe("timeout-triggered compaction", () => { } as never, }), ); - // Compaction succeeds on first timeout + // First 
compaction succeeds mockedCompactDirect.mockResolvedValueOnce( makeCompactionSuccess({ - summary: "timeout recovery compaction", + summary: "timeout recovery compaction 1", tokensBefore: 150000, tokensAfter: 80000, }), @@ -287,13 +297,29 @@ describe("timeout-triggered compaction", () => { } as never, }), ); + // Second compaction also succeeds + mockedCompactDirect.mockResolvedValueOnce( + makeCompactionSuccess({ + summary: "timeout recovery compaction 2", + tokensBefore: 140000, + tokensAfter: 70000, + }), + ); + // Third attempt after second compaction: still times out + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + lastAssistant: { + usage: { input: 130000 }, + } as never, + }), + ); const result = await runEmbeddedPiAgent(overflowBaseRunParams); - // Compaction was only attempted once (first timeout); second timeout - // should NOT trigger compaction because the counter is exhausted. - expect(mockedCompactDirect).toHaveBeenCalledTimes(1); - expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); + // Both compaction attempts used; third timeout falls through. 
+ expect(mockedCompactDirect).toHaveBeenCalledTimes(2); + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(3); // Falls through to timeout error payload (failover rotation path) expect(result.payloads?.[0]?.isError).toBe(true); expect(result.payloads?.[0]?.text).toContain("timed out"); @@ -367,6 +393,7 @@ describe("timeout-triggered compaction", () => { it("counts compacted:false timeout compactions against the retry cap across profile rotation", async () => { useTwoAuthProfiles(); + // Attempt 1 (profile-a): timeout → compaction #1 fails → rotate to profile-b mockedRunEmbeddedAttempt .mockResolvedValueOnce( makeAttemptResult({ @@ -377,6 +404,7 @@ describe("timeout-triggered compaction", () => { } as never, }), ) + // Attempt 2 (profile-b): timeout → compaction #2 fails → cap exhausted → rotation .mockResolvedValueOnce( makeAttemptResult({ timedOut: true, @@ -386,39 +414,49 @@ describe("timeout-triggered compaction", () => { } as never, }), ); - mockedCompactDirect.mockResolvedValueOnce({ - ok: false, - compacted: false, - reason: "nothing to compact", - }); + mockedCompactDirect + .mockResolvedValueOnce({ + ok: false, + compacted: false, + reason: "nothing to compact", + }) + .mockResolvedValueOnce({ + ok: false, + compacted: false, + reason: "nothing to compact", + }); const result = await runEmbeddedPiAgent(overflowBaseRunParams); - expect(mockedCompactDirect).toHaveBeenCalledTimes(1); - expect(mockedCompactDirect).toHaveBeenCalledWith( + expect(mockedCompactDirect).toHaveBeenCalledTimes(2); + expect(mockedCompactDirect).toHaveBeenNthCalledWith( + 1, expect.objectContaining({ runtimeContext: expect.objectContaining({ authProfileId: "profile-a", attempt: 1, - maxAttempts: 1, + maxAttempts: 2, + }), + }), + ); + expect(mockedCompactDirect).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + runtimeContext: expect.objectContaining({ + authProfileId: "profile-b", + attempt: 2, + maxAttempts: 2, }), }), ); 
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); - expect(mockedRunEmbeddedAttempt).toHaveBeenNthCalledWith( - 1, - expect.objectContaining({ authProfileId: "profile-a" }), - ); - expect(mockedRunEmbeddedAttempt).toHaveBeenNthCalledWith( - 2, - expect.objectContaining({ authProfileId: "profile-b" }), - ); expect(result.payloads?.[0]?.isError).toBe(true); expect(result.payloads?.[0]?.text).toContain("timed out"); }); it("counts thrown timeout compactions against the retry cap across profile rotation", async () => { useTwoAuthProfiles(); + // Attempt 1 (profile-a): timeout → compaction #1 throws → rotate to profile-b mockedRunEmbeddedAttempt .mockResolvedValueOnce( makeAttemptResult({ @@ -429,6 +467,7 @@ describe("timeout-triggered compaction", () => { } as never, }), ) + // Attempt 2 (profile-b): timeout → compaction #2 throws → cap exhausted → rotation .mockResolvedValueOnce( makeAttemptResult({ timedOut: true, @@ -438,11 +477,13 @@ describe("timeout-triggered compaction", () => { } as never, }), ); - mockedCompactDirect.mockRejectedValueOnce(new Error("engine crashed")); + mockedCompactDirect + .mockRejectedValueOnce(new Error("engine crashed")) + .mockRejectedValueOnce(new Error("engine crashed again")); const result = await runEmbeddedPiAgent(overflowBaseRunParams); - expect(mockedCompactDirect).toHaveBeenCalledTimes(1); + expect(mockedCompactDirect).toHaveBeenCalledTimes(2); expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); expect(mockedRunEmbeddedAttempt).toHaveBeenNthCalledWith( 1, diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 5c4c1cd1665..35f10eb0a7d 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -65,6 +65,7 @@ import { import { ensureRuntimePluginsLoaded } from "../runtime-plugins.js"; import { derivePromptTokens, normalizeUsage, type UsageLike } from "../usage.js"; import { redactRunIdentifier, resolveRunWorkspaceDir } from 
"../workspace-run.js"; +import { runPostCompactionSideEffects } from "./compact.js"; import { buildEmbeddedCompactionRuntimeContext } from "./compaction-runtime-context.js"; import { resolveGlobalLane, resolveSessionLane } from "./lanes.js"; import { log } from "./logger.js"; @@ -815,7 +816,7 @@ export async function runEmbeddedPiAgent( } }; - const MAX_TIMEOUT_COMPACTION_ATTEMPTS = 1; + const MAX_TIMEOUT_COMPACTION_ATTEMPTS = 2; const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3; const MAX_RUN_LOOP_ITERATIONS = resolveMaxRunRetryIterations(profileCandidates.length); let overflowCompactionAttempts = 0; @@ -1112,14 +1113,13 @@ export async function runEmbeddedPiAgent( ); } else if (tokenUsedRatio > 0.65) { const timeoutDiagId = createCompactionDiagId(); - const nextTimeoutCompactionAttempt = timeoutCompactionAttempts + 1; + timeoutCompactionAttempts++; log.warn( `[timeout-compaction] LLM timed out with high prompt token usage (${Math.round(tokenUsedRatio * 100)}%); ` + - `attempting compaction before retry diagId=${timeoutDiagId}`, + `attempting compaction before retry (attempt ${timeoutCompactionAttempts}/${MAX_TIMEOUT_COMPACTION_ATTEMPTS}) diagId=${timeoutDiagId}`, ); let timeoutCompactResult: Awaited<ReturnType<typeof contextEngine.compact>>; await runOwnsCompactionBeforeHook("timeout recovery"); - timeoutCompactionAttempts = nextTimeoutCompactionAttempt; try { timeoutCompactResult = await contextEngine.compact({ sessionId: params.sessionId, @@ -1149,7 +1149,7 @@ export async function runEmbeddedPiAgent( ownerNumbers: params.ownerNumbers, trigger: "timeout_recovery", diagId: timeoutDiagId, - attempt: nextTimeoutCompactionAttempt, + attempt: timeoutCompactionAttempts, maxAttempts: MAX_TIMEOUT_COMPACTION_ATTEMPTS, }, }); @@ -1162,6 +1162,11 @@ export async function runEmbeddedPiAgent( await runOwnsCompactionAfterHook("timeout recovery", timeoutCompactResult); if (timeoutCompactResult.compacted) { autoCompactionCount += 1; + await runPostCompactionSideEffects({ + config: params.config, + sessionKey: 
params.sessionKey, + sessionFile: params.sessionFile, + }); log.info( `[timeout-compaction] compaction succeeded for ${provider}/${modelId}; retrying prompt`, );