fix: trigger compaction on LLM timeout with high context usage

When the LLM times out and context usage is high (>65% of the context
window), the system retries with the same oversized context, causing a
death spiral of repeated timeouts. This adds timeout-triggered
compaction: after a timeout with high context usage, compact the
context before retrying to break the cycle.

Also fixes missing applyLocalNoAuthHeaderOverride mock in overflow
compaction test shared mocks.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Joey Krug 2026-03-14 13:13:20 -04:00
parent 2364e45fe4
commit d7a92284ff
2 changed files with 295 additions and 0 deletions

View File

@ -0,0 +1,226 @@
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
import { makeAttemptResult, makeCompactionSuccess } from "./run.overflow-compaction.fixture.js";
import {
loadRunOverflowCompactionHarness,
mockedCoerceToFailoverError,
mockedDescribeFailoverError,
mockedGlobalHookRunner,
mockedResolveFailoverStatus,
mockedContextEngine,
mockedCompactDirect,
mockedRunEmbeddedAttempt,
resetRunOverflowCompactionHarnessMocks,
mockedSessionLikelyHasOversizedToolResults,
mockedTruncateOversizedToolResultsInSession,
overflowBaseRunParams,
} from "./run.overflow-compaction.harness.js";
let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent;
describe("timeout-triggered compaction", () => {
  type RunResult = Awaited<ReturnType<typeof runEmbeddedPiAgent>>;

  /**
   * Queues one attempt result that timed out after the last assistant turn
   * reported `totalTokens` of usage. The tests below assert a token budget of
   * 200000, so e.g. 150000 corresponds to 75% usage.
   */
  const queueTimedOutAttempt = (
    totalTokens: number,
    flags: { aborted?: true; timedOutDuringCompaction?: true } = {},
  ): void => {
    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
      makeAttemptResult({
        timedOut: true,
        ...flags,
        lastAssistant: {
          usage: { total: totalTokens },
        } as never,
      }),
    );
  };

  /** Queues one attempt result that completes without a prompt error. */
  const queueCleanAttempt = (): void => {
    mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
  };

  /** Queues one successful compaction result with the given summary and token counts. */
  const queueCompactionSuccess = (
    summary: string,
    tokensBefore: number,
    tokensAfter: number,
  ): void => {
    mockedCompactDirect.mockResolvedValueOnce(
      makeCompactionSuccess({ summary, tokensBefore, tokensAfter }),
    );
  };

  /** Asserts that the first payload of the run is the timeout error payload. */
  const expectTimeoutErrorPayload = (run: RunResult): void => {
    expect(run.payloads?.[0]?.isError).toBe(true);
    expect(run.payloads?.[0]?.text).toContain("timed out");
  };

  beforeAll(async () => {
    const harness = await loadRunOverflowCompactionHarness();
    runEmbeddedPiAgent = harness.runEmbeddedPiAgent;
  });

  beforeEach(() => {
    // Start from the shared harness defaults, then wipe every mock this suite drives.
    resetRunOverflowCompactionHarnessMocks();

    mockedRunEmbeddedAttempt.mockReset();
    mockedResolveFailoverStatus.mockReset();

    mockedGlobalHookRunner.runBeforeAgentStart.mockReset();
    mockedGlobalHookRunner.runBeforeCompaction.mockReset();
    mockedGlobalHookRunner.runAfterCompaction.mockReset();
    mockedGlobalHookRunner.hasHooks.mockImplementation(() => false);

    mockedContextEngine.info.ownsCompaction = false;

    // Default compaction outcome: nothing to compact.
    mockedCompactDirect.mockReset();
    mockedCompactDirect.mockResolvedValue({
      ok: false,
      compacted: false,
      reason: "nothing to compact",
    });

    // No failover classification unless a test opts in.
    mockedCoerceToFailoverError.mockReset();
    mockedCoerceToFailoverError.mockReturnValue(null);
    mockedDescribeFailoverError.mockReset();
    mockedDescribeFailoverError.mockImplementation((err: unknown) => ({
      message: err instanceof Error ? err.message : String(err),
      reason: undefined,
      status: undefined,
      code: undefined,
    }));

    // No oversized tool results to truncate.
    mockedSessionLikelyHasOversizedToolResults.mockReset();
    mockedSessionLikelyHasOversizedToolResults.mockReturnValue(false);
    mockedTruncateOversizedToolResultsInSession.mockReset();
    mockedTruncateOversizedToolResultsInSession.mockResolvedValue({
      truncated: false,
      truncatedCount: 0,
      reason: "no oversized tool results",
    });
  });

  it("attempts compaction when LLM times out with high context usage (>65%)", async () => {
    // 150k / 200k = 75% usage on the timed-out attempt, then compaction and a clean retry.
    queueTimedOutAttempt(150000);
    queueCompactionSuccess("timeout recovery compaction", 150000, 80000);
    queueCleanAttempt();

    const result = await runEmbeddedPiAgent(overflowBaseRunParams);

    const expectedRuntimeContext = expect.objectContaining({
      trigger: "timeout_recovery",
      attempt: 1,
      maxAttempts: 1,
    });
    const expectedCompactArgs = expect.objectContaining({
      sessionId: "test-session",
      sessionFile: "/tmp/session.json",
      tokenBudget: 200000,
      force: true,
      compactionTarget: "budget",
      runtimeContext: expectedRuntimeContext,
    });

    expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
    expect(mockedCompactDirect).toHaveBeenCalledWith(expectedCompactArgs);
    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
    expect(result.meta.error).toBeUndefined();
  });

  it("retries the prompt after successful timeout compaction", async () => {
    queueTimedOutAttempt(160000);
    queueCompactionSuccess("compacted for timeout", 160000, 60000);
    queueCleanAttempt();

    const result = await runEmbeddedPiAgent(overflowBaseRunParams);

    // Two attempts means the run loop continued after compaction.
    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
    expect(result.meta.error).toBeUndefined();
  });

  it("falls through to normal handling when timeout compaction fails", async () => {
    queueTimedOutAttempt(150000);
    // Compaction reports that it did not reduce the context.
    mockedCompactDirect.mockResolvedValueOnce({
      ok: false,
      compacted: false,
      reason: "nothing to compact",
    });

    const result = await runEmbeddedPiAgent(overflowBaseRunParams);

    // Compaction was tried once and the timeout error payload is surfaced.
    expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
    expectTimeoutErrorPayload(result);
  });

  it("does not attempt compaction when context usage is low", async () => {
    // 20k / 200k = 10% usage.
    queueTimedOutAttempt(20000);

    const result = await runEmbeddedPiAgent(overflowBaseRunParams);

    expect(mockedCompactDirect).not.toHaveBeenCalled();
    expectTimeoutErrorPayload(result);
  });

  it("does not attempt compaction when aborted", async () => {
    queueTimedOutAttempt(180000, { aborted: true });

    await runEmbeddedPiAgent(overflowBaseRunParams);

    expect(mockedCompactDirect).not.toHaveBeenCalled();
  });

  it("does not attempt compaction when timedOutDuringCompaction is true", async () => {
    queueTimedOutAttempt(180000, { timedOutDuringCompaction: true });

    await runEmbeddedPiAgent(overflowBaseRunParams);

    // A timeout that happened during compaction must not trigger another one.
    expect(mockedCompactDirect).not.toHaveBeenCalled();
  });

  it("catches thrown errors from contextEngine.compact during timeout recovery", async () => {
    queueTimedOutAttempt(150000);
    mockedCompactDirect.mockRejectedValueOnce(new Error("engine crashed"));

    const result = await runEmbeddedPiAgent(overflowBaseRunParams);

    // The rejection is contained and the normal timeout payload is returned.
    expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
    expectTimeoutErrorPayload(result);
  });
});

View File

@ -1047,6 +1047,75 @@ export async function runEmbeddedPiAgent(
? lastAssistant.errorMessage?.trim() || formattedAssistantErrorText
: undefined;
// ── Timeout-triggered compaction ──────────────────────────────────
// When the LLM times out with high context usage, compact before
// retrying to break the death spiral of repeated timeouts.
//
// Skipped when the run was aborted or when the timeout itself happened
// during a compaction.
//
// NOTE(review): nothing in this block limits how many times it can run for
// one prompt; `attempt: 1` / `maxAttempts: 1` below are only reported to the
// context engine. Confirm the enclosing loop caps iterations, or add a
// dedicated attempt counter next to the loop's other counters.
if (timedOut && !aborted && !timedOutDuringCompaction) {
  // Share of the context window (ctxInfo.tokens) taken by the last turn's
  // total; treated as 0 when the total is unknown or the window is not positive.
  const tokenUsedRatio =
    lastTurnTotal != null && ctxInfo.tokens > 0 ? lastTurnTotal / ctxInfo.tokens : 0;
  // Gate purely on measured usage. A low-usage timeout is not caused by an
  // oversized context, so a forced compaction would not help and the log
  // line below would misreport it as "high context usage".
  if (tokenUsedRatio > 0.65) {
    const timeoutDiagId = createCompactionDiagId();
    log.warn(
      `[timeout-compaction] LLM timed out with high context usage (${Math.round(tokenUsedRatio * 100)}%); ` +
        `attempting compaction before retry diagId=${timeoutDiagId}`,
    );
    let timeoutCompactResult: Awaited<ReturnType<typeof contextEngine.compact>>;
    try {
      timeoutCompactResult = await contextEngine.compact({
        sessionId: params.sessionId,
        sessionKey: params.sessionKey,
        sessionFile: params.sessionFile,
        tokenBudget: ctxInfo.tokens,
        force: true,
        compactionTarget: "budget",
        runtimeContext: {
          sessionKey: params.sessionKey,
          messageChannel: params.messageChannel,
          messageProvider: params.messageProvider,
          agentAccountId: params.agentAccountId,
          authProfileId: lastProfileId,
          workspaceDir: resolvedWorkspace,
          agentDir,
          config: params.config,
          skillsSnapshot: params.skillsSnapshot,
          senderIsOwner: params.senderIsOwner,
          provider,
          model: modelId,
          runId: params.runId,
          thinkLevel,
          reasoningLevel: params.reasoningLevel,
          bashElevated: params.bashElevated,
          extraSystemPrompt: params.extraSystemPrompt,
          ownerNumbers: params.ownerNumbers,
          trigger: "timeout_recovery",
          diagId: timeoutDiagId,
          attempt: 1,
          maxAttempts: 1,
        },
      });
    } catch (compactErr) {
      // A throwing engine must not crash the run: record it as a failed
      // compaction so the normal timeout handling below still executes.
      log.warn(
        `[timeout-compaction] contextEngine.compact() threw during timeout recovery for ${provider}/${modelId}: ${String(compactErr)}`,
      );
      timeoutCompactResult = { ok: false, compacted: false, reason: String(compactErr) };
    }
    if (timeoutCompactResult.compacted) {
      autoCompactionCount += 1;
      log.info(
        `[timeout-compaction] compaction succeeded for ${provider}/${modelId}; retrying prompt`,
      );
      // Re-enter the run loop so the prompt is retried against the compacted context.
      continue;
    } else {
      log.warn(
        `[timeout-compaction] compaction did not reduce context for ${provider}/${modelId}; falling through to normal handling`,
      );
    }
  }
}
const contextOverflowError = !aborted
? (() => {
if (promptError) {