fix: trigger compaction on LLM timeout with high context usage
When the LLM times out and context usage is high (>65% of the context window), the system retries with the same oversized context, causing a death spiral of repeated timeouts. This adds timeout-triggered compaction: after a timeout with high context usage, compact the context before retrying to break the cycle. Also fixes missing applyLocalNoAuthHeaderOverride mock in overflow compaction test shared mocks. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
2364e45fe4
commit
d7a92284ff
@ -0,0 +1,226 @@
|
||||
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
|
||||
import { makeAttemptResult, makeCompactionSuccess } from "./run.overflow-compaction.fixture.js";
|
||||
import {
|
||||
loadRunOverflowCompactionHarness,
|
||||
mockedCoerceToFailoverError,
|
||||
mockedDescribeFailoverError,
|
||||
mockedGlobalHookRunner,
|
||||
mockedResolveFailoverStatus,
|
||||
mockedContextEngine,
|
||||
mockedCompactDirect,
|
||||
mockedRunEmbeddedAttempt,
|
||||
resetRunOverflowCompactionHarnessMocks,
|
||||
mockedSessionLikelyHasOversizedToolResults,
|
||||
mockedTruncateOversizedToolResultsInSession,
|
||||
overflowBaseRunParams,
|
||||
} from "./run.overflow-compaction.harness.js";
|
||||
|
||||
// Bound in beforeAll once the harness has installed its module mocks.
let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent;

/**
 * Exercises the timeout-recovery path of runEmbeddedPiAgent: an attempt that
 * reports `timedOut` should trigger one compaction when the last turn's usage
 * is high, and should go straight to the timeout error payload otherwise.
 */
describe("timeout-triggered compaction", () => {
  type AttemptOverrides = Parameters<typeof makeAttemptResult>[0];
  type RunResult = Awaited<ReturnType<typeof runEmbeddedPiAgent>>;

  /** Fresh "compaction did nothing" result (new object per call). */
  const nothingToCompact = () => ({
    ok: false,
    compacted: false,
    reason: "nothing to compact",
  });

  /** Queues one timed-out attempt whose last assistant turn reports `totalTokens` of usage. */
  const queueTimedOutAttempt = (totalTokens: number, extra: AttemptOverrides = {}): void => {
    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
      makeAttemptResult({
        timedOut: true,
        ...extra,
        lastAssistant: { usage: { total: totalTokens } } as never,
      }),
    );
  };

  /** Queues one attempt that completes without a prompt error. */
  const queueCleanAttempt = (): void => {
    mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
  };

  /** Asserts that the run surfaced the regular timeout error as its first payload. */
  const expectTimedOutErrorPayload = (result: RunResult): void => {
    const [firstPayload] = result.payloads ?? [];
    expect(firstPayload?.isError).toBe(true);
    expect(firstPayload?.text).toContain("timed out");
  };

  beforeAll(async () => {
    const harness = await loadRunOverflowCompactionHarness();
    runEmbeddedPiAgent = harness.runEmbeddedPiAgent;
  });

  beforeEach(() => {
    resetRunOverflowCompactionHarnessMocks();

    // Drop queued one-shot results and implementations left over from the previous test.
    for (const mock of [
      mockedRunEmbeddedAttempt,
      mockedCompactDirect,
      mockedCoerceToFailoverError,
      mockedDescribeFailoverError,
      mockedResolveFailoverStatus,
      mockedSessionLikelyHasOversizedToolResults,
      mockedTruncateOversizedToolResultsInSession,
      mockedGlobalHookRunner.runBeforeAgentStart,
      mockedGlobalHookRunner.runBeforeCompaction,
      mockedGlobalHookRunner.runAfterCompaction,
    ]) {
      mock.mockReset();
    }

    mockedContextEngine.info.ownsCompaction = false;

    // Baseline behavior: compaction is a no-op, nothing is a failover error,
    // no oversized tool results exist, and no hooks are registered.
    mockedCompactDirect.mockResolvedValue(nothingToCompact());
    mockedCoerceToFailoverError.mockReturnValue(null);
    mockedDescribeFailoverError.mockImplementation((err: unknown) => {
      const message = err instanceof Error ? err.message : String(err);
      return { message, reason: undefined, status: undefined, code: undefined };
    });
    mockedSessionLikelyHasOversizedToolResults.mockReturnValue(false);
    mockedTruncateOversizedToolResultsInSession.mockResolvedValue({
      truncated: false,
      truncatedCount: 0,
      reason: "no oversized tool results",
    });
    mockedGlobalHookRunner.hasHooks.mockImplementation(() => false);
  });

  it("attempts compaction when LLM times out with high context usage (>65%)", async () => {
    // 150k of a 200k budget = 75% usage on the timed-out attempt.
    queueTimedOutAttempt(150000);
    mockedCompactDirect.mockResolvedValueOnce(
      makeCompactionSuccess({
        summary: "timeout recovery compaction",
        tokensBefore: 150000,
        tokensAfter: 80000,
      }),
    );
    // The retry that follows the compaction completes normally.
    queueCleanAttempt();

    const result = await runEmbeddedPiAgent(overflowBaseRunParams);

    const expectedRuntimeContext = expect.objectContaining({
      trigger: "timeout_recovery",
      attempt: 1,
      maxAttempts: 1,
    });
    expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
    expect(mockedCompactDirect).toHaveBeenCalledWith(
      expect.objectContaining({
        sessionId: "test-session",
        sessionFile: "/tmp/session.json",
        tokenBudget: 200000,
        force: true,
        compactionTarget: "budget",
        runtimeContext: expectedRuntimeContext,
      }),
    );
    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
    expect(result.meta.error).toBeUndefined();
  });

  it("retries the prompt after successful timeout compaction", async () => {
    queueTimedOutAttempt(160000);
    mockedCompactDirect.mockResolvedValueOnce(
      makeCompactionSuccess({
        summary: "compacted for timeout",
        tokensBefore: 160000,
        tokensAfter: 60000,
      }),
    );
    queueCleanAttempt();

    const result = await runEmbeddedPiAgent(overflowBaseRunParams);

    // Two attempts means the run loop went around again after compacting.
    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
    expect(result.meta.error).toBeUndefined();
  });

  it("falls through to normal handling when timeout compaction fails", async () => {
    queueTimedOutAttempt(150000);
    // Compaction runs but reports that it changed nothing.
    mockedCompactDirect.mockResolvedValueOnce(nothingToCompact());

    const result = await runEmbeddedPiAgent(overflowBaseRunParams);

    // One compaction attempt, then the ordinary timeout error payload.
    expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
    expectTimedOutErrorPayload(result);
  });

  it("does not attempt compaction when context usage is low", async () => {
    // 20k of a 200k budget = 10% usage.
    queueTimedOutAttempt(20000);

    const result = await runEmbeddedPiAgent(overflowBaseRunParams);

    expect(mockedCompactDirect).not.toHaveBeenCalled();
    expectTimedOutErrorPayload(result);
  });

  it("does not attempt compaction when aborted", async () => {
    queueTimedOutAttempt(180000, { aborted: true });

    await runEmbeddedPiAgent(overflowBaseRunParams);

    expect(mockedCompactDirect).not.toHaveBeenCalled();
  });

  it("does not attempt compaction when timedOutDuringCompaction is true", async () => {
    queueTimedOutAttempt(180000, { timedOutDuringCompaction: true });

    await runEmbeddedPiAgent(overflowBaseRunParams);

    // The timedOutDuringCompaction flag suppresses timeout-triggered compaction.
    expect(mockedCompactDirect).not.toHaveBeenCalled();
  });

  it("catches thrown errors from contextEngine.compact during timeout recovery", async () => {
    queueTimedOutAttempt(150000);
    // The compaction call rejects instead of returning a result.
    mockedCompactDirect.mockRejectedValueOnce(new Error("engine crashed"));

    const result = await runEmbeddedPiAgent(overflowBaseRunParams);

    // The rejection is contained; the run still ends with the timeout payload.
    expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
    expectTimedOutErrorPayload(result);
  });
});
|
||||
@ -1047,6 +1047,75 @@ export async function runEmbeddedPiAgent(
|
||||
? lastAssistant.errorMessage?.trim() || formattedAssistantErrorText
|
||||
: undefined;
|
||||
|
||||
// ── Timeout-triggered compaction ──────────────────────────────────
// When the LLM times out with high context usage, compact before
// retrying to break the death spiral of repeated timeouts.
// Skipped for aborted runs and when `timedOutDuringCompaction` is set.
if (timedOut && !aborted && !timedOutDuringCompaction) {
  // Share of the context budget consumed by the last turn. Falls back to 0
  // (never triggers) when the total is unknown or the budget is non-positive.
  const tokenUsedRatio =
    lastTurnTotal != null && ctxInfo.tokens > 0 ? lastTurnTotal / ctxInfo.tokens : 0;
  // High usage (>65%) is the only trigger. A timeout at low usage is not
  // caused by an oversized context, so compacting would discard history
  // without helping, and the warning below would report "high context usage"
  // for a low percentage.
  if (tokenUsedRatio > 0.65) {
    const timeoutDiagId = createCompactionDiagId();
    log.warn(
      `[timeout-compaction] LLM timed out with high context usage (${Math.round(tokenUsedRatio * 100)}%); ` +
        `attempting compaction before retry diagId=${timeoutDiagId}`,
    );
    let timeoutCompactResult: Awaited<ReturnType<typeof contextEngine.compact>>;
    try {
      timeoutCompactResult = await contextEngine.compact({
        sessionId: params.sessionId,
        sessionKey: params.sessionKey,
        sessionFile: params.sessionFile,
        tokenBudget: ctxInfo.tokens,
        force: true,
        compactionTarget: "budget",
        runtimeContext: {
          sessionKey: params.sessionKey,
          messageChannel: params.messageChannel,
          messageProvider: params.messageProvider,
          agentAccountId: params.agentAccountId,
          authProfileId: lastProfileId,
          workspaceDir: resolvedWorkspace,
          agentDir,
          config: params.config,
          skillsSnapshot: params.skillsSnapshot,
          senderIsOwner: params.senderIsOwner,
          provider,
          model: modelId,
          runId: params.runId,
          thinkLevel,
          reasoningLevel: params.reasoningLevel,
          bashElevated: params.bashElevated,
          extraSystemPrompt: params.extraSystemPrompt,
          ownerNumbers: params.ownerNumbers,
          trigger: "timeout_recovery",
          diagId: timeoutDiagId,
          attempt: 1,
          maxAttempts: 1,
        },
      });
    } catch (compactErr) {
      // A throwing engine must not take the run down: record it as a failed
      // compaction so the regular timeout handling below still runs.
      log.warn(
        `[timeout-compaction] contextEngine.compact() threw during timeout recovery for ${provider}/${modelId}: ${String(compactErr)}`,
      );
      timeoutCompactResult = { ok: false, compacted: false, reason: String(compactErr) };
    }
    if (timeoutCompactResult.compacted) {
      autoCompactionCount += 1;
      log.info(
        `[timeout-compaction] compaction succeeded for ${provider}/${modelId}; retrying prompt`,
      );
      // NOTE(review): nothing in this block limits how often this path can be
      // re-entered; confirm the enclosing loop bounds repeated
      // timeout → compact → retry cycles.
      continue;
    } else {
      log.warn(
        `[timeout-compaction] compaction did not reduce context for ${provider}/${modelId}; falling through to normal handling`,
      );
    }
  }
}
|
||||
|
||||
const contextOverflowError = !aborted
|
||||
? (() => {
|
||||
if (promptError) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user