From cedbcf5f76efcf30ae67c758c653dfc7795a04b0 Mon Sep 17 00:00:00 2001 From: Joey Krug Date: Sat, 14 Mar 2026 16:37:20 -0400 Subject: [PATCH] fix: use prompt tokens for timeout compaction ratio --- .../run.timeout-triggered-compaction.test.ts | 55 ++++++++++++------- src/agents/pi-embedded-runner/run.ts | 8 ++- 2 files changed, 43 insertions(+), 20 deletions(-) diff --git a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts index b92c31ba602..8fda6b771e4 100644 --- a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts @@ -59,13 +59,13 @@ describe("timeout-triggered compaction", () => { mockedGlobalHookRunner.hasHooks.mockImplementation(() => false); }); - it("attempts compaction when LLM times out with high context usage (>65%)", async () => { - // First attempt: timeout with high usage (150k / 200k = 75%) + it("attempts compaction when LLM times out with high prompt usage (>65%)", async () => { + // First attempt: timeout with high prompt usage (150k / 200k = 75%) mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 150000 }, + usage: { input: 150000, output: 20000, total: 170000 }, } as never, }), ); @@ -102,12 +102,12 @@ describe("timeout-triggered compaction", () => { }); it("retries the prompt after successful timeout compaction", async () => { - // First attempt: timeout with high usage + // First attempt: timeout with high prompt usage carried by cache reads mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 160000 }, + usage: { input: 20000, cacheRead: 120000, output: 15000, total: 155000 }, } as never, }), ); @@ -130,12 +130,12 @@ describe("timeout-triggered compaction", () => { }); it("falls through to normal handling when timeout compaction fails", async () => { - // Timeout with high usage + // Timeout with high prompt usage mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 150000 }, + usage: { input: 150000, output: 10000, total: 160000 }, } as never, }), ); @@ -155,12 +155,12 @@ describe("timeout-triggered compaction", () => { }); it("does not attempt compaction when context usage is low", async () => { - // Timeout with low usage (20k / 200k = 10%) + // Timeout with low prompt usage (20k / 200k = 10%) mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 20000 }, + usage: { input: 20000, output: 5000, total: 25000 }, } as never, }), ); @@ -185,7 +185,7 @@ describe("timeout-triggered compaction", () => { makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 20000 }, + usage: { input: 20000, output: 5000, total: 25000 }, } as never, }), ); @@ -204,7 +204,7 @@ describe("timeout-triggered compaction", () => { timedOut: true, aborted: true, lastAssistant: { - usage: { total: 180000 }, + usage: { input: 180000, output: 5000, total: 185000 }, } as never, }), ); @@ -220,7 +220,7 @@ describe("timeout-triggered compaction", () => { timedOut: true, timedOutDuringCompaction: true, lastAssistant: { - usage: { total: 180000 }, + usage: { input: 180000, output: 5000, total: 185000 }, } as never, }), ); @@ -232,12 +232,12 @@ describe("timeout-triggered compaction", () => { }); it("falls through to failover rotation after max timeout compaction attempts", async () => { - // First attempt: timeout with high usage (150k / 200k = 75%) + // First attempt: timeout with high prompt usage (150k / 200k = 75%) mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 150000 }, + usage: { input: 150000, output: 10000, total: 160000 }, } as never, }), ); @@ -249,12 +249,12 @@ describe("timeout-triggered compaction", () => { tokensAfter: 80000, }), ); - // Second attempt after compaction: also times out with high usage + // Second attempt after compaction: also times out with high prompt usage mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 140000 }, + usage: { input: 140000, output: 12000, total: 152000 }, } as never, }), ); @@ -271,12 +271,12 @@ describe("timeout-triggered compaction", () => { }); it("catches thrown errors from contextEngine.compact during timeout recovery", async () => { - // Timeout with high usage + // Timeout with high prompt usage mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 150000 }, + usage: { input: 150000, output: 10000, total: 160000 }, } as never, }), ); @@ -301,7 +301,7 @@ describe("timeout-triggered compaction", () => { makeAttemptResult({ timedOut: true, lastAssistant: { - usage: { total: 160000 }, + usage: { input: 160000, output: 10000, total: 170000 }, } as never, }), ) @@ -335,4 +335,21 @@ describe("timeout-triggered compaction", () => { }), ); }); + + it("does not attempt compaction when only output tokens are high", async () => { + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + lastAssistant: { + usage: { input: 20000, output: 170000, total: 190000 }, + } as never, + }), + ); + + const result = await runEmbeddedPiAgent(overflowBaseRunParams); + + expect(mockedCompactDirect).not.toHaveBeenCalled(); + expect(result.payloads?.[0]?.isError).toBe(true); + expect(result.payloads?.[0]?.text).toContain("timed out"); + }); }); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index f3b47d7dbb5..6c589554dca 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1098,8 +1098,14 @@ export async function runEmbeddedPiAgent( // When the LLM times out with high context usage, compact before // retrying to break the death spiral of repeated timeouts. if (timedOut && !aborted && !timedOutDuringCompaction) { + // Only consider prompt-side tokens here. API totals include output + // tokens, which can make a long generation look like high context + // pressure even when the prompt itself was small. + const lastTurnPromptTokens = derivePromptTokens(lastRunPromptUsage); const tokenUsedRatio = - lastTurnTotal != null && ctxInfo.tokens > 0 ? lastTurnTotal / ctxInfo.tokens : 0; + lastTurnPromptTokens != null && ctxInfo.tokens > 0 + ? lastTurnPromptTokens / ctxInfo.tokens + : 0; if (timeoutCompactionAttempts >= MAX_TIMEOUT_COMPACTION_ATTEMPTS) { log.warn( `[timeout-compaction] already compacted ${timeoutCompactionAttempts} time(s) for timeouts; falling through to failover rotation`,