fix: always increment timeout-compaction counter and use prompt tokens for ratio

- Restore prompt-token-based ratio (derivePromptTokens) that was lost in
  the main merge; total tokens include output which inflates the ratio
  after long generations.
- Move timeoutCompactionAttempts increment before the compacted check so
  failed compaction (compacted:false or thrown error) also consumes the
  attempt, preventing unbounded retry loops on persistent failure.
- Update all test usage fixtures from total → input to match prompt-token
  ratio behavior.
- Add tests: failed compaction increments counter, thrown compact()
  increments counter, prompt-vs-total token ratio discrimination.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Joey Krug 2026-03-14 17:04:36 -04:00
parent cedbcf5f76
commit 92f298be06
2 changed files with 65 additions and 20 deletions

View File

@ -6,8 +6,8 @@ import {
mockedDescribeFailoverError,
mockedGlobalHookRunner,
mockedResolveFailoverStatus,
mockedContextEngine,
mockedCompactDirect,
mockedContextEngine,
mockedRunEmbeddedAttempt,
mockedPickFallbackThinkingLevel,
resetRunOverflowCompactionHarnessMocks,
@ -59,13 +59,13 @@ describe("timeout-triggered compaction", () => {
mockedGlobalHookRunner.hasHooks.mockImplementation(() => false);
});
it("attempts compaction when LLM times out with high prompt usage (>65%)", async () => {
it("attempts compaction when LLM times out with high prompt token usage (>65%)", async () => {
// First attempt: timeout with high prompt usage (150k / 200k = 75%)
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
timedOut: true,
lastAssistant: {
usage: { input: 150000, output: 20000, total: 170000 },
usage: { input: 150000 },
} as never,
}),
);
@ -102,12 +102,12 @@ describe("timeout-triggered compaction", () => {
});
it("retries the prompt after successful timeout compaction", async () => {
// First attempt: timeout with high prompt usage carried by cache reads
// First attempt: timeout with high prompt usage
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
timedOut: true,
lastAssistant: {
usage: { input: 20000, cacheRead: 120000, output: 15000, total: 155000 },
usage: { input: 160000 },
} as never,
}),
);
@ -135,7 +135,7 @@ describe("timeout-triggered compaction", () => {
makeAttemptResult({
timedOut: true,
lastAssistant: {
usage: { input: 150000, output: 10000, total: 160000 },
usage: { input: 150000 },
} as never,
}),
);
@ -154,13 +154,13 @@ describe("timeout-triggered compaction", () => {
expect(result.payloads?.[0]?.text).toContain("timed out");
});
it("does not attempt compaction when context usage is low", async () => {
it("does not attempt compaction when prompt token usage is low", async () => {
// Timeout with low prompt usage (20k / 200k = 10%)
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
timedOut: true,
lastAssistant: {
usage: { input: 20000, output: 5000, total: 25000 },
usage: { input: 20000 },
} as never,
}),
);
@ -185,7 +185,7 @@ describe("timeout-triggered compaction", () => {
makeAttemptResult({
timedOut: true,
lastAssistant: {
usage: { input: 20000, output: 5000, total: 25000 },
usage: { input: 20000 },
} as never,
}),
);
@ -204,7 +204,7 @@ describe("timeout-triggered compaction", () => {
timedOut: true,
aborted: true,
lastAssistant: {
usage: { input: 180000, output: 5000, total: 185000 },
usage: { input: 180000 },
} as never,
}),
);
@ -220,7 +220,7 @@ describe("timeout-triggered compaction", () => {
timedOut: true,
timedOutDuringCompaction: true,
lastAssistant: {
usage: { input: 180000, output: 5000, total: 185000 },
usage: { input: 180000 },
} as never,
}),
);
@ -237,7 +237,7 @@ describe("timeout-triggered compaction", () => {
makeAttemptResult({
timedOut: true,
lastAssistant: {
usage: { input: 150000, output: 10000, total: 160000 },
usage: { input: 150000 },
} as never,
}),
);
@ -249,12 +249,12 @@ describe("timeout-triggered compaction", () => {
tokensAfter: 80000,
}),
);
// Second attempt after compaction: also times out with high prompt usage
// Second attempt after compaction: also times out with high usage
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
timedOut: true,
lastAssistant: {
usage: { input: 140000, output: 12000, total: 152000 },
usage: { input: 140000 },
} as never,
}),
);
@ -276,7 +276,7 @@ describe("timeout-triggered compaction", () => {
makeAttemptResult({
timedOut: true,
lastAssistant: {
usage: { input: 150000, output: 10000, total: 160000 },
usage: { input: 150000 },
} as never,
}),
);
@ -301,7 +301,7 @@ describe("timeout-triggered compaction", () => {
makeAttemptResult({
timedOut: true,
lastAssistant: {
usage: { input: 160000, output: 10000, total: 170000 },
usage: { input: 160000 },
} as never,
}),
)
@ -336,18 +336,63 @@ describe("timeout-triggered compaction", () => {
);
});
it("does not attempt compaction when only output tokens are high", async () => {
it("increments attempt counter even when compaction returns compacted:false", async () => {
// First timeout: high prompt usage, compaction fails (compacted:false)
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
timedOut: true,
lastAssistant: {
usage: { input: 20000, output: 170000, total: 190000 },
usage: { input: 150000 },
} as never,
}),
);
mockedCompactDirect.mockResolvedValueOnce({
ok: false,
compacted: false,
reason: "nothing to compact",
});
// The failed compaction falls through to timeout error; the runner
// returns with an error payload (no retry because compacted was false).
const result = await runEmbeddedPiAgent(overflowBaseRunParams);
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
expect(result.payloads?.[0]?.isError).toBe(true);
});
// Scenario: a single timed-out attempt reports 150k input tokens and the
// compaction call made during timeout recovery rejects. The run is expected
// to finish with an error payload after exactly one compaction call.
// NOTE(review): the assertions below only observe that compaction was invoked
// once and that an error payload was returned; nothing here reads the attempt
// counter or drives a second timed-out attempt, so the "blocking subsequent
// attempts" part of the test name is not actually pinned — TODO confirm
// against the runner's retry flow and extend if a later attempt is reachable.
it("increments attempt counter when compact() throws, blocking subsequent attempts", async () => {
// First timeout: high prompt usage, compact() throws
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
timedOut: true,
lastAssistant: {
// Only `input` is provided; output/total are intentionally left out of the fixture.
usage: { input: 150000 },
} as never,
}),
);
// Queue a one-shot rejection so the compaction call fails by throwing
// rather than by resolving with compacted:false.
mockedCompactDirect.mockRejectedValueOnce(new Error("engine crashed"));
// Falls through to timeout error on first attempt
const result = await runEmbeddedPiAgent(overflowBaseRunParams);
// Exactly one compaction call: the rejected one, with no further call afterwards.
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
// The first payload is flagged as an error.
expect(result.payloads?.[0]?.isError).toBe(true);
});
it("uses prompt/input tokens for ratio, not total tokens", async () => {
// Timeout where total tokens are high (150k) but input/prompt tokens
// are low (20k / 200k = 10%). Should NOT trigger compaction because
// the ratio is based on prompt tokens, not total.
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
timedOut: true,
lastAssistant: {
usage: { input: 20000, total: 150000 },
} as never,
}),
);
const result = await runEmbeddedPiAgent(overflowBaseRunParams);
// Despite high total tokens, low prompt tokens mean no compaction
expect(mockedCompactDirect).not.toHaveBeenCalled();
expect(result.payloads?.[0]?.isError).toBe(true);
expect(result.payloads?.[0]?.text).toContain("timed out");

View File

@ -1113,7 +1113,7 @@ export async function runEmbeddedPiAgent(
} else if (tokenUsedRatio > 0.65) {
const timeoutDiagId = createCompactionDiagId();
log.warn(
`[timeout-compaction] LLM timed out with high context usage (${Math.round(tokenUsedRatio * 100)}%); ` +
`[timeout-compaction] LLM timed out with high prompt token usage (${Math.round(tokenUsedRatio * 100)}%); ` +
`attempting compaction before retry diagId=${timeoutDiagId}`,
);
let timeoutCompactResult: Awaited<ReturnType<typeof contextEngine.compact>>;
@ -1158,9 +1158,9 @@ export async function runEmbeddedPiAgent(
timeoutCompactResult = { ok: false, compacted: false, reason: String(compactErr) };
}
await runOwnsCompactionAfterHook("timeout recovery", timeoutCompactResult);
timeoutCompactionAttempts += 1;
if (timeoutCompactResult.compacted) {
autoCompactionCount += 1;
timeoutCompactionAttempts += 1;
log.info(
`[timeout-compaction] compaction succeeded for ${provider}/${modelId}; retrying prompt`,
);