fix: always increment timeout-compaction counter and use prompt tokens for ratio
- Restore prompt-token-based ratio (derivePromptTokens) that was lost in the main merge; total tokens include output which inflates the ratio after long generations. - Move timeoutCompactionAttempts increment before the compacted check so failed compaction (compacted:false or thrown error) also consumes the attempt, preventing unbounded retry loops on persistent failure. - Update all test usage fixtures from total → input to match prompt-token ratio behavior. - Add tests: failed compaction increments counter, thrown compact() increments counter, prompt-vs-total token ratio discrimination. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
cedbcf5f76
commit
92f298be06
@ -6,8 +6,8 @@ import {
|
||||
mockedDescribeFailoverError,
|
||||
mockedGlobalHookRunner,
|
||||
mockedResolveFailoverStatus,
|
||||
mockedContextEngine,
|
||||
mockedCompactDirect,
|
||||
mockedContextEngine,
|
||||
mockedRunEmbeddedAttempt,
|
||||
mockedPickFallbackThinkingLevel,
|
||||
resetRunOverflowCompactionHarnessMocks,
|
||||
@ -59,13 +59,13 @@ describe("timeout-triggered compaction", () => {
|
||||
mockedGlobalHookRunner.hasHooks.mockImplementation(() => false);
|
||||
});
|
||||
|
||||
it("attempts compaction when LLM times out with high prompt usage (>65%)", async () => {
|
||||
it("attempts compaction when LLM times out with high prompt token usage (>65%)", async () => {
|
||||
// First attempt: timeout with high prompt usage (150k / 200k = 75%)
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
timedOut: true,
|
||||
lastAssistant: {
|
||||
usage: { input: 150000, output: 20000, total: 170000 },
|
||||
usage: { input: 150000 },
|
||||
} as never,
|
||||
}),
|
||||
);
|
||||
@ -102,12 +102,12 @@ describe("timeout-triggered compaction", () => {
|
||||
});
|
||||
|
||||
it("retries the prompt after successful timeout compaction", async () => {
|
||||
// First attempt: timeout with high prompt usage carried by cache reads
|
||||
// First attempt: timeout with high prompt usage
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
timedOut: true,
|
||||
lastAssistant: {
|
||||
usage: { input: 20000, cacheRead: 120000, output: 15000, total: 155000 },
|
||||
usage: { input: 160000 },
|
||||
} as never,
|
||||
}),
|
||||
);
|
||||
@ -135,7 +135,7 @@ describe("timeout-triggered compaction", () => {
|
||||
makeAttemptResult({
|
||||
timedOut: true,
|
||||
lastAssistant: {
|
||||
usage: { input: 150000, output: 10000, total: 160000 },
|
||||
usage: { input: 150000 },
|
||||
} as never,
|
||||
}),
|
||||
);
|
||||
@ -154,13 +154,13 @@ describe("timeout-triggered compaction", () => {
|
||||
expect(result.payloads?.[0]?.text).toContain("timed out");
|
||||
});
|
||||
|
||||
it("does not attempt compaction when context usage is low", async () => {
|
||||
it("does not attempt compaction when prompt token usage is low", async () => {
|
||||
// Timeout with low prompt usage (20k / 200k = 10%)
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
timedOut: true,
|
||||
lastAssistant: {
|
||||
usage: { input: 20000, output: 5000, total: 25000 },
|
||||
usage: { input: 20000 },
|
||||
} as never,
|
||||
}),
|
||||
);
|
||||
@ -185,7 +185,7 @@ describe("timeout-triggered compaction", () => {
|
||||
makeAttemptResult({
|
||||
timedOut: true,
|
||||
lastAssistant: {
|
||||
usage: { input: 20000, output: 5000, total: 25000 },
|
||||
usage: { input: 20000 },
|
||||
} as never,
|
||||
}),
|
||||
);
|
||||
@ -204,7 +204,7 @@ describe("timeout-triggered compaction", () => {
|
||||
timedOut: true,
|
||||
aborted: true,
|
||||
lastAssistant: {
|
||||
usage: { input: 180000, output: 5000, total: 185000 },
|
||||
usage: { input: 180000 },
|
||||
} as never,
|
||||
}),
|
||||
);
|
||||
@ -220,7 +220,7 @@ describe("timeout-triggered compaction", () => {
|
||||
timedOut: true,
|
||||
timedOutDuringCompaction: true,
|
||||
lastAssistant: {
|
||||
usage: { input: 180000, output: 5000, total: 185000 },
|
||||
usage: { input: 180000 },
|
||||
} as never,
|
||||
}),
|
||||
);
|
||||
@ -237,7 +237,7 @@ describe("timeout-triggered compaction", () => {
|
||||
makeAttemptResult({
|
||||
timedOut: true,
|
||||
lastAssistant: {
|
||||
usage: { input: 150000, output: 10000, total: 160000 },
|
||||
usage: { input: 150000 },
|
||||
} as never,
|
||||
}),
|
||||
);
|
||||
@ -249,12 +249,12 @@ describe("timeout-triggered compaction", () => {
|
||||
tokensAfter: 80000,
|
||||
}),
|
||||
);
|
||||
// Second attempt after compaction: also times out with high prompt usage
|
||||
// Second attempt after compaction: also times out with high usage
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
timedOut: true,
|
||||
lastAssistant: {
|
||||
usage: { input: 140000, output: 12000, total: 152000 },
|
||||
usage: { input: 140000 },
|
||||
} as never,
|
||||
}),
|
||||
);
|
||||
@ -276,7 +276,7 @@ describe("timeout-triggered compaction", () => {
|
||||
makeAttemptResult({
|
||||
timedOut: true,
|
||||
lastAssistant: {
|
||||
usage: { input: 150000, output: 10000, total: 160000 },
|
||||
usage: { input: 150000 },
|
||||
} as never,
|
||||
}),
|
||||
);
|
||||
@ -301,7 +301,7 @@ describe("timeout-triggered compaction", () => {
|
||||
makeAttemptResult({
|
||||
timedOut: true,
|
||||
lastAssistant: {
|
||||
usage: { input: 160000, output: 10000, total: 170000 },
|
||||
usage: { input: 160000 },
|
||||
} as never,
|
||||
}),
|
||||
)
|
||||
@ -336,18 +336,63 @@ describe("timeout-triggered compaction", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("does not attempt compaction when only output tokens are high", async () => {
|
||||
it("increments attempt counter even when compaction returns compacted:false", async () => {
|
||||
// First timeout: high prompt usage, compaction fails (compacted:false)
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
timedOut: true,
|
||||
lastAssistant: {
|
||||
usage: { input: 20000, output: 170000, total: 190000 },
|
||||
usage: { input: 150000 },
|
||||
} as never,
|
||||
}),
|
||||
);
|
||||
mockedCompactDirect.mockResolvedValueOnce({
|
||||
ok: false,
|
||||
compacted: false,
|
||||
reason: "nothing to compact",
|
||||
});
|
||||
// The failed compaction falls through to timeout error; the runner
|
||||
// returns with an error payload (no retry because compacted was false).
|
||||
const result = await runEmbeddedPiAgent(overflowBaseRunParams);
|
||||
|
||||
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
|
||||
expect(result.payloads?.[0]?.isError).toBe(true);
|
||||
});
|
||||
|
||||
it("increments attempt counter when compact() throws, blocking subsequent attempts", async () => {
|
||||
// First timeout: high prompt usage, compact() throws
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
timedOut: true,
|
||||
lastAssistant: {
|
||||
usage: { input: 150000 },
|
||||
} as never,
|
||||
}),
|
||||
);
|
||||
mockedCompactDirect.mockRejectedValueOnce(new Error("engine crashed"));
|
||||
// Falls through to timeout error on first attempt
|
||||
const result = await runEmbeddedPiAgent(overflowBaseRunParams);
|
||||
|
||||
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
|
||||
expect(result.payloads?.[0]?.isError).toBe(true);
|
||||
});
|
||||
|
||||
it("uses prompt/input tokens for ratio, not total tokens", async () => {
|
||||
// Timeout where total tokens are high (150k) but input/prompt tokens
|
||||
// are low (20k / 200k = 10%). Should NOT trigger compaction because
|
||||
// the ratio is based on prompt tokens, not total.
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
timedOut: true,
|
||||
lastAssistant: {
|
||||
usage: { input: 20000, total: 150000 },
|
||||
} as never,
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await runEmbeddedPiAgent(overflowBaseRunParams);
|
||||
|
||||
// Despite high total tokens, low prompt tokens mean no compaction
|
||||
expect(mockedCompactDirect).not.toHaveBeenCalled();
|
||||
expect(result.payloads?.[0]?.isError).toBe(true);
|
||||
expect(result.payloads?.[0]?.text).toContain("timed out");
|
||||
|
||||
@ -1113,7 +1113,7 @@ export async function runEmbeddedPiAgent(
|
||||
} else if (tokenUsedRatio > 0.65) {
|
||||
const timeoutDiagId = createCompactionDiagId();
|
||||
log.warn(
|
||||
`[timeout-compaction] LLM timed out with high context usage (${Math.round(tokenUsedRatio * 100)}%); ` +
|
||||
`[timeout-compaction] LLM timed out with high prompt token usage (${Math.round(tokenUsedRatio * 100)}%); ` +
|
||||
`attempting compaction before retry diagId=${timeoutDiagId}`,
|
||||
);
|
||||
let timeoutCompactResult: Awaited<ReturnType<typeof contextEngine.compact>>;
|
||||
@ -1158,9 +1158,9 @@ export async function runEmbeddedPiAgent(
|
||||
timeoutCompactResult = { ok: false, compacted: false, reason: String(compactErr) };
|
||||
}
|
||||
await runOwnsCompactionAfterHook("timeout recovery", timeoutCompactResult);
|
||||
timeoutCompactionAttempts += 1;
|
||||
if (timeoutCompactResult.compacted) {
|
||||
autoCompactionCount += 1;
|
||||
timeoutCompactionAttempts += 1;
|
||||
log.info(
|
||||
`[timeout-compaction] compaction succeeded for ${provider}/${modelId}; retrying prompt`,
|
||||
);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user