From 890dbf523f4d03c83bb353691b324ce403ad5a6e Mon Sep 17 00:00:00 2001 From: Altay Date: Fri, 6 Mar 2026 15:47:23 +0300 Subject: [PATCH] fix(agents): tighten overload probe and backoff state --- ...pi-agent.auth-profile-rotation.e2e.test.ts | 66 +++++++++++++++++++ src/agents/pi-embedded-runner/run.ts | 1 + src/commands/models/list.probe.test.ts | 1 + src/commands/models/list.probe.ts | 2 +- 4 files changed, 69 insertions(+), 1 deletion(-) diff --git a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts index 4b9f5d1e090..581f73bd75b 100644 --- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts +++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts @@ -707,6 +707,15 @@ describe("runEmbeddedPiAgent auth profile rotation", () => { expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number"); expect(usageStats["openai:p1"]?.cooldownUntil).toBeUndefined(); expect(computeBackoffMock).toHaveBeenCalledTimes(1); + expect(computeBackoffMock).toHaveBeenCalledWith( + expect.objectContaining({ + initialMs: 250, + maxMs: 1500, + factor: 2, + jitter: 0.2, + }), + 1, + ); expect(sleepWithAbortMock).toHaveBeenCalledTimes(1); expect(sleepWithAbortMock).toHaveBeenCalledWith(321, undefined); }); @@ -720,6 +729,15 @@ describe("runEmbeddedPiAgent auth profile rotation", () => { expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number"); expect(usageStats["openai:p1"]?.cooldownUntil).toBeUndefined(); expect(computeBackoffMock).toHaveBeenCalledTimes(1); + expect(computeBackoffMock).toHaveBeenCalledWith( + expect.objectContaining({ + initialMs: 250, + maxMs: 1500, + factor: 2, + jitter: 0.2, + }), + 1, + ); expect(sleepWithAbortMock).toHaveBeenCalledTimes(1); expect(sleepWithAbortMock).toHaveBeenCalledWith(321, undefined); }); @@ -746,6 +764,54 @@ describe("runEmbeddedPiAgent auth profile rotation", () => { expect(usageStats["openai:p1"]?.cooldownUntil).toBeUndefined(); }); + it("resets overload failover backoff after a successful turn", async () => { + await withAgentWorkspace(async ({ agentDir, workspaceDir }) => { + await writeAuthStore(agentDir); + + mockFailedThenSuccessfulAttempt( + '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}', + ); + await runAutoPinnedOpenAiTurn({ + agentDir, + workspaceDir, + sessionKey: "agent:test:overloaded-backoff-reset-1", + runId: "run:overloaded-backoff-reset-1", + }); + + mockFailedThenSuccessfulAttempt( + '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}', + ); + await runAutoPinnedOpenAiTurn({ + agentDir, + workspaceDir, + sessionKey: "agent:test:overloaded-backoff-reset-2", + runId: "run:overloaded-backoff-reset-2", + }); + + expect(computeBackoffMock).toHaveBeenCalledTimes(2); + expect(computeBackoffMock).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ + initialMs: 250, + maxMs: 1500, + factor: 2, + jitter: 0.2, + }), + 1, + ); + expect(computeBackoffMock).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + initialMs: 250, + maxMs: 1500, + factor: 2, + jitter: 0.2, + }), + 1, + ); + }); + }); + it("does not rotate for compaction timeouts", async () => { await withAgentWorkspace(async ({ agentDir, workspaceDir }) => { await writeAuthStore(agentDir); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 019d3e2efc1..8eee31ce77b 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1400,6 +1400,7 @@ export async function runEmbeddedPiAgent( `embedded run done: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - started} aborted=${aborted}`, ); if (lastProfileId) { + overloadFailoverAttempts = 0; await markAuthProfileGood({ store: authStore, provider, diff --git a/src/commands/models/list.probe.test.ts b/src/commands/models/list.probe.test.ts index 55c5ef064f3..70ffde1dd65 100644 --- a/src/commands/models/list.probe.test.ts +++ b/src/commands/models/list.probe.test.ts @@ -9,6 +9,7 @@ describe("mapFailoverReasonToProbeStatus", () => { it("keeps existing failover reason mappings", () => { expect(mapFailoverReasonToProbeStatus("auth")).toBe("auth"); expect(mapFailoverReasonToProbeStatus("rate_limit")).toBe("rate_limit"); + expect(mapFailoverReasonToProbeStatus("overloaded")).toBe("rate_limit"); expect(mapFailoverReasonToProbeStatus("billing")).toBe("billing"); expect(mapFailoverReasonToProbeStatus("timeout")).toBe("timeout"); expect(mapFailoverReasonToProbeStatus("format")).toBe("format"); diff --git a/src/commands/models/list.probe.ts b/src/commands/models/list.probe.ts index 433c005077d..8a2ec87adcc 100644 --- a/src/commands/models/list.probe.ts +++ b/src/commands/models/list.probe.ts @@ -106,7 +106,7 @@ export function mapFailoverReasonToProbeStatus(reason?: string | null): AuthProb // surface in the auth bucket instead of showing as unknown. return "auth"; } - if (reason === "rate_limit") { + if (reason === "rate_limit" || reason === "overloaded") { return "rate_limit"; } if (reason === "billing") {