diff --git a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts index 8c1aef240f7..a192cb65ba5 100644 --- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts +++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts @@ -252,6 +252,24 @@ const mockFailedThenSuccessfulAttempt = (errorMessage = "rate limit") => { ); }; +const mockPromptErrorThenSuccessfulAttempt = (errorMessage: string) => { + runEmbeddedAttemptMock + .mockResolvedValueOnce( + makeAttempt({ + promptError: new Error(errorMessage), + }), + ) + .mockResolvedValueOnce( + makeAttempt({ + assistantTexts: ["ok"], + lastAssistant: buildAssistant({ + stopReason: "stop", + content: [{ type: "text", text: "ok" }], + }), + }), + ); +}; + async function runAutoPinnedOpenAiTurn(params: { agentDir: string; workspaceDir: string; @@ -320,6 +338,28 @@ async function runAutoPinnedRotationCase(params: { }); } +async function runAutoPinnedPromptErrorRotationCase(params: { + errorMessage: string; + sessionKey: string; + runId: string; +}) { + runEmbeddedAttemptMock.mockClear(); + return withAgentWorkspace(async ({ agentDir, workspaceDir }) => { + await writeAuthStore(agentDir); + mockPromptErrorThenSuccessfulAttempt(params.errorMessage); + await runAutoPinnedOpenAiTurn({ + agentDir, + workspaceDir, + sessionKey: params.sessionKey, + runId: params.runId, + }); + + expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2); + const usageStats = await readUsageStats(agentDir); + return { usageStats }; + }); +} + function mockSingleSuccessfulAttempt() { runEmbeddedAttemptMock.mockResolvedValueOnce( makeAttempt({ @@ -639,13 +679,24 @@ describe("runEmbeddedPiAgent auth profile rotation", () => { expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number"); }); - it("rotates for overloaded prompt failures across auto-pinned profiles", async () => { + it("rotates for overloaded assistant failures across auto-pinned profiles", async () => { const { usageStats } = await runAutoPinnedRotationCase({ errorMessage: '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}', sessionKey: "agent:test:overloaded-rotation", runId: "run:overloaded-rotation", }); expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number"); + expect(usageStats["openai:p1"]?.cooldownUntil).toBeUndefined(); + }); + + it("rotates for overloaded prompt failures across auto-pinned profiles", async () => { + const { usageStats } = await runAutoPinnedPromptErrorRotationCase({ + errorMessage: '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}', + sessionKey: "agent:test:overloaded-prompt-rotation", + runId: "run:overloaded-prompt-rotation", + }); + expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number"); + expect(usageStats["openai:p1"]?.cooldownUntil).toBeUndefined(); }); it("rotates on timeout without cooling down the timed-out profile", async () => { diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index a5b799471d2..a36410f033c 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -10,6 +10,7 @@ import { resolveOpenClawAgentDir } from "../agent-paths.js"; import { hasConfiguredModelFallbacks } from "../agent-scope.js"; import { isProfileInCooldown, + type AuthProfileFailureReason, markAuthProfileFailure, markAuthProfileGood, markAuthProfileUsed, @@ -41,6 +42,7 @@ import { isLikelyContextOverflowError, isFailoverAssistantError, isFailoverErrorMessage, + isOverloadedErrorMessage, parseImageSizeError, parseImageDimensionError, isRateLimitAssistantError, @@ -721,7 +723,7 @@ export async function runEmbeddedPiAgent( let runLoopIterations = 0; const maybeMarkAuthProfileFailure = async (failure: { profileId?: string; - reason?: Parameters[0]["reason"] | null; + reason?: AuthProfileFailureReason | null; config?: RunEmbeddedPiAgentParams["config"]; agentDir?: RunEmbeddedPiAgentParams["agentDir"]; }) => { @@ -737,6 +739,21 @@ export async function runEmbeddedPiAgent( agentDir, }); }; + const resolveAuthProfileFailureReason = ( + errorText: string, + failoverReason: FailoverReason | null, + ): AuthProfileFailureReason | null => { + if (!failoverReason || failoverReason === "timeout") { + return null; + } + // Overloaded provider responses currently stay on the rate_limit failover lane + // so existing retry/failover behavior keeps working, but they should not + // be recorded as auth-profile failures. + if (failoverReason === "rate_limit" && isOverloadedErrorMessage(errorText)) { + return null; + } + return failoverReason; + }; try { let authRetryPending = false; // Hoisted so the retry-limit error path can use the most recent API total. @@ -1145,9 +1162,13 @@ export async function runEmbeddedPiAgent( }; } const promptFailoverReason = classifyFailoverReason(errorText); + const promptProfileFailureReason = resolveAuthProfileFailureReason( + errorText, + promptFailoverReason, + ); await maybeMarkAuthProfileFailure({ profileId: lastProfileId, - reason: promptFailoverReason, + reason: promptProfileFailureReason, }); if ( isFailoverErrorMessage(errorText) && @@ -1198,6 +1219,10 @@ export async function runEmbeddedPiAgent( const billingFailure = isBillingAssistantError(lastAssistant); const failoverFailure = isFailoverAssistantError(lastAssistant); const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? ""); + const assistantProfileFailureReason = resolveAuthProfileFailureReason( + lastAssistant?.errorMessage ?? "", + assistantFailoverReason, + ); const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError; const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? ""); @@ -1237,10 +1262,7 @@ export async function runEmbeddedPiAgent( if (shouldRotate) { if (lastProfileId) { - const reason = - timedOut || assistantFailoverReason === "timeout" - ? "timeout" - : (assistantFailoverReason ?? "unknown"); + const reason = timedOut ? "timeout" : assistantProfileFailureReason; // Skip cooldown for timeouts: a timeout is model/network-specific, // not an auth issue. Marking the profile would poison fallback models // on the same provider (e.g. gpt-5.3 timeout blocks gpt-5.2).