Merge 87c4111a09fc632cfe6e59de735f98618338f3fb into 598f1826d8b2bc969aace2c6459824737667218c
This commit is contained in:
commit
647f9d226c
@ -114,6 +114,21 @@ describe("markAuthProfileFailure", () => {
|
||||
expect(reloaded.usageStats?.["anthropic:default"]?.cooldownUntil).toBe(firstCooldownUntil);
|
||||
});
|
||||
});
|
||||
// A rate-limit failure reported with a model id must leave both the cooldown
// reason and the triggering model on the profile's usage stats.
it("records the model that triggered a rate-limit cooldown", async () => {
  const profileId = "anthropic:default";
  const modelId = "claude-opus-4-6";

  await withAuthProfileStore(async ({ agentDir, store }) => {
    await markAuthProfileFailure({ store, profileId, reason: "rate_limit", modelId, agentDir });

    const recorded = store.usageStats?.[profileId];
    expect(recorded?.cooldownReason).toBe("rate_limit");
    expect(recorded?.cooldownModel).toBe(modelId);
  });
});
|
||||
it("records overloaded failures in the cooldown bucket", async () => {
|
||||
await withAuthProfileStore(async ({ agentDir, store }) => {
|
||||
await markAuthProfileFailure({
|
||||
|
||||
@ -51,6 +51,8 @@ export type AuthProfileFailureReason =
|
||||
export type ProfileUsageStats = {
|
||||
lastUsed?: number;
|
||||
cooldownUntil?: number;
|
||||
cooldownReason?: AuthProfileFailureReason;
|
||||
cooldownModel?: string;
|
||||
disabledUntil?: number;
|
||||
disabledReason?: AuthProfileFailureReason;
|
||||
errorCount?: number;
|
||||
|
||||
@ -94,6 +94,51 @@ describe("isProfileInCooldown", () => {
|
||||
expect(isProfileInCooldown(store, "anthropic:default")).toBe(true);
|
||||
});
|
||||
|
||||
// A rate-limit cooldown that names a model only reports "in cooldown" for that
// same model; a query for a different model is let through.
it("does not block a different model when the cooldown came from a rate limit on another model", () => {
  const cooldownUntil = Date.now() + 60_000;
  const store = makeStore({
    "anthropic:default": {
      cooldownUntil,
      cooldownReason: "rate_limit",
      cooldownModel: "claude-opus-4-6",
    },
  });
  // `now` is left undefined so the function falls back to the current time.
  const inCooldownFor = (model: string) =>
    isProfileInCooldown(store, "anthropic:default", undefined, model);

  expect(inCooldownFor("claude-sonnet-4-6")).toBe(false);
  expect(inCooldownFor("claude-opus-4-6")).toBe(true);
});
|
||||
|
||||
// Model scoping applies to rate limits only: an "overloaded" cooldown still
// blocks a query for a model other than the one stored in cooldownModel.
it("still blocks a different model for non-rate-limit cooldowns", () => {
  const cooldownUntil = Date.now() + 60_000;
  const store = makeStore({
    "anthropic:default": {
      cooldownUntil,
      cooldownReason: "overloaded",
      cooldownModel: "claude-opus-4-6",
    },
  });

  const blocked = isProfileInCooldown(store, "anthropic:default", undefined, "claude-sonnet-4-6");
  expect(blocked).toBe(true);
});
|
||||
|
||||
// An active disabled window takes precedence: even though the stored
// rate-limit cooldown names another model, the profile stays blocked.
it("still blocks when a disabled window is active even if the stored rate-limit cooldown is for another model", () => {
  const store = makeStore({
    "anthropic:default": {
      // One-minute cooldown for opus, inside a five-minute billing disable.
      cooldownUntil: Date.now() + 60_000,
      cooldownReason: "rate_limit",
      cooldownModel: "claude-opus-4-6",
      disabledUntil: Date.now() + 5 * 60_000,
      disabledReason: "billing",
    },
  });

  const blockedForOtherModel = isProfileInCooldown(
    store,
    "anthropic:default",
    undefined,
    "claude-sonnet-4-6",
  );
  expect(blockedForOtherModel).toBe(true);
});
|
||||
|
||||
it("returns false when cooldownUntil has passed", () => {
|
||||
const store = makeStore({
|
||||
"anthropic:default": { cooldownUntil: Date.now() - 1_000 },
|
||||
@ -339,6 +384,8 @@ describe("clearExpiredCooldowns", () => {
|
||||
cooldownUntil: Date.now() - 1_000,
|
||||
disabledUntil: future,
|
||||
disabledReason: "billing",
|
||||
cooldownReason: "rate_limit",
|
||||
cooldownModel: "claude-opus-4-6",
|
||||
errorCount: 5,
|
||||
failureCounts: { rate_limit: 3, billing: 2 },
|
||||
},
|
||||
@ -349,6 +396,8 @@ describe("clearExpiredCooldowns", () => {
|
||||
const stats = store.usageStats?.["anthropic:default"];
|
||||
// cooldownUntil cleared
|
||||
expect(stats?.cooldownUntil).toBeUndefined();
|
||||
expect(stats?.cooldownReason).toBeUndefined();
|
||||
expect(stats?.cooldownModel).toBeUndefined();
|
||||
// disabledUntil still active — not touched
|
||||
expect(stats?.disabledUntil).toBe(future);
|
||||
expect(stats?.disabledReason).toBe("billing");
|
||||
@ -538,7 +587,8 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
|
||||
async function markFailureAt(params: {
|
||||
store: ReturnType<typeof makeStore>;
|
||||
now: number;
|
||||
reason: "rate_limit" | "billing" | "auth_permanent";
|
||||
reason: "rate_limit" | "billing" | "auth_permanent" | "overloaded";
|
||||
modelId?: string;
|
||||
}): Promise<void> {
|
||||
vi.useFakeTimers();
|
||||
vi.setSystemTime(params.now);
|
||||
@ -547,6 +597,7 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
|
||||
store: params.store,
|
||||
profileId: "anthropic:default",
|
||||
reason: params.reason,
|
||||
modelId: params.modelId,
|
||||
});
|
||||
} finally {
|
||||
vi.useRealTimers();
|
||||
@ -608,6 +659,31 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
|
||||
});
|
||||
}
|
||||
|
||||
// While a rate-limit cooldown window is still active, a later failure with a
// different reason and model must leave the window end, reason, and model
// exactly as they were.
it("keeps rate-limit cooldown metadata unchanged while the active window is still in effect", async () => {
  const MINUTE_MS = 60 * 1000;
  const now = 1_000_000;
  const existingStats: WindowStats = {
    cooldownUntil: now + 50 * MINUTE_MS,
    cooldownReason: "rate_limit",
    cooldownModel: "claude-opus-4-6",
    errorCount: 3,
    failureCounts: { rate_limit: 3 },
    lastFailureAt: now - 10 * MINUTE_MS,
  };
  const store = makeStore({ "anthropic:default": existingStats });

  await markFailureAt({ store, now, reason: "overloaded", modelId: "claude-sonnet-4-6" });

  const after = store.usageStats?.["anthropic:default"];
  expect(after?.cooldownUntil).toBe(existingStats.cooldownUntil);
  expect(after?.cooldownReason).toBe("rate_limit");
  expect(after?.cooldownModel).toBe("claude-opus-4-6");
});
|
||||
|
||||
// When a cooldown/disabled window expires, the error count resets to prevent
|
||||
// stale counters from escalating the next cooldown (the root cause of
|
||||
// infinite cooldown loops — see #40989). The next failure should compute
|
||||
|
||||
@ -44,6 +44,7 @@ export function isProfileInCooldown(
|
||||
store: AuthProfileStore,
|
||||
profileId: string,
|
||||
now?: number,
|
||||
forModel?: string,
|
||||
): boolean {
|
||||
if (isAuthCooldownBypassedForProvider(store.profiles[profileId]?.provider)) {
|
||||
return false;
|
||||
@ -54,6 +55,17 @@ export function isProfileInCooldown(
|
||||
}
|
||||
const unusableUntil = resolveProfileUnusableUntil(stats);
|
||||
const ts = now ?? Date.now();
|
||||
if (
|
||||
!isActiveUnusableWindow(stats.disabledUntil, ts) &&
|
||||
stats.cooldownReason === "rate_limit" &&
|
||||
typeof forModel === "string" &&
|
||||
forModel.trim().length > 0 &&
|
||||
typeof stats.cooldownModel === "string" &&
|
||||
stats.cooldownModel.trim().length > 0 &&
|
||||
stats.cooldownModel.trim() !== forModel.trim()
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
return unusableUntil ? ts < unusableUntil : false;
|
||||
}
|
||||
|
||||
@ -212,6 +224,8 @@ export function clearExpiredCooldowns(store: AuthProfileStore, now?: number): bo
|
||||
|
||||
if (cooldownExpired) {
|
||||
stats.cooldownUntil = undefined;
|
||||
stats.cooldownReason = undefined;
|
||||
stats.cooldownModel = undefined;
|
||||
profileMutated = true;
|
||||
}
|
||||
if (disabledExpired) {
|
||||
@ -397,6 +411,7 @@ function computeNextProfileUsageStats(params: {
|
||||
existing: ProfileUsageStats;
|
||||
now: number;
|
||||
reason: AuthProfileFailureReason;
|
||||
modelId?: string;
|
||||
cfgResolved: ResolvedAuthCooldownConfig;
|
||||
}): ProfileUsageStats {
|
||||
const windowMs = params.cfgResolved.failureWindowMs;
|
||||
@ -442,15 +457,36 @@ function computeNextProfileUsageStats(params: {
|
||||
recomputedUntil: params.now + backoffMs,
|
||||
});
|
||||
updatedStats.disabledReason = params.reason;
|
||||
updatedStats.cooldownReason = undefined;
|
||||
updatedStats.cooldownModel = undefined;
|
||||
} else {
|
||||
const backoffMs = calculateAuthProfileCooldownMs(nextErrorCount);
|
||||
const existingCooldownUntil = params.existing.cooldownUntil;
|
||||
const keepsExistingCooldownWindow =
|
||||
typeof existingCooldownUntil === "number" &&
|
||||
Number.isFinite(existingCooldownUntil) &&
|
||||
existingCooldownUntil > params.now;
|
||||
// Keep active cooldown windows immutable so retries within the window
|
||||
// cannot push recovery further out.
|
||||
updatedStats.cooldownUntil = keepActiveWindowOrRecompute({
|
||||
existingUntil: params.existing.cooldownUntil,
|
||||
existingUntil: existingCooldownUntil,
|
||||
now: params.now,
|
||||
recomputedUntil: params.now + backoffMs,
|
||||
});
|
||||
if (keepsExistingCooldownWindow) {
|
||||
// Keep metadata aligned with the preserved active window so a later
|
||||
// transient failure cannot widen a model-scoped cooldown back to profile-wide.
|
||||
updatedStats.cooldownReason = params.existing.cooldownReason;
|
||||
updatedStats.cooldownModel = params.existing.cooldownModel;
|
||||
} else {
|
||||
updatedStats.cooldownReason = params.reason;
|
||||
updatedStats.cooldownModel =
|
||||
params.reason === "rate_limit" &&
|
||||
typeof params.modelId === "string" &&
|
||||
params.modelId.trim().length > 0
|
||||
? params.modelId.trim()
|
||||
: undefined;
|
||||
}
|
||||
}
|
||||
|
||||
return updatedStats;
|
||||
@ -465,11 +501,12 @@ export async function markAuthProfileFailure(params: {
|
||||
store: AuthProfileStore;
|
||||
profileId: string;
|
||||
reason: AuthProfileFailureReason;
|
||||
modelId?: string;
|
||||
cfg?: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
runId?: string;
|
||||
}): Promise<void> {
|
||||
const { store, profileId, reason, agentDir, cfg, runId } = params;
|
||||
const { store, profileId, reason, modelId, agentDir, cfg, runId } = params;
|
||||
const profile = store.profiles[profileId];
|
||||
if (!profile || isAuthCooldownBypassedForProvider(profile.provider)) {
|
||||
return;
|
||||
@ -497,6 +534,7 @@ export async function markAuthProfileFailure(params: {
|
||||
existing: previousStats ?? {},
|
||||
now,
|
||||
reason,
|
||||
modelId,
|
||||
cfgResolved,
|
||||
});
|
||||
nextStats = computed;
|
||||
@ -535,6 +573,7 @@ export async function markAuthProfileFailure(params: {
|
||||
existing: previousStats ?? {},
|
||||
now,
|
||||
reason,
|
||||
modelId,
|
||||
cfgResolved,
|
||||
});
|
||||
nextStats = computed;
|
||||
|
||||
@ -1211,6 +1211,50 @@ describe("runWithModelFallback", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// The only stored cooldown on the anthropic profile is a rate limit recorded
// for claude-opus-4-6. A run that asks for claude-sonnet-4-5 on the same
// provider must still be attempted there rather than skipped to the groq
// fallback.
it("does not skip a provider when the stored rate-limit cooldown is for a different model", async () => {
  const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
  try {
    const store: AuthProfileStore = {
      version: AUTH_STORE_VERSION,
      profiles: {
        "anthropic:default": { type: "api_key", provider: "anthropic", key: "test-key" },
        "groq:default": { type: "api_key", provider: "groq", key: "test-key" },
      },
      usageStats: {
        "anthropic:default": {
          cooldownUntil: Date.now() + 300000,
          cooldownReason: "rate_limit",
          cooldownModel: "claude-opus-4-6",
        },
      },
    };
    saveAuthProfileStore(store, tmpDir);

    const cfg = makeCfg({
      agents: {
        defaults: {
          model: {
            primary: "anthropic/claude-sonnet-4-5",
            fallbacks: ["groq/llama-3.3-70b-versatile"],
          },
        },
      },
    });

    // Only one resolved value is queued: a second call would mean the
    // primary candidate was skipped or retried.
    const run = vi.fn().mockResolvedValueOnce("sonnet success");

    const result = await runWithModelFallback({
      cfg,
      provider: "anthropic",
      model: "claude-sonnet-4-5",
      run,
      agentDir: tmpDir,
    });

    expect(result.result).toBe("sonnet success");
    expect(run).toHaveBeenCalledTimes(1);
    expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5");
  } finally {
    // Remove the scratch agent directory even when an assertion above throws,
    // so repeated test runs do not pile up directories under os.tmpdir().
    await fs.rm(tmpDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
it("skips same-provider models on auth cooldown but still tries no-profile fallback providers", async () => {
|
||||
const { dir } = await makeAuthStoreWithCooldown("anthropic", "auth");
|
||||
const cfg = makeCfg({
|
||||
|
||||
@ -548,7 +548,9 @@ export async function runWithModelFallback<T>(params: {
|
||||
store: authStore,
|
||||
provider: candidate.provider,
|
||||
});
|
||||
const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
|
||||
const isAnyProfileAvailable = profileIds.some(
|
||||
(id) => !isProfileInCooldown(authStore, id, undefined, candidate.model),
|
||||
);
|
||||
|
||||
if (profileIds.length > 0 && !isAnyProfileAvailable) {
|
||||
// All profiles for this provider are in cooldown.
|
||||
|
||||
@ -242,6 +242,8 @@ const writeAuthStore = async (
|
||||
{
|
||||
lastUsed?: number;
|
||||
cooldownUntil?: number;
|
||||
cooldownReason?: AuthProfileFailureReason;
|
||||
cooldownModel?: string;
|
||||
disabledUntil?: number;
|
||||
disabledReason?: AuthProfileFailureReason;
|
||||
failureCounts?: Partial<Record<AuthProfileFailureReason, number>>;
|
||||
@ -356,6 +358,8 @@ async function readUsageStats(agentDir: string) {
|
||||
{
|
||||
lastUsed?: number;
|
||||
cooldownUntil?: number;
|
||||
cooldownReason?: AuthProfileFailureReason;
|
||||
cooldownModel?: string;
|
||||
disabledUntil?: number;
|
||||
disabledReason?: AuthProfileFailureReason;
|
||||
}
|
||||
@ -1150,6 +1154,43 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Profile openai:p1 carries a rate-limit cooldown recorded for "mock-2". A run
// pinned to that profile (authProfileIdSource "user") but targeting "mock-1"
// should still make exactly one attempt and return its payload.
it("does not block a locked profile when its rate-limit cooldown came from a different model", async () => {
  await withTimedAgentWorkspace(async ({ agentDir, workspaceDir, now }) => {
    const HOUR_MS = 60 * 60 * 1000;
    await writeAuthStore(agentDir, {
      usageStats: {
        "openai:p1": {
          lastUsed: 1,
          cooldownUntil: now + HOUR_MS,
          cooldownReason: "rate_limit",
          cooldownModel: "mock-2",
        },
        "openai:p2": { lastUsed: 2 },
      },
    });

    mockSingleSuccessfulAttempt();

    const outcome = await runEmbeddedPiAgent({
      sessionId: "session:test",
      sessionKey: "agent:test:model-specific-cooldown-lock",
      sessionFile: path.join(workspaceDir, "session.jsonl"),
      workspaceDir,
      agentDir,
      config: makeConfig(),
      prompt: "hello",
      provider: "openai",
      model: "mock-1",
      authProfileId: "openai:p1",
      authProfileIdSource: "user",
      timeoutMs: 5_000,
      runId: "run:model-specific-cooldown-lock",
    });

    expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(1);
    const firstPayloadText = outcome.payloads?.[0]?.text ?? "";
    expect(firstPayloadText).toContain("ok");
  });
});
|
||||
|
||||
it("treats agent-level fallbacks as configured when defaults have none", async () => {
|
||||
await withTimedAgentWorkspace(async ({ agentDir, workspaceDir, now }) => {
|
||||
await writeAuthStore(agentDir, {
|
||||
|
||||
@ -717,7 +717,7 @@ export async function runEmbeddedPiAgent(
|
||||
let nextIndex = profileIndex + 1;
|
||||
while (nextIndex < profileCandidates.length) {
|
||||
const candidate = profileCandidates[nextIndex];
|
||||
if (candidate && isProfileInCooldown(authStore, candidate)) {
|
||||
if (candidate && isProfileInCooldown(authStore, candidate, undefined, modelId)) {
|
||||
nextIndex += 1;
|
||||
continue;
|
||||
}
|
||||
@ -744,7 +744,9 @@ export async function runEmbeddedPiAgent(
|
||||
);
|
||||
const allAutoProfilesInCooldown =
|
||||
autoProfileCandidates.length > 0 &&
|
||||
autoProfileCandidates.every((candidate) => isProfileInCooldown(authStore, candidate));
|
||||
autoProfileCandidates.every((candidate) =>
|
||||
isProfileInCooldown(authStore, candidate, undefined, modelId),
|
||||
);
|
||||
const unavailableReason = allAutoProfilesInCooldown
|
||||
? (resolveProfilesUnavailableReason({
|
||||
store: authStore,
|
||||
@ -763,7 +765,9 @@ export async function runEmbeddedPiAgent(
|
||||
while (profileIndex < profileCandidates.length) {
|
||||
const candidate = profileCandidates[profileIndex];
|
||||
const inCooldown =
|
||||
candidate && candidate !== lockedProfileId && isProfileInCooldown(authStore, candidate);
|
||||
candidate &&
|
||||
candidate !== lockedProfileId &&
|
||||
isProfileInCooldown(authStore, candidate, undefined, modelId);
|
||||
if (inCooldown) {
|
||||
if (allowTransientCooldownProbe && !didTransientCooldownProbe) {
|
||||
didTransientCooldownProbe = true;
|
||||
@ -842,6 +846,7 @@ export async function runEmbeddedPiAgent(
|
||||
store: authStore,
|
||||
profileId,
|
||||
reason,
|
||||
modelId,
|
||||
cfg: params.config,
|
||||
agentDir,
|
||||
runId: params.runId,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user