Merge 87c4111a09fc632cfe6e59de735f98618338f3fb into 598f1826d8b2bc969aace2c6459824737667218c

This commit is contained in:
Siew's Capital Jarvis 2026-03-21 12:05:31 +08:00 committed by GitHub
commit 647f9d226c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 231 additions and 7 deletions

View File

@ -114,6 +114,21 @@ describe("markAuthProfileFailure", () => {
expect(reloaded.usageStats?.["anthropic:default"]?.cooldownUntil).toBe(firstCooldownUntil);
});
});
it("records the model that triggered a rate-limit cooldown", async () => {
  // A rate-limit failure reported with a modelId must persist both the
  // cooldown reason and the model that produced it on the profile's stats.
  const rateLimitedModel = "claude-opus-4-6";

  await withAuthProfileStore(async ({ agentDir, store }) => {
    await markAuthProfileFailure({
      store,
      profileId: "anthropic:default",
      reason: "rate_limit",
      modelId: rateLimitedModel,
      agentDir,
    });

    const recorded = store.usageStats?.["anthropic:default"];
    expect(recorded?.cooldownReason).toBe("rate_limit");
    expect(recorded?.cooldownModel).toBe(rateLimitedModel);
  });
});
it("records overloaded failures in the cooldown bucket", async () => {
await withAuthProfileStore(async ({ agentDir, store }) => {
await markAuthProfileFailure({

View File

@ -51,6 +51,8 @@ export type AuthProfileFailureReason =
export type ProfileUsageStats = {
lastUsed?: number;
cooldownUntil?: number;
cooldownReason?: AuthProfileFailureReason;
cooldownModel?: string;
disabledUntil?: number;
disabledReason?: AuthProfileFailureReason;
errorCount?: number;

View File

@ -94,6 +94,51 @@ describe("isProfileInCooldown", () => {
expect(isProfileInCooldown(store, "anthropic:default")).toBe(true);
});
it("does not block a different model when the cooldown came from a rate limit on another model", () => {
  // The stored cooldown is a rate limit recorded against a single model.
  const store = makeStore({
    "anthropic:default": {
      cooldownUntil: Date.now() + 60_000,
      cooldownReason: "rate_limit",
      cooldownModel: "claude-opus-4-6",
    },
  });
  // `now` is left undefined so the function falls back to the current time.
  const isBlockedFor = (model: string): boolean =>
    isProfileInCooldown(store, "anthropic:default", undefined, model);

  // A different model on the same profile stays usable...
  expect(isBlockedFor("claude-sonnet-4-6")).toBe(false);
  // ...while the model that hit the rate limit is still in cooldown.
  expect(isBlockedFor("claude-opus-4-6")).toBe(true);
});
it("still blocks a different model for non-rate-limit cooldowns", () => {
  // An "overloaded" cooldown is not model-scoped: even though a cooldownModel
  // is stored, asking about another model must still report the profile as
  // cooling down.
  const overloadedStore = makeStore({
    "anthropic:default": {
      cooldownUntil: Date.now() + 60_000,
      cooldownReason: "overloaded",
      cooldownModel: "claude-opus-4-6",
    },
  });

  const blocked = isProfileInCooldown(
    overloadedStore,
    "anthropic:default",
    undefined,
    "claude-sonnet-4-6",
  );

  expect(blocked).toBe(true);
});
it("still blocks when a disabled window is active even if the stored rate-limit cooldown is for another model", () => {
  // The profile carries two windows at once: a model-scoped rate-limit
  // cooldown and a longer billing-disabled window. The disabled window must
  // win, so the model-specific exemption does not apply.
  const disabledStore = makeStore({
    "anthropic:default": {
      cooldownUntil: Date.now() + 60_000,
      cooldownReason: "rate_limit",
      cooldownModel: "claude-opus-4-6",
      disabledUntil: Date.now() + 5 * 60_000,
      disabledReason: "billing",
    },
  });

  const blocked = isProfileInCooldown(
    disabledStore,
    "anthropic:default",
    undefined,
    "claude-sonnet-4-6",
  );

  expect(blocked).toBe(true);
});
it("returns false when cooldownUntil has passed", () => {
const store = makeStore({
"anthropic:default": { cooldownUntil: Date.now() - 1_000 },
@ -339,6 +384,8 @@ describe("clearExpiredCooldowns", () => {
cooldownUntil: Date.now() - 1_000,
disabledUntil: future,
disabledReason: "billing",
cooldownReason: "rate_limit",
cooldownModel: "claude-opus-4-6",
errorCount: 5,
failureCounts: { rate_limit: 3, billing: 2 },
},
@ -349,6 +396,8 @@ describe("clearExpiredCooldowns", () => {
const stats = store.usageStats?.["anthropic:default"];
// cooldownUntil cleared
expect(stats?.cooldownUntil).toBeUndefined();
expect(stats?.cooldownReason).toBeUndefined();
expect(stats?.cooldownModel).toBeUndefined();
// disabledUntil still active — not touched
expect(stats?.disabledUntil).toBe(future);
expect(stats?.disabledReason).toBe("billing");
@ -538,7 +587,8 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
async function markFailureAt(params: {
store: ReturnType<typeof makeStore>;
now: number;
reason: "rate_limit" | "billing" | "auth_permanent";
reason: "rate_limit" | "billing" | "auth_permanent" | "overloaded";
modelId?: string;
}): Promise<void> {
vi.useFakeTimers();
vi.setSystemTime(params.now);
@ -547,6 +597,7 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
store: params.store,
profileId: "anthropic:default",
reason: params.reason,
modelId: params.modelId,
});
} finally {
vi.useRealTimers();
@ -608,6 +659,31 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
});
}
it("keeps rate-limit cooldown metadata unchanged while the active window is still in effect", async () => {
  const now = 1_000_000;
  const minuteMs = 60 * 1000;

  // An active, model-scoped rate-limit cooldown that still has 50 minutes left.
  const activeWindow: WindowStats = {
    cooldownUntil: now + 50 * minuteMs,
    cooldownReason: "rate_limit",
    cooldownModel: "claude-opus-4-6",
    errorCount: 3,
    failureCounts: { rate_limit: 3 },
    lastFailureAt: now - 10 * minuteMs,
  };
  const store = makeStore({ "anthropic:default": activeWindow });

  // A later failure with a different reason and model arrives inside the window.
  await markFailureAt({
    store,
    now,
    reason: "overloaded",
    modelId: "claude-sonnet-4-6",
  });

  // The preserved window must keep its original deadline, reason and model.
  const after = store.usageStats?.["anthropic:default"];
  expect(after?.cooldownUntil).toBe(activeWindow.cooldownUntil);
  expect(after?.cooldownReason).toBe("rate_limit");
  expect(after?.cooldownModel).toBe("claude-opus-4-6");
});
// When a cooldown/disabled window expires, the error count resets to prevent
// stale counters from escalating the next cooldown (the root cause of
// infinite cooldown loops — see #40989). The next failure should compute

View File

@ -44,6 +44,7 @@ export function isProfileInCooldown(
store: AuthProfileStore,
profileId: string,
now?: number,
forModel?: string,
): boolean {
if (isAuthCooldownBypassedForProvider(store.profiles[profileId]?.provider)) {
return false;
@ -54,6 +55,17 @@ export function isProfileInCooldown(
}
const unusableUntil = resolveProfileUnusableUntil(stats);
const ts = now ?? Date.now();
if (
!isActiveUnusableWindow(stats.disabledUntil, ts) &&
stats.cooldownReason === "rate_limit" &&
typeof forModel === "string" &&
forModel.trim().length > 0 &&
typeof stats.cooldownModel === "string" &&
stats.cooldownModel.trim().length > 0 &&
stats.cooldownModel.trim() !== forModel.trim()
) {
return false;
}
return unusableUntil ? ts < unusableUntil : false;
}
@ -212,6 +224,8 @@ export function clearExpiredCooldowns(store: AuthProfileStore, now?: number): bo
if (cooldownExpired) {
stats.cooldownUntil = undefined;
stats.cooldownReason = undefined;
stats.cooldownModel = undefined;
profileMutated = true;
}
if (disabledExpired) {
@ -397,6 +411,7 @@ function computeNextProfileUsageStats(params: {
existing: ProfileUsageStats;
now: number;
reason: AuthProfileFailureReason;
modelId?: string;
cfgResolved: ResolvedAuthCooldownConfig;
}): ProfileUsageStats {
const windowMs = params.cfgResolved.failureWindowMs;
@ -442,15 +457,36 @@ function computeNextProfileUsageStats(params: {
recomputedUntil: params.now + backoffMs,
});
updatedStats.disabledReason = params.reason;
updatedStats.cooldownReason = undefined;
updatedStats.cooldownModel = undefined;
} else {
const backoffMs = calculateAuthProfileCooldownMs(nextErrorCount);
const existingCooldownUntil = params.existing.cooldownUntil;
const keepsExistingCooldownWindow =
typeof existingCooldownUntil === "number" &&
Number.isFinite(existingCooldownUntil) &&
existingCooldownUntil > params.now;
// Keep active cooldown windows immutable so retries within the window
// cannot push recovery further out.
updatedStats.cooldownUntil = keepActiveWindowOrRecompute({
existingUntil: params.existing.cooldownUntil,
existingUntil: existingCooldownUntil,
now: params.now,
recomputedUntil: params.now + backoffMs,
});
if (keepsExistingCooldownWindow) {
// Keep metadata aligned with the preserved active window so a later
// transient failure cannot widen a model-scoped cooldown back to profile-wide.
updatedStats.cooldownReason = params.existing.cooldownReason;
updatedStats.cooldownModel = params.existing.cooldownModel;
} else {
updatedStats.cooldownReason = params.reason;
updatedStats.cooldownModel =
params.reason === "rate_limit" &&
typeof params.modelId === "string" &&
params.modelId.trim().length > 0
? params.modelId.trim()
: undefined;
}
}
return updatedStats;
@ -465,11 +501,12 @@ export async function markAuthProfileFailure(params: {
store: AuthProfileStore;
profileId: string;
reason: AuthProfileFailureReason;
modelId?: string;
cfg?: OpenClawConfig;
agentDir?: string;
runId?: string;
}): Promise<void> {
const { store, profileId, reason, agentDir, cfg, runId } = params;
const { store, profileId, reason, modelId, agentDir, cfg, runId } = params;
const profile = store.profiles[profileId];
if (!profile || isAuthCooldownBypassedForProvider(profile.provider)) {
return;
@ -497,6 +534,7 @@ export async function markAuthProfileFailure(params: {
existing: previousStats ?? {},
now,
reason,
modelId,
cfgResolved,
});
nextStats = computed;
@ -535,6 +573,7 @@ export async function markAuthProfileFailure(params: {
existing: previousStats ?? {},
now,
reason,
modelId,
cfgResolved,
});
nextStats = computed;

View File

@ -1211,6 +1211,50 @@ describe("runWithModelFallback", () => {
});
});
it("does not skip a provider when the stored rate-limit cooldown is for a different model", async () => {
  // The auth store is persisted to a throwaway directory; it is removed in
  // `finally` so the test does not leak a temp dir on success or failure.
  const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
  try {
    // The anthropic profile is cooling down, but only because of a rate limit
    // recorded against a different model than the one requested below.
    const store: AuthProfileStore = {
      version: AUTH_STORE_VERSION,
      profiles: {
        "anthropic:default": { type: "api_key", provider: "anthropic", key: "test-key" },
        "groq:default": { type: "api_key", provider: "groq", key: "test-key" },
      },
      usageStats: {
        "anthropic:default": {
          cooldownUntil: Date.now() + 300000,
          cooldownReason: "rate_limit",
          cooldownModel: "claude-opus-4-6",
        },
      },
    };
    saveAuthProfileStore(store, tmpDir);

    const cfg = makeCfg({
      agents: {
        defaults: {
          model: {
            primary: "anthropic/claude-sonnet-4-5",
            fallbacks: ["groq/llama-3.3-70b-versatile"],
          },
        },
      },
    });
    const run = vi.fn().mockResolvedValueOnce("sonnet success");

    const result = await runWithModelFallback({
      cfg,
      provider: "anthropic",
      model: "claude-sonnet-4-5",
      run,
      agentDir: tmpDir,
    });

    // The primary candidate runs directly; the groq fallback is never reached.
    expect(result.result).toBe("sonnet success");
    expect(run).toHaveBeenCalledTimes(1);
    expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5");
  } finally {
    await fs.rm(tmpDir, { recursive: true, force: true });
  }
});
it("skips same-provider models on auth cooldown but still tries no-profile fallback providers", async () => {
const { dir } = await makeAuthStoreWithCooldown("anthropic", "auth");
const cfg = makeCfg({

View File

@ -548,7 +548,9 @@ export async function runWithModelFallback<T>(params: {
store: authStore,
provider: candidate.provider,
});
const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
const isAnyProfileAvailable = profileIds.some(
(id) => !isProfileInCooldown(authStore, id, undefined, candidate.model),
);
if (profileIds.length > 0 && !isAnyProfileAvailable) {
// All profiles for this provider are in cooldown.

View File

@ -242,6 +242,8 @@ const writeAuthStore = async (
{
lastUsed?: number;
cooldownUntil?: number;
cooldownReason?: AuthProfileFailureReason;
cooldownModel?: string;
disabledUntil?: number;
disabledReason?: AuthProfileFailureReason;
failureCounts?: Partial<Record<AuthProfileFailureReason, number>>;
@ -356,6 +358,8 @@ async function readUsageStats(agentDir: string) {
{
lastUsed?: number;
cooldownUntil?: number;
cooldownReason?: AuthProfileFailureReason;
cooldownModel?: string;
disabledUntil?: number;
disabledReason?: AuthProfileFailureReason;
}
@ -1150,6 +1154,43 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
});
});
it("does not block a locked profile when its rate-limit cooldown came from a different model", async () => {
  await withTimedAgentWorkspace(async ({ agentDir, workspaceDir, now }) => {
    const oneHourMs = 60 * 60 * 1000;

    // openai:p1 is cooling down for a rate limit recorded against "mock-2";
    // the run below requests "mock-1" on that same profile.
    await writeAuthStore(agentDir, {
      usageStats: {
        "openai:p1": {
          lastUsed: 1,
          cooldownUntil: now + oneHourMs,
          cooldownReason: "rate_limit",
          cooldownModel: "mock-2",
        },
        "openai:p2": { lastUsed: 2 },
      },
    });
    mockSingleSuccessfulAttempt();

    const sessionFile = path.join(workspaceDir, "session.jsonl");
    const outcome = await runEmbeddedPiAgent({
      sessionId: "session:test",
      sessionKey: "agent:test:model-specific-cooldown-lock",
      sessionFile,
      workspaceDir,
      agentDir,
      config: makeConfig(),
      prompt: "hello",
      provider: "openai",
      model: "mock-1",
      authProfileId: "openai:p1",
      authProfileIdSource: "user",
      timeoutMs: 5_000,
      runId: "run:model-specific-cooldown-lock",
    });

    // Exactly one attempt is made and it yields the mocked success payload.
    expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(1);
    const firstPayloadText = outcome.payloads?.[0]?.text ?? "";
    expect(firstPayloadText).toContain("ok");
  });
});
it("treats agent-level fallbacks as configured when defaults have none", async () => {
await withTimedAgentWorkspace(async ({ agentDir, workspaceDir, now }) => {
await writeAuthStore(agentDir, {

View File

@ -717,7 +717,7 @@ export async function runEmbeddedPiAgent(
let nextIndex = profileIndex + 1;
while (nextIndex < profileCandidates.length) {
const candidate = profileCandidates[nextIndex];
if (candidate && isProfileInCooldown(authStore, candidate)) {
if (candidate && isProfileInCooldown(authStore, candidate, undefined, modelId)) {
nextIndex += 1;
continue;
}
@ -744,7 +744,9 @@ export async function runEmbeddedPiAgent(
);
const allAutoProfilesInCooldown =
autoProfileCandidates.length > 0 &&
autoProfileCandidates.every((candidate) => isProfileInCooldown(authStore, candidate));
autoProfileCandidates.every((candidate) =>
isProfileInCooldown(authStore, candidate, undefined, modelId),
);
const unavailableReason = allAutoProfilesInCooldown
? (resolveProfilesUnavailableReason({
store: authStore,
@ -763,7 +765,9 @@ export async function runEmbeddedPiAgent(
while (profileIndex < profileCandidates.length) {
const candidate = profileCandidates[profileIndex];
const inCooldown =
candidate && candidate !== lockedProfileId && isProfileInCooldown(authStore, candidate);
candidate &&
candidate !== lockedProfileId &&
isProfileInCooldown(authStore, candidate, undefined, modelId);
if (inCooldown) {
if (allowTransientCooldownProbe && !didTransientCooldownProbe) {
didTransientCooldownProbe = true;
@ -842,6 +846,7 @@ export async function runEmbeddedPiAgent(
store: authStore,
profileId,
reason,
modelId,
cfg: params.config,
agentDir,
runId: params.runId,