Mykyta Bozhenko 448394a0de fix(agent): Enable model fallback for prompt-phase quota/rate limit errors
When a prompt submission fails with quota or rate limit errors, throw
FailoverError instead of the raw promptError. This enables the model
fallback system to try alternative models.

Previously, rate limit errors during the prompt phase (before streaming)
were thrown directly, bypassing fallback. Only response-phase errors
triggered model fallback.

Now checks if fallback models are configured and the error is failover-
eligible. If so, wraps in FailoverError to trigger the fallback chain.
2026-01-18 01:29:48 +00:00

471 lines
18 KiB
TypeScript

import fs from "node:fs/promises";
import type { ThinkLevel } from "../../auto-reply/thinking.js";
import { enqueueCommandInLane } from "../../process/command-queue.js";
import { resolveUserPath } from "../../utils.js";
import { isMarkdownCapableMessageChannel } from "../../utils/message-channel.js";
import { resolveClawdbotAgentDir } from "../agent-paths.js";
import {
markAuthProfileFailure,
markAuthProfileGood,
markAuthProfileUsed,
} from "../auth-profiles.js";
import {
CONTEXT_WINDOW_HARD_MIN_TOKENS,
CONTEXT_WINDOW_WARN_BELOW_TOKENS,
evaluateContextWindowGuard,
resolveContextWindowInfo,
} from "../context-window-guard.js";
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
import { FailoverError, resolveFailoverStatus } from "../failover-error.js";
import {
ensureAuthProfileStore,
getApiKeyForModel,
resolveAuthProfileOrder,
} from "../model-auth.js";
import { ensureClawdbotModelsJson } from "../models-config.js";
import {
classifyFailoverReason,
formatAssistantErrorText,
isAuthAssistantError,
isCompactionFailureError,
isContextOverflowError,
isFailoverAssistantError,
isFailoverErrorMessage,
isRateLimitAssistantError,
isTimeoutErrorMessage,
pickFallbackThinkingLevel,
} from "../pi-embedded-helpers.js";
import { normalizeUsage, type UsageLike } from "../usage.js";
import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
import { log } from "./logger.js";
import { resolveModel } from "./model.js";
import { runEmbeddedAttempt } from "./run/attempt.js";
import type { RunEmbeddedPiAgentParams } from "./run/params.js";
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
import type { EmbeddedPiAgentMeta, EmbeddedPiRunResult } from "./types.js";
import { describeUnknownError } from "./utils.js";
type ApiKeyInfo = {
apiKey: string;
profileId?: string;
source: string;
};
export async function runEmbeddedPiAgent(
params: RunEmbeddedPiAgentParams,
): Promise<EmbeddedPiRunResult> {
const sessionLane = resolveSessionLane(params.sessionKey?.trim() || params.sessionId);
const globalLane = resolveGlobalLane(params.lane);
const enqueueGlobal =
params.enqueue ?? ((task, opts) => enqueueCommandInLane(globalLane, task, opts));
const channelHint = params.messageChannel ?? params.messageProvider;
const resolvedToolResultFormat =
params.toolResultFormat ??
(channelHint
? isMarkdownCapableMessageChannel(channelHint)
? "markdown"
: "plain"
: "markdown");
return enqueueCommandInLane(sessionLane, () =>
enqueueGlobal(async () => {
const started = Date.now();
const resolvedWorkspace = resolveUserPath(params.workspaceDir);
const prevCwd = process.cwd();
const provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
const modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
const agentDir = params.agentDir ?? resolveClawdbotAgentDir();
await ensureClawdbotModelsJson(params.config, agentDir);
const { model, error, authStorage, modelRegistry } = resolveModel(
provider,
modelId,
agentDir,
params.config,
);
if (!model) {
throw new Error(error ?? `Unknown model: ${provider}/${modelId}`);
}
const ctxInfo = resolveContextWindowInfo({
cfg: params.config,
provider,
modelId,
modelContextWindow: model.contextWindow,
defaultTokens: DEFAULT_CONTEXT_TOKENS,
});
const ctxGuard = evaluateContextWindowGuard({
info: ctxInfo,
warnBelowTokens: CONTEXT_WINDOW_WARN_BELOW_TOKENS,
hardMinTokens: CONTEXT_WINDOW_HARD_MIN_TOKENS,
});
if (ctxGuard.shouldWarn) {
log.warn(
`low context window: ${provider}/${modelId} ctx=${ctxGuard.tokens} (warn<${CONTEXT_WINDOW_WARN_BELOW_TOKENS}) source=${ctxGuard.source}`,
);
}
if (ctxGuard.shouldBlock) {
log.error(
`blocked model (context window too small): ${provider}/${modelId} ctx=${ctxGuard.tokens} (min=${CONTEXT_WINDOW_HARD_MIN_TOKENS}) source=${ctxGuard.source}`,
);
throw new FailoverError(
`Model context window too small (${ctxGuard.tokens} tokens). Minimum is ${CONTEXT_WINDOW_HARD_MIN_TOKENS}.`,
{ reason: "unknown", provider, model: modelId },
);
}
const authStore = ensureAuthProfileStore(agentDir);
const explicitProfileId = params.authProfileId?.trim();
const profileOrder = resolveAuthProfileOrder({
cfg: params.config,
store: authStore,
provider,
preferredProfile: explicitProfileId,
});
if (explicitProfileId && !profileOrder.includes(explicitProfileId)) {
throw new Error(`Auth profile "${explicitProfileId}" is not configured for ${provider}.`);
}
const profileCandidates = profileOrder.length > 0 ? profileOrder : [undefined];
let profileIndex = 0;
const initialThinkLevel = params.thinkLevel ?? "off";
let thinkLevel = initialThinkLevel;
const attemptedThinking = new Set<ThinkLevel>();
let apiKeyInfo: ApiKeyInfo | null = null;
let lastProfileId: string | undefined;
const resolveApiKeyForCandidate = async (candidate?: string) => {
return getApiKeyForModel({
model,
cfg: params.config,
profileId: candidate,
store: authStore,
agentDir,
});
};
const applyApiKeyInfo = async (candidate?: string): Promise<void> => {
apiKeyInfo = await resolveApiKeyForCandidate(candidate);
if (model.provider === "github-copilot") {
const { resolveCopilotApiToken } =
await import("../../providers/github-copilot-token.js");
const copilotToken = await resolveCopilotApiToken({
githubToken: apiKeyInfo.apiKey,
});
authStorage.setRuntimeApiKey(model.provider, copilotToken.token);
} else {
authStorage.setRuntimeApiKey(model.provider, apiKeyInfo.apiKey);
}
lastProfileId = apiKeyInfo.profileId;
};
const advanceAuthProfile = async (): Promise<boolean> => {
let nextIndex = profileIndex + 1;
while (nextIndex < profileCandidates.length) {
const candidate = profileCandidates[nextIndex];
try {
await applyApiKeyInfo(candidate);
profileIndex = nextIndex;
thinkLevel = initialThinkLevel;
attemptedThinking.clear();
return true;
} catch (err) {
if (candidate && candidate === explicitProfileId) throw err;
nextIndex += 1;
}
}
return false;
};
try {
await applyApiKeyInfo(profileCandidates[profileIndex]);
} catch (err) {
if (profileCandidates[profileIndex] === explicitProfileId) throw err;
const advanced = await advanceAuthProfile();
if (!advanced) throw err;
}
try {
while (true) {
attemptedThinking.add(thinkLevel);
await fs.mkdir(resolvedWorkspace, { recursive: true });
const attempt = await runEmbeddedAttempt({
sessionId: params.sessionId,
sessionKey: params.sessionKey,
messageChannel: params.messageChannel,
messageProvider: params.messageProvider,
agentAccountId: params.agentAccountId,
currentChannelId: params.currentChannelId,
currentThreadTs: params.currentThreadTs,
replyToMode: params.replyToMode,
hasRepliedRef: params.hasRepliedRef,
sessionFile: params.sessionFile,
workspaceDir: params.workspaceDir,
agentDir,
config: params.config,
skillsSnapshot: params.skillsSnapshot,
prompt: params.prompt,
images: params.images,
provider,
modelId,
model,
authStorage,
modelRegistry,
thinkLevel,
verboseLevel: params.verboseLevel,
reasoningLevel: params.reasoningLevel,
toolResultFormat: resolvedToolResultFormat,
bashElevated: params.bashElevated,
timeoutMs: params.timeoutMs,
runId: params.runId,
abortSignal: params.abortSignal,
shouldEmitToolResult: params.shouldEmitToolResult,
shouldEmitToolOutput: params.shouldEmitToolOutput,
onPartialReply: params.onPartialReply,
onAssistantMessageStart: params.onAssistantMessageStart,
onBlockReply: params.onBlockReply,
onBlockReplyFlush: params.onBlockReplyFlush,
blockReplyBreak: params.blockReplyBreak,
blockReplyChunking: params.blockReplyChunking,
onReasoningStream: params.onReasoningStream,
onToolResult: params.onToolResult,
onAgentEvent: params.onAgentEvent,
extraSystemPrompt: params.extraSystemPrompt,
ownerNumbers: params.ownerNumbers,
enforceFinalTag: params.enforceFinalTag,
});
const { aborted, promptError, timedOut, sessionIdUsed, lastAssistant } = attempt;
if (promptError && !aborted) {
const errorText = describeUnknownError(promptError);
if (isContextOverflowError(errorText)) {
const kind = isCompactionFailureError(errorText)
? "compaction_failure"
: "context_overflow";
return {
payloads: [
{
text:
"Context overflow: prompt too large for the model. " +
"Try again with less input or a larger-context model.",
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta: {
sessionId: sessionIdUsed,
provider,
model: model.id,
},
systemPromptReport: attempt.systemPromptReport,
error: { kind, message: errorText },
},
};
}
// Handle role ordering errors with a user-friendly message
if (/incorrect role information|roles must alternate/i.test(errorText)) {
return {
payloads: [
{
text:
"Message ordering conflict - please try again. " +
"If this persists, use /new to start a fresh session.",
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta: {
sessionId: sessionIdUsed,
provider,
model: model.id,
},
systemPromptReport: attempt.systemPromptReport,
error: { kind: "role_ordering", message: errorText },
},
};
}
const promptFailoverReason = classifyFailoverReason(errorText);
if (promptFailoverReason && promptFailoverReason !== "timeout" && lastProfileId) {
await markAuthProfileFailure({
store: authStore,
profileId: lastProfileId,
reason: promptFailoverReason,
cfg: params.config,
agentDir: params.agentDir,
});
}
if (
isFailoverErrorMessage(errorText) &&
promptFailoverReason !== "timeout" &&
(await advanceAuthProfile())
) {
continue;
}
const fallbackThinking = pickFallbackThinkingLevel({
message: errorText,
attempted: attemptedThinking,
});
if (fallbackThinking) {
log.warn(
`unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
);
thinkLevel = fallbackThinking;
continue;
}
// FIX: Throw FailoverError for prompt errors when fallbacks configured
// This enables model fallback for quota/rate limit errors during prompt submission
const promptFallbackConfigured =
(params.config?.agents?.defaults?.model?.fallbacks?.length ?? 0) > 0;
if (promptFallbackConfigured && isFailoverErrorMessage(errorText)) {
throw new FailoverError(errorText, {
reason: promptFailoverReason ?? "unknown",
provider,
model: modelId,
profileId: lastProfileId,
status: resolveFailoverStatus(promptFailoverReason ?? "unknown"),
});
}
throw promptError;
}
const fallbackThinking = pickFallbackThinkingLevel({
message: lastAssistant?.errorMessage,
attempted: attemptedThinking,
});
if (fallbackThinking && !aborted) {
log.warn(
`unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
);
thinkLevel = fallbackThinking;
continue;
}
const fallbackConfigured =
(params.config?.agents?.defaults?.model?.fallbacks?.length ?? 0) > 0;
const authFailure = isAuthAssistantError(lastAssistant);
const rateLimitFailure = isRateLimitAssistantError(lastAssistant);
const failoverFailure = isFailoverAssistantError(lastAssistant);
const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
// Treat timeout as potential rate limit (Antigravity hangs on rate limit)
const shouldRotate = (!aborted && failoverFailure) || timedOut;
if (shouldRotate) {
if (lastProfileId) {
const reason =
timedOut || assistantFailoverReason === "timeout"
? "timeout"
: (assistantFailoverReason ?? "unknown");
await markAuthProfileFailure({
store: authStore,
profileId: lastProfileId,
reason,
cfg: params.config,
agentDir: params.agentDir,
});
if (timedOut) {
log.warn(
`Profile ${lastProfileId} timed out (possible rate limit). Trying next account...`,
);
}
if (cloudCodeAssistFormatError) {
log.warn(
`Profile ${lastProfileId} hit Cloud Code Assist format error. Tool calls will be sanitized on retry.`,
);
}
}
const rotated = await advanceAuthProfile();
if (rotated) continue;
if (fallbackConfigured) {
// Prefer formatted error message (user-friendly) over raw errorMessage
const message =
(lastAssistant
? formatAssistantErrorText(lastAssistant, {
cfg: params.config,
sessionKey: params.sessionKey ?? params.sessionId,
})
: undefined) ||
lastAssistant?.errorMessage?.trim() ||
(timedOut
? "LLM request timed out."
: rateLimitFailure
? "LLM request rate limited."
: authFailure
? "LLM request unauthorized."
: "LLM request failed.");
const status =
resolveFailoverStatus(assistantFailoverReason ?? "unknown") ??
(isTimeoutErrorMessage(message) ? 408 : undefined);
throw new FailoverError(message, {
reason: assistantFailoverReason ?? "unknown",
provider,
model: modelId,
profileId: lastProfileId,
status,
});
}
}
const usage = normalizeUsage(lastAssistant?.usage as UsageLike);
const agentMeta: EmbeddedPiAgentMeta = {
sessionId: sessionIdUsed,
provider: lastAssistant?.provider ?? provider,
model: lastAssistant?.model ?? model.id,
usage,
};
const payloads = buildEmbeddedRunPayloads({
assistantTexts: attempt.assistantTexts,
toolMetas: attempt.toolMetas,
lastAssistant: attempt.lastAssistant,
config: params.config,
sessionKey: params.sessionKey ?? params.sessionId,
verboseLevel: params.verboseLevel,
reasoningLevel: params.reasoningLevel,
toolResultFormat: resolvedToolResultFormat,
inlineToolResultsAllowed: !params.onPartialReply && !params.onToolResult,
});
log.debug(
`embedded run done: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - started} aborted=${aborted}`,
);
if (lastProfileId) {
await markAuthProfileGood({
store: authStore,
provider,
profileId: lastProfileId,
});
await markAuthProfileUsed({
store: authStore,
profileId: lastProfileId,
});
}
return {
payloads: payloads.length ? payloads : undefined,
meta: {
durationMs: Date.now() - started,
agentMeta,
aborted,
systemPromptReport: attempt.systemPromptReport,
},
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
messagingToolSentTexts: attempt.messagingToolSentTexts,
messagingToolSentTargets: attempt.messagingToolSentTargets,
};
}
} finally {
process.chdir(prevCwd);
}
}),
);
}