* fix(gateway): flush throttled delta before emitChatFinal The 150ms throttle in emitChatDelta can suppress the last text chunk before emitChatFinal fires, causing streaming clients (e.g. ACP) to receive truncated responses. The final event carries the complete text, but clients that build responses incrementally from deltas miss the tail end. Flush one last unthrottled delta with the complete buffered text immediately before sending the final event. This ensures all streaming consumers have the full response without needing to reconcile deltas against the final payload. * fix(gateway): avoid duplicate delta flush when buffer unchanged Track the text length at the time of the last broadcast. The flush in emitChatFinal now only sends a delta if the buffer has grown since the last broadcast, preventing duplicate sends when the final delta passed the 150ms throttle and was already broadcast. * fix(gateway): honor heartbeat suppression in final delta flush * test(gateway): add final delta flush and dedupe coverage * fix(gateway): skip final flush for silent lead fragments * docs(changelog): note gateway final-delta flush fix credits --------- Co-authored-by: Jonathan Taylor <visionik@pobox.com> Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
1327 lines
53 KiB
TypeScript
1327 lines
53 KiB
TypeScript
import { randomBytes } from "node:crypto";
|
||
import fs from "node:fs/promises";
|
||
import type { ThinkLevel } from "../../auto-reply/thinking.js";
|
||
import { generateSecureToken } from "../../infra/secure-random.js";
|
||
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
|
||
import type { PluginHookBeforeAgentStartResult } from "../../plugins/types.js";
|
||
import { enqueueCommandInLane } from "../../process/command-queue.js";
|
||
import { isMarkdownCapableMessageChannel } from "../../utils/message-channel.js";
|
||
import { resolveOpenClawAgentDir } from "../agent-paths.js";
|
||
import { hasConfiguredModelFallbacks } from "../agent-scope.js";
|
||
import {
|
||
isProfileInCooldown,
|
||
markAuthProfileFailure,
|
||
markAuthProfileGood,
|
||
markAuthProfileUsed,
|
||
resolveProfilesUnavailableReason,
|
||
} from "../auth-profiles.js";
|
||
import {
|
||
CONTEXT_WINDOW_HARD_MIN_TOKENS,
|
||
CONTEXT_WINDOW_WARN_BELOW_TOKENS,
|
||
evaluateContextWindowGuard,
|
||
resolveContextWindowInfo,
|
||
} from "../context-window-guard.js";
|
||
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
|
||
import { FailoverError, resolveFailoverStatus } from "../failover-error.js";
|
||
import {
|
||
ensureAuthProfileStore,
|
||
getApiKeyForModel,
|
||
resolveAuthProfileOrder,
|
||
type ResolvedProviderAuth,
|
||
} from "../model-auth.js";
|
||
import { normalizeProviderId } from "../model-selection.js";
|
||
import { ensureOpenClawModelsJson } from "../models-config.js";
|
||
import {
|
||
formatBillingErrorMessage,
|
||
classifyFailoverReason,
|
||
formatAssistantErrorText,
|
||
isAuthAssistantError,
|
||
isBillingAssistantError,
|
||
isCompactionFailureError,
|
||
isLikelyContextOverflowError,
|
||
isFailoverAssistantError,
|
||
isFailoverErrorMessage,
|
||
parseImageSizeError,
|
||
parseImageDimensionError,
|
||
isRateLimitAssistantError,
|
||
isTimeoutErrorMessage,
|
||
pickFallbackThinkingLevel,
|
||
type FailoverReason,
|
||
} from "../pi-embedded-helpers.js";
|
||
import { derivePromptTokens, normalizeUsage, type UsageLike } from "../usage.js";
|
||
import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js";
|
||
import { compactEmbeddedPiSessionDirect } from "./compact.js";
|
||
import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
|
||
import { log } from "./logger.js";
|
||
import { resolveModel } from "./model.js";
|
||
import { runEmbeddedAttempt } from "./run/attempt.js";
|
||
import type { RunEmbeddedPiAgentParams } from "./run/params.js";
|
||
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
|
||
import {
|
||
truncateOversizedToolResultsInSession,
|
||
sessionLikelyHasOversizedToolResults,
|
||
} from "./tool-result-truncation.js";
|
||
import type { EmbeddedPiAgentMeta, EmbeddedPiRunResult } from "./types.js";
|
||
import { describeUnknownError } from "./utils.js";
|
||
|
||
// Local alias for the resolved provider auth (api key, auth mode, profile id).
type ApiKeyInfo = ResolvedProviderAuth;

// Mutable refresh state for a GitHub Copilot session.
type CopilotTokenState = {
  // GitHub OAuth token used to mint short-lived Copilot API tokens.
  githubToken: string;
  // Epoch millis at which the current Copilot API token expires.
  expiresAt: number;
  // Pending timer for the next scheduled refresh, if any.
  refreshTimer?: ReturnType<typeof setTimeout>;
  // In-flight refresh promise; de-duplicates concurrent refresh attempts.
  refreshInFlight?: Promise<void>;
};

// Refresh the Copilot token this long before its expiry.
const COPILOT_REFRESH_MARGIN_MS = 5 * 60 * 1000;
// Delay before retrying after a failed scheduled refresh.
const COPILOT_REFRESH_RETRY_MS = 60 * 1000;
// Lower bound on any scheduled refresh delay.
const COPILOT_REFRESH_MIN_DELAY_MS = 5 * 1000;
||
|
||
// Avoid Anthropic's refusal test token poisoning session transcripts.
|
||
const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
|
||
const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)";
|
||
|
||
function scrubAnthropicRefusalMagic(prompt: string): string {
|
||
if (!prompt.includes(ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL)) {
|
||
return prompt;
|
||
}
|
||
return prompt.replaceAll(
|
||
ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL,
|
||
ANTHROPIC_MAGIC_STRING_REPLACEMENT,
|
||
);
|
||
}
|
||
|
||
type UsageAccumulator = {
|
||
input: number;
|
||
output: number;
|
||
cacheRead: number;
|
||
cacheWrite: number;
|
||
total: number;
|
||
/** Cache fields from the most recent API call (not accumulated). */
|
||
lastCacheRead: number;
|
||
lastCacheWrite: number;
|
||
lastInput: number;
|
||
};
|
||
|
||
const createUsageAccumulator = (): UsageAccumulator => ({
|
||
input: 0,
|
||
output: 0,
|
||
cacheRead: 0,
|
||
cacheWrite: 0,
|
||
total: 0,
|
||
lastCacheRead: 0,
|
||
lastCacheWrite: 0,
|
||
lastInput: 0,
|
||
});
|
||
|
||
function createCompactionDiagId(): string {
|
||
return `ovf-${Date.now().toString(36)}-${generateSecureToken(4)}`;
|
||
}
|
||
|
||
// Defensive guard for the outer run loop across all retry branches.
|
||
const BASE_RUN_RETRY_ITERATIONS = 24;
|
||
const RUN_RETRY_ITERATIONS_PER_PROFILE = 8;
|
||
const MIN_RUN_RETRY_ITERATIONS = 32;
|
||
const MAX_RUN_RETRY_ITERATIONS = 160;
|
||
|
||
function resolveMaxRunRetryIterations(profileCandidateCount: number): number {
|
||
const scaled =
|
||
BASE_RUN_RETRY_ITERATIONS +
|
||
Math.max(1, profileCandidateCount) * RUN_RETRY_ITERATIONS_PER_PROFILE;
|
||
return Math.min(MAX_RUN_RETRY_ITERATIONS, Math.max(MIN_RUN_RETRY_ITERATIONS, scaled));
|
||
}
|
||
|
||
const hasUsageValues = (
|
||
usage: ReturnType<typeof normalizeUsage>,
|
||
): usage is NonNullable<ReturnType<typeof normalizeUsage>> =>
|
||
!!usage &&
|
||
[usage.input, usage.output, usage.cacheRead, usage.cacheWrite, usage.total].some(
|
||
(value) => typeof value === "number" && Number.isFinite(value) && value > 0,
|
||
);
|
||
|
||
const mergeUsageIntoAccumulator = (
|
||
target: UsageAccumulator,
|
||
usage: ReturnType<typeof normalizeUsage>,
|
||
) => {
|
||
if (!hasUsageValues(usage)) {
|
||
return;
|
||
}
|
||
target.input += usage.input ?? 0;
|
||
target.output += usage.output ?? 0;
|
||
target.cacheRead += usage.cacheRead ?? 0;
|
||
target.cacheWrite += usage.cacheWrite ?? 0;
|
||
target.total +=
|
||
usage.total ??
|
||
(usage.input ?? 0) + (usage.output ?? 0) + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0);
|
||
// Track the most recent API call's cache fields for accurate context-size reporting.
|
||
// Accumulated cache totals inflate context size when there are multiple tool-call round-trips,
|
||
// since each call reports cacheRead ≈ current_context_size.
|
||
target.lastCacheRead = usage.cacheRead ?? 0;
|
||
target.lastCacheWrite = usage.cacheWrite ?? 0;
|
||
target.lastInput = usage.input ?? 0;
|
||
};
|
||
|
||
const toNormalizedUsage = (usage: UsageAccumulator) => {
|
||
const hasUsage =
|
||
usage.input > 0 ||
|
||
usage.output > 0 ||
|
||
usage.cacheRead > 0 ||
|
||
usage.cacheWrite > 0 ||
|
||
usage.total > 0;
|
||
if (!hasUsage) {
|
||
return undefined;
|
||
}
|
||
// Use the LAST API call's cache fields for context-size calculation.
|
||
// The accumulated cacheRead/cacheWrite inflate context size because each tool-call
|
||
// round-trip reports cacheRead ≈ current_context_size, and summing N calls gives
|
||
// N × context_size which gets clamped to contextWindow (e.g. 200k).
|
||
// See: https://github.com/openclaw/openclaw/issues/13698
|
||
//
|
||
// We use lastInput/lastCacheRead/lastCacheWrite (from the most recent API call) for
|
||
// cache-related fields, but keep accumulated output (total generated text this turn).
|
||
const lastPromptTokens = usage.lastInput + usage.lastCacheRead + usage.lastCacheWrite;
|
||
return {
|
||
input: usage.lastInput || undefined,
|
||
output: usage.output || undefined,
|
||
cacheRead: usage.lastCacheRead || undefined,
|
||
cacheWrite: usage.lastCacheWrite || undefined,
|
||
total: lastPromptTokens + usage.output || undefined,
|
||
};
|
||
};
|
||
|
||
function resolveActiveErrorContext(params: {
|
||
lastAssistant: { provider?: string; model?: string } | undefined;
|
||
provider: string;
|
||
model: string;
|
||
}): { provider: string; model: string } {
|
||
return {
|
||
provider: params.lastAssistant?.provider ?? params.provider,
|
||
model: params.lastAssistant?.model ?? params.model,
|
||
};
|
||
}
|
||
|
||
export async function runEmbeddedPiAgent(
|
||
params: RunEmbeddedPiAgentParams,
|
||
): Promise<EmbeddedPiRunResult> {
|
||
const sessionLane = resolveSessionLane(params.sessionKey?.trim() || params.sessionId);
|
||
const globalLane = resolveGlobalLane(params.lane);
|
||
const enqueueGlobal =
|
||
params.enqueue ?? ((task, opts) => enqueueCommandInLane(globalLane, task, opts));
|
||
const enqueueSession =
|
||
params.enqueue ?? ((task, opts) => enqueueCommandInLane(sessionLane, task, opts));
|
||
const channelHint = params.messageChannel ?? params.messageProvider;
|
||
const resolvedToolResultFormat =
|
||
params.toolResultFormat ??
|
||
(channelHint
|
||
? isMarkdownCapableMessageChannel(channelHint)
|
||
? "markdown"
|
||
: "plain"
|
||
: "markdown");
|
||
const isProbeSession = params.sessionId?.startsWith("probe-") ?? false;
|
||
|
||
return enqueueSession(() =>
|
||
enqueueGlobal(async () => {
|
||
const started = Date.now();
|
||
const workspaceResolution = resolveRunWorkspaceDir({
|
||
workspaceDir: params.workspaceDir,
|
||
sessionKey: params.sessionKey,
|
||
agentId: params.agentId,
|
||
config: params.config,
|
||
});
|
||
const resolvedWorkspace = workspaceResolution.workspaceDir;
|
||
const redactedSessionId = redactRunIdentifier(params.sessionId);
|
||
const redactedSessionKey = redactRunIdentifier(params.sessionKey);
|
||
const redactedWorkspace = redactRunIdentifier(resolvedWorkspace);
|
||
if (workspaceResolution.usedFallback) {
|
||
log.warn(
|
||
`[workspace-fallback] caller=runEmbeddedPiAgent reason=${workspaceResolution.fallbackReason} run=${params.runId} session=${redactedSessionId} sessionKey=${redactedSessionKey} agent=${workspaceResolution.agentId} workspace=${redactedWorkspace}`,
|
||
);
|
||
}
|
||
const prevCwd = process.cwd();
|
||
|
||
let provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
|
||
let modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
|
||
const agentDir = params.agentDir ?? resolveOpenClawAgentDir();
|
||
const fallbackConfigured = hasConfiguredModelFallbacks({
|
||
cfg: params.config,
|
||
agentId: params.agentId,
|
||
sessionKey: params.sessionKey,
|
||
});
|
||
await ensureOpenClawModelsJson(params.config, agentDir);
|
||
|
||
// Run before_model_resolve hooks early so plugins can override the
|
||
// provider/model before resolveModel().
|
||
//
|
||
// Legacy compatibility: before_agent_start is also checked for override
|
||
// fields if present. New hook takes precedence when both are set.
|
||
let modelResolveOverride: { providerOverride?: string; modelOverride?: string } | undefined;
|
||
let legacyBeforeAgentStartResult: PluginHookBeforeAgentStartResult | undefined;
|
||
const hookRunner = getGlobalHookRunner();
|
||
const hookCtx = {
|
||
agentId: workspaceResolution.agentId,
|
||
sessionKey: params.sessionKey,
|
||
sessionId: params.sessionId,
|
||
workspaceDir: resolvedWorkspace,
|
||
messageProvider: params.messageProvider ?? undefined,
|
||
trigger: params.trigger,
|
||
channelId: params.messageChannel ?? params.messageProvider ?? undefined,
|
||
};
|
||
if (hookRunner?.hasHooks("before_model_resolve")) {
|
||
try {
|
||
modelResolveOverride = await hookRunner.runBeforeModelResolve(
|
||
{ prompt: params.prompt },
|
||
hookCtx,
|
||
);
|
||
} catch (hookErr) {
|
||
log.warn(`before_model_resolve hook failed: ${String(hookErr)}`);
|
||
}
|
||
}
|
||
if (hookRunner?.hasHooks("before_agent_start")) {
|
||
try {
|
||
legacyBeforeAgentStartResult = await hookRunner.runBeforeAgentStart(
|
||
{ prompt: params.prompt },
|
||
hookCtx,
|
||
);
|
||
modelResolveOverride = {
|
||
providerOverride:
|
||
modelResolveOverride?.providerOverride ??
|
||
legacyBeforeAgentStartResult?.providerOverride,
|
||
modelOverride:
|
||
modelResolveOverride?.modelOverride ?? legacyBeforeAgentStartResult?.modelOverride,
|
||
};
|
||
} catch (hookErr) {
|
||
log.warn(
|
||
`before_agent_start hook (legacy model resolve path) failed: ${String(hookErr)}`,
|
||
);
|
||
}
|
||
}
|
||
if (modelResolveOverride?.providerOverride) {
|
||
provider = modelResolveOverride.providerOverride;
|
||
log.info(`[hooks] provider overridden to ${provider}`);
|
||
}
|
||
if (modelResolveOverride?.modelOverride) {
|
||
modelId = modelResolveOverride.modelOverride;
|
||
log.info(`[hooks] model overridden to ${modelId}`);
|
||
}
|
||
|
||
const { model, error, authStorage, modelRegistry } = resolveModel(
|
||
provider,
|
||
modelId,
|
||
agentDir,
|
||
params.config,
|
||
);
|
||
if (!model) {
|
||
throw new FailoverError(error ?? `Unknown model: ${provider}/${modelId}`, {
|
||
reason: "model_not_found",
|
||
provider,
|
||
model: modelId,
|
||
});
|
||
}
|
||
|
||
const ctxInfo = resolveContextWindowInfo({
|
||
cfg: params.config,
|
||
provider,
|
||
modelId,
|
||
modelContextWindow: model.contextWindow,
|
||
defaultTokens: DEFAULT_CONTEXT_TOKENS,
|
||
});
|
||
const ctxGuard = evaluateContextWindowGuard({
|
||
info: ctxInfo,
|
||
warnBelowTokens: CONTEXT_WINDOW_WARN_BELOW_TOKENS,
|
||
hardMinTokens: CONTEXT_WINDOW_HARD_MIN_TOKENS,
|
||
});
|
||
if (ctxGuard.shouldWarn) {
|
||
log.warn(
|
||
`low context window: ${provider}/${modelId} ctx=${ctxGuard.tokens} (warn<${CONTEXT_WINDOW_WARN_BELOW_TOKENS}) source=${ctxGuard.source}`,
|
||
);
|
||
}
|
||
if (ctxGuard.shouldBlock) {
|
||
log.error(
|
||
`blocked model (context window too small): ${provider}/${modelId} ctx=${ctxGuard.tokens} (min=${CONTEXT_WINDOW_HARD_MIN_TOKENS}) source=${ctxGuard.source}`,
|
||
);
|
||
throw new FailoverError(
|
||
`Model context window too small (${ctxGuard.tokens} tokens). Minimum is ${CONTEXT_WINDOW_HARD_MIN_TOKENS}.`,
|
||
{ reason: "unknown", provider, model: modelId },
|
||
);
|
||
}
|
||
|
||
const authStore = ensureAuthProfileStore(agentDir, { allowKeychainPrompt: false });
|
||
const preferredProfileId = params.authProfileId?.trim();
|
||
let lockedProfileId = params.authProfileIdSource === "user" ? preferredProfileId : undefined;
|
||
if (lockedProfileId) {
|
||
const lockedProfile = authStore.profiles[lockedProfileId];
|
||
if (
|
||
!lockedProfile ||
|
||
normalizeProviderId(lockedProfile.provider) !== normalizeProviderId(provider)
|
||
) {
|
||
lockedProfileId = undefined;
|
||
}
|
||
}
|
||
const profileOrder = resolveAuthProfileOrder({
|
||
cfg: params.config,
|
||
store: authStore,
|
||
provider,
|
||
preferredProfile: preferredProfileId,
|
||
});
|
||
if (lockedProfileId && !profileOrder.includes(lockedProfileId)) {
|
||
throw new Error(`Auth profile "${lockedProfileId}" is not configured for ${provider}.`);
|
||
}
|
||
const profileCandidates = lockedProfileId
|
||
? [lockedProfileId]
|
||
: profileOrder.length > 0
|
||
? profileOrder
|
||
: [undefined];
|
||
let profileIndex = 0;
|
||
|
||
const initialThinkLevel = params.thinkLevel ?? "off";
|
||
let thinkLevel = initialThinkLevel;
|
||
const attemptedThinking = new Set<ThinkLevel>();
|
||
let apiKeyInfo: ApiKeyInfo | null = null;
|
||
let lastProfileId: string | undefined;
|
||
const copilotTokenState: CopilotTokenState | null =
|
||
model.provider === "github-copilot" ? { githubToken: "", expiresAt: 0 } : null;
|
||
let copilotRefreshCancelled = false;
|
||
const hasCopilotGithubToken = () => Boolean(copilotTokenState?.githubToken.trim());
|
||
|
||
const clearCopilotRefreshTimer = () => {
|
||
if (!copilotTokenState?.refreshTimer) {
|
||
return;
|
||
}
|
||
clearTimeout(copilotTokenState.refreshTimer);
|
||
copilotTokenState.refreshTimer = undefined;
|
||
};
|
||
|
||
const stopCopilotRefreshTimer = () => {
|
||
if (!copilotTokenState) {
|
||
return;
|
||
}
|
||
copilotRefreshCancelled = true;
|
||
clearCopilotRefreshTimer();
|
||
};
|
||
|
||
const refreshCopilotToken = async (reason: string): Promise<void> => {
|
||
if (!copilotTokenState) {
|
||
return;
|
||
}
|
||
if (copilotTokenState.refreshInFlight) {
|
||
await copilotTokenState.refreshInFlight;
|
||
return;
|
||
}
|
||
const { resolveCopilotApiToken } = await import("../../providers/github-copilot-token.js");
|
||
copilotTokenState.refreshInFlight = (async () => {
|
||
const githubToken = copilotTokenState.githubToken.trim();
|
||
if (!githubToken) {
|
||
throw new Error("Copilot refresh requires a GitHub token.");
|
||
}
|
||
log.debug(`Refreshing GitHub Copilot token (${reason})...`);
|
||
const copilotToken = await resolveCopilotApiToken({
|
||
githubToken,
|
||
});
|
||
authStorage.setRuntimeApiKey(model.provider, copilotToken.token);
|
||
copilotTokenState.expiresAt = copilotToken.expiresAt;
|
||
const remaining = copilotToken.expiresAt - Date.now();
|
||
log.debug(
|
||
`Copilot token refreshed; expires in ${Math.max(0, Math.floor(remaining / 1000))}s.`,
|
||
);
|
||
})()
|
||
.catch((err) => {
|
||
log.warn(`Copilot token refresh failed: ${describeUnknownError(err)}`);
|
||
throw err;
|
||
})
|
||
.finally(() => {
|
||
copilotTokenState.refreshInFlight = undefined;
|
||
});
|
||
await copilotTokenState.refreshInFlight;
|
||
};
|
||
|
||
const scheduleCopilotRefresh = (): void => {
|
||
if (!copilotTokenState || copilotRefreshCancelled) {
|
||
return;
|
||
}
|
||
if (!hasCopilotGithubToken()) {
|
||
log.warn("Skipping Copilot refresh scheduling; GitHub token missing.");
|
||
return;
|
||
}
|
||
clearCopilotRefreshTimer();
|
||
const now = Date.now();
|
||
const refreshAt = copilotTokenState.expiresAt - COPILOT_REFRESH_MARGIN_MS;
|
||
const delayMs = Math.max(COPILOT_REFRESH_MIN_DELAY_MS, refreshAt - now);
|
||
const timer = setTimeout(() => {
|
||
if (copilotRefreshCancelled) {
|
||
return;
|
||
}
|
||
refreshCopilotToken("scheduled")
|
||
.then(() => scheduleCopilotRefresh())
|
||
.catch(() => {
|
||
if (copilotRefreshCancelled) {
|
||
return;
|
||
}
|
||
const retryTimer = setTimeout(() => {
|
||
if (copilotRefreshCancelled) {
|
||
return;
|
||
}
|
||
refreshCopilotToken("scheduled-retry")
|
||
.then(() => scheduleCopilotRefresh())
|
||
.catch(() => undefined);
|
||
}, COPILOT_REFRESH_RETRY_MS);
|
||
copilotTokenState.refreshTimer = retryTimer;
|
||
if (copilotRefreshCancelled) {
|
||
clearTimeout(retryTimer);
|
||
copilotTokenState.refreshTimer = undefined;
|
||
}
|
||
});
|
||
}, delayMs);
|
||
copilotTokenState.refreshTimer = timer;
|
||
if (copilotRefreshCancelled) {
|
||
clearTimeout(timer);
|
||
copilotTokenState.refreshTimer = undefined;
|
||
}
|
||
};
|
||
|
||
const resolveAuthProfileFailoverReason = (params: {
|
||
allInCooldown: boolean;
|
||
message: string;
|
||
profileIds?: Array<string | undefined>;
|
||
}): FailoverReason => {
|
||
if (params.allInCooldown) {
|
||
const profileIds = (params.profileIds ?? profileCandidates).filter(
|
||
(id): id is string => typeof id === "string" && id.length > 0,
|
||
);
|
||
return (
|
||
resolveProfilesUnavailableReason({
|
||
store: authStore,
|
||
profileIds,
|
||
}) ?? "rate_limit"
|
||
);
|
||
}
|
||
const classified = classifyFailoverReason(params.message);
|
||
return classified ?? "auth";
|
||
};
|
||
|
||
const throwAuthProfileFailover = (params: {
|
||
allInCooldown: boolean;
|
||
message?: string;
|
||
error?: unknown;
|
||
}): never => {
|
||
const fallbackMessage = `No available auth profile for ${provider} (all in cooldown or unavailable).`;
|
||
const message =
|
||
params.message?.trim() ||
|
||
(params.error ? describeUnknownError(params.error).trim() : "") ||
|
||
fallbackMessage;
|
||
const reason = resolveAuthProfileFailoverReason({
|
||
allInCooldown: params.allInCooldown,
|
||
message,
|
||
profileIds: profileCandidates,
|
||
});
|
||
if (fallbackConfigured) {
|
||
throw new FailoverError(message, {
|
||
reason,
|
||
provider,
|
||
model: modelId,
|
||
status: resolveFailoverStatus(reason),
|
||
cause: params.error,
|
||
});
|
||
}
|
||
if (params.error instanceof Error) {
|
||
throw params.error;
|
||
}
|
||
throw new Error(message);
|
||
};
|
||
|
||
const resolveApiKeyForCandidate = async (candidate?: string) => {
|
||
return getApiKeyForModel({
|
||
model,
|
||
cfg: params.config,
|
||
profileId: candidate,
|
||
store: authStore,
|
||
agentDir,
|
||
});
|
||
};
|
||
|
||
const applyApiKeyInfo = async (candidate?: string): Promise<void> => {
|
||
apiKeyInfo = await resolveApiKeyForCandidate(candidate);
|
||
const resolvedProfileId = apiKeyInfo.profileId ?? candidate;
|
||
if (!apiKeyInfo.apiKey) {
|
||
if (apiKeyInfo.mode !== "aws-sdk") {
|
||
throw new Error(
|
||
`No API key resolved for provider "${model.provider}" (auth mode: ${apiKeyInfo.mode}).`,
|
||
);
|
||
}
|
||
lastProfileId = resolvedProfileId;
|
||
return;
|
||
}
|
||
if (model.provider === "github-copilot") {
|
||
const { resolveCopilotApiToken } =
|
||
await import("../../providers/github-copilot-token.js");
|
||
const copilotToken = await resolveCopilotApiToken({
|
||
githubToken: apiKeyInfo.apiKey,
|
||
});
|
||
authStorage.setRuntimeApiKey(model.provider, copilotToken.token);
|
||
if (copilotTokenState) {
|
||
copilotTokenState.githubToken = apiKeyInfo.apiKey;
|
||
copilotTokenState.expiresAt = copilotToken.expiresAt;
|
||
scheduleCopilotRefresh();
|
||
}
|
||
} else {
|
||
authStorage.setRuntimeApiKey(model.provider, apiKeyInfo.apiKey);
|
||
}
|
||
lastProfileId = apiKeyInfo.profileId;
|
||
};
|
||
|
||
const advanceAuthProfile = async (): Promise<boolean> => {
|
||
if (lockedProfileId) {
|
||
return false;
|
||
}
|
||
let nextIndex = profileIndex + 1;
|
||
while (nextIndex < profileCandidates.length) {
|
||
const candidate = profileCandidates[nextIndex];
|
||
if (candidate && isProfileInCooldown(authStore, candidate)) {
|
||
nextIndex += 1;
|
||
continue;
|
||
}
|
||
try {
|
||
await applyApiKeyInfo(candidate);
|
||
profileIndex = nextIndex;
|
||
thinkLevel = initialThinkLevel;
|
||
attemptedThinking.clear();
|
||
return true;
|
||
} catch (err) {
|
||
if (candidate && candidate === lockedProfileId) {
|
||
throw err;
|
||
}
|
||
nextIndex += 1;
|
||
}
|
||
}
|
||
return false;
|
||
};
|
||
|
||
try {
|
||
while (profileIndex < profileCandidates.length) {
|
||
const candidate = profileCandidates[profileIndex];
|
||
if (
|
||
candidate &&
|
||
candidate !== lockedProfileId &&
|
||
isProfileInCooldown(authStore, candidate)
|
||
) {
|
||
profileIndex += 1;
|
||
continue;
|
||
}
|
||
await applyApiKeyInfo(profileCandidates[profileIndex]);
|
||
break;
|
||
}
|
||
if (profileIndex >= profileCandidates.length) {
|
||
throwAuthProfileFailover({ allInCooldown: true });
|
||
}
|
||
} catch (err) {
|
||
if (err instanceof FailoverError) {
|
||
throw err;
|
||
}
|
||
if (profileCandidates[profileIndex] === lockedProfileId) {
|
||
throwAuthProfileFailover({ allInCooldown: false, error: err });
|
||
}
|
||
const advanced = await advanceAuthProfile();
|
||
if (!advanced) {
|
||
throwAuthProfileFailover({ allInCooldown: false, error: err });
|
||
}
|
||
}
|
||
|
||
const maybeRefreshCopilotForAuthError = async (
|
||
errorText: string,
|
||
retried: boolean,
|
||
): Promise<boolean> => {
|
||
if (!copilotTokenState || retried) {
|
||
return false;
|
||
}
|
||
if (!isFailoverErrorMessage(errorText)) {
|
||
return false;
|
||
}
|
||
if (classifyFailoverReason(errorText) !== "auth") {
|
||
return false;
|
||
}
|
||
try {
|
||
await refreshCopilotToken("auth-error");
|
||
scheduleCopilotRefresh();
|
||
return true;
|
||
} catch {
|
||
return false;
|
||
}
|
||
};
|
||
|
||
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
|
||
const MAX_RUN_LOOP_ITERATIONS = resolveMaxRunRetryIterations(profileCandidates.length);
|
||
let overflowCompactionAttempts = 0;
|
||
let toolResultTruncationAttempted = false;
|
||
const usageAccumulator = createUsageAccumulator();
|
||
let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
|
||
let autoCompactionCount = 0;
|
||
let runLoopIterations = 0;
|
||
const maybeMarkAuthProfileFailure = async (failure: {
|
||
profileId?: string;
|
||
reason?: Parameters<typeof markAuthProfileFailure>[0]["reason"] | null;
|
||
config?: RunEmbeddedPiAgentParams["config"];
|
||
agentDir?: RunEmbeddedPiAgentParams["agentDir"];
|
||
}) => {
|
||
const { profileId, reason } = failure;
|
||
if (!profileId || !reason || reason === "timeout") {
|
||
return;
|
||
}
|
||
await markAuthProfileFailure({
|
||
store: authStore,
|
||
profileId,
|
||
reason,
|
||
cfg: params.config,
|
||
agentDir,
|
||
});
|
||
};
|
||
try {
|
||
let authRetryPending = false;
|
||
while (true) {
|
||
if (runLoopIterations >= MAX_RUN_LOOP_ITERATIONS) {
|
||
const message =
|
||
`Exceeded retry limit after ${runLoopIterations} attempts ` +
|
||
`(max=${MAX_RUN_LOOP_ITERATIONS}).`;
|
||
log.error(
|
||
`[run-retry-limit] sessionKey=${params.sessionKey ?? params.sessionId} ` +
|
||
`provider=${provider}/${modelId} attempts=${runLoopIterations} ` +
|
||
`maxAttempts=${MAX_RUN_LOOP_ITERATIONS}`,
|
||
);
|
||
return {
|
||
payloads: [
|
||
{
|
||
text:
|
||
"Request failed after repeated internal retries. " +
|
||
"Please try again, or use /new to start a fresh session.",
|
||
isError: true,
|
||
},
|
||
],
|
||
meta: {
|
||
durationMs: Date.now() - started,
|
||
agentMeta: {
|
||
sessionId: params.sessionId,
|
||
provider,
|
||
model: model.id,
|
||
},
|
||
error: { kind: "retry_limit", message },
|
||
},
|
||
};
|
||
}
|
||
runLoopIterations += 1;
|
||
const copilotAuthRetry = authRetryPending;
|
||
authRetryPending = false;
|
||
attemptedThinking.add(thinkLevel);
|
||
await fs.mkdir(resolvedWorkspace, { recursive: true });
|
||
|
||
const prompt =
|
||
provider === "anthropic" ? scrubAnthropicRefusalMagic(params.prompt) : params.prompt;
|
||
|
||
const attempt = await runEmbeddedAttempt({
|
||
sessionId: params.sessionId,
|
||
sessionKey: params.sessionKey,
|
||
trigger: params.trigger,
|
||
messageChannel: params.messageChannel,
|
||
messageProvider: params.messageProvider,
|
||
agentAccountId: params.agentAccountId,
|
||
messageTo: params.messageTo,
|
||
messageThreadId: params.messageThreadId,
|
||
groupId: params.groupId,
|
||
groupChannel: params.groupChannel,
|
||
groupSpace: params.groupSpace,
|
||
spawnedBy: params.spawnedBy,
|
||
senderIsOwner: params.senderIsOwner,
|
||
currentChannelId: params.currentChannelId,
|
||
currentThreadTs: params.currentThreadTs,
|
||
currentMessageId: params.currentMessageId,
|
||
replyToMode: params.replyToMode,
|
||
hasRepliedRef: params.hasRepliedRef,
|
||
sessionFile: params.sessionFile,
|
||
workspaceDir: resolvedWorkspace,
|
||
agentDir,
|
||
config: params.config,
|
||
skillsSnapshot: params.skillsSnapshot,
|
||
prompt,
|
||
images: params.images,
|
||
disableTools: params.disableTools,
|
||
provider,
|
||
modelId,
|
||
model,
|
||
authStorage,
|
||
modelRegistry,
|
||
agentId: workspaceResolution.agentId,
|
||
legacyBeforeAgentStartResult,
|
||
thinkLevel,
|
||
verboseLevel: params.verboseLevel,
|
||
reasoningLevel: params.reasoningLevel,
|
||
toolResultFormat: resolvedToolResultFormat,
|
||
execOverrides: params.execOverrides,
|
||
bashElevated: params.bashElevated,
|
||
timeoutMs: params.timeoutMs,
|
||
runId: params.runId,
|
||
abortSignal: params.abortSignal,
|
||
shouldEmitToolResult: params.shouldEmitToolResult,
|
||
shouldEmitToolOutput: params.shouldEmitToolOutput,
|
||
onPartialReply: params.onPartialReply,
|
||
onAssistantMessageStart: params.onAssistantMessageStart,
|
||
onBlockReply: params.onBlockReply,
|
||
onBlockReplyFlush: params.onBlockReplyFlush,
|
||
blockReplyBreak: params.blockReplyBreak,
|
||
blockReplyChunking: params.blockReplyChunking,
|
||
onReasoningStream: params.onReasoningStream,
|
||
onReasoningEnd: params.onReasoningEnd,
|
||
onToolResult: params.onToolResult,
|
||
onAgentEvent: params.onAgentEvent,
|
||
extraSystemPrompt: params.extraSystemPrompt,
|
||
inputProvenance: params.inputProvenance,
|
||
streamParams: params.streamParams,
|
||
ownerNumbers: params.ownerNumbers,
|
||
enforceFinalTag: params.enforceFinalTag,
|
||
});
|
||
|
||
const {
|
||
aborted,
|
||
promptError,
|
||
timedOut,
|
||
timedOutDuringCompaction,
|
||
sessionIdUsed,
|
||
lastAssistant,
|
||
} = attempt;
|
||
const lastAssistantUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
|
||
const attemptUsage = attempt.attemptUsage ?? lastAssistantUsage;
|
||
mergeUsageIntoAccumulator(usageAccumulator, attemptUsage);
|
||
// Keep prompt size from the latest model call so session totalTokens
|
||
// reflects current context usage, not accumulated tool-loop usage.
|
||
lastRunPromptUsage = lastAssistantUsage ?? attemptUsage;
|
||
const lastTurnTotal = lastAssistantUsage?.total ?? attemptUsage?.total;
|
||
const attemptCompactionCount = Math.max(0, attempt.compactionCount ?? 0);
|
||
autoCompactionCount += attemptCompactionCount;
|
||
const activeErrorContext = resolveActiveErrorContext({
|
||
lastAssistant,
|
||
provider,
|
||
model: modelId,
|
||
});
|
||
const formattedAssistantErrorText = lastAssistant
|
||
? formatAssistantErrorText(lastAssistant, {
|
||
cfg: params.config,
|
||
sessionKey: params.sessionKey ?? params.sessionId,
|
||
provider: activeErrorContext.provider,
|
||
model: activeErrorContext.model,
|
||
})
|
||
: undefined;
|
||
const assistantErrorText =
|
||
lastAssistant?.stopReason === "error"
|
||
? lastAssistant.errorMessage?.trim() || formattedAssistantErrorText
|
||
: undefined;
|
||
|
||
const contextOverflowError = !aborted
|
||
? (() => {
|
||
if (promptError) {
|
||
const errorText = describeUnknownError(promptError);
|
||
if (isLikelyContextOverflowError(errorText)) {
|
||
return { text: errorText, source: "promptError" as const };
|
||
}
|
||
// Prompt submission failed with a non-overflow error. Do not
|
||
// inspect prior assistant errors from history for this attempt.
|
||
return null;
|
||
}
|
||
if (assistantErrorText && isLikelyContextOverflowError(assistantErrorText)) {
|
||
return { text: assistantErrorText, source: "assistantError" as const };
|
||
}
|
||
return null;
|
||
})()
|
||
: null;
|
||
|
||
if (contextOverflowError) {
|
||
const overflowDiagId = createCompactionDiagId();
|
||
const errorText = contextOverflowError.text;
|
||
const msgCount = attempt.messagesSnapshot?.length ?? 0;
|
||
log.warn(
|
||
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
|
||
`provider=${provider}/${modelId} source=${contextOverflowError.source} ` +
|
||
`messages=${msgCount} sessionFile=${params.sessionFile} ` +
|
||
`diagId=${overflowDiagId} compactionAttempts=${overflowCompactionAttempts} ` +
|
||
`error=${errorText.slice(0, 200)}`,
|
||
);
|
||
const isCompactionFailure = isCompactionFailureError(errorText);
|
||
const hadAttemptLevelCompaction = attemptCompactionCount > 0;
|
||
// If this attempt already compacted (SDK auto-compaction), avoid immediately
|
||
// running another explicit compaction for the same overflow trigger.
|
||
if (
|
||
!isCompactionFailure &&
|
||
hadAttemptLevelCompaction &&
|
||
overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
|
||
) {
|
||
overflowCompactionAttempts++;
|
||
log.warn(
|
||
`context overflow persisted after in-attempt compaction (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); retrying prompt without additional compaction for ${provider}/${modelId}`,
|
||
);
|
||
continue;
|
||
}
|
||
// Attempt explicit overflow compaction only when this attempt did not
|
||
// already auto-compact.
|
||
if (
|
||
!isCompactionFailure &&
|
||
!hadAttemptLevelCompaction &&
|
||
overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
|
||
) {
|
||
if (log.isEnabled("debug")) {
|
||
log.debug(
|
||
`[compaction-diag] decision diagId=${overflowDiagId} branch=compact ` +
|
||
`isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=unknown ` +
|
||
`attempt=${overflowCompactionAttempts + 1} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
|
||
);
|
||
}
|
||
overflowCompactionAttempts++;
|
||
log.warn(
|
||
`context overflow detected (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); attempting auto-compaction for ${provider}/${modelId}`,
|
||
);
|
||
const compactResult = await compactEmbeddedPiSessionDirect({
|
||
sessionId: params.sessionId,
|
||
sessionKey: params.sessionKey,
|
||
messageChannel: params.messageChannel,
|
||
messageProvider: params.messageProvider,
|
||
agentAccountId: params.agentAccountId,
|
||
authProfileId: lastProfileId,
|
||
sessionFile: params.sessionFile,
|
||
workspaceDir: resolvedWorkspace,
|
||
agentDir,
|
||
config: params.config,
|
||
skillsSnapshot: params.skillsSnapshot,
|
||
senderIsOwner: params.senderIsOwner,
|
||
provider,
|
||
model: modelId,
|
||
runId: params.runId,
|
||
thinkLevel,
|
||
reasoningLevel: params.reasoningLevel,
|
||
bashElevated: params.bashElevated,
|
||
extraSystemPrompt: params.extraSystemPrompt,
|
||
ownerNumbers: params.ownerNumbers,
|
||
trigger: "overflow",
|
||
diagId: overflowDiagId,
|
||
attempt: overflowCompactionAttempts,
|
||
maxAttempts: MAX_OVERFLOW_COMPACTION_ATTEMPTS,
|
||
});
|
||
if (compactResult.compacted) {
|
||
autoCompactionCount += 1;
|
||
log.info(`auto-compaction succeeded for ${provider}/${modelId}; retrying prompt`);
|
||
continue;
|
||
}
|
||
log.warn(
|
||
`auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
|
||
);
|
||
}
|
||
// Fallback: try truncating oversized tool results in the session.
|
||
// This handles the case where a single tool result exceeds the
|
||
// context window and compaction cannot reduce it further.
|
||
if (!toolResultTruncationAttempted) {
|
||
const contextWindowTokens = ctxInfo.tokens;
|
||
const hasOversized = attempt.messagesSnapshot
|
||
? sessionLikelyHasOversizedToolResults({
|
||
messages: attempt.messagesSnapshot,
|
||
contextWindowTokens,
|
||
})
|
||
: false;
|
||
|
||
if (hasOversized) {
|
||
if (log.isEnabled("debug")) {
|
||
log.debug(
|
||
`[compaction-diag] decision diagId=${overflowDiagId} branch=truncate_tool_results ` +
|
||
`isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=${hasOversized} ` +
|
||
`attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
|
||
);
|
||
}
|
||
toolResultTruncationAttempted = true;
|
||
log.warn(
|
||
`[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
|
||
`(contextWindow=${contextWindowTokens} tokens)`,
|
||
);
|
||
const truncResult = await truncateOversizedToolResultsInSession({
|
||
sessionFile: params.sessionFile,
|
||
contextWindowTokens,
|
||
sessionId: params.sessionId,
|
||
sessionKey: params.sessionKey,
|
||
});
|
||
if (truncResult.truncated) {
|
||
log.info(
|
||
`[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
|
||
);
|
||
// Do NOT reset overflowCompactionAttempts here — the global cap must remain
|
||
// enforced across all iterations to prevent unbounded compaction cycles (OC-65).
|
||
continue;
|
||
}
|
||
log.warn(
|
||
`[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
|
||
);
|
||
} else if (log.isEnabled("debug")) {
|
||
log.debug(
|
||
`[compaction-diag] decision diagId=${overflowDiagId} branch=give_up ` +
|
||
`isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=${hasOversized} ` +
|
||
`attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
|
||
);
|
||
}
|
||
}
|
||
if (
|
||
(isCompactionFailure ||
|
||
overflowCompactionAttempts >= MAX_OVERFLOW_COMPACTION_ATTEMPTS ||
|
||
toolResultTruncationAttempted) &&
|
||
log.isEnabled("debug")
|
||
) {
|
||
log.debug(
|
||
`[compaction-diag] decision diagId=${overflowDiagId} branch=give_up ` +
|
||
`isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=unknown ` +
|
||
`attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
|
||
);
|
||
}
|
||
const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
|
||
return {
|
||
payloads: [
|
||
{
|
||
text:
|
||
"Context overflow: prompt too large for the model. " +
|
||
"Try /reset (or /new) to start a fresh session, or use a larger-context model.",
|
||
isError: true,
|
||
},
|
||
],
|
||
meta: {
|
||
durationMs: Date.now() - started,
|
||
agentMeta: {
|
||
sessionId: sessionIdUsed,
|
||
provider,
|
||
model: model.id,
|
||
},
|
||
systemPromptReport: attempt.systemPromptReport,
|
||
error: { kind, message: errorText },
|
||
},
|
||
};
|
||
}
|
||
|
||
if (promptError && !aborted) {
|
||
const errorText = describeUnknownError(promptError);
|
||
if (await maybeRefreshCopilotForAuthError(errorText, copilotAuthRetry)) {
|
||
authRetryPending = true;
|
||
continue;
|
||
}
|
||
// Handle role ordering errors with a user-friendly message
|
||
if (/incorrect role information|roles must alternate/i.test(errorText)) {
|
||
return {
|
||
payloads: [
|
||
{
|
||
text:
|
||
"Message ordering conflict - please try again. " +
|
||
"If this persists, use /new to start a fresh session.",
|
||
isError: true,
|
||
},
|
||
],
|
||
meta: {
|
||
durationMs: Date.now() - started,
|
||
agentMeta: {
|
||
sessionId: sessionIdUsed,
|
||
provider,
|
||
model: model.id,
|
||
},
|
||
systemPromptReport: attempt.systemPromptReport,
|
||
error: { kind: "role_ordering", message: errorText },
|
||
},
|
||
};
|
||
}
|
||
// Handle image size errors with a user-friendly message (no retry needed)
|
||
const imageSizeError = parseImageSizeError(errorText);
|
||
if (imageSizeError) {
|
||
const maxMb = imageSizeError.maxMb;
|
||
const maxMbLabel =
|
||
typeof maxMb === "number" && Number.isFinite(maxMb) ? `${maxMb}` : null;
|
||
const maxBytesHint = maxMbLabel ? ` (max ${maxMbLabel}MB)` : "";
|
||
return {
|
||
payloads: [
|
||
{
|
||
text:
|
||
`Image too large for the model${maxBytesHint}. ` +
|
||
"Please compress or resize the image and try again.",
|
||
isError: true,
|
||
},
|
||
],
|
||
meta: {
|
||
durationMs: Date.now() - started,
|
||
agentMeta: {
|
||
sessionId: sessionIdUsed,
|
||
provider,
|
||
model: model.id,
|
||
},
|
||
systemPromptReport: attempt.systemPromptReport,
|
||
error: { kind: "image_size", message: errorText },
|
||
},
|
||
};
|
||
}
|
||
const promptFailoverReason = classifyFailoverReason(errorText);
|
||
await maybeMarkAuthProfileFailure({
|
||
profileId: lastProfileId,
|
||
reason: promptFailoverReason,
|
||
});
|
||
if (
|
||
isFailoverErrorMessage(errorText) &&
|
||
promptFailoverReason !== "timeout" &&
|
||
(await advanceAuthProfile())
|
||
) {
|
||
continue;
|
||
}
|
||
const fallbackThinking = pickFallbackThinkingLevel({
|
||
message: errorText,
|
||
attempted: attemptedThinking,
|
||
});
|
||
if (fallbackThinking) {
|
||
log.warn(
|
||
`unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
|
||
);
|
||
thinkLevel = fallbackThinking;
|
||
continue;
|
||
}
|
||
// FIX: Throw FailoverError for prompt errors when fallbacks configured
|
||
// This enables model fallback for quota/rate limit errors during prompt submission
|
||
if (fallbackConfigured && isFailoverErrorMessage(errorText)) {
|
||
throw new FailoverError(errorText, {
|
||
reason: promptFailoverReason ?? "unknown",
|
||
provider,
|
||
model: modelId,
|
||
profileId: lastProfileId,
|
||
status: resolveFailoverStatus(promptFailoverReason ?? "unknown"),
|
||
});
|
||
}
|
||
throw promptError;
|
||
}
|
||
|
||
const fallbackThinking = pickFallbackThinkingLevel({
|
||
message: lastAssistant?.errorMessage,
|
||
attempted: attemptedThinking,
|
||
});
|
||
if (fallbackThinking && !aborted) {
|
||
log.warn(
|
||
`unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
|
||
);
|
||
thinkLevel = fallbackThinking;
|
||
continue;
|
||
}
|
||
|
||
const authFailure = isAuthAssistantError(lastAssistant);
|
||
const rateLimitFailure = isRateLimitAssistantError(lastAssistant);
|
||
const billingFailure = isBillingAssistantError(lastAssistant);
|
||
const failoverFailure = isFailoverAssistantError(lastAssistant);
|
||
const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
|
||
const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
|
||
const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? "");
|
||
|
||
if (
|
||
authFailure &&
|
||
(await maybeRefreshCopilotForAuthError(
|
||
lastAssistant?.errorMessage ?? "",
|
||
copilotAuthRetry,
|
||
))
|
||
) {
|
||
authRetryPending = true;
|
||
continue;
|
||
}
|
||
if (imageDimensionError && lastProfileId) {
|
||
const details = [
|
||
imageDimensionError.messageIndex !== undefined
|
||
? `message=${imageDimensionError.messageIndex}`
|
||
: null,
|
||
imageDimensionError.contentIndex !== undefined
|
||
? `content=${imageDimensionError.contentIndex}`
|
||
: null,
|
||
imageDimensionError.maxDimensionPx !== undefined
|
||
? `limit=${imageDimensionError.maxDimensionPx}px`
|
||
: null,
|
||
]
|
||
.filter(Boolean)
|
||
.join(" ");
|
||
log.warn(
|
||
`Profile ${lastProfileId} rejected image payload${details ? ` (${details})` : ""}.`,
|
||
);
|
||
}
|
||
|
||
// Rotate on timeout to try another account/model path in this turn,
|
||
// but exclude post-prompt compaction timeouts (model succeeded; no profile issue).
|
||
const shouldRotate =
|
||
(!aborted && failoverFailure) || (timedOut && !timedOutDuringCompaction);
|
||
|
||
if (shouldRotate) {
|
||
if (lastProfileId) {
|
||
const reason =
|
||
timedOut || assistantFailoverReason === "timeout"
|
||
? "timeout"
|
||
: (assistantFailoverReason ?? "unknown");
|
||
// Skip cooldown for timeouts: a timeout is model/network-specific,
|
||
// not an auth issue. Marking the profile would poison fallback models
|
||
// on the same provider (e.g. gpt-5.3 timeout blocks gpt-5.2).
|
||
await maybeMarkAuthProfileFailure({
|
||
profileId: lastProfileId,
|
||
reason,
|
||
});
|
||
if (timedOut && !isProbeSession) {
|
||
log.warn(`Profile ${lastProfileId} timed out. Trying next account...`);
|
||
}
|
||
if (cloudCodeAssistFormatError) {
|
||
log.warn(
|
||
`Profile ${lastProfileId} hit Cloud Code Assist format error. Tool calls will be sanitized on retry.`,
|
||
);
|
||
}
|
||
}
|
||
|
||
const rotated = await advanceAuthProfile();
|
||
if (rotated) {
|
||
continue;
|
||
}
|
||
|
||
if (fallbackConfigured) {
|
||
// Prefer formatted error message (user-friendly) over raw errorMessage
|
||
const message =
|
||
(lastAssistant
|
||
? formatAssistantErrorText(lastAssistant, {
|
||
cfg: params.config,
|
||
sessionKey: params.sessionKey ?? params.sessionId,
|
||
provider: activeErrorContext.provider,
|
||
model: activeErrorContext.model,
|
||
})
|
||
: undefined) ||
|
||
lastAssistant?.errorMessage?.trim() ||
|
||
(timedOut
|
||
? "LLM request timed out."
|
||
: rateLimitFailure
|
||
? "LLM request rate limited."
|
||
: billingFailure
|
||
? formatBillingErrorMessage(
|
||
activeErrorContext.provider,
|
||
activeErrorContext.model,
|
||
)
|
||
: authFailure
|
||
? "LLM request unauthorized."
|
||
: "LLM request failed.");
|
||
const status =
|
||
resolveFailoverStatus(assistantFailoverReason ?? "unknown") ??
|
||
(isTimeoutErrorMessage(message) ? 408 : undefined);
|
||
throw new FailoverError(message, {
|
||
reason: assistantFailoverReason ?? "unknown",
|
||
provider: activeErrorContext.provider,
|
||
model: activeErrorContext.model,
|
||
profileId: lastProfileId,
|
||
status,
|
||
});
|
||
}
|
||
}
|
||
|
||
const usage = toNormalizedUsage(usageAccumulator);
|
||
if (usage && lastTurnTotal && lastTurnTotal > 0) {
|
||
usage.total = lastTurnTotal;
|
||
}
|
||
// Extract the last individual API call's usage for context-window
|
||
// utilization display. The accumulated `usage` sums input tokens
|
||
// across all calls (tool-use loops, compaction retries), which
|
||
// overstates the actual context size. `lastCallUsage` reflects only
|
||
// the final call, giving an accurate snapshot of current context.
|
||
const lastCallUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
|
||
const promptTokens = derivePromptTokens(lastRunPromptUsage);
|
||
const agentMeta: EmbeddedPiAgentMeta = {
|
||
sessionId: sessionIdUsed,
|
||
provider: lastAssistant?.provider ?? provider,
|
||
model: lastAssistant?.model ?? model.id,
|
||
usage,
|
||
lastCallUsage: lastCallUsage ?? undefined,
|
||
promptTokens,
|
||
compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined,
|
||
};
|
||
|
||
const payloads = buildEmbeddedRunPayloads({
|
||
assistantTexts: attempt.assistantTexts,
|
||
toolMetas: attempt.toolMetas,
|
||
lastAssistant: attempt.lastAssistant,
|
||
lastToolError: attempt.lastToolError,
|
||
config: params.config,
|
||
sessionKey: params.sessionKey ?? params.sessionId,
|
||
provider: activeErrorContext.provider,
|
||
model: activeErrorContext.model,
|
||
verboseLevel: params.verboseLevel,
|
||
reasoningLevel: params.reasoningLevel,
|
||
toolResultFormat: resolvedToolResultFormat,
|
||
suppressToolErrorWarnings: params.suppressToolErrorWarnings,
|
||
inlineToolResultsAllowed: false,
|
||
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
|
||
});
|
||
|
||
// Timeout aborts can leave the run without any assistant payloads.
|
||
// Emit an explicit timeout error instead of silently completing, so
|
||
// callers do not lose the turn as an orphaned user message.
|
||
if (timedOut && !timedOutDuringCompaction && payloads.length === 0) {
|
||
return {
|
||
payloads: [
|
||
{
|
||
text:
|
||
"Request timed out before a response was generated. " +
|
||
"Please try again, or increase `agents.defaults.timeoutSeconds` in your config.",
|
||
isError: true,
|
||
},
|
||
],
|
||
meta: {
|
||
durationMs: Date.now() - started,
|
||
agentMeta,
|
||
aborted,
|
||
systemPromptReport: attempt.systemPromptReport,
|
||
},
|
||
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
|
||
messagingToolSentTexts: attempt.messagingToolSentTexts,
|
||
messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
|
||
messagingToolSentTargets: attempt.messagingToolSentTargets,
|
||
successfulCronAdds: attempt.successfulCronAdds,
|
||
};
|
||
}
|
||
|
||
log.debug(
|
||
`embedded run done: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - started} aborted=${aborted}`,
|
||
);
|
||
if (lastProfileId) {
|
||
await markAuthProfileGood({
|
||
store: authStore,
|
||
provider,
|
||
profileId: lastProfileId,
|
||
agentDir: params.agentDir,
|
||
});
|
||
await markAuthProfileUsed({
|
||
store: authStore,
|
||
profileId: lastProfileId,
|
||
agentDir: params.agentDir,
|
||
});
|
||
}
|
||
return {
|
||
payloads: payloads.length ? payloads : undefined,
|
||
meta: {
|
||
durationMs: Date.now() - started,
|
||
agentMeta,
|
||
aborted,
|
||
systemPromptReport: attempt.systemPromptReport,
|
||
// Handle client tool calls (OpenResponses hosted tools)
|
||
// Propagate the LLM stop reason so callers (lifecycle events,
|
||
// ACP bridge) can distinguish end_turn from max_tokens.
|
||
stopReason: attempt.clientToolCall
|
||
? "tool_calls"
|
||
: (lastAssistant?.stopReason as string | undefined),
|
||
pendingToolCalls: attempt.clientToolCall
|
||
? [
|
||
{
|
||
id: randomBytes(5).toString("hex").slice(0, 9),
|
||
name: attempt.clientToolCall.name,
|
||
arguments: JSON.stringify(attempt.clientToolCall.params),
|
||
},
|
||
]
|
||
: undefined,
|
||
},
|
||
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
|
||
messagingToolSentTexts: attempt.messagingToolSentTexts,
|
||
messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
|
||
messagingToolSentTargets: attempt.messagingToolSentTargets,
|
||
successfulCronAdds: attempt.successfulCronAdds,
|
||
};
|
||
}
|
||
} finally {
|
||
stopCopilotRefreshTimer();
|
||
process.chdir(prevCwd);
|
||
}
|
||
}),
|
||
);
|
||
}
|