From e4f9097feb51d9813e0e0ec671711d969092dc94 Mon Sep 17 00:00:00 2001 From: fengyuwusong Date: Fri, 6 Mar 2026 12:00:59 +0800 Subject: [PATCH 1/2] feat: add configurable retry for embedded agent announce operations --- src/agents/subagent-announce.ts | 87 ++++++++++++++++++++++--- src/config/types.agent-defaults.ts | 11 ++++ src/config/zod-schema.agent-defaults.ts | 42 ++++++++++++ 3 files changed, 132 insertions(+), 8 deletions(-) diff --git a/src/agents/subagent-announce.ts b/src/agents/subagent-announce.ts index 83391755e9c..f02362818c2 100644 --- a/src/agents/subagent-announce.ts +++ b/src/agents/subagent-announce.ts @@ -61,9 +61,68 @@ function loadSubagentRegistryRuntime() { return subagentRegistryRuntimePromise; } -const DIRECT_ANNOUNCE_TRANSIENT_RETRY_DELAYS_MS = FAST_TEST_MODE - ? ([8, 16, 32] as const) - : ([5_000, 10_000, 20_000] as const); +// Default retry configuration for announce delivery +const DEFAULT_ANNOUNCE_RETRY_ATTEMPTS = 3; +const DEFAULT_ANNOUNCE_RETRY_MIN_DELAY_MS = 5_000; +const DEFAULT_ANNOUNCE_RETRY_MAX_DELAY_MS = 30_000; +const DEFAULT_ANNOUNCE_RETRY_JITTER = 0.1; + +type AnnounceRetryConfig = { + attempts: number; + minDelayMs: number; + maxDelayMs: number; + jitter: number; +}; + +function resolveAnnounceRetryConfig(cfg: ReturnType): AnnounceRetryConfig { + const retry = cfg.agents?.defaults?.subagents?.announceRetry; + const minDelayMs = + typeof retry?.minDelayMs === "number" && + Number.isFinite(retry.minDelayMs) && + retry.minDelayMs > 0 + ? Math.floor(retry.minDelayMs) + : DEFAULT_ANNOUNCE_RETRY_MIN_DELAY_MS; + const rawMaxDelayMs = + typeof retry?.maxDelayMs === "number" && + Number.isFinite(retry.maxDelayMs) && + retry.maxDelayMs > 0 + ? 
Math.floor(retry.maxDelayMs) + : DEFAULT_ANNOUNCE_RETRY_MAX_DELAY_MS; + // Ensure maxDelayMs is at least minDelayMs and does not exceed timer-safe maximum + const maxDelayMs = Math.min(Math.max(minDelayMs, rawMaxDelayMs), MAX_TIMER_SAFE_TIMEOUT_MS); + return { + // attempts is the number of retries after the initial call (total calls = attempts + 1), capped at 10 + attempts: + typeof retry?.attempts === "number" && Number.isFinite(retry.attempts) && retry.attempts > 0 + ? Math.min(Math.floor(retry.attempts), 10) + : DEFAULT_ANNOUNCE_RETRY_ATTEMPTS, + minDelayMs, + maxDelayMs, + jitter: + typeof retry?.jitter === "number" && + Number.isFinite(retry.jitter) && + retry.jitter >= 0 && + retry.jitter <= 1 + ? retry.jitter + : DEFAULT_ANNOUNCE_RETRY_JITTER, + }; +} + +function computeRetryDelayMs(retryIndex: number, config: AnnounceRetryConfig): number { + if (FAST_TEST_MODE) { + // Fast test mode: use short fixed delays + const fastDelays = [8, 16, 32]; + return fastDelays[retryIndex] ?? 32; + } + // Exponential backoff with jitter + const baseDelay = config.minDelayMs * Math.pow(2, retryIndex); + const cappedDelay = Math.min(baseDelay, config.maxDelayMs); + const jitterRange = cappedDelay * config.jitter; + const jitterOffset = (Math.random() * 2 - 1) * jitterRange; + const delayWithJitter = Math.floor(cappedDelay + jitterOffset); + // Ensure final delay respects both min and max bounds after jitter + return Math.max(config.minDelayMs, Math.min(config.maxDelayMs, delayWithJitter)); +} type ToolResultMessage = { role?: unknown; @@ -157,7 +216,17 @@ async function runAnnounceDeliveryWithRetry(params: { operation: string; signal?: AbortSignal; run: () => Promise; + retryConfig?: AnnounceRetryConfig; }): Promise { + const config = params.retryConfig ?? 
{ + attempts: DEFAULT_ANNOUNCE_RETRY_ATTEMPTS, + minDelayMs: DEFAULT_ANNOUNCE_RETRY_MIN_DELAY_MS, + maxDelayMs: DEFAULT_ANNOUNCE_RETRY_MAX_DELAY_MS, + jitter: DEFAULT_ANNOUNCE_RETRY_JITTER, + }; + // attempts represents the number of retries (not total attempts) + // This matches the previous behavior where 3 delays = 3 retries + const maxRetries = config.attempts; let retryIndex = 0; for (;;) { if (params.signal?.aborted) { @@ -166,14 +235,14 @@ async function runAnnounceDeliveryWithRetry(params: { try { return await params.run(); } catch (err) { - const delayMs = DIRECT_ANNOUNCE_TRANSIENT_RETRY_DELAYS_MS[retryIndex]; - if (delayMs == null || !isTransientAnnounceDeliveryError(err) || params.signal?.aborted) { + const hasNextRetry = retryIndex < maxRetries; + if (!hasNextRetry || !isTransientAnnounceDeliveryError(err) || params.signal?.aborted) { throw err; } - const nextAttempt = retryIndex + 2; - const maxAttempts = DIRECT_ANNOUNCE_TRANSIENT_RETRY_DELAYS_MS.length + 1; + const delayMs = computeRetryDelayMs(retryIndex, config); + const nextAttempt = retryIndex + 1; defaultRuntime.log( - `[warn] Subagent announce ${params.operation} transient failure, retrying ${nextAttempt}/${maxAttempts} in ${Math.round(delayMs / 1000)}s: ${summarizeDeliveryError(err)}`, + `[warn] Subagent announce ${params.operation} transient failure, retrying ${nextAttempt}/${maxRetries} in ${Math.round(delayMs / 1000)}s: ${summarizeDeliveryError(err)}`, ); retryIndex += 1; await waitForAnnounceRetryDelay(delayMs, params.signal); @@ -786,6 +855,7 @@ async function sendSubagentAnnounceDirectly(params: { ? 
"completion direct announce agent call" : "direct announce agent call", signal: params.signal, + retryConfig: resolveAnnounceRetryConfig(cfg), run: async () => await callGateway({ method: "agent", @@ -1094,6 +1164,7 @@ async function wakeSubagentRunAfterDescendants(params: { const wakeResponse = await runAnnounceDeliveryWithRetry<{ runId?: string }>({ operation: "descendant wake agent call", signal: params.signal, + retryConfig: resolveAnnounceRetryConfig(cfg), run: async () => await callGateway({ method: "agent", diff --git a/src/config/types.agent-defaults.ts b/src/config/types.agent-defaults.ts index 6ceba822362..02dbe4eabd3 100644 --- a/src/config/types.agent-defaults.ts +++ b/src/config/types.agent-defaults.ts @@ -281,6 +281,17 @@ export type AgentDefaultsConfig = { runTimeoutSeconds?: number; /** Gateway timeout in ms for sub-agent announce delivery calls (default: 60000). */ announceTimeoutMs?: number; + /** Retry configuration for sub-agent announce delivery on transient failures. */ + announceRetry?: { + /** Number of retry attempts after initial failure (default: 3, max: 10). */ + attempts?: number; + /** Minimum retry delay in ms (default: 5000). */ + minDelayMs?: number; + /** Maximum retry delay cap in ms with exponential backoff (default: 30000). */ + maxDelayMs?: number; + /** Jitter factor (0-1) applied to delays to avoid thundering herd (default: 0.1). */ + jitter?: number; + }; }; /** Optional sandbox settings for non-main sessions. 
*/ sandbox?: AgentSandboxConfig; diff --git a/src/config/zod-schema.agent-defaults.ts b/src/config/zod-schema.agent-defaults.ts index 276f97f586d..7ee43bd217a 100644 --- a/src/config/zod-schema.agent-defaults.ts +++ b/src/config/zod-schema.agent-defaults.ts @@ -183,6 +183,48 @@ export const AgentDefaultsSchema = z thinking: z.string().optional(), runTimeoutSeconds: z.number().int().min(0).optional(), announceTimeoutMs: z.number().int().positive().optional(), + announceRetry: z + .object({ + attempts: z + .number() + .int() + .min(1) + .max(10) + .optional() + .describe("Number of retry attempts after initial failure (default: 3, max: 10)."), + minDelayMs: z + .number() + .int() + .positive() + .optional() + .describe("Minimum retry delay in ms (default: 5000)."), + maxDelayMs: z + .number() + .int() + .positive() + .optional() + .describe("Maximum retry delay cap in ms (default: 30000)."), + jitter: z + .number() + .min(0) + .max(1) + .optional() + .describe("Jitter factor (0-1) applied to delays (default: 0.1)."), + }) + .strict() + .refine( + (data) => { + if (data.minDelayMs != null && data.maxDelayMs != null) { + return data.maxDelayMs >= data.minDelayMs; + } + return true; + }, + { + message: "maxDelayMs must be greater than or equal to minDelayMs", + }, + ) + .optional() + .describe("Retry configuration for sub-agent announce delivery on transient failures."), }) .strict() .optional(), From 1fa3623eae034905f7e92bf8739e9f7d0f1742ca Mon Sep 17 00:00:00 2001 From: fengyuwusong Date: Fri, 6 Mar 2026 12:28:29 +0800 Subject: [PATCH 2/2] fix: clamp minDelayMs to timer-safe maximum Also improve code clarity with explicit rawMinDelayMs/rawMaxDelayMs variables. 
--- src/agents/subagent-announce.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/agents/subagent-announce.ts b/src/agents/subagent-announce.ts index f02362818c2..93af932e1e2 100644 --- a/src/agents/subagent-announce.ts +++ b/src/agents/subagent-announce.ts @@ -76,12 +76,14 @@ type AnnounceRetryConfig = { function resolveAnnounceRetryConfig(cfg: ReturnType): AnnounceRetryConfig { const retry = cfg.agents?.defaults?.subagents?.announceRetry; - const minDelayMs = + // Clamp minDelayMs to timer-safe maximum + const rawMinDelayMs = typeof retry?.minDelayMs === "number" && Number.isFinite(retry.minDelayMs) && retry.minDelayMs > 0 ? Math.floor(retry.minDelayMs) : DEFAULT_ANNOUNCE_RETRY_MIN_DELAY_MS; + const minDelayMs = Math.min(rawMinDelayMs, MAX_TIMER_SAFE_TIMEOUT_MS); const rawMaxDelayMs = typeof retry?.maxDelayMs === "number" && Number.isFinite(retry.maxDelayMs) &&