* Changelog: add subagent announce timeout note
* Tests: cover subagent completion timeout no-retry
* Subagents: stop retrying external completion timeouts
* Config: update subagent announce timeout default docs
* Tests: use fake timers for subagent timeout retry guard
This commit is contained in:
parent
7844bc89a1
commit
8ad0ca309e
@ -34,6 +34,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
- Mattermost/block streaming: fix duplicate message delivery (one threaded, one top-level) when block streaming is active by excluding `replyToId` from the block reply dedup key and adding an explicit `threading` dock to the Mattermost plugin. (#41362) Thanks @mathiasnagler and @vincentkoc.
|
- Mattermost/block streaming: fix duplicate message delivery (one threaded, one top-level) when block streaming is active by excluding `replyToId` from the block reply dedup key and adding an explicit `threading` dock to the Mattermost plugin. (#41362) Thanks @mathiasnagler and @vincentkoc.
|
||||||
- BlueBubbles/self-chat echo dedupe: drop reflected duplicate webhook copies only when a matching `fromMe` event was just seen for the same chat, body, and timestamp, preventing self-chat loops without broad webhook suppression. Related to #32166. (#38442) Thanks @vincentkoc.
|
- BlueBubbles/self-chat echo dedupe: drop reflected duplicate webhook copies only when a matching `fromMe` event was just seen for the same chat, body, and timestamp, preventing self-chat loops without broad webhook suppression. Related to #32166. (#38442) Thanks @vincentkoc.
|
||||||
- Models/Kimi Coding: send `anthropic-messages` tools in native Anthropic format again so `kimi-coding` stops degrading tool calls into XML/plain-text pseudo invocations instead of real `tool_use` blocks. (#38669, #39907, #40552) Thanks @opriz.
|
- Models/Kimi Coding: send `anthropic-messages` tools in native Anthropic format again so `kimi-coding` stops degrading tool calls into XML/plain-text pseudo invocations instead of real `tool_use` blocks. (#38669, #39907, #40552) Thanks @opriz.
|
||||||
|
- Subagents/completion announce retries: raise the default announce timeout to 90 seconds and stop retrying gateway-timeout failures for externally delivered completion announces, preventing duplicate user-facing completion messages after slow gateway responses. Fixes #41235. Thanks @vasujain00 and @vincentkoc.
|
||||||
- Sandbox/write: preserve pinned mutation-helper payload stdin so sandboxed `write` no longer reports success while creating empty files. (#43876) Thanks @glitch418x.
|
- Sandbox/write: preserve pinned mutation-helper payload stdin so sandboxed `write` no longer reports success while creating empty files. (#43876) Thanks @glitch418x.
|
||||||
- Gateway/main-session routing: keep TUI and other `mode:UI` main-session sends on the internal surface when `deliver` is enabled, so replies no longer inherit the session's persisted Telegram/WhatsApp route. (#43918) Thanks @obviyus.
|
- Gateway/main-session routing: keep TUI and other `mode:UI` main-session sends on the internal surface when `deliver` is enabled, so replies no longer inherit the session's persisted Telegram/WhatsApp route. (#43918) Thanks @obviyus.
|
||||||
- Doctor/gateway service audit: canonicalize service entrypoint paths before comparing them so symlink-vs-realpath installs no longer trigger false "entrypoint does not match the current install" repair prompts. (#43882) Thanks @ngutman.
|
- Doctor/gateway service audit: canonicalize service entrypoint paths before comparing them so symlink-vs-realpath installs no longer trigger false "entrypoint does not match the current install" repair prompts. (#43882) Thanks @ngutman.
|
||||||
|
|||||||
@ -8,6 +8,12 @@ type GatewayCall = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const gatewayCalls: GatewayCall[] = [];
|
const gatewayCalls: GatewayCall[] = [];
|
||||||
|
// Swappable behavior behind the mocked `callGateway`: the mock records each
// request in `gatewayCalls` and then delegates to this function, so a test can
// reassign it to simulate gateway failures.
// Default behavior: "chat.history" requests get an empty message list; every
// other method resolves to an empty object.
let callGatewayImpl: (request: GatewayCall) => Promise<unknown> = async (request) => {
|
||||||
|
if (request.method === "chat.history") {
|
||||||
|
return { messages: [] };
|
||||||
|
}
|
||||||
|
return {};
|
||||||
|
};
|
||||||
let sessionStore: Record<string, Record<string, unknown>> = {};
|
let sessionStore: Record<string, Record<string, unknown>> = {};
|
||||||
let configOverride: ReturnType<(typeof import("../config/config.js"))["loadConfig"]> = {
|
let configOverride: ReturnType<(typeof import("../config/config.js"))["loadConfig"]> = {
|
||||||
session: {
|
session: {
|
||||||
@ -27,10 +33,7 @@ let fallbackRequesterResolution: {
|
|||||||
vi.mock("../gateway/call.js", () => ({
|
vi.mock("../gateway/call.js", () => ({
|
||||||
callGateway: vi.fn(async (request: GatewayCall) => {
|
callGateway: vi.fn(async (request: GatewayCall) => {
|
||||||
gatewayCalls.push(request);
|
gatewayCalls.push(request);
|
||||||
if (request.method === "chat.history") {
|
return await callGatewayImpl(request);
|
||||||
return { messages: [] };
|
|
||||||
}
|
|
||||||
return {};
|
|
||||||
}),
|
}),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
@ -120,6 +123,12 @@ function findGatewayCall(predicate: (call: GatewayCall) => boolean): GatewayCall
|
|||||||
describe("subagent announce timeout config", () => {
|
describe("subagent announce timeout config", () => {
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
gatewayCalls.length = 0;
|
gatewayCalls.length = 0;
|
||||||
|
callGatewayImpl = async (request) => {
|
||||||
|
if (request.method === "chat.history") {
|
||||||
|
return { messages: [] };
|
||||||
|
}
|
||||||
|
return {};
|
||||||
|
};
|
||||||
sessionStore = {};
|
sessionStore = {};
|
||||||
configOverride = {
|
configOverride = {
|
||||||
session: defaultSessionConfig,
|
session: defaultSessionConfig,
|
||||||
@ -131,13 +140,13 @@ describe("subagent announce timeout config", () => {
|
|||||||
fallbackRequesterResolution = null;
|
fallbackRequesterResolution = null;
|
||||||
});
|
});
|
||||||
|
|
||||||
it("uses 60s timeout by default for direct announce agent call", async () => {
|
it("uses 90s timeout by default for direct announce agent call", async () => {
|
||||||
await runAnnounceFlowForTest("run-default-timeout");
|
await runAnnounceFlowForTest("run-default-timeout");
|
||||||
|
|
||||||
const directAgentCall = findGatewayCall(
|
const directAgentCall = findGatewayCall(
|
||||||
(call) => call.method === "agent" && call.expectFinal === true,
|
(call) => call.method === "agent" && call.expectFinal === true,
|
||||||
);
|
);
|
||||||
expect(directAgentCall?.timeoutMs).toBe(60_000);
|
expect(directAgentCall?.timeoutMs).toBe(90_000);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("honors configured announce timeout for direct announce agent call", async () => {
|
it("honors configured announce timeout for direct announce agent call", async () => {
|
||||||
@ -166,6 +175,35 @@ describe("subagent announce timeout config", () => {
|
|||||||
expect(completionDirectAgentCall?.timeoutMs).toBe(90_000);
|
expect(completionDirectAgentCall?.timeoutMs).toBe(90_000);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Regression guard: when an externally delivered completion announce fails with
// a gateway timeout, exactly one direct agent call must have been issued.
it("does not retry gateway timeout for externally delivered completion announces", async () => {
|
||||||
|
// Fake timers are installed for the duration of the test and restored in `finally`.
// NOTE(review): presumably this keeps any retry backoff from waiting in real time — confirm.
vi.useFakeTimers();
|
||||||
|
try {
|
||||||
|
// History lookups still succeed; every other gateway call rejects with a
// message containing "gateway timeout".
callGatewayImpl = async (request) => {
|
||||||
|
if (request.method === "chat.history") {
|
||||||
|
return { messages: [] };
|
||||||
|
}
|
||||||
|
throw new Error("gateway timeout after 90000ms");
|
||||||
|
};
|
||||||
|
|
||||||
|
// The flow is expected to resolve to `false` rather than reject.
await expect(
|
||||||
|
runAnnounceFlowForTest("run-completion-timeout-no-retry", {
|
||||||
|
requesterOrigin: {
|
||||||
|
channel: "telegram",
|
||||||
|
to: "12345",
|
||||||
|
},
|
||||||
|
expectsCompletionMessage: true,
|
||||||
|
}),
|
||||||
|
).resolves.toBe(false);
|
||||||
|
|
||||||
|
// Count only the direct announce calls (method "agent" with expectFinal set).
const directAgentCalls = gatewayCalls.filter(
|
||||||
|
(call) => call.method === "agent" && call.expectFinal === true,
|
||||||
|
);
|
||||||
|
// A single recorded call means the timeout was not retried.
expect(directAgentCalls).toHaveLength(1);
|
||||||
|
} finally {
|
||||||
|
vi.useRealTimers();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
it("regression, skips parent announce while descendants are still pending", async () => {
|
it("regression, skips parent announce while descendants are still pending", async () => {
|
||||||
requesterDepthResolver = () => 1;
|
requesterDepthResolver = () => 1;
|
||||||
pendingDescendantRuns = 2;
|
pendingDescendantRuns = 2;
|
||||||
|
|||||||
@ -51,8 +51,9 @@ import { isAnnounceSkip } from "./tools/sessions-send-helpers.js";
|
|||||||
|
|
||||||
const FAST_TEST_MODE = process.env.OPENCLAW_TEST_FAST === "1";
|
const FAST_TEST_MODE = process.env.OPENCLAW_TEST_FAST === "1";
|
||||||
const FAST_TEST_RETRY_INTERVAL_MS = 8;
|
const FAST_TEST_RETRY_INTERVAL_MS = 8;
|
||||||
const DEFAULT_SUBAGENT_ANNOUNCE_TIMEOUT_MS = 60_000;
|
const DEFAULT_SUBAGENT_ANNOUNCE_TIMEOUT_MS = 90_000;
|
||||||
const MAX_TIMER_SAFE_TIMEOUT_MS = 2_147_000_000;
|
const MAX_TIMER_SAFE_TIMEOUT_MS = 2_147_000_000;
|
||||||
|
// Case-insensitive match for "gateway timeout" in a delivery error message.
// Shared by the transient-error pattern list and by isGatewayTimeoutError.
const GATEWAY_TIMEOUT_PATTERN = /gateway timeout/i;
|
||||||
let subagentRegistryRuntimePromise: Promise<
|
let subagentRegistryRuntimePromise: Promise<
|
||||||
typeof import("./subagent-registry-runtime.js")
|
typeof import("./subagent-registry-runtime.js")
|
||||||
> | null = null;
|
> | null = null;
|
||||||
@ -107,7 +108,7 @@ const TRANSIENT_ANNOUNCE_DELIVERY_ERROR_PATTERNS: readonly RegExp[] = [
|
|||||||
/no active .* listener/i,
|
/no active .* listener/i,
|
||||||
/gateway not connected/i,
|
/gateway not connected/i,
|
||||||
/gateway closed \(1006/i,
|
/gateway closed \(1006/i,
|
||||||
/gateway timeout/i,
|
GATEWAY_TIMEOUT_PATTERN,
|
||||||
/\b(econnreset|econnrefused|etimedout|enotfound|ehostunreach|network error)\b/i,
|
/\b(econnreset|econnrefused|etimedout|enotfound|ehostunreach|network error)\b/i,
|
||||||
];
|
];
|
||||||
|
|
||||||
@ -133,6 +134,11 @@ function isTransientAnnounceDeliveryError(error: unknown): boolean {
|
|||||||
return TRANSIENT_ANNOUNCE_DELIVERY_ERROR_PATTERNS.some((re) => re.test(message));
|
return TRANSIENT_ANNOUNCE_DELIVERY_ERROR_PATTERNS.some((re) => re.test(message));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether a delivery error is a gateway timeout.
 *
 * @param error - The caught value; it is reduced to a message via `summarizeDeliveryError`.
 * @returns `true` only when that message is truthy and matches `GATEWAY_TIMEOUT_PATTERN`.
 */
function isGatewayTimeoutError(error: unknown): boolean {
|
||||||
|
const message = summarizeDeliveryError(error);
|
||||||
|
return Boolean(message) && GATEWAY_TIMEOUT_PATTERN.test(message);
|
||||||
|
}
|
||||||
|
|
||||||
async function waitForAnnounceRetryDelay(ms: number, signal?: AbortSignal): Promise<void> {
|
async function waitForAnnounceRetryDelay(ms: number, signal?: AbortSignal): Promise<void> {
|
||||||
if (ms <= 0) {
|
if (ms <= 0) {
|
||||||
return;
|
return;
|
||||||
@ -160,6 +166,7 @@ async function waitForAnnounceRetryDelay(ms: number, signal?: AbortSignal): Prom
|
|||||||
|
|
||||||
async function runAnnounceDeliveryWithRetry<T>(params: {
|
async function runAnnounceDeliveryWithRetry<T>(params: {
|
||||||
operation: string;
|
operation: string;
|
||||||
|
noRetryOnGatewayTimeout?: boolean;
|
||||||
signal?: AbortSignal;
|
signal?: AbortSignal;
|
||||||
run: () => Promise<T>;
|
run: () => Promise<T>;
|
||||||
}): Promise<T> {
|
}): Promise<T> {
|
||||||
@ -171,6 +178,9 @@ async function runAnnounceDeliveryWithRetry<T>(params: {
|
|||||||
try {
|
try {
|
||||||
return await params.run();
|
return await params.run();
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
if (params.noRetryOnGatewayTimeout && isGatewayTimeoutError(err)) {
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
const delayMs = DIRECT_ANNOUNCE_TRANSIENT_RETRY_DELAYS_MS[retryIndex];
|
const delayMs = DIRECT_ANNOUNCE_TRANSIENT_RETRY_DELAYS_MS[retryIndex];
|
||||||
if (delayMs == null || !isTransientAnnounceDeliveryError(err) || params.signal?.aborted) {
|
if (delayMs == null || !isTransientAnnounceDeliveryError(err) || params.signal?.aborted) {
|
||||||
throw err;
|
throw err;
|
||||||
@ -789,6 +799,7 @@ async function sendSubagentAnnounceDirectly(params: {
|
|||||||
operation: params.expectsCompletionMessage
|
operation: params.expectsCompletionMessage
|
||||||
? "completion direct announce agent call"
|
? "completion direct announce agent call"
|
||||||
: "direct announce agent call",
|
: "direct announce agent call",
|
||||||
|
noRetryOnGatewayTimeout: params.expectsCompletionMessage && shouldDeliverExternally,
|
||||||
signal: params.signal,
|
signal: params.signal,
|
||||||
run: async () =>
|
run: async () =>
|
||||||
await callGateway({
|
await callGateway({
|
||||||
|
|||||||
@ -279,7 +279,7 @@ export type AgentDefaultsConfig = {
|
|||||||
thinking?: string;
|
thinking?: string;
|
||||||
/** Default run timeout in seconds for spawned sub-agents (0 = no timeout). */
|
/** Default run timeout in seconds for spawned sub-agents (0 = no timeout). */
|
||||||
runTimeoutSeconds?: number;
|
runTimeoutSeconds?: number;
|
||||||
/** Gateway timeout in ms for sub-agent announce delivery calls (default: 60000). */
|
/** Gateway timeout in ms for sub-agent announce delivery calls (default: 90000). */
|
||||||
announceTimeoutMs?: number;
|
announceTimeoutMs?: number;
|
||||||
};
|
};
|
||||||
/** Optional sandbox settings for non-main sessions. */
|
/** Optional sandbox settings for non-main sessions. */
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user