From 7a489dad18be37b22b7dc7a0533af875d63a6bfc Mon Sep 17 00:00:00 2001 From: hope Date: Tue, 17 Mar 2026 07:55:06 +0800 Subject: [PATCH 1/3] =?UTF-8?q?feat:=20heartbeat.timeoutSeconds=20?= =?UTF-8?q?=E2=80=94=20per-heartbeat=20embedded=20run=20timeout?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add timeoutSeconds config field to agents.defaults.heartbeat and agents.list[].heartbeat to allow heartbeat runs to fail fast when a model hangs, without affecting interactive agent turn timeouts. - Schema: Added timeoutSeconds field to HeartbeatSchema - Types: Added documentation for heartbeat timeoutSeconds config - Runtime: Pass timeoutOverrideSeconds from heartbeat config to getReplyFromConfig - Tests: Added runtime tests verifying timeoutOverrideSeconds passthrough Motivation: Real incident where heartbeat hung for 600s before failover. A 60s timeout would have triggered failover in 1/10th the time. Closes #47456 --- src/infra/heartbeat-runner.timeout.test.ts | 233 +++++++++++++++++++++ src/infra/heartbeat-runner.ts | 19 +- 2 files changed, 244 insertions(+), 8 deletions(-) create mode 100644 src/infra/heartbeat-runner.timeout.test.ts diff --git a/src/infra/heartbeat-runner.timeout.test.ts b/src/infra/heartbeat-runner.timeout.test.ts new file mode 100644 index 00000000000..9ba53d6e2c5 --- /dev/null +++ b/src/infra/heartbeat-runner.timeout.test.ts @@ -0,0 +1,233 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { telegramPlugin } from "../../extensions/telegram/src/channel.js"; +import { setTelegramRuntime } from "../../extensions/telegram/src/runtime.js"; +import { whatsappPlugin } from "../../extensions/whatsapp/src/channel.js"; +import { setWhatsAppRuntime } from "../../extensions/whatsapp/src/runtime.js"; +import * as replyModule from "../auto-reply/reply.js"; +import type { OpenClawConfig } from "../config/config.js"; +import { resolveMainSessionKey } from "../config/sessions.js"; +import { setActivePluginRegistry } from "../plugins/runtime.js"; +import { createPluginRuntime } from "../plugins/runtime/index.js"; +import { createTestRegistry } from "../test-utils/channel-plugins.js"; +import { runHeartbeatOnce } from "./heartbeat-runner.js"; +import { seedSessionStore, withTempHeartbeatSandbox } from "./heartbeat-runner.test-utils.js"; + +vi.mock("jiti", () => ({ createJiti: () => () => ({}) })); + +beforeEach(() => { + const runtime = createPluginRuntime(); + setTelegramRuntime(runtime); + setWhatsAppRuntime(runtime); + setActivePluginRegistry( + createTestRegistry([ + { pluginId: "whatsapp", plugin: whatsappPlugin, source: "test" }, + { pluginId: "telegram", plugin: telegramPlugin, source: "test" }, + ]), + ); +}); + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe("heartbeat timeoutSeconds config", () => { + it("should accept timeoutSeconds in heartbeat config", () => { + const cfg: OpenClawConfig = { + agents: { + defaults: { + heartbeat: { + every: "30m", + timeoutSeconds: 60, + }, + }, + }, + }; + + expect(cfg.agents?.defaults?.heartbeat?.timeoutSeconds).toBe(60); + }); + + it("should accept timeoutSeconds in per-agent heartbeat config", () => { + const cfg: OpenClawConfig = { + agents: { + defaults: { + heartbeat: { + every: "30m", + }, + }, + list: [ + { + id: "ops", + heartbeat: { + every: "1h", + timeoutSeconds: 90, + }, + }, + ], + }, + }; + + const opsAgent = cfg.agents?.list?.[0]; + expect(opsAgent?.heartbeat?.timeoutSeconds).toBe(90); + }); + + it("should allow timeoutSeconds override at agent level", () => { + const cfg: OpenClawConfig = { + agents: { + defaults: { + heartbeat: { + every: "30m", + timeoutSeconds: 60, + }, + }, + list: [ + { + id: "research", + heartbeat: { + timeoutSeconds: 120, + }, + }, + ], + }, + }; + + const researchAgent = cfg.agents?.list?.[0]; + expect(researchAgent?.heartbeat?.timeoutSeconds).toBe(120); + }); + + it("should work without timeoutSeconds (backward compatible)", () => { + const cfg: OpenClawConfig = { + agents: { + defaults: { + heartbeat: { + every: "30m", + model: "anthropic/claude-sonnet-4-5", + }, + }, + }, + }; + + expect(cfg.agents?.defaults?.heartbeat?.timeoutSeconds).toBeUndefined(); + }); +}); + +describe("runHeartbeatOnce – timeoutOverrideSeconds passthrough", () => { + async function runDefaultsHeartbeat(params: { timeoutSeconds?: number }) { + return withTempHeartbeatSandbox( + async ({ tmpDir, storePath }) => { + const cfg: OpenClawConfig = { + agents: { + defaults: { + workspace: tmpDir, + heartbeat: { + every: "5m", + target: "whatsapp", + ...(params.timeoutSeconds !== undefined && { + timeoutSeconds: params.timeoutSeconds, + }), + }, + }, + }, + channels: { whatsapp: { allowFrom: ["*"] } }, + session: { store: storePath }, + }; + const sessionKey = resolveMainSessionKey(cfg); + await seedSessionStore(storePath, sessionKey, { + updatedAt: 0, + lastChannel: "whatsapp", + lastProvider: "whatsapp", + lastTo: "+1555", + }); + + const replySpy = vi.spyOn(replyModule, "getReplyFromConfig"); + replySpy.mockResolvedValue({ text: "HEARTBEAT_OK" }); + + await runHeartbeatOnce({ + cfg, + deps: { + getQueueSize: () => 0, + nowMs: () => 0, + }, + }); + + expect(replySpy).toHaveBeenCalledTimes(1); + return replySpy.mock.calls[0]?.[1]; + }, + { prefix: "openclaw-hb-timeout-" }, + ); + } + + it("passes timeoutOverrideSeconds when heartbeat.timeoutSeconds is set", async () => { + const replyOpts = await runDefaultsHeartbeat({ timeoutSeconds: 45 }); + expect(replyOpts).toEqual( + expect.objectContaining({ + isHeartbeat: true, + timeoutOverrideSeconds: 45, + }), + ); + }); + + it("does not pass timeoutOverrideSeconds when heartbeat.timeoutSeconds is unset", async () => { + const replyOpts = await runDefaultsHeartbeat({}); + expect(replyOpts?.timeoutOverrideSeconds).toBeUndefined(); + }); + + it("passes per-agent timeoutSeconds override", async () => { + return withTempHeartbeatSandbox( + async ({ tmpDir, storePath }) => { + const cfg: OpenClawConfig = { + agents: { + defaults: { + heartbeat: { + every: "30m", + timeoutSeconds: 60, + }, + }, + list: [ + { id: "main", default: true }, + { + id: "ops", + workspace: tmpDir, + heartbeat: { + every: "5m", + target: "whatsapp", + timeoutSeconds: 90, + }, + }, + ], + }, + channels: { whatsapp: { allowFrom: ["*"] } }, + session: { store: storePath }, + }; + const sessionKey = resolveMainSessionKey(cfg); + await seedSessionStore(storePath, sessionKey, { + updatedAt: 0, + lastChannel: "whatsapp", + lastProvider: "whatsapp", + lastTo: "+1555", + }); + + const replySpy = vi.spyOn(replyModule, "getReplyFromConfig"); + replySpy.mockResolvedValue({ text: "HEARTBEAT_OK" }); + + await runHeartbeatOnce({ + cfg, + agentId: "ops", + deps: { + getQueueSize: () => 0, + nowMs: () => 0, + }, + }); + + expect(replySpy).toHaveBeenCalledWith( + expect.any(Object), + expect.objectContaining({ + isHeartbeat: true, + timeoutOverrideSeconds: 90, + }), + cfg, + ); + }, + { prefix: "openclaw-hb-timeout-per-agent-" }, + ); + }); +}); diff --git a/src/infra/heartbeat-runner.ts b/src/infra/heartbeat-runner.ts index 5e6ddcf07cf..7f1b48b2e3b 100644 --- a/src/infra/heartbeat-runner.ts +++ b/src/infra/heartbeat-runner.ts @@ -709,14 +709,17 @@ export async function runHeartbeatOnce(opts: { const suppressToolErrorWarnings = heartbeat?.suppressToolErrorWarnings === true; const bootstrapContextMode: "lightweight" | undefined = heartbeat?.lightContext === true ? "lightweight" : undefined; - const replyOpts = heartbeatModelOverride - ? { - isHeartbeat: true, - heartbeatModelOverride, - suppressToolErrorWarnings, - bootstrapContextMode, - } - : { isHeartbeat: true, suppressToolErrorWarnings, bootstrapContextMode }; + const timeoutOverrideSeconds = heartbeat?.timeoutSeconds; + const replyOpts = + heartbeatModelOverride || timeoutOverrideSeconds !== undefined + ? { + isHeartbeat: true, + heartbeatModelOverride, + suppressToolErrorWarnings, + bootstrapContextMode, + ...(timeoutOverrideSeconds !== undefined && { timeoutOverrideSeconds }), + } + : { isHeartbeat: true, suppressToolErrorWarnings, bootstrapContextMode }; const replyResult = await getReplyFromConfig(ctx, replyOpts, cfg); const replyPayload = resolveHeartbeatReplyPayload(replyResult); const includeReasoning = heartbeat?.includeReasoning === true; From 950ed293d35528064991373190d4d0cdb346d636 Mon Sep 17 00:00:00 2001 From: hope Date: Tue, 17 Mar 2026 08:13:01 +0800 Subject: [PATCH 2/3] fix: add missing Zod schema and type definitions for timeoutSeconds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Zod schema: Add timeoutSeconds to HeartbeatSchema (src/config/zod-schema.agent-runtime.ts) - TypeScript types: Add timeoutSeconds to heartbeat config with documentation (src/config/types.agent-defaults.ts) - Runtime: Use conditional spread pattern for heartbeatModelOverride to avoid undefined in object (src/infra/heartbeat-runner.ts) - Fix duplicate includeReasoning field in HeartbeatSchema Addresses Greptile review feedback on PR #48568: 1. Missing Zod schema update — configs with timeoutSeconds would be rejected 2. Missing TypeScript type definition — compilation would fail 3. heartbeatModelOverride: undefined explicitly included when only timeout is set --- src/config/types.agent-defaults.ts | 8 ++++++++ src/config/zod-schema.agent-runtime.ts | 1 + src/infra/heartbeat-runner.ts | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/config/types.agent-defaults.ts b/src/config/types.agent-defaults.ts index 604bf88bdcb..363d45b9eae 100644 --- a/src/config/types.agent-defaults.ts +++ b/src/config/types.agent-defaults.ts @@ -269,6 +269,14 @@ export type AgentDefaultsConfig = { * Default: false (only the final heartbeat payload is delivered). */ includeReasoning?: boolean; + /** + * Max timeout in seconds for a heartbeat embedded run. + * Allows heartbeats to fail fast (e.g., 60s) when a model hangs, + * without affecting the global agents.defaults.timeoutSeconds (default 600s). + * + * Default: undefined (inherits agents.defaults.timeoutSeconds). + */ + timeoutSeconds?: number; }; /** Max concurrent agent runs across all conversations. Default: 1 (sequential). */ maxConcurrent?: number; diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts index 10f0f8637e9..9a40f2ddcbf 100644 --- a/src/config/zod-schema.agent-runtime.ts +++ b/src/config/zod-schema.agent-runtime.ts @@ -35,6 +35,7 @@ export const HeartbeatSchema = z suppressToolErrorWarnings: z.boolean().optional(), lightContext: z.boolean().optional(), isolatedSession: z.boolean().optional(), + timeoutSeconds: z.number().int().positive().optional(), }) .strict() .superRefine((val, ctx) => { diff --git a/src/infra/heartbeat-runner.ts b/src/infra/heartbeat-runner.ts index 7f1b48b2e3b..e51845cdb4e 100644 --- a/src/infra/heartbeat-runner.ts +++ b/src/infra/heartbeat-runner.ts @@ -714,7 +714,7 @@ export async function runHeartbeatOnce(opts: { heartbeatModelOverride || timeoutOverrideSeconds !== undefined ? { isHeartbeat: true, - heartbeatModelOverride, + ...(heartbeatModelOverride !== undefined && { heartbeatModelOverride }), suppressToolErrorWarnings, bootstrapContextMode, ...(timeoutOverrideSeconds !== undefined && { timeoutOverrideSeconds }), From 2b8da13d36600474009af8f67080e931305cfa37 Mon Sep 17 00:00:00 2001 From: hope Date: Tue, 17 Mar 2026 09:04:55 +0800 Subject: [PATCH 3/3] test: fix per-agent timeoutSeconds test to use correct session key - Import resolveAgentMainSessionKey from sessions.js - Use resolveAgentMainSessionKey({ cfg, agentId: 'ops' }) instead of resolveMainSessionKey(cfg) - Matches established pattern in heartbeat-runner.returns-default-unset.test.ts Addresses Greptile review feedback: per-agent test was seeding wrong session key, causing test to pass for wrong reason (missing session triggers unconditional heartbeat). --- src/infra/heartbeat-runner.timeout.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/infra/heartbeat-runner.timeout.test.ts b/src/infra/heartbeat-runner.timeout.test.ts index 9ba53d6e2c5..3ffbb3c2454 100644 --- a/src/infra/heartbeat-runner.timeout.test.ts +++ b/src/infra/heartbeat-runner.timeout.test.ts @@ -5,7 +5,7 @@ import { whatsappPlugin } from "../../extensions/whatsapp/src/channel.js"; import { setWhatsAppRuntime } from "../../extensions/whatsapp/src/runtime.js"; import * as replyModule from "../auto-reply/reply.js"; import type { OpenClawConfig } from "../config/config.js"; -import { resolveMainSessionKey } from "../config/sessions.js"; +import { resolveAgentMainSessionKey, resolveMainSessionKey } from "../config/sessions.js"; import { setActivePluginRegistry } from "../plugins/runtime.js"; import { createPluginRuntime } from "../plugins/runtime/index.js"; import { createTestRegistry } from "../test-utils/channel-plugins.js"; @@ -198,7 +198,7 @@ describe("runHeartbeatOnce – timeoutOverrideSeconds passthrough", () => { channels: { whatsapp: { allowFrom: ["*"] } }, session: { store: storePath }, }; - const sessionKey = resolveMainSessionKey(cfg); + const sessionKey = resolveAgentMainSessionKey({ cfg, agentId: "ops" }); await seedSessionStore(storePath, sessionKey, { updatedAt: 0, lastChannel: "whatsapp",