From ade14241ba4a0c03caf115e1673270a8b130f779 Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Wed, 4 Mar 2026 15:15:36 +0000 Subject: [PATCH 01/25] fix: restore agent resume after self-triggered gateway restart (#12768, #18612) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes two related issues: - #12768: Gateway restart notification - #18612: Agent does not resume after self-triggered gateway restart ## Root cause In ab4a08a82 ('fix: defer gateway restart until all replies are sent'), agentCommand() was replaced with deliverOutboundPayloads() in scheduleRestartSentinelWake(). This fixed a pre-restart race condition (correct) but accidentally made delivery one-way: the user is notified but the agent never sees the restart message and does not resume. A compounding bug meant the sentinel was also being written with stale routing data. extractDeliveryInfo() reads the persisted session store, which heartbeat runs frequently overwrite to { channel: 'webchat', to: 'heartbeat' } — an internal sink. So even restoring agentCommand() alone would fail: the sentinel's deliveryContext was pointing nowhere. ## Fix (four parts) **Part 1 — src/agents/openclaw-tools.ts** Forward the live delivery fields (agentChannel, agentTo, agentThreadId, agentAccountId) from createOpenClawTools() into createGatewayTool(). These values are captured from the current inbound message context and are accurate; they were available at the callsite but not being passed. **Part 2 — src/agents/tools/gateway-tool.ts** Prefer liveDeliveryContext (built from opts.agentChannel / agentTo / agentAccountId) over extractDeliveryInfo() when writing the sentinel. Pass deliveryContext in config.apply, config.patch, and update.run RPC calls so the server-side handlers receive it. **Part 3 — src/gateway/server-restart-sentinel.ts** Restore agentCommand() in place of deliverOutboundPayloads(). The function runs in the new process post-restart, where there are zero in-flight replies, so the pre-restart race condition does not apply. agentCommand() creates a full agent turn: the restart message is delivered to the user AND the agent sees it in its conversation history, allowing it to resume without waiting for the user to send a new message. **Part 4 — src/gateway/protocol/schema/config.ts** Add deliveryContext as an optional field to ConfigApplyLikeParamsSchema (shared by config.apply and config.patch) and UpdateRunParamsSchema. The additionalProperties: false constraint was silently dropping the field before it reached the server-side handlers. Also updated resolveConfigRestartRequest() in config.ts and the update.run handler in update.ts to prefer params.deliveryContext over extractDeliveryInfo(). ## Why the heartbeat approach fails An alternative approach (requestHeartbeatNow + enqueueSystemEvent) was tested and rejected: the heartbeat does fire, but its delivery target comes from the session store (lastChannel/lastTo), which reads 'webchat/heartbeat' due to heartbeat contamination. Responses route to an internal sink and are silently dropped. agentCommand() is the correct tool because it creates a turn with explicit delivery context attached. --- src/agents/openclaw-tools.ts | 4 ++ src/agents/tools/gateway-tool.ts | 52 ++++++++++++++--- src/gateway/protocol/schema/config.ts | 18 ++++++ src/gateway/server-methods/config.ts | 36 +++++++++++- src/gateway/server-methods/update.ts | 17 +++++- src/gateway/server-restart-sentinel.test.ts | 65 ++++++++++++++++++--- src/gateway/server-restart-sentinel.ts | 51 ++++++++-------- 7 files changed, 199 insertions(+), 44 deletions(-) diff --git a/src/agents/openclaw-tools.ts b/src/agents/openclaw-tools.ts index de5e91fdf0c..631d59d8a2a 100644 --- a/src/agents/openclaw-tools.ts +++ b/src/agents/openclaw-tools.ts @@ -175,6 +175,10 @@ export function createOpenClawTools( ...(imageGenerateTool ? [imageGenerateTool] : []), createGatewayTool({ agentSessionKey: options?.agentSessionKey, + agentChannel: options?.agentChannel != null ? String(options.agentChannel) : undefined, + agentTo: options?.agentTo, + agentThreadId: options?.agentThreadId, + agentAccountId: options?.agentAccountId, config: options?.config, }), createAgentsListTool({ diff --git a/src/agents/tools/gateway-tool.ts b/src/agents/tools/gateway-tool.ts index 33b8d86adcf..d4b7b707dd6 100644 --- a/src/agents/tools/gateway-tool.ts +++ b/src/agents/tools/gateway-tool.ts @@ -69,6 +69,10 @@ const GatewayToolSchema = Type.Object({ export function createGatewayTool(opts?: { agentSessionKey?: string; + agentChannel?: string; + agentTo?: string; + agentThreadId?: string | number; + agentAccountId?: string; config?: OpenClawConfig; }): AnyAgentTool { return { @@ -99,9 +103,24 @@ export function createGatewayTool(opts?: { : undefined; const note = typeof params.note === "string" && params.note.trim() ? params.note.trim() : undefined; - // Extract channel + threadId for routing after restart - // Supports both :thread: (most channels) and :topic: (Telegram) - const { deliveryContext, threadId } = extractDeliveryInfo(sessionKey); + // Prefer the live delivery context captured during the current agent + // run over extractDeliveryInfo() (which reads the persisted session + // store). The session store is frequently overwritten by heartbeat + // runs to { channel: "webchat", to: "heartbeat" }, causing the + // sentinel to write stale routing data that fails post-restart. + // See #18612. + const liveContext = + opts?.agentChannel != null && String(opts.agentChannel).trim() + ? { + channel: String(opts.agentChannel).trim(), + to: opts?.agentTo ?? undefined, + accountId: opts?.agentAccountId ?? undefined, + } + : undefined; + const extracted = extractDeliveryInfo(sessionKey); + const deliveryContext = liveContext ?? extracted.deliveryContext; + const threadId = + opts?.agentThreadId != null ? String(opts.agentThreadId) : extracted.threadId; const payload: RestartSentinelPayload = { kind: "restart", status: "ok", @@ -133,10 +152,25 @@ export function createGatewayTool(opts?: { const gatewayOpts = readGatewayCallOptions(params); + // Build the live delivery context from the current agent run's routing + // fields. This is passed to server-side handlers so they can write an + // accurate sentinel without reading the (potentially stale) session + // store. The store is frequently overwritten by heartbeat runs to + // { channel: "webchat", to: "heartbeat" }. See #18612. + const liveDeliveryContextForRpc = + opts?.agentChannel != null && String(opts.agentChannel).trim() + ? { + channel: String(opts.agentChannel).trim(), + to: opts?.agentTo ?? undefined, + accountId: opts?.agentAccountId ?? undefined, + } + : undefined; + const resolveGatewayWriteMeta = (): { sessionKey: string | undefined; note: string | undefined; restartDelayMs: number | undefined; + deliveryContext: typeof liveDeliveryContextForRpc; } => { const sessionKey = typeof params.sessionKey === "string" && params.sessionKey.trim() @@ -148,7 +182,7 @@ export function createGatewayTool(opts?: { typeof params.restartDelayMs === "number" && Number.isFinite(params.restartDelayMs) ? Math.floor(params.restartDelayMs) : undefined; - return { sessionKey, note, restartDelayMs }; + return { sessionKey, note, restartDelayMs, deliveryContext: liveDeliveryContextForRpc }; }; const resolveConfigWriteParams = async (): Promise<{ @@ -157,6 +191,7 @@ export function createGatewayTool(opts?: { sessionKey: string | undefined; note: string | undefined; restartDelayMs: number | undefined; + deliveryContext: typeof liveDeliveryContextForRpc; }> => { const raw = readStringParam(params, "raw", { required: true }); let baseHash = readStringParam(params, "baseHash"); @@ -183,7 +218,7 @@ export function createGatewayTool(opts?: { return jsonResult({ ok: true, result }); } if (action === "config.apply") { - const { raw, baseHash, sessionKey, note, restartDelayMs } = + const { raw, baseHash, sessionKey, note, restartDelayMs, deliveryContext } = await resolveConfigWriteParams(); const result = await callGatewayTool("config.apply", gatewayOpts, { raw, @@ -191,11 +226,12 @@ export function createGatewayTool(opts?: { sessionKey, note, restartDelayMs, + deliveryContext, }); return jsonResult({ ok: true, result }); } if (action === "config.patch") { - const { raw, baseHash, sessionKey, note, restartDelayMs } = + const { raw, baseHash, sessionKey, note, restartDelayMs, deliveryContext } = await resolveConfigWriteParams(); const result = await callGatewayTool("config.patch", gatewayOpts, { raw, @@ -203,11 +239,12 @@ export function createGatewayTool(opts?: { sessionKey, note, restartDelayMs, + deliveryContext, }); return jsonResult({ ok: true, result }); } if (action === "update.run") { - const { sessionKey, note, restartDelayMs } = resolveGatewayWriteMeta(); + const { sessionKey, note, restartDelayMs, deliveryContext } = resolveGatewayWriteMeta(); const updateTimeoutMs = gatewayOpts.timeoutMs ?? DEFAULT_UPDATE_TIMEOUT_MS; const updateGatewayOpts = { ...gatewayOpts, @@ -217,6 +254,7 @@ export function createGatewayTool(opts?: { sessionKey, note, restartDelayMs, + deliveryContext, timeoutMs: updateTimeoutMs, }); return jsonResult({ ok: true, result }); diff --git a/src/gateway/protocol/schema/config.ts b/src/gateway/protocol/schema/config.ts index 9d0ec876668..7b97b6864e1 100644 --- a/src/gateway/protocol/schema/config.ts +++ b/src/gateway/protocol/schema/config.ts @@ -17,6 +17,22 @@ export const ConfigSetParamsSchema = Type.Object( { additionalProperties: false }, ); +// deliveryContext carries the live channel/to/accountId from the agent run so +// that the restart sentinel has accurate routing data even after heartbeats +// have overwritten the session store with { channel: "webchat", to: "heartbeat" }. +// Without this field, the additionalProperties: false constraint silently drops +// it and the sentinel falls back to stale session store data. See #18612. +const DeliveryContextSchema = Type.Optional( + Type.Object( + { + channel: Type.Optional(Type.String()), + to: Type.Optional(Type.String()), + accountId: Type.Optional(Type.String()), + }, + { additionalProperties: false }, + ), +); + const ConfigApplyLikeParamsSchema = Type.Object( { raw: NonEmptyString, @@ -24,6 +40,7 @@ const ConfigApplyLikeParamsSchema = Type.Object( sessionKey: Type.Optional(Type.String()), note: Type.Optional(Type.String()), restartDelayMs: Type.Optional(Type.Integer({ minimum: 0 })), + deliveryContext: DeliveryContextSchema, }, { additionalProperties: false }, ); @@ -46,6 +63,7 @@ export const UpdateRunParamsSchema = Type.Object( note: Type.Optional(Type.String()), restartDelayMs: Type.Optional(Type.Integer({ minimum: 0 })), timeoutMs: Type.Optional(Type.Integer({ minimum: 1 })), + deliveryContext: DeliveryContextSchema, }, { additionalProperties: false }, ); diff --git a/src/gateway/server-methods/config.ts b/src/gateway/server-methods/config.ts index 977a59f00b5..c6a55a87315 100644 --- a/src/gateway/server-methods/config.ts +++ b/src/gateway/server-methods/config.ts @@ -194,9 +194,14 @@ function resolveConfigRestartRequest(params: unknown): { } { const { sessionKey, note, restartDelayMs } = parseRestartRequestParams(params); - // Extract deliveryContext + threadId for routing after restart - // Supports both :thread: (most channels) and :topic: (Telegram) - const { deliveryContext, threadId } = extractDeliveryInfo(sessionKey); + // Extract threadId from the session key (reliable — derived from key, not store). + // For deliveryContext, prefer the live context passed by the client over + // extractDeliveryInfo(), which reads the persisted session store. Heartbeat + // runs overwrite the store to { channel: "webchat", to: "heartbeat" }, so + // reading it here would produce stale routing data. See #18612. + const { deliveryContext: extractedDeliveryContext, threadId } = extractDeliveryInfo(sessionKey); + const paramsDeliveryContext = parseDeliveryContextFromParams(params); + const deliveryContext = paramsDeliveryContext ?? extractedDeliveryContext; return { sessionKey, @@ -207,6 +212,31 @@ function resolveConfigRestartRequest(params: unknown): { }; } +function parseDeliveryContextFromParams( + params: unknown, +): { channel?: string; to?: string; accountId?: string } | undefined { + const raw = (params as { deliveryContext?: unknown }).deliveryContext; + if (!raw || typeof raw !== "object") { + return undefined; + } + const channel = + typeof (raw as { channel?: unknown }).channel === "string" + ? (raw as { channel: string }).channel.trim() || undefined + : undefined; + const to = + typeof (raw as { to?: unknown }).to === "string" + ? (raw as { to: string }).to.trim() || undefined + : undefined; + const accountId = + typeof (raw as { accountId?: unknown }).accountId === "string" + ? (raw as { accountId: string }).accountId.trim() || undefined + : undefined; + if (!channel && !to) { + return undefined; + } + return { channel, to, accountId }; +} + function buildConfigRestartSentinelPayload(params: { kind: RestartSentinelPayload["kind"]; mode: string; diff --git a/src/gateway/server-methods/update.ts b/src/gateway/server-methods/update.ts index bf53e2a0227..148b80af1e2 100644 --- a/src/gateway/server-methods/update.ts +++ b/src/gateway/server-methods/update.ts @@ -22,7 +22,22 @@ export const updateHandlers: GatewayRequestHandlers = { } const actor = resolveControlPlaneActor(client); const { sessionKey, note, restartDelayMs } = parseRestartRequestParams(params); - const { deliveryContext, threadId } = extractDeliveryInfo(sessionKey); + // Prefer live deliveryContext from params over extractDeliveryInfo() (see #18612). + const paramsDeliveryContextRaw = (params as { deliveryContext?: unknown }).deliveryContext; + const paramsDeliveryContext = + paramsDeliveryContextRaw && typeof paramsDeliveryContextRaw === "object" + ? (() => { + const dc = paramsDeliveryContextRaw as Record; + const channel = + typeof dc.channel === "string" ? dc.channel.trim() || undefined : undefined; + const to = typeof dc.to === "string" ? dc.to.trim() || undefined : undefined; + const accountId = + typeof dc.accountId === "string" ? dc.accountId.trim() || undefined : undefined; + return channel || to ? { channel, to, accountId } : undefined; + })() + : undefined; + const { deliveryContext: extractedDeliveryContext, threadId } = extractDeliveryInfo(sessionKey); + const deliveryContext = paramsDeliveryContext ?? extractedDeliveryContext; const timeoutMsRaw = (params as { timeoutMs?: unknown }).timeoutMs; const timeoutMs = typeof timeoutMsRaw === "number" && Number.isFinite(timeoutMsRaw) diff --git a/src/gateway/server-restart-sentinel.test.ts b/src/gateway/server-restart-sentinel.test.ts index 187698b06ed..bcb7a57ebd7 100644 --- a/src/gateway/server-restart-sentinel.test.ts +++ b/src/gateway/server-restart-sentinel.test.ts @@ -25,8 +25,9 @@ const mocks = vi.hoisted(() => ({ })), normalizeChannelId: vi.fn((channel: string) => channel), resolveOutboundTarget: vi.fn(() => ({ ok: true as const, to: "+15550002" })), - deliverOutboundPayloads: vi.fn(async () => []), + agentCommand: vi.fn(async () => undefined), enqueueSystemEvent: vi.fn(), + defaultRuntime: {}, })); vi.mock("../agents/agent-scope.js", () => ({ @@ -68,8 +69,12 @@ vi.mock("../infra/outbound/targets.js", () => ({ resolveOutboundTarget: mocks.resolveOutboundTarget, })); -vi.mock("../infra/outbound/deliver.js", () => ({ - deliverOutboundPayloads: mocks.deliverOutboundPayloads, +vi.mock("../commands/agent.js", () => ({ + agentCommand: mocks.agentCommand, +})); + +vi.mock("../runtime.js", () => ({ + defaultRuntime: mocks.defaultRuntime, })); vi.mock("../infra/system-events.js", () => ({ @@ -79,16 +84,62 @@ vi.mock("../infra/system-events.js", () => ({ const { scheduleRestartSentinelWake } = await import("./server-restart-sentinel.js"); describe("scheduleRestartSentinelWake", () => { - it("forwards session context to outbound delivery", async () => { + it("calls agentCommand with resolved channel, to, and sessionKey after restart", async () => { await scheduleRestartSentinelWake({ deps: {} as never }); - expect(mocks.deliverOutboundPayloads).toHaveBeenCalledWith( + expect(mocks.agentCommand).toHaveBeenCalledWith( expect.objectContaining({ - channel: "whatsapp", + message: "restart message", + sessionKey: "agent:main:main", to: "+15550002", - session: { key: "agent:main:main", agentId: "agent-from-key" }, + channel: "whatsapp", + deliver: true, + bestEffortDeliver: true, + messageChannel: "whatsapp", + accountId: "acct-2", }), + mocks.defaultRuntime, + {}, ); expect(mocks.enqueueSystemEvent).not.toHaveBeenCalled(); }); + + it("falls back to enqueueSystemEvent when agentCommand throws", async () => { + mocks.agentCommand.mockRejectedValueOnce(new Error("agent failed")); + mocks.enqueueSystemEvent.mockClear(); + + await scheduleRestartSentinelWake({ deps: {} as never }); + + expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith( + expect.stringContaining("restart summary"), + { sessionKey: "agent:main:main" }, + ); + }); + + it("falls back to enqueueSystemEvent when channel cannot be resolved (no channel in origin)", async () => { + mocks.resolveOutboundTarget.mockReturnValueOnce({ + ok: false, + error: new Error("no-target"), + } as never); + mocks.agentCommand.mockClear(); + mocks.enqueueSystemEvent.mockClear(); + + await scheduleRestartSentinelWake({ deps: {} as never }); + + expect(mocks.agentCommand).not.toHaveBeenCalled(); + expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("restart message", { + sessionKey: "agent:main:main", + }); + }); + + it("falls back to enqueueSystemEvent on main session key when sentinel has no sessionKey", async () => { + mocks.consumeRestartSentinel.mockResolvedValueOnce({ payload: { sessionKey: "" } } as never); + mocks.enqueueSystemEvent.mockClear(); + + await scheduleRestartSentinelWake({ deps: {} as never }); + + expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("restart message", { + sessionKey: "agent:main:main", + }); + }); }); diff --git a/src/gateway/server-restart-sentinel.ts b/src/gateway/server-restart-sentinel.ts index e6191942dba..dadf596d6ad 100644 --- a/src/gateway/server-restart-sentinel.ts +++ b/src/gateway/server-restart-sentinel.ts @@ -1,10 +1,9 @@ import { resolveAnnounceTargetFromKey } from "../agents/tools/sessions-send-helpers.js"; import { normalizeChannelId } from "../channels/plugins/index.js"; import type { CliDeps } from "../cli/deps.js"; +import { agentCommand } from "../commands/agent.js"; import { resolveMainSessionKeyFromConfig } from "../config/sessions.js"; import { parseSessionThreadInfo } from "../config/sessions/delivery-info.js"; -import { deliverOutboundPayloads } from "../infra/outbound/deliver.js"; -import { buildOutboundSessionContext } from "../infra/outbound/session-context.js"; import { resolveOutboundTarget } from "../infra/outbound/targets.js"; import { consumeRestartSentinel, @@ -12,10 +11,11 @@ import { summarizeRestartSentinel, } from "../infra/restart-sentinel.js"; import { enqueueSystemEvent } from "../infra/system-events.js"; +import { defaultRuntime } from "../runtime.js"; import { deliveryContextFromSession, mergeDeliveryContext } from "../utils/delivery-context.js"; import { loadSessionEntry } from "./session-utils.js"; -export async function scheduleRestartSentinelWake(_params: { deps: CliDeps }) { +export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) { const sentinel = await consumeRestartSentinel(); if (!sentinel) { return; @@ -76,30 +76,29 @@ export async function scheduleRestartSentinelWake(_params: { deps: CliDeps }) { sessionThreadId ?? (origin?.threadId != null ? String(origin.threadId) : undefined); - // Slack uses replyToId (thread_ts) for threading, not threadId. - // The reply path does this mapping but deliverOutboundPayloads does not, - // so we must convert here to ensure post-restart notifications land in - // the originating Slack thread. See #17716. - const isSlack = channel === "slack"; - const replyToId = isSlack && threadId != null && threadId !== "" ? String(threadId) : undefined; - const resolvedThreadId = isSlack ? undefined : threadId; - const outboundSession = buildOutboundSessionContext({ - cfg, - sessionKey, - }); - try { - await deliverOutboundPayloads({ - cfg, - channel, - to: resolved.to, - accountId: origin?.accountId, - replyToId, - threadId: resolvedThreadId, - payloads: [{ text: message }], - session: outboundSession, - bestEffort: true, - }); + // Use agentCommand() rather than deliverOutboundPayloads() so the restart + // message is a proper agent turn: the user is notified AND the agent sees + // the message in its conversation history and can resume autonomously. + // + // This is safe post-restart because scheduleRestartSentinelWake() runs in + // the new process, where there are zero in-flight replies. The pre-restart + // race condition fixed in ab4a08a82 does not apply here. + await agentCommand( + { + message, + sessionKey, + to: resolved.to, + channel, + deliver: true, + bestEffortDeliver: true, + messageChannel: channel, + threadId, + accountId: origin?.accountId, + }, + defaultRuntime, + params.deps, + ); } catch (err) { enqueueSystemEvent(`${summary}\n${String(err)}`, { sessionKey }); } From d000b6aa762151eba17e7dee84ab4ab6161e27f9 Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Wed, 4 Mar 2026 15:36:42 +0000 Subject: [PATCH 02/25] chore: regenerate protocol bindings for deliveryContext schema addition Updates generated Swift bindings (GatewayModels.swift) to include the deliveryContext field added to ConfigApplyLikeParamsSchema and UpdateRunParamsSchema in src/gateway/protocol/schema/config.ts. --- .../OpenClawProtocol/GatewayModels.swift | 18 +++++++++++++++--- .../OpenClawProtocol/GatewayModels.swift | 18 +++++++++++++++--- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift b/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift index 0b1d7b13e01..e01267c889f 100644 --- a/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift +++ b/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift @@ -1656,19 +1656,22 @@ public struct ConfigApplyParams: Codable, Sendable { public let sessionkey: String? public let note: String? public let restartdelayms: Int? + public let deliverycontext: [String: AnyCodable]? public init( raw: String, basehash: String?, sessionkey: String?, note: String?, - restartdelayms: Int?) + restartdelayms: Int?, + deliverycontext: [String: AnyCodable]?) { self.raw = raw self.basehash = basehash self.sessionkey = sessionkey self.note = note self.restartdelayms = restartdelayms + self.deliverycontext = deliverycontext } private enum CodingKeys: String, CodingKey { @@ -1677,6 +1680,7 @@ public struct ConfigApplyParams: Codable, Sendable { case sessionkey = "sessionKey" case note case restartdelayms = "restartDelayMs" + case deliverycontext = "deliveryContext" } } @@ -1686,19 +1690,22 @@ public struct ConfigPatchParams: Codable, Sendable { public let sessionkey: String? public let note: String? public let restartdelayms: Int? + public let deliverycontext: [String: AnyCodable]? public init( raw: String, basehash: String?, sessionkey: String?, note: String?, - restartdelayms: Int?) + restartdelayms: Int?, + deliverycontext: [String: AnyCodable]?) { self.raw = raw self.basehash = basehash self.sessionkey = sessionkey self.note = note self.restartdelayms = restartdelayms + self.deliverycontext = deliverycontext } private enum CodingKeys: String, CodingKey { @@ -1707,6 +1714,7 @@ public struct ConfigPatchParams: Codable, Sendable { case sessionkey = "sessionKey" case note case restartdelayms = "restartDelayMs" + case deliverycontext = "deliveryContext" } } @@ -3709,17 +3717,20 @@ public struct UpdateRunParams: Codable, Sendable { public let note: String? public let restartdelayms: Int? public let timeoutms: Int? + public let deliverycontext: [String: AnyCodable]? public init( sessionkey: String?, note: String?, restartdelayms: Int?, - timeoutms: Int?) + timeoutms: Int?, + deliverycontext: [String: AnyCodable]?) { self.sessionkey = sessionkey self.note = note self.restartdelayms = restartdelayms self.timeoutms = timeoutms + self.deliverycontext = deliverycontext } private enum CodingKeys: String, CodingKey { @@ -3727,6 +3738,7 @@ public struct UpdateRunParams: Codable, Sendable { case note case restartdelayms = "restartDelayMs" case timeoutms = "timeoutMs" + case deliverycontext = "deliveryContext" } } diff --git a/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift b/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift index 0b1d7b13e01..e01267c889f 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift @@ -1656,19 +1656,22 @@ public struct ConfigApplyParams: Codable, Sendable { public let sessionkey: String? public let note: String? public let restartdelayms: Int? + public let deliverycontext: [String: AnyCodable]? public init( raw: String, basehash: String?, sessionkey: String?, note: String?, - restartdelayms: Int?) + restartdelayms: Int?, + deliverycontext: [String: AnyCodable]?) { self.raw = raw self.basehash = basehash self.sessionkey = sessionkey self.note = note self.restartdelayms = restartdelayms + self.deliverycontext = deliverycontext } private enum CodingKeys: String, CodingKey { @@ -1677,6 +1680,7 @@ public struct ConfigApplyParams: Codable, Sendable { case sessionkey = "sessionKey" case note case restartdelayms = "restartDelayMs" + case deliverycontext = "deliveryContext" } } @@ -1686,19 +1690,22 @@ public struct ConfigPatchParams: Codable, Sendable { public let sessionkey: String? public let note: String? public let restartdelayms: Int? + public let deliverycontext: [String: AnyCodable]? public init( raw: String, basehash: String?, sessionkey: String?, note: String?, - restartdelayms: Int?) + restartdelayms: Int?, + deliverycontext: [String: AnyCodable]?) { self.raw = raw self.basehash = basehash self.sessionkey = sessionkey self.note = note self.restartdelayms = restartdelayms + self.deliverycontext = deliverycontext } private enum CodingKeys: String, CodingKey { @@ -1707,6 +1714,7 @@ public struct ConfigPatchParams: Codable, Sendable { case sessionkey = "sessionKey" case note case restartdelayms = "restartDelayMs" + case deliverycontext = "deliveryContext" } } @@ -3709,17 +3717,20 @@ public struct UpdateRunParams: Codable, Sendable { public let note: String? public let restartdelayms: Int? public let timeoutms: Int? + public let deliverycontext: [String: AnyCodable]? public init( sessionkey: String?, note: String?, restartdelayms: Int?, - timeoutms: Int?) + timeoutms: Int?, + deliverycontext: [String: AnyCodable]?) { self.sessionkey = sessionkey self.note = note self.restartdelayms = restartdelayms self.timeoutms = timeoutms + self.deliverycontext = deliverycontext } private enum CodingKeys: String, CodingKey { @@ -3727,6 +3738,7 @@ public struct UpdateRunParams: Codable, Sendable { case note case restartdelayms = "restartDelayMs" case timeoutms = "timeoutMs" + case deliverycontext = "deliveryContext" } } From 5ffdcd19d2806052cc07b3128e296fee87549d24 Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Wed, 4 Mar 2026 15:52:42 +0000 Subject: [PATCH 03/25] ci: retrigger checks From db219052918f3673fed60c55136a1c2972a0f3e1 Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Wed, 4 Mar 2026 17:53:36 +0000 Subject: [PATCH 04/25] refactor: address code review feedback on restart sentinel fix - Extract parseDeliveryContextFromParams() into restart-request.ts and import it in both config.ts and update.ts, eliminating the duplicated inline IIFE parsing in update.ts - Add comment in gateway-tool.ts explaining why agentThreadId is intentionally excluded from liveDeliveryContextForRpc: threadId is reliably derived server-side from the session key via parseSessionThreadInfo() and is not subject to heartbeat contamination - Add beforeEach(vi.clearAllMocks) to server-restart-sentinel.test.ts and remove ad-hoc mockClear() calls from individual tests to prevent mock state from leaking between test cases --- src/agents/tools/gateway-tool.ts | 6 ++++ src/gateway/server-methods/config.ts | 27 +-------------- src/gateway/server-methods/restart-request.ts | 33 +++++++++++++++++++ src/gateway/server-methods/update.ts | 17 ++-------- src/gateway/server-restart-sentinel.test.ts | 10 +++--- 5 files changed, 47 insertions(+), 46 deletions(-) diff --git a/src/agents/tools/gateway-tool.ts b/src/agents/tools/gateway-tool.ts index d4b7b707dd6..48f924f0ae4 100644 --- a/src/agents/tools/gateway-tool.ts +++ b/src/agents/tools/gateway-tool.ts @@ -157,6 +157,12 @@ export function createGatewayTool(opts?: { // accurate sentinel without reading the (potentially stale) session // store. The store is frequently overwritten by heartbeat runs to // { channel: "webchat", to: "heartbeat" }. See #18612. + // + // Note: agentThreadId is intentionally excluded here. threadId is + // reliably derived server-side from the session key (via + // parseSessionThreadInfo), which encodes it as :thread:N or :topic:N. + // That parsing is not subject to heartbeat contamination, so there is + // no need to forward it through the RPC params. const liveDeliveryContextForRpc = opts?.agentChannel != null && String(opts.agentChannel).trim() ? { diff --git a/src/gateway/server-methods/config.ts b/src/gateway/server-methods/config.ts index c6a55a87315..20cb8e37da9 100644 --- a/src/gateway/server-methods/config.ts +++ b/src/gateway/server-methods/config.ts @@ -52,7 +52,7 @@ import { validateConfigSetParams, } from "../protocol/index.js"; import { resolveBaseHashParam } from "./base-hash.js"; -import { parseRestartRequestParams } from "./restart-request.js"; +import { parseDeliveryContextFromParams, parseRestartRequestParams } from "./restart-request.js"; import type { GatewayRequestHandlers, RespondFn } from "./types.js"; import { assertValidParams } from "./validation.js"; @@ -212,31 +212,6 @@ function resolveConfigRestartRequest(params: unknown): { }; } -function parseDeliveryContextFromParams( - params: unknown, -): { channel?: string; to?: string; accountId?: string } | undefined { - const raw = (params as { deliveryContext?: unknown }).deliveryContext; - if (!raw || typeof raw !== "object") { - return undefined; - } - const channel = - typeof (raw as { channel?: unknown }).channel === "string" - ? (raw as { channel: string }).channel.trim() || undefined - : undefined; - const to = - typeof (raw as { to?: unknown }).to === "string" - ? (raw as { to: string }).to.trim() || undefined - : undefined; - const accountId = - typeof (raw as { accountId?: unknown }).accountId === "string" - ? (raw as { accountId: string }).accountId.trim() || undefined - : undefined; - if (!channel && !to) { - return undefined; - } - return { channel, to, accountId }; -} - function buildConfigRestartSentinelPayload(params: { kind: RestartSentinelPayload["kind"]; mode: string; diff --git a/src/gateway/server-methods/restart-request.ts b/src/gateway/server-methods/restart-request.ts index f8b2ddb8c0d..e07395803aa 100644 --- a/src/gateway/server-methods/restart-request.ts +++ b/src/gateway/server-methods/restart-request.ts @@ -1,3 +1,36 @@ +/** + * Parse the live deliveryContext passed by gateway-tool clients. + * + * Clients capture delivery context from the active agent run and forward it + * so server-side handlers can write an accurate sentinel without reading the + * persisted session store, which heartbeat runs frequently overwrite to + * { channel: "webchat", to: "heartbeat" }. See #18612. + */ +export function parseDeliveryContextFromParams( + params: unknown, +): { channel?: string; to?: string; accountId?: string } | undefined { + const raw = (params as { deliveryContext?: unknown }).deliveryContext; + if (!raw || typeof raw !== "object") { + return undefined; + } + const channel = + typeof (raw as { channel?: unknown }).channel === "string" + ? (raw as { channel: string }).channel.trim() || undefined + : undefined; + const to = + typeof (raw as { to?: unknown }).to === "string" + ? (raw as { to: string }).to.trim() || undefined + : undefined; + const accountId = + typeof (raw as { accountId?: unknown }).accountId === "string" + ? (raw as { accountId: string }).accountId.trim() || undefined + : undefined; + if (!channel && !to) { + return undefined; + } + return { channel, to, accountId }; +} + export function parseRestartRequestParams(params: unknown): { sessionKey: string | undefined; note: string | undefined; diff --git a/src/gateway/server-methods/update.ts b/src/gateway/server-methods/update.ts index 148b80af1e2..4b6a483bb99 100644 --- a/src/gateway/server-methods/update.ts +++ b/src/gateway/server-methods/update.ts @@ -11,7 +11,7 @@ import { normalizeUpdateChannel } from "../../infra/update-channels.js"; import { runGatewayUpdate } from "../../infra/update-runner.js"; import { formatControlPlaneActor, resolveControlPlaneActor } from "../control-plane-audit.js"; import { validateUpdateRunParams } from "../protocol/index.js"; -import { parseRestartRequestParams } from "./restart-request.js"; +import { parseDeliveryContextFromParams, parseRestartRequestParams } from "./restart-request.js"; import type { GatewayRequestHandlers } from "./types.js"; import { assertValidParams } from "./validation.js"; @@ -23,21 +23,8 @@ export const updateHandlers: GatewayRequestHandlers = { const actor = resolveControlPlaneActor(client); const { sessionKey, note, restartDelayMs } = parseRestartRequestParams(params); // Prefer live deliveryContext from params over extractDeliveryInfo() (see #18612). - const paramsDeliveryContextRaw = (params as { deliveryContext?: unknown }).deliveryContext; - const paramsDeliveryContext = - paramsDeliveryContextRaw && typeof paramsDeliveryContextRaw === "object" - ? (() => { - const dc = paramsDeliveryContextRaw as Record; - const channel = - typeof dc.channel === "string" ? dc.channel.trim() || undefined : undefined; - const to = typeof dc.to === "string" ? dc.to.trim() || undefined : undefined; - const accountId = - typeof dc.accountId === "string" ? dc.accountId.trim() || undefined : undefined; - return channel || to ? { channel, to, accountId } : undefined; - })() - : undefined; const { deliveryContext: extractedDeliveryContext, threadId } = extractDeliveryInfo(sessionKey); - const deliveryContext = paramsDeliveryContext ?? extractedDeliveryContext; + const deliveryContext = parseDeliveryContextFromParams(params) ?? extractedDeliveryContext; const timeoutMsRaw = (params as { timeoutMs?: unknown }).timeoutMs; const timeoutMs = typeof timeoutMsRaw === "number" && Number.isFinite(timeoutMsRaw) diff --git a/src/gateway/server-restart-sentinel.test.ts b/src/gateway/server-restart-sentinel.test.ts index bcb7a57ebd7..c247e8d0d6a 100644 --- a/src/gateway/server-restart-sentinel.test.ts +++ b/src/gateway/server-restart-sentinel.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it, vi } from "vitest"; +import { beforeEach, describe, expect, it, vi } from "vitest"; const mocks = vi.hoisted(() => ({ resolveSessionAgentId: vi.fn(() => "agent-from-key"), @@ -84,6 +84,10 @@ vi.mock("../infra/system-events.js", () => ({ const { scheduleRestartSentinelWake } = await import("./server-restart-sentinel.js"); describe("scheduleRestartSentinelWake", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + it("calls agentCommand with resolved channel, to, and sessionKey after restart", async () => { await scheduleRestartSentinelWake({ deps: {} as never }); @@ -106,7 +110,6 @@ describe("scheduleRestartSentinelWake", () => { it("falls back to enqueueSystemEvent when agentCommand throws", async () => { mocks.agentCommand.mockRejectedValueOnce(new Error("agent failed")); - mocks.enqueueSystemEvent.mockClear(); await scheduleRestartSentinelWake({ deps: {} as never }); @@ -121,8 +124,6 @@ describe("scheduleRestartSentinelWake", () => { ok: false, error: new Error("no-target"), } as never); - mocks.agentCommand.mockClear(); - mocks.enqueueSystemEvent.mockClear(); await scheduleRestartSentinelWake({ deps: {} as never }); @@ -134,7 +135,6 @@ describe("scheduleRestartSentinelWake", () => { it("falls back to enqueueSystemEvent on main session key when sentinel has no sessionKey", async () => { mocks.consumeRestartSentinel.mockResolvedValueOnce({ payload: { sessionKey: "" } } as never); - mocks.enqueueSystemEvent.mockClear(); await scheduleRestartSentinelWake({ deps: {} as never }); From 292546cb097124ef1fad86017e980cc593fdbc92 Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Wed, 4 Mar 2026 17:59:31 +0000 Subject: [PATCH 05/25] fix: only forward live delivery context when targeting own session When a gateway tool call (restart, config.apply, config.patch, update.run) specifies an explicit sessionKey that differs from the current agent's session, the live delivery context (agentChannel/agentTo/agentAccountId) belongs to the wrong session and would misroute post-restart replies. Only set deliveryContext in the sentinel/RPC params when: - No explicit sessionKey is provided (falls back to own session), or - The explicit sessionKey matches the current agent's session key Otherwise omit deliveryContext so the server falls back to extractDeliveryInfo(sessionKey), which correctly resolves routing for the target session. --- src/agents/tools/gateway-tool.ts | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/agents/tools/gateway-tool.ts b/src/agents/tools/gateway-tool.ts index 48f924f0ae4..800414f92ea 100644 --- a/src/agents/tools/gateway-tool.ts +++ b/src/agents/tools/gateway-tool.ts @@ -89,10 +89,11 @@ export function createGatewayTool(opts?: { if (!isRestartEnabled(opts?.config)) { throw new Error("Gateway restart is disabled (commands.restart=false)."); } - const sessionKey = + const explicitSessionKey = typeof params.sessionKey === "string" && params.sessionKey.trim() ? params.sessionKey.trim() - : opts?.agentSessionKey?.trim() || undefined; + : undefined; + const sessionKey = (explicitSessionKey ?? opts?.agentSessionKey?.trim()) || undefined; const delayMs = typeof params.delayMs === "number" && Number.isFinite(params.delayMs) ? Math.floor(params.delayMs) @@ -109,8 +110,17 @@ export function createGatewayTool(opts?: { // runs to { channel: "webchat", to: "heartbeat" }, causing the // sentinel to write stale routing data that fails post-restart. // See #18612. + // + // Only apply the live context when the restart targets this agent's + // own session. When an explicit sessionKey points to a different + // session, the live context belongs to the wrong session and would + // misroute the post-restart reply. Fall back to extractDeliveryInfo() + // so the server uses the correct routing for the target session. + const isTargetingOtherSession = + explicitSessionKey != null && + explicitSessionKey !== (opts?.agentSessionKey?.trim() || undefined); const liveContext = - opts?.agentChannel != null && String(opts.agentChannel).trim() + !isTargetingOtherSession && opts?.agentChannel != null && String(opts.agentChannel).trim() ? { channel: String(opts.agentChannel).trim(), to: opts?.agentTo ?? undefined, @@ -178,17 +188,26 @@ export function createGatewayTool(opts?: { restartDelayMs: number | undefined; deliveryContext: typeof liveDeliveryContextForRpc; } => { - const sessionKey = + const explicitSessionKey = typeof params.sessionKey === "string" && params.sessionKey.trim() ? params.sessionKey.trim() - : opts?.agentSessionKey?.trim() || undefined; + : undefined; + const sessionKey = (explicitSessionKey ?? opts?.agentSessionKey?.trim()) || undefined; const note = typeof params.note === "string" && params.note.trim() ? params.note.trim() : undefined; const restartDelayMs = typeof params.restartDelayMs === "number" && Number.isFinite(params.restartDelayMs) ? Math.floor(params.restartDelayMs) : undefined; - return { sessionKey, note, restartDelayMs, deliveryContext: liveDeliveryContextForRpc }; + // Only forward live context when the target session is this agent's + // own session. When an explicit sessionKey points to a different + // session, omit deliveryContext so the server falls back to + // extractDeliveryInfo(sessionKey) which uses that session's routing. + const isTargetingOtherSession = + explicitSessionKey != null && + explicitSessionKey !== (opts?.agentSessionKey?.trim() || undefined); + const deliveryContext = isTargetingOtherSession ? undefined : liveDeliveryContextForRpc; + return { sessionKey, note, restartDelayMs, deliveryContext }; }; const resolveConfigWriteParams = async (): Promise<{ From 331f09b82d8dd4719e429243140cbbcf523322f1 Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Wed, 4 Mar 2026 18:17:59 +0000 Subject: [PATCH 06/25] fix: guard threadId with same session check as deliveryContext in restart opts.agentThreadId belongs to the current agent's thread. When the restart action targets a different sessionKey, forwarding it into the sentinel would cause scheduleRestartSentinelWake to deliver the post-restart reply to the wrong thread. Apply the same isTargetingOtherSession guard used for deliveryContext: only take opts.agentThreadId when the restart targets the current session; otherwise use extracted.threadId from extractDeliveryInfo, which correctly derives threadId from the target session key. --- src/agents/tools/gateway-tool.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/agents/tools/gateway-tool.ts b/src/agents/tools/gateway-tool.ts index 800414f92ea..07772487f41 100644 --- a/src/agents/tools/gateway-tool.ts +++ b/src/agents/tools/gateway-tool.ts @@ -129,8 +129,14 @@ export function createGatewayTool(opts?: { : undefined; const extracted = extractDeliveryInfo(sessionKey); const deliveryContext = liveContext ?? extracted.deliveryContext; + // Guard threadId with the same session check as deliveryContext. When + // targeting another session, opts.agentThreadId belongs to the current + // session's thread and must not be written into the sentinel — it would + // cause scheduleRestartSentinelWake to deliver to the wrong thread. const threadId = - opts?.agentThreadId != null ? String(opts.agentThreadId) : extracted.threadId; + !isTargetingOtherSession && opts?.agentThreadId != null + ? String(opts.agentThreadId) + : extracted.threadId; const payload: RestartSentinelPayload = { kind: "restart", status: "ok", From 3849101443cdb46de74054117646c332619737ad Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Fri, 6 Mar 2026 15:51:31 +0000 Subject: [PATCH 07/25] fix(gateway-tool): require both channel and to before forwarding live delivery context Forwarding liveDeliveryContextForRpc (or liveContext for restart) when only agentChannel is set but agentTo is missing causes the server to prefer an incomplete deliveryContext over extractDeliveryInfo(). The sentinel is then written without `to`, and scheduleRestartSentinelWake bails on `if (!channel || !to)`, silently degrading to a system event with no delivery or agent resume. Fix: guard both liveContext (restart path) and liveDeliveryContextForRpc (config.apply/config.patch/update.run) to require both channel and to before forwarding. Add gateway-tool.test.ts covering the partial-context guard for both the restart and RPC code paths. Fixes: chatgpt-codex-connector P2 review on #34580 --- src/agents/tools/gateway-tool.test.ts | 170 ++++++++++++++++++ src/agents/tools/gateway-tool.ts | 30 +++- src/gateway/server-methods/config.ts | 9 +- .../server-methods/restart-request.test.ts | 58 ++++++ src/gateway/server-methods/restart-request.ts | 12 +- src/gateway/server-methods/update.ts | 9 +- 6 files changed, 277 insertions(+), 11 deletions(-) create mode 100644 src/agents/tools/gateway-tool.test.ts create mode 100644 src/gateway/server-methods/restart-request.test.ts diff --git a/src/agents/tools/gateway-tool.test.ts b/src/agents/tools/gateway-tool.test.ts new file mode 100644 index 00000000000..11e207c3376 --- /dev/null +++ b/src/agents/tools/gateway-tool.test.ts @@ -0,0 +1,170 @@ +import { describe, expect, it, vi } from "vitest"; + +const mocks = vi.hoisted(() => ({ + isRestartEnabled: vi.fn(() => true), + resolveConfigSnapshotHash: vi.fn(() => undefined), + extractDeliveryInfo: vi.fn(() => ({ + deliveryContext: { channel: "telegram", to: "+19995550001", accountId: undefined }, + threadId: undefined, + })), + writeRestartSentinel: vi.fn(async () => undefined), + scheduleGatewaySigusr1Restart: vi.fn(() => ({ ok: true })), + formatDoctorNonInteractiveHint: vi.fn(() => ""), + callGatewayTool: vi.fn(async () => ({})), + readGatewayCallOptions: vi.fn(() => ({})), +})); + +vi.mock("../../config/commands.js", () => ({ isRestartEnabled: mocks.isRestartEnabled })); +vi.mock("../../config/io.js", () => ({ + resolveConfigSnapshotHash: mocks.resolveConfigSnapshotHash, +})); +vi.mock("../../config/sessions.js", () => ({ + extractDeliveryInfo: mocks.extractDeliveryInfo, +})); +vi.mock("../../infra/restart-sentinel.js", () => ({ + writeRestartSentinel: mocks.writeRestartSentinel, + formatDoctorNonInteractiveHint: mocks.formatDoctorNonInteractiveHint, +})); +vi.mock("../../infra/restart.js", () => ({ + scheduleGatewaySigusr1Restart: mocks.scheduleGatewaySigusr1Restart, +})); +vi.mock("./gateway.js", () => ({ + callGatewayTool: mocks.callGatewayTool, + readGatewayCallOptions: mocks.readGatewayCallOptions, +})); + +import { createGatewayTool } from "./gateway-tool.js"; + +async function execTool( + tool: ReturnType, + params: Record, +) { + return (tool as unknown as { execute: (id: string, args: unknown) => Promise }).execute( + "test-id", + params, + ); +} + +function getCallArg(mockFn: { mock: { calls: unknown[] } }, callIdx: number, argIdx: number): T { + const calls = mockFn.mock.calls as unknown[][]; + return calls[callIdx]?.[argIdx] as T; +} + +describe("createGatewayTool – live delivery context guard", () => { + it("does not forward liveDeliveryContextForRpc when agentTo is missing", async () => { + mocks.callGatewayTool.mockClear(); + const tool = createGatewayTool({ + agentSessionKey: "agent:main:main", + agentChannel: "discord", + agentTo: undefined, // intentionally missing + }); + + await execTool(tool, { + action: "config.patch", + raw: '{"key":"value"}', + baseHash: "abc123", + sessionKey: "agent:main:main", + note: "test patch", + }); + + const forwardedParams = getCallArg>(mocks.callGatewayTool, 0, 2); + // deliveryContext should be undefined — falling back to server-side extractDeliveryInfo + expect(forwardedParams?.deliveryContext).toBeUndefined(); + }); + + it("forwards liveDeliveryContextForRpc when both agentChannel and agentTo are present", async () => { + mocks.callGatewayTool.mockClear(); + const tool = createGatewayTool({ + agentSessionKey: "agent:main:main", + agentChannel: "discord", + agentTo: "123456789", + }); + + await execTool(tool, { + action: "config.patch", + raw: '{"key":"value"}', + baseHash: "abc123", + sessionKey: "agent:main:main", + note: "test patch", + }); + + const forwardedParams = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(forwardedParams?.deliveryContext).toEqual({ + channel: "discord", + to: "123456789", + accountId: undefined, + threadId: undefined, + }); + }); + + it("includes threadId in liveDeliveryContextForRpc when agentThreadId is present", async () => { + mocks.callGatewayTool.mockClear(); + const tool = createGatewayTool({ + agentSessionKey: "agent:main:main", + agentChannel: "slack", + agentTo: "C012AB3CD", + agentThreadId: "1234567890.123456", + }); + + await execTool(tool, { + action: "config.patch", + raw: '{"key":"value"}', + baseHash: "abc123", + sessionKey: "agent:main:main", + note: "test patch", + }); + + const forwardedParams = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(forwardedParams?.deliveryContext).toEqual({ + channel: "slack", + to: "C012AB3CD", + accountId: undefined, + threadId: "1234567890.123456", + }); + }); + + it("does not forward live restart context when agentTo is missing", async () => { + mocks.writeRestartSentinel.mockClear(); + mocks.extractDeliveryInfo.mockReturnValueOnce({ + deliveryContext: { channel: "telegram", to: "+19995550001", accountId: undefined }, + threadId: undefined, + }); + + const tool = createGatewayTool({ + agentSessionKey: "agent:main:main", + agentChannel: "discord", + agentTo: undefined, // intentionally missing + }); + + await execTool(tool, { action: "restart" }); + + const sentinelPayload = getCallArg<{ deliveryContext?: { channel?: string; to?: string } }>( + mocks.writeRestartSentinel, + 0, + 0, + ); + // Should fall back to extractDeliveryInfo() result, not the incomplete live context + expect(sentinelPayload?.deliveryContext?.channel).toBe("telegram"); + expect(sentinelPayload?.deliveryContext?.to).toBe("+19995550001"); + }); + + it("uses live restart context when both agentChannel and agentTo are present", async () => { + mocks.writeRestartSentinel.mockClear(); + + const tool = createGatewayTool({ + agentSessionKey: "agent:main:main", + agentChannel: "discord", + agentTo: "123456789", + }); + + await execTool(tool, { action: "restart" }); + + const sentinelPayload = getCallArg<{ deliveryContext?: { channel?: string; to?: string } }>( + mocks.writeRestartSentinel, + 0, + 0, + ); + expect(sentinelPayload?.deliveryContext?.channel).toBe("discord"); + expect(sentinelPayload?.deliveryContext?.to).toBe("123456789"); + }); +}); diff --git a/src/agents/tools/gateway-tool.ts b/src/agents/tools/gateway-tool.ts index 07772487f41..09f24880cbf 100644 --- a/src/agents/tools/gateway-tool.ts +++ b/src/agents/tools/gateway-tool.ts @@ -119,11 +119,20 @@ export function createGatewayTool(opts?: { const isTargetingOtherSession = explicitSessionKey != null && explicitSessionKey !== (opts?.agentSessionKey?.trim() || undefined); + // Only forward live context when both channel and to are present. + // Forwarding a partial context (channel without to) causes the server + // to write a sentinel without `to`, and scheduleRestartSentinelWake + // bails on `if (!channel || !to)`, silently degrading to a system + // event with no delivery/resume. See #18612. const liveContext = - !isTargetingOtherSession && opts?.agentChannel != null && String(opts.agentChannel).trim() + !isTargetingOtherSession && + opts?.agentChannel != null && + String(opts.agentChannel).trim() && + opts?.agentTo != null && + String(opts.agentTo).trim() ? { channel: String(opts.agentChannel).trim(), - to: opts?.agentTo ?? undefined, + to: String(opts.agentTo).trim(), accountId: opts?.agentAccountId ?? undefined, } : undefined; @@ -179,12 +188,25 @@ export function createGatewayTool(opts?: { // parseSessionThreadInfo), which encodes it as :thread:N or :topic:N. // That parsing is not subject to heartbeat contamination, so there is // no need to forward it through the RPC params. + // Only forward live context when both channel and to are present. + // Forwarding a partial context (channel without to) causes the server + // to prefer an incomplete deliveryContext over extractDeliveryInfo(), + // writing a sentinel without `to` that scheduleRestartSentinelWake + // rejects, silently degrading to a system event. See #18612. + // + // threadId is included so the server can use it for sessions where the + // session key is not :thread:-scoped (e.g. Slack replyToMode="all"), in + // which case the session-key-derived threadId would be empty. const liveDeliveryContextForRpc = - opts?.agentChannel != null && String(opts.agentChannel).trim() + opts?.agentChannel != null && + String(opts.agentChannel).trim() && + opts?.agentTo != null && + String(opts.agentTo).trim() ? { channel: String(opts.agentChannel).trim(), - to: opts?.agentTo ?? undefined, + to: String(opts.agentTo).trim(), accountId: opts?.agentAccountId ?? undefined, + threadId: opts?.agentThreadId != null ? String(opts.agentThreadId) : undefined, } : undefined; diff --git a/src/gateway/server-methods/config.ts b/src/gateway/server-methods/config.ts index 20cb8e37da9..7c9e4cf95a0 100644 --- a/src/gateway/server-methods/config.ts +++ b/src/gateway/server-methods/config.ts @@ -194,14 +194,19 @@ function resolveConfigRestartRequest(params: unknown): { } { const { sessionKey, note, restartDelayMs } = parseRestartRequestParams(params); - // Extract threadId from the session key (reliable — derived from key, not store). // For deliveryContext, prefer the live context passed by the client over // extractDeliveryInfo(), which reads the persisted session store. Heartbeat // runs overwrite the store to { channel: "webchat", to: "heartbeat" }, so // reading it here would produce stale routing data. See #18612. - const { deliveryContext: extractedDeliveryContext, threadId } = extractDeliveryInfo(sessionKey); + // + // For threadId, also prefer the client-forwarded value when present. The + // session-key-derived threadId is empty for Slack sessions where replies are + // threaded (replyToMode="all") but the session key is not :thread:-scoped. + const { deliveryContext: extractedDeliveryContext, threadId: extractedThreadId } = + extractDeliveryInfo(sessionKey); const paramsDeliveryContext = parseDeliveryContextFromParams(params); const deliveryContext = paramsDeliveryContext ?? extractedDeliveryContext; + const threadId = paramsDeliveryContext?.threadId ?? extractedThreadId; return { sessionKey, diff --git a/src/gateway/server-methods/restart-request.test.ts b/src/gateway/server-methods/restart-request.test.ts new file mode 100644 index 00000000000..6273ee08815 --- /dev/null +++ b/src/gateway/server-methods/restart-request.test.ts @@ -0,0 +1,58 @@ +import { describe, expect, it } from "vitest"; +import { parseDeliveryContextFromParams } from "./restart-request.js"; + +describe("parseDeliveryContextFromParams", () => { + it("returns undefined when deliveryContext is absent", () => { + expect(parseDeliveryContextFromParams({})).toBeUndefined(); + }); + + it("returns undefined when both channel and to are missing", () => { + expect(parseDeliveryContextFromParams({ deliveryContext: {} })).toBeUndefined(); + }); + + it("returns undefined when only channel is present (partial context rejected)", () => { + expect( + parseDeliveryContextFromParams({ deliveryContext: { channel: "discord" } }), + ).toBeUndefined(); + }); + + it("returns undefined when only to is present (partial context rejected)", () => { + expect( + parseDeliveryContextFromParams({ deliveryContext: { to: "123456789" } }), + ).toBeUndefined(); + }); + + it("returns full context when both channel and to are present", () => { + expect( + parseDeliveryContextFromParams({ + deliveryContext: { channel: "discord", to: "123456789" }, + }), + ).toEqual({ channel: "discord", to: "123456789", accountId: undefined, threadId: undefined }); + }); + + it("includes accountId and threadId when present", () => { + expect( + parseDeliveryContextFromParams({ + deliveryContext: { + channel: "slack", + to: "C012AB3CD", + accountId: "acct-1", + threadId: "1234567890.123456", + }, + }), + ).toEqual({ + channel: "slack", + to: "C012AB3CD", + accountId: "acct-1", + threadId: "1234567890.123456", + }); + }); + + it("trims whitespace from all string fields", () => { + expect( + parseDeliveryContextFromParams({ + deliveryContext: { channel: " discord ", to: " 123 ", threadId: " ts.1 " }, + }), + ).toEqual({ channel: "discord", to: "123", accountId: undefined, threadId: "ts.1" }); + }); +}); diff --git a/src/gateway/server-methods/restart-request.ts b/src/gateway/server-methods/restart-request.ts index e07395803aa..e480de31d9e 100644 --- a/src/gateway/server-methods/restart-request.ts +++ b/src/gateway/server-methods/restart-request.ts @@ -8,7 +8,7 @@ */ export function parseDeliveryContextFromParams( params: unknown, -): { channel?: string; to?: string; accountId?: string } | undefined { +): { channel?: string; to?: string; accountId?: string; threadId?: string } | undefined { const raw = (params as { deliveryContext?: unknown }).deliveryContext; if (!raw || typeof raw !== "object") { return undefined; @@ -25,10 +25,16 @@ export function parseDeliveryContextFromParams( typeof (raw as { accountId?: unknown }).accountId === "string" ? (raw as { accountId: string }).accountId.trim() || undefined : undefined; - if (!channel && !to) { + const threadId = + typeof (raw as { threadId?: unknown }).threadId === "string" + ? (raw as { threadId: string }).threadId.trim() || undefined + : undefined; + // Require both channel and to — a partial context can overwrite a complete + // extracted route and produce a non-routable sentinel. See #18612. + if (!channel || !to) { return undefined; } - return { channel, to, accountId }; + return { channel, to, accountId, threadId }; } export function parseRestartRequestParams(params: unknown): { diff --git a/src/gateway/server-methods/update.ts b/src/gateway/server-methods/update.ts index 4b6a483bb99..9fdd1c4af1b 100644 --- a/src/gateway/server-methods/update.ts +++ b/src/gateway/server-methods/update.ts @@ -23,8 +23,13 @@ export const updateHandlers: GatewayRequestHandlers = { const actor = resolveControlPlaneActor(client); const { sessionKey, note, restartDelayMs } = parseRestartRequestParams(params); // Prefer live deliveryContext from params over extractDeliveryInfo() (see #18612). - const { deliveryContext: extractedDeliveryContext, threadId } = extractDeliveryInfo(sessionKey); - const deliveryContext = parseDeliveryContextFromParams(params) ?? extractedDeliveryContext; + // Also prefer threadId from params when present — the session-key-derived value + // is empty for Slack sessions where replyToMode="all" but the key is not :thread:-scoped. + const { deliveryContext: extractedDeliveryContext, threadId: extractedThreadId } = + extractDeliveryInfo(sessionKey); + const paramsDeliveryContext = parseDeliveryContextFromParams(params); + const deliveryContext = paramsDeliveryContext ?? extractedDeliveryContext; + const threadId = paramsDeliveryContext?.threadId ?? extractedThreadId; const timeoutMsRaw = (params as { timeoutMs?: unknown }).timeoutMs; const timeoutMs = typeof timeoutMsRaw === "number" && Number.isFinite(timeoutMsRaw) From 045bf781a4446c83842dbdae57dbc19aa49b4c7a Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Sun, 8 Mar 2026 18:09:32 +0000 Subject: [PATCH 08/25] fix: derive canonicalize agentId from target key, not current session MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a non-default agent (e.g. agent:shopping-claw:main) calls restart or config/update with sessionKey="main", the gateway treats "main" as resolveMainSessionKey(cfg) = the default agent's session. Previously, isTargetingOtherSession canonicalized the target key using the CURRENT session's agentId, so "main" mapped to the current agent's main session rather than the default agent's — falsely treating a cross-agent request as same-session and forwarding the wrong chat's deliveryContext. Fix: canonicalize each key using its own agentId (resolveAgentIdFromSessionKey on the key itself). For bare "main", this returns DEFAULT_AGENT_ID so "main" → "agent:main:main" regardless of which agent is calling. Applied to both the restart path and the RPC path (resolveGatewayWriteMeta). Add two regression tests covering the cross-agent alias scenario. --- src/agents/tools/gateway-tool.test.ts | 49 +++++++++++++++++++++++++++ src/agents/tools/gateway-tool.ts | 44 +++++++++++++++++++++--- 2 files changed, 88 insertions(+), 5 deletions(-) diff --git a/src/agents/tools/gateway-tool.test.ts b/src/agents/tools/gateway-tool.test.ts index 11e207c3376..81ac3b7649f 100644 --- a/src/agents/tools/gateway-tool.test.ts +++ b/src/agents/tools/gateway-tool.test.ts @@ -167,4 +167,53 @@ describe("createGatewayTool – live delivery context guard", () => { expect(sentinelPayload?.deliveryContext?.channel).toBe("discord"); expect(sentinelPayload?.deliveryContext?.to).toBe("123456789"); }); + + it("does not forward live RPC delivery context when a non-default agent passes sessionKey='main'", async () => { + // "main" resolves to "agent:main:main" (default agent), which differs from the + // current session "agent:shopping-claw:main". Live context must NOT be forwarded. + mocks.callGatewayTool.mockClear(); + const tool = createGatewayTool({ + agentSessionKey: "agent:shopping-claw:main", + agentChannel: "discord", + agentTo: "123456789", + }); + + await execTool(tool, { + action: "config.patch", + raw: '{"key":"value"}', + baseHash: "abc123", + sessionKey: "main", // targets default agent — different from current shopping-claw session + note: "cross-agent patch", + }); + + const forwardedParams = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(forwardedParams?.deliveryContext).toBeUndefined(); + }); + + it("does not forward live restart context when a non-default agent passes sessionKey='main'", async () => { + // "main" resolves to "agent:main:main" (default agent), which differs from the + // current session "agent:shopping-claw:main". Sentinel must not carry this session's thread. + mocks.writeRestartSentinel.mockClear(); + mocks.extractDeliveryInfo.mockReturnValueOnce({ + deliveryContext: { channel: "telegram", to: "+19995550001", accountId: undefined }, + threadId: undefined, + }); + + const tool = createGatewayTool({ + agentSessionKey: "agent:shopping-claw:main", + agentChannel: "discord", + agentTo: "123456789", + }); + + await execTool(tool, { action: "restart", sessionKey: "main" }); + + const sentinelPayload = getCallArg<{ deliveryContext?: { channel?: string; to?: string } }>( + mocks.writeRestartSentinel, + 0, + 0, + ); + // Should fall back to extractDeliveryInfo() for the targeted session, not current session's live context + expect(sentinelPayload?.deliveryContext?.channel).toBe("telegram"); + expect(sentinelPayload?.deliveryContext?.to).toBe("+19995550001"); + }); }); diff --git a/src/agents/tools/gateway-tool.ts b/src/agents/tools/gateway-tool.ts index 09f24880cbf..27c89750764 100644 --- a/src/agents/tools/gateway-tool.ts +++ b/src/agents/tools/gateway-tool.ts @@ -116,9 +116,29 @@ export function createGatewayTool(opts?: { // session, the live context belongs to the wrong session and would // misroute the post-restart reply. Fall back to extractDeliveryInfo() // so the server uses the correct routing for the target session. + // Canonicalize both keys before comparing so that aliases like "main" + // and "agent:main:main" are treated as the same session. Without this, + // an operator passing sessionKey="main" would be incorrectly treated as + // targeting a different session, suppressing live deliveryContext and + // falling back to the stale session store. See #18612. + const ownKey = opts?.agentSessionKey?.trim() || undefined; + const agentId = resolveAgentIdFromSessionKey(ownKey); + // Canonicalize each key using its OWN agentId — not the current session's. + // If a non-default agent passes sessionKey="main", resolveAgentIdFromSessionKey + // returns DEFAULT_AGENT_ID ("main") so "main" → "agent:main:main". Using the + // current session's agentId instead would map "main" to the current agent's main + // session, falsely treating a cross-agent request as same-session. See #18612. + const canonicalizeOwn = (k: string) => + canonicalizeMainSessionAlias({ cfg: opts?.config, agentId, sessionKey: k }); + const canonicalizeTarget = (k: string) => + canonicalizeMainSessionAlias({ + cfg: opts?.config, + agentId: resolveAgentIdFromSessionKey(k), + sessionKey: k, + }); const isTargetingOtherSession = explicitSessionKey != null && - explicitSessionKey !== (opts?.agentSessionKey?.trim() || undefined); + canonicalizeTarget(explicitSessionKey) !== (ownKey ? canonicalizeOwn(ownKey) : undefined); // Only forward live context when both channel and to are present. // Forwarding a partial context (channel without to) causes the server // to write a sentinel without `to`, and scheduleRestartSentinelWake @@ -228,12 +248,26 @@ export function createGatewayTool(opts?: { ? Math.floor(params.restartDelayMs) : undefined; // Only forward live context when the target session is this agent's - // own session. When an explicit sessionKey points to a different - // session, omit deliveryContext so the server falls back to - // extractDeliveryInfo(sessionKey) which uses that session's routing. + // own session. Canonicalize both keys before comparing so that aliases + // like "main" and "agent:main:main" are treated as the same session. + // When an explicit sessionKey points to a different session, omit + // deliveryContext so the server falls back to extractDeliveryInfo(sessionKey). + const rpcOwnKey = opts?.agentSessionKey?.trim() || undefined; + const rpcAgentId = resolveAgentIdFromSessionKey(rpcOwnKey); + // Same cross-agent alias fix as the restart path: derive agentId from each key + // independently so that "main" resolves to the default agent, not the current one. + const rpcCanonicalizeOwn = (k: string) => + canonicalizeMainSessionAlias({ cfg: opts?.config, agentId: rpcAgentId, sessionKey: k }); + const rpcCanonicalizeTarget = (k: string) => + canonicalizeMainSessionAlias({ + cfg: opts?.config, + agentId: resolveAgentIdFromSessionKey(k), + sessionKey: k, + }); const isTargetingOtherSession = explicitSessionKey != null && - explicitSessionKey !== (opts?.agentSessionKey?.trim() || undefined); + rpcCanonicalizeTarget(explicitSessionKey) !== + (rpcOwnKey ? rpcCanonicalizeOwn(rpcOwnKey) : undefined); const deliveryContext = isTargetingOtherSession ? undefined : liveDeliveryContextForRpc; return { sessionKey, note, restartDelayMs, deliveryContext }; }; From 80538c607d893e7e354be665ad5eb422c3969868 Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Sun, 8 Mar 2026 19:47:18 +0000 Subject: [PATCH 09/25] fix: deliver restart notice explicitly before agent resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restore deliverOutboundPayloads() to send the restart summary deterministically before calling agentCommand() for the agent resume turn. Previously the notice was only delivered as input to the model via agentCommand(), making it model-dependent: if the model rewrote or omitted the content, the user would never see the restart summary/note. The new two-step flow: 1. deliverOutboundPayloads() — guaranteed delivery of the exact restart notice (model-independent). Restores the Slack replyToId mapping from main that ensures threaded replies land in the right thread. 2. agentCommand() — agent resume turn so the agent can continue autonomously and optionally provide additional context. Update test to assert deliverOutboundPayloads fires before agentCommand and verify the two-step ordering is preserved. --- src/gateway/server-restart-sentinel.test.ts | 30 ++++++++++++++++- src/gateway/server-restart-sentinel.ts | 37 +++++++++++++++++---- 2 files changed, 59 insertions(+), 8 deletions(-) diff --git a/src/gateway/server-restart-sentinel.test.ts b/src/gateway/server-restart-sentinel.test.ts index c247e8d0d6a..24a00296189 100644 --- a/src/gateway/server-restart-sentinel.test.ts +++ b/src/gateway/server-restart-sentinel.test.ts @@ -25,6 +25,8 @@ const mocks = vi.hoisted(() => ({ })), normalizeChannelId: vi.fn((channel: string) => channel), resolveOutboundTarget: vi.fn(() => ({ ok: true as const, to: "+15550002" })), + deliverOutboundPayloads: vi.fn(async () => undefined), + buildOutboundSessionContext: vi.fn(() => ({ agentId: "main", sessionKey: "agent:main:main" })), agentCommand: vi.fn(async () => undefined), enqueueSystemEvent: vi.fn(), defaultRuntime: {}, @@ -69,6 +71,14 @@ vi.mock("../infra/outbound/targets.js", () => ({ resolveOutboundTarget: mocks.resolveOutboundTarget, })); +vi.mock("../infra/outbound/deliver.js", () => ({ + deliverOutboundPayloads: mocks.deliverOutboundPayloads, +})); + +vi.mock("../infra/outbound/session-context.js", () => ({ + buildOutboundSessionContext: mocks.buildOutboundSessionContext, +})); + vi.mock("../commands/agent.js", () => ({ agentCommand: mocks.agentCommand, })); @@ -88,9 +98,21 @@ describe("scheduleRestartSentinelWake", () => { vi.clearAllMocks(); }); - it("calls agentCommand with resolved channel, to, and sessionKey after restart", async () => { + it("delivers restart notice directly then resumes agent after restart", async () => { await scheduleRestartSentinelWake({ deps: {} as never }); + // Step 1: deterministic delivery (model-independent) + expect(mocks.deliverOutboundPayloads).toHaveBeenCalledWith( + expect.objectContaining({ + channel: "whatsapp", + to: "+15550002", + accountId: "acct-2", + payloads: [{ text: "restart message" }], + bestEffort: true, + }), + ); + + // Step 2: agent resume turn expect(mocks.agentCommand).toHaveBeenCalledWith( expect.objectContaining({ message: "restart message", @@ -105,6 +127,12 @@ describe("scheduleRestartSentinelWake", () => { mocks.defaultRuntime, {}, ); + + // Verify delivery happened before resume + const deliverOrder = mocks.deliverOutboundPayloads.mock.invocationCallOrder[0]; + const agentOrder = mocks.agentCommand.mock.invocationCallOrder[0]; + expect(deliverOrder).toBeLessThan(agentOrder); + expect(mocks.enqueueSystemEvent).not.toHaveBeenCalled(); }); diff --git a/src/gateway/server-restart-sentinel.ts b/src/gateway/server-restart-sentinel.ts index dadf596d6ad..6c603456aa0 100644 --- a/src/gateway/server-restart-sentinel.ts +++ b/src/gateway/server-restart-sentinel.ts @@ -4,6 +4,8 @@ import type { CliDeps } from "../cli/deps.js"; import { agentCommand } from "../commands/agent.js"; import { resolveMainSessionKeyFromConfig } from "../config/sessions.js"; import { parseSessionThreadInfo } from "../config/sessions/delivery-info.js"; +import { deliverOutboundPayloads } from "../infra/outbound/deliver.js"; +import { buildOutboundSessionContext } from "../infra/outbound/session-context.js"; import { resolveOutboundTarget } from "../infra/outbound/targets.js"; import { consumeRestartSentinel, @@ -76,14 +78,35 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) { sessionThreadId ?? (origin?.threadId != null ? String(origin.threadId) : undefined); + // Step 1: deliver the restart notice deterministically — model-independent, guaranteed. + // Slack uses replyToId (thread_ts) for threading; deliverOutboundPayloads does not do + // this mapping automatically, so we convert here. See #17716. + const isSlack = channel === "slack"; + const replyToId = isSlack && threadId != null && threadId !== "" ? String(threadId) : undefined; + const resolvedThreadId = isSlack ? undefined : threadId; + const outboundSession = buildOutboundSessionContext({ cfg, sessionKey }); + try { + await deliverOutboundPayloads({ + cfg, + channel, + to: resolved.to, + accountId: origin?.accountId, + replyToId, + threadId: resolvedThreadId, + payloads: [{ text: message }], + session: outboundSession, + bestEffort: true, + }); + } catch { + // bestEffort: true means this should not throw, but guard anyway + } + + // Step 2: trigger an agent resume turn so the agent can continue autonomously + // after restart. The model sees the restart context and can respond/take actions. + // This is safe post-restart: scheduleRestartSentinelWake() runs in the new process + // with zero in-flight replies, so the pre-restart race condition (ab4a08a82) does + // not apply here. try { - // Use agentCommand() rather than deliverOutboundPayloads() so the restart - // message is a proper agent turn: the user is notified AND the agent sees - // the message in its conversation history and can resume autonomously. - // - // This is safe post-restart because scheduleRestartSentinelWake() runs in - // the new process, where there are zero in-flight replies. The pre-restart - // race condition fixed in ab4a08a82 does not apply here. await agentCommand( { message, From 33c24858ff1d45bba081b24989872706f21b6b37 Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Mon, 9 Mar 2026 13:42:52 +0000 Subject: [PATCH 10/25] fix: omit live deliveryContext when targeting a remote gateway resolveGatewayWriteMeta() was forwarding the local agent run's deliveryContext to config.apply/config.patch/update.run even when gatewayUrl pointed to a remote gateway. Server handlers now prefer params.deliveryContext over extractDeliveryInfo(sessionKey), so a remote restart sentinel would be written with the local chat's channel/ to, causing post-restart wake messages to be delivered to the caller's chat instead of the session that lives on the remote gateway. Fix: gate deliveryContext forwarding on isRemoteGateway (truthy gatewayOpts.gatewayUrl). When targeting a remote gateway, omit deliveryContext so the remote server's extractDeliveryInfo(sessionKey) remains authoritative for the routing of that session. See #18612. --- src/agents/tools/gateway-tool.test.ts | 25 +++++++++++++++++++++++++ src/agents/tools/gateway-tool.ts | 10 +++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/agents/tools/gateway-tool.test.ts b/src/agents/tools/gateway-tool.test.ts index 81ac3b7649f..75039fc53d1 100644 --- a/src/agents/tools/gateway-tool.test.ts +++ b/src/agents/tools/gateway-tool.test.ts @@ -168,6 +168,31 @@ describe("createGatewayTool – live delivery context guard", () => { expect(sentinelPayload?.deliveryContext?.to).toBe("123456789"); }); + it("does not forward live RPC delivery context when gatewayUrl targets a remote gateway", async () => { + // A remote gateway has its own extractDeliveryInfo(sessionKey) — forwarding + // the local agent's deliveryContext would write a sentinel with the wrong + // destination on the remote host. + mocks.callGatewayTool.mockClear(); + mocks.readGatewayCallOptions.mockReturnValueOnce({ gatewayUrl: "wss://remote-gw.example.com" }); + const tool = createGatewayTool({ + agentSessionKey: "agent:main:main", + agentChannel: "discord", + agentTo: "123456789", + }); + + await execTool(tool, { + action: "config.patch", + raw: '{"key":"value"}', + baseHash: "abc123", + sessionKey: "agent:main:main", + note: "remote patch", + gatewayUrl: "wss://remote-gw.example.com", + }); + + const forwardedParams = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(forwardedParams?.deliveryContext).toBeUndefined(); + }); + it("does not forward live RPC delivery context when a non-default agent passes sessionKey='main'", async () => { // "main" resolves to "agent:main:main" (default agent), which differs from the // current session "agent:shopping-claw:main". Live context must NOT be forwarded. diff --git a/src/agents/tools/gateway-tool.ts b/src/agents/tools/gateway-tool.ts index 27c89750764..50e0698b588 100644 --- a/src/agents/tools/gateway-tool.ts +++ b/src/agents/tools/gateway-tool.ts @@ -268,7 +268,15 @@ export function createGatewayTool(opts?: { explicitSessionKey != null && rpcCanonicalizeTarget(explicitSessionKey) !== (rpcOwnKey ? rpcCanonicalizeOwn(rpcOwnKey) : undefined); - const deliveryContext = isTargetingOtherSession ? undefined : liveDeliveryContextForRpc; + // Also omit when the call targets a remote gateway. The remote server's + // extractDeliveryInfo(sessionKey) is the authoritative source for that + // session's delivery route. Forwarding the local agent run's deliveryContext + // would write a sentinel with the wrong chat destination on the remote host, + // causing post-restart wake messages to be sent to the caller's chat instead + // of the session on the remote gateway. See #18612. + const isRemoteGateway = Boolean(gatewayOpts.gatewayUrl?.trim()); + const deliveryContext = + isTargetingOtherSession || isRemoteGateway ? undefined : liveDeliveryContextForRpc; return { sessionKey, note, restartDelayMs, deliveryContext }; }; From e705d9b23d2b0d1b847078f9ca092412fd6525c7 Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Mon, 9 Mar 2026 16:46:16 +0000 Subject: [PATCH 11/25] fix: distinguish local vs remote gatewayUrl before suppressing deliveryContext MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit isRemoteGateway was inferred from gatewayUrl being present, but gatewayUrl overrides are valid for loopback/local targets too (ws://127.0.0.1:, localhost, [::1]). These local-override calls should still forward deliveryContext — treating them as remote falls back to extractDeliveryInfo(sessionKey) and reintroduces the stale heartbeat routing this patch was meant to fix. Fix: export resolveGatewayTarget() from gateway.ts (returns 'local' | 'remote' | undefined) and use it instead of Boolean(gatewayUrl?.trim()). Only gatewayUrl values that classify as 'remote' now suppress deliveryContext. Adds test coverage for the local loopback case. --- src/agents/tools/gateway-tool.test.ts | 34 +++++++++++++++++++++++++++ src/agents/tools/gateway-tool.ts | 9 +++++-- src/agents/tools/gateway.ts | 19 ++++++++++++++- 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/src/agents/tools/gateway-tool.test.ts b/src/agents/tools/gateway-tool.test.ts index 75039fc53d1..6e0c9e14e51 100644 --- a/src/agents/tools/gateway-tool.test.ts +++ b/src/agents/tools/gateway-tool.test.ts @@ -12,6 +12,7 @@ const mocks = vi.hoisted(() => ({ formatDoctorNonInteractiveHint: vi.fn(() => ""), callGatewayTool: vi.fn(async () => ({})), readGatewayCallOptions: vi.fn(() => ({})), + resolveGatewayTarget: vi.fn(() => undefined), })); vi.mock("../../config/commands.js", () => ({ isRestartEnabled: mocks.isRestartEnabled })); @@ -31,6 +32,7 @@ vi.mock("../../infra/restart.js", () => ({ vi.mock("./gateway.js", () => ({ callGatewayTool: mocks.callGatewayTool, readGatewayCallOptions: mocks.readGatewayCallOptions, + resolveGatewayTarget: mocks.resolveGatewayTarget, })); import { createGatewayTool } from "./gateway-tool.js"; @@ -174,6 +176,7 @@ describe("createGatewayTool – live delivery context guard", () => { // destination on the remote host. mocks.callGatewayTool.mockClear(); mocks.readGatewayCallOptions.mockReturnValueOnce({ gatewayUrl: "wss://remote-gw.example.com" }); + mocks.resolveGatewayTarget.mockReturnValueOnce("remote"); const tool = createGatewayTool({ agentSessionKey: "agent:main:main", agentChannel: "discord", @@ -193,6 +196,37 @@ describe("createGatewayTool – live delivery context guard", () => { expect(forwardedParams?.deliveryContext).toBeUndefined(); }); + it("forwards live RPC delivery context when gatewayUrl is a local loopback override", async () => { + // A gatewayUrl pointing to 127.0.0.1/localhost/[::1] is still the local server; + // deliveryContext must be forwarded so restart sentinels use the correct chat destination. + mocks.callGatewayTool.mockClear(); + mocks.readGatewayCallOptions.mockReturnValueOnce({ + gatewayUrl: "ws://127.0.0.1:18789", + }); + mocks.resolveGatewayTarget.mockReturnValueOnce("local"); + const tool = createGatewayTool({ + agentSessionKey: "agent:main:main", + agentChannel: "discord", + agentTo: "123456789", + }); + + await execTool(tool, { + action: "config.patch", + raw: '{"key":"value"}', + baseHash: "abc123", + sessionKey: "agent:main:main", + note: "local loopback patch", + gatewayUrl: "ws://127.0.0.1:18789", + }); + + const forwardedParams = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(forwardedParams?.deliveryContext).toEqual({ + channel: "discord", + to: "123456789", + accountId: undefined, + }); + }); + it("does not forward live RPC delivery context when a non-default agent passes sessionKey='main'", async () => { // "main" resolves to "agent:main:main" (default agent), which differs from the // current session "agent:shopping-claw:main". Live context must NOT be forwarded. diff --git a/src/agents/tools/gateway-tool.ts b/src/agents/tools/gateway-tool.ts index 50e0698b588..b47293f0106 100644 --- a/src/agents/tools/gateway-tool.ts +++ b/src/agents/tools/gateway-tool.ts @@ -12,7 +12,7 @@ import { scheduleGatewaySigusr1Restart } from "../../infra/restart.js"; import { createSubsystemLogger } from "../../logging/subsystem.js"; import { stringEnum } from "../schema/typebox.js"; import { type AnyAgentTool, jsonResult, readStringParam } from "./common.js"; -import { callGatewayTool, readGatewayCallOptions } from "./gateway.js"; +import { callGatewayTool, readGatewayCallOptions, resolveGatewayTarget } from "./gateway.js"; const log = createSubsystemLogger("gateway-tool"); @@ -274,7 +274,12 @@ export function createGatewayTool(opts?: { // would write a sentinel with the wrong chat destination on the remote host, // causing post-restart wake messages to be sent to the caller's chat instead // of the session on the remote gateway. See #18612. - const isRemoteGateway = Boolean(gatewayOpts.gatewayUrl?.trim()); + // Only suppress deliveryContext for truly remote gateways. A gatewayUrl + // override pointing to a local loopback address (127.0.0.1, localhost, + // [::1]) is still the local server and should forward context normally; + // treating it as remote would fall back to extractDeliveryInfo(sessionKey) + // and reintroduce the stale heartbeat routing this patch was meant to fix. + const isRemoteGateway = resolveGatewayTarget(gatewayOpts) === "remote"; const deliveryContext = isTargetingOtherSession || isRemoteGateway ? undefined : liveDeliveryContextForRpc; return { sessionKey, note, restartDelayMs, deliveryContext }; diff --git a/src/agents/tools/gateway.ts b/src/agents/tools/gateway.ts index c31b7751e10..1c372403f2a 100644 --- a/src/agents/tools/gateway.ts +++ b/src/agents/tools/gateway.ts @@ -13,7 +13,7 @@ export type GatewayCallOptions = { timeoutMs?: number; }; -type GatewayOverrideTarget = "local" | "remote"; +export type GatewayOverrideTarget = "local" | "remote"; export function readGatewayCallOptions(params: Record): GatewayCallOptions { return { @@ -113,6 +113,23 @@ function resolveGatewayOverrideToken(params: { }).token; } +/** + * Resolves whether a GatewayCallOptions points to a local or remote gateway. + * Returns undefined when no gatewayUrl override is present (default local gateway). + * Local loopback overrides (127.0.0.1, localhost, [::1]) return "local"; + * all other URL overrides return "remote". + */ +export function resolveGatewayTarget(opts?: GatewayCallOptions): GatewayOverrideTarget | undefined { + if (trimToUndefined(opts?.gatewayUrl) === undefined) { + return undefined; + } + const cfg = loadConfig(); + return validateGatewayUrlOverrideForAgentTools({ + cfg, + urlOverride: String(opts?.gatewayUrl), + }).target; +} + export function resolveGatewayOptions(opts?: GatewayCallOptions) { const cfg = loadConfig(); const validatedOverride = From 645def75353d3f0a34977af2f9c396109e82070b Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Mon, 9 Mar 2026 19:45:37 +0000 Subject: [PATCH 12/25] fix: treat config-based remote gateway (gateway.mode=remote) as remote target resolveGatewayTarget() previously returned undefined when no gatewayUrl override was provided, even when gateway.mode=remote routes the call to gateway.remote.url. This caused isRemoteGateway to be false in that path, so deliveryContext was forwarded to the remote host and could stamp the restart sentinel with the local chat route, misdelivering post-restart wake messages. Fix: check gateway.mode=remote in the no-override branch and return 'remote' so deliveryContext is suppressed for config-based remote targets the same way it is for explicit gatewayUrl overrides. Adds a test covering the config-based remote mode case (no gatewayUrl). Closes #34580 (P1 review comment). --- src/agents/tools/gateway-tool.test.ts | 27 +++++++++++++++++++++++++++ src/agents/tools/gateway.ts | 18 ++++++++++++------ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/src/agents/tools/gateway-tool.test.ts b/src/agents/tools/gateway-tool.test.ts index 6e0c9e14e51..2e3cab0f520 100644 --- a/src/agents/tools/gateway-tool.test.ts +++ b/src/agents/tools/gateway-tool.test.ts @@ -170,6 +170,33 @@ describe("createGatewayTool – live delivery context guard", () => { expect(sentinelPayload?.deliveryContext?.to).toBe("123456789"); }); + it("does not forward live RPC delivery context when gateway.mode=remote is configured (no URL override)", async () => { + // When gateway.mode=remote is set in config, callGatewayTool() routes to + // gateway.remote.url without an explicit gatewayUrl param. resolveGatewayTarget + // must return "remote" in this case so deliveryContext is suppressed, preventing + // the remote sentinel from being stamped with the local chat route. + mocks.callGatewayTool.mockClear(); + // No gatewayUrl override — config-based remote mode + mocks.readGatewayCallOptions.mockReturnValueOnce({}); + mocks.resolveGatewayTarget.mockReturnValueOnce("remote"); + const tool = createGatewayTool({ + agentSessionKey: "agent:main:main", + agentChannel: "discord", + agentTo: "123456789", + }); + + await execTool(tool, { + action: "config.patch", + raw: '{"key":"value"}', + baseHash: "abc123", + sessionKey: "agent:main:main", + note: "config-remote patch (no URL override)", + }); + + const forwardedParams = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(forwardedParams?.deliveryContext).toBeUndefined(); + }); + it("does not forward live RPC delivery context when gatewayUrl targets a remote gateway", async () => { // A remote gateway has its own extractDeliveryInfo(sessionKey) — forwarding // the local agent's deliveryContext would write a sentinel with the wrong diff --git a/src/agents/tools/gateway.ts b/src/agents/tools/gateway.ts index 1c372403f2a..3f663dc0508 100644 --- a/src/agents/tools/gateway.ts +++ b/src/agents/tools/gateway.ts @@ -115,15 +115,21 @@ function resolveGatewayOverrideToken(params: { /** * Resolves whether a GatewayCallOptions points to a local or remote gateway. - * Returns undefined when no gatewayUrl override is present (default local gateway). - * Local loopback overrides (127.0.0.1, localhost, [::1]) return "local"; - * all other URL overrides return "remote". + * Returns "remote" when a remote gatewayUrl override is present, OR when + * gateway.mode=remote is configured and no override is provided (config-based remote). + * Returns "local" for explicit loopback URL overrides (127.0.0.1, localhost, [::1]). + * Returns undefined only when no override is present and gateway.mode is not "remote" + * (i.e. the default local gateway). */ export function resolveGatewayTarget(opts?: GatewayCallOptions): GatewayOverrideTarget | undefined { - if (trimToUndefined(opts?.gatewayUrl) === undefined) { - return undefined; - } const cfg = loadConfig(); + if (trimToUndefined(opts?.gatewayUrl) === undefined) { + // No explicit URL override — fall back to config-based mode. + // When gateway.mode=remote, callGatewayTool() routes to the configured + // gateway.remote.url, so this is effectively a remote target even without + // an explicit gatewayUrl param. + return cfg.gateway?.mode === "remote" ? "remote" : undefined; + } return validateGatewayUrlOverrideForAgentTools({ cfg, urlOverride: String(opts?.gatewayUrl), From fd1dd6fa80ee0a1a58a78fbbb266a9c7384ba32a Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Tue, 10 Mar 2026 01:44:44 +0000 Subject: [PATCH 13/25] fix: merge accountId when preferring live delivery context for restart sentinel When liveContext (gateway-tool.ts) or paramsDeliveryContext (config.ts) is present but lacks an accountId, fall back to the accountId from the extracted session store rather than dropping it entirely. This prevents restart follow-up notices from being misrouted on multi-account channels when callers supply channel/to without an explicit accountId. Addresses CR comments on PR #34580. --- src/agents/tools/gateway-tool.ts | 8 +++++++- src/gateway/server-methods/config.ts | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/agents/tools/gateway-tool.ts b/src/agents/tools/gateway-tool.ts index b47293f0106..ca125deb6a6 100644 --- a/src/agents/tools/gateway-tool.ts +++ b/src/agents/tools/gateway-tool.ts @@ -157,7 +157,13 @@ export function createGatewayTool(opts?: { } : undefined; const extracted = extractDeliveryInfo(sessionKey); - const deliveryContext = liveContext ?? extracted.deliveryContext; + const deliveryContext = + liveContext != null + ? { + ...liveContext, + accountId: liveContext.accountId ?? extracted.deliveryContext?.accountId, + } + : extracted.deliveryContext; // Guard threadId with the same session check as deliveryContext. When // targeting another session, opts.agentThreadId belongs to the current // session's thread and must not be written into the sentinel — it would diff --git a/src/gateway/server-methods/config.ts b/src/gateway/server-methods/config.ts index 7c9e4cf95a0..e3300d02045 100644 --- a/src/gateway/server-methods/config.ts +++ b/src/gateway/server-methods/config.ts @@ -205,7 +205,13 @@ function resolveConfigRestartRequest(params: unknown): { const { deliveryContext: extractedDeliveryContext, threadId: extractedThreadId } = extractDeliveryInfo(sessionKey); const paramsDeliveryContext = parseDeliveryContextFromParams(params); - const deliveryContext = paramsDeliveryContext ?? extractedDeliveryContext; + const deliveryContext = + paramsDeliveryContext != null + ? { + ...paramsDeliveryContext, + accountId: paramsDeliveryContext.accountId ?? extractedDeliveryContext?.accountId, + } + : extractedDeliveryContext; const threadId = paramsDeliveryContext?.threadId ?? extractedThreadId; return { From 93f4a3a7f76c9d3cbba3be9d827f85252d9eecd6 Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Tue, 10 Mar 2026 01:52:53 +0000 Subject: [PATCH 14/25] test: expand regression coverage for restart sentinel and delivery context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Grows test count from 21 → 71 across three test files. Each suite now covers the full expected-behavior matrix to catch regressions. gateway-tool.test.ts (10 → 29 tests) - RPC delivery context suite: happy-path full context forwarding, agentAccountId/agentThreadId included, all three write actions (config.apply, config.patch, update.run), partial context suppression (missing channel or to), empty-string guards, stale heartbeat override, same-session aliases ('main' canonicalization), cross-session / cross-agent suppression, remote gateway suppression (explicit URL + gateway.mode=remote), local loopback forwarding, undefined (default local) forwarding - Restart sentinel suite: live context used/suppressed, heartbeat override, threadId included / excluded on cross-session, cross-agent 'main' alias, kind/status/sessionKey fields on the payload server-restart-sentinel.test.ts (4 → 16 tests) - Two-step delivery+resume: ordering assertion, senderIsOwner=false guard, no-op when no sentinel file - Fallback suite: no sessionKey, unresolvable target, missing channel/to, agentCommand throws (notice already delivered), deliverOutboundPayloads throws (resume still runs) - Thread routing: Slack replyToId mapping, non-Slack threadId passthrough, agentCommand receives threadId, sentinel threadId beats session-derived - Context priority: sentinel beats stale heartbeat store, session-store fallback when sentinel has no deliveryContext restart-request.test.ts (7 → 26 tests) - Reject absent/null/non-object deliveryContext - Reject partial contexts (channel-only, to-only, empty strings, whitespace) - Reject non-string field types (number, boolean) - Accept full contexts with all combinations of optional fields - Whitespace trimming for all four fields - undefined returned for empty-after-trim optional fields - Extra/unknown fields are ignored --- src/agents/tools/gateway-tool.test.ts | 530 +++++++++++------- .../server-methods/restart-request.test.ts | 163 +++++- src/gateway/server-restart-sentinel.test.ts | 244 +++++++- 3 files changed, 719 insertions(+), 218 deletions(-) diff --git a/src/agents/tools/gateway-tool.test.ts b/src/agents/tools/gateway-tool.test.ts index 2e3cab0f520..5ea062a7ed5 100644 --- a/src/agents/tools/gateway-tool.test.ts +++ b/src/agents/tools/gateway-tool.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it, vi } from "vitest"; +import { beforeEach, describe, expect, it, vi } from "vitest"; const mocks = vi.hoisted(() => ({ isRestartEnabled: vi.fn(() => true), @@ -52,46 +52,54 @@ function getCallArg(mockFn: { mock: { calls: unknown[] } }, callIdx: number, return calls[callIdx]?.[argIdx] as T; } -describe("createGatewayTool – live delivery context guard", () => { - it("does not forward liveDeliveryContextForRpc when agentTo is missing", async () => { - mocks.callGatewayTool.mockClear(); - const tool = createGatewayTool({ - agentSessionKey: "agent:main:main", - agentChannel: "discord", - agentTo: undefined, // intentionally missing - }); +// ───────────────────────────────────────────────────────────────────────────── +// Helpers to build common test fixtures +// ───────────────────────────────────────────────────────────────────────────── - await execTool(tool, { - action: "config.patch", - raw: '{"key":"value"}', - baseHash: "abc123", - sessionKey: "agent:main:main", - note: "test patch", - }); +function makePatchParams(overrides: Record = {}) { + return { + action: "config.patch", + raw: '{"key":"value"}', + baseHash: "abc123", + sessionKey: "agent:main:main", + note: "test patch", + ...overrides, + }; +} - const forwardedParams = getCallArg>(mocks.callGatewayTool, 0, 2); - // deliveryContext should be undefined — falling back to server-side extractDeliveryInfo - expect(forwardedParams?.deliveryContext).toBeUndefined(); +function makeTool( + opts: { + agentSessionKey?: string; + agentChannel?: string; + agentTo?: string; + agentThreadId?: string; + agentAccountId?: string; + } = {}, +) { + return createGatewayTool({ + agentSessionKey: "agent:main:main", + agentChannel: "discord", + agentTo: "123456789", + ...opts, + }); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Suite 1 — Live delivery context for RPC actions (config.apply / config.patch / update.run) +// ───────────────────────────────────────────────────────────────────────────── + +describe("createGatewayTool – RPC delivery context forwarding", () => { + beforeEach(() => { + vi.clearAllMocks(); }); - it("forwards liveDeliveryContextForRpc when both agentChannel and agentTo are present", async () => { - mocks.callGatewayTool.mockClear(); - const tool = createGatewayTool({ - agentSessionKey: "agent:main:main", - agentChannel: "discord", - agentTo: "123456789", - }); + // ── Happy path: full live context forwarded ────────────────────────────── - await execTool(tool, { - action: "config.patch", - raw: '{"key":"value"}', - baseHash: "abc123", - sessionKey: "agent:main:main", - note: "test patch", - }); + it("forwards liveDeliveryContext when agentChannel and agentTo are both present", async () => { + await execTool(makeTool(), makePatchParams()); - const forwardedParams = getCallArg>(mocks.callGatewayTool, 0, 2); - expect(forwardedParams?.deliveryContext).toEqual({ + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toEqual({ channel: "discord", to: "123456789", accountId: undefined, @@ -99,207 +107,343 @@ describe("createGatewayTool – live delivery context guard", () => { }); }); - it("includes threadId in liveDeliveryContextForRpc when agentThreadId is present", async () => { - mocks.callGatewayTool.mockClear(); - const tool = createGatewayTool({ - agentSessionKey: "agent:main:main", - agentChannel: "slack", - agentTo: "C012AB3CD", - agentThreadId: "1234567890.123456", - }); + it("includes agentAccountId in forwarded context when provided", async () => { + await execTool(makeTool({ agentAccountId: "acct-99" }), makePatchParams()); - await execTool(tool, { - action: "config.patch", + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect((p?.deliveryContext as Record)?.accountId).toBe("acct-99"); + }); + + it("includes agentThreadId in forwarded context when provided", async () => { + await execTool( + makeTool({ agentChannel: "slack", agentTo: "C012AB3CD", agentThreadId: "1234567890.123" }), + makePatchParams({ sessionKey: "agent:main:main" }), + ); + + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect((p?.deliveryContext as Record)?.threadId).toBe("1234567890.123"); + expect((p?.deliveryContext as Record)?.channel).toBe("slack"); + }); + + it("forwards live context for config.apply as well as config.patch", async () => { + await execTool(makeTool(), { + action: "config.apply", raw: '{"key":"value"}', baseHash: "abc123", sessionKey: "agent:main:main", - note: "test patch", }); - const forwardedParams = getCallArg>(mocks.callGatewayTool, 0, 2); - expect(forwardedParams?.deliveryContext).toEqual({ - channel: "slack", - to: "C012AB3CD", - accountId: undefined, - threadId: "1234567890.123456", - }); + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toBeDefined(); + expect((p?.deliveryContext as Record)?.channel).toBe("discord"); }); - it("does not forward live restart context when agentTo is missing", async () => { - mocks.writeRestartSentinel.mockClear(); + it("forwards live context for update.run", async () => { + await execTool(makeTool(), { + action: "update.run", + sessionKey: "agent:main:main", + }); + + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toBeDefined(); + expect((p?.deliveryContext as Record)?.channel).toBe("discord"); + }); + + // ── Partial live context — must be suppressed ──────────────────────────── + + it("suppresses deliveryContext when agentTo is missing", async () => { + await execTool(makeTool({ agentTo: undefined }), makePatchParams()); + + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toBeUndefined(); + }); + + it("suppresses deliveryContext when agentChannel is missing", async () => { + await execTool(makeTool({ agentChannel: undefined }), makePatchParams()); + + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toBeUndefined(); + }); + + it("suppresses deliveryContext when agentChannel is an empty string", async () => { + await execTool(makeTool({ agentChannel: "" }), makePatchParams()); + + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toBeUndefined(); + }); + + it("falls back to server extractDeliveryInfo when live context is suppressed", async () => { + // Confirm the RPC call still goes through — server side will use extractDeliveryInfo + await execTool(makeTool({ agentTo: undefined }), makePatchParams()); + + expect(mocks.callGatewayTool).toHaveBeenCalled(); + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toBeUndefined(); + }); + + // ── Stale heartbeat override prevention ───────────────────────────────── + + it("overrides stale heartbeat deliveryContext from extractDeliveryInfo with live context", async () => { + // extractDeliveryInfo returning heartbeat sink — must not win over live context mocks.extractDeliveryInfo.mockReturnValueOnce({ - deliveryContext: { channel: "telegram", to: "+19995550001", accountId: undefined }, + deliveryContext: { channel: "webchat", to: "heartbeat", accountId: undefined }, threadId: undefined, }); - const tool = createGatewayTool({ - agentSessionKey: "agent:main:main", - agentChannel: "discord", - agentTo: undefined, // intentionally missing - }); + await execTool(makeTool(), makePatchParams()); - await execTool(tool, { action: "restart" }); - - const sentinelPayload = getCallArg<{ deliveryContext?: { channel?: string; to?: string } }>( - mocks.writeRestartSentinel, - 0, - 0, - ); - // Should fall back to extractDeliveryInfo() result, not the incomplete live context - expect(sentinelPayload?.deliveryContext?.channel).toBe("telegram"); - expect(sentinelPayload?.deliveryContext?.to).toBe("+19995550001"); + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect((p?.deliveryContext as Record)?.channel).toBe("discord"); + expect((p?.deliveryContext as Record)?.to).toBe("123456789"); }); - it("uses live restart context when both agentChannel and agentTo are present", async () => { - mocks.writeRestartSentinel.mockClear(); + // ── Session key targeting: same-session ───────────────────────────────── - const tool = createGatewayTool({ - agentSessionKey: "agent:main:main", - agentChannel: "discord", - agentTo: "123456789", - }); - - await execTool(tool, { action: "restart" }); - - const sentinelPayload = getCallArg<{ deliveryContext?: { channel?: string; to?: string } }>( - mocks.writeRestartSentinel, - 0, - 0, + it("forwards live context when sessionKey matches own session key exactly", async () => { + await execTool( + makeTool({ agentSessionKey: "agent:main:main" }), + makePatchParams({ sessionKey: "agent:main:main" }), ); - expect(sentinelPayload?.deliveryContext?.channel).toBe("discord"); - expect(sentinelPayload?.deliveryContext?.to).toBe("123456789"); + + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toBeDefined(); }); - it("does not forward live RPC delivery context when gateway.mode=remote is configured (no URL override)", async () => { - // When gateway.mode=remote is set in config, callGatewayTool() routes to - // gateway.remote.url without an explicit gatewayUrl param. resolveGatewayTarget - // must return "remote" in this case so deliveryContext is suppressed, preventing - // the remote sentinel from being stamped with the local chat route. - mocks.callGatewayTool.mockClear(); - // No gatewayUrl override — config-based remote mode + it("forwards live context when 'main' alias resolves to own default-agent session", async () => { + // agentSessionKey is "agent:main:main"; sessionKey "main" should canonicalize to the same + await execTool( + makeTool({ agentSessionKey: "agent:main:main" }), + makePatchParams({ sessionKey: "main" }), + ); + + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toBeDefined(); + expect((p?.deliveryContext as Record)?.channel).toBe("discord"); + }); + + it("forwards live context when sessionKey is omitted (defaults to own session)", async () => { + await execTool(makeTool(), makePatchParams({ sessionKey: undefined })); + + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toBeDefined(); + }); + + // ── Session key targeting: cross-session / cross-agent ─────────────────── + + it("suppresses deliveryContext when sessionKey targets a different session", async () => { + await execTool( + makeTool({ agentSessionKey: "agent:main:main" }), + makePatchParams({ sessionKey: "agent:other-claw:main" }), + ); + + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toBeUndefined(); + }); + + it("suppresses deliveryContext when non-default agent passes sessionKey='main' (cross-agent alias)", async () => { + // "main" resolves to "agent:main:main" (default), not "agent:shopping-claw:main" + await execTool( + makeTool({ agentSessionKey: "agent:shopping-claw:main" }), + makePatchParams({ sessionKey: "main" }), + ); + + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toBeUndefined(); + }); + + // ── Remote gateway targeting ───────────────────────────────────────────── + + it("suppresses deliveryContext when resolveGatewayTarget returns 'remote' (explicit URL)", async () => { + mocks.readGatewayCallOptions.mockReturnValueOnce({ gatewayUrl: "wss://remote.example.com" }); + mocks.resolveGatewayTarget.mockReturnValueOnce("remote"); + + await execTool( + makeTool(), + makePatchParams({ gatewayUrl: "wss://remote.example.com", sessionKey: "agent:main:main" }), + ); + + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toBeUndefined(); + }); + + it("suppresses deliveryContext when resolveGatewayTarget returns 'remote' (config gateway.mode=remote)", async () => { mocks.readGatewayCallOptions.mockReturnValueOnce({}); mocks.resolveGatewayTarget.mockReturnValueOnce("remote"); - const tool = createGatewayTool({ - agentSessionKey: "agent:main:main", - agentChannel: "discord", - agentTo: "123456789", - }); - await execTool(tool, { - action: "config.patch", - raw: '{"key":"value"}', - baseHash: "abc123", - sessionKey: "agent:main:main", - note: "config-remote patch (no URL override)", - }); + await execTool(makeTool(), makePatchParams({ sessionKey: "agent:main:main" })); - const forwardedParams = getCallArg>(mocks.callGatewayTool, 0, 2); - expect(forwardedParams?.deliveryContext).toBeUndefined(); + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toBeUndefined(); }); - it("does not forward live RPC delivery context when gatewayUrl targets a remote gateway", async () => { - // A remote gateway has its own extractDeliveryInfo(sessionKey) — forwarding - // the local agent's deliveryContext would write a sentinel with the wrong - // destination on the remote host. - mocks.callGatewayTool.mockClear(); - mocks.readGatewayCallOptions.mockReturnValueOnce({ gatewayUrl: "wss://remote-gw.example.com" }); - mocks.resolveGatewayTarget.mockReturnValueOnce("remote"); - const tool = createGatewayTool({ - agentSessionKey: "agent:main:main", - agentChannel: "discord", - agentTo: "123456789", - }); - - await execTool(tool, { - action: "config.patch", - raw: '{"key":"value"}', - baseHash: "abc123", - sessionKey: "agent:main:main", - note: "remote patch", - gatewayUrl: "wss://remote-gw.example.com", - }); - - const forwardedParams = getCallArg>(mocks.callGatewayTool, 0, 2); - expect(forwardedParams?.deliveryContext).toBeUndefined(); - }); - - it("forwards live RPC delivery context when gatewayUrl is a local loopback override", async () => { - // A gatewayUrl pointing to 127.0.0.1/localhost/[::1] is still the local server; - // deliveryContext must be forwarded so restart sentinels use the correct chat destination. - mocks.callGatewayTool.mockClear(); - mocks.readGatewayCallOptions.mockReturnValueOnce({ - gatewayUrl: "ws://127.0.0.1:18789", - }); + it("forwards deliveryContext when resolveGatewayTarget returns 'local' (loopback URL)", async () => { + mocks.readGatewayCallOptions.mockReturnValueOnce({ gatewayUrl: "ws://127.0.0.1:18789" }); mocks.resolveGatewayTarget.mockReturnValueOnce("local"); - const tool = createGatewayTool({ - agentSessionKey: "agent:main:main", - agentChannel: "discord", - agentTo: "123456789", - }); - await execTool(tool, { - action: "config.patch", - raw: '{"key":"value"}', - baseHash: "abc123", - sessionKey: "agent:main:main", - note: "local loopback patch", - gatewayUrl: "ws://127.0.0.1:18789", - }); + await execTool( + makeTool(), + makePatchParams({ gatewayUrl: "ws://127.0.0.1:18789", sessionKey: "agent:main:main" }), + ); - const forwardedParams = getCallArg>(mocks.callGatewayTool, 0, 2); - expect(forwardedParams?.deliveryContext).toEqual({ - channel: "discord", - to: "123456789", - accountId: undefined, - }); + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toBeDefined(); + expect((p?.deliveryContext as Record)?.channel).toBe("discord"); }); - it("does not forward live RPC delivery context when a non-default agent passes sessionKey='main'", async () => { - // "main" resolves to "agent:main:main" (default agent), which differs from the - // current session "agent:shopping-claw:main". Live context must NOT be forwarded. - mocks.callGatewayTool.mockClear(); - const tool = createGatewayTool({ - agentSessionKey: "agent:shopping-claw:main", - agentChannel: "discord", - agentTo: "123456789", - }); + it("forwards deliveryContext when resolveGatewayTarget returns undefined (default local)", async () => { + mocks.resolveGatewayTarget.mockReturnValueOnce(undefined); - await execTool(tool, { - action: "config.patch", - raw: '{"key":"value"}', - baseHash: "abc123", - sessionKey: "main", // targets default agent — different from current shopping-claw session - note: "cross-agent patch", - }); + await execTool(makeTool(), makePatchParams({ sessionKey: "agent:main:main" })); - const forwardedParams = getCallArg>(mocks.callGatewayTool, 0, 2); - expect(forwardedParams?.deliveryContext).toBeUndefined(); + const p = getCallArg>(mocks.callGatewayTool, 0, 2); + expect(p?.deliveryContext).toBeDefined(); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Suite 2 — Restart sentinel context (local restart action) +// ───────────────────────────────────────────────────────────────────────────── + +describe("createGatewayTool – restart sentinel delivery context", () => { + beforeEach(() => { + vi.clearAllMocks(); }); - it("does not forward live restart context when a non-default agent passes sessionKey='main'", async () => { - // "main" resolves to "agent:main:main" (default agent), which differs from the - // current session "agent:shopping-claw:main". Sentinel must not carry this session's thread. - mocks.writeRestartSentinel.mockClear(); + it("uses live context when both agentChannel and agentTo are present", async () => { + await execTool(makeTool(), { action: "restart" }); + + const p = getCallArg<{ deliveryContext?: Record }>( + mocks.writeRestartSentinel, + 0, + 0, + ); + expect(p?.deliveryContext?.channel).toBe("discord"); + expect(p?.deliveryContext?.to).toBe("123456789"); + }); + + it("falls back to extractDeliveryInfo when agentTo is missing", async () => { mocks.extractDeliveryInfo.mockReturnValueOnce({ deliveryContext: { channel: "telegram", to: "+19995550001", accountId: undefined }, threadId: undefined, }); - const tool = createGatewayTool({ - agentSessionKey: "agent:shopping-claw:main", - agentChannel: "discord", - agentTo: "123456789", - }); + await execTool(makeTool({ agentTo: undefined }), { action: "restart" }); - await execTool(tool, { action: "restart", sessionKey: "main" }); - - const sentinelPayload = getCallArg<{ deliveryContext?: { channel?: string; to?: string } }>( + const p = getCallArg<{ deliveryContext?: Record }>( mocks.writeRestartSentinel, 0, 0, ); - // Should fall back to extractDeliveryInfo() for the targeted session, not current session's live context - expect(sentinelPayload?.deliveryContext?.channel).toBe("telegram"); - expect(sentinelPayload?.deliveryContext?.to).toBe("+19995550001"); + expect(p?.deliveryContext?.channel).toBe("telegram"); + expect(p?.deliveryContext?.to).toBe("+19995550001"); + }); + + it("falls back to extractDeliveryInfo when agentChannel is missing", async () => { + mocks.extractDeliveryInfo.mockReturnValueOnce({ + deliveryContext: { channel: "whatsapp", to: "+10000000001", accountId: undefined }, + threadId: undefined, + }); + + await execTool(makeTool({ agentChannel: undefined }), { action: "restart" }); + + const p = getCallArg<{ deliveryContext?: Record }>( + mocks.writeRestartSentinel, + 0, + 0, + ); + expect(p?.deliveryContext?.channel).toBe("whatsapp"); + }); + + it("overrides stale heartbeat context from extractDeliveryInfo with live context", async () => { + mocks.extractDeliveryInfo.mockReturnValueOnce({ + deliveryContext: { channel: "webchat", to: "heartbeat", accountId: undefined }, + threadId: undefined, + }); + + await execTool(makeTool(), { action: "restart" }); + + const p = getCallArg<{ deliveryContext?: Record }>( + mocks.writeRestartSentinel, + 0, + 0, + ); + expect(p?.deliveryContext?.channel).toBe("discord"); + expect(p?.deliveryContext?.to).toBe("123456789"); + }); + + it("includes threadId in sentinel when agentThreadId is provided (same session)", async () => { + await execTool(makeTool({ agentThreadId: "ts.123456" }), { action: "restart" }); + + const p = getCallArg<{ threadId?: string }>(mocks.writeRestartSentinel, 0, 0); + expect(p?.threadId).toBe("ts.123456"); + }); + + it("uses extractDeliveryInfo threadId when targeting a different session", async () => { + mocks.extractDeliveryInfo.mockReturnValueOnce({ + deliveryContext: { channel: "telegram", to: "+19995550001", accountId: undefined }, + threadId: "extracted-thread", + }); + + await execTool(makeTool({ agentThreadId: "local-thread" }), { + action: "restart", + sessionKey: "agent:other-claw:main", + }); + + const p = getCallArg<{ threadId?: string }>(mocks.writeRestartSentinel, 0, 0); + expect(p?.threadId).toBe("extracted-thread"); + }); + + it("suppresses live context and uses extractDeliveryInfo when sessionKey targets another session", async () => { + mocks.extractDeliveryInfo.mockReturnValueOnce({ + deliveryContext: { channel: "signal", to: "+15550001", accountId: undefined }, + threadId: undefined, + }); + + await execTool(makeTool(), { action: "restart", sessionKey: "agent:other-agent:main" }); + + const p = getCallArg<{ deliveryContext?: Record }>( + mocks.writeRestartSentinel, + 0, + 0, + ); + expect(p?.deliveryContext?.channel).toBe("signal"); + expect(p?.deliveryContext?.to).toBe("+15550001"); + }); + + it("suppresses live context when non-default agent targets sessionKey='main' (cross-agent alias)", async () => { + mocks.extractDeliveryInfo.mockReturnValueOnce({ + deliveryContext: { channel: "telegram", to: "+19995550001", accountId: undefined }, + threadId: undefined, + }); + + await execTool(makeTool({ agentSessionKey: "agent:shopping-claw:main" }), { + action: "restart", + sessionKey: "main", // resolves to "agent:main:main" — different agent + }); + + const p = getCallArg<{ deliveryContext?: Record }>( + mocks.writeRestartSentinel, + 0, + 0, + ); + expect(p?.deliveryContext?.channel).toBe("telegram"); + expect(p?.deliveryContext?.to).toBe("+19995550001"); + }); + + it("sets status=ok and kind=restart on the sentinel payload", async () => { + await execTool(makeTool(), { action: "restart" }); + + const p = getCallArg<{ kind?: string; status?: string }>(mocks.writeRestartSentinel, 0, 0); + expect(p?.kind).toBe("restart"); + expect(p?.status).toBe("ok"); + }); + + it("includes sessionKey in sentinel payload", async () => { + await execTool(makeTool({ agentSessionKey: "agent:main:main" }), { + action: "restart", + }); + + const p = getCallArg<{ sessionKey?: string }>(mocks.writeRestartSentinel, 0, 0); + expect(p?.sessionKey).toBe("agent:main:main"); }); }); diff --git a/src/gateway/server-methods/restart-request.test.ts b/src/gateway/server-methods/restart-request.test.ts index 6273ee08815..41191d5a09c 100644 --- a/src/gateway/server-methods/restart-request.test.ts +++ b/src/gateway/server-methods/restart-request.test.ts @@ -1,27 +1,89 @@ import { describe, expect, it } from "vitest"; import { parseDeliveryContextFromParams } from "./restart-request.js"; +// ───────────────────────────────────────────────────────────────────────────── +// parseDeliveryContextFromParams +// Validates that only complete, routable delivery contexts are accepted +// and that partial or malformed inputs are rejected. +// ───────────────────────────────────────────────────────────────────────────── + describe("parseDeliveryContextFromParams", () => { + // ── No context present ──────────────────────────────────────────────────── + it("returns undefined when deliveryContext is absent", () => { expect(parseDeliveryContextFromParams({})).toBeUndefined(); }); - it("returns undefined when both channel and to are missing", () => { + it("returns undefined when deliveryContext is null", () => { + expect(parseDeliveryContextFromParams({ deliveryContext: null })).toBeUndefined(); + }); + + it("returns undefined when deliveryContext is a non-object (string)", () => { + expect(parseDeliveryContextFromParams({ deliveryContext: "discord" })).toBeUndefined(); + }); + + it("returns undefined when deliveryContext is a non-object (number)", () => { + expect(parseDeliveryContextFromParams({ deliveryContext: 42 })).toBeUndefined(); + }); + + // ── Partial context — must be rejected (prevents routing ambiguity) ─────── + + it("returns undefined when both channel and to are absent", () => { expect(parseDeliveryContextFromParams({ deliveryContext: {} })).toBeUndefined(); }); - it("returns undefined when only channel is present (partial context rejected)", () => { + it("returns undefined when only channel is present (partial context)", () => { expect( parseDeliveryContextFromParams({ deliveryContext: { channel: "discord" } }), ).toBeUndefined(); }); - it("returns undefined when only to is present (partial context rejected)", () => { + it("returns undefined when only to is present (partial context)", () => { expect( parseDeliveryContextFromParams({ deliveryContext: { to: "123456789" } }), ).toBeUndefined(); }); + it("returns undefined when channel is present but to is an empty string", () => { + expect( + parseDeliveryContextFromParams({ deliveryContext: { channel: "discord", to: "" } }), + ).toBeUndefined(); + }); + + it("returns undefined when to is present but channel is an empty string", () => { + expect( + parseDeliveryContextFromParams({ deliveryContext: { channel: "", to: "123456789" } }), + ).toBeUndefined(); + }); + + it("returns undefined when channel is whitespace-only", () => { + expect( + parseDeliveryContextFromParams({ deliveryContext: { channel: " ", to: "123456789" } }), + ).toBeUndefined(); + }); + + it("returns undefined when to is whitespace-only", () => { + expect( + parseDeliveryContextFromParams({ deliveryContext: { channel: "discord", to: " " } }), + ).toBeUndefined(); + }); + + // ── Non-string field types ──────────────────────────────────────────────── + + it("returns undefined when channel is a number (type coercion not allowed)", () => { + expect( + parseDeliveryContextFromParams({ deliveryContext: { channel: 42, to: "123" } }), + ).toBeUndefined(); + }); + + it("returns undefined when to is a boolean", () => { + expect( + parseDeliveryContextFromParams({ deliveryContext: { channel: "discord", to: true } }), + ).toBeUndefined(); + }); + + // ── Complete context ────────────────────────────────────────────────────── + it("returns full context when both channel and to are present", () => { expect( parseDeliveryContextFromParams({ @@ -30,7 +92,21 @@ describe("parseDeliveryContextFromParams", () => { ).toEqual({ channel: "discord", to: "123456789", accountId: undefined, threadId: undefined }); }); - it("includes accountId and threadId when present", () => { + it("includes accountId when present", () => { + const result = parseDeliveryContextFromParams({ + deliveryContext: { channel: "discord", to: "123456789", accountId: "acct-1" }, + }); + expect(result?.accountId).toBe("acct-1"); + }); + + it("includes threadId when present", () => { + const result = parseDeliveryContextFromParams({ + deliveryContext: { channel: "slack", to: "C012AB3CD", threadId: "1234567890.123456" }, + }); + expect(result?.threadId).toBe("1234567890.123456"); + }); + + it("includes all four fields when all are present", () => { expect( parseDeliveryContextFromParams({ deliveryContext: { @@ -48,11 +124,84 @@ describe("parseDeliveryContextFromParams", () => { }); }); - it("trims whitespace from all string fields", () => { + // ── Whitespace trimming ─────────────────────────────────────────────────── + + it("trims leading/trailing whitespace from channel", () => { + const result = parseDeliveryContextFromParams({ + deliveryContext: { channel: " discord ", to: "123456789" }, + }); + expect(result?.channel).toBe("discord"); + }); + + it("trims leading/trailing whitespace from to", () => { + const result = parseDeliveryContextFromParams({ + deliveryContext: { channel: "discord", to: " 123456789 " }, + }); + expect(result?.to).toBe("123456789"); + }); + + it("trims leading/trailing whitespace from threadId", () => { + const result = parseDeliveryContextFromParams({ + deliveryContext: { channel: "discord", to: "123", threadId: " ts.1 " }, + }); + expect(result?.threadId).toBe("ts.1"); + }); + + it("trims all string fields simultaneously", () => { expect( parseDeliveryContextFromParams({ - deliveryContext: { channel: " discord ", to: " 123 ", threadId: " ts.1 " }, + deliveryContext: { + channel: " discord ", + to: " 123 ", + accountId: " acct ", + threadId: " ts.1 ", + }, }), - ).toEqual({ channel: "discord", to: "123", accountId: undefined, threadId: "ts.1" }); + ).toEqual({ channel: "discord", to: "123", accountId: "acct", threadId: "ts.1" }); + }); + + // ── Optional fields absent / undefined ─────────────────────────────────── + + it("returns undefined for accountId when not provided", () => { + const result = parseDeliveryContextFromParams({ + deliveryContext: { channel: "discord", to: "123456789" }, + }); + expect(result?.accountId).toBeUndefined(); + }); + + it("returns undefined for threadId when not provided", () => { + const result = parseDeliveryContextFromParams({ + deliveryContext: { channel: "discord", to: "123456789" }, + }); + expect(result?.threadId).toBeUndefined(); + }); + + it("returns undefined for accountId when value is empty string after trim", () => { + const result = parseDeliveryContextFromParams({ + deliveryContext: { channel: "discord", to: "123456789", accountId: " " }, + }); + expect(result?.accountId).toBeUndefined(); + }); + + it("returns undefined for threadId when value is empty string after trim", () => { + const result = parseDeliveryContextFromParams({ + deliveryContext: { channel: "discord", to: "123456789", threadId: " " }, + }); + expect(result?.threadId).toBeUndefined(); + }); + + // ── Extra/unknown fields are ignored ───────────────────────────────────── + + it("ignores unknown extra fields in deliveryContext", () => { + const result = parseDeliveryContextFromParams({ + deliveryContext: { channel: "discord", to: "123456789", unknownField: "ignored" }, + }); + expect(result).toEqual({ + channel: "discord", + to: "123456789", + accountId: undefined, + threadId: undefined, + }); + expect((result as Record)?.unknownField).toBeUndefined(); }); }); diff --git a/src/gateway/server-restart-sentinel.test.ts b/src/gateway/server-restart-sentinel.test.ts index 24a00296189..99397d44677 100644 --- a/src/gateway/server-restart-sentinel.test.ts +++ b/src/gateway/server-restart-sentinel.test.ts @@ -93,15 +93,19 @@ vi.mock("../infra/system-events.js", () => ({ const { scheduleRestartSentinelWake } = await import("./server-restart-sentinel.js"); -describe("scheduleRestartSentinelWake", () => { +// ───────────────────────────────────────────────────────────────────────────── +// Suite 1 — Core two-step delivery + resume flow +// ───────────────────────────────────────────────────────────────────────────── + +describe("scheduleRestartSentinelWake – two-step delivery + resume", () => { beforeEach(() => { vi.clearAllMocks(); }); - it("delivers restart notice directly then resumes agent after restart", async () => { + it("delivers restart notice directly (model-independent) then resumes agent", async () => { await scheduleRestartSentinelWake({ deps: {} as never }); - // Step 1: deterministic delivery (model-independent) + // Step 1: deterministic delivery expect(mocks.deliverOutboundPayloads).toHaveBeenCalledWith( expect.objectContaining({ channel: "whatsapp", @@ -112,7 +116,7 @@ describe("scheduleRestartSentinelWake", () => { }), ); - // Step 2: agent resume turn + // Step 2: agent resume expect(mocks.agentCommand).toHaveBeenCalledWith( expect.objectContaining({ message: "restart message", @@ -128,26 +132,56 @@ describe("scheduleRestartSentinelWake", () => { {}, ); - // Verify delivery happened before resume - const deliverOrder = mocks.deliverOutboundPayloads.mock.invocationCallOrder[0]; - const agentOrder = mocks.agentCommand.mock.invocationCallOrder[0]; - expect(deliverOrder).toBeLessThan(agentOrder); - expect(mocks.enqueueSystemEvent).not.toHaveBeenCalled(); }); - it("falls back to enqueueSystemEvent when agentCommand throws", async () => { - mocks.agentCommand.mockRejectedValueOnce(new Error("agent failed")); + it("delivers notification before triggering agent resume (ordering guarantee)", async () => { + await scheduleRestartSentinelWake({ deps: {} as never }); + + const deliverOrder = mocks.deliverOutboundPayloads.mock.invocationCallOrder[0]; + const agentOrder = mocks.agentCommand.mock.invocationCallOrder[0]; + expect(deliverOrder).toBeLessThan(agentOrder); + }); + + it("passes senderIsOwner=false to agentCommand (no privilege escalation)", async () => { + await scheduleRestartSentinelWake({ deps: {} as never }); + + const opts = getArg>(mocks.agentCommand, 0); + expect(opts.senderIsOwner).toBe(false); + }); + + it("no-ops when there is no sentinel file", async () => { + mocks.consumeRestartSentinel.mockResolvedValueOnce(null); await scheduleRestartSentinelWake({ deps: {} as never }); - expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith( - expect.stringContaining("restart summary"), - { sessionKey: "agent:main:main" }, - ); + expect(mocks.deliverOutboundPayloads).not.toHaveBeenCalled(); + expect(mocks.agentCommand).not.toHaveBeenCalled(); + expect(mocks.enqueueSystemEvent).not.toHaveBeenCalled(); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Suite 2 — Fallback paths +// ───────────────────────────────────────────────────────────────────────────── + +describe("scheduleRestartSentinelWake – fallback to enqueueSystemEvent", () => { + beforeEach(() => { + vi.clearAllMocks(); }); - it("falls back to enqueueSystemEvent when channel cannot be resolved (no channel in origin)", async () => { + it("falls back to enqueueSystemEvent on main session key when sentinel has no sessionKey", async () => { + mocks.consumeRestartSentinel.mockResolvedValueOnce({ payload: { sessionKey: "" } } as never); + + await scheduleRestartSentinelWake({ deps: {} as never }); + + expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("restart message", { + sessionKey: "agent:main:main", + }); + expect(mocks.agentCommand).not.toHaveBeenCalled(); + }); + + it("falls back to enqueueSystemEvent when outbound target cannot be resolved", async () => { mocks.resolveOutboundTarget.mockReturnValueOnce({ ok: false, error: new Error("no-target"), @@ -161,13 +195,187 @@ describe("scheduleRestartSentinelWake", () => { }); }); - it("falls back to enqueueSystemEvent on main session key when sentinel has no sessionKey", async () => { - mocks.consumeRestartSentinel.mockResolvedValueOnce({ payload: { sessionKey: "" } } as never); + it("falls back to enqueueSystemEvent when channel is missing from merged delivery context", async () => { + // mergeDeliveryContext is called twice (inner + outer merge); mock the outer to drop channel + mocks.mergeDeliveryContext + .mockReturnValueOnce(undefined) // inner: sessionDeliveryContext merge + .mockReturnValueOnce({ to: "+15550002" }); // outer: sentinelContext wins, no channel await scheduleRestartSentinelWake({ deps: {} as never }); + expect(mocks.agentCommand).not.toHaveBeenCalled(); expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("restart message", { sessionKey: "agent:main:main", }); }); + + it("falls back to enqueueSystemEvent when to is missing from merged delivery context", async () => { + // Mock outer merge to return a context with no `to` + mocks.mergeDeliveryContext + .mockReturnValueOnce(undefined) + .mockReturnValueOnce({ channel: "whatsapp" }); + + await scheduleRestartSentinelWake({ deps: {} as never }); + + expect(mocks.agentCommand).not.toHaveBeenCalled(); + expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("restart message", { + sessionKey: "agent:main:main", + }); + }); + + it("falls back to enqueueSystemEvent (with error) when agentCommand throws", async () => { + mocks.agentCommand.mockRejectedValueOnce(new Error("agent failed")); + + await scheduleRestartSentinelWake({ deps: {} as never }); + + // Direct delivery should still have been attempted + expect(mocks.deliverOutboundPayloads).toHaveBeenCalled(); + // Then fallback for the resume + expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith( + expect.stringContaining("restart summary"), + { sessionKey: "agent:main:main" }, + ); + }); + + it("still calls agentCommand even if deliverOutboundPayloads throws (bestEffort guard)", async () => { + // bestEffort: true means it shouldn't normally throw, but even if it does, resume continues + mocks.deliverOutboundPayloads.mockRejectedValueOnce(new Error("deliver failed")); + + await scheduleRestartSentinelWake({ deps: {} as never }); + + expect(mocks.agentCommand).toHaveBeenCalled(); + }); }); + +// ───────────────────────────────────────────────────────────────────────────── +// Suite 3 — Thread routing (Slack vs non-Slack) +// ───────────────────────────────────────────────────────────────────────────── + +describe("scheduleRestartSentinelWake – thread routing", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("maps threadId to replyToId and clears threadId for Slack deliverOutboundPayloads", async () => { + mocks.consumeRestartSentinel.mockResolvedValueOnce({ + payload: { + sessionKey: "agent:main:main", + deliveryContext: { channel: "slack", to: "C012AB3CD", accountId: undefined }, + threadId: "1234567890.123456", + }, + }); + mocks.normalizeChannelId.mockReturnValueOnce("slack"); + + await scheduleRestartSentinelWake({ deps: {} as never }); + + const deliverArgs = getArg>(mocks.deliverOutboundPayloads, 0); + expect(deliverArgs.replyToId).toBe("1234567890.123456"); + expect(deliverArgs.threadId).toBeUndefined(); + }); + + it("passes threadId directly (not replyToId) for non-Slack channels", async () => { + mocks.consumeRestartSentinel.mockResolvedValueOnce({ + payload: { + sessionKey: "agent:main:main", + deliveryContext: { channel: "discord", to: "123456789", accountId: undefined }, + threadId: "discord-thread-id", + }, + }); + + await scheduleRestartSentinelWake({ deps: {} as never }); + + const deliverArgs = getArg>(mocks.deliverOutboundPayloads, 0); + expect(deliverArgs.threadId).toBe("discord-thread-id"); + expect(deliverArgs.replyToId).toBeUndefined(); + }); + + it("passes threadId to agentCommand for non-Slack threading", async () => { + mocks.consumeRestartSentinel.mockResolvedValueOnce({ + payload: { + sessionKey: "agent:main:main", + deliveryContext: { channel: "discord", to: "123456789", accountId: undefined }, + threadId: "discord-thread-id", + }, + }); + + await scheduleRestartSentinelWake({ deps: {} as never }); + + const agentOpts = getArg>(mocks.agentCommand, 0); + expect(agentOpts.threadId).toBe("discord-thread-id"); + }); + + it("sentinel payload threadId takes precedence over session-derived threadId", async () => { + mocks.consumeRestartSentinel.mockResolvedValueOnce({ + payload: { + sessionKey: "agent:main:main", + deliveryContext: { channel: "whatsapp", to: "+15550002" }, + threadId: "sentinel-thread", + }, + }); + // parseSessionThreadInfo would derive a different threadId from the session key + mocks.parseSessionThreadInfo.mockReturnValueOnce({ + baseSessionKey: null, + threadId: "session-thread", + }); + + await scheduleRestartSentinelWake({ deps: {} as never }); + + const agentOpts = getArg>(mocks.agentCommand, 0); + expect(agentOpts.threadId).toBe("sentinel-thread"); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Suite 4 — Delivery context priority: sentinel > session store > parsed target +// ───────────────────────────────────────────────────────────────────────────── + +describe("scheduleRestartSentinelWake – delivery context priority", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("prefers sentinel deliveryContext over session store (handles heartbeat-overwritten store)", async () => { + // Session store has been overwritten with heartbeat sink + mocks.deliveryContextFromSession.mockReturnValueOnce({ + channel: "webchat", + to: "heartbeat", + }); + + await scheduleRestartSentinelWake({ deps: {} as never }); + + // agentCommand should use the sentinel's whatsapp/+15550002, not webchat/heartbeat + const agentOpts = getArg>(mocks.agentCommand, 0); + expect(agentOpts.channel).toBe("whatsapp"); + expect(agentOpts.to).toBe("+15550002"); + }); + + it("falls back to session store when sentinel has no deliveryContext", async () => { + mocks.consumeRestartSentinel.mockResolvedValueOnce({ + payload: { sessionKey: "agent:main:main" }, // no deliveryContext + }); + mocks.deliveryContextFromSession.mockReturnValueOnce({ + channel: "telegram", + to: "+19990001", + }); + // Mock both merge calls: inner produces session ctx; outer passes it through + mocks.mergeDeliveryContext + .mockReturnValueOnce({ channel: "telegram", to: "+19990001" }) // inner + .mockReturnValueOnce({ channel: "telegram", to: "+19990001" }); // outer + // resolveOutboundTarget must reflect the session-store to value + mocks.resolveOutboundTarget.mockReturnValueOnce({ ok: true as const, to: "+19990001" }); + + await scheduleRestartSentinelWake({ deps: {} as never }); + + const agentOpts = getArg>(mocks.agentCommand, 0); + expect(agentOpts.channel).toBe("telegram"); + expect(agentOpts.to).toBe("+19990001"); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Helpers +// ───────────────────────────────────────────────────────────────────────────── + +function getArg(mockFn: { mock: { calls: unknown[][] } }, argIdx: number): T { + return mockFn.mock.calls[0]?.[argIdx] as T; +} From 874b906b3d66a072ac8caa2b2f4a9d5cd10d13ae Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Tue, 10 Mar 2026 02:05:32 +0000 Subject: [PATCH 15/25] fix: merge accountId fallback from extractDeliveryInfo in update.run handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a caller forwards live channel/to via deliveryContext but omits accountId (e.g. /tools/invoke without x-openclaw-account-id), the update.run handler was using paramsDeliveryContext as-is, dropping any account binding that existed in the session store. The restart sentinel would then be written without accountId, causing scheduleRestartSentinelWake to deliver using the channel default account — misrouting in multi-account setups. Apply the same accountId fallback pattern that config.ts already uses: when paramsDeliveryContext is present but its accountId is undefined, merge in extractedDeliveryContext.accountId as fallback. See #18612. --- src/gateway/server-methods/update.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/gateway/server-methods/update.ts b/src/gateway/server-methods/update.ts index 9fdd1c4af1b..ab9d72b69f6 100644 --- a/src/gateway/server-methods/update.ts +++ b/src/gateway/server-methods/update.ts @@ -28,7 +28,17 @@ export const updateHandlers: GatewayRequestHandlers = { const { deliveryContext: extractedDeliveryContext, threadId: extractedThreadId } = extractDeliveryInfo(sessionKey); const paramsDeliveryContext = parseDeliveryContextFromParams(params); - const deliveryContext = paramsDeliveryContext ?? extractedDeliveryContext; + // When live channel/to is present but accountId is missing (e.g. /tools/invoke without + // x-openclaw-account-id), fall back to the session-extracted account so the sentinel is + // not written without account context — which would cause deliveries to use the channel + // default account and misroute in multi-account setups. See config.ts for the same pattern. + const deliveryContext = + paramsDeliveryContext != null + ? { + ...paramsDeliveryContext, + accountId: paramsDeliveryContext.accountId ?? extractedDeliveryContext?.accountId, + } + : extractedDeliveryContext; const threadId = paramsDeliveryContext?.threadId ?? extractedThreadId; const timeoutMsRaw = (params as { timeoutMs?: unknown }).timeoutMs; const timeoutMs = From f7132aeb794ad2d8384220f1f0b778a094a0f67f Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Tue, 10 Mar 2026 02:59:07 +0000 Subject: [PATCH 16/25] fix(tests): correct mock types to satisfy tsgo strict checking Vitest infers mock return types from the initial factory function, causing tsgo to reject mockReturnValueOnce/mockResolvedValueOnce calls that pass types incompatible with the inferred return. Fix by: - Widening resolveGatewayTarget mock to () => 'local' | 'remote' | undefined - Widening extractDeliveryInfo mock threadId/accountId to string | undefined - Using 'as never' on mockReturnValueOnce/mockResolvedValueOnce overrides that pass edge-case values (null sentinel, undefined merge results, partial payload objects) that intentionally don't match the strict type All 71 tests still pass. --- src/agents/tools/gateway-tool.test.ts | 10 ++++-- src/gateway/server-restart-sentinel.test.ts | 38 ++++++++++----------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/agents/tools/gateway-tool.test.ts b/src/agents/tools/gateway-tool.test.ts index 5ea062a7ed5..7ecc8f6ca4f 100644 --- a/src/agents/tools/gateway-tool.test.ts +++ b/src/agents/tools/gateway-tool.test.ts @@ -4,15 +4,19 @@ const mocks = vi.hoisted(() => ({ isRestartEnabled: vi.fn(() => true), resolveConfigSnapshotHash: vi.fn(() => undefined), extractDeliveryInfo: vi.fn(() => ({ - deliveryContext: { channel: "telegram", to: "+19995550001", accountId: undefined }, - threadId: undefined, + deliveryContext: { + channel: "telegram", + to: "+19995550001", + accountId: undefined as string | undefined, + }, + threadId: undefined as string | undefined, })), writeRestartSentinel: vi.fn(async () => undefined), scheduleGatewaySigusr1Restart: vi.fn(() => ({ ok: true })), formatDoctorNonInteractiveHint: vi.fn(() => ""), callGatewayTool: vi.fn(async () => ({})), readGatewayCallOptions: vi.fn(() => ({})), - resolveGatewayTarget: vi.fn(() => undefined), + resolveGatewayTarget: vi.fn((): "local" | "remote" | undefined => undefined), })); vi.mock("../../config/commands.js", () => ({ isRestartEnabled: mocks.isRestartEnabled })); diff --git a/src/gateway/server-restart-sentinel.test.ts b/src/gateway/server-restart-sentinel.test.ts index 99397d44677..a50ba486d41 100644 --- a/src/gateway/server-restart-sentinel.test.ts +++ b/src/gateway/server-restart-sentinel.test.ts @@ -151,7 +151,7 @@ describe("scheduleRestartSentinelWake – two-step delivery + resume", () => { }); it("no-ops when there is no sentinel file", async () => { - mocks.consumeRestartSentinel.mockResolvedValueOnce(null); + mocks.consumeRestartSentinel.mockResolvedValueOnce(null as never); await scheduleRestartSentinelWake({ deps: {} as never }); @@ -198,8 +198,8 @@ describe("scheduleRestartSentinelWake – fallback to enqueueSystemEvent", () => it("falls back to enqueueSystemEvent when channel is missing from merged delivery context", async () => { // mergeDeliveryContext is called twice (inner + outer merge); mock the outer to drop channel mocks.mergeDeliveryContext - .mockReturnValueOnce(undefined) // inner: sessionDeliveryContext merge - .mockReturnValueOnce({ to: "+15550002" }); // outer: sentinelContext wins, no channel + .mockReturnValueOnce(undefined as never) // inner: sessionDeliveryContext merge + .mockReturnValueOnce({ to: "+15550002" } as never); // outer: sentinelContext wins, no channel await scheduleRestartSentinelWake({ deps: {} as never }); @@ -212,8 +212,8 @@ describe("scheduleRestartSentinelWake – fallback to enqueueSystemEvent", () => it("falls back to enqueueSystemEvent when to is missing from merged delivery context", async () => { // Mock outer merge to return a context with no `to` mocks.mergeDeliveryContext - .mockReturnValueOnce(undefined) - .mockReturnValueOnce({ channel: "whatsapp" }); + .mockReturnValueOnce(undefined as never) + .mockReturnValueOnce({ channel: "whatsapp" } as never); await scheduleRestartSentinelWake({ deps: {} as never }); @@ -260,10 +260,10 @@ describe("scheduleRestartSentinelWake – thread routing", () => { mocks.consumeRestartSentinel.mockResolvedValueOnce({ payload: { sessionKey: "agent:main:main", - deliveryContext: { channel: "slack", to: "C012AB3CD", accountId: undefined }, + deliveryContext: { channel: "slack", to: "C012AB3CD", accountId: "acct-2" }, threadId: "1234567890.123456", }, - }); + } as never); mocks.normalizeChannelId.mockReturnValueOnce("slack"); await scheduleRestartSentinelWake({ deps: {} as never }); @@ -277,10 +277,10 @@ describe("scheduleRestartSentinelWake – thread routing", () => { mocks.consumeRestartSentinel.mockResolvedValueOnce({ payload: { sessionKey: "agent:main:main", - deliveryContext: { channel: "discord", to: "123456789", accountId: undefined }, + deliveryContext: { channel: "discord", to: "123456789", accountId: "acct-2" }, threadId: "discord-thread-id", }, - }); + } as never); await scheduleRestartSentinelWake({ deps: {} as never }); @@ -293,10 +293,10 @@ describe("scheduleRestartSentinelWake – thread routing", () => { mocks.consumeRestartSentinel.mockResolvedValueOnce({ payload: { sessionKey: "agent:main:main", - deliveryContext: { channel: "discord", to: "123456789", accountId: undefined }, + deliveryContext: { channel: "discord", to: "123456789", accountId: "acct-2" }, threadId: "discord-thread-id", }, - }); + } as never); await scheduleRestartSentinelWake({ deps: {} as never }); @@ -308,15 +308,15 @@ describe("scheduleRestartSentinelWake – thread routing", () => { mocks.consumeRestartSentinel.mockResolvedValueOnce({ payload: { sessionKey: "agent:main:main", - deliveryContext: { channel: "whatsapp", to: "+15550002" }, + deliveryContext: { channel: "whatsapp", to: "+15550002", accountId: "acct-2" }, threadId: "sentinel-thread", }, - }); + } as never); // parseSessionThreadInfo would derive a different threadId from the session key mocks.parseSessionThreadInfo.mockReturnValueOnce({ baseSessionKey: null, threadId: "session-thread", - }); + } as never); await scheduleRestartSentinelWake({ deps: {} as never }); @@ -339,7 +339,7 @@ describe("scheduleRestartSentinelWake – delivery context priority", () => { mocks.deliveryContextFromSession.mockReturnValueOnce({ channel: "webchat", to: "heartbeat", - }); + } as never); await scheduleRestartSentinelWake({ deps: {} as never }); @@ -352,15 +352,15 @@ describe("scheduleRestartSentinelWake – delivery context priority", () => { it("falls back to session store when sentinel has no deliveryContext", async () => { mocks.consumeRestartSentinel.mockResolvedValueOnce({ payload: { sessionKey: "agent:main:main" }, // no deliveryContext - }); + } as never); mocks.deliveryContextFromSession.mockReturnValueOnce({ channel: "telegram", to: "+19990001", - }); + } as never); // Mock both merge calls: inner produces session ctx; outer passes it through mocks.mergeDeliveryContext - .mockReturnValueOnce({ channel: "telegram", to: "+19990001" }) // inner - .mockReturnValueOnce({ channel: "telegram", to: "+19990001" }); // outer + .mockReturnValueOnce({ channel: "telegram", to: "+19990001" } as never) // inner + .mockReturnValueOnce({ channel: "telegram", to: "+19990001" } as never); // outer // resolveOutboundTarget must reflect the session-store to value mocks.resolveOutboundTarget.mockReturnValueOnce({ ok: true as const, to: "+19990001" }); From 3c914b8f3879be13437918da74835fcd5cac55f6 Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Tue, 10 Mar 2026 04:48:41 +0000 Subject: [PATCH 17/25] fix: enqueue fallback event on delivery throw and align resolveGatewayTarget with callGateway URL resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two regressions addressed (PR #34580 CR review): 1. server-restart-sentinel.ts – deliverOutboundPayloads catch block was silently swallowing errors and proceeding to agentCommand without any fallback. When delivery throws before bestEffort handling (e.g. channel plugin error), the user would receive neither the deterministic notice nor a system event if the resumed run emits no payloads. Fix: enqueue a system event in the catch block, mirroring prior behaviour. 2. gateway.ts resolveGatewayTarget – two mismatches with callGateway's actual URL resolution path: a. gateway.mode=remote + missing gateway.remote.url: callGateway falls back to local loopback, but resolveGatewayTarget returned 'remote', suppressing deliveryContext for what is actually a local call. b. Env URL overrides (OPENCLAW_GATEWAY_URL / CLAWDBOT_GATEWAY_URL) are picked up by callGateway but were ignored here, causing incorrect local/remote classification. Fix: check env overrides first, then require both mode=remote AND remote.url present for 'remote'. Tests: add regression coverage for both fixes. --- src/agents/tools/gateway.test.ts | 280 ++++++++------------ src/agents/tools/gateway.ts | 40 ++- src/gateway/server-restart-sentinel.test.ts | 15 ++ src/gateway/server-restart-sentinel.ts | 6 +- 4 files changed, 168 insertions(+), 173 deletions(-) diff --git a/src/agents/tools/gateway.test.ts b/src/agents/tools/gateway.test.ts index 5f768775432..fa31ca8fca9 100644 --- a/src/agents/tools/gateway.test.ts +++ b/src/agents/tools/gateway.test.ts @@ -1,189 +1,141 @@ -import { afterAll, beforeEach, describe, expect, it, vi } from "vitest"; -import { callGatewayTool, resolveGatewayOptions } from "./gateway.js"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -const callGatewayMock = vi.fn(); -const configState = vi.hoisted(() => ({ - value: {} as Record, +// ───────────────────────────────────────────────────────────────────────────── +// Mocks +// ───────────────────────────────────────────────────────────────────────────── + +const mocks = vi.hoisted(() => ({ + loadConfig: vi.fn(() => ({})), + resolveGatewayPort: vi.fn(() => 18789), })); + vi.mock("../../config/config.js", () => ({ - loadConfig: () => configState.value, - resolveGatewayPort: () => 18789, + loadConfig: mocks.loadConfig, + resolveGatewayPort: mocks.resolveGatewayPort, })); -vi.mock("../../gateway/call.js", () => ({ - callGateway: (...args: unknown[]) => callGatewayMock(...args), +vi.mock("../../gateway/call.js", () => ({})); +vi.mock("../../gateway/credentials.js", () => ({ + resolveGatewayCredentialsFromConfig: vi.fn(), + trimToUndefined: (v: unknown) => + typeof v === "string" && v.trim().length > 0 ? v.trim() : undefined, +})); +vi.mock("../../gateway/method-scopes.js", () => ({ + resolveLeastPrivilegeOperatorScopesForMethod: vi.fn(() => []), +})); +vi.mock("../../utils/message-channel.js", () => ({ + GATEWAY_CLIENT_MODES: { BACKEND: "backend" }, + GATEWAY_CLIENT_NAMES: { GATEWAY_CLIENT: "gateway-client" }, +})); +vi.mock("./common.js", () => ({ + readStringParam: vi.fn(), })); -describe("gateway tool defaults", () => { - const envSnapshot = { - openclaw: process.env.OPENCLAW_GATEWAY_TOKEN, - clawdbot: process.env.CLAWDBOT_GATEWAY_TOKEN, - }; +const { resolveGatewayTarget } = await import("./gateway.js"); +// ───────────────────────────────────────────────────────────────────────────── +// Helpers +// ───────────────────────────────────────────────────────────────────────────── + +function setConfig(overrides: Record) { + mocks.loadConfig.mockReturnValue(overrides); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Suite: resolveGatewayTarget — env URL overrides and remote-mode fallback +// ───────────────────────────────────────────────────────────────────────────── + +describe("resolveGatewayTarget – env URL override classification", () => { beforeEach(() => { - callGatewayMock.mockClear(); - configState.value = {}; - delete process.env.OPENCLAW_GATEWAY_TOKEN; - delete process.env.CLAWDBOT_GATEWAY_TOKEN; + vi.clearAllMocks(); + setConfig({}); + delete process.env.OPENCLAW_GATEWAY_URL; + delete process.env.CLAWDBOT_GATEWAY_URL; }); - afterAll(() => { - if (envSnapshot.openclaw === undefined) { - delete process.env.OPENCLAW_GATEWAY_TOKEN; - } else { - process.env.OPENCLAW_GATEWAY_TOKEN = envSnapshot.openclaw; - } - if (envSnapshot.clawdbot === undefined) { - delete process.env.CLAWDBOT_GATEWAY_TOKEN; - } else { - process.env.CLAWDBOT_GATEWAY_TOKEN = envSnapshot.clawdbot; - } + afterEach(() => { + delete process.env.OPENCLAW_GATEWAY_URL; + delete process.env.CLAWDBOT_GATEWAY_URL; }); - it("leaves url undefined so callGateway can use config", () => { - const opts = resolveGatewayOptions(); - expect(opts.url).toBeUndefined(); + it("returns undefined (local) with no overrides and default config", () => { + expect(resolveGatewayTarget()).toBeUndefined(); }); - it("accepts allowlisted gatewayUrl overrides (SSRF hardening)", async () => { - callGatewayMock.mockResolvedValueOnce({ ok: true }); - await callGatewayTool( - "health", - { gatewayUrl: "ws://127.0.0.1:18789", gatewayToken: "t", timeoutMs: 5000 }, - {}, - ); - expect(callGatewayMock).toHaveBeenCalledWith( - expect.objectContaining({ - url: "ws://127.0.0.1:18789", - token: "t", - timeoutMs: 5000, - scopes: ["operator.read"], - }), - ); - }); - - it("uses OPENCLAW_GATEWAY_TOKEN for allowlisted local overrides", () => { - process.env.OPENCLAW_GATEWAY_TOKEN = "env-token"; - const opts = resolveGatewayOptions({ gatewayUrl: "ws://127.0.0.1:18789" }); - expect(opts.url).toBe("ws://127.0.0.1:18789"); - expect(opts.token).toBe("env-token"); - }); - - it("falls back to config gateway.auth.token when env is unset for local overrides", () => { - configState.value = { - gateway: { - auth: { token: "config-token" }, - }, - }; - const opts = resolveGatewayOptions({ gatewayUrl: "ws://127.0.0.1:18789" }); - expect(opts.token).toBe("config-token"); - }); - - it("uses gateway.remote.token for allowlisted remote overrides", () => { - configState.value = { - gateway: { - remote: { - url: "wss://gateway.example", - token: "remote-token", - }, - }, - }; - const opts = resolveGatewayOptions({ gatewayUrl: "wss://gateway.example" }); - expect(opts.url).toBe("wss://gateway.example"); - expect(opts.token).toBe("remote-token"); - }); - - it("does not leak local env/config tokens to remote overrides", () => { - process.env.OPENCLAW_GATEWAY_TOKEN = "local-env-token"; - process.env.CLAWDBOT_GATEWAY_TOKEN = "legacy-env-token"; - configState.value = { - gateway: { - auth: { token: "local-config-token" }, - remote: { - url: "wss://gateway.example", - }, - }, - }; - const opts = resolveGatewayOptions({ gatewayUrl: "wss://gateway.example" }); - expect(opts.token).toBeUndefined(); - }); - - it("ignores unresolved local token SecretRef for strict remote overrides", () => { - configState.value = { - gateway: { - auth: { - mode: "token", - token: { source: "env", provider: "default", id: "MISSING_LOCAL_TOKEN" }, - }, - remote: { - url: "wss://gateway.example", - }, - }, - secrets: { - providers: { - default: { source: "env" }, - }, - }, - }; - const opts = resolveGatewayOptions({ gatewayUrl: "wss://gateway.example" }); - expect(opts.token).toBeUndefined(); - }); - - it("explicit gatewayToken overrides fallback token resolution", () => { - process.env.OPENCLAW_GATEWAY_TOKEN = "local-env-token"; - configState.value = { - gateway: { - remote: { - url: "wss://gateway.example", - token: "remote-token", - }, - }, - }; - const opts = resolveGatewayOptions({ - gatewayUrl: "wss://gateway.example", - gatewayToken: "explicit-token", + it("returns 'remote' when gateway.mode=remote AND gateway.remote.url is set", () => { + setConfig({ + gateway: { mode: "remote", remote: { url: "wss://remote.example.com" } }, }); - expect(opts.token).toBe("explicit-token"); + expect(resolveGatewayTarget()).toBe("remote"); }); - it("uses least-privilege write scope for write methods", async () => { - callGatewayMock.mockResolvedValueOnce({ ok: true }); - await callGatewayTool("wake", {}, { mode: "now", text: "hi" }); - expect(callGatewayMock).toHaveBeenCalledWith( - expect.objectContaining({ - method: "wake", - scopes: ["operator.write"], - }), - ); + it("returns undefined when gateway.mode=remote but gateway.remote.url is missing (callGateway falls back to local)", () => { + // This was the key regression: mode=remote without a url falls back to loopback, but the + // old code returned "remote", causing deliveryContext to be suppressed for a local call. + setConfig({ gateway: { mode: "remote" } }); + expect(resolveGatewayTarget()).toBeUndefined(); }); - it("uses admin scope only for admin methods", async () => { - callGatewayMock.mockResolvedValueOnce({ ok: true }); - await callGatewayTool("cron.add", {}, { id: "job-1" }); - expect(callGatewayMock).toHaveBeenCalledWith( - expect.objectContaining({ - method: "cron.add", - scopes: ["operator.admin"], - }), - ); + it("returns undefined when gateway.mode=remote but gateway.remote.url is empty string", () => { + setConfig({ gateway: { mode: "remote", remote: { url: " " } } }); + expect(resolveGatewayTarget()).toBeUndefined(); }); - it("default-denies unknown methods by sending no scopes", async () => { - callGatewayMock.mockResolvedValueOnce({ ok: true }); - await callGatewayTool("nonexistent.method", {}, {}); - expect(callGatewayMock).toHaveBeenCalledWith( - expect.objectContaining({ - method: "nonexistent.method", - scopes: [], - }), - ); + it("classifies OPENCLAW_GATEWAY_URL loopback env override as 'local'", () => { + process.env.OPENCLAW_GATEWAY_URL = "ws://127.0.0.1:18789"; + setConfig({}); + expect(resolveGatewayTarget()).toBe("local"); }); - it("rejects non-allowlisted overrides (SSRF hardening)", async () => { - await expect( - callGatewayTool("health", { gatewayUrl: "ws://127.0.0.1:8080", gatewayToken: "t" }, {}), - ).rejects.toThrow(/gatewayUrl override rejected/i); - await expect( - callGatewayTool("health", { gatewayUrl: "ws://169.254.169.254", gatewayToken: "t" }, {}), - ).rejects.toThrow(/gatewayUrl override rejected/i); + it("classifies CLAWDBOT_GATEWAY_URL loopback env override as 'local'", () => { + process.env.CLAWDBOT_GATEWAY_URL = "ws://localhost:18789"; + setConfig({}); + expect(resolveGatewayTarget()).toBe("local"); + }); + + it("classifies OPENCLAW_GATEWAY_URL matching gateway.remote.url as 'remote'", () => { + process.env.OPENCLAW_GATEWAY_URL = "wss://remote.example.com"; + setConfig({ + gateway: { mode: "remote", remote: { url: "wss://remote.example.com" } }, + }); + expect(resolveGatewayTarget()).toBe("remote"); + }); + + it("falls through to config-based resolution when OPENCLAW_GATEWAY_URL is rejected (malformed)", () => { + process.env.OPENCLAW_GATEWAY_URL = "not-a-url"; + setConfig({ + gateway: { mode: "remote", remote: { url: "wss://remote.example.com" } }, + }); + // Falls through to config check: mode=remote + remote.url present → "remote" + expect(resolveGatewayTarget()).toBe("remote"); + }); + + it("OPENCLAW_GATEWAY_URL takes precedence over env CLAWDBOT_GATEWAY_URL", () => { + process.env.OPENCLAW_GATEWAY_URL = "ws://127.0.0.1:18789"; + process.env.CLAWDBOT_GATEWAY_URL = "wss://remote.example.com"; + setConfig({ + gateway: { mode: "remote", remote: { url: "wss://remote.example.com" } }, + }); + // OPENCLAW_GATEWAY_URL wins (loopback) → "local" + expect(resolveGatewayTarget()).toBe("local"); + }); +}); + +describe("resolveGatewayTarget – explicit gatewayUrl override", () => { + beforeEach(() => { + vi.clearAllMocks(); + setConfig({}); + delete process.env.OPENCLAW_GATEWAY_URL; + delete process.env.CLAWDBOT_GATEWAY_URL; + }); + + it("returns 'local' for loopback explicit gatewayUrl", () => { + expect(resolveGatewayTarget({ gatewayUrl: "ws://127.0.0.1:18789" })).toBe("local"); + }); + + it("returns 'remote' for explicit remote gatewayUrl matching configured remote URL", () => { + setConfig({ + gateway: { mode: "remote", remote: { url: "wss://remote.example.com" } }, + }); + expect(resolveGatewayTarget({ gatewayUrl: "wss://remote.example.com" })).toBe("remote"); }); }); diff --git a/src/agents/tools/gateway.ts b/src/agents/tools/gateway.ts index 3f663dc0508..c7acd4858b3 100644 --- a/src/agents/tools/gateway.ts +++ b/src/agents/tools/gateway.ts @@ -116,19 +116,43 @@ function resolveGatewayOverrideToken(params: { /** * Resolves whether a GatewayCallOptions points to a local or remote gateway. * Returns "remote" when a remote gatewayUrl override is present, OR when - * gateway.mode=remote is configured and no override is provided (config-based remote). + * gateway.mode=remote is configured with a gateway.remote.url set. * Returns "local" for explicit loopback URL overrides (127.0.0.1, localhost, [::1]). - * Returns undefined only when no override is present and gateway.mode is not "remote" - * (i.e. the default local gateway). + * Returns undefined when no override is present and the effective target is the local gateway + * (including the gateway.mode=remote + missing gateway.remote.url fallback-to-local case). + * + * This mirrors the URL resolution path used by callGateway/buildGatewayConnectionDetails so + * that deliveryContext suppression decisions are based on the actual connection target, not just + * the configured mode. Two mismatches fixed vs the previous version: + * 1. gateway.mode=remote without gateway.remote.url: callGateway falls back to local loopback; + * classifying that as "remote" would incorrectly suppress deliveryContext. + * 2. Env URL overrides (OPENCLAW_GATEWAY_URL / CLAWDBOT_GATEWAY_URL) are picked up by + * callGateway but were ignored here, causing incorrect local/remote classification. */ export function resolveGatewayTarget(opts?: GatewayCallOptions): GatewayOverrideTarget | undefined { const cfg = loadConfig(); if (trimToUndefined(opts?.gatewayUrl) === undefined) { - // No explicit URL override — fall back to config-based mode. - // When gateway.mode=remote, callGatewayTool() routes to the configured - // gateway.remote.url, so this is effectively a remote target even without - // an explicit gatewayUrl param. - return cfg.gateway?.mode === "remote" ? "remote" : undefined; + // No explicit gatewayUrl param — mirror callGateway's resolution path. + // Check env URL overrides first (same precedence as buildGatewayConnectionDetails). + const envUrlOverride = + trimToUndefined(process.env.OPENCLAW_GATEWAY_URL) ?? + trimToUndefined(process.env.CLAWDBOT_GATEWAY_URL); + if (envUrlOverride !== undefined) { + try { + return validateGatewayUrlOverrideForAgentTools({ + cfg, + urlOverride: envUrlOverride, + }).target; + } catch { + // Malformed or security-rejected env URL; fall through to config-based resolution. + } + } + // No env override. When mode=remote with a configured remote URL → truly remote. + // When mode=remote but remote.url is absent, callGateway falls back to local loopback — + // classify that as local (undefined) so deliveryContext is not suppressed. + const remoteUrl = + cfg.gateway?.mode === "remote" ? trimToUndefined(cfg.gateway?.remote?.url) : undefined; + return cfg.gateway?.mode === "remote" && remoteUrl !== undefined ? "remote" : undefined; } return validateGatewayUrlOverrideForAgentTools({ cfg, diff --git a/src/gateway/server-restart-sentinel.test.ts b/src/gateway/server-restart-sentinel.test.ts index a50ba486d41..008437bb037 100644 --- a/src/gateway/server-restart-sentinel.test.ts +++ b/src/gateway/server-restart-sentinel.test.ts @@ -245,6 +245,21 @@ describe("scheduleRestartSentinelWake – fallback to enqueueSystemEvent", () => expect(mocks.agentCommand).toHaveBeenCalled(); }); + + it("enqueues system event when deliverOutboundPayloads throws so user is not left silent", async () => { + // Guard: if the channel plugin throws before bestEffort handling kicks in, the catch block + // must enqueue a system event — otherwise the user receives neither the deterministic notice + // nor any fallback, regressing the prior behaviour. See PR #34580 CR comment 2909058539. + mocks.deliverOutboundPayloads.mockRejectedValueOnce(new Error("plugin error")); + + await scheduleRestartSentinelWake({ deps: {} as never }); + + expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("restart message", { + sessionKey: "agent:main:main", + }); + // Resume step must still run after delivery failure + expect(mocks.agentCommand).toHaveBeenCalled(); + }); }); // ───────────────────────────────────────────────────────────────────────────── diff --git a/src/gateway/server-restart-sentinel.ts b/src/gateway/server-restart-sentinel.ts index 6c603456aa0..a8e503532c6 100644 --- a/src/gateway/server-restart-sentinel.ts +++ b/src/gateway/server-restart-sentinel.ts @@ -98,7 +98,11 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) { bestEffort: true, }); } catch { - // bestEffort: true means this should not throw, but guard anyway + // bestEffort: true means this should not throw, but guard anyway. + // If it does throw (channel plugin/runtime error before best-effort handling is applied), + // enqueue a system event so the user receives the restart notice even on delivery failure. + // This preserves the prior behaviour where delivery errors in this path produced a fallback event. + enqueueSystemEvent(message, { sessionKey }); } // Step 2: trigger an agent resume turn so the agent can continue autonomously From d629945255123980da11c0db96ee490ed4aeea69 Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Tue, 10 Mar 2026 07:45:53 +0000 Subject: [PATCH 18/25] fix: classify env gateway URL overrides with callGateway semantics in resolveGatewayTarget MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When OPENCLAW_GATEWAY_URL/CLAWDBOT_GATEWAY_URL is set to a valid remote URL that doesn't match gateway.remote.url (or has a non-root path like /ws), validateGatewayUrlOverrideForAgentTools throws and the old code silently fell through to config-based resolution, returning undefined (local). But callGateway/buildGatewayConnectionDetails still uses the env URL verbatim, so the actual call goes remote while resolveGatewayTarget returned local — causing gateway-tool to forward live deliveryContext into remote config.apply / config.patch / update.run writes, which can misroute or leak post-restart wake messages across hosts. Fix: when validateGatewayUrlOverrideForAgentTools throws for an env URL override, fall back to hostname-based loopback detection instead of silently treating the target as local. Only truly malformed URLs (that new URL() cannot parse) fall through to config-based resolution. Adds tests for: - env-only remote URL not matching gateway.remote.url → 'remote' - env URL with no configured remote URL → 'remote' - env URL with /ws path → 'remote' - loopback env URL with /ws path → 'local' --- src/agents/tools/gateway.test.ts | 33 ++++++++++++++++++++++++++++++++ src/agents/tools/gateway.ts | 14 +++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/agents/tools/gateway.test.ts b/src/agents/tools/gateway.test.ts index fa31ca8fca9..7669261f7b6 100644 --- a/src/agents/tools/gateway.test.ts +++ b/src/agents/tools/gateway.test.ts @@ -109,6 +109,39 @@ describe("resolveGatewayTarget – env URL override classification", () => { expect(resolveGatewayTarget()).toBe("remote"); }); + it("classifies env-only remote URL (not matching gateway.remote.url) as 'remote'", () => { + // callGateway uses the env URL as-is even when validateGatewayUrlOverrideForAgentTools + // rejects it (different host than configured gateway.remote.url). Must not leak + // deliveryContext into a remote call by falling back to 'local'. + process.env.OPENCLAW_GATEWAY_URL = "wss://other-host.example.com"; + setConfig({ + gateway: { mode: "remote", remote: { url: "wss://remote.example.com" } }, + }); + expect(resolveGatewayTarget()).toBe("remote"); + }); + + it("classifies env-only remote URL with no configured gateway.remote.url as 'remote'", () => { + // callGateway picks up the env URL even when gateway.remote.url is absent. + process.env.OPENCLAW_GATEWAY_URL = "wss://remote.example.com"; + setConfig({}); + expect(resolveGatewayTarget()).toBe("remote"); + }); + + it("classifies env URL with /ws path (rejected by allowlist) as 'remote'", () => { + // URLs with non-root paths are rejected by validateGatewayUrlOverrideForAgentTools but + // callGateway/buildGatewayConnectionDetails still use them verbatim. Classify correctly. + process.env.OPENCLAW_GATEWAY_URL = "wss://remote.example.com/ws"; + setConfig({}); + expect(resolveGatewayTarget()).toBe("remote"); + }); + + it("classifies loopback env URL with /ws path (rejected by allowlist) as 'local'", () => { + // Even with a non-root path, loopback targets remain local. + process.env.OPENCLAW_GATEWAY_URL = "ws://127.0.0.1:18789/ws"; + setConfig({}); + expect(resolveGatewayTarget()).toBe("local"); + }); + it("OPENCLAW_GATEWAY_URL takes precedence over env CLAWDBOT_GATEWAY_URL", () => { process.env.OPENCLAW_GATEWAY_URL = "ws://127.0.0.1:18789"; process.env.CLAWDBOT_GATEWAY_URL = "wss://remote.example.com"; diff --git a/src/agents/tools/gateway.ts b/src/agents/tools/gateway.ts index c7acd4858b3..fab886252e3 100644 --- a/src/agents/tools/gateway.ts +++ b/src/agents/tools/gateway.ts @@ -144,7 +144,19 @@ export function resolveGatewayTarget(opts?: GatewayCallOptions): GatewayOverride urlOverride: envUrlOverride, }).target; } catch { - // Malformed or security-rejected env URL; fall through to config-based resolution. + // URL rejected by the agent-tools allowlist (e.g. non-loopback URL not matching + // gateway.remote.url, or URL with a non-root path like /ws). callGateway / + // buildGatewayConnectionDetails will still use this env URL as-is, so we must + // classify based on the actual target host — not silently fall back to local. + try { + const parsed = new URL(envUrlOverride.trim()); + // Normalize IPv6 brackets: "[::1]" → "::1" + const host = parsed.hostname.toLowerCase().replace(/^\[|\]$/g, ""); + const isLoopback = host === "127.0.0.1" || host === "localhost" || host === "::1"; + return isLoopback ? "local" : "remote"; + } catch { + // Truly malformed URL; callGateway will also fail. Fall through to config-based resolution. + } } } // No env override. When mode=remote with a configured remote URL → truly remote. From e13a94df72d06a6266c386a7b3b67df1d7818afa Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Tue, 10 Mar 2026 10:47:45 +0000 Subject: [PATCH 19/25] fix: classify tunneled loopback gateway URLs as remote when mode=remote is configured MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When gateway.mode=remote is configured with a non-loopback remote.url, a loopback gatewayUrl (ws://127.0.0.1:...) is likely an SSH port-forward tunnel endpoint (ssh -N -L :remote-host:). Previously, resolveGatewayTarget() classified any loopback URL as 'local', causing gateway-tool to forward live deliveryContext into remote config.apply / config.patch / update.run writes. Because server handlers prefer params.deliveryContext, post-restart wake messages were misrouted to the caller's local chat context instead of the remote session. Fix both classification sites: 1. validateGatewayUrlOverrideForAgentTools: when a loopback URL hits localAllowed, check isNonLoopbackRemoteUrlConfigured(cfg); if true, return 'remote' (tunnel) rather than 'local'. 2. resolveGatewayTarget fallback (rejected URL path): same check for the isLoopback branch — prefer 'remote' when mode=remote with a non-loopback remote.url is present. Add isNonLoopbackRemoteUrlConfigured() helper (returns true iff gateway.mode=remote AND gateway.remote.url is a non-loopback hostname). Tests: add SSH tunnel cases in gateway.test.ts; update the 'OPENCLAW_GATEWAY_URL takes precedence' test which now correctly returns 'remote' when mode=remote with non-loopback remote.url; add a variant without remote config to cover the 'local' precedence case. --- src/agents/tools/gateway.test.ts | 55 +++++++++++++++++++++++++++++--- src/agents/tools/gateway.ts | 48 +++++++++++++++++++++++++--- 2 files changed, 95 insertions(+), 8 deletions(-) diff --git a/src/agents/tools/gateway.test.ts b/src/agents/tools/gateway.test.ts index 7669261f7b6..21e7d65a26b 100644 --- a/src/agents/tools/gateway.test.ts +++ b/src/agents/tools/gateway.test.ts @@ -80,18 +80,49 @@ describe("resolveGatewayTarget – env URL override classification", () => { expect(resolveGatewayTarget()).toBeUndefined(); }); - it("classifies OPENCLAW_GATEWAY_URL loopback env override as 'local'", () => { + it("classifies OPENCLAW_GATEWAY_URL loopback env override as 'local' (no remote config)", () => { process.env.OPENCLAW_GATEWAY_URL = "ws://127.0.0.1:18789"; setConfig({}); expect(resolveGatewayTarget()).toBe("local"); }); - it("classifies CLAWDBOT_GATEWAY_URL loopback env override as 'local'", () => { + it("classifies CLAWDBOT_GATEWAY_URL loopback env override as 'local' (no remote config)", () => { process.env.CLAWDBOT_GATEWAY_URL = "ws://localhost:18789"; setConfig({}); expect(resolveGatewayTarget()).toBe("local"); }); + it("classifies loopback env URL as 'remote' when mode=remote with non-loopback remote URL (SSH tunnel, same port)", () => { + // Common SSH tunnel pattern: ssh -N -L 18789:remote-host:18789 + // OPENCLAW_GATEWAY_URL points to the local tunnel endpoint but gateway is remote. + process.env.OPENCLAW_GATEWAY_URL = "ws://127.0.0.1:18789"; + setConfig({ + gateway: { mode: "remote", remote: { url: "wss://remote.example.com" } }, + }); + expect(resolveGatewayTarget()).toBe("remote"); + }); + + it("classifies loopback env URL with different port as 'remote' when mode=remote with non-loopback remote URL (SSH tunnel, different port)", () => { + // SSH tunnel to a non-default port: ssh -N -L 9000:remote-host:9000 + process.env.OPENCLAW_GATEWAY_URL = "ws://127.0.0.1:9000"; + setConfig({ + gateway: { mode: "remote", remote: { url: "wss://remote.example.com" } }, + }); + expect(resolveGatewayTarget()).toBe("remote"); + }); + + it("classifies loopback env URL as 'local' when mode=remote but remote.url is absent (callGateway falls back to local)", () => { + process.env.OPENCLAW_GATEWAY_URL = "ws://127.0.0.1:18789"; + setConfig({ gateway: { mode: "remote" } }); + expect(resolveGatewayTarget()).toBe("local"); + }); + + it("classifies loopback env URL as 'local' when mode=remote but remote.url is a loopback address (local-only setup)", () => { + process.env.OPENCLAW_GATEWAY_URL = "ws://127.0.0.1:18789"; + setConfig({ gateway: { mode: "remote", remote: { url: "ws://127.0.0.1:18789" } } }); + expect(resolveGatewayTarget()).toBe("local"); + }); + it("classifies OPENCLAW_GATEWAY_URL matching gateway.remote.url as 'remote'", () => { process.env.OPENCLAW_GATEWAY_URL = "wss://remote.example.com"; setConfig({ @@ -148,7 +179,16 @@ describe("resolveGatewayTarget – env URL override classification", () => { setConfig({ gateway: { mode: "remote", remote: { url: "wss://remote.example.com" } }, }); - // OPENCLAW_GATEWAY_URL wins (loopback) → "local" + // OPENCLAW_GATEWAY_URL wins (loopback); mode=remote with non-loopback remote.url + // means this loopback is an SSH tunnel → "remote" + expect(resolveGatewayTarget()).toBe("remote"); + }); + + it("OPENCLAW_GATEWAY_URL takes precedence over env CLAWDBOT_GATEWAY_URL (no remote config → 'local')", () => { + process.env.OPENCLAW_GATEWAY_URL = "ws://127.0.0.1:18789"; + process.env.CLAWDBOT_GATEWAY_URL = "wss://remote.example.com"; + setConfig({}); + // OPENCLAW_GATEWAY_URL wins (loopback), no remote config → "local" expect(resolveGatewayTarget()).toBe("local"); }); }); @@ -161,7 +201,7 @@ describe("resolveGatewayTarget – explicit gatewayUrl override", () => { delete process.env.CLAWDBOT_GATEWAY_URL; }); - it("returns 'local' for loopback explicit gatewayUrl", () => { + it("returns 'local' for loopback explicit gatewayUrl (no remote config)", () => { expect(resolveGatewayTarget({ gatewayUrl: "ws://127.0.0.1:18789" })).toBe("local"); }); @@ -171,4 +211,11 @@ describe("resolveGatewayTarget – explicit gatewayUrl override", () => { }); expect(resolveGatewayTarget({ gatewayUrl: "wss://remote.example.com" })).toBe("remote"); }); + + it("returns 'remote' for loopback explicit gatewayUrl when mode=remote with non-loopback remote URL (SSH tunnel)", () => { + setConfig({ + gateway: { mode: "remote", remote: { url: "wss://remote.example.com" } }, + }); + expect(resolveGatewayTarget({ gatewayUrl: "ws://127.0.0.1:18789" })).toBe("remote"); + }); }); diff --git a/src/agents/tools/gateway.ts b/src/agents/tools/gateway.ts index fab886252e3..a08c5a3868a 100644 --- a/src/agents/tools/gateway.ts +++ b/src/agents/tools/gateway.ts @@ -53,6 +53,30 @@ function canonicalizeToolGatewayWsUrl(raw: string): { origin: string; key: strin return { origin, key }; } +/** + * Returns true when gateway.mode=remote is configured with a non-loopback remote URL. + * This indicates the user is connecting to a remote gateway, possibly via SSH port forwarding + * (ssh -N -L :remote-host:). In that case, a loopback gatewayUrl + * is a tunnel endpoint and should be classified as "remote" so deliveryContext is suppressed. + */ +function isNonLoopbackRemoteUrlConfigured(cfg: ReturnType): boolean { + if (cfg.gateway?.mode !== "remote") { + return false; + } + const remoteUrl = + typeof cfg.gateway?.remote?.url === "string" ? cfg.gateway.remote.url.trim() : ""; + if (!remoteUrl) { + return false; + } + try { + const parsed = new URL(remoteUrl); + const host = parsed.hostname.toLowerCase().replace(/^\[|\]$/g, ""); + return !(host === "127.0.0.1" || host === "localhost" || host === "::1"); + } catch { + return false; + } +} + function validateGatewayUrlOverrideForAgentTools(params: { cfg: ReturnType; urlOverride: string; @@ -82,7 +106,13 @@ function validateGatewayUrlOverrideForAgentTools(params: { const parsed = canonicalizeToolGatewayWsUrl(params.urlOverride); if (localAllowed.has(parsed.key)) { - return { url: parsed.origin, target: "local" }; + // A loopback URL on the configured port is normally the local gateway, but when + // gateway.mode=remote is configured with a non-loopback remote URL, the user is + // likely using SSH port forwarding (ssh -N -L ...) and this loopback is a tunnel + // endpoint pointing to a remote gateway. Classify as "remote" so deliveryContext + // is not forwarded to the remote server, which would misroute post-restart wake messages. + const target = isNonLoopbackRemoteUrlConfigured(cfg) ? "remote" : "local"; + return { url: parsed.origin, target }; } if (remoteKey && parsed.key === remoteKey) { return { url: parsed.origin, target: "remote" }; @@ -117,17 +147,22 @@ function resolveGatewayOverrideToken(params: { * Resolves whether a GatewayCallOptions points to a local or remote gateway. * Returns "remote" when a remote gatewayUrl override is present, OR when * gateway.mode=remote is configured with a gateway.remote.url set. - * Returns "local" for explicit loopback URL overrides (127.0.0.1, localhost, [::1]). + * Returns "local" for explicit loopback URL overrides (127.0.0.1, localhost, [::1]) + * UNLESS gateway.mode=remote is configured with a non-loopback remote URL, which indicates + * the loopback is an SSH tunnel endpoint — in that case returns "remote". * Returns undefined when no override is present and the effective target is the local gateway * (including the gateway.mode=remote + missing gateway.remote.url fallback-to-local case). * * This mirrors the URL resolution path used by callGateway/buildGatewayConnectionDetails so * that deliveryContext suppression decisions are based on the actual connection target, not just - * the configured mode. Two mismatches fixed vs the previous version: + * the configured mode. Mismatches fixed vs the previous version: * 1. gateway.mode=remote without gateway.remote.url: callGateway falls back to local loopback; * classifying that as "remote" would incorrectly suppress deliveryContext. * 2. Env URL overrides (OPENCLAW_GATEWAY_URL / CLAWDBOT_GATEWAY_URL) are picked up by * callGateway but were ignored here, causing incorrect local/remote classification. + * 3. Tunneled loopback URLs (ssh -N -L ...) when gateway.mode=remote with a non-loopback + * remote.url is configured: classifying as "local" would forward deliveryContext to the + * remote server, causing post-restart wake messages to be misrouted to the caller's chat. */ export function resolveGatewayTarget(opts?: GatewayCallOptions): GatewayOverrideTarget | undefined { const cfg = loadConfig(); @@ -153,7 +188,12 @@ export function resolveGatewayTarget(opts?: GatewayCallOptions): GatewayOverride // Normalize IPv6 brackets: "[::1]" → "::1" const host = parsed.hostname.toLowerCase().replace(/^\[|\]$/g, ""); const isLoopback = host === "127.0.0.1" || host === "localhost" || host === "::1"; - return isLoopback ? "local" : "remote"; + if (isLoopback) { + // When gateway.mode=remote is configured with a non-loopback remote URL, this + // loopback is likely an SSH tunnel endpoint — classify as "remote". + return isNonLoopbackRemoteUrlConfigured(cfg) ? "remote" : "local"; + } + return "remote"; } catch { // Truly malformed URL; callGateway will also fail. Fall through to config-based resolution. } From 7137478b451b3f45f97d77b5170a669f4bb2ad6e Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Tue, 10 Mar 2026 13:49:08 +0000 Subject: [PATCH 20/25] fix(gateway): classify loopback URL overrides by effective tunnel target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a loopback gatewayUrl override is on a port different from the configured local gateway port, it cannot be the local gateway itself — it must be an SSH tunnel endpoint. Previously such URLs were either rejected (explicit gatewayUrl path) or misclassified as 'local' (env URL fallback path), causing deliveryContext to be forwarded to the remote gateway and misrouting post-restart wake messages. Fix: - validateGatewayUrlOverrideForAgentTools: instead of throwing for loopback URLs not matching localAllowed, detect the non-local-port loopback case and return target='remote' (tunnel endpoint). - resolveGatewayTarget env URL fallback: compare the loopback URL's port against resolveGatewayPort(cfg); classify as 'remote' when the port differs, regardless of gateway.mode/remote.url config. Both cases (gateway.mode=local/unset and mode=remote without a non-loopback remote.url) are now handled correctly. Add tests covering: - loopback env URL on non-local port → 'remote' (no remote config) - loopback env URL on non-local port → 'remote' (mode=remote, no url) - loopback explicit gatewayUrl on non-local port → 'remote' (no config) - loopback explicit gatewayUrl on non-local port → 'remote' (mode=remote) Fixes #34580 CR chatgpt-codex-connector[bot] --- src/agents/tools/gateway.test.ts | 26 ++++++++++++++++++++++ src/agents/tools/gateway.ts | 38 +++++++++++++++++++++++++++++--- 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/src/agents/tools/gateway.test.ts b/src/agents/tools/gateway.test.ts index 21e7d65a26b..63d37bf2b48 100644 --- a/src/agents/tools/gateway.test.ts +++ b/src/agents/tools/gateway.test.ts @@ -117,6 +117,20 @@ describe("resolveGatewayTarget – env URL override classification", () => { expect(resolveGatewayTarget()).toBe("local"); }); + it("classifies loopback env URL on non-local port as 'remote' even without remote config (SSH tunnel, different port)", () => { + // ssh -N -L 9000:remote-host:9000 with gateway.mode=local (default): the loopback port + // (9000) differs from the local gateway port (18789) so it cannot be the local gateway. + process.env.OPENCLAW_GATEWAY_URL = "ws://127.0.0.1:9000"; + setConfig({}); + expect(resolveGatewayTarget()).toBe("remote"); + }); + + it("classifies loopback env URL on non-local port as 'remote' even when mode=remote but no remote.url (SSH tunnel, different port)", () => { + process.env.OPENCLAW_GATEWAY_URL = "ws://127.0.0.1:9000"; + setConfig({ gateway: { mode: "remote" } }); + expect(resolveGatewayTarget()).toBe("remote"); + }); + it("classifies loopback env URL as 'local' when mode=remote but remote.url is a loopback address (local-only setup)", () => { process.env.OPENCLAW_GATEWAY_URL = "ws://127.0.0.1:18789"; setConfig({ gateway: { mode: "remote", remote: { url: "ws://127.0.0.1:18789" } } }); @@ -218,4 +232,16 @@ describe("resolveGatewayTarget – explicit gatewayUrl override", () => { }); expect(resolveGatewayTarget({ gatewayUrl: "ws://127.0.0.1:18789" })).toBe("remote"); }); + + it("returns 'remote' for loopback explicit gatewayUrl on non-local port (SSH tunnel, no remote config)", () => { + // ssh -N -L 9000:remote-host:9000 with no remote config: loopback on port 9000 ≠ 18789 + // so it cannot be the local gateway — must be a tunnel endpoint → classify as "remote". + setConfig({}); + expect(resolveGatewayTarget({ gatewayUrl: "ws://127.0.0.1:9000" })).toBe("remote"); + }); + + it("returns 'remote' for loopback explicit gatewayUrl on non-local port (SSH tunnel, mode=remote no remote.url)", () => { + setConfig({ gateway: { mode: "remote" } }); + expect(resolveGatewayTarget({ gatewayUrl: "ws://localhost:9000" })).toBe("remote"); + }); }); diff --git a/src/agents/tools/gateway.ts b/src/agents/tools/gateway.ts index a08c5a3868a..e8c3728df67 100644 --- a/src/agents/tools/gateway.ts +++ b/src/agents/tools/gateway.ts @@ -77,6 +77,11 @@ function isNonLoopbackRemoteUrlConfigured(cfg: ReturnType): b } } +function isLoopbackHostname(hostname: string): boolean { + const h = hostname.toLowerCase().replace(/^\[|\]$/g, ""); + return h === "127.0.0.1" || h === "localhost" || h === "::1"; +} + function validateGatewayUrlOverrideForAgentTools(params: { cfg: ReturnType; urlOverride: string; @@ -117,6 +122,17 @@ function validateGatewayUrlOverrideForAgentTools(params: { if (remoteKey && parsed.key === remoteKey) { return { url: parsed.origin, target: "remote" }; } + // Loopback URL on a non-local port → must be an SSH tunnel endpoint → classify as remote. + // The `localAllowed` set only covers the configured gateway port. Any loopback on a different + // port cannot be the local gateway itself, so it must be a forwarded tunnel to a remote server. + // This handles cases where gateway.mode=local (or unset) but the user is SSH-forwarding + // via a non-default port: ssh -N -L :remote-host:. + // Classifying as "remote" suppresses deliveryContext so the remote gateway uses its own + // extractDeliveryInfo rather than receiving the caller's local chat route in the sentinel. + const urlForTunnelCheck = new URL(params.urlOverride.trim()); // already validated above + if (isLoopbackHostname(urlForTunnelCheck.hostname)) { + return { url: parsed.origin, target: "remote" }; + } throw new Error( [ "gatewayUrl override rejected.", @@ -163,6 +179,9 @@ function resolveGatewayOverrideToken(params: { * 3. Tunneled loopback URLs (ssh -N -L ...) when gateway.mode=remote with a non-loopback * remote.url is configured: classifying as "local" would forward deliveryContext to the * remote server, causing post-restart wake messages to be misrouted to the caller's chat. + * 4. Loopback URLs on a non-local port (ssh -N -L :...) with local mode or no remote + * URL configured: the non-local port cannot be the local gateway, so it must be a tunnel; + * classifying as "local" would forward deliveryContext to the remote server (misrouting). */ export function resolveGatewayTarget(opts?: GatewayCallOptions): GatewayOverrideTarget | undefined { const cfg = loadConfig(); @@ -189,9 +208,22 @@ export function resolveGatewayTarget(opts?: GatewayCallOptions): GatewayOverride const host = parsed.hostname.toLowerCase().replace(/^\[|\]$/g, ""); const isLoopback = host === "127.0.0.1" || host === "localhost" || host === "::1"; if (isLoopback) { - // When gateway.mode=remote is configured with a non-loopback remote URL, this - // loopback is likely an SSH tunnel endpoint — classify as "remote". - return isNonLoopbackRemoteUrlConfigured(cfg) ? "remote" : "local"; + // Classify as "remote" when: + // (a) gateway.mode=remote with a non-loopback remote URL — the loopback is + // an SSH tunnel endpoint (ssh -N -L :remote-host:), OR + // (b) the loopback port differs from the configured local gateway port — a + // non-local-port loopback cannot be the local gateway, so it must be a tunnel, + // regardless of whether gateway.mode=remote is configured. + const localPort = resolveGatewayPort(cfg); + const envPort = parsed.port + ? Number(parsed.port) + : parsed.protocol === "wss:" + ? 443 + : 80; + if (envPort !== localPort || isNonLoopbackRemoteUrlConfigured(cfg)) { + return "remote"; + } + return "local"; } return "remote"; } catch { From 29206a9bc6d3514a237dc58986242f44fc36fb2e Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Tue, 10 Mar 2026 16:46:52 +0000 Subject: [PATCH 21/25] fix(gateway): preserve local classification for loopback custom-port overrides --- src/agents/tools/gateway.test.ts | 35 +++++++++++++++++++----------- src/agents/tools/gateway.ts | 37 ++++++++++++++------------------ 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/src/agents/tools/gateway.test.ts b/src/agents/tools/gateway.test.ts index 63d37bf2b48..8f12e33c462 100644 --- a/src/agents/tools/gateway.test.ts +++ b/src/agents/tools/gateway.test.ts @@ -117,18 +117,21 @@ describe("resolveGatewayTarget – env URL override classification", () => { expect(resolveGatewayTarget()).toBe("local"); }); - it("classifies loopback env URL on non-local port as 'remote' even without remote config (SSH tunnel, different port)", () => { - // ssh -N -L 9000:remote-host:9000 with gateway.mode=local (default): the loopback port - // (9000) differs from the local gateway port (18789) so it cannot be the local gateway. + it("classifies loopback env URL on non-local port as 'local' without remote config (local gateway on custom port)", () => { + // ws://127.0.0.1:9000 with no non-loopback remote URL configured: cannot prove this is + // an SSH tunnel — it may simply be a local gateway on a custom port. Preserve "local" + // so deliveryContext is not suppressed and heartbeat wake-up routing stays correct. process.env.OPENCLAW_GATEWAY_URL = "ws://127.0.0.1:9000"; setConfig({}); - expect(resolveGatewayTarget()).toBe("remote"); + expect(resolveGatewayTarget()).toBe("local"); }); - it("classifies loopback env URL on non-local port as 'remote' even when mode=remote but no remote.url (SSH tunnel, different port)", () => { + it("classifies loopback env URL on non-local port as 'local' when mode=remote but no remote.url (no tunnel evidence)", () => { + // mode=remote without a non-loopback remote.url is insufficient to prove SSH tunnel; + // treat as local gateway on custom port. process.env.OPENCLAW_GATEWAY_URL = "ws://127.0.0.1:9000"; setConfig({ gateway: { mode: "remote" } }); - expect(resolveGatewayTarget()).toBe("remote"); + expect(resolveGatewayTarget()).toBe("local"); }); it("classifies loopback env URL as 'local' when mode=remote but remote.url is a loopback address (local-only setup)", () => { @@ -233,15 +236,23 @@ describe("resolveGatewayTarget – explicit gatewayUrl override", () => { expect(resolveGatewayTarget({ gatewayUrl: "ws://127.0.0.1:18789" })).toBe("remote"); }); - it("returns 'remote' for loopback explicit gatewayUrl on non-local port (SSH tunnel, no remote config)", () => { - // ssh -N -L 9000:remote-host:9000 with no remote config: loopback on port 9000 ≠ 18789 - // so it cannot be the local gateway — must be a tunnel endpoint → classify as "remote". + it("returns 'local' for loopback explicit gatewayUrl on non-local port without remote config (local gateway on custom port)", () => { + // ws://127.0.0.1:9000 with no non-loopback remote URL: cannot distinguish SSH tunnel + // from local gateway on a custom port — preserve "local" so deliveryContext is kept. setConfig({}); - expect(resolveGatewayTarget({ gatewayUrl: "ws://127.0.0.1:9000" })).toBe("remote"); + expect(resolveGatewayTarget({ gatewayUrl: "ws://127.0.0.1:9000" })).toBe("local"); }); - it("returns 'remote' for loopback explicit gatewayUrl on non-local port (SSH tunnel, mode=remote no remote.url)", () => { + it("returns 'local' for loopback explicit gatewayUrl on non-local port when mode=remote but no remote.url (no tunnel evidence)", () => { + // mode=remote without a non-loopback remote.url cannot prove SSH tunnel; treat as local. setConfig({ gateway: { mode: "remote" } }); - expect(resolveGatewayTarget({ gatewayUrl: "ws://localhost:9000" })).toBe("remote"); + expect(resolveGatewayTarget({ gatewayUrl: "ws://localhost:9000" })).toBe("local"); + }); + + it("returns 'remote' for loopback explicit gatewayUrl on non-local port when mode=remote with non-loopback remote.url (SSH tunnel)", () => { + // With a non-loopback remote URL configured, a custom-port loopback is unambiguously + // an SSH tunnel endpoint — classify as "remote" to suppress deliveryContext. + setConfig({ gateway: { mode: "remote", remote: { url: "wss://remote.example.com" } } }); + expect(resolveGatewayTarget({ gatewayUrl: "ws://127.0.0.1:9000" })).toBe("remote"); }); }); diff --git a/src/agents/tools/gateway.ts b/src/agents/tools/gateway.ts index e8c3728df67..84aa6101f40 100644 --- a/src/agents/tools/gateway.ts +++ b/src/agents/tools/gateway.ts @@ -122,16 +122,17 @@ function validateGatewayUrlOverrideForAgentTools(params: { if (remoteKey && parsed.key === remoteKey) { return { url: parsed.origin, target: "remote" }; } - // Loopback URL on a non-local port → must be an SSH tunnel endpoint → classify as remote. - // The `localAllowed` set only covers the configured gateway port. Any loopback on a different - // port cannot be the local gateway itself, so it must be a forwarded tunnel to a remote server. - // This handles cases where gateway.mode=local (or unset) but the user is SSH-forwarding - // via a non-default port: ssh -N -L :remote-host:. - // Classifying as "remote" suppresses deliveryContext so the remote gateway uses its own - // extractDeliveryInfo rather than receiving the caller's local chat route in the sentinel. + // Loopback URL on a non-configured port — could be either: + // (a) An SSH tunnel endpoint (ssh -N -L :remote-host:) → "remote" + // (b) A local gateway running on a custom/non-default port → "local" + // We can only distinguish (a) from (b) when a non-loopback remote URL is configured: + // that proves gateway.mode=remote with an external host, so a loopback URL on any port + // must be a forwarded tunnel. Without that evidence, treat the loopback as local so that + // deliveryContext is not suppressed and heartbeat wake-up routing stays correct. const urlForTunnelCheck = new URL(params.urlOverride.trim()); // already validated above if (isLoopbackHostname(urlForTunnelCheck.hostname)) { - return { url: parsed.origin, target: "remote" }; + const target = isNonLoopbackRemoteUrlConfigured(cfg) ? "remote" : "local"; + return { url: parsed.origin, target }; } throw new Error( [ @@ -208,19 +209,13 @@ export function resolveGatewayTarget(opts?: GatewayCallOptions): GatewayOverride const host = parsed.hostname.toLowerCase().replace(/^\[|\]$/g, ""); const isLoopback = host === "127.0.0.1" || host === "localhost" || host === "::1"; if (isLoopback) { - // Classify as "remote" when: - // (a) gateway.mode=remote with a non-loopback remote URL — the loopback is - // an SSH tunnel endpoint (ssh -N -L :remote-host:), OR - // (b) the loopback port differs from the configured local gateway port — a - // non-local-port loopback cannot be the local gateway, so it must be a tunnel, - // regardless of whether gateway.mode=remote is configured. - const localPort = resolveGatewayPort(cfg); - const envPort = parsed.port - ? Number(parsed.port) - : parsed.protocol === "wss:" - ? 443 - : 80; - if (envPort !== localPort || isNonLoopbackRemoteUrlConfigured(cfg)) { + // Classify as "remote" only when a non-loopback remote URL is configured, + // which proves the loopback is an SSH tunnel endpoint + // (ssh -N -L :remote-host:). Without that evidence + // a loopback URL on any port — including a non-default port — could be a + // local gateway on a custom port, so we preserve "local" classification to + // keep deliveryContext intact and avoid heartbeat-stale routing regressions. + if (isNonLoopbackRemoteUrlConfigured(cfg)) { return "remote"; } return "local"; From 3e1bee931a87d7c87288aa004c98d9f9973533de Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Wed, 11 Mar 2026 04:38:00 +0000 Subject: [PATCH 22/25] feat: split restart sentinel into user-facing and internal context messages - Add formatRestartSentinelUserMessage for clean user-facing delivery (omits status prefix and doctorHint) - Add formatRestartSentinelInternalContext for agent system prompt injection with full diagnostic details - Update server-restart-sentinel to use userMessage for direct channel delivery - Add tests for new formatting functions --- docs/reference/templates/AGENTS.md | 31 ++++++ docs/zh-CN/reference/AGENTS.default.md | 23 +++++ src/agents/tools/gateway-tool.ts | 2 +- src/gateway/server-restart-sentinel.test.ts | 23 +++-- src/gateway/server-restart-sentinel.ts | 25 +++-- src/infra/restart-sentinel.test.ts | 109 +++++++++++++++++++- src/infra/restart-sentinel.ts | 41 ++++++++ 7 files changed, 238 insertions(+), 16 deletions(-) diff --git a/docs/reference/templates/AGENTS.md b/docs/reference/templates/AGENTS.md index 9375684b0dd..216a52cdbbe 100644 --- a/docs/reference/templates/AGENTS.md +++ b/docs/reference/templates/AGENTS.md @@ -214,6 +214,37 @@ Think of it like a human reviewing their journal and updating their mental model The goal: Be helpful without being annoying. Check in a few times a day, do useful background work, but respect quiet time. +## 🔄 Gateway Restarts — Do It Right! + +**Never use `openclaw gateway restart` (CLI/shell).** It bypasses the restart sentinel, so you won't auto-resume or notify the user after restart. You'll just sit there silently until someone pings you. + +**Always restart via the gateway tool** (action=restart) or via `config.patch`/`config.apply` — these write a sentinel file before restarting, which the new process consumes to wake you up and message the user automatically. + +```bash +# ✅ Correct: restart via gateway tool (action=restart, sessionKey, note) +# ✅ Correct: config.patch with a key that requires restart (writes sentinel automatically) + +# ❌ Wrong: openclaw gateway restart — no sentinel, silent after restart +# ❌ Wrong: systemctl --user restart openclaw-gateway.service — same problem +``` + +### Which config keys trigger a real restart vs dynamic reload? + +**Full process restart** (sentinel written, agent wakes up): + +- `gateway.*`, `discovery.*`, `plugins.*`, `canvasHost.*` +- Any unrecognized/new config key + +**Hot reload** (no restart, no sentinel needed): + +- `hooks.*`, `cron.*`, `browser.*`, `models.*`, `agents.defaults.heartbeat` + +**Dynamic no-op** (read on next access, no process action): + +- `messages.*`, `agents.*`, `tools.*`, `routing.*`, `session.*`, `skills.*`, `secrets.*`, `meta.*`, `identity.*`, `logging.*`, `ui.*` + +**Rule of thumb:** If you want a test restart, patch `discovery.mdns.mode` to its current value — it's recognized as a restart-triggering key even if the value is unchanged. + ## Make It Yours This is a starting point. Add your own conventions, style, and rules as you figure out what works. diff --git a/docs/zh-CN/reference/AGENTS.default.md b/docs/zh-CN/reference/AGENTS.default.md index 84d4a01e21c..70e6688a374 100644 --- a/docs/zh-CN/reference/AGENTS.default.md +++ b/docs/zh-CN/reference/AGENTS.default.md @@ -120,6 +120,29 @@ git commit -m "Add Clawd workspace" - **bird** — X/Twitter CLI,无需浏览器即可发推、回复、阅读话题和搜索。 - **agent-tools** — 用于自动化和辅助脚本的实用工具包。 +## 🔄 网关重启 — 正确做法! + +**永远不要使用 `openclaw gateway restart`(CLI/shell)。** 这会绕过重启哨兵机制,导致重启后你无法自动恢复,也无法通知用户。你会静静地等待,直到有人 ping 你。 + +**始终通过 gateway 工具**(action=restart)或 `config.patch`/`config.apply` 触发重启——这些方式会在重启前写入哨兵文件,新进程启动后会消费该文件以唤醒你并自动通知用户。 + +### 哪些配置键会触发真正的重启? + +**完整进程重启**(写入哨兵,代理唤醒): + +- `gateway.*`、`discovery.*`、`plugins.*`、`canvasHost.*` +- 任何无法识别的新配置键 + +**热重载**(无需重启,无需哨兵): + +- `hooks.*`、`cron.*`、`browser.*`、`models.*`、`agents.defaults.heartbeat` + +**动态无操作**(下次访问时读取,不触发任何进程操作): + +- `messages.*`、`agents.*`、`tools.*`、`routing.*`、`session.*`、`skills.*`、`secrets.*`、`meta.*` + +**经验法则:** 如果需要测试重启,将 `discovery.mdns.mode` 修改为当前值——即使值未改变,它也会触发重启流程。 + ## 使用说明 - 脚本编写优先使用 `openclaw` CLI;mac 应用处理权限。 diff --git a/src/agents/tools/gateway-tool.ts b/src/agents/tools/gateway-tool.ts index ca125deb6a6..e49f73c57b5 100644 --- a/src/agents/tools/gateway-tool.ts +++ b/src/agents/tools/gateway-tool.ts @@ -80,7 +80,7 @@ export function createGatewayTool(opts?: { name: "gateway", ownerOnly: true, description: - "Restart, inspect a specific config schema path, apply config, or update the gateway in-place (SIGUSR1). Use config.schema.lookup with a targeted dot path before config edits. Use config.patch for safe partial config updates (merges with existing). Use config.apply only when replacing entire config. Both trigger restart after writing. Always pass a human-readable completion message via the `note` parameter so the system can deliver it to the user after restart.", + "Restart, inspect a specific config schema path, apply config, or update the gateway in-place (SIGUSR1). Use config.schema.lookup with a targeted dot path before config edits. Use config.patch for safe partial config updates (merges with existing). Use config.apply only when replacing entire config. Both trigger restart after writing. Always pass a human-readable completion message via the `note` parameter so the system can deliver it to the user after restart. IMPORTANT: Never use the `openclaw gateway restart` CLI command to restart — it bypasses the restart sentinel so the agent will not auto-resume or notify the user after restart. Always restart via this tool (action=restart) or via config.patch/config.apply, which write the sentinel before restarting. Config keys under gateway.*, discovery.*, plugins.*, and canvasHost.* trigger a real process restart; keys under messages.*, agents.*, tools.*, hooks.*, and most others apply dynamically without a restart.", parameters: GatewayToolSchema, execute: async (_toolCallId, args) => { const params = args as Record; diff --git a/src/gateway/server-restart-sentinel.test.ts b/src/gateway/server-restart-sentinel.test.ts index 008437bb037..87f3b8cee1f 100644 --- a/src/gateway/server-restart-sentinel.test.ts +++ b/src/gateway/server-restart-sentinel.test.ts @@ -13,6 +13,10 @@ const mocks = vi.hoisted(() => ({ }, })), formatRestartSentinelMessage: vi.fn(() => "restart message"), + formatRestartSentinelUserMessage: vi.fn(() => "Gateway restarted successfully."), + formatRestartSentinelInternalContext: vi.fn( + () => "[Gateway restart context — internal]\nkind: restart\nstatus: ok", + ), summarizeRestartSentinel: vi.fn(() => "restart summary"), resolveMainSessionKeyFromConfig: vi.fn(() => "agent:main:main"), parseSessionThreadInfo: vi.fn(() => ({ baseSessionKey: null, threadId: undefined })), @@ -39,6 +43,8 @@ vi.mock("../agents/agent-scope.js", () => ({ vi.mock("../infra/restart-sentinel.js", () => ({ consumeRestartSentinel: mocks.consumeRestartSentinel, formatRestartSentinelMessage: mocks.formatRestartSentinelMessage, + formatRestartSentinelUserMessage: mocks.formatRestartSentinelUserMessage, + formatRestartSentinelInternalContext: mocks.formatRestartSentinelInternalContext, summarizeRestartSentinel: mocks.summarizeRestartSentinel, })); @@ -105,21 +111,22 @@ describe("scheduleRestartSentinelWake – two-step delivery + resume", () => { it("delivers restart notice directly (model-independent) then resumes agent", async () => { await scheduleRestartSentinelWake({ deps: {} as never }); - // Step 1: deterministic delivery + // Step 1: deterministic delivery — uses human-friendly userMessage, not raw diagnostic expect(mocks.deliverOutboundPayloads).toHaveBeenCalledWith( expect.objectContaining({ channel: "whatsapp", to: "+15550002", accountId: "acct-2", - payloads: [{ text: "restart message" }], + payloads: [{ text: "Gateway restarted successfully." }], bestEffort: true, }), ); - // Step 2: agent resume + // Step 2: agent resume — userMessage as prompt, internalContext via extraSystemPrompt expect(mocks.agentCommand).toHaveBeenCalledWith( expect.objectContaining({ - message: "restart message", + message: "Gateway restarted successfully.", + extraSystemPrompt: expect.stringContaining("[Gateway restart context"), sessionKey: "agent:main:main", to: "+15550002", channel: "whatsapp", @@ -190,7 +197,7 @@ describe("scheduleRestartSentinelWake – fallback to enqueueSystemEvent", () => await scheduleRestartSentinelWake({ deps: {} as never }); expect(mocks.agentCommand).not.toHaveBeenCalled(); - expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("restart message", { + expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("Gateway restarted successfully.", { sessionKey: "agent:main:main", }); }); @@ -204,7 +211,7 @@ describe("scheduleRestartSentinelWake – fallback to enqueueSystemEvent", () => await scheduleRestartSentinelWake({ deps: {} as never }); expect(mocks.agentCommand).not.toHaveBeenCalled(); - expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("restart message", { + expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("Gateway restarted successfully.", { sessionKey: "agent:main:main", }); }); @@ -218,7 +225,7 @@ describe("scheduleRestartSentinelWake – fallback to enqueueSystemEvent", () => await scheduleRestartSentinelWake({ deps: {} as never }); expect(mocks.agentCommand).not.toHaveBeenCalled(); - expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("restart message", { + expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("Gateway restarted successfully.", { sessionKey: "agent:main:main", }); }); @@ -254,7 +261,7 @@ describe("scheduleRestartSentinelWake – fallback to enqueueSystemEvent", () => await scheduleRestartSentinelWake({ deps: {} as never }); - expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("restart message", { + expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("Gateway restarted successfully.", { sessionKey: "agent:main:main", }); // Resume step must still run after delivery failure diff --git a/src/gateway/server-restart-sentinel.ts b/src/gateway/server-restart-sentinel.ts index a8e503532c6..74f9268cdb4 100644 --- a/src/gateway/server-restart-sentinel.ts +++ b/src/gateway/server-restart-sentinel.ts @@ -9,7 +9,9 @@ import { buildOutboundSessionContext } from "../infra/outbound/session-context.j import { resolveOutboundTarget } from "../infra/outbound/targets.js"; import { consumeRestartSentinel, + formatRestartSentinelInternalContext, formatRestartSentinelMessage, + formatRestartSentinelUserMessage, summarizeRestartSentinel, } from "../infra/restart-sentinel.js"; import { enqueueSystemEvent } from "../infra/system-events.js"; @@ -24,7 +26,12 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) { } const payload = sentinel.payload; const sessionKey = payload.sessionKey?.trim(); + // Raw diagnostic message (used for system events and enqueue fallbacks). const message = formatRestartSentinelMessage(payload); + // Human-friendly message for direct user delivery — omits status prefix and doctorHint. + const userMessage = formatRestartSentinelUserMessage(payload); + // Full technical context injected into the agent's system prompt. + const internalContext = formatRestartSentinelInternalContext(payload); const summary = summarizeRestartSentinel(payload); if (!sessionKey) { @@ -56,7 +63,7 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) { const channel = channelRaw ? normalizeChannelId(channelRaw) : null; const to = origin?.to; if (!channel || !to) { - enqueueSystemEvent(message, { sessionKey }); + enqueueSystemEvent(userMessage, { sessionKey }); return; } @@ -68,7 +75,7 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) { mode: "implicit", }); if (!resolved.ok) { - enqueueSystemEvent(message, { sessionKey }); + enqueueSystemEvent(userMessage, { sessionKey }); return; } @@ -78,7 +85,9 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) { sessionThreadId ?? (origin?.threadId != null ? String(origin.threadId) : undefined); - // Step 1: deliver the restart notice deterministically — model-independent, guaranteed. + // Step 1: deliver a human-friendly restart notice deterministically — model-independent, + // guaranteed. Uses userMessage (omits raw diagnostic fields like status prefix and + // doctorHint) so the user sees a clean message even if the agent turn in Step 2 fails. // Slack uses replyToId (thread_ts) for threading; deliverOutboundPayloads does not do // this mapping automatically, so we convert here. See #17716. const isSlack = channel === "slack"; @@ -93,7 +102,7 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) { accountId: origin?.accountId, replyToId, threadId: resolvedThreadId, - payloads: [{ text: message }], + payloads: [{ text: userMessage }], session: outboundSession, bestEffort: true, }); @@ -102,18 +111,22 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) { // If it does throw (channel plugin/runtime error before best-effort handling is applied), // enqueue a system event so the user receives the restart notice even on delivery failure. // This preserves the prior behaviour where delivery errors in this path produced a fallback event. - enqueueSystemEvent(message, { sessionKey }); + enqueueSystemEvent(userMessage, { sessionKey }); } // Step 2: trigger an agent resume turn so the agent can continue autonomously // after restart. The model sees the restart context and can respond/take actions. + // internalContext is injected via extraSystemPrompt so the agent has full technical + // details (kind, status, note, doctorHint) without exposing raw diagnostics as a + // user-visible chat message. The agent's reply is what the user ultimately sees. // This is safe post-restart: scheduleRestartSentinelWake() runs in the new process // with zero in-flight replies, so the pre-restart race condition (ab4a08a82) does // not apply here. try { await agentCommand( { - message, + message: userMessage, + extraSystemPrompt: internalContext, sessionKey, to: resolved.to, channel, diff --git a/src/infra/restart-sentinel.test.ts b/src/infra/restart-sentinel.test.ts index c28504685bb..1f5df033341 100644 --- a/src/infra/restart-sentinel.test.ts +++ b/src/infra/restart-sentinel.test.ts @@ -5,8 +5,9 @@ import { afterEach, beforeEach, describe, expect, it } from "vitest"; import { captureEnv } from "../test-utils/env.js"; import { consumeRestartSentinel, - formatDoctorNonInteractiveHint, + formatRestartSentinelInternalContext, formatRestartSentinelMessage, + formatRestartSentinelUserMessage, readRestartSentinel, resolveRestartSentinelPath, summarizeRestartSentinel, @@ -160,6 +161,112 @@ describe("restart sentinel", () => { }); }); +describe("formatRestartSentinelUserMessage", () => { + it("returns note for successful restart with note", () => { + const payload = { + kind: "config-patch" as const, + status: "ok" as const, + ts: Date.now(), + message: "testing restart sentinel", + doctorHint: "Run: openclaw doctor --non-interactive", + }; + const result = formatRestartSentinelUserMessage(payload); + expect(result).toBe("testing restart sentinel"); + expect(result).not.toContain("Gateway restart"); + expect(result).not.toContain("config-patch"); + expect(result).not.toContain("doctor"); + }); + + it("returns generic success message when no note", () => { + const payload = { + kind: "update" as const, + status: "ok" as const, + ts: Date.now(), + }; + expect(formatRestartSentinelUserMessage(payload)).toBe("Gateway restarted successfully."); + }); + + it("returns failure message with note for error status", () => { + const payload = { + kind: "config-apply" as const, + status: "error" as const, + ts: Date.now(), + message: "disk full", + }; + const result = formatRestartSentinelUserMessage(payload); + expect(result).toBe("Gateway restart failed: disk full"); + }); + + it("returns generic failure message for error without note", () => { + const payload = { + kind: "restart" as const, + status: "error" as const, + ts: Date.now(), + }; + expect(formatRestartSentinelUserMessage(payload)).toBe("Gateway restart failed."); + }); + + it("never includes doctorHint", () => { + const payload = { + kind: "config-patch" as const, + status: "ok" as const, + ts: Date.now(), + message: "applied config", + doctorHint: "Run: openclaw doctor --non-interactive", + }; + expect(formatRestartSentinelUserMessage(payload)).not.toContain("doctor"); + expect(formatRestartSentinelUserMessage(payload)).not.toContain("openclaw"); + }); +}); + +describe("formatRestartSentinelInternalContext", () => { + it("includes kind, status, note, and doctorHint", () => { + const payload = { + kind: "config-patch" as const, + status: "ok" as const, + ts: Date.now(), + message: "testing restart sentinel", + doctorHint: "Run: openclaw doctor --non-interactive", + stats: { mode: "gateway.config-patch", reason: "discovery.mdns.mode changed" }, + }; + const result = formatRestartSentinelInternalContext(payload); + expect(result).toContain("kind: config-patch"); + expect(result).toContain("status: ok"); + expect(result).toContain("note: testing restart sentinel"); + expect(result).toContain("hint: Run: openclaw doctor"); + expect(result).toContain("mode: gateway.config-patch"); + expect(result).toContain("reason: discovery.mdns.mode changed"); + expect(result).toContain("internal"); + }); + + it("omits empty optional fields", () => { + const payload = { + kind: "restart" as const, + status: "ok" as const, + ts: Date.now(), + }; + const result = formatRestartSentinelInternalContext(payload); + expect(result).not.toContain("note:"); + expect(result).not.toContain("hint:"); + expect(result).not.toContain("reason:"); + expect(result).not.toContain("mode:"); + }); + + it("omits reason when it duplicates note", () => { + const note = "Applying config changes"; + const payload = { + kind: "config-apply" as const, + status: "ok" as const, + ts: Date.now(), + message: note, + stats: { reason: note }, + }; + const result = formatRestartSentinelInternalContext(payload); + const noteOccurrences = result.split(note).length - 1; + expect(noteOccurrences).toBe(1); + }); +}); + describe("restart sentinel message dedup", () => { it("omits duplicate Reason: line when stats.reason matches message", () => { const payload = { diff --git a/src/infra/restart-sentinel.ts b/src/infra/restart-sentinel.ts index baf8168047d..3e2d0e9ad2e 100644 --- a/src/infra/restart-sentinel.ts +++ b/src/infra/restart-sentinel.ts @@ -127,6 +127,47 @@ export function formatRestartSentinelMessage(payload: RestartSentinelPayload): s return lines.join("\n"); } +/** + * Human-friendly message for direct user delivery after a gateway restart. + * Omits raw diagnostic fields (status prefix, doctorHint) — those belong in + * the agent's internal context, not in the user-facing chat message. + */ +export function formatRestartSentinelUserMessage(payload: RestartSentinelPayload): string { + const note = payload.message?.trim(); + if (payload.status === "error") { + return note ? `Gateway restart failed: ${note}` : "Gateway restart failed."; + } + return note ?? "Gateway restarted successfully."; +} + +/** + * Full technical restart context injected into the agent's system prompt so + * it can reason about and respond to the restart without exposing raw + * diagnostic text directly in the user-facing chat message. + */ +export function formatRestartSentinelInternalContext(payload: RestartSentinelPayload): string { + const lines: string[] = [ + "[Gateway restart context — internal, do not surface raw details to user]", + `kind: ${payload.kind}`, + `status: ${payload.status}`, + ]; + const note = payload.message?.trim(); + if (note) { + lines.push(`note: ${note}`); + } + const reason = payload.stats?.reason?.trim(); + if (reason && reason !== note) { + lines.push(`reason: ${reason}`); + } + if (payload.stats?.mode?.trim()) { + lines.push(`mode: ${payload.stats.mode.trim()}`); + } + if (payload.doctorHint?.trim()) { + lines.push(`hint: ${payload.doctorHint.trim()}`); + } + return lines.join("\n"); +} + export function summarizeRestartSentinel(payload: RestartSentinelPayload): string { const kind = payload.kind; const status = payload.status; From 3163756e106fbbbefb7998f1f1f59ba43f459e45 Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Wed, 11 Mar 2026 07:06:35 +0000 Subject: [PATCH 23/25] fix: stop leaking raw sentinel note to Discord channel Remove Step 1 direct delivery (deliverOutboundPayloads) from the restart sentinel wake path. Raw note/doctorHint/status fields were being sent verbatim to the user's Discord DMs on every gateway restart. New design: - Agent composes the user-visible response (via agentCommand + extraSystemPrompt) - formatRestartSentinelUserMessage now returns a clean generic message, never the raw note field - Fallback enqueueSystemEvent paths use the clean message - Raw context goes only into internalContext (extraSystemPrompt) for the agent Fixes notes like 'fix webhook path for Twilio funnel' appearing as bot messages in Bryan's DMs. Blocker for PR #34580. --- src/gateway/server-restart-sentinel.test.ts | 79 ++++++--------------- src/gateway/server-restart-sentinel.ts | 55 +++++--------- src/infra/restart-sentinel.test.ts | 14 ++-- src/infra/restart-sentinel.ts | 12 ++-- 4 files changed, 53 insertions(+), 107 deletions(-) diff --git a/src/gateway/server-restart-sentinel.test.ts b/src/gateway/server-restart-sentinel.test.ts index 87f3b8cee1f..d8fb903f161 100644 --- a/src/gateway/server-restart-sentinel.test.ts +++ b/src/gateway/server-restart-sentinel.test.ts @@ -100,32 +100,24 @@ vi.mock("../infra/system-events.js", () => ({ const { scheduleRestartSentinelWake } = await import("./server-restart-sentinel.js"); // ───────────────────────────────────────────────────────────────────────────── -// Suite 1 — Core two-step delivery + resume flow +// Suite 1 — Agent resume flow (no direct channel delivery) // ───────────────────────────────────────────────────────────────────────────── -describe("scheduleRestartSentinelWake – two-step delivery + resume", () => { +describe("scheduleRestartSentinelWake – agent resume, no raw delivery", () => { beforeEach(() => { vi.clearAllMocks(); }); - it("delivers restart notice directly (model-independent) then resumes agent", async () => { + it("resumes agent with internal context and never delivers raw sentinel fields to channel", async () => { await scheduleRestartSentinelWake({ deps: {} as never }); - // Step 1: deterministic delivery — uses human-friendly userMessage, not raw diagnostic - expect(mocks.deliverOutboundPayloads).toHaveBeenCalledWith( - expect.objectContaining({ - channel: "whatsapp", - to: "+15550002", - accountId: "acct-2", - payloads: [{ text: "Gateway restarted successfully." }], - bestEffort: true, - }), - ); + // Raw sentinel fields must NEVER go directly to the channel — no deliverOutboundPayloads call. + expect(mocks.deliverOutboundPayloads).not.toHaveBeenCalled(); - // Step 2: agent resume — userMessage as prompt, internalContext via extraSystemPrompt + // Agent resume: summary as neutral wake prompt, full context in extraSystemPrompt only. expect(mocks.agentCommand).toHaveBeenCalledWith( expect.objectContaining({ - message: "Gateway restarted successfully.", + message: "restart summary", extraSystemPrompt: expect.stringContaining("[Gateway restart context"), sessionKey: "agent:main:main", to: "+15550002", @@ -142,14 +134,6 @@ describe("scheduleRestartSentinelWake – two-step delivery + resume", () => { expect(mocks.enqueueSystemEvent).not.toHaveBeenCalled(); }); - it("delivers notification before triggering agent resume (ordering guarantee)", async () => { - await scheduleRestartSentinelWake({ deps: {} as never }); - - const deliverOrder = mocks.deliverOutboundPayloads.mock.invocationCallOrder[0]; - const agentOrder = mocks.agentCommand.mock.invocationCallOrder[0]; - expect(deliverOrder).toBeLessThan(agentOrder); - }); - it("passes senderIsOwner=false to agentCommand (no privilege escalation)", async () => { await scheduleRestartSentinelWake({ deps: {} as never }); @@ -230,43 +214,19 @@ describe("scheduleRestartSentinelWake – fallback to enqueueSystemEvent", () => }); }); - it("falls back to enqueueSystemEvent (with error) when agentCommand throws", async () => { + it("falls back to enqueueSystemEvent (with summary + error) when agentCommand throws", async () => { mocks.agentCommand.mockRejectedValueOnce(new Error("agent failed")); await scheduleRestartSentinelWake({ deps: {} as never }); - // Direct delivery should still have been attempted - expect(mocks.deliverOutboundPayloads).toHaveBeenCalled(); - // Then fallback for the resume + // No direct delivery — never. + expect(mocks.deliverOutboundPayloads).not.toHaveBeenCalled(); + // Fallback enqueues summary + error so the user isn't left silent. expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith( expect.stringContaining("restart summary"), { sessionKey: "agent:main:main" }, ); }); - - it("still calls agentCommand even if deliverOutboundPayloads throws (bestEffort guard)", async () => { - // bestEffort: true means it shouldn't normally throw, but even if it does, resume continues - mocks.deliverOutboundPayloads.mockRejectedValueOnce(new Error("deliver failed")); - - await scheduleRestartSentinelWake({ deps: {} as never }); - - expect(mocks.agentCommand).toHaveBeenCalled(); - }); - - it("enqueues system event when deliverOutboundPayloads throws so user is not left silent", async () => { - // Guard: if the channel plugin throws before bestEffort handling kicks in, the catch block - // must enqueue a system event — otherwise the user receives neither the deterministic notice - // nor any fallback, regressing the prior behaviour. See PR #34580 CR comment 2909058539. - mocks.deliverOutboundPayloads.mockRejectedValueOnce(new Error("plugin error")); - - await scheduleRestartSentinelWake({ deps: {} as never }); - - expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("Gateway restarted successfully.", { - sessionKey: "agent:main:main", - }); - // Resume step must still run after delivery failure - expect(mocks.agentCommand).toHaveBeenCalled(); - }); }); // ───────────────────────────────────────────────────────────────────────────── @@ -278,7 +238,7 @@ describe("scheduleRestartSentinelWake – thread routing", () => { vi.clearAllMocks(); }); - it("maps threadId to replyToId and clears threadId for Slack deliverOutboundPayloads", async () => { + it("passes Slack threadId to agentCommand (Slack threading handled internally by agentCommand)", async () => { mocks.consumeRestartSentinel.mockResolvedValueOnce({ payload: { sessionKey: "agent:main:main", @@ -290,12 +250,13 @@ describe("scheduleRestartSentinelWake – thread routing", () => { await scheduleRestartSentinelWake({ deps: {} as never }); - const deliverArgs = getArg>(mocks.deliverOutboundPayloads, 0); - expect(deliverArgs.replyToId).toBe("1234567890.123456"); - expect(deliverArgs.threadId).toBeUndefined(); + // No direct delivery — agentCommand is the only delivery path. + expect(mocks.deliverOutboundPayloads).not.toHaveBeenCalled(); + const agentOpts = getArg>(mocks.agentCommand, 0); + expect(agentOpts.threadId).toBe("1234567890.123456"); }); - it("passes threadId directly (not replyToId) for non-Slack channels", async () => { + it("passes threadId directly for non-Slack channels", async () => { mocks.consumeRestartSentinel.mockResolvedValueOnce({ payload: { sessionKey: "agent:main:main", @@ -306,9 +267,9 @@ describe("scheduleRestartSentinelWake – thread routing", () => { await scheduleRestartSentinelWake({ deps: {} as never }); - const deliverArgs = getArg>(mocks.deliverOutboundPayloads, 0); - expect(deliverArgs.threadId).toBe("discord-thread-id"); - expect(deliverArgs.replyToId).toBeUndefined(); + expect(mocks.deliverOutboundPayloads).not.toHaveBeenCalled(); + const agentOpts = getArg>(mocks.agentCommand, 0); + expect(agentOpts.threadId).toBe("discord-thread-id"); }); it("passes threadId to agentCommand for non-Slack threading", async () => { diff --git a/src/gateway/server-restart-sentinel.ts b/src/gateway/server-restart-sentinel.ts index 74f9268cdb4..95ce7d53049 100644 --- a/src/gateway/server-restart-sentinel.ts +++ b/src/gateway/server-restart-sentinel.ts @@ -4,8 +4,6 @@ import type { CliDeps } from "../cli/deps.js"; import { agentCommand } from "../commands/agent.js"; import { resolveMainSessionKeyFromConfig } from "../config/sessions.js"; import { parseSessionThreadInfo } from "../config/sessions/delivery-info.js"; -import { deliverOutboundPayloads } from "../infra/outbound/deliver.js"; -import { buildOutboundSessionContext } from "../infra/outbound/session-context.js"; import { resolveOutboundTarget } from "../infra/outbound/targets.js"; import { consumeRestartSentinel, @@ -85,47 +83,27 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) { sessionThreadId ?? (origin?.threadId != null ? String(origin.threadId) : undefined); - // Step 1: deliver a human-friendly restart notice deterministically — model-independent, - // guaranteed. Uses userMessage (omits raw diagnostic fields like status prefix and - // doctorHint) so the user sees a clean message even if the agent turn in Step 2 fails. - // Slack uses replyToId (thread_ts) for threading; deliverOutboundPayloads does not do - // this mapping automatically, so we convert here. See #17716. - const isSlack = channel === "slack"; - const replyToId = isSlack && threadId != null && threadId !== "" ? String(threadId) : undefined; - const resolvedThreadId = isSlack ? undefined : threadId; - const outboundSession = buildOutboundSessionContext({ cfg, sessionKey }); - try { - await deliverOutboundPayloads({ - cfg, - channel, - to: resolved.to, - accountId: origin?.accountId, - replyToId, - threadId: resolvedThreadId, - payloads: [{ text: userMessage }], - session: outboundSession, - bestEffort: true, - }); - } catch { - // bestEffort: true means this should not throw, but guard anyway. - // If it does throw (channel plugin/runtime error before best-effort handling is applied), - // enqueue a system event so the user receives the restart notice even on delivery failure. - // This preserves the prior behaviour where delivery errors in this path produced a fallback event. - enqueueSystemEvent(userMessage, { sessionKey }); - } - - // Step 2: trigger an agent resume turn so the agent can continue autonomously - // after restart. The model sees the restart context and can respond/take actions. - // internalContext is injected via extraSystemPrompt so the agent has full technical - // details (kind, status, note, doctorHint) without exposing raw diagnostics as a - // user-visible chat message. The agent's reply is what the user ultimately sees. + // Trigger an agent resume turn so the agent can compose a natural response and + // continue autonomously after restart. The restart context is injected via + // extraSystemPrompt — the raw note, doctorHint, and status fields are NEVER + // sent directly to the channel. Only the agent's composed reply reaches the user. + // + // summary is used as the neutral wake prompt (e.g. "Gateway restart config-patch ok"). + // It is an internal technical label; the agent sees it but users do not — only the + // agent's response is delivered. + // // This is safe post-restart: scheduleRestartSentinelWake() runs in the new process // with zero in-flight replies, so the pre-restart race condition (ab4a08a82) does // not apply here. + // + // Explicitly set senderIsOwner: false. The restart wake runs in a new process after + // an operator-triggered restart, and we cannot reliably infer the original sender's + // authorization level. Defaulting to false prevents privilege escalation where any + // restarted session would inherit owner-level access. See #18612. try { await agentCommand( { - message: userMessage, + message: summary, extraSystemPrompt: internalContext, sessionKey, to: resolved.to, @@ -135,11 +113,14 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) { messageChannel: channel, threadId, accountId: origin?.accountId, + senderIsOwner: false, }, defaultRuntime, params.deps, ); } catch (err) { + // Agent failed — fall back to a clean restart notice without raw sentinel fields + // so the user isn't left completely silent after a restart. enqueueSystemEvent(`${summary}\n${String(err)}`, { sessionKey }); } } diff --git a/src/infra/restart-sentinel.test.ts b/src/infra/restart-sentinel.test.ts index 1f5df033341..8816376624c 100644 --- a/src/infra/restart-sentinel.test.ts +++ b/src/infra/restart-sentinel.test.ts @@ -162,7 +162,9 @@ describe("restart sentinel", () => { }); describe("formatRestartSentinelUserMessage", () => { - it("returns note for successful restart with note", () => { + it("returns generic success message regardless of note (note is internal only)", () => { + // The `note`/`message` field is an operator annotation — it must never be surfaced + // directly to the user. Only the agent (via internalContext) should see it. const payload = { kind: "config-patch" as const, status: "ok" as const, @@ -171,8 +173,8 @@ describe("formatRestartSentinelUserMessage", () => { doctorHint: "Run: openclaw doctor --non-interactive", }; const result = formatRestartSentinelUserMessage(payload); - expect(result).toBe("testing restart sentinel"); - expect(result).not.toContain("Gateway restart"); + expect(result).toBe("Gateway restarted successfully."); + expect(result).not.toContain("testing restart sentinel"); expect(result).not.toContain("config-patch"); expect(result).not.toContain("doctor"); }); @@ -186,7 +188,8 @@ describe("formatRestartSentinelUserMessage", () => { expect(formatRestartSentinelUserMessage(payload)).toBe("Gateway restarted successfully."); }); - it("returns failure message with note for error status", () => { + it("returns generic failure message for error status (note is internal only)", () => { + // Raw note must not appear in user-facing fallback even on error. const payload = { kind: "config-apply" as const, status: "error" as const, @@ -194,7 +197,8 @@ describe("formatRestartSentinelUserMessage", () => { message: "disk full", }; const result = formatRestartSentinelUserMessage(payload); - expect(result).toBe("Gateway restart failed: disk full"); + expect(result).toBe("Gateway restart failed."); + expect(result).not.toContain("disk full"); }); it("returns generic failure message for error without note", () => { diff --git a/src/infra/restart-sentinel.ts b/src/infra/restart-sentinel.ts index 3e2d0e9ad2e..3feb660088b 100644 --- a/src/infra/restart-sentinel.ts +++ b/src/infra/restart-sentinel.ts @@ -128,16 +128,16 @@ export function formatRestartSentinelMessage(payload: RestartSentinelPayload): s } /** - * Human-friendly message for direct user delivery after a gateway restart. - * Omits raw diagnostic fields (status prefix, doctorHint) — those belong in - * the agent's internal context, not in the user-facing chat message. + * Clean fallback message for system-event delivery when the agent cannot be woken. + * Never includes the raw `note`/`message` field — that belongs in the agent's + * internal context only (see formatRestartSentinelInternalContext). The note is + * an operator annotation, not a user-facing string. */ export function formatRestartSentinelUserMessage(payload: RestartSentinelPayload): string { - const note = payload.message?.trim(); if (payload.status === "error") { - return note ? `Gateway restart failed: ${note}` : "Gateway restart failed."; + return "Gateway restart failed."; } - return note ?? "Gateway restarted successfully."; + return "Gateway restarted successfully."; } /** From b588f04a029d7da5eb74db9690fe0c79bbf79811 Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Wed, 11 Mar 2026 07:44:15 +0000 Subject: [PATCH 24/25] fix: distinguish skipped restarts from successful restarts in user message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit formatRestartSentinelUserMessage now returns a distinct message for status=skipped instead of falling through to the success message. Previously, skipped updates (e.g. update.run with nothing to update) would be reported as 'Gateway restarted successfully.' — misleading operators into thinking a restart occurred when it did not. Adds two new tests for the skipped status case. --- src/infra/restart-sentinel.test.ts | 23 +++++++++++++++++++++++ src/infra/restart-sentinel.ts | 3 +++ 2 files changed, 26 insertions(+) diff --git a/src/infra/restart-sentinel.test.ts b/src/infra/restart-sentinel.test.ts index 8816376624c..e69cdfb5baf 100644 --- a/src/infra/restart-sentinel.test.ts +++ b/src/infra/restart-sentinel.test.ts @@ -210,6 +210,29 @@ describe("formatRestartSentinelUserMessage", () => { expect(formatRestartSentinelUserMessage(payload)).toBe("Gateway restart failed."); }); + it("returns skipped message for skipped status", () => { + const payload = { + kind: "update" as const, + status: "skipped" as const, + ts: Date.now(), + }; + expect(formatRestartSentinelUserMessage(payload)).toBe( + "Gateway restart skipped (no restart was performed).", + ); + }); + + it("returns skipped message for skipped status even with a note", () => { + const payload = { + kind: "update" as const, + status: "skipped" as const, + ts: Date.now(), + message: "update already up to date", + }; + const result = formatRestartSentinelUserMessage(payload); + expect(result).toBe("Gateway restart skipped (no restart was performed)."); + expect(result).not.toContain("already up to date"); + }); + it("never includes doctorHint", () => { const payload = { kind: "config-patch" as const, diff --git a/src/infra/restart-sentinel.ts b/src/infra/restart-sentinel.ts index 3feb660088b..c15e2aee7cb 100644 --- a/src/infra/restart-sentinel.ts +++ b/src/infra/restart-sentinel.ts @@ -137,6 +137,9 @@ export function formatRestartSentinelUserMessage(payload: RestartSentinelPayload if (payload.status === "error") { return "Gateway restart failed."; } + if (payload.status === "skipped") { + return "Gateway restart skipped (no restart was performed)."; + } return "Gateway restarted successfully."; } From 8b91aded4820c035e3008de0db58a920d1c3547f Mon Sep 17 00:00:00 2001 From: Bryan Marty Date: Fri, 13 Mar 2026 13:45:11 +0000 Subject: [PATCH 25/25] fix: classify loopback gateway.remote.url targets as local When gateway.mode=remote is configured with a loopback remote.url (e.g. ws://127.0.0.1:18789), resolveGatewayTarget now returns undefined (local) instead of 'remote'. A loopback address is indistinguishable from a local gateway on a custom port; without a non-loopback URL proving SSH tunnel usage, classifying it as remote suppresses deliveryContext in createGatewayTool, causing dropped/misrouted post-restart wake messages via stale extractDeliveryInfo routing. The config-based path now uses isNonLoopbackRemoteUrlConfigured (consistent with the override path) so both paths apply the same loopback-detection logic. Fixes: chatgpt-codex-connector CR on PR #34580 (comment 2930418168) --- src/agents/tools/gateway.test.ts | 9 +++++++++ src/agents/tools/gateway.ts | 12 ++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/agents/tools/gateway.test.ts b/src/agents/tools/gateway.test.ts index 8f12e33c462..9f0d16eb361 100644 --- a/src/agents/tools/gateway.test.ts +++ b/src/agents/tools/gateway.test.ts @@ -75,6 +75,15 @@ describe("resolveGatewayTarget – env URL override classification", () => { expect(resolveGatewayTarget()).toBeUndefined(); }); + it("returns undefined (local) when gateway.mode=remote with a loopback remote.url (no tunnel evidence)", () => { + // A configured loopback remote.url (e.g. ws://127.0.0.1:18789) is indistinguishable + // from a local gateway on a custom port. Without a non-loopback URL proving SSH tunnel + // usage, classify as local so deliveryContext is preserved and post-restart wake + // messages are not misrouted via stale extractDeliveryInfo routing. + setConfig({ gateway: { mode: "remote", remote: { url: "ws://127.0.0.1:18789" } } }); + expect(resolveGatewayTarget()).toBeUndefined(); + }); + it("returns undefined when gateway.mode=remote but gateway.remote.url is empty string", () => { setConfig({ gateway: { mode: "remote", remote: { url: " " } } }); expect(resolveGatewayTarget()).toBeUndefined(); diff --git a/src/agents/tools/gateway.ts b/src/agents/tools/gateway.ts index 84aa6101f40..613f57343e3 100644 --- a/src/agents/tools/gateway.ts +++ b/src/agents/tools/gateway.ts @@ -226,12 +226,12 @@ export function resolveGatewayTarget(opts?: GatewayCallOptions): GatewayOverride } } } - // No env override. When mode=remote with a configured remote URL → truly remote. - // When mode=remote but remote.url is absent, callGateway falls back to local loopback — - // classify that as local (undefined) so deliveryContext is not suppressed. - const remoteUrl = - cfg.gateway?.mode === "remote" ? trimToUndefined(cfg.gateway?.remote?.url) : undefined; - return cfg.gateway?.mode === "remote" && remoteUrl !== undefined ? "remote" : undefined; + // No env override. Classify as "remote" only when mode=remote is configured with a + // non-loopback remote URL. Loopback remote.url (e.g. ws://127.0.0.1:18789) is + // indistinguishable from a local gateway on a custom port; without a non-loopback + // URL proving SSH tunnel usage, treat it as local so deliveryContext is preserved + // and post-restart wake messages are not misrouted. + return isNonLoopbackRemoteUrlConfigured(cfg) ? "remote" : undefined; } return validateGatewayUrlOverrideForAgentTools({ cfg,