fix: stop leaking raw sentinel note to Discord channel

Remove Step 1 direct delivery (deliverOutboundPayloads) from the restart
sentinel wake path. Raw note/doctorHint/status fields were being sent
verbatim to the user's Discord DMs on every gateway restart.

New design:
- Agent composes the user-visible response (via agentCommand + extraSystemPrompt)
- formatRestartSentinelUserMessage now returns a clean generic message,
  never the raw note field
- Fallback enqueueSystemEvent paths use the clean message
- Raw context goes only into internalContext (extraSystemPrompt) for the agent

Fixes notes like 'fix webhook path for Twilio funnel' appearing as bot
messages in Bryan's DMs. Blocker for PR #34580.
This commit is contained in:
Bryan Marty 2026-03-11 07:06:35 +00:00
parent 3e1bee931a
commit 3163756e10
No known key found for this signature in database
4 changed files with 53 additions and 107 deletions

View File

@ -100,32 +100,24 @@ vi.mock("../infra/system-events.js", () => ({
const { scheduleRestartSentinelWake } = await import("./server-restart-sentinel.js");
// ─────────────────────────────────────────────────────────────────────────────
// Suite 1 — Core two-step delivery + resume flow
// Suite 1 — Agent resume flow (no direct channel delivery)
// ─────────────────────────────────────────────────────────────────────────────
describe("scheduleRestartSentinelWake two-step delivery + resume", () => {
describe("scheduleRestartSentinelWake agent resume, no raw delivery", () => {
beforeEach(() => {
vi.clearAllMocks();
});
it("delivers restart notice directly (model-independent) then resumes agent", async () => {
it("resumes agent with internal context and never delivers raw sentinel fields to channel", async () => {
await scheduleRestartSentinelWake({ deps: {} as never });
// Step 1: deterministic delivery — uses human-friendly userMessage, not raw diagnostic
expect(mocks.deliverOutboundPayloads).toHaveBeenCalledWith(
expect.objectContaining({
channel: "whatsapp",
to: "+15550002",
accountId: "acct-2",
payloads: [{ text: "Gateway restarted successfully." }],
bestEffort: true,
}),
);
// Raw sentinel fields must NEVER go directly to the channel — no deliverOutboundPayloads call.
expect(mocks.deliverOutboundPayloads).not.toHaveBeenCalled();
// Step 2: agent resume — userMessage as prompt, internalContext via extraSystemPrompt
// Agent resume: summary as neutral wake prompt, full context in extraSystemPrompt only.
expect(mocks.agentCommand).toHaveBeenCalledWith(
expect.objectContaining({
message: "Gateway restarted successfully.",
message: "restart summary",
extraSystemPrompt: expect.stringContaining("[Gateway restart context"),
sessionKey: "agent:main:main",
to: "+15550002",
@ -142,14 +134,6 @@ describe("scheduleRestartSentinelWake two-step delivery + resume", () => {
expect(mocks.enqueueSystemEvent).not.toHaveBeenCalled();
});
it("delivers notification before triggering agent resume (ordering guarantee)", async () => {
await scheduleRestartSentinelWake({ deps: {} as never });
const deliverOrder = mocks.deliverOutboundPayloads.mock.invocationCallOrder[0];
const agentOrder = mocks.agentCommand.mock.invocationCallOrder[0];
expect(deliverOrder).toBeLessThan(agentOrder);
});
it("passes senderIsOwner=false to agentCommand (no privilege escalation)", async () => {
await scheduleRestartSentinelWake({ deps: {} as never });
@ -230,43 +214,19 @@ describe("scheduleRestartSentinelWake fallback to enqueueSystemEvent", () =>
});
});
it("falls back to enqueueSystemEvent (with error) when agentCommand throws", async () => {
it("falls back to enqueueSystemEvent (with summary + error) when agentCommand throws", async () => {
mocks.agentCommand.mockRejectedValueOnce(new Error("agent failed"));
await scheduleRestartSentinelWake({ deps: {} as never });
// Direct delivery should still have been attempted
expect(mocks.deliverOutboundPayloads).toHaveBeenCalled();
// Then fallback for the resume
// No direct delivery — never.
expect(mocks.deliverOutboundPayloads).not.toHaveBeenCalled();
// Fallback enqueues summary + error so the user isn't left silent.
expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith(
expect.stringContaining("restart summary"),
{ sessionKey: "agent:main:main" },
);
});
it("still calls agentCommand even if deliverOutboundPayloads throws (bestEffort guard)", async () => {
// bestEffort: true means it shouldn't normally throw, but even if it does, resume continues
mocks.deliverOutboundPayloads.mockRejectedValueOnce(new Error("deliver failed"));
await scheduleRestartSentinelWake({ deps: {} as never });
expect(mocks.agentCommand).toHaveBeenCalled();
});
it("enqueues system event when deliverOutboundPayloads throws so user is not left silent", async () => {
// Guard: if the channel plugin throws before bestEffort handling kicks in, the catch block
// must enqueue a system event — otherwise the user receives neither the deterministic notice
// nor any fallback, regressing the prior behaviour. See PR #34580 CR comment 2909058539.
mocks.deliverOutboundPayloads.mockRejectedValueOnce(new Error("plugin error"));
await scheduleRestartSentinelWake({ deps: {} as never });
expect(mocks.enqueueSystemEvent).toHaveBeenCalledWith("Gateway restarted successfully.", {
sessionKey: "agent:main:main",
});
// Resume step must still run after delivery failure
expect(mocks.agentCommand).toHaveBeenCalled();
});
});
// ─────────────────────────────────────────────────────────────────────────────
@ -278,7 +238,7 @@ describe("scheduleRestartSentinelWake thread routing", () => {
vi.clearAllMocks();
});
it("maps threadId to replyToId and clears threadId for Slack deliverOutboundPayloads", async () => {
it("passes Slack threadId to agentCommand (Slack threading handled internally by agentCommand)", async () => {
mocks.consumeRestartSentinel.mockResolvedValueOnce({
payload: {
sessionKey: "agent:main:main",
@ -290,12 +250,13 @@ describe("scheduleRestartSentinelWake thread routing", () => {
await scheduleRestartSentinelWake({ deps: {} as never });
const deliverArgs = getArg<Record<string, unknown>>(mocks.deliverOutboundPayloads, 0);
expect(deliverArgs.replyToId).toBe("1234567890.123456");
expect(deliverArgs.threadId).toBeUndefined();
// No direct delivery — agentCommand is the only delivery path.
expect(mocks.deliverOutboundPayloads).not.toHaveBeenCalled();
const agentOpts = getArg<Record<string, unknown>>(mocks.agentCommand, 0);
expect(agentOpts.threadId).toBe("1234567890.123456");
});
it("passes threadId directly (not replyToId) for non-Slack channels", async () => {
it("passes threadId directly for non-Slack channels", async () => {
mocks.consumeRestartSentinel.mockResolvedValueOnce({
payload: {
sessionKey: "agent:main:main",
@ -306,9 +267,9 @@ describe("scheduleRestartSentinelWake thread routing", () => {
await scheduleRestartSentinelWake({ deps: {} as never });
const deliverArgs = getArg<Record<string, unknown>>(mocks.deliverOutboundPayloads, 0);
expect(deliverArgs.threadId).toBe("discord-thread-id");
expect(deliverArgs.replyToId).toBeUndefined();
expect(mocks.deliverOutboundPayloads).not.toHaveBeenCalled();
const agentOpts = getArg<Record<string, unknown>>(mocks.agentCommand, 0);
expect(agentOpts.threadId).toBe("discord-thread-id");
});
it("passes threadId to agentCommand for non-Slack threading", async () => {

View File

@ -4,8 +4,6 @@ import type { CliDeps } from "../cli/deps.js";
import { agentCommand } from "../commands/agent.js";
import { resolveMainSessionKeyFromConfig } from "../config/sessions.js";
import { parseSessionThreadInfo } from "../config/sessions/delivery-info.js";
import { deliverOutboundPayloads } from "../infra/outbound/deliver.js";
import { buildOutboundSessionContext } from "../infra/outbound/session-context.js";
import { resolveOutboundTarget } from "../infra/outbound/targets.js";
import {
consumeRestartSentinel,
@ -85,47 +83,27 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) {
sessionThreadId ??
(origin?.threadId != null ? String(origin.threadId) : undefined);
// Step 1: deliver a human-friendly restart notice deterministically — model-independent,
// guaranteed. Uses userMessage (omits raw diagnostic fields like status prefix and
// doctorHint) so the user sees a clean message even if the agent turn in Step 2 fails.
// Slack uses replyToId (thread_ts) for threading; deliverOutboundPayloads does not do
// this mapping automatically, so we convert here. See #17716.
const isSlack = channel === "slack";
const replyToId = isSlack && threadId != null && threadId !== "" ? String(threadId) : undefined;
const resolvedThreadId = isSlack ? undefined : threadId;
const outboundSession = buildOutboundSessionContext({ cfg, sessionKey });
try {
await deliverOutboundPayloads({
cfg,
channel,
to: resolved.to,
accountId: origin?.accountId,
replyToId,
threadId: resolvedThreadId,
payloads: [{ text: userMessage }],
session: outboundSession,
bestEffort: true,
});
} catch {
// bestEffort: true means this should not throw, but guard anyway.
// If it does throw (channel plugin/runtime error before best-effort handling is applied),
// enqueue a system event so the user receives the restart notice even on delivery failure.
// This preserves the prior behaviour where delivery errors in this path produced a fallback event.
enqueueSystemEvent(userMessage, { sessionKey });
}
// Step 2: trigger an agent resume turn so the agent can continue autonomously
// after restart. The model sees the restart context and can respond/take actions.
// internalContext is injected via extraSystemPrompt so the agent has full technical
// details (kind, status, note, doctorHint) without exposing raw diagnostics as a
// user-visible chat message. The agent's reply is what the user ultimately sees.
// Trigger an agent resume turn so the agent can compose a natural response and
// continue autonomously after restart. The restart context is injected via
// extraSystemPrompt — the raw note, doctorHint, and status fields are NEVER
// sent directly to the channel. Only the agent's composed reply reaches the user.
//
// summary is used as the neutral wake prompt (e.g. "Gateway restart config-patch ok").
// It is an internal technical label; the agent sees it but users do not — only the
// agent's response is delivered.
//
// This is safe post-restart: scheduleRestartSentinelWake() runs in the new process
// with zero in-flight replies, so the pre-restart race condition (ab4a08a82) does
// not apply here.
//
// Explicitly set senderIsOwner: false. The restart wake runs in a new process after
// an operator-triggered restart, and we cannot reliably infer the original sender's
// authorization level. Defaulting to false prevents privilege escalation where any
// restarted session would inherit owner-level access. See #18612.
try {
await agentCommand(
{
message: userMessage,
message: summary,
extraSystemPrompt: internalContext,
sessionKey,
to: resolved.to,
@ -135,11 +113,14 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) {
messageChannel: channel,
threadId,
accountId: origin?.accountId,
senderIsOwner: false,
},
defaultRuntime,
params.deps,
);
} catch (err) {
// Agent failed — fall back to a clean restart notice without raw sentinel fields
// so the user isn't left completely silent after a restart.
enqueueSystemEvent(`${summary}\n${String(err)}`, { sessionKey });
}
}

View File

@ -162,7 +162,9 @@ describe("restart sentinel", () => {
});
describe("formatRestartSentinelUserMessage", () => {
it("returns note for successful restart with note", () => {
it("returns generic success message regardless of note (note is internal only)", () => {
// The `note`/`message` field is an operator annotation — it must never be surfaced
// directly to the user. Only the agent (via internalContext) should see it.
const payload = {
kind: "config-patch" as const,
status: "ok" as const,
@ -171,8 +173,8 @@ describe("formatRestartSentinelUserMessage", () => {
doctorHint: "Run: openclaw doctor --non-interactive",
};
const result = formatRestartSentinelUserMessage(payload);
expect(result).toBe("testing restart sentinel");
expect(result).not.toContain("Gateway restart");
expect(result).toBe("Gateway restarted successfully.");
expect(result).not.toContain("testing restart sentinel");
expect(result).not.toContain("config-patch");
expect(result).not.toContain("doctor");
});
@ -186,7 +188,8 @@ describe("formatRestartSentinelUserMessage", () => {
expect(formatRestartSentinelUserMessage(payload)).toBe("Gateway restarted successfully.");
});
it("returns failure message with note for error status", () => {
it("returns generic failure message for error status (note is internal only)", () => {
// Raw note must not appear in user-facing fallback even on error.
const payload = {
kind: "config-apply" as const,
status: "error" as const,
@ -194,7 +197,8 @@ describe("formatRestartSentinelUserMessage", () => {
message: "disk full",
};
const result = formatRestartSentinelUserMessage(payload);
expect(result).toBe("Gateway restart failed: disk full");
expect(result).toBe("Gateway restart failed.");
expect(result).not.toContain("disk full");
});
it("returns generic failure message for error without note", () => {

View File

@ -128,16 +128,16 @@ export function formatRestartSentinelMessage(payload: RestartSentinelPayload): s
}
/**
* Human-friendly message for direct user delivery after a gateway restart.
* Omits raw diagnostic fields (status prefix, doctorHint) those belong in
* the agent's internal context, not in the user-facing chat message.
* Clean fallback message for system-event delivery when the agent cannot be woken.
* Never includes the raw `note`/`message` field that belongs in the agent's
* internal context only (see formatRestartSentinelInternalContext). The note is
* an operator annotation, not a user-facing string.
*/
export function formatRestartSentinelUserMessage(payload: RestartSentinelPayload): string {
const note = payload.message?.trim();
if (payload.status === "error") {
return note ? `Gateway restart failed: ${note}` : "Gateway restart failed.";
return "Gateway restart failed.";
}
return note ?? "Gateway restarted successfully.";
return "Gateway restarted successfully.";
}
/**