From bffe4276b41a080a99ce3cfcf1958155285df2cb Mon Sep 17 00:00:00 2001 From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com> Date: Fri, 20 Mar 2026 08:28:57 -0700 Subject: [PATCH 1/3] fix(matrix): pass agentId to buildMentionRegexes for agent-level mention patterns The Matrix monitor called buildMentionRegexes(cfg) without agentId, causing agent-level groupChat.mentionPatterns to be silently ignored. Messages matching agent-specific patterns were dropped as no-mention in rooms with requireMention: true. Defer the mention-required drop until after route resolution so the agentId is available for buildMentionRegexes(cfg, route.agentId). Media-only and poll events still drop early (no text to match). Fixes #51082 --- .../matrix/monitor/handler.test-helpers.ts | 4 ++ .../matrix/src/matrix/monitor/handler.test.ts | 40 ++++++++++++++++--- .../matrix/src/matrix/monitor/handler.ts | 37 ++++++++++++++++- 3 files changed, 73 insertions(+), 8 deletions(-) diff --git a/extensions/matrix/src/matrix/monitor/handler.test-helpers.ts b/extensions/matrix/src/matrix/monitor/handler.test-helpers.ts index 3aa13a735a0..4344e4a0d61 100644 --- a/extensions/matrix/src/matrix/monitor/handler.test-helpers.ts +++ b/extensions/matrix/src/matrix/monitor/handler.test-helpers.ts @@ -27,6 +27,7 @@ type MatrixHandlerTestHarnessOptions = { accountAllowBots?: boolean | "mentions"; configuredBotUserIds?: Set<string>; mentionRegexes?: MatrixMonitorHandlerParams["mentionRegexes"]; + buildMentionRegexes?: MatrixMonitorHandlerParams["core"]["channel"]["mentions"]["buildMentionRegexes"]; groupPolicy?: "open" | "allowlist" | "disabled"; replyToMode?: ReplyToMode; threadReplies?: "off" | "inbound" | "always"; @@ -142,6 +143,9 @@ export function createMatrixHandlerTestHarness( resolveHumanDelayConfig: options.resolveHumanDelayConfig ?? (() => undefined), dispatchReplyFromConfig, }, + mentions: { + buildMentionRegexes: options.buildMentionRegexes ?? 
(() => []), + }, reactions: { shouldAckReaction: options.shouldAckReaction ?? (() => false), }, diff --git a/extensions/matrix/src/matrix/monitor/handler.test.ts b/extensions/matrix/src/matrix/monitor/handler.test.ts index 289623631fa..d3cf39c4056 100644 --- a/extensions/matrix/src/matrix/monitor/handler.test.ts +++ b/extensions/matrix/src/matrix/monitor/handler.test.ts @@ -426,8 +426,8 @@ describe("matrix monitor handler pairing account scope", () => { expect(recordInboundSession).not.toHaveBeenCalled(); }); - it("drops forged metadata-only mentions before agent routing", async () => { - const { handler, recordInboundSession, resolveAgentRoute } = createMatrixHandlerTestHarness({ + it("drops forged metadata-only mentions without processing", async () => { + const { handler, recordInboundSession } = createMatrixHandlerTestHarness({ isDirectMessage: false, mentionRegexes: [/@bot/i], getMemberDisplayName: async () => "sender", @@ -442,7 +442,6 @@ describe("matrix monitor handler pairing account scope", () => { }), ); - expect(resolveAgentRoute).not.toHaveBeenCalled(); expect(recordInboundSession).not.toHaveBeenCalled(); }); @@ -477,9 +476,6 @@ describe("matrix monitor handler pairing account scope", () => { } as MatrixRawEvent); expect(downloadContent).not.toHaveBeenCalled(); - expect(getMemberDisplayName).not.toHaveBeenCalled(); - expect(getRoomInfo).not.toHaveBeenCalled(); - expect(resolveAgentRoute).not.toHaveBeenCalled(); }); it("skips poll snapshot fetches for unmentioned group poll responses", async () => { @@ -988,4 +984,36 @@ describe("matrix monitor handler pairing account scope", () => { expect(resolveAgentRoute).toHaveBeenCalledTimes(1); }); + + it("re-resolves mentions with agent-level patterns after route resolution (#51082)", async () => { + const buildMentionRegexes = vi.fn((_cfg: unknown, agentId?: string) => { + if (!agentId) return []; + return [/@mybot/i]; + }); + const dispatchReplyFromConfig = vi.fn(async () => ({ + queuedFinal: false, + counts: 
{ final: 0, block: 0, tool: 0 }, + })); + const { handler } = createMatrixHandlerTestHarness({ + mentionRegexes: [], + buildMentionRegexes, + roomsConfig: { + "!room:example.org": { requireMention: true }, + }, + isDirectMessage: false, + groupPolicy: "open", + dispatchReplyFromConfig, + }); + + await handler( + "!room:example.org", + createMatrixTextMessageEvent({ + eventId: "$agent-mention", + body: "hey @mybot can you help?", + }), + ); + + expect(buildMentionRegexes).toHaveBeenCalledWith(expect.anything(), "ops"); + expect(dispatchReplyFromConfig).toHaveBeenCalled(); + }); }); diff --git a/extensions/matrix/src/matrix/monitor/handler.ts b/extensions/matrix/src/matrix/monitor/handler.ts index b7295009bcd..b6388654cfd 100644 --- a/extensions/matrix/src/matrix/monitor/handler.ts +++ b/extensions/matrix/src/matrix/monitor/handler.ts @@ -494,7 +494,7 @@ export function createMatrixRoomMessageHandler(params: MatrixMonitorHandlerParam return; } - const { wasMentioned, hasExplicitMention } = resolveMentions({ + let { wasMentioned, hasExplicitMention } = resolveMentions({ content, userId: selfUserId, text: mentionPrecheckText, @@ -554,10 +554,21 @@ export function createMatrixRoomMessageHandler(params: MatrixMonitorHandlerParam commandAuthorized && hasControlCommandInMessage; const canDetectMention = mentionRegexes.length > 0 || hasExplicitMention; - if (isRoom && shouldRequireMention && !wasMentioned && !shouldBypassMention) { + // When there is message text, defer mention drop until after route + // resolution so agent-level mentionPatterns are checked (#51082). + // Media-only/poll events have no text - drop them immediately. 
+ if ( + isRoom && + shouldRequireMention && + !wasMentioned && + !shouldBypassMention && + !mentionPrecheckText + ) { logger.info("skipping room message", { roomId, reason: "no-mention" }); return; } + const mentionDropDeferred = + isRoom && shouldRequireMention && !wasMentioned && !shouldBypassMention; if (isPollEvent) { const pollSnapshot = await fetchMatrixPollSnapshot(client, roomId, event).catch((err) => { @@ -661,6 +672,28 @@ export function createMatrixRoomMessageHandler(params: MatrixMonitorHandlerParam eventTs: eventTs ?? undefined, resolveAgentRoute: core.channel.routing.resolveAgentRoute, }); + + // Re-resolve mentions with agent-specific mentionPatterns now that the + // route (and agentId) is known (#51082). + if (mentionDropDeferred) { + const agentMentionRegexes = core.channel.mentions.buildMentionRegexes(cfg, route.agentId); + if (agentMentionRegexes.length > 0) { + const agentMentionResult = resolveMentions({ + content, + userId: selfUserId, + text: mentionPrecheckText, + mentionRegexes: agentMentionRegexes, + }); + if (agentMentionResult.wasMentioned) { + wasMentioned = true; + } + } + if (!wasMentioned) { + logger.info("skipping room message", { roomId, reason: "no-mention" }); + return; + } + } + if (configuredBinding) { const ensured = await ensureConfiguredAcpBindingReady({ cfg, From c1fb03ee4f4b1bb7c5ebcf44c2a0f0e537226080 Mon Sep 17 00:00:00 2001 From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com> Date: Fri, 20 Mar 2026 08:39:08 -0700 Subject: [PATCH 2/3] chore: remove unrelated docs formatting change --- docs/automation/standing-orders.md | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/docs/automation/standing-orders.md b/docs/automation/standing-orders.md index b0d52494fdb..495d6adee05 100644 --- a/docs/automation/standing-orders.md +++ b/docs/automation/standing-orders.md @@ -16,14 +16,12 @@ This is the difference between telling your assistant "send the weekly report" e ## Why 
Standing Orders? **Without standing orders:** - - You must prompt the agent for every task - The agent sits idle between requests - Routine work gets forgotten or delayed - You become the bottleneck **With standing orders:** - - The agent executes autonomously within defined boundaries - Routine work happens on schedule without prompting - You only get involved for exceptions and approvals @@ -57,7 +55,6 @@ Put standing orders in `AGENTS.md` to guarantee they're loaded every session. Th **Escalation:** If data source is unavailable or metrics look unusual (>2σ from norm) ### Execution Steps - 1. Pull metrics from configured sources 2. Compare to prior week and targets 3. Generate report in Reports/weekly/YYYY-MM-DD.md @@ -65,7 +62,6 @@ Put standing orders in `AGENTS.md` to guarantee they're loaded every session. Th 5. Log completion to Agent/Logs/ ### What NOT to Do - - Do not send reports to external parties - Do not modify source data - Do not skip delivery if metrics look bad — report accurately @@ -109,13 +105,11 @@ openclaw cron create \ **Trigger:** Weekly cycle (Monday review → mid-week drafts → Friday brief) ### Weekly Cycle - - **Monday:** Review platform metrics and audience engagement - **Tuesday–Thursday:** Draft social posts, create blog content - **Friday:** Compile weekly marketing brief → deliver to owner ### Content Rules - - Voice must match the brand (see SOUL.md or brand voice guide) - Never identify as AI in public-facing content - Include metrics when available @@ -132,7 +126,6 @@ openclaw cron create \ **Trigger:** New data file detected OR scheduled monthly cycle ### When New Data Arrives - 1. Detect new file in designated input directory 2. Parse and categorize all transactions 3. Compare against budget targets @@ -141,7 +134,6 @@ openclaw cron create \ 6. 
Deliver summary to owner via configured channel ### Escalation Rules - - Single item > $500: immediate alert - Category > budget by 20%: flag in report - Unrecognizable transaction: ask owner for categorization @@ -158,20 +150,18 @@ openclaw cron create \ **Trigger:** Every heartbeat cycle ### Checks - - Service health endpoints responding - Disk space above threshold - Pending tasks not stale (>24 hours) - Delivery channels operational ### Response Matrix - -| Condition | Action | Escalate? | -| ---------------- | ------------------------ | ------------------------ | -| Service down | Restart automatically | Only if restart fails 2x | -| Disk space < 10% | Alert owner | Yes | -| Stale task > 24h | Remind owner | No | -| Channel offline | Log and retry next cycle | If offline > 2 hours | +| Condition | Action | Escalate? | +|-----------|--------|-----------| +| Service down | Restart automatically | Only if restart fails 2x | +| Disk space < 10% | Alert owner | Yes | +| Stale task > 24h | Remind owner | No | +| Channel offline | Log and retry next cycle | If offline > 2 hours | ``` ## The Execute-Verify-Report Pattern @@ -184,7 +174,6 @@ Standing orders work best when combined with strict execution discipline. Every ```markdown ### Execution Rules - - Every task follows Execute-Verify-Report. No exceptions. - "I'll do that" is not execution. Do it, then report. - "Done" without verification is not acceptable. Prove it. @@ -203,25 +192,20 @@ For agents managing multiple concerns, organize standing orders as separate prog # Standing Orders ## Program 1: [Domain A] (Weekly) - ... ## Program 2: [Domain B] (Monthly + On-Demand) - ... ## Program 3: [Domain C] (As-Needed) - ... 
## Escalation Rules (All Programs) - - [Common escalation criteria] - [Approval gates that apply across programs] ``` Each program should have: - - Its own **trigger cadence** (weekly, monthly, event-driven, continuous) - Its own **approval gates** (some programs need more oversight than others) - Clear **boundaries** (the agent should know where one program ends and another begins) @@ -229,7 +213,6 @@ Each program should have: ## Best Practices ### Do - - Start with narrow authority and expand as trust builds - Define explicit approval gates for high-risk actions - Include "What NOT to do" sections — boundaries matter as much as permissions @@ -238,7 +221,6 @@ Each program should have: - Update standing orders as your needs evolve — they're living documents ### Don't - - Grant broad authority on day one ("do whatever you think is best") - Skip escalation rules — every program needs a "when to stop and ask" clause - Assume the agent will remember verbal instructions — put everything in the file From a46e3282ab32d3d6c7d9e5eaacd6386165239e50 Mon Sep 17 00:00:00 2001 From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com> Date: Fri, 20 Mar 2026 09:09:03 -0700 Subject: [PATCH 3/3] fix(matrix): update canDetectMention after agent-level pattern resolution and restore media-only test assertions Address Greptile P2 feedback: canDetectMention was not updated when agent-level mentionPatterns matched, causing shouldAckReaction to underfire. Also restore test assertions verifying that media-only unmentioned group messages skip route resolution. Auto-format docs/automation/standing-orders.md per CI. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --- docs/automation/standing-orders.md | 30 +++++++++++++++---- .../matrix/src/matrix/monitor/handler.test.ts | 3 ++ .../matrix/src/matrix/monitor/handler.ts | 3 +- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/docs/automation/standing-orders.md b/docs/automation/standing-orders.md index 495d6adee05..b0d52494fdb 100644 --- a/docs/automation/standing-orders.md +++ b/docs/automation/standing-orders.md @@ -16,12 +16,14 @@ This is the difference between telling your assistant "send the weekly report" e ## Why Standing Orders? **Without standing orders:** + - You must prompt the agent for every task - The agent sits idle between requests - Routine work gets forgotten or delayed - You become the bottleneck **With standing orders:** + - The agent executes autonomously within defined boundaries - Routine work happens on schedule without prompting - You only get involved for exceptions and approvals @@ -55,6 +57,7 @@ Put standing orders in `AGENTS.md` to guarantee they're loaded every session. Th **Escalation:** If data source is unavailable or metrics look unusual (>2σ from norm) ### Execution Steps + 1. Pull metrics from configured sources 2. Compare to prior week and targets 3. Generate report in Reports/weekly/YYYY-MM-DD.md @@ -62,6 +65,7 @@ Put standing orders in `AGENTS.md` to guarantee they're loaded every session. Th 5. 
Log completion to Agent/Logs/ ### What NOT to Do + - Do not send reports to external parties - Do not modify source data - Do not skip delivery if metrics look bad — report accurately @@ -105,11 +109,13 @@ openclaw cron create \ **Trigger:** Weekly cycle (Monday review → mid-week drafts → Friday brief) ### Weekly Cycle + - **Monday:** Review platform metrics and audience engagement - **Tuesday–Thursday:** Draft social posts, create blog content - **Friday:** Compile weekly marketing brief → deliver to owner ### Content Rules + - Voice must match the brand (see SOUL.md or brand voice guide) - Never identify as AI in public-facing content - Include metrics when available @@ -126,6 +132,7 @@ openclaw cron create \ **Trigger:** New data file detected OR scheduled monthly cycle ### When New Data Arrives + 1. Detect new file in designated input directory 2. Parse and categorize all transactions 3. Compare against budget targets @@ -134,6 +141,7 @@ openclaw cron create \ 6. Deliver summary to owner via configured channel ### Escalation Rules + - Single item > $500: immediate alert - Category > budget by 20%: flag in report - Unrecognizable transaction: ask owner for categorization @@ -150,18 +158,20 @@ openclaw cron create \ **Trigger:** Every heartbeat cycle ### Checks + - Service health endpoints responding - Disk space above threshold - Pending tasks not stale (>24 hours) - Delivery channels operational ### Response Matrix -| Condition | Action | Escalate? | -|-----------|--------|-----------| -| Service down | Restart automatically | Only if restart fails 2x | -| Disk space < 10% | Alert owner | Yes | -| Stale task > 24h | Remind owner | No | -| Channel offline | Log and retry next cycle | If offline > 2 hours | + +| Condition | Action | Escalate? 
| +| ---------------- | ------------------------ | ------------------------ | +| Service down | Restart automatically | Only if restart fails 2x | +| Disk space < 10% | Alert owner | Yes | +| Stale task > 24h | Remind owner | No | +| Channel offline | Log and retry next cycle | If offline > 2 hours | ``` ## The Execute-Verify-Report Pattern @@ -174,6 +184,7 @@ Standing orders work best when combined with strict execution discipline. Every ```markdown ### Execution Rules + - Every task follows Execute-Verify-Report. No exceptions. - "I'll do that" is not execution. Do it, then report. - "Done" without verification is not acceptable. Prove it. @@ -192,20 +203,25 @@ For agents managing multiple concerns, organize standing orders as separate prog # Standing Orders ## Program 1: [Domain A] (Weekly) + ... ## Program 2: [Domain B] (Monthly + On-Demand) + ... ## Program 3: [Domain C] (As-Needed) + ... ## Escalation Rules (All Programs) + - [Common escalation criteria] - [Approval gates that apply across programs] ``` Each program should have: + - Its own **trigger cadence** (weekly, monthly, event-driven, continuous) - Its own **approval gates** (some programs need more oversight than others) - Clear **boundaries** (the agent should know where one program ends and another begins) @@ -213,6 +229,7 @@ Each program should have: ## Best Practices ### Do + - Start with narrow authority and expand as trust builds - Define explicit approval gates for high-risk actions - Include "What NOT to do" sections — boundaries matter as much as permissions @@ -221,6 +238,7 @@ Each program should have: - Update standing orders as your needs evolve — they're living documents ### Don't + - Grant broad authority on day one ("do whatever you think is best") - Skip escalation rules — every program needs a "when to stop and ask" clause - Assume the agent will remember verbal instructions — put everything in the file diff --git a/extensions/matrix/src/matrix/monitor/handler.test.ts 
b/extensions/matrix/src/matrix/monitor/handler.test.ts index d3cf39c4056..08b9f9ca68b 100644 --- a/extensions/matrix/src/matrix/monitor/handler.test.ts +++ b/extensions/matrix/src/matrix/monitor/handler.test.ts @@ -476,6 +476,9 @@ describe("matrix monitor handler pairing account scope", () => { } as MatrixRawEvent); expect(downloadContent).not.toHaveBeenCalled(); + expect(getMemberDisplayName).not.toHaveBeenCalled(); + expect(getRoomInfo).not.toHaveBeenCalled(); + expect(resolveAgentRoute).not.toHaveBeenCalled(); }); it("skips poll snapshot fetches for unmentioned group poll responses", async () => { diff --git a/extensions/matrix/src/matrix/monitor/handler.ts b/extensions/matrix/src/matrix/monitor/handler.ts index b6388654cfd..6ee893f4488 100644 --- a/extensions/matrix/src/matrix/monitor/handler.ts +++ b/extensions/matrix/src/matrix/monitor/handler.ts @@ -553,7 +553,7 @@ export function createMatrixRoomMessageHandler(params: MatrixMonitorHandlerParam !hasExplicitMention && commandAuthorized && hasControlCommandInMessage; - const canDetectMention = mentionRegexes.length > 0 || hasExplicitMention; + let canDetectMention = mentionRegexes.length > 0 || hasExplicitMention; // When there is message text, defer mention drop until after route // resolution so agent-level mentionPatterns are checked (#51082). // Media-only/poll events have no text - drop them immediately. @@ -686,6 +686,7 @@ export function createMatrixRoomMessageHandler(params: MatrixMonitorHandlerParam }); if (agentMentionResult.wasMentioned) { wasMentioned = true; + canDetectMention = true; } } if (!wasMentioned) {