From f422d3363d71c6f59294e74a05a56ae38c587df5 Mon Sep 17 00:00:00 2001 From: chenpitang Date: Sat, 7 Mar 2026 19:21:26 +0800 Subject: [PATCH 01/15] feat: notify user when context compaction starts and completes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During auto-compaction the agent goes silent for several seconds while the context is summarised. Users on every channel (Discord, Feishu, Telegram, webchat …) had no indication that something was happening — leading to confusion and duplicate messages. Changes: - agent-runner-execution.ts: listen for compaction phase='start' event and immediately deliver a "🧹 Compacting context..." notice via the existing onBlockReply callback. This fires for every channel because onBlockReply is the universal in-run delivery path. - agent-runner.ts: make the completion notice unconditional (was previously guarded behind verboseEnabled). Non-verbose users now see "✅ Context compacted (count N)."; verbose users continue to see the legacy "🧹 Auto-compaction complete (count N)." wording. Why onBlockReply for start? onBlockReply is already wired to every channel adapter and fires during the live run, so the notice arrives in-band with zero new plumbing. Using verboseNotices (appended after the run) would be too late and would miss the start signal entirely. Fixes: users seeing silent pauses of 5-15 s with no feedback during compaction on any channel. 
--- src/auto-reply/reply/agent-runner-execution.ts | 10 ++++++++-- src/auto-reply/reply/agent-runner.ts | 10 +++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index c25342e4a28..e88c227d0a1 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -394,11 +394,17 @@ export async function runAgentTurnWithFallback(params: { await params.opts?.onToolStart?.({ name, phase }); } } - // Track auto-compaction completion and notify UI layer. + // Track auto-compaction and notify higher layers. if (evt.stream === "compaction") { const phase = typeof evt.data.phase === "string" ? evt.data.phase : ""; if (phase === "start") { - await params.opts?.onCompactionStart?.(); + if (params.opts?.onCompactionStart) { + await params.opts.onCompactionStart(); + } else { + // Use the universal in-run block reply path so every + // channel sees a notice while compaction is pausing the run. + await params.opts?.onBlockReply?.({ text: "๐Ÿงน Compacting context..." }); + } } const completed = evt.data?.completed === true; if (phase === "end" && completed) { diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index fbdad1be160..fba204eccad 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -697,8 +697,16 @@ export async function runReplyAgent(params: { }); } + // Always notify the user when compaction completes โ€” not just in verbose + // mode. The "๐Ÿงน Compacting context..." notice was already sent at start, + // so the completion message closes the loop for every user regardless of + // their verbose setting. + const suffix = typeof count === "number" ? ` (count ${count})` : ""; + verboseNotices.push({ text: `โœ… Context compacted${suffix}.` }); if (verboseEnabled) { - const suffix = typeof count === "number" ? 
` (count ${count})` : ""; + // Verbose mode already gets the completion โ€” keep the legacy notice + // text only for verbose so power users see the traditional wording. + verboseNotices.pop(); verboseNotices.push({ text: `๐Ÿงน Auto-compaction complete${suffix}.` }); } } From 40b175a69571e6dc5002735880c96a46d3a6f28f Mon Sep 17 00:00:00 2001 From: zidongdesign Date: Sat, 7 Mar 2026 19:34:14 +0800 Subject: [PATCH 02/15] fix: send compaction completion notice via onBlockReply in streaming mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In block-streaming mode, the reply pipeline bypasses buildReplyPayloads, so notices only pushed to verboseNotices were never delivered to the user. The start notice ("๐Ÿงน Compacting context...") was already sent via opts.onBlockReply directly in agent-runner-execution.ts; mirror the same path for the completion notice. - If opts.onBlockReply is present (streaming mode): await onBlockReply with the completion text directly, so it reaches the user immediately. - Otherwise (non-streaming): push to verboseNotices as before so it gets prepended to the final payload batch. Also consolidate the verbose vs. non-verbose text selection into a single completionText variable, removing the redundant pop/push pattern. --- src/auto-reply/reply/agent-runner.ts | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index fba204eccad..e8874366201 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -702,12 +702,18 @@ export async function runReplyAgent(params: { // so the completion message closes the loop for every user regardless of // their verbose setting. const suffix = typeof count === "number" ? 
` (count ${count})` : ""; - verboseNotices.push({ text: `โœ… Context compacted${suffix}.` }); - if (verboseEnabled) { - // Verbose mode already gets the completion โ€” keep the legacy notice - // text only for verbose so power users see the traditional wording. - verboseNotices.pop(); - verboseNotices.push({ text: `๐Ÿงน Auto-compaction complete${suffix}.` }); + const completionText = verboseEnabled + ? `๐Ÿงน Auto-compaction complete${suffix}.` + : `โœ… Context compacted${suffix}.`; + + // In block-streaming mode, onBlockReply bypasses buildReplyPayloads, so + // we must deliver the completion notice the same way the start notice was + // sent (via onBlockReply directly). Otherwise the user sees the "๐Ÿงน + // Compacting context..." start notice but never receives the completion. + if (opts?.onBlockReply) { + await opts.onBlockReply({ text: completionText }); + } else { + verboseNotices.push({ text: completionText }); } } if (verboseNotices.length > 0) { From 8e216cbb4e20c76f5d6ff7acb6792326cf5c99bd Mon Sep 17 00:00:00 2001 From: zidongdesign Date: Sun, 8 Mar 2026 10:01:40 +0800 Subject: [PATCH 03/15] fix: thread compaction notices through reply-to mode handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compaction start and completion notices were sent via raw opts.onBlockReply, bypassing createBlockReplyDeliveryHandler and the applyReplyToMode pipeline. In channels configured with replyToMode=all|first, this caused compaction notices to be delivered as unthreaded top-level messages while all other replies stayed threaded โ€” inconsistent and noisy. Fix agent-runner-execution.ts: extract createBlockReplyDeliveryHandler result into blockReplyHandler and share it between onBlockReply and the compaction start notice in onAgentEvent. Both now use the same handler. 
Fix agent-runner.ts: inject currentMessageId + replyToCurrent into the completion notice payload before passing through applyReplyToMode, so threading directives are honoured consistently with normal replies. Closes the P2 review comment on PR #38805 (agent-runner.ts:701). --- .../reply/agent-runner-execution.ts | 39 +++++++++++-------- src/auto-reply/reply/agent-runner.ts | 10 ++++- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index e88c227d0a1..4141c8c9bc8 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -199,6 +199,24 @@ export async function runAgentTurnWithFallback(params: { return text; }; const blockReplyPipeline = params.blockReplyPipeline; + // Build the delivery handler once so both onAgentEvent (compaction start + // notice) and the onBlockReply field share the same instance. This + // ensures replyToId threading (replyToMode=all|first) is applied to + // compaction notices just like every other block reply. + const blockReplyHandler = params.opts?.onBlockReply + ? createBlockReplyDeliveryHandler({ + onBlockReply: params.opts.onBlockReply, + currentMessageId: + params.sessionCtx.MessageSidFull ?? 
params.sessionCtx.MessageSid, + normalizeStreamingText, + applyReplyToMode: params.applyReplyToMode, + normalizeMediaPaths: normalizeReplyMediaPaths, + typingSignals: params.typingSignals, + blockStreamingEnabled: params.blockStreamingEnabled, + blockReplyPipeline, + directlySentBlockKeys, + }) + : undefined; const onToolResult = params.opts?.onToolResult; const fallbackResult = await runWithModelFallback({ ...resolveModelFallbackOptions(params.followupRun.run), @@ -401,9 +419,9 @@ export async function runAgentTurnWithFallback(params: { if (params.opts?.onCompactionStart) { await params.opts.onCompactionStart(); } else { - // Use the universal in-run block reply path so every - // channel sees a notice while compaction is pausing the run. - await params.opts?.onBlockReply?.({ text: "๐Ÿงน Compacting context..." }); + // Route through the shared block reply handler so + // reply-to threading matches other in-run notices. + await blockReplyHandler?.({ text: "๐Ÿงน Compacting context..." }); } } const completed = evt.data?.completed === true; @@ -416,20 +434,7 @@ export async function runAgentTurnWithFallback(params: { // Always pass onBlockReply so flushBlockReplyBuffer works before tool execution, // even when regular block streaming is disabled. The handler sends directly // via opts.onBlockReply when the pipeline isn't available. - onBlockReply: params.opts?.onBlockReply - ? createBlockReplyDeliveryHandler({ - onBlockReply: params.opts.onBlockReply, - currentMessageId: - params.sessionCtx.MessageSidFull ?? params.sessionCtx.MessageSid, - normalizeStreamingText, - applyReplyToMode: params.applyReplyToMode, - normalizeMediaPaths: normalizeReplyMediaPaths, - typingSignals: params.typingSignals, - blockStreamingEnabled: params.blockStreamingEnabled, - blockReplyPipeline, - directlySentBlockKeys, - }) - : undefined, + onBlockReply: blockReplyHandler, onBlockReplyFlush: params.blockStreamingEnabled && blockReplyPipeline ? 
async () => { diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index e8874366201..8b65b6d00d9 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -710,8 +710,16 @@ export async function runReplyAgent(params: { // we must deliver the completion notice the same way the start notice was // sent (via onBlockReply directly). Otherwise the user sees the "๐Ÿงน // Compacting context..." start notice but never receives the completion. + // Apply replyToMode so the notice is threaded consistently with normal + // replies when replyToMode=all|first is configured. if (opts?.onBlockReply) { - await opts.onBlockReply({ text: completionText }); + const currentMessageId = sessionCtx.MessageSidFull ?? sessionCtx.MessageSid; + const noticePayload = applyReplyToMode({ + text: completionText, + replyToId: currentMessageId, + replyToCurrent: true, + }); + await opts.onBlockReply(noticePayload); } else { verboseNotices.push({ text: completionText }); } From b9beb6869e1003b61dcd99737d8822f918c65819 Mon Sep 17 00:00:00 2001 From: zidongdesign Date: Sun, 8 Mar 2026 10:42:45 +0800 Subject: [PATCH 04/15] fix: route compaction completion notice through block reply pipeline Previously the completion notice bypassed the block-reply pipeline by calling opts.onBlockReply directly after the pipeline had already been flushed and stopped. This meant timeout/abort handling and serial delivery guarantees did not apply to the notice, risking stalls or out-of-order delivery in streaming/routed runs. Fix: enqueue the completion notice into blockReplyPipeline *before* flush so it is delivered through the same path as every other block reply. The non-streaming fallback (verboseNotices) is preserved for runs where no pipeline exists. 
Also removes the now-unnecessary direct opts.onBlockReply call and cleans up the redundant suffix in the pre-flush path (count suffix is still included in the verboseNotices fallback path where count is available). Addresses P1 review comment on PR #38805. --- src/auto-reply/reply/agent-runner.ts | 42 ++++++++++++++++++---------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index 8b65b6d00d9..e6acb1d9af5 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -414,6 +414,27 @@ export async function runReplyAgent(params: { const payloadArray = runResult.payloads ?? []; + // If compaction completed, enqueue the completion notice into the pipeline + // *before* flushing so it benefits from the same timeout/abort/serial- + // delivery guarantees as every other block reply. The count update and + // post-compaction context injection still happen later (after flush) because + // they don't affect the user-visible notice text at this point โ€” we use a + // placeholder suffix here and the full count is logged separately. + if (autoCompactionCompleted && blockReplyPipeline) { + const verboseEnabled = resolvedVerboseLevel !== "off"; + const completionText = verboseEnabled + ? `๐Ÿงน Auto-compaction complete.` + : `โœ… Context compacted.`; + const currentMessageId = sessionCtx.MessageSidFull ?? sessionCtx.MessageSid; + blockReplyPipeline.enqueue( + applyReplyToMode({ + text: completionText, + replyToId: currentMessageId, + replyToCurrent: true, + }), + ); + } + if (blockReplyPipeline) { await blockReplyPipeline.flush({ force: true }); blockReplyPipeline.stop(); @@ -706,21 +727,12 @@ export async function runReplyAgent(params: { ? 
`๐Ÿงน Auto-compaction complete${suffix}.` : `โœ… Context compacted${suffix}.`; - // In block-streaming mode, onBlockReply bypasses buildReplyPayloads, so - // we must deliver the completion notice the same way the start notice was - // sent (via onBlockReply directly). Otherwise the user sees the "๐Ÿงน - // Compacting context..." start notice but never receives the completion. - // Apply replyToMode so the notice is threaded consistently with normal - // replies when replyToMode=all|first is configured. - if (opts?.onBlockReply) { - const currentMessageId = sessionCtx.MessageSidFull ?? sessionCtx.MessageSid; - const noticePayload = applyReplyToMode({ - text: completionText, - replyToId: currentMessageId, - replyToCurrent: true, - }); - await opts.onBlockReply(noticePayload); - } else { + // In block-streaming mode the completion notice was already enqueued into + // blockReplyPipeline before flush (see above), so it travels through the + // normal timeout/abort/serial-delivery path without bypassing the pipeline. + // Here we only handle the non-streaming fallback: push into verboseNotices + // so it appears as a final payload alongside other verbose output. + if (!blockReplyPipeline) { verboseNotices.push({ text: completionText }); } } From 197ef0a1f510dd50618084dfb1ff78e6f2090d5e Mon Sep 17 00:00:00 2001 From: zidongdesign Date: Sun, 8 Mar 2026 11:39:06 +0800 Subject: [PATCH 05/15] fix: send compaction completion notice after pipeline stop to preserve didStream Enqueueing the completion notice into blockReplyPipeline before flush caused didStream() to return true even when no assistant content was streamed. buildReplyPayloads drops all finalPayloads when didStream() is true, so the real assistant reply could be silently discarded on non-streaming model paths (e.g. pi-embedded-subscribe) that fill assistantTexts without emitting block replies. 
Fix: move the completion notice send to *after* pipeline flush+stop, using a fire-and-forget Promise.race with blockReplyTimeoutMs. This keeps the timeout guarantee (satisfying the previous P1) while not touching didStream() at all. Non-streaming fallback (verboseNotices) is unchanged. Addresses P1 review comment on PR #38805. --- src/auto-reply/reply/agent-runner.ts | 55 ++++++++++++++++------------ 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index e6acb1d9af5..7bec12d1c8a 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -414,30 +414,36 @@ export async function runReplyAgent(params: { const payloadArray = runResult.payloads ?? []; - // If compaction completed, enqueue the completion notice into the pipeline - // *before* flushing so it benefits from the same timeout/abort/serial- - // delivery guarantees as every other block reply. The count update and - // post-compaction context injection still happen later (after flush) because - // they don't affect the user-visible notice text at this point โ€” we use a - // placeholder suffix here and the full count is logged separately. - if (autoCompactionCompleted && blockReplyPipeline) { + if (blockReplyPipeline) { + await blockReplyPipeline.flush({ force: true }); + blockReplyPipeline.stop(); + } + + // Send the compaction completion notice *after* the pipeline has flushed and + // stopped, so the enqueue does not set didStream() = true and cause + // buildReplyPayloads to discard the real assistant reply. We still apply a + // timeout so the notice cannot stall the run indefinitely. + if (autoCompactionCompleted && opts?.onBlockReply) { const verboseEnabled = resolvedVerboseLevel !== "off"; const completionText = verboseEnabled ? `๐Ÿงน Auto-compaction complete.` : `โœ… Context compacted.`; const currentMessageId = sessionCtx.MessageSidFull ?? 
sessionCtx.MessageSid; - blockReplyPipeline.enqueue( - applyReplyToMode({ - text: completionText, - replyToId: currentMessageId, - replyToCurrent: true, - }), - ); - } - - if (blockReplyPipeline) { - await blockReplyPipeline.flush({ force: true }); - blockReplyPipeline.stop(); + const noticePayload = applyReplyToMode({ + text: completionText, + replyToId: currentMessageId, + replyToCurrent: true, + }); + // Fire-and-forget with timeout โ€” best-effort delivery; failure must not + // propagate to the caller. + void Promise.race([ + opts.onBlockReply(noticePayload), + new Promise((_, reject) => + setTimeout(() => reject(new Error("compaction notice timeout")), blockReplyTimeoutMs), + ), + ]).catch(() => { + // Intentionally swallowed โ€” the notice is informational only. + }); } if (pendingToolTasks.size > 0) { await Promise.allSettled(pendingToolTasks); @@ -727,11 +733,12 @@ export async function runReplyAgent(params: { ? `๐Ÿงน Auto-compaction complete${suffix}.` : `โœ… Context compacted${suffix}.`; - // In block-streaming mode the completion notice was already enqueued into - // blockReplyPipeline before flush (see above), so it travels through the - // normal timeout/abort/serial-delivery path without bypassing the pipeline. - // Here we only handle the non-streaming fallback: push into verboseNotices - // so it appears as a final payload alongside other verbose output. + // In block-streaming mode the completion notice is sent above (after the + // pipeline has flushed) via a fire-and-forget call to opts.onBlockReply, + // so that it does not set didStream()=true and cause buildReplyPayloads to + // discard the real assistant reply. + // In non-streaming mode, push into verboseNotices so it is included in + // the final payload batch. 
if (!blockReplyPipeline) { verboseNotices.push({ text: completionText }); } From bcc2d2188e798caddafdbbe2ed513abe1f532be5 Mon Sep 17 00:00:00 2001 From: zidongdesign Date: Sun, 8 Mar 2026 12:39:14 +0800 Subject: [PATCH 06/15] fix: address P2 review comments on compaction notices P2-1 (agent-runner.ts): Restrict direct completion notice to block-streaming runs. The condition now checks blockStreamingEnabled in addition to opts?.onBlockReply, preventing duplicate completion notices in non-streaming sessions where verboseNotices already handles the compaction-complete text. P2-2 (agent-runner-execution.ts): Emit compaction start notice when streaming is off. blockReplyHandler is a no-op for non-streaming runs, so add a direct fallback path: when blockStreamingEnabled is false and opts.onBlockReply is present, send the start notice directly with applyReplyToMode threading applied. --- src/auto-reply/reply/agent-runner-execution.ts | 16 +++++++++++++++- src/auto-reply/reply/agent-runner.ts | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 4141c8c9bc8..687e4ecf05a 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -418,10 +418,24 @@ export async function runAgentTurnWithFallback(params: { if (phase === "start") { if (params.opts?.onCompactionStart) { await params.opts.onCompactionStart(); - } else { + } else if (params.blockStreamingEnabled) { // Route through the shared block reply handler so // reply-to threading matches other in-run notices. await blockReplyHandler?.({ text: "๐Ÿงน Compacting context..." }); + } else if (params.opts?.onBlockReply) { + // blockReplyHandler is a no-op when streaming is disabled. + // Fall back to direct delivery so non-streaming runs also + // receive the compaction start notice. + const currentMessageId = + params.sessionCtx.MessageSidFull ?? 
params.sessionCtx.MessageSid; + const noticePayload = params.applyReplyToMode({ + text: "๐Ÿงน Compacting context...", + replyToId: currentMessageId, + replyToCurrent: true, + }); + await params.opts.onBlockReply(noticePayload); + } else { + await params.opts?.onBlockReply?.({ text: "๐Ÿงน Compacting context..." }); } } const completed = evt.data?.completed === true; diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index 7bec12d1c8a..73473949db1 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -423,7 +423,7 @@ export async function runReplyAgent(params: { // stopped, so the enqueue does not set didStream() = true and cause // buildReplyPayloads to discard the real assistant reply. We still apply a // timeout so the notice cannot stall the run indefinitely. - if (autoCompactionCompleted && opts?.onBlockReply) { + if (autoCompactionCompleted && blockStreamingEnabled && opts?.onBlockReply) { const verboseEnabled = resolvedVerboseLevel !== "off"; const completionText = verboseEnabled ? `๐Ÿงน Auto-compaction complete.` From 88c9ad302602f8f5f6e5d755d928ebc0b63d7c67 Mon Sep 17 00:00:00 2001 From: zidongdesign Date: Sun, 8 Mar 2026 13:06:50 +0800 Subject: [PATCH 07/15] fix: bypass pipeline for compaction start notice to preserve final reply Previously the start notice was routed through blockReplyHandler which enqueues into blockReplyPipeline, setting didStream() = true. This caused buildReplyPayloads to drop all final payloads (shouldDropFinalPayloads path), discarding the real assistant reply on non-streaming model paths where assistantTexts is populated from the final message (not block chunks). Fix: send the start notice directly via opts.onBlockReply, bypassing the pipeline entirely. applyReplyToMode is still applied so replyToId threading (replyToMode=all|first) is honoured. This mirrors how the completion notice in agent-runner.ts avoids the pipeline after flush()/stop(). 
--- src/auto-reply/reply/agent-runner-execution.ts | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 687e4ecf05a..c7191b92edd 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -418,14 +418,10 @@ export async function runAgentTurnWithFallback(params: { if (phase === "start") { if (params.opts?.onCompactionStart) { await params.opts.onCompactionStart(); - } else if (params.blockStreamingEnabled) { - // Route through the shared block reply handler so - // reply-to threading matches other in-run notices. - await blockReplyHandler?.({ text: "๐Ÿงน Compacting context..." }); } else if (params.opts?.onBlockReply) { - // blockReplyHandler is a no-op when streaming is disabled. - // Fall back to direct delivery so non-streaming runs also - // receive the compaction start notice. + // Send directly via opts.onBlockReply (bypassing the + // pipeline) so the notice does not cause final payloads + // to be discarded on non-streaming model paths. const currentMessageId = params.sessionCtx.MessageSidFull ?? params.sessionCtx.MessageSid; const noticePayload = params.applyReplyToMode({ @@ -434,8 +430,6 @@ export async function runAgentTurnWithFallback(params: { replyToCurrent: true, }); await params.opts.onBlockReply(noticePayload); - } else { - await params.opts?.onBlockReply?.({ text: "๐Ÿงน Compacting context..." 
}); } } const completed = evt.data?.completed === true; From 643eb31ea4c4b47c15af9bfe02ed29787d0addfa Mon Sep 17 00:00:00 2001 From: zidongdesign Date: Sun, 8 Mar 2026 13:36:49 +0800 Subject: [PATCH 08/15] fix: exclude compaction notices from TTS transcript accumulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add isCompactionNotice flag to ReplyPayload and set it on both the compaction start notice (agent-runner-execution.ts) and the completion notice (agent-runner.ts). dispatch-from-config.ts skips accumulation into accumulatedBlockText when the flag is set, so compaction status lines (๐Ÿงน / โœ…) are never synthesised into the fallback TTS audio for block-streaming runs with tts.mode=final. --- src/auto-reply/reply/agent-runner-execution.ts | 1 + src/auto-reply/reply/agent-runner.ts | 1 + src/auto-reply/reply/dispatch-from-config.ts | 6 ++++-- src/auto-reply/types.ts | 4 ++++ 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index c7191b92edd..9998cca29d3 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -428,6 +428,7 @@ export async function runAgentTurnWithFallback(params: { text: "๐Ÿงน Compacting context...", replyToId: currentMessageId, replyToCurrent: true, + isCompactionNotice: true, }); await params.opts.onBlockReply(noticePayload); } diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index 73473949db1..2e93650446b 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -433,6 +433,7 @@ export async function runReplyAgent(params: { text: completionText, replyToId: currentMessageId, replyToCurrent: true, + isCompactionNotice: true, }); // Fire-and-forget with timeout โ€” best-effort delivery; failure must not // propagate to the caller. 
diff --git a/src/auto-reply/reply/dispatch-from-config.ts b/src/auto-reply/reply/dispatch-from-config.ts index 9df6ef2bc63..9f603b30863 100644 --- a/src/auto-reply/reply/dispatch-from-config.ts +++ b/src/auto-reply/reply/dispatch-from-config.ts @@ -582,8 +582,10 @@ export async function dispatchReplyFromConfig(params: { if (shouldSuppressReasoningPayload(payload)) { return; } - // Accumulate block text for TTS generation after streaming - if (payload.text) { + // Accumulate block text for TTS generation after streaming. + // Exclude compaction status notices โ€” they are informational UI + // signals and must not be synthesised into the spoken reply. + if (payload.text && !payload.isCompactionNotice) { if (accumulatedBlockText.length > 0) { accumulatedBlockText += "\n"; } diff --git a/src/auto-reply/types.ts b/src/auto-reply/types.ts index c424f43ab92..638dda42d8f 100644 --- a/src/auto-reply/types.ts +++ b/src/auto-reply/types.ts @@ -91,6 +91,10 @@ export type ReplyPayload = { /** Marks this payload as a reasoning/thinking block. Channels that do not * have a dedicated reasoning lane (e.g. WhatsApp, web) should suppress it. */ isReasoning?: boolean; + /** Marks this payload as a compaction status notice (start/end). + * Should be excluded from TTS transcript accumulation so compaction + * status lines are not synthesised into the spoken assistant reply. */ + isCompactionNotice?: boolean; /** Channel-specific payload data (per-channel envelope). */ channelData?: Record; }; From e7fd0a7b21e7c18a94f7dba3edff86621b969f09 Mon Sep 17 00:00:00 2001 From: zidongdesign Date: Sun, 8 Mar 2026 14:06:01 +0800 Subject: [PATCH 09/15] fix: wrap compaction start notice onBlockReply in try/catch to prevent unhandled rejection onAgentEvent is fired fire-and-forget (void ctx.params.onAgentEvent?.(...) in pi-embedded-subscribe.handlers.compaction.ts), so any rejection from the awaited onBlockReply call would escape unobserved. 
Wrap the delivery in a try/catch that swallows the error and logs the failure via logVerbose, consistent with other non-critical notice delivery paths. --- src/auto-reply/reply/agent-runner-execution.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 9998cca29d3..1b7e486a446 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -430,7 +430,15 @@ export async function runAgentTurnWithFallback(params: { replyToCurrent: true, isCompactionNotice: true, }); - await params.opts.onBlockReply(noticePayload); + try { + await params.opts.onBlockReply(noticePayload); + } catch (err) { + // Non-critical notice delivery failure should not + // bubble out of the fire-and-forget event handler. + logVerbose( + `compaction start notice delivery failed (non-fatal): ${String(err)}`, + ); + } } } const completed = evt.data?.completed === true; From 1e381c6c8c15d471a44b88eb7c4c40c678919b8d Mon Sep 17 00:00:00 2001 From: zidongdesign Date: Sun, 8 Mar 2026 14:36:27 +0800 Subject: [PATCH 10/15] fix: don't consume replyToMode=first slot for compaction notices Compaction start/end notices are transient status messages that should be threaded (appear in-context) but must not advance the hasThreaded flag inside createReplyToModeFilter when mode=first. Before this fix, the compaction start notice was the "first" threaded message, so all real assistant reply chunks that followed had replyToId stripped and were sent as unthreaded top-level messages. Fix: skip advancing hasThreaded when payload.isCompactionNotice is true. The notice still receives replyToId (so it appears in the thread), but the filter's stateful "first" slot is preserved for the actual assistant reply that follows. 
--- src/auto-reply/reply/reply-threading.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/auto-reply/reply/reply-threading.ts b/src/auto-reply/reply/reply-threading.ts index 66871f226b7..e5ee97a9d7f 100644 --- a/src/auto-reply/reply/reply-threading.ts +++ b/src/auto-reply/reply/reply-threading.ts @@ -44,7 +44,13 @@ export function createReplyToModeFilter( if (hasThreaded) { return { ...payload, replyToId: undefined }; } - hasThreaded = true; + // Compaction notices are transient status messages — they should be + // threaded (so they appear in-context), but they must not consume the + // "first" slot of the replyToMode=first filter. Skip advancing + // hasThreaded so the real assistant reply still gets replyToId. + if (!payload.isCompactionNotice) { + hasThreaded = true; + } return payload; }; } From e2dc9b1682a3a140a3ee15621e58d9ed223c49b9 Mon Sep 17 00:00:00 2001 From: zidongdesign Date: Sun, 8 Mar 2026 15:36:02 +0800 Subject: [PATCH 11/15] fix(threading): keep compaction notices threaded after first assistant block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In replyToMode=first, the hasThreaded flag was set by the first assistant chunk, causing the completion notice (emitted after flush) to hit the `if (hasThreaded)` branch and have its replyToId stripped — making it an unthreaded top-level message. Fix: add an isCompactionNotice exemption inside the `hasThreaded` branch so that compaction notices (both start and completion) always retain their replyToId regardless of hasThreaded state, while non-notice payloads continue to behave as before. 
Addresses review comment https://github.com/openclaw/openclaw/pull/38805#discussion_r2901465625 --- src/auto-reply/reply/reply-threading.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/auto-reply/reply/reply-threading.ts b/src/auto-reply/reply/reply-threading.ts index e5ee97a9d7f..177f4ccbe0b 100644 --- a/src/auto-reply/reply/reply-threading.ts +++ b/src/auto-reply/reply/reply-threading.ts @@ -42,6 +42,12 @@ export function createReplyToModeFilter( return payload; } if (hasThreaded) { + // Compaction notices are transient status messages that should always + // appear in-thread, even after the first assistant block has already + // consumed the "first" slot. Let them keep their replyToId. + if (payload.isCompactionNotice) { + return payload; + } return { ...payload, replyToId: undefined }; } // Compaction notices are transient status messages — they should be From 0b236892ac922d04443c0e125b88ef242715194b Mon Sep 17 00:00:00 2001 From: zidongdesign Date: Sun, 8 Mar 2026 21:06:35 +0800 Subject: [PATCH 12/15] fix(threading): honor replyToMode=off for compaction notices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compaction notices carried replyToCurrent=true, which caused them to pass through the allowExplicitReplyTagsWhenOff path in createReplyToModeFilter("off") and retain replyToId. In replyToMode=off sessions this made the transient status messages appear in-thread while normal assistant replies stayed off-thread, contradicting the off-mode expectation. Add an !isCompactionNotice guard to the explicit-tag fast-path so compaction payloads always fall through to the strip branch and have their replyToId removed — consistent with how every other payload is treated in off mode.
--- src/auto-reply/reply/reply-threading.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/auto-reply/reply/reply-threading.ts b/src/auto-reply/reply/reply-threading.ts index 177f4ccbe0b..5c0e1e423bc 100644 --- a/src/auto-reply/reply/reply-threading.ts +++ b/src/auto-reply/reply/reply-threading.ts @@ -33,7 +33,12 @@ export function createReplyToModeFilter( } if (mode === "off") { const isExplicit = Boolean(payload.replyToTag) || Boolean(payload.replyToCurrent); - if (opts.allowExplicitReplyTagsWhenOff && isExplicit) { + // Compaction notices must never be threaded when replyToMode=off — even + // if they carry explicit reply tags (replyToCurrent). Honouring the + // explicit tag here would make status notices appear in-thread while + // normal assistant replies stay off-thread, contradicting the off-mode + // expectation. Strip replyToId unconditionally for compaction payloads. + if (opts.allowExplicitReplyTagsWhenOff && isExplicit && !payload.isCompactionNotice) { return payload; } return { ...payload, replyToId: undefined }; From d2ea0e3dc8774409bb17843ce62b849ffa568fa7 Mon Sep 17 00:00:00 2001 From: zidongdesign Date: Sat, 14 Mar 2026 10:12:07 +0800 Subject: [PATCH 13/15] fix: preserve compaction count in streaming notices & route non-streaming notices through compaction metadata --- src/auto-reply/reply/agent-runner.ts | 72 ++++++++++++++-------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index 2e93650446b..7499657d6d4 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -419,33 +419,10 @@ export async function runReplyAgent(params: { blockReplyPipeline.stop(); } - // Send the compaction completion notice *after* the pipeline has flushed and - // stopped, so the enqueue does not set didStream() = true and cause - // buildReplyPayloads to discard the real assistant reply.
We still apply a - // timeout so the notice cannot stall the run indefinitely. - if (autoCompactionCompleted && blockStreamingEnabled && opts?.onBlockReply) { - const verboseEnabled = resolvedVerboseLevel !== "off"; - const completionText = verboseEnabled - ? `🧹 Auto-compaction complete.` - : `✅ Context compacted.`; - const currentMessageId = sessionCtx.MessageSidFull ?? sessionCtx.MessageSid; - const noticePayload = applyReplyToMode({ - text: completionText, - replyToId: currentMessageId, - replyToCurrent: true, - isCompactionNotice: true, - }); - // Fire-and-forget with timeout — best-effort delivery; failure must not - // propagate to the caller. - void Promise.race([ - opts.onBlockReply(noticePayload), - new Promise((_, reject) => - setTimeout(() => reject(new Error("compaction notice timeout")), blockReplyTimeoutMs), - ), - ]).catch(() => { - // Intentionally swallowed — the notice is informational only. - }); - } + // NOTE: The compaction completion notice for block-streaming mode is sent + // further below — after incrementRunCompactionCount — so it can include + // the `(count N)` suffix. Sending it here (before the count is known) + // would omit that information. if (pendingToolTasks.size > 0) { await Promise.allSettled(pendingToolTasks); } @@ -734,14 +711,39 @@ export async function runReplyAgent(params: { ? `🧹 Auto-compaction complete${suffix}.` : `✅ Context compacted${suffix}.`; - // In block-streaming mode the completion notice is sent above (after the - // pipeline has flushed) via a fire-and-forget call to opts.onBlockReply, - // so that it does not set didStream()=true and cause buildReplyPayloads to - // discard the real assistant reply. - // In non-streaming mode, push into verboseNotices so it is included in - // the final payload batch.
- if (!blockReplyPipeline) { - verboseNotices.push({ text: completionText }); + if (blockReplyPipeline && opts?.onBlockReply) { + // In block-streaming mode, send the completion notice via + // fire-and-forget *after* the pipeline has flushed (so it does not set + // didStream()=true and cause buildReplyPayloads to discard the real + // assistant reply). Now that the count is known we can include it. + const currentMessageId = sessionCtx.MessageSidFull ?? sessionCtx.MessageSid; + const noticePayload = applyReplyToMode({ + text: completionText, + replyToId: currentMessageId, + replyToCurrent: true, + isCompactionNotice: true, + }); + void Promise.race([ + opts.onBlockReply(noticePayload), + new Promise((_, reject) => + setTimeout(() => reject(new Error("compaction notice timeout")), blockReplyTimeoutMs), + ), + ]).catch(() => { + // Intentionally swallowed — the notice is informational only. + }); + } else { + // Non-streaming: push into verboseNotices with full compaction metadata + // so threading exemptions apply and replyToMode=first does not thread + // the notice instead of the real assistant reply. + const currentMessageId = sessionCtx.MessageSidFull ?? sessionCtx.MessageSid; + verboseNotices.push( + applyReplyToMode({ + text: completionText, + replyToId: currentMessageId, + replyToCurrent: true, + isCompactionNotice: true, + }), + ); } } if (verboseNotices.length > 0) { From e5e73e37798b2e99b520ef1ff4f58fbd66c2f9d0 Mon Sep 17 00:00:00 2001 From: zidongdesign Date: Wed, 18 Mar 2026 10:10:42 +0800 Subject: [PATCH 14/15] fix(tts): skip TTS synthesis for compaction notices Compaction start/completion notices carry isCompactionNotice: true on the ReplyPayload. Guard maybeApplyTtsToPayload() with an early return so these informational UI signals are never synthesised as speech, regardless of TTS mode or auto-mode configuration. Addresses review feedback from jalehman on PR #38805.
--- src/tts/tts.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/tts/tts.ts b/src/tts/tts.ts index 17a7c2fc981..348ff94be91 100644 --- a/src/tts/tts.ts +++ b/src/tts/tts.ts @@ -825,6 +825,10 @@ export async function maybeApplyTtsToPayload(params: { inboundAudio?: boolean; ttsAuto?: string; }): Promise { + // Compaction notices are informational UI signals — never synthesise them as speech. + if (params.payload.isCompactionNotice) { + return params.payload; + } const config = resolveTtsConfig(params.cfg); const prefsPath = resolveTtsPrefsPath(config); const autoMode = resolveTtsAutoMode({ From eb90f612e69550a59d9e64b9b988d6f34a871958 Mon Sep 17 00:00:00 2001 From: zidongchen Date: Sat, 21 Mar 2026 10:12:12 +0800 Subject: [PATCH 15/15] fix: run oxfmt on agent-runner-execution.ts --- src/auto-reply/reply/agent-runner-execution.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 1b7e486a446..7e6a4cfa6bc 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -206,8 +206,7 @@ export async function runAgentTurnWithFallback(params: { const blockReplyHandler = params.opts?.onBlockReply ? createBlockReplyDeliveryHandler({ onBlockReply: params.opts.onBlockReply, - currentMessageId: - params.sessionCtx.MessageSidFull ?? params.sessionCtx.MessageSid, + currentMessageId: params.sessionCtx.MessageSidFull ?? params.sessionCtx.MessageSid, normalizeStreamingText, applyReplyToMode: params.applyReplyToMode, normalizeMediaPaths: normalizeReplyMediaPaths,