fix: thread compaction notices through reply-to mode handler

Compaction start and completion notices were sent via raw opts.onBlockReply, bypassing createBlockReplyDeliveryHandler and the applyReplyToMode pipeline. In channels configured with replyToMode=all|first, this caused compaction notices to be delivered as unthreaded top-level messages while all other replies stayed threaded, which was inconsistent and noisy. In agent-runner-execution.ts: build the createBlockReplyDeliveryHandler result once as blockReplyHandler and share it between the onBlockReply field and the compaction start notice in onAgentEvent, so both now use the same handler instance. In agent-runner.ts: set replyToId (the current message ID) and replyToCurrent on the completion notice payload and pass it through applyReplyToMode before calling opts.onBlockReply, so threading directives are honoured consistently with normal replies. Closes the P2 review comment on PR #38805 (agent-runner.ts:701).
2026-03-08 10:01:40 +08:00 · 2026-03-08 10:01:40 +08:00 · 8e216cbb4e
commit 8e216cbb4e
parent 40b175a695
2 changed files with 31 additions and 18 deletions
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@ -199,6 +199,24 @@ export async function runAgentTurnWithFallback(params: {
        return text;
      };
      const blockReplyPipeline = params.blockReplyPipeline;
+      // Build the delivery handler once so both onAgentEvent (compaction start
+      // notice) and the onBlockReply field share the same instance.  This
+      // ensures replyToId threading (replyToMode=all|first) is applied to
+      // compaction notices just like every other block reply.
+      const blockReplyHandler = params.opts?.onBlockReply
+        ? createBlockReplyDeliveryHandler({
+            onBlockReply: params.opts.onBlockReply,
+            currentMessageId:
+              params.sessionCtx.MessageSidFull ?? params.sessionCtx.MessageSid,
+            normalizeStreamingText,
+            applyReplyToMode: params.applyReplyToMode,
+            normalizeMediaPaths: normalizeReplyMediaPaths,
+            typingSignals: params.typingSignals,
+            blockStreamingEnabled: params.blockStreamingEnabled,
+            blockReplyPipeline,
+            directlySentBlockKeys,
+          })
+        : undefined;
      const onToolResult = params.opts?.onToolResult;
      const fallbackResult = await runWithModelFallback({
        ...resolveModelFallbackOptions(params.followupRun.run),
@ -401,9 +419,9 @@ export async function runAgentTurnWithFallback(params: {
                      if (params.opts?.onCompactionStart) {
                        await params.opts.onCompactionStart();
                      } else {
-                        // Use the universal in-run block reply path so every
-                        // channel sees a notice while compaction is pausing the run.
-                        await params.opts?.onBlockReply?.({ text: "🧹 Compacting context..." });
+                        // Route through the shared block reply handler so
+                        // reply-to threading matches other in-run notices.
+                        await blockReplyHandler?.({ text: "🧹 Compacting context..." });
                      }
                    }
                    const completed = evt.data?.completed === true;
@ -416,20 +434,7 @@ export async function runAgentTurnWithFallback(params: {
                // Always pass onBlockReply so flushBlockReplyBuffer works before tool execution,
                // even when regular block streaming is disabled. The handler sends directly
                // via opts.onBlockReply when the pipeline isn't available.
-                onBlockReply: params.opts?.onBlockReply
-                  ? createBlockReplyDeliveryHandler({
-                      onBlockReply: params.opts.onBlockReply,
-                      currentMessageId:
-                        params.sessionCtx.MessageSidFull ?? params.sessionCtx.MessageSid,
-                      normalizeStreamingText,
-                      applyReplyToMode: params.applyReplyToMode,
-                      normalizeMediaPaths: normalizeReplyMediaPaths,
-                      typingSignals: params.typingSignals,
-                      blockStreamingEnabled: params.blockStreamingEnabled,
-                      blockReplyPipeline,
-                      directlySentBlockKeys,
-                    })
-                  : undefined,
+                onBlockReply: blockReplyHandler,
                onBlockReplyFlush:
                  params.blockStreamingEnabled && blockReplyPipeline
                    ? async () => {
--- a/src/auto-reply/reply/agent-runner.ts
+++ b/src/auto-reply/reply/agent-runner.ts
@ -710,8 +710,16 @@ export async function runReplyAgent(params: {
      // we must deliver the completion notice the same way the start notice was
      // sent (via onBlockReply directly). Otherwise the user sees the "🧹
      // Compacting context..." start notice but never receives the completion.
+      // This path calls opts.onBlockReply directly, so apply replyToMode here to
+      // keep the notice threaded like normal replies when replyToMode=all|first.
      if (opts?.onBlockReply) {
-        await opts.onBlockReply({ text: completionText });
+        const currentMessageId = sessionCtx.MessageSidFull ?? sessionCtx.MessageSid;
+        const noticePayload = applyReplyToMode({
+          text: completionText,
+          replyToId: currentMessageId,
+          replyToCurrent: true,
+        });
+        await opts.onBlockReply(noticePayload);
      } else {
        verboseNotices.push({ text: completionText });
      }