diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts
index e88c227d0a1..4141c8c9bc8 100644
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -199,6 +199,24 @@ export async function runAgentTurnWithFallback(params: {
         return text;
       };
       const blockReplyPipeline = params.blockReplyPipeline;
+      // Build the delivery handler once so both onAgentEvent (compaction start
+      // notice) and the onBlockReply field share the same instance.  This
+      // ensures replyToId threading (replyToMode=all|first) is applied to
+      // compaction notices just like every other block reply.
+      const blockReplyHandler = params.opts?.onBlockReply
+        ? createBlockReplyDeliveryHandler({
+            onBlockReply: params.opts.onBlockReply,
+            currentMessageId:
+              params.sessionCtx.MessageSidFull ?? params.sessionCtx.MessageSid,
+            normalizeStreamingText,
+            applyReplyToMode: params.applyReplyToMode,
+            normalizeMediaPaths: normalizeReplyMediaPaths,
+            typingSignals: params.typingSignals,
+            blockStreamingEnabled: params.blockStreamingEnabled,
+            blockReplyPipeline,
+            directlySentBlockKeys,
+          })
+        : undefined;
       const onToolResult = params.opts?.onToolResult;
       const fallbackResult = await runWithModelFallback({
         ...resolveModelFallbackOptions(params.followupRun.run),
@@ -401,9 +419,9 @@ export async function runAgentTurnWithFallback(params: {
                       if (params.opts?.onCompactionStart) {
                         await params.opts.onCompactionStart();
                       } else {
-                        // Use the universal in-run block reply path so every
-                        // channel sees a notice while compaction is pausing the run.
-                        await params.opts?.onBlockReply?.({ text: "🧹 Compacting context..." });
+                        // Route through the shared block reply handler so
+                        // reply-to threading matches other in-run notices.
+                        await blockReplyHandler?.({ text: "🧹 Compacting context..." });
                       }
                     }
                     const completed = evt.data?.completed === true;
@@ -416,20 +434,7 @@ export async function runAgentTurnWithFallback(params: {
                 // Always pass onBlockReply so flushBlockReplyBuffer works before tool execution,
                 // even when regular block streaming is disabled. The handler sends directly
                 // via opts.onBlockReply when the pipeline isn't available.
-                onBlockReply: params.opts?.onBlockReply
-                  ? createBlockReplyDeliveryHandler({
-                      onBlockReply: params.opts.onBlockReply,
-                      currentMessageId:
-                        params.sessionCtx.MessageSidFull ?? params.sessionCtx.MessageSid,
-                      normalizeStreamingText,
-                      applyReplyToMode: params.applyReplyToMode,
-                      normalizeMediaPaths: normalizeReplyMediaPaths,
-                      typingSignals: params.typingSignals,
-                      blockStreamingEnabled: params.blockStreamingEnabled,
-                      blockReplyPipeline,
-                      directlySentBlockKeys,
-                    })
-                  : undefined,
+                onBlockReply: blockReplyHandler,
                 onBlockReplyFlush:
                   params.blockStreamingEnabled && blockReplyPipeline
                     ? async () => {
diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts
index e8874366201..8b65b6d00d9 100644
--- a/src/auto-reply/reply/agent-runner.ts
+++ b/src/auto-reply/reply/agent-runner.ts
@@ -710,8 +710,16 @@ export async function runReplyAgent(params: {
       // we must deliver the completion notice the same way the start notice was
       // sent (via onBlockReply directly). Otherwise the user sees the "🧹
       // Compacting context..." start notice but never receives the completion.
+      // Apply replyToMode so the notice is threaded consistently with normal
+      // replies when replyToMode=all|first is configured.
       if (opts?.onBlockReply) {
-        await opts.onBlockReply({ text: completionText });
+        const currentMessageId = sessionCtx.MessageSidFull ?? sessionCtx.MessageSid;
+        const noticePayload = applyReplyToMode({
+          text: completionText,
+          replyToId: currentMessageId,
+          replyToCurrent: true,
+        });
+        await opts.onBlockReply(noticePayload);
       } else {
         verboseNotices.push({ text: completionText });
       }