diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 4141c8c9bc8..687e4ecf05a 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -418,10 +418,24 @@ export async function runAgentTurnWithFallback(params: { if (phase === "start") { if (params.opts?.onCompactionStart) { await params.opts.onCompactionStart(); - } else { + } else if (params.blockStreamingEnabled) { // Route through the shared block reply handler so // reply-to threading matches other in-run notices. await blockReplyHandler?.({ text: "🧹 Compacting context..." }); + } else if (params.opts?.onBlockReply) { + // blockReplyHandler is a no-op when streaming is disabled. + // Fall back to direct delivery so non-streaming runs also + // receive the compaction start notice. + const currentMessageId = + params.sessionCtx.MessageSidFull ?? params.sessionCtx.MessageSid; + const noticePayload = params.applyReplyToMode({ + text: "🧹 Compacting context...", + replyToId: currentMessageId, + replyToCurrent: true, + }); + await params.opts.onBlockReply(noticePayload); + } else { + await params.opts?.onBlockReply?.({ text: "🧹 Compacting context..." }); } } const completed = evt.data?.completed === true; diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index 7bec12d1c8a..73473949db1 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -423,7 +423,7 @@ export async function runReplyAgent(params: { // stopped, so the enqueue does not set didStream() = true and cause // buildReplyPayloads to discard the real assistant reply. We still apply a // timeout so the notice cannot stall the run indefinitely. - if (autoCompactionCompleted && opts?.onBlockReply) { + if (autoCompactionCompleted && blockStreamingEnabled && opts?.onBlockReply) { const verboseEnabled = resolvedVerboseLevel !== "off"; const completionText = verboseEnabled ? `🧹 Auto-compaction complete.`