From 88c9ad302602f8f5f6e5d755d928ebc0b63d7c67 Mon Sep 17 00:00:00 2001 From: zidongdesign Date: Sun, 8 Mar 2026 13:06:50 +0800 Subject: [PATCH] fix: bypass pipeline for compaction start notice to preserve final reply Previously the start notice was routed through blockReplyHandler which enqueues into blockReplyPipeline, setting didStream() = true. This caused buildReplyPayloads to drop all final payloads (shouldDropFinalPayloads path), discarding the real assistant reply on non-streaming model paths where assistantTexts is populated from the final message (not block chunks). Fix: send the start notice directly via opts.onBlockReply, bypassing the pipeline entirely. applyReplyToMode is still applied so replyToId threading (replyToMode=all|first) is honoured. This mirrors how the completion notice in agent-runner.ts avoids the pipeline after flush()/stop(). --- src/auto-reply/reply/agent-runner-execution.ts | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 687e4ecf05a..c7191b92edd 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -418,14 +418,10 @@ export async function runAgentTurnWithFallback(params: { if (phase === "start") { if (params.opts?.onCompactionStart) { await params.opts.onCompactionStart(); - } else if (params.blockStreamingEnabled) { - // Route through the shared block reply handler so - // reply-to threading matches other in-run notices. - await blockReplyHandler?.({ text: "🧹 Compacting context..." }); } else if (params.opts?.onBlockReply) { - // blockReplyHandler is a no-op when streaming is disabled. - // Fall back to direct delivery so non-streaming runs also - // receive the compaction start notice. + // Send directly via opts.onBlockReply (bypassing the + // pipeline) so the notice does not cause final payloads + // to be discarded on non-streaming model paths. const currentMessageId = params.sessionCtx.MessageSidFull ?? params.sessionCtx.MessageSid; const noticePayload = params.applyReplyToMode({ @@ -434,8 +430,6 @@ export async function runAgentTurnWithFallback(params: { replyToCurrent: true, }); await params.opts.onBlockReply(noticePayload); - } else { - await params.opts?.onBlockReply?.({ text: "🧹 Compacting context..." }); } } const completed = evt.data?.completed === true;