diff --git a/src/auto-reply/reply/reply-threading.ts b/src/auto-reply/reply/reply-threading.ts index 66871f226b7..e5ee97a9d7f 100644 --- a/src/auto-reply/reply/reply-threading.ts +++ b/src/auto-reply/reply/reply-threading.ts @@ -44,7 +44,13 @@ export function createReplyToModeFilter( if (hasThreaded) { return { ...payload, replyToId: undefined }; } - hasThreaded = true; + // Compaction notices are transient status messages — they should be + // threaded (so they appear in-context), but they must not consume the + // "first" slot of the replyToMode=first filter. Skip advancing + // hasThreaded so the real assistant reply still gets replyToId. + if (!payload.isCompactionNotice) { + hasThreaded = true; + } return payload; }; }