From 0ca772166f6c23b089d93a673960f9e1cf861154 Mon Sep 17 00:00:00 2001 From: Joey Krug Date: Sun, 15 Mar 2026 00:40:12 -0400 Subject: [PATCH] Auto-reply: fix followup media prompt rebuild --- src/auto-reply/reply/followup-media.ts | 55 +++++-- src/auto-reply/reply/followup-runner.test.ts | 143 +++++++++++++++++++ 2 files changed, 183 insertions(+), 15 deletions(-) diff --git a/src/auto-reply/reply/followup-media.ts b/src/auto-reply/reply/followup-media.ts index 61d3b9f9cfb..d862a77875b 100644 --- a/src/auto-reply/reply/followup-media.ts +++ b/src/auto-reply/reply/followup-media.ts @@ -55,6 +55,30 @@ function replaceLastOccurrence( return `${value.slice(0, index)}${replacement}${value.slice(index + search.length)}`; } +function findFirstOccurrenceBeforeFileBlocks(value: string, search: string): number { + if (!search) { + return -1; + } + const fileBlockIndex = value.search(FILE_BLOCK_RE); + const bodyRegion = fileBlockIndex >= 0 ? value.slice(0, fileBlockIndex) : value; + return bodyRegion.indexOf(search); +} + +function replaceFirstOccurrenceBeforeFileBlocks( + value: string, + search: string, + replacement: string, +): string | undefined { + if (!search) { + return undefined; + } + const index = findFirstOccurrenceBeforeFileBlocks(value, search); + if (index < 0) { + return undefined; + } + return `${value.slice(0, index)}${replacement}${value.slice(index + search.length)}`; +} + function stripInlineDirectives(text: string | undefined): string { return parseInlineDirectives(text ?? "").cleaned.trim(); } @@ -92,20 +116,29 @@ function rebuildQueuedPromptWithMediaUnderstanding(params: { stripped = stripLeadingMediaReplyHint(stripped); } + const replacementTargets = [ + params.originalBody?.trim(), + stripInlineDirectives(params.originalBody), + MEDIA_ONLY_PLACEHOLDER, + ].filter( + (value, index, list): value is string => Boolean(value) && list.indexOf(value) === index, + ); + // Strip pre-existing file blocks from the body region when the updated body // contains new file blocks. Mixed messages (audio + PDF) can arrive with // file extraction already applied in the primary path; without this strip // the old block stays in the prompt while the updated body adds a new one, // duplicating potentially large file payloads. - // Scope stripping to the body segment so quoted/replied text and thread - // history above the body retain any legitimate blocks. + // Scope stripping to the confirmed body segment so quoted/replied text, + // thread history above the body, and prompts whose original body no longer + // appears all retain any legitimate blocks. if (params.updatedBody && FILE_BLOCK_RE.test(params.updatedBody)) { - const bodyTarget = params.originalBody?.trim(); - const bodyIdx = bodyTarget ? stripped.lastIndexOf(bodyTarget) : -1; + const bodyIdx = + replacementTargets + .map((target) => findFirstOccurrenceBeforeFileBlocks(stripped, target)) + .find((index) => index >= 0) ?? -1; if (bodyIdx >= 0) { stripped = stripped.slice(0, bodyIdx) + stripExistingFileBlocks(stripped.slice(bodyIdx)); - } else { - stripped = stripExistingFileBlocks(stripped); } } @@ -117,17 +150,9 @@ function rebuildQueuedPromptWithMediaUnderstanding(params: { return [params.mediaNote?.trim(), stripped].filter(Boolean).join("\n").trim(); } - const replacementTargets = [ - params.originalBody?.trim(), - stripInlineDirectives(params.originalBody), - MEDIA_ONLY_PLACEHOLDER, - ].filter( - (value, index, list): value is string => Boolean(value) && list.indexOf(value) === index, - ); - let rebuilt = stripped; for (const target of replacementTargets) { - const replaced = replaceLastOccurrence(rebuilt, target, updatedBody); + const replaced = replaceFirstOccurrenceBeforeFileBlocks(rebuilt, target, updatedBody); if (replaced !== undefined) { rebuilt = replaced; return [params.mediaNote?.trim(), rebuilt.trim()].filter(Boolean).join("\n").trim(); diff --git a/src/auto-reply/reply/followup-runner.test.ts b/src/auto-reply/reply/followup-runner.test.ts index 16f2b8eec90..d85223b1afe 100644 --- a/src/auto-reply/reply/followup-runner.test.ts +++ b/src/auto-reply/reply/followup-runner.test.ts @@ -1882,6 +1882,149 @@ describe("createFollowupRunner media understanding", () => { expect(agentCall?.prompt).not.toContain("old extracted content"); }); + it("preserves unrelated file blocks when the original body is absent from the prompt", async () => { + const quotedFileBlock = + '\nquoted thread attachment\n'; + const existingFileBlock = + '\nold extracted content\n'; + const newFileBlock = + '\nnew extracted content\n'; + const transcriptText = "Transcript from deferred audio"; + + applyMediaUnderstandingMock.mockImplementationOnce( + async (params: { ctx: Record }) => { + params.ctx.MediaUnderstanding = [ + { + kind: "audio.transcription", + text: transcriptText, + attachmentIndex: 0, + provider: "whisper", + }, + ]; + params.ctx.Transcript = transcriptText; + params.ctx.Body = `[Audio]\nTranscript:\n${transcriptText}\n\nsummarize this\n\n${newFileBlock}`; + return { + outputs: [ + { + kind: "audio.transcription", + text: transcriptText, + attachmentIndex: 0, + provider: "whisper", + }, + ], + decisions: [], + appliedImage: false, + appliedAudio: true, + appliedVideo: false, + appliedFile: true, + }; + }, + ); + runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "processed" }], + meta: {}, + }); + + const runner = createFollowupRunner({ + opts: { onBlockReply: vi.fn(async () => {}) }, + typing: createMockTypingController(), + typingMode: "instant", + defaultModel: "anthropic/claude-opus-4-5", + }); + + await runner( + createQueuedRun({ + prompt: `[media attached 1/2: /tmp/voice.ogg]\n[media attached 2/2: /tmp/report.pdf]\n${MEDIA_REPLY_HINT}\nQuoted thread above\n\n${quotedFileBlock}`, + mediaContext: { + Body: `summarize this\n\n${existingFileBlock}`, + CommandBody: "summarize this", + RawBody: "summarize this", + MediaPaths: ["/tmp/voice.ogg", "/tmp/report.pdf"], + MediaTypes: ["audio/ogg", "application/pdf"], + }, + }), + ); + + const agentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as { + prompt?: string; + }; + expect(agentCall?.prompt).toContain("Quoted thread above"); + expect(agentCall?.prompt).toContain(quotedFileBlock); + expect(agentCall?.prompt).toContain(newFileBlock); + expect(agentCall?.prompt?.match(/ { + const existingFileBlock = + '\nsummary notes:\nsummarize this\n'; + const transcriptText = "Transcript from deferred audio"; + + applyMediaUnderstandingMock.mockImplementationOnce( + async (params: { ctx: Record }) => { + params.ctx.MediaUnderstanding = [ + { + kind: "audio.transcription", + text: transcriptText, + attachmentIndex: 0, + provider: "whisper", + }, + ]; + params.ctx.Transcript = transcriptText; + params.ctx.Body = `[Audio]\nTranscript:\n${transcriptText}\n\nsummarize this`; + return { + outputs: [ + { + kind: "audio.transcription", + text: transcriptText, + attachmentIndex: 0, + provider: "whisper", + }, + ], + decisions: [], + appliedImage: false, + appliedAudio: true, + appliedVideo: false, + appliedFile: false, + }; + }, + ); + runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "processed" }], + meta: {}, + }); + + const runner = createFollowupRunner({ + opts: { onBlockReply: vi.fn(async () => {}) }, + typing: createMockTypingController(), + typingMode: "instant", + defaultModel: "anthropic/claude-opus-4-5", + }); + + await runner( + createQueuedRun({ + prompt: `[media attached 1/2: /tmp/voice.ogg]\n[media attached 2/2: /tmp/report.pdf]\n${MEDIA_REPLY_HINT}\nsummarize this\n\n${existingFileBlock}`, + mediaContext: { + Body: `summarize this\n\n${existingFileBlock}`, + CommandBody: "summarize this", + RawBody: "summarize this", + MediaPaths: ["/tmp/voice.ogg", "/tmp/report.pdf"], + MediaTypes: ["audio/ogg", "application/pdf"], + }, + }), + ); + + const agentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as { + prompt?: string; + }; + const transcriptBlock = `[Audio]\nTranscript:\n${transcriptText}\n\nsummarize this`; + expect(agentCall?.prompt).toContain(existingFileBlock); + expect(agentCall?.prompt).toContain(transcriptBlock); + expect(agentCall?.prompt?.indexOf(transcriptBlock)).toBeGreaterThan(-1); + expect(agentCall?.prompt?.indexOf(transcriptBlock)).toBeLessThan( + agentCall?.prompt?.indexOf(existingFileBlock) ?? -1, + ); + }); + it("sets DeferredMediaApplied when media understanding throws", async () => { applyMediaUnderstandingMock.mockRejectedValueOnce( new Error("transcription service unavailable"),