diff --git a/src/auto-reply/reply/followup-media.test.ts b/src/auto-reply/reply/followup-media.test.ts index d8ba453f801..77996f85606 100644 --- a/src/auto-reply/reply/followup-media.test.ts +++ b/src/auto-reply/reply/followup-media.test.ts @@ -1,43 +1,57 @@ import { describe, expect, it } from "vitest"; import { - _findLastOccurrenceBeforeFileBlocks as findLastOccurrenceBeforeFileBlocks, + _findLastOccurrenceOutsideFileBlocks as findLastOccurrenceOutsideFileBlocks, _normalizeUpdatedBody as normalizeUpdatedBody, _rebuildQueuedPromptWithMediaUnderstanding as rebuildQueuedPromptWithMediaUnderstanding, } from "./followup-media.js"; const FILE_BLOCK = '\nPDF content\n'; -describe("findLastOccurrenceBeforeFileBlocks", () => { +describe("findLastOccurrenceOutsideFileBlocks", () => { it("returns -1 for empty search", () => { - expect(findLastOccurrenceBeforeFileBlocks("hello", "")).toBe(-1); + expect(findLastOccurrenceOutsideFileBlocks("hello", "")).toBe(-1); }); it("finds last occurrence in body region before file blocks", () => { const value = `hello world hello\n${FILE_BLOCK}`; // "hello" appears at 0 and 12 — both before the file block - expect(findLastOccurrenceBeforeFileBlocks(value, "hello")).toBe(12); + expect(findLastOccurrenceOutsideFileBlocks(value, "hello")).toBe(12); }); - it("does not match inside file block content", () => { + it("skips matches inside file block content", () => { + // "PDF content" appears only inside the file block — no valid match outside. + const value = `some text\n${FILE_BLOCK}`; + expect(findLastOccurrenceOutsideFileBlocks(value, "PDF content")).toBe(-1); + }); + + it("finds trailing occurrence outside file block even when also inside one", () => { const value = `some text\n${FILE_BLOCK}\nPDF content`; - // "PDF content" appears in the file block and after it, but the body region - // (before { - // When the search string contains a block, it can't appear in the - // body-only region, so the fallback searches the full value. + it("finds occurrence when search itself contains file blocks", () => { const bodyWithFile = `caption\n${FILE_BLOCK}`; const value = `previous\n${bodyWithFile}\nlater\n${bodyWithFile}`; // Should find the *last* (trailing) occurrence const expected = value.lastIndexOf(bodyWithFile); - expect(findLastOccurrenceBeforeFileBlocks(value, bodyWithFile)).toBe(expected); + expect(findLastOccurrenceOutsideFileBlocks(value, bodyWithFile)).toBe(expected); expect(expected).toBeGreaterThan(value.indexOf(bodyWithFile)); }); it("returns index when no file blocks exist in value", () => { - expect(findLastOccurrenceBeforeFileBlocks("abc abc", "abc")).toBe(4); + expect(findLastOccurrenceOutsideFileBlocks("abc abc", "abc")).toBe(4); + }); + + it("finds body text after thread-history file blocks", () => { + const value = `Thread history\n${FILE_BLOCK}\n\ncheck this out`; + // The body "check this out" appears after a file block from thread history. + // The old truncation approach would miss this; the new approach finds it. + expect(findLastOccurrenceOutsideFileBlocks(value, "check this out")).toBe( + value.lastIndexOf("check this out"), + ); }); }); diff --git a/src/auto-reply/reply/followup-media.ts b/src/auto-reply/reply/followup-media.ts index 425bdd601ea..f0d5d951683 100644 --- a/src/auto-reply/reply/followup-media.ts +++ b/src/auto-reply/reply/followup-media.ts @@ -41,26 +41,48 @@ function stripLeadingMediaReplyHint(prompt: string): string { return prompt.trim(); } -function findLastOccurrenceBeforeFileBlocks(value: string, search: string): number { +/** Collect the [start, end) ranges of every `` block in `value`. */ +function collectFileBlockRanges(value: string): Array<[number, number]> { + const ranges: Array<[number, number]> = []; + const re = new RegExp(FILE_BLOCK_FULL_RE.source, FILE_BLOCK_FULL_RE.flags); + let m: RegExpExecArray | null; + while ((m = re.exec(value)) !== null) { + ranges.push([m.index, m.index + m[0].length]); + } + return ranges; +} + +function isInsideFileBlock( + position: number, + length: number, + ranges: Array<[number, number]>, +): boolean { + for (const [start, end] of ranges) { + if (position >= start && position + length <= end) { + return true; + } + } + return false; +} + +/** + * Find the last occurrence of `search` in `value` that is NOT inside a + * `` block. Searches the full string with lastIndexOf, + * then walks backward past any matches that fall inside file blocks. + */ +function findLastOccurrenceOutsideFileBlocks(value: string, search: string): number { if (!search) { return -1; } - const fileBlockIndex = value.search(FILE_BLOCK_RE); - const bodyRegion = fileBlockIndex >= 0 ? value.slice(0, fileBlockIndex) : value; - const index = bodyRegion.lastIndexOf(search); - if (index >= 0) { - return index; + const ranges = collectFileBlockRanges(value); + let pos = value.lastIndexOf(search); + while (pos >= 0 && isInsideFileBlock(pos, search.length, ranges)) { + pos = value.lastIndexOf(search, pos - 1); } - // Fallback: search string itself contains file blocks — it can't appear in the - // body-only region. Search the full value with lastIndexOf to pick the trailing - // (most recent) occurrence when thread/history has identical bodies. - if (FILE_BLOCK_RE.test(search)) { - return value.lastIndexOf(search); - } - return -1; + return pos; } -function replaceLastOccurrenceBeforeFileBlocks( +function replaceLastOccurrenceOutsideFileBlocks( value: string, search: string, replacement: string, @@ -68,7 +90,7 @@ function replaceLastOccurrenceBeforeFileBlocks( if (!search) { return undefined; } - const index = findLastOccurrenceBeforeFileBlocks(value, search); + const index = findLastOccurrenceOutsideFileBlocks(value, search); if (index < 0) { return undefined; } @@ -81,7 +103,7 @@ function findTrailingReplacementTargetBeforeFileBlocks( ): { index: number; target: string } | undefined { let bestMatch: { index: number; target: string } | undefined; for (const target of targets) { - const index = findLastOccurrenceBeforeFileBlocks(value, target); + const index = findLastOccurrenceOutsideFileBlocks(value, target); if (index < 0) { continue; } @@ -172,7 +194,7 @@ function normalizeUpdatedBody(params: { originalBody?: string; updatedBody?: str return cleanedOriginalBody; } return ( - replaceLastOccurrenceBeforeFileBlocks(updatedBody, originalBody, cleanedOriginalBody) ?? + replaceLastOccurrenceOutsideFileBlocks(updatedBody, originalBody, cleanedOriginalBody) ?? updatedBody ).trim(); } @@ -294,7 +316,7 @@ function snapshotUpdatedMediaContext(params: { // Exported for unit testing — these are pure string helpers with no side effects. export { - findLastOccurrenceBeforeFileBlocks as _findLastOccurrenceBeforeFileBlocks, + findLastOccurrenceOutsideFileBlocks as _findLastOccurrenceOutsideFileBlocks, normalizeUpdatedBody as _normalizeUpdatedBody, rebuildQueuedPromptWithMediaUnderstanding as _rebuildQueuedPromptWithMediaUnderstanding, }; diff --git a/src/auto-reply/reply/followup-runner.test.ts b/src/auto-reply/reply/followup-runner.test.ts index 0db30881b7e..2c94f03fb8d 100644 --- a/src/auto-reply/reply/followup-runner.test.ts +++ b/src/auto-reply/reply/followup-runner.test.ts @@ -2025,6 +2025,78 @@ describe("createFollowupRunner media understanding", () => { ); }); + it("finds the body after thread-history file blocks when body appears after the first tag", async () => { + const threadFileBlock = + '\nolder thread attachment\n'; + const transcriptText = "Transcript from deferred voice note"; + + applyMediaUnderstandingMock.mockImplementationOnce( + async (params: { ctx: Record }) => { + params.ctx.MediaUnderstanding = [ + { + kind: "audio.transcription", + text: transcriptText, + attachmentIndex: 0, + provider: "whisper", + }, + ]; + params.ctx.Transcript = transcriptText; + params.ctx.Body = `[Audio]\nTranscript:\n${transcriptText}\n\ncheck this out`; + return { + outputs: [ + { + kind: "audio.transcription", + text: transcriptText, + attachmentIndex: 0, + provider: "whisper", + }, + ], + decisions: [], + appliedImage: false, + appliedAudio: true, + appliedVideo: false, + appliedFile: false, + }; + }, + ); + runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "processed" }], + meta: {}, + }); + + const runner = createFollowupRunner({ + opts: { onBlockReply: vi.fn(async () => {}) }, + typing: createMockTypingController(), + typingMode: "instant", + defaultModel: "anthropic/claude-opus-4-5", + }); + + // The prompt has thread history with a file block BEFORE the current + // queued body text. The old truncation approach would miss the body + // because it only searched before the first tag. + await runner( + createQueuedRun({ + prompt: `[media attached: /tmp/voice.ogg]\n${MEDIA_REPLY_HINT}\nThread history\n\n${threadFileBlock}\n\ncheck this out`, + mediaContext: { + Body: "check this out", + RawBody: "check this out", + MediaPaths: ["/tmp/voice.ogg"], + MediaTypes: ["audio/ogg"], + }, + }), + ); + + const agentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as { + prompt?: string; + }; + const transcriptBlock = `[Audio]\nTranscript:\n${transcriptText}\n\ncheck this out`; + // The body should be replaced with the transcript block + expect(agentCall?.prompt).toContain(transcriptBlock); + // Thread history and its file block should be preserved + expect(agentCall?.prompt).toContain("Thread history"); + expect(agentCall?.prompt).toContain(threadFileBlock); + }); + it("sets DeferredMediaApplied when media understanding throws", async () => { applyMediaUnderstandingMock.mockRejectedValueOnce( new Error("transcription service unavailable"), diff --git a/src/auto-reply/reply/queue/drain.ts b/src/auto-reply/reply/queue/drain.ts index 5d53df34189..41c859d2ec3 100644 --- a/src/auto-reply/reply/queue/drain.ts +++ b/src/auto-reply/reply/queue/drain.ts @@ -85,14 +85,19 @@ export async function applyDeferredMediaToQueuedRuns(items: FollowupRun[]): Prom } async function resolveSummaryLines(items: FollowupRun[]): Promise { - const summaryLines: string[] = []; - for (const item of items) { - await applyDeferredMediaUnderstandingToQueuedRun(item, { logLabel: "followup queue" }); - // After deferred media, prefer the updated prompt (which includes transcripts) - // over the original summaryLine (which may just be the caption text). - summaryLines.push(buildQueueSummaryLine(item.prompt.trim() || item.summaryLine?.trim() || "")); - } - return summaryLines; + // Parallelize the media understanding API calls upfront (same pattern as + // applyDeferredMediaToQueuedRuns), then build summary lines sequentially + // so line order matches the original item order. + await Promise.allSettled( + items.map((item) => + applyDeferredMediaUnderstandingToQueuedRun(item, { logLabel: "followup queue" }), + ), + ); + // After deferred media, prefer the updated prompt (which includes transcripts) + // over the original summaryLine (which may just be the caption text). + return items.map((item) => + buildQueueSummaryLine(item.prompt.trim() || item.summaryLine?.trim() || ""), + ); } export async function buildMediaAwareQueueSummaryPrompt(params: {