Auto-reply: fix followup media prompt rebuild

This commit is contained in:
Joey Krug 2026-03-15 00:40:12 -04:00
parent f890bc75dc
commit 0ca772166f
2 changed files with 183 additions and 15 deletions

View File

@ -55,6 +55,30 @@ function replaceLastOccurrence(
return `${value.slice(0, index)}${replacement}${value.slice(index + search.length)}`;
}
/**
 * Finds where `search` first appears in the portion of `value` that precedes
 * the first `FILE_BLOCK_RE` match.
 *
 * When `FILE_BLOCK_RE` does not match anywhere, the whole of `value` is searched.
 *
 * @param value - Text to look in.
 * @param search - Substring to locate; an empty string never matches.
 * @returns The zero-based index of the first match, or -1 when `search` is
 *   empty or is not contained in that leading region.
 */
function findFirstOccurrenceBeforeFileBlocks(value: string, search: string): number {
  if (search) {
    const firstFileBlockAt = value.search(FILE_BLOCK_RE);
    if (firstFileBlockAt < 0) {
      // No file block present: the entire value counts as the searchable region.
      return value.indexOf(search);
    }
    // Only text ahead of the first file block is eligible for a match.
    return value.slice(0, firstFileBlockAt).indexOf(search);
  }
  return -1;
}
/**
 * Substitutes `replacement` for the first occurrence of `search` reported by
 * `findFirstOccurrenceBeforeFileBlocks`.
 *
 * @param value - Text to rewrite.
 * @param search - Substring to replace; an empty string is never replaced.
 * @param replacement - Text spliced in where `search` was found.
 * @returns The rewritten text, or `undefined` when `search` is empty or no
 *   occurrence was found.
 */
function replaceFirstOccurrenceBeforeFileBlocks(
  value: string,
  search: string,
  replacement: string,
): string | undefined {
  const matchStart = search ? findFirstOccurrenceBeforeFileBlocks(value, search) : -1;
  if (matchStart < 0) {
    return undefined;
  }
  // Splice: everything before the match, the replacement, everything after it.
  const head = value.slice(0, matchStart);
  const tail = value.slice(matchStart + search.length);
  return head + replacement + tail;
}
/**
 * Passes `text` through `parseInlineDirectives` (substituting an empty string
 * for `undefined`) and returns the trimmed `cleaned` field of the result.
 */
function stripInlineDirectives(text: string | undefined): string {
  const { cleaned } = parseInlineDirectives(text ?? "");
  return cleaned.trim();
}
@ -92,20 +116,29 @@ function rebuildQueuedPromptWithMediaUnderstanding(params: {
stripped = stripLeadingMediaReplyHint(stripped);
}
const replacementTargets = [
params.originalBody?.trim(),
stripInlineDirectives(params.originalBody),
MEDIA_ONLY_PLACEHOLDER,
].filter(
(value, index, list): value is string => Boolean(value) && list.indexOf(value) === index,
);
// Strip pre-existing file blocks from the body region when the updated body
// contains new file blocks. Mixed messages (audio + PDF) can arrive with
// file extraction already applied in the primary path; without this strip
// the old block stays in the prompt while the updated body adds a new one,
// duplicating potentially large file payloads.
// Scope stripping to the body segment so quoted/replied text and thread
// history above the body retain any legitimate <file> blocks.
// Scope stripping to the confirmed body segment so quoted/replied text,
// thread history above the body, and prompts whose original body no longer
// appears all retain any legitimate <file> blocks.
if (params.updatedBody && FILE_BLOCK_RE.test(params.updatedBody)) {
const bodyTarget = params.originalBody?.trim();
const bodyIdx = bodyTarget ? stripped.lastIndexOf(bodyTarget) : -1;
const bodyIdx =
replacementTargets
.map((target) => findFirstOccurrenceBeforeFileBlocks(stripped, target))
.find((index) => index >= 0) ?? -1;
if (bodyIdx >= 0) {
stripped = stripped.slice(0, bodyIdx) + stripExistingFileBlocks(stripped.slice(bodyIdx));
} else {
stripped = stripExistingFileBlocks(stripped);
}
}
@ -117,17 +150,9 @@ function rebuildQueuedPromptWithMediaUnderstanding(params: {
return [params.mediaNote?.trim(), stripped].filter(Boolean).join("\n").trim();
}
const replacementTargets = [
params.originalBody?.trim(),
stripInlineDirectives(params.originalBody),
MEDIA_ONLY_PLACEHOLDER,
].filter(
(value, index, list): value is string => Boolean(value) && list.indexOf(value) === index,
);
let rebuilt = stripped;
for (const target of replacementTargets) {
const replaced = replaceLastOccurrence(rebuilt, target, updatedBody);
const replaced = replaceFirstOccurrenceBeforeFileBlocks(rebuilt, target, updatedBody);
if (replaced !== undefined) {
rebuilt = replaced;
return [params.mediaNote?.trim(), rebuilt.trim()].filter(Boolean).join("\n").trim();

View File

@ -1882,6 +1882,149 @@ describe("createFollowupRunner media understanding", () => {
expect(agentCall?.prompt).not.toContain("old extracted content");
});
// Regression test: the queued prompt's literal text holds only quoted text plus a
// <file> block, while the "summarize this" body appears only in mediaContext.
// The quoted block must be kept and the new file block from the updated body added.
it("preserves unrelated file blocks when the original body is absent from the prompt", async () => {
const quotedFileBlock =
'<file name="thread.pdf" mime="application/pdf">\nquoted thread attachment\n</file>';
const existingFileBlock =
'<file name="report.pdf" mime="application/pdf">\nold extracted content\n</file>';
const newFileBlock =
'<file name="report.pdf" mime="application/pdf">\nnew extracted content\n</file>';
const transcriptText = "Transcript from deferred audio";
// Stub for the media-understanding step: records an audio transcription on ctx and
// rewrites ctx.Body to transcript + "summarize this" + the new file block.
applyMediaUnderstandingMock.mockImplementationOnce(
async (params: { ctx: Record<string, unknown> }) => {
params.ctx.MediaUnderstanding = [
{
kind: "audio.transcription",
text: transcriptText,
attachmentIndex: 0,
provider: "whisper",
},
];
params.ctx.Transcript = transcriptText;
params.ctx.Body = `[Audio]\nTranscript:\n${transcriptText}\n\nsummarize this\n\n${newFileBlock}`;
return {
outputs: [
{
kind: "audio.transcription",
text: transcriptText,
attachmentIndex: 0,
provider: "whisper",
},
],
decisions: [],
appliedImage: false,
appliedAudio: true,
appliedVideo: false,
appliedFile: true,
};
},
);
runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [{ text: "processed" }],
meta: {},
});
const runner = createFollowupRunner({
opts: { onBlockReply: vi.fn(async () => {}) },
typing: createMockTypingController(),
typingMode: "instant",
defaultModel: "anthropic/claude-opus-4-5",
});
// The prompt ends with quoted text and quotedFileBlock; existingFileBlock is only
// present in mediaContext.Body, not in the prompt string itself.
await runner(
createQueuedRun({
prompt: `[media attached 1/2: /tmp/voice.ogg]\n[media attached 2/2: /tmp/report.pdf]\n${MEDIA_REPLY_HINT}\nQuoted thread above\n\n${quotedFileBlock}`,
mediaContext: {
Body: `summarize this\n\n${existingFileBlock}`,
CommandBody: "summarize this",
RawBody: "summarize this",
MediaPaths: ["/tmp/voice.ogg", "/tmp/report.pdf"],
MediaTypes: ["audio/ogg", "application/pdf"],
},
}),
);
// Inspect the prompt passed to the most recent agent invocation.
const agentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as {
prompt?: string;
};
// Quoted text and its file block survive, the new block is present, and there are
// exactly two `<file name="` openings in total (quoted + new).
expect(agentCall?.prompt).toContain("Quoted thread above");
expect(agentCall?.prompt).toContain(quotedFileBlock);
expect(agentCall?.prompt).toContain(newFileBlock);
expect(agentCall?.prompt?.match(/<file\s+name="/g)).toHaveLength(2);
});
// Regression test: the body text "summarize this" also occurs inside the existing
// <file> block's content. The transcript-augmented body must land before the file
// block, and the file block itself must remain intact.
it("replaces the visible body before file blocks instead of matching file content", async () => {
const existingFileBlock =
'<file name="report.pdf" mime="application/pdf">\nsummary notes:\nsummarize this\n</file>';
const transcriptText = "Transcript from deferred audio";
// Stub for the media-understanding step: records an audio transcription on ctx and
// rewrites ctx.Body to transcript + "summarize this" (no file block; appliedFile is false).
applyMediaUnderstandingMock.mockImplementationOnce(
async (params: { ctx: Record<string, unknown> }) => {
params.ctx.MediaUnderstanding = [
{
kind: "audio.transcription",
text: transcriptText,
attachmentIndex: 0,
provider: "whisper",
},
];
params.ctx.Transcript = transcriptText;
params.ctx.Body = `[Audio]\nTranscript:\n${transcriptText}\n\nsummarize this`;
return {
outputs: [
{
kind: "audio.transcription",
text: transcriptText,
attachmentIndex: 0,
provider: "whisper",
},
],
decisions: [],
appliedImage: false,
appliedAudio: true,
appliedVideo: false,
appliedFile: false,
};
},
);
runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [{ text: "processed" }],
meta: {},
});
const runner = createFollowupRunner({
opts: { onBlockReply: vi.fn(async () => {}) },
typing: createMockTypingController(),
typingMode: "instant",
defaultModel: "anthropic/claude-opus-4-5",
});
// The prompt contains the visible body "summarize this" followed by the existing
// file block, whose content repeats the same text.
await runner(
createQueuedRun({
prompt: `[media attached 1/2: /tmp/voice.ogg]\n[media attached 2/2: /tmp/report.pdf]\n${MEDIA_REPLY_HINT}\nsummarize this\n\n${existingFileBlock}`,
mediaContext: {
Body: `summarize this\n\n${existingFileBlock}`,
CommandBody: "summarize this",
RawBody: "summarize this",
MediaPaths: ["/tmp/voice.ogg", "/tmp/report.pdf"],
MediaTypes: ["audio/ogg", "application/pdf"],
},
}),
);
// Inspect the prompt passed to the most recent agent invocation.
const agentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as {
prompt?: string;
};
const transcriptBlock = `[Audio]\nTranscript:\n${transcriptText}\n\nsummarize this`;
// The file block is unchanged, and the transcript block is present and positioned
// ahead of it in the prompt.
expect(agentCall?.prompt).toContain(existingFileBlock);
expect(agentCall?.prompt).toContain(transcriptBlock);
expect(agentCall?.prompt?.indexOf(transcriptBlock)).toBeGreaterThan(-1);
expect(agentCall?.prompt?.indexOf(transcriptBlock)).toBeLessThan(
agentCall?.prompt?.indexOf(existingFileBlock) ?? -1,
);
});
it("sets DeferredMediaApplied when media understanding throws", async () => {
applyMediaUnderstandingMock.mockRejectedValueOnce(
new Error("transcription service unavailable"),