fix: set DeferredMediaApplied on error and strip old file blocks on rebuild
This commit is contained in:
parent
7973b2cc5b
commit
f1e023c3de
@ -13,6 +13,11 @@ const MEDIA_ONLY_PLACEHOLDER = "[User sent media without caption]";
|
||||
const MEDIA_REPLY_HINT_PREFIX = "To send an image back, prefer the message tool";
|
||||
const LEADING_MEDIA_ATTACHED_LINE_RE = /^\[media attached(?: \d+\/\d+)?: [^\r\n]*\]$/;
|
||||
// Presence check: matches the start of an opening `<file name="` tag anywhere
// in a string, case-insensitively. It has no `g` flag, so `.test()` keeps no
// `lastIndex` state between calls.
const FILE_BLOCK_RE = /<file\s+name="/i;
|
||||
// Matches one complete file block: an opening `<file name="..." ...>` tag, the
// shortest possible run of content (lazy `[\s\S]*?`, so it stops at the first
// `</file>`), the closing tag, and at most one trailing "\n". The `g` flag makes
// `String.prototype.replace` act on every block in the input; `i` makes the tag
// match case-insensitive.
const FILE_BLOCK_FULL_RE = /<file\s+name="[^"]*"[^>]*>[\s\S]*?<\/file>\n?/gi;
|
||||
|
||||
/**
 * Drops every file block matched by `FILE_BLOCK_FULL_RE` from `text`.
 *
 * @param text - Text that may contain `<file name="…">…</file>` blocks.
 * @returns The text with all matched blocks removed and leading/trailing
 *   whitespace trimmed.
 */
function stripExistingFileBlocks(text: string): string {
  const withoutFileBlocks = text.replace(FILE_BLOCK_FULL_RE, "");
  return withoutFileBlocks.trim();
}
|
||||
|
||||
function stripLeadingMediaAttachedLines(prompt: string): string {
|
||||
const lines = prompt.split("\n");
|
||||
@ -87,6 +92,15 @@ function rebuildQueuedPromptWithMediaUnderstanding(params: {
|
||||
stripped = stripLeadingMediaReplyHint(stripped);
|
||||
}
|
||||
|
||||
// Strip pre-existing file blocks from the prompt when the updated body
|
||||
// contains new file blocks. Mixed messages (audio + PDF) can arrive with
|
||||
// file extraction already applied in the primary path; without this strip
|
||||
// the old block stays in the prompt while the updated body adds a new one,
|
||||
// duplicating potentially large file payloads.
|
||||
if (params.updatedBody && FILE_BLOCK_RE.test(params.updatedBody)) {
|
||||
stripped = stripExistingFileBlocks(stripped);
|
||||
}
|
||||
|
||||
const updatedBody = normalizeUpdatedBody({
|
||||
originalBody: params.originalBody,
|
||||
updatedBody: params.updatedBody,
|
||||
@ -234,6 +248,7 @@ export async function applyDeferredMediaUnderstandingToQueuedRun(
|
||||
updatedBody: shouldRebuildPrompt ? mediaCtx.Body : undefined,
|
||||
});
|
||||
} catch (err) {
|
||||
mediaContext.DeferredMediaApplied = true;
|
||||
logVerbose(
|
||||
`${params.logLabel ?? "followup"}: media understanding failed, proceeding with raw content: ${err instanceof Error ? err.message : String(err)}`,
|
||||
);
|
||||
|
||||
@ -1808,6 +1808,117 @@ describe("createFollowupRunner media understanding", () => {
|
||||
expect(matches?.length).toBe(1);
|
||||
});
|
||||
|
||||
it("does not duplicate file blocks for mixed audio+file messages re-processed in followup", async () => {
  // Both blocks name the same file; only the extracted content differs, which
  // lets the assertions tell the stale block from the rebuilt one.
  const staleFileBlock =
    '<file name="report.pdf" mime="application/pdf">\nold extracted content\n</file>';
  const freshFileBlock =
    '<file name="report.pdf" mime="application/pdf">\nnew extracted content\n</file>';
  const transcript = "Mixed message transcript";

  // The mocked media-understanding pass records an audio transcription on the
  // context and rewrites Body so that it carries the fresh file block.
  applyMediaUnderstandingMock.mockImplementationOnce(
    async (params: { ctx: Record<string, unknown> }) => {
      const { ctx } = params;
      ctx.MediaUnderstanding = [
        {
          kind: "audio.transcription",
          text: transcript,
          attachmentIndex: 0,
          provider: "whisper",
        },
      ];
      ctx.Transcript = transcript;
      ctx.Body = `[Audio]\nTranscript:\n${transcript}\n\nanalyze this\n\n${freshFileBlock}`;
      return {
        outputs: [
          {
            kind: "audio.transcription",
            text: transcript,
            attachmentIndex: 0,
            provider: "whisper",
          },
        ],
        decisions: [],
        appliedImage: false,
        appliedAudio: true,
        appliedVideo: false,
        appliedFile: true,
      };
    },
  );
  runEmbeddedPiAgentMock.mockResolvedValueOnce({
    payloads: [{ text: "processed" }],
    meta: {},
  });

  const runner = createFollowupRunner({
    opts: { onBlockReply: vi.fn(async () => {}) },
    typing: createMockTypingController(),
    typingMode: "instant",
    defaultModel: "anthropic/claude-opus-4-5",
  });

  // Queued state for a mixed message: the primary path already extracted the
  // PDF (the stale file block sits in both the prompt and the stored Body),
  // while the audio attachment still has no transcript.
  const queuedRun = createQueuedRun({
    prompt: `[media attached 1/2: /tmp/voice.ogg]\n[media attached 2/2: /tmp/report.pdf]\n${MEDIA_REPLY_HINT}\nanalyze this\n\n${staleFileBlock}`,
    mediaContext: {
      Body: `analyze this\n\n${staleFileBlock}`,
      CommandBody: "analyze this",
      RawBody: "analyze this",
      MediaPaths: ["/tmp/voice.ogg", "/tmp/report.pdf"],
      MediaTypes: ["audio/ogg", "application/pdf"],
    },
  });
  await runner(queuedRun);

  const lastAgentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as {
    prompt?: string;
  };
  const finalPrompt = lastAgentCall?.prompt;

  // The transcript made it into the rebuilt prompt.
  expect(finalPrompt).toContain(transcript);
  // Exactly one report.pdf block survives, and it is the fresh one.
  expect(finalPrompt?.match(/<file\s+name="report\.pdf"/g)).toHaveLength(1);
  expect(finalPrompt).toContain("new extracted content");
  expect(finalPrompt).not.toContain("old extracted content");
});
|
||||
|
||||
it("sets DeferredMediaApplied when media understanding throws", async () => {
  // Make the media-understanding pass fail once; the agent mock still replies.
  const serviceOutage = new Error("transcription service unavailable");
  applyMediaUnderstandingMock.mockRejectedValueOnce(serviceOutage);
  runEmbeddedPiAgentMock.mockResolvedValueOnce({
    payloads: [{ text: "fallback reply" }],
    meta: {},
  });

  const runner = createFollowupRunner({
    opts: { onBlockReply: vi.fn(async () => {}) },
    typing: createMockTypingController(),
    typingMode: "instant",
    defaultModel: "anthropic/claude-opus-4-5",
  });

  const queuedRun = createQueuedRun({
    prompt: "[media attached: /tmp/voice.ogg (audio/ogg)]\nsome text",
    mediaContext: {
      Body: "some text",
      MediaPaths: ["/tmp/voice.ogg"],
      MediaTypes: ["audio/ogg"],
    },
  });

  await runner(queuedRun);

  // The flag must be set even on failure so a later re-run does not retry.
  expect(queuedRun.mediaContext?.DeferredMediaApplied).toBe(true);

  // The agent is still invoked, and its prompt keeps the raw user text.
  const lastAgentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as {
    prompt?: string;
  };
  const finalPrompt = lastAgentCall?.prompt;
  expect(finalPrompt).toContain("some text");
});
|
||||
|
||||
it("does not re-apply file extraction when the stored media body already has a file block", async () => {
|
||||
const fileBlock = '<file name="report.pdf" mime="application/pdf">\nreport content\n</file>';
|
||||
runEmbeddedPiAgentMock.mockResolvedValueOnce({
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user