diff --git a/src/auto-reply/reply/followup-media.ts b/src/auto-reply/reply/followup-media.ts index 5a014e63f9b..5340d0df99a 100644 --- a/src/auto-reply/reply/followup-media.ts +++ b/src/auto-reply/reply/followup-media.ts @@ -13,6 +13,11 @@ const MEDIA_ONLY_PLACEHOLDER = "[User sent media without caption]"; const MEDIA_REPLY_HINT_PREFIX = "To send an image back, prefer the message tool"; const LEADING_MEDIA_ATTACHED_LINE_RE = /^\[media attached(?: \d+\/\d+)?: [^\r\n]*\]$/; const FILE_BLOCK_RE = /<file[^>]*>[\s\S]*?<\/file>\n?/gi; + +function stripExistingFileBlocks(text: string): string { + return text.replace(FILE_BLOCK_RE, "").trim(); +} function stripLeadingMediaAttachedLines(prompt: string): string { const lines = prompt.split("\n"); @@ -87,6 +92,15 @@ function rebuildQueuedPromptWithMediaUnderstanding(params: { stripped = stripLeadingMediaReplyHint(stripped); } + // Strip pre-existing file blocks from the prompt when the updated body + // contains new file blocks. Mixed messages (audio + PDF) can arrive with + // file extraction already applied in the primary path; without this strip + // the old block stays in the prompt while the updated body adds a new one, + // duplicating potentially large file payloads. + if (params.updatedBody && FILE_BLOCK_RE.test(params.updatedBody)) { + stripped = stripExistingFileBlocks(stripped); + } + const updatedBody = normalizeUpdatedBody({ originalBody: params.originalBody, updatedBody: params.updatedBody, @@ -234,6 +248,7 @@ export async function applyDeferredMediaUnderstandingToQueuedRun( updatedBody: shouldRebuildPrompt ? mediaCtx.Body : undefined, }); } catch (err) { + mediaContext.DeferredMediaApplied = true; logVerbose( `${params.logLabel ?? "followup"}: media understanding failed, proceeding with raw content: ${err instanceof Error ? 
err.message : String(err)}`, ); diff --git a/src/auto-reply/reply/followup-runner.test.ts b/src/auto-reply/reply/followup-runner.test.ts index c3e75e6b856..cd1951aa748 100644 --- a/src/auto-reply/reply/followup-runner.test.ts +++ b/src/auto-reply/reply/followup-runner.test.ts @@ -1808,6 +1808,117 @@ describe("createFollowupRunner media understanding", () => { expect(matches?.length).toBe(1); }); + it("does not duplicate file blocks for mixed audio+file messages re-processed in followup", async () => { + const existingFileBlock = + '<file name="report.pdf">\nold extracted content\n</file>'; + const newFileBlock = + '<file name="report.pdf">\nnew extracted content\n</file>'; + const transcriptText = "Mixed message transcript"; + + applyMediaUnderstandingMock.mockImplementationOnce( + async (params: { ctx: Record<string, unknown> }) => { + params.ctx.MediaUnderstanding = [ + { + kind: "audio.transcription", + text: transcriptText, + attachmentIndex: 0, + provider: "whisper", + }, + ]; + params.ctx.Transcript = transcriptText; + params.ctx.Body = `[Audio]\nTranscript:\n${transcriptText}\n\nanalyze this\n\n${newFileBlock}`; + return { + outputs: [ + { + kind: "audio.transcription", + text: transcriptText, + attachmentIndex: 0, + provider: "whisper", + }, + ], + decisions: [], + appliedImage: false, + appliedAudio: true, + appliedVideo: false, + appliedFile: true, + }; + }, + ); + runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "processed" }], + meta: {}, + }); + + const runner = createFollowupRunner({ + opts: { onBlockReply: vi.fn(async () => {}) }, + typing: createMockTypingController(), + typingMode: "instant", + defaultModel: "anthropic/claude-opus-4-5", + }); + + // Simulate a mixed message where the primary path already extracted the + // PDF (file block is in the prompt) but audio transcription failed. 
+ await runner( + createQueuedRun({ + prompt: `[media attached 1/2: /tmp/voice.ogg]\n[media attached 2/2: /tmp/report.pdf]\n${MEDIA_REPLY_HINT}\nanalyze this\n\n${existingFileBlock}`, + mediaContext: { + Body: `analyze this\n\n${existingFileBlock}`, + CommandBody: "analyze this", + RawBody: "analyze this", + MediaPaths: ["/tmp/voice.ogg", "/tmp/report.pdf"], + MediaTypes: ["audio/ogg", "application/pdf"], + }, + }), + ); + + const agentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as { + prompt?: string; + }; + // Should contain the transcript + expect(agentCall?.prompt).toContain(transcriptText); + // Should have exactly one file block (the new one), not two + expect(agentCall?.prompt?.match(/<file/g)?.length).toBe(1); + }); + + it("sets DeferredMediaApplied even when media understanding fails", async () => { + applyMediaUnderstandingMock.mockRejectedValueOnce( + new Error("transcription service unavailable"), + ); + runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "fallback reply" }], + meta: {}, + }); + + const runner = createFollowupRunner({ + opts: { onBlockReply: vi.fn(async () => {}) }, + typing: createMockTypingController(), + typingMode: "instant", + defaultModel: "anthropic/claude-opus-4-5", + }); + + const queued = createQueuedRun({ + prompt: "[media attached: /tmp/voice.ogg (audio/ogg)]\nsome text", + mediaContext: { + Body: "some text", + MediaPaths: ["/tmp/voice.ogg"], + MediaTypes: ["audio/ogg"], + }, + }); + + await runner(queued); + + // DeferredMediaApplied should be set so re-runs don't retry + expect(queued.mediaContext?.DeferredMediaApplied).toBe(true); + + // The agent should still be called with the raw prompt + const agentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as { + prompt?: string; + }; + expect(agentCall?.prompt).toContain("some text"); + }); + it("does not re-apply file extraction when the stored media body already has a file block", async () => { const fileBlock = '<file name="report.pdf">\nreport content\n</file>'; runEmbeddedPiAgentMock.mockResolvedValueOnce({