fix: use file-block-safe replacement in normalizeUpdatedBody and trailing fallback (#46454)

This commit is contained in:
Joey Krug 2026-03-15 15:51:14 -04:00
parent dc8cdfce5c
commit 8f6d5a278f
2 changed files with 150 additions and 1 deletions

View File

@ -0,0 +1,107 @@
import { describe, expect, it } from "vitest";
import {
_findLastOccurrenceBeforeFileBlocks as findLastOccurrenceBeforeFileBlocks,
_normalizeUpdatedBody as normalizeUpdatedBody,
_rebuildQueuedPromptWithMediaUnderstanding as rebuildQueuedPromptWithMediaUnderstanding,
} from "./followup-media.js";
const FILE_BLOCK = '<file name="doc.pdf" type="application/pdf">\nPDF content\n</file>';
describe("findLastOccurrenceBeforeFileBlocks", () => {
it("returns -1 for empty search", () => {
expect(findLastOccurrenceBeforeFileBlocks("hello", "")).toBe(-1);
});
it("finds last occurrence in body region before file blocks", () => {
const value = `hello world hello\n${FILE_BLOCK}`;
// "hello" appears at 0 and 12 — both before the file block
expect(findLastOccurrenceBeforeFileBlocks(value, "hello")).toBe(12);
});
it("does not match inside file block content", () => {
const value = `some text\n${FILE_BLOCK}\nPDF content`;
// "PDF content" appears in the file block and after it, but the body region
// (before <file) is just "some text\n" — no match there.
expect(findLastOccurrenceBeforeFileBlocks(value, "PDF content")).toBe(-1);
});
it("uses lastIndexOf in fallback when search itself contains file blocks", () => {
// When the search string contains a <file> block, it can't appear in the
// body-only region, so the fallback searches the full value.
const bodyWithFile = `caption\n${FILE_BLOCK}`;
const value = `previous\n${bodyWithFile}\nlater\n${bodyWithFile}`;
// Should find the *last* (trailing) occurrence
const expected = value.lastIndexOf(bodyWithFile);
expect(findLastOccurrenceBeforeFileBlocks(value, bodyWithFile)).toBe(expected);
expect(expected).toBeGreaterThan(value.indexOf(bodyWithFile));
});
it("returns index when no file blocks exist in value", () => {
expect(findLastOccurrenceBeforeFileBlocks("abc abc", "abc")).toBe(4);
});
});
describe("normalizeUpdatedBody", () => {
it("returns empty string when updatedBody is empty", () => {
expect(normalizeUpdatedBody({ originalBody: "foo", updatedBody: "" })).toBe("");
});
it("returns updatedBody when originalBody is empty", () => {
expect(normalizeUpdatedBody({ updatedBody: "hello" })).toBe("hello");
});
it("strips directives when updatedBody equals originalBody", () => {
const body = "/think high tell me a joke";
const result = normalizeUpdatedBody({ originalBody: body, updatedBody: body });
expect(result).toBe("tell me a joke");
});
it("does not corrupt file block content during directive cleanup", () => {
const originalBody = "/think high tell me about this file";
// updatedBody has the original body plus a file block appended by media processing
const updatedBody = `${originalBody}\n${FILE_BLOCK}`;
const result = normalizeUpdatedBody({ originalBody, updatedBody });
// The directive should be stripped from the body portion, file block preserved
expect(result).toContain("tell me about this file");
expect(result).toContain(FILE_BLOCK);
expect(result).not.toContain("/think");
});
it("replaces in body region, not inside file blocks", () => {
const originalBody = "PDF content";
const updatedBody = `PDF content\n<file name="doc.pdf" type="application/pdf">\nPDF content\n</file>`;
// The replacement should target the body region "PDF content" before the
// file block, not the "PDF content" inside the <file> block.
const result = normalizeUpdatedBody({ originalBody, updatedBody });
// With no directives to strip, original === cleaned, updatedBody !== originalBody
// because updatedBody has the file block appended. The replacement targets the
// body-region occurrence.
expect(result).toContain('<file name="doc.pdf"');
expect(result).toContain("PDF content\n</file>");
});
});
describe("rebuildQueuedPromptWithMediaUnderstanding", () => {
it("replaces original body with updated body in prompt", () => {
const result = rebuildQueuedPromptWithMediaUnderstanding({
prompt: "thread context\nhello world",
originalBody: "hello world",
updatedBody: 'hello world\n<file name="a.pdf">data</file>',
});
expect(result).toContain('<file name="a.pdf">data</file>');
expect(result).toContain("thread context");
});
it("preserves file blocks in thread history when body is replaced", () => {
const prompt = `history\n<file name="old.pdf">old</file>\nhello world`;
const result = rebuildQueuedPromptWithMediaUnderstanding({
prompt,
originalBody: "hello world",
updatedBody: "hello world transcribed",
});
// The old file block from history should be preserved since updatedBody
// has no file blocks of its own.
expect(result).toContain('<file name="old.pdf">old</file>');
expect(result).toContain("hello world transcribed");
});
});

View File

@ -64,6 +64,40 @@ function findFirstOccurrenceBeforeFileBlocks(value: string, search: string): num
return bodyRegion.indexOf(search);
}
function findLastOccurrenceBeforeFileBlocks(value: string, search: string): number {
if (!search) {
return -1;
}
const fileBlockIndex = value.search(FILE_BLOCK_RE);
const bodyRegion = fileBlockIndex >= 0 ? value.slice(0, fileBlockIndex) : value;
const index = bodyRegion.lastIndexOf(search);
if (index >= 0) {
return index;
}
// Fallback: search string itself contains file blocks — it can't appear in the
// body-only region. Search the full value with lastIndexOf to pick the trailing
// (most recent) occurrence when thread/history has identical <file> bodies.
if (FILE_BLOCK_RE.test(search)) {
return value.lastIndexOf(search);
}
return -1;
}
function replaceLastOccurrenceBeforeFileBlocks(
value: string,
search: string,
replacement: string,
): string | undefined {
if (!search) {
return undefined;
}
const index = findLastOccurrenceBeforeFileBlocks(value, search);
if (index < 0) {
return undefined;
}
return `${value.slice(0, index)}${replacement}${value.slice(index + search.length)}`;
}
function replaceFirstOccurrenceBeforeFileBlocks(
value: string,
search: string,
@ -101,7 +135,8 @@ function normalizeUpdatedBody(params: { originalBody?: string; updatedBody?: str
return cleanedOriginalBody;
}
return (
replaceLastOccurrence(updatedBody, originalBody, cleanedOriginalBody) ?? updatedBody
replaceLastOccurrenceBeforeFileBlocks(updatedBody, originalBody, cleanedOriginalBody) ??
updatedBody
).trim();
}
@ -215,6 +250,13 @@ function snapshotUpdatedMediaContext(params: {
};
}
// Exported for unit testing — these are pure string helpers with no side effects.
export {
findLastOccurrenceBeforeFileBlocks as _findLastOccurrenceBeforeFileBlocks,
normalizeUpdatedBody as _normalizeUpdatedBody,
rebuildQueuedPromptWithMediaUnderstanding as _rebuildQueuedPromptWithMediaUnderstanding,
};
export async function applyDeferredMediaUnderstandingToQueuedRun(
queued: FollowupRun,
params: { logLabel?: string } = {},