fix: trailing body match and RawBody-missing extraction detection (#46454)

This commit is contained in:
Joey Krug 2026-03-15 17:34:56 -04:00
parent 8f6d5a278f
commit eb59b9c19d
3 changed files with 272 additions and 66 deletions

View File

@ -1,3 +1,4 @@
import path from "node:path";
import { logVerbose } from "../../globals.js";
import { applyMediaUnderstanding } from "../../media-understanding/apply.js";
import {
@ -40,30 +41,6 @@ function stripLeadingMediaReplyHint(prompt: string): string {
return prompt.trim();
}
function replaceLastOccurrence(
value: string,
search: string,
replacement: string,
): string | undefined {
if (!search) {
return undefined;
}
const index = value.lastIndexOf(search);
if (index < 0) {
return undefined;
}
return `${value.slice(0, index)}${replacement}${value.slice(index + search.length)}`;
}
function findFirstOccurrenceBeforeFileBlocks(value: string, search: string): number {
if (!search) {
return -1;
}
const fileBlockIndex = value.search(FILE_BLOCK_RE);
const bodyRegion = fileBlockIndex >= 0 ? value.slice(0, fileBlockIndex) : value;
return bodyRegion.indexOf(search);
}
function findLastOccurrenceBeforeFileBlocks(value: string, search: string): number {
if (!search) {
return -1;
@ -98,21 +75,81 @@ function replaceLastOccurrenceBeforeFileBlocks(
return `${value.slice(0, index)}${replacement}${value.slice(index + search.length)}`;
}
function replaceFirstOccurrenceBeforeFileBlocks(
function findTrailingReplacementTargetBeforeFileBlocks(
value: string,
targets: string[],
): { index: number; target: string } | undefined {
let bestMatch: { index: number; target: string } | undefined;
for (const target of targets) {
const index = findLastOccurrenceBeforeFileBlocks(value, target);
if (index < 0) {
continue;
}
if (!bestMatch || index > bestMatch.index) {
bestMatch = { index, target };
}
}
return bestMatch;
}
function replaceOccurrenceAtIndex(
value: string,
search: string,
replacement: string,
): string | undefined {
if (!search) {
return undefined;
}
const index = findFirstOccurrenceBeforeFileBlocks(value, search);
if (index < 0) {
return undefined;
}
index: number,
): string {
return `${value.slice(0, index)}${replacement}${value.slice(index + search.length)}`;
}
function decodeXmlAttr(value: string): string {
return value
.replace(/&quot;/g, '"')
.replace(/&apos;/g, "'")
.replace(/&lt;/g, "<")
.replace(/&gt;/g, ">")
.replace(/&amp;/g, "&");
}
function extractAttachmentFileName(value?: string): string | undefined {
const trimmed = value?.trim();
if (!trimmed) {
return undefined;
}
if (/^[a-zA-Z][a-zA-Z\d+.-]*:/.test(trimmed)) {
try {
const pathname = new URL(trimmed).pathname;
const basename = path.posix.basename(pathname);
return basename || undefined;
} catch {
// Fall back to path-style parsing below.
}
}
const normalized = trimmed.replace(/\\/g, "/");
const basename = path.posix.basename(normalized);
return basename || undefined;
}
function bodyContainsMatchingFileBlock(mediaContext: FollowupMediaContext): boolean {
const body = mediaContext.Body?.trim();
if (!body || !FILE_BLOCK_RE.test(body)) {
return false;
}
const bodyFileNames = new Set<string>();
for (const match of body.matchAll(/<file\s+name="([^"]*)"[^>]*>/gi)) {
const fileName = match[1]?.trim();
if (fileName) {
bodyFileNames.add(decodeXmlAttr(fileName));
}
}
if (bodyFileNames.size === 0) {
return false;
}
return normalizeAttachments(mediaContext as MsgContext).some((attachment) => {
const fileName = extractAttachmentFileName(attachment.path ?? attachment.url);
return Boolean(fileName && bodyFileNames.has(fileName));
});
}
function stripInlineDirectives(text: string | undefined): string {
return parseInlineDirectives(text ?? "").cleaned.trim();
}
@ -168,12 +205,14 @@ function rebuildQueuedPromptWithMediaUnderstanding(params: {
// thread history above the body, and prompts whose original body no longer
// appears all retain any legitimate <file> blocks.
if (params.updatedBody && FILE_BLOCK_RE.test(params.updatedBody)) {
const bodyIdx =
replacementTargets
.map((target) => findFirstOccurrenceBeforeFileBlocks(stripped, target))
.find((index) => index >= 0) ?? -1;
if (bodyIdx >= 0) {
stripped = stripped.slice(0, bodyIdx) + stripExistingFileBlocks(stripped.slice(bodyIdx));
const trailingMatch = findTrailingReplacementTargetBeforeFileBlocks(
stripped,
replacementTargets,
);
if (trailingMatch) {
stripped =
stripped.slice(0, trailingMatch.index) +
stripExistingFileBlocks(stripped.slice(trailingMatch.index));
}
}
@ -186,12 +225,15 @@ function rebuildQueuedPromptWithMediaUnderstanding(params: {
}
let rebuilt = stripped;
for (const target of replacementTargets) {
const replaced = replaceFirstOccurrenceBeforeFileBlocks(rebuilt, target, updatedBody);
if (replaced !== undefined) {
rebuilt = replaced;
return [params.mediaNote?.trim(), rebuilt.trim()].filter(Boolean).join("\n").trim();
}
const trailingMatch = findTrailingReplacementTargetBeforeFileBlocks(rebuilt, replacementTargets);
if (trailingMatch) {
rebuilt = replaceOccurrenceAtIndex(
rebuilt,
trailingMatch.target,
updatedBody,
trailingMatch.index,
);
return [params.mediaNote?.trim(), rebuilt.trim()].filter(Boolean).join("\n").trim();
}
rebuilt = [rebuilt, updatedBody].filter(Boolean).join("\n\n");
@ -268,29 +310,29 @@ export async function applyDeferredMediaUnderstandingToQueuedRun(
if (!mediaContext || mediaContext.DeferredMediaApplied) {
return;
}
if (mediaContext.MediaUnderstanding?.length) {
mediaContext.DeferredMediaApplied = true;
return;
}
if (!hasMediaAttachments(mediaContext)) {
mediaContext.DeferredMediaApplied = true;
return;
}
const resolvedOriginalBody =
mediaContext.CommandBody ?? mediaContext.RawBody ?? mediaContext.Body;
// Detect file extraction from the primary path via body mutation instead of
// scanning for literal '<file name=' patterns (which false-positives on user
// text). Compare Body against RawBody (never mutated by the primary path's
// media/file processing) rather than CommandBody (which differs from Body
// when inline directives like /think were stripped).
const referenceBody = mediaContext.RawBody ?? mediaContext.Body;
if (
!mediaContext.DeferredFileBlocksExtracted &&
mediaContext.Body !== referenceBody &&
hasAnyFileAttachments(mediaContext)
) {
mediaContext.DeferredFileBlocksExtracted = true;
// Prefer RawBody-vs-Body comparison when RawBody exists. If RawBody is
// missing, fall back to explicit file-extraction signals instead of
// re-running extraction just because the clean pre-extraction body is gone.
if (!mediaContext.DeferredFileBlocksExtracted && hasAnyFileAttachments(mediaContext)) {
const rawBodyMissing = typeof mediaContext.RawBody !== "string";
if (mediaContext.Body !== referenceBody) {
mediaContext.DeferredFileBlocksExtracted = true;
} else if (
rawBodyMissing &&
(Boolean(mediaContext.MediaUnderstanding?.length) ||
bodyContainsMatchingFileBlock(mediaContext))
) {
mediaContext.DeferredFileBlocksExtracted = true;
}
}
if (mediaContext.MediaUnderstanding?.length) {
mediaContext.DeferredMediaApplied = true;
return;
}
if (mediaContext.DeferredFileBlocksExtracted && hasOnlyFileLikeAttachments(mediaContext)) {
@ -298,6 +340,9 @@ export async function applyDeferredMediaUnderstandingToQueuedRun(
return;
}
const resolvedOriginalBody =
mediaContext.CommandBody ?? mediaContext.RawBody ?? mediaContext.Body;
try {
const mediaCtx = {
...mediaContext,

View File

@ -2095,6 +2095,114 @@ describe("createFollowupRunner media understanding", () => {
};
expect(agentCall?.prompt?.match(/<file\s+name="report\.pdf"/g)).toHaveLength(1);
});
it("does not re-apply file extraction when RawBody is missing but Body already has a matching file block", async () => {
const fileBlock = '<file name="report.pdf" mime="application/pdf">\nreport content\n</file>';
runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [{ text: "processed" }],
meta: {},
});
const runner = createFollowupRunner({
opts: { onBlockReply: vi.fn(async () => {}) },
typing: createMockTypingController(),
typingMode: "instant",
defaultModel: "anthropic/claude-opus-4-5",
});
await runner(
createQueuedRun({
prompt: `[media attached: /tmp/report.pdf]\n${MEDIA_REPLY_HINT}\nsummarize this\n\n${fileBlock}`,
mediaContext: {
Body: `summarize this\n\n${fileBlock}`,
CommandBody: "summarize this",
MediaPaths: ["/tmp/report.pdf"],
MediaTypes: ["application/pdf"],
},
}),
);
expect(applyMediaUnderstandingMock).not.toHaveBeenCalled();
const agentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as {
prompt?: string;
};
expect(agentCall?.prompt).toContain(fileBlock);
});
it("replaces the trailing repeated body segment instead of the first matching thread text", async () => {
const existingFileBlock =
'<file name="report.pdf" mime="application/pdf">\nold extracted content\n</file>';
const newFileBlock =
'<file name="report.pdf" mime="application/pdf">\nnew extracted content\n</file>';
const transcriptText = "Deferred transcript";
applyMediaUnderstandingMock.mockImplementationOnce(
async (params: { ctx: Record<string, unknown> }) => {
params.ctx.MediaUnderstanding = [
{
kind: "audio.transcription",
text: transcriptText,
attachmentIndex: 0,
provider: "whisper",
},
];
params.ctx.Transcript = transcriptText;
params.ctx.Body = `[Audio]\nTranscript:\n${transcriptText}\n\nsummarize this\n\n${newFileBlock}`;
return {
outputs: [
{
kind: "audio.transcription",
text: transcriptText,
attachmentIndex: 0,
provider: "whisper",
},
],
decisions: [],
appliedImage: false,
appliedAudio: true,
appliedVideo: false,
appliedFile: true,
};
},
);
runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [{ text: "processed" }],
meta: {},
});
const runner = createFollowupRunner({
opts: { onBlockReply: vi.fn(async () => {}) },
typing: createMockTypingController(),
typingMode: "instant",
defaultModel: "anthropic/claude-opus-4-5",
});
await runner(
createQueuedRun({
prompt:
`[media attached 1/2: /tmp/voice.ogg]\n[media attached 2/2: /tmp/report.pdf]\n${MEDIA_REPLY_HINT}\nThread history: summarize this\n\n` +
`summarize this\n\n${existingFileBlock}`,
mediaContext: {
Body: `summarize this\n\n${existingFileBlock}`,
CommandBody: "summarize this",
RawBody: "summarize this",
MediaPaths: ["/tmp/voice.ogg", "/tmp/report.pdf"],
MediaTypes: ["audio/ogg", "application/pdf"],
},
}),
);
const agentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as {
prompt?: string;
};
expect(agentCall?.prompt).toContain("Thread history: summarize this");
expect(agentCall?.prompt).toContain(transcriptText);
expect(agentCall?.prompt).toContain(newFileBlock);
expect(agentCall?.prompt).not.toContain("old extracted content");
expect(agentCall?.prompt?.indexOf(newFileBlock)).toBeGreaterThan(
agentCall?.prompt?.lastIndexOf("summarize this") ?? -1,
);
});
});
describe("followup queue drain deferred media understanding", () => {
@ -2163,6 +2271,56 @@ describe("followup queue drain deferred media understanding", () => {
expect(prompt).not.toContain("[media attached: /tmp/voice.ogg");
});
it("preprocesses queued runs in parallel", async () => {
const resolvers: Array<() => void> = [];
const done = () => ({
outputs: [],
decisions: [],
appliedImage: false,
appliedAudio: false,
appliedVideo: false,
appliedFile: false,
});
applyMediaUnderstandingMock.mockImplementation(
async () =>
await new Promise((resolve) => {
resolvers.push(() => resolve(done()));
}),
);
const items: FollowupRun[] = [
createQueuedRun({
prompt: "[media attached: /tmp/voice-1.ogg (audio/ogg)]\nfirst text",
summaryLine: "first text",
run: { messageProvider: "telegram" },
mediaContext: {
Body: "first text",
MediaPaths: ["/tmp/voice-1.ogg"],
MediaTypes: ["audio/ogg"],
},
}),
createQueuedRun({
prompt: "[media attached: /tmp/voice-2.ogg (audio/ogg)]\nsecond text",
summaryLine: "second text",
run: { messageProvider: "telegram" },
mediaContext: {
Body: "second text",
MediaPaths: ["/tmp/voice-2.ogg"],
MediaTypes: ["audio/ogg"],
},
}),
];
const pending = applyDeferredMediaToQueuedRuns(items);
expect(applyMediaUnderstandingMock).toHaveBeenCalledTimes(2);
for (const resolve of resolvers) {
resolve();
}
await pending;
});
it("preprocesses dropped media items before building overflow summaries", async () => {
applyMediaUnderstandingMock.mockImplementationOnce(
async (params: { ctx: Record<string, unknown> }) => {

View File

@ -76,9 +76,12 @@ function clearFollowupQueueSummaryState(queue: FollowupQueueState): void {
}
export async function applyDeferredMediaToQueuedRuns(items: FollowupRun[]): Promise<void> {
for (const item of items) {
await applyDeferredMediaUnderstandingToQueuedRun(item, { logLabel: "followup queue" });
}
await Promise.allSettled(
items.map(
async (item) =>
await applyDeferredMediaUnderstandingToQueuedRun(item, { logLabel: "followup queue" }),
),
);
}
async function resolveSummaryLines(items: FollowupRun[]): Promise<string[]> {