fix: trailing body match and RawBody-missing extraction detection (#46454)
This commit is contained in:
parent
8f6d5a278f
commit
eb59b9c19d
@ -1,3 +1,4 @@
|
||||
import path from "node:path";
|
||||
import { logVerbose } from "../../globals.js";
|
||||
import { applyMediaUnderstanding } from "../../media-understanding/apply.js";
|
||||
import {
|
||||
@ -40,30 +41,6 @@ function stripLeadingMediaReplyHint(prompt: string): string {
|
||||
return prompt.trim();
|
||||
}
|
||||
|
||||
function replaceLastOccurrence(
|
||||
value: string,
|
||||
search: string,
|
||||
replacement: string,
|
||||
): string | undefined {
|
||||
if (!search) {
|
||||
return undefined;
|
||||
}
|
||||
const index = value.lastIndexOf(search);
|
||||
if (index < 0) {
|
||||
return undefined;
|
||||
}
|
||||
return `${value.slice(0, index)}${replacement}${value.slice(index + search.length)}`;
|
||||
}
|
||||
|
||||
function findFirstOccurrenceBeforeFileBlocks(value: string, search: string): number {
|
||||
if (!search) {
|
||||
return -1;
|
||||
}
|
||||
const fileBlockIndex = value.search(FILE_BLOCK_RE);
|
||||
const bodyRegion = fileBlockIndex >= 0 ? value.slice(0, fileBlockIndex) : value;
|
||||
return bodyRegion.indexOf(search);
|
||||
}
|
||||
|
||||
function findLastOccurrenceBeforeFileBlocks(value: string, search: string): number {
|
||||
if (!search) {
|
||||
return -1;
|
||||
@ -98,21 +75,81 @@ function replaceLastOccurrenceBeforeFileBlocks(
|
||||
return `${value.slice(0, index)}${replacement}${value.slice(index + search.length)}`;
|
||||
}
|
||||
|
||||
function replaceFirstOccurrenceBeforeFileBlocks(
|
||||
function findTrailingReplacementTargetBeforeFileBlocks(
|
||||
value: string,
|
||||
targets: string[],
|
||||
): { index: number; target: string } | undefined {
|
||||
let bestMatch: { index: number; target: string } | undefined;
|
||||
for (const target of targets) {
|
||||
const index = findLastOccurrenceBeforeFileBlocks(value, target);
|
||||
if (index < 0) {
|
||||
continue;
|
||||
}
|
||||
if (!bestMatch || index > bestMatch.index) {
|
||||
bestMatch = { index, target };
|
||||
}
|
||||
}
|
||||
return bestMatch;
|
||||
}
|
||||
|
||||
function replaceOccurrenceAtIndex(
|
||||
value: string,
|
||||
search: string,
|
||||
replacement: string,
|
||||
): string | undefined {
|
||||
if (!search) {
|
||||
return undefined;
|
||||
}
|
||||
const index = findFirstOccurrenceBeforeFileBlocks(value, search);
|
||||
if (index < 0) {
|
||||
return undefined;
|
||||
}
|
||||
index: number,
|
||||
): string {
|
||||
return `${value.slice(0, index)}${replacement}${value.slice(index + search.length)}`;
|
||||
}
|
||||
|
||||
function decodeXmlAttr(value: string): string {
|
||||
return value
|
||||
.replace(/"/g, '"')
|
||||
.replace(/'/g, "'")
|
||||
.replace(/</g, "<")
|
||||
.replace(/>/g, ">")
|
||||
.replace(/&/g, "&");
|
||||
}
|
||||
|
||||
function extractAttachmentFileName(value?: string): string | undefined {
|
||||
const trimmed = value?.trim();
|
||||
if (!trimmed) {
|
||||
return undefined;
|
||||
}
|
||||
if (/^[a-zA-Z][a-zA-Z\d+.-]*:/.test(trimmed)) {
|
||||
try {
|
||||
const pathname = new URL(trimmed).pathname;
|
||||
const basename = path.posix.basename(pathname);
|
||||
return basename || undefined;
|
||||
} catch {
|
||||
// Fall back to path-style parsing below.
|
||||
}
|
||||
}
|
||||
const normalized = trimmed.replace(/\\/g, "/");
|
||||
const basename = path.posix.basename(normalized);
|
||||
return basename || undefined;
|
||||
}
|
||||
|
||||
function bodyContainsMatchingFileBlock(mediaContext: FollowupMediaContext): boolean {
|
||||
const body = mediaContext.Body?.trim();
|
||||
if (!body || !FILE_BLOCK_RE.test(body)) {
|
||||
return false;
|
||||
}
|
||||
const bodyFileNames = new Set<string>();
|
||||
for (const match of body.matchAll(/<file\s+name="([^"]*)"[^>]*>/gi)) {
|
||||
const fileName = match[1]?.trim();
|
||||
if (fileName) {
|
||||
bodyFileNames.add(decodeXmlAttr(fileName));
|
||||
}
|
||||
}
|
||||
if (bodyFileNames.size === 0) {
|
||||
return false;
|
||||
}
|
||||
return normalizeAttachments(mediaContext as MsgContext).some((attachment) => {
|
||||
const fileName = extractAttachmentFileName(attachment.path ?? attachment.url);
|
||||
return Boolean(fileName && bodyFileNames.has(fileName));
|
||||
});
|
||||
}
|
||||
|
||||
function stripInlineDirectives(text: string | undefined): string {
|
||||
return parseInlineDirectives(text ?? "").cleaned.trim();
|
||||
}
|
||||
@ -168,12 +205,14 @@ function rebuildQueuedPromptWithMediaUnderstanding(params: {
|
||||
// thread history above the body, and prompts whose original body no longer
|
||||
// appears all retain any legitimate <file> blocks.
|
||||
if (params.updatedBody && FILE_BLOCK_RE.test(params.updatedBody)) {
|
||||
const bodyIdx =
|
||||
replacementTargets
|
||||
.map((target) => findFirstOccurrenceBeforeFileBlocks(stripped, target))
|
||||
.find((index) => index >= 0) ?? -1;
|
||||
if (bodyIdx >= 0) {
|
||||
stripped = stripped.slice(0, bodyIdx) + stripExistingFileBlocks(stripped.slice(bodyIdx));
|
||||
const trailingMatch = findTrailingReplacementTargetBeforeFileBlocks(
|
||||
stripped,
|
||||
replacementTargets,
|
||||
);
|
||||
if (trailingMatch) {
|
||||
stripped =
|
||||
stripped.slice(0, trailingMatch.index) +
|
||||
stripExistingFileBlocks(stripped.slice(trailingMatch.index));
|
||||
}
|
||||
}
|
||||
|
||||
@ -186,12 +225,15 @@ function rebuildQueuedPromptWithMediaUnderstanding(params: {
|
||||
}
|
||||
|
||||
let rebuilt = stripped;
|
||||
for (const target of replacementTargets) {
|
||||
const replaced = replaceFirstOccurrenceBeforeFileBlocks(rebuilt, target, updatedBody);
|
||||
if (replaced !== undefined) {
|
||||
rebuilt = replaced;
|
||||
return [params.mediaNote?.trim(), rebuilt.trim()].filter(Boolean).join("\n").trim();
|
||||
}
|
||||
const trailingMatch = findTrailingReplacementTargetBeforeFileBlocks(rebuilt, replacementTargets);
|
||||
if (trailingMatch) {
|
||||
rebuilt = replaceOccurrenceAtIndex(
|
||||
rebuilt,
|
||||
trailingMatch.target,
|
||||
updatedBody,
|
||||
trailingMatch.index,
|
||||
);
|
||||
return [params.mediaNote?.trim(), rebuilt.trim()].filter(Boolean).join("\n").trim();
|
||||
}
|
||||
|
||||
rebuilt = [rebuilt, updatedBody].filter(Boolean).join("\n\n");
|
||||
@ -268,29 +310,29 @@ export async function applyDeferredMediaUnderstandingToQueuedRun(
|
||||
if (!mediaContext || mediaContext.DeferredMediaApplied) {
|
||||
return;
|
||||
}
|
||||
if (mediaContext.MediaUnderstanding?.length) {
|
||||
mediaContext.DeferredMediaApplied = true;
|
||||
return;
|
||||
}
|
||||
if (!hasMediaAttachments(mediaContext)) {
|
||||
mediaContext.DeferredMediaApplied = true;
|
||||
return;
|
||||
}
|
||||
|
||||
const resolvedOriginalBody =
|
||||
mediaContext.CommandBody ?? mediaContext.RawBody ?? mediaContext.Body;
|
||||
// Detect file extraction from the primary path via body mutation instead of
|
||||
// scanning for literal '<file name=' patterns (which false-positives on user
|
||||
// text). Compare Body against RawBody (never mutated by the primary path's
|
||||
// media/file processing) rather than CommandBody (which differs from Body
|
||||
// when inline directives like /think were stripped).
|
||||
const referenceBody = mediaContext.RawBody ?? mediaContext.Body;
|
||||
if (
|
||||
!mediaContext.DeferredFileBlocksExtracted &&
|
||||
mediaContext.Body !== referenceBody &&
|
||||
hasAnyFileAttachments(mediaContext)
|
||||
) {
|
||||
mediaContext.DeferredFileBlocksExtracted = true;
|
||||
// Prefer RawBody-vs-Body comparison when RawBody exists. If RawBody is
|
||||
// missing, fall back to explicit file-extraction signals instead of
|
||||
// re-running extraction just because the clean pre-extraction body is gone.
|
||||
if (!mediaContext.DeferredFileBlocksExtracted && hasAnyFileAttachments(mediaContext)) {
|
||||
const rawBodyMissing = typeof mediaContext.RawBody !== "string";
|
||||
if (mediaContext.Body !== referenceBody) {
|
||||
mediaContext.DeferredFileBlocksExtracted = true;
|
||||
} else if (
|
||||
rawBodyMissing &&
|
||||
(Boolean(mediaContext.MediaUnderstanding?.length) ||
|
||||
bodyContainsMatchingFileBlock(mediaContext))
|
||||
) {
|
||||
mediaContext.DeferredFileBlocksExtracted = true;
|
||||
}
|
||||
}
|
||||
if (mediaContext.MediaUnderstanding?.length) {
|
||||
mediaContext.DeferredMediaApplied = true;
|
||||
return;
|
||||
}
|
||||
|
||||
if (mediaContext.DeferredFileBlocksExtracted && hasOnlyFileLikeAttachments(mediaContext)) {
|
||||
@ -298,6 +340,9 @@ export async function applyDeferredMediaUnderstandingToQueuedRun(
|
||||
return;
|
||||
}
|
||||
|
||||
const resolvedOriginalBody =
|
||||
mediaContext.CommandBody ?? mediaContext.RawBody ?? mediaContext.Body;
|
||||
|
||||
try {
|
||||
const mediaCtx = {
|
||||
...mediaContext,
|
||||
|
||||
@ -2095,6 +2095,114 @@ describe("createFollowupRunner media understanding", () => {
|
||||
};
|
||||
expect(agentCall?.prompt?.match(/<file\s+name="report\.pdf"/g)).toHaveLength(1);
|
||||
});
|
||||
|
||||
it("does not re-apply file extraction when RawBody is missing but Body already has a matching file block", async () => {
|
||||
const fileBlock = '<file name="report.pdf" mime="application/pdf">\nreport content\n</file>';
|
||||
runEmbeddedPiAgentMock.mockResolvedValueOnce({
|
||||
payloads: [{ text: "processed" }],
|
||||
meta: {},
|
||||
});
|
||||
|
||||
const runner = createFollowupRunner({
|
||||
opts: { onBlockReply: vi.fn(async () => {}) },
|
||||
typing: createMockTypingController(),
|
||||
typingMode: "instant",
|
||||
defaultModel: "anthropic/claude-opus-4-5",
|
||||
});
|
||||
|
||||
await runner(
|
||||
createQueuedRun({
|
||||
prompt: `[media attached: /tmp/report.pdf]\n${MEDIA_REPLY_HINT}\nsummarize this\n\n${fileBlock}`,
|
||||
mediaContext: {
|
||||
Body: `summarize this\n\n${fileBlock}`,
|
||||
CommandBody: "summarize this",
|
||||
MediaPaths: ["/tmp/report.pdf"],
|
||||
MediaTypes: ["application/pdf"],
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
expect(applyMediaUnderstandingMock).not.toHaveBeenCalled();
|
||||
const agentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as {
|
||||
prompt?: string;
|
||||
};
|
||||
expect(agentCall?.prompt).toContain(fileBlock);
|
||||
});
|
||||
|
||||
it("replaces the trailing repeated body segment instead of the first matching thread text", async () => {
|
||||
const existingFileBlock =
|
||||
'<file name="report.pdf" mime="application/pdf">\nold extracted content\n</file>';
|
||||
const newFileBlock =
|
||||
'<file name="report.pdf" mime="application/pdf">\nnew extracted content\n</file>';
|
||||
const transcriptText = "Deferred transcript";
|
||||
|
||||
applyMediaUnderstandingMock.mockImplementationOnce(
|
||||
async (params: { ctx: Record<string, unknown> }) => {
|
||||
params.ctx.MediaUnderstanding = [
|
||||
{
|
||||
kind: "audio.transcription",
|
||||
text: transcriptText,
|
||||
attachmentIndex: 0,
|
||||
provider: "whisper",
|
||||
},
|
||||
];
|
||||
params.ctx.Transcript = transcriptText;
|
||||
params.ctx.Body = `[Audio]\nTranscript:\n${transcriptText}\n\nsummarize this\n\n${newFileBlock}`;
|
||||
return {
|
||||
outputs: [
|
||||
{
|
||||
kind: "audio.transcription",
|
||||
text: transcriptText,
|
||||
attachmentIndex: 0,
|
||||
provider: "whisper",
|
||||
},
|
||||
],
|
||||
decisions: [],
|
||||
appliedImage: false,
|
||||
appliedAudio: true,
|
||||
appliedVideo: false,
|
||||
appliedFile: true,
|
||||
};
|
||||
},
|
||||
);
|
||||
runEmbeddedPiAgentMock.mockResolvedValueOnce({
|
||||
payloads: [{ text: "processed" }],
|
||||
meta: {},
|
||||
});
|
||||
|
||||
const runner = createFollowupRunner({
|
||||
opts: { onBlockReply: vi.fn(async () => {}) },
|
||||
typing: createMockTypingController(),
|
||||
typingMode: "instant",
|
||||
defaultModel: "anthropic/claude-opus-4-5",
|
||||
});
|
||||
|
||||
await runner(
|
||||
createQueuedRun({
|
||||
prompt:
|
||||
`[media attached 1/2: /tmp/voice.ogg]\n[media attached 2/2: /tmp/report.pdf]\n${MEDIA_REPLY_HINT}\nThread history: summarize this\n\n` +
|
||||
`summarize this\n\n${existingFileBlock}`,
|
||||
mediaContext: {
|
||||
Body: `summarize this\n\n${existingFileBlock}`,
|
||||
CommandBody: "summarize this",
|
||||
RawBody: "summarize this",
|
||||
MediaPaths: ["/tmp/voice.ogg", "/tmp/report.pdf"],
|
||||
MediaTypes: ["audio/ogg", "application/pdf"],
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const agentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as {
|
||||
prompt?: string;
|
||||
};
|
||||
expect(agentCall?.prompt).toContain("Thread history: summarize this");
|
||||
expect(agentCall?.prompt).toContain(transcriptText);
|
||||
expect(agentCall?.prompt).toContain(newFileBlock);
|
||||
expect(agentCall?.prompt).not.toContain("old extracted content");
|
||||
expect(agentCall?.prompt?.indexOf(newFileBlock)).toBeGreaterThan(
|
||||
agentCall?.prompt?.lastIndexOf("summarize this") ?? -1,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("followup queue drain deferred media understanding", () => {
|
||||
@ -2163,6 +2271,56 @@ describe("followup queue drain deferred media understanding", () => {
|
||||
expect(prompt).not.toContain("[media attached: /tmp/voice.ogg");
|
||||
});
|
||||
|
||||
it("preprocesses queued runs in parallel", async () => {
|
||||
const resolvers: Array<() => void> = [];
|
||||
const done = () => ({
|
||||
outputs: [],
|
||||
decisions: [],
|
||||
appliedImage: false,
|
||||
appliedAudio: false,
|
||||
appliedVideo: false,
|
||||
appliedFile: false,
|
||||
});
|
||||
applyMediaUnderstandingMock.mockImplementation(
|
||||
async () =>
|
||||
await new Promise((resolve) => {
|
||||
resolvers.push(() => resolve(done()));
|
||||
}),
|
||||
);
|
||||
|
||||
const items: FollowupRun[] = [
|
||||
createQueuedRun({
|
||||
prompt: "[media attached: /tmp/voice-1.ogg (audio/ogg)]\nfirst text",
|
||||
summaryLine: "first text",
|
||||
run: { messageProvider: "telegram" },
|
||||
mediaContext: {
|
||||
Body: "first text",
|
||||
MediaPaths: ["/tmp/voice-1.ogg"],
|
||||
MediaTypes: ["audio/ogg"],
|
||||
},
|
||||
}),
|
||||
createQueuedRun({
|
||||
prompt: "[media attached: /tmp/voice-2.ogg (audio/ogg)]\nsecond text",
|
||||
summaryLine: "second text",
|
||||
run: { messageProvider: "telegram" },
|
||||
mediaContext: {
|
||||
Body: "second text",
|
||||
MediaPaths: ["/tmp/voice-2.ogg"],
|
||||
MediaTypes: ["audio/ogg"],
|
||||
},
|
||||
}),
|
||||
];
|
||||
|
||||
const pending = applyDeferredMediaToQueuedRuns(items);
|
||||
|
||||
expect(applyMediaUnderstandingMock).toHaveBeenCalledTimes(2);
|
||||
|
||||
for (const resolve of resolvers) {
|
||||
resolve();
|
||||
}
|
||||
await pending;
|
||||
});
|
||||
|
||||
it("preprocesses dropped media items before building overflow summaries", async () => {
|
||||
applyMediaUnderstandingMock.mockImplementationOnce(
|
||||
async (params: { ctx: Record<string, unknown> }) => {
|
||||
|
||||
@ -76,9 +76,12 @@ function clearFollowupQueueSummaryState(queue: FollowupQueueState): void {
|
||||
}
|
||||
|
||||
export async function applyDeferredMediaToQueuedRuns(items: FollowupRun[]): Promise<void> {
|
||||
for (const item of items) {
|
||||
await applyDeferredMediaUnderstandingToQueuedRun(item, { logLabel: "followup queue" });
|
||||
}
|
||||
await Promise.allSettled(
|
||||
items.map(
|
||||
async (item) =>
|
||||
await applyDeferredMediaUnderstandingToQueuedRun(item, { logLabel: "followup queue" }),
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
async function resolveSummaryLines(items: FollowupRun[]): Promise<string[]> {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user