fix: rebuild queued followup media prompts
commit be3eec46e2
parent 6edb3b7e34
@@ -818,6 +818,138 @@ describe("createFollowupRunner media understanding", () => {
    expect(onBlockReply).toHaveBeenCalledWith(expect.objectContaining({ text: "Got it!" }));
  });

  it("strips the full media line when attachment paths or URLs contain brackets", async () => {
    const transcriptText = "Bracket-safe transcript";
    applyMediaUnderstandingMock.mockImplementationOnce(
      async (params: { ctx: Record<string, unknown> }) => {
        params.ctx.MediaUnderstanding = [
          {
            kind: "audio.transcription",
            text: transcriptText,
            attachmentIndex: 0,
            provider: "whisper",
          },
        ];
        params.ctx.Transcript = transcriptText;
        params.ctx.Body = `[Audio]\nTranscript:\n${transcriptText}`;
        return {
          outputs: [
            {
              kind: "audio.transcription",
              text: transcriptText,
              attachmentIndex: 0,
              provider: "whisper",
            },
          ],
          decisions: [],
          appliedImage: false,
          appliedAudio: true,
          appliedVideo: false,
          appliedFile: false,
        };
      },
    );
    runEmbeddedPiAgentMock.mockResolvedValueOnce({
      payloads: [{ text: "done" }],
      meta: {},
    });

    const runner = createFollowupRunner({
      opts: { onBlockReply: vi.fn(async () => {}) },
      typing: createMockTypingController(),
      typingMode: "instant",
      defaultModel: "anthropic/claude-opus-4-5",
    });

    await runner(
      createQueuedRun({
        prompt:
          "[media attached: /tmp/voice[0].ogg (audio/ogg) | https://cdn.example.com/files[0].ogg]\nsome text",
        mediaContext: {
          Body: "some text",
          CommandBody: "some text",
          RawBody: "some text",
          MediaPaths: ["/tmp/voice[0].ogg"],
          MediaUrls: ["https://cdn.example.com/files[0].ogg"],
          MediaTypes: ["audio/ogg"],
        },
      }),
    );

    const agentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as {
      prompt?: string;
    };
    expect(agentCall?.prompt).toContain(transcriptText);
    expect(agentCall?.prompt).not.toContain("[media attached:");
    expect(agentCall?.prompt).not.toContain("files[0].ogg]");
  });

  it("only strips leading synthetic media lines and preserves literal user text later in the prompt", async () => {
    const transcriptText = "Transcript with literal token";
    applyMediaUnderstandingMock.mockImplementationOnce(
      async (params: { ctx: Record<string, unknown> }) => {
        params.ctx.MediaUnderstanding = [
          {
            kind: "audio.transcription",
            text: transcriptText,
            attachmentIndex: 0,
            provider: "whisper",
          },
        ];
        params.ctx.Transcript = transcriptText;
        params.ctx.Body = "I literally typed [media attached: keep me] in this message.";
        return {
          outputs: [
            {
              kind: "audio.transcription",
              text: transcriptText,
              attachmentIndex: 0,
              provider: "whisper",
            },
          ],
          decisions: [],
          appliedImage: false,
          appliedAudio: true,
          appliedVideo: false,
          appliedFile: false,
        };
      },
    );
    runEmbeddedPiAgentMock.mockResolvedValueOnce({
      payloads: [{ text: "done" }],
      meta: {},
    });

    const runner = createFollowupRunner({
      opts: { onBlockReply: vi.fn(async () => {}) },
      typing: createMockTypingController(),
      typingMode: "instant",
      defaultModel: "anthropic/claude-opus-4-5",
    });

    await runner(
      createQueuedRun({
        prompt:
          "[media attached: /tmp/voice.ogg (audio/ogg)]\nI literally typed [media attached: keep me] in this message.",
        mediaContext: {
          Body: "I literally typed [media attached: keep me] in this message.",
          CommandBody: "I literally typed [media attached: keep me] in this message.",
          RawBody: "I literally typed [media attached: keep me] in this message.",
          MediaPaths: ["/tmp/voice.ogg"],
          MediaTypes: ["audio/ogg"],
        },
      }),
    );

    const agentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as {
      prompt?: string;
    };
    expect(agentCall?.prompt).toContain(
      "I literally typed [media attached: keep me] in this message.",
    );
    expect(agentCall?.prompt).not.toContain("[media attached: /tmp/voice.ogg (audio/ogg)]");
  });

  it("skips media understanding when MediaUnderstanding is already populated", async () => {
    runEmbeddedPiAgentMock.mockResolvedValueOnce({
      payloads: [{ text: "reply" }],
@@ -920,6 +1052,54 @@ describe("createFollowupRunner media understanding", () => {
    expect(onBlockReply).toHaveBeenCalledWith(expect.objectContaining({ text: "fallback reply" }));
  });

  it("rebuilds the prompt when file extraction succeeds without media outputs", async () => {
    const fileBlock = '<file name="notes.txt" mime="text/plain">\nline one\n</file>';
    applyMediaUnderstandingMock.mockImplementationOnce(
      async (params: { ctx: Record<string, unknown> }) => {
        params.ctx.Body = `some text\n\n${fileBlock}`;
        return {
          outputs: [],
          decisions: [],
          appliedImage: false,
          appliedAudio: false,
          appliedVideo: false,
          appliedFile: true,
        };
      },
    );
    runEmbeddedPiAgentMock.mockResolvedValueOnce({
      payloads: [{ text: "file processed" }],
      meta: {},
    });

    const runner = createFollowupRunner({
      opts: { onBlockReply: vi.fn(async () => {}) },
      typing: createMockTypingController(),
      typingMode: "instant",
      defaultModel: "anthropic/claude-opus-4-5",
    });

    await runner(
      createQueuedRun({
        prompt: "[media attached: /tmp/notes.txt (text/plain)]\nsome text",
        mediaContext: {
          Body: "some text",
          CommandBody: "some text",
          RawBody: "some text",
          MediaPaths: ["/tmp/notes.txt"],
          MediaTypes: ["text/plain"],
        },
      }),
    );

    const agentCall = runEmbeddedPiAgentMock.mock.calls.at(-1)?.[0] as {
      prompt?: string;
    };
    expect(agentCall?.prompt).toContain("[media attached: /tmp/notes.txt (text/plain)]");
    expect(agentCall?.prompt).toContain(fileBlock);
    expect(agentCall?.prompt?.match(/<file\b/g)).toHaveLength(1);
  });

  it("preserves non-audio media lines when only audio is transcribed", async () => {
    applyMediaUnderstandingMock.mockImplementationOnce(
      async (params: { ctx: Record<string, unknown> }) => {
@@ -43,13 +43,16 @@ import type { TypingController } from "./typing.js";

const MEDIA_ONLY_PLACEHOLDER = "[User sent media without caption]";
const MEDIA_REPLY_HINT_PREFIX = "To send an image back, prefer the message tool";
const LEADING_MEDIA_ATTACHED_LINE_RE =
  /^(?:\[media attached: \d+ files\]|\[media attached(?: \d+\/\d+)?: [^\r\n]*\])$/;
const FILE_BLOCK_RE = /<file\b/i;

function stripLeadingMediaAttachedLines(prompt: string): string {
  const lines = prompt.split("\n");
  let index = 0;
  while (index < lines.length) {
    const trimmed = lines[index]?.trim() ?? "";
    if (!trimmed.startsWith("[media attached") || !trimmed.endsWith("]")) {
    if (!LEADING_MEDIA_ATTACHED_LINE_RE.test(trimmed)) {
      break;
    }
    index += 1;
@@ -252,8 +255,14 @@ export function createFollowupRunner(params: {
    );
    if (hasMedia) {
      try {
        const mediaCtx = { ...queued.mediaContext } as MsgContext;
        const originalBody = mediaCtx.Body;
        const mediaCtx = {
          ...queued.mediaContext,
          Body:
            queued.mediaContext.CommandBody ??
            queued.mediaContext.RawBody ??
            queued.mediaContext.Body,
        } as MsgContext;
        const originalBody = queued.mediaContext.Body;
        const muResult = await applyMediaUnderstanding({
          ctx: mediaCtx,
          cfg: queued.run.config,
@@ -263,7 +272,10 @@ export function createFollowupRunner(params: {
            model: queued.run.model,
          },
        });
        if (muResult.outputs.length > 0 || muResult.appliedFile) {
        const shouldRebuildPrompt =
          muResult.outputs.length > 0 ||
          (muResult.appliedFile && !FILE_BLOCK_RE.test(queued.prompt));
        if (shouldRebuildPrompt) {
          // Rebuild the queued prompt from the mutated media context so the
          // deferred path matches the primary path's prompt shape.
          const newMediaNote = buildInboundMediaNote(mediaCtx);
@@ -273,7 +285,6 @@ export function createFollowupRunner(params: {
            updatedBody: mediaCtx.Body,
            mediaNote: newMediaNote,
          });

          logVerbose(
            `followup: applied media understanding (audio=${muResult.appliedAudio}, image=${muResult.appliedImage}, video=${muResult.appliedVideo}, file=${muResult.appliedFile})`,
          );
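A minimal standalone sketch of the stripping behaviour the new tests assert, reusing LEADING_MEDIA_ATTACHED_LINE_RE and the loop shown in the diff above. The trailing return is an assumption (the hunk ends before the function does), and the snippet is illustrative rather than part of this commit.

const LEADING_MEDIA_ATTACHED_LINE_RE =
  /^(?:\[media attached: \d+ files\]|\[media attached(?: \d+\/\d+)?: [^\r\n]*\])$/;

function stripLeadingMediaAttachedLines(prompt: string): string {
  const lines = prompt.split("\n");
  let index = 0;
  while (index < lines.length) {
    const trimmed = lines[index]?.trim() ?? "";
    if (!LEADING_MEDIA_ATTACHED_LINE_RE.test(trimmed)) {
      break;
    }
    index += 1;
  }
  // Assumed continuation: drop the matched leading lines, keep the rest verbatim.
  return lines.slice(index).join("\n");
}

// Bracketed paths/URLs no longer defeat the match: the whole synthetic line is removed.
stripLeadingMediaAttachedLines(
  "[media attached: /tmp/voice[0].ogg (audio/ogg) | https://cdn.example.com/files[0].ogg]\nsome text",
); // -> "some text"

// A literal "[media attached: ...]" later in the user's own text is left untouched.
stripLeadingMediaAttachedLines(
  "[media attached: /tmp/voice.ogg (audio/ogg)]\nI literally typed [media attached: keep me] in this message.",
); // -> "I literally typed [media attached: keep me] in this message."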