fix: handle placeholder transcripts per skipped attachment
This commit is contained in:
parent
fd2f47a5e4
commit
bac89c2ec8
@ -499,6 +499,51 @@ describe("applyMediaUnderstanding", () => {
|
||||
);
|
||||
});
|
||||
|
||||
// Regression test: a 100-byte audio attachment must not be sent to the
// transcription provider; instead a synthetic placeholder transcript is
// produced and written into the context.
// NOTE(review): the "too small" threshold itself is not visible in this hunk —
// the test name implies 100 bytes falls below it; confirm against the runner.
it("injects a placeholder transcript when local-path audio is too small", async () => {
|
||||
const ctx = await createAudioCtx({
|
||||
fileName: "tiny.ogg",
|
||||
mediaType: "audio/ogg",
|
||||
// 100 zero-filled bytes stand in for the undersized recording.
content: Buffer.alloc(100),
|
||||
});
|
||||
// Provider stub: its text would surface in the result if transcription were wrongly invoked.
const transcribeAudio = vi.fn(async () => ({ text: "should-not-run" }));
|
||||
const cfg: OpenClawConfig = {
|
||||
tools: {
|
||||
media: {
|
||||
audio: {
|
||||
enabled: true,
|
||||
// 1 MiB upper bound, so the 100-byte file cannot be rejected for being too large.
maxBytes: 1024 * 1024,
|
||||
models: [{ provider: "groq" }],
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const result = await applyMediaUnderstanding({
|
||||
ctx,
|
||||
cfg,
|
||||
providers: {
|
||||
groq: { id: "groq", transcribeAudio },
|
||||
},
|
||||
});
|
||||
|
||||
// The configured provider must never be called for this attachment.
expect(transcribeAudio).not.toHaveBeenCalled();
|
||||
// Audio still counts as applied, via exactly one synthetic output attributed
// to "openclaw" / "synthetic-empty-audio" rather than to the groq provider.
expect(result.appliedAudio).toBe(true);
|
||||
expect(result.outputs).toEqual([
|
||||
expect.objectContaining({
|
||||
kind: "audio.transcription",
|
||||
text: "[Voice note was empty or contained only silence — no speech detected]",
|
||||
provider: "openclaw",
|
||||
model: "synthetic-empty-audio",
|
||||
}),
|
||||
]);
|
||||
// The placeholder text is also expected on the context transcript...
expect(ctx.Transcript).toBe(
|
||||
"[Voice note was empty or contained only silence — no speech detected]",
|
||||
);
|
||||
// ...and embedded in the body under the "[Audio]\nTranscript:\n" prefix.
expect(ctx.Body).toBe(
|
||||
"[Audio]\nTranscript:\n[Voice note was empty or contained only silence — no speech detected]",
|
||||
);
|
||||
});
|
||||
|
||||
it("skips audio transcription when attachment exceeds maxBytes", async () => {
|
||||
const ctx = await createAudioCtx({
|
||||
fileName: "large.wav",
|
||||
|
||||
@ -293,10 +293,10 @@ function buildSyntheticSkippedAudioOutputs(
|
||||
return [];
|
||||
}
|
||||
return audioDecision.attachments.flatMap((attachment) => {
|
||||
const reason = attachment.attempts
|
||||
.map((attempt) => attempt.reason?.trim())
|
||||
.find((value): value is string => Boolean(value));
|
||||
if (!reason?.startsWith("tooSmall")) {
|
||||
const hasTooSmallAttempt = attachment.attempts.some((attempt) =>
|
||||
attempt.reason?.trim().startsWith("tooSmall"),
|
||||
);
|
||||
if (!hasTooSmallAttempt) {
|
||||
return [];
|
||||
}
|
||||
return [
|
||||
@ -523,9 +523,15 @@ export async function applyMediaUnderstanding(params: {
|
||||
decisions.push(entry.decision);
|
||||
}
|
||||
|
||||
if (!outputs.some((output) => output.kind === "audio.transcription")) {
|
||||
outputs.push(...buildSyntheticSkippedAudioOutputs(decisions));
|
||||
}
|
||||
const audioOutputAttachmentIndexes = new Set(
|
||||
outputs
|
||||
.filter((output) => output.kind === "audio.transcription")
|
||||
.map((output) => output.attachmentIndex),
|
||||
);
|
||||
const syntheticSkippedAudioOutputs = buildSyntheticSkippedAudioOutputs(decisions).filter(
|
||||
(output) => !audioOutputAttachmentIndexes.has(output.attachmentIndex),
|
||||
);
|
||||
outputs.push(...syntheticSkippedAudioOutputs);
|
||||
|
||||
if (decisions.length > 0) {
|
||||
ctx.MediaUnderstandingDecisions = [...(ctx.MediaUnderstandingDecisions ?? []), ...decisions];
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user