From ec688d809fa86dd4b5bd3d8b12112896e2ae1ef3 Mon Sep 17 00:00:00 2001 From: Lucenx9 <185146821+Lucenx9@users.noreply.github.com> Date: Mon, 2 Mar 2026 23:59:32 +0100 Subject: [PATCH] fix(media): normalize MIME kind detection for audio transcription --- src/media-understanding/apply.test.ts | 38 +++++++++++++++++++++++++++ src/media/mime.ts | 2 +- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/media-understanding/apply.test.ts b/src/media-understanding/apply.test.ts index e35259c267e..5b574c0167b 100644 --- a/src/media-understanding/apply.test.ts +++ b/src/media-understanding/apply.test.ts @@ -361,6 +361,44 @@ describe("applyMediaUnderstanding", () => { expect(ctx.Body).toBe("[Audio]\nTranscript:\nremote transcript"); }); + it("transcribes WhatsApp audio with parameterized MIME despite casing/whitespace", async () => { + const ctx = await createAudioCtx({ + fileName: "voice-note", + mediaType: " Audio/Ogg; codecs=opus ", + }); + ctx.ChatType = "direct"; + ctx.Surface = "whatsapp"; + + const cfg: OpenClawConfig = { + tools: { + media: { + audio: { + enabled: true, + maxBytes: 1024 * 1024, + scope: { + default: "deny", + rules: [ + { action: "allow", match: { chatType: "dm" } }, + { action: "allow", match: { channel: "whatsapp" } }, + ], + }, + models: [{ provider: "groq" }], + }, + }, + }, + }; + + const result = await applyMediaUnderstanding({ + ctx, + cfg, + providers: createGroqProviders("whatsapp transcript"), + }); + + expect(result.appliedAudio).toBe(true); + expect(ctx.Transcript).toBe("whatsapp transcript"); + expect(ctx.Body).toBe("[Audio]\nTranscript:\nwhatsapp transcript"); + }); + it("skips URL-only audio when remote file is too small", async () => { // Override the default mock to return a tiny buffer (below MIN_AUDIO_FILE_BYTES) mockedFetchRemoteMedia.mockResolvedValueOnce({ diff --git a/src/media/mime.ts b/src/media/mime.ts index 85f4962b43d..fced9c61236 100644 --- a/src/media/mime.ts +++ b/src/media/mime.ts @@ -188,5 +188,5 @@ export function imageMimeFromFormat(format?: string | null): string | undefined } export function kindFromMime(mime?: string | null): MediaKind { - return mediaKindFromMime(mime); + return mediaKindFromMime(normalizeMimeType(mime)); }