openclaw/src/media-understanding/runner.skip-tiny-audio.test.ts
Vincent Koc 42e3d8d693
Secrets: add inline allowlist review set (#38314)
* Secrets: add inline allowlist review set

* Secrets: narrow detect-secrets file exclusions

* Secrets: exclude Docker fingerprint false positive

* Secrets: allowlist test and docs false positives

* Secrets: refresh baseline after allowlist updates

* Secrets: fix gateway chat fixture pragma

* Secrets: format pre-commit config

* Android: keep talk mode fixture JSON valid

* Feishu: rely on client timeout injection

* Secrets: allowlist provider auth test fixtures

* Secrets: allowlist onboard search fixtures

* Secrets: allowlist onboard mode fixture

* Secrets: allowlist gateway auth mode fixture

* Secrets: allowlist APNS wake test key

* Secrets: allowlist gateway reload fixtures

* Secrets: allowlist moonshot video fixture

* Secrets: allowlist auto audio fixture

* Secrets: allowlist tiny audio fixture

* Secrets: allowlist embeddings fixtures

* Secrets: allowlist resolve fixtures

* Secrets: allowlist target registry pattern fixtures

* Secrets: allowlist gateway chat env fixture

* Secrets: refresh baseline after fixture allowlists

* Secrets: reapply gateway chat env allowlist

* Secrets: reapply gateway chat env allowlist

* Secrets: stabilize gateway chat env allowlist

* Secrets: allowlist runtime snapshot save fixture

* Secrets: allowlist oauth profile fixtures

* Secrets: allowlist compaction identifier fixture

* Secrets: allowlist model auth fixture

* Secrets: allowlist model status fixtures

* Secrets: allowlist custom onboarding fixture

* Secrets: allowlist mattermost token summary fixtures

* Secrets: allowlist gateway auth suite fixtures

* Secrets: allowlist channel summary fixture

* Secrets: allowlist provider usage auth fixtures

* Secrets: allowlist media proxy fixture

* Secrets: allowlist secrets audit fixtures

* Secrets: refresh baseline after final fixture allowlists

* Feishu: prefer explicit client timeout

* Feishu: test direct timeout precedence
2026-03-06 19:35:26 -05:00

169 lines
5.2 KiB
TypeScript

import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import type { MsgContext } from "../auto-reply/templating.js";
import type { OpenClawConfig } from "../config/config.js";
import { MIN_AUDIO_FILE_BYTES } from "./defaults.js";
import {
buildProviderRegistry,
createMediaAttachmentCache,
normalizeMediaAttachments,
runCapability,
} from "./runner.js";
import type { AudioTranscriptionRequest } from "./types.js";
/**
 * Writes a temporary audio file, builds the media context + attachment cache
 * around it, invokes the supplied callback, and always cleans up afterwards
 * (cache, temp file, and the PATH override).
 *
 * PATH is pinned to a minimal value for the duration of the fixture so the
 * media pipeline cannot pick up optional external tools from the host
 * environment (NOTE(review): presumably ffmpeg/ffprobe — confirm against
 * runner.js).
 */
async function withAudioFixture(params: {
  filePrefix: string;
  extension: string;
  mediaType: string;
  fileContents: Buffer;
  run: (params: {
    ctx: MsgContext;
    media: ReturnType<typeof normalizeMediaAttachments>;
    cache: ReturnType<typeof createMediaAttachmentCache>;
  }) => Promise<void>;
}) {
  const originalPath = process.env.PATH;
  process.env.PATH = "/usr/bin:/bin";
  // Random suffix avoids collisions when tests run concurrently in the same
  // tmpdir — Date.now() alone can repeat across parallel workers.
  const uniqueSuffix = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`;
  const tmpPath = path.join(
    os.tmpdir(),
    `${params.filePrefix}-${uniqueSuffix}.${params.extension}`,
  );
  await fs.writeFile(tmpPath, params.fileContents);
  const ctx: MsgContext = { MediaPath: tmpPath, MediaType: params.mediaType };
  const media = normalizeMediaAttachments(ctx);
  const cache = createMediaAttachmentCache(media, {
    localPathRoots: [path.dirname(tmpPath)],
  });
  try {
    await params.run({ ctx, media, cache });
  } finally {
    // Restore PATH exactly. Assigning `undefined` to a process.env key stores
    // the literal string "undefined", so when PATH was unset we must delete
    // the key instead of assigning the captured value back.
    if (originalPath === undefined) {
      delete process.env.PATH;
    } else {
      process.env.PATH = originalPath;
    }
    await cache.cleanup();
    await fs.unlink(tmpPath).catch(() => {});
  }
}
// Minimal config stub for runCapability: only the openai provider entry that
// the audio capability path reads is populated. Cast through `unknown`
// because a genuine OpenClawConfig would require many fields irrelevant to
// these tests.
const AUDIO_CAPABILITY_CFG = {
models: {
providers: {
openai: {
apiKey: "test-key", // pragma: allowlist secret
models: [],
},
},
},
} as unknown as OpenClawConfig;
async function runAudioCapabilityWithTranscriber(params: {
ctx: MsgContext;
media: ReturnType<typeof normalizeMediaAttachments>;
cache: ReturnType<typeof createMediaAttachmentCache>;
transcribeAudio: (req: AudioTranscriptionRequest) => Promise<{ text: string; model: string }>;
}) {
const providerRegistry = buildProviderRegistry({
openai: {
id: "openai",
capabilities: ["audio"],
transcribeAudio: params.transcribeAudio,
},
});
return await runCapability({
capability: "audio",
cfg: AUDIO_CAPABILITY_CFG,
ctx: params.ctx,
attachments: params.cache,
media: params.media,
providerRegistry,
});
}
describe("runCapability skips tiny audio files", () => {
  it("skips audio transcription when file is smaller than MIN_AUDIO_FILE_BYTES", async () => {
    await withAudioFixture({
      filePrefix: "openclaw-tiny-audio",
      extension: "wav",
      mediaType: "audio/wav",
      fileContents: Buffer.alloc(100), // well under the 1024-byte minimum
      run: async ({ ctx, media, cache }) => {
        let providerInvoked = false;
        const result = await runAudioCapabilityWithTranscriber({
          ctx,
          media,
          cache,
          transcribeAudio: async (req) => {
            providerInvoked = true;
            return { text: "should not happen", model: req.model ?? "whisper-1" };
          },
        });
        // The provider must never be reached for an undersized file.
        expect(providerInvoked).toBe(false);
        // The decision trail should record exactly one skipped attempt.
        expect(result.outputs).toHaveLength(0);
        expect(result.decision.outcome).toBe("skipped");
        expect(result.decision.attachments).toHaveLength(1);
        const [attachment] = result.decision.attachments;
        expect(attachment.attempts).toHaveLength(1);
        expect(attachment.attempts[0].outcome).toBe("skipped");
        expect(attachment.attempts[0].reason).toContain("tooSmall");
      },
    });
  });
  it("skips audio transcription for empty (0-byte) files", async () => {
    await withAudioFixture({
      filePrefix: "openclaw-empty-audio",
      extension: "ogg",
      mediaType: "audio/ogg",
      fileContents: Buffer.alloc(0),
      run: async ({ ctx, media, cache }) => {
        let providerInvoked = false;
        const result = await runAudioCapabilityWithTranscriber({
          ctx,
          media,
          cache,
          transcribeAudio: async () => {
            providerInvoked = true;
            return { text: "nope", model: "whisper-1" };
          },
        });
        expect(providerInvoked).toBe(false);
        expect(result.outputs).toHaveLength(0);
      },
    });
  });
  it("proceeds with transcription when file meets minimum size", async () => {
    await withAudioFixture({
      filePrefix: "openclaw-ok-audio",
      extension: "wav",
      mediaType: "audio/wav",
      fileContents: Buffer.alloc(MIN_AUDIO_FILE_BYTES + 100),
      run: async ({ ctx, media, cache }) => {
        let providerInvoked = false;
        const result = await runAudioCapabilityWithTranscriber({
          ctx,
          media,
          cache,
          transcribeAudio: async (req) => {
            providerInvoked = true;
            return { text: "hello world", model: req.model ?? "whisper-1" };
          },
        });
        // A sufficiently large file flows through to the provider.
        expect(providerInvoked).toBe(true);
        expect(result.outputs).toHaveLength(1);
        expect(result.outputs[0].text).toBe("hello world");
        expect(result.decision.outcome).toBe("success");
      },
    });
  });
});