diff --git a/extensions/discord/src/voice/manager.e2e.test.ts b/extensions/discord/src/voice/manager.e2e.test.ts index 73c6f249021..0889e351bf5 100644 --- a/extensions/discord/src/voice/manager.e2e.test.ts +++ b/extensions/discord/src/voice/manager.e2e.test.ts @@ -85,15 +85,19 @@ vi.mock("@discordjs/voice", () => ({ joinVoiceChannel: joinVoiceChannelMock, })); -vi.mock("../../../../src/routing/resolve-route.js", () => ({ +vi.mock("openclaw/plugin-sdk/routing", () => ({ resolveAgentRoute: resolveAgentRouteMock, })); -vi.mock("../../../../src/commands/agent.js", () => ({ - agentCommandFromIngress: agentCommandMock, -})); +vi.mock("openclaw/plugin-sdk/agent-runtime", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + agentCommandFromIngress: agentCommandMock, + }; +}); -vi.mock("../../../../src/media-understanding/runtime.js", () => ({ +vi.mock("openclaw/plugin-sdk/media-understanding-runtime", () => ({ transcribeAudioFile: transcribeAudioFileMock, })); diff --git a/extensions/discord/src/voice/manager.ts b/extensions/discord/src/voice/manager.ts index c2fbcbfc686..5f9f66242ad 100644 --- a/extensions/discord/src/voice/manager.ts +++ b/extensions/discord/src/voice/manager.ts @@ -18,28 +18,19 @@ import { } from "@discordjs/voice"; import { resolveAgentDir } from "openclaw/plugin-sdk/agent-runtime"; import { agentCommandFromIngress } from "openclaw/plugin-sdk/agent-runtime"; -import { - resolveTtsConfig, - textToSpeech, - type ResolvedTtsConfig, -} from "openclaw/plugin-sdk/agent-runtime"; +import { resolveTtsConfig, type ResolvedTtsConfig } from "openclaw/plugin-sdk/agent-runtime"; import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; import { isDangerousNameMatchingEnabled } from "openclaw/plugin-sdk/config-runtime"; import type { DiscordAccountConfig, TtsConfig } from "openclaw/plugin-sdk/config-runtime"; import { formatErrorMessage } from "openclaw/plugin-sdk/infra-runtime"; import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/infra-runtime"; -import { - buildProviderRegistry, - createMediaAttachmentCache, - normalizeMediaAttachments, - runCapability, -} from "openclaw/plugin-sdk/media-runtime"; -import type { MsgContext } from "openclaw/plugin-sdk/reply-runtime"; +import { transcribeAudioFile } from "openclaw/plugin-sdk/media-understanding-runtime"; import { resolveAgentRoute } from "openclaw/plugin-sdk/routing"; import { logVerbose, shouldLogVerbose } from "openclaw/plugin-sdk/runtime-env"; import { createSubsystemLogger } from "openclaw/plugin-sdk/runtime-env"; import type { RuntimeEnv } from "openclaw/plugin-sdk/runtime-env"; import { parseTtsDirectives } from "openclaw/plugin-sdk/speech"; +import { textToSpeech } from "openclaw/plugin-sdk/speech-runtime"; import { formatMention } from "../mentions.js"; import { resolveDiscordOwnerAccess } from "../monitor/allow-list.js"; import { formatDiscordUserTag } from "../monitor/format.js"; @@ -240,33 +231,13 @@ async function transcribeAudio(params: { agentId: string; filePath: string; }): Promise { - const ctx: MsgContext = { - MediaPath: params.filePath, - MediaType: "audio/wav", - }; - const attachments = normalizeMediaAttachments(ctx); - if (attachments.length === 0) { - return undefined; - } - const cache = createMediaAttachmentCache(attachments); - const providerRegistry = buildProviderRegistry(); - try { - const result = await runCapability({ - capability: "audio", - cfg: params.cfg, - ctx, - attachments: cache, - media: attachments, - agentDir: resolveAgentDir(params.cfg, params.agentId), - providerRegistry, - config: params.cfg.tools?.media?.audio, - }); - const output = result.outputs.find((entry) => entry.kind === "audio.transcription"); - const text = output?.text?.trim(); - return text || undefined; - } finally { - await cache.cleanup(); - } + const result = await transcribeAudioFile({ + filePath: params.filePath, + cfg: params.cfg, + agentDir: resolveAgentDir(params.cfg, params.agentId), + mime: "audio/wav", + }); + return result.text?.trim() || undefined; } export class DiscordVoiceManager { diff --git a/extensions/telegram/src/sticker-cache.test.ts b/extensions/telegram/src/sticker-cache.test.ts index 219ce421e62..75a1db8725d 100644 --- a/extensions/telegram/src/sticker-cache.test.ts +++ b/extensions/telegram/src/sticker-cache.test.ts @@ -1,44 +1,49 @@ import fs from "node:fs"; import path from "node:path"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import { - cacheSticker, - getAllCachedStickers, - getCachedSticker, - getCacheStats, - searchStickers, -} from "./sticker-cache.js"; -// Mock the state directory to use a temp location -vi.mock("../../../src/config/paths.js", async (importOriginal) => { - const actual = await importOriginal(); - return { - ...actual, - STATE_DIR: "/tmp/openclaw-test-sticker-cache", - }; -}); +vi.mock("openclaw/plugin-sdk/agent-runtime", () => ({ + resolveApiKeyForProvider: vi.fn(), + findModelInCatalog: vi.fn(), + loadModelCatalog: vi.fn(async () => []), + modelSupportsVision: vi.fn(() => false), + resolveDefaultModelForAgent: vi.fn(() => ({ provider: "openai", model: "gpt-5.2" })), +})); + +vi.mock("openclaw/plugin-sdk/media-runtime", () => ({ + AUTO_IMAGE_KEY_PROVIDERS: ["openai"], + DEFAULT_IMAGE_MODELS: { openai: "gpt-4.1-mini" }, + resolveAutoImageModel: vi.fn(async () => null), +})); + +vi.mock("openclaw/plugin-sdk/media-understanding-runtime", () => ({ + describeImageFileWithModel: vi.fn(), +})); const TEST_CACHE_DIR = "/tmp/openclaw-test-sticker-cache/telegram"; const TEST_CACHE_FILE = path.join(TEST_CACHE_DIR, "sticker-cache.json"); +type StickerCacheModule = typeof import("./sticker-cache.js"); + +let stickerCache: StickerCacheModule; + describe("sticker-cache", () => { - beforeEach(() => { - // Clean up before each test - if (fs.existsSync(TEST_CACHE_FILE)) { - fs.unlinkSync(TEST_CACHE_FILE); - } + beforeEach(async () => { + process.env.OPENCLAW_STATE_DIR = "/tmp/openclaw-test-sticker-cache"; + fs.rmSync("/tmp/openclaw-test-sticker-cache", { recursive: true, force: true }); + fs.mkdirSync(TEST_CACHE_DIR, { recursive: true }); + vi.resetModules(); + stickerCache = await import("./sticker-cache.js"); }); afterEach(() => { - // Clean up after each test - if (fs.existsSync(TEST_CACHE_FILE)) { - fs.unlinkSync(TEST_CACHE_FILE); - } + fs.rmSync("/tmp/openclaw-test-sticker-cache", { recursive: true, force: true }); + delete process.env.OPENCLAW_STATE_DIR; }); describe("getCachedSticker", () => { it("returns null for unknown ID", () => { - const result = getCachedSticker("unknown-id"); + const result = stickerCache.getCachedSticker("unknown-id"); expect(result).toBeNull(); }); @@ -52,8 +57,8 @@ describe("sticker-cache", () => { cachedAt: "2026-01-26T12:00:00.000Z", }; - cacheSticker(sticker); - const result = getCachedSticker("unique123"); + stickerCache.cacheSticker(sticker); + const result = stickerCache.getCachedSticker("unique123"); expect(result).toEqual(sticker); }); @@ -66,13 +71,13 @@ describe("sticker-cache", () => { cachedAt: "2026-01-26T12:00:00.000Z", }; - cacheSticker(sticker); - expect(getCachedSticker("unique123")).not.toBeNull(); + stickerCache.cacheSticker(sticker); + expect(stickerCache.getCachedSticker("unique123")).not.toBeNull(); // Manually clear the cache file - fs.unlinkSync(TEST_CACHE_FILE); + fs.rmSync(TEST_CACHE_FILE, { force: true }); - expect(getCachedSticker("unique123")).toBeNull(); + expect(stickerCache.getCachedSticker("unique123")).toBeNull(); }); }); @@ -85,9 +90,9 @@ describe("sticker-cache", () => { cachedAt: "2026-01-26T12:00:00.000Z", }; - cacheSticker(sticker); + stickerCache.cacheSticker(sticker); - const all = getAllCachedStickers(); + const all = stickerCache.getAllCachedStickers(); expect(all).toHaveLength(1); expect(all[0]).toEqual(sticker); }); @@ -106,10 +111,10 @@ describe("sticker-cache", () => { cachedAt: "2026-01-26T13:00:00.000Z", }; - cacheSticker(original); - cacheSticker(updated); + stickerCache.cacheSticker(original); + stickerCache.cacheSticker(updated); - const result = getCachedSticker("unique789"); + const result = stickerCache.getCachedSticker("unique789"); expect(result?.description).toBe("Updated description"); expect(result?.fileId).toBe("file789-new"); }); @@ -118,7 +123,7 @@ describe("sticker-cache", () => { describe("searchStickers", () => { beforeEach(() => { // Seed cache with test stickers - cacheSticker({ + stickerCache.cacheSticker({ fileId: "fox1", fileUniqueId: "fox-unique-1", emoji: "🦊", @@ -126,7 +131,7 @@ describe("sticker-cache", () => { description: "A cute orange fox waving hello", cachedAt: "2026-01-26T10:00:00.000Z", }); - cacheSticker({ + stickerCache.cacheSticker({ fileId: "fox2", fileUniqueId: "fox-unique-2", emoji: "🦊", @@ -134,7 +139,7 @@ describe("sticker-cache", () => { description: "A fox sleeping peacefully", cachedAt: "2026-01-26T11:00:00.000Z", }); - cacheSticker({ + stickerCache.cacheSticker({ fileId: "cat1", fileUniqueId: "cat-unique-1", emoji: "🐱", @@ -142,7 +147,7 @@ describe("sticker-cache", () => { description: "A cat sitting on a keyboard", cachedAt: "2026-01-26T12:00:00.000Z", }); - cacheSticker({ + stickerCache.cacheSticker({ fileId: "dog1", fileUniqueId: "dog-unique-1", emoji: "🐶", @@ -153,47 +158,47 @@ describe("sticker-cache", () => { }); it("finds stickers by description substring", () => { - const results = searchStickers("fox"); + const results = stickerCache.searchStickers("fox"); expect(results).toHaveLength(2); expect(results.every((s) => s.description.toLowerCase().includes("fox"))).toBe(true); }); it("finds stickers by emoji", () => { - const results = searchStickers("🦊"); + const results = stickerCache.searchStickers("🦊"); expect(results).toHaveLength(2); expect(results.every((s) => s.emoji === "🦊")).toBe(true); }); it("finds stickers by set name", () => { - const results = searchStickers("CuteFoxes"); + const results = stickerCache.searchStickers("CuteFoxes"); expect(results).toHaveLength(2); expect(results.every((s) => s.setName === "CuteFoxes")).toBe(true); }); it("respects limit parameter", () => { - const results = searchStickers("fox", 1); + const results = stickerCache.searchStickers("fox", 1); expect(results).toHaveLength(1); }); it("ranks exact matches higher", () => { // "waving" appears in "fox waving hello" - should be ranked first - const results = searchStickers("waving"); + const results = stickerCache.searchStickers("waving"); expect(results).toHaveLength(1); expect(results[0]?.fileUniqueId).toBe("fox-unique-1"); }); it("returns empty array for no matches", () => { - const results = searchStickers("elephant"); + const results = stickerCache.searchStickers("elephant"); expect(results).toHaveLength(0); }); it("is case insensitive", () => { - const results = searchStickers("FOX"); + const results = stickerCache.searchStickers("FOX"); expect(results).toHaveLength(2); }); it("matches multiple words", () => { - const results = searchStickers("cat keyboard"); + const results = stickerCache.searchStickers("cat keyboard"); expect(results).toHaveLength(1); expect(results[0]?.fileUniqueId).toBe("cat-unique-1"); }); @@ -201,58 +206,58 @@ describe("sticker-cache", () => { describe("getAllCachedStickers", () => { it("returns empty array when cache is empty", () => { - const result = getAllCachedStickers(); + const result = stickerCache.getAllCachedStickers(); expect(result).toEqual([]); }); it("returns all cached stickers", () => { - cacheSticker({ + stickerCache.cacheSticker({ fileId: "a", fileUniqueId: "a-unique", description: "Sticker A", cachedAt: "2026-01-26T10:00:00.000Z", }); - cacheSticker({ + stickerCache.cacheSticker({ fileId: "b", fileUniqueId: "b-unique", description: "Sticker B", cachedAt: "2026-01-26T11:00:00.000Z", }); - const result = getAllCachedStickers(); + const result = stickerCache.getAllCachedStickers(); expect(result).toHaveLength(2); }); }); describe("getCacheStats", () => { it("returns count 0 when cache is empty", () => { - const stats = getCacheStats(); + const stats = stickerCache.getCacheStats(); expect(stats.count).toBe(0); expect(stats.oldestAt).toBeUndefined(); expect(stats.newestAt).toBeUndefined(); }); it("returns correct stats with cached stickers", () => { - cacheSticker({ + stickerCache.cacheSticker({ fileId: "old", fileUniqueId: "old-unique", description: "Old sticker", cachedAt: "2026-01-20T10:00:00.000Z", }); - cacheSticker({ + stickerCache.cacheSticker({ fileId: "new", fileUniqueId: "new-unique", description: "New sticker", cachedAt: "2026-01-26T10:00:00.000Z", }); - cacheSticker({ + stickerCache.cacheSticker({ fileId: "mid", fileUniqueId: "mid-unique", description: "Middle sticker", cachedAt: "2026-01-23T10:00:00.000Z", }); - const stats = getCacheStats(); + const stats = stickerCache.getCacheStats(); expect(stats.count).toBe(3); expect(stats.oldestAt).toBe("2026-01-20T10:00:00.000Z"); expect(stats.newestAt).toBe("2026-01-26T10:00:00.000Z"); diff --git a/extensions/telegram/src/sticker-cache.ts b/extensions/telegram/src/sticker-cache.ts index ea86bd8f1bf..e6fd3398f16 100644 --- a/extensions/telegram/src/sticker-cache.ts +++ b/extensions/telegram/src/sticker-cache.ts @@ -1,4 +1,3 @@ -import fs from "node:fs/promises"; import path from "node:path"; import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/agent-runtime"; import type { ModelCatalogEntry } from "openclaw/plugin-sdk/agent-runtime"; @@ -12,6 +11,7 @@ import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; import { loadJsonFile, saveJsonFile } from "openclaw/plugin-sdk/json-store"; import { AUTO_IMAGE_KEY_PROVIDERS, DEFAULT_IMAGE_MODELS } from "openclaw/plugin-sdk/media-runtime"; import { resolveAutoImageModel } from "openclaw/plugin-sdk/media-runtime"; +import { describeImageFileWithModel } from "openclaw/plugin-sdk/media-understanding-runtime"; import { logVerbose } from "openclaw/plugin-sdk/runtime-env"; import { STATE_DIR } from "openclaw/plugin-sdk/state-paths"; @@ -143,12 +143,6 @@ export function getCacheStats(): { count: number; oldestAt?: string; newestAt?: const STICKER_DESCRIPTION_PROMPT = "Describe this sticker image in 1-2 sentences. Focus on what the sticker depicts (character, object, action, emotion). Be concise and objective."; -let imageRuntimePromise: Promise | null = null; - -function loadImageRuntime() { - imageRuntimePromise ??= import("./media-understanding.runtime.js"); - return imageRuntimePromise; -} export interface DescribeStickerParams { imagePath: string; @@ -242,22 +236,18 @@ export async function describeStickerImage(params: DescribeStickerParams): Promi logVerbose(`telegram: describing sticker with ${provider}/${model}`); try { - const buffer = await fs.readFile(imagePath); - // Lazy import to avoid circular dependency - const { describeImageWithModel } = await loadImageRuntime(); - const result = await describeImageWithModel({ - buffer, - fileName: "sticker.webp", + const result = await describeImageFileWithModel({ + filePath: imagePath, mime: "image/webp", - prompt: STICKER_DESCRIPTION_PROMPT, cfg, - agentDir: agentDir ?? "", + agentDir, provider, model, + prompt: STICKER_DESCRIPTION_PROMPT, maxTokens: 150, - timeoutMs: 30000, + timeoutMs: 30_000, }); - return result.text; + return result.text ?? null; } catch (err) { logVerbose(`telegram: failed to describe sticker: ${String(err)}`); return null;