feat(xiaomi): add media understanding provider for image and video

Add Xiaomi media understanding provider with image and video
capabilities using MiMo V2 Omni. Enables automatic media description
when users send image or video attachments.
This commit is contained in:
Jinhao Dong 2026-03-18 20:23:23 +08:00
parent 0ae3e70a5c
commit d4cb917092
2 changed files with 124 additions and 0 deletions

View File

@ -2,6 +2,7 @@ import { definePluginEntry } from "openclaw/plugin-sdk/core";
import { createProviderApiKeyAuthMethod } from "openclaw/plugin-sdk/provider-auth";
import { buildSingleProviderApiKeyCatalog } from "openclaw/plugin-sdk/provider-catalog";
import { PROVIDER_LABELS } from "openclaw/plugin-sdk/provider-usage";
import { xiaomiMediaUnderstandingProvider } from "./media-understanding-provider.js";
import { applyXiaomiConfig, XIAOMI_DEFAULT_MODEL_REF } from "./onboard.js";
import { buildXiaomiProvider } from "./provider-catalog.js";
@ -60,5 +61,6 @@ export default definePluginEntry({
windows: [],
}),
});
api.registerMediaUnderstandingProvider(xiaomiMediaUnderstandingProvider);
},
});

View File

@ -0,0 +1,122 @@
import {
describeImageWithModel,
describeImagesWithModel,
type MediaUnderstandingProvider,
type VideoDescriptionRequest,
type VideoDescriptionResult,
assertOkOrThrowHttpError,
normalizeBaseUrl,
postJsonRequest,
} from "openclaw/plugin-sdk/media-understanding";
/**
 * Default base URL handed to `normalizeBaseUrl` together with the caller's
 * `baseUrl`; `/chat/completions` is appended to the normalized result.
 */
export const DEFAULT_XIAOMI_VIDEO_BASE_URL = "https://api.xiaomimimo.com/v1";
/** Model id used when the request's `model` is missing or blank after trimming. */
const DEFAULT_XIAOMI_VIDEO_MODEL = "mimo-v2-omni";
/** Prompt used when the request's `prompt` is missing or blank after trimming. */
const DEFAULT_XIAOMI_VIDEO_PROMPT = "Describe the video.";
/** One element of an array-valued message `content`; only `text` is read. */
type XiaomiVideoContentPart = { text?: string };

/** The fields of a choice's `message` that this module inspects. */
type XiaomiVideoMessage = {
  content?: string | XiaomiVideoContentPart[];
  reasoning_content?: string;
};

/**
 * Subset of the chat-completions response body consumed by this module.
 * Every field is optional because the body is cast from parsed JSON, not validated.
 */
type XiaomiVideoPayload = {
  choices?: Array<{ message?: XiaomiVideoMessage }>;
};
/**
 * Picks the model id to send with the request.
 *
 * @param model - Caller-supplied model id; may be omitted.
 * @returns The trimmed `model` when it is non-empty, otherwise
 *   `DEFAULT_XIAOMI_VIDEO_MODEL`.
 */
function resolveModel(model?: string): string {
  const candidate = model?.trim() ?? "";
  return candidate.length > 0 ? candidate : DEFAULT_XIAOMI_VIDEO_MODEL;
}
/**
 * Picks the prompt text to send alongside the video.
 *
 * @param prompt - Caller-supplied prompt; may be omitted.
 * @returns The trimmed `prompt` when it is non-empty, otherwise
 *   `DEFAULT_XIAOMI_VIDEO_PROMPT`.
 */
function resolvePrompt(prompt?: string): string {
  const candidate = prompt?.trim() ?? "";
  return candidate.length > 0 ? candidate : DEFAULT_XIAOMI_VIDEO_PROMPT;
}
/**
 * Extracts the description text from the first choice of a response payload.
 *
 * Lookup order:
 *   1. `message.content` when it is a non-blank string (returned trimmed);
 *   2. `message.content` when it is an array: the trimmed `text` of each part,
 *      blanks dropped, joined with newlines;
 *   3. `message.reasoning_content` when it is a non-blank string (returned trimmed).
 *
 * The payload comes from an unvalidated JSON body, so a nullish payload and
 * nullish array parts are tolerated and treated as "no text" instead of
 * raising a TypeError.
 *
 * @param payload - Parsed response body.
 * @returns The extracted text, or `null` when none of the sources yields text.
 */
function coerceResponseText(payload: XiaomiVideoPayload): string | null {
  // Optional chaining on `payload` itself: a JSON body of `null` must not crash here.
  const message = payload?.choices?.[0]?.message;
  if (!message) {
    return null;
  }

  const { content, reasoning_content: reasoning } = message;

  if (typeof content === "string") {
    const trimmed = content.trim();
    if (trimmed) {
      return trimmed;
    }
  } else if (Array.isArray(content)) {
    const joined = content
      // `part?.text` guards against null/undefined entries in the array.
      .map((part) => (typeof part?.text === "string" ? part.text.trim() : ""))
      .filter(Boolean)
      .join("\n")
      .trim();
    if (joined) {
      return joined;
    }
  }

  // Fall back to the reasoning text only when `content` produced nothing.
  if (typeof reasoning === "string") {
    const trimmed = reasoning.trim();
    if (trimmed) {
      return trimmed;
    }
  }
  return null;
}
/**
 * Requests a text description of a video from the Xiaomi chat-completions endpoint.
 *
 * The video bytes in `params.buffer` are embedded in the request as a base64
 * `data:` URL inside a `video_url` content part, next to a text part carrying
 * the prompt. Caller-supplied headers win: `content-type` and `authorization`
 * are only filled in when absent.
 *
 * @param params - Request options: video buffer, and optional `baseUrl`,
 *   `model`, `prompt`, `mime` (defaults to "video/mp4"), `headers`, `apiKey`,
 *   `timeoutMs`, and `fetchFn` (defaults to the global `fetch`).
 * @returns The extracted description text together with the model id that was sent.
 * @throws Error when the response yields no text; `assertOkOrThrowHttpError`
 *   is also expected to throw for non-OK responses (behavior defined in the SDK).
 */
export async function describeXiaomiVideo(
  params: VideoDescriptionRequest,
): Promise<VideoDescriptionResult> {
  const fetchFn = params.fetchFn ?? fetch;
  const endpoint = `${normalizeBaseUrl(params.baseUrl, DEFAULT_XIAOMI_VIDEO_BASE_URL)}/chat/completions`;
  const model = resolveModel(params.model);
  const mimeType = params.mime ?? "video/mp4";
  const promptText = resolvePrompt(params.prompt);

  // Start from the caller's headers and only add what is missing.
  const requestHeaders = new Headers(params.headers);
  if (!requestHeaders.has("content-type")) {
    requestHeaders.set("content-type", "application/json");
  }
  if (!requestHeaders.has("authorization")) {
    requestHeaders.set("authorization", `Bearer ${params.apiKey}`);
  }

  // The video travels inline as a base64 data URL.
  const videoDataUrl = `data:${mimeType};base64,${params.buffer.toString("base64")}`;

  const { response, release } = await postJsonRequest({
    url: endpoint,
    headers: requestHeaders,
    body: {
      model,
      messages: [
        {
          role: "user",
          content: [
            { type: "text", text: promptText },
            { type: "video_url", video_url: { url: videoDataUrl } },
          ],
        },
      ],
    },
    timeoutMs: params.timeoutMs,
    fetchFn,
  });

  try {
    await assertOkOrThrowHttpError(response, "Xiaomi video description failed");
    const parsed = (await response.json()) as XiaomiVideoPayload;
    const description = coerceResponseText(parsed);
    if (!description) {
      throw new Error("Xiaomi video description response missing content");
    }
    return { text: description, model };
  } finally {
    // Always hand the request resources back, on success and on failure.
    await release();
  }
}
/**
 * Media understanding provider definition with id "xiaomi", declaring the
 * "image" and "video" capabilities.
 *
 * Image description uses the SDK's `describeImageWithModel` /
 * `describeImagesWithModel` helpers as-is; video description is handled by
 * `describeXiaomiVideo` from this module.
 */
export const xiaomiMediaUnderstandingProvider: MediaUnderstandingProvider = {
id: "xiaomi",
capabilities: ["image", "video"],
describeImage: describeImageWithModel,
describeImages: describeImagesWithModel,
describeVideo: describeXiaomiVideo,
};