Merge d4cb9170923021b3bad69517ce90fb40a7b786c2 into 9fb78453e088cd7b553d7779faa0de5c83708e70

This commit is contained in:
Jinhao Dong 2026-03-20 22:20:19 -07:00 committed by GitHub
commit 0e1e4694f6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 124 additions and 0 deletions

View File

@@ -2,6 +2,7 @@ import { definePluginEntry } from "openclaw/plugin-sdk/core";
import { createProviderApiKeyAuthMethod } from "openclaw/plugin-sdk/provider-auth";
import { buildSingleProviderApiKeyCatalog } from "openclaw/plugin-sdk/provider-catalog";
import { PROVIDER_LABELS } from "openclaw/plugin-sdk/provider-usage";
import { xiaomiMediaUnderstandingProvider } from "./media-understanding-provider.js";
import { applyXiaomiConfig, XIAOMI_DEFAULT_MODEL_REF } from "./onboard.js";
import { buildXiaomiProvider } from "./provider-catalog.js";
@@ -60,5 +61,6 @@ export default definePluginEntry({
windows: [],
}),
});
api.registerMediaUnderstandingProvider(xiaomiMediaUnderstandingProvider);
},
});

View File

@@ -0,0 +1,122 @@
import {
describeImageWithModel,
describeImagesWithModel,
type MediaUnderstandingProvider,
type VideoDescriptionRequest,
type VideoDescriptionResult,
assertOkOrThrowHttpError,
normalizeBaseUrl,
postJsonRequest,
} from "openclaw/plugin-sdk/media-understanding";
/**
 * Base URL that `describeXiaomiVideo` passes as the second argument to
 * `normalizeBaseUrl` (presumably the fallback when `params.baseUrl` is absent —
 * confirm against the SDK helper).
 */
export const DEFAULT_XIAOMI_VIDEO_BASE_URL = "https://api.xiaomimimo.com/v1";
/** Model id returned by `resolveModel` when the requested model is missing or blank. */
const DEFAULT_XIAOMI_VIDEO_MODEL = "mimo-v2-omni";
/** Prompt returned by `resolvePrompt` when the requested prompt is missing or blank. */
const DEFAULT_XIAOMI_VIDEO_PROMPT = "Describe the video.";
/**
 * The portion of the chat-completions response body that this module reads.
 * All fields are optional; the value is produced by casting `res.json()` in
 * `describeXiaomiVideo`, so nothing here is validated at runtime.
 */
type XiaomiVideoPayload = {
// Only the first entry (`choices[0]`) is inspected by `coerceResponseText`.
choices?: Array<{
message?: {
// Either a plain string or a list of parts; only each part's `text` is read.
content?: string | Array<{ text?: string }>;
// Used by `coerceResponseText` as a fallback when `content` yields no text.
reasoning_content?: string;
};
}>;
};
/**
 * Picks the model id to send with the request.
 *
 * @param model - Caller-supplied model id; may be omitted.
 * @returns The trimmed `model` when it contains non-whitespace characters,
 *   otherwise `DEFAULT_XIAOMI_VIDEO_MODEL`.
 */
function resolveModel(model?: string): string {
  const candidate = model?.trim();
  if (candidate) {
    return candidate;
  }
  // Undefined and blank/whitespace-only input both fall back to the default.
  return DEFAULT_XIAOMI_VIDEO_MODEL;
}
/**
 * Picks the prompt text to send alongside the video.
 *
 * @param prompt - Caller-supplied prompt; may be omitted.
 * @returns The trimmed `prompt` when it contains non-whitespace characters,
 *   otherwise `DEFAULT_XIAOMI_VIDEO_PROMPT`.
 */
function resolvePrompt(prompt?: string): string {
  const candidate = prompt?.trim();
  if (candidate) {
    return candidate;
  }
  // Undefined and blank/whitespace-only input both fall back to the default.
  return DEFAULT_XIAOMI_VIDEO_PROMPT;
}
/**
 * Extracts the description text from a chat-completions style payload.
 *
 * Lookup order, using only the first choice's message:
 *   1. `content` when it is a non-blank string;
 *   2. `content` when it is an array: the trimmed, non-empty `text` of each
 *      part, joined with newlines;
 *   3. `reasoning_content` when it is a non-blank string.
 *
 * The payload originates from an unchecked cast of parsed JSON, so this
 * function tolerates a nullish payload and nullish / non-object entries in the
 * `content` array instead of throwing a `TypeError`.
 *
 * @param payload - Parsed response body.
 * @returns The trimmed text, or `null` when none of the sources yields text.
 */
function coerceResponseText(payload: XiaomiVideoPayload): string | null {
  // `res.json()` can legally produce `null`; treat that as "no content".
  if (payload == null) {
    return null;
  }
  const message = payload.choices?.[0]?.message;
  if (!message) {
    return null;
  }
  const { content, reasoning_content: reasoning } = message;

  if (typeof content === "string") {
    const text = content.trim();
    if (text) {
      return text;
    }
  } else if (Array.isArray(content)) {
    const text = content
      .map((part: { text?: unknown } | null | undefined) => {
        // Parts may be null or lack a string `text`; those contribute nothing.
        const raw = part?.text;
        return typeof raw === "string" ? raw.trim() : "";
      })
      .filter(Boolean)
      .join("\n")
      .trim();
    if (text) {
      return text;
    }
  }

  // Fall back to the reasoning text only when `content` produced nothing.
  if (typeof reasoning === "string") {
    const text = reasoning.trim();
    if (text) {
      return text;
    }
  }
  return null;
}
/**
 * Requests a text description of a video from the Xiaomi chat-completions
 * endpoint.
 *
 * The video bytes in `params.buffer` are base64-encoded into a `data:` URL and
 * sent as a `video_url` content part next to the text prompt. Caller-provided
 * `content-type` / `authorization` headers win; otherwise JSON and a bearer
 * token built from `params.apiKey` are used.
 *
 * @param params - Request options (video buffer, API key, and optional
 *   base URL, model, prompt, MIME type, headers, timeout and fetch override).
 * @returns The extracted description text and the model id that was sent.
 * @throws Whatever `assertOkOrThrowHttpError` raises for the response
 *   (presumably on non-OK status — confirm against the SDK helper).
 * @throws Error when no text can be extracted from the response payload.
 */
export async function describeXiaomiVideo(
  params: VideoDescriptionRequest,
): Promise<VideoDescriptionResult> {
  const fetchFn = params.fetchFn ?? fetch;
  const endpointBase = normalizeBaseUrl(params.baseUrl, DEFAULT_XIAOMI_VIDEO_BASE_URL);
  const model = resolveModel(params.model);
  const mimeType = params.mime ?? "video/mp4";
  const promptText = resolvePrompt(params.prompt);

  // Start from the caller's headers and only fill in what is missing.
  const requestHeaders = new Headers(params.headers);
  if (!requestHeaders.has("content-type")) {
    requestHeaders.set("content-type", "application/json");
  }
  if (!requestHeaders.has("authorization")) {
    requestHeaders.set("authorization", `Bearer ${params.apiKey}`);
  }

  // The video travels inline as a base64 data URL.
  const videoDataUrl = `data:${mimeType};base64,${params.buffer.toString("base64")}`;
  const textPart = { type: "text", text: promptText };
  const videoPart = { type: "video_url", video_url: { url: videoDataUrl } };
  const requestBody = {
    model,
    messages: [{ role: "user", content: [textPart, videoPart] }],
  };

  const { response, release } = await postJsonRequest({
    url: `${endpointBase}/chat/completions`,
    headers: requestHeaders,
    body: requestBody,
    timeoutMs: params.timeoutMs,
    fetchFn,
  });

  try {
    await assertOkOrThrowHttpError(response, "Xiaomi video description failed");
    const text = coerceResponseText((await response.json()) as XiaomiVideoPayload);
    if (text) {
      return { text, model };
    }
    throw new Error("Xiaomi video description response missing content");
  } finally {
    // Always invoke the release callback returned by postJsonRequest,
    // whether the request succeeded or threw.
    await release();
  }
}
/**
 * Media-understanding provider definition for Xiaomi; the plugin entry passes
 * it to `api.registerMediaUnderstandingProvider`.
 *
 * Image description is delegated to the SDK's `describeImageWithModel` /
 * `describeImagesWithModel`; video description uses `describeXiaomiVideo`
 * from this module.
 */
export const xiaomiMediaUnderstandingProvider: MediaUnderstandingProvider = {
id: "xiaomi",
capabilities: ["image", "video"],
describeImage: describeImageWithModel,
describeImages: describeImagesWithModel,
describeVideo: describeXiaomiVideo,
};