feat(image-generation): add image_generate tool
This commit is contained in:
parent
916db21fe5
commit
3a456678ee
39
src/agents/openclaw-tools.image-generation.test.ts
Normal file
39
src/agents/openclaw-tools.image-generation.test.ts
Normal file
@ -0,0 +1,39 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { createOpenClawTools } from "./openclaw-tools.js";
|
||||
|
||||
vi.mock("../plugins/tools.js", () => ({
|
||||
resolvePluginTools: () => [],
|
||||
}));
|
||||
|
||||
function asConfig(value: unknown): OpenClawConfig {
|
||||
return value as OpenClawConfig;
|
||||
}
|
||||
|
||||
describe("openclaw tools image generation registration", () => {
|
||||
it("registers image_generate when image-generation config is present", () => {
|
||||
const tools = createOpenClawTools({
|
||||
config: asConfig({
|
||||
agents: {
|
||||
defaults: {
|
||||
imageGenerationModel: {
|
||||
primary: "openai/gpt-image-1",
|
||||
},
|
||||
},
|
||||
},
|
||||
}),
|
||||
agentDir: "/tmp/openclaw-agent-main",
|
||||
});
|
||||
|
||||
expect(tools.map((tool) => tool.name)).toContain("image_generate");
|
||||
});
|
||||
|
||||
it("omits image_generate when image-generation config is absent", () => {
|
||||
const tools = createOpenClawTools({
|
||||
config: asConfig({}),
|
||||
agentDir: "/tmp/openclaw-agent-main",
|
||||
});
|
||||
|
||||
expect(tools.map((tool) => tool.name)).not.toContain("image_generate");
|
||||
});
|
||||
});
|
||||
@ -12,6 +12,7 @@ import { createCanvasTool } from "./tools/canvas-tool.js";
|
||||
import type { AnyAgentTool } from "./tools/common.js";
|
||||
import { createCronTool } from "./tools/cron-tool.js";
|
||||
import { createGatewayTool } from "./tools/gateway-tool.js";
|
||||
import { createImageGenerateTool } from "./tools/image-generate-tool.js";
|
||||
import { createImageTool } from "./tools/image-tool.js";
|
||||
import { createMessageTool } from "./tools/message-tool.js";
|
||||
import { createNodesTool } from "./tools/nodes-tool.js";
|
||||
@ -103,6 +104,13 @@ export function createOpenClawTools(
|
||||
modelHasVision: options?.modelHasVision,
|
||||
})
|
||||
: null;
|
||||
const imageGenerateTool = createImageGenerateTool({
|
||||
config: options?.config,
|
||||
agentDir: options?.agentDir,
|
||||
workspaceDir,
|
||||
sandbox,
|
||||
fsPolicy: options?.fsPolicy,
|
||||
});
|
||||
const pdfTool = options?.agentDir?.trim()
|
||||
? createPdfTool({
|
||||
config: options?.config,
|
||||
@ -163,6 +171,7 @@ export function createOpenClawTools(
|
||||
agentChannel: options?.agentChannel,
|
||||
config: options?.config,
|
||||
}),
|
||||
...(imageGenerateTool ? [imageGenerateTool] : []),
|
||||
createGatewayTool({
|
||||
agentSessionKey: options?.agentSessionKey,
|
||||
config: options?.config,
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { extractToolResultMediaPaths } from "./pi-embedded-subscribe.tools.js";
|
||||
import {
|
||||
extractToolResultMediaPaths,
|
||||
isToolResultMediaTrusted,
|
||||
} from "./pi-embedded-subscribe.tools.js";
|
||||
|
||||
describe("extractToolResultMediaPaths", () => {
|
||||
it("returns empty array for null/undefined", () => {
|
||||
@ -229,4 +232,8 @@ describe("extractToolResultMediaPaths", () => {
|
||||
};
|
||||
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/page1.png", "/tmp/page2.png"]);
|
||||
});
|
||||
|
||||
it("trusts image_generate local MEDIA paths", () => {
|
||||
expect(isToolResultMediaTrusted("image_generate")).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
@ -142,6 +142,7 @@ const TRUSTED_TOOL_RESULT_MEDIA = new Set([
|
||||
"exec",
|
||||
"gateway",
|
||||
"image",
|
||||
"image_generate",
|
||||
"memory_get",
|
||||
"memory_search",
|
||||
"message",
|
||||
|
||||
@ -268,6 +268,7 @@ export function buildAgentSystemPrompt(params: {
|
||||
session_status:
|
||||
"Show a /status-equivalent status card (usage + time + Reasoning/Verbose/Elevated); use for model-use questions (📊 session_status); optional per-session model override",
|
||||
image: "Analyze an image with the configured image model",
|
||||
image_generate: "Generate images with the configured image-generation model",
|
||||
};
|
||||
|
||||
const toolOrder = [
|
||||
@ -295,6 +296,7 @@ export function buildAgentSystemPrompt(params: {
|
||||
"subagents",
|
||||
"session_status",
|
||||
"image",
|
||||
"image_generate",
|
||||
];
|
||||
|
||||
const rawToolNames = (params.toolNames ?? []).map((tool) => tool.trim());
|
||||
|
||||
@ -7,5 +7,6 @@ describe("tool-catalog", () => {
|
||||
expect(policy).toBeDefined();
|
||||
expect(policy!.allow).toContain("web_search");
|
||||
expect(policy!.allow).toContain("web_fetch");
|
||||
expect(policy!.allow).toContain("image_generate");
|
||||
});
|
||||
});
|
||||
|
||||
@ -233,6 +233,14 @@ const CORE_TOOL_DEFINITIONS: CoreToolDefinition[] = [
|
||||
profiles: ["coding"],
|
||||
includeInOpenClawGroup: true,
|
||||
},
|
||||
{
|
||||
id: "image_generate",
|
||||
label: "image_generate",
|
||||
description: "Image generation",
|
||||
sectionId: "media",
|
||||
profiles: ["coding"],
|
||||
includeInOpenClawGroup: true,
|
||||
},
|
||||
{
|
||||
id: "tts",
|
||||
label: "tts",
|
||||
|
||||
280
src/agents/tools/image-generate-tool.test.ts
Normal file
280
src/agents/tools/image-generate-tool.test.ts
Normal file
@ -0,0 +1,280 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import * as imageGenerationRuntime from "../../image-generation/runtime.js";
|
||||
import * as imageOps from "../../media/image-ops.js";
|
||||
import * as mediaStore from "../../media/store.js";
|
||||
import * as webMedia from "../../plugin-sdk/web-media.js";
|
||||
import { createImageGenerateTool } from "./image-generate-tool.js";
|
||||
|
||||
describe("createImageGenerateTool", () => {
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it("returns null when image-generation model is not configured", () => {
|
||||
expect(createImageGenerateTool({ config: {} })).toBeNull();
|
||||
});
|
||||
|
||||
it("generates images and returns MEDIA paths", async () => {
|
||||
const generateImage = vi.spyOn(imageGenerationRuntime, "generateImage").mockResolvedValue({
|
||||
provider: "openai",
|
||||
model: "gpt-image-1",
|
||||
attempts: [],
|
||||
images: [
|
||||
{
|
||||
buffer: Buffer.from("png-1"),
|
||||
mimeType: "image/png",
|
||||
fileName: "cat-one.png",
|
||||
},
|
||||
{
|
||||
buffer: Buffer.from("png-2"),
|
||||
mimeType: "image/png",
|
||||
fileName: "cat-two.png",
|
||||
revisedPrompt: "A more cinematic cat",
|
||||
},
|
||||
],
|
||||
});
|
||||
const saveMediaBuffer = vi.spyOn(mediaStore, "saveMediaBuffer");
|
||||
saveMediaBuffer.mockResolvedValueOnce({
|
||||
path: "/tmp/generated-1.png",
|
||||
id: "generated-1.png",
|
||||
size: 5,
|
||||
contentType: "image/png",
|
||||
});
|
||||
saveMediaBuffer.mockResolvedValueOnce({
|
||||
path: "/tmp/generated-2.png",
|
||||
id: "generated-2.png",
|
||||
size: 5,
|
||||
contentType: "image/png",
|
||||
});
|
||||
|
||||
const tool = createImageGenerateTool({
|
||||
config: {
|
||||
agents: {
|
||||
defaults: {
|
||||
imageGenerationModel: {
|
||||
primary: "openai/gpt-image-1",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
agentDir: "/tmp/agent",
|
||||
});
|
||||
|
||||
expect(tool).not.toBeNull();
|
||||
if (!tool) {
|
||||
throw new Error("expected image_generate tool");
|
||||
}
|
||||
|
||||
const result = await tool.execute("call-1", {
|
||||
prompt: "A cat wearing sunglasses",
|
||||
model: "openai/gpt-image-1",
|
||||
count: 2,
|
||||
size: "1024x1024",
|
||||
});
|
||||
|
||||
expect(generateImage).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
cfg: {
|
||||
agents: {
|
||||
defaults: {
|
||||
imageGenerationModel: {
|
||||
primary: "openai/gpt-image-1",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
prompt: "A cat wearing sunglasses",
|
||||
agentDir: "/tmp/agent",
|
||||
modelOverride: "openai/gpt-image-1",
|
||||
size: "1024x1024",
|
||||
count: 2,
|
||||
inputImages: [],
|
||||
}),
|
||||
);
|
||||
expect(saveMediaBuffer).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
Buffer.from("png-1"),
|
||||
"image/png",
|
||||
"tool-image-generation",
|
||||
undefined,
|
||||
"cat-one.png",
|
||||
);
|
||||
expect(saveMediaBuffer).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
Buffer.from("png-2"),
|
||||
"image/png",
|
||||
"tool-image-generation",
|
||||
undefined,
|
||||
"cat-two.png",
|
||||
);
|
||||
expect(result).toMatchObject({
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: expect.stringContaining("Generated 2 images with openai/gpt-image-1."),
|
||||
},
|
||||
],
|
||||
details: {
|
||||
provider: "openai",
|
||||
model: "gpt-image-1",
|
||||
count: 2,
|
||||
paths: ["/tmp/generated-1.png", "/tmp/generated-2.png"],
|
||||
revisedPrompts: ["A more cinematic cat"],
|
||||
},
|
||||
});
|
||||
const text = (result.content?.[0] as { text: string } | undefined)?.text ?? "";
|
||||
expect(text).toContain("MEDIA:/tmp/generated-1.png");
|
||||
expect(text).toContain("MEDIA:/tmp/generated-2.png");
|
||||
});
|
||||
|
||||
it("rejects counts outside the supported range", async () => {
|
||||
const tool = createImageGenerateTool({
|
||||
config: {
|
||||
agents: {
|
||||
defaults: {
|
||||
imageGenerationModel: {
|
||||
primary: "google/gemini-3.1-flash-image-preview",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
expect(tool).not.toBeNull();
|
||||
if (!tool) {
|
||||
throw new Error("expected image_generate tool");
|
||||
}
|
||||
|
||||
await expect(tool.execute("call-2", { prompt: "too many cats", count: 5 })).rejects.toThrow(
|
||||
"count must be between 1 and 4",
|
||||
);
|
||||
});
|
||||
|
||||
it("forwards reference images and inferred resolution for edit mode", async () => {
|
||||
const generateImage = vi.spyOn(imageGenerationRuntime, "generateImage").mockResolvedValue({
|
||||
provider: "google",
|
||||
model: "gemini-3-pro-image-preview",
|
||||
attempts: [],
|
||||
images: [
|
||||
{
|
||||
buffer: Buffer.from("png-out"),
|
||||
mimeType: "image/png",
|
||||
fileName: "edited.png",
|
||||
},
|
||||
],
|
||||
});
|
||||
vi.spyOn(webMedia, "loadWebMedia").mockResolvedValue({
|
||||
kind: "image",
|
||||
buffer: Buffer.from("input-image"),
|
||||
contentType: "image/png",
|
||||
});
|
||||
vi.spyOn(imageOps, "getImageMetadata").mockResolvedValue({
|
||||
width: 3200,
|
||||
height: 1800,
|
||||
});
|
||||
vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValue({
|
||||
path: "/tmp/edited.png",
|
||||
id: "edited.png",
|
||||
size: 7,
|
||||
contentType: "image/png",
|
||||
});
|
||||
|
||||
const tool = createImageGenerateTool({
|
||||
config: {
|
||||
agents: {
|
||||
defaults: {
|
||||
imageGenerationModel: {
|
||||
primary: "google/gemini-3-pro-image-preview",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
workspaceDir: process.cwd(),
|
||||
});
|
||||
|
||||
expect(tool).not.toBeNull();
|
||||
if (!tool) {
|
||||
throw new Error("expected image_generate tool");
|
||||
}
|
||||
|
||||
await tool.execute("call-edit", {
|
||||
prompt: "Add a dramatic stormy sky but keep everything else identical.",
|
||||
image: "./fixtures/reference.png",
|
||||
});
|
||||
|
||||
expect(generateImage).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
resolution: "4K",
|
||||
inputImages: [
|
||||
expect.objectContaining({
|
||||
buffer: Buffer.from("input-image"),
|
||||
mimeType: "image/png",
|
||||
}),
|
||||
],
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("lists registered provider and model options", async () => {
|
||||
vi.spyOn(imageGenerationRuntime, "listRuntimeImageGenerationProviders").mockReturnValue([
|
||||
{
|
||||
id: "google",
|
||||
defaultModel: "gemini-3.1-flash-image-preview",
|
||||
models: ["gemini-3.1-flash-image-preview", "gemini-3-pro-image-preview"],
|
||||
supportedResolutions: ["1K", "2K", "4K"],
|
||||
supportsImageEditing: true,
|
||||
generateImage: vi.fn(async () => {
|
||||
throw new Error("not used");
|
||||
}),
|
||||
},
|
||||
{
|
||||
id: "openai",
|
||||
defaultModel: "gpt-image-1",
|
||||
models: ["gpt-image-1"],
|
||||
supportedSizes: ["1024x1024", "1024x1536", "1536x1024"],
|
||||
supportsImageEditing: false,
|
||||
generateImage: vi.fn(async () => {
|
||||
throw new Error("not used");
|
||||
}),
|
||||
},
|
||||
]);
|
||||
|
||||
const tool = createImageGenerateTool({
|
||||
config: {
|
||||
agents: {
|
||||
defaults: {
|
||||
imageGenerationModel: {
|
||||
primary: "google/gemini-3.1-flash-image-preview",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(tool).not.toBeNull();
|
||||
if (!tool) {
|
||||
throw new Error("expected image_generate tool");
|
||||
}
|
||||
|
||||
const result = await tool.execute("call-list", { action: "list" });
|
||||
const text = (result.content?.[0] as { text: string } | undefined)?.text ?? "";
|
||||
|
||||
expect(text).toContain("google (default gemini-3.1-flash-image-preview)");
|
||||
expect(text).toContain("gemini-3.1-flash-image-preview");
|
||||
expect(text).toContain("gemini-3-pro-image-preview");
|
||||
expect(text).toContain("editing");
|
||||
expect(result).toMatchObject({
|
||||
details: {
|
||||
providers: expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
id: "google",
|
||||
defaultModel: "gemini-3.1-flash-image-preview",
|
||||
models: expect.arrayContaining([
|
||||
"gemini-3.1-flash-image-preview",
|
||||
"gemini-3-pro-image-preview",
|
||||
]),
|
||||
}),
|
||||
]),
|
||||
},
|
||||
});
|
||||
});
|
||||
});
|
||||
424
src/agents/tools/image-generate-tool.ts
Normal file
424
src/agents/tools/image-generate-tool.ts
Normal file
@ -0,0 +1,424 @@
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import { loadConfig } from "../../config/config.js";
|
||||
import {
|
||||
generateImage,
|
||||
listRuntimeImageGenerationProviders,
|
||||
} from "../../image-generation/runtime.js";
|
||||
import type {
|
||||
ImageGenerationResolution,
|
||||
ImageGenerationSourceImage,
|
||||
} from "../../image-generation/types.js";
|
||||
import { getImageMetadata } from "../../media/image-ops.js";
|
||||
import { saveMediaBuffer } from "../../media/store.js";
|
||||
import { loadWebMedia } from "../../plugin-sdk/web-media.js";
|
||||
import { resolveUserPath } from "../../utils.js";
|
||||
import { ToolInputError, readNumberParam, readStringParam } from "./common.js";
|
||||
import { decodeDataUrl } from "./image-tool.helpers.js";
|
||||
import { resolveMediaToolLocalRoots } from "./media-tool-shared.js";
|
||||
import {
|
||||
createSandboxBridgeReadFile,
|
||||
resolveSandboxedBridgeMediaPath,
|
||||
type AnyAgentTool,
|
||||
type SandboxFsBridge,
|
||||
type ToolFsPolicy,
|
||||
} from "./tool-runtime.helpers.js";
|
||||
|
||||
const DEFAULT_COUNT = 1;
|
||||
const MAX_COUNT = 4;
|
||||
const MAX_INPUT_IMAGES = 4;
|
||||
const DEFAULT_RESOLUTION: ImageGenerationResolution = "1K";
|
||||
|
||||
const ImageGenerateToolSchema = Type.Object({
|
||||
action: Type.Optional(
|
||||
Type.String({
|
||||
description:
|
||||
'Optional action: "generate" (default) or "list" to inspect available providers/models.',
|
||||
}),
|
||||
),
|
||||
prompt: Type.Optional(Type.String({ description: "Image generation prompt." })),
|
||||
image: Type.Optional(
|
||||
Type.String({
|
||||
description: "Optional reference image path or URL for edit mode.",
|
||||
}),
|
||||
),
|
||||
images: Type.Optional(
|
||||
Type.Array(Type.String(), {
|
||||
description: `Optional reference images for edit mode (up to ${MAX_INPUT_IMAGES}).`,
|
||||
}),
|
||||
),
|
||||
model: Type.Optional(
|
||||
Type.String({ description: "Optional provider/model override, e.g. openai/gpt-image-1." }),
|
||||
),
|
||||
size: Type.Optional(
|
||||
Type.String({
|
||||
description:
|
||||
"Optional size hint like 1024x1024, 1536x1024, 1024x1536, 1024x1792, or 1792x1024.",
|
||||
}),
|
||||
),
|
||||
resolution: Type.Optional(
|
||||
Type.String({
|
||||
description:
|
||||
"Optional resolution hint: 1K, 2K, or 4K. Useful for Google edit/generation flows.",
|
||||
}),
|
||||
),
|
||||
count: Type.Optional(
|
||||
Type.Number({
|
||||
description: `Optional number of images to request (1-${MAX_COUNT}).`,
|
||||
minimum: 1,
|
||||
maximum: MAX_COUNT,
|
||||
}),
|
||||
),
|
||||
});
|
||||
|
||||
function hasConfiguredImageGenerationModel(cfg: OpenClawConfig): boolean {
|
||||
const configured = cfg.agents?.defaults?.imageGenerationModel;
|
||||
if (typeof configured === "string") {
|
||||
return configured.trim().length > 0;
|
||||
}
|
||||
if (configured?.primary?.trim()) {
|
||||
return true;
|
||||
}
|
||||
return (configured?.fallbacks ?? []).some((entry) => entry.trim().length > 0);
|
||||
}
|
||||
|
||||
function resolveAction(args: Record<string, unknown>): "generate" | "list" {
|
||||
const raw = readStringParam(args, "action");
|
||||
if (!raw) {
|
||||
return "generate";
|
||||
}
|
||||
const normalized = raw.trim().toLowerCase();
|
||||
if (normalized === "generate" || normalized === "list") {
|
||||
return normalized;
|
||||
}
|
||||
throw new ToolInputError('action must be "generate" or "list"');
|
||||
}
|
||||
|
||||
function resolveRequestedCount(args: Record<string, unknown>): number {
|
||||
const count = readNumberParam(args, "count", { integer: true });
|
||||
if (count === undefined) {
|
||||
return DEFAULT_COUNT;
|
||||
}
|
||||
if (count < 1 || count > MAX_COUNT) {
|
||||
throw new ToolInputError(`count must be between 1 and ${MAX_COUNT}`);
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
function normalizeResolution(raw: string | undefined): ImageGenerationResolution | undefined {
|
||||
const normalized = raw?.trim().toUpperCase();
|
||||
if (!normalized) {
|
||||
return undefined;
|
||||
}
|
||||
if (normalized === "1K" || normalized === "2K" || normalized === "4K") {
|
||||
return normalized;
|
||||
}
|
||||
throw new ToolInputError("resolution must be one of 1K, 2K, or 4K");
|
||||
}
|
||||
|
||||
function normalizeReferenceImages(args: Record<string, unknown>): string[] {
|
||||
const imageCandidates: string[] = [];
|
||||
if (typeof args.image === "string") {
|
||||
imageCandidates.push(args.image);
|
||||
}
|
||||
if (Array.isArray(args.images)) {
|
||||
imageCandidates.push(
|
||||
...args.images.filter((value): value is string => typeof value === "string"),
|
||||
);
|
||||
}
|
||||
|
||||
const seen = new Set<string>();
|
||||
const normalized: string[] = [];
|
||||
for (const candidate of imageCandidates) {
|
||||
const trimmed = candidate.trim();
|
||||
const dedupe = trimmed.startsWith("@") ? trimmed.slice(1).trim() : trimmed;
|
||||
if (!dedupe || seen.has(dedupe)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(dedupe);
|
||||
normalized.push(trimmed);
|
||||
}
|
||||
if (normalized.length > MAX_INPUT_IMAGES) {
|
||||
throw new ToolInputError(
|
||||
`Too many reference images: ${normalized.length} provided, maximum is ${MAX_INPUT_IMAGES}.`,
|
||||
);
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
type ImageGenerateSandboxConfig = {
|
||||
root: string;
|
||||
bridge: SandboxFsBridge;
|
||||
};
|
||||
|
||||
async function loadReferenceImages(params: {
|
||||
imageInputs: string[];
|
||||
maxBytes?: number;
|
||||
localRoots: string[];
|
||||
sandboxConfig: { root: string; bridge: SandboxFsBridge; workspaceOnly: boolean } | null;
|
||||
}): Promise<
|
||||
Array<{
|
||||
sourceImage: ImageGenerationSourceImage;
|
||||
resolvedImage: string;
|
||||
rewrittenFrom?: string;
|
||||
}>
|
||||
> {
|
||||
const loaded: Array<{
|
||||
sourceImage: ImageGenerationSourceImage;
|
||||
resolvedImage: string;
|
||||
rewrittenFrom?: string;
|
||||
}> = [];
|
||||
|
||||
for (const imageRawInput of params.imageInputs) {
|
||||
const trimmed = imageRawInput.trim();
|
||||
const imageRaw = trimmed.startsWith("@") ? trimmed.slice(1).trim() : trimmed;
|
||||
if (!imageRaw) {
|
||||
throw new ToolInputError("image required (empty string in array)");
|
||||
}
|
||||
const looksLikeWindowsDrivePath = /^[a-zA-Z]:[\\/]/.test(imageRaw);
|
||||
const hasScheme = /^[a-z][a-z0-9+.-]*:/i.test(imageRaw);
|
||||
const isFileUrl = /^file:/i.test(imageRaw);
|
||||
const isHttpUrl = /^https?:\/\//i.test(imageRaw);
|
||||
const isDataUrl = /^data:/i.test(imageRaw);
|
||||
if (hasScheme && !looksLikeWindowsDrivePath && !isFileUrl && !isHttpUrl && !isDataUrl) {
|
||||
throw new ToolInputError(
|
||||
`Unsupported image reference: ${imageRawInput}. Use a file path, a file:// URL, a data: URL, or an http(s) URL.`,
|
||||
);
|
||||
}
|
||||
if (params.sandboxConfig && isHttpUrl) {
|
||||
throw new ToolInputError("Sandboxed image_generate does not allow remote URLs.");
|
||||
}
|
||||
|
||||
const resolvedImage = (() => {
|
||||
if (params.sandboxConfig) {
|
||||
return imageRaw;
|
||||
}
|
||||
if (imageRaw.startsWith("~")) {
|
||||
return resolveUserPath(imageRaw);
|
||||
}
|
||||
return imageRaw;
|
||||
})();
|
||||
|
||||
const resolvedPathInfo: { resolved: string; rewrittenFrom?: string } = isDataUrl
|
||||
? { resolved: "" }
|
||||
: params.sandboxConfig
|
||||
? await resolveSandboxedBridgeMediaPath({
|
||||
sandbox: params.sandboxConfig,
|
||||
mediaPath: resolvedImage,
|
||||
inboundFallbackDir: "media/inbound",
|
||||
})
|
||||
: {
|
||||
resolved: resolvedImage.startsWith("file://")
|
||||
? resolvedImage.slice("file://".length)
|
||||
: resolvedImage,
|
||||
};
|
||||
const resolvedPath = isDataUrl ? null : resolvedPathInfo.resolved;
|
||||
|
||||
const media = isDataUrl
|
||||
? decodeDataUrl(resolvedImage)
|
||||
: params.sandboxConfig
|
||||
? await loadWebMedia(resolvedPath ?? resolvedImage, {
|
||||
maxBytes: params.maxBytes,
|
||||
sandboxValidated: true,
|
||||
readFile: createSandboxBridgeReadFile({ sandbox: params.sandboxConfig }),
|
||||
})
|
||||
: await loadWebMedia(resolvedPath ?? resolvedImage, {
|
||||
maxBytes: params.maxBytes,
|
||||
localRoots: params.localRoots,
|
||||
});
|
||||
if (media.kind !== "image") {
|
||||
throw new ToolInputError(`Unsupported media type: ${media.kind}`);
|
||||
}
|
||||
|
||||
const mimeType =
|
||||
("contentType" in media && media.contentType) ||
|
||||
("mimeType" in media && media.mimeType) ||
|
||||
"image/png";
|
||||
|
||||
loaded.push({
|
||||
sourceImage: {
|
||||
buffer: media.buffer,
|
||||
mimeType,
|
||||
},
|
||||
resolvedImage,
|
||||
...(resolvedPathInfo.rewrittenFrom ? { rewrittenFrom: resolvedPathInfo.rewrittenFrom } : {}),
|
||||
});
|
||||
}
|
||||
|
||||
return loaded;
|
||||
}
|
||||
|
||||
async function inferResolutionFromInputImages(
|
||||
images: ImageGenerationSourceImage[],
|
||||
): Promise<ImageGenerationResolution> {
|
||||
let maxDimension = 0;
|
||||
for (const image of images) {
|
||||
const meta = await getImageMetadata(image.buffer);
|
||||
const dimension = Math.max(meta?.width ?? 0, meta?.height ?? 0);
|
||||
maxDimension = Math.max(maxDimension, dimension);
|
||||
}
|
||||
if (maxDimension >= 3000) {
|
||||
return "4K";
|
||||
}
|
||||
if (maxDimension >= 1500) {
|
||||
return "2K";
|
||||
}
|
||||
return DEFAULT_RESOLUTION;
|
||||
}
|
||||
|
||||
export function createImageGenerateTool(options?: {
|
||||
config?: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
workspaceDir?: string;
|
||||
sandbox?: ImageGenerateSandboxConfig;
|
||||
fsPolicy?: ToolFsPolicy;
|
||||
}): AnyAgentTool | null {
|
||||
const cfg = options?.config ?? loadConfig();
|
||||
if (!hasConfiguredImageGenerationModel(cfg)) {
|
||||
return null;
|
||||
}
|
||||
const localRoots = resolveMediaToolLocalRoots(options?.workspaceDir, {
|
||||
workspaceOnly: options?.fsPolicy?.workspaceOnly === true,
|
||||
});
|
||||
const sandboxConfig =
|
||||
options?.sandbox && options.sandbox.root.trim()
|
||||
? {
|
||||
root: options.sandbox.root.trim(),
|
||||
bridge: options.sandbox.bridge,
|
||||
workspaceOnly: options.fsPolicy?.workspaceOnly === true,
|
||||
}
|
||||
: null;
|
||||
|
||||
return {
|
||||
label: "Image Generation",
|
||||
name: "image_generate",
|
||||
description:
|
||||
'Generate new images or edit reference images with the configured image-generation model. Use action="list" to inspect available providers/models. Generated images are delivered automatically from the tool result as MEDIA paths.',
|
||||
parameters: ImageGenerateToolSchema,
|
||||
execute: async (_toolCallId, args) => {
|
||||
const params = args as Record<string, unknown>;
|
||||
const action = resolveAction(params);
|
||||
if (action === "list") {
|
||||
const providers = listRuntimeImageGenerationProviders({ config: cfg }).map((provider) => ({
|
||||
id: provider.id,
|
||||
...(provider.label ? { label: provider.label } : {}),
|
||||
...(provider.defaultModel ? { defaultModel: provider.defaultModel } : {}),
|
||||
models: provider.models ?? (provider.defaultModel ? [provider.defaultModel] : []),
|
||||
...(provider.supportedSizes ? { supportedSizes: [...provider.supportedSizes] } : {}),
|
||||
...(provider.supportedResolutions
|
||||
? { supportedResolutions: [...provider.supportedResolutions] }
|
||||
: {}),
|
||||
...(typeof provider.supportsImageEditing === "boolean"
|
||||
? { supportsImageEditing: provider.supportsImageEditing }
|
||||
: {}),
|
||||
}));
|
||||
const lines = providers.flatMap((provider) => {
|
||||
const caps: string[] = [];
|
||||
if (provider.supportsImageEditing) {
|
||||
caps.push("editing");
|
||||
}
|
||||
if ((provider.supportedResolutions?.length ?? 0) > 0) {
|
||||
caps.push(`resolutions ${provider.supportedResolutions?.join("/")}`);
|
||||
}
|
||||
if ((provider.supportedSizes?.length ?? 0) > 0) {
|
||||
caps.push(`sizes ${provider.supportedSizes?.join(", ")}`);
|
||||
}
|
||||
const modelLine =
|
||||
provider.models.length > 0
|
||||
? `models: ${provider.models.join(", ")}`
|
||||
: "models: unknown";
|
||||
return [
|
||||
`${provider.id}${provider.defaultModel ? ` (default ${provider.defaultModel})` : ""}`,
|
||||
` ${modelLine}`,
|
||||
...(caps.length > 0 ? [` capabilities: ${caps.join("; ")}`] : []),
|
||||
];
|
||||
});
|
||||
return {
|
||||
content: [{ type: "text", text: lines.join("\n") }],
|
||||
details: { providers },
|
||||
};
|
||||
}
|
||||
|
||||
const prompt = readStringParam(params, "prompt", { required: true });
|
||||
const imageInputs = normalizeReferenceImages(params);
|
||||
const model = readStringParam(params, "model");
|
||||
const size = readStringParam(params, "size");
|
||||
const explicitResolution = normalizeResolution(readStringParam(params, "resolution"));
|
||||
const count = resolveRequestedCount(params);
|
||||
const loadedReferenceImages = await loadReferenceImages({
|
||||
imageInputs,
|
||||
localRoots,
|
||||
sandboxConfig,
|
||||
});
|
||||
const inputImages = loadedReferenceImages.map((entry) => entry.sourceImage);
|
||||
const resolution =
|
||||
explicitResolution ??
|
||||
(size
|
||||
? undefined
|
||||
: inputImages.length > 0
|
||||
? await inferResolutionFromInputImages(inputImages)
|
||||
: undefined);
|
||||
|
||||
const result = await generateImage({
|
||||
cfg,
|
||||
prompt,
|
||||
agentDir: options?.agentDir,
|
||||
modelOverride: model,
|
||||
size,
|
||||
resolution,
|
||||
count,
|
||||
inputImages,
|
||||
});
|
||||
|
||||
const savedImages = await Promise.all(
|
||||
result.images.map((image) =>
|
||||
saveMediaBuffer(
|
||||
image.buffer,
|
||||
image.mimeType,
|
||||
"tool-image-generation",
|
||||
undefined,
|
||||
image.fileName,
|
||||
),
|
||||
),
|
||||
);
|
||||
|
||||
const revisedPrompts = result.images
|
||||
.map((image) => image.revisedPrompt?.trim())
|
||||
.filter((entry): entry is string => Boolean(entry));
|
||||
const lines = [
|
||||
`Generated ${savedImages.length} image${savedImages.length === 1 ? "" : "s"} with ${result.provider}/${result.model}.`,
|
||||
...savedImages.map((image) => `MEDIA:${image.path}`),
|
||||
];
|
||||
|
||||
return {
|
||||
content: [{ type: "text", text: lines.join("\n") }],
|
||||
details: {
|
||||
provider: result.provider,
|
||||
model: result.model,
|
||||
count: savedImages.length,
|
||||
paths: savedImages.map((image) => image.path),
|
||||
...(imageInputs.length === 1
|
||||
? {
|
||||
image: loadedReferenceImages[0]?.resolvedImage,
|
||||
...(loadedReferenceImages[0]?.rewrittenFrom
|
||||
? { rewrittenFrom: loadedReferenceImages[0].rewrittenFrom }
|
||||
: {}),
|
||||
}
|
||||
: imageInputs.length > 1
|
||||
? {
|
||||
images: loadedReferenceImages.map((entry) => ({
|
||||
image: entry.resolvedImage,
|
||||
...(entry.rewrittenFrom ? { rewrittenFrom: entry.rewrittenFrom } : {}),
|
||||
})),
|
||||
}
|
||||
: {}),
|
||||
...(resolution ? { resolution } : {}),
|
||||
...(size ? { size } : {}),
|
||||
attempts: result.attempts,
|
||||
metadata: result.metadata,
|
||||
...(revisedPrompts.length > 0 ? { revisedPrompts } : {}),
|
||||
},
|
||||
};
|
||||
},
|
||||
};
|
||||
}
|
||||
@ -131,4 +131,78 @@ describe("Google image-generation provider", () => {
|
||||
model: "gemini-3.1-flash-image-preview",
|
||||
});
|
||||
});
|
||||
|
||||
it("sends reference images and explicit resolution for edit flows", async () => {
|
||||
vi.spyOn(modelAuth, "resolveApiKeyForProvider").mockResolvedValue({
|
||||
apiKey: "google-test-key",
|
||||
source: "env",
|
||||
mode: "api-key",
|
||||
});
|
||||
const fetchMock = vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
json: async () => ({
|
||||
candidates: [
|
||||
{
|
||||
content: {
|
||||
parts: [
|
||||
{
|
||||
inlineData: {
|
||||
mimeType: "image/png",
|
||||
data: Buffer.from("png-data").toString("base64"),
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
}),
|
||||
});
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
|
||||
const provider = buildGoogleImageGenerationProvider();
|
||||
await provider.generateImage({
|
||||
provider: "google",
|
||||
model: "gemini-3-pro-image-preview",
|
||||
prompt: "Change only the sky to a sunset.",
|
||||
cfg: {},
|
||||
resolution: "4K",
|
||||
inputImages: [
|
||||
{
|
||||
buffer: Buffer.from("reference-bytes"),
|
||||
mimeType: "image/png",
|
||||
fileName: "reference.png",
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(fetchMock).toHaveBeenCalledWith(
|
||||
"https://generativelanguage.googleapis.com/v1beta/models/gemini-3-pro-image-preview:generateContent",
|
||||
expect.objectContaining({
|
||||
method: "POST",
|
||||
body: JSON.stringify({
|
||||
contents: [
|
||||
{
|
||||
role: "user",
|
||||
parts: [
|
||||
{
|
||||
inlineData: {
|
||||
mimeType: "image/png",
|
||||
data: Buffer.from("reference-bytes").toString("base64"),
|
||||
},
|
||||
},
|
||||
{ text: "Change only the sky to a sunset." },
|
||||
],
|
||||
},
|
||||
],
|
||||
generationConfig: {
|
||||
responseModalities: ["TEXT", "IMAGE"],
|
||||
imageConfig: {
|
||||
aspectRatio: "1:1",
|
||||
imageSize: "4K",
|
||||
},
|
||||
},
|
||||
}),
|
||||
}),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@ -79,11 +79,16 @@ export function buildGoogleImageGenerationProvider(): ImageGenerationProviderPlu
|
||||
return {
|
||||
id: "google",
|
||||
label: "Google",
|
||||
defaultModel: DEFAULT_GOOGLE_IMAGE_MODEL,
|
||||
models: [DEFAULT_GOOGLE_IMAGE_MODEL, "gemini-3-pro-image-preview"],
|
||||
supportedResolutions: ["1K", "2K", "4K"],
|
||||
supportsImageEditing: true,
|
||||
async generateImage(req) {
|
||||
const auth = await resolveApiKeyForProvider({
|
||||
provider: "google",
|
||||
cfg: req.cfg,
|
||||
agentDir: req.agentDir,
|
||||
store: req.authStore,
|
||||
});
|
||||
if (!auth.apiKey) {
|
||||
throw new Error("Google API key missing");
|
||||
@ -98,6 +103,16 @@ export function buildGoogleImageGenerationProvider(): ImageGenerationProviderPlu
|
||||
const authHeaders = parseGeminiAuth(auth.apiKey);
|
||||
const headers = new Headers(authHeaders.headers);
|
||||
const imageConfig = mapSizeToImageConfig(req.size);
|
||||
const inputParts = (req.inputImages ?? []).map((image) => ({
|
||||
inlineData: {
|
||||
mimeType: image.mimeType,
|
||||
data: image.buffer.toString("base64"),
|
||||
},
|
||||
}));
|
||||
const resolvedImageConfig = {
|
||||
...imageConfig,
|
||||
...(req.resolution ? { imageSize: req.resolution } : {}),
|
||||
};
|
||||
|
||||
const { response: res, release } = await postJsonRequest({
|
||||
url: `${baseUrl}/models/${model}:generateContent`,
|
||||
@ -106,12 +121,14 @@ export function buildGoogleImageGenerationProvider(): ImageGenerationProviderPlu
|
||||
contents: [
|
||||
{
|
||||
role: "user",
|
||||
parts: [{ text: req.prompt }],
|
||||
parts: [...inputParts, { text: req.prompt }],
|
||||
},
|
||||
],
|
||||
generationConfig: {
|
||||
responseModalities: ["TEXT", "IMAGE"],
|
||||
...(imageConfig ? { imageConfig } : {}),
|
||||
...(Object.keys(resolvedImageConfig).length > 0
|
||||
? { imageConfig: resolvedImageConfig }
|
||||
: {}),
|
||||
},
|
||||
},
|
||||
timeoutMs: 60_000,
|
||||
|
||||
@ -8,7 +8,7 @@ describe("OpenAI image-generation provider", () => {
|
||||
});
|
||||
|
||||
it("generates PNG buffers from the OpenAI Images API", async () => {
|
||||
vi.spyOn(modelAuth, "resolveApiKeyForProvider").mockResolvedValue({
|
||||
const resolveApiKeySpy = vi.spyOn(modelAuth, "resolveApiKeyForProvider").mockResolvedValue({
|
||||
apiKey: "sk-test",
|
||||
source: "env",
|
||||
mode: "api-key",
|
||||
@ -27,17 +27,31 @@ describe("OpenAI image-generation provider", () => {
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
|
||||
const provider = buildOpenAIImageGenerationProvider();
|
||||
const authStore = { version: 1, profiles: {} };
|
||||
const result = await provider.generateImage({
|
||||
provider: "openai",
|
||||
model: "gpt-image-1",
|
||||
prompt: "draw a cat",
|
||||
cfg: {},
|
||||
authStore,
|
||||
});
|
||||
|
||||
expect(resolveApiKeySpy).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
provider: "openai",
|
||||
store: authStore,
|
||||
}),
|
||||
);
|
||||
expect(fetchMock).toHaveBeenCalledWith(
|
||||
"https://api.openai.com/v1/images/generations",
|
||||
expect.objectContaining({
|
||||
method: "POST",
|
||||
body: JSON.stringify({
|
||||
model: "gpt-image-1",
|
||||
prompt: "draw a cat",
|
||||
n: 1,
|
||||
size: "1024x1024",
|
||||
}),
|
||||
}),
|
||||
);
|
||||
expect(result).toEqual({
|
||||
@ -52,4 +66,18 @@ describe("OpenAI image-generation provider", () => {
|
||||
model: "gpt-image-1",
|
||||
});
|
||||
});
|
||||
|
||||
it("rejects reference-image edits for now", async () => {
|
||||
const provider = buildOpenAIImageGenerationProvider();
|
||||
|
||||
await expect(
|
||||
provider.generateImage({
|
||||
provider: "openai",
|
||||
model: "gpt-image-1",
|
||||
prompt: "Edit this image",
|
||||
cfg: {},
|
||||
inputImages: [{ buffer: Buffer.from("x"), mimeType: "image/png" }],
|
||||
}),
|
||||
).rejects.toThrow("does not support reference-image edits");
|
||||
});
|
||||
});
|
||||
|
||||
@ -22,12 +22,18 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProviderPlu
|
||||
return {
|
||||
id: "openai",
|
||||
label: "OpenAI",
|
||||
defaultModel: DEFAULT_OPENAI_IMAGE_MODEL,
|
||||
models: [DEFAULT_OPENAI_IMAGE_MODEL],
|
||||
supportedSizes: ["1024x1024", "1024x1536", "1536x1024"],
|
||||
async generateImage(req) {
|
||||
if ((req.inputImages?.length ?? 0) > 0) {
|
||||
throw new Error("OpenAI image generation provider does not support reference-image edits");
|
||||
}
|
||||
const auth = await resolveApiKeyForProvider({
|
||||
provider: "openai",
|
||||
cfg: req.cfg,
|
||||
agentDir: req.agentDir,
|
||||
store: req.authStore,
|
||||
});
|
||||
if (!auth.apiKey) {
|
||||
throw new Error("OpenAI API key missing");
|
||||
@ -44,7 +50,6 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProviderPlu
|
||||
prompt: req.prompt,
|
||||
n: req.count ?? 1,
|
||||
size: req.size ?? DEFAULT_SIZE,
|
||||
response_format: "b64_json",
|
||||
}),
|
||||
});
|
||||
|
||||
|
||||
@ -11,13 +11,16 @@ describe("image-generation runtime helpers", () => {
|
||||
|
||||
it("generates images through the active image-generation registry", async () => {
|
||||
const pluginRegistry = createEmptyPluginRegistry();
|
||||
const authStore = { version: 1, profiles: {} } as const;
|
||||
let seenAuthStore: unknown;
|
||||
pluginRegistry.imageGenerationProviders.push({
|
||||
pluginId: "image-plugin",
|
||||
pluginName: "Image Plugin",
|
||||
source: "test",
|
||||
provider: {
|
||||
id: "image-plugin",
|
||||
async generateImage() {
|
||||
async generateImage(req) {
|
||||
seenAuthStore = req.authStore;
|
||||
return {
|
||||
images: [
|
||||
{
|
||||
@ -47,11 +50,13 @@ describe("image-generation runtime helpers", () => {
|
||||
cfg,
|
||||
prompt: "draw a cat",
|
||||
agentDir: "/tmp/agent",
|
||||
authStore,
|
||||
});
|
||||
|
||||
expect(result.provider).toBe("image-plugin");
|
||||
expect(result.model).toBe("img-v1");
|
||||
expect(result.attempts).toEqual([]);
|
||||
expect(seenAuthStore).toEqual(authStore);
|
||||
expect(result.images).toEqual([
|
||||
{
|
||||
buffer: Buffer.from("png-bytes"),
|
||||
@ -69,6 +74,9 @@ describe("image-generation runtime helpers", () => {
|
||||
source: "test",
|
||||
provider: {
|
||||
id: "image-plugin",
|
||||
defaultModel: "img-v1",
|
||||
models: ["img-v1", "img-v2"],
|
||||
supportedResolutions: ["1K", "2K"],
|
||||
generateImage: async () => ({
|
||||
images: [{ buffer: Buffer.from("x"), mimeType: "image/png" }],
|
||||
}),
|
||||
@ -76,6 +84,13 @@ describe("image-generation runtime helpers", () => {
|
||||
});
|
||||
setActivePluginRegistry(pluginRegistry);
|
||||
|
||||
expect(listRuntimeImageGenerationProviders()).toMatchObject([{ id: "image-plugin" }]);
|
||||
expect(listRuntimeImageGenerationProviders()).toMatchObject([
|
||||
{
|
||||
id: "image-plugin",
|
||||
defaultModel: "img-v1",
|
||||
models: ["img-v1", "img-v2"],
|
||||
supportedResolutions: ["1K", "2K"],
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import type { AuthProfileStore } from "../agents/auth-profiles.js";
|
||||
import { describeFailoverError, isFailoverError } from "../agents/failover-error.js";
|
||||
import type { FallbackAttempt } from "../agents/model-fallback.types.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
@ -7,7 +8,12 @@ import {
|
||||
} from "../config/model-input.js";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import { getImageGenerationProvider, listImageGenerationProviders } from "./provider-registry.js";
|
||||
import type { GeneratedImageAsset, ImageGenerationResult } from "./types.js";
|
||||
import type {
|
||||
GeneratedImageAsset,
|
||||
ImageGenerationResolution,
|
||||
ImageGenerationResult,
|
||||
ImageGenerationSourceImage,
|
||||
} from "./types.js";
|
||||
|
||||
const log = createSubsystemLogger("image-generation");
|
||||
|
||||
@ -15,9 +21,12 @@ export type GenerateImageParams = {
|
||||
cfg: OpenClawConfig;
|
||||
prompt: string;
|
||||
agentDir?: string;
|
||||
authStore?: AuthProfileStore;
|
||||
modelOverride?: string;
|
||||
count?: number;
|
||||
size?: string;
|
||||
resolution?: ImageGenerationResolution;
|
||||
inputImages?: ImageGenerationSourceImage[];
|
||||
};
|
||||
|
||||
export type GenerateImageRuntimeResult = {
|
||||
@ -130,8 +139,11 @@ export async function generateImage(
|
||||
prompt: params.prompt,
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
authStore: params.authStore,
|
||||
count: params.count,
|
||||
size: params.size,
|
||||
resolution: params.resolution,
|
||||
inputImages: params.inputImages,
|
||||
});
|
||||
if (!Array.isArray(result.images) || result.images.length === 0) {
|
||||
throw new Error("Image generation provider returned no images.");
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import type { AuthProfileStore } from "../agents/auth-profiles.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
|
||||
export type GeneratedImageAsset = {
|
||||
@ -8,14 +9,26 @@ export type GeneratedImageAsset = {
|
||||
metadata?: Record<string, unknown>;
|
||||
};
|
||||
|
||||
export type ImageGenerationResolution = "1K" | "2K" | "4K";
|
||||
|
||||
export type ImageGenerationSourceImage = {
|
||||
buffer: Buffer;
|
||||
mimeType: string;
|
||||
fileName?: string;
|
||||
metadata?: Record<string, unknown>;
|
||||
};
|
||||
|
||||
export type ImageGenerationRequest = {
|
||||
provider: string;
|
||||
model: string;
|
||||
prompt: string;
|
||||
cfg: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
authStore?: AuthProfileStore;
|
||||
count?: number;
|
||||
size?: string;
|
||||
resolution?: ImageGenerationResolution;
|
||||
inputImages?: ImageGenerationSourceImage[];
|
||||
};
|
||||
|
||||
export type ImageGenerationResult = {
|
||||
@ -28,6 +41,10 @@ export type ImageGenerationProvider = {
|
||||
id: string;
|
||||
aliases?: string[];
|
||||
label?: string;
|
||||
defaultModel?: string;
|
||||
models?: string[];
|
||||
supportedSizes?: string[];
|
||||
supportedResolutions?: ImageGenerationResolution[];
|
||||
supportsImageEditing?: boolean;
|
||||
generateImage: (req: ImageGenerationRequest) => Promise<ImageGenerationResult>;
|
||||
};
|
||||
|
||||
@ -3,8 +3,10 @@
|
||||
export type {
|
||||
GeneratedImageAsset,
|
||||
ImageGenerationProvider,
|
||||
ImageGenerationResolution,
|
||||
ImageGenerationRequest,
|
||||
ImageGenerationResult,
|
||||
ImageGenerationSourceImage,
|
||||
} from "../image-generation/types.js";
|
||||
|
||||
export { buildGoogleImageGenerationProvider } from "../image-generation/providers/google.js";
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user