Agents: harden CLI prompt image hydration (#51373)
This commit is contained in:
parent
315713fc40
commit
b36f8d23e9
@ -205,6 +205,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Plugins/Matrix: add a new Matrix plugin backed by the official `matrix-js-sdk`. If you are upgrading from the previous public Matrix plugin, follow the migration guide: https://docs.openclaw.ai/install/migrating-matrix Thanks @gumadeiras.
|
||||
- Discord/commands: switch native command deployment to Carbon reconcile by default so Discord restarts stop churning slash commands through OpenClaw’s local deploy path. (#46597) Thanks @huntharo and @thewilloftheshadow.
|
||||
- Plugins/Matrix: durably dedupe inbound room events across gateway restarts so previously handled Matrix messages are not replayed as new, while preserving clean-restart backlog delivery for unseen events. (#50922) Thanks @gumadeiras.
|
||||
- BlueBubbles/CLI agents: restore inbound prompt image refs for CLI-routed turns, reapply embedded runner image size guardrails, and cover both CLI image transport paths with regression tests. (#51373)
|
||||
|
||||
## 2026.3.13
|
||||
|
||||
|
||||
116
src/agents/cli-runner.helpers.test.ts
Normal file
116
src/agents/cli-runner.helpers.test.ts
Normal file
@ -0,0 +1,116 @@
|
||||
import type { ImageContent } from "@mariozechner/pi-ai";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { MAX_IMAGE_BYTES } from "../media/constants.js";
|
||||
import type { SandboxFsBridge } from "./sandbox/fs-bridge.js";
|
||||
|
||||
// Shared mock handles. They are created through vi.hoisted so the vi.mock
// factories in this file can reference them.
const mocks = vi.hoisted(() => {
  const loadImageFromRef = vi.fn();
  const sanitizeImageBlocks = vi.fn();
  return { loadImageFromRef, sanitizeImageBlocks };
});
|
||||
|
||||
// Partial mock: every export of the real images module is kept, except
// loadImageFromRef, which forwards its arguments to the hoisted mock handle.
vi.mock("./pi-embedded-runner/run/images.js", async (importOriginal) => {
  const realModule = await importOriginal<typeof import("./pi-embedded-runner/run/images.js")>();
  const loadImageFromRef = (...callArgs: unknown[]) => mocks.loadImageFromRef(...callArgs);
  return { ...realModule, loadImageFromRef };
});
|
||||
|
||||
// Partial mock: every export of the real tool-images module is kept, except
// sanitizeImageBlocks, which forwards its arguments to the hoisted mock handle.
vi.mock("./tool-images.js", async (importOriginal) => {
  const realModule = await importOriginal<typeof import("./tool-images.js")>();
  const sanitizeImageBlocks = (...callArgs: unknown[]) => mocks.sanitizeImageBlocks(...callArgs);
  return { ...realModule, sanitizeImageBlocks };
});
|
||||
|
||||
import { loadPromptRefImages } from "./cli-runner/helpers.js";
|
||||
|
||||
/**
 * Unit tests for loadPromptRefImages with its two collaborators
 * (loadImageFromRef and sanitizeImageBlocks) replaced by mocks.
 */
describe("loadPromptRefImages", () => {
  beforeEach(() => {
    mocks.loadImageFromRef.mockReset();
    mocks.sanitizeImageBlocks.mockReset();
    // Default sanitizer: hand the images back untouched and report nothing dropped.
    mocks.sanitizeImageBlocks.mockImplementation(async (blocks: ImageContent[]) => {
      return { images: blocks, dropped: 0 };
    });
  });

  it("returns empty results when the prompt has no image refs", async () => {
    const pending = loadPromptRefImages({ prompt: "just text", workspaceDir: "/workspace" });

    expect(await pending).toEqual([]);

    // Neither collaborator should be touched when no refs are detected.
    expect(mocks.loadImageFromRef).not.toHaveBeenCalled();
    expect(mocks.sanitizeImageBlocks).not.toHaveBeenCalled();
  });

  it("passes the max-byte guardrail through load and sanitize", async () => {
    const rawImage: ImageContent = {
      type: "image",
      data: "c29tZS1pbWFnZQ==",
      mimeType: "image/png",
    };
    const cleanedImage: ImageContent = {
      type: "image",
      data: "c2FuaXRpemVkLWltYWdl",
      mimeType: "image/jpeg",
    };
    const sandbox = { root: "/sandbox", bridge: {} as SandboxFsBridge };

    mocks.loadImageFromRef.mockResolvedValueOnce(rawImage);
    mocks.sanitizeImageBlocks.mockResolvedValueOnce({ images: [cleanedImage], dropped: 0 });

    const hydrated = await loadPromptRefImages({
      prompt: "Look at /tmp/photo.png",
      workspaceDir: "/workspace",
      workspaceOnly: true,
      sandbox,
    });

    // Inspect the first (and only expected) loader call argument by argument.
    const firstLoadCall = mocks.loadImageFromRef.mock.calls[0] ?? [];
    const [ref, workspaceDir, options] = firstLoadCall;
    expect(ref).toMatchObject({ resolved: "/tmp/photo.png", type: "path" });
    expect(workspaceDir).toBe("/workspace");
    expect(options).toEqual({
      maxBytes: MAX_IMAGE_BYTES,
      workspaceOnly: true,
      sandbox,
    });

    expect(mocks.sanitizeImageBlocks).toHaveBeenCalledWith([rawImage], "prompt:images", {
      maxBytes: MAX_IMAGE_BYTES,
    });
    // The function returns what the sanitizer produced, not the raw load result.
    expect(hydrated).toEqual([cleanedImage]);
  });

  it("dedupes repeated refs and skips failed loads before sanitizing", async () => {
    const onlyImage: ImageContent = {
      type: "image",
      data: "b25lLWltYWdl",
      mimeType: "image/png",
    };

    // First load succeeds, second load resolves to null.
    mocks.loadImageFromRef.mockResolvedValueOnce(onlyImage);
    mocks.loadImageFromRef.mockResolvedValueOnce(null);

    const hydrated = await loadPromptRefImages({
      prompt: "Compare /tmp/a.png with /tmp/a.png and /tmp/b.png",
      workspaceDir: "/workspace",
    });

    // /tmp/a.png appears twice in the prompt but must be loaded only once.
    expect(mocks.loadImageFromRef).toHaveBeenCalledTimes(2);
    const resolvedPaths = mocks.loadImageFromRef.mock.calls.map((callArgs: unknown[]) => {
      const ref = callArgs[0] as { resolved?: string } | undefined;
      return ref?.resolved;
    });
    expect(resolvedPaths).toEqual(["/tmp/a.png", "/tmp/b.png"]);

    // Only the successfully loaded image reaches the sanitizer.
    expect(mocks.sanitizeImageBlocks).toHaveBeenCalledWith([onlyImage], "prompt:images", {
      maxBytes: MAX_IMAGE_BYTES,
    });
    expect(hydrated).toEqual([onlyImage]);
  });
});
|
||||
@ -12,6 +12,8 @@ import type { WorkspaceBootstrapFile } from "./workspace.js";
|
||||
const supervisorSpawnMock = vi.fn();
|
||||
const enqueueSystemEventMock = vi.fn();
|
||||
const requestHeartbeatNowMock = vi.fn();
|
||||
const SMALL_PNG_BASE64 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
|
||||
const hoisted = vi.hoisted(() => {
|
||||
type BootstrapContext = {
|
||||
bootstrapFiles: WorkspaceBootstrapFile[];
|
||||
@ -206,13 +208,7 @@ describe("runCliAgent with process supervisor", () => {
|
||||
path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-prompt-image-"),
|
||||
);
|
||||
const sourceImage = path.join(tempDir, "bb-image.png");
|
||||
await fs.writeFile(
|
||||
sourceImage,
|
||||
Buffer.from(
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=",
|
||||
"base64",
|
||||
),
|
||||
);
|
||||
await fs.writeFile(sourceImage, Buffer.from(SMALL_PNG_BASE64, "base64"));
|
||||
|
||||
try {
|
||||
await runCliAgent({
|
||||
@ -237,6 +233,92 @@ describe("runCliAgent with process supervisor", () => {
|
||||
expect(argv[imageArgIndex + 1]).not.toBe(sourceImage);
|
||||
});
|
||||
|
||||
// Runs a backend invoked as "claude-cli" with a prompt that references an image
// file and checks the spawn input: no "--image" flag, and some argv entry that
// mentions an "openclaw-cli-images-" path.
it("appends hydrated prompt media refs to generic backend prompts", async () => {
  const managedRun = createManagedRun({
    reason: "exit",
    exitCode: 0,
    exitSignal: null,
    durationMs: 50,
    stdout: "ok",
    stderr: "",
    timedOut: false,
    noOutputTimedOut: false,
  });
  supervisorSpawnMock.mockResolvedValueOnce(managedRun);

  const tempPrefix = path.join(
    resolvePreferredOpenClawTmpDir(),
    "openclaw-cli-prompt-image-generic-",
  );
  const tempDir = await fs.mkdtemp(tempPrefix);
  const sourceImage = path.join(tempDir, "claude-image.png");
  const pngBytes = Buffer.from(SMALL_PNG_BASE64, "base64");
  await fs.writeFile(sourceImage, pngBytes);

  try {
    await runCliAgent({
      sessionId: "s1",
      sessionFile: "/tmp/session.jsonl",
      workspaceDir: tempDir,
      prompt: `[media attached: ${sourceImage} (image/png)]\n\n<media:image>`,
      provider: "claude-cli",
      model: "claude-opus-4-1",
      timeoutMs: 1_000,
      runId: "run-prompt-image-generic",
    });
  } finally {
    // Always remove the temp dir, even when the run throws.
    await fs.rm(tempDir, { recursive: true, force: true });
  }

  const spawnInput = supervisorSpawnMock.mock.calls[0]?.[0] as {
    argv?: string[];
    input?: string;
  };
  const argv = spawnInput.argv ?? [];
  expect(argv).not.toContain("--image");

  // The prompt may be carried either on stdin or inside argv, so join both.
  const promptCarrier = [spawnInput.input ?? "", ...argv].join("\n");
  const appendedPath = argv.find((entry) => entry.includes("openclaw-cli-images-"));
  expect(appendedPath).toBeDefined();
  expect(appendedPath).not.toBe(sourceImage);
  // NOTE(review): promptCarrier is built from argv and appendedPath is an argv
  // entry, so this check is implied by the toBeDefined() assertion above.
  expect(promptCarrier).toContain(appendedPath ?? "");
});
|
||||
|
||||
// Supplies both an explicit image and a prompt that references an image file,
// then checks that exactly one "--image" flag ends up in the spawned argv.
it("prefers explicit images over prompt refs", async () => {
  const managedRun = createManagedRun({
    reason: "exit",
    exitCode: 0,
    exitSignal: null,
    durationMs: 50,
    stdout: "ok",
    stderr: "",
    timedOut: false,
    noOutputTimedOut: false,
  });
  supervisorSpawnMock.mockResolvedValueOnce(managedRun);

  const tempPrefix = path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-explicit-images-");
  const tempDir = await fs.mkdtemp(tempPrefix);
  const sourceImage = path.join(tempDir, "ignored-prompt-image.png");
  const pngBytes = Buffer.from(SMALL_PNG_BASE64, "base64");
  await fs.writeFile(sourceImage, pngBytes);

  try {
    await runCliAgent({
      sessionId: "s1",
      sessionFile: "/tmp/session.jsonl",
      workspaceDir: tempDir,
      prompt: `[media attached: ${sourceImage} (image/png)]\n\n<media:image>`,
      images: [{ type: "image", data: SMALL_PNG_BASE64, mimeType: "image/png" }],
      provider: "codex-cli",
      model: "gpt-5.2-codex",
      timeoutMs: 1_000,
      runId: "run-explicit-image-precedence",
    });
  } finally {
    // Always remove the temp dir, even when the run throws.
    await fs.rm(tempDir, { recursive: true, force: true });
  }

  const spawnInput = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] };
  const argv = spawnInput.argv ?? [];
  const imageFlags = argv.filter((arg) => arg === "--image");
  expect(imageFlags).toHaveLength(1);
});
|
||||
|
||||
it("fails with timeout when no-output watchdog trips", async () => {
|
||||
supervisorSpawnMock.mockResolvedValueOnce(
|
||||
createManagedRun({
|
||||
|
||||
@ -8,6 +8,7 @@ import { KeyedAsyncQueue } from "openclaw/plugin-sdk/keyed-async-queue";
|
||||
import type { ThinkLevel } from "../../auto-reply/thinking.js";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import type { CliBackendConfig } from "../../config/types.js";
|
||||
import { MAX_IMAGE_BYTES } from "../../media/constants.js";
|
||||
import { buildTtsSystemPromptHint } from "../../tts/tts.js";
|
||||
import { isRecord } from "../../utils.js";
|
||||
import { buildModelAliasLines } from "../model-alias-lines.js";
|
||||
@ -15,9 +16,11 @@ import { resolveDefaultModelForAgent } from "../model-selection.js";
|
||||
import { resolveOwnerDisplaySetting } from "../owner-display.js";
|
||||
import type { EmbeddedContextFile } from "../pi-embedded-helpers.js";
|
||||
import { detectImageReferences, loadImageFromRef } from "../pi-embedded-runner/run/images.js";
|
||||
import type { SandboxFsBridge } from "../sandbox/fs-bridge.js";
|
||||
import { detectRuntimeShell } from "../shell-utils.js";
|
||||
import { buildSystemPromptParams } from "../system-prompt-params.js";
|
||||
import { buildAgentSystemPrompt } from "../system-prompt.js";
|
||||
import { sanitizeImageBlocks } from "../tool-images.js";
|
||||
export { buildCliSupervisorScopeKey, resolveCliNoOutputTimeoutMs } from "./reliability.js";
|
||||
|
||||
const CLI_RUN_QUEUE = new KeyedAsyncQueue();
|
||||
@ -328,12 +331,16 @@ export function appendImagePathsToPrompt(prompt: string, paths: string[]): strin
|
||||
export async function loadPromptRefImages(params: {
|
||||
prompt: string;
|
||||
workspaceDir: string;
|
||||
maxBytes?: number;
|
||||
workspaceOnly?: boolean;
|
||||
sandbox?: { root: string; bridge: SandboxFsBridge };
|
||||
}): Promise<ImageContent[]> {
|
||||
const refs = detectImageReferences(params.prompt);
|
||||
if (refs.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const maxBytes = params.maxBytes ?? MAX_IMAGE_BYTES;
|
||||
const seen = new Set<string>();
|
||||
const images: ImageContent[] = [];
|
||||
for (const ref of refs) {
|
||||
@ -342,12 +349,20 @@ export async function loadPromptRefImages(params: {
|
||||
continue;
|
||||
}
|
||||
seen.add(key);
|
||||
const image = await loadImageFromRef(ref, params.workspaceDir);
|
||||
const image = await loadImageFromRef(ref, params.workspaceDir, {
|
||||
maxBytes,
|
||||
workspaceOnly: params.workspaceOnly,
|
||||
sandbox: params.sandbox,
|
||||
});
|
||||
if (image) {
|
||||
images.push(image);
|
||||
}
|
||||
}
|
||||
return images;
|
||||
|
||||
const { images: sanitizedImages } = await sanitizeImageBlocks(images, "prompt:images", {
|
||||
maxBytes,
|
||||
});
|
||||
return sanitizedImages;
|
||||
}
|
||||
|
||||
export async function writeCliImages(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user