Agents: harden CLI prompt image hydration (#51373)

fix(cli): hydrate prompt image refs for inbound media
2026-03-20 19:29:35 -07:00 · 2026-03-20 18:58:15 -07:00
5 changed files with 297 additions and 2 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -205,6 +205,7 @@ Docs: https://docs.openclaw.ai
 - Plugins/Matrix: add a new Matrix plugin backed by the official `matrix-js-sdk`. If you are upgrading from the previous public Matrix plugin, follow the migration guide: https://docs.openclaw.ai/install/migrating-matrix Thanks @gumadeiras.
 - Discord/commands: switch native command deployment to Carbon reconcile by default so Discord restarts stop churning slash commands through OpenClaw’s local deploy path. (#46597) Thanks @huntharo and @thewilloftheshadow.
 - Plugins/Matrix: durably dedupe inbound room events across gateway restarts so previously handled Matrix messages are not replayed as new, while preserving clean-restart backlog delivery for unseen events. (#50922) Thanks @gumadeiras.
 - BlueBubbles/CLI agents: restore inbound prompt image refs for CLI routed turns, reapply embedded runner image size guardrails, and cover both CLI image transport paths with regression tests. (#51373)
 ## 2026.3.13
--- a/src/agents/cli-runner.helpers.test.ts
+++ b/src/agents/cli-runner.helpers.test.ts
@ -0,0 +1,116 @@
 import type { ImageContent } from "@mariozechner/pi-ai";
 import { beforeEach, describe, expect, it, vi } from "vitest";
 import { MAX_IMAGE_BYTES } from "../media/constants.js";
 import type { SandboxFsBridge } from "./sandbox/fs-bridge.js";
 // Spies shared by the two module mocks below. They are created through `vi.hoisted` so that they
 // already exist when the `vi.mock` factories run (Vitest hoists `vi.mock` calls above imports).
 const mocks = vi.hoisted(() => ({
  loadImageFromRef: vi.fn(),
  sanitizeImageBlocks: vi.fn(),
 }));
 // Partial mock: only `loadImageFromRef` is replaced; every other export is spread through from
 // the real module.
 vi.mock("./pi-embedded-runner/run/images.js", async (importOriginal) => {
  const actual = await importOriginal<typeof import("./pi-embedded-runner/run/images.js")>();
  return {
    ...actual,
    // Forward through a wrapper so each call reaches the spy held in `mocks`.
    loadImageFromRef: (...args: unknown[]) => mocks.loadImageFromRef(...args),
  };
 });
 // Partial mock: only `sanitizeImageBlocks` is replaced; the remaining exports stay real.
 vi.mock("./tool-images.js", async (importOriginal) => {
  const actual = await importOriginal<typeof import("./tool-images.js")>();
  return {
    ...actual,
    sanitizeImageBlocks: (...args: unknown[]) => mocks.sanitizeImageBlocks(...args),
  };
 });
 import { loadPromptRefImages } from "./cli-runner/helpers.js";
 // Unit tests for `loadPromptRefImages`, run against the mocked `loadImageFromRef` and
 // `sanitizeImageBlocks` spies.
 describe("loadPromptRefImages", () => {
  beforeEach(() => {
    // Clear call history and queued return values, then install a default sanitizer that returns
    // its input images unchanged and reports nothing dropped.
    mocks.loadImageFromRef.mockReset();
    mocks.sanitizeImageBlocks.mockReset();
    mocks.sanitizeImageBlocks.mockImplementation(async (images: ImageContent[]) => ({
      images,
      dropped: 0,
    }));
  });
  // A prompt without image refs must resolve to [] without calling the loader or the sanitizer.
  it("returns empty results when the prompt has no image refs", async () => {
    await expect(
      loadPromptRefImages({
        prompt: "just text",
        workspaceDir: "/workspace",
      }),
    ).resolves.toEqual([]);
    expect(mocks.loadImageFromRef).not.toHaveBeenCalled();
    expect(mocks.sanitizeImageBlocks).not.toHaveBeenCalled();
  });
  // With no explicit `maxBytes`, both the loader options and the sanitizer options are expected
  // to carry MAX_IMAGE_BYTES; `workspaceOnly` and `sandbox` are expected to reach the loader as-is.
  it("passes the max-byte guardrail through load and sanitize", async () => {
    const loadedImage: ImageContent = {
      type: "image",
      data: "c29tZS1pbWFnZQ==",
      mimeType: "image/png",
    };
    // Deliberately different from `loadedImage` so the final assertion proves the function
    // returns the sanitizer's output rather than the raw loaded image.
    const sanitizedImage: ImageContent = {
      type: "image",
      data: "c2FuaXRpemVkLWltYWdl",
      mimeType: "image/jpeg",
    };
    const sandbox = {
      root: "/sandbox",
      bridge: {} as SandboxFsBridge,
    };
    mocks.loadImageFromRef.mockResolvedValueOnce(loadedImage);
    mocks.sanitizeImageBlocks.mockResolvedValueOnce({ images: [sanitizedImage], dropped: 0 });
    const result = await loadPromptRefImages({
      prompt: "Look at /tmp/photo.png",
      workspaceDir: "/workspace",
      workspaceOnly: true,
      sandbox,
    });
    // Inspect the positional arguments of the first (and only expected) loader call.
    const [ref, workspaceDir, options] = mocks.loadImageFromRef.mock.calls[0] ?? [];
    expect(ref).toMatchObject({ resolved: "/tmp/photo.png", type: "path" });
    expect(workspaceDir).toBe("/workspace");
    expect(options).toEqual({
      maxBytes: MAX_IMAGE_BYTES,
      workspaceOnly: true,
      sandbox,
    });
    expect(mocks.sanitizeImageBlocks).toHaveBeenCalledWith([loadedImage], "prompt:images", {
      maxBytes: MAX_IMAGE_BYTES,
    });
    expect(result).toEqual([sanitizedImage]);
  });
  // The prompt names /tmp/a.png twice and /tmp/b.png once. The loader is expected to be called
  // once per distinct ref, and the ref whose load resolves to null must not reach the sanitizer.
  it("dedupes repeated refs and skips failed loads before sanitizing", async () => {
    const loadedImage: ImageContent = {
      type: "image",
      data: "b25lLWltYWdl",
      mimeType: "image/png",
    };
    // First call (a.png) succeeds, second call (b.png) yields null.
    mocks.loadImageFromRef.mockResolvedValueOnce(loadedImage).mockResolvedValueOnce(null);
    const result = await loadPromptRefImages({
      prompt: "Compare /tmp/a.png with /tmp/a.png and /tmp/b.png",
      workspaceDir: "/workspace",
    });
    expect(mocks.loadImageFromRef).toHaveBeenCalledTimes(2);
    expect(
      mocks.loadImageFromRef.mock.calls.map(
        (call: unknown[]) => (call[0] as { resolved?: string } | undefined)?.resolved,
      ),
    ).toEqual(["/tmp/a.png", "/tmp/b.png"]);
    expect(mocks.sanitizeImageBlocks).toHaveBeenCalledWith([loadedImage], "prompt:images", {
      maxBytes: MAX_IMAGE_BYTES,
    });
    expect(result).toEqual([loadedImage]);
  });
 });
--- a/src/agents/cli-runner.test.ts
+++ b/src/agents/cli-runner.test.ts
@ -3,6 +3,7 @@ import os from "node:os";
 import path from "node:path";
 import { beforeEach, describe, expect, it, vi } from "vitest";
 import type { OpenClawConfig } from "../config/config.js";
 import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
 import { runCliAgent } from "./cli-runner.js";
 import { resolveCliNoOutputTimeoutMs } from "./cli-runner/helpers.js";
 import type { EmbeddedContextFile } from "./pi-embedded-helpers.js";
@ -11,6 +12,8 @@ import type { WorkspaceBootstrapFile } from "./workspace.js";
 const supervisorSpawnMock = vi.fn();
 const enqueueSystemEventMock = vi.fn();
 const requestHeartbeatNowMock = vi.fn();
 const SMALL_PNG_BASE64 =
  "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
 const hoisted = vi.hoisted(() => {
  type BootstrapContext = {
    bootstrapFiles: WorkspaceBootstrapFile[];
@ -187,6 +190,135 @@ describe("runCliAgent with process supervisor", () => {
    expect(promptCarrier).toContain("hi");
  });
  // The prompt only mentions an image file on disk (no `images` param is passed). The spawned
  // command line is expected to carry an `--image` argument whose value is a path containing
  // "openclaw-cli-images-" rather than the original source file path.
  it("hydrates prompt media refs into CLI image args", async () => {
    // The supervisor spawn is mocked to report a clean exit so the run completes.
    supervisorSpawnMock.mockResolvedValueOnce(
      createManagedRun({
        reason: "exit",
        exitCode: 0,
        exitSignal: null,
        durationMs: 50,
        stdout: "ok",
        stderr: "",
        timedOut: false,
        noOutputTimedOut: false,
      }),
    );
    // Write a real PNG fixture into a fresh temp dir, which also serves as the workspace dir.
    const tempDir = await fs.mkdtemp(
      path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-prompt-image-"),
    );
    const sourceImage = path.join(tempDir, "bb-image.png");
    await fs.writeFile(sourceImage, Buffer.from(SMALL_PNG_BASE64, "base64"));
    try {
      await runCliAgent({
        sessionId: "s1",
        sessionFile: "/tmp/session.jsonl",
        workspaceDir: tempDir,
        prompt: `[media attached: ${sourceImage} (image/png)]\n\n<media:image>`,
        provider: "codex-cli",
        model: "gpt-5.2-codex",
        timeoutMs: 1_000,
        runId: "run-prompt-image",
      });
    } finally {
      // Always remove the fixture dir, even if the run throws.
      await fs.rm(tempDir, { recursive: true, force: true });
    }
    // Inspect the argv handed to the (mocked) supervisor spawn.
    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] };
    const argv = input.argv ?? [];
    const imageArgIndex = argv.indexOf("--image");
    expect(imageArgIndex).toBeGreaterThanOrEqual(0);
    expect(argv[imageArgIndex + 1]).toContain("openclaw-cli-images-");
    expect(argv[imageArgIndex + 1]).not.toBe(sourceImage);
  });
  // For a backend that receives no `--image` argument (asserted below), the hydrated image path
  // is expected to be appended to the prompt text instead. The prompt may reach the child via
  // stdin (`input.input`) or via argv, so both carriers are searched.
  it("appends hydrated prompt media refs to generic backend prompts", async () => {
    // The supervisor spawn is mocked to report a clean exit so the run completes.
    supervisorSpawnMock.mockResolvedValueOnce(
      createManagedRun({
        reason: "exit",
        exitCode: 0,
        exitSignal: null,
        durationMs: 50,
        stdout: "ok",
        stderr: "",
        timedOut: false,
        noOutputTimedOut: false,
      }),
    );
    // Write a real PNG fixture into a fresh temp dir, which also serves as the workspace dir.
    const tempDir = await fs.mkdtemp(
      path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-prompt-image-generic-"),
    );
    const sourceImage = path.join(tempDir, "claude-image.png");
    await fs.writeFile(sourceImage, Buffer.from(SMALL_PNG_BASE64, "base64"));
    try {
      await runCliAgent({
        sessionId: "s1",
        sessionFile: "/tmp/session.jsonl",
        workspaceDir: tempDir,
        prompt: `[media attached: ${sourceImage} (image/png)]\n\n<media:image>`,
        provider: "claude-cli",
        model: "claude-opus-4-1",
        timeoutMs: 1_000,
        runId: "run-prompt-image-generic",
      });
    } finally {
      // Always remove the fixture dir, even if the run throws.
      await fs.rm(tempDir, { recursive: true, force: true });
    }
    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[]; input?: string };
    const argv = input.argv ?? [];
    expect(argv).not.toContain("--image");
    const promptCarrier = [input.input ?? "", ...argv].join("\n");
    // Search the carrier line by line. Looking the value up in `argv` and then asserting that a
    // string built from that same `argv` contains it could never fail, and it ignored stdin.
    const appendedPath = promptCarrier
      .split("\n")
      .map((line) => line.trim())
      .find((line) => line.includes("openclaw-cli-images-"));
    expect(appendedPath).toBeDefined();
    expect(appendedPath).not.toBe(sourceImage);
  });
  // Both an explicit `images` entry and a prompt image ref are supplied. Exactly one `--image`
  // argument is expected in the spawned argv, i.e. the prompt ref must not add a second image.
  it("prefers explicit images over prompt refs", async () => {
    // The supervisor spawn is mocked to report a clean exit so the run completes.
    supervisorSpawnMock.mockResolvedValueOnce(
      createManagedRun({
        reason: "exit",
        exitCode: 0,
        exitSignal: null,
        durationMs: 50,
        stdout: "ok",
        stderr: "",
        timedOut: false,
        noOutputTimedOut: false,
      }),
    );
    // The on-disk image exists and is referenced by the prompt, so it would be loadable if the
    // prompt ref were consulted.
    const tempDir = await fs.mkdtemp(
      path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-explicit-images-"),
    );
    const sourceImage = path.join(tempDir, "ignored-prompt-image.png");
    await fs.writeFile(sourceImage, Buffer.from(SMALL_PNG_BASE64, "base64"));
    try {
      await runCliAgent({
        sessionId: "s1",
        sessionFile: "/tmp/session.jsonl",
        workspaceDir: tempDir,
        prompt: `[media attached: ${sourceImage} (image/png)]\n\n<media:image>`,
        images: [{ type: "image", data: SMALL_PNG_BASE64, mimeType: "image/png" }],
        provider: "codex-cli",
        model: "gpt-5.2-codex",
        timeoutMs: 1_000,
        runId: "run-explicit-image-precedence",
      });
    } finally {
      // Always remove the fixture dir, even if the run throws.
      await fs.rm(tempDir, { recursive: true, force: true });
    }
    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] };
    const argv = input.argv ?? [];
    expect(argv.filter((arg) => arg === "--image")).toHaveLength(1);
  });
  it("fails with timeout when no-output watchdog trips", async () => {
    supervisorSpawnMock.mockResolvedValueOnce(
      createManagedRun({
--- a/src/agents/cli-runner.ts
+++ b/src/agents/cli-runner.ts
@ -26,6 +26,7 @@ import {
  buildCliArgs,
  buildSystemPrompt,
  enqueueCliRun,
  loadPromptRefImages,
  normalizeCliModel,
  parseCliJson,
  parseCliJsonl,
@ -221,8 +222,12 @@ export async function runCliAgent(params: {
    let prompt = prependBootstrapPromptWarning(params.prompt, bootstrapPromptWarning.lines, {
      preserveExactPrompt: heartbeatPrompt,
    });
-    if (params.images && params.images.length > 0) {
+    const resolvedImages =
-      const imagePayload = await writeCliImages(params.images);
+      params.images && params.images.length > 0
        ? params.images
        : await loadPromptRefImages({ prompt, workspaceDir });
    if (resolvedImages.length > 0) {
      const imagePayload = await writeCliImages(resolvedImages);
      imagePaths = imagePayload.paths;
      cleanupImages = imagePayload.cleanup;
      if (!backend.imageArg) {
--- a/src/agents/cli-runner/helpers.ts
+++ b/src/agents/cli-runner/helpers.ts
@ -8,15 +8,19 @@ import { KeyedAsyncQueue } from "openclaw/plugin-sdk/keyed-async-queue";
 import type { ThinkLevel } from "../../auto-reply/thinking.js";
 import type { OpenClawConfig } from "../../config/config.js";
 import type { CliBackendConfig } from "../../config/types.js";
 import { MAX_IMAGE_BYTES } from "../../media/constants.js";
 import { buildTtsSystemPromptHint } from "../../tts/tts.js";
 import { isRecord } from "../../utils.js";
 import { buildModelAliasLines } from "../model-alias-lines.js";
 import { resolveDefaultModelForAgent } from "../model-selection.js";
 import { resolveOwnerDisplaySetting } from "../owner-display.js";
 import type { EmbeddedContextFile } from "../pi-embedded-helpers.js";
 import { detectImageReferences, loadImageFromRef } from "../pi-embedded-runner/run/images.js";
 import type { SandboxFsBridge } from "../sandbox/fs-bridge.js";
 import { detectRuntimeShell } from "../shell-utils.js";
 import { buildSystemPromptParams } from "../system-prompt-params.js";
 import { buildAgentSystemPrompt } from "../system-prompt.js";
 import { sanitizeImageBlocks } from "../tool-images.js";
 export { buildCliSupervisorScopeKey, resolveCliNoOutputTimeoutMs } from "./reliability.js";
 const CLI_RUN_QUEUE = new KeyedAsyncQueue();
@ -324,6 +328,43 @@ export function appendImagePathsToPrompt(prompt: string, paths: string[]): strin
  return `${trimmed}${separator}${paths.join("\n")}`;
 }
 /**
  * Turns image references found in a prompt into sanitized image blocks.
  *
  * References are detected with `detectImageReferences`, de-duplicated on their `type` and
  * `resolved` values (the first occurrence wins), and loaded one at a time with
  * `loadImageFromRef`. References whose load yields a falsy result are dropped. The images that
  * did load are then passed through `sanitizeImageBlocks` under the `"prompt:images"` label.
  *
  * @param params.prompt - Prompt text to scan for image references.
  * @param params.workspaceDir - Directory handed to the loader for each reference.
  * @param params.maxBytes - Byte limit forwarded to both the loader and the sanitizer; defaults
  *   to `MAX_IMAGE_BYTES`.
  * @param params.workspaceOnly - Forwarded unchanged to the loader.
  * @param params.sandbox - Forwarded unchanged to the loader.
  * @returns The sanitized images, or an empty array when the prompt contains no references.
  */
 export async function loadPromptRefImages(params: {
  prompt: string;
  workspaceDir: string;
  maxBytes?: number;
  workspaceOnly?: boolean;
  sandbox?: { root: string; bridge: SandboxFsBridge };
 }): Promise<ImageContent[]> {
  const { prompt, workspaceDir, workspaceOnly, sandbox } = params;
  const detected = detectImageReferences(prompt);
  if (detected.length === 0) {
    return [];
  }
  const byteLimit = params.maxBytes ?? MAX_IMAGE_BYTES;

  // Pass 1: collapse repeated references. A Map keeps first-insertion order, so the load order
  // matches the order in which distinct references appear in the prompt.
  const uniqueRefs = new Map<string, (typeof detected)[number]>();
  for (const candidate of detected) {
    const dedupeKey = `${candidate.type}:${candidate.resolved}`;
    if (!uniqueRefs.has(dedupeKey)) {
      uniqueRefs.set(dedupeKey, candidate);
    }
  }

  // Pass 2: load sequentially, keeping only the references that produced an image.
  const loaded: ImageContent[] = [];
  for (const ref of uniqueRefs.values()) {
    const block = await loadImageFromRef(ref, workspaceDir, {
      maxBytes: byteLimit,
      workspaceOnly,
      sandbox,
    });
    if (block) {
      loaded.push(block);
    }
  }

  const sanitized = await sanitizeImageBlocks(loaded, "prompt:images", {
    maxBytes: byteLimit,
  });
  return sanitized.images;
 }
 export async function writeCliImages(
  images: ImageContent[],
 ): Promise<{ paths: string[]; cleanup: () => Promise<void> }> {
Author	SHA1	Message	Date
Tyler Yust	b36f8d23e9	Agents: harden CLI prompt image hydration (#51373)	2026-03-20 19:29:35 -07:00
Tyler Yust	315713fc40	fix(cli): hydrate prompt image refs for inbound media	2026-03-20 18:58:15 -07:00