diff --git a/CHANGELOG.md b/CHANGELOG.md index 87ca45239ee..189aad52411 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -205,6 +205,7 @@ Docs: https://docs.openclaw.ai - Plugins/Matrix: add a new Matrix plugin backed by the official `matrix-js-sdk`. If you are upgrading from the previous public Matrix plugin, follow the migration guide: https://docs.openclaw.ai/install/migrating-matrix Thanks @gumadeiras. - Discord/commands: switch native command deployment to Carbon reconcile by default so Discord restarts stop churning slash commands through OpenClaw’s local deploy path. (#46597) Thanks @huntharo and @thewilloftheshadow. - Plugins/Matrix: durably dedupe inbound room events across gateway restarts so previously handled Matrix messages are not replayed as new, while preserving clean-restart backlog delivery for unseen events. (#50922) thanks @gumadeiras +- BlueBubbles/CLI agents: restore inbound prompt image refs for CLI-routed turns, reapply embedded runner image size guardrails, and cover both CLI image transport paths with regression tests. 
(#51373) ## 2026.3.13 diff --git a/src/agents/cli-runner.helpers.test.ts b/src/agents/cli-runner.helpers.test.ts new file mode 100644 index 00000000000..955be64cba7 --- /dev/null +++ b/src/agents/cli-runner.helpers.test.ts @@ -0,0 +1,116 @@ +import type { ImageContent } from "@mariozechner/pi-ai"; +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { MAX_IMAGE_BYTES } from "../media/constants.js"; +import type { SandboxFsBridge } from "./sandbox/fs-bridge.js"; + +const mocks = vi.hoisted(() => ({ + loadImageFromRef: vi.fn(), + sanitizeImageBlocks: vi.fn(), +})); + +vi.mock("./pi-embedded-runner/run/images.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadImageFromRef: (...args: unknown[]) => mocks.loadImageFromRef(...args), + }; +}); + +vi.mock("./tool-images.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + sanitizeImageBlocks: (...args: unknown[]) => mocks.sanitizeImageBlocks(...args), + }; +}); + +import { loadPromptRefImages } from "./cli-runner/helpers.js"; + +describe("loadPromptRefImages", () => { + beforeEach(() => { + mocks.loadImageFromRef.mockReset(); + mocks.sanitizeImageBlocks.mockReset(); + mocks.sanitizeImageBlocks.mockImplementation(async (images: ImageContent[]) => ({ + images, + dropped: 0, + })); + }); + + it("returns empty results when the prompt has no image refs", async () => { + await expect( + loadPromptRefImages({ + prompt: "just text", + workspaceDir: "/workspace", + }), + ).resolves.toEqual([]); + + expect(mocks.loadImageFromRef).not.toHaveBeenCalled(); + expect(mocks.sanitizeImageBlocks).not.toHaveBeenCalled(); + }); + + it("passes the max-byte guardrail through load and sanitize", async () => { + const loadedImage: ImageContent = { + type: "image", + data: "c29tZS1pbWFnZQ==", + mimeType: "image/png", + }; + const sanitizedImage: ImageContent = { + type: "image", + data: "c2FuaXRpemVkLWltYWdl", + mimeType: 
"image/jpeg", + }; + const sandbox = { + root: "/sandbox", + bridge: {} as SandboxFsBridge, + }; + + mocks.loadImageFromRef.mockResolvedValueOnce(loadedImage); + mocks.sanitizeImageBlocks.mockResolvedValueOnce({ images: [sanitizedImage], dropped: 0 }); + + const result = await loadPromptRefImages({ + prompt: "Look at /tmp/photo.png", + workspaceDir: "/workspace", + workspaceOnly: true, + sandbox, + }); + + const [ref, workspaceDir, options] = mocks.loadImageFromRef.mock.calls[0] ?? []; + expect(ref).toMatchObject({ resolved: "/tmp/photo.png", type: "path" }); + expect(workspaceDir).toBe("/workspace"); + expect(options).toEqual({ + maxBytes: MAX_IMAGE_BYTES, + workspaceOnly: true, + sandbox, + }); + expect(mocks.sanitizeImageBlocks).toHaveBeenCalledWith([loadedImage], "prompt:images", { + maxBytes: MAX_IMAGE_BYTES, + }); + expect(result).toEqual([sanitizedImage]); + }); + + it("dedupes repeated refs and skips failed loads before sanitizing", async () => { + const loadedImage: ImageContent = { + type: "image", + data: "b25lLWltYWdl", + mimeType: "image/png", + }; + + mocks.loadImageFromRef.mockResolvedValueOnce(loadedImage).mockResolvedValueOnce(null); + + const result = await loadPromptRefImages({ + prompt: "Compare /tmp/a.png with /tmp/a.png and /tmp/b.png", + workspaceDir: "/workspace", + }); + + expect(mocks.loadImageFromRef).toHaveBeenCalledTimes(2); + expect( + mocks.loadImageFromRef.mock.calls.map( + (call: unknown[]) => (call[0] as { resolved?: string } | undefined)?.resolved, + ), + ).toEqual(["/tmp/a.png", "/tmp/b.png"]); + expect(mocks.sanitizeImageBlocks).toHaveBeenCalledWith([loadedImage], "prompt:images", { + maxBytes: MAX_IMAGE_BYTES, + }); + expect(result).toEqual([loadedImage]); + }); +}); diff --git a/src/agents/cli-runner.test.ts b/src/agents/cli-runner.test.ts index 350d1a2352d..4779d352e6a 100644 --- a/src/agents/cli-runner.test.ts +++ b/src/agents/cli-runner.test.ts @@ -12,6 +12,8 @@ import type { WorkspaceBootstrapFile } from "./workspace.js"; 
const supervisorSpawnMock = vi.fn(); const enqueueSystemEventMock = vi.fn(); const requestHeartbeatNowMock = vi.fn(); +const SMALL_PNG_BASE64 = + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII="; const hoisted = vi.hoisted(() => { type BootstrapContext = { bootstrapFiles: WorkspaceBootstrapFile[]; @@ -206,13 +208,7 @@ describe("runCliAgent with process supervisor", () => { path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-prompt-image-"), ); const sourceImage = path.join(tempDir, "bb-image.png"); - await fs.writeFile( - sourceImage, - Buffer.from( - "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=", - "base64", - ), - ); + await fs.writeFile(sourceImage, Buffer.from(SMALL_PNG_BASE64, "base64")); try { await runCliAgent({ @@ -237,6 +233,92 @@ describe("runCliAgent with process supervisor", () => { expect(argv[imageArgIndex + 1]).not.toBe(sourceImage); }); + it("appends hydrated prompt media refs to generic backend prompts", async () => { + supervisorSpawnMock.mockResolvedValueOnce( + createManagedRun({ + reason: "exit", + exitCode: 0, + exitSignal: null, + durationMs: 50, + stdout: "ok", + stderr: "", + timedOut: false, + noOutputTimedOut: false, + }), + ); + + const tempDir = await fs.mkdtemp( + path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-prompt-image-generic-"), + ); + const sourceImage = path.join(tempDir, "claude-image.png"); + await fs.writeFile(sourceImage, Buffer.from(SMALL_PNG_BASE64, "base64")); + + try { + await runCliAgent({ + sessionId: "s1", + sessionFile: "/tmp/session.jsonl", + workspaceDir: tempDir, + prompt: `[media attached: ${sourceImage} (image/png)]\n\n`, + provider: "claude-cli", + model: "claude-opus-4-1", + timeoutMs: 1_000, + runId: "run-prompt-image-generic", + }); + } finally { + await fs.rm(tempDir, { recursive: true, force: true }); + } + + const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[]; input?: 
string }; + const argv = input.argv ?? []; + expect(argv).not.toContain("--image"); + const promptCarrier = [input.input ?? "", ...argv].join("\n"); + const appendedPath = argv.find((value) => value.includes("openclaw-cli-images-")); + expect(appendedPath).toBeDefined(); + expect(appendedPath).not.toBe(sourceImage); + expect(promptCarrier).toContain(appendedPath ?? ""); + }); + + it("prefers explicit images over prompt refs", async () => { + supervisorSpawnMock.mockResolvedValueOnce( + createManagedRun({ + reason: "exit", + exitCode: 0, + exitSignal: null, + durationMs: 50, + stdout: "ok", + stderr: "", + timedOut: false, + noOutputTimedOut: false, + }), + ); + + const tempDir = await fs.mkdtemp( + path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-explicit-images-"), + ); + const sourceImage = path.join(tempDir, "ignored-prompt-image.png"); + await fs.writeFile(sourceImage, Buffer.from(SMALL_PNG_BASE64, "base64")); + + try { + await runCliAgent({ + sessionId: "s1", + sessionFile: "/tmp/session.jsonl", + workspaceDir: tempDir, + prompt: `[media attached: ${sourceImage} (image/png)]\n\n`, + images: [{ type: "image", data: SMALL_PNG_BASE64, mimeType: "image/png" }], + provider: "codex-cli", + model: "gpt-5.2-codex", + timeoutMs: 1_000, + runId: "run-explicit-image-precedence", + }); + } finally { + await fs.rm(tempDir, { recursive: true, force: true }); + } + + const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] }; + const argv = input.argv ?? 
[]; + expect(argv.filter((arg) => arg === "--image")).toHaveLength(1); + }); + it("fails with timeout when no-output watchdog trips", async () => { supervisorSpawnMock.mockResolvedValueOnce( createManagedRun({ diff --git a/src/agents/cli-runner/helpers.ts b/src/agents/cli-runner/helpers.ts index d71cd51f11d..3003e1e9f8b 100644 --- a/src/agents/cli-runner/helpers.ts +++ b/src/agents/cli-runner/helpers.ts @@ -8,6 +8,7 @@ import { KeyedAsyncQueue } from "openclaw/plugin-sdk/keyed-async-queue"; import type { ThinkLevel } from "../../auto-reply/thinking.js"; import type { OpenClawConfig } from "../../config/config.js"; import type { CliBackendConfig } from "../../config/types.js"; +import { MAX_IMAGE_BYTES } from "../../media/constants.js"; import { buildTtsSystemPromptHint } from "../../tts/tts.js"; import { isRecord } from "../../utils.js"; import { buildModelAliasLines } from "../model-alias-lines.js"; @@ -15,9 +16,11 @@ import { resolveDefaultModelForAgent } from "../model-selection.js"; import { resolveOwnerDisplaySetting } from "../owner-display.js"; import type { EmbeddedContextFile } from "../pi-embedded-helpers.js"; import { detectImageReferences, loadImageFromRef } from "../pi-embedded-runner/run/images.js"; +import type { SandboxFsBridge } from "../sandbox/fs-bridge.js"; import { detectRuntimeShell } from "../shell-utils.js"; import { buildSystemPromptParams } from "../system-prompt-params.js"; import { buildAgentSystemPrompt } from "../system-prompt.js"; +import { sanitizeImageBlocks } from "../tool-images.js"; export { buildCliSupervisorScopeKey, resolveCliNoOutputTimeoutMs } from "./reliability.js"; const CLI_RUN_QUEUE = new KeyedAsyncQueue(); @@ -328,12 +331,16 @@ export function appendImagePathsToPrompt(prompt: string, paths: string[]): strin export async function loadPromptRefImages(params: { prompt: string; workspaceDir: string; + maxBytes?: number; + workspaceOnly?: boolean; + sandbox?: { root: string; bridge: SandboxFsBridge }; }): Promise { const 
refs = detectImageReferences(params.prompt); if (refs.length === 0) { return []; } + const maxBytes = params.maxBytes ?? MAX_IMAGE_BYTES; const seen = new Set(); const images: ImageContent[] = []; for (const ref of refs) { @@ -342,12 +349,20 @@ export async function loadPromptRefImages(params: { continue; } seen.add(key); - const image = await loadImageFromRef(ref, params.workspaceDir); + const image = await loadImageFromRef(ref, params.workspaceDir, { + maxBytes, + workspaceOnly: params.workspaceOnly, + sandbox: params.sandbox, + }); if (image) { images.push(image); } } - return images; + + const { images: sanitizedImages } = await sanitizeImageBlocks(images, "prompt:images", { + maxBytes, + }); + return sanitizedImages; } export async function writeCliImages(