fix(cli): hydrate prompt image refs for inbound media

This commit is contained in:
Tyler Yust 2026-03-20 18:58:15 -07:00
parent 5e417b44e1
commit 315713fc40
3 changed files with 83 additions and 2 deletions

View File

@ -3,6 +3,7 @@ import os from "node:os";
import path from "node:path";
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
import { runCliAgent } from "./cli-runner.js";
import { resolveCliNoOutputTimeoutMs } from "./cli-runner/helpers.js";
import type { EmbeddedContextFile } from "./pi-embedded-helpers.js";
@ -187,6 +188,55 @@ describe("runCliAgent with process supervisor", () => {
expect(promptCarrier).toContain("hi");
});
// Covers the case where no explicit `images` param is passed and the only image
// reference lives in the prompt text: the spawned CLI argv must carry an
// `--image` flag whose value is a path other than the original source file.
it("hydrates prompt media refs into CLI image args", async () => {
  // Base64 payload written to disk as the small PNG fixture the prompt points at.
  const pngFixtureBase64 =
    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";

  supervisorSpawnMock.mockResolvedValueOnce(
    createManagedRun({
      reason: "exit",
      exitCode: 0,
      exitSignal: null,
      durationMs: 50,
      stdout: "ok",
      stderr: "",
      timedOut: false,
      noOutputTimedOut: false,
    }),
  );

  const tmpRoot = resolvePreferredOpenClawTmpDir();
  const workspace = await fs.mkdtemp(path.join(tmpRoot, "openclaw-cli-prompt-image-"));
  const referencedImage = path.join(workspace, "bb-image.png");
  await fs.writeFile(referencedImage, Buffer.from(pngFixtureBase64, "base64"));

  try {
    await runCliAgent({
      sessionId: "s1",
      sessionFile: "/tmp/session.jsonl",
      workspaceDir: workspace,
      prompt: `[media attached: ${referencedImage} (image/png)]\n\n<media:image>`,
      provider: "codex-cli",
      model: "gpt-5.2-codex",
      timeoutMs: 1_000,
      runId: "run-prompt-image",
    });
  } finally {
    // The fixture directory is removed whether or not the run succeeded; the
    // assertions below only inspect what the spawn mock recorded.
    await fs.rm(workspace, { recursive: true, force: true });
  }

  const spawnRequest = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] };
  const spawnArgv = spawnRequest.argv ?? [];
  const imageFlagPosition = spawnArgv.indexOf("--image");
  const imageFlagValue = spawnArgv[imageFlagPosition + 1];

  expect(imageFlagPosition).toBeGreaterThanOrEqual(0);
  // The value following the flag must be a path under an "openclaw-cli-images-"
  // location, not the source file referenced in the prompt.
  expect(imageFlagValue).toContain("openclaw-cli-images-");
  expect(imageFlagValue).not.toBe(referencedImage);
});
it("fails with timeout when no-output watchdog trips", async () => {
supervisorSpawnMock.mockResolvedValueOnce(
createManagedRun({

View File

@ -26,6 +26,7 @@ import {
buildCliArgs,
buildSystemPrompt,
enqueueCliRun,
loadPromptRefImages,
normalizeCliModel,
parseCliJson,
parseCliJsonl,
@ -221,8 +222,12 @@ export async function runCliAgent(params: {
let prompt = prependBootstrapPromptWarning(params.prompt, bootstrapPromptWarning.lines, {
preserveExactPrompt: heartbeatPrompt,
});
if (params.images && params.images.length > 0) {
const imagePayload = await writeCliImages(params.images);
const resolvedImages =
params.images && params.images.length > 0
? params.images
: await loadPromptRefImages({ prompt, workspaceDir });
if (resolvedImages.length > 0) {
const imagePayload = await writeCliImages(resolvedImages);
imagePaths = imagePayload.paths;
cleanupImages = imagePayload.cleanup;
if (!backend.imageArg) {

View File

@ -14,6 +14,7 @@ import { buildModelAliasLines } from "../model-alias-lines.js";
import { resolveDefaultModelForAgent } from "../model-selection.js";
import { resolveOwnerDisplaySetting } from "../owner-display.js";
import type { EmbeddedContextFile } from "../pi-embedded-helpers.js";
import { detectImageReferences, loadImageFromRef } from "../pi-embedded-runner/run/images.js";
import { detectRuntimeShell } from "../shell-utils.js";
import { buildSystemPromptParams } from "../system-prompt-params.js";
import { buildAgentSystemPrompt } from "../system-prompt.js";
@ -324,6 +325,31 @@ export function appendImagePathsToPrompt(prompt: string, paths: string[]): strin
return `${trimmed}${separator}${paths.join("\n")}`;
}
/**
 * Loads the images referenced from within a prompt's text.
 *
 * References are discovered with `detectImageReferences`. Each distinct
 * reference (keyed by its `type` and `resolved` values) is loaded once through
 * `loadImageFromRef`, one at a time and in detection order. References for
 * which the loader yields a falsy result are left out.
 *
 * @param params.prompt - Prompt text to scan for image references.
 * @param params.workspaceDir - Forwarded to `loadImageFromRef` as its second argument.
 * @returns The loaded images in order of first occurrence; an empty array when
 *   the prompt contains no references or none of them produced an image.
 */
export async function loadPromptRefImages(params: {
  prompt: string;
  workspaceDir: string;
}): Promise<ImageContent[]> {
  const { prompt, workspaceDir } = params;

  // Keep only the first occurrence of every `type:resolved` pair.
  const visitedKeys = new Set<string>();
  const distinctRefs = detectImageReferences(prompt).filter((candidate) => {
    const dedupeKey = `${candidate.type}:${candidate.resolved}`;
    if (visitedKeys.has(dedupeKey)) {
      return false;
    }
    visitedKeys.add(dedupeKey);
    return true;
  });

  // Load sequentially so the output order mirrors the reference order.
  const loaded: ImageContent[] = [];
  for (const candidate of distinctRefs) {
    const content = await loadImageFromRef(candidate, workspaceDir);
    if (content) {
      loaded.push(content);
    }
  }
  return loaded;
}
export async function writeCliImages(
images: ImageContent[],
): Promise<{ paths: string[]; cleanup: () => Promise<void> }> {