diff --git a/src/agents/cli-runner.test.ts b/src/agents/cli-runner.test.ts index e77ac021fd7..350d1a2352d 100644 --- a/src/agents/cli-runner.test.ts +++ b/src/agents/cli-runner.test.ts @@ -3,6 +3,7 @@ import os from "node:os"; import path from "node:path"; import { beforeEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../config/config.js"; +import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js"; import { runCliAgent } from "./cli-runner.js"; import { resolveCliNoOutputTimeoutMs } from "./cli-runner/helpers.js"; import type { EmbeddedContextFile } from "./pi-embedded-helpers.js"; @@ -187,6 +188,55 @@ describe("runCliAgent with process supervisor", () => { expect(promptCarrier).toContain("hi"); }); + it("hydrates prompt media refs into CLI image args", async () => { + supervisorSpawnMock.mockResolvedValueOnce( + createManagedRun({ + reason: "exit", + exitCode: 0, + exitSignal: null, + durationMs: 50, + stdout: "ok", + stderr: "", + timedOut: false, + noOutputTimedOut: false, + }), + ); + + const tempDir = await fs.mkdtemp( + path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-prompt-image-"), + ); + const sourceImage = path.join(tempDir, "bb-image.png"); + await fs.writeFile( + sourceImage, + Buffer.from( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=", + "base64", + ), + ); + + try { + await runCliAgent({ + sessionId: "s1", + sessionFile: "/tmp/session.jsonl", + workspaceDir: tempDir, + prompt: `[media attached: ${sourceImage} (image/png)]\n\n`, + provider: "codex-cli", + model: "gpt-5.2-codex", + timeoutMs: 1_000, + runId: "run-prompt-image", + }); + } finally { + await fs.rm(tempDir, { recursive: true, force: true }); + } + + const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] }; + const argv = input.argv ?? 
[]; + const imageArgIndex = argv.indexOf("--image"); + expect(imageArgIndex).toBeGreaterThanOrEqual(0); + expect(argv[imageArgIndex + 1]).toContain("openclaw-cli-images-"); + expect(argv[imageArgIndex + 1]).not.toBe(sourceImage); + }); + it("fails with timeout when no-output watchdog trips", async () => { supervisorSpawnMock.mockResolvedValueOnce( createManagedRun({ diff --git a/src/agents/cli-runner.ts b/src/agents/cli-runner.ts index 9056668e087..6d6ddcc0c1c 100644 --- a/src/agents/cli-runner.ts +++ b/src/agents/cli-runner.ts @@ -26,6 +26,7 @@ import { buildCliArgs, buildSystemPrompt, enqueueCliRun, + loadPromptRefImages, normalizeCliModel, parseCliJson, parseCliJsonl, @@ -221,8 +222,12 @@ export async function runCliAgent(params: { let prompt = prependBootstrapPromptWarning(params.prompt, bootstrapPromptWarning.lines, { preserveExactPrompt: heartbeatPrompt, }); - if (params.images && params.images.length > 0) { - const imagePayload = await writeCliImages(params.images); + const resolvedImages = + params.images && params.images.length > 0 + ? 
params.images + : await loadPromptRefImages({ prompt, workspaceDir }); + if (resolvedImages.length > 0) { + const imagePayload = await writeCliImages(resolvedImages); imagePaths = imagePayload.paths; cleanupImages = imagePayload.cleanup; if (!backend.imageArg) { diff --git a/src/agents/cli-runner/helpers.ts b/src/agents/cli-runner/helpers.ts index 98289396112..d71cd51f11d 100644 --- a/src/agents/cli-runner/helpers.ts +++ b/src/agents/cli-runner/helpers.ts @@ -14,6 +14,7 @@ import { buildModelAliasLines } from "../model-alias-lines.js"; import { resolveDefaultModelForAgent } from "../model-selection.js"; import { resolveOwnerDisplaySetting } from "../owner-display.js"; import type { EmbeddedContextFile } from "../pi-embedded-helpers.js"; +import { detectImageReferences, loadImageFromRef } from "../pi-embedded-runner/run/images.js"; import { detectRuntimeShell } from "../shell-utils.js"; import { buildSystemPromptParams } from "../system-prompt-params.js"; import { buildAgentSystemPrompt } from "../system-prompt.js"; @@ -324,6 +325,31 @@ export function appendImagePathsToPrompt(prompt: string, paths: string[]): strin return `${trimmed}${separator}${paths.join("\n")}`; } +export async function loadPromptRefImages(params: { + prompt: string; + workspaceDir: string; +}): Promise<ImageContent[]> { + const refs = detectImageReferences(params.prompt); + if (refs.length === 0) { + return []; + } + + const seen = new Set(); + const images: ImageContent[] = []; + for (const ref of refs) { + const key = `${ref.type}:${ref.resolved}`; + if (seen.has(key)) { + continue; + } + seen.add(key); + const image = await loadImageFromRef(ref, params.workspaceDir); + if (image) { + images.push(image); + } + } + return images; +} + export async function writeCliImages( images: ImageContent[], ): Promise<{ paths: string[]; cleanup: () => Promise }> {