Compare commits
2 Commits
main
...
fix/media-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
520d2129be | ||
|
|
fc6682d525 |
@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
- BlueBubbles: include sender identity in group chat envelopes and pass clean message text to the agent prompt, aligning with iMessage/Signal formatting. (#16210) Thanks @zerone0x.
|
- BlueBubbles: include sender identity in group chat envelopes and pass clean message text to the agent prompt, aligning with iMessage/Signal formatting. (#16210) Thanks @zerone0x.
|
||||||
- WhatsApp: honor per-account `dmPolicy` overrides (account-level settings now take precedence over channel defaults for inbound DMs). (#10082) Thanks @mcaxtr.
|
- WhatsApp: honor per-account `dmPolicy` overrides (account-level settings now take precedence over channel defaults for inbound DMs). (#10082) Thanks @mcaxtr.
|
||||||
- Media: accept `MEDIA:`-prefixed paths (lenient whitespace) when loading outbound media to prevent `ENOENT` for tool-returned local media paths. (#13107) Thanks @mcaxtr.
|
- Media: accept `MEDIA:`-prefixed paths (lenient whitespace) when loading outbound media to prevent `ENOENT` for tool-returned local media paths. (#13107) Thanks @mcaxtr.
|
||||||
|
- Agents/Image tool: allow workspace-local image paths by including the active workspace directory in local media allowlists, and trust sandbox-validated paths in image loaders to prevent false "not under an allowed directory" rejections. (#15541)
|
||||||
- Cron/Slack: preserve agent identity (name and icon) when cron jobs deliver outbound messages. (#16242) Thanks @robbyczgw-cla.
|
- Cron/Slack: preserve agent identity (name and icon) when cron jobs deliver outbound messages. (#16242) Thanks @robbyczgw-cla.
|
||||||
- Cron: prevent `cron list`/`cron status` from silently skipping past-due recurring jobs by using maintenance recompute semantics. (#16156) Thanks @zerone0x.
|
- Cron: prevent `cron list`/`cron status` from silently skipping past-due recurring jobs by using maintenance recompute semantics. (#16156) Thanks @zerone0x.
|
||||||
- Cron: repair missing/corrupt `nextRunAtMs` for the updated job without globally recomputing unrelated due jobs during `cron update`. (#15750)
|
- Cron: repair missing/corrupt `nextRunAtMs` for the updated job without globally recomputing unrelated due jobs during `cron update`. (#15750)
|
||||||
|
|||||||
@ -261,4 +261,24 @@ describe("CallManager", () => {
|
|||||||
|
|
||||||
expect(manager.getCallByProviderCallId("provider-exact")).toBeDefined();
|
expect(manager.getCallByProviderCallId("provider-exact")).toBeDefined();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("persists the webhook replay ledger across restarts", () => {
  // Arrange: one config and one store directory shared by both manager instances.
  const webhookUrl = "https://example.com/voice/webhook";
  const replayKey = "plivo:replay-key";
  const ttlMs = 60_000;
  const config = VoiceCallConfigSchema.parse({
    enabled: true,
    provider: "plivo",
    fromNumber: "+15550000000",
  });
  const now = Date.now();
  const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);

  // Builds and initializes a manager on top of the shared store directory.
  const bootManager = (): CallManager => {
    const instance = new CallManager(config, storePath);
    instance.initialize(new FakeProvider(), webhookUrl);
    return instance;
  };

  // Act: record the replay key, then simulate a restart with a second manager.
  bootManager().rememberWebhookReplay(replayKey, ttlMs, now);
  const restarted = bootManager();

  // Assert: the key is still known inside its TTL and forgotten once the TTL has passed.
  expect(restarted.isRecentWebhookReplay(replayKey, now + 1)).toBe(true);
  expect(restarted.isRecentWebhookReplay(replayKey, now + ttlMs + 1)).toBe(false);
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -15,6 +15,7 @@ import {
|
|||||||
speakInitialMessage as speakInitialMessageWithContext,
|
speakInitialMessage as speakInitialMessageWithContext,
|
||||||
} from "./manager/outbound.js";
|
} from "./manager/outbound.js";
|
||||||
import { getCallHistoryFromStore, loadActiveCallsFromStore } from "./manager/store.js";
|
import { getCallHistoryFromStore, loadActiveCallsFromStore } from "./manager/store.js";
|
||||||
|
import { loadReplayLedger, persistReplayLedgerEntry, pruneReplayLedger } from "./replay-ledger.js";
|
||||||
import { resolveUserPath } from "./utils.js";
|
import { resolveUserPath } from "./utils.js";
|
||||||
|
|
||||||
function resolveDefaultStoreBase(config: VoiceCallConfig, storePath?: string): string {
|
function resolveDefaultStoreBase(config: VoiceCallConfig, storePath?: string): string {
|
||||||
@ -43,6 +44,7 @@ export class CallManager {
|
|||||||
private providerCallIdMap = new Map<string, CallId>();
|
private providerCallIdMap = new Map<string, CallId>();
|
||||||
private processedEventIds = new Set<string>();
|
private processedEventIds = new Set<string>();
|
||||||
private rejectedProviderCallIds = new Set<string>();
|
private rejectedProviderCallIds = new Set<string>();
|
||||||
|
private webhookReplayLedger = new Map<string, number>();
|
||||||
private provider: VoiceCallProvider | null = null;
|
private provider: VoiceCallProvider | null = null;
|
||||||
private config: VoiceCallConfig;
|
private config: VoiceCallConfig;
|
||||||
private storePath: string;
|
private storePath: string;
|
||||||
@ -76,6 +78,7 @@ export class CallManager {
|
|||||||
this.providerCallIdMap = persisted.providerCallIdMap;
|
this.providerCallIdMap = persisted.providerCallIdMap;
|
||||||
this.processedEventIds = persisted.processedEventIds;
|
this.processedEventIds = persisted.processedEventIds;
|
||||||
this.rejectedProviderCallIds = persisted.rejectedProviderCallIds;
|
this.rejectedProviderCallIds = persisted.rejectedProviderCallIds;
|
||||||
|
this.webhookReplayLedger = loadReplayLedger(this.storePath);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -198,6 +201,30 @@ export class CallManager {
|
|||||||
return Array.from(this.activeCalls.values());
|
return Array.from(this.activeCalls.values());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Report whether `key` is currently recorded in the webhook replay ledger.
 *
 * Expired entries are pruned first, so a key whose expiry is at or before
 * `now` is reported as not recent.
 *
 * @param key - Replay key to look up.
 * @param now - Reference time in epoch milliseconds (defaults to the current time).
 * @returns `true` when the ledger holds an expiry for `key` that is later than `now`.
 */
isRecentWebhookReplay(key: string, now = Date.now()): boolean {
  this.pruneWebhookReplayLedger(now);
  const deadline = this.webhookReplayLedger.get(key);
  if (typeof deadline !== "number") {
    return false;
  }
  return deadline > now;
}
|
||||||
|
|
||||||
|
/**
 * Record `key` in the webhook replay ledger for `ttlMs` milliseconds and
 * append the entry to the persisted ledger.
 *
 * Nothing is recorded when `ttlMs` is not a finite positive number, or when
 * the ledger already holds an expiry for `key` that is at least as late as the
 * new one.
 *
 * @param key - Replay key to remember.
 * @param ttlMs - Lifetime of the entry in milliseconds.
 * @param now - Reference time in epoch milliseconds (defaults to the current time).
 */
rememberWebhookReplay(key: string, ttlMs: number, now = Date.now()): void {
  const ttlIsUsable = Number.isFinite(ttlMs) && ttlMs > 0;
  if (!ttlIsUsable) {
    return;
  }

  this.pruneWebhookReplayLedger(now);

  const deadline = now + ttlMs;
  const recorded = this.webhookReplayLedger.get(key);
  const alreadyCovered = typeof recorded === "number" && recorded >= deadline;
  if (alreadyCovered) {
    // An equal or longer-lived entry exists; skip the redundant write.
    return;
  }

  this.webhookReplayLedger.set(key, deadline);
  persistReplayLedgerEntry(this.storePath, { key, expiresAt: deadline });
}
|
||||||
|
|
||||||
|
/**
 * Drop entries from the in-memory replay ledger that have expired as of `now`.
 *
 * @param now - Reference time in epoch milliseconds (defaults to the current time).
 */
private pruneWebhookReplayLedger(now = Date.now()): void {
  const ledger = this.webhookReplayLedger;
  pruneReplayLedger(ledger, now);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get call history (from persisted logs).
|
* Get call history (from persisted logs).
|
||||||
*/
|
*/
|
||||||
|
|||||||
93
extensions/voice-call/src/replay-ledger.ts
Normal file
93
extensions/voice-call/src/replay-ledger.ts
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
import crypto from "node:crypto";
|
||||||
|
import fs from "node:fs";
|
||||||
|
import path from "node:path";
|
||||||
|
import type { ProviderName, WebhookContext } from "./types.js";
|
||||||
|
|
||||||
|
const REPLAY_LEDGER_FILE = "webhook-replay.jsonl";
|
||||||
|
|
||||||
|
type ReplayLedgerRecord = {
|
||||||
|
key: string;
|
||||||
|
expiresAt: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Build a stable identifier for a webhook request so that an identical
 * request can be recognised as a replay.
 *
 * The key is `<provider>:<sha256 hex>` where the digest covers, in order and
 * newline-separated: the provider name, the HTTP method, the URL pathname, the
 * canonicalised query string and the raw request body. Scheme and host are not
 * part of the digest.
 *
 * Query canonicalisation:
 * - every name and value is percent-encoded before joining, so a value that
 *   contains `&` or `=` cannot be confused with additional parameters;
 * - pairs are ordered by UTF-16 code unit (name first, then value) rather than
 *   with `localeCompare`, so the ordering does not depend on the process
 *   locale and the key stays identical across restarts and hosts.
 *
 * @param params.provider - Provider name; used as key prefix and hashed.
 * @param params.ctx - Webhook request context (`url`, `method`, `rawBody` are read).
 * @returns The replay key string.
 * @throws TypeError when `ctx.url` cannot be parsed by `new URL`.
 */
export function buildWebhookReplayKey(params: {
  provider: ProviderName;
  ctx: WebhookContext;
}): string {
  const { provider, ctx } = params;
  const url = new URL(ctx.url);

  // Locale-independent ordering on UTF-16 code units.
  const byCodeUnit = (a: string, b: string): number => (a < b ? -1 : a > b ? 1 : 0);

  const canonicalQuery = Array.from(
    url.searchParams.entries(),
    ([name, value]): [string, string] => [encodeURIComponent(name), encodeURIComponent(value)],
  )
    .sort(
      ([aName, aValue], [bName, bValue]) =>
        byCodeUnit(aName, bName) || byCodeUnit(aValue, bValue),
    )
    .map(([name, value]) => `${name}=${value}`)
    .join("&");

  const digest = crypto
    .createHash("sha256")
    .update(provider)
    .update("\n")
    .update(ctx.method)
    .update("\n")
    .update(url.pathname)
    .update("\n")
    .update(canonicalQuery)
    .update("\n")
    .update(ctx.rawBody)
    .digest("hex");

  return `${provider}:${digest}`;
}
|
||||||
|
|
||||||
|
/**
 * Load the persisted webhook replay ledger from `<storePath>/webhook-replay.jsonl`.
 *
 * The file is read as JSON Lines. A line is skipped when it is blank, is not
 * valid JSON, does not decode to an object with a non-empty string `key` and a
 * finite numeric `expiresAt`, or has already expired relative to `now`. When a
 * key appears more than once, the latest expiry wins regardless of line order.
 *
 * Loading is best-effort, matching how entries are persisted: a missing file
 * yields an empty ledger, and any other read failure (for example a permission
 * error, or the file disappearing between check and read) is logged and also
 * yields an empty ledger instead of throwing.
 *
 * @param storePath - Directory that holds the ledger file.
 * @param now - Reference time in epoch milliseconds (defaults to the current time).
 * @returns Map from replay key to expiry time in epoch milliseconds.
 */
export function loadReplayLedger(storePath: string, now = Date.now()): Map<string, number> {
  const ledger = new Map<string, number>();
  const logPath = path.join(storePath, REPLAY_LEDGER_FILE);

  let contents: string;
  try {
    // Read directly instead of existsSync + read: one syscall, no check/use race.
    contents = fs.readFileSync(logPath, "utf-8");
  } catch (err) {
    const code = (err as NodeJS.ErrnoException | null)?.code;
    if (code !== "ENOENT") {
      console.error("[voice-call] Failed to load replay ledger:", err);
    }
    return ledger;
  }

  for (const line of contents.split("\n")) {
    if (!line.trim()) {
      continue;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Ignore malformed lines.
      continue;
    }
    if (typeof parsed !== "object" || parsed === null) {
      continue;
    }

    const { key, expiresAt } = parsed as Partial<Record<keyof ReplayLedgerRecord, unknown>>;
    if (typeof key !== "string" || !key) {
      continue;
    }
    if (typeof expiresAt !== "number" || !Number.isFinite(expiresAt) || expiresAt <= now) {
      continue;
    }

    // Keep the longest-lived record so an out-of-order line cannot shorten a TTL.
    const known = ledger.get(key);
    if (known === undefined || expiresAt > known) {
      ledger.set(key, expiresAt);
    }
  }

  return ledger;
}
|
||||||
|
|
||||||
|
/**
 * Remove every entry from `ledger` whose expiry is at or before `now`.
 * The map is modified in place.
 *
 * @param ledger - Map from replay key to expiry time in epoch milliseconds.
 * @param now - Reference time in epoch milliseconds (defaults to the current time).
 */
export function pruneReplayLedger(ledger: Map<string, number>, now = Date.now()): void {
  ledger.forEach((deadline, replayKey) => {
    if (deadline <= now) {
      ledger.delete(replayKey);
    }
  });
}
|
||||||
|
|
||||||
|
/**
 * Append one replay ledger record, as a single JSON line, to
 * `<storePath>/webhook-replay.jsonl`.
 *
 * The store directory is created first when it is missing, so the append does
 * not fail just because nothing has written to the store yet. Persistence is
 * best-effort: any filesystem error is logged and swallowed so that a failed
 * write never interrupts the caller.
 *
 * @param storePath - Directory that holds the ledger file.
 * @param params - Record to append: the replay `key` and its `expiresAt`
 *   (epoch milliseconds).
 */
export function persistReplayLedgerEntry(
  storePath: string,
  params: { key: string; expiresAt: number },
): void {
  const logPath = path.join(storePath, REPLAY_LEDGER_FILE);
  const line = `${JSON.stringify(params)}\n`;
  try {
    // No-op when the directory already exists.
    fs.mkdirSync(storePath, { recursive: true });
    fs.appendFileSync(logPath, line);
  } catch (err) {
    console.error("[voice-call] Failed to persist replay ledger entry:", err);
  }
}
|
||||||
@ -15,8 +15,10 @@ import type { TwilioProvider } from "./providers/twilio.js";
|
|||||||
import type { NormalizedEvent, WebhookContext } from "./types.js";
|
import type { NormalizedEvent, WebhookContext } from "./types.js";
|
||||||
import { MediaStreamHandler } from "./media-stream.js";
|
import { MediaStreamHandler } from "./media-stream.js";
|
||||||
import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js";
|
import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js";
|
||||||
|
import { buildWebhookReplayKey } from "./replay-ledger.js";
|
||||||
|
|
||||||
const MAX_WEBHOOK_BODY_BYTES = 1024 * 1024;
|
const MAX_WEBHOOK_BODY_BYTES = 1024 * 1024;
|
||||||
|
const WEBHOOK_REPLAY_TTL_MS = 15 * 60_000;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* HTTP server for receiving voice call webhooks from providers.
|
* HTTP server for receiving voice call webhooks from providers.
|
||||||
@ -281,9 +283,25 @@ export class VoiceCallWebhookServer {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const replayKey = buildWebhookReplayKey({
|
||||||
|
provider: this.provider.name,
|
||||||
|
ctx,
|
||||||
|
});
|
||||||
|
if (this.manager.isRecentWebhookReplay(replayKey)) {
|
||||||
|
console.warn(`[voice-call] Dropping replayed ${this.provider.name} webhook request`);
|
||||||
|
const replayResult = this.provider.parseWebhookEvent(ctx);
|
||||||
|
this.sendProviderResponse(res, replayResult);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Parse events
|
// Parse events
|
||||||
const result = this.provider.parseWebhookEvent(ctx);
|
const result = this.provider.parseWebhookEvent(ctx);
|
||||||
|
|
||||||
|
if ((result.statusCode ?? 200) >= 400) {
|
||||||
|
this.sendProviderResponse(res, result);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Process each event
|
// Process each event
|
||||||
for (const event of result.events) {
|
for (const event of result.events) {
|
||||||
try {
|
try {
|
||||||
@ -293,16 +311,10 @@ export class VoiceCallWebhookServer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.manager.rememberWebhookReplay(replayKey, WEBHOOK_REPLAY_TTL_MS);
|
||||||
|
|
||||||
// Send response
|
// Send response
|
||||||
res.statusCode = result.statusCode || 200;
|
this.sendProviderResponse(res, result);
|
||||||
|
|
||||||
if (result.providerResponseHeaders) {
|
|
||||||
for (const [key, value] of Object.entries(result.providerResponseHeaders)) {
|
|
||||||
res.setHeader(key, value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
res.end(result.providerResponseBody || "OK");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -316,6 +328,23 @@ export class VoiceCallWebhookServer {
|
|||||||
return readRequestBodyWithLimit(req, { maxBytes, timeoutMs });
|
return readRequestBodyWithLimit(req, { maxBytes, timeoutMs });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Write a provider-shaped result to the HTTP response and end it.
 *
 * @param res - Response to write to.
 * @param result - Provider result; a falsy `statusCode` falls back to 200, a
 *   falsy `providerResponseBody` falls back to the literal "OK", and every
 *   entry of `providerResponseHeaders` (when present) is set as a header.
 */
private sendProviderResponse(
  res: http.ServerResponse,
  result: {
    providerResponseBody?: string;
    providerResponseHeaders?: Record<string, string>;
    statusCode?: number;
  },
): void {
  const {
    statusCode,
    providerResponseHeaders: headers,
    providerResponseBody: body,
  } = result;

  res.statusCode = statusCode ? statusCode : 200;

  if (headers) {
    Object.entries(headers).forEach(([headerName, headerValue]) => {
      res.setHeader(headerName, headerValue);
    });
  }

  res.end(body ? body : "OK");
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handle auto-response for inbound calls using the agent system.
|
* Handle auto-response for inbound calls using the agent system.
|
||||||
* Supports tool calling for richer voice interactions.
|
* Supports tool calling for richer voice interactions.
|
||||||
|
|||||||
@ -108,6 +108,23 @@ describe("exec PATH login shell merge", () => {
|
|||||||
|
|
||||||
expect(shellPathMock).not.toHaveBeenCalled();
|
expect(shellPathMock).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("throws security violation on sandbox fallback when env.PATH is provided", async () => {
  // Not exercised on Windows.
  if (isWin) {
    return;
  }
  process.env.PATH = "/usr/bin";

  // Import lazily so the module is loaded after the PATH override above.
  const execModule = await import("./bash-tools.exec.js");
  const execTool = execModule.createExecTool({ host: "sandbox", security: "full", ask: "off" });

  // A caller-supplied PATH must be rejected.
  const request = {
    command: "echo ok",
    env: { PATH: "/explicit/bin" },
  };
  await expect(execTool.execute("call1", request)).rejects.toThrow(
    /Security Violation: Custom 'PATH' variable is forbidden/,
  );
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("exec host env validation", () => {
|
describe("exec host env validation", () => {
|
||||||
|
|||||||
@ -293,10 +293,11 @@ export function createExecTool(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const baseEnv = coerceEnv(process.env);
|
const baseEnv = coerceEnv(process.env);
|
||||||
|
const isHostExecution = !sandbox || host === "gateway" || host === "node";
|
||||||
|
|
||||||
// Logic: Sandbox gets raw env. Host (gateway/node) must pass validation.
|
// When sandboxing is disabled, host=sandbox still executes on the gateway host.
|
||||||
// We validate BEFORE merging to prevent any dangerous vars from entering the stream.
|
// Validate dangerous env overrides on every real host execution path.
|
||||||
if (host !== "sandbox" && params.env) {
|
if (isHostExecution && params.env) {
|
||||||
validateHostEnv(params.env);
|
validateHostEnv(params.env);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -64,6 +64,7 @@ export function createOpenClawTools(options?: {
|
|||||||
? createImageTool({
|
? createImageTool({
|
||||||
config: options?.config,
|
config: options?.config,
|
||||||
agentDir: options.agentDir,
|
agentDir: options.agentDir,
|
||||||
|
workspaceDir: options?.workspaceDir,
|
||||||
sandbox:
|
sandbox:
|
||||||
options?.sandboxRoot && options?.sandboxFsBridge
|
options?.sandboxRoot && options?.sandboxFsBridge
|
||||||
? { root: options.sandboxRoot, bridge: options.sandboxFsBridge }
|
? { root: options.sandboxRoot, bridge: options.sandboxFsBridge }
|
||||||
|
|||||||
@ -1,5 +1,14 @@
|
|||||||
|
import fs from "node:fs/promises";
|
||||||
|
import os from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it } from "vitest";
|
||||||
import { detectAndLoadPromptImages, detectImageReferences, modelSupportsImages } from "./images.js";
|
import { createHostSandboxFsBridge } from "../../test-helpers/host-sandbox-fs-bridge.js";
|
||||||
|
import {
|
||||||
|
detectAndLoadPromptImages,
|
||||||
|
detectImageReferences,
|
||||||
|
loadImageFromRef,
|
||||||
|
modelSupportsImages,
|
||||||
|
} from "./images.js";
|
||||||
|
|
||||||
describe("detectImageReferences", () => {
|
describe("detectImageReferences", () => {
|
||||||
it("detects absolute file paths with common extensions", () => {
|
it("detects absolute file paths with common extensions", () => {
|
||||||
@ -196,6 +205,41 @@ describe("modelSupportsImages", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("loadImageFromRef", () => {
  it("allows sandbox-validated host paths outside default media roots", async () => {
    // Base64 PNG fixture written into the sandbox root below.
    const PNG_FIXTURE_B64 =
      "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";

    // The sandbox is created under the home directory rather than the temp directory.
    const scratchDir = await fs.mkdtemp(path.join(os.homedir(), "openclaw-sandbox-image-"));
    try {
      const sandboxRoot = path.join(scratchDir, "sandbox");
      await fs.mkdir(sandboxRoot, { recursive: true });
      await fs.writeFile(path.join(sandboxRoot, "photo.png"), Buffer.from(PNG_FIXTURE_B64, "base64"));

      // Reference the image relative to the sandbox root.
      const ref = {
        raw: "./photo.png",
        type: "path",
        resolved: "./photo.png",
      };
      const loaderOptions = {
        sandbox: {
          root: sandboxRoot,
          bridge: createHostSandboxFsBridge(sandboxRoot),
        },
      };

      const loaded = await loadImageFromRef(ref, sandboxRoot, loaderOptions);

      expect(loaded).not.toBeNull();
      expect(loaded?.type).toBe("image");
      expect(loaded?.data.length).toBeGreaterThan(0);
    } finally {
      await fs.rm(scratchDir, { recursive: true, force: true });
    }
  });
});
|
||||||
|
|
||||||
describe("detectAndLoadPromptImages", () => {
|
describe("detectAndLoadPromptImages", () => {
|
||||||
it("returns no images for non-vision models even when existing images are provided", async () => {
|
it("returns no images for non-vision models even when existing images are provided", async () => {
|
||||||
const result = await detectAndLoadPromptImages({
|
const result = await detectAndLoadPromptImages({
|
||||||
|
|||||||
@ -211,6 +211,7 @@ export async function loadImageFromRef(
|
|||||||
const media = options?.sandbox
|
const media = options?.sandbox
|
||||||
? await loadWebMedia(targetPath, {
|
? await loadWebMedia(targetPath, {
|
||||||
maxBytes: options.maxBytes,
|
maxBytes: options.maxBytes,
|
||||||
|
localRoots: "any",
|
||||||
readFile: (filePath) =>
|
readFile: (filePath) =>
|
||||||
options.sandbox!.bridge.readFile({ filePath, cwd: options.sandbox!.root }),
|
options.sandbox!.bridge.readFile({ filePath, cwd: options.sandbox!.root }),
|
||||||
})
|
})
|
||||||
|
|||||||
@ -150,6 +150,75 @@ describe("image tool implicit imageModel config", () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("allows workspace images outside default local media roots", async () => {
  // The workspace is created under the current working directory; the first
  // assertion below expects that location to be rejected without `workspaceDir`.
  const workspaceParent = await fs.mkdtemp(
    path.join(process.cwd(), ".openclaw-workspace-image-"),
  );
  // Every temp directory created by this test; all are removed in `finally`.
  const tempDirs = [workspaceParent];
  // Saved so the mocked fetch installed below does not leak into later tests.
  const priorFetch = global.fetch;
  try {
    const workspaceDir = path.join(workspaceParent, "workspace");
    await fs.mkdir(workspaceDir, { recursive: true });
    const imagePath = path.join(workspaceDir, "photo.png");
    const pngB64 =
      "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
    await fs.writeFile(imagePath, Buffer.from(pngB64, "base64"));

    // Stub the vision backend so the successful path resolves to "ok".
    const fetch = vi.fn().mockResolvedValue({
      ok: true,
      status: 200,
      statusText: "OK",
      headers: new Headers(),
      json: async () => ({
        content: "ok",
        base_resp: { status_code: 0, status_msg: "" },
      }),
    });
    // @ts-expect-error partial global
    global.fetch = fetch;
    vi.stubEnv("MINIMAX_API_KEY", "minimax-test");

    const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-image-"));
    tempDirs.push(agentDir);
    const cfg: OpenClawConfig = {
      agents: {
        defaults: {
          model: { primary: "minimax/MiniMax-M2.1" },
          imageModel: { primary: "minimax/MiniMax-VL-01" },
        },
      },
    };

    // Without a workspace directory the image path must be rejected.
    const withoutWorkspace = createImageTool({ config: cfg, agentDir });
    expect(withoutWorkspace).not.toBeNull();
    if (!withoutWorkspace) {
      throw new Error("expected image tool");
    }
    await expect(
      withoutWorkspace.execute("t0", {
        prompt: "Describe the image.",
        image: imagePath,
      }),
    ).rejects.toThrow(/Local media path is not under an allowed directory/i);

    // With the workspace directory supplied the same path must load.
    const withWorkspace = createImageTool({ config: cfg, agentDir, workspaceDir });
    expect(withWorkspace).not.toBeNull();
    if (!withWorkspace) {
      throw new Error("expected image tool");
    }

    await expect(
      withWorkspace.execute("t1", {
        prompt: "Describe the image.",
        image: imagePath,
      }),
    ).resolves.toMatchObject({
      content: [{ type: "text", text: "ok" }],
    });

    // Only the accepted call reaches the (mocked) backend.
    expect(fetch).toHaveBeenCalledTimes(1);
  } finally {
    global.fetch = priorFetch;
    await Promise.all(tempDirs.map((dir) => fs.rm(dir, { recursive: true, force: true })));
  }
});
|
||||||
|
|
||||||
it("sandboxes image paths like the read tool", async () => {
|
it("sandboxes image paths like the read tool", async () => {
|
||||||
const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-image-sandbox-"));
|
const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-image-sandbox-"));
|
||||||
const agentDir = path.join(stateDir, "agent");
|
const agentDir = path.join(stateDir, "agent");
|
||||||
|
|||||||
@ -5,7 +5,7 @@ import type { OpenClawConfig } from "../../config/config.js";
|
|||||||
import type { SandboxFsBridge } from "../sandbox/fs-bridge.js";
|
import type { SandboxFsBridge } from "../sandbox/fs-bridge.js";
|
||||||
import type { AnyAgentTool } from "./common.js";
|
import type { AnyAgentTool } from "./common.js";
|
||||||
import { resolveUserPath } from "../../utils.js";
|
import { resolveUserPath } from "../../utils.js";
|
||||||
import { loadWebMedia } from "../../web/media.js";
|
import { getDefaultLocalRoots, loadWebMedia } from "../../web/media.js";
|
||||||
import { ensureAuthProfileStore, listProfilesForProvider } from "../auth-profiles.js";
|
import { ensureAuthProfileStore, listProfilesForProvider } from "../auth-profiles.js";
|
||||||
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
|
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
|
||||||
import { minimaxUnderstandImage } from "../minimax-vlm.js";
|
import { minimaxUnderstandImage } from "../minimax-vlm.js";
|
||||||
@ -325,6 +325,7 @@ async function runImagePrompt(params: {
|
|||||||
export function createImageTool(options?: {
|
export function createImageTool(options?: {
|
||||||
config?: OpenClawConfig;
|
config?: OpenClawConfig;
|
||||||
agentDir?: string;
|
agentDir?: string;
|
||||||
|
workspaceDir?: string;
|
||||||
sandbox?: ImageSandboxConfig;
|
sandbox?: ImageSandboxConfig;
|
||||||
/** If true, the model has native vision capability and images in the prompt are auto-injected */
|
/** If true, the model has native vision capability and images in the prompt are auto-injected */
|
||||||
modelHasVision?: boolean;
|
modelHasVision?: boolean;
|
||||||
@ -351,6 +352,19 @@ export function createImageTool(options?: {
|
|||||||
? "Analyze an image with a vision model. Only use this tool when the image was NOT already provided in the user's message. Images mentioned in the prompt are automatically visible to you."
|
? "Analyze an image with a vision model. Only use this tool when the image was NOT already provided in the user's message. Images mentioned in the prompt are automatically visible to you."
|
||||||
: "Analyze an image with the configured image model (agents.defaults.imageModel). Provide a prompt and image path or URL.";
|
: "Analyze an image with the configured image model (agents.defaults.imageModel). Provide a prompt and image path or URL.";
|
||||||
|
|
||||||
|
const localRoots = (() => {
|
||||||
|
const roots = getDefaultLocalRoots();
|
||||||
|
const workspaceDir = options?.workspaceDir?.trim();
|
||||||
|
if (!workspaceDir) {
|
||||||
|
return roots;
|
||||||
|
}
|
||||||
|
const normalized = workspaceDir.startsWith("~") ? resolveUserPath(workspaceDir) : workspaceDir;
|
||||||
|
if (!roots.includes(normalized)) {
|
||||||
|
roots.push(normalized);
|
||||||
|
}
|
||||||
|
return roots;
|
||||||
|
})();
|
||||||
|
|
||||||
return {
|
return {
|
||||||
label: "Image",
|
label: "Image",
|
||||||
name: "image",
|
name: "image",
|
||||||
@ -441,10 +455,14 @@ export function createImageTool(options?: {
|
|||||||
: sandboxConfig
|
: sandboxConfig
|
||||||
? await loadWebMedia(resolvedPath ?? resolvedImage, {
|
? await loadWebMedia(resolvedPath ?? resolvedImage, {
|
||||||
maxBytes,
|
maxBytes,
|
||||||
|
localRoots: "any",
|
||||||
readFile: (filePath) =>
|
readFile: (filePath) =>
|
||||||
sandboxConfig.bridge.readFile({ filePath, cwd: sandboxConfig.root }),
|
sandboxConfig.bridge.readFile({ filePath, cwd: sandboxConfig.root }),
|
||||||
})
|
})
|
||||||
: await loadWebMedia(resolvedPath ?? resolvedImage, maxBytes);
|
: await loadWebMedia(resolvedPath ?? resolvedImage, {
|
||||||
|
maxBytes,
|
||||||
|
localRoots,
|
||||||
|
});
|
||||||
if (media.kind !== "image") {
|
if (media.kind !== "image") {
|
||||||
throw new Error(`Unsupported media type: ${media.kind}`);
|
throw new Error(`Unsupported media type: ${media.kind}`);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -31,7 +31,7 @@ type WebMediaOptions = {
|
|||||||
readFile?: (filePath: string) => Promise<Buffer>;
|
readFile?: (filePath: string) => Promise<Buffer>;
|
||||||
};
|
};
|
||||||
|
|
||||||
function getDefaultLocalRoots(): string[] {
|
export function getDefaultLocalRoots(): string[] {
|
||||||
const home = os.homedir();
|
const home = os.homedir();
|
||||||
return [
|
return [
|
||||||
os.tmpdir(),
|
os.tmpdir(),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user