diff --git a/docs/help/testing.md b/docs/help/testing.md
index b2057e8a1da..9fa1404a8d4 100644
--- a/docs/help/testing.md
+++ b/docs/help/testing.md
@@ -61,7 +61,7 @@ Think of the suites as “increasing realism” (and increasing flakiness/cost):
 
 - Command: `pnpm test:e2e`
 - Config: `vitest.e2e.config.ts`
-- Files: `src/**/*.e2e.test.ts`
+- Files: `src/**/*.e2e.test.ts`, `test/**/*.e2e.test.ts`
 - Runtime defaults:
   - Uses Vitest `vmForks` for faster file startup.
   - Uses adaptive workers (CI: 2-4, local: 4-8).
@@ -77,6 +77,23 @@ Think of the suites as “increasing realism” (and increasing flakiness/cost):
   - No real keys required
   - More moving parts than unit tests (can be slower)
 
+### E2E: OpenShell backend smoke
+
+- Command: `pnpm test:e2e:openshell`
+- File: `test/openshell-sandbox.e2e.test.ts`
+- Scope:
+  - Starts an isolated OpenShell gateway on the host via Docker
+  - Creates a sandbox from a temporary local Dockerfile
+  - Exercises OpenClaw's OpenShell backend over real `sandbox ssh-config` + SSH exec
+  - Verifies remote-canonical filesystem behavior through the sandbox fs bridge
+- Expectations:
+  - Opt-in only; not part of the default `pnpm test:e2e` run
+  - Requires a local `openshell` CLI plus a working Docker daemon
+  - Uses isolated `HOME` / `XDG_CONFIG_HOME`, then destroys the test gateway and sandbox
+- Useful overrides:
+  - `OPENCLAW_E2E_OPENSHELL=1` to enable the test when running the broader e2e suite manually
+  - `OPENCLAW_E2E_OPENSHELL_COMMAND=/path/to/openshell` to point at a non-default CLI binary or wrapper script
+
 ### Live (real providers + real models)
 
 - Command: `pnpm test:live`
diff --git a/package.json b/package.json
index 6aa553f5302..124975e63d1 100644
--- a/package.json
+++ b/package.json
@@ -319,6 +319,7 @@
     "test:docker:plugins": "bash scripts/e2e/plugins-docker.sh",
     "test:docker:qr": "bash scripts/e2e/qr-import-docker.sh",
     "test:e2e": "vitest run --config vitest.e2e.config.ts",
+    "test:e2e:openshell": "OPENCLAW_E2E_OPENSHELL=1 vitest run --config vitest.e2e.config.ts test/openshell-sandbox.e2e.test.ts",
     "test:extensions": "vitest run --config vitest.extensions.config.ts",
     "test:fast": "vitest run --config vitest.unit.config.ts",
     "test:force": "node --import tsx scripts/test-force.ts",
diff --git a/test/openshell-sandbox.e2e.test.ts b/test/openshell-sandbox.e2e.test.ts
new file mode 100644
index 00000000000..21824db38ee
--- /dev/null
+++ b/test/openshell-sandbox.e2e.test.ts
@@ -0,0 +1,631 @@
+import { spawn } from "node:child_process";
+import fs from "node:fs/promises";
+import net from "node:net";
+import os from "node:os";
+import path from "node:path";
+import { describe, expect, it } from "vitest";
+import { createOpenShellSandboxBackendFactory } from "../extensions/openshell/src/backend.js";
+import { resolveOpenShellPluginConfig } from "../extensions/openshell/src/config.js";
+import { createSandboxTestContext } from "../src/agents/sandbox/test-fixtures.js";
+
+// Opt-in switch: the test body only runs when OPENCLAW_E2E_OPENSHELL is exactly "1".
+const OPENCLAW_OPENSHELL_E2E = process.env.OPENCLAW_E2E_OPENSHELL === "1";
+const OPENCLAW_OPENSHELL_E2E_TIMEOUT_MS = 12 * 60_000;
+// CLI to invoke; an unset or blank override falls back to "openshell".
+const OPENCLAW_OPENSHELL_COMMAND =
+  process.env.OPENCLAW_E2E_OPENSHELL_COMMAND?.trim() || "openshell";
+
+// Image definition written to a temporary Dockerfile and passed to OpenShell as `from`.
+const CUSTOM_IMAGE_DOCKERFILE = `FROM python:3.13-slim
+
+RUN apt-get update && apt-get install -y --no-install-recommends \\
+    coreutils \\
+    curl \\
+    findutils \\
+    iproute2 \\
+  && rm -rf /var/lib/apt/lists/*
+
+RUN groupadd -g 1000 sandbox && \\
+    useradd -m -u 1000 -g sandbox sandbox
+
+RUN echo "openclaw-openshell-e2e" > /opt/openshell-e2e-marker.txt
+
+WORKDIR /sandbox
+CMD ["sleep", "infinity"]
+`;
+
+/** Exit code and captured output of a finished child process. */
+type ExecResult = {
+  code: number;
+  stdout: string;
+  stderr: string;
+};
+
+/** Handle for the Docker-backed HTTP server the network policy checks connect to. */
+type HostPolicyServer = {
+  port: number;
+  close(): Promise<void>;
+};
+
+/**
+ * Spawns `command` with `args`, writes the optional `stdin`, and buffers stdout/stderr as UTF-8.
+ *
+ * Rejects when the process cannot be spawned, when `timeoutMs` elapses (the child is then
+ * SIGKILLed), or when the exit is unsuccessful and `allowFailure` is not set. With
+ * `allowFailure`, an unsuccessful exit resolves with its code instead.
+ */
+async function runCommand(params: {
+  command: string;
+  args: string[];
+  cwd?: string;
+  env?: NodeJS.ProcessEnv;
+  stdin?: string | Buffer;
+  allowFailure?: boolean;
+  timeoutMs?: number;
+}): Promise<ExecResult> {
+  return await new Promise<ExecResult>((resolve, reject) => {
+    const child = spawn(params.command, params.args, {
+      cwd: params.cwd,
+      env: params.env,
+      stdio: ["pipe", "pipe", "pipe"],
+    });
+    const stdoutChunks: Buffer[] = [];
+    const stderrChunks: Buffer[] = [];
+    let timedOut = false;
+    const timeout =
+      params.timeoutMs && params.timeoutMs > 0
+        ? setTimeout(() => {
+            timedOut = true;
+            child.kill("SIGKILL");
+          }, params.timeoutMs)
+        : null;
+
+    child.stdout.on("data", (chunk) => stdoutChunks.push(Buffer.from(chunk)));
+    child.stderr.on("data", (chunk) => stderrChunks.push(Buffer.from(chunk)));
+    child.on("error", reject);
+    // A child that exits before draining stdin surfaces EPIPE as an "error" event on the
+    // stream. Without a listener that event is thrown as an uncaught exception; the outcome
+    // is still reported through "close" below.
+    child.stdin.on("error", () => {});
+    child.on("close", (code, signal) => {
+      if (timeout) {
+        clearTimeout(timeout);
+      }
+      const stdout = Buffer.concat(stdoutChunks).toString("utf8");
+      const stderr = Buffer.concat(stderrChunks).toString("utf8");
+      if (timedOut) {
+        reject(new Error(`command timed out: ${params.command} ${params.args.join(" ")}`));
+        return;
+      }
+      // `code` is null when the child was terminated by a signal; report that as a failure
+      // instead of mistaking it for a clean exit.
+      const exitCode = code ?? 1;
+      if (exitCode !== 0 && !params.allowFailure) {
+        reject(
+          new Error(
+            [
+              `command failed: ${params.command} ${params.args.join(" ")}`,
+              `exit: ${exitCode}`,
+              signal ? `signal: ${signal}` : "",
+              stdout.trim() ? `stdout:\n${stdout}` : "",
+              stderr.trim() ? `stderr:\n${stderr}` : "",
+            ]
+              .filter(Boolean)
+              .join("\n"),
+          ),
+        );
+        return;
+      }
+      resolve({ code: exitCode, stdout, stderr });
+    });
+
+    child.stdin.end(params.stdin);
+  });
+}
+
+/** Resolves true when `command --help` can be spawned and exits 0 or prints any output. */
+async function commandAvailable(command: string): Promise<boolean> {
+  try {
+    const result = await runCommand({
+      command,
+      args: ["--help"],
+      allowFailure: true,
+      timeoutMs: 20_000,
+    });
+    return result.code === 0 || result.stdout.length > 0 || result.stderr.length > 0;
+  } catch {
+    return false;
+  }
+}
+
+/** Resolves true only when `docker version` exits 0. */
+async function dockerReady(): Promise<boolean> {
+  try {
+    const result = await runCommand({
+      command: "docker",
+      args: ["version"],
+      allowFailure: true,
+      timeoutMs: 20_000,
+    });
+    return result.code === 0;
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Binds an ephemeral port on 127.0.0.1, closes the listener, and resolves with the port number.
+ * Nothing holds the port afterwards, so another process can claim it before the caller does.
+ */
+async function allocatePort(): Promise<number> {
+  return await new Promise<number>((resolve, reject) => {
+    const server = net.createServer();
+    server.on("error", reject);
+    server.listen(0, "127.0.0.1", () => {
+      const address = server.address();
+      if (!address || typeof address === "string") {
+        server.close(() => reject(new Error("failed to allocate local port")));
+        return;
+      }
+      const { port } = address;
+      server.close((error) => {
+        if (error) {
+          reject(error);
+          return;
+        }
+        resolve(port);
+      });
+    });
+  });
+}
+
+/** Copies process.env with HOME and the XDG config/cache dirs redirected under `rootDir`. */
+function openshellEnv(rootDir: string): NodeJS.ProcessEnv {
+  const homeDir = path.join(rootDir, "home");
+  const xdgDir = path.join(rootDir, "xdg");
+  const cacheDir = path.join(rootDir, "xdg-cache");
+  return {
+    ...process.env,
+    HOME: homeDir,
+    XDG_CONFIG_HOME: xdgDir,
+    XDG_CACHE_HOME: cacheDir,
+  };
+}
+
+/** Sets process.env[name] back to `previous`, deleting the key when it was originally unset. */
+function restoreEnvVar(name: string, previous: string | undefined): void {
+  if (previous === undefined) {
+    delete process.env[name];
+  } else {
+    process.env[name] = previous;
+  }
+}
+
+/** Removes a single trailing LF or CRLF. */
+function trimTrailingNewline(value: string): string {
+  return value.replace(/\r?\n$/, "");
+}
+
+/**
+ * Starts a detached `python:3.13-alpine` container that answers GET and POST with a fixed JSON
+ * body and publishes it on a freshly allocated host port. Polls from inside the container for
+ * up to 30 seconds until the server responds; if it never does, the container is removed and
+ * an error is thrown.
+ */
+async function startHostPolicyServer(): Promise<HostPolicyServer> {
+  const port = await allocatePort();
+  const responseBody = JSON.stringify({ ok: true, message: "hello-from-host" });
+  const serverScript = `from http.server import BaseHTTPRequestHandler, HTTPServer
+import os
+
+BODY = os.environ["RESPONSE_BODY"].encode()
+
+class Handler(BaseHTTPRequestHandler):
+    def do_GET(self):
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(BODY)))
+        self.end_headers()
+        self.wfile.write(BODY)
+
+    def do_POST(self):
+        length = int(self.headers.get("Content-Length", "0"))
+        if length:
+            self.rfile.read(length)
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(BODY)))
+        self.end_headers()
+        self.wfile.write(BODY)
+
+    def log_message(self, _format, *_args):
+        pass
+
+HTTPServer(("0.0.0.0", 8000), Handler).serve_forever()
+`;
+  const startResult = await runCommand({
+    command: "docker",
+    args: [
+      "run",
+      "--detach",
+      "--rm",
+      "-e",
+      `RESPONSE_BODY=${responseBody}`,
+      "-p",
+      `${port}:8000`,
+      "python:3.13-alpine",
+      "python3",
+      "-c",
+      serverScript,
+    ],
+    timeoutMs: 60_000,
+  });
+  const containerId = trimTrailingNewline(startResult.stdout.trim());
+  if (!containerId) {
+    throw new Error("failed to start docker-backed host policy server");
+  }
+
+  const startedAt = Date.now();
+  while (Date.now() - startedAt < 30_000) {
+    const readyResult = await runCommand({
+      command: "docker",
+      args: [
+        "exec",
+        containerId,
+        "python3",
+        "-c",
+        "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000', timeout=1).read()",
+      ],
+      allowFailure: true,
+      timeoutMs: 15_000,
+    });
+    if (readyResult.code === 0) {
+      return {
+        port,
+        async close() {
+          await runCommand({
+            command: "docker",
+            args: ["rm", "-f", containerId],
+            allowFailure: true,
+            timeoutMs: 30_000,
+          });
+        },
+      };
+    }
+    await new Promise((resolve) => setTimeout(resolve, 500));
+  }
+
+  await runCommand({
+    command: "docker",
+    args: ["rm", "-f", containerId],
+    allowFailure: true,
+    timeoutMs: 30_000,
+  });
+  throw new Error("docker-backed host policy server did not become ready");
+}
+
+/**
+ * Renders the policy YAML used by the test: fixed filesystem/landlock/process sections plus a
+ * single `host_echo` network policy for `host.openshell.internal` on `port` with `binaryPath`.
+ */
+function buildOpenShellPolicyYaml(params: { port: number; binaryPath: string }): string {
+  const networkPolicies = `  host_echo:
+    name: host-echo
+    endpoints:
+      - host: host.openshell.internal
+        port: ${params.port}
+        allowed_ips:
+          - "0.0.0.0/0"
+    binaries:
+      - path: ${params.binaryPath}`;
+  return `version: 1
+
+filesystem_policy:
+  include_workdir: true
+  read_only: [/usr, /lib, /proc, /dev/urandom, /app, /etc, /var/log]
+  read_write: [/sandbox, /tmp, /dev/null]
+
+landlock:
+  compatibility: best_effort
+
+process:
+  run_as_user: sandbox
+  run_as_group: sandbox
+
+network_policies:
+${networkPolicies}
+`;
+}
+
+/**
+ * Runs `command` via the backend: builds its exec spec, spawns the resulting argv locally, and
+ * always reports the outcome to the backend's optional `finalizeExec` hook.
+ */
+async function runBackendExec(params: {
+  backend: Awaited<ReturnType<ReturnType<typeof createOpenShellSandboxBackendFactory>>>;
+  command: string;
+  allowFailure?: boolean;
+  timeoutMs?: number;
+}): Promise<ExecResult> {
+  const execSpec = await params.backend.buildExecSpec({
+    command: params.command,
+    env: {},
+    usePty: false,
+  });
+  let result: ExecResult | null = null;
+  try {
+    result = await runCommand({
+      command: execSpec.argv[0] ?? "ssh",
+      args: execSpec.argv.slice(1),
+      env: execSpec.env,
+      allowFailure: params.allowFailure,
+      timeoutMs: params.timeoutMs,
+    });
+    return result;
+  } finally {
+    // `result` stays null when runCommand rejected, which is reported as a failed exec.
+    await params.backend.finalizeExec?.({
+      status: result?.code === 0 ? "completed" : "failed",
+      exitCode: result?.code ?? 1,
+      timedOut: false,
+      token: execSpec.finalizeToken,
+    });
+  }
+}
+
+describe("openshell sandbox backend e2e", () => {
+  it.runIf(process.platform !== "win32" && OPENCLAW_OPENSHELL_E2E)(
+    "creates a remote-canonical sandbox through OpenShell and executes over SSH",
+    { timeout: OPENCLAW_OPENSHELL_E2E_TIMEOUT_MS },
+    async () => {
+      // Missing prerequisites end the test early without failing it.
+      if (!(await dockerReady())) {
+        return;
+      }
+      if (!(await commandAvailable(OPENCLAW_OPENSHELL_COMMAND))) {
+        return;
+      }
+
+      const rootDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-openshell-e2e-"));
+      const env = openshellEnv(rootDir);
+      const previousHome = process.env.HOME;
+      const previousXdgConfigHome = process.env.XDG_CONFIG_HOME;
+      const previousXdgCacheHome = process.env.XDG_CACHE_HOME;
+      const workspaceDir = path.join(rootDir, "workspace");
+      const dockerfileDir = path.join(rootDir, "custom-image");
+      const dockerfilePath = path.join(dockerfileDir, "Dockerfile");
+      const denyPolicyPath = path.join(rootDir, "deny-policy.yaml");
+      const allowPolicyPath = path.join(rootDir, "allow-policy.yaml");
+      const scopeSuffix = `${process.pid}-${Date.now()}`;
+      const gatewayName = `openclaw-e2e-${scopeSuffix}`;
+      const scopeKey = `session:openshell-e2e-deny:${scopeSuffix}`;
+      const allowSandboxName = `openclaw-policy-allow-${scopeSuffix}`;
+      const gatewayPort = await allocatePort();
+      let hostPolicyServer: HostPolicyServer | null = null;
+      const sandboxCfg = {
+        mode: "all" as const,
+        backend: "openshell" as const,
+        scope: "session" as const,
+        workspaceAccess: "rw" as const,
+        workspaceRoot: path.join(rootDir, "sandboxes"),
+        docker: {
+          image: "openclaw-sandbox:bookworm-slim",
+          containerPrefix: "openclaw-sbx-",
+          workdir: "/workspace",
+          readOnlyRoot: true,
+          tmpfs: ["/tmp"],
+          network: "none",
+          capDrop: ["ALL"],
+          env: {},
+        },
+        ssh: {
+          command: "ssh",
+          workspaceRoot: "/tmp/openclaw-sandboxes",
+          strictHostKeyChecking: true,
+          updateHostKeys: true,
+        },
+        browser: {
+          enabled: false,
+          image: "openclaw-browser",
+          containerPrefix: "openclaw-browser-",
+          network: "bridge",
+          cdpPort: 9222,
+          vncPort: 5900,
+          noVncPort: 6080,
+          headless: true,
+          enableNoVnc: false,
+          allowHostControl: false,
+          autoStart: false,
+          autoStartTimeoutMs: 1000,
+        },
+        tools: { allow: [], deny: [] },
+        prune: { idleHours: 24, maxAgeDays: 7 },
+      };
+
+      const pluginConfig = resolveOpenShellPluginConfig({
+        command: OPENCLAW_OPENSHELL_COMMAND,
+        gateway: gatewayName,
+        from: dockerfilePath,
+        mode: "remote",
+        autoProviders: false,
+        policy: denyPolicyPath,
+      });
+      const backendFactory = createOpenShellSandboxBackendFactory({ pluginConfig });
+      const backend = await backendFactory({
+        sessionKey: scopeKey,
+        scopeKey,
+        workspaceDir,
+        agentWorkspaceDir: workspaceDir,
+        cfg: sandboxCfg,
+      });
+
+      try {
+        process.env.HOME = env.HOME;
+        process.env.XDG_CONFIG_HOME = env.XDG_CONFIG_HOME;
+        process.env.XDG_CACHE_HOME = env.XDG_CACHE_HOME;
+        hostPolicyServer = await startHostPolicyServer();
+        if (!hostPolicyServer) {
+          throw new Error("failed to start host policy server");
+        }
+        await fs.mkdir(workspaceDir, { recursive: true });
+        await fs.mkdir(dockerfileDir, { recursive: true });
+        await fs.writeFile(path.join(workspaceDir, "seed.txt"), "seed-from-local\n", "utf8");
+        await fs.writeFile(dockerfilePath, CUSTOM_IMAGE_DOCKERFILE, "utf8");
+        await fs.writeFile(
+          denyPolicyPath,
+          buildOpenShellPolicyYaml({
+            port: hostPolicyServer.port,
+            binaryPath: "/usr/bin/false",
+          }),
+          "utf8",
+        );
+        await fs.writeFile(
+          allowPolicyPath,
+          buildOpenShellPolicyYaml({
+            port: hostPolicyServer.port,
+            binaryPath: "/**",
+          }),
+          "utf8",
+        );
+
+        await runCommand({
+          command: OPENCLAW_OPENSHELL_COMMAND,
+          args: [
+            "gateway",
+            "start",
+            "--name",
+            gatewayName,
+            "--port",
+            String(gatewayPort),
+            "--recreate",
+          ],
+          env,
+          timeoutMs: 8 * 60_000,
+        });
+
+        const execResult = await runBackendExec({
+          backend,
+          command: "pwd && cat /opt/openshell-e2e-marker.txt && cat seed.txt",
+          timeoutMs: 2 * 60_000,
+        });
+
+        expect(execResult.code).toBe(0);
+        const stdout = execResult.stdout.trim();
+        expect(stdout).toContain("/sandbox");
+        expect(stdout).toContain("openclaw-openshell-e2e");
+        expect(stdout).toContain("seed-from-local");
+
+        const curlPathResult = await runBackendExec({
+          backend,
+          command: "command -v curl",
+          timeoutMs: 60_000,
+        });
+        expect(trimTrailingNewline(curlPathResult.stdout.trim())).toMatch(/^\/.+\/curl$/);
+
+        const sandbox = createSandboxTestContext({
+          overrides: {
+            backendId: "openshell",
+            workspaceDir,
+            agentWorkspaceDir: workspaceDir,
+            runtimeId: backend.runtimeId,
+            runtimeLabel: backend.runtimeLabel,
+            containerName: backend.runtimeId,
+            containerWorkdir: backend.workdir,
+            backend,
+          },
+        });
+        const bridge = backend.createFsBridge?.({ sandbox });
+        if (!bridge) {
+          throw new Error("openshell backend did not create a filesystem bridge");
+        }
+
+        await bridge.writeFile({ filePath: "nested/remote-only.txt", data: "hello-remote\n" });
+        await expect(
+          fs.readFile(path.join(workspaceDir, "nested", "remote-only.txt"), "utf8"),
+        ).rejects.toThrow();
+        await expect(bridge.readFile({ filePath: "nested/remote-only.txt" })).resolves.toEqual(
+          Buffer.from("hello-remote\n"),
+        );
+
+        const verifyResult = await runCommand({
+          command: OPENCLAW_OPENSHELL_COMMAND,
+          args: ["sandbox", "ssh-config", backend.runtimeId],
+          env,
+          timeoutMs: 60_000,
+        });
+        expect(verifyResult.code).toBe(0);
+        expect(trimTrailingNewline(verifyResult.stdout)).toContain("Host ");
+
+        const blockedGetResult = await runBackendExec({
+          backend,
+          command: `curl --fail --silent --show-error --max-time 15 "http://host.openshell.internal:${hostPolicyServer.port}/policy-test"`,
+          allowFailure: true,
+          timeoutMs: 60_000,
+        });
+        expect(blockedGetResult.code).not.toBe(0);
+        expect(`${blockedGetResult.stdout}\n${blockedGetResult.stderr}`).toMatch(/403|deny/i);
+
+        const allowedGetResult = await runCommand({
+          command: OPENCLAW_OPENSHELL_COMMAND,
+          args: [
+            "sandbox",
+            "create",
+            "--name",
+            allowSandboxName,
+            "--from",
+            dockerfilePath,
+            "--policy",
+            allowPolicyPath,
+            "--no-auto-providers",
+            "--no-keep",
+            "--",
+            "curl",
+            "--fail",
+            "--silent",
+            "--show-error",
+            "--max-time",
+            "15",
+            `http://host.openshell.internal:${hostPolicyServer.port}/policy-test`,
+          ],
+          env,
+          timeoutMs: 60_000,
+        });
+        expect(allowedGetResult.code).toBe(0);
+        expect(allowedGetResult.stdout).toContain('"message":"hello-from-host"');
+      } finally {
+        // Teardown is best-effort: even with allowFailure, runCommand rejects on timeout or
+        // spawn errors, and one failed step must not skip the steps after it.
+        await runCommand({
+          command: OPENCLAW_OPENSHELL_COMMAND,
+          args: ["sandbox", "delete", backend.runtimeId],
+          env,
+          allowFailure: true,
+          timeoutMs: 2 * 60_000,
+        }).catch(() => {});
+        await runCommand({
+          command: OPENCLAW_OPENSHELL_COMMAND,
+          args: ["sandbox", "delete", allowSandboxName],
+          env,
+          allowFailure: true,
+          timeoutMs: 2 * 60_000,
+        }).catch(() => {});
+        await runCommand({
+          command: OPENCLAW_OPENSHELL_COMMAND,
+          args: ["gateway", "destroy", "--name", gatewayName],
+          env,
+          allowFailure: true,
+          timeoutMs: 3 * 60_000,
+        }).catch(() => {});
+        await hostPolicyServer?.close().catch(() => {});
+        try {
+          await fs.rm(rootDir, { recursive: true, force: true });
+        } finally {
+          // Put the process-wide environment back even when removing the temp dir fails.
+          restoreEnvVar("HOME", previousHome);
+          restoreEnvVar("XDG_CONFIG_HOME", previousXdgConfigHome);
+          restoreEnvVar("XDG_CACHE_HOME", previousXdgCacheHome);
+        }
+      }
+    },
+  );
+});