diff --git a/src/agents/skills-install-download.ts b/src/agents/skills-install-download.ts index e184a3d804d..a8c77e1f4c7 100644 --- a/src/agents/skills-install-download.ts +++ b/src/agents/skills-install-download.ts @@ -1,15 +1,14 @@ +import { createHash } from "node:crypto"; import fs from "node:fs"; import path from "node:path"; import { Readable } from "node:stream"; import { pipeline } from "node:stream/promises"; import type { ReadableStream as NodeReadableStream } from "node:stream/web"; +import { isWindowsDrivePath } from "../infra/archive-path.js"; import { - isWindowsDrivePath, - resolveArchiveOutputPath, - stripArchivePath, - validateArchiveEntryPath, -} from "../infra/archive-path.js"; -import { extractArchive as extractArchiveSafe } from "../infra/archive.js"; + createTarEntrySafetyChecker, + extractArchive as extractArchiveSafe, +} from "../infra/archive.js"; import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js"; import { isWithinDir } from "../infra/path-safety.js"; import { runCommandWithTimeout } from "../process/exec.js"; @@ -63,6 +62,101 @@ function resolveArchiveType(spec: SkillInstallSpec, filename: string): string | return undefined; } +const TAR_VERBOSE_MONTHS = new Set([ + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec", +]); +const ISO_DATE_PATTERN = /^\d{4}-\d{2}-\d{2}$/; + +function mapTarVerboseTypeChar(typeChar: string): string { + switch (typeChar) { + case "l": + return "SymbolicLink"; + case "h": + return "Link"; + case "b": + return "BlockDevice"; + case "c": + return "CharacterDevice"; + case "p": + return "FIFO"; + case "s": + return "Socket"; + case "d": + return "Directory"; + default: + return "File"; + } +} + +function parseTarVerboseSize(line: string): number { + const tokens = line.trim().split(/\s+/).filter(Boolean); + if (tokens.length < 6) { + throw new Error(`unable to parse tar verbose metadata: ${line}`); + } + + let dateIndex = 
tokens.findIndex((token) => TAR_VERBOSE_MONTHS.has(token));
+  if (dateIndex > 0) {
+    const size = Number.parseInt(tokens[dateIndex - 1] ?? "", 10);
+    if (!Number.isFinite(size) || size < 0) {
+      throw new Error(`unable to parse tar entry size: ${line}`);
+    }
+    return size;
+  }
+
+  dateIndex = tokens.findIndex((token) => ISO_DATE_PATTERN.test(token));
+  if (dateIndex > 0) {
+    const size = Number.parseInt(tokens[dateIndex - 1] ?? "", 10);
+    if (!Number.isFinite(size) || size < 0) {
+      throw new Error(`unable to parse tar entry size: ${line}`);
+    }
+    return size;
+  }
+
+  throw new Error(`unable to parse tar verbose metadata: ${line}`);
+}
+
+function parseTarVerboseMetadata(stdout: string): Array<{ type: string; size: number }> {
+  const lines = stdout
+    .split("\n")
+    .map((line) => line.trim())
+    .filter(Boolean);
+  return lines.map((line) => {
+    const typeChar = line[0] ?? "";
+    if (!typeChar) {
+      throw new Error("unable to parse tar entry type");
+    }
+    return {
+      type: mapTarVerboseTypeChar(typeChar),
+      size: parseTarVerboseSize(line),
+    };
+  });
+}
+
+async function hashFileSha256(filePath: string): Promise<string> {
+  const hash = createHash("sha256");
+  const stream = fs.createReadStream(filePath);
+  return await new Promise<string>((resolve, reject) => {
+    stream.on("data", (chunk) => {
+      hash.update(chunk as Buffer);
+    });
+    stream.on("error", reject);
+    stream.on("end", () => {
+      resolve(hash.digest("hex"));
+    });
+  });
+}
+
 async function downloadFile(
   url: string,
   destPath: string,
@@ -132,6 +226,8 @@ async function extractArchive(params: {
     return { stdout: "", stderr: "tar not found on PATH", code: null };
   }
 
+  const preflightHash = await hashFileSha256(archivePath);
+
   // Preflight list to prevent zip-slip style traversal before extraction.
const listResult = await runCommandWithTimeout(["tar", "tf", archivePath], { timeoutMs }); if (listResult.code !== 0) { @@ -154,34 +250,43 @@ async function extractArchive(params: { code: verboseResult.code, }; } - for (const line of verboseResult.stdout.split("\n")) { - const trimmed = line.trim(); - if (!trimmed) { - continue; - } - const typeChar = trimmed[0]; - if (typeChar === "l" || typeChar === "h" || trimmed.includes(" -> ")) { + const metadata = parseTarVerboseMetadata(verboseResult.stdout); + if (metadata.length !== entries.length) { + return { + stdout: verboseResult.stdout, + stderr: `tar verbose/list entry count mismatch (${metadata.length} vs ${entries.length})`, + code: 1, + }; + } + const checkTarEntrySafety = createTarEntrySafetyChecker({ + rootDir: targetDir, + stripComponents: strip, + escapeLabel: "targetDir", + }); + for (let i = 0; i < entries.length; i += 1) { + const entryPath = entries[i]; + const entryMeta = metadata[i]; + if (!entryPath || !entryMeta) { return { stdout: verboseResult.stdout, - stderr: "tar archive contains link entries; refusing to extract for safety", + stderr: "tar metadata parse failure", code: 1, }; } + checkTarEntrySafety({ + path: entryPath, + type: entryMeta.type, + size: entryMeta.size, + }); } - for (const entry of entries) { - validateArchiveEntryPath(entry, { escapeLabel: "targetDir" }); - const relPath = stripArchivePath(entry, strip); - if (!relPath) { - continue; - } - validateArchiveEntryPath(relPath, { escapeLabel: "targetDir" }); - resolveArchiveOutputPath({ - rootDir: targetDir, - relPath, - originalPath: entry, - escapeLabel: "targetDir", - }); + const postPreflightHash = await hashFileSha256(archivePath); + if (postPreflightHash !== preflightHash) { + return { + stdout: "", + stderr: "tar archive changed during safety preflight; refusing to extract", + code: 1, + }; } const argv = ["tar", "xf", archivePath, "-C", targetDir]; diff --git a/src/agents/skills-install.download.test.ts 
b/src/agents/skills-install.download.test.ts index 1eaf1cf147c..2f17248f24f 100644 --- a/src/agents/skills-install.download.test.ts +++ b/src/agents/skills-install.download.test.ts @@ -260,13 +260,35 @@ describe("installDownloadSpec extraction safety (tar.bz2)", () => { label: "rejects archives containing symlinks", name: "tbz2-symlink", url: "https://example.invalid/evil.tbz2", - listOutput: "link\nlink/pwned.txt\n", + listOutput: "link\n", verboseListOutput: "lrwxr-xr-x 0 0 0 0 Jan 1 00:00 link -> ../outside\n", extract: "reject" as const, expectedOk: false, expectedExtract: false, expectedStderrSubstring: "link", }, + { + label: "rejects archives containing FIFO entries", + name: "tbz2-fifo", + url: "https://example.invalid/evil.tbz2", + listOutput: "evil-fifo\n", + verboseListOutput: "prw-r--r-- 0 0 0 0 Jan 1 00:00 evil-fifo\n", + extract: "reject" as const, + expectedOk: false, + expectedExtract: false, + expectedStderrSubstring: "link", + }, + { + label: "rejects oversized extracted entries", + name: "tbz2-oversized", + url: "https://example.invalid/oversized.tbz2", + listOutput: "big.bin\n", + verboseListOutput: "-rw-r--r-- 0 0 0 314572800 Jan 1 00:00 big.bin\n", + extract: "reject" as const, + expectedOk: false, + expectedExtract: false, + expectedStderrSubstring: "archive entry extracted size exceeds limit", + }, { label: "extracts safe archives with stripComponents", name: "tbz2-ok", @@ -322,4 +344,44 @@ describe("installDownloadSpec extraction safety (tar.bz2)", () => { } } }); + + it("rejects tar.bz2 archives that change after preflight", async () => { + const entry = buildEntry("tbz2-preflight-change"); + const targetDir = path.join(resolveSkillToolsRootDir(entry), "target"); + const commandCallCount = runCommandWithTimeoutMock.mock.calls.length; + + mockArchiveResponse(new Uint8Array([1, 2, 3])); + + runCommandWithTimeoutMock.mockImplementation(async (...argv: unknown[]) => { + const cmd = (argv[0] ?? 
[]) as string[]; + if (cmd[0] === "tar" && cmd[1] === "tf") { + return runCommandResult({ stdout: "package/hello.txt\n" }); + } + if (cmd[0] === "tar" && cmd[1] === "tvf") { + const archivePath = String(cmd[2] ?? ""); + if (archivePath) { + await fs.appendFile(archivePath, "mutated"); + } + return runCommandResult({ stdout: "-rw-r--r-- 0 0 0 0 Jan 1 00:00 package/hello.txt\n" }); + } + if (cmd[0] === "tar" && cmd[1] === "xf") { + throw new Error("should not extract"); + } + return runCommandResult(); + }); + + const result = await installDownloadSkill({ + name: "tbz2-preflight-change", + url: "https://example.invalid/change.tbz2", + archive: "tar.bz2", + targetDir, + }); + + expect(result.ok).toBe(false); + expect(result.stderr).toContain("changed during safety preflight"); + const extractionAttempted = runCommandWithTimeoutMock.mock.calls + .slice(commandCallCount) + .some((call) => (call[0] as string[])[1] === "xf"); + expect(extractionAttempted).toBe(false); + }); }); diff --git a/src/infra/archive.test.ts b/src/infra/archive.test.ts index 6b25d430c6a..bb790bdb585 100644 --- a/src/infra/archive.test.ts +++ b/src/infra/archive.test.ts @@ -176,23 +176,30 @@ describe("archive utils", () => { }, ); - it("rejects archives that exceed archive size budget", async () => { - await withArchiveCase("zip", async ({ archivePath, extractDir }) => { - const zip = new JSZip(); - zip.file("package/file.txt", "ok"); - await fs.writeFile(archivePath, await zip.generateAsync({ type: "nodebuffer" })); - const stat = await fs.stat(archivePath); - - await expect( - extractArchive({ + it.each([{ ext: "zip" as const }, { ext: "tar" as const }])( + "rejects $ext archives that exceed archive size budget", + async ({ ext }) => { + await withArchiveCase(ext, async ({ workDir, archivePath, extractDir }) => { + await writePackageArchive({ + ext, + workDir, archivePath, - destDir: extractDir, - timeoutMs: 5_000, - limits: { maxArchiveBytes: Math.max(1, stat.size - 1) }, - }), - 
).rejects.toThrow("archive size exceeds limit"); - }); - }); + fileName: "file.txt", + content: "ok", + }); + const stat = await fs.stat(archivePath); + + await expect( + extractArchive({ + archivePath, + destDir: extractDir, + timeoutMs: 5_000, + limits: { maxArchiveBytes: Math.max(1, stat.size - 1) }, + }), + ).rejects.toThrow("archive size exceeds limit"); + }); + }, + ); it("fails resolvePackedRootDir when extract dir has multiple root dirs", async () => { const workDir = await makeTempDir("packed-root"); diff --git a/src/infra/archive.ts b/src/infra/archive.ts index 0fba579768a..c64afbb6251 100644 --- a/src/infra/archive.ts +++ b/src/infra/archive.ts @@ -21,8 +21,7 @@ export type ArchiveLogger = { export type ArchiveExtractLimits = { /** - * Max archive file bytes (compressed). Primarily protects zip extraction - * because we currently read the whole archive into memory for parsing. + * Max archive file bytes (compressed). */ maxArchiveBytes?: number; /** Max number of extracted entries (files + dirs). */ @@ -457,7 +456,16 @@ async function extractZip(params: { } } -type TarEntryInfo = { path: string; type: string; size: number }; +export type TarEntryInfo = { path: string; type: string; size: number }; + +const BLOCKED_TAR_ENTRY_TYPES = new Set([ + "SymbolicLink", + "Link", + "BlockDevice", + "CharacterDevice", + "FIFO", + "Socket", +]); function readTarEntryInfo(entry: unknown): TarEntryInfo { const p = @@ -479,6 +487,42 @@ function readTarEntryInfo(entry: unknown): TarEntryInfo { return { path: p, type: t, size: s }; } +export function createTarEntrySafetyChecker(params: { + rootDir: string; + stripComponents?: number; + limits?: ArchiveExtractLimits; + escapeLabel?: string; +}): (entry: TarEntryInfo) => void { + const strip = Math.max(0, Math.floor(params.stripComponents ?? 
0)); + const limits = resolveExtractLimits(params.limits); + let entryCount = 0; + const budget = createByteBudgetTracker(limits); + + return (entry: TarEntryInfo) => { + validateArchiveEntryPath(entry.path, { escapeLabel: params.escapeLabel }); + + const relPath = stripArchivePath(entry.path, strip); + if (!relPath) { + return; + } + validateArchiveEntryPath(relPath, { escapeLabel: params.escapeLabel }); + resolveArchiveOutputPath({ + rootDir: params.rootDir, + relPath, + originalPath: entry.path, + escapeLabel: params.escapeLabel, + }); + + if (BLOCKED_TAR_ENTRY_TYPES.has(entry.type)) { + throw new Error(`tar entry is a link: ${entry.path}`); + } + + entryCount += 1; + assertArchiveEntryCountWithinLimit(entryCount, limits); + budget.addEntrySize(entry.size); + }; +} + export async function extractArchive(params: { archivePath: string; destDir: string; @@ -496,49 +540,28 @@ export async function extractArchive(params: { const label = kind === "zip" ? "extract zip" : "extract tar"; if (kind === "tar") { - const strip = Math.max(0, Math.floor(params.stripComponents ?? 0)); const limits = resolveExtractLimits(params.limits); - let entryCount = 0; - const budget = createByteBudgetTracker(limits); + const stat = await fs.stat(params.archivePath); + if (stat.size > limits.maxArchiveBytes) { + throw new Error(ERROR_ARCHIVE_SIZE_EXCEEDS_LIMIT); + } + + const checkTarEntrySafety = createTarEntrySafetyChecker({ + rootDir: params.destDir, + stripComponents: params.stripComponents, + limits, + }); await withTimeout( tar.x({ file: params.archivePath, cwd: params.destDir, - strip, + strip: Math.max(0, Math.floor(params.stripComponents ?? 
0)), gzip: params.tarGzip, preservePaths: false, strict: true, onReadEntry(entry) { - const info = readTarEntryInfo(entry); - try { - validateArchiveEntryPath(info.path); - - const relPath = stripArchivePath(info.path, strip); - if (!relPath) { - return; - } - validateArchiveEntryPath(relPath); - resolveArchiveOutputPath({ - rootDir: params.destDir, - relPath, - originalPath: info.path, - }); - - if ( - info.type === "SymbolicLink" || - info.type === "Link" || - info.type === "BlockDevice" || - info.type === "CharacterDevice" || - info.type === "FIFO" || - info.type === "Socket" - ) { - throw new Error(`tar entry is a link: ${info.path}`); - } - - entryCount += 1; - assertArchiveEntryCountWithinLimit(entryCount, limits); - budget.addEntrySize(info.size); + checkTarEntrySafety(readTarEntryInfo(entry)); } catch (err) { const error = err instanceof Error ? err : new Error(String(err)); // Node's EventEmitter calls listeners with `this` bound to the