import { randomUUID } from "node:crypto"; import { constants as fsConstants } from "node:fs"; import type { Stats } from "node:fs"; import type { FileHandle } from "node:fs/promises"; import fs from "node:fs/promises"; import path from "node:path"; import { Readable, Transform } from "node:stream"; import { pipeline } from "node:stream/promises"; import JSZip from "jszip"; import * as tar from "tar"; import { resolveArchiveOutputPath, stripArchivePath, validateArchiveEntryPath, } from "./archive-path.js"; import { createArchiveSymlinkTraversalError, mergeExtractedTreeIntoDestination, prepareArchiveDestinationDir, prepareArchiveOutputPath, withStagedArchiveDestination, } from "./archive-staging.js"; import { sameFileIdentity } from "./file-identity.js"; import { openFileWithinRoot, openWritableFileWithinRoot, SafeOpenError } from "./fs-safe.js"; import { isNotFoundPathError } from "./path-guards.js"; export type ArchiveKind = "tar" | "zip"; export type ArchiveLogger = { info?: (message: string) => void; warn?: (message: string) => void; }; export type ArchiveExtractLimits = { /** * Max archive file bytes (compressed). */ maxArchiveBytes?: number; /** Max number of extracted entries (files + dirs). */ maxEntries?: number; /** Max extracted bytes (sum of all files). */ maxExtractedBytes?: number; /** Max extracted bytes for a single file entry. */ maxEntryBytes?: number; }; export { ArchiveSecurityError, type ArchiveSecurityErrorCode } from "./archive-staging.js"; export { mergeExtractedTreeIntoDestination, prepareArchiveDestinationDir, prepareArchiveOutputPath, withStagedArchiveDestination, } from "./archive-staging.js"; /** @internal */ export const DEFAULT_MAX_ARCHIVE_BYTES_ZIP = 256 * 1024 * 1024; /** @internal */ export const DEFAULT_MAX_ENTRIES = 50_000; /** @internal */ export const DEFAULT_MAX_EXTRACTED_BYTES = 512 * 1024 * 1024; /** @internal */ export const DEFAULT_MAX_ENTRY_BYTES = 256 * 1024 * 1024; const ERROR_ARCHIVE_SIZE_EXCEEDS_LIMIT = "archive size exceeds limit"; const ERROR_ARCHIVE_ENTRY_COUNT_EXCEEDS_LIMIT = "archive entry count exceeds limit"; const ERROR_ARCHIVE_ENTRY_EXTRACTED_SIZE_EXCEEDS_LIMIT = "archive entry extracted size exceeds limit"; const ERROR_ARCHIVE_EXTRACTED_SIZE_EXCEEDS_LIMIT = "archive extracted size exceeds limit"; const SUPPORTS_NOFOLLOW = process.platform !== "win32" && "O_NOFOLLOW" in fsConstants; const OPEN_WRITE_CREATE_FLAGS = fsConstants.O_WRONLY | fsConstants.O_CREAT | fsConstants.O_EXCL | (SUPPORTS_NOFOLLOW ? fsConstants.O_NOFOLLOW : 0); const TAR_SUFFIXES = [".tgz", ".tar.gz", ".tar"]; export function resolveArchiveKind(filePath: string): ArchiveKind | null { const lower = filePath.toLowerCase(); if (lower.endsWith(".zip")) { return "zip"; } if (TAR_SUFFIXES.some((suffix) => lower.endsWith(suffix))) { return "tar"; } return null; } export async function resolvePackedRootDir(extractDir: string): Promise { const direct = path.join(extractDir, "package"); try { const stat = await fs.stat(direct); if (stat.isDirectory()) { return direct; } } catch { // ignore } const entries = await fs.readdir(extractDir, { withFileTypes: true }); const dirs = entries.filter((entry) => entry.isDirectory()).map((entry) => entry.name); if (dirs.length !== 1) { throw new Error(`unexpected archive layout (dirs: ${dirs.join(", ")})`); } const onlyDir = dirs[0]; if (!onlyDir) { throw new Error("unexpected archive layout (no package dir found)"); } return path.join(extractDir, onlyDir); } export async function withTimeout( promise: Promise, timeoutMs: number, label: string, ): Promise { let timeoutId: ReturnType | undefined; try { return await Promise.race([ promise, new Promise((_, reject) => { timeoutId = setTimeout( () => reject(new Error(`${label} timed out after ${timeoutMs}ms`)), timeoutMs, ); }), ]); } finally { if (timeoutId) { clearTimeout(timeoutId); } } } type ResolvedArchiveExtractLimits = Required; function clampLimit(value: number | undefined): number | undefined { if (typeof value !== "number" || !Number.isFinite(value)) { return undefined; } const v = Math.floor(value); return v > 0 ? v : undefined; } function resolveExtractLimits(limits?: ArchiveExtractLimits): ResolvedArchiveExtractLimits { // Defaults: defensive, but should not break normal installs. return { maxArchiveBytes: clampLimit(limits?.maxArchiveBytes) ?? DEFAULT_MAX_ARCHIVE_BYTES_ZIP, maxEntries: clampLimit(limits?.maxEntries) ?? DEFAULT_MAX_ENTRIES, maxExtractedBytes: clampLimit(limits?.maxExtractedBytes) ?? DEFAULT_MAX_EXTRACTED_BYTES, maxEntryBytes: clampLimit(limits?.maxEntryBytes) ?? DEFAULT_MAX_ENTRY_BYTES, }; } function assertArchiveEntryCountWithinLimit( entryCount: number, limits: ResolvedArchiveExtractLimits, ) { if (entryCount > limits.maxEntries) { throw new Error(ERROR_ARCHIVE_ENTRY_COUNT_EXCEEDS_LIMIT); } } function createByteBudgetTracker(limits: ResolvedArchiveExtractLimits): { startEntry: () => void; addBytes: (bytes: number) => void; addEntrySize: (size: number) => void; } { let entryBytes = 0; let extractedBytes = 0; const addBytes = (bytes: number) => { const b = Math.max(0, Math.floor(bytes)); if (b === 0) { return; } entryBytes += b; if (entryBytes > limits.maxEntryBytes) { throw new Error(ERROR_ARCHIVE_ENTRY_EXTRACTED_SIZE_EXCEEDS_LIMIT); } extractedBytes += b; if (extractedBytes > limits.maxExtractedBytes) { throw new Error(ERROR_ARCHIVE_EXTRACTED_SIZE_EXCEEDS_LIMIT); } }; return { startEntry() { entryBytes = 0; }, addBytes, addEntrySize(size: number) { const s = Math.max(0, Math.floor(size)); if (s > limits.maxEntryBytes) { throw new Error(ERROR_ARCHIVE_ENTRY_EXTRACTED_SIZE_EXCEEDS_LIMIT); } // Note: tar budgets are based on the header-declared size. addBytes(s); }, }; } function createExtractBudgetTransform(params: { onChunkBytes: (bytes: number) => void; }): Transform { return new Transform({ transform(chunk, _encoding, callback) { try { const buf = chunk instanceof Buffer ? chunk : Buffer.from(chunk as Uint8Array); params.onChunkBytes(buf.byteLength); callback(null, buf); } catch (err) { callback(err instanceof Error ? err : new Error(String(err))); } }, }); } function symlinkTraversalError(originalPath: string) { return createArchiveSymlinkTraversalError(originalPath); } type OpenZipOutputFileResult = { handle: FileHandle; createdForWrite: boolean; openedRealPath: string; openedStat: Stats; }; async function openZipOutputFile(params: { relPath: string; originalPath: string; destinationRealDir: string; }): Promise { try { return await openWritableFileWithinRoot({ rootDir: params.destinationRealDir, relativePath: params.relPath, mkdir: false, mode: 0o666, }); } catch (err) { if ( err instanceof SafeOpenError && (err.code === "invalid-path" || err.code === "outside-workspace" || err.code === "path-mismatch") ) { throw symlinkTraversalError(params.originalPath); } throw err; } } async function cleanupPartialRegularFile(filePath: string): Promise { let stat: Awaited>; try { stat = await fs.lstat(filePath); } catch (err) { if (isNotFoundPathError(err)) { return; } throw err; } if (stat.isFile()) { await fs.unlink(filePath).catch(() => undefined); } } function buildArchiveAtomicTempPath(targetPath: string): string { return path.join( path.dirname(targetPath), `.${path.basename(targetPath)}.${process.pid}.${randomUUID()}.tmp`, ); } async function verifyZipWriteResult(params: { destinationRealDir: string; relPath: string; expectedStat: Stats; }): Promise { const opened = await openFileWithinRoot({ rootDir: params.destinationRealDir, relativePath: params.relPath, rejectHardlinks: true, }); try { if (!sameFileIdentity(opened.stat, params.expectedStat)) { throw new SafeOpenError("path-mismatch", "path changed during zip extract"); } return opened.realPath; } finally { await opened.handle.close().catch(() => undefined); } } type ZipEntry = { name: string; dir: boolean; unixPermissions?: number; nodeStream?: () => NodeJS.ReadableStream; async: (type: "nodebuffer") => Promise; }; type ZipExtractBudget = ReturnType; async function readZipEntryStream(entry: ZipEntry): Promise { if (typeof entry.nodeStream === "function") { return entry.nodeStream(); } // Old JSZip: fall back to buffering, but still extract via a stream. const buf = await entry.async("nodebuffer"); return Readable.from(buf); } function resolveZipOutputPath(params: { entryPath: string; strip: number; destinationDir: string; }): { relPath: string; outPath: string } | null { validateArchiveEntryPath(params.entryPath); const relPath = stripArchivePath(params.entryPath, params.strip); if (!relPath) { return null; } validateArchiveEntryPath(relPath); return { relPath, outPath: resolveArchiveOutputPath({ rootDir: params.destinationDir, relPath, originalPath: params.entryPath, }), }; } async function prepareZipOutputPath(params: { destinationDir: string; destinationRealDir: string; relPath: string; outPath: string; originalPath: string; isDirectory: boolean; }): Promise { await prepareArchiveOutputPath(params); } async function writeZipFileEntry(params: { entry: ZipEntry; relPath: string; destinationRealDir: string; budget: ZipExtractBudget; }): Promise { const opened = await openZipOutputFile({ relPath: params.relPath, originalPath: params.entry.name, destinationRealDir: params.destinationRealDir, }); params.budget.startEntry(); const readable = await readZipEntryStream(params.entry); const destinationPath = opened.openedRealPath; const targetMode = opened.openedStat.mode & 0o777; await opened.handle.close().catch(() => undefined); let tempHandle: FileHandle | null = null; let tempPath: string | null = null; let tempStat: Stats | null = null; let handleClosedByStream = false; try { tempPath = buildArchiveAtomicTempPath(destinationPath); tempHandle = await fs.open(tempPath, OPEN_WRITE_CREATE_FLAGS, targetMode || 0o666); const writable = tempHandle.createWriteStream(); writable.once("close", () => { handleClosedByStream = true; }); await pipeline( readable, createExtractBudgetTransform({ onChunkBytes: params.budget.addBytes }), writable, ); tempStat = await fs.stat(tempPath); if (!tempStat) { throw new Error("zip temp write did not produce file metadata"); } if (!handleClosedByStream) { await tempHandle.close().catch(() => undefined); handleClosedByStream = true; } tempHandle = null; await fs.rename(tempPath, destinationPath); tempPath = null; const verifiedPath = await verifyZipWriteResult({ destinationRealDir: params.destinationRealDir, relPath: params.relPath, expectedStat: tempStat, }); // Best-effort permission restore for zip entries created on unix. if (typeof params.entry.unixPermissions === "number") { const mode = params.entry.unixPermissions & 0o777; if (mode !== 0) { await fs.chmod(verifiedPath, mode).catch(() => undefined); } } } catch (err) { if (tempPath) { await fs.rm(tempPath, { force: true }).catch(() => undefined); } else { await cleanupPartialRegularFile(destinationPath).catch(() => undefined); } if (err instanceof SafeOpenError) { throw symlinkTraversalError(params.entry.name); } throw err; } finally { if (tempHandle && !handleClosedByStream) { await tempHandle.close().catch(() => undefined); } } } async function extractZip(params: { archivePath: string; destDir: string; stripComponents?: number; limits?: ArchiveExtractLimits; }): Promise { const limits = resolveExtractLimits(params.limits); const destinationRealDir = await prepareArchiveDestinationDir(params.destDir); const stat = await fs.stat(params.archivePath); if (stat.size > limits.maxArchiveBytes) { throw new Error(ERROR_ARCHIVE_SIZE_EXCEEDS_LIMIT); } const buffer = await fs.readFile(params.archivePath); const zip = await JSZip.loadAsync(buffer); const entries = Object.values(zip.files) as ZipEntry[]; const strip = Math.max(0, Math.floor(params.stripComponents ?? 0)); assertArchiveEntryCountWithinLimit(entries.length, limits); const budget = createByteBudgetTracker(limits); for (const entry of entries) { const output = resolveZipOutputPath({ entryPath: entry.name, strip, destinationDir: params.destDir, }); if (!output) { continue; } await prepareZipOutputPath({ destinationDir: params.destDir, destinationRealDir, relPath: output.relPath, outPath: output.outPath, originalPath: entry.name, isDirectory: entry.dir, }); if (entry.dir) { continue; } await writeZipFileEntry({ entry, relPath: output.relPath, destinationRealDir, budget, }); } } export type TarEntryInfo = { path: string; type: string; size: number }; const BLOCKED_TAR_ENTRY_TYPES = new Set([ "SymbolicLink", "Link", "BlockDevice", "CharacterDevice", "FIFO", "Socket", ]); function readTarEntryInfo(entry: unknown): TarEntryInfo { const p = typeof entry === "object" && entry !== null && "path" in entry ? String((entry as { path: unknown }).path) : ""; const t = typeof entry === "object" && entry !== null && "type" in entry ? String((entry as { type: unknown }).type) : ""; const s = typeof entry === "object" && entry !== null && "size" in entry && typeof (entry as { size?: unknown }).size === "number" && Number.isFinite((entry as { size: number }).size) ? Math.max(0, Math.floor((entry as { size: number }).size)) : 0; return { path: p, type: t, size: s }; } export function createTarEntryPreflightChecker(params: { rootDir: string; stripComponents?: number; limits?: ArchiveExtractLimits; escapeLabel?: string; }): (entry: TarEntryInfo) => void { const strip = Math.max(0, Math.floor(params.stripComponents ?? 0)); const limits = resolveExtractLimits(params.limits); let entryCount = 0; const budget = createByteBudgetTracker(limits); return (entry: TarEntryInfo) => { validateArchiveEntryPath(entry.path, { escapeLabel: params.escapeLabel }); const relPath = stripArchivePath(entry.path, strip); if (!relPath) { return; } validateArchiveEntryPath(relPath, { escapeLabel: params.escapeLabel }); resolveArchiveOutputPath({ rootDir: params.rootDir, relPath, originalPath: entry.path, escapeLabel: params.escapeLabel, }); if (BLOCKED_TAR_ENTRY_TYPES.has(entry.type)) { throw new Error(`tar entry is a link: ${entry.path}`); } entryCount += 1; assertArchiveEntryCountWithinLimit(entryCount, limits); budget.addEntrySize(entry.size); }; } export async function extractArchive(params: { archivePath: string; destDir: string; timeoutMs: number; kind?: ArchiveKind; stripComponents?: number; tarGzip?: boolean; limits?: ArchiveExtractLimits; logger?: ArchiveLogger; }): Promise { const kind = params.kind ?? resolveArchiveKind(params.archivePath); if (!kind) { throw new Error(`unsupported archive: ${params.archivePath}`); } const label = kind === "zip" ? "extract zip" : "extract tar"; if (kind === "tar") { await withTimeout( (async () => { const limits = resolveExtractLimits(params.limits); const stat = await fs.stat(params.archivePath); if (stat.size > limits.maxArchiveBytes) { throw new Error(ERROR_ARCHIVE_SIZE_EXCEEDS_LIMIT); } const destinationRealDir = await prepareArchiveDestinationDir(params.destDir); await withStagedArchiveDestination({ destinationRealDir, run: async (stagingDir) => { const checkTarEntrySafety = createTarEntryPreflightChecker({ rootDir: destinationRealDir, stripComponents: params.stripComponents, limits, }); // A canonical cwd is not enough here: tar can still follow // pre-existing child symlinks in the live destination tree. // Extract into a private staging dir first, then merge through // the same safe-open boundary checks used by direct file writes. await tar.x({ file: params.archivePath, cwd: stagingDir, strip: Math.max(0, Math.floor(params.stripComponents ?? 0)), gzip: params.tarGzip, preservePaths: false, strict: true, onReadEntry(entry) { try { checkTarEntrySafety(readTarEntryInfo(entry)); } catch (err) { const error = err instanceof Error ? err : new Error(String(err)); // Node's EventEmitter calls listeners with `this` bound to the // emitter (tar.Unpack), which exposes Parser.abort(). const emitter = this as unknown as { abort?: (error: Error) => void }; emitter.abort?.(error); } }, }); await mergeExtractedTreeIntoDestination({ sourceDir: stagingDir, destinationDir: destinationRealDir, destinationRealDir, }); }, }); })(), params.timeoutMs, label, ); return; } await withTimeout( extractZip({ archivePath: params.archivePath, destDir: params.destDir, stripComponents: params.stripComponents, limits: params.limits, }), params.timeoutMs, label, ); } export async function fileExists(filePath: string): Promise { try { await fs.stat(filePath); return true; } catch { return false; } } export async function readJsonFile(filePath: string): Promise { const raw = await fs.readFile(filePath, "utf-8"); return JSON.parse(raw) as T; }