perf(security): cache scanner directory walks

This commit is contained in:
Peter Steinberger 2026-03-02 20:18:35 +00:00
parent f7765bc151
commit 2f352306fe
2 changed files with 212 additions and 20 deletions

View File

@ -4,6 +4,7 @@ import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it, vi } from "vitest";
import {
clearSkillScanCacheForTest,
isScannable,
scanDirectory,
scanDirectoryWithSummary,
@ -27,6 +28,7 @@ afterEach(async () => {
await fs.rm(dir, { recursive: true, force: true }).catch(() => {});
}
tmpDirs.length = 0;
clearSkillScanCacheForTest();
});
// ---------------------------------------------------------------------------
@ -342,4 +344,37 @@ describe("scanDirectoryWithSummary", () => {
spy.mockRestore();
}
});
it("reuses cached findings for unchanged files and invalidates on file updates", async () => {
const root = makeTmpDir();
const filePath = path.join(root, "cached.js");
fsSync.writeFileSync(filePath, `const x = eval("1+1");`);
const readSpy = vi.spyOn(fs, "readFile");
const first = await scanDirectoryWithSummary(root);
const second = await scanDirectoryWithSummary(root);
expect(first.critical).toBeGreaterThan(0);
expect(second.critical).toBe(first.critical);
expect(readSpy).toHaveBeenCalledTimes(1);
await fs.writeFile(filePath, `const x = eval("2+2");`, "utf-8");
const third = await scanDirectoryWithSummary(root);
expect(third.critical).toBeGreaterThan(0);
expect(readSpy).toHaveBeenCalledTimes(2);
readSpy.mockRestore();
});
it("reuses cached directory listings for unchanged trees", async () => {
const root = makeTmpDir();
fsSync.writeFileSync(path.join(root, "cached.js"), `export const ok = true;`);
const readdirSpy = vi.spyOn(fs, "readdir");
await scanDirectoryWithSummary(root);
await scanDirectoryWithSummary(root);
expect(readdirSpy).toHaveBeenCalledTimes(1);
readdirSpy.mockRestore();
});
});

View File

@ -49,11 +49,78 @@ const SCANNABLE_EXTENSIONS = new Set([
const DEFAULT_MAX_SCAN_FILES = 500;
const DEFAULT_MAX_FILE_BYTES = 1024 * 1024;
// Caps for the two module-level caches below; when a cap is reached the
// oldest (first-inserted) entry is evicted first — FIFO, not LRU.
const FILE_SCAN_CACHE_MAX = 5000;
const DIR_ENTRY_CACHE_MAX = 5000;
// Cached outcome of scanning one file. An entry is valid only while the
// file's size, mtimeMs, and the maxFileBytes limit in effect at scan time
// all still match; any mismatch invalidates it.
type FileScanCacheEntry = {
size: number;
mtimeMs: number;
maxFileBytes: number;
// false => the file was not actually scanned (e.g. it exceeded maxFileBytes).
scanned: boolean;
findings: SkillScanFinding[];
};
// Keyed by file path; Map insertion order doubles as the eviction order.
const FILE_SCAN_CACHE = new Map<string, FileScanCacheEntry>();
// One entry of a cached directory listing; only plain files and directories
// are kept (other dirent kinds, e.g. symlinks, are dropped by the reader).
type CachedDirEntry = {
name: string;
kind: "file" | "dir";
};
// Cached directory listing, validated against the directory's mtimeMs.
type DirEntryCacheEntry = {
mtimeMs: number;
entries: CachedDirEntry[];
};
// Keyed by directory path; Map insertion order doubles as the eviction order.
const DIR_ENTRY_CACHE = new Map<string, DirEntryCacheEntry>();
/** True when the file's extension (case-insensitive) is one we scan. */
export function isScannable(filePath: string): boolean {
  const ext = path.extname(filePath).toLowerCase();
  return SCANNABLE_EXTENSIONS.has(ext);
}
/**
 * Look up the cached scan result for a file, returning it only when the
 * observed size/mtimeMs/maxFileBytes still match what was recorded.
 * A mismatched (stale) entry is deleted eagerly so it cannot be served later.
 */
function getCachedFileScanResult(params: {
  filePath: string;
  size: number;
  mtimeMs: number;
  maxFileBytes: number;
}): FileScanCacheEntry | undefined {
  const entry = FILE_SCAN_CACHE.get(params.filePath);
  if (entry === undefined) {
    return undefined;
  }
  const stale =
    entry.size !== params.size ||
    entry.mtimeMs !== params.mtimeMs ||
    entry.maxFileBytes !== params.maxFileBytes;
  if (stale) {
    FILE_SCAN_CACHE.delete(params.filePath);
    return undefined;
  }
  return entry;
}
/**
 * Store a file's scan result, evicting the oldest entry (FIFO — Map
 * insertion order) once the cache is at capacity.
 */
function setCachedFileScanResult(filePath: string, entry: FileScanCacheEntry): void {
  // Evict only when inserting a genuinely NEW key at capacity; overwriting
  // an existing key does not grow the map, so evicting then would needlessly
  // shrink the cache by one live entry.
  if (!FILE_SCAN_CACHE.has(filePath) && FILE_SCAN_CACHE.size >= FILE_SCAN_CACHE_MAX) {
    const oldest = FILE_SCAN_CACHE.keys().next();
    if (!oldest.done) {
      FILE_SCAN_CACHE.delete(oldest.value);
    }
  }
  FILE_SCAN_CACHE.set(filePath, entry);
}
/**
 * Store a directory's cached listing, evicting the oldest entry (FIFO — Map
 * insertion order) once the cache is at capacity.
 */
function setCachedDirEntries(dirPath: string, entry: DirEntryCacheEntry): void {
  // Only evict when a NEW key would push the map past the cap; an overwrite
  // of an existing key leaves the size unchanged and needs no eviction.
  if (!DIR_ENTRY_CACHE.has(dirPath) && DIR_ENTRY_CACHE.size >= DIR_ENTRY_CACHE_MAX) {
    const oldest = DIR_ENTRY_CACHE.keys().next();
    if (!oldest.done) {
      DIR_ENTRY_CACHE.delete(oldest.value);
    }
  }
  DIR_ENTRY_CACHE.set(dirPath, entry);
}
/** Reset both module-level caches so each test starts from a cold state. */
export function clearSkillScanCacheForTest(): void {
  for (const cache of [FILE_SCAN_CACHE, DIR_ENTRY_CACHE]) {
    cache.clear();
  }
}
// ---------------------------------------------------------------------------
// Rule definitions
// ---------------------------------------------------------------------------
@ -263,7 +330,7 @@ async function walkDirWithLimit(dirPath: string, maxFiles: number): Promise<stri
break;
}
const entries = await fs.readdir(currentDir, { withFileTypes: true });
const entries = await readDirEntriesWithCache(currentDir);
for (const entry of entries) {
if (files.length >= maxFiles) {
break;
@ -274,9 +341,9 @@ async function walkDirWithLimit(dirPath: string, maxFiles: number): Promise<stri
}
const fullPath = path.join(currentDir, entry.name);
if (entry.isDirectory()) {
if (entry.kind === "dir") {
stack.push(fullPath);
} else if (isScannable(entry.name)) {
} else if (entry.kind === "file" && isScannable(entry.name)) {
files.push(fullPath);
}
}
@ -285,6 +352,41 @@ async function walkDirWithLimit(dirPath: string, maxFiles: number): Promise<stri
return files;
}
/**
 * List a directory's files and subdirectories, serving repeat listings of an
 * unchanged directory from DIR_ENTRY_CACHE.
 *
 * The directory's mtimeMs is the cache-validity key: a matching cached entry
 * is returned as-is; otherwise the directory is re-read and re-cached.
 * NOTE(review): this assumes mtimeMs changes on every relevant mutation —
 * coarse filesystem timestamp granularity could serve a stale listing for
 * same-tick changes; confirm this is acceptable for the scanner's use.
 *
 * Missing paths and non-directories yield an empty list rather than an error.
 */
async function readDirEntriesWithCache(dirPath: string): Promise<CachedDirEntry[]> {
  let stat: Awaited<ReturnType<typeof fs.stat>> | null = null;
  try {
    stat = await fs.stat(dirPath);
  } catch (err) {
    if (hasErrnoCode(err, "ENOENT")) {
      return [];
    }
    throw err;
  }
  if (!stat?.isDirectory()) {
    return [];
  }
  const hit = DIR_ENTRY_CACHE.get(dirPath);
  if (hit !== undefined && hit.mtimeMs === stat.mtimeMs) {
    return hit.entries;
  }
  const dirents = await fs.readdir(dirPath, { withFileTypes: true });
  // Keep only plain files and directories; every other dirent kind
  // (symlinks, sockets, ...) is intentionally dropped.
  const entries: CachedDirEntry[] = dirents
    .filter((d) => d.isDirectory() || d.isFile())
    .map((d): CachedDirEntry => ({
      name: d.name,
      kind: d.isDirectory() ? "dir" : "file",
    }));
  setCachedDirEntries(dirPath, { mtimeMs: stat.mtimeMs, entries });
  return entries;
}
async function resolveForcedFiles(params: {
rootDir: string;
includeFiles: string[];
@ -354,27 +456,66 @@ async function collectScannableFiles(dirPath: string, opts: Required<SkillScanOp
return out;
}
async function readScannableSource(filePath: string, maxFileBytes: number): Promise<string | null> {
async function scanFileWithCache(params: {
filePath: string;
maxFileBytes: number;
}): Promise<{ scanned: boolean; findings: SkillScanFinding[] }> {
const { filePath, maxFileBytes } = params;
let st: Awaited<ReturnType<typeof fs.stat>> | null = null;
try {
st = await fs.stat(filePath);
} catch (err) {
if (hasErrnoCode(err, "ENOENT")) {
return null;
return { scanned: false, findings: [] };
}
throw err;
}
if (!st?.isFile() || st.size > maxFileBytes) {
return null;
if (!st?.isFile()) {
return { scanned: false, findings: [] };
}
const cached = getCachedFileScanResult({
filePath,
size: st.size,
mtimeMs: st.mtimeMs,
maxFileBytes,
});
if (cached) {
return {
scanned: cached.scanned,
findings: cached.findings,
};
}
if (st.size > maxFileBytes) {
const skippedEntry: FileScanCacheEntry = {
size: st.size,
mtimeMs: st.mtimeMs,
maxFileBytes,
scanned: false,
findings: [],
};
setCachedFileScanResult(filePath, skippedEntry);
return { scanned: false, findings: [] };
}
let source: string;
try {
return await fs.readFile(filePath, "utf-8");
source = await fs.readFile(filePath, "utf-8");
} catch (err) {
if (hasErrnoCode(err, "ENOENT")) {
return null;
return { scanned: false, findings: [] };
}
throw err;
}
const findings = scanSource(source, filePath);
setCachedFileScanResult(filePath, {
size: st.size,
mtimeMs: st.mtimeMs,
maxFileBytes,
scanned: true,
findings,
});
return { scanned: true, findings };
}
export async function scanDirectory(
@ -386,12 +527,14 @@ export async function scanDirectory(
const allFindings: SkillScanFinding[] = [];
for (const file of files) {
const source = await readScannableSource(file, scanOptions.maxFileBytes);
if (source == null) {
const scanResult = await scanFileWithCache({
filePath: file,
maxFileBytes: scanOptions.maxFileBytes,
});
if (!scanResult.scanned) {
continue;
}
const findings = scanSource(source, file);
allFindings.push(...findings);
allFindings.push(...scanResult.findings);
}
return allFindings;
@ -405,22 +548,36 @@ export async function scanDirectoryWithSummary(
const files = await collectScannableFiles(dirPath, scanOptions);
const allFindings: SkillScanFinding[] = [];
let scannedFiles = 0;
let critical = 0;
let warn = 0;
let info = 0;
for (const file of files) {
const source = await readScannableSource(file, scanOptions.maxFileBytes);
if (source == null) {
const scanResult = await scanFileWithCache({
filePath: file,
maxFileBytes: scanOptions.maxFileBytes,
});
if (!scanResult.scanned) {
continue;
}
scannedFiles += 1;
const findings = scanSource(source, file);
allFindings.push(...findings);
for (const finding of scanResult.findings) {
allFindings.push(finding);
if (finding.severity === "critical") {
critical += 1;
} else if (finding.severity === "warn") {
warn += 1;
} else {
info += 1;
}
}
}
return {
scannedFiles,
critical: allFindings.filter((f) => f.severity === "critical").length,
warn: allFindings.filter((f) => f.severity === "warn").length,
info: allFindings.filter((f) => f.severity === "info").length,
critical,
warn,
info,
findings: allFindings,
};
}