From 2f352306fe083295035d1d92aa7b77c3f46070b1 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 2 Mar 2026 20:18:35 +0000 Subject: [PATCH] perf(security): cache scanner directory walks --- src/security/skill-scanner.test.ts | 35 +++++ src/security/skill-scanner.ts | 197 ++++++++++++++++++++++++++--- 2 files changed, 212 insertions(+), 20 deletions(-) diff --git a/src/security/skill-scanner.test.ts b/src/security/skill-scanner.test.ts index c27b0e32656..f16528aa006 100644 --- a/src/security/skill-scanner.test.ts +++ b/src/security/skill-scanner.test.ts @@ -4,6 +4,7 @@ import os from "node:os"; import path from "node:path"; import { afterEach, describe, expect, it, vi } from "vitest"; import { + clearSkillScanCacheForTest, isScannable, scanDirectory, scanDirectoryWithSummary, @@ -27,6 +28,7 @@ afterEach(async () => { await fs.rm(dir, { recursive: true, force: true }).catch(() => {}); } tmpDirs.length = 0; + clearSkillScanCacheForTest(); }); // --------------------------------------------------------------------------- @@ -342,4 +344,37 @@ describe("scanDirectoryWithSummary", () => { spy.mockRestore(); } }); + + it("reuses cached findings for unchanged files and invalidates on file updates", async () => { + const root = makeTmpDir(); + const filePath = path.join(root, "cached.js"); + fsSync.writeFileSync(filePath, `const x = eval("1+1");`); + + const readSpy = vi.spyOn(fs, "readFile"); + const first = await scanDirectoryWithSummary(root); + const second = await scanDirectoryWithSummary(root); + + expect(first.critical).toBeGreaterThan(0); + expect(second.critical).toBe(first.critical); + expect(readSpy).toHaveBeenCalledTimes(1); + + await fs.writeFile(filePath, `const x = eval("2+2");`, "utf-8"); + const third = await scanDirectoryWithSummary(root); + + expect(third.critical).toBeGreaterThan(0); + expect(readSpy).toHaveBeenCalledTimes(2); + readSpy.mockRestore(); + }); + + it("reuses cached directory listings for unchanged trees", async () => { + const root = makeTmpDir(); + fsSync.writeFileSync(path.join(root, "cached.js"), `export const ok = true;`); + + const readdirSpy = vi.spyOn(fs, "readdir"); + await scanDirectoryWithSummary(root); + await scanDirectoryWithSummary(root); + + expect(readdirSpy).toHaveBeenCalledTimes(1); + readdirSpy.mockRestore(); + }); }); diff --git a/src/security/skill-scanner.ts b/src/security/skill-scanner.ts index dd58e61bae8..18f87726f36 100644 --- a/src/security/skill-scanner.ts +++ b/src/security/skill-scanner.ts @@ -49,11 +49,78 @@ const SCANNABLE_EXTENSIONS = new Set([ const DEFAULT_MAX_SCAN_FILES = 500; const DEFAULT_MAX_FILE_BYTES = 1024 * 1024; +const FILE_SCAN_CACHE_MAX = 5000; +const DIR_ENTRY_CACHE_MAX = 5000; + +type FileScanCacheEntry = { + size: number; + mtimeMs: number; + maxFileBytes: number; + scanned: boolean; + findings: SkillScanFinding[]; +}; + +const FILE_SCAN_CACHE = new Map(); +type CachedDirEntry = { + name: string; + kind: "file" | "dir"; +}; +type DirEntryCacheEntry = { + mtimeMs: number; + entries: CachedDirEntry[]; +}; +const DIR_ENTRY_CACHE = new Map(); export function isScannable(filePath: string): boolean { return SCANNABLE_EXTENSIONS.has(path.extname(filePath).toLowerCase()); } +function getCachedFileScanResult(params: { + filePath: string; + size: number; + mtimeMs: number; + maxFileBytes: number; +}): FileScanCacheEntry | undefined { + const cached = FILE_SCAN_CACHE.get(params.filePath); + if (!cached) { + return undefined; + } + if ( + cached.size !== params.size || + cached.mtimeMs !== params.mtimeMs || + cached.maxFileBytes !== params.maxFileBytes + ) { + FILE_SCAN_CACHE.delete(params.filePath); + return undefined; + } + return cached; +} + +function setCachedFileScanResult(filePath: string, entry: FileScanCacheEntry): void { + if (FILE_SCAN_CACHE.size >= FILE_SCAN_CACHE_MAX) { + const oldest = FILE_SCAN_CACHE.keys().next(); + if (!oldest.done) { + FILE_SCAN_CACHE.delete(oldest.value); + } + } + FILE_SCAN_CACHE.set(filePath, entry); +} + +function setCachedDirEntries(dirPath: string, entry: DirEntryCacheEntry): void { + if (DIR_ENTRY_CACHE.size >= DIR_ENTRY_CACHE_MAX) { + const oldest = DIR_ENTRY_CACHE.keys().next(); + if (!oldest.done) { + DIR_ENTRY_CACHE.delete(oldest.value); + } + } + DIR_ENTRY_CACHE.set(dirPath, entry); +} + +export function clearSkillScanCacheForTest(): void { + FILE_SCAN_CACHE.clear(); + DIR_ENTRY_CACHE.clear(); +} + // --------------------------------------------------------------------------- // Rule definitions // --------------------------------------------------------------------------- @@ -263,7 +330,7 @@ async function walkDirWithLimit(dirPath: string, maxFiles: number): Promise= maxFiles) { break; @@ -274,9 +341,9 @@ async function walkDirWithLimit(dirPath: string, maxFiles: number): Promise { + let st: Awaited> | null = null; + try { + st = await fs.stat(dirPath); + } catch (err) { + if (hasErrnoCode(err, "ENOENT")) { + return []; + } + throw err; + } + if (!st?.isDirectory()) { + return []; + } + + const cached = DIR_ENTRY_CACHE.get(dirPath); + if (cached && cached.mtimeMs === st.mtimeMs) { + return cached.entries; + } + + const dirents = await fs.readdir(dirPath, { withFileTypes: true }); + const entries: CachedDirEntry[] = []; + for (const entry of dirents) { + if (entry.isDirectory()) { + entries.push({ name: entry.name, kind: "dir" }); + } else if (entry.isFile()) { + entries.push({ name: entry.name, kind: "file" }); + } + } + setCachedDirEntries(dirPath, { + mtimeMs: st.mtimeMs, + entries, + }); + return entries; +} + async function resolveForcedFiles(params: { rootDir: string; includeFiles: string[]; @@ -354,27 +456,66 @@ async function collectScannableFiles(dirPath: string, opts: Required { +async function scanFileWithCache(params: { + filePath: string; + maxFileBytes: number; +}): Promise<{ scanned: boolean; findings: SkillScanFinding[] }> { + const { filePath, maxFileBytes } = params; let st: Awaited> | null = null; try { st = await fs.stat(filePath); } catch (err) { if (hasErrnoCode(err, "ENOENT")) { - return null; + return { scanned: false, findings: [] }; } throw err; } - if (!st?.isFile() || st.size > maxFileBytes) { - return null; + if (!st?.isFile()) { + return { scanned: false, findings: [] }; } + const cached = getCachedFileScanResult({ + filePath, + size: st.size, + mtimeMs: st.mtimeMs, + maxFileBytes, + }); + if (cached) { + return { + scanned: cached.scanned, + findings: cached.findings, + }; + } + + if (st.size > maxFileBytes) { + const skippedEntry: FileScanCacheEntry = { + size: st.size, + mtimeMs: st.mtimeMs, + maxFileBytes, + scanned: false, + findings: [], + }; + setCachedFileScanResult(filePath, skippedEntry); + return { scanned: false, findings: [] }; + } + + let source: string; try { - return await fs.readFile(filePath, "utf-8"); + source = await fs.readFile(filePath, "utf-8"); } catch (err) { if (hasErrnoCode(err, "ENOENT")) { - return null; + return { scanned: false, findings: [] }; } throw err; } + const findings = scanSource(source, filePath); + setCachedFileScanResult(filePath, { + size: st.size, + mtimeMs: st.mtimeMs, + maxFileBytes, + scanned: true, + findings, + }); + return { scanned: true, findings }; } export async function scanDirectory( @@ -386,12 +527,14 @@ export async function scanDirectory( const allFindings: SkillScanFinding[] = []; for (const file of files) { - const source = await readScannableSource(file, scanOptions.maxFileBytes); - if (source == null) { + const scanResult = await scanFileWithCache({ + filePath: file, + maxFileBytes: scanOptions.maxFileBytes, + }); + if (!scanResult.scanned) { continue; } - const findings = scanSource(source, file); - allFindings.push(...findings); + allFindings.push(...scanResult.findings); } return allFindings; @@ -405,22 +548,36 @@ export async function scanDirectoryWithSummary( const files = await collectScannableFiles(dirPath, scanOptions); const allFindings: SkillScanFinding[] = []; let scannedFiles = 0; + let critical = 0; + let warn = 0; + let info = 0; for (const file of files) { - const source = await readScannableSource(file, scanOptions.maxFileBytes); - if (source == null) { + const scanResult = await scanFileWithCache({ + filePath: file, + maxFileBytes: scanOptions.maxFileBytes, + }); + if (!scanResult.scanned) { continue; } scannedFiles += 1; - const findings = scanSource(source, file); - allFindings.push(...findings); + for (const finding of scanResult.findings) { + allFindings.push(finding); + if (finding.severity === "critical") { + critical += 1; + } else if (finding.severity === "warn") { + warn += 1; + } else { + info += 1; + } + } } return { scannedFiles, - critical: allFindings.filter((f) => f.severity === "critical").length, - warn: allFindings.filter((f) => f.severity === "warn").length, - info: allFindings.filter((f) => f.severity === "info").length, + critical, + warn, + info, findings: allFindings, }; }