perf(security): cache scanner directory walks
This commit is contained in:
parent
f7765bc151
commit
2f352306fe
@ -4,6 +4,7 @@ import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
clearSkillScanCacheForTest,
|
||||
isScannable,
|
||||
scanDirectory,
|
||||
scanDirectoryWithSummary,
|
||||
@ -27,6 +28,7 @@ afterEach(async () => {
|
||||
await fs.rm(dir, { recursive: true, force: true }).catch(() => {});
|
||||
}
|
||||
tmpDirs.length = 0;
|
||||
clearSkillScanCacheForTest();
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@ -342,4 +344,37 @@ describe("scanDirectoryWithSummary", () => {
|
||||
spy.mockRestore();
|
||||
}
|
||||
});
|
||||
|
||||
it("reuses cached findings for unchanged files and invalidates on file updates", async () => {
|
||||
const root = makeTmpDir();
|
||||
const filePath = path.join(root, "cached.js");
|
||||
fsSync.writeFileSync(filePath, `const x = eval("1+1");`);
|
||||
|
||||
const readSpy = vi.spyOn(fs, "readFile");
|
||||
const first = await scanDirectoryWithSummary(root);
|
||||
const second = await scanDirectoryWithSummary(root);
|
||||
|
||||
expect(first.critical).toBeGreaterThan(0);
|
||||
expect(second.critical).toBe(first.critical);
|
||||
expect(readSpy).toHaveBeenCalledTimes(1);
|
||||
|
||||
await fs.writeFile(filePath, `const x = eval("2+2");`, "utf-8");
|
||||
const third = await scanDirectoryWithSummary(root);
|
||||
|
||||
expect(third.critical).toBeGreaterThan(0);
|
||||
expect(readSpy).toHaveBeenCalledTimes(2);
|
||||
readSpy.mockRestore();
|
||||
});
|
||||
|
||||
it("reuses cached directory listings for unchanged trees", async () => {
|
||||
const root = makeTmpDir();
|
||||
fsSync.writeFileSync(path.join(root, "cached.js"), `export const ok = true;`);
|
||||
|
||||
const readdirSpy = vi.spyOn(fs, "readdir");
|
||||
await scanDirectoryWithSummary(root);
|
||||
await scanDirectoryWithSummary(root);
|
||||
|
||||
expect(readdirSpy).toHaveBeenCalledTimes(1);
|
||||
readdirSpy.mockRestore();
|
||||
});
|
||||
});
|
||||
|
||||
@ -49,11 +49,78 @@ const SCANNABLE_EXTENSIONS = new Set([
|
||||
|
||||
// Walk/scan limits and cache sizing.
const DEFAULT_MAX_SCAN_FILES = 500;
const DEFAULT_MAX_FILE_BYTES = 1024 * 1024; // 1 MiB per-file scan ceiling
const FILE_SCAN_CACHE_MAX = 5000; // entries before oldest-first eviction
const DIR_ENTRY_CACHE_MAX = 5000;

// Cached result of scanning one file, keyed by file path.
// An entry is valid only while size, mtimeMs, AND maxFileBytes all match.
type FileScanCacheEntry = {
  size: number; // st.size at scan time
  mtimeMs: number; // st.mtimeMs at scan time
  maxFileBytes: number; // limit in effect when the entry was recorded
  scanned: boolean; // false when the file was skipped (e.g. over size limit)
  findings: SkillScanFinding[];
};

const FILE_SCAN_CACHE = new Map<string, FileScanCacheEntry>();

// Minimal dirent snapshot kept in the directory-listing cache
// (only regular files and directories are retained).
type CachedDirEntry = {
  name: string;
  kind: "file" | "dir";
};

// Cached directory listing, invalidated when the directory's mtime changes.
type DirEntryCacheEntry = {
  mtimeMs: number;
  entries: CachedDirEntry[];
};

const DIR_ENTRY_CACHE = new Map<string, DirEntryCacheEntry>();
|
||||
export function isScannable(filePath: string): boolean {
|
||||
return SCANNABLE_EXTENSIONS.has(path.extname(filePath).toLowerCase());
|
||||
}
|
||||
|
||||
function getCachedFileScanResult(params: {
|
||||
filePath: string;
|
||||
size: number;
|
||||
mtimeMs: number;
|
||||
maxFileBytes: number;
|
||||
}): FileScanCacheEntry | undefined {
|
||||
const cached = FILE_SCAN_CACHE.get(params.filePath);
|
||||
if (!cached) {
|
||||
return undefined;
|
||||
}
|
||||
if (
|
||||
cached.size !== params.size ||
|
||||
cached.mtimeMs !== params.mtimeMs ||
|
||||
cached.maxFileBytes !== params.maxFileBytes
|
||||
) {
|
||||
FILE_SCAN_CACHE.delete(params.filePath);
|
||||
return undefined;
|
||||
}
|
||||
return cached;
|
||||
}
|
||||
|
||||
function setCachedFileScanResult(filePath: string, entry: FileScanCacheEntry): void {
|
||||
if (FILE_SCAN_CACHE.size >= FILE_SCAN_CACHE_MAX) {
|
||||
const oldest = FILE_SCAN_CACHE.keys().next();
|
||||
if (!oldest.done) {
|
||||
FILE_SCAN_CACHE.delete(oldest.value);
|
||||
}
|
||||
}
|
||||
FILE_SCAN_CACHE.set(filePath, entry);
|
||||
}
|
||||
|
||||
function setCachedDirEntries(dirPath: string, entry: DirEntryCacheEntry): void {
|
||||
if (DIR_ENTRY_CACHE.size >= DIR_ENTRY_CACHE_MAX) {
|
||||
const oldest = DIR_ENTRY_CACHE.keys().next();
|
||||
if (!oldest.done) {
|
||||
DIR_ENTRY_CACHE.delete(oldest.value);
|
||||
}
|
||||
}
|
||||
DIR_ENTRY_CACHE.set(dirPath, entry);
|
||||
}
|
||||
|
||||
export function clearSkillScanCacheForTest(): void {
|
||||
FILE_SCAN_CACHE.clear();
|
||||
DIR_ENTRY_CACHE.clear();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Rule definitions
|
||||
// ---------------------------------------------------------------------------
|
||||
@ -263,7 +330,7 @@ async function walkDirWithLimit(dirPath: string, maxFiles: number): Promise<stri
|
||||
break;
|
||||
}
|
||||
|
||||
const entries = await fs.readdir(currentDir, { withFileTypes: true });
|
||||
const entries = await readDirEntriesWithCache(currentDir);
|
||||
for (const entry of entries) {
|
||||
if (files.length >= maxFiles) {
|
||||
break;
|
||||
@ -274,9 +341,9 @@ async function walkDirWithLimit(dirPath: string, maxFiles: number): Promise<stri
|
||||
}
|
||||
|
||||
const fullPath = path.join(currentDir, entry.name);
|
||||
if (entry.isDirectory()) {
|
||||
if (entry.kind === "dir") {
|
||||
stack.push(fullPath);
|
||||
} else if (isScannable(entry.name)) {
|
||||
} else if (entry.kind === "file" && isScannable(entry.name)) {
|
||||
files.push(fullPath);
|
||||
}
|
||||
}
|
||||
@ -285,6 +352,41 @@ async function walkDirWithLimit(dirPath: string, maxFiles: number): Promise<stri
|
||||
return files;
|
||||
}
|
||||
|
||||
async function readDirEntriesWithCache(dirPath: string): Promise<CachedDirEntry[]> {
|
||||
let st: Awaited<ReturnType<typeof fs.stat>> | null = null;
|
||||
try {
|
||||
st = await fs.stat(dirPath);
|
||||
} catch (err) {
|
||||
if (hasErrnoCode(err, "ENOENT")) {
|
||||
return [];
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
if (!st?.isDirectory()) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const cached = DIR_ENTRY_CACHE.get(dirPath);
|
||||
if (cached && cached.mtimeMs === st.mtimeMs) {
|
||||
return cached.entries;
|
||||
}
|
||||
|
||||
const dirents = await fs.readdir(dirPath, { withFileTypes: true });
|
||||
const entries: CachedDirEntry[] = [];
|
||||
for (const entry of dirents) {
|
||||
if (entry.isDirectory()) {
|
||||
entries.push({ name: entry.name, kind: "dir" });
|
||||
} else if (entry.isFile()) {
|
||||
entries.push({ name: entry.name, kind: "file" });
|
||||
}
|
||||
}
|
||||
setCachedDirEntries(dirPath, {
|
||||
mtimeMs: st.mtimeMs,
|
||||
entries,
|
||||
});
|
||||
return entries;
|
||||
}
|
||||
|
||||
async function resolveForcedFiles(params: {
|
||||
rootDir: string;
|
||||
includeFiles: string[];
|
||||
@ -354,27 +456,66 @@ async function collectScannableFiles(dirPath: string, opts: Required<SkillScanOp
|
||||
return out;
|
||||
}
|
||||
|
||||
async function readScannableSource(filePath: string, maxFileBytes: number): Promise<string | null> {
|
||||
async function scanFileWithCache(params: {
|
||||
filePath: string;
|
||||
maxFileBytes: number;
|
||||
}): Promise<{ scanned: boolean; findings: SkillScanFinding[] }> {
|
||||
const { filePath, maxFileBytes } = params;
|
||||
let st: Awaited<ReturnType<typeof fs.stat>> | null = null;
|
||||
try {
|
||||
st = await fs.stat(filePath);
|
||||
} catch (err) {
|
||||
if (hasErrnoCode(err, "ENOENT")) {
|
||||
return null;
|
||||
return { scanned: false, findings: [] };
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
if (!st?.isFile() || st.size > maxFileBytes) {
|
||||
return null;
|
||||
if (!st?.isFile()) {
|
||||
return { scanned: false, findings: [] };
|
||||
}
|
||||
const cached = getCachedFileScanResult({
|
||||
filePath,
|
||||
size: st.size,
|
||||
mtimeMs: st.mtimeMs,
|
||||
maxFileBytes,
|
||||
});
|
||||
if (cached) {
|
||||
return {
|
||||
scanned: cached.scanned,
|
||||
findings: cached.findings,
|
||||
};
|
||||
}
|
||||
|
||||
if (st.size > maxFileBytes) {
|
||||
const skippedEntry: FileScanCacheEntry = {
|
||||
size: st.size,
|
||||
mtimeMs: st.mtimeMs,
|
||||
maxFileBytes,
|
||||
scanned: false,
|
||||
findings: [],
|
||||
};
|
||||
setCachedFileScanResult(filePath, skippedEntry);
|
||||
return { scanned: false, findings: [] };
|
||||
}
|
||||
|
||||
let source: string;
|
||||
try {
|
||||
return await fs.readFile(filePath, "utf-8");
|
||||
source = await fs.readFile(filePath, "utf-8");
|
||||
} catch (err) {
|
||||
if (hasErrnoCode(err, "ENOENT")) {
|
||||
return null;
|
||||
return { scanned: false, findings: [] };
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
const findings = scanSource(source, filePath);
|
||||
setCachedFileScanResult(filePath, {
|
||||
size: st.size,
|
||||
mtimeMs: st.mtimeMs,
|
||||
maxFileBytes,
|
||||
scanned: true,
|
||||
findings,
|
||||
});
|
||||
return { scanned: true, findings };
|
||||
}
|
||||
|
||||
export async function scanDirectory(
|
||||
@ -386,12 +527,14 @@ export async function scanDirectory(
|
||||
const allFindings: SkillScanFinding[] = [];
|
||||
|
||||
for (const file of files) {
|
||||
const source = await readScannableSource(file, scanOptions.maxFileBytes);
|
||||
if (source == null) {
|
||||
const scanResult = await scanFileWithCache({
|
||||
filePath: file,
|
||||
maxFileBytes: scanOptions.maxFileBytes,
|
||||
});
|
||||
if (!scanResult.scanned) {
|
||||
continue;
|
||||
}
|
||||
const findings = scanSource(source, file);
|
||||
allFindings.push(...findings);
|
||||
allFindings.push(...scanResult.findings);
|
||||
}
|
||||
|
||||
return allFindings;
|
||||
@ -405,22 +548,36 @@ export async function scanDirectoryWithSummary(
|
||||
const files = await collectScannableFiles(dirPath, scanOptions);
|
||||
const allFindings: SkillScanFinding[] = [];
|
||||
let scannedFiles = 0;
|
||||
let critical = 0;
|
||||
let warn = 0;
|
||||
let info = 0;
|
||||
|
||||
for (const file of files) {
|
||||
const source = await readScannableSource(file, scanOptions.maxFileBytes);
|
||||
if (source == null) {
|
||||
const scanResult = await scanFileWithCache({
|
||||
filePath: file,
|
||||
maxFileBytes: scanOptions.maxFileBytes,
|
||||
});
|
||||
if (!scanResult.scanned) {
|
||||
continue;
|
||||
}
|
||||
scannedFiles += 1;
|
||||
const findings = scanSource(source, file);
|
||||
allFindings.push(...findings);
|
||||
for (const finding of scanResult.findings) {
|
||||
allFindings.push(finding);
|
||||
if (finding.severity === "critical") {
|
||||
critical += 1;
|
||||
} else if (finding.severity === "warn") {
|
||||
warn += 1;
|
||||
} else {
|
||||
info += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
scannedFiles,
|
||||
critical: allFindings.filter((f) => f.severity === "critical").length,
|
||||
warn: allFindings.filter((f) => f.severity === "warn").length,
|
||||
info: allFindings.filter((f) => f.severity === "info").length,
|
||||
critical,
|
||||
warn,
|
||||
info,
|
||||
findings: allFindings,
|
||||
};
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user