diff --git a/scripts/pre-commit/resolve-node.sh b/scripts/pre-commit/resolve-node.sh index f51226889fc..60a77ed759b 100644 --- a/scripts/pre-commit/resolve-node.sh +++ b/scripts/pre-commit/resolve-node.sh @@ -12,6 +12,13 @@ if ! command -v node >/dev/null 2>&1; then [[ -x "$_p" ]] || continue _ver="${_p%/bin/node}"; _ver="${_ver##*/v}" IFS=. read -r _ma _mi _pa <<< "$_ver" + # Strip any non-numeric suffix so prerelease tags (e.g. "0-rc.1", "0-nightly") + # do not make printf '%05d' fail under set -euo pipefail. + # ${var%%pattern} removes the longest suffix matching pattern, so + # "0-rc.1" → "0" and "14" → "14" (no-op when already numeric-only). + _ma="${_ma%%[^0-9]*}" + _mi="${_mi%%[^0-9]*}" + _pa="${_pa%%[^0-9]*}" printf '%05d%05d%05d\t%s\n' "${_ma:-0}" "${_mi:-0}" "${_pa:-0}" "$_p" done | sort | tail -1 | cut -f2- ) diff --git a/src/plugin-sdk/file-lock.test.ts b/src/plugin-sdk/file-lock.test.ts index a49005192ea..012b574d7c1 100644 --- a/src/plugin-sdk/file-lock.test.ts +++ b/src/plugin-sdk/file-lock.test.ts @@ -2,8 +2,15 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { getProcessStartTime } from "../shared/pid-alive.js"; import { withFileLock } from "./file-lock.js"; +/** Set a file's mtime to `msAgo` milliseconds in the past. */ +async function ageFile(filePath: string, msAgo: number): Promise { + const t = (Date.now() - msAgo) / 1000; // fs.utimes takes seconds + await fs.utimes(filePath, t, t); +} + const LOCK_OPTIONS = { retries: { retries: 2, @@ -58,9 +65,11 @@ describe("withFileLock", () => { const lockPath = `${targetFile}.lock`; await fs.mkdir(path.dirname(targetFile), { recursive: true }); await fs.writeFile(lockPath, ""); // empty — no pid/createdAt written + // An empty file with a young mtime is conservatively treated as a live + // writer still in the open→writeFile window. Age it past staleMs so the + // mtime-based fallback marks it as stale. + await ageFile(lockPath, LOCK_OPTIONS.stale + 1_000); - // withFileLock must not time out; it should reclaim the empty lock and - // run the callback without error. let ran = false; await expect( withFileLock(targetFile, LOCK_OPTIONS, async () => { @@ -70,10 +79,28 @@ describe("withFileLock", () => { expect(ran).toBe(true); }); - it("reclaims a lock file containing partial/invalid JSON", async () => { + it("does not reclaim an empty lock file whose mtime is within staleMs (live writer window)", async () => { + // A freshly-created empty lock belongs to a live writer still in the + // open→writeFile window. isStaleLock must NOT treat it as stale. + const lockPath = `${targetFile}.lock`; + await fs.mkdir(path.dirname(targetFile), { recursive: true }); + await fs.writeFile(lockPath, ""); // young mtime: effectively "just opened" + + // withFileLock should time out because the empty lock looks live. + await expect( + withFileLock( + targetFile, + { ...LOCK_OPTIONS, retries: { ...LOCK_OPTIONS.retries, retries: 1 } }, + async () => {}, + ), + ).rejects.toThrow("file lock timeout"); + }); + + it("reclaims a lock file containing partial/invalid JSON aged past staleMs", async () => { const lockPath = `${targetFile}.lock`; await fs.mkdir(path.dirname(targetFile), { recursive: true }); await fs.writeFile(lockPath, '{"pid":'); // truncated JSON + await ageFile(lockPath, LOCK_OPTIONS.stale + 1_000); let ran = false; await expect( @@ -84,13 +111,38 @@ describe("withFileLock", () => { expect(ran).toBe(true); }); - it("reclaims a lock file whose pid field is not a number", async () => { + it("reclaims a lock file whose pid field is not a number, aged past staleMs", async () => { const lockPath = `${targetFile}.lock`; await fs.mkdir(path.dirname(targetFile), { recursive: true }); await fs.writeFile( lockPath, JSON.stringify({ pid: "not-a-number", createdAt: new Date().toISOString() }), ); + await ageFile(lockPath, LOCK_OPTIONS.stale + 1_000); + + let ran = false; + await expect( + withFileLock(targetFile, LOCK_OPTIONS, async () => { + ran = true; + }), + ).resolves.toBeUndefined(); + expect(ran).toBe(true); + }); + + it("reclaims a lock whose live PID has an old createdAt (PID-reuse via age check)", async () => { + // Simulate PID reuse: the lock contains the test process's own (live) PID + // but a createdAt older than staleMs, as if the original holder crashed and + // the OS later assigned the same PID to an unrelated process. The age + // check must reclaim the lock even though the PID is alive. + const lockPath = `${targetFile}.lock`; + await fs.mkdir(path.dirname(targetFile), { recursive: true }); + await fs.writeFile( + lockPath, + JSON.stringify({ + pid: process.pid, + createdAt: new Date(Date.now() - (LOCK_OPTIONS.stale + 5_000)).toISOString(), + }), + ); let ran = false; await expect( @@ -101,6 +153,40 @@ describe("withFileLock", () => { expect(ran).toBe(true); }); + it.skipIf(process.platform !== "linux")( + "reclaims a lock whose PID was recycled by a different process (startTime mismatch)", + async () => { + // On Linux, /proc/{pid}/stat provides a per-process start time that + // survives PID reuse. Write a lock with the current process's PID but a + // startTime that cannot match — isStaleLock should detect the mismatch + // and reclaim immediately, without waiting for createdAt to age out. + const actualStartTime = getProcessStartTime(process.pid); + if (actualStartTime === null) { + // getProcessStartTime returned null unexpectedly on Linux — skip. + return; + } + + const lockPath = `${targetFile}.lock`; + await fs.mkdir(path.dirname(targetFile), { recursive: true }); + await fs.writeFile( + lockPath, + JSON.stringify({ + pid: process.pid, + createdAt: new Date().toISOString(), // recent — age check alone would not fire + startTime: actualStartTime + 999_999, // deliberately wrong + }), + ); + + let ran = false; + await expect( + withFileLock(targetFile, LOCK_OPTIONS, async () => { + ran = true; + }), + ).resolves.toBeUndefined(); + expect(ran).toBe(true); + }, + ); + it("two concurrent waiters on a stale lock never overlap inside fn()", async () => { // Plant a stale lock (dead PID, old timestamp) so both waiters will // simultaneously enter the stale-reclaim branch. The inode guard must diff --git a/src/plugin-sdk/file-lock.ts b/src/plugin-sdk/file-lock.ts index 061f3335920..f09a594d2f6 100644 --- a/src/plugin-sdk/file-lock.ts +++ b/src/plugin-sdk/file-lock.ts @@ -1,6 +1,6 @@ import fs from "node:fs/promises"; import path from "node:path"; -import { isPidAlive } from "../shared/pid-alive.js"; +import { getProcessStartTime, isPidAlive } from "../shared/pid-alive.js"; import { resolveProcessScopedMap } from "../shared/process-scoped-map.js"; export type FileLockOptions = { @@ -17,6 +17,11 @@ export type FileLockOptions = { type LockFilePayload = { pid: number; createdAt: string; + // /proc/{pid}/stat field 22 (clock ticks since boot), recorded at lock + // creation. Present only on Linux; omitted (undefined) on other platforms. + // Used to detect PID recycling: if the same PID is later alive but has a + // different startTime, it belongs to a different process. + startTime?: number; }; type HeldLock = { @@ -44,7 +49,11 @@ async function readLockPayload(lockPath: string): Promise { async function isStaleLock(lockPath: string, staleMs: number): Promise { const payload = await readLockPayload(lockPath); - // A lock file with missing or unparseable content was left by a process - // that crashed between open("wx") (which creates the file) and the - // subsequent writeFile (which fills in the pid/createdAt). Treat it as - // stale immediately so it can be reclaimed rather than blocking every - // future writer until the mtime-based timeout expires. - if (payload === null) { - return true; + + if (payload !== null) { + // PID liveness alone is not enough: the OS can recycle a PID after the + // original holder exits. Three complementary checks handle this: + // + // 1. isPidAlive: fast path — if the PID is gone, the lock is stale. + // + // 2. startTime (Linux only): /proc/{pid}/stat field 22 records how long + // after boot the process started. If the current holder's startTime + // differs from the stored value, the PID was recycled by an unrelated + // process and the lock can be reclaimed immediately. + // + // 3. createdAt age: a reused PID inherits the old creation timestamp, so + // once it exceeds staleMs the lock is reclaimed on any platform. + if (!isPidAlive(payload.pid)) { + return true; + } + if (payload.startTime !== undefined) { + const currentStartTime = getProcessStartTime(payload.pid); + if (currentStartTime !== null && currentStartTime !== payload.startTime) { + return true; // PID was recycled by a different process + } + } + const createdAt = Date.parse(payload.createdAt); + if (!Number.isFinite(createdAt) || Date.now() - createdAt > staleMs) { + return true; + } + return false; } - if (!isPidAlive(payload.pid)) { - return true; + + // payload is null: the lock file exists but its content is empty or + // unparseable. The most likely cause is a crash in the narrow window + // between open("wx") (file created, empty) and writeFile (payload written). + // A live writer still in that window is indistinguishable from a crashed + // one by content alone, so we fall back to the file's mtime: a young file + // (mtime < staleMs ago) may belong to a live writer; an old file was + // definitely orphaned. Treating null as immediately stale would steal the + // lock from a live writer and break mutual exclusion. + try { + const stat = await fs.stat(lockPath); + return Date.now() - stat.mtimeMs > staleMs; + } catch { + return true; // file vanished: another waiter already handled it } - const createdAt = Date.parse(payload.createdAt); - if (!Number.isFinite(createdAt) || Date.now() - createdAt > staleMs) { - return true; - } - return false; } export type FileLockHandle = { @@ -120,8 +157,19 @@ export async function acquireFileLock( for (let attempt = 0; attempt < attempts; attempt += 1) { try { const handle = await fs.open(lockPath, "wx"); + const startTime = getProcessStartTime(process.pid); await handle.writeFile( - JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString() }, null, 2), + JSON.stringify( + { + pid: process.pid, + createdAt: new Date().toISOString(), + // Omit startTime on non-Linux where it is null so the field is + // absent from the JSON rather than present as null. + ...(startTime !== null && { startTime }), + }, + null, + 2, + ), "utf8", ); HELD_LOCKS.set(normalizedFile, { count: 1, handle, lockPath });