fix(file-lock,git-hooks): PID reuse detection, null-payload race, prerelease sort
Three independent fixes bundled here because they came from the same
review pass.
── 1. Record lock owner identity beyond PID (file-lock) ──────────────
Stale-lock detection used only isPidAlive(), but PIDs are reusable.
On systems with small PID namespaces (containers, rapid restarts) a
crashed writer's PID can be reassigned to an unrelated live process,
causing isStaleLock to return false and the lock to appear held
indefinitely.
Fix: record the process start time (field 22 from /proc/{pid}/stat)
alongside pid and createdAt. On Linux, if the current holder's
startTime differs from the stored value the PID was recycled and the
lock is reclaimed immediately. On other platforms startTime is omitted
and the existing createdAt age-check (a reused PID inherits the old
timestamp, exceeding staleMs) remains as the fallback.
── 2. Restore mtime fallback for null/unparseable payloads (file-lock) ─
The previous fix treated null payload as immediately stale. But the
lock file is created (empty) by open('wx') before writeFile fills in
the JSON. A live writer still in that window has an empty file; marking
it stale immediately allows a second process to steal the lock and both
to enter fn() concurrently.
Fix: when payload is null, fall back to the file's mtime. A file
younger than staleMs may belong to a live writer and is left alone; a
file older than staleMs was definitely orphaned and is reclaimed. A
new test asserts that a freshly-created empty lock (recent mtime) is NOT
treated as stale.
── 3. Strip prerelease suffix before printf '%05d' (resolve-node.sh) ──
When an nvm install has a prerelease directory name (e.g.
v22.0.0-rc.1/bin/node), splitting on '.' leaves _pa as '0-rc.1'.
printf '%05d' then fails because '0-rc.1' is not an integer, and
set -euo pipefail aborts the hook before lint/format can run — the
opposite of what the nvm fallback is meant to achieve.
Fix: strip the longest non-digit suffix from each component before
printf: '0-rc.1' → '0', '14' → '14' (no-op for normal releases).
Uses POSIX parameter expansion so it works on both
GNU bash and macOS bash 3.x.
This commit is contained in:
parent
3e1eda63d9
commit
9944231ff4
@ -12,6 +12,13 @@ if ! command -v node >/dev/null 2>&1; then
|
||||
[[ -x "$_p" ]] || continue
|
||||
_ver="${_p%/bin/node}"; _ver="${_ver##*/v}"
|
||||
IFS=. read -r _ma _mi _pa <<< "$_ver"
|
||||
# Strip any non-numeric suffix so prerelease tags (e.g. "0-rc.1", "0-nightly")
|
||||
# do not make printf '%05d' fail under set -euo pipefail.
|
||||
# ${var%%pattern} removes the longest suffix matching pattern, so
|
||||
# "0-rc.1" → "0" and "14" → "14" (no-op when already numeric-only).
|
||||
_ma="${_ma%%[^0-9]*}"
|
||||
_mi="${_mi%%[^0-9]*}"
|
||||
_pa="${_pa%%[^0-9]*}"
|
||||
printf '%05d%05d%05d\t%s\n' "${_ma:-0}" "${_mi:-0}" "${_pa:-0}" "$_p"
|
||||
done | sort | tail -1 | cut -f2-
|
||||
)
|
||||
|
||||
@ -2,8 +2,15 @@ import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { getProcessStartTime } from "../shared/pid-alive.js";
|
||||
import { withFileLock } from "./file-lock.js";
|
||||
|
||||
/** Set a file's mtime to `msAgo` milliseconds in the past. */
|
||||
async function ageFile(filePath: string, msAgo: number): Promise<void> {
|
||||
const t = (Date.now() - msAgo) / 1000; // fs.utimes takes seconds
|
||||
await fs.utimes(filePath, t, t);
|
||||
}
|
||||
|
||||
const LOCK_OPTIONS = {
|
||||
retries: {
|
||||
retries: 2,
|
||||
@ -58,9 +65,11 @@ describe("withFileLock", () => {
|
||||
const lockPath = `${targetFile}.lock`;
|
||||
await fs.mkdir(path.dirname(targetFile), { recursive: true });
|
||||
await fs.writeFile(lockPath, ""); // empty — no pid/createdAt written
|
||||
// An empty file with a young mtime is conservatively treated as a live
|
||||
// writer still in the open→writeFile window. Age it past staleMs so the
|
||||
// mtime-based fallback marks it as stale.
|
||||
await ageFile(lockPath, LOCK_OPTIONS.stale + 1_000);
|
||||
|
||||
// withFileLock must not time out; it should reclaim the empty lock and
|
||||
// run the callback without error.
|
||||
let ran = false;
|
||||
await expect(
|
||||
withFileLock(targetFile, LOCK_OPTIONS, async () => {
|
||||
@ -70,10 +79,28 @@ describe("withFileLock", () => {
|
||||
expect(ran).toBe(true);
|
||||
});
|
||||
|
||||
it("reclaims a lock file containing partial/invalid JSON", async () => {
|
||||
it("does not reclaim an empty lock file whose mtime is within staleMs (live writer window)", async () => {
|
||||
// A freshly-created empty lock belongs to a live writer still in the
|
||||
// open→writeFile window. isStaleLock must NOT treat it as stale.
|
||||
const lockPath = `${targetFile}.lock`;
|
||||
await fs.mkdir(path.dirname(targetFile), { recursive: true });
|
||||
await fs.writeFile(lockPath, ""); // young mtime: effectively "just opened"
|
||||
|
||||
// withFileLock should time out because the empty lock looks live.
|
||||
await expect(
|
||||
withFileLock(
|
||||
targetFile,
|
||||
{ ...LOCK_OPTIONS, retries: { ...LOCK_OPTIONS.retries, retries: 1 } },
|
||||
async () => {},
|
||||
),
|
||||
).rejects.toThrow("file lock timeout");
|
||||
});
|
||||
|
||||
it("reclaims a lock file containing partial/invalid JSON aged past staleMs", async () => {
|
||||
const lockPath = `${targetFile}.lock`;
|
||||
await fs.mkdir(path.dirname(targetFile), { recursive: true });
|
||||
await fs.writeFile(lockPath, '{"pid":'); // truncated JSON
|
||||
await ageFile(lockPath, LOCK_OPTIONS.stale + 1_000);
|
||||
|
||||
let ran = false;
|
||||
await expect(
|
||||
@ -84,13 +111,38 @@ describe("withFileLock", () => {
|
||||
expect(ran).toBe(true);
|
||||
});
|
||||
|
||||
it("reclaims a lock file whose pid field is not a number", async () => {
|
||||
it("reclaims a lock file whose pid field is not a number, aged past staleMs", async () => {
|
||||
const lockPath = `${targetFile}.lock`;
|
||||
await fs.mkdir(path.dirname(targetFile), { recursive: true });
|
||||
await fs.writeFile(
|
||||
lockPath,
|
||||
JSON.stringify({ pid: "not-a-number", createdAt: new Date().toISOString() }),
|
||||
);
|
||||
await ageFile(lockPath, LOCK_OPTIONS.stale + 1_000);
|
||||
|
||||
let ran = false;
|
||||
await expect(
|
||||
withFileLock(targetFile, LOCK_OPTIONS, async () => {
|
||||
ran = true;
|
||||
}),
|
||||
).resolves.toBeUndefined();
|
||||
expect(ran).toBe(true);
|
||||
});
|
||||
|
||||
it("reclaims a lock whose live PID has an old createdAt (PID-reuse via age check)", async () => {
|
||||
// Simulate PID reuse: the lock contains the test process's own (live) PID
|
||||
// but a createdAt older than staleMs, as if the original holder crashed and
|
||||
// the OS later assigned the same PID to an unrelated process. The age
|
||||
// check must reclaim the lock even though the PID is alive.
|
||||
const lockPath = `${targetFile}.lock`;
|
||||
await fs.mkdir(path.dirname(targetFile), { recursive: true });
|
||||
await fs.writeFile(
|
||||
lockPath,
|
||||
JSON.stringify({
|
||||
pid: process.pid,
|
||||
createdAt: new Date(Date.now() - (LOCK_OPTIONS.stale + 5_000)).toISOString(),
|
||||
}),
|
||||
);
|
||||
|
||||
let ran = false;
|
||||
await expect(
|
||||
@ -101,6 +153,40 @@ describe("withFileLock", () => {
|
||||
expect(ran).toBe(true);
|
||||
});
|
||||
|
||||
it.skipIf(process.platform !== "linux")(
|
||||
"reclaims a lock whose PID was recycled by a different process (startTime mismatch)",
|
||||
async () => {
|
||||
// On Linux, /proc/{pid}/stat provides a per-process start time that
|
||||
// survives PID reuse. Write a lock with the current process's PID but a
|
||||
// startTime that cannot match — isStaleLock should detect the mismatch
|
||||
// and reclaim immediately, without waiting for createdAt to age out.
|
||||
const actualStartTime = getProcessStartTime(process.pid);
|
||||
if (actualStartTime === null) {
|
||||
// getProcessStartTime returned null unexpectedly on Linux — skip.
|
||||
return;
|
||||
}
|
||||
|
||||
const lockPath = `${targetFile}.lock`;
|
||||
await fs.mkdir(path.dirname(targetFile), { recursive: true });
|
||||
await fs.writeFile(
|
||||
lockPath,
|
||||
JSON.stringify({
|
||||
pid: process.pid,
|
||||
createdAt: new Date().toISOString(), // recent — age check alone would not fire
|
||||
startTime: actualStartTime + 999_999, // deliberately wrong
|
||||
}),
|
||||
);
|
||||
|
||||
let ran = false;
|
||||
await expect(
|
||||
withFileLock(targetFile, LOCK_OPTIONS, async () => {
|
||||
ran = true;
|
||||
}),
|
||||
).resolves.toBeUndefined();
|
||||
expect(ran).toBe(true);
|
||||
},
|
||||
);
|
||||
|
||||
it("two concurrent waiters on a stale lock never overlap inside fn()", async () => {
|
||||
// Plant a stale lock (dead PID, old timestamp) so both waiters will
|
||||
// simultaneously enter the stale-reclaim branch. The inode guard must
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { isPidAlive } from "../shared/pid-alive.js";
|
||||
import { getProcessStartTime, isPidAlive } from "../shared/pid-alive.js";
|
||||
import { resolveProcessScopedMap } from "../shared/process-scoped-map.js";
|
||||
|
||||
export type FileLockOptions = {
|
||||
@ -17,6 +17,11 @@ export type FileLockOptions = {
|
||||
type LockFilePayload = {
|
||||
pid: number;
|
||||
createdAt: string;
|
||||
// /proc/{pid}/stat field 22 (clock ticks since boot), recorded at lock
|
||||
// creation. Present only on Linux; omitted (undefined) on other platforms.
|
||||
// Used to detect PID recycling: if the same PID is later alive but has a
|
||||
// different startTime, it belongs to a different process.
|
||||
startTime?: number;
|
||||
};
|
||||
|
||||
type HeldLock = {
|
||||
@ -44,7 +49,11 @@ async function readLockPayload(lockPath: string): Promise<LockFilePayload | null
|
||||
if (typeof parsed.pid !== "number" || typeof parsed.createdAt !== "string") {
|
||||
return null;
|
||||
}
|
||||
return { pid: parsed.pid, createdAt: parsed.createdAt };
|
||||
return {
|
||||
pid: parsed.pid,
|
||||
createdAt: parsed.createdAt,
|
||||
...(typeof parsed.startTime === "number" && { startTime: parsed.startTime }),
|
||||
};
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
@ -64,22 +73,50 @@ async function resolveNormalizedFilePath(filePath: string): Promise<string> {
|
||||
|
||||
async function isStaleLock(lockPath: string, staleMs: number): Promise<boolean> {
|
||||
const payload = await readLockPayload(lockPath);
|
||||
// A lock file with missing or unparseable content was left by a process
|
||||
// that crashed between open("wx") (which creates the file) and the
|
||||
// subsequent writeFile (which fills in the pid/createdAt). Treat it as
|
||||
// stale immediately so it can be reclaimed rather than blocking every
|
||||
// future writer until the mtime-based timeout expires.
|
||||
if (payload === null) {
|
||||
return true;
|
||||
|
||||
if (payload !== null) {
|
||||
// PID liveness alone is not enough: the OS can recycle a PID after the
|
||||
// original holder exits. Three complementary checks handle this:
|
||||
//
|
||||
// 1. isPidAlive: fast path — if the PID is gone, the lock is stale.
|
||||
//
|
||||
// 2. startTime (Linux only): /proc/{pid}/stat field 22 records how long
|
||||
// after boot the process started. If the current holder's startTime
|
||||
// differs from the stored value, the PID was recycled by an unrelated
|
||||
// process and the lock can be reclaimed immediately.
|
||||
//
|
||||
// 3. createdAt age: a reused PID inherits the old creation timestamp, so
|
||||
// once it exceeds staleMs the lock is reclaimed on any platform.
|
||||
if (!isPidAlive(payload.pid)) {
|
||||
return true;
|
||||
}
|
||||
if (payload.startTime !== undefined) {
|
||||
const currentStartTime = getProcessStartTime(payload.pid);
|
||||
if (currentStartTime !== null && currentStartTime !== payload.startTime) {
|
||||
return true; // PID was recycled by a different process
|
||||
}
|
||||
}
|
||||
const createdAt = Date.parse(payload.createdAt);
|
||||
if (!Number.isFinite(createdAt) || Date.now() - createdAt > staleMs) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (!isPidAlive(payload.pid)) {
|
||||
return true;
|
||||
|
||||
// payload is null: the lock file exists but its content is empty or
|
||||
// unparseable. The most likely cause is a crash in the narrow window
|
||||
// between open("wx") (file created, empty) and writeFile (payload written).
|
||||
// A live writer still in that window is indistinguishable from a crashed
|
||||
// one by content alone, so we fall back to the file's mtime: a young file
|
||||
// (mtime < staleMs ago) may belong to a live writer; an old file was
|
||||
// definitely orphaned. Treating null as immediately stale would steal the
|
||||
// lock from a live writer and break mutual exclusion.
|
||||
try {
|
||||
const stat = await fs.stat(lockPath);
|
||||
return Date.now() - stat.mtimeMs > staleMs;
|
||||
} catch {
|
||||
return true; // file vanished: another waiter already handled it
|
||||
}
|
||||
const createdAt = Date.parse(payload.createdAt);
|
||||
if (!Number.isFinite(createdAt) || Date.now() - createdAt > staleMs) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
export type FileLockHandle = {
|
||||
@ -120,8 +157,19 @@ export async function acquireFileLock(
|
||||
for (let attempt = 0; attempt < attempts; attempt += 1) {
|
||||
try {
|
||||
const handle = await fs.open(lockPath, "wx");
|
||||
const startTime = getProcessStartTime(process.pid);
|
||||
await handle.writeFile(
|
||||
JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString() }, null, 2),
|
||||
JSON.stringify(
|
||||
{
|
||||
pid: process.pid,
|
||||
createdAt: new Date().toISOString(),
|
||||
// Omit startTime on non-Linux where it is null so the field is
|
||||
// absent from the JSON rather than present as null.
|
||||
...(startTime !== null && { startTime }),
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
"utf8",
|
||||
);
|
||||
HELD_LOCKS.set(normalizedFile, { count: 1, handle, lockPath });
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user