diff --git a/src/gateway/server-tailscale.test.ts b/src/gateway/server-tailscale.test.ts index 1fef3839453..4c9dc0e5fc1 100644 --- a/src/gateway/server-tailscale.test.ts +++ b/src/gateway/server-tailscale.test.ts @@ -27,6 +27,7 @@ function createOwnerStore() { claimedAt: string; phase: "active" | "cleaning"; cleanupStartedAt?: string; + alive: boolean; } = null; let nextPid = process.pid - 1; @@ -40,6 +41,7 @@ function createOwnerStore() { pid: nextPid, claimedAt: new Date(0).toISOString(), phase: "active" as const, + alive: true, }; currentOwner = owner; return { owner, previousOwner }; @@ -52,7 +54,10 @@ function createOwnerStore() { return true; }, async runCleanupIfCurrentOwner(token: string, cleanup: () => Promise) { - if (currentOwner?.token !== token) { + if (!currentOwner) { + return false; + } + if (currentOwner.token !== token && currentOwner.alive) { return false; } currentOwner = { @@ -64,6 +69,11 @@ function createOwnerStore() { currentOwner = null; return true; }, + markCurrentOwnerDead() { + if (currentOwner) { + currentOwner = { ...currentOwner, alive: false }; + } + }, }; } @@ -204,6 +214,36 @@ describe.each(modeCases)( expect(disableMock).toHaveBeenCalledTimes(1); }); + it("reclaims cleanup from a dead newer owner before exit", async () => { + const ownerStore = createOwnerStore(); + const logTailscale = { + info: vi.fn(), + warn: vi.fn(), + }; + + const cleanupA = await startGatewayTailscaleExposure({ + tailscaleMode: mode, + resetOnExit: true, + port: 18789, + logTailscale, + ownerStore, + }); + await startGatewayTailscaleExposure({ + tailscaleMode: mode, + resetOnExit: true, + port: 18789, + logTailscale, + ownerStore, + }); + ownerStore.markCurrentOwnerDead(); + + await cleanupA?.(); + expect(disableMock).toHaveBeenCalledTimes(1); + expect(logTailscale.info).not.toHaveBeenCalledWith( + `${mode} cleanup skipped: not the current owner`, + ); + }); + it("falls back to unguarded cleanup when the ownership guard cannot claim", async () => { const logTailscale = { info: vi.fn(), @@ -271,5 +311,49 @@ describe.each(modeCases)( `${mode} ownership cleanup still in progress; skipping external exposure`, ); }); + + it("falls back to a direct reset when guarded cleanup bookkeeping fails", async () => { + const logTailscale = { + info: vi.fn(), + warn: vi.fn(), + }; + + const cleanup = await startGatewayTailscaleExposure({ + tailscaleMode: mode, + resetOnExit: true, + port: 18789, + logTailscale, + ownerStore: { + async claim() { + return { + owner: { + token: "owner-1", + mode, + port: 18789, + pid: process.pid, + claimedAt: new Date(0).toISOString(), + phase: "active" as const, + }, + previousOwner: null, + }; + }, + async replaceIfCurrent() { + return true; + }, + async runCleanupIfCurrentOwner() { + throw new Error("lock dir unavailable"); + }, + }, + }); + + await cleanup?.(); + expect(disableMock).toHaveBeenCalledTimes(1); + expect(logTailscale.warn).toHaveBeenCalledWith( + `${mode} cleanup failed: lock dir unavailable`, + ); + expect(logTailscale.warn).toHaveBeenCalledWith( + `${mode} cleanup guard failed; applied direct reset fallback`, + ); + }); }, ); diff --git a/src/gateway/server-tailscale.ts b/src/gateway/server-tailscale.ts index d7b08c50722..1d7521ed223 100644 --- a/src/gateway/server-tailscale.ts +++ b/src/gateway/server-tailscale.ts @@ -160,17 +160,12 @@ function createTailscaleExposureOwnerStore(): TailscaleExposureOwnerStore { return { async claim(mode, port) { + const claimDeadline = Date.now() + cleanupClaimWaitMs; while (true) { const result = await withOwnerLock(async () => { const previousOwner = await readOwner(); if (previousOwner?.phase === "cleaning" && isPidAlive(previousOwner.pid)) { - const cleanupStartedAtMs = Date.parse( - previousOwner.cleanupStartedAt ?? previousOwner.claimedAt, - ); - const cleanupAgeMs = Number.isFinite(cleanupStartedAtMs) - ? Date.now() - cleanupStartedAtMs - : Number.POSITIVE_INFINITY; - if (cleanupAgeMs < cleanupClaimWaitMs) { + if (Date.now() < claimDeadline) { return { type: "wait" as const }; } return { type: "blocked" as const, previousOwner }; @@ -214,9 +209,14 @@ function createTailscaleExposureOwnerStore(): TailscaleExposureOwnerStore { async runCleanupIfCurrentOwner(token, cleanup) { const cleanupOwner = await withOwnerLock(async () => { const current = await readOwner(); - if (current?.token !== token) { + if (!current) { return null; } + if (current.token !== token && isPidAlive(current.pid)) { + return null; + } + // If a newer owner died without cleaning up, reclaim its stale exposure + // before this older process exits so resetOnExit still clears Tailscale. // Mark cleanup in progress before releasing the lock so overlapping // startups cannot claim exposure until this reset finishes or fails. const nextOwner: TailscaleExposureOwnerRecord = { @@ -314,6 +314,9 @@ export async function startGatewayTailscaleExposure(params: { } } catch (err) { if (owner) { + // Restore the previous owner's original token so its own cleanup hook can + // still prove ownership later, or keep this failed owner if it needs to + // reset any partial/orphaned Tailscale state on exit. const nextOwner = previousOwner && isPidAlive(previousOwner.pid) ? previousOwner @@ -331,15 +334,21 @@ export async function startGatewayTailscaleExposure(params: { return null; } + const disableExposure = async () => { + if (params.tailscaleMode === "serve") { + await disableTailscaleServe(); + } else { + await disableTailscaleFunnel(); + } + }; + return async () => { try { if (owner) { + // A failed enable can still leave behind partial or orphaned Tailscale + // config, so shutdown cleanup intentionally re-runs the global reset. const cleanedUp = await ownerStore.runCleanupIfCurrentOwner(owner.token, async () => { - if (params.tailscaleMode === "serve") { - await disableTailscaleServe(); - } else { - await disableTailscaleFunnel(); - } + await disableExposure(); }); if (!cleanedUp) { params.logTailscale.info( @@ -349,15 +358,22 @@ export async function startGatewayTailscaleExposure(params: { return; } - if (params.tailscaleMode === "serve") { - await disableTailscaleServe(); - } else { - await disableTailscaleFunnel(); - } + await disableExposure(); } catch (err) { params.logTailscale.warn( `${params.tailscaleMode} cleanup failed: ${err instanceof Error ? err.message : String(err)}`, ); + if (!owner) { + return; + } + try { + await disableExposure(); + params.logTailscale.warn( + `${params.tailscaleMode} cleanup guard failed; applied direct reset fallback`, + ); + } catch { + // The direct reset failed too; keep the original warning above. + } } }; }