diff --git a/src/gateway/server-tailscale.test.ts b/src/gateway/server-tailscale.test.ts index 75fe443dea2..29575d6f743 100644 --- a/src/gateway/server-tailscale.test.ts +++ b/src/gateway/server-tailscale.test.ts @@ -59,74 +59,141 @@ function createOwnerStore() { }; } -describe("startGatewayTailscaleExposure", () => { - beforeEach(() => { - vi.clearAllMocks(); - }); +const modeCases = [ + { + mode: "serve" as const, + enableMock: tailscaleState.enableServe, + disableMock: tailscaleState.disableServe, + }, + { + mode: "funnel" as const, + enableMock: tailscaleState.enableFunnel, + disableMock: tailscaleState.disableFunnel, + }, +]; - it("skips stale serve cleanup after a newer gateway takes ownership", async () => { - const ownerStore = createOwnerStore(); - const logTailscale = { - info: vi.fn(), - warn: vi.fn(), - }; - - const cleanupA = await startGatewayTailscaleExposure({ - tailscaleMode: "serve", - resetOnExit: true, - port: 18789, - logTailscale, - ownerStore, - }); - const cleanupB = await startGatewayTailscaleExposure({ - tailscaleMode: "serve", - resetOnExit: true, - port: 18789, - logTailscale, - ownerStore, +describe.each(modeCases)( + "startGatewayTailscaleExposure (%s)", + ({ mode, enableMock, disableMock }) => { + beforeEach(() => { + vi.restoreAllMocks(); + vi.clearAllMocks(); }); - await cleanupA?.(); - expect(tailscaleState.disableServe).not.toHaveBeenCalled(); - expect(logTailscale.info).toHaveBeenCalledWith("serve cleanup skipped: not the current owner"); + it("skips stale cleanup after a newer gateway takes ownership", async () => { + const ownerStore = createOwnerStore(); + const logTailscale = { + info: vi.fn(), + warn: vi.fn(), + }; - await cleanupB?.(); - expect(tailscaleState.disableServe).toHaveBeenCalledTimes(1); - }); + const cleanupA = await startGatewayTailscaleExposure({ + tailscaleMode: mode, + resetOnExit: true, + port: 18789, + logTailscale, + ownerStore, + }); + const cleanupB = await startGatewayTailscaleExposure({ + tailscaleMode: mode, + resetOnExit: true, + port: 18789, + logTailscale, + ownerStore, + }); - it("restores the previous owner after a takeover startup failure", async () => { - const ownerStore = createOwnerStore(); - const logTailscale = { - info: vi.fn(), - warn: vi.fn(), - }; + await cleanupA?.(); + expect(disableMock).not.toHaveBeenCalled(); + expect(logTailscale.info).toHaveBeenCalledWith( + `${mode} cleanup skipped: not the current owner`, + ); - const cleanupA = await startGatewayTailscaleExposure({ - tailscaleMode: "serve", - resetOnExit: true, - port: 18789, - logTailscale, - ownerStore, + await cleanupB?.(); + expect(disableMock).toHaveBeenCalledTimes(1); }); - tailscaleState.enableServe.mockRejectedValueOnce(new Error("boom")); + it("restores the previous live owner after a takeover startup failure", async () => { + const ownerStore = createOwnerStore(); + const logTailscale = { + info: vi.fn(), + warn: vi.fn(), + }; - const cleanupB = await startGatewayTailscaleExposure({ - tailscaleMode: "serve", - resetOnExit: true, - port: 18789, - logTailscale, - ownerStore, + const cleanupA = await startGatewayTailscaleExposure({ + tailscaleMode: mode, + resetOnExit: true, + port: 18789, + logTailscale, + ownerStore, + }); + + enableMock.mockRejectedValueOnce(new Error("boom")); + + const cleanupB = await startGatewayTailscaleExposure({ + tailscaleMode: mode, + resetOnExit: true, + port: 18789, + logTailscale, + ownerStore, + }); + + expect(cleanupB).not.toBeNull(); + expect(logTailscale.warn).toHaveBeenCalledWith(`${mode} failed: boom`); + + await cleanupB?.(); + expect(disableMock).not.toHaveBeenCalled(); + expect(logTailscale.info).toHaveBeenCalledWith( + `${mode} cleanup skipped: not the current owner`, + ); + + await cleanupA?.(); + expect(disableMock).toHaveBeenCalledTimes(1); }); - expect(cleanupB).not.toBeNull(); - expect(logTailscale.warn).toHaveBeenCalledWith("serve failed: boom"); + it("keeps the failed owner when the previous owner is already gone", async () => { + const ownerStore = createOwnerStore(); + const logTailscale = { + info: vi.fn(), + warn: vi.fn(), + }; - await cleanupB?.(); - expect(tailscaleState.disableServe).not.toHaveBeenCalled(); - expect(logTailscale.info).toHaveBeenCalledWith("serve cleanup skipped: not the current owner"); + const cleanupA = await startGatewayTailscaleExposure({ + tailscaleMode: mode, + resetOnExit: true, + port: 18789, + logTailscale, + ownerStore, + }); - await cleanupA?.(); - expect(tailscaleState.disableServe).toHaveBeenCalledTimes(1); - }); -}); + vi.spyOn(process, "kill").mockImplementation(((pid: number) => { + if (pid === 1) { + const err = new Error("gone") as NodeJS.ErrnoException; + err.code = "ESRCH"; + throw err; + } + return true; + }) as typeof process.kill); + enableMock.mockRejectedValueOnce(new Error("boom")); + + const cleanupB = await startGatewayTailscaleExposure({ + tailscaleMode: mode, + resetOnExit: true, + port: 18789, + logTailscale, + ownerStore, + }); + + expect(cleanupB).not.toBeNull(); + expect(logTailscale.warn).toHaveBeenCalledWith(`${mode} failed: boom`); + + await cleanupA?.(); + expect(disableMock).not.toHaveBeenCalled(); + expect(logTailscale.info).toHaveBeenCalledWith( + `${mode} cleanup skipped: not the current owner`, + ); + + await cleanupB?.(); + expect(disableMock).toHaveBeenCalledTimes(1); + }); + }, +); diff --git a/src/gateway/server-tailscale.ts b/src/gateway/server-tailscale.ts index 9bc18d1e660..73dfb792fae 100644 --- a/src/gateway/server-tailscale.ts +++ b/src/gateway/server-tailscale.ts @@ -32,6 +32,15 @@ type TailscaleExposureOwnerStore = { runCleanupIfCurrentOwner(token: string, cleanup: () => Promise): Promise; }; +function isPidAlive(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch (err) { + return (err as NodeJS.ErrnoException | undefined)?.code !== "ESRCH"; + } +} + function createTailscaleExposureOwnerStore(): TailscaleExposureOwnerStore { const ownerFilePath = path.join(resolveGatewayLockDir(), "tailscale-exposure-owner.json"); const ownerLockPath = path.join(resolveGatewayLockDir(), "tailscale-exposure-owner.lock"); @@ -64,29 +73,15 @@ function createTailscaleExposureOwnerStore(): TailscaleExposureOwnerStore { await new Promise((resolve) => setTimeout(resolve, ms)); } - function isPidAlive(pid: number): boolean { - try { - process.kill(pid, 0); - return true; - } catch (err) { - return (err as NodeJS.ErrnoException | undefined)?.code !== "ESRCH"; - } - } - async function breakStaleLock() { try { - const [raw, stat] = await Promise.all([ - fs.readFile(ownerLockPath, "utf8").catch(() => null), - fs.stat(ownerLockPath), - ]); + const stat = await fs.stat(ownerLockPath); if (Date.now() - stat.mtimeMs < lockStaleMs) { return; } - const parsed = raw ? JSON.parse(raw) : null; - const pid = typeof parsed?.pid === "number" ? parsed.pid : null; - if (pid !== null && isPidAlive(pid)) { - return; - } + // All lock holders only perform short file I/O plus the Tailscale CLI calls, + // and those helpers already time out after 15s. If the lock still exists after + // the wider stale window, assume the holder is wedged and break it. await fs.unlink(ownerLockPath).catch(() => {}); } catch (err) { if ((err as NodeJS.ErrnoException | undefined)?.code !== "ENOENT") { @@ -202,7 +197,13 @@ export async function startGatewayTailscaleExposure(params: { params.logTailscale.info(`${params.tailscaleMode} enabled`); } } catch (err) { - await ownerStore.replaceIfCurrent(owner.token, previousOwner).catch(() => {}); + const nextOwner = + previousOwner && isPidAlive(previousOwner.pid) + ? previousOwner + : params.resetOnExit + ? owner + : null; + await ownerStore.replaceIfCurrent(owner.token, nextOwner).catch(() => {}); params.logTailscale.warn( `${params.tailscaleMode} failed: ${err instanceof Error ? err.message : String(err)}`, );