From fe36d98bc32e4c7a2253849f6da6bc8edcc02437 Mon Sep 17 00:00:00 2001 From: Alix-007 <267018309+Alix-007@users.noreply.github.com> Date: Thu, 19 Mar 2026 14:03:17 +0800 Subject: [PATCH 1/2] fix(daemon): avoid killing current gateway pid on restart --- src/cli/daemon-cli/lifecycle.test.ts | 26 +++++++++++++++++++++++++- src/cli/daemon-cli/lifecycle.ts | 13 ++++++++++--- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/cli/daemon-cli/lifecycle.test.ts b/src/cli/daemon-cli/lifecycle.test.ts index f026f81399f..7da1935c1e2 100644 --- a/src/cli/daemon-cli/lifecycle.test.ts +++ b/src/cli/daemon-cli/lifecycle.test.ts @@ -3,7 +3,7 @@ import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vite type RestartHealthSnapshot = { healthy: boolean; staleGatewayPids: number[]; - runtime: { status?: string }; + runtime: { status?: string; pid?: number }; portUsage: { port: number; status: string; listeners: []; hints: []; errors?: string[] }; }; @@ -206,6 +206,30 @@ describe("runDaemonRestart health checks", () => { expect(waitForGatewayHealthyRestart).toHaveBeenCalledTimes(2); }); + it("does not kill the current running gateway pid when stale detection includes self", async () => { + const unhealthy: RestartHealthSnapshot = { + healthy: false, + staleGatewayPids: [1993, 2111], + runtime: { status: "running", pid: 1993 }, + portUsage: { port: 18789, status: "busy", listeners: [], hints: [] }, + }; + const healthy: RestartHealthSnapshot = { + healthy: true, + staleGatewayPids: [], + runtime: { status: "running", pid: 1993 }, + portUsage: { port: 18789, status: "busy", listeners: [], hints: [] }, + }; + waitForGatewayHealthyRestart.mockResolvedValueOnce(unhealthy).mockResolvedValueOnce(healthy); + terminateStaleGatewayPids.mockResolvedValue([2111]); + + const result = await runDaemonRestart({ json: true }); + + expect(result).toBe(true); + expect(terminateStaleGatewayPids).toHaveBeenCalledWith([2111]); + expect(service.restart).toHaveBeenCalledTimes(1); + expect(waitForGatewayHealthyRestart).toHaveBeenCalledTimes(2); + }); + it("skips stale-pid retry health checks when the retry restart is only scheduled", async () => { const unhealthy: RestartHealthSnapshot = { healthy: false, diff --git a/src/cli/daemon-cli/lifecycle.ts b/src/cli/daemon-cli/lifecycle.ts index d3e01f66412..df85ee3cc04 100644 --- a/src/cli/daemon-cli/lifecycle.ts +++ b/src/cli/daemon-cli/lifecycle.ts @@ -207,15 +207,22 @@ export async function runDaemonRestart(opts: DaemonLifecycleOptions = {}): Promi includeUnknownListenersAsStale: process.platform === "win32", }); - if (!health.healthy && health.staleGatewayPids.length > 0) { - const staleMsg = `Found stale gateway process(es): ${health.staleGatewayPids.join(", ")}.`; + const staleGatewayPids = health.staleGatewayPids.filter((pid) => { + if (health.runtime.status !== "running") { + return true; + } + return health.runtime.pid == null || pid !== health.runtime.pid; + }); + + if (!health.healthy && staleGatewayPids.length > 0) { + const staleMsg = `Found stale gateway process(es): ${staleGatewayPids.join(", ")}.`; warnings.push(staleMsg); if (!json) { defaultRuntime.log(theme.warn(staleMsg)); defaultRuntime.log(theme.muted("Stopping stale process(es) and retrying restart...")); } - await terminateStaleGatewayPids(health.staleGatewayPids); + await terminateStaleGatewayPids(staleGatewayPids); const retryRestart = await service.restart({ env: process.env, stdout }); if (retryRestart.outcome === "scheduled") { return retryRestart; From 37828cd4bb3972e12ae48ecb002c61e7cb29628c Mon Sep 17 00:00:00 2001 From: Alix-007 <267018309+Alix-007@users.noreply.github.com> Date: Thu, 19 Mar 2026 16:21:16 +0800 Subject: [PATCH 2/2] test(daemon): cover self-only stale pid restart failure --- src/cli/daemon-cli/lifecycle.test.ts | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/cli/daemon-cli/lifecycle.test.ts b/src/cli/daemon-cli/lifecycle.test.ts index 7da1935c1e2..6c1fde92ab2 100644 --- a/src/cli/daemon-cli/lifecycle.test.ts +++ b/src/cli/daemon-cli/lifecycle.test.ts @@ -230,6 +230,26 @@ describe("runDaemonRestart health checks", () => { expect(waitForGatewayHealthyRestart).toHaveBeenCalledTimes(2); }); + it("fails without killing the current runtime pid when stale detection only includes self", async () => { + const unhealthy: RestartHealthSnapshot = { + healthy: false, + staleGatewayPids: [1993], + runtime: { status: "running", pid: 1993 }, + portUsage: { port: 18789, status: "busy", listeners: [], hints: [] }, + }; + waitForGatewayHealthyRestart.mockResolvedValue(unhealthy); + + await expect(runDaemonRestart({ json: true })).rejects.toMatchObject({ + message: "Gateway restart timed out after 60s waiting for health checks.", + hints: ["openclaw gateway status --deep", "openclaw doctor"], + }); + + expect(terminateStaleGatewayPids).not.toHaveBeenCalled(); + expect(service.restart).not.toHaveBeenCalled(); + expect(waitForGatewayHealthyRestart).toHaveBeenCalledTimes(1); + expect(renderRestartDiagnostics).toHaveBeenCalledTimes(1); + }); + it("skips stale-pid retry health checks when the retry restart is only scheduled", async () => { const unhealthy: RestartHealthSnapshot = { healthy: false,