From cdcb9464fb76c0b0282232ece745439bfd4e03a0 Mon Sep 17 00:00:00 2001 From: Chance Robinson Date: Wed, 11 Mar 2026 22:59:39 -0400 Subject: [PATCH] fix(gateway): auto-repair unloaded LaunchAgent on start/restart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a macOS LaunchAgent silently unloads after sleep or extended idle, `gateway start` and `gateway restart` would bail with 'not loaded' hints instead of attempting recovery — even though the plist still exists on disk and `repairLaunchAgentBootstrap()` can re-register it. Changes: - Add optional `repairNotLoaded` method to `GatewayService` interface - Wire darwin implementation: checks plist exists, calls `repairLaunchAgentBootstrap()` (enable + bootstrap + kickstart) - `runServiceStart()`: when service is not loaded but repair succeeds, proceed with normal start flow instead of printing install hints - `runServiceRestart()`: when `onNotLoaded` returns null (no running process to signal) but repair succeeds, proceed with restart flow - Both paths are best-effort: if repair fails or throws, falls through to existing not-loaded behavior (no regression) Fixes #43602 --- src/cli/daemon-cli/lifecycle-core.test.ts | 137 ++++++++++++++++++++++ src/cli/daemon-cli/lifecycle-core.ts | 43 ++++++- src/daemon/service.ts | 20 ++++ 3 files changed, 198 insertions(+), 2 deletions(-) diff --git a/src/cli/daemon-cli/lifecycle-core.test.ts b/src/cli/daemon-cli/lifecycle-core.test.ts index 2f17269eb6c..9cbcc163193 100644 --- a/src/cli/daemon-cli/lifecycle-core.test.ts +++ b/src/cli/daemon-cli/lifecycle-core.test.ts @@ -190,4 +190,141 @@ describe("runServiceRestart token drift", () => { expect(payload.result).toBe("scheduled"); expect(payload.message).toBe("restart scheduled, gateway will restart momentarily"); }); + + describe("repairNotLoaded (#43602)", () => { + it("start: repairs unloaded service when repairNotLoaded succeeds", async () => { + service.isLoaded.mockResolvedValue(false); + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + await runServiceStart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => [], + opts: { json: true }, + }); + + expect(repairNotLoaded).toHaveBeenCalledTimes(1); + // After successful repair, start should proceed to restart the service. + expect(service.restart).toHaveBeenCalledTimes(1); + const jsonLine = runtimeLogs.find((line) => line.trim().startsWith("{")); + const payload = JSON.parse(jsonLine ?? "{}") as { result?: string }; + expect(payload.result).toBe("started"); + }); + + it("start: falls through to hints when repairNotLoaded returns ok:false", async () => { + service.isLoaded.mockResolvedValue(false); + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: false }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + await runServiceStart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => ["openclaw gateway install"], + opts: { json: true }, + }); + + expect(repairNotLoaded).toHaveBeenCalledTimes(1); + expect(service.restart).not.toHaveBeenCalled(); + const jsonLine = runtimeLogs.find((line) => line.trim().startsWith("{")); + const payload = JSON.parse(jsonLine ?? "{}") as { result?: string; hints?: string[] }; + expect(payload.result).toBe("not-loaded"); + expect(payload.hints).toContain("openclaw gateway install"); + }); + + it("start: falls through to hints when repairNotLoaded throws", async () => { + service.isLoaded.mockResolvedValue(false); + const repairNotLoaded = vi.fn().mockRejectedValue(new Error("launchctl failed")); + const serviceWithRepair = { ...service, repairNotLoaded }; + + await runServiceStart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => ["openclaw gateway install"], + opts: { json: true }, + }); + + expect(repairNotLoaded).toHaveBeenCalledTimes(1); + expect(service.restart).not.toHaveBeenCalled(); + const jsonLine = runtimeLogs.find((line) => line.trim().startsWith("{")); + const payload = JSON.parse(jsonLine ?? "{}") as { result?: string }; + expect(payload.result).toBe("not-loaded"); + }); + + it("start: does not call repairNotLoaded when service is already loaded", async () => { + service.isLoaded.mockResolvedValue(true); + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + await runServiceStart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => [], + opts: { json: true }, + }); + + expect(repairNotLoaded).not.toHaveBeenCalled(); + }); + + it("restart: repairs unloaded service when onNotLoaded returns null", async () => { + service.isLoaded.mockResolvedValue(false); + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + const result = await runServiceRestart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => [], + opts: { json: true }, + onNotLoaded: async () => null, + }); + + expect(result).toBe(true); + expect(repairNotLoaded).toHaveBeenCalledTimes(1); + const jsonLine = runtimeLogs.find((line) => line.trim().startsWith("{")); + const payload = JSON.parse(jsonLine ?? "{}") as { result?: string; message?: string }; + expect(payload.result).toBe("restarted"); + expect(payload.message).toContain("re-registered"); + }); + + it("restart: skips repair when onNotLoaded handles it", async () => { + service.isLoaded.mockResolvedValue(false); + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + const result = await runServiceRestart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => [], + opts: { json: true }, + onNotLoaded: async () => ({ + result: "restarted" as const, + message: "handled by SIGUSR1", + }), + }); + + expect(result).toBe(true); + expect(repairNotLoaded).not.toHaveBeenCalled(); + }); + + it("restart: falls through to hints when repair returns ok:false", async () => { + service.isLoaded.mockResolvedValue(false); + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: false }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + const result = await runServiceRestart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => ["openclaw gateway install"], + opts: { json: true }, + onNotLoaded: async () => null, + }); + + expect(result).toBe(false); + expect(repairNotLoaded).toHaveBeenCalledTimes(1); + const jsonLine = runtimeLogs.find((line) => line.trim().startsWith("{")); + const payload = JSON.parse(jsonLine ?? "{}") as { result?: string }; + expect(payload.result).toBe("not-loaded"); + }); + }); }); diff --git a/src/cli/daemon-cli/lifecycle-core.ts b/src/cli/daemon-cli/lifecycle-core.ts index 8def6aeefe6..38bd47d85f8 100644 --- a/src/cli/daemon-cli/lifecycle-core.ts +++ b/src/cli/daemon-cli/lifecycle-core.ts @@ -194,7 +194,7 @@ export async function runServiceStart(params: { const json = Boolean(params.opts?.json); const { stdout, emit, fail } = createActionIO({ action: "start", json }); - const loaded = await resolveServiceLoadedOrFail({ + let loaded = await resolveServiceLoadedOrFail({ serviceNoun: params.serviceNoun, service: params.service, fail, @@ -202,6 +202,25 @@ export async function runServiceStart(params: { if (loaded === null) { return; } + if (!loaded && params.service.repairNotLoaded) { + // The service was previously installed but is no longer loaded (e.g. + // macOS LaunchAgent silently unloaded after sleep/idle). Attempt to + // re-register the existing service definition before falling through + // to the "not loaded" install hints. See #43602. + try { + const repair = await params.service.repairNotLoaded({ env: process.env }); + if (repair.ok) { + loaded = true; + if (!json) { + defaultRuntime.log( + `${params.serviceNoun} was not loaded — re-registered from existing service definition.`, + ); + } + } + } catch { + // Best-effort repair; fall through to normal not-loaded handling. + } + } if (!loaded) { await handleServiceNotLoaded({ serviceNoun: params.serviceNoun, @@ -356,7 +375,7 @@ export async function runServiceRestart(params: { return true; }; - const loaded = await resolveServiceLoadedOrFail({ + let loaded = await resolveServiceLoadedOrFail({ serviceNoun: params.serviceNoun, service: params.service, fail, @@ -384,6 +403,26 @@ export async function runServiceRestart(params: { fail(`${params.serviceNoun} restart failed: ${String(err)}`); return false; } + if (!handledNotLoaded && params.service.repairNotLoaded) { + // No running process to signal, but the service definition may still + // exist on disk (e.g. macOS LaunchAgent unloaded after sleep/idle). + // Re-register it so `restart` can proceed normally. See #43602. + try { + const repair = await params.service.repairNotLoaded({ env: process.env }); + if (repair.ok) { + loaded = true; + handledNotLoaded = { + result: "restarted", + message: `${params.serviceNoun} was not loaded — re-registered from existing service definition.`, + }; + if (!json) { + defaultRuntime.log(handledNotLoaded.message); + } + } + } catch { + // Best-effort repair; fall through to normal not-loaded handling. + } + } if (!handledNotLoaded) { await handleServiceNotLoaded({ serviceNoun: params.serviceNoun, diff --git a/src/daemon/service.ts b/src/daemon/service.ts index 8083ce4b5e1..58dac26af20 100644 --- a/src/daemon/service.ts +++ b/src/daemon/service.ts @@ -1,8 +1,10 @@ import { installLaunchAgent, isLaunchAgentLoaded, + launchAgentPlistExists, readLaunchAgentProgramArguments, readLaunchAgentRuntime, + repairLaunchAgentBootstrap, restartLaunchAgent, stopLaunchAgent, uninstallLaunchAgent, @@ -64,6 +66,16 @@ export type GatewayService = { isLoaded: (args: GatewayServiceEnvArgs) => Promise; readCommand: (env: GatewayServiceEnv) => Promise; readRuntime: (env: GatewayServiceEnv) => Promise; + /** + * Attempt to re-register and start a service that was previously installed + * but is no longer loaded (e.g. after macOS sleep/idle unloads the + * LaunchAgent). Returns `{ ok: true }` when the service was successfully + * re-bootstrapped, `{ ok: false }` when the service definition does not + * exist on disk (caller should fall through to install hints). + * + * Optional — platforms that do not experience silent unloads can omit this. + */ + repairNotLoaded?: (args: GatewayServiceEnvArgs) => Promise<{ ok: boolean; detail?: string }>; }; export function describeGatewayServiceRestart( @@ -105,6 +117,14 @@ const GATEWAY_SERVICE_REGISTRY: Record { + const env = args.env ?? (process.env as Record); + const plistExists = await launchAgentPlistExists(env); + if (!plistExists) { + return { ok: false, detail: "plist not found on disk" }; + } + return await repairLaunchAgentBootstrap({ env }); + }, }, linux: { label: "systemd",