diff --git a/src/cli/daemon-cli/lifecycle-core.config-guard.test.ts b/src/cli/daemon-cli/lifecycle-core.config-guard.test.ts index 59a2926e993..fdf1b7b5154 100644 --- a/src/cli/daemon-cli/lifecycle-core.config-guard.test.ts +++ b/src/cli/daemon-cli/lifecycle-core.config-guard.test.ts @@ -140,3 +140,99 @@ describe("runServiceStart config pre-flight (#35862)", () => { expect(service.restart).toHaveBeenCalledTimes(1); }); }); + +describe("config-guard gates repairNotLoaded (#43602 + #35862)", () => { + let runServiceStart: typeof import("./lifecycle-core.js").runServiceStart; + let runServiceRestart: typeof import("./lifecycle-core.js").runServiceRestart; + + beforeAll(async () => { + ({ runServiceStart, runServiceRestart } = await import("./lifecycle-core.js")); + }); + + beforeEach(() => { + resetLifecycleRuntimeLogs(); + readConfigFileSnapshotMock.mockReset(); + setConfigSnapshot({ exists: true, valid: true }); + loadConfig.mockReset(); + loadConfig.mockReturnValue({}); + resetLifecycleServiceMocks(); + service.isLoaded.mockResolvedValue(false); + }); + + it("start: aborts before repairNotLoaded when config is invalid", async () => { + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true }); + const serviceWithRepair = { ...service, repairNotLoaded }; + setConfigSnapshot({ + exists: true, + valid: false, + issues: [{ path: "agents.defaults.model", message: "Unrecognized key" }], + }); + + await expect( + runServiceStart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => [], + opts: { json: true }, + }), + ).rejects.toThrow("__exit__:1"); + + expect(repairNotLoaded).not.toHaveBeenCalled(); + expect(service.restart).not.toHaveBeenCalled(); + }); + + it("start: proceeds with repair when config is valid", async () => { + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + await runServiceStart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + 
renderStartHints: () => [], + opts: { json: true }, + }); + + expect(repairNotLoaded).toHaveBeenCalledTimes(1); + // Repair already started the service; service.restart() is NOT called + expect(service.restart).not.toHaveBeenCalled(); + }); + + it("restart: aborts before repairNotLoaded when config is invalid", async () => { + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true }); + const serviceWithRepair = { ...service, repairNotLoaded }; + setConfigSnapshot({ + exists: true, + valid: false, + issues: [{ path: "agents.defaults.model", message: "Unrecognized key" }], + }); + + await expect( + runServiceRestart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => [], + opts: { json: true }, + onNotLoaded: async () => null, + }), + ).rejects.toThrow("__exit__:1"); + + expect(repairNotLoaded).not.toHaveBeenCalled(); + expect(service.restart).not.toHaveBeenCalled(); + }); + + it("restart: proceeds with repair when config is valid", async () => { + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + const result = await runServiceRestart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => [], + opts: { json: true }, + onNotLoaded: async () => null, + }); + + expect(result).toBe(true); + expect(repairNotLoaded).toHaveBeenCalledTimes(1); + }); +}); diff --git a/src/cli/daemon-cli/lifecycle-core.test.ts b/src/cli/daemon-cli/lifecycle-core.test.ts index 2f17269eb6c..561351d6ae4 100644 --- a/src/cli/daemon-cli/lifecycle-core.test.ts +++ b/src/cli/daemon-cli/lifecycle-core.test.ts @@ -190,4 +190,180 @@ describe("runServiceRestart token drift", () => { expect(payload.result).toBe("scheduled"); expect(payload.message).toBe("restart scheduled, gateway will restart momentarily"); }); + + describe("repairNotLoaded (#43602)", () => { + it("start: repairs unloaded service when repairNotLoaded succeeds", async () => { + 
service.isLoaded.mockResolvedValue(false); + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + await runServiceStart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => [], + opts: { json: true }, + }); + + expect(repairNotLoaded).toHaveBeenCalledTimes(1); + // Repair already started the service (bootstrap → kickstart), so + // service.restart() should NOT be called — avoids double-kickstart. + expect(service.restart).not.toHaveBeenCalled(); + const jsonLine = runtimeLogs.find((line) => line.trim().startsWith("{")); + const payload = JSON.parse(jsonLine ?? "{}") as { result?: string }; + expect(payload.result).toBe("started"); + }); + + it("start: falls through to hints when repairNotLoaded returns ok:false", async () => { + service.isLoaded.mockResolvedValue(false); + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: false }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + await runServiceStart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => ["openclaw gateway install"], + opts: { json: true }, + }); + + expect(repairNotLoaded).toHaveBeenCalledTimes(1); + expect(service.restart).not.toHaveBeenCalled(); + const jsonLine = runtimeLogs.find((line) => line.trim().startsWith("{")); + const payload = JSON.parse(jsonLine ?? 
"{}") as { result?: string; hints?: string[] }; + expect(payload.result).toBe("not-loaded"); + expect(payload.hints).toContain("openclaw gateway install"); + }); + + it("start: falls through to hints when repairNotLoaded throws", async () => { + service.isLoaded.mockResolvedValue(false); + const repairNotLoaded = vi.fn().mockRejectedValue(new Error("launchctl failed")); + const serviceWithRepair = { ...service, repairNotLoaded }; + + await runServiceStart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => ["openclaw gateway install"], + opts: { json: true }, + }); + + expect(repairNotLoaded).toHaveBeenCalledTimes(1); + expect(service.restart).not.toHaveBeenCalled(); + const jsonLine = runtimeLogs.find((line) => line.trim().startsWith("{")); + const payload = JSON.parse(jsonLine ?? "{}") as { result?: string }; + expect(payload.result).toBe("not-loaded"); + }); + + it("start: does not call repairNotLoaded when service is already loaded", async () => { + service.isLoaded.mockResolvedValue(true); + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + await runServiceStart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => [], + opts: { json: true }, + }); + + expect(repairNotLoaded).not.toHaveBeenCalled(); + }); + + it("restart: repairs unloaded service when onNotLoaded returns null", async () => { + service.isLoaded.mockResolvedValue(false); + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + const result = await runServiceRestart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => [], + opts: { json: true }, + onNotLoaded: async () => null, + }); + + expect(result).toBe(true); + expect(repairNotLoaded).toHaveBeenCalledTimes(1); + const jsonLine = runtimeLogs.find((line) => line.trim().startsWith("{")); + const 
payload = JSON.parse(jsonLine ?? "{}") as { result?: string; message?: string }; + expect(payload.result).toBe("restarted"); + expect(payload.message).toContain("re-registered"); + }); + + it("restart: skips repair when onNotLoaded handles it", async () => { + service.isLoaded.mockResolvedValue(false); + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + const result = await runServiceRestart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => [], + opts: { json: true }, + onNotLoaded: async () => ({ + result: "restarted" as const, + message: "handled by SIGUSR1", + }), + }); + + expect(result).toBe(true); + expect(repairNotLoaded).not.toHaveBeenCalled(); + }); + + it("restart: does not double-log repair message in non-JSON mode", async () => { + service.isLoaded.mockResolvedValue(false); + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + const result = await runServiceRestart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => [], + opts: { json: false }, + onNotLoaded: async () => null, + }); + + expect(result).toBe(true); + expect(repairNotLoaded).toHaveBeenCalledTimes(1); + // The re-registered message should appear exactly once in non-JSON output + const reRegisterLogs = runtimeLogs.filter((line) => line.includes("re-registered")); + expect(reRegisterLogs).toHaveLength(1); + }); + + it("restart: does not call service.restart after successful repair (no double-kickstart)", async () => { + service.isLoaded.mockResolvedValue(false); + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + await runServiceRestart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => [], + opts: { json: true }, + onNotLoaded: async () => null, + }); + + // 
repairLaunchAgentBootstrap already kickstarted the service. + // service.restart() should NOT be called — avoids redundant kill+restart. + expect(service.restart).not.toHaveBeenCalled(); + }); + + it("restart: falls through to hints when repair returns ok:false", async () => { + service.isLoaded.mockResolvedValue(false); + const repairNotLoaded = vi.fn().mockResolvedValue({ ok: false }); + const serviceWithRepair = { ...service, repairNotLoaded }; + + const result = await runServiceRestart({ + serviceNoun: "Gateway", + service: serviceWithRepair, + renderStartHints: () => ["openclaw gateway install"], + opts: { json: true }, + onNotLoaded: async () => null, + }); + + expect(result).toBe(false); + expect(repairNotLoaded).toHaveBeenCalledTimes(1); + const jsonLine = runtimeLogs.find((line) => line.trim().startsWith("{")); + const payload = JSON.parse(jsonLine ?? "{}") as { result?: string }; + expect(payload.result).toBe("not-loaded"); + }); + }); }); diff --git a/src/cli/daemon-cli/lifecycle-core.ts b/src/cli/daemon-cli/lifecycle-core.ts index 8def6aeefe6..344ddfe84f3 100644 --- a/src/cli/daemon-cli/lifecycle-core.ts +++ b/src/cli/daemon-cli/lifecycle-core.ts @@ -194,7 +194,7 @@ export async function runServiceStart(params: { const json = Boolean(params.opts?.json); const { stdout, emit, fail } = createActionIO({ action: "start", json }); - const loaded = await resolveServiceLoadedOrFail({ + let loaded = await resolveServiceLoadedOrFail({ serviceNoun: params.serviceNoun, service: params.service, fail, @@ -202,6 +202,42 @@ export async function runServiceStart(params: { if (loaded === null) { return; } + if (!loaded && params.service.repairNotLoaded) { + // Pre-flight config validation before attempting repair — don't re-register + // a service that will immediately crash due to invalid config. 
(#35862) + const configError = await getConfigValidationError(); + if (configError) { + fail( + `${params.serviceNoun} aborted: config is invalid.\n${configError}\nFix the config and retry, or run "openclaw doctor" to repair.`, + ); + return; + } + // The service was previously installed but is no longer loaded (e.g. + // macOS LaunchAgent silently unloaded after sleep/idle). Attempt to + // re-register the existing service definition before falling through + // to the "not loaded" install hints. See #43602. + try { + const repair = await params.service.repairNotLoaded({ env: process.env }); + if (repair.ok) { + // repairLaunchAgentBootstrap already started the service (enable → + // bootstrap → kickstart). Emit success and return — calling + // service.restart() here would be a redundant kill+restart cycle. + const message = `${params.serviceNoun} was not loaded — re-registered from existing service definition.`; + emit({ + ok: true, + result: "started", + message, + service: buildDaemonServiceSnapshot(params.service, true), + }); + if (!json) { + defaultRuntime.log(message); + } + return; + } + } catch { + // Best-effort repair; fall through to normal not-loaded handling. + } + } if (!loaded) { await handleServiceNotLoaded({ serviceNoun: params.serviceNoun, @@ -356,7 +392,7 @@ export async function runServiceRestart(params: { return true; }; - const loaded = await resolveServiceLoadedOrFail({ + let loaded = await resolveServiceLoadedOrFail({ serviceNoun: params.serviceNoun, service: params.service, fail, @@ -384,6 +420,29 @@ export async function runServiceRestart(params: { fail(`${params.serviceNoun} restart failed: ${String(err)}`); return false; } + if (!handledNotLoaded && params.service.repairNotLoaded) { + // No running process to signal, but the service definition may still + // exist on disk (e.g. macOS LaunchAgent unloaded after sleep/idle). + // Re-register it so `restart` can proceed normally. See #43602. 
+ // + // repairLaunchAgentBootstrap already starts the service (enable → + // bootstrap → kickstart), so we do NOT set `loaded = true` here — + // that would cause the `if (loaded)` branch below to call + // `service.restart()`, issuing a redundant kill+restart cycle. + // Instead we record the result via `handledNotLoaded` and let the + // end-of-function emit path handle messaging. + try { + const repair = await params.service.repairNotLoaded({ env: process.env }); + if (repair.ok) { + handledNotLoaded = { + result: "restarted", + message: `${params.serviceNoun} was not loaded — re-registered from existing service definition.`, + }; + } + } catch { + // Best-effort repair; fall through to normal not-loaded handling. + } + } if (!handledNotLoaded) { await handleServiceNotLoaded({ serviceNoun: params.serviceNoun, diff --git a/src/daemon/launchd.test.ts b/src/daemon/launchd.test.ts index 341f071de91..e3e8e551100 100644 --- a/src/daemon/launchd.test.ts +++ b/src/daemon/launchd.test.ts @@ -18,6 +18,7 @@ const state = vi.hoisted(() => ({ listOutput: "", printOutput: "", bootstrapError: "", + bootstrapCode: 1, kickstartError: "", kickstartFailuresRemaining: 0, dirs: new Set(), @@ -75,7 +76,7 @@ vi.mock("./exec-file.js", () => ({ return { stdout: state.printOutput, stderr: "", code: 0 }; } if (call[0] === "bootstrap" && state.bootstrapError) { - return { stdout: "", stderr: state.bootstrapError, code: 1 }; + return { stdout: "", stderr: state.bootstrapError, code: state.bootstrapCode }; } if (call[0] === "kickstart" && state.kickstartError && state.kickstartFailuresRemaining > 0) { state.kickstartFailuresRemaining -= 1; @@ -152,6 +153,7 @@ beforeEach(() => { state.listOutput = ""; state.printOutput = ""; state.bootstrapError = ""; + state.bootstrapCode = 1; state.kickstartError = ""; state.kickstartFailuresRemaining = 0; state.dirs.clear(); @@ -255,6 +257,49 @@ describe("launchd bootstrap repair", () => { expect(kickstartIndex).toBeGreaterThanOrEqual(0); 
expect(bootstrapIndex).toBeLessThan(kickstartIndex); }); + + it("treats bootstrap exit 130 as success (race condition)", async () => { + state.bootstrapError = "Service already loaded"; + state.bootstrapCode = 130; + const env: Record = { + HOME: "/Users/test", + OPENCLAW_PROFILE: "default", + }; + const repair = await repairLaunchAgentBootstrap({ env }); + expect(repair.ok).toBe(true); + // Should still kickstart even though bootstrap returned 130 + const kickCalls = state.launchctlCalls.filter((c) => c[0] === "kickstart"); + expect(kickCalls.length).toBe(1); + }); + + it("treats 'already exists in domain' as success (race condition)", async () => { + state.bootstrapError = + "Could not bootstrap service: 5: Input/output error: already exists in domain for gui/501"; + state.bootstrapCode = 1; + const env: Record = { + HOME: "/Users/test", + OPENCLAW_PROFILE: "default", + }; + const repair = await repairLaunchAgentBootstrap({ env }); + expect(repair.ok).toBe(true); + const kickCalls = state.launchctlCalls.filter((c) => c[0] === "kickstart"); + expect(kickCalls.length).toBe(1); + }); + + it("returns ok:false for non-race bootstrap failures", async () => { + state.bootstrapError = "Could not find specified service"; + state.bootstrapCode = 1; + const env: Record = { + HOME: "/Users/test", + OPENCLAW_PROFILE: "default", + }; + const repair = await repairLaunchAgentBootstrap({ env }); + expect(repair.ok).toBe(false); + expect(repair.detail).toContain("Could not find specified service"); + // Should NOT kickstart when bootstrap genuinely failed + const kickCalls = state.launchctlCalls.filter((c) => c[0] === "kickstart"); + expect(kickCalls.length).toBe(0); + }); }); describe("launchd install", () => { diff --git a/src/daemon/launchd.ts b/src/daemon/launchd.ts index 13d6fd46d33..63ab9ca8132 100644 --- a/src/daemon/launchd.ts +++ b/src/daemon/launchd.ts @@ -325,7 +325,15 @@ export async function repairLaunchAgentBootstrap(args: { await execLaunchctl(["enable", 
`${domain}/${label}`]); const boot = await execLaunchctl(["bootstrap", domain, plistPath]); if (boot.code !== 0) { - return { ok: false, detail: (boot.stderr || boot.stdout).trim() || undefined }; + // Race condition: another process (or macOS itself) may have bootstrapped + // the service between our isLoaded check and this call. Exit code 130 or + // "already exists in domain" means the service is already registered — treat + // as success and proceed to kickstart. See #26088 for prior art. + const bootDetail = (boot.stderr || boot.stdout).trim().toLowerCase(); + const alreadyLoaded = boot.code === 130 || bootDetail.includes("already exists in domain"); + if (!alreadyLoaded) { + return { ok: false, detail: (boot.stderr || boot.stdout).trim() || undefined }; + } } const kick = await execLaunchctl(["kickstart", "-k", `${domain}/${label}`]); if (kick.code !== 0) { diff --git a/src/daemon/service.ts b/src/daemon/service.ts index 8083ce4b5e1..58dac26af20 100644 --- a/src/daemon/service.ts +++ b/src/daemon/service.ts @@ -1,8 +1,10 @@ import { installLaunchAgent, isLaunchAgentLoaded, + launchAgentPlistExists, readLaunchAgentProgramArguments, readLaunchAgentRuntime, + repairLaunchAgentBootstrap, restartLaunchAgent, stopLaunchAgent, uninstallLaunchAgent, @@ -64,6 +66,16 @@ export type GatewayService = { isLoaded: (args: GatewayServiceEnvArgs) => Promise; readCommand: (env: GatewayServiceEnv) => Promise; readRuntime: (env: GatewayServiceEnv) => Promise; + /** + * Attempt to re-register and start a service that was previously installed + * but is no longer loaded (e.g. after macOS sleep/idle unloads the + * LaunchAgent). Returns `{ ok: true }` when the service was successfully + * re-bootstrapped, `{ ok: false }` when the service definition does not + * exist on disk or re-bootstrapping it fails (caller should fall through to install hints). + * + * Optional — platforms that do not experience silent unloads can omit this. 
+ */ + repairNotLoaded?: (args: GatewayServiceEnvArgs) => Promise<{ ok: boolean; detail?: string }>; }; export function describeGatewayServiceRestart( @@ -105,6 +117,14 @@ const GATEWAY_SERVICE_REGISTRY: Record { + const env = args.env ?? (process.env as Record); + const plistExists = await launchAgentPlistExists(env); + if (!plistExists) { + return { ok: false, detail: "plist not found on disk" }; + } + return await repairLaunchAgentBootstrap({ env }); + }, }, linux: { label: "systemd",