import { spawnSync } from "node:child_process";
import fsSync from "node:fs";
import { isRestartEnabled } from "../../config/commands.js";
import { readBestEffortConfig, resolveGatewayPort } from "../../config/config.js";
import { parseCmdScriptCommandLine } from "../../daemon/cmd-argv.js";
import { resolveGatewayService } from "../../daemon/service.js";
import { probeGateway } from "../../gateway/probe.js";
import { isGatewayArgv, parseProcCmdline } from "../../infra/gateway-process-argv.js";
import { findGatewayPidsOnPortSync } from "../../infra/restart.js";
import { defaultRuntime } from "../../runtime.js";
import { theme } from "../../terminal/theme.js";
import { formatCliCommand } from "../command-format.js";
import {
  runServiceRestart,
  runServiceStart,
  runServiceStop,
  runServiceUninstall,
} from "./lifecycle-core.js";
import {
  DEFAULT_RESTART_HEALTH_ATTEMPTS,
  DEFAULT_RESTART_HEALTH_DELAY_MS,
  renderGatewayPortHealthDiagnostics,
  renderRestartDiagnostics,
  terminateStaleGatewayPids,
  waitForGatewayHealthyListener,
  waitForGatewayHealthyRestart,
} from "./restart-health.js";
import { parsePortFromArgs, renderGatewayServiceStartHints } from "./shared.js";
import type { DaemonLifecycleOptions } from "./types.js";

/** Number of health-check attempts made after a restart. */
const POST_RESTART_HEALTH_ATTEMPTS = DEFAULT_RESTART_HEALTH_ATTEMPTS;
/** Delay between post-restart health-check attempts, in milliseconds. */
const POST_RESTART_HEALTH_DELAY_MS = DEFAULT_RESTART_HEALTH_DELAY_MS;

/**
 * Resolve the port that lifecycle commands should target.
 *
 * A port parsed from the installed service command's program arguments wins.
 * Otherwise the port is resolved from the best-effort config and an
 * environment in which the service's own environment entries override
 * `process.env`.
 *
 * @param service Gateway service handle; defaults to `resolveGatewayService()`.
 */
async function resolveGatewayLifecyclePort(service = resolveGatewayService()) {
  // A failure to read the service command is tolerated: fall back to config/env only.
  const command = await service.readCommand(process.env).catch(() => null);
  const serviceEnv = command?.environment ?? undefined;
  const mergedEnv = {
    ...(process.env as Record<string, string | undefined>),
    // Spread last so the service-defined environment takes precedence.
    ...(serviceEnv ?? undefined),
  } as NodeJS.ProcessEnv;
  const portFromArgs = parsePortFromArgs(command?.programArguments);
  return portFromArgs ?? resolveGatewayPort(await readBestEffortConfig(), mergedEnv);
}

/**
 * Extract the command line from `wmic ... get CommandLine /value` output.
 *
 * Prefers a `CommandLine=<value>` line (matched case-insensitively). The first
 * such line decides the result: its trimmed value, or `null` when that value
 * is empty. When no such line exists, falls back to the first non-empty line
 * that is not the bare `CommandLine` header.
 *
 * @param raw Raw stdout text.
 * @returns The command line, or `null` when none could be found.
 */
function extractWindowsCommandLine(raw: string): string | null {
  const prefix = "commandline=";
  const lines = raw
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter(Boolean);
  for (const line of lines) {
    if (!line.toLowerCase().startsWith(prefix)) {
      continue;
    }
    const value = line.slice(prefix.length).trim();
    return value || null;
  }
  return lines.find((line) => line.toLowerCase() !== "commandline") ?? null;
}

/**
 * Synchronously read the argv of a running process.
 *
 * - linux: reads `/proc/<pid>/cmdline` and parses it with `parseProcCmdline`.
 * - darwin: runs `ps -o command= -p <pid>` and splits the output on whitespace
 *   (quoting is not preserved by this split).
 * - win32: runs `wmic process where ProcessId=<pid> get CommandLine /value`
 *   and parses the result with `parseCmdScriptCommandLine`.
 *
 * @param pid Process id to inspect.
 * @returns The argument vector, or `null` when it cannot be read, the helper
 *   command fails or times out (1s), or the platform is unsupported.
 */
function readGatewayProcessArgsSync(pid: number): string[] | null {
  if (process.platform === "linux") {
    try {
      return parseProcCmdline(fsSync.readFileSync(`/proc/${pid}/cmdline`, "utf8"));
    } catch {
      return null;
    }
  }
  if (process.platform === "darwin") {
    const ps = spawnSync("ps", ["-o", "command=", "-p", String(pid)], {
      encoding: "utf8",
      timeout: 1000,
    });
    if (ps.error || ps.status !== 0) {
      return null;
    }
    const command = ps.stdout.trim();
    return command ? command.split(/\s+/) : null;
  }
  if (process.platform === "win32") {
    const wmic = spawnSync(
      "wmic",
      ["process", "where", `ProcessId=${pid}`, "get", "CommandLine", "/value"],
      {
        encoding: "utf8",
        timeout: 1000,
      },
    );
    if (wmic.error || wmic.status !== 0) {
      return null;
    }
    const command = extractWindowsCommandLine(wmic.stdout);
    return command ? parseCmdScriptCommandLine(command) : null;
  }
  return null;
}

/**
 * List the de-duplicated pids listening on `port` whose argv is recognised as
 * a gateway process. Non-finite and non-positive pids are discarded, as are
 * processes whose argv cannot be read.
 */
function resolveGatewayListenerPids(port: number): number[] {
  return Array.from(new Set(findGatewayPidsOnPortSync(port)))
    .filter((pid): pid is number => Number.isFinite(pid) && pid > 0)
    .filter((pid) => {
      const args = readGatewayProcessArgsSync(pid);
      return args != null && isGatewayArgv(args, { allowGatewayBinary: true });
    });
}

/**
 * Resolve the gateway port from the best-effort config and `process.env`,
 * falling back to resolution without a config when that fails.
 */
async function resolveGatewayPortFallback(): Promise<number> {
  try {
    return resolveGatewayPort(await readBestEffortConfig(), process.env);
  } catch {
    return resolveGatewayPort(undefined, process.env);
  }
}

/**
 * Send `signal` to `pid` after re-verifying that it is a gateway process.
 *
 * @throws Error when the process argv cannot be read or is not a gateway argv.
 */
function signalGatewayPid(pid: number, signal: "SIGTERM" | "SIGUSR1") {
  // Re-check right before signalling so an unrelated process is never signalled.
  const args = readGatewayProcessArgsSync(pid);
  if (!args || !isGatewayArgv(args, { allowGatewayBinary: true })) {
    throw new Error(`refusing to signal non-gateway process pid ${pid}`);
  }
  process.kill(pid, signal);
}

/** Render a pid list for user-facing messages. */
function formatGatewayPidList(pids: number[]): string {
  return pids.join(", ");
}

/**
 * Probe the gateway on the local loopback address and reject when the running
 * gateway reports that restart is disabled in its config snapshot.
 *
 * A probe that fails or is not `ok` is treated as "nothing to check" and the
 * function returns without throwing.
 *
 * @throws Error when the running gateway config disables restart.
 */
async function assertUnmanagedGatewayRestartEnabled(port: number): Promise<void> {
  const probe = await probeGateway({
    url: `ws://127.0.0.1:${port}`,
    auth: {
      token: process.env.OPENCLAW_GATEWAY_TOKEN?.trim() || undefined,
      password: process.env.OPENCLAW_GATEWAY_PASSWORD?.trim() || undefined,
    },
    timeoutMs: 1_000,
  }).catch(() => null);

  if (!probe?.ok) {
    return;
  }
  if (!isRestartEnabled(probe.configSnapshot as { commands?: unknown } | undefined)) {
    throw new Error(
      "Gateway restart is disabled in the running gateway config (commands.restart=false); unmanaged SIGUSR1 restart would be ignored",
    );
  }
}

/**
 * Pids of verified gateway processes listening on `port`.
 *
 * `resolveGatewayListenerPids` already discards non-finite and non-positive
 * pids, so no further filtering is needed here.
 */
function resolveVerifiedGatewayListenerPids(port: number): number[] {
  return resolveGatewayListenerPids(port);
}

/**
 * Stop gateway processes that are not under a service manager by sending
 * SIGTERM to every verified gateway listener on `port`.
 *
 * @returns `null` when no gateway listener was found, otherwise a result
 *   object describing the processes that were signalled.
 */
async function stopGatewayWithoutServiceManager(port: number) {
  const pids = resolveVerifiedGatewayListenerPids(port);
  if (pids.length === 0) {
    return null;
  }
  for (const pid of pids) {
    signalGatewayPid(pid, "SIGTERM");
  }
  return {
    result: "stopped" as const,
    message: `Gateway stop signal sent to unmanaged process${pids.length === 1 ? "" : "es"} on port ${port}: ${formatGatewayPidList(pids)}.`,
  };
}

/**
 * Restart a gateway process that is not under a service manager by sending it
 * SIGUSR1.
 *
 * @returns `null` when no gateway listener was found, otherwise a result
 *   object describing the process that was signalled.
 * @throws Error when restart is disabled in the running gateway config, or
 *   when more than one gateway process is listening on `port`.
 */
async function restartGatewayWithoutServiceManager(port: number) {
  await assertUnmanagedGatewayRestartEnabled(port);
  const pids = resolveVerifiedGatewayListenerPids(port);
  if (pids.length === 0) {
    return null;
  }
  if (pids.length > 1) {
    throw new Error(
      `multiple gateway processes are listening on port ${port}: ${formatGatewayPidList(pids)}; use "openclaw gateway status --deep" before retrying restart`,
    );
  }
  signalGatewayPid(pids[0], "SIGUSR1");
  return {
    result: "restarted" as const,
    message: `Gateway restart signal sent to unmanaged process on port ${port}: ${pids[0]}.`,
  };
}

/** Uninstall the gateway service, stopping it first and asserting it is no longer loaded afterwards. */
export async function runDaemonUninstall(opts: DaemonLifecycleOptions = {}) {
  return await runServiceUninstall({
    serviceNoun: "Gateway",
    service: resolveGatewayService(),
    opts,
    stopBeforeUninstall: true,
    assertNotLoadedAfterUninstall: true,
  });
}

/** Start the gateway service. */
export async function runDaemonStart(opts: DaemonLifecycleOptions = {}) {
  return await runServiceStart({
    serviceNoun: "Gateway",
    service: resolveGatewayService(),
    renderStartHints: renderGatewayServiceStartHints,
    opts,
  });
}

/**
 * Stop the gateway service. The `onNotLoaded` hook sends SIGTERM to any
 * unmanaged gateway process listening on the resolved gateway port.
 */
export async function runDaemonStop(opts: DaemonLifecycleOptions = {}) {
  const service = resolveGatewayService();
  const gatewayPort = await resolveGatewayLifecyclePort(service).catch(() =>
    resolveGatewayPortFallback(),
  );
  return await runServiceStop({
    serviceNoun: "Gateway",
    service,
    opts,
    onNotLoaded: async () => stopGatewayWithoutServiceManager(gatewayPort),
  });
}

/**
 * Restart the gateway service.
 *
 * After the restart the gateway is health-checked. If the managed restart is
 * unhealthy and stale gateway processes are found, they are terminated and the
 * service restart is retried once before giving up.
 *
 * @returns `true` if restart succeeded, `false` if the service was not loaded.
 * Throws/exits on check or restart failures.
 */
export async function runDaemonRestart(opts: DaemonLifecycleOptions = {}): Promise<boolean> {
  const json = Boolean(opts.json);
  const service = resolveGatewayService();
  let restartedWithoutServiceManager = false;
  const restartPort = await resolveGatewayLifecyclePort(service).catch(() =>
    resolveGatewayPortFallback(),
  );
  const restartWaitMs = POST_RESTART_HEALTH_ATTEMPTS * POST_RESTART_HEALTH_DELAY_MS;
  const restartWaitSeconds = Math.round(restartWaitMs / 1000);
  const timeoutLine = `Timed out after ${restartWaitSeconds}s waiting for gateway port ${restartPort} to become healthy.`;

  return await runServiceRestart({
    serviceNoun: "Gateway",
    service,
    renderStartHints: renderGatewayServiceStartHints,
    opts,
    checkTokenDrift: true,
    onNotLoaded: async () => {
      const handled = await restartGatewayWithoutServiceManager(restartPort);
      if (handled) {
        // Remembered so the post-restart check probes the port instead of the service.
        restartedWithoutServiceManager = true;
      }
      return handled;
    },
    postRestartCheck: async ({ warnings, fail, stdout }) => {
      // Report a health-check timeout: headlines as warnings, diagnostics muted
      // (or everything appended to `warnings` in JSON mode), then fail.
      const failWithTimeout = (
        headlines: readonly string[],
        diagnostics: readonly string[],
      ): void => {
        if (!json) {
          for (const line of headlines) {
            defaultRuntime.log(theme.warn(line));
          }
          for (const line of diagnostics) {
            defaultRuntime.log(theme.muted(line));
          }
        } else {
          warnings.push(...headlines);
          warnings.push(...diagnostics);
        }
        fail(`Gateway restart timed out after ${restartWaitSeconds}s waiting for health checks.`, [
          formatCliCommand("openclaw gateway status --deep"),
          formatCliCommand("openclaw doctor"),
        ]);
      };

      // Health wait for the managed-service path; unknown listeners are
      // counted as stale only on Windows.
      const waitForManagedRestartHealth = () =>
        waitForGatewayHealthyRestart({
          service,
          port: restartPort,
          attempts: POST_RESTART_HEALTH_ATTEMPTS,
          delayMs: POST_RESTART_HEALTH_DELAY_MS,
          includeUnknownListenersAsStale: process.platform === "win32",
        });

      if (restartedWithoutServiceManager) {
        const listenerHealth = await waitForGatewayHealthyListener({
          port: restartPort,
          attempts: POST_RESTART_HEALTH_ATTEMPTS,
          delayMs: POST_RESTART_HEALTH_DELAY_MS,
        });
        if (listenerHealth.healthy) {
          return;
        }
        failWithTimeout([timeoutLine], renderGatewayPortHealthDiagnostics(listenerHealth));
        // Guard: if `fail` ever returns normally, do not fall through into the
        // service-manager checks, which do not apply to an unmanaged process.
        return;
      }

      let health = await waitForManagedRestartHealth();

      if (!health.healthy && health.staleGatewayPids.length > 0) {
        const staleMsg = `Found stale gateway process(es): ${health.staleGatewayPids.join(", ")}.`;
        warnings.push(staleMsg);
        if (!json) {
          defaultRuntime.log(theme.warn(staleMsg));
          defaultRuntime.log(theme.muted("Stopping stale process(es) and retrying restart..."));
        }

        await terminateStaleGatewayPids(health.staleGatewayPids);
        await service.restart({ env: process.env, stdout });
        health = await waitForManagedRestartHealth();
      }

      if (health.healthy) {
        return;
      }

      const headlines = [timeoutLine];
      if (health.runtime.status === "running" && health.portUsage.status === "free") {
        headlines.push(
          `Gateway process is running but port ${restartPort} is still free (startup hang/crash loop or very slow VM startup).`,
        );
      }
      failWithTimeout(headlines, renderRestartDiagnostics(health));
    },
  });
}