* fix(gateway): correct launchctl command sequence for gateway restart (closes #20030) * fix(restart): expand HOME and escape label in launchctl plist path * fix(restart): poll port free after SIGKILL to prevent EADDRINUSE restart loop When cleanStaleGatewayProcessesSync() kills a stale gateway process, the kernel may not immediately release the TCP port. Previously the function returned after a fixed 500ms sleep (300ms SIGTERM + 200ms SIGKILL), allowing triggerOpenClawRestart() to hand off to systemd before the port was actually free. The new systemd process then raced the dying socket for port 18789, hit EADDRINUSE, and exited with status 1, causing systemd to retry indefinitely — the zombie restart loop reported in #33103. Fix: add waitForPortFreeSync() that polls lsof at 50ms intervals for up to 2 seconds after SIGKILL. cleanStaleGatewayProcessesSync() now blocks until the port is confirmed free (or the budget expires with a warning) before returning. The increased SIGTERM/SIGKILL wait budgets (600ms / 400ms) also give slow processes more time to exit cleanly. Fixes #33103 Related: #28134 * fix: add EADDRINUSE retry and TIME_WAIT port-bind checks for gateway startup * fix(ports): treat EADDRNOTAVAIL as non-retryable and fix flaky test * fix(gateway): hot-reload agents.defaults.models allowlist changes The reload plan had a rule for `agents.defaults.model` (singular) but not `agents.defaults.models` (plural — the allowlist array). Because `agents.defaults.models` does not prefix-match `agents.defaults.model.`, it fell through to the catch-all `agents` tail rule (kind=none), so allowlist edits in openclaw.json were silently ignored at runtime. Add a dedicated reload rule so changes to the models allowlist trigger a heartbeat restart, which re-reads the config and serves the updated list to clients. 
Fixes #33600 Co-authored-by: HCL <chenglunhu@gmail.com> Signed-off-by: HCL <chenglunhu@gmail.com> * test(restart): 100% branch coverage — audit round 2 Audit findings fixed: - remove dead guard: terminateStaleProcessesSync pids.length===0 check was unreachable (only caller cleanStaleGatewayProcessesSync already guards) - expose __testing.callSleepSyncRaw so sleepSync's real Atomics.wait path can be unit-tested directly without going through the override - fix broken sleepSync Atomics.wait test: previous test set override=null but cleanStaleGatewayProcessesSync returned before calling sleepSync — replaced with direct callSleepSyncRaw calls that actually exercise L36/L42-47 - fix pid collision: two tests used process.pid+304 (EPERM + dead-at-SIGTERM); EPERM test changed to process.pid+305 - fix misindented tests: 'deduplicates pids' and 'lsof status 1 container edge case' were outside their intended describe blocks; moved to correct scopes (findGatewayPidsOnPortSync and pollPortOnce respectively) - add missing branch tests: - status 1 + non-empty stdout with zero openclaw pids → free:true (L145) - mid-loop non-openclaw cmd in &&-chain (L67) - consecutive p-lines without c-line between them (L67) - invalid PID in p-line (p0 / pNaN) — ternary false branch (L67) - unknown lsof output line (else-if false branch L69) Coverage: 100% stmts / 100% branch / 100% funcs / 100% lines (36 tests) * test(restart): fix stale-pid test typing for tsgo * fix(gateway): address lifecycle review findings * test(update): make restart-helper path assertions windows-safe --------- Signed-off-by: HCL <chenglunhu@gmail.com> Co-authored-by: Glucksberg <markuscontasul@gmail.com> Co-authored-by: Efe Büken <efe@arven.digital> Co-authored-by: Riccardo Marino <rmarino@apple.com> Co-authored-by: HCL <chenglunhu@gmail.com>
309 lines
12 KiB
TypeScript
309 lines
12 KiB
TypeScript
import { spawn, type ChildProcess } from "node:child_process";
|
|
import fs from "node:fs/promises";
|
|
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
|
import { prepareRestartScript, runRestartScript } from "./restart-helper.js";
|
|
|
|
// Replace node:child_process with a stub whose only export is a bare mock
// `spawn`, so tests can inspect spawn calls through the mock.
vi.mock("node:child_process", () => {
  return { spawn: vi.fn() };
});
|
|
|
|
describe("restart-helper", () => {
|
|
// Snapshot of the real process.platform / process.getuid, taken before any
// test overrides them; the afterEach hook writes these values back.
const { platform: originalPlatform, getuid: originalGetUid } = process;
|
|
|
|
// Paths of scripts generated by the running test that have not been deleted yet.
const pendingScripts = new Set<string>();

/**
 * Generates a restart script via prepareRestartScript and reads it back.
 *
 * The generated path is remembered so it can still be removed after the test
 * if an assertion fails before the test reaches its own cleanupScript call.
 *
 * @param env environment map forwarded to prepareRestartScript
 * @param gatewayPort port forwarded to prepareRestartScript (defaults to 18789)
 * @returns the script path and its UTF-8 content
 */
async function prepareAndReadScript(env: Record<string, string>, gatewayPort = 18789) {
  const scriptPath = await prepareRestartScript(env, gatewayPort);
  expect(scriptPath).toBeTruthy();
  // The assertion above throws on a falsy path, so the value is present here.
  const resolvedPath = scriptPath!;
  pendingScripts.add(resolvedPath);
  const content = await fs.readFile(resolvedPath, "utf-8");
  return { scriptPath: resolvedPath, content };
}

/**
 * Deletes a script produced by prepareAndReadScript.
 *
 * @param scriptPath path of the script file to unlink
 */
async function cleanupScript(scriptPath: string) {
  pendingScripts.delete(scriptPath);
  await fs.unlink(scriptPath);
}

// Safety net: remove scripts whose test failed before calling cleanupScript.
// `force` keeps this quiet when the file is already gone.
afterEach(async () => {
  const leftovers = [...pendingScripts];
  pendingScripts.clear();
  await Promise.all(leftovers.map((leftover) => fs.rm(leftover, { force: true })));
});
|
|
|
|
/**
 * Asserts that a generated Windows batch script contains the restart steps in
 * the expected order, and that it does not contain the fixed
 * `timeout /t 3 /nobreak` wait.
 *
 * Each marker is searched for starting at the position of the previous one.
 *
 * @param content text of the generated batch script
 * @param port port expected in the netstat polling line (defaults to 18789)
 */
function expectWindowsRestartWaitOrdering(content: string, port = 18789) {
  // Markers listed in the order they must appear in the script.
  const orderedMarkers = [
    'schtasks /End /TN "',
    "set /a attempts=0",
    ":wait_for_port_release",
    "set /a attempts+=1",
    `netstat -ano | findstr /R /C:":${port} .*LISTENING" >nul`,
    ":force_kill_listener",
    "taskkill /F /PID %%P >nul 2>&1",
    ":port_released",
    'schtasks /Run /TN "',
  ];

  // Locate every marker first; `after` is the predecessor's index (null for the first marker).
  const located: Array<{ index: number; after: number | null }> = [];
  let cursor: number | undefined;
  for (const marker of orderedMarkers) {
    const index = content.indexOf(marker, cursor);
    located.push({ index, after: cursor ?? null });
    cursor = index;
  }

  // The first marker must exist; each later one must sit strictly after its predecessor.
  for (const { index, after } of located) {
    if (after === null) {
      expect(index).toBeGreaterThanOrEqual(0);
    } else {
      expect(index).toBeGreaterThan(after);
    }
  }

  expect(content).not.toContain("timeout /t 3 /nobreak >nul");
}
|
|
|
|
// Reset every mock before each test.
beforeEach(() => {
  vi.resetAllMocks();
});

// Put back the process.getuid and process.platform values captured at suite setup.
afterEach(() => {
  process.getuid = originalGetUid;
  Object.defineProperty(process, "platform", { value: originalPlatform });
});
|
|
|
|
describe("prepareRestartScript", () => {
|
|
it("creates a systemd restart script on Linux", async () => {
  Object.defineProperty(process, "platform", { value: "linux" });

  const generated = await prepareAndReadScript({ OPENCLAW_PROFILE: "default" });
  const { scriptPath, content } = generated;

  expect(scriptPath.endsWith(".sh")).toBe(true);
  const expectedSnippets = [
    "#!/bin/sh",
    "systemctl --user restart 'openclaw-gateway.service'",
    // Script should self-cleanup
    'rm -f "$0"',
  ];
  for (const snippet of expectedSnippets) {
    expect(content).toContain(snippet);
  }

  await cleanupScript(scriptPath);
});
|
|
|
|
it("uses OPENCLAW_SYSTEMD_UNIT override for systemd scripts", async () => {
  Object.defineProperty(process, "platform", { value: "linux" });
  const env = {
    OPENCLAW_PROFILE: "default",
    OPENCLAW_SYSTEMD_UNIT: "custom-gateway",
  };

  const { scriptPath, content: script } = await prepareAndReadScript(env);

  // The override replaces the default unit name in the restart command.
  expect(script).toContain("systemctl --user restart 'custom-gateway.service'");
  await cleanupScript(scriptPath);
});
|
|
|
|
it("creates a launchd restart script on macOS", async () => {
  Object.defineProperty(process, "platform", { value: "darwin" });
  process.getuid = () => 501;

  const generated = await prepareAndReadScript({ OPENCLAW_PROFILE: "default" });
  const { scriptPath, content } = generated;

  expect(scriptPath.endsWith(".sh")).toBe(true);
  const expectedSnippets = [
    "#!/bin/sh",
    "launchctl kickstart -k 'gui/501/ai.openclaw.gateway'",
    // Should fall back to bootstrap when kickstart fails (service deregistered after bootout)
    "launchctl bootstrap 'gui/501'",
    'rm -f "$0"',
  ];
  for (const snippet of expectedSnippets) {
    expect(content).toContain(snippet);
  }

  await cleanupScript(scriptPath);
});
|
|
|
|
it("uses OPENCLAW_LAUNCHD_LABEL override on macOS", async () => {
  Object.defineProperty(process, "platform", { value: "darwin" });
  process.getuid = () => 501;
  const env = {
    OPENCLAW_PROFILE: "default",
    OPENCLAW_LAUNCHD_LABEL: "com.custom.openclaw",
  };

  const { scriptPath, content: script } = await prepareAndReadScript(env);

  // The custom label must be used in the kickstart target.
  expect(script).toContain("launchctl kickstart -k 'gui/501/com.custom.openclaw'");
  await cleanupScript(scriptPath);
});
|
|
|
|
it("creates a schtasks restart script on Windows", async () => {
  Object.defineProperty(process, "platform", { value: "win32" });

  const generated = await prepareAndReadScript({ OPENCLAW_PROFILE: "default" });
  const { scriptPath, content } = generated;

  expect(scriptPath.endsWith(".bat")).toBe(true);
  const taskCommands = [
    "@echo off",
    'schtasks /End /TN "OpenClaw Gateway"',
    'schtasks /Run /TN "OpenClaw Gateway"',
  ];
  for (const command of taskCommands) {
    expect(content).toContain(command);
  }
  expectWindowsRestartWaitOrdering(content);
  // Batch self-cleanup
  expect(content).toContain('del "%~f0"');

  await cleanupScript(scriptPath);
});
|
|
|
|
it("uses OPENCLAW_WINDOWS_TASK_NAME override on Windows", async () => {
  Object.defineProperty(process, "platform", { value: "win32" });
  const env = {
    OPENCLAW_PROFILE: "default",
    OPENCLAW_WINDOWS_TASK_NAME: "OpenClaw Gateway (custom)",
  };

  const { scriptPath, content: script } = await prepareAndReadScript(env);

  // Both the stop and the start command must target the overridden task name.
  expect(script).toContain('schtasks /End /TN "OpenClaw Gateway (custom)"');
  expect(script).toContain('schtasks /Run /TN "OpenClaw Gateway (custom)"');
  expectWindowsRestartWaitOrdering(script);
  await cleanupScript(scriptPath);
});
|
|
|
|
it("uses passed gateway port for port polling on Windows", async () => {
  Object.defineProperty(process, "platform", { value: "win32" });
  const customPort = 9999;
  const env = { OPENCLAW_PROFILE: "default" };

  const { scriptPath, content: script } = await prepareAndReadScript(env, customPort);

  // Both the polling check and the force-kill lookup must use the custom port.
  const pollingCheck = `netstat -ano | findstr /R /C:":${customPort} .*LISTENING" >nul`;
  const listenerLookup = `for /f "tokens=5" %%P in ('netstat -ano ^| findstr /R /C:":${customPort} .*LISTENING"') do (`;
  expect(script).toContain(pollingCheck);
  expect(script).toContain(listenerLookup);
  expectWindowsRestartWaitOrdering(script, customPort);
  await cleanupScript(scriptPath);
});
|
|
|
|
it("uses custom profile in service names", async () => {
  Object.defineProperty(process, "platform", { value: "linux" });
  const env = { OPENCLAW_PROFILE: "production" };

  const { scriptPath, content: script } = await prepareAndReadScript(env);

  // A non-default profile is appended to the systemd unit name.
  expect(script).toContain("openclaw-gateway-production.service");
  await cleanupScript(scriptPath);
});
|
|
|
|
it("uses custom profile in macOS launchd label", async () => {
  Object.defineProperty(process, "platform", { value: "darwin" });
  process.getuid = () => 502;
  const env = { OPENCLAW_PROFILE: "staging" };

  const { scriptPath, content: script } = await prepareAndReadScript(env);

  // The launchd target carries both the stubbed uid and the profile name.
  expect(script).toContain("gui/502/ai.openclaw.staging");
  await cleanupScript(scriptPath);
});
|
|
|
|
it("uses custom profile in Windows task name", async () => {
  Object.defineProperty(process, "platform", { value: "win32" });
  const env = { OPENCLAW_PROFILE: "production" };

  const { scriptPath, content: script } = await prepareAndReadScript(env);

  // A non-default profile shows up in parentheses in the scheduled task name.
  expect(script).toContain('schtasks /End /TN "OpenClaw Gateway (production)"');
  expectWindowsRestartWaitOrdering(script);
  await cleanupScript(scriptPath);
});
|
|
|
|
it("returns null for unsupported platforms", async () => {
  // "aix" stands in for a platform the helper is expected not to support.
  Object.defineProperty(process, "platform", { value: "aix" });

  await expect(prepareRestartScript({})).resolves.toBeNull();
});
|
|
|
|
it("returns null when script creation fails", async () => {
  Object.defineProperty(process, "platform", { value: "linux" });
  // Make the next fs.writeFile call reject to simulate a failed script write.
  const writeFileSpy = vi
    .spyOn(fs, "writeFile")
    .mockRejectedValueOnce(new Error("simulated write failure"));

  try {
    const scriptPath = await prepareRestartScript({
      OPENCLAW_PROFILE: "default",
    });

    expect(scriptPath).toBeNull();
  } finally {
    // Restore even when the assertion throws, so later tests get the real fs.writeFile.
    writeFileSpy.mockRestore();
  }
});
|
|
|
|
it("escapes single quotes in profile names for shell scripts", async () => {
  Object.defineProperty(process, "platform", { value: "linux" });
  const env = { OPENCLAW_PROFILE: "it's-a-test" };

  const { scriptPath, content: script } = await prepareAndReadScript(env);

  // The raw quote must not survive; it has to appear in its '\'' escaped form.
  expect(script).not.toContain("it's");
  expect(script).toContain("it'\\''s");
  await cleanupScript(scriptPath);
});
|
|
|
|
it("expands HOME in plist path instead of leaving literal $HOME", async () => {
  Object.defineProperty(process, "platform", { value: "darwin" });
  process.getuid = () => 501;
  const env = {
    HOME: "/Users/testuser",
    OPENCLAW_PROFILE: "default",
  };

  const { scriptPath, content: script } = await prepareAndReadScript(env);

  // The resolved home directory must appear in the plist path (either path
  // separator is accepted), and no literal $HOME may remain in the script.
  expect(script).toMatch(/[\\/]Users[\\/]testuser[\\/]Library[\\/]LaunchAgents[\\/]/);
  expect(script).not.toContain("$HOME");
  await cleanupScript(scriptPath);
});
|
|
|
|
it("prefers env parameter HOME over process.env.HOME for plist path", async () => {
  Object.defineProperty(process, "platform", { value: "darwin" });
  process.getuid = () => 502;

  // Give the ambient environment a conflicting HOME so the match below can
  // only succeed if the HOME from the env argument is the one used.
  const previousHome = process.env.HOME;
  process.env.HOME = "/Users/processhome";
  try {
    const { scriptPath, content } = await prepareAndReadScript({
      HOME: "/Users/envhome",
      OPENCLAW_PROFILE: "default",
    });

    expect(content).toMatch(/[\\/]Users[\\/]envhome[\\/]Library[\\/]LaunchAgents[\\/]/);
    await cleanupScript(scriptPath);
  } finally {
    // Assigning undefined to process.env stores the string "undefined", so delete instead.
    if (previousHome === undefined) {
      delete process.env.HOME;
    } else {
      process.env.HOME = previousHome;
    }
  }
});
|
|
|
|
it("shell-escapes the label in the plist path on macOS", async () => {
  Object.defineProperty(process, "platform", { value: "darwin" });
  process.getuid = () => 501;
  const env = {
    HOME: "/Users/testuser",
    OPENCLAW_LAUNCHD_LABEL: "ai.openclaw.it's-a-test",
  };

  const { scriptPath, content: script } = await prepareAndReadScript(env);

  // The plist path must also shell-escape the label to prevent injection
  expect(script).toContain("ai.openclaw.it'\\''s-a-test.plist");
  await cleanupScript(scriptPath);
});
|
|
|
|
it("rejects unsafe batch profile names on Windows", async () => {
  Object.defineProperty(process, "platform", { value: "win32" });
  // "&" is a command separator in batch files, so this profile must be refused.
  const env = { OPENCLAW_PROFILE: "test&whoami" };

  await expect(prepareRestartScript(env)).resolves.toBeNull();
});
|
|
});
|
|
|
|
describe("runRestartScript", () => {
|
|
it("spawns the script as a detached process on Linux", async () => {
  Object.defineProperty(process, "platform", { value: "linux" });
  // Minimal stand-in for the spawned child: only unref() is needed here.
  const unref = vi.fn();
  vi.mocked(spawn).mockReturnValue({ unref } as unknown as ChildProcess);

  const scriptPath = "/tmp/fake-script.sh";
  await runRestartScript(scriptPath);

  const expectedOptions = { detached: true, stdio: "ignore" };
  expect(spawn).toHaveBeenCalledWith("/bin/sh", [scriptPath], expectedOptions);
  expect(unref).toHaveBeenCalled();
});
|
|
|
|
it("uses cmd.exe on Windows", async () => {
  Object.defineProperty(process, "platform", { value: "win32" });
  // Minimal stand-in for the spawned child: only unref() is needed here.
  const unref = vi.fn();
  vi.mocked(spawn).mockReturnValue({ unref } as unknown as ChildProcess);

  const scriptPath = "C:\\Temp\\fake-script.bat";
  await runRestartScript(scriptPath);

  const expectedOptions = { detached: true, stdio: "ignore" };
  expect(spawn).toHaveBeenCalledWith("cmd.exe", ["/c", scriptPath], expectedOptions);
  expect(unref).toHaveBeenCalled();
});
|
|
});
|
|
});
|