fix(gateway): race condition handling + config-guard for repairNotLoaded

- Handle bootstrap exit 130 and 'already exists in domain' as race
  condition success in repairLaunchAgentBootstrap (inspired by #26088)
- Gate repairNotLoaded behind config validation in runServiceStart to
  prevent re-registering a service with invalid config (#35862)
- Adapt config-guard tests to upstream test harness refactor
- Add 3 race condition tests for launchd bootstrap repair
- Add 4 config-guard integration tests for repairNotLoaded (start+restart)
- All 50 tests pass
This commit is contained in:
Chance Robinson 2026-03-13 11:56:56 -04:00
parent cdcb9464fb
commit 16d1462f8b
4 changed files with 159 additions and 2 deletions

View File

@ -140,3 +140,98 @@ describe("runServiceStart config pre-flight (#35862)", () => {
expect(service.restart).toHaveBeenCalledTimes(1);
});
});
describe("config-guard gates repairNotLoaded (#43602 + #35862)", () => {
let runServiceStart: typeof import("./lifecycle-core.js").runServiceStart;
let runServiceRestart: typeof import("./lifecycle-core.js").runServiceRestart;
beforeAll(async () => {
({ runServiceStart, runServiceRestart } = await import("./lifecycle-core.js"));
});
beforeEach(() => {
resetLifecycleRuntimeLogs();
readConfigFileSnapshotMock.mockReset();
setConfigSnapshot({ exists: true, valid: true });
loadConfig.mockReset();
loadConfig.mockReturnValue({});
resetLifecycleServiceMocks();
service.isLoaded.mockResolvedValue(false);
});
it("start: aborts before repairNotLoaded when config is invalid", async () => {
const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true });
const serviceWithRepair = { ...service, repairNotLoaded };
setConfigSnapshot({
exists: true,
valid: false,
issues: [{ path: "agents.defaults.model", message: "Unrecognized key" }],
});
await expect(
runServiceStart({
serviceNoun: "Gateway",
service: serviceWithRepair,
renderStartHints: () => [],
opts: { json: true },
}),
).rejects.toThrow("__exit__:1");
expect(repairNotLoaded).not.toHaveBeenCalled();
expect(service.restart).not.toHaveBeenCalled();
});
it("start: proceeds with repair when config is valid", async () => {
const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true });
const serviceWithRepair = { ...service, repairNotLoaded };
await runServiceStart({
serviceNoun: "Gateway",
service: serviceWithRepair,
renderStartHints: () => [],
opts: { json: true },
});
expect(repairNotLoaded).toHaveBeenCalledTimes(1);
expect(service.restart).toHaveBeenCalledTimes(1);
});
it("restart: aborts before repairNotLoaded when config is invalid", async () => {
const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true });
const serviceWithRepair = { ...service, repairNotLoaded };
setConfigSnapshot({
exists: true,
valid: false,
issues: [{ path: "agents.defaults.model", message: "Unrecognized key" }],
});
await expect(
runServiceRestart({
serviceNoun: "Gateway",
service: serviceWithRepair,
renderStartHints: () => [],
opts: { json: true },
onNotLoaded: async () => null,
}),
).rejects.toThrow("__exit__:1");
expect(repairNotLoaded).not.toHaveBeenCalled();
expect(service.restart).not.toHaveBeenCalled();
});
it("restart: proceeds with repair when config is valid", async () => {
const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true });
const serviceWithRepair = { ...service, repairNotLoaded };
const result = await runServiceRestart({
serviceNoun: "Gateway",
service: serviceWithRepair,
renderStartHints: () => [],
opts: { json: true },
onNotLoaded: async () => null,
});
expect(result).toBe(true);
expect(repairNotLoaded).toHaveBeenCalledTimes(1);
});
});

View File

@ -203,6 +203,15 @@ export async function runServiceStart(params: {
return;
}
if (!loaded && params.service.repairNotLoaded) {
// Pre-flight config validation before attempting repair — don't re-register
// a service that will immediately crash due to invalid config. (#35862)
const configError = await getConfigValidationError();
if (configError) {
fail(
`${params.serviceNoun} aborted: config is invalid.\n${configError}\nFix the config and retry, or run "openclaw doctor" to repair.`,
);
return;
}
// The service was previously installed but is no longer loaded (e.g.
// macOS LaunchAgent silently unloaded after sleep/idle). Attempt to
// re-register the existing service definition before falling through

View File

@ -18,6 +18,7 @@ const state = vi.hoisted(() => ({
listOutput: "",
printOutput: "",
bootstrapError: "",
bootstrapCode: 1,
kickstartError: "",
kickstartFailuresRemaining: 0,
dirs: new Set<string>(),
@ -72,7 +73,7 @@ vi.mock("./exec-file.js", () => ({
return { stdout: state.printOutput, stderr: "", code: 0 };
}
if (call[0] === "bootstrap" && state.bootstrapError) {
return { stdout: "", stderr: state.bootstrapError, code: 1 };
return { stdout: "", stderr: state.bootstrapError, code: state.bootstrapCode };
}
if (call[0] === "kickstart" && state.kickstartError && state.kickstartFailuresRemaining > 0) {
state.kickstartFailuresRemaining -= 1;
@ -145,6 +146,7 @@ beforeEach(() => {
state.listOutput = "";
state.printOutput = "";
state.bootstrapError = "";
state.bootstrapCode = 1;
state.kickstartError = "";
state.kickstartFailuresRemaining = 0;
state.dirs.clear();
@ -246,6 +248,49 @@ describe("launchd bootstrap repair", () => {
expect(kickstartIndex).toBeGreaterThanOrEqual(0);
expect(bootstrapIndex).toBeLessThan(kickstartIndex);
});
it("treats bootstrap exit 130 as success (race condition)", async () => {
state.bootstrapError = "Service already loaded";
state.bootstrapCode = 130;
const env: Record<string, string | undefined> = {
HOME: "/Users/test",
OPENCLAW_PROFILE: "default",
};
const repair = await repairLaunchAgentBootstrap({ env });
expect(repair.ok).toBe(true);
// Should still kickstart even though bootstrap returned 130
const kickCalls = state.launchctlCalls.filter((c) => c[0] === "kickstart");
expect(kickCalls.length).toBe(1);
});
it("treats 'already exists in domain' as success (race condition)", async () => {
state.bootstrapError =
"Could not bootstrap service: 5: Input/output error: already exists in domain for gui/501";
state.bootstrapCode = 1;
const env: Record<string, string | undefined> = {
HOME: "/Users/test",
OPENCLAW_PROFILE: "default",
};
const repair = await repairLaunchAgentBootstrap({ env });
expect(repair.ok).toBe(true);
const kickCalls = state.launchctlCalls.filter((c) => c[0] === "kickstart");
expect(kickCalls.length).toBe(1);
});
it("returns ok:false for non-race bootstrap failures", async () => {
state.bootstrapError = "Could not find specified service";
state.bootstrapCode = 1;
const env: Record<string, string | undefined> = {
HOME: "/Users/test",
OPENCLAW_PROFILE: "default",
};
const repair = await repairLaunchAgentBootstrap({ env });
expect(repair.ok).toBe(false);
expect(repair.detail).toContain("Could not find specified service");
// Should NOT kickstart when bootstrap genuinely failed
const kickCalls = state.launchctlCalls.filter((c) => c[0] === "kickstart");
expect(kickCalls.length).toBe(0);
});
});
describe("launchd install", () => {

View File

@ -286,7 +286,15 @@ export async function repairLaunchAgentBootstrap(args: {
await execLaunchctl(["enable", `${domain}/${label}`]);
const boot = await execLaunchctl(["bootstrap", domain, plistPath]);
if (boot.code !== 0) {
return { ok: false, detail: (boot.stderr || boot.stdout).trim() || undefined };
// Race condition: another process (or macOS itself) may have bootstrapped
// the service between our isLoaded check and this call. Exit code 130 or
// "already exists in domain" means the service is already registered — treat
// as success and proceed to kickstart. See #26088 for prior art.
const bootDetail = (boot.stderr || boot.stdout).trim().toLowerCase();
const alreadyLoaded = boot.code === 130 || bootDetail.includes("already exists in domain");
if (!alreadyLoaded) {
return { ok: false, detail: (boot.stderr || boot.stdout).trim() || undefined };
}
}
const kick = await execLaunchctl(["kickstart", "-k", `${domain}/${label}`]);
if (kick.code !== 0) {