fix(gateway): race condition handling + config-guard for repairNotLoaded
- Handle bootstrap exit 130 and 'already exists in domain' as race condition success in repairLaunchAgentBootstrap (inspired by #26088) - Gate repairNotLoaded behind config validation in runServiceStart to prevent re-registering a service with invalid config (#35862) - Adapt config-guard tests to upstream test harness refactor - Add 3 race condition tests for launchd bootstrap repair - Add 4 config-guard integration tests for repairNotLoaded (start+restart) - All 50 tests pass
This commit is contained in:
parent
cdcb9464fb
commit
16d1462f8b
@ -140,3 +140,98 @@ describe("runServiceStart config pre-flight (#35862)", () => {
|
||||
expect(service.restart).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe("config-guard gates repairNotLoaded (#43602 + #35862)", () => {
|
||||
let runServiceStart: typeof import("./lifecycle-core.js").runServiceStart;
|
||||
let runServiceRestart: typeof import("./lifecycle-core.js").runServiceRestart;
|
||||
|
||||
beforeAll(async () => {
|
||||
({ runServiceStart, runServiceRestart } = await import("./lifecycle-core.js"));
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
resetLifecycleRuntimeLogs();
|
||||
readConfigFileSnapshotMock.mockReset();
|
||||
setConfigSnapshot({ exists: true, valid: true });
|
||||
loadConfig.mockReset();
|
||||
loadConfig.mockReturnValue({});
|
||||
resetLifecycleServiceMocks();
|
||||
service.isLoaded.mockResolvedValue(false);
|
||||
});
|
||||
|
||||
it("start: aborts before repairNotLoaded when config is invalid", async () => {
|
||||
const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true });
|
||||
const serviceWithRepair = { ...service, repairNotLoaded };
|
||||
setConfigSnapshot({
|
||||
exists: true,
|
||||
valid: false,
|
||||
issues: [{ path: "agents.defaults.model", message: "Unrecognized key" }],
|
||||
});
|
||||
|
||||
await expect(
|
||||
runServiceStart({
|
||||
serviceNoun: "Gateway",
|
||||
service: serviceWithRepair,
|
||||
renderStartHints: () => [],
|
||||
opts: { json: true },
|
||||
}),
|
||||
).rejects.toThrow("__exit__:1");
|
||||
|
||||
expect(repairNotLoaded).not.toHaveBeenCalled();
|
||||
expect(service.restart).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("start: proceeds with repair when config is valid", async () => {
|
||||
const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true });
|
||||
const serviceWithRepair = { ...service, repairNotLoaded };
|
||||
|
||||
await runServiceStart({
|
||||
serviceNoun: "Gateway",
|
||||
service: serviceWithRepair,
|
||||
renderStartHints: () => [],
|
||||
opts: { json: true },
|
||||
});
|
||||
|
||||
expect(repairNotLoaded).toHaveBeenCalledTimes(1);
|
||||
expect(service.restart).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("restart: aborts before repairNotLoaded when config is invalid", async () => {
|
||||
const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true });
|
||||
const serviceWithRepair = { ...service, repairNotLoaded };
|
||||
setConfigSnapshot({
|
||||
exists: true,
|
||||
valid: false,
|
||||
issues: [{ path: "agents.defaults.model", message: "Unrecognized key" }],
|
||||
});
|
||||
|
||||
await expect(
|
||||
runServiceRestart({
|
||||
serviceNoun: "Gateway",
|
||||
service: serviceWithRepair,
|
||||
renderStartHints: () => [],
|
||||
opts: { json: true },
|
||||
onNotLoaded: async () => null,
|
||||
}),
|
||||
).rejects.toThrow("__exit__:1");
|
||||
|
||||
expect(repairNotLoaded).not.toHaveBeenCalled();
|
||||
expect(service.restart).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("restart: proceeds with repair when config is valid", async () => {
|
||||
const repairNotLoaded = vi.fn().mockResolvedValue({ ok: true });
|
||||
const serviceWithRepair = { ...service, repairNotLoaded };
|
||||
|
||||
const result = await runServiceRestart({
|
||||
serviceNoun: "Gateway",
|
||||
service: serviceWithRepair,
|
||||
renderStartHints: () => [],
|
||||
opts: { json: true },
|
||||
onNotLoaded: async () => null,
|
||||
});
|
||||
|
||||
expect(result).toBe(true);
|
||||
expect(repairNotLoaded).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
@ -203,6 +203,15 @@ export async function runServiceStart(params: {
|
||||
return;
|
||||
}
|
||||
if (!loaded && params.service.repairNotLoaded) {
|
||||
// Pre-flight config validation before attempting repair — don't re-register
|
||||
// a service that will immediately crash due to invalid config. (#35862)
|
||||
const configError = await getConfigValidationError();
|
||||
if (configError) {
|
||||
fail(
|
||||
`${params.serviceNoun} aborted: config is invalid.\n${configError}\nFix the config and retry, or run "openclaw doctor" to repair.`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
// The service was previously installed but is no longer loaded (e.g.
|
||||
// macOS LaunchAgent silently unloaded after sleep/idle). Attempt to
|
||||
// re-register the existing service definition before falling through
|
||||
|
||||
@ -18,6 +18,7 @@ const state = vi.hoisted(() => ({
|
||||
listOutput: "",
|
||||
printOutput: "",
|
||||
bootstrapError: "",
|
||||
bootstrapCode: 1,
|
||||
kickstartError: "",
|
||||
kickstartFailuresRemaining: 0,
|
||||
dirs: new Set<string>(),
|
||||
@ -72,7 +73,7 @@ vi.mock("./exec-file.js", () => ({
|
||||
return { stdout: state.printOutput, stderr: "", code: 0 };
|
||||
}
|
||||
if (call[0] === "bootstrap" && state.bootstrapError) {
|
||||
return { stdout: "", stderr: state.bootstrapError, code: 1 };
|
||||
return { stdout: "", stderr: state.bootstrapError, code: state.bootstrapCode };
|
||||
}
|
||||
if (call[0] === "kickstart" && state.kickstartError && state.kickstartFailuresRemaining > 0) {
|
||||
state.kickstartFailuresRemaining -= 1;
|
||||
@ -145,6 +146,7 @@ beforeEach(() => {
|
||||
state.listOutput = "";
|
||||
state.printOutput = "";
|
||||
state.bootstrapError = "";
|
||||
state.bootstrapCode = 1;
|
||||
state.kickstartError = "";
|
||||
state.kickstartFailuresRemaining = 0;
|
||||
state.dirs.clear();
|
||||
@ -246,6 +248,49 @@ describe("launchd bootstrap repair", () => {
|
||||
expect(kickstartIndex).toBeGreaterThanOrEqual(0);
|
||||
expect(bootstrapIndex).toBeLessThan(kickstartIndex);
|
||||
});
|
||||
|
||||
it("treats bootstrap exit 130 as success (race condition)", async () => {
|
||||
state.bootstrapError = "Service already loaded";
|
||||
state.bootstrapCode = 130;
|
||||
const env: Record<string, string | undefined> = {
|
||||
HOME: "/Users/test",
|
||||
OPENCLAW_PROFILE: "default",
|
||||
};
|
||||
const repair = await repairLaunchAgentBootstrap({ env });
|
||||
expect(repair.ok).toBe(true);
|
||||
// Should still kickstart even though bootstrap returned 130
|
||||
const kickCalls = state.launchctlCalls.filter((c) => c[0] === "kickstart");
|
||||
expect(kickCalls.length).toBe(1);
|
||||
});
|
||||
|
||||
it("treats 'already exists in domain' as success (race condition)", async () => {
|
||||
state.bootstrapError =
|
||||
"Could not bootstrap service: 5: Input/output error: already exists in domain for gui/501";
|
||||
state.bootstrapCode = 1;
|
||||
const env: Record<string, string | undefined> = {
|
||||
HOME: "/Users/test",
|
||||
OPENCLAW_PROFILE: "default",
|
||||
};
|
||||
const repair = await repairLaunchAgentBootstrap({ env });
|
||||
expect(repair.ok).toBe(true);
|
||||
const kickCalls = state.launchctlCalls.filter((c) => c[0] === "kickstart");
|
||||
expect(kickCalls.length).toBe(1);
|
||||
});
|
||||
|
||||
it("returns ok:false for non-race bootstrap failures", async () => {
|
||||
state.bootstrapError = "Could not find specified service";
|
||||
state.bootstrapCode = 1;
|
||||
const env: Record<string, string | undefined> = {
|
||||
HOME: "/Users/test",
|
||||
OPENCLAW_PROFILE: "default",
|
||||
};
|
||||
const repair = await repairLaunchAgentBootstrap({ env });
|
||||
expect(repair.ok).toBe(false);
|
||||
expect(repair.detail).toContain("Could not find specified service");
|
||||
// Should NOT kickstart when bootstrap genuinely failed
|
||||
const kickCalls = state.launchctlCalls.filter((c) => c[0] === "kickstart");
|
||||
expect(kickCalls.length).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("launchd install", () => {
|
||||
|
||||
@ -286,7 +286,15 @@ export async function repairLaunchAgentBootstrap(args: {
|
||||
await execLaunchctl(["enable", `${domain}/${label}`]);
|
||||
const boot = await execLaunchctl(["bootstrap", domain, plistPath]);
|
||||
if (boot.code !== 0) {
|
||||
return { ok: false, detail: (boot.stderr || boot.stdout).trim() || undefined };
|
||||
// Race condition: another process (or macOS itself) may have bootstrapped
|
||||
// the service between our isLoaded check and this call. Exit code 130 or
|
||||
// "already exists in domain" means the service is already registered — treat
|
||||
// as success and proceed to kickstart. See #26088 for prior art.
|
||||
const bootDetail = (boot.stderr || boot.stdout).trim().toLowerCase();
|
||||
const alreadyLoaded = boot.code === 130 || bootDetail.includes("already exists in domain");
|
||||
if (!alreadyLoaded) {
|
||||
return { ok: false, detail: (boot.stderr || boot.stdout).trim() || undefined };
|
||||
}
|
||||
}
|
||||
const kick = await execLaunchctl(["kickstart", "-k", `${domain}/${label}`]);
|
||||
if (kick.code !== 0) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user