fix(bootstrap): eliminate gateway probe race and pre-onboard config failures

Pre-onboard config (gateway.mode, gateway.port, plugin trust) used to be applied
via `openclaw config set` before `openclaw onboard` created the profile, causing
"Failed to set ..." errors on fresh installs (#101). All pre-onboard config is
now staged via raw JSON writes. Hidden gateway restarts inside syncBundledPlugins
are removed; one explicit restart happens after all post-onboard config, followed
by retried health probes so bootstrap no longer falsely reports "gateway closed"
while the gateway is simply finishing startup.
This commit is contained in:
kumarabhirup 2026-03-15 17:16:05 -07:00
parent fc04de7eb8
commit 4436e57d9a
No known key found for this signature in database
GPG Key ID: DB7CA2289CAB0167
4 changed files with 190 additions and 136 deletions

View File

@ -1,6 +1,6 @@
{
"name": "denchclaw",
"version": "2.3.3",
"version": "2.3.4",
"description": "Fully Managed OpenClaw Framework for managing your CRM, Sales Automation and Outreach agents. The only local productivity tool you need.",
"keywords": [],
"homepage": "https://github.com/DenchHQ/DenchClaw#readme",

View File

@ -1,6 +1,6 @@
{
"name": "dench",
"version": "2.3.3",
"version": "2.3.4",
"description": "Shorthand alias for denchclaw — AI-powered CRM platform CLI",
"license": "MIT",
"repository": {
@ -16,7 +16,7 @@
],
"type": "module",
"dependencies": {
"denchclaw": "^2.3.3"
"denchclaw": "^2.3.4"
},
"engines": {
"node": ">=22.12.0"

View File

@ -414,7 +414,7 @@ describe("bootstrapCommand always-onboard behavior", () => {
expect(summary.onboarded).toBe(true);
});
it("sets gateway.mode before onboard when config is missing it (prevents first-start daemon crash loop)", async () => {
it("stages gateway.mode=local in raw JSON before onboard so first daemon start does not drift (no CLI calls pre-profile)", async () => {
gatewayModeConfigValue = "\n";
const runtime: RuntimeEnv = {
log: vi.fn(),
@ -431,33 +431,26 @@ describe("bootstrapCommand always-onboard behavior", () => {
runtime,
);
const gatewayModeGetIndex = spawnCalls.findIndex(
(call) =>
call.command === "openclaw" &&
call.args.includes("config") &&
call.args.includes("get") &&
call.args.includes("gateway.mode"),
);
const gatewayModeSetIndex = spawnCalls.findIndex(
(call) =>
const configPath = path.join(stateDir, "openclaw.json");
const config = JSON.parse(readFileSync(configPath, "utf-8"));
expect(config.gateway?.mode).toBe("local");
const preOnboardGatewayModeCliSet = spawnCalls.findIndex((call, index) => {
const onboardIndex = spawnCalls.findIndex(
(c) => c.command === "openclaw" && c.args.includes("onboard"),
);
return (
index < onboardIndex &&
call.command === "openclaw" &&
call.args.includes("config") &&
call.args.includes("set") &&
call.args.includes("gateway.mode") &&
call.args.includes("local"),
);
const onboardIndex = spawnCalls.findIndex(
(call) => call.command === "openclaw" && call.args.includes("onboard"),
);
expect(gatewayModeGetIndex).toBeGreaterThan(-1);
expect(gatewayModeSetIndex).toBeGreaterThan(-1);
expect(onboardIndex).toBeGreaterThan(-1);
expect(gatewayModeGetIndex).toBeLessThan(gatewayModeSetIndex);
expect(gatewayModeSetIndex).toBeLessThan(onboardIndex);
call.args.includes("gateway.mode")
);
});
expect(preOnboardGatewayModeCliSet).toBe(-1);
});
it("sets gateway.port before onboard so the first daemon start uses DenchClaw's selected port", async () => {
it("stages gateway.port in raw JSON before onboard so first daemon start uses DenchClaw's port (no CLI calls pre-profile)", async () => {
const runtime: RuntimeEnv = {
log: vi.fn(),
error: vi.fn(),
@ -473,23 +466,26 @@ describe("bootstrapCommand always-onboard behavior", () => {
runtime,
);
const gatewayPortSetIndex = spawnCalls.findIndex(
(call) =>
const configPath = path.join(stateDir, "openclaw.json");
const config = JSON.parse(readFileSync(configPath, "utf-8"));
expect(config.gateway?.port).toBeGreaterThanOrEqual(19001);
const preOnboardGatewayPortCliSet = spawnCalls.findIndex((call, index) => {
const onboardIndex = spawnCalls.findIndex(
(c) => c.command === "openclaw" && c.args.includes("onboard"),
);
return (
index < onboardIndex &&
call.command === "openclaw" &&
call.args.includes("config") &&
call.args.includes("set") &&
call.args.includes("gateway.port"),
);
const onboardIndex = spawnCalls.findIndex(
(call) => call.command === "openclaw" && call.args.includes("onboard"),
);
expect(gatewayPortSetIndex).toBeGreaterThan(-1);
expect(onboardIndex).toBeGreaterThan(-1);
expect(gatewayPortSetIndex).toBeLessThan(onboardIndex);
call.args.includes("gateway.port")
);
});
expect(preOnboardGatewayPortCliSet).toBe(-1);
});
it("rechecks gateway.mode after onboard when onboarding drifts it away from local (keeps DenchClaw on a local gateway)", async () => {
it("enforces gateway.mode=local via CLI after onboard when onboarding drifts it away from local", async () => {
gatewayModeConfigValue = "\n";
driftGatewayModeAfterOnboard = true;
const runtime: RuntimeEnv = {
@ -507,26 +503,24 @@ describe("bootstrapCommand always-onboard behavior", () => {
runtime,
);
const gatewayModeSetIndices = spawnCalls.flatMap((call, index) =>
call.command === "openclaw" &&
call.args.includes("config") &&
call.args.includes("set") &&
call.args.includes("gateway.mode") &&
call.args.includes("local")
? [index]
: [],
);
const onboardIndex = spawnCalls.findIndex(
(call) => call.command === "openclaw" && call.args.includes("onboard"),
);
const postOnboardModeSet = spawnCalls.findIndex(
(call, index) =>
index > onboardIndex &&
call.command === "openclaw" &&
call.args.includes("config") &&
call.args.includes("set") &&
call.args.includes("gateway.mode") &&
call.args.includes("local"),
);
expect(gatewayModeSetIndices).toHaveLength(2);
expect(onboardIndex).toBeGreaterThan(-1);
expect(gatewayModeSetIndices[0]).toBeLessThan(onboardIndex);
expect(gatewayModeSetIndices[1]).toBeGreaterThan(onboardIndex);
expect(postOnboardModeSet).toBeGreaterThan(onboardIndex);
});
it("reapplies gateway.port after onboard so onboarding defaults cannot desync DenchClaw's gateway target", async () => {
it("applies gateway.port via CLI after onboard so onboarding defaults cannot desync DenchClaw's gateway target", async () => {
const runtime: RuntimeEnv = {
log: vi.fn(),
error: vi.fn(),
@ -542,22 +536,20 @@ describe("bootstrapCommand always-onboard behavior", () => {
runtime,
);
const gatewayPortSetIndices = spawnCalls.flatMap((call, index) =>
call.command === "openclaw" &&
call.args.includes("config") &&
call.args.includes("set") &&
call.args.includes("gateway.port")
? [index]
: [],
);
const onboardIndex = spawnCalls.findIndex(
(call) => call.command === "openclaw" && call.args.includes("onboard"),
);
const postOnboardPortSet = spawnCalls.findIndex(
(call, index) =>
index > onboardIndex &&
call.command === "openclaw" &&
call.args.includes("config") &&
call.args.includes("set") &&
call.args.includes("gateway.port"),
);
expect(gatewayPortSetIndices).toHaveLength(2);
expect(onboardIndex).toBeGreaterThan(-1);
expect(gatewayPortSetIndices[0]).toBeLessThan(onboardIndex);
expect(gatewayPortSetIndices[1]).toBeGreaterThan(onboardIndex);
expect(postOnboardPortSet).toBeGreaterThan(onboardIndex);
});
it("ignores bootstrap --profile override and keeps dench profile (prevents profile drift)", async () => {
@ -1515,7 +1507,7 @@ describe("bootstrapCommand always-onboard behavior", () => {
expect(summary.installedOpenClawCli).toBe(true);
});
it("runs doctor/gateway autofix steps when initial health probe fails", async () => {
it("recovers without autofix when gateway just needs a moment after restart (no false gateway-closed)", async () => {
healthFailuresBeforeSuccess = 1;
const runtime: RuntimeEnv = {
log: vi.fn(),
@ -1532,6 +1524,65 @@ describe("bootstrapCommand always-onboard behavior", () => {
runtime,
);
const doctorFixCalled = spawnCalls.some(
(call) =>
call.command === "openclaw" && call.args.includes("doctor") && call.args.includes("--fix"),
);
expect(doctorFixCalled).toBe(false);
expect(summary.gatewayReachable).toBe(true);
expect(summary.gatewayAutoFix).toBeUndefined();
});
it("performs one explicit gateway restart after all post-onboard config (no hidden restarts)", async () => {
  // Stub runtime: this test only inspects the recorded spawn calls.
  const runtime: RuntimeEnv = { log: vi.fn(), error: vi.fn(), exit: vi.fn() };

  await bootstrapCommand({ nonInteractive: true, noOpen: true, skipUpdate: true }, runtime);

  // Single pass over the recorded spawns: remember where `onboard` first ran
  // and collect the position of every `openclaw … gateway … restart` call.
  let onboardIndex = -1;
  const restartIndices: number[] = [];
  for (const [index, call] of spawnCalls.entries()) {
    if (call.command !== "openclaw") {
      continue;
    }
    if (onboardIndex === -1 && call.args.includes("onboard")) {
      onboardIndex = index;
    }
    if (call.args.includes("gateway") && call.args.includes("restart")) {
      restartIndices.push(index);
    }
  }

  // Onboard must have run, and exactly one restart must follow it.
  expect(onboardIndex).toBeGreaterThan(-1);
  expect(restartIndices).toHaveLength(1);
  expect(restartIndices[0]!).toBeGreaterThan(onboardIndex);
});
it("runs doctor/gateway autofix steps only after all retried probes fail", async () => {
healthFailuresBeforeSuccess = 5;
const runtime: RuntimeEnv = {
log: vi.fn(),
error: vi.fn(),
exit: vi.fn(),
};
const summary = await bootstrapCommand(
{
nonInteractive: true,
noOpen: true,
skipUpdate: true,
},
runtime,
);
const doctorFixCalled = spawnCalls.some(
(call) =>
call.command === "openclaw" && call.args.includes("doctor") && call.args.includes("--fix"),

View File

@ -523,7 +523,6 @@ async function syncBundledPlugins(params: {
profile: string;
stateDir: string;
plugins: BundledPluginSpec[];
restartGateway?: boolean;
}): Promise<BundledPluginSyncResult> {
try {
const packageRoot = resolveCliPackageRoot();
@ -623,19 +622,6 @@ async function syncBundledPlugins(params: {
rmSync(legacyPluginDir, { recursive: true, force: true });
}
if (params.restartGateway) {
try {
await runOpenClawOrThrow({
openclawCommand: params.openclawCommand,
args: ["--profile", params.profile, "gateway", "restart"],
timeoutMs: 60_000,
errorMessage: "Failed to restart gateway after plugin install.",
});
} catch {
// Gateway may not be running yet (first bootstrap) — ignore.
}
}
return {
installedPluginIds,
migratedLegacyDenchPlugin,
@ -693,19 +679,35 @@ async function ensureDefaultWorkspacePath(
}
/**
* Write `agents.defaults.workspace` directly into `stateDir/openclaw.json`
* Stage all required pre-onboard config directly into `stateDir/openclaw.json`
* without going through the OpenClaw CLI. On a fresh install the "dench"
* profile doesn't exist yet (it's created by `openclaw onboard`), so the
* CLI-based `config set` fails. Writing the file directly sidesteps this
* while still ensuring the workspace is pinned before onboard runs.
* profile doesn't exist yet (it's created by `openclaw onboard`), so any
* `openclaw config set` call fails. Writing the file directly sidesteps
* this while still ensuring the config is in place before onboard starts
* the daemon. The CLI-based re-application happens post-onboard once the
* profile is live.
*/
function pinWorkspaceInConfigFile(stateDir: string, workspaceDir: string): void {
function stagePreOnboardConfig(
stateDir: string,
params: {
workspaceDir: string;
gatewayMode: string;
gatewayPort: number;
},
): void {
const raw = readBootstrapConfig(stateDir) ?? {};
const agents = { ...(asRecord(raw.agents) ?? {}) };
const defaults = { ...(asRecord(agents.defaults) ?? {}) };
defaults.workspace = workspaceDir;
defaults.workspace = params.workspaceDir;
agents.defaults = defaults;
raw.agents = agents;
const gateway = { ...(asRecord(raw.gateway) ?? {}) };
gateway.mode = params.gatewayMode;
gateway.port = params.gatewayPort;
raw.gateway = gateway;
mkdirSync(stateDir, { recursive: true });
writeFileSync(
path.join(stateDir, "openclaw.json"),
@ -1413,8 +1415,8 @@ async function attemptGatewayAutoFix(params: {
}
let finalProbe = await probeGateway(params.openclawCommand, params.profile, params.gatewayPort);
for (let attempt = 0; attempt < 2 && !finalProbe.ok; attempt += 1) {
await sleep(1_200);
for (let attempt = 0; attempt < 4 && !finalProbe.ok; attempt += 1) {
await sleep(1_000);
finalProbe = await probeGateway(params.openclawCommand, params.profile, params.gatewayPort);
}
@ -2248,15 +2250,18 @@ export async function bootstrapCommand(
);
}
// Pin OpenClaw to the managed default workspace before onboarding so bootstrap
// never drifts into creating/using legacy workspace-* paths.
// On a fresh install the "dench" profile doesn't exist yet (created by
// `openclaw onboard`), so `openclaw config set` fails. Write the value
// directly into the JSON config file instead — the CLI-based re-application
// happens post-onboard alongside gateway mode/port.
// Stage workspace, gateway mode, and gateway port directly into the raw JSON
// config file. On a fresh install the "dench" profile doesn't exist yet
// (it's created by `openclaw onboard`), so any `openclaw config set` call
// would fail. Writing directly sidesteps this; the CLI-based re-application
// happens post-onboard once the profile is live.
mkdirSync(workspaceDir, { recursive: true });
preCloudSpinner?.message("Configuring default workspace…");
pinWorkspaceInConfigFile(stateDir, workspaceDir);
preCloudSpinner?.message("Staging pre-onboard config…");
stagePreOnboardConfig(stateDir, {
workspaceDir,
gatewayMode: "local",
gatewayPort,
});
preCloudSpinner?.stop("Gateway ready.");
@ -2305,44 +2310,14 @@ export async function bootstrapCommand(
profile,
stateDir,
plugins: managedBundledPlugins,
restartGateway: true,
});
const posthogPluginInstalled = preOnboardPlugins.installedPluginIds.includes("posthog-analytics");
// Ensure gateway.mode=local BEFORE onboard so the daemon starts successfully.
// Previously this ran post-onboard, but onboard --install-daemon starts the
// gateway immediately — if gateway.mode is unset at that point the daemon
// blocks with "set gateway.mode=local" and enters a crash loop.
preOnboardSpinner?.message("Configuring gateway…");
await ensureGatewayModeLocal(openclawCommand, profile);
// Persist the assigned port so the daemon binds to the correct port on first
// start rather than falling back to the default.
await ensureGatewayPort(openclawCommand, profile, gatewayPort);
// Push plugin trust through the CLI as the LAST config step before onboard.
// syncBundledPlugins writes plugins.allow / plugins.load.paths to the raw
// JSON file, but subsequent `openclaw config set` calls may clobber them.
// Re-applying via the CLI ensures OpenClaw's own config resolution sees them.
if (preOnboardPlugins.installedPluginIds.length > 0) {
preOnboardSpinner?.message("Trusting managed plugins…");
await setOpenClawConfigJson({
openclawCommand,
profile,
key: "plugins.allow",
value: preOnboardPlugins.installedPluginIds,
errorMessage: "Failed to set plugins.allow for managed plugins.",
});
const pluginLoadPaths = managedBundledPlugins.map((plugin) =>
normalizeFilesystemPath(path.join(stateDir, "extensions", plugin.sourceDirName)),
);
await setOpenClawConfigJson({
openclawCommand,
profile,
key: "plugins.load.paths",
value: pluginLoadPaths,
errorMessage: "Failed to set plugins.load.paths for managed plugins.",
});
}
// All pre-onboard config (workspace, gateway mode/port, plugin trust) is now
// staged via raw JSON writes above — no CLI calls needed before the profile
// exists. syncBundledPlugins already wrote plugins.allow / plugins.load.paths
// to the raw JSON file. Post-onboard re-application via the CLI happens after
// `openclaw onboard` creates the profile.
preOnboardSpinner?.stop("Ready to onboard.");
@ -2392,18 +2367,17 @@ export async function bootstrapCommand(
const postOnboardSpinner = !opts.json ? spinner() : null;
postOnboardSpinner?.start("Finalizing configuration…");
// Re-apply settings after onboard so interactive/wizard flows cannot
// drift DenchClaw away from its required configuration. The workspace path
// was written directly to the JSON file pre-onboard (profile didn't exist
// yet); now that the profile is live we also push it through the CLI.
// ── Post-onboard config reconciliation ──
// Apply all Dench-owned settings via the CLI now that onboard has created the
// profile. Pre-onboard config was staged via raw JSON writes (the profile
// didn't exist for CLI calls); this pass enforces the values through
// OpenClaw's own config resolution and guards against onboard wizard drift.
await ensureDefaultWorkspacePath(openclawCommand, profile, workspaceDir);
postOnboardSpinner?.message("Configuring gateway…");
await ensureGatewayModeLocal(openclawCommand, profile);
postOnboardSpinner?.message("Configuring gateway port…");
await ensureGatewayPort(openclawCommand, profile, gatewayPort);
postOnboardSpinner?.message("Setting tools profile…");
// DenchClaw requires the full tool profile; onboarding defaults can drift to
// messaging-only, so enforce this on every bootstrap run.
await ensureToolsProfile(openclawCommand, profile);
if (
@ -2431,14 +2405,43 @@ export async function bootstrapCommand(
profile,
stateDir,
plugins: managedBundledPlugins,
restartGateway: true,
});
postOnboardSpinner?.message("Configuring subagent defaults…");
await ensureSubagentDefaults(openclawCommand, profile);
postOnboardSpinner?.message("Probing gateway health…");
// ── Single post-config gateway restart ──
// All Dench-owned config has been applied. Restart the gateway once so the
// daemon picks up plugin, model, and subagent changes that were written after
// onboard started it. No helper above triggers its own restart.
postOnboardSpinner?.message("Restarting gateway…");
try {
await runOpenClawOrThrow({
openclawCommand,
args: ["--profile", profile, "gateway", "restart"],
timeoutMs: 60_000,
errorMessage: "Failed to restart gateway after config update.",
});
} catch {
// Gateway may not be running (e.g. onboard daemon install failed on this
// platform). The final readiness check below will catch this.
}
// ── Final readiness verification ──
// Give the gateway time to finish starting after the restart, then verify
// readiness. The probe retries here replace the old pattern of probing
// immediately (which raced gateway startup) and jumping straight into a
// destructive stop/install/start auto-fix cycle.
postOnboardSpinner?.message("Waiting for gateway…");
let gatewayProbe = await probeGateway(openclawCommand, profile, gatewayPort);
for (let attempt = 0; attempt < 4 && !gatewayProbe.ok; attempt += 1) {
await sleep(750);
postOnboardSpinner?.message(`Probing gateway health (attempt ${attempt + 2}/5)…`);
gatewayProbe = await probeGateway(openclawCommand, profile, gatewayPort);
}
// Repair is failure-only: only invoked when the retried final verification
// still reports the gateway as unreachable.
let gatewayAutoFix: GatewayAutoFixResult | undefined;
if (!gatewayProbe.ok) {
postOnboardSpinner?.message("Gateway unreachable, attempting auto-fix…");