Compare commits
7 Commits
main
...
fix/issue-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
67e22fa07f | ||
|
|
ff764ee690 | ||
|
|
5f85b654f0 | ||
|
|
954928a90d | ||
|
|
ac27fa485a | ||
|
|
b11933d8a9 | ||
|
|
da4e43d52c |
@ -58,6 +58,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Matrix/DM routing: add safer fallback detection for broken `m.direct` homeservers, honor explicit room bindings over DM classification, and preserve room-bound agent selection for Matrix DM rooms. (#19736) Thanks @derbronko.
|
||||
- Cron/Telegram announce delivery: route text-only announce jobs through the real outbound adapters after finalizing descendant output so plain Telegram targets no longer report `delivered: true` when no message actually reached Telegram. (#40575) thanks @obviyus.
|
||||
- Gateway/restart timeout recovery: exit non-zero when restart-triggered shutdown drains time out so launchd/systemd restart the gateway instead of treating the failed restart as a clean stop. Landed from contributor PR #40380 by @dsantoreis. Thanks @dsantoreis.
|
||||
- Gateway/config restart guard: validate config before service start/restart and keep post-SIGUSR1 startup failures from crashing the gateway process, reducing invalid-config restart loops and macOS permission loss. Landed from contributor PR #38699 by @lml2468. Thanks @lml2468.
|
||||
|
||||
## 2026.3.7
|
||||
|
||||
|
||||
206
src/cli/daemon-cli/lifecycle-core.config-guard.test.ts
Normal file
206
src/cli/daemon-cli/lifecycle-core.config-guard.test.ts
Normal file
@ -0,0 +1,206 @@
|
||||
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
const readConfigFileSnapshotMock = vi.fn();
|
||||
const loadConfig = vi.fn(() => ({}));
|
||||
|
||||
const runtimeLogs: string[] = [];
|
||||
const defaultRuntime = {
|
||||
log: (message: string) => runtimeLogs.push(message),
|
||||
error: vi.fn(),
|
||||
exit: (code: number) => {
|
||||
throw new Error(`__exit__:${code}`);
|
||||
},
|
||||
};
|
||||
|
||||
const service = {
|
||||
label: "TestService",
|
||||
loadedText: "loaded",
|
||||
notLoadedText: "not loaded",
|
||||
install: vi.fn(),
|
||||
uninstall: vi.fn(),
|
||||
stop: vi.fn(),
|
||||
isLoaded: vi.fn(),
|
||||
readCommand: vi.fn(),
|
||||
readRuntime: vi.fn(),
|
||||
restart: vi.fn(),
|
||||
};
|
||||
|
||||
vi.mock("../../config/config.js", () => ({
|
||||
loadConfig: () => loadConfig(),
|
||||
readConfigFileSnapshot: () => readConfigFileSnapshotMock(),
|
||||
}));
|
||||
|
||||
vi.mock("../../config/issue-format.js", () => ({
|
||||
formatConfigIssueLines: (
|
||||
issues: Array<{ path: string; message: string }>,
|
||||
_prefix: string,
|
||||
_opts?: unknown,
|
||||
) => issues.map((i) => `${i.path}: ${i.message}`),
|
||||
}));
|
||||
|
||||
vi.mock("../../runtime.js", () => ({
|
||||
defaultRuntime,
|
||||
}));
|
||||
|
||||
describe("runServiceRestart config pre-flight (#35862)", () => {
|
||||
let runServiceRestart: typeof import("./lifecycle-core.js").runServiceRestart;
|
||||
|
||||
beforeAll(async () => {
|
||||
({ runServiceRestart } = await import("./lifecycle-core.js"));
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
runtimeLogs.length = 0;
|
||||
readConfigFileSnapshotMock.mockReset();
|
||||
readConfigFileSnapshotMock.mockResolvedValue({
|
||||
exists: true,
|
||||
valid: true,
|
||||
config: {},
|
||||
issues: [],
|
||||
});
|
||||
loadConfig.mockReset();
|
||||
loadConfig.mockReturnValue({});
|
||||
service.isLoaded.mockClear();
|
||||
service.readCommand.mockClear();
|
||||
service.restart.mockClear();
|
||||
service.isLoaded.mockResolvedValue(true);
|
||||
service.readCommand.mockResolvedValue({ environment: {} });
|
||||
service.restart.mockResolvedValue(undefined);
|
||||
vi.unstubAllEnvs();
|
||||
vi.stubEnv("OPENCLAW_GATEWAY_TOKEN", "");
|
||||
vi.stubEnv("CLAWDBOT_GATEWAY_TOKEN", "");
|
||||
});
|
||||
|
||||
it("aborts restart when config is invalid", async () => {
|
||||
readConfigFileSnapshotMock.mockResolvedValue({
|
||||
exists: true,
|
||||
valid: false,
|
||||
config: {},
|
||||
issues: [{ path: "agents.defaults.pdfModel", message: "Unrecognized key" }],
|
||||
});
|
||||
|
||||
await expect(
|
||||
runServiceRestart({
|
||||
serviceNoun: "Gateway",
|
||||
service,
|
||||
renderStartHints: () => [],
|
||||
opts: { json: true },
|
||||
}),
|
||||
).rejects.toThrow("__exit__:1");
|
||||
|
||||
expect(service.restart).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("proceeds with restart when config is valid", async () => {
|
||||
readConfigFileSnapshotMock.mockResolvedValue({
|
||||
exists: true,
|
||||
valid: true,
|
||||
config: {},
|
||||
issues: [],
|
||||
});
|
||||
|
||||
const result = await runServiceRestart({
|
||||
serviceNoun: "Gateway",
|
||||
service,
|
||||
renderStartHints: () => [],
|
||||
opts: { json: true },
|
||||
});
|
||||
|
||||
expect(result).toBe(true);
|
||||
expect(service.restart).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("proceeds with restart when config file does not exist", async () => {
|
||||
readConfigFileSnapshotMock.mockResolvedValue({
|
||||
exists: false,
|
||||
valid: true,
|
||||
config: {},
|
||||
issues: [],
|
||||
});
|
||||
|
||||
const result = await runServiceRestart({
|
||||
serviceNoun: "Gateway",
|
||||
service,
|
||||
renderStartHints: () => [],
|
||||
opts: { json: true },
|
||||
});
|
||||
|
||||
expect(result).toBe(true);
|
||||
expect(service.restart).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("proceeds with restart when snapshot read throws", async () => {
|
||||
readConfigFileSnapshotMock.mockRejectedValue(new Error("read failed"));
|
||||
|
||||
const result = await runServiceRestart({
|
||||
serviceNoun: "Gateway",
|
||||
service,
|
||||
renderStartHints: () => [],
|
||||
opts: { json: true },
|
||||
});
|
||||
|
||||
expect(result).toBe(true);
|
||||
expect(service.restart).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe("runServiceStart config pre-flight (#35862)", () => {
|
||||
let runServiceStart: typeof import("./lifecycle-core.js").runServiceStart;
|
||||
|
||||
beforeAll(async () => {
|
||||
({ runServiceStart } = await import("./lifecycle-core.js"));
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
runtimeLogs.length = 0;
|
||||
readConfigFileSnapshotMock.mockReset();
|
||||
readConfigFileSnapshotMock.mockResolvedValue({
|
||||
exists: true,
|
||||
valid: true,
|
||||
config: {},
|
||||
issues: [],
|
||||
});
|
||||
service.isLoaded.mockClear();
|
||||
service.restart.mockClear();
|
||||
service.isLoaded.mockResolvedValue(true);
|
||||
service.restart.mockResolvedValue(undefined);
|
||||
});
|
||||
|
||||
it("aborts start when config is invalid", async () => {
|
||||
readConfigFileSnapshotMock.mockResolvedValue({
|
||||
exists: true,
|
||||
valid: false,
|
||||
config: {},
|
||||
issues: [{ path: "agents.defaults.pdfModel", message: "Unrecognized key" }],
|
||||
});
|
||||
|
||||
await expect(
|
||||
runServiceStart({
|
||||
serviceNoun: "Gateway",
|
||||
service,
|
||||
renderStartHints: () => [],
|
||||
opts: { json: true },
|
||||
}),
|
||||
).rejects.toThrow("__exit__:1");
|
||||
|
||||
expect(service.restart).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("proceeds with start when config is valid", async () => {
|
||||
readConfigFileSnapshotMock.mockResolvedValue({
|
||||
exists: true,
|
||||
valid: true,
|
||||
config: {},
|
||||
issues: [],
|
||||
});
|
||||
|
||||
await runServiceStart({
|
||||
serviceNoun: "Gateway",
|
||||
service,
|
||||
renderStartHints: () => [],
|
||||
opts: { json: true },
|
||||
});
|
||||
|
||||
expect(service.restart).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
@ -1,5 +1,6 @@
|
||||
import type { Writable } from "node:stream";
|
||||
import { readBestEffortConfig } from "../../config/config.js";
|
||||
import { readBestEffortConfig, readConfigFileSnapshot } from "../../config/config.js";
|
||||
import { formatConfigIssueLines } from "../../config/issue-format.js";
|
||||
import { resolveIsNixMode } from "../../config/paths.js";
|
||||
import { checkTokenDrift } from "../../daemon/service-audit.js";
|
||||
import type { GatewayService } from "../../daemon/service.js";
|
||||
@ -107,6 +108,29 @@ async function resolveServiceLoadedOrFail(params: {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Best-effort config validation. Returns a string describing the issues if
|
||||
* config exists and is invalid, or null if config is valid/missing/unreadable.
|
||||
*
|
||||
* Note: This reads the config file snapshot in the current CLI environment.
|
||||
* Configs using env vars only available in the service context (launchd/systemd)
|
||||
* may produce false positives, but the check is intentionally best-effort —
|
||||
* a false positive here is safer than a crash on startup. (#35862)
|
||||
*/
|
||||
async function getConfigValidationError(): Promise<string | null> {
|
||||
try {
|
||||
const snapshot = await readConfigFileSnapshot();
|
||||
if (!snapshot.exists || snapshot.valid) {
|
||||
return null;
|
||||
}
|
||||
return snapshot.issues.length > 0
|
||||
? formatConfigIssueLines(snapshot.issues, "", { normalizeRoot: true }).join("\n")
|
||||
: "Unknown validation issue.";
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export async function runServiceUninstall(params: {
|
||||
serviceNoun: string;
|
||||
service: GatewayService;
|
||||
@ -187,6 +211,17 @@ export async function runServiceStart(params: {
|
||||
});
|
||||
return;
|
||||
}
|
||||
// Pre-flight config validation (#35862)
|
||||
{
|
||||
const configError = await getConfigValidationError();
|
||||
if (configError) {
|
||||
fail(
|
||||
`${params.serviceNoun} aborted: config is invalid.\n${configError}\nFix the config and retry, or run "openclaw doctor" to repair.`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
await params.service.restart({ env: process.env, stdout });
|
||||
} catch (err) {
|
||||
@ -298,6 +333,19 @@ export async function runServiceRestart(params: {
|
||||
if (loaded === null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Pre-flight config validation: check before any restart action (including
|
||||
// onNotLoaded which may send SIGUSR1 to an unmanaged process). (#35862)
|
||||
{
|
||||
const configError = await getConfigValidationError();
|
||||
if (configError) {
|
||||
fail(
|
||||
`${params.serviceNoun} aborted: config is invalid.\n${configError}\nFix the config and retry, or run "openclaw doctor" to repair.`,
|
||||
);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!loaded) {
|
||||
try {
|
||||
handledNotLoaded = (await params.onNotLoaded?.({ json, stdout, fail })) ?? null;
|
||||
|
||||
@ -190,10 +190,34 @@ export async function runGatewayLoop(params: {
|
||||
|
||||
// Keep process alive; SIGUSR1 triggers an in-process restart (no supervisor required).
|
||||
// SIGTERM/SIGINT still exit after a graceful shutdown.
|
||||
let isFirstStart = true;
|
||||
// eslint-disable-next-line no-constant-condition
|
||||
while (true) {
|
||||
onIteration();
|
||||
server = await params.start();
|
||||
try {
|
||||
server = await params.start();
|
||||
isFirstStart = false;
|
||||
} catch (err) {
|
||||
// On initial startup, let the error propagate so the outer handler
|
||||
// can report "Gateway failed to start" and exit non-zero. Only
|
||||
// swallow errors on subsequent in-process restarts to keep the
|
||||
// process alive (a crash would lose macOS TCC permissions). (#35862)
|
||||
if (isFirstStart) {
|
||||
throw err;
|
||||
}
|
||||
server = null;
|
||||
// Release the gateway lock so that `daemon restart/stop` (which
|
||||
// discovers PIDs via the gateway port) can still manage the process.
|
||||
// Without this, the process holds the lock but is not listening,
|
||||
// forcing manual cleanup. (#35862)
|
||||
await releaseLockIfHeld();
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
const errStack = err instanceof Error && err.stack ? `\n${err.stack}` : "";
|
||||
gatewayLog.error(
|
||||
`gateway startup failed: ${errMsg}. ` +
|
||||
`Process will stay alive; fix the issue and restart.${errStack}`,
|
||||
);
|
||||
}
|
||||
await new Promise<void>((resolve) => {
|
||||
restartResolver = resolve;
|
||||
});
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user