CLI: fix gateway restart health ownership for child listener pids (#24696)
Merged via /review-pr -> /prepare-pr -> /merge-pr. Prepared head SHA: d6d4b43f7e0a59856f40d259053cbf653fac3bc2 Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Reviewed-by: @gumadeiras
This commit is contained in:
parent
78e7f41d28
commit
5de1f540e7
@ -39,6 +39,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Providers/OpenRouter: remove conflicting top-level `reasoning_effort` when injecting nested `reasoning.effort`, preventing OpenRouter 400 payload-validation failures for reasoning models. (#24120) Thanks @tenequm.
|
||||
- Providers/Groq: avoid classifying Groq TPM limit errors as context overflow so throttling paths no longer trigger overflow recovery logic. (#16176) Thanks @dddabtc.
|
||||
- Gateway/WS: close repeated post-handshake `unauthorized role:*` request floods per connection and sample duplicate rejection logs, preventing a single misbehaving client from degrading gateway responsiveness. (#20168) Thanks @acy103, @vibecodooor, and @vincentkoc.
|
||||
- Gateway/Restart: treat child listener PIDs as owned by the service runtime PID during restart health checks to avoid false stale-process kills and restart timeouts on launchd/systemd. (#24696) Thanks @gumadeiras.
|
||||
- Config/Write: apply `unsetPaths` with immutable path-copy updates so config writes never mutate caller-provided objects, and harden `openclaw config get/set/unset` path traversal by rejecting prototype-key segments and inherited-property traversal. (#24134) Thanks @frankekn.
|
||||
- Security/Exec: detect obfuscated commands before exec allowlist decisions and require explicit approval for obfuscation patterns. (#8592) Thanks @CornBrother0x and @vincentkoc.
|
||||
- Security/Skills: escape user-controlled prompt, filename, and output-path values in `openai-image-gen` HTML gallery generation to prevent stored XSS in generated `index.html` output. (#12538) Thanks @CornBrother0x.
|
||||
|
||||
66
src/cli/daemon-cli/restart-health.test.ts
Normal file
66
src/cli/daemon-cli/restart-health.test.ts
Normal file
@ -0,0 +1,66 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { GatewayService } from "../../daemon/service.js";
|
||||
import type { PortListenerKind, PortUsage } from "../../infra/ports.js";
|
||||
|
||||
// Stubs are created through vi.hoisted so the vi.mock factory below can reference them.
const inspectPortUsage = vi.hoisted(() => {
  return vi.fn<(port: number) => Promise<PortUsage>>();
});
const classifyPortListener = vi.hoisted(() => {
  // Default implementation classifies every listener as a gateway process.
  return vi.fn<(_listener: unknown, _port: number) => PortListenerKind>(() => "gateway");
});

// Replace the ports module with thin wrappers that forward to the stubs above.
vi.mock("../../infra/ports.js", () => {
  return {
    classifyPortListener: (candidate: unknown, portNumber: number) =>
      classifyPortListener(candidate, portNumber),
    formatPortDiagnostics: vi.fn(() => []),
    inspectPortUsage: (portNumber: number) => inspectPortUsage(portNumber),
  };
});
|
||||
|
||||
describe("inspectGatewayRestart", () => {
  const GATEWAY_PORT = 18789;

  // Builds a service double whose runtime reports "running" with the given pid.
  const runningService = (pid: number): GatewayService =>
    ({
      readRuntime: vi.fn(async () => ({ status: "running", pid })),
    }) as unknown as GatewayService;

  // Makes the inspected port report as busy with a single gateway listener.
  const stubBusyPort = (pid: number, ppid: number): void => {
    inspectPortUsage.mockResolvedValue({
      port: GATEWAY_PORT,
      status: "busy",
      listeners: [{ pid, ppid, commandLine: "openclaw-gateway" }],
      hints: [],
    });
  };

  // Imports the module under test only after the stubs are primed, then takes a snapshot.
  const takeSnapshot = async (service: GatewayService) => {
    const { inspectGatewayRestart } = await import("./restart-health.js");
    return inspectGatewayRestart({ service, port: GATEWAY_PORT });
  };

  beforeEach(() => {
    // Start every test from a free port and a classifier that reports "gateway".
    classifyPortListener.mockReset();
    classifyPortListener.mockReturnValue("gateway");
    inspectPortUsage.mockReset();
    inspectPortUsage.mockResolvedValue({
      port: 0,
      status: "free",
      listeners: [],
      hints: [],
    });
  });

  it("treats a gateway listener child pid as healthy ownership", async () => {
    // Listener 7001 is a direct child of the runtime pid 7000.
    stubBusyPort(7001, 7000);

    const snapshot = await takeSnapshot(runningService(7000));

    expect(snapshot.healthy).toBe(true);
    expect(snapshot.staleGatewayPids).toEqual([]);
  });

  it("marks non-owned gateway listener pids as stale while runtime is running", async () => {
    // Neither the listener pid nor its parent pid matches the runtime pid 8000.
    stubBusyPort(9000, 8999);

    const snapshot = await takeSnapshot(runningService(8000));

    expect(snapshot.healthy).toBe(false);
    expect(snapshot.staleGatewayPids).toEqual([9000]);
  });
});
|
||||
@ -21,6 +21,13 @@ export type GatewayRestartSnapshot = {
|
||||
staleGatewayPids: number[];
|
||||
};
|
||||
|
||||
/**
 * Reports whether a port listener belongs to the service runtime process.
 *
 * A listener counts as owned when its own pid equals the runtime pid, or when
 * its parent pid does (i.e. the listener is a direct child of the runtime).
 *
 * @param params.listener - Listener entry taken from a port usage report.
 * @param params.runtimePid - Pid reported by the service runtime.
 * @returns `true` when either the pid or the ppid matches `runtimePid`.
 */
function listenerOwnedByRuntimePid(params: {
  listener: PortUsage["listeners"][number];
  runtimePid: number;
}): boolean {
  const { listener, runtimePid } = params;
  if (listener.pid === runtimePid) {
    return true;
  }
  return listener.ppid === runtimePid;
}
|
||||
|
||||
export async function inspectGatewayRestart(params: {
|
||||
service: GatewayService;
|
||||
port: number;
|
||||
@ -54,18 +61,27 @@ export async function inspectGatewayRestart(params: {
|
||||
)
|
||||
: [];
|
||||
const running = runtime.status === "running";
|
||||
const runtimePid = runtime.pid;
|
||||
const ownsPort =
|
||||
runtime.pid != null
|
||||
? portUsage.listeners.some((listener) => listener.pid === runtime.pid)
|
||||
runtimePid != null
|
||||
? portUsage.listeners.some((listener) => listenerOwnedByRuntimePid({ listener, runtimePid }))
|
||||
: gatewayListeners.length > 0 ||
|
||||
(portUsage.status === "busy" && portUsage.listeners.length === 0);
|
||||
const healthy = running && ownsPort;
|
||||
const staleGatewayPids = Array.from(
|
||||
new Set(
|
||||
gatewayListeners
|
||||
.map((listener) => listener.pid)
|
||||
.filter((pid): pid is number => Number.isFinite(pid))
|
||||
.filter((pid) => runtime.pid == null || pid !== runtime.pid || !running),
|
||||
.filter((listener) => Number.isFinite(listener.pid))
|
||||
.filter((listener) => {
|
||||
if (!running) {
|
||||
return true;
|
||||
}
|
||||
if (runtimePid == null) {
|
||||
return true;
|
||||
}
|
||||
return !listenerOwnedByRuntimePid({ listener, runtimePid });
|
||||
})
|
||||
.map((listener) => listener.pid as number),
|
||||
),
|
||||
);
|
||||
|
||||
|
||||
@ -75,6 +75,16 @@ async function resolveUnixUser(pid: number): Promise<string | undefined> {
|
||||
return line || undefined;
|
||||
}
|
||||
|
||||
/**
 * Looks up the parent pid of a process by running `ps -p <pid> -o ppid=`.
 *
 * @param pid - Process id to query.
 * @returns The parent pid, or `undefined` when the command exits non-zero or
 *   its output does not parse to a positive integer.
 */
async function resolveUnixParentPid(pid: number): Promise<number | undefined> {
  const result = await runCommandSafe(["ps", "-p", String(pid), "-o", "ppid="]);
  if (result.code !== 0) {
    return undefined;
  }
  const parsed = Number.parseInt(result.stdout.trim(), 10);
  // Rejects NaN (unparseable output) as well as zero and negative values.
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return undefined;
  }
  return parsed;
}
|
||||
|
||||
async function readUnixListeners(
|
||||
port: number,
|
||||
): Promise<{ listeners: PortListener[]; detail?: string; errors: string[] }> {
|
||||
@ -88,9 +98,10 @@ async function readUnixListeners(
|
||||
if (!listener.pid) {
|
||||
return;
|
||||
}
|
||||
const [commandLine, user] = await Promise.all([
|
||||
const [commandLine, user, parentPid] = await Promise.all([
|
||||
resolveUnixCommandLine(listener.pid),
|
||||
resolveUnixUser(listener.pid),
|
||||
resolveUnixParentPid(listener.pid),
|
||||
]);
|
||||
if (commandLine) {
|
||||
listener.commandLine = commandLine;
|
||||
@ -98,6 +109,9 @@ async function readUnixListeners(
|
||||
if (user) {
|
||||
listener.user = user;
|
||||
}
|
||||
if (parentPid !== undefined) {
|
||||
listener.ppid = parentPid;
|
||||
}
|
||||
}),
|
||||
);
|
||||
return { listeners, detail: res.stdout.trim() || undefined, errors };
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
export type PortListener = {
|
||||
pid?: number;
|
||||
ppid?: number;
|
||||
command?: string;
|
||||
commandLine?: string;
|
||||
user?: string;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user