fix(cli): add rollback safety net and crash diagnostics for web runtime updates

- Back up existing app dir before replacing (enables rollback on crash)
- Roll back to previous working runtime when new version fails readiness probe
- Early exit in waitForWebRuntime() when spawned process dies (saves 15s)
- Include web-app.err.log tail in probe failure reason for diagnostics
- Clean up backup after successful probe

Made-with: Cursor
This commit is contained in:
Mark 2026-03-17 13:10:33 -07:00
parent ef72fe29d7
commit f4ce923f5c
4 changed files with 268 additions and 10 deletions

View File

@ -24,6 +24,7 @@ const workspaceSeedMocks = vi.hoisted(() => ({
const spawnMock = vi.hoisted(() => vi.fn());
const webRuntimeMocks = vi.hoisted(() => ({
DEFAULT_WEB_APP_PORT: 3100,
cleanupManagedWebRuntimeBackup: vi.fn(),
ensureManagedWebRuntime: vi.fn(async () => ({ ready: true, reason: "ready" })),
evaluateMajorVersionTransition: vi.fn(() => ({
previousMajor: 2,
@ -100,6 +101,8 @@ vi.mock("./web-runtime.js", () => ({
startManagedWebRuntime: webRuntimeMocks.startManagedWebRuntime,
stopManagedWebRuntime: webRuntimeMocks.stopManagedWebRuntime,
waitForWebRuntime: webRuntimeMocks.waitForWebRuntime,
cleanupManagedWebRuntimeBackup: webRuntimeMocks.cleanupManagedWebRuntimeBackup,
readLastLogLines: vi.fn(() => undefined),
}));
import {

View File

@ -11,9 +11,11 @@ import { VERSION } from "../version.js";
import { applyCliProfileEnv } from "./profile.js";
import {
DEFAULT_WEB_APP_PORT,
cleanupManagedWebRuntimeBackup,
ensureManagedWebRuntime,
evaluateMajorVersionTransition,
readLastKnownWebPort,
readLastLogLines,
readManagedWebRuntimeManifest,
resolveCliPackageRoot,
resolveManagedWebRuntimeServerPath,
@ -488,6 +490,8 @@ export async function updateWebRuntimeCommand(
throw new Error(`Web runtime update failed: ${summary.reason}`);
}
cleanupManagedWebRuntimeBackup(stateDir);
await promptAndOpenWebUi({
webPort: selectedPort,
json: opts.json,
@ -621,14 +625,23 @@ export async function startWebRuntimeCommand(
);
}
const probe = await waitForWebRuntime(selectedPort);
const probe = await waitForWebRuntime(selectedPort, startResult.pid);
let probeReason = probe.reason;
if (!probe.ok) {
const errLog = readLastLogLines(stateDir, "web-app.err.log", 6);
if (errLog) {
probeReason = `${probe.reason}\n--- web-app.err.log ---\n${errLog}`;
}
}
const summary: StartWebRuntimeSummary = {
profile,
webPort: selectedPort,
stoppedPids: stopResult.stoppedPids,
skippedForeignPids: stopResult.skippedForeignPids,
started: probe.ok,
reason: probe.reason,
reason: probeReason,
gatewayRestarted: gatewayResult.restarted,
gatewayError: daemonless ? undefined : gatewayResult.error,
};
@ -659,6 +672,8 @@ export async function startWebRuntimeCommand(
throw new Error(`Web runtime failed readiness probe: ${summary.reason}`);
}
cleanupManagedWebRuntimeBackup(stateDir);
await promptAndOpenWebUi({
webPort: selectedPort,
json: opts.json,

View File

@ -1,8 +1,16 @@
import { describe, expect, it } from "vitest";
import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
import { mkdtempSync } from "node:fs";
import os from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import {
classifyWebPortListener,
cleanupManagedWebRuntimeBackup,
evaluateMajorVersionTransition,
evaluateWebProfilesPayload,
installManagedWebRuntime,
readLastLogLines,
rollbackManagedWebRuntime,
} from "./web-runtime.js";
describe("evaluateWebProfilesPayload", () => {
@ -79,3 +87,147 @@ describe("evaluateMajorVersionTransition", () => {
expect(result.currentMajor).toBe(2);
});
});
// Checks that installManagedWebRuntime keeps the previous app dir as `app.prev`
// and creates no backup on a first-time install.
describe("installManagedWebRuntime backup", () => {
  let tmpDir: string;
  let stateDir: string;
  let packageRoot: string;

  // Builds a path below `<stateDir>/web-runtime`.
  const inRuntimeDir = (...segments: string[]): string =>
    path.join(stateDir, "web-runtime", ...segments);

  beforeEach(() => {
    tmpDir = mkdtempSync(path.join(os.tmpdir(), "web-runtime-test-"));
    stateDir = path.join(tmpDir, "state");
    packageRoot = path.join(tmpDir, "pkg");

    // Minimal fake standalone build; presumably this is the layout
    // installManagedWebRuntime copies from (confirm against the installer).
    const fakeStandaloneApp = path.join(
      packageRoot,
      "apps",
      "web",
      ".next",
      "standalone",
      "apps",
      "web",
    );
    mkdirSync(fakeStandaloneApp, { recursive: true });
    writeFileSync(path.join(fakeStandaloneApp, "server.js"), "module.exports = {};", "utf-8");
  });

  afterEach(() => {
    rmSync(tmpDir, { recursive: true, force: true });
  });

  it("backs up existing app dir before replacing (enables rollback on crash)", () => {
    // Seed a pre-existing install that carries a recognisable marker file.
    const liveAppDir = inRuntimeDir("app");
    mkdirSync(liveAppDir, { recursive: true });
    writeFileSync(path.join(liveAppDir, "old-marker.txt"), "old", "utf-8");

    installManagedWebRuntime({ stateDir, packageRoot, denchVersion: "2.0.0" });

    const previousAppDir = inRuntimeDir("app.prev");
    expect(existsSync(previousAppDir)).toBe(true);
    expect(existsSync(path.join(previousAppDir, "old-marker.txt"))).toBe(true);
    expect(existsSync(path.join(liveAppDir, "server.js"))).toBe(true);
  });

  it("installs without error when no previous app dir exists", () => {
    const outcome = installManagedWebRuntime({ stateDir, packageRoot, denchVersion: "2.0.0" });

    expect(outcome.installed).toBe(true);
    expect(existsSync(inRuntimeDir("app.prev"))).toBe(false);
  });
});
// Exercises rollbackManagedWebRuntime against a scratch state dir.
describe("rollbackManagedWebRuntime", () => {
  let tmpDir: string;

  beforeEach(() => {
    tmpDir = mkdtempSync(path.join(os.tmpdir(), "web-runtime-rollback-test-"));
  });

  afterEach(() => {
    rmSync(tmpDir, { recursive: true, force: true });
  });

  it("restores backup dir to app dir (recovers from broken update)", () => {
    const webRuntimeRoot = path.join(tmpDir, "web-runtime");
    const liveDir = path.join(webRuntimeRoot, "app");
    const previousDir = path.join(webRuntimeRoot, "app.prev");

    // The currently installed (broken) runtime.
    mkdirSync(liveDir, { recursive: true });
    writeFileSync(path.join(liveDir, "broken.js"), "crash", "utf-8");
    // The backup that should be restored.
    mkdirSync(previousDir, { recursive: true });
    writeFileSync(path.join(previousDir, "server.js"), "working", "utf-8");

    const rolledBack = rollbackManagedWebRuntime(tmpDir);

    expect(rolledBack).toBe(true);
    expect(existsSync(path.join(liveDir, "server.js"))).toBe(true);
    expect(existsSync(previousDir)).toBe(false);
  });

  it("returns false when no backup exists", () => {
    const rolledBack = rollbackManagedWebRuntime(tmpDir);
    expect(rolledBack).toBe(false);
  });
});
// Covers backup removal, with and without an existing `app.prev` directory.
describe("cleanupManagedWebRuntimeBackup", () => {
  let tmpDir: string;

  beforeEach(() => {
    tmpDir = mkdtempSync(path.join(os.tmpdir(), "web-runtime-cleanup-test-"));
  });

  afterEach(() => {
    rmSync(tmpDir, { recursive: true, force: true });
  });

  it("removes backup dir after successful probe", () => {
    const previousDir = path.join(tmpDir, "web-runtime", "app.prev");
    mkdirSync(previousDir, { recursive: true });
    writeFileSync(path.join(previousDir, "old.js"), "old", "utf-8");

    cleanupManagedWebRuntimeBackup(tmpDir);

    expect(existsSync(previousDir)).toBe(false);
  });

  it("does not throw when backup dir is absent", () => {
    const runCleanup = (): void => cleanupManagedWebRuntimeBackup(tmpDir);
    expect(runCleanup).not.toThrow();
  });
});
// Covers the log-tail helper: normal tail, missing file, and empty file.
describe("readLastLogLines", () => {
  let tmpDir: string;

  // Writes `<tmpDir>/logs/web-app.err.log` with the given content.
  const seedErrLog = (content: string): void => {
    const logsDir = path.join(tmpDir, "logs");
    mkdirSync(logsDir, { recursive: true });
    writeFileSync(path.join(logsDir, "web-app.err.log"), content, "utf-8");
  };

  beforeEach(() => {
    tmpDir = mkdtempSync(path.join(os.tmpdir(), "web-runtime-logs-test-"));
  });

  afterEach(() => {
    rmSync(tmpDir, { recursive: true, force: true });
  });

  it("reads the last N lines from a log file (provides crash diagnostics)", () => {
    seedErrLog("line1\nline2\nline3\nline4\nline5\n");

    const tail = readLastLogLines(tmpDir, "web-app.err.log", 3);

    expect(tail).toBe("line3\nline4\nline5");
  });

  it("returns undefined when log file is missing", () => {
    const tail = readLastLogLines(tmpDir, "web-app.err.log");
    expect(tail).toBeUndefined();
  });

  it("returns undefined when log file is empty", () => {
    seedErrLog("");

    const tail = readLastLogLines(tmpDir, "web-app.err.log");

    expect(tail).toBeUndefined();
  });
});

View File

@ -8,6 +8,7 @@ import {
readFileSync,
readlinkSync,
readdirSync,
renameSync,
rmSync,
writeFileSync,
} from "node:fs";
@ -24,6 +25,7 @@ import { listPortListeners, type PortProcess } from "./ports.js";
export const DEFAULT_WEB_APP_PORT = 3100;
const WEB_RUNTIME_DIRNAME = "web-runtime";
const WEB_RUNTIME_APP_DIRNAME = "app";
const WEB_RUNTIME_APP_BACKUP_DIRNAME = "app.prev";
const WEB_RUNTIME_MANIFEST_FILENAME = "manifest.json";
const WEB_RUNTIME_PROCESS_FILENAME = "process.json";
const WEB_APP_PROBE_ATTEMPTS = 20;
@ -130,6 +132,22 @@ function ensureParentDir(filePath: string): void {
mkdirSync(path.dirname(filePath), { recursive: true });
}
/**
 * Returns the last `maxLines` non-empty lines of `<stateDir>/logs/<filename>`.
 *
 * Both LF and CRLF line endings are accepted, and empty lines are dropped
 * before the tail is taken.
 *
 * @param stateDir - Directory that contains the `logs` subdirectory.
 * @param filename - Name of the log file inside `logs`.
 * @param maxLines - Maximum number of lines to return (default 8). Values
 *   below 1 (including 0 and NaN) yield `undefined`.
 * @returns The tail joined with `"\n"`, or `undefined` when the file cannot be
 *   read, contains no non-empty lines, or no lines were requested.
 */
export function readLastLogLines(
  stateDir: string,
  filename: string,
  maxLines = 8,
): string | undefined {
  // `slice(-0)` (and any value that truncates to 0, or NaN) behaves like
  // `slice(0)` and would return the entire log instead of nothing.
  if (!(maxLines >= 1)) {
    return undefined;
  }

  const logPath = path.join(stateDir, "logs", filename);
  let content: string;
  try {
    content = readFileSync(logPath, "utf-8");
  } catch {
    // A missing or unreadable log simply means there are no diagnostics.
    return undefined;
  }

  const lines = content.split(/\r?\n/).filter(Boolean);
  const tail = lines.slice(-maxLines);
  return tail.length > 0 ? tail.join("\n") : undefined;
}
function readJsonFile<T>(filePath: string): T | null {
if (!existsSync(filePath)) {
return null;
@ -243,6 +261,10 @@ export function resolveManagedWebRuntimeAppDir(stateDir: string): string {
return path.join(resolveManagedWebRuntimeDir(stateDir), WEB_RUNTIME_APP_DIRNAME);
}
/** Path of the backup directory (`app.prev`) inside the managed web runtime dir for `stateDir`. */
function resolveManagedWebRuntimeBackupDir(stateDir: string): string {
  const runtimeDir = resolveManagedWebRuntimeDir(stateDir);
  return path.join(runtimeDir, WEB_RUNTIME_APP_BACKUP_DIRNAME);
}
/** Path of the `server.js` file inside the managed web runtime app dir for `stateDir`. */
export function resolveManagedWebRuntimeServerPath(stateDir: string): string {
  const appDir = resolveManagedWebRuntimeAppDir(stateDir);
  return path.join(appDir, "server.js");
}
@ -370,9 +392,15 @@ export async function probeWebRuntime(port: number): Promise<WebProbeResult> {
}
}
export async function waitForWebRuntime(port: number): Promise<WebProbeResult> {
export async function waitForWebRuntime(
port: number,
pid?: number,
): Promise<WebProbeResult> {
let lastResult: WebProbeResult = { ok: false, reason: "web runtime did not respond" };
for (let attempt = 0; attempt < WEB_APP_PROBE_ATTEMPTS; attempt += 1) {
if (typeof pid === "number" && pid > 0 && !isProcessAlive(pid)) {
return { ok: false, reason: `web runtime process exited (pid ${pid})` };
}
const result = await probeWebRuntime(port);
if (result.ok) {
return result;
@ -719,7 +747,17 @@ export function installManagedWebRuntime(params: {
flattenPnpmStandaloneDeps(standaloneDir);
mkdirSync(runtimeDir, { recursive: true });
rmSync(runtimeAppDir, { recursive: true, force: true });
const backupDir = resolveManagedWebRuntimeBackupDir(params.stateDir);
rmSync(backupDir, { recursive: true, force: true });
if (existsSync(runtimeAppDir)) {
try {
renameSync(runtimeAppDir, backupDir);
} catch {
rmSync(runtimeAppDir, { recursive: true, force: true });
}
}
cpSync(sourceAppDir, runtimeAppDir, { recursive: true, force: true, dereference: true });
dereferenceRuntimeNodeModules(runtimeAppDir, standaloneDir);
@ -745,6 +783,25 @@ export function installManagedWebRuntime(params: {
};
}
/**
 * Restores the backed-up runtime app dir over the current app dir.
 *
 * The current app dir is moved aside instead of being deleted up front, so a
 * failed restore can be undone and never leaves the state dir without an app
 * dir. The moved-aside copy is discarded only after the backup is in place.
 *
 * @param stateDir - State directory that holds the managed web runtime.
 * @returns `true` when the backup now lives at the app dir path; `false` when
 *   no backup exists or the swap failed (the backup dir is left untouched in
 *   that case unless the final rename itself partially applied).
 */
export function rollbackManagedWebRuntime(stateDir: string): boolean {
  const runtimeAppDir = resolveManagedWebRuntimeAppDir(stateDir);
  const backupDir = resolveManagedWebRuntimeBackupDir(stateDir);
  if (!existsSync(backupDir)) {
    return false;
  }

  const discardedDir = `${runtimeAppDir}.failed`;
  let movedAside = false;
  try {
    // Clear any leftover from an earlier rollback so the rename target is free.
    rmSync(discardedDir, { recursive: true, force: true });
    if (existsSync(runtimeAppDir)) {
      renameSync(runtimeAppDir, discardedDir);
      movedAside = true;
    } else {
      // existsSync is false for a dangling symlink; remove it so the rename
      // below does not collide with a non-directory entry.
      rmSync(runtimeAppDir, { recursive: true, force: true });
    }
    renameSync(backupDir, runtimeAppDir);
  } catch {
    // Put the previous app dir back so the caller is no worse off than before.
    if (movedAside && !existsSync(runtimeAppDir)) {
      try {
        renameSync(discardedDir, runtimeAppDir);
      } catch {
        // Nothing more can be done; the backup dir still holds the old runtime.
      }
    }
    return false;
  }

  try {
    rmSync(discardedDir, { recursive: true, force: true });
  } catch {
    // The rollback itself succeeded; a leftover is removed on the next rollback.
  }
  return true;
}
/**
 * Deletes the managed web runtime backup directory for `stateDir`.
 * A missing backup is ignored because the removal runs with `force: true`.
 */
export function cleanupManagedWebRuntimeBackup(stateDir: string): void {
  const backupDir = resolveManagedWebRuntimeBackupDir(stateDir);
  rmSync(backupDir, { force: true, recursive: true });
}
export async function stopManagedWebRuntime(params: {
stateDir: string;
port: number;
@ -896,11 +953,42 @@ export async function ensureManagedWebRuntime(params: {
};
}
const probe = await waitForWebRuntime(params.port);
return {
ready: probe.ok,
reason: probe.reason,
};
const probe = await waitForWebRuntime(params.port, start.pid);
if (probe.ok) {
cleanupManagedWebRuntimeBackup(params.stateDir);
return { ready: true, reason: probe.reason };
}
const errLog = readLastLogLines(params.stateDir, "web-app.err.log", 6);
const diagnosticReason = errLog
? `${probe.reason}\n--- web-app.err.log ---\n${errLog}`
: probe.reason;
const backupDir = resolveManagedWebRuntimeBackupDir(params.stateDir);
if (existsSync(backupDir)) {
await stopManagedWebRuntime({
stateDir: params.stateDir,
port: params.port,
includeLegacyStandalone: true,
});
const rolled = rollbackManagedWebRuntime(params.stateDir);
if (rolled) {
const retryStart = doStart({
stateDir: params.stateDir,
port: params.port,
gatewayPort: params.gatewayPort,
});
if (retryStart.started) {
const retryProbe = await waitForWebRuntime(params.port, retryStart.pid);
if (retryProbe.ok) {
return { ready: true, reason: "rolled back to previous version" };
}
}
}
}
return { ready: false, reason: diagnosticReason };
}
export function resolveOpenClawCommandOrThrow(): string {