fix: write crash report on loop detection and add recovery hints
Address review feedback:
- Move exit handler registration to after all CLI validation passes so user config errors (bad port, auth, bind) are not counted as crashes.
- Write a crash-report.json with timestamps when the hard crash limit is reached.
- Add recovery instructions to the CrashLoopError message (delete the history file or wait for the window to expire).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
430b320cd0
commit
b53c77ef66
@ -99,7 +99,9 @@ export class CrashLoopError extends Error {
|
||||
super(
|
||||
`CRASH LOOP DETECTED: ${count} crashes in the last ${windowMinutes} minutes. ` +
|
||||
`The gateway will not restart automatically. ` +
|
||||
`Fix the root cause, then run the gateway manually.`,
|
||||
`Fix the root cause, then run the gateway manually. ` +
|
||||
`To reset the crash counter immediately, delete the gateway-crash-history.json ` +
|
||||
`file in the state directory, or wait ${windowMinutes} minutes for it to expire.`,
|
||||
);
|
||||
this.name = "CrashLoopError";
|
||||
this.recentCrashCount = count;
|
||||
@ -118,6 +120,27 @@ export async function applyCrashLoopGuard(deps: CrashLoopGuardDeps): Promise<voi
|
||||
const recentCrashes = history.crashes.filter((t) => t > cutoff);
|
||||
|
||||
if (recentCrashes.length >= MAX_CRASHES) {
|
||||
const reportPath = path.join(deps.stateDir, "crash-report.json");
|
||||
try {
|
||||
fs.mkdirSync(path.dirname(reportPath), { recursive: true });
|
||||
fs.writeFileSync(
|
||||
reportPath,
|
||||
JSON.stringify(
|
||||
{
|
||||
detectedAt: new Date(now).toISOString(),
|
||||
recentCrashCount: recentCrashes.length,
|
||||
windowMinutes: CRASH_WINDOW_MS / 60_000,
|
||||
crashTimestamps: recentCrashes.map((t) => new Date(t).toISOString()),
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
"utf-8",
|
||||
);
|
||||
deps.logger.error(`Crash report written to ${reportPath}`);
|
||||
} catch {
|
||||
// best-effort
|
||||
}
|
||||
throw new CrashLoopError(recentCrashes.length, CRASH_WINDOW_MS / 60_000);
|
||||
}
|
||||
|
||||
|
||||
@ -3,7 +3,6 @@ import { readSecretFromFile } from "../../acp/secret-file.js";
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import type { GatewayAuthMode, GatewayTailscaleMode } from "../../config/config.js";
|
||||
import type { GatewayWsLogStyle } from "../../gateway/ws-logging.js";
|
||||
import {
|
||||
CONFIG_PATH,
|
||||
loadConfig,
|
||||
@ -14,6 +13,7 @@ import {
|
||||
import { hasConfiguredSecretInput } from "../../config/types.secrets.js";
|
||||
import { resolveGatewayAuth } from "../../gateway/auth.js";
|
||||
import { startGatewayServer } from "../../gateway/server.js";
|
||||
import type { GatewayWsLogStyle } from "../../gateway/ws-logging.js";
|
||||
import { setGatewayWsLogStyle } from "../../gateway/ws-logging.js";
|
||||
import { setVerbose } from "../../globals.js";
|
||||
import { GatewayLockError } from "../../infra/gateway-lock.js";
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user