fix: write crash report on loop detection and add recovery hints

Address review feedback:
- Move exit handler registration to after all CLI validation passes so
  user config errors (bad port, auth, bind) are not counted as crashes.
- Write a crash-report.json (detection time, crash count, window length,
  and crash timestamps) to the state directory when the hard crash limit
  is reached.
- Add recovery instructions to the CrashLoopError message (delete
  the history file or wait for the window to expire).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Protocol-zero-0 2026-02-20 15:33:57 +00:00
parent 430b320cd0
commit b53c77ef66
2 changed files with 25 additions and 2 deletions

View File

@ -99,7 +99,9 @@ export class CrashLoopError extends Error {
super(
`CRASH LOOP DETECTED: ${count} crashes in the last ${windowMinutes} minutes. ` +
`The gateway will not restart automatically. ` +
`Fix the root cause, then run the gateway manually.`,
`Fix the root cause, then run the gateway manually. ` +
`To reset the crash counter immediately, delete the gateway-crash-history.json ` +
`file in the state directory, or wait ${windowMinutes} minutes for it to expire.`,
);
this.name = "CrashLoopError";
this.recentCrashCount = count;
@ -118,6 +120,27 @@ export async function applyCrashLoopGuard(deps: CrashLoopGuardDeps): Promise<voi
const recentCrashes = history.crashes.filter((t) => t > cutoff);
if (recentCrashes.length >= MAX_CRASHES) {
const reportPath = path.join(deps.stateDir, "crash-report.json");
try {
fs.mkdirSync(path.dirname(reportPath), { recursive: true });
fs.writeFileSync(
reportPath,
JSON.stringify(
{
detectedAt: new Date(now).toISOString(),
recentCrashCount: recentCrashes.length,
windowMinutes: CRASH_WINDOW_MS / 60_000,
crashTimestamps: recentCrashes.map((t) => new Date(t).toISOString()),
},
null,
2,
),
"utf-8",
);
deps.logger.error(`Crash report written to ${reportPath}`);
} catch {
// best-effort
}
throw new CrashLoopError(recentCrashes.length, CRASH_WINDOW_MS / 60_000);
}

View File

@ -3,7 +3,6 @@ import { readSecretFromFile } from "../../acp/secret-file.js";
import fs from "node:fs";
import path from "node:path";
import type { GatewayAuthMode, GatewayTailscaleMode } from "../../config/config.js";
import type { GatewayWsLogStyle } from "../../gateway/ws-logging.js";
import {
CONFIG_PATH,
loadConfig,
@ -14,6 +13,7 @@ import {
import { hasConfiguredSecretInput } from "../../config/types.secrets.js";
import { resolveGatewayAuth } from "../../gateway/auth.js";
import { startGatewayServer } from "../../gateway/server.js";
import type { GatewayWsLogStyle } from "../../gateway/ws-logging.js";
import { setGatewayWsLogStyle } from "../../gateway/ws-logging.js";
import { setVerbose } from "../../globals.js";
import { GatewayLockError } from "../../infra/gateway-lock.js";