fix: address 6 review comments on PR #47719

1. [P1] Treat remap failures as resume failures — if replaceSubagentRunAfterSteer
   returns false, do NOT clear abortedLastRun, and increment the failed count.

2. [P2] Count scan-level exceptions as retryable failures — ensure result.failed is
   at least 1 in the outer catch block so the scheduleOrphanRecovery retry logic triggers.

3. [P2] Persist resumed-session dedupe across recovery retries —
   recoverOrphanedSubagentSessions accepts an optional resumedSessionKeys parameter;
   scheduleOrphanRecovery lifts the Set to its own scope and passes it through retries.

4. [Greptile] Use typed config accessors instead of raw structural cast for TLS
   check in lifecycle.ts.

5. [Greptile] Forward gateway.reload.deferralTimeoutMs to deferGatewayRestartUntilIdle
   in scheduleGatewaySigusr1Restart so the user-configured value is not silently ignored.

6. [Greptile] Same as #4 — already addressed by the typed config fix.

Co-Authored-By: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
This commit is contained in:
bot_apk 2026-03-16 03:36:56 +00:00 committed by Peter Steinberger
parent c780b6a6ab
commit 98f6ec50aa
3 changed files with 24 additions and 6 deletions

View File

@ -100,10 +100,16 @@ async function resumeOrphanedSession(params: {
},
timeoutMs: 10_000,
});
replaceSubagentRunAfterSteer({
const remapped = replaceSubagentRunAfterSteer({
previousRunId: params.originalRunId,
nextRunId: result.runId,
});
if (!remapped) {
log.warn(
`resumed orphaned session ${params.sessionKey} but remap failed (old run already removed); treating as failure`,
);
return false;
}
log.info(`resumed orphaned session: ${params.sessionKey}`);
return true;
} catch (err) {
@ -125,9 +131,11 @@ async function resumeOrphanedSession(params: {
*/
export async function recoverOrphanedSubagentSessions(params: {
getActiveRuns: () => Map<string, SubagentRunRecord>;
/** Persisted across retries so already-resumed sessions are not resumed again. */
resumedSessionKeys?: Set<string>;
}): Promise<{ recovered: number; failed: number; skipped: number }> {
const result = { recovered: 0, failed: 0, skipped: 0 };
const resumedSessionKeys = new Set<string>();
const resumedSessionKeys = params.resumedSessionKeys ?? new Set<string>();
const configChangePattern = /openclaw\.json|openclaw gateway restart|config\.patch/i;
try {
@ -236,6 +244,10 @@ export async function recoverOrphanedSubagentSessions(params: {
}
} catch (err) {
log.warn(`orphan recovery scan failed: ${String(err)}`);
// Ensure retry logic fires for scan-level exceptions.
if (result.failed === 0) {
result.failed = 1;
}
}
if (result.recovered > 0 || result.failed > 0) {
@ -265,9 +277,11 @@ export function scheduleOrphanRecovery(params: {
const initialDelay = params.delayMs ?? DEFAULT_RECOVERY_DELAY_MS;
const maxRetries = params.maxRetries ?? MAX_RECOVERY_RETRIES;
const resumedSessionKeys = new Set<string>();
const attemptRecovery = (attempt: number, delay: number) => {
setTimeout(() => {
void recoverOrphanedSubagentSessions(params)
void recoverOrphanedSubagentSessions({ ...params, resumedSessionKeys })
.then((result) => {
if (result.failed > 0 && attempt < maxRetries) {
const nextDelay = delay * RETRY_BACKOFF_MULTIPLIER;

View File

@ -51,8 +51,7 @@ function resolveGatewayPortFallback(): Promise<number> {
async function assertUnmanagedGatewayRestartEnabled(port: number): Promise<void> {
const cfg = await readBestEffortConfig().catch(() => undefined);
const tlsEnabled = !!(cfg as { gateway?: { tls?: { enabled?: unknown } } } | undefined)?.gateway
?.tls?.enabled;
const tlsEnabled = !!cfg?.gateway?.tls?.enabled;
const scheme = tlsEnabled ? "wss" : "ws";
const probe = await probeGateway({
url: `${scheme}://127.0.0.1:${port}`,

View File

@ -1,6 +1,7 @@
import { spawnSync } from "node:child_process";
import os from "node:os";
import path from "node:path";
import { loadConfig } from "../config/config.js";
import {
resolveGatewayLaunchAgentLabel,
resolveGatewaySystemdServiceName,
@ -476,7 +477,11 @@ export function scheduleGatewaySigusr1Restart(opts?: {
emitGatewayRestart();
return;
}
deferGatewayRestartUntilIdle({ getPendingCount: pendingCheck });
const cfg = loadConfig();
deferGatewayRestartUntilIdle({
getPendingCount: pendingCheck,
maxWaitMs: cfg.gateway?.reload?.deferralTimeoutMs,
});
},
Math.max(0, requestedDueAt - nowMs),
);