fix: address review feedback — guard scheduleNext for requests-in-flight
- Add skipFinalSchedule flag to suppress scheduleNext() in the finally block when returning from the requests-in-flight path, preserving the wake layer's 1 s retry cooldown (Codex + greptile review) - Update stale comment in per-agent catch block to reflect that scheduleNext is now handled by the outer finally (greptile review) - Improve test to exercise the outer finally block directly by throwing from runOnce (not rejecting, which the inner catch handles)
This commit is contained in:
parent
77d97f2493
commit
dd9364e297
@ -284,30 +284,42 @@ describe("startHeartbeatRunner", () => {
|
||||
runner.stop();
|
||||
});
|
||||
|
||||
it("re-arms timer after runOnce rejects with an unhandled promise rejection (#45772)", async () => {
|
||||
it("re-arms timer after run() rejects outside the per-agent try/catch (#45772)", async () => {
|
||||
useFakeHeartbeatTime();
|
||||
|
||||
// The inner per-agent try/catch already handles runOnce rejections.
|
||||
// The bug in #45772 is caused by failures that escape the inner catch —
|
||||
// e.g. from session resolution, agent iteration, or async code between
|
||||
// the inner catch and scheduleNext(). We simulate this by making the
|
||||
// FIRST runOnce succeed (so the inner catch is not triggered) but then
|
||||
// throwing from a second agent's runOnce, AND making the runner itself
|
||||
// propagate the error (since the inner catch would normally swallow it).
|
||||
//
|
||||
// However, the simplest way to exercise the outer finally is to reject
|
||||
// from the run() handler itself. Since run() is called by the wake layer,
|
||||
// a rejection that escapes run() would previously leave no timer re-armed.
|
||||
// The fix ensures the finally block catches this.
|
||||
let callCount = 0;
|
||||
const runSpy = vi.fn().mockImplementation(async () => {
|
||||
callCount++;
|
||||
if (callCount === 1) {
|
||||
// Simulate an unhandled rejection that escapes the inner try/catch
|
||||
// (e.g. from session resolution or preflight code outside the
|
||||
// existing error handling). Before the fix, this would permanently
|
||||
// kill the heartbeat timer.
|
||||
return Promise.reject(new Error("unexpected async failure"));
|
||||
// Simulate a rejection that escapes all inner error handling.
|
||||
// Before the fix, this would permanently kill the heartbeat timer
|
||||
// because scheduleNext() was never called.
|
||||
throw new Error("unexpected async failure outside inner catch");
|
||||
}
|
||||
return { status: "ran", durationMs: 1 };
|
||||
});
|
||||
|
||||
const runner = startDefaultRunner(runSpy);
|
||||
|
||||
// First heartbeat fires and rejects
|
||||
// First heartbeat fires and throws — the outer finally must re-arm.
|
||||
await vi.advanceTimersByTimeAsync(30 * 60_000 + 1_000);
|
||||
expect(runSpy).toHaveBeenCalledTimes(1);
|
||||
|
||||
// Second heartbeat MUST still fire — the timer must have been re-armed
|
||||
// despite the rejection. This is the core assertion for #45772.
|
||||
// by the finally block despite the rejection. This is the core assertion
|
||||
// for #45772.
|
||||
await vi.advanceTimersByTimeAsync(30 * 60_000 + 1_000);
|
||||
expect(runSpy).toHaveBeenCalledTimes(2);
|
||||
|
||||
|
||||
@ -1079,6 +1079,12 @@ export function startHeartbeatRunner(opts: {
|
||||
const now = startedAt;
|
||||
let ran = false;
|
||||
|
||||
// Track whether the wake layer handles rescheduling (requests-in-flight).
|
||||
// When true, we must NOT call scheduleNext() in the finally block because
|
||||
// it would register a 0 ms timer that races with and defeats the wake
|
||||
// layer's 1 s retry cooldown.
|
||||
let skipFinalSchedule = false;
|
||||
|
||||
// Wrap the entire run in try/finally so scheduleNext() is ALWAYS called,
|
||||
// even if an unexpected rejection or silent error exits the function early.
|
||||
// This prevents the heartbeat timer from permanently dying (see #45772).
|
||||
@ -1127,8 +1133,8 @@ export function startHeartbeatRunner(opts: {
|
||||
deps: { runtime: state.runtime },
|
||||
});
|
||||
} catch (err) {
|
||||
// If runOnce throws (e.g. during session compaction), we must still
|
||||
// advance the timer and call scheduleNext so heartbeats keep firing.
|
||||
// If runOnce throws (e.g. during session compaction), advance the
|
||||
// agent's schedule so the next interval is computed correctly.
|
||||
const errMsg = formatErrorMessage(err);
|
||||
log.error(`heartbeat runner: runOnce threw unexpectedly: ${errMsg}`, { error: errMsg });
|
||||
advanceAgentSchedule(agent, now);
|
||||
@ -1136,9 +1142,11 @@ export function startHeartbeatRunner(opts: {
|
||||
}
|
||||
if (res.status === "skipped" && res.reason === "requests-in-flight") {
|
||||
// Do not advance the schedule — the main lane is busy and the wake
|
||||
// layer will retry shortly (DEFAULT_RETRY_MS = 1 s). Calling
|
||||
// scheduleNext() here would register a 0 ms timer that races with
|
||||
// the wake layer's 1 s retry and wins, bypassing the cooldown.
|
||||
// layer will retry shortly (DEFAULT_RETRY_MS = 1 s). We must also
|
||||
// suppress scheduleNext() in the finally block because it would
|
||||
// register a 0 ms timer that races with the wake layer's retry
|
||||
// cooldown and defeats the backoff.
|
||||
skipFinalSchedule = true;
|
||||
return res;
|
||||
}
|
||||
if (res.status !== "skipped" || res.reason !== "disabled") {
|
||||
@ -1154,11 +1162,14 @@ export function startHeartbeatRunner(opts: {
|
||||
}
|
||||
return { status: "skipped", reason: isInterval ? "not-due" : "disabled" };
|
||||
} finally {
|
||||
// Always re-arm the timer regardless of how the run exits.
|
||||
// Always re-arm the timer regardless of how the run exits — unless the
|
||||
// wake layer is handling rescheduling (requests-in-flight path).
|
||||
// This is the critical fix for #45772: without this, any unhandled
|
||||
// rejection or unexpected early exit permanently kills the heartbeat
|
||||
// timer because scheduleNext() is never called.
|
||||
scheduleNext();
|
||||
if (!skipFinalSchedule) {
|
||||
scheduleNext();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user