From 31db7df761a166c47917f129464913c5e1b1be69 Mon Sep 17 00:00:00 2001 From: Barry <53959224+cal-gooo@users.noreply.github.com> Date: Thu, 19 Mar 2026 00:00:14 -0400 Subject: [PATCH 1/4] fix(cron): report ok status when primary delivery succeeded despite follow-up errors (#50170) If `delivered: true` on the run result, follow-up failures (e.g. a second message chunk or a Canvas render) no longer taint the overall job run status. An `effectiveStatus` is derived and used consistently for `lastRunStatus`, `lastStatus`, `lastErrorReason`, consecutive-error tracking, failure alerts, and one-shot job disable/delete logic. The raw `result.error` is still preserved in `lastError` so the follow-up failure remains visible for diagnostics without surfacing a misleading top-level error status. Co-Authored-By: Claude Sonnet 4.6 --- .../service/timer.apply-job-result.test.ts | 140 ++++++++++++++++++ src/cron/service/timer.ts | 18 ++- 2 files changed, 151 insertions(+), 7 deletions(-) create mode 100644 src/cron/service/timer.apply-job-result.test.ts diff --git a/src/cron/service/timer.apply-job-result.test.ts b/src/cron/service/timer.apply-job-result.test.ts new file mode 100644 index 00000000000..4ab71dbde48 --- /dev/null +++ b/src/cron/service/timer.apply-job-result.test.ts @@ -0,0 +1,140 @@ +import { describe, expect, it, vi } from "vitest"; +import type { CronJob, CronStoreFile } from "../types.js"; +import { applyJobResult } from "./timer.js"; +import type { CronServiceState } from "./state.js"; + +function createMockState(jobs: CronJob[] = []): CronServiceState { + const store: CronStoreFile = { version: 1, jobs }; + return { + deps: { + cronEnabled: true, + nowMs: () => 1_000_000, + log: { + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + }, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runHeartbeatOnce: vi.fn(), + runIsolatedAgentJob: vi.fn(), + onEvent: vi.fn(), + persistence: { + read: vi.fn(), + write: vi.fn(), + }, + }, + store, + timer: null, + running: false, + } as unknown as CronServiceState; +} + +function createJob(overrides: Partial = {}): CronJob { + return { + id: "test-job-1", + name: "Test Job", + enabled: true, + createdAtMs: 0, + updatedAtMs: 0, + schedule: { kind: "cron", expr: "0 9 * * *" }, + sessionTarget: "isolated", + wakeMode: "now", + payload: { kind: "agentTurn", message: "run" }, + state: {}, + ...overrides, + }; +} + +const BASE_RESULT = { startedAt: 900_000, endedAt: 1_000_000 }; + +describe("applyJobResult – delivered=true overrides error status (#50170)", () => { + it("sets lastRunStatus to ok when delivered=true despite status=error", () => { + const job = createJob(); + const state = createMockState([job]); + + applyJobResult(state, job, { + ...BASE_RESULT, + status: "error", + error: "Warning: Canvas failed", + delivered: true, + }); + + expect(job.state.lastRunStatus).toBe("ok"); + expect(job.state.lastStatus).toBe("ok"); + }); + + it("preserves lastError for diagnostics even when status is downgraded", () => { + const job = createJob(); + const state = createMockState([job]); + + applyJobResult(state, job, { + ...BASE_RESULT, + status: "error", + error: "Warning: Message failed", + delivered: true, + }); + + expect(job.state.lastError).toBe("Warning: Message failed"); + expect(job.state.lastDelivered).toBe(true); + expect(job.state.lastDeliveryStatus).toBe("delivered"); + }); + + it("does not increment consecutiveErrors when delivered=true overrides error", () => { + const job = createJob(); + const state = createMockState([job]); + + applyJobResult(state, job, { + ...BASE_RESULT, + status: "error", + error: "Warning: Canvas failed", + delivered: true, + }); + + expect(job.state.consecutiveErrors).toBe(0); + }); + + it("still sets status=error when delivered is false", () => { + const job = createJob(); + const state = createMockState([job]); + + applyJobResult(state, job, { + ...BASE_RESULT, + status: "error", + error: "Agent failed to start", + delivered: false, + }); + + expect(job.state.lastRunStatus).toBe("error"); + expect(job.state.consecutiveErrors).toBe(1); + }); + + it("still sets status=error when delivered is undefined", () => { + const job = createJob(); + const state = createMockState([job]); + + applyJobResult(state, job, { + ...BASE_RESULT, + status: "error", + error: "Agent failed to start", + }); + + expect(job.state.lastRunStatus).toBe("error"); + expect(job.state.consecutiveErrors).toBe(1); + }); + + it("passes through ok status unchanged", () => { + const job = createJob(); + const state = createMockState([job]); + + applyJobResult(state, job, { + ...BASE_RESULT, + status: "ok", + delivered: true, + }); + + expect(job.state.lastRunStatus).toBe("ok"); + expect(job.state.consecutiveErrors).toBe(0); + }); +}); diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index e12c4ae38e7..b15f1e96673 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -319,12 +319,16 @@ export function applyJobResult( }; job.state.runningAtMs = undefined; job.state.lastRunAtMs = result.startedAt; - job.state.lastRunStatus = result.status; - job.state.lastStatus = result.status; + // If primary delivery succeeded, don't let follow-up failures (e.g. a second + // message chunk or Canvas render) taint the overall run status. See #50170. + const effectiveStatus: CronRunStatus = + result.status === "error" && result.delivered === true ? "ok" : result.status; + job.state.lastRunStatus = effectiveStatus; + job.state.lastStatus = effectiveStatus; job.state.lastDurationMs = Math.max(0, result.endedAt - result.startedAt); job.state.lastError = result.error; job.state.lastErrorReason = - result.status === "error" && typeof result.error === "string" + effectiveStatus === "error" && typeof result.error === "string" ? (resolveFailoverReasonFromError(result.error) ?? undefined) : undefined; job.state.lastDelivered = result.delivered; @@ -335,7 +339,7 @@ export function applyJobResult( job.updatedAtMs = result.endedAt; // Track consecutive errors for backoff / auto-disable. - if (result.status === "error") { + if (effectiveStatus === "error") { job.state.consecutiveErrors = (job.state.consecutiveErrors ?? 0) + 1; const alertConfig = resolveFailureAlert(state, job); if (alertConfig && job.state.consecutiveErrors >= alertConfig.after) { @@ -367,15 +371,15 @@ export function applyJobResult( } const shouldDelete = - job.schedule.kind === "at" && job.deleteAfterRun === true && result.status === "ok"; + job.schedule.kind === "at" && job.deleteAfterRun === true && effectiveStatus === "ok"; if (!shouldDelete) { if (job.schedule.kind === "at") { - if (result.status === "ok" || result.status === "skipped") { + if (effectiveStatus === "ok" || effectiveStatus === "skipped") { // One-shot done or skipped: disable to prevent tight-loop (#11452). job.enabled = false; job.state.nextRunAtMs = undefined; - } else if (result.status === "error") { + } else if (effectiveStatus === "error") { const retryConfig = resolveRetryConfig(state.deps.cronConfig); const transient = isTransientCronError(result.error, retryConfig.retryOn); // consecutiveErrors is always set to ≥1 by the increment block above. From defdbe911022b94869078fb27f93fc4c73890655 Mon Sep 17 00:00:00 2001 From: Barry <53959224+cal-gooo@users.noreply.github.com> Date: Thu, 19 Mar 2026 00:03:58 -0400 Subject: [PATCH 2/4] style: fix formatting in timer.apply-job-result.test.ts Co-Authored-By: Claude Sonnet 4.6 --- src/cron/service/timer.apply-job-result.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cron/service/timer.apply-job-result.test.ts b/src/cron/service/timer.apply-job-result.test.ts index 4ab71dbde48..e64714a1560 100644 --- a/src/cron/service/timer.apply-job-result.test.ts +++ b/src/cron/service/timer.apply-job-result.test.ts @@ -1,7 +1,7 @@ import { describe, expect, it, vi } from "vitest"; import type { CronJob, CronStoreFile } from "../types.js"; -import { applyJobResult } from "./timer.js"; import type { CronServiceState } from "./state.js"; +import { applyJobResult } from "./timer.js"; function createMockState(jobs: CronJob[] = []): CronServiceState { const store: CronStoreFile = { version: 1, jobs }; From 4742ca7c4a4cadf5df9f6da02aa9ef31ce0b6e29 Mon Sep 17 00:00:00 2001 From: Barry <53959224+cal-gooo@users.noreply.github.com> Date: Thu, 19 Mar 2026 07:35:38 -0400 Subject: [PATCH 3/4] fix(cron): use effectiveStatus for recurring job backoff scheduling Missed substitution on line 420: the recurring-job backoff branch still checked result.status instead of effectiveStatus, so a job that delivered successfully (delivered=true) could still have its nextRunAtMs pushed out by error backoff even though all status fields showed ok. Also adds a nextRunAtMs assertion for the delivered=true override case. Co-Authored-By: Claude Sonnet 4.6 --- .../service/timer.apply-job-result.test.ts | 27 +++++++++++++++++++ src/cron/service/timer.ts | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/cron/service/timer.apply-job-result.test.ts b/src/cron/service/timer.apply-job-result.test.ts index e64714a1560..4c3e02d40d5 100644 --- a/src/cron/service/timer.apply-job-result.test.ts +++ b/src/cron/service/timer.apply-job-result.test.ts @@ -137,4 +137,31 @@ describe("applyJobResult – delivered=true overrides error status (#50170)", () expect(job.state.lastRunStatus).toBe("ok"); expect(job.state.consecutiveErrors).toBe(0); }); + + it("does not apply error backoff to nextRunAtMs for recurring job when delivered=true overrides error", () => { + // Use a high-frequency schedule so the backoff (30 s minimum) would + // exceed the natural interval and push nextRunAtMs out if applied. + const schedule = { kind: "every" as const, intervalMs: 5_000 }; + + const jobError = createJob({ schedule }); + const jobOk = createJob({ id: "test-job-2", schedule }); + const stateError = createMockState([jobError]); + const stateOk = createMockState([jobOk]); + + applyJobResult(stateError, jobError, { + ...BASE_RESULT, + status: "error", + error: "Warning: Canvas failed", + delivered: true, + }); + + applyJobResult(stateOk, jobOk, { + ...BASE_RESULT, + status: "ok", + delivered: true, + }); + + // Both should schedule the same natural next run — no backoff on delivered=true. + expect(jobError.state.nextRunAtMs).toBe(jobOk.state.nextRunAtMs); + }); }); diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index b15f1e96673..d18f14060d4 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -417,7 +417,7 @@ export function applyJobResult( ); } } - } else if (result.status === "error" && job.enabled) { + } else if (effectiveStatus === "error" && job.enabled) { // Apply exponential backoff for errored jobs to prevent retry storms. const backoff = errorBackoffMs(job.state.consecutiveErrors ?? 1); let normalNext: number | undefined; From 6c7f823569c333ef3714eee2a2c004f0c5693fb2 Mon Sep 17 00:00:00 2001 From: Barry <53959224+cal-gooo@users.noreply.github.com> Date: Thu, 19 Mar 2026 07:39:47 -0400 Subject: [PATCH 4/4] fix(test): use everyMs instead of intervalMs in CronSchedule fixture Co-Authored-By: Claude Sonnet 4.6 --- src/cron/service/timer.apply-job-result.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cron/service/timer.apply-job-result.test.ts b/src/cron/service/timer.apply-job-result.test.ts index 4c3e02d40d5..2f17e84d35a 100644 --- a/src/cron/service/timer.apply-job-result.test.ts +++ b/src/cron/service/timer.apply-job-result.test.ts @@ -141,7 +141,7 @@ describe("applyJobResult – delivered=true overrides error status (#50170)", () it("does not apply error backoff to nextRunAtMs for recurring job when delivered=true overrides error", () => { // Use a high-frequency schedule so the backoff (30 s minimum) would // exceed the natural interval and push nextRunAtMs out if applied. - const schedule = { kind: "every" as const, intervalMs: 5_000 }; + const schedule = { kind: "every" as const, everyMs: 5_000 }; const jobError = createJob({ schedule }); const jobOk = createJob({ id: "test-job-2", schedule });