fix(cron): report ok status when primary delivery succeeded despite follow-up errors (#50170)

If `delivered: true` on the run result, follow-up failures (e.g. a second
message chunk or a Canvas render) no longer taint the overall job run
status. An `effectiveStatus` is derived and used consistently for
`lastRunStatus`, `lastStatus`, `lastErrorReason`, consecutive-error
tracking, failure alerts, and one-shot job disable/delete logic.

The raw `result.error` is still preserved in `lastError` so the
follow-up failure remains visible for diagnostics without surfacing
a misleading top-level error status.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Barry 2026-03-19 00:00:14 -04:00
parent 25015161fe
commit 31db7df761
2 changed files with 151 additions and 7 deletions

View File

@ -0,0 +1,140 @@
import { describe, expect, it, vi } from "vitest";
import type { CronJob, CronStoreFile } from "../types.js";
import { applyJobResult } from "./timer.js";
import type { CronServiceState } from "./state.js";
function createMockState(jobs: CronJob[] = []): CronServiceState {
const store: CronStoreFile = { version: 1, jobs };
return {
deps: {
cronEnabled: true,
nowMs: () => 1_000_000,
log: {
debug: vi.fn(),
info: vi.fn(),
warn: vi.fn(),
error: vi.fn(),
},
enqueueSystemEvent: vi.fn(),
requestHeartbeatNow: vi.fn(),
runHeartbeatOnce: vi.fn(),
runIsolatedAgentJob: vi.fn(),
onEvent: vi.fn(),
persistence: {
read: vi.fn(),
write: vi.fn(),
},
},
store,
timer: null,
running: false,
} as unknown as CronServiceState;
}
function createJob(overrides: Partial<CronJob> = {}): CronJob {
return {
id: "test-job-1",
name: "Test Job",
enabled: true,
createdAtMs: 0,
updatedAtMs: 0,
schedule: { kind: "cron", expr: "0 9 * * *" },
sessionTarget: "isolated",
wakeMode: "now",
payload: { kind: "agentTurn", message: "run" },
state: {},
...overrides,
};
}
const BASE_RESULT = { startedAt: 900_000, endedAt: 1_000_000 };
describe("applyJobResult delivered=true overrides error status (#50170)", () => {
it("sets lastRunStatus to ok when delivered=true despite status=error", () => {
const job = createJob();
const state = createMockState([job]);
applyJobResult(state, job, {
...BASE_RESULT,
status: "error",
error: "Warning: Canvas failed",
delivered: true,
});
expect(job.state.lastRunStatus).toBe("ok");
expect(job.state.lastStatus).toBe("ok");
});
it("preserves lastError for diagnostics even when status is downgraded", () => {
const job = createJob();
const state = createMockState([job]);
applyJobResult(state, job, {
...BASE_RESULT,
status: "error",
error: "Warning: Message failed",
delivered: true,
});
expect(job.state.lastError).toBe("Warning: Message failed");
expect(job.state.lastDelivered).toBe(true);
expect(job.state.lastDeliveryStatus).toBe("delivered");
});
it("does not increment consecutiveErrors when delivered=true overrides error", () => {
const job = createJob();
const state = createMockState([job]);
applyJobResult(state, job, {
...BASE_RESULT,
status: "error",
error: "Warning: Canvas failed",
delivered: true,
});
expect(job.state.consecutiveErrors).toBe(0);
});
it("still sets status=error when delivered is false", () => {
const job = createJob();
const state = createMockState([job]);
applyJobResult(state, job, {
...BASE_RESULT,
status: "error",
error: "Agent failed to start",
delivered: false,
});
expect(job.state.lastRunStatus).toBe("error");
expect(job.state.consecutiveErrors).toBe(1);
});
it("still sets status=error when delivered is undefined", () => {
const job = createJob();
const state = createMockState([job]);
applyJobResult(state, job, {
...BASE_RESULT,
status: "error",
error: "Agent failed to start",
});
expect(job.state.lastRunStatus).toBe("error");
expect(job.state.consecutiveErrors).toBe(1);
});
it("passes through ok status unchanged", () => {
const job = createJob();
const state = createMockState([job]);
applyJobResult(state, job, {
...BASE_RESULT,
status: "ok",
delivered: true,
});
expect(job.state.lastRunStatus).toBe("ok");
expect(job.state.consecutiveErrors).toBe(0);
});
});

View File

@ -319,12 +319,16 @@ export function applyJobResult(
};
job.state.runningAtMs = undefined;
job.state.lastRunAtMs = result.startedAt;
job.state.lastRunStatus = result.status;
job.state.lastStatus = result.status;
// If primary delivery succeeded, don't let follow-up failures (e.g. a second
// message chunk or Canvas render) taint the overall run status. See #50170.
const effectiveStatus: CronRunStatus =
result.status === "error" && result.delivered === true ? "ok" : result.status;
job.state.lastRunStatus = effectiveStatus;
job.state.lastStatus = effectiveStatus;
job.state.lastDurationMs = Math.max(0, result.endedAt - result.startedAt);
job.state.lastError = result.error;
job.state.lastErrorReason =
result.status === "error" && typeof result.error === "string"
effectiveStatus === "error" && typeof result.error === "string"
? (resolveFailoverReasonFromError(result.error) ?? undefined)
: undefined;
job.state.lastDelivered = result.delivered;
@ -335,7 +339,7 @@ export function applyJobResult(
job.updatedAtMs = result.endedAt;
// Track consecutive errors for backoff / auto-disable.
if (result.status === "error") {
if (effectiveStatus === "error") {
job.state.consecutiveErrors = (job.state.consecutiveErrors ?? 0) + 1;
const alertConfig = resolveFailureAlert(state, job);
if (alertConfig && job.state.consecutiveErrors >= alertConfig.after) {
@ -367,15 +371,15 @@ export function applyJobResult(
}
const shouldDelete =
job.schedule.kind === "at" && job.deleteAfterRun === true && result.status === "ok";
job.schedule.kind === "at" && job.deleteAfterRun === true && effectiveStatus === "ok";
if (!shouldDelete) {
if (job.schedule.kind === "at") {
if (result.status === "ok" || result.status === "skipped") {
if (effectiveStatus === "ok" || effectiveStatus === "skipped") {
// One-shot done or skipped: disable to prevent tight-loop (#11452).
job.enabled = false;
job.state.nextRunAtMs = undefined;
} else if (result.status === "error") {
} else if (effectiveStatus === "error") {
const retryConfig = resolveRetryConfig(state.deps.cronConfig);
const transient = isTransientCronError(result.error, retryConfig.retryOn);
// consecutiveErrors is always set to ≥1 by the increment block above.