fix(cron): report ok status when primary delivery succeeded despite follow-up errors (#50170)
If `delivered: true` on the run result, follow-up failures (e.g. a second message chunk or a Canvas render) no longer taint the overall job run status. An `effectiveStatus` is derived and used consistently for `lastRunStatus`, `lastStatus`, `lastErrorReason`, consecutive-error tracking, failure alerts, and one-shot job disable/delete logic. The raw `result.error` is still preserved in `lastError` so the follow-up failure remains visible for diagnostics without surfacing a misleading top-level error status. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
25015161fe
commit
31db7df761
140
src/cron/service/timer.apply-job-result.test.ts
Normal file
140
src/cron/service/timer.apply-job-result.test.ts
Normal file
@ -0,0 +1,140 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import type { CronJob, CronStoreFile } from "../types.js";
|
||||
import { applyJobResult } from "./timer.js";
|
||||
import type { CronServiceState } from "./state.js";
|
||||
|
||||
function createMockState(jobs: CronJob[] = []): CronServiceState {
|
||||
const store: CronStoreFile = { version: 1, jobs };
|
||||
return {
|
||||
deps: {
|
||||
cronEnabled: true,
|
||||
nowMs: () => 1_000_000,
|
||||
log: {
|
||||
debug: vi.fn(),
|
||||
info: vi.fn(),
|
||||
warn: vi.fn(),
|
||||
error: vi.fn(),
|
||||
},
|
||||
enqueueSystemEvent: vi.fn(),
|
||||
requestHeartbeatNow: vi.fn(),
|
||||
runHeartbeatOnce: vi.fn(),
|
||||
runIsolatedAgentJob: vi.fn(),
|
||||
onEvent: vi.fn(),
|
||||
persistence: {
|
||||
read: vi.fn(),
|
||||
write: vi.fn(),
|
||||
},
|
||||
},
|
||||
store,
|
||||
timer: null,
|
||||
running: false,
|
||||
} as unknown as CronServiceState;
|
||||
}
|
||||
|
||||
function createJob(overrides: Partial<CronJob> = {}): CronJob {
|
||||
return {
|
||||
id: "test-job-1",
|
||||
name: "Test Job",
|
||||
enabled: true,
|
||||
createdAtMs: 0,
|
||||
updatedAtMs: 0,
|
||||
schedule: { kind: "cron", expr: "0 9 * * *" },
|
||||
sessionTarget: "isolated",
|
||||
wakeMode: "now",
|
||||
payload: { kind: "agentTurn", message: "run" },
|
||||
state: {},
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
const BASE_RESULT = { startedAt: 900_000, endedAt: 1_000_000 };
|
||||
|
||||
describe("applyJobResult – delivered=true overrides error status (#50170)", () => {
|
||||
it("sets lastRunStatus to ok when delivered=true despite status=error", () => {
|
||||
const job = createJob();
|
||||
const state = createMockState([job]);
|
||||
|
||||
applyJobResult(state, job, {
|
||||
...BASE_RESULT,
|
||||
status: "error",
|
||||
error: "Warning: Canvas failed",
|
||||
delivered: true,
|
||||
});
|
||||
|
||||
expect(job.state.lastRunStatus).toBe("ok");
|
||||
expect(job.state.lastStatus).toBe("ok");
|
||||
});
|
||||
|
||||
it("preserves lastError for diagnostics even when status is downgraded", () => {
|
||||
const job = createJob();
|
||||
const state = createMockState([job]);
|
||||
|
||||
applyJobResult(state, job, {
|
||||
...BASE_RESULT,
|
||||
status: "error",
|
||||
error: "Warning: Message failed",
|
||||
delivered: true,
|
||||
});
|
||||
|
||||
expect(job.state.lastError).toBe("Warning: Message failed");
|
||||
expect(job.state.lastDelivered).toBe(true);
|
||||
expect(job.state.lastDeliveryStatus).toBe("delivered");
|
||||
});
|
||||
|
||||
it("does not increment consecutiveErrors when delivered=true overrides error", () => {
|
||||
const job = createJob();
|
||||
const state = createMockState([job]);
|
||||
|
||||
applyJobResult(state, job, {
|
||||
...BASE_RESULT,
|
||||
status: "error",
|
||||
error: "Warning: Canvas failed",
|
||||
delivered: true,
|
||||
});
|
||||
|
||||
expect(job.state.consecutiveErrors).toBe(0);
|
||||
});
|
||||
|
||||
it("still sets status=error when delivered is false", () => {
|
||||
const job = createJob();
|
||||
const state = createMockState([job]);
|
||||
|
||||
applyJobResult(state, job, {
|
||||
...BASE_RESULT,
|
||||
status: "error",
|
||||
error: "Agent failed to start",
|
||||
delivered: false,
|
||||
});
|
||||
|
||||
expect(job.state.lastRunStatus).toBe("error");
|
||||
expect(job.state.consecutiveErrors).toBe(1);
|
||||
});
|
||||
|
||||
it("still sets status=error when delivered is undefined", () => {
|
||||
const job = createJob();
|
||||
const state = createMockState([job]);
|
||||
|
||||
applyJobResult(state, job, {
|
||||
...BASE_RESULT,
|
||||
status: "error",
|
||||
error: "Agent failed to start",
|
||||
});
|
||||
|
||||
expect(job.state.lastRunStatus).toBe("error");
|
||||
expect(job.state.consecutiveErrors).toBe(1);
|
||||
});
|
||||
|
||||
it("passes through ok status unchanged", () => {
|
||||
const job = createJob();
|
||||
const state = createMockState([job]);
|
||||
|
||||
applyJobResult(state, job, {
|
||||
...BASE_RESULT,
|
||||
status: "ok",
|
||||
delivered: true,
|
||||
});
|
||||
|
||||
expect(job.state.lastRunStatus).toBe("ok");
|
||||
expect(job.state.consecutiveErrors).toBe(0);
|
||||
});
|
||||
});
|
||||
@ -319,12 +319,16 @@ export function applyJobResult(
|
||||
};
|
||||
job.state.runningAtMs = undefined;
|
||||
job.state.lastRunAtMs = result.startedAt;
|
||||
job.state.lastRunStatus = result.status;
|
||||
job.state.lastStatus = result.status;
|
||||
// If primary delivery succeeded, don't let follow-up failures (e.g. a second
|
||||
// message chunk or Canvas render) taint the overall run status. See #50170.
|
||||
const effectiveStatus: CronRunStatus =
|
||||
result.status === "error" && result.delivered === true ? "ok" : result.status;
|
||||
job.state.lastRunStatus = effectiveStatus;
|
||||
job.state.lastStatus = effectiveStatus;
|
||||
job.state.lastDurationMs = Math.max(0, result.endedAt - result.startedAt);
|
||||
job.state.lastError = result.error;
|
||||
job.state.lastErrorReason =
|
||||
result.status === "error" && typeof result.error === "string"
|
||||
effectiveStatus === "error" && typeof result.error === "string"
|
||||
? (resolveFailoverReasonFromError(result.error) ?? undefined)
|
||||
: undefined;
|
||||
job.state.lastDelivered = result.delivered;
|
||||
@ -335,7 +339,7 @@ export function applyJobResult(
|
||||
job.updatedAtMs = result.endedAt;
|
||||
|
||||
// Track consecutive errors for backoff / auto-disable.
|
||||
if (result.status === "error") {
|
||||
if (effectiveStatus === "error") {
|
||||
job.state.consecutiveErrors = (job.state.consecutiveErrors ?? 0) + 1;
|
||||
const alertConfig = resolveFailureAlert(state, job);
|
||||
if (alertConfig && job.state.consecutiveErrors >= alertConfig.after) {
|
||||
@ -367,15 +371,15 @@ export function applyJobResult(
|
||||
}
|
||||
|
||||
const shouldDelete =
|
||||
job.schedule.kind === "at" && job.deleteAfterRun === true && result.status === "ok";
|
||||
job.schedule.kind === "at" && job.deleteAfterRun === true && effectiveStatus === "ok";
|
||||
|
||||
if (!shouldDelete) {
|
||||
if (job.schedule.kind === "at") {
|
||||
if (result.status === "ok" || result.status === "skipped") {
|
||||
if (effectiveStatus === "ok" || effectiveStatus === "skipped") {
|
||||
// One-shot done or skipped: disable to prevent tight-loop (#11452).
|
||||
job.enabled = false;
|
||||
job.state.nextRunAtMs = undefined;
|
||||
} else if (result.status === "error") {
|
||||
} else if (effectiveStatus === "error") {
|
||||
const retryConfig = resolveRetryConfig(state.deps.cronConfig);
|
||||
const transient = isTransientCronError(result.error, retryConfig.retryOn);
|
||||
// consecutiveErrors is always set to ≥1 by the increment block above.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user