diff --git a/src/cron/service/timer.apply-job-result.test.ts b/src/cron/service/timer.apply-job-result.test.ts new file mode 100644 index 00000000000..2f17e84d35a --- /dev/null +++ b/src/cron/service/timer.apply-job-result.test.ts @@ -0,0 +1,167 @@ +import { describe, expect, it, vi } from "vitest"; +import type { CronJob, CronStoreFile } from "../types.js"; +import type { CronServiceState } from "./state.js"; +import { applyJobResult } from "./timer.js"; + +function createMockState(jobs: CronJob[] = []): CronServiceState { + const store: CronStoreFile = { version: 1, jobs }; + return { + deps: { + cronEnabled: true, + nowMs: () => 1_000_000, + log: { + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + }, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runHeartbeatOnce: vi.fn(), + runIsolatedAgentJob: vi.fn(), + onEvent: vi.fn(), + persistence: { + read: vi.fn(), + write: vi.fn(), + }, + }, + store, + timer: null, + running: false, + } as unknown as CronServiceState; +} + +function createJob(overrides: Partial = {}): CronJob { + return { + id: "test-job-1", + name: "Test Job", + enabled: true, + createdAtMs: 0, + updatedAtMs: 0, + schedule: { kind: "cron", expr: "0 9 * * *" }, + sessionTarget: "isolated", + wakeMode: "now", + payload: { kind: "agentTurn", message: "run" }, + state: {}, + ...overrides, + }; +} + +const BASE_RESULT = { startedAt: 900_000, endedAt: 1_000_000 }; + +describe("applyJobResult – delivered=true overrides error status (#50170)", () => { + it("sets lastRunStatus to ok when delivered=true despite status=error", () => { + const job = createJob(); + const state = createMockState([job]); + + applyJobResult(state, job, { + ...BASE_RESULT, + status: "error", + error: "Warning: Canvas failed", + delivered: true, + }); + + expect(job.state.lastRunStatus).toBe("ok"); + expect(job.state.lastStatus).toBe("ok"); + }); + + it("preserves lastError for diagnostics even when status is downgraded", () => { + const job = createJob(); + const state = createMockState([job]); + + applyJobResult(state, job, { + ...BASE_RESULT, + status: "error", + error: "Warning: Message failed", + delivered: true, + }); + + expect(job.state.lastError).toBe("Warning: Message failed"); + expect(job.state.lastDelivered).toBe(true); + expect(job.state.lastDeliveryStatus).toBe("delivered"); + }); + + it("does not increment consecutiveErrors when delivered=true overrides error", () => { + const job = createJob(); + const state = createMockState([job]); + + applyJobResult(state, job, { + ...BASE_RESULT, + status: "error", + error: "Warning: Canvas failed", + delivered: true, + }); + + expect(job.state.consecutiveErrors).toBe(0); + }); + + it("still sets status=error when delivered is false", () => { + const job = createJob(); + const state = createMockState([job]); + + applyJobResult(state, job, { + ...BASE_RESULT, + status: "error", + error: "Agent failed to start", + delivered: false, + }); + + expect(job.state.lastRunStatus).toBe("error"); + expect(job.state.consecutiveErrors).toBe(1); + }); + + it("still sets status=error when delivered is undefined", () => { + const job = createJob(); + const state = createMockState([job]); + + applyJobResult(state, job, { + ...BASE_RESULT, + status: "error", + error: "Agent failed to start", + }); + + expect(job.state.lastRunStatus).toBe("error"); + expect(job.state.consecutiveErrors).toBe(1); + }); + + it("passes through ok status unchanged", () => { + const job = createJob(); + const state = createMockState([job]); + + applyJobResult(state, job, { + ...BASE_RESULT, + status: "ok", + delivered: true, + }); + + expect(job.state.lastRunStatus).toBe("ok"); + expect(job.state.consecutiveErrors).toBe(0); + }); + + it("does not apply error backoff to nextRunAtMs for recurring job when delivered=true overrides error", () => { + // Use a high-frequency schedule so the backoff (30 s minimum) would + // exceed the natural interval and push nextRunAtMs out if applied. + const schedule = { kind: "every" as const, everyMs: 5_000 }; + + const jobError = createJob({ schedule }); + const jobOk = createJob({ id: "test-job-2", schedule }); + const stateError = createMockState([jobError]); + const stateOk = createMockState([jobOk]); + + applyJobResult(stateError, jobError, { + ...BASE_RESULT, + status: "error", + error: "Warning: Canvas failed", + delivered: true, + }); + + applyJobResult(stateOk, jobOk, { + ...BASE_RESULT, + status: "ok", + delivered: true, + }); + + // Both should schedule the same natural next run — no backoff on delivered=true. + expect(jobError.state.nextRunAtMs).toBe(jobOk.state.nextRunAtMs); + }); +}); diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index e12c4ae38e7..d18f14060d4 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -319,12 +319,16 @@ export function applyJobResult( }; job.state.runningAtMs = undefined; job.state.lastRunAtMs = result.startedAt; - job.state.lastRunStatus = result.status; - job.state.lastStatus = result.status; + // If primary delivery succeeded, don't let follow-up failures (e.g. a second + // message chunk or Canvas render) taint the overall run status. See #50170. + const effectiveStatus: CronRunStatus = + result.status === "error" && result.delivered === true ? "ok" : result.status; + job.state.lastRunStatus = effectiveStatus; + job.state.lastStatus = effectiveStatus; job.state.lastDurationMs = Math.max(0, result.endedAt - result.startedAt); job.state.lastError = result.error; job.state.lastErrorReason = - result.status === "error" && typeof result.error === "string" + effectiveStatus === "error" && typeof result.error === "string" ? (resolveFailoverReasonFromError(result.error) ?? undefined) : undefined; job.state.lastDelivered = result.delivered; @@ -335,7 +339,7 @@ export function applyJobResult( job.updatedAtMs = result.endedAt; // Track consecutive errors for backoff / auto-disable. - if (result.status === "error") { + if (effectiveStatus === "error") { job.state.consecutiveErrors = (job.state.consecutiveErrors ?? 0) + 1; const alertConfig = resolveFailureAlert(state, job); if (alertConfig && job.state.consecutiveErrors >= alertConfig.after) { @@ -367,15 +371,15 @@ export function applyJobResult( } const shouldDelete = - job.schedule.kind === "at" && job.deleteAfterRun === true && result.status === "ok"; + job.schedule.kind === "at" && job.deleteAfterRun === true && effectiveStatus === "ok"; if (!shouldDelete) { if (job.schedule.kind === "at") { - if (result.status === "ok" || result.status === "skipped") { + if (effectiveStatus === "ok" || effectiveStatus === "skipped") { // One-shot done or skipped: disable to prevent tight-loop (#11452). job.enabled = false; job.state.nextRunAtMs = undefined; - } else if (result.status === "error") { + } else if (effectiveStatus === "error") { const retryConfig = resolveRetryConfig(state.deps.cronConfig); const transient = isTransientCronError(result.error, retryConfig.retryOn); // consecutiveErrors is always set to ≥1 by the increment block above. @@ -413,7 +417,7 @@ export function applyJobResult( ); } } - } else if (result.status === "error" && job.enabled) { + } else if (effectiveStatus === "error" && job.enabled) { // Apply exponential backoff for errored jobs to prevent retry storms. const backoff = errorBackoffMs(job.state.consecutiveErrors ?? 1); let normalNext: number | undefined;