From 2efa044a299ddd5e0c449ee80a1d572c885617f6 Mon Sep 17 00:00:00 2001 From: create Date: Fri, 20 Mar 2026 14:08:12 +0800 Subject: [PATCH 01/15] fix(cron): recompute next run after external schedule reload --- ...external-reload-schedule-recompute.test.ts | 79 +++++++++++++++++++ src/cron/service/store.ts | 74 ++++++++++++++++- 2 files changed, 151 insertions(+), 2 deletions(-) create mode 100644 src/cron/service.external-reload-schedule-recompute.test.ts diff --git a/src/cron/service.external-reload-schedule-recompute.test.ts b/src/cron/service.external-reload-schedule-recompute.test.ts new file mode 100644 index 00000000000..7c45d2cac29 --- /dev/null +++ b/src/cron/service.external-reload-schedule-recompute.test.ts @@ -0,0 +1,79 @@ +import fs from "node:fs/promises"; +import { describe, expect, it, vi } from "vitest"; +import { setupCronServiceSuite, writeCronStoreSnapshot } from "./service.test-harness.js"; +import { createCronServiceState } from "./service/state.js"; +import { ensureLoaded } from "./service/store.js"; +import type { CronJob } from "./types.js"; + +const { logger: noopLogger, makeStorePath } = setupCronServiceSuite({ + prefix: "openclaw-cron-external-reload-", + baseTimeIso: "2026-03-19T01:44:00.000Z", +}); + +describe("forceReload repairs externally changed cron schedules", () => { + it("recomputes nextRunAtMs when jobs.json changes schedule outside cron.update", async () => { + const store = await makeStorePath(); + const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); + const jobId = "external-schedule-change"; + const staleNextRunAtMs = Date.parse("2026-03-20T00:30:00.000Z"); + const correctedNextRunAtMs = Date.parse("2026-03-19T12:30:00.000Z"); + + const createJob = (expr: string): CronJob => ({ + id: jobId, + name: "external schedule change", + enabled: true, + createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), + updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), + schedule: { kind: "cron", expr, tz: "Asia/Shanghai", staggerMs: 0 }, + sessionTarget: "main", + wakeMode: "next-heartbeat", + payload: { kind: "systemEvent", text: "tick" }, + state: { + nextRunAtMs: staleNextRunAtMs, + lastRunAtMs: Date.parse("2026-03-19T00:30:00.000Z"), + lastStatus: "ok", + lastRunStatus: "ok", + }, + }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob("30 8 * * *")], + }); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => nowMs, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), + }); + + await ensureLoaded(state, { skipRecompute: true }); + expect(state.store?.jobs[0]?.state.nextRunAtMs).toBe(staleNextRunAtMs); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob("30 8,20 * * *")], + }); + + await ensureLoaded(state, { forceReload: true, skipRecompute: true }); + + const reloaded = state.store?.jobs.find((job) => job.id === jobId); + expect(reloaded?.schedule).toEqual({ + kind: "cron", + expr: "30 8,20 * * *", + tz: "Asia/Shanghai", + staggerMs: 0, + }); + expect(reloaded?.state.nextRunAtMs).toBe(correctedNextRunAtMs); + + const persisted = JSON.parse(await fs.readFile(store.storePath, "utf8")) as { + jobs?: Array<{ id: string; state?: { nextRunAtMs?: number } }>; + }; + const persistedJob = persisted.jobs?.find((job) => job.id === jobId); + expect(persistedJob?.state?.nextRunAtMs).toBe(correctedNextRunAtMs); + }); +}); diff --git a/src/cron/service/store.ts b/src/cron/service/store.ts index d1d36e48e08..37942ccd367 100644 --- a/src/cron/service/store.ts +++ b/src/cron/service/store.ts @@ -2,7 +2,7 @@ import fs from "node:fs"; import { normalizeStoredCronJobs } from "../store-migration.js"; import { loadCronStore, saveCronStore } from "../store.js"; import type { CronJob } from "../types.js"; -import { recomputeNextRuns } from "./jobs.js"; +import { computeJobNextRunAtMs, recomputeNextRuns } from "./jobs.js"; import type { CronServiceState } from "./state.js"; async function getFileMtimeMs(path: string): Promise { @@ -14,6 +14,71 @@ async function getFileMtimeMs(path: string): Promise { } } +function schedulesEqual(a: CronJob["schedule"], b: CronJob["schedule"]): boolean { + if (a.kind !== b.kind) { + return false; + } + if (a.kind === "at" && b.kind === "at") { + return a.at === b.at; + } + if (a.kind === "every" && b.kind === "every") { + return a.everyMs === b.everyMs && a.anchorMs === b.anchorMs; + } + if (a.kind === "cron" && b.kind === "cron") { + return a.expr === b.expr && a.tz === b.tz && a.staggerMs === b.staggerMs; + } + return false; +} + +function repairNextRunsAfterExternalReload(params: { + state: CronServiceState; + previousJobs: CronJob[] | undefined; +}): boolean { + const { state, previousJobs } = params; + if (!state.store || !previousJobs?.length) { + return false; + } + + const previousById = new Map(previousJobs.map((job) => [job.id, job])); + const now = state.deps.nowMs(); + let changed = false; + + for (const job of state.store.jobs) { + const previous = previousById.get(job.id); + if (!previous) { + continue; + } + + const scheduleChanged = !schedulesEqual(previous.schedule, job.schedule); + const enabledChanged = previous.enabled !== job.enabled; + if (!scheduleChanged && !enabledChanged) { + continue; + } + + const nextRunAtMs = job.enabled ? computeJobNextRunAtMs(job, now) : undefined; + if (job.state.nextRunAtMs !== nextRunAtMs) { + job.state.nextRunAtMs = nextRunAtMs; + changed = true; + } + if (!job.enabled && job.state.runningAtMs !== undefined) { + job.state.runningAtMs = undefined; + changed = true; + } + + state.deps.log.debug( + { + jobId: job.id, + scheduleChanged, + enabledChanged, + nextRunAtMs: job.state.nextRunAtMs, + }, + "cron: repaired nextRunAtMs after external reload", + ); + } + + return changed; +} + export async function ensureLoaded( state: CronServiceState, opts?: { @@ -31,6 +96,7 @@ export async function ensureLoaded( // Force reload always re-reads the file to avoid missing cross-service // edits on filesystems with coarse mtime resolution. + const previousJobs = state.store?.jobs; const fileMtimeMs = await getFileMtimeMs(state.deps.storePath); const loaded = await loadCronStore(state.deps.storePath); const jobs = (loaded.jobs ?? []) as unknown as Array>; @@ -38,12 +104,16 @@ export async function ensureLoaded( state.store = { version: 1, jobs: jobs as unknown as CronJob[] }; state.storeLoadedAtMs = state.deps.nowMs(); state.storeFileMtimeMs = fileMtimeMs; + const repairedExternalReload = repairNextRunsAfterExternalReload({ + state, + previousJobs, + }); if (!opts?.skipRecompute) { recomputeNextRuns(state); } - if (mutated) { + if (mutated || repairedExternalReload) { await persist(state, { skipBackup: true }); } } From d1623edb1863d49f9b25480e61deb5ecce06d02e Mon Sep 17 00:00:00 2001 From: create Date: Fri, 20 Mar 2026 14:34:22 +0800 Subject: [PATCH 02/15] fix(cron): handle invalid schedule reload errors --- ...external-reload-schedule-recompute.test.ts | 66 +++++++++++++++++++ src/cron/service/store.ts | 16 ++++- 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/src/cron/service.external-reload-schedule-recompute.test.ts b/src/cron/service.external-reload-schedule-recompute.test.ts index 7c45d2cac29..ef45c1c6058 100644 --- a/src/cron/service.external-reload-schedule-recompute.test.ts +++ b/src/cron/service.external-reload-schedule-recompute.test.ts @@ -76,4 +76,70 @@ describe("forceReload repairs externally changed cron schedules", () => { const persistedJob = persisted.jobs?.find((job) => job.id === jobId); expect(persistedJob?.state?.nextRunAtMs).toBe(correctedNextRunAtMs); }); + + it("records schedule errors instead of aborting reload when an external edit is invalid", async () => { + const store = await makeStorePath(); + const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); + const jobId = "external-invalid-schedule"; + + const createJob = (expr: string): CronJob => ({ + id: jobId, + name: "external invalid schedule", + enabled: true, + createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), + updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), + schedule: { kind: "cron", expr, tz: "Asia/Shanghai", staggerMs: 0 }, + sessionTarget: "main", + wakeMode: "next-heartbeat", + payload: { kind: "systemEvent", text: "tick" }, + state: { + nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), + lastRunAtMs: Date.parse("2026-03-19T00:30:00.000Z"), + lastStatus: "ok", + lastRunStatus: "ok", + }, + }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob("30 8 * * *")], + }); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => nowMs, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), + }); + + await ensureLoaded(state, { skipRecompute: true }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob("not a valid cron")], + }); + + await expect( + ensureLoaded(state, { forceReload: true, skipRecompute: true }), + ).resolves.toBeUndefined(); + + const reloaded = state.store?.jobs.find((job) => job.id === jobId); + expect(reloaded?.state.nextRunAtMs).toBeUndefined(); + expect(reloaded?.state.scheduleErrorCount).toBe(1); + expect(reloaded?.state.lastError).toMatch(/^schedule error:/); + + const persisted = JSON.parse(await fs.readFile(store.storePath, "utf8")) as { + jobs?: Array<{ + id: string; + state?: { scheduleErrorCount?: number; lastError?: string; nextRunAtMs?: number }; + }>; + }; + const persistedJob = persisted.jobs?.find((job) => job.id === jobId); + expect(persistedJob?.state?.scheduleErrorCount).toBe(1); + expect(persistedJob?.state?.lastError).toMatch(/^schedule error:/); + expect(persistedJob?.state?.nextRunAtMs).toBeUndefined(); + }); }); diff --git a/src/cron/service/store.ts b/src/cron/service/store.ts index 37942ccd367..47f5db9c8cb 100644 --- a/src/cron/service/store.ts +++ b/src/cron/service/store.ts @@ -2,7 +2,7 @@ import fs from "node:fs"; import { normalizeStoredCronJobs } from "../store-migration.js"; import { loadCronStore, saveCronStore } from "../store.js"; import type { CronJob } from "../types.js"; -import { computeJobNextRunAtMs, recomputeNextRuns } from "./jobs.js"; +import { computeJobNextRunAtMs, recordScheduleComputeError, recomputeNextRuns } from "./jobs.js"; import type { CronServiceState } from "./state.js"; async function getFileMtimeMs(path: string): Promise { @@ -55,7 +55,19 @@ function repairNextRunsAfterExternalReload(params: { continue; } - const nextRunAtMs = job.enabled ? computeJobNextRunAtMs(job, now) : undefined; + let nextRunAtMs: number | undefined; + try { + nextRunAtMs = job.enabled ? computeJobNextRunAtMs(job, now) : undefined; + if (job.state.scheduleErrorCount !== undefined) { + job.state.scheduleErrorCount = undefined; + changed = true; + } + } catch (err) { + if (recordScheduleComputeError({ state, job, err })) { + changed = true; + } + continue; + } if (job.state.nextRunAtMs !== nextRunAtMs) { job.state.nextRunAtMs = nextRunAtMs; changed = true; From ce43e39dcb343dcb0977207fd15fc03a392950f1 Mon Sep 17 00:00:00 2001 From: create Date: Fri, 20 Mar 2026 14:58:57 +0800 Subject: [PATCH 03/15] fix(cron): avoid double-counting external reload schedule errors --- ...external-reload-schedule-recompute.test.ts | 105 ++++++++++++++++++ src/cron/service/jobs.ts | 12 ++ src/cron/service/state.ts | 2 + src/cron/service/store.ts | 8 ++ 4 files changed, 127 insertions(+) diff --git a/src/cron/service.external-reload-schedule-recompute.test.ts b/src/cron/service.external-reload-schedule-recompute.test.ts index ef45c1c6058..cc887dfc89b 100644 --- a/src/cron/service.external-reload-schedule-recompute.test.ts +++ b/src/cron/service.external-reload-schedule-recompute.test.ts @@ -1,6 +1,7 @@ import fs from "node:fs/promises"; import { describe, expect, it, vi } from "vitest"; import { setupCronServiceSuite, writeCronStoreSnapshot } from "./service.test-harness.js"; +import { recomputeNextRuns, recomputeNextRunsForMaintenance } from "./service/jobs.js"; import { createCronServiceState } from "./service/state.js"; import { ensureLoaded } from "./service/store.js"; import type { CronJob } from "./types.js"; @@ -142,4 +143,108 @@ describe("forceReload repairs externally changed cron schedules", () => { expect(persistedJob?.state?.lastError).toMatch(/^schedule error:/); expect(persistedJob?.state?.nextRunAtMs).toBeUndefined(); }); + + it("does not double-count a reload schedule error during the immediate full recompute", async () => { + const store = await makeStorePath(); + const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); + const jobId = "external-invalid-schedule-full-recompute"; + + const createJob = (expr: string): CronJob => ({ + id: jobId, + name: "external invalid schedule full recompute", + enabled: true, + createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), + updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), + schedule: { kind: "cron", expr, tz: "Asia/Shanghai", staggerMs: 0 }, + sessionTarget: "main", + wakeMode: "next-heartbeat", + payload: { kind: "systemEvent", text: "tick" }, + state: { + nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), + }, + }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob("30 8 * * *")], + }); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => nowMs, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), + }); + + await ensureLoaded(state, { skipRecompute: true }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob("not a valid cron")], + }); + + await ensureLoaded(state, { forceReload: true, skipRecompute: true }); + expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(1); + + recomputeNextRuns(state); + expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(1); + + recomputeNextRuns(state); + expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(2); + }); + + it("does not double-count a reload schedule error during immediate maintenance recompute", async () => { + const store = await makeStorePath(); + const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); + const jobId = "external-invalid-schedule-maintenance"; + + const createJob = (expr: string): CronJob => ({ + id: jobId, + name: "external invalid schedule maintenance", + enabled: true, + createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), + updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), + schedule: { kind: "cron", expr, tz: "Asia/Shanghai", staggerMs: 0 }, + sessionTarget: "main", + wakeMode: "next-heartbeat", + payload: { kind: "systemEvent", text: "tick" }, + state: { + nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), + }, + }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob("30 8 * * *")], + }); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => nowMs, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), + }); + + await ensureLoaded(state, { skipRecompute: true }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob("not a valid cron")], + }); + + await ensureLoaded(state, { forceReload: true, skipRecompute: true }); + expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(1); + + recomputeNextRunsForMaintenance(state); + expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(1); + + recomputeNextRunsForMaintenance(state); + expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(2); + }); }); diff --git a/src/cron/service/jobs.ts b/src/cron/service/jobs.ts index 542ba81053d..8f853ea8e92 100644 --- a/src/cron/service/jobs.ts +++ b/src/cron/service/jobs.ts @@ -413,7 +413,19 @@ function walkSchedulableJobs( return changed; } +function consumeSkipNextReloadRepairRecompute(state: CronServiceState, jobId: string): boolean { + const pending = state.skipNextReloadRepairRecomputeJobIds; + if (!pending?.has(jobId)) { + return false; + } + pending.delete(jobId); + return true; +} + function recomputeJobNextRunAtMs(params: { state: CronServiceState; job: CronJob; nowMs: number }) { + if (consumeSkipNextReloadRepairRecompute(params.state, params.job.id)) { + return false; + } let changed = false; try { const newNext = computeJobNextRunAtMs(params.job, params.nowMs); diff --git a/src/cron/service/state.ts b/src/cron/service/state.ts index 073efd8f459..d51464c29d2 100644 --- a/src/cron/service/state.ts +++ b/src/cron/service/state.ts @@ -127,6 +127,7 @@ export type CronServiceState = { warnedDisabled: boolean; storeLoadedAtMs: number | null; storeFileMtimeMs: number | null; + skipNextReloadRepairRecomputeJobIds?: Set; }; export function createCronServiceState(deps: CronServiceDeps): CronServiceState { @@ -139,6 +140,7 @@ export function createCronServiceState(deps: CronServiceDeps): CronServiceState warnedDisabled: false, storeLoadedAtMs: null, storeFileMtimeMs: null, + skipNextReloadRepairRecomputeJobIds: new Set(), }; } diff --git a/src/cron/service/store.ts b/src/cron/service/store.ts index 47f5db9c8cb..640f2ae9516 100644 --- a/src/cron/service/store.ts +++ b/src/cron/service/store.ts @@ -30,11 +30,17 @@ function schedulesEqual(a: CronJob["schedule"], b: CronJob["schedule"]): boolean return false; } +function getSkipNextReloadRepairRecomputeJobIds(state: CronServiceState): Set { + return (state.skipNextReloadRepairRecomputeJobIds ??= new Set()); +} + function repairNextRunsAfterExternalReload(params: { state: CronServiceState; previousJobs: CronJob[] | undefined; }): boolean { const { state, previousJobs } = params; + const skipRecomputeJobIds = getSkipNextReloadRepairRecomputeJobIds(state); + skipRecomputeJobIds.clear(); if (!state.store || !previousJobs?.length) { return false; } @@ -55,6 +61,7 @@ function repairNextRunsAfterExternalReload(params: { continue; } + skipRecomputeJobIds.delete(job.id); let nextRunAtMs: number | undefined; try { nextRunAtMs = job.enabled ? computeJobNextRunAtMs(job, now) : undefined; @@ -66,6 +73,7 @@ function repairNextRunsAfterExternalReload(params: { if (recordScheduleComputeError({ state, job, err })) { changed = true; } + skipRecomputeJobIds.add(job.id); continue; } if (job.state.nextRunAtMs !== nextRunAtMs) { From a42cd9473c8bbb2c63b4aa114b9729ba4346f85b Mon Sep 17 00:00:00 2001 From: create Date: Fri, 20 Mar 2026 15:43:15 +0800 Subject: [PATCH 04/15] fix(cron): preserve reload skip across repeated force reloads --- ...external-reload-schedule-recompute.test.ts | 55 +++++++++++++++++++ src/cron/service/store.ts | 9 ++- 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/src/cron/service.external-reload-schedule-recompute.test.ts b/src/cron/service.external-reload-schedule-recompute.test.ts index cc887dfc89b..95bed250642 100644 --- a/src/cron/service.external-reload-schedule-recompute.test.ts +++ b/src/cron/service.external-reload-schedule-recompute.test.ts @@ -247,4 +247,59 @@ describe("forceReload repairs externally changed cron schedules", () => { recomputeNextRunsForMaintenance(state); expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(2); }); + + it("preserves the one-shot skip across a second forceReload before maintenance recompute", async () => { + const store = await makeStorePath(); + const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); + const jobId = "external-invalid-schedule-second-reload"; + + const createJob = (expr: string): CronJob => ({ + id: jobId, + name: "external invalid schedule second reload", + enabled: true, + createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), + updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), + schedule: { kind: "cron", expr, tz: "Asia/Shanghai", staggerMs: 0 }, + sessionTarget: "main", + wakeMode: "next-heartbeat", + payload: { kind: "systemEvent", text: "tick" }, + state: { + nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), + }, + }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob("30 8 * * *")], + }); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => nowMs, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), + }); + + await ensureLoaded(state, { skipRecompute: true }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob("not a valid cron")], + }); + + await ensureLoaded(state, { forceReload: true, skipRecompute: true }); + expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(1); + + await ensureLoaded(state, { forceReload: true, skipRecompute: true }); + expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(1); + + recomputeNextRunsForMaintenance(state); + expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(1); + + recomputeNextRunsForMaintenance(state); + expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(2); + }); }); diff --git a/src/cron/service/store.ts b/src/cron/service/store.ts index 640f2ae9516..470b7178522 100644 --- a/src/cron/service/store.ts +++ b/src/cron/service/store.ts @@ -40,10 +40,17 @@ function repairNextRunsAfterExternalReload(params: { }): boolean { const { state, previousJobs } = params; const skipRecomputeJobIds = getSkipNextReloadRepairRecomputeJobIds(state); - skipRecomputeJobIds.clear(); if (!state.store || !previousJobs?.length) { return false; } + if (skipRecomputeJobIds.size > 0) { + const currentJobIds = new Set(state.store.jobs.map((job) => job.id)); + for (const jobId of skipRecomputeJobIds) { + if (!currentJobIds.has(jobId)) { + skipRecomputeJobIds.delete(jobId); + } + } + } const previousById = new Map(previousJobs.map((job) => [job.id, job])); const now = state.deps.nowMs(); From c3338cdb9f864bda47ae253402ffd0a766739bf6 Mon Sep 17 00:00:00 2001 From: create Date: Fri, 20 Mar 2026 16:25:26 +0800 Subject: [PATCH 05/15] fix(cron): preserve reload repairs in manual run and apply paths --- ...external-reload-schedule-recompute.test.ts | 94 ++++++++++++++++ src/cron/service.issue-regressions.test.ts | 44 +++++++- src/cron/service/jobs.ts | 13 ++- src/cron/service/ops.ts | 24 ++++- src/cron/service/timer.ts | 100 ++++++++++-------- 5 files changed, 219 insertions(+), 56 deletions(-) diff --git a/src/cron/service.external-reload-schedule-recompute.test.ts b/src/cron/service.external-reload-schedule-recompute.test.ts index 95bed250642..7fde2d75d3c 100644 --- a/src/cron/service.external-reload-schedule-recompute.test.ts +++ b/src/cron/service.external-reload-schedule-recompute.test.ts @@ -2,6 +2,7 @@ import fs from "node:fs/promises"; import { describe, expect, it, vi } from "vitest"; import { setupCronServiceSuite, writeCronStoreSnapshot } from "./service.test-harness.js"; import { recomputeNextRuns, recomputeNextRunsForMaintenance } from "./service/jobs.js"; +import { run } from "./service/ops.js"; import { createCronServiceState } from "./service/state.js"; import { ensureLoaded } from "./service/store.js"; import type { CronJob } from "./types.js"; @@ -302,4 +303,97 @@ describe("forceReload repairs externally changed cron schedules", () => { recomputeNextRunsForMaintenance(state); expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(2); }); + + it("keeps forceReload repairs when manual-run snapshot is merged back", async () => { + const store = await makeStorePath(); + let nowMs = Date.parse("2026-03-19T01:44:00.000Z"); + const jobId = "manual-run-reload-merge"; + const staleNextRunAtMs = Date.parse("2026-03-19T23:30:00.000Z"); + + const createJob = (params: { + expr: string; + enabled: boolean; + nextRunAtMs?: number; + lastStatus?: CronJob["state"]["lastStatus"]; + }): CronJob => ({ + id: jobId, + name: "manual run reload merge", + enabled: params.enabled, + createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), + updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), + schedule: { kind: "cron", expr: params.expr, tz: "Asia/Shanghai", staggerMs: 0 }, + sessionTarget: "isolated", + wakeMode: "next-heartbeat", + payload: { kind: "agentTurn", message: "tick" }, + state: { + nextRunAtMs: params.nextRunAtMs, + lastStatus: params.lastStatus, + }, + }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [ + createJob({ + expr: "30 23 * * *", + enabled: true, + nextRunAtMs: staleNextRunAtMs, + }), + ], + }); + + const runIsolatedAgentJob = vi.fn(async () => { + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [ + createJob({ + expr: "30 8 * * *", + enabled: false, + nextRunAtMs: staleNextRunAtMs, + lastStatus: "error", + }), + ], + }); + nowMs += 500; + return { status: "ok" as const, summary: "done" }; + }); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => nowMs, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob, + }); + + const result = await run(state, jobId, "force"); + expect(result).toEqual({ ok: true, ran: true }); + expect(runIsolatedAgentJob).toHaveBeenCalledTimes(1); + + const merged = state.store?.jobs.find((job) => job.id === jobId); + expect(merged?.schedule).toEqual({ + kind: "cron", + expr: "30 8 * * *", + tz: "Asia/Shanghai", + staggerMs: 0, + }); + expect(merged?.enabled).toBe(false); + expect(merged?.state.nextRunAtMs).toBeUndefined(); + expect(merged?.state.lastStatus).toBe("ok"); + expect(merged?.state.lastRunAtMs).toBeDefined(); + + const persisted = JSON.parse(await fs.readFile(store.storePath, "utf8")) as { + jobs?: Array<{ + id: string; + enabled?: boolean; + state?: { nextRunAtMs?: number; lastStatus?: string }; + }>; + }; + const persistedJob = persisted.jobs?.find((job) => job.id === jobId); + expect(persistedJob?.enabled).toBe(false); + expect(persistedJob?.state?.nextRunAtMs).toBeUndefined(); + expect(persistedJob?.state?.lastStatus).toBe("ok"); + }); }); diff --git a/src/cron/service.issue-regressions.test.ts b/src/cron/service.issue-regressions.test.ts index dac28f4b0c9..e29a059f39d 100644 --- a/src/cron/service.issue-regressions.test.ts +++ b/src/cron/service.issue-regressions.test.ts @@ -22,7 +22,7 @@ import { createNoopLogger, createRunningCronServiceState, } from "./service.test-harness.js"; -import { computeJobNextRunAtMs } from "./service/jobs.js"; +import { computeJobNextRunAtMs, recomputeNextRunsForMaintenance } from "./service/jobs.js"; import { enqueueRun, run } from "./service/ops.js"; import { createCronServiceState, type CronEvent } from "./service/state.js"; import { @@ -1748,6 +1748,48 @@ describe("Cron issue regressions", () => { expect(job.enabled).toBe(true); }); + it("does not double-count reload schedule errors in apply path before maintenance recompute", () => { + const startedAt = Date.parse("2026-03-02T12:10:00.000Z"); + const endedAt = startedAt + 25; + const job = createIsolatedRegressionJob({ + id: "apply-result-reload-dedupe-30905", + name: "apply-result-reload-dedupe-30905", + scheduledAt: startedAt, + schedule: { kind: "cron", expr: "0 7 * * *", tz: "Invalid/Timezone" }, + payload: { kind: "agentTurn", message: "ping" }, + state: { + nextRunAtMs: undefined, + runningAtMs: startedAt - 500, + scheduleErrorCount: 1, + lastError: "schedule error: previous", + }, + }); + const state = createRunningCronServiceState({ + storePath: "/tmp/cron-30905-reload-dedupe.json", + log: noopLogger as never, + nowMs: () => endedAt, + jobs: [job], + }); + state.skipNextReloadRepairRecomputeJobIds = new Set([job.id]); + + applyJobResult(state, job, { + status: "ok", + delivered: true, + startedAt, + endedAt, + }); + + expect(job.state.scheduleErrorCount).toBe(1); + expect(state.skipNextReloadRepairRecomputeJobIds?.has(job.id)).toBe(true); + + recomputeNextRunsForMaintenance(state); + expect(job.state.scheduleErrorCount).toBe(1); + expect(state.skipNextReloadRepairRecomputeJobIds?.has(job.id)).toBe(false); + + recomputeNextRunsForMaintenance(state); + expect(job.state.scheduleErrorCount).toBe(2); + }); + it("force run preserves 'every' anchor while recording manual lastRunAtMs", () => { const nowMs = Date.now(); const everyMs = 24 * 60 * 60 * 1_000; diff --git a/src/cron/service/jobs.ts b/src/cron/service/jobs.ts index 8f853ea8e92..db90f1502f1 100644 --- a/src/cron/service/jobs.ts +++ b/src/cron/service/jobs.ts @@ -413,12 +413,19 @@ function walkSchedulableJobs( return changed; } -function consumeSkipNextReloadRepairRecompute(state: CronServiceState, jobId: string): boolean { +export function hasSkipNextReloadRepairRecompute(state: CronServiceState, jobId: string): boolean { const pending = state.skipNextReloadRepairRecomputeJobIds; - if (!pending?.has(jobId)) { + return pending?.has(jobId) === true; +} + +export function consumeSkipNextReloadRepairRecompute( + state: CronServiceState, + jobId: string, +): boolean { + if (!hasSkipNextReloadRepairRecompute(state, jobId)) { return false; } - pending.delete(jobId); + state.skipNextReloadRepairRecomputeJobIds?.delete(jobId); return true; } diff --git a/src/cron/service/ops.ts b/src/cron/service/ops.ts index 69751e4dfdb..e932e666cae 100644 --- a/src/cron/service/ops.ts +++ b/src/cron/service/ops.ts @@ -51,7 +51,6 @@ function mergeManualRunSnapshotAfterReload(params: { state: CronServiceState; jobId: string; snapshot: { - enabled: boolean; updatedAtMs: number; state: CronJob["state"]; } | null; @@ -71,9 +70,25 @@ function mergeManualRunSnapshotAfterReload(params: { if (!reloaded) { return; } - reloaded.enabled = params.snapshot.enabled; - reloaded.updatedAtMs = params.snapshot.updatedAtMs; - reloaded.state = params.snapshot.state; + const preservedEnabled = reloaded.enabled; + const preservedNextRunAtMs = reloaded.state.nextRunAtMs; + const preservedScheduleErrorCount = reloaded.state.scheduleErrorCount; + const preservedScheduleErrorText = reloaded.state.lastError; + + reloaded.updatedAtMs = Math.max(reloaded.updatedAtMs, params.snapshot.updatedAtMs); + reloaded.state = { + ...reloaded.state, + ...params.snapshot.state, + }; + + // Keep externally reloaded schedule/enable repairs even when a manual run + // snapshot was captured before forceReload. + reloaded.enabled = preservedEnabled; + reloaded.state.nextRunAtMs = preservedNextRunAtMs; + if (preservedScheduleErrorCount !== undefined) { + reloaded.state.scheduleErrorCount = preservedScheduleErrorCount; + reloaded.state.lastError = preservedScheduleErrorText; + } } async function ensureLoadedForRead(state: CronServiceState) { @@ -519,7 +534,6 @@ async function finishPreparedManualRun( const postRunSnapshot = shouldDelete ? null : { - enabled: job.enabled, updatedAtMs: job.updatedAtMs, state: structuredClone(job.state), }; diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index e12c4ae38e7..b3c130b124b 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -15,6 +15,7 @@ import type { import { computeJobPreviousRunAtMs, computeJobNextRunAtMs, + hasSkipNextReloadRepairRecompute, nextWakeAtMs, recomputeNextRunsForMaintenance, recordScheduleComputeError, @@ -368,6 +369,7 @@ export function applyJobResult( const shouldDelete = job.schedule.kind === "at" && job.deleteAfterRun === true && result.status === "ok"; + const skipImmediateScheduleRecompute = hasSkipNextReloadRepairRecompute(state, job.id); if (!shouldDelete) { if (job.schedule.kind === "at") { @@ -416,54 +418,58 @@ export function applyJobResult( } else if (result.status === "error" && job.enabled) { // Apply exponential backoff for errored jobs to prevent retry storms. const backoff = errorBackoffMs(job.state.consecutiveErrors ?? 1); - let normalNext: number | undefined; - try { - normalNext = - opts?.preserveSchedule && job.schedule.kind === "every" - ? computeNextWithPreservedLastRun(result.endedAt) - : computeJobNextRunAtMs(job, result.endedAt); - } catch (err) { - // If the schedule expression/timezone throws (croner edge cases), - // record the schedule error (auto-disables after repeated failures) - // and fall back to backoff-only schedule so the state update is not lost. - recordScheduleComputeError({ state, job, err }); - } - const backoffNext = result.endedAt + backoff; - // Use whichever is later: the natural next run or the backoff delay. - job.state.nextRunAtMs = - normalNext !== undefined ? Math.max(normalNext, backoffNext) : backoffNext; - state.deps.log.info( - { - jobId: job.id, - consecutiveErrors: job.state.consecutiveErrors, - backoffMs: backoff, - nextRunAtMs: job.state.nextRunAtMs, - }, - "cron: applying error backoff", - ); - } else if (job.enabled) { - let naturalNext: number | undefined; - try { - naturalNext = - opts?.preserveSchedule && job.schedule.kind === "every" - ? computeNextWithPreservedLastRun(result.endedAt) - : computeJobNextRunAtMs(job, result.endedAt); - } catch (err) { - // If the schedule expression/timezone throws (croner edge cases), - // record the schedule error (auto-disables after repeated failures) - // so a persistent throw doesn't cause a MIN_REFIRE_GAP_MS hot loop. - recordScheduleComputeError({ state, job, err }); - } - if (job.schedule.kind === "cron") { - // Safety net: ensure the next fire is at least MIN_REFIRE_GAP_MS - // after the current run ended. Prevents spin-loops when the - // schedule computation lands in the same second due to - // timezone/croner edge cases (see #17821). - const minNext = result.endedAt + MIN_REFIRE_GAP_MS; + if (!skipImmediateScheduleRecompute) { + let normalNext: number | undefined; + try { + normalNext = + opts?.preserveSchedule && job.schedule.kind === "every" + ? computeNextWithPreservedLastRun(result.endedAt) + : computeJobNextRunAtMs(job, result.endedAt); + } catch (err) { + // If the schedule expression/timezone throws (croner edge cases), + // record the schedule error (auto-disables after repeated failures) + // and fall back to backoff-only schedule so the state update is not lost. + recordScheduleComputeError({ state, job, err }); + } + const backoffNext = result.endedAt + backoff; + // Use whichever is later: the natural next run or the backoff delay. job.state.nextRunAtMs = - naturalNext !== undefined ? Math.max(naturalNext, minNext) : minNext; - } else { - job.state.nextRunAtMs = naturalNext; + normalNext !== undefined ? Math.max(normalNext, backoffNext) : backoffNext; + state.deps.log.info( + { + jobId: job.id, + consecutiveErrors: job.state.consecutiveErrors, + backoffMs: backoff, + nextRunAtMs: job.state.nextRunAtMs, + }, + "cron: applying error backoff", + ); + } + } else if (job.enabled) { + if (!skipImmediateScheduleRecompute) { + let naturalNext: number | undefined; + try { + naturalNext = + opts?.preserveSchedule && job.schedule.kind === "every" + ? computeNextWithPreservedLastRun(result.endedAt) + : computeJobNextRunAtMs(job, result.endedAt); + } catch (err) { + // If the schedule expression/timezone throws (croner edge cases), + // record the schedule error (auto-disables after repeated failures) + // so a persistent throw doesn't cause a MIN_REFIRE_GAP_MS hot loop. + recordScheduleComputeError({ state, job, err }); + } + if (job.schedule.kind === "cron") { + // Safety net: ensure the next fire is at least MIN_REFIRE_GAP_MS + // after the current run ended. Prevents spin-loops when the + // schedule computation lands in the same second due to + // timezone/croner edge cases (see #17821). + const minNext = result.endedAt + MIN_REFIRE_GAP_MS; + job.state.nextRunAtMs = + naturalNext !== undefined ? Math.max(naturalNext, minNext) : minNext; + } else { + job.state.nextRunAtMs = naturalNext; + } } } else { job.state.nextRunAtMs = undefined; From 9f9e6b7cfe0954af8101c9a2e6b8849996d7faf0 Mon Sep 17 00:00:00 2001 From: create Date: Fri, 20 Mar 2026 16:59:33 +0800 Subject: [PATCH 06/15] fix(cron): handle latest review edge cases in manual merge and apply wake --- ...external-reload-schedule-recompute.test.ts | 60 +++++++++++++++++++ src/cron/service.issue-regressions.test.ts | 41 ++++++++++++- src/cron/service/ops.ts | 53 +++++++++++++--- src/cron/service/timer.ts | 12 +++- 4 files changed, 155 insertions(+), 11 deletions(-) diff --git a/src/cron/service.external-reload-schedule-recompute.test.ts b/src/cron/service.external-reload-schedule-recompute.test.ts index 7fde2d75d3c..29029c3a197 100644 --- a/src/cron/service.external-reload-schedule-recompute.test.ts +++ b/src/cron/service.external-reload-schedule-recompute.test.ts @@ -396,4 +396,64 @@ describe("forceReload repairs externally changed cron schedules", () => { expect(persistedJob?.state?.nextRunAtMs).toBeUndefined(); expect(persistedJob?.state?.lastStatus).toBe("ok"); }); + + it("keeps one-shot terminal disable state when manual force-run reloads unchanged store", async () => { + const store = await makeStorePath(); + let nowMs = Date.parse("2026-03-19T01:44:00.000Z"); + const jobId = "manual-run-at-terminal-state"; + const scheduledAtMs = nowMs + 60_000; + + const createJob = (): CronJob => ({ + id: jobId, + name: "manual run at terminal state", + enabled: true, + createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), + updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), + schedule: { kind: "at", at: new Date(scheduledAtMs).toISOString() }, + sessionTarget: "isolated", + wakeMode: "next-heartbeat", + payload: { kind: "agentTurn", message: "tick" }, + state: { + nextRunAtMs: scheduledAtMs, + }, + }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob()], + }); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => nowMs, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(async () => { + nowMs += 500; + return { status: "ok" as const, summary: "done" }; + }), + }); + + const result = await run(state, jobId, "force"); + expect(result).toEqual({ ok: true, ran: true }); + + const merged = state.store?.jobs.find((job) => job.id === jobId); + expect(merged?.enabled).toBe(false); + expect(merged?.state.nextRunAtMs).toBeUndefined(); + expect(merged?.state.lastStatus).toBe("ok"); + + const persisted = JSON.parse(await fs.readFile(store.storePath, "utf8")) as { + jobs?: Array<{ + id: string; + enabled?: boolean; + state?: { nextRunAtMs?: number; lastStatus?: string }; + }>; + }; + const persistedJob = persisted.jobs?.find((job) => job.id === jobId); + expect(persistedJob?.enabled).toBe(false); + expect(persistedJob?.state?.nextRunAtMs).toBeUndefined(); + expect(persistedJob?.state?.lastStatus).toBe("ok"); + }); }); diff --git a/src/cron/service.issue-regressions.test.ts b/src/cron/service.issue-regressions.test.ts index e29a059f39d..c8078672b97 100644 --- a/src/cron/service.issue-regressions.test.ts +++ b/src/cron/service.issue-regressions.test.ts @@ -1780,14 +1780,51 @@ describe("Cron issue regressions", () => { }); expect(job.state.scheduleErrorCount).toBe(1); - expect(state.skipNextReloadRepairRecomputeJobIds?.has(job.id)).toBe(true); + expect(state.skipNextReloadRepairRecomputeJobIds?.has(job.id)).toBe(false); recomputeNextRunsForMaintenance(state); expect(job.state.scheduleErrorCount).toBe(1); expect(state.skipNextReloadRepairRecomputeJobIds?.has(job.id)).toBe(false); + }); + + it("keeps a future wake when apply skips immediate recompute after reload schedule error", () => { + const startedAt = Date.parse("2026-03-02T12:12:00.000Z"); + const endedAt = startedAt + 25; + const job = createIsolatedRegressionJob({ + id: "apply-result-reload-wake-30905", + name: "apply-result-reload-wake-30905", + scheduledAt: startedAt, + schedule: { kind: "cron", expr: "0 7 * * *", tz: "Invalid/Timezone" }, + payload: { kind: "agentTurn", message: "ping" }, + state: { + nextRunAtMs: undefined, + runningAtMs: startedAt - 500, + scheduleErrorCount: 1, + lastError: "schedule error: previous", + }, + }); + const state = createRunningCronServiceState({ + storePath: "/tmp/cron-30905-reload-wake.json", + log: noopLogger as never, + nowMs: () => endedAt, + jobs: [job], + }); + state.skipNextReloadRepairRecomputeJobIds = new Set([job.id]); + + applyJobResult(state, job, { + status: "error", + error: "synthetic failure", + startedAt, + endedAt, + }); + + expect(job.state.scheduleErrorCount).toBe(1); + expect(job.state.nextRunAtMs).toBe(endedAt + 30_000); + expect(state.skipNextReloadRepairRecomputeJobIds?.has(job.id)).toBe(false); recomputeNextRunsForMaintenance(state); - expect(job.state.scheduleErrorCount).toBe(2); + expect(job.state.scheduleErrorCount).toBe(1); + expect(state.skipNextReloadRepairRecomputeJobIds?.has(job.id)).toBe(false); }); it("force run preserves 'every' anchor while recording manual lastRunAtMs", () => { diff --git a/src/cron/service/ops.ts b/src/cron/service/ops.ts index e932e666cae..58b12933839 100644 --- a/src/cron/service/ops.ts +++ b/src/cron/service/ops.ts @@ -47,13 +47,35 @@ export type CronListPageResult = { hasMore: boolean; nextOffset: number | null; }; + +function schedulesEqual(a: CronJob["schedule"], b: CronJob["schedule"]): boolean { + if (a.kind !== b.kind) { + return false; + } + if (a.kind === "at" && b.kind === "at") { + return a.at === b.at; + } + if (a.kind === "every" && b.kind === "every") { + return a.everyMs === b.everyMs && a.anchorMs === b.anchorMs; + } + if (a.kind === "cron" && b.kind === "cron") { + return a.expr === b.expr && a.tz === b.tz && a.staggerMs === b.staggerMs; + } + return false; +} + function mergeManualRunSnapshotAfterReload(params: { state: CronServiceState; jobId: string; snapshot: { + enabled: boolean; updatedAtMs: number; state: CronJob["state"]; } | null; + baseline: { + enabled: boolean; + schedule: CronJob["schedule"]; + } | null; removed: boolean; }) { if (!params.state.store) { @@ -74,6 +96,10 @@ function mergeManualRunSnapshotAfterReload(params: { const preservedNextRunAtMs = reloaded.state.nextRunAtMs; const preservedScheduleErrorCount = reloaded.state.scheduleErrorCount; const preservedScheduleErrorText = reloaded.state.lastError; + const externalScheduleOrEnabledChanged = + params.baseline !== null && + (preservedEnabled !== params.baseline.enabled || + !schedulesEqual(reloaded.schedule, params.baseline.schedule)); reloaded.updatedAtMs = Math.max(reloaded.updatedAtMs, params.snapshot.updatedAtMs); reloaded.state = { @@ -81,13 +107,18 @@ function mergeManualRunSnapshotAfterReload(params: { ...params.snapshot.state, }; - // Keep externally reloaded schedule/enable repairs even when a manual run - // snapshot was captured before forceReload. - reloaded.enabled = preservedEnabled; - reloaded.state.nextRunAtMs = preservedNextRunAtMs; - if (preservedScheduleErrorCount !== undefined) { - reloaded.state.scheduleErrorCount = preservedScheduleErrorCount; - reloaded.state.lastError = preservedScheduleErrorText; + // Only preserve reload-derived schedule/enable repairs when the underlying + // schedule or enabled flag was externally changed while the manual run was executing. + // Otherwise, keep the manual-run terminal state (e.g. one-shot disable on success). + if (externalScheduleOrEnabledChanged) { + reloaded.enabled = preservedEnabled; + reloaded.state.nextRunAtMs = preservedNextRunAtMs; + if (preservedScheduleErrorCount !== undefined) { + reloaded.state.scheduleErrorCount = preservedScheduleErrorCount; + reloaded.state.lastError = preservedScheduleErrorText; + } + } else { + reloaded.enabled = params.snapshot.enabled; } } @@ -534,9 +565,16 @@ async function finishPreparedManualRun( const postRunSnapshot = shouldDelete ? null : { + enabled: job.enabled, updatedAtMs: job.updatedAtMs, state: structuredClone(job.state), }; + const postRunBaseline = shouldDelete + ? null + : { + enabled: executionJob.enabled, + schedule: structuredClone(executionJob.schedule), + }; const postRunRemoved = shouldDelete; // Isolated Telegram send can persist target writeback directly to disk. // Reload before final persist so manual `cron run` keeps those changes. @@ -545,6 +583,7 @@ async function finishPreparedManualRun( state, jobId, snapshot: postRunSnapshot, + baseline: postRunBaseline, removed: postRunRemoved, }); recomputeNextRunsForMaintenance(state, { recomputeExpired: true }); diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index b3c130b124b..ecac35d5fe8 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -15,7 +15,7 @@ import type { import { computeJobPreviousRunAtMs, computeJobNextRunAtMs, - hasSkipNextReloadRepairRecompute, + consumeSkipNextReloadRepairRecompute, nextWakeAtMs, recomputeNextRunsForMaintenance, recordScheduleComputeError, @@ -369,7 +369,7 @@ export function applyJobResult( const shouldDelete = job.schedule.kind === "at" && job.deleteAfterRun === true && result.status === "ok"; - const skipImmediateScheduleRecompute = hasSkipNextReloadRepairRecompute(state, job.id); + const skipImmediateScheduleRecompute = consumeSkipNextReloadRepairRecompute(state, job.id); if (!shouldDelete) { if (job.schedule.kind === "at") { @@ -444,6 +444,10 @@ export function applyJobResult( }, "cron: applying error backoff", ); + } else { + // Keep a future wake so we don't stall when the one-shot skip marker + // defers immediate schedule recompute after reload repair. + job.state.nextRunAtMs = result.endedAt + backoff; } } else if (job.enabled) { if (!skipImmediateScheduleRecompute) { @@ -470,6 +474,10 @@ export function applyJobResult( } else { job.state.nextRunAtMs = naturalNext; } + } else if (job.state.nextRunAtMs === undefined) { + // Keep timer progress when immediate recompute is deferred by the + // reload-repair skip marker. + job.state.nextRunAtMs = result.endedAt + MIN_REFIRE_GAP_MS; } } else { job.state.nextRunAtMs = undefined; From dbc29254515fba0033d799675b1d97cae7b8e9cd Mon Sep 17 00:00:00 2001 From: create Date: Fri, 20 Mar 2026 17:47:32 +0800 Subject: [PATCH 07/15] fix(cron): clear reload-skip markers on delete and harden loader regression test --- ...external-reload-schedule-recompute.test.ts | 118 +++++++++++++++++- src/cron/service.jobs.test.ts | 1 + src/cron/service.test-harness.ts | 1 + .../jobs.schedule-error-isolation.test.ts | 5 + src/cron/service/jobs.ts | 18 ++- src/cron/service/ops.ts | 27 +--- src/cron/service/schedule-equality.ts | 17 +++ src/cron/service/state.ts | 2 +- src/cron/service/store.ts | 23 +--- src/cron/service/timer.ts | 7 +- 10 files changed, 167 insertions(+), 52 deletions(-) create mode 100644 src/cron/service/schedule-equality.ts diff --git a/src/cron/service.external-reload-schedule-recompute.test.ts b/src/cron/service.external-reload-schedule-recompute.test.ts index 29029c3a197..d8378dab51d 100644 --- a/src/cron/service.external-reload-schedule-recompute.test.ts +++ b/src/cron/service.external-reload-schedule-recompute.test.ts @@ -2,7 +2,7 @@ import fs from "node:fs/promises"; import { describe, expect, it, vi } from "vitest"; import { setupCronServiceSuite, writeCronStoreSnapshot } from "./service.test-harness.js"; import { recomputeNextRuns, recomputeNextRunsForMaintenance } from "./service/jobs.js"; -import { run } from "./service/ops.js"; +import { remove, run } from "./service/ops.js"; import { createCronServiceState } from "./service/state.js"; import { ensureLoaded } from "./service/store.js"; import type { CronJob } from "./types.js"; @@ -456,4 +456,120 @@ describe("forceReload repairs externally changed cron schedules", () => { expect(persistedJob?.state?.nextRunAtMs).toBeUndefined(); expect(persistedJob?.state?.lastStatus).toBe("ok"); }); + + it("clears reload-repair skip markers when a job is removed before same-id rebuild", async () => { + const store = await makeStorePath(); + const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); + const jobId = "external-reload-skip-marker-id-reuse"; + + const createJob = (expr: string): CronJob => ({ + id: jobId, + name: "external reload skip marker id reuse", + enabled: true, + createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), + updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), + schedule: { kind: "cron", expr, tz: "UTC", staggerMs: 0 }, + sessionTarget: "main", + wakeMode: "next-heartbeat", + payload: { kind: "systemEvent", text: "tick" }, + state: {}, + }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob("*/15 * * * *")], + }); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => nowMs, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), + }); + + await ensureLoaded(state, { skipRecompute: true }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob("not a valid cron")], + }); + await ensureLoaded(state, { forceReload: true, skipRecompute: true }); + expect(state.skipNextReloadRepairRecomputeJobIds.has(jobId)).toBe(true); + + const removed = await remove(state, jobId); + expect(removed).toEqual({ ok: true, removed: true }); + expect(state.skipNextReloadRepairRecomputeJobIds.has(jobId)).toBe(false); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob("*/5 * * * *")], + }); + await ensureLoaded(state, { forceReload: true, skipRecompute: true }); + recomputeNextRunsForMaintenance(state); + + const rebuilt = state.store?.jobs.find((job) => job.id === jobId); + expect(typeof rebuilt?.state.nextRunAtMs).toBe("number"); + expect(Number.isFinite(rebuilt?.state.nextRunAtMs)).toBe(true); + expect(rebuilt?.state.scheduleErrorCount).toBeUndefined(); + }); + + it("recomputes nextRunAtMs when external every schedule changes", async () => { + const store = await makeStorePath(); + const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); + const jobId = "external-every-schedule-change"; + + const createEveryJob = (everyMs: number): CronJob => ({ + id: jobId, + name: "external every schedule change", + enabled: true, + createdAtMs: Date.parse("2026-03-18T00:00:00.000Z"), + updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), + schedule: { + kind: "every", + everyMs, + anchorMs: Date.parse("2026-03-19T00:00:00.000Z"), + }, + sessionTarget: "main", + wakeMode: "next-heartbeat", + payload: { kind: "systemEvent", text: "tick" }, + state: { + nextRunAtMs: Date.parse("2026-03-20T00:00:00.000Z"), + }, + }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createEveryJob(6 * 60_000)], + }); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => nowMs, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), + }); + + await ensureLoaded(state, { skipRecompute: true }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createEveryJob(60_000)], + }); + + await ensureLoaded(state, { forceReload: true, skipRecompute: true }); + + const reloaded = state.store?.jobs.find((job) => job.id === jobId); + expect(reloaded?.schedule).toEqual({ + kind: "every", + everyMs: 60_000, + anchorMs: Date.parse("2026-03-19T00:00:00.000Z"), + }); + expect(reloaded?.state.nextRunAtMs).toBe(Date.parse("2026-03-19T01:44:00.000Z")); + }); }); diff --git a/src/cron/service.jobs.test.ts b/src/cron/service.jobs.test.ts index c514f7528ba..c97adb2f504 100644 --- a/src/cron/service.jobs.test.ts +++ b/src/cron/service.jobs.test.ts @@ -385,6 +385,7 @@ function createMockState(now: number, opts?: { defaultAgentId?: string }): CronS nowMs: () => now, defaultAgentId: opts?.defaultAgentId, }, + skipNextReloadRepairRecomputeJobIds: new Set(), } as unknown as CronServiceState; } diff --git a/src/cron/service.test-harness.ts b/src/cron/service.test-harness.ts index fcc62637892..a4803b5be3e 100644 --- a/src/cron/service.test-harness.ts +++ b/src/cron/service.test-harness.ts @@ -220,6 +220,7 @@ export function createMockCronStateForJobs(params: { timer: null, storeLoadedAtMs: nowMs, storeFileMtimeMs: null, + skipNextReloadRepairRecomputeJobIds: new Set(), op: Promise.resolve(), warnedDisabled: false, deps: { diff --git a/src/cron/service/jobs.schedule-error-isolation.test.ts b/src/cron/service/jobs.schedule-error-isolation.test.ts index 84cd8e0a1e9..3604c4ab45b 100644 --- a/src/cron/service/jobs.schedule-error-isolation.test.ts +++ b/src/cron/service/jobs.schedule-error-isolation.test.ts @@ -28,6 +28,11 @@ function createMockState(jobs: CronJob[]): CronServiceState { store, timer: null, running: false, + op: Promise.resolve(), + warnedDisabled: false, + storeLoadedAtMs: null, + storeFileMtimeMs: null, + skipNextReloadRepairRecomputeJobIds: new Set(), } as unknown as CronServiceState; } diff --git a/src/cron/service/jobs.ts b/src/cron/service/jobs.ts index db90f1502f1..184f136d98c 100644 --- a/src/cron/service/jobs.ts +++ b/src/cron/service/jobs.ts @@ -238,6 +238,19 @@ export function findJobOrThrow(state: CronServiceState, id: string) { return job; } +export function removeJobById(state: CronServiceState, jobId: string): boolean { + if (!state.store) { + return false; + } + const before = state.store.jobs.length; + state.store.jobs = state.store.jobs.filter((job) => job.id !== jobId); + const removed = state.store.jobs.length !== before; + if (removed) { + state.skipNextReloadRepairRecomputeJobIds.delete(jobId); + } + return removed; +} + export function computeJobNextRunAtMs(job: CronJob, nowMs: number): number | undefined { if (!job.enabled) { return undefined; @@ -414,8 +427,7 @@ function walkSchedulableJobs( } export function hasSkipNextReloadRepairRecompute(state: CronServiceState, jobId: string): boolean { - const pending = state.skipNextReloadRepairRecomputeJobIds; - return pending?.has(jobId) === true; + return state.skipNextReloadRepairRecomputeJobIds.has(jobId); } export function consumeSkipNextReloadRepairRecompute( @@ -425,7 +437,7 @@ export function consumeSkipNextReloadRepairRecompute( if (!hasSkipNextReloadRepairRecompute(state, jobId)) { return false; } - state.skipNextReloadRepairRecomputeJobIds?.delete(jobId); + state.skipNextReloadRepairRecomputeJobIds.delete(jobId); return true; } diff --git a/src/cron/service/ops.ts b/src/cron/service/ops.ts index 58b12933839..a59dd2c2251 100644 --- a/src/cron/service/ops.ts +++ b/src/cron/service/ops.ts @@ -9,10 +9,12 @@ import { findJobOrThrow, isJobDue, nextWakeAtMs, + removeJobById, recomputeNextRuns, recomputeNextRunsForMaintenance, } from "./jobs.js"; import { locked } from "./locked.js"; +import { schedulesEqual } from "./schedule-equality.js"; import type { CronServiceState } from "./state.js"; import { ensureLoaded, persist, warnIfDisabled } from "./store.js"; import { @@ -48,22 +50,6 @@ export type CronListPageResult = { nextOffset: number | null; }; -function schedulesEqual(a: CronJob["schedule"], b: CronJob["schedule"]): boolean { - if (a.kind !== b.kind) { - return false; - } - if (a.kind === "at" && b.kind === "at") { - return a.at === b.at; - } - if (a.kind === "every" && b.kind === "every") { - return a.everyMs === b.everyMs && a.anchorMs === b.anchorMs; - } - if (a.kind === "cron" && b.kind === "cron") { - return a.expr === b.expr && a.tz === b.tz && a.staggerMs === b.staggerMs; - } - return false; -} - function mergeManualRunSnapshotAfterReload(params: { state: CronServiceState; jobId: string; @@ -82,7 +68,7 @@ function mergeManualRunSnapshotAfterReload(params: { return; } if (params.removed) { - params.state.store.jobs = params.state.store.jobs.filter((job) => job.id !== params.jobId); + removeJobById(params.state, params.jobId); return; } if (!params.snapshot) { @@ -372,12 +358,10 @@ export async function remove(state: CronServiceState, id: string) { return await locked(state, async () => { warnIfDisabled(state, "remove"); await ensureLoaded(state); - const before = state.store?.jobs.length ?? 0; if (!state.store) { return { ok: false, removed: false } as const; } - state.store.jobs = state.store.jobs.filter((j) => j.id !== id); - const removed = (state.store.jobs.length ?? 0) !== before; + const removed = removeJobById(state, id); await persist(state); armTimer(state); if (removed) { @@ -554,8 +538,7 @@ async function finishPreparedManualRun( usage: coreResult.usage, }); - if (shouldDelete && state.store) { - state.store.jobs = state.store.jobs.filter((entry) => entry.id !== job.id); + if (shouldDelete && removeJobById(state, job.id)) { emit(state, { jobId: job.id, action: "removed" }); } diff --git a/src/cron/service/schedule-equality.ts b/src/cron/service/schedule-equality.ts new file mode 100644 index 00000000000..d4f64432ede --- /dev/null +++ b/src/cron/service/schedule-equality.ts @@ -0,0 +1,17 @@ +import type { CronJob } from "../types.js"; + +export function schedulesEqual(a: CronJob["schedule"], b: CronJob["schedule"]): boolean { + if (a.kind !== b.kind) { + return false; + } + if (a.kind === "at" && b.kind === "at") { + return a.at === b.at; + } + if (a.kind === "every" && b.kind === "every") { + return a.everyMs === b.everyMs && a.anchorMs === b.anchorMs; + } + if (a.kind === "cron" && b.kind === "cron") { + return a.expr === b.expr && a.tz === b.tz && a.staggerMs === b.staggerMs; + } + return false; +} diff --git a/src/cron/service/state.ts b/src/cron/service/state.ts index d51464c29d2..e56c7d23bcb 100644 --- a/src/cron/service/state.ts +++ b/src/cron/service/state.ts @@ -127,7 +127,7 @@ export type CronServiceState = { warnedDisabled: boolean; storeLoadedAtMs: number | null; storeFileMtimeMs: number | null; - skipNextReloadRepairRecomputeJobIds?: Set; + skipNextReloadRepairRecomputeJobIds: Set; }; export function createCronServiceState(deps: CronServiceDeps): CronServiceState { diff --git a/src/cron/service/store.ts b/src/cron/service/store.ts index 470b7178522..c4a64791a1e 100644 --- a/src/cron/service/store.ts +++ b/src/cron/service/store.ts @@ -3,6 +3,7 @@ import { normalizeStoredCronJobs } from "../store-migration.js"; import { loadCronStore, saveCronStore } from "../store.js"; import type { CronJob } from "../types.js"; import { computeJobNextRunAtMs, recordScheduleComputeError, recomputeNextRuns } from "./jobs.js"; +import { schedulesEqual } from "./schedule-equality.js"; import type { CronServiceState } from "./state.js"; async function getFileMtimeMs(path: string): Promise { @@ -14,32 +15,12 @@ async function getFileMtimeMs(path: string): Promise { } } -function schedulesEqual(a: CronJob["schedule"], b: CronJob["schedule"]): boolean { - if (a.kind !== b.kind) { - return false; - } - if (a.kind === "at" && b.kind === "at") { - return a.at === b.at; - } - if (a.kind === "every" && b.kind === "every") { - return a.everyMs === b.everyMs && a.anchorMs === b.anchorMs; - } - if (a.kind === "cron" && b.kind === "cron") { - return a.expr === b.expr && a.tz === b.tz && a.staggerMs === b.staggerMs; - } - return false; -} - -function getSkipNextReloadRepairRecomputeJobIds(state: CronServiceState): Set { - return (state.skipNextReloadRepairRecomputeJobIds ??= new Set()); -} - function repairNextRunsAfterExternalReload(params: { state: CronServiceState; previousJobs: CronJob[] | undefined; }): boolean { const { state, previousJobs } = params; - const skipRecomputeJobIds = getSkipNextReloadRepairRecomputeJobIds(state); + const skipRecomputeJobIds = state.skipNextReloadRepairRecomputeJobIds; if (!state.store || !previousJobs?.length) { return false; } diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index ecac35d5fe8..315be8a4fc8 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -17,6 +17,7 @@ import { computeJobNextRunAtMs, consumeSkipNextReloadRepairRecompute, nextWakeAtMs, + removeJobById, recomputeNextRunsForMaintenance, recordScheduleComputeError, resolveJobPayloadTextForMain, @@ -512,8 +513,7 @@ function applyOutcomeToStoredJob(state: CronServiceState, result: TimedCronRunOu emitJobFinished(state, job, result, result.startedAt); - if (shouldDelete) { - store.jobs = jobs.filter((entry) => entry.id !== job.id); + if (shouldDelete && removeJobById(state, job.id)) { emit(state, { jobId: job.id, action: "removed" }); } } @@ -1208,8 +1208,7 @@ export async function executeJob( emitJobFinished(state, job, coreResult, startedAt); - if (shouldDelete && state.store) { - state.store.jobs = state.store.jobs.filter((j) => j.id !== job.id); + if (shouldDelete && removeJobById(state, job.id)) { emit(state, { jobId: job.id, action: "removed" }); } } From 44e912eccd0976ec31ac56d4144a0c470b913dda Mon Sep 17 00:00:00 2001 From: create Date: Fri, 20 Mar 2026 18:23:34 +0800 Subject: [PATCH 08/15] fix(cron): keep repaired schedule error state after force-run reload --- ...external-reload-schedule-recompute.test.ts | 89 +++++++++++++++++++ src/cron/service/ops.ts | 2 +- 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/src/cron/service.external-reload-schedule-recompute.test.ts b/src/cron/service.external-reload-schedule-recompute.test.ts index d8378dab51d..88e3b0dd821 100644 --- a/src/cron/service.external-reload-schedule-recompute.test.ts +++ b/src/cron/service.external-reload-schedule-recompute.test.ts @@ -397,6 +397,95 @@ describe("forceReload repairs externally changed cron schedules", () => { expect(persistedJob?.state?.lastStatus).toBe("ok"); }); + it("keeps scheduleErrorCount cleared when external reload fixes schedule during force-run", async () => { + const store = await makeStorePath(); + let nowMs = Date.parse("2026-03-19T01:44:00.000Z"); + const jobId = "manual-run-reload-clears-schedule-error-count"; + const staleNextRunAtMs = Date.parse("2026-03-19T23:30:00.000Z"); + + const createJob = (params: { + expr: string; + scheduleErrorCount?: number; + lastError?: string; + nextRunAtMs?: number; + }): CronJob => ({ + id: jobId, + name: "manual run reload clears schedule error count", + enabled: true, + createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), + updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), + schedule: { kind: "cron", expr: params.expr, tz: "Asia/Shanghai", staggerMs: 0 }, + sessionTarget: "isolated", + wakeMode: "next-heartbeat", + payload: { kind: "agentTurn", message: "tick" }, + state: { + nextRunAtMs: params.nextRunAtMs, + scheduleErrorCount: params.scheduleErrorCount, + lastError: params.lastError, + }, + }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [ + createJob({ + expr: "30 23 * * *", + nextRunAtMs: staleNextRunAtMs, + scheduleErrorCount: 2, + lastError: "cron: invalid expression", + }), + ], + }); + + const runIsolatedAgentJob = vi.fn(async () => { + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [ + createJob({ + expr: "30 8 * * *", + nextRunAtMs: staleNextRunAtMs, + scheduleErrorCount: 2, + lastError: "cron: invalid expression", + }), + ], + }); + nowMs += 500; + return { status: "ok" as const, summary: "done" }; + }); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => nowMs, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob, + }); + + const result = await run(state, jobId, "force"); + expect(result).toEqual({ ok: true, ran: true }); + expect(runIsolatedAgentJob).toHaveBeenCalledTimes(1); + + const merged = state.store?.jobs.find((job) => job.id === jobId); + expect(merged?.schedule).toEqual({ + kind: "cron", + expr: "30 8 * * *", + tz: "Asia/Shanghai", + staggerMs: 0, + }); + expect(merged?.state.scheduleErrorCount).toBeUndefined(); + + const persisted = JSON.parse(await fs.readFile(store.storePath, "utf8")) as { + jobs?: Array<{ + id: string; + state?: { scheduleErrorCount?: number }; + }>; + }; + const persistedJob = persisted.jobs?.find((job) => job.id === jobId); + expect(persistedJob?.state?.scheduleErrorCount).toBeUndefined(); + }); + it("keeps one-shot terminal disable state when manual force-run reloads unchanged store", async () => { const store = await makeStorePath(); let nowMs = Date.parse("2026-03-19T01:44:00.000Z"); diff --git a/src/cron/service/ops.ts b/src/cron/service/ops.ts index a59dd2c2251..bc120b8258d 100644 --- a/src/cron/service/ops.ts +++ b/src/cron/service/ops.ts @@ -99,8 +99,8 @@ function mergeManualRunSnapshotAfterReload(params: { if (externalScheduleOrEnabledChanged) { reloaded.enabled = preservedEnabled; reloaded.state.nextRunAtMs = preservedNextRunAtMs; + reloaded.state.scheduleErrorCount = preservedScheduleErrorCount; if (preservedScheduleErrorCount !== undefined) { - reloaded.state.scheduleErrorCount = preservedScheduleErrorCount; reloaded.state.lastError = preservedScheduleErrorText; } } else { From 576d0dbcd9b5fc3afad964640bc0f51dfe6ebeeb Mon Sep 17 00:00:00 2001 From: create Date: Fri, 20 Mar 2026 18:41:47 +0800 Subject: [PATCH 09/15] fix(cron): recompute external reload repairs from edit time --- ...external-reload-schedule-recompute.test.ts | 51 +++++++++++++++++++ src/cron/service/store.ts | 27 +++++++++- 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/src/cron/service.external-reload-schedule-recompute.test.ts b/src/cron/service.external-reload-schedule-recompute.test.ts index 88e3b0dd821..1d168c0689f 100644 --- a/src/cron/service.external-reload-schedule-recompute.test.ts +++ b/src/cron/service.external-reload-schedule-recompute.test.ts @@ -79,6 +79,57 @@ describe("forceReload repairs externally changed cron schedules", () => { expect(persistedJob?.state?.nextRunAtMs).toBe(correctedNextRunAtMs); }); + it("recomputes from updatedAtMs so delayed reload keeps newly earlier slots due", async () => { + const store = await makeStorePath(); + const nowMs = Date.parse("2026-03-19T12:10:00.000Z"); + const initialUpdatedAtMs = Date.parse("2026-03-19T12:00:00.000Z"); + const editedAtMs = Date.parse("2026-03-19T12:01:00.000Z"); + const jobId = "external-schedule-change-delayed-observe"; + + const createJob = (params: { expr: string; updatedAtMs: number }): CronJob => ({ + id: jobId, + name: "external schedule delayed observe", + enabled: true, + createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), + updatedAtMs: params.updatedAtMs, + schedule: { kind: "cron", expr: params.expr, tz: "UTC", staggerMs: 0 }, + sessionTarget: "main", + wakeMode: "next-heartbeat", + payload: { kind: "systemEvent", text: "tick" }, + state: { + nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), + }, + }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob({ expr: "30 23 * * *", updatedAtMs: initialUpdatedAtMs })], + }); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => nowMs, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), + }); + + await ensureLoaded(state, { skipRecompute: true }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob({ expr: "* * * * *", updatedAtMs: editedAtMs })], + }); + + await ensureLoaded(state, { forceReload: true, skipRecompute: true }); + + const reloaded = state.store?.jobs.find((job) => job.id === jobId); + expect(reloaded?.state.nextRunAtMs).toBeLessThan(nowMs); + expect(reloaded?.state.nextRunAtMs).toBe(Date.parse("2026-03-19T12:02:00.000Z")); + }); + it("records schedule errors instead of aborting reload when an external edit is invalid", async () => { const store = await makeStorePath(); const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); diff --git a/src/cron/service/store.ts b/src/cron/service/store.ts index c4a64791a1e..3727b1cdd55 100644 --- a/src/cron/service/store.ts +++ b/src/cron/service/store.ts @@ -15,6 +15,25 @@ async function getFileMtimeMs(path: string): Promise { } } +function resolveExternalRepairComputeBaseMs(params: { + nowMs: number; + reloadedUpdatedAtMs: number; + previousUpdatedAtMs: number; +}): number { + const { nowMs, reloadedUpdatedAtMs, previousUpdatedAtMs } = params; + if (!Number.isFinite(reloadedUpdatedAtMs)) { + return nowMs; + } + const normalizedReloadedUpdatedAtMs = Math.max(0, Math.floor(reloadedUpdatedAtMs)); + const normalizedPreviousUpdatedAtMs = Number.isFinite(previousUpdatedAtMs) + ? Math.max(0, Math.floor(previousUpdatedAtMs)) + : Number.NEGATIVE_INFINITY; + if (normalizedReloadedUpdatedAtMs <= normalizedPreviousUpdatedAtMs) { + return nowMs; + } + return Math.min(nowMs, normalizedReloadedUpdatedAtMs); +} + function repairNextRunsAfterExternalReload(params: { state: CronServiceState; previousJobs: CronJob[] | undefined; @@ -50,9 +69,14 @@ function repairNextRunsAfterExternalReload(params: { } skipRecomputeJobIds.delete(job.id); + const computeBaseMs = resolveExternalRepairComputeBaseMs({ + nowMs: now, + reloadedUpdatedAtMs: job.updatedAtMs, + previousUpdatedAtMs: previous.updatedAtMs, + }); let nextRunAtMs: number | undefined; try { - nextRunAtMs = job.enabled ? computeJobNextRunAtMs(job, now) : undefined; + nextRunAtMs = job.enabled ? computeJobNextRunAtMs(job, computeBaseMs) : undefined; if (job.state.scheduleErrorCount !== undefined) { job.state.scheduleErrorCount = undefined; changed = true; @@ -78,6 +102,7 @@ function repairNextRunsAfterExternalReload(params: { jobId: job.id, scheduleChanged, enabledChanged, + computeBaseMs, nextRunAtMs: job.state.nextRunAtMs, }, "cron: repaired nextRunAtMs after external reload", From 4aabf034eb58005c015064e9bb423f4287b2dea6 Mon Sep 17 00:00:00 2001 From: create Date: Fri, 20 Mar 2026 18:55:39 +0800 Subject: [PATCH 10/15] fix(cron): avoid synthetic reruns after reload-repair defer --- src/cron/service.issue-regressions.test.ts | 16 +++++++--- src/cron/service/jobs.ts | 35 +++++++++++++++------- src/cron/service/timer.ts | 12 ++------ 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/src/cron/service.issue-regressions.test.ts b/src/cron/service.issue-regressions.test.ts index c8078672b97..3a1f291943c 100644 --- a/src/cron/service.issue-regressions.test.ts +++ b/src/cron/service.issue-regressions.test.ts @@ -22,7 +22,11 @@ import { createNoopLogger, createRunningCronServiceState, } from "./service.test-harness.js"; -import { computeJobNextRunAtMs, recomputeNextRunsForMaintenance } from "./service/jobs.js"; +import { + computeJobNextRunAtMs, + nextWakeAtMs, + recomputeNextRunsForMaintenance, +} from "./service/jobs.js"; import { enqueueRun, run } from "./service/ops.js"; import { createCronServiceState, type CronEvent } from "./service/state.js"; import { @@ -1780,11 +1784,13 @@ describe("Cron issue regressions", () => { }); expect(job.state.scheduleErrorCount).toBe(1); - expect(state.skipNextReloadRepairRecomputeJobIds?.has(job.id)).toBe(false); + expect(state.skipNextReloadRepairRecomputeJobIds?.has(job.id)).toBe(true); + expect(nextWakeAtMs(state)).toBe(endedAt + 2_000); recomputeNextRunsForMaintenance(state); expect(job.state.scheduleErrorCount).toBe(1); expect(state.skipNextReloadRepairRecomputeJobIds?.has(job.id)).toBe(false); + expect(nextWakeAtMs(state)).toBe(endedAt + 2_000); }); it("keeps a future wake when apply skips immediate recompute after reload schedule error", () => { @@ -1819,12 +1825,14 @@ describe("Cron issue regressions", () => { }); expect(job.state.scheduleErrorCount).toBe(1); - expect(job.state.nextRunAtMs).toBe(endedAt + 30_000); - expect(state.skipNextReloadRepairRecomputeJobIds?.has(job.id)).toBe(false); + expect(job.state.nextRunAtMs).toBeUndefined(); + expect(state.skipNextReloadRepairRecomputeJobIds?.has(job.id)).toBe(true); + expect(nextWakeAtMs(state)).toBe(endedAt + 2_000); recomputeNextRunsForMaintenance(state); expect(job.state.scheduleErrorCount).toBe(1); expect(state.skipNextReloadRepairRecomputeJobIds?.has(job.id)).toBe(false); + expect(nextWakeAtMs(state)).toBe(endedAt + 2_000); }); it("force run preserves 'every' anchor while recording manual lastRunAtMs", () => { diff --git a/src/cron/service/jobs.ts b/src/cron/service/jobs.ts index 184f136d98c..6ba919bcd1c 100644 --- a/src/cron/service/jobs.ts +++ b/src/cron/service/jobs.ts @@ -33,6 +33,7 @@ import { import type { CronServiceState } from "./state.js"; const STUCK_RUN_MS = 2 * 60 * 60 * 1000; +const MISSING_NEXT_RUN_WAKE_MS = 2_000; const STAGGER_OFFSET_CACHE_MAX = 4096; const staggerOffsetCache = new Map(); @@ -526,18 +527,32 @@ export function recomputeNextRunsForMaintenance( export function nextWakeAtMs(state: CronServiceState) { const jobs = state.store?.jobs ?? []; - const enabled = jobs.filter((j) => j.enabled && isFiniteTimestamp(j.state.nextRunAtMs)); - if (enabled.length === 0) { - return undefined; + let minEnabledNextRunAtMs: number | undefined; + let hasEnabledMissingNextRun = false; + + for (const job of jobs) { + if (!job.enabled) { + continue; + } + const nextRunAtMs = job.state.nextRunAtMs; + if (isFiniteTimestamp(nextRunAtMs)) { + minEnabledNextRunAtMs = + minEnabledNextRunAtMs === undefined + ? nextRunAtMs + : Math.min(minEnabledNextRunAtMs, nextRunAtMs); + continue; + } + hasEnabledMissingNextRun = true; } - const first = enabled[0]?.state.nextRunAtMs; - if (!isFiniteTimestamp(first)) { - return undefined; + + if (!hasEnabledMissingNextRun) { + return minEnabledNextRunAtMs; } - return enabled.reduce((min, j) => { - const next = j.state.nextRunAtMs; - return isFiniteTimestamp(next) ? Math.min(min, next) : min; - }, first); + + const wakeForMissingNextRunAtMs = state.deps.nowMs() + MISSING_NEXT_RUN_WAKE_MS; + return minEnabledNextRunAtMs === undefined + ? wakeForMissingNextRunAtMs + : Math.min(minEnabledNextRunAtMs, wakeForMissingNextRunAtMs); } export function createJob(state: CronServiceState, input: CronJobCreate): CronJob { diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index 315be8a4fc8..66662de5616 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -15,7 +15,7 @@ import type { import { computeJobPreviousRunAtMs, computeJobNextRunAtMs, - consumeSkipNextReloadRepairRecompute, + hasSkipNextReloadRepairRecompute, nextWakeAtMs, removeJobById, recomputeNextRunsForMaintenance, @@ -370,7 +370,7 @@ export function applyJobResult( const shouldDelete = job.schedule.kind === "at" && job.deleteAfterRun === true && result.status === "ok"; - const skipImmediateScheduleRecompute = consumeSkipNextReloadRepairRecompute(state, job.id); + const skipImmediateScheduleRecompute = hasSkipNextReloadRepairRecompute(state, job.id); if (!shouldDelete) { if (job.schedule.kind === "at") { @@ -445,10 +445,6 @@ export function applyJobResult( }, "cron: applying error backoff", ); - } else { - // Keep a future wake so we don't stall when the one-shot skip marker - // defers immediate schedule recompute after reload repair. - job.state.nextRunAtMs = result.endedAt + backoff; } } else if (job.enabled) { if (!skipImmediateScheduleRecompute) { @@ -475,10 +471,6 @@ export function applyJobResult( } else { job.state.nextRunAtMs = naturalNext; } - } else if (job.state.nextRunAtMs === undefined) { - // Keep timer progress when immediate recompute is deferred by the - // reload-repair skip marker. - job.state.nextRunAtMs = result.endedAt + MIN_REFIRE_GAP_MS; } } else { job.state.nextRunAtMs = undefined; From f5eb99df3f2092be9cf9b14c2ee2cfa3d606dba1 Mon Sep 17 00:00:00 2001 From: create Date: Fri, 20 Mar 2026 19:20:00 +0800 Subject: [PATCH 11/15] fix(cron): repair stale cold-load schedules and stabilize jiti regression test --- ...external-reload-schedule-recompute.test.ts | 49 +++++++++++++++++ src/cron/service/store.ts | 54 ++++++++++++++----- src/plugins/loader.test.ts | 6 +-- 3 files changed, 91 insertions(+), 18 deletions(-) diff --git a/src/cron/service.external-reload-schedule-recompute.test.ts b/src/cron/service.external-reload-schedule-recompute.test.ts index 1d168c0689f..bb25f792773 100644 --- a/src/cron/service.external-reload-schedule-recompute.test.ts +++ b/src/cron/service.external-reload-schedule-recompute.test.ts @@ -130,6 +130,55 @@ describe("forceReload repairs externally changed cron schedules", () => { expect(reloaded?.state.nextRunAtMs).toBe(Date.parse("2026-03-19T12:02:00.000Z")); }); + it("repairs stale nextRunAtMs on first load after restart", async () => { + const store = await makeStorePath(); + const nowMs = Date.parse("2026-03-19T12:10:00.000Z"); + const editedAtMs = Date.parse("2026-03-19T12:01:00.000Z"); + const jobId = "external-schedule-change-cold-load"; + const staleNextRunAtMs = Date.parse("2026-03-20T00:30:00.000Z"); + + const createJob = (): CronJob => ({ + id: jobId, + name: "external schedule cold load repair", + enabled: true, + createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), + updatedAtMs: editedAtMs, + schedule: { kind: "cron", expr: "* * * * *", tz: "UTC", staggerMs: 0 }, + sessionTarget: "main", + wakeMode: "next-heartbeat", + payload: { kind: "systemEvent", text: "tick" }, + state: { + nextRunAtMs: staleNextRunAtMs, + }, + }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob()], + }); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => nowMs, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), + }); + + await ensureLoaded(state, { skipRecompute: true }); + + const reloaded = state.store?.jobs.find((job) => job.id === jobId); + expect(reloaded?.state.nextRunAtMs).toBe(Date.parse("2026-03-19T12:02:00.000Z")); + + const persisted = JSON.parse(await fs.readFile(store.storePath, "utf8")) as { + jobs?: Array<{ id: string; state?: { nextRunAtMs?: number } }>; + }; + const persistedJob = persisted.jobs?.find((job) => job.id === jobId); + expect(persistedJob?.state?.nextRunAtMs).toBe(Date.parse("2026-03-19T12:02:00.000Z")); + }); + it("records schedule errors instead of aborting reload when an external edit is invalid", async () => { const store = await makeStorePath(); const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); diff --git a/src/cron/service/store.ts b/src/cron/service/store.ts index 3727b1cdd55..408a9186fae 100644 --- a/src/cron/service/store.ts +++ b/src/cron/service/store.ts @@ -34,13 +34,38 @@ function resolveExternalRepairComputeBaseMs(params: { return Math.min(nowMs, normalizedReloadedUpdatedAtMs); } +function shouldRepairColdLoadNextRun(params: { job: CronJob; nowMs: number }): boolean { + const { job, nowMs } = params; + if (!job.enabled) { + return false; + } + if ((job.state.consecutiveErrors ?? 0) > 0) { + return false; + } + if (typeof job.updatedAtMs !== "number" || !Number.isFinite(job.updatedAtMs)) { + return false; + } + const persistedNextRunAtMs = job.state.nextRunAtMs; + if (typeof persistedNextRunAtMs !== "number" || !Number.isFinite(persistedNextRunAtMs)) { + return false; + } + // Cold-load repair is only for stale future schedules edited while the + // gateway was offline. Already-due timestamps should be preserved so they can + // execute on the next tick. + if (persistedNextRunAtMs <= nowMs) { + return false; + } + const computeBaseMs = Math.min(nowMs, Math.max(0, Math.floor(job.updatedAtMs))); + return computeBaseMs < persistedNextRunAtMs; +} + function repairNextRunsAfterExternalReload(params: { state: CronServiceState; previousJobs: CronJob[] | undefined; }): boolean { const { state, previousJobs } = params; const skipRecomputeJobIds = state.skipNextReloadRepairRecomputeJobIds; - if (!state.store || !previousJobs?.length) { + if (!state.store) { return false; } if (skipRecomputeJobIds.size > 0) { @@ -52,28 +77,28 @@ function repairNextRunsAfterExternalReload(params: { } } - const previousById = new Map(previousJobs.map((job) => [job.id, job])); + const previousById = new Map((previousJobs ?? []).map((job) => [job.id, job])); const now = state.deps.nowMs(); let changed = false; for (const job of state.store.jobs) { const previous = previousById.get(job.id); - if (!previous) { - continue; - } - - const scheduleChanged = !schedulesEqual(previous.schedule, job.schedule); - const enabledChanged = previous.enabled !== job.enabled; - if (!scheduleChanged && !enabledChanged) { + const coldLoadRepairCandidate = + previousJobs === undefined && shouldRepairColdLoadNextRun({ job, nowMs: now }); + const scheduleChanged = previous ? !schedulesEqual(previous.schedule, job.schedule) : false; + const enabledChanged = previous ? previous.enabled !== job.enabled : false; + if (!scheduleChanged && !enabledChanged && !coldLoadRepairCandidate) { continue; } skipRecomputeJobIds.delete(job.id); - const computeBaseMs = resolveExternalRepairComputeBaseMs({ - nowMs: now, - reloadedUpdatedAtMs: job.updatedAtMs, - previousUpdatedAtMs: previous.updatedAtMs, - }); + const computeBaseMs = coldLoadRepairCandidate + ? Math.min(now, Math.max(0, Math.floor(job.updatedAtMs))) + : resolveExternalRepairComputeBaseMs({ + nowMs: now, + reloadedUpdatedAtMs: job.updatedAtMs, + previousUpdatedAtMs: previous?.updatedAtMs ?? Number.NEGATIVE_INFINITY, + }); let nextRunAtMs: number | undefined; try { nextRunAtMs = job.enabled ? computeJobNextRunAtMs(job, computeBaseMs) : undefined; @@ -102,6 +127,7 @@ function repairNextRunsAfterExternalReload(params: { jobId: job.id, scheduleChanged, enabledChanged, + coldLoadRepairCandidate, computeBaseMs, nextRunAtMs: job.state.nextRunAtMs, }, diff --git a/src/plugins/loader.test.ts b/src/plugins/loader.test.ts index 8af6cf927d4..d81f8c18326 100644 --- a/src/plugins/loader.test.ts +++ b/src/plugins/loader.test.ts @@ -1,7 +1,6 @@ import fs from "node:fs"; import os from "node:os"; import path from "node:path"; -import { pathToFileURL } from "node:url"; import { afterAll, afterEach, describe, expect, it, vi } from "vitest"; import { emitDiagnosticEvent, resetDiagnosticEventsForTest } from "../infra/diagnostic-events.js"; import { withEnv } from "../test-utils/env.js"; @@ -3701,10 +3700,9 @@ export const syntheticRuntimeMarker = { "utf-8", ); const copiedChannelRuntime = path.join(copiedExtensionRoot, "src", "channel.runtime.ts"); - const jitiBaseUrl = pathToFileURL(jitiBaseFile).href; const createJiti = await getCreateJiti(); - const withoutAlias = createJiti(jitiBaseUrl, { + const withoutAlias = createJiti(jitiBaseFile, { ...__testing.buildPluginLoaderJitiOptions({}), tryNative: false, }); @@ -3712,7 +3710,7 @@ export const syntheticRuntimeMarker = { // follow the same path instead of the async import helper. expect(() => withoutAlias(copiedChannelRuntime)).toThrow(); - const withAlias = createJiti(jitiBaseUrl, { + const withAlias = createJiti(jitiBaseFile, { ...__testing.buildPluginLoaderJitiOptions({ "openclaw/plugin-sdk/infra-runtime": copiedChannelRuntimeShim, }), From a7217bfcf7b5a387bffd6f873b97474343737208 Mon Sep 17 00:00:00 2001 From: create Date: Fri, 20 Mar 2026 19:34:36 +0800 Subject: [PATCH 12/15] fix(cron): tighten cold-load repair and wake fallback heuristics --- ...external-reload-schedule-recompute.test.ts | 87 +++++++++++++++++++ src/cron/service.issue-regressions.test.ts | 47 ++++++++++ src/cron/service/jobs.ts | 22 ++++- src/cron/service/store.ts | 13 ++- 4 files changed, 157 insertions(+), 12 deletions(-) diff --git a/src/cron/service.external-reload-schedule-recompute.test.ts b/src/cron/service.external-reload-schedule-recompute.test.ts index bb25f792773..16af1888e78 100644 --- a/src/cron/service.external-reload-schedule-recompute.test.ts +++ b/src/cron/service.external-reload-schedule-recompute.test.ts @@ -179,6 +179,93 @@ describe("forceReload repairs externally changed cron schedules", () => { expect(persistedJob?.state?.nextRunAtMs).toBe(Date.parse("2026-03-19T12:02:00.000Z")); }); + it("repairs overdue stale nextRunAtMs on first load when edit happened later", async () => { + const store = await makeStorePath(); + const nowMs = Date.parse("2026-03-19T12:10:00.000Z"); + const staleDueNextRunAtMs = Date.parse("2026-03-19T12:00:00.000Z"); + const editedAtMs = Date.parse("2026-03-19T12:05:00.000Z"); + const jobId = "external-schedule-change-cold-load-overdue"; + + const createJob = (): CronJob => ({ + id: jobId, + name: "external schedule cold load overdue repair", + enabled: true, + createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), + updatedAtMs: editedAtMs, + schedule: { kind: "cron", expr: "30 23 * * *", tz: "UTC", staggerMs: 0 }, + sessionTarget: "main", + wakeMode: "next-heartbeat", + payload: { kind: "systemEvent", text: "tick" }, + state: { + nextRunAtMs: staleDueNextRunAtMs, + }, + }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob()], + }); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => nowMs, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), + }); + + await ensureLoaded(state, { skipRecompute: true }); + + const reloaded = state.store?.jobs.find((job) => job.id === jobId); + expect(reloaded?.state.nextRunAtMs).toBe(Date.parse("2026-03-19T23:30:00.000Z")); + }); + + it("repairs cold-load stale nextRunAtMs even when consecutiveErrors is set", async () => { + const store = await makeStorePath(); + const nowMs = Date.parse("2026-03-19T12:10:00.000Z"); + const editedAtMs = Date.parse("2026-03-19T12:01:00.000Z"); + const jobId = "external-schedule-change-cold-load-consecutive-errors"; + const staleNextRunAtMs = Date.parse("2026-03-20T00:30:00.000Z"); + + const createJob = (): CronJob => ({ + id: jobId, + name: "external schedule cold load with consecutiveErrors", + enabled: true, + createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), + updatedAtMs: editedAtMs, + schedule: { kind: "cron", expr: "* * * * *", tz: "UTC", staggerMs: 0 }, + sessionTarget: "main", + wakeMode: "next-heartbeat", + payload: { kind: "systemEvent", text: "tick" }, + state: { + nextRunAtMs: staleNextRunAtMs, + consecutiveErrors: 2, + }, + }); + + await writeCronStoreSnapshot({ + storePath: store.storePath, + jobs: [createJob()], + }); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => nowMs, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), + }); + + await ensureLoaded(state, { skipRecompute: true }); + + const reloaded = state.store?.jobs.find((job) => job.id === jobId); + expect(reloaded?.state.nextRunAtMs).toBe(Date.parse("2026-03-19T12:02:00.000Z")); + }); + it("records schedule errors instead of aborting reload when an external edit is invalid", async () => { const store = await makeStorePath(); const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); diff --git a/src/cron/service.issue-regressions.test.ts b/src/cron/service.issue-regressions.test.ts index 3a1f291943c..baf73b71192 100644 --- a/src/cron/service.issue-regressions.test.ts +++ b/src/cron/service.issue-regressions.test.ts @@ -1835,6 +1835,53 @@ describe("Cron issue regressions", () => { expect(nextWakeAtMs(state)).toBe(endedAt + 2_000); }); + it("does not arm 2s wake for malformed every schedules with non-repairable missing nextRun", () => { + const nowMs = Date.parse("2026-03-02T12:20:00.000Z"); + const job = createIsolatedRegressionJob({ + id: "missing-nextrun-malformed-every", + name: "missing-nextrun-malformed-every", + scheduledAt: nowMs, + schedule: { kind: "every", everyMs: Number.NaN, anchorMs: nowMs - 60_000 }, + payload: { kind: "agentTurn", message: "ping" }, + state: { + nextRunAtMs: undefined, + }, + }); + const state = createRunningCronServiceState({ + storePath: "/tmp/cron-missing-nextrun-malformed-every.json", + log: noopLogger as never, + nowMs: () => nowMs, + jobs: [job], + }); + + expect(nextWakeAtMs(state)).toBeUndefined(); + }); + + it("does not arm 2s wake for exhausted one-shot jobs with missing nextRun", () => { + const nowMs = Date.parse("2026-03-02T12:22:00.000Z"); + const atMs = nowMs - 60_000; + const job = createIsolatedRegressionJob({ + id: "missing-nextrun-exhausted-at", + name: "missing-nextrun-exhausted-at", + scheduledAt: nowMs, + schedule: { kind: "at", at: new Date(atMs).toISOString() }, + payload: { kind: "agentTurn", message: "ping" }, + state: { + nextRunAtMs: undefined, + lastStatus: "ok", + lastRunAtMs: nowMs - 30_000, + }, + }); + const state = createRunningCronServiceState({ + storePath: "/tmp/cron-missing-nextrun-exhausted-at.json", + log: noopLogger as never, + nowMs: () => nowMs, + jobs: [job], + }); + + expect(nextWakeAtMs(state)).toBeUndefined(); + }); + it("force run preserves 'every' anchor while recording manual lastRunAtMs", () => { const nowMs = Date.now(); const everyMs = 24 * 60 * 60 * 1_000; diff --git a/src/cron/service/jobs.ts b/src/cron/service/jobs.ts index 6ba919bcd1c..3c087c94331 100644 --- a/src/cron/service/jobs.ts +++ b/src/cron/service/jobs.ts @@ -528,7 +528,8 @@ export function recomputeNextRunsForMaintenance( export function nextWakeAtMs(state: CronServiceState) { const jobs = state.store?.jobs ?? []; let minEnabledNextRunAtMs: number | undefined; - let hasEnabledMissingNextRun = false; + let hasEnabledRepairableMissingNextRun = false; + const nowMs = state.deps.nowMs(); for (const job of jobs) { if (!job.enabled) { @@ -542,14 +543,27 @@ export function nextWakeAtMs(state: CronServiceState) { : Math.min(minEnabledNextRunAtMs, nextRunAtMs); continue; } - hasEnabledMissingNextRun = true; + // Only wake for missing nextRun values that can be repaired by recompute. + // Non-repairable malformed schedules (e.g. invalid every/at payloads) + // should not keep the scheduler in a perpetual 2s poll loop. + if ((job.state.scheduleErrorCount ?? 0) > 0) { + hasEnabledRepairableMissingNextRun = true; + continue; + } + try { + if (computeJobNextRunAtMs(job, nowMs) !== undefined) { + hasEnabledRepairableMissingNextRun = true; + } + } catch { + hasEnabledRepairableMissingNextRun = true; + } } - if (!hasEnabledMissingNextRun) { + if (!hasEnabledRepairableMissingNextRun) { return minEnabledNextRunAtMs; } - const wakeForMissingNextRunAtMs = state.deps.nowMs() + MISSING_NEXT_RUN_WAKE_MS; + const wakeForMissingNextRunAtMs = nowMs + MISSING_NEXT_RUN_WAKE_MS; return minEnabledNextRunAtMs === undefined ? wakeForMissingNextRunAtMs : Math.min(minEnabledNextRunAtMs, wakeForMissingNextRunAtMs); diff --git a/src/cron/service/store.ts b/src/cron/service/store.ts index 408a9186fae..04ac09fa33b 100644 --- a/src/cron/service/store.ts +++ b/src/cron/service/store.ts @@ -39,9 +39,6 @@ function shouldRepairColdLoadNextRun(params: { job: CronJob; nowMs: number }): b if (!job.enabled) { return false; } - if ((job.state.consecutiveErrors ?? 0) > 0) { - return false; - } if (typeof job.updatedAtMs !== "number" || !Number.isFinite(job.updatedAtMs)) { return false; } @@ -49,13 +46,13 @@ function shouldRepairColdLoadNextRun(params: { job: CronJob; nowMs: number }): b if (typeof persistedNextRunAtMs !== "number" || !Number.isFinite(persistedNextRunAtMs)) { return false; } - // Cold-load repair is only for stale future schedules edited while the - // gateway was offline. Already-due timestamps should be preserved so they can - // execute on the next tick. + const normalizedUpdatedAtMs = Math.max(0, Math.floor(job.updatedAtMs)); + // If a schedule edit happened after the persisted slot, the slot is stale + // even when it is already overdue at startup. if (persistedNextRunAtMs <= nowMs) { - return false; + return normalizedUpdatedAtMs > persistedNextRunAtMs; } - const computeBaseMs = Math.min(nowMs, Math.max(0, Math.floor(job.updatedAtMs))); + const computeBaseMs = Math.min(nowMs, normalizedUpdatedAtMs); return computeBaseMs < persistedNextRunAtMs; } From 83844adec2a9bca688a579dd6b6dc0c4cc10f8cc Mon Sep 17 00:00:00 2001 From: create Date: Fri, 20 Mar 2026 20:49:42 +0800 Subject: [PATCH 13/15] fix(cron): avoid cold-load backoff regression and restore loader jiti base --- src/cron/service/store.ts | 59 ++++++++++++++++++++++++++++++++++++++ src/plugins/loader.test.ts | 6 ++-- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/src/cron/service/store.ts b/src/cron/service/store.ts index 04ac09fa33b..8c2a6cb0fde 100644 --- a/src/cron/service/store.ts +++ b/src/cron/service/store.ts @@ -1,4 +1,6 @@ import fs from "node:fs"; +import { parseAbsoluteTimeMs } from "../parse.js"; +import { coerceFiniteScheduleNumber } from "../schedule.js"; import { normalizeStoredCronJobs } from "../store-migration.js"; import { loadCronStore, saveCronStore } from "../store.js"; import type { CronJob } from "../types.js"; @@ -34,6 +36,22 @@ function resolveExternalRepairComputeBaseMs(params: { return Math.min(nowMs, normalizedReloadedUpdatedAtMs); } +function hasPendingErrorBackoff(job: CronJob, nowMs: number): boolean { + const nextRunAtMs = job.state.nextRunAtMs; + if (typeof nextRunAtMs !== "number" || !Number.isFinite(nextRunAtMs) || nextRunAtMs <= nowMs) { + return false; + } + const consecutiveErrors = job.state.consecutiveErrors; + if ( + typeof consecutiveErrors !== "number" || + !Number.isFinite(consecutiveErrors) || + consecutiveErrors <= 0 + ) { + return false; + } + return job.state.lastStatus === "error"; +} + function shouldRepairColdLoadNextRun(params: { job: CronJob; nowMs: number }): boolean { const { job, nowMs } = params; if (!job.enabled) { @@ -52,10 +70,40 @@ function shouldRepairColdLoadNextRun(params: { job: CronJob; nowMs: number }): b if (persistedNextRunAtMs <= nowMs) { return normalizedUpdatedAtMs > persistedNextRunAtMs; } + if (hasPendingErrorBackoff(job, nowMs)) { + return false; + } const computeBaseMs = Math.min(nowMs, normalizedUpdatedAtMs); return computeBaseMs < persistedNextRunAtMs; } +function parseAtScheduleMs(schedule: Extract): number | null { + const legacy = schedule as { at?: string; atMs?: number | string }; + if (typeof legacy.atMs === "number" && Number.isFinite(legacy.atMs) && legacy.atMs > 0) { + return legacy.atMs; + } + if (typeof legacy.atMs === "string") { + return parseAbsoluteTimeMs(legacy.atMs); + } + if (typeof legacy.at === "string") { + return parseAbsoluteTimeMs(legacy.at); + } + return null; +} + +function shouldTreatUndefinedNextRunAsScheduleError(job: CronJob): boolean { + if (!job.enabled) { + return false; + } + if (job.schedule.kind === "every") { + return coerceFiniteScheduleNumber(job.schedule.everyMs) === undefined; + } + if (job.schedule.kind === "at") { + return parseAtScheduleMs(job.schedule) === null; + } + return false; +} + function repairNextRunsAfterExternalReload(params: { state: CronServiceState; previousJobs: CronJob[] | undefined; @@ -99,6 +147,17 @@ function repairNextRunsAfterExternalReload(params: { let nextRunAtMs: number | undefined; try { nextRunAtMs = job.enabled ? computeJobNextRunAtMs(job, computeBaseMs) : undefined; + if (nextRunAtMs === undefined && shouldTreatUndefinedNextRunAsScheduleError(job)) { + const err = + job.schedule.kind === "every" + ? new Error("invalid every schedule: everyMs must be a finite number") + : new Error("invalid at schedule: at must be a valid absolute timestamp"); + if (recordScheduleComputeError({ state, job, err })) { + changed = true; + } + skipRecomputeJobIds.add(job.id); + continue; + } if (job.state.scheduleErrorCount !== undefined) { job.state.scheduleErrorCount = undefined; changed = true; diff --git a/src/plugins/loader.test.ts b/src/plugins/loader.test.ts index d81f8c18326..8af6cf927d4 100644 --- a/src/plugins/loader.test.ts +++ b/src/plugins/loader.test.ts @@ -1,6 +1,7 @@ import fs from "node:fs"; import os from "node:os"; import path from "node:path"; +import { pathToFileURL } from "node:url"; import { afterAll, afterEach, describe, expect, it, vi } from "vitest"; import { emitDiagnosticEvent, resetDiagnosticEventsForTest } from "../infra/diagnostic-events.js"; import { withEnv } from "../test-utils/env.js"; @@ -3700,9 +3701,10 @@ export const syntheticRuntimeMarker = { "utf-8", ); const copiedChannelRuntime = path.join(copiedExtensionRoot, "src", "channel.runtime.ts"); + const jitiBaseUrl = pathToFileURL(jitiBaseFile).href; const createJiti = await getCreateJiti(); - const withoutAlias = createJiti(jitiBaseFile, { + const withoutAlias = createJiti(jitiBaseUrl, { ...__testing.buildPluginLoaderJitiOptions({}), tryNative: false, }); @@ -3710,7 +3712,7 @@ export const syntheticRuntimeMarker = { // follow the same path instead of the async import helper. expect(() => withoutAlias(copiedChannelRuntime)).toThrow(); - const withAlias = createJiti(jitiBaseFile, { + const withAlias = createJiti(jitiBaseUrl, { ...__testing.buildPluginLoaderJitiOptions({ "openclaw/plugin-sdk/infra-runtime": copiedChannelRuntimeShim, }), From 3cb6564ac2400ffb55690f77a9cc06853f3cbba1 Mon Sep 17 00:00:00 2001 From: create Date: Sat, 21 Mar 2026 02:09:07 +0800 Subject: [PATCH 14/15] fix(cron): preserve reload repair error state --- src/cron/service.issue-regressions.test.ts | 36 +++++++++++++- .../jobs.schedule-error-isolation.test.ts | 40 +++++++++++++++ src/cron/service/jobs.ts | 49 +++++++++++++++++-- src/cron/service/ops.ts | 4 +- src/cron/service/store.ts | 36 +++----------- 5 files changed, 127 insertions(+), 38 deletions(-) diff --git a/src/cron/service.issue-regressions.test.ts b/src/cron/service.issue-regressions.test.ts index baf73b71192..e0ca0f32816 100644 --- a/src/cron/service.issue-regressions.test.ts +++ b/src/cron/service.issue-regressions.test.ts @@ -27,7 +27,7 @@ import { nextWakeAtMs, recomputeNextRunsForMaintenance, } from "./service/jobs.js"; -import { enqueueRun, run } from "./service/ops.js"; +import { enqueueRun, list, run, status } from "./service/ops.js"; import { createCronServiceState, type CronEvent } from "./service/state.js"; import { DEFAULT_JOB_TIMEOUT_MS, @@ -1857,6 +1857,40 @@ describe("Cron issue regressions", () => { expect(nextWakeAtMs(state)).toBeUndefined(); }); + it("does not consume reload skip markers during read-only status/list maintenance", async () => { + const nowMs = Date.parse("2026-03-02T12:25:00.000Z"); + const job = createIsolatedRegressionJob({ + id: "read-only-skip-marker", + name: "read-only-skip-marker", + scheduledAt: nowMs, + schedule: { kind: "cron", expr: "0 7 * * *", tz: "Invalid/Timezone" }, + payload: { kind: "agentTurn", message: "ping" }, + state: { + nextRunAtMs: undefined, + scheduleErrorCount: 1, + lastError: "schedule error: previous", + }, + }); + const state = createRunningCronServiceState({ + storePath: "/tmp/cron-read-only-skip-marker.json", + log: noopLogger as never, + nowMs: () => nowMs, + jobs: [job], + }); + state.skipNextReloadRepairRecomputeJobIds = new Set([job.id]); + + const currentStatus = await status(state); + expect(currentStatus.nextWakeAtMs).toBe(nowMs + 2_000); + expect(state.skipNextReloadRepairRecomputeJobIds.has(job.id)).toBe(true); + + const jobs = await list(state, { includeDisabled: true }); + expect(jobs).toHaveLength(1); + expect(state.skipNextReloadRepairRecomputeJobIds.has(job.id)).toBe(true); + + recomputeNextRunsForMaintenance(state); + expect(state.skipNextReloadRepairRecomputeJobIds.has(job.id)).toBe(false); + }); + it("does not arm 2s wake for exhausted one-shot jobs with missing nextRun", () => { const nowMs = Date.parse("2026-03-02T12:22:00.000Z"); const atMs = nowMs - 60_000; diff --git a/src/cron/service/jobs.schedule-error-isolation.test.ts b/src/cron/service/jobs.schedule-error-isolation.test.ts index 3604c4ab45b..cd55e4aa56d 100644 --- a/src/cron/service/jobs.schedule-error-isolation.test.ts +++ b/src/cron/service/jobs.schedule-error-isolation.test.ts @@ -206,4 +206,44 @@ describe("cron schedule error isolation", () => { expect(badJob.state.lastError).not.toContain("Cannot read properties of undefined"); expect(badJob.state.scheduleErrorCount).toBe(1); }); + + it("keeps malformed every schedules on the schedule-error path", () => { + const badJob = createJob({ + id: "bad-every", + name: "Bad Every", + schedule: { kind: "every", everyMs: Number.NaN }, + state: { + nextRunAtMs: undefined, + scheduleErrorCount: 1, + lastError: "schedule error: previous", + }, + }); + const state = createMockState([badJob]); + + recomputeNextRuns(state); + + expect(badJob.state.nextRunAtMs).toBeUndefined(); + expect(badJob.state.scheduleErrorCount).toBe(2); + expect(badJob.state.lastError).toContain("invalid every schedule"); + }); + + it("keeps malformed at schedules on the schedule-error path", () => { + const badJob = createJob({ + id: "bad-at", + name: "Bad At", + schedule: { kind: "at", at: "not-a-timestamp" }, + state: { + nextRunAtMs: undefined, + scheduleErrorCount: 1, + lastError: "schedule error: previous", + }, + }); + const state = createMockState([badJob]); + + recomputeNextRuns(state); + + expect(badJob.state.nextRunAtMs).toBeUndefined(); + expect(badJob.state.scheduleErrorCount).toBe(2); + expect(badJob.state.lastError).toContain("invalid at schedule"); + }); }); diff --git a/src/cron/service/jobs.ts b/src/cron/service/jobs.ts index 3c087c94331..2f1f6f4d2ab 100644 --- a/src/cron/service/jobs.ts +++ b/src/cron/service/jobs.ts @@ -308,6 +308,22 @@ export function computeJobNextRunAtMs(job: CronJob, nowMs: number): number | und return isFiniteTimestamp(next) ? next : undefined; } +export function shouldTreatUndefinedNextRunAsScheduleError(job: CronJob): boolean { + if (!job.enabled) { + return false; + } + if (job.schedule.kind === "every") { + return coerceFiniteScheduleNumber(job.schedule.everyMs) === undefined; + } + if (job.schedule.kind === "at") { + return parseAbsoluteTimeMs(job.schedule.at) === null; + } + if (job.schedule.kind === "cron") { + return job.schedule.expr.trim().length === 0; + } + return false; +} + export function computeJobPreviousRunAtMs(job: CronJob, nowMs: number): number | undefined { if (!job.enabled || job.schedule.kind !== "cron") { return undefined; @@ -442,13 +458,35 @@ export function consumeSkipNextReloadRepairRecompute( return true; } -function recomputeJobNextRunAtMs(params: { state: CronServiceState; job: CronJob; nowMs: number }) { - if (consumeSkipNextReloadRepairRecompute(params.state, params.job.id)) { +function recomputeJobNextRunAtMs(params: { + state: CronServiceState; + job: CronJob; + nowMs: number; + consumeReloadRepairSkip?: boolean; +}) { + const consumeReloadRepairSkip = params.consumeReloadRepairSkip ?? true; + if ( + consumeReloadRepairSkip + ? consumeSkipNextReloadRepairRecompute(params.state, params.job.id) + : hasSkipNextReloadRepairRecompute(params.state, params.job.id) + ) { return false; } let changed = false; try { const newNext = computeJobNextRunAtMs(params.job, params.nowMs); + if (newNext === undefined && shouldTreatUndefinedNextRunAsScheduleError(params.job)) { + const err = + params.job.schedule.kind === "every" + ? new Error("invalid every schedule: everyMs must be a finite number") + : params.job.schedule.kind === "at" + ? new Error("invalid at schedule: at must be a valid absolute timestamp") + : new Error("invalid cron schedule: expr is required"); + if (recordScheduleComputeError({ state: params.state, job: params.job, err })) { + changed = true; + } + return changed; + } if (params.job.state.nextRunAtMs !== newNext) { params.job.state.nextRunAtMs = newNext; changed = true; @@ -492,16 +530,17 @@ export function recomputeNextRuns(state: CronServiceState): boolean { */ export function recomputeNextRunsForMaintenance( state: CronServiceState, - opts?: { recomputeExpired?: boolean; nowMs?: number }, + opts?: { recomputeExpired?: boolean; nowMs?: number; consumeReloadRepairSkip?: boolean }, ): boolean { const recomputeExpired = opts?.recomputeExpired ?? false; + const consumeReloadRepairSkip = opts?.consumeReloadRepairSkip ?? true; return walkSchedulableJobs( state, ({ job, nowMs: now }) => { let changed = false; if (!isFiniteTimestamp(job.state.nextRunAtMs)) { // Missing or invalid nextRunAtMs is always repaired. - if (recomputeJobNextRunAtMs({ state, job, nowMs: now })) { + if (recomputeJobNextRunAtMs({ state, job, nowMs: now, consumeReloadRepairSkip })) { changed = true; } } else if ( @@ -514,7 +553,7 @@ export function recomputeNextRunsForMaintenance( const lastRun = job.state.lastRunAtMs; const alreadyExecutedSlot = isFiniteTimestamp(lastRun) && lastRun >= job.state.nextRunAtMs; if (alreadyExecutedSlot) { - if (recomputeJobNextRunAtMs({ state, job, nowMs: now })) { + if (recomputeJobNextRunAtMs({ state, job, nowMs: now, consumeReloadRepairSkip })) { changed = true; } } diff --git a/src/cron/service/ops.ts b/src/cron/service/ops.ts index bc120b8258d..9ae602933ce 100644 --- a/src/cron/service/ops.ts +++ b/src/cron/service/ops.ts @@ -115,7 +115,7 @@ async function ensureLoadedForRead(state: CronServiceState) { } // Use the maintenance-only version so that read-only operations never // advance a past-due nextRunAtMs without executing the job (#16156). - const changed = recomputeNextRunsForMaintenance(state); + const changed = recomputeNextRunsForMaintenance(state, { consumeReloadRepairSkip: false }); if (changed) { await persist(state); } @@ -413,7 +413,7 @@ async function inspectManualRunPreflight( // Normalize job tick state (clears stale runningAtMs markers) before // checking if already running, so a stale marker from a crashed Phase-1 // persist does not block manual triggers for up to STUCK_RUN_MS (#17554). - recomputeNextRunsForMaintenance(state); + recomputeNextRunsForMaintenance(state, { consumeReloadRepairSkip: false }); const job = findJobOrThrow(state, id); if (typeof job.state.runningAtMs === "number") { return { ok: true, ran: false, reason: "already-running" as const }; diff --git a/src/cron/service/store.ts b/src/cron/service/store.ts index 8c2a6cb0fde..2c4f0ecf022 100644 --- a/src/cron/service/store.ts +++ b/src/cron/service/store.ts @@ -1,10 +1,13 @@ import fs from "node:fs"; -import { parseAbsoluteTimeMs } from "../parse.js"; -import { coerceFiniteScheduleNumber } from "../schedule.js"; import { normalizeStoredCronJobs } from "../store-migration.js"; import { loadCronStore, saveCronStore } from "../store.js"; import type { CronJob } from "../types.js"; -import { computeJobNextRunAtMs, recordScheduleComputeError, recomputeNextRuns } from "./jobs.js"; +import { + computeJobNextRunAtMs, + recordScheduleComputeError, + recomputeNextRuns, + shouldTreatUndefinedNextRunAsScheduleError, +} from "./jobs.js"; import { schedulesEqual } from "./schedule-equality.js"; import type { CronServiceState } from "./state.js"; @@ -77,33 +80,6 @@ function shouldRepairColdLoadNextRun(params: { job: CronJob; nowMs: number }): b return computeBaseMs < persistedNextRunAtMs; } -function parseAtScheduleMs(schedule: Extract): number | null { - const legacy = schedule as { at?: string; atMs?: number | string }; - if (typeof legacy.atMs === "number" && Number.isFinite(legacy.atMs) && legacy.atMs > 0) { - return legacy.atMs; - } - if (typeof legacy.atMs === "string") { - return parseAbsoluteTimeMs(legacy.atMs); - } - if (typeof legacy.at === "string") { - return parseAbsoluteTimeMs(legacy.at); - } - return null; -} - -function shouldTreatUndefinedNextRunAsScheduleError(job: CronJob): boolean { - if (!job.enabled) { - return false; - } - if (job.schedule.kind === "every") { - return coerceFiniteScheduleNumber(job.schedule.everyMs) === undefined; - } - if (job.schedule.kind === "at") { - return parseAtScheduleMs(job.schedule) === null; - } - return false; -} - function repairNextRunsAfterExternalReload(params: { state: CronServiceState; previousJobs: CronJob[] | undefined; From 089077d985f9cf4de1ce66511bd8dff7f7d451b8 Mon Sep 17 00:00:00 2001 From: create Date: Sat, 21 Mar 2026 13:48:35 +0800 Subject: [PATCH 15/15] fix(cron): narrow external reload repair scope --- ...external-reload-schedule-recompute.test.ts | 702 ++++-------------- src/cron/service/ops.ts | 1 + src/cron/service/store.ts | 74 +- 3 files changed, 155 insertions(+), 622 deletions(-) diff --git a/src/cron/service.external-reload-schedule-recompute.test.ts b/src/cron/service.external-reload-schedule-recompute.test.ts index 16af1888e78..cec8c03e311 100644 --- a/src/cron/service.external-reload-schedule-recompute.test.ts +++ b/src/cron/service.external-reload-schedule-recompute.test.ts @@ -1,8 +1,8 @@ import fs from "node:fs/promises"; import { describe, expect, it, vi } from "vitest"; import { setupCronServiceSuite, writeCronStoreSnapshot } from "./service.test-harness.js"; -import { recomputeNextRuns, recomputeNextRunsForMaintenance } from "./service/jobs.js"; -import { remove, run } from "./service/ops.js"; +import { recomputeNextRuns } from "./service/jobs.js"; +import { run } from "./service/ops.js"; import { createCronServiceState } from "./service/state.js"; import { ensureLoaded } from "./service/store.js"; import type { CronJob } from "./types.js"; @@ -12,35 +12,48 @@ const { logger: noopLogger, makeStorePath } = setupCronServiceSuite({ baseTimeIso: "2026-03-19T01:44:00.000Z", }); -describe("forceReload repairs externally changed cron schedules", () => { - it("recomputes nextRunAtMs when jobs.json changes schedule outside cron.update", async () => { +function createCronJob(params: { + id: string; + expr: string; + updatedAtMs?: number; + enabled?: boolean; + nextRunAtMs?: number; + scheduleErrorCount?: number; + lastError?: string; + lastStatus?: CronJob["state"]["lastStatus"]; + runningAtMs?: number; +}): CronJob { + return { + id: params.id, + name: params.id, + enabled: params.enabled ?? true, + createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), + updatedAtMs: params.updatedAtMs ?? Date.parse("2026-03-19T01:44:00.000Z"), + schedule: { kind: "cron", expr: params.expr, tz: "Asia/Shanghai", staggerMs: 0 }, + sessionTarget: "main", + wakeMode: "next-heartbeat", + payload: { kind: "systemEvent", text: "tick" }, + state: { + nextRunAtMs: params.nextRunAtMs, + scheduleErrorCount: params.scheduleErrorCount, + lastError: params.lastError, + lastStatus: params.lastStatus, + runningAtMs: params.runningAtMs, + }, + }; +} + +describe("forceReload repairs externally changed schedules", () => { + it("recomputes nextRunAtMs when jobs.json changes a cron schedule outside cron.update", async () => { const store = await makeStorePath(); const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); const jobId = "external-schedule-change"; const staleNextRunAtMs = Date.parse("2026-03-20T00:30:00.000Z"); const correctedNextRunAtMs = Date.parse("2026-03-19T12:30:00.000Z"); - const createJob = (expr: string): CronJob => ({ - id: jobId, - name: "external schedule change", - enabled: true, - createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), - updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), - schedule: { kind: "cron", expr, tz: "Asia/Shanghai", staggerMs: 0 }, - sessionTarget: "main", - wakeMode: "next-heartbeat", - payload: { kind: "systemEvent", text: "tick" }, - state: { - nextRunAtMs: staleNextRunAtMs, - lastRunAtMs: Date.parse("2026-03-19T00:30:00.000Z"), - lastStatus: "ok", - lastRunStatus: "ok", - }, - }); - await writeCronStoreSnapshot({ storePath: store.storePath, - jobs: [createJob("30 8 * * *")], + jobs: [createCronJob({ id: jobId, expr: "30 8 * * *", nextRunAtMs: staleNextRunAtMs })], }); const state = createCronServiceState({ @@ -54,56 +67,44 @@ describe("forceReload repairs externally changed cron schedules", () => { }); await ensureLoaded(state, { skipRecompute: true }); - expect(state.store?.jobs[0]?.state.nextRunAtMs).toBe(staleNextRunAtMs); await writeCronStoreSnapshot({ storePath: store.storePath, - jobs: [createJob("30 8,20 * * *")], + jobs: [createCronJob({ id: jobId, expr: "30 8,20 * * *", nextRunAtMs: staleNextRunAtMs })], }); await ensureLoaded(state, { forceReload: true, skipRecompute: true }); const reloaded = state.store?.jobs.find((job) => job.id === jobId); - expect(reloaded?.schedule).toEqual({ - kind: "cron", - expr: "30 8,20 * * *", - tz: "Asia/Shanghai", - staggerMs: 0, - }); expect(reloaded?.state.nextRunAtMs).toBe(correctedNextRunAtMs); const persisted = JSON.parse(await fs.readFile(store.storePath, "utf8")) as { jobs?: Array<{ id: string; state?: { nextRunAtMs?: number } }>; }; - const persistedJob = persisted.jobs?.find((job) => job.id === jobId); - expect(persistedJob?.state?.nextRunAtMs).toBe(correctedNextRunAtMs); + expect(persisted.jobs?.find((job) => job.id === jobId)?.state?.nextRunAtMs).toBe( + correctedNextRunAtMs, + ); }); it("recomputes from updatedAtMs so delayed reload keeps newly earlier slots due", async () => { const store = await makeStorePath(); const nowMs = Date.parse("2026-03-19T12:10:00.000Z"); - const initialUpdatedAtMs = Date.parse("2026-03-19T12:00:00.000Z"); - const editedAtMs = Date.parse("2026-03-19T12:01:00.000Z"); const jobId = "external-schedule-change-delayed-observe"; + const staleNextRunAtMs = Date.parse("2026-03-20T00:30:00.000Z"); - const createJob = (params: { expr: string; updatedAtMs: number }): CronJob => ({ - id: jobId, - name: "external schedule delayed observe", - enabled: true, - createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), - updatedAtMs: params.updatedAtMs, - schedule: { kind: "cron", expr: params.expr, tz: "UTC", staggerMs: 0 }, - sessionTarget: "main", - wakeMode: "next-heartbeat", - payload: { kind: "systemEvent", text: "tick" }, - state: { - nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), - }, - }); + const createJob = (params: { expr: string; updatedAtMs: number }) => + createCronJob({ + id: jobId, + expr: params.expr, + updatedAtMs: params.updatedAtMs, + nextRunAtMs: staleNextRunAtMs, + }); await writeCronStoreSnapshot({ storePath: store.storePath, - jobs: [createJob({ expr: "30 23 * * *", updatedAtMs: initialUpdatedAtMs })], + jobs: [ + createJob({ expr: "30 23 * * *", updatedAtMs: Date.parse("2026-03-19T12:00:00.000Z") }), + ], }); const state = createCronServiceState({ @@ -120,150 +121,12 @@ describe("forceReload repairs externally changed cron schedules", () => { await writeCronStoreSnapshot({ storePath: store.storePath, - jobs: [createJob({ expr: "* * * * *", updatedAtMs: editedAtMs })], + jobs: [createJob({ expr: "* * * * *", updatedAtMs: Date.parse("2026-03-19T12:01:00.000Z") })], }); await ensureLoaded(state, { forceReload: true, skipRecompute: true }); - const reloaded = state.store?.jobs.find((job) => job.id === jobId); - expect(reloaded?.state.nextRunAtMs).toBeLessThan(nowMs); - expect(reloaded?.state.nextRunAtMs).toBe(Date.parse("2026-03-19T12:02:00.000Z")); - }); - - it("repairs stale nextRunAtMs on first load after restart", async () => { - const store = await makeStorePath(); - const nowMs = Date.parse("2026-03-19T12:10:00.000Z"); - const editedAtMs = Date.parse("2026-03-19T12:01:00.000Z"); - const jobId = "external-schedule-change-cold-load"; - const staleNextRunAtMs = Date.parse("2026-03-20T00:30:00.000Z"); - - const createJob = (): CronJob => ({ - id: jobId, - name: "external schedule cold load repair", - enabled: true, - createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), - updatedAtMs: editedAtMs, - schedule: { kind: "cron", expr: "* * * * *", tz: "UTC", staggerMs: 0 }, - sessionTarget: "main", - wakeMode: "next-heartbeat", - payload: { kind: "systemEvent", text: "tick" }, - state: { - nextRunAtMs: staleNextRunAtMs, - }, - }); - - await writeCronStoreSnapshot({ - storePath: store.storePath, - jobs: [createJob()], - }); - - const state = createCronServiceState({ - cronEnabled: true, - storePath: store.storePath, - log: noopLogger, - nowMs: () => nowMs, - enqueueSystemEvent: vi.fn(), - requestHeartbeatNow: vi.fn(), - runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), - }); - - await ensureLoaded(state, { skipRecompute: true }); - - const reloaded = state.store?.jobs.find((job) => job.id === jobId); - expect(reloaded?.state.nextRunAtMs).toBe(Date.parse("2026-03-19T12:02:00.000Z")); - - const persisted = JSON.parse(await fs.readFile(store.storePath, "utf8")) as { - jobs?: Array<{ id: string; state?: { nextRunAtMs?: number } }>; - }; - const persistedJob = persisted.jobs?.find((job) => job.id === jobId); - expect(persistedJob?.state?.nextRunAtMs).toBe(Date.parse("2026-03-19T12:02:00.000Z")); - }); - - it("repairs overdue stale nextRunAtMs on first load when edit happened later", async () => { - const store = await makeStorePath(); - const nowMs = Date.parse("2026-03-19T12:10:00.000Z"); - const staleDueNextRunAtMs = Date.parse("2026-03-19T12:00:00.000Z"); - const editedAtMs = Date.parse("2026-03-19T12:05:00.000Z"); - const jobId = "external-schedule-change-cold-load-overdue"; - - const createJob = (): CronJob => ({ - id: jobId, - name: "external schedule cold load overdue repair", - enabled: true, - createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), - updatedAtMs: editedAtMs, - schedule: { kind: "cron", expr: "30 23 * * *", tz: "UTC", staggerMs: 0 }, - sessionTarget: "main", - wakeMode: "next-heartbeat", - payload: { kind: "systemEvent", text: "tick" }, - state: { - nextRunAtMs: staleDueNextRunAtMs, - }, - }); - - await writeCronStoreSnapshot({ - storePath: store.storePath, - jobs: [createJob()], - }); - - const state = createCronServiceState({ - cronEnabled: true, - storePath: store.storePath, - log: noopLogger, - nowMs: () => nowMs, - enqueueSystemEvent: vi.fn(), - requestHeartbeatNow: vi.fn(), - runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), - }); - - await ensureLoaded(state, { skipRecompute: true }); - - const reloaded = state.store?.jobs.find((job) => job.id === jobId); - expect(reloaded?.state.nextRunAtMs).toBe(Date.parse("2026-03-19T23:30:00.000Z")); - }); - - it("repairs cold-load stale nextRunAtMs even when consecutiveErrors is set", async () => { - const store = await makeStorePath(); - const nowMs = Date.parse("2026-03-19T12:10:00.000Z"); - const editedAtMs = Date.parse("2026-03-19T12:01:00.000Z"); - const jobId = "external-schedule-change-cold-load-consecutive-errors"; - const staleNextRunAtMs = Date.parse("2026-03-20T00:30:00.000Z"); - - const createJob = (): CronJob => ({ - id: jobId, - name: "external schedule cold load with consecutiveErrors", - enabled: true, - createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), - updatedAtMs: editedAtMs, - schedule: { kind: "cron", expr: "* * * * *", tz: "UTC", staggerMs: 0 }, - sessionTarget: "main", - wakeMode: "next-heartbeat", - payload: { kind: "systemEvent", text: "tick" }, - state: { - nextRunAtMs: staleNextRunAtMs, - consecutiveErrors: 2, - }, - }); - - await writeCronStoreSnapshot({ - storePath: store.storePath, - jobs: [createJob()], - }); - - const state = createCronServiceState({ - cronEnabled: true, - storePath: store.storePath, - log: noopLogger, - nowMs: () => nowMs, - enqueueSystemEvent: vi.fn(), - requestHeartbeatNow: vi.fn(), - runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), - }); - - await ensureLoaded(state, { skipRecompute: true }); - - const reloaded = state.store?.jobs.find((job) => job.id === jobId); - expect(reloaded?.state.nextRunAtMs).toBe(Date.parse("2026-03-19T12:02:00.000Z")); + expect(state.store?.jobs[0]?.state.nextRunAtMs).toBe(Date.parse("2026-03-19T12:02:00.000Z")); }); it("records schedule errors instead of aborting reload when an external edit is invalid", async () => { @@ -271,27 +134,15 @@ describe("forceReload repairs externally changed cron schedules", () => { const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); const jobId = "external-invalid-schedule"; - const createJob = (expr: string): CronJob => ({ - id: jobId, - name: "external invalid schedule", - enabled: true, - createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), - updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), - schedule: { kind: "cron", expr, tz: "Asia/Shanghai", staggerMs: 0 }, - sessionTarget: "main", - wakeMode: "next-heartbeat", - payload: { kind: "systemEvent", text: "tick" }, - state: { - nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), - lastRunAtMs: Date.parse("2026-03-19T00:30:00.000Z"), - lastStatus: "ok", - lastRunStatus: "ok", - }, - }); - await writeCronStoreSnapshot({ storePath: store.storePath, - jobs: [createJob("30 8 * * *")], + jobs: [ + createCronJob({ + id: jobId, + expr: "30 8 * * *", + nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), + }), + ], }); const state = createCronServiceState({ @@ -308,53 +159,39 @@ describe("forceReload repairs externally changed cron schedules", () => { await writeCronStoreSnapshot({ storePath: store.storePath, - jobs: [createJob("not a valid cron")], + jobs: [ + createCronJob({ + id: jobId, + expr: "not a valid cron", + nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), + }), + ], }); await expect( ensureLoaded(state, { forceReload: true, skipRecompute: true }), ).resolves.toBeUndefined(); - const reloaded = state.store?.jobs.find((job) => job.id === jobId); + const reloaded = state.store?.jobs[0]; expect(reloaded?.state.nextRunAtMs).toBeUndefined(); expect(reloaded?.state.scheduleErrorCount).toBe(1); expect(reloaded?.state.lastError).toMatch(/^schedule error:/); - - const persisted = JSON.parse(await fs.readFile(store.storePath, "utf8")) as { - jobs?: Array<{ - id: string; - state?: { scheduleErrorCount?: number; lastError?: string; nextRunAtMs?: number }; - }>; - }; - const persistedJob = persisted.jobs?.find((job) => job.id === jobId); - expect(persistedJob?.state?.scheduleErrorCount).toBe(1); - expect(persistedJob?.state?.lastError).toMatch(/^schedule error:/); - expect(persistedJob?.state?.nextRunAtMs).toBeUndefined(); }); - it("does not double-count a reload schedule error during the immediate full recompute", async () => { + it("does not double-count a reload schedule error during the immediate recompute", async () => { const store = await makeStorePath(); const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); const jobId = "external-invalid-schedule-full-recompute"; - const createJob = (expr: string): CronJob => ({ - id: jobId, - name: "external invalid schedule full recompute", - enabled: true, - createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), - updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), - schedule: { kind: "cron", expr, tz: "Asia/Shanghai", staggerMs: 0 }, - sessionTarget: "main", - wakeMode: "next-heartbeat", - payload: { kind: "systemEvent", text: "tick" }, - state: { - nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), - }, - }); - await writeCronStoreSnapshot({ storePath: store.storePath, - jobs: [createJob("30 8 * * *")], + jobs: [ + createCronJob({ + id: jobId, + expr: "30 8 * * *", + nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), + }), + ], }); const state = createCronServiceState({ @@ -371,7 +208,13 @@ describe("forceReload repairs externally changed cron schedules", () => { await writeCronStoreSnapshot({ storePath: store.storePath, - jobs: [createJob("not a valid cron")], + jobs: [ + createCronJob({ + id: jobId, + expr: "not a valid cron", + nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), + }), + ], }); await ensureLoaded(state, { forceReload: true, skipRecompute: true }); @@ -379,116 +222,6 @@ describe("forceReload repairs externally changed cron schedules", () => { recomputeNextRuns(state); expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(1); - - recomputeNextRuns(state); - expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(2); - }); - - it("does not double-count a reload schedule error during immediate maintenance recompute", async () => { - const store = await makeStorePath(); - const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); - const jobId = "external-invalid-schedule-maintenance"; - - const createJob = (expr: string): CronJob => ({ - id: jobId, - name: "external invalid schedule maintenance", - enabled: true, - createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), - updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), - schedule: { kind: "cron", expr, tz: "Asia/Shanghai", staggerMs: 0 }, - sessionTarget: "main", - wakeMode: "next-heartbeat", - payload: { kind: "systemEvent", text: "tick" }, - state: { - nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), - }, - }); - - await writeCronStoreSnapshot({ - storePath: store.storePath, - jobs: [createJob("30 8 * * *")], - }); - - const state = createCronServiceState({ - cronEnabled: true, - storePath: store.storePath, - log: noopLogger, - nowMs: () => nowMs, - enqueueSystemEvent: vi.fn(), - requestHeartbeatNow: vi.fn(), - runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), - }); - - await ensureLoaded(state, { skipRecompute: true }); - - await writeCronStoreSnapshot({ - storePath: store.storePath, - jobs: [createJob("not a valid cron")], - }); - - await ensureLoaded(state, { forceReload: true, skipRecompute: true }); - expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(1); - - recomputeNextRunsForMaintenance(state); - expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(1); - - recomputeNextRunsForMaintenance(state); - expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(2); - }); - - it("preserves the one-shot skip across a second forceReload before maintenance recompute", async () => { - const store = await makeStorePath(); - const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); - const jobId = "external-invalid-schedule-second-reload"; - - const createJob = (expr: string): CronJob => ({ - id: jobId, - name: "external invalid schedule second reload", - enabled: true, - createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), - updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), - schedule: { kind: "cron", expr, tz: "Asia/Shanghai", staggerMs: 0 }, - sessionTarget: "main", - wakeMode: "next-heartbeat", - payload: { kind: "systemEvent", text: "tick" }, - state: { - nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), - }, - }); - - await writeCronStoreSnapshot({ - storePath: store.storePath, - jobs: [createJob("30 8 * * *")], - }); - - const state = createCronServiceState({ - cronEnabled: true, - storePath: store.storePath, - log: noopLogger, - nowMs: () => nowMs, - enqueueSystemEvent: vi.fn(), - requestHeartbeatNow: vi.fn(), - runIsolatedAgentJob: vi.fn(async () => ({ status: "ok" as const })), - }); - - await ensureLoaded(state, { skipRecompute: true }); - - await writeCronStoreSnapshot({ - storePath: store.storePath, - jobs: [createJob("not a valid cron")], - }); - - await ensureLoaded(state, { forceReload: true, skipRecompute: true }); - expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(1); - - await ensureLoaded(state, { forceReload: true, skipRecompute: true }); - expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(1); - - recomputeNextRunsForMaintenance(state); - expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(1); - - recomputeNextRunsForMaintenance(state); - expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBe(2); }); it("keeps forceReload repairs when manual-run snapshot is merged back", async () => { @@ -497,49 +230,26 @@ describe("forceReload repairs externally changed cron schedules", () => { const jobId = "manual-run-reload-merge"; const staleNextRunAtMs = Date.parse("2026-03-19T23:30:00.000Z"); - const createJob = (params: { - expr: string; - enabled: boolean; - nextRunAtMs?: number; - lastStatus?: CronJob["state"]["lastStatus"]; - }): CronJob => ({ - id: jobId, - name: "manual run reload merge", - enabled: params.enabled, - createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), - updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), - schedule: { kind: "cron", expr: params.expr, tz: "Asia/Shanghai", staggerMs: 0 }, - sessionTarget: "isolated", - wakeMode: "next-heartbeat", - payload: { kind: "agentTurn", message: "tick" }, - state: { + const createJob = (params: { expr: string; enabled: boolean; nextRunAtMs?: number }) => ({ + ...createCronJob({ + id: jobId, + expr: params.expr, + enabled: params.enabled, nextRunAtMs: params.nextRunAtMs, - lastStatus: params.lastStatus, - }, + }), + sessionTarget: "isolated" as const, + payload: { kind: "agentTurn", message: "tick" } as const, }); await writeCronStoreSnapshot({ storePath: store.storePath, - jobs: [ - createJob({ - expr: "30 23 * * *", - enabled: true, - nextRunAtMs: staleNextRunAtMs, - }), - ], + jobs: [createJob({ expr: "30 23 * * *", enabled: true, nextRunAtMs: staleNextRunAtMs })], }); const runIsolatedAgentJob = vi.fn(async () => { await writeCronStoreSnapshot({ storePath: store.storePath, - jobs: [ - createJob({ - expr: "30 8 * * *", - enabled: false, - nextRunAtMs: staleNextRunAtMs, - lastStatus: "error", - }), - ], + jobs: [createJob({ expr: "30 8 * * *", enabled: false, nextRunAtMs: staleNextRunAtMs })], }); nowMs += 500; return { status: "ok" as const, summary: "done" }; @@ -555,33 +265,12 @@ describe("forceReload repairs externally changed cron schedules", () => { runIsolatedAgentJob, }); - const result = await run(state, jobId, "force"); - expect(result).toEqual({ ok: true, ran: true }); - expect(runIsolatedAgentJob).toHaveBeenCalledTimes(1); + expect(await run(state, jobId, "force")).toEqual({ ok: true, ran: true }); - const merged = state.store?.jobs.find((job) => job.id === jobId); - expect(merged?.schedule).toEqual({ - kind: "cron", - expr: "30 8 * * *", - tz: "Asia/Shanghai", - staggerMs: 0, - }); + const merged = state.store?.jobs[0]; expect(merged?.enabled).toBe(false); expect(merged?.state.nextRunAtMs).toBeUndefined(); expect(merged?.state.lastStatus).toBe("ok"); - expect(merged?.state.lastRunAtMs).toBeDefined(); - - const persisted = JSON.parse(await fs.readFile(store.storePath, "utf8")) as { - jobs?: Array<{ - id: string; - enabled?: boolean; - state?: { nextRunAtMs?: number; lastStatus?: string }; - }>; - }; - const persistedJob = persisted.jobs?.find((job) => job.id === jobId); - expect(persistedJob?.enabled).toBe(false); - expect(persistedJob?.state?.nextRunAtMs).toBeUndefined(); - expect(persistedJob?.state?.lastStatus).toBe("ok"); }); it("keeps scheduleErrorCount cleared when external reload fixes schedule during force-run", async () => { @@ -590,51 +279,27 @@ describe("forceReload repairs externally changed cron schedules", () => { const jobId = "manual-run-reload-clears-schedule-error-count"; const staleNextRunAtMs = Date.parse("2026-03-19T23:30:00.000Z"); - const createJob = (params: { - expr: string; - scheduleErrorCount?: number; - lastError?: string; - nextRunAtMs?: number; - }): CronJob => ({ - id: jobId, - name: "manual run reload clears schedule error count", - enabled: true, - createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), - updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), - schedule: { kind: "cron", expr: params.expr, tz: "Asia/Shanghai", staggerMs: 0 }, - sessionTarget: "isolated", - wakeMode: "next-heartbeat", - payload: { kind: "agentTurn", message: "tick" }, - state: { - nextRunAtMs: params.nextRunAtMs, - scheduleErrorCount: params.scheduleErrorCount, - lastError: params.lastError, - }, + const createJob = (expr: string) => ({ + ...createCronJob({ + id: jobId, + expr, + nextRunAtMs: staleNextRunAtMs, + scheduleErrorCount: 2, + lastError: "schedule error: invalid expression", + }), + sessionTarget: "isolated" as const, + payload: { kind: "agentTurn", message: "tick" } as const, }); await writeCronStoreSnapshot({ storePath: store.storePath, - jobs: [ - createJob({ - expr: "30 23 * * *", - nextRunAtMs: staleNextRunAtMs, - scheduleErrorCount: 2, - lastError: "cron: invalid expression", - }), - ], + jobs: [createJob("30 23 * * *")], }); const runIsolatedAgentJob = vi.fn(async () => { await writeCronStoreSnapshot({ storePath: store.storePath, - jobs: [ - createJob({ - expr: "30 8 * * *", - nextRunAtMs: staleNextRunAtMs, - scheduleErrorCount: 2, - lastError: "cron: invalid expression", - }), - ], + jobs: [createJob("30 8 * * *")], }); nowMs += 500; return { status: "ok" as const, summary: "done" }; @@ -650,110 +315,26 @@ describe("forceReload repairs externally changed cron schedules", () => { runIsolatedAgentJob, }); - const result = await run(state, jobId, "force"); - expect(result).toEqual({ ok: true, ran: true }); - expect(runIsolatedAgentJob).toHaveBeenCalledTimes(1); - - const merged = state.store?.jobs.find((job) => job.id === jobId); - expect(merged?.schedule).toEqual({ - kind: "cron", - expr: "30 8 * * *", - tz: "Asia/Shanghai", - staggerMs: 0, - }); - expect(merged?.state.scheduleErrorCount).toBeUndefined(); - - const persisted = JSON.parse(await fs.readFile(store.storePath, "utf8")) as { - jobs?: Array<{ - id: string; - state?: { scheduleErrorCount?: number }; - }>; - }; - const persistedJob = persisted.jobs?.find((job) => job.id === jobId); - expect(persistedJob?.state?.scheduleErrorCount).toBeUndefined(); + expect(await run(state, jobId, "force")).toEqual({ ok: true, ran: true }); + expect(state.store?.jobs[0]?.state.scheduleErrorCount).toBeUndefined(); }); - it("keeps one-shot terminal disable state when manual force-run reloads unchanged store", async () => { + it("preserves runningAtMs when an external reload comes from a stale file snapshot", async () => { const store = await makeStorePath(); - let nowMs = Date.parse("2026-03-19T01:44:00.000Z"); - const jobId = "manual-run-at-terminal-state"; - const scheduledAtMs = nowMs + 60_000; - - const createJob = (): CronJob => ({ - id: jobId, - name: "manual run at terminal state", - enabled: true, - createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), - updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), - schedule: { kind: "at", at: new Date(scheduledAtMs).toISOString() }, - sessionTarget: "isolated", - wakeMode: "next-heartbeat", - payload: { kind: "agentTurn", message: "tick" }, - state: { - nextRunAtMs: scheduledAtMs, - }, - }); + const nowMs = Date.parse("2026-03-19T12:10:00.000Z"); + const jobId = "external-running-marker"; + const runningAtMs = Date.parse("2026-03-19T12:00:00.000Z"); await writeCronStoreSnapshot({ storePath: store.storePath, - jobs: [createJob()], - }); - - const state = createCronServiceState({ - cronEnabled: true, - storePath: store.storePath, - log: noopLogger, - nowMs: () => nowMs, - enqueueSystemEvent: vi.fn(), - requestHeartbeatNow: vi.fn(), - runIsolatedAgentJob: vi.fn(async () => { - nowMs += 500; - return { status: "ok" as const, summary: "done" }; - }), - }); - - const result = await run(state, jobId, "force"); - expect(result).toEqual({ ok: true, ran: true }); - - const merged = state.store?.jobs.find((job) => job.id === jobId); - expect(merged?.enabled).toBe(false); - expect(merged?.state.nextRunAtMs).toBeUndefined(); - expect(merged?.state.lastStatus).toBe("ok"); - - const persisted = JSON.parse(await fs.readFile(store.storePath, "utf8")) as { - jobs?: Array<{ - id: string; - enabled?: boolean; - state?: { nextRunAtMs?: number; lastStatus?: string }; - }>; - }; - const persistedJob = persisted.jobs?.find((job) => job.id === jobId); - expect(persistedJob?.enabled).toBe(false); - expect(persistedJob?.state?.nextRunAtMs).toBeUndefined(); - expect(persistedJob?.state?.lastStatus).toBe("ok"); - }); - - it("clears reload-repair skip markers when a job is removed before same-id rebuild", async () => { - const store = await makeStorePath(); - const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); - const jobId = "external-reload-skip-marker-id-reuse"; - - const createJob = (expr: string): CronJob => ({ - id: jobId, - name: "external reload skip marker id reuse", - enabled: true, - createdAtMs: Date.parse("2026-03-18T00:30:00.000Z"), - updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), - schedule: { kind: "cron", expr, tz: "UTC", staggerMs: 0 }, - sessionTarget: "main", - wakeMode: "next-heartbeat", - payload: { kind: "systemEvent", text: "tick" }, - state: {}, - }); - - await writeCronStoreSnapshot({ - storePath: store.storePath, - jobs: [createJob("*/15 * * * *")], + jobs: [ + createCronJob({ + id: jobId, + expr: "30 23 * * *", + nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), + runningAtMs, + }), + ], }); const state = createCronServiceState({ @@ -770,36 +351,31 @@ describe("forceReload repairs externally changed cron schedules", () => { await writeCronStoreSnapshot({ storePath: store.storePath, - jobs: [createJob("not a valid cron")], + jobs: [ + createCronJob({ + id: jobId, + expr: "* * * * *", + updatedAtMs: Date.parse("2026-03-19T12:01:00.000Z"), + nextRunAtMs: Date.parse("2026-03-20T00:30:00.000Z"), + }), + ], }); + await ensureLoaded(state, { forceReload: true, skipRecompute: true }); - expect(state.skipNextReloadRepairRecomputeJobIds.has(jobId)).toBe(true); - const removed = await remove(state, jobId); - expect(removed).toEqual({ ok: true, removed: true }); - expect(state.skipNextReloadRepairRecomputeJobIds.has(jobId)).toBe(false); - - await writeCronStoreSnapshot({ - storePath: store.storePath, - jobs: [createJob("*/5 * * * *")], - }); - await ensureLoaded(state, { forceReload: true, skipRecompute: true }); - recomputeNextRunsForMaintenance(state); - - const rebuilt = state.store?.jobs.find((job) => job.id === jobId); - expect(typeof rebuilt?.state.nextRunAtMs).toBe("number"); - expect(Number.isFinite(rebuilt?.state.nextRunAtMs)).toBe(true); - expect(rebuilt?.state.scheduleErrorCount).toBeUndefined(); + const reloaded = state.store?.jobs[0]; + expect(reloaded?.state.runningAtMs).toBe(runningAtMs); + expect(reloaded?.state.nextRunAtMs).toBe(Date.parse("2026-03-19T12:02:00.000Z")); }); - it("recomputes nextRunAtMs when external every schedule changes", async () => { + it("recomputes nextRunAtMs when an external every schedule changes", async () => { const store = await makeStorePath(); const nowMs = Date.parse("2026-03-19T01:44:00.000Z"); const jobId = "external-every-schedule-change"; const createEveryJob = (everyMs: number): CronJob => ({ id: jobId, - name: "external every schedule change", + name: jobId, enabled: true, createdAtMs: Date.parse("2026-03-18T00:00:00.000Z"), updatedAtMs: Date.parse("2026-03-19T01:44:00.000Z"), @@ -840,12 +416,6 @@ describe("forceReload repairs externally changed cron schedules", () => { await ensureLoaded(state, { forceReload: true, skipRecompute: true }); - const reloaded = state.store?.jobs.find((job) => job.id === jobId); - expect(reloaded?.schedule).toEqual({ - kind: "every", - everyMs: 60_000, - anchorMs: Date.parse("2026-03-19T00:00:00.000Z"), - }); - expect(reloaded?.state.nextRunAtMs).toBe(Date.parse("2026-03-19T01:44:00.000Z")); + expect(state.store?.jobs[0]?.state.nextRunAtMs).toBe(Date.parse("2026-03-19T01:44:00.000Z")); }); }); diff --git a/src/cron/service/ops.ts b/src/cron/service/ops.ts index 9ae602933ce..95e4f84df87 100644 --- a/src/cron/service/ops.ts +++ b/src/cron/service/ops.ts @@ -334,6 +334,7 @@ export async function update(state: CronServiceState, id: string, patch: CronJob job.state.nextRunAtMs = undefined; job.state.runningAtMs = undefined; } + state.skipNextReloadRepairRecomputeJobIds.delete(id); } else if (job.enabled) { // Non-schedule edits should not mutate other jobs, but still repair a // missing/corrupt nextRunAtMs for the updated job. diff --git a/src/cron/service/store.ts b/src/cron/service/store.ts index 2c4f0ecf022..c7e2452844e 100644 --- a/src/cron/service/store.ts +++ b/src/cron/service/store.ts @@ -39,54 +39,13 @@ function resolveExternalRepairComputeBaseMs(params: { return Math.min(nowMs, normalizedReloadedUpdatedAtMs); } -function hasPendingErrorBackoff(job: CronJob, nowMs: number): boolean { - const nextRunAtMs = job.state.nextRunAtMs; - if (typeof nextRunAtMs !== "number" || !Number.isFinite(nextRunAtMs) || nextRunAtMs <= nowMs) { - return false; - } - const consecutiveErrors = job.state.consecutiveErrors; - if ( - typeof consecutiveErrors !== "number" || - !Number.isFinite(consecutiveErrors) || - consecutiveErrors <= 0 - ) { - return false; - } - return job.state.lastStatus === "error"; -} - -function shouldRepairColdLoadNextRun(params: { job: CronJob; nowMs: number }): boolean { - const { job, nowMs } = params; - if (!job.enabled) { - return false; - } - if (typeof job.updatedAtMs !== "number" || !Number.isFinite(job.updatedAtMs)) { - return false; - } - const persistedNextRunAtMs = job.state.nextRunAtMs; - if (typeof persistedNextRunAtMs !== "number" || !Number.isFinite(persistedNextRunAtMs)) { - return false; - } - const normalizedUpdatedAtMs = Math.max(0, Math.floor(job.updatedAtMs)); - // If a schedule edit happened after the persisted slot, the slot is stale - // even when it is already overdue at startup. - if (persistedNextRunAtMs <= nowMs) { - return normalizedUpdatedAtMs > persistedNextRunAtMs; - } - if (hasPendingErrorBackoff(job, nowMs)) { - return false; - } - const computeBaseMs = Math.min(nowMs, normalizedUpdatedAtMs); - return computeBaseMs < persistedNextRunAtMs; -} - function repairNextRunsAfterExternalReload(params: { state: CronServiceState; previousJobs: CronJob[] | undefined; }): boolean { const { state, previousJobs } = params; const skipRecomputeJobIds = state.skipNextReloadRepairRecomputeJobIds; - if (!state.store) { + if (!state.store || previousJobs === undefined) { return false; } if (skipRecomputeJobIds.size > 0) { @@ -98,28 +57,32 @@ function repairNextRunsAfterExternalReload(params: { } } - const previousById = new Map((previousJobs ?? []).map((job) => [job.id, job])); + const previousById = new Map(previousJobs.map((job) => [job.id, job])); const now = state.deps.nowMs(); let changed = false; for (const job of state.store.jobs) { const previous = previousById.get(job.id); - const coldLoadRepairCandidate = - previousJobs === undefined && shouldRepairColdLoadNextRun({ job, nowMs: now }); - const scheduleChanged = previous ? !schedulesEqual(previous.schedule, job.schedule) : false; - const enabledChanged = previous ? previous.enabled !== job.enabled : false; - if (!scheduleChanged && !enabledChanged && !coldLoadRepairCandidate) { + if (!previous) { + continue; + } + if (typeof previous.state.runningAtMs === "number" && job.state.runningAtMs === undefined) { + job.state.runningAtMs = previous.state.runningAtMs; + changed = true; + } + + const scheduleChanged = !schedulesEqual(previous.schedule, job.schedule); + const enabledChanged = previous.enabled !== job.enabled; + if (!scheduleChanged && !enabledChanged) { continue; } skipRecomputeJobIds.delete(job.id); - const computeBaseMs = coldLoadRepairCandidate - ? Math.min(now, Math.max(0, Math.floor(job.updatedAtMs))) - : resolveExternalRepairComputeBaseMs({ - nowMs: now, - reloadedUpdatedAtMs: job.updatedAtMs, - previousUpdatedAtMs: previous?.updatedAtMs ?? Number.NEGATIVE_INFINITY, - }); + const computeBaseMs = resolveExternalRepairComputeBaseMs({ + nowMs: now, + reloadedUpdatedAtMs: job.updatedAtMs, + previousUpdatedAtMs: previous.updatedAtMs, + }); let nextRunAtMs: number | undefined; try { nextRunAtMs = job.enabled ? computeJobNextRunAtMs(job, computeBaseMs) : undefined; @@ -159,7 +122,6 @@ function repairNextRunsAfterExternalReload(params: { jobId: job.id, scheduleChanged, enabledChanged, - coldLoadRepairCandidate, computeBaseMs, nextRunAtMs: job.state.nextRunAtMs, },