diff --git a/CHANGELOG.md b/CHANGELOG.md index 884d10b98da..9c7dd6524d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Agents/Ollama: preserve unsafe integer tool-call arguments as exact strings during NDJSON parsing, preventing large numeric IDs from being rounded before tool execution. (#23170) Thanks @BestJoester. +- Cron/Gateway: keep `cron.list` and `cron.status` responsive during startup catch-up by avoiding a long-held cron lock while missed jobs execute. (#23106) Thanks @jayleekr. - Gateway/Pairing: treat operator.admin pairing tokens as satisfying operator.write requests so legacy devices stop looping through scope-upgrade prompts introduced in 2026.2.19. (#23125, #23006) Thanks @vignesh07. - Memory/QMD: add optional `memory.qmd.mcporter` search routing so QMD `query/search/vsearch` can run through mcporter keep-alive flows (including multi-collection paths) to reduce cold starts, while keeping searches on agent-scoped QMD state for consistent recall. (#19617) Thanks @nicole-luxe and @vignesh07. - Chat/UI: strip inline reply/audio directive tags (`[[reply_to_current]]`, `[[reply_to:]]`, `[[audio_as_voice]]`) from displayed chat history, live chat event output, and session preview snippets so control tags no longer leak into user-visible surfaces. diff --git a/src/cron/service.read-ops-nonblocking.test.ts b/src/cron/service.read-ops-nonblocking.test.ts index 8faac781a98..a749af09931 100644 --- a/src/cron/service.read-ops-nonblocking.test.ts +++ b/src/cron/service.read-ops-nonblocking.test.ts @@ -11,6 +11,22 @@ const noopLogger = { error: vi.fn(), }; +async function withTimeout(promise: Promise, timeoutMs: number, label: string): Promise { + let timeout: NodeJS.Timeout | undefined; + try { + return await Promise.race([ + promise, + new Promise((_resolve, reject) => { + timeout = setTimeout(() => reject(new Error(`${label} timed out`)), timeoutMs); + }), + ]); + } finally { + if (timeout) { + clearTimeout(timeout); + } + } +} + async function makeStorePath() { const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-cron-")); return { @@ -135,4 +151,86 @@ describe("CronService read ops while job is running", () => { await store.cleanup(); } }); + + it("keeps list and status responsive during startup catch-up runs", async () => { + const store = await makeStorePath(); + const enqueueSystemEvent = vi.fn(); + const requestHeartbeatNow = vi.fn(); + const nowMs = Date.parse("2025-12-13T00:00:00.000Z"); + + await fs.mkdir(path.dirname(store.storePath), { recursive: true }); + await fs.writeFile( + store.storePath, + JSON.stringify({ + version: 1, + jobs: [ + { + id: "startup-catchup", + name: "startup catch-up", + enabled: true, + createdAtMs: nowMs - 86_400_000, + updatedAtMs: nowMs - 86_400_000, + schedule: { kind: "at", at: new Date(nowMs - 60_000).toISOString() }, + sessionTarget: "isolated", + wakeMode: "next-heartbeat", + payload: { kind: "agentTurn", message: "startup replay" }, + delivery: { mode: "none" }, + state: { nextRunAtMs: nowMs - 60_000 }, + }, + ], + }), + "utf-8", + ); + + let resolveRun: + | ((value: { status: "ok" | "error" | "skipped"; summary?: string; error?: string }) => void) + | undefined; + let resolveRunStarted: (() => void) | undefined; + const runStarted = new Promise((resolve) => { + resolveRunStarted = resolve; + }); + + const runIsolatedAgentJob = vi.fn(async () => { + resolveRunStarted?.(); + return await new Promise<{ + status: "ok" | "error" | "skipped"; + summary?: string; + error?: string; + }>((resolve) => { + resolveRun = resolve; + }); + }); + + const cron = new CronService({ + storePath: store.storePath, + cronEnabled: true, + log: noopLogger, + nowMs: () => nowMs, + enqueueSystemEvent, + requestHeartbeatNow, + runIsolatedAgentJob, + }); + + try { + const startPromise = cron.start(); + await runStarted; + + await expect( + withTimeout(cron.list({ includeDisabled: true }), 300, "cron.list during startup"), + ).resolves.toBeTypeOf("object"); + await expect(withTimeout(cron.status(), 300, "cron.status during startup")).resolves.toEqual( + expect.objectContaining({ enabled: true, storePath: store.storePath }), + ); + + resolveRun?.({ status: "ok", summary: "done" }); + await startPromise; + + const jobs = await cron.list({ includeDisabled: true }); + expect(jobs[0]?.state.lastStatus).toBe("ok"); + expect(jobs[0]?.state.runningAtMs).toBeUndefined(); + } finally { + cron.stop(); + await store.cleanup(); + } + }); }); diff --git a/src/cron/service/ops.ts b/src/cron/service/ops.ts index d1b9794ff21..9c71ae4f1d9 100644 --- a/src/cron/service/ops.ts +++ b/src/cron/service/ops.ts @@ -28,14 +28,15 @@ async function ensureLoadedForRead(state: CronServiceState) { } export async function start(state: CronServiceState) { + if (!state.deps.cronEnabled) { + state.deps.log.info({ enabled: false }, "cron: disabled"); + return; + } + + const startupInterruptedJobIds = new Set(); await locked(state, async () => { - if (!state.deps.cronEnabled) { - state.deps.log.info({ enabled: false }, "cron: disabled"); - return; - } await ensureLoaded(state, { skipRecompute: true }); const jobs = state.store?.jobs ?? []; - const startupInterruptedJobIds = new Set(); for (const job of jobs) { if (typeof job.state.runningAtMs === "number") { state.deps.log.warn( @@ -46,7 +47,13 @@ export async function start(state: CronServiceState) { startupInterruptedJobIds.add(job.id); } } - await runMissedJobs(state, { skipJobIds: startupInterruptedJobIds }); + await persist(state); + }); + + await runMissedJobs(state, { skipJobIds: startupInterruptedJobIds }); + + await locked(state, async () => { + await ensureLoaded(state, { forceReload: true, skipRecompute: true }); recomputeNextRuns(state); await persist(state); armTimer(state); diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index 96b6ccad2e1..1b6b108dab1 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -458,22 +458,97 @@ export async function runMissedJobs( state: CronServiceState, opts?: { skipJobIds?: ReadonlySet }, ) { - if (!state.store) { - return; - } - const now = state.deps.nowMs(); - const skipJobIds = opts?.skipJobIds; - const missed = collectRunnableJobs(state, now, { skipJobIds, skipAtIfAlreadyRan: true }); - - if (missed.length > 0) { + const startupCandidates = await locked(state, async () => { + await ensureLoaded(state, { skipRecompute: true }); + if (!state.store) { + return [] as Array<{ jobId: string; job: CronJob }>; + } + const now = state.deps.nowMs(); + const skipJobIds = opts?.skipJobIds; + const missed = collectRunnableJobs(state, now, { skipJobIds, skipAtIfAlreadyRan: true }); + if (missed.length === 0) { + return [] as Array<{ jobId: string; job: CronJob }>; + } state.deps.log.info( { count: missed.length, jobIds: missed.map((j) => j.id) }, "cron: running missed jobs after restart", ); for (const job of missed) { - await executeJob(state, job, now, { forced: false }); + job.state.runningAtMs = now; + job.state.lastError = undefined; + } + await persist(state); + return missed.map((job) => ({ jobId: job.id, job })); + }); + + if (startupCandidates.length === 0) { + return; + } + + const outcomes: Array = []; + for (const candidate of startupCandidates) { + const startedAt = state.deps.nowMs(); + emit(state, { jobId: candidate.job.id, action: "started", runAtMs: startedAt }); + try { + const result = await executeJobCore(state, candidate.job); + outcomes.push({ + jobId: candidate.jobId, + status: result.status, + error: result.error, + summary: result.summary, + delivered: result.delivered, + sessionId: result.sessionId, + sessionKey: result.sessionKey, + model: result.model, + provider: result.provider, + usage: result.usage, + startedAt, + endedAt: state.deps.nowMs(), + }); + } catch (err) { + outcomes.push({ + jobId: candidate.jobId, + status: "error", + error: String(err), + startedAt, + endedAt: state.deps.nowMs(), + }); } } + + await locked(state, async () => { + await ensureLoaded(state, { forceReload: true, skipRecompute: true }); + if (!state.store) { + return; + } + + for (const result of outcomes) { + const job = state.store.jobs.find((entry) => entry.id === result.jobId); + if (!job) { + continue; + } + const shouldDelete = applyJobResult(state, job, { + status: result.status, + error: result.error, + delivered: result.delivered, + startedAt: result.startedAt, + endedAt: result.endedAt, + }); + + emitJobFinished(state, job, result, result.startedAt); + + if (shouldDelete) { + state.store.jobs = state.store.jobs.filter((entry) => entry.id !== job.id); + emit(state, { jobId: job.id, action: "removed" }); + } + } + + // Preserve any new past-due nextRunAtMs values that became due while + // startup catch-up was running. They should execute on a future tick + // instead of being silently advanced. + recomputeNextRunsForMaintenance(state); + await persist(state); + }); } export async function runDueJobs(state: CronServiceState) {