Cron: keep list/status responsive during startup catch-up

This commit is contained in:
Vignesh Natarajan 2026-02-21 19:13:04 -08:00
parent c45a5c551f
commit 2830dafbe9
4 changed files with 196 additions and 15 deletions

View File

@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Agents/Ollama: preserve unsafe integer tool-call arguments as exact strings during NDJSON parsing, preventing large numeric IDs from being rounded before tool execution. (#23170) Thanks @BestJoester.
- Cron/Gateway: keep `cron.list` and `cron.status` responsive during startup catch-up by avoiding a long-held cron lock while missed jobs execute. (#23106) Thanks @jayleekr.
- Gateway/Pairing: treat operator.admin pairing tokens as satisfying operator.write requests so legacy devices stop looping through scope-upgrade prompts introduced in 2026.2.19. (#23125, #23006) Thanks @vignesh07.
- Memory/QMD: add optional `memory.qmd.mcporter` search routing so QMD `query/search/vsearch` can run through mcporter keep-alive flows (including multi-collection paths) to reduce cold starts, while keeping searches on agent-scoped QMD state for consistent recall. (#19617) Thanks @nicole-luxe and @vignesh07.
- Chat/UI: strip inline reply/audio directive tags (`[[reply_to_current]]`, `[[reply_to:<id>]]`, `[[audio_as_voice]]`) from displayed chat history, live chat event output, and session preview snippets so control tags no longer leak into user-visible surfaces.

View File

@ -11,6 +11,22 @@ const noopLogger = {
error: vi.fn(),
};
async function withTimeout<T>(promise: Promise<T>, timeoutMs: number, label: string): Promise<T> {
let timeout: NodeJS.Timeout | undefined;
try {
return await Promise.race([
promise,
new Promise<T>((_resolve, reject) => {
timeout = setTimeout(() => reject(new Error(`${label} timed out`)), timeoutMs);
}),
]);
} finally {
if (timeout) {
clearTimeout(timeout);
}
}
}
async function makeStorePath() {
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-cron-"));
return {
@ -135,4 +151,86 @@ describe("CronService read ops while job is running", () => {
await store.cleanup();
}
});
it("keeps list and status responsive during startup catch-up runs", async () => {
const store = await makeStorePath();
const enqueueSystemEvent = vi.fn();
const requestHeartbeatNow = vi.fn();
const nowMs = Date.parse("2025-12-13T00:00:00.000Z");
await fs.mkdir(path.dirname(store.storePath), { recursive: true });
await fs.writeFile(
store.storePath,
JSON.stringify({
version: 1,
jobs: [
{
id: "startup-catchup",
name: "startup catch-up",
enabled: true,
createdAtMs: nowMs - 86_400_000,
updatedAtMs: nowMs - 86_400_000,
schedule: { kind: "at", at: new Date(nowMs - 60_000).toISOString() },
sessionTarget: "isolated",
wakeMode: "next-heartbeat",
payload: { kind: "agentTurn", message: "startup replay" },
delivery: { mode: "none" },
state: { nextRunAtMs: nowMs - 60_000 },
},
],
}),
"utf-8",
);
let resolveRun:
| ((value: { status: "ok" | "error" | "skipped"; summary?: string; error?: string }) => void)
| undefined;
let resolveRunStarted: (() => void) | undefined;
const runStarted = new Promise<void>((resolve) => {
resolveRunStarted = resolve;
});
const runIsolatedAgentJob = vi.fn(async () => {
resolveRunStarted?.();
return await new Promise<{
status: "ok" | "error" | "skipped";
summary?: string;
error?: string;
}>((resolve) => {
resolveRun = resolve;
});
});
const cron = new CronService({
storePath: store.storePath,
cronEnabled: true,
log: noopLogger,
nowMs: () => nowMs,
enqueueSystemEvent,
requestHeartbeatNow,
runIsolatedAgentJob,
});
try {
const startPromise = cron.start();
await runStarted;
await expect(
withTimeout(cron.list({ includeDisabled: true }), 300, "cron.list during startup"),
).resolves.toBeTypeOf("object");
await expect(withTimeout(cron.status(), 300, "cron.status during startup")).resolves.toEqual(
expect.objectContaining({ enabled: true, storePath: store.storePath }),
);
resolveRun?.({ status: "ok", summary: "done" });
await startPromise;
const jobs = await cron.list({ includeDisabled: true });
expect(jobs[0]?.state.lastStatus).toBe("ok");
expect(jobs[0]?.state.runningAtMs).toBeUndefined();
} finally {
cron.stop();
await store.cleanup();
}
});
});

View File

@ -28,14 +28,15 @@ async function ensureLoadedForRead(state: CronServiceState) {
}
export async function start(state: CronServiceState) {
if (!state.deps.cronEnabled) {
state.deps.log.info({ enabled: false }, "cron: disabled");
return;
}
const startupInterruptedJobIds = new Set<string>();
await locked(state, async () => {
if (!state.deps.cronEnabled) {
state.deps.log.info({ enabled: false }, "cron: disabled");
return;
}
await ensureLoaded(state, { skipRecompute: true });
const jobs = state.store?.jobs ?? [];
const startupInterruptedJobIds = new Set<string>();
for (const job of jobs) {
if (typeof job.state.runningAtMs === "number") {
state.deps.log.warn(
@ -46,7 +47,13 @@ export async function start(state: CronServiceState) {
startupInterruptedJobIds.add(job.id);
}
}
await runMissedJobs(state, { skipJobIds: startupInterruptedJobIds });
await persist(state);
});
await runMissedJobs(state, { skipJobIds: startupInterruptedJobIds });
await locked(state, async () => {
await ensureLoaded(state, { forceReload: true, skipRecompute: true });
recomputeNextRuns(state);
await persist(state);
armTimer(state);

View File

@ -458,22 +458,97 @@ export async function runMissedJobs(
state: CronServiceState,
opts?: { skipJobIds?: ReadonlySet<string> },
) {
if (!state.store) {
return;
}
const now = state.deps.nowMs();
const skipJobIds = opts?.skipJobIds;
const missed = collectRunnableJobs(state, now, { skipJobIds, skipAtIfAlreadyRan: true });
if (missed.length > 0) {
const startupCandidates = await locked(state, async () => {
await ensureLoaded(state, { skipRecompute: true });
if (!state.store) {
return [] as Array<{ jobId: string; job: CronJob }>;
}
const now = state.deps.nowMs();
const skipJobIds = opts?.skipJobIds;
const missed = collectRunnableJobs(state, now, { skipJobIds, skipAtIfAlreadyRan: true });
if (missed.length === 0) {
return [] as Array<{ jobId: string; job: CronJob }>;
}
state.deps.log.info(
{ count: missed.length, jobIds: missed.map((j) => j.id) },
"cron: running missed jobs after restart",
);
for (const job of missed) {
await executeJob(state, job, now, { forced: false });
job.state.runningAtMs = now;
job.state.lastError = undefined;
}
await persist(state);
return missed.map((job) => ({ jobId: job.id, job }));
});
if (startupCandidates.length === 0) {
return;
}
const outcomes: Array<TimedCronRunOutcome> = [];
for (const candidate of startupCandidates) {
const startedAt = state.deps.nowMs();
emit(state, { jobId: candidate.job.id, action: "started", runAtMs: startedAt });
try {
const result = await executeJobCore(state, candidate.job);
outcomes.push({
jobId: candidate.jobId,
status: result.status,
error: result.error,
summary: result.summary,
delivered: result.delivered,
sessionId: result.sessionId,
sessionKey: result.sessionKey,
model: result.model,
provider: result.provider,
usage: result.usage,
startedAt,
endedAt: state.deps.nowMs(),
});
} catch (err) {
outcomes.push({
jobId: candidate.jobId,
status: "error",
error: String(err),
startedAt,
endedAt: state.deps.nowMs(),
});
}
}
await locked(state, async () => {
await ensureLoaded(state, { forceReload: true, skipRecompute: true });
if (!state.store) {
return;
}
for (const result of outcomes) {
const job = state.store.jobs.find((entry) => entry.id === result.jobId);
if (!job) {
continue;
}
const shouldDelete = applyJobResult(state, job, {
status: result.status,
error: result.error,
delivered: result.delivered,
startedAt: result.startedAt,
endedAt: result.endedAt,
});
emitJobFinished(state, job, result, result.startedAt);
if (shouldDelete) {
state.store.jobs = state.store.jobs.filter((entry) => entry.id !== job.id);
emit(state, { jobId: job.id, action: "removed" });
}
}
// Preserve any new past-due nextRunAtMs values that became due while
// startup catch-up was running. They should execute on a future tick
// instead of being silently advanced.
recomputeNextRunsForMaintenance(state);
await persist(state);
});
}
export async function runDueJobs(state: CronServiceState) {