* feat(cron): add failure destination support with webhook mode and bestEffort handling

  Extends PR #24789 failure alerts with features from PR #29145:

  - Add webhook delivery mode for failure alerts (mode: 'webhook')
  - Add accountId support for multi-account channel configurations
  - Add bestEffort handling to skip alerts when job has bestEffort=true
  - Add separate failureDestination config (global + per-job in delivery)
  - Add duplicate prevention (prevents sending to same as primary delivery)
  - Add CLI flags: --failure-alert-mode, --failure-alert-account-id
  - Add UI fields for new options in web cron editor

* fix(cron): merge failureAlert mode/accountId and preserve failureDestination on updates

  - Fix mergeCronFailureAlert to merge mode and accountId fields
  - Fix mergeCronDelivery to preserve failureDestination on updates
  - Fix isSameDeliveryTarget to use 'announce' as default instead of 'none'
    to properly detect duplicates when delivery.mode is undefined

* fix(cron): validate webhook mode requires URL in resolveFailureDestination

  When mode is 'webhook' but no 'to' URL is provided, return null instead of
  creating an invalid plan that silently fails later.

* fix(cron): fail closed on webhook mode without URL and make failureDestination fields clearable

  - sendCronFailureAlert: fail closed when mode is webhook but URL is missing
  - mergeCronDelivery: use per-key presence checks so callers can clear
    nested failureDestination fields via cron.update

  Note: protocol:check shows missing internalEvents in Swift models - this is
  a pre-existing issue unrelated to these changes (upstream sync needed).
* fix(cron): use separate schema for failureDestination and fix type cast

  - Create CronFailureDestinationSchema excluding after/cooldownMs fields
  - Fix type cast in sendFailureNotificationAnnounce to use CronMessageChannel

* fix(cron): merge global failureDestination with partial job overrides

  When job has partial failureDestination config, fall back to global config
  for unset fields instead of treating it as a full override.

* fix(cron): avoid forcing announce mode and clear inherited to on mode change

  - UI: only include mode in patch if explicitly set to non-default
  - delivery.ts: clear inherited 'to' when job overrides mode, since URL
    semantics differ between announce and webhook modes

* fix(cron): preserve explicit to on mode override and always include mode in UI patches

  - delivery.ts: preserve job-level explicit 'to' when overriding mode
  - UI: always include mode in failureAlert patch so users can switch between
    announce/webhook

* fix(cron): allow clearing accountId and treat undefined global mode as announce

  - UI: always include accountId in patch so users can clear it
  - delivery.ts: treat undefined global mode as announce when comparing for
    clearing inherited 'to'

* Cron: harden failure destination routing and add regression coverage

* Cron: resolve failure destination review feedback

* Cron: drop unrelated timeout assertions from conflict resolution

* Cron: format cron CLI regression test

* Cron: align gateway cron test mock types

---------

Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
267 lines
7.4 KiB
TypeScript
267 lines
7.4 KiB
TypeScript
import fs from "node:fs/promises";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
|
import { CronService } from "./service.js";
|
|
|
|
/**
 * Logger stub handed to CronService in these tests.
 *
 * Each level is a vitest mock function, so calls are recorded but nothing is
 * printed. The mocks are shared across tests and cleared in `beforeEach`.
 */
const noopLogger = {
  debug: vi.fn(),
  info: vi.fn(),
  warn: vi.fn(),
  error: vi.fn(),
};
|
|
|
|
/**
 * Creates a fresh temporary directory for one test and returns the job-store
 * path to use inside it, together with a cleanup callback.
 *
 * Only the temporary root directory is created here; the `cron` subdirectory
 * and `jobs.json` themselves are not created by this helper.
 *
 * @returns `storePath` — `<tmp-root>/cron/jobs.json` inside the new directory;
 *   `cleanup` — recursively removes the temporary root. It uses `force: true`,
 *   so calling it when the directory is already gone does not reject.
 */
async function makeStorePath(): Promise<{ storePath: string; cleanup: () => Promise<void> }> {
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-cron-failure-alert-"));
  return {
    storePath: path.join(dir, "cron", "jobs.json"),
    cleanup: async () => {
      await fs.rm(dir, { recursive: true, force: true });
    },
  };
}
|
|
|
|
/**
 * Failure-alert behaviour of CronService.
 *
 * Every test forces runs with `cron.run(id, "force")` against a
 * `runIsolatedAgentJob` stub that always returns `status: "error"`, and
 * observes the `sendCronFailureAlert` mock passed to the service.
 *
 * Teardown (`cron.stop()` and removal of the temporary store) lives in a
 * `finally` block so a failing assertion does not leak the temp directory or
 * leave the service running.
 */
describe("CronService failure alerts", () => {
  beforeEach(() => {
    // Pin the clock so cooldown windows are controlled by the test.
    vi.useFakeTimers();
    vi.setSystemTime(new Date("2026-01-01T00:00:00.000Z"));
    // noopLogger is shared at module level; reset recorded calls between tests.
    noopLogger.debug.mockClear();
    noopLogger.info.mockClear();
    noopLogger.warn.mockClear();
    noopLogger.error.mockClear();
  });

  afterEach(() => {
    vi.useRealTimers();
  });

  it("alerts after configured consecutive failures and honors cooldown", async () => {
    const store = await makeStorePath();
    const sendCronFailureAlert = vi.fn(async () => undefined);
    const runIsolatedAgentJob = vi.fn(async () => ({
      status: "error" as const,
      error: "wrong model id",
    }));

    const cron = new CronService({
      storePath: store.storePath,
      cronEnabled: true,
      cronConfig: {
        failureAlert: {
          enabled: true,
          after: 2,
          cooldownMs: 60_000,
        },
      },
      log: noopLogger,
      enqueueSystemEvent: vi.fn(),
      requestHeartbeatNow: vi.fn(),
      runIsolatedAgentJob,
      sendCronFailureAlert,
    });

    try {
      await cron.start();
      const job = await cron.add({
        name: "daily report",
        enabled: true,
        schedule: { kind: "every", everyMs: 60_000 },
        sessionTarget: "isolated",
        wakeMode: "next-heartbeat",
        payload: { kind: "agentTurn", message: "run report" },
        delivery: { mode: "announce", channel: "telegram", to: "19098680" },
      });

      // 1st failure: below the `after: 2` threshold, so no alert yet.
      await cron.run(job.id, "force");
      expect(sendCronFailureAlert).not.toHaveBeenCalled();

      // 2nd failure: threshold reached; the alert carries the job's delivery
      // channel/recipient and the failure count.
      await cron.run(job.id, "force");
      expect(sendCronFailureAlert).toHaveBeenCalledTimes(1);
      expect(sendCronFailureAlert).toHaveBeenLastCalledWith(
        expect.objectContaining({
          job: expect.objectContaining({ id: job.id }),
          channel: "telegram",
          to: "19098680",
          text: expect.stringContaining('Cron job "daily report" failed 2 times'),
        }),
      );

      // 3rd failure: the fake clock has not been advanced, so this run is
      // expected to be suppressed by the 60s cooldown.
      await cron.run(job.id, "force");
      expect(sendCronFailureAlert).toHaveBeenCalledTimes(1);

      // Advance by exactly cooldownMs; the 4th failure should alert again and
      // report the running count.
      vi.advanceTimersByTime(60_000);
      await cron.run(job.id, "force");
      expect(sendCronFailureAlert).toHaveBeenCalledTimes(2);
      expect(sendCronFailureAlert).toHaveBeenLastCalledWith(
        expect.objectContaining({
          text: expect.stringContaining('Cron job "daily report" failed 4 times'),
        }),
      );
    } finally {
      cron.stop();
      await store.cleanup();
    }
  });

  it("supports per-job failure alert override when global alerts are disabled", async () => {
    const store = await makeStorePath();
    const sendCronFailureAlert = vi.fn(async () => undefined);
    const runIsolatedAgentJob = vi.fn(async () => ({
      status: "error" as const,
      error: "timeout",
    }));

    const cron = new CronService({
      storePath: store.storePath,
      cronEnabled: true,
      cronConfig: {
        // Globally off; the job below opts in with its own settings.
        failureAlert: {
          enabled: false,
        },
      },
      log: noopLogger,
      enqueueSystemEvent: vi.fn(),
      requestHeartbeatNow: vi.fn(),
      runIsolatedAgentJob,
      sendCronFailureAlert,
    });

    try {
      await cron.start();
      const job = await cron.add({
        name: "job with override",
        enabled: true,
        schedule: { kind: "every", everyMs: 60_000 },
        sessionTarget: "isolated",
        wakeMode: "next-heartbeat",
        payload: { kind: "agentTurn", message: "run report" },
        failureAlert: {
          after: 1,
          channel: "telegram",
          to: "12345",
          cooldownMs: 1,
        },
      });

      // With `after: 1` the very first failure must alert, routed to the
      // channel/recipient from the job-level failureAlert.
      await cron.run(job.id, "force");
      expect(sendCronFailureAlert).toHaveBeenCalledTimes(1);
      expect(sendCronFailureAlert).toHaveBeenLastCalledWith(
        expect.objectContaining({
          channel: "telegram",
          to: "12345",
        }),
      );
    } finally {
      cron.stop();
      await store.cleanup();
    }
  });

  it("respects per-job failureAlert=false and suppresses alerts", async () => {
    const store = await makeStorePath();
    const sendCronFailureAlert = vi.fn(async () => undefined);
    const runIsolatedAgentJob = vi.fn(async () => ({
      status: "error" as const,
      error: "auth error",
    }));

    const cron = new CronService({
      storePath: store.storePath,
      cronEnabled: true,
      cronConfig: {
        failureAlert: {
          enabled: true,
          after: 1,
        },
      },
      log: noopLogger,
      enqueueSystemEvent: vi.fn(),
      requestHeartbeatNow: vi.fn(),
      runIsolatedAgentJob,
      sendCronFailureAlert,
    });

    try {
      await cron.start();
      const job = await cron.add({
        name: "disabled alert job",
        enabled: true,
        schedule: { kind: "every", everyMs: 60_000 },
        sessionTarget: "isolated",
        wakeMode: "next-heartbeat",
        payload: { kind: "agentTurn", message: "run report" },
        // Explicit opt-out at job level, despite global `enabled: true`.
        failureAlert: false,
      });

      // Two failures would exceed the global `after: 1`, yet nothing is sent.
      await cron.run(job.id, "force");
      await cron.run(job.id, "force");
      expect(sendCronFailureAlert).not.toHaveBeenCalled();
    } finally {
      cron.stop();
      await store.cleanup();
    }
  });

  it("threads failure alert mode/accountId and skips best-effort jobs", async () => {
    const store = await makeStorePath();
    const sendCronFailureAlert = vi.fn(async () => undefined);
    const runIsolatedAgentJob = vi.fn(async () => ({
      status: "error" as const,
      error: "temporary upstream error",
    }));

    const cron = new CronService({
      storePath: store.storePath,
      cronEnabled: true,
      cronConfig: {
        failureAlert: {
          enabled: true,
          after: 1,
          mode: "webhook",
          accountId: "global-account",
        },
      },
      log: noopLogger,
      enqueueSystemEvent: vi.fn(),
      requestHeartbeatNow: vi.fn(),
      runIsolatedAgentJob,
      sendCronFailureAlert,
    });

    try {
      await cron.start();
      const normalJob = await cron.add({
        name: "normal alert job",
        enabled: true,
        schedule: { kind: "every", everyMs: 60_000 },
        sessionTarget: "isolated",
        wakeMode: "next-heartbeat",
        payload: { kind: "agentTurn", message: "run report" },
        delivery: { mode: "announce", channel: "telegram", to: "19098680" },
      });
      // Identical to normalJob except for `bestEffort: true` on delivery.
      const bestEffortJob = await cron.add({
        name: "best effort alert job",
        enabled: true,
        schedule: { kind: "every", everyMs: 60_000 },
        sessionTarget: "isolated",
        wakeMode: "next-heartbeat",
        payload: { kind: "agentTurn", message: "run report" },
        delivery: {
          mode: "announce",
          channel: "telegram",
          to: "19098680",
          bestEffort: true,
        },
      });

      // The global mode/accountId reach the sender. `to` is expected to be
      // undefined here even though the job's announce delivery has a recipient.
      await cron.run(normalJob.id, "force");
      expect(sendCronFailureAlert).toHaveBeenCalledTimes(1);
      expect(sendCronFailureAlert).toHaveBeenCalledWith(
        expect.objectContaining({
          mode: "webhook",
          accountId: "global-account",
          to: undefined,
        }),
      );

      // A failing best-effort job must not add a second alert.
      await cron.run(bestEffortJob.id, "force");
      expect(sendCronFailureAlert).toHaveBeenCalledTimes(1);
    } finally {
      cron.stop();
      await store.cleanup();
    }
  });
});
|