From a5f0a9240fb135293c7dcc60c9a4b3819b5bba61 Mon Sep 17 00:00:00 2001 From: zerone0x Date: Mon, 2 Mar 2026 10:02:24 +0800 Subject: [PATCH] fix(cron): retry rename on EBUSY and fall back to copyFile on Windows Landed from contributor PR #16932 with additional changelog alignment and verification. --- CHANGELOG.md | 1 + src/cron/store.test.ts | 53 ++++++++++++++++++++++++++++++++++++++++++ src/cron/store.ts | 27 ++++++++++++++++++++- 3 files changed, 80 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2be9e346e5f..515d1b6a341 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -106,6 +106,7 @@ Docs: https://docs.openclaw.ai - Nodes/Screen recording guardrails: cap `nodes` tool `screen_record` `durationMs` to 5 minutes at both schema-validation and runtime invocation layers to prevent long-running blocking captures from unbounded durations. Landed from contributor PR #31106 by @BlueBirdBack. Thanks @BlueBirdBack. - Telegram/Empty final replies: skip outbound send for null/undefined final text payloads without media so Telegram typing indicators do not linger on `text must be non-empty` errors. Landed from contributor PR #30969 by @haosenwang1018. Thanks @haosenwang1018. - Routing/Binding peer-kind parity: treat `peer.kind` `group` and `channel` as equivalent for binding scope matching (while keeping `direct` separate) so Slack/public channel bindings do not silently fall through. Landed from contributor PR #31135 by @Sid-Qin. Thanks @Sid-Qin. +- Cron/Store EBUSY fallback: retry `rename` on `EBUSY` and use `copyFile` fallback on Windows when replacing cron store files so busy-file contention no longer causes false write failures. (#16932) Thanks @sudhanva-chakra. - Agents/FS workspace default: honor documented host file-tool default `tools.fs.workspaceOnly=false` when unset so host `write`/`edit` calls are not incorrectly workspace-restricted unless explicitly enabled. Landed from contributor PR #31128 by @SaucePackets. Thanks @SaucePackets. - Cron/Timer hot-loop guard: enforce a minimum timer re-arm delay when stale past-due jobs would otherwise trigger repeated `setTimeout(0)` loops, preventing event-loop saturation and log-flood behavior. (#29853) Thanks @FlamesCN. - Gateway/CLI session recovery: handle expired CLI session IDs gracefully by clearing stale session state and retrying without crashing gateway runs. Landed from contributor PR #31090 by @frankekn. Thanks @frankekn. diff --git a/src/cron/store.test.ts b/src/cron/store.test.ts index 5a0cff0cc67..29fc65084fd 100644 --- a/src/cron/store.test.ts +++ b/src/cron/store.test.ts @@ -92,3 +92,56 @@ describe("cron store", () => { await store.cleanup(); }); }); + +describe("saveCronStore", () => { + const dummyStore: CronStoreFile = { version: 1, jobs: [] }; + + it("persists and round-trips a store file", async () => { + const { storePath, cleanup } = await makeStorePath(); + await saveCronStore(storePath, dummyStore); + const loaded = await loadCronStore(storePath); + expect(loaded).toEqual(dummyStore); + await cleanup(); + }); + + it("retries rename on EBUSY then succeeds", async () => { + const { storePath, cleanup } = await makeStorePath(); + + const origRename = fs.rename.bind(fs); + let ebusyCount = 0; + const spy = vi.spyOn(fs, "rename").mockImplementation(async (src, dest) => { + if (ebusyCount < 2) { + ebusyCount++; + const err = new Error("EBUSY") as NodeJS.ErrnoException; + err.code = "EBUSY"; + throw err; + } + return origRename(src, dest); + }); + + await saveCronStore(storePath, dummyStore); + expect(ebusyCount).toBe(2); + const loaded = await loadCronStore(storePath); + expect(loaded).toEqual(dummyStore); + + spy.mockRestore(); + await cleanup(); + }); + + it("falls back to copyFile on EPERM (Windows)", async () => { + const { storePath, cleanup } = await makeStorePath(); + + const spy = vi.spyOn(fs, "rename").mockImplementation(async () => { + const err = new Error("EPERM") as NodeJS.ErrnoException; + err.code = "EPERM"; + throw err; + }); + + await saveCronStore(storePath, dummyStore); + const loaded = await loadCronStore(storePath); + expect(loaded).toEqual(dummyStore); + + spy.mockRestore(); + await cleanup(); + }); +}); diff --git a/src/cron/store.ts b/src/cron/store.ts index 2a460f6602b..995c7dfbf3d 100644 --- a/src/cron/store.ts +++ b/src/cron/store.ts @@ -71,5 +71,30 @@ export async function saveCronStore(storePath: string, store: CronStoreFile) { // best-effort } } - await fs.promises.rename(tmp, storePath); + await renameWithRetry(tmp, storePath); +} + +const RENAME_MAX_RETRIES = 3; +const RENAME_BASE_DELAY_MS = 50; + +async function renameWithRetry(src: string, dest: string): Promise { + for (let attempt = 0; attempt <= RENAME_MAX_RETRIES; attempt++) { + try { + await fs.promises.rename(src, dest); + return; + } catch (err) { + const code = (err as { code?: string }).code; + if (code === "EBUSY" && attempt < RENAME_MAX_RETRIES) { + await new Promise((resolve) => setTimeout(resolve, RENAME_BASE_DELAY_MS * 2 ** attempt)); + continue; + } + // Windows doesn't reliably support atomic replace via rename when dest exists. + if (code === "EPERM" || code === "EEXIST") { + await fs.promises.copyFile(src, dest); + await fs.promises.unlink(src).catch(() => {}); + return; + } + throw err; + } + } }