openclaw/src/infra/session-maintenance-warning.ts
Gustavo Madeira Santana e19a23520c
fix: unify session maintenance and cron run pruning (#13083)
* fix: prune stale session entries, cap entry count, and rotate sessions.json

The sessions.json file grows unbounded over time. Every heartbeat tick (default: 30m)
triggers multiple full rewrites, and session keys from groups, threads, and DMs
accumulate indefinitely with large embedded objects (skillsSnapshot,
systemPromptReport). At >50MB the synchronous JSON parse blocks the event loop,
causing Telegram webhook timeouts and effectively taking the bot down.

Three mitigations, all running inside saveSessionStoreUnlocked() on every write:

1. Prune stale entries: remove entries with updatedAt older than 30 days
   (configurable via session.maintenance.pruneDays in openclaw.json)

2. Cap entry count: keep only the 500 most recently updated entries
   (configurable via session.maintenance.maxEntries). Entries without updatedAt
   are evicted first.

3. File rotation: if the existing sessions.json exceeds 10MB before a write,
   rename it to sessions.json.bak.{timestamp} and keep only the 3 most recent
   backups (configurable via session.maintenance.rotateBytes).

All three thresholds are configurable under session.maintenance in openclaw.json
with Zod validation. No env vars.

Existing tests updated to use Date.now() instead of epoch-relative timestamps
(1, 2, 3) that would be incorrectly pruned as stale.

27 new tests covering pruning, capping, rotation, and integration scenarios.

* feat: auto-prune expired cron run sessions (#12289)

Add TTL-based reaper for isolated cron run sessions that accumulate
indefinitely in sessions.json.

New config option:
  cron.sessionRetention: string | false  (default: '24h')

The reaper piggybacks on the cron timer tick and throttles itself to
sweep at most once every 5 minutes. It removes session entries matching
the pattern cron:<jobId>:run:<uuid> whose updatedAt + retention < now.

Design follows the Kubernetes ttlSecondsAfterFinished pattern:
- Sessions are persisted normally (observability/debugging)
- A periodic reaper prunes expired entries
- Configurable retention with sensible default
- Set to false to disable pruning entirely

Files changed:
- src/config/types.cron.ts: Add sessionRetention to CronConfig
- src/config/zod-schema.ts: Add Zod validation for sessionRetention
- src/cron/session-reaper.ts: New reaper module (sweepCronRunSessions)
- src/cron/session-reaper.test.ts: 12 tests covering all paths
- src/cron/service/state.ts: Add cronConfig/sessionStorePath to deps
- src/cron/service/timer.ts: Wire reaper into onTimer tick
- src/gateway/server-cron.ts: Pass config and session store path to deps

Closes #12289

* fix: sweep cron session stores per agent

* docs: add changelog for session maintenance (#13083) (thanks @skyfallsin, @Glucksberg)

* fix: add warn-only session maintenance mode

* fix: warn-only maintenance defaults to active session

* fix: deliver maintenance warnings to active session

* docs: add session maintenance examples

* fix: accept duration and size maintenance thresholds

* refactor: share cron run session key check

* fix: format issues and replace defaultRuntime.warn with console.warn

---------

Co-authored-by: Pradeep Elankumaran <pradeepe@gmail.com>
Co-authored-by: Glucksberg <markuscontasul@gmail.com>
Co-authored-by: max <40643627+quotentiroler@users.noreply.github.com>
Co-authored-by: quotentiroler <max.nussbaumer@maxhealth.tech>
2026-02-09 20:42:35 -08:00

109 lines
3.3 KiB
TypeScript

import type { OpenClawConfig } from "../config/config.js";
import type { SessionEntry, SessionMaintenanceWarning } from "../config/sessions.js";
import { isDeliverableMessageChannel, normalizeMessageChannel } from "../utils/message-channel.js";
import { resolveSessionDeliveryTarget } from "./outbound/targets.js";
import { enqueueSystemEvent } from "./system-events.js";
/** Inputs needed to build and deliver one session maintenance warning. */
type WarningParams = {
/** Config object forwarded unchanged to the outbound delivery call. */
cfg: OpenClawConfig;
/** Session the warning is about; used as the de-duplication map key and as the system-event target. */
sessionKey: string;
/** Session entry handed to the delivery-target resolver to find where to send the warning. */
entry: SessionEntry;
/** Maintenance result whose limits and flags drive both the message text and the de-duplication key. */
warning: SessionMaintenanceWarning;
};
// Remembers, per session key, the context string of the last warning that was
// sent, so an identical warning is not delivered to the same session again.
// Lives for the lifetime of the module; nothing in this file removes entries.
const warnedContexts = new Map<string, string>();
/**
 * Decide whether warnings may be delivered in the current process.
 *
 * Delivery is suppressed when the `VITEST` environment variable is set to a
 * non-empty value or when `NODE_ENV` is exactly `"test"`.
 */
function shouldSendWarning(): boolean {
  const underVitest = Boolean(process.env.VITEST);
  const inTestEnv = process.env.NODE_ENV === "test";
  return !(underVitest || inTestEnv);
}
/**
 * Build the de-duplication key for a warning.
 *
 * The key captures the active session key, both configured limits, and which
 * limits would have triggered. Two warnings with the same key are treated as
 * the same warning by the caller.
 *
 * Fields are serialized as a fixed-position JSON tuple. Dropping falsy fields
 * before joining would shift positions, so that e.g. `pruneAfterMs=0,
 * maxEntries=500` and `pruneAfterMs=500, maxEntries=0` would produce the same
 * key; JSON encoding also keeps a `|` (or any other character) inside the
 * session key from being mistaken for a separator.
 *
 * @param params - Warning inputs; only `params.warning` is read.
 * @returns An opaque string, only meaningful for equality comparison.
 */
function buildWarningContext(params: WarningParams): string {
  const { warning } = params;
  // Missing values serialize as `null`, so every field keeps its own slot.
  return JSON.stringify([
    warning.activeSessionKey,
    warning.pruneAfterMs,
    warning.maxEntries,
    Boolean(warning.wouldPrune),
    Boolean(warning.wouldCap),
  ]);
}
/**
 * Render a millisecond duration as a short human-readable phrase using a
 * single unit (days, hours, minutes or seconds), rounded to the nearest whole
 * number and pluralized.
 *
 * The unit is the largest one the duration reaches. When rounding within that
 * unit would reach a full larger unit (for example 59 999 ms rounds to 60
 * seconds), the result is promoted to the larger unit so the output reads
 * "1 minute" rather than "60 seconds".
 *
 * @param ms - Duration in milliseconds.
 * @returns A phrase such as `"30 days"`, `"1 hour"` or `"0 seconds"`.
 */
function formatDuration(ms: number): string {
  const SECOND_MS = 1_000;
  const MINUTE_MS = 60_000;
  const HOUR_MS = 3_600_000;
  const DAY_MS = 86_400_000;

  const phrase = (count: number, unit: string): string =>
    `${count} ${unit}${count === 1 ? "" : "s"}`;

  if (ms >= DAY_MS) {
    return phrase(Math.round(ms / DAY_MS), "day");
  }
  if (ms >= HOUR_MS) {
    const hours = Math.round(ms / HOUR_MS);
    // Below one day, so rounding can reach at most exactly 24 hours.
    return hours >= 24 ? phrase(1, "day") : phrase(hours, "hour");
  }
  if (ms >= MINUTE_MS) {
    const minutes = Math.round(ms / MINUTE_MS);
    // Below one hour, so rounding can reach at most exactly 60 minutes.
    return minutes >= 60 ? phrase(1, "hour") : phrase(minutes, "minute");
  }
  const seconds = Math.round(ms / SECOND_MS);
  // Below one minute, so rounding can reach at most exactly 60 seconds.
  return seconds >= 60 ? phrase(1, "minute") : phrase(seconds, "second");
}
/**
 * Compose the user-facing warning message for a session that would have been
 * evicted by maintenance.
 *
 * The parenthesized reason lists each limit that would have triggered (age
 * and/or entry cap) joined by " and "; if neither flag is set it falls back to
 * a generic "over maintenance limits".
 *
 * @param warning - Maintenance result describing the limits and which applied.
 * @returns The full warning sentence, ready to send as message text.
 */
function buildWarningText(warning: SessionMaintenanceWarning): string {
  const causes: string[] = [
    ...(warning.wouldPrune ? [`older than ${formatDuration(warning.pruneAfterMs)}`] : []),
    ...(warning.wouldCap ? [`not in the most recent ${warning.maxEntries} sessions`] : []),
  ];
  // Every cause is non-empty, so an empty join means no cause was collected.
  const detail = causes.join(" and ") || "over maintenance limits";

  return [
    `⚠️ Session maintenance warning: this active session would be evicted (${detail}). `,
    `Maintenance is set to warn-only, so nothing was reset. `,
    `To enforce cleanup, set \`session.maintenance.mode: "enforce"\` or increase the limits.`,
  ].join("");
}
/**
 * Deliver a warn-only session maintenance notice to the affected session.
 *
 * Flow:
 * 1. Do nothing when warnings are disabled for this process.
 * 2. Skip if the same warning context was already recorded for this session
 *    key; otherwise record it before attempting delivery.
 * 3. Resolve the session's delivery target. If there is no channel/recipient,
 *    or the channel is not deliverable, enqueue the text as a system event
 *    for the session instead.
 * 4. Otherwise send the text through outbound delivery; on failure, log a
 *    console warning and fall back to the system event.
 *
 * @param params - Config, session key/entry, and the maintenance warning.
 * @returns Resolves once the warning has been delivered or queued.
 */
export async function deliverSessionMaintenanceWarning(params: WarningParams): Promise<void> {
  if (!shouldSendWarning()) {
    return;
  }

  const { sessionKey } = params;
  const dedupeKey = buildWarningContext(params);
  const alreadyWarned = warnedContexts.get(sessionKey) === dedupeKey;
  if (alreadyWarned) {
    return;
  }
  // Recorded before any await so overlapping calls do not send duplicates.
  warnedContexts.set(sessionKey, dedupeKey);

  const message = buildWarningText(params.warning);
  // Shared fallback: surface the warning as a system event on the session.
  const queueAsSystemEvent = (): void => {
    enqueueSystemEvent(message, { sessionKey });
  };

  const target = resolveSessionDeliveryTarget({
    entry: params.entry,
    requestedChannel: "last",
  });
  if (!target.channel || !target.to) {
    queueAsSystemEvent();
    return;
  }

  const channel = normalizeMessageChannel(target.channel) ?? target.channel;
  if (!isDeliverableMessageChannel(channel)) {
    queueAsSystemEvent();
    return;
  }

  try {
    // Loaded on demand, only when there is actually something to send.
    const outbound = await import("./outbound/deliver.js");
    await outbound.deliverOutboundPayloads({
      cfg: params.cfg,
      channel,
      to: target.to,
      accountId: target.accountId,
      threadId: target.threadId,
      payloads: [{ text: message }],
    });
  } catch (err) {
    console.warn(`Failed to deliver session maintenance warning: ${String(err)}`);
    queueAsSystemEvent();
  }
}