openclaw/src/infra/session-maintenance-warning.ts

111 lines
3.5 KiB
TypeScript
Raw Normal View History

2026-02-17 13:36:48 +09:00
import { resolveSessionAgentId } from "../agents/agent-scope.js";
fix: unify session maintenance and cron run pruning (#13083) * fix: prune stale session entries, cap entry count, and rotate sessions.json The sessions.json file grows unbounded over time. Every heartbeat tick (default: 30m) triggers multiple full rewrites, and session keys from groups, threads, and DMs accumulate indefinitely with large embedded objects (skillsSnapshot, systemPromptReport). At >50MB the synchronous JSON parse blocks the event loop, causing Telegram webhook timeouts and effectively taking the bot down. Three mitigations, all running inside saveSessionStoreUnlocked() on every write: 1. Prune stale entries: remove entries with updatedAt older than 30 days (configurable via session.maintenance.pruneDays in openclaw.json) 2. Cap entry count: keep only the 500 most recently updated entries (configurable via session.maintenance.maxEntries). Entries without updatedAt are evicted first. 3. File rotation: if the existing sessions.json exceeds 10MB before a write, rename it to sessions.json.bak.{timestamp} and keep only the 3 most recent backups (configurable via session.maintenance.rotateBytes). All three thresholds are configurable under session.maintenance in openclaw.json with Zod validation. No env vars. Existing tests updated to use Date.now() instead of epoch-relative timestamps (1, 2, 3) that would be incorrectly pruned as stale. 27 new tests covering pruning, capping, rotation, and integration scenarios. * feat: auto-prune expired cron run sessions (#12289) Add TTL-based reaper for isolated cron run sessions that accumulate indefinitely in sessions.json. New config option: cron.sessionRetention: string | false (default: '24h') The reaper runs piggy-backed on the cron timer tick, self-throttled to sweep at most every 5 minutes. It removes session entries matching the pattern cron:<jobId>:run:<uuid> whose updatedAt + retention < now. 
Design follows the Kubernetes ttlSecondsAfterFinished pattern: - Sessions are persisted normally (observability/debugging) - A periodic reaper prunes expired entries - Configurable retention with sensible default - Set to false to disable pruning entirely Files changed: - src/config/types.cron.ts: Add sessionRetention to CronConfig - src/config/zod-schema.ts: Add Zod validation for sessionRetention - src/cron/session-reaper.ts: New reaper module (sweepCronRunSessions) - src/cron/session-reaper.test.ts: 12 tests covering all paths - src/cron/service/state.ts: Add cronConfig/sessionStorePath to deps - src/cron/service/timer.ts: Wire reaper into onTimer tick - src/gateway/server-cron.ts: Pass config and session store path to deps Closes #12289 * fix: sweep cron session stores per agent * docs: add changelog for session maintenance (#13083) (thanks @skyfallsin, @Glucksberg) * fix: add warn-only session maintenance mode * fix: warn-only maintenance defaults to active session * fix: deliver maintenance warnings to active session * docs: add session maintenance examples * fix: accept duration and size maintenance thresholds * refactor: share cron run session key check * fix: format issues and replace defaultRuntime.warn with console.warn --------- Co-authored-by: Pradeep Elankumaran <pradeepe@gmail.com> Co-authored-by: Glucksberg <markuscontasul@gmail.com> Co-authored-by: max <40643627+quotentiroler@users.noreply.github.com> Co-authored-by: quotentiroler <max.nussbaumer@maxhealth.tech>
2026-02-09 23:42:35 -05:00
import type { OpenClawConfig } from "../config/config.js";
import type { SessionEntry, SessionMaintenanceWarning } from "../config/sessions.js";
import { isDeliverableMessageChannel, normalizeMessageChannel } from "../utils/message-channel.js";
import { resolveSessionDeliveryTarget } from "./outbound/targets.js";
import { enqueueSystemEvent } from "./system-events.js";
/** Arguments accepted by `deliverSessionMaintenanceWarning` and its helpers. */
type WarningParams = {
  /** Config forwarded to outbound delivery and agent-id resolution. */
  cfg: OpenClawConfig;
  /** Key of the session being warned; also the de-duplication key. */
  sessionKey: string;
  /** Session entry used to resolve the delivery target. */
  entry: SessionEntry;
  /** Details of the maintenance limits the session would violate. */
  warning: SessionMaintenanceWarning;
};
/**
 * Most recent warning context recorded per session key.
 * Consulted before sending so an unchanged warning is not repeated.
 */
const warnedContexts: Map<string, string> = new Map();
/**
 * Reports whether warnings may be sent in the current process.
 *
 * @returns `false` when `VITEST` is set to a non-empty value or `NODE_ENV`
 *   is `"test"`; `true` otherwise.
 */
function shouldSendWarning(): boolean {
  const { VITEST: vitestFlag, NODE_ENV: nodeEnv } = process.env;
  if (vitestFlag) {
    return false;
  }
  return nodeEnv !== "test";
}
/**
 * Builds a `|`-separated string summarising the warning's inputs.
 *
 * Falsy parts (empty strings, `0`, missing values) are dropped before joining.
 *
 * @param params - Warning parameters; only `params.warning` is read.
 * @returns The joined context string.
 */
function buildWarningContext(params: WarningParams): string {
  const details = params.warning;
  const pruneTag = details.wouldPrune ? "prune" : "";
  const capTag = details.wouldCap ? "cap" : "";
  const parts = [
    details.activeSessionKey,
    details.pruneAfterMs,
    details.maxEntries,
    pruneTag,
    capTag,
  ];
  const present = parts.filter(Boolean);
  return present.join("|");
}
/**
 * Formats a millisecond duration as a rounded count of the largest fitting
 * unit (days, hours, minutes or seconds), pluralised as needed.
 *
 * The unit is chosen from the raw value, then the count is rounded. When the
 * rounding reaches a full larger unit (e.g. 59.99 minutes rounds to 60), the
 * result is promoted to that unit so the output reads "1 hour" rather than
 * "60 minutes".
 *
 * @param ms - Duration in milliseconds.
 * @returns Human-readable text such as `"30 days"` or `"1 hour"`.
 */
function formatDuration(ms: number): string {
  const largerUnits = [
    { label: "day", size: 86_400_000 },
    { label: "hour", size: 3_600_000 },
    { label: "minute", size: 60_000 },
  ];

  // Seconds are the fallback when the value is below one minute.
  let unit = { label: "second", size: 1000 };
  // The unit one step above the chosen one, if any.
  let nextLarger: { label: string; size: number } | undefined;
  for (const candidate of largerUnits) {
    if (ms >= candidate.size) {
      unit = candidate;
      break;
    }
    nextLarger = candidate;
  }

  let count = Math.round(ms / unit.size);
  // Rounding up at the boundary yields exactly one of the larger unit.
  if (nextLarger !== undefined && count * unit.size >= nextLarger.size) {
    count = 1;
    unit = nextLarger;
  }
  return `${count} ${unit.label}${count === 1 ? "" : "s"}`;
}
/**
 * Composes the user-facing warning message for a warn-only maintenance hit.
 *
 * @param warning - Which limits would evict the session and their values.
 * @returns The full warning text. The parenthesised reason lists the prune
 *   and/or cap cause, or a generic phrase when neither flag is set.
 */
function buildWarningText(warning: SessionMaintenanceWarning): string {
  const pruneReason = warning.wouldPrune
    ? [`older than ${formatDuration(warning.pruneAfterMs)}`]
    : [];
  const capReason = warning.wouldCap
    ? [`not in the most recent ${warning.maxEntries} sessions`]
    : [];
  const causes: string[] = [...pruneReason, ...capReason];

  let reasonText = "over maintenance limits";
  if (causes.length > 0) {
    reasonText = causes.join(" and ");
  }

  const headline = `⚠️ Session maintenance warning: this active session would be evicted (${reasonText}). `;
  const status = `Maintenance is set to warn-only, so nothing was reset. `;
  const remedy = `To enforce cleanup, set \`session.maintenance.mode: "enforce"\` or increase the limits.`;
  return headline + status + remedy;
}
/**
 * Tells a session that warn-only maintenance would have evicted it.
 *
 * Does nothing when `shouldSendWarning()` is false, or when the same warning
 * context was already recorded for this session key. Otherwise the warning is
 * sent to the session's resolved "last" delivery target. If no target resolves,
 * the channel is not deliverable, or delivery throws, the text is queued as a
 * system event for the session instead.
 *
 * @param params - Config, session key/entry, and the warning details.
 * @returns A promise that resolves once the warning was sent or queued.
 */
export async function deliverSessionMaintenanceWarning(params: WarningParams): Promise<void> {
  if (!shouldSendWarning()) {
    return;
  }

  const contextKey = buildWarningContext(params);
  if (warnedContexts.get(params.sessionKey) === contextKey) {
    return;
  }
  // Recorded before any delivery attempt, so an identical warning is not
  // re-sent for this session even if the attempt below fails.
  warnedContexts.set(params.sessionKey, contextKey);

  const text = buildWarningText(params.warning);
  const target = resolveSessionDeliveryTarget({
    entry: params.entry,
    requestedChannel: "last",
  });
  if (!target.channel || !target.to) {
    enqueueSystemEvent(text, { sessionKey: params.sessionKey });
    return;
  }

  const channel = normalizeMessageChannel(target.channel) ?? target.channel;
  if (!isDeliverableMessageChannel(channel)) {
    enqueueSystemEvent(text, { sessionKey: params.sessionKey });
    return;
  }

  try {
    // The delivery module is imported dynamically, only when a send is attempted.
    const { deliverOutboundPayloads } = await import("./outbound/deliver.js");
    await deliverOutboundPayloads({
      cfg: params.cfg,
      channel,
      to: target.to,
      accountId: target.accountId,
      threadId: target.threadId,
      payloads: [{ text }],
      agentId: resolveSessionAgentId({ sessionKey: params.sessionKey, config: params.cfg }),
    });
  } catch (err) {
    console.warn(`Failed to deliver session maintenance warning: ${String(err)}`);
    // Fall back to a system event so the warning is not lost.
    enqueueSystemEvent(text, { sessionKey: params.sessionKey });
  }
}