openclaw/src/config/sessions/transcript.ts

165 lines
4.4 KiB
TypeScript
Raw Normal View History

import { CURRENT_SESSION_VERSION, SessionManager } from "@mariozechner/pi-coding-agent";
import fs from "node:fs";
import path from "node:path";
import type { SessionEntry } from "./types.js";
import { emitSessionTranscriptUpdate } from "../../sessions/transcript-events.js";
import { resolveDefaultSessionStorePath, resolveSessionFilePath } from "./paths.js";
import { loadSessionStore, updateSessionStore } from "./store.js";
function stripQuery(value: string): string {
const noHash = value.split("#")[0] ?? value;
return noHash.split("?")[0] ?? noHash;
}
function extractFileNameFromMediaUrl(value: string): string | null {
const trimmed = value.trim();
if (!trimmed) {
return null;
}
const cleaned = stripQuery(trimmed);
try {
const parsed = new URL(cleaned);
const base = path.basename(parsed.pathname);
if (!base) {
return null;
}
try {
return decodeURIComponent(base);
} catch {
return base;
}
} catch {
const base = path.basename(cleaned);
if (!base || base === "/" || base === ".") {
return null;
}
return base;
}
}
export function resolveMirroredTranscriptText(params: {
text?: string;
mediaUrls?: string[];
}): string | null {
const mediaUrls = params.mediaUrls?.filter((url) => url && url.trim()) ?? [];
if (mediaUrls.length > 0) {
const names = mediaUrls
.map((url) => extractFileNameFromMediaUrl(url))
.filter((name): name is string => Boolean(name && name.trim()));
if (names.length > 0) {
return names.join(", ");
}
return "media";
}
const text = params.text ?? "";
const trimmed = text.trim();
return trimmed ? trimmed : null;
}
async function ensureSessionHeader(params: {
sessionFile: string;
sessionId: string;
}): Promise<void> {
if (fs.existsSync(params.sessionFile)) {
return;
}
await fs.promises.mkdir(path.dirname(params.sessionFile), { recursive: true });
const header = {
type: "session",
version: CURRENT_SESSION_VERSION,
id: params.sessionId,
timestamp: new Date().toISOString(),
cwd: process.cwd(),
};
await fs.promises.writeFile(params.sessionFile, `${JSON.stringify(header)}\n`, {
encoding: "utf-8",
mode: 0o600,
});
}
export async function appendAssistantMessageToSessionTranscript(params: {
agentId?: string;
sessionKey: string;
text?: string;
mediaUrls?: string[];
/** Optional override for store path (mostly for tests). */
storePath?: string;
}): Promise<{ ok: true; sessionFile: string } | { ok: false; reason: string }> {
const sessionKey = params.sessionKey.trim();
if (!sessionKey) {
return { ok: false, reason: "missing sessionKey" };
}
const mirrorText = resolveMirroredTranscriptText({
text: params.text,
mediaUrls: params.mediaUrls,
});
if (!mirrorText) {
return { ok: false, reason: "empty text" };
}
const storePath = params.storePath ?? resolveDefaultSessionStorePath(params.agentId);
const store = loadSessionStore(storePath, { skipCache: true });
const entry = store[sessionKey] as SessionEntry | undefined;
if (!entry?.sessionId) {
return { ok: false, reason: `unknown sessionKey: ${sessionKey}` };
}
let sessionFile: string;
try {
sessionFile = resolveSessionFilePath(entry.sessionId, entry, {
agentId: params.agentId,
sessionsDir: path.dirname(storePath),
});
} catch (err) {
return {
ok: false,
reason: err instanceof Error ? err.message : String(err),
};
}
await ensureSessionHeader({ sessionFile, sessionId: entry.sessionId });
const sessionManager = SessionManager.open(sessionFile);
sessionManager.appendMessage({
role: "assistant",
content: [{ type: "text", text: mirrorText }],
api: "openai-responses",
2026-01-30 03:15:10 +01:00
provider: "openclaw",
model: "delivery-mirror",
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
total: 0,
},
},
stopReason: "stop",
timestamp: Date.now(),
});
if (!entry.sessionFile || entry.sessionFile !== sessionFile) {
fix: unify session maintenance and cron run pruning (#13083) * fix: prune stale session entries, cap entry count, and rotate sessions.json The sessions.json file grows unbounded over time. Every heartbeat tick (default: 30m) triggers multiple full rewrites, and session keys from groups, threads, and DMs accumulate indefinitely with large embedded objects (skillsSnapshot, systemPromptReport). At >50MB the synchronous JSON parse blocks the event loop, causing Telegram webhook timeouts and effectively taking the bot down. Three mitigations, all running inside saveSessionStoreUnlocked() on every write: 1. Prune stale entries: remove entries with updatedAt older than 30 days (configurable via session.maintenance.pruneDays in openclaw.json) 2. Cap entry count: keep only the 500 most recently updated entries (configurable via session.maintenance.maxEntries). Entries without updatedAt are evicted first. 3. File rotation: if the existing sessions.json exceeds 10MB before a write, rename it to sessions.json.bak.{timestamp} and keep only the 3 most recent backups (configurable via session.maintenance.rotateBytes). All three thresholds are configurable under session.maintenance in openclaw.json with Zod validation. No env vars. Existing tests updated to use Date.now() instead of epoch-relative timestamps (1, 2, 3) that would be incorrectly pruned as stale. 27 new tests covering pruning, capping, rotation, and integration scenarios. * feat: auto-prune expired cron run sessions (#12289) Add TTL-based reaper for isolated cron run sessions that accumulate indefinitely in sessions.json. New config option: cron.sessionRetention: string | false (default: '24h') The reaper runs piggy-backed on the cron timer tick, self-throttled to sweep at most every 5 minutes. It removes session entries matching the pattern cron:<jobId>:run:<uuid> whose updatedAt + retention < now. Design follows the Kubernetes ttlSecondsAfterFinished pattern: - Sessions are persisted normally (observability/debugging) - A periodic reaper prunes expired entries - Configurable retention with sensible default - Set to false to disable pruning entirely Files changed: - src/config/types.cron.ts: Add sessionRetention to CronConfig - src/config/zod-schema.ts: Add Zod validation for sessionRetention - src/cron/session-reaper.ts: New reaper module (sweepCronRunSessions) - src/cron/session-reaper.test.ts: 12 tests covering all paths - src/cron/service/state.ts: Add cronConfig/sessionStorePath to deps - src/cron/service/timer.ts: Wire reaper into onTimer tick - src/gateway/server-cron.ts: Pass config and session store path to deps Closes #12289 * fix: sweep cron session stores per agent * docs: add changelog for session maintenance (#13083) (thanks @skyfallsin, @Glucksberg) * fix: add warn-only session maintenance mode * fix: warn-only maintenance defaults to active session * fix: deliver maintenance warnings to active session * docs: add session maintenance examples * fix: accept duration and size maintenance thresholds * refactor: share cron run session key check * fix: format issues and replace defaultRuntime.warn with console.warn --------- Co-authored-by: Pradeep Elankumaran <pradeepe@gmail.com> Co-authored-by: Glucksberg <markuscontasul@gmail.com> Co-authored-by: max <40643627+quotentiroler@users.noreply.github.com> Co-authored-by: quotentiroler <max.nussbaumer@maxhealth.tech>
2026-02-09 23:42:35 -05:00
await updateSessionStore(
storePath,
(current) => {
current[sessionKey] = {
...entry,
sessionFile,
};
},
{ activeSessionKey: sessionKey },
);
}
emitSessionTranscriptUpdate(sessionFile);
return { ok: true, sessionFile };
}