316 lines
11 KiB
TypeScript
316 lines
11 KiB
TypeScript
import { readFileSync, readdirSync, existsSync, statSync } from "node:fs";
|
|
import { join } from "node:path";
|
|
import { resolveOpenClawStateDir } from "@/lib/workspace";
|
|
|
|
// NOTE(review): presumably the Next.js route segment option that makes this
// handler run on every request instead of being statically cached — confirm.
export const dynamic = "force-dynamic";
|
|
|
|
/**
 * `agents` directory under the OpenClaw state dir. Every sub-directory is
 * treated as one agent, and its transcripts are looked up in `<agent>/sessions/`.
 */
const AGENTS_DIR = join(resolveOpenClawStateDir(), "agents");
|
|
|
|
/** Visible text carried by a message. */
type TextPart = { type: "text"; text: string };

/** "Thinking" content carried by a message, kept separate from its text. */
type ThinkingPart = { type: "thinking"; thinking: string };

/** A tool invocation; `output` is filled in once a matching result is seen. */
type ToolCallPart = {
  type: "tool-call";
  toolName: string;
  toolCallId: string;
  args?: unknown;
  output?: string;
};

/** One fragment of a parsed transcript message, discriminated by `type`. */
type MessagePart = TextPart | ThinkingPart | ToolCallPart;
|
|
|
|
/** A transcript entry reduced to the fields returned by this endpoint. */
interface ParsedMessage {
  /** Id of the transcript entry, or a synthetic `msg-<index>` when it has none. */
  id: string;
  /** Speaker of the message. */
  role: "user" | "assistant" | "system";
  /** Non-empty list of text / thinking / tool-call fragments. */
  parts: Array<MessagePart>;
  /** The entry's `timestamp`, or an ISO string derived from its `ts` field. */
  timestamp: string;
}
|
|
|
|
/**
 * Route overview: find the agent transcript that belongs to a cron run.
 *
 * For main-target cron runs, the agent response lives in the main session
 * transcript files. This endpoint searches those session files for the cron
 * payload text near the run timestamp and returns the matching conversation
 * (the user message plus the assistant response that follows it).
 */
|
|
|
|
/**
 * Try to find a cron-specific session for `jobId` in any agent's
 * `sessions/sessions.json`.
 *
 * A store key qualifies when it contains the segment `:cron:<jobId>` that ends
 * either at the end of the key or at another `:`, and the key is not a per-run
 * key (does not contain `:run:`). Requiring the segment boundary prevents job
 * `abc` from picking up the session of job `abcdef`.
 * NOTE(review): this assumes session keys are `:`-delimited — confirm against
 * the code that writes sessions.json.
 *
 * @param jobId Cron job id to look up.
 * @returns The `sessionId` of the first matching entry whose
 *   `<sessionId>.jsonl` transcript exists on disk, or `null` when there is none.
 *   File-system and JSON errors are swallowed and treated as "not found".
 */
function findCronSessionId(jobId: string): string | null {
  if (!existsSync(AGENTS_DIR)) {
    return null;
  }

  const marker = `:cron:${jobId}`;

  /** True when `key` names this job's session (not a run, not a longer job id). */
  const isJobSessionKey = (key: string): boolean => {
    if (key.includes(":run:")) {
      return false;
    }
    for (let at = key.indexOf(marker); at !== -1; at = key.indexOf(marker, at + 1)) {
      const following = key.charAt(at + marker.length);
      if (following === "" || following === ":") {
        return true;
      }
    }
    return false;
  };

  try {
    for (const agentDir of readdirSync(AGENTS_DIR, { withFileTypes: true })) {
      if (!agentDir.isDirectory()) {
        continue;
      }
      const sessionsDir = join(AGENTS_DIR, agentDir.name, "sessions");
      const sessionsJsonPath = join(sessionsDir, "sessions.json");
      if (!existsSync(sessionsJsonPath)) {
        continue;
      }

      try {
        const store: unknown = JSON.parse(readFileSync(sessionsJsonPath, "utf-8"));
        if (store === null || typeof store !== "object") {
          continue;
        }

        for (const [key, entry] of Object.entries(store)) {
          if (!isJobSessionKey(key)) {
            continue;
          }
          const sessionId = (entry as { sessionId?: unknown } | null)?.sessionId;
          if (typeof sessionId !== "string" || !sessionId.trim()) {
            continue;
          }
          // Only report sessions whose transcript file actually exists.
          if (existsSync(join(sessionsDir, `${sessionId}.jsonl`))) {
            return sessionId;
          }
        }
      } catch {
        // Malformed or unreadable sessions.json: move on to the next agent.
      }
    }
  } catch {
    // Agents directory could not be listed: treat as "no cron session".
  }

  return null;
}
|
|
|
|
/**
 * Collect `.jsonl` session transcripts that might contain a cron run.
 *
 * Every `<agent>/sessions/*.jsonl` file whose modification time lies strictly
 * within two hours of `runAtMs` is a candidate. Unreadable directories and
 * files are skipped silently.
 *
 * @param runAtMs Run time in epoch milliseconds.
 * @returns Up to 10 file paths, ordered by how close their mtime is to `runAtMs`.
 */
function findCandidateSessionFiles(runAtMs: number): string[] {
  const WINDOW_MS = 2 * 60 * 60 * 1000;
  const MAX_CANDIDATES = 10;
  const distance = (mtimeMs: number): number => Math.abs(mtimeMs - runAtMs);

  const found: Array<{ path: string; mtimeMs: number }> = [];
  if (!existsSync(AGENTS_DIR)) {
    return [];
  }

  try {
    for (const dirent of readdirSync(AGENTS_DIR, { withFileTypes: true })) {
      if (!dirent.isDirectory()) {
        continue;
      }
      const sessionsDir = join(AGENTS_DIR, dirent.name, "sessions");
      if (!existsSync(sessionsDir)) {
        continue;
      }

      let names: string[];
      try {
        names = readdirSync(sessionsDir);
      } catch {
        // Sessions directory not listable: skip this agent.
        continue;
      }

      for (const name of names) {
        if (!name.endsWith(".jsonl")) {
          continue;
        }
        const fullPath = join(sessionsDir, name);
        try {
          const { mtimeMs } = statSync(fullPath);
          if (distance(mtimeMs) < WINDOW_MS) {
            found.push({ path: fullPath, mtimeMs });
          }
        } catch {
          // File vanished or is not stat-able: skip it.
        }
      }
    }
  } catch {
    // Agents directory not listable: fall through with whatever was found.
  }

  // Closest modification time first, then keep only the most likely few.
  found.sort((left, right) => distance(left.mtimeMs) - distance(right.mtimeMs));
  return found.slice(0, MAX_CANDIDATES).map((candidate) => candidate.path);
}
|
|
|
|
/** Upper bound on the characters of tool output kept per tool call. */
const MAX_TOOL_OUTPUT_CHARS = 5000;

/**
 * Flatten the payload of a tool-result part into one string.
 *
 * Order of preference: string `content`; the `text` of every `type: "text"`
 * item of an array `content` joined by newlines; string `output`; otherwise
 * the JSON encoding of `output`, `content` or `""`. Non-object items inside an
 * array `content` are ignored rather than aborting the whole message.
 */
function toolResultText(part: { content?: unknown; output?: unknown }): string {
  const { content, output } = part;
  if (typeof content === "string") {
    return content;
  }
  if (Array.isArray(content)) {
    return content
      .filter((item) => item !== null && typeof item === "object" && item.type === "text")
      .map((item) => item.text)
      .join("\n");
  }
  if (typeof output === "string") {
    return output;
  }
  return JSON.stringify(output ?? content ?? "");
}

/**
 * Parse message entries from a JSONL transcript, optionally filtered by time.
 *
 * Only lines that parse as JSON with `type === "message"` and a `message`
 * object are considered; blank and malformed lines are skipped. Text and
 * thinking parts are kept when non-blank. Tool calls (`tool_use` / `tool-call`)
 * become `tool-call` parts, and a later tool result (`tool_result` /
 * `tool-result`) with the same id is merged into that part as `output`
 * (truncated to `MAX_TOOL_OUTPUT_CHARS`). The call is looked up in the message
 * being built first, then in earlier messages, newest first. A result whose
 * call was never seen becomes a stand-alone `tool-call` part named `"tool"`.
 * Messages that end up with no parts are dropped.
 *
 * Tool calls without an id get a deterministic `tool-anon-<n>` id, so that
 * several anonymous calls never collide.
 *
 * NOTE(review): `message.role` is passed through without validation.
 *
 * @param content Raw JSONL transcript text.
 * @param opts Optional inclusive bounds in epoch milliseconds. The entry time
 *   is `entry.timestamp` parsed as a date, else `entry.ts`, else 0.
 * @returns Parsed messages in file order.
 */
function parseMessagesInRange(
  content: string,
  opts?: { afterMs?: number; beforeMs?: number },
): ParsedMessage[] {
  type ToolCall = Extract<MessagePart, { type: "tool-call" }>;

  const messages: ParsedMessage[] = [];
  const pendingToolCalls = new Map<string, { toolName: string; args?: unknown }>();
  const afterMs = opts?.afterMs;
  const beforeMs = opts?.beforeMs;
  let anonymousToolCalls = 0;

  /** Locate the tool-call part with `id`: current message first, then older ones. */
  const findToolCall = (current: MessagePart[], id: string): ToolCall | undefined => {
    const matches = (p: MessagePart): p is ToolCall =>
      p.type === "tool-call" && p.toolCallId === id;
    const local = current.find(matches);
    if (local) {
      return local;
    }
    for (let i = messages.length - 1; i >= 0; i--) {
      const hit = messages[i]?.parts.find(matches);
      if (hit) {
        return hit;
      }
    }
    return undefined;
  };

  for (const line of content.split("\n")) {
    if (!line.trim()) {
      continue;
    }
    try {
      const entry = JSON.parse(line);
      if (!entry || entry.type !== "message" || !entry.message) {
        continue;
      }

      // Compare against `undefined` so that a bound of 0 is still honoured.
      if (afterMs !== undefined || beforeMs !== undefined) {
        const ts = entry.timestamp ? new Date(entry.timestamp).getTime() : (entry.ts ?? 0);
        if (afterMs !== undefined && ts < afterMs) {
          continue;
        }
        if (beforeMs !== undefined && ts > beforeMs) {
          continue;
        }
      }

      const msg = entry.message;
      const role = msg.role as ParsedMessage["role"];
      const parts: MessagePart[] = [];

      if (Array.isArray(msg.content)) {
        for (const part of msg.content) {
          // A stray null/primitive must not discard the rest of the message.
          if (part === null || typeof part !== "object") {
            continue;
          }

          if (part.type === "text" && typeof part.text === "string" && part.text.trim()) {
            parts.push({ type: "text", text: part.text });
          } else if (
            part.type === "thinking" &&
            typeof part.thinking === "string" &&
            part.thinking.trim()
          ) {
            parts.push({ type: "thinking", thinking: part.thinking });
          } else if (part.type === "tool_use" || part.type === "tool-call") {
            const toolName = part.name ?? part.toolName ?? "unknown";
            const toolCallId =
              part.id ?? part.toolCallId ?? `tool-anon-${anonymousToolCalls++}`;
            const args = part.input ?? part.args;
            pendingToolCalls.set(toolCallId, { toolName, args });
            parts.push({ type: "tool-call", toolName, toolCallId, args });
          } else if (part.type === "tool_result" || part.type === "tool-result") {
            const toolCallId = part.tool_use_id ?? part.toolCallId ?? "";
            const output = toolResultText(part).slice(0, MAX_TOOL_OUTPUT_CHARS);
            const pending = pendingToolCalls.get(toolCallId);

            if (!pending) {
              parts.push({ type: "tool-call", toolName: "tool", toolCallId, output });
            } else {
              const call = findToolCall(parts, toolCallId);
              if (call) {
                call.output = output;
              } else {
                parts.push({
                  type: "tool-call",
                  toolName: pending.toolName,
                  toolCallId,
                  args: pending.args,
                  output,
                });
              }
            }
          }
        }
      } else if (typeof msg.content === "string" && msg.content.trim()) {
        parts.push({ type: "text", text: msg.content });
      }

      if (parts.length > 0) {
        messages.push({
          id: entry.id ?? `msg-${messages.length}`,
          role,
          parts,
          timestamp: entry.timestamp ?? new Date(entry.ts ?? Date.now()).toISOString(),
        });
      }
    } catch {
      // skip malformed lines
    }
  }

  return messages;
}
|
|
|
|
/**
 * Join the text of all `text` parts of a message with newlines.
 * Thinking and tool-call parts are ignored; no text parts yields `""`.
 */
function getMessageText(msg: ParsedMessage): string {
  const texts: string[] = [];
  for (const part of msg.parts) {
    if (part.type === "text") {
      texts.push(part.text);
    }
  }
  return texts.join("\n");
}
|
|
|
|
/**
 * Search session files for the cron run's conversation.
 *
 * A match is the first user message, in the first file that has one, whose
 * text contains the first 40 characters of `summary` and whose timestamp lies
 * between 5 seconds before and 10 minutes after `runAtMs`. The result is that
 * message plus every following message up to (not including) the next user
 * message, i.e. the full agent turn.
 *
 * @param sessionFiles Transcript paths to try, in priority order.
 * @param summary Cron payload text; only its first 40 characters are matched.
 * @param runAtMs Run time in epoch milliseconds.
 * @returns The conversation and the file it came from, or `null` when nothing
 *   matches or the search text is blank. Unreadable files are skipped.
 */
function searchForRunTranscript(
  sessionFiles: string[],
  summary: string,
  runAtMs: number,
): { messages: ParsedMessage[]; sessionFile: string } | null {
  const needle = summary.slice(0, 40);
  // A blank needle would "match" every user message in the window.
  if (!needle.trim()) {
    return null;
  }

  // Search window: from 5s before run to 10 minutes after (heartbeat delay).
  const afterMs = runAtMs - 5_000;
  const beforeMs = runAtMs + 10 * 60_000;

  // The raw file holds JSON-escaped text, so a plain substring pre-check is
  // only trustworthy when the needle has nothing JSON could escape (quotes,
  // backslashes, control characters, or non-ASCII that a writer may emit as
  // \uXXXX). Otherwise the file must be parsed to decide.
  const canPrefilter =
    /^[\x20-\x7e]*$/.test(needle) && JSON.stringify(needle) === `"${needle}"`;

  for (const filePath of sessionFiles) {
    try {
      const content = readFileSync(filePath, "utf-8");
      if (canPrefilter && !content.includes(needle)) {
        // Quick pre-check: skip files that don't contain the text at all.
        continue;
      }

      const allMessages = parseMessagesInRange(content);

      for (const [index, msg] of allMessages.entries()) {
        if (msg.role !== "user") {
          continue;
        }

        // Written as a positive range test so an unparseable timestamp (NaN)
        // is rejected instead of slipping through both comparisons.
        const msgTs = new Date(msg.timestamp).getTime();
        if (!(msgTs >= afterMs && msgTs <= beforeMs)) {
          continue;
        }

        if (!getMessageText(msg).includes(needle)) {
          continue;
        }

        // Found the user message: collect it and the agent turn that follows.
        const conversation: ParsedMessage[] = [msg];
        for (const next of allMessages.slice(index + 1)) {
          if (next.role === "user") {
            break;
          }
          conversation.push(next);
        }

        return { messages: conversation, sessionFile: filePath };
      }
    } catch {
      // skip unreadable files
    }
  }

  return null;
}
|
|
|
|
/**
 * GET /api/cron/runs/search-transcript?jobId=X&runAtMs=Y&summary=Z
 *
 * Looks up the agent transcript for a cron run that has no direct sessionId.
 * Two strategies are tried in order:
 *  1. a cron-specific session found via sessions.json — responds with
 *     `{ sessionId, messages, source: "cron-session" }`;
 *  2. when `summary` is given, a search of session files modified near the run
 *     time — responds with `{ messages, source: "main-session-search" }`.
 *
 * Responds 400 when `jobId` or `runAtMs` is missing or `runAtMs` is not a
 * finite number, and 404 when neither strategy finds a transcript.
 */
export async function GET(request: Request) {
  const { searchParams } = new URL(request.url);
  const jobId = searchParams.get("jobId");
  const runAtMsStr = searchParams.get("runAtMs");
  const summary = searchParams.get("summary");

  if (!jobId || !runAtMsStr) {
    return Response.json({ error: "jobId and runAtMs are required" }, { status: 400 });
  }

  const runAtMs = Number(runAtMsStr);
  if (!Number.isFinite(runAtMs)) {
    return Response.json({ error: "Invalid runAtMs" }, { status: 400 });
  }

  // Strategy 1: a session dedicated to this cron job.
  const cronSessionId = findCronSessionId(jobId);
  if (cronSessionId) {
    const transcriptName = `${cronSessionId}.jsonl`;
    try {
      for (const dirent of readdirSync(AGENTS_DIR, { withFileTypes: true })) {
        if (!dirent.isDirectory()) {
          continue;
        }
        const sessionFile = join(AGENTS_DIR, dirent.name, "sessions", transcriptName);
        if (!existsSync(sessionFile)) {
          continue;
        }

        const messages = parseMessagesInRange(readFileSync(sessionFile, "utf-8"));
        if (messages.length > 0) {
          return Response.json({
            sessionId: cronSessionId,
            messages,
            source: "cron-session",
          });
        }
      }
    } catch {
      // Reading failed: fall through to the text search.
    }
  }

  // Strategy 2: look for the payload text in sessions touched around the run.
  const match = summary
    ? searchForRunTranscript(findCandidateSessionFiles(runAtMs), summary, runAtMs)
    : null;
  if (match) {
    return Response.json({
      messages: match.messages,
      source: "main-session-search",
    });
  }

  return Response.json({ error: "Transcript not found" }, { status: 404 });
}
|