// openclaw/src/auto-reply/reply/agent-runner-memory.ts
import crypto from "node:crypto";
import fs from "node:fs";
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-budget.js";
import { estimateMessagesTokens } from "../../agents/compaction.js";
import { runWithModelFallback } from "../../agents/model-fallback.js";
import { isCliProvider } from "../../agents/model-selection.js";
import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js";
import { resolveSandboxConfigForAgent, resolveSandboxRuntimeStatus } from "../../agents/sandbox.js";
import {
derivePromptTokens,
hasNonzeroUsage,
normalizeUsage,
type UsageLike,
} from "../../agents/usage.js";
import type { OpenClawConfig } from "../../config/config.js";
import {
resolveAgentIdFromSessionKey,
resolveSessionFilePath,
resolveSessionFilePathOptions,
type SessionEntry,
updateSessionStoreEntry,
} from "../../config/sessions.js";
import { logVerbose } from "../../globals.js";
import { registerAgentRunContext } from "../../infra/agent-events.js";
import type { TemplateContext } from "../templating.js";
import type { VerboseLevel } from "../thinking.js";
import type { GetReplyOptions } from "../types.js";
import {
buildEmbeddedRunBaseParams,
buildEmbeddedRunContexts,
resolveModelFallbackOptions,
} from "./agent-runner-utils.js";
import {
hasAlreadyFlushedForCurrentCompaction,
resolveMemoryFlushContextWindowTokens,
resolveMemoryFlushPromptForRun,
resolveMemoryFlushSettings,
shouldRunMemoryFlush,
} from "./memory-flush.js";
import type { FollowupRun } from "./queue.js";
import { incrementCompactionCount } from "./session-updates.js";
/**
 * Estimate how many tokens the given prompt will add to the next model input.
 *
 * @param prompt raw user prompt (may be blank or undefined)
 * @returns a positive whole-token estimate, or undefined when the prompt is
 *          blank or the estimator yields a non-finite/non-positive value
 */
export function estimatePromptTokensForMemoryFlush(prompt?: string): number | undefined {
  const text = prompt?.trim();
  if (!text) {
    return undefined;
  }
  const userMessage: AgentMessage = { role: "user", content: text, timestamp: Date.now() };
  const estimated = estimateMessagesTokens([userMessage]);
  return Number.isFinite(estimated) && estimated > 0 ? Math.ceil(estimated) : undefined;
}
/**
 * Project the size of the next model input for flush gating: the previous
 * prompt tokens plus the previous completion plus the estimated new prompt.
 * Missing or negative inputs are clamped to zero.
 */
export function resolveEffectivePromptTokens(
  basePromptTokens?: number,
  lastOutputTokens?: number,
  promptTokenEstimate?: number,
): number {
  const clamp = (value?: number): number => Math.max(0, value ?? 0);
  return clamp(basePromptTokens) + clamp(lastOutputTokens) + clamp(promptTokenEstimate);
}
// Prompt/output token figures recovered from the session transcript (JSONL log).
export type SessionTranscriptUsageSnapshot = {
promptTokens?: number;
outputTokens?: number;
};
// Keep a generous near-threshold window so large assistant outputs still trigger
// transcript reads in time to flip memory-flush gating when needed.
const TRANSCRIPT_OUTPUT_READ_BUFFER_TOKENS = 8192;
// Chunk size used when scanning the transcript tail backwards for usage records.
const TRANSCRIPT_TAIL_CHUNK_BYTES = 64 * 1024;
/**
 * Parse one transcript JSONL line and return its normalized usage, if any.
 * Accepts usage at either `message.usage` or top-level `usage`; blank lines,
 * malformed JSON, and all-zero usage all yield undefined.
 */
function parseUsageFromTranscriptLine(line: string): ReturnType<typeof normalizeUsage> | undefined {
  const candidate = line.trim();
  if (!candidate) {
    return undefined;
  }
  try {
    const record = JSON.parse(candidate) as {
      message?: { usage?: UsageLike };
      usage?: UsageLike;
    };
    const normalized = normalizeUsage(record.message?.usage ?? record.usage);
    if (normalized && hasNonzeroUsage(normalized)) {
      return normalized;
    }
  } catch {
    // ignore bad lines
  }
  return undefined;
}
/**
 * Resolve the on-disk path of the session transcript/log file.
 *
 * Prefers the entry's `sessionFile`, falling back to a `transcriptPath` field
 * when present. Returns undefined without a session id or on resolution error.
 */
function resolveSessionLogPath(
  sessionId?: string,
  sessionEntry?: SessionEntry,
  sessionKey?: string,
  opts?: { storePath?: string },
): string | undefined {
  if (!sessionId) {
    return undefined;
  }
  try {
    const entryWithTranscript = sessionEntry as
      | (SessionEntry & { transcriptPath?: string })
      | undefined;
    const transcriptPath = entryWithTranscript?.transcriptPath?.trim();
    const sessionFile = sessionEntry?.sessionFile?.trim() || transcriptPath;
    const pathOpts = resolveSessionFilePathOptions({
      agentId: resolveAgentIdFromSessionKey(sessionKey),
      storePath: opts?.storePath,
    });
    // Normalize sessionFile through resolveSessionFilePath so relative entries
    // are resolved against the sessions dir/store layout, not process.cwd().
    return resolveSessionFilePath(sessionId, sessionFile ? { sessionFile } : sessionEntry, pathOpts);
  } catch {
    return undefined;
  }
}
/**
 * Convert normalized usage into a transcript snapshot, keeping only positive,
 * finite output tokens. Returns undefined when neither figure is usable.
 */
function deriveTranscriptUsageSnapshot(
  usage: ReturnType<typeof normalizeUsage> | undefined,
): SessionTranscriptUsageSnapshot | undefined {
  if (!usage) {
    return undefined;
  }
  const promptTokens = derivePromptTokens(usage);
  const rawOutput = usage.output;
  const outputTokens =
    typeof rawOutput === "number" && Number.isFinite(rawOutput) && rawOutput > 0
      ? rawOutput
      : undefined;
  const hasPrompt = typeof promptTokens === "number";
  const hasOutput = typeof outputTokens === "number";
  return hasPrompt || hasOutput ? { promptTokens, outputTokens } : undefined;
}
// Partial view of the session log file: on-disk byte size and/or the most
// recent nonzero usage record, depending on what the caller requested.
type SessionLogSnapshot = {
byteSize?: number;
usage?: SessionTranscriptUsageSnapshot;
};
/**
 * Read the requested facets of the session log (byte size and/or last usage).
 * Each facet fails soft: on any I/O or parse error the corresponding field is
 * left undefined. Returns an empty snapshot when no log path can be resolved.
 */
async function readSessionLogSnapshot(params: {
  sessionId?: string;
  sessionEntry?: SessionEntry;
  sessionKey?: string;
  opts?: { storePath?: string };
  includeByteSize: boolean;
  includeUsage: boolean;
}): Promise<SessionLogSnapshot> {
  const { sessionId, sessionEntry, sessionKey, opts, includeByteSize, includeUsage } = params;
  const snapshot: SessionLogSnapshot = {};
  const logPath = resolveSessionLogPath(sessionId, sessionEntry, sessionKey, opts);
  if (!logPath) {
    return snapshot;
  }
  if (includeByteSize) {
    try {
      const stat = await fs.promises.stat(logPath);
      const size = Math.floor(stat.size);
      snapshot.byteSize = Number.isFinite(size) && size >= 0 ? size : undefined;
    } catch {
      snapshot.byteSize = undefined; // stat failure => size unknown
    }
  }
  if (includeUsage) {
    try {
      const lastUsage = await readLastNonzeroUsageFromSessionLog(logPath);
      snapshot.usage = deriveTranscriptUsageSnapshot(lastUsage);
    } catch {
      snapshot.usage = undefined; // unreadable transcript => usage unknown
    }
  }
  return snapshot;
}
// Scan the transcript file backwards in fixed-size chunks and return the most
// recent line carrying nonzero usage, without reading the whole file.
async function readLastNonzeroUsageFromSessionLog(logPath: string) {
const handle = await fs.promises.open(logPath, "r");
try {
const stat = await handle.stat();
// Walk from EOF toward the start of the file.
let position = stat.size;
// The first split element of each chunk may be a partial line (the chunk
// boundary can fall mid-line); it is carried over and completed by the
// next (earlier) chunk before being parsed.
let leadingPartial = "";
while (position > 0) {
const chunkSize = Math.min(TRANSCRIPT_TAIL_CHUNK_BYTES, position);
const start = position - chunkSize;
const buffer = Buffer.allocUnsafe(chunkSize);
const { bytesRead } = await handle.read(buffer, 0, chunkSize, start);
if (bytesRead <= 0) {
break;
}
const chunk = buffer.toString("utf-8", 0, bytesRead);
// Prepend this (earlier) chunk to the leftover partial prefix of the
// previously-read (later) chunk.
const combined = `${chunk}${leadingPartial}`;
const lines = combined.split(/\n+/);
leadingPartial = lines.shift() ?? "";
// Newest lines first within the chunk.
for (let i = lines.length - 1; i >= 0; i -= 1) {
const usage = parseUsageFromTranscriptLine(lines[i] ?? "");
if (usage) {
return usage;
}
}
position = start;
}
// The remaining prefix is the very first line of the file.
return parseUsageFromTranscriptLine(leadingPartial);
} finally {
await handle.close();
}
}
/**
 * Convenience wrapper: read only the latest usage snapshot from the session
 * log (no byte-size stat). Returns undefined when nothing usable is found.
 */
export async function readPromptTokensFromSessionLog(
  sessionId?: string,
  sessionEntry?: SessionEntry,
  sessionKey?: string,
  opts?: { storePath?: string },
): Promise<SessionTranscriptUsageSnapshot | undefined> {
  const { usage } = await readSessionLogSnapshot({
    sessionId,
    sessionEntry,
    sessionKey,
    opts,
    includeByteSize: false,
    includeUsage: true,
  });
  return usage;
}
export async function runMemoryFlushIfNeeded(params: {
2026-01-30 03:15:10 +01:00
cfg: OpenClawConfig;
followupRun: FollowupRun;
promptForEstimate?: string;
sessionCtx: TemplateContext;
opts?: GetReplyOptions;
defaultModel: string;
agentCfgContextTokens?: number;
resolvedVerboseLevel: VerboseLevel;
sessionEntry?: SessionEntry;
sessionStore?: Record<string, SessionEntry>;
sessionKey?: string;
storePath?: string;
isHeartbeat: boolean;
}): Promise<SessionEntry | undefined> {
const memoryFlushSettings = resolveMemoryFlushSettings(params.cfg);
if (!memoryFlushSettings) {
return params.sessionEntry;
}
const memoryFlushWritable = (() => {
if (!params.sessionKey) {
return true;
}
const runtime = resolveSandboxRuntimeStatus({
cfg: params.cfg,
sessionKey: params.sessionKey,
});
if (!runtime.sandboxed) {
return true;
}
const sandboxCfg = resolveSandboxConfigForAgent(params.cfg, runtime.agentId);
return sandboxCfg.workspaceAccess === "rw";
})();
const isCli = isCliProvider(params.followupRun.run.provider, params.cfg);
const canAttemptFlush = memoryFlushWritable && !params.isHeartbeat && !isCli;
let entry =
params.sessionEntry ??
(params.sessionKey ? params.sessionStore?.[params.sessionKey] : undefined);
const contextWindowTokens = resolveMemoryFlushContextWindowTokens({
modelId: params.followupRun.run.model ?? params.defaultModel,
agentCfgContextTokens: params.agentCfgContextTokens,
});
const promptTokenEstimate = estimatePromptTokensForMemoryFlush(
params.promptForEstimate ?? params.followupRun.prompt,
);
const persistedPromptTokensRaw = entry?.totalTokens;
const persistedPromptTokens =
typeof persistedPromptTokensRaw === "number" &&
Number.isFinite(persistedPromptTokensRaw) &&
persistedPromptTokensRaw > 0
? persistedPromptTokensRaw
: undefined;
const hasFreshPersistedPromptTokens =
typeof persistedPromptTokens === "number" && entry?.totalTokensFresh === true;
const flushThreshold =
contextWindowTokens -
memoryFlushSettings.reserveTokensFloor -
memoryFlushSettings.softThresholdTokens;
// When totals are stale/unknown, derive prompt + last output from transcript so memory
// flush can still be evaluated against projected next-input size.
//
// When totals are fresh, only read the transcript when we're close enough to the
// threshold that missing the last output tokens could flip the decision.
const shouldReadTranscriptForOutput =
canAttemptFlush &&
entry &&
hasFreshPersistedPromptTokens &&
typeof promptTokenEstimate === "number" &&
Number.isFinite(promptTokenEstimate) &&
flushThreshold > 0 &&
(persistedPromptTokens ?? 0) + promptTokenEstimate >=
flushThreshold - TRANSCRIPT_OUTPUT_READ_BUFFER_TOKENS;
const shouldReadTranscript = Boolean(
canAttemptFlush && entry && (!hasFreshPersistedPromptTokens || shouldReadTranscriptForOutput),
);
const forceFlushTranscriptBytes = memoryFlushSettings.forceFlushTranscriptBytes;
const shouldCheckTranscriptSizeForForcedFlush = Boolean(
canAttemptFlush &&
entry &&
Number.isFinite(forceFlushTranscriptBytes) &&
forceFlushTranscriptBytes > 0,
);
const shouldReadSessionLog = shouldReadTranscript || shouldCheckTranscriptSizeForForcedFlush;
const sessionLogSnapshot = shouldReadSessionLog
? await readSessionLogSnapshot({
sessionId: params.followupRun.run.sessionId,
sessionEntry: entry,
sessionKey: params.sessionKey ?? params.followupRun.run.sessionKey,
opts: { storePath: params.storePath },
includeByteSize: shouldCheckTranscriptSizeForForcedFlush,
includeUsage: shouldReadTranscript,
})
: undefined;
const transcriptByteSize = sessionLogSnapshot?.byteSize;
const shouldForceFlushByTranscriptSize =
typeof transcriptByteSize === "number" && transcriptByteSize >= forceFlushTranscriptBytes;
const transcriptUsageSnapshot = sessionLogSnapshot?.usage;
const transcriptPromptTokens = transcriptUsageSnapshot?.promptTokens;
const transcriptOutputTokens = transcriptUsageSnapshot?.outputTokens;
const hasReliableTranscriptPromptTokens =
typeof transcriptPromptTokens === "number" &&
Number.isFinite(transcriptPromptTokens) &&
transcriptPromptTokens > 0;
const shouldPersistTranscriptPromptTokens =
hasReliableTranscriptPromptTokens &&
(!hasFreshPersistedPromptTokens ||
(transcriptPromptTokens ?? 0) > (persistedPromptTokens ?? 0));
if (entry && shouldPersistTranscriptPromptTokens) {
const nextEntry = {
...entry,
totalTokens: transcriptPromptTokens,
totalTokensFresh: true,
};
entry = nextEntry;
if (params.sessionKey && params.sessionStore) {
params.sessionStore[params.sessionKey] = nextEntry;
}
if (params.storePath && params.sessionKey) {
try {
const updatedEntry = await updateSessionStoreEntry({
storePath: params.storePath,
sessionKey: params.sessionKey,
update: async () => ({ totalTokens: transcriptPromptTokens, totalTokensFresh: true }),
});
if (updatedEntry) {
entry = updatedEntry;
if (params.sessionStore) {
params.sessionStore[params.sessionKey] = updatedEntry;
}
}
} catch (err) {
logVerbose(`failed to persist derived prompt totalTokens: ${String(err)}`);
}
}
}
const promptTokensSnapshot = Math.max(
hasFreshPersistedPromptTokens ? (persistedPromptTokens ?? 0) : 0,
hasReliableTranscriptPromptTokens ? (transcriptPromptTokens ?? 0) : 0,
);
const hasFreshPromptTokensSnapshot =
promptTokensSnapshot > 0 &&
(hasFreshPersistedPromptTokens || hasReliableTranscriptPromptTokens);
const projectedTokenCount = hasFreshPromptTokensSnapshot
? resolveEffectivePromptTokens(
promptTokensSnapshot,
transcriptOutputTokens,
promptTokenEstimate,
)
: undefined;
const tokenCountForFlush =
typeof projectedTokenCount === "number" &&
Number.isFinite(projectedTokenCount) &&
projectedTokenCount > 0
? projectedTokenCount
: undefined;
// Diagnostic logging to understand why memory flush may not trigger.
logVerbose(
`memoryFlush check: sessionKey=${params.sessionKey} ` +
`tokenCount=${tokenCountForFlush ?? "undefined"} ` +
`contextWindow=${contextWindowTokens} threshold=${flushThreshold} ` +
`isHeartbeat=${params.isHeartbeat} isCli=${isCli} memoryFlushWritable=${memoryFlushWritable} ` +
`compactionCount=${entry?.compactionCount ?? 0} memoryFlushCompactionCount=${entry?.memoryFlushCompactionCount ?? "undefined"} ` +
`persistedPromptTokens=${persistedPromptTokens ?? "undefined"} persistedFresh=${entry?.totalTokensFresh === true} ` +
`promptTokensEst=${promptTokenEstimate ?? "undefined"} transcriptPromptTokens=${transcriptPromptTokens ?? "undefined"} transcriptOutputTokens=${transcriptOutputTokens ?? "undefined"} ` +
`projectedTokenCount=${projectedTokenCount ?? "undefined"} transcriptBytes=${transcriptByteSize ?? "undefined"} ` +
`forceFlushTranscriptBytes=${forceFlushTranscriptBytes} forceFlushByTranscriptSize=${shouldForceFlushByTranscriptSize}`,
);
const shouldFlushMemory =
(memoryFlushSettings &&
memoryFlushWritable &&
!params.isHeartbeat &&
!isCli &&
shouldRunMemoryFlush({
entry,
tokenCount: tokenCountForFlush,
contextWindowTokens,
reserveTokensFloor: memoryFlushSettings.reserveTokensFloor,
softThresholdTokens: memoryFlushSettings.softThresholdTokens,
})) ||
(shouldForceFlushByTranscriptSize &&
entry != null &&
!hasAlreadyFlushedForCurrentCompaction(entry));
if (!shouldFlushMemory) {
return entry ?? params.sessionEntry;
}
logVerbose(
`memoryFlush triggered: sessionKey=${params.sessionKey} tokenCount=${tokenCountForFlush ?? "undefined"} threshold=${flushThreshold}`,
);
let activeSessionEntry = entry ?? params.sessionEntry;
const activeSessionStore = params.sessionStore;
let bootstrapPromptWarningSignaturesSeen = resolveBootstrapWarningSignaturesSeen(
activeSessionEntry?.systemPromptReport ??
(params.sessionKey ? activeSessionStore?.[params.sessionKey]?.systemPromptReport : undefined),
);
const flushRunId = crypto.randomUUID();
if (params.sessionKey) {
registerAgentRunContext(flushRunId, {
sessionKey: params.sessionKey,
verboseLevel: params.resolvedVerboseLevel,
});
}
let memoryCompactionCompleted = false;
const flushSystemPrompt = [
params.followupRun.run.extraSystemPrompt,
memoryFlushSettings.systemPrompt,
]
.filter(Boolean)
.join("\n\n");
try {
await runWithModelFallback({
...resolveModelFallbackOptions(params.followupRun.run),
run: async (provider, model, runOptions) => {
const { authProfile, embeddedContext, senderContext } = buildEmbeddedRunContexts({
run: params.followupRun.run,
sessionCtx: params.sessionCtx,
hasRepliedRef: params.opts?.hasRepliedRef,
provider,
});
const runBaseParams = buildEmbeddedRunBaseParams({
run: params.followupRun.run,
provider,
model,
runId: flushRunId,
authProfile,
allowTransientCooldownProbe: runOptions?.allowTransientCooldownProbe,
});
const result = await runEmbeddedPiAgent({
...embeddedContext,
...senderContext,
...runBaseParams,
trigger: "memory",
prompt: resolveMemoryFlushPromptForRun({
prompt: memoryFlushSettings.prompt,
cfg: params.cfg,
}),
extraSystemPrompt: flushSystemPrompt,
bootstrapPromptWarningSignaturesSeen,
bootstrapPromptWarningSignature:
bootstrapPromptWarningSignaturesSeen[bootstrapPromptWarningSignaturesSeen.length - 1],
onAgentEvent: (evt) => {
if (evt.stream === "compaction") {
const phase = typeof evt.data.phase === "string" ? evt.data.phase : "";
Agents: add nested subagent orchestration controls and reduce subagent token waste (#14447) * Agents: add subagent orchestration controls * Agents: add subagent orchestration controls (WIP uncommitted changes) * feat(subagents): add depth-based spawn gating for sub-sub-agents * feat(subagents): tool policy, registry, and announce chain for nested agents * feat(subagents): system prompt, docs, changelog for nested sub-agents * fix(subagents): prevent model fallback override, show model during active runs, and block context overflow fallback Bug 1: When a session has an explicit model override (e.g., gpt/openai-codex), the fallback candidate logic in resolveFallbackCandidates silently appended the global primary model (opus) as a backstop. On reinjection/steer with a transient error, the session could fall back to opus which has a smaller context window and crash. Fix: when storedModelOverride is set, pass fallbacksOverride ?? [] instead of undefined, preventing the implicit primary backstop. Bug 2: Active subagents showed 'model n/a' in /subagents list because resolveModelDisplay only read entry.model/modelProvider (populated after run completes). Fix: fall back to modelOverride/providerOverride fields which are populated at spawn time via sessions.patch. Bug 3: Context overflow errors (prompt too long, context_length_exceeded) could theoretically escape runEmbeddedPiAgent and be treated as failover candidates in runWithModelFallback, causing a switch to a model with a smaller context window. Fix: in runWithModelFallback, detect context overflow errors via isLikelyContextOverflowError and rethrow them immediately instead of trying the next model candidate. 
* fix(subagents): track spawn depth in session store and fix announce routing for nested agents * Fix compaction status tracking and dedupe overflow compaction triggers * fix(subagents): enforce depth block via session store and implement cascade kill * fix: inject group chat context into system prompt * fix(subagents): always write model to session store at spawn time * Preserve spawnDepth when agent handler rewrites session entry * fix(subagents): suppress announce on steer-restart * fix(subagents): fallback spawned session model to runtime default * fix(subagents): enforce spawn depth when caller key resolves by sessionId * feat(subagents): implement active-first ordering for numeric targets and enhance task display - Added a test to verify that subagents with numeric targets follow an active-first list ordering. - Updated `resolveSubagentTarget` to sort subagent runs based on active status and recent activity. - Enhanced task display in command responses to prevent truncation of long task descriptions. - Introduced new utility functions for compacting task text and managing subagent run states. * fix(subagents): show model for active runs via run record fallback When the spawned model matches the agent's default model, the session store's override fields are intentionally cleared (isDefault: true). The model/modelProvider fields are only populated after the run completes. This left active subagents showing 'model n/a'. Fix: store the resolved model on SubagentRunRecord at registration time, and use it as a fallback in both display paths (subagents tool and /subagents command) when the session store entry has no model info. 
Changes: - SubagentRunRecord: add optional model field - registerSubagentRun: accept and persist model param - sessions-spawn-tool: pass resolvedModel to registerSubagentRun - subagents-tool: pass run record model as fallback to resolveModelDisplay - commands-subagents: pass run record model as fallback to resolveModelDisplay * feat(chat): implement session key resolution and reset on sidebar navigation - Added functions to resolve the main session key and reset chat state when switching sessions from the sidebar. - Updated the `renderTab` function to handle session key changes when navigating to the chat tab. - Introduced a test to verify that the session resets to "main" when opening chat from the sidebar navigation. * fix: subagent timeout=0 passthrough and fallback prompt duplication Bug 1: runTimeoutSeconds=0 now means 'no timeout' instead of applying 600s default - sessions-spawn-tool: default to undefined (not 0) when neither timeout param is provided; use != null check so explicit 0 passes through to gateway - agent.ts: accept 0 as valid timeout (resolveAgentTimeoutMs already handles 0 → MAX_SAFE_TIMEOUT_MS) Bug 2: model fallback no longer re-injects the original prompt as a duplicate - agent.ts: track fallback attempt index; on retries use a short continuation message instead of the full original prompt since the session file already contains it from the first attempt - Also skip re-sending images on fallback retries (already in session) * feat(subagents): truncate long task descriptions in subagents command output - Introduced a new utility function to format task previews, limiting their length to improve readability. - Updated the command handler to use the new formatting function, ensuring task descriptions are truncated appropriately. - Adjusted related tests to verify that long task descriptions are now truncated in the output. 
* refactor(subagents): update subagent registry path resolution and improve command output formatting - Replaced direct import of STATE_DIR with a utility function to resolve the state directory dynamically. - Enhanced the formatting of command output for active and recent subagents, adding separators for better readability. - Updated related tests to reflect changes in command output structure. * fix(subagent): default sessions_spawn to no timeout when runTimeoutSeconds omitted The previous fix (75a791106) correctly handled the case where runTimeoutSeconds was explicitly set to 0 ("no timeout"). However, when models omit the parameter entirely (which is common since the schema marks it as optional), runTimeoutSeconds resolved to undefined. undefined flowed through the chain as: sessions_spawn → timeout: undefined (since undefined != null is false) → gateway agent handler → agentCommand opts.timeout: undefined → resolveAgentTimeoutMs({ overrideSeconds: undefined }) → DEFAULT_AGENT_TIMEOUT_SECONDS (600s = 10 minutes) This caused subagents to be killed at exactly 10 minutes even though the user's intent (via TOOLS.md) was for subagents to run without a timeout. Fix: default runTimeoutSeconds to 0 (no timeout) when neither runTimeoutSeconds nor timeoutSeconds is provided by the caller. Subagent spawns are long-running by design and should not inherit the 600s agent-command default timeout. * fix(subagent): accept timeout=0 in agent-via-gateway path (second 600s default) * fix: thread timeout override through getReplyFromConfig dispatch path getReplyFromConfig called resolveAgentTimeoutMs({ cfg }) with no override, always falling back to the config default (600s). Add timeoutOverrideSeconds to GetReplyOptions and pass it through as overrideSeconds so callers of the dispatch chain can specify a custom timeout (0 = no timeout). This complements the existing timeout threading in agentCommand and the cron isolated-agent runner, which already pass overrideSeconds correctly. 
* feat(model-fallback): normalize OpenAI Codex model references and enhance fallback handling - Added normalization for OpenAI Codex model references, specifically converting "gpt-5.3-codex" to "openai-codex" before execution. - Updated the `resolveFallbackCandidates` function to utilize the new normalization logic. - Enhanced tests to verify the correct behavior of model normalization and fallback mechanisms. - Introduced a new test case to ensure that the normalization process works as expected for various input formats. * feat(tests): add unit tests for steer failure behavior in openclaw-tools - Introduced a new test file to validate the behavior of subagents when steer replacement dispatch fails. - Implemented tests to ensure that the announce behavior is restored correctly and that the suppression reason is cleared as expected. - Enhanced the subagent registry with a new function to clear steer restart suppression. - Updated related components to support the new test scenarios. * fix(subagents): replace stop command with kill in slash commands and documentation - Updated the `/subagents` command to replace `stop` with `kill` for consistency in controlling sub-agent runs. - Modified related documentation to reflect the change in command usage. - Removed legacy timeoutSeconds references from the sessions-spawn-tool schema and tests to streamline timeout handling. - Enhanced tests to ensure correct behavior of the updated commands and their interactions. * feat(tests): add unit tests for readLatestAssistantReply function - Introduced a new test file for the `readLatestAssistantReply` function to validate its behavior with various message scenarios. - Implemented tests to ensure the function correctly retrieves the latest assistant message and handles cases where the latest message has no text. - Mocked the gateway call to simulate different message histories for comprehensive testing. 
* feat(tests): enhance subagent kill-all cascade tests and announce formatting - Added a new test to verify that the `kill-all` command cascades through ended parents to active descendants in subagents. - Updated the subagent announce formatting tests to reflect changes in message structure, including the replacement of "Findings:" with "Result:" and the addition of new expectations for message content. - Improved the handling of long findings and stats in the announce formatting logic to ensure concise output. - Refactored related functions to enhance clarity and maintainability in the subagent registry and tools. * refactor(subagent): update announce formatting and remove unused constants - Modified the subagent announce formatting to replace "Findings:" with "Result:" and adjusted related expectations in tests. - Removed constants for maximum announce findings characters and summary words, simplifying the announcement logic. - Updated the handling of findings to retain full content instead of truncating, ensuring more informative outputs. - Cleaned up unused imports in the commands-subagents file to enhance code clarity. * feat(tests): enhance billing error handling in user-facing text - Added tests to ensure that normal text mentioning billing plans is not rewritten, preserving user context. - Updated the `isBillingErrorMessage` and `sanitizeUserFacingText` functions to improve handling of billing-related messages. - Introduced new test cases for various scenarios involving billing messages to ensure accurate processing and output. - Enhanced the subagent announce flow to correctly manage active descendant runs, preventing premature announcements. * feat(subagent): enhance workflow guidance and auto-announcement clarity - Added a new guideline in the subagent system prompt to emphasize trust in push-based completion, discouraging busy polling for status updates. 
- Updated documentation to clarify that sub-agents will automatically announce their results, improving user understanding of the workflow. - Enhanced tests to verify the new guidance on avoiding polling loops and to ensure the accuracy of the updated prompts. * fix(cron): avoid announcing interim subagent spawn acks * chore: clean post-rebase imports * fix(cron): fall back to child replies when parent stays interim * fix(subagents): make active-run guidance advisory * fix(subagents): update announce flow to handle active descendants and enhance test coverage - Modified the announce flow to defer announcements when active descendant runs are present, ensuring accurate status reporting. - Updated tests to verify the new behavior, including scenarios where no fallback requester is available and ensuring proper handling of finished subagents. - Enhanced the announce formatting to include an `expectFinal` flag for better clarity in the announcement process. * fix(subagents): enhance announce flow and formatting for user updates - Updated the announce flow to provide clearer instructions for user updates based on active subagent runs and requester context. - Refactored the announcement logic to improve clarity and ensure internal context remains private. - Enhanced tests to verify the new message expectations and formatting, including updated prompts for user-facing updates. - Introduced a new function to build reply instructions based on session context, improving the overall announcement process. * fix: resolve prep blockers and changelog placement (#14447) (thanks @tyler6204) * fix: restore cron delivery-plan import after rebase (#14447) (thanks @tyler6204) * fix: resolve test failures from rebase conflicts (#14447) (thanks @tyler6204) * fix: apply formatting after rebase (#14447) (thanks @tyler6204)
2026-02-14 22:03:45 -08:00
if (phase === "end") {
memoryCompactionCompleted = true;
}
}
},
});
bootstrapPromptWarningSignaturesSeen = resolveBootstrapWarningSignaturesSeen(
result.meta?.systemPromptReport,
);
return result;
},
});
let memoryFlushCompactionCount =
activeSessionEntry?.compactionCount ??
(params.sessionKey ? activeSessionStore?.[params.sessionKey]?.compactionCount : 0) ??
0;
if (memoryCompactionCompleted) {
const nextCount = await incrementCompactionCount({
sessionEntry: activeSessionEntry,
sessionStore: activeSessionStore,
sessionKey: params.sessionKey,
storePath: params.storePath,
});
if (typeof nextCount === "number") {
memoryFlushCompactionCount = nextCount;
}
}
if (params.storePath && params.sessionKey) {
try {
const updatedEntry = await updateSessionStoreEntry({
storePath: params.storePath,
sessionKey: params.sessionKey,
update: async () => ({
memoryFlushAt: Date.now(),
memoryFlushCompactionCount,
}),
});
if (updatedEntry) {
activeSessionEntry = updatedEntry;
}
} catch (err) {
logVerbose(`failed to persist memory flush metadata: ${String(err)}`);
}
}
} catch (err) {
logVerbose(`memory flush run failed: ${String(err)}`);
}
return activeSessionEntry;
}