fix(compaction): skip compaction only when session has no meaningful conversation

Fixes #40727

The compaction safeguard and the embedded-runner compaction path both
used a naive role check (user | assistant | toolResult) to decide
whether a session contains real conversation worth summarising.  In
long-running heartbeat sessions nearly every message satisfies that
check, yet the *content* is boilerplate: heartbeat polls produce empty
or HEARTBEAT_OK user turns and NO_REPLY assistant turns.  The guard
therefore never fires, the session grows past the 200K ceiling, and
the agent goes silent.

Fix: replace the role-only predicate with a content-aware one:

  isRealConversationMessage / hasRealConversationContent (two separate
  copies – safeguard.ts and compact.ts – each get the same treatment)

Rules:
  • user / assistant messages are real only when their text content is
    non-empty AND not a pure boilerplate sentinel (HEARTBEAT_OK or
    NO_REPLY).  Non-text blocks (images, attachments) are always real.
  • toolResult messages are real only when a meaningful user message
    appears in the previous 20 messages of the window, so tool output
    that was triggered by a real human ask still counts.

Both predicates are exported via __testing / direct export so the
existing and new unit tests can cover them without mocks.

Tests added:
  compaction-safeguard.test.ts – two new cases:
    • tool result linked to HEARTBEAT_OK user turn → not real
    • tool result linked to meaningful user ask → real
  compact.hooks.test.ts – two new integration cases:
    • boilerplate-only session → skips compaction (reason: no real
      conversation messages)
    • meaningful user ask + tool result → compaction runs
This commit is contained in:
samzong 2026-03-10 20:19:32 +08:00 committed by Josh Lehman
parent 4c60956d8e
commit b1eaf639d5
No known key found for this signature in database
GPG Key ID: D141B425AC7F876B
5 changed files with 285 additions and 19 deletions

View File

@ -67,6 +67,18 @@ export const resolveMemorySearchConfigMock = vi.fn(() => ({
}));
export const resolveSessionAgentIdMock = vi.fn(() => "main");
export const estimateTokensMock = vi.fn((_message?: unknown) => 10);
export const sessionMessages: unknown[] = [
{ role: "user", content: "hello", timestamp: 1 },
{ role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 },
{
role: "toolResult",
toolCallId: "t1",
toolName: "exec",
content: [{ type: "text", text: "output" }],
isError: false,
timestamp: 3,
},
];
export const sessionAbortCompactionMock: Mock<(reason?: unknown) => void> = vi.fn();
export const createOpenClawCodingToolsMock = vi.fn(() => []);
@ -134,6 +146,20 @@ export function resetCompactHooksHarnessMocks(): void {
resolveSessionAgentIdMock.mockReturnValue("main");
estimateTokensMock.mockReset();
estimateTokensMock.mockReturnValue(10);
sessionMessages.splice(
0,
sessionMessages.length,
{ role: "user", content: "hello", timestamp: 1 },
{ role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 },
{
role: "toolResult",
toolCallId: "t1",
toolName: "exec",
content: [{ type: "text", text: "output" }],
isError: false,
timestamp: 3,
},
);
sessionAbortCompactionMock.mockReset();
createOpenClawCodingToolsMock.mockReset();
createOpenClawCodingToolsMock.mockReturnValue([]);
@ -176,18 +202,11 @@ export async function loadCompactHooksHarness(): Promise<{
createAgentSession: vi.fn(async () => {
const session = {
sessionId: "session-1",
messages: [
{ role: "user", content: "hello", timestamp: 1 },
{ role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 },
{
role: "toolResult",
toolCallId: "t1",
toolName: "exec",
content: [{ type: "text", text: "output" }],
isError: false,
timestamp: 3,
},
],
messages: sessionMessages.map((message) =>
typeof structuredClone === "function"
? structuredClone(message)
: JSON.parse(JSON.stringify(message)),
),
agent: {
replaceMessages: vi.fn((messages: unknown[]) => {
session.messages = [...(messages as typeof session.messages)];

View File

@ -16,6 +16,7 @@ import {
resetCompactHooksHarnessMocks,
sanitizeSessionHistoryMock,
sessionAbortCompactionMock,
sessionMessages,
sessionCompactImpl,
triggerInternalHook,
} from "./compact.hooks.harness.js";
@ -154,6 +155,20 @@ describe("compactEmbeddedPiSessionDirect hooks", () => {
estimateTokensMock.mockReset();
estimateTokensMock.mockReturnValue(10);
sessionAbortCompactionMock.mockReset();
sessionMessages.splice(
0,
sessionMessages.length,
{ role: "user", content: "hello", timestamp: 1 },
{ role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 },
{
role: "toolResult",
toolCallId: "t1",
toolName: "exec",
content: [{ type: "text", text: "output" }],
isError: false,
timestamp: 3,
},
);
unregisterApiProviders(getCustomApiRegistrySourceId("ollama"));
});
@ -490,6 +505,64 @@ describe("compactEmbeddedPiSessionDirect hooks", () => {
});
});
it("skips compaction when the transcript only contains boilerplate replies and tool output", async () => {
sessionMessages.splice(
0,
sessionMessages.length,
{ role: "user", content: "HEARTBEAT_OK", timestamp: 1 },
{
role: "toolResult",
toolCallId: "t1",
toolName: "exec",
content: [{ type: "text", text: "checked" }],
isError: false,
timestamp: 2,
},
);
const result = await compactEmbeddedPiSessionDirect({
sessionId: "session-1",
sessionKey: "agent:main:session-1",
sessionFile: "/tmp/session.jsonl",
workspaceDir: "/tmp",
customInstructions: "focus on decisions",
});
expect(result).toMatchObject({
ok: true,
compacted: false,
reason: "no real conversation messages",
});
expect(sessionCompactImpl).not.toHaveBeenCalled();
});
it("keeps compaction enabled when tool output follows a meaningful user request", async () => {
sessionMessages.splice(
0,
sessionMessages.length,
{ role: "user", content: "please inspect the failing PR", timestamp: 1 },
{
role: "toolResult",
toolCallId: "t1",
toolName: "exec",
content: [{ type: "text", text: "checked" }],
isError: false,
timestamp: 2,
},
);
const result = await compactEmbeddedPiSessionDirect({
sessionId: "session-1",
sessionKey: "agent:main:session-1",
sessionFile: "/tmp/session.jsonl",
workspaceDir: "/tmp",
customInstructions: "focus on decisions",
});
expect(result.ok).toBe(true);
expect(sessionCompactImpl).toHaveBeenCalled();
});
it("registers the Ollama api provider before compaction", async () => {
resolveModelMock.mockReturnValue({
model: {

View File

@ -167,8 +167,68 @@ type CompactionMessageMetrics = {
contributors: Array<{ role: string; chars: number; tool?: string }>;
};
function hasRealConversationContent(msg: AgentMessage): boolean {
return msg.role === "user" || msg.role === "assistant" || msg.role === "toolResult";
const BOILERPLATE_REPLY_TEXT = new Set(["HEARTBEAT_OK", "NO_REPLY"]);
const TOOL_RESULT_REAL_CONVERSATION_LOOKBACK = 20;
function hasMeaningfulConversationContent(msg: AgentMessage): boolean {
const content = (msg as { content?: unknown }).content;
if (typeof content === "string") {
const trimmed = content.trim();
if (!trimmed) {
return false;
}
return !BOILERPLATE_REPLY_TEXT.has(trimmed);
}
if (!Array.isArray(content)) {
return false;
}
let sawNonTextBlock = false;
for (const block of content) {
if (!block || typeof block !== "object") {
continue;
}
const type = (block as { type?: unknown }).type;
if (type !== "text") {
sawNonTextBlock = true;
continue;
}
const text = (block as { text?: unknown }).text;
if (typeof text !== "string") {
continue;
}
const trimmed = text.trim();
if (!trimmed) {
continue;
}
if (!BOILERPLATE_REPLY_TEXT.has(trimmed)) {
return true;
}
}
return sawNonTextBlock;
}
function hasRealConversationContent(
msg: AgentMessage,
messages: AgentMessage[],
index: number,
): boolean {
if (msg.role === "user" || msg.role === "assistant") {
return hasMeaningfulConversationContent(msg);
}
if (msg.role !== "toolResult") {
return false;
}
const start = Math.max(0, index - TOOL_RESULT_REAL_CONVERSATION_LOOKBACK);
for (let i = index - 1; i >= start; i -= 1) {
const candidate = messages[i];
if (!candidate || candidate.role !== "user") {
continue;
}
if (hasMeaningfulConversationContent(candidate)) {
return true;
}
}
return false;
}
function createCompactionDiagId(): string {
@ -960,7 +1020,11 @@ export async function compactEmbeddedPiSessionDirect(
);
}
if (!session.messages.some(hasRealConversationContent)) {
if (
!session.messages.some((message, index, messages) =>
hasRealConversationContent(message, messages, index),
)
) {
log.info(
`[compaction] skipping — no real conversation messages (sessionKey=${params.sessionKey ?? params.sessionId})`,
);

View File

@ -1673,6 +1673,50 @@ describe("compaction-safeguard double-compaction guard", () => {
expect(result).toEqual({ cancel: true });
expect(getApiKeyMock).toHaveBeenCalled();
});
it("treats tool results as real conversation only when linked to a meaningful user ask", async () => {
expect(
__testing.isRealConversationMessage(
{
role: "toolResult",
toolCallId: "t1",
toolName: "exec",
content: [{ type: "text", text: "done" }],
} as AgentMessage,
[
{ role: "user", content: "HEARTBEAT_OK" } as AgentMessage,
{
role: "toolResult",
toolCallId: "t1",
toolName: "exec",
content: [{ type: "text", text: "done" }],
} as AgentMessage,
],
1,
),
).toBe(false);
expect(
__testing.isRealConversationMessage(
{
role: "toolResult",
toolCallId: "t2",
toolName: "exec",
content: [{ type: "text", text: "done" }],
} as AgentMessage,
[
{ role: "user", content: "please inspect the repo" } as AgentMessage,
{
role: "toolResult",
toolCallId: "t2",
toolName: "exec",
content: [{ type: "text", text: "done" }],
} as AgentMessage,
],
1,
),
).toBe(true);
});
});
async function expectWorkspaceSummaryEmptyForAgentsAlias(

View File

@ -179,8 +179,68 @@ function formatToolFailuresSection(failures: ToolFailure[]): string {
return `\n\n## Tool Failures\n${lines.join("\n")}`;
}
function isRealConversationMessage(message: AgentMessage): boolean {
return message.role === "user" || message.role === "assistant" || message.role === "toolResult";
const BOILERPLATE_REPLY_TEXT = new Set(["HEARTBEAT_OK", "NO_REPLY"]);
const TOOL_RESULT_REAL_CONVERSATION_LOOKBACK = 20;
function hasMeaningfulConversationContent(message: AgentMessage): boolean {
const content = (message as { content?: unknown }).content;
if (typeof content === "string") {
const trimmed = content.trim();
if (!trimmed) {
return false;
}
return !BOILERPLATE_REPLY_TEXT.has(trimmed);
}
if (!Array.isArray(content)) {
return false;
}
let sawNonTextBlock = false;
for (const block of content) {
if (!block || typeof block !== "object") {
continue;
}
const type = (block as { type?: unknown }).type;
if (type !== "text") {
sawNonTextBlock = true;
continue;
}
const text = (block as { text?: unknown }).text;
if (typeof text !== "string") {
continue;
}
const trimmed = text.trim();
if (!trimmed) {
continue;
}
if (!BOILERPLATE_REPLY_TEXT.has(trimmed)) {
return true;
}
}
return sawNonTextBlock;
}
function isRealConversationMessage(
message: AgentMessage,
messages: AgentMessage[],
index: number,
): boolean {
if (message.role === "user" || message.role === "assistant") {
return hasMeaningfulConversationContent(message);
}
if (message.role !== "toolResult") {
return false;
}
const start = Math.max(0, index - TOOL_RESULT_REAL_CONVERSATION_LOOKBACK);
for (let i = index - 1; i >= start; i -= 1) {
const candidate = messages[i];
if (!candidate || candidate.role !== "user") {
continue;
}
if (hasMeaningfulConversationContent(candidate)) {
return true;
}
}
return false;
}
function computeFileLists(fileOps: FileOperations): {
@ -702,8 +762,12 @@ async function readWorkspaceContextForSummary(): Promise<string> {
export default function compactionSafeguardExtension(api: ExtensionAPI): void {
api.on("session_before_compact", async (event, ctx) => {
const { preparation, customInstructions: eventInstructions, signal } = event;
const hasRealSummarizable = preparation.messagesToSummarize.some(isRealConversationMessage);
const hasRealTurnPrefix = preparation.turnPrefixMessages.some(isRealConversationMessage);
const hasRealSummarizable = preparation.messagesToSummarize.some((message, index, messages) =>
isRealConversationMessage(message, messages, index),
);
const hasRealTurnPrefix = preparation.turnPrefixMessages.some((message, index, messages) =>
isRealConversationMessage(message, messages, index),
);
if (!hasRealSummarizable && !hasRealTurnPrefix) {
// When there are no summarizable messages AND no real turn-prefix content,
// cancelling compaction leaves context unchanged but the SDK re-triggers
@ -1026,6 +1090,8 @@ export const __testing = {
computeAdaptiveChunkRatio,
isOversizedForSummary,
readWorkspaceContextForSummary,
hasMeaningfulConversationContent,
isRealConversationMessage,
BASE_CHUNK_RATIO,
MIN_CHUNK_RATIO,
SAFETY_MARGIN,