fix(compaction): skip compaction only when session has no meaningful conversation

Fixes #40727 The compaction safeguard and the embedded-runner compaction path both used a naive role check (user | assistant | toolResult) to decide whether a session contains real conversation worth summarising. In long-running heartbeat sessions nearly every message satisfies that check, yet the *content* is boilerplate: heartbeat polls produce empty or HEARTBEAT_OK user turns and NO_REPLY assistant turns. The guard therefore never fires, the session grows past the 200K ceiling, and the agent goes silent. Fix: replace the role-only predicate with a content-aware one: isRealConversationMessage / hasRealConversationContent (two separate copies – safeguard.ts and compact.ts – each get the same treatment) Rules: • user / assistant messages are real only when their text content is non-empty AND not a pure boilerplate sentinel (HEARTBEAT_OK or NO_REPLY). Non-text blocks (images, attachments) are always real. • toolResult messages are real only when a meaningful user message appears in the previous 20 messages of the window, so tool output that was triggered by a real human ask still counts. Both predicates are exported via __testing / direct export so the existing and new unit tests can cover them without mocks. Tests added: compaction-safeguard.test.ts – two new cases: • tool result linked to HEARTBEAT_OK user turn → not real • tool result linked to meaningful user ask → real compact.hooks.test.ts – two new integration cases: • boilerplate-only session → skips compaction (reason: no real conversation messages) • meaningful user ask + tool result → compaction runs
2026-03-10 20:19:32 +08:00 · 2026-03-10 20:19:32 +08:00 · b1eaf639d5
commit b1eaf639d5
parent 4c60956d8e
5 changed files with 285 additions and 19 deletions
--- a/src/agents/pi-embedded-runner/compact.hooks.harness.ts
+++ b/src/agents/pi-embedded-runner/compact.hooks.harness.ts
@ -67,6 +67,18 @@ export const resolveMemorySearchConfigMock = vi.fn(() => ({
 }));
 export const resolveSessionAgentIdMock = vi.fn(() => "main");
 export const estimateTokensMock = vi.fn((_message?: unknown) => 10);
+export const sessionMessages: unknown[] = [
+  { role: "user", content: "hello", timestamp: 1 },
+  { role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 },
+  {
+    role: "toolResult",
+    toolCallId: "t1",
+    toolName: "exec",
+    content: [{ type: "text", text: "output" }],
+    isError: false,
+    timestamp: 3,
+  },
+];
 export const sessionAbortCompactionMock: Mock<(reason?: unknown) => void> = vi.fn();
 export const createOpenClawCodingToolsMock = vi.fn(() => []);

@ -134,6 +146,20 @@ export function resetCompactHooksHarnessMocks(): void {
  resolveSessionAgentIdMock.mockReturnValue("main");
  estimateTokensMock.mockReset();
  estimateTokensMock.mockReturnValue(10);
+  sessionMessages.splice(
+    0,
+    sessionMessages.length,
+    { role: "user", content: "hello", timestamp: 1 },
+    { role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 },
+    {
+      role: "toolResult",
+      toolCallId: "t1",
+      toolName: "exec",
+      content: [{ type: "text", text: "output" }],
+      isError: false,
+      timestamp: 3,
+    },
+  );
  sessionAbortCompactionMock.mockReset();
  createOpenClawCodingToolsMock.mockReset();
  createOpenClawCodingToolsMock.mockReturnValue([]);
@ -176,18 +202,11 @@ export async function loadCompactHooksHarness(): Promise<{
    createAgentSession: vi.fn(async () => {
      const session = {
        sessionId: "session-1",
-        messages: [
-          { role: "user", content: "hello", timestamp: 1 },
-          { role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 },
-          {
-            role: "toolResult",
-            toolCallId: "t1",
-            toolName: "exec",
-            content: [{ type: "text", text: "output" }],
-            isError: false,
-            timestamp: 3,
-          },
-        ],
+        messages: sessionMessages.map((message) =>
+          typeof structuredClone === "function"
+            ? structuredClone(message)
+            : JSON.parse(JSON.stringify(message)),
+        ),
        agent: {
          replaceMessages: vi.fn((messages: unknown[]) => {
            session.messages = [...(messages as typeof session.messages)];
--- a/src/agents/pi-embedded-runner/compact.hooks.test.ts
+++ b/src/agents/pi-embedded-runner/compact.hooks.test.ts
@ -16,6 +16,7 @@ import {
  resetCompactHooksHarnessMocks,
  sanitizeSessionHistoryMock,
  sessionAbortCompactionMock,
+  sessionMessages,
  sessionCompactImpl,
  triggerInternalHook,
 } from "./compact.hooks.harness.js";
@ -154,6 +155,20 @@ describe("compactEmbeddedPiSessionDirect hooks", () => {
    estimateTokensMock.mockReset();
    estimateTokensMock.mockReturnValue(10);
    sessionAbortCompactionMock.mockReset();
+    sessionMessages.splice(
+      0,
+      sessionMessages.length,
+      { role: "user", content: "hello", timestamp: 1 },
+      { role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 },
+      {
+        role: "toolResult",
+        toolCallId: "t1",
+        toolName: "exec",
+        content: [{ type: "text", text: "output" }],
+        isError: false,
+        timestamp: 3,
+      },
+    );
    unregisterApiProviders(getCustomApiRegistrySourceId("ollama"));
  });

@ -490,6 +505,64 @@ describe("compactEmbeddedPiSessionDirect hooks", () => {
    });
  });

+  it("skips compaction when the transcript only contains boilerplate replies and tool output", async () => {
+    sessionMessages.splice(
+      0,
+      sessionMessages.length,
+      { role: "user", content: "HEARTBEAT_OK", timestamp: 1 },
+      {
+        role: "toolResult",
+        toolCallId: "t1",
+        toolName: "exec",
+        content: [{ type: "text", text: "checked" }],
+        isError: false,
+        timestamp: 2,
+      },
+    );
+
+    const result = await compactEmbeddedPiSessionDirect({
+      sessionId: "session-1",
+      sessionKey: "agent:main:session-1",
+      sessionFile: "/tmp/session.jsonl",
+      workspaceDir: "/tmp",
+      customInstructions: "focus on decisions",
+    });
+
+    expect(result).toMatchObject({
+      ok: true,
+      compacted: false,
+      reason: "no real conversation messages",
+    });
+    expect(sessionCompactImpl).not.toHaveBeenCalled();
+  });
+
+  it("keeps compaction enabled when tool output follows a meaningful user request", async () => {
+    sessionMessages.splice(
+      0,
+      sessionMessages.length,
+      { role: "user", content: "please inspect the failing PR", timestamp: 1 },
+      {
+        role: "toolResult",
+        toolCallId: "t1",
+        toolName: "exec",
+        content: [{ type: "text", text: "checked" }],
+        isError: false,
+        timestamp: 2,
+      },
+    );
+
+    const result = await compactEmbeddedPiSessionDirect({
+      sessionId: "session-1",
+      sessionKey: "agent:main:session-1",
+      sessionFile: "/tmp/session.jsonl",
+      workspaceDir: "/tmp",
+      customInstructions: "focus on decisions",
+    });
+
+    expect(result.ok).toBe(true);
+    expect(sessionCompactImpl).toHaveBeenCalled();
+  });
+
  it("registers the Ollama api provider before compaction", async () => {
    resolveModelMock.mockReturnValue({
      model: {
--- a/src/agents/pi-embedded-runner/compact.ts
+++ b/src/agents/pi-embedded-runner/compact.ts
@ -167,8 +167,68 @@ type CompactionMessageMetrics = {
  contributors: Array<{ role: string; chars: number; tool?: string }>;
 };

-function hasRealConversationContent(msg: AgentMessage): boolean {
-  return msg.role === "user" || msg.role === "assistant" || msg.role === "toolResult";
+const BOILERPLATE_REPLY_TEXT = new Set(["HEARTBEAT_OK", "NO_REPLY"]);
+const TOOL_RESULT_REAL_CONVERSATION_LOOKBACK = 20;
+
+function hasMeaningfulConversationContent(msg: AgentMessage): boolean {
+  const content = (msg as { content?: unknown }).content;
+  if (typeof content === "string") {
+    const trimmed = content.trim();
+    if (!trimmed) {
+      return false;
+    }
+    return !BOILERPLATE_REPLY_TEXT.has(trimmed);
+  }
+  if (!Array.isArray(content)) {
+    return false;
+  }
+  let sawNonTextBlock = false;
+  for (const block of content) {
+    if (!block || typeof block !== "object") {
+      continue;
+    }
+    const type = (block as { type?: unknown }).type;
+    if (type !== "text") {
+      sawNonTextBlock = true;
+      continue;
+    }
+    const text = (block as { text?: unknown }).text;
+    if (typeof text !== "string") {
+      continue;
+    }
+    const trimmed = text.trim();
+    if (!trimmed) {
+      continue;
+    }
+    if (!BOILERPLATE_REPLY_TEXT.has(trimmed)) {
+      return true;
+    }
+  }
+  return sawNonTextBlock;
+}
+
+function hasRealConversationContent(
+  msg: AgentMessage,
+  messages: AgentMessage[],
+  index: number,
+): boolean {
+  if (msg.role === "user" || msg.role === "assistant") {
+    return hasMeaningfulConversationContent(msg);
+  }
+  if (msg.role !== "toolResult") {
+    return false;
+  }
+  const start = Math.max(0, index - TOOL_RESULT_REAL_CONVERSATION_LOOKBACK);
+  for (let i = index - 1; i >= start; i -= 1) {
+    const candidate = messages[i];
+    if (!candidate || candidate.role !== "user") {
+      continue;
+    }
+    if (hasMeaningfulConversationContent(candidate)) {
+      return true;
+    }
+  }
+  return false;
 }

 function createCompactionDiagId(): string {
@ -960,7 +1020,11 @@ export async function compactEmbeddedPiSessionDirect(
          );
        }

-        if (!session.messages.some(hasRealConversationContent)) {
+        if (
+          !session.messages.some((message, index, messages) =>
+            hasRealConversationContent(message, messages, index),
+          )
+        ) {
          log.info(
            `[compaction] skipping — no real conversation messages (sessionKey=${params.sessionKey ?? params.sessionId})`,
          );
--- a/src/agents/pi-extensions/compaction-safeguard.test.ts
+++ b/src/agents/pi-extensions/compaction-safeguard.test.ts
@ -1673,6 +1673,50 @@ describe("compaction-safeguard double-compaction guard", () => {
    expect(result).toEqual({ cancel: true });
    expect(getApiKeyMock).toHaveBeenCalled();
  });
+
+  it("treats tool results as real conversation only when linked to a meaningful user ask", async () => {
+    expect(
+      __testing.isRealConversationMessage(
+        {
+          role: "toolResult",
+          toolCallId: "t1",
+          toolName: "exec",
+          content: [{ type: "text", text: "done" }],
+        } as AgentMessage,
+        [
+          { role: "user", content: "HEARTBEAT_OK" } as AgentMessage,
+          {
+            role: "toolResult",
+            toolCallId: "t1",
+            toolName: "exec",
+            content: [{ type: "text", text: "done" }],
+          } as AgentMessage,
+        ],
+        1,
+      ),
+    ).toBe(false);
+
+    expect(
+      __testing.isRealConversationMessage(
+        {
+          role: "toolResult",
+          toolCallId: "t2",
+          toolName: "exec",
+          content: [{ type: "text", text: "done" }],
+        } as AgentMessage,
+        [
+          { role: "user", content: "please inspect the repo" } as AgentMessage,
+          {
+            role: "toolResult",
+            toolCallId: "t2",
+            toolName: "exec",
+            content: [{ type: "text", text: "done" }],
+          } as AgentMessage,
+        ],
+        1,
+      ),
+    ).toBe(true);
+  });
 });

 async function expectWorkspaceSummaryEmptyForAgentsAlias(
--- a/src/agents/pi-extensions/compaction-safeguard.ts
+++ b/src/agents/pi-extensions/compaction-safeguard.ts
@ -179,8 +179,68 @@ function formatToolFailuresSection(failures: ToolFailure[]): string {
  return `\n\n## Tool Failures\n${lines.join("\n")}`;
 }

-function isRealConversationMessage(message: AgentMessage): boolean {
-  return message.role === "user" || message.role === "assistant" || message.role === "toolResult";
+const BOILERPLATE_REPLY_TEXT = new Set(["HEARTBEAT_OK", "NO_REPLY"]);
+const TOOL_RESULT_REAL_CONVERSATION_LOOKBACK = 20;
+
+function hasMeaningfulConversationContent(message: AgentMessage): boolean {
+  const content = (message as { content?: unknown }).content;
+  if (typeof content === "string") {
+    const trimmed = content.trim();
+    if (!trimmed) {
+      return false;
+    }
+    return !BOILERPLATE_REPLY_TEXT.has(trimmed);
+  }
+  if (!Array.isArray(content)) {
+    return false;
+  }
+  let sawNonTextBlock = false;
+  for (const block of content) {
+    if (!block || typeof block !== "object") {
+      continue;
+    }
+    const type = (block as { type?: unknown }).type;
+    if (type !== "text") {
+      sawNonTextBlock = true;
+      continue;
+    }
+    const text = (block as { text?: unknown }).text;
+    if (typeof text !== "string") {
+      continue;
+    }
+    const trimmed = text.trim();
+    if (!trimmed) {
+      continue;
+    }
+    if (!BOILERPLATE_REPLY_TEXT.has(trimmed)) {
+      return true;
+    }
+  }
+  return sawNonTextBlock;
+}
+
+function isRealConversationMessage(
+  message: AgentMessage,
+  messages: AgentMessage[],
+  index: number,
+): boolean {
+  if (message.role === "user" || message.role === "assistant") {
+    return hasMeaningfulConversationContent(message);
+  }
+  if (message.role !== "toolResult") {
+    return false;
+  }
+  const start = Math.max(0, index - TOOL_RESULT_REAL_CONVERSATION_LOOKBACK);
+  for (let i = index - 1; i >= start; i -= 1) {
+    const candidate = messages[i];
+    if (!candidate || candidate.role !== "user") {
+      continue;
+    }
+    if (hasMeaningfulConversationContent(candidate)) {
+      return true;
+    }
+  }
+  return false;
 }

 function computeFileLists(fileOps: FileOperations): {
@ -702,8 +762,12 @@ async function readWorkspaceContextForSummary(): Promise<string> {
 export default function compactionSafeguardExtension(api: ExtensionAPI): void {
  api.on("session_before_compact", async (event, ctx) => {
    const { preparation, customInstructions: eventInstructions, signal } = event;
-    const hasRealSummarizable = preparation.messagesToSummarize.some(isRealConversationMessage);
-    const hasRealTurnPrefix = preparation.turnPrefixMessages.some(isRealConversationMessage);
+    const hasRealSummarizable = preparation.messagesToSummarize.some((message, index, messages) =>
+      isRealConversationMessage(message, messages, index),
+    );
+    const hasRealTurnPrefix = preparation.turnPrefixMessages.some((message, index, messages) =>
+      isRealConversationMessage(message, messages, index),
+    );
    if (!hasRealSummarizable && !hasRealTurnPrefix) {
      // When there are no summarizable messages AND no real turn-prefix content,
      // cancelling compaction leaves context unchanged but the SDK re-triggers
@ -1026,6 +1090,8 @@ export const __testing = {
  computeAdaptiveChunkRatio,
  isOversizedForSummary,
  readWorkspaceContextForSummary,
+  hasMeaningfulConversationContent,
+  isRealConversationMessage,
  BASE_CHUNK_RATIO,
  MIN_CHUNK_RATIO,
  SAFETY_MARGIN,