fix(compaction): skip compaction only when session has no meaningful conversation
Fixes #40727 The compaction safeguard and the embedded-runner compaction path both used a naive role check (user | assistant | toolResult) to decide whether a session contains real conversation worth summarising. In long-running heartbeat sessions nearly every message satisfies that check, yet the *content* is boilerplate: heartbeat polls produce empty or HEARTBEAT_OK user turns and NO_REPLY assistant turns. The guard therefore never fires, the session grows past the 200K ceiling, and the agent goes silent. Fix: replace the role-only predicate with a content-aware one: isRealConversationMessage / hasRealConversationContent (two separate copies – safeguard.ts and compact.ts – each get the same treatment) Rules: • user / assistant messages are real only when their text content is non-empty AND not a pure boilerplate sentinel (HEARTBEAT_OK or NO_REPLY). Non-text blocks (images, attachments) are always real. • toolResult messages are real only when a meaningful user message appears in the previous 20 messages of the window, so tool output that was triggered by a real human ask still counts. Both predicates are exported via __testing / direct export so the existing and new unit tests can cover them without mocks. Tests added: compaction-safeguard.test.ts – two new cases: • tool result linked to HEARTBEAT_OK user turn → not real • tool result linked to meaningful user ask → real compact.hooks.test.ts – two new integration cases: • boilerplate-only session → skips compaction (reason: no real conversation messages) • meaningful user ask + tool result → compaction runs
This commit is contained in:
parent
4c60956d8e
commit
b1eaf639d5
@ -67,6 +67,18 @@ export const resolveMemorySearchConfigMock = vi.fn(() => ({
|
||||
}));
|
||||
export const resolveSessionAgentIdMock = vi.fn(() => "main");
|
||||
export const estimateTokensMock = vi.fn((_message?: unknown) => 10);
|
||||
export const sessionMessages: unknown[] = [
|
||||
{ role: "user", content: "hello", timestamp: 1 },
|
||||
{ role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 },
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "t1",
|
||||
toolName: "exec",
|
||||
content: [{ type: "text", text: "output" }],
|
||||
isError: false,
|
||||
timestamp: 3,
|
||||
},
|
||||
];
|
||||
export const sessionAbortCompactionMock: Mock<(reason?: unknown) => void> = vi.fn();
|
||||
export const createOpenClawCodingToolsMock = vi.fn(() => []);
|
||||
|
||||
@ -134,6 +146,20 @@ export function resetCompactHooksHarnessMocks(): void {
|
||||
resolveSessionAgentIdMock.mockReturnValue("main");
|
||||
estimateTokensMock.mockReset();
|
||||
estimateTokensMock.mockReturnValue(10);
|
||||
sessionMessages.splice(
|
||||
0,
|
||||
sessionMessages.length,
|
||||
{ role: "user", content: "hello", timestamp: 1 },
|
||||
{ role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 },
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "t1",
|
||||
toolName: "exec",
|
||||
content: [{ type: "text", text: "output" }],
|
||||
isError: false,
|
||||
timestamp: 3,
|
||||
},
|
||||
);
|
||||
sessionAbortCompactionMock.mockReset();
|
||||
createOpenClawCodingToolsMock.mockReset();
|
||||
createOpenClawCodingToolsMock.mockReturnValue([]);
|
||||
@ -176,18 +202,11 @@ export async function loadCompactHooksHarness(): Promise<{
|
||||
createAgentSession: vi.fn(async () => {
|
||||
const session = {
|
||||
sessionId: "session-1",
|
||||
messages: [
|
||||
{ role: "user", content: "hello", timestamp: 1 },
|
||||
{ role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 },
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "t1",
|
||||
toolName: "exec",
|
||||
content: [{ type: "text", text: "output" }],
|
||||
isError: false,
|
||||
timestamp: 3,
|
||||
},
|
||||
],
|
||||
messages: sessionMessages.map((message) =>
|
||||
typeof structuredClone === "function"
|
||||
? structuredClone(message)
|
||||
: JSON.parse(JSON.stringify(message)),
|
||||
),
|
||||
agent: {
|
||||
replaceMessages: vi.fn((messages: unknown[]) => {
|
||||
session.messages = [...(messages as typeof session.messages)];
|
||||
|
||||
@ -16,6 +16,7 @@ import {
|
||||
resetCompactHooksHarnessMocks,
|
||||
sanitizeSessionHistoryMock,
|
||||
sessionAbortCompactionMock,
|
||||
sessionMessages,
|
||||
sessionCompactImpl,
|
||||
triggerInternalHook,
|
||||
} from "./compact.hooks.harness.js";
|
||||
@ -154,6 +155,20 @@ describe("compactEmbeddedPiSessionDirect hooks", () => {
|
||||
estimateTokensMock.mockReset();
|
||||
estimateTokensMock.mockReturnValue(10);
|
||||
sessionAbortCompactionMock.mockReset();
|
||||
sessionMessages.splice(
|
||||
0,
|
||||
sessionMessages.length,
|
||||
{ role: "user", content: "hello", timestamp: 1 },
|
||||
{ role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 },
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "t1",
|
||||
toolName: "exec",
|
||||
content: [{ type: "text", text: "output" }],
|
||||
isError: false,
|
||||
timestamp: 3,
|
||||
},
|
||||
);
|
||||
unregisterApiProviders(getCustomApiRegistrySourceId("ollama"));
|
||||
});
|
||||
|
||||
@ -490,6 +505,64 @@ describe("compactEmbeddedPiSessionDirect hooks", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("skips compaction when the transcript only contains boilerplate replies and tool output", async () => {
|
||||
sessionMessages.splice(
|
||||
0,
|
||||
sessionMessages.length,
|
||||
{ role: "user", content: "HEARTBEAT_OK", timestamp: 1 },
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "t1",
|
||||
toolName: "exec",
|
||||
content: [{ type: "text", text: "checked" }],
|
||||
isError: false,
|
||||
timestamp: 2,
|
||||
},
|
||||
);
|
||||
|
||||
const result = await compactEmbeddedPiSessionDirect({
|
||||
sessionId: "session-1",
|
||||
sessionKey: "agent:main:session-1",
|
||||
sessionFile: "/tmp/session.jsonl",
|
||||
workspaceDir: "/tmp",
|
||||
customInstructions: "focus on decisions",
|
||||
});
|
||||
|
||||
expect(result).toMatchObject({
|
||||
ok: true,
|
||||
compacted: false,
|
||||
reason: "no real conversation messages",
|
||||
});
|
||||
expect(sessionCompactImpl).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("keeps compaction enabled when tool output follows a meaningful user request", async () => {
|
||||
sessionMessages.splice(
|
||||
0,
|
||||
sessionMessages.length,
|
||||
{ role: "user", content: "please inspect the failing PR", timestamp: 1 },
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "t1",
|
||||
toolName: "exec",
|
||||
content: [{ type: "text", text: "checked" }],
|
||||
isError: false,
|
||||
timestamp: 2,
|
||||
},
|
||||
);
|
||||
|
||||
const result = await compactEmbeddedPiSessionDirect({
|
||||
sessionId: "session-1",
|
||||
sessionKey: "agent:main:session-1",
|
||||
sessionFile: "/tmp/session.jsonl",
|
||||
workspaceDir: "/tmp",
|
||||
customInstructions: "focus on decisions",
|
||||
});
|
||||
|
||||
expect(result.ok).toBe(true);
|
||||
expect(sessionCompactImpl).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("registers the Ollama api provider before compaction", async () => {
|
||||
resolveModelMock.mockReturnValue({
|
||||
model: {
|
||||
|
||||
@ -167,8 +167,68 @@ type CompactionMessageMetrics = {
|
||||
contributors: Array<{ role: string; chars: number; tool?: string }>;
|
||||
};
|
||||
|
||||
function hasRealConversationContent(msg: AgentMessage): boolean {
|
||||
return msg.role === "user" || msg.role === "assistant" || msg.role === "toolResult";
|
||||
const BOILERPLATE_REPLY_TEXT = new Set(["HEARTBEAT_OK", "NO_REPLY"]);
|
||||
const TOOL_RESULT_REAL_CONVERSATION_LOOKBACK = 20;
|
||||
|
||||
function hasMeaningfulConversationContent(msg: AgentMessage): boolean {
|
||||
const content = (msg as { content?: unknown }).content;
|
||||
if (typeof content === "string") {
|
||||
const trimmed = content.trim();
|
||||
if (!trimmed) {
|
||||
return false;
|
||||
}
|
||||
return !BOILERPLATE_REPLY_TEXT.has(trimmed);
|
||||
}
|
||||
if (!Array.isArray(content)) {
|
||||
return false;
|
||||
}
|
||||
let sawNonTextBlock = false;
|
||||
for (const block of content) {
|
||||
if (!block || typeof block !== "object") {
|
||||
continue;
|
||||
}
|
||||
const type = (block as { type?: unknown }).type;
|
||||
if (type !== "text") {
|
||||
sawNonTextBlock = true;
|
||||
continue;
|
||||
}
|
||||
const text = (block as { text?: unknown }).text;
|
||||
if (typeof text !== "string") {
|
||||
continue;
|
||||
}
|
||||
const trimmed = text.trim();
|
||||
if (!trimmed) {
|
||||
continue;
|
||||
}
|
||||
if (!BOILERPLATE_REPLY_TEXT.has(trimmed)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return sawNonTextBlock;
|
||||
}
|
||||
|
||||
function hasRealConversationContent(
|
||||
msg: AgentMessage,
|
||||
messages: AgentMessage[],
|
||||
index: number,
|
||||
): boolean {
|
||||
if (msg.role === "user" || msg.role === "assistant") {
|
||||
return hasMeaningfulConversationContent(msg);
|
||||
}
|
||||
if (msg.role !== "toolResult") {
|
||||
return false;
|
||||
}
|
||||
const start = Math.max(0, index - TOOL_RESULT_REAL_CONVERSATION_LOOKBACK);
|
||||
for (let i = index - 1; i >= start; i -= 1) {
|
||||
const candidate = messages[i];
|
||||
if (!candidate || candidate.role !== "user") {
|
||||
continue;
|
||||
}
|
||||
if (hasMeaningfulConversationContent(candidate)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
function createCompactionDiagId(): string {
|
||||
@ -960,7 +1020,11 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
);
|
||||
}
|
||||
|
||||
if (!session.messages.some(hasRealConversationContent)) {
|
||||
if (
|
||||
!session.messages.some((message, index, messages) =>
|
||||
hasRealConversationContent(message, messages, index),
|
||||
)
|
||||
) {
|
||||
log.info(
|
||||
`[compaction] skipping — no real conversation messages (sessionKey=${params.sessionKey ?? params.sessionId})`,
|
||||
);
|
||||
|
||||
@ -1673,6 +1673,50 @@ describe("compaction-safeguard double-compaction guard", () => {
|
||||
expect(result).toEqual({ cancel: true });
|
||||
expect(getApiKeyMock).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("treats tool results as real conversation only when linked to a meaningful user ask", async () => {
|
||||
expect(
|
||||
__testing.isRealConversationMessage(
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "t1",
|
||||
toolName: "exec",
|
||||
content: [{ type: "text", text: "done" }],
|
||||
} as AgentMessage,
|
||||
[
|
||||
{ role: "user", content: "HEARTBEAT_OK" } as AgentMessage,
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "t1",
|
||||
toolName: "exec",
|
||||
content: [{ type: "text", text: "done" }],
|
||||
} as AgentMessage,
|
||||
],
|
||||
1,
|
||||
),
|
||||
).toBe(false);
|
||||
|
||||
expect(
|
||||
__testing.isRealConversationMessage(
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "t2",
|
||||
toolName: "exec",
|
||||
content: [{ type: "text", text: "done" }],
|
||||
} as AgentMessage,
|
||||
[
|
||||
{ role: "user", content: "please inspect the repo" } as AgentMessage,
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "t2",
|
||||
toolName: "exec",
|
||||
content: [{ type: "text", text: "done" }],
|
||||
} as AgentMessage,
|
||||
],
|
||||
1,
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
async function expectWorkspaceSummaryEmptyForAgentsAlias(
|
||||
|
||||
@ -179,8 +179,68 @@ function formatToolFailuresSection(failures: ToolFailure[]): string {
|
||||
return `\n\n## Tool Failures\n${lines.join("\n")}`;
|
||||
}
|
||||
|
||||
function isRealConversationMessage(message: AgentMessage): boolean {
|
||||
return message.role === "user" || message.role === "assistant" || message.role === "toolResult";
|
||||
const BOILERPLATE_REPLY_TEXT = new Set(["HEARTBEAT_OK", "NO_REPLY"]);
|
||||
const TOOL_RESULT_REAL_CONVERSATION_LOOKBACK = 20;
|
||||
|
||||
function hasMeaningfulConversationContent(message: AgentMessage): boolean {
|
||||
const content = (message as { content?: unknown }).content;
|
||||
if (typeof content === "string") {
|
||||
const trimmed = content.trim();
|
||||
if (!trimmed) {
|
||||
return false;
|
||||
}
|
||||
return !BOILERPLATE_REPLY_TEXT.has(trimmed);
|
||||
}
|
||||
if (!Array.isArray(content)) {
|
||||
return false;
|
||||
}
|
||||
let sawNonTextBlock = false;
|
||||
for (const block of content) {
|
||||
if (!block || typeof block !== "object") {
|
||||
continue;
|
||||
}
|
||||
const type = (block as { type?: unknown }).type;
|
||||
if (type !== "text") {
|
||||
sawNonTextBlock = true;
|
||||
continue;
|
||||
}
|
||||
const text = (block as { text?: unknown }).text;
|
||||
if (typeof text !== "string") {
|
||||
continue;
|
||||
}
|
||||
const trimmed = text.trim();
|
||||
if (!trimmed) {
|
||||
continue;
|
||||
}
|
||||
if (!BOILERPLATE_REPLY_TEXT.has(trimmed)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return sawNonTextBlock;
|
||||
}
|
||||
|
||||
function isRealConversationMessage(
|
||||
message: AgentMessage,
|
||||
messages: AgentMessage[],
|
||||
index: number,
|
||||
): boolean {
|
||||
if (message.role === "user" || message.role === "assistant") {
|
||||
return hasMeaningfulConversationContent(message);
|
||||
}
|
||||
if (message.role !== "toolResult") {
|
||||
return false;
|
||||
}
|
||||
const start = Math.max(0, index - TOOL_RESULT_REAL_CONVERSATION_LOOKBACK);
|
||||
for (let i = index - 1; i >= start; i -= 1) {
|
||||
const candidate = messages[i];
|
||||
if (!candidate || candidate.role !== "user") {
|
||||
continue;
|
||||
}
|
||||
if (hasMeaningfulConversationContent(candidate)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
function computeFileLists(fileOps: FileOperations): {
|
||||
@ -702,8 +762,12 @@ async function readWorkspaceContextForSummary(): Promise<string> {
|
||||
export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
||||
api.on("session_before_compact", async (event, ctx) => {
|
||||
const { preparation, customInstructions: eventInstructions, signal } = event;
|
||||
const hasRealSummarizable = preparation.messagesToSummarize.some(isRealConversationMessage);
|
||||
const hasRealTurnPrefix = preparation.turnPrefixMessages.some(isRealConversationMessage);
|
||||
const hasRealSummarizable = preparation.messagesToSummarize.some((message, index, messages) =>
|
||||
isRealConversationMessage(message, messages, index),
|
||||
);
|
||||
const hasRealTurnPrefix = preparation.turnPrefixMessages.some((message, index, messages) =>
|
||||
isRealConversationMessage(message, messages, index),
|
||||
);
|
||||
if (!hasRealSummarizable && !hasRealTurnPrefix) {
|
||||
// When there are no summarizable messages AND no real turn-prefix content,
|
||||
// cancelling compaction leaves context unchanged but the SDK re-triggers
|
||||
@ -1026,6 +1090,8 @@ export const __testing = {
|
||||
computeAdaptiveChunkRatio,
|
||||
isOversizedForSummary,
|
||||
readWorkspaceContextForSummary,
|
||||
hasMeaningfulConversationContent,
|
||||
isRealConversationMessage,
|
||||
BASE_CHUNK_RATIO,
|
||||
MIN_CHUNK_RATIO,
|
||||
SAFETY_MARGIN,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user