fix(compaction): use full-session token count for post-compaction sanity check (#28347)

Merged via squash.

Prepared head SHA: cf4eab1c51e6b8890e23c2d7172313c40cd2fe04
Co-authored-by: efe-arv <259833796+efe-arv@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman
This commit is contained in:
Efe Büken 2026-03-13 07:26:30 +03:00 committed by GitHub
parent 70d7a0854c
commit 771066d122
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 142 additions and 5 deletions

View File

@ -85,6 +85,7 @@ Docs: https://docs.openclaw.ai
- Cron/doctor: stop flagging canonical `agentTurn` and `systemEvent` payload kinds as legacy cron storage, while still normalizing whitespace-padded and non-canonical variants. (#44012) Thanks @shuicici.
- ACP/client final-message delivery: preserve terminal assistant text snapshots before resolving `end_turn`, so ACP clients no longer drop the last visible reply when the gateway sends the final message body on the terminal chat event. (#17615) Thanks @pjeby.
- Telegram/Discord status reactions: show a temporary compacting reaction during auto-compaction pauses and restore thinking afterward so the bot no longer appears frozen while context is being compacted. (#35474) Thanks @Cypherm.
- Agents/compaction: compare post-compaction token sanity checks against full-session pre-compaction totals and skip the check when token estimation fails, so sessions with large bootstrap context keep real token counts instead of falling back to unknown. (#28347) Thanks @efe-arv.
## 2026.3.11

View File

@ -13,6 +13,7 @@ const {
getMemorySearchManagerMock,
resolveMemorySearchConfigMock,
resolveSessionAgentIdMock,
estimateTokensMock,
} = vi.hoisted(() => {
const contextEngineCompactMock = vi.fn(async () => ({
ok: true as boolean,
@ -63,6 +64,7 @@ const {
},
})),
resolveSessionAgentIdMock: vi.fn(() => "main"),
estimateTokensMock: vi.fn((_message?: unknown) => 10),
};
});
@ -84,6 +86,11 @@ vi.mock("../../hooks/internal-hooks.js", async () => {
};
});
vi.mock("@mariozechner/pi-ai/oauth", () => ({
getOAuthApiKey: vi.fn(),
getOAuthProviders: vi.fn(() => []),
}));
vi.mock("@mariozechner/pi-coding-agent", () => {
return {
createAgentSession: vi.fn(async () => {
@ -122,7 +129,7 @@ vi.mock("@mariozechner/pi-coding-agent", () => {
SettingsManager: {
create: vi.fn(() => ({})),
},
estimateTokens: vi.fn(() => 10),
estimateTokens: estimateTokensMock,
};
});
@ -363,6 +370,8 @@ describe("compactEmbeddedPiSessionDirect hooks", () => {
});
resolveSessionAgentIdMock.mockReset();
resolveSessionAgentIdMock.mockReturnValue("main");
estimateTokensMock.mockReset();
estimateTokensMock.mockReturnValue(10);
unregisterApiProviders(getCustomApiRegistrySourceId("ollama"));
});
@ -501,6 +510,79 @@ describe("compactEmbeddedPiSessionDirect hooks", () => {
}
});
it("preserves tokensAfter when full-session context exceeds result.tokensBefore", async () => {
  // Per-role token prices; anything without a listed role costs 5 tokens.
  const tokensByRole: Record<string, number> = { user: 30, assistant: 20 };
  estimateTokensMock.mockImplementation((message: unknown) => {
    const role = (message as { role?: string }).role ?? "";
    return tokensByRole[role] ?? 5;
  });
  // result.tokensBefore (20) covers only the summarizable subset, which is
  // smaller than the full-session estimate produced by the mock above.
  sessionCompactImpl.mockResolvedValue({
    summary: "summary",
    firstKeptEntryId: "entry-1",
    tokensBefore: 20,
    details: { ok: true },
  });
  const result = await compactEmbeddedPiSessionDirect({
    sessionId: "session-1",
    sessionKey: "agent:main:session-1",
    sessionFile: "/tmp/session.jsonl",
    workspaceDir: "/tmp",
    customInstructions: "focus on decisions",
  });
  // tokensAfter must survive even though it exceeds result.tokensBefore,
  // because the sanity check compares against the full-session baseline.
  expect(result).toMatchObject({
    ok: true,
    compacted: true,
    result: {
      tokensBefore: 20,
      tokensAfter: 30,
    },
  });
  expect(sessionHook("compact:after")?.context?.tokenCount).toBe(30);
});
it("treats pre-compaction token estimation failures as a no-op sanity check", async () => {
  estimateTokensMock.mockImplementation((message: unknown) => {
    switch ((message as { role?: string }).role) {
      case "assistant":
        // Simulate a malformed/legacy entry the estimator cannot price.
        throw new Error("legacy message");
      case "user":
        return 30;
      default:
        return 5;
    }
  });
  sessionCompactImpl.mockResolvedValue({
    summary: "summary",
    firstKeptEntryId: "entry-1",
    tokensBefore: 20,
    details: { ok: true },
  });
  const result = await compactEmbeddedPiSessionDirect({
    sessionId: "session-1",
    sessionKey: "agent:main:session-1",
    sessionFile: "/tmp/session.jsonl",
    workspaceDir: "/tmp",
    customInstructions: "focus on decisions",
  });
  // With no usable pre-compaction baseline, the sanity check must not
  // discard the post-compaction estimate.
  expect(result).toMatchObject({
    ok: true,
    compacted: true,
    result: {
      tokensAfter: 30,
    },
  });
  expect(sessionHook("compact:after")?.context?.tokenCount).toBe(30);
});
it("skips sync in await mode when postCompactionForce is false", async () => {
const sync = vi.fn(async () => {});
getMemorySearchManagerMock.mockResolvedValue({ manager: { sync } });

View File

@ -897,6 +897,17 @@ export async function compactEmbeddedPiSessionDirect(
// Measure compactedCount from the original pre-limiting transcript so compaction
// lifecycle metrics represent total reduction through the compaction pipeline.
const messageCountCompactionInput = messageCountOriginal;
// Estimate full session tokens BEFORE compaction (including system prompt,
// bootstrap context, workspace files, and all history). This is needed for
// a correct sanity check — result.tokensBefore only covers the summarizable
// history subset, not the full session.
let fullSessionTokensBefore = 0;
try {
fullSessionTokensBefore = limited.reduce((sum, msg) => sum + estimateTokens(msg), 0);
} catch {
// If token estimation throws on a malformed message, fall back to 0 so
// the sanity check below becomes a no-op instead of crashing compaction.
}
const result = await compactWithSafetyTimeout(() =>
session.compact(params.customInstructions),
);
@ -912,8 +923,15 @@ export async function compactEmbeddedPiSessionDirect(
for (const message of session.messages) {
tokensAfter += estimateTokens(message);
}
// Sanity check: tokensAfter should be less than tokensBefore
if (tokensAfter > (observedTokenCount ?? result.tokensBefore)) {
// Sanity check: compare against the best full-session pre-compaction baseline.
// Prefer the provider-observed live count when available; otherwise use the
// heuristic full-session estimate with a 10% margin for counter jitter.
const sanityCheckBaseline = observedTokenCount ?? fullSessionTokensBefore;
if (
sanityCheckBaseline > 0 &&
tokensAfter >
(observedTokenCount !== undefined ? sanityCheckBaseline : sanityCheckBaseline * 1.1)
) {
tokensAfter = undefined; // Don't trust the estimate
}
} catch {

View File

@ -1,5 +1,18 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
// Proxy-related environment variables (both upper- and lower-case spellings)
// that these tests mutate and must therefore clean up after themselves.
const PROXY_ENV_KEYS = [
"HTTPS_PROXY",
"HTTP_PROXY",
"ALL_PROXY",
"https_proxy",
"http_proxy",
"all_proxy",
] as const;
// Snapshot of the proxy env vars captured at module load, so the suite can
// restore whatever the outer environment had once the tests finish.
const ORIGINAL_PROXY_ENV = Object.fromEntries(
PROXY_ENV_KEYS.map((key) => [key, process.env[key]]),
) as Record<(typeof PROXY_ENV_KEYS)[number], string | undefined>;
const { ProxyAgent, EnvHttpProxyAgent, undiciFetch, proxyAgentSpy, envAgentSpy, getLastAgent } =
vi.hoisted(() => {
const undiciFetch = vi.fn();
@ -40,6 +53,22 @@ vi.mock("undici", () => ({
import { makeProxyFetch, resolveProxyFetchFromEnv } from "./proxy-fetch.js";
// Remove every tracked proxy variable from process.env so each test starts
// from a proxy-free environment.
function clearProxyEnv(): void {
  PROXY_ENV_KEYS.forEach((key) => {
    delete process.env[key];
  });
}
// Put process.env proxy vars back to the values captured at module load.
// Clears first so variables that were originally unset stay unset.
function restoreProxyEnv(): void {
  clearProxyEnv();
  for (const key of PROXY_ENV_KEYS) {
    const saved = ORIGINAL_PROXY_ENV[key];
    if (typeof saved !== "string") {
      continue; // was not set before the suite ran — leave it deleted
    }
    process.env[key] = saved;
  }
}
describe("makeProxyFetch", () => {
beforeEach(() => vi.clearAllMocks());
@ -60,8 +89,15 @@ describe("makeProxyFetch", () => {
});
describe("resolveProxyFetchFromEnv", () => {
beforeEach(() => vi.clearAllMocks());
afterEach(() => vi.unstubAllEnvs());
beforeEach(() => {
vi.clearAllMocks();
vi.unstubAllEnvs();
clearProxyEnv();
});
afterEach(() => {
vi.unstubAllEnvs();
restoreProxyEnv();
});
it("returns undefined when no proxy env vars are set", () => {
expect(resolveProxyFetchFromEnv({})).toBeUndefined();