fix(compaction): use full-session token count for post-compaction sanity check (#28347)

Merged via squash.

Prepared head SHA: cf4eab1c51e6b8890e23c2d7172313c40cd2fe04
Co-authored-by: efe-arv <259833796+efe-arv@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman
This commit is contained in:
Efe Büken 2026-03-13 07:26:30 +03:00 committed by GitHub
parent 70d7a0854c
commit 771066d122
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 142 additions and 5 deletions

View File

@ -85,6 +85,7 @@ Docs: https://docs.openclaw.ai
- Cron/doctor: stop flagging canonical `agentTurn` and `systemEvent` payload kinds as legacy cron storage, while still normalizing whitespace-padded and non-canonical variants. (#44012) Thanks @shuicici.
- ACP/client final-message delivery: preserve terminal assistant text snapshots before resolving `end_turn`, so ACP clients no longer drop the last visible reply when the gateway sends the final message body on the terminal chat event. (#17615) Thanks @pjeby.
- Telegram/Discord status reactions: show a temporary compacting reaction during auto-compaction pauses and restore thinking afterward so the bot no longer appears frozen while context is being compacted. (#35474) Thanks @Cypherm.
- Agents/compaction: compare post-compaction token sanity checks against full-session pre-compaction totals and skip the check when token estimation fails, so sessions with large bootstrap context keep real token counts instead of falling back to unknown. (#28347) Thanks @efe-arv.
## 2026.3.11

View File

@ -13,6 +13,7 @@ const {
getMemorySearchManagerMock,
resolveMemorySearchConfigMock,
resolveSessionAgentIdMock,
estimateTokensMock,
} = vi.hoisted(() => {
const contextEngineCompactMock = vi.fn(async () => ({
ok: true as boolean,
@ -63,6 +64,7 @@ const {
},
})),
resolveSessionAgentIdMock: vi.fn(() => "main"),
estimateTokensMock: vi.fn((_message?: unknown) => 10),
};
});
@ -84,6 +86,11 @@ vi.mock("../../hooks/internal-hooks.js", async () => {
};
});
vi.mock("@mariozechner/pi-ai/oauth", () => ({
getOAuthApiKey: vi.fn(),
getOAuthProviders: vi.fn(() => []),
}));
vi.mock("@mariozechner/pi-coding-agent", () => {
return {
createAgentSession: vi.fn(async () => {
@ -122,7 +129,7 @@ vi.mock("@mariozechner/pi-coding-agent", () => {
SettingsManager: {
create: vi.fn(() => ({})),
},
estimateTokens: vi.fn(() => 10),
estimateTokens: estimateTokensMock,
};
});
@ -363,6 +370,8 @@ describe("compactEmbeddedPiSessionDirect hooks", () => {
});
resolveSessionAgentIdMock.mockReset();
resolveSessionAgentIdMock.mockReturnValue("main");
estimateTokensMock.mockReset();
estimateTokensMock.mockReturnValue(10);
unregisterApiProviders(getCustomApiRegistrySourceId("ollama"));
});
@ -501,6 +510,79 @@ describe("compactEmbeddedPiSessionDirect hooks", () => {
}
});
it("preserves tokensAfter when full-session context exceeds result.tokensBefore", async () => {
  // Per-role token prices; anything without a listed role costs 5 tokens.
  const tokensByRole: Record<string, number> = { user: 30, assistant: 20 };
  estimateTokensMock.mockImplementation((message: unknown) => {
    const role = (message as { role?: string }).role ?? "";
    return tokensByRole[role] ?? 5;
  });
  // result.tokensBefore (20) covers only the summarizable subset, which is
  // smaller than the full-session estimate produced by the mock above.
  sessionCompactImpl.mockResolvedValue({
    summary: "summary",
    firstKeptEntryId: "entry-1",
    tokensBefore: 20,
    details: { ok: true },
  });
  const result = await compactEmbeddedPiSessionDirect({
    sessionId: "session-1",
    sessionKey: "agent:main:session-1",
    sessionFile: "/tmp/session.jsonl",
    workspaceDir: "/tmp",
    customInstructions: "focus on decisions",
  });
  // tokensAfter must survive even though it exceeds result.tokensBefore,
  // because the sanity check compares against the full-session baseline.
  expect(result).toMatchObject({
    ok: true,
    compacted: true,
    result: {
      tokensBefore: 20,
      tokensAfter: 30,
    },
  });
  expect(sessionHook("compact:after")?.context?.tokenCount).toBe(30);
});
it("treats pre-compaction token estimation failures as a no-op sanity check", async () => {
  estimateTokensMock.mockImplementation((message: unknown) => {
    switch ((message as { role?: string }).role) {
      case "assistant":
        // Simulate a malformed/legacy entry the estimator cannot price.
        throw new Error("legacy message");
      case "user":
        return 30;
      default:
        return 5;
    }
  });
  sessionCompactImpl.mockResolvedValue({
    summary: "summary",
    firstKeptEntryId: "entry-1",
    tokensBefore: 20,
    details: { ok: true },
  });
  const result = await compactEmbeddedPiSessionDirect({
    sessionId: "session-1",
    sessionKey: "agent:main:session-1",
    sessionFile: "/tmp/session.jsonl",
    workspaceDir: "/tmp",
    customInstructions: "focus on decisions",
  });
  // With no usable pre-compaction baseline, the sanity check must not
  // discard the post-compaction estimate.
  expect(result).toMatchObject({
    ok: true,
    compacted: true,
    result: {
      tokensAfter: 30,
    },
  });
  expect(sessionHook("compact:after")?.context?.tokenCount).toBe(30);
});
it("skips sync in await mode when postCompactionForce is false", async () => {
const sync = vi.fn(async () => {});
getMemorySearchManagerMock.mockResolvedValue({ manager: { sync } });

View File

@ -897,6 +897,17 @@ export async function compactEmbeddedPiSessionDirect(
// Measure compactedCount from the original pre-limiting transcript so compaction
// lifecycle metrics represent total reduction through the compaction pipeline.
const messageCountCompactionInput = messageCountOriginal;
// Estimate full session tokens BEFORE compaction (including system prompt,
// bootstrap context, workspace files, and all history). This is needed for
// a correct sanity check — result.tokensBefore only covers the summarizable
// history subset, not the full session.
let fullSessionTokensBefore = 0;
try {
fullSessionTokensBefore = limited.reduce((sum, msg) => sum + estimateTokens(msg), 0);
} catch {
// If token estimation throws on a malformed message, fall back to 0 so
// the sanity check below becomes a no-op instead of crashing compaction.
}
const result = await compactWithSafetyTimeout(() =>
session.compact(params.customInstructions),
);
@ -912,8 +923,15 @@ export async function compactEmbeddedPiSessionDirect(
for (const message of session.messages) {
tokensAfter += estimateTokens(message);
}
// Sanity check: tokensAfter should be less than tokensBefore
if (tokensAfter > (observedTokenCount ?? result.tokensBefore)) {
// Sanity check: compare against the best full-session pre-compaction baseline.
// Prefer the provider-observed live count when available; otherwise use the
// heuristic full-session estimate with a 10% margin for counter jitter.
const sanityCheckBaseline = observedTokenCount ?? fullSessionTokensBefore;
if (
sanityCheckBaseline > 0 &&
tokensAfter >
(observedTokenCount !== undefined ? sanityCheckBaseline : sanityCheckBaseline * 1.1)
) {
tokensAfter = undefined; // Don't trust the estimate
}
} catch {

View File

@ -1,5 +1,18 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
// Proxy-related environment variables (both upper- and lower-case spellings)
// that these tests mutate and must therefore clean up after themselves.
const PROXY_ENV_KEYS = [
"HTTPS_PROXY",
"HTTP_PROXY",
"ALL_PROXY",
"https_proxy",
"http_proxy",
"all_proxy",
] as const;
// Snapshot of the proxy env vars captured at module load, so the suite can
// restore whatever the outer environment had once the tests finish.
const ORIGINAL_PROXY_ENV = Object.fromEntries(
PROXY_ENV_KEYS.map((key) => [key, process.env[key]]),
) as Record<(typeof PROXY_ENV_KEYS)[number], string | undefined>;
const { ProxyAgent, EnvHttpProxyAgent, undiciFetch, proxyAgentSpy, envAgentSpy, getLastAgent } =
vi.hoisted(() => {
const undiciFetch = vi.fn();
@ -40,6 +53,22 @@ vi.mock("undici", () => ({
import { makeProxyFetch, resolveProxyFetchFromEnv } from "./proxy-fetch.js";
// Remove every tracked proxy variable from process.env so each test starts
// from a proxy-free environment.
function clearProxyEnv(): void {
  PROXY_ENV_KEYS.forEach((key) => {
    delete process.env[key];
  });
}
// Put process.env proxy vars back to the values captured at module load.
// Clears first so variables that were originally unset stay unset.
function restoreProxyEnv(): void {
  clearProxyEnv();
  for (const key of PROXY_ENV_KEYS) {
    const saved = ORIGINAL_PROXY_ENV[key];
    if (typeof saved !== "string") {
      continue; // was not set before the suite ran — leave it deleted
    }
    process.env[key] = saved;
  }
}
describe("makeProxyFetch", () => {
beforeEach(() => vi.clearAllMocks());
@ -60,8 +89,15 @@ describe("makeProxyFetch", () => {
});
describe("resolveProxyFetchFromEnv", () => {
beforeEach(() => vi.clearAllMocks());
afterEach(() => vi.unstubAllEnvs());
beforeEach(() => {
vi.clearAllMocks();
vi.unstubAllEnvs();
clearProxyEnv();
});
afterEach(() => {
vi.unstubAllEnvs();
restoreProxyEnv();
});
it("returns undefined when no proxy env vars are set", () => {
expect(resolveProxyFetchFromEnv({})).toBeUndefined();