Merge 738d6b6b5277d00f9dc5ac55b8946877b0af6e6c into 5e417b44e1540f528d2ae63e3e20229a902d1db2

2026-03-21 06:01:48 +03:00 · 2026-03-21 06:01:48 +03:00 · dee4c45e72
commit dee4c45e72
parent 5e417b44e1 738d6b6b52
3 changed files with 275 additions and 6 deletions
--- a/src/agents/pi-embedded-runner/google.ts
+++ b/src/agents/pi-embedded-runner/google.ts
@ -32,7 +32,7 @@ import {
  type UsageLike,
 } from "../usage.js";
 import { log } from "./logger.js";
-import { dropThinkingBlocks } from "./thinking.js";
+import { dropThinkingBlocks, stripThinkingFromNonLatestAssistant } from "./thinking.js";
 import { describeUnknownError } from "./utils.js";

 const GOOGLE_TURN_ORDERING_CUSTOM_TYPE = "google-turn-ordering-bootstrap";
@ -549,9 +549,22 @@ export async function sanitizeSessionHistory(params: {
      ...resolveImageSanitizationLimits(params.config),
    },
  );
-  const droppedThinking = policy.dropThinkingBlocks
-    ? dropThinkingBlocks(sanitizedImages)
+  // For Anthropic-backed requests (matched below by model API or provider,
+  // which includes the Bedrock converse API and the amazon-bedrock provider),
+  // strip thinking/redacted_thinking blocks from all non-latest assistant
+  // messages. Anthropic requires these blocks to be byte-identical to the
+  // original response in the latest assistant message, but allows omitting
+  // them from older messages. Compaction and session serialization can
+  // corrupt these blocks, causing API rejections.
+  const isAnthropicProvider =
+    params.modelApi === "anthropic-messages" ||
+    params.modelApi === "bedrock-converse-stream" ||
+    (params.provider ?? "").toLowerCase() === "anthropic" ||
+    (params.provider ?? "").toLowerCase() === "amazon-bedrock";
+  const strippedNonLatestThinking = isAnthropicProvider
+    ? stripThinkingFromNonLatestAssistant(sanitizedImages)
    : sanitizedImages;
+  const droppedThinking = policy.dropThinkingBlocks
+    ? dropThinkingBlocks(strippedNonLatestThinking)
+    : strippedNonLatestThinking;
  const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, {
    allowedToolNames: params.allowedToolNames,
  });
--- a/src/agents/pi-embedded-runner/thinking.test.ts
+++ b/src/agents/pi-embedded-runner/thinking.test.ts
@ -1,7 +1,11 @@
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
 import { describe, expect, it } from "vitest";
 import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js";
-import { dropThinkingBlocks, isAssistantMessageWithContent } from "./thinking.js";
+import {
+  dropThinkingBlocks,
+  isAssistantMessageWithContent,
+  stripThinkingFromNonLatestAssistant,
+} from "./thinking.js";

 function dropSingleAssistantContent(content: Array<Record<string, unknown>>) {
  const messages: AgentMessage[] = [
@ -54,6 +58,40 @@ describe("dropThinkingBlocks", () => {
    expect(assistant.content).toEqual([{ type: "text", text: "final" }]);
  });

+  // A message holding a redacted_thinking block plus a text block must come
+  // back with only the text block left.
+  it("drops redacted_thinking blocks", () => {
+    const messages: AgentMessage[] = [
+      castAgentMessage({
+        role: "assistant",
+        content: [
+          { type: "redacted_thinking", data: "opaque-base64-data" },
+          { type: "text", text: "visible" },
+        ],
+      }),
+    ];
+
+    const result = dropThinkingBlocks(messages);
+    const assistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
+    // A block was removed, so a different array than the input is expected.
+    expect(result).not.toBe(messages);
+    expect(assistant.content).toEqual([{ type: "text", text: "visible" }]);
+  });
+
+  // Both thinking block kinds sit next to each other in one message; only the
+  // trailing text block is expected to survive.
+  it("drops both thinking and redacted_thinking blocks in the same message", () => {
+    const messages: AgentMessage[] = [
+      castAgentMessage({
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "internal" },
+          { type: "redacted_thinking", data: "opaque" },
+          { type: "text", text: "answer" },
+        ],
+      }),
+    ];
+
+    const result = dropThinkingBlocks(messages);
+    const assistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;
+    expect(assistant.content).toEqual([{ type: "text", text: "answer" }]);
+  });
+
  it("keeps assistant turn structure when all content blocks were thinking", () => {
    const { assistant } = dropSingleAssistantContent([
      { type: "thinking", thinking: "internal-only" },
@ -61,3 +99,145 @@ describe("dropThinkingBlocks", () => {
    expect(assistant.content).toEqual([{ type: "text", text: "" }]);
  });
 });
+
+// Suite for stripThinkingFromNonLatestAssistant: assistant messages that come
+// before the last assistant message lose their thinking/redacted_thinking
+// blocks, while the last assistant message and all other messages are kept.
+describe("stripThinkingFromNonLatestAssistant", () => {
+  it("returns original reference when no assistant messages have thinking blocks", () => {
+    const messages: AgentMessage[] = [
+      castAgentMessage({ role: "user", content: "hi" }),
+      castAgentMessage({ role: "assistant", content: [{ type: "text", text: "hello" }] }),
+    ];
+
+    // Nothing to strip, so the very same array instance must be returned.
+    const result = stripThinkingFromNonLatestAssistant(messages);
+    expect(result).toBe(messages);
+  });
+
+  // NOTE(review): only the single-assistant case has a fixture here; the
+  // zero-assistant case named in the title is not exercised.
+  it("returns original reference with zero or one assistant message", () => {
+    const single: AgentMessage[] = [
+      castAgentMessage({
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "deep thought" },
+          { type: "text", text: "answer" },
+        ],
+      }),
+    ];
+    // The only assistant message is also the latest one, so it keeps its
+    // thinking block and the input array is returned as-is.
+    expect(stripThinkingFromNonLatestAssistant(single)).toBe(single);
+  });
+
+  it("preserves thinking blocks in the latest assistant message only", () => {
+    const messages: AgentMessage[] = [
+      castAgentMessage({
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "old thought" },
+          { type: "text", text: "old answer" },
+        ],
+      }),
+      castAgentMessage({ role: "user", content: "follow up" }),
+      castAgentMessage({
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "new thought" },
+          { type: "redacted_thinking", data: "opaque" },
+          { type: "text", text: "new answer" },
+        ],
+      }),
+    ];
+
+    const result = stripThinkingFromNonLatestAssistant(messages);
+    // The older assistant message changed, so a new array is expected.
+    expect(result).not.toBe(messages);
+
+    // First assistant: thinking stripped
+    const first = result[0] as Extract<AgentMessage, { role: "assistant" }>;
+    expect(first.content).toEqual([{ type: "text", text: "old answer" }]);
+
+    // Latest assistant: thinking preserved exactly
+    const latest = result[2] as Extract<AgentMessage, { role: "assistant" }>;
+    expect(latest.content).toEqual([
+      { type: "thinking", thinking: "new thought" },
+      { type: "redacted_thinking", data: "opaque" },
+      { type: "text", text: "new answer" },
+    ]);
+  });
+
+  it("strips redacted_thinking blocks from non-latest assistant messages", () => {
+    const messages: AgentMessage[] = [
+      castAgentMessage({
+        role: "assistant",
+        content: [
+          { type: "redacted_thinking", data: "old-opaque" },
+          { type: "text", text: "first" },
+        ],
+      }),
+      castAgentMessage({ role: "user", content: "next" }),
+      castAgentMessage({
+        role: "assistant",
+        content: [{ type: "text", text: "second" }],
+      }),
+    ];
+
+    const result = stripThinkingFromNonLatestAssistant(messages);
+    // Older assistant: the redacted block is gone, the text block remains.
+    const first = result[0] as Extract<AgentMessage, { role: "assistant" }>;
+    expect(first.content).toEqual([{ type: "text", text: "first" }]);
+
+    // Latest assistant untouched (no thinking blocks to worry about)
+    const latest = result[2] as Extract<AgentMessage, { role: "assistant" }>;
+    expect(latest.content).toEqual([{ type: "text", text: "second" }]);
+  });
+
+  it("replaces with empty text block when all blocks in non-latest are thinking", () => {
+    const messages: AgentMessage[] = [
+      castAgentMessage({
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "only thinking" },
+          { type: "redacted_thinking", data: "opaque" },
+        ],
+      }),
+      castAgentMessage({ role: "user", content: "next" }),
+      castAgentMessage({
+        role: "assistant",
+        content: [{ type: "text", text: "latest" }],
+      }),
+    ];
+
+    const result = stripThinkingFromNonLatestAssistant(messages);
+    // Every block was stripped, so a single empty text block is expected
+    // rather than an empty content array.
+    const first = result[0] as Extract<AgentMessage, { role: "assistant" }>;
+    expect(first.content).toEqual([{ type: "text", text: "" }]);
+  });
+
+  it("handles interleaved user and toolResult messages correctly", () => {
+    const messages: AgentMessage[] = [
+      castAgentMessage({
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "thought 1" },
+          { type: "text", text: "call tool" },
+        ],
+      }),
+      castAgentMessage({ role: "toolResult", content: "result" }),
+      castAgentMessage({
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "thought 2" },
+          { type: "text", text: "final" },
+        ],
+      }),
+    ];
+
+    const result = stripThinkingFromNonLatestAssistant(messages);
+    // First assistant: thinking stripped
+    const first = result[0] as Extract<AgentMessage, { role: "assistant" }>;
+    expect(first.content).toEqual([{ type: "text", text: "call tool" }]);
+
+    // toolResult: unchanged
+    expect(result[1]).toBe(messages[1]);
+
+    // Latest assistant: thinking preserved
+    const latest = result[2] as Extract<AgentMessage, { role: "assistant" }>;
+    expect(latest.content).toEqual([
+      { type: "thinking", thinking: "thought 2" },
+      { type: "text", text: "final" },
+    ]);
+  });
+});
--- a/src/agents/pi-embedded-runner/thinking.ts
+++ b/src/agents/pi-embedded-runner/thinking.ts
@ -3,6 +3,17 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
 type AssistantContentBlock = Extract<AgentMessage, { role: "assistant" }>["content"][number];
 type AssistantMessage = Extract<AgentMessage, { role: "assistant" }>;

+/**
+ * Content-block `type` values that `isThinkingBlock` treats as thinking blocks.
+ * These are the block types that Anthropic considers immutable once returned.
+ */
+const THINKING_BLOCK_TYPES: ReadonlySet<string> = new Set(["thinking", "redacted_thinking"]);
+
+/**
+ * Report whether `block` is a non-null object whose `type` property is a
+ * string contained in `THINKING_BLOCK_TYPES`. Any other value yields `false`.
+ */
+function isThinkingBlock(block: unknown): boolean {
+  if (!block || typeof block !== "object") {
+    return false;
+  }
+  const type = (block as { type?: unknown }).type;
+  return typeof type === "string" && THINKING_BLOCK_TYPES.has(type);
+}
+
 export function isAssistantMessageWithContent(message: AgentMessage): message is AssistantMessage {
  return (
    !!message &&
@ -13,7 +24,8 @@ export function isAssistantMessageWithContent(message: AgentMessage): message is
 }

 /**
- * Strip all `type: "thinking"` content blocks from assistant messages.
+ * Strip all `type: "thinking"` and `type: "redacted_thinking"` content blocks
+ * from assistant messages.
 *
 * If an assistant message becomes empty after stripping, it is replaced with
 * a synthetic `{ type: "text", text: "" }` block to preserve turn structure
@ -33,7 +45,7 @@ export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
    const nextContent: AssistantContentBlock[] = [];
    let changed = false;
    for (const block of msg.content) {
-      if (block && typeof block === "object" && (block as { type?: unknown }).type === "thinking") {
+      if (isThinkingBlock(block)) {
        touched = true;
        changed = true;
        continue;
@ -51,3 +63,67 @@ export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
  }
  return touched ? out : messages;
 }
+
+/**
+ * Strip `thinking` and `redacted_thinking` blocks from all assistant messages
+ * **except** the latest (last) assistant message in the array.
+ *
+ * Anthropic requires that thinking/redacted_thinking blocks in the latest
+ * assistant message remain byte-identical to the original API response.
+ * Blocks in non-latest assistant messages may be omitted entirely.
+ *
+ * This prevents compaction or session serialization from corrupting thinking
+ * blocks that are later rejected by the Anthropic API.
+ *
+ * "Assistant message" here means a message accepted by
+ * `isAssistantMessageWithContent`; every other message is passed through
+ * as-is. An assistant message left with no blocks after stripping is given a
+ * single `{ type: "text", text: "" }` block instead of an empty content array.
+ *
+ * @param messages - Conversation history. Neither the array nor the message
+ *   objects in it are mutated.
+ * @returns A new array in which changed messages are replaced by shallow
+ *   copies with filtered content, or the original array reference when
+ *   nothing was changed.
+ */
+export function stripThinkingFromNonLatestAssistant(messages: AgentMessage[]): AgentMessage[] {
+  // Scan backwards for the latest message accepted by
+  // isAssistantMessageWithContent; -1 means none was found.
+  let lastAssistantIndex = -1;
+  for (let i = messages.length - 1; i >= 0; i--) {
+    if (isAssistantMessageWithContent(messages[i])) {
+      lastAssistantIndex = i;
+      break;
+    }
+  }
+
+  // Nothing to strip when no such message exists (-1) or when the latest one
+  // is the first element (0): no earlier assistant message can precede it.
+  if (lastAssistantIndex <= 0) {
+    return messages;
+  }
+
+  // `touched` becomes true once any block is removed from any message; it
+  // decides whether `out` or the original array is returned.
+  let touched = false;
+  const out: AgentMessage[] = [];
+
+  for (let i = 0; i < messages.length; i++) {
+    const msg = messages[i];
+
+    // Pass through the latest assistant message and every message that is
+    // not an assistant message with content — keep them as-is.
+    if (i === lastAssistantIndex || !isAssistantMessageWithContent(msg)) {
+      out.push(msg);
+      continue;
+    }
+
+    // Collect the blocks that survive; `changed` tracks this message only.
+    const nextContent: AssistantContentBlock[] = [];
+    let changed = false;
+    for (const block of msg.content) {
+      if (isThinkingBlock(block)) {
+        touched = true;
+        changed = true;
+        continue;
+      }
+      nextContent.push(block);
+    }
+
+    // No thinking blocks in this message: reuse the original object.
+    if (!changed) {
+      out.push(msg);
+      continue;
+    }
+
+    // Fall back to one empty text block when every block was stripped.
+    const content =
+      nextContent.length > 0 ? nextContent : [{ type: "text", text: "" } as AssistantContentBlock];
+    out.push({ ...msg, content });
+  }
+
+  return touched ? out : messages;
+}