diff --git a/src/agents/pi-embedded-runner/google.ts b/src/agents/pi-embedded-runner/google.ts index 265593f03e0..d0514d58624 100644 --- a/src/agents/pi-embedded-runner/google.ts +++ b/src/agents/pi-embedded-runner/google.ts @@ -32,7 +32,7 @@ import { type UsageLike, } from "../usage.js"; import { log } from "./logger.js"; -import { dropThinkingBlocks } from "./thinking.js"; +import { dropThinkingBlocks, stripThinkingFromNonLatestAssistant } from "./thinking.js"; import { describeUnknownError } from "./utils.js"; const GOOGLE_TURN_ORDERING_CUSTOM_TYPE = "google-turn-ordering-bootstrap"; @@ -549,9 +549,22 @@ export async function sanitizeSessionHistory(params: { ...resolveImageSanitizationLimits(params.config), }, ); - const droppedThinking = policy.dropThinkingBlocks - ? dropThinkingBlocks(sanitizedImages) + // For Anthropic models, strip thinking/redacted_thinking blocks from all + // non-latest assistant messages. Anthropic requires these blocks to be + // byte-identical to the original response in the latest assistant message, + // but allows omitting them from older messages. Compaction and session + // serialization can corrupt these blocks, causing API rejections. + const isAnthropicProvider = + params.modelApi === "anthropic-messages" || + params.modelApi === "bedrock-converse-stream" || + (params.provider ?? "").toLowerCase() === "anthropic" || + (params.provider ?? "").toLowerCase() === "amazon-bedrock"; + const strippedNonLatestThinking = isAnthropicProvider + ? stripThinkingFromNonLatestAssistant(sanitizedImages) : sanitizedImages; + const droppedThinking = policy.dropThinkingBlocks + ? 
dropThinkingBlocks(strippedNonLatestThinking) + : strippedNonLatestThinking; const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, { allowedToolNames: params.allowedToolNames, }); diff --git a/src/agents/pi-embedded-runner/thinking.test.ts b/src/agents/pi-embedded-runner/thinking.test.ts index e3d0a8291b6..1e776c6f1f8 100644 --- a/src/agents/pi-embedded-runner/thinking.test.ts +++ b/src/agents/pi-embedded-runner/thinking.test.ts @@ -1,7 +1,11 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { describe, expect, it } from "vitest"; import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js"; -import { dropThinkingBlocks, isAssistantMessageWithContent } from "./thinking.js"; +import { + dropThinkingBlocks, + isAssistantMessageWithContent, + stripThinkingFromNonLatestAssistant, +} from "./thinking.js"; function dropSingleAssistantContent(content: Array>) { const messages: AgentMessage[] = [ @@ -54,6 +58,40 @@ describe("dropThinkingBlocks", () => { expect(assistant.content).toEqual([{ type: "text", text: "final" }]); }); + it("drops redacted_thinking blocks", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "assistant", + content: [ + { type: "redacted_thinking", data: "opaque-base64-data" }, + { type: "text", text: "visible" }, + ], + }), + ]; + + const result = dropThinkingBlocks(messages); + const assistant = result[0] as Extract; + expect(result).not.toBe(messages); + expect(assistant.content).toEqual([{ type: "text", text: "visible" }]); + }); + + it("drops both thinking and redacted_thinking blocks in the same message", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "assistant", + content: [ + { type: "thinking", thinking: "internal" }, + { type: "redacted_thinking", data: "opaque" }, + { type: "text", text: "answer" }, + ], + }), + ]; + + const result = dropThinkingBlocks(messages); + const assistant = result[0] as Extract; + 
expect(assistant.content).toEqual([{ type: "text", text: "answer" }]); + }); + it("keeps assistant turn structure when all content blocks were thinking", () => { const { assistant } = dropSingleAssistantContent([ { type: "thinking", thinking: "internal-only" }, @@ -61,3 +99,145 @@ describe("dropThinkingBlocks", () => { expect(assistant.content).toEqual([{ type: "text", text: "" }]); }); }); + +describe("stripThinkingFromNonLatestAssistant", () => { + it("returns original reference when no assistant messages have thinking blocks", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ role: "user", content: "hi" }), + castAgentMessage({ role: "assistant", content: [{ type: "text", text: "hello" }] }), + ]; + + const result = stripThinkingFromNonLatestAssistant(messages); + expect(result).toBe(messages); + }); + + it("returns original reference with zero or one assistant message", () => { + const single: AgentMessage[] = [ + castAgentMessage({ + role: "assistant", + content: [ + { type: "thinking", thinking: "deep thought" }, + { type: "text", text: "answer" }, + ], + }), + ]; + expect(stripThinkingFromNonLatestAssistant(single)).toBe(single); + }); + + it("preserves thinking blocks in the latest assistant message only", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "assistant", + content: [ + { type: "thinking", thinking: "old thought" }, + { type: "text", text: "old answer" }, + ], + }), + castAgentMessage({ role: "user", content: "follow up" }), + castAgentMessage({ + role: "assistant", + content: [ + { type: "thinking", thinking: "new thought" }, + { type: "redacted_thinking", data: "opaque" }, + { type: "text", text: "new answer" }, + ], + }), + ]; + + const result = stripThinkingFromNonLatestAssistant(messages); + expect(result).not.toBe(messages); + + // First assistant: thinking stripped + const first = result[0] as Extract; + expect(first.content).toEqual([{ type: "text", text: "old answer" }]); + + // Latest assistant: 
thinking preserved exactly + const latest = result[2] as Extract; + expect(latest.content).toEqual([ + { type: "thinking", thinking: "new thought" }, + { type: "redacted_thinking", data: "opaque" }, + { type: "text", text: "new answer" }, + ]); + }); + + it("strips redacted_thinking blocks from non-latest assistant messages", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "assistant", + content: [ + { type: "redacted_thinking", data: "old-opaque" }, + { type: "text", text: "first" }, + ], + }), + castAgentMessage({ role: "user", content: "next" }), + castAgentMessage({ + role: "assistant", + content: [{ type: "text", text: "second" }], + }), + ]; + + const result = stripThinkingFromNonLatestAssistant(messages); + const first = result[0] as Extract; + expect(first.content).toEqual([{ type: "text", text: "first" }]); + + // Latest assistant untouched (no thinking blocks to worry about) + const latest = result[2] as Extract; + expect(latest.content).toEqual([{ type: "text", text: "second" }]); + }); + + it("replaces with empty text block when all blocks in non-latest are thinking", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "assistant", + content: [ + { type: "thinking", thinking: "only thinking" }, + { type: "redacted_thinking", data: "opaque" }, + ], + }), + castAgentMessage({ role: "user", content: "next" }), + castAgentMessage({ + role: "assistant", + content: [{ type: "text", text: "latest" }], + }), + ]; + + const result = stripThinkingFromNonLatestAssistant(messages); + const first = result[0] as Extract; + expect(first.content).toEqual([{ type: "text", text: "" }]); + }); + + it("handles interleaved user and toolResult messages correctly", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "assistant", + content: [ + { type: "thinking", thinking: "thought 1" }, + { type: "text", text: "call tool" }, + ], + }), + castAgentMessage({ role: "toolResult", content: "result" }), + 
/** Block types that Anthropic considers immutable once returned. */
const THINKING_BLOCK_TYPES: ReadonlySet<string> = new Set(["thinking", "redacted_thinking"]);

/**
 * Report whether a content block is a `thinking` or `redacted_thinking` block.
 *
 * The argument is deliberately typed as `unknown`: anything that is not a
 * non-null object carrying a string `type` member listed in
 * {@link THINKING_BLOCK_TYPES} yields `false`.
 *
 * @param block - Candidate content block; may be any value.
 * @returns `true` only when `block.type` is `"thinking"` or `"redacted_thinking"`.
 */
function isThinkingBlock(block: unknown): boolean {
  // Rejects null/undefined and every primitive before the property read below.
  if (!block || typeof block !== "object") {
    return false;
  }
  const type = (block as { type?: unknown }).type;
  // Only string tags are looked up, so `{ type: 42 }` is never a match.
  return typeof type === "string" && THINKING_BLOCK_TYPES.has(type);
}
/**
 * Remove `thinking` / `redacted_thinking` blocks from every assistant message
 * other than the last one in `messages`.
 *
 * "Last" means the highest-indexed entry accepted by
 * `isAssistantMessageWithContent`; that entry, and every entry the guard
 * rejects, is passed through by reference. Per the rationale this helper was
 * introduced with, Anthropic expects the thinking blocks of the latest
 * assistant message to stay byte-identical to the original response while
 * permitting older ones to be omitted, so dropping the older ones keeps
 * compaction or serialization damage from reaching the API.
 *
 * An earlier assistant message whose blocks were all thinking blocks keeps its
 * slot with a single `{ type: "text", text: "" }` block.
 *
 * @param messages - Conversation history, oldest first. Never mutated.
 * @returns A new array when at least one block was removed; otherwise the
 *   `messages` reference itself.
 */
export function stripThinkingFromNonLatestAssistant(messages: AgentMessage[]): AgentMessage[] {
  // Walk backwards to the newest assistant message that has array content.
  let latestIdx = messages.length - 1;
  while (latestIdx >= 0 && !isAssistantMessageWithContent(messages[latestIdx])) {
    latestIdx -= 1;
  }

  // No such message, or it is the very first entry: nothing precedes it.
  if (latestIdx <= 0) {
    return messages;
  }

  const result: AgentMessage[] = [];
  let anyStripped = false;

  for (const [idx, message] of messages.entries()) {
    const isOlderAssistant = idx !== latestIdx && isAssistantMessageWithContent(message);
    if (!isOlderAssistant) {
      result.push(message);
      continue;
    }

    const kept: AssistantContentBlock[] = message.content.filter(
      (block) => !isThinkingBlock(block),
    );

    // Equal lengths mean no thinking block was present; keep the same object.
    if (kept.length === message.content.length) {
      result.push(message);
      continue;
    }

    anyStripped = true;
    // Keep the assistant turn in place even when nothing but thinking remained.
    const content =
      kept.length > 0 ? kept : [{ type: "text", text: "" } as AssistantContentBlock];
    result.push({ ...message, content });
  }

  return anyStripped ? result : messages;
}