Merge 738d6b6b5277d00f9dc5ac55b8946877b0af6e6c into 5e417b44e1540f528d2ae63e3e20229a902d1db2

This commit is contained in:
Drew Wagner 2026-03-21 06:01:48 +03:00 committed by GitHub
commit dee4c45e72
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 275 additions and 6 deletions

View File

@ -32,7 +32,7 @@ import {
type UsageLike,
} from "../usage.js";
import { log } from "./logger.js";
import { dropThinkingBlocks } from "./thinking.js";
import { dropThinkingBlocks, stripThinkingFromNonLatestAssistant } from "./thinking.js";
import { describeUnknownError } from "./utils.js";
const GOOGLE_TURN_ORDERING_CUSTOM_TYPE = "google-turn-ordering-bootstrap";
@ -549,9 +549,22 @@ export async function sanitizeSessionHistory(params: {
...resolveImageSanitizationLimits(params.config),
},
);
const droppedThinking = policy.dropThinkingBlocks
? dropThinkingBlocks(sanitizedImages)
// For Anthropic models, strip thinking/redacted_thinking blocks from all
// non-latest assistant messages. Anthropic requires these blocks to be
// byte-identical to the original response in the latest assistant message,
// but allows omitting them from older messages. Compaction and session
// serialization can corrupt these blocks, causing API rejections.
const isAnthropicProvider =
params.modelApi === "anthropic-messages" ||
params.modelApi === "bedrock-converse-stream" ||
(params.provider ?? "").toLowerCase() === "anthropic" ||
(params.provider ?? "").toLowerCase() === "amazon-bedrock";
const strippedNonLatestThinking = isAnthropicProvider
? stripThinkingFromNonLatestAssistant(sanitizedImages)
: sanitizedImages;
const droppedThinking = policy.dropThinkingBlocks
? dropThinkingBlocks(strippedNonLatestThinking)
: strippedNonLatestThinking;
const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, {
allowedToolNames: params.allowedToolNames,
});

View File

@ -1,7 +1,11 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { describe, expect, it } from "vitest";
import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js";
import { dropThinkingBlocks, isAssistantMessageWithContent } from "./thinking.js";
import {
dropThinkingBlocks,
isAssistantMessageWithContent,
stripThinkingFromNonLatestAssistant,
} from "./thinking.js";
function dropSingleAssistantContent(content: Array<Record<string, unknown>>) {
const messages: AgentMessage[] = [
@ -54,6 +58,40 @@ describe("dropThinkingBlocks", () => {
expect(assistant.content).toEqual([{ type: "text", text: "final" }]);
});
// `redacted_thinking` blocks must be removed the same way plain `thinking`
// blocks are, leaving the visible text block behind.
it("drops redacted_thinking blocks", () => {
  const redactedTurn = castAgentMessage({
    role: "assistant",
    content: [
      { type: "redacted_thinking", data: "opaque-base64-data" },
      { type: "text", text: "visible" },
    ],
  });
  const input: AgentMessage[] = [redactedTurn];

  const output = dropThinkingBlocks(input);
  const [stripped] = output;

  // A block was removed, so a fresh array (not the input reference) is expected.
  expect(output).not.toBe(input);
  expect((stripped as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
    { type: "text", text: "visible" },
  ]);
});
// Both thinking variants appearing side by side in one assistant message are
// removed together; only the text block survives.
it("drops both thinking and redacted_thinking blocks in the same message", () => {
  const mixedTurn = castAgentMessage({
    role: "assistant",
    content: [
      { type: "thinking", thinking: "internal" },
      { type: "redacted_thinking", data: "opaque" },
      { type: "text", text: "answer" },
    ],
  });
  const input: AgentMessage[] = [mixedTurn];

  const [stripped] = dropThinkingBlocks(input);

  expect((stripped as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
    { type: "text", text: "answer" },
  ]);
});
it("keeps assistant turn structure when all content blocks were thinking", () => {
const { assistant } = dropSingleAssistantContent([
{ type: "thinking", thinking: "internal-only" },
@ -61,3 +99,145 @@ describe("dropThinkingBlocks", () => {
expect(assistant.content).toEqual([{ type: "text", text: "" }]);
});
});
/**
 * Behavioural tests for `stripThinkingFromNonLatestAssistant`.
 *
 * Contract under test: thinking / redacted_thinking blocks are removed from
 * every assistant message except the last one in the array, and the original
 * array reference is returned whenever nothing had to be removed.
 */
describe("stripThinkingFromNonLatestAssistant", () => {
  it("returns original reference when no assistant messages have thinking blocks", () => {
    const messages: AgentMessage[] = [
      castAgentMessage({ role: "user", content: "hi" }),
      castAgentMessage({ role: "assistant", content: [{ type: "text", text: "hello" }] }),
    ];

    const result = stripThinkingFromNonLatestAssistant(messages);
    expect(result).toBe(messages);
  });

  it("returns original reference with zero or one assistant message", () => {
    // Zero assistant messages: an empty history is returned untouched.
    const empty: AgentMessage[] = [];
    expect(stripThinkingFromNonLatestAssistant(empty)).toBe(empty);

    // Zero assistant messages: a history made only of user turns.
    const none: AgentMessage[] = [castAgentMessage({ role: "user", content: "hi" })];
    expect(stripThinkingFromNonLatestAssistant(none)).toBe(none);

    // One assistant message at index 0: it is the latest, so it is preserved.
    const single: AgentMessage[] = [
      castAgentMessage({
        role: "assistant",
        content: [
          { type: "thinking", thinking: "deep thought" },
          { type: "text", text: "answer" },
        ],
      }),
    ];
    expect(stripThinkingFromNonLatestAssistant(single)).toBe(single);

    // One assistant message that is NOT at index 0: the rewrite loop runs but
    // must still hand back the original reference because nothing is stripped.
    const singleAfterUser: AgentMessage[] = [
      castAgentMessage({ role: "user", content: "question" }),
      castAgentMessage({
        role: "assistant",
        content: [
          { type: "thinking", thinking: "deep thought" },
          { type: "text", text: "answer" },
        ],
      }),
    ];
    expect(stripThinkingFromNonLatestAssistant(singleAfterUser)).toBe(singleAfterUser);
  });

  it("preserves thinking blocks in the latest assistant message only", () => {
    const messages: AgentMessage[] = [
      castAgentMessage({
        role: "assistant",
        content: [
          { type: "thinking", thinking: "old thought" },
          { type: "text", text: "old answer" },
        ],
      }),
      castAgentMessage({ role: "user", content: "follow up" }),
      castAgentMessage({
        role: "assistant",
        content: [
          { type: "thinking", thinking: "new thought" },
          { type: "redacted_thinking", data: "opaque" },
          { type: "text", text: "new answer" },
        ],
      }),
    ];

    const result = stripThinkingFromNonLatestAssistant(messages);
    expect(result).not.toBe(messages);

    // First assistant: thinking stripped
    const first = result[0] as Extract<AgentMessage, { role: "assistant" }>;
    expect(first.content).toEqual([{ type: "text", text: "old answer" }]);

    // Latest assistant: thinking preserved exactly
    const latest = result[2] as Extract<AgentMessage, { role: "assistant" }>;
    expect(latest.content).toEqual([
      { type: "thinking", thinking: "new thought" },
      { type: "redacted_thinking", data: "opaque" },
      { type: "text", text: "new answer" },
    ]);
  });

  it("strips redacted_thinking blocks from non-latest assistant messages", () => {
    const messages: AgentMessage[] = [
      castAgentMessage({
        role: "assistant",
        content: [
          { type: "redacted_thinking", data: "old-opaque" },
          { type: "text", text: "first" },
        ],
      }),
      castAgentMessage({ role: "user", content: "next" }),
      castAgentMessage({
        role: "assistant",
        content: [{ type: "text", text: "second" }],
      }),
    ];

    const result = stripThinkingFromNonLatestAssistant(messages);
    const first = result[0] as Extract<AgentMessage, { role: "assistant" }>;
    expect(first.content).toEqual([{ type: "text", text: "first" }]);

    // Latest assistant untouched (no thinking blocks to worry about)
    const latest = result[2] as Extract<AgentMessage, { role: "assistant" }>;
    expect(latest.content).toEqual([{ type: "text", text: "second" }]);
  });

  it("replaces with empty text block when all blocks in non-latest are thinking", () => {
    const messages: AgentMessage[] = [
      castAgentMessage({
        role: "assistant",
        content: [
          { type: "thinking", thinking: "only thinking" },
          { type: "redacted_thinking", data: "opaque" },
        ],
      }),
      castAgentMessage({ role: "user", content: "next" }),
      castAgentMessage({
        role: "assistant",
        content: [{ type: "text", text: "latest" }],
      }),
    ];

    const result = stripThinkingFromNonLatestAssistant(messages);
    const first = result[0] as Extract<AgentMessage, { role: "assistant" }>;
    // The emptied turn keeps a placeholder block rather than an empty array.
    expect(first.content).toEqual([{ type: "text", text: "" }]);
  });

  it("handles interleaved user and toolResult messages correctly", () => {
    const messages: AgentMessage[] = [
      castAgentMessage({
        role: "assistant",
        content: [
          { type: "thinking", thinking: "thought 1" },
          { type: "text", text: "call tool" },
        ],
      }),
      castAgentMessage({ role: "toolResult", content: "result" }),
      castAgentMessage({
        role: "assistant",
        content: [
          { type: "thinking", thinking: "thought 2" },
          { type: "text", text: "final" },
        ],
      }),
    ];

    const result = stripThinkingFromNonLatestAssistant(messages);

    // First assistant: thinking stripped
    const first = result[0] as Extract<AgentMessage, { role: "assistant" }>;
    expect(first.content).toEqual([{ type: "text", text: "call tool" }]);

    // toolResult: unchanged
    expect(result[1]).toBe(messages[1]);

    // Latest assistant: thinking preserved
    const latest = result[2] as Extract<AgentMessage, { role: "assistant" }>;
    expect(latest.content).toEqual([
      { type: "thinking", thinking: "thought 2" },
      { type: "text", text: "final" },
    ]);
  });
});

View File

@ -3,6 +3,17 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
type AssistantContentBlock = Extract<AgentMessage, { role: "assistant" }>["content"][number];
type AssistantMessage = Extract<AgentMessage, { role: "assistant" }>;
/**
 * Content-block `type` tags that identify model reasoning output.
 * These are the block types that Anthropic considers immutable once returned.
 */
const THINKING_BLOCK_TYPES: ReadonlySet<string> = new Set(["thinking", "redacted_thinking"]);

/**
 * Report whether `block` is a thinking-style content block.
 *
 * @param block - Any value; anything that is not a non-null object is rejected.
 * @returns `true` only when `block.type` is a string listed in
 *   `THINKING_BLOCK_TYPES`.
 */
function isThinkingBlock(block: unknown): boolean {
  // `typeof null` is "object", so null needs its own check.
  if (typeof block !== "object" || block === null) {
    return false;
  }
  const { type: tag } = block as { type?: unknown };
  return typeof tag === "string" ? THINKING_BLOCK_TYPES.has(tag) : false;
}
export function isAssistantMessageWithContent(message: AgentMessage): message is AssistantMessage {
return (
!!message &&
@ -13,7 +24,8 @@ export function isAssistantMessageWithContent(message: AgentMessage): message is
}
/**
* Strip all `type: "thinking"` content blocks from assistant messages.
* Strip all `type: "thinking"` and `type: "redacted_thinking"` content blocks
* from assistant messages.
*
* If an assistant message becomes empty after stripping, it is replaced with
* a synthetic `{ type: "text", text: "" }` block to preserve turn structure
@ -33,7 +45,7 @@ export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
const nextContent: AssistantContentBlock[] = [];
let changed = false;
for (const block of msg.content) {
if (block && typeof block === "object" && (block as { type?: unknown }).type === "thinking") {
if (isThinkingBlock(block)) {
touched = true;
changed = true;
continue;
@ -51,3 +63,67 @@ export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
}
return touched ? out : messages;
}
/**
 * Remove `thinking` / `redacted_thinking` blocks from every assistant message
 * that comes before the latest assistant message in `messages`.
 *
 * Rationale: Anthropic requires thinking blocks in the latest assistant
 * message to stay byte-identical to the original API response, while blocks
 * in earlier assistant messages may be left out. Dropping the older ones
 * keeps compaction or session serialization from sending corrupted blocks
 * that the API would reject.
 *
 * An assistant message whose blocks were all thinking blocks keeps its place
 * in the history with a single `{ type: "text", text: "" }` block.
 *
 * @param messages - Conversation history; never mutated.
 * @returns A new array when at least one block was removed, otherwise the
 *   very same `messages` reference.
 */
export function stripThinkingFromNonLatestAssistant(messages: AgentMessage[]): AgentMessage[] {
  // Walk backwards to the last assistant message that carries array content.
  let latestIdx = messages.length - 1;
  while (latestIdx >= 0 && !isAssistantMessageWithContent(messages[latestIdx])) {
    latestIdx -= 1;
  }

  // -1: no such message; 0: nothing precedes it. Either way there is no
  // earlier assistant message to strip.
  if (latestIdx <= 0) {
    return messages;
  }

  const rewritten: AgentMessage[] = [];
  let anyStripped = false;

  for (const [idx, message] of messages.entries()) {
    // The latest assistant message and all non-assistant messages pass through.
    if (idx === latestIdx || !isAssistantMessageWithContent(message)) {
      rewritten.push(message);
      continue;
    }

    const kept: AssistantContentBlock[] = [];
    for (const block of message.content) {
      if (!isThinkingBlock(block)) {
        kept.push(block);
      }
    }

    // Same length means no thinking block was present: reuse the message.
    if (kept.length === message.content.length) {
      rewritten.push(message);
      continue;
    }

    anyStripped = true;
    rewritten.push({
      ...message,
      // Keep the turn non-empty so the history structure is preserved.
      content: kept.length > 0 ? kept : [{ type: "text", text: "" } as AssistantContentBlock],
    });
  }

  return anyStripped ? rewritten : messages;
}