Merge 738d6b6b5277d00f9dc5ac55b8946877b0af6e6c into 5e417b44e1540f528d2ae63e3e20229a902d1db2
This commit is contained in:
commit
dee4c45e72
@ -32,7 +32,7 @@ import {
|
||||
type UsageLike,
|
||||
} from "../usage.js";
|
||||
import { log } from "./logger.js";
|
||||
import { dropThinkingBlocks } from "./thinking.js";
|
||||
import { dropThinkingBlocks, stripThinkingFromNonLatestAssistant } from "./thinking.js";
|
||||
import { describeUnknownError } from "./utils.js";
|
||||
|
||||
const GOOGLE_TURN_ORDERING_CUSTOM_TYPE = "google-turn-ordering-bootstrap";
|
||||
@ -549,9 +549,22 @@ export async function sanitizeSessionHistory(params: {
|
||||
...resolveImageSanitizationLimits(params.config),
|
||||
},
|
||||
);
|
||||
const droppedThinking = policy.dropThinkingBlocks
|
||||
? dropThinkingBlocks(sanitizedImages)
|
||||
// For Anthropic models, strip thinking/redacted_thinking blocks from all
|
||||
// non-latest assistant messages. Anthropic requires these blocks to be
|
||||
// byte-identical to the original response in the latest assistant message,
|
||||
// but allows omitting them from older messages. Compaction and session
|
||||
// serialization can corrupt these blocks, causing API rejections.
|
||||
const isAnthropicProvider =
|
||||
params.modelApi === "anthropic-messages" ||
|
||||
params.modelApi === "bedrock-converse-stream" ||
|
||||
(params.provider ?? "").toLowerCase() === "anthropic" ||
|
||||
(params.provider ?? "").toLowerCase() === "amazon-bedrock";
|
||||
const strippedNonLatestThinking = isAnthropicProvider
|
||||
? stripThinkingFromNonLatestAssistant(sanitizedImages)
|
||||
: sanitizedImages;
|
||||
const droppedThinking = policy.dropThinkingBlocks
|
||||
? dropThinkingBlocks(strippedNonLatestThinking)
|
||||
: strippedNonLatestThinking;
|
||||
const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, {
|
||||
allowedToolNames: params.allowedToolNames,
|
||||
});
|
||||
|
||||
@ -1,7 +1,11 @@
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js";
|
||||
import { dropThinkingBlocks, isAssistantMessageWithContent } from "./thinking.js";
|
||||
import {
|
||||
dropThinkingBlocks,
|
||||
isAssistantMessageWithContent,
|
||||
stripThinkingFromNonLatestAssistant,
|
||||
} from "./thinking.js";
|
||||
|
||||
function dropSingleAssistantContent(content: Array<Record<string, unknown>>) {
|
||||
const messages: AgentMessage[] = [
|
||||
@ -54,6 +58,40 @@ describe("dropThinkingBlocks", () => {
|
||||
expect(assistant.content).toEqual([{ type: "text", text: "final" }]);
|
||||
});
|
||||
|
||||
// Verifies that `redacted_thinking` blocks are removed just like `thinking`
// blocks, and that a new array is returned when something was dropped.
it("drops redacted_thinking blocks", () => {
  const messages: AgentMessage[] = [
    castAgentMessage({
      role: "assistant",
      content: [
        { type: "redacted_thinking", data: "opaque-base64-data" },
        { type: "text", text: "visible" },
      ],
    }),
  ];

  const result = dropThinkingBlocks(messages);
  const assistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;

  // A changed history must not be the same array reference as the input.
  expect(result).not.toBe(messages);
  expect(assistant.content).toEqual([{ type: "text", text: "visible" }]);
});
|
||||
|
||||
// Verifies that a message mixing `thinking` and `redacted_thinking` blocks
// keeps only its non-thinking content.
it("drops both thinking and redacted_thinking blocks in the same message", () => {
  const messages: AgentMessage[] = [
    castAgentMessage({
      role: "assistant",
      content: [
        { type: "thinking", thinking: "internal" },
        { type: "redacted_thinking", data: "opaque" },
        { type: "text", text: "answer" },
      ],
    }),
  ];

  const result = dropThinkingBlocks(messages);
  const assistant = result[0] as Extract<AgentMessage, { role: "assistant" }>;

  expect(assistant.content).toEqual([{ type: "text", text: "answer" }]);
});
|
||||
|
||||
it("keeps assistant turn structure when all content blocks were thinking", () => {
|
||||
const { assistant } = dropSingleAssistantContent([
|
||||
{ type: "thinking", thinking: "internal-only" },
|
||||
@ -61,3 +99,145 @@ describe("dropThinkingBlocks", () => {
|
||||
expect(assistant.content).toEqual([{ type: "text", text: "" }]);
|
||||
});
|
||||
});
|
||||
|
||||
// Test suite for stripThinkingFromNonLatestAssistant: thinking-style blocks
// must be removed from every assistant message except the last one, and the
// input array must be returned by reference when nothing changes.
describe("stripThinkingFromNonLatestAssistant", () => {
  it("returns original reference when no assistant messages have thinking blocks", () => {
    const messages: AgentMessage[] = [
      castAgentMessage({ role: "user", content: "hi" }),
      castAgentMessage({ role: "assistant", content: [{ type: "text", text: "hello" }] }),
    ];

    const result = stripThinkingFromNonLatestAssistant(messages);
    expect(result).toBe(messages);
  });

  it("returns original reference with zero or one assistant message", () => {
    // Zero assistant messages: empty history.
    const empty: AgentMessage[] = [];
    expect(stripThinkingFromNonLatestAssistant(empty)).toBe(empty);

    // Zero assistant messages: user-only history.
    const userOnly: AgentMessage[] = [castAgentMessage({ role: "user", content: "hi" })];
    expect(stripThinkingFromNonLatestAssistant(userOnly)).toBe(userOnly);

    // One assistant message: it is the latest, so its thinking stays.
    const single: AgentMessage[] = [
      castAgentMessage({
        role: "assistant",
        content: [
          { type: "thinking", thinking: "deep thought" },
          { type: "text", text: "answer" },
        ],
      }),
    ];
    expect(stripThinkingFromNonLatestAssistant(single)).toBe(single);
  });

  it("preserves thinking blocks in the latest assistant message only", () => {
    const messages: AgentMessage[] = [
      castAgentMessage({
        role: "assistant",
        content: [
          { type: "thinking", thinking: "old thought" },
          { type: "text", text: "old answer" },
        ],
      }),
      castAgentMessage({ role: "user", content: "follow up" }),
      castAgentMessage({
        role: "assistant",
        content: [
          { type: "thinking", thinking: "new thought" },
          { type: "redacted_thinking", data: "opaque" },
          { type: "text", text: "new answer" },
        ],
      }),
    ];

    const result = stripThinkingFromNonLatestAssistant(messages);
    expect(result).not.toBe(messages);

    // First assistant: thinking stripped
    const first = result[0] as Extract<AgentMessage, { role: "assistant" }>;
    expect(first.content).toEqual([{ type: "text", text: "old answer" }]);

    // Latest assistant: thinking preserved exactly
    const latest = result[2] as Extract<AgentMessage, { role: "assistant" }>;
    expect(latest.content).toEqual([
      { type: "thinking", thinking: "new thought" },
      { type: "redacted_thinking", data: "opaque" },
      { type: "text", text: "new answer" },
    ]);
  });

  it("strips redacted_thinking blocks from non-latest assistant messages", () => {
    const messages: AgentMessage[] = [
      castAgentMessage({
        role: "assistant",
        content: [
          { type: "redacted_thinking", data: "old-opaque" },
          { type: "text", text: "first" },
        ],
      }),
      castAgentMessage({ role: "user", content: "next" }),
      castAgentMessage({
        role: "assistant",
        content: [{ type: "text", text: "second" }],
      }),
    ];

    const result = stripThinkingFromNonLatestAssistant(messages);
    const first = result[0] as Extract<AgentMessage, { role: "assistant" }>;
    expect(first.content).toEqual([{ type: "text", text: "first" }]);

    // Latest assistant untouched (no thinking blocks to worry about)
    const latest = result[2] as Extract<AgentMessage, { role: "assistant" }>;
    expect(latest.content).toEqual([{ type: "text", text: "second" }]);
  });

  it("replaces with empty text block when all blocks in non-latest are thinking", () => {
    const messages: AgentMessage[] = [
      castAgentMessage({
        role: "assistant",
        content: [
          { type: "thinking", thinking: "only thinking" },
          { type: "redacted_thinking", data: "opaque" },
        ],
      }),
      castAgentMessage({ role: "user", content: "next" }),
      castAgentMessage({
        role: "assistant",
        content: [{ type: "text", text: "latest" }],
      }),
    ];

    const result = stripThinkingFromNonLatestAssistant(messages);
    const first = result[0] as Extract<AgentMessage, { role: "assistant" }>;
    expect(first.content).toEqual([{ type: "text", text: "" }]);
  });

  it("handles interleaved user and toolResult messages correctly", () => {
    const messages: AgentMessage[] = [
      castAgentMessage({
        role: "assistant",
        content: [
          { type: "thinking", thinking: "thought 1" },
          { type: "text", text: "call tool" },
        ],
      }),
      castAgentMessage({ role: "toolResult", content: "result" }),
      castAgentMessage({
        role: "assistant",
        content: [
          { type: "thinking", thinking: "thought 2" },
          { type: "text", text: "final" },
        ],
      }),
    ];

    const result = stripThinkingFromNonLatestAssistant(messages);

    // First assistant: thinking stripped
    const first = result[0] as Extract<AgentMessage, { role: "assistant" }>;
    expect(first.content).toEqual([{ type: "text", text: "call tool" }]);

    // toolResult: unchanged
    expect(result[1]).toBe(messages[1]);

    // Latest assistant: thinking preserved
    const latest = result[2] as Extract<AgentMessage, { role: "assistant" }>;
    expect(latest.content).toEqual([
      { type: "thinking", thinking: "thought 2" },
      { type: "text", text: "final" },
    ]);
  });
});
|
||||
|
||||
@ -3,6 +3,17 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
type AssistantContentBlock = Extract<AgentMessage, { role: "assistant" }>["content"][number];
|
||||
type AssistantMessage = Extract<AgentMessage, { role: "assistant" }>;
|
||||
|
||||
/**
 * Content-block `type` tags that count as thinking output:
 * `"thinking"` and `"redacted_thinking"`. These are the block types that
 * Anthropic considers immutable once returned.
 */
const THINKING_BLOCK_TYPES: ReadonlySet<string> = new Set(["thinking", "redacted_thinking"]);

/**
 * Report whether a value is a thinking-style content block.
 *
 * @param block - Any value. `null`, `undefined`, primitives, and objects whose
 *   `type` property is missing or not a string are all rejected.
 * @returns `true` only when `block` is an object whose string `type` is listed
 *   in `THINKING_BLOCK_TYPES`.
 */
function isThinkingBlock(block: unknown): boolean {
  // `typeof null === "object"`, so null has to be excluded explicitly.
  if (typeof block !== "object" || block === null) {
    return false;
  }
  const type = (block as { type?: unknown }).type;
  return typeof type === "string" && THINKING_BLOCK_TYPES.has(type);
}
|
||||
|
||||
export function isAssistantMessageWithContent(message: AgentMessage): message is AssistantMessage {
|
||||
return (
|
||||
!!message &&
|
||||
@ -13,7 +24,8 @@ export function isAssistantMessageWithContent(message: AgentMessage): message is
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip all `type: "thinking"` content blocks from assistant messages.
|
||||
* Strip all `type: "thinking"` and `type: "redacted_thinking"` content blocks
|
||||
* from assistant messages.
|
||||
*
|
||||
* If an assistant message becomes empty after stripping, it is replaced with
|
||||
* a synthetic `{ type: "text", text: "" }` block to preserve turn structure
|
||||
@ -33,7 +45,7 @@ export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
|
||||
const nextContent: AssistantContentBlock[] = [];
|
||||
let changed = false;
|
||||
for (const block of msg.content) {
|
||||
if (block && typeof block === "object" && (block as { type?: unknown }).type === "thinking") {
|
||||
if (isThinkingBlock(block)) {
|
||||
touched = true;
|
||||
changed = true;
|
||||
continue;
|
||||
@ -51,3 +63,67 @@ export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
|
||||
}
|
||||
return touched ? out : messages;
|
||||
}
|
||||
|
||||
/**
 * Strip `thinking` and `redacted_thinking` blocks from all assistant messages
 * **except** the latest one, where "latest" means the last message in the
 * array accepted by `isAssistantMessageWithContent`.
 *
 * Anthropic requires that thinking/redacted_thinking blocks in the latest
 * assistant message remain byte-identical to the original API response.
 * Blocks in non-latest assistant messages may be omitted entirely, so
 * dropping them prevents compaction or session serialization from corrupting
 * thinking blocks that are later rejected by the Anthropic API.
 *
 * An earlier assistant message that would be left with no content gets a
 * single synthetic `{ type: "text", text: "" }` block instead of an empty
 * content array. The input array and its messages are never mutated; changed
 * messages are shallow copies with a new `content` array.
 *
 * @param messages - Conversation history in order.
 * @returns A new array when at least one block was removed; otherwise the
 *   original `messages` reference.
 */
export function stripThinkingFromNonLatestAssistant(messages: AgentMessage[]): AgentMessage[] {
  // Scan backwards for the last assistant message with content.
  let lastAssistantIndex = -1;
  for (let i = messages.length - 1; i >= 0; i--) {
    if (isAssistantMessageWithContent(messages[i])) {
      lastAssistantIndex = i;
      break;
    }
  }

  // -1: no such assistant message. 0: it is the first message, so there is no
  // earlier message to strip. Either way the input is returned untouched.
  if (lastAssistantIndex <= 0) {
    return messages;
  }

  let touched = false;
  const out: AgentMessage[] = [];

  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i];

    // The latest assistant message and every non-assistant message pass
    // through by reference.
    if (i === lastAssistantIndex || !isAssistantMessageWithContent(msg)) {
      out.push(msg);
      continue;
    }

    const kept = msg.content.filter((block) => !isThinkingBlock(block));
    if (kept.length === msg.content.length) {
      // Nothing was removed; keep the original message object.
      out.push(msg);
      continue;
    }

    touched = true;
    // Fall back to an empty text block rather than an empty content array.
    const content =
      kept.length > 0 ? kept : [{ type: "text", text: "" } as AssistantContentBlock];
    out.push({ ...msg, content });
  }

  return touched ? out : messages;
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user