From 0fad79bb72462a2ba1a5317784f67de0d2c8596e Mon Sep 17 00:00:00 2001 From: MoerAI Date: Fri, 13 Mar 2026 22:43:54 +0900 Subject: [PATCH 1/5] fix(agents): handle stop_reason 'sensitive' gracefully (#43607) --- src/agents/pi-embedded-runner/run.ts | 36 ++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index dce7ff919d4..f28cdd31efb 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -97,6 +97,11 @@ const OVERLOAD_FAILOVER_BACKOFF_POLICY: BackoffPolicy = { const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL"; const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)"; +// Detect model safety-filter rejections (stop_reason: sensitive). +// This is NOT a timeout — it is a content policy block that requires a user-facing message. +const SENSITIVE_STOP_REASON_RE = + /\bunhandled\s+stop reason:\s*sensitive\b|\bstop reason:\s*sensitive\b|\breason:\s*sensitive\b/i; + function scrubAnthropicRefusalMagic(prompt: string): string { if (!prompt.includes(ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL)) { return prompt; @@ -1221,6 +1226,37 @@ export async function runEmbeddedPiAgent( authRetryPending = true; continue; } + // Handle sensitive stop_reason (content safety filter) with a user-friendly message + if (SENSITIVE_STOP_REASON_RE.test(errorText)) { + return { + payloads: [ + { + text: + "I can't respond to that - the content was flagged by the model's safety filter. " + + "Please rephrase or try a different topic.", + }, + ], + meta: { + durationMs: Date.now() - started, + agentMeta: buildErrorAgentMeta({ + sessionId: sessionIdUsed, + provider, + model: model.id, + usageAccumulator, + lastRunPromptUsage, + lastAssistant, + lastTurnTotal, + }), + systemPromptReport: attempt.systemPromptReport, + error: { kind: "sensitive", message: errorText }, + }, + didSendViaMessagingTool: attempt.didSendViaMessagingTool, + messagingToolSentTexts: attempt.messagingToolSentTexts, + messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls, + messagingToolSentTargets: attempt.messagingToolSentTargets, + successfulCronAdds: attempt.successfulCronAdds, + }; + } // Handle role ordering errors with a user-friendly message if (/incorrect role information|roles must alternate/i.test(errorText)) { return { From 1adc05aa6a4a7e17984a4f9c71fc2987de31cbdf Mon Sep 17 00:00:00 2001 From: MoerAI Date: Mon, 16 Mar 2026 14:59:01 +0900 Subject: [PATCH 2/5] fix(agents): mark sensitive-stop payload as error with isError flag Add isError: true to the sensitive stop_reason payload so downstream consumers (e.g. cron delivery) correctly treat it as an error. Add 'sensitive' to the error kind union type. --- src/agents/pi-embedded-runner/run.ts | 1 + src/agents/pi-embedded-runner/types.ts | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index df2a7388c6e..2d18955d184 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1320,6 +1320,7 @@ export async function runEmbeddedPiAgent( text: "I can't respond to that - the content was flagged by the model's safety filter. " + "Please rephrase or try a different topic.", + isError: true, }, ], meta: { diff --git a/src/agents/pi-embedded-runner/types.ts b/src/agents/pi-embedded-runner/types.ts index 722abbf2a9a..ac98141f0dc 100644 --- a/src/agents/pi-embedded-runner/types.ts +++ b/src/agents/pi-embedded-runner/types.ts @@ -41,7 +41,8 @@ export type EmbeddedPiRunMeta = { | "compaction_failure" | "role_ordering" | "image_size" - | "retry_limit"; + | "retry_limit" + | "sensitive"; message: string; }; /** Stop reason for the agent run (e.g., "completed", "tool_calls"). */ From 4fea80126df0c268f8b6b8d8ae27a45738e7e02e Mon Sep 17 00:00:00 2001 From: MoerAI Date: Tue, 17 Mar 2026 20:02:46 +0900 Subject: [PATCH 3/5] fix: match stop_reason (underscore) variant in sensitive detection regex The regex only matched 'stop reason' (space) but some error text uses the canonical key form 'stop_reason' (underscore). Use [_ ] character class to match both. --- src/agents/pi-embedded-runner/run.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 2d18955d184..71d7ae34ad5 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -109,7 +109,7 @@ const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUS // Detect model safety-filter rejections (stop_reason: sensitive). // This is NOT a timeout — it is a content policy block that requires a user-facing message. const SENSITIVE_STOP_REASON_RE = - /\bunhandled\s+stop reason:\s*sensitive\b|\bstop reason:\s*sensitive\b|\breason:\s*sensitive\b/i; + /\bunhandled\s+stop[_ ]reason:\s*sensitive\b|\bstop[_ ]reason:\s*sensitive\b|\breason:\s*sensitive\b/i; function scrubAnthropicRefusalMagic(prompt: string): string { if (!prompt.includes(ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL)) { From ff954a711b9d75cd165226d040340168ec049918 Mon Sep 17 00:00:00 2001 From: MoerAI Date: Thu, 19 Mar 2026 21:24:39 +0900 Subject: [PATCH 4/5] fix(agents): handle quoted JSON stop_reason payloads in sensitive detection Extend SENSITIVE_STOP_REASON_RE to match JSON-stringified variants like '"stop_reason":"sensitive"' since describeUnknownError uses JSON.stringify for non-Error objects. --- src/agents/pi-embedded-runner/run.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index cf57890acd2..0c350d72b16 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -109,8 +109,10 @@ const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUS // Detect model safety-filter rejections (stop_reason: sensitive). // This is NOT a timeout — it is a content policy block that requires a user-facing message. +// Matches both bare text ("stop_reason: sensitive") and JSON-stringified variants +// ('"stop_reason":"sensitive"') since describeUnknownError uses JSON.stringify. const SENSITIVE_STOP_REASON_RE = - /\bunhandled\s+stop[_ ]reason:\s*sensitive\b|\bstop[_ ]reason:\s*sensitive\b|\breason:\s*sensitive\b/i; + /\bunhandled\s+stop[_ ]reason:\s*"?sensitive"?\b|\bstop[_ ]reason"?:\s*"?sensitive"?\b|\breason"?:\s*"?sensitive"?\b/i; function scrubAnthropicRefusalMagic(prompt: string): string { if (!prompt.includes(ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL)) { From c621974a9b07c88be8a182cbd20467bcb288f662 Mon Sep 17 00:00:00 2001 From: MoerAI Date: Fri, 20 Mar 2026 10:34:44 +0900 Subject: [PATCH 5/5] fix(agents): drop messaging-tool metadata from sensitive-stop return Remove didSendViaMessagingTool, messagingToolSentTexts, messagingToolSentMediaUrls, messagingToolSentTargets, and successfulCronAdds from the sensitive stop_reason return path. Propagating these fields causes the safety-filter refusal payload to be suppressed when a messaging tool has already sent to the same target, leaving the user with no safety explanation. This aligns the sensitive error path with other early-return error branches (role_ordering, image_size) that also omit messaging-tool metadata. --- src/agents/pi-embedded-runner/run.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 0c350d72b16..e96d0b5f40a 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1348,11 +1348,6 @@ export async function runEmbeddedPiAgent( systemPromptReport: attempt.systemPromptReport, error: { kind: "sensitive", message: errorText }, }, - didSendViaMessagingTool: attempt.didSendViaMessagingTool, - messagingToolSentTexts: attempt.messagingToolSentTexts, - messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls, - messagingToolSentTargets: attempt.messagingToolSentTargets, - successfulCronAdds: attempt.successfulCronAdds, }; } // Handle role ordering errors with a user-friendly message