fix(agents): handle quoted JSON stop_reason payloads in sensitive detection

Extend SENSITIVE_STOP_REASON_RE to match JSON-stringified variants like '"stop_reason":"sensitive"' since describeUnknownError uses JSON.stringify for non-Error objects.
This commit is contained in:
MoerAI 2026-03-19 21:24:39 +09:00
parent 1e224de4ea
commit ff954a711b

View File

@ -109,8 +109,10 @@ const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUS
// Detect model safety-filter rejections (stop_reason: sensitive).
// This is NOT a timeout — it is a content policy block that requires a user-facing message.
//
// Accepted shapes (case-insensitive):
//   - bare text:        "stop_reason: sensitive", "stop reason: sensitive", "reason: sensitive"
//   - JSON-stringified: '"stop_reason":"sensitive"', '"reason": "sensitive"'
//     (describeUnknownError uses JSON.stringify, which wraps keys and string values in quotes).
//
// Pattern notes:
//   - `(?:stop[_ ])?` is spelled out because `_` is a word character: `\breason` on its own
//     cannot match the `reason` inside `stop_reason`.
//   - `"?` after the key and before the value tolerates the JSON quotes; no closing-quote
//     token is needed because `\b` already holds between `sensitive` and a following `"`.
//   - The trailing `\b` rejects longer words such as "sensitively".
//   - No `g`/`y` flag, so `.test()` is stateless and the constant is safe to share.
const SENSITIVE_STOP_REASON_RE = /\b(?:stop[_ ])?reason"?:\s*"?sensitive\b/i;
function scrubAnthropicRefusalMagic(prompt: string): string {
if (!prompt.includes(ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL)) {