openclaw/src/agents/pi-embedded-utils.strip-model-special-tokens.test.ts
George Zhang 309162f9a2
fix: strip leaked model control tokens from user-facing text (#42173)
Models like GLM-5 and DeepSeek sometimes emit internal delimiter tokens in their responses. This change strips them using a generic pattern in the text extraction pipeline, following the same architecture as stripMinimaxToolCallXml.

Closes #40020
Supersedes #40573

Co-authored-by: imwyvern <100903837+imwyvern@users.noreply.github.com>
2026-03-10 06:27:59 -07:00

26 lines
973 B
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { describe, expect, it } from "vitest";
import { stripModelSpecialTokens } from "./pi-embedded-utils.js";
/**
 * Regression tests for `stripModelSpecialTokens`.
 *
 * Covers the two delimiter spellings exercised here (ASCII `|` and full-width
 * U+FF5C pipes) and guards against over-stripping ordinary angle-bracket text
 * such as comparisons and HTML tags.
 *
 * @see https://github.com/openclaw/openclaw/issues/40020
 */
describe("stripModelSpecialTokens", () => {
  it("strips tokens and inserts space between adjacent words", () => {
    expect(stripModelSpecialTokens("<|user|>Question<|assistant|>Answer")).toBe("Question Answer");
  });

  it("strips full-width pipe variants (DeepSeek U+FF5C)", () => {
    // Spelled with escapes so the look-alike characters cannot be silently
    // dropped or confused with ASCII: \uFF5C is FULLWIDTH VERTICAL LINE and
    // \u2581 is LOWER ONE EIGHTH BLOCK (the word separator inside the token).
    const deepSeekToken = "<\uFF5Cbegin\u2581of\u2581sentence\uFF5C>";
    expect(stripModelSpecialTokens(`${deepSeekToken}Hello there`)).toBe("Hello there");
  });

  it("does not strip normal angle brackets or HTML", () => {
    // Neither input contains a pipe-delimited token, so both must survive verbatim.
    expect(stripModelSpecialTokens("a < b && c > d")).toBe("a < b && c > d");
    expect(stripModelSpecialTokens("<div>hello</div>")).toBe("<div>hello</div>");
  });

  it("passes through text without tokens unchanged", () => {
    const text = "Just a normal response.";
    expect(stripModelSpecialTokens(text)).toBe(text);
  });
});