");
+ // Inner markdown should NOT be rendered since it's inside escaped HTML
+ expect(html).toContain("**bold**");
+ });
+
+ it("strips script tags", () => {
+ const html = toSanitizedMarkdownHtml("");
+ expect(html).not.toContain("");
+ expect(html).not.toContain(")");
+ // validateLink blocks non-image data: URIs, so raw source appears
+ expect(html).not.toContain("
{
+ const html = toSanitizedMarkdownHtml("[click](file:///etc/passwd)");
+ // DOMPurify strips file: scheme, leaving anchor text
+ expect(html).not.toContain('href="file:');
+ expect(html).toContain("click");
+ });
+ });
+
+ describe("ReDoS protection", () => { // pathological inputs must render without throwing or stalling
+ it("does not throw on deeply nested emphasis markers (#36213)", () => {
+ const nested = "*".repeat(500) + "text" + "*".repeat(500); // 500 "*" on each side of "text"
+ expect(() => toSanitizedMarkdownHtml(nested)).not.toThrow();
+ const html = toSanitizedMarkdownHtml(nested);
+ expect(html).toContain("text"); // the literal word must survive whatever the renderer does with the markers
+ });
+
+ it("does not throw on deeply nested brackets (#36213)", () => {
+ const nested = "[".repeat(200) + "link" + "]".repeat(200) + "(" + "x".repeat(200) + ")"; // 200-deep bracket nesting followed by a 200-char destination
+ expect(() => toSanitizedMarkdownHtml(nested)).not.toThrow();
+ });
+
+ it("does not hang on backtick + bracket ReDoS pattern", { timeout: 2_000 }, () => {
+ const HEADER = // JSON-looking prefix: a toolResult message whose text embeds another escaped toolResult message
+ '{"type":"message","id":"aaa","parentId":"bbb",' +
+ '"timestamp":"2000-01-01T00:00:00.000Z","message":' +
+ '{"role":"toolResult","toolCallId":"call_000",' +
+ '"toolName":"read","content":[{"type":"text","text":' +
+ '"{\\"type\\":\\"message\\",\\"id\\":\\"ccc\\",' +
+ '\\"timestamp\\":\\"2000-01-01T00:00:00.000Z\\",' +
+ '\\"message\\":{\\"role\\":\\"toolResult\\",' +
+ '\\"toolCallId\\":\\"call_111\\",\\"toolName\\":\\"read\\",' +
+ '\\"content\\":[{\\"type\\":\\"text\\",' +
+ '\\"text\\":\\"# Memory Index\\\\n\\\\n';
+
+ const RECORD_UNIT = // one record: heading with [tag], a ``` fenced question, and a [link](path) — backticks mixed with brackets
+ "## 2000-01-01 00:00:00 done [tag]\\\\n" +
+ "**question**:\\\\n```\\\\nsome question text here\\\\n```\\\\n" +
+ "**details**: [see details](./2000.01.01/00000000/INFO.md)\\\\n\\\\n";
+
+ const poison = HEADER + RECORD_UNIT.repeat(9); // header followed by nine repeated record units
+
+ const start = performance.now();
+ const html = toSanitizedMarkdownHtml(poison);
+ const elapsed = performance.now() - start;
+
+ expect(elapsed).toBeLessThan(500); // wall-clock budget in ms; NOTE(review): stricter than the 2_000 ms test timeout and may be flaky on slow CI — confirm
+ expect(html.length).toBeGreaterThan(0);
+ });
+ });
+
+ describe("large text handling", () => { // inputs past the parse limit and render failures must degrade to a plain-text fallback
+ it("uses plain text fallback for oversized content", () => {
+ // MARKDOWN_PARSE_LIMIT is 40_000 chars
+ const input = Array.from(
+ { length: 320 },
+ (_, i) => `Paragraph ${i + 1}: ${"Long plain-text reply. ".repeat(8)}`,
+ ).join("\n\n"); // 320 paragraphs of ~200 chars each, i.e. well above the limit noted above
+ const html = toSanitizedMarkdownHtml(input);
+ expect(html).toContain('class="markdown-plain-text-fallback"');
+ });
+
+ it("preserves indentation in plain text fallback", () => {
+ const input = `${"Header line\n".repeat(5000)}\n indented log line\n deeper indent`; // 60_000 chars of header lines, then two indented lines
+ const html = toSanitizedMarkdownHtml(input);
+ expect(html).toContain('class="markdown-plain-text-fallback"');
+ expect(html).toContain(" indented log line");
+ expect(html).toContain(" deeper indent");
+ });
+
+ it("caches oversized fallback results", () => {
+ const input = Array.from({ length: 240 }, (_, i) => `P${i}`).join("\n\n") + "x".repeat(35000); // NOTE(review): ~36k chars total, below the 40_000 limit cited above — confirm this really reaches the fallback path
+ const first = toSanitizedMarkdownHtml(input);
+ const second = toSanitizedMarkdownHtml(input);
+ expect(second).toBe(first); // identical output for identical input; does not by itself prove the fallback was used
+ });
+
+ it("falls back to escaped text if md.render throws (#36213)", () => {
+ const renderSpy = vi.spyOn(md, "render").mockImplementation(() => {
+ throw new Error("forced failure");
+ });
+ const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); // silence and capture the expected warning
+ try {
+ const html = toSanitizedMarkdownHtml("test");
+ expect(html).toContain(''); // NOTE(review): an empty expected substring always matches — the intended markup appears to have been lost; restore it
+ expect(warnSpy).toHaveBeenCalledOnce();
+ } finally {
+ renderSpy.mockRestore(); // always restore so later tests see the real md.render and console.warn
+ warnSpy.mockRestore();
+ }
+ });
+ });
});
diff --git a/ui/src/ui/markdown.ts b/ui/src/ui/markdown.ts
index 2b324037713..7a0c4e6a5a5 100644
--- a/ui/src/ui/markdown.ts
+++ b/ui/src/ui/markdown.ts
@@ -1,5 +1,6 @@
import DOMPurify from "dompurify";
-import { marked } from "marked";
+import MarkdownIt from "markdown-it";
+import markdownItTaskLists from "markdown-it-task-lists";
import { truncateText } from "./format.ts";
const allowedTags = [
@@ -19,10 +20,12 @@ const allowedTags = [
"h4",
"hr",
"i",
+ "input",
"li",
"ol",
"p",
"pre",
+ "s",
"span",
"strong",
"summary",
@@ -37,7 +40,9 @@ const allowedTags = [
];
const allowedAttrs = [
+ "checked",
"class",
+ "disabled",
"href",
"rel",
"target",
@@ -107,6 +112,256 @@ function installHooks() {
});
}
+// ── markdown-it instance with custom renderers ──
+
+ /**
+  * Escape the five HTML-significant characters (& < > " ') so `value` can be
+  * embedded in markup as literal text or inside a quoted attribute value.
+  *
+  * `&` is replaced first so the entities produced by the later steps are not
+  * escaped a second time.
+  *
+  * @param value Raw text that may contain markup characters.
+  * @returns The text with each special character replaced by its entity.
+  */
+ function escapeHtml(value: string): string {
+   return value
+     .replace(/&/g, "&amp;")
+     .replace(/</g, "&lt;")
+     .replace(/>/g, "&gt;")
+     .replace(/"/g, "&quot;")
+     .replace(/'/g, "&#39;");
+ }
+
+ /**
+  * Produce the label shown for an image.
+  *
+  * @param text Candidate label; may be missing, null, or blank.
+  * @returns The label with surrounding whitespace removed, or "image" when
+  *   nothing is left after trimming.
+  */
+ function normalizeMarkdownImageLabel(text?: string | null): string {
+   const label = (text ?? "").trim();
+   if (label.length === 0) {
+     return "image";
+   }
+   return label;
+ }
+
+ export const md = new MarkdownIt({
+ html: true, // Enable HTML recognition so html_block/html_inline overrides can escape it
+ breaks: true, // per markdown-it docs: render single newlines inside paragraphs as <br>
+ linkify: true, // auto-link URL-like text; the matching rules are narrowed via md.linkify below
+ });
+
+ // Enable GFM strikethrough (~~text~~) to match original marked.js behavior.
+ // markdown-it renders strikethrough as <s> tags; we added "s" to allowedTags for DOMPurify.
+ md.enable("strikethrough");
+
+ // Disable fuzzy link detection to prevent bare filenames like "README.md"
+ // from being auto-linked as "http://README.md". URLs with explicit protocol
+ // (https://...) and emails are still linkified.
+ //
+ // Alternative considered: extensions/matrix/src/matrix/format.ts uses fuzzyLink
+ // with a file-extension blocklist to filter false positives at render time.
+ // We chose the www-only approach instead because:
+ // 1. Matches original marked.js GFM behavior exactly (bare domains were never linked)
+ // 2. No blocklist to maintain — new TLDs like .ai, .io, .dev would need constant updates
+ // 3. Predictable behavior — users can always use explicit https:// for any URL
+ md.linkify.set({ fuzzyLink: false });
+
+// Re-enable www. prefix detection per GFM spec: bare URLs without protocol
+// must start with "www." to be auto-linked. This avoids false positives on
+// filenames while preserving expected behavior for "www.example.com".
+// GFM spec: valid domain = alphanumeric/underscore/hyphen segments separated
+// by periods, at least one period, no underscores in last two segments.
+ md.linkify.add("www", {
+   /**
+    * Decide how much of the text after a "www" prefix belongs to the link.
+    *
+    * @param text Full text being scanned.
+    * @param pos Offset where the candidate tail starts (presumably just past
+    *   "www", since the tail must begin with ".").
+    * @returns Length of the accepted tail, or 0 when the tail is not a link.
+    */
+   validate(text, pos) {
+     const rest = text.slice(pos);
+
+     // Tail shape: "." + domain labels (a single label such as "localhost" is
+     // allowed), optional ":port", optional path/query/fragment.
+     // marked.js GFM linked www.localhost, www.internal etc. — that is preserved.
+     // Underscores are accepted in every label for simplicity; real TLDs do not
+     // contain them.
+     // The path stops at "<" per the GFM spec. marked.js would instead turn
+     // www.example.com/<token> into http://www.example.com/%3Ctoken%3E (a broken
+     // URL with encoded angle brackets); stopping early keeps the placeholder
+     // visible as plain text.
+     const tailPattern =
+       /^\.[a-z0-9_](?:[a-z0-9_-]*[a-z0-9_])?(?:\.[a-z0-9_](?:[a-z0-9_-]*[a-z0-9_])?)*(?::\d+)?(?:[/?#][^\s<]*)?/i;
+     const hit = tailPattern.exec(rest);
+     if (hit === null) {
+       return 0;
+     }
+
+     let end = hit[0].length;
+
+     // Count the surplus of ")" over "(" once, up front, so a malformed URL with
+     // thousands of trailing ")" is not rescanned on every loop iteration.
+     let unmatchedClosers = 0;
+     for (const c of rest.slice(0, end)) {
+       if (c === ")") {
+         unmatchedClosers++;
+       } else if (c === "(") {
+         unmatchedClosers--;
+       }
+     }
+
+     // Trim from the right, following the GFM extended-autolink rules:
+     //  - trailing ? ! . , : * _ ~ are never part of the link (";" is trimmed
+     //    here as well);
+     //  - a trailing ")" is dropped only while closers outnumber openers.
+     const trailingPunctuation = /[?!.,;:*_~]/;
+     while (end > 0) {
+       const last = rest[end - 1];
+       const isPunctuation = trailingPunctuation.test(last);
+       const isSurplusParen = last === ")" && unmatchedClosers > 0;
+       if (!isPunctuation && !isSurplusParen) {
+         break;
+       }
+       if (isSurplusParen) {
+         unmatchedClosers--;
+       }
+       end--;
+     }
+     return end;
+   },
+   /** Give the matched "www…" text an explicit http:// scheme. */
+   normalize(match) {
+     match.url = `http://${match.url}`;
+   },
+ });
+
+// Override default link validator to allow all base64 data URIs for images.
+// The default only allows gif/png/jpeg/webp; we support any image/* subtype
+// (e.g., avif, jpg, svg+xml) to match the previous marked.js behavior.
+//
+// Security for blocked schemes (javascript:, vbscript:, file:) is handled at
+// render time, not validation time. This ensures markdown-it generates tokens
+// so our custom renderers can process them:
+// - Images: renderer.rules.image shows alt text for non-data: URLs
+// - Links: DOMPurify strips dangerous href schemes (javascript:, vbscript:, etc.)
+// Blocking at validateLink would skip token generation entirely, causing raw
+// markdown source to appear instead of graceful fallbacks.
+ md.validateLink = (url: string) => {
+   // URLs matching INLINE_DATA_IMAGE_RE (declared outside this block) are
+   // inline image data URIs and are accepted outright.
+   if (INLINE_DATA_IMAGE_RE.test(url)) {
+     return true;
+   }
+
+   // Any other data: URI that is not data:image/* (e.g. data:text/html) is
+   // rejected here: it has no graceful fallback and is a potential XSS vector.
+   const normalized = url.trim().toLowerCase();
+   const isNonImageDataUri =
+     normalized.startsWith("data:") && !normalized.startsWith("data:image/");
+
+   // Everything else is accepted so tokens are still generated; dangerous
+   // schemes such as javascript:, vbscript: and file: are left to the custom
+   // renderers and to DOMPurify's URI sanitization, which drops the href and
+   // keeps the anchor text — consistent with the original marked.js behavior.
+   return !isNonImageDataUri;
+ };
+
+// Enable GFM task list checkboxes (- [x] / - [ ]).
+// enabled: false keeps checkboxes read-only (disabled="") — task lists in
+// chat messages are display-only, not interactive forms.
+// label: false avoids wrapping item text in