fix(ui): replace marked.js with markdown-it to fix ReDoS UI freeze
Replace marked.js with markdown-it to eliminate the ReDoS vulnerability (#36213). Behavior remains consistent with the original marked.js, except for task lists (- [ ] / - [x]), which now render as checkboxes via markdown-it-task-lists.

To preserve marked.js behavior:
- Enable GFM strikethrough (~~text~~)
- Disable fuzzyLink to avoid auto-linking bare filenames like README.md
- Add www. autolink validator per GFM spec (trailing punctuation, balanced parens)
- Override html_block/html_inline to escape raw HTML
- Override image renderer: alt text for remote URLs, allow base64 data URIs
- Override fence/code_block: copy button and JSON collapse
- Dangerous schemes (javascript:, vbscript:, file:) sanitized by DOMPurify

New: task-list checkbox styling for .chat-text, .sidebar-markdown, .chat-thinking

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
faf81c5574
commit
6aeedeb401
24
pnpm-lock.yaml
generated
24
pnpm-lock.yaml
generated
@ -644,10 +644,16 @@ importers:
|
||||
lit:
|
||||
specifier: ^3.3.2
|
||||
version: 3.3.2
|
||||
marked:
|
||||
specifier: ^17.0.4
|
||||
version: 17.0.4
|
||||
markdown-it:
|
||||
specifier: ^14.1.1
|
||||
version: 14.1.1
|
||||
markdown-it-task-lists:
|
||||
specifier: ^2.1.1
|
||||
version: 2.1.1
|
||||
devDependencies:
|
||||
'@types/markdown-it':
|
||||
specifier: ^14.1.2
|
||||
version: 14.1.2
|
||||
'@vitest/browser-playwright':
|
||||
specifier: 4.1.0
|
||||
version: 4.1.0(playwright@1.58.2)(vite@8.0.0(@types/node@25.5.0)(esbuild@0.27.3)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2))(vitest@4.1.0)
|
||||
@ -5071,6 +5077,9 @@ packages:
|
||||
resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==}
|
||||
engines: {node: '>=10'}
|
||||
|
||||
markdown-it-task-lists@2.1.1:
|
||||
resolution: {integrity: sha512-TxFAc76Jnhb2OUu+n3yz9RMu4CwGfaT788br6HhEDlvWfdeJcLUsxk1Hgw2yJio0OXsxv7pyIPmvECY7bMbluA==}
|
||||
|
||||
markdown-it@14.1.0:
|
||||
resolution: {integrity: sha512-a54IwgWPaeBCAAsv13YgmALOF1elABB08FxO9i+r4VFk5Vl4pKokRPeX8u5TCgSsPi6ec1otfLjdOpVcgbpshg==}
|
||||
hasBin: true
|
||||
@ -5087,11 +5096,6 @@ packages:
|
||||
engines: {node: '>= 18'}
|
||||
hasBin: true
|
||||
|
||||
marked@17.0.4:
|
||||
resolution: {integrity: sha512-NOmVMM+KAokHMvjWmC5N/ZOvgmSWuqJB8FoYI019j4ogb/PeRMKoKIjReZ2w3376kkA8dSJIP8uD993Kxc0iRQ==}
|
||||
engines: {node: '>= 20'}
|
||||
hasBin: true
|
||||
|
||||
math-intrinsics@1.1.0:
|
||||
resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==}
|
||||
engines: {node: '>= 0.4'}
|
||||
@ -12166,6 +12170,8 @@ snapshots:
|
||||
dependencies:
|
||||
semver: 7.7.4
|
||||
|
||||
markdown-it-task-lists@2.1.1: {}
|
||||
|
||||
markdown-it@14.1.0:
|
||||
dependencies:
|
||||
argparse: 2.0.1
|
||||
@ -12190,8 +12196,6 @@ snapshots:
|
||||
|
||||
marked@15.0.12: {}
|
||||
|
||||
marked@17.0.4: {}
|
||||
|
||||
math-intrinsics@1.1.0: {}
|
||||
|
||||
matrix-events-sdk@0.0.1: {}
|
||||
|
||||
@ -12,9 +12,11 @@
|
||||
"@noble/ed25519": "3.0.1",
|
||||
"dompurify": "^3.3.3",
|
||||
"lit": "^3.3.2",
|
||||
"marked": "^17.0.4"
|
||||
"markdown-it": "^14.1.1",
|
||||
"markdown-it-task-lists": "^2.1.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/markdown-it": "^14.1.2",
|
||||
"@vitest/browser-playwright": "4.1.0",
|
||||
"jsdom": "^29.0.0",
|
||||
"playwright": "^1.58.2",
|
||||
|
||||
10
ui/src/markdown-it-task-lists.d.ts
vendored
Normal file
10
ui/src/markdown-it-task-lists.d.ts
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
/**
 * Ambient typings for the `markdown-it-task-lists` plugin, which ships
 * without its own type declarations.
 */
declare module "markdown-it-task-lists" {
  import type MarkdownIt from "markdown-it";

  /** Options accepted as the second argument of `md.use(plugin, options)`. */
  interface TaskListsOptions {
    /** When false, the rendered checkboxes are read-only (`disabled=""`). */
    enabled?: boolean;
    /** When true, the item text is wrapped in a `<label>`. */
    label?: boolean;
    /** Label placement switch; see the plugin README for exact semantics. */
    labelAfter?: boolean;
  }

  /** markdown-it plugin entry point. */
  const taskLists: (md: MarkdownIt, options?: TaskListsOptions) => void;
  export default taskLists;
}
|
||||
@ -41,6 +41,20 @@
|
||||
margin-top: 0.25em;
|
||||
}
|
||||
|
||||
/*
 * Task lists: drop the bullet for unordered task items only, so ordered
 * task lists keep their numbers. `:is()` contributes one class of
 * specificity, the same as each individual container selector.
 */
:is(.chat-text, .sidebar-markdown, .chat-thinking) :where(ul > .task-list-item) {
  list-style: none;
}

/* Space the checkbox away from the item text and align it with the line. */
:is(.chat-text, .sidebar-markdown, .chat-thinking) :where(.task-list-item-checkbox) {
  margin-right: 0.4em;
  vertical-align: middle;
}
|
||||
|
||||
.chat-text :where(a) {
|
||||
color: var(--accent);
|
||||
text-decoration: underline;
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
import { marked } from "marked";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { toSanitizedMarkdownHtml } from "./markdown.ts";
|
||||
import { md, toSanitizedMarkdownHtml } from "./markdown.ts";
|
||||
|
||||
describe("toSanitizedMarkdownHtml", () => {
|
||||
// ── Original tests from before markdown-it migration ──
|
||||
it("renders basic markdown", () => {
|
||||
const html = toSanitizedMarkdownHtml("Hello **world**");
|
||||
expect(html).toContain("<strong>world</strong>");
|
||||
@ -146,9 +146,9 @@ describe("toSanitizedMarkdownHtml", () => {
|
||||
expect(second).toBe(first);
|
||||
});
|
||||
|
||||
it("falls back to escaped plain text if marked.parse throws (#36213)", () => {
|
||||
const parseSpy = vi.spyOn(marked, "parse").mockImplementation(() => {
|
||||
throw new Error("forced parse failure");
|
||||
it("falls back to escaped plain text if md.render throws (#36213)", () => {
|
||||
const renderSpy = vi.spyOn(md, "render").mockImplementation(() => {
|
||||
throw new Error("forced render failure");
|
||||
});
|
||||
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
|
||||
const input = `Fallback **probe** ${Date.now()}`;
|
||||
@ -158,8 +158,374 @@ describe("toSanitizedMarkdownHtml", () => {
|
||||
expect(html).toContain("Fallback **probe**");
|
||||
expect(warnSpy).toHaveBeenCalledOnce();
|
||||
} finally {
|
||||
parseSpy.mockRestore();
|
||||
renderSpy.mockRestore();
|
||||
warnSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
|
||||
// ── Additional tests for markdown-it migration ──
|
||||
describe("www autolinks", () => {
|
||||
it("links www.example.com", () => {
|
||||
const html = toSanitizedMarkdownHtml("Visit www.example.com today");
|
||||
expect(html).toContain('<a href="http://www.example.com"');
|
||||
expect(html).toContain("www.example.com</a>");
|
||||
});
|
||||
|
||||
it("links www.example.com with path, query, and fragment", () => {
|
||||
const html = toSanitizedMarkdownHtml("See www.example.com/path?a=1#section");
|
||||
expect(html).toContain('<a href="http://www.example.com/path?a=1#section"');
|
||||
});
|
||||
|
||||
it("links www.example.com with port", () => {
|
||||
const html = toSanitizedMarkdownHtml("Visit www.example.com:8080/foo");
|
||||
expect(html).toContain('<a href="http://www.example.com:8080/foo"');
|
||||
});
|
||||
|
||||
it("links www.localhost and other single-label hosts", () => {
|
||||
const html = toSanitizedMarkdownHtml("Visit www.localhost:3000/path for dev");
|
||||
expect(html).toContain('<a href="http://www.localhost:3000/path"');
|
||||
});
|
||||
|
||||
it("links www.foo_bar.example.com with underscores", () => {
|
||||
const html = toSanitizedMarkdownHtml("Visit www.foo_bar.example.com");
|
||||
expect(html).toContain('<a href="http://www.foo_bar.example.com"');
|
||||
});
|
||||
|
||||
it("strips trailing punctuation from links", () => {
|
||||
const html1 = toSanitizedMarkdownHtml("Check www.example.com/help.");
|
||||
expect(html1).toContain('href="http://www.example.com/help"');
|
||||
expect(html1).not.toContain('href="http://www.example.com/help."');
|
||||
|
||||
const html2 = toSanitizedMarkdownHtml("See www.example.com!");
|
||||
expect(html2).toContain('href="http://www.example.com"');
|
||||
expect(html2).not.toContain('href="http://www.example.com!"');
|
||||
});
|
||||
|
||||
it("handles balanced parentheses in URLs", () => {
|
||||
const html = toSanitizedMarkdownHtml("(see www.example.com/foo(bar))");
|
||||
expect(html).toContain('href="http://www.example.com/foo(bar)"');
|
||||
});
|
||||
|
||||
it("stops at < character per GFM spec", () => {
  const html = toSanitizedMarkdownHtml("Visit www.example.com/<token> here");
  // The autolink must end before "<" ...
  expect(html).toContain('href="http://www.example.com/"');
  // ... and the placeholder must survive as escaped text, never as a raw tag.
  expect(html).not.toContain("<token>");
  expect(html).toContain("&lt;token&gt;");
});
|
||||
|
||||
it("does NOT link bare domains without www", () => {
|
||||
const html = toSanitizedMarkdownHtml("Visit google.com today");
|
||||
expect(html).not.toContain("<a");
|
||||
expect(html).toContain("google.com");
|
||||
});
|
||||
|
||||
it("does NOT link filenames with TLD-like extensions", () => {
|
||||
const html = toSanitizedMarkdownHtml("Check README.md and config.json");
|
||||
expect(html).not.toContain("<a");
|
||||
expect(html).toContain("README.md");
|
||||
});
|
||||
|
||||
it("does NOT link IP addresses", () => {
|
||||
const html = toSanitizedMarkdownHtml("Check 127.0.0.1:8080");
|
||||
expect(html).not.toContain("<a");
|
||||
expect(html).toContain("127.0.0.1:8080");
|
||||
});
|
||||
});
|
||||
|
||||
describe("explicit protocol links", () => {
|
||||
it("links https:// URLs", () => {
|
||||
const html = toSanitizedMarkdownHtml("Visit https://example.com");
|
||||
expect(html).toContain('<a href="https://example.com"');
|
||||
});
|
||||
|
||||
it("links http:// URLs", () => {
|
||||
const html = toSanitizedMarkdownHtml("Visit http://github.com/openclaw");
|
||||
expect(html).toContain('<a href="http://github.com/openclaw"');
|
||||
});
|
||||
|
||||
it("links email addresses", () => {
|
||||
const html = toSanitizedMarkdownHtml("Email me at test@example.com");
|
||||
expect(html).toContain('<a href="mailto:test@example.com"');
|
||||
});
|
||||
});
|
||||
|
||||
// Raw HTML in the markdown source must be rendered as visible, escaped text.
describe("HTML escaping", () => {
  it("escapes HTML tags as text", () => {
    const html = toSanitizedMarkdownHtml("<div>**bold**</div>");
    expect(html).toContain("&lt;div&gt;");
    expect(html).not.toContain("<div>");
    // Inner markdown should NOT be rendered since it's inside escaped HTML
    expect(html).toContain("**bold**");
  });

  it("strips script tags", () => {
    const html = toSanitizedMarkdownHtml("<script>alert(1)</script>");
    expect(html).not.toContain("<script");
    expect(html).toContain("&lt;script&gt;");
  });

  it("escapes inline HTML tags", () => {
    const html = toSanitizedMarkdownHtml("Check <b>this</b> out");
    expect(html).toContain("&lt;b&gt;");
    expect(html).not.toContain("<b>");
  });
});
|
||||
|
||||
// Task-list items render a read-only checkbox; user HTML inside them stays escaped.
describe("task lists", () => {
  it("renders task list checkboxes", () => {
    const html = toSanitizedMarkdownHtml("- [ ] Unchecked\n- [x] Checked");
    expect(html).toContain("<input");
    expect(html).toContain('type="checkbox"');
    expect(html).toContain("disabled");
    expect(html).toContain("Unchecked");
    expect(html).toContain("Checked");
  });

  it("renders links inside task items", () => {
    const html = toSanitizedMarkdownHtml("- [ ] Task with [link](https://example.com)");
    expect(html).toContain('<a href="https://example.com"');
  });

  it("escapes HTML injection in task items", () => {
    const html = toSanitizedMarkdownHtml("- [ ] <script>alert(1)</script>");
    expect(html).not.toContain("<script");
    expect(html).toContain("&lt;script&gt;");
  });

  it("escapes details/summary injection in task items", () => {
    const html = toSanitizedMarkdownHtml("- [ ] <details><summary>x</summary>y</details>");
    expect(html).toContain("&lt;details&gt;");
    expect(html).not.toContain("<details>");
  });
});
|
||||
|
||||
// Remote images are flattened to their alt text; only base64 data URIs render as <img>.
// NOTE(review): the fixture URLs are representative — any remote http(s) URL
// exercises the alt-text path; confirm against the canonical test file.
describe("images", () => {
  it("flattens remote images to alt text", () => {
    const html = toSanitizedMarkdownHtml("![Alt text](https://example.com/image.png)");
    expect(html).not.toContain("<img");
    expect(html).toContain("Alt text");
  });

  it("preserves markdown formatting in alt text", () => {
    const html = toSanitizedMarkdownHtml("![**Build log**](https://example.com/build.png)");
    expect(html).toContain("**Build log**");
  });

  it("preserves code formatting in alt text", () => {
    const html = toSanitizedMarkdownHtml("![`error.log`](https://example.com/error.png)");
    expect(html).toContain("`error.log`");
  });

  it("preserves base64 data URI images (#15437)", () => {
    const html = toSanitizedMarkdownHtml("![Chart](data:image/png;base64,iVBORw0KGgo=)");
    expect(html).toContain("<img");
    expect(html).toContain('class="markdown-inline-image"');
    expect(html).toContain("data:image/png;base64,");
  });

  it("uses fallback label for unlabeled images", () => {
    const html = toSanitizedMarkdownHtml("![](https://example.com/image.png)");
    expect(html).not.toContain("<img");
    expect(html).toContain("image");
  });
});
|
||||
|
||||
describe("code blocks", () => {
|
||||
it("renders fenced code blocks", () => {
|
||||
const html = toSanitizedMarkdownHtml("```ts\nconsole.log(1)\n```");
|
||||
expect(html).toContain("<pre>");
|
||||
expect(html).toContain("<code");
|
||||
expect(html).toContain("console.log(1)");
|
||||
});
|
||||
|
||||
it("renders indented code blocks", () => {
|
||||
// markdown-it requires a blank line before indented code
|
||||
const html = toSanitizedMarkdownHtml("text\n\n indented code");
|
||||
expect(html).toContain("<pre>");
|
||||
expect(html).toContain("<code>");
|
||||
});
|
||||
|
||||
it("includes copy button", () => {
|
||||
const html = toSanitizedMarkdownHtml("```\ncode\n```");
|
||||
expect(html).toContain('class="code-block-copy"');
|
||||
expect(html).toContain("data-code=");
|
||||
});
|
||||
|
||||
it("collapses JSON code blocks", () => {
|
||||
const html = toSanitizedMarkdownHtml('```json\n{"key": "value"}\n```');
|
||||
expect(html).toContain("<details");
|
||||
expect(html).toContain("json-collapse");
|
||||
expect(html).toContain("JSON");
|
||||
});
|
||||
});
|
||||
|
||||
describe("GFM features", () => {
|
||||
it("renders strikethrough", () => {
|
||||
const html = toSanitizedMarkdownHtml("This is ~~deleted~~ text");
|
||||
expect(html).toContain("<s>deleted</s>");
|
||||
});
|
||||
|
||||
it("renders tables", () => {
  // Named `tableSource` rather than `md` so the markdown-it instance imported
  // at the top of this file is not shadowed inside the test.
  const tableSource = "| A | B |\n|---|---|\n| 1 | 2 |";
  const html = toSanitizedMarkdownHtml(tableSource);
  expect(html).toContain("<table");
  expect(html).toContain("<th>");
});
|
||||
|
||||
it("renders basic markdown", () => {
|
||||
const html = toSanitizedMarkdownHtml("**bold** and *italic*");
|
||||
expect(html).toContain("<strong>bold</strong>");
|
||||
expect(html).toContain("<em>italic</em>");
|
||||
});
|
||||
|
||||
it("renders headings", () => {
|
||||
const html = toSanitizedMarkdownHtml("# Heading 1\n## Heading 2");
|
||||
expect(html).toContain("<h1>");
|
||||
expect(html).toContain("<h2>");
|
||||
});
|
||||
|
||||
it("renders blockquotes", () => {
|
||||
const html = toSanitizedMarkdownHtml("> quote");
|
||||
expect(html).toContain("<blockquote>");
|
||||
});
|
||||
|
||||
it("renders lists", () => {
|
||||
const html = toSanitizedMarkdownHtml("- item 1\n- item 2");
|
||||
expect(html).toContain("<ul>");
|
||||
expect(html).toContain("<li>");
|
||||
});
|
||||
});
|
||||
|
||||
describe("security", () => {
|
||||
it("blocks javascript: in links via DOMPurify", () => {
|
||||
const html = toSanitizedMarkdownHtml("[click me](javascript:alert(1))");
|
||||
// DOMPurify strips dangerous href schemes but keeps the anchor text
|
||||
expect(html).not.toContain('href="javascript:');
|
||||
expect(html).toContain("click me");
|
||||
});
|
||||
|
||||
it("shows alt text for javascript: images", () => {
  const html = toSanitizedMarkdownHtml("![Build log](javascript:alert(1))");
  expect(html).not.toContain("<img");
  expect(html).not.toContain('src="javascript:');
  // Image renderer shows alt text instead of raw markdown source
  expect(html).toContain("Build log");
  expect(html).not.toContain("![Build log]");
});
|
||||
|
||||
it("shows alt text for vbscript: and file: images", () => {
  // Neither scheme is a base64 data URI, so both must degrade to alt text.
  const html1 = toSanitizedMarkdownHtml("![Alt1](vbscript:msgbox(1))");
  expect(html1).toContain("Alt1");
  expect(html1).not.toContain("<img");

  const html2 = toSanitizedMarkdownHtml("![Alt2](file:///etc/passwd)");
  expect(html2).toContain("Alt2");
  expect(html2).not.toContain("<img");
});
|
||||
|
||||
it("blocks non-image data: URIs", () => {
|
||||
const html = toSanitizedMarkdownHtml("[x](data:text/html,<script>alert(1)</script>)");
|
||||
// validateLink blocks non-image data: URIs, so raw source appears
|
||||
expect(html).not.toContain("<a");
|
||||
expect(html).toContain("data:text/html");
|
||||
});
|
||||
|
||||
it("does not auto-link bare file:// URIs", () => {
|
||||
const html = toSanitizedMarkdownHtml("Check file:///etc/passwd");
|
||||
// Bare file:// without www. or http:// should NOT be auto-linked
|
||||
expect(html).not.toContain("<a");
|
||||
expect(html).toContain("file:///etc/passwd");
|
||||
});
|
||||
|
||||
it("strips href from explicit file:// links via DOMPurify", () => {
|
||||
const html = toSanitizedMarkdownHtml("[click](file:///etc/passwd)");
|
||||
// DOMPurify strips file: scheme, leaving anchor text
|
||||
expect(html).not.toContain('href="file:');
|
||||
expect(html).toContain("click");
|
||||
});
|
||||
});
|
||||
|
||||
describe("ReDoS protection", () => {
|
||||
it("does not throw on deeply nested emphasis markers (#36213)", () => {
|
||||
const nested = "*".repeat(500) + "text" + "*".repeat(500);
|
||||
expect(() => toSanitizedMarkdownHtml(nested)).not.toThrow();
|
||||
const html = toSanitizedMarkdownHtml(nested);
|
||||
expect(html).toContain("text");
|
||||
});
|
||||
|
||||
it("does not throw on deeply nested brackets (#36213)", () => {
|
||||
const nested = "[".repeat(200) + "link" + "]".repeat(200) + "(" + "x".repeat(200) + ")";
|
||||
expect(() => toSanitizedMarkdownHtml(nested)).not.toThrow();
|
||||
});
|
||||
|
||||
it("does not hang on backtick + bracket ReDoS pattern", { timeout: 2_000 }, () => {
|
||||
const HEADER =
|
||||
'{"type":"message","id":"aaa","parentId":"bbb",' +
|
||||
'"timestamp":"2000-01-01T00:00:00.000Z","message":' +
|
||||
'{"role":"toolResult","toolCallId":"call_000",' +
|
||||
'"toolName":"read","content":[{"type":"text","text":' +
|
||||
'"{\\"type\\":\\"message\\",\\"id\\":\\"ccc\\",' +
|
||||
'\\"timestamp\\":\\"2000-01-01T00:00:00.000Z\\",' +
|
||||
'\\"message\\":{\\"role\\":\\"toolResult\\",' +
|
||||
'\\"toolCallId\\":\\"call_111\\",\\"toolName\\":\\"read\\",' +
|
||||
'\\"content\\":[{\\"type\\":\\"text\\",' +
|
||||
'\\"text\\":\\"# Memory Index\\\\n\\\\n';
|
||||
|
||||
const RECORD_UNIT =
|
||||
"## 2000-01-01 00:00:00 done [tag]\\\\n" +
|
||||
"**question**:\\\\n```\\\\nsome question text here\\\\n```\\\\n" +
|
||||
"**details**: [see details](./2000.01.01/00000000/INFO.md)\\\\n\\\\n";
|
||||
|
||||
const poison = HEADER + RECORD_UNIT.repeat(9);
|
||||
|
||||
const start = performance.now();
|
||||
const html = toSanitizedMarkdownHtml(poison);
|
||||
const elapsed = performance.now() - start;
|
||||
|
||||
expect(elapsed).toBeLessThan(500);
|
||||
expect(html.length).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("large text handling", () => {
|
||||
it("uses plain text fallback for oversized content", () => {
|
||||
// MARKDOWN_PARSE_LIMIT is 40_000 chars
|
||||
const input = Array.from(
|
||||
{ length: 320 },
|
||||
(_, i) => `Paragraph ${i + 1}: ${"Long plain-text reply. ".repeat(8)}`,
|
||||
).join("\n\n");
|
||||
const html = toSanitizedMarkdownHtml(input);
|
||||
expect(html).toContain('class="markdown-plain-text-fallback"');
|
||||
});
|
||||
|
||||
it("preserves indentation in plain text fallback", () => {
|
||||
const input = `${"Header line\n".repeat(5000)}\n indented log line\n deeper indent`;
|
||||
const html = toSanitizedMarkdownHtml(input);
|
||||
expect(html).toContain('class="markdown-plain-text-fallback"');
|
||||
expect(html).toContain(" indented log line");
|
||||
expect(html).toContain(" deeper indent");
|
||||
});
|
||||
|
||||
it("caches oversized fallback results", () => {
|
||||
const input = Array.from({ length: 240 }, (_, i) => `P${i}`).join("\n\n") + "x".repeat(35000);
|
||||
const first = toSanitizedMarkdownHtml(input);
|
||||
const second = toSanitizedMarkdownHtml(input);
|
||||
expect(second).toBe(first);
|
||||
});
|
||||
|
||||
it("falls back to escaped text if md.render throws (#36213)", () => {
|
||||
const renderSpy = vi.spyOn(md, "render").mockImplementation(() => {
|
||||
throw new Error("forced failure");
|
||||
});
|
||||
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
|
||||
try {
|
||||
const html = toSanitizedMarkdownHtml("test");
|
||||
expect(html).toContain('<pre class="code-block">');
|
||||
expect(warnSpy).toHaveBeenCalledOnce();
|
||||
} finally {
|
||||
renderSpy.mockRestore();
|
||||
warnSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
import DOMPurify from "dompurify";
|
||||
import { marked } from "marked";
|
||||
import MarkdownIt from "markdown-it";
|
||||
import markdownItTaskLists from "markdown-it-task-lists";
|
||||
import { truncateText } from "./format.ts";
|
||||
|
||||
const allowedTags = [
|
||||
@ -19,10 +20,12 @@ const allowedTags = [
|
||||
"h4",
|
||||
"hr",
|
||||
"i",
|
||||
"input",
|
||||
"li",
|
||||
"ol",
|
||||
"p",
|
||||
"pre",
|
||||
"s",
|
||||
"span",
|
||||
"strong",
|
||||
"summary",
|
||||
@ -37,7 +40,9 @@ const allowedTags = [
|
||||
];
|
||||
|
||||
const allowedAttrs = [
|
||||
"checked",
|
||||
"class",
|
||||
"disabled",
|
||||
"href",
|
||||
"rel",
|
||||
"target",
|
||||
@ -107,6 +112,256 @@ function installHooks() {
|
||||
});
|
||||
}
|
||||
|
||||
// ── markdown-it instance with custom renderers ──
|
||||
|
||||
/**
 * Escapes the five HTML-significant characters so `value` can be embedded
 * safely in element content or in a double- or single-quoted attribute.
 *
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 *
 * @param value Raw text to escape.
 * @returns The text with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
function escapeHtml(value: string): string {
  return value
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
|
||||
|
||||
/**
 * Produces the label shown in place of (or as the `alt` of) a markdown image.
 *
 * @param text Raw alt text; may be missing.
 * @returns The trimmed text, or `"image"` when it is missing, empty or blank.
 */
function normalizeMarkdownImageLabel(text?: string | null): string {
  return text?.trim() || "image";
}
|
||||
|
||||
/**
 * Shared markdown-it instance used to render chat markdown.
 *
 * - `html: true` makes markdown-it tokenize raw HTML so the html_block /
 *   html_inline renderer overrides can escape it.
 * - `breaks: true` and `linkify: true` are enabled.
 * - GFM strikethrough (~~text~~) is enabled to match the original marked.js
 *   behavior. markdown-it emits <s> tags, so "s" is in allowedTags for DOMPurify.
 */
export const md = new MarkdownIt({
  html: true,
  breaks: true,
  linkify: true,
}).enable("strikethrough");

// Fuzzy link detection is turned off so bare filenames such as "README.md"
// are not auto-linked as "http://README.md". URLs with an explicit protocol
// (https://...) and email addresses are still linkified.
//
// Alternative considered: extensions/matrix/src/matrix/format.ts keeps fuzzyLink
// and filters false positives at render time with a file-extension blocklist.
// The www-only approach was chosen instead because:
//   1. It matches original marked.js GFM behavior exactly (bare domains were never linked).
//   2. There is no blocklist to maintain — new TLDs like .ai, .io, .dev would need constant updates.
//   3. Behavior is predictable — users can always write an explicit https:// for any URL.
md.linkify.set({ fuzzyLink: false });
|
||||
|
||||
// Re-enable www. prefix detection per GFM spec: bare URLs without protocol
// must start with "www." to be auto-linked. This avoids false positives on
// filenames while preserving expected behavior for "www.example.com".
//
// Deliberate deviations from the GFM "valid domain" rule:
//   - Single-label hosts (www.localhost, www.internal) are accepted, because
//     marked.js GFM linked them and that behavior is preserved.
//   - Underscores are accepted in every label for simplicity (GFM forbids them
//     in the last two labels; real TLDs don't contain underscores anyway).

/**
 * Matches what may follow the literal "www": one or more dot-separated host
 * labels, an optional port, and an optional path/query/fragment.
 * The path stops at whitespace and at "<" (per GFM), so a placeholder such as
 * www.example.com/<token> stays visible as text instead of being encoded into
 * the URL as %3Ctoken%3E.
 */
const WWW_AUTOLINK_TAIL_RE =
  /^\.[a-z0-9_](?:[a-z0-9_-]*[a-z0-9_])?(?:\.[a-z0-9_](?:[a-z0-9_-]*[a-z0-9_])?)*(?::\d+)?(?:[/?#][^\s<]*)?/i;

/** Characters never kept at the end of an autolink: ? ! . , ; : * _ ~ */
const WWW_TRAILING_PUNCTUATION = "?!.,;:*_~";

md.linkify.add("www", {
  /**
   * @param text Full text being scanned.
   * @param pos Index just past the matched "www" prefix.
   * @returns Number of characters after `pos` that belong to the link, or 0 for no link.
   */
  validate(text, pos) {
    const tail = text.slice(pos);
    const match = WWW_AUTOLINK_TAIL_RE.exec(tail);
    if (!match) {
      return 0;
    }

    let len = match[0].length;

    // Count parentheses once up front to avoid O(n²) rescans on malformed URLs
    // with many trailing ')' characters (e.g., ")".repeat(8000)).
    let parenBalance = 0;
    for (let i = 0; i < len; i++) {
      const c = tail.charAt(i);
      if (c === "(") {
        parenBalance++;
      } else if (c === ")") {
        parenBalance--;
      }
    }

    // Trim from the end: trailing punctuation always goes; a trailing ")" goes
    // only while closers outnumber openers (GFM parentheses rule).
    while (len > 0) {
      const ch = tail.charAt(len - 1);
      if (WWW_TRAILING_PUNCTUATION.includes(ch)) {
        len--;
        continue;
      }
      if (ch === ")" && parenBalance < 0) {
        parenBalance++;
        len--;
        continue;
      }
      break;
    }
    return len;
  },
  /** Gives the matched "www..." text an explicit http:// scheme for the href. */
  normalize(match) {
    match.url = "http://" + match.url;
  },
});
|
||||
|
||||
// Replace markdown-it's default link validator.
//
// The default only permits gif/png/jpeg/webp data URIs; here every URI that
// matches INLINE_DATA_IMAGE_RE is accepted (e.g., avif, jpg, svg+xml) to match
// the previous marked.js behavior.
//
// Blocked schemes (javascript:, vbscript:, file:) are intentionally NOT rejected
// here. Rejecting at validation time would skip token generation entirely and
// leave raw markdown source in the output. Letting them through means tokens are
// produced and the later stages provide a graceful fallback:
//   - Images: renderer.rules.image shows alt text for non-data: URLs
//   - Links: DOMPurify strips dangerous href schemes, leaving plain anchor text
//
// The single exception is non-image data: URIs (e.g., data:text/html): they
// have no graceful fallback and are potential XSS vectors, so they are refused.
md.validateLink = (url: string) => {
  if (INLINE_DATA_IMAGE_RE.test(url)) {
    return true;
  }
  const normalized = url.trim().toLowerCase();
  const isNonImageDataUri =
    normalized.startsWith("data:") && !normalized.startsWith("data:image/");
  return !isNonImageDataUri;
};
|
||||
|
||||
// Enable GFM task list checkboxes (- [x] / - [ ]).
// enabled: false keeps checkboxes read-only (disabled="") — task lists in
// chat messages are display-only, not interactive forms.
// label: false avoids wrapping item text in <label>, which would break
// accessibility when the item contains links (MDN warns against anchors inside labels).
md.use(markdownItTaskLists, { enabled: false, label: false });

// Mark the plugin-generated checkbox token as trusted so the html_inline
// override emits it verbatim instead of escaping it.
//
// Task-list items are recognized by the class="task-list-item" the plugin sets
// on the list_item_open token. With label: false the plugin prepends exactly
// one <input ...> html_inline token, so only the FIRST inline child is
// considered. Any <input> the user typed later in the item is never trusted
// and stays escaped.
md.core.ruler.after("github-task-lists", "task-list-allowlist", (state) => {
  const tokens = state.tokens;
  // Start at 2: the pattern is list_item_open, paragraph_open, inline.
  for (let i = 2; i < tokens.length; i++) {
    const inline = tokens[i];
    const children = inline.children;
    if (inline.type !== "inline" || !children) {
      continue;
    }
    if (tokens[i - 1].type !== "paragraph_open") {
      continue;
    }
    const listItem = tokens[i - 2];
    if (listItem.type !== "list_item_open") {
      continue;
    }
    const cls = listItem.attrGet("class") ?? "";
    if (!cls.includes("task-list-item")) {
      continue;
    }
    const first = children[0];
    if (first && first.type === "html_inline" && /^<input\s/i.test(first.content)) {
      first.meta = { taskListPlugin: true };
    }
  }
});
|
||||
|
||||
// Raw HTML from the markdown source is rendered as escaped text (#13937).
// Block-level HTML is always escaped and followed by a newline.
md.renderer.rules.html_block = (tokens, idx) => `${escapeHtml(tokens[idx].content)}\n`;

// Inline HTML is escaped too, with one exception: tokens flagged with
// meta.taskListPlugin come from our own plugin pipeline rather than user
// input and are emitted verbatim. DOMPurify remains the final safety net.
md.renderer.rules.html_inline = (tokens, idx) => {
  const { content, meta } = tokens[idx];
  return meta?.taskListPlugin === true ? content : escapeHtml(content);
};
|
||||
|
||||
// Image renderer: only inline data URIs matching INLINE_DATA_IMAGE_RE become
// <img> elements (#15437); every other source is flattened to its label text.
md.renderer.rules.image = (tokens, idx) => {
  const imageToken = tokens[idx];
  const source = imageToken.attrGet("src")?.trim() ?? "";
  // token.content keeps the raw markdown of the alt text (e.g. **bold**),
  // matching original marked.js behavior.
  const label = escapeHtml(normalizeMarkdownImageLabel(imageToken.content));
  return INLINE_DATA_IMAGE_RE.test(source)
    ? `<img class="markdown-inline-image" src="${escapeHtml(source)}" alt="${label}">`
    : label;
};
|
||||
|
||||
// Override fenced code blocks with copy button + JSON collapse
md.renderer.rules.fence = (tokens, idx) => {
  const token = tokens[idx];
  // token.info contains the full fence info string (e.g., "json title=foo");
  // extract only the first whitespace-separated token as the language.
  const lang = token.info.trim().split(/\s+/)[0] || "";
  // markdown-it leaves the closing line feed on token.content, whereas the
  // marked.js code token this renderer replaces did not. Drop that single line
  // feed so the rendered <code>, the copy payload and the JSON line count stay
  // identical to the previous output (otherwise a one-line block reports 2 lines).
  const text = token.content.replace(/\n$/, "");
  // One escaped copy serves both the element body and the data-code attribute.
  const escapedText = escapeHtml(text);

  const langClass = lang ? ` class="language-${escapeHtml(lang)}"` : "";
  const codeBlock = `<pre><code${langClass}>${escapedText}</code></pre>`;
  const langLabel = lang ? `<span class="code-block-lang">${escapeHtml(lang)}</span>` : "";
  const copyBtn = `<button type="button" class="code-block-copy" data-code="${escapedText}" aria-label="Copy code"><span class="code-block-copy__idle">Copy</span><span class="code-block-copy__done">Copied!</span></button>`;
  const header = `<div class="code-block-header">${langLabel}${copyBtn}</div>`;
  const wrapper = `<div class="code-block-wrapper">${header}${codeBlock}</div>`;

  // Collapse when the fence is tagged "json", or when it is untagged and the
  // trimmed content is wrapped in {...} or [...].
  const trimmed = text.trim();
  const isJson =
    lang === "json" ||
    (!lang &&
      ((trimmed.startsWith("{") && trimmed.endsWith("}")) ||
        (trimmed.startsWith("[") && trimmed.endsWith("]"))));

  if (!isJson) {
    return wrapper;
  }

  const lineCount = text.split("\n").length;
  const label = lineCount > 1 ? `JSON · ${lineCount} lines` : "JSON";
  return `<details class="json-collapse"><summary>${label}</summary>${wrapper}</details>`;
};
|
||||
|
||||
// Override indented code blocks (code_block) with the same treatment as fence
md.renderer.rules.code_block = (tokens, idx) => {
  const token = tokens[idx];
  // markdown-it terminates token.content with a line feed, whereas the marked.js
  // code token this renderer replaces did not. Drop that single line feed so the
  // rendered <code>, the copy payload and the JSON line count stay identical to
  // the previous output (otherwise a one-line block reports 2 lines).
  const text = token.content.replace(/\n$/, "");
  // One escaped copy serves both the element body and the data-code attribute.
  const escapedText = escapeHtml(text);

  const codeBlock = `<pre><code>${escapedText}</code></pre>`;
  const copyBtn = `<button type="button" class="code-block-copy" data-code="${escapedText}" aria-label="Copy code"><span class="code-block-copy__idle">Copy</span><span class="code-block-copy__done">Copied!</span></button>`;
  const header = `<div class="code-block-header">${copyBtn}</div>`;
  const wrapper = `<div class="code-block-wrapper">${header}${codeBlock}</div>`;

  // Indented blocks carry no language tag, so JSON is detected purely from the
  // trimmed content being wrapped in {...} or [...].
  const trimmed = text.trim();
  const isJson =
    (trimmed.startsWith("{") && trimmed.endsWith("}")) ||
    (trimmed.startsWith("[") && trimmed.endsWith("]"));

  if (!isJson) {
    return wrapper;
  }

  const lineCount = text.split("\n").length;
  const label = lineCount > 1 ? `JSON · ${lineCount} lines` : "JSON";
  return `<details class="json-collapse"><summary>${label}</summary>${wrapper}</details>`;
};
|
||||
|
||||
export function toSanitizedMarkdownHtml(markdown: string): string {
|
||||
const input = markdown.trim();
|
||||
if (!input) {
|
||||
@ -136,15 +391,10 @@ export function toSanitizedMarkdownHtml(markdown: string): string {
|
||||
}
|
||||
let rendered: string;
|
||||
try {
|
||||
rendered = marked.parse(`${truncated.text}${suffix}`, {
|
||||
renderer: htmlEscapeRenderer,
|
||||
gfm: true,
|
||||
breaks: true,
|
||||
}) as string;
|
||||
rendered = md.render(`${truncated.text}${suffix}`);
|
||||
} catch (err) {
|
||||
// Fall back to escaped plain text when marked.parse() throws (e.g.
|
||||
// infinite recursion on pathological markdown patterns — #36213).
|
||||
console.warn("[markdown] marked.parse failed, falling back to plain text:", err);
|
||||
// Fall back to escaped plain text when md.render() throws (#36213).
|
||||
console.warn("[markdown] md.render failed, falling back to plain text:", err);
|
||||
const escaped = escapeHtml(`${truncated.text}${suffix}`);
|
||||
rendered = `<pre class="code-block">${escaped}</pre>`;
|
||||
}
|
||||
@ -155,72 +405,6 @@ export function toSanitizedMarkdownHtml(markdown: string): string {
|
||||
return sanitized;
|
||||
}
|
||||
|
||||
// Raw HTML inside chat messages is shown as literal, escaped markup instead of
// being rendered. DOMPurify takes care of security; the point here is UX —
// pasted HTML (e.g. an error page) rendered as formatted output is confusing
// (#13937).
const htmlEscapeRenderer = new marked.Renderer();

htmlEscapeRenderer.html = (htmlToken: { text: string }) => escapeHtml(htmlToken.text);

// Images: a href matching INLINE_DATA_IMAGE_RE becomes an <img>; any other
// href is replaced by the escaped label text.
htmlEscapeRenderer.image = (token: { href?: string | null; text?: string | null }) => {
  const escapedLabel = escapeHtml(normalizeMarkdownImageLabel(token.text));
  const source = token.href?.trim() ?? "";
  return INLINE_DATA_IMAGE_RE.test(source)
    ? `<img class="markdown-inline-image" src="${escapeHtml(source)}" alt="${escapedLabel}">`
    : escapedLabel;
};
|
||||
|
||||
/**
 * Chooses the label to display for a markdown image.
 *
 * @param text - Raw label text; may be omitted, null, empty or whitespace-only.
 * @returns The label with surrounding whitespace removed, or `"image"` when
 *   nothing is left after trimming.
 */
function normalizeMarkdownImageLabel(text?: string | null): string {
  const trimmed = text?.trim();
  // `||` rather than `??`: an empty string must also fall back to the default.
  return trimmed || "image";
}
|
||||
|
||||
// Code renderer override: wraps the block in a header with an optional language
// label and a copy button, and collapses JSON content inside <details>.
htmlEscapeRenderer.code = ({
  text,
  lang,
  escaped,
}: {
  text: string;
  lang?: string;
  escaped?: boolean;
}) => {
  const langClass = lang ? ` class="language-${escapeHtml(lang)}"` : "";
  // When `escaped` is set the text is used as-is; otherwise it is escaped here.
  const safeText = escaped ? text : escapeHtml(text);
  const codeBlock = `<pre><code${langClass}>${safeText}</code></pre>`;
  const langLabel = lang ? `<span class="code-block-lang">${escapeHtml(lang)}</span>` : "";
  // Copy payload for the data-code attribute. `&` is replaced first so the
  // entities produced by the later replacements are not escaped a second time.
  const attrSafe = text
    .replace(/&/g, "&amp;")
    .replace(/"/g, "&quot;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
  const copyBtn = `<button type="button" class="code-block-copy" data-code="${attrSafe}" aria-label="Copy code"><span class="code-block-copy__idle">Copy</span><span class="code-block-copy__done">Copied!</span></button>`;
  const header = `<div class="code-block-header">${langLabel}${copyBtn}</div>`;

  // Collapse when the block is tagged "json", or when it is untagged and the
  // trimmed content is wrapped in {...} or [...].
  const trimmed = text.trim();
  const isJson =
    lang === "json" ||
    (!lang &&
      ((trimmed.startsWith("{") && trimmed.endsWith("}")) ||
        (trimmed.startsWith("[") && trimmed.endsWith("]"))));

  if (isJson) {
    const lineCount = text.split("\n").length;
    const label = lineCount > 1 ? `JSON · ${lineCount} lines` : "JSON";
    return `<details class="json-collapse"><summary>${label}</summary><div class="code-block-wrapper">${header}${codeBlock}</div></details>`;
  }

  return `<div class="code-block-wrapper">${header}${codeBlock}</div>`;
};
|
||||
|
||||
/**
 * Escapes the characters `&`, `<`, `>`, `"` and `'` so the value can be placed
 * in HTML text or inside a quoted attribute without being parsed as markup.
 *
 * @param value - Text to escape.
 * @returns The text with each of the five characters replaced by its entity.
 */
function escapeHtml(value: string): string {
  // `&` must be handled first; otherwise the ampersands introduced by the
  // following replacements would themselves be escaped.
  return value
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
|
||||
|
||||
/**
 * Wraps plain text in the `markdown-plain-text-fallback` container.
 *
 * @param value - Text to display; CRLF and lone CR line endings are converted
 *   to LF before escaping.
 * @returns A `<div>` whose content is the HTML-escaped, newline-normalized text.
 */
function renderEscapedPlainTextHtml(value: string): string {
  const normalized = value.replace(/\r\n?/g, "\n");
  const escaped = escapeHtml(normalized);
  return `<div class="markdown-plain-text-fallback">${escaped}</div>`;
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user