Merge 6aeedeb401a73795f6a487d776e70d3667be0e56 into 8a05c05596ca9ba0735dafd8e359885de4c2c969

This commit is contained in:
zhangfnf 2026-03-21 06:04:26 +00:00 committed by GitHub
commit e74749b7a0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 672 additions and 92 deletions

24
pnpm-lock.yaml generated
View File

@ -650,10 +650,16 @@ importers:
lit:
specifier: ^3.3.2
version: 3.3.2
marked:
specifier: ^17.0.4
version: 17.0.4
markdown-it:
specifier: ^14.1.1
version: 14.1.1
markdown-it-task-lists:
specifier: ^2.1.1
version: 2.1.1
devDependencies:
'@types/markdown-it':
specifier: ^14.1.2
version: 14.1.2
'@vitest/browser-playwright':
specifier: 4.1.0
version: 4.1.0(playwright@1.58.2)(vite@8.0.0(@types/node@25.5.0)(esbuild@0.27.3)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2))(vitest@4.1.0)
@ -5017,6 +5023,9 @@ packages:
resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==}
engines: {node: '>=10'}
markdown-it-task-lists@2.1.1:
resolution: {integrity: sha512-TxFAc76Jnhb2OUu+n3yz9RMu4CwGfaT788br6HhEDlvWfdeJcLUsxk1Hgw2yJio0OXsxv7pyIPmvECY7bMbluA==}
markdown-it@14.1.0:
resolution: {integrity: sha512-a54IwgWPaeBCAAsv13YgmALOF1elABB08FxO9i+r4VFk5Vl4pKokRPeX8u5TCgSsPi6ec1otfLjdOpVcgbpshg==}
hasBin: true
@ -5033,11 +5042,6 @@ packages:
engines: {node: '>= 18'}
hasBin: true
marked@17.0.4:
resolution: {integrity: sha512-NOmVMM+KAokHMvjWmC5N/ZOvgmSWuqJB8FoYI019j4ogb/PeRMKoKIjReZ2w3376kkA8dSJIP8uD993Kxc0iRQ==}
engines: {node: '>= 20'}
hasBin: true
math-intrinsics@1.1.0:
resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==}
engines: {node: '>= 0.4'}
@ -12040,6 +12044,8 @@ snapshots:
dependencies:
semver: 7.7.4
markdown-it-task-lists@2.1.1: {}
markdown-it@14.1.0:
dependencies:
argparse: 2.0.1
@ -12064,8 +12070,6 @@ snapshots:
marked@15.0.12: {}
marked@17.0.4: {}
math-intrinsics@1.1.0: {}
matrix-events-sdk@0.0.1: {}

View File

@ -12,9 +12,11 @@
"@noble/ed25519": "3.0.1",
"dompurify": "^3.3.3",
"lit": "^3.3.2",
"marked": "^17.0.4"
"markdown-it": "^14.1.1",
"markdown-it-task-lists": "^2.1.1"
},
"devDependencies": {
"@types/markdown-it": "^14.1.2",
"@vitest/browser-playwright": "4.1.0",
"jsdom": "^29.0.0",
"playwright": "^1.58.2",

10
ui/src/markdown-it-task-lists.d.ts vendored Normal file
View File

@ -0,0 +1,10 @@
/**
 * Ambient typings for the untyped `markdown-it-task-lists` package, covering
 * only the plugin entry point and the options object passed to `md.use(...)`.
 */
declare module "markdown-it-task-lists" {
  import type MarkdownIt from "markdown-it";

  /** Options object accepted as the second argument of the plugin. */
  interface TaskListsOptions {
    /** When `false`, rendered checkboxes are read-only (`disabled`). */
    enabled?: boolean;
    /** When `false`, the item text is not wrapped in a `<label>`. */
    label?: boolean;
    /** NOTE(review): label placement relative to the checkbox — confirm against the plugin README. */
    labelAfter?: boolean;
  }

  /** Plugin function to hand to `MarkdownIt#use`. */
  function plugin(md: MarkdownIt, options?: TaskListsOptions): void;

  export default plugin;
}

View File

@ -41,6 +41,20 @@
margin-top: 0.25em;
}
/* Hide default marker only for unordered task lists; ordered lists keep numbers.
   Scoped to the .chat-text, .sidebar-markdown and .chat-thinking containers. */
.chat-text :where(ul > .task-list-item),
.sidebar-markdown :where(ul > .task-list-item),
.chat-thinking :where(ul > .task-list-item) {
list-style: none;
}
/* Task-list checkboxes: 0.4em gap on the right, vertically centred on the line.
   NOTE(review): .task-list-item-checkbox is presumably the class emitted by the
   task-list markdown plugin — confirm against its output. */
.chat-text :where(.task-list-item-checkbox),
.sidebar-markdown :where(.task-list-item-checkbox),
.chat-thinking :where(.task-list-item-checkbox) {
margin-right: 0.4em;
vertical-align: middle;
}
.chat-text :where(a) {
color: var(--accent);
text-decoration: underline;

View File

@ -1,8 +1,8 @@
import { marked } from "marked";
import { describe, expect, it, vi } from "vitest";
import { toSanitizedMarkdownHtml } from "./markdown.ts";
import { md, toSanitizedMarkdownHtml } from "./markdown.ts";
describe("toSanitizedMarkdownHtml", () => {
// ── Original tests from before markdown-it migration ──
it("renders basic markdown", () => {
const html = toSanitizedMarkdownHtml("Hello **world**");
expect(html).toContain("<strong>world</strong>");
@ -146,9 +146,9 @@ describe("toSanitizedMarkdownHtml", () => {
expect(second).toBe(first);
});
it("falls back to escaped plain text if marked.parse throws (#36213)", () => {
const parseSpy = vi.spyOn(marked, "parse").mockImplementation(() => {
throw new Error("forced parse failure");
it("falls back to escaped plain text if md.render throws (#36213)", () => {
const renderSpy = vi.spyOn(md, "render").mockImplementation(() => {
throw new Error("forced render failure");
});
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
const input = `Fallback **probe** ${Date.now()}`;
@ -158,8 +158,374 @@ describe("toSanitizedMarkdownHtml", () => {
expect(html).toContain("Fallback **probe**");
expect(warnSpy).toHaveBeenCalledOnce();
} finally {
parseSpy.mockRestore();
renderSpy.mockRestore();
warnSpy.mockRestore();
}
});
// ── Additional tests for markdown-it migration ──
describe("www autolinks", () => {
it("links www.example.com", () => {
const html = toSanitizedMarkdownHtml("Visit www.example.com today");
expect(html).toContain('<a href="http://www.example.com"');
expect(html).toContain("www.example.com</a>");
});
it("links www.example.com with path, query, and fragment", () => {
const html = toSanitizedMarkdownHtml("See www.example.com/path?a=1#section");
expect(html).toContain('<a href="http://www.example.com/path?a=1#section"');
});
it("links www.example.com with port", () => {
const html = toSanitizedMarkdownHtml("Visit www.example.com:8080/foo");
expect(html).toContain('<a href="http://www.example.com:8080/foo"');
});
it("links www.localhost and other single-label hosts", () => {
const html = toSanitizedMarkdownHtml("Visit www.localhost:3000/path for dev");
expect(html).toContain('<a href="http://www.localhost:3000/path"');
});
it("links www.foo_bar.example.com with underscores", () => {
const html = toSanitizedMarkdownHtml("Visit www.foo_bar.example.com");
expect(html).toContain('<a href="http://www.foo_bar.example.com"');
});
it("strips trailing punctuation from links", () => {
const html1 = toSanitizedMarkdownHtml("Check www.example.com/help.");
expect(html1).toContain('href="http://www.example.com/help"');
expect(html1).not.toContain('href="http://www.example.com/help."');
const html2 = toSanitizedMarkdownHtml("See www.example.com!");
expect(html2).toContain('href="http://www.example.com"');
expect(html2).not.toContain('href="http://www.example.com!"');
});
it("handles balanced parentheses in URLs", () => {
const html = toSanitizedMarkdownHtml("(see www.example.com/foo(bar))");
expect(html).toContain('href="http://www.example.com/foo(bar)"');
});
it("stops at < character per GFM spec", () => {
const html = toSanitizedMarkdownHtml("Visit www.example.com/<token> here");
expect(html).toContain('href="http://www.example.com/"');
expect(html).not.toContain("<token>");
expect(html).toContain("&lt;token&gt;");
});
it("does NOT link bare domains without www", () => {
const html = toSanitizedMarkdownHtml("Visit google.com today");
expect(html).not.toContain("<a");
expect(html).toContain("google.com");
});
it("does NOT link filenames with TLD-like extensions", () => {
const html = toSanitizedMarkdownHtml("Check README.md and config.json");
expect(html).not.toContain("<a");
expect(html).toContain("README.md");
});
it("does NOT link IP addresses", () => {
const html = toSanitizedMarkdownHtml("Check 127.0.0.1:8080");
expect(html).not.toContain("<a");
expect(html).toContain("127.0.0.1:8080");
});
});
describe("explicit protocol links", () => {
it("links https:// URLs", () => {
const html = toSanitizedMarkdownHtml("Visit https://example.com");
expect(html).toContain('<a href="https://example.com"');
});
it("links http:// URLs", () => {
const html = toSanitizedMarkdownHtml("Visit http://github.com/openclaw");
expect(html).toContain('<a href="http://github.com/openclaw"');
});
it("links email addresses", () => {
const html = toSanitizedMarkdownHtml("Email me at test@example.com");
expect(html).toContain('<a href="mailto:test@example.com"');
});
});
describe("HTML escaping", () => {
it("escapes HTML tags as text", () => {
const html = toSanitizedMarkdownHtml("<div>**bold**</div>");
expect(html).toContain("&lt;div&gt;");
expect(html).not.toContain("<div>");
// Inner markdown should NOT be rendered since it's inside escaped HTML
expect(html).toContain("**bold**");
});
it("strips script tags", () => {
const html = toSanitizedMarkdownHtml("<script>alert(1)</script>");
expect(html).not.toContain("<script");
expect(html).toContain("&lt;script&gt;");
});
it("escapes inline HTML tags", () => {
const html = toSanitizedMarkdownHtml("Check <b>this</b> out");
expect(html).toContain("&lt;b&gt;");
expect(html).not.toContain("<b>");
});
});
describe("task lists", () => {
it("renders task list checkboxes", () => {
const html = toSanitizedMarkdownHtml("- [ ] Unchecked\n- [x] Checked");
expect(html).toContain("<input");
expect(html).toContain('type="checkbox"');
expect(html).toContain("disabled");
expect(html).toContain("Unchecked");
expect(html).toContain("Checked");
});
it("renders links inside task items", () => {
const html = toSanitizedMarkdownHtml("- [ ] Task with [link](https://example.com)");
expect(html).toContain('<a href="https://example.com"');
});
it("escapes HTML injection in task items", () => {
const html = toSanitizedMarkdownHtml("- [ ] <script>alert(1)</script>");
expect(html).not.toContain("<script");
expect(html).toContain("&lt;script&gt;");
});
it("escapes details/summary injection in task items", () => {
const html = toSanitizedMarkdownHtml("- [ ] <details><summary>x</summary>y</details>");
expect(html).toContain("&lt;details&gt;");
expect(html).not.toContain("<details>");
});
});
describe("images", () => {
it("flattens remote images to alt text", () => {
const html = toSanitizedMarkdownHtml("![Alt text](https://example.com/img.png)");
expect(html).not.toContain("<img");
expect(html).toContain("Alt text");
});
it("preserves markdown formatting in alt text", () => {
const html = toSanitizedMarkdownHtml("![**Build log**](https://example.com/img.png)");
expect(html).toContain("**Build log**");
});
it("preserves code formatting in alt text", () => {
const html = toSanitizedMarkdownHtml("![`error.log`](https://example.com/img.png)");
expect(html).toContain("`error.log`");
});
it("preserves base64 data URI images (#15437)", () => {
const html = toSanitizedMarkdownHtml("![Chart](data:image/png;base64,iVBORw0KGgo=)");
expect(html).toContain("<img");
expect(html).toContain('class="markdown-inline-image"');
expect(html).toContain("data:image/png;base64,");
});
it("uses fallback label for unlabeled images", () => {
const html = toSanitizedMarkdownHtml("![](https://example.com/image.png)");
expect(html).not.toContain("<img");
expect(html).toContain("image");
});
});
describe("code blocks", () => {
it("renders fenced code blocks", () => {
const html = toSanitizedMarkdownHtml("```ts\nconsole.log(1)\n```");
expect(html).toContain("<pre>");
expect(html).toContain("<code");
expect(html).toContain("console.log(1)");
});
it("renders indented code blocks", () => {
// markdown-it requires a blank line before indented code
const html = toSanitizedMarkdownHtml("text\n\n indented code");
expect(html).toContain("<pre>");
expect(html).toContain("<code>");
});
it("includes copy button", () => {
const html = toSanitizedMarkdownHtml("```\ncode\n```");
expect(html).toContain('class="code-block-copy"');
expect(html).toContain("data-code=");
});
it("collapses JSON code blocks", () => {
const html = toSanitizedMarkdownHtml('```json\n{"key": "value"}\n```');
expect(html).toContain("<details");
expect(html).toContain("json-collapse");
expect(html).toContain("JSON");
});
});
describe("GFM features", () => {
it("renders strikethrough", () => {
const html = toSanitizedMarkdownHtml("This is ~~deleted~~ text");
expect(html).toContain("<s>deleted</s>");
});
it("renders tables", () => {
  // Deliberately not named `md`: this file imports the markdown-it instance as
  // `md`, and shadowing it here would make later edits to this test error-prone.
  const tableMarkdown = "| A | B |\n|---|---|\n| 1 | 2 |";
  const html = toSanitizedMarkdownHtml(tableMarkdown);
  expect(html).toContain("<table");
  expect(html).toContain("<th>");
});
it("renders basic markdown", () => {
const html = toSanitizedMarkdownHtml("**bold** and *italic*");
expect(html).toContain("<strong>bold</strong>");
expect(html).toContain("<em>italic</em>");
});
it("renders headings", () => {
const html = toSanitizedMarkdownHtml("# Heading 1\n## Heading 2");
expect(html).toContain("<h1>");
expect(html).toContain("<h2>");
});
it("renders blockquotes", () => {
const html = toSanitizedMarkdownHtml("> quote");
expect(html).toContain("<blockquote>");
});
it("renders lists", () => {
const html = toSanitizedMarkdownHtml("- item 1\n- item 2");
expect(html).toContain("<ul>");
expect(html).toContain("<li>");
});
});
describe("security", () => {
it("blocks javascript: in links via DOMPurify", () => {
const html = toSanitizedMarkdownHtml("[click me](javascript:alert(1))");
// DOMPurify strips dangerous href schemes but keeps the anchor text
expect(html).not.toContain('href="javascript:');
expect(html).toContain("click me");
});
it("shows alt text for javascript: images", () => {
const html = toSanitizedMarkdownHtml("![Build log](javascript:alert(1))");
expect(html).not.toContain("<img");
expect(html).not.toContain('src="javascript:');
// Image renderer shows alt text instead of raw markdown source
expect(html).toContain("Build log");
expect(html).not.toContain("![Build log]");
});
it("shows alt text for vbscript: and file: images", () => {
const html1 = toSanitizedMarkdownHtml("![Alt1](vbscript:msgbox(1))");
expect(html1).toContain("Alt1");
expect(html1).not.toContain("<img");
const html2 = toSanitizedMarkdownHtml("![Alt2](file:///etc/passwd)");
expect(html2).toContain("Alt2");
expect(html2).not.toContain("<img");
});
it("blocks non-image data: URIs", () => {
const html = toSanitizedMarkdownHtml("[x](data:text/html,<script>alert(1)</script>)");
// validateLink blocks non-image data: URIs, so raw source appears
expect(html).not.toContain("<a");
expect(html).toContain("data:text/html");
});
it("does not auto-link bare file:// URIs", () => {
const html = toSanitizedMarkdownHtml("Check file:///etc/passwd");
// Bare file:// without www. or http:// should NOT be auto-linked
expect(html).not.toContain("<a");
expect(html).toContain("file:///etc/passwd");
});
it("strips href from explicit file:// links via DOMPurify", () => {
const html = toSanitizedMarkdownHtml("[click](file:///etc/passwd)");
// DOMPurify strips file: scheme, leaving anchor text
expect(html).not.toContain('href="file:');
expect(html).toContain("click");
});
});
describe("ReDoS protection", () => {
it("does not throw on deeply nested emphasis markers (#36213)", () => {
const nested = "*".repeat(500) + "text" + "*".repeat(500);
expect(() => toSanitizedMarkdownHtml(nested)).not.toThrow();
const html = toSanitizedMarkdownHtml(nested);
expect(html).toContain("text");
});
it("does not throw on deeply nested brackets (#36213)", () => {
const nested = "[".repeat(200) + "link" + "]".repeat(200) + "(" + "x".repeat(200) + ")";
expect(() => toSanitizedMarkdownHtml(nested)).not.toThrow();
});
it("does not hang on backtick + bracket ReDoS pattern", { timeout: 2_000 }, () => {
const HEADER =
'{"type":"message","id":"aaa","parentId":"bbb",' +
'"timestamp":"2000-01-01T00:00:00.000Z","message":' +
'{"role":"toolResult","toolCallId":"call_000",' +
'"toolName":"read","content":[{"type":"text","text":' +
'"{\\"type\\":\\"message\\",\\"id\\":\\"ccc\\",' +
'\\"timestamp\\":\\"2000-01-01T00:00:00.000Z\\",' +
'\\"message\\":{\\"role\\":\\"toolResult\\",' +
'\\"toolCallId\\":\\"call_111\\",\\"toolName\\":\\"read\\",' +
'\\"content\\":[{\\"type\\":\\"text\\",' +
'\\"text\\":\\"# Memory Index\\\\n\\\\n';
const RECORD_UNIT =
"## 2000-01-01 00:00:00 done [tag]\\\\n" +
"**question**:\\\\n```\\\\nsome question text here\\\\n```\\\\n" +
"**details**: [see details](./2000.01.01/00000000/INFO.md)\\\\n\\\\n";
const poison = HEADER + RECORD_UNIT.repeat(9);
const start = performance.now();
const html = toSanitizedMarkdownHtml(poison);
const elapsed = performance.now() - start;
expect(elapsed).toBeLessThan(500);
expect(html.length).toBeGreaterThan(0);
});
});
describe("large text handling", () => {
it("uses plain text fallback for oversized content", () => {
// MARKDOWN_PARSE_LIMIT is 40_000 chars
const input = Array.from(
{ length: 320 },
(_, i) => `Paragraph ${i + 1}: ${"Long plain-text reply. ".repeat(8)}`,
).join("\n\n");
const html = toSanitizedMarkdownHtml(input);
expect(html).toContain('class="markdown-plain-text-fallback"');
});
it("preserves indentation in plain text fallback", () => {
const input = `${"Header line\n".repeat(5000)}\n indented log line\n deeper indent`;
const html = toSanitizedMarkdownHtml(input);
expect(html).toContain('class="markdown-plain-text-fallback"');
expect(html).toContain(" indented log line");
expect(html).toContain(" deeper indent");
});
it("caches oversized fallback results", () => {
const input = Array.from({ length: 240 }, (_, i) => `P${i}`).join("\n\n") + "x".repeat(35000);
const first = toSanitizedMarkdownHtml(input);
const second = toSanitizedMarkdownHtml(input);
expect(second).toBe(first);
});
it("falls back to escaped text if md.render throws (#36213)", () => {
const renderSpy = vi.spyOn(md, "render").mockImplementation(() => {
throw new Error("forced failure");
});
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
try {
const html = toSanitizedMarkdownHtml("test");
expect(html).toContain('<pre class="code-block">');
expect(warnSpy).toHaveBeenCalledOnce();
} finally {
renderSpy.mockRestore();
warnSpy.mockRestore();
}
});
});
});

View File

@ -1,5 +1,6 @@
import DOMPurify from "dompurify";
import { marked } from "marked";
import MarkdownIt from "markdown-it";
import markdownItTaskLists from "markdown-it-task-lists";
import { truncateText } from "./format.ts";
const allowedTags = [
@ -19,10 +20,12 @@ const allowedTags = [
"h4",
"hr",
"i",
"input",
"li",
"ol",
"p",
"pre",
"s",
"span",
"strong",
"summary",
@ -37,7 +40,9 @@ const allowedTags = [
];
const allowedAttrs = [
"checked",
"class",
"disabled",
"href",
"rel",
"target",
@ -107,6 +112,256 @@ function installHooks() {
});
}
// ── markdown-it instance with custom renderers ──
/**
 * Replace the HTML-significant characters `&`, `<`, `>`, `"` and `'` with
 * their entity forms so the result can be placed in element text or in a
 * quoted attribute value.
 *
 * @param value - Raw text to escape.
 * @returns The escaped text.
 */
function escapeHtml(value: string): string {
  // "&" is handled first so the entities produced by later steps are not
  // escaped a second time.
  const substitutions: ReadonlyArray<readonly [string, string]> = [
    ["&", "&amp;"],
    ["<", "&lt;"],
    [">", "&gt;"],
    ['"', "&quot;"],
    ["'", "&#39;"],
  ];
  let escaped = value;
  for (const [raw, entity] of substitutions) {
    escaped = escaped.split(raw).join(entity);
  }
  return escaped;
}
/**
 * Produce the label shown for a markdown image.
 *
 * @param text - Alt text from the image token; may be missing or blank.
 * @returns The trimmed text, or the literal `"image"` when nothing is left after trimming.
 */
function normalizeMarkdownImageLabel(text?: string | null): string {
  const label = (text ?? "").trim();
  return label.length > 0 ? label : "image";
}
/**
 * Module-wide markdown-it instance. The statements that follow in this file
 * configure it (linkify rules, link validation, task lists, renderer
 * overrides), and `toSanitizedMarkdownHtml` calls `md.render` on it.
 *
 * Exported so callers such as tests can stub `md.render`.
 */
export const md = new MarkdownIt({
html: true, // Enable HTML recognition so html_block/html_inline overrides can escape it
breaks: true,
linkify: true,
});
// Enable GFM strikethrough (~~text~~) to match original marked.js behavior.
// markdown-it uses <s> tags; we added "s" to allowedTags for DOMPurify.
md.enable("strikethrough");
// Disable fuzzy link detection to prevent bare filenames like "README.md"
// from being auto-linked as "http://README.md". URLs with explicit protocol
// (https://...) and emails are still linkified.
//
// Alternative considered: extensions/matrix/src/matrix/format.ts uses fuzzyLink
// with a file-extension blocklist to filter false positives at render time.
// We chose the www-only approach instead because:
// 1. Matches original marked.js GFM behavior exactly (bare domains were never linked)
// 2. No blocklist to maintain — new TLDs like .ai, .io, .dev would need constant updates
// 3. Predictable behavior — users can always use explicit https:// for any URL
md.linkify.set({ fuzzyLink: false });
// Re-enable www. prefix detection per GFM spec: bare URLs without protocol
// must start with "www." to be auto-linked. This avoids false positives on
// filenames while preserving expected behavior for "www.example.com".
// GFM spec: valid domain = alphanumeric/underscore/hyphen segments separated
// by periods, at least one period, no underscores in last two segments.
// Characters that are dropped when they end a www autolink. This is the GFM
// trailing-punctuation set (? ! . , : * _ ~) plus ";" and the two quote
// characters, which marked's URL backpedal and cmark-gfm also treat as
// trailing delimiters. Without the quotes, `"www.example.com/docs"` would link
// the closing quote as part of the URL.
const WWW_TRAILING_PUNCTUATION = "?!.,;:*_~'\"";

// Text expected right after the literal "www": "." + domain labels (a single
// label such as "localhost" is allowed, underscores are allowed everywhere),
// an optional ":port", and an optional path/query/fragment that stops at
// whitespace or "<" so placeholders like "<token>" stay visible as text.
const WWW_AUTOLINK_TAIL_RE =
  /^\.[a-z0-9_](?:[a-z0-9_-]*[a-z0-9_])?(?:\.[a-z0-9_](?:[a-z0-9_-]*[a-z0-9_])?)*(?::\d+)?(?:[/?#][^\s<]*)?/i;

md.linkify.add("www", {
  /**
   * Decide how many characters after the "www" prefix belong to the link.
   *
   * `text` is the text being scanned and `pos` the index just past "www".
   * Returns the length of the link tail, or 0 when there is no valid link.
   */
  validate(text, pos) {
    const tail = text.slice(pos);
    const match = WWW_AUTOLINK_TAIL_RE.exec(tail);
    if (!match) {
      return 0;
    }

    let len = match[0].length;

    // Count parentheses once up front so the trimming loop below stays linear
    // even for input with thousands of trailing ")" characters.
    let parenBalance = 0;
    for (let i = 0; i < len; i++) {
      const c = tail.charAt(i);
      if (c === "(") {
        parenBalance++;
      } else if (c === ")") {
        parenBalance--;
      }
    }

    // Walk backwards, dropping trailing delimiters and unbalanced ")".
    while (len > 0) {
      const ch = tail.charAt(len - 1);
      if (WWW_TRAILING_PUNCTUATION.includes(ch)) {
        len--;
        continue;
      }
      // GFM parentheses rule: a trailing ")" is kept when it closes a "("
      // inside the URL and dropped only when it is surplus.
      if (ch === ")" && parenBalance < 0) {
        parenBalance++;
        len--;
        continue;
      }
      break;
    }
    return len;
  },
  /** Give the matched "www..." text an explicit scheme so the href is absolute. */
  normalize(match) {
    match.url = "http://" + match.url;
  },
});
// Override default link validator to allow all base64 data URIs for images.
// The default only allows gif/png/jpeg/webp; we support any image/* subtype
// (e.g., avif, jpg, svg+xml) to match the previous marked.js behavior.
//
// Security for blocked schemes (javascript:, vbscript:, file:) is handled at
// render time, not validation time. This ensures markdown-it generates tokens
// so our custom renderers can process them:
// - Images: renderer.rules.image shows alt text for non-data: URLs
// - Links: DOMPurify strips dangerous href schemes (javascript:, vbscript:, etc.)
// Blocking at validateLink would skip token generation entirely, causing raw
// markdown source to appear instead of graceful fallbacks.
md.validateLink = (url: string) => {
  // Inline image data URIs recognised by INLINE_DATA_IMAGE_RE are always valid.
  if (INLINE_DATA_IMAGE_RE.test(url)) {
    return true;
  }

  const normalized = url.trim().toLowerCase();
  const isDataUri = normalized.startsWith("data:");
  const isImageDataUri = normalized.startsWith("data:image/");

  // The only rejection is a data: URI that is not an image (e.g.
  // data:text/html): it has no graceful fallback and is a potential XSS
  // vector. Every other URL is accepted here; dangerous schemes are dealt with
  // later by the custom renderers and by DOMPurify.
  return !isDataUri || isImageDataUri;
};
// Enable GFM task list checkboxes (- [x] / - [ ]).
// enabled: false keeps checkboxes read-only (disabled="") — task lists in
// chat messages are display-only, not interactive forms.
// label: false avoids wrapping item text in <label>, which would break
// accessibility when the item contains links (MDN warns against anchors inside labels).
md.use(markdownItTaskLists, { enabled: false, label: false });
// Mark the <input> html_inline token inside task-list items as trusted so the
// html_inline override lets it through. With label: false, the plugin generates
// only a single <input ...> token per item.
// We identify task-list items by the class="task-list-item" the plugin sets.
md.core.ruler.after("github-task-lists", "task-list-allowlist", (state) => {
  const { tokens } = state;

  // Look for the sequence list_item_open, paragraph_open, inline. Starting at
  // index 2 guarantees both predecessors exist.
  for (let idx = 2; idx < tokens.length; idx += 1) {
    const inline = tokens[idx];
    const children = inline.children;
    if (inline.type !== "inline" || !children) {
      continue;
    }

    const paragraph = tokens[idx - 1];
    const listItem = tokens[idx - 2];
    if (paragraph.type !== "paragraph_open" || listItem.type !== "list_item_open") {
      continue;
    }

    // Only list items tagged with the task-list class are of interest.
    const itemClass = listItem.attrGet("class") ?? "";
    if (!itemClass.includes("task-list-item")) {
      continue;
    }

    // Trust just the first <input ...> html_inline child (one checkbox per
    // item); any other inline HTML in the item stays untrusted and is escaped.
    const checkbox = children.find(
      (child) => child.type === "html_inline" && /^<input\s/i.test(child.content),
    );
    if (checkbox) {
      checkbox.meta = { taskListPlugin: true };
    }
  }
});
// Override html_block and html_inline to escape raw HTML (#13937).
// Exception: html_inline tokens marked by a trusted plugin (meta.taskListPlugin)
// are allowed through — they are generated by our own plugin pipeline, not user input,
// and DOMPurify provides the final safety net regardless.
// Block-level raw HTML is emitted as escaped text followed by a newline.
md.renderer.rules.html_block = (tokens, idx) => `${escapeHtml(tokens[idx].content)}\n`;
// Inline raw HTML is escaped unless the token carries meta.taskListPlugin === true,
// in which case its content is emitted verbatim.
md.renderer.rules.html_inline = (tokens, idx) => {
  const { content, meta } = tokens[idx];
  const isTrusted = meta?.taskListPlugin === true;
  return isTrusted ? content : escapeHtml(content);
};
// Override image to only allow base64 data URIs (#15437)
md.renderer.rules.image = (tokens, idx) => {
  const imageToken = tokens[idx];

  // token.content holds the raw alt markdown (e.g. **bold**), which is shown
  // literally; a blank alt becomes the fallback label.
  const escapedAlt = escapeHtml(normalizeMarkdownImageLabel(imageToken.content));
  const source = (imageToken.attrGet("src") ?? "").trim();

  // Only sources matching INLINE_DATA_IMAGE_RE become an <img>; everything
  // else is reduced to its escaped alt text.
  return INLINE_DATA_IMAGE_RE.test(source)
    ? `<img class="markdown-inline-image" src="${escapeHtml(source)}" alt="${escapedAlt}">`
    : escapedAlt;
};
// Override fenced code blocks with copy button + JSON collapse
md.renderer.rules.fence = (tokens, idx) => {
  const token = tokens[idx];
  // token.info contains the full fence info string (e.g., "json title=foo");
  // only the first whitespace-separated word is used as the language.
  const lang = token.info.trim().split(/\s+/)[0] || "";
  const text = token.content;

  // Escape once; the same escaped string is safe both as <code> text and as
  // the value of the data-code attribute.
  const escapedText = escapeHtml(text);
  const langClass = lang ? ` class="language-${escapeHtml(lang)}"` : "";
  const codeBlock = `<pre><code${langClass}>${escapedText}</code></pre>`;
  const langLabel = lang ? `<span class="code-block-lang">${escapeHtml(lang)}</span>` : "";
  const copyBtn = `<button type="button" class="code-block-copy" data-code="${escapedText}" aria-label="Copy code"><span class="code-block-copy__idle">Copy</span><span class="code-block-copy__done">Copied!</span></button>`;
  const header = `<div class="code-block-header">${langLabel}${copyBtn}</div>`;
  const wrapped = `<div class="code-block-wrapper">${header}${codeBlock}</div>`;

  // JSON is detected either by an explicit "json" language or, when no
  // language is given, by content that looks like an object or array literal.
  const trimmed = text.trim();
  const isJson =
    lang === "json" ||
    (!lang &&
      ((trimmed.startsWith("{") && trimmed.endsWith("}")) ||
        (trimmed.startsWith("[") && trimmed.endsWith("]"))));
  if (!isJson) {
    return wrapped;
  }

  // markdown-it leaves the final line break in token.content; drop it before
  // counting so a one-line block is labelled "JSON" rather than "2 lines".
  const lineCount = text.replace(/\n$/, "").split("\n").length;
  const label = lineCount > 1 ? `JSON &middot; ${lineCount} lines` : "JSON";
  return `<details class="json-collapse"><summary>${label}</summary>${wrapped}</details>`;
};
// Override indented code blocks (code_block) with the same treatment as fence
md.renderer.rules.code_block = (tokens, idx) => {
  const text = tokens[idx].content;

  // Escape once; the same escaped string is safe both as <code> text and as
  // the value of the data-code attribute.
  const escapedText = escapeHtml(text);
  const codeBlock = `<pre><code>${escapedText}</code></pre>`;
  const copyBtn = `<button type="button" class="code-block-copy" data-code="${escapedText}" aria-label="Copy code"><span class="code-block-copy__idle">Copy</span><span class="code-block-copy__done">Copied!</span></button>`;
  const header = `<div class="code-block-header">${copyBtn}</div>`;
  const wrapped = `<div class="code-block-wrapper">${header}${codeBlock}</div>`;

  // Indented blocks carry no language, so JSON is detected purely from
  // content that looks like an object or array literal.
  const trimmed = text.trim();
  const isJson =
    (trimmed.startsWith("{") && trimmed.endsWith("}")) ||
    (trimmed.startsWith("[") && trimmed.endsWith("]"));
  if (!isJson) {
    return wrapped;
  }

  // markdown-it leaves the final line break in token.content; drop it before
  // counting so a one-line block is labelled "JSON" rather than "2 lines".
  const lineCount = text.replace(/\n$/, "").split("\n").length;
  const label = lineCount > 1 ? `JSON &middot; ${lineCount} lines` : "JSON";
  return `<details class="json-collapse"><summary>${label}</summary>${wrapped}</details>`;
};
export function toSanitizedMarkdownHtml(markdown: string): string {
const input = markdown.trim();
if (!input) {
@ -136,15 +391,10 @@ export function toSanitizedMarkdownHtml(markdown: string): string {
}
let rendered: string;
try {
rendered = marked.parse(`${truncated.text}${suffix}`, {
renderer: htmlEscapeRenderer,
gfm: true,
breaks: true,
}) as string;
rendered = md.render(`${truncated.text}${suffix}`);
} catch (err) {
// Fall back to escaped plain text when marked.parse() throws (e.g.
// infinite recursion on pathological markdown patterns — #36213).
console.warn("[markdown] marked.parse failed, falling back to plain text:", err);
// Fall back to escaped plain text when md.render() throws (#36213).
console.warn("[markdown] md.render failed, falling back to plain text:", err);
const escaped = escapeHtml(`${truncated.text}${suffix}`);
rendered = `<pre class="code-block">${escaped}</pre>`;
}
@ -155,72 +405,6 @@ export function toSanitizedMarkdownHtml(markdown: string): string {
return sanitized;
}
// Prevent raw HTML in chat messages from being rendered as formatted HTML.
// Display it as escaped text so users see the literal markup.
// Security is handled by DOMPurify, but rendering pasted HTML (e.g. error
// pages) as formatted output is confusing UX (#13937).
const htmlEscapeRenderer = new marked.Renderer();
htmlEscapeRenderer.html = ({ text }: { text: string }) => escapeHtml(text);
// Images: only sources matching INLINE_DATA_IMAGE_RE render as <img>; any
// other source is replaced by its escaped label.
htmlEscapeRenderer.image = (token: { href?: string | null; text?: string | null }) => {
  const escapedLabel = escapeHtml(normalizeMarkdownImageLabel(token.text));
  const source = (token.href ?? "").trim();
  return INLINE_DATA_IMAGE_RE.test(source)
    ? `<img class="markdown-inline-image" src="${escapeHtml(source)}" alt="${escapedLabel}">`
    : escapedLabel;
};
/**
 * Pick the label to display for a markdown image.
 *
 * @param text - Alt text, possibly missing or whitespace-only.
 * @returns The trimmed alt text, or `"image"` when it is absent or blank.
 */
function normalizeMarkdownImageLabel(text?: string | null): string {
  if (text == null) {
    return "image";
  }
  const trimmed = text.trim();
  if (trimmed === "") {
    return "image";
  }
  return trimmed;
}
// Code blocks: wrap the code in a header with an optional language label and a
// copy button; JSON-looking blocks are additionally collapsed inside <details>.
htmlEscapeRenderer.code = ({
  text,
  lang,
  escaped,
}: {
  text: string;
  lang?: string;
  escaped?: boolean;
}) => {
  // Text already flagged as escaped is used as-is for the <code> body.
  const codeHtml = escaped ? text : escapeHtml(text);
  const classAttr = lang ? ` class="language-${escapeHtml(lang)}"` : "";
  const codeBlock = `<pre><code${classAttr}>${codeHtml}</code></pre>`;

  // Attribute escaping for data-code: "&" first, then the quote and angle
  // brackets. Single quotes are intentionally left untouched here.
  const attrPairs: ReadonlyArray<readonly [string, string]> = [
    ["&", "&amp;"],
    ['"', "&quot;"],
    ["<", "&lt;"],
    [">", "&gt;"],
  ];
  let attrSafe = text;
  for (const [raw, entity] of attrPairs) {
    attrSafe = attrSafe.split(raw).join(entity);
  }

  const langLabel = lang ? `<span class="code-block-lang">${escapeHtml(lang)}</span>` : "";
  const copyBtn = `<button type="button" class="code-block-copy" data-code="${attrSafe}" aria-label="Copy code"><span class="code-block-copy__idle">Copy</span><span class="code-block-copy__done">Copied!</span></button>`;
  const header = `<div class="code-block-header">${langLabel}${copyBtn}</div>`;
  const wrapped = `<div class="code-block-wrapper">${header}${codeBlock}</div>`;

  // JSON: explicit "json" language, or no language and object/array-shaped text.
  const body = text.trim();
  const looksLikeObject = body.startsWith("{") && body.endsWith("}");
  const looksLikeArray = body.startsWith("[") && body.endsWith("]");
  const isJson = lang === "json" || (!lang && (looksLikeObject || looksLikeArray));
  if (!isJson) {
    return wrapped;
  }

  const lineCount = text.split("\n").length;
  const summary = lineCount > 1 ? `JSON &middot; ${lineCount} lines` : "JSON";
  return `<details class="json-collapse"><summary>${summary}</summary>${wrapped}</details>`;
};
/**
 * Escape `&`, `<`, `>`, `"` and `'` as HTML entities.
 *
 * @param value - Raw text.
 * @returns Text safe to embed in element content or a quoted attribute.
 */
function escapeHtml(value: string): string {
  const entityFor: Record<string, string> = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  // Single pass: each special character is mapped exactly once, so entities
  // that were just produced are never escaped again.
  return value.replace(/[&<>"']/g, (ch) => entityFor[ch] ?? ch);
}
/**
 * Render text as escaped plain text inside the plain-text fallback container.
 *
 * @param value - Raw text; CRLF and lone CR line endings are converted to LF.
 * @returns A `<div class="markdown-plain-text-fallback">` holding the escaped text.
 */
function renderEscapedPlainTextHtml(value: string): string {
  const withUnixNewlines = value.replace(/\r\n?/g, "\n");
  const escapedBody = escapeHtml(withUnixNewlines);
  return `<div class="markdown-plain-text-fallback">${escapedBody}</div>`;
}