Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/base/browser/markdownRenderer.ts
5240 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { onUnexpectedError } from '../common/errors.js';
7
import { escapeDoubleQuotes, IMarkdownString, MarkdownStringTrustedOptions, parseHrefAndDimensions, removeMarkdownEscapes } from '../common/htmlContent.js';
8
import { markdownEscapeEscapedIcons } from '../common/iconLabels.js';
9
import { defaultGenerator } from '../common/idGenerator.js';
10
import { KeyCode } from '../common/keyCodes.js';
11
import { DisposableStore, IDisposable } from '../common/lifecycle.js';
12
import * as marked from '../common/marked/marked.js';
13
import { parse } from '../common/marshalling.js';
14
import { FileAccess, Schemas } from '../common/network.js';
15
import { cloneAndChange } from '../common/objects.js';
16
import { basename as pathBasename } from '../common/path.js';
17
import { basename, dirname, resolvePath } from '../common/resources.js';
18
import { escape } from '../common/strings.js';
19
import { URI, UriComponents } from '../common/uri.js';
20
import * as DOM from './dom.js';
21
import * as domSanitize from './domSanitize.js';
22
import { convertTagToPlaintext } from './domSanitize.js';
23
import { StandardKeyboardEvent } from './keyboardEvent.js';
24
import { StandardMouseEvent } from './mouseEvent.js';
25
import { renderIcon, renderLabelWithIcons } from './ui/iconLabel/iconLabels.js';
26
27
/**
 * Callback invoked when a link inside rendered markdown is activated.
 *
 * @param linkContent The target of the activated link.
 * @param mdStr The markdown string the link was rendered from.
 */
export type MarkdownActionHandler = (linkContent: string, mdStr: IMarkdownString) => void;
28
29
/**
 * Options for the rendering of markdown with {@link renderMarkdown}.
 */
export interface MarkdownRenderOptions {
	/**
	 * Asynchronous code block renderer. The resolved element replaces the placeholder
	 * that was emitted for the code block. Only used when {@link codeBlockRendererSync} is not set.
	 */
	readonly codeBlockRenderer?: (languageId: string, value: string) => Promise<HTMLElement>;

	/**
	 * Synchronous code block renderer. Takes precedence over {@link codeBlockRenderer}.
	 */
	readonly codeBlockRendererSync?: (languageId: string, value: string, raw?: string) => HTMLElement;

	/**
	 * Called after asynchronously rendered code blocks have been inserted and
	 * whenever an image in the rendered output finishes loading.
	 */
	readonly asyncRenderCallback?: () => void;

	/**
	 * Receives link activations (click, auxclick, Space/Enter). When omitted, no link listeners are installed.
	 */
	readonly actionHandler?: MarkdownActionHandler;

	/**
	 * When set, the lexed tokens are passed through {@link fillInIncompleteTokens} before being parsed.
	 */
	readonly fillInIncompleteTokens?: boolean;

	/**
	 * Adjusts how the rendered HTML is sanitized.
	 */
	readonly sanitizerConfig?: MarkdownSanitizerConfig;

	/**
	 * Options forwarded to `Marked` when lexing/parsing.
	 */
	readonly markedOptions?: MarkdownRendererMarkedOptions;

	/**
	 * Extensions the `Marked` instance is constructed with.
	 */
	readonly markedExtensions?: marked.MarkedExtension[];
}
46
47
/**
 * Subset of options passed to `Marked` for rendering markdown.
 */
export interface MarkdownRendererMarkedOptions {
	/** Forwarded as marked's `gfm` option. */
	readonly gfm?: boolean;

	/** Forwarded as marked's `breaks` option. */
	readonly breaks?: boolean;
}
54
55
/**
 * Customizes the sanitization of rendered markdown.
 */
export interface MarkdownSanitizerConfig {
	/**
	 * Forwarded to the DOM sanitizer. In this file it additionally causes raw html tokens to be
	 * escaped instead of dropped, and non-checkbox inputs to be converted via `convertTagToPlaintext`.
	 */
	readonly replaceWithPlaintext?: boolean;

	/** Replaces the default tag allow-list ({@link allowedMarkdownHtmlTags}). */
	readonly allowedTags?: {
		readonly override: readonly string[];
	};

	/** Replaces the default attribute allow-list ({@link allowedMarkdownHtmlAttributes}). */
	readonly allowedAttributes?: {
		readonly override: ReadonlyArray<string | domSanitize.SanitizeAttributeRule>;
	};

	/** Link schemes that are allowed in addition to the built-in ones. */
	readonly allowedLinkSchemes?: {
		readonly augment: readonly string[];
	};

	/**
	 * Consulted for media elements whose source is neither a `file` nor a `data` uri.
	 * When it returns `false` the element is replaced by an element built from its `outerHTML` string.
	 */
	readonly remoteImageIsAllowed?: (uri: URI) => boolean;
}
68
69
/**
 * Renderer overrides installed on every marked renderer created by this file.
 */
const defaultMarkedRenderers = Object.freeze({
	/**
	 * Renders an `<img>` tag. Dimensions encoded in the href are split off by
	 * `parseHrefAndDimensions` and appended as separate attributes.
	 */
	image: ({ href, title, text }: marked.Tokens.Image): string => {
		const attributes: string[] = [];
		let dimensions: string[] = [];

		if (href) {
			const parsed = parseHrefAndDimensions(href);
			dimensions = parsed.dimensions;
			attributes.push(`src="${escapeDoubleQuotes(parsed.href)}"`);
		}
		if (text) {
			attributes.push(`alt="${escapeDoubleQuotes(text)}"`);
		}
		if (title) {
			attributes.push(`title="${escapeDoubleQuotes(title)}"`);
		}
		attributes.push(...dimensions);

		return `<img ${attributes.join(' ')}>`;
	},

	/**
	 * Renders a paragraph as `<p>…</p>` without a trailing newline.
	 */
	paragraph(this: marked.Renderer, { tokens }: marked.Tokens.Paragraph): string {
		return `<p>${this.parser.parseInline(tokens)}</p>`;
	},

	/**
	 * Renders a non-draggable anchor whose href is HTML-encoded and whose title falls back to the href.
	 */
	link(this: marked.Renderer, { href, title, tokens }: marked.Tokens.Link): string {
		const renderedText = this.parser.parseInline(tokens);
		if (typeof href !== 'string') {
			return '';
		}

		// Remove markdown escapes. Workaround for https://github.com/chjj/marked/issues/829
		// Only applies to the raw link case, where the text equals the href.
		const linkText = href === renderedText ? removeMarkdownEscapes(renderedText) : renderedText;

		const titleAttribute = typeof title === 'string' ? escapeDoubleQuotes(removeMarkdownEscapes(title)) : '';

		// HTML encode the href
		const encodedHref = removeMarkdownEscapes(href)
			.replace(/&/g, '&amp;')
			.replace(/</g, '&lt;')
			.replace(/>/g, '&gt;')
			.replace(/"/g, '&quot;')
			.replace(/'/g, '&#39;');

		return `<a href="${encodedHref}" title="${titleAttribute || encodedHref}" draggable="false">${linkText}</a>`;
	},
});
117
118
/**
 * Blockquote renderer that processes GitHub-style alert syntax.
 * Transforms blockquotes like "> [!NOTE]" into structured alert markup with icons.
 *
 * Based on GitHub's alert syntax: https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts
 *
 * @param fallbackRenderer Renderer used for every blockquote that does not start with an alert marker.
 * @returns A blockquote renderer. Note that it strips the alert marker from the first text token in place.
 */
function createAlertBlockquoteRenderer(fallbackRenderer: (this: marked.Renderer, token: marked.Tokens.Blockquote) => string) {
	// Matches a leading `[!TYPE]` marker (case-insensitive) plus trailing whitespace/newlines
	const alertMarker = /^\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION)\]\s*?\n*/i;

	// Icon id per lower-cased alert type
	const iconIdBySeverity: Record<string, string> = {
		'note': 'info',
		'tip': 'light-bulb',
		'important': 'comment',
		'warning': 'alert',
		'caution': 'stop'
	};

	return function (this: marked.Renderer, token: marked.Tokens.Blockquote): string {
		// Alerts must start with a paragraph whose first inline token is text
		const leadingParagraph = token.tokens[0];
		if (leadingParagraph?.type !== 'paragraph') {
			return fallbackRenderer.call(this, token);
		}

		const leadingInline = leadingParagraph.tokens?.[0];
		if (leadingInline?.type !== 'text') {
			return fallbackRenderer.call(this, token);
		}

		const markerMatch = leadingInline.raw.match(alertMarker);
		if (!markerMatch) {
			return fallbackRenderer.call(this, token);
		}

		// Remove the alert marker from the token
		leadingInline.raw = leadingInline.raw.replace(alertMarker, '');
		leadingInline.text = leadingInline.text.replace(alertMarker, '');

		const kind = markerMatch[1];
		const severity = kind.toLowerCase();
		const label = kind.charAt(0).toUpperCase() + kind.slice(1).toLowerCase();
		const iconHtml = renderIcon({ id: iconIdBySeverity[severity] }).outerHTML;

		// Render the remaining content
		const content = this.parser.parse(token.tokens);

		// Return alert markup with icon and severity (skipping the first 3 characters: `<p>`)
		return `<blockquote data-severity="${severity}"><p><span>${iconHtml}${label}</span>${content.substring(3)}</blockquote>\n`;
	};
}
173
174
/**
 * Result of {@link renderMarkdown}. Disposing it releases the listeners that were
 * registered while rendering.
 */
export interface IRenderedMarkdown extends IDisposable {
	/** The element that holds the rendered markdown. */
	readonly element: HTMLElement;
}
177
178
/**
 * Low-level way to create an HTML element from a markdown string.
 *
 * **Note** that for most cases you should be using {@link import('../../editor/browser/widget/markdownRenderer/browser/markdownRenderer.js').MarkdownRenderer MarkdownRenderer}
 * which comes with support for pretty code block rendering and which uses the default way of handling links.
 *
 * @param markdown The markdown string to render.
 * @param options Rendering options, see {@link MarkdownRenderOptions}.
 * @param target Optional element to render into (populated through `DOM.reset`). When omitted a new `div` is created.
 * @returns The element holding the rendered content. Disposing the result removes the registered listeners
 * and prevents asynchronously rendered code blocks that resolve afterwards from being inserted.
 */
export function renderMarkdown(markdown: IMarkdownString, options: MarkdownRenderOptions = {}, target?: HTMLElement): IRenderedMarkdown {
	const disposables = new DisposableStore();
	let isDisposed = false;

	const markedInstance = new marked.Marked(...(options.markedExtensions ?? []));
	const { renderer, codeBlocks, syncCodeBlocks } = createMarkdownRenderer(markedInstance, options, markdown);
	const value = preprocessMarkdownString(markdown);

	let renderedMarkdown: string;
	if (options.fillInIncompleteTokens) {
		// The defaults are applied by parse but not lexer()/parser(), and they need to be present
		const opts: marked.MarkedOptions = {
			...markedInstance.defaults,
			...options.markedOptions,
			renderer
		};
		const tokens = markedInstance.lexer(value, opts);
		const newTokens = fillInIncompleteTokens(tokens);
		renderedMarkdown = markedInstance.parser(newTokens, opts);
	} else {
		renderedMarkdown = markedInstance.parse(value, { ...options.markedOptions, renderer, async: false });
	}

	// Rewrite theme icons
	if (markdown.supportThemeIcons) {
		const elements = renderLabelWithIcons(renderedMarkdown);
		renderedMarkdown = elements.map(e => typeof e === 'string' ? e : e.outerHTML).join('');
	}

	const renderedContent = document.createElement('div');
	const sanitizerConfig = getDomSanitizerConfig(markdown, options.sanitizerConfig ?? {});
	domSanitize.safeSetInnerHtml(renderedContent, renderedMarkdown, sanitizerConfig);

	// Rewrite links and images before potentially inserting them into the real dom
	rewriteRenderedLinks(markdown, options, renderedContent);

	let outElement: HTMLElement;
	if (target) {
		outElement = target;
		DOM.reset(target, ...renderedContent.childNodes);
	} else {
		outElement = renderedContent;
	}

	if (codeBlocks.length > 0) {
		// Fire-and-forget: code blocks are swapped in once all of them have rendered.
		// Failures are reported explicitly instead of surfacing as unhandled rejections.
		Promise.all(codeBlocks).then((tuples) => {
			if (isDisposed) {
				return;
			}
			replaceCodeBlockPlaceholders(outElement, new Map(tuples));
			options.asyncRenderCallback?.();
		}).catch(onUnexpectedError);
	} else if (syncCodeBlocks.length > 0) {
		replaceCodeBlockPlaceholders(outElement, new Map(syncCodeBlocks));
	}

	// Signal size changes for image tags
	if (options.asyncRenderCallback) {
		// eslint-disable-next-line no-restricted-syntax
		for (const img of outElement.getElementsByTagName('img')) {
			// One-shot listener: it disposes itself on the first `load` event
			const listener = disposables.add(DOM.addDisposableListener(img, 'load', () => {
				listener.dispose();
				options.asyncRenderCallback!();
			}));
		}
	}

	// Add event listeners for links
	if (options.actionHandler) {
		const clickCb = (e: PointerEvent) => {
			const mouseEvent = new StandardMouseEvent(DOM.getWindow(outElement), e);
			if (!mouseEvent.leftButton && !mouseEvent.middleButton) {
				return;
			}
			activateLink(markdown, options, mouseEvent);
		};
		disposables.add(DOM.addDisposableListener(outElement, 'click', clickCb));
		disposables.add(DOM.addDisposableListener(outElement, 'auxclick', clickCb));

		disposables.add(DOM.addDisposableListener(outElement, 'keydown', (e) => {
			const keyboardEvent = new StandardKeyboardEvent(e);
			if (!keyboardEvent.equals(KeyCode.Space) && !keyboardEvent.equals(KeyCode.Enter)) {
				return;
			}
			activateLink(markdown, options, keyboardEvent);
		}));
	}

	// Remove/disable inputs: checkboxes stay but are disabled, everything else is removed
	// (or replaced by its plaintext form when `replaceWithPlaintext` is set).
	// The live collection is copied because elements are removed while iterating.
	// eslint-disable-next-line no-restricted-syntax
	for (const input of [...outElement.getElementsByTagName('input')]) {
		if (input.attributes.getNamedItem('type')?.value === 'checkbox') {
			input.setAttribute('disabled', '');
			continue;
		}

		const replacement = options.sanitizerConfig?.replaceWithPlaintext ? convertTagToPlaintext(input) : undefined;
		if (replacement) {
			input.parentElement?.replaceChild(replacement, input);
		} else {
			input.remove();
		}
	}

	return {
		element: outElement,
		dispose: () => {
			isDisposed = true;
			disposables.dispose();
		}
	};
}

/**
 * Replaces the content of every `div[data-code]` placeholder below `root` with the rendered
 * code block element registered under the placeholder's id. Placeholders without a match are left untouched.
 */
function replaceCodeBlockPlaceholders(root: HTMLElement, renderedElements: Map<string, HTMLElement>): void {
	// eslint-disable-next-line no-restricted-syntax
	const placeholderElements = root.querySelectorAll<HTMLDivElement>(`div[data-code]`);
	for (const placeholderElement of placeholderElements) {
		const renderedElement = renderedElements.get(placeholderElement.dataset['code'] ?? '');
		if (renderedElement) {
			DOM.reset(placeholderElement, renderedElement);
		}
	}
}
315
316
/**
 * Post-processes media sources and anchors of already sanitized markdown output.
 *
 * - `src` of `img`, `audio`, `video` and `source` elements is resolved against the base uri (if any)
 *   and rewritten to a browser-loadable uri. Elements rejected by `remoteImageIsAllowed` are replaced.
 * - `href` of anchors is cleared and the massaged/resolved target is stored in `data-href`, which is
 *   what click handling reads. Links with disallowed targets are unwrapped to their child nodes.
 *
 * @param markdown The markdown string that was rendered (provides `baseUri`, `uris`, `isTrusted`).
 * @param options The render options (only `sanitizerConfig.remoteImageIsAllowed` is read).
 * @param root The element whose descendants are rewritten in place.
 */
function rewriteRenderedLinks(markdown: IMarkdownString, options: MarkdownRenderOptions, root: HTMLElement) {
	// eslint-disable-next-line no-restricted-syntax
	for (const el of root.querySelectorAll('img, audio, video, source')) {
		const src = el.getAttribute('src'); // Get the raw 'src' attribute value as text, not the resolved 'src'
		if (!src) {
			continue;
		}

		let href = src;
		try {
			if (markdown.baseUri) { // absolute or relative local path, or file: uri
				href = resolveWithBaseUri(URI.from(markdown.baseUri), href);
			}
		} catch {
			// Best effort: keep the unresolved source when resolving against the base uri fails
		}

		el.setAttribute('src', massageHref(markdown, href, true));

		if (options.sanitizerConfig?.remoteImageIsAllowed) {
			const uri = URI.parse(href);
			if (uri.scheme !== Schemas.file && uri.scheme !== Schemas.data && !options.sanitizerConfig.remoteImageIsAllowed(uri)) {
				el.replaceWith(DOM.$('', undefined, el.outerHTML));
			}
		}
	}

	// eslint-disable-next-line no-restricted-syntax
	for (const el of root.querySelectorAll('a')) {
		const href = el.getAttribute('href'); // Get the raw 'href' attribute value as text, not the resolved 'href'
		el.setAttribute('href', ''); // Clear out href. We use the `data-href` for handling clicks instead

		const isDisallowed = !href
			|| /^data:|javascript:/i.test(href)
			|| (/^command:/i.test(href) && !markdown.isTrusted)
			|| /^command:(\/\/\/)?_workbench\.downloadResource/i.test(href);

		if (isDisallowed) {
			// drop the link
			el.replaceWith(...el.childNodes);
			continue;
		}

		let resolvedHref = massageHref(markdown, href, false);
		if (markdown.baseUri) {
			// Resolve the massaged href (not the raw one) so that the uri rewriting done by
			// `massageHref` is not lost when a base uri is present. Hrefs that already carry
			// a scheme are returned unchanged by `resolveWithBaseUri`.
			resolvedHref = resolveWithBaseUri(URI.from(markdown.baseUri), resolvedHref);
		}
		el.dataset.href = resolvedHref;
	}
}
358
359
/**
 * Creates the marked renderer used by {@link renderMarkdown}.
 *
 * @param marked The `Marked` instance whose `Renderer` class is instantiated.
 * @param options Render options (code block renderers, sanitizer config, marked options).
 * @param markdown The markdown string being rendered (alert syntax, html support and trust are read from it).
 * @returns The renderer plus the collections that are filled with `[id, element]` tuples while code blocks
 * are rendered: `codeBlocks` for the async renderer, `syncCodeBlocks` for the sync one.
 */
function createMarkdownRenderer(marked: marked.Marked, options: MarkdownRenderOptions, markdown: IMarkdownString): { renderer: marked.Renderer; codeBlocks: Promise<[string, HTMLElement]>[]; syncCodeBlocks: [string, HTMLElement][] } {
	const renderer = new marked.Renderer(options.markedOptions);
	renderer.image = defaultMarkedRenderers.image;
	renderer.link = defaultMarkedRenderers.link;
	renderer.paragraph = defaultMarkedRenderers.paragraph;

	if (markdown.supportAlertSyntax) {
		renderer.blockquote = createAlertBlockquoteRenderer(renderer.blockquote);
	}

	// Will collect [id, renderedElement] tuples
	const codeBlocks: Promise<[string, HTMLElement]>[] = [];
	const syncCodeBlocks: [string, HTMLElement][] = [];

	// Placeholder that is later swapped for the rendered code block with the same id
	const placeholderHtml = (id: string, text: string): string => `<div class="code" data-code="${id}">${escape(text)}</div>`;

	const { codeBlockRendererSync, codeBlockRenderer } = options;
	if (codeBlockRendererSync) {
		renderer.code = ({ text, lang, raw }: marked.Tokens.Code) => {
			const id = defaultGenerator.nextId();
			const element = codeBlockRendererSync(postProcessCodeBlockLanguageId(lang), text, raw);
			syncCodeBlocks.push([id, element]);
			return placeholderHtml(id, text);
		};
	} else if (codeBlockRenderer) {
		renderer.code = ({ text, lang }: marked.Tokens.Code) => {
			const id = defaultGenerator.nextId();
			const pending = codeBlockRenderer(postProcessCodeBlockLanguageId(lang), text);
			codeBlocks.push(pending.then(element => [id, element]));
			return placeholderHtml(id, text);
		};
	}

	if (!markdown.supportHtml) {
		// Note: we always pass the output through dompurify after this so that we don't rely on
		// marked for real sanitization.
		renderer.html = ({ text }) => {
			if (options.sanitizerConfig?.replaceWithPlaintext) {
				return escape(text);
			}
			if (!markdown.isTrusted) {
				return '';
			}
			// Trusted markdown keeps html that starts with an opening span tag or ends with a closing one
			return /^(<span[^>]+>)|(<\/\s*span>)$/.test(text) ? text : '';
		};
	}

	return { renderer, codeBlocks, syncCodeBlocks };
}
403
404
/**
 * Prepares the raw markdown text for lexing.
 *
 * Values longer than 100,000 UTF-16 code units are truncated and suffixed with `…`,
 * and theme icon escapes are processed when the markdown string supports theme icons.
 *
 * @param markdown The markdown string to preprocess.
 * @returns The text to hand to marked.
 */
function preprocessMarkdownString(markdown: IMarkdownString) {
	const maxLength = 100_000;
	let value = markdown.value;

	// values that are too long will freeze the UI
	if (value.length > maxLength) {
		// Do not cut a surrogate pair in half: if the last kept code unit is a
		// high surrogate, drop it so no lone surrogate precedes the ellipsis.
		const lastKeptUnit = value.charCodeAt(maxLength - 1);
		const end = lastKeptUnit >= 0xD800 && lastKeptUnit <= 0xDBFF ? maxLength - 1 : maxLength;
		value = `${value.slice(0, end)}…`;
	}

	// escape theme icons
	if (markdown.supportThemeIcons) {
		value = markdownEscapeEscapedIcons(value);
	}

	return value;
}
419
420
/**
 * Forwards the activation of a rendered link to the configured action handler.
 *
 * Does nothing when the event did not originate from within an `a[data-href]` element.
 * Otherwise the event is always default-prevented and stopped, even if the handler throws
 * (errors are reported through `onUnexpectedError`).
 *
 * @param mdStr The markdown string the link belongs to.
 * @param options Render options providing the `actionHandler`.
 * @param event The mouse or keyboard event that triggered the activation.
 */
function activateLink(mdStr: IMarkdownString, options: MarkdownRenderOptions, event: StandardMouseEvent | StandardKeyboardEvent): void {
	const anchor = event.target.closest('a[data-href]');
	if (!DOM.isHTMLElement(anchor)) {
		return;
	}

	try {
		const storedHref = anchor.dataset['href'];
		if (storedHref) {
			const href = mdStr.baseUri
				? resolveWithBaseUri(URI.from(mdStr.baseUri), storedHref)
				: storedHref;
			options.actionHandler?.(href, mdStr);
		}
	} catch (err) {
		onUnexpectedError(err);
	} finally {
		event.preventDefault();
		event.stopPropagation();
	}
}
441
442
/**
 * Rewrites uri references inside an encoded query/argument part.
 *
 * The part is uri-decoded and parsed; every value for which `markdown.uris` has an entry
 * is replaced with the revived uri. The result is serialized as JSON and uri-encoded again.
 *
 * @param markdown The markdown string providing the `uris` lookup.
 * @param part The uri-encoded part to rewrite.
 * @returns The rewritten part, or `part` unchanged when it cannot be decoded/parsed or parses to a falsy value.
 */
function uriMassage(markdown: IMarkdownString, part: string): string {
	let parsed: unknown;
	try {
		parsed = parse(decodeURIComponent(part));
	} catch {
		// ignore
	}

	if (!parsed) {
		return part;
	}

	const rewritten = cloneAndChange(parsed, value => {
		const components = markdown.uris && markdown.uris[value];
		return components ? URI.revive(components) : undefined;
	});
	return encodeURIComponent(JSON.stringify(rewritten));
}
461
462
/**
 * Maps an href from the markdown source to the value that is written into the DOM.
 *
 * @param markdown The markdown string providing the `uris` lookup.
 * @param href The href as found in the rendered output.
 * @param isDomUri Whether the result ends up in a `src` attribute. If so, the uri is converted with
 * `FileAccess.uriToBrowserUri` (`data:` hrefs are returned as-is).
 * @returns For non-DOM uris: `href` itself when `markdown.uris` has no entry for it or the entry is
 * equal to the parsed href, otherwise the mapped uri (with its query rewritten by `uriMassage`) as string.
 */
function massageHref(markdown: IMarkdownString, href: string, isDomUri: boolean): string {
	const mappedUri = URI.revive(markdown.uris && markdown.uris[href]);

	if (isDomUri) {
		if (href.startsWith(Schemas.data + ':')) {
			return href;
		}
		// this URI will end up as "src"-attribute of a dom node
		// and because of that special rewriting needs to be done
		// so that the URI uses a protocol that's understood by
		// browsers (like http or https)
		return FileAccess.uriToBrowserUri(mappedUri ?? URI.parse(href)).toString(true);
	}

	if (!mappedUri) {
		return href;
	}
	if (URI.parse(href).toString() === mappedUri.toString()) {
		return href; // no transformation performed
	}

	const result = mappedUri.query
		? mappedUri.with({ query: uriMassage(markdown, mappedUri.query) })
		: mappedUri;
	return result.toString();
}
489
490
/**
 * Extracts the language id from the info string of a fenced code block.
 *
 * Everything from the first whitespace, `:`, `,`, `{` or `?` onwards is dropped, e.g.
 * `js {1,3}` becomes `js`. Characters such as `+` are part of the id, so `c++` stays `c++`.
 *
 * @param lang The info string of the code block, if any.
 * @returns The language id, or the empty string when there is none.
 */
function postProcessCodeBlockLanguageId(lang: string | undefined): string {
	if (!lang) {
		return '';
	}

	// Splitting a non-empty string with a limit of 1 always yields exactly one part
	return lang.split(/[\s:,{?]/, 1)[0] ?? lang;
}
501
502
/**
 * Resolves `href` against `baseUri`.
 *
 * @param baseUri The base uri. When its path ends with `/` it is used as the folder to resolve
 * against, otherwise its parent (`dirname`) is used.
 * @param href The href to resolve.
 * @returns `href` unchanged when it starts with a scheme, otherwise the resolved uri as string.
 */
function resolveWithBaseUri(baseUri: URI, href: string): string {
	if (/^\w[\w\d+.-]*:/.test(href)) {
		// Already has a scheme, nothing to resolve
		return href;
	}

	const folder = baseUri.path.endsWith('/') ? baseUri : dirname(baseUri);
	return resolvePath(folder, href).toString();
}
514
515
/**
 * The parts of a markdown string that influence sanitization.
 */
type MdStrConfig = {
	/** When truthy, `command:` links are allowed. */
	readonly isTrusted?: boolean | MarkdownStringTrustedOptions;

	/** When set, relative link and media paths are allowed. */
	readonly baseUri?: UriComponents;
};
519
520
/**
 * Sanitizes rendered markdown html with the sanitizer config derived from the markdown string settings.
 *
 * @param renderedMarkdown The html produced by marked.
 * @param originalMdStrConfig Trust and base uri settings of the markdown string the html was rendered from.
 * @param options Additional sanitizer customizations.
 * @returns The sanitized html.
 */
function sanitizeRenderedMarkdown(
	renderedMarkdown: string,
	originalMdStrConfig: MdStrConfig,
	options: MarkdownSanitizerConfig = {},
): TrustedHTML {
	return domSanitize.sanitizeHtml(renderedMarkdown, getDomSanitizerConfig(originalMdStrConfig, options));
}
528
529
/**
 * Tags that are allowed in rendered markdown by default: the basic markup tags plus `input`.
 */
export const allowedMarkdownHtmlTags = Object.freeze([
	...domSanitize.basicMarkupHtmlTags,

	// Allow inputs for rendering checkboxes. Other types of inputs are removed and the inputs are always disabled
	'input',
]);
533
534
/**
 * Attributes that are allowed in rendered markdown by default. Plain strings are allowed
 * on any element, rule objects decide per element/value.
 */
export const allowedMarkdownHtmlAttributes = Object.freeze<Array<string | domSanitize.SanitizeAttributeRule>>([
	'align',
	'autoplay',
	'alt',
	'colspan',
	'controls',
	'draggable',
	'height',
	'href',
	'loop',
	'muted',
	'playsinline',
	'poster',
	'rowspan',
	'src',
	'target',
	'title',
	'type',
	'width',
	'start',

	// Input (For disabled inputs)
	'checked',
	'disabled',
	'value',

	// Custom markdown attributes
	'data-code',
	'data-href',
	'data-severity',

	// Only allow very specific styles: optional color, background-color and border-radius
	// declarations (in that order) on span elements
	{
		attributeName: 'style',
		shouldKeep: (element, data) =>
			element.tagName === 'SPAN'
			&& data.attrName === 'style'
			&& /^(color\:(#[0-9a-fA-F]+|var\(--vscode(-[a-zA-Z0-9]+)+\));)?(background-color\:(#[0-9a-fA-F]+|var\(--vscode(-[a-zA-Z0-9]+)+\));)?(border-radius:[0-9]+px;)?$/.test(data.attrValue),
	},

	// Only allow codicons for classes (span elements only)
	{
		attributeName: 'class',
		shouldKeep: (element, data) =>
			element.tagName === 'SPAN'
			&& data.attrName === 'class'
			&& /^codicon codicon-[a-z\-]+( codicon-modifier-[a-z\-]+)?$/.test(data.attrValue),
	},
]);
591
592
/**
 * Builds the DOM sanitizer configuration for rendered markdown.
 *
 * @param mdStrConfig Trust and base uri settings of the markdown string. `command:` links are only
 * allowed when it is trusted; relative link/media paths are only allowed when it has a base uri.
 * @param options Overrides for the allowed tags/attributes and additional allowed link schemes.
 * @returns The sanitizer configuration.
 */
function getDomSanitizerConfig(mdStrConfig: MdStrConfig, options: MarkdownSanitizerConfig): domSanitize.DomSanitizerConfig {
	const allowRelativePaths = !!mdStrConfig.baseUri;

	const allowedLinkSchemes = [
		Schemas.http,
		Schemas.https,
		Schemas.mailto,
		Schemas.file,
		Schemas.vscodeFileResource,
		Schemas.vscodeRemote,
		Schemas.vscodeRemoteResource,
		Schemas.vscodeNotebookCell,
		// For links that are handled entirely by the action handler
		Schemas.internal,
		// Command links require trust
		...(mdStrConfig.isTrusted ? [Schemas.command] : []),
		// Caller provided additions
		...(options.allowedLinkSchemes?.augment ?? []),
	];

	const allowedMediaSchemes = [
		Schemas.http,
		Schemas.https,
		Schemas.data,
		Schemas.file,
		Schemas.vscodeFileResource,
		Schemas.vscodeRemote,
		Schemas.vscodeRemoteResource,
	];

	return {
		// allowedTags should included everything that markdown renders to.
		// Since we have our own sanitize function for marked, it's possible we missed some tag so let dompurify make sure.
		// HTML tags that can result from markdown are from reading https://spec.commonmark.org/0.29/
		// HTML table tags that can result from markdown are from https://github.github.com/gfm/#tables-extension-
		allowedTags: {
			override: options.allowedTags?.override ?? allowedMarkdownHtmlTags
		},
		allowedAttributes: {
			override: options.allowedAttributes?.override ?? allowedMarkdownHtmlAttributes,
		},
		allowedLinkProtocols: {
			override: allowedLinkSchemes,
		},
		allowRelativeLinkPaths: allowRelativePaths,
		allowedMediaProtocols: {
			override: allowedMediaSchemes
		},
		allowRelativeMediaPaths: allowRelativePaths,
		replaceWithPlaintext: options.replaceWithPlaintext,
	};
}
645
646
/**
 * Renders `str` as plaintext, stripping out Markdown syntax if it's a {@link IMarkdownString}.
 *
 * For example `# Header` would be output as `Header`.
 *
 * Plain strings are returned unchanged. Markdown values longer than 100,000 UTF-16 code units
 * are truncated and suffixed with `…` before rendering.
 *
 * @param str The string or markdown string to render.
 * @param options Controls how code blocks and links are represented in the output.
 * @returns The trimmed plaintext, with the HTML entities known to `unescapeInfo` decoded.
 */
export function renderAsPlaintext(str: IMarkdownString | string, options?: {
	/** Controls if the ``` of code blocks should be preserved in the output or not */
	readonly includeCodeBlocksFences?: boolean;
	/** Controls if we want to format empty links from "Link [](file)" to "Link file" */
	readonly useLinkFormatter?: boolean;
}) {
	if (typeof str === 'string') {
		return str;
	}

	// values that are too long will freeze the UI
	const maxLength = 100_000;
	let value = str.value ?? '';
	if (value.length > maxLength) {
		// Do not cut a surrogate pair in half: if the last kept code unit is a
		// high surrogate, drop it so no lone surrogate precedes the ellipsis.
		const lastKeptUnit = value.charCodeAt(maxLength - 1);
		const end = lastKeptUnit >= 0xD800 && lastKeptUnit <= 0xDBFF ? maxLength - 1 : maxLength;
		value = `${value.slice(0, end)}…`;
	}

	const renderer = createPlainTextRenderer();
	if (options?.includeCodeBlocksFences) {
		renderer.code = codeBlockFences;
	}
	if (options?.useLinkFormatter) {
		renderer.link = linkFormatter;
	}

	const html = marked.parse(value, { async: false, renderer });

	// Sanitize as untrusted content, then decode the known entities; unknown entities are left as-is
	return sanitizeRenderedMarkdown(html, { isTrusted: false }, {})
		.toString()
		.replace(/&(#\d+|[a-zA-Z]+);/g, m => unescapeInfo.get(m) ?? m)
		.trim();
}
681
682
/**
 * HTML entities that are decoded back to their character when rendering as plaintext.
 */
const unescapeInfo = new Map<string, string>([
	['&quot;', '"'],
	['&nbsp;', ' '],
	['&amp;', '&'],
	['&#39;', '\''],
	['&lt;', '<'],
	['&gt;', '>'],
]);
690
691
/**
 * Creates a marked renderer that emits the textual content of markdown instead of HTML markup.
 *
 * Block level constructs end with a newline, inline formatting is reduced to its text,
 * code is html-escaped, and raw html, horizontal rules and images produce no output.
 */
function createPlainTextRenderer(): marked.Renderer {
	const renderer = new marked.Renderer();

	// Shared implementations for tokens that are handled the same way
	const textOnly = ({ text }: { text: string }): string => text;
	const escapedText = ({ text }: { text: string }): string => escape(text);
	const nothing = (): string => '';

	// Block level
	renderer.code = escapedText;
	renderer.blockquote = ({ text }: marked.Tokens.Blockquote): string => text + '\n';
	renderer.html = nothing;
	renderer.heading = function ({ tokens }: marked.Tokens.Heading): string {
		return this.parser.parseInline(tokens) + '\n';
	};
	renderer.hr = nothing;
	renderer.list = function ({ items }: marked.Tokens.List): string {
		return items.map(item => this.listitem(item)).join('\n') + '\n';
	};
	renderer.listitem = ({ text }: marked.Tokens.ListItem): string => text + '\n';
	renderer.paragraph = function ({ tokens }: marked.Tokens.Paragraph): string {
		return this.parser.parseInline(tokens) + '\n';
	};
	renderer.table = function ({ header, rows }: marked.Tokens.Table): string {
		// Cells are separated by spaces, rows by newlines
		const headerLine = header.map(cell => this.tablecell(cell)).join(' ');
		const bodyLines = rows.map(cells => cells.map(cell => this.tablecell(cell)).join(' ')).join('\n');
		return headerLine + '\n' + bodyLines + '\n';
	};
	renderer.tablerow = textOnly;
	renderer.tablecell = function ({ tokens }: marked.Tokens.TableCell): string {
		return this.parser.parseInline(tokens);
	};

	// Inline level
	renderer.strong = textOnly;
	renderer.em = textOnly;
	renderer.codespan = escapedText;
	renderer.br = (): string => '\n';
	renderer.del = textOnly;
	renderer.image = nothing;
	renderer.text = textOnly;
	renderer.link = textOnly;

	return renderer;
}
753
754
/**
 * Plaintext code renderer that keeps the ``` fences around the (html-escaped) code.
 */
const codeBlockFences = ({ text }: marked.Tokens.Code): string => {
	const fence = '```';
	return '\n' + fence + '\n' + escape(text) + '\n' + fence + '\n';
};
757
758
/**
 * Plaintext link renderer that falls back to the basename of the link target when the link has no text.
 * If the href cannot be parsed as uri, the basename is computed from the href as a path instead.
 */
const linkFormatter = ({ text, href }: marked.Tokens.Link): string => {
	if (!href) {
		return text;
	}

	try {
		const uri = URI.parse(href);
		return text.trim() || basename(uri);
	} catch {
		return text.trim() || pathBasename(href);
	}
};
769
770
/**
 * Concatenates the `raw` source text of the given tokens, in order.
 */
function mergeRawTokenText(tokens: marked.Token[]): string {
	return tokens.reduce((merged, token) => merged + token.raw, '');
}
777
778
/**
 * Tries to complete an unfinished inline construct (codespan, emphasis, link) at the end of `token`.
 *
 * The text subtokens are inspected from last to first; the last line of the first one that
 * contains the start of a known pattern decides which completion is applied.
 *
 * @param token The text or paragraph token to inspect.
 * @returns The token produced by the matching completion, or `undefined` when the token has no
 * subtokens or no pattern matches.
 */
function completeSingleLinePattern(token: marked.Tokens.Text | marked.Tokens.Paragraph): marked.Token | undefined {
	const subtokens = token.tokens;
	if (!subtokens) {
		return undefined;
	}

	for (let index = subtokens.length - 1; index >= 0; index--) {
		const subtoken = subtokens[index];
		if (subtoken.type !== 'text') {
			continue;
		}

		// Only the text after the last newline is considered
		const lastLine = subtoken.raw.slice(subtoken.raw.lastIndexOf('\n') + 1);

		if (lastLine.includes('`')) {
			return completeCodespan(token);
		}
		if (lastLine.includes('**')) {
			return completeDoublestar(token);
		}
		if (/\*\w/.test(lastLine)) {
			return completeStar(token);
		}
		if (/(^|\s)__\w/.test(lastLine)) {
			return completeDoubleUnderscore(token);
		}
		if (/(^|\s)_\w/.test(lastLine)) {
			return completeUnderscore(token);
		}

		// This token doesn't have the link text, eg if it contains other markdown constructs that are in other subtokens.
		// But some preceding token does have an unbalanced [ at least
		const hasUnbalancedBracketBefore = () => subtokens.slice(0, index).some(t => t.type === 'text' && /\[[^\]]*$/.test(t.raw));

		const isInLinkTarget =
			// Text with start of link target
			hasLinkTextAndStartOfLinkTarget(lastLine)
			|| (hasStartOfLinkTargetAndNoLinkText(lastLine) && hasUnbalancedBracketBefore());

		if (isInLinkTarget) {
			const [next, afterNext] = subtokens.slice(index + 1);

			// A markdown link can look like
			// [link text](https://microsoft.com "more text")
			// Where "more text" is a title for the link or an argument to a vscode command link

			// If the link was parsed as a link, then look for a link token and a text token with a quote
			const argStartsAfterParsedLink = next?.type === 'link' && afterNext?.type === 'text' && /^ *"[^"]*$/.test(afterNext.raw);
			// And if the link was not parsed as a link (eg command link), just look for a single quote in this token
			const argStartsInThisLine = /^[^"]* +"[^"]*$/.test(lastLine);

			return argStartsAfterParsedLink || argStartsInThisLine
				? completeLinkTargetArg(token)
				: completeLinkTarget(token);
		}

		// Contains the start of link text, and no following tokens contain the link target
		if (/(^|\s)\[\w*[^\]]*$/.test(lastLine)) {
			return completeLinkText(token);
		}
	}

	return undefined;
}
841
842
/**
 * Whether `str` contains a complete link text followed by the opening of a link target,
 * e.g. `[text](` or `[text](http`. The `[` must be at the start or preceded by whitespace.
 */
function hasLinkTextAndStartOfLinkTarget(str: string): boolean {
	return /(^|\s)\[.*\]\(\w*/.test(str);
}
845
846
/**
 * Whether `str` closes a link text that was opened elsewhere and starts an unfinished link target,
 * e.g. `text](http`. There must be no `[` before the `](` and no `)` after it.
 */
function hasStartOfLinkTargetAndNoLinkText(str: string): boolean {
	return /^[^\[]*\]\([^\)]*$/.test(str);
}
849
850
/**
 * Tries to complete an unfinished inline pattern in the last item of `list`.
 *
 * @param list The list token to patch up.
 * @returns A re-lexed list token, or `undefined` when there is nothing to fix, the pattern is
 * not one that is handled here, or re-lexing did not produce a list.
 */
function completeListItemPattern(list: marked.Tokens.List): marked.Tokens.List | undefined {
	// Patch up this one list item
	const finalItem = list.items[list.items.length - 1];
	const finalSubToken = finalItem.tokens ? finalItem.tokens[finalItem.tokens.length - 1] : undefined;

	/*
	Example list token structures:

	list
		list_item
			text
				text
				codespan
				link
		list_item
			text
			code // Complete indented codeblock
		list_item
			text
			space
			text
				text // Incomplete indented codeblock
		list_item
			text
			list // Nested list
				list_item
					text
						text

	Contrast with paragraph:
	paragraph
		text
		codespan
	*/

	function listEndsInHeading(candidate: marked.Tokens.List): boolean {
		// A list item can be rendered as a heading for some reason when it has a subitem where we haven't rendered the text yet like this:
		// 1. list item
		//    -
		const trailingToken = candidate.items.at(-1)?.tokens.at(-1);
		if (trailingToken?.type === 'heading') {
			return true;
		}
		return trailingToken?.type === 'list' && listEndsInHeading(trailingToken as marked.Tokens.List);
	}

	let completedToken: marked.Token | undefined;
	if (finalSubToken?.type === 'text' && !('inRawBlock' in finalItem)) { // Why does Tag have a type of 'text'
		completedToken = completeSingleLinePattern(finalSubToken as marked.Tokens.Text);
	} else if (listEndsInHeading(list)) {
		// Re-lex with trailing content appended to the raw list text
		const relexedList = marked.lexer(list.raw.trim() + ' &nbsp;')[0] as marked.Tokens.List;
		// Anything but a list means something went wrong
		return relexedList.type === 'list' ? relexedList : undefined;
	}

	if (completedToken?.type !== 'paragraph') { // 'text' item inside the list item turns into paragraph
		// Nothing to fix, or not a pattern we were expecting
		return undefined;
	}

	const precedingItemsText = mergeRawTokenText(list.items.slice(0, -1));

	// Grabbing the `- ` or `1. ` or `* ` off the list item because I can't find a better way to do this
	const itemLead = finalItem.raw.match(/^(\s*(-|\d+\.|\*) +)/)?.[0];
	if (!itemLead) {
		// Is badly formatted
		return undefined;
	}

	// Rebuild the last item from its lead, its untouched leading subtokens and the completed token
	const rebuiltItemText = itemLead + mergeRawTokenText(finalItem.tokens.slice(0, -1)) + completedToken.raw;

	const rebuiltList = marked.lexer(precedingItemsText + rebuiltItemText)[0] as marked.Tokens.List;
	// Anything but a list means something went wrong
	return rebuiltList.type === 'list' ? rebuiltList : undefined;
}
932
933
/**
 * Re-lexes the document when the trailing heading token ends in a dash.
 *
 * @param token The heading token to inspect.
 * @param fullRawText The raw markdown that is re-lexed when a fix is applied.
 * @returns The freshly lexed tokens for `fullRawText` with ` &nbsp;` appended, or nothing
 * when the heading's raw text does not end in `-` (optionally followed by whitespace).
 */
function completeHeading(token: marked.Tokens.Heading, fullRawText: string): marked.TokensList | void {
	// NOTE(review): presumably the dangling dash is the start of a list item that is still being
	// written and was picked up as a heading underline - confirm against the list handling above.
	const endsWithDash = /-\s*$/.test(token.raw);
	if (!endsWithDash) {
		return;
	}

	return marked.lexer(`${fullRawText} &nbsp;`);
}
938
939
/** Upper bound on the number of repair passes made by {@link fillInIncompleteTokens}. */
const maxIncompleteTokensFixRounds = 3;

/**
 * Repeatedly applies `fillInIncompleteTokensOnce` to a token list.
 *
 * Stops as soon as a pass reports that nothing was changed, or after
 * `maxIncompleteTokensFixRounds` passes, whichever comes first.
 *
 * @param tokens The lexed tokens to repair.
 * @returns The result of the last pass that made a change, or `tokens` itself if none did.
 */
export function fillInIncompleteTokens(tokens: marked.TokensList): marked.TokensList {
	let current = tokens;
	let round = 0;

	while (round < maxIncompleteTokensFixRounds) {
		const patched = fillInIncompleteTokensOnce(current);
		if (!patched) {
			// Nothing left to fix
			return current;
		}

		current = patched;
		round++;
	}

	return current;
}
952
953
/**
 * Runs a single repair pass over the token list. The fixes are attempted in this order:
 *
 * 1. The first paragraph whose raw text has a line starting with `|` is handed, together with
 *    every token after it, to `completeTable`.
 * 2. If that produced nothing and the last token is a list, `completeListItemPattern` is tried on it.
 * 3. If that produced nothing and the last token is a paragraph, `completeSingleLinePattern` is tried on it.
 * 4. If none of the above produced anything and the last token is a heading, `completeHeading` is
 *    tried with the merged raw text of all tokens.
 *
 * @param tokens The lexed tokens to repair.
 * @returns A new token list, or `null` when no fix applied. For fixes 1-3 the untouched leading
 * tokens are kept and `links` is carried over from `tokens`; for fix 4 the result of
 * `completeHeading` is returned as-is.
 */
function fillInIncompleteTokensOnce(tokens: marked.TokensList): marked.TokensList | null {
	// `replaceFrom` is the index of the first token that gets swapped out,
	// `replacement` is what takes the place of everything from there to the end.
	const firstTableLikeParagraph = tokens.findIndex(candidate => candidate.type === 'paragraph' && /(\n|^)\|/.test(candidate.raw));
	let replaceFrom = firstTableLikeParagraph;
	let replacement: marked.Token[] | undefined;
	if (firstTableLikeParagraph !== -1) {
		// Only the first matching paragraph is ever considered
		replacement = completeTable(tokens.slice(firstTableLikeParagraph));
	}

	const finalToken = tokens.at(-1);
	const finalIndex = tokens.length - 1;

	if (!replacement && finalToken?.type === 'list') {
		const repairedList = completeListItemPattern(finalToken as marked.Tokens.List);
		if (repairedList) {
			replacement = [repairedList];
			replaceFrom = finalIndex;
		}
	}

	if (!replacement && finalToken?.type === 'paragraph') {
		// Only operates on a single token, because any newline that follows this should break these patterns
		const repairedParagraph = completeSingleLinePattern(finalToken as marked.Tokens.Paragraph);
		if (repairedParagraph) {
			replacement = [repairedParagraph];
			replaceFrom = finalIndex;
		}
	}

	if (replacement) {
		const patched = [...tokens.slice(0, replaceFrom), ...replacement] as marked.TokensList;
		patched.links = tokens.links;
		return patched;
	}

	if (finalToken?.type === 'heading') {
		const relexed = completeHeading(finalToken as marked.Tokens.Heading, mergeRawTokenText(tokens));
		if (relexed) {
			return relexed;
		}
	}

	return null;
}
1001
1002
1003
/**
 * Closes an unterminated inline code span: appends a backtick to the token's raw text
 * (after trimming trailing whitespace) and returns the first token of the re-lexed result.
 */
function completeCodespan(token: marked.Token): marked.Token {
	const closingBacktick = '`';
	return completeWithString(token, closingBacktick);
}
1006
1007
/**
 * Closes an unterminated `*` span: appends a single `*` to the token's raw text
 * (after trimming trailing whitespace) and returns the first token of the re-lexed result.
 */
function completeStar(tokens: marked.Token): marked.Token {
	const closingStar = '*';
	return completeWithString(tokens, closingStar);
}
1010
1011
/**
 * Closes an unterminated `_` span: appends a single `_` to the token's raw text
 * (after trimming trailing whitespace) and returns the first token of the re-lexed result.
 */
function completeUnderscore(tokens: marked.Token): marked.Token {
	const closingUnderscore = '_';
	return completeWithString(tokens, closingUnderscore);
}
1014
1015
/**
 * Closes an unterminated link target: appends `)` to the token's raw text, leaving any
 * trailing whitespace in place, and returns the first token of the re-lexed result.
 */
function completeLinkTarget(tokens: marked.Token): marked.Token {
	const closingParen = ')';
	const trimTrailingWhitespace = false;
	return completeWithString(tokens, closingParen, trimTrailingWhitespace);
}
1018
1019
/**
 * Closes an unterminated double-quoted argument inside a link target: appends `")` to the
 * token's raw text, leaving any trailing whitespace in place, and returns the first token
 * of the re-lexed result.
 */
function completeLinkTargetArg(tokens: marked.Token): marked.Token {
	const closingQuoteAndParen = '")';
	const trimTrailingWhitespace = false;
	return completeWithString(tokens, closingQuoteAndParen, trimTrailingWhitespace);
}
1022
1023
/**
 * Closes unterminated link text: appends `]` plus a placeholder target to the token's raw
 * text, leaving any trailing whitespace in place, and returns the first token of the
 * re-lexed result.
 */
function completeLinkText(tokens: marked.Token): marked.Token {
	const closingBracketWithPlaceholderTarget = '](https://microsoft.com)';
	const trimTrailingWhitespace = false;
	return completeWithString(tokens, closingBracketWithPlaceholderTarget, trimTrailingWhitespace);
}
1026
1027
/**
 * Closes an unterminated `**` span: appends `**` to the token's raw text
 * (after trimming trailing whitespace) and returns the first token of the re-lexed result.
 */
function completeDoublestar(tokens: marked.Token): marked.Token {
	const closingDoubleStar = '**';
	return completeWithString(tokens, closingDoubleStar);
}
1030
1031
/**
 * Closes an unterminated `__` span: appends `__` to the token's raw text
 * (after trimming trailing whitespace) and returns the first token of the re-lexed result.
 */
function completeDoubleUnderscore(tokens: marked.Token): marked.Token {
	const closingDoubleUnderscore = '__';
	return completeWithString(tokens, closingDoubleUnderscore);
}
1034
1035
/**
 * Appends a closing string to the raw text of one or more tokens and lexes the result again.
 *
 * @param tokens A single token, or the tokens whose raw text is merged before completion.
 * @param closingString The text appended to the merged raw text.
 * @param shouldTrim When true (the default), trailing whitespace is removed from the merged
 * raw text before `closingString` is appended.
 * @returns The first token produced by lexing the completed text.
 */
function completeWithString(tokens: marked.Token[] | marked.Token, closingString: string, shouldTrim = true): marked.Token {
	const tokenList = Array.isArray(tokens) ? tokens : [tokens];

	let rawText = mergeRawTokenText(tokenList);
	if (shouldTrim) {
		rawText = rawText.trimEnd();
	}

	// A successful completion is expected to lex into exactly one token
	// (a Paragraph or a List), so only the first token is handed back.
	const relexed = marked.lexer(rawText + closingString);
	return relexed[0];
}
1043
1044
/**
 * Tries to turn a partially written table into a parseable one by supplying a complete
 * separator row (`| --- | --- |`) underneath the header row.
 *
 * The merged raw text of `tokens` is scanned line by line. The first line that starts with `|`
 * and yields at least one header cell fixes the column count. After that, the only line
 * tolerated is a final one that also starts with `|`; it is treated as a (possibly partial)
 * separator row and replaced.
 *
 * @param tokens The tokens covering the suspected table, starting at its header row.
 * @returns The tokens lexed from the patched text, or `undefined` when no header row was found
 * or the text after the header row does not look like an in-progress table.
 */
function completeTable(tokens: marked.Token[]): marked.Token[] | undefined {
	const rawText = mergeRawTokenText(tokens);
	const rows = rawText.split('\n');
	const lastRowIndex = rows.length - 1;
	const leadingPipe = /^\s*\|/;

	let headerColumnCount: number | undefined; // The number of line1 col headers
	let sawSeparatorRow = false;

	for (let rowIndex = 0; rowIndex < rows.length; rowIndex++) {
		const row = rows[rowIndex].trim();
		const rowStartsWithPipe = leadingPipe.test(row);

		if (typeof headerColumnCount === 'undefined') {
			// Still looking for the line1 header row
			if (rowStartsWithPipe) {
				const headerCells = row.match(/(\|[^\|]+)(?=\||$)/g);
				if (headerCells) {
					headerColumnCount = headerCells.length;
				}
			}
			continue;
		}

		if (!rowStartsWithPipe) {
			// The line after the header row isn't a valid separator row, so the table is malformed, don't fix it up
			return undefined;
		}

		if (rowIndex !== lastRowIndex) {
			// We got the line1 header row, and the line2 separator row, but there are more lines, and it wasn't parsed as a table!
			// That's strange and means that the table is probably malformed in the source, so don't try to patch it up.
			return undefined;
		}

		// Got a line2 separator row - partial or complete, doesn't matter, it is replaced with a correct one
		sawSeparatorRow = true;
	}

	if (typeof headerColumnCount === 'undefined' || headerColumnCount <= 0) {
		return undefined;
	}

	// Drop the existing (partial) separator row, if any, before writing the full one
	const prefixText = sawSeparatorRow ? rows.slice(0, -1).join('\n') : rawText;
	const headerClosingPipe = /\|\s*$/.test(prefixText) ? '' : '|';
	const separatorRow = '|' + ' --- |'.repeat(headerColumnCount);

	return marked.lexer(`${prefixText}${headerClosingPipe}\n${separatorRow}`);
}
1083
1084