CoCalc -- markdownRenderer.ts

GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/base/browser/markdownRenderer.ts
3292 views
1
/*---------------------------------------------------------------------------------------------
2
 *  Copyright (c) Microsoft Corporation. All rights reserved.
3
 *  Licensed under the MIT License. See License.txt in the project root for license information.
4
 *--------------------------------------------------------------------------------------------*/
5

6
import { onUnexpectedError } from '../common/errors.js';
7
import { escapeDoubleQuotes, IMarkdownString, MarkdownStringTrustedOptions, parseHrefAndDimensions, removeMarkdownEscapes } from '../common/htmlContent.js';
8
import { markdownEscapeEscapedIcons } from '../common/iconLabels.js';
9
import { defaultGenerator } from '../common/idGenerator.js';
10
import { KeyCode } from '../common/keyCodes.js';
11
import { Lazy } from '../common/lazy.js';
12
import { DisposableStore } from '../common/lifecycle.js';
13
import * as marked from '../common/marked/marked.js';
14
import { parse } from '../common/marshalling.js';
15
import { FileAccess, Schemas } from '../common/network.js';
16
import { cloneAndChange } from '../common/objects.js';
17
import { dirname, resolvePath } from '../common/resources.js';
18
import { escape } from '../common/strings.js';
19
import { URI } from '../common/uri.js';
20
import * as DOM from './dom.js';
21
import * as domSanitize from './domSanitize.js';
22
import { convertTagToPlaintext } from './domSanitize.js';
23
import { StandardKeyboardEvent } from './keyboardEvent.js';
24
import { StandardMouseEvent } from './mouseEvent.js';
25
import { renderLabelWithIcons } from './ui/iconLabel/iconLabels.js';
26

27
/**
 * Callback invoked when a rendered link is activated by click or keyboard.
 *
 * @param linkContent The link target read from the anchor's `data-href` attribute
 * (resolved against the markdown string's `baseUri` when one is set).
 * @param mdStr The markdown string the link was rendered from.
 */
export type MarkdownActionHandler = (linkContent: string, mdStr: IMarkdownString) => void;
28

29
/**
 * Options for the rendering of markdown with {@link renderMarkdown}.
 */
export interface MarkdownRenderOptions {
	/**
	 * Asynchronous renderer for fenced code blocks. The rendered element replaces the
	 * placeholder `div[data-code]` once all code block promises have resolved.
	 * Ignored when {@link codeBlockRendererSync} is provided.
	 */
	readonly codeBlockRenderer?: (languageId: string, value: string) => Promise<HTMLElement>;

	/**
	 * Synchronous renderer for fenced code blocks. Takes precedence over {@link codeBlockRenderer}.
	 */
	readonly codeBlockRendererSync?: (languageId: string, value: string, raw?: string) => HTMLElement;

	/**
	 * Called after asynchronously rendered code blocks have been inserted and whenever a
	 * rendered image fires its `load` event.
	 */
	readonly asyncRenderCallback?: () => void;

	/** When set, click, auxclick and Space/Enter key presses on rendered links are forwarded to this handler. */
	readonly actionHandler?: MarkdownActionHandler;

	/** When true, the lexed tokens are passed through {@link fillInIncompleteTokens} before being parsed. */
	readonly fillInIncompleteTokens?: boolean;

	/** Customizes how the rendered HTML is sanitized. */
	readonly sanitizerConfig?: MarkdownSanitizerConfig;

	/** Options forwarded to the `Marked` renderer and parser. */
	readonly markedOptions?: MarkdownRendererMarkedOptions;

	/** Extensions passed to the `Marked` constructor. */
	readonly markedExtensions?: Array<marked.MarkedExtension>;
}
46

47
/**
 * Subset of options passed to `Marked` for rendering markdown.
 */
export interface MarkdownRendererMarkedOptions {
	/** Forwarded as the `gfm` option of `Marked`. */
	readonly gfm?: boolean;

	/** Forwarded as the `breaks` option of `Marked`. */
	readonly breaks?: boolean;
}
54

55
/**
 * Controls how the html produced from markdown is sanitized before it is inserted into the DOM.
 */
export interface MarkdownSanitizerConfig {
	/**
	 * Forwarded to the DOM sanitizer. When set, raw html in markdown without html support is
	 * escaped instead of dropped, and non-checkbox inputs are converted to plaintext instead of removed.
	 */
	readonly replaceWithPlaintext?: boolean;

	/** Replaces the default tag allow list ({@link allowedMarkdownHtmlTags}). */
	readonly allowedTags?: {
		readonly override: ReadonlyArray<string>;
	};

	/** Replaces the default attribute allow list ({@link allowedMarkdownHtmlAttributes}). */
	readonly allowedAttributes?: {
		readonly override: readonly (string | domSanitize.SanitizeAttributeRule)[];
	};

	/** Additional link schemes appended to the built-in list of allowed link schemes. */
	readonly allowedLinkSchemes?: {
		readonly augment: ReadonlyArray<string>;
	};

	/**
	 * Consulted for media elements whose resolved uri is neither a `file` nor a `data` uri.
	 * Elements for which this returns `false` are replaced in the rendered output.
	 */
	readonly remoteImageIsAllowed?: (uri: URI) => boolean;
}
68

69
/**
 * Renderer overrides for images, paragraphs and links that are installed on the
 * `marked` renderer used for markdown rendering.
 */
const defaultMarkedRenderers = Object.freeze({
	/**
	 * Emits an `<img>` tag. Dimensions encoded in the href are split off by
	 * `parseHrefAndDimensions` and appended after the other attributes.
	 */
	image: ({ href, title, text }: marked.Tokens.Image): string => {
		const attrs: string[] = [];
		let sizeAttrs: string[] = [];

		if (href) {
			const parsed = parseHrefAndDimensions(href);
			sizeAttrs = parsed.dimensions;
			attrs.push(`src="${escapeDoubleQuotes(parsed.href)}"`);
		}
		if (text) {
			attrs.push(`alt="${escapeDoubleQuotes(text)}"`);
		}
		if (title) {
			attrs.push(`title="${escapeDoubleQuotes(title)}"`);
		}

		return `<img ${[...attrs, ...sizeAttrs].join(' ')}>`;
	},

	/** Wraps the inline content of a paragraph in a `<p>` element. */
	paragraph(this: marked.Renderer, { tokens }: marked.Tokens.Paragraph): string {
		const inner = this.parser.parseInline(tokens);
		return '<p>' + inner + '</p>';
	},

	/**
	 * Emits a non-draggable `<a>` element whose `title` falls back to the encoded href.
	 * Returns an empty string when the href is not a string.
	 */
	link(this: marked.Renderer, { href, title, tokens }: marked.Tokens.Link): string {
		let text = this.parser.parseInline(tokens);
		if (typeof href !== 'string') {
			return '';
		}

		// Remove markdown escapes. Workaround for https://github.com/chjj/marked/issues/829
		if (href === text) { // raw link case
			text = removeMarkdownEscapes(text);
		}

		const safeTitle = typeof title === 'string' ? escapeDoubleQuotes(removeMarkdownEscapes(title)) : '';

		// HTML encode the href in a single pass
		const htmlEntities: Record<string, string> = {
			'&': '&amp;',
			'<': '&lt;',
			'>': '&gt;',
			'"': '&quot;',
			'\'': '&#39;',
		};
		const encodedHref = removeMarkdownEscapes(href).replace(/[&<>"']/g, ch => htmlEntities[ch]);

		return `<a href="${encodedHref}" title="${safeTitle || encodedHref}" draggable="false">${text}</a>`;
	},
});
117

118
/**
 * Low-level way to create an HTML element from a markdown string.
 *
 * **Note** that for most cases you should be using {@link import('../../editor/browser/widget/markdownRenderer/browser/markdownRenderer.js').MarkdownRenderer MarkdownRenderer}
 * which comes with support for pretty code block rendering and which uses the default way of handling links.
 *
 * @param markdown The markdown string to render.
 * @param options Rendering options (code block renderers, link handler, sanitizer config, ...).
 * @param target Optional element to render into. When given, its content is replaced with the
 * rendered elements and it is returned as `element`; otherwise a new `div` is returned.
 * @returns The element holding the rendered markdown and a `dispose` function that removes the
 * registered listeners and suppresses a pending asynchronous code block update.
 */
export function renderMarkdown(markdown: IMarkdownString, options: MarkdownRenderOptions = {}, target?: HTMLElement): { element: HTMLElement; dispose: () => void } {
	const disposables = new DisposableStore();
	let isDisposed = false;

	const markedInstance = new marked.Marked(...(options.markedExtensions ?? []));
	const { renderer, codeBlocks, syncCodeBlocks } = createMarkdownRenderer(markedInstance, options, markdown);
	const value = preprocessMarkdownString(markdown);

	let renderedMarkdown: string;
	if (options.fillInIncompleteTokens) {
		// The defaults are applied by parse but not lexer()/parser(), and they need to be present
		const opts: marked.MarkedOptions = {
			...markedInstance.defaults,
			...options.markedOptions,
			renderer
		};
		const tokens = markedInstance.lexer(value, opts);
		const newTokens = fillInIncompleteTokens(tokens);
		renderedMarkdown = markedInstance.parser(newTokens, opts);
	} else {
		renderedMarkdown = markedInstance.parse(value, { ...options.markedOptions, renderer, async: false });
	}

	// Rewrite theme icons
	if (markdown.supportThemeIcons) {
		const elements = renderLabelWithIcons(renderedMarkdown);
		renderedMarkdown = elements.map(e => typeof e === 'string' ? e : e.outerHTML).join('');
	}

	const renderedContent = document.createElement('div');
	const sanitizerConfig = getDomSanitizerConfig(markdown.isTrusted ?? false, options.sanitizerConfig ?? {});
	domSanitize.safeSetInnerHtml(renderedContent, renderedMarkdown, sanitizerConfig);

	// Rewrite links and images before potentially inserting them into the real dom
	rewriteRenderedLinks(markdown, options, renderedContent);

	let outElement: HTMLElement;
	if (target) {
		outElement = target;
		DOM.reset(target, ...renderedContent.children);
	} else {
		outElement = renderedContent;
	}

	// Swaps every `div[data-code]` placeholder for the element rendered for its id
	const fillCodeBlockPlaceholders = (tuples: Iterable<readonly [string, HTMLElement]>): void => {
		const renderedElements = new Map(tuples);
		const placeholderElements = outElement.querySelectorAll<HTMLDivElement>(`div[data-code]`);
		for (const placeholderElement of placeholderElements) {
			const renderedElement = renderedElements.get(placeholderElement.dataset['code'] ?? '');
			if (renderedElement) {
				DOM.reset(placeholderElement, renderedElement);
			}
		}
	};

	if (codeBlocks.length > 0) {
		Promise.all(codeBlocks).then((tuples) => {
			if (isDisposed) {
				return;
			}
			fillCodeBlockPlaceholders(tuples);
			options.asyncRenderCallback?.();
		}).catch(onUnexpectedError); // A failing code block renderer must not surface as an unhandled rejection
	} else if (syncCodeBlocks.length > 0) {
		fillCodeBlockPlaceholders(syncCodeBlocks);
	}

	// Signal size changes for image tags
	const asyncRenderCallback = options.asyncRenderCallback;
	if (asyncRenderCallback) {
		for (const img of outElement.getElementsByTagName('img')) {
			const listener = disposables.add(DOM.addDisposableListener(img, 'load', () => {
				// Only the first load of each image is signalled
				listener.dispose();
				asyncRenderCallback();
			}));
		}
	}

	// Add event listeners for links
	if (options.actionHandler) {
		const clickCb = (e: PointerEvent) => {
			const mouseEvent = new StandardMouseEvent(DOM.getWindow(outElement), e);
			if (!mouseEvent.leftButton && !mouseEvent.middleButton) {
				return;
			}
			activateLink(markdown, options, mouseEvent);
		};
		disposables.add(DOM.addDisposableListener(outElement, 'click', clickCb));
		disposables.add(DOM.addDisposableListener(outElement, 'auxclick', clickCb));

		disposables.add(DOM.addDisposableListener(outElement, 'keydown', (e) => {
			const keyboardEvent = new StandardKeyboardEvent(e);
			if (!keyboardEvent.equals(KeyCode.Space) && !keyboardEvent.equals(KeyCode.Enter)) {
				return;
			}
			activateLink(markdown, options, keyboardEvent);
		}));
	}

	// Remove/disable inputs. Iterate over a copy because the collection is mutated below.
	for (const input of [...outElement.getElementsByTagName('input')]) {
		if (input.attributes.getNamedItem('type')?.value === 'checkbox') {
			input.setAttribute('disabled', '');
		} else {
			if (options.sanitizerConfig?.replaceWithPlaintext) {
				const replacement = convertTagToPlaintext(input);
				input.parentElement?.replaceChild(replacement, input);
			} else {
				input.remove();
			}
		}
	}

	return {
		element: outElement,
		dispose: () => {
			isDisposed = true;
			disposables.dispose();
		}
	};
}
247

248
/**
 * Rewrites the `src` of media elements and the `href` of anchors below `root`.
 *
 * - Media (`img`, `audio`, `video`, `source`): the `src` is resolved against `markdown.baseUri`
 *   (if any) and converted with {@link massageHref}. When `remoteImageIsAllowed` is configured
 *   and rejects a uri that is neither a `file` nor a `data` uri, the element is replaced by an
 *   element created from its `outerHTML` string.
 * - Anchors: the `href` attribute is cleared and the resolved target is stored in `data-href`.
 *   Links without an href, `data:`/`javascript:` links, `command:` links in untrusted markdown,
 *   and `_workbench.downloadResource` command links are unwrapped, keeping only their children.
 *
 * @param markdown The markdown string the content was rendered from.
 * @param options The render options in effect.
 * @param root The element holding the rendered, sanitized content.
 */
function rewriteRenderedLinks(markdown: IMarkdownString, options: MarkdownRenderOptions, root: HTMLElement) {
	for (const el of root.querySelectorAll('img, audio, video, source')) {
		const src = el.getAttribute('src'); // Get the raw 'src' attribute value as text, not the resolved 'src'
		if (!src) {
			continue;
		}

		let href = src;
		try {
			if (markdown.baseUri) { // absolute or relative local path, or file: uri
				href = resolveWithBaseUri(URI.from(markdown.baseUri), href);
			}
		} catch (err) {
			// Best effort: keep the unresolved `src` when it cannot be resolved against the base uri
		}

		el.setAttribute('src', massageHref(markdown, href, true));

		if (options.sanitizerConfig?.remoteImageIsAllowed) {
			const uri = URI.parse(href);
			if (uri.scheme !== Schemas.file && uri.scheme !== Schemas.data && !options.sanitizerConfig.remoteImageIsAllowed(uri)) {
				el.replaceWith(DOM.$('', undefined, el.outerHTML));
			}
		}
	}

	for (const el of root.querySelectorAll('a')) {
		const href = el.getAttribute('href'); // Get the raw 'href' attribute value as text, not the resolved 'href'
		el.setAttribute('href', ''); // Clear out href. We use the `data-href` for handling clicks instead
		if (!href
			|| /^data:|javascript:/i.test(href)
			|| (/^command:/i.test(href) && !markdown.isTrusted)
			|| /^command:(\/\/\/)?_workbench\.downloadResource/i.test(href)) {
			// drop the link
			el.replaceWith(...el.childNodes);
		} else {
			let resolvedHref = massageHref(markdown, href, false);
			if (markdown.baseUri) {
				// Resolve the massaged href (not the raw one) so that a rewrite done by
				// `massageHref` is not thrown away when a base uri is present.
				resolvedHref = resolveWithBaseUri(URI.from(markdown.baseUri), resolvedHref);
			}
			el.dataset.href = resolvedHref;
		}
	}
}
288

289
/**
 * Creates the `marked` renderer used by markdown rendering.
 *
 * Code blocks are rendered as `div[data-code]` placeholders. The elements produced by the
 * configured code block renderer are collected as `[id, element]` tuples, either in
 * `syncCodeBlocks` or, as promises, in `codeBlocks`, so that the caller can swap them in later.
 *
 * @param marked The `Marked` instance whose `Renderer` class is instantiated.
 * @param options Render options; the synchronous code block renderer wins over the asynchronous one.
 * @param markdown The markdown string being rendered; controls how raw html is treated.
 */
function createMarkdownRenderer(marked: marked.Marked, options: MarkdownRenderOptions, markdown: IMarkdownString): { renderer: marked.Renderer; codeBlocks: Promise<[string, HTMLElement]>[]; syncCodeBlocks: [string, HTMLElement][] } {
	const renderer = new marked.Renderer(options.markedOptions);
	renderer.image = defaultMarkedRenderers.image;
	renderer.link = defaultMarkedRenderers.link;
	renderer.paragraph = defaultMarkedRenderers.paragraph;

	// Will collect [id, renderedElement] tuples
	const codeBlocks: Promise<[string, HTMLElement]>[] = [];
	const syncCodeBlocks: [string, HTMLElement][] = [];

	// Placeholder that is later replaced by the rendered code block with the same id
	const placeholderFor = (id: string, text: string): string =>
		`<div class="code" data-code="${id}">${escape(text)}</div>`;

	const { codeBlockRendererSync, codeBlockRenderer } = options;
	if (codeBlockRendererSync) {
		renderer.code = ({ text, lang, raw }: marked.Tokens.Code) => {
			const id = defaultGenerator.nextId();
			const element = codeBlockRendererSync(postProcessCodeBlockLanguageId(lang), text, raw);
			syncCodeBlocks.push([id, element]);
			return placeholderFor(id, text);
		};
	} else if (codeBlockRenderer) {
		renderer.code = ({ text, lang }: marked.Tokens.Code) => {
			const id = defaultGenerator.nextId();
			const pending = codeBlockRenderer(postProcessCodeBlockLanguageId(lang), text);
			codeBlocks.push(pending.then((element): [string, HTMLElement] => [id, element]));
			return placeholderFor(id, text);
		};
	}

	if (!markdown.supportHtml) {
		// Note: we always pass the output through dompurify after this so that we don't rely on
		// marked for real sanitization.
		renderer.html = ({ text }) => {
			if (options.sanitizerConfig?.replaceWithPlaintext) {
				return escape(text);
			}
			if (!markdown.isTrusted) {
				return '';
			}
			// Trusted markdown may still use span tags
			return /^(<span[^>]+>)|(<\/\s*span>)$/.test(text) ? text : '';
		};
	}

	return { renderer, codeBlocks, syncCodeBlocks };
}
329

330
/**
 * Returns the text of `markdown` prepared for lexing: overly long values are cut off after
 * 100,000 characters (with an ellipsis appended) and, when theme icons are supported, the
 * value is passed through `markdownEscapeEscapedIcons`.
 */
function preprocessMarkdownString(markdown: IMarkdownString) {
	const maxLength = 100_000;

	// values that are too long will freeze the UI
	const truncated = markdown.value.length > maxLength
		? `${markdown.value.slice(0, maxLength)}…`
		: markdown.value;

	// escape theme icons
	return markdown.supportThemeIcons ? markdownEscapeEscapedIcons(truncated) : truncated;
}
345

346
/**
 * Forwards the activation of a rendered link to `options.actionHandler`.
 *
 * Does nothing when the event did not originate inside an `a[data-href]` element. Otherwise
 * the event's default action is always prevented, and errors thrown while resolving or
 * handling the link are reported through `onUnexpectedError`.
 */
function activateLink(mdStr: IMarkdownString, options: MarkdownRenderOptions, event: StandardMouseEvent | StandardKeyboardEvent): void {
	const anchor = event.target.closest('a[data-href]');
	if (!DOM.isHTMLElement(anchor)) {
		return;
	}

	try {
		const storedHref = anchor.dataset['href'];
		if (!storedHref) {
			return; // the `finally` below still prevents the default action
		}
		const href = mdStr.baseUri ? resolveWithBaseUri(URI.from(mdStr.baseUri), storedHref) : storedHref;
		options.actionHandler?.(href, mdStr);
	} catch (err) {
		onUnexpectedError(err);
	} finally {
		event.preventDefault();
	}
}
366

367
/**
 * Rewrites a uri-encoded, serialized value (such as a uri query): every nested value that is a
 * key of `markdown.uris` is replaced by the revived uri, and the result is serialized and
 * encoded again. Returns `part` unchanged when it cannot be decoded/parsed or parses to a falsy value.
 */
function uriMassage(markdown: IMarkdownString, part: string): string {
	let parsed: unknown;
	try {
		parsed = parse(decodeURIComponent(part));
	} catch (e) {
		// ignore
	}
	if (!parsed) {
		return part;
	}

	const replaced = cloneAndChange(parsed, value => {
		const components = markdown.uris && markdown.uris[value];
		return components ? URI.revive(components) : undefined;
	});
	return encodeURIComponent(JSON.stringify(replaced));
}
386

387
/**
 * Maps `href` through the uris registered on the markdown string.
 *
 * @param markdown The markdown string whose `uris` table is consulted.
 * @param href The raw href or src value.
 * @param isDomUri Whether the result ends up in a `src` attribute. If so, `data:` uris are
 * returned untouched and everything else is converted with `FileAccess.uriToBrowserUri`.
 * @returns The href to use. For non-dom uris this is `href` itself unless a differing uri is
 * registered for it, in which case that uri (with its query massaged) is returned as a string.
 */
function massageHref(markdown: IMarkdownString, href: string, isDomUri: boolean): string {
	const registered = URI.revive(markdown.uris && markdown.uris[href]);

	if (isDomUri) {
		if (href.startsWith(Schemas.data + ':')) {
			return href;
		}
		// this URI will end up as "src"-attribute of a dom node
		// and because of that special rewriting needs to be done
		// so that the URI uses a protocol that's understood by
		// browsers (like http or https)
		return FileAccess.uriToBrowserUri(registered ? registered : URI.parse(href)).toString(true);
	}

	if (!registered || URI.parse(href).toString() === registered.toString()) {
		return href; // nothing registered, or no transformation performed
	}

	const massaged = registered.query
		? registered.with({ query: uriMassage(markdown, registered.query) })
		: registered;
	return massaged.toString();
}
414

415
/**
 * Extracts the language id from the info string of a fenced code block: everything up to the
 * first whitespace, `+`, `|`, `:`, `,`, `{` or `?` character. Returns an empty string when no
 * info string is given.
 */
function postProcessCodeBlockLanguageId(lang: string | undefined): string {
	if (!lang) {
		return '';
	}

	const [languageId] = lang.split(/[\s+|:|,|\{|\?]/, 1);
	return languageId ?? lang;
}
426

427
/**
 * Resolves `href` against `baseUri`. Hrefs that already start with a scheme are returned as-is.
 * A base uri whose path ends in `/` is used directly; otherwise its parent (`dirname`) is used
 * as the base for resolution.
 */
function resolveWithBaseUri(baseUri: URI, href: string): string {
	if (/^\w[\w\d+.-]*:/.test(href)) {
		return href;
	}

	const base = baseUri.path.endsWith('/') ? baseUri : dirname(baseUri);
	return resolvePath(base, href).toString();
}
439

440
/**
 * Sanitizes html rendered from markdown with the sanitizer config derived from the
 * trust level and the given sanitizer options.
 */
function sanitizeRenderedMarkdown(
	renderedMarkdown: string,
	isTrusted: boolean | MarkdownStringTrustedOptions,
	options: MarkdownSanitizerConfig = {},
): TrustedHTML {
	return domSanitize.sanitizeHtml(renderedMarkdown, getDomSanitizerConfig(isTrusted, options));
}
448

449
/**
 * Default list of html tags that survive sanitization of rendered markdown:
 * the basic markup tags of the DOM sanitizer plus `input`.
 */
export const allowedMarkdownHtmlTags = Object.freeze([
	...domSanitize.basicMarkupHtmlTags,
	// Allow inputs for rendering checkboxes. Other types of inputs are removed and the inputs are always disabled
	'input',
]);
453

454
/**
 * Default list of html attributes that survive sanitization of rendered markdown.
 * `style` and `class` are only kept on `span` elements and only for a restricted set of values.
 */
export const allowedMarkdownHtmlAttributes = Object.freeze<Array<string | domSanitize.SanitizeAttributeRule>>([
	'align',
	'autoplay',
	'alt',
	'colspan',
	'controls',
	'draggable',
	'height',
	'href',
	'loop',
	'muted',
	'playsinline',
	'poster',
	'rowspan',
	'src',
	'target',
	'title',
	'type',
	'width',
	'start',

	// Input (For disabled inputs)
	'checked',
	'disabled',
	'value',

	// Custom markdown attributes
	'data-code',
	'data-href',

	// Only allow very specific styles: optional color, background-color and border-radius, in that order
	{
		attributeName: 'style',
		shouldKeep: (element, data) =>
			element.tagName === 'SPAN'
			&& data.attrName === 'style'
			&& /^(color\:(#[0-9a-fA-F]+|var\(--vscode(-[a-zA-Z0-9]+)+\));)?(background-color\:(#[0-9a-fA-F]+|var\(--vscode(-[a-zA-Z0-9]+)+\));)?(border-radius:[0-9]+px;)?$/.test(data.attrValue),
	},

	// Only allow codicons for classes
	{
		attributeName: 'class',
		shouldKeep: (element, data) =>
			element.tagName === 'SPAN'
			&& data.attrName === 'class'
			&& /^codicon codicon-[a-z\-]+( codicon-modifier-[a-z\-]+)?$/.test(data.attrValue),
	},
]);
510

511
/**
 * Builds the DOM sanitizer configuration for rendered markdown.
 *
 * @param isTrusted Trust level of the markdown; any truthy value additionally allows `command:` links.
 * @param options Caller overrides for allowed tags/attributes and additional link schemes.
 */
function getDomSanitizerConfig(isTrusted: boolean | MarkdownStringTrustedOptions, options: MarkdownSanitizerConfig): domSanitize.DomSanitizerConfig {
	const allowedLinkSchemes = [
		Schemas.http,
		Schemas.https,
		Schemas.mailto,
		Schemas.file,
		Schemas.vscodeFileResource,
		Schemas.vscodeRemote,
		Schemas.vscodeRemoteResource,
		Schemas.vscodeNotebookCell,
		...(isTrusted ? [Schemas.command] : []),
		...(options.allowedLinkSchemes?.augment ?? []),
	];

	const allowedMediaSchemes = [
		Schemas.http,
		Schemas.https,
		Schemas.data,
		Schemas.file,
		Schemas.vscodeFileResource,
		Schemas.vscodeRemote,
		Schemas.vscodeRemoteResource,
	];

	return {
		// allowedTags should included everything that markdown renders to.
		// Since we have our own sanitize function for marked, it's possible we missed some tag so let dompurify make sure.
		// HTML tags that can result from markdown are from reading https://spec.commonmark.org/0.29/
		// HTML table tags that can result from markdown are from https://github.github.com/gfm/#tables-extension-
		allowedTags: { override: options.allowedTags?.override ?? allowedMarkdownHtmlTags },
		allowedAttributes: { override: options.allowedAttributes?.override ?? allowedMarkdownHtmlAttributes },
		allowedLinkProtocols: { override: allowedLinkSchemes },
		allowedMediaProtocols: { override: allowedMediaSchemes },
		replaceWithPlaintext: options.replaceWithPlaintext,
	};
}
559

560
/**
 * Renders `str` as plaintext, stripping out Markdown syntax if it's a {@link IMarkdownString}.
 * Plain strings are returned unchanged.
 *
 * For example `# Header` would be output as `Header`.
 */
export function renderAsPlaintext(str: IMarkdownString | string, options?: {
	/** Controls if the ``` of code blocks should be preserved in the output or not */
	readonly includeCodeBlocksFences?: boolean;
}) {
	if (typeof str === 'string') {
		return str;
	}

	// values that are too long will freeze the UI
	const maxLength = 100_000;
	const fullValue = str.value ?? '';
	const value = fullValue.length > maxLength ? `${fullValue.slice(0, maxLength)}…` : fullValue;

	const renderer = options?.includeCodeBlocksFences ? plainTextWithCodeBlocksRenderer.value : plainTextRenderer.value;
	const html = marked.parse(value, { async: false, renderer });
	const sanitized = sanitizeRenderedMarkdown(html, /* isTrusted */ false, {}).toString();

	// Turn the known html entities back into their characters
	return sanitized
		.replace(/&(#\d+|[a-zA-Z]+);/g, m => unescapeInfo.get(m) ?? m)
		.trim();
}
585

586
/**
 * Maps the html entities that are turned back into characters when rendering plaintext
 * to their unescaped character. Entities not listed here are left as they are.
 */
const unescapeInfo: ReadonlyMap<string, string> = new Map<string, string>([
	['&quot;', '"'],
	['&nbsp;', ' '],
	['&amp;', '&'],
	['&#39;', '\''],
	['&lt;', '<'],
	['&gt;', '>'],
]);
594

595
/**
 * Creates a `marked` renderer that outputs plain text instead of html: markup is dropped,
 * block elements end with a newline, raw html, images and horizontal rules produce nothing,
 * and code is html-escaped.
 */
function createPlainTextRenderer(): marked.Renderer {
	const renderer = new marked.Renderer();

	// Block level elements
	renderer.code = ({ text }: marked.Tokens.Code): string => escape(text);
	renderer.blockquote = ({ text }: marked.Tokens.Blockquote): string => `${text}\n`;
	renderer.html = (_: marked.Tokens.HTML): string => '';
	renderer.heading = function ({ tokens }: marked.Tokens.Heading): string {
		return `${this.parser.parseInline(tokens)}\n`;
	};
	renderer.hr = (): string => '';
	renderer.list = function ({ items }: marked.Tokens.List): string {
		const renderedItems = items.map(item => this.listitem(item));
		return `${renderedItems.join('\n')}\n`;
	};
	renderer.listitem = ({ text }: marked.Tokens.ListItem): string => `${text}\n`;
	renderer.paragraph = function ({ tokens }: marked.Tokens.Paragraph): string {
		return `${this.parser.parseInline(tokens)}\n`;
	};

	// Tables: cells are separated by spaces, rows by newlines
	renderer.table = function ({ header, rows }: marked.Tokens.Table): string {
		const headerLine = header.map(cell => this.tablecell(cell)).join(' ');
		const bodyLines = rows.map(cells => cells.map(cell => this.tablecell(cell)).join(' ')).join('\n');
		return `${headerLine}\n${bodyLines}\n`;
	};
	renderer.tablerow = ({ text }: marked.Tokens.TableRow): string => text;
	renderer.tablecell = function ({ tokens }: marked.Tokens.TableCell): string {
		return this.parser.parseInline(tokens);
	};

	// Inline elements
	renderer.strong = ({ text }: marked.Tokens.Strong): string => text;
	renderer.em = ({ text }: marked.Tokens.Em): string => text;
	renderer.codespan = ({ text }: marked.Tokens.Codespan): string => escape(text);
	renderer.br = (_: marked.Tokens.Br): string => '\n';
	renderer.del = ({ text }: marked.Tokens.Del): string => text;
	renderer.image = (_: marked.Tokens.Image): string => '';
	renderer.text = ({ text }: marked.Tokens.Text): string => text;
	renderer.link = ({ text }: marked.Tokens.Link): string => text;

	return renderer;
}
657
/** Lazily created plain text renderer that does not preserve code block fences. */
const plainTextRenderer = new Lazy<marked.Renderer>(createPlainTextRenderer);
658

659
/** Lazily created plain text renderer that wraps code blocks in ``` fences. */
const plainTextWithCodeBlocksRenderer = new Lazy<marked.Renderer>(() => {
	const fencedRenderer = createPlainTextRenderer();
	fencedRenderer.code = ({ text }: marked.Tokens.Code): string => `\n\`\`\`\n${escape(text)}\n\`\`\`\n`;
	return fencedRenderer;
});
666

667
/** Concatenates the `raw` source text of the given tokens, in order. */
function mergeRawTokenText(tokens: marked.Token[]): string {
	return tokens.reduce((merged, token) => merged + token.raw, '');
}
674

675
/**
 * Tries to complete an unfinished inline construct (codespan, emphasis, link) in `token`.
 *
 * The sub-tokens are scanned from last to first. For the first `text` sub-token whose last
 * line matches one of the known incomplete patterns, the whole token is re-lexed with the
 * matching closing characters appended.
 *
 * @returns The re-lexed token, or `undefined` if the token has no sub-tokens or nothing matched.
 */
function completeSingleLinePattern(token: marked.Tokens.Text | marked.Tokens.Paragraph): marked.Token | undefined {
	const subtokens = token.tokens;
	if (!subtokens) {
		return undefined;
	}

	for (let i = subtokens.length - 1; i >= 0; i--) {
		const subtoken = subtokens[i];
		if (subtoken.type !== 'text') {
			continue;
		}

		// Only the text after the last newline is relevant
		const lastLine = subtoken.raw.slice(subtoken.raw.lastIndexOf('\n') + 1);

		if (lastLine.includes('`')) {
			return completeCodespan(token);
		}
		if (lastLine.includes('**')) {
			return completeDoublestar(token);
		}
		if (/\*\w/.test(lastLine)) {
			return completeStar(token);
		}
		if (/(^|\s)__\w/.test(lastLine)) {
			return completeDoubleUnderscore(token);
		}
		if (/(^|\s)_\w/.test(lastLine)) {
			return completeUnderscore(token);
		}

		const startsLinkTarget =
			// Text with start of link target
			hasLinkTextAndStartOfLinkTarget(lastLine) ||
			// This token doesn't have the link text, eg if it contains other markdown constructs that are in other subtokens.
			// But some preceding token does have an unbalanced [ at least
			(hasStartOfLinkTargetAndNoLinkText(lastLine) && subtokens.slice(0, i).some(t => t.type === 'text' && t.raw.match(/\[[^\]]*$/)));
		if (startsLinkTarget) {
			const followingSubTokens = subtokens.slice(i + 1);

			// A markdown link can look like
			// [link text](https://microsoft.com "more text")
			// Where "more text" is a title for the link or an argument to a vscode command link
			const hasUnclosedLinkArg =
				// If the link was parsed as a link, then look for a link token and a text token with a quote
				(followingSubTokens[0]?.type === 'link' && followingSubTokens[1]?.type === 'text' && followingSubTokens[1].raw.match(/^ *"[^"]*$/)) ||
				// And if the link was not parsed as a link (eg command link), just look for a single quote in this token
				lastLine.match(/^[^"]* +"[^"]*$/);

			return hasUnclosedLinkArg ? completeLinkTargetArg(token) : completeLinkTarget(token);
		}

		// Contains the start of link text, and no following tokens contain the link target
		if (/(^|\s)\[\w*[^\]]*$/.test(lastLine)) {
			return completeLinkText(token);
		}
	}

	return undefined;
}
738

739
/**
 * Tests whether `str` contains a bracketed link text, at the start or after whitespace,
 * that is directly followed by the opening `(` of a link target.
 */
function hasLinkTextAndStartOfLinkTarget(str: string): boolean {
	return /(^|\s)\[.*\]\(\w*/.test(str);
}
742

743
/**
 * Tests whether `str` contains `](` followed by an unclosed link target, without any `[`
 * before it, i.e. the link text was opened somewhere else.
 */
function hasStartOfLinkTargetAndNoLinkText(str: string): boolean {
	return /^[^\[]*\]\([^\)]*$/.test(str);
}
746

747
/**
 * Tries to complete an unfinished inline pattern in the last item of `list`.
 *
 * @returns A re-lexed list token with the pattern completed, or `undefined` when there is
 * nothing to fix or re-lexing did not produce a list.
 */
function completeListItemPattern(list: marked.Tokens.List): marked.Tokens.List | undefined {
	// Patch up this one list item
	const finalItem = list.items[list.items.length - 1];
	const finalSubToken = finalItem.tokens ? finalItem.tokens[finalItem.tokens.length - 1] : undefined;

	/*
	Example list token structures:

	list
		list_item
			text
				text
				codespan
				link
		list_item
			text
			code // Complete indented codeblock
		list_item
			text
			space
			text
				text // Incomplete indented codeblock
		list_item
			text
			list // Nested list
				list_item
					text
						text

	Contrast with paragraph:
	paragraph
		text
		codespan
	*/

	const listEndsInHeading = (candidate: marked.Tokens.List): boolean => {
		// A list item can be rendered as a heading for some reason when it has a subitem where we haven't rendered the text yet like this:
		// 1. list item
		//    -
		const tailToken = candidate.items.at(-1)?.tokens.at(-1);
		return tailToken?.type === 'heading' || tailToken?.type === 'list' && listEndsInHeading(tailToken as marked.Tokens.List);
	};

	let completedToken: marked.Token | undefined;
	if (finalSubToken?.type === 'text' && !('inRawBlock' in finalItem)) { // Why does Tag have a type of 'text'
		completedToken = completeSingleLinePattern(finalSubToken as marked.Tokens.Text);
	} else if (listEndsInHeading(list)) {
		const relexedList = marked.lexer(list.raw.trim() + ' &nbsp;')[0] as marked.Tokens.List;
		// Anything but a list means that something went wrong
		return relexedList.type === 'list' ? relexedList : undefined;
	}

	if (!completedToken || completedToken.type !== 'paragraph') { // 'text' item inside the list item turns into paragraph
		// Nothing to fix, or not a pattern we were expecting
		return;
	}

	// Grabbing the `- ` or `1. ` or `* ` off the list item because I can't find a better way to do this
	const itemLead = finalItem.raw.match(/^(\s*(-|\d+\.|\*) +)/)?.[0];
	if (!itemLead) {
		// Is badly formatted
		return;
	}

	const precedingItemsText = mergeRawTokenText(list.items.slice(0, -1));
	const patchedItemText = itemLead +
		mergeRawTokenText(finalItem.tokens.slice(0, -1)) +
		completedToken.raw;

	const patchedList = marked.lexer(precedingItemsText + patchedItemText)[0] as marked.Tokens.List;
	if (patchedList.type !== 'list') {
		// Something went wrong
		return;
	}

	return patchedList;
}
829

830
/**
 * Re-lexes `fullRawText` with ` &nbsp;` appended when the raw text of the heading token ends
 * in a `-` (optionally followed by whitespace). Returns nothing otherwise.
 */
function completeHeading(token: marked.Tokens.Heading, fullRawText: string): marked.TokensList | void {
	if (!/-\s*$/.test(token.raw)) {
		return;
	}
	return marked.lexer(fullRawText + ' &nbsp;');
}
835

836
/** Upper bound for the number of fix-up passes that `fillInIncompleteTokens` runs over a token list. */
const maxIncompleteTokensFixRounds = 3;
837
/**
 * Patches up tokens of incomplete markdown (for example an unclosed codespan or a partial table).
 * The fix-up pass is repeated until it makes no more changes, at most
 * `maxIncompleteTokensFixRounds` times.
 *
 * @returns The patched token list, or the given list when nothing needed fixing.
 */
export function fillInIncompleteTokens(tokens: marked.TokensList): marked.TokensList {
	let current = tokens;
	for (let round = 0; round < maxIncompleteTokensFixRounds; round++) {
		const patched = fillInIncompleteTokensOnce(current);
		if (!patched) {
			return current;
		}
		current = patched;
	}

	return current;
}
849

850
/**
 * Runs a single fix-up pass over `tokens`.
 *
 * In order of precedence it tries to complete: a table starting at the first paragraph that
 * has a line starting with `|`, the trailing list, the trailing paragraph, and finally a
 * trailing heading.
 *
 * @returns A new token list, or `null` when nothing was changed.
 */
function fillInIncompleteTokensOnce(tokens: marked.TokensList): marked.TokensList | null {
	// Index of the first token that is replaced by `replacement`
	let replaceFrom = tokens.findIndex(token => token.type === 'paragraph' && !!token.raw.match(/(\n|^)\|/));
	let replacement: marked.Token[] | undefined = replaceFrom >= 0
		? completeTable(tokens.slice(replaceFrom))
		: undefined;

	const lastToken = tokens.at(-1);
	if (!replacement && lastToken?.type === 'list') {
		const completedList = completeListItemPattern(lastToken as marked.Tokens.List);
		if (completedList) {
			replacement = [completedList];
			replaceFrom = tokens.length - 1;
		}
	}

	if (!replacement && lastToken?.type === 'paragraph') {
		// Only operates on a single token, because any newline that follows this should break these patterns
		const completedParagraph = completeSingleLinePattern(lastToken as marked.Tokens.Paragraph);
		if (completedParagraph) {
			replacement = [completedParagraph];
			replaceFrom = tokens.length - 1;
		}
	}

	if (replacement) {
		const patchedTokens = [
			...tokens.slice(0, replaceFrom),
			...replacement
		];
		(patchedTokens as marked.TokensList).links = tokens.links;
		return patchedTokens as marked.TokensList;
	}

	if (lastToken?.type === 'heading') {
		const relexedTokens = completeHeading(lastToken as marked.Tokens.Heading, mergeRawTokenText(tokens));
		if (relexedTokens) {
			return relexedTokens;
		}
	}

	return null;
}
898

899

900
/** Closes an unfinished codespan by re-lexing the end-trimmed token text with a backtick appended. */
function completeCodespan(token: marked.Token): marked.Token {
	const closing = '`';
	return completeWithString(token, closing, true);
}
903

904
/** Closes unfinished `*` emphasis by re-lexing the end-trimmed token text with `*` appended. */
function completeStar(tokens: marked.Token): marked.Token {
	const closing = '*';
	return completeWithString(tokens, closing, true);
}
907

908
/** Closes unfinished `_` emphasis by re-lexing the end-trimmed token text with `_` appended. */
function completeUnderscore(tokens: marked.Token): marked.Token {
	const closing = '_';
	return completeWithString(tokens, closing, true);
}
911

912
/** Closes an unfinished link target by re-lexing the untrimmed token text with `)` appended. */
function completeLinkTarget(tokens: marked.Token): marked.Token {
	const closing = ')';
	return completeWithString(tokens, closing, false);
}
915

916
/** Closes an unfinished quoted link title/argument by re-lexing the untrimmed token text with `")` appended. */
function completeLinkTargetArg(tokens: marked.Token): marked.Token {
	const closing = '")';
	return completeWithString(tokens, closing, false);
}
919

920
/** Closes an unfinished link text by re-lexing the untrimmed token text with a placeholder link target appended. */
function completeLinkText(tokens: marked.Token): marked.Token {
	const closing = '](https://microsoft.com)';
	return completeWithString(tokens, closing, false);
}
923

924
/** Closes unfinished `**` emphasis by re-lexing the end-trimmed token text with `**` appended. */
function completeDoublestar(tokens: marked.Token): marked.Token {
	const closing = '**';
	return completeWithString(tokens, closing, true);
}
927

928
/** Closes unfinished `__` emphasis by re-lexing the end-trimmed token text with `__` appended. */
function completeDoubleUnderscore(tokens: marked.Token): marked.Token {
	const closing = '__';
	return completeWithString(tokens, closing, true);
}
931

932
/**
 * Re-lexes the raw text of `tokens` with `closingString` appended and returns the first
 * resulting token.
 *
 * @param tokens A single token or a list of tokens whose raw text is merged.
 * @param closingString The characters that complete the unfinished construct.
 * @param shouldTrim Whether trailing whitespace is removed before `closingString` is appended.
 */
function completeWithString(tokens: marked.Token[] | marked.Token, closingString: string, shouldTrim = true): marked.Token {
	const tokenList = Array.isArray(tokens) ? tokens : [tokens];
	let rawText = mergeRawTokenText(tokenList);
	if (shouldTrim) {
		rawText = rawText.trimEnd();
	}

	// If it was completed correctly, this should be a single token.
	// Expecting either a Paragraph or a List
	const [firstToken] = marked.lexer(rawText + closingString);
	return firstToken as marked.Token;
}
940

941
/**
 * Tries to turn the start of a table (a header row, optionally followed by a partial
 * separator row) into a complete table by appending a full separator row and re-lexing.
 *
 * @returns The re-lexed tokens, or `undefined` if the text does not look like the start of a
 * well-formed table.
 */
function completeTable(tokens: marked.Token[]): marked.Token[] | undefined {
	const rawText = mergeRawTokenText(tokens);
	const lines = rawText.split('\n');

	let headerColumnCount: number | undefined; // The number of line1 col headers
	let sawSeparatorRow = false;
	for (const [index, rawLine] of lines.entries()) {
		const line = rawLine.trim();
		const startsWithPipe = /^\s*\|/.test(line);

		if (headerColumnCount === undefined) {
			// Still looking for the header row
			if (startsWithPipe) {
				const headerCells = line.match(/(\|[^\|]+)(?=\||$)/g);
				if (headerCells) {
					headerColumnCount = headerCells.length;
				}
			}
			continue;
		}

		if (!startsWithPipe) {
			// The line after the header row isn't a valid separator row, so the table is malformed, don't fix it up
			return undefined;
		}
		if (index !== lines.length - 1) {
			// We got the line1 header row, and the line2 separator row, but there are more lines, and it wasn't parsed as a table!
			// That's strange and means that the table is probably malformed in the source, so I won't try to patch it up.
			return undefined;
		}

		// Got a line2 separator row- partial or complete, doesn't matter, we'll replace it with a correct one
		sawSeparatorRow = true;
	}

	if (headerColumnCount === undefined || headerColumnCount <= 0) {
		return undefined;
	}

	const prefixText = sawSeparatorRow ? lines.slice(0, -1).join('\n') : rawText;
	const headerEndsInPipe = /\|\s*$/.test(prefixText);
	const separatorRow = `\n|${' --- |'.repeat(headerColumnCount)}`;
	return marked.lexer(prefixText + (headerEndsInPipe ? '' : '|') + separatorRow);
}
980

981

982
Product

Resources

Company