Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/base/browser/markdownRenderer.ts
3292 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { onUnexpectedError } from '../common/errors.js';
7
import { escapeDoubleQuotes, IMarkdownString, MarkdownStringTrustedOptions, parseHrefAndDimensions, removeMarkdownEscapes } from '../common/htmlContent.js';
8
import { markdownEscapeEscapedIcons } from '../common/iconLabels.js';
9
import { defaultGenerator } from '../common/idGenerator.js';
10
import { KeyCode } from '../common/keyCodes.js';
11
import { Lazy } from '../common/lazy.js';
12
import { DisposableStore } from '../common/lifecycle.js';
13
import * as marked from '../common/marked/marked.js';
14
import { parse } from '../common/marshalling.js';
15
import { FileAccess, Schemas } from '../common/network.js';
16
import { cloneAndChange } from '../common/objects.js';
17
import { dirname, resolvePath } from '../common/resources.js';
18
import { escape } from '../common/strings.js';
19
import { URI } from '../common/uri.js';
20
import * as DOM from './dom.js';
21
import * as domSanitize from './domSanitize.js';
22
import { convertTagToPlaintext } from './domSanitize.js';
23
import { StandardKeyboardEvent } from './keyboardEvent.js';
24
import { StandardMouseEvent } from './mouseEvent.js';
25
import { renderLabelWithIcons } from './ui/iconLabel/iconLabels.js';
26
27
/**
 * Callback invoked when a link in rendered markdown is activated.
 *
 * @param linkContent The link target read from the activated anchor's `data-href` attribute
 * (resolved against the markdown string's base URI when one is set).
 * @param mdStr The markdown string the link was rendered from.
 */
export type MarkdownActionHandler = (linkContent: string, mdStr: IMarkdownString) => void;
28
29
/**
 * Options that control how {@link renderMarkdown} turns a markdown string into DOM.
 */
export interface MarkdownRenderOptions {
	/**
	 * Asynchronous renderer for code blocks. The resolved element replaces the
	 * placeholder `div[data-code]` emitted for the block. Only consulted when
	 * {@link codeBlockRendererSync} is not provided.
	 */
	readonly codeBlockRenderer?: (languageId: string, value: string) => Promise<HTMLElement>;

	/**
	 * Synchronous renderer for code blocks. Takes precedence over {@link codeBlockRenderer}
	 * and additionally receives the raw text of the code token.
	 */
	readonly codeBlockRendererSync?: (languageId: string, value: string, raw?: string) => HTMLElement;

	/**
	 * Called after asynchronously rendered code blocks have been inserted, and each
	 * time an image inside the rendered output fires its `load` event.
	 */
	readonly asyncRenderCallback?: () => void;

	/**
	 * When set, `click`, `auxclick` and Space/Enter `keydown` events on rendered links
	 * are routed to this handler.
	 */
	readonly actionHandler?: MarkdownActionHandler;

	/**
	 * When truthy, the lexed tokens are passed through {@link fillInIncompleteTokens}
	 * before being parsed.
	 */
	readonly fillInIncompleteTokens?: boolean;

	/** Adjusts the sanitization applied to the rendered html. */
	readonly sanitizerConfig?: MarkdownSanitizerConfig;

	/** Options forwarded to marked when lexing/parsing. */
	readonly markedOptions?: MarkdownRendererMarkedOptions;

	/** Extensions handed to the `Marked` instance created for this render. */
	readonly markedExtensions?: marked.MarkedExtension[];
}
46
47
/**
 * The subset of `Marked` options that callers of {@link renderMarkdown} may set.
 * Both values are forwarded to marked unchanged.
 */
export interface MarkdownRendererMarkedOptions {
	/** Forwarded as marked's `gfm` option. */
	readonly gfm?: boolean;

	/** Forwarded as marked's `breaks` option. */
	readonly breaks?: boolean;
}
54
55
/**
 * Caller-supplied tweaks to the sanitization of rendered markdown.
 */
export interface MarkdownSanitizerConfig {
	/**
	 * Forwarded to the DOM sanitizer. Within this file it also means: html tokens are
	 * escaped instead of dropped when html is not supported, and non-checkbox inputs
	 * are replaced by their plaintext form instead of being removed.
	 */
	readonly replaceWithPlaintext?: boolean;

	/** Replaces the default list of allowed tags ({@link allowedMarkdownHtmlTags}). */
	readonly allowedTags?: {
		readonly override: readonly string[];
	};

	/** Replaces the default list of allowed attributes ({@link allowedMarkdownHtmlAttributes}). */
	readonly allowedAttributes?: {
		readonly override: readonly (string | domSanitize.SanitizeAttributeRule)[];
	};

	/** Extra link schemes that are allowed in addition to the built-in ones. */
	readonly allowedLinkSchemes?: {
		readonly augment: readonly string[];
	};

	/**
	 * Consulted for media elements whose resolved `src` is neither a `file` nor a `data` uri.
	 * When it returns `false` the element is swapped for a placeholder built from its outer html.
	 */
	readonly remoteImageIsAllowed?: (uri: URI) => boolean;
}
68
69
/**
 * Renderer overrides for images, paragraphs and links that are installed on
 * every renderer created by `createMarkdownRenderer`.
 */
const defaultMarkedRenderers = Object.freeze({
	/**
	 * Emits an `<img>` tag. Dimensions parsed out of the href are appended
	 * after the `src`, `alt` and `title` attributes.
	 */
	image: ({ href, title, text }: marked.Tokens.Image): string => {
		const attrs: string[] = [];
		let sizeAttrs: string[] = [];
		if (href) {
			const parsed = parseHrefAndDimensions(href);
			sizeAttrs = parsed.dimensions;
			attrs.push(`src="${escapeDoubleQuotes(parsed.href)}"`);
		}
		if (text) {
			attrs.push(`alt="${escapeDoubleQuotes(text)}"`);
		}
		if (title) {
			attrs.push(`title="${escapeDoubleQuotes(title)}"`);
		}
		return '<img ' + [...attrs, ...sizeAttrs].join(' ') + '>';
	},

	/** Wraps the inline rendering of the paragraph's tokens in `<p>`. */
	paragraph(this: marked.Renderer, { tokens }: marked.Tokens.Paragraph): string {
		const inner = this.parser.parseInline(tokens);
		return `<p>${inner}</p>`;
	},

	/**
	 * Emits a non-draggable `<a>` whose href is html-encoded and whose title
	 * falls back to the href when no title is given.
	 */
	link(this: marked.Renderer, { href, title, tokens }: marked.Tokens.Link): string {
		const renderedText = this.parser.parseInline(tokens);
		if (typeof href !== 'string') {
			return '';
		}

		// Remove markdown escapes. Workaround for https://github.com/chjj/marked/issues/829
		const isRawLink = href === renderedText;
		const linkText = isRawLink ? removeMarkdownEscapes(renderedText) : renderedText;

		const linkTitle = typeof title === 'string' ? escapeDoubleQuotes(removeMarkdownEscapes(title)) : '';

		// HTML-encode the href in a single pass over the characters that need it
		const htmlEntities: Record<string, string> = {
			'&': '&amp;',
			'<': '&lt;',
			'>': '&gt;',
			'"': '&quot;',
			'\'': '&#39;',
		};
		const encodedHref = removeMarkdownEscapes(href).replace(/[&<>"']/g, ch => htmlEntities[ch]);

		return `<a href="${encodedHref}" title="${linkTitle || encodedHref}" draggable="false">${linkText}</a>`;
	},
});
117
118
/**
 * Low-level way to create an html element from a markdown string.
 *
 * **Note** that for most cases you should be using {@link import('../../editor/browser/widget/markdownRenderer/browser/markdownRenderer.js').MarkdownRenderer MarkdownRenderer}
 * which comes with support for pretty code block rendering and which uses the default way of handling links.
 *
 * @param markdown The markdown string to render.
 * @param options Rendering options, see {@link MarkdownRenderOptions}.
 * @param target Optional element to render into. Its content is replaced with the rendered elements.
 * @returns The element that holds the rendered content (`target` when given) and a
 * `dispose` function that removes the listeners registered during rendering.
 */
export function renderMarkdown(markdown: IMarkdownString, options: MarkdownRenderOptions = {}, target?: HTMLElement): { element: HTMLElement; dispose: () => void } {
	const store = new DisposableStore();
	let disposed = false;

	const markedInstance = new marked.Marked(...(options.markedExtensions ?? []));
	const { renderer, codeBlocks, syncCodeBlocks } = createMarkdownRenderer(markedInstance, options, markdown);
	const source = preprocessMarkdownString(markdown);

	let html: string;
	if (!options.fillInIncompleteTokens) {
		html = markedInstance.parse(source, { ...options.markedOptions, renderer, async: false });
	} else {
		// The defaults are applied by parse but not lexer()/parser(), and they need to be present
		const lexAndParseOptions: marked.MarkedOptions = {
			...markedInstance.defaults,
			...options.markedOptions,
			renderer
		};
		const patchedTokens = fillInIncompleteTokens(markedInstance.lexer(source, lexAndParseOptions));
		html = markedInstance.parser(patchedTokens, lexAndParseOptions);
	}

	// Rewrite theme icons
	if (markdown.supportThemeIcons) {
		html = renderLabelWithIcons(html)
			.map(part => typeof part === 'string' ? part : part.outerHTML)
			.join('');
	}

	const container = document.createElement('div');
	domSanitize.safeSetInnerHtml(container, html, getDomSanitizerConfig(markdown.isTrusted ?? false, options.sanitizerConfig ?? {}));

	// Rewrite links and images before potentially inserting them into the real dom
	rewriteRenderedLinks(markdown, options, container);

	const outElement: HTMLElement = target ?? container;
	if (target) {
		DOM.reset(target, ...container.children);
	}

	// Swaps every code block placeholder for its rendered element, looked up by id
	const fillCodePlaceholders = (renderedById: Map<string, HTMLElement>): void => {
		for (const placeholder of outElement.querySelectorAll<HTMLDivElement>(`div[data-code]`)) {
			const rendered = renderedById.get(placeholder.dataset['code'] ?? '');
			if (rendered) {
				DOM.reset(placeholder, rendered);
			}
		}
	};

	if (codeBlocks.length > 0) {
		Promise.all(codeBlocks).then(tuples => {
			if (disposed) {
				return;
			}
			fillCodePlaceholders(new Map(tuples));
			options.asyncRenderCallback?.();
		});
	} else if (syncCodeBlocks.length > 0) {
		fillCodePlaceholders(new Map(syncCodeBlocks));
	}

	// Signal size changes for image tags
	if (options.asyncRenderCallback) {
		for (const image of outElement.getElementsByTagName('img')) {
			const loadListener = store.add(DOM.addDisposableListener(image, 'load', () => {
				loadListener.dispose();
				options.asyncRenderCallback!();
			}));
		}
	}

	// Add event listeners for links
	if (options.actionHandler) {
		const onPointerActivation = (e: PointerEvent) => {
			const mouseEvent = new StandardMouseEvent(DOM.getWindow(outElement), e);
			if (mouseEvent.leftButton || mouseEvent.middleButton) {
				activateLink(markdown, options, mouseEvent);
			}
		};
		store.add(DOM.addDisposableListener(outElement, 'click', onPointerActivation));
		store.add(DOM.addDisposableListener(outElement, 'auxclick', onPointerActivation));

		store.add(DOM.addDisposableListener(outElement, 'keydown', (e) => {
			const keyboardEvent = new StandardKeyboardEvent(e);
			if (keyboardEvent.equals(KeyCode.Space) || keyboardEvent.equals(KeyCode.Enter)) {
				activateLink(markdown, options, keyboardEvent);
			}
		}));
	}

	// Remove/disable inputs. Iterate over a snapshot because elements are removed while looping.
	for (const input of [...outElement.getElementsByTagName('input')]) {
		if (input.attributes.getNamedItem('type')?.value === 'checkbox') {
			input.setAttribute('disabled', '');
			continue;
		}
		if (options.sanitizerConfig?.replaceWithPlaintext) {
			input.parentElement?.replaceChild(convertTagToPlaintext(input), input);
		} else {
			input.remove();
		}
	}

	return {
		element: outElement,
		dispose: () => {
			disposed = true;
			store.dispose();
		}
	};
}
247
248
/**
 * Post-processes the sanitized output in place:
 * - media elements get their `src` resolved against the base uri and rewritten for browser use,
 * - anchors get their `href` cleared; the target is kept in `data-href`, or the link is
 *   unwrapped entirely when its target is not allowed.
 */
function rewriteRenderedLinks(markdown: IMarkdownString, options: MarkdownRenderOptions, root: HTMLElement) {
	for (const media of root.querySelectorAll('img, audio, video, source')) {
		const rawSrc = media.getAttribute('src'); // Get the raw 'src' attribute value as text, not the resolved 'src'
		if (!rawSrc) {
			continue;
		}

		let resolvedSrc = rawSrc;
		try {
			if (markdown.baseUri) { // absolute or relative local path, or file: uri
				resolvedSrc = resolveWithBaseUri(URI.from(markdown.baseUri), resolvedSrc);
			}
		} catch (err) {
			// best effort: keep the unresolved value
		}

		media.setAttribute('src', massageHref(markdown, resolvedSrc, true));

		const remoteImageIsAllowed = options.sanitizerConfig?.remoteImageIsAllowed;
		if (remoteImageIsAllowed) {
			const uri = URI.parse(resolvedSrc);
			const isLocal = uri.scheme === Schemas.file || uri.scheme === Schemas.data;
			if (!isLocal && !remoteImageIsAllowed(uri)) {
				media.replaceWith(DOM.$('', undefined, media.outerHTML));
			}
		}
	}

	// Decides whether a link target must not be kept
	const shouldDropLink = (href: string | null): href is null | '' => {
		return !href
			|| /^data:|javascript:/i.test(href)
			|| (/^command:/i.test(href) && !markdown.isTrusted)
			|| /^command:(\/\/\/)?_workbench\.downloadResource/i.test(href);
	};

	for (const anchor of root.querySelectorAll('a')) {
		const rawHref = anchor.getAttribute('href'); // Get the raw 'href' attribute value as text, not the resolved 'href'
		anchor.setAttribute('href', ''); // Clear out href. We use the `data-href` for handling clicks instead

		if (shouldDropLink(rawHref)) {
			// drop the link but keep its content
			anchor.replaceWith(...anchor.childNodes);
			continue;
		}

		const massaged = massageHref(markdown, rawHref, false);
		// With a base uri the raw href (not the massaged one) is what gets resolved
		anchor.dataset.href = markdown.baseUri
			? resolveWithBaseUri(URI.from(markdown.baseUri), rawHref)
			: massaged;
	}
}
288
289
/**
 * Builds the marked renderer used by `renderMarkdown`.
 *
 * Besides the renderer, two collections are returned which are filled while marked renders:
 * `codeBlocks` (pending `[id, element]` tuples from the async code block renderer) and
 * `syncCodeBlocks` (tuples from the sync one). The id matches the `data-code` attribute
 * of the placeholder emitted for the block.
 */
function createMarkdownRenderer(marked: marked.Marked, options: MarkdownRenderOptions, markdown: IMarkdownString): { renderer: marked.Renderer; codeBlocks: Promise<[string, HTMLElement]>[]; syncCodeBlocks: [string, HTMLElement][] } {
	const renderer = new marked.Renderer(options.markedOptions);
	renderer.image = defaultMarkedRenderers.image;
	renderer.link = defaultMarkedRenderers.link;
	renderer.paragraph = defaultMarkedRenderers.paragraph;

	// Will collect [id, renderedElement] tuples
	const codeBlocks: Promise<[string, HTMLElement]>[] = [];
	const syncCodeBlocks: [string, HTMLElement][] = [];

	// Placeholder that is later swapped for the rendered code block
	const placeholderHtml = (id: string, text: string): string =>
		`<div class="code" data-code="${id}">${escape(text)}</div>`;

	const { codeBlockRendererSync, codeBlockRenderer } = options;
	if (codeBlockRendererSync) {
		renderer.code = ({ text, lang, raw }: marked.Tokens.Code) => {
			const id = defaultGenerator.nextId();
			const element = codeBlockRendererSync(postProcessCodeBlockLanguageId(lang), text, raw);
			syncCodeBlocks.push([id, element]);
			return placeholderHtml(id, text);
		};
	} else if (codeBlockRenderer) {
		renderer.code = ({ text, lang }: marked.Tokens.Code) => {
			const id = defaultGenerator.nextId();
			const pending = codeBlockRenderer(postProcessCodeBlockLanguageId(lang), text);
			codeBlocks.push(pending.then(element => [id, element]));
			return placeholderHtml(id, text);
		};
	}

	if (!markdown.supportHtml) {
		// Note: we always pass the output through dompurify after this so that we don't rely on
		// marked for real sanitization.
		renderer.html = ({ text }) => {
			if (options.sanitizerConfig?.replaceWithPlaintext) {
				return escape(text);
			}

			// Only trusted markdown may keep html, and only what matches the span pattern
			const keep = !!markdown.isTrusted && /^(<span[^>]+>)|(<\/\s*span>)$/.test(text);
			return keep ? text : '';
		};
	}

	return { renderer, codeBlocks, syncCodeBlocks };
}
329
330
/**
 * Prepares the raw markdown text for rendering.
 *
 * - Text longer than 100,000 characters is cut off and terminated with an ellipsis,
 *   because values that are too long will freeze the UI.
 * - When theme icons are supported, escaped icons are escaped for markdown.
 *
 * @param markdown The markdown string to read the text from. A missing `value` is treated as empty text.
 * @returns The text to hand to marked.
 */
function preprocessMarkdownString(markdown: IMarkdownString) {
	const maxLength = 100_000;

	// Tolerate a missing value the same way `renderAsPlaintext` does
	let value = markdown.value ?? '';

	// values that are too long will freeze the UI
	if (value.length > maxLength) {
		// `slice` instead of the deprecated `substr`
		value = `${value.slice(0, maxLength)}…`;
	}

	// escape theme icons
	if (markdown.supportThemeIcons) {
		value = markdownEscapeEscapedIcons(value);
	}

	return value;
}
345
346
/**
 * Routes an activation event (mouse or keyboard) on a rendered link to the action handler.
 *
 * Nothing happens unless the event target sits inside an `a[data-href]`. Once such an
 * anchor is found, the event's default action is always prevented, and errors thrown
 * while handling are reported through `onUnexpectedError`.
 */
function activateLink(mdStr: IMarkdownString, options: MarkdownRenderOptions, event: StandardMouseEvent | StandardKeyboardEvent): void {
	const anchor = event.target.closest('a[data-href]');
	if (!DOM.isHTMLElement(anchor)) {
		return;
	}

	try {
		const storedHref = anchor.dataset['href'];
		if (storedHref) {
			const href = mdStr.baseUri
				? resolveWithBaseUri(URI.from(mdStr.baseUri), storedHref)
				: storedHref;
			options.actionHandler?.(href, mdStr);
		}
	} catch (err) {
		onUnexpectedError(err);
	} finally {
		event.preventDefault();
	}
}
366
367
/**
 * Rewrites an encoded, JSON-like uri component: values that are keys of `markdown.uris`
 * are replaced by the revived uri registered under that key, then the data is
 * re-serialized and re-encoded.
 *
 * @returns The rewritten component, or `part` unchanged when it cannot be decoded/parsed
 * or parses to a falsy value.
 */
function uriMassage(markdown: IMarkdownString, part: string): string {
	const tryParse = (): unknown => {
		try {
			return parse(decodeURIComponent(part));
		} catch (e) {
			// ignore
			return undefined;
		}
	};

	const parsed = tryParse();
	if (!parsed) {
		return part;
	}

	const replaced = cloneAndChange(parsed, value => {
		const known = markdown.uris && markdown.uris[value];
		return known ? URI.revive(markdown.uris[value]) : undefined;
	});
	return encodeURIComponent(JSON.stringify(replaced));
}
386
387
/**
 * Maps an href found in the rendered output to the value that should actually be used.
 *
 * @param markdown Source of the `uris` lookup table.
 * @param href The href as found in the output.
 * @param isDomUri `true` when the result ends up as a `src` attribute of a dom node.
 */
function massageHref(markdown: IMarkdownString, href: string, isDomUri: boolean): string {
	const registered = URI.revive(markdown.uris && markdown.uris[href]);

	if (isDomUri) {
		if (href.startsWith(Schemas.data + ':')) {
			return href;
		}
		// this URI will end up as "src"-attribute of a dom node
		// and because of that special rewriting needs to be done
		// so that the URI uses a protocol that's understood by
		// browsers (like http or https)
		return FileAccess.uriToBrowserUri(registered ?? URI.parse(href)).toString(true);
	}

	if (!registered) {
		return href;
	}
	if (URI.parse(href).toString() === registered.toString()) {
		return href; // no transformation performed
	}

	const massaged = registered.query
		? registered.with({ query: uriMassage(markdown, registered.query) })
		: registered;
	return massaged.toString();
}
414
415
/**
 * Extracts the language id from the info string of a fenced code block.
 *
 * The id is everything up to the first whitespace, `:`, `,`, `{` or `?`, so that
 * trailing attributes such as `js {1,3}` or `python:main.py` are ignored.
 *
 * The previous pattern listed its alternatives inside a character class, which made
 * `+` and `|` separators too and truncated ids such as `c++` to `c`.
 *
 * @param lang The info string of the code block, if any.
 * @returns The language id, or the empty string when there is none.
 */
function postProcessCodeBlockLanguageId(lang: string | undefined): string {
	if (!lang) {
		return '';
	}

	const separatorIndex = lang.search(/[\s:,{?]/);
	return separatorIndex === -1 ? lang : lang.slice(0, separatorIndex);
}
426
427
/**
 * Resolves `href` against `baseUri`.
 *
 * An href that already starts with a scheme is returned untouched. Otherwise it is
 * resolved relative to `baseUri` itself when the base path ends with `/`, and relative
 * to the base's parent directory when it does not.
 */
function resolveWithBaseUri(baseUri: URI, href: string): string {
	if (/^\w[\w\d+.-]*:/.test(href)) {
		// already has a scheme
		return href;
	}

	const resolutionRoot = baseUri.path.endsWith('/') ? baseUri : dirname(baseUri);
	return resolvePath(resolutionRoot, href).toString();
}
439
440
/**
 * Sanitizes rendered markdown html using the sanitizer configuration that
 * `getDomSanitizerConfig` derives from the trust level and the given options.
 */
function sanitizeRenderedMarkdown(
	renderedMarkdown: string,
	isTrusted: boolean | MarkdownStringTrustedOptions,
	options: MarkdownSanitizerConfig = {},
): TrustedHTML {
	return domSanitize.sanitizeHtml(renderedMarkdown, getDomSanitizerConfig(isTrusted, options));
}
448
449
/**
 * Tags that are kept by default when sanitizing rendered markdown: the basic
 * markup tags plus `input`.
 */
export const allowedMarkdownHtmlTags = Object.freeze([
	...domSanitize.basicMarkupHtmlTags,

	// Allow inputs for rendering checkboxes. Other types of inputs are removed and the inputs are always disabled
	'input',
]);
453
454
/**
 * Attributes that are kept by default when sanitizing rendered markdown. Plain strings
 * name attributes outright; rule objects decide per element and value.
 */
export const allowedMarkdownHtmlAttributes = Object.freeze<Array<string | domSanitize.SanitizeAttributeRule>>([
	'align',
	'autoplay',
	'alt',
	'colspan',
	'controls',
	'draggable',
	'height',
	'href',
	'loop',
	'muted',
	'playsinline',
	'poster',
	'rowspan',
	'src',
	'target',
	'title',
	'type',
	'width',
	'start',

	// Input (For disabled inputs)
	'checked',
	'disabled',
	'value',

	// Custom markdown attributes
	'data-code',
	'data-href',

	// Only allow very specific styles, and only on spans
	{
		attributeName: 'style',
		shouldKeep: (element, data) =>
			element.tagName === 'SPAN'
			&& data.attrName === 'style'
			&& /^(color\:(#[0-9a-fA-F]+|var\(--vscode(-[a-zA-Z0-9]+)+\));)?(background-color\:(#[0-9a-fA-F]+|var\(--vscode(-[a-zA-Z0-9]+)+\));)?(border-radius:[0-9]+px;)?$/.test(data.attrValue),
	},

	// Only allow codicons for classes, and only on spans
	{
		attributeName: 'class',
		shouldKeep: (element, data) =>
			element.tagName === 'SPAN'
			&& data.attrName === 'class'
			&& /^codicon codicon-[a-z\-]+( codicon-modifier-[a-z\-]+)?$/.test(data.attrValue),
	},
]);
510
511
/**
 * Derives the DOM sanitizer configuration for rendered markdown.
 *
 * @param isTrusted Trust level of the markdown; any truthy value additionally allows `command:` links.
 * @param options Caller overrides for tags and attributes, and extra link schemes.
 */
function getDomSanitizerConfig(isTrusted: boolean | MarkdownStringTrustedOptions, options: MarkdownSanitizerConfig): domSanitize.DomSanitizerConfig {
	const linkSchemes = [
		Schemas.http,
		Schemas.https,
		Schemas.mailto,
		Schemas.file,
		Schemas.vscodeFileResource,
		Schemas.vscodeRemote,
		Schemas.vscodeRemoteResource,
		Schemas.vscodeNotebookCell,
		...(isTrusted ? [Schemas.command] : []),
		...(options.allowedLinkSchemes?.augment ?? []),
	];

	const mediaSchemes = [
		Schemas.http,
		Schemas.https,
		Schemas.data,
		Schemas.file,
		Schemas.vscodeFileResource,
		Schemas.vscodeRemote,
		Schemas.vscodeRemoteResource,
	];

	return {
		// allowedTags should included everything that markdown renders to.
		// Since we have our own sanitize function for marked, it's possible we missed some tag so let dompurify make sure.
		// HTML tags that can result from markdown are from reading https://spec.commonmark.org/0.29/
		// HTML table tags that can result from markdown are from https://github.github.com/gfm/#tables-extension-
		allowedTags: {
			override: options.allowedTags?.override ?? allowedMarkdownHtmlTags
		},
		allowedAttributes: {
			override: options.allowedAttributes?.override ?? allowedMarkdownHtmlAttributes,
		},
		allowedLinkProtocols: {
			override: linkSchemes,
		},
		allowedMediaProtocols: {
			override: mediaSchemes
		},
		replaceWithPlaintext: options.replaceWithPlaintext,
	};
}
559
560
/**
 * Renders `str` as plaintext, stripping out Markdown syntax if it's a {@link IMarkdownString}.
 *
 * For example `# Header` would be output as `Header`.
 *
 * Plain strings are returned unchanged. Markdown text longer than 100,000 characters is
 * cut off and terminated with an ellipsis before it is processed.
 *
 * @param str The string or markdown string to render.
 * @param options Rendering options.
 * @returns The trimmed plaintext.
 */
export function renderAsPlaintext(str: IMarkdownString | string, options?: {
	/** Controls if the ``` of code blocks should be preserved in the output or not */
	readonly includeCodeBlocksFences?: boolean;
}): string {
	if (typeof str === 'string') {
		return str;
	}

	// values that are too long will freeze the UI
	const maxLength = 100_000;
	let value = str.value ?? '';
	if (value.length > maxLength) {
		// `slice` instead of the deprecated `substr`
		value = `${value.slice(0, maxLength)}…`;
	}

	const renderer = options?.includeCodeBlocksFences ? plainTextWithCodeBlocksRenderer.value : plainTextRenderer.value;
	const html = marked.parse(value, { async: false, renderer });

	return sanitizeRenderedMarkdown(html, /* isTrusted */ false, {})
		.toString()
		// Undo the html escaping of the handful of entities we know about
		.replace(/&(#\d+|[a-zA-Z]+);/g, m => unescapeInfo.get(m) ?? m)
		.trim();
}
585
586
/**
 * Html entities that `renderAsPlaintext` converts back to the character they stand for.
 */
const unescapeInfo = new Map<string, string>(Object.entries({
	'&quot;': '"',
	'&nbsp;': ' ',
	'&amp;': '&',
	'&#39;': '\'',
	'&lt;': '<',
	'&gt;': '>',
}));
594
595
/**
 * Creates a marked renderer that outputs text instead of html: markup is dropped,
 * block-level constructs end with a newline, and code is html-escaped.
 */
function createPlainTextRenderer(): marked.Renderer {
	const renderer = new marked.Renderer();

	// Handlers shared by several token types
	const keepText = ({ text }: { text: string }): string => text;
	const textThenNewline = ({ text }: { text: string }): string => text + '\n';
	const escapedText = ({ text }: { text: string }): string => escape(text);
	const nothing = (): string => '';
	const inlineThenNewline = function (this: marked.Renderer, { tokens }: { tokens: marked.Token[] }): string {
		return this.parser.parseInline(tokens) + '\n';
	};

	// Block level
	renderer.code = escapedText;
	renderer.blockquote = textThenNewline;
	renderer.html = nothing;
	renderer.heading = inlineThenNewline;
	renderer.hr = nothing;
	renderer.list = function ({ items }: marked.Tokens.List): string {
		const renderedItems = items.map(item => this.listitem(item));
		return renderedItems.join('\n') + '\n';
	};
	renderer.listitem = textThenNewline;
	renderer.paragraph = inlineThenNewline;
	renderer.table = function ({ header, rows }: marked.Tokens.Table): string {
		const renderRow = (cells: marked.Tokens.TableCell[]): string =>
			cells.map(cell => this.tablecell(cell)).join(' ');
		return renderRow(header) + '\n' + rows.map(renderRow).join('\n') + '\n';
	};
	renderer.tablerow = keepText;
	renderer.tablecell = function ({ tokens }: marked.Tokens.TableCell): string {
		return this.parser.parseInline(tokens);
	};

	// Inline level
	renderer.strong = keepText;
	renderer.em = keepText;
	renderer.codespan = escapedText;
	renderer.br = (): string => '\n';
	renderer.del = keepText;
	renderer.image = nothing;
	renderer.text = keepText;
	renderer.link = keepText;

	return renderer;
}
657
/** Lazily created plain text renderer, used by `renderAsPlaintext` when code block fences are not requested. */
const plainTextRenderer = new Lazy<marked.Renderer>(createPlainTextRenderer);
658
659
/**
 * Lazily created variant of the plain text renderer that keeps the ``` fences
 * around (escaped) code block content.
 */
const plainTextWithCodeBlocksRenderer = new Lazy<marked.Renderer>(() => {
	const fencedRenderer = createPlainTextRenderer();
	fencedRenderer.code = ({ text }: marked.Tokens.Code): string => `\n\`\`\`\n${escape(text)}\n\`\`\`\n`;
	return fencedRenderer;
});
666
667
/**
 * Concatenates the `raw` source text of the given tokens, in order.
 */
function mergeRawTokenText(tokens: marked.Token[]): string {
	return tokens.reduce((merged, token) => merged + token.raw, '');
}
674
675
/**
 * Tries to close an inline markdown construct that was left open at the end of `token`
 * (codespan, emphasis, link text or link target).
 *
 * Text subtokens are inspected from last to first. For each one only its last line is
 * looked at, and the first pattern that matches decides which completion is applied.
 *
 * @returns The re-lexed, completed token, or `undefined` when nothing matched.
 */
function completeSingleLinePattern(token: marked.Tokens.Text | marked.Tokens.Paragraph): marked.Token | undefined {
	const subtokens = token.tokens;
	if (!subtokens) {
		return undefined;
	}

	for (let index = subtokens.length - 1; index >= 0; index--) {
		const subtoken = subtokens[index];
		if (subtoken.type !== 'text') {
			continue;
		}

		// Everything after the last newline
		const lastLine = subtoken.raw.slice(subtoken.raw.lastIndexOf('\n') + 1);

		if (lastLine.includes('`')) {
			return completeCodespan(token);
		}
		if (lastLine.includes('**')) {
			return completeDoublestar(token);
		}
		if (/\*\w/.test(lastLine)) {
			return completeStar(token);
		}
		if (/(^|\s)__\w/.test(lastLine)) {
			return completeDoubleUnderscore(token);
		}
		if (/(^|\s)_\w/.test(lastLine)) {
			return completeUnderscore(token);
		}

		const inLinkTarget =
			// Text with start of link target
			hasLinkTextAndStartOfLinkTarget(lastLine) ||
			// This token doesn't have the link text, eg if it contains other markdown constructs that are in other subtokens.
			// But some preceding token does have an unbalanced [ at least
			hasStartOfLinkTargetAndNoLinkText(lastLine) && subtokens.slice(0, index).some(t => t.type === 'text' && t.raw.match(/\[[^\]]*$/));

		if (inLinkTarget) {
			const [next, afterNext] = subtokens.slice(index + 1);

			// A markdown link can look like
			// [link text](https://microsoft.com "more text")
			// Where "more text" is a title for the link or an argument to a vscode command link
			const inLinkTargetArg =
				// If the link was parsed as a link, then look for a link token and a text token with a quote
				next?.type === 'link' && afterNext?.type === 'text' && afterNext.raw.match(/^ *"[^"]*$/) ||
				// And if the link was not parsed as a link (eg command link), just look for a single quote in this token
				lastLine.match(/^[^"]* +"[^"]*$/);

			return inLinkTargetArg ? completeLinkTargetArg(token) : completeLinkTarget(token);
		}

		// Contains the start of link text, and no following tokens contain the link target
		if (/(^|\s)\[\w*[^\]]*$/.test(lastLine)) {
			return completeLinkText(token);
		}
	}

	return undefined;
}
738
739
/**
 * Whether `str` contains a complete link text followed by the opening of a link
 * target, e.g. `[text](htt`. The `[` must be at the start or follow whitespace.
 */
function hasLinkTextAndStartOfLinkTarget(str: string): boolean {
	return /(^|\s)\[.*\]\(\w*/.test(str);
}
742
743
/**
 * Whether `str` contains the end of a link text and an unclosed link target
 * (`](...`) without the `[` that opens the link text.
 */
function hasStartOfLinkTargetAndNoLinkText(str: string): boolean {
	return /^[^\[]*\]\([^\)]*$/.test(str);
}
746
747
/**
 * Tries to patch up an incomplete last item of `list`.
 *
 * Two situations are handled:
 * - the last item ends in a text token with an unfinished inline construct, which is
 *   completed via `completeSingleLinePattern` and the list is re-lexed, and
 * - the list (or a nested list at its end) ends in a heading token, in which case the
 *   list is re-lexed with ` &nbsp;` appended.
 *
 * @returns The re-lexed list, or `undefined` when there is nothing to fix or the
 * result is not a list.
 */
function completeListItemPattern(list: marked.Tokens.List): marked.Tokens.List | undefined {
	// Patch up this one list item
	const lastListItem = list.items[list.items.length - 1];
	const lastListSubToken = lastListItem.tokens ? lastListItem.tokens[lastListItem.tokens.length - 1] : undefined;

	/*
	Example list token structures:

	list
		list_item
			text
				text
				codespan
				link
		list_item
			text
			code // Complete indented codeblock
		list_item
			text
			space
			text
				text // Incomplete indented codeblock
		list_item
			text
			list // Nested list
				list_item
					text
						text

	Contrast with paragraph:
	paragraph
		text
		codespan
	*/

	// Lexes `raw` and returns its first token when that is a list
	const relexAsList = (raw: string): marked.Tokens.List | undefined => {
		const first = marked.lexer(raw)[0] as marked.Tokens.List;
		// Anything else means something went wrong
		return first.type === 'list' ? first : undefined;
	};

	const listEndsInHeading = (candidate: marked.Tokens.List): boolean => {
		// A list item can be rendered as a heading for some reason when it has a subitem where we haven't rendered the text yet like this:
		// 1. list item
		//    -
		const tail = candidate.items.at(-1)?.tokens.at(-1);
		return tail?.type === 'heading' || tail?.type === 'list' && listEndsInHeading(tail as marked.Tokens.List);
	};

	// NOTE(review): the `inRawBlock` check inspects the list item, not its last sub token - confirm this is intended
	const endsInPlainText = lastListSubToken?.type === 'text' && !('inRawBlock' in lastListItem); // Why does Tag have a type of 'text'
	if (!endsInPlainText) {
		return listEndsInHeading(list) ? relexAsList(list.raw.trim() + ' &nbsp;') : undefined;
	}

	const completed = completeSingleLinePattern(lastListSubToken as marked.Tokens.Text);
	if (!completed || completed.type !== 'paragraph') { // 'text' item inside the list item turns into paragraph
		// Nothing to fix, or not a pattern we were expecting
		return undefined;
	}

	const precedingItemsRaw = mergeRawTokenText(list.items.slice(0, -1));

	// Grabbing the `- ` or `1. ` or `* ` off the list item because I can't find a better way to do this
	const itemLead = lastListItem.raw.match(/^(\s*(-|\d+\.|\*) +)/)?.[0];
	if (!itemLead) {
		// Is badly formatted
		return undefined;
	}

	const lastItemRaw = itemLead +
		mergeRawTokenText(lastListItem.tokens.slice(0, -1)) +
		completed.raw;

	return relexAsList(precedingItemsRaw + lastItemRaw);
}
829
830
/**
 * When the heading's raw text ends in a `-` (optionally followed by whitespace), re-lexes
 * the full text with ` &nbsp;` appended; otherwise returns nothing.
 */
function completeHeading(token: marked.Tokens.Heading, fullRawText: string): marked.TokensList | void {
	const endsInDash = /-\s*$/.test(token.raw);
	return endsInDash ? marked.lexer(fullRawText + ' &nbsp;') : undefined;
}
835
836
/** Upper bound on how many fix-up passes are run over one token list. */
const maxIncompleteTokensFixRounds = 3;

/**
 * Repeatedly patches up incomplete markdown constructs in `tokens` until a pass
 * changes nothing or the maximum number of rounds is reached.
 *
 * @returns The patched token list, or the input when there was nothing to fix.
 */
export function fillInIncompleteTokens(tokens: marked.TokensList): marked.TokensList {
	let current = tokens;
	for (let round = 0; round < maxIncompleteTokensFixRounds; round++) {
		const patched = fillInIncompleteTokensOnce(current);
		if (!patched) {
			break;
		}
		current = patched;
	}
	return current;
}
849
850
/**
 * Runs a single fix-up pass over `tokens`.
 *
 * Tried in order: completing a table that starts at the first paragraph containing a
 * line that begins with `|`, completing the last token when it is a list or paragraph,
 * and finally completing a trailing heading.
 *
 * @returns A new token list, or `null` when nothing was changed.
 */
function fillInIncompleteTokensOnce(tokens: marked.TokensList): marked.TokensList | null {
	// Number of leading tokens to keep in front of the replacement
	let keepCount = tokens.length;
	let replacement: marked.Token[] | undefined;

	const tableStart = tokens.findIndex(token => token.type === 'paragraph' && /(\n|^)\|/.test(token.raw));
	if (tableStart !== -1) {
		replacement = completeTable(tokens.slice(tableStart));
		keepCount = tableStart;
	}

	const lastToken = tokens.at(-1);
	if (!replacement) {
		let completedLast: marked.Token | undefined;
		if (lastToken?.type === 'list') {
			completedLast = completeListItemPattern(lastToken as marked.Tokens.List);
		} else if (lastToken?.type === 'paragraph') {
			// Only operates on a single token, because any newline that follows this should break these patterns
			completedLast = completeSingleLinePattern(lastToken as marked.Tokens.Paragraph);
		}

		if (completedLast) {
			replacement = [completedLast];
			keepCount = tokens.length - 1;
		}
	}

	if (replacement) {
		const patched = tokens.slice(0, keepCount).concat(replacement) as marked.TokensList;
		patched.links = tokens.links;
		return patched;
	}

	if (lastToken?.type === 'heading') {
		const completedTokens = completeHeading(lastToken as marked.Tokens.Heading, mergeRawTokenText(tokens));
		if (completedTokens) {
			return completedTokens;
		}
	}

	return null;
}
898
899
900
/** Closes an unterminated codespan by appending a backtick to the trimmed raw text. */
function completeCodespan(token: marked.Token): marked.Token {
	const closing = '`';
	return completeWithString(token, closing);
}
903
904
/** Closes unterminated `*emphasis` by appending `*` to the trimmed raw text. */
function completeStar(tokens: marked.Token): marked.Token {
	const closing = '*';
	return completeWithString(tokens, closing);
}
907
908
/** Closes unterminated `_emphasis` by appending `_` to the trimmed raw text. */
function completeUnderscore(tokens: marked.Token): marked.Token {
	const closing = '_';
	return completeWithString(tokens, closing);
}
911
912
/** Closes an open link target by appending `)`; the raw text is not trimmed. */
function completeLinkTarget(tokens: marked.Token): marked.Token {
	const closing = ')';
	return completeWithString(tokens, closing, false);
}
915
916
/** Closes an open quoted argument inside a link target by appending `")`; the raw text is not trimmed. */
function completeLinkTargetArg(tokens: marked.Token): marked.Token {
	const closing = '")';
	return completeWithString(tokens, closing, false);
}
919
920
/** Closes open link text by appending `]` and a stand-in link target; the raw text is not trimmed. */
function completeLinkText(tokens: marked.Token): marked.Token {
	const closing = '](https://microsoft.com)';
	return completeWithString(tokens, closing, false);
}
923
924
/** Closes unterminated `**strong` by appending `**` to the trimmed raw text. */
function completeDoublestar(tokens: marked.Token): marked.Token {
	const closing = '**';
	return completeWithString(tokens, closing);
}
927
928
/** Closes unterminated `__strong` by appending `__` to the trimmed raw text. */
function completeDoubleUnderscore(tokens: marked.Token): marked.Token {
	const closing = '__';
	return completeWithString(tokens, closing);
}
931
932
/**
 * Appends `closingString` to the raw text of the given token(s) and lexes the result again.
 *
 * @param tokens A token or list of tokens whose raw text is merged.
 * @param closingString The text that closes the incomplete construct.
 * @param shouldTrim Whether trailing whitespace is removed before appending. Defaults to `true`.
 * @returns The first token of the re-lexed text.
 */
function completeWithString(tokens: marked.Token[] | marked.Token, closingString: string, shouldTrim = true): marked.Token {
	const tokenList = Array.isArray(tokens) ? tokens : [tokens];
	const rawText = mergeRawTokenText(tokenList);
	const prefix = shouldTrim ? rawText.trimEnd() : rawText;

	// If it was completed correctly, this should be a single token.
	// Expecting either a Paragraph or a List
	const [completed] = marked.lexer(prefix + closingString);
	return completed as marked.Token;
}
940
941
/**
 * Tries to turn the start of a table (a header row, optionally followed by a partial
 * separator row) into a complete table by appending a generated separator row.
 *
 * @param tokens The tokens starting at the paragraph that holds the header row.
 * @returns The re-lexed tokens, or `undefined` when the text does not look like the
 * start of a well-formed table.
 */
function completeTable(tokens: marked.Token[]): marked.Token[] | undefined {
	const mergedRawText = mergeRawTokenText(tokens);
	const lines = mergedRawText.split('\n');
	const lastLineIndex = lines.length - 1;

	let numCols: number | undefined; // The number of line1 col headers
	let hasSeparatorRow = false;

	for (const [index, rawLine] of lines.entries()) {
		const line = rawLine.trim();
		const startsWithPipe = /^\s*\|/.test(line);

		if (numCols === undefined) {
			// Still looking for the line1 header row
			if (startsWithPipe) {
				const headerCells = line.match(/(\|[^\|]+)(?=\||$)/g);
				if (headerCells) {
					numCols = headerCells.length;
				}
			}
			continue;
		}

		if (!startsWithPipe) {
			// The line after the header row isn't a valid separator row, so the table is malformed, don't fix it up
			return undefined;
		}
		if (index !== lastLineIndex) {
			// We got the line1 header row, and the line2 separator row, but there are more lines, and it wasn't parsed as a table!
			// That's strange and means that the table is probably malformed in the source, so I won't try to patch it up.
			return undefined;
		}

		// Got a line2 separator row- partial or complete, doesn't matter, we'll replace it with a correct one
		hasSeparatorRow = true;
	}

	if (numCols === undefined || numCols <= 0) {
		return undefined;
	}

	const prefixText = hasSeparatorRow ? lines.slice(0, -1).join('\n') : mergedRawText;
	const closingPipe = /\|\s*$/.test(prefixText) ? '' : '|';
	const separatorRow = `\n|${' --- |'.repeat(numCols)}`;
	return marked.lexer(prefixText + closingPipe + separatorRow);
}
980
981
982