Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/base/browser/domSanitize.ts
5251 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { Schemas } from '../common/network.js';
7
import { reset } from './dom.js';
8
// eslint-disable-next-line no-restricted-imports
9
import dompurify, * as DomPurifyTypes from './dompurify/dompurify.js';
10
11
/**
 * List of safe, non-input html tags.
 *
 * Each tag name appears exactly once and the array is frozen so it cannot be
 * mutated by consumers.
 */
export const basicMarkupHtmlTags = Object.freeze([
	'a',
	'abbr',
	'b',
	'bdo',
	'blockquote',
	'br',
	'caption',
	'cite',
	'code',
	'col',
	'colgroup',
	'dd',
	'del',
	'details',
	'dfn',
	'div',
	'dl',
	'dt',
	'em',
	'figcaption',
	'figure',
	'h1',
	'h2',
	'h3',
	'h4',
	'h5',
	'h6',
	'hr',
	'i',
	'img',
	'ins',
	'kbd',
	'label',
	'li',
	'mark',
	'ol',
	'p',
	'pre',
	'q',
	'rp',
	'rt',
	'ruby',
	's',
	'samp',
	'small',
	'source',
	'span',
	'strike',
	'strong',
	'sub',
	'summary',
	'sup',
	'table',
	'tbody',
	'td',
	'tfoot',
	'th',
	'thead',
	'time',
	'tr',
	'tt',
	'u',
	'ul',
	'var',
	'video',
	'wbr',
]);
83
84
/**
 * Attribute names that are allowed by default when no attribute override is configured.
 * The array is frozen.
 */
export const defaultAllowedAttrs = Object.freeze([
	// Links and media
	'href', 'target', 'src', 'alt', 'title',
	// Labelling, naming and focus
	'for', 'name', 'role', 'tabindex', 'x-dispatch',
	// Form-like state
	'required', 'checked', 'placeholder', 'type',
	// Lists and layout
	'start', 'width', 'height', 'align',
]);
104
105
106
/**
 * Placeholder scheme used to build the base url that candidate values are parsed against.
 * A value whose parsed protocol is this scheme did not carry a scheme of its own.
 */
const fakeRelativeUrlProtocol = 'vscode-relative-path';

/**
 * Describes which url values are acceptable for an attribute.
 */
interface AllowedLinksConfig {
	/** Allowed url schemes (without the trailing colon), or `'*'` to accept every value. */
	readonly override: readonly string[] | '*';

	/** Whether values that resolve against the placeholder base url are accepted. */
	readonly allowRelativePaths: boolean;
}

/**
 * Checks a single `href`/`src` value against the given configuration.
 *
 * @param value The raw attribute value.
 * @param allowedProtocols The schemes to accept and whether relative paths are accepted.
 *
 * @returns `true` if the value may be kept. Values that cannot be parsed as a url are rejected.
 */
function validateLink(value: string, allowedProtocols: AllowedLinksConfig): boolean {
	const { override, allowRelativePaths } = allowedProtocols;
	if (override === '*') {
		// Every protocol is allowed, nothing to parse
		return true;
	}

	try {
		const { protocol } = new URL(value, `${fakeRelativeUrlProtocol}://`);

		const scheme = protocol.replace(/:$/, '');
		if (override.includes(scheme)) {
			return true;
		}

		// The placeholder scheme survives parsing only if the value had no scheme of its own.
		// Values that spell out the placeholder scheme themselves are never accepted.
		const resolvedAgainstPlaceholder = protocol === `${fakeRelativeUrlProtocol}:`;
		const spellsOutPlaceholder = value.trim().toLowerCase().startsWith(fakeRelativeUrlProtocol);
		if (allowRelativePaths && resolvedAgainstPlaceholder && !spellsOutPlaceholder) {
			return true;
		}

		return false;
	} catch {
		return false;
	}
}
136
137
/**
 * Registers a dompurify `afterSanitizeAttributes` hook that drops every `href` and `src`
 * attribute whose value is not acceptable.
 *
 * @param allowedLinkProtocols Rules applied to `href` values. Values starting with `#` are always kept.
 * @param allowedMediaProtocols Rules applied to `src` values.
 */
function hookDomPurifyHrefAndSrcSanitizer(allowedLinkProtocols: AllowedLinksConfig, allowedMediaProtocols: AllowedLinksConfig) {
	dompurify.addHook('afterSanitizeAttributes', (node) => {
		for (const attr of ['href', 'src']) {
			if (!node.hasAttribute(attr)) {
				continue;
			}

			const attrValue = node.getAttribute(attr) as string;
			const isAcceptable = attr === 'href'
				? attrValue.startsWith('#') || validateLink(attrValue, allowedLinkProtocols)
				: validateLink(attrValue, allowedMediaProtocols);

			if (!isAcceptable) {
				node.removeAttribute(attr);
			}
		}
	});
}
160
161
/**
 * Callback deciding the fate of a single attribute on a node.
 *
 * @returns `true` to keep the attribute, `false` to remove it, or a string to keep the
 * attribute with that string as its new (sanitized) value.
 */
export type SanitizeAttributePredicate = (
	node: Element,
	data: { readonly attrName: string; readonly attrValue: string },
) => boolean | string;

/**
 * Associates an attribute name with the predicate that decides whether it is kept.
 */
export interface SanitizeAttributeRule {
	/** Name of the attribute the rule applies to. */
	readonly attributeName: string;

	/** Invoked for the attribute to decide whether (and with which value) it is kept. */
	shouldKeep: SanitizeAttributePredicate;
}
172
173
174
/**
 * Options that control how html is sanitized.
 */
export interface DomSanitizerConfig {
	/**
	 * Configures the allowed html tags.
	 *
	 * `override` replaces the default tag list, `augment` appends to the resulting list.
	 */
	readonly allowedTags?: {
		readonly override?: ReadonlyArray<string>;
		readonly augment?: ReadonlyArray<string>;
	};

	/**
	 * Configures the allowed html attributes.
	 *
	 * `override` replaces the default attribute list, `augment` appends to the resulting list.
	 * Entries are either plain attribute names or rules with a predicate.
	 */
	readonly allowedAttributes?: {
		readonly override?: ReadonlyArray<string | SanitizeAttributeRule>;
		readonly augment?: ReadonlyArray<string | SanitizeAttributeRule>;
	};

	/**
	 * List of allowed protocols for `href` attributes, or `'*'` to allow all of them.
	 */
	readonly allowedLinkProtocols?: {
		readonly override?: ReadonlyArray<string> | '*';
	};

	/**
	 * If set, allows relative paths for links.
	 */
	readonly allowRelativeLinkPaths?: boolean;

	/**
	 * List of allowed protocols for `src` attributes, or `'*'` to allow all of them.
	 */
	readonly allowedMediaProtocols?: {
		readonly override?: ReadonlyArray<string> | '*';
	};

	/**
	 * If set, allows relative paths for media (images, videos, etc).
	 */
	readonly allowRelativeMediaPaths?: boolean;

	/**
	 * If set, replaces unsupported tags with their plaintext representation instead of removing them.
	 *
	 * For example, <p><bad>"text"</bad></p> becomes <p>"<bad>text</bad>"</p>.
	 */
	readonly replaceWithPlaintext?: boolean;
}
222
223
/**
 * Baseline dompurify configuration that every sanitize call starts from.
 */
const defaultDomPurifyConfig = Object.freeze({
	ALLOWED_TAGS: Array.from(basicMarkupHtmlTags),
	ALLOWED_ATTR: Array.from(defaultAllowedAttrs),
	// Protocols are not restricted here: src/href values are sanitized later if needed
	ALLOW_UNKNOWN_PROTOCOLS: true,
} satisfies DomPurifyTypes.Config);
229
230
/**
 * Sanitizes a string of untrusted html.
 *
 * @param untrusted The HTML string to sanitize.
 * @param config Optional configuration for sanitization. If not provided, defaults to a safe configuration.
 *
 * @returns The sanitized html as a `TrustedHTML` value.
 */
export function sanitizeHtml(untrusted: string, config?: DomSanitizerConfig): TrustedHTML {
	const sanitized = doSanitizeHtml(untrusted, config, 'trusted');
	return sanitized;
}
241
242
/**
 * Shared implementation behind the sanitize entry points.
 *
 * Resolves the effective tag and attribute lists from `config`, registers the required
 * dompurify hooks, runs dompurify and finally removes all dompurify hooks again, also
 * when sanitizing throws.
 *
 * @param untrusted The HTML string to sanitize.
 * @param config Optional sanitizer configuration.
 * @param outputType `'dom'` to get a `DocumentFragment`, `'trusted'` to get `TrustedHTML`.
 */
function doSanitizeHtml(untrusted: string, config: DomSanitizerConfig | undefined, outputType: 'dom'): DocumentFragment;
function doSanitizeHtml(untrusted: string, config: DomSanitizerConfig | undefined, outputType: 'trusted'): TrustedHTML;
function doSanitizeHtml(untrusted: string, config: DomSanitizerConfig | undefined, outputType: 'dom' | 'trusted'): TrustedHTML | DocumentFragment {
	try {
		const purifyConfig: DomPurifyTypes.Config = { ...defaultDomPurifyConfig };

		// Tags: an override replaces the defaults, an augment is appended afterwards
		const tagOptions = config?.allowedTags;
		if (tagOptions?.override) {
			purifyConfig.ALLOWED_TAGS = [...tagOptions.override];
		}
		if (tagOptions?.augment) {
			purifyConfig.ALLOWED_TAGS = [...(purifyConfig.ALLOWED_TAGS ?? []), ...tagOptions.augment];
		}

		// Attributes: same override/augment scheme as for tags
		const attrOptions = config?.allowedAttributes;
		const requestedAttrs: Array<string | SanitizeAttributeRule> = [
			...(attrOptions?.override ? attrOptions.override : defaultAllowedAttrs),
			...(attrOptions?.augment ? attrOptions.augment : []),
		];

		const allowedAttrNames = new Set<string>();
		const allowedAttrPredicates = new Map<string, SanitizeAttributeRule>();
		for (const entry of requestedAttrs) {
			// All attr names are lower-case in the sanitizer hooks
			if (typeof entry === 'string') {
				const attrName = entry.toLowerCase();
				allowedAttrNames.add(attrName);
				// New string attribute value clears previously set predicates
				allowedAttrPredicates.delete(attrName);
			} else {
				const attrName = entry.attributeName.toLowerCase();
				allowedAttrNames.add(attrName);
				allowedAttrPredicates.set(attrName, {
					attributeName: attrName,
					shouldKeep: entry.shouldKeep,
				});
			}
		}

		purifyConfig.ALLOWED_ATTR = Array.from(allowedAttrNames);

		const linkRules: AllowedLinksConfig = {
			override: config?.allowedLinkProtocols?.override ?? [Schemas.http, Schemas.https],
			allowRelativePaths: config?.allowRelativeLinkPaths ?? false
		};
		const mediaRules: AllowedLinksConfig = {
			override: config?.allowedMediaProtocols?.override ?? [Schemas.http, Schemas.https],
			allowRelativePaths: config?.allowRelativeMediaPaths ?? false
		};
		hookDomPurifyHrefAndSrcSanitizer(linkRules, mediaRules);

		if (config?.replaceWithPlaintext) {
			dompurify.addHook('uponSanitizeElement', replaceWithPlainTextHook);
		}

		// The attribute hook is only needed when at least one attribute has a predicate
		if (allowedAttrPredicates.size) {
			dompurify.addHook('uponSanitizeAttribute', (node, e) => {
				const rule = allowedAttrPredicates.get(e.attrName);
				if (!rule) {
					e.keepAttr = allowedAttrNames.has(e.attrName);
					return;
				}

				const verdict = rule.shouldKeep(node, e);
				if (typeof verdict === 'string') {
					// A string result keeps the attribute with the returned value
					e.keepAttr = true;
					e.attrValue = verdict;
				} else {
					e.keepAttr = verdict;
				}
			});
		}

		if (outputType === 'dom') {
			return dompurify.sanitize(untrusted, {
				...purifyConfig,
				RETURN_DOM_FRAGMENT: true
			});
		}

		return dompurify.sanitize(untrusted, {
			...purifyConfig,
			RETURN_TRUSTED_TYPE: true
		}) as unknown as TrustedHTML; // Cast from lib TrustedHTML to global TrustedHTML
	} finally {
		// Hooks are registered on the shared dompurify instance, always clean them up
		dompurify.removeAllHooks();
	}
}
339
340
/**
 * Tag names that are rendered without a closing tag by {@link convertTagToPlaintext}.
 */
const selfClosingTags = ['area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'];

/**
 * dompurify `uponSanitizeElement` hook that swaps every disallowed node (other than `body`)
 * for its plaintext representation.
 */
const replaceWithPlainTextHook: DomPurifyTypes.UponSanitizeElementHook = (node, data, _config) => {
	if (data.allowedTags[data.tagName] || data.tagName === 'body') {
		return;
	}

	const replacement = convertTagToPlaintext(node);
	if (!replacement) {
		return;
	}

	if (node.nodeType === Node.COMMENT_NODE) {
		// Workaround for https://github.com/cure53/DOMPurify/issues/1005
		// The comment will be deleted in the next phase. However if we try to remove it now, it will cause
		// an exception. Instead we insert the text node before the comment.
		node.parentElement?.insertBefore(replacement, node);
	} else {
		node.parentElement?.replaceChild(replacement, node);
	}
};

/**
 * Builds a fragment that shows the given node as plaintext.
 *
 * For a comment the fragment holds a single text node with the comment markup. For an element
 * it holds a text node with the start tag (including attributes), followed by the element's
 * original children (which are moved out of the element), followed by a text node with the end
 * tag unless the tag is self closing.
 *
 * @param node The node to convert. Its children are moved into the returned fragment.
 *
 * @returns The fragment, or `undefined` if the node has no owner document or is neither a
 * comment nor an element.
 */
export function convertTagToPlaintext(node: Node): DocumentFragment | undefined {
	const ownerDocument = node.ownerDocument;
	if (!ownerDocument) {
		return;
	}

	let startTagText: string;
	let endTagText: string | undefined;
	if (node.nodeType === Node.COMMENT_NODE) {
		startTagText = `<!--${node.textContent}-->`;
	} else if (node instanceof Element) {
		const tagName = node.tagName.toLowerCase();
		const attrString = node.attributes.length
			? ' ' + Array.from(node.attributes)
				.map(attr => `${attr.name}="${attr.value}"`)
				.join(' ')
			: '';
		startTagText = `<${tagName}${attrString}>`;
		if (!selfClosingTags.includes(tagName)) {
			endTagText = `</${tagName}>`;
		}
	} else {
		return;
	}

	// Create the fragment in the node's own document rather than in the global `document`.
	// This keeps the children (which may not have been sanitized yet) from being adopted
	// into the global document and does not require a global `document` to exist.
	const fragment = ownerDocument.createDocumentFragment();
	fragment.appendChild(ownerDocument.createTextNode(startTagText));

	// Appending a child removes it from `node`, so this loop terminates
	while (node.firstChild) {
		fragment.appendChild(node.firstChild);
	}

	if (endTagText) {
		fragment.appendChild(ownerDocument.createTextNode(endTagText));
	}

	return fragment;
}
397
398
/**
 * Sanitizes the given `untrusted` html and resets the given `node` with the result.
 *
 * @param node The element whose content is replaced.
 * @param untrusted The HTML string to sanitize.
 * @param config Optional configuration for sanitization.
 */
export function safeSetInnerHtml(node: HTMLElement, untrusted: string, config?: DomSanitizerConfig): void {
	reset(node, doSanitizeHtml(untrusted, config, 'dom'));
}
405
406