Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/base/browser/domSanitize.ts
3292 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
import { DisposableStore, IDisposable, toDisposable } from '../common/lifecycle.js';
7
import { Schemas } from '../common/network.js';
8
import { reset } from './dom.js';
9
import dompurify from './dompurify/dompurify.js';
10
11
12
/**
 * List of safe, non-input html tags.
 *
 * The list is frozen and contains each tag name exactly once.
 */
export const basicMarkupHtmlTags = Object.freeze([
	'a',
	'abbr',
	'b',
	'bdo',
	'blockquote',
	'br',
	'caption',
	'cite',
	'code',
	'col',
	'colgroup',
	'dd',
	'del',
	'details',
	'dfn',
	'div',
	'dl',
	'dt',
	'em',
	'figcaption',
	'figure',
	'h1',
	'h2',
	'h3',
	'h4',
	'h5',
	'h6',
	'hr',
	'i',
	'img',
	'ins',
	'kbd',
	'label',
	'li',
	'mark',
	'ol',
	'p',
	'pre',
	'q',
	'rp',
	'rt',
	'ruby',
	'samp',
	'small',
	'source',
	'span',
	'strike',
	'strong',
	'sub',
	'summary',
	'sup',
	'table',
	'tbody',
	'td',
	'tfoot',
	'th',
	'thead',
	'time',
	'tr',
	'tt',
	'u',
	'ul',
	'var',
	'video',
	'wbr',
]);
83
84
/**
 * Attribute names that are allowed when the caller does not override the
 * attribute list. The list is frozen.
 */
export const defaultAllowedAttrs = Object.freeze([
	// links and media
	'href', 'target', 'src', 'alt', 'title',
	// labelling, accessibility and focus
	'for', 'name', 'role', 'tabindex',
	'x-dispatch',
	// form-like state
	'required', 'checked', 'placeholder', 'type',
	// list numbering and layout
	'start', 'width', 'height', 'align',
]);
104
105
106
/** Callback shape accepted for dompurify's `uponSanitizeElement` hook. */
type UponSanitizeElementCb = (currentNode: Element, data: dompurify.SanitizeElementHookEvent, config: dompurify.Config) => void;

/** Callback shape accepted for dompurify's `uponSanitizeAttribute` hook. */
type UponSanitizeAttributeCb = (currentNode: Element, data: dompurify.SanitizeAttributeHookEvent, config: dompurify.Config) => void;

/**
 * Registers `cb` on the shared dompurify instance for the given hook.
 *
 * @returns A disposable that calls `dompurify.removeHook(hook)` when disposed.
 *
 * NOTE(review): removal is requested by hook name only, not by callback identity.
 * Which registered callback is removed is decided by dompurify; confirm against the vendored version.
 */
function addDompurifyHook(hook: 'uponSanitizeElement', cb: UponSanitizeElementCb): IDisposable;
function addDompurifyHook(hook: 'uponSanitizeAttribute', cb: UponSanitizeAttributeCb): IDisposable;
function addDompurifyHook(hook: 'uponSanitizeElement' | 'uponSanitizeAttribute', cb: any): IDisposable {
	dompurify.addHook(hook, cb);
	const unregister = () => dompurify.removeHook(hook);
	return toDisposable(unregister);
}
115
116
/**
 * Installs a dompurify `afterSanitizeAttributes` hook that strips `href` and `src`
 * attributes whose protocol is not allowed.
 *
 * - `href` values are checked against `allowedLinkProtocols`; values starting with `#` are always kept.
 * - `src` values are checked against `allowedMediaProtocols`.
 *
 * @param allowedLinkProtocols Protocol names (without the trailing `:`) allowed for `href`, or `'*'` to allow everything.
 * @param allowedMediaProtocols Protocol names (without the trailing `:`) allowed for `src`.
 *
 * @returns A disposable that calls `dompurify.removeHook('afterSanitizeAttributes')`.
 */
function hookDomPurifyHrefAndSrcSanitizer(allowedLinkProtocols: readonly string[] | '*', allowedMediaProtocols: readonly string[]): IDisposable {
	// https://github.com/cure53/DOMPurify/blob/main/demos/hooks-scheme-allowlist.html
	// A detached anchor element does the URL parsing: assigning `href` lets us read back `protocol`.
	const urlParser = document.createElement('a');

	const isProtocolAllowed = (url: string, protocols: readonly string[] | '*'): boolean => {
		if (protocols === '*') {
			// wildcard: every protocol is accepted
			return true;
		}

		urlParser.href = url;
		const protocolName = urlParser.protocol.replace(/:$/, '');
		return protocols.includes(protocolName);
	};

	dompurify.addHook('afterSanitizeAttributes', (node) => {
		for (const attrName of ['href', 'src']) {
			if (!node.hasAttribute(attrName)) {
				continue;
			}

			const value = node.getAttribute(attrName) as string;
			const isHref = attrName === 'href';

			// Links that start with '#' never go through protocol validation
			if (isHref && value.startsWith('#')) {
				continue;
			}

			const protocols = isHref ? allowedLinkProtocols : allowedMediaProtocols;
			if (!isProtocolAllowed(value, protocols)) {
				node.removeAttribute(attrName);
			}
		}
	});

	return toDisposable(() => dompurify.removeHook('afterSanitizeAttributes'));
}
156
157
/**
 * Decides the fate of a single attribute during sanitization.
 *
 * @param node The element that carries the attribute.
 * @param data The attribute's name and current value.
 *
 * @returns `true` to keep the attribute, `false` to remove it, or a string to keep the
 * attribute with that string as its sanitized value.
 */
export type SanitizeAttributePredicate = (node: Element, data: Readonly<{ attrName: string; attrValue: string }>) => boolean | string;
163
164
/**
 * An allowed-attribute entry that is decided per occurrence by a predicate
 * instead of being allowed unconditionally.
 */
export interface SanitizeAttributeRule {
	/** Name of the attribute this rule applies to. The sanitizer lower-cases it before matching. */
	readonly attributeName: string;
	/** Invoked for each occurrence of the attribute to decide whether it is kept, removed or rewritten. */
	shouldKeep: SanitizeAttributePredicate;
}
168
169
/**
 * Options accepted by the sanitizer entry points in this file.
 * Every member is optional; omitted members fall back to the built-in defaults.
 */
export interface DomSanitizerConfig {
	/**
	 * Configures the allowed html tags.
	 *
	 * `override` replaces the default tag list; `augment` is appended to whichever list is in effect.
	 */
	readonly allowedTags?: {
		readonly override?: ReadonlyArray<string>;
		readonly augment?: ReadonlyArray<string>;
	};

	/**
	 * Configures the allowed html attributes.
	 *
	 * `override` replaces the default attribute list; `augment` is appended to whichever list is in effect.
	 * An entry is either a plain attribute name (always allowed) or a rule whose predicate decides per occurrence.
	 * When the same name appears more than once, the last entry determines whether a predicate applies.
	 */
	readonly allowedAttributes?: {
		readonly override?: readonly (string | SanitizeAttributeRule)[];
		readonly augment?: readonly (string | SanitizeAttributeRule)[];
	};

	/**
	 * List of allowed protocols for `href` attributes, or `'*'` to allow every protocol.
	 *
	 * When omitted, the http and https schemes are used.
	 */
	readonly allowedLinkProtocols?: {
		readonly override?: ReadonlyArray<string> | '*';
	};

	/**
	 * List of allowed protocols for `src` attributes.
	 *
	 * When omitted, the http and https schemes are used.
	 */
	readonly allowedMediaProtocols?: {
		readonly override?: ReadonlyArray<string>;
	};

	/**
	 * If set, unsupported tags are replaced with their plaintext representation instead of being removed.
	 *
	 * For example, in `<p><bad>text</bad></p>` the `<p>` is kept and `<bad>text</bad>` is shown
	 * inside it as literal text.
	 */
	readonly replaceWithPlaintext?: boolean;
}
207
208
/**
 * Baseline dompurify configuration that every sanitize call starts from.
 * Tag and attribute lists are copies of the frozen defaults.
 */
const defaultDomPurifyConfig = Object.freeze({
	ALLOWED_TAGS: basicMarkupHtmlTags.slice(),
	ALLOWED_ATTR: defaultAllowedAttrs.slice(),
	// dompurify itself does not filter protocols here; `href`/`src` values are
	// validated afterwards by a dedicated hook.
	ALLOW_UNKNOWN_PROTOCOLS: true,
} satisfies dompurify.Config);
214
215
/**
 * Sanitizes a string of untrusted html.
 *
 * @param untrusted The html string to sanitize.
 * @param config Optional sanitizer configuration. When omitted, the default tag list,
 * the default attribute list and the default link/media protocols are used.
 *
 * @returns The sanitized html as a `TrustedHTML` value.
 */
export function sanitizeHtml(untrusted: string, config?: DomSanitizerConfig): TrustedHTML {
	const sanitized = doSanitizeHtml(untrusted, config, 'trusted');
	return sanitized;
}
226
227
/**
 * Core sanitization routine shared by the public entry points.
 *
 * Resolves the effective tag and attribute lists from `config`, temporarily installs the
 * dompurify hooks that are needed, runs dompurify and removes the hooks again, even when
 * sanitization throws.
 *
 * @param untrusted The html string to sanitize.
 * @param config Optional sanitizer configuration; `undefined` selects the defaults.
 * @param outputType `'dom'` to get a `DocumentFragment`, `'trusted'` to get `TrustedHTML`.
 */
function doSanitizeHtml(untrusted: string, config: DomSanitizerConfig | undefined, outputType: 'dom'): DocumentFragment;
function doSanitizeHtml(untrusted: string, config: DomSanitizerConfig | undefined, outputType: 'trusted'): TrustedHTML;
function doSanitizeHtml(untrusted: string, config: DomSanitizerConfig | undefined, outputType: 'dom' | 'trusted'): TrustedHTML | DocumentFragment {
	const hooks = new DisposableStore();
	try {
		const purifyConfig: dompurify.Config = { ...defaultDomPurifyConfig };

		// --- tags: an override replaces the defaults, an augment is appended afterwards
		const tagOverride = config?.allowedTags?.override;
		if (tagOverride) {
			purifyConfig.ALLOWED_TAGS = [...tagOverride];
		}

		const tagAugment = config?.allowedTags?.augment;
		if (tagAugment) {
			purifyConfig.ALLOWED_TAGS = [...(purifyConfig.ALLOWED_TAGS ?? []), ...tagAugment];
		}

		// --- attributes: same override/augment scheme as for tags
		const attrOverride = config?.allowedAttributes?.override;
		const attrAugment = config?.allowedAttributes?.augment;
		const requestedAttrs: Array<string | SanitizeAttributeRule> = [
			...(attrOverride ? attrOverride : defaultAllowedAttrs),
			...(attrAugment ? attrAugment : []),
		];

		// All attr names are lower-case in the sanitizer hooks, so normalize while collecting
		const keptAttrNames = new Set<string>();
		const attrRules = new Map<string, SanitizeAttributeRule>();
		for (const entry of requestedAttrs) {
			if (typeof entry === 'string') {
				const name = entry.toLowerCase();
				keptAttrNames.add(name);
				// A plain name listed after a rule for the same attribute clears that rule
				attrRules.delete(name);
			} else {
				const name = entry.attributeName.toLowerCase();
				keptAttrNames.add(name);
				attrRules.set(name, { attributeName: name, shouldKeep: entry.shouldKeep });
			}
		}

		purifyConfig.ALLOWED_ATTR = [...keptAttrNames];

		// --- hooks (all of them are removed again in the `finally` below)
		const linkProtocols = config?.allowedLinkProtocols?.override ?? [Schemas.http, Schemas.https];
		const mediaProtocols = config?.allowedMediaProtocols?.override ?? [Schemas.http, Schemas.https];
		hooks.add(hookDomPurifyHrefAndSrcSanitizer(linkProtocols, mediaProtocols));

		if (config?.replaceWithPlaintext) {
			hooks.add(addDompurifyHook('uponSanitizeElement', replaceWithPlainTextHook));
		}

		if (attrRules.size) {
			hooks.add(addDompurifyHook('uponSanitizeAttribute', (node, event) => {
				const rule = attrRules.get(event.attrName);
				if (!rule) {
					// No predicate for this attribute: keep it only if its name is allowed
					event.keepAttr = keptAttrNames.has(event.attrName);
					return;
				}

				const verdict = rule.shouldKeep(node, event);
				if (typeof verdict === 'string') {
					// A string verdict keeps the attribute and replaces its value
					event.keepAttr = true;
					event.attrValue = verdict;
				} else {
					event.keepAttr = verdict;
				}
			}));
		}

		if (outputType === 'dom') {
			return dompurify.sanitize(untrusted, {
				...purifyConfig,
				RETURN_DOM_FRAGMENT: true
			});
		}

		return dompurify.sanitize(untrusted, {
			...purifyConfig,
			RETURN_TRUSTED_TYPE: true
		});
	} finally {
		hooks.dispose();
	}
}
319
320
/** Tag names for which the plaintext conversion emits no closing tag. */
const selfClosingTags = [
	'area', 'base', 'br', 'col',
	'command', 'embed', 'hr', 'img',
	'input', 'keygen', 'link', 'meta',
	'param', 'source', 'track', 'wbr',
];
321
322
/**
 * dompurify `uponSanitizeElement` hook that turns a disallowed node into its plaintext
 * representation instead of letting it be dropped.
 *
 * Nodes whose tag is allowed, and the `body` node, are left untouched.
 */
function replaceWithPlainTextHook(element: Element, data: dompurify.SanitizeElementHookEvent, _config: dompurify.Config) {
	if (data.allowedTags[data.tagName] || data.tagName === 'body') {
		return;
	}

	const plaintext = convertTagToPlaintext(element);
	const parent = element.parentElement;

	if (element.nodeType === Node.COMMENT_NODE) {
		// Workaround for https://github.com/cure53/DOMPurify/issues/1005
		// The comment will be deleted in the next phase. However if we try to remove it now, it will cause
		// an exception. Instead we insert the text node before the comment.
		parent?.insertBefore(plaintext, element);
		return;
	}

	parent?.replaceChild(plaintext, element);
}
335
336
/**
 * Builds a plaintext representation of `element`.
 *
 * The returned fragment contains, in order: a text node with the start tag (including
 * attributes), the element's original child nodes, and, unless the tag is self-closing,
 * a text node with the end tag. A comment node yields a single text node with
 * `<!--…-->` around its content.
 *
 * Note that the children are moved into the fragment, so `element` is empty afterwards.
 *
 * All nodes, including the fragment itself, are created from `element.ownerDocument`, so
 * the result lives in the same document as the element and no global `document` is needed.
 *
 * @param element The element (or comment node) to convert.
 * @returns A fragment holding the plaintext tag markers and the moved children.
 */
export function convertTagToPlaintext(element: Element): DocumentFragment {
	let startTagText: string;
	let endTagText: string | undefined;
	if (element.nodeType === Node.COMMENT_NODE) {
		startTagText = `<!--${element.textContent}-->`;
	} else {
		const tagName = element.tagName.toLowerCase();
		// Each attribute is prefixed with a space; no attributes yields an empty string
		const attrString = Array.from(element.attributes)
			.map(attr => ` ${attr.name}="${attr.value}"`)
			.join('');
		startTagText = `<${tagName}${attrString}>`;
		if (!selfClosingTags.includes(tagName)) {
			endTagText = `</${tagName}>`;
		}
	}

	const ownerDocument = element.ownerDocument;
	const fragment = ownerDocument.createDocumentFragment();
	fragment.appendChild(ownerDocument.createTextNode(startTagText));

	// Appending a child to the fragment detaches it from `element`, so this loop terminates
	while (element.firstChild) {
		fragment.appendChild(element.firstChild);
	}

	if (endTagText) {
		fragment.appendChild(ownerDocument.createTextNode(endTagText));
	}

	return fragment;
}
369
370
/**
 * Sanitizes the `untrusted` html and resets `node` so that the sanitized result becomes
 * its content.
 *
 * @param node The element whose content is reset.
 * @param untrusted The html string to sanitize.
 * @param config Optional sanitizer configuration.
 */
export function safeSetInnerHtml(node: HTMLElement, untrusted: string, config?: DomSanitizerConfig): void {
	reset(node, doSanitizeHtml(untrusted, config, 'dom'));
}
377
378