Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
projectdiscovery
GitHub Repository: projectdiscovery/nuclei
Path: blob/dev/pkg/fuzz/analyzers/xss/analyzer.go
4538 views
1
package xss
2
3
import (
4
"io"
5
"strings"
6
7
"github.com/projectdiscovery/nuclei/v3/pkg/fuzz/analyzers"
8
"golang.org/x/net/html"
9
)
10
11
// analyzerName is the key under which the analyzer registers itself and the
// value returned by XSSAnalyzer.Name.
const analyzerName = "xss_context"

// maxResponseBodyBytes caps how many bytes of a response body are read
// before reflection analysis (10 MiB).
const maxResponseBodyBytes = 10 << 20
15
16
// XSSAnalyzer implements the analyzers.Analyzer interface for XSS context detection.
17
type XSSAnalyzer struct{}
18
19
var _ analyzers.Analyzer = &XSSAnalyzer{}
20
21
func init() {
22
analyzers.RegisterAnalyzer(analyzerName, &XSSAnalyzer{})
23
}
24
25
// Name returns the identifier this analyzer is registered under.
func (a *XSSAnalyzer) Name() string {
	return analyzerName
}
28
29
// ApplyInitialTransformation returns data after passing it through
// analyzers.ApplyPayloadTransformations. The params argument is not consulted.
func (a *XSSAnalyzer) ApplyInitialTransformation(data string, params map[string]interface{}) string {
	transformed := analyzers.ApplyPayloadTransformations(data)
	return transformed
}
32
33
// Analyze sends the fuzzed request with the payload in place and reports
// whether the payload is reflected in the response, and in which context.
//
// It returns (false, "", nil) when the options are incomplete, when the
// payload is empty, or when no reflection context is identified. On a match it
// returns true together with a "xss-reflected in <context>" description.
// Errors from setting the value, rebuilding the request, performing it,
// reading the body, or tokenizing the body are returned unchanged.
func (a *XSSAnalyzer) Analyze(options *analyzers.Options) (bool, string, error) {
	if options == nil {
		return false, "", nil
	}
	if options.FuzzGenerated.Component == nil || options.HttpClient == nil {
		return false, "", nil
	}

	gr := options.FuzzGenerated
	payload := gr.Value
	if payload == "" {
		return false, "", nil
	}

	// Inject the payload, and always put the original value back afterwards.
	// The restore is best-effort, so its error is deliberately ignored.
	if setErr := gr.Component.SetValue(gr.Key, payload); setErr != nil {
		return false, "", setErr
	}
	defer func() {
		_ = gr.Component.SetValue(gr.Key, gr.OriginalValue)
	}()

	request, err := gr.Component.Rebuild()
	if err != nil {
		return false, "", err
	}

	response, err := options.HttpClient.Do(request)
	if err != nil {
		return false, "", err
	}
	defer func() {
		_ = response.Body.Close()
	}()

	// Read at most maxResponseBodyBytes; anything beyond that is not analyzed.
	limited := io.LimitReader(response.Body, maxResponseBodyBytes)
	raw, err := io.ReadAll(limited)
	if err != nil {
		return false, "", err
	}

	reflection, err := AnalyzeReflectionContext(string(raw), payload)
	if err != nil {
		return false, "", err
	}
	if reflection != ContextUnknown {
		return true, "xss-reflected in " + reflection.String(), nil
	}
	return false, "", nil
}
78
79
// urlAttrs is the set of attribute names whose values may hold a navigable
// URI. "ping" is included because an <a ping=...> fires a POST to that URL
// when the link is clicked.
var urlAttrs = func() map[string]struct{} {
	names := []string{
		"href",
		"src",
		"action",
		"formaction",
		"data",
		"poster",
		"codebase",
		"cite",
		"background",
		"manifest",
		"icon",
		"ping",
		"longdesc",
	}
	set := make(map[string]struct{}, len(names))
	for _, name := range names {
		set[name] = struct{}{}
	}
	return set
}()
96
97
// eventHandlers is the set of (lowercase) attribute names that execute
// JavaScript when their event fires. A reflection inside any of these is
// classified as an event-handler context, so a missing entry downgrades a
// finding to a plain attribute reflection.
var eventHandlers = map[string]struct{}{
	"onabort":          {},
	"onafterprint":     {},
	"onbeforeprint":    {},
	"onbeforeunload":   {},
	"onblur":           {},
	"oncancel":         {},
	"oncanplay":        {},
	"oncanplaythrough": {},
	"onchange":         {},
	"onclick":          {},
	"onclose":          {},
	"oncontextmenu":    {},
	"oncopy":           {},
	"oncuechange":      {},
	"oncut":            {},
	"ondblclick":       {},
	"ondrag":           {},
	"ondragend":        {},
	"ondragenter":      {},
	"ondragleave":      {},
	"ondragover":       {},
	"ondragstart":      {},
	"ondrop":           {},
	"ondurationchange": {},
	"onemptied":        {},
	"onended":          {},
	"onerror":          {},
	"onfocus":          {},
	"onfocusin":        {},
	"onfocusout":       {},
	"onhashchange":     {},
	"oninput":          {},
	"oninvalid":        {},
	"onkeydown":        {},
	"onkeypress":       {},
	"onkeyup":          {},
	"onload":           {},
	"onloadeddata":     {},
	"onloadedmetadata": {},
	"onloadstart":      {},
	"onmessage":        {},
	"onmousedown":      {},
	"onmouseenter":     {},
	"onmouseleave":     {},
	"onmousemove":      {},
	"onmouseout":       {},
	"onmouseover":      {},
	"onmouseup":        {},
	"onoffline":        {},
	"ononline":         {},
	"onpagehide":       {},
	"onpageshow":       {},
	"onpaste":          {},
	"onpause":          {},
	"onplay":           {},
	"onplaying":        {},
	"onpopstate":       {},
	"onprogress":       {},
	"onratechange":     {},
	"onreset":          {},
	"onresize":         {},
	"onscroll":         {},
	"onsearch":         {},
	"onseeked":         {},
	"onseeking":        {},
	"onselect":         {},
	"onstalled":        {},
	"onstorage":        {},
	"onsubmit":         {},
	"onsuspend":        {},
	"ontimeupdate":     {},
	"ontoggle":         {},
	"onunload":         {},
	"onvolumechange":   {},
	"onwaiting":        {},
	"onwheel":          {},

	// CSS animation and transition events.
	"onanimationstart":     {},
	"onanimationend":       {},
	"onanimationiteration": {},
	"onanimationcancel":    {},
	"ontransitionend":      {},
	"ontransitionstart":    {},
	"ontransitionrun":      {},
	"ontransitioncancel":   {},

	// Pointer and touch events.
	"onpointerdown":        {},
	"onpointerup":          {},
	"onpointermove":        {},
	"onpointerover":        {},
	"onpointerout":         {},
	"onpointerenter":       {},
	"onpointerleave":       {},
	"onpointercancel":      {},
	"onpointerrawupdate":   {},
	"ongotpointercapture":  {},
	"onlostpointercapture": {},
	"ontouchstart":         {},
	"ontouchend":           {},
	"ontouchmove":          {},
	"ontouchcancel":        {},

	// Newer DOM events.
	"onauxclick":                {},
	"onbeforeinput":             {},
	"onbeforematch":             {},
	"onbeforetoggle":            {},
	"onformdata":                {},
	"onslotchange":              {},
	"onsecuritypolicyviolation": {},
	"onscrollend":               {},
	"onselectstart":             {},
	"onselectionchange":         {},
	"oncontextlost":             {},
	"oncontextrestored":         {},

	// Window-level handlers that can be set via <body>/<frameset> attributes.
	"onlanguagechange":     {},
	"onmessageerror":       {},
	"onrejectionhandled":   {},
	"onunhandledrejection": {},
	"onpagereveal":         {},
	"onpageswap":           {},

	// Legacy and prefixed handlers still honored by browsers.
	"onmousewheel":               {},
	"onwebkitanimationstart":     {},
	"onwebkitanimationend":       {},
	"onwebkitanimationiteration": {},
	"onwebkittransitionend":      {},

	// SVG animation (SMIL) handlers, e.g. <animate onbegin=...>.
	"onbegin":  {},
	"onend":    {},
	"onrepeat": {},
}
200
201
// executableScriptTypes is the set of (lowercase) <script type="..."> values
// that browsers execute. The empty string covers a <script> with no type
// attribute, and "module" covers ES module scripts. The remaining entries are
// the JavaScript MIME type essences, including the legacy
// text/javascript1.0–1.5 variants, which browsers still run as classic
// scripts.
var executableScriptTypes = map[string]struct{}{
	"":       {},
	"module": {},

	"application/ecmascript":   {},
	"application/javascript":   {},
	"application/x-ecmascript": {},
	"application/x-javascript": {},
	"text/ecmascript":          {},
	"text/javascript":          {},
	"text/javascript1.0":       {},
	"text/javascript1.1":       {},
	"text/javascript1.2":       {},
	"text/javascript1.3":       {},
	"text/javascript1.4":       {},
	"text/javascript1.5":       {},
	"text/jscript":             {},
	"text/livescript":          {},
	"text/x-ecmascript":        {},
	"text/x-javascript":        {},
}
217
218
// executableURLSinks maps a URL attribute name to the tags on which a
// dangerous URI (javascript:, vbscript:, data:text/html, ...) actually
// executes or renders a document. Any other tag+attribute pair stays
// ContextHTMLAttributeURL — <img src="javascript:..."> does not execute.
var executableURLSinks = func() map[string]map[string]struct{} {
	tags := func(names ...string) map[string]struct{} {
		set := make(map[string]struct{}, len(names))
		for _, name := range names {
			set[name] = struct{}{}
		}
		return set
	}
	return map[string]map[string]struct{}{
		"href":       tags("a", "area"),
		"src":        tags("iframe", "frame", "embed"),
		"action":     tags("form"),
		"formaction": tags("button", "input"),
		"data":       tags("object"),
	}
}()
229
230
// AnalyzeReflectionContext determines the HTML context where the given marker
231
// is reflected in the response body. Uses golang.org/x/net/html tokenizer
232
// for parsing. Returns ContextUnknown if the marker is not found.
233
func AnalyzeReflectionContext(responseBody, marker string) (XSSContext, error) {
234
if responseBody == "" || marker == "" {
235
return ContextUnknown, nil
236
}
237
238
markerLower := strings.ToLower(marker)
239
240
// bail early if the marker isn't anywhere in the body
241
if !strings.Contains(strings.ToLower(responseBody), markerLower) {
242
return ContextUnknown, nil
243
}
244
245
tokenizer := html.NewTokenizer(strings.NewReader(responseBody))
246
247
var (
248
inScript bool
249
inStyle bool
250
scriptIsExec bool
251
)
252
253
for {
254
tt := tokenizer.Next()
255
switch tt {
256
case html.ErrorToken:
257
// EOF is expected (end of doc), but surface real parse errors
258
if err := tokenizer.Err(); err != nil && err != io.EOF {
259
return ContextUnknown, err
260
}
261
return ContextUnknown, nil
262
263
case html.CommentToken:
264
if containsMarker(tokenizer.Token().Data, markerLower) {
265
return ContextComment, nil
266
}
267
268
case html.StartTagToken, html.SelfClosingTagToken:
269
tn, hasAttr := tokenizer.TagName()
270
tagName := strings.ToLower(string(tn))
271
272
// Important: TagAttr() is a forward-only iterator. If we checked
273
// script type and marker in separate loops, the second loop would
274
// see no attributes (already consumed). So we do both in one pass.
275
if hasAttr {
276
ctx, found, scriptType := scanAttributes(tokenizer, markerLower, tagName)
277
if found {
278
return ctx, nil
279
}
280
if tt == html.StartTagToken && tagName == "script" {
281
inScript = true
282
scriptIsExec = isScriptTypeExecutable(scriptType)
283
}
284
} else if tt == html.StartTagToken && tagName == "script" {
285
inScript = true
286
scriptIsExec = true // no attrs = executable
287
}
288
289
if tt == html.StartTagToken && tagName == "style" {
290
inStyle = true
291
}
292
293
case html.EndTagToken:
294
tn, _ := tokenizer.TagName()
295
switch strings.ToLower(string(tn)) {
296
case "script":
297
inScript = false
298
case "style":
299
inStyle = false
300
}
301
302
case html.TextToken:
303
if containsMarker(tokenizer.Token().Data, markerLower) {
304
if inScript {
305
if scriptIsExec {
306
return ContextScript, nil
307
}
308
return ContextScriptData, nil
309
}
310
if inStyle {
311
return ContextStyle, nil
312
}
313
return ContextHTMLBody, nil
314
}
315
}
316
}
317
}
318
319
// scanAttributes walks all attributes in one pass. We need this because
320
// the tokenizer's TagAttr() is consumable, once you iterate through,
321
// the attributes are gone. Earlier version had a bug where checking the
322
// script type first would eat all the attrs before we could check for
323
// the marker, so <script src="MARKER"> would silently miss the reflection.
324
func scanAttributes(tokenizer *html.Tokenizer, markerLower, tagName string) (XSSContext, bool, string) {
325
var markerCtx XSSContext
326
markerFound := false
327
scriptType := ""
328
foundType := false
329
330
for {
331
key, val, more := tokenizer.TagAttr()
332
attrName := strings.ToLower(string(key))
333
attrValue := string(val)
334
335
// HTML5 spec: browsers use the first type attribute when dupes exist.
336
// Without this check, <script type="application/json" type="text/javascript">
337
// would be classified as executable (last wins) when the browser treats it
338
// as non-executable (first wins).
339
if attrName == "type" && !foundType {
340
scriptType = strings.ToLower(strings.TrimSpace(attrValue))
341
foundType = true
342
}
343
344
if !markerFound {
345
if containsMarker(attrValue, markerLower) {
346
markerCtx = classifyAttributeContext(attrName, attrValue, tagName)
347
markerFound = true
348
} else if containsMarker(attrName, markerLower) {
349
markerCtx = ContextHTMLAttribute
350
markerFound = true
351
}
352
}
353
354
if !more {
355
break
356
}
357
}
358
359
return markerCtx, markerFound, scriptType
360
}
361
362
// isScriptTypeExecutable returns true if the type value is something
363
// browsers will actually run (or empty, meaning no type was set).
364
// Strips MIME parameters first, browsers still execute
365
// "text/javascript; charset=utf-8" but the raw string wouldn't match
366
// the lookup table without this.
367
func isScriptTypeExecutable(scriptType string) bool {
368
if i := strings.IndexByte(scriptType, ';'); i != -1 {
369
scriptType = strings.TrimSpace(scriptType[:i])
370
}
371
_, isExec := executableScriptTypes[scriptType]
372
return isExec
373
}
374
375
// classifyAttributeContext maps an attribute name to the right XSS context.
376
func classifyAttributeContext(attrName, attrValue, tagName string) XSSContext {
377
if _, ok := eventHandlers[attrName]; ok {
378
return ContextHTMLAttributeEvent
379
}
380
381
if attrName == "style" {
382
return ContextStyle
383
}
384
385
if attrName == "srcdoc" {
386
return ContextHTMLBody
387
}
388
389
if _, ok := urlAttrs[attrName]; ok {
390
trimmed := strings.TrimSpace(strings.ToLower(attrValue))
391
if strings.HasPrefix(trimmed, "javascript:") ||
392
strings.HasPrefix(trimmed, "vbscript:") ||
393
strings.HasPrefix(trimmed, "data:text/html") ||
394
strings.HasPrefix(trimmed, "data:application/xhtml+xml") ||
395
strings.HasPrefix(trimmed, "data:image/svg+xml") {
396
// only promote to ContextScript if this tag+attr pair actually
397
// executes dangerous URIs in browsers — <img src="javascript:...">
398
// doesn't execute, <a href="javascript:..."> does
399
if tags, ok := executableURLSinks[attrName]; ok {
400
if _, ok := tags[tagName]; ok {
401
return ContextScript
402
}
403
}
404
}
405
return ContextHTMLAttributeURL
406
}
407
408
return ContextHTMLAttribute
409
}
410
411
// containsMarker reports whether text contains markerLower, ignoring the case
// of text. Only text is lowercased here; markerLower must already be
// lowercase, otherwise an uppercase marker can never match.
func containsMarker(text, markerLower string) bool {
	lowered := strings.ToLower(text)
	return strings.Contains(lowered, markerLower)
}
416
417