Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
projectdiscovery
GitHub Repository: projectdiscovery/nuclei
Path: blob/dev/pkg/operators/extractors/fuzz_harness.go
4538 views
1
package extractors
2
3
import (
4
"regexp"
5
"strconv"
6
"strings"
7
)
8
9
// Size limits that keep a single fuzz iteration small.
const (
	// fuzzMaxInputSize is the largest payload, in bytes, that
	// extractorFromFuzzData accepts (16 KiB).
	fuzzMaxInputSize = 16 * 1024

	// fuzzMaxItems caps how many values a candidate collects; the split
	// helpers also derive their field limits from it.
	fuzzMaxItems = 8

	// fuzzMaxValueBytes is the byte length trimFuzzValue truncates values to.
	fuzzMaxValueBytes = 256
)
14
15
var (
16
fuzzExtractorTypes = []ExtractorType{RegexExtractor, KValExtractor, XPathExtractor, JSONExtractor, DSLExtractor}
17
fuzzParts = []string{"", "body", "raw", "all_headers", "header", "response"}
18
fuzzAttributes = []string{"", "href", "content", "id", "name"}
19
fuzzRegexDefaults = []string{`([A-Za-z0-9_]+)`, `token=([a-z0-9]+)`, `https?://[^\s"']+`}
20
fuzzKValDefaults = []string{"content_type", "server", "set_cookie", "x_powered_by"}
21
fuzzJSONDefaults = []string{`.id`, `.items[] | .id`, `.headers.server`, `.links[] | .href`}
22
fuzzXPathDefaults = []string{`//title`, `/html/body/div/p[2]/a`, `//link`, `/root/item`, `//item`}
23
fuzzDSLDefaults = []string{`to_upper(hello)`, `contains(to_lower(all_headers), "server")`, `body`, `content_type`}
24
fuzzRegexCorpus = "token=abc123\nserver=nginx\nurl=https://example.com/path\nhello=world"
25
fuzzJSONCorpus = `{"id":1,"name":"nuclei","items":[{"id":"a1"},{"id":"b2"}],"headers":{"server":"nginx"},"links":[{"href":"https://example.com"}]}`
26
fuzzHTMLCorpus = `<!doctype html><html><head><title>Example Domain</title><meta content="text/html" name="description" /></head><body><div><p>hello</p><p><a href="https://www.iana.org/domains/example">More information...</a></p></div></body></html>`
27
fuzzXMLCorpus = `<?xml version="1.0"?><root><item id="alpha">A</item><item id="beta">B</item><link href="https://example.com">Example</link></root>`
28
)
29
30
type fuzzExtractorCandidate struct {
31
extractorType ExtractorType
32
part string
33
name string
34
attribute string
35
internal bool
36
caseInsensitive bool
37
explicitCaseInsensitive bool
38
regexGroup int
39
values []string
40
}
41
42
// extractorFromFuzzData decodes a raw fuzz payload into an Extractor.
//
// Empty payloads and payloads larger than fuzzMaxInputSize yield (nil, false).
// Otherwise a candidate is seeded from the leading bytes, refined by the
// payload's directive lines, topped up with fallback values, and built; the
// boolean is false when the candidate ended up with no values.
func extractorFromFuzzData(data []byte) (*Extractor, bool) {
	if size := len(data); size == 0 || size > fuzzMaxInputSize {
		return nil, false
	}

	seeded := newFuzzExtractorCandidate(data)
	directives := splitFuzzLines(data)
	seeded.applyLines(directives)
	seeded.addFallbackValues(data)

	return seeded.build()
}
53
54
func newFuzzExtractorCandidate(data []byte) *fuzzExtractorCandidate {
55
flags := fuzzByteAt(data, 1)
56
return &fuzzExtractorCandidate{
57
extractorType: fuzzExtractorTypes[int(fuzzByteAt(data, 0))%len(fuzzExtractorTypes)],
58
part: fuzzParts[int(fuzzByteAt(data, 2))%len(fuzzParts)],
59
name: fuzzName(data),
60
attribute: fuzzAttributes[int(fuzzByteAt(data, 3))%len(fuzzAttributes)],
61
internal: flags&0x01 != 0,
62
caseInsensitive: flags&0x02 != 0,
63
regexGroup: int(fuzzByteAt(data, 4) % 3),
64
}
65
}
66
67
func (candidate *fuzzExtractorCandidate) applyLines(lines []string) {
68
for _, line := range lines {
69
key, rawValue, ok := cutFuzzKV(line)
70
if !ok {
71
candidate.addValue(line)
72
continue
73
}
74
75
switch key {
76
case "type":
77
extractorType, err := toExtractorTypes(rawValue)
78
if err != nil {
79
candidate.extractorType = ExtractorType(0)
80
} else {
81
candidate.extractorType = extractorType
82
}
83
case "part":
84
candidate.part = trimFuzzValue(rawValue)
85
case "name":
86
candidate.name = fuzzNameFromText(rawValue)
87
case "attribute":
88
candidate.attribute = fuzzAttribute(rawValue)
89
case "internal":
90
candidate.internal = parseFuzzBool(rawValue, candidate.internal)
91
case "case-insensitive":
92
candidate.caseInsensitive = parseFuzzBool(rawValue, candidate.caseInsensitive)
93
candidate.explicitCaseInsensitive = true
94
case "group":
95
candidate.regexGroup = parseFuzzGroup(rawValue, candidate.regexGroup)
96
case "value":
97
candidate.addValue(rawValue)
98
case "regex":
99
candidate.extractorType = RegexExtractor
100
candidate.addValue(rawValue)
101
case "kval":
102
candidate.extractorType = KValExtractor
103
candidate.addValue(rawValue)
104
case "json":
105
candidate.extractorType = JSONExtractor
106
candidate.addValue(rawValue)
107
case "xpath":
108
candidate.extractorType = XPathExtractor
109
candidate.addValue(rawValue)
110
case "dsl":
111
candidate.extractorType = DSLExtractor
112
candidate.addValue(rawValue)
113
}
114
}
115
}
116
117
func (candidate *fuzzExtractorCandidate) addFallbackValues(payload []byte) {
118
if len(candidate.values) > 0 || len(candidate.values) >= fuzzMaxItems {
119
return
120
}
121
122
fields := splitFuzzFields(payload)
123
switch candidate.extractorType {
124
case RegexExtractor:
125
for _, field := range fields {
126
candidate.addValue(fuzzRegexValue(field))
127
}
128
candidate.addDefaults(fuzzRegexDefaults, fuzzByteAt(payload, 5))
129
case KValExtractor:
130
for _, field := range fields {
131
candidate.addValue(fuzzIdentifier(field))
132
}
133
candidate.addDefaults(fuzzKValDefaults, fuzzByteAt(payload, 5))
134
case JSONExtractor:
135
for _, field := range fields {
136
candidate.addValue(fuzzJSONQuery(field))
137
}
138
candidate.addDefaults(fuzzJSONDefaults, fuzzByteAt(payload, 5))
139
case XPathExtractor:
140
for _, field := range fields {
141
candidate.addValue(fuzzXPathQuery(field))
142
}
143
candidate.addDefaults(fuzzXPathDefaults, fuzzByteAt(payload, 5))
144
case DSLExtractor:
145
for _, field := range fields {
146
candidate.addValue(fuzzDSLExpression(field))
147
}
148
candidate.addDefaults(fuzzDSLDefaults, fuzzByteAt(payload, 5))
149
default:
150
candidate.addDefaults(fuzzRegexDefaults, fuzzByteAt(payload, 5))
151
}
152
}
153
154
// addDefaults adds entries from defaults, starting at seed modulo
// len(defaults) and wrapping around, until the candidate holds at least two
// values or every default has been offered once. It does nothing when
// defaults is empty or the candidate is already at fuzzMaxItems.
func (candidate *fuzzExtractorCandidate) addDefaults(defaults []string, seed byte) {
	total := len(defaults)
	if total == 0 || len(candidate.values) >= fuzzMaxItems {
		return
	}

	index := int(seed) % total
	for offered := 0; offered < total; offered++ {
		if len(candidate.values) >= 2 {
			return
		}
		candidate.addValue(defaults[index])
		index = (index + 1) % total
	}
}
164
165
// addValue appends the trimmed form of value unless it is empty, already
// present, or the candidate has reached fuzzMaxItems values.
func (candidate *fuzzExtractorCandidate) addValue(value string) {
	cleaned := trimFuzzValue(value)
	switch {
	case cleaned == "":
		return
	case len(candidate.values) >= fuzzMaxItems:
		return
	}

	for i := range candidate.values {
		if candidate.values[i] == cleaned {
			return
		}
	}
	candidate.values = append(candidate.values, cleaned)
}
177
178
func (candidate *fuzzExtractorCandidate) build() (*Extractor, bool) {
179
extractor := &Extractor{
180
Type: ExtractorTypeHolder{ExtractorType: candidate.extractorType},
181
Name: candidate.name,
182
Part: candidate.part,
183
Internal: candidate.internal,
184
Attribute: candidate.attribute,
185
RegexGroup: candidate.regexGroup,
186
}
187
188
if candidate.extractorType == KValExtractor || candidate.explicitCaseInsensitive {
189
extractor.CaseInsensitive = candidate.caseInsensitive
190
}
191
if candidate.extractorType != XPathExtractor {
192
extractor.Attribute = ""
193
}
194
if candidate.extractorType != RegexExtractor {
195
extractor.RegexGroup = 0
196
}
197
198
switch candidate.extractorType {
199
case RegexExtractor:
200
extractor.Regex = append([]string(nil), candidate.values...)
201
case KValExtractor:
202
extractor.KVal = append([]string(nil), candidate.values...)
203
case XPathExtractor:
204
extractor.XPath = append([]string(nil), candidate.values...)
205
case JSONExtractor:
206
extractor.JSON = append([]string(nil), candidate.values...)
207
case DSLExtractor:
208
extractor.DSL = append([]string(nil), candidate.values...)
209
default:
210
extractor.Regex = append([]string(nil), candidate.values...)
211
}
212
213
return extractor, len(candidate.values) > 0
214
}
215
216
func exerciseFuzzExtractor(extractor *Extractor) {
217
switch extractor.GetType() {
218
case RegexExtractor:
219
_ = extractor.ExtractRegex(fuzzRegexCorpus)
220
case KValExtractor:
221
_ = extractor.ExtractKval(fuzzKValData())
222
case XPathExtractor:
223
_ = extractor.ExtractXPath(fuzzHTMLCorpus)
224
_ = extractor.ExtractXPath(fuzzXMLCorpus)
225
case JSONExtractor:
226
_ = extractor.ExtractJSON(fuzzJSONCorpus)
227
case DSLExtractor:
228
_ = extractor.ExtractDSL(fuzzDSLData())
229
}
230
}
231
232
// fuzzKValData returns a fresh key/value map that KVal extractors are run
// against. The values are mixed-case strings.
func fuzzKValData() map[string]interface{} {
	data := make(map[string]interface{}, 4)
	data["content_type"] = "Text/HTML"
	data["server"] = "Nginx"
	data["set_cookie"] = "session=abc123"
	data["x_powered_by"] = "Go"
	return data
}
240
241
// fuzzDSLData returns a fresh variable map that DSL extractors are evaluated
// against: four string variables plus an integer status_code.
func fuzzDSLData() map[string]interface{} {
	data := make(map[string]interface{}, 5)
	data["hello"] = "hi"
	data["body"] = "PING PONG"
	data["all_headers"] = "Server: Example\nContent-Type: text/html"
	data["content_type"] = "text/html"
	data["status_code"] = 200
	return data
}
250
251
func splitFuzzLines(data []byte) []string {
252
fields := strings.FieldsFunc(string(data), func(r rune) bool {
253
return r == '\n' || r == '\r' || r == ';'
254
})
255
if len(fields) > fuzzMaxItems*4 {
256
fields = fields[:fuzzMaxItems*4]
257
}
258
259
lines := make([]string, 0, len(fields))
260
for _, field := range fields {
261
field = trimFuzzValue(field)
262
if field != "" {
263
lines = append(lines, field)
264
}
265
}
266
return lines
267
}
268
269
func splitFuzzFields(data []byte) []string {
270
fields := strings.FieldsFunc(string(data), func(r rune) bool {
271
return r == '\n' || r == '\r' || r == '|' || r == ','
272
})
273
if len(fields) > fuzzMaxItems {
274
fields = fields[:fuzzMaxItems]
275
}
276
277
values := make([]string, 0, len(fields))
278
for _, field := range fields {
279
field = trimFuzzValue(field)
280
if field != "" {
281
values = append(values, field)
282
}
283
}
284
return values
285
}
286
287
func cutFuzzKV(line string) (string, string, bool) {
288
key, value, ok := strings.Cut(line, "=")
289
if !ok {
290
key, value, ok = strings.Cut(line, ":")
291
}
292
if !ok {
293
return "", "", false
294
}
295
return strings.ToLower(strings.TrimSpace(key)), trimFuzzValue(value), true
296
}
297
298
func trimFuzzValue(value string) string {
299
value = strings.TrimSpace(strings.ReplaceAll(value, "\x00", ""))
300
if len(value) > fuzzMaxValueBytes {
301
value = value[:fuzzMaxValueBytes]
302
}
303
return value
304
}
305
306
func fuzzName(data []byte) string {
307
if len(data) == 0 {
308
return ""
309
}
310
if len(data) > 8 {
311
data = data[:8]
312
}
313
return "fuzz-" + strconv.FormatUint(uint64(data[0]), 16) + fuzzNameSuffix(data[1:])
314
}
315
316
// fuzzNameSuffix concatenates the unpadded hexadecimal form of each byte,
// stopping before any byte that would be appended once the suffix is already
// 15 or more characters long.
func fuzzNameSuffix(data []byte) string {
	var suffix strings.Builder
	for i := 0; i < len(data) && suffix.Len() < 15; i++ {
		suffix.WriteString(strconv.FormatUint(uint64(data[i]), 16))
	}
	return suffix.String()
}
329
330
func fuzzNameFromText(value string) string {
331
value = strings.ToLower(trimFuzzValue(value))
332
if value == "" {
333
return ""
334
}
335
var builder strings.Builder
336
for _, r := range value {
337
switch {
338
case r >= 'a' && r <= 'z':
339
builder.WriteRune(r)
340
case r >= '0' && r <= '9':
341
builder.WriteRune(r)
342
case r == '-':
343
builder.WriteRune(r)
344
}
345
if builder.Len() >= 32 {
346
break
347
}
348
}
349
if builder.Len() == 0 {
350
return ""
351
}
352
return builder.String()
353
}
354
355
func fuzzAttribute(value string) string {
356
attribute := fuzzIdentifier(value)
357
if attribute == "" {
358
return trimFuzzValue(value)
359
}
360
return attribute
361
}
362
363
func parseFuzzBool(value string, fallback bool) bool {
364
switch strings.ToLower(trimFuzzValue(value)) {
365
case "1", "true", "yes", "on":
366
return true
367
case "0", "false", "no", "off":
368
return false
369
default:
370
return fallback
371
}
372
}
373
374
func parseFuzzGroup(value string, fallback int) int {
375
number, err := strconv.Atoi(trimFuzzValue(value))
376
if err != nil {
377
return fallback
378
}
379
if number < -2 {
380
return -2
381
}
382
if number > 8 {
383
return 8
384
}
385
return number
386
}
387
388
func fuzzRegexValue(value string) string {
389
value = trimFuzzValue(value)
390
if value == "" {
391
return ""
392
}
393
return regexp.QuoteMeta(value)
394
}
395
396
func fuzzJSONQuery(value string) string {
397
identifier := fuzzIdentifier(value)
398
if identifier == "" {
399
return ""
400
}
401
return "." + identifier
402
}
403
404
func fuzzXPathQuery(value string) string {
405
identifier := fuzzIdentifier(value)
406
if identifier == "" {
407
return ""
408
}
409
return "//" + identifier
410
}
411
412
func fuzzDSLExpression(value string) string {
413
identifier := fuzzIdentifier(value)
414
if identifier == "" {
415
return ""
416
}
417
switch identifier {
418
case "hello", "body", "all_headers", "content_type":
419
return "to_upper(" + identifier + ")"
420
case "status_code":
421
return identifier
422
default:
423
return identifier
424
}
425
}
426
427
func fuzzIdentifier(value string) string {
428
value = strings.ToLower(trimFuzzValue(value))
429
if value == "" {
430
return ""
431
}
432
var builder strings.Builder
433
lastUnderscore := false
434
for _, r := range value {
435
switch {
436
case r >= 'a' && r <= 'z':
437
builder.WriteRune(r)
438
lastUnderscore = false
439
case r >= '0' && r <= '9':
440
if builder.Len() == 0 {
441
builder.WriteString("field_")
442
}
443
builder.WriteRune(r)
444
lastUnderscore = false
445
case r == '_' || r == '-' || r == '.' || r == ' ':
446
if builder.Len() > 0 && !lastUnderscore {
447
builder.WriteByte('_')
448
lastUnderscore = true
449
}
450
}
451
if builder.Len() >= 32 {
452
break
453
}
454
}
455
return strings.Trim(builder.String(), "_")
456
}
457
458
// fuzzByteAt returns data[index], or 0 when index lies outside data.
func fuzzByteAt(data []byte, index int) byte {
	if index >= 0 && index < len(data) {
		return data[index]
	}
	return 0
}
464
465