Path: blob/dev/pkg/operators/extractors/fuzz_harness.go
4538 views
package extractors

import (
	"regexp"
	"strconv"
	"strings"
)

// Size limits applied while turning raw fuzz input into an Extractor.
const (
	fuzzMaxInputSize  = 16 << 10
	fuzzMaxItems      = 8
	fuzzMaxValueBytes = 256
)

var (
	// Lookup tables indexed by leading input bytes (modulo table length).
	fuzzExtractorTypes = []ExtractorType{RegexExtractor, KValExtractor, XPathExtractor, JSONExtractor, DSLExtractor}
	fuzzParts          = []string{"", "body", "raw", "all_headers", "header", "response"}
	fuzzAttributes     = []string{"", "href", "content", "id", "name"}

	// Per-type default values used when the input yields too few values.
	fuzzRegexDefaults = []string{`([A-Za-z0-9_]+)`, `token=([a-z0-9]+)`, `https?://[^\s"']+`}
	fuzzKValDefaults  = []string{"content_type", "server", "set_cookie", "x_powered_by"}
	fuzzJSONDefaults  = []string{`.id`, `.items[] | .id`, `.headers.server`, `.links[] | .href`}
	fuzzXPathDefaults = []string{`//title`, `/html/body/div/p[2]/a`, `//link`, `/root/item`, `//item`}
	fuzzDSLDefaults   = []string{`to_upper(hello)`, `contains(to_lower(all_headers), "server")`, `body`, `content_type`}

	// Fixed corpora the built extractor is run against.
	fuzzRegexCorpus = "token=abc123\nserver=nginx\nurl=https://example.com/path\nhello=world"
	fuzzJSONCorpus  = `{"id":1,"name":"nuclei","items":[{"id":"a1"},{"id":"b2"}],"headers":{"server":"nginx"},"links":[{"href":"https://example.com"}]}`
	fuzzHTMLCorpus  = `<!doctype html><html><head><title>Example Domain</title><meta content="text/html" name="description" /></head><body><div><p>hello</p><p><a href="https://www.iana.org/domains/example">More information...</a></p></div></body></html>`
	fuzzXMLCorpus   = `<?xml version="1.0"?><root><item id="alpha">A</item><item id="beta">B</item><link href="https://example.com">Example</link></root>`
)

// fuzzExtractorCandidate accumulates the settings decoded from one fuzz input
// before they are copied into an Extractor by build.
type fuzzExtractorCandidate struct {
	extractorType           ExtractorType
	part                    string
	name                    string
	attribute               string
	internal                bool
	caseInsensitive         bool
	explicitCaseInsensitive bool
	regexGroup              int
	values                  []string
}

// extractorFromFuzzData decodes data into an Extractor.
//
// The leading bytes seed the initial settings, "key=value" / "key:value"
// lines override them, and per-type fallback values are added when no line
// supplied a value. It returns (nil, false) for empty or oversized input;
// otherwise the boolean reports whether the extractor carries at least one
// value.
func extractorFromFuzzData(data []byte) (*Extractor, bool) {
	if len(data) == 0 || len(data) > fuzzMaxInputSize {
		return nil, false
	}

	candidate := newFuzzExtractorCandidate(data)
	candidate.applyLines(splitFuzzLines(data))
	candidate.addFallbackValues(data)

	return candidate.build()
}

// newFuzzExtractorCandidate seeds a candidate from the first five bytes of
// data: byte 0 picks the type, byte 1 holds the boolean flags, byte 2 the
// part, byte 3 the attribute and byte 4 the regex group (0-2). Missing bytes
// read as zero.
func newFuzzExtractorCandidate(data []byte) *fuzzExtractorCandidate {
	flags := fuzzByteAt(data, 1)
	return &fuzzExtractorCandidate{
		extractorType:   fuzzExtractorTypes[int(fuzzByteAt(data, 0))%len(fuzzExtractorTypes)],
		part:            fuzzParts[int(fuzzByteAt(data, 2))%len(fuzzParts)],
		name:            fuzzName(data),
		attribute:       fuzzAttributes[int(fuzzByteAt(data, 3))%len(fuzzAttributes)],
		internal:        flags&0x01 != 0,
		caseInsensitive: flags&0x02 != 0,
		regexGroup:      int(fuzzByteAt(data, 4) % 3),
	}
}

// applyLines overrides the candidate's settings from textual directives.
//
// A line without a separator is stored as a value. A line with a recognised
// key updates the matching setting; the type-named keys (regex, kval, json,
// xpath, dsl) both force the extractor type and store the value. Lines with
// an unrecognised key are ignored.
func (c *fuzzExtractorCandidate) applyLines(lines []string) {
	for _, line := range lines {
		key, rawValue, ok := cutFuzzKV(line)
		if !ok {
			c.addValue(line)
			continue
		}

		switch key {
		case "type":
			extractorType, err := toExtractorTypes(rawValue)
			if err != nil {
				// Unparseable type names fall back to the zero ExtractorType,
				// which build routes through its default branch.
				c.extractorType = ExtractorType(0)
			} else {
				c.extractorType = extractorType
			}
		case "part":
			c.part = trimFuzzValue(rawValue)
		case "name":
			c.name = fuzzNameFromText(rawValue)
		case "attribute":
			c.attribute = fuzzAttribute(rawValue)
		case "internal":
			c.internal = parseFuzzBool(rawValue, c.internal)
		case "case-insensitive":
			c.caseInsensitive = parseFuzzBool(rawValue, c.caseInsensitive)
			c.explicitCaseInsensitive = true
		case "group":
			c.regexGroup = parseFuzzGroup(rawValue, c.regexGroup)
		case "value":
			c.addValue(rawValue)
		case "regex":
			c.extractorType = RegexExtractor
			c.addValue(rawValue)
		case "kval":
			c.extractorType = KValExtractor
			c.addValue(rawValue)
		case "json":
			c.extractorType = JSONExtractor
			c.addValue(rawValue)
		case "xpath":
			c.extractorType = XPathExtractor
			c.addValue(rawValue)
		case "dsl":
			c.extractorType = DSLExtractor
			c.addValue(rawValue)
		}
	}
}

// addFallbackValues fills in values when the directive lines supplied none.
//
// For a known extractor type, each field of payload is converted into a value
// of the matching shape; afterwards addDefaults tops the list up from that
// type's default table, rotated by payload byte 5. Unknown types only receive
// the regex defaults.
func (c *fuzzExtractorCandidate) addFallbackValues(payload []byte) {
	if len(c.values) > 0 {
		return
	}

	var (
		derive   func(string) string
		defaults []string
	)
	switch c.extractorType {
	case RegexExtractor:
		derive, defaults = fuzzRegexValue, fuzzRegexDefaults
	case KValExtractor:
		derive, defaults = fuzzIdentifier, fuzzKValDefaults
	case JSONExtractor:
		derive, defaults = fuzzJSONQuery, fuzzJSONDefaults
	case XPathExtractor:
		derive, defaults = fuzzXPathQuery, fuzzXPathDefaults
	case DSLExtractor:
		derive, defaults = fuzzDSLExpression, fuzzDSLDefaults
	default:
		defaults = fuzzRegexDefaults
	}

	if derive != nil {
		for _, field := range splitFuzzFields(payload) {
			c.addValue(derive(field))
		}
	}
	c.addDefaults(defaults, fuzzByteAt(payload, 5))
}

// addDefaults appends entries from defaults, starting at seed modulo
// len(defaults) and wrapping around, until the candidate holds two values or
// every default has been offered.
func (c *fuzzExtractorCandidate) addDefaults(defaults []string, seed byte) {
	if len(c.values) >= fuzzMaxItems || len(defaults) == 0 {
		return
	}

	start := int(seed) % len(defaults)
	for offset := 0; offset < len(defaults) && len(c.values) < 2; offset++ {
		c.addValue(defaults[(start+offset)%len(defaults)])
	}
}

// addValue stores the trimmed value unless it is empty, already present, or
// the candidate already holds fuzzMaxItems values.
func (c *fuzzExtractorCandidate) addValue(value string) {
	value = trimFuzzValue(value)
	if value == "" || len(c.values) >= fuzzMaxItems {
		return
	}
	for _, existing := range c.values {
		if existing == value {
			return
		}
	}
	c.values = append(c.values, value)
}

// build copies the candidate into a new Extractor.
//
// Attribute is only set for XPath extractors and RegexGroup only for regex
// extractors. CaseInsensitive is only set for kval extractors or when a
// "case-insensitive" directive was seen. The values land in the field that
// matches the type; unrecognised types store them as regexes. The boolean
// reports whether any values were collected.
func (c *fuzzExtractorCandidate) build() (*Extractor, bool) {
	extractor := &Extractor{
		Type:     ExtractorTypeHolder{ExtractorType: c.extractorType},
		Name:     c.name,
		Part:     c.part,
		Internal: c.internal,
	}

	if c.extractorType == KValExtractor || c.explicitCaseInsensitive {
		extractor.CaseInsensitive = c.caseInsensitive
	}
	if c.extractorType == XPathExtractor {
		extractor.Attribute = c.attribute
	}
	if c.extractorType == RegexExtractor {
		extractor.RegexGroup = c.regexGroup
	}

	// Detach from the candidate's backing array so the extractor owns its slice.
	values := append([]string(nil), c.values...)
	switch c.extractorType {
	case KValExtractor:
		extractor.KVal = values
	case XPathExtractor:
		extractor.XPath = values
	case JSONExtractor:
		extractor.JSON = values
	case DSLExtractor:
		extractor.DSL = values
	default:
		// RegexExtractor and any unrecognised type.
		extractor.Regex = values
	}

	return extractor, len(c.values) > 0
}

// exerciseFuzzExtractor runs the extraction method matching the extractor's
// type against the fixed corpora. Results are deliberately discarded; the
// call exists only to execute the extraction code paths.
func exerciseFuzzExtractor(extractor *Extractor) {
	switch extractor.GetType() {
	case RegexExtractor:
		_ = extractor.ExtractRegex(fuzzRegexCorpus)
	case KValExtractor:
		_ = extractor.ExtractKval(fuzzKValData())
	case XPathExtractor:
		_ = extractor.ExtractXPath(fuzzHTMLCorpus)
		_ = extractor.ExtractXPath(fuzzXMLCorpus)
	case JSONExtractor:
		_ = extractor.ExtractJSON(fuzzJSONCorpus)
	case DSLExtractor:
		_ = extractor.ExtractDSL(fuzzDSLData())
	}
}

// fuzzKValData returns a fresh key/value map used as the kval corpus.
func fuzzKValData() map[string]any {
	return map[string]any{
		"content_type": "Text/HTML",
		"server":       "Nginx",
		"set_cookie":   "session=abc123",
		"x_powered_by": "Go",
	}
}

// fuzzDSLData returns a fresh variable map used as the DSL corpus.
func fuzzDSLData() map[string]any {
	return map[string]any{
		"hello":        "hi",
		"body":         "PING PONG",
		"all_headers":  "Server: Example\nContent-Type: text/html",
		"content_type": "text/html",
		"status_code":  200,
	}
}

// splitFuzzLines splits data on newlines, carriage returns and semicolons,
// keeps at most fuzzMaxItems*4 pieces, and returns the non-empty trimmed ones.
func splitFuzzLines(data []byte) []string {
	fields := strings.FieldsFunc(string(data), func(r rune) bool {
		return r == '\n' || r == '\r' || r == ';'
	})
	if len(fields) > fuzzMaxItems*4 {
		fields = fields[:fuzzMaxItems*4]
	}

	lines := make([]string, 0, len(fields))
	for _, field := range fields {
		if field = trimFuzzValue(field); field != "" {
			lines = append(lines, field)
		}
	}
	return lines
}

// splitFuzzFields splits data on newlines, carriage returns, pipes and
// commas, keeps at most fuzzMaxItems pieces, and returns the non-empty
// trimmed ones.
func splitFuzzFields(data []byte) []string {
	fields := strings.FieldsFunc(string(data), func(r rune) bool {
		return r == '\n' || r == '\r' || r == '|' || r == ','
	})
	if len(fields) > fuzzMaxItems {
		fields = fields[:fuzzMaxItems]
	}

	values := make([]string, 0, len(fields))
	for _, field := range fields {
		if field = trimFuzzValue(field); field != "" {
			values = append(values, field)
		}
	}
	return values
}

// cutFuzzKV splits line into a lower-cased, trimmed key and a trimmed value
// at the first '=' or ':' (whichever occurs earlier). The boolean is false
// when the line contains neither separator.
//
// Splitting at the earliest separator keeps "key: value" directives intact
// when the value itself contains '=' (for example "dsl: a=b").
func cutFuzzKV(line string) (string, string, bool) {
	idx := strings.IndexAny(line, "=:")
	if idx < 0 {
		return "", "", false
	}
	// Both separators are single bytes, so idx+1 is the start of the value.
	key := strings.ToLower(strings.TrimSpace(line[:idx]))
	return key, trimFuzzValue(line[idx+1:]), true
}

// trimFuzzValue removes NUL bytes and surrounding whitespace, then caps the
// result at fuzzMaxValueBytes bytes. The cap is byte-based, so it may cut
// through a multi-byte rune.
func trimFuzzValue(value string) string {
	value = strings.TrimSpace(strings.ReplaceAll(value, "\x00", ""))
	if len(value) > fuzzMaxValueBytes {
		value = value[:fuzzMaxValueBytes]
	}
	return value
}

// fuzzName derives an extractor name of the form "fuzz-<hex>" from the first
// eight bytes of data. It returns "" for empty input.
func fuzzName(data []byte) string {
	if len(data) == 0 {
		return ""
	}
	if len(data) > 8 {
		data = data[:8]
	}
	return "fuzz-" + strconv.FormatUint(uint64(data[0]), 16) + fuzzNameSuffix(data[1:])
}

// fuzzNameSuffix concatenates the unpadded hex form of each byte, stopping
// once at least 15 characters have been written.
func fuzzNameSuffix(data []byte) string {
	var builder strings.Builder
	for _, value := range data {
		if builder.Len() >= 15 {
			break
		}
		builder.WriteString(strconv.FormatUint(uint64(value), 16))
	}
	return builder.String()
}

// fuzzNameFromText lower-cases value and keeps only [a-z0-9-], stopping at 32
// characters. It returns "" when nothing survives.
func fuzzNameFromText(value string) string {
	var builder strings.Builder
	for _, r := range strings.ToLower(trimFuzzValue(value)) {
		switch {
		case r >= 'a' && r <= 'z', r >= '0' && r <= '9', r == '-':
			builder.WriteRune(r)
		}
		if builder.Len() >= 32 {
			break
		}
	}
	return builder.String()
}

// fuzzAttribute normalises value into an identifier, falling back to the
// trimmed raw text when normalisation leaves nothing.
func fuzzAttribute(value string) string {
	if attribute := fuzzIdentifier(value); attribute != "" {
		return attribute
	}
	return trimFuzzValue(value)
}

// parseFuzzBool maps common true/false spellings (case-insensitively) to a
// bool and returns fallback for anything else.
func parseFuzzBool(value string, fallback bool) bool {
	switch strings.ToLower(trimFuzzValue(value)) {
	case "1", "true", "yes", "on":
		return true
	case "0", "false", "no", "off":
		return false
	default:
		return fallback
	}
}

// parseFuzzGroup parses value as an integer clamped to [-2, 8] and returns
// fallback when it is not a valid integer.
func parseFuzzGroup(value string, fallback int) int {
	number, err := strconv.Atoi(trimFuzzValue(value))
	if err != nil {
		return fallback
	}
	if number < -2 {
		return -2
	}
	if number > 8 {
		return 8
	}
	return number
}

// fuzzRegexValue returns the trimmed value escaped so it matches literally.
func fuzzRegexValue(value string) string {
	return regexp.QuoteMeta(trimFuzzValue(value))
}

// fuzzJSONQuery turns value into a ".identifier" query, or "" when no
// identifier can be derived.
func fuzzJSONQuery(value string) string {
	if identifier := fuzzIdentifier(value); identifier != "" {
		return "." + identifier
	}
	return ""
}

// fuzzXPathQuery turns value into a "//identifier" query, or "" when no
// identifier can be derived.
func fuzzXPathQuery(value string) string {
	if identifier := fuzzIdentifier(value); identifier != "" {
		return "//" + identifier
	}
	return ""
}

// fuzzDSLExpression turns value into a DSL expression: the string-valued
// corpus variables are wrapped in to_upper(), every other identifier is
// returned bare. It returns "" when no identifier can be derived.
func fuzzDSLExpression(value string) string {
	identifier := fuzzIdentifier(value)
	switch identifier {
	case "hello", "body", "all_headers", "content_type":
		return "to_upper(" + identifier + ")"
	default:
		return identifier
	}
}

// fuzzIdentifier reduces value to a lower-case identifier of [a-z0-9_].
//
// Runs of '_', '-', '.' and ' ' collapse into a single underscore, a leading
// digit is prefixed with "field_", all other characters are dropped, and
// scanning stops at 32 characters. Leading and trailing underscores are
// trimmed from the result.
func fuzzIdentifier(value string) string {
	var builder strings.Builder
	lastUnderscore := false
	for _, r := range strings.ToLower(trimFuzzValue(value)) {
		switch {
		case r >= 'a' && r <= 'z':
			builder.WriteRune(r)
			lastUnderscore = false
		case r >= '0' && r <= '9':
			if builder.Len() == 0 {
				builder.WriteString("field_")
			}
			builder.WriteRune(r)
			lastUnderscore = false
		case r == '_' || r == '-' || r == '.' || r == ' ':
			if builder.Len() > 0 && !lastUnderscore {
				builder.WriteByte('_')
				lastUnderscore = true
			}
		}
		if builder.Len() >= 32 {
			break
		}
	}
	return strings.Trim(builder.String(), "_")
}

// fuzzByteAt returns data[index], or 0 when index is out of range.
func fuzzByteAt(data []byte, index int) byte {
	if index < 0 || index >= len(data) {
		return 0
	}
	return data[index]
}