Path: blob/dev/pkg/operators/extractors/extract.go
2851 views
package extractors12import (3"fmt"4"strings"56"github.com/antchfx/htmlquery"7"github.com/antchfx/xmlquery"89"github.com/projectdiscovery/nuclei/v3/pkg/types"10"github.com/projectdiscovery/nuclei/v3/pkg/utils/json"11)1213// ExtractRegex extracts text from a corpus and returns it14func (e *Extractor) ExtractRegex(corpus string) map[string]struct{} {15results := make(map[string]struct{})1617groupPlusOne := e.RegexGroup + 118for _, regex := range e.regexCompiled {19// skip prefix short-circuit for case-insensitive patterns20rstr := regex.String()21if !strings.Contains(rstr, "(?i") {22if prefix, ok := regex.LiteralPrefix(); ok && prefix != "" {23if !strings.Contains(corpus, prefix) {24continue25}26}27}2829submatches := regex.FindAllStringSubmatch(corpus, -1)3031for _, match := range submatches {32if len(match) < groupPlusOne {33continue34}35matchString := match[e.RegexGroup]3637if _, ok := results[matchString]; !ok {38results[matchString] = struct{}{}39}40}41}42return results43}4445// ExtractKval extracts key value pairs from a data map46func (e *Extractor) ExtractKval(data map[string]interface{}) map[string]struct{} {47if e.CaseInsensitive {48inputData := data49data = make(map[string]interface{}, len(inputData))50for k, v := range inputData {51if s, ok := v.(string); ok {52v = strings.ToLower(s)53}54data[strings.ToLower(k)] = v55}56}5758results := make(map[string]struct{})59for _, k := range e.KVal {60item, ok := data[k]61if !ok {62continue63}64itemString := types.ToString(item)65if _, ok := results[itemString]; !ok {66results[itemString] = struct{}{}67}68}69return results70}7172// ExtractXPath extracts items from text using XPath selectors73func (e *Extractor) ExtractXPath(corpus string) map[string]struct{} {74if strings.HasPrefix(corpus, "<?xml") {75return e.ExtractXML(corpus)76}77return e.ExtractHTML(corpus)78}7980// ExtractHTML extracts items from HTML using XPath selectors81func (e *Extractor) ExtractHTML(corpus string) map[string]struct{} {82results := make(map[string]struct{})8384doc, err := htmlquery.Parse(strings.NewReader(corpus))85if err != nil {86return results87}88for _, k := range e.XPath {89nodes, err := htmlquery.QueryAll(doc, k)90if err != nil {91continue92}93for _, node := range nodes {94var value string9596if e.Attribute != "" {97value = htmlquery.SelectAttr(node, e.Attribute)98} else {99value = htmlquery.InnerText(node)100}101if _, ok := results[value]; !ok {102results[value] = struct{}{}103}104}105}106return results107}108109// ExtractXML extracts items from XML using XPath selectors110func (e *Extractor) ExtractXML(corpus string) map[string]struct{} {111results := make(map[string]struct{})112113doc, err := xmlquery.Parse(strings.NewReader(corpus))114if err != nil {115return results116}117118for _, k := range e.XPath {119nodes, err := xmlquery.QueryAll(doc, k)120if err != nil {121continue122}123for _, node := range nodes {124var value string125126if e.Attribute != "" {127value = node.SelectAttr(e.Attribute)128} else {129value = node.InnerText()130}131if _, ok := results[value]; !ok {132results[value] = struct{}{}133}134}135}136return results137}138139// ExtractJSON extracts text from a corpus using JQ queries and returns it140func (e *Extractor) ExtractJSON(corpus string) map[string]struct{} {141results := make(map[string]struct{})142143var jsonObj interface{}144145if err := json.Unmarshal([]byte(corpus), &jsonObj); err != nil {146return results147}148149for _, k := range e.jsonCompiled {150iter := k.Run(jsonObj)151for {152v, ok := iter.Next()153if !ok {154break155}156if _, ok := v.(error); ok {157break158}159var result string160if res, err := types.JSONScalarToString(v); err == nil {161result = res162} else if res, err := json.Marshal(v); err == nil {163result = string(res)164} else {165result = types.ToString(v)166}167if _, ok := results[result]; !ok {168results[result] = struct{}{}169}170}171}172return results173}174175// ExtractDSL execute the expression and returns the results176func (e *Extractor) ExtractDSL(data map[string]interface{}) map[string]struct{} {177results := make(map[string]struct{})178179for _, compiledExpression := range e.dslCompiled {180result, err := compiledExpression.Evaluate(data)181// ignore errors that are related to missing parameters182// eg: dns dsl can have all the parameters that are not present183if err != nil && !strings.HasPrefix(err.Error(), "No parameter") {184return results185}186187if result != nil {188resultString := fmt.Sprint(result)189if resultString != "" {190results[resultString] = struct{}{}191}192}193}194return results195}196197198