Path: blob/dev/pkg/operators/extractors/extract.go
2070 views
package extractors

import (
	"fmt"
	"strings"

	"github.com/antchfx/htmlquery"
	"github.com/antchfx/xmlquery"

	"github.com/projectdiscovery/nuclei/v3/pkg/types"
	"github.com/projectdiscovery/nuclei/v3/pkg/utils/json"
)

// ExtractRegex runs every compiled regex over corpus and returns the set of
// strings captured by the submatch at index e.RegexGroup.
//
// Matches that have fewer than e.RegexGroup+1 submatches are skipped.
func (e *Extractor) ExtractRegex(corpus string) map[string]struct{} {
	found := make(map[string]struct{})

	minLen := e.RegexGroup + 1
	for _, re := range e.regexCompiled {
		for _, submatches := range re.FindAllStringSubmatch(corpus, -1) {
			if len(submatches) >= minLen {
				// Assigning to a set is idempotent, so duplicates collapse.
				found[submatches[e.RegexGroup]] = struct{}{}
			}
		}
	}
	return found
}

// ExtractKval looks up each key listed in e.KVal inside data and returns the
// set of the values found, converted to strings.
//
// When e.CaseInsensitive is set, the lookup happens against a lowercased copy
// of data (keys and string values are lowercased); the caller's map is not
// modified.
func (e *Extractor) ExtractKval(data map[string]interface{}) map[string]struct{} {
	lookup := data
	if e.CaseInsensitive {
		lookup = make(map[string]interface{}, len(data))
		for key, value := range data {
			if text, isString := value.(string); isString {
				value = strings.ToLower(text)
			}
			lookup[strings.ToLower(key)] = value
		}
	}

	found := make(map[string]struct{})
	for _, key := range e.KVal {
		if item, present := lookup[key]; present {
			found[types.ToString(item)] = struct{}{}
		}
	}
	return found
}

// ExtractXPath extracts items from corpus using the XPath selectors, treating
// the corpus as XML when it starts with "<?xml" and as HTML otherwise.
func (e *Extractor) ExtractXPath(corpus string) map[string]struct{} {
	if !strings.HasPrefix(corpus, "<?xml") {
		return e.ExtractHTML(corpus)
	}
	return e.ExtractXML(corpus)
}

// ExtractHTML parses corpus as HTML and evaluates every selector in e.XPath
// against it.
//
// For each matching node it collects the value of e.Attribute when that field
// is non-empty, and the node's inner text otherwise. A parse failure yields an
// empty set; a selector that fails to evaluate is skipped.
func (e *Extractor) ExtractHTML(corpus string) map[string]struct{} {
	found := make(map[string]struct{})

	root, err := htmlquery.Parse(strings.NewReader(corpus))
	if err != nil {
		return found
	}

	attr := e.Attribute
	for _, selector := range e.XPath {
		matched, err := htmlquery.QueryAll(root, selector)
		if err != nil {
			continue
		}
		for _, n := range matched {
			if attr == "" {
				found[htmlquery.InnerText(n)] = struct{}{}
				continue
			}
			found[htmlquery.SelectAttr(n, attr)] = struct{}{}
		}
	}
	return found
}

// ExtractXML parses corpus as XML and evaluates every selector in e.XPath
// against it.
//
// For each matching node it collects the value of e.Attribute when that field
// is non-empty, and the node's inner text otherwise. A parse failure yields an
// empty set; a selector that fails to evaluate is skipped.
func (e *Extractor) ExtractXML(corpus string) map[string]struct{} {
	found := make(map[string]struct{})

	root, err := xmlquery.Parse(strings.NewReader(corpus))
	if err != nil {
		return found
	}

	attr := e.Attribute
	for _, selector := range e.XPath {
		matched, err := xmlquery.QueryAll(root, selector)
		if err != nil {
			continue
		}
		for _, n := range matched {
			if attr == "" {
				found[n.InnerText()] = struct{}{}
				continue
			}
			found[n.SelectAttr(attr)] = struct{}{}
		}
	}
	return found
}

// ExtractJSON decodes corpus as JSON, runs each compiled JQ query over the
// decoded value, and returns the set of results rendered as strings.
//
// A corpus that fails to decode yields an empty set. Iteration of a single
// query stops as soon as it produces an error value; the remaining queries
// still run.
func (e *Extractor) ExtractJSON(corpus string) map[string]struct{} {
	found := make(map[string]struct{})

	var parsed interface{}
	if err := json.Unmarshal([]byte(corpus), &parsed); err != nil {
		return found
	}

	for _, query := range e.jsonCompiled {
		iter := query.Run(parsed)
		for value, more := iter.Next(); more; value, more = iter.Next() {
			if _, failed := value.(error); failed {
				break
			}

			// Prefer the scalar rendering; fall back to a JSON encoding of
			// the value, and finally to the generic string conversion.
			text, err := types.JSONScalarToString(value)
			if err != nil {
				if raw, marshalErr := json.Marshal(value); marshalErr == nil {
					text = string(raw)
				} else {
					text = types.ToString(value)
				}
			}
			found[text] = struct{}{}
		}
	}
	return found
}

// ExtractDSL evaluates every compiled DSL expression against data and returns
// the set of non-empty results, formatted with fmt.Sprint.
//
// Errors whose message starts with "No parameter" are ignored, since an
// expression may reference parameters that are not present in data (e.g. DNS
// DSL). Any other error stops evaluation and returns what was collected so far.
func (e *Extractor) ExtractDSL(data map[string]interface{}) map[string]struct{} {
	found := make(map[string]struct{})

	for _, expression := range e.dslCompiled {
		value, err := expression.Evaluate(data)
		if err != nil && !strings.HasPrefix(err.Error(), "No parameter") {
			return found
		}
		if value == nil {
			continue
		}
		if text := fmt.Sprint(value); text != "" {
			found[text] = struct{}{}
		}
	}
	return found
}