Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
projectdiscovery
GitHub Repository: projectdiscovery/nuclei
Path: blob/dev/pkg/operators/extractors/extract.go
2070 views
1
package extractors
2
3
import (
4
"fmt"
5
"strings"
6
7
"github.com/antchfx/htmlquery"
8
"github.com/antchfx/xmlquery"
9
10
"github.com/projectdiscovery/nuclei/v3/pkg/types"
11
"github.com/projectdiscovery/nuclei/v3/pkg/utils/json"
12
)
13
14
// ExtractRegex extracts text from a corpus and returns it
15
func (e *Extractor) ExtractRegex(corpus string) map[string]struct{} {
16
results := make(map[string]struct{})
17
18
groupPlusOne := e.RegexGroup + 1
19
for _, regex := range e.regexCompiled {
20
matches := regex.FindAllStringSubmatch(corpus, -1)
21
22
for _, match := range matches {
23
if len(match) < groupPlusOne {
24
continue
25
}
26
matchString := match[e.RegexGroup]
27
28
if _, ok := results[matchString]; !ok {
29
results[matchString] = struct{}{}
30
}
31
}
32
}
33
return results
34
}
35
36
// ExtractKval extracts key value pairs from a data map
37
func (e *Extractor) ExtractKval(data map[string]interface{}) map[string]struct{} {
38
if e.CaseInsensitive {
39
inputData := data
40
data = make(map[string]interface{}, len(inputData))
41
for k, v := range inputData {
42
if s, ok := v.(string); ok {
43
v = strings.ToLower(s)
44
}
45
data[strings.ToLower(k)] = v
46
}
47
}
48
49
results := make(map[string]struct{})
50
for _, k := range e.KVal {
51
item, ok := data[k]
52
if !ok {
53
continue
54
}
55
itemString := types.ToString(item)
56
if _, ok := results[itemString]; !ok {
57
results[itemString] = struct{}{}
58
}
59
}
60
return results
61
}
62
63
// ExtractXPath extracts items from text using XPath selectors
64
func (e *Extractor) ExtractXPath(corpus string) map[string]struct{} {
65
if strings.HasPrefix(corpus, "<?xml") {
66
return e.ExtractXML(corpus)
67
}
68
return e.ExtractHTML(corpus)
69
}
70
71
// ExtractHTML extracts items from HTML using XPath selectors
72
func (e *Extractor) ExtractHTML(corpus string) map[string]struct{} {
73
results := make(map[string]struct{})
74
75
doc, err := htmlquery.Parse(strings.NewReader(corpus))
76
if err != nil {
77
return results
78
}
79
for _, k := range e.XPath {
80
nodes, err := htmlquery.QueryAll(doc, k)
81
if err != nil {
82
continue
83
}
84
for _, node := range nodes {
85
var value string
86
87
if e.Attribute != "" {
88
value = htmlquery.SelectAttr(node, e.Attribute)
89
} else {
90
value = htmlquery.InnerText(node)
91
}
92
if _, ok := results[value]; !ok {
93
results[value] = struct{}{}
94
}
95
}
96
}
97
return results
98
}
99
100
// ExtractXML extracts items from XML using XPath selectors
101
func (e *Extractor) ExtractXML(corpus string) map[string]struct{} {
102
results := make(map[string]struct{})
103
104
doc, err := xmlquery.Parse(strings.NewReader(corpus))
105
if err != nil {
106
return results
107
}
108
109
for _, k := range e.XPath {
110
nodes, err := xmlquery.QueryAll(doc, k)
111
if err != nil {
112
continue
113
}
114
for _, node := range nodes {
115
var value string
116
117
if e.Attribute != "" {
118
value = node.SelectAttr(e.Attribute)
119
} else {
120
value = node.InnerText()
121
}
122
if _, ok := results[value]; !ok {
123
results[value] = struct{}{}
124
}
125
}
126
}
127
return results
128
}
129
130
// ExtractJSON extracts text from a corpus using JQ queries and returns it
131
func (e *Extractor) ExtractJSON(corpus string) map[string]struct{} {
132
results := make(map[string]struct{})
133
134
var jsonObj interface{}
135
136
if err := json.Unmarshal([]byte(corpus), &jsonObj); err != nil {
137
return results
138
}
139
140
for _, k := range e.jsonCompiled {
141
iter := k.Run(jsonObj)
142
for {
143
v, ok := iter.Next()
144
if !ok {
145
break
146
}
147
if _, ok := v.(error); ok {
148
break
149
}
150
var result string
151
if res, err := types.JSONScalarToString(v); err == nil {
152
result = res
153
} else if res, err := json.Marshal(v); err == nil {
154
result = string(res)
155
} else {
156
result = types.ToString(v)
157
}
158
if _, ok := results[result]; !ok {
159
results[result] = struct{}{}
160
}
161
}
162
}
163
return results
164
}
165
166
// ExtractDSL execute the expression and returns the results
167
func (e *Extractor) ExtractDSL(data map[string]interface{}) map[string]struct{} {
168
results := make(map[string]struct{})
169
170
for _, compiledExpression := range e.dslCompiled {
171
result, err := compiledExpression.Evaluate(data)
172
// ignore errors that are related to missing parameters
173
// eg: dns dsl can have all the parameters that are not present
174
if err != nil && !strings.HasPrefix(err.Error(), "No parameter") {
175
return results
176
}
177
178
if result != nil {
179
resultString := fmt.Sprint(result)
180
if resultString != "" {
181
results[resultString] = struct{}{}
182
}
183
}
184
}
185
return results
186
}
187
188