Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
projectdiscovery
GitHub Repository: projectdiscovery/nuclei
Path: blob/dev/pkg/operators/extractors/extract.go
2851 views
1
package extractors
2
3
import (
4
"fmt"
5
"strings"
6
7
"github.com/antchfx/htmlquery"
8
"github.com/antchfx/xmlquery"
9
10
"github.com/projectdiscovery/nuclei/v3/pkg/types"
11
"github.com/projectdiscovery/nuclei/v3/pkg/utils/json"
12
)
13
14
// ExtractRegex extracts text from a corpus and returns it
15
func (e *Extractor) ExtractRegex(corpus string) map[string]struct{} {
16
results := make(map[string]struct{})
17
18
groupPlusOne := e.RegexGroup + 1
19
for _, regex := range e.regexCompiled {
20
// skip prefix short-circuit for case-insensitive patterns
21
rstr := regex.String()
22
if !strings.Contains(rstr, "(?i") {
23
if prefix, ok := regex.LiteralPrefix(); ok && prefix != "" {
24
if !strings.Contains(corpus, prefix) {
25
continue
26
}
27
}
28
}
29
30
submatches := regex.FindAllStringSubmatch(corpus, -1)
31
32
for _, match := range submatches {
33
if len(match) < groupPlusOne {
34
continue
35
}
36
matchString := match[e.RegexGroup]
37
38
if _, ok := results[matchString]; !ok {
39
results[matchString] = struct{}{}
40
}
41
}
42
}
43
return results
44
}
45
46
// ExtractKval extracts key value pairs from a data map
47
func (e *Extractor) ExtractKval(data map[string]interface{}) map[string]struct{} {
48
if e.CaseInsensitive {
49
inputData := data
50
data = make(map[string]interface{}, len(inputData))
51
for k, v := range inputData {
52
if s, ok := v.(string); ok {
53
v = strings.ToLower(s)
54
}
55
data[strings.ToLower(k)] = v
56
}
57
}
58
59
results := make(map[string]struct{})
60
for _, k := range e.KVal {
61
item, ok := data[k]
62
if !ok {
63
continue
64
}
65
itemString := types.ToString(item)
66
if _, ok := results[itemString]; !ok {
67
results[itemString] = struct{}{}
68
}
69
}
70
return results
71
}
72
73
// ExtractXPath extracts items from text using XPath selectors
74
func (e *Extractor) ExtractXPath(corpus string) map[string]struct{} {
75
if strings.HasPrefix(corpus, "<?xml") {
76
return e.ExtractXML(corpus)
77
}
78
return e.ExtractHTML(corpus)
79
}
80
81
// ExtractHTML extracts items from HTML using XPath selectors
82
func (e *Extractor) ExtractHTML(corpus string) map[string]struct{} {
83
results := make(map[string]struct{})
84
85
doc, err := htmlquery.Parse(strings.NewReader(corpus))
86
if err != nil {
87
return results
88
}
89
for _, k := range e.XPath {
90
nodes, err := htmlquery.QueryAll(doc, k)
91
if err != nil {
92
continue
93
}
94
for _, node := range nodes {
95
var value string
96
97
if e.Attribute != "" {
98
value = htmlquery.SelectAttr(node, e.Attribute)
99
} else {
100
value = htmlquery.InnerText(node)
101
}
102
if _, ok := results[value]; !ok {
103
results[value] = struct{}{}
104
}
105
}
106
}
107
return results
108
}
109
110
// ExtractXML extracts items from XML using XPath selectors
111
func (e *Extractor) ExtractXML(corpus string) map[string]struct{} {
112
results := make(map[string]struct{})
113
114
doc, err := xmlquery.Parse(strings.NewReader(corpus))
115
if err != nil {
116
return results
117
}
118
119
for _, k := range e.XPath {
120
nodes, err := xmlquery.QueryAll(doc, k)
121
if err != nil {
122
continue
123
}
124
for _, node := range nodes {
125
var value string
126
127
if e.Attribute != "" {
128
value = node.SelectAttr(e.Attribute)
129
} else {
130
value = node.InnerText()
131
}
132
if _, ok := results[value]; !ok {
133
results[value] = struct{}{}
134
}
135
}
136
}
137
return results
138
}
139
140
// ExtractJSON extracts text from a corpus using JQ queries and returns it
141
func (e *Extractor) ExtractJSON(corpus string) map[string]struct{} {
142
results := make(map[string]struct{})
143
144
var jsonObj interface{}
145
146
if err := json.Unmarshal([]byte(corpus), &jsonObj); err != nil {
147
return results
148
}
149
150
for _, k := range e.jsonCompiled {
151
iter := k.Run(jsonObj)
152
for {
153
v, ok := iter.Next()
154
if !ok {
155
break
156
}
157
if _, ok := v.(error); ok {
158
break
159
}
160
var result string
161
if res, err := types.JSONScalarToString(v); err == nil {
162
result = res
163
} else if res, err := json.Marshal(v); err == nil {
164
result = string(res)
165
} else {
166
result = types.ToString(v)
167
}
168
if _, ok := results[result]; !ok {
169
results[result] = struct{}{}
170
}
171
}
172
}
173
return results
174
}
175
176
// ExtractDSL execute the expression and returns the results
177
func (e *Extractor) ExtractDSL(data map[string]interface{}) map[string]struct{} {
178
results := make(map[string]struct{})
179
180
for _, compiledExpression := range e.dslCompiled {
181
result, err := compiledExpression.Evaluate(data)
182
// ignore errors that are related to missing parameters
183
// eg: dns dsl can have all the parameters that are not present
184
if err != nil && !strings.HasPrefix(err.Error(), "No parameter") {
185
return results
186
}
187
188
if result != nil {
189
resultString := fmt.Sprint(result)
190
if resultString != "" {
191
results[resultString] = struct{}{}
192
}
193
}
194
}
195
return results
196
}
197
198