Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
projectdiscovery
GitHub Repository: projectdiscovery/nuclei
Path: blob/dev/pkg/input/provider/list/hmap.go
2070 views
1
// Package list implements a hybrid hmap/filekv backed input provider
2
// for nuclei that can either stream or store results using different kv stores.
3
package list
4
5
import (
6
"bufio"
7
"context"
8
"fmt"
9
"io"
10
"os"
11
"regexp"
12
"strings"
13
"sync"
14
"time"
15
16
"github.com/pkg/errors"
17
18
"github.com/projectdiscovery/gologger"
19
"github.com/projectdiscovery/hmap/filekv"
20
"github.com/projectdiscovery/hmap/store/hybrid"
21
"github.com/projectdiscovery/mapcidr/asn"
22
providerTypes "github.com/projectdiscovery/nuclei/v3/pkg/input/types"
23
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/contextargs"
24
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/protocolstate"
25
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/uncover"
26
"github.com/projectdiscovery/nuclei/v3/pkg/types"
27
"github.com/projectdiscovery/nuclei/v3/pkg/utils/expand"
28
uncoverlib "github.com/projectdiscovery/uncover"
29
fileutil "github.com/projectdiscovery/utils/file"
30
iputil "github.com/projectdiscovery/utils/ip"
31
readerutil "github.com/projectdiscovery/utils/reader"
32
sliceutil "github.com/projectdiscovery/utils/slice"
33
urlutil "github.com/projectdiscovery/utils/url"
34
)
35
36
// DefaultMaxDedupeItemsCount is the MaxItems value applied to the filekv
// store that New opens when stream mode is enabled.
const DefaultMaxDedupeItemsCount = 10000
37
38
// ListInputProvider is a hmap/filekv backed nuclei ListInputProvider provider
39
// it supports list type of input ex: urls,file,stdin,uncover,etc. (i.e just url not complete request/response)
40
type ListInputProvider struct {
41
ipOptions *ipOptions
42
inputCount int64
43
excludedCount int64
44
dupeCount int64
45
skippedCount int64
46
hostMap *hybrid.HybridMap
47
excludedHosts map[string]struct{}
48
hostMapStream *filekv.FileDB
49
hostMapStreamOnce sync.Once
50
sync.Once
51
}
52
53
// Options is a wrapper around types.Options structure
54
type Options struct {
55
// Options contains options for hmap provider
56
Options *types.Options
57
// NotFoundCallback is called for each not found target
58
// This overrides error handling for not found target
59
NotFoundCallback func(template string) bool
60
}
61
62
// New creates a new hmap backed nuclei Input Provider
63
// and initializes it based on the passed options Model.
64
func New(opts *Options) (*ListInputProvider, error) {
65
options := opts.Options
66
67
hm, err := hybrid.New(hybrid.DefaultDiskOptions)
68
if err != nil {
69
return nil, errors.Wrap(err, "could not create temporary input file")
70
}
71
72
input := &ListInputProvider{
73
hostMap: hm,
74
ipOptions: &ipOptions{
75
ScanAllIPs: options.ScanAllIPs,
76
IPV4: sliceutil.Contains(options.IPVersion, "4"),
77
IPV6: sliceutil.Contains(options.IPVersion, "6"),
78
},
79
excludedHosts: make(map[string]struct{}),
80
}
81
if options.Stream {
82
fkvOptions := filekv.DefaultOptions
83
fkvOptions.MaxItems = DefaultMaxDedupeItemsCount
84
if tmpFileName, err := fileutil.GetTempFileName(); err != nil {
85
return nil, errors.Wrap(err, "could not create temporary input file")
86
} else {
87
fkvOptions.Path = tmpFileName
88
}
89
fkv, err := filekv.Open(fkvOptions)
90
if err != nil {
91
return nil, errors.Wrap(err, "could not create temporary unsorted input file")
92
}
93
input.hostMapStream = fkv
94
}
95
if initErr := input.initializeInputSources(opts); initErr != nil {
96
return nil, initErr
97
}
98
if input.excludedCount > 0 {
99
gologger.Info().Msgf("Number of hosts excluded from input: %d", input.excludedCount)
100
}
101
if input.dupeCount > 0 {
102
gologger.Info().Msgf("Supplied input was automatically deduplicated (%d removed).", input.dupeCount)
103
}
104
if input.skippedCount > 0 {
105
gologger.Info().Msgf("Number of hosts skipped from input due to exclusion: %d", input.skippedCount)
106
}
107
return input, nil
108
}
109
110
// Count returns the input count
111
func (i *ListInputProvider) Count() int64 {
112
return i.inputCount
113
}
114
115
// Iterate over all inputs in order
116
func (i *ListInputProvider) Iterate(callback func(value *contextargs.MetaInput) bool) {
117
if i.hostMapStream != nil {
118
i.hostMapStreamOnce.Do(func() {
119
if err := i.hostMapStream.Process(); err != nil {
120
gologger.Warning().Msgf("error in stream mode processing: %s\n", err)
121
}
122
})
123
}
124
callbackFunc := func(k, _ []byte) error {
125
metaInput := contextargs.NewMetaInput()
126
if err := metaInput.Unmarshal(string(k)); err != nil {
127
return err
128
}
129
if !callback(metaInput) {
130
return io.EOF
131
}
132
return nil
133
}
134
if i.hostMapStream != nil {
135
_ = i.hostMapStream.Scan(callbackFunc)
136
} else {
137
i.hostMap.Scan(callbackFunc)
138
}
139
}
140
141
// Set normalizes and stores passed input values
142
func (i *ListInputProvider) Set(executionId string, value string) {
143
URL := strings.TrimSpace(value)
144
if URL == "" {
145
return
146
}
147
// parse hostname if url is given
148
urlx, err := urlutil.Parse(URL)
149
if err != nil || (urlx != nil && urlx.Host == "") {
150
gologger.Debug().Label("url").MsgFunc(func() string {
151
if err != nil {
152
return fmt.Sprintf("failed to parse url %v got %v skipping ip selection", URL, err)
153
}
154
return fmt.Sprintf("got empty hostname for %v skipping ip selection", URL)
155
})
156
metaInput := contextargs.NewMetaInput()
157
metaInput.Input = URL
158
i.setItem(metaInput)
159
return
160
}
161
162
// Check if input is ip or hostname
163
if iputil.IsIP(urlx.Hostname()) {
164
metaInput := contextargs.NewMetaInput()
165
metaInput.Input = URL
166
i.setItem(metaInput)
167
return
168
}
169
170
if i.ipOptions.ScanAllIPs {
171
// scan all ips
172
dialers := protocolstate.GetDialersWithId(executionId)
173
if dialers == nil {
174
panic("dialers with executionId " + executionId + " not found")
175
}
176
177
dnsData, err := dialers.Fastdialer.GetDNSData(urlx.Hostname())
178
if err == nil {
179
if (len(dnsData.A) + len(dnsData.AAAA)) > 0 {
180
var ips []string
181
if i.ipOptions.IPV4 {
182
ips = append(ips, dnsData.A...)
183
}
184
if i.ipOptions.IPV6 {
185
ips = append(ips, dnsData.AAAA...)
186
}
187
for _, ip := range ips {
188
if ip == "" {
189
continue
190
}
191
metaInput := contextargs.NewMetaInput()
192
metaInput.Input = URL
193
metaInput.CustomIP = ip
194
i.setItem(metaInput)
195
}
196
return
197
} else {
198
gologger.Debug().Msgf("scanAllIps: no ip's found reverting to default")
199
}
200
} else {
201
// failed to scanallips falling back to defaults
202
gologger.Debug().Msgf("scanAllIps: dns resolution failed: %v", err)
203
}
204
}
205
206
ips := []string{}
207
// only scan the target but ipv6 if it has one
208
if i.ipOptions.IPV6 {
209
dialers := protocolstate.GetDialersWithId(executionId)
210
if dialers == nil {
211
panic("dialers with executionId " + executionId + " not found")
212
}
213
214
dnsData, err := dialers.Fastdialer.GetDNSData(urlx.Hostname())
215
if err == nil && len(dnsData.AAAA) > 0 {
216
// pick/ prefer 1st
217
ips = append(ips, dnsData.AAAA[0])
218
} else {
219
gologger.Warning().Msgf("target does not have ipv6 address falling back to ipv4 %v\n", err)
220
}
221
}
222
if i.ipOptions.IPV4 {
223
// if IPV4 is enabled do not specify ip let dialer handle it
224
ips = append(ips, "")
225
}
226
227
for _, ip := range ips {
228
metaInput := contextargs.NewMetaInput()
229
if ip != "" {
230
metaInput.Input = URL
231
metaInput.CustomIP = ip
232
i.setItem(metaInput)
233
} else {
234
metaInput.Input = URL
235
i.setItem(metaInput)
236
}
237
}
238
}
239
240
// SetWithProbe only sets the input if it is live
241
func (i *ListInputProvider) SetWithProbe(executionId string, value string, probe providerTypes.InputLivenessProbe) error {
242
probedValue, err := probe.ProbeURL(value)
243
if err != nil {
244
return err
245
}
246
i.Set(executionId, probedValue)
247
return nil
248
}
249
250
// SetWithExclusions normalizes and stores passed input values if not excluded
251
func (i *ListInputProvider) SetWithExclusions(executionId string, value string) error {
252
URL := strings.TrimSpace(value)
253
if URL == "" {
254
return nil
255
}
256
if i.isExcluded(URL) {
257
i.skippedCount++
258
return nil
259
}
260
i.Set(executionId, URL)
261
return nil
262
}
263
264
// ListInputProvider is a hmap/filekv backed nuclei ListInputProvider provider
265
func (i *ListInputProvider) InputType() string {
266
return "ListInputProvider"
267
}
268
269
// Close closes the input provider
270
func (i *ListInputProvider) Close() {
271
_ = i.hostMap.Close()
272
if i.hostMapStream != nil {
273
i.hostMapStream.Close()
274
}
275
}
276
277
// initializeInputSources initializes the input sources for hmap input
278
func (i *ListInputProvider) initializeInputSources(opts *Options) error {
279
options := opts.Options
280
281
// Handle targets flags
282
for _, target := range options.Targets {
283
switch {
284
case iputil.IsCIDR(target):
285
ips := expand.CIDR(target)
286
i.addTargets(options.ExecutionId, ips)
287
case asn.IsASN(target):
288
ips := expand.ASN(target)
289
i.addTargets(options.ExecutionId, ips)
290
default:
291
i.Set(options.ExecutionId, target)
292
}
293
}
294
295
// Handle stdin
296
if options.Stdin {
297
i.scanInputFromReader(
298
options.ExecutionId,
299
readerutil.TimeoutReader{Reader: os.Stdin, Timeout: time.Duration(options.InputReadTimeout)})
300
}
301
302
// Handle target file
303
if options.TargetsFilePath != "" {
304
input, inputErr := os.Open(options.TargetsFilePath)
305
if inputErr != nil {
306
// Handle cloud based input here.
307
if opts.NotFoundCallback == nil || !opts.NotFoundCallback(options.TargetsFilePath) {
308
return errors.Wrap(inputErr, "could not open targets file")
309
}
310
}
311
if input != nil {
312
i.scanInputFromReader(options.ExecutionId, input)
313
_ = input.Close()
314
}
315
}
316
if options.Uncover && options.UncoverQuery != nil {
317
gologger.Info().Msgf("Running uncover query against: %s", strings.Join(options.UncoverEngine, ","))
318
uncoverOpts := &uncoverlib.Options{
319
Agents: options.UncoverEngine,
320
Queries: options.UncoverQuery,
321
Limit: options.UncoverLimit,
322
MaxRetry: options.Retries,
323
Timeout: options.Timeout,
324
RateLimit: uint(options.UncoverRateLimit),
325
RateLimitUnit: time.Minute, // default unit is minute
326
}
327
ch, err := uncover.GetTargetsFromUncover(context.TODO(), options.UncoverField, uncoverOpts)
328
if err != nil {
329
return err
330
}
331
for c := range ch {
332
i.Set(options.ExecutionId, c)
333
}
334
}
335
336
if len(options.ExcludeTargets) > 0 {
337
for _, target := range options.ExcludeTargets {
338
switch {
339
case iputil.IsCIDR(target):
340
ips := expand.CIDR(target)
341
i.removeTargets(ips)
342
case asn.IsASN(target):
343
ips := expand.ASN(target)
344
i.removeTargets(ips)
345
default:
346
i.Del(options.ExecutionId, target)
347
}
348
}
349
}
350
351
return nil
352
}
353
354
// scanInputFromReader scans a line of input from reader and passes it for storage
355
func (i *ListInputProvider) scanInputFromReader(executionId string, reader io.Reader) {
356
scanner := bufio.NewScanner(reader)
357
for scanner.Scan() {
358
item := scanner.Text()
359
switch {
360
case iputil.IsCIDR(item):
361
ips := expand.CIDR(item)
362
i.addTargets(executionId, ips)
363
case asn.IsASN(item):
364
ips := expand.ASN(item)
365
i.addTargets(executionId, ips)
366
default:
367
i.Set(executionId, item)
368
}
369
}
370
}
371
372
// isExcluded checks if a URL is in the exclusion list
373
func (i *ListInputProvider) isExcluded(URL string) bool {
374
metaInput := contextargs.NewMetaInput()
375
metaInput.Input = URL
376
key, err := metaInput.MarshalString()
377
if err != nil {
378
gologger.Warning().Msgf("%s\n", err)
379
return false
380
}
381
382
_, exists := i.excludedHosts[key]
383
return exists
384
}
385
386
// Del normalizes value and removes the matching inputs from the provider.
//
// It mirrors Set: blank values are ignored; values that cannot be parsed as
// a URL, have an empty host, or whose host is already an IP are removed
// as-is. For hostnames the same IP selection as in Set is performed (all
// resolved addresses with ScanAllIPs, otherwise the first AAAA record and/or
// an entry without a custom IP) and delItem is called once per selection.
//
// Del panics if DNS data is needed and no dialers are registered for
// executionId.
func (i *ListInputProvider) Del(executionId string, value string) {
	URL := strings.TrimSpace(value)
	if URL == "" {
		return
	}

	// remove deletes the entries for the trimmed input, pinned to ip when ip
	// is not empty. Always using URL (never the raw value) keeps all code
	// paths consistent with each other and with Set.
	remove := func(ip string) {
		metaInput := contextargs.NewMetaInput()
		metaInput.Input = URL
		if ip != "" {
			metaInput.CustomIP = ip
		}
		i.delItem(metaInput)
	}

	// parse hostname if url is given
	urlx, err := urlutil.Parse(URL)
	if err != nil || (urlx != nil && urlx.Host == "") {
		gologger.Debug().Label("url").MsgFunc(func() string {
			if err != nil {
				return fmt.Sprintf("failed to parse url %v got %v skipping ip selection", URL, err)
			}
			return fmt.Sprintf("got empty hostname for %v skipping ip selection", URL)
		})
		remove("")
		return
	}

	// Check if input is ip or hostname
	host := urlx.Hostname()
	if iputil.IsIP(host) {
		remove("")
		return
	}

	if i.ipOptions.ScanAllIPs {
		// scan all ips
		dialers := protocolstate.GetDialersWithId(executionId)
		if dialers == nil {
			panic("dialers with executionId " + executionId + " not found")
		}

		dnsData, err := dialers.Fastdialer.GetDNSData(host)
		switch {
		case err != nil:
			// failed to scanallips falling back to defaults
			gologger.Debug().Msgf("scanAllIps: dns resolution failed: %v", err)
		case len(dnsData.A)+len(dnsData.AAAA) == 0:
			gologger.Debug().Msgf("scanAllIps: no ip's found reverting to default")
		default:
			var ips []string
			if i.ipOptions.IPV4 {
				ips = append(ips, dnsData.A...)
			}
			if i.ipOptions.IPV6 {
				ips = append(ips, dnsData.AAAA...)
			}
			for _, ip := range ips {
				if ip == "" {
					continue
				}
				remove(ip)
			}
			return
		}
	}

	var ips []string
	// handle the target once, and additionally over ipv6 if it has an address
	if i.ipOptions.IPV6 {
		dialers := protocolstate.GetDialersWithId(executionId)
		if dialers == nil {
			panic("dialers with executionId " + executionId + " not found")
		}

		dnsData, err := dialers.Fastdialer.GetDNSData(host)
		if err == nil && len(dnsData.AAAA) > 0 {
			// pick/ prefer 1st
			ips = append(ips, dnsData.AAAA[0])
		} else {
			gologger.Warning().Msgf("target does not have ipv6 address falling back to ipv4 %v\n", err)
		}
	}
	if i.ipOptions.IPV4 {
		// if IPV4 is enabled do not specify ip let dialer handle it
		ips = append(ips, "")
	}

	for _, ip := range ips {
		remove(ip)
	}
}
483
484
// setItem in the kv store
485
func (i *ListInputProvider) setItem(metaInput *contextargs.MetaInput) {
486
key, err := metaInput.MarshalString()
487
if err != nil {
488
gologger.Warning().Msgf("%s\n", err)
489
return
490
}
491
if _, ok := i.hostMap.Get(key); ok {
492
i.dupeCount++
493
return
494
}
495
496
i.inputCount++ // tracks target count
497
_ = i.hostMap.Set(key, nil)
498
if i.hostMapStream != nil {
499
i.setHostMapStream(key)
500
}
501
}
502
503
// setItem in the kv store
504
func (i *ListInputProvider) delItem(metaInput *contextargs.MetaInput) {
505
targetUrl, err := urlutil.ParseURL(metaInput.Input, true)
506
if err != nil {
507
gologger.Warning().Msgf("%s\n", err)
508
return
509
}
510
511
i.hostMap.Scan(func(k, _ []byte) error {
512
var tmpMetaInput contextargs.MetaInput
513
if err := tmpMetaInput.Unmarshal(string(k)); err != nil {
514
return err
515
}
516
tmpKey, err := tmpMetaInput.MarshalString()
517
if err != nil {
518
return err
519
}
520
tmpUrl, err := urlutil.ParseURL(tmpMetaInput.Input, true)
521
if err != nil {
522
return err
523
}
524
525
matched, _ := regexp.MatchString(metaInput.Input, tmpUrl.Host)
526
if tmpUrl.Host == targetUrl.Host || matched {
527
_ = i.hostMap.Del(tmpKey)
528
i.excludedHosts[tmpKey] = struct{}{}
529
i.excludedCount++
530
i.inputCount--
531
}
532
return nil
533
})
534
}
535
536
// setHostMapStream sets item in stream mode
537
func (i *ListInputProvider) setHostMapStream(data string) {
538
if _, err := i.hostMapStream.Merge([][]byte{[]byte(data)}); err != nil {
539
gologger.Warning().Msgf("%s\n", err)
540
return
541
}
542
}
543
544
// addTargets stores each entry of targets via Set.
func (i *ListInputProvider) addTargets(executionId string, targets []string) {
	for idx := range targets {
		i.Set(executionId, targets[idx])
	}
}
549
550
// removeTargets removes each entry of targets from the kv store via delItem.
func (i *ListInputProvider) removeTargets(targets []string) {
	for idx := range targets {
		excluded := contextargs.NewMetaInput()
		excluded.Input = targets[idx]
		i.delItem(excluded)
	}
}
557
558