Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
projectdiscovery
GitHub Repository: projectdiscovery/nuclei
Path: blob/dev/pkg/reporting/dedupe/dedupe.go
2070 views
1
// Package dedupe implements a deduplication layer for nuclei-generated
2
// issues.
3
//
4
// The layer can be persisted to leveldb-based storage for later reuse.
5
package dedupe
6
7
import (
	"crypto/sha1"
	"os"
	"sort"

	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/errors"

	"github.com/projectdiscovery/nuclei/v3/pkg/output"
	"github.com/projectdiscovery/nuclei/v3/pkg/types"
	"github.com/projectdiscovery/utils/conversion"
)
18
19
// Storage is a duplicate detecting storage for nuclei scan events.
20
type Storage struct {
21
temporary string
22
storage *leveldb.DB
23
}
24
25
// New creates a new duplicate detecting storage for nuclei scan events.
26
func New(dbPath string) (*Storage, error) {
27
storage := &Storage{}
28
29
var err error
30
if dbPath == "" {
31
dbPath, err = os.MkdirTemp("", "nuclei-report-*")
32
storage.temporary = dbPath
33
}
34
if err != nil {
35
return nil, err
36
}
37
38
storage.storage, err = leveldb.OpenFile(dbPath, nil)
39
if err != nil {
40
if !errors.IsCorrupted(err) {
41
return nil, err
42
}
43
44
// If the metadata is corrupted, try to recover
45
storage.storage, err = leveldb.RecoverFile(dbPath, nil)
46
if err != nil {
47
return nil, err
48
}
49
}
50
return storage, nil
51
}
52
53
// Clear removes every entry currently held in the storage.
//
// All keys are collected first and deleted afterwards. Deletion is
// best-effort: Clear has no error return, so a key whose delete fails is
// simply left in place.
func (s *Storage) Clear() {
	var keys [][]byte
	iter := s.storage.NewIterator(nil, nil)
	for iter.Next() {
		// The slice returned by iter.Key() may be overwritten by the next
		// call to Next, so it has to be copied before it is retained.
		// Without the copy the collected keys alias one buffer and most
		// entries would never be deleted.
		keys = append(keys, append([]byte(nil), iter.Key()...))
	}
	iter.Release()

	for _, key := range keys {
		_ = s.storage.Delete(key, nil)
	}
}
64
65
// Close closes the storage for further operations
66
func (s *Storage) Close() {
67
_ = s.storage.Close()
68
if s.temporary != "" {
69
_ = os.RemoveAll(s.temporary)
70
}
71
}
72
73
// Index indexes an item in storage and returns true if the item
74
// was unique.
75
func (s *Storage) Index(result *output.ResultEvent) (bool, error) {
76
hasher := sha1.New()
77
if result.TemplateID != "" {
78
_, _ = hasher.Write(conversion.Bytes(result.TemplateID))
79
}
80
if result.MatcherName != "" {
81
_, _ = hasher.Write(conversion.Bytes(result.MatcherName))
82
}
83
if result.ExtractorName != "" {
84
_, _ = hasher.Write(conversion.Bytes(result.ExtractorName))
85
}
86
if result.Type != "" {
87
_, _ = hasher.Write(conversion.Bytes(result.Type))
88
}
89
if result.Host != "" {
90
_, _ = hasher.Write(conversion.Bytes(result.Host))
91
}
92
if result.Matched != "" {
93
_, _ = hasher.Write(conversion.Bytes(result.Matched))
94
}
95
for _, v := range result.ExtractedResults {
96
_, _ = hasher.Write(conversion.Bytes(v))
97
}
98
for k, v := range result.Metadata {
99
_, _ = hasher.Write(conversion.Bytes(k))
100
_, _ = hasher.Write(conversion.Bytes(types.ToString(v)))
101
}
102
hash := hasher.Sum(nil)
103
104
exists, err := s.storage.Has(hash, nil)
105
if err != nil {
106
// if we have an error, return with it but mark it as true
107
// since we don't want to lose an issue considering it a dupe.
108
return true, err
109
}
110
if !exists {
111
return true, s.storage.Put(hash, nil, nil)
112
}
113
return false, err
114
}
115
116