Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
projectdiscovery
GitHub Repository: projectdiscovery/nuclei
Path: blob/dev/pkg/protocols/common/hosterrorscache/hosterrorscache.go
2072 views
1
package hosterrorscache
2
3
import (
4
"errors"
5
"net"
6
"net/url"
7
"regexp"
8
"strings"
9
"sync"
10
"sync/atomic"
11
12
"github.com/projectdiscovery/gcache"
13
"github.com/projectdiscovery/gologger"
14
"github.com/projectdiscovery/nuclei/v3/pkg/catalog/config"
15
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/contextargs"
16
"github.com/projectdiscovery/nuclei/v3/pkg/types/nucleierr"
17
"github.com/projectdiscovery/utils/errkit"
18
stringsutil "github.com/projectdiscovery/utils/strings"
19
)
20
21
// CacheInterface defines the signature of the hosterrorscache so that
22
// users of Nuclei as embedded lib may implement their own cache
23
type CacheInterface interface {
24
SetVerbose(verbose bool) // log verbosely
25
Close() // close the cache
26
Check(protoType string, ctx *contextargs.Context) bool // return true if the host should be skipped
27
Remove(ctx *contextargs.Context) // remove a host from the cache
28
MarkFailed(protoType string, ctx *contextargs.Context, err error) // record a failure (and cause) for the host
29
MarkFailedOrRemove(protoType string, ctx *contextargs.Context, err error) // record a failure (and cause) for the host or remove it
30
}
31
32
var (
33
_ CacheInterface = (*Cache)(nil)
34
)
35
36
// Cache is a cache for host based errors. It allows skipping
37
// certain hosts based on an error threshold.
38
//
39
// It uses an LRU cache internally for skipping unresponsive hosts
40
// that remain so for a duration.
41
type Cache struct {
42
MaxHostError int
43
verbose bool
44
failedTargets gcache.Cache[string, *cacheItem]
45
TrackError []string
46
}
47
48
// cacheItem is the failure record stored per host key.
type cacheItem struct {
	// Once makes sure the "skipped ... unresponsive N times" message is
	// logged a single time per record.
	sync.Once
	// errors counts the failures recorded for the host.
	errors atomic.Int32
	// isPermanentErr is set once a permanent network error has been seen.
	isPermanentErr bool
	// cause is the most recently recorded error (optional).
	cause error
	// mu is held by Check and MarkFailedOrRemove while they read or update
	// the record.
	mu sync.Mutex
}
55
56
// DefaultMaxHostsCount is the default number of hosts to keep track of.
// NOTE(review): presumably meant to be passed as maxHostsCount to New by
// callers; it is not referenced elsewhere in this file.
const DefaultMaxHostsCount = 10000
57
58
// New returns a new host max errors cache
59
func New(maxHostError, maxHostsCount int, trackError []string) *Cache {
60
gc := gcache.New[string, *cacheItem](maxHostsCount).ARC().Build()
61
62
return &Cache{
63
failedTargets: gc,
64
MaxHostError: maxHostError,
65
TrackError: trackError,
66
}
67
}
68
69
// SetVerbose sets the cache to log at verbose level
70
func (c *Cache) SetVerbose(verbose bool) {
71
c.verbose = verbose
72
}
73
74
// Close closes the host errors cache
75
func (c *Cache) Close() {
76
if config.DefaultConfig.IsDebugArgEnabled(config.DebugArgHostErrorStats) {
77
items := c.failedTargets.GetALL(false)
78
for k, val := range items {
79
gologger.Info().Label("MaxHostErrorStats").Msgf("Host: %s, Errors: %d", k, val.errors.Load())
80
}
81
}
82
c.failedTargets.Purge()
83
}
84
85
// NormalizeCacheValue processes the input value and returns a normalized cache
86
// value.
87
func (c *Cache) NormalizeCacheValue(value string) string {
88
var normalizedValue = value
89
90
u, err := url.ParseRequestURI(value)
91
if err != nil || u.Host == "" {
92
if strings.Contains(value, ":") {
93
return normalizedValue
94
}
95
u, err2 := url.ParseRequestURI("https://" + value)
96
if err2 != nil {
97
return normalizedValue
98
}
99
100
normalizedValue = u.Host
101
} else {
102
port := u.Port()
103
if port == "" {
104
switch u.Scheme {
105
case "https":
106
normalizedValue = net.JoinHostPort(u.Host, "443")
107
case "http":
108
normalizedValue = net.JoinHostPort(u.Host, "80")
109
}
110
} else {
111
normalizedValue = u.Host
112
}
113
}
114
115
return normalizedValue
116
}
117
118
// ErrUnresponsiveHost is returned when a host is unresponsive
119
// var ErrUnresponsiveHost = errors.New("skipping as host is unresponsive")
120
121
// Check returns true if a host should be skipped as it has been
122
// unresponsive for a certain number of times.
123
//
124
// The value can be many formats -
125
// - URL: https?:// type
126
// - Host:port type
127
// - host type
128
func (c *Cache) Check(protoType string, ctx *contextargs.Context) bool {
129
finalValue := c.GetKeyFromContext(ctx, nil)
130
131
cache, err := c.failedTargets.GetIFPresent(finalValue)
132
if err != nil {
133
return false
134
}
135
136
cache.mu.Lock()
137
defer cache.mu.Unlock()
138
139
if cache.isPermanentErr {
140
// skipping permanent errors is expected so verbose instead of info
141
gologger.Verbose().Msgf("Skipped %s from target list as found unresponsive permanently: %s", finalValue, cache.cause)
142
return true
143
}
144
145
if cache.errors.Load() >= int32(c.MaxHostError) {
146
cache.Do(func() {
147
gologger.Info().Msgf("Skipped %s from target list as found unresponsive %d times", finalValue, cache.errors.Load())
148
})
149
return true
150
}
151
152
return false
153
}
154
155
// Remove removes a host from the cache
156
func (c *Cache) Remove(ctx *contextargs.Context) {
157
key := c.GetKeyFromContext(ctx, nil)
158
_ = c.failedTargets.Remove(key) // remove even the cache is not present
159
}
160
161
// MarkFailed marks a host as failed previously
162
//
163
// Deprecated: Use MarkFailedOrRemove instead.
164
func (c *Cache) MarkFailed(protoType string, ctx *contextargs.Context, err error) {
165
if err == nil {
166
return
167
}
168
169
c.MarkFailedOrRemove(protoType, ctx, err)
170
}
171
172
// MarkFailedOrRemove marks a host as failed previously or removes it
173
func (c *Cache) MarkFailedOrRemove(protoType string, ctx *contextargs.Context, err error) {
174
if err != nil && !c.checkError(protoType, err) {
175
return
176
}
177
178
if err == nil {
179
// Remove the host from cache
180
//
181
// NOTE(dwisiswant0): The decision was made to completely remove the
182
// cached entry for the host instead of simply decrementing the error
183
// count (using `(atomic.Int32).Swap` to update the value to `N-1`).
184
// This approach was chosen because the error handling logic operates
185
// concurrently, and decrementing the count could lead to UB (unexpected
186
// behavior) even when the error is `nil`.
187
//
188
// To clarify, consider the following scenario where the error
189
// encountered does NOT belong to the permanent network error category
190
// (`errkit.ErrKindNetworkPermanent`):
191
//
192
// 1. Iteration 1: A timeout error occurs, and the error count for the
193
// host is incremented.
194
// 2. Iteration 2: Another timeout error is encountered, leading to
195
// another increment in the host's error count.
196
// 3. Iteration 3: A third timeout error happens, which increments the
197
// error count further. At this point, the host is flagged as
198
// unresponsive.
199
// 4. Iteration 4: The host becomes reachable (no error or a transient
200
// issue resolved). Instead of performing a no-op and leaving the
201
// host in the cache, the host entry is removed entirely to reset its
202
// state.
203
// 5. Iteration 5: A subsequent timeout error occurs after the host was
204
// removed and re-added to the cache. The error count is reset and
205
// starts from 1 again.
206
//
207
// This removal strategy ensures the cache is updated dynamically to
208
// reflect the current state of the host without persisting stale or
209
// irrelevant error counts that could interfere with future error
210
// handling and tracking logic.
211
c.Remove(ctx)
212
213
return
214
}
215
216
cacheKey := c.GetKeyFromContext(ctx, err)
217
cache, cacheErr := c.failedTargets.GetIFPresent(cacheKey)
218
if errors.Is(cacheErr, gcache.KeyNotFoundError) {
219
cache = &cacheItem{errors: atomic.Int32{}}
220
}
221
222
cache.mu.Lock()
223
defer cache.mu.Unlock()
224
225
if errkit.IsKind(err, errkit.ErrKindNetworkPermanent) {
226
cache.isPermanentErr = true
227
}
228
229
cache.cause = err
230
cache.errors.Add(1)
231
232
_ = c.failedTargets.Set(cacheKey, cache)
233
}
234
235
// GetKeyFromContext returns the key for the cache from the context
236
func (c *Cache) GetKeyFromContext(ctx *contextargs.Context, err error) string {
237
// Note:
238
// ideally any changes made to remote addr in template like {{Hostname}}:81 etc
239
// should be reflected in contextargs but it is not yet reflected in some cases
240
// and needs refactor of ScanContext + ContextArgs to achieve that
241
// i.e why we use real address from error if present
242
var address string
243
244
// 1. the address carried inside the error (if the transport sets it)
245
if err != nil {
246
if v := errkit.GetAttrValue(err, "address"); v.Any() != nil {
247
address = v.String()
248
}
249
}
250
251
if address == "" {
252
address = ctx.MetaInput.Address()
253
}
254
255
finalValue := c.NormalizeCacheValue(address)
256
return finalValue
257
}
258
259
// reCheckError matches the error messages that checkError counts as host
// errors when the error kind alone does not decide it.
var reCheckError = regexp.MustCompile(`(no address found for host|could not resolve host|connection refused|connection reset by peer|could not connect to any address found for host|timeout awaiting response headers)`)
260
261
// checkError checks if an error represents a type that should be
262
// added to the host skipping table.
263
// it first parses error and extracts the cause and checks for blacklisted
264
// or common errors that should be skipped
265
func (c *Cache) checkError(protoType string, err error) bool {
266
if err == nil {
267
return false
268
}
269
if protoType != "http" {
270
return false
271
}
272
kind := errkit.GetErrorKind(err, nucleierr.ErrTemplateLogic)
273
switch kind {
274
case nucleierr.ErrTemplateLogic:
275
// these are errors that are not related to the target
276
// and are due to template logic
277
return false
278
case errkit.ErrKindNetworkTemporary:
279
// these should not be counted as host errors
280
return false
281
case errkit.ErrKindNetworkPermanent:
282
// these should be counted as host errors
283
return true
284
case errkit.ErrKindDeadline:
285
// these should not be counted as host errors
286
return false
287
default:
288
// parse error for further processing
289
errX := errkit.FromError(err)
290
tmp := errX.Cause()
291
cause := tmp.Error()
292
if stringsutil.ContainsAll(cause, "ReadStatusLine:", "read: connection reset by peer") {
293
// this is a FP and should not be counted as a host error
294
// because server closes connection when it reads corrupted bytes which we send via rawhttp
295
return false
296
}
297
if strings.HasPrefix(cause, "ReadStatusLine:") {
298
// error is present in last part when using rawhttp
299
// this will be fixed once errkit is used everywhere
300
lastIndex := strings.LastIndex(cause, ":")
301
if lastIndex == -1 {
302
lastIndex = 0
303
}
304
if lastIndex >= len(cause)-1 {
305
lastIndex = 0
306
}
307
cause = cause[lastIndex+1:]
308
}
309
for _, msg := range c.TrackError {
310
if strings.Contains(cause, msg) {
311
return true
312
}
313
}
314
return reCheckError.MatchString(cause)
315
}
316
}
317
318