Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
projectdiscovery
GitHub Repository: projectdiscovery/nuclei
Path: blob/dev/pkg/protocols/common/hosterrorscache/hosterrorscache.go
2843 views
1
package hosterrorscache
2
3
import (
4
"errors"
5
"net"
6
"net/url"
7
"regexp"
8
"strings"
9
"sync"
10
"sync/atomic"
11
12
"github.com/projectdiscovery/gcache"
13
"github.com/projectdiscovery/gologger"
14
"github.com/projectdiscovery/nuclei/v3/pkg/catalog/config"
15
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/contextargs"
16
"github.com/projectdiscovery/nuclei/v3/pkg/types/nucleierr"
17
"github.com/projectdiscovery/utils/errkit"
18
stringsutil "github.com/projectdiscovery/utils/strings"
19
)
20
21
// CacheInterface defines the signature of the hosterrorscache so that
22
// users of Nuclei as embedded lib may implement their own cache
23
type CacheInterface interface {
24
SetVerbose(verbose bool) // log verbosely
25
Close() // close the cache
26
Check(protoType string, ctx *contextargs.Context) bool // return true if the host should be skipped
27
Remove(ctx *contextargs.Context) // remove a host from the cache
28
MarkFailed(protoType string, ctx *contextargs.Context, err error) // record a failure (and cause) for the host
29
MarkFailedOrRemove(protoType string, ctx *contextargs.Context, err error) // record a failure (and cause) for the host or remove it
30
IsPermanentErr(ctx *contextargs.Context, err error) bool // return true if the error is permanent for the host
31
}
32
33
var (
34
_ CacheInterface = (*Cache)(nil)
35
)
36
37
// Cache is a cache for host based errors. It allows skipping
38
// certain hosts based on an error threshold.
39
//
40
// It uses an LRU cache internally for skipping unresponsive hosts
41
// that remain so for a duration.
42
type Cache struct {
43
MaxHostError int
44
verbose bool
45
failedTargets gcache.Cache[string, *cacheItem]
46
TrackError []string
47
}
48
49
// cacheItem is the failure record kept for a single host key.
type cacheItem struct {
	// Once makes sure the "skipped" message in Check is logged a single time
	// per record.
	sync.Once
	// errors is the number of failures recorded for the host.
	errors atomic.Int32
	// isPermanentErr is set when a permanent network error was recorded.
	isPermanentErr bool
	// cause is the most recently recorded error (optional).
	cause error
	// mu is held by Check, MarkFailedOrRemove and IsPermanentErr while they
	// read or update the record.
	mu sync.Mutex
}
56
57
// DefaultMaxHostsCount is a default for the maximum number of hosts.
//
// NOTE(review): it is not referenced in this file; presumably callers pass it
// to New as maxHostsCount - confirm against call sites.
const DefaultMaxHostsCount = 10_000
58
59
// New returns a new host max errors cache
60
func New(maxHostError, maxHostsCount int, trackError []string) *Cache {
61
gc := gcache.New[string, *cacheItem](maxHostsCount).ARC().Build()
62
63
return &Cache{
64
failedTargets: gc,
65
MaxHostError: maxHostError,
66
TrackError: trackError,
67
}
68
}
69
70
// SetVerbose sets the cache to log at verbose level
71
func (c *Cache) SetVerbose(verbose bool) {
72
c.verbose = verbose
73
}
74
75
// Close closes the host errors cache
76
func (c *Cache) Close() {
77
if config.DefaultConfig.IsDebugArgEnabled(config.DebugArgHostErrorStats) {
78
items := c.failedTargets.GetALL(false)
79
for k, val := range items {
80
gologger.Info().Label("MaxHostErrorStats").Msgf("Host: %s, Errors: %d", k, val.errors.Load())
81
}
82
}
83
c.failedTargets.Purge()
84
}
85
86
// NormalizeCacheValue processes the input value and returns a normalized cache
87
// value.
88
func (c *Cache) NormalizeCacheValue(value string) string {
89
var normalizedValue = value
90
91
u, err := url.ParseRequestURI(value)
92
if err != nil || u.Host == "" {
93
if strings.Contains(value, ":") {
94
return normalizedValue
95
}
96
u, err2 := url.ParseRequestURI("https://" + value)
97
if err2 != nil {
98
return normalizedValue
99
}
100
101
normalizedValue = u.Host
102
} else {
103
port := u.Port()
104
if port == "" {
105
switch u.Scheme {
106
case "https":
107
normalizedValue = net.JoinHostPort(u.Host, "443")
108
case "http":
109
normalizedValue = net.JoinHostPort(u.Host, "80")
110
}
111
} else {
112
normalizedValue = u.Host
113
}
114
}
115
116
return normalizedValue
117
}
118
119
// ErrUnresponsiveHost is returned when a host is unresponsive
120
// var ErrUnresponsiveHost = errors.New("skipping as host is unresponsive")
121
122
// Check returns true if a host should be skipped as it has been
123
// unresponsive for a certain number of times.
124
//
125
// The value can be many formats -
126
// - URL: https?:// type
127
// - Host:port type
128
// - host type
129
func (c *Cache) Check(protoType string, ctx *contextargs.Context) bool {
130
finalValue := c.GetKeyFromContext(ctx, nil)
131
132
cache, err := c.failedTargets.GetIFPresent(finalValue)
133
if err != nil {
134
return false
135
}
136
137
cache.mu.Lock()
138
defer cache.mu.Unlock()
139
140
if cache.isPermanentErr {
141
cache.Do(func() {
142
gologger.Info().Msgf("Skipped %s from target list as found unresponsive permanently: %s", finalValue, cache.cause)
143
})
144
return true
145
}
146
147
if cache.errors.Load() >= int32(c.MaxHostError) {
148
cache.Do(func() {
149
gologger.Info().Msgf("Skipped %s from target list as found unresponsive %d times", finalValue, cache.errors.Load())
150
})
151
return true
152
}
153
154
return false
155
}
156
157
// Remove removes a host from the cache
158
func (c *Cache) Remove(ctx *contextargs.Context) {
159
key := c.GetKeyFromContext(ctx, nil)
160
_ = c.failedTargets.Remove(key) // remove even the cache is not present
161
}
162
163
// MarkFailed marks a host as failed previously
164
//
165
// Deprecated: Use MarkFailedOrRemove instead.
166
func (c *Cache) MarkFailed(protoType string, ctx *contextargs.Context, err error) {
167
if err == nil {
168
return
169
}
170
171
c.MarkFailedOrRemove(protoType, ctx, err)
172
}
173
174
// MarkFailedOrRemove marks a host as failed previously or removes it
175
func (c *Cache) MarkFailedOrRemove(protoType string, ctx *contextargs.Context, err error) {
176
if err != nil && !c.checkError(protoType, err) {
177
return
178
}
179
180
if err == nil {
181
// Remove the host from cache
182
//
183
// NOTE(dwisiswant0): The decision was made to completely remove the
184
// cached entry for the host instead of simply decrementing the error
185
// count (using `(atomic.Int32).Swap` to update the value to `N-1`).
186
// This approach was chosen because the error handling logic operates
187
// concurrently, and decrementing the count could lead to UB (unexpected
188
// behavior) even when the error is `nil`.
189
//
190
// To clarify, consider the following scenario where the error
191
// encountered does NOT belong to the permanent network error category
192
// (`errkit.ErrKindNetworkPermanent`):
193
//
194
// 1. Iteration 1: A timeout error occurs, and the error count for the
195
// host is incremented.
196
// 2. Iteration 2: Another timeout error is encountered, leading to
197
// another increment in the host's error count.
198
// 3. Iteration 3: A third timeout error happens, which increments the
199
// error count further. At this point, the host is flagged as
200
// unresponsive.
201
// 4. Iteration 4: The host becomes reachable (no error or a transient
202
// issue resolved). Instead of performing a no-op and leaving the
203
// host in the cache, the host entry is removed entirely to reset its
204
// state.
205
// 5. Iteration 5: A subsequent timeout error occurs after the host was
206
// removed and re-added to the cache. The error count is reset and
207
// starts from 1 again.
208
//
209
// This removal strategy ensures the cache is updated dynamically to
210
// reflect the current state of the host without persisting stale or
211
// irrelevant error counts that could interfere with future error
212
// handling and tracking logic.
213
c.Remove(ctx)
214
215
return
216
}
217
218
cacheKey := c.GetKeyFromContext(ctx, err)
219
cache, cacheErr := c.failedTargets.GetIFPresent(cacheKey)
220
if errors.Is(cacheErr, gcache.KeyNotFoundError) {
221
cache = &cacheItem{errors: atomic.Int32{}}
222
}
223
224
cache.mu.Lock()
225
defer cache.mu.Unlock()
226
227
if errkit.IsKind(err, errkit.ErrKindNetworkPermanent) {
228
cache.isPermanentErr = true
229
}
230
231
cache.cause = err
232
cache.errors.Add(1)
233
234
_ = c.failedTargets.Set(cacheKey, cache)
235
}
236
237
// IsPermanentErr returns true if the error is permanent for the host.
238
func (c *Cache) IsPermanentErr(ctx *contextargs.Context, err error) bool {
239
if err == nil {
240
return false
241
}
242
243
if errkit.IsKind(err, errkit.ErrKindNetworkPermanent) {
244
return true
245
}
246
247
cacheKey := c.GetKeyFromContext(ctx, err)
248
cache, cacheErr := c.failedTargets.GetIFPresent(cacheKey)
249
if cacheErr != nil {
250
return false
251
}
252
253
cache.mu.Lock()
254
defer cache.mu.Unlock()
255
256
return cache.isPermanentErr
257
}
258
259
// GetKeyFromContext returns the key for the cache from the context
260
func (c *Cache) GetKeyFromContext(ctx *contextargs.Context, err error) string {
261
// Note:
262
// ideally any changes made to remote addr in template like {{Hostname}}:81 etc
263
// should be reflected in contextargs but it is not yet reflected in some cases
264
// and needs refactor of ScanContext + ContextArgs to achieve that
265
// i.e why we use real address from error if present
266
var address string
267
268
// 1. the address carried inside the error (if the transport sets it)
269
if err != nil {
270
if v := errkit.GetAttrValue(err, "address"); v.Any() != nil {
271
address = v.String()
272
}
273
}
274
275
if address == "" {
276
address = ctx.MetaInput.Address()
277
}
278
279
finalValue := c.NormalizeCacheValue(address)
280
return finalValue
281
}
282
283
// reCheckError matches the error messages that checkError always counts as
// host errors.
var reCheckError = regexp.MustCompile(
	`(no address found for host` +
		`|could not resolve host` +
		`|connection refused` +
		`|connection reset by peer` +
		`|could not connect to any address found for host` +
		`|timeout awaiting response headers)`,
)
284
285
// checkError checks if an error represents a type that should be
286
// added to the host skipping table.
287
// it first parses error and extracts the cause and checks for blacklisted
288
// or common errors that should be skipped
289
func (c *Cache) checkError(protoType string, err error) bool {
290
if err == nil {
291
return false
292
}
293
if protoType != "http" {
294
return false
295
}
296
kind := errkit.GetErrorKind(err, nucleierr.ErrTemplateLogic)
297
switch kind {
298
case nucleierr.ErrTemplateLogic:
299
// these are errors that are not related to the target
300
// and are due to template logic
301
return false
302
case errkit.ErrKindNetworkTemporary:
303
// these should not be counted as host errors
304
return false
305
case errkit.ErrKindNetworkPermanent:
306
// these should be counted as host errors
307
return true
308
case errkit.ErrKindDeadline:
309
// these should not be counted as host errors
310
return false
311
default:
312
// parse error for further processing
313
errX := errkit.FromError(err)
314
tmp := errX.Cause()
315
cause := tmp.Error()
316
if stringsutil.ContainsAll(cause, "ReadStatusLine:", "read: connection reset by peer") {
317
// this is a FP and should not be counted as a host error
318
// because server closes connection when it reads corrupted bytes which we send via rawhttp
319
return false
320
}
321
if strings.HasPrefix(cause, "ReadStatusLine:") {
322
// error is present in last part when using rawhttp
323
// this will be fixed once errkit is used everywhere
324
lastIndex := strings.LastIndex(cause, ":")
325
if lastIndex == -1 {
326
lastIndex = 0
327
}
328
if lastIndex >= len(cause)-1 {
329
lastIndex = 0
330
}
331
cause = cause[lastIndex+1:]
332
}
333
for _, msg := range c.TrackError {
334
if strings.Contains(cause, msg) {
335
return true
336
}
337
}
338
return reCheckError.MatchString(cause)
339
}
340
}
341
342