Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
gitpod-io
GitHub Repository: gitpod-io/gitpod
Path: blob/main/components/scrubber/scrubber.go
2492 views
1
// Copyright (c) 2023 Gitpod GmbH. All rights reserved.
2
// Licensed under the GNU Affero General Public License (AGPL).
3
// See License.AGPL.txt in the project root for license information.
4
5
package scrubber
6
7
import (
8
"encoding/json"
9
"fmt"
10
"reflect"
11
"regexp"
12
"strings"
13
"unsafe"
14
15
lru "github.com/hashicorp/golang-lru"
16
"github.com/mitchellh/reflectwalk"
17
)
18
19
/*
TrustedValue defines a value that should be treated as trusted and not subjected to scrubbing.

When a TrustedValue is encountered during the scrubbing process, it is skipped over.
This allows specific values to be exempted from the scrubbing process when necessary.

Example:

	type Example struct {
		Username string
		Email    string
		Password string
	}

	type TrustedExample struct {
		Example
	}

	func (TrustedExample) IsTrustedValue() {}

	func scrubExample(e *Example) *TrustedExample {
		return &TrustedExample{
			Example: Example{
				Username: e.Username,
				Email:    "trusted:" + Default.Value(e.Email),
				Password: "trusted:" + Default.KeyValue("password", e.Password),
			},
		}
	}
*/
type TrustedValue interface {
	// IsTrustedValue is a marker method: it carries no behaviour and only
	// exists so that the scrubber can recognise trusted values via a type
	// assertion.
	IsTrustedValue()
}
52
53
// Scrubber defines the interface for a scrubber, which can sanitise various types of data.
// The scrubbing process involves removing or replacing sensitive data to prevent it from being exposed.
//
// The scrubbing process respects instances of TrustedValue. When a TrustedValue is encountered,
// the scrubber does not attempt to scrub it and instead skips over it. This can be used to mark
// specific values that should not be scrubbed.
type Scrubber interface {
	// Value scrubs a single value, by trying to detect the kind of data it may contain.
	// This is an entirely heuristic effort with the lowest likelihood of success. Prefer
	// the other methods over this one. No assumptions about the structure of the data are made,
	// e.g. that the value is a JSON string.
	Value(value string) string

	// KeyValue scrubs a key-value pair. The key is never changed, assuming that it's a hardcoded,
	// well chosen identifier. The value however is sanitised much like Value() would, except with the
	// additional hint of the key name itself.
	KeyValue(key, value string) (sanitisedValue string)

	// JSON scrubs a JSON structure using a combination of KeyValue() and Value(). If the msg
	// is not valid JSON, an error is returned.
	JSON(msg json.RawMessage) (json.RawMessage, error)

	// Struct scrubs a struct. val must be a pointer, otherwise an error is returned.
	// It mutates the struct in-place.
	// By default only string and json.RawMessage fields are scrubbed.
	// The `scrub` struct tag can be used to influence the scrubber. The struct tag takes the following values:
	//   - `ignore` which causes the scrubber to ignore the field
	//   - `hash` which makes the scrubber hash the field value
	//   - `redact` which makes the scrubber redact the field value
	//
	// Example:
	//
	//	type Example struct {
	//		Username      string `scrub:"ignore"`
	//		Password      string
	//		Inconspicuous string `scrub:"redact"`
	//	}
	Struct(val any) error

	// DeepCopyStruct scrubs a struct with a deep copy.
	// The difference between `DeepCopyStruct` and `Struct` is that DeepCopyStruct does not modify the structure directly,
	// but creates a deep copy instead.
	// Also, val can be a pointer or a structure.
	DeepCopyStruct(val any) any
}
98
99
// ScrubberImplConfig holds the rules a scrubber is built from.
// The field-name lists are lower-cased when the scrubber is created and are
// matched as substrings of the (lower-cased) key being scrubbed.
type ScrubberImplConfig struct {
	// HashedFieldNames lists key fragments whose values are hashed.
	HashedFieldNames []string
	// HashedURLPathsFieldNames lists key fragments whose values are passed to
	// SanitiseHashURLPathSegments. These take precedence over HashedFieldNames.
	HashedURLPathsFieldNames []string
	// RedactedFieldNames lists key fragments whose values are redacted.
	// These take precedence over both hashing lists.
	RedactedFieldNames []string
	// HashedValues maps a name to a pattern; every match of the pattern inside
	// a scrubbed value is replaced by its hash, with the name passed along via
	// SanitiseWithKeyName.
	HashedValues map[string]*regexp.Regexp
	// RedactedValues maps a name to a pattern; every match of the pattern
	// inside a scrubbed value is redacted, with the name passed along via
	// SanitiseWithKeyName.
	RedactedValues map[string]*regexp.Regexp
}
106
107
// CreateCustomScrubber creates a new scrubber with the given configuration
108
// !!! Only use this if you know what you're doing. For all logging purposes, use the "Default" impl !!!
109
func CreateCustomScrubber(cfg *ScrubberImplConfig) Scrubber {
110
return createScrubberImpl(cfg)
111
}
112
113
// Default is the default scrubber consumers of this package should use.
// It is created by newScrubberImpl from the package-level field-name and
// value-pattern lists.
var Default Scrubber = newScrubberImpl()
115
116
func newScrubberImpl() *scrubberImpl {
117
defaultCfg := ScrubberImplConfig{
118
HashedFieldNames: HashedFieldNames,
119
HashedURLPathsFieldNames: HashedURLPathsFieldNames,
120
RedactedFieldNames: RedactedFieldNames,
121
HashedValues: HashedValues,
122
RedactedValues: RedactedValues,
123
}
124
return createScrubberImpl(&defaultCfg)
125
}
126
127
func createScrubberImpl(cfg *ScrubberImplConfig) *scrubberImpl {
128
var (
129
lowerSanitiseHash []string
130
lowerSanitiseHashURLPaths []string
131
lowerSanitiseRedact []string
132
)
133
for _, v := range cfg.HashedFieldNames {
134
lowerSanitiseHash = append(lowerSanitiseHash, strings.ToLower(v))
135
}
136
for _, v := range cfg.HashedURLPathsFieldNames {
137
lowerSanitiseHashURLPaths = append(lowerSanitiseHashURLPaths, strings.ToLower(v))
138
}
139
for _, v := range cfg.RedactedFieldNames {
140
lowerSanitiseRedact = append(lowerSanitiseRedact, strings.ToLower(v))
141
}
142
143
cache, err := lru.New(1000)
144
if err != nil {
145
panic(fmt.Errorf("cannot create cache: %w", err))
146
}
147
148
res := &scrubberImpl{
149
LowerSanitiseHash: lowerSanitiseHash,
150
LowerSanitiseHashURLPaths: lowerSanitiseHashURLPaths,
151
LowerSanitiseRedact: lowerSanitiseRedact,
152
HashedValues: cfg.HashedValues,
153
RedactedValues: cfg.RedactedValues,
154
KeySanitiserCache: cache,
155
}
156
res.Walker = &structScrubber{Parent: res}
157
158
return res
159
}
160
161
// scrubberImpl is the Scrubber implementation used by this package.
type scrubberImpl struct {
	// Walker is handed to reflectwalk when scrubbing arbitrary values in place.
	Walker *structScrubber
	// LowerSanitiseHash holds lower-cased key fragments whose values get hashed.
	LowerSanitiseHash []string
	// LowerSanitiseHashURLPaths holds lower-cased key fragments whose values
	// are sanitised with SanitiseHashURLPathSegments.
	LowerSanitiseHashURLPaths []string
	// LowerSanitiseRedact holds lower-cased key fragments whose values get redacted.
	LowerSanitiseRedact []string
	// HashedValues holds the patterns whose matches Value replaces with a hash.
	HashedValues map[string]*regexp.Regexp
	// RedactedValues holds the patterns whose matches Value redacts.
	RedactedValues map[string]*regexp.Regexp
	// KeySanitiserCache memoises, per lower-cased key, the keySanitiser chosen
	// by getSanitisatiser.
	KeySanitiserCache *lru.Cache
}
170
171
// JSON implements Scrubber
172
func (s *scrubberImpl) JSON(msg json.RawMessage) (json.RawMessage, error) {
173
var content any
174
err := json.Unmarshal(msg, &content)
175
if err != nil {
176
return nil, fmt.Errorf("cannot scrub JSON: %w", err)
177
}
178
err = s.scrubJsonValue(&content)
179
if err != nil {
180
return nil, fmt.Errorf("cannot scrub JSON: %w", err)
181
}
182
res, err := json.Marshal(content)
183
if err != nil {
184
return nil, fmt.Errorf("cannot scrub JSON: %w", err)
185
}
186
return res, nil
187
}
188
189
// KeyValue implements Scrubber
190
func (s *scrubberImpl) KeyValue(key string, value string) (sanitisedValue string) {
191
sanitisatiser := s.getSanitisatiser(key)
192
if sanitisatiser == nil {
193
return value
194
}
195
return sanitisatiser(value)
196
}
197
198
// keySanitiser wraps a Sanitisatiser so that the decision for a key, including
// "no sanitiser applies" (a nil s), can be stored in the key cache as a
// concrete value.
type keySanitiser struct {
	s Sanitisatiser
}
201
202
// Pre-built cache entries, one per possible outcome of getSanitisatiser.
var (
	// sanitiseIgnore marks keys for which no sanitiser applies.
	sanitiseIgnore keySanitiser = keySanitiser{s: nil}
	// sanitiseHash marks keys whose values are hashed.
	sanitiseHash keySanitiser = keySanitiser{s: SanitiseHash}
	// sanitiseHashURLPathSegments marks keys whose values go through SanitiseHashURLPathSegments.
	sanitiseHashURLPathSegments keySanitiser = keySanitiser{s: SanitiseHashURLPathSegments}
	// sanitiseRedact marks keys whose values are redacted.
	sanitiseRedact keySanitiser = keySanitiser{s: SanitiseRedact}
)
208
209
// getSanitisatiser implements
210
func (s *scrubberImpl) getSanitisatiser(key string) Sanitisatiser {
211
lower := strings.ToLower(key)
212
san, ok := s.KeySanitiserCache.Get(lower)
213
if ok {
214
w := san.(keySanitiser)
215
return w.s
216
}
217
218
for _, f := range s.LowerSanitiseRedact {
219
if strings.Contains(lower, f) {
220
s.KeySanitiserCache.Add(lower, sanitiseRedact)
221
return SanitiseRedact
222
}
223
}
224
// Give sanitiseHashURLPathSegments precedence over sanitiseHash
225
for _, f := range s.LowerSanitiseHashURLPaths {
226
if strings.Contains(lower, f) {
227
s.KeySanitiserCache.Add(lower, sanitiseHashURLPathSegments)
228
return SanitiseHashURLPathSegments
229
}
230
}
231
for _, f := range s.LowerSanitiseHash {
232
if strings.Contains(lower, f) {
233
s.KeySanitiserCache.Add(lower, sanitiseHash)
234
return SanitiseHash
235
}
236
}
237
238
s.KeySanitiserCache.Add(lower, sanitiseIgnore)
239
return nil
240
}
241
242
// scrubJsonValue scrubs a single decoded JSON value through its pointer.
// A string is replaced by its heuristically scrubbed form; any other value is
// handed to Struct. A nil pointer is a no-op.
func (s *scrubberImpl) scrubJsonValue(val *any) error {
	if val == nil {
		return nil
	}

	switch typed := (*val).(type) {
	case string:
		*val = s.Value(typed)
		return nil
	default:
		return s.Struct(typed)
	}
}
252
253
// Struct implements Scrubber
254
func (s *scrubberImpl) Struct(val any) error {
255
if val == nil {
256
return nil
257
}
258
switch v := val.(type) {
259
case map[string]interface{}:
260
err := s.scrubJsonObject(v)
261
if err != nil {
262
return err
263
}
264
case []interface{}:
265
err := s.scrubJsonSlice(v)
266
if err != nil {
267
return err
268
}
269
default:
270
return reflectwalk.Walk(val, s.Walker)
271
}
272
return nil
273
}
274
275
func (s *scrubberImpl) deepCopyStruct(fieldName string, src reflect.Value, scrubTag string, skipScrub bool) reflect.Value {
276
if src.Kind() == reflect.Ptr && src.IsNil() {
277
return reflect.New(src.Type()).Elem()
278
}
279
280
if src.CanInterface() {
281
value := src.Interface()
282
if _, ok := value.(TrustedValue); ok {
283
skipScrub = true
284
}
285
}
286
287
if src.Kind() == reflect.String && !skipScrub {
288
dst := reflect.New(src.Type())
289
var (
290
setExplicitValue bool
291
explicitValue string
292
)
293
switch scrubTag {
294
case "ignore":
295
dst.Elem().SetString(src.String())
296
if !dst.CanInterface() {
297
return dst
298
}
299
return dst.Elem()
300
case "hash":
301
setExplicitValue = true
302
explicitValue = SanitiseHash(src.String())
303
case "redact":
304
setExplicitValue = true
305
explicitValue = SanitiseRedact(src.String())
306
}
307
308
if setExplicitValue {
309
dst.Elem().SetString(explicitValue)
310
} else {
311
sanitisatiser := s.getSanitisatiser(fieldName)
312
if sanitisatiser != nil {
313
dst.Elem().SetString(sanitisatiser(src.String()))
314
} else {
315
dst.Elem().SetString(s.Value(src.String()))
316
}
317
}
318
if !dst.CanInterface() {
319
return dst
320
}
321
return dst.Elem()
322
}
323
324
switch src.Kind() {
325
case reflect.Struct:
326
dst := reflect.New(src.Type())
327
t := src.Type()
328
329
for i := 0; i < t.NumField(); i++ {
330
f := t.Field(i)
331
srcValue := src.Field(i)
332
dstValue := dst.Elem().Field(i)
333
334
if !srcValue.CanInterface() {
335
dstValue = reflect.NewAt(dstValue.Type(), unsafe.Pointer(dstValue.UnsafeAddr())).Elem()
336
337
if !srcValue.CanAddr() {
338
switch {
339
case srcValue.CanInt():
340
dstValue.SetInt(srcValue.Int())
341
case srcValue.CanUint():
342
dstValue.SetUint(srcValue.Uint())
343
case srcValue.CanFloat():
344
dstValue.SetFloat(srcValue.Float())
345
case srcValue.CanComplex():
346
dstValue.SetComplex(srcValue.Complex())
347
case srcValue.Kind() == reflect.Bool:
348
dstValue.SetBool(srcValue.Bool())
349
}
350
351
continue
352
}
353
354
srcValue = reflect.NewAt(srcValue.Type(), unsafe.Pointer(srcValue.UnsafeAddr())).Elem()
355
}
356
357
tagValue := f.Tag.Get("scrub")
358
copied := s.deepCopyStruct(f.Name, srcValue, tagValue, skipScrub)
359
dstValue.Set(copied)
360
}
361
return dst.Elem()
362
363
case reflect.Map:
364
dst := reflect.MakeMap(src.Type())
365
keys := src.MapKeys()
366
for i := 0; i < src.Len(); i++ {
367
mValue := src.MapIndex(keys[i])
368
dst.SetMapIndex(keys[i], s.deepCopyStruct(keys[i].String(), mValue, "", skipScrub))
369
}
370
return dst
371
372
case reflect.Slice:
373
dst := reflect.MakeSlice(src.Type(), src.Len(), src.Cap())
374
for i := 0; i < src.Len(); i++ {
375
dst.Index(i).Set(s.deepCopyStruct(fieldName, src.Index(i), "", skipScrub))
376
}
377
return dst
378
379
case reflect.Array:
380
if src.Len() == 0 {
381
return src
382
}
383
384
dst := reflect.New(src.Type()).Elem()
385
for i := 0; i < src.Len(); i++ {
386
dst.Index(i).Set(s.deepCopyStruct(fieldName, src.Index(i), "", skipScrub))
387
}
388
return dst
389
390
case reflect.Interface:
391
if src.IsNil() {
392
return src
393
}
394
dst := reflect.New(src.Elem().Type())
395
copied := s.deepCopyStruct(fieldName, src.Elem(), scrubTag, skipScrub)
396
dst.Elem().Set(copied)
397
return dst.Elem()
398
399
case reflect.Ptr:
400
dst := reflect.New(src.Elem().Type())
401
copied := s.deepCopyStruct(fieldName, src.Elem(), scrubTag, skipScrub)
402
dst.Elem().Set(copied)
403
return dst
404
405
default:
406
dst := reflect.New(src.Type())
407
dst.Elem().Set(src)
408
return dst.Elem()
409
}
410
}
411
412
// Struct implements Scrubber
413
func (s *scrubberImpl) DeepCopyStruct(val any) any {
414
return s.deepCopyStruct("", reflect.ValueOf(val), "", false).Interface()
415
}
416
417
// scrubJsonObject scrubs a decoded JSON object in place.
//
// Objects of the shape {"name": ..., "value": ...} get their "value" scrubbed
// using the content of "name" as the key. After that, every string entry is
// scrubbed using its own key, and every non-string entry is scrubbed
// recursively.
func (s *scrubberImpl) scrubJsonObject(val map[string]interface{}) error {
	// fix https://github.com/gitpod-io/security/issues/64
	if name, _ := val["name"].(string); name != "" {
		if value, _ := val["value"].(string); value != "" {
			val["value"] = s.KeyValue(name, value)
		}
	}

	for key, entry := range val {
		text, isString := entry.(string)
		if isString {
			val[key] = s.KeyValue(key, text)
			continue
		}

		nested := entry
		if err := s.scrubJsonValue(&nested); err != nil {
			return err
		}
	}
	return nil
}
437
438
// scrubJsonSlice scrubs every element of a decoded JSON array in place and
// stops at the first error.
func (s *scrubberImpl) scrubJsonSlice(val []interface{}) error {
	for idx := 0; idx < len(val); idx++ {
		// Pass the address of the element so string elements can be replaced.
		if err := s.scrubJsonValue(&val[idx]); err != nil {
			return err
		}
	}
	return nil
}
447
448
// Value implements Scrubber
449
func (s *scrubberImpl) Value(value string) string {
450
for key, expr := range s.HashedValues {
451
value = expr.ReplaceAllStringFunc(value, func(s string) string {
452
return SanitiseHash(s, SanitiseWithKeyName(key))
453
})
454
}
455
for key, expr := range s.RedactedValues {
456
value = expr.ReplaceAllStringFunc(value, func(s string) string {
457
return SanitiseRedact(s, SanitiseWithKeyName(key))
458
})
459
}
460
461
return value
462
}
463
464
// structScrubber is the reflectwalk walker that scrubs values in place on
// behalf of a scrubberImpl.
type structScrubber struct {
	// Parent is the scrubber whose rules (Value, KeyValue, getSanitisatiser)
	// are applied to the values being walked.
	Parent *scrubberImpl
}
467
468
var (
469
_ reflectwalk.MapWalker = &structScrubber{}
470
_ reflectwalk.StructWalker = &structScrubber{}
471
_ reflectwalk.PrimitiveWalker = &structScrubber{}
472
_ reflectwalk.PointerValueWalker = &structScrubber{}
473
)
474
475
// Pointer implements reflectwalk.PointerValueWalker
476
func (s *structScrubber) Pointer(val reflect.Value) error {
477
if !val.CanInterface() {
478
return nil
479
}
480
value := val.Interface()
481
if _, ok := value.(TrustedValue); ok {
482
return reflectwalk.SkipEntry
483
}
484
return nil
485
}
486
487
// Primitive implements reflectwalk.PrimitiveWalker
488
func (s *structScrubber) Primitive(val reflect.Value) error {
489
if val.Kind() == reflect.String && val.CanSet() {
490
val.SetString(s.Parent.Value(val.String()))
491
}
492
493
return nil
494
}
495
496
// Struct implements reflectwalk.StructWalker.
// It is a no-op: the per-field work happens in StructField.
func (s *structScrubber) Struct(val reflect.Value) error {
	return nil
}
500
501
// StructField implements reflectwalk.StructWalker
502
func (s *structScrubber) StructField(field reflect.StructField, val reflect.Value) error {
503
if val.Kind() == reflect.String {
504
var (
505
setExplicitValue bool
506
explicitValue string
507
)
508
tag := field.Tag.Get("scrub")
509
switch tag {
510
case "ignore":
511
return reflectwalk.SkipEntry
512
case "hash":
513
setExplicitValue = true
514
explicitValue = SanitiseHash(val.String())
515
case "redact":
516
setExplicitValue = true
517
explicitValue = SanitiseRedact(val.String())
518
}
519
520
if setExplicitValue {
521
if !val.CanSet() {
522
return fmt.Errorf("cannot set %s", field.PkgPath)
523
}
524
val.SetString(explicitValue)
525
} else {
526
sanitisatiser := s.Parent.getSanitisatiser(field.Name)
527
if sanitisatiser != nil {
528
if !val.CanSet() {
529
return fmt.Errorf("cannot set %s", field.PkgPath)
530
}
531
val.SetString(sanitisatiser(val.String()))
532
}
533
}
534
return reflectwalk.SkipEntry
535
}
536
537
return nil
538
}
539
540
// Map implements reflectwalk.MapWalker
541
func (s *structScrubber) Map(m reflect.Value) error {
542
// fix https://github.com/gitpod-io/security/issues/64
543
var (
544
nameV reflect.Value
545
valueK reflect.Value
546
valueV reflect.Value
547
)
548
for _, k := range m.MapKeys() {
549
kv := m.MapIndex(k)
550
if k.String() == "name" {
551
nameV = kv
552
} else if k.String() == "value" {
553
valueK = k
554
valueV = kv
555
}
556
}
557
if nameV.Kind() == reflect.Interface {
558
nameV = nameV.Elem()
559
}
560
if valueV.Kind() == reflect.Interface {
561
valueV = valueV.Elem()
562
}
563
564
if nameV.Kind() == reflect.String && valueV.Kind() == reflect.String {
565
sanitisatiser := s.Parent.getSanitisatiser(nameV.String())
566
if sanitisatiser != nil {
567
value := sanitisatiser(valueV.String())
568
m.SetMapIndex(valueK, reflect.ValueOf(value))
569
}
570
}
571
return nil
572
}
573
574
// MapElem implements reflectwalk.MapWalker
575
func (s *structScrubber) MapElem(m reflect.Value, k reflect.Value, v reflect.Value) error {
576
kind := v.Kind()
577
if kind == reflect.Interface {
578
v = v.Elem()
579
kind = v.Kind()
580
}
581
if k.Kind() == reflect.Interface {
582
k = k.Elem()
583
}
584
if kind == reflect.String {
585
m.SetMapIndex(k, reflect.ValueOf(s.Parent.KeyValue(k.String(), v.String())))
586
}
587
588
return nil
589
}
590
591