Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
gitpod-io
GitHub Repository: gitpod-io/gitpod
Path: blob/main/components/scrubber/scrubber_test.go
2492 views
1
// Copyright (c) 2023 Gitpod GmbH. All rights reserved.
2
// Licensed under the GNU Affero General Public License (AGPL).
3
// See License.AGPL.txt in the project root for license information.
4
5
package scrubber
6
7
import (
8
"encoding/json"
9
"math/rand"
10
"testing"
11
12
"github.com/google/go-cmp/cmp"
13
"github.com/google/go-cmp/cmp/cmpopts"
14
)
15
16
func TestValue(t *testing.T) {
17
tests := []struct {
18
Name string
19
Value string
20
Expectation string
21
}{
22
{Name: "empty string"},
23
{Name: "email", Value: "[email protected]", Expectation: "[redacted:email]"},
24
{Name: "email in text", Value: "The email is [email protected] or [email protected]", Expectation: "The email is [redacted:email] or [redacted:email]"},
25
{Name: "GitLab Git URL in text", Value: "Content initialization failed: cannot initialize workspace: git initializer gitClone: git clone --depth=1 --shallow-submodules https://gitlab.com/acme-corp/web/frontend/services/deployment-manager.git --config http.version=HTTP/1.1 . failed (exit status 128)", Expectation: "Content initialization failed: cannot initialize workspace: git initializer gitClone: git clone --depth=1 --shallow-submodules [redacted:md5:aa0dfa0c402612a8314b8e7c4326a395:url] --config http.version=HTTP/1.1 . failed (exit status 128)"},
26
{Name: "Non-git URL not scrubbed", Value: "API call to https://api.example.com/endpoint failed", Expectation: "API call to https://api.example.com/endpoint failed"},
27
{Name: "Mixed URLs", Value: "Clone from https://github.com/user/repo.git then visit https://docs.gitpod.io/configure", Expectation: "Clone from [redacted:md5:3c5467d320a0b72072bc609f12e7d879:url] then visit https://docs.gitpod.io/configure"},
28
{Name: "HTTP Git URL", Value: "git clone http://internal-git.company.com/project.git", Expectation: "git clone [redacted:md5:11774800a9c933d1181c479ea207cdff:url]"},
29
}
30
31
for _, test := range tests {
32
t.Run(test.Name, func(t *testing.T) {
33
act := Default.Value(test.Value)
34
35
if diff := cmp.Diff(test.Expectation, act); diff != "" {
36
t.Errorf("Value() mismatch (-want +got):\n%s", diff)
37
}
38
})
39
}
40
}
41
42
func TestKeyValue(t *testing.T) {
43
const testValue = "testvalue"
44
tests := []struct {
45
Key string
46
Expectation string
47
}{
48
{Key: "email", Expectation: "[redacted]"},
49
{Key: "token", Expectation: "[redacted]"},
50
}
51
52
for _, test := range tests {
53
t.Run(test.Key, func(t *testing.T) {
54
act := Default.KeyValue(test.Key, testValue)
55
if diff := cmp.Diff(test.Expectation, act); diff != "" {
56
t.Errorf("KeyValue() mismatch (-want +got):\n%s", diff)
57
}
58
})
59
}
60
}
61
62
var (
63
_ TrustedValue = &TrustedStructToTest{}
64
)
65
66
// StructToTest is a plain fixture with three string fields used as scrubber
// input in the tests of this file.
type StructToTest struct {
	Username, Email, Password string
}
71
72
type TrustedStructToTest struct {
73
StructToTest
74
}
75
76
type TestWrap struct {
77
Test *StructToTest
78
}
79
80
// UnexportedStructToTest is a fixture that mixes an exported field with an
// unexported pointer field; the "contains unexported pointers" test cases
// pass it by value with the pointer left nil.
type UnexportedStructToTest struct {
	// Exported is the only field visible outside the package.
	Exported string
	// unexportedPtr is only ever set to nil by the tests in this file.
	unexportedPtr *string
}
84
85
// IsTrustedValue is an empty marker method; it is what lets
// TrustedStructToTest be used as a TrustedValue in this file.
func (TrustedStructToTest) IsTrustedValue() {
}
86
87
// scrubStructToTestAsTrustedValue returns the result of scrubStructToTest(v)
// typed as the TrustedValue interface rather than the concrete pointer.
func scrubStructToTestAsTrustedValue(v *StructToTest) TrustedValue {
	var trusted TrustedValue = scrubStructToTest(v)
	return trusted
}
90
91
func scrubStructToTest(v *StructToTest) *TrustedStructToTest {
92
return &TrustedStructToTest{
93
StructToTest: StructToTest{
94
Username: v.Username,
95
Email: "trusted:" + Default.Value(v.Email),
96
Password: "trusted:" + Default.KeyValue("password", v.Password),
97
},
98
}
99
}
100
101
func TestStruct(t *testing.T) {
102
type Expectation struct {
103
Error string
104
Result any
105
}
106
tests := []struct {
107
Name string
108
Struct any
109
Expectation Expectation
110
CmpOpts []cmp.Option
111
}{
112
{
113
Name: "basic happy path",
114
Struct: &struct {
115
Username string
116
Email string
117
Password string
118
WorkspaceID string
119
ContextURL string
120
LeaveMeAlone string
121
}{Username: "foo", Email: "[email protected]", Password: "foobar", WorkspaceID: "gitpodio-gitpod-uesaddev73c", ContextURL: "https://github.com/gitpod-io/gitpod/pull/19402", LeaveMeAlone: "foo"},
122
Expectation: Expectation{
123
Result: &struct {
124
Username string
125
Email string
126
Password string
127
WorkspaceID string
128
ContextURL string
129
LeaveMeAlone string
130
}{Username: "[redacted:md5:acbd18db4cc2f85cedef654fccc4a4d8]", Email: "[redacted]", Password: "[redacted]", WorkspaceID: "[redacted:md5:a35538939333def8477b5c19ac694b35]", ContextURL: "[redacted:md5:3097fca9b1ec8942c4305e550ef1b50a]/[redacted:md5:308cb0f82b8a4966a32f7c360315c160]/[redacted:md5:5bc8d0354fba47db774b70d2a9161bbb]/pull/19402", LeaveMeAlone: "foo"},
131
},
132
},
133
{
134
Name: "map field",
135
Struct: &struct {
136
WithMap map[string]interface{}
137
}{
138
WithMap: map[string]interface{}{
139
"email": "[email protected]",
140
},
141
},
142
Expectation: Expectation{
143
Result: &struct{ WithMap map[string]any }{WithMap: map[string]any{"email": string("[redacted]")}},
144
},
145
},
146
{
147
Name: "slices",
148
Struct: &struct {
149
Slice []string
150
}{Slice: []string{"foo", "bar", "[email protected]"}},
151
Expectation: Expectation{
152
Result: &struct {
153
Slice []string
154
}{Slice: []string{"foo", "bar", "[redacted:email]"}},
155
},
156
},
157
{
158
Name: "struct tags",
159
Struct: &struct {
160
Hashed string `scrub:"hash"`
161
Redacted string `scrub:"redact"`
162
Email string `scrub:"ignore"`
163
}{
164
Hashed: "foo",
165
Redacted: "foo",
166
Email: "foo",
167
},
168
Expectation: Expectation{
169
Result: &struct {
170
Hashed string `scrub:"hash"`
171
Redacted string `scrub:"redact"`
172
Email string `scrub:"ignore"`
173
}{
174
Hashed: "[redacted:md5:acbd18db4cc2f85cedef654fccc4a4d8]",
175
Redacted: "[redacted]",
176
Email: "foo",
177
},
178
},
179
},
180
{
181
Name: "trusted struct",
182
Struct: scrubStructToTest(&StructToTest{
183
Username: "foo",
184
Email: "[email protected]",
185
Password: "foobar",
186
}),
187
Expectation: Expectation{
188
Result: &TrustedStructToTest{
189
StructToTest: StructToTest{
190
Username: "foo",
191
Email: "trusted:[redacted:email]",
192
Password: "trusted:[redacted]",
193
},
194
},
195
},
196
},
197
{
198
Name: "trusted interface",
199
Struct: scrubStructToTestAsTrustedValue(&StructToTest{
200
Username: "foo",
201
Email: "[email protected]",
202
Password: "foobar",
203
}),
204
Expectation: Expectation{
205
Result: &TrustedStructToTest{
206
StructToTest: StructToTest{
207
Username: "foo",
208
Email: "trusted:[redacted:email]",
209
Password: "trusted:[redacted]",
210
},
211
},
212
},
213
},
214
{
215
Name: "contains unexported pointers",
216
Struct: UnexportedStructToTest{
217
Exported: "foo",
218
unexportedPtr: nil,
219
},
220
Expectation: Expectation{
221
Result: UnexportedStructToTest{
222
Exported: "foo",
223
unexportedPtr: nil,
224
},
225
},
226
CmpOpts: []cmp.Option{cmpopts.IgnoreUnexported(UnexportedStructToTest{})},
227
},
228
}
229
230
for _, test := range tests {
231
t.Run(test.Name, func(t *testing.T) {
232
var act Expectation
233
234
err := Default.Struct(test.Struct)
235
if err != nil {
236
act.Error = err.Error()
237
} else {
238
act.Result = test.Struct
239
}
240
241
if diff := cmp.Diff(test.Expectation, act, test.CmpOpts...); diff != "" {
242
t.Errorf("Struct() mismatch (-want +got):\n%s", diff)
243
}
244
})
245
}
246
}
247
248
func TestJSON(t *testing.T) {
249
type Expectation struct {
250
Error string
251
Result string
252
}
253
tests := []struct {
254
Name string
255
Input string
256
Expectation Expectation
257
}{
258
{
259
Name: "basic happy path",
260
Input: `{"ok": true, "email": "[email protected]", "workspaceID": "gitpodio-gitpod-uesaddev73c"}`,
261
Expectation: Expectation{
262
Result: `{"email":"[redacted]","ok":true,"workspaceID":"[redacted:md5:a35538939333def8477b5c19ac694b35]"}`,
263
},
264
},
265
{
266
Name: "analytics",
267
Input: `{"batch":[{"event":"signup","foo":"bar","type":"track"}],"foo":"bar"}`,
268
Expectation: Expectation{Result: `{"batch":[{"event":"signup","foo":"bar","type":"track"}],"foo":"bar"}`},
269
},
270
{
271
// https://github.com/gitpod-io/security/issues/64
272
Name: "complex",
273
Input: `{"auth":{"owner_token":"abcsecrettokendef","total":{}},"env":[{"name":"SECRET_PASSWORD","value":"i-am-leaked-in-the-logs-yikes"},{"name":"GITHUB_TOKEN","value":"thisismyGitHubTokenDontStealIt"},{"name":"SUPER_SEKRET","value":"you.cant.see.me.or.can.you"},{"name":"GITHUB_SSH_PRIVATE_KEY","value":"super-secret-private-ssh-key-from-github"},{"name":"SHELL","value":"zsh"},{"name":"GITLAB_TOKEN","value":"abcsecrettokendef"}],"source":{"file":{"contextPath":".","dockerfilePath":".gitpod.dockerfile","dockerfileVersion":"82561e7f6455e3c0e6ee98be03c4d9aab4d459f8","source":{"git":{"checkoutLocation":"test.repo","cloneTaget":"good-workspace-image","config":{"authPassword":"super-secret-password","authUser":"oauth2","authentication":"BASIC_AUTH"},"remoteUri":"https://github.com/AlexTugarev/test.repo.git","targetMode":"REMOTE_BRANCH"}}}}}`,
274
Expectation: Expectation{
275
Result: `{"auth":{"owner_token":"[redacted]","total":{}},"env":[{"name":"SECRET_PASSWORD","value":"[redacted]"},{"name":"GITHUB_TOKEN","value":"[redacted]"},{"name":"SUPER_SEKRET","value":"you.cant.see.me.or.can.you"},{"name":"GITHUB_SSH_PRIVATE_KEY","value":"[redacted]"},{"name":"SHELL","value":"zsh"},{"name":"GITLAB_TOKEN","value":"[redacted]"}],"source":{"file":{"contextPath":".","dockerfilePath":".gitpod.dockerfile","dockerfileVersion":"82561e7f6455e3c0e6ee98be03c4d9aab4d459f8","source":{"git":{"checkoutLocation":"test.repo","cloneTaget":"good-workspace-image","config":{"authPassword":"[redacted]","authUser":"oauth2","authentication":"BASIC_AUTH"},"remoteUri":"https://github.com/AlexTugarev/test.repo.git","targetMode":"REMOTE_BRANCH"}}}}}`,
276
},
277
},
278
{
279
Name: "string",
280
Input: `"[email protected]"`,
281
Expectation: Expectation{Result: `"[redacted:email]"`},
282
},
283
{
284
Name: "array",
285
Input: `["[email protected]"]`,
286
Expectation: Expectation{Result: `["[redacted:email]"]`},
287
},
288
}
289
290
for _, test := range tests {
291
t.Run(test.Name, func(t *testing.T) {
292
var act Expectation
293
294
res, err := Default.JSON([]byte(test.Input))
295
if err != nil {
296
act.Error = err.Error()
297
}
298
act.Result = string(res)
299
300
if diff := cmp.Diff(test.Expectation, act); diff != "" {
301
t.Errorf("JSON() mismatch (-want +got):\n%s", diff)
302
}
303
})
304
}
305
}
306
307
func TestDeepCopyStruct(t *testing.T) {
308
type Expectation struct {
309
Error string
310
Result any
311
}
312
tests := []struct {
313
Name string
314
Struct any
315
Expectation Expectation
316
CmpOpts []cmp.Option
317
}{
318
{
319
Name: "basic happy path",
320
Struct: &struct {
321
Username string
322
Email string
323
Password string
324
WorkspaceID string
325
LeaveMeAlone string
326
}{Username: "foo", Email: "[email protected]", Password: "foobar", WorkspaceID: "gitpodio-gitpod-uesaddev73c", LeaveMeAlone: "foo"},
327
Expectation: Expectation{
328
Result: &struct {
329
Username string
330
Email string
331
Password string
332
WorkspaceID string
333
LeaveMeAlone string
334
}{Username: "[redacted:md5:acbd18db4cc2f85cedef654fccc4a4d8]", Email: "[redacted]", Password: "[redacted]", WorkspaceID: "[redacted:md5:a35538939333def8477b5c19ac694b35]", LeaveMeAlone: "foo"},
335
},
336
},
337
{
338
Name: "stuct without pointer",
339
Struct: struct {
340
Username string
341
Email string
342
Password string
343
WorkspaceID string
344
LeaveMeAlone string
345
}{Username: "foo", Email: "[email protected]", Password: "foobar", WorkspaceID: "gitpodio-gitpod-uesaddev73c", LeaveMeAlone: "foo"},
346
Expectation: Expectation{
347
Result: struct {
348
Username string
349
Email string
350
Password string
351
WorkspaceID string
352
LeaveMeAlone string
353
}{Username: "[redacted:md5:acbd18db4cc2f85cedef654fccc4a4d8]", Email: "[redacted]", Password: "[redacted]", WorkspaceID: "[redacted:md5:a35538939333def8477b5c19ac694b35]", LeaveMeAlone: "foo"},
354
},
355
},
356
{
357
Name: "map field",
358
Struct: &struct {
359
WithMap map[string]interface{}
360
}{
361
WithMap: map[string]interface{}{
362
"email": "[email protected]",
363
},
364
},
365
Expectation: Expectation{
366
Result: &struct{ WithMap map[string]any }{WithMap: map[string]any{"email": string("[redacted]")}},
367
},
368
},
369
{
370
Name: "slices",
371
Struct: &struct {
372
Slice []string
373
}{Slice: []string{"foo", "bar", "[email protected]"}},
374
Expectation: Expectation{
375
Result: &struct {
376
Slice []string
377
}{Slice: []string{"foo", "bar", "[redacted:email]"}},
378
},
379
},
380
{
381
Name: "struct tags",
382
Struct: &struct {
383
Hashed string `scrub:"hash"`
384
Redacted string `scrub:"redact"`
385
Email string `scrub:"ignore"`
386
}{
387
Hashed: "foo",
388
Redacted: "foo",
389
Email: "foo",
390
},
391
Expectation: Expectation{
392
Result: &struct {
393
Hashed string `scrub:"hash"`
394
Redacted string `scrub:"redact"`
395
Email string `scrub:"ignore"`
396
}{
397
Hashed: "[redacted:md5:acbd18db4cc2f85cedef654fccc4a4d8]",
398
Redacted: "[redacted]",
399
Email: "foo",
400
},
401
},
402
},
403
{
404
Name: "trusted struct",
405
Struct: scrubStructToTest(&StructToTest{
406
Username: "foo",
407
Email: "[email protected]",
408
Password: "foobar",
409
}),
410
Expectation: Expectation{
411
Result: &TrustedStructToTest{
412
StructToTest: StructToTest{
413
Username: "foo",
414
Email: "trusted:[redacted:email]",
415
Password: "trusted:[redacted]",
416
},
417
},
418
},
419
},
420
{
421
Name: "trusted interface",
422
Struct: scrubStructToTestAsTrustedValue(&StructToTest{
423
Username: "foo",
424
Email: "[email protected]",
425
Password: "foobar",
426
}),
427
Expectation: Expectation{
428
Result: &TrustedStructToTest{
429
StructToTest: StructToTest{
430
Username: "foo",
431
Email: "trusted:[redacted:email]",
432
Password: "trusted:[redacted]",
433
},
434
},
435
},
436
},
437
{
438
Name: "contains unexported pointers",
439
Struct: UnexportedStructToTest{
440
Exported: "foo",
441
unexportedPtr: nil,
442
},
443
Expectation: Expectation{
444
Result: UnexportedStructToTest{
445
Exported: "foo",
446
unexportedPtr: nil,
447
},
448
},
449
CmpOpts: []cmp.Option{cmpopts.IgnoreUnexported(UnexportedStructToTest{})},
450
},
451
{
452
Name: "nil interface",
453
Struct: &struct {
454
Hashed string `scrub:"hash"`
455
NilInterface interface{}
456
}{
457
Hashed: "foo",
458
},
459
Expectation: Expectation{
460
Result: &struct {
461
Hashed string `scrub:"hash"`
462
NilInterface interface{}
463
}{
464
Hashed: "[redacted:md5:acbd18db4cc2f85cedef654fccc4a4d8]",
465
NilInterface: nil,
466
},
467
},
468
},
469
{
470
Name: "nil point interface",
471
Struct: &struct {
472
Hashed string `scrub:"hash"`
473
NilInterface *string
474
}{
475
Hashed: "foo",
476
},
477
Expectation: Expectation{
478
Result: &struct {
479
Hashed string `scrub:"hash"`
480
NilInterface *string
481
}{
482
Hashed: "[redacted:md5:acbd18db4cc2f85cedef654fccc4a4d8]",
483
NilInterface: nil,
484
},
485
},
486
},
487
}
488
489
for _, test := range tests {
490
t.Run(test.Name, func(t *testing.T) {
491
var act Expectation
492
b, _ := json.Marshal(test.Struct)
493
494
act.Result = Default.DeepCopyStruct(test.Struct)
495
b2, _ := json.Marshal(test.Struct)
496
497
if diff := cmp.Diff(b, b2, test.CmpOpts...); diff != "" {
498
t.Errorf("DeepCopyStruct for origin struct modified (-want +got):\n%s", diff)
499
}
500
501
if diff := cmp.Diff(test.Expectation, act, test.CmpOpts...); diff != "" {
502
t.Errorf("DeepCopyStruct() mismatch (-want +got):\n%s", diff)
503
}
504
})
505
}
506
}
507
508
func BenchmarkKeyValue(b *testing.B) {
509
key := HashedFieldNames[rand.Intn(len(HashedFieldNames))]
510
511
for i := 0; i < b.N; i++ {
512
Default.KeyValue(key, "value")
513
}
514
}
515
516
// go test -bench=BenchmarkMetrics -benchmem -benchtime=5s
// regex:
// BenchmarkMetrics-32 5 1106232258 ns/op 8228 B/op 1 allocs/op
// strings.Contains:
// BenchmarkMetrics-32 190 30726112 ns/op 3098513 B/op 61369 allocs/op
// strings.Contains + lru cache on key:
// BenchmarkMetrics-32 303 19896634 ns/op 3098512 B/op 61369 allocs/op
//
// Commented out to exclude the prometheus dependency.
// func BenchmarkMetrics(b *testing.B) {
// 	// 1 MB firehose metrics file.
// 	// file contains newline-separated json objects in the format {"b": "<data>"}
// 	// where <data> is a base64-encoded string
// 	file := "/workspace/gitpod/components/scrubber/metrics-file"
// 	var data []string
// 	osFile, err := os.Open(file)
// 	if err != nil {
// 		b.Fatal(err)
// 	}
// 	defer osFile.Close()
// 	dec := json.NewDecoder(osFile)
// 	for dec.More() {
// 		var obj map[string]string
// 		err := dec.Decode(&obj)
// 		if err != nil {
// 			b.Fatal(err)
// 		}
// 		data = append(data, obj["b"])
// 	}

// 	var reqs []*prompb.WriteRequest
// 	for _, d := range data {
// 		reader := base64.NewDecoder(base64.StdEncoding, strings.NewReader(d))
// 		req, err := remote.DecodeWriteRequest(reader)
// 		if err != nil {
// 			b.Fatal(err)
// 		}
// 		reqs = append(reqs, req)
// 	}

// 	b.ResetTimer()
// 	for i := 0; i < b.N; i++ {
// 		for _, req := range reqs {
// 			for _, ts := range req.Timeseries {
// 				for _, l := range ts.Labels {
// 					_ = Default.KeyValue(l.Name, l.Value)
// 				}
// 			}
// 		}
// 	}
// }
567
568
func BenchmarkValue(b *testing.B) {
569
const input = "This text contains {\"json\":\"data\"}, a workspace ID gitpodio-gitpod-uesaddev73c and an email [email protected]"
570
571
for i := 0; i < b.N; i++ {
572
Default.Value(input)
573
}
574
}
575
576