Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
gitpod-io
GitHub Repository: gitpod-io/gitpod
Path: blob/main/components/gitpod-protocol/src/util/scrubbing.ts
2500 views
1
/**
2
* Copyright (c) 2023 Gitpod GmbH. All rights reserved.
3
* Licensed under the GNU Affero General Public License (AGPL).
4
* See License.AGPL.txt in the project root for license information.
5
*/
6
7
import { createHash } from "crypto";
8
import { hashedFields, hashedValues, redactedFields, redactedValues } from "./scrubbing-config";
9
10
/**
 * Wrapper that marks a value as "trusted".
 *
 * A trusted value is exempt from scrubbing: when the `Scrubber` in this module
 * encounters a `TrustedValue`, it returns the wrapped value as-is instead of
 * hashing or redacting it. Typical use is preparing data for logging, where
 * everything else has to be sanitized.
 *
 * @see Scrubber to scrub an underlying value as necessary.
 *
 * @template T - The type of the wrapped value.
 */
export class TrustedValue<T = any> {
    /** The wrapped value, exposed unchanged. */
    readonly value: T;

    constructor(value: T) {
        this.value = value;
    }
}
23
24
/**
 * Contract for scrubbing (anonymizing) data so that sensitive information is
 * either hashed or redacted before the data is used elsewhere.
 *
 * Scrubbing never mutates the input; a new, scrubbed structure is returned.
 */
export interface Scrubber {
    /**
     * Scrubs an entire value, descending into nested structures when `nested` is true.
     *
     * @param obj - The value to scrub. It is not mutated.
     * @param nested - Whether nested scrubbing should be performed. Defaults to true.
     * @returns The scrubbed result, as a new value rather than a mutation of `obj`.
     */
    scrub(obj: any, nested?: boolean): any;

    /**
     * Scrubs a value based on the key it is stored under: if the key matches
     * one of the configured sensitive fields, a scrubbed replacement is returned.
     *
     * @param key - The key the value belongs to.
     * @param value - The value to scrub. It is not mutated.
     * @returns The scrubbed value.
     */
    scrubKeyValue(key: string, value: string): string;

    /**
     * Scrubs a value based on its content, using the configured sensitive patterns.
     *
     * @param value - The value to scrub. It is not mutated.
     * @returns The scrubbed value.
     */
    scrubValue(value: string): string;
}
57
58
/** Optional settings passed to a sanitiser. */
interface SanitizeOptions {
    /** Label embedded in the replacement marker to show what triggered the scrubbing. */
    key?: string;
}

/** A function that turns a sensitive string into its scrubbed replacement. */
type Sanitisatiser = (value: string, options?: SanitizeOptions) => string;
62
63
/**
 * Sanitiser that discards the value entirely and returns a fixed marker.
 *
 * The marker is `[redacted:<key>]` when a non-empty `options.key` is given,
 * and plain `[redacted]` otherwise. The input value never appears in the output.
 */
const SanitiseRedact: Sanitisatiser = (_value: string, options?: SanitizeOptions) =>
    options?.key ? `[redacted:${options.key}]` : "[redacted]";
69
/**
 * Sanitiser that replaces the value with a marker containing its MD5 digest (hex).
 *
 * The marker is `[redacted:md5:<digest>]`, with `:<key>` appended before the
 * closing bracket when a non-empty `options.key` is given. Equal inputs yield
 * equal markers, so scrubbed values can still be correlated with each other.
 */
const SanitiseHash: Sanitisatiser = (value: string, options?: SanitizeOptions) => {
    const digest = createHash("md5").update(value).digest("hex");
    const keySuffix = options?.key ? `:${options.key}` : "";
    return `[redacted:md5:${digest}${keySuffix}]`;
};
80
81
/**
 * Key-based sanitisers: each entry maps a case-insensitive pattern to the
 * sanitiser applied to values whose key matches it.
 *
 * Each pattern is the configured field names joined with `|`; no escaping or
 * anchoring is added here. The redaction entry is inserted before the hashing
 * entry, and a Map iterates in insertion order, so a consumer that stops at
 * the first match redacts rather than hashes when a key matches both.
 */
const regexes = new Map<RegExp, Sanitisatiser>();
regexes.set(new RegExp(redactedFields.join("|"), "i"), SanitiseRedact);
regexes.set(new RegExp(hashedFields.join("|"), "i"), SanitiseHash);
85
86
/**
 * Default `Scrubber` implementation, driven by the field and value patterns
 * imported from `./scrubbing-config`.
 */
export const scrubber: Scrubber = {
    /** Scrubs `obj` starting at depth 0; `nested` defaults to true. */
    scrub(obj: any, nested: boolean = true): any {
        return doScrub(obj, 0, nested);
    },

    /**
     * Applies the first key-based sanitiser whose pattern matches `key`, or
     * returns `value` untouched when none matches.
     */
    scrubKeyValue(key: string, value: string): string {
        const hit = Array.from(regexes).find(([pattern]) => pattern.test(key));
        if (hit === undefined) {
            return value;
        }
        // The sanitiser is called without options, so the marker does not carry the key.
        const [, sanitise] = hit;
        return sanitise(value);
    },

    /**
     * Replaces every match of the configured value patterns inside `value`.
     * Hashed patterns run first, then redacted patterns run on that result;
     * each marker is labelled with the name of the pattern that matched.
     */
    scrubValue(value: string): string {
        let scrubbed = value;
        for (const [name, pattern] of hashedValues.entries()) {
            scrubbed = scrubbed.replace(pattern, (match) => SanitiseHash(match, { key: name }));
        }
        for (const [name, pattern] of redactedValues.entries()) {
            scrubbed = scrubbed.replace(pattern, (match) => SanitiseRedact(match, { key: name }));
        }
        return scrubbed;
    },
};
108
109
/**
 * Recursive worker behind `scrubber.scrub`. Builds a scrubbed copy of `obj`
 * without mutating it.
 *
 * Handling by kind of value:
 * - `null` and `undefined` both become `undefined`;
 * - a `TrustedValue` is unwrapped and its value returned without scrubbing;
 * - a string goes through pattern-based scrubbing (`scrubber.scrubValue`);
 * - booleans and numbers are returned unchanged;
 * - arrays are mapped element by element, one level deeper;
 * - objects are rebuilt from their own enumerable entries (see below);
 * - anything else (e.g. functions, symbols, bigints) is returned as-is.
 *
 * When `nested` is false, any array below the top level is replaced by
 * `[redacted:nested:array]`, and any other value below the top level that is
 * not a string, boolean, number, nullish or `TrustedValue` is replaced by
 * `[redacted:nested:<typeof value>]`.
 *
 * @param obj - The value to scrub.
 * @param depth - Current recursion depth; 0 for the value passed to `scrub`.
 * @param nested - Whether structures below the top level are scrubbed recursively
 *                 (true) or replaced by a marker (false).
 * @returns The scrubbed copy.
 */
function doScrub(obj: any, depth: number, nested: boolean): any {
    if (obj === undefined || obj === null) {
        return undefined;
    }
    if (obj instanceof TrustedValue) {
        return obj.value;
    }
    const objType = typeof obj;
    if (objType === "string") {
        return scrubber.scrubValue(obj);
    }
    if (objType === "boolean" || objType === "number") {
        return obj;
    }

    // In non-nested mode only the top-level container is traversed.
    const redactNested = !nested && depth > 0;
    if (Array.isArray(obj)) {
        if (redactNested) {
            return "[redacted:nested:array]";
        }
        return obj.map((v) => doScrub(v, depth + 1, nested));
    }
    if (redactNested) {
        // Same marker shape as the array case above: exactly one closing bracket.
        return `[redacted:nested:${objType}]`;
    }
    if (objType === "object") {
        const result: any = {};
        for (const [key, value] of Object.entries(obj as object)) {
            if (typeof value === "string") {
                // Key-based scrubbing takes precedence; pattern-based scrubbing
                // only runs when the key-based pass left the value unchanged.
                let scrubbedValue = scrubber.scrubKeyValue(key, value);
                if (scrubbedValue === value) {
                    scrubbedValue = scrubber.scrubValue(value);
                }
                result[key] = scrubbedValue;
            } else {
                result[key] = doScrub(value, depth + 1, nested);
            }
        }
        return result;
    }
    return obj;
}
151
152