Path: blob/main/components/gitpod-protocol/src/util/scrubbing.ts
2500 views
/**
 * Copyright (c) 2023 Gitpod GmbH. All rights reserved.
 * Licensed under the GNU Affero General Public License (AGPL).
 * See License.AGPL.txt in the project root for license information.
 */

import { createHash } from "crypto";
import { hashedFields, hashedValues, redactedFields, redactedValues } from "./scrubbing-config";

/**
 * `TrustedValue` is a generic wrapper class used to mark certain values as "trusted".
 * This means these values are safe from being scrubbed or redacted by the Scrubber interface.
 * A typical use case is to prepare values for logging or similar uses where data sanitization is required.
 *
 * @see Scrubber to scrub an underlying value as necessary.
 *
 * @template T - The type of the value being wrapped.
 * @property value - The value being wrapped.
 */
export class TrustedValue<T = any> {
    constructor(readonly value: T) {}
}

/**
 * The `Scrubber` interface defines methods for scrubbing or anonymizing data.
 * It helps in preparing data by sanitizing it and ensuring that sensitive information is either hashed or redacted.
 * The scrubbing operation does not mutate the original data structure but creates a new one with the scrubbed data.
 */
export interface Scrubber {
    /**
     * Scrub an entire object, potentially recursively if `nested` is true.
     *
     * @param obj - The object to be scrubbed. This object is not mutated.
     * @param nested - Whether nested arrays/objects are scrubbed recursively. Defaults to true.
     *   When false, anything that is not a string, number or boolean and sits below the top
     *   level is replaced by a `[redacted:nested:…]` placeholder.
     * @returns The scrubbed object. This is a new object, not a mutation of the original.
     */
    scrub(obj: any, nested?: boolean): any;

    /**
     * Takes a key-value pair and returns a scrubbed version of the value
     * if the key matches any of the defined sensitive fields.
     *
     * @param key - The key of the data to be scrubbed.
     * @param value - The value of the data to be scrubbed.
     * @returns The scrubbed value. The original value is not mutated.
     */
    scrubKeyValue(key: string, value: string): string;

    /**
     * Takes a value and scrubs it based on defined sensitive patterns.
     *
     * @param value - The value to be scrubbed.
     * @returns The scrubbed value. The original value is not mutated.
     */
    scrubValue(value: string): string;
}

/** Optional context passed to a sanitiser; `key` is embedded in the produced marker. */
interface SanitizeOptions {
    key?: string;
}

/** Turns a sensitive value into its replacement marker. */
type Sanitisatiser = (value: string, options?: SanitizeOptions) => string;

/**
 * Drops the value entirely.
 * Produces `[redacted:<key>]` when a non-empty key is supplied, `[redacted]` otherwise.
 */
const SanitiseRedact: Sanitisatiser = (_: string, options?: SanitizeOptions) => {
    if (options?.key) {
        return `[redacted:${options.key}]`;
    }
    return "[redacted]";
};

/**
 * Replaces the value with the hex MD5 digest of its content.
 * Produces `[redacted:md5:<digest>:<key>]` when a non-empty key is supplied,
 * `[redacted:md5:<digest>]` otherwise.
 *
 * NOTE(review): MD5 is not preimage/collision resistant; presumably it is used here only to
 * get a short, stable token for correlating log entries - confirm before relying on it for
 * anonymity guarantees.
 */
const SanitiseHash: Sanitisatiser = (value: string, options?: SanitizeOptions) => {
    const hash = createHash("md5");
    hash.update(value);

    let res = `[redacted:md5:${hash.digest("hex")}`;
    if (options?.key) {
        res += `:${options.key}`;
    }
    res += `]`;
    return res;
};

// Field-name matchers, checked in insertion order: a key matching the "redacted" pattern is
// redacted even if it also matches the "hashed" pattern. Matching is case-insensitive and
// unanchored, i.e. a configured field name matches anywhere inside the key.
const regexes = new Map<RegExp, Sanitisatiser>([
    [new RegExp(redactedFields.join("|"), "i"), SanitiseRedact],
    [new RegExp(hashedFields.join("|"), "i"), SanitiseHash],
]);

export const scrubber: Scrubber = {
    scrub: function (obj: any, nested: boolean = true): any {
        return doScrub(obj, 0, nested);
    },
    scrubKeyValue: function (key: string, value: string): string {
        for (const [regex, sanitisatiser] of regexes) {
            if (regex.test(key)) {
                // The key is deliberately not forwarded, so field-based markers carry no key suffix.
                return sanitisatiser(value);
            }
        }
        return value;
    },
    scrubValue: function (value: string): string {
        // Hashing runs before redaction; each pass operates on the output of the previous one.
        // Whether every occurrence is replaced depends on the flags of the configured
        // patterns (presumably global regexes - verify in scrubbing-config).
        for (const [key, expr] of hashedValues.entries()) {
            value = value.replace(expr, (s) => SanitiseHash(s, { key }));
        }
        for (const [key, expr] of redactedValues.entries()) {
            value = value.replace(expr, (s) => SanitiseRedact(s, { key }));
        }
        return value;
    },
};

/**
 * Recursive worker behind `scrubber.scrub`.
 *
 * Handling by input kind:
 * - `undefined` / `null`: returns `undefined`.
 * - `TrustedValue`: returns the wrapped value untouched.
 * - string: pattern-based scrubbing via `scrubber.scrubValue`.
 * - boolean / number: returned unchanged.
 * - array: each element is scrubbed one level deeper; replaced by `[redacted:nested:array]`
 *   when `nested` is false and the array is below the top level.
 * - anything else below the top level while `nested` is false: replaced by
 *   `[redacted:nested:<typeof obj>]`.
 * - object: a new plain object is built from `Object.entries(obj)`; string properties get
 *   field-based scrubbing first and pattern-based scrubbing only if that left them unchanged,
 *   all other properties are scrubbed recursively.
 * - everything else (e.g. functions, symbols, bigints): returned unchanged.
 *
 * @param obj - The value to scrub. It is not mutated.
 * @param depth - Nesting level of `obj`; 0 for the value handed to `scrub`.
 * @param nested - Whether arrays/objects below the top level are scrubbed (true) or replaced
 *   by a placeholder (false).
 * @returns The scrubbed copy.
 */
function doScrub(obj: any, depth: number, nested: boolean): any {
    if (obj === undefined || obj === null) {
        return undefined;
    }
    if (obj instanceof TrustedValue) {
        return obj.value;
    }
    const objType = typeof obj;
    if (objType === "string") {
        return scrubber.scrubValue(obj as string);
    }
    if (objType === "boolean" || objType === "number") {
        return obj;
    }
    if (Array.isArray(obj)) {
        if (!nested && depth > 0) {
            return "[redacted:nested:array]";
        }
        return obj.map((v) => doScrub(v, depth + 1, nested));
    }
    if (!nested && depth > 0) {
        // Same shape as the array marker above. This used to be emitted with a stray closing
        // brace ("[redacted:nested:object}]"); anything matching on the old text needs updating.
        return `[redacted:nested:${objType}]`;
    }
    if (objType === "object") {
        const result: any = {};
        for (const [key, value] of Object.entries(obj as object)) {
            if (typeof value === "string") {
                // First apply field-based scrubbing, then pattern-based scrubbing
                let scrubbedValue = scrubber.scrubKeyValue(key, value);
                // If no field-based scrubbing was applied, apply pattern-based scrubbing
                if (scrubbedValue === value) {
                    scrubbedValue = scrubber.scrubValue(value);
                }
                result[key] = scrubbedValue;
            } else {
                result[key] = doScrub(value, depth + 1, nested);
            }
        }
        return result;
    }
    return obj;
}