Path: blob/main/extensions/copilot/src/util/vs/base/common/normalization.ts
13405 views
//!!! DO NOT modify, this file was COPIED from 'microsoft/vscode'

/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { LRUCache } from './map';

const nfcCache = new LRUCache<string, string>(10000); // bounded to 10000 elements

/**
 * Returns `str` in Unicode NFC form, memoizing results in a module-level cache.
 *
 * @param str The string to normalize.
 * @returns The NFC form of `str`, or `str` itself when it is empty or has no
 * character outside U+0000-U+0080.
 */
export function normalizeNFC(str: string): string {
	return normalize(str, 'NFC', nfcCache);
}

const nfdCache = new LRUCache<string, string>(10000); // bounded to 10000 elements

/**
 * Returns `str` in Unicode NFD form, memoizing results in a module-level cache.
 *
 * @param str The string to normalize.
 * @returns The NFD form of `str`, or `str` itself when it is empty or has no
 * character outside U+0000-U+0080.
 */
export function normalizeNFD(str: string): string {
	return normalize(str, 'NFD', nfdCache);
}

// Matches any character outside U+0000-U+0080. Strings without a match are
// returned as-is, skipping the call to `String.prototype.normalize`.
const nonAsciiCharactersPattern = /[^\u0000-\u0080]/;

/**
 * Shared implementation behind `normalizeNFC` and `normalizeNFD`.
 *
 * @param str The string to normalize.
 * @param form The normalization form handed to `String.prototype.normalize`.
 * @param normalizedCache Cache consulted before, and updated after, normalizing.
 * @returns The normalized string; falsy input is returned unchanged and never cached.
 */
function normalize(str: string, form: string, normalizedCache: LRUCache<string, string>): string {
	if (!str) {
		return str;
	}

	// A truthiness check is enough here: empty strings never reach the cache.
	const hit = normalizedCache.get(str);
	if (hit) {
		return hit;
	}

	const normalized = nonAsciiCharactersPattern.test(str) ? str.normalize(form) : str;

	// Remember the outcome (even when unchanged) for fast lookup next time.
	normalizedCache.set(str, normalized);

	return normalized;
}

/**
 * Attempts to reduce the string to a Unicode base form: NFD-normalize, strip the
 * combining marks U+0300-U+036F, then lower-case.
 *
 * The accent-stripped text is only used when it has the same length as the input.
 * Otherwise (e.g. the input already contains standalone combining accent characters)
 * only lower-casing is applied to the original string. This is done so as to keep the
 * string length the same and not affect indices.
 *
 * @see https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript/37511463#37511463
 */
export const tryNormalizeToBase: (str: string) => string = (() => {
	const baseFormCache = new LRUCache<string, string>(10000); // bounded to 10000 elements
	const combiningMarks = /[\u0300-\u036f]/g;

	return (str: string): string => {
		// A cached empty string is falsy and therefore simply recomputed below.
		const hit = baseFormCache.get(str);
		if (hit) {
			return hit;
		}

		const stripped = normalizeNFD(str).replace(combiningMarks, '');
		const sameLength = stripped.length === str.length;
		const lowered = (sameLength ? stripped : str).toLowerCase();

		baseFormCache.set(str, lowered);
		return lowered;
	};
})();