Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/extensions/copilot/src/util/vs/base/common/normalization.ts
13405 views
1
//!!! DO NOT modify, this file was COPIED from 'microsoft/vscode'
2
3
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
7
8
import { LRUCache } from './map';
9
10
const nfcCache = new LRUCache<string, string>(10000); // bounded to 10000 elements

/**
 * Normalizes a string using the Unicode 'NFC' form.
 *
 * Delegates to the shared `normalize` helper, memoizing results in a cache
 * that is dedicated to NFC lookups.
 *
 * @param str The string to normalize.
 * @returns The value produced by `normalize` for the 'NFC' form.
 */
export function normalizeNFC(str: string): string {
	const composed = normalize(str, 'NFC', nfcCache);
	return composed;
}
14
15
const nfdCache = new LRUCache<string, string>(10000); // bounded to 10000 elements

/**
 * Normalizes a string using the Unicode 'NFD' form.
 *
 * Delegates to the shared `normalize` helper, memoizing results in a cache
 * that is dedicated to NFD lookups.
 *
 * @param str The string to normalize.
 * @returns The value produced by `normalize` for the 'NFD' form.
 */
export function normalizeNFD(str: string): string {
	const decomposed = normalize(str, 'NFD', nfdCache);
	return decomposed;
}
19
20
const nonAsciiCharactersPattern = /[^\u0000-\u0080]/;

/**
 * Normalizes `str` to the given Unicode normalization form, memoizing the
 * outcome in `normalizedCache`.
 *
 * Falsy input (e.g. the empty string) is returned as-is and never touches the
 * cache. Strings that contain no character outside U+0000..U+0080 are stored
 * and returned unchanged without calling `String.prototype.normalize`.
 *
 * @param str The string to normalize.
 * @param form The normalization form handed to `String.prototype.normalize`.
 * @param normalizedCache Cache keyed by the input string, holding its normalized value.
 * @returns The normalized string.
 */
function normalize(str: string, form: string, normalizedCache: LRUCache<string, string>): string {
	if (str) {
		// Fast path: a previously computed (truthy) result for this exact input.
		const hit = normalizedCache.get(str);
		if (hit) {
			return hit;
		}

		// Only pay for normalization when the pattern finds a character beyond U+0080.
		const normalized = nonAsciiCharactersPattern.test(str) ? str.normalize(form) : str;

		// Remember the result so repeated lookups are cheap.
		normalizedCache.set(str, normalized);
		return normalized;
	}

	return str;
}
43
44
/**
 * Best-effort reduction of a string to a lower-cased "base" form.
 *
 * The input is normalized with NFD, characters in the range U+0300..U+036F
 * are removed, and the result is lower-cased. If removing those characters
 * yields a string whose length differs from the input's length, the stripped
 * text is discarded and the original input is only lower-cased. This keeps
 * the string length the same so that indices are not affected.
 *
 * Results are memoized per input string.
 *
 * @see https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript/37511463#37511463
 */
export const tryNormalizeToBase: (str: string) => string = (() => {
	const baseFormCache = new LRUCache<string, string>(10000); // bounded to 10000 elements
	const combiningMarks = /[\u0300-\u036f]/g;

	return (str: string): string => {
		const hit = baseFormCache.get(str);
		if (hit) {
			return hit;
		}

		const stripped = normalizeNFD(str).replace(combiningMarks, '');

		// Accept the stripped text only when it is exactly as long as the input;
		// otherwise fall back to the untouched input.
		let candidate = str;
		if (stripped.length === str.length) {
			candidate = stripped;
		}

		const lowered = candidate.toLowerCase();
		baseFormCache.set(str, lowered);
		return lowered;
	};
})();
66
67