Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
microsoft
GitHub Repository: microsoft/vscode
Path: blob/main/src/vs/base/test/common/naturalLanguage/korean.test.ts
3296 views
1
/*---------------------------------------------------------------------------------------------
2
* Copyright (c) Microsoft Corporation. All rights reserved.
3
* Licensed under the MIT License. See License.txt in the project root for license information.
4
*--------------------------------------------------------------------------------------------*/
5
6
// allow-any-unicode-file
7
8
import { strictEqual } from 'assert';
9
import { getKoreanAltChars } from '../../../common/naturalLanguage/korean.js';
10
import { ensureNoDisposablesAreLeakedInTestSuite } from '../utils.js';
11
12
/**
 * Test helper that applies `getKoreanAltChars` to every UTF-16 code unit of
 * `text` and concatenates the results.
 *
 * A code unit for which `getKoreanAltChars` returns a truthy sequence of char
 * codes is replaced by the string built from those codes; any other code unit
 * is copied through unchanged.
 *
 * @param text The string to convert.
 * @returns The converted string.
 */
function getKoreanAltCharsForString(text: string): string {
	return text
		// An empty separator splits into individual UTF-16 code units.
		.split('')
		.map(unit => {
			const altCodes = getKoreanAltChars(unit.charCodeAt(0));
			return altCodes ? String.fromCharCode(...Array.from(altCodes)) : unit;
		})
		.join('');
}
24
25
suite('Korean', () => {
26
ensureNoDisposablesAreLeakedInTestSuite();
27
28
suite('getKoreanAltChars', () => {
29
// Checks every listed initial-consonant jamo against the alternative
// characters expected for it.
test('Modern initial consonants', () => {
	const cases = new Map([
		['ᄀ', 'r'],
		['ᄁ', 'R'],
		['ᄂ', 's'],
		['ᄃ', 'e'],
		['ᄄ', 'E'],
		['ᄅ', 'f'],
		['ᄆ', 'a'],
		['ᄇ', 'q'],
		['ᄈ', 'Q'],
		['ᄉ', 't'],
		['ᄊ', 'T'],
		['ᄋ', 'd'],
		['ᄌ', 'w'],
		['ᄍ', 'W'],
		['ᄎ', 'c'],
		['ᄏ', 'z'],
		['ᄐ', 'x'],
		['ᄑ', 'v'],
		['ᄒ', 'g'],
	]);
	for (const [hangul, alt] of cases.entries()) {
		// Print the code unit in hex, as the sibling tests do: these jamo
		// look very similar to the ones used by the other tests in this
		// file, so the glyph alone does not identify the failing case.
		strictEqual(getKoreanAltCharsForString(hangul), alt, `"${hangul}" (0x${hangul.charCodeAt(0).toString(16)}) should result in "${alt}"`);
	}
});
55
56
// Checks every listed latter-consonant jamo against the alternative
// characters expected for it.
test('Modern latter consonants', () => {
	// Each entry is [jamo, expected alternative characters].
	const expectations: ReadonlyArray<readonly [string, string]> = [
		['ᆨ', 'r'],
		['ᆩ', 'R'],
		['ᆪ', 'rt'],
		['ᆫ', 's'],
		['ᆬ', 'sw'],
		['ᆭ', 'sg'],
		['ᆮ', 'e'],
		['ᆯ', 'f'],
		['ᆰ', 'fr'],
		['ᆱ', 'fa'],
		['ᆲ', 'fq'],
		['ᆳ', 'ft'],
		['ᆴ', 'fx'],
		['ᆵ', 'fv'],
		['ᆶ', 'fg'],
		['ᆷ', 'a'],
		['ᆸ', 'q'],
		['ᆹ', 'qt'],
		['ᆺ', 't'],
		['ᆻ', 'T'],
		['ᆼ', 'd'],
		['ᆽ', 'w'],
		['ᆾ', 'c'],
		['ᆿ', 'z'],
		['ᇀ', 'x'],
		['ᇁ', 'v'],
		['ᇂ', 'g'],
	];
	expectations.forEach(([jamo, expected]) => {
		// Hex code unit of the jamo, shown in the failure message.
		const codeUnitHex = jamo.charCodeAt(0).toString(16);
		strictEqual(getKoreanAltCharsForString(jamo), expected, `"${jamo}" (0x${codeUnitHex}) should result in "${expected}"`);
	});
});
90
91
// Checks every listed vowel jamo against the alternative characters
// expected for it.
test('Modern vowels', () => {
	const vowelCases = new Map([
		['ᅡ', 'k'],
		['ᅢ', 'o'],
		['ᅣ', 'i'],
		['ᅤ', 'O'],
		['ᅥ', 'j'],
		['ᅦ', 'p'],
		['ᅧ', 'u'],
		['ᅨ', 'P'],
		['ᅩ', 'h'],
		['ᅪ', 'hk'],
		['ᅫ', 'ho'],
		['ᅬ', 'hl'],
		['ᅭ', 'y'],
		['ᅮ', 'n'],
		['ᅯ', 'nj'],
		['ᅰ', 'np'],
		['ᅱ', 'nl'],
		['ᅲ', 'b'],
		['ᅳ', 'm'],
		['ᅴ', 'ml'],
		['ᅵ', 'l'],
	]);
	// Map.forEach passes (value, key), i.e. (expected, vowel).
	vowelCases.forEach((expected, vowel) => {
		const actual = getKoreanAltCharsForString(vowel);
		const failureMessage = `"${vowel}" (0x${vowel.charCodeAt(0).toString(16)}) should result in "${expected}"`;
		strictEqual(actual, expected, failureMessage);
	});
});
119
120
// Checks every listed compatibility jamo (consonants first, then vowels)
// against the alternative characters expected for it.
test('Compatibility Jamo', () => {
	// Each entry is [jamo, expected alternative characters].
	const samples: [string, string][] = [
		['ㄱ', 'r'],
		['ㄲ', 'R'],
		['ㄳ', 'rt'],
		['ㄴ', 's'],
		['ㄵ', 'sw'],
		['ㄶ', 'sg'],
		['ㄷ', 'e'],
		['ㄸ', 'E'],
		['ㄹ', 'f'],
		['ㄺ', 'fr'],
		['ㄻ', 'fa'],
		['ㄼ', 'fq'],
		['ㄽ', 'ft'],
		['ㄾ', 'fx'],
		['ㄿ', 'fv'],
		['ㅀ', 'fg'],
		['ㅁ', 'a'],
		['ㅂ', 'q'],
		['ㅃ', 'Q'],
		['ㅄ', 'qt'],
		['ㅅ', 't'],
		['ㅆ', 'T'],
		['ㅇ', 'd'],
		['ㅈ', 'w'],
		['ㅉ', 'W'],
		['ㅊ', 'c'],
		['ㅋ', 'z'],
		['ㅌ', 'x'],
		['ㅍ', 'v'],
		['ㅎ', 'g'],
		['ㅏ', 'k'],
		['ㅐ', 'o'],
		['ㅑ', 'i'],
		['ㅒ', 'O'],
		['ㅓ', 'j'],
		['ㅔ', 'p'],
		['ㅕ', 'u'],
		['ㅖ', 'P'],
		['ㅗ', 'h'],
		['ㅘ', 'hk'],
		['ㅙ', 'ho'],
		['ㅚ', 'hl'],
		['ㅛ', 'y'],
		['ㅜ', 'n'],
		['ㅝ', 'nj'],
		['ㅞ', 'np'],
		['ㅟ', 'nl'],
		['ㅠ', 'b'],
		['ㅡ', 'm'],
		['ㅢ', 'ml'],
		['ㅣ', 'l'],
		// HF: Hangul Filler (everything after this is archaic)
	];
	for (const [jamo, expected] of samples) {
		// Hex code unit of the jamo, shown in the failure message.
		const codeUnitHex = jamo.charCodeAt(0).toString(16);
		strictEqual(getKoreanAltCharsForString(jamo), expected, `"${jamo}" (0x${codeUnitHex}) should result in "${expected}"`);
	}
});
179
180
// Exhaustive coverage is impractical because there are too many characters,
// so this test uses a selection of real-world samples taken from this code
// base (workbench contrib names).
test('Composed samples', () => {
	const cases = new Map([
		['ㅁㅊㅊㄷㄴ냐ㅠㅑㅣㅑ쇼', 'accessibility'],
		['ㅁㅊ채ㅕㅜㅅ뚜샤시드둣ㄴ', 'accountEntitlements'],
		['며야ㅐ쳗ㄴ', 'audioCues'],
		['ㅠㄱㅁ찯셰먁채ㅣㅐ걐ㄷㄱ2ㅆ디듣ㅅ교', 'bracketPairColorizer2Telemetry'],
		['ㅠㅕㅣㅏㄸ얏', 'bulkEdit'],
		['ㅊ미ㅣㅗㅑㄷㄱㅁㄱ초ㅛ', 'callHierarchy'],
		['촘ㅅ', 'chat'],
		['챙ㄷㅁㅊ샤ㅐㅜㄴ', 'codeActions'],
		['챙ㄷㄸ야색', 'codeEditor'],
		['채ㅡㅡ뭉ㄴ', 'commands'],
		['채ㅡㅡ둣ㄴ', 'comments'],
		['채ㅜ럏ㄸ테ㅐㄳㄷㄱ', 'configExporter'],
		['채ㅜㅅㄷㅌ스두ㅕ', 'contextmenu'],
		['쳔새ㅡㄸ야색', 'customEditor'],
		['ㅇ듀ㅕㅎ', 'debug'],
		['ㅇ덱ㄷㅊㅁㅅㄷㅇㄸㅌㅅ두냐ㅐㅜㅡㅑㅎㄱㅁ색', 'deprecatedExtensionMigrator'],
		['ㄷ얏ㄴㄷㄴ냐ㅐㅜㄴ', 'editSessions'],
		['드ㅡㄷㅅ', 'emmet'],
		['ㄷㅌㅅ두냐ㅐㅜㄴ', 'extensions'],
		['ㄷㅌㅅㄷ구밌ㄷ그ㅑㅜ미', 'externalTerminal'],
		['ㄷㅌㅅㄷ구미ㅕ갸ㅒㅔ둗ㄱ', 'externalUriOpener'],
		['랴ㅣㄷㄴ', 'files'],
		['래ㅣ야ㅜㅎ', 'folding'],
		['래금ㅅ', 'format'],
		['ㅑㅟ묘ㅗㅑㅜㅅㄴ', 'inlayHints'],
		['ㅑㅟㅑㅜㄷ촘ㅅ', 'inlineChat'],
		['ㅑㅜㅅㄷㄱㅁㅊ샾ㄷ', 'interactive'],
		['ㅑㄴ녇', 'issue'],
		['ㅏ됴ㅠㅑㅜ야ㅜㅎㄴ', 'keybindings'],
		['ㅣ무혐ㅎㄷㅇㄷㅅㄷㅊ샤ㅐㅜ', 'languageDetection'],
		['ㅣ무혐ㅎㄷㄴㅅㅁ션', 'languageStatus'],
		['ㅣㅑㅡㅑ샤ㅜ얓ㅁ색', 'limitIndicator'],
		['ㅣㅑㄴㅅ', 'list'],
		['ㅣㅐㅊ미ㅗㅑㄴ새교', 'localHistory'],
		['ㅣㅐㅊ미ㅑㅋㅁ샤ㅐㅜ', 'localization'],
		['ㅣㅐㅎㄴ', 'logs'],
		['ㅡ메ㅔㄷㅇㄸ얏ㄴ', 'mappedEdits'],
		['ㅡㅁ가애주', 'markdown'],
		['ㅡㅁ갇ㄱㄴ', 'markers'],
		['ㅡㄷㄱㅎㄷㄸ야색', 'mergeEditor'],
		['ㅡㅕㅣ샤얄ㄹㄸ야색', 'multiDiffEditor'],
		['ㅜㅐㅅ듀ㅐㅐㅏ', 'notebook'],
		['ㅐㅕ시ㅑㅜㄷ', 'outline'],
		['ㅐㅕ세ㅕㅅ', 'output'],
		['ㅔㄷㄱ래그뭋ㄷ', 'performance'],
		['ㅔㄱㄷㄹㄷㄱ둧ㄷㄴ', 'preferences'],
		['벼ㅑ참ㅊㅊㄷㄴㄴ', 'quickaccess'],
		['ㄱ디며ㅜ촏ㄱ', 'relauncher'],
		['ㄱ드ㅐㅅㄷ', 'remote'],
		['ㄱ드ㅐㅅㄷ쎠ㅜㅜ디', 'remoteTunnel'],
		['ㄴㅁ노', 'sash'],
		['ㄴ츠', 'scm'],
		['ㄴㄷㅁㄱ초', 'search'],
		['ㄴㄷㅁㄱ초ㄸ야색', 'searchEditor'],
		['놈ㄱㄷ', 'share'],
		['누ㅑㅔㅔㄷㅅㄴ', 'snippets'],
		['넫ㄷ초', 'speech'],
		['네ㅣㅁ노', 'splash'],
		['녁ㅍ됸', 'surveys'],
		['ㅅㅁㅎㄴ', 'tags'],
		['ㅅㅁ난', 'tasks'],
		['ㅅ디듣ㅅ교', 'telemetry'],
		['ㅅㄷ그ㅑㅜ미', 'terminal'],
		['ㅅㄷ그ㅑㅜ미채ㅜㅅ갸ㅠ', 'terminalContrib'],
		['ㅅㄷㄴ샤ㅜㅎ', 'testing'],
		['소듣ㄴ', 'themes'],
		['샤ㅡ디ㅑㅜㄷ', 'timeline'],
		['쇼ㅔ도ㅑㄷㄱㅁㄱ초ㅛ', 'typeHierarchy'],
		['ㅕㅔㅇㅁㅅㄷ', 'update'],
		['ㅕ기', 'url'],
		['ㅕㄴㄷㄱㅇㅁㅅ몌개랴ㅣㄷ', 'userDataProfile'],
		['ㅕㄴㄷㄱㅇㅁㅅㅁ뇨ㅜㅊ', 'userDataSync'],
		['ㅈ듀퍋ㅈ', 'webview'],
		['ㅈ듀퍋졔무디', 'webviewPanel'],
		['ㅈ듀퍋ㅈ퍋ㅈ', 'webviewView'],
		['ㅈ디채ㅡ듀무ㅜㄷㄱ', 'welcomeBanner'],
		['ㅈ디채ㅡㄷ야미ㅐㅎ', 'welcomeDialog'],
		['ㅈ디채ㅡㄷㅎㄷㅅ샤ㅜㅎㄴㅅㅁㄳㄷㅇ', 'welcomeGettingStarted'],
		['ㅈ디채ㅡㄷ퍋ㅈㄴ', 'welcomeViews'],
		['ㅈ디채ㅡㄷㅉ미ㅏ소개ㅕ호', 'welcomeWalkthrough'],
		['재가넴ㅊㄷ', 'workspace'],
		['재가넴ㅊㄷㄴ', 'workspaces'],
	]);
	// Map.forEach passes (value, key), i.e. (alt, hangul).
	cases.forEach((alt, hangul) => {
		// Both sides are lower-cased before comparing, because some cases
		// do not have corresponding hangul inputs.
		const actualLower = getKoreanAltCharsForString(hangul).toLowerCase();
		const expectedLower = alt.toLowerCase();
		// Only the first code unit of the sample is printed in hex.
		const failureMessage = `"${hangul}" (0x${hangul.charCodeAt(0).toString(16)}) should result in "${alt}"`;
		strictEqual(actualLower, expectedLower, failureMessage);
	});
});
277
});
278
});
279
280