Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/java/lang/ConditionalSpecialCasing.java
38829 views
1
/*
2
* Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
26
package java.lang;
27
28
import java.text.BreakIterator;
29
import java.util.HashSet;
30
import java.util.Hashtable;
31
import java.util.Iterator;
32
import java.util.Locale;
33
import sun.text.Normalizer;
34
35
36
/**
37
* This is a utility class for <code>String.toLowerCase()</code> and
38
* <code>String.toUpperCase()</code>, that handles special casing with
39
* conditions. In other words, it handles the mappings with conditions
40
* that are defined in
41
* <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">Special
42
* Casing Properties</a> file.
43
* <p>
44
* Note that the unconditional case mappings (including 1:M mappings)
45
* are handled in <code>Character.toLower/UpperCase()</code>.
46
*/
47
final class ConditionalSpecialCasing {
48
49
// context conditions.
50
final static int FINAL_CASED = 1;
51
final static int AFTER_SOFT_DOTTED = 2;
52
final static int MORE_ABOVE = 3;
53
final static int AFTER_I = 4;
54
final static int NOT_BEFORE_DOT = 5;
55
56
// combining class definitions
57
final static int COMBINING_CLASS_ABOVE = 230;
58
59
// Special case mapping entries
60
static Entry[] entry = {
61
//# ================================================================================
62
//# Conditional mappings
63
//# ================================================================================
64
new Entry(0x03A3, new char[]{0x03C2}, new char[]{0x03A3}, null, FINAL_CASED), // # GREEK CAPITAL LETTER SIGMA
65
new Entry(0x0130, new char[]{0x0069, 0x0307}, new char[]{0x0130}, null, 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
66
67
//# ================================================================================
68
//# Locale-sensitive mappings
69
//# ================================================================================
70
//# Lithuanian
71
new Entry(0x0307, new char[]{0x0307}, new char[]{}, "lt", AFTER_SOFT_DOTTED), // # COMBINING DOT ABOVE
72
new Entry(0x0049, new char[]{0x0069, 0x0307}, new char[]{0x0049}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I
73
new Entry(0x004A, new char[]{0x006A, 0x0307}, new char[]{0x004A}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER J
74
new Entry(0x012E, new char[]{0x012F, 0x0307}, new char[]{0x012E}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I WITH OGONEK
75
new Entry(0x00CC, new char[]{0x0069, 0x0307, 0x0300}, new char[]{0x00CC}, "lt", 0), // # LATIN CAPITAL LETTER I WITH GRAVE
76
new Entry(0x00CD, new char[]{0x0069, 0x0307, 0x0301}, new char[]{0x00CD}, "lt", 0), // # LATIN CAPITAL LETTER I WITH ACUTE
77
new Entry(0x0128, new char[]{0x0069, 0x0307, 0x0303}, new char[]{0x0128}, "lt", 0), // # LATIN CAPITAL LETTER I WITH TILDE
78
79
//# ================================================================================
80
//# Turkish and Azeri
81
new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
82
new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
83
new Entry(0x0307, new char[]{}, new char[]{0x0307}, "tr", AFTER_I), // # COMBINING DOT ABOVE
84
new Entry(0x0307, new char[]{}, new char[]{0x0307}, "az", AFTER_I), // # COMBINING DOT ABOVE
85
new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "tr", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
86
new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "az", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
87
new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN SMALL LETTER I
88
new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "az", 0) // # LATIN SMALL LETTER I
89
};
90
91
// A hash table that contains the above entries
92
static Hashtable<Integer, HashSet<Entry>> entryTable = new Hashtable<>();
93
static {
94
// create hashtable from the entry
95
for (int i = 0; i < entry.length; i ++) {
96
Entry cur = entry[i];
97
Integer cp = new Integer(cur.getCodePoint());
98
HashSet<Entry> set = entryTable.get(cp);
99
if (set == null) {
100
set = new HashSet<Entry>();
101
}
102
set.add(cur);
103
entryTable.put(cp, set);
104
}
105
}
106
107
static int toLowerCaseEx(String src, int index, Locale locale) {
108
char[] result = lookUpTable(src, index, locale, true);
109
110
if (result != null) {
111
if (result.length == 1) {
112
return result[0];
113
} else {
114
return Character.ERROR;
115
}
116
} else {
117
// default to Character class' one
118
return Character.toLowerCase(src.codePointAt(index));
119
}
120
}
121
122
static int toUpperCaseEx(String src, int index, Locale locale) {
123
char[] result = lookUpTable(src, index, locale, false);
124
125
if (result != null) {
126
if (result.length == 1) {
127
return result[0];
128
} else {
129
return Character.ERROR;
130
}
131
} else {
132
// default to Character class' one
133
return Character.toUpperCaseEx(src.codePointAt(index));
134
}
135
}
136
137
static char[] toLowerCaseCharArray(String src, int index, Locale locale) {
138
return lookUpTable(src, index, locale, true);
139
}
140
141
static char[] toUpperCaseCharArray(String src, int index, Locale locale) {
142
char[] result = lookUpTable(src, index, locale, false);
143
if (result != null) {
144
return result;
145
} else {
146
return Character.toUpperCaseCharArray(src.codePointAt(index));
147
}
148
}
149
150
private static char[] lookUpTable(String src, int index, Locale locale, boolean bLowerCasing) {
151
HashSet<Entry> set = entryTable.get(new Integer(src.codePointAt(index)));
152
char[] ret = null;
153
154
if (set != null) {
155
Iterator<Entry> iter = set.iterator();
156
String currentLang = locale.getLanguage();
157
while (iter.hasNext()) {
158
Entry entry = iter.next();
159
String conditionLang = entry.getLanguage();
160
if (((conditionLang == null) || (conditionLang.equals(currentLang))) &&
161
isConditionMet(src, index, locale, entry.getCondition())) {
162
ret = bLowerCasing ? entry.getLowerCase() : entry.getUpperCase();
163
if (conditionLang != null) {
164
break;
165
}
166
}
167
}
168
}
169
170
return ret;
171
}
172
173
private static boolean isConditionMet(String src, int index, Locale locale, int condition) {
174
switch (condition) {
175
case FINAL_CASED:
176
return isFinalCased(src, index, locale);
177
178
case AFTER_SOFT_DOTTED:
179
return isAfterSoftDotted(src, index);
180
181
case MORE_ABOVE:
182
return isMoreAbove(src, index);
183
184
case AFTER_I:
185
return isAfterI(src, index);
186
187
case NOT_BEFORE_DOT:
188
return !isBeforeDot(src, index);
189
190
default:
191
return true;
192
}
193
}
194
195
/**
196
* Implements the "Final_Cased" condition
197
*
198
* Specification: Within the closest word boundaries containing C, there is a cased
199
* letter before C, and there is no cased letter after C.
200
*
201
* Regular Expression:
202
* Before C: [{cased==true}][{wordBoundary!=true}]*
203
* After C: !([{wordBoundary!=true}]*[{cased}])
204
*/
205
private static boolean isFinalCased(String src, int index, Locale locale) {
206
BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
207
wordBoundary.setText(src);
208
int ch;
209
210
// Look for a preceding 'cased' letter
211
for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
212
i -= Character.charCount(ch)) {
213
214
ch = src.codePointBefore(i);
215
if (isCased(ch)) {
216
217
int len = src.length();
218
// Check that there is no 'cased' letter after the index
219
for (i = index + Character.charCount(src.codePointAt(index));
220
(i < len) && !wordBoundary.isBoundary(i);
221
i += Character.charCount(ch)) {
222
223
ch = src.codePointAt(i);
224
if (isCased(ch)) {
225
return false;
226
}
227
}
228
229
return true;
230
}
231
}
232
233
return false;
234
}
235
236
/**
237
* Implements the "After_I" condition
238
*
239
* Specification: The last preceding base character was an uppercase I,
240
* and there is no intervening combining character class 230 (ABOVE).
241
*
242
* Regular Expression:
243
* Before C: [I]([{cc!=230}&{cc!=0}])*
244
*/
245
private static boolean isAfterI(String src, int index) {
246
int ch;
247
int cc;
248
249
// Look for the last preceding base character
250
for (int i = index; i > 0; i -= Character.charCount(ch)) {
251
252
ch = src.codePointBefore(i);
253
254
if (ch == 'I') {
255
return true;
256
} else {
257
cc = Normalizer.getCombiningClass(ch);
258
if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
259
return false;
260
}
261
}
262
}
263
264
return false;
265
}
266
267
/**
268
* Implements the "After_Soft_Dotted" condition
269
*
270
* Specification: The last preceding character with combining class
271
* of zero before C was Soft_Dotted, and there is no intervening
272
* combining character class 230 (ABOVE).
273
*
274
* Regular Expression:
275
* Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*
276
*/
277
private static boolean isAfterSoftDotted(String src, int index) {
278
int ch;
279
int cc;
280
281
// Look for the last preceding character
282
for (int i = index; i > 0; i -= Character.charCount(ch)) {
283
284
ch = src.codePointBefore(i);
285
286
if (isSoftDotted(ch)) {
287
return true;
288
} else {
289
cc = Normalizer.getCombiningClass(ch);
290
if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
291
return false;
292
}
293
}
294
}
295
296
return false;
297
}
298
299
/**
300
* Implements the "More_Above" condition
301
*
302
* Specification: C is followed by one or more characters of combining
303
* class 230 (ABOVE) in the combining character sequence.
304
*
305
* Regular Expression:
306
* After C: [{cc!=0}]*[{cc==230}]
307
*/
308
private static boolean isMoreAbove(String src, int index) {
309
int ch;
310
int cc;
311
int len = src.length();
312
313
// Look for a following ABOVE combining class character
314
for (int i = index + Character.charCount(src.codePointAt(index));
315
i < len; i += Character.charCount(ch)) {
316
317
ch = src.codePointAt(i);
318
cc = Normalizer.getCombiningClass(ch);
319
320
if (cc == COMBINING_CLASS_ABOVE) {
321
return true;
322
} else if (cc == 0) {
323
return false;
324
}
325
}
326
327
return false;
328
}
329
330
/**
331
* Implements the "Before_Dot" condition
332
*
333
* Specification: C is followed by <code>U+0307 COMBINING DOT ABOVE</code>.
334
* Any sequence of characters with a combining class that is
335
* neither 0 nor 230 may intervene between the current character
336
* and the combining dot above.
337
*
338
* Regular Expression:
339
* After C: ([{cc!=230}&{cc!=0}])*[\u0307]
340
*/
341
private static boolean isBeforeDot(String src, int index) {
342
int ch;
343
int cc;
344
int len = src.length();
345
346
// Look for a following COMBINING DOT ABOVE
347
for (int i = index + Character.charCount(src.codePointAt(index));
348
i < len; i += Character.charCount(ch)) {
349
350
ch = src.codePointAt(i);
351
352
if (ch == '\u0307') {
353
return true;
354
} else {
355
cc = Normalizer.getCombiningClass(ch);
356
if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
357
return false;
358
}
359
}
360
}
361
362
return false;
363
}
364
365
/**
366
* Examines whether a character is 'cased'.
367
*
368
* A character C is defined to be 'cased' if and only if at least one of
369
* following are true for C: uppercase==true, or lowercase==true, or
370
* general_category==titlecase_letter.
371
*
372
* The uppercase and lowercase property values are specified in the data
373
* file DerivedCoreProperties.txt in the Unicode Character Database.
374
*/
375
private static boolean isCased(int ch) {
376
int type = Character.getType(ch);
377
if (type == Character.LOWERCASE_LETTER ||
378
type == Character.UPPERCASE_LETTER ||
379
type == Character.TITLECASE_LETTER) {
380
return true;
381
} else {
382
// Check for Other_Lowercase and Other_Uppercase
383
//
384
if ((ch >= 0x02B0) && (ch <= 0x02B8)) {
385
// MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
386
return true;
387
} else if ((ch >= 0x02C0) && (ch <= 0x02C1)) {
388
// MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP
389
return true;
390
} else if ((ch >= 0x02E0) && (ch <= 0x02E4)) {
391
// MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
392
return true;
393
} else if (ch == 0x0345) {
394
// COMBINING GREEK YPOGEGRAMMENI
395
return true;
396
} else if (ch == 0x037A) {
397
// GREEK YPOGEGRAMMENI
398
return true;
399
} else if ((ch >= 0x1D2C) && (ch <= 0x1D61)) {
400
// MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
401
return true;
402
} else if ((ch >= 0x2160) && (ch <= 0x217F)) {
403
// ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND
404
// SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND
405
return true;
406
} else if ((ch >= 0x24B6) && (ch <= 0x24E9)) {
407
// CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z
408
// CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
409
return true;
410
} else {
411
return false;
412
}
413
}
414
}
415
416
private static boolean isSoftDotted(int ch) {
417
switch (ch) {
418
case 0x0069: // Soft_Dotted # L& LATIN SMALL LETTER I
419
case 0x006A: // Soft_Dotted # L& LATIN SMALL LETTER J
420
case 0x012F: // Soft_Dotted # L& LATIN SMALL LETTER I WITH OGONEK
421
case 0x0268: // Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE
422
case 0x0456: // Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
423
case 0x0458: // Soft_Dotted # L& CYRILLIC SMALL LETTER JE
424
case 0x1D62: // Soft_Dotted # L& LATIN SUBSCRIPT SMALL LETTER I
425
case 0x1E2D: // Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW
426
case 0x1ECB: // Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW
427
case 0x2071: // Soft_Dotted # L& SUPERSCRIPT LATIN SMALL LETTER I
428
return true;
429
default:
430
return false;
431
}
432
}
433
434
/**
435
* An internal class that represents an entry in the Special Casing Properties.
436
*/
437
static class Entry {
438
int ch;
439
char [] lower;
440
char [] upper;
441
String lang;
442
int condition;
443
444
Entry(int ch, char[] lower, char[] upper, String lang, int condition) {
445
this.ch = ch;
446
this.lower = lower;
447
this.upper = upper;
448
this.lang = lang;
449
this.condition = condition;
450
}
451
452
int getCodePoint() {
453
return ch;
454
}
455
456
char[] getLowerCase() {
457
return lower;
458
}
459
460
char[] getUpperCase() {
461
return upper;
462
}
463
464
String getLanguage() {
465
return lang;
466
}
467
468
int getCondition() {
469
return condition;
470
}
471
}
472
}
473
474