CoCalc -- ConditionalSpecialCasing.java

GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/java/lang/ConditionalSpecialCasing.java
⁴⁶²⁰⁸ views
1
/*
2
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.  Oracle designates this
8
 * particular file as subject to the "Classpath" exception as provided
9
 * by Oracle in the LICENSE file that accompanied this code.
10
 *
11
 * This code is distributed in the hope that it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
 * version 2 for more details (a copy is included in the LICENSE file that
15
 * accompanied this code).
16
 *
17
 * You should have received a copy of the GNU General Public License version
18
 * 2 along with this work; if not, write to the Free Software Foundation,
19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
 *
21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
 * or visit www.oracle.com if you need additional information or have any
23
 * questions.
24
 */
25

26
package java.lang;
27

28
import java.text.BreakIterator;
29
import java.util.HashSet;
30
import java.util.Hashtable;
31
import java.util.Iterator;
32
import java.util.Locale;
33
import sun.text.Normalizer;
34

35

36
/**
37
 * This is a utility class for <code>String.toLowerCase()</code> and
38
 * <code>String.toUpperCase()</code>, that handles special casing with
39
 * conditions.  In other words, it handles the mappings with conditions
40
 * that are defined in
41
 * <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">Special
42
 * Casing Properties</a> file.
43
 * <p>
44
 * Note that the unconditional case mappings (including 1:M mappings)
45
 * are handled in <code>Character.toLower/UpperCase()</code>.
46
 */
47
final class ConditionalSpecialCasing {
48

49
    // context conditions.
50
    final static int FINAL_CASED =              1;
51
    final static int AFTER_SOFT_DOTTED =        2;
52
    final static int MORE_ABOVE =               3;
53
    final static int AFTER_I =                  4;
54
    final static int NOT_BEFORE_DOT =           5;
55

56
    // combining class definitions
57
    final static int COMBINING_CLASS_ABOVE = 230;
58

59
    // Special case mapping entries
60
    static Entry[] entry = {
61
        //# ================================================================================
62
        //# Conditional mappings
63
        //# ================================================================================
64
        new Entry(0x03A3, new char[]{0x03C2}, new char[]{0x03A3}, null, FINAL_CASED), // # GREEK CAPITAL LETTER SIGMA
65
        new Entry(0x0130, new char[]{0x0069, 0x0307}, new char[]{0x0130}, null, 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
66

67
        //# ================================================================================
68
        //# Locale-sensitive mappings
69
        //# ================================================================================
70
        //# Lithuanian
71
        new Entry(0x0307, new char[]{0x0307}, new char[]{}, "lt",  AFTER_SOFT_DOTTED), // # COMBINING DOT ABOVE
72
        new Entry(0x0049, new char[]{0x0069, 0x0307}, new char[]{0x0049}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I
73
        new Entry(0x004A, new char[]{0x006A, 0x0307}, new char[]{0x004A}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER J
74
        new Entry(0x012E, new char[]{0x012F, 0x0307}, new char[]{0x012E}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I WITH OGONEK
75
        new Entry(0x00CC, new char[]{0x0069, 0x0307, 0x0300}, new char[]{0x00CC}, "lt", 0), // # LATIN CAPITAL LETTER I WITH GRAVE
76
        new Entry(0x00CD, new char[]{0x0069, 0x0307, 0x0301}, new char[]{0x00CD}, "lt", 0), // # LATIN CAPITAL LETTER I WITH ACUTE
77
        new Entry(0x0128, new char[]{0x0069, 0x0307, 0x0303}, new char[]{0x0128}, "lt", 0), // # LATIN CAPITAL LETTER I WITH TILDE
78

79
        //# ================================================================================
80
        //# Turkish and Azeri
81
        new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
82
        new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
83
        new Entry(0x0307, new char[]{}, new char[]{0x0307}, "tr", AFTER_I), // # COMBINING DOT ABOVE
84
        new Entry(0x0307, new char[]{}, new char[]{0x0307}, "az", AFTER_I), // # COMBINING DOT ABOVE
85
        new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "tr", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
86
        new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "az", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
87
        new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN SMALL LETTER I
88
        new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "az", 0)  // # LATIN SMALL LETTER I
89
    };
90

91
    // A hash table that contains the above entries
92
    static Hashtable<Integer, HashSet<Entry>> entryTable = new Hashtable<>();
93
    static {
94
        // create hashtable from the entry
95
        for (int i = 0; i < entry.length; i ++) {
96
            Entry cur = entry[i];
97
            Integer cp = new Integer(cur.getCodePoint());
98
            HashSet<Entry> set = entryTable.get(cp);
99
            if (set == null) {
100
                set = new HashSet<Entry>();
101
            }
102
            set.add(cur);
103
            entryTable.put(cp, set);
104
        }
105
    }
106

107
    static int toLowerCaseEx(String src, int index, Locale locale) {
108
        char[] result = lookUpTable(src, index, locale, true);
109

110
        if (result != null) {
111
            if (result.length == 1) {
112
                return result[0];
113
            } else {
114
                return Character.ERROR;
115
            }
116
        } else {
117
            // default to Character class' one
118
            return Character.toLowerCase(src.codePointAt(index));
119
        }
120
    }
121

122
    static int toUpperCaseEx(String src, int index, Locale locale) {
123
        char[] result = lookUpTable(src, index, locale, false);
124

125
        if (result != null) {
126
            if (result.length == 1) {
127
                return result[0];
128
            } else {
129
                return Character.ERROR;
130
            }
131
        } else {
132
            // default to Character class' one
133
            return Character.toUpperCaseEx(src.codePointAt(index));
134
        }
135
    }
136

137
    static char[] toLowerCaseCharArray(String src, int index, Locale locale) {
138
        return lookUpTable(src, index, locale, true);
139
    }
140

141
    static char[] toUpperCaseCharArray(String src, int index, Locale locale) {
142
        char[] result = lookUpTable(src, index, locale, false);
143
        if (result != null) {
144
            return result;
145
        } else {
146
            return Character.toUpperCaseCharArray(src.codePointAt(index));
147
        }
148
    }
149

150
    private static char[] lookUpTable(String src, int index, Locale locale, boolean bLowerCasing) {
151
        HashSet<Entry> set = entryTable.get(new Integer(src.codePointAt(index)));
152
        char[] ret = null;
153

154
        if (set != null) {
155
            Iterator<Entry> iter = set.iterator();
156
            String currentLang = locale.getLanguage();
157
            while (iter.hasNext()) {
158
                Entry entry = iter.next();
159
                String conditionLang = entry.getLanguage();
160
                if (((conditionLang == null) || (conditionLang.equals(currentLang))) &&
161
                        isConditionMet(src, index, locale, entry.getCondition())) {
162
                    ret = bLowerCasing ? entry.getLowerCase() : entry.getUpperCase();
163
                    if (conditionLang != null) {
164
                        break;
165
                    }
166
                }
167
            }
168
        }
169

170
        return ret;
171
    }
172

173
    private static boolean isConditionMet(String src, int index, Locale locale, int condition) {
174
        switch (condition) {
175
        case FINAL_CASED:
176
            return isFinalCased(src, index, locale);
177

178
        case AFTER_SOFT_DOTTED:
179
            return isAfterSoftDotted(src, index);
180

181
        case MORE_ABOVE:
182
            return isMoreAbove(src, index);
183

184
        case AFTER_I:
185
            return isAfterI(src, index);
186

187
        case NOT_BEFORE_DOT:
188
            return !isBeforeDot(src, index);
189

190
        default:
191
            return true;
192
        }
193
    }
194

195
    /**
196
     * Implements the "Final_Cased" condition
197
     *
198
     * Specification: Within the closest word boundaries containing C, there is a cased
199
     * letter before C, and there is no cased letter after C.
200
     *
201
     * Regular Expression:
202
     *   Before C: [{cased==true}][{wordBoundary!=true}]*
203
     *   After C: !([{wordBoundary!=true}]*[{cased}])
204
     */
205
    private static boolean isFinalCased(String src, int index, Locale locale) {
206
        BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
207
        wordBoundary.setText(src);
208
        int ch;
209

210
        // Look for a preceding 'cased' letter
211
        for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
212
                i -= Character.charCount(ch)) {
213

214
            ch = src.codePointBefore(i);
215
            if (isCased(ch)) {
216

217
                int len = src.length();
218
                // Check that there is no 'cased' letter after the index
219
                for (i = index + Character.charCount(src.codePointAt(index));
220
                        (i < len) && !wordBoundary.isBoundary(i);
221
                        i += Character.charCount(ch)) {
222

223
                    ch = src.codePointAt(i);
224
                    if (isCased(ch)) {
225
                        return false;
226
                    }
227
                }
228

229
                return true;
230
            }
231
        }
232

233
        return false;
234
    }
235

236
    /**
237
     * Implements the "After_I" condition
238
     *
239
     * Specification: The last preceding base character was an uppercase I,
240
     * and there is no intervening combining character class 230 (ABOVE).
241
     *
242
     * Regular Expression:
243
     *   Before C: [I]([{cc!=230}&{cc!=0}])*
244
     */
245
    private static boolean isAfterI(String src, int index) {
246
        int ch;
247
        int cc;
248

249
        // Look for the last preceding base character
250
        for (int i = index; i > 0; i -= Character.charCount(ch)) {
251

252
            ch = src.codePointBefore(i);
253

254
            if (ch == 'I') {
255
                return true;
256
            } else {
257
                cc = Normalizer.getCombiningClass(ch);
258
                if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
259
                    return false;
260
                }
261
            }
262
        }
263

264
        return false;
265
    }
266

267
    /**
268
     * Implements the "After_Soft_Dotted" condition
269
     *
270
     * Specification: The last preceding character with combining class
271
     * of zero before C was Soft_Dotted, and there is no intervening
272
     * combining character class 230 (ABOVE).
273
     *
274
     * Regular Expression:
275
     *   Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*
276
     */
277
    private static boolean isAfterSoftDotted(String src, int index) {
278
        int ch;
279
        int cc;
280

281
        // Look for the last preceding character
282
        for (int i = index; i > 0; i -= Character.charCount(ch)) {
283

284
            ch = src.codePointBefore(i);
285

286
            if (isSoftDotted(ch)) {
287
                return true;
288
            } else {
289
                cc = Normalizer.getCombiningClass(ch);
290
                if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
291
                    return false;
292
                }
293
            }
294
        }
295

296
        return false;
297
    }
298

299
    /**
300
     * Implements the "More_Above" condition
301
     *
302
     * Specification: C is followed by one or more characters of combining
303
     * class 230 (ABOVE) in the combining character sequence.
304
     *
305
     * Regular Expression:
306
     *   After C: [{cc!=0}]*[{cc==230}]
307
     */
308
    private static boolean isMoreAbove(String src, int index) {
309
        int ch;
310
        int cc;
311
        int len = src.length();
312

313
        // Look for a following ABOVE combining class character
314
        for (int i = index + Character.charCount(src.codePointAt(index));
315
                i < len; i += Character.charCount(ch)) {
316

317
            ch = src.codePointAt(i);
318
            cc = Normalizer.getCombiningClass(ch);
319

320
            if (cc == COMBINING_CLASS_ABOVE) {
321
                return true;
322
            } else if (cc == 0) {
323
                return false;
324
            }
325
        }
326

327
        return false;
328
    }
329

330
    /**
331
     * Implements the "Before_Dot" condition
332
     *
333
     * Specification: C is followed by <code>U+0307 COMBINING DOT ABOVE</code>.
334
     * Any sequence of characters with a combining class that is
335
     * neither 0 nor 230 may intervene between the current character
336
     * and the combining dot above.
337
     *
338
     * Regular Expression:
339
     *   After C: ([{cc!=230}&{cc!=0}])*[\u0307]
340
     */
341
    private static boolean isBeforeDot(String src, int index) {
342
        int ch;
343
        int cc;
344
        int len = src.length();
345

346
        // Look for a following COMBINING DOT ABOVE
347
        for (int i = index + Character.charCount(src.codePointAt(index));
348
                i < len; i += Character.charCount(ch)) {
349

350
            ch = src.codePointAt(i);
351

352
            if (ch == '\u0307') {
353
                return true;
354
            } else {
355
                cc = Normalizer.getCombiningClass(ch);
356
                if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
357
                    return false;
358
                }
359
            }
360
        }
361

362
        return false;
363
    }
364

365
    /**
366
     * Examines whether a character is 'cased'.
367
     *
368
     * A character C is defined to be 'cased' if and only if at least one of
369
     * following are true for C: uppercase==true, or lowercase==true, or
370
     * general_category==titlecase_letter.
371
     *
372
     * The uppercase and lowercase property values are specified in the data
373
     * file DerivedCoreProperties.txt in the Unicode Character Database.
374
     */
375
    private static boolean isCased(int ch) {
376
        int type = Character.getType(ch);
377
        if (type == Character.LOWERCASE_LETTER ||
378
                type == Character.UPPERCASE_LETTER ||
379
                type == Character.TITLECASE_LETTER) {
380
            return true;
381
        } else {
382
            // Check for Other_Lowercase and Other_Uppercase
383
            //
384
            if ((ch >= 0x02B0) && (ch <= 0x02B8)) {
385
                // MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
386
                return true;
387
            } else if ((ch >= 0x02C0) && (ch <= 0x02C1)) {
388
                // MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP
389
                return true;
390
            } else if ((ch >= 0x02E0) && (ch <= 0x02E4)) {
391
                // MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
392
                return true;
393
            } else if (ch == 0x0345) {
394
                // COMBINING GREEK YPOGEGRAMMENI
395
                return true;
396
            } else if (ch == 0x037A) {
397
                // GREEK YPOGEGRAMMENI
398
                return true;
399
            } else if ((ch >= 0x1D2C) && (ch <= 0x1D61)) {
400
                // MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
401
                return true;
402
            } else if ((ch >= 0x2160) && (ch <= 0x217F)) {
403
                // ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND
404
                // SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND
405
                return true;
406
            } else if ((ch >= 0x24B6) && (ch <= 0x24E9)) {
407
                // CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z
408
                // CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
409
                return true;
410
            } else {
411
                return false;
412
            }
413
        }
414
    }
415

416
    private static boolean isSoftDotted(int ch) {
417
        switch (ch) {
418
        case 0x0069: // Soft_Dotted # L&       LATIN SMALL LETTER I
419
        case 0x006A: // Soft_Dotted # L&       LATIN SMALL LETTER J
420
        case 0x012F: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH OGONEK
421
        case 0x0268: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH STROKE
422
        case 0x0456: // Soft_Dotted # L&       CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
423
        case 0x0458: // Soft_Dotted # L&       CYRILLIC SMALL LETTER JE
424
        case 0x1D62: // Soft_Dotted # L&       LATIN SUBSCRIPT SMALL LETTER I
425
        case 0x1E2D: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH TILDE BELOW
426
        case 0x1ECB: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH DOT BELOW
427
        case 0x2071: // Soft_Dotted # L&       SUPERSCRIPT LATIN SMALL LETTER I
428
            return true;
429
        default:
430
            return false;
431
        }
432
    }
433

434
    /**
435
     * An internal class that represents an entry in the Special Casing Properties.
436
     */
437
    static class Entry {
438
        int ch;
439
        char [] lower;
440
        char [] upper;
441
        String lang;
442
        int condition;
443

444
        Entry(int ch, char[] lower, char[] upper, String lang, int condition) {
445
            this.ch = ch;
446
            this.lower = lower;
447
            this.upper = upper;
448
            this.lang = lang;
449
            this.condition = condition;
450
        }
451

452
        int getCodePoint() {
453
            return ch;
454
        }
455

456
        char[] getLowerCase() {
457
            return lower;
458
        }
459

460
        char[] getUpperCase() {
461
            return upper;
462
        }
463

464
        String getLanguage() {
465
            return lang;
466
        }
467

468
        int getCondition() {
469
            return condition;
470
        }
471
    }
472
}
473

474
Product

Resources

Company