CoCalc -- Character.java

GitHub Repository: PojavLauncherTeam/openjdk-aarch32-jdk8u
Path: blob/jdk8u272-b10-aarch32-20201026/jdk/src/share/classes/java/lang/Character.java
83405 views
1
/*
2
 * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.  Oracle designates this
8
 * particular file as subject to the "Classpath" exception as provided
9
 * by Oracle in the LICENSE file that accompanied this code.
10
 *
11
 * This code is distributed in the hope that it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
 * version 2 for more details (a copy is included in the LICENSE file that
15
 * accompanied this code).
16
 *
17
 * You should have received a copy of the GNU General Public License version
18
 * 2 along with this work; if not, write to the Free Software Foundation,
19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
 *
21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
 * or visit www.oracle.com if you need additional information or have any
23
 * questions.
24
 */
25

26
package java.lang;
27

28
import java.util.Arrays;
29
import java.util.Map;
30
import java.util.HashMap;
31
import java.util.Locale;
32

33
/**
34
 * The {@code Character} class wraps a value of the primitive
35
 * type {@code char} in an object. An object of class
36
 * {@code Character} contains a single field whose type is
37
 * {@code char}.
38
 * <p>
39
 * In addition, this class provides a large number of static methods for
40
 * determining a character's category (lowercase letter, digit, etc.)
41
 * and for converting characters from uppercase to lowercase and vice
42
 * versa.
43
 *
44
 * <h3><a id="conformance">Unicode Conformance</a></h3>
45
 * <p>
46
 * The fields and methods of class {@code Character} are defined in terms
47
 * of character information from the Unicode Standard, specifically the
48
 * <i>UnicodeData</i> file that is part of the Unicode Character Database.
49
 * This file specifies properties including name and category for every
50
 * assigned Unicode code point or character range. The file is available
51
 * from the Unicode Consortium at
52
 * <a href="http://www.unicode.org">http://www.unicode.org</a>.
53
 * <p>
54
 * The Java SE 8 Platform uses character information from version 6.2
55
 * of the Unicode Standard, with two extensions. First, the Java SE 8 Platform
56
 * allows an implementation of class {@code Character} to use the Japanese Era
57
 * code point, {@code U+32FF}, from the first version of the Unicode Standard
58
 * after 6.2 that assigns the code point. Second, in recognition of the fact
59
 * that new currencies appear frequently, the Java SE 8 Platform allows an
60
 * implementation of class {@code Character} to use the Currency Symbols
61
 * block from version 10.0 of the Unicode Standard. Consequently, the
62
 * behavior of fields and methods of class {@code Character} may vary across
63
 * implementations of the Java SE 8 Platform when processing the aforementioned
64
 * code points (outside of version 6.2), except for the following methods
65
 * that define Java identifiers:
66
 * {@link #isJavaIdentifierStart(int)}, {@link #isJavaIdentifierStart(char)},
67
 * {@link #isJavaIdentifierPart(int)}, and {@link #isJavaIdentifierPart(char)}.
68
 * Code points in Java identifiers must be drawn from version 6.2 of
69
 * the Unicode Standard.
70
 *
71
 * <h3><a name="unicode">Unicode Character Representations</a></h3>
72
 *
73
 * <p>The {@code char} data type (and therefore the value that a
74
 * {@code Character} object encapsulates) is based on the
75
 * original Unicode specification, which defined characters as
76
 * fixed-width 16-bit entities. The Unicode Standard has since been
77
 * changed to allow for characters whose representation requires more
78
 * than 16 bits.  The range of legal <em>code point</em>s is now
79
 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
80
 * (Refer to the <a
81
 * href="http://www.unicode.org/reports/tr27/#notation"><i>
82
 * definition</i></a> of the U+<i>n</i> notation in the Unicode
83
 * Standard.)
84
 *
85
 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
86
 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
87
 * <a name="supplementary">Characters</a> whose code points are greater
88
 * than U+FFFF are called <em>supplementary character</em>s.  The Java
89
 * platform uses the UTF-16 representation in {@code char} arrays and
90
 * in the {@code String} and {@code StringBuffer} classes. In
91
 * this representation, supplementary characters are represented as a pair
92
 * of {@code char} values, the first from the <em>high-surrogates</em>
93
 * range, (&#92;uD800-&#92;uDBFF), the second from the
94
 * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
95
 *
96
 * <p>A {@code char} value, therefore, represents Basic
97
 * Multilingual Plane (BMP) code points, including the surrogate
98
 * code points, or code units of the UTF-16 encoding. An
99
 * {@code int} value represents all Unicode code points,
100
 * including supplementary code points. The lower (least significant)
101
 * 21 bits of {@code int} are used to represent Unicode code
102
 * points and the upper (most significant) 11 bits must be zero.
103
 * Unless otherwise specified, the behavior with respect to
104
 * supplementary characters and surrogate {@code char} values is
105
 * as follows:
106
 *
107
 * <ul>
108
 * <li>The methods that only accept a {@code char} value cannot support
109
 * supplementary characters. They treat {@code char} values from the
110
 * surrogate ranges as undefined characters. For example,
111
 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
112
 * this specific value if followed by any low-surrogate value in a string
113
 * would represent a letter.
114
 *
115
 * <li>The methods that accept an {@code int} value support all
116
 * Unicode characters, including supplementary characters. For
117
 * example, {@code Character.isLetter(0x2F81A)} returns
118
 * {@code true} because the code point value represents a letter
119
 * (a CJK ideograph).
120
 * </ul>
121
 *
122
 * <p>In the Java SE API documentation, <em>Unicode code point</em> is
123
 * used for character values in the range between U+0000 and U+10FFFF,
124
 * and <em>Unicode code unit</em> is used for 16-bit
125
 * {@code char} values that are code units of the <em>UTF-16</em>
126
 * encoding. For more information on Unicode terminology, refer to the
127
 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
128
 *
129
 * @author  Lee Boynton
130
 * @author  Guy Steele
131
 * @author  Akira Tanaka
132
 * @author  Martin Buchholz
133
 * @author  Ulf Zibis
134
 * @since   1.0
135
 */
136
public final
137
class Character implements java.io.Serializable, Comparable<Character> {
138
    /**
139
     * The minimum radix available for conversion to and from strings.
140
     * The constant value of this field is the smallest value permitted
141
     * for the radix argument in radix-conversion methods such as the
142
     * {@code digit} method, the {@code forDigit} method, and the
143
     * {@code toString} method of class {@code Integer}.
144
     *
145
     * @see     Character#digit(char, int)
146
     * @see     Character#forDigit(int, int)
147
     * @see     Integer#toString(int, int)
148
     * @see     Integer#valueOf(String)
149
     */
150
    public static final int MIN_RADIX = 2;
151

152
    /**
153
     * The maximum radix available for conversion to and from strings.
154
     * The constant value of this field is the largest value permitted
155
     * for the radix argument in radix-conversion methods such as the
156
     * {@code digit} method, the {@code forDigit} method, and the
157
     * {@code toString} method of class {@code Integer}.
158
     *
159
     * @see     Character#digit(char, int)
160
     * @see     Character#forDigit(int, int)
161
     * @see     Integer#toString(int, int)
162
     * @see     Integer#valueOf(String)
163
     */
164
    public static final int MAX_RADIX = 36;
165

166
    /**
167
     * The constant value of this field is the smallest value of type
168
     * {@code char}, {@code '\u005Cu0000'}.
169
     *
170
     * @since   1.0.2
171
     */
172
    public static final char MIN_VALUE = '\u0000';
173

174
    /**
175
     * The constant value of this field is the largest value of type
176
     * {@code char}, {@code '\u005CuFFFF'}.
177
     *
178
     * @since   1.0.2
179
     */
180
    public static final char MAX_VALUE = '\uFFFF';
181

182
    /**
183
     * The {@code Class} instance representing the primitive type
184
     * {@code char}.
185
     *
186
     * @since   1.1
187
     */
188
    @SuppressWarnings("unchecked")
189
    public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
190

191
    /*
192
     * Normative general types
193
     */
194

195
    /*
196
     * General character types
197
     */
198

199
    /**
200
     * General category "Cn" in the Unicode specification.
201
     * @since   1.1
202
     */
203
    public static final byte UNASSIGNED = 0;
204

205
    /**
206
     * General category "Lu" in the Unicode specification.
207
     * @since   1.1
208
     */
209
    public static final byte UPPERCASE_LETTER = 1;
210

211
    /**
212
     * General category "Ll" in the Unicode specification.
213
     * @since   1.1
214
     */
215
    public static final byte LOWERCASE_LETTER = 2;
216

217
    /**
218
     * General category "Lt" in the Unicode specification.
219
     * @since   1.1
220
     */
221
    public static final byte TITLECASE_LETTER = 3;
222

223
    /**
224
     * General category "Lm" in the Unicode specification.
225
     * @since   1.1
226
     */
227
    public static final byte MODIFIER_LETTER = 4;
228

229
    /**
230
     * General category "Lo" in the Unicode specification.
231
     * @since   1.1
232
     */
233
    public static final byte OTHER_LETTER = 5;
234

235
    /**
236
     * General category "Mn" in the Unicode specification.
237
     * @since   1.1
238
     */
239
    public static final byte NON_SPACING_MARK = 6;
240

241
    /**
242
     * General category "Me" in the Unicode specification.
243
     * @since   1.1
244
     */
245
    public static final byte ENCLOSING_MARK = 7;
246

247
    /**
248
     * General category "Mc" in the Unicode specification.
249
     * @since   1.1
250
     */
251
    public static final byte COMBINING_SPACING_MARK = 8;
252

253
    /**
254
     * General category "Nd" in the Unicode specification.
255
     * @since   1.1
256
     */
257
    public static final byte DECIMAL_DIGIT_NUMBER        = 9;
258

259
    /**
260
     * General category "Nl" in the Unicode specification.
261
     * @since   1.1
262
     */
263
    public static final byte LETTER_NUMBER = 10;
264

265
    /**
266
     * General category "No" in the Unicode specification.
267
     * @since   1.1
268
     */
269
    public static final byte OTHER_NUMBER = 11;
270

271
    /**
272
     * General category "Zs" in the Unicode specification.
273
     * @since   1.1
274
     */
275
    public static final byte SPACE_SEPARATOR = 12;
276

277
    /**
278
     * General category "Zl" in the Unicode specification.
279
     * @since   1.1
280
     */
281
    public static final byte LINE_SEPARATOR = 13;
282

283
    /**
284
     * General category "Zp" in the Unicode specification.
285
     * @since   1.1
286
     */
287
    public static final byte PARAGRAPH_SEPARATOR = 14;
288

289
    /**
290
     * General category "Cc" in the Unicode specification.
291
     * @since   1.1
292
     */
293
    public static final byte CONTROL = 15;
294

295
    /**
296
     * General category "Cf" in the Unicode specification.
297
     * @since   1.1
298
     */
299
    public static final byte FORMAT = 16;
300

301
    /**
302
     * General category "Co" in the Unicode specification.
303
     * @since   1.1
304
     */
305
    public static final byte PRIVATE_USE = 18;
306

307
    /**
308
     * General category "Cs" in the Unicode specification.
309
     * @since   1.1
310
     */
311
    public static final byte SURROGATE = 19;
312

313
    /**
314
     * General category "Pd" in the Unicode specification.
315
     * @since   1.1
316
     */
317
    public static final byte DASH_PUNCTUATION = 20;
318

319
    /**
320
     * General category "Ps" in the Unicode specification.
321
     * @since   1.1
322
     */
323
    public static final byte START_PUNCTUATION = 21;
324

325
    /**
326
     * General category "Pe" in the Unicode specification.
327
     * @since   1.1
328
     */
329
    public static final byte END_PUNCTUATION = 22;
330

331
    /**
332
     * General category "Pc" in the Unicode specification.
333
     * @since   1.1
334
     */
335
    public static final byte CONNECTOR_PUNCTUATION = 23;
336

337
    /**
338
     * General category "Po" in the Unicode specification.
339
     * @since   1.1
340
     */
341
    public static final byte OTHER_PUNCTUATION = 24;
342

343
    /**
344
     * General category "Sm" in the Unicode specification.
345
     * @since   1.1
346
     */
347
    public static final byte MATH_SYMBOL = 25;
348

349
    /**
350
     * General category "Sc" in the Unicode specification.
351
     * @since   1.1
352
     */
353
    public static final byte CURRENCY_SYMBOL = 26;
354

355
    /**
356
     * General category "Sk" in the Unicode specification.
357
     * @since   1.1
358
     */
359
    public static final byte MODIFIER_SYMBOL = 27;
360

361
    /**
362
     * General category "So" in the Unicode specification.
363
     * @since   1.1
364
     */
365
    public static final byte OTHER_SYMBOL = 28;
366

367
    /**
368
     * General category "Pi" in the Unicode specification.
369
     * @since   1.4
370
     */
371
    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
372

373
    /**
374
     * General category "Pf" in the Unicode specification.
375
     * @since   1.4
376
     */
377
    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
378

379
    /**
380
     * Error flag. Use int (code point) to avoid confusion with U+FFFF.
381
     */
382
    static final int ERROR = 0xFFFFFFFF;
383

384

385
    /**
386
     * Undefined bidirectional character type. Undefined {@code char}
387
     * values have undefined directionality in the Unicode specification.
388
     * @since 1.4
389
     */
390
    public static final byte DIRECTIONALITY_UNDEFINED = -1;
391

392
    /**
393
     * Strong bidirectional character type "L" in the Unicode specification.
394
     * @since 1.4
395
     */
396
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
397

398
    /**
399
     * Strong bidirectional character type "R" in the Unicode specification.
400
     * @since 1.4
401
     */
402
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
403

404
    /**
405
     * Strong bidirectional character type "AL" in the Unicode specification.
406
     * @since 1.4
407
     */
408
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
409

410
    /**
411
     * Weak bidirectional character type "EN" in the Unicode specification.
412
     * @since 1.4
413
     */
414
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
415

416
    /**
417
     * Weak bidirectional character type "ES" in the Unicode specification.
418
     * @since 1.4
419
     */
420
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
421

422
    /**
423
     * Weak bidirectional character type "ET" in the Unicode specification.
424
     * @since 1.4
425
     */
426
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
427

428
    /**
429
     * Weak bidirectional character type "AN" in the Unicode specification.
430
     * @since 1.4
431
     */
432
    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
433

434
    /**
435
     * Weak bidirectional character type "CS" in the Unicode specification.
436
     * @since 1.4
437
     */
438
    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
439

440
    /**
441
     * Weak bidirectional character type "NSM" in the Unicode specification.
442
     * @since 1.4
443
     */
444
    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
445

446
    /**
447
     * Weak bidirectional character type "BN" in the Unicode specification.
448
     * @since 1.4
449
     */
450
    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
451

452
    /**
453
     * Neutral bidirectional character type "B" in the Unicode specification.
454
     * @since 1.4
455
     */
456
    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
457

458
    /**
459
     * Neutral bidirectional character type "S" in the Unicode specification.
460
     * @since 1.4
461
     */
462
    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
463

464
    /**
465
     * Neutral bidirectional character type "WS" in the Unicode specification.
466
     * @since 1.4
467
     */
468
    public static final byte DIRECTIONALITY_WHITESPACE = 12;
469

470
    /**
471
     * Neutral bidirectional character type "ON" in the Unicode specification.
472
     * @since 1.4
473
     */
474
    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
475

476
    /**
477
     * Strong bidirectional character type "LRE" in the Unicode specification.
478
     * @since 1.4
479
     */
480
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
481

482
    /**
483
     * Strong bidirectional character type "LRO" in the Unicode specification.
484
     * @since 1.4
485
     */
486
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
487

488
    /**
489
     * Strong bidirectional character type "RLE" in the Unicode specification.
490
     * @since 1.4
491
     */
492
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
493

494
    /**
495
     * Strong bidirectional character type "RLO" in the Unicode specification.
496
     * @since 1.4
497
     */
498
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
499

500
    /**
501
     * Weak bidirectional character type "PDF" in the Unicode specification.
502
     * @since 1.4
503
     */
504
    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
505

506
    /**
507
     * The minimum value of a
508
     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
509
     * Unicode high-surrogate code unit</a>
510
     * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
511
     * A high-surrogate is also known as a <i>leading-surrogate</i>.
512
     *
513
     * @since 1.5
514
     */
515
    public static final char MIN_HIGH_SURROGATE = '\uD800';
516

517
    /**
518
     * The maximum value of a
519
     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
520
     * Unicode high-surrogate code unit</a>
521
     * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
522
     * A high-surrogate is also known as a <i>leading-surrogate</i>.
523
     *
524
     * @since 1.5
525
     */
526
    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
527

528
    /**
529
     * The minimum value of a
530
     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
531
     * Unicode low-surrogate code unit</a>
532
     * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
533
     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
534
     *
535
     * @since 1.5
536
     */
537
    public static final char MIN_LOW_SURROGATE  = '\uDC00';
538

539
    /**
540
     * The maximum value of a
541
     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
542
     * Unicode low-surrogate code unit</a>
543
     * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
544
     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
545
     *
546
     * @since 1.5
547
     */
548
    public static final char MAX_LOW_SURROGATE  = '\uDFFF';
549

550
    /**
551
     * The minimum value of a Unicode surrogate code unit in the
552
     * UTF-16 encoding, constant {@code '\u005CuD800'}.
553
     *
554
     * @since 1.5
555
     */
556
    public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
557

558
    /**
559
     * The maximum value of a Unicode surrogate code unit in the
560
     * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
561
     *
562
     * @since 1.5
563
     */
564
    public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
565

566
    /**
567
     * The minimum value of a
568
     * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
569
     * Unicode supplementary code point</a>, constant {@code U+10000}.
570
     *
571
     * @since 1.5
572
     */
573
    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
574

575
    /**
576
     * The minimum value of a
577
     * <a href="http://www.unicode.org/glossary/#code_point">
578
     * Unicode code point</a>, constant {@code U+0000}.
579
     *
580
     * @since 1.5
581
     */
582
    public static final int MIN_CODE_POINT = 0x000000;
583

584
    /**
585
     * The maximum value of a
586
     * <a href="http://www.unicode.org/glossary/#code_point">
587
     * Unicode code point</a>, constant {@code U+10FFFF}.
588
     *
589
     * @since 1.5
590
     */
591
    public static final int MAX_CODE_POINT = 0X10FFFF;
592

593

594
    /**
595
     * Instances of this class represent particular subsets of the Unicode
596
     * character set.  The only family of subsets defined in the
597
     * {@code Character} class is {@link Character.UnicodeBlock}.
598
     * Other portions of the Java API may define other subsets for their
599
     * own purposes.
600
     *
601
     * @since 1.2
602
     */
603
    public static class Subset  {
604

605
        private String name;
606

607
        /**
608
         * Constructs a new {@code Subset} instance.
609
         *
610
         * @param  name  The name of this subset
611
         * @exception NullPointerException if name is {@code null}
612
         */
613
        protected Subset(String name) {
614
            if (name == null) {
615
                throw new NullPointerException("name");
616
            }
617
            this.name = name;
618
        }
619

620
        /**
621
         * Compares two {@code Subset} objects for equality.
622
         * This method returns {@code true} if and only if
623
         * {@code this} and the argument refer to the same
624
         * object; since this method is {@code final}, this
625
         * guarantee holds for all subclasses.
626
         */
627
        public final boolean equals(Object obj) {
628
            return (this == obj);
629
        }
630

631
        /**
632
         * Returns the standard hash code as defined by the
633
         * {@link Object#hashCode} method.  This method
634
         * is {@code final} in order to ensure that the
635
         * {@code equals} and {@code hashCode} methods will
636
         * be consistent in all subclasses.
637
         */
638
        public final int hashCode() {
639
            return super.hashCode();
640
        }
641

642
        /**
643
         * Returns the name of this subset.
644
         */
645
        public final String toString() {
646
            return name;
647
        }
648
    }
649

650
    // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
651
    // for the latest specification of Unicode Blocks.
652

653
    /**
654
     * A family of character subsets representing the character blocks in the
655
     * Unicode specification. Character blocks generally define characters
656
     * used for a specific script or purpose. A character is contained by
657
     * at most one Unicode block.
658
     *
659
     * @since 1.2
660
     */
661
    public static final class UnicodeBlock extends Subset {
662

663
        private static Map<String, UnicodeBlock> map = new HashMap<>(256);
664

665
        /**
         * Creates a UnicodeBlock with the given identifier name.
         * This name must be the same as the block identifier.
         * The new instance is also stored in the static {@code map}
         * under {@code idName}.
         *
         * @param idName the identifier name of the block; passed to the
         *               {@code Subset} constructor, which rejects {@code null}
         */
        private UnicodeBlock(String idName) {
            super(idName);
            map.put(idName, this);
        }
673

674
        /**
         * Creates a UnicodeBlock with the given identifier name and
         * alias name.  The instance is stored in the static {@code map}
         * under both names.
         *
         * @param idName the identifier name of the block
         * @param alias  an additional key under which this block is stored
         */
        private UnicodeBlock(String idName, String alias) {
            this(idName);
            map.put(alias, this);
        }
682

683
        /**
         * Creates a UnicodeBlock with the given identifier name and
         * alias names.  The instance is stored in the static {@code map}
         * under the identifier name and under every alias.
         *
         * @param idName  the identifier name of the block
         * @param aliases additional keys under which this block is stored
         */
        private UnicodeBlock(String idName, String... aliases) {
            this(idName);
            for (String alias : aliases) {
                map.put(alias, this);
            }
        }
692

693
        /**
694
         * Constant for the "Basic Latin" Unicode character block.
695
         * @since 1.2
696
         */
697
        public static final UnicodeBlock  BASIC_LATIN =
698
            new UnicodeBlock("BASIC_LATIN",
699
                             "BASIC LATIN",
700
                             "BASICLATIN");
701

702
        /**
703
         * Constant for the "Latin-1 Supplement" Unicode character block.
704
         * @since 1.2
705
         */
706
        public static final UnicodeBlock LATIN_1_SUPPLEMENT =
707
            new UnicodeBlock("LATIN_1_SUPPLEMENT",
708
                             "LATIN-1 SUPPLEMENT",
709
                             "LATIN-1SUPPLEMENT");
710

711
        /**
712
         * Constant for the "Latin Extended-A" Unicode character block.
713
         * @since 1.2
714
         */
715
        public static final UnicodeBlock LATIN_EXTENDED_A =
716
            new UnicodeBlock("LATIN_EXTENDED_A",
717
                             "LATIN EXTENDED-A",
718
                             "LATINEXTENDED-A");
719

720
        /**
721
         * Constant for the "Latin Extended-B" Unicode character block.
722
         * @since 1.2
723
         */
724
        public static final UnicodeBlock LATIN_EXTENDED_B =
725
            new UnicodeBlock("LATIN_EXTENDED_B",
726
                             "LATIN EXTENDED-B",
727
                             "LATINEXTENDED-B");
728

729
        /**
730
         * Constant for the "IPA Extensions" Unicode character block.
731
         * @since 1.2
732
         */
733
        public static final UnicodeBlock IPA_EXTENSIONS =
734
            new UnicodeBlock("IPA_EXTENSIONS",
735
                             "IPA EXTENSIONS",
736
                             "IPAEXTENSIONS");
737

738
        /**
739
         * Constant for the "Spacing Modifier Letters" Unicode character block.
740
         * @since 1.2
741
         */
742
        public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
743
            new UnicodeBlock("SPACING_MODIFIER_LETTERS",
744
                             "SPACING MODIFIER LETTERS",
745
                             "SPACINGMODIFIERLETTERS");
746

747
        /**
748
         * Constant for the "Combining Diacritical Marks" Unicode character block.
749
         * @since 1.2
750
         */
751
        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
752
            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
753
                             "COMBINING DIACRITICAL MARKS",
754
                             "COMBININGDIACRITICALMARKS");
755

756
        /**
757
         * Constant for the "Greek and Coptic" Unicode character block.
758
         * <p>
759
         * This block was previously known as the "Greek" block.
760
         *
761
         * @since 1.2
762
         */
763
        public static final UnicodeBlock GREEK =
764
            new UnicodeBlock("GREEK",
765
                             "GREEK AND COPTIC",
766
                             "GREEKANDCOPTIC");
767

768
        /**
769
         * Constant for the "Cyrillic" Unicode character block.
770
         * @since 1.2
771
         */
772
        public static final UnicodeBlock CYRILLIC =
773
            new UnicodeBlock("CYRILLIC");
774

775
        /**
776
         * Constant for the "Armenian" Unicode character block.
777
         * @since 1.2
778
         */
779
        public static final UnicodeBlock ARMENIAN =
780
            new UnicodeBlock("ARMENIAN");
781

782
        /**
783
         * Constant for the "Hebrew" Unicode character block.
784
         * @since 1.2
785
         */
786
        public static final UnicodeBlock HEBREW =
787
            new UnicodeBlock("HEBREW");
788

789
        /**
790
         * Constant for the "Arabic" Unicode character block.
791
         * @since 1.2
792
         */
793
        public static final UnicodeBlock ARABIC =
794
            new UnicodeBlock("ARABIC");
795

796
        /**
797
         * Constant for the "Devanagari" Unicode character block.
798
         * @since 1.2
799
         */
800
        public static final UnicodeBlock DEVANAGARI =
801
            new UnicodeBlock("DEVANAGARI");
802

803
        /**
804
         * Constant for the "Bengali" Unicode character block.
805
         * @since 1.2
806
         */
807
        public static final UnicodeBlock BENGALI =
808
            new UnicodeBlock("BENGALI");
809

810
        /**
811
         * Constant for the "Gurmukhi" Unicode character block.
812
         * @since 1.2
813
         */
814
        public static final UnicodeBlock GURMUKHI =
815
            new UnicodeBlock("GURMUKHI");
816

817
        /**
818
         * Constant for the "Gujarati" Unicode character block.
819
         * @since 1.2
820
         */
821
        public static final UnicodeBlock GUJARATI =
822
            new UnicodeBlock("GUJARATI");
823

824
        /**
825
         * Constant for the "Oriya" Unicode character block.
826
         * @since 1.2
827
         */
828
        public static final UnicodeBlock ORIYA =
829
            new UnicodeBlock("ORIYA");
830

831
        /**
832
         * Constant for the "Tamil" Unicode character block.
833
         * @since 1.2
834
         */
835
        public static final UnicodeBlock TAMIL =
836
            new UnicodeBlock("TAMIL");
837

838
        /**
839
         * Constant for the "Telugu" Unicode character block.
840
         * @since 1.2
841
         */
842
        public static final UnicodeBlock TELUGU =
843
            new UnicodeBlock("TELUGU");
844

845
        /**
846
         * Constant for the "Kannada" Unicode character block.
847
         * @since 1.2
848
         */
849
        public static final UnicodeBlock KANNADA =
850
            new UnicodeBlock("KANNADA");
851

852
        /**
853
         * Constant for the "Malayalam" Unicode character block.
854
         * @since 1.2
855
         */
856
        public static final UnicodeBlock MALAYALAM =
857
            new UnicodeBlock("MALAYALAM");
858

859
        /**
860
         * Constant for the "Thai" Unicode character block.
861
         * @since 1.2
862
         */
863
        public static final UnicodeBlock THAI =
864
            new UnicodeBlock("THAI");
865

866
        /**
867
         * Constant for the "Lao" Unicode character block.
868
         * @since 1.2
869
         */
870
        public static final UnicodeBlock LAO =
871
            new UnicodeBlock("LAO");
872

873
        /**
874
         * Constant for the "Tibetan" Unicode character block.
875
         * @since 1.2
876
         */
877
        public static final UnicodeBlock TIBETAN =
878
            new UnicodeBlock("TIBETAN");
879

880
        /**
881
         * Constant for the "Georgian" Unicode character block.
882
         * @since 1.2
883
         */
884
        public static final UnicodeBlock GEORGIAN =
885
            new UnicodeBlock("GEORGIAN");
886

887
        /**
888
         * Constant for the "Hangul Jamo" Unicode character block.
889
         * @since 1.2
890
         */
891
        public static final UnicodeBlock HANGUL_JAMO =
892
            new UnicodeBlock("HANGUL_JAMO",
893
                             "HANGUL JAMO",
894
                             "HANGULJAMO");
895

896
        /**
897
         * Constant for the "Latin Extended Additional" Unicode character block.
898
         * @since 1.2
899
         */
900
        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
901
            new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
902
                             "LATIN EXTENDED ADDITIONAL",
903
                             "LATINEXTENDEDADDITIONAL");
904

905
        /**
906
         * Constant for the "Greek Extended" Unicode character block.
907
         * @since 1.2
908
         */
909
        public static final UnicodeBlock GREEK_EXTENDED =
910
            new UnicodeBlock("GREEK_EXTENDED",
911
                             "GREEK EXTENDED",
912
                             "GREEKEXTENDED");
913

914
        /**
915
         * Constant for the "General Punctuation" Unicode character block.
916
         * @since 1.2
917
         */
918
        public static final UnicodeBlock GENERAL_PUNCTUATION =
919
            new UnicodeBlock("GENERAL_PUNCTUATION",
920
                             "GENERAL PUNCTUATION",
921
                             "GENERALPUNCTUATION");
922

923
        /**
924
         * Constant for the "Superscripts and Subscripts" Unicode character
925
         * block.
926
         * @since 1.2
927
         */
928
        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
929
            new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
930
                             "SUPERSCRIPTS AND SUBSCRIPTS",
931
                             "SUPERSCRIPTSANDSUBSCRIPTS");
932

933
        /**
934
         * Constant for the "Currency Symbols" Unicode character block.
935
         * @since 1.2
936
         */
937
        public static final UnicodeBlock CURRENCY_SYMBOLS =
938
            new UnicodeBlock("CURRENCY_SYMBOLS",
939
                             "CURRENCY SYMBOLS",
940
                             "CURRENCYSYMBOLS");
941

942
        /**
943
         * Constant for the "Combining Diacritical Marks for Symbols" Unicode
944
         * character block.
945
         * <p>
946
         * This block was previously known as "Combining Marks for Symbols".
947
         * @since 1.2
948
         */
949
        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
950
            new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
951
                             "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
952
                             "COMBININGDIACRITICALMARKSFORSYMBOLS",
953
                             "COMBINING MARKS FOR SYMBOLS",
954
                             "COMBININGMARKSFORSYMBOLS");
955

956
        /**
957
         * Constant for the "Letterlike Symbols" Unicode character block.
958
         * @since 1.2
959
         */
960
        public static final UnicodeBlock LETTERLIKE_SYMBOLS =
961
            new UnicodeBlock("LETTERLIKE_SYMBOLS",
962
                             "LETTERLIKE SYMBOLS",
963
                             "LETTERLIKESYMBOLS");
964

965
        /**
966
         * Constant for the "Number Forms" Unicode character block.
967
         * @since 1.2
968
         */
969
        public static final UnicodeBlock NUMBER_FORMS =
970
            new UnicodeBlock("NUMBER_FORMS",
971
                             "NUMBER FORMS",
972
                             "NUMBERFORMS");
973

974
        /**
975
         * Constant for the "Arrows" Unicode character block.
976
         * @since 1.2
977
         */
978
        public static final UnicodeBlock ARROWS =
979
            new UnicodeBlock("ARROWS");
980

981
        /**
982
         * Constant for the "Mathematical Operators" Unicode character block.
983
         * @since 1.2
984
         */
985
        public static final UnicodeBlock MATHEMATICAL_OPERATORS =
986
            new UnicodeBlock("MATHEMATICAL_OPERATORS",
987
                             "MATHEMATICAL OPERATORS",
988
                             "MATHEMATICALOPERATORS");
989

990
        /**
991
         * Constant for the "Miscellaneous Technical" Unicode character block.
992
         * @since 1.2
993
         */
994
        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
995
            new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
996
                             "MISCELLANEOUS TECHNICAL",
997
                             "MISCELLANEOUSTECHNICAL");
998

999
        /**
1000
         * Constant for the "Control Pictures" Unicode character block.
1001
         * @since 1.2
1002
         */
1003
        public static final UnicodeBlock CONTROL_PICTURES =
1004
            new UnicodeBlock("CONTROL_PICTURES",
1005
                             "CONTROL PICTURES",
1006
                             "CONTROLPICTURES");
1007

1008
        /**
1009
         * Constant for the "Optical Character Recognition" Unicode character block.
1010
         * @since 1.2
1011
         */
1012
        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1013
            new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1014
                             "OPTICAL CHARACTER RECOGNITION",
1015
                             "OPTICALCHARACTERRECOGNITION");
1016

1017
        /**
1018
         * Constant for the "Enclosed Alphanumerics" Unicode character block.
1019
         * @since 1.2
1020
         */
1021
        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1022
            new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1023
                             "ENCLOSED ALPHANUMERICS",
1024
                             "ENCLOSEDALPHANUMERICS");
1025

1026
        /**
1027
         * Constant for the "Box Drawing" Unicode character block.
1028
         * @since 1.2
1029
         */
1030
        public static final UnicodeBlock BOX_DRAWING =
1031
            new UnicodeBlock("BOX_DRAWING",
1032
                             "BOX DRAWING",
1033
                             "BOXDRAWING");
1034

1035
        /**
1036
         * Constant for the "Block Elements" Unicode character block.
1037
         * @since 1.2
1038
         */
1039
        public static final UnicodeBlock BLOCK_ELEMENTS =
1040
            new UnicodeBlock("BLOCK_ELEMENTS",
1041
                             "BLOCK ELEMENTS",
1042
                             "BLOCKELEMENTS");
1043

1044
        /**
1045
         * Constant for the "Geometric Shapes" Unicode character block.
1046
         * @since 1.2
1047
         */
1048
        public static final UnicodeBlock GEOMETRIC_SHAPES =
1049
            new UnicodeBlock("GEOMETRIC_SHAPES",
1050
                             "GEOMETRIC SHAPES",
1051
                             "GEOMETRICSHAPES");
1052

1053
        /**
1054
         * Constant for the "Miscellaneous Symbols" Unicode character block.
1055
         * @since 1.2
1056
         */
1057
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1058
            new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1059
                             "MISCELLANEOUS SYMBOLS",
1060
                             "MISCELLANEOUSSYMBOLS");
1061

1062
        /**
1063
         * Constant for the "Dingbats" Unicode character block.
1064
         * @since 1.2
1065
         */
1066
        public static final UnicodeBlock DINGBATS =
1067
            new UnicodeBlock("DINGBATS");
1068

1069
        /**
1070
         * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1071
         * @since 1.2
1072
         */
1073
        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1074
            new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1075
                             "CJK SYMBOLS AND PUNCTUATION",
1076
                             "CJKSYMBOLSANDPUNCTUATION");
1077

1078
        /**
1079
         * Constant for the "Hiragana" Unicode character block.
1080
         * @since 1.2
1081
         */
1082
        public static final UnicodeBlock HIRAGANA =
1083
            new UnicodeBlock("HIRAGANA");
1084

1085
        /**
1086
         * Constant for the "Katakana" Unicode character block.
1087
         * @since 1.2
1088
         */
1089
        public static final UnicodeBlock KATAKANA =
1090
            new UnicodeBlock("KATAKANA");
1091

1092
        /**
1093
         * Constant for the "Bopomofo" Unicode character block.
1094
         * @since 1.2
1095
         */
1096
        public static final UnicodeBlock BOPOMOFO =
1097
            new UnicodeBlock("BOPOMOFO");
1098

1099
        /**
1100
         * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1101
         * @since 1.2
1102
         */
1103
        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1104
            new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1105
                             "HANGUL COMPATIBILITY JAMO",
1106
                             "HANGULCOMPATIBILITYJAMO");
1107

1108
        /**
1109
         * Constant for the "Kanbun" Unicode character block.
1110
         * @since 1.2
1111
         */
1112
        public static final UnicodeBlock KANBUN =
1113
            new UnicodeBlock("KANBUN");
1114

1115
        /**
1116
         * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1117
         * @since 1.2
1118
         */
1119
        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1120
            new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1121
                             "ENCLOSED CJK LETTERS AND MONTHS",
1122
                             "ENCLOSEDCJKLETTERSANDMONTHS");
1123

1124
        /**
1125
         * Constant for the "CJK Compatibility" Unicode character block.
1126
         * @since 1.2
1127
         */
1128
        public static final UnicodeBlock CJK_COMPATIBILITY =
1129
            new UnicodeBlock("CJK_COMPATIBILITY",
1130
                             "CJK COMPATIBILITY",
1131
                             "CJKCOMPATIBILITY");
1132

1133
        /**
1134
         * Constant for the "CJK Unified Ideographs" Unicode character block.
1135
         * @since 1.2
1136
         */
1137
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1138
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1139
                             "CJK UNIFIED IDEOGRAPHS",
1140
                             "CJKUNIFIEDIDEOGRAPHS");
1141

1142
        /**
1143
         * Constant for the "Hangul Syllables" Unicode character block.
1144
         * @since 1.2
1145
         */
1146
        public static final UnicodeBlock HANGUL_SYLLABLES =
1147
            new UnicodeBlock("HANGUL_SYLLABLES",
1148
                             "HANGUL SYLLABLES",
1149
                             "HANGULSYLLABLES");
1150

1151
        /**
1152
         * Constant for the "Private Use Area" Unicode character block.
1153
         * @since 1.2
1154
         */
1155
        public static final UnicodeBlock PRIVATE_USE_AREA =
1156
            new UnicodeBlock("PRIVATE_USE_AREA",
1157
                             "PRIVATE USE AREA",
1158
                             "PRIVATEUSEAREA");
1159

1160
        /**
1161
         * Constant for the "CJK Compatibility Ideographs" Unicode character
1162
         * block.
1163
         * @since 1.2
1164
         */
1165
        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1166
            new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1167
                             "CJK COMPATIBILITY IDEOGRAPHS",
1168
                             "CJKCOMPATIBILITYIDEOGRAPHS");
1169

1170
        /**
1171
         * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1172
         * @since 1.2
1173
         */
1174
        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1175
            new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1176
                             "ALPHABETIC PRESENTATION FORMS",
1177
                             "ALPHABETICPRESENTATIONFORMS");
1178

1179
        /**
1180
         * Constant for the "Arabic Presentation Forms-A" Unicode character
1181
         * block.
1182
         * @since 1.2
1183
         */
1184
        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1185
            new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1186
                             "ARABIC PRESENTATION FORMS-A",
1187
                             "ARABICPRESENTATIONFORMS-A");
1188

1189
        /**
1190
         * Constant for the "Combining Half Marks" Unicode character block.
1191
         * @since 1.2
1192
         */
1193
        public static final UnicodeBlock COMBINING_HALF_MARKS =
1194
            new UnicodeBlock("COMBINING_HALF_MARKS",
1195
                             "COMBINING HALF MARKS",
1196
                             "COMBININGHALFMARKS");
1197

1198
        /**
1199
         * Constant for the "CJK Compatibility Forms" Unicode character block.
1200
         * @since 1.2
1201
         */
1202
        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1203
            new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1204
                             "CJK COMPATIBILITY FORMS",
1205
                             "CJKCOMPATIBILITYFORMS");
1206

1207
        /**
1208
         * Constant for the "Small Form Variants" Unicode character block.
1209
         * @since 1.2
1210
         */
1211
        public static final UnicodeBlock SMALL_FORM_VARIANTS =
1212
            new UnicodeBlock("SMALL_FORM_VARIANTS",
1213
                             "SMALL FORM VARIANTS",
1214
                             "SMALLFORMVARIANTS");
1215

1216
        /**
1217
         * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1218
         * @since 1.2
1219
         */
1220
        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1221
            new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1222
                             "ARABIC PRESENTATION FORMS-B",
1223
                             "ARABICPRESENTATIONFORMS-B");
1224

1225
        /**
1226
         * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1227
         * block.
1228
         * @since 1.2
1229
         */
1230
        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1231
            new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1232
                             "HALFWIDTH AND FULLWIDTH FORMS",
1233
                             "HALFWIDTHANDFULLWIDTHFORMS");
1234

1235
        /**
1236
         * Constant for the "Specials" Unicode character block.
1237
         * @since 1.2
1238
         */
1239
        public static final UnicodeBlock SPECIALS =
1240
            new UnicodeBlock("SPECIALS");
1241

1242
        /**
1243
         * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1244
         *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1245
         *             {@link #LOW_SURROGATES}. These new constants match
1246
         *             the block definitions of the Unicode Standard.
1247
         *             The {@link #of(char)} and {@link #of(int)} methods
1248
         *             return the new constants, not SURROGATES_AREA.
1249
         */
1250
        @Deprecated
1251
        public static final UnicodeBlock SURROGATES_AREA =
1252
            new UnicodeBlock("SURROGATES_AREA");
1253

1254
        /**
1255
         * Constant for the "Syriac" Unicode character block.
1256
         * @since 1.4
1257
         */
1258
        public static final UnicodeBlock SYRIAC =
1259
            new UnicodeBlock("SYRIAC");
1260

1261
        /**
1262
         * Constant for the "Thaana" Unicode character block.
1263
         * @since 1.4
1264
         */
1265
        public static final UnicodeBlock THAANA =
1266
            new UnicodeBlock("THAANA");
1267

1268
        /**
1269
         * Constant for the "Sinhala" Unicode character block.
1270
         * @since 1.4
1271
         */
1272
        public static final UnicodeBlock SINHALA =
1273
            new UnicodeBlock("SINHALA");
1274

1275
        /**
1276
         * Constant for the "Myanmar" Unicode character block.
1277
         * @since 1.4
1278
         */
1279
        public static final UnicodeBlock MYANMAR =
1280
            new UnicodeBlock("MYANMAR");
1281

1282
        /**
1283
         * Constant for the "Ethiopic" Unicode character block.
1284
         * @since 1.4
1285
         */
1286
        public static final UnicodeBlock ETHIOPIC =
1287
            new UnicodeBlock("ETHIOPIC");
1288

1289
        /**
1290
         * Constant for the "Cherokee" Unicode character block.
1291
         * @since 1.4
1292
         */
1293
        public static final UnicodeBlock CHEROKEE =
1294
            new UnicodeBlock("CHEROKEE");
1295

1296
        /**
1297
         * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1298
         * @since 1.4
1299
         */
1300
        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1301
            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1302
                             "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1303
                             "UNIFIEDCANADIANABORIGINALSYLLABICS");
1304

1305
        /**
1306
         * Constant for the "Ogham" Unicode character block.
1307
         * @since 1.4
1308
         */
1309
        public static final UnicodeBlock OGHAM =
1310
            new UnicodeBlock("OGHAM");
1311

1312
        /**
1313
         * Constant for the "Runic" Unicode character block.
1314
         * @since 1.4
1315
         */
1316
        public static final UnicodeBlock RUNIC =
1317
            new UnicodeBlock("RUNIC");
1318

1319
        /**
1320
         * Constant for the "Khmer" Unicode character block.
1321
         * @since 1.4
1322
         */
1323
        public static final UnicodeBlock KHMER =
1324
            new UnicodeBlock("KHMER");
1325

1326
        /**
1327
         * Constant for the "Mongolian" Unicode character block.
1328
         * @since 1.4
1329
         */
1330
        public static final UnicodeBlock MONGOLIAN =
1331
            new UnicodeBlock("MONGOLIAN");
1332

1333
        /**
1334
         * Constant for the "Braille Patterns" Unicode character block.
1335
         * @since 1.4
1336
         */
1337
        public static final UnicodeBlock BRAILLE_PATTERNS =
1338
            new UnicodeBlock("BRAILLE_PATTERNS",
1339
                             "BRAILLE PATTERNS",
1340
                             "BRAILLEPATTERNS");
1341

1342
        /**
1343
         * Constant for the "CJK Radicals Supplement" Unicode character block.
1344
         * @since 1.4
1345
         */
1346
        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1347
            new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1348
                             "CJK RADICALS SUPPLEMENT",
1349
                             "CJKRADICALSSUPPLEMENT");
1350

1351
        /**
1352
         * Constant for the "Kangxi Radicals" Unicode character block.
1353
         * @since 1.4
1354
         */
1355
        public static final UnicodeBlock KANGXI_RADICALS =
1356
            new UnicodeBlock("KANGXI_RADICALS",
1357
                             "KANGXI RADICALS",
1358
                             "KANGXIRADICALS");
1359

1360
        /**
1361
         * Constant for the "Ideographic Description Characters" Unicode character block.
1362
         * @since 1.4
1363
         */
1364
        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1365
            new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1366
                             "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1367
                             "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1368

1369
        /**
1370
         * Constant for the "Bopomofo Extended" Unicode character block.
1371
         * @since 1.4
1372
         */
1373
        public static final UnicodeBlock BOPOMOFO_EXTENDED =
1374
            new UnicodeBlock("BOPOMOFO_EXTENDED",
1375
                             "BOPOMOFO EXTENDED",
1376
                             "BOPOMOFOEXTENDED");
1377

1378
        /**
1379
         * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1380
         * @since 1.4
1381
         */
1382
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1383
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1384
                             "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1385
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1386

1387
        /**
1388
         * Constant for the "Yi Syllables" Unicode character block.
1389
         * @since 1.4
1390
         */
1391
        public static final UnicodeBlock YI_SYLLABLES =
1392
            new UnicodeBlock("YI_SYLLABLES",
1393
                             "YI SYLLABLES",
1394
                             "YISYLLABLES");
1395

1396
        /**
1397
         * Constant for the "Yi Radicals" Unicode character block.
1398
         * @since 1.4
1399
         */
1400
        public static final UnicodeBlock YI_RADICALS =
1401
            new UnicodeBlock("YI_RADICALS",
1402
                             "YI RADICALS",
1403
                             "YIRADICALS");
1404

1405
        /**
1406
         * Constant for the "Cyrillic Supplementary" Unicode character block.
1407
         * @since 1.5
1408
         */
1409
        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1410
            new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1411
                             "CYRILLIC SUPPLEMENTARY",
1412
                             "CYRILLICSUPPLEMENTARY",
1413
                             "CYRILLIC SUPPLEMENT",
1414
                             "CYRILLICSUPPLEMENT");
1415

1416
        /**
1417
         * Constant for the "Tagalog" Unicode character block.
1418
         * @since 1.5
1419
         */
1420
        public static final UnicodeBlock TAGALOG =
1421
            new UnicodeBlock("TAGALOG");
1422

1423
        /**
1424
         * Constant for the "Hanunoo" Unicode character block.
1425
         * @since 1.5
1426
         */
1427
        public static final UnicodeBlock HANUNOO =
1428
            new UnicodeBlock("HANUNOO");
1429

1430
        /**
1431
         * Constant for the "Buhid" Unicode character block.
1432
         * @since 1.5
1433
         */
1434
        public static final UnicodeBlock BUHID =
1435
            new UnicodeBlock("BUHID");
1436

1437
        /**
1438
         * Constant for the "Tagbanwa" Unicode character block.
1439
         * @since 1.5
1440
         */
1441
        public static final UnicodeBlock TAGBANWA =
1442
            new UnicodeBlock("TAGBANWA");
1443

1444
        /**
1445
         * Constant for the "Limbu" Unicode character block.
1446
         * @since 1.5
1447
         */
1448
        public static final UnicodeBlock LIMBU =
1449
            new UnicodeBlock("LIMBU");
1450

1451
        /**
1452
         * Constant for the "Tai Le" Unicode character block.
1453
         * @since 1.5
1454
         */
1455
        public static final UnicodeBlock TAI_LE =
1456
            new UnicodeBlock("TAI_LE",
1457
                             "TAI LE",
1458
                             "TAILE");
1459

1460
        /**
1461
         * Constant for the "Khmer Symbols" Unicode character block.
1462
         * @since 1.5
1463
         */
1464
        public static final UnicodeBlock KHMER_SYMBOLS =
1465
            new UnicodeBlock("KHMER_SYMBOLS",
1466
                             "KHMER SYMBOLS",
1467
                             "KHMERSYMBOLS");
1468

1469
        /**
1470
         * Constant for the "Phonetic Extensions" Unicode character block.
1471
         * @since 1.5
1472
         */
1473
        public static final UnicodeBlock PHONETIC_EXTENSIONS =
1474
            new UnicodeBlock("PHONETIC_EXTENSIONS",
1475
                             "PHONETIC EXTENSIONS",
1476
                             "PHONETICEXTENSIONS");
1477

1478
        /**
1479
         * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1480
         * @since 1.5
1481
         */
1482
        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1483
            new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1484
                             "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1485
                             "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1486

1487
        /**
1488
         * Constant for the "Supplemental Arrows-A" Unicode character block.
1489
         * @since 1.5
1490
         */
1491
        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1492
            new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1493
                             "SUPPLEMENTAL ARROWS-A",
1494
                             "SUPPLEMENTALARROWS-A");
1495

1496
        /**
1497
         * Constant for the "Supplemental Arrows-B" Unicode character block.
1498
         * @since 1.5
1499
         */
1500
        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1501
            new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1502
                             "SUPPLEMENTAL ARROWS-B",
1503
                             "SUPPLEMENTALARROWS-B");
1504

1505
        /**
1506
         * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1507
         * character block.
1508
         * @since 1.5
1509
         */
1510
        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1511
            new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1512
                             "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1513
                             "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1514

1515
        /**
1516
         * Constant for the "Supplemental Mathematical Operators" Unicode
1517
         * character block.
1518
         * @since 1.5
1519
         */
1520
        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1521
            new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1522
                             "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1523
                             "SUPPLEMENTALMATHEMATICALOPERATORS");
1524

1525
        /**
1526
         * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1527
         * block.
1528
         * @since 1.5
1529
         */
1530
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1531
            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1532
                             "MISCELLANEOUS SYMBOLS AND ARROWS",
1533
                             "MISCELLANEOUSSYMBOLSANDARROWS");
1534

1535
        /**
1536
         * Constant for the "Katakana Phonetic Extensions" Unicode character
1537
         * block.
1538
         * @since 1.5
1539
         */
1540
        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1541
            new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1542
                             "KATAKANA PHONETIC EXTENSIONS",
1543
                             "KATAKANAPHONETICEXTENSIONS");
1544

1545
        /**
1546
         * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1547
         * @since 1.5
1548
         */
1549
        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1550
            new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1551
                             "YIJING HEXAGRAM SYMBOLS",
1552
                             "YIJINGHEXAGRAMSYMBOLS");
1553

1554
        /**
1555
         * Constant for the "Variation Selectors" Unicode character block.
1556
         * @since 1.5
1557
         */
1558
        public static final UnicodeBlock VARIATION_SELECTORS =
1559
            new UnicodeBlock("VARIATION_SELECTORS",
1560
                             "VARIATION SELECTORS",
1561
                             "VARIATIONSELECTORS");
1562

1563
        /**
1564
         * Constant for the "Linear B Syllabary" Unicode character block.
1565
         * @since 1.5
1566
         */
1567
        public static final UnicodeBlock LINEAR_B_SYLLABARY =
1568
            new UnicodeBlock("LINEAR_B_SYLLABARY",
1569
                             "LINEAR B SYLLABARY",
1570
                             "LINEARBSYLLABARY");
1571

1572
        /**
1573
         * Constant for the "Linear B Ideograms" Unicode character block.
1574
         * @since 1.5
1575
         */
1576
        public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1577
            new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1578
                             "LINEAR B IDEOGRAMS",
1579
                             "LINEARBIDEOGRAMS");
1580

1581
        /**
1582
         * Constant for the "Aegean Numbers" Unicode character block.
1583
         * @since 1.5
1584
         */
1585
        public static final UnicodeBlock AEGEAN_NUMBERS =
1586
            new UnicodeBlock("AEGEAN_NUMBERS",
1587
                             "AEGEAN NUMBERS",
1588
                             "AEGEANNUMBERS");
1589

1590
        /**
1591
         * Constant for the "Old Italic" Unicode character block.
1592
         * @since 1.5
1593
         */
1594
        public static final UnicodeBlock OLD_ITALIC =
1595
            new UnicodeBlock("OLD_ITALIC",
1596
                             "OLD ITALIC",
1597
                             "OLDITALIC");
1598

1599
        /**
1600
         * Constant for the "Gothic" Unicode character block.
1601
         * @since 1.5
1602
         */
1603
        public static final UnicodeBlock GOTHIC =
1604
            new UnicodeBlock("GOTHIC");
1605

1606
        /**
1607
         * Constant for the "Ugaritic" Unicode character block.
1608
         * @since 1.5
1609
         */
1610
        public static final UnicodeBlock UGARITIC =
1611
            new UnicodeBlock("UGARITIC");
1612

1613
        /**
1614
         * Constant for the "Deseret" Unicode character block.
1615
         * @since 1.5
1616
         */
1617
        public static final UnicodeBlock DESERET =
1618
            new UnicodeBlock("DESERET");
1619

1620
        /**
1621
         * Constant for the "Shavian" Unicode character block.
1622
         * @since 1.5
1623
         */
1624
        public static final UnicodeBlock SHAVIAN =
1625
            new UnicodeBlock("SHAVIAN");
1626

1627
        /**
1628
         * Constant for the "Osmanya" Unicode character block.
1629
         * @since 1.5
1630
         */
1631
        public static final UnicodeBlock OSMANYA =
1632
            new UnicodeBlock("OSMANYA");
1633

1634
        /**
1635
         * Constant for the "Cypriot Syllabary" Unicode character block.
1636
         * @since 1.5
1637
         */
1638
        public static final UnicodeBlock CYPRIOT_SYLLABARY =
1639
            new UnicodeBlock("CYPRIOT_SYLLABARY",
1640
                             "CYPRIOT SYLLABARY",
1641
                             "CYPRIOTSYLLABARY");
1642

1643
        /**
1644
         * Constant for the "Byzantine Musical Symbols" Unicode character block.
1645
         * @since 1.5
1646
         */
1647
        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1648
            new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1649
                             "BYZANTINE MUSICAL SYMBOLS",
1650
                             "BYZANTINEMUSICALSYMBOLS");
1651

1652
        /**
1653
         * Constant for the "Musical Symbols" Unicode character block.
1654
         * @since 1.5
1655
         */
1656
        public static final UnicodeBlock MUSICAL_SYMBOLS =
1657
            new UnicodeBlock("MUSICAL_SYMBOLS",
1658
                             "MUSICAL SYMBOLS",
1659
                             "MUSICALSYMBOLS");
1660

1661
        /**
1662
         * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1663
         * @since 1.5
1664
         */
1665
        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1666
            new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1667
                             "TAI XUAN JING SYMBOLS",
1668
                             "TAIXUANJINGSYMBOLS");
1669

1670
        /**
1671
         * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1672
         * character block.
1673
         * @since 1.5
1674
         */
1675
        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1676
            new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1677
                             "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1678
                             "MATHEMATICALALPHANUMERICSYMBOLS");
1679

1680
        /**
1681
         * Constant for the "CJK Unified Ideographs Extension B" Unicode
1682
         * character block.
1683
         * @since 1.5
1684
         */
1685
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1686
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1687
                             "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1688
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1689

1690
        /**
1691
         * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1692
         * @since 1.5
1693
         */
1694
        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1695
            new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1696
                             "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1697
                             "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1698

1699
        /**
1700
         * Constant for the "Tags" Unicode character block.
1701
         * @since 1.5
1702
         */
1703
        public static final UnicodeBlock TAGS =
1704
            new UnicodeBlock("TAGS");
1705

1706
        /**
1707
         * Constant for the "Variation Selectors Supplement" Unicode character
1708
         * block.
1709
         * @since 1.5
1710
         */
1711
        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1712
            new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1713
                             "VARIATION SELECTORS SUPPLEMENT",
1714
                             "VARIATIONSELECTORSSUPPLEMENT");
1715

1716
        /**
1717
         * Constant for the "Supplementary Private Use Area-A" Unicode character
1718
         * block.
1719
         * @since 1.5
1720
         */
1721
        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1722
            new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1723
                             "SUPPLEMENTARY PRIVATE USE AREA-A",
1724
                             "SUPPLEMENTARYPRIVATEUSEAREA-A");
1725

1726
        /**
1727
         * Constant for the "Supplementary Private Use Area-B" Unicode character
1728
         * block.
1729
         * @since 1.5
1730
         */
1731
        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1732
            new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1733
                             "SUPPLEMENTARY PRIVATE USE AREA-B",
1734
                             "SUPPLEMENTARYPRIVATEUSEAREA-B");
1735

1736
        /**
1737
         * Constant for the "High Surrogates" Unicode character block.
1738
         * This block represents codepoint values in the high surrogate
1739
         * range: U+D800 through U+DB7F
1740
         *
1741
         * @since 1.5
1742
         */
1743
        public static final UnicodeBlock HIGH_SURROGATES =
1744
            new UnicodeBlock("HIGH_SURROGATES",
1745
                             "HIGH SURROGATES",
1746
                             "HIGHSURROGATES");
1747

1748
        /**
1749
         * Constant for the "High Private Use Surrogates" Unicode character
1750
         * block.
1751
         * This block represents codepoint values in the private use high
1752
         * surrogate range: U+DB80 through U+DBFF
1753
         *
1754
         * @since 1.5
1755
         */
1756
        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1757
            new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1758
                             "HIGH PRIVATE USE SURROGATES",
1759
                             "HIGHPRIVATEUSESURROGATES");
1760

1761
        /**
1762
         * Constant for the "Low Surrogates" Unicode character block.
1763
         * This block represents codepoint values in the low surrogate
1764
         * range: U+DC00 through U+DFFF
1765
         *
1766
         * @since 1.5
1767
         */
1768
        public static final UnicodeBlock LOW_SURROGATES =
1769
            new UnicodeBlock("LOW_SURROGATES",
1770
                             "LOW SURROGATES",
1771
                             "LOWSURROGATES");
1772

1773
        /**
1774
         * Constant for the "Arabic Supplement" Unicode character block.
1775
         * @since 1.7
1776
         */
1777
        public static final UnicodeBlock ARABIC_SUPPLEMENT =
1778
            new UnicodeBlock("ARABIC_SUPPLEMENT",
1779
                             "ARABIC SUPPLEMENT",
1780
                             "ARABICSUPPLEMENT");
1781

1782
        /**
1783
         * Constant for the "NKo" Unicode character block.
1784
         * @since 1.7
1785
         */
1786
        public static final UnicodeBlock NKO =
1787
            new UnicodeBlock("NKO");
1788

1789
        /**
1790
         * Constant for the "Samaritan" Unicode character block.
1791
         * @since 1.7
1792
         */
1793
        public static final UnicodeBlock SAMARITAN =
1794
            new UnicodeBlock("SAMARITAN");
1795

1796
        /**
1797
         * Constant for the "Mandaic" Unicode character block.
1798
         * @since 1.7
1799
         */
1800
        public static final UnicodeBlock MANDAIC =
1801
            new UnicodeBlock("MANDAIC");
1802

1803
        /**
1804
         * Constant for the "Ethiopic Supplement" Unicode character block.
1805
         * @since 1.7
1806
         */
1807
        public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1808
            new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1809
                             "ETHIOPIC SUPPLEMENT",
1810
                             "ETHIOPICSUPPLEMENT");
1811

1812
        /**
1813
         * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1814
         * Unicode character block.
1815
         * @since 1.7
1816
         */
1817
        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1818
            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1819
                             "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1820
                             "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1821

1822
        /**
1823
         * Constant for the "New Tai Lue" Unicode character block.
1824
         * @since 1.7
1825
         */
1826
        public static final UnicodeBlock NEW_TAI_LUE =
1827
            new UnicodeBlock("NEW_TAI_LUE",
1828
                             "NEW TAI LUE",
1829
                             "NEWTAILUE");
1830

1831
        /**
1832
         * Constant for the "Buginese" Unicode character block.
1833
         * @since 1.7
1834
         */
1835
        public static final UnicodeBlock BUGINESE =
1836
            new UnicodeBlock("BUGINESE");
1837

1838
        /**
1839
         * Constant for the "Tai Tham" Unicode character block.
1840
         * @since 1.7
1841
         */
1842
        public static final UnicodeBlock TAI_THAM =
1843
            new UnicodeBlock("TAI_THAM",
1844
                             "TAI THAM",
1845
                             "TAITHAM");
1846

1847
        /**
1848
         * Constant for the "Balinese" Unicode character block.
1849
         * @since 1.7
1850
         */
1851
        public static final UnicodeBlock BALINESE =
1852
            new UnicodeBlock("BALINESE");
1853

1854
        /**
1855
         * Constant for the "Sundanese" Unicode character block.
1856
         * @since 1.7
1857
         */
1858
        public static final UnicodeBlock SUNDANESE =
1859
            new UnicodeBlock("SUNDANESE");
1860

1861
        /**
1862
         * Constant for the "Batak" Unicode character block.
1863
         * @since 1.7
1864
         */
1865
        public static final UnicodeBlock BATAK =
1866
            new UnicodeBlock("BATAK");
1867

1868
        /**
1869
         * Constant for the "Lepcha" Unicode character block.
1870
         * @since 1.7
1871
         */
1872
        public static final UnicodeBlock LEPCHA =
1873
            new UnicodeBlock("LEPCHA");
1874

1875
        /**
1876
         * Constant for the "Ol Chiki" Unicode character block.
1877
         * @since 1.7
1878
         */
1879
        public static final UnicodeBlock OL_CHIKI =
1880
            new UnicodeBlock("OL_CHIKI",
1881
                             "OL CHIKI",
1882
                             "OLCHIKI");
1883

1884
        /**
1885
         * Constant for the "Vedic Extensions" Unicode character block.
1886
         * @since 1.7
1887
         */
1888
        public static final UnicodeBlock VEDIC_EXTENSIONS =
1889
            new UnicodeBlock("VEDIC_EXTENSIONS",
1890
                             "VEDIC EXTENSIONS",
1891
                             "VEDICEXTENSIONS");
1892

1893
        /**
1894
         * Constant for the "Phonetic Extensions Supplement" Unicode character
1895
         * block.
1896
         * @since 1.7
1897
         */
1898
        public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1899
            new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1900
                             "PHONETIC EXTENSIONS SUPPLEMENT",
1901
                             "PHONETICEXTENSIONSSUPPLEMENT");
1902

1903
        /**
1904
         * Constant for the "Combining Diacritical Marks Supplement" Unicode
1905
         * character block.
1906
         * @since 1.7
1907
         */
1908
        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1909
            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1910
                             "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1911
                             "COMBININGDIACRITICALMARKSSUPPLEMENT");
1912

1913
        /**
1914
         * Constant for the "Glagolitic" Unicode character block.
1915
         * @since 1.7
1916
         */
1917
        public static final UnicodeBlock GLAGOLITIC =
1918
            new UnicodeBlock("GLAGOLITIC");
1919

1920
        /**
1921
         * Constant for the "Latin Extended-C" Unicode character block.
1922
         * @since 1.7
1923
         */
1924
        public static final UnicodeBlock LATIN_EXTENDED_C =
1925
            new UnicodeBlock("LATIN_EXTENDED_C",
1926
                             "LATIN EXTENDED-C",
1927
                             "LATINEXTENDED-C");
1928

1929
        /**
1930
         * Constant for the "Coptic" Unicode character block.
1931
         * @since 1.7
1932
         */
1933
        public static final UnicodeBlock COPTIC =
1934
            new UnicodeBlock("COPTIC");
1935

1936
        /**
1937
         * Constant for the "Georgian Supplement" Unicode character block.
1938
         * @since 1.7
1939
         */
1940
        public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1941
            new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1942
                             "GEORGIAN SUPPLEMENT",
1943
                             "GEORGIANSUPPLEMENT");
1944

1945
        /**
1946
         * Constant for the "Tifinagh" Unicode character block.
1947
         * @since 1.7
1948
         */
1949
        public static final UnicodeBlock TIFINAGH =
1950
            new UnicodeBlock("TIFINAGH");
1951

1952
        /**
1953
         * Constant for the "Ethiopic Extended" Unicode character block.
1954
         * @since 1.7
1955
         */
1956
        public static final UnicodeBlock ETHIOPIC_EXTENDED =
1957
            new UnicodeBlock("ETHIOPIC_EXTENDED",
1958
                             "ETHIOPIC EXTENDED",
1959
                             "ETHIOPICEXTENDED");
1960

1961
        /**
1962
         * Constant for the "Cyrillic Extended-A" Unicode character block.
1963
         * @since 1.7
1964
         */
1965
        public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1966
            new UnicodeBlock("CYRILLIC_EXTENDED_A",
1967
                             "CYRILLIC EXTENDED-A",
1968
                             "CYRILLICEXTENDED-A");
1969

1970
        /**
1971
         * Constant for the "Supplemental Punctuation" Unicode character block.
1972
         * @since 1.7
1973
         */
1974
        public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1975
            new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1976
                             "SUPPLEMENTAL PUNCTUATION",
1977
                             "SUPPLEMENTALPUNCTUATION");
1978

1979
        /**
1980
         * Constant for the "CJK Strokes" Unicode character block.
1981
         * @since 1.7
1982
         */
1983
        public static final UnicodeBlock CJK_STROKES =
1984
            new UnicodeBlock("CJK_STROKES",
1985
                             "CJK STROKES",
1986
                             "CJKSTROKES");
1987

1988
        /**
1989
         * Constant for the "Lisu" Unicode character block.
1990
         * @since 1.7
1991
         */
1992
        public static final UnicodeBlock LISU =
1993
            new UnicodeBlock("LISU");
1994

1995
        /**
1996
         * Constant for the "Vai" Unicode character block.
1997
         * @since 1.7
1998
         */
1999
        public static final UnicodeBlock VAI =
2000
            new UnicodeBlock("VAI");
2001

2002
        /**
2003
         * Constant for the "Cyrillic Extended-B" Unicode character block.
2004
         * @since 1.7
2005
         */
2006
        public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2007
            new UnicodeBlock("CYRILLIC_EXTENDED_B",
2008
                             "CYRILLIC EXTENDED-B",
2009
                             "CYRILLICEXTENDED-B");
2010

2011
        /**
2012
         * Constant for the "Bamum" Unicode character block.
2013
         * @since 1.7
2014
         */
2015
        public static final UnicodeBlock BAMUM =
2016
            new UnicodeBlock("BAMUM");
2017

2018
        /**
2019
         * Constant for the "Modifier Tone Letters" Unicode character block.
2020
         * @since 1.7
2021
         */
2022
        public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2023
            new UnicodeBlock("MODIFIER_TONE_LETTERS",
2024
                             "MODIFIER TONE LETTERS",
2025
                             "MODIFIERTONELETTERS");
2026

2027
        /**
2028
         * Constant for the "Latin Extended-D" Unicode character block.
2029
         * @since 1.7
2030
         */
2031
        public static final UnicodeBlock LATIN_EXTENDED_D =
2032
            new UnicodeBlock("LATIN_EXTENDED_D",
2033
                             "LATIN EXTENDED-D",
2034
                             "LATINEXTENDED-D");
2035

2036
        /**
2037
         * Constant for the "Syloti Nagri" Unicode character block.
2038
         * @since 1.7
2039
         */
2040
        public static final UnicodeBlock SYLOTI_NAGRI =
2041
            new UnicodeBlock("SYLOTI_NAGRI",
2042
                             "SYLOTI NAGRI",
2043
                             "SYLOTINAGRI");
2044

2045
        /**
2046
         * Constant for the "Common Indic Number Forms" Unicode character block.
2047
         * @since 1.7
2048
         */
2049
        public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2050
            new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2051
                             "COMMON INDIC NUMBER FORMS",
2052
                             "COMMONINDICNUMBERFORMS");
2053

2054
        /**
2055
         * Constant for the "Phags-pa" Unicode character block.
2056
         * @since 1.7
2057
         */
2058
        public static final UnicodeBlock PHAGS_PA =
2059
            new UnicodeBlock("PHAGS_PA",
2060
                             "PHAGS-PA");
2061

2062
        /**
2063
         * Constant for the "Saurashtra" Unicode character block.
2064
         * @since 1.7
2065
         */
2066
        public static final UnicodeBlock SAURASHTRA =
2067
            new UnicodeBlock("SAURASHTRA");
2068

2069
        /**
2070
         * Constant for the "Devanagari Extended" Unicode character block.
2071
         * @since 1.7
2072
         */
2073
        public static final UnicodeBlock DEVANAGARI_EXTENDED =
2074
            new UnicodeBlock("DEVANAGARI_EXTENDED",
2075
                             "DEVANAGARI EXTENDED",
2076
                             "DEVANAGARIEXTENDED");
2077

2078
        /**
2079
         * Constant for the "Kayah Li" Unicode character block.
2080
         * @since 1.7
2081
         */
2082
        public static final UnicodeBlock KAYAH_LI =
2083
            new UnicodeBlock("KAYAH_LI",
2084
                             "KAYAH LI",
2085
                             "KAYAHLI");
2086

2087
        /**
2088
         * Constant for the "Rejang" Unicode character block.
2089
         * @since 1.7
2090
         */
2091
        public static final UnicodeBlock REJANG =
2092
            new UnicodeBlock("REJANG");
2093

2094
        /**
2095
         * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2096
         * @since 1.7
2097
         */
2098
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2099
            new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2100
                             "HANGUL JAMO EXTENDED-A",
2101
                             "HANGULJAMOEXTENDED-A");
2102

2103
        /**
2104
         * Constant for the "Javanese" Unicode character block.
2105
         * @since 1.7
2106
         */
2107
        public static final UnicodeBlock JAVANESE =
2108
            new UnicodeBlock("JAVANESE");
2109

2110
        /**
2111
         * Constant for the "Cham" Unicode character block.
2112
         * @since 1.7
2113
         */
2114
        public static final UnicodeBlock CHAM =
2115
            new UnicodeBlock("CHAM");
2116

2117
        /**
2118
         * Constant for the "Myanmar Extended-A" Unicode character block.
2119
         * @since 1.7
2120
         */
2121
        public static final UnicodeBlock MYANMAR_EXTENDED_A =
2122
            new UnicodeBlock("MYANMAR_EXTENDED_A",
2123
                             "MYANMAR EXTENDED-A",
2124
                             "MYANMAREXTENDED-A");
2125

2126
        /**
2127
         * Constant for the "Tai Viet" Unicode character block.
2128
         * @since 1.7
2129
         */
2130
        public static final UnicodeBlock TAI_VIET =
2131
            new UnicodeBlock("TAI_VIET",
2132
                             "TAI VIET",
2133
                             "TAIVIET");
2134

2135
        /**
2136
         * Constant for the "Ethiopic Extended-A" Unicode character block.
2137
         * @since 1.7
2138
         */
2139
        public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2140
            new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2141
                             "ETHIOPIC EXTENDED-A",
2142
                             "ETHIOPICEXTENDED-A");
2143

2144
        /**
2145
         * Constant for the "Meetei Mayek" Unicode character block.
2146
         * @since 1.7
2147
         */
2148
        public static final UnicodeBlock MEETEI_MAYEK =
2149
            new UnicodeBlock("MEETEI_MAYEK",
2150
                             "MEETEI MAYEK",
2151
                             "MEETEIMAYEK");
2152

2153
        /**
2154
         * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2155
         * @since 1.7
2156
         */
2157
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2158
            new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2159
                             "HANGUL JAMO EXTENDED-B",
2160
                             "HANGULJAMOEXTENDED-B");
2161

2162
        /**
2163
         * Constant for the "Vertical Forms" Unicode character block.
2164
         * @since 1.7
2165
         */
2166
        public static final UnicodeBlock VERTICAL_FORMS =
2167
            new UnicodeBlock("VERTICAL_FORMS",
2168
                             "VERTICAL FORMS",
2169
                             "VERTICALFORMS");
2170

2171
        /**
2172
         * Constant for the "Ancient Greek Numbers" Unicode character block.
2173
         * @since 1.7
2174
         */
2175
        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2176
            new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2177
                             "ANCIENT GREEK NUMBERS",
2178
                             "ANCIENTGREEKNUMBERS");
2179

2180
        /**
2181
         * Constant for the "Ancient Symbols" Unicode character block.
2182
         * @since 1.7
2183
         */
2184
        public static final UnicodeBlock ANCIENT_SYMBOLS =
2185
            new UnicodeBlock("ANCIENT_SYMBOLS",
2186
                             "ANCIENT SYMBOLS",
2187
                             "ANCIENTSYMBOLS");
2188

2189
        /**
2190
         * Constant for the "Phaistos Disc" Unicode character block.
2191
         * @since 1.7
2192
         */
2193
        public static final UnicodeBlock PHAISTOS_DISC =
2194
            new UnicodeBlock("PHAISTOS_DISC",
2195
                             "PHAISTOS DISC",
2196
                             "PHAISTOSDISC");
2197

2198
        /**
2199
         * Constant for the "Lycian" Unicode character block.
2200
         * @since 1.7
2201
         */
2202
        public static final UnicodeBlock LYCIAN =
2203
            new UnicodeBlock("LYCIAN");
2204

2205
        /**
2206
         * Constant for the "Carian" Unicode character block.
2207
         * @since 1.7
2208
         */
2209
        public static final UnicodeBlock CARIAN =
2210
            new UnicodeBlock("CARIAN");
2211

2212
        /**
2213
         * Constant for the "Old Persian" Unicode character block.
2214
         * @since 1.7
2215
         */
2216
        public static final UnicodeBlock OLD_PERSIAN =
2217
            new UnicodeBlock("OLD_PERSIAN",
2218
                             "OLD PERSIAN",
2219
                             "OLDPERSIAN");
2220

2221
        /**
2222
         * Constant for the "Imperial Aramaic" Unicode character block.
2223
         * @since 1.7
2224
         */
2225
        public static final UnicodeBlock IMPERIAL_ARAMAIC =
2226
            new UnicodeBlock("IMPERIAL_ARAMAIC",
2227
                             "IMPERIAL ARAMAIC",
2228
                             "IMPERIALARAMAIC");
2229

2230
        /**
2231
         * Constant for the "Phoenician" Unicode character block.
2232
         * @since 1.7
2233
         */
2234
        public static final UnicodeBlock PHOENICIAN =
2235
            new UnicodeBlock("PHOENICIAN");
2236

2237
        /**
2238
         * Constant for the "Lydian" Unicode character block.
2239
         * @since 1.7
2240
         */
2241
        public static final UnicodeBlock LYDIAN =
2242
            new UnicodeBlock("LYDIAN");
2243

2244
        /**
2245
         * Constant for the "Kharoshthi" Unicode character block.
2246
         * @since 1.7
2247
         */
2248
        public static final UnicodeBlock KHAROSHTHI =
2249
            new UnicodeBlock("KHAROSHTHI");
2250

2251
        /**
2252
         * Constant for the "Old South Arabian" Unicode character block.
2253
         * @since 1.7
2254
         */
2255
        public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2256
            new UnicodeBlock("OLD_SOUTH_ARABIAN",
2257
                             "OLD SOUTH ARABIAN",
2258
                             "OLDSOUTHARABIAN");
2259

2260
        /**
2261
         * Constant for the "Avestan" Unicode character block.
2262
         * @since 1.7
2263
         */
2264
        public static final UnicodeBlock AVESTAN =
2265
            new UnicodeBlock("AVESTAN");
2266

2267
        /**
2268
         * Constant for the "Inscriptional Parthian" Unicode character block.
2269
         * @since 1.7
2270
         */
2271
        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2272
            new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2273
                             "INSCRIPTIONAL PARTHIAN",
2274
                             "INSCRIPTIONALPARTHIAN");
2275

2276
        /**
2277
         * Constant for the "Inscriptional Pahlavi" Unicode character block.
2278
         * @since 1.7
2279
         */
2280
        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2281
            new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2282
                             "INSCRIPTIONAL PAHLAVI",
2283
                             "INSCRIPTIONALPAHLAVI");
2284

2285
        /**
2286
         * Constant for the "Old Turkic" Unicode character block.
2287
         * @since 1.7
2288
         */
2289
        public static final UnicodeBlock OLD_TURKIC =
2290
            new UnicodeBlock("OLD_TURKIC",
2291
                             "OLD TURKIC",
2292
                             "OLDTURKIC");
2293

2294
        /**
2295
         * Constant for the "Rumi Numeral Symbols" Unicode character block.
2296
         * @since 1.7
2297
         */
2298
        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2299
            new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2300
                             "RUMI NUMERAL SYMBOLS",
2301
                             "RUMINUMERALSYMBOLS");
2302

2303
        /**
2304
         * Constant for the "Brahmi" Unicode character block.
2305
         * @since 1.7
2306
         */
2307
        public static final UnicodeBlock BRAHMI =
2308
            new UnicodeBlock("BRAHMI");
2309

2310
        /**
2311
         * Constant for the "Kaithi" Unicode character block.
2312
         * @since 1.7
2313
         */
2314
        public static final UnicodeBlock KAITHI =
2315
            new UnicodeBlock("KAITHI");
2316

2317
        /**
2318
         * Constant for the "Cuneiform" Unicode character block.
2319
         * @since 1.7
2320
         */
2321
        public static final UnicodeBlock CUNEIFORM =
2322
            new UnicodeBlock("CUNEIFORM");
2323

2324
        /**
2325
         * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2326
         * character block.
2327
         * @since 1.7
2328
         */
2329
        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2330
            new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2331
                             "CUNEIFORM NUMBERS AND PUNCTUATION",
2332
                             "CUNEIFORMNUMBERSANDPUNCTUATION");
2333

2334
        /**
2335
         * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2336
         * @since 1.7
2337
         */
2338
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2339
            new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2340
                             "EGYPTIAN HIEROGLYPHS",
2341
                             "EGYPTIANHIEROGLYPHS");
2342

2343
        /**
2344
         * Constant for the "Bamum Supplement" Unicode character block.
2345
         * @since 1.7
2346
         */
2347
        public static final UnicodeBlock BAMUM_SUPPLEMENT =
2348
            new UnicodeBlock("BAMUM_SUPPLEMENT",
2349
                             "BAMUM SUPPLEMENT",
2350
                             "BAMUMSUPPLEMENT");
2351

2352
        /**
2353
         * Constant for the "Kana Supplement" Unicode character block.
2354
         * @since 1.7
2355
         */
2356
        public static final UnicodeBlock KANA_SUPPLEMENT =
2357
            new UnicodeBlock("KANA_SUPPLEMENT",
2358
                             "KANA SUPPLEMENT",
2359
                             "KANASUPPLEMENT");
2360

2361
        /**
2362
         * Constant for the "Ancient Greek Musical Notation" Unicode character
2363
         * block.
2364
         * @since 1.7
2365
         */
2366
        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2367
            new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2368
                             "ANCIENT GREEK MUSICAL NOTATION",
2369
                             "ANCIENTGREEKMUSICALNOTATION");
2370

2371
        /**
2372
         * Constant for the "Counting Rod Numerals" Unicode character block.
2373
         * @since 1.7
2374
         */
2375
        public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2376
            new UnicodeBlock("COUNTING_ROD_NUMERALS",
2377
                             "COUNTING ROD NUMERALS",
2378
                             "COUNTINGRODNUMERALS");
2379

2380
        /**
2381
         * Constant for the "Mahjong Tiles" Unicode character block.
2382
         * @since 1.7
2383
         */
2384
        public static final UnicodeBlock MAHJONG_TILES =
2385
            new UnicodeBlock("MAHJONG_TILES",
2386
                             "MAHJONG TILES",
2387
                             "MAHJONGTILES");
2388

2389
        /**
2390
         * Constant for the "Domino Tiles" Unicode character block.
2391
         * @since 1.7
2392
         */
2393
        public static final UnicodeBlock DOMINO_TILES =
2394
            new UnicodeBlock("DOMINO_TILES",
2395
                             "DOMINO TILES",
2396
                             "DOMINOTILES");
2397

2398
        /**
2399
         * Constant for the "Playing Cards" Unicode character block.
2400
         * @since 1.7
2401
         */
2402
        public static final UnicodeBlock PLAYING_CARDS =
2403
            new UnicodeBlock("PLAYING_CARDS",
2404
                             "PLAYING CARDS",
2405
                             "PLAYINGCARDS");
2406

2407
        /**
2408
         * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2409
         * block.
2410
         * @since 1.7
2411
         */
2412
        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2413
            new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2414
                             "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2415
                             "ENCLOSEDALPHANUMERICSUPPLEMENT");
2416

2417
        /**
2418
         * Constant for the "Enclosed Ideographic Supplement" Unicode character
2419
         * block.
2420
         * @since 1.7
2421
         */
2422
        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2423
            new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2424
                             "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2425
                             "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2426

2427
        /**
2428
         * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2429
         * character block.
2430
         * @since 1.7
2431
         */
2432
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2433
            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2434
                             "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2435
                             "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2436

2437
        /**
2438
         * Constant for the "Emoticons" Unicode character block.
2439
         * @since 1.7
2440
         */
2441
        public static final UnicodeBlock EMOTICONS =
2442
            new UnicodeBlock("EMOTICONS");
2443

2444
        /**
2445
         * Constant for the "Transport And Map Symbols" Unicode character block.
2446
         * @since 1.7
2447
         */
2448
        public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2449
            new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2450
                             "TRANSPORT AND MAP SYMBOLS",
2451
                             "TRANSPORTANDMAPSYMBOLS");
2452

2453
        /**
2454
         * Constant for the "Alchemical Symbols" Unicode character block.
2455
         * @since 1.7
2456
         */
2457
        public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2458
            new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2459
                             "ALCHEMICAL SYMBOLS",
2460
                             "ALCHEMICALSYMBOLS");
2461

2462
        /**
2463
         * Constant for the "CJK Unified Ideographs Extension C" Unicode
2464
         * character block.
2465
         * @since 1.7
2466
         */
2467
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2468
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2469
                             "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2470
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2471

2472
        /**
2473
         * Constant for the "CJK Unified Ideographs Extension D" Unicode
2474
         * character block.
2475
         * @since 1.7
2476
         */
2477
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2478
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2479
                             "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2480
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2481

2482
        /**
2483
         * Constant for the "Arabic Extended-A" Unicode character block.
2484
         * @since 1.8
2485
         */
2486
        public static final UnicodeBlock ARABIC_EXTENDED_A =
2487
            new UnicodeBlock("ARABIC_EXTENDED_A",
2488
                             "ARABIC EXTENDED-A",
2489
                             "ARABICEXTENDED-A");
2490

2491
        /**
2492
         * Constant for the "Sundanese Supplement" Unicode character block.
2493
         * @since 1.8
2494
         */
2495
        public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2496
            new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2497
                             "SUNDANESE SUPPLEMENT",
2498
                             "SUNDANESESUPPLEMENT");
2499

2500
        /**
2501
         * Constant for the "Meetei Mayek Extensions" Unicode character block.
2502
         * @since 1.8
2503
         */
2504
        public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2505
            new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2506
                             "MEETEI MAYEK EXTENSIONS",
2507
                             "MEETEIMAYEKEXTENSIONS");
2508

2509
        /**
2510
         * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2511
         * @since 1.8
2512
         */
2513
        public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2514
            new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2515
                             "MEROITIC HIEROGLYPHS",
2516
                             "MEROITICHIEROGLYPHS");
2517

2518
        /**
2519
         * Constant for the "Meroitic Cursive" Unicode character block.
2520
         * @since 1.8
2521
         */
2522
        public static final UnicodeBlock MEROITIC_CURSIVE =
2523
            new UnicodeBlock("MEROITIC_CURSIVE",
2524
                             "MEROITIC CURSIVE",
2525
                             "MEROITICCURSIVE");
2526

2527
        /**
2528
         * Constant for the "Sora Sompeng" Unicode character block.
2529
         * @since 1.8
2530
         */
2531
        public static final UnicodeBlock SORA_SOMPENG =
2532
            new UnicodeBlock("SORA_SOMPENG",
2533
                             "SORA SOMPENG",
2534
                             "SORASOMPENG");
2535

2536
        /**
2537
         * Constant for the "Chakma" Unicode character block.
2538
         * @since 1.8
2539
         */
2540
        public static final UnicodeBlock CHAKMA =
2541
            new UnicodeBlock("CHAKMA");
2542

2543
        /**
2544
         * Constant for the "Sharada" Unicode character block.
2545
         * @since 1.8
2546
         */
2547
        public static final UnicodeBlock SHARADA =
2548
            new UnicodeBlock("SHARADA");
2549

2550
        /**
2551
         * Constant for the "Takri" Unicode character block.
2552
         * @since 1.8
2553
         */
2554
        public static final UnicodeBlock TAKRI =
2555
            new UnicodeBlock("TAKRI");
2556

2557
        /**
2558
         * Constant for the "Miao" Unicode character block.
2559
         * @since 1.8
2560
         */
2561
        public static final UnicodeBlock MIAO =
2562
            new UnicodeBlock("MIAO");
2563

2564
        /**
2565
         * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2566
         * character block.
2567
         * @since 1.8
2568
         */
2569
        public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2570
            new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2571
                             "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2572
                             "ARABICMATHEMATICALALPHABETICSYMBOLS");
2573

2574
        private static final int blockStarts[] = {
2575
            0x0000,   // 0000..007F; Basic Latin
2576
            0x0080,   // 0080..00FF; Latin-1 Supplement
2577
            0x0100,   // 0100..017F; Latin Extended-A
2578
            0x0180,   // 0180..024F; Latin Extended-B
2579
            0x0250,   // 0250..02AF; IPA Extensions
2580
            0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2581
            0x0300,   // 0300..036F; Combining Diacritical Marks
2582
            0x0370,   // 0370..03FF; Greek and Coptic
2583
            0x0400,   // 0400..04FF; Cyrillic
2584
            0x0500,   // 0500..052F; Cyrillic Supplement
2585
            0x0530,   // 0530..058F; Armenian
2586
            0x0590,   // 0590..05FF; Hebrew
2587
            0x0600,   // 0600..06FF; Arabic
2588
            0x0700,   // 0700..074F; Syriac
2589
            0x0750,   // 0750..077F; Arabic Supplement
2590
            0x0780,   // 0780..07BF; Thaana
2591
            0x07C0,   // 07C0..07FF; NKo
2592
            0x0800,   // 0800..083F; Samaritan
2593
            0x0840,   // 0840..085F; Mandaic
2594
            0x0860,   //             unassigned
2595
            0x08A0,   // 08A0..08FF; Arabic Extended-A
2596
            0x0900,   // 0900..097F; Devanagari
2597
            0x0980,   // 0980..09FF; Bengali
2598
            0x0A00,   // 0A00..0A7F; Gurmukhi
2599
            0x0A80,   // 0A80..0AFF; Gujarati
2600
            0x0B00,   // 0B00..0B7F; Oriya
2601
            0x0B80,   // 0B80..0BFF; Tamil
2602
            0x0C00,   // 0C00..0C7F; Telugu
2603
            0x0C80,   // 0C80..0CFF; Kannada
2604
            0x0D00,   // 0D00..0D7F; Malayalam
2605
            0x0D80,   // 0D80..0DFF; Sinhala
2606
            0x0E00,   // 0E00..0E7F; Thai
2607
            0x0E80,   // 0E80..0EFF; Lao
2608
            0x0F00,   // 0F00..0FFF; Tibetan
2609
            0x1000,   // 1000..109F; Myanmar
2610
            0x10A0,   // 10A0..10FF; Georgian
2611
            0x1100,   // 1100..11FF; Hangul Jamo
2612
            0x1200,   // 1200..137F; Ethiopic
2613
            0x1380,   // 1380..139F; Ethiopic Supplement
2614
            0x13A0,   // 13A0..13FF; Cherokee
2615
            0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2616
            0x1680,   // 1680..169F; Ogham
2617
            0x16A0,   // 16A0..16FF; Runic
2618
            0x1700,   // 1700..171F; Tagalog
2619
            0x1720,   // 1720..173F; Hanunoo
2620
            0x1740,   // 1740..175F; Buhid
2621
            0x1760,   // 1760..177F; Tagbanwa
2622
            0x1780,   // 1780..17FF; Khmer
2623
            0x1800,   // 1800..18AF; Mongolian
2624
            0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2625
            0x1900,   // 1900..194F; Limbu
2626
            0x1950,   // 1950..197F; Tai Le
2627
            0x1980,   // 1980..19DF; New Tai Lue
2628
            0x19E0,   // 19E0..19FF; Khmer Symbols
2629
            0x1A00,   // 1A00..1A1F; Buginese
2630
            0x1A20,   // 1A20..1AAF; Tai Tham
2631
            0x1AB0,   //             unassigned
2632
            0x1B00,   // 1B00..1B7F; Balinese
2633
            0x1B80,   // 1B80..1BBF; Sundanese
2634
            0x1BC0,   // 1BC0..1BFF; Batak
2635
            0x1C00,   // 1C00..1C4F; Lepcha
2636
            0x1C50,   // 1C50..1C7F; Ol Chiki
2637
            0x1C80,   //             unassigned
2638
            0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
2639
            0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2640
            0x1D00,   // 1D00..1D7F; Phonetic Extensions
2641
            0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2642
            0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2643
            0x1E00,   // 1E00..1EFF; Latin Extended Additional
2644
            0x1F00,   // 1F00..1FFF; Greek Extended
2645
            0x2000,   // 2000..206F; General Punctuation
2646
            0x2070,   // 2070..209F; Superscripts and Subscripts
2647
            0x20A0,   // 20A0..20CF; Currency Symbols
2648
            0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2649
            0x2100,   // 2100..214F; Letterlike Symbols
2650
            0x2150,   // 2150..218F; Number Forms
2651
            0x2190,   // 2190..21FF; Arrows
2652
            0x2200,   // 2200..22FF; Mathematical Operators
2653
            0x2300,   // 2300..23FF; Miscellaneous Technical
2654
            0x2400,   // 2400..243F; Control Pictures
2655
            0x2440,   // 2440..245F; Optical Character Recognition
2656
            0x2460,   // 2460..24FF; Enclosed Alphanumerics
2657
            0x2500,   // 2500..257F; Box Drawing
2658
            0x2580,   // 2580..259F; Block Elements
2659
            0x25A0,   // 25A0..25FF; Geometric Shapes
2660
            0x2600,   // 2600..26FF; Miscellaneous Symbols
2661
            0x2700,   // 2700..27BF; Dingbats
2662
            0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2663
            0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2664
            0x2800,   // 2800..28FF; Braille Patterns
2665
            0x2900,   // 2900..297F; Supplemental Arrows-B
2666
            0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2667
            0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2668
            0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2669
            0x2C00,   // 2C00..2C5F; Glagolitic
2670
            0x2C60,   // 2C60..2C7F; Latin Extended-C
2671
            0x2C80,   // 2C80..2CFF; Coptic
2672
            0x2D00,   // 2D00..2D2F; Georgian Supplement
2673
            0x2D30,   // 2D30..2D7F; Tifinagh
2674
            0x2D80,   // 2D80..2DDF; Ethiopic Extended
2675
            0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2676
            0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2677
            0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2678
            0x2F00,   // 2F00..2FDF; Kangxi Radicals
2679
            0x2FE0,   //             unassigned
2680
            0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2681
            0x3000,   // 3000..303F; CJK Symbols and Punctuation
2682
            0x3040,   // 3040..309F; Hiragana
2683
            0x30A0,   // 30A0..30FF; Katakana
2684
            0x3100,   // 3100..312F; Bopomofo
2685
            0x3130,   // 3130..318F; Hangul Compatibility Jamo
2686
            0x3190,   // 3190..319F; Kanbun
2687
            0x31A0,   // 31A0..31BF; Bopomofo Extended
2688
            0x31C0,   // 31C0..31EF; CJK Strokes
2689
            0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2690
            0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2691
            0x3300,   // 3300..33FF; CJK Compatibility
2692
            0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2693
            0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2694
            0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2695
            0xA000,   // A000..A48F; Yi Syllables
2696
            0xA490,   // A490..A4CF; Yi Radicals
2697
            0xA4D0,   // A4D0..A4FF; Lisu
2698
            0xA500,   // A500..A63F; Vai
2699
            0xA640,   // A640..A69F; Cyrillic Extended-B
2700
            0xA6A0,   // A6A0..A6FF; Bamum
2701
            0xA700,   // A700..A71F; Modifier Tone Letters
2702
            0xA720,   // A720..A7FF; Latin Extended-D
2703
            0xA800,   // A800..A82F; Syloti Nagri
2704
            0xA830,   // A830..A83F; Common Indic Number Forms
2705
            0xA840,   // A840..A87F; Phags-pa
2706
            0xA880,   // A880..A8DF; Saurashtra
2707
            0xA8E0,   // A8E0..A8FF; Devanagari Extended
2708
            0xA900,   // A900..A92F; Kayah Li
2709
            0xA930,   // A930..A95F; Rejang
2710
            0xA960,   // A960..A97F; Hangul Jamo Extended-A
2711
            0xA980,   // A980..A9DF; Javanese
2712
            0xA9E0,   //             unassigned
2713
            0xAA00,   // AA00..AA5F; Cham
2714
            0xAA60,   // AA60..AA7F; Myanmar Extended-A
2715
            0xAA80,   // AA80..AADF; Tai Viet
2716
            0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
2717
            0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2718
            0xAB30,   //             unassigned
2719
            0xABC0,   // ABC0..ABFF; Meetei Mayek
2720
            0xAC00,   // AC00..D7AF; Hangul Syllables
2721
            0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2722
            0xD800,   // D800..DB7F; High Surrogates
2723
            0xDB80,   // DB80..DBFF; High Private Use Surrogates
2724
            0xDC00,   // DC00..DFFF; Low Surrogates
2725
            0xE000,   // E000..F8FF; Private Use Area
2726
            0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2727
            0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2728
            0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2729
            0xFE00,   // FE00..FE0F; Variation Selectors
2730
            0xFE10,   // FE10..FE1F; Vertical Forms
2731
            0xFE20,   // FE20..FE2F; Combining Half Marks
2732
            0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2733
            0xFE50,   // FE50..FE6F; Small Form Variants
2734
            0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2735
            0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2736
            0xFFF0,   // FFF0..FFFF; Specials
2737
            0x10000,  // 10000..1007F; Linear B Syllabary
2738
            0x10080,  // 10080..100FF; Linear B Ideograms
2739
            0x10100,  // 10100..1013F; Aegean Numbers
2740
            0x10140,  // 10140..1018F; Ancient Greek Numbers
2741
            0x10190,  // 10190..101CF; Ancient Symbols
2742
            0x101D0,  // 101D0..101FF; Phaistos Disc
2743
            0x10200,  //               unassigned
2744
            0x10280,  // 10280..1029F; Lycian
2745
            0x102A0,  // 102A0..102DF; Carian
2746
            0x102E0,  //               unassigned
2747
            0x10300,  // 10300..1032F; Old Italic
2748
            0x10330,  // 10330..1034F; Gothic
2749
            0x10350,  //               unassigned
2750
            0x10380,  // 10380..1039F; Ugaritic
2751
            0x103A0,  // 103A0..103DF; Old Persian
2752
            0x103E0,  //               unassigned
2753
            0x10400,  // 10400..1044F; Deseret
2754
            0x10450,  // 10450..1047F; Shavian
2755
            0x10480,  // 10480..104AF; Osmanya
2756
            0x104B0,  //               unassigned
2757
            0x10800,  // 10800..1083F; Cypriot Syllabary
2758
            0x10840,  // 10840..1085F; Imperial Aramaic
2759
            0x10860,  //               unassigned
2760
            0x10900,  // 10900..1091F; Phoenician
2761
            0x10920,  // 10920..1093F; Lydian
2762
            0x10940,  //               unassigned
2763
            0x10980,  // 10980..1099F; Meroitic Hieroglyphs
2764
            0x109A0,  // 109A0..109FF; Meroitic Cursive
2765
            0x10A00,  // 10A00..10A5F; Kharoshthi
2766
            0x10A60,  // 10A60..10A7F; Old South Arabian
2767
            0x10A80,  //               unassigned
2768
            0x10B00,  // 10B00..10B3F; Avestan
2769
            0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2770
            0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2771
            0x10B80,  //               unassigned
2772
            0x10C00,  // 10C00..10C4F; Old Turkic
2773
            0x10C50,  //               unassigned
2774
            0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2775
            0x10E80,  //               unassigned
2776
            0x11000,  // 11000..1107F; Brahmi
2777
            0x11080,  // 11080..110CF; Kaithi
2778
            0x110D0,  // 110D0..110FF; Sora Sompeng
2779
            0x11100,  // 11100..1114F; Chakma
2780
            0x11150,  //               unassigned
2781
            0x11180,  // 11180..111DF; Sharada
2782
            0x111E0,  //               unassigned
2783
            0x11680,  // 11680..116CF; Takri
2784
            0x116D0,  //               unassigned
2785
            0x12000,  // 12000..123FF; Cuneiform
2786
            0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2787
            0x12480,  //               unassigned
2788
            0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2789
            0x13430,  //               unassigned
2790
            0x16800,  // 16800..16A3F; Bamum Supplement
2791
            0x16A40,  //               unassigned
2792
            0x16F00,  // 16F00..16F9F; Miao
2793
            0x16FA0,  //               unassigned
2794
            0x1B000,  // 1B000..1B0FF; Kana Supplement
2795
            0x1B100,  //               unassigned
2796
            0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2797
            0x1D100,  // 1D100..1D1FF; Musical Symbols
2798
            0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2799
            0x1D250,  //               unassigned
2800
            0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2801
            0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2802
            0x1D380,  //               unassigned
2803
            0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2804
            0x1D800,  //               unassigned
2805
            0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
2806
            0x1EF00,  //               unassigned
2807
            0x1F000,  // 1F000..1F02F; Mahjong Tiles
2808
            0x1F030,  // 1F030..1F09F; Domino Tiles
2809
            0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2810
            0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2811
            0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2812
            0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2813
            0x1F600,  // 1F600..1F64F; Emoticons
2814
            0x1F650,  //               unassigned
2815
            0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2816
            0x1F700,  // 1F700..1F77F; Alchemical Symbols
2817
            0x1F780,  //               unassigned
2818
            0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2819
            0x2A6E0,  //               unassigned
2820
            0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2821
            0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2822
            0x2B820,  //               unassigned
2823
            0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2824
            0x2FA20,  //               unassigned
2825
            0xE0000,  // E0000..E007F; Tags
2826
            0xE0080,  //               unassigned
2827
            0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2828
            0xE01F0,  //               unassigned
2829
            0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2830
            0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2831
        };
2832

2833
        /**
         * Block constant for each code point range, index-aligned with
         * {@code blockStarts}: {@code blocks[i]} is the block whose range
         * begins at {@code blockStarts[i]}. A {@code null} entry marks a range
         * that belongs to no defined block, which is what {@link #of(int)}
         * returns for code points in that range. Entries must be added,
         * removed or reordered together with {@code blockStarts}.
         */
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            null,
            ARABIC_EXTENDED_A,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            TAI_THAM,
            null,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            null,
            SUNDANESE_SUPPLEMENT,
            VEDIC_EXTENSIONS,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
            LATIN_EXTENDED_ADDITIONAL,
            GREEK_EXTENDED,
            GENERAL_PUNCTUATION,
            SUPERSCRIPTS_AND_SUBSCRIPTS,
            CURRENCY_SYMBOLS,
            COMBINING_MARKS_FOR_SYMBOLS,
            LETTERLIKE_SYMBOLS,
            NUMBER_FORMS,
            ARROWS,
            MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_TECHNICAL,
            CONTROL_PICTURES,
            OPTICAL_CHARACTER_RECOGNITION,
            ENCLOSED_ALPHANUMERICS,
            BOX_DRAWING,
            BLOCK_ELEMENTS,
            GEOMETRIC_SHAPES,
            MISCELLANEOUS_SYMBOLS,
            DINGBATS,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
            SUPPLEMENTAL_ARROWS_A,
            BRAILLE_PATTERNS,
            SUPPLEMENTAL_ARROWS_B,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
            GLAGOLITIC,
            LATIN_EXTENDED_C,
            COPTIC,
            GEORGIAN_SUPPLEMENT,
            TIFINAGH,
            ETHIOPIC_EXTENDED,
            CYRILLIC_EXTENDED_A,
            SUPPLEMENTAL_PUNCTUATION,
            CJK_RADICALS_SUPPLEMENT,
            KANGXI_RADICALS,
            null,
            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
            CJK_SYMBOLS_AND_PUNCTUATION,
            HIRAGANA,
            KATAKANA,
            BOPOMOFO,
            HANGUL_COMPATIBILITY_JAMO,
            KANBUN,
            BOPOMOFO_EXTENDED,
            CJK_STROKES,
            KATAKANA_PHONETIC_EXTENSIONS,
            ENCLOSED_CJK_LETTERS_AND_MONTHS,
            CJK_COMPATIBILITY,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
            YIJING_HEXAGRAM_SYMBOLS,
            CJK_UNIFIED_IDEOGRAPHS,
            YI_SYLLABLES,
            YI_RADICALS,
            LISU,
            VAI,
            CYRILLIC_EXTENDED_B,
            BAMUM,
            MODIFIER_TONE_LETTERS,
            LATIN_EXTENDED_D,
            SYLOTI_NAGRI,
            COMMON_INDIC_NUMBER_FORMS,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI_EXTENDED,
            KAYAH_LI,
            REJANG,
            HANGUL_JAMO_EXTENDED_A,
            JAVANESE,
            null,
            CHAM,
            MYANMAR_EXTENDED_A,
            TAI_VIET,
            MEETEI_MAYEK_EXTENSIONS,
            ETHIOPIC_EXTENDED_A,
            null,
            MEETEI_MAYEK,
            HANGUL_SYLLABLES,
            HANGUL_JAMO_EXTENDED_B,
            HIGH_SURROGATES,
            HIGH_PRIVATE_USE_SURROGATES,
            LOW_SURROGATES,
            PRIVATE_USE_AREA,
            CJK_COMPATIBILITY_IDEOGRAPHS,
            ALPHABETIC_PRESENTATION_FORMS,
            ARABIC_PRESENTATION_FORMS_A,
            VARIATION_SELECTORS,
            VERTICAL_FORMS,
            COMBINING_HALF_MARKS,
            CJK_COMPATIBILITY_FORMS,
            SMALL_FORM_VARIANTS,
            ARABIC_PRESENTATION_FORMS_B,
            HALFWIDTH_AND_FULLWIDTH_FORMS,
            SPECIALS,
            LINEAR_B_SYLLABARY,
            LINEAR_B_IDEOGRAMS,
            AEGEAN_NUMBERS,
            ANCIENT_GREEK_NUMBERS,
            ANCIENT_SYMBOLS,
            PHAISTOS_DISC,
            null,
            LYCIAN,
            CARIAN,
            null,
            OLD_ITALIC,
            GOTHIC,
            null,
            UGARITIC,
            OLD_PERSIAN,
            null,
            DESERET,
            SHAVIAN,
            OSMANYA,
            null,
            CYPRIOT_SYLLABARY,
            IMPERIAL_ARAMAIC,
            null,
            PHOENICIAN,
            LYDIAN,
            null,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            null,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            null,
            OLD_TURKIC,
            null,
            RUMI_NUMERAL_SYMBOLS,
            null,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            null,
            SHARADA,
            null,
            TAKRI,
            null,
            CUNEIFORM,
            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
            null,
            EGYPTIAN_HIEROGLYPHS,
            null,
            BAMUM_SUPPLEMENT,
            null,
            MIAO,
            null,
            KANA_SUPPLEMENT,
            null,
            BYZANTINE_MUSICAL_SYMBOLS,
            MUSICAL_SYMBOLS,
            ANCIENT_GREEK_MUSICAL_NOTATION,
            null,
            TAI_XUAN_JING_SYMBOLS,
            COUNTING_ROD_NUMERALS,
            null,
            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
            null,
            ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
            null,
            MAHJONG_TILES,
            DOMINO_TILES,
            PLAYING_CARDS,
            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
            EMOTICONS,
            null,
            TRANSPORT_AND_MAP_SYMBOLS,
            ALCHEMICAL_SYMBOLS,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
            null,
            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
            null,
            TAGS,
            null,
            VARIATION_SELECTORS_SUPPLEMENT,
            null,
            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
            SUPPLEMENTARY_PRIVATE_USE_AREA_B
        };
3091

3092

3093
        /**
3094
         * Returns the object representing the Unicode block containing the
3095
         * given character, or {@code null} if the character is not a
3096
         * member of a defined block.
3097
         *
3098
         * <p><b>Note:</b> This method cannot handle
3099
         * <a href="Character.html#supplementary"> supplementary
3100
         * characters</a>.  To support all Unicode characters, including
3101
         * supplementary characters, use the {@link #of(int)} method.
3102
         *
3103
         * @param   c  The character in question
3104
         * @return  The {@code UnicodeBlock} instance representing the
3105
         *          Unicode block of which this character is a member, or
3106
         *          {@code null} if the character is not a member of any
3107
         *          Unicode block
3108
         */
3109
        public static UnicodeBlock of(char c) {
3110
            return of((int)c);
3111
        }
3112

3113
        /**
3114
         * Returns the object representing the Unicode block
3115
         * containing the given character (Unicode code point), or
3116
         * {@code null} if the character is not a member of a
3117
         * defined block.
3118
         *
3119
         * @param   codePoint the character (Unicode code point) in question.
3120
         * @return  The {@code UnicodeBlock} instance representing the
3121
         *          Unicode block of which this character is a member, or
3122
         *          {@code null} if the character is not a member of any
3123
         *          Unicode block
3124
         * @exception IllegalArgumentException if the specified
3125
         * {@code codePoint} is an invalid Unicode code point.
3126
         * @see Character#isValidCodePoint(int)
3127
         * @since   1.5
3128
         */
3129
        public static UnicodeBlock of(int codePoint) {
3130
            if (!isValidCodePoint(codePoint)) {
3131
                throw new IllegalArgumentException();
3132
            }
3133

3134
            int top, bottom, current;
3135
            bottom = 0;
3136
            top = blockStarts.length;
3137
            current = top/2;
3138

3139
            // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3140
            while (top - bottom > 1) {
3141
                if (codePoint >= blockStarts[current]) {
3142
                    bottom = current;
3143
                } else {
3144
                    top = current;
3145
                }
3146
                current = (top + bottom) / 2;
3147
            }
3148
            return blocks[current];
3149
        }
3150

3151
        /**
3152
         * Returns the UnicodeBlock with the given name. Block
3153
         * names are determined by The Unicode Standard. The file
3154
         * Blocks-&lt;version&gt;.txt defines blocks for a particular
3155
         * version of the standard. The {@link Character} class specifies
3156
         * the version of the standard that it supports.
3157
         * <p>
3158
         * This method accepts block names in the following forms:
3159
         * <ol>
3160
         * <li> Canonical block names as defined by the Unicode Standard.
3161
         * For example, the standard defines a "Basic Latin" block. Therefore, this
3162
         * method accepts "Basic Latin" as a valid block name. The documentation of
3163
         * each UnicodeBlock provides the canonical name.
3164
         * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3165
         * is a valid block name for the "Basic Latin" block.
3166
         * <li>The text representation of each constant UnicodeBlock identifier.
3167
         * For example, this method will return the {@link #BASIC_LATIN} block if
3168
         * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3169
         * hyphens in the canonical name with underscores.
3170
         * </ol>
3171
         * Finally, character case is ignored for all of the valid block name forms.
3172
         * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3173
         * The en_US locale's case mapping rules are used to provide case-insensitive
3174
         * string comparisons for block name validation.
3175
         * <p>
3176
         * If the Unicode Standard changes block names, both the previous and
3177
         * current names will be accepted.
3178
         *
3179
         * @param blockName A {@code UnicodeBlock} name.
3180
         * @return The {@code UnicodeBlock} instance identified
3181
         *         by {@code blockName}
3182
         * @throws IllegalArgumentException if {@code blockName} is an
3183
         *         invalid name
3184
         * @throws NullPointerException if {@code blockName} is null
3185
         * @since 1.5
3186
         */
3187
        public static final UnicodeBlock forName(String blockName) {
3188
            UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3189
            if (block == null) {
3190
                throw new IllegalArgumentException();
3191
            }
3192
            return block;
3193
        }
3194
    }
3195

3196

3197
    /**
3198
     * A family of character subsets representing the character scripts
3199
     * defined in the <a href="http://www.unicode.org/reports/tr24/">
3200
     * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3201
     * character is assigned to a single Unicode script, either a specific
3202
     * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3203
     * one of the following three special values,
3204
     * {@link Character.UnicodeScript#INHERITED Inherited},
3205
     * {@link Character.UnicodeScript#COMMON Common} or
3206
     * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3207
     *
3208
     * @since 1.7
3209
     */
3210
    public static enum UnicodeScript {
3211
        /**
3212
         * Unicode script "Common".
3213
         */
3214
        COMMON,
3215

3216
        /**
3217
         * Unicode script "Latin".
3218
         */
3219
        LATIN,
3220

3221
        /**
3222
         * Unicode script "Greek".
3223
         */
3224
        GREEK,
3225

3226
        /**
3227
         * Unicode script "Cyrillic".
3228
         */
3229
        CYRILLIC,
3230

3231
        /**
3232
         * Unicode script "Armenian".
3233
         */
3234
        ARMENIAN,
3235

3236
        /**
3237
         * Unicode script "Hebrew".
3238
         */
3239
        HEBREW,
3240

3241
        /**
3242
         * Unicode script "Arabic".
3243
         */
3244
        ARABIC,
3245

3246
        /**
3247
         * Unicode script "Syriac".
3248
         */
3249
        SYRIAC,
3250

3251
        /**
3252
         * Unicode script "Thaana".
3253
         */
3254
        THAANA,
3255

3256
        /**
3257
         * Unicode script "Devanagari".
3258
         */
3259
        DEVANAGARI,
3260

3261
        /**
3262
         * Unicode script "Bengali".
3263
         */
3264
        BENGALI,
3265

3266
        /**
3267
         * Unicode script "Gurmukhi".
3268
         */
3269
        GURMUKHI,
3270

3271
        /**
3272
         * Unicode script "Gujarati".
3273
         */
3274
        GUJARATI,
3275

3276
        /**
3277
         * Unicode script "Oriya".
3278
         */
3279
        ORIYA,
3280

3281
        /**
3282
         * Unicode script "Tamil".
3283
         */
3284
        TAMIL,
3285

3286
        /**
3287
         * Unicode script "Telugu".
3288
         */
3289
        TELUGU,
3290

3291
        /**
3292
         * Unicode script "Kannada".
3293
         */
3294
        KANNADA,
3295

3296
        /**
3297
         * Unicode script "Malayalam".
3298
         */
3299
        MALAYALAM,
3300

3301
        /**
3302
         * Unicode script "Sinhala".
3303
         */
3304
        SINHALA,
3305

3306
        /**
3307
         * Unicode script "Thai".
3308
         */
3309
        THAI,
3310

3311
        /**
3312
         * Unicode script "Lao".
3313
         */
3314
        LAO,
3315

3316
        /**
3317
         * Unicode script "Tibetan".
3318
         */
3319
        TIBETAN,
3320

3321
        /**
3322
         * Unicode script "Myanmar".
3323
         */
3324
        MYANMAR,
3325

3326
        /**
3327
         * Unicode script "Georgian".
3328
         */
3329
        GEORGIAN,
3330

3331
        /**
3332
         * Unicode script "Hangul".
3333
         */
3334
        HANGUL,
3335

3336
        /**
3337
         * Unicode script "Ethiopic".
3338
         */
3339
        ETHIOPIC,
3340

3341
        /**
3342
         * Unicode script "Cherokee".
3343
         */
3344
        CHEROKEE,
3345

3346
        /**
3347
         * Unicode script "Canadian_Aboriginal".
3348
         */
3349
        CANADIAN_ABORIGINAL,
3350

3351
        /**
3352
         * Unicode script "Ogham".
3353
         */
3354
        OGHAM,
3355

3356
        /**
3357
         * Unicode script "Runic".
3358
         */
3359
        RUNIC,
3360

3361
        /**
3362
         * Unicode script "Khmer".
3363
         */
3364
        KHMER,
3365

3366
        /**
3367
         * Unicode script "Mongolian".
3368
         */
3369
        MONGOLIAN,
3370

3371
        /**
3372
         * Unicode script "Hiragana".
3373
         */
3374
        HIRAGANA,
3375

3376
        /**
3377
         * Unicode script "Katakana".
3378
         */
3379
        KATAKANA,
3380

3381
        /**
3382
         * Unicode script "Bopomofo".
3383
         */
3384
        BOPOMOFO,
3385

3386
        /**
3387
         * Unicode script "Han".
3388
         */
3389
        HAN,
3390

3391
        /**
3392
         * Unicode script "Yi".
3393
         */
3394
        YI,
3395

3396
        /**
3397
         * Unicode script "Old_Italic".
3398
         */
3399
        OLD_ITALIC,
3400

3401
        /**
3402
         * Unicode script "Gothic".
3403
         */
3404
        GOTHIC,
3405

3406
        /**
3407
         * Unicode script "Deseret".
3408
         */
3409
        DESERET,
3410

3411
        /**
3412
         * Unicode script "Inherited".
3413
         */
3414
        INHERITED,
3415

3416
        /**
3417
         * Unicode script "Tagalog".
3418
         */
3419
        TAGALOG,
3420

3421
        /**
3422
         * Unicode script "Hanunoo".
3423
         */
3424
        HANUNOO,
3425

3426
        /**
3427
         * Unicode script "Buhid".
3428
         */
3429
        BUHID,
3430

3431
        /**
3432
         * Unicode script "Tagbanwa".
3433
         */
3434
        TAGBANWA,
3435

3436
        /**
3437
         * Unicode script "Limbu".
3438
         */
3439
        LIMBU,
3440

3441
        /**
3442
         * Unicode script "Tai_Le".
3443
         */
3444
        TAI_LE,
3445

3446
        /**
3447
         * Unicode script "Linear_B".
3448
         */
3449
        LINEAR_B,
3450

3451
        /**
3452
         * Unicode script "Ugaritic".
3453
         */
3454
        UGARITIC,
3455

3456
        /**
3457
         * Unicode script "Shavian".
3458
         */
3459
        SHAVIAN,
3460

3461
        /**
3462
         * Unicode script "Osmanya".
3463
         */
3464
        OSMANYA,
3465

3466
        /**
3467
         * Unicode script "Cypriot".
3468
         */
3469
        CYPRIOT,
3470

3471
        /**
3472
         * Unicode script "Braille".
3473
         */
3474
        BRAILLE,
3475

3476
        /**
3477
         * Unicode script "Buginese".
3478
         */
3479
        BUGINESE,
3480

3481
        /**
3482
         * Unicode script "Coptic".
3483
         */
3484
        COPTIC,
3485

3486
        /**
3487
         * Unicode script "New_Tai_Lue".
3488
         */
3489
        NEW_TAI_LUE,
3490

3491
        /**
3492
         * Unicode script "Glagolitic".
3493
         */
3494
        GLAGOLITIC,
3495

3496
        /**
3497
         * Unicode script "Tifinagh".
3498
         */
3499
        TIFINAGH,
3500

3501
        /**
3502
         * Unicode script "Syloti_Nagri".
3503
         */
3504
        SYLOTI_NAGRI,
3505

3506
        /**
3507
         * Unicode script "Old_Persian".
3508
         */
3509
        OLD_PERSIAN,
3510

3511
        /**
3512
         * Unicode script "Kharoshthi".
3513
         */
3514
        KHAROSHTHI,
3515

3516
        /**
3517
         * Unicode script "Balinese".
3518
         */
3519
        BALINESE,
3520

3521
        /**
3522
         * Unicode script "Cuneiform".
3523
         */
3524
        CUNEIFORM,
3525

3526
        /**
3527
         * Unicode script "Phoenician".
3528
         */
3529
        PHOENICIAN,
3530

3531
        /**
3532
         * Unicode script "Phags_Pa".
3533
         */
3534
        PHAGS_PA,
3535

3536
        /**
3537
         * Unicode script "Nko".
3538
         */
3539
        NKO,
3540

3541
        /**
3542
         * Unicode script "Sundanese".
3543
         */
3544
        SUNDANESE,
3545

3546
        /**
3547
         * Unicode script "Batak".
3548
         */
3549
        BATAK,
3550

3551
        /**
3552
         * Unicode script "Lepcha".
3553
         */
3554
        LEPCHA,
3555

3556
        /**
3557
         * Unicode script "Ol_Chiki".
3558
         */
3559
        OL_CHIKI,
3560

3561
        /**
3562
         * Unicode script "Vai".
3563
         */
3564
        VAI,
3565

3566
        /**
3567
         * Unicode script "Saurashtra".
3568
         */
3569
        SAURASHTRA,
3570

3571
        /**
3572
         * Unicode script "Kayah_Li".
3573
         */
3574
        KAYAH_LI,
3575

3576
        /**
3577
         * Unicode script "Rejang".
3578
         */
3579
        REJANG,
3580

3581
        /**
3582
         * Unicode script "Lycian".
3583
         */
3584
        LYCIAN,
3585

3586
        /**
3587
         * Unicode script "Carian".
3588
         */
3589
        CARIAN,
3590

3591
        /**
3592
         * Unicode script "Lydian".
3593
         */
3594
        LYDIAN,
3595

3596
        /**
3597
         * Unicode script "Cham".
3598
         */
3599
        CHAM,
3600

3601
        /**
3602
         * Unicode script "Tai_Tham".
3603
         */
3604
        TAI_THAM,
3605

3606
        /**
3607
         * Unicode script "Tai_Viet".
3608
         */
3609
        TAI_VIET,
3610

3611
        /**
3612
         * Unicode script "Avestan".
3613
         */
3614
        AVESTAN,
3615

3616
        /**
3617
         * Unicode script "Egyptian_Hieroglyphs".
3618
         */
3619
        EGYPTIAN_HIEROGLYPHS,
3620

3621
        /**
3622
         * Unicode script "Samaritan".
3623
         */
3624
        SAMARITAN,
3625

3626
        /**
3627
         * Unicode script "Mandaic".
3628
         */
3629
        MANDAIC,
3630

3631
        /**
3632
         * Unicode script "Lisu".
3633
         */
3634
        LISU,
3635

3636
        /**
3637
         * Unicode script "Bamum".
3638
         */
3639
        BAMUM,
3640

3641
        /**
3642
         * Unicode script "Javanese".
3643
         */
3644
        JAVANESE,
3645

3646
        /**
3647
         * Unicode script "Meetei_Mayek".
3648
         */
3649
        MEETEI_MAYEK,
3650

3651
        /**
3652
         * Unicode script "Imperial_Aramaic".
3653
         */
3654
        IMPERIAL_ARAMAIC,
3655

3656
        /**
3657
         * Unicode script "Old_South_Arabian".
3658
         */
3659
        OLD_SOUTH_ARABIAN,
3660

3661
        /**
3662
         * Unicode script "Inscriptional_Parthian".
3663
         */
3664
        INSCRIPTIONAL_PARTHIAN,
3665

3666
        /**
3667
         * Unicode script "Inscriptional_Pahlavi".
3668
         */
3669
        INSCRIPTIONAL_PAHLAVI,
3670

3671
        /**
3672
         * Unicode script "Old_Turkic".
3673
         */
3674
        OLD_TURKIC,
3675

3676
        /**
3677
         * Unicode script "Brahmi".
3678
         */
3679
        BRAHMI,
3680

3681
        /**
3682
         * Unicode script "Kaithi".
3683
         */
3684
        KAITHI,
3685

3686
        /**
3687
         * Unicode script "Meroitic_Hieroglyphs".
3688
         */
3689
        MEROITIC_HIEROGLYPHS,
3690

3691
        /**
3692
         * Unicode script "Meroitic_Cursive".
3693
         */
3694
        MEROITIC_CURSIVE,
3695

3696
        /**
3697
         * Unicode script "Sora_Sompeng".
3698
         */
3699
        SORA_SOMPENG,
3700

3701
        /**
3702
         * Unicode script "Chakma".
3703
         */
3704
        CHAKMA,
3705

3706
        /**
3707
         * Unicode script "Sharada".
3708
         */
3709
        SHARADA,
3710

3711
        /**
3712
         * Unicode script "Takri".
3713
         */
3714
        TAKRI,
3715

3716
        /**
3717
         * Unicode script "Miao".
3718
         */
3719
        MIAO,
3720

3721
        /**
3722
         * Unicode script "Unknown".
3723
         */
3724
        UNKNOWN;
3725

3726
        private static final int[] scriptStarts = {
3727
            0x0000,   // 0000..0040; COMMON
3728
            0x0041,   // 0041..005A; LATIN
3729
            0x005B,   // 005B..0060; COMMON
3730
            0x0061,   // 0061..007A; LATIN
3731
            0x007B,   // 007B..00A9; COMMON
3732
            0x00AA,   // 00AA..00AA; LATIN
3733
            0x00AB,   // 00AB..00B9; COMMON
3734
            0x00BA,   // 00BA..00BA; LATIN
3735
            0x00BB,   // 00BB..00BF; COMMON
3736
            0x00C0,   // 00C0..00D6; LATIN
3737
            0x00D7,   // 00D7..00D7; COMMON
3738
            0x00D8,   // 00D8..00F6; LATIN
3739
            0x00F7,   // 00F7..00F7; COMMON
3740
            0x00F8,   // 00F8..02B8; LATIN
3741
            0x02B9,   // 02B9..02DF; COMMON
3742
            0x02E0,   // 02E0..02E4; LATIN
3743
            0x02E5,   // 02E5..02E9; COMMON
3744
            0x02EA,   // 02EA..02EB; BOPOMOFO
3745
            0x02EC,   // 02EC..02FF; COMMON
3746
            0x0300,   // 0300..036F; INHERITED
3747
            0x0370,   // 0370..0373; GREEK
3748
            0x0374,   // 0374..0374; COMMON
3749
            0x0375,   // 0375..037D; GREEK
3750
            0x037E,   // 037E..0383; COMMON
3751
            0x0384,   // 0384..0384; GREEK
3752
            0x0385,   // 0385..0385; COMMON
3753
            0x0386,   // 0386..0386; GREEK
3754
            0x0387,   // 0387..0387; COMMON
3755
            0x0388,   // 0388..03E1; GREEK
3756
            0x03E2,   // 03E2..03EF; COPTIC
3757
            0x03F0,   // 03F0..03FF; GREEK
3758
            0x0400,   // 0400..0484; CYRILLIC
3759
            0x0485,   // 0485..0486; INHERITED
3760
            0x0487,   // 0487..0530; CYRILLIC
3761
            0x0531,   // 0531..0588; ARMENIAN
3762
            0x0589,   // 0589..0589; COMMON
3763
            0x058A,   // 058A..0590; ARMENIAN
3764
            0x0591,   // 0591..05FF; HEBREW
3765
            0x0600,   // 0600..060B; ARABIC
3766
            0x060C,   // 060C..060C; COMMON
3767
            0x060D,   // 060D..061A; ARABIC
3768
            0x061B,   // 061B..061D; COMMON
3769
            0x061E,   // 061E..061E; ARABIC
3770
            0x061F,   // 061F..061F; COMMON
3771
            0x0620,   // 0620..063F; ARABIC
3772
            0x0640,   // 0640..0640; COMMON
3773
            0x0641,   // 0641..064A; ARABIC
3774
            0x064B,   // 064B..0655; INHERITED
3775
            0x0656,   // 0656..065F; ARABIC
3776
            0x0660,   // 0660..0669; COMMON
3777
            0x066A,   // 066A..066F; ARABIC
3778
            0x0670,   // 0670..0670; INHERITED
3779
            0x0671,   // 0671..06DC; ARABIC
3780
            0x06DD,   // 06DD..06DD; COMMON
3781
            0x06DE,   // 06DE..06FF; ARABIC
3782
            0x0700,   // 0700..074F; SYRIAC
3783
            0x0750,   // 0750..077F; ARABIC
3784
            0x0780,   // 0780..07BF; THAANA
3785
            0x07C0,   // 07C0..07FF; NKO
3786
            0x0800,   // 0800..083F; SAMARITAN
3787
            0x0840,   // 0840..089F; MANDAIC
3788
            0x08A0,   // 08A0..08FF; ARABIC
3789
            0x0900,   // 0900..0950; DEVANAGARI
3790
            0x0951,   // 0951..0952; INHERITED
3791
            0x0953,   // 0953..0963; DEVANAGARI
3792
            0x0964,   // 0964..0965; COMMON
3793
            0x0966,   // 0966..0980; DEVANAGARI
3794
            0x0981,   // 0981..0A00; BENGALI
3795
            0x0A01,   // 0A01..0A80; GURMUKHI
3796
            0x0A81,   // 0A81..0B00; GUJARATI
3797
            0x0B01,   // 0B01..0B81; ORIYA
3798
            0x0B82,   // 0B82..0C00; TAMIL
3799
            0x0C01,   // 0C01..0C81; TELUGU
3800
            0x0C82,   // 0C82..0CF0; KANNADA
3801
            0x0D02,   // 0D02..0D81; MALAYALAM
3802
            0x0D82,   // 0D82..0E00; SINHALA
3803
            0x0E01,   // 0E01..0E3E; THAI
3804
            0x0E3F,   // 0E3F..0E3F; COMMON
3805
            0x0E40,   // 0E40..0E80; THAI
3806
            0x0E81,   // 0E81..0EFF; LAO
3807
            0x0F00,   // 0F00..0FD4; TIBETAN
3808
            0x0FD5,   // 0FD5..0FD8; COMMON
3809
            0x0FD9,   // 0FD9..0FFF; TIBETAN
3810
            0x1000,   // 1000..109F; MYANMAR
3811
            0x10A0,   // 10A0..10FA; GEORGIAN
3812
            0x10FB,   // 10FB..10FB; COMMON
3813
            0x10FC,   // 10FC..10FF; GEORGIAN
3814
            0x1100,   // 1100..11FF; HANGUL
3815
            0x1200,   // 1200..139F; ETHIOPIC
3816
            0x13A0,   // 13A0..13FF; CHEROKEE
3817
            0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3818
            0x1680,   // 1680..169F; OGHAM
3819
            0x16A0,   // 16A0..16EA; RUNIC
3820
            0x16EB,   // 16EB..16ED; COMMON
3821
            0x16EE,   // 16EE..16FF; RUNIC
3822
            0x1700,   // 1700..171F; TAGALOG
3823
            0x1720,   // 1720..1734; HANUNOO
3824
            0x1735,   // 1735..173F; COMMON
3825
            0x1740,   // 1740..175F; BUHID
3826
            0x1760,   // 1760..177F; TAGBANWA
3827
            0x1780,   // 1780..17FF; KHMER
3828
            0x1800,   // 1800..1801; MONGOLIAN
3829
            0x1802,   // 1802..1803; COMMON
3830
            0x1804,   // 1804..1804; MONGOLIAN
3831
            0x1805,   // 1805..1805; COMMON
3832
            0x1806,   // 1806..18AF; MONGOLIAN
3833
            0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3834
            0x1900,   // 1900..194F; LIMBU
3835
            0x1950,   // 1950..197F; TAI_LE
3836
            0x1980,   // 1980..19DF; NEW_TAI_LUE
3837
            0x19E0,   // 19E0..19FF; KHMER
3838
            0x1A00,   // 1A00..1A1F; BUGINESE
3839
            0x1A20,   // 1A20..1AFF; TAI_THAM
3840
            0x1B00,   // 1B00..1B7F; BALINESE
3841
            0x1B80,   // 1B80..1BBF; SUNDANESE
3842
            0x1BC0,   // 1BC0..1BFF; BATAK
3843
            0x1C00,   // 1C00..1C4F; LEPCHA
3844
            0x1C50,   // 1C50..1CBF; OL_CHIKI
3845
            0x1CC0,   // 1CC0..1CCF; SUNDANESE
3846
            0x1CD0,   // 1CD0..1CD2; INHERITED
3847
            0x1CD3,   // 1CD3..1CD3; COMMON
3848
            0x1CD4,   // 1CD4..1CE0; INHERITED
3849
            0x1CE1,   // 1CE1..1CE1; COMMON
3850
            0x1CE2,   // 1CE2..1CE8; INHERITED
3851
            0x1CE9,   // 1CE9..1CEC; COMMON
3852
            0x1CED,   // 1CED..1CED; INHERITED
3853
            0x1CEE,   // 1CEE..1CF3; COMMON
3854
            0x1CF4,   // 1CF4..1CF4; INHERITED
3855
            0x1CF5,   // 1CF5..1CFF; COMMON
3856
            0x1D00,   // 1D00..1D25; LATIN
3857
            0x1D26,   // 1D26..1D2A; GREEK
3858
            0x1D2B,   // 1D2B..1D2B; CYRILLIC
3859
            0x1D2C,   // 1D2C..1D5C; LATIN
3860
            0x1D5D,   // 1D5D..1D61; GREEK
3861
            0x1D62,   // 1D62..1D65; LATIN
3862
            0x1D66,   // 1D66..1D6A; GREEK
3863
            0x1D6B,   // 1D6B..1D77; LATIN
3864
            0x1D78,   // 1D78..1D78; CYRILLIC
3865
            0x1D79,   // 1D79..1DBE; LATIN
3866
            0x1DBF,   // 1DBF..1DBF; GREEK
3867
            0x1DC0,   // 1DC0..1DFF; INHERITED
3868
            0x1E00,   // 1E00..1EFF; LATIN
3869
            0x1F00,   // 1F00..1FFF; GREEK
3870
            0x2000,   // 2000..200B; COMMON
3871
            0x200C,   // 200C..200D; INHERITED
3872
            0x200E,   // 200E..2070; COMMON
3873
            0x2071,   // 2071..2073; LATIN
3874
            0x2074,   // 2074..207E; COMMON
3875
            0x207F,   // 207F..207F; LATIN
3876
            0x2080,   // 2080..208F; COMMON
3877
            0x2090,   // 2090..209F; LATIN
3878
            0x20A0,   // 20A0..20CF; COMMON
3879
            0x20D0,   // 20D0..20FF; INHERITED
3880
            0x2100,   // 2100..2125; COMMON
3881
            0x2126,   // 2126..2126; GREEK
3882
            0x2127,   // 2127..2129; COMMON
3883
            0x212A,   // 212A..212B; LATIN
3884
            0x212C,   // 212C..2131; COMMON
3885
            0x2132,   // 2132..2132; LATIN
3886
            0x2133,   // 2133..214D; COMMON
3887
            0x214E,   // 214E..214E; LATIN
3888
            0x214F,   // 214F..215F; COMMON
3889
            0x2160,   // 2160..2188; LATIN
3890
            0x2189,   // 2189..27FF; COMMON
3891
            0x2800,   // 2800..28FF; BRAILLE
3892
            0x2900,   // 2900..2BFF; COMMON
3893
            0x2C00,   // 2C00..2C5F; GLAGOLITIC
3894
            0x2C60,   // 2C60..2C7F; LATIN
3895
            0x2C80,   // 2C80..2CFF; COPTIC
3896
            0x2D00,   // 2D00..2D2F; GEORGIAN
3897
            0x2D30,   // 2D30..2D7F; TIFINAGH
3898
            0x2D80,   // 2D80..2DDF; ETHIOPIC
3899
            0x2DE0,   // 2DE0..2DFF; CYRILLIC
3900
            0x2E00,   // 2E00..2E7F; COMMON
3901
            0x2E80,   // 2E80..2FEF; HAN
3902
            0x2FF0,   // 2FF0..3004; COMMON
3903
            0x3005,   // 3005..3005; HAN
3904
            0x3006,   // 3006..3006; COMMON
3905
            0x3007,   // 3007..3007; HAN
3906
            0x3008,   // 3008..3020; COMMON
3907
            0x3021,   // 3021..3029; HAN
3908
            0x302A,   // 302A..302D; INHERITED
3909
            0x302E,   // 302E..302F; HANGUL
3910
            0x3030,   // 3030..3037; COMMON
3911
            0x3038,   // 3038..303B; HAN
3912
            0x303C,   // 303C..3040; COMMON
3913
            0x3041,   // 3041..3098; HIRAGANA
3914
            0x3099,   // 3099..309A; INHERITED
3915
            0x309B,   // 309B..309C; COMMON
3916
            0x309D,   // 309D..309F; HIRAGANA
3917
            0x30A0,   // 30A0..30A0; COMMON
3918
            0x30A1,   // 30A1..30FA; KATAKANA
3919
            0x30FB,   // 30FB..30FC; COMMON
3920
            0x30FD,   // 30FD..3104; KATAKANA
3921
            0x3105,   // 3105..3130; BOPOMOFO
3922
            0x3131,   // 3131..318F; HANGUL
3923
            0x3190,   // 3190..319F; COMMON
3924
            0x31A0,   // 31A0..31BF; BOPOMOFO
3925
            0x31C0,   // 31C0..31EF; COMMON
3926
            0x31F0,   // 31F0..31FF; KATAKANA
3927
            0x3200,   // 3200..321F; HANGUL
3928
            0x3220,   // 3220..325F; COMMON
3929
            0x3260,   // 3260..327E; HANGUL
3930
            0x327F,   // 327F..32CF; COMMON
3931
            0x32D0,   // 32D0..32FE; KATAKANA
3932
            0x32FF,   // 32FF      ; COMMON
3933
            0x3300,   // 3300..3357; KATAKANA
3934
            0x3358,   // 3358..33FF; COMMON
3935
            0x3400,   // 3400..4DBF; HAN
3936
            0x4DC0,   // 4DC0..4DFF; COMMON
3937
            0x4E00,   // 4E00..9FFF; HAN
3938
            0xA000,   // A000..A4CF; YI
3939
            0xA4D0,   // A4D0..A4FF; LISU
3940
            0xA500,   // A500..A63F; VAI
3941
            0xA640,   // A640..A69F; CYRILLIC
3942
            0xA6A0,   // A6A0..A6FF; BAMUM
3943
            0xA700,   // A700..A721; COMMON
3944
            0xA722,   // A722..A787; LATIN
3945
            0xA788,   // A788..A78A; COMMON
3946
            0xA78B,   // A78B..A7FF; LATIN
3947
            0xA800,   // A800..A82F; SYLOTI_NAGRI
3948
            0xA830,   // A830..A83F; COMMON
3949
            0xA840,   // A840..A87F; PHAGS_PA
3950
            0xA880,   // A880..A8DF; SAURASHTRA
3951
            0xA8E0,   // A8E0..A8FF; DEVANAGARI
3952
            0xA900,   // A900..A92F; KAYAH_LI
3953
            0xA930,   // A930..A95F; REJANG
3954
            0xA960,   // A960..A97F; HANGUL
3955
            0xA980,   // A980..A9FF; JAVANESE
3956
            0xAA00,   // AA00..AA5F; CHAM
3957
            0xAA60,   // AA60..AA7F; MYANMAR
3958
            0xAA80,   // AA80..AADF; TAI_VIET
3959
            0xAAE0,   // AAE0..AB00; MEETEI_MAYEK
3960
            0xAB01,   // AB01..ABBF; ETHIOPIC
3961
            0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3962
            0xAC00,   // AC00..D7FB; HANGUL
3963
            0xD7FC,   // D7FC..F8FF; UNKNOWN
3964
            0xF900,   // F900..FAFF; HAN
3965
            0xFB00,   // FB00..FB12; LATIN
3966
            0xFB13,   // FB13..FB1C; ARMENIAN
3967
            0xFB1D,   // FB1D..FB4F; HEBREW
3968
            0xFB50,   // FB50..FD3D; ARABIC
3969
            0xFD3E,   // FD3E..FD4F; COMMON
3970
            0xFD50,   // FD50..FDFC; ARABIC
3971
            0xFDFD,   // FDFD..FDFF; COMMON
3972
            0xFE00,   // FE00..FE0F; INHERITED
3973
            0xFE10,   // FE10..FE1F; COMMON
3974
            0xFE20,   // FE20..FE2F; INHERITED
3975
            0xFE30,   // FE30..FE6F; COMMON
3976
            0xFE70,   // FE70..FEFE; ARABIC
3977
            0xFEFF,   // FEFF..FF20; COMMON
3978
            0xFF21,   // FF21..FF3A; LATIN
3979
            0xFF3B,   // FF3B..FF40; COMMON
3980
            0xFF41,   // FF41..FF5A; LATIN
3981
            0xFF5B,   // FF5B..FF65; COMMON
3982
            0xFF66,   // FF66..FF6F; KATAKANA
3983
            0xFF70,   // FF70..FF70; COMMON
3984
            0xFF71,   // FF71..FF9D; KATAKANA
3985
            0xFF9E,   // FF9E..FF9F; COMMON
3986
            0xFFA0,   // FFA0..FFDF; HANGUL
3987
            0xFFE0,   // FFE0..FFFF; COMMON
3988
            0x10000,  // 10000..100FF; LINEAR_B
3989
            0x10100,  // 10100..1013F; COMMON
3990
            0x10140,  // 10140..1018F; GREEK
3991
            0x10190,  // 10190..101FC; COMMON
3992
            0x101FD,  // 101FD..1027F; INHERITED
3993
            0x10280,  // 10280..1029F; LYCIAN
3994
            0x102A0,  // 102A0..102FF; CARIAN
3995
            0x10300,  // 10300..1032F; OLD_ITALIC
3996
            0x10330,  // 10330..1037F; GOTHIC
3997
            0x10380,  // 10380..1039F; UGARITIC
3998
            0x103A0,  // 103A0..103FF; OLD_PERSIAN
3999
            0x10400,  // 10400..1044F; DESERET
4000
            0x10450,  // 10450..1047F; SHAVIAN
4001
            0x10480,  // 10480..107FF; OSMANYA
4002
            0x10800,  // 10800..1083F; CYPRIOT
4003
            0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
4004
            0x10900,  // 10900..1091F; PHOENICIAN
4005
            0x10920,  // 10920..1097F; LYDIAN
4006
            0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
4007
            0x109A0,  // 109A0..109FF; MEROITIC_CURSIVE
4008
            0x10A00,  // 10A00..10A5F; KHAROSHTHI
4009
            0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
4010
            0x10B00,  // 10B00..10B3F; AVESTAN
4011
            0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
4012
            0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
4013
            0x10C00,  // 10C00..10E5F; OLD_TURKIC
4014
            0x10E60,  // 10E60..10FFF; ARABIC
4015
            0x11000,  // 11000..1107F; BRAHMI
4016
            0x11080,  // 11080..110CF; KAITHI
4017
            0x110D0,  // 110D0..110FF; SORA_SOMPENG
4018
            0x11100,  // 11100..1117F; CHAKMA
4019
            0x11180,  // 11180..1167F; SHARADA
4020
            0x11680,  // 11680..116CF; TAKRI
4021
            0x12000,  // 12000..12FFF; CUNEIFORM
4022
            0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
4023
            0x16800,  // 16800..16A38; BAMUM
4024
            0x16F00,  // 16F00..16F9F; MIAO
4025
            0x1B000,  // 1B000..1B000; KATAKANA
4026
            0x1B001,  // 1B001..1CFFF; HIRAGANA
4027
            0x1D000,  // 1D000..1D166; COMMON
4028
            0x1D167,  // 1D167..1D169; INHERITED
4029
            0x1D16A,  // 1D16A..1D17A; COMMON
4030
            0x1D17B,  // 1D17B..1D182; INHERITED
4031
            0x1D183,  // 1D183..1D184; COMMON
4032
            0x1D185,  // 1D185..1D18B; INHERITED
4033
            0x1D18C,  // 1D18C..1D1A9; COMMON
4034
            0x1D1AA,  // 1D1AA..1D1AD; INHERITED
4035
            0x1D1AE,  // 1D1AE..1D1FF; COMMON
4036
            0x1D200,  // 1D200..1D2FF; GREEK
4037
            0x1D300,  // 1D300..1EDFF; COMMON
4038
            0x1EE00,  // 1EE00..1EFFF; ARABIC
4039
            0x1F000,  // 1F000..1F1FF; COMMON
4040
            0x1F200,  // 1F200..1F200; HIRAGANA
4041
            0x1F201,  // 1F201..1FFFF; COMMON
4042
            0x20000,  // 20000..E0000; HAN
4043
            0xE0001,  // E0001..E00FF; COMMON
4044
            0xE0100,  // E0100..E01EF; INHERITED
4045
            0xE01F0   // E01F0..10FFFF; UNKNOWN
4046

4047
        };
4048

4049
        private static final UnicodeScript[] scripts = {
4050
            COMMON,
4051
            LATIN,
4052
            COMMON,
4053
            LATIN,
4054
            COMMON,
4055
            LATIN,
4056
            COMMON,
4057
            LATIN,
4058
            COMMON,
4059
            LATIN,
4060
            COMMON,
4061
            LATIN,
4062
            COMMON,
4063
            LATIN,
4064
            COMMON,
4065
            LATIN,
4066
            COMMON,
4067
            BOPOMOFO,
4068
            COMMON,
4069
            INHERITED,
4070
            GREEK,
4071
            COMMON,
4072
            GREEK,
4073
            COMMON,
4074
            GREEK,
4075
            COMMON,
4076
            GREEK,
4077
            COMMON,
4078
            GREEK,
4079
            COPTIC,
4080
            GREEK,
4081
            CYRILLIC,
4082
            INHERITED,
4083
            CYRILLIC,
4084
            ARMENIAN,
4085
            COMMON,
4086
            ARMENIAN,
4087
            HEBREW,
4088
            ARABIC,
4089
            COMMON,
4090
            ARABIC,
4091
            COMMON,
4092
            ARABIC,
4093
            COMMON,
4094
            ARABIC,
4095
            COMMON,
4096
            ARABIC,
4097
            INHERITED,
4098
            ARABIC,
4099
            COMMON,
4100
            ARABIC,
4101
            INHERITED,
4102
            ARABIC,
4103
            COMMON,
4104
            ARABIC,
4105
            SYRIAC,
4106
            ARABIC,
4107
            THAANA,
4108
            NKO,
4109
            SAMARITAN,
4110
            MANDAIC,
4111
            ARABIC,
4112
            DEVANAGARI,
4113
            INHERITED,
4114
            DEVANAGARI,
4115
            COMMON,
4116
            DEVANAGARI,
4117
            BENGALI,
4118
            GURMUKHI,
4119
            GUJARATI,
4120
            ORIYA,
4121
            TAMIL,
4122
            TELUGU,
4123
            KANNADA,
4124
            MALAYALAM,
4125
            SINHALA,
4126
            THAI,
4127
            COMMON,
4128
            THAI,
4129
            LAO,
4130
            TIBETAN,
4131
            COMMON,
4132
            TIBETAN,
4133
            MYANMAR,
4134
            GEORGIAN,
4135
            COMMON,
4136
            GEORGIAN,
4137
            HANGUL,
4138
            ETHIOPIC,
4139
            CHEROKEE,
4140
            CANADIAN_ABORIGINAL,
4141
            OGHAM,
4142
            RUNIC,
4143
            COMMON,
4144
            RUNIC,
4145
            TAGALOG,
4146
            HANUNOO,
4147
            COMMON,
4148
            BUHID,
4149
            TAGBANWA,
4150
            KHMER,
4151
            MONGOLIAN,
4152
            COMMON,
4153
            MONGOLIAN,
4154
            COMMON,
4155
            MONGOLIAN,
4156
            CANADIAN_ABORIGINAL,
4157
            LIMBU,
4158
            TAI_LE,
4159
            NEW_TAI_LUE,
4160
            KHMER,
4161
            BUGINESE,
4162
            TAI_THAM,
4163
            BALINESE,
4164
            SUNDANESE,
4165
            BATAK,
4166
            LEPCHA,
4167
            OL_CHIKI,
4168
            SUNDANESE,
4169
            INHERITED,
4170
            COMMON,
4171
            INHERITED,
4172
            COMMON,
4173
            INHERITED,
4174
            COMMON,
4175
            INHERITED,
4176
            COMMON,
4177
            INHERITED,
4178
            COMMON,
4179
            LATIN,
4180
            GREEK,
4181
            CYRILLIC,
4182
            LATIN,
4183
            GREEK,
4184
            LATIN,
4185
            GREEK,
4186
            LATIN,
4187
            CYRILLIC,
4188
            LATIN,
4189
            GREEK,
4190
            INHERITED,
4191
            LATIN,
4192
            GREEK,
4193
            COMMON,
4194
            INHERITED,
4195
            COMMON,
4196
            LATIN,
4197
            COMMON,
4198
            LATIN,
4199
            COMMON,
4200
            LATIN,
4201
            COMMON,
4202
            INHERITED,
4203
            COMMON,
4204
            GREEK,
4205
            COMMON,
4206
            LATIN,
4207
            COMMON,
4208
            LATIN,
4209
            COMMON,
4210
            LATIN,
4211
            COMMON,
4212
            LATIN,
4213
            COMMON,
4214
            BRAILLE,
4215
            COMMON,
4216
            GLAGOLITIC,
4217
            LATIN,
4218
            COPTIC,
4219
            GEORGIAN,
4220
            TIFINAGH,
4221
            ETHIOPIC,
4222
            CYRILLIC,
4223
            COMMON,
4224
            HAN,
4225
            COMMON,
4226
            HAN,
4227
            COMMON,
4228
            HAN,
4229
            COMMON,
4230
            HAN,
4231
            INHERITED,
4232
            HANGUL,
4233
            COMMON,
4234
            HAN,
4235
            COMMON,
4236
            HIRAGANA,
4237
            INHERITED,
4238
            COMMON,
4239
            HIRAGANA,
4240
            COMMON,
4241
            KATAKANA,
4242
            COMMON,
4243
            KATAKANA,
4244
            BOPOMOFO,
4245
            HANGUL,
4246
            COMMON,
4247
            BOPOMOFO,
4248
            COMMON,
4249
            KATAKANA,
4250
            HANGUL,
4251
            COMMON,
4252
            HANGUL,
4253
            COMMON,
4254
            KATAKANA,  // 32D0..32FE
4255
            COMMON,    // 32FF
4256
            KATAKANA,  // 3300..3357
4257
            COMMON,
4258
            HAN,
4259
            COMMON,
4260
            HAN,
4261
            YI,
4262
            LISU,
4263
            VAI,
4264
            CYRILLIC,
4265
            BAMUM,
4266
            COMMON,
4267
            LATIN,
4268
            COMMON,
4269
            LATIN,
4270
            SYLOTI_NAGRI,
4271
            COMMON,
4272
            PHAGS_PA,
4273
            SAURASHTRA,
4274
            DEVANAGARI,
4275
            KAYAH_LI,
4276
            REJANG,
4277
            HANGUL,
4278
            JAVANESE,
4279
            CHAM,
4280
            MYANMAR,
4281
            TAI_VIET,
4282
            MEETEI_MAYEK,
4283
            ETHIOPIC,
4284
            MEETEI_MAYEK,
4285
            HANGUL,
4286
            UNKNOWN     ,
4287
            HAN,
4288
            LATIN,
4289
            ARMENIAN,
4290
            HEBREW,
4291
            ARABIC,
4292
            COMMON,
4293
            ARABIC,
4294
            COMMON,
4295
            INHERITED,
4296
            COMMON,
4297
            INHERITED,
4298
            COMMON,
4299
            ARABIC,
4300
            COMMON,
4301
            LATIN,
4302
            COMMON,
4303
            LATIN,
4304
            COMMON,
4305
            KATAKANA,
4306
            COMMON,
4307
            KATAKANA,
4308
            COMMON,
4309
            HANGUL,
4310
            COMMON,
4311
            LINEAR_B,
4312
            COMMON,
4313
            GREEK,
4314
            COMMON,
4315
            INHERITED,
4316
            LYCIAN,
4317
            CARIAN,
4318
            OLD_ITALIC,
4319
            GOTHIC,
4320
            UGARITIC,
4321
            OLD_PERSIAN,
4322
            DESERET,
4323
            SHAVIAN,
4324
            OSMANYA,
4325
            CYPRIOT,
4326
            IMPERIAL_ARAMAIC,
4327
            PHOENICIAN,
4328
            LYDIAN,
4329
            MEROITIC_HIEROGLYPHS,
4330
            MEROITIC_CURSIVE,
4331
            KHAROSHTHI,
4332
            OLD_SOUTH_ARABIAN,
4333
            AVESTAN,
4334
            INSCRIPTIONAL_PARTHIAN,
4335
            INSCRIPTIONAL_PAHLAVI,
4336
            OLD_TURKIC,
4337
            ARABIC,
4338
            BRAHMI,
4339
            KAITHI,
4340
            SORA_SOMPENG,
4341
            CHAKMA,
4342
            SHARADA,
4343
            TAKRI,
4344
            CUNEIFORM,
4345
            EGYPTIAN_HIEROGLYPHS,
4346
            BAMUM,
4347
            MIAO,
4348
            KATAKANA,
4349
            HIRAGANA,
4350
            COMMON,
4351
            INHERITED,
4352
            COMMON,
4353
            INHERITED,
4354
            COMMON,
4355
            INHERITED,
4356
            COMMON,
4357
            INHERITED,
4358
            COMMON,
4359
            GREEK,
4360
            COMMON,
4361
            ARABIC,
4362
            COMMON,
4363
            HIRAGANA,
4364
            COMMON,
4365
            HAN,
4366
            COMMON,
4367
            INHERITED,
4368
            UNKNOWN
4369
        };
4370

4371
        /**
         * Lookup table from upper-case four-letter script alias to the
         * corresponding {@code UnicodeScript} constant. It is consulted by
         * {@link #forName(String)} before falling back to the enum
         * constant names.
         * <p>
         * The map is assigned exactly once, in the static initializer
         * below, so the reference is declared {@code final}.
         */
        private static final HashMap<String, Character.UnicodeScript> aliases;
        static {
            aliases = new HashMap<>(128);
            aliases.put("ARAB", ARABIC);
            aliases.put("ARMI", IMPERIAL_ARAMAIC);
            aliases.put("ARMN", ARMENIAN);
            aliases.put("AVST", AVESTAN);
            aliases.put("BALI", BALINESE);
            aliases.put("BAMU", BAMUM);
            aliases.put("BATK", BATAK);
            aliases.put("BENG", BENGALI);
            aliases.put("BOPO", BOPOMOFO);
            aliases.put("BRAI", BRAILLE);
            aliases.put("BRAH", BRAHMI);
            aliases.put("BUGI", BUGINESE);
            aliases.put("BUHD", BUHID);
            aliases.put("CAKM", CHAKMA);
            aliases.put("CANS", CANADIAN_ABORIGINAL);
            aliases.put("CARI", CARIAN);
            aliases.put("CHAM", CHAM);
            aliases.put("CHER", CHEROKEE);
            aliases.put("COPT", COPTIC);
            aliases.put("CPRT", CYPRIOT);
            aliases.put("CYRL", CYRILLIC);
            aliases.put("DEVA", DEVANAGARI);
            aliases.put("DSRT", DESERET);
            aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
            aliases.put("ETHI", ETHIOPIC);
            aliases.put("GEOR", GEORGIAN);
            aliases.put("GLAG", GLAGOLITIC);
            aliases.put("GOTH", GOTHIC);
            aliases.put("GREK", GREEK);
            aliases.put("GUJR", GUJARATI);
            aliases.put("GURU", GURMUKHI);
            aliases.put("HANG", HANGUL);
            aliases.put("HANI", HAN);
            aliases.put("HANO", HANUNOO);
            aliases.put("HEBR", HEBREW);
            aliases.put("HIRA", HIRAGANA);
            // "HRKT" is intentionally absent: this enum has no
            // KATAKANA_OR_HIRAGANA constant to map it to.
            aliases.put("ITAL", OLD_ITALIC);
            aliases.put("JAVA", JAVANESE);
            aliases.put("KALI", KAYAH_LI);
            aliases.put("KANA", KATAKANA);
            aliases.put("KHAR", KHAROSHTHI);
            aliases.put("KHMR", KHMER);
            aliases.put("KNDA", KANNADA);
            aliases.put("KTHI", KAITHI);
            aliases.put("LANA", TAI_THAM);
            aliases.put("LAOO", LAO);
            aliases.put("LATN", LATIN);
            aliases.put("LEPC", LEPCHA);
            aliases.put("LIMB", LIMBU);
            aliases.put("LINB", LINEAR_B);
            aliases.put("LISU", LISU);
            aliases.put("LYCI", LYCIAN);
            aliases.put("LYDI", LYDIAN);
            aliases.put("MAND", MANDAIC);
            aliases.put("MERC", MEROITIC_CURSIVE);
            aliases.put("MERO", MEROITIC_HIEROGLYPHS);
            aliases.put("MLYM", MALAYALAM);
            aliases.put("MONG", MONGOLIAN);
            aliases.put("MTEI", MEETEI_MAYEK);
            aliases.put("MYMR", MYANMAR);
            aliases.put("NKOO", NKO);
            aliases.put("OGAM", OGHAM);
            aliases.put("OLCK", OL_CHIKI);
            aliases.put("ORKH", OLD_TURKIC);
            aliases.put("ORYA", ORIYA);
            aliases.put("OSMA", OSMANYA);
            aliases.put("PHAG", PHAGS_PA);
            aliases.put("PLRD", MIAO);
            aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
            aliases.put("PHNX", PHOENICIAN);
            aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
            aliases.put("RJNG", REJANG);
            aliases.put("RUNR", RUNIC);
            aliases.put("SAMR", SAMARITAN);
            aliases.put("SARB", OLD_SOUTH_ARABIAN);
            aliases.put("SAUR", SAURASHTRA);
            aliases.put("SHAW", SHAVIAN);
            aliases.put("SHRD", SHARADA);
            aliases.put("SINH", SINHALA);
            aliases.put("SORA", SORA_SOMPENG);
            aliases.put("SUND", SUNDANESE);
            aliases.put("SYLO", SYLOTI_NAGRI);
            aliases.put("SYRC", SYRIAC);
            aliases.put("TAGB", TAGBANWA);
            aliases.put("TALE", TAI_LE);
            aliases.put("TAKR", TAKRI);
            aliases.put("TALU", NEW_TAI_LUE);
            aliases.put("TAML", TAMIL);
            aliases.put("TAVT", TAI_VIET);
            aliases.put("TELU", TELUGU);
            aliases.put("TFNG", TIFINAGH);
            aliases.put("TGLG", TAGALOG);
            aliases.put("THAA", THAANA);
            aliases.put("THAI", THAI);
            aliases.put("TIBT", TIBETAN);
            aliases.put("UGAR", UGARITIC);
            aliases.put("VAII", VAI);
            aliases.put("XPEO", OLD_PERSIAN);
            aliases.put("XSUX", CUNEIFORM);
            aliases.put("YIII", YI);
            aliases.put("ZINH", INHERITED);
            aliases.put("ZYYY", COMMON);
            aliases.put("ZZZZ", UNKNOWN);
        }
4480

4481
        /**
4482
         * Returns the enum constant representing the Unicode script of which
4483
         * the given character (Unicode code point) is assigned to.
4484
         *
4485
         * @param   codePoint the character (Unicode code point) in question.
4486
         * @return  The {@code UnicodeScript} constant representing the
4487
         *          Unicode script of which this character is assigned to.
4488
         *
4489
         * @exception IllegalArgumentException if the specified
4490
         * {@code codePoint} is an invalid Unicode code point.
4491
         * @see Character#isValidCodePoint(int)
4492
         *
4493
         */
4494
        public static UnicodeScript of(int codePoint) {
4495
            if (!isValidCodePoint(codePoint))
4496
                throw new IllegalArgumentException();
4497
            int type = getType(codePoint);
4498
            // leave SURROGATE and PRIVATE_USE for table lookup
4499
            if (type == UNASSIGNED)
4500
                return UNKNOWN;
4501
            int index = Arrays.binarySearch(scriptStarts, codePoint);
4502
            if (index < 0)
4503
                index = -index - 2;
4504
            return scripts[index];
4505
        }
4506

4507
        /**
4508
         * Returns the UnicodeScript constant with the given Unicode script
4509
         * name or the script name alias. Script names and their aliases are
4510
         * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4511
         * and PropertyValueAliases&lt;version&gt;.txt define script names
4512
         * and the script name aliases for a particular version of the
4513
         * standard. The {@link Character} class specifies the version of
4514
         * the standard that it supports.
4515
         * <p>
4516
         * Character case is ignored for all of the valid script names.
4517
         * The en_US locale's case mapping rules are used to provide
4518
         * case-insensitive string comparisons for script name validation.
4519
         * <p>
4520
         *
4521
         * @param scriptName A {@code UnicodeScript} name.
4522
         * @return The {@code UnicodeScript} constant identified
4523
         *         by {@code scriptName}
4524
         * @throws IllegalArgumentException if {@code scriptName} is an
4525
         *         invalid name
4526
         * @throws NullPointerException if {@code scriptName} is null
4527
         */
4528
        public static final UnicodeScript forName(String scriptName) {
4529
            scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4530
                                 //.replace(' ', '_'));
4531
            UnicodeScript sc = aliases.get(scriptName);
4532
            if (sc != null)
4533
                return sc;
4534
            return valueOf(scriptName);
4535
        }
4536
    }
4537

4538
    /**
     * The {@code char} wrapped by this {@code Character}.
     *
     * @serial
     */
    private final char value;

    /** Serialization version; kept at the JDK 1.0.2 value for interoperability. */
    private static final long serialVersionUID = 3786198910865385080L;

    /**
     * Creates a newly allocated {@code Character} object wrapping
     * the given {@code char} value.
     *
     * @param  value   the {@code char} that the new
     *                  {@code Character} object will hold.
     */
    public Character(char value) {
        this.value = value;
    }
4558

4559
    private static class CharacterCache {
4560
        private CharacterCache(){}
4561

4562
        static final Character cache[] = new Character[127 + 1];
4563

4564
        static {
4565
            for (int i = 0; i < cache.length; i++)
4566
                cache[i] = new Character((char)i);
4567
        }
4568
    }
4569

4570
    /**
4571
     * Returns a <tt>Character</tt> instance representing the specified
4572
     * <tt>char</tt> value.
4573
     * If a new <tt>Character</tt> instance is not required, this method
4574
     * should generally be used in preference to the constructor
4575
     * {@link #Character(char)}, as this method is likely to yield
4576
     * significantly better space and time performance by caching
4577
     * frequently requested values.
4578
     *
4579
     * This method will always cache values in the range {@code
4580
     * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4581
     * cache other values outside of this range.
4582
     *
4583
     * @param  c a char value.
4584
     * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4585
     * @since  1.5
4586
     */
4587
    public static Character valueOf(char c) {
4588
        if (c <= 127) { // must cache
4589
            return CharacterCache.cache[(int)c];
4590
        }
4591
        return new Character(c);
4592
    }
4593

4594
    /**
4595
     * Returns the value of this {@code Character} object.
4596
     * @return  the primitive {@code char} value represented by
4597
     *          this object.
4598
     */
4599
    public char charValue() {
4600
        return value;
4601
    }
4602

4603
    /**
4604
     * Returns a hash code for this {@code Character}; equal to the result
4605
     * of invoking {@code charValue()}.
4606
     *
4607
     * @return a hash code value for this {@code Character}
4608
     */
4609
    @Override
4610
    public int hashCode() {
4611
        return Character.hashCode(value);
4612
    }
4613

4614
    /**
4615
     * Returns a hash code for a {@code char} value; compatible with
4616
     * {@code Character.hashCode()}.
4617
     *
4618
     * @since 1.8
4619
     *
4620
     * @param value The {@code char} for which to return a hash code.
4621
     * @return a hash code value for a {@code char} value.
4622
     */
4623
    public static int hashCode(char value) {
4624
        return (int)value;
4625
    }
4626

4627
    /**
4628
     * Compares this object against the specified object.
4629
     * The result is {@code true} if and only if the argument is not
4630
     * {@code null} and is a {@code Character} object that
4631
     * represents the same {@code char} value as this object.
4632
     *
4633
     * @param   obj   the object to compare with.
4634
     * @return  {@code true} if the objects are the same;
4635
     *          {@code false} otherwise.
4636
     */
4637
    public boolean equals(Object obj) {
4638
        if (obj instanceof Character) {
4639
            return value == ((Character)obj).charValue();
4640
        }
4641
        return false;
4642
    }
4643

4644
    /**
4645
     * Returns a {@code String} object representing this
4646
     * {@code Character}'s value.  The result is a string of
4647
     * length 1 whose sole component is the primitive
4648
     * {@code char} value represented by this
4649
     * {@code Character} object.
4650
     *
4651
     * @return  a string representation of this object.
4652
     */
4653
    public String toString() {
4654
        char buf[] = {value};
4655
        return String.valueOf(buf);
4656
    }
4657

4658
    /**
4659
     * Returns a {@code String} object representing the
4660
     * specified {@code char}.  The result is a string of length
4661
     * 1 consisting solely of the specified {@code char}.
4662
     *
4663
     * @param c the {@code char} to be converted
4664
     * @return the string representation of the specified {@code char}
4665
     * @since 1.4
4666
     */
4667
    public static String toString(char c) {
4668
        return String.valueOf(c);
4669
    }
4670

4671
    /**
4672
     * Determines whether the specified code point is a valid
4673
     * <a href="http://www.unicode.org/glossary/#code_point">
4674
     * Unicode code point value</a>.
4675
     *
4676
     * @param  codePoint the Unicode code point to be tested
4677
     * @return {@code true} if the specified code point value is between
4678
     *         {@link #MIN_CODE_POINT} and
4679
     *         {@link #MAX_CODE_POINT} inclusive;
4680
     *         {@code false} otherwise.
4681
     * @since  1.5
4682
     */
4683
    public static boolean isValidCodePoint(int codePoint) {
4684
        // Optimized form of:
4685
        //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4686
        int plane = codePoint >>> 16;
4687
        return plane < ((MAX_CODE_POINT + 1) >>> 16);
4688
    }
4689

4690
    /**
4691
     * Determines whether the specified character (Unicode code point)
4692
     * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4693
     * Such code points can be represented using a single {@code char}.
4694
     *
4695
     * @param  codePoint the character (Unicode code point) to be tested
4696
     * @return {@code true} if the specified code point is between
4697
     *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4698
     *         {@code false} otherwise.
4699
     * @since  1.7
4700
     */
4701
    public static boolean isBmpCodePoint(int codePoint) {
4702
        return codePoint >>> 16 == 0;
4703
        // Optimized form of:
4704
        //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4705
        // We consistently use logical shift (>>>) to facilitate
4706
        // additional runtime optimizations.
4707
    }
4708

4709
    /**
4710
     * Determines whether the specified character (Unicode code point)
4711
     * is in the <a href="#supplementary">supplementary character</a> range.
4712
     *
4713
     * @param  codePoint the character (Unicode code point) to be tested
4714
     * @return {@code true} if the specified code point is between
4715
     *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4716
     *         {@link #MAX_CODE_POINT} inclusive;
4717
     *         {@code false} otherwise.
4718
     * @since  1.5
4719
     */
4720
    public static boolean isSupplementaryCodePoint(int codePoint) {
4721
        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4722
            && codePoint <  MAX_CODE_POINT + 1;
4723
    }
4724

4725
    /**
4726
     * Determines if the given {@code char} value is a
4727
     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4728
     * Unicode high-surrogate code unit</a>
4729
     * (also known as <i>leading-surrogate code unit</i>).
4730
     *
4731
     * <p>Such values do not represent characters by themselves,
4732
     * but are used in the representation of
4733
     * <a href="#supplementary">supplementary characters</a>
4734
     * in the UTF-16 encoding.
4735
     *
4736
     * @param  ch the {@code char} value to be tested.
4737
     * @return {@code true} if the {@code char} value is between
4738
     *         {@link #MIN_HIGH_SURROGATE} and
4739
     *         {@link #MAX_HIGH_SURROGATE} inclusive;
4740
     *         {@code false} otherwise.
4741
     * @see    Character#isLowSurrogate(char)
4742
     * @see    Character.UnicodeBlock#of(int)
4743
     * @since  1.5
4744
     */
4745
    public static boolean isHighSurrogate(char ch) {
4746
        // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4747
        return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4748
    }
4749

4750
    /**
4751
     * Determines if the given {@code char} value is a
4752
     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4753
     * Unicode low-surrogate code unit</a>
4754
     * (also known as <i>trailing-surrogate code unit</i>).
4755
     *
4756
     * <p>Such values do not represent characters by themselves,
4757
     * but are used in the representation of
4758
     * <a href="#supplementary">supplementary characters</a>
4759
     * in the UTF-16 encoding.
4760
     *
4761
     * @param  ch the {@code char} value to be tested.
4762
     * @return {@code true} if the {@code char} value is between
4763
     *         {@link #MIN_LOW_SURROGATE} and
4764
     *         {@link #MAX_LOW_SURROGATE} inclusive;
4765
     *         {@code false} otherwise.
4766
     * @see    Character#isHighSurrogate(char)
4767
     * @since  1.5
4768
     */
4769
    public static boolean isLowSurrogate(char ch) {
4770
        return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4771
    }
4772

4773
    /**
4774
     * Determines if the given {@code char} value is a Unicode
4775
     * <i>surrogate code unit</i>.
4776
     *
4777
     * <p>Such values do not represent characters by themselves,
4778
     * but are used in the representation of
4779
     * <a href="#supplementary">supplementary characters</a>
4780
     * in the UTF-16 encoding.
4781
     *
4782
     * <p>A char value is a surrogate code unit if and only if it is either
4783
     * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4784
     * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4785
     *
4786
     * @param  ch the {@code char} value to be tested.
4787
     * @return {@code true} if the {@code char} value is between
4788
     *         {@link #MIN_SURROGATE} and
4789
     *         {@link #MAX_SURROGATE} inclusive;
4790
     *         {@code false} otherwise.
4791
     * @since  1.7
4792
     */
4793
    public static boolean isSurrogate(char ch) {
4794
        return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4795
    }
4796

4797
    /**
4798
     * Determines whether the specified pair of {@code char}
4799
     * values is a valid
4800
     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4801
     * Unicode surrogate pair</a>.
4802

4803
     * <p>This method is equivalent to the expression:
4804
     * <blockquote><pre>{@code
4805
     * isHighSurrogate(high) && isLowSurrogate(low)
4806
     * }</pre></blockquote>
4807
     *
4808
     * @param  high the high-surrogate code value to be tested
4809
     * @param  low the low-surrogate code value to be tested
4810
     * @return {@code true} if the specified high and
4811
     * low-surrogate code values represent a valid surrogate pair;
4812
     * {@code false} otherwise.
4813
     * @since  1.5
4814
     */
4815
    public static boolean isSurrogatePair(char high, char low) {
4816
        return isHighSurrogate(high) && isLowSurrogate(low);
4817
    }
4818

4819
    /**
4820
     * Determines the number of {@code char} values needed to
4821
     * represent the specified character (Unicode code point). If the
4822
     * specified character is equal to or greater than 0x10000, then
4823
     * the method returns 2. Otherwise, the method returns 1.
4824
     *
4825
     * <p>This method doesn't validate the specified character to be a
4826
     * valid Unicode code point. The caller must validate the
4827
     * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4828
     * if necessary.
4829
     *
4830
     * @param   codePoint the character (Unicode code point) to be tested.
4831
     * @return  2 if the character is a valid supplementary character; 1 otherwise.
4832
     * @see     Character#isSupplementaryCodePoint(int)
4833
     * @since   1.5
4834
     */
4835
    public static int charCount(int codePoint) {
4836
        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4837
    }
4838

4839
    /**
4840
     * Converts the specified surrogate pair to its supplementary code
4841
     * point value. This method does not validate the specified
4842
     * surrogate pair. The caller must validate it using {@link
4843
     * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4844
     *
4845
     * @param  high the high-surrogate code unit
4846
     * @param  low the low-surrogate code unit
4847
     * @return the supplementary code point composed from the
4848
     *         specified surrogate pair.
4849
     * @since  1.5
4850
     */
4851
    public static int toCodePoint(char high, char low) {
4852
        // Optimized form of:
4853
        // return ((high - MIN_HIGH_SURROGATE) << 10)
4854
        //         + (low - MIN_LOW_SURROGATE)
4855
        //         + MIN_SUPPLEMENTARY_CODE_POINT;
4856
        return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4857
                                       - (MIN_HIGH_SURROGATE << 10)
4858
                                       - MIN_LOW_SURROGATE);
4859
    }
4860

4861
    /**
4862
     * Returns the code point at the given index of the
4863
     * {@code CharSequence}. If the {@code char} value at
4864
     * the given index in the {@code CharSequence} is in the
4865
     * high-surrogate range, the following index is less than the
4866
     * length of the {@code CharSequence}, and the
4867
     * {@code char} value at the following index is in the
4868
     * low-surrogate range, then the supplementary code point
4869
     * corresponding to this surrogate pair is returned. Otherwise,
4870
     * the {@code char} value at the given index is returned.
4871
     *
4872
     * @param seq a sequence of {@code char} values (Unicode code
4873
     * units)
4874
     * @param index the index to the {@code char} values (Unicode
4875
     * code units) in {@code seq} to be converted
4876
     * @return the Unicode code point at the given index
4877
     * @exception NullPointerException if {@code seq} is null.
4878
     * @exception IndexOutOfBoundsException if the value
4879
     * {@code index} is negative or not less than
4880
     * {@link CharSequence#length() seq.length()}.
4881
     * @since  1.5
4882
     */
4883
    public static int codePointAt(CharSequence seq, int index) {
4884
        char c1 = seq.charAt(index);
4885
        if (isHighSurrogate(c1) && ++index < seq.length()) {
4886
            char c2 = seq.charAt(index);
4887
            if (isLowSurrogate(c2)) {
4888
                return toCodePoint(c1, c2);
4889
            }
4890
        }
4891
        return c1;
4892
    }
4893

4894
    /**
4895
     * Returns the code point at the given index of the
4896
     * {@code char} array. If the {@code char} value at
4897
     * the given index in the {@code char} array is in the
4898
     * high-surrogate range, the following index is less than the
4899
     * length of the {@code char} array, and the
4900
     * {@code char} value at the following index is in the
4901
     * low-surrogate range, then the supplementary code point
4902
     * corresponding to this surrogate pair is returned. Otherwise,
4903
     * the {@code char} value at the given index is returned.
4904
     *
4905
     * @param a the {@code char} array
4906
     * @param index the index to the {@code char} values (Unicode
4907
     * code units) in the {@code char} array to be converted
4908
     * @return the Unicode code point at the given index
4909
     * @exception NullPointerException if {@code a} is null.
4910
     * @exception IndexOutOfBoundsException if the value
4911
     * {@code index} is negative or not less than
4912
     * the length of the {@code char} array.
4913
     * @since  1.5
4914
     */
4915
    public static int codePointAt(char[] a, int index) {
4916
        return codePointAtImpl(a, index, a.length);
4917
    }
4918

4919
    /**
4920
     * Returns the code point at the given index of the
4921
     * {@code char} array, where only array elements with
4922
     * {@code index} less than {@code limit} can be used. If
4923
     * the {@code char} value at the given index in the
4924
     * {@code char} array is in the high-surrogate range, the
4925
     * following index is less than the {@code limit}, and the
4926
     * {@code char} value at the following index is in the
4927
     * low-surrogate range, then the supplementary code point
4928
     * corresponding to this surrogate pair is returned. Otherwise,
4929
     * the {@code char} value at the given index is returned.
4930
     *
4931
     * @param a the {@code char} array
4932
     * @param index the index to the {@code char} values (Unicode
4933
     * code units) in the {@code char} array to be converted
4934
     * @param limit the index after the last array element that
4935
     * can be used in the {@code char} array
4936
     * @return the Unicode code point at the given index
4937
     * @exception NullPointerException if {@code a} is null.
4938
     * @exception IndexOutOfBoundsException if the {@code index}
4939
     * argument is negative or not less than the {@code limit}
4940
     * argument, or if the {@code limit} argument is negative or
4941
     * greater than the length of the {@code char} array.
4942
     * @since  1.5
4943
     */
4944
    public static int codePointAt(char[] a, int index, int limit) {
4945
        if (index >= limit || limit < 0 || limit > a.length) {
4946
            throw new IndexOutOfBoundsException();
4947
        }
4948
        return codePointAtImpl(a, index, limit);
4949
    }
4950

4951
    // throws ArrayIndexOutOfBoundsException if index out of bounds
4952
    static int codePointAtImpl(char[] a, int index, int limit) {
4953
        char c1 = a[index];
4954
        if (isHighSurrogate(c1) && ++index < limit) {
4955
            char c2 = a[index];
4956
            if (isLowSurrogate(c2)) {
4957
                return toCodePoint(c1, c2);
4958
            }
4959
        }
4960
        return c1;
4961
    }
4962

4963
    /**
4964
     * Returns the code point preceding the given index of the
4965
     * {@code CharSequence}. If the {@code char} value at
4966
     * {@code (index - 1)} in the {@code CharSequence} is in
4967
     * the low-surrogate range, {@code (index - 2)} is not
4968
     * negative, and the {@code char} value at {@code (index - 2)}
4969
     * in the {@code CharSequence} is in the
4970
     * high-surrogate range, then the supplementary code point
4971
     * corresponding to this surrogate pair is returned. Otherwise,
4972
     * the {@code char} value at {@code (index - 1)} is
4973
     * returned.
4974
     *
4975
     * @param seq the {@code CharSequence} instance
4976
     * @param index the index following the code point that should be returned
4977
     * @return the Unicode code point value before the given index.
4978
     * @exception NullPointerException if {@code seq} is null.
4979
     * @exception IndexOutOfBoundsException if the {@code index}
4980
     * argument is less than 1 or greater than {@link
4981
     * CharSequence#length() seq.length()}.
4982
     * @since  1.5
4983
     */
4984
    public static int codePointBefore(CharSequence seq, int index) {
4985
        char c2 = seq.charAt(--index);
4986
        if (isLowSurrogate(c2) && index > 0) {
4987
            char c1 = seq.charAt(--index);
4988
            if (isHighSurrogate(c1)) {
4989
                return toCodePoint(c1, c2);
4990
            }
4991
        }
4992
        return c2;
4993
    }
4994

4995
    /**
4996
     * Returns the code point preceding the given index of the
4997
     * {@code char} array. If the {@code char} value at
4998
     * {@code (index - 1)} in the {@code char} array is in
4999
     * the low-surrogate range, {@code (index - 2)} is not
5000
     * negative, and the {@code char} value at {@code (index - 2)}
5001
     * in the {@code char} array is in the
5002
     * high-surrogate range, then the supplementary code point
5003
     * corresponding to this surrogate pair is returned. Otherwise,
5004
     * the {@code char} value at {@code (index - 1)} is
5005
     * returned.
5006
     *
5007
     * @param a the {@code char} array
5008
     * @param index the index following the code point that should be returned
5009
     * @return the Unicode code point value before the given index.
5010
     * @exception NullPointerException if {@code a} is null.
5011
     * @exception IndexOutOfBoundsException if the {@code index}
5012
     * argument is less than 1 or greater than the length of the
5013
     * {@code char} array
5014
     * @since  1.5
5015
     */
5016
    public static int codePointBefore(char[] a, int index) {
5017
        return codePointBeforeImpl(a, index, 0);
5018
    }
5019

5020
    /**
5021
     * Returns the code point preceding the given index of the
5022
     * {@code char} array, where only array elements with
5023
     * {@code index} greater than or equal to {@code start}
5024
     * can be used. If the {@code char} value at {@code (index - 1)}
5025
     * in the {@code char} array is in the
5026
     * low-surrogate range, {@code (index - 2)} is not less than
5027
     * {@code start}, and the {@code char} value at
5028
     * {@code (index - 2)} in the {@code char} array is in
5029
     * the high-surrogate range, then the supplementary code point
5030
     * corresponding to this surrogate pair is returned. Otherwise,
5031
     * the {@code char} value at {@code (index - 1)} is
5032
     * returned.
5033
     *
5034
     * @param a the {@code char} array
5035
     * @param index the index following the code point that should be returned
5036
     * @param start the index of the first array element in the
5037
     * {@code char} array
5038
     * @return the Unicode code point value before the given index.
5039
     * @exception NullPointerException if {@code a} is null.
5040
     * @exception IndexOutOfBoundsException if the {@code index}
5041
     * argument is not greater than the {@code start} argument or
5042
     * is greater than the length of the {@code char} array, or
5043
     * if the {@code start} argument is negative or not less than
5044
     * the length of the {@code char} array.
5045
     * @since  1.5
5046
     */
5047
    public static int codePointBefore(char[] a, int index, int start) {
5048
        if (index <= start || start < 0 || start >= a.length) {
5049
            throw new IndexOutOfBoundsException();
5050
        }
5051
        return codePointBeforeImpl(a, index, start);
5052
    }
5053

5054
    // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
5055
    static int codePointBeforeImpl(char[] a, int index, int start) {
5056
        char c2 = a[--index];
5057
        if (isLowSurrogate(c2) && index > start) {
5058
            char c1 = a[--index];
5059
            if (isHighSurrogate(c1)) {
5060
                return toCodePoint(c1, c2);
5061
            }
5062
        }
5063
        return c2;
5064
    }
5065

5066
    /**
5067
     * Returns the leading surrogate (a
5068
     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
5069
     * high surrogate code unit</a>) of the
5070
     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5071
     * surrogate pair</a>
5072
     * representing the specified supplementary character (Unicode
5073
     * code point) in the UTF-16 encoding.  If the specified character
5074
     * is not a
5075
     * <a href="Character.html#supplementary">supplementary character</a>,
5076
     * an unspecified {@code char} is returned.
5077
     *
5078
     * <p>If
5079
     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5080
     * is {@code true}, then
5081
     * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
5082
     * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
5083
     * are also always {@code true}.
5084
     *
5085
     * @param   codePoint a supplementary character (Unicode code point)
5086
     * @return  the leading surrogate code unit used to represent the
5087
     *          character in the UTF-16 encoding
5088
     * @since   1.7
5089
     */
5090
    public static char highSurrogate(int codePoint) {
5091
        return (char) ((codePoint >>> 10)
5092
            + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
5093
    }
5094

5095
    /**
5096
     * Returns the trailing surrogate (a
5097
     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
5098
     * low surrogate code unit</a>) of the
5099
     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5100
     * surrogate pair</a>
5101
     * representing the specified supplementary character (Unicode
5102
     * code point) in the UTF-16 encoding.  If the specified character
5103
     * is not a
5104
     * <a href="Character.html#supplementary">supplementary character</a>,
5105
     * an unspecified {@code char} is returned.
5106
     *
5107
     * <p>If
5108
     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5109
     * is {@code true}, then
5110
     * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
5111
     * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
5112
     * are also always {@code true}.
5113
     *
5114
     * @param   codePoint a supplementary character (Unicode code point)
5115
     * @return  the trailing surrogate code unit used to represent the
5116
     *          character in the UTF-16 encoding
5117
     * @since   1.7
5118
     */
5119
    public static char lowSurrogate(int codePoint) {
5120
        return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
5121
    }
5122

5123
    /**
5124
     * Converts the specified character (Unicode code point) to its
5125
     * UTF-16 representation. If the specified code point is a BMP
5126
     * (Basic Multilingual Plane or Plane 0) value, the same value is
5127
     * stored in {@code dst[dstIndex]}, and 1 is returned. If the
5128
     * specified code point is a supplementary character, its
5129
     * surrogate values are stored in {@code dst[dstIndex]}
5130
     * (high-surrogate) and {@code dst[dstIndex+1]}
5131
     * (low-surrogate), and 2 is returned.
5132
     *
5133
     * @param  codePoint the character (Unicode code point) to be converted.
5134
     * @param  dst an array of {@code char} in which the
5135
     * {@code codePoint}'s UTF-16 value is stored.
5136
     * @param dstIndex the start index into the {@code dst}
5137
     * array where the converted value is stored.
5138
     * @return 1 if the code point is a BMP code point, 2 if the
5139
     * code point is a supplementary code point.
5140
     * @exception IllegalArgumentException if the specified
5141
     * {@code codePoint} is not a valid Unicode code point.
5142
     * @exception NullPointerException if the specified {@code dst} is null.
5143
     * @exception IndexOutOfBoundsException if {@code dstIndex}
5144
     * is negative or not less than {@code dst.length}, or if
5145
     * {@code dst} at {@code dstIndex} doesn't have enough
5146
     * array element(s) to store the resulting {@code char}
5147
     * value(s). (If {@code dstIndex} is equal to
5148
     * {@code dst.length-1} and the specified
5149
     * {@code codePoint} is a supplementary character, the
5150
     * high-surrogate value is not stored in
5151
     * {@code dst[dstIndex]}.)
5152
     * @since  1.5
5153
     */
5154
    public static int toChars(int codePoint, char[] dst, int dstIndex) {
5155
        if (isBmpCodePoint(codePoint)) {
5156
            dst[dstIndex] = (char) codePoint;
5157
            return 1;
5158
        } else if (isValidCodePoint(codePoint)) {
5159
            toSurrogates(codePoint, dst, dstIndex);
5160
            return 2;
5161
        } else {
5162
            throw new IllegalArgumentException();
5163
        }
5164
    }
5165

5166
    /**
5167
     * Converts the specified character (Unicode code point) to its
5168
     * UTF-16 representation stored in a {@code char} array. If
5169
     * the specified code point is a BMP (Basic Multilingual Plane or
5170
     * Plane 0) value, the resulting {@code char} array has
5171
     * the same value as {@code codePoint}. If the specified code
5172
     * point is a supplementary code point, the resulting
5173
     * {@code char} array has the corresponding surrogate pair.
5174
     *
5175
     * @param  codePoint a Unicode code point
5176
     * @return a {@code char} array having
5177
     *         {@code codePoint}'s UTF-16 representation.
5178
     * @exception IllegalArgumentException if the specified
5179
     * {@code codePoint} is not a valid Unicode code point.
5180
     * @since  1.5
5181
     */
5182
    public static char[] toChars(int codePoint) {
5183
        if (isBmpCodePoint(codePoint)) {
5184
            return new char[] { (char) codePoint };
5185
        } else if (isValidCodePoint(codePoint)) {
5186
            char[] result = new char[2];
5187
            toSurrogates(codePoint, result, 0);
5188
            return result;
5189
        } else {
5190
            throw new IllegalArgumentException();
5191
        }
5192
    }
5193

5194
    static void toSurrogates(int codePoint, char[] dst, int index) {
5195
        // We write elements "backwards" to guarantee all-or-nothing
5196
        dst[index+1] = lowSurrogate(codePoint);
5197
        dst[index] = highSurrogate(codePoint);
5198
    }
5199

5200
    /**
5201
     * Returns the number of Unicode code points in the text range of
5202
     * the specified char sequence. The text range begins at the
5203
     * specified {@code beginIndex} and extends to the
5204
     * {@code char} at index {@code endIndex - 1}. Thus the
5205
     * length (in {@code char}s) of the text range is
5206
     * {@code endIndex-beginIndex}. Unpaired surrogates within
5207
     * the text range count as one code point each.
5208
     *
5209
     * @param seq the char sequence
5210
     * @param beginIndex the index to the first {@code char} of
5211
     * the text range.
5212
     * @param endIndex the index after the last {@code char} of
5213
     * the text range.
5214
     * @return the number of Unicode code points in the specified text
5215
     * range
5216
     * @exception NullPointerException if {@code seq} is null.
5217
     * @exception IndexOutOfBoundsException if the
5218
     * {@code beginIndex} is negative, or {@code endIndex}
5219
     * is larger than the length of the given sequence, or
5220
     * {@code beginIndex} is larger than {@code endIndex}.
5221
     * @since  1.5
5222
     */
5223
    public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5224
        int length = seq.length();
5225
        if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5226
            throw new IndexOutOfBoundsException();
5227
        }
5228
        int n = endIndex - beginIndex;
5229
        for (int i = beginIndex; i < endIndex; ) {
5230
            if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5231
                isLowSurrogate(seq.charAt(i))) {
5232
                n--;
5233
                i++;
5234
            }
5235
        }
5236
        return n;
5237
    }
5238

5239
    /**
5240
     * Returns the number of Unicode code points in a subarray of the
5241
     * {@code char} array argument. The {@code offset}
5242
     * argument is the index of the first {@code char} of the
5243
     * subarray and the {@code count} argument specifies the
5244
     * length of the subarray in {@code char}s. Unpaired
5245
     * surrogates within the subarray count as one code point each.
5246
     *
5247
     * @param a the {@code char} array
5248
     * @param offset the index of the first {@code char} in the
5249
     * given {@code char} array
5250
     * @param count the length of the subarray in {@code char}s
5251
     * @return the number of Unicode code points in the specified subarray
5252
     * @exception NullPointerException if {@code a} is null.
5253
     * @exception IndexOutOfBoundsException if {@code offset} or
5254
     * {@code count} is negative, or if {@code offset +
5255
     * count} is larger than the length of the given array.
5256
     * @since  1.5
5257
     */
5258
    public static int codePointCount(char[] a, int offset, int count) {
5259
        if (count > a.length - offset || offset < 0 || count < 0) {
5260
            throw new IndexOutOfBoundsException();
5261
        }
5262
        return codePointCountImpl(a, offset, count);
5263
    }
5264

5265
    static int codePointCountImpl(char[] a, int offset, int count) {
5266
        int endIndex = offset + count;
5267
        int n = count;
5268
        for (int i = offset; i < endIndex; ) {
5269
            if (isHighSurrogate(a[i++]) && i < endIndex &&
5270
                isLowSurrogate(a[i])) {
5271
                n--;
5272
                i++;
5273
            }
5274
        }
5275
        return n;
5276
    }
5277

5278
    /**
5279
     * Returns the index within the given char sequence that is offset
5280
     * from the given {@code index} by {@code codePointOffset}
5281
     * code points. Unpaired surrogates within the text range given by
5282
     * {@code index} and {@code codePointOffset} count as
5283
     * one code point each.
5284
     *
5285
     * @param seq the char sequence
5286
     * @param index the index to be offset
5287
     * @param codePointOffset the offset in code points
5288
     * @return the index within the char sequence
5289
     * @exception NullPointerException if {@code seq} is null.
5290
     * @exception IndexOutOfBoundsException if {@code index}
5291
     *   is negative or larger than the length of the char sequence,
5292
     *   or if {@code codePointOffset} is positive and the
5293
     *   subsequence starting with {@code index} has fewer than
5294
     *   {@code codePointOffset} code points, or if
5295
     *   {@code codePointOffset} is negative and the subsequence
5296
     *   before {@code index} has fewer than the absolute value
5297
     *   of {@code codePointOffset} code points.
5298
     * @since 1.5
5299
     */
5300
    public static int offsetByCodePoints(CharSequence seq, int index,
5301
                                         int codePointOffset) {
5302
        int length = seq.length();
5303
        if (index < 0 || index > length) {
5304
            throw new IndexOutOfBoundsException();
5305
        }
5306

5307
        int x = index;
5308
        if (codePointOffset >= 0) {
5309
            int i;
5310
            for (i = 0; x < length && i < codePointOffset; i++) {
5311
                if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5312
                    isLowSurrogate(seq.charAt(x))) {
5313
                    x++;
5314
                }
5315
            }
5316
            if (i < codePointOffset) {
5317
                throw new IndexOutOfBoundsException();
5318
            }
5319
        } else {
5320
            int i;
5321
            for (i = codePointOffset; x > 0 && i < 0; i++) {
5322
                if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5323
                    isHighSurrogate(seq.charAt(x-1))) {
5324
                    x--;
5325
                }
5326
            }
5327
            if (i < 0) {
5328
                throw new IndexOutOfBoundsException();
5329
            }
5330
        }
5331
        return x;
5332
    }
5333

5334
    /**
5335
     * Returns the index within the given {@code char} subarray
5336
     * that is offset from the given {@code index} by
5337
     * {@code codePointOffset} code points. The
5338
     * {@code start} and {@code count} arguments specify a
5339
     * subarray of the {@code char} array. Unpaired surrogates
5340
     * within the text range given by {@code index} and
5341
     * {@code codePointOffset} count as one code point each.
5342
     *
5343
     * @param a the {@code char} array
5344
     * @param start the index of the first {@code char} of the
5345
     * subarray
5346
     * @param count the length of the subarray in {@code char}s
5347
     * @param index the index to be offset
5348
     * @param codePointOffset the offset in code points
5349
     * @return the index within the subarray
5350
     * @exception NullPointerException if {@code a} is null.
5351
     * @exception IndexOutOfBoundsException
5352
     *   if {@code start} or {@code count} is negative,
5353
     *   or if {@code start + count} is larger than the length of
5354
     *   the given array,
5355
     *   or if {@code index} is less than {@code start} or
5356
     *   larger than {@code start + count},
5357
     *   or if {@code codePointOffset} is positive and the text range
5358
     *   starting with {@code index} and ending with {@code start + count - 1}
5359
     *   has fewer than {@code codePointOffset} code
5360
     *   points,
5361
     *   or if {@code codePointOffset} is negative and the text range
5362
     *   starting with {@code start} and ending with {@code index - 1}
5363
     *   has fewer than the absolute value of
5364
     *   {@code codePointOffset} code points.
5365
     * @since 1.5
5366
     */
5367
    public static int offsetByCodePoints(char[] a, int start, int count,
5368
                                         int index, int codePointOffset) {
5369
        if (count > a.length-start || start < 0 || count < 0
5370
            || index < start || index > start+count) {
5371
            throw new IndexOutOfBoundsException();
5372
        }
5373
        return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5374
    }
5375

5376
    static int offsetByCodePointsImpl(char[]a, int start, int count,
5377
                                      int index, int codePointOffset) {
5378
        int x = index;
5379
        if (codePointOffset >= 0) {
5380
            int limit = start + count;
5381
            int i;
5382
            for (i = 0; x < limit && i < codePointOffset; i++) {
5383
                if (isHighSurrogate(a[x++]) && x < limit &&
5384
                    isLowSurrogate(a[x])) {
5385
                    x++;
5386
                }
5387
            }
5388
            if (i < codePointOffset) {
5389
                throw new IndexOutOfBoundsException();
5390
            }
5391
        } else {
5392
            int i;
5393
            for (i = codePointOffset; x > start && i < 0; i++) {
5394
                if (isLowSurrogate(a[--x]) && x > start &&
5395
                    isHighSurrogate(a[x-1])) {
5396
                    x--;
5397
                }
5398
            }
5399
            if (i < 0) {
5400
                throw new IndexOutOfBoundsException();
5401
            }
5402
        }
5403
        return x;
5404
    }
5405

5406
    /**
5407
     * Determines if the specified character is a lowercase character.
5408
     * <p>
5409
     * A character is lowercase if its general category type, provided
5410
     * by {@code Character.getType(ch)}, is
5411
     * {@code LOWERCASE_LETTER}, or it has contributory property
5412
     * Other_Lowercase as defined by the Unicode Standard.
5413
     * <p>
5414
     * The following are examples of lowercase characters:
5415
     * <blockquote><pre>
5416
     * a b c d e f g h i j k l m n o p q r s t u v w x y z
5417
     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5418
     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5419
     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5420
     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5421
     * </pre></blockquote>
5422
     * <p> Many other Unicode characters are lowercase too.
5423
     *
5424
     * <p><b>Note:</b> This method cannot handle <a
5425
     * href="#supplementary"> supplementary characters</a>. To support
5426
     * all Unicode characters, including supplementary characters, use
5427
     * the {@link #isLowerCase(int)} method.
5428
     *
5429
     * @param   ch   the character to be tested.
5430
     * @return  {@code true} if the character is lowercase;
5431
     *          {@code false} otherwise.
5432
     * @see     Character#isLowerCase(char)
5433
     * @see     Character#isTitleCase(char)
5434
     * @see     Character#toLowerCase(char)
5435
     * @see     Character#getType(char)
5436
     */
5437
    public static boolean isLowerCase(char ch) {
5438
        return isLowerCase((int)ch);
5439
    }
5440

5441
    /**
5442
     * Determines if the specified character (Unicode code point) is a
5443
     * lowercase character.
5444
     * <p>
5445
     * A character is lowercase if its general category type, provided
5446
     * by {@link Character#getType getType(codePoint)}, is
5447
     * {@code LOWERCASE_LETTER}, or it has contributory property
5448
     * Other_Lowercase as defined by the Unicode Standard.
5449
     * <p>
5450
     * The following are examples of lowercase characters:
5451
     * <blockquote><pre>
5452
     * a b c d e f g h i j k l m n o p q r s t u v w x y z
5453
     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5454
     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5455
     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5456
     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5457
     * </pre></blockquote>
5458
     * <p> Many other Unicode characters are lowercase too.
5459
     *
5460
     * @param   codePoint the character (Unicode code point) to be tested.
5461
     * @return  {@code true} if the character is lowercase;
5462
     *          {@code false} otherwise.
5463
     * @see     Character#isLowerCase(int)
5464
     * @see     Character#isTitleCase(int)
5465
     * @see     Character#toLowerCase(int)
5466
     * @see     Character#getType(int)
5467
     * @since   1.5
5468
     */
5469
    public static boolean isLowerCase(int codePoint) {
5470
        return getType(codePoint) == Character.LOWERCASE_LETTER ||
5471
               CharacterData.of(codePoint).isOtherLowercase(codePoint);
5472
    }
5473

5474
    /**
5475
     * Determines if the specified character is an uppercase character.
5476
     * <p>
5477
     * A character is uppercase if its general category type, provided by
5478
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
5479
     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5480
     * <p>
5481
     * The following are examples of uppercase characters:
5482
     * <blockquote><pre>
5483
     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5484
     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5485
     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5486
     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5487
     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5488
     * </pre></blockquote>
5489
     * <p> Many other Unicode characters are uppercase too.
5490
     *
5491
     * <p><b>Note:</b> This method cannot handle <a
5492
     * href="#supplementary"> supplementary characters</a>. To support
5493
     * all Unicode characters, including supplementary characters, use
5494
     * the {@link #isUpperCase(int)} method.
5495
     *
5496
     * @param   ch   the character to be tested.
5497
     * @return  {@code true} if the character is uppercase;
5498
     *          {@code false} otherwise.
5499
     * @see     Character#isLowerCase(char)
5500
     * @see     Character#isTitleCase(char)
5501
     * @see     Character#toUpperCase(char)
5502
     * @see     Character#getType(char)
5503
     * @since   1.0
5504
     */
5505
    public static boolean isUpperCase(char ch) {
5506
        return isUpperCase((int)ch);
5507
    }
5508

5509
    /**
5510
     * Determines if the specified character (Unicode code point) is an uppercase character.
5511
     * <p>
5512
     * A character is uppercase if its general category type, provided by
5513
     * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5514
     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5515
     * <p>
5516
     * The following are examples of uppercase characters:
5517
     * <blockquote><pre>
5518
     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5519
     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5520
     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5521
     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5522
     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5523
     * </pre></blockquote>
5524
     * <p> Many other Unicode characters are uppercase too.
5525
     *
5526
     * @param   codePoint the character (Unicode code point) to be tested.
5527
     * @return  {@code true} if the character is uppercase;
5528
     *          {@code false} otherwise.
5529
     * @see     Character#isLowerCase(int)
5530
     * @see     Character#isTitleCase(int)
5531
     * @see     Character#toUpperCase(int)
5532
     * @see     Character#getType(int)
5533
     * @since   1.5
5534
     */
5535
    public static boolean isUpperCase(int codePoint) {
5536
        return getType(codePoint) == Character.UPPERCASE_LETTER ||
5537
               CharacterData.of(codePoint).isOtherUppercase(codePoint);
5538
    }
5539

5540
    /**
5541
     * Determines if the specified character is a titlecase character.
5542
     * <p>
5543
     * A character is a titlecase character if its general
5544
     * category type, provided by {@code Character.getType(ch)},
5545
     * is {@code TITLECASE_LETTER}.
5546
     * <p>
5547
     * Some characters look like pairs of Latin letters. For example, there
5548
     * is an uppercase letter that looks like "LJ" and has a corresponding
5549
     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5550
     * is the appropriate form to use when rendering a word in lowercase
5551
     * with initial capitals, as for a book title.
5552
     * <p>
5553
     * These are some of the Unicode characters for which this method returns
5554
     * {@code true}:
5555
     * <ul>
5556
     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5557
     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5558
     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5559
     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5560
     * </ul>
5561
     * <p> Many other Unicode characters are titlecase too.
5562
     *
5563
     * <p><b>Note:</b> This method cannot handle <a
5564
     * href="#supplementary"> supplementary characters</a>. To support
5565
     * all Unicode characters, including supplementary characters, use
5566
     * the {@link #isTitleCase(int)} method.
5567
     *
5568
     * @param   ch   the character to be tested.
5569
     * @return  {@code true} if the character is titlecase;
5570
     *          {@code false} otherwise.
5571
     * @see     Character#isLowerCase(char)
5572
     * @see     Character#isUpperCase(char)
5573
     * @see     Character#toTitleCase(char)
5574
     * @see     Character#getType(char)
5575
     * @since   1.0.2
5576
     */
5577
    public static boolean isTitleCase(char ch) {
5578
        return isTitleCase((int)ch);
5579
    }
5580

5581
    /**
5582
     * Determines if the specified character (Unicode code point) is a titlecase character.
5583
     * <p>
5584
     * A character is a titlecase character if its general
5585
     * category type, provided by {@link Character#getType(int) getType(codePoint)},
5586
     * is {@code TITLECASE_LETTER}.
5587
     * <p>
5588
     * Some characters look like pairs of Latin letters. For example, there
5589
     * is an uppercase letter that looks like "LJ" and has a corresponding
5590
     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5591
     * is the appropriate form to use when rendering a word in lowercase
5592
     * with initial capitals, as for a book title.
5593
     * <p>
5594
     * These are some of the Unicode characters for which this method returns
5595
     * {@code true}:
5596
     * <ul>
5597
     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5598
     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5599
     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5600
     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5601
     * </ul>
5602
     * <p> Many other Unicode characters are titlecase too.<p>
5603
     *
5604
     * @param   codePoint the character (Unicode code point) to be tested.
5605
     * @return  {@code true} if the character is titlecase;
5606
     *          {@code false} otherwise.
5607
     * @see     Character#isLowerCase(int)
5608
     * @see     Character#isUpperCase(int)
5609
     * @see     Character#toTitleCase(int)
5610
     * @see     Character#getType(int)
5611
     * @since   1.5
5612
     */
5613
    public static boolean isTitleCase(int codePoint) {
5614
        return getType(codePoint) == Character.TITLECASE_LETTER;
5615
    }
5616

5617
    /**
5618
     * Determines if the specified character is a digit.
5619
     * <p>
5620
     * A character is a digit if its general category type, provided
5621
     * by {@code Character.getType(ch)}, is
5622
     * {@code DECIMAL_DIGIT_NUMBER}.
5623
     * <p>
5624
     * Some Unicode character ranges that contain digits:
5625
     * <ul>
5626
     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5627
     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5628
     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5629
     *     Arabic-Indic digits
5630
     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5631
     *     Extended Arabic-Indic digits
5632
     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5633
     *     Devanagari digits
5634
     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5635
     *     Fullwidth digits
5636
     * </ul>
5637
     *
5638
     * Many other character ranges contain digits as well.
5639
     *
5640
     * <p><b>Note:</b> This method cannot handle <a
5641
     * href="#supplementary"> supplementary characters</a>. To support
5642
     * all Unicode characters, including supplementary characters, use
5643
     * the {@link #isDigit(int)} method.
5644
     *
5645
     * @param   ch   the character to be tested.
5646
     * @return  {@code true} if the character is a digit;
5647
     *          {@code false} otherwise.
5648
     * @see     Character#digit(char, int)
5649
     * @see     Character#forDigit(int, int)
5650
     * @see     Character#getType(char)
5651
     */
5652
    public static boolean isDigit(char ch) {
5653
        return isDigit((int)ch);
5654
    }
5655

5656
    /**
5657
     * Determines if the specified character (Unicode code point) is a digit.
5658
     * <p>
5659
     * A character is a digit if its general category type, provided
5660
     * by {@link Character#getType(int) getType(codePoint)}, is
5661
     * {@code DECIMAL_DIGIT_NUMBER}.
5662
     * <p>
5663
     * Some Unicode character ranges that contain digits:
5664
     * <ul>
5665
     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5666
     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5667
     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5668
     *     Arabic-Indic digits
5669
     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5670
     *     Extended Arabic-Indic digits
5671
     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5672
     *     Devanagari digits
5673
     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5674
     *     Fullwidth digits
5675
     * </ul>
5676
     *
5677
     * Many other character ranges contain digits as well.
5678
     *
5679
     * @param   codePoint the character (Unicode code point) to be tested.
5680
     * @return  {@code true} if the character is a digit;
5681
     *          {@code false} otherwise.
5682
     * @see     Character#forDigit(int, int)
5683
     * @see     Character#getType(int)
5684
     * @since   1.5
5685
     */
5686
    public static boolean isDigit(int codePoint) {
5687
        return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
5688
    }
5689

5690
    /**
5691
     * Determines if a character is defined in Unicode.
5692
     * <p>
5693
     * A character is defined if at least one of the following is true:
5694
     * <ul>
5695
     * <li>It has an entry in the UnicodeData file.
5696
     * <li>It has a value in a range defined by the UnicodeData file.
5697
     * </ul>
5698
     *
5699
     * <p><b>Note:</b> This method cannot handle <a
5700
     * href="#supplementary"> supplementary characters</a>. To support
5701
     * all Unicode characters, including supplementary characters, use
5702
     * the {@link #isDefined(int)} method.
5703
     *
5704
     * @param   ch   the character to be tested
5705
     * @return  {@code true} if the character has a defined meaning
5706
     *          in Unicode; {@code false} otherwise.
5707
     * @see     Character#isDigit(char)
5708
     * @see     Character#isLetter(char)
5709
     * @see     Character#isLetterOrDigit(char)
5710
     * @see     Character#isLowerCase(char)
5711
     * @see     Character#isTitleCase(char)
5712
     * @see     Character#isUpperCase(char)
5713
     * @since   1.0.2
5714
     */
5715
    public static boolean isDefined(char ch) {
5716
        return isDefined((int)ch);
5717
    }
5718

5719
    /**
5720
     * Determines if a character (Unicode code point) is defined in Unicode.
5721
     * <p>
5722
     * A character is defined if at least one of the following is true:
5723
     * <ul>
5724
     * <li>It has an entry in the UnicodeData file.
5725
     * <li>It has a value in a range defined by the UnicodeData file.
5726
     * </ul>
5727
     *
5728
     * @param   codePoint the character (Unicode code point) to be tested.
5729
     * @return  {@code true} if the character has a defined meaning
5730
     *          in Unicode; {@code false} otherwise.
5731
     * @see     Character#isDigit(int)
5732
     * @see     Character#isLetter(int)
5733
     * @see     Character#isLetterOrDigit(int)
5734
     * @see     Character#isLowerCase(int)
5735
     * @see     Character#isTitleCase(int)
5736
     * @see     Character#isUpperCase(int)
5737
     * @since   1.5
5738
     */
5739
    public static boolean isDefined(int codePoint) {
5740
        return getType(codePoint) != Character.UNASSIGNED;
5741
    }
5742

5743
    /**
5744
     * Determines if the specified character is a letter.
5745
     * <p>
5746
     * A character is considered to be a letter if its general
5747
     * category type, provided by {@code Character.getType(ch)},
5748
     * is any of the following:
5749
     * <ul>
5750
     * <li> {@code UPPERCASE_LETTER}
5751
     * <li> {@code LOWERCASE_LETTER}
5752
     * <li> {@code TITLECASE_LETTER}
5753
     * <li> {@code MODIFIER_LETTER}
5754
     * <li> {@code OTHER_LETTER}
5755
     * </ul>
5756
     *
5757
     * Not all letters have case. Many characters are
5758
     * letters but are neither uppercase nor lowercase nor titlecase.
5759
     *
5760
     * <p><b>Note:</b> This method cannot handle <a
5761
     * href="#supplementary"> supplementary characters</a>. To support
5762
     * all Unicode characters, including supplementary characters, use
5763
     * the {@link #isLetter(int)} method.
5764
     *
5765
     * @param   ch   the character to be tested.
5766
     * @return  {@code true} if the character is a letter;
5767
     *          {@code false} otherwise.
5768
     * @see     Character#isDigit(char)
5769
     * @see     Character#isJavaIdentifierStart(char)
5770
     * @see     Character#isJavaLetter(char)
5771
     * @see     Character#isJavaLetterOrDigit(char)
5772
     * @see     Character#isLetterOrDigit(char)
5773
     * @see     Character#isLowerCase(char)
5774
     * @see     Character#isTitleCase(char)
5775
     * @see     Character#isUnicodeIdentifierStart(char)
5776
     * @see     Character#isUpperCase(char)
5777
     */
5778
    public static boolean isLetter(char ch) {
5779
        return isLetter((int)ch);
5780
    }
5781

5782
    /**
5783
     * Determines if the specified character (Unicode code point) is a letter.
5784
     * <p>
5785
     * A character is considered to be a letter if its general
5786
     * category type, provided by {@link Character#getType(int) getType(codePoint)},
5787
     * is any of the following:
5788
     * <ul>
5789
     * <li> {@code UPPERCASE_LETTER}
5790
     * <li> {@code LOWERCASE_LETTER}
5791
     * <li> {@code TITLECASE_LETTER}
5792
     * <li> {@code MODIFIER_LETTER}
5793
     * <li> {@code OTHER_LETTER}
5794
     * </ul>
5795
     *
5796
     * Not all letters have case. Many characters are
5797
     * letters but are neither uppercase nor lowercase nor titlecase.
5798
     *
5799
     * @param   codePoint the character (Unicode code point) to be tested.
5800
     * @return  {@code true} if the character is a letter;
5801
     *          {@code false} otherwise.
5802
     * @see     Character#isDigit(int)
5803
     * @see     Character#isJavaIdentifierStart(int)
5804
     * @see     Character#isLetterOrDigit(int)
5805
     * @see     Character#isLowerCase(int)
5806
     * @see     Character#isTitleCase(int)
5807
     * @see     Character#isUnicodeIdentifierStart(int)
5808
     * @see     Character#isUpperCase(int)
5809
     * @since   1.5
5810
     */
5811
    public static boolean isLetter(int codePoint) {
5812
        return ((((1 << Character.UPPERCASE_LETTER) |
5813
            (1 << Character.LOWERCASE_LETTER) |
5814
            (1 << Character.TITLECASE_LETTER) |
5815
            (1 << Character.MODIFIER_LETTER) |
5816
            (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
5817
            != 0;
5818
    }
5819

5820
    /**
5821
     * Determines if the specified character is a letter or digit.
5822
     * <p>
5823
     * A character is considered to be a letter or digit if either
5824
     * {@code Character.isLetter(char ch)} or
5825
     * {@code Character.isDigit(char ch)} returns
5826
     * {@code true} for the character.
5827
     *
5828
     * <p><b>Note:</b> This method cannot handle <a
5829
     * href="#supplementary"> supplementary characters</a>. To support
5830
     * all Unicode characters, including supplementary characters, use
5831
     * the {@link #isLetterOrDigit(int)} method.
5832
     *
5833
     * @param   ch   the character to be tested.
5834
     * @return  {@code true} if the character is a letter or digit;
5835
     *          {@code false} otherwise.
5836
     * @see     Character#isDigit(char)
5837
     * @see     Character#isJavaIdentifierPart(char)
5838
     * @see     Character#isJavaLetter(char)
5839
     * @see     Character#isJavaLetterOrDigit(char)
5840
     * @see     Character#isLetter(char)
5841
     * @see     Character#isUnicodeIdentifierPart(char)
5842
     * @since   1.0.2
5843
     */
5844
    public static boolean isLetterOrDigit(char ch) {
5845
        return isLetterOrDigit((int)ch);
5846
    }
5847

5848
    /**
5849
     * Determines if the specified character (Unicode code point) is a letter or digit.
5850
     * <p>
5851
     * A character is considered to be a letter or digit if either
5852
     * {@link #isLetter(int) isLetter(codePoint)} or
5853
     * {@link #isDigit(int) isDigit(codePoint)} returns
5854
     * {@code true} for the character.
5855
     *
5856
     * @param   codePoint the character (Unicode code point) to be tested.
5857
     * @return  {@code true} if the character is a letter or digit;
5858
     *          {@code false} otherwise.
5859
     * @see     Character#isDigit(int)
5860
     * @see     Character#isJavaIdentifierPart(int)
5861
     * @see     Character#isLetter(int)
5862
     * @see     Character#isUnicodeIdentifierPart(int)
5863
     * @since   1.5
5864
     */
5865
    public static boolean isLetterOrDigit(int codePoint) {
5866
        return ((((1 << Character.UPPERCASE_LETTER) |
5867
            (1 << Character.LOWERCASE_LETTER) |
5868
            (1 << Character.TITLECASE_LETTER) |
5869
            (1 << Character.MODIFIER_LETTER) |
5870
            (1 << Character.OTHER_LETTER) |
5871
            (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
5872
            != 0;
5873
    }
5874

5875
    /**
5876
     * Determines if the specified character is permissible as the first
5877
     * character in a Java identifier.
5878
     * <p>
5879
     * A character may start a Java identifier if and only if
5880
     * one of the following conditions is true:
5881
     * <ul>
5882
     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5883
     * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5884
     * <li> {@code ch} is a currency symbol (such as {@code '$'})
5885
     * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5886
     * </ul>
5887
     *
5888
     * These conditions are tested against the character information from version
5889
     * 6.2 of the Unicode Standard.
5890
     *
5891
     * @param   ch the character to be tested.
5892
     * @return  {@code true} if the character may start a Java
5893
     *          identifier; {@code false} otherwise.
5894
     * @see     Character#isJavaLetterOrDigit(char)
5895
     * @see     Character#isJavaIdentifierStart(char)
5896
     * @see     Character#isJavaIdentifierPart(char)
5897
     * @see     Character#isLetter(char)
5898
     * @see     Character#isLetterOrDigit(char)
5899
     * @see     Character#isUnicodeIdentifierStart(char)
5900
     * @since   1.02
5901
     * @deprecated Replaced by isJavaIdentifierStart(char).
5902
     */
5903
    @Deprecated
5904
    public static boolean isJavaLetter(char ch) {
5905
        return isJavaIdentifierStart(ch);
5906
    }
5907

5908
    /**
5909
     * Determines if the specified character may be part of a Java
5910
     * identifier as other than the first character.
5911
     * <p>
5912
     * A character may be part of a Java identifier if and only if any
5913
     * of the following conditions are true:
5914
     * <ul>
5915
     * <li>  it is a letter
5916
     * <li>  it is a currency symbol (such as {@code '$'})
5917
     * <li>  it is a connecting punctuation character (such as {@code '_'})
5918
     * <li>  it is a digit
5919
     * <li>  it is a numeric letter (such as a Roman numeral character)
5920
     * <li>  it is a combining mark
5921
     * <li>  it is a non-spacing mark
5922
     * <li> {@code isIdentifierIgnorable} returns
5923
     * {@code true} for the character.
5924
     * </ul>
5925
     *
5926
     * These conditions are tested against the character information from version
5927
     * 6.2 of the Unicode Standard.
5928
     *
5929
     * @param   ch the character to be tested.
5930
     * @return  {@code true} if the character may be part of a
5931
     *          Java identifier; {@code false} otherwise.
5932
     * @see     Character#isJavaLetter(char)
5933
     * @see     Character#isJavaIdentifierStart(char)
5934
     * @see     Character#isJavaIdentifierPart(char)
5935
     * @see     Character#isLetter(char)
5936
     * @see     Character#isLetterOrDigit(char)
5937
     * @see     Character#isUnicodeIdentifierPart(char)
5938
     * @see     Character#isIdentifierIgnorable(char)
5939
     * @since   1.02
5940
     * @deprecated Replaced by isJavaIdentifierPart(char).
5941
     */
5942
    @Deprecated
5943
    public static boolean isJavaLetterOrDigit(char ch) {
5944
        return isJavaIdentifierPart(ch);
5945
    }
5946

5947
    /**
5948
     * Determines if the specified character (Unicode code point) is an alphabet.
5949
     * <p>
5950
     * A character is considered to be alphabetic if its general category type,
5951
     * provided by {@link Character#getType(int) getType(codePoint)}, is any of
5952
     * the following:
5953
     * <ul>
5954
     * <li> <code>UPPERCASE_LETTER</code>
5955
     * <li> <code>LOWERCASE_LETTER</code>
5956
     * <li> <code>TITLECASE_LETTER</code>
5957
     * <li> <code>MODIFIER_LETTER</code>
5958
     * <li> <code>OTHER_LETTER</code>
5959
     * <li> <code>LETTER_NUMBER</code>
5960
     * </ul>
5961
     * or it has contributory property Other_Alphabetic as defined by the
5962
     * Unicode Standard.
5963
     *
5964
     * @param   codePoint the character (Unicode code point) to be tested.
5965
     * @return  <code>true</code> if the character is a Unicode alphabet
5966
     *          character, <code>false</code> otherwise.
5967
     * @since   1.7
5968
     */
5969
    public static boolean isAlphabetic(int codePoint) {
5970
        return (((((1 << Character.UPPERCASE_LETTER) |
5971
            (1 << Character.LOWERCASE_LETTER) |
5972
            (1 << Character.TITLECASE_LETTER) |
5973
            (1 << Character.MODIFIER_LETTER) |
5974
            (1 << Character.OTHER_LETTER) |
5975
            (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
5976
            CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
5977
    }
5978

5979
    /**
5980
     * Determines if the specified character (Unicode code point) is a CJKV
5981
     * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
5982
     * the Unicode Standard.
5983
     *
5984
     * @param   codePoint the character (Unicode code point) to be tested.
5985
     * @return  <code>true</code> if the character is a Unicode ideograph
5986
     *          character, <code>false</code> otherwise.
5987
     * @since   1.7
5988
     */
5989
    public static boolean isIdeographic(int codePoint) {
5990
        return CharacterData.of(codePoint).isIdeographic(codePoint);
5991
    }
5992

5993
    /**
5994
     * Determines if the specified character is
5995
     * permissible as the first character in a Java identifier.
5996
     * <p>
5997
     * A character may start a Java identifier if and only if
5998
     * one of the following conditions is true:
5999
     * <ul>
6000
     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6001
     * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
6002
     * <li> {@code ch} is a currency symbol (such as {@code '$'})
6003
     * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
6004
     * </ul>
6005
     *
6006
     * These conditions are tested against the character information from version
6007
     * 6.2 of the Unicode Standard.
6008
     *
6009
     * <p><b>Note:</b> This method cannot handle <a
6010
     * href="#supplementary"> supplementary characters</a>. To support
6011
     * all Unicode characters, including supplementary characters, use
6012
     * the {@link #isJavaIdentifierStart(int)} method.
6013
     *
6014
     * @param   ch the character to be tested.
6015
     * @return  {@code true} if the character may start a Java identifier;
6016
     *          {@code false} otherwise.
6017
     * @see     Character#isJavaIdentifierPart(char)
6018
     * @see     Character#isLetter(char)
6019
     * @see     Character#isUnicodeIdentifierStart(char)
6020
     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6021
     * @since   1.1
6022
     */
6023
    public static boolean isJavaIdentifierStart(char ch) {
6024
        return isJavaIdentifierStart((int)ch);
6025
    }
6026

6027
    /**
6028
     * Determines if the character (Unicode code point) is
6029
     * permissible as the first character in a Java identifier.
6030
     * <p>
6031
     * A character may start a Java identifier if and only if
6032
     * one of the following conditions is true:
6033
     * <ul>
6034
     * <li> {@link #isLetter(int) isLetter(codePoint)}
6035
     *      returns {@code true}
6036
     * <li> {@link #getType(int) getType(codePoint)}
6037
     *      returns {@code LETTER_NUMBER}
6038
     * <li> the referenced character is a currency symbol (such as {@code '$'})
6039
     * <li> the referenced character is a connecting punctuation character
6040
     *      (such as {@code '_'}).
6041
     * </ul>
6042
     *
6043
     * These conditions are tested against the character information from version
6044
     * 6.2 of the Unicode Standard.
6045
     *
6046
     * @param   codePoint the character (Unicode code point) to be tested.
6047
     * @return  {@code true} if the character may start a Java identifier;
6048
     *          {@code false} otherwise.
6049
     * @see     Character#isJavaIdentifierPart(int)
6050
     * @see     Character#isLetter(int)
6051
     * @see     Character#isUnicodeIdentifierStart(int)
6052
     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6053
     * @since   1.5
6054
     */
6055
    public static boolean isJavaIdentifierStart(int codePoint) {
6056
        return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
6057
    }
6058

6059
    /**
6060
     * Determines if the specified character may be part of a Java
6061
     * identifier as other than the first character.
6062
     * <p>
6063
     * A character may be part of a Java identifier if any of the following
6064
     * conditions are true:
6065
     * <ul>
6066
     * <li>  it is a letter
6067
     * <li>  it is a currency symbol (such as {@code '$'})
6068
     * <li>  it is a connecting punctuation character (such as {@code '_'})
6069
     * <li>  it is a digit
6070
     * <li>  it is a numeric letter (such as a Roman numeral character)
6071
     * <li>  it is a combining mark
6072
     * <li>  it is a non-spacing mark
6073
     * <li> {@code isIdentifierIgnorable} returns
6074
     * {@code true} for the character
6075
     * </ul>
6076
     *
6077
     * These conditions are tested against the character information from version
6078
     * 6.2 of the Unicode Standard.
6079
     *
6080
     * <p><b>Note:</b> This method cannot handle <a
6081
     * href="#supplementary"> supplementary characters</a>. To support
6082
     * all Unicode characters, including supplementary characters, use
6083
     * the {@link #isJavaIdentifierPart(int)} method.
6084
     *
6085
     * @param   ch      the character to be tested.
6086
     * @return {@code true} if the character may be part of a
6087
     *          Java identifier; {@code false} otherwise.
6088
     * @see     Character#isIdentifierIgnorable(char)
6089
     * @see     Character#isJavaIdentifierStart(char)
6090
     * @see     Character#isLetterOrDigit(char)
6091
     * @see     Character#isUnicodeIdentifierPart(char)
6092
     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6093
     * @since   1.1
6094
     */
6095
    public static boolean isJavaIdentifierPart(char ch) {
6096
        return isJavaIdentifierPart((int)ch);
6097
    }
6098

6099
    /**
6100
     * Determines if the character (Unicode code point) may be part of a Java
6101
     * identifier as other than the first character.
6102
     * <p>
6103
     * A character may be part of a Java identifier if any of the following
6104
     * conditions are true:
6105
     * <ul>
6106
     * <li>  it is a letter
6107
     * <li>  it is a currency symbol (such as {@code '$'})
6108
     * <li>  it is a connecting punctuation character (such as {@code '_'})
6109
     * <li>  it is a digit
6110
     * <li>  it is a numeric letter (such as a Roman numeral character)
6111
     * <li>  it is a combining mark
6112
     * <li>  it is a non-spacing mark
6113
     * <li> {@link #isIdentifierIgnorable(int)
6114
     * isIdentifierIgnorable(codePoint)} returns {@code true} for
6115
     * the code point
6116
     * </ul>
6117
     *
6118
     * These conditions are tested against the character information from version
6119
     * 6.2 of the Unicode Standard.
6120
     *
6121
     * @param   codePoint the character (Unicode code point) to be tested.
6122
     * @return {@code true} if the character may be part of a
6123
     *          Java identifier; {@code false} otherwise.
6124
     * @see     Character#isIdentifierIgnorable(int)
6125
     * @see     Character#isJavaIdentifierStart(int)
6126
     * @see     Character#isLetterOrDigit(int)
6127
     * @see     Character#isUnicodeIdentifierPart(int)
6128
     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6129
     * @since   1.5
6130
     */
6131
    public static boolean isJavaIdentifierPart(int codePoint) {
6132
        return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
6133
    }
6134

6135
    /**
6136
     * Determines if the specified character is permissible as the
6137
     * first character in a Unicode identifier.
6138
     * <p>
6139
     * A character may start a Unicode identifier if and only if
6140
     * one of the following conditions is true:
6141
     * <ul>
6142
     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6143
     * <li> {@link #getType(char) getType(ch)} returns
6144
     *      {@code LETTER_NUMBER}.
6145
     * </ul>
6146
     *
6147
     * <p><b>Note:</b> This method cannot handle <a
6148
     * href="#supplementary"> supplementary characters</a>. To support
6149
     * all Unicode characters, including supplementary characters, use
6150
     * the {@link #isUnicodeIdentifierStart(int)} method.
6151
     *
6152
     * @param   ch      the character to be tested.
6153
     * @return  {@code true} if the character may start a Unicode
6154
     *          identifier; {@code false} otherwise.
6155
     * @see     Character#isJavaIdentifierStart(char)
6156
     * @see     Character#isLetter(char)
6157
     * @see     Character#isUnicodeIdentifierPart(char)
6158
     * @since   1.1
6159
     */
6160
    public static boolean isUnicodeIdentifierStart(char ch) {
6161
        return isUnicodeIdentifierStart((int)ch);
6162
    }
6163

6164
    /**
6165
     * Determines if the specified character (Unicode code point) is permissible as the
6166
     * first character in a Unicode identifier.
6167
     * <p>
6168
     * A character may start a Unicode identifier if and only if
6169
     * one of the following conditions is true:
6170
     * <ul>
6171
     * <li> {@link #isLetter(int) isLetter(codePoint)}
6172
     *      returns {@code true}
6173
     * <li> {@link #getType(int) getType(codePoint)}
6174
     *      returns {@code LETTER_NUMBER}.
6175
     * </ul>
6176
     * @param   codePoint the character (Unicode code point) to be tested.
6177
     * @return  {@code true} if the character may start a Unicode
6178
     *          identifier; {@code false} otherwise.
6179
     * @see     Character#isJavaIdentifierStart(int)
6180
     * @see     Character#isLetter(int)
6181
     * @see     Character#isUnicodeIdentifierPart(int)
6182
     * @since   1.5
6183
     */
6184
    public static boolean isUnicodeIdentifierStart(int codePoint) {
6185
        return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
6186
    }
6187

6188
    /**
6189
     * Determines if the specified character may be part of a Unicode
6190
     * identifier as other than the first character.
6191
     * <p>
6192
     * A character may be part of a Unicode identifier if and only if
6193
     * one of the following statements is true:
6194
     * <ul>
6195
     * <li>  it is a letter
6196
     * <li>  it is a connecting punctuation character (such as {@code '_'})
6197
     * <li>  it is a digit
6198
     * <li>  it is a numeric letter (such as a Roman numeral character)
6199
     * <li>  it is a combining mark
6200
     * <li>  it is a non-spacing mark
6201
     * <li> {@code isIdentifierIgnorable} returns
6202
     * {@code true} for this character.
6203
     * </ul>
6204
     *
6205
     * <p><b>Note:</b> This method cannot handle <a
6206
     * href="#supplementary"> supplementary characters</a>. To support
6207
     * all Unicode characters, including supplementary characters, use
6208
     * the {@link #isUnicodeIdentifierPart(int)} method.
6209
     *
6210
     * @param   ch      the character to be tested.
6211
     * @return  {@code true} if the character may be part of a
6212
     *          Unicode identifier; {@code false} otherwise.
6213
     * @see     Character#isIdentifierIgnorable(char)
6214
     * @see     Character#isJavaIdentifierPart(char)
6215
     * @see     Character#isLetterOrDigit(char)
6216
     * @see     Character#isUnicodeIdentifierStart(char)
6217
     * @since   1.1
6218
     */
6219
    public static boolean isUnicodeIdentifierPart(char ch) {
6220
        return isUnicodeIdentifierPart((int)ch);
6221
    }
6222

6223
    /**
6224
     * Determines if the specified character (Unicode code point) may be part of a Unicode
6225
     * identifier as other than the first character.
6226
     * <p>
6227
     * A character may be part of a Unicode identifier if and only if
6228
     * one of the following statements is true:
6229
     * <ul>
6230
     * <li>  it is a letter
6231
     * <li>  it is a connecting punctuation character (such as {@code '_'})
6232
     * <li>  it is a digit
6233
     * <li>  it is a numeric letter (such as a Roman numeral character)
6234
     * <li>  it is a combining mark
6235
     * <li>  it is a non-spacing mark
6236
     * <li> {@code isIdentifierIgnorable} returns
6237
     * {@code true} for this character.
6238
     * </ul>
6239
     * @param   codePoint the character (Unicode code point) to be tested.
6240
     * @return  {@code true} if the character may be part of a
6241
     *          Unicode identifier; {@code false} otherwise.
6242
     * @see     Character#isIdentifierIgnorable(int)
6243
     * @see     Character#isJavaIdentifierPart(int)
6244
     * @see     Character#isLetterOrDigit(int)
6245
     * @see     Character#isUnicodeIdentifierStart(int)
6246
     * @since   1.5
6247
     */
6248
    public static boolean isUnicodeIdentifierPart(int codePoint) {
6249
        return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
6250
    }
6251

6252
    /**
6253
     * Determines if the specified character should be regarded as
6254
     * an ignorable character in a Java identifier or a Unicode identifier.
6255
     * <p>
6256
     * The following Unicode characters are ignorable in a Java identifier
6257
     * or a Unicode identifier:
6258
     * <ul>
6259
     * <li>ISO control characters that are not whitespace
6260
     * <ul>
6261
     * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6262
     * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6263
     * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6264
     * </ul>
6265
     *
6266
     * <li>all characters that have the {@code FORMAT} general
6267
     * category value
6268
     * </ul>
6269
     *
6270
     * <p><b>Note:</b> This method cannot handle <a
6271
     * href="#supplementary"> supplementary characters</a>. To support
6272
     * all Unicode characters, including supplementary characters, use
6273
     * the {@link #isIdentifierIgnorable(int)} method.
6274
     *
6275
     * @param   ch      the character to be tested.
6276
     * @return  {@code true} if the character is an ignorable control
6277
     *          character that may be part of a Java or Unicode identifier;
6278
     *           {@code false} otherwise.
6279
     * @see     Character#isJavaIdentifierPart(char)
6280
     * @see     Character#isUnicodeIdentifierPart(char)
6281
     * @since   1.1
6282
     */
6283
    public static boolean isIdentifierIgnorable(char ch) {
6284
        return isIdentifierIgnorable((int)ch);
6285
    }
6286

6287
    /**
6288
     * Determines if the specified character (Unicode code point) should be regarded as
6289
     * an ignorable character in a Java identifier or a Unicode identifier.
6290
     * <p>
6291
     * The following Unicode characters are ignorable in a Java identifier
6292
     * or a Unicode identifier:
6293
     * <ul>
6294
     * <li>ISO control characters that are not whitespace
6295
     * <ul>
6296
     * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6297
     * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6298
     * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6299
     * </ul>
6300
     *
6301
     * <li>all characters that have the {@code FORMAT} general
6302
     * category value
6303
     * </ul>
6304
     *
6305
     * @param   codePoint the character (Unicode code point) to be tested.
6306
     * @return  {@code true} if the character is an ignorable control
6307
     *          character that may be part of a Java or Unicode identifier;
6308
     *          {@code false} otherwise.
6309
     * @see     Character#isJavaIdentifierPart(int)
6310
     * @see     Character#isUnicodeIdentifierPart(int)
6311
     * @since   1.5
6312
     */
6313
    public static boolean isIdentifierIgnorable(int codePoint) {
6314
        return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
6315
    }
6316

6317
    /**
6318
     * Converts the character argument to lowercase using case
6319
     * mapping information from the UnicodeData file.
6320
     * <p>
6321
     * Note that
6322
     * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6323
     * does not always return {@code true} for some ranges of
6324
     * characters, particularly those that are symbols or ideographs.
6325
     *
6326
     * <p>In general, {@link String#toLowerCase()} should be used to map
6327
     * characters to lowercase. {@code String} case mapping methods
6328
     * have several benefits over {@code Character} case mapping methods.
6329
     * {@code String} case mapping methods can perform locale-sensitive
6330
     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6331
     * the {@code Character} case mapping methods cannot.
6332
     *
6333
     * <p><b>Note:</b> This method cannot handle <a
6334
     * href="#supplementary"> supplementary characters</a>. To support
6335
     * all Unicode characters, including supplementary characters, use
6336
     * the {@link #toLowerCase(int)} method.
6337
     *
6338
     * @param   ch   the character to be converted.
6339
     * @return  the lowercase equivalent of the character, if any;
6340
     *          otherwise, the character itself.
6341
     * @see     Character#isLowerCase(char)
6342
     * @see     String#toLowerCase()
6343
     */
6344
    public static char toLowerCase(char ch) {
6345
        return (char)toLowerCase((int)ch);
6346
    }
6347

6348
    /**
6349
     * Converts the character (Unicode code point) argument to
6350
     * lowercase using case mapping information from the UnicodeData
6351
     * file.
6352
     *
6353
     * <p> Note that
6354
     * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6355
     * does not always return {@code true} for some ranges of
6356
     * characters, particularly those that are symbols or ideographs.
6357
     *
6358
     * <p>In general, {@link String#toLowerCase()} should be used to map
6359
     * characters to lowercase. {@code String} case mapping methods
6360
     * have several benefits over {@code Character} case mapping methods.
6361
     * {@code String} case mapping methods can perform locale-sensitive
6362
     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6363
     * the {@code Character} case mapping methods cannot.
6364
     *
6365
     * @param   codePoint   the character (Unicode code point) to be converted.
6366
     * @return  the lowercase equivalent of the character (Unicode code
6367
     *          point), if any; otherwise, the character itself.
6368
     * @see     Character#isLowerCase(int)
6369
     * @see     String#toLowerCase()
6370
     *
6371
     * @since   1.5
6372
     */
6373
    public static int toLowerCase(int codePoint) {
6374
        return CharacterData.of(codePoint).toLowerCase(codePoint);
6375
    }
6376

6377
    /**
6378
     * Converts the character argument to uppercase using case mapping
6379
     * information from the UnicodeData file.
6380
     * <p>
6381
     * Note that
6382
     * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6383
     * does not always return {@code true} for some ranges of
6384
     * characters, particularly those that are symbols or ideographs.
6385
     *
6386
     * <p>In general, {@link String#toUpperCase()} should be used to map
6387
     * characters to uppercase. {@code String} case mapping methods
6388
     * have several benefits over {@code Character} case mapping methods.
6389
     * {@code String} case mapping methods can perform locale-sensitive
6390
     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6391
     * the {@code Character} case mapping methods cannot.
6392
     *
6393
     * <p><b>Note:</b> This method cannot handle <a
6394
     * href="#supplementary"> supplementary characters</a>. To support
6395
     * all Unicode characters, including supplementary characters, use
6396
     * the {@link #toUpperCase(int)} method.
6397
     *
6398
     * @param   ch   the character to be converted.
6399
     * @return  the uppercase equivalent of the character, if any;
6400
     *          otherwise, the character itself.
6401
     * @see     Character#isUpperCase(char)
6402
     * @see     String#toUpperCase()
6403
     */
6404
    public static char toUpperCase(char ch) {
6405
        return (char)toUpperCase((int)ch);
6406
    }
6407

6408
    /**
6409
     * Converts the character (Unicode code point) argument to
6410
     * uppercase using case mapping information from the UnicodeData
6411
     * file.
6412
     *
6413
     * <p>Note that
6414
     * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6415
     * does not always return {@code true} for some ranges of
6416
     * characters, particularly those that are symbols or ideographs.
6417
     *
6418
     * <p>In general, {@link String#toUpperCase()} should be used to map
6419
     * characters to uppercase. {@code String} case mapping methods
6420
     * have several benefits over {@code Character} case mapping methods.
6421
     * {@code String} case mapping methods can perform locale-sensitive
6422
     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6423
     * the {@code Character} case mapping methods cannot.
6424
     *
6425
     * @param   codePoint   the character (Unicode code point) to be converted.
6426
     * @return  the uppercase equivalent of the character, if any;
6427
     *          otherwise, the character itself.
6428
     * @see     Character#isUpperCase(int)
6429
     * @see     String#toUpperCase()
6430
     *
6431
     * @since   1.5
6432
     */
6433
    public static int toUpperCase(int codePoint) {
6434
        return CharacterData.of(codePoint).toUpperCase(codePoint);
6435
    }
6436

6437
    /**
6438
     * Converts the character argument to titlecase using case mapping
6439
     * information from the UnicodeData file. If a character has no
6440
     * explicit titlecase mapping and is not itself a titlecase char
6441
     * according to UnicodeData, then the uppercase mapping is
6442
     * returned as an equivalent titlecase mapping. If the
6443
     * {@code char} argument is already a titlecase
6444
     * {@code char}, the same {@code char} value will be
6445
     * returned.
6446
     * <p>
6447
     * Note that
6448
     * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6449
     * does not always return {@code true} for some ranges of
6450
     * characters.
6451
     *
6452
     * <p><b>Note:</b> This method cannot handle <a
6453
     * href="#supplementary"> supplementary characters</a>. To support
6454
     * all Unicode characters, including supplementary characters, use
6455
     * the {@link #toTitleCase(int)} method.
6456
     *
6457
     * @param   ch   the character to be converted.
6458
     * @return  the titlecase equivalent of the character, if any;
6459
     *          otherwise, the character itself.
6460
     * @see     Character#isTitleCase(char)
6461
     * @see     Character#toLowerCase(char)
6462
     * @see     Character#toUpperCase(char)
6463
     * @since   1.0.2
6464
     */
6465
    public static char toTitleCase(char ch) {
6466
        return (char)toTitleCase((int)ch);
6467
    }
6468

6469
    /**
6470
     * Converts the character (Unicode code point) argument to titlecase using case mapping
6471
     * information from the UnicodeData file. If a character has no
6472
     * explicit titlecase mapping and is not itself a titlecase char
6473
     * according to UnicodeData, then the uppercase mapping is
6474
     * returned as an equivalent titlecase mapping. If the
6475
     * character argument is already a titlecase
6476
     * character, the same character value will be
6477
     * returned.
6478
     *
6479
     * <p>Note that
6480
     * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6481
     * does not always return {@code true} for some ranges of
6482
     * characters.
6483
     *
6484
     * @param   codePoint   the character (Unicode code point) to be converted.
6485
     * @return  the titlecase equivalent of the character, if any;
6486
     *          otherwise, the character itself.
6487
     * @see     Character#isTitleCase(int)
6488
     * @see     Character#toLowerCase(int)
6489
     * @see     Character#toUpperCase(int)
6490
     * @since   1.5
6491
     */
6492
    public static int toTitleCase(int codePoint) {
6493
        return CharacterData.of(codePoint).toTitleCase(codePoint);
6494
    }
6495

6496
    /**
6497
     * Returns the numeric value of the character {@code ch} in the
6498
     * specified radix.
6499
     * <p>
6500
     * If the radix is not in the range {@code MIN_RADIX} &le;
6501
     * {@code radix} &le; {@code MAX_RADIX} or if the
6502
     * value of {@code ch} is not a valid digit in the specified
6503
     * radix, {@code -1} is returned. A character is a valid digit
6504
     * if at least one of the following is true:
6505
     * <ul>
6506
     * <li>The method {@code isDigit} is {@code true} of the character
6507
     *     and the Unicode decimal digit value of the character (or its
6508
     *     single-character decomposition) is less than the specified radix.
6509
     *     In this case the decimal digit value is returned.
6510
     * <li>The character is one of the uppercase Latin letters
6511
     *     {@code 'A'} through {@code 'Z'} and its code is less than
6512
     *     {@code radix + 'A' - 10}.
6513
     *     In this case, {@code ch - 'A' + 10}
6514
     *     is returned.
6515
     * <li>The character is one of the lowercase Latin letters
6516
     *     {@code 'a'} through {@code 'z'} and its code is less than
6517
     *     {@code radix + 'a' - 10}.
6518
     *     In this case, {@code ch - 'a' + 10}
6519
     *     is returned.
6520
     * <li>The character is one of the fullwidth uppercase Latin letters A
6521
     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6522
     *     and its code is less than
6523
     *     {@code radix + '\u005CuFF21' - 10}.
6524
     *     In this case, {@code ch - '\u005CuFF21' + 10}
6525
     *     is returned.
6526
     * <li>The character is one of the fullwidth lowercase Latin letters a
6527
     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6528
     *     and its code is less than
6529
     *     {@code radix + '\u005CuFF41' - 10}.
6530
     *     In this case, {@code ch - '\u005CuFF41' + 10}
6531
     *     is returned.
6532
     * </ul>
6533
     *
6534
     * <p><b>Note:</b> This method cannot handle <a
6535
     * href="#supplementary"> supplementary characters</a>. To support
6536
     * all Unicode characters, including supplementary characters, use
6537
     * the {@link #digit(int, int)} method.
6538
     *
6539
     * @param   ch      the character to be converted.
6540
     * @param   radix   the radix.
6541
     * @return  the numeric value represented by the character in the
6542
     *          specified radix.
6543
     * @see     Character#forDigit(int, int)
6544
     * @see     Character#isDigit(char)
6545
     */
6546
    public static int digit(char ch, int radix) {
6547
        return digit((int)ch, radix);
6548
    }
6549

6550
    /**
6551
     * Returns the numeric value of the specified character (Unicode
6552
     * code point) in the specified radix.
6553
     *
6554
     * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6555
     * {@code radix} &le; {@code MAX_RADIX} or if the
6556
     * character is not a valid digit in the specified
6557
     * radix, {@code -1} is returned. A character is a valid digit
6558
     * if at least one of the following is true:
6559
     * <ul>
6560
     * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6561
     *     and the Unicode decimal digit value of the character (or its
6562
     *     single-character decomposition) is less than the specified radix.
6563
     *     In this case the decimal digit value is returned.
6564
     * <li>The character is one of the uppercase Latin letters
6565
     *     {@code 'A'} through {@code 'Z'} and its code is less than
6566
     *     {@code radix + 'A' - 10}.
6567
     *     In this case, {@code codePoint - 'A' + 10}
6568
     *     is returned.
6569
     * <li>The character is one of the lowercase Latin letters
6570
     *     {@code 'a'} through {@code 'z'} and its code is less than
6571
     *     {@code radix + 'a' - 10}.
6572
     *     In this case, {@code codePoint - 'a' + 10}
6573
     *     is returned.
6574
     * <li>The character is one of the fullwidth uppercase Latin letters A
6575
     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6576
     *     and its code is less than
6577
     *     {@code radix + '\u005CuFF21' - 10}.
6578
     *     In this case,
6579
     *     {@code codePoint - '\u005CuFF21' + 10}
6580
     *     is returned.
6581
     * <li>The character is one of the fullwidth lowercase Latin letters a
6582
     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6583
     *     and its code is less than
6584
     *     {@code radix + '\u005CuFF41' - 10}.
6585
     *     In this case,
6586
     *     {@code codePoint - '\u005CuFF41' + 10}
6587
     *     is returned.
6588
     * </ul>
6589
     *
6590
     * @param   codePoint the character (Unicode code point) to be converted.
6591
     * @param   radix   the radix.
6592
     * @return  the numeric value represented by the character in the
6593
     *          specified radix.
6594
     * @see     Character#forDigit(int, int)
6595
     * @see     Character#isDigit(int)
6596
     * @since   1.5
6597
     */
6598
    public static int digit(int codePoint, int radix) {
6599
        return CharacterData.of(codePoint).digit(codePoint, radix);
6600
    }
6601

6602
    /**
6603
     * Returns the {@code int} value that the specified Unicode
6604
     * character represents. For example, the character
6605
     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
     * an {@code int} with a value of 50.
6607
     * <p>
6608
     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6609
     * {@code '\u005Cu005A'}), lowercase
6610
     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6611
     * full width variant ({@code '\u005CuFF21'} through
6612
     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6613
     * {@code '\u005CuFF5A'}) forms have numeric values from 10
6614
     * through 35. This is independent of the Unicode specification,
6615
     * which does not assign numeric values to these {@code char}
6616
     * values.
6617
     * <p>
6618
     * If the character does not have a numeric value, then -1 is returned.
6619
     * If the character has a numeric value that cannot be represented as a
6620
     * nonnegative integer (for example, a fractional value), then -2
6621
     * is returned.
6622
     *
6623
     * <p><b>Note:</b> This method cannot handle <a
6624
     * href="#supplementary"> supplementary characters</a>. To support
6625
     * all Unicode characters, including supplementary characters, use
6626
     * the {@link #getNumericValue(int)} method.
6627
     *
6628
     * @param   ch      the character to be converted.
6629
     * @return  the numeric value of the character, as a nonnegative {@code int}
6630
     *           value; -2 if the character has a numeric value that is not a
6631
     *          nonnegative integer; -1 if the character has no numeric value.
6632
     * @see     Character#forDigit(int, int)
6633
     * @see     Character#isDigit(char)
6634
     * @since   1.1
6635
     */
6636
    public static int getNumericValue(char ch) {
6637
        return getNumericValue((int)ch);
6638
    }
6639

6640
    /**
6641
     * Returns the {@code int} value that the specified
6642
     * character (Unicode code point) represents. For example, the character
6643
     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6644
     * an {@code int} with a value of 50.
6645
     * <p>
6646
     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6647
     * {@code '\u005Cu005A'}), lowercase
6648
     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6649
     * full width variant ({@code '\u005CuFF21'} through
6650
     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6651
     * {@code '\u005CuFF5A'}) forms have numeric values from 10
6652
     * through 35. This is independent of the Unicode specification,
6653
     * which does not assign numeric values to these {@code char}
6654
     * values.
6655
     * <p>
6656
     * If the character does not have a numeric value, then -1 is returned.
6657
     * If the character has a numeric value that cannot be represented as a
6658
     * nonnegative integer (for example, a fractional value), then -2
6659
     * is returned.
6660
     *
6661
     * @param   codePoint the character (Unicode code point) to be converted.
6662
     * @return  the numeric value of the character, as a nonnegative {@code int}
6663
     *          value; -2 if the character has a numeric value that is not a
6664
     *          nonnegative integer; -1 if the character has no numeric value.
6665
     * @see     Character#forDigit(int, int)
6666
     * @see     Character#isDigit(int)
6667
     * @since   1.5
6668
     */
6669
    public static int getNumericValue(int codePoint) {
6670
        return CharacterData.of(codePoint).getNumericValue(codePoint);
6671
    }
6672

6673
    /**
6674
     * Determines if the specified character is ISO-LATIN-1 white space.
6675
     * This method returns {@code true} for the following five
6676
     * characters only:
6677
     * <table summary="truechars">
6678
     * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6679
     *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6680
     * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6681
     *     <td>{@code NEW LINE}</td></tr>
6682
     * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6683
     *     <td>{@code FORM FEED}</td></tr>
6684
     * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6685
     *     <td>{@code CARRIAGE RETURN}</td></tr>
6686
     * <tr><td>{@code ' '}</td>             <td>{@code U+0020}</td>
6687
     *     <td>{@code SPACE}</td></tr>
6688
     * </table>
6689
     *
6690
     * @param      ch   the character to be tested.
6691
     * @return     {@code true} if the character is ISO-LATIN-1 white
6692
     *             space; {@code false} otherwise.
6693
     * @see        Character#isSpaceChar(char)
6694
     * @see        Character#isWhitespace(char)
6695
     * @deprecated Replaced by {@link #isWhitespace(char)}.
6696
     */
6697
    @Deprecated
6698
    public static boolean isSpace(char ch) {
6699
        return (ch <= 0x0020) &&
6700
            (((((1L << 0x0009) |
6701
            (1L << 0x000A) |
6702
            (1L << 0x000C) |
6703
            (1L << 0x000D) |
6704
            (1L << 0x0020)) >> ch) & 1L) != 0);
6705
    }
6706

6707

6708
    /**
6709
     * Determines if the specified character is a Unicode space character.
6710
     * A character is considered to be a space character if and only if
6711
     * it is specified to be a space character by the Unicode Standard. This
6712
     * method returns true if the character's general category type is any of
6713
     * the following:
6714
     * <ul>
6715
     * <li> {@code SPACE_SEPARATOR}
6716
     * <li> {@code LINE_SEPARATOR}
6717
     * <li> {@code PARAGRAPH_SEPARATOR}
6718
     * </ul>
6719
     *
6720
     * <p><b>Note:</b> This method cannot handle <a
6721
     * href="#supplementary"> supplementary characters</a>. To support
6722
     * all Unicode characters, including supplementary characters, use
6723
     * the {@link #isSpaceChar(int)} method.
6724
     *
6725
     * @param   ch      the character to be tested.
6726
     * @return  {@code true} if the character is a space character;
6727
     *          {@code false} otherwise.
6728
     * @see     Character#isWhitespace(char)
6729
     * @since   1.1
6730
     */
6731
    public static boolean isSpaceChar(char ch) {
6732
        return isSpaceChar((int)ch);
6733
    }
6734

6735
    /**
6736
     * Determines if the specified character (Unicode code point) is a
6737
     * Unicode space character.  A character is considered to be a
6738
     * space character if and only if it is specified to be a space
6739
     * character by the Unicode Standard. This method returns true if
6740
     * the character's general category type is any of the following:
6741
     *
6742
     * <ul>
6743
     * <li> {@link #SPACE_SEPARATOR}
6744
     * <li> {@link #LINE_SEPARATOR}
6745
     * <li> {@link #PARAGRAPH_SEPARATOR}
6746
     * </ul>
6747
     *
6748
     * @param   codePoint the character (Unicode code point) to be tested.
6749
     * @return  {@code true} if the character is a space character;
6750
     *          {@code false} otherwise.
6751
     * @see     Character#isWhitespace(int)
6752
     * @since   1.5
6753
     */
6754
    public static boolean isSpaceChar(int codePoint) {
6755
        return ((((1 << Character.SPACE_SEPARATOR) |
6756
                  (1 << Character.LINE_SEPARATOR) |
6757
                  (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
6758
            != 0;
6759
    }
6760

6761
    /**
6762
     * Determines if the specified character is white space according to Java.
6763
     * A character is a Java whitespace character if and only if it satisfies
6764
     * one of the following criteria:
6765
     * <ul>
6766
     * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6767
     *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6768
     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6769
     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6770
     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6771
     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6772
     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6773
     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6774
     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6775
     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6776
     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6777
     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6778
     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6779
     * </ul>
6780
     *
6781
     * <p><b>Note:</b> This method cannot handle <a
6782
     * href="#supplementary"> supplementary characters</a>. To support
6783
     * all Unicode characters, including supplementary characters, use
6784
     * the {@link #isWhitespace(int)} method.
6785
     *
6786
     * @param   ch the character to be tested.
6787
     * @return  {@code true} if the character is a Java whitespace
6788
     *          character; {@code false} otherwise.
6789
     * @see     Character#isSpaceChar(char)
6790
     * @since   1.1
6791
     */
6792
    public static boolean isWhitespace(char ch) {
6793
        return isWhitespace((int)ch);
6794
    }
6795

6796
    /**
6797
     * Determines if the specified character (Unicode code point) is
6798
     * white space according to Java.  A character is a Java
6799
     * whitespace character if and only if it satisfies one of the
6800
     * following criteria:
6801
     * <ul>
6802
     * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6803
     *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6804
     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6805
     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6806
     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6807
     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6808
     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6809
     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6810
     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6811
     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6812
     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6813
     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6814
     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6815
     * </ul>
6816
     *
6818
     * @param   codePoint the character (Unicode code point) to be tested.
6819
     * @return  {@code true} if the character is a Java whitespace
6820
     *          character; {@code false} otherwise.
6821
     * @see     Character#isSpaceChar(int)
6822
     * @since   1.5
6823
     */
6824
    public static boolean isWhitespace(int codePoint) {
6825
        return CharacterData.of(codePoint).isWhitespace(codePoint);
6826
    }
6827

6828
    /**
6829
     * Determines if the specified character is an ISO control
6830
     * character.  A character is considered to be an ISO control
6831
     * character if its code is in the range {@code '\u005Cu0000'}
6832
     * through {@code '\u005Cu001F'} or in the range
6833
     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6834
     *
6835
     * <p><b>Note:</b> This method cannot handle <a
6836
     * href="#supplementary"> supplementary characters</a>. To support
6837
     * all Unicode characters, including supplementary characters, use
6838
     * the {@link #isISOControl(int)} method.
6839
     *
6840
     * @param   ch      the character to be tested.
6841
     * @return  {@code true} if the character is an ISO control character;
6842
     *          {@code false} otherwise.
6843
     *
6844
     * @see     Character#isSpaceChar(char)
6845
     * @see     Character#isWhitespace(char)
6846
     * @since   1.1
6847
     */
6848
    public static boolean isISOControl(char ch) {
6849
        return isISOControl((int)ch);
6850
    }
6851

6852
    /**
6853
     * Determines if the specified character (Unicode code point) is an ISO control
6854
     * character.  A character is considered to be an ISO control
6855
     * character if its code is in the range {@code '\u005Cu0000'}
6856
     * through {@code '\u005Cu001F'} or in the range
6857
     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6858
     *
6859
     * @param   codePoint the character (Unicode code point) to be tested.
6860
     * @return  {@code true} if the character is an ISO control character;
6861
     *          {@code false} otherwise.
6862
     * @see     Character#isSpaceChar(int)
6863
     * @see     Character#isWhitespace(int)
6864
     * @since   1.5
6865
     */
6866
    public static boolean isISOControl(int codePoint) {
6867
        // Optimized form of:
6868
        //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
6869
        //     (codePoint >= 0x7F && codePoint <= 0x9F);
6870
        return codePoint <= 0x9F &&
6871
            (codePoint >= 0x7F || (codePoint >>> 5 == 0));
6872
    }
6873

6874
    /**
6875
     * Returns a value indicating a character's general category.
6876
     *
6877
     * <p><b>Note:</b> This method cannot handle <a
6878
     * href="#supplementary"> supplementary characters</a>. To support
6879
     * all Unicode characters, including supplementary characters, use
6880
     * the {@link #getType(int)} method.
6881
     *
6882
     * @param   ch      the character to be tested.
6883
     * @return  a value of type {@code int} representing the
6884
     *          character's general category.
6885
     * @see     Character#COMBINING_SPACING_MARK
6886
     * @see     Character#CONNECTOR_PUNCTUATION
6887
     * @see     Character#CONTROL
6888
     * @see     Character#CURRENCY_SYMBOL
6889
     * @see     Character#DASH_PUNCTUATION
6890
     * @see     Character#DECIMAL_DIGIT_NUMBER
6891
     * @see     Character#ENCLOSING_MARK
6892
     * @see     Character#END_PUNCTUATION
6893
     * @see     Character#FINAL_QUOTE_PUNCTUATION
6894
     * @see     Character#FORMAT
6895
     * @see     Character#INITIAL_QUOTE_PUNCTUATION
6896
     * @see     Character#LETTER_NUMBER
6897
     * @see     Character#LINE_SEPARATOR
6898
     * @see     Character#LOWERCASE_LETTER
6899
     * @see     Character#MATH_SYMBOL
6900
     * @see     Character#MODIFIER_LETTER
6901
     * @see     Character#MODIFIER_SYMBOL
6902
     * @see     Character#NON_SPACING_MARK
6903
     * @see     Character#OTHER_LETTER
6904
     * @see     Character#OTHER_NUMBER
6905
     * @see     Character#OTHER_PUNCTUATION
6906
     * @see     Character#OTHER_SYMBOL
6907
     * @see     Character#PARAGRAPH_SEPARATOR
6908
     * @see     Character#PRIVATE_USE
6909
     * @see     Character#SPACE_SEPARATOR
6910
     * @see     Character#START_PUNCTUATION
6911
     * @see     Character#SURROGATE
6912
     * @see     Character#TITLECASE_LETTER
6913
     * @see     Character#UNASSIGNED
6914
     * @see     Character#UPPERCASE_LETTER
6915
     * @since   1.1
6916
     */
6917
    public static int getType(char ch) {
6918
        return getType((int)ch);
6919
    }
6920

6921
    /**
6922
     * Returns a value indicating a character's general category.
6923
     *
6924
     * @param   codePoint the character (Unicode code point) to be tested.
6925
     * @return  a value of type {@code int} representing the
6926
     *          character's general category.
6927
     * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
6928
     * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
6929
     * @see     Character#CONTROL CONTROL
6930
     * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
6931
     * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
6932
     * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
6933
     * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
6934
     * @see     Character#END_PUNCTUATION END_PUNCTUATION
6935
     * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
6936
     * @see     Character#FORMAT FORMAT
6937
     * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
6938
     * @see     Character#LETTER_NUMBER LETTER_NUMBER
6939
     * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
6940
     * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
6941
     * @see     Character#MATH_SYMBOL MATH_SYMBOL
6942
     * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
6943
     * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
6944
     * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
6945
     * @see     Character#OTHER_LETTER OTHER_LETTER
6946
     * @see     Character#OTHER_NUMBER OTHER_NUMBER
6947
     * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
6948
     * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
6949
     * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
6950
     * @see     Character#PRIVATE_USE PRIVATE_USE
6951
     * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
6952
     * @see     Character#START_PUNCTUATION START_PUNCTUATION
6953
     * @see     Character#SURROGATE SURROGATE
6954
     * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
6955
     * @see     Character#UNASSIGNED UNASSIGNED
6956
     * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
6957
     * @since   1.5
6958
     */
6959
    public static int getType(int codePoint) {
6960
        return CharacterData.of(codePoint).getType(codePoint);
6961
    }
6962

6963
    /**
6964
     * Determines the character representation for a specific digit in
6965
     * the specified radix. If the value of {@code radix} is not a
6966
     * valid radix, or the value of {@code digit} is not a valid
6967
     * digit in the specified radix, the null character
6968
     * ({@code '\u005Cu0000'}) is returned.
6969
     * <p>
6970
     * The {@code radix} argument is valid if it is greater than or
6971
     * equal to {@code MIN_RADIX} and less than or equal to
6972
     * {@code MAX_RADIX}. The {@code digit} argument is valid if
6973
     * {@code 0 <= digit < radix}.
6974
     * <p>
6975
     * If the digit is less than 10, then
6976
     * {@code '0' + digit} is returned. Otherwise, the value
6977
     * {@code 'a' + digit - 10} is returned.
6978
     *
6979
     * @param   digit   the number to convert to a character.
6980
     * @param   radix   the radix.
6981
     * @return  the {@code char} representation of the specified digit
6982
     *          in the specified radix.
6983
     * @see     Character#MIN_RADIX
6984
     * @see     Character#MAX_RADIX
6985
     * @see     Character#digit(char, int)
6986
     */
6987
    public static char forDigit(int digit, int radix) {
6988
        if ((digit >= radix) || (digit < 0)) {
6989
            return '\0';
6990
        }
6991
        if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
6992
            return '\0';
6993
        }
6994
        if (digit < 10) {
6995
            return (char)('0' + digit);
6996
        }
6997
        return (char)('a' - 10 + digit);
6998
    }
6999

7000
    /**
7001
     * Returns the Unicode directionality property for the given
7002
     * character.  Character directionality is used to calculate the
7003
     * visual ordering of text. The directionality value of undefined
7004
     * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
7005
     *
7006
     * <p><b>Note:</b> This method cannot handle <a
7007
     * href="#supplementary"> supplementary characters</a>. To support
7008
     * all Unicode characters, including supplementary characters, use
7009
     * the {@link #getDirectionality(int)} method.
7010
     *
7011
     * @param  ch {@code char} for which the directionality property
7012
     *            is requested.
7013
     * @return the directionality property of the {@code char} value.
7014
     *
7015
     * @see Character#DIRECTIONALITY_UNDEFINED
7016
     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
7017
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
7018
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7019
     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
7020
     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7021
     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7022
     * @see Character#DIRECTIONALITY_ARABIC_NUMBER
7023
     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7024
     * @see Character#DIRECTIONALITY_NONSPACING_MARK
7025
     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
7026
     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
7027
     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
7028
     * @see Character#DIRECTIONALITY_WHITESPACE
7029
     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
7030
     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7031
     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7032
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7033
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7034
     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7035
     * @since 1.4
7036
     */
7037
    public static byte getDirectionality(char ch) {
7038
        return getDirectionality((int)ch);
7039
    }
7040

7041
    /**
7042
     * Returns the Unicode directionality property for the given
7043
     * character (Unicode code point).  Character directionality is
7044
     * used to calculate the visual ordering of text. The
7045
     * directionality value of undefined character is {@link
7046
     * #DIRECTIONALITY_UNDEFINED}.
7047
     *
7048
     * @param   codePoint the character (Unicode code point) for which
7049
     *          the directionality property is requested.
7050
     * @return the directionality property of the character.
7051
     *
7052
     * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7053
     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7054
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7055
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7056
     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7057
     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7058
     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7059
     * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7060
     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7061
     * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7062
     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7063
     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7064
     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7065
     * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7066
     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7067
     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7068
     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7069
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7070
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7071
     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7072
     * @since    1.5
7073
     */
7074
    public static byte getDirectionality(int codePoint) {
7075
        return CharacterData.of(codePoint).getDirectionality(codePoint);
7076
    }
7077

7078
    /**
7079
     * Determines whether the character is mirrored according to the
7080
     * Unicode specification.  Mirrored characters should have their
7081
     * glyphs horizontally mirrored when displayed in text that is
7082
     * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
7083
     * PARENTHESIS is semantically defined to be an <i>opening
7084
     * parenthesis</i>.  This will appear as a "(" in text that is
7085
     * left-to-right but as a ")" in text that is right-to-left.
7086
     *
7087
     * <p><b>Note:</b> This method cannot handle <a
7088
     * href="#supplementary"> supplementary characters</a>. To support
7089
     * all Unicode characters, including supplementary characters, use
7090
     * the {@link #isMirrored(int)} method.
7091
     *
7092
     * @param  ch {@code char} for which the mirrored property is requested
7093
     * @return {@code true} if the char is mirrored, {@code false}
7094
     *         if the {@code char} is not mirrored or is not defined.
7095
     * @since 1.4
7096
     */
7097
    public static boolean isMirrored(char ch) {
7098
        return isMirrored((int)ch);
7099
    }
7100

7101
    /**
7102
     * Determines whether the specified character (Unicode code point)
7103
     * is mirrored according to the Unicode specification.  Mirrored
7104
     * characters should have their glyphs horizontally mirrored when
7105
     * displayed in text that is right-to-left.  For example,
7106
     * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
7107
     * defined to be an <i>opening parenthesis</i>.  This will appear
7108
     * as a "(" in text that is left-to-right but as a ")" in text
7109
     * that is right-to-left.
7110
     *
7111
     * @param   codePoint the character (Unicode code point) to be tested.
7112
     * @return  {@code true} if the character is mirrored, {@code false}
7113
     *          if the character is not mirrored or is not defined.
7114
     * @since   1.5
7115
     */
7116
    public static boolean isMirrored(int codePoint) {
7117
        return CharacterData.of(codePoint).isMirrored(codePoint);
7118
    }
7119

7120
    /**
7121
     * Compares two {@code Character} objects numerically.
7122
     *
7123
     * @param   anotherCharacter   the {@code Character} to be compared.
7124

7125
     * @return  the value {@code 0} if the argument {@code Character}
7126
     *          is equal to this {@code Character}; a value less than
7127
     *          {@code 0} if this {@code Character} is numerically less
7128
     *          than the {@code Character} argument; and a value greater than
7129
     *          {@code 0} if this {@code Character} is numerically greater
7130
     *          than the {@code Character} argument (unsigned comparison).
7131
     *          Note that this is strictly a numerical comparison; it is not
7132
     *          locale-dependent.
7133
     * @since   1.2
7134
     */
7135
    public int compareTo(Character anotherCharacter) {
7136
        return compare(this.value, anotherCharacter.value);
7137
    }
7138

7139
    /**
7140
     * Compares two {@code char} values numerically.
7141
     * The value returned is identical to what would be returned by:
7142
     * <pre>
7143
     *    Character.valueOf(x).compareTo(Character.valueOf(y))
7144
     * </pre>
7145
     *
7146
     * @param  x the first {@code char} to compare
7147
     * @param  y the second {@code char} to compare
7148
     * @return the value {@code 0} if {@code x == y};
7149
     *         a value less than {@code 0} if {@code x < y}; and
7150
     *         a value greater than {@code 0} if {@code x > y}
7151
     * @since 1.7
7152
     */
7153
    public static int compare(char x, char y) {
7154
        return x - y;
7155
    }
7156

7157
    /**
7158
     * Converts the character (Unicode code point) argument to uppercase using
     * information from the UnicodeData file.
7161
     *
7162
     * @param   codePoint   the character (Unicode code point) to be converted.
7163
     * @return  either the uppercase equivalent of the character, if
7164
     *          any, or an error flag ({@code Character.ERROR})
7165
     *          that indicates that a 1:M {@code char} mapping exists.
7166
     * @see     Character#isLowerCase(char)
7167
     * @see     Character#isUpperCase(char)
7168
     * @see     Character#toLowerCase(char)
7169
     * @see     Character#toTitleCase(char)
7170
     * @since 1.4
7171
     */
7172
    static int toUpperCaseEx(int codePoint) {
7173
        assert isValidCodePoint(codePoint);
7174
        return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
7175
    }
7176

7177
    /**
7178
     * Converts the character (Unicode code point) argument to uppercase using case
7179
     * mapping information from the SpecialCasing file in the Unicode
7180
     * specification. If a character has no explicit uppercase
7181
     * mapping, then the {@code char} itself is returned in the
7182
     * {@code char[]}.
7183
     *
7184
     * @param   codePoint   the character (Unicode code point) to be converted.
7185
     * @return a {@code char[]} with the uppercased character.
7186
     * @since 1.4
7187
     */
7188
    static char[] toUpperCaseCharArray(int codePoint) {
7189
        // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
7190
        assert isBmpCodePoint(codePoint);
7191
        return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
7192
    }
7193

7194
    /**
7195
     * The number of bits used to represent a {@code char} value in unsigned
7196
     * binary form, constant {@code 16}.
7197
     *
7198
     * @since 1.5
7199
     */
7200
    public static final int SIZE = 16; // width of one char value, in bits
7201

7202
    /**
7203
     * The number of bytes used to represent a {@code char} value in unsigned
7204
     * binary form.
7205
     *
7206
     * @since 1.8
7207
     */
7208
    public static final int BYTES = SIZE / Byte.SIZE; // bits per char divided by bits per byte
7209

7210
    /**
7211
     * Returns the value obtained by reversing the order of the bytes in the
7212
     * specified {@code char} value.
7213
     *
7214
     * @param ch The {@code char} of which to reverse the byte order.
7215
     * @return the value obtained by reversing (or, equivalently, swapping)
7216
     *     the bytes in the specified {@code char} value.
7217
     * @since 1.5
7218
     */
7219
    public static char reverseBytes(char ch) {
7220
        return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7221
    }
7222

7223
    /**
7224
     * Returns the Unicode name of the specified character
7225
     * {@code codePoint}, or null if the code point is
7226
     * {@link #UNASSIGNED unassigned}.
7227
     * <p>
7228
     * Note: if the specified character is not assigned a name by
     * the <i>UnicodeData</i> file (part of the Unicode Character
     * Database maintained by the Unicode Consortium), the returned
     * name is the same as the result of the expression:
7232
     *
7233
     * <blockquote>{@code
7234
     *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7235
     *     + " "
7236
     *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7237
     *
7238
     * }</blockquote>
7239
     *
7240
     * @param  codePoint the character (Unicode code point)
7241
     *
7242
     * @return the Unicode name of the specified character, or null if
7243
     *         the code point is unassigned.
7244
     *
7245
     * @exception IllegalArgumentException if the specified
7246
     *            {@code codePoint} is not a valid Unicode
7247
     *            code point.
7248
     *
7249
     * @since 1.7
7250
     */
7251
    public static String getName(int codePoint) {
7252
        if (!isValidCodePoint(codePoint)) {
7253
            throw new IllegalArgumentException();
7254
        }
7255
        String name = CharacterName.get(codePoint);
7256
        if (name != null)
7257
            return name;
7258
        if (getType(codePoint) == UNASSIGNED)
7259
            return null;
7260
        UnicodeBlock block = UnicodeBlock.of(codePoint);
7261
        if (block != null)
7262
            return block.toString().replace('_', ' ') + " "
7263
                   + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7264
        // should never come here
7265
        return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7266
    }
7267
}
7268

7269
Product

Resources

Company