Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/java/lang/Character.java
38829 views
1
/*
2
* Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
26
package java.lang;
27
28
import java.util.Arrays;
29
import java.util.Map;
30
import java.util.HashMap;
31
import java.util.Locale;
32
33
/**
34
* The {@code Character} class wraps a value of the primitive
35
* type {@code char} in an object. An object of class
36
* {@code Character} contains a single field whose type is
37
* {@code char}.
38
* <p>
39
* In addition, this class provides a large number of static methods for
40
* determining a character's category (lowercase letter, digit, etc.)
41
* and for converting characters from uppercase to lowercase and vice
42
* versa.
43
*
44
* <h3><a id="conformance">Unicode Conformance</a></h3>
45
* <p>
46
* The fields and methods of class {@code Character} are defined in terms
47
* of character information from the Unicode Standard, specifically the
48
* <i>UnicodeData</i> file that is part of the Unicode Character Database.
49
* This file specifies properties including name and category for every
50
* assigned Unicode code point or character range. The file is available
51
* from the Unicode Consortium at
52
* <a href="http://www.unicode.org">http://www.unicode.org</a>.
53
* <p>
54
* The Java SE 8 Platform uses character information from version 6.2
55
* of the Unicode Standard, with two extensions. First, the Java SE 8 Platform
56
* allows an implementation of class {@code Character} to use the Japanese Era
57
* code point, {@code U+32FF}, from the first version of the Unicode Standard
58
* after 6.2 that assigns the code point. Second, in recognition of the fact
59
* that new currencies appear frequently, the Java SE 8 Platform allows an
60
* implementation of class {@code Character} to use the Currency Symbols
61
* block from version 10.0 of the Unicode Standard. Consequently, the
62
* behavior of fields and methods of class {@code Character} may vary across
63
* implementations of the Java SE 8 Platform when processing the aforementioned
64
* code points (outside of version 6.2), except for the following methods
65
* that define Java identifiers:
66
* {@link #isJavaIdentifierStart(int)}, {@link #isJavaIdentifierStart(char)},
67
* {@link #isJavaIdentifierPart(int)}, and {@link #isJavaIdentifierPart(char)}.
68
* Code points in Java identifiers must be drawn from version 6.2 of
69
* the Unicode Standard.
70
*
71
* <h3><a name="unicode">Unicode Character Representations</a></h3>
72
*
73
* <p>The {@code char} data type (and therefore the value that a
74
* {@code Character} object encapsulates) are based on the
75
* original Unicode specification, which defined characters as
76
* fixed-width 16-bit entities. The Unicode Standard has since been
77
* changed to allow for characters whose representation requires more
78
* than 16 bits. The range of legal <em>code point</em>s is now
79
* U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
80
* (Refer to the <a
81
* href="http://www.unicode.org/reports/tr27/#notation"><i>
82
* definition</i></a> of the U+<i>n</i> notation in the Unicode
83
* Standard.)
84
*
85
* <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
86
* sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
87
* <a name="supplementary">Characters</a> whose code points are greater
88
* than U+FFFF are called <em>supplementary character</em>s. The Java
89
* platform uses the UTF-16 representation in {@code char} arrays and
90
* in the {@code String} and {@code StringBuffer} classes. In
91
* this representation, supplementary characters are represented as a pair
92
* of {@code char} values, the first from the <em>high-surrogates</em>
93
* range, (&#92;uD800-&#92;uDBFF), the second from the
94
* <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
95
*
96
* <p>A {@code char} value, therefore, represents Basic
97
* Multilingual Plane (BMP) code points, including the surrogate
98
* code points, or code units of the UTF-16 encoding. An
99
* {@code int} value represents all Unicode code points,
100
* including supplementary code points. The lower (least significant)
101
* 21 bits of {@code int} are used to represent Unicode code
102
* points and the upper (most significant) 11 bits must be zero.
103
* Unless otherwise specified, the behavior with respect to
104
* supplementary characters and surrogate {@code char} values is
105
* as follows:
106
*
107
* <ul>
108
* <li>The methods that only accept a {@code char} value cannot support
109
* supplementary characters. They treat {@code char} values from the
110
* surrogate ranges as undefined characters. For example,
111
* {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
112
* this specific value, if followed by any low-surrogate value in a string,
113
* would represent a letter.
114
*
115
* <li>The methods that accept an {@code int} value support all
116
* Unicode characters, including supplementary characters. For
117
* example, {@code Character.isLetter(0x2F81A)} returns
118
* {@code true} because the code point value represents a letter
119
* (a CJK ideograph).
120
* </ul>
121
*
122
* <p>In the Java SE API documentation, <em>Unicode code point</em> is
123
* used for character values in the range between U+0000 and U+10FFFF,
124
* and <em>Unicode code unit</em> is used for 16-bit
125
* {@code char} values that are code units of the <em>UTF-16</em>
126
* encoding. For more information on Unicode terminology, refer to the
127
* <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
128
*
129
* @author Lee Boynton
130
* @author Guy Steele
131
* @author Akira Tanaka
132
* @author Martin Buchholz
133
* @author Ulf Zibis
134
* @since 1.0
135
*/
136
public final
137
class Character implements java.io.Serializable, Comparable<Character> {
138
/**
139
* The minimum radix available for conversion to and from strings.
140
* The constant value of this field is the smallest value permitted
141
* for the radix argument in radix-conversion methods such as the
142
* {@code digit} method, the {@code forDigit} method, and the
143
* {@code toString} method of class {@code Integer}.
144
*
145
* @see Character#digit(char, int)
146
* @see Character#forDigit(int, int)
147
* @see Integer#toString(int, int)
148
* @see Integer#valueOf(String)
149
*/
150
public static final int MIN_RADIX = 2;
151
152
/**
153
* The maximum radix available for conversion to and from strings.
154
* The constant value of this field is the largest value permitted
155
* for the radix argument in radix-conversion methods such as the
156
* {@code digit} method, the {@code forDigit} method, and the
157
* {@code toString} method of class {@code Integer}.
158
*
159
* @see Character#digit(char, int)
160
* @see Character#forDigit(int, int)
161
* @see Integer#toString(int, int)
162
* @see Integer#valueOf(String)
163
*/
164
public static final int MAX_RADIX = 36;
165
166
/**
167
* The constant value of this field is the smallest value of type
168
* {@code char}, {@code '\u005Cu0000'}.
169
*
170
* @since 1.0.2
171
*/
172
public static final char MIN_VALUE = '\u0000';
173
174
/**
175
* The constant value of this field is the largest value of type
176
* {@code char}, {@code '\u005CuFFFF'}.
177
*
178
* @since 1.0.2
179
*/
180
public static final char MAX_VALUE = '\uFFFF';
181
182
/**
183
* The {@code Class} instance representing the primitive type
184
* {@code char}.
185
*
186
* @since 1.1
187
*/
188
@SuppressWarnings("unchecked")
189
public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
190
191
/*
192
* Normative general types
193
*/
194
195
/*
196
* General character types
197
*/
198
199
/**
200
* General category "Cn" in the Unicode specification.
201
* @since 1.1
202
*/
203
public static final byte UNASSIGNED = 0;
204
205
/**
206
* General category "Lu" in the Unicode specification.
207
* @since 1.1
208
*/
209
public static final byte UPPERCASE_LETTER = 1;
210
211
/**
212
* General category "Ll" in the Unicode specification.
213
* @since 1.1
214
*/
215
public static final byte LOWERCASE_LETTER = 2;
216
217
/**
218
* General category "Lt" in the Unicode specification.
219
* @since 1.1
220
*/
221
public static final byte TITLECASE_LETTER = 3;
222
223
/**
224
* General category "Lm" in the Unicode specification.
225
* @since 1.1
226
*/
227
public static final byte MODIFIER_LETTER = 4;
228
229
/**
230
* General category "Lo" in the Unicode specification.
231
* @since 1.1
232
*/
233
public static final byte OTHER_LETTER = 5;
234
235
/**
236
* General category "Mn" in the Unicode specification.
237
* @since 1.1
238
*/
239
public static final byte NON_SPACING_MARK = 6;
240
241
/**
242
* General category "Me" in the Unicode specification.
243
* @since 1.1
244
*/
245
public static final byte ENCLOSING_MARK = 7;
246
247
/**
248
* General category "Mc" in the Unicode specification.
249
* @since 1.1
250
*/
251
public static final byte COMBINING_SPACING_MARK = 8;
252
253
/**
254
* General category "Nd" in the Unicode specification.
255
* @since 1.1
256
*/
257
public static final byte DECIMAL_DIGIT_NUMBER = 9;
258
259
/**
260
* General category "Nl" in the Unicode specification.
261
* @since 1.1
262
*/
263
public static final byte LETTER_NUMBER = 10;
264
265
/**
266
* General category "No" in the Unicode specification.
267
* @since 1.1
268
*/
269
public static final byte OTHER_NUMBER = 11;
270
271
/**
272
* General category "Zs" in the Unicode specification.
273
* @since 1.1
274
*/
275
public static final byte SPACE_SEPARATOR = 12;
276
277
/**
278
* General category "Zl" in the Unicode specification.
279
* @since 1.1
280
*/
281
public static final byte LINE_SEPARATOR = 13;
282
283
/**
284
* General category "Zp" in the Unicode specification.
285
* @since 1.1
286
*/
287
public static final byte PARAGRAPH_SEPARATOR = 14;
288
289
/**
290
* General category "Cc" in the Unicode specification.
291
* @since 1.1
292
*/
293
public static final byte CONTROL = 15;
294
295
/**
296
* General category "Cf" in the Unicode specification.
297
* @since 1.1
298
*/
299
public static final byte FORMAT = 16;
300
301
/**
302
* General category "Co" in the Unicode specification.
303
* @since 1.1
304
*/
305
public static final byte PRIVATE_USE = 18;
306
307
/**
308
* General category "Cs" in the Unicode specification.
309
* @since 1.1
310
*/
311
public static final byte SURROGATE = 19;
312
313
/**
314
* General category "Pd" in the Unicode specification.
315
* @since 1.1
316
*/
317
public static final byte DASH_PUNCTUATION = 20;
318
319
/**
320
* General category "Ps" in the Unicode specification.
321
* @since 1.1
322
*/
323
public static final byte START_PUNCTUATION = 21;
324
325
/**
326
* General category "Pe" in the Unicode specification.
327
* @since 1.1
328
*/
329
public static final byte END_PUNCTUATION = 22;
330
331
/**
332
* General category "Pc" in the Unicode specification.
333
* @since 1.1
334
*/
335
public static final byte CONNECTOR_PUNCTUATION = 23;
336
337
/**
338
* General category "Po" in the Unicode specification.
339
* @since 1.1
340
*/
341
public static final byte OTHER_PUNCTUATION = 24;
342
343
/**
344
* General category "Sm" in the Unicode specification.
345
* @since 1.1
346
*/
347
public static final byte MATH_SYMBOL = 25;
348
349
/**
350
* General category "Sc" in the Unicode specification.
351
* @since 1.1
352
*/
353
public static final byte CURRENCY_SYMBOL = 26;
354
355
/**
356
* General category "Sk" in the Unicode specification.
357
* @since 1.1
358
*/
359
public static final byte MODIFIER_SYMBOL = 27;
360
361
/**
362
* General category "So" in the Unicode specification.
363
* @since 1.1
364
*/
365
public static final byte OTHER_SYMBOL = 28;
366
367
/**
368
* General category "Pi" in the Unicode specification.
369
* @since 1.4
370
*/
371
public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
372
373
/**
374
* General category "Pf" in the Unicode specification.
375
* @since 1.4
376
*/
377
public static final byte FINAL_QUOTE_PUNCTUATION = 30;
378
379
/**
380
* Error flag. Use int (code point) to avoid confusion with U+FFFF.
381
*/
382
static final int ERROR = 0xFFFFFFFF;
383
384
385
/**
386
* Undefined bidirectional character type. Undefined {@code char}
387
* values have undefined directionality in the Unicode specification.
388
* @since 1.4
389
*/
390
public static final byte DIRECTIONALITY_UNDEFINED = -1;
391
392
/**
393
* Strong bidirectional character type "L" in the Unicode specification.
394
* @since 1.4
395
*/
396
public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
397
398
/**
399
* Strong bidirectional character type "R" in the Unicode specification.
400
* @since 1.4
401
*/
402
public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
403
404
/**
405
* Strong bidirectional character type "AL" in the Unicode specification.
406
* @since 1.4
407
*/
408
public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
409
410
/**
411
* Weak bidirectional character type "EN" in the Unicode specification.
412
* @since 1.4
413
*/
414
public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
415
416
/**
417
* Weak bidirectional character type "ES" in the Unicode specification.
418
* @since 1.4
419
*/
420
public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
421
422
/**
423
* Weak bidirectional character type "ET" in the Unicode specification.
424
* @since 1.4
425
*/
426
public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
427
428
/**
429
* Weak bidirectional character type "AN" in the Unicode specification.
430
* @since 1.4
431
*/
432
public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
433
434
/**
435
* Weak bidirectional character type "CS" in the Unicode specification.
436
* @since 1.4
437
*/
438
public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
439
440
/**
441
* Weak bidirectional character type "NSM" in the Unicode specification.
442
* @since 1.4
443
*/
444
public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
445
446
/**
447
* Weak bidirectional character type "BN" in the Unicode specification.
448
* @since 1.4
449
*/
450
public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
451
452
/**
453
* Neutral bidirectional character type "B" in the Unicode specification.
454
* @since 1.4
455
*/
456
public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
457
458
/**
459
* Neutral bidirectional character type "S" in the Unicode specification.
460
* @since 1.4
461
*/
462
public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
463
464
/**
465
* Neutral bidirectional character type "WS" in the Unicode specification.
466
* @since 1.4
467
*/
468
public static final byte DIRECTIONALITY_WHITESPACE = 12;
469
470
/**
471
* Neutral bidirectional character type "ON" in the Unicode specification.
472
* @since 1.4
473
*/
474
public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
475
476
/**
477
* Strong bidirectional character type "LRE" in the Unicode specification.
478
* @since 1.4
479
*/
480
public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
481
482
/**
483
* Strong bidirectional character type "LRO" in the Unicode specification.
484
* @since 1.4
485
*/
486
public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
487
488
/**
489
* Strong bidirectional character type "RLE" in the Unicode specification.
490
* @since 1.4
491
*/
492
public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
493
494
/**
495
* Strong bidirectional character type "RLO" in the Unicode specification.
496
* @since 1.4
497
*/
498
public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
499
500
/**
501
* Weak bidirectional character type "PDF" in the Unicode specification.
502
* @since 1.4
503
*/
504
public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
505
506
/**
507
* The minimum value of a
508
* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
509
* Unicode high-surrogate code unit</a>
510
* in the UTF-16 encoding, constant {@code '\u005CuD800'}.
511
* A high-surrogate is also known as a <i>leading-surrogate</i>.
512
*
513
* @since 1.5
514
*/
515
public static final char MIN_HIGH_SURROGATE = '\uD800';
516
517
/**
518
* The maximum value of a
519
* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
520
* Unicode high-surrogate code unit</a>
521
* in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
522
* A high-surrogate is also known as a <i>leading-surrogate</i>.
523
*
524
* @since 1.5
525
*/
526
public static final char MAX_HIGH_SURROGATE = '\uDBFF';
527
528
/**
529
* The minimum value of a
530
* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
531
* Unicode low-surrogate code unit</a>
532
* in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
533
* A low-surrogate is also known as a <i>trailing-surrogate</i>.
534
*
535
* @since 1.5
536
*/
537
public static final char MIN_LOW_SURROGATE = '\uDC00';
538
539
/**
540
* The maximum value of a
541
* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
542
* Unicode low-surrogate code unit</a>
543
* in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
544
* A low-surrogate is also known as a <i>trailing-surrogate</i>.
545
*
546
* @since 1.5
547
*/
548
public static final char MAX_LOW_SURROGATE = '\uDFFF';
549
550
/**
551
* The minimum value of a Unicode surrogate code unit in the
552
* UTF-16 encoding, constant {@code '\u005CuD800'}.
553
*
554
* @since 1.5
555
*/
556
public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
557
558
/**
559
* The maximum value of a Unicode surrogate code unit in the
560
* UTF-16 encoding, constant {@code '\u005CuDFFF'}.
561
*
562
* @since 1.5
563
*/
564
public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
565
566
/**
567
* The minimum value of a
568
* <a href="http://www.unicode.org/glossary/#supplementary_code_point">
569
* Unicode supplementary code point</a>, constant {@code U+10000}.
570
*
571
* @since 1.5
572
*/
573
public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
574
575
/**
576
* The minimum value of a
577
* <a href="http://www.unicode.org/glossary/#code_point">
578
* Unicode code point</a>, constant {@code U+0000}.
579
*
580
* @since 1.5
581
*/
582
public static final int MIN_CODE_POINT = 0x000000;
583
584
/**
585
* The maximum value of a
586
* <a href="http://www.unicode.org/glossary/#code_point">
587
* Unicode code point</a>, constant {@code U+10FFFF}.
588
*
589
* @since 1.5
590
*/
591
public static final int MAX_CODE_POINT = 0X10FFFF;
592
593
594
/**
595
* Instances of this class represent particular subsets of the Unicode
596
* character set. The only family of subsets defined in the
597
* {@code Character} class is {@link Character.UnicodeBlock}.
598
* Other portions of the Java API may define other subsets for their
599
* own purposes.
600
*
601
* @since 1.2
602
*/
603
public static class Subset {
604
605
private String name;
606
607
/**
608
* Constructs a new {@code Subset} instance.
609
*
610
* @param name The name of this subset
611
* @exception NullPointerException if name is {@code null}
612
*/
613
protected Subset(String name) {
614
if (name == null) {
615
throw new NullPointerException("name");
616
}
617
this.name = name;
618
}
619
620
/**
621
* Compares two {@code Subset} objects for equality.
622
* This method returns {@code true} if and only if
623
* {@code this} and the argument refer to the same
624
* object; since this method is {@code final}, this
625
* guarantee holds for all subclasses.
626
*/
627
public final boolean equals(Object obj) {
628
return (this == obj);
629
}
630
631
/**
632
* Returns the standard hash code as defined by the
633
* {@link Object#hashCode} method. This method
634
* is {@code final} in order to ensure that the
635
* {@code equals} and {@code hashCode} methods will
636
* be consistent in all subclasses.
637
*/
638
public final int hashCode() {
639
return super.hashCode();
640
}
641
642
/**
643
* Returns the name of this subset.
644
*/
645
public final String toString() {
646
return name;
647
}
648
}
649
650
// See http://www.unicode.org/Public/UNIDATA/Blocks.txt
651
// for the latest specification of Unicode Blocks.
652
653
/**
654
* A family of character subsets representing the character blocks in the
655
* Unicode specification. Character blocks generally define characters
656
* used for a specific script or purpose. A character is contained by
657
* at most one Unicode block.
658
*
659
* @since 1.2
660
*/
661
public static final class UnicodeBlock extends Subset {
662
663
private static Map<String, UnicodeBlock> map = new HashMap<>(256);
664
665
/**
 * Creates a UnicodeBlock with the given identifier name.
 * This name must be the same as the block identifier.
 * The new block is also registered in {@code map} under
 * {@code idName}.
 *
 * @param  idName  the identifier name of the block
 */
private UnicodeBlock(String idName) {
    super(idName);
    map.put(idName, this);
}
673
674
/**
 * Creates a UnicodeBlock with the given identifier name and
 * alias name.  The block is registered in {@code map} under both
 * names.
 *
 * @param  idName  the identifier name of the block
 * @param  alias   an additional name under which the block is registered
 */
private UnicodeBlock(String idName, String alias) {
    this(idName);            // registers the block under idName
    map.put(alias, this);
}
682
683
/**
 * Creates a UnicodeBlock with the given identifier name and
 * alias names.  The block is registered in {@code map} under the
 * identifier name and under every alias.
 *
 * @param  idName   the identifier name of the block
 * @param  aliases  additional names under which the block is registered
 */
private UnicodeBlock(String idName, String... aliases) {
    this(idName);            // registers the block under idName
    for (String alias : aliases) {
        map.put(alias, this);
    }
}
692
693
/**
694
* Constant for the "Basic Latin" Unicode character block.
695
* @since 1.2
696
*/
697
public static final UnicodeBlock BASIC_LATIN =
698
new UnicodeBlock("BASIC_LATIN",
699
"BASIC LATIN",
700
"BASICLATIN");
701
702
/**
703
* Constant for the "Latin-1 Supplement" Unicode character block.
704
* @since 1.2
705
*/
706
public static final UnicodeBlock LATIN_1_SUPPLEMENT =
707
new UnicodeBlock("LATIN_1_SUPPLEMENT",
708
"LATIN-1 SUPPLEMENT",
709
"LATIN-1SUPPLEMENT");
710
711
/**
712
* Constant for the "Latin Extended-A" Unicode character block.
713
* @since 1.2
714
*/
715
public static final UnicodeBlock LATIN_EXTENDED_A =
716
new UnicodeBlock("LATIN_EXTENDED_A",
717
"LATIN EXTENDED-A",
718
"LATINEXTENDED-A");
719
720
/**
721
* Constant for the "Latin Extended-B" Unicode character block.
722
* @since 1.2
723
*/
724
public static final UnicodeBlock LATIN_EXTENDED_B =
725
new UnicodeBlock("LATIN_EXTENDED_B",
726
"LATIN EXTENDED-B",
727
"LATINEXTENDED-B");
728
729
/**
730
* Constant for the "IPA Extensions" Unicode character block.
731
* @since 1.2
732
*/
733
public static final UnicodeBlock IPA_EXTENSIONS =
734
new UnicodeBlock("IPA_EXTENSIONS",
735
"IPA EXTENSIONS",
736
"IPAEXTENSIONS");
737
738
/**
739
* Constant for the "Spacing Modifier Letters" Unicode character block.
740
* @since 1.2
741
*/
742
public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
743
new UnicodeBlock("SPACING_MODIFIER_LETTERS",
744
"SPACING MODIFIER LETTERS",
745
"SPACINGMODIFIERLETTERS");
746
747
/**
748
* Constant for the "Combining Diacritical Marks" Unicode character block.
749
* @since 1.2
750
*/
751
public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
752
new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
753
"COMBINING DIACRITICAL MARKS",
754
"COMBININGDIACRITICALMARKS");
755
756
/**
757
* Constant for the "Greek and Coptic" Unicode character block.
758
* <p>
759
* This block was previously known as the "Greek" block.
760
*
761
* @since 1.2
762
*/
763
public static final UnicodeBlock GREEK =
764
new UnicodeBlock("GREEK",
765
"GREEK AND COPTIC",
766
"GREEKANDCOPTIC");
767
768
/**
769
* Constant for the "Cyrillic" Unicode character block.
770
* @since 1.2
771
*/
772
public static final UnicodeBlock CYRILLIC =
773
new UnicodeBlock("CYRILLIC");
774
775
/**
776
* Constant for the "Armenian" Unicode character block.
777
* @since 1.2
778
*/
779
public static final UnicodeBlock ARMENIAN =
780
new UnicodeBlock("ARMENIAN");
781
782
/**
783
* Constant for the "Hebrew" Unicode character block.
784
* @since 1.2
785
*/
786
public static final UnicodeBlock HEBREW =
787
new UnicodeBlock("HEBREW");
788
789
/**
790
* Constant for the "Arabic" Unicode character block.
791
* @since 1.2
792
*/
793
public static final UnicodeBlock ARABIC =
794
new UnicodeBlock("ARABIC");
795
796
/**
797
* Constant for the "Devanagari" Unicode character block.
798
* @since 1.2
799
*/
800
public static final UnicodeBlock DEVANAGARI =
801
new UnicodeBlock("DEVANAGARI");
802
803
/**
804
* Constant for the "Bengali" Unicode character block.
805
* @since 1.2
806
*/
807
public static final UnicodeBlock BENGALI =
808
new UnicodeBlock("BENGALI");
809
810
/**
811
* Constant for the "Gurmukhi" Unicode character block.
812
* @since 1.2
813
*/
814
public static final UnicodeBlock GURMUKHI =
815
new UnicodeBlock("GURMUKHI");
816
817
/**
818
* Constant for the "Gujarati" Unicode character block.
819
* @since 1.2
820
*/
821
public static final UnicodeBlock GUJARATI =
822
new UnicodeBlock("GUJARATI");
823
824
/**
825
* Constant for the "Oriya" Unicode character block.
826
* @since 1.2
827
*/
828
public static final UnicodeBlock ORIYA =
829
new UnicodeBlock("ORIYA");
830
831
/**
832
* Constant for the "Tamil" Unicode character block.
833
* @since 1.2
834
*/
835
public static final UnicodeBlock TAMIL =
836
new UnicodeBlock("TAMIL");
837
838
/**
839
* Constant for the "Telugu" Unicode character block.
840
* @since 1.2
841
*/
842
public static final UnicodeBlock TELUGU =
843
new UnicodeBlock("TELUGU");
844
845
/**
846
* Constant for the "Kannada" Unicode character block.
847
* @since 1.2
848
*/
849
public static final UnicodeBlock KANNADA =
850
new UnicodeBlock("KANNADA");
851
852
/**
853
* Constant for the "Malayalam" Unicode character block.
854
* @since 1.2
855
*/
856
public static final UnicodeBlock MALAYALAM =
857
new UnicodeBlock("MALAYALAM");
858
859
/**
860
* Constant for the "Thai" Unicode character block.
861
* @since 1.2
862
*/
863
public static final UnicodeBlock THAI =
864
new UnicodeBlock("THAI");
865
866
/**
867
* Constant for the "Lao" Unicode character block.
868
* @since 1.2
869
*/
870
public static final UnicodeBlock LAO =
871
new UnicodeBlock("LAO");
872
873
/**
874
* Constant for the "Tibetan" Unicode character block.
875
* @since 1.2
876
*/
877
public static final UnicodeBlock TIBETAN =
878
new UnicodeBlock("TIBETAN");
879
880
/**
881
* Constant for the "Georgian" Unicode character block.
882
* @since 1.2
883
*/
884
public static final UnicodeBlock GEORGIAN =
885
new UnicodeBlock("GEORGIAN");
886
887
/**
888
* Constant for the "Hangul Jamo" Unicode character block.
889
* @since 1.2
890
*/
891
public static final UnicodeBlock HANGUL_JAMO =
892
new UnicodeBlock("HANGUL_JAMO",
893
"HANGUL JAMO",
894
"HANGULJAMO");
895
896
/**
897
* Constant for the "Latin Extended Additional" Unicode character block.
898
* @since 1.2
899
*/
900
public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
901
new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
902
"LATIN EXTENDED ADDITIONAL",
903
"LATINEXTENDEDADDITIONAL");
904
905
/**
906
* Constant for the "Greek Extended" Unicode character block.
907
* @since 1.2
908
*/
909
public static final UnicodeBlock GREEK_EXTENDED =
910
new UnicodeBlock("GREEK_EXTENDED",
911
"GREEK EXTENDED",
912
"GREEKEXTENDED");
913
914
/**
915
* Constant for the "General Punctuation" Unicode character block.
916
* @since 1.2
917
*/
918
public static final UnicodeBlock GENERAL_PUNCTUATION =
919
new UnicodeBlock("GENERAL_PUNCTUATION",
920
"GENERAL PUNCTUATION",
921
"GENERALPUNCTUATION");
922
923
/**
924
* Constant for the "Superscripts and Subscripts" Unicode character
925
* block.
926
* @since 1.2
927
*/
928
public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
929
new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
930
"SUPERSCRIPTS AND SUBSCRIPTS",
931
"SUPERSCRIPTSANDSUBSCRIPTS");
932
933
/**
934
* Constant for the "Currency Symbols" Unicode character block.
935
* @since 1.2
936
*/
937
public static final UnicodeBlock CURRENCY_SYMBOLS =
938
new UnicodeBlock("CURRENCY_SYMBOLS",
939
"CURRENCY SYMBOLS",
940
"CURRENCYSYMBOLS");
941
942
/**
943
* Constant for the "Combining Diacritical Marks for Symbols" Unicode
944
* character block.
945
* <p>
946
* This block was previously known as "Combining Marks for Symbols".
947
* @since 1.2
948
*/
949
public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
950
new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
951
"COMBINING DIACRITICAL MARKS FOR SYMBOLS",
952
"COMBININGDIACRITICALMARKSFORSYMBOLS",
953
"COMBINING MARKS FOR SYMBOLS",
954
"COMBININGMARKSFORSYMBOLS");
955
956
/**
957
* Constant for the "Letterlike Symbols" Unicode character block.
958
* @since 1.2
959
*/
960
public static final UnicodeBlock LETTERLIKE_SYMBOLS =
961
new UnicodeBlock("LETTERLIKE_SYMBOLS",
962
"LETTERLIKE SYMBOLS",
963
"LETTERLIKESYMBOLS");
964
965
/**
966
* Constant for the "Number Forms" Unicode character block.
967
* @since 1.2
968
*/
969
public static final UnicodeBlock NUMBER_FORMS =
970
new UnicodeBlock("NUMBER_FORMS",
971
"NUMBER FORMS",
972
"NUMBERFORMS");
973
974
/**
975
* Constant for the "Arrows" Unicode character block.
976
* @since 1.2
977
*/
978
public static final UnicodeBlock ARROWS =
979
new UnicodeBlock("ARROWS");
980
981
/**
982
* Constant for the "Mathematical Operators" Unicode character block.
983
* @since 1.2
984
*/
985
public static final UnicodeBlock MATHEMATICAL_OPERATORS =
986
new UnicodeBlock("MATHEMATICAL_OPERATORS",
987
"MATHEMATICAL OPERATORS",
988
"MATHEMATICALOPERATORS");
989
990
/**
991
* Constant for the "Miscellaneous Technical" Unicode character block.
992
* @since 1.2
993
*/
994
public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
995
new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
996
"MISCELLANEOUS TECHNICAL",
997
"MISCELLANEOUSTECHNICAL");
998
999
/**
1000
* Constant for the "Control Pictures" Unicode character block.
1001
* @since 1.2
1002
*/
1003
public static final UnicodeBlock CONTROL_PICTURES =
1004
new UnicodeBlock("CONTROL_PICTURES",
1005
"CONTROL PICTURES",
1006
"CONTROLPICTURES");
1007
1008
/**
1009
* Constant for the "Optical Character Recognition" Unicode character block.
1010
* @since 1.2
1011
*/
1012
public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1013
new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1014
"OPTICAL CHARACTER RECOGNITION",
1015
"OPTICALCHARACTERRECOGNITION");
1016
1017
/**
1018
* Constant for the "Enclosed Alphanumerics" Unicode character block.
1019
* @since 1.2
1020
*/
1021
public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1022
new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1023
"ENCLOSED ALPHANUMERICS",
1024
"ENCLOSEDALPHANUMERICS");
1025
1026
/**
1027
* Constant for the "Box Drawing" Unicode character block.
1028
* @since 1.2
1029
*/
1030
public static final UnicodeBlock BOX_DRAWING =
1031
new UnicodeBlock("BOX_DRAWING",
1032
"BOX DRAWING",
1033
"BOXDRAWING");
1034
1035
/**
1036
* Constant for the "Block Elements" Unicode character block.
1037
* @since 1.2
1038
*/
1039
public static final UnicodeBlock BLOCK_ELEMENTS =
1040
new UnicodeBlock("BLOCK_ELEMENTS",
1041
"BLOCK ELEMENTS",
1042
"BLOCKELEMENTS");
1043
1044
/**
1045
* Constant for the "Geometric Shapes" Unicode character block.
1046
* @since 1.2
1047
*/
1048
public static final UnicodeBlock GEOMETRIC_SHAPES =
1049
new UnicodeBlock("GEOMETRIC_SHAPES",
1050
"GEOMETRIC SHAPES",
1051
"GEOMETRICSHAPES");
1052
1053
/**
1054
* Constant for the "Miscellaneous Symbols" Unicode character block.
1055
* @since 1.2
1056
*/
1057
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1058
new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1059
"MISCELLANEOUS SYMBOLS",
1060
"MISCELLANEOUSSYMBOLS");
1061
1062
/**
1063
* Constant for the "Dingbats" Unicode character block.
1064
* @since 1.2
1065
*/
1066
public static final UnicodeBlock DINGBATS =
1067
new UnicodeBlock("DINGBATS");
1068
1069
/**
1070
* Constant for the "CJK Symbols and Punctuation" Unicode character block.
1071
* @since 1.2
1072
*/
1073
public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1074
new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1075
"CJK SYMBOLS AND PUNCTUATION",
1076
"CJKSYMBOLSANDPUNCTUATION");
1077
1078
/**
1079
* Constant for the "Hiragana" Unicode character block.
1080
* @since 1.2
1081
*/
1082
public static final UnicodeBlock HIRAGANA =
1083
new UnicodeBlock("HIRAGANA");
1084
1085
/**
1086
* Constant for the "Katakana" Unicode character block.
1087
* @since 1.2
1088
*/
1089
public static final UnicodeBlock KATAKANA =
1090
new UnicodeBlock("KATAKANA");
1091
1092
/**
1093
* Constant for the "Bopomofo" Unicode character block.
1094
* @since 1.2
1095
*/
1096
public static final UnicodeBlock BOPOMOFO =
1097
new UnicodeBlock("BOPOMOFO");
1098
1099
/**
1100
* Constant for the "Hangul Compatibility Jamo" Unicode character block.
1101
* @since 1.2
1102
*/
1103
public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1104
new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1105
"HANGUL COMPATIBILITY JAMO",
1106
"HANGULCOMPATIBILITYJAMO");
1107
1108
/**
1109
* Constant for the "Kanbun" Unicode character block.
1110
* @since 1.2
1111
*/
1112
public static final UnicodeBlock KANBUN =
1113
new UnicodeBlock("KANBUN");
1114
1115
/**
1116
* Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1117
* @since 1.2
1118
*/
1119
public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1120
new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1121
"ENCLOSED CJK LETTERS AND MONTHS",
1122
"ENCLOSEDCJKLETTERSANDMONTHS");
1123
1124
/**
1125
* Constant for the "CJK Compatibility" Unicode character block.
1126
* @since 1.2
1127
*/
1128
public static final UnicodeBlock CJK_COMPATIBILITY =
1129
new UnicodeBlock("CJK_COMPATIBILITY",
1130
"CJK COMPATIBILITY",
1131
"CJKCOMPATIBILITY");
1132
1133
/**
1134
* Constant for the "CJK Unified Ideographs" Unicode character block.
1135
* @since 1.2
1136
*/
1137
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1138
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1139
"CJK UNIFIED IDEOGRAPHS",
1140
"CJKUNIFIEDIDEOGRAPHS");
1141
1142
/**
1143
* Constant for the "Hangul Syllables" Unicode character block.
1144
* @since 1.2
1145
*/
1146
public static final UnicodeBlock HANGUL_SYLLABLES =
1147
new UnicodeBlock("HANGUL_SYLLABLES",
1148
"HANGUL SYLLABLES",
1149
"HANGULSYLLABLES");
1150
1151
/**
1152
* Constant for the "Private Use Area" Unicode character block.
1153
* @since 1.2
1154
*/
1155
public static final UnicodeBlock PRIVATE_USE_AREA =
1156
new UnicodeBlock("PRIVATE_USE_AREA",
1157
"PRIVATE USE AREA",
1158
"PRIVATEUSEAREA");
1159
1160
/**
1161
* Constant for the "CJK Compatibility Ideographs" Unicode character
1162
* block.
1163
* @since 1.2
1164
*/
1165
public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1166
new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1167
"CJK COMPATIBILITY IDEOGRAPHS",
1168
"CJKCOMPATIBILITYIDEOGRAPHS");
1169
1170
/**
1171
* Constant for the "Alphabetic Presentation Forms" Unicode character block.
1172
* @since 1.2
1173
*/
1174
public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1175
new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1176
"ALPHABETIC PRESENTATION FORMS",
1177
"ALPHABETICPRESENTATIONFORMS");
1178
1179
/**
1180
* Constant for the "Arabic Presentation Forms-A" Unicode character
1181
* block.
1182
* @since 1.2
1183
*/
1184
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1185
new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1186
"ARABIC PRESENTATION FORMS-A",
1187
"ARABICPRESENTATIONFORMS-A");
1188
1189
/**
1190
* Constant for the "Combining Half Marks" Unicode character block.
1191
* @since 1.2
1192
*/
1193
public static final UnicodeBlock COMBINING_HALF_MARKS =
1194
new UnicodeBlock("COMBINING_HALF_MARKS",
1195
"COMBINING HALF MARKS",
1196
"COMBININGHALFMARKS");
1197
1198
/**
1199
* Constant for the "CJK Compatibility Forms" Unicode character block.
1200
* @since 1.2
1201
*/
1202
public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1203
new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1204
"CJK COMPATIBILITY FORMS",
1205
"CJKCOMPATIBILITYFORMS");
1206
1207
/**
1208
* Constant for the "Small Form Variants" Unicode character block.
1209
* @since 1.2
1210
*/
1211
public static final UnicodeBlock SMALL_FORM_VARIANTS =
1212
new UnicodeBlock("SMALL_FORM_VARIANTS",
1213
"SMALL FORM VARIANTS",
1214
"SMALLFORMVARIANTS");
1215
1216
/**
1217
* Constant for the "Arabic Presentation Forms-B" Unicode character block.
1218
* @since 1.2
1219
*/
1220
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1221
new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1222
"ARABIC PRESENTATION FORMS-B",
1223
"ARABICPRESENTATIONFORMS-B");
1224
1225
/**
1226
* Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1227
* block.
1228
* @since 1.2
1229
*/
1230
public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1231
new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1232
"HALFWIDTH AND FULLWIDTH FORMS",
1233
"HALFWIDTHANDFULLWIDTHFORMS");
1234
1235
/**
1236
* Constant for the "Specials" Unicode character block.
1237
* @since 1.2
1238
*/
1239
public static final UnicodeBlock SPECIALS =
1240
new UnicodeBlock("SPECIALS");
1241
1242
/**
1243
* Constant for the legacy "Surrogates Area" Unicode character block.
*
* @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1244
* {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1245
* {@link #LOW_SURROGATES}. These new constants match
1246
* the block definitions of the Unicode Standard.
1247
* The {@link #of(char)} and {@link #of(int)} methods
1248
* return the new constants, not SURROGATES_AREA.
1249
*/
1250
@Deprecated
1251
public static final UnicodeBlock SURROGATES_AREA =
1252
new UnicodeBlock("SURROGATES_AREA");
1253
1254
/**
1255
* Constant for the "Syriac" Unicode character block.
1256
* @since 1.4
1257
*/
1258
public static final UnicodeBlock SYRIAC =
1259
new UnicodeBlock("SYRIAC");
1260
1261
/**
1262
* Constant for the "Thaana" Unicode character block.
1263
* @since 1.4
1264
*/
1265
public static final UnicodeBlock THAANA =
1266
new UnicodeBlock("THAANA");
1267
1268
/**
1269
* Constant for the "Sinhala" Unicode character block.
1270
* @since 1.4
1271
*/
1272
public static final UnicodeBlock SINHALA =
1273
new UnicodeBlock("SINHALA");
1274
1275
/**
1276
* Constant for the "Myanmar" Unicode character block.
1277
* @since 1.4
1278
*/
1279
public static final UnicodeBlock MYANMAR =
1280
new UnicodeBlock("MYANMAR");
1281
1282
/**
1283
* Constant for the "Ethiopic" Unicode character block.
1284
* @since 1.4
1285
*/
1286
public static final UnicodeBlock ETHIOPIC =
1287
new UnicodeBlock("ETHIOPIC");
1288
1289
/**
1290
* Constant for the "Cherokee" Unicode character block.
1291
* @since 1.4
1292
*/
1293
public static final UnicodeBlock CHEROKEE =
1294
new UnicodeBlock("CHEROKEE");
1295
1296
/**
1297
* Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1298
* @since 1.4
1299
*/
1300
public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1301
new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1302
"UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1303
"UNIFIEDCANADIANABORIGINALSYLLABICS");
1304
1305
/**
1306
* Constant for the "Ogham" Unicode character block.
1307
* @since 1.4
1308
*/
1309
public static final UnicodeBlock OGHAM =
1310
new UnicodeBlock("OGHAM");
1311
1312
/**
1313
* Constant for the "Runic" Unicode character block.
1314
* @since 1.4
1315
*/
1316
public static final UnicodeBlock RUNIC =
1317
new UnicodeBlock("RUNIC");
1318
1319
/**
1320
* Constant for the "Khmer" Unicode character block.
1321
* @since 1.4
1322
*/
1323
public static final UnicodeBlock KHMER =
1324
new UnicodeBlock("KHMER");
1325
1326
/**
1327
* Constant for the "Mongolian" Unicode character block.
1328
* @since 1.4
1329
*/
1330
public static final UnicodeBlock MONGOLIAN =
1331
new UnicodeBlock("MONGOLIAN");
1332
1333
/**
1334
* Constant for the "Braille Patterns" Unicode character block.
1335
* @since 1.4
1336
*/
1337
public static final UnicodeBlock BRAILLE_PATTERNS =
1338
new UnicodeBlock("BRAILLE_PATTERNS",
1339
"BRAILLE PATTERNS",
1340
"BRAILLEPATTERNS");
1341
1342
/**
1343
* Constant for the "CJK Radicals Supplement" Unicode character block.
1344
* @since 1.4
1345
*/
1346
public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1347
new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1348
"CJK RADICALS SUPPLEMENT",
1349
"CJKRADICALSSUPPLEMENT");
1350
1351
/**
1352
* Constant for the "Kangxi Radicals" Unicode character block.
1353
* @since 1.4
1354
*/
1355
public static final UnicodeBlock KANGXI_RADICALS =
1356
new UnicodeBlock("KANGXI_RADICALS",
1357
"KANGXI RADICALS",
1358
"KANGXIRADICALS");
1359
1360
/**
1361
* Constant for the "Ideographic Description Characters" Unicode character block.
1362
* @since 1.4
1363
*/
1364
public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1365
new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1366
"IDEOGRAPHIC DESCRIPTION CHARACTERS",
1367
"IDEOGRAPHICDESCRIPTIONCHARACTERS");
1368
1369
/**
1370
* Constant for the "Bopomofo Extended" Unicode character block.
1371
* @since 1.4
1372
*/
1373
public static final UnicodeBlock BOPOMOFO_EXTENDED =
1374
new UnicodeBlock("BOPOMOFO_EXTENDED",
1375
"BOPOMOFO EXTENDED",
1376
"BOPOMOFOEXTENDED");
1377
1378
/**
1379
* Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1380
* @since 1.4
1381
*/
1382
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1383
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1384
"CJK UNIFIED IDEOGRAPHS EXTENSION A",
1385
"CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1386
1387
/**
1388
* Constant for the "Yi Syllables" Unicode character block.
1389
* @since 1.4
1390
*/
1391
public static final UnicodeBlock YI_SYLLABLES =
1392
new UnicodeBlock("YI_SYLLABLES",
1393
"YI SYLLABLES",
1394
"YISYLLABLES");
1395
1396
/**
1397
* Constant for the "Yi Radicals" Unicode character block.
1398
* @since 1.4
1399
*/
1400
public static final UnicodeBlock YI_RADICALS =
1401
new UnicodeBlock("YI_RADICALS",
1402
"YI RADICALS",
1403
"YIRADICALS");
1404
1405
/**
1406
* Constant for the "Cyrillic Supplementary" Unicode character block.
* This constant also lists "Cyrillic Supplement" among its block-name aliases.
1407
* @since 1.5
1408
*/
1409
public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1410
new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1411
"CYRILLIC SUPPLEMENTARY",
1412
"CYRILLICSUPPLEMENTARY",
1413
"CYRILLIC SUPPLEMENT",
1414
"CYRILLICSUPPLEMENT");
1415
1416
/**
1417
* Constant for the "Tagalog" Unicode character block.
1418
* @since 1.5
1419
*/
1420
public static final UnicodeBlock TAGALOG =
1421
new UnicodeBlock("TAGALOG");
1422
1423
/**
1424
* Constant for the "Hanunoo" Unicode character block.
1425
* @since 1.5
1426
*/
1427
public static final UnicodeBlock HANUNOO =
1428
new UnicodeBlock("HANUNOO");
1429
1430
/**
1431
* Constant for the "Buhid" Unicode character block.
1432
* @since 1.5
1433
*/
1434
public static final UnicodeBlock BUHID =
1435
new UnicodeBlock("BUHID");
1436
1437
/**
1438
* Constant for the "Tagbanwa" Unicode character block.
1439
* @since 1.5
1440
*/
1441
public static final UnicodeBlock TAGBANWA =
1442
new UnicodeBlock("TAGBANWA");
1443
1444
/**
1445
* Constant for the "Limbu" Unicode character block.
1446
* @since 1.5
1447
*/
1448
public static final UnicodeBlock LIMBU =
1449
new UnicodeBlock("LIMBU");
1450
1451
/**
1452
* Constant for the "Tai Le" Unicode character block.
1453
* @since 1.5
1454
*/
1455
public static final UnicodeBlock TAI_LE =
1456
new UnicodeBlock("TAI_LE",
1457
"TAI LE",
1458
"TAILE");
1459
1460
/**
1461
* Constant for the "Khmer Symbols" Unicode character block.
1462
* @since 1.5
1463
*/
1464
public static final UnicodeBlock KHMER_SYMBOLS =
1465
new UnicodeBlock("KHMER_SYMBOLS",
1466
"KHMER SYMBOLS",
1467
"KHMERSYMBOLS");
1468
1469
/**
1470
* Constant for the "Phonetic Extensions" Unicode character block.
1471
* @since 1.5
1472
*/
1473
public static final UnicodeBlock PHONETIC_EXTENSIONS =
1474
new UnicodeBlock("PHONETIC_EXTENSIONS",
1475
"PHONETIC EXTENSIONS",
1476
"PHONETICEXTENSIONS");
1477
1478
/**
1479
* Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1480
* @since 1.5
1481
*/
1482
public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1483
new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1484
"MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1485
"MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1486
1487
/**
1488
* Constant for the "Supplemental Arrows-A" Unicode character block.
1489
* @since 1.5
1490
*/
1491
public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1492
new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1493
"SUPPLEMENTAL ARROWS-A",
1494
"SUPPLEMENTALARROWS-A");
1495
1496
/**
1497
* Constant for the "Supplemental Arrows-B" Unicode character block.
1498
* @since 1.5
1499
*/
1500
public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1501
new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1502
"SUPPLEMENTAL ARROWS-B",
1503
"SUPPLEMENTALARROWS-B");
1504
1505
/**
1506
* Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1507
* character block.
1508
* @since 1.5
1509
*/
1510
public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1511
new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1512
"MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1513
"MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1514
1515
/**
1516
* Constant for the "Supplemental Mathematical Operators" Unicode
1517
* character block.
1518
* @since 1.5
1519
*/
1520
public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1521
new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1522
"SUPPLEMENTAL MATHEMATICAL OPERATORS",
1523
"SUPPLEMENTALMATHEMATICALOPERATORS");
1524
1525
/**
1526
* Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1527
* block.
1528
* @since 1.5
1529
*/
1530
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1531
new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1532
"MISCELLANEOUS SYMBOLS AND ARROWS",
1533
"MISCELLANEOUSSYMBOLSANDARROWS");
1534
1535
/**
1536
* Constant for the "Katakana Phonetic Extensions" Unicode character
1537
* block.
1538
* @since 1.5
1539
*/
1540
public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1541
new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1542
"KATAKANA PHONETIC EXTENSIONS",
1543
"KATAKANAPHONETICEXTENSIONS");
1544
1545
/**
1546
* Constant for the "Yijing Hexagram Symbols" Unicode character block.
1547
* @since 1.5
1548
*/
1549
public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1550
new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1551
"YIJING HEXAGRAM SYMBOLS",
1552
"YIJINGHEXAGRAMSYMBOLS");
1553
1554
/**
1555
* Constant for the "Variation Selectors" Unicode character block.
1556
* @since 1.5
1557
*/
1558
public static final UnicodeBlock VARIATION_SELECTORS =
1559
new UnicodeBlock("VARIATION_SELECTORS",
1560
"VARIATION SELECTORS",
1561
"VARIATIONSELECTORS");
1562
1563
/**
1564
* Constant for the "Linear B Syllabary" Unicode character block.
1565
* @since 1.5
1566
*/
1567
public static final UnicodeBlock LINEAR_B_SYLLABARY =
1568
new UnicodeBlock("LINEAR_B_SYLLABARY",
1569
"LINEAR B SYLLABARY",
1570
"LINEARBSYLLABARY");
1571
1572
/**
1573
* Constant for the "Linear B Ideograms" Unicode character block.
1574
* @since 1.5
1575
*/
1576
public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1577
new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1578
"LINEAR B IDEOGRAMS",
1579
"LINEARBIDEOGRAMS");
1580
1581
/**
1582
* Constant for the "Aegean Numbers" Unicode character block.
1583
* @since 1.5
1584
*/
1585
public static final UnicodeBlock AEGEAN_NUMBERS =
1586
new UnicodeBlock("AEGEAN_NUMBERS",
1587
"AEGEAN NUMBERS",
1588
"AEGEANNUMBERS");
1589
1590
/**
1591
* Constant for the "Old Italic" Unicode character block.
1592
* @since 1.5
1593
*/
1594
public static final UnicodeBlock OLD_ITALIC =
1595
new UnicodeBlock("OLD_ITALIC",
1596
"OLD ITALIC",
1597
"OLDITALIC");
1598
1599
/**
1600
* Constant for the "Gothic" Unicode character block.
1601
* @since 1.5
1602
*/
1603
public static final UnicodeBlock GOTHIC =
1604
new UnicodeBlock("GOTHIC");
1605
1606
/**
1607
* Constant for the "Ugaritic" Unicode character block.
1608
* @since 1.5
1609
*/
1610
public static final UnicodeBlock UGARITIC =
1611
new UnicodeBlock("UGARITIC");
1612
1613
/**
1614
* Constant for the "Deseret" Unicode character block.
1615
* @since 1.5
1616
*/
1617
public static final UnicodeBlock DESERET =
1618
new UnicodeBlock("DESERET");
1619
1620
/**
1621
* Constant for the "Shavian" Unicode character block.
1622
* @since 1.5
1623
*/
1624
public static final UnicodeBlock SHAVIAN =
1625
new UnicodeBlock("SHAVIAN");
1626
1627
/**
1628
* Constant for the "Osmanya" Unicode character block.
1629
* @since 1.5
1630
*/
1631
public static final UnicodeBlock OSMANYA =
1632
new UnicodeBlock("OSMANYA");
1633
1634
/**
1635
* Constant for the "Cypriot Syllabary" Unicode character block.
1636
* @since 1.5
1637
*/
1638
public static final UnicodeBlock CYPRIOT_SYLLABARY =
1639
new UnicodeBlock("CYPRIOT_SYLLABARY",
1640
"CYPRIOT SYLLABARY",
1641
"CYPRIOTSYLLABARY");
1642
1643
/**
1644
* Constant for the "Byzantine Musical Symbols" Unicode character block.
1645
* @since 1.5
1646
*/
1647
public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1648
new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1649
"BYZANTINE MUSICAL SYMBOLS",
1650
"BYZANTINEMUSICALSYMBOLS");
1651
1652
/**
1653
* Constant for the "Musical Symbols" Unicode character block.
1654
* @since 1.5
1655
*/
1656
public static final UnicodeBlock MUSICAL_SYMBOLS =
1657
new UnicodeBlock("MUSICAL_SYMBOLS",
1658
"MUSICAL SYMBOLS",
1659
"MUSICALSYMBOLS");
1660
1661
/**
1662
* Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1663
* @since 1.5
1664
*/
1665
public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1666
new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1667
"TAI XUAN JING SYMBOLS",
1668
"TAIXUANJINGSYMBOLS");
1669
1670
/**
1671
* Constant for the "Mathematical Alphanumeric Symbols" Unicode
1672
* character block.
1673
* @since 1.5
1674
*/
1675
public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1676
new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1677
"MATHEMATICAL ALPHANUMERIC SYMBOLS",
1678
"MATHEMATICALALPHANUMERICSYMBOLS");
1679
1680
/**
1681
* Constant for the "CJK Unified Ideographs Extension B" Unicode
1682
* character block.
1683
* @since 1.5
1684
*/
1685
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1686
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1687
"CJK UNIFIED IDEOGRAPHS EXTENSION B",
1688
"CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1689
1690
/**
1691
* Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1692
* @since 1.5
1693
*/
1694
public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1695
new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1696
"CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1697
"CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1698
1699
/**
1700
* Constant for the "Tags" Unicode character block.
1701
* @since 1.5
1702
*/
1703
public static final UnicodeBlock TAGS =
1704
new UnicodeBlock("TAGS");
1705
1706
/**
1707
* Constant for the "Variation Selectors Supplement" Unicode character
1708
* block.
1709
* @since 1.5
1710
*/
1711
public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1712
new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1713
"VARIATION SELECTORS SUPPLEMENT",
1714
"VARIATIONSELECTORSSUPPLEMENT");
1715
1716
/**
1717
* Constant for the "Supplementary Private Use Area-A" Unicode character
1718
* block.
1719
* @since 1.5
1720
*/
1721
public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1722
new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1723
"SUPPLEMENTARY PRIVATE USE AREA-A",
1724
"SUPPLEMENTARYPRIVATEUSEAREA-A");
1725
1726
/**
1727
* Constant for the "Supplementary Private Use Area-B" Unicode character
1728
* block.
1729
* @since 1.5
1730
*/
1731
public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1732
new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1733
"SUPPLEMENTARY PRIVATE USE AREA-B",
1734
"SUPPLEMENTARYPRIVATEUSEAREA-B");
1735
1736
/**
1737
* Constant for the "High Surrogates" Unicode character block.
1738
* This block represents code point values in the high surrogate
1739
* range: U+D800 through U+DB7F.
1740
*
1741
* @since 1.5
1742
*/
1743
public static final UnicodeBlock HIGH_SURROGATES =
1744
new UnicodeBlock("HIGH_SURROGATES",
1745
"HIGH SURROGATES",
1746
"HIGHSURROGATES");
1747
1748
/**
1749
* Constant for the "High Private Use Surrogates" Unicode character
1750
* block.
1751
* This block represents code point values in the private use high
1752
* surrogate range: U+DB80 through U+DBFF.
1753
*
1754
* @since 1.5
1755
*/
1756
public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1757
new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1758
"HIGH PRIVATE USE SURROGATES",
1759
"HIGHPRIVATEUSESURROGATES");
1760
1761
/**
1762
* Constant for the "Low Surrogates" Unicode character block.
1763
* This block represents code point values in the low surrogate
1764
* range: U+DC00 through U+DFFF.
1765
*
1766
* @since 1.5
1767
*/
1768
public static final UnicodeBlock LOW_SURROGATES =
1769
new UnicodeBlock("LOW_SURROGATES",
1770
"LOW SURROGATES",
1771
"LOWSURROGATES");
1772
1773
/**
1774
* Constant for the "Arabic Supplement" Unicode character block.
1775
* @since 1.7
1776
*/
1777
public static final UnicodeBlock ARABIC_SUPPLEMENT =
1778
new UnicodeBlock("ARABIC_SUPPLEMENT",
1779
"ARABIC SUPPLEMENT",
1780
"ARABICSUPPLEMENT");
1781
1782
/**
1783
* Constant for the "NKo" Unicode character block.
1784
* @since 1.7
1785
*/
1786
public static final UnicodeBlock NKO =
1787
new UnicodeBlock("NKO");
1788
1789
/**
1790
* Constant for the "Samaritan" Unicode character block.
1791
* @since 1.7
1792
*/
1793
public static final UnicodeBlock SAMARITAN =
1794
new UnicodeBlock("SAMARITAN");
1795
1796
/**
1797
* Constant for the "Mandaic" Unicode character block.
1798
* @since 1.7
1799
*/
1800
public static final UnicodeBlock MANDAIC =
1801
new UnicodeBlock("MANDAIC");
1802
1803
/**
1804
* Constant for the "Ethiopic Supplement" Unicode character block.
1805
* @since 1.7
1806
*/
1807
public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1808
new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1809
"ETHIOPIC SUPPLEMENT",
1810
"ETHIOPICSUPPLEMENT");
1811
1812
/**
1813
* Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1814
* Unicode character block.
1815
* @since 1.7
1816
*/
1817
public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1818
new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1819
"UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1820
"UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1821
1822
/**
1823
* Constant for the "New Tai Lue" Unicode character block.
1824
* @since 1.7
1825
*/
1826
public static final UnicodeBlock NEW_TAI_LUE =
1827
new UnicodeBlock("NEW_TAI_LUE",
1828
"NEW TAI LUE",
1829
"NEWTAILUE");
1830
1831
/**
1832
* Constant for the "Buginese" Unicode character block.
1833
* @since 1.7
1834
*/
1835
public static final UnicodeBlock BUGINESE =
1836
new UnicodeBlock("BUGINESE");
1837
1838
/**
1839
* Constant for the "Tai Tham" Unicode character block.
1840
* @since 1.7
1841
*/
1842
public static final UnicodeBlock TAI_THAM =
1843
new UnicodeBlock("TAI_THAM",
1844
"TAI THAM",
1845
"TAITHAM");
1846
1847
/**
1848
* Constant for the "Balinese" Unicode character block.
1849
* @since 1.7
1850
*/
1851
public static final UnicodeBlock BALINESE =
1852
new UnicodeBlock("BALINESE");
1853
1854
/**
1855
* Constant for the "Sundanese" Unicode character block.
1856
* @since 1.7
1857
*/
1858
public static final UnicodeBlock SUNDANESE =
1859
new UnicodeBlock("SUNDANESE");
1860
1861
/**
1862
* Constant for the "Batak" Unicode character block.
1863
* @since 1.7
1864
*/
1865
public static final UnicodeBlock BATAK =
1866
new UnicodeBlock("BATAK");
1867
1868
/**
1869
* Constant for the "Lepcha" Unicode character block.
1870
* @since 1.7
1871
*/
1872
public static final UnicodeBlock LEPCHA =
1873
new UnicodeBlock("LEPCHA");
1874
1875
/**
1876
* Constant for the "Ol Chiki" Unicode character block.
1877
* @since 1.7
1878
*/
1879
public static final UnicodeBlock OL_CHIKI =
1880
new UnicodeBlock("OL_CHIKI",
1881
"OL CHIKI",
1882
"OLCHIKI");
1883
1884
/**
1885
* Constant for the "Vedic Extensions" Unicode character block.
1886
* @since 1.7
1887
*/
1888
public static final UnicodeBlock VEDIC_EXTENSIONS =
1889
new UnicodeBlock("VEDIC_EXTENSIONS",
1890
"VEDIC EXTENSIONS",
1891
"VEDICEXTENSIONS");
1892
1893
/**
1894
* Constant for the "Phonetic Extensions Supplement" Unicode character
1895
* block.
1896
* @since 1.7
1897
*/
1898
public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1899
new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1900
"PHONETIC EXTENSIONS SUPPLEMENT",
1901
"PHONETICEXTENSIONSSUPPLEMENT");
1902
1903
/**
1904
* Constant for the "Combining Diacritical Marks Supplement" Unicode
1905
* character block.
1906
* @since 1.7
1907
*/
1908
public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1909
new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1910
"COMBINING DIACRITICAL MARKS SUPPLEMENT",
1911
"COMBININGDIACRITICALMARKSSUPPLEMENT");
1912
1913
/**
1914
* Constant for the "Glagolitic" Unicode character block.
1915
* @since 1.7
1916
*/
1917
public static final UnicodeBlock GLAGOLITIC =
1918
new UnicodeBlock("GLAGOLITIC");
1919
1920
/**
1921
* Constant for the "Latin Extended-C" Unicode character block.
1922
* @since 1.7
1923
*/
1924
public static final UnicodeBlock LATIN_EXTENDED_C =
1925
new UnicodeBlock("LATIN_EXTENDED_C",
1926
"LATIN EXTENDED-C",
1927
"LATINEXTENDED-C");
1928
1929
/**
1930
* Constant for the "Coptic" Unicode character block.
1931
* @since 1.7
1932
*/
1933
public static final UnicodeBlock COPTIC =
1934
new UnicodeBlock("COPTIC");
1935
1936
/**
1937
* Constant for the "Georgian Supplement" Unicode character block.
1938
* @since 1.7
1939
*/
1940
public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1941
new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1942
"GEORGIAN SUPPLEMENT",
1943
"GEORGIANSUPPLEMENT");
1944
1945
/**
1946
* Constant for the "Tifinagh" Unicode character block.
1947
* @since 1.7
1948
*/
1949
public static final UnicodeBlock TIFINAGH =
1950
new UnicodeBlock("TIFINAGH");
1951
1952
/**
1953
* Constant for the "Ethiopic Extended" Unicode character block.
1954
* @since 1.7
1955
*/
1956
public static final UnicodeBlock ETHIOPIC_EXTENDED =
1957
new UnicodeBlock("ETHIOPIC_EXTENDED",
1958
"ETHIOPIC EXTENDED",
1959
"ETHIOPICEXTENDED");
1960
1961
/**
1962
* Constant for the "Cyrillic Extended-A" Unicode character block.
1963
* @since 1.7
1964
*/
1965
public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1966
new UnicodeBlock("CYRILLIC_EXTENDED_A",
1967
"CYRILLIC EXTENDED-A",
1968
"CYRILLICEXTENDED-A");
1969
1970
/**
1971
* Constant for the "Supplemental Punctuation" Unicode character block.
1972
* @since 1.7
1973
*/
1974
public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1975
new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1976
"SUPPLEMENTAL PUNCTUATION",
1977
"SUPPLEMENTALPUNCTUATION");
1978
1979
/**
1980
* Constant for the "CJK Strokes" Unicode character block.
1981
* @since 1.7
1982
*/
1983
public static final UnicodeBlock CJK_STROKES =
1984
new UnicodeBlock("CJK_STROKES",
1985
"CJK STROKES",
1986
"CJKSTROKES");
1987
1988
/**
1989
* Constant for the "Lisu" Unicode character block.
1990
* @since 1.7
1991
*/
1992
public static final UnicodeBlock LISU =
1993
new UnicodeBlock("LISU");
1994
1995
/**
1996
* Constant for the "Vai" Unicode character block.
1997
* @since 1.7
1998
*/
1999
public static final UnicodeBlock VAI =
2000
new UnicodeBlock("VAI");
2001
2002
/**
2003
* Constant for the "Cyrillic Extended-B" Unicode character block.
2004
* @since 1.7
2005
*/
2006
public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2007
new UnicodeBlock("CYRILLIC_EXTENDED_B",
2008
"CYRILLIC EXTENDED-B",
2009
"CYRILLICEXTENDED-B");
2010
2011
/**
2012
* Constant for the "Bamum" Unicode character block.
2013
* @since 1.7
2014
*/
2015
public static final UnicodeBlock BAMUM =
2016
new UnicodeBlock("BAMUM");
2017
2018
/**
2019
* Constant for the "Modifier Tone Letters" Unicode character block.
2020
* @since 1.7
2021
*/
2022
public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2023
new UnicodeBlock("MODIFIER_TONE_LETTERS",
2024
"MODIFIER TONE LETTERS",
2025
"MODIFIERTONELETTERS");
2026
2027
/**
2028
* Constant for the "Latin Extended-D" Unicode character block.
2029
* @since 1.7
2030
*/
2031
public static final UnicodeBlock LATIN_EXTENDED_D =
2032
new UnicodeBlock("LATIN_EXTENDED_D",
2033
"LATIN EXTENDED-D",
2034
"LATINEXTENDED-D");
2035
2036
/**
2037
* Constant for the "Syloti Nagri" Unicode character block.
2038
* @since 1.7
2039
*/
2040
public static final UnicodeBlock SYLOTI_NAGRI =
2041
new UnicodeBlock("SYLOTI_NAGRI",
2042
"SYLOTI NAGRI",
2043
"SYLOTINAGRI");
2044
2045
/**
2046
* Constant for the "Common Indic Number Forms" Unicode character block.
2047
* @since 1.7
2048
*/
2049
public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2050
new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2051
"COMMON INDIC NUMBER FORMS",
2052
"COMMONINDICNUMBERFORMS");
2053
2054
/**
2055
* Constant for the "Phags-pa" Unicode character block.
2056
* @since 1.7
2057
*/
2058
public static final UnicodeBlock PHAGS_PA =
2059
new UnicodeBlock("PHAGS_PA",
2060
"PHAGS-PA");
2061
2062
/**
2063
* Constant for the "Saurashtra" Unicode character block.
2064
* @since 1.7
2065
*/
2066
public static final UnicodeBlock SAURASHTRA =
2067
new UnicodeBlock("SAURASHTRA");
2068
2069
/**
2070
* Constant for the "Devanagari Extended" Unicode character block.
2071
* @since 1.7
2072
*/
2073
public static final UnicodeBlock DEVANAGARI_EXTENDED =
2074
new UnicodeBlock("DEVANAGARI_EXTENDED",
2075
"DEVANAGARI EXTENDED",
2076
"DEVANAGARIEXTENDED");
2077
2078
/**
2079
* Constant for the "Kayah Li" Unicode character block.
2080
* @since 1.7
2081
*/
2082
public static final UnicodeBlock KAYAH_LI =
2083
new UnicodeBlock("KAYAH_LI",
2084
"KAYAH LI",
2085
"KAYAHLI");
2086
2087
/**
2088
* Constant for the "Rejang" Unicode character block.
2089
* @since 1.7
2090
*/
2091
public static final UnicodeBlock REJANG =
2092
new UnicodeBlock("REJANG");
2093
2094
/**
2095
* Constant for the "Hangul Jamo Extended-A" Unicode character block.
2096
* @since 1.7
2097
*/
2098
public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2099
new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2100
"HANGUL JAMO EXTENDED-A",
2101
"HANGULJAMOEXTENDED-A");
2102
2103
/**
2104
* Constant for the "Javanese" Unicode character block.
2105
* @since 1.7
2106
*/
2107
public static final UnicodeBlock JAVANESE =
2108
new UnicodeBlock("JAVANESE");
2109
2110
/**
2111
* Constant for the "Cham" Unicode character block.
2112
* @since 1.7
2113
*/
2114
public static final UnicodeBlock CHAM =
2115
new UnicodeBlock("CHAM");
2116
2117
/**
2118
* Constant for the "Myanmar Extended-A" Unicode character block.
2119
* @since 1.7
2120
*/
2121
public static final UnicodeBlock MYANMAR_EXTENDED_A =
2122
new UnicodeBlock("MYANMAR_EXTENDED_A",
2123
"MYANMAR EXTENDED-A",
2124
"MYANMAREXTENDED-A");
2125
2126
/**
2127
* Constant for the "Tai Viet" Unicode character block.
2128
* @since 1.7
2129
*/
2130
public static final UnicodeBlock TAI_VIET =
2131
new UnicodeBlock("TAI_VIET",
2132
"TAI VIET",
2133
"TAIVIET");
2134
2135
/**
2136
* Constant for the "Ethiopic Extended-A" Unicode character block.
2137
* @since 1.7
2138
*/
2139
public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2140
new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2141
"ETHIOPIC EXTENDED-A",
2142
"ETHIOPICEXTENDED-A");
2143
2144
/**
2145
* Constant for the "Meetei Mayek" Unicode character block.
2146
* @since 1.7
2147
*/
2148
public static final UnicodeBlock MEETEI_MAYEK =
2149
new UnicodeBlock("MEETEI_MAYEK",
2150
"MEETEI MAYEK",
2151
"MEETEIMAYEK");
2152
2153
/**
2154
* Constant for the "Hangul Jamo Extended-B" Unicode character block.
2155
* @since 1.7
2156
*/
2157
public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2158
new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2159
"HANGUL JAMO EXTENDED-B",
2160
"HANGULJAMOEXTENDED-B");
2161
2162
/**
2163
* Constant for the "Vertical Forms" Unicode character block.
2164
* @since 1.7
2165
*/
2166
public static final UnicodeBlock VERTICAL_FORMS =
2167
new UnicodeBlock("VERTICAL_FORMS",
2168
"VERTICAL FORMS",
2169
"VERTICALFORMS");
2170
2171
/**
2172
* Constant for the "Ancient Greek Numbers" Unicode character block.
2173
* @since 1.7
2174
*/
2175
public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2176
new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2177
"ANCIENT GREEK NUMBERS",
2178
"ANCIENTGREEKNUMBERS");
2179
2180
/**
2181
* Constant for the "Ancient Symbols" Unicode character block.
2182
* @since 1.7
2183
*/
2184
public static final UnicodeBlock ANCIENT_SYMBOLS =
2185
new UnicodeBlock("ANCIENT_SYMBOLS",
2186
"ANCIENT SYMBOLS",
2187
"ANCIENTSYMBOLS");
2188
2189
/**
2190
* Constant for the "Phaistos Disc" Unicode character block.
2191
* @since 1.7
2192
*/
2193
public static final UnicodeBlock PHAISTOS_DISC =
2194
new UnicodeBlock("PHAISTOS_DISC",
2195
"PHAISTOS DISC",
2196
"PHAISTOSDISC");
2197
2198
/**
2199
* Constant for the "Lycian" Unicode character block.
2200
* @since 1.7
2201
*/
2202
public static final UnicodeBlock LYCIAN =
2203
new UnicodeBlock("LYCIAN");
2204
2205
/**
2206
* Constant for the "Carian" Unicode character block.
2207
* @since 1.7
2208
*/
2209
public static final UnicodeBlock CARIAN =
2210
new UnicodeBlock("CARIAN");
2211
2212
/**
2213
* Constant for the "Old Persian" Unicode character block.
2214
* @since 1.7
2215
*/
2216
public static final UnicodeBlock OLD_PERSIAN =
2217
new UnicodeBlock("OLD_PERSIAN",
2218
"OLD PERSIAN",
2219
"OLDPERSIAN");
2220
2221
/**
2222
* Constant for the "Imperial Aramaic" Unicode character block.
2223
* @since 1.7
2224
*/
2225
public static final UnicodeBlock IMPERIAL_ARAMAIC =
2226
new UnicodeBlock("IMPERIAL_ARAMAIC",
2227
"IMPERIAL ARAMAIC",
2228
"IMPERIALARAMAIC");
2229
2230
/**
2231
* Constant for the "Phoenician" Unicode character block.
2232
* @since 1.7
2233
*/
2234
public static final UnicodeBlock PHOENICIAN =
2235
new UnicodeBlock("PHOENICIAN");
2236
2237
/**
2238
* Constant for the "Lydian" Unicode character block.
2239
* @since 1.7
2240
*/
2241
public static final UnicodeBlock LYDIAN =
2242
new UnicodeBlock("LYDIAN");
2243
2244
/**
2245
* Constant for the "Kharoshthi" Unicode character block.
2246
* @since 1.7
2247
*/
2248
public static final UnicodeBlock KHAROSHTHI =
2249
new UnicodeBlock("KHAROSHTHI");
2250
2251
/**
2252
* Constant for the "Old South Arabian" Unicode character block.
2253
* @since 1.7
2254
*/
2255
public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2256
new UnicodeBlock("OLD_SOUTH_ARABIAN",
2257
"OLD SOUTH ARABIAN",
2258
"OLDSOUTHARABIAN");
2259
2260
/**
2261
* Constant for the "Avestan" Unicode character block.
2262
* @since 1.7
2263
*/
2264
public static final UnicodeBlock AVESTAN =
2265
new UnicodeBlock("AVESTAN");
2266
2267
/**
2268
* Constant for the "Inscriptional Parthian" Unicode character block.
2269
* @since 1.7
2270
*/
2271
public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2272
new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2273
"INSCRIPTIONAL PARTHIAN",
2274
"INSCRIPTIONALPARTHIAN");
2275
2276
/**
2277
* Constant for the "Inscriptional Pahlavi" Unicode character block.
2278
* @since 1.7
2279
*/
2280
public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2281
new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2282
"INSCRIPTIONAL PAHLAVI",
2283
"INSCRIPTIONALPAHLAVI");
2284
2285
/**
2286
* Constant for the "Old Turkic" Unicode character block.
2287
* @since 1.7
2288
*/
2289
public static final UnicodeBlock OLD_TURKIC =
2290
new UnicodeBlock("OLD_TURKIC",
2291
"OLD TURKIC",
2292
"OLDTURKIC");
2293
2294
/**
2295
* Constant for the "Rumi Numeral Symbols" Unicode character block.
2296
* @since 1.7
2297
*/
2298
public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2299
new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2300
"RUMI NUMERAL SYMBOLS",
2301
"RUMINUMERALSYMBOLS");
2302
2303
/**
2304
* Constant for the "Brahmi" Unicode character block.
2305
* @since 1.7
2306
*/
2307
public static final UnicodeBlock BRAHMI =
2308
new UnicodeBlock("BRAHMI");
2309
2310
/**
2311
* Constant for the "Kaithi" Unicode character block.
2312
* @since 1.7
2313
*/
2314
public static final UnicodeBlock KAITHI =
2315
new UnicodeBlock("KAITHI");
2316
2317
/**
2318
* Constant for the "Cuneiform" Unicode character block.
2319
* @since 1.7
2320
*/
2321
public static final UnicodeBlock CUNEIFORM =
2322
new UnicodeBlock("CUNEIFORM");
2323
2324
/**
2325
* Constant for the "Cuneiform Numbers and Punctuation" Unicode
2326
* character block.
2327
* @since 1.7
2328
*/
2329
public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2330
new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2331
"CUNEIFORM NUMBERS AND PUNCTUATION",
2332
"CUNEIFORMNUMBERSANDPUNCTUATION");
2333
2334
/**
2335
* Constant for the "Egyptian Hieroglyphs" Unicode character block.
2336
* @since 1.7
2337
*/
2338
public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2339
new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2340
"EGYPTIAN HIEROGLYPHS",
2341
"EGYPTIANHIEROGLYPHS");
2342
2343
/**
2344
* Constant for the "Bamum Supplement" Unicode character block.
2345
* @since 1.7
2346
*/
2347
public static final UnicodeBlock BAMUM_SUPPLEMENT =
2348
new UnicodeBlock("BAMUM_SUPPLEMENT",
2349
"BAMUM SUPPLEMENT",
2350
"BAMUMSUPPLEMENT");
2351
2352
/**
2353
* Constant for the "Kana Supplement" Unicode character block.
2354
* @since 1.7
2355
*/
2356
public static final UnicodeBlock KANA_SUPPLEMENT =
2357
new UnicodeBlock("KANA_SUPPLEMENT",
2358
"KANA SUPPLEMENT",
2359
"KANASUPPLEMENT");
2360
2361
/**
2362
* Constant for the "Ancient Greek Musical Notation" Unicode character
2363
* block.
2364
* @since 1.7
2365
*/
2366
public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2367
new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2368
"ANCIENT GREEK MUSICAL NOTATION",
2369
"ANCIENTGREEKMUSICALNOTATION");
2370
2371
/**
2372
* Constant for the "Counting Rod Numerals" Unicode character block.
2373
* @since 1.7
2374
*/
2375
public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2376
new UnicodeBlock("COUNTING_ROD_NUMERALS",
2377
"COUNTING ROD NUMERALS",
2378
"COUNTINGRODNUMERALS");
2379
2380
/**
2381
* Constant for the "Mahjong Tiles" Unicode character block.
2382
* @since 1.7
2383
*/
2384
public static final UnicodeBlock MAHJONG_TILES =
2385
new UnicodeBlock("MAHJONG_TILES",
2386
"MAHJONG TILES",
2387
"MAHJONGTILES");
2388
2389
/**
2390
* Constant for the "Domino Tiles" Unicode character block.
2391
* @since 1.7
2392
*/
2393
public static final UnicodeBlock DOMINO_TILES =
2394
new UnicodeBlock("DOMINO_TILES",
2395
"DOMINO TILES",
2396
"DOMINOTILES");
2397
2398
/**
2399
* Constant for the "Playing Cards" Unicode character block.
2400
* @since 1.7
2401
*/
2402
public static final UnicodeBlock PLAYING_CARDS =
2403
new UnicodeBlock("PLAYING_CARDS",
2404
"PLAYING CARDS",
2405
"PLAYINGCARDS");
2406
2407
/**
2408
* Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2409
* block.
2410
* @since 1.7
2411
*/
2412
public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2413
new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2414
"ENCLOSED ALPHANUMERIC SUPPLEMENT",
2415
"ENCLOSEDALPHANUMERICSUPPLEMENT");
2416
2417
/**
2418
* Constant for the "Enclosed Ideographic Supplement" Unicode character
2419
* block.
2420
* @since 1.7
2421
*/
2422
public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2423
new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2424
"ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2425
"ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2426
2427
/**
2428
* Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2429
* character block.
2430
* @since 1.7
2431
*/
2432
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2433
new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2434
"MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2435
"MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2436
2437
/**
2438
* Constant for the "Emoticons" Unicode character block.
2439
* @since 1.7
2440
*/
2441
public static final UnicodeBlock EMOTICONS =
2442
new UnicodeBlock("EMOTICONS");
2443
2444
/**
2445
* Constant for the "Transport And Map Symbols" Unicode character block.
2446
* @since 1.7
2447
*/
2448
public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2449
new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2450
"TRANSPORT AND MAP SYMBOLS",
2451
"TRANSPORTANDMAPSYMBOLS");
2452
2453
/**
2454
* Constant for the "Alchemical Symbols" Unicode character block.
2455
* @since 1.7
2456
*/
2457
public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2458
new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2459
"ALCHEMICAL SYMBOLS",
2460
"ALCHEMICALSYMBOLS");
2461
2462
/**
2463
* Constant for the "CJK Unified Ideographs Extension C" Unicode
2464
* character block.
2465
* @since 1.7
2466
*/
2467
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2468
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2469
"CJK UNIFIED IDEOGRAPHS EXTENSION C",
2470
"CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2471
2472
/**
2473
* Constant for the "CJK Unified Ideographs Extension D" Unicode
2474
* character block.
2475
* @since 1.7
2476
*/
2477
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2478
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2479
"CJK UNIFIED IDEOGRAPHS EXTENSION D",
2480
"CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2481
2482
/**
2483
* Constant for the "Arabic Extended-A" Unicode character block.
2484
* @since 1.8
2485
*/
2486
public static final UnicodeBlock ARABIC_EXTENDED_A =
2487
new UnicodeBlock("ARABIC_EXTENDED_A",
2488
"ARABIC EXTENDED-A",
2489
"ARABICEXTENDED-A");
2490
2491
/**
2492
* Constant for the "Sundanese Supplement" Unicode character block.
2493
* @since 1.8
2494
*/
2495
public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2496
new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2497
"SUNDANESE SUPPLEMENT",
2498
"SUNDANESESUPPLEMENT");
2499
2500
/**
2501
* Constant for the "Meetei Mayek Extensions" Unicode character block.
2502
* @since 1.8
2503
*/
2504
public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2505
new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2506
"MEETEI MAYEK EXTENSIONS",
2507
"MEETEIMAYEKEXTENSIONS");
2508
2509
/**
2510
* Constant for the "Meroitic Hieroglyphs" Unicode character block.
2511
* @since 1.8
2512
*/
2513
public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2514
new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2515
"MEROITIC HIEROGLYPHS",
2516
"MEROITICHIEROGLYPHS");
2517
2518
/**
2519
* Constant for the "Meroitic Cursive" Unicode character block.
2520
* @since 1.8
2521
*/
2522
public static final UnicodeBlock MEROITIC_CURSIVE =
2523
new UnicodeBlock("MEROITIC_CURSIVE",
2524
"MEROITIC CURSIVE",
2525
"MEROITICCURSIVE");
2526
2527
/**
2528
* Constant for the "Sora Sompeng" Unicode character block.
2529
* @since 1.8
2530
*/
2531
public static final UnicodeBlock SORA_SOMPENG =
2532
new UnicodeBlock("SORA_SOMPENG",
2533
"SORA SOMPENG",
2534
"SORASOMPENG");
2535
2536
/**
2537
* Constant for the "Chakma" Unicode character block.
2538
* @since 1.8
2539
*/
2540
public static final UnicodeBlock CHAKMA =
2541
new UnicodeBlock("CHAKMA");
2542
2543
/**
2544
* Constant for the "Sharada" Unicode character block.
2545
* @since 1.8
2546
*/
2547
public static final UnicodeBlock SHARADA =
2548
new UnicodeBlock("SHARADA");
2549
2550
/**
2551
* Constant for the "Takri" Unicode character block.
2552
* @since 1.8
2553
*/
2554
public static final UnicodeBlock TAKRI =
2555
new UnicodeBlock("TAKRI");
2556
2557
/**
2558
* Constant for the "Miao" Unicode character block.
2559
* @since 1.8
2560
*/
2561
public static final UnicodeBlock MIAO =
2562
new UnicodeBlock("MIAO");
2563
2564
/**
2565
* Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2566
* character block.
2567
* @since 1.8
2568
*/
2569
public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2570
new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2571
"ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2572
"ARABICMATHEMATICALALPHABETICSYMBOLS");
2573
2574
private static final int[] blockStarts = {
2575
0x0000, // 0000..007F; Basic Latin
2576
0x0080, // 0080..00FF; Latin-1 Supplement
2577
0x0100, // 0100..017F; Latin Extended-A
2578
0x0180, // 0180..024F; Latin Extended-B
2579
0x0250, // 0250..02AF; IPA Extensions
2580
0x02B0, // 02B0..02FF; Spacing Modifier Letters
2581
0x0300, // 0300..036F; Combining Diacritical Marks
2582
0x0370, // 0370..03FF; Greek and Coptic
2583
0x0400, // 0400..04FF; Cyrillic
2584
0x0500, // 0500..052F; Cyrillic Supplement
2585
0x0530, // 0530..058F; Armenian
2586
0x0590, // 0590..05FF; Hebrew
2587
0x0600, // 0600..06FF; Arabic
2588
0x0700, // 0700..074F; Syriac
2589
0x0750, // 0750..077F; Arabic Supplement
2590
0x0780, // 0780..07BF; Thaana
2591
0x07C0, // 07C0..07FF; NKo
2592
0x0800, // 0800..083F; Samaritan
2593
0x0840, // 0840..085F; Mandaic
2594
0x0860, // unassigned
2595
0x08A0, // 08A0..08FF; Arabic Extended-A
2596
0x0900, // 0900..097F; Devanagari
2597
0x0980, // 0980..09FF; Bengali
2598
0x0A00, // 0A00..0A7F; Gurmukhi
2599
0x0A80, // 0A80..0AFF; Gujarati
2600
0x0B00, // 0B00..0B7F; Oriya
2601
0x0B80, // 0B80..0BFF; Tamil
2602
0x0C00, // 0C00..0C7F; Telugu
2603
0x0C80, // 0C80..0CFF; Kannada
2604
0x0D00, // 0D00..0D7F; Malayalam
2605
0x0D80, // 0D80..0DFF; Sinhala
2606
0x0E00, // 0E00..0E7F; Thai
2607
0x0E80, // 0E80..0EFF; Lao
2608
0x0F00, // 0F00..0FFF; Tibetan
2609
0x1000, // 1000..109F; Myanmar
2610
0x10A0, // 10A0..10FF; Georgian
2611
0x1100, // 1100..11FF; Hangul Jamo
2612
0x1200, // 1200..137F; Ethiopic
2613
0x1380, // 1380..139F; Ethiopic Supplement
2614
0x13A0, // 13A0..13FF; Cherokee
2615
0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics
2616
0x1680, // 1680..169F; Ogham
2617
0x16A0, // 16A0..16FF; Runic
2618
0x1700, // 1700..171F; Tagalog
2619
0x1720, // 1720..173F; Hanunoo
2620
0x1740, // 1740..175F; Buhid
2621
0x1760, // 1760..177F; Tagbanwa
2622
0x1780, // 1780..17FF; Khmer
2623
0x1800, // 1800..18AF; Mongolian
2624
0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2625
0x1900, // 1900..194F; Limbu
2626
0x1950, // 1950..197F; Tai Le
2627
0x1980, // 1980..19DF; New Tai Lue
2628
0x19E0, // 19E0..19FF; Khmer Symbols
2629
0x1A00, // 1A00..1A1F; Buginese
2630
0x1A20, // 1A20..1AAF; Tai Tham
2631
0x1AB0, // unassigned
2632
0x1B00, // 1B00..1B7F; Balinese
2633
0x1B80, // 1B80..1BBF; Sundanese
2634
0x1BC0, // 1BC0..1BFF; Batak
2635
0x1C00, // 1C00..1C4F; Lepcha
2636
0x1C50, // 1C50..1C7F; Ol Chiki
2637
0x1C80, // unassigned
2638
0x1CC0, // 1CC0..1CCF; Sundanese Supplement
2639
0x1CD0, // 1CD0..1CFF; Vedic Extensions
2640
0x1D00, // 1D00..1D7F; Phonetic Extensions
2641
0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement
2642
0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement
2643
0x1E00, // 1E00..1EFF; Latin Extended Additional
2644
0x1F00, // 1F00..1FFF; Greek Extended
2645
0x2000, // 2000..206F; General Punctuation
2646
0x2070, // 2070..209F; Superscripts and Subscripts
2647
0x20A0, // 20A0..20CF; Currency Symbols
2648
0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols
2649
0x2100, // 2100..214F; Letterlike Symbols
2650
0x2150, // 2150..218F; Number Forms
2651
0x2190, // 2190..21FF; Arrows
2652
0x2200, // 2200..22FF; Mathematical Operators
2653
0x2300, // 2300..23FF; Miscellaneous Technical
2654
0x2400, // 2400..243F; Control Pictures
2655
0x2440, // 2440..245F; Optical Character Recognition
2656
0x2460, // 2460..24FF; Enclosed Alphanumerics
2657
0x2500, // 2500..257F; Box Drawing
2658
0x2580, // 2580..259F; Block Elements
2659
0x25A0, // 25A0..25FF; Geometric Shapes
2660
0x2600, // 2600..26FF; Miscellaneous Symbols
2661
0x2700, // 2700..27BF; Dingbats
2662
0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2663
0x27F0, // 27F0..27FF; Supplemental Arrows-A
2664
0x2800, // 2800..28FF; Braille Patterns
2665
0x2900, // 2900..297F; Supplemental Arrows-B
2666
0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B
2667
0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators
2668
0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows
2669
0x2C00, // 2C00..2C5F; Glagolitic
2670
0x2C60, // 2C60..2C7F; Latin Extended-C
2671
0x2C80, // 2C80..2CFF; Coptic
2672
0x2D00, // 2D00..2D2F; Georgian Supplement
2673
0x2D30, // 2D30..2D7F; Tifinagh
2674
0x2D80, // 2D80..2DDF; Ethiopic Extended
2675
0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A
2676
0x2E00, // 2E00..2E7F; Supplemental Punctuation
2677
0x2E80, // 2E80..2EFF; CJK Radicals Supplement
2678
0x2F00, // 2F00..2FDF; Kangxi Radicals
2679
0x2FE0, // unassigned
2680
0x2FF0, // 2FF0..2FFF; Ideographic Description Characters
2681
0x3000, // 3000..303F; CJK Symbols and Punctuation
2682
0x3040, // 3040..309F; Hiragana
2683
0x30A0, // 30A0..30FF; Katakana
2684
0x3100, // 3100..312F; Bopomofo
2685
0x3130, // 3130..318F; Hangul Compatibility Jamo
2686
0x3190, // 3190..319F; Kanbun
2687
0x31A0, // 31A0..31BF; Bopomofo Extended
2688
0x31C0, // 31C0..31EF; CJK Strokes
2689
0x31F0, // 31F0..31FF; Katakana Phonetic Extensions
2690
0x3200, // 3200..32FF; Enclosed CJK Letters and Months
2691
0x3300, // 3300..33FF; CJK Compatibility
2692
0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A
2693
0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols
2694
0x4E00, // 4E00..9FFF; CJK Unified Ideographs
2695
0xA000, // A000..A48F; Yi Syllables
2696
0xA490, // A490..A4CF; Yi Radicals
2697
0xA4D0, // A4D0..A4FF; Lisu
2698
0xA500, // A500..A63F; Vai
2699
0xA640, // A640..A69F; Cyrillic Extended-B
2700
0xA6A0, // A6A0..A6FF; Bamum
2701
0xA700, // A700..A71F; Modifier Tone Letters
2702
0xA720, // A720..A7FF; Latin Extended-D
2703
0xA800, // A800..A82F; Syloti Nagri
2704
0xA830, // A830..A83F; Common Indic Number Forms
2705
0xA840, // A840..A87F; Phags-pa
2706
0xA880, // A880..A8DF; Saurashtra
2707
0xA8E0, // A8E0..A8FF; Devanagari Extended
2708
0xA900, // A900..A92F; Kayah Li
2709
0xA930, // A930..A95F; Rejang
2710
0xA960, // A960..A97F; Hangul Jamo Extended-A
2711
0xA980, // A980..A9DF; Javanese
2712
0xA9E0, // unassigned
2713
0xAA00, // AA00..AA5F; Cham
2714
0xAA60, // AA60..AA7F; Myanmar Extended-A
2715
0xAA80, // AA80..AADF; Tai Viet
2716
0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions
2717
0xAB00, // AB00..AB2F; Ethiopic Extended-A
2718
0xAB30, // unassigned
2719
0xABC0, // ABC0..ABFF; Meetei Mayek
2720
0xAC00, // AC00..D7AF; Hangul Syllables
2721
0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B
2722
0xD800, // D800..DB7F; High Surrogates
2723
0xDB80, // DB80..DBFF; High Private Use Surrogates
2724
0xDC00, // DC00..DFFF; Low Surrogates
2725
0xE000, // E000..F8FF; Private Use Area
2726
0xF900, // F900..FAFF; CJK Compatibility Ideographs
2727
0xFB00, // FB00..FB4F; Alphabetic Presentation Forms
2728
0xFB50, // FB50..FDFF; Arabic Presentation Forms-A
2729
0xFE00, // FE00..FE0F; Variation Selectors
2730
0xFE10, // FE10..FE1F; Vertical Forms
2731
0xFE20, // FE20..FE2F; Combining Half Marks
2732
0xFE30, // FE30..FE4F; CJK Compatibility Forms
2733
0xFE50, // FE50..FE6F; Small Form Variants
2734
0xFE70, // FE70..FEFF; Arabic Presentation Forms-B
2735
0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms
2736
0xFFF0, // FFF0..FFFF; Specials
2737
0x10000, // 10000..1007F; Linear B Syllabary
2738
0x10080, // 10080..100FF; Linear B Ideograms
2739
0x10100, // 10100..1013F; Aegean Numbers
2740
0x10140, // 10140..1018F; Ancient Greek Numbers
2741
0x10190, // 10190..101CF; Ancient Symbols
2742
0x101D0, // 101D0..101FF; Phaistos Disc
2743
0x10200, // unassigned
2744
0x10280, // 10280..1029F; Lycian
2745
0x102A0, // 102A0..102DF; Carian
2746
0x102E0, // unassigned
2747
0x10300, // 10300..1032F; Old Italic
2748
0x10330, // 10330..1034F; Gothic
2749
0x10350, // unassigned
2750
0x10380, // 10380..1039F; Ugaritic
2751
0x103A0, // 103A0..103DF; Old Persian
2752
0x103E0, // unassigned
2753
0x10400, // 10400..1044F; Deseret
2754
0x10450, // 10450..1047F; Shavian
2755
0x10480, // 10480..104AF; Osmanya
2756
0x104B0, // unassigned
2757
0x10800, // 10800..1083F; Cypriot Syllabary
2758
0x10840, // 10840..1085F; Imperial Aramaic
2759
0x10860, // unassigned
2760
0x10900, // 10900..1091F; Phoenician
2761
0x10920, // 10920..1093F; Lydian
2762
0x10940, // unassigned
2763
0x10980, // 10980..1099F; Meroitic Hieroglyphs
2764
0x109A0, // 109A0..109FF; Meroitic Cursive
2765
0x10A00, // 10A00..10A5F; Kharoshthi
2766
0x10A60, // 10A60..10A7F; Old South Arabian
2767
0x10A80, // unassigned
2768
0x10B00, // 10B00..10B3F; Avestan
2769
0x10B40, // 10B40..10B5F; Inscriptional Parthian
2770
0x10B60, // 10B60..10B7F; Inscriptional Pahlavi
2771
0x10B80, // unassigned
2772
0x10C00, // 10C00..10C4F; Old Turkic
2773
0x10C50, // unassigned
2774
0x10E60, // 10E60..10E7F; Rumi Numeral Symbols
2775
0x10E80, // unassigned
2776
0x11000, // 11000..1107F; Brahmi
2777
0x11080, // 11080..110CF; Kaithi
2778
0x110D0, // 110D0..110FF; Sora Sompeng
2779
0x11100, // 11100..1114F; Chakma
2780
0x11150, // unassigned
2781
0x11180, // 11180..111DF; Sharada
2782
0x111E0, // unassigned
2783
0x11680, // 11680..116CF; Takri
2784
0x116D0, // unassigned
2785
0x12000, // 12000..123FF; Cuneiform
2786
0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation
2787
0x12480, // unassigned
2788
0x13000, // 13000..1342F; Egyptian Hieroglyphs
2789
0x13430, // unassigned
2790
0x16800, // 16800..16A3F; Bamum Supplement
2791
0x16A40, // unassigned
2792
0x16F00, // 16F00..16F9F; Miao
2793
0x16FA0, // unassigned
2794
0x1B000, // 1B000..1B0FF; Kana Supplement
2795
0x1B100, // unassigned
2796
0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols
2797
0x1D100, // 1D100..1D1FF; Musical Symbols
2798
0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation
2799
0x1D250, // unassigned
2800
0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols
2801
0x1D360, // 1D360..1D37F; Counting Rod Numerals
2802
0x1D380, // unassigned
2803
0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2804
0x1D800, // unassigned
2805
0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
2806
0x1EF00, // unassigned
2807
0x1F000, // 1F000..1F02F; Mahjong Tiles
2808
0x1F030, // 1F030..1F09F; Domino Tiles
2809
0x1F0A0, // 1F0A0..1F0FF; Playing Cards
2810
0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2811
0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement
2812
0x1F300, // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2813
0x1F600, // 1F600..1F64F; Emoticons
2814
0x1F650, // unassigned
2815
0x1F680, // 1F680..1F6FF; Transport And Map Symbols
2816
0x1F700, // 1F700..1F77F; Alchemical Symbols
2817
0x1F780, // unassigned
2818
0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B
2819
0x2A6E0, // unassigned
2820
0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C
2821
0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D
2822
0x2B820, // unassigned
2823
0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2824
0x2FA20, // unassigned
2825
0xE0000, // E0000..E007F; Tags
2826
0xE0080, // unassigned
2827
0xE0100, // E0100..E01EF; Variation Selectors Supplement
2828
0xE01F0, // unassigned
2829
0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A
2830
0x100000 // 100000..10FFFF; Supplementary Private Use Area-B
2831
};
2832
2833
private static final UnicodeBlock[] blocks = {
2834
BASIC_LATIN,
2835
LATIN_1_SUPPLEMENT,
2836
LATIN_EXTENDED_A,
2837
LATIN_EXTENDED_B,
2838
IPA_EXTENSIONS,
2839
SPACING_MODIFIER_LETTERS,
2840
COMBINING_DIACRITICAL_MARKS,
2841
GREEK,
2842
CYRILLIC,
2843
CYRILLIC_SUPPLEMENTARY,
2844
ARMENIAN,
2845
HEBREW,
2846
ARABIC,
2847
SYRIAC,
2848
ARABIC_SUPPLEMENT,
2849
THAANA,
2850
NKO,
2851
SAMARITAN,
2852
MANDAIC,
2853
null,
2854
ARABIC_EXTENDED_A,
2855
DEVANAGARI,
2856
BENGALI,
2857
GURMUKHI,
2858
GUJARATI,
2859
ORIYA,
2860
TAMIL,
2861
TELUGU,
2862
KANNADA,
2863
MALAYALAM,
2864
SINHALA,
2865
THAI,
2866
LAO,
2867
TIBETAN,
2868
MYANMAR,
2869
GEORGIAN,
2870
HANGUL_JAMO,
2871
ETHIOPIC,
2872
ETHIOPIC_SUPPLEMENT,
2873
CHEROKEE,
2874
UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2875
OGHAM,
2876
RUNIC,
2877
TAGALOG,
2878
HANUNOO,
2879
BUHID,
2880
TAGBANWA,
2881
KHMER,
2882
MONGOLIAN,
2883
UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2884
LIMBU,
2885
TAI_LE,
2886
NEW_TAI_LUE,
2887
KHMER_SYMBOLS,
2888
BUGINESE,
2889
TAI_THAM,
2890
null,
2891
BALINESE,
2892
SUNDANESE,
2893
BATAK,
2894
LEPCHA,
2895
OL_CHIKI,
2896
null,
2897
SUNDANESE_SUPPLEMENT,
2898
VEDIC_EXTENSIONS,
2899
PHONETIC_EXTENSIONS,
2900
PHONETIC_EXTENSIONS_SUPPLEMENT,
2901
COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2902
LATIN_EXTENDED_ADDITIONAL,
2903
GREEK_EXTENDED,
2904
GENERAL_PUNCTUATION,
2905
SUPERSCRIPTS_AND_SUBSCRIPTS,
2906
CURRENCY_SYMBOLS,
2907
COMBINING_MARKS_FOR_SYMBOLS,
2908
LETTERLIKE_SYMBOLS,
2909
NUMBER_FORMS,
2910
ARROWS,
2911
MATHEMATICAL_OPERATORS,
2912
MISCELLANEOUS_TECHNICAL,
2913
CONTROL_PICTURES,
2914
OPTICAL_CHARACTER_RECOGNITION,
2915
ENCLOSED_ALPHANUMERICS,
2916
BOX_DRAWING,
2917
BLOCK_ELEMENTS,
2918
GEOMETRIC_SHAPES,
2919
MISCELLANEOUS_SYMBOLS,
2920
DINGBATS,
2921
MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2922
SUPPLEMENTAL_ARROWS_A,
2923
BRAILLE_PATTERNS,
2924
SUPPLEMENTAL_ARROWS_B,
2925
MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2926
SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2927
MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2928
GLAGOLITIC,
2929
LATIN_EXTENDED_C,
2930
COPTIC,
2931
GEORGIAN_SUPPLEMENT,
2932
TIFINAGH,
2933
ETHIOPIC_EXTENDED,
2934
CYRILLIC_EXTENDED_A,
2935
SUPPLEMENTAL_PUNCTUATION,
2936
CJK_RADICALS_SUPPLEMENT,
2937
KANGXI_RADICALS,
2938
null,
2939
IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2940
CJK_SYMBOLS_AND_PUNCTUATION,
2941
HIRAGANA,
2942
KATAKANA,
2943
BOPOMOFO,
2944
HANGUL_COMPATIBILITY_JAMO,
2945
KANBUN,
2946
BOPOMOFO_EXTENDED,
2947
CJK_STROKES,
2948
KATAKANA_PHONETIC_EXTENSIONS,
2949
ENCLOSED_CJK_LETTERS_AND_MONTHS,
2950
CJK_COMPATIBILITY,
2951
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2952
YIJING_HEXAGRAM_SYMBOLS,
2953
CJK_UNIFIED_IDEOGRAPHS,
2954
YI_SYLLABLES,
2955
YI_RADICALS,
2956
LISU,
2957
VAI,
2958
CYRILLIC_EXTENDED_B,
2959
BAMUM,
2960
MODIFIER_TONE_LETTERS,
2961
LATIN_EXTENDED_D,
2962
SYLOTI_NAGRI,
2963
COMMON_INDIC_NUMBER_FORMS,
2964
PHAGS_PA,
2965
SAURASHTRA,
2966
DEVANAGARI_EXTENDED,
2967
KAYAH_LI,
2968
REJANG,
2969
HANGUL_JAMO_EXTENDED_A,
2970
JAVANESE,
2971
null,
2972
CHAM,
2973
MYANMAR_EXTENDED_A,
2974
TAI_VIET,
2975
MEETEI_MAYEK_EXTENSIONS,
2976
ETHIOPIC_EXTENDED_A,
2977
null,
2978
MEETEI_MAYEK,
2979
HANGUL_SYLLABLES,
2980
HANGUL_JAMO_EXTENDED_B,
2981
HIGH_SURROGATES,
2982
HIGH_PRIVATE_USE_SURROGATES,
2983
LOW_SURROGATES,
2984
PRIVATE_USE_AREA,
2985
CJK_COMPATIBILITY_IDEOGRAPHS,
2986
ALPHABETIC_PRESENTATION_FORMS,
2987
ARABIC_PRESENTATION_FORMS_A,
2988
VARIATION_SELECTORS,
2989
VERTICAL_FORMS,
2990
COMBINING_HALF_MARKS,
2991
CJK_COMPATIBILITY_FORMS,
2992
SMALL_FORM_VARIANTS,
2993
ARABIC_PRESENTATION_FORMS_B,
2994
HALFWIDTH_AND_FULLWIDTH_FORMS,
2995
SPECIALS,
2996
LINEAR_B_SYLLABARY,
2997
LINEAR_B_IDEOGRAMS,
2998
AEGEAN_NUMBERS,
2999
ANCIENT_GREEK_NUMBERS,
3000
ANCIENT_SYMBOLS,
3001
PHAISTOS_DISC,
3002
null,
3003
LYCIAN,
3004
CARIAN,
3005
null,
3006
OLD_ITALIC,
3007
GOTHIC,
3008
null,
3009
UGARITIC,
3010
OLD_PERSIAN,
3011
null,
3012
DESERET,
3013
SHAVIAN,
3014
OSMANYA,
3015
null,
3016
CYPRIOT_SYLLABARY,
3017
IMPERIAL_ARAMAIC,
3018
null,
3019
PHOENICIAN,
3020
LYDIAN,
3021
null,
3022
MEROITIC_HIEROGLYPHS,
3023
MEROITIC_CURSIVE,
3024
KHAROSHTHI,
3025
OLD_SOUTH_ARABIAN,
3026
null,
3027
AVESTAN,
3028
INSCRIPTIONAL_PARTHIAN,
3029
INSCRIPTIONAL_PAHLAVI,
3030
null,
3031
OLD_TURKIC,
3032
null,
3033
RUMI_NUMERAL_SYMBOLS,
3034
null,
3035
BRAHMI,
3036
KAITHI,
3037
SORA_SOMPENG,
3038
CHAKMA,
3039
null,
3040
SHARADA,
3041
null,
3042
TAKRI,
3043
null,
3044
CUNEIFORM,
3045
CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3046
null,
3047
EGYPTIAN_HIEROGLYPHS,
3048
null,
3049
BAMUM_SUPPLEMENT,
3050
null,
3051
MIAO,
3052
null,
3053
KANA_SUPPLEMENT,
3054
null,
3055
BYZANTINE_MUSICAL_SYMBOLS,
3056
MUSICAL_SYMBOLS,
3057
ANCIENT_GREEK_MUSICAL_NOTATION,
3058
null,
3059
TAI_XUAN_JING_SYMBOLS,
3060
COUNTING_ROD_NUMERALS,
3061
null,
3062
MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3063
null,
3064
ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3065
null,
3066
MAHJONG_TILES,
3067
DOMINO_TILES,
3068
PLAYING_CARDS,
3069
ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3070
ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3071
MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3072
EMOTICONS,
3073
null,
3074
TRANSPORT_AND_MAP_SYMBOLS,
3075
ALCHEMICAL_SYMBOLS,
3076
null,
3077
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3078
null,
3079
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3080
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3081
null,
3082
CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3083
null,
3084
TAGS,
3085
null,
3086
VARIATION_SELECTORS_SUPPLEMENT,
3087
null,
3088
SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3089
SUPPLEMENTARY_PRIVATE_USE_AREA_B
3090
};
3091
3092
3093
/**
3094
* Returns the object representing the Unicode block containing the
3095
* given character, or {@code null} if the character is not a
3096
* member of a defined block.
3097
*
3098
* <p><b>Note:</b> This method cannot handle
3099
* <a href="Character.html#supplementary"> supplementary
3100
* characters</a>. To support all Unicode characters, including
3101
* supplementary characters, use the {@link #of(int)} method.
3102
*
3103
* @param c The character in question
3104
* @return The {@code UnicodeBlock} instance representing the
3105
* Unicode block of which this character is a member, or
3106
* {@code null} if the character is not a member of any
3107
* Unicode block
3108
*/
3109
public static UnicodeBlock of(char c) {
3110
return of((int)c);
3111
}
3112
3113
/**
3114
* Returns the object representing the Unicode block
3115
* containing the given character (Unicode code point), or
3116
* {@code null} if the character is not a member of a
3117
* defined block.
3118
*
3119
* @param codePoint the character (Unicode code point) in question.
3120
* @return The {@code UnicodeBlock} instance representing the
3121
* Unicode block of which this character is a member, or
3122
* {@code null} if the character is not a member of any
3123
* Unicode block
3124
* @exception IllegalArgumentException if the specified
3125
* {@code codePoint} is an invalid Unicode code point.
3126
* @see Character#isValidCodePoint(int)
3127
* @since 1.5
3128
*/
3129
public static UnicodeBlock of(int codePoint) {
3130
if (!isValidCodePoint(codePoint)) {
3131
throw new IllegalArgumentException();
3132
}
3133
3134
int top, bottom, current;
3135
bottom = 0;
3136
top = blockStarts.length;
3137
current = top/2;
3138
3139
// invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3140
while (top - bottom > 1) {
3141
if (codePoint >= blockStarts[current]) {
3142
bottom = current;
3143
} else {
3144
top = current;
3145
}
3146
current = (top + bottom) / 2;
3147
}
3148
return blocks[current];
3149
}
3150
3151
/**
3152
* Returns the UnicodeBlock with the given name. Block
3153
* names are determined by The Unicode Standard. The file
3154
* Blocks-&lt;version&gt;.txt defines blocks for a particular
3155
* version of the standard. The {@link Character} class specifies
3156
* the version of the standard that it supports.
3157
* <p>
3158
* This method accepts block names in the following forms:
3159
* <ol>
3160
* <li> Canonical block names as defined by the Unicode Standard.
3161
* For example, the standard defines a "Basic Latin" block. Therefore, this
3162
* method accepts "Basic Latin" as a valid block name. The documentation of
3163
* each UnicodeBlock provides the canonical name.
3164
* <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3165
* is a valid block name for the "Basic Latin" block.
3166
* <li>The text representation of each constant UnicodeBlock identifier.
3167
* For example, this method will return the {@link #BASIC_LATIN} block if
3168
* provided with the "BASIC_LATIN" name. This form replaces all spaces and
3169
* hyphens in the canonical name with underscores.
3170
* </ol>
3171
* Finally, character case is ignored for all of the valid block name forms.
3172
* For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3173
* The en_US locale's case mapping rules are used to provide case-insensitive
3174
* string comparisons for block name validation.
3175
* <p>
3176
* If the Unicode Standard changes block names, both the previous and
3177
* current names will be accepted.
3178
*
3179
* @param blockName A {@code UnicodeBlock} name.
3180
* @return The {@code UnicodeBlock} instance identified
3181
* by {@code blockName}
3182
* @throws IllegalArgumentException if {@code blockName} is an
3183
* invalid name
3184
* @throws NullPointerException if {@code blockName} is null
3185
* @since 1.5
3186
*/
3187
public static final UnicodeBlock forName(String blockName) {
3188
UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3189
if (block == null) {
3190
throw new IllegalArgumentException();
3191
}
3192
return block;
3193
}
3194
}
3195
3196
3197
/**
3198
* A family of character subsets representing the character scripts
3199
* defined in the <a href="http://www.unicode.org/reports/tr24/">
3200
* <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3201
* character is assigned to a single Unicode script, either a specific
3202
* script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3203
* one of the following three special values,
3204
* {@link Character.UnicodeScript#INHERITED Inherited},
3205
* {@link Character.UnicodeScript#COMMON Common} or
3206
* {@link Character.UnicodeScript#UNKNOWN Unknown}.
3207
*
3208
* @since 1.7
3209
*/
3210
public static enum UnicodeScript {
3211
/**
3212
* Unicode script "Common".
3213
*/
3214
COMMON,
3215
3216
/**
3217
* Unicode script "Latin".
3218
*/
3219
LATIN,
3220
3221
/**
3222
* Unicode script "Greek".
3223
*/
3224
GREEK,
3225
3226
/**
3227
* Unicode script "Cyrillic".
3228
*/
3229
CYRILLIC,
3230
3231
/**
3232
* Unicode script "Armenian".
3233
*/
3234
ARMENIAN,
3235
3236
/**
3237
* Unicode script "Hebrew".
3238
*/
3239
HEBREW,
3240
3241
/**
3242
* Unicode script "Arabic".
3243
*/
3244
ARABIC,
3245
3246
/**
3247
* Unicode script "Syriac".
3248
*/
3249
SYRIAC,
3250
3251
/**
3252
* Unicode script "Thaana".
3253
*/
3254
THAANA,
3255
3256
/**
3257
* Unicode script "Devanagari".
3258
*/
3259
DEVANAGARI,
3260
3261
/**
3262
* Unicode script "Bengali".
3263
*/
3264
BENGALI,
3265
3266
/**
3267
* Unicode script "Gurmukhi".
3268
*/
3269
GURMUKHI,
3270
3271
/**
3272
* Unicode script "Gujarati".
3273
*/
3274
GUJARATI,
3275
3276
/**
3277
* Unicode script "Oriya".
3278
*/
3279
ORIYA,
3280
3281
/**
3282
* Unicode script "Tamil".
3283
*/
3284
TAMIL,
3285
3286
/**
3287
* Unicode script "Telugu".
3288
*/
3289
TELUGU,
3290
3291
/**
3292
* Unicode script "Kannada".
3293
*/
3294
KANNADA,
3295
3296
/**
3297
* Unicode script "Malayalam".
3298
*/
3299
MALAYALAM,
3300
3301
/**
3302
* Unicode script "Sinhala".
3303
*/
3304
SINHALA,
3305
3306
/**
3307
* Unicode script "Thai".
3308
*/
3309
THAI,
3310
3311
/**
3312
* Unicode script "Lao".
3313
*/
3314
LAO,
3315
3316
/**
3317
* Unicode script "Tibetan".
3318
*/
3319
TIBETAN,
3320
3321
/**
3322
* Unicode script "Myanmar".
3323
*/
3324
MYANMAR,
3325
3326
/**
3327
* Unicode script "Georgian".
3328
*/
3329
GEORGIAN,
3330
3331
/**
3332
* Unicode script "Hangul".
3333
*/
3334
HANGUL,
3335
3336
/**
3337
* Unicode script "Ethiopic".
3338
*/
3339
ETHIOPIC,
3340
3341
/**
3342
* Unicode script "Cherokee".
3343
*/
3344
CHEROKEE,
3345
3346
/**
3347
* Unicode script "Canadian_Aboriginal".
3348
*/
3349
CANADIAN_ABORIGINAL,
3350
3351
/**
3352
* Unicode script "Ogham".
3353
*/
3354
OGHAM,
3355
3356
/**
3357
* Unicode script "Runic".
3358
*/
3359
RUNIC,
3360
3361
/**
3362
* Unicode script "Khmer".
3363
*/
3364
KHMER,
3365
3366
/**
3367
* Unicode script "Mongolian".
3368
*/
3369
MONGOLIAN,
3370
3371
/**
3372
* Unicode script "Hiragana".
3373
*/
3374
HIRAGANA,
3375
3376
/**
3377
* Unicode script "Katakana".
3378
*/
3379
KATAKANA,
3380
3381
/**
3382
* Unicode script "Bopomofo".
3383
*/
3384
BOPOMOFO,
3385
3386
/**
3387
* Unicode script "Han".
3388
*/
3389
HAN,
3390
3391
/**
3392
* Unicode script "Yi".
3393
*/
3394
YI,
3395
3396
/**
3397
* Unicode script "Old_Italic".
3398
*/
3399
OLD_ITALIC,
3400
3401
/**
3402
* Unicode script "Gothic".
3403
*/
3404
GOTHIC,
3405
3406
/**
3407
* Unicode script "Deseret".
3408
*/
3409
DESERET,
3410
3411
/**
3412
* Unicode script "Inherited".
3413
*/
3414
INHERITED,
3415
3416
/**
3417
* Unicode script "Tagalog".
3418
*/
3419
TAGALOG,
3420
3421
/**
3422
* Unicode script "Hanunoo".
3423
*/
3424
HANUNOO,
3425
3426
/**
3427
* Unicode script "Buhid".
3428
*/
3429
BUHID,
3430
3431
/**
3432
* Unicode script "Tagbanwa".
3433
*/
3434
TAGBANWA,
3435
3436
/**
3437
* Unicode script "Limbu".
3438
*/
3439
LIMBU,
3440
3441
/**
3442
* Unicode script "Tai_Le".
3443
*/
3444
TAI_LE,
3445
3446
/**
3447
* Unicode script "Linear_B".
3448
*/
3449
LINEAR_B,
3450
3451
/**
3452
* Unicode script "Ugaritic".
3453
*/
3454
UGARITIC,
3455
3456
/**
3457
* Unicode script "Shavian".
3458
*/
3459
SHAVIAN,
3460
3461
/**
3462
* Unicode script "Osmanya".
3463
*/
3464
OSMANYA,
3465
3466
/**
3467
* Unicode script "Cypriot".
3468
*/
3469
CYPRIOT,
3470
3471
/**
3472
* Unicode script "Braille".
3473
*/
3474
BRAILLE,
3475
3476
/**
3477
* Unicode script "Buginese".
3478
*/
3479
BUGINESE,
3480
3481
/**
3482
* Unicode script "Coptic".
3483
*/
3484
COPTIC,
3485
3486
/**
3487
* Unicode script "New_Tai_Lue".
3488
*/
3489
NEW_TAI_LUE,
3490
3491
/**
3492
* Unicode script "Glagolitic".
3493
*/
3494
GLAGOLITIC,
3495
3496
/**
3497
* Unicode script "Tifinagh".
3498
*/
3499
TIFINAGH,
3500
3501
/**
3502
* Unicode script "Syloti_Nagri".
3503
*/
3504
SYLOTI_NAGRI,
3505
3506
/**
3507
* Unicode script "Old_Persian".
3508
*/
3509
OLD_PERSIAN,
3510
3511
/**
3512
* Unicode script "Kharoshthi".
3513
*/
3514
KHAROSHTHI,
3515
3516
/**
3517
* Unicode script "Balinese".
3518
*/
3519
BALINESE,
3520
3521
/**
3522
* Unicode script "Cuneiform".
3523
*/
3524
CUNEIFORM,
3525
3526
/**
3527
* Unicode script "Phoenician".
3528
*/
3529
PHOENICIAN,
3530
3531
/**
3532
* Unicode script "Phags_Pa".
3533
*/
3534
PHAGS_PA,
3535
3536
/**
3537
* Unicode script "Nko".
3538
*/
3539
NKO,
3540
3541
/**
3542
* Unicode script "Sundanese".
3543
*/
3544
SUNDANESE,
3545
3546
/**
3547
* Unicode script "Batak".
3548
*/
3549
BATAK,
3550
3551
/**
3552
* Unicode script "Lepcha".
3553
*/
3554
LEPCHA,
3555
3556
/**
3557
* Unicode script "Ol_Chiki".
3558
*/
3559
OL_CHIKI,
3560
3561
/**
3562
* Unicode script "Vai".
3563
*/
3564
VAI,
3565
3566
/**
3567
* Unicode script "Saurashtra".
3568
*/
3569
SAURASHTRA,
3570
3571
/**
3572
* Unicode script "Kayah_Li".
3573
*/
3574
KAYAH_LI,
3575
3576
/**
3577
* Unicode script "Rejang".
3578
*/
3579
REJANG,
3580
3581
/**
3582
* Unicode script "Lycian".
3583
*/
3584
LYCIAN,
3585
3586
/**
3587
* Unicode script "Carian".
3588
*/
3589
CARIAN,
3590
3591
/**
3592
* Unicode script "Lydian".
3593
*/
3594
LYDIAN,
3595
3596
/**
3597
* Unicode script "Cham".
3598
*/
3599
CHAM,
3600
3601
/**
3602
* Unicode script "Tai_Tham".
3603
*/
3604
TAI_THAM,
3605
3606
/**
3607
* Unicode script "Tai_Viet".
3608
*/
3609
TAI_VIET,
3610
3611
/**
3612
* Unicode script "Avestan".
3613
*/
3614
AVESTAN,
3615
3616
/**
3617
* Unicode script "Egyptian_Hieroglyphs".
3618
*/
3619
EGYPTIAN_HIEROGLYPHS,
3620
3621
/**
3622
* Unicode script "Samaritan".
3623
*/
3624
SAMARITAN,
3625
3626
/**
3627
* Unicode script "Mandaic".
3628
*/
3629
MANDAIC,
3630
3631
/**
3632
* Unicode script "Lisu".
3633
*/
3634
LISU,
3635
3636
/**
3637
* Unicode script "Bamum".
3638
*/
3639
BAMUM,
3640
3641
/**
3642
* Unicode script "Javanese".
3643
*/
3644
JAVANESE,
3645
3646
/**
3647
* Unicode script "Meetei_Mayek".
3648
*/
3649
MEETEI_MAYEK,
3650
3651
/**
3652
* Unicode script "Imperial_Aramaic".
3653
*/
3654
IMPERIAL_ARAMAIC,
3655
3656
/**
3657
* Unicode script "Old_South_Arabian".
3658
*/
3659
OLD_SOUTH_ARABIAN,
3660
3661
/**
3662
* Unicode script "Inscriptional_Parthian".
3663
*/
3664
INSCRIPTIONAL_PARTHIAN,
3665
3666
/**
3667
* Unicode script "Inscriptional_Pahlavi".
3668
*/
3669
INSCRIPTIONAL_PAHLAVI,
3670
3671
/**
3672
* Unicode script "Old_Turkic".
3673
*/
3674
OLD_TURKIC,
3675
3676
/**
3677
* Unicode script "Brahmi".
3678
*/
3679
BRAHMI,
3680
3681
/**
3682
* Unicode script "Kaithi".
3683
*/
3684
KAITHI,
3685
3686
/**
3687
* Unicode script "Meroitic_Hieroglyphs".
3688
*/
3689
MEROITIC_HIEROGLYPHS,
3690
3691
/**
3692
* Unicode script "Meroitic_Cursive".
3693
*/
3694
MEROITIC_CURSIVE,
3695
3696
/**
3697
* Unicode script "Sora_Sompeng".
3698
*/
3699
SORA_SOMPENG,
3700
3701
/**
3702
* Unicode script "Chakma".
3703
*/
3704
CHAKMA,
3705
3706
/**
3707
* Unicode script "Sharada".
3708
*/
3709
SHARADA,
3710
3711
/**
3712
* Unicode script "Takri".
3713
*/
3714
TAKRI,
3715
3716
/**
3717
* Unicode script "Miao".
3718
*/
3719
MIAO,
3720
3721
/**
3722
* Unicode script "Unknown".
3723
*/
3724
UNKNOWN;
3725
3726
private static final int[] scriptStarts = {
3727
0x0000, // 0000..0040; COMMON
3728
0x0041, // 0041..005A; LATIN
3729
0x005B, // 005B..0060; COMMON
3730
0x0061, // 0061..007A; LATIN
3731
0x007B, // 007B..00A9; COMMON
3732
0x00AA, // 00AA..00AA; LATIN
3733
0x00AB, // 00AB..00B9; COMMON
3734
0x00BA, // 00BA..00BA; LATIN
3735
0x00BB, // 00BB..00BF; COMMON
3736
0x00C0, // 00C0..00D6; LATIN
3737
0x00D7, // 00D7..00D7; COMMON
3738
0x00D8, // 00D8..00F6; LATIN
3739
0x00F7, // 00F7..00F7; COMMON
3740
0x00F8, // 00F8..02B8; LATIN
3741
0x02B9, // 02B9..02DF; COMMON
3742
0x02E0, // 02E0..02E4; LATIN
3743
0x02E5, // 02E5..02E9; COMMON
3744
0x02EA, // 02EA..02EB; BOPOMOFO
3745
0x02EC, // 02EC..02FF; COMMON
3746
0x0300, // 0300..036F; INHERITED
3747
0x0370, // 0370..0373; GREEK
3748
0x0374, // 0374..0374; COMMON
3749
0x0375, // 0375..037D; GREEK
3750
0x037E, // 037E..0383; COMMON
3751
0x0384, // 0384..0384; GREEK
3752
0x0385, // 0385..0385; COMMON
3753
0x0386, // 0386..0386; GREEK
3754
0x0387, // 0387..0387; COMMON
3755
0x0388, // 0388..03E1; GREEK
3756
0x03E2, // 03E2..03EF; COPTIC
3757
0x03F0, // 03F0..03FF; GREEK
3758
0x0400, // 0400..0484; CYRILLIC
3759
0x0485, // 0485..0486; INHERITED
3760
0x0487, // 0487..0530; CYRILLIC
3761
0x0531, // 0531..0588; ARMENIAN
3762
0x0589, // 0589..0589; COMMON
3763
0x058A, // 058A..0590; ARMENIAN
3764
0x0591, // 0591..05FF; HEBREW
3765
0x0600, // 0600..060B; ARABIC
3766
0x060C, // 060C..060C; COMMON
3767
0x060D, // 060D..061A; ARABIC
3768
0x061B, // 061B..061D; COMMON
3769
0x061E, // 061E..061E; ARABIC
3770
0x061F, // 061F..061F; COMMON
3771
0x0620, // 0620..063F; ARABIC
3772
0x0640, // 0640..0640; COMMON
3773
0x0641, // 0641..064A; ARABIC
3774
0x064B, // 064B..0655; INHERITED
3775
0x0656, // 0656..065F; ARABIC
3776
0x0660, // 0660..0669; COMMON
3777
0x066A, // 066A..066F; ARABIC
3778
0x0670, // 0670..0670; INHERITED
3779
0x0671, // 0671..06DC; ARABIC
3780
0x06DD, // 06DD..06DD; COMMON
3781
0x06DE, // 06DE..06FF; ARABIC
3782
0x0700, // 0700..074F; SYRIAC
3783
0x0750, // 0750..077F; ARABIC
3784
0x0780, // 0780..07BF; THAANA
3785
0x07C0, // 07C0..07FF; NKO
3786
0x0800, // 0800..083F; SAMARITAN
3787
0x0840, // 0840..089F; MANDAIC
3788
0x08A0, // 08A0..08FF; ARABIC
3789
0x0900, // 0900..0950; DEVANAGARI
3790
0x0951, // 0951..0952; INHERITED
3791
0x0953, // 0953..0963; DEVANAGARI
3792
0x0964, // 0964..0965; COMMON
3793
0x0966, // 0966..0980; DEVANAGARI
3794
0x0981, // 0981..0A00; BENGALI
3795
0x0A01, // 0A01..0A80; GURMUKHI
3796
0x0A81, // 0A81..0B00; GUJARATI
3797
0x0B01, // 0B01..0B81; ORIYA
3798
0x0B82, // 0B82..0C00; TAMIL
3799
0x0C01, // 0C01..0C81; TELUGU
3800
0x0C82, // 0C82..0CF0; KANNADA
3801
0x0D02, // 0D02..0D81; MALAYALAM
3802
0x0D82, // 0D82..0E00; SINHALA
3803
0x0E01, // 0E01..0E3E; THAI
3804
0x0E3F, // 0E3F..0E3F; COMMON
3805
0x0E40, // 0E40..0E80; THAI
3806
0x0E81, // 0E81..0EFF; LAO
3807
0x0F00, // 0F00..0FD4; TIBETAN
3808
0x0FD5, // 0FD5..0FD8; COMMON
3809
0x0FD9, // 0FD9..0FFF; TIBETAN
3810
0x1000, // 1000..109F; MYANMAR
3811
0x10A0, // 10A0..10FA; GEORGIAN
3812
0x10FB, // 10FB..10FB; COMMON
3813
0x10FC, // 10FC..10FF; GEORGIAN
3814
0x1100, // 1100..11FF; HANGUL
3815
0x1200, // 1200..139F; ETHIOPIC
3816
0x13A0, // 13A0..13FF; CHEROKEE
3817
0x1400, // 1400..167F; CANADIAN_ABORIGINAL
3818
0x1680, // 1680..169F; OGHAM
3819
0x16A0, // 16A0..16EA; RUNIC
3820
0x16EB, // 16EB..16ED; COMMON
3821
0x16EE, // 16EE..16FF; RUNIC
3822
0x1700, // 1700..171F; TAGALOG
3823
0x1720, // 1720..1734; HANUNOO
3824
0x1735, // 1735..173F; COMMON
3825
0x1740, // 1740..175F; BUHID
3826
0x1760, // 1760..177F; TAGBANWA
3827
0x1780, // 1780..17FF; KHMER
3828
0x1800, // 1800..1801; MONGOLIAN
3829
0x1802, // 1802..1803; COMMON
3830
0x1804, // 1804..1804; MONGOLIAN
3831
0x1805, // 1805..1805; COMMON
3832
0x1806, // 1806..18AF; MONGOLIAN
3833
0x18B0, // 18B0..18FF; CANADIAN_ABORIGINAL
3834
0x1900, // 1900..194F; LIMBU
3835
0x1950, // 1950..197F; TAI_LE
3836
0x1980, // 1980..19DF; NEW_TAI_LUE
3837
0x19E0, // 19E0..19FF; KHMER
3838
0x1A00, // 1A00..1A1F; BUGINESE
3839
0x1A20, // 1A20..1AFF; TAI_THAM
3840
0x1B00, // 1B00..1B7F; BALINESE
3841
0x1B80, // 1B80..1BBF; SUNDANESE
3842
0x1BC0, // 1BC0..1BFF; BATAK
3843
0x1C00, // 1C00..1C4F; LEPCHA
3844
0x1C50, // 1C50..1CBF; OL_CHIKI
3845
0x1CC0, // 1CC0..1CCF; SUNDANESE
3846
0x1CD0, // 1CD0..1CD2; INHERITED
3847
0x1CD3, // 1CD3..1CD3; COMMON
3848
0x1CD4, // 1CD4..1CE0; INHERITED
3849
0x1CE1, // 1CE1..1CE1; COMMON
3850
0x1CE2, // 1CE2..1CE8; INHERITED
3851
0x1CE9, // 1CE9..1CEC; COMMON
3852
0x1CED, // 1CED..1CED; INHERITED
3853
0x1CEE, // 1CEE..1CF3; COMMON
3854
0x1CF4, // 1CF4..1CF4; INHERITED
3855
0x1CF5, // 1CF5..1CFF; COMMON
3856
0x1D00, // 1D00..1D25; LATIN
3857
0x1D26, // 1D26..1D2A; GREEK
3858
0x1D2B, // 1D2B..1D2B; CYRILLIC
3859
0x1D2C, // 1D2C..1D5C; LATIN
3860
0x1D5D, // 1D5D..1D61; GREEK
3861
0x1D62, // 1D62..1D65; LATIN
3862
0x1D66, // 1D66..1D6A; GREEK
3863
0x1D6B, // 1D6B..1D77; LATIN
3864
0x1D78, // 1D78..1D78; CYRILLIC
3865
0x1D79, // 1D79..1DBE; LATIN
3866
0x1DBF, // 1DBF..1DBF; GREEK
3867
0x1DC0, // 1DC0..1DFF; INHERITED
3868
0x1E00, // 1E00..1EFF; LATIN
3869
0x1F00, // 1F00..1FFF; GREEK
3870
0x2000, // 2000..200B; COMMON
3871
0x200C, // 200C..200D; INHERITED
3872
0x200E, // 200E..2070; COMMON
3873
0x2071, // 2071..2073; LATIN
3874
0x2074, // 2074..207E; COMMON
3875
0x207F, // 207F..207F; LATIN
3876
0x2080, // 2080..208F; COMMON
3877
0x2090, // 2090..209F; LATIN
3878
0x20A0, // 20A0..20CF; COMMON
3879
0x20D0, // 20D0..20FF; INHERITED
3880
0x2100, // 2100..2125; COMMON
3881
0x2126, // 2126..2126; GREEK
3882
0x2127, // 2127..2129; COMMON
3883
0x212A, // 212A..212B; LATIN
3884
0x212C, // 212C..2131; COMMON
3885
0x2132, // 2132..2132; LATIN
3886
0x2133, // 2133..214D; COMMON
3887
0x214E, // 214E..214E; LATIN
3888
0x214F, // 214F..215F; COMMON
3889
0x2160, // 2160..2188; LATIN
3890
0x2189, // 2189..27FF; COMMON
3891
0x2800, // 2800..28FF; BRAILLE
3892
0x2900, // 2900..2BFF; COMMON
3893
0x2C00, // 2C00..2C5F; GLAGOLITIC
3894
0x2C60, // 2C60..2C7F; LATIN
3895
0x2C80, // 2C80..2CFF; COPTIC
3896
0x2D00, // 2D00..2D2F; GEORGIAN
3897
0x2D30, // 2D30..2D7F; TIFINAGH
3898
0x2D80, // 2D80..2DDF; ETHIOPIC
3899
0x2DE0, // 2DE0..2DFF; CYRILLIC
3900
0x2E00, // 2E00..2E7F; COMMON
3901
0x2E80, // 2E80..2FEF; HAN
3902
0x2FF0, // 2FF0..3004; COMMON
3903
0x3005, // 3005..3005; HAN
3904
0x3006, // 3006..3006; COMMON
3905
0x3007, // 3007..3007; HAN
3906
0x3008, // 3008..3020; COMMON
3907
0x3021, // 3021..3029; HAN
3908
0x302A, // 302A..302D; INHERITED
3909
0x302E, // 302E..302F; HANGUL
3910
0x3030, // 3030..3037; COMMON
3911
0x3038, // 3038..303B; HAN
3912
0x303C, // 303C..3040; COMMON
3913
0x3041, // 3041..3098; HIRAGANA
3914
0x3099, // 3099..309A; INHERITED
3915
0x309B, // 309B..309C; COMMON
3916
0x309D, // 309D..309F; HIRAGANA
3917
0x30A0, // 30A0..30A0; COMMON
3918
0x30A1, // 30A1..30FA; KATAKANA
3919
0x30FB, // 30FB..30FC; COMMON
3920
0x30FD, // 30FD..3104; KATAKANA
3921
0x3105, // 3105..3130; BOPOMOFO
3922
0x3131, // 3131..318F; HANGUL
3923
0x3190, // 3190..319F; COMMON
3924
0x31A0, // 31A0..31BF; BOPOMOFO
3925
0x31C0, // 31C0..31EF; COMMON
3926
0x31F0, // 31F0..31FF; KATAKANA
3927
0x3200, // 3200..321F; HANGUL
3928
0x3220, // 3220..325F; COMMON
3929
0x3260, // 3260..327E; HANGUL
3930
0x327F, // 327F..32CF; COMMON
3931
0x32D0, // 32D0..32FE; KATAKANA
3932
0x32FF, // 32FF ; COMMON
3933
0x3300, // 3300..3357; KATAKANA
3934
0x3358, // 3358..33FF; COMMON
3935
0x3400, // 3400..4DBF; HAN
3936
0x4DC0, // 4DC0..4DFF; COMMON
3937
0x4E00, // 4E00..9FFF; HAN
3938
0xA000, // A000..A4CF; YI
3939
0xA4D0, // A4D0..A4FF; LISU
3940
0xA500, // A500..A63F; VAI
3941
0xA640, // A640..A69F; CYRILLIC
3942
0xA6A0, // A6A0..A6FF; BAMUM
3943
0xA700, // A700..A721; COMMON
3944
0xA722, // A722..A787; LATIN
3945
0xA788, // A788..A78A; COMMON
3946
0xA78B, // A78B..A7FF; LATIN
3947
0xA800, // A800..A82F; SYLOTI_NAGRI
3948
0xA830, // A830..A83F; COMMON
3949
0xA840, // A840..A87F; PHAGS_PA
3950
0xA880, // A880..A8DF; SAURASHTRA
3951
0xA8E0, // A8E0..A8FF; DEVANAGARI
3952
0xA900, // A900..A92F; KAYAH_LI
3953
0xA930, // A930..A95F; REJANG
3954
0xA960, // A960..A97F; HANGUL
3955
0xA980, // A980..A9FF; JAVANESE
3956
0xAA00, // AA00..AA5F; CHAM
3957
0xAA60, // AA60..AA7F; MYANMAR
3958
0xAA80, // AA80..AADF; TAI_VIET
3959
0xAAE0, // AAE0..AB00; MEETEI_MAYEK
3960
0xAB01, // AB01..ABBF; ETHIOPIC
3961
0xABC0, // ABC0..ABFF; MEETEI_MAYEK
3962
0xAC00, // AC00..D7FB; HANGUL
3963
0xD7FC, // D7FC..F8FF; UNKNOWN
3964
0xF900, // F900..FAFF; HAN
3965
0xFB00, // FB00..FB12; LATIN
3966
0xFB13, // FB13..FB1C; ARMENIAN
3967
0xFB1D, // FB1D..FB4F; HEBREW
3968
0xFB50, // FB50..FD3D; ARABIC
3969
0xFD3E, // FD3E..FD4F; COMMON
3970
0xFD50, // FD50..FDFC; ARABIC
3971
0xFDFD, // FDFD..FDFF; COMMON
3972
0xFE00, // FE00..FE0F; INHERITED
3973
0xFE10, // FE10..FE1F; COMMON
3974
0xFE20, // FE20..FE2F; INHERITED
3975
0xFE30, // FE30..FE6F; COMMON
3976
0xFE70, // FE70..FEFE; ARABIC
3977
0xFEFF, // FEFF..FF20; COMMON
3978
0xFF21, // FF21..FF3A; LATIN
3979
0xFF3B, // FF3B..FF40; COMMON
3980
0xFF41, // FF41..FF5A; LATIN
3981
0xFF5B, // FF5B..FF65; COMMON
3982
0xFF66, // FF66..FF6F; KATAKANA
3983
0xFF70, // FF70..FF70; COMMON
3984
0xFF71, // FF71..FF9D; KATAKANA
3985
0xFF9E, // FF9E..FF9F; COMMON
3986
0xFFA0, // FFA0..FFDF; HANGUL
3987
0xFFE0, // FFE0..FFFF; COMMON
3988
0x10000, // 10000..100FF; LINEAR_B
3989
0x10100, // 10100..1013F; COMMON
3990
0x10140, // 10140..1018F; GREEK
3991
0x10190, // 10190..101FC; COMMON
3992
0x101FD, // 101FD..1027F; INHERITED
3993
0x10280, // 10280..1029F; LYCIAN
3994
0x102A0, // 102A0..102FF; CARIAN
3995
0x10300, // 10300..1032F; OLD_ITALIC
3996
0x10330, // 10330..1037F; GOTHIC
3997
0x10380, // 10380..1039F; UGARITIC
3998
0x103A0, // 103A0..103FF; OLD_PERSIAN
3999
0x10400, // 10400..1044F; DESERET
4000
0x10450, // 10450..1047F; SHAVIAN
4001
0x10480, // 10480..107FF; OSMANYA
4002
0x10800, // 10800..1083F; CYPRIOT
4003
0x10840, // 10840..108FF; IMPERIAL_ARAMAIC
4004
0x10900, // 10900..1091F; PHOENICIAN
4005
0x10920, // 10920..1097F; LYDIAN
4006
0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS
4007
0x109A0, // 109A0..109FF; MEROITIC_CURSIVE
4008
0x10A00, // 10A00..10A5F; KHAROSHTHI
4009
0x10A60, // 10A60..10AFF; OLD_SOUTH_ARABIAN
4010
0x10B00, // 10B00..10B3F; AVESTAN
4011
0x10B40, // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
4012
0x10B60, // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
4013
0x10C00, // 10C00..10E5F; OLD_TURKIC
4014
0x10E60, // 10E60..10FFF; ARABIC
4015
0x11000, // 11000..1107F; BRAHMI
4016
0x11080, // 11080..110CF; KAITHI
4017
0x110D0, // 110D0..110FF; SORA_SOMPENG
4018
0x11100, // 11100..1117F; CHAKMA
4019
0x11180, // 11180..1167F; SHARADA
4020
0x11680, // 11680..116CF; TAKRI
4021
0x12000, // 12000..12FFF; CUNEIFORM
4022
0x13000, // 13000..167FF; EGYPTIAN_HIEROGLYPHS
4023
0x16800, // 16800..16A38; BAMUM
4024
0x16F00, // 16F00..16F9F; MIAO
4025
0x1B000, // 1B000..1B000; KATAKANA
4026
0x1B001, // 1B001..1CFFF; HIRAGANA
4027
0x1D000, // 1D000..1D166; COMMON
4028
0x1D167, // 1D167..1D169; INHERITED
4029
0x1D16A, // 1D16A..1D17A; COMMON
4030
0x1D17B, // 1D17B..1D182; INHERITED
4031
0x1D183, // 1D183..1D184; COMMON
4032
0x1D185, // 1D185..1D18B; INHERITED
4033
0x1D18C, // 1D18C..1D1A9; COMMON
4034
0x1D1AA, // 1D1AA..1D1AD; INHERITED
4035
0x1D1AE, // 1D1AE..1D1FF; COMMON
4036
0x1D200, // 1D200..1D2FF; GREEK
4037
0x1D300, // 1D300..1EDFF; COMMON
4038
0x1EE00, // 1EE00..1EFFF; ARABIC
4039
0x1F000, // 1F000..1F1FF; COMMON
4040
0x1F200, // 1F200..1F200; HIRAGANA
4041
0x1F201, // 1F201..1FFFF; COMMON
4042
0x20000, // 20000..E0000; HAN
4043
0xE0001, // E0001..E00FF; COMMON
4044
0xE0100, // E0100..E01EF; INHERITED
4045
0xE01F0 // E01F0..10FFFF; UNKNOWN
4046
4047
};
4048
4049
private static final UnicodeScript[] scripts = {
4050
COMMON,
4051
LATIN,
4052
COMMON,
4053
LATIN,
4054
COMMON,
4055
LATIN,
4056
COMMON,
4057
LATIN,
4058
COMMON,
4059
LATIN,
4060
COMMON,
4061
LATIN,
4062
COMMON,
4063
LATIN,
4064
COMMON,
4065
LATIN,
4066
COMMON,
4067
BOPOMOFO,
4068
COMMON,
4069
INHERITED,
4070
GREEK,
4071
COMMON,
4072
GREEK,
4073
COMMON,
4074
GREEK,
4075
COMMON,
4076
GREEK,
4077
COMMON,
4078
GREEK,
4079
COPTIC,
4080
GREEK,
4081
CYRILLIC,
4082
INHERITED,
4083
CYRILLIC,
4084
ARMENIAN,
4085
COMMON,
4086
ARMENIAN,
4087
HEBREW,
4088
ARABIC,
4089
COMMON,
4090
ARABIC,
4091
COMMON,
4092
ARABIC,
4093
COMMON,
4094
ARABIC,
4095
COMMON,
4096
ARABIC,
4097
INHERITED,
4098
ARABIC,
4099
COMMON,
4100
ARABIC,
4101
INHERITED,
4102
ARABIC,
4103
COMMON,
4104
ARABIC,
4105
SYRIAC,
4106
ARABIC,
4107
THAANA,
4108
NKO,
4109
SAMARITAN,
4110
MANDAIC,
4111
ARABIC,
4112
DEVANAGARI,
4113
INHERITED,
4114
DEVANAGARI,
4115
COMMON,
4116
DEVANAGARI,
4117
BENGALI,
4118
GURMUKHI,
4119
GUJARATI,
4120
ORIYA,
4121
TAMIL,
4122
TELUGU,
4123
KANNADA,
4124
MALAYALAM,
4125
SINHALA,
4126
THAI,
4127
COMMON,
4128
THAI,
4129
LAO,
4130
TIBETAN,
4131
COMMON,
4132
TIBETAN,
4133
MYANMAR,
4134
GEORGIAN,
4135
COMMON,
4136
GEORGIAN,
4137
HANGUL,
4138
ETHIOPIC,
4139
CHEROKEE,
4140
CANADIAN_ABORIGINAL,
4141
OGHAM,
4142
RUNIC,
4143
COMMON,
4144
RUNIC,
4145
TAGALOG,
4146
HANUNOO,
4147
COMMON,
4148
BUHID,
4149
TAGBANWA,
4150
KHMER,
4151
MONGOLIAN,
4152
COMMON,
4153
MONGOLIAN,
4154
COMMON,
4155
MONGOLIAN,
4156
CANADIAN_ABORIGINAL,
4157
LIMBU,
4158
TAI_LE,
4159
NEW_TAI_LUE,
4160
KHMER,
4161
BUGINESE,
4162
TAI_THAM,
4163
BALINESE,
4164
SUNDANESE,
4165
BATAK,
4166
LEPCHA,
4167
OL_CHIKI,
4168
SUNDANESE,
4169
INHERITED,
4170
COMMON,
4171
INHERITED,
4172
COMMON,
4173
INHERITED,
4174
COMMON,
4175
INHERITED,
4176
COMMON,
4177
INHERITED,
4178
COMMON,
4179
LATIN,
4180
GREEK,
4181
CYRILLIC,
4182
LATIN,
4183
GREEK,
4184
LATIN,
4185
GREEK,
4186
LATIN,
4187
CYRILLIC,
4188
LATIN,
4189
GREEK,
4190
INHERITED,
4191
LATIN,
4192
GREEK,
4193
COMMON,
4194
INHERITED,
4195
COMMON,
4196
LATIN,
4197
COMMON,
4198
LATIN,
4199
COMMON,
4200
LATIN,
4201
COMMON,
4202
INHERITED,
4203
COMMON,
4204
GREEK,
4205
COMMON,
4206
LATIN,
4207
COMMON,
4208
LATIN,
4209
COMMON,
4210
LATIN,
4211
COMMON,
4212
LATIN,
4213
COMMON,
4214
BRAILLE,
4215
COMMON,
4216
GLAGOLITIC,
4217
LATIN,
4218
COPTIC,
4219
GEORGIAN,
4220
TIFINAGH,
4221
ETHIOPIC,
4222
CYRILLIC,
4223
COMMON,
4224
HAN,
4225
COMMON,
4226
HAN,
4227
COMMON,
4228
HAN,
4229
COMMON,
4230
HAN,
4231
INHERITED,
4232
HANGUL,
4233
COMMON,
4234
HAN,
4235
COMMON,
4236
HIRAGANA,
4237
INHERITED,
4238
COMMON,
4239
HIRAGANA,
4240
COMMON,
4241
KATAKANA,
4242
COMMON,
4243
KATAKANA,
4244
BOPOMOFO,
4245
HANGUL,
4246
COMMON,
4247
BOPOMOFO,
4248
COMMON,
4249
KATAKANA,
4250
HANGUL,
4251
COMMON,
4252
HANGUL,
4253
COMMON,
4254
KATAKANA, // 32D0..32FE
4255
COMMON, // 32FF
4256
KATAKANA, // 3300..3357
4257
COMMON,
4258
HAN,
4259
COMMON,
4260
HAN,
4261
YI,
4262
LISU,
4263
VAI,
4264
CYRILLIC,
4265
BAMUM,
4266
COMMON,
4267
LATIN,
4268
COMMON,
4269
LATIN,
4270
SYLOTI_NAGRI,
4271
COMMON,
4272
PHAGS_PA,
4273
SAURASHTRA,
4274
DEVANAGARI,
4275
KAYAH_LI,
4276
REJANG,
4277
HANGUL,
4278
JAVANESE,
4279
CHAM,
4280
MYANMAR,
4281
TAI_VIET,
4282
MEETEI_MAYEK,
4283
ETHIOPIC,
4284
MEETEI_MAYEK,
4285
HANGUL,
4286
UNKNOWN ,
4287
HAN,
4288
LATIN,
4289
ARMENIAN,
4290
HEBREW,
4291
ARABIC,
4292
COMMON,
4293
ARABIC,
4294
COMMON,
4295
INHERITED,
4296
COMMON,
4297
INHERITED,
4298
COMMON,
4299
ARABIC,
4300
COMMON,
4301
LATIN,
4302
COMMON,
4303
LATIN,
4304
COMMON,
4305
KATAKANA,
4306
COMMON,
4307
KATAKANA,
4308
COMMON,
4309
HANGUL,
4310
COMMON,
4311
LINEAR_B,
4312
COMMON,
4313
GREEK,
4314
COMMON,
4315
INHERITED,
4316
LYCIAN,
4317
CARIAN,
4318
OLD_ITALIC,
4319
GOTHIC,
4320
UGARITIC,
4321
OLD_PERSIAN,
4322
DESERET,
4323
SHAVIAN,
4324
OSMANYA,
4325
CYPRIOT,
4326
IMPERIAL_ARAMAIC,
4327
PHOENICIAN,
4328
LYDIAN,
4329
MEROITIC_HIEROGLYPHS,
4330
MEROITIC_CURSIVE,
4331
KHAROSHTHI,
4332
OLD_SOUTH_ARABIAN,
4333
AVESTAN,
4334
INSCRIPTIONAL_PARTHIAN,
4335
INSCRIPTIONAL_PAHLAVI,
4336
OLD_TURKIC,
4337
ARABIC,
4338
BRAHMI,
4339
KAITHI,
4340
SORA_SOMPENG,
4341
CHAKMA,
4342
SHARADA,
4343
TAKRI,
4344
CUNEIFORM,
4345
EGYPTIAN_HIEROGLYPHS,
4346
BAMUM,
4347
MIAO,
4348
KATAKANA,
4349
HIRAGANA,
4350
COMMON,
4351
INHERITED,
4352
COMMON,
4353
INHERITED,
4354
COMMON,
4355
INHERITED,
4356
COMMON,
4357
INHERITED,
4358
COMMON,
4359
GREEK,
4360
COMMON,
4361
ARABIC,
4362
COMMON,
4363
HIRAGANA,
4364
COMMON,
4365
HAN,
4366
COMMON,
4367
INHERITED,
4368
UNKNOWN
4369
};
4370
4371
private static final HashMap<String, Character.UnicodeScript> aliases;
4372
static {
4373
aliases = new HashMap<>(128);
4374
aliases.put("ARAB", ARABIC);
4375
aliases.put("ARMI", IMPERIAL_ARAMAIC);
4376
aliases.put("ARMN", ARMENIAN);
4377
aliases.put("AVST", AVESTAN);
4378
aliases.put("BALI", BALINESE);
4379
aliases.put("BAMU", BAMUM);
4380
aliases.put("BATK", BATAK);
4381
aliases.put("BENG", BENGALI);
4382
aliases.put("BOPO", BOPOMOFO);
4383
aliases.put("BRAI", BRAILLE);
4384
aliases.put("BRAH", BRAHMI);
4385
aliases.put("BUGI", BUGINESE);
4386
aliases.put("BUHD", BUHID);
4387
aliases.put("CAKM", CHAKMA);
4388
aliases.put("CANS", CANADIAN_ABORIGINAL);
4389
aliases.put("CARI", CARIAN);
4390
aliases.put("CHAM", CHAM);
4391
aliases.put("CHER", CHEROKEE);
4392
aliases.put("COPT", COPTIC);
4393
aliases.put("CPRT", CYPRIOT);
4394
aliases.put("CYRL", CYRILLIC);
4395
aliases.put("DEVA", DEVANAGARI);
4396
aliases.put("DSRT", DESERET);
4397
aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4398
aliases.put("ETHI", ETHIOPIC);
4399
aliases.put("GEOR", GEORGIAN);
4400
aliases.put("GLAG", GLAGOLITIC);
4401
aliases.put("GOTH", GOTHIC);
4402
aliases.put("GREK", GREEK);
4403
aliases.put("GUJR", GUJARATI);
4404
aliases.put("GURU", GURMUKHI);
4405
aliases.put("HANG", HANGUL);
4406
aliases.put("HANI", HAN);
4407
aliases.put("HANO", HANUNOO);
4408
aliases.put("HEBR", HEBREW);
4409
aliases.put("HIRA", HIRAGANA);
4410
// it appears we don't have the KATAKANA_OR_HIRAGANA
4411
//aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4412
aliases.put("ITAL", OLD_ITALIC);
4413
aliases.put("JAVA", JAVANESE);
4414
aliases.put("KALI", KAYAH_LI);
4415
aliases.put("KANA", KATAKANA);
4416
aliases.put("KHAR", KHAROSHTHI);
4417
aliases.put("KHMR", KHMER);
4418
aliases.put("KNDA", KANNADA);
4419
aliases.put("KTHI", KAITHI);
4420
aliases.put("LANA", TAI_THAM);
4421
aliases.put("LAOO", LAO);
4422
aliases.put("LATN", LATIN);
4423
aliases.put("LEPC", LEPCHA);
4424
aliases.put("LIMB", LIMBU);
4425
aliases.put("LINB", LINEAR_B);
4426
aliases.put("LISU", LISU);
4427
aliases.put("LYCI", LYCIAN);
4428
aliases.put("LYDI", LYDIAN);
4429
aliases.put("MAND", MANDAIC);
4430
aliases.put("MERC", MEROITIC_CURSIVE);
4431
aliases.put("MERO", MEROITIC_HIEROGLYPHS);
4432
aliases.put("MLYM", MALAYALAM);
4433
aliases.put("MONG", MONGOLIAN);
4434
aliases.put("MTEI", MEETEI_MAYEK);
4435
aliases.put("MYMR", MYANMAR);
4436
aliases.put("NKOO", NKO);
4437
aliases.put("OGAM", OGHAM);
4438
aliases.put("OLCK", OL_CHIKI);
4439
aliases.put("ORKH", OLD_TURKIC);
4440
aliases.put("ORYA", ORIYA);
4441
aliases.put("OSMA", OSMANYA);
4442
aliases.put("PHAG", PHAGS_PA);
4443
aliases.put("PLRD", MIAO);
4444
aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4445
aliases.put("PHNX", PHOENICIAN);
4446
aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4447
aliases.put("RJNG", REJANG);
4448
aliases.put("RUNR", RUNIC);
4449
aliases.put("SAMR", SAMARITAN);
4450
aliases.put("SARB", OLD_SOUTH_ARABIAN);
4451
aliases.put("SAUR", SAURASHTRA);
4452
aliases.put("SHAW", SHAVIAN);
4453
aliases.put("SHRD", SHARADA);
4454
aliases.put("SINH", SINHALA);
4455
aliases.put("SORA", SORA_SOMPENG);
4456
aliases.put("SUND", SUNDANESE);
4457
aliases.put("SYLO", SYLOTI_NAGRI);
4458
aliases.put("SYRC", SYRIAC);
4459
aliases.put("TAGB", TAGBANWA);
4460
aliases.put("TALE", TAI_LE);
4461
aliases.put("TAKR", TAKRI);
4462
aliases.put("TALU", NEW_TAI_LUE);
4463
aliases.put("TAML", TAMIL);
4464
aliases.put("TAVT", TAI_VIET);
4465
aliases.put("TELU", TELUGU);
4466
aliases.put("TFNG", TIFINAGH);
4467
aliases.put("TGLG", TAGALOG);
4468
aliases.put("THAA", THAANA);
4469
aliases.put("THAI", THAI);
4470
aliases.put("TIBT", TIBETAN);
4471
aliases.put("UGAR", UGARITIC);
4472
aliases.put("VAII", VAI);
4473
aliases.put("XPEO", OLD_PERSIAN);
4474
aliases.put("XSUX", CUNEIFORM);
4475
aliases.put("YIII", YI);
4476
aliases.put("ZINH", INHERITED);
4477
aliases.put("ZYYY", COMMON);
4478
aliases.put("ZZZZ", UNKNOWN);
4479
}
4480
4481
/**
4482
* Returns the enum constant representing the Unicode script of which
4483
* the given character (Unicode code point) is assigned to.
4484
*
4485
* @param codePoint the character (Unicode code point) in question.
4486
* @return The {@code UnicodeScript} constant representing the
4487
* Unicode script of which this character is assigned to.
4488
*
4489
* @exception IllegalArgumentException if the specified
4490
* {@code codePoint} is an invalid Unicode code point.
4491
* @see Character#isValidCodePoint(int)
4492
*
4493
*/
4494
public static UnicodeScript of(int codePoint) {
4495
if (!isValidCodePoint(codePoint))
4496
throw new IllegalArgumentException();
4497
int type = getType(codePoint);
4498
// leave SURROGATE and PRIVATE_USE for table lookup
4499
if (type == UNASSIGNED)
4500
return UNKNOWN;
4501
int index = Arrays.binarySearch(scriptStarts, codePoint);
4502
if (index < 0)
4503
index = -index - 2;
4504
return scripts[index];
4505
}
4506
4507
/**
4508
* Returns the UnicodeScript constant with the given Unicode script
4509
* name or the script name alias. Script names and their aliases are
4510
* determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4511
* and PropertyValueAliases&lt;version&gt;.txt define script names
4512
* and the script name aliases for a particular version of the
4513
* standard. The {@link Character} class specifies the version of
4514
* the standard that it supports.
4515
* <p>
4516
* Character case is ignored for all of the valid script names.
4517
* The en_US locale's case mapping rules are used to provide
4518
* case-insensitive string comparisons for script name validation.
4519
* <p>
4520
*
4521
* @param scriptName A {@code UnicodeScript} name.
4522
* @return The {@code UnicodeScript} constant identified
4523
* by {@code scriptName}
4524
* @throws IllegalArgumentException if {@code scriptName} is an
4525
* invalid name
4526
* @throws NullPointerException if {@code scriptName} is null
4527
*/
4528
public static final UnicodeScript forName(String scriptName) {
4529
scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4530
//.replace(' ', '_'));
4531
UnicodeScript sc = aliases.get(scriptName);
4532
if (sc != null)
4533
return sc;
4534
return valueOf(scriptName);
4535
}
4536
}
4537
4538
/**
4539
* The value of the {@code Character}.
4540
*
4541
* @serial
4542
*/
4543
private final char value; // final: written once by the constructor, never reassigned
4544
4545
/** use serialVersionUID from JDK 1.0.2 for interoperability */
4546
private static final long serialVersionUID = 3786198910865385080L; // pinned to the JDK 1.0.2 value for interoperability
4547
4548
/**
4549
* Constructs a newly allocated {@code Character} object that
4550
* represents the specified {@code char} value.
4551
*
4552
* @param value the value to be represented by the
4553
* {@code Character} object.
4554
*/
4555
public Character(char value) {
    // Store the wrapped primitive; the field is final, so this is its only write.
    this.value = value;
}
4558
4559
private static class CharacterCache {
4560
private CharacterCache(){}
4561
4562
static final Character cache[] = new Character[127 + 1];
4563
4564
static {
4565
for (int i = 0; i < cache.length; i++)
4566
cache[i] = new Character((char)i);
4567
}
4568
}
4569
4570
/**
4571
* Returns a <tt>Character</tt> instance representing the specified
4572
* <tt>char</tt> value.
4573
* If a new <tt>Character</tt> instance is not required, this method
4574
* should generally be used in preference to the constructor
4575
* {@link #Character(char)}, as this method is likely to yield
4576
* significantly better space and time performance by caching
4577
* frequently requested values.
4578
*
4579
* This method will always cache values in the range {@code
4580
* '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4581
* cache other values outside of this range.
4582
*
4583
* @param c a char value.
4584
* @return a <tt>Character</tt> instance representing <tt>c</tt>.
4585
* @since 1.5
4586
*/
4587
public static Character valueOf(char c) {
4588
if (c <= 127) { // must cache
4589
return CharacterCache.cache[(int)c];
4590
}
4591
return new Character(c);
4592
}
4593
4594
/**
4595
* Returns the value of this {@code Character} object.
4596
* @return the primitive {@code char} value represented by
4597
* this object.
4598
*/
4599
public char charValue() {
4600
return value;
4601
}
4602
4603
/**
4604
* Returns a hash code for this {@code Character}; equal to the result
4605
* of invoking {@code charValue()}.
4606
*
4607
* @return a hash code value for this {@code Character}
4608
*/
4609
@Override
4610
public int hashCode() {
4611
return Character.hashCode(value);
4612
}
4613
4614
/**
4615
* Returns a hash code for a {@code char} value; compatible with
4616
* {@code Character.hashCode()}.
4617
*
4618
* @since 1.8
4619
*
4620
* @param value The {@code char} for which to return a hash code.
4621
* @return a hash code value for a {@code char} value.
4622
*/
4623
public static int hashCode(char value) {
4624
return (int)value;
4625
}
4626
4627
/**
4628
* Compares this object against the specified object.
4629
* The result is {@code true} if and only if the argument is not
4630
* {@code null} and is a {@code Character} object that
4631
* represents the same {@code char} value as this object.
4632
*
4633
* @param obj the object to compare with.
4634
* @return {@code true} if the objects are the same;
4635
* {@code false} otherwise.
4636
*/
4637
public boolean equals(Object obj) {
4638
if (obj instanceof Character) {
4639
return value == ((Character)obj).charValue();
4640
}
4641
return false;
4642
}
4643
4644
/**
4645
* Returns a {@code String} object representing this
4646
* {@code Character}'s value. The result is a string of
4647
* length 1 whose sole component is the primitive
4648
* {@code char} value represented by this
4649
* {@code Character} object.
4650
*
4651
* @return a string representation of this object.
4652
*/
4653
public String toString() {
4654
return String.valueOf(value);
4655
}
4656
4657
/**
4658
* Returns a {@code String} object representing the
4659
* specified {@code char}. The result is a string of length
4660
* 1 consisting solely of the specified {@code char}.
4661
*
4662
* @param c the {@code char} to be converted
4663
* @return the string representation of the specified {@code char}
4664
* @since 1.4
4665
*/
4666
public static String toString(char c) {
4667
return String.valueOf(c);
4668
}
4669
4670
/**
4671
* Determines whether the specified code point is a valid
4672
* <a href="http://www.unicode.org/glossary/#code_point">
4673
* Unicode code point value</a>.
4674
*
4675
* @param codePoint the Unicode code point to be tested
4676
* @return {@code true} if the specified code point value is between
4677
* {@link #MIN_CODE_POINT} and
4678
* {@link #MAX_CODE_POINT} inclusive;
4679
* {@code false} otherwise.
4680
* @since 1.5
4681
*/
4682
public static boolean isValidCodePoint(int codePoint) {
4683
// Optimized form of:
4684
// codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4685
int plane = codePoint >>> 16;
4686
return plane < ((MAX_CODE_POINT + 1) >>> 16);
4687
}
4688
4689
/**
4690
* Determines whether the specified character (Unicode code point)
4691
* is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4692
* Such code points can be represented using a single {@code char}.
4693
*
4694
* @param codePoint the character (Unicode code point) to be tested
4695
* @return {@code true} if the specified code point is between
4696
* {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4697
* {@code false} otherwise.
4698
* @since 1.7
4699
*/
4700
public static boolean isBmpCodePoint(int codePoint) {
4701
return codePoint >>> 16 == 0;
4702
// Optimized form of:
4703
// codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4704
// We consistently use logical shift (>>>) to facilitate
4705
// additional runtime optimizations.
4706
}
4707
4708
/**
4709
* Determines whether the specified character (Unicode code point)
4710
* is in the <a href="#supplementary">supplementary character</a> range.
4711
*
4712
* @param codePoint the character (Unicode code point) to be tested
4713
* @return {@code true} if the specified code point is between
4714
* {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4715
* {@link #MAX_CODE_POINT} inclusive;
4716
* {@code false} otherwise.
4717
* @since 1.5
4718
*/
4719
public static boolean isSupplementaryCodePoint(int codePoint) {
4720
return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4721
&& codePoint < MAX_CODE_POINT + 1;
4722
}
4723
4724
/**
4725
* Determines if the given {@code char} value is a
4726
* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4727
* Unicode high-surrogate code unit</a>
4728
* (also known as <i>leading-surrogate code unit</i>).
4729
*
4730
* <p>Such values do not represent characters by themselves,
4731
* but are used in the representation of
4732
* <a href="#supplementary">supplementary characters</a>
4733
* in the UTF-16 encoding.
4734
*
4735
* @param ch the {@code char} value to be tested.
4736
* @return {@code true} if the {@code char} value is between
4737
* {@link #MIN_HIGH_SURROGATE} and
4738
* {@link #MAX_HIGH_SURROGATE} inclusive;
4739
* {@code false} otherwise.
4740
* @see Character#isLowSurrogate(char)
4741
* @see Character.UnicodeBlock#of(int)
4742
* @since 1.5
4743
*/
4744
public static boolean isHighSurrogate(char ch) {
4745
// Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4746
return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4747
}
4748
4749
/**
4750
* Determines if the given {@code char} value is a
4751
* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4752
* Unicode low-surrogate code unit</a>
4753
* (also known as <i>trailing-surrogate code unit</i>).
4754
*
4755
* <p>Such values do not represent characters by themselves,
4756
* but are used in the representation of
4757
* <a href="#supplementary">supplementary characters</a>
4758
* in the UTF-16 encoding.
4759
*
4760
* @param ch the {@code char} value to be tested.
4761
* @return {@code true} if the {@code char} value is between
4762
* {@link #MIN_LOW_SURROGATE} and
4763
* {@link #MAX_LOW_SURROGATE} inclusive;
4764
* {@code false} otherwise.
4765
* @see Character#isHighSurrogate(char)
4766
* @since 1.5
4767
*/
4768
public static boolean isLowSurrogate(char ch) {
4769
return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4770
}
4771
4772
/**
4773
* Determines if the given {@code char} value is a Unicode
4774
* <i>surrogate code unit</i>.
4775
*
4776
* <p>Such values do not represent characters by themselves,
4777
* but are used in the representation of
4778
* <a href="#supplementary">supplementary characters</a>
4779
* in the UTF-16 encoding.
4780
*
4781
* <p>A char value is a surrogate code unit if and only if it is either
4782
* a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4783
* a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4784
*
4785
* @param ch the {@code char} value to be tested.
4786
* @return {@code true} if the {@code char} value is between
4787
* {@link #MIN_SURROGATE} and
4788
* {@link #MAX_SURROGATE} inclusive;
4789
* {@code false} otherwise.
4790
* @since 1.7
4791
*/
4792
public static boolean isSurrogate(char ch) {
4793
return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4794
}
4795
4796
/**
4797
* Determines whether the specified pair of {@code char}
4798
* values is a valid
4799
* <a href="http://www.unicode.org/glossary/#surrogate_pair">
4800
* Unicode surrogate pair</a>.
4801
4802
* <p>This method is equivalent to the expression:
4803
* <blockquote><pre>{@code
4804
* isHighSurrogate(high) && isLowSurrogate(low)
4805
* }</pre></blockquote>
4806
*
4807
* @param high the high-surrogate code value to be tested
4808
* @param low the low-surrogate code value to be tested
4809
* @return {@code true} if the specified high and
4810
* low-surrogate code values represent a valid surrogate pair;
4811
* {@code false} otherwise.
4812
* @since 1.5
4813
*/
4814
public static boolean isSurrogatePair(char high, char low) {
4815
return isHighSurrogate(high) && isLowSurrogate(low);
4816
}
4817
4818
/**
4819
* Determines the number of {@code char} values needed to
4820
* represent the specified character (Unicode code point). If the
4821
* specified character is equal to or greater than 0x10000, then
4822
* the method returns 2. Otherwise, the method returns 1.
4823
*
4824
* <p>This method doesn't validate the specified character to be a
4825
* valid Unicode code point. The caller must validate the
4826
* character value using {@link #isValidCodePoint(int) isValidCodePoint}
4827
* if necessary.
4828
*
4829
* @param codePoint the character (Unicode code point) to be tested.
4830
* @return 2 if the code point is equal to or greater than 0x10000; 1 otherwise.
4831
* @see Character#isSupplementaryCodePoint(int)
4832
* @since 1.5
4833
*/
4834
public static int charCount(int codePoint) {
4835
return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4836
}
4837
4838
/**
4839
* Converts the specified surrogate pair to its supplementary code
4840
* point value. This method does not validate the specified
4841
* surrogate pair. The caller must validate it using {@link
4842
* #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4843
*
4844
* @param high the high-surrogate code unit
4845
* @param low the low-surrogate code unit
4846
* @return the supplementary code point composed from the
4847
* specified surrogate pair.
4848
* @since 1.5
4849
*/
4850
public static int toCodePoint(char high, char low) {
4851
// Optimized form of:
4852
// return ((high - MIN_HIGH_SURROGATE) << 10)
4853
// + (low - MIN_LOW_SURROGATE)
4854
// + MIN_SUPPLEMENTARY_CODE_POINT;
4855
return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4856
- (MIN_HIGH_SURROGATE << 10)
4857
- MIN_LOW_SURROGATE);
4858
}
4859
4860
/**
4861
* Returns the code point at the given index of the
4862
* {@code CharSequence}. If the {@code char} value at
4863
* the given index in the {@code CharSequence} is in the
4864
* high-surrogate range, the following index is less than the
4865
* length of the {@code CharSequence}, and the
4866
* {@code char} value at the following index is in the
4867
* low-surrogate range, then the supplementary code point
4868
* corresponding to this surrogate pair is returned. Otherwise,
4869
* the {@code char} value at the given index is returned.
4870
*
4871
* @param seq a sequence of {@code char} values (Unicode code
4872
* units)
4873
* @param index the index to the {@code char} values (Unicode
4874
* code units) in {@code seq} to be converted
4875
* @return the Unicode code point at the given index
4876
* @exception NullPointerException if {@code seq} is null.
4877
* @exception IndexOutOfBoundsException if the value
4878
* {@code index} is negative or not less than
4879
* {@link CharSequence#length() seq.length()}.
4880
* @since 1.5
4881
*/
4882
public static int codePointAt(CharSequence seq, int index) {
4883
char c1 = seq.charAt(index);
4884
if (isHighSurrogate(c1) && ++index < seq.length()) {
4885
char c2 = seq.charAt(index);
4886
if (isLowSurrogate(c2)) {
4887
return toCodePoint(c1, c2);
4888
}
4889
}
4890
return c1;
4891
}
4892
4893
/**
4894
* Returns the code point at the given index of the
4895
* {@code char} array. If the {@code char} value at
4896
* the given index in the {@code char} array is in the
4897
* high-surrogate range, the following index is less than the
4898
* length of the {@code char} array, and the
4899
* {@code char} value at the following index is in the
4900
* low-surrogate range, then the supplementary code point
4901
* corresponding to this surrogate pair is returned. Otherwise,
4902
* the {@code char} value at the given index is returned.
4903
*
4904
* @param a the {@code char} array
4905
* @param index the index to the {@code char} values (Unicode
4906
* code units) in the {@code char} array to be converted
4907
* @return the Unicode code point at the given index
4908
* @exception NullPointerException if {@code a} is null.
4909
* @exception IndexOutOfBoundsException if the value
4910
* {@code index} is negative or not less than
4911
* the length of the {@code char} array.
4912
* @since 1.5
4913
*/
4914
public static int codePointAt(char[] a, int index) {
4915
return codePointAtImpl(a, index, a.length);
4916
}
4917
4918
/**
4919
* Returns the code point at the given index of the
4920
* {@code char} array, where only array elements with
4921
* {@code index} less than {@code limit} can be used. If
4922
* the {@code char} value at the given index in the
4923
* {@code char} array is in the high-surrogate range, the
4924
* following index is less than the {@code limit}, and the
4925
* {@code char} value at the following index is in the
4926
* low-surrogate range, then the supplementary code point
4927
* corresponding to this surrogate pair is returned. Otherwise,
4928
* the {@code char} value at the given index is returned.
4929
*
4930
* @param a the {@code char} array
4931
* @param index the index to the {@code char} values (Unicode
4932
* code units) in the {@code char} array to be converted
4933
* @param limit the index after the last array element that
4934
* can be used in the {@code char} array
4935
* @return the Unicode code point at the given index
4936
* @exception NullPointerException if {@code a} is null.
4937
* @exception IndexOutOfBoundsException if the {@code index}
4938
* argument is negative or not less than the {@code limit}
4939
* argument, or if the {@code limit} argument is negative or
4940
* greater than the length of the {@code char} array.
4941
* @since 1.5
4942
*/
4943
public static int codePointAt(char[] a, int index, int limit) {
4944
if (index >= limit || limit < 0 || limit > a.length) {
4945
throw new IndexOutOfBoundsException();
4946
}
4947
return codePointAtImpl(a, index, limit);
4948
}
4949
4950
// throws ArrayIndexOutOfBoundsException if index out of bounds
4951
static int codePointAtImpl(char[] a, int index, int limit) {
4952
char c1 = a[index];
4953
if (isHighSurrogate(c1) && ++index < limit) {
4954
char c2 = a[index];
4955
if (isLowSurrogate(c2)) {
4956
return toCodePoint(c1, c2);
4957
}
4958
}
4959
return c1;
4960
}
4961
4962
/**
4963
* Returns the code point preceding the given index of the
4964
* {@code CharSequence}. If the {@code char} value at
4965
* {@code (index - 1)} in the {@code CharSequence} is in
4966
* the low-surrogate range, {@code (index - 2)} is not
4967
* negative, and the {@code char} value at {@code (index - 2)}
4968
* in the {@code CharSequence} is in the
4969
* high-surrogate range, then the supplementary code point
4970
* corresponding to this surrogate pair is returned. Otherwise,
4971
* the {@code char} value at {@code (index - 1)} is
4972
* returned.
4973
*
4974
* @param seq the {@code CharSequence} instance
4975
* @param index the index following the code point that should be returned
4976
* @return the Unicode code point value before the given index.
4977
* @exception NullPointerException if {@code seq} is null.
4978
* @exception IndexOutOfBoundsException if the {@code index}
4979
* argument is less than 1 or greater than {@link
4980
* CharSequence#length() seq.length()}.
4981
* @since 1.5
4982
*/
4983
public static int codePointBefore(CharSequence seq, int index) {
4984
char c2 = seq.charAt(--index);
4985
if (isLowSurrogate(c2) && index > 0) {
4986
char c1 = seq.charAt(--index);
4987
if (isHighSurrogate(c1)) {
4988
return toCodePoint(c1, c2);
4989
}
4990
}
4991
return c2;
4992
}
4993
4994
/**
4995
* Returns the code point preceding the given index of the
4996
* {@code char} array. If the {@code char} value at
4997
* {@code (index - 1)} in the {@code char} array is in
4998
* the low-surrogate range, {@code (index - 2)} is not
4999
* negative, and the {@code char} value at {@code (index - 2)}
5000
* in the {@code char} array is in the
5001
* high-surrogate range, then the supplementary code point
5002
* corresponding to this surrogate pair is returned. Otherwise,
5003
* the {@code char} value at {@code (index - 1)} is
5004
* returned.
5005
*
5006
* @param a the {@code char} array
5007
* @param index the index following the code point that should be returned
5008
* @return the Unicode code point value before the given index.
5009
* @exception NullPointerException if {@code a} is null.
5010
* @exception IndexOutOfBoundsException if the {@code index}
5011
* argument is less than 1 or greater than the length of the
5012
* {@code char} array
5013
* @since 1.5
5014
*/
5015
public static int codePointBefore(char[] a, int index) {
5016
return codePointBeforeImpl(a, index, 0);
5017
}
5018
5019
/**
5020
* Returns the code point preceding the given index of the
5021
* {@code char} array, where only array elements with
5022
* {@code index} greater than or equal to {@code start}
5023
* can be used. If the {@code char} value at {@code (index - 1)}
5024
* in the {@code char} array is in the
5025
* low-surrogate range, {@code (index - 2)} is not less than
5026
* {@code start}, and the {@code char} value at
5027
* {@code (index - 2)} in the {@code char} array is in
5028
* the high-surrogate range, then the supplementary code point
5029
* corresponding to this surrogate pair is returned. Otherwise,
5030
* the {@code char} value at {@code (index - 1)} is
5031
* returned.
5032
*
5033
* @param a the {@code char} array
5034
* @param index the index following the code point that should be returned
5035
* @param start the index of the first array element in the
5036
* {@code char} array
5037
* @return the Unicode code point value before the given index.
5038
* @exception NullPointerException if {@code a} is null.
5039
* @exception IndexOutOfBoundsException if the {@code index}
5040
* argument is not greater than the {@code start} argument or
5041
* is greater than the length of the {@code char} array, or
5042
* if the {@code start} argument is negative or not less than
5043
* the length of the {@code char} array.
5044
* @since 1.5
5045
*/
5046
public static int codePointBefore(char[] a, int index, int start) {
5047
if (index <= start || start < 0 || start >= a.length) {
5048
throw new IndexOutOfBoundsException();
5049
}
5050
return codePointBeforeImpl(a, index, start);
5051
}
5052
5053
// throws ArrayIndexOutOfBoundsException if index-1 out of bounds
5054
static int codePointBeforeImpl(char[] a, int index, int start) {
5055
char c2 = a[--index];
5056
if (isLowSurrogate(c2) && index > start) {
5057
char c1 = a[--index];
5058
if (isHighSurrogate(c1)) {
5059
return toCodePoint(c1, c2);
5060
}
5061
}
5062
return c2;
5063
}
5064
5065
/**
5066
* Returns the leading surrogate (a
5067
* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
5068
* high surrogate code unit</a>) of the
5069
* <a href="http://www.unicode.org/glossary/#surrogate_pair">
5070
* surrogate pair</a>
5071
* representing the specified supplementary character (Unicode
5072
* code point) in the UTF-16 encoding. If the specified character
5073
* is not a
5074
* <a href="Character.html#supplementary">supplementary character</a>,
5075
* an unspecified {@code char} is returned.
5076
*
5077
* <p>If
5078
* {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5079
* is {@code true}, then
5080
* {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
5081
* {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
5082
* are also always {@code true}.
5083
*
5084
* @param codePoint a supplementary character (Unicode code point)
5085
* @return the leading surrogate code unit used to represent the
5086
* character in the UTF-16 encoding
5087
* @since 1.7
5088
*/
5089
public static char highSurrogate(int codePoint) {
5090
return (char) ((codePoint >>> 10)
5091
+ (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
5092
}
5093
5094
/**
5095
* Returns the trailing surrogate (a
5096
* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
5097
* low surrogate code unit</a>) of the
5098
* <a href="http://www.unicode.org/glossary/#surrogate_pair">
5099
* surrogate pair</a>
5100
* representing the specified supplementary character (Unicode
5101
* code point) in the UTF-16 encoding. If the specified character
5102
* is not a
5103
* <a href="Character.html#supplementary">supplementary character</a>,
5104
* an unspecified {@code char} is returned.
5105
*
5106
* <p>If
5107
* {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5108
* is {@code true}, then
5109
* {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
5110
* {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
5111
* are also always {@code true}.
5112
*
5113
* @param codePoint a supplementary character (Unicode code point)
5114
* @return the trailing surrogate code unit used to represent the
5115
* character in the UTF-16 encoding
5116
* @since 1.7
5117
*/
5118
public static char lowSurrogate(int codePoint) {
5119
return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
5120
}
5121
5122
/**
5123
* Converts the specified character (Unicode code point) to its
5124
* UTF-16 representation. If the specified code point is a BMP
5125
* (Basic Multilingual Plane or Plane 0) value, the same value is
5126
* stored in {@code dst[dstIndex]}, and 1 is returned. If the
5127
* specified code point is a supplementary character, its
5128
* surrogate values are stored in {@code dst[dstIndex]}
5129
* (high-surrogate) and {@code dst[dstIndex+1]}
5130
* (low-surrogate), and 2 is returned.
5131
*
5132
* @param codePoint the character (Unicode code point) to be converted.
5133
* @param dst an array of {@code char} in which the
5134
* {@code codePoint}'s UTF-16 value is stored.
5135
* @param dstIndex the start index into the {@code dst}
5136
* array where the converted value is stored.
5137
* @return 1 if the code point is a BMP code point, 2 if the
5138
* code point is a supplementary code point.
5139
* @exception IllegalArgumentException if the specified
5140
* {@code codePoint} is not a valid Unicode code point.
5141
* @exception NullPointerException if the specified {@code dst} is null.
5142
* @exception IndexOutOfBoundsException if {@code dstIndex}
5143
* is negative or not less than {@code dst.length}, or if
5144
* {@code dst} at {@code dstIndex} doesn't have enough
5145
* array element(s) to store the resulting {@code char}
5146
* value(s). (If {@code dstIndex} is equal to
5147
* {@code dst.length-1} and the specified
5148
* {@code codePoint} is a supplementary character, the
5149
* high-surrogate value is not stored in
5150
* {@code dst[dstIndex]}.)
5151
* @since 1.5
5152
*/
5153
public static int toChars(int codePoint, char[] dst, int dstIndex) {
5154
if (isBmpCodePoint(codePoint)) {
5155
dst[dstIndex] = (char) codePoint;
5156
return 1;
5157
} else if (isValidCodePoint(codePoint)) {
5158
toSurrogates(codePoint, dst, dstIndex);
5159
return 2;
5160
} else {
5161
throw new IllegalArgumentException();
5162
}
5163
}
5164
5165
/**
5166
* Converts the specified character (Unicode code point) to its
5167
* UTF-16 representation stored in a {@code char} array. If
5168
* the specified code point is a BMP (Basic Multilingual Plane or
5169
* Plane 0) value, the resulting {@code char} array has
5170
* the same value as {@code codePoint}. If the specified code
5171
* point is a supplementary code point, the resulting
5172
* {@code char} array has the corresponding surrogate pair.
5173
*
5174
* @param codePoint a Unicode code point
5175
* @return a {@code char} array having
5176
* {@code codePoint}'s UTF-16 representation.
5177
* @exception IllegalArgumentException if the specified
5178
* {@code codePoint} is not a valid Unicode code point.
5179
* @since 1.5
5180
*/
5181
public static char[] toChars(int codePoint) {
5182
if (isBmpCodePoint(codePoint)) {
5183
return new char[] { (char) codePoint };
5184
} else if (isValidCodePoint(codePoint)) {
5185
char[] result = new char[2];
5186
toSurrogates(codePoint, result, 0);
5187
return result;
5188
} else {
5189
throw new IllegalArgumentException();
5190
}
5191
}
5192
5193
static void toSurrogates(int codePoint, char[] dst, int index) {
5194
// We write elements "backwards" to guarantee all-or-nothing
5195
dst[index+1] = lowSurrogate(codePoint);
5196
dst[index] = highSurrogate(codePoint);
5197
}
5198
5199
/**
5200
* Returns the number of Unicode code points in the text range of
5201
* the specified char sequence. The text range begins at the
5202
* specified {@code beginIndex} and extends to the
5203
* {@code char} at index {@code endIndex - 1}. Thus the
5204
* length (in {@code char}s) of the text range is
5205
* {@code endIndex-beginIndex}. Unpaired surrogates within
5206
* the text range count as one code point each.
5207
*
5208
* @param seq the char sequence
5209
* @param beginIndex the index to the first {@code char} of
5210
* the text range.
5211
* @param endIndex the index after the last {@code char} of
5212
* the text range.
5213
* @return the number of Unicode code points in the specified text
5214
* range
5215
* @exception NullPointerException if {@code seq} is null.
5216
* @exception IndexOutOfBoundsException if the
5217
* {@code beginIndex} is negative, or {@code endIndex}
5218
* is larger than the length of the given sequence, or
5219
* {@code beginIndex} is larger than {@code endIndex}.
5220
* @since 1.5
5221
*/
5222
public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5223
int length = seq.length();
5224
if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5225
throw new IndexOutOfBoundsException();
5226
}
5227
int n = endIndex - beginIndex;
5228
for (int i = beginIndex; i < endIndex; ) {
5229
if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5230
isLowSurrogate(seq.charAt(i))) {
5231
n--;
5232
i++;
5233
}
5234
}
5235
return n;
5236
}
5237
5238
/**
5239
* Returns the number of Unicode code points in a subarray of the
5240
* {@code char} array argument. The {@code offset}
5241
* argument is the index of the first {@code char} of the
5242
* subarray and the {@code count} argument specifies the
5243
* length of the subarray in {@code char}s. Unpaired
5244
* surrogates within the subarray count as one code point each.
5245
*
5246
* @param a the {@code char} array
5247
* @param offset the index of the first {@code char} in the
5248
* given {@code char} array
5249
* @param count the length of the subarray in {@code char}s
5250
* @return the number of Unicode code points in the specified subarray
5251
* @exception NullPointerException if {@code a} is null.
5252
* @exception IndexOutOfBoundsException if {@code offset} or
5253
* {@code count} is negative, or if {@code offset +
5254
* count} is larger than the length of the given array.
5255
* @since 1.5
5256
*/
5257
public static int codePointCount(char[] a, int offset, int count) {
5258
if (count > a.length - offset || offset < 0 || count < 0) {
5259
throw new IndexOutOfBoundsException();
5260
}
5261
return codePointCountImpl(a, offset, count);
5262
}
5263
5264
static int codePointCountImpl(char[] a, int offset, int count) {
5265
int endIndex = offset + count;
5266
int n = count;
5267
for (int i = offset; i < endIndex; ) {
5268
if (isHighSurrogate(a[i++]) && i < endIndex &&
5269
isLowSurrogate(a[i])) {
5270
n--;
5271
i++;
5272
}
5273
}
5274
return n;
5275
}
5276
5277
/**
5278
* Returns the index within the given char sequence that is offset
5279
* from the given {@code index} by {@code codePointOffset}
5280
* code points. Unpaired surrogates within the text range given by
5281
* {@code index} and {@code codePointOffset} count as
5282
* one code point each.
5283
*
5284
* @param seq the char sequence
5285
* @param index the index to be offset
5286
* @param codePointOffset the offset in code points
5287
* @return the index within the char sequence
5288
* @exception NullPointerException if {@code seq} is null.
5289
* @exception IndexOutOfBoundsException if {@code index}
5290
* is negative or larger than the length of the char sequence,
5291
* or if {@code codePointOffset} is positive and the
5292
* subsequence starting with {@code index} has fewer than
5293
* {@code codePointOffset} code points, or if
5294
* {@code codePointOffset} is negative and the subsequence
5295
* before {@code index} has fewer than the absolute value
5296
* of {@code codePointOffset} code points.
5297
* @since 1.5
5298
*/
5299
public static int offsetByCodePoints(CharSequence seq, int index,
5300
int codePointOffset) {
5301
int length = seq.length();
5302
if (index < 0 || index > length) {
5303
throw new IndexOutOfBoundsException();
5304
}
5305
5306
int x = index;
5307
if (codePointOffset >= 0) {
5308
int i;
5309
for (i = 0; x < length && i < codePointOffset; i++) {
5310
if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5311
isLowSurrogate(seq.charAt(x))) {
5312
x++;
5313
}
5314
}
5315
if (i < codePointOffset) {
5316
throw new IndexOutOfBoundsException();
5317
}
5318
} else {
5319
int i;
5320
for (i = codePointOffset; x > 0 && i < 0; i++) {
5321
if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5322
isHighSurrogate(seq.charAt(x-1))) {
5323
x--;
5324
}
5325
}
5326
if (i < 0) {
5327
throw new IndexOutOfBoundsException();
5328
}
5329
}
5330
return x;
5331
}
5332
5333
/**
5334
* Returns the index within the given {@code char} subarray
5335
* that is offset from the given {@code index} by
5336
* {@code codePointOffset} code points. The
5337
* {@code start} and {@code count} arguments specify a
5338
* subarray of the {@code char} array. Unpaired surrogates
5339
* within the text range given by {@code index} and
5340
* {@code codePointOffset} count as one code point each.
5341
*
5342
* @param a the {@code char} array
5343
* @param start the index of the first {@code char} of the
5344
* subarray
5345
* @param count the length of the subarray in {@code char}s
5346
* @param index the index to be offset
5347
* @param codePointOffset the offset in code points
5348
* @return the index within the subarray
5349
* @exception NullPointerException if {@code a} is null.
5350
* @exception IndexOutOfBoundsException
5351
* if {@code start} or {@code count} is negative,
5352
* or if {@code start + count} is larger than the length of
5353
* the given array,
5354
* or if {@code index} is less than {@code start} or
5355
* larger than {@code start + count},
5356
* or if {@code codePointOffset} is positive and the text range
5357
* starting with {@code index} and ending with {@code start + count - 1}
5358
* has fewer than {@code codePointOffset} code
5359
* points,
5360
* or if {@code codePointOffset} is negative and the text range
5361
* starting with {@code start} and ending with {@code index - 1}
5362
* has fewer than the absolute value of
5363
* {@code codePointOffset} code points.
5364
* @since 1.5
5365
*/
5366
public static int offsetByCodePoints(char[] a, int start, int count,
5367
int index, int codePointOffset) {
5368
if (count > a.length-start || start < 0 || count < 0
5369
|| index < start || index > start+count) {
5370
throw new IndexOutOfBoundsException();
5371
}
5372
return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5373
}
5374
5375
static int offsetByCodePointsImpl(char[]a, int start, int count,
5376
int index, int codePointOffset) {
5377
int x = index;
5378
if (codePointOffset >= 0) {
5379
int limit = start + count;
5380
int i;
5381
for (i = 0; x < limit && i < codePointOffset; i++) {
5382
if (isHighSurrogate(a[x++]) && x < limit &&
5383
isLowSurrogate(a[x])) {
5384
x++;
5385
}
5386
}
5387
if (i < codePointOffset) {
5388
throw new IndexOutOfBoundsException();
5389
}
5390
} else {
5391
int i;
5392
for (i = codePointOffset; x > start && i < 0; i++) {
5393
if (isLowSurrogate(a[--x]) && x > start &&
5394
isHighSurrogate(a[x-1])) {
5395
x--;
5396
}
5397
}
5398
if (i < 0) {
5399
throw new IndexOutOfBoundsException();
5400
}
5401
}
5402
return x;
5403
}
5404
5405
/**
5406
* Determines if the specified character is a lowercase character.
5407
* <p>
5408
* A character is lowercase if its general category type, provided
5409
* by {@code Character.getType(ch)}, is
5410
* {@code LOWERCASE_LETTER}, or it has contributory property
5411
* Other_Lowercase as defined by the Unicode Standard.
5412
* <p>
5413
* The following are examples of lowercase characters:
5414
* <blockquote><pre>
5415
* a b c d e f g h i j k l m n o p q r s t u v w x y z
5416
* '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5417
* '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5418
* '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5419
* '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5420
* </pre></blockquote>
5421
* <p> Many other Unicode characters are lowercase too.
5422
*
5423
* <p><b>Note:</b> This method cannot handle <a
5424
* href="#supplementary"> supplementary characters</a>. To support
5425
* all Unicode characters, including supplementary characters, use
5426
* the {@link #isLowerCase(int)} method.
5427
*
5428
* @param ch the character to be tested.
5429
* @return {@code true} if the character is lowercase;
5430
* {@code false} otherwise.
5431
* @see Character#isLowerCase(char)
5432
* @see Character#isTitleCase(char)
5433
* @see Character#toLowerCase(char)
5434
* @see Character#getType(char)
5435
*/
5436
public static boolean isLowerCase(char ch) {
5437
return isLowerCase((int)ch);
5438
}
5439
5440
/**
5441
* Determines if the specified character (Unicode code point) is a
5442
* lowercase character.
5443
* <p>
5444
* A character is lowercase if its general category type, provided
5445
* by {@link Character#getType getType(codePoint)}, is
5446
* {@code LOWERCASE_LETTER}, or it has contributory property
5447
* Other_Lowercase as defined by the Unicode Standard.
5448
* <p>
5449
* The following are examples of lowercase characters:
5450
* <blockquote><pre>
5451
* a b c d e f g h i j k l m n o p q r s t u v w x y z
5452
* '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5453
* '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5454
* '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5455
* '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5456
* </pre></blockquote>
5457
* <p> Many other Unicode characters are lowercase too.
5458
*
5459
* @param codePoint the character (Unicode code point) to be tested.
5460
* @return {@code true} if the character is lowercase;
5461
* {@code false} otherwise.
5462
* @see Character#isLowerCase(int)
5463
* @see Character#isTitleCase(int)
5464
* @see Character#toLowerCase(int)
5465
* @see Character#getType(int)
5466
* @since 1.5
5467
*/
5468
public static boolean isLowerCase(int codePoint) {
5469
return getType(codePoint) == Character.LOWERCASE_LETTER ||
5470
CharacterData.of(codePoint).isOtherLowercase(codePoint);
5471
}
5472
5473
/**
5474
* Determines if the specified character is an uppercase character.
5475
* <p>
5476
* A character is uppercase if its general category type, provided by
5477
* {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
5478
* or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5479
* <p>
5480
* The following are examples of uppercase characters:
5481
* <blockquote><pre>
5482
* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5483
* '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5484
* '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5485
* '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5486
* '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5487
* </pre></blockquote>
5488
* <p> Many other Unicode characters are uppercase too.
5489
*
5490
* <p><b>Note:</b> This method cannot handle <a
5491
* href="#supplementary"> supplementary characters</a>. To support
5492
* all Unicode characters, including supplementary characters, use
5493
* the {@link #isUpperCase(int)} method.
5494
*
5495
* @param ch the character to be tested.
5496
* @return {@code true} if the character is uppercase;
5497
* {@code false} otherwise.
5498
* @see Character#isLowerCase(char)
5499
* @see Character#isTitleCase(char)
5500
* @see Character#toUpperCase(char)
5501
* @see Character#getType(char)
5502
* @since 1.0
5503
*/
5504
public static boolean isUpperCase(char ch) {
5505
return isUpperCase((int)ch);
5506
}
5507
5508
/**
5509
* Determines if the specified character (Unicode code point) is an uppercase character.
5510
* <p>
5511
* A character is uppercase if its general category type, provided by
5512
* {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5513
* or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5514
* <p>
5515
* The following are examples of uppercase characters:
5516
* <blockquote><pre>
5517
* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5518
* '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5519
* '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5520
* '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5521
* '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5522
* </pre></blockquote>
5523
* <p> Many other Unicode characters are uppercase too.
5524
*
5525
* @param codePoint the character (Unicode code point) to be tested.
5526
* @return {@code true} if the character is uppercase;
5527
* {@code false} otherwise.
5528
* @see Character#isLowerCase(int)
5529
* @see Character#isTitleCase(int)
5530
* @see Character#toUpperCase(int)
5531
* @see Character#getType(int)
5532
* @since 1.5
5533
*/
5534
public static boolean isUpperCase(int codePoint) {
5535
return getType(codePoint) == Character.UPPERCASE_LETTER ||
5536
CharacterData.of(codePoint).isOtherUppercase(codePoint);
5537
}
5538
5539
/**
5540
* Determines if the specified character is a titlecase character.
5541
* <p>
5542
* A character is a titlecase character if its general
5543
* category type, provided by {@code Character.getType(ch)},
5544
* is {@code TITLECASE_LETTER}.
5545
* <p>
5546
* Some characters look like pairs of Latin letters. For example, there
5547
* is an uppercase letter that looks like "LJ" and has a corresponding
5548
* lowercase letter that looks like "lj". A third form, which looks like "Lj",
5549
* is the appropriate form to use when rendering a word in lowercase
5550
* with initial capitals, as for a book title.
5551
* <p>
5552
* These are some of the Unicode characters for which this method returns
5553
* {@code true}:
5554
* <ul>
5555
* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5556
* <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5557
* <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5558
* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5559
* </ul>
5560
* <p> Many other Unicode characters are titlecase too.
5561
*
5562
* <p><b>Note:</b> This method cannot handle <a
5563
* href="#supplementary"> supplementary characters</a>. To support
5564
* all Unicode characters, including supplementary characters, use
5565
* the {@link #isTitleCase(int)} method.
5566
*
5567
* @param ch the character to be tested.
5568
* @return {@code true} if the character is titlecase;
5569
* {@code false} otherwise.
5570
* @see Character#isLowerCase(char)
5571
* @see Character#isUpperCase(char)
5572
* @see Character#toTitleCase(char)
5573
* @see Character#getType(char)
5574
* @since 1.0.2
5575
*/
5576
public static boolean isTitleCase(char ch) {
5577
return isTitleCase((int)ch);
5578
}
5579
5580
/**
5581
* Determines if the specified character (Unicode code point) is a titlecase character.
5582
* <p>
5583
* A character is a titlecase character if its general
5584
* category type, provided by {@link Character#getType(int) getType(codePoint)},
5585
* is {@code TITLECASE_LETTER}.
5586
* <p>
5587
* Some characters look like pairs of Latin letters. For example, there
5588
* is an uppercase letter that looks like "LJ" and has a corresponding
5589
* lowercase letter that looks like "lj". A third form, which looks like "Lj",
5590
* is the appropriate form to use when rendering a word in lowercase
5591
* with initial capitals, as for a book title.
5592
* <p>
5593
* These are some of the Unicode characters for which this method returns
5594
* {@code true}:
5595
* <ul>
5596
* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5597
* <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5598
* <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5599
* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5600
* </ul>
5601
* <p> Many other Unicode characters are titlecase too.
5602
*
5603
* @param codePoint the character (Unicode code point) to be tested.
5604
* @return {@code true} if the character is titlecase;
5605
* {@code false} otherwise.
5606
* @see Character#isLowerCase(int)
5607
* @see Character#isUpperCase(int)
5608
* @see Character#toTitleCase(int)
5609
* @see Character#getType(int)
5610
* @since 1.5
5611
*/
5612
public static boolean isTitleCase(int codePoint) {
5613
return getType(codePoint) == Character.TITLECASE_LETTER;
5614
}
5615
5616
/**
5617
* Determines if the specified character is a digit.
5618
* <p>
5619
* A character is a digit if its general category type, provided
5620
* by {@code Character.getType(ch)}, is
5621
* {@code DECIMAL_DIGIT_NUMBER}.
5622
* <p>
5623
* Some Unicode character ranges that contain digits:
5624
* <ul>
5625
* <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5626
* ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5627
* <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5628
* Arabic-Indic digits
5629
* <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5630
* Extended Arabic-Indic digits
5631
* <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5632
* Devanagari digits
5633
* <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5634
* Fullwidth digits
5635
* </ul>
5636
*
5637
* Many other character ranges contain digits as well.
5638
*
5639
* <p><b>Note:</b> This method cannot handle <a
5640
* href="#supplementary"> supplementary characters</a>. To support
5641
* all Unicode characters, including supplementary characters, use
5642
* the {@link #isDigit(int)} method.
5643
*
5644
* @param ch the character to be tested.
5645
* @return {@code true} if the character is a digit;
5646
* {@code false} otherwise.
5647
* @see Character#digit(char, int)
5648
* @see Character#forDigit(int, int)
5649
* @see Character#getType(char)
5650
*/
5651
public static boolean isDigit(char ch) {
5652
return isDigit((int)ch);
5653
}
5654
5655
/**
5656
* Determines if the specified character (Unicode code point) is a digit.
5657
* <p>
5658
* A character is a digit if its general category type, provided
5659
* by {@link Character#getType(int) getType(codePoint)}, is
5660
* {@code DECIMAL_DIGIT_NUMBER}.
5661
* <p>
5662
* Some Unicode character ranges that contain digits:
5663
* <ul>
5664
* <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5665
* ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5666
* <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5667
* Arabic-Indic digits
5668
* <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5669
* Extended Arabic-Indic digits
5670
* <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5671
* Devanagari digits
5672
* <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5673
* Fullwidth digits
5674
* </ul>
5675
*
5676
* Many other character ranges contain digits as well.
5677
*
5678
* @param codePoint the character (Unicode code point) to be tested.
5679
* @return {@code true} if the character is a digit;
5680
* {@code false} otherwise.
5681
* @see Character#forDigit(int, int)
5682
* @see Character#getType(int)
5683
* @since 1.5
5684
*/
5685
public static boolean isDigit(int codePoint) {
5686
return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
5687
}
5688
5689
/**
5690
* Determines if a character is defined in Unicode.
5691
* <p>
5692
* A character is defined if at least one of the following is true:
5693
* <ul>
5694
* <li>It has an entry in the UnicodeData file.
5695
* <li>It has a value in a range defined by the UnicodeData file.
5696
* </ul>
5697
*
5698
* <p><b>Note:</b> This method cannot handle <a
5699
* href="#supplementary"> supplementary characters</a>. To support
5700
* all Unicode characters, including supplementary characters, use
5701
* the {@link #isDefined(int)} method.
5702
*
5703
* @param ch the character to be tested
5704
* @return {@code true} if the character has a defined meaning
5705
* in Unicode; {@code false} otherwise.
5706
* @see Character#isDigit(char)
5707
* @see Character#isLetter(char)
5708
* @see Character#isLetterOrDigit(char)
5709
* @see Character#isLowerCase(char)
5710
* @see Character#isTitleCase(char)
5711
* @see Character#isUpperCase(char)
5712
* @since 1.0.2
5713
*/
5714
public static boolean isDefined(char ch) {
5715
return isDefined((int)ch);
5716
}
5717
5718
/**
5719
* Determines if a character (Unicode code point) is defined in Unicode.
5720
* <p>
5721
* A character is defined if at least one of the following is true:
5722
* <ul>
5723
* <li>It has an entry in the UnicodeData file.
5724
* <li>It has a value in a range defined by the UnicodeData file.
5725
* </ul>
5726
*
5727
* @param codePoint the character (Unicode code point) to be tested.
5728
* @return {@code true} if the character has a defined meaning
5729
* in Unicode; {@code false} otherwise.
5730
* @see Character#isDigit(int)
5731
* @see Character#isLetter(int)
5732
* @see Character#isLetterOrDigit(int)
5733
* @see Character#isLowerCase(int)
5734
* @see Character#isTitleCase(int)
5735
* @see Character#isUpperCase(int)
5736
* @since 1.5
5737
*/
5738
public static boolean isDefined(int codePoint) {
5739
return getType(codePoint) != Character.UNASSIGNED;
5740
}
5741
5742
/**
5743
* Determines if the specified character is a letter.
5744
* <p>
5745
* A character is considered to be a letter if its general
5746
* category type, provided by {@code Character.getType(ch)},
5747
* is any of the following:
5748
* <ul>
5749
* <li> {@code UPPERCASE_LETTER}
5750
* <li> {@code LOWERCASE_LETTER}
5751
* <li> {@code TITLECASE_LETTER}
5752
* <li> {@code MODIFIER_LETTER}
5753
* <li> {@code OTHER_LETTER}
5754
* </ul>
5755
*
5756
* Not all letters have case. Many characters are
5757
* letters but are neither uppercase nor lowercase nor titlecase.
5758
*
5759
* <p><b>Note:</b> This method cannot handle <a
5760
* href="#supplementary"> supplementary characters</a>. To support
5761
* all Unicode characters, including supplementary characters, use
5762
* the {@link #isLetter(int)} method.
5763
*
5764
* @param ch the character to be tested.
5765
* @return {@code true} if the character is a letter;
5766
* {@code false} otherwise.
5767
* @see Character#isDigit(char)
5768
* @see Character#isJavaIdentifierStart(char)
5769
* @see Character#isJavaLetter(char)
5770
* @see Character#isJavaLetterOrDigit(char)
5771
* @see Character#isLetterOrDigit(char)
5772
* @see Character#isLowerCase(char)
5773
* @see Character#isTitleCase(char)
5774
* @see Character#isUnicodeIdentifierStart(char)
5775
* @see Character#isUpperCase(char)
5776
*/
5777
public static boolean isLetter(char ch) {
5778
return isLetter((int)ch);
5779
}
5780
5781
/**
5782
* Determines if the specified character (Unicode code point) is a letter.
5783
* <p>
5784
* A character is considered to be a letter if its general
5785
* category type, provided by {@link Character#getType(int) getType(codePoint)},
5786
* is any of the following:
5787
* <ul>
5788
* <li> {@code UPPERCASE_LETTER}
5789
* <li> {@code LOWERCASE_LETTER}
5790
* <li> {@code TITLECASE_LETTER}
5791
* <li> {@code MODIFIER_LETTER}
5792
* <li> {@code OTHER_LETTER}
5793
* </ul>
5794
*
5795
* Not all letters have case. Many characters are
5796
* letters but are neither uppercase nor lowercase nor titlecase.
5797
*
5798
* @param codePoint the character (Unicode code point) to be tested.
5799
* @return {@code true} if the character is a letter;
5800
* {@code false} otherwise.
5801
* @see Character#isDigit(int)
5802
* @see Character#isJavaIdentifierStart(int)
5803
* @see Character#isLetterOrDigit(int)
5804
* @see Character#isLowerCase(int)
5805
* @see Character#isTitleCase(int)
5806
* @see Character#isUnicodeIdentifierStart(int)
5807
* @see Character#isUpperCase(int)
5808
* @since 1.5
5809
*/
5810
public static boolean isLetter(int codePoint) {
5811
return ((((1 << Character.UPPERCASE_LETTER) |
5812
(1 << Character.LOWERCASE_LETTER) |
5813
(1 << Character.TITLECASE_LETTER) |
5814
(1 << Character.MODIFIER_LETTER) |
5815
(1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
5816
!= 0;
5817
}
5818
5819
/**
5820
* Determines if the specified character is a letter or digit.
5821
* <p>
5822
* A character is considered to be a letter or digit if either
5823
* {@code Character.isLetter(char ch)} or
5824
* {@code Character.isDigit(char ch)} returns
5825
* {@code true} for the character.
5826
*
5827
* <p><b>Note:</b> This method cannot handle <a
5828
* href="#supplementary"> supplementary characters</a>. To support
5829
* all Unicode characters, including supplementary characters, use
5830
* the {@link #isLetterOrDigit(int)} method.
5831
*
5832
* @param ch the character to be tested.
5833
* @return {@code true} if the character is a letter or digit;
5834
* {@code false} otherwise.
5835
* @see Character#isDigit(char)
5836
* @see Character#isJavaIdentifierPart(char)
5837
* @see Character#isJavaLetter(char)
5838
* @see Character#isJavaLetterOrDigit(char)
5839
* @see Character#isLetter(char)
5840
* @see Character#isUnicodeIdentifierPart(char)
5841
* @since 1.0.2
5842
*/
5843
public static boolean isLetterOrDigit(char ch) {
5844
return isLetterOrDigit((int)ch);
5845
}
5846
5847
/**
5848
* Determines if the specified character (Unicode code point) is a letter or digit.
5849
* <p>
5850
* A character is considered to be a letter or digit if either
5851
* {@link #isLetter(int) isLetter(codePoint)} or
5852
* {@link #isDigit(int) isDigit(codePoint)} returns
5853
* {@code true} for the character.
5854
*
5855
* @param codePoint the character (Unicode code point) to be tested.
5856
* @return {@code true} if the character is a letter or digit;
5857
* {@code false} otherwise.
5858
* @see Character#isDigit(int)
5859
* @see Character#isJavaIdentifierPart(int)
5860
* @see Character#isLetter(int)
5861
* @see Character#isUnicodeIdentifierPart(int)
5862
* @since 1.5
5863
*/
5864
public static boolean isLetterOrDigit(int codePoint) {
5865
return ((((1 << Character.UPPERCASE_LETTER) |
5866
(1 << Character.LOWERCASE_LETTER) |
5867
(1 << Character.TITLECASE_LETTER) |
5868
(1 << Character.MODIFIER_LETTER) |
5869
(1 << Character.OTHER_LETTER) |
5870
(1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
5871
!= 0;
5872
}
5873
5874
/**
5875
* Determines if the specified character is permissible as the first
5876
* character in a Java identifier.
5877
* <p>
5878
* A character may start a Java identifier if and only if
5879
* one of the following conditions is true:
5880
* <ul>
5881
* <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5882
* <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5883
* <li> {@code ch} is a currency symbol (such as {@code '$'})
5884
* <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5885
* </ul>
5886
*
5887
* These conditions are tested against the character information from version
5888
* 6.2 of the Unicode Standard.
5889
*
5890
* @param ch the character to be tested.
5891
* @return {@code true} if the character may start a Java
5892
* identifier; {@code false} otherwise.
5893
* @see Character#isJavaLetterOrDigit(char)
5894
* @see Character#isJavaIdentifierStart(char)
5895
* @see Character#isJavaIdentifierPart(char)
5896
* @see Character#isLetter(char)
5897
* @see Character#isLetterOrDigit(char)
5898
* @see Character#isUnicodeIdentifierStart(char)
5899
* @since 1.02
5900
* @deprecated Replaced by isJavaIdentifierStart(char).
5901
*/
5902
@Deprecated
5903
public static boolean isJavaLetter(char ch) {
5904
return isJavaIdentifierStart(ch);
5905
}
5906
5907
/**
5908
* Determines if the specified character may be part of a Java
5909
* identifier as other than the first character.
5910
* <p>
5911
* A character may be part of a Java identifier if and only if any
5912
* of the following conditions are true:
5913
* <ul>
5914
* <li> it is a letter
5915
* <li> it is a currency symbol (such as {@code '$'})
5916
* <li> it is a connecting punctuation character (such as {@code '_'})
5917
* <li> it is a digit
5918
* <li> it is a numeric letter (such as a Roman numeral character)
5919
* <li> it is a combining mark
5920
* <li> it is a non-spacing mark
5921
* <li> {@code isIdentifierIgnorable} returns
5922
* {@code true} for the character.
5923
* </ul>
5924
*
5925
* These conditions are tested against the character information from version
5926
* 6.2 of the Unicode Standard.
5927
*
5928
* @param ch the character to be tested.
5929
* @return {@code true} if the character may be part of a
5930
* Java identifier; {@code false} otherwise.
5931
* @see Character#isJavaLetter(char)
5932
* @see Character#isJavaIdentifierStart(char)
5933
* @see Character#isJavaIdentifierPart(char)
5934
* @see Character#isLetter(char)
5935
* @see Character#isLetterOrDigit(char)
5936
* @see Character#isUnicodeIdentifierPart(char)
5937
* @see Character#isIdentifierIgnorable(char)
5938
* @since 1.02
5939
* @deprecated Replaced by isJavaIdentifierPart(char).
5940
*/
5941
@Deprecated
5942
public static boolean isJavaLetterOrDigit(char ch) {
5943
return isJavaIdentifierPart(ch);
5944
}
5945
5946
/**
5947
* Determines if the specified character (Unicode code point) is alphabetic.
5948
* <p>
5949
* A character is considered to be alphabetic if its general category type,
5950
* provided by {@link Character#getType(int) getType(codePoint)}, is any of
5951
* the following:
5952
* <ul>
5953
* <li> <code>UPPERCASE_LETTER</code>
5954
* <li> <code>LOWERCASE_LETTER</code>
5955
* <li> <code>TITLECASE_LETTER</code>
5956
* <li> <code>MODIFIER_LETTER</code>
5957
* <li> <code>OTHER_LETTER</code>
5958
* <li> <code>LETTER_NUMBER</code>
5959
* </ul>
5960
* or it has contributory property Other_Alphabetic as defined by the
5961
* Unicode Standard.
5962
*
5963
* @param codePoint the character (Unicode code point) to be tested.
5964
* @return <code>true</code> if the character is a Unicode alphabet
5965
* character, <code>false</code> otherwise.
5966
* @since 1.7
5967
*/
5968
public static boolean isAlphabetic(int codePoint) {
5969
return (((((1 << Character.UPPERCASE_LETTER) |
5970
(1 << Character.LOWERCASE_LETTER) |
5971
(1 << Character.TITLECASE_LETTER) |
5972
(1 << Character.MODIFIER_LETTER) |
5973
(1 << Character.OTHER_LETTER) |
5974
(1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
5975
CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
5976
}
5977
5978
/**
5979
* Determines if the specified character (Unicode code point) is a CJKV
5980
* (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
5981
* the Unicode Standard.
5982
*
5983
* @param codePoint the character (Unicode code point) to be tested.
5984
* @return <code>true</code> if the character is a Unicode ideograph
5985
* character, <code>false</code> otherwise.
5986
* @since 1.7
5987
*/
5988
public static boolean isIdeographic(int codePoint) {
5989
return CharacterData.of(codePoint).isIdeographic(codePoint);
5990
}
5991
5992
/**
5993
* Determines if the specified character is
5994
* permissible as the first character in a Java identifier.
5995
* <p>
5996
* A character may start a Java identifier if and only if
5997
* one of the following conditions is true:
5998
* <ul>
5999
* <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6000
* <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
6001
* <li> {@code ch} is a currency symbol (such as {@code '$'})
6002
* <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
6003
* </ul>
6004
*
6005
* These conditions are tested against the character information from version
6006
* 6.2 of the Unicode Standard.
6007
*
6008
* <p><b>Note:</b> This method cannot handle <a
6009
* href="#supplementary"> supplementary characters</a>. To support
6010
* all Unicode characters, including supplementary characters, use
6011
* the {@link #isJavaIdentifierStart(int)} method.
6012
*
6013
* @param ch the character to be tested.
6014
* @return {@code true} if the character may start a Java identifier;
6015
* {@code false} otherwise.
6016
* @see Character#isJavaIdentifierPart(char)
6017
* @see Character#isLetter(char)
6018
* @see Character#isUnicodeIdentifierStart(char)
6019
* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6020
* @since 1.1
6021
*/
6022
public static boolean isJavaIdentifierStart(char ch) {
6023
return isJavaIdentifierStart((int)ch);
6024
}
6025
6026
/**
6027
* Determines if the character (Unicode code point) is
6028
* permissible as the first character in a Java identifier.
6029
* <p>
6030
* A character may start a Java identifier if and only if
6031
* one of the following conditions is true:
6032
* <ul>
6033
* <li> {@link #isLetter(int) isLetter(codePoint)}
6034
* returns {@code true}
6035
* <li> {@link #getType(int) getType(codePoint)}
6036
* returns {@code LETTER_NUMBER}
6037
* <li> the referenced character is a currency symbol (such as {@code '$'})
6038
* <li> the referenced character is a connecting punctuation character
6039
* (such as {@code '_'}).
6040
* </ul>
6041
*
6042
* These conditions are tested against the character information from version
6043
* 6.2 of the Unicode Standard.
6044
*
6045
* @param codePoint the character (Unicode code point) to be tested.
6046
* @return {@code true} if the character may start a Java identifier;
6047
* {@code false} otherwise.
6048
* @see Character#isJavaIdentifierPart(int)
6049
* @see Character#isLetter(int)
6050
* @see Character#isUnicodeIdentifierStart(int)
6051
* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6052
* @since 1.5
6053
*/
6054
public static boolean isJavaIdentifierStart(int codePoint) {
6055
return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
6056
}
6057
6058
/**
6059
* Determines if the specified character may be part of a Java
6060
* identifier as other than the first character.
6061
* <p>
6062
* A character may be part of a Java identifier if any of the following
6063
* conditions are true:
6064
* <ul>
6065
* <li> it is a letter
6066
* <li> it is a currency symbol (such as {@code '$'})
6067
* <li> it is a connecting punctuation character (such as {@code '_'})
6068
* <li> it is a digit
6069
* <li> it is a numeric letter (such as a Roman numeral character)
6070
* <li> it is a combining mark
6071
* <li> it is a non-spacing mark
6072
* <li> {@code isIdentifierIgnorable} returns
6073
* {@code true} for the character
6074
* </ul>
6075
*
6076
* These conditions are tested against the character information from version
6077
* 6.2 of the Unicode Standard.
6078
*
6079
* <p><b>Note:</b> This method cannot handle <a
6080
* href="#supplementary"> supplementary characters</a>. To support
6081
* all Unicode characters, including supplementary characters, use
6082
* the {@link #isJavaIdentifierPart(int)} method.
6083
*
6084
* @param ch the character to be tested.
6085
* @return {@code true} if the character may be part of a
6086
* Java identifier; {@code false} otherwise.
6087
* @see Character#isIdentifierIgnorable(char)
6088
* @see Character#isJavaIdentifierStart(char)
6089
* @see Character#isLetterOrDigit(char)
6090
* @see Character#isUnicodeIdentifierPart(char)
6091
* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6092
* @since 1.1
6093
*/
6094
public static boolean isJavaIdentifierPart(char ch) {
6095
return isJavaIdentifierPart((int)ch);
6096
}
6097
6098
/**
6099
* Determines if the character (Unicode code point) may be part of a Java
6100
* identifier as other than the first character.
6101
* <p>
6102
* A character may be part of a Java identifier if any of the following
6103
* conditions are true:
6104
* <ul>
6105
* <li> it is a letter
6106
* <li> it is a currency symbol (such as {@code '$'})
6107
* <li> it is a connecting punctuation character (such as {@code '_'})
6108
* <li> it is a digit
6109
* <li> it is a numeric letter (such as a Roman numeral character)
6110
* <li> it is a combining mark
6111
* <li> it is a non-spacing mark
6112
* <li> {@link #isIdentifierIgnorable(int)
6113
* isIdentifierIgnorable(codePoint)} returns {@code true} for
6114
* the code point
6115
* </ul>
6116
*
6117
* These conditions are tested against the character information from version
6118
* 6.2 of the Unicode Standard.
6119
*
6120
* @param codePoint the character (Unicode code point) to be tested.
6121
* @return {@code true} if the character may be part of a
6122
* Java identifier; {@code false} otherwise.
6123
* @see Character#isIdentifierIgnorable(int)
6124
* @see Character#isJavaIdentifierStart(int)
6125
* @see Character#isLetterOrDigit(int)
6126
* @see Character#isUnicodeIdentifierPart(int)
6127
* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6128
* @since 1.5
6129
*/
6130
public static boolean isJavaIdentifierPart(int codePoint) {
6131
return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
6132
}
6133
6134
/**
6135
* Determines if the specified character is permissible as the
6136
* first character in a Unicode identifier.
6137
* <p>
6138
* A character may start a Unicode identifier if and only if
6139
* one of the following conditions is true:
6140
* <ul>
6141
* <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6142
* <li> {@link #getType(char) getType(ch)} returns
6143
* {@code LETTER_NUMBER}.
6144
* </ul>
6145
*
6146
* <p><b>Note:</b> This method cannot handle <a
6147
* href="#supplementary"> supplementary characters</a>. To support
6148
* all Unicode characters, including supplementary characters, use
6149
* the {@link #isUnicodeIdentifierStart(int)} method.
6150
*
6151
* @param ch the character to be tested.
6152
* @return {@code true} if the character may start a Unicode
6153
* identifier; {@code false} otherwise.
6154
* @see Character#isJavaIdentifierStart(char)
6155
* @see Character#isLetter(char)
6156
* @see Character#isUnicodeIdentifierPart(char)
6157
* @since 1.1
6158
*/
6159
public static boolean isUnicodeIdentifierStart(char ch) {
6160
return isUnicodeIdentifierStart((int)ch);
6161
}
6162
6163
/**
6164
* Determines if the specified character (Unicode code point) is permissible as the
6165
* first character in a Unicode identifier.
6166
* <p>
6167
* A character may start a Unicode identifier if and only if
6168
* one of the following conditions is true:
6169
* <ul>
6170
* <li> {@link #isLetter(int) isLetter(codePoint)}
6171
* returns {@code true}
6172
* <li> {@link #getType(int) getType(codePoint)}
6173
* returns {@code LETTER_NUMBER}.
6174
* </ul>
6175
* @param codePoint the character (Unicode code point) to be tested.
6176
* @return {@code true} if the character may start a Unicode
6177
* identifier; {@code false} otherwise.
6178
* @see Character#isJavaIdentifierStart(int)
6179
* @see Character#isLetter(int)
6180
* @see Character#isUnicodeIdentifierPart(int)
6181
* @since 1.5
6182
*/
6183
public static boolean isUnicodeIdentifierStart(int codePoint) {
6184
return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
6185
}
6186
6187
/**
6188
* Determines if the specified character may be part of a Unicode
6189
* identifier as other than the first character.
6190
* <p>
6191
* A character may be part of a Unicode identifier if and only if
6192
* one of the following statements is true:
6193
* <ul>
6194
* <li> it is a letter
6195
* <li> it is a connecting punctuation character (such as {@code '_'})
6196
* <li> it is a digit
6197
* <li> it is a numeric letter (such as a Roman numeral character)
6198
* <li> it is a combining mark
6199
* <li> it is a non-spacing mark
6200
* <li> {@code isIdentifierIgnorable} returns
6201
* {@code true} for this character.
6202
* </ul>
6203
*
6204
* <p><b>Note:</b> This method cannot handle <a
6205
* href="#supplementary"> supplementary characters</a>. To support
6206
* all Unicode characters, including supplementary characters, use
6207
* the {@link #isUnicodeIdentifierPart(int)} method.
6208
*
6209
* @param ch the character to be tested.
6210
* @return {@code true} if the character may be part of a
6211
* Unicode identifier; {@code false} otherwise.
6212
* @see Character#isIdentifierIgnorable(char)
6213
* @see Character#isJavaIdentifierPart(char)
6214
* @see Character#isLetterOrDigit(char)
6215
* @see Character#isUnicodeIdentifierStart(char)
6216
* @since 1.1
6217
*/
6218
public static boolean isUnicodeIdentifierPart(char ch) {
6219
return isUnicodeIdentifierPart((int)ch);
6220
}
6221
6222
/**
6223
* Determines if the specified character (Unicode code point) may be part of a Unicode
6224
* identifier as other than the first character.
6225
* <p>
6226
* A character may be part of a Unicode identifier if and only if
6227
* one of the following statements is true:
6228
* <ul>
6229
* <li> it is a letter
6230
* <li> it is a connecting punctuation character (such as {@code '_'})
6231
* <li> it is a digit
6232
* <li> it is a numeric letter (such as a Roman numeral character)
6233
* <li> it is a combining mark
6234
* <li> it is a non-spacing mark
6235
* <li> {@code isIdentifierIgnorable} returns
6236
* {@code true} for this character.
6237
* </ul>
6238
* @param codePoint the character (Unicode code point) to be tested.
6239
* @return {@code true} if the character may be part of a
6240
* Unicode identifier; {@code false} otherwise.
6241
* @see Character#isIdentifierIgnorable(int)
6242
* @see Character#isJavaIdentifierPart(int)
6243
* @see Character#isLetterOrDigit(int)
6244
* @see Character#isUnicodeIdentifierStart(int)
6245
* @since 1.5
6246
*/
6247
public static boolean isUnicodeIdentifierPart(int codePoint) {
6248
return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
6249
}
6250
6251
/**
6252
* Determines if the specified character should be regarded as
6253
* an ignorable character in a Java identifier or a Unicode identifier.
6254
* <p>
6255
* The following Unicode characters are ignorable in a Java identifier
6256
* or a Unicode identifier:
6257
* <ul>
6258
* <li>ISO control characters that are not whitespace
6259
* <ul>
6260
* <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6261
* <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6262
* <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6263
* </ul>
6264
*
6265
* <li>all characters that have the {@code FORMAT} general
6266
* category value
6267
* </ul>
6268
*
6269
* <p><b>Note:</b> This method cannot handle <a
6270
* href="#supplementary"> supplementary characters</a>. To support
6271
* all Unicode characters, including supplementary characters, use
6272
* the {@link #isIdentifierIgnorable(int)} method.
6273
*
6274
* @param ch the character to be tested.
6275
* @return {@code true} if the character is an ignorable control
6276
* character that may be part of a Java or Unicode identifier;
6277
* {@code false} otherwise.
6278
* @see Character#isJavaIdentifierPart(char)
6279
* @see Character#isUnicodeIdentifierPart(char)
6280
* @since 1.1
6281
*/
6282
public static boolean isIdentifierIgnorable(char ch) {
6283
return isIdentifierIgnorable((int)ch);
6284
}
6285
6286
/**
6287
* Determines if the specified character (Unicode code point) should be regarded as
6288
* an ignorable character in a Java identifier or a Unicode identifier.
6289
* <p>
6290
* The following Unicode characters are ignorable in a Java identifier
6291
* or a Unicode identifier:
6292
* <ul>
6293
* <li>ISO control characters that are not whitespace
6294
* <ul>
6295
* <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6296
* <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6297
* <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6298
* </ul>
6299
*
6300
* <li>all characters that have the {@code FORMAT} general
6301
* category value
6302
* </ul>
6303
*
6304
* @param codePoint the character (Unicode code point) to be tested.
6305
* @return {@code true} if the character is an ignorable control
6306
* character that may be part of a Java or Unicode identifier;
6307
* {@code false} otherwise.
6308
* @see Character#isJavaIdentifierPart(int)
6309
* @see Character#isUnicodeIdentifierPart(int)
6310
* @since 1.5
6311
*/
6312
public static boolean isIdentifierIgnorable(int codePoint) {
6313
return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
6314
}
6315
6316
/**
6317
* Converts the character argument to lowercase using case
6318
* mapping information from the UnicodeData file.
6319
* <p>
6320
* Note that
6321
* {@code Character.isLowerCase(Character.toLowerCase(ch))}
6322
* does not always return {@code true} for some ranges of
6323
* characters, particularly those that are symbols or ideographs.
6324
*
6325
* <p>In general, {@link String#toLowerCase()} should be used to map
6326
* characters to lowercase. {@code String} case mapping methods
6327
* have several benefits over {@code Character} case mapping methods.
6328
* {@code String} case mapping methods can perform locale-sensitive
6329
* mappings, context-sensitive mappings, and 1:M character mappings, whereas
6330
* the {@code Character} case mapping methods cannot.
6331
*
6332
* <p><b>Note:</b> This method cannot handle <a
6333
* href="#supplementary"> supplementary characters</a>. To support
6334
* all Unicode characters, including supplementary characters, use
6335
* the {@link #toLowerCase(int)} method.
6336
*
6337
* @param ch the character to be converted.
6338
* @return the lowercase equivalent of the character, if any;
6339
* otherwise, the character itself.
6340
* @see Character#isLowerCase(char)
6341
* @see String#toLowerCase()
6342
*/
6343
public static char toLowerCase(char ch) {
6344
return (char)toLowerCase((int)ch);
6345
}
6346
6347
/**
6348
* Converts the character (Unicode code point) argument to
6349
* lowercase using case mapping information from the UnicodeData
6350
* file.
6351
*
6352
* <p> Note that
6353
* {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6354
* does not always return {@code true} for some ranges of
6355
* characters, particularly those that are symbols or ideographs.
6356
*
6357
* <p>In general, {@link String#toLowerCase()} should be used to map
6358
* characters to lowercase. {@code String} case mapping methods
6359
* have several benefits over {@code Character} case mapping methods.
6360
* {@code String} case mapping methods can perform locale-sensitive
6361
* mappings, context-sensitive mappings, and 1:M character mappings, whereas
6362
* the {@code Character} case mapping methods cannot.
6363
*
6364
* @param codePoint the character (Unicode code point) to be converted.
6365
* @return the lowercase equivalent of the character (Unicode code
6366
* point), if any; otherwise, the character itself.
6367
* @see Character#isLowerCase(int)
6368
* @see String#toLowerCase()
6369
*
6370
* @since 1.5
6371
*/
6372
public static int toLowerCase(int codePoint) {
6373
return CharacterData.of(codePoint).toLowerCase(codePoint);
6374
}
6375
6376
/**
6377
* Converts the character argument to uppercase using case mapping
6378
* information from the UnicodeData file.
6379
* <p>
6380
* Note that
6381
* {@code Character.isUpperCase(Character.toUpperCase(ch))}
6382
* does not always return {@code true} for some ranges of
6383
* characters, particularly those that are symbols or ideographs.
6384
*
6385
* <p>In general, {@link String#toUpperCase()} should be used to map
6386
* characters to uppercase. {@code String} case mapping methods
6387
* have several benefits over {@code Character} case mapping methods.
6388
* {@code String} case mapping methods can perform locale-sensitive
6389
* mappings, context-sensitive mappings, and 1:M character mappings, whereas
6390
* the {@code Character} case mapping methods cannot.
6391
*
6392
* <p><b>Note:</b> This method cannot handle <a
6393
* href="#supplementary"> supplementary characters</a>. To support
6394
* all Unicode characters, including supplementary characters, use
6395
* the {@link #toUpperCase(int)} method.
6396
*
6397
* @param ch the character to be converted.
6398
* @return the uppercase equivalent of the character, if any;
6399
* otherwise, the character itself.
6400
* @see Character#isUpperCase(char)
6401
* @see String#toUpperCase()
6402
*/
6403
public static char toUpperCase(char ch) {
6404
return (char)toUpperCase((int)ch);
6405
}
6406
6407
/**
6408
* Converts the character (Unicode code point) argument to
6409
* uppercase using case mapping information from the UnicodeData
6410
* file.
6411
*
6412
* <p>Note that
6413
* {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6414
* does not always return {@code true} for some ranges of
6415
* characters, particularly those that are symbols or ideographs.
6416
*
6417
* <p>In general, {@link String#toUpperCase()} should be used to map
6418
* characters to uppercase. {@code String} case mapping methods
6419
* have several benefits over {@code Character} case mapping methods.
6420
* {@code String} case mapping methods can perform locale-sensitive
6421
* mappings, context-sensitive mappings, and 1:M character mappings, whereas
6422
* the {@code Character} case mapping methods cannot.
6423
*
6424
* @param codePoint the character (Unicode code point) to be converted.
6425
* @return the uppercase equivalent of the character, if any;
6426
* otherwise, the character itself.
6427
* @see Character#isUpperCase(int)
6428
* @see String#toUpperCase()
6429
*
6430
* @since 1.5
6431
*/
6432
public static int toUpperCase(int codePoint) {
6433
return CharacterData.of(codePoint).toUpperCase(codePoint);
6434
}
6435
6436
/**
6437
* Converts the character argument to titlecase using case mapping
6438
* information from the UnicodeData file. If a character has no
6439
* explicit titlecase mapping and is not itself a titlecase char
6440
* according to UnicodeData, then the uppercase mapping is
6441
* returned as an equivalent titlecase mapping. If the
6442
* {@code char} argument is already a titlecase
6443
* {@code char}, the same {@code char} value will be
6444
* returned.
6445
* <p>
6446
* Note that
6447
* {@code Character.isTitleCase(Character.toTitleCase(ch))}
6448
* does not always return {@code true} for some ranges of
6449
* characters.
6450
*
6451
* <p><b>Note:</b> This method cannot handle <a
6452
* href="#supplementary"> supplementary characters</a>. To support
6453
* all Unicode characters, including supplementary characters, use
6454
* the {@link #toTitleCase(int)} method.
6455
*
6456
* @param ch the character to be converted.
6457
* @return the titlecase equivalent of the character, if any;
6458
* otherwise, the character itself.
6459
* @see Character#isTitleCase(char)
6460
* @see Character#toLowerCase(char)
6461
* @see Character#toUpperCase(char)
6462
* @since 1.0.2
6463
*/
6464
public static char toTitleCase(char ch) {
6465
return (char)toTitleCase((int)ch);
6466
}
6467
6468
/**
6469
* Converts the character (Unicode code point) argument to titlecase using case mapping
6470
* information from the UnicodeData file. If a character has no
6471
* explicit titlecase mapping and is not itself a titlecase char
6472
* according to UnicodeData, then the uppercase mapping is
6473
* returned as an equivalent titlecase mapping. If the
6474
* character argument is already a titlecase
6475
* character, the same character value will be
6476
* returned.
6477
*
6478
* <p>Note that
6479
* {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6480
* does not always return {@code true} for some ranges of
6481
* characters.
6482
*
6483
* @param codePoint the character (Unicode code point) to be converted.
6484
* @return the titlecase equivalent of the character, if any;
6485
* otherwise, the character itself.
6486
* @see Character#isTitleCase(int)
6487
* @see Character#toLowerCase(int)
6488
* @see Character#toUpperCase(int)
6489
* @since 1.5
6490
*/
6491
public static int toTitleCase(int codePoint) {
6492
return CharacterData.of(codePoint).toTitleCase(codePoint);
6493
}
6494
6495
/**
6496
* Returns the numeric value of the character {@code ch} in the
6497
* specified radix.
6498
* <p>
6499
* If the radix is not in the range {@code MIN_RADIX} &le;
6500
* {@code radix} &le; {@code MAX_RADIX} or if the
6501
* value of {@code ch} is not a valid digit in the specified
6502
* radix, {@code -1} is returned. A character is a valid digit
6503
* if at least one of the following is true:
6504
* <ul>
6505
* <li>The method {@code isDigit} is {@code true} of the character
6506
* and the Unicode decimal digit value of the character (or its
6507
* single-character decomposition) is less than the specified radix.
6508
* In this case the decimal digit value is returned.
6509
* <li>The character is one of the uppercase Latin letters
6510
* {@code 'A'} through {@code 'Z'} and its code is less than
6511
* {@code radix + 'A' - 10}.
6512
* In this case, {@code ch - 'A' + 10}
6513
* is returned.
6514
* <li>The character is one of the lowercase Latin letters
6515
* {@code 'a'} through {@code 'z'} and its code is less than
6516
* {@code radix + 'a' - 10}.
6517
* In this case, {@code ch - 'a' + 10}
6518
* is returned.
6519
* <li>The character is one of the fullwidth uppercase Latin letters A
6520
* ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6521
* and its code is less than
6522
* {@code radix + '\u005CuFF21' - 10}.
6523
* In this case, {@code ch - '\u005CuFF21' + 10}
6524
* is returned.
6525
* <li>The character is one of the fullwidth lowercase Latin letters a
6526
* ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6527
* and its code is less than
6528
* {@code radix + '\u005CuFF41' - 10}.
6529
* In this case, {@code ch - '\u005CuFF41' + 10}
6530
* is returned.
6531
* </ul>
6532
*
6533
* <p><b>Note:</b> This method cannot handle <a
6534
* href="#supplementary"> supplementary characters</a>. To support
6535
* all Unicode characters, including supplementary characters, use
6536
* the {@link #digit(int, int)} method.
6537
*
6538
* @param ch the character to be converted.
6539
* @param radix the radix.
6540
* @return the numeric value represented by the character in the
6541
* specified radix.
6542
* @see Character#forDigit(int, int)
6543
* @see Character#isDigit(char)
6544
*/
6545
public static int digit(char ch, int radix) {
6546
return digit((int)ch, radix);
6547
}
6548
6549
/**
6550
* Returns the numeric value of the specified character (Unicode
6551
* code point) in the specified radix.
6552
*
6553
* <p>If the radix is not in the range {@code MIN_RADIX} &le;
6554
* {@code radix} &le; {@code MAX_RADIX} or if the
6555
* character is not a valid digit in the specified
6556
* radix, {@code -1} is returned. A character is a valid digit
6557
* if at least one of the following is true:
6558
* <ul>
6559
* <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6560
* and the Unicode decimal digit value of the character (or its
6561
* single-character decomposition) is less than the specified radix.
6562
* In this case the decimal digit value is returned.
6563
* <li>The character is one of the uppercase Latin letters
6564
* {@code 'A'} through {@code 'Z'} and its code is less than
6565
* {@code radix + 'A' - 10}.
6566
* In this case, {@code codePoint - 'A' + 10}
6567
* is returned.
6568
* <li>The character is one of the lowercase Latin letters
6569
* {@code 'a'} through {@code 'z'} and its code is less than
6570
* {@code radix + 'a' - 10}.
6571
* In this case, {@code codePoint - 'a' + 10}
6572
* is returned.
6573
* <li>The character is one of the fullwidth uppercase Latin letters A
6574
* ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6575
* and its code is less than
6576
* {@code radix + '\u005CuFF21' - 10}.
6577
* In this case,
6578
* {@code codePoint - '\u005CuFF21' + 10}
6579
* is returned.
6580
* <li>The character is one of the fullwidth lowercase Latin letters a
6581
* ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6582
* and its code is less than
6583
* {@code radix + '\u005CuFF41'- 10}.
6584
* In this case,
6585
* {@code codePoint - '\u005CuFF41' + 10}
6586
* is returned.
6587
* </ul>
6588
*
6589
* @param codePoint the character (Unicode code point) to be converted.
6590
* @param radix the radix.
6591
* @return the numeric value represented by the character in the
6592
* specified radix.
6593
* @see Character#forDigit(int, int)
6594
* @see Character#isDigit(int)
6595
* @since 1.5
6596
*/
6597
public static int digit(int codePoint, int radix) {
6598
return CharacterData.of(codePoint).digit(codePoint, radix);
6599
}
6600
6601
/**
6602
* Returns the {@code int} value that the specified Unicode
6603
* character represents. For example, the character
6604
* {@code '\u005Cu216C'} (the roman numeral fifty) will return
6605
* an int with a value of 50.
6606
* <p>
6607
* The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6608
* {@code '\u005Cu005A'}), lowercase
6609
* ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6610
* full width variant ({@code '\u005CuFF21'} through
6611
* {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6612
* {@code '\u005CuFF5A'}) forms have numeric values from 10
6613
* through 35. This is independent of the Unicode specification,
6614
* which does not assign numeric values to these {@code char}
6615
* values.
6616
* <p>
6617
* If the character does not have a numeric value, then -1 is returned.
6618
* If the character has a numeric value that cannot be represented as a
6619
* nonnegative integer (for example, a fractional value), then -2
6620
* is returned.
6621
*
6622
* <p><b>Note:</b> This method cannot handle <a
6623
* href="#supplementary"> supplementary characters</a>. To support
6624
* all Unicode characters, including supplementary characters, use
6625
* the {@link #getNumericValue(int)} method.
6626
*
6627
* @param ch the character to be converted.
6628
* @return the numeric value of the character, as a nonnegative {@code int}
6629
* value; -2 if the character has a numeric value that is not a
6630
* nonnegative integer; -1 if the character has no numeric value.
6631
* @see Character#forDigit(int, int)
6632
* @see Character#isDigit(char)
6633
* @since 1.1
6634
*/
6635
public static int getNumericValue(char ch) {
6636
return getNumericValue((int)ch);
6637
}
6638
6639
/**
6640
* Returns the {@code int} value that the specified
6641
* character (Unicode code point) represents. For example, the character
6642
* {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6643
* an {@code int} with a value of 50.
6644
* <p>
6645
* The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6646
* {@code '\u005Cu005A'}), lowercase
6647
* ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6648
* full width variant ({@code '\u005CuFF21'} through
6649
* {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6650
* {@code '\u005CuFF5A'}) forms have numeric values from 10
6651
* through 35. This is independent of the Unicode specification,
6652
* which does not assign numeric values to these {@code char}
6653
* values.
6654
* <p>
6655
* If the character does not have a numeric value, then -1 is returned.
6656
* If the character has a numeric value that cannot be represented as a
6657
* nonnegative integer (for example, a fractional value), then -2
6658
* is returned.
6659
*
6660
* @param codePoint the character (Unicode code point) to be converted.
6661
* @return the numeric value of the character, as a nonnegative {@code int}
6662
* value; -2 if the character has a numeric value that is not a
6663
* nonnegative integer; -1 if the character has no numeric value.
6664
* @see Character#forDigit(int, int)
6665
* @see Character#isDigit(int)
6666
* @since 1.5
6667
*/
6668
public static int getNumericValue(int codePoint) {
6669
return CharacterData.of(codePoint).getNumericValue(codePoint);
6670
}
6671
6672
/**
6673
* Determines if the specified character is ISO-LATIN-1 white space.
6674
* This method returns {@code true} for the following five
6675
* characters only:
6676
* <table summary="truechars">
6677
* <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td>
6678
* <td>{@code HORIZONTAL TABULATION}</td></tr>
6679
* <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td>
6680
* <td>{@code NEW LINE}</td></tr>
6681
* <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td>
6682
* <td>{@code FORM FEED}</td></tr>
6683
* <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td>
6684
* <td>{@code CARRIAGE RETURN}</td></tr>
6685
* <tr><td>{@code ' '}</td> <td>{@code U+0020}</td>
6686
* <td>{@code SPACE}</td></tr>
6687
* </table>
6688
*
6689
* @param ch the character to be tested.
6690
* @return {@code true} if the character is ISO-LATIN-1 white
6691
* space; {@code false} otherwise.
6692
* @see Character#isSpaceChar(char)
6693
* @see Character#isWhitespace(char)
6694
* @deprecated Replaced by isWhitespace(char).
6695
*/
6696
@Deprecated
6697
public static boolean isSpace(char ch) {
6698
return (ch <= 0x0020) &&
6699
(((((1L << 0x0009) |
6700
(1L << 0x000A) |
6701
(1L << 0x000C) |
6702
(1L << 0x000D) |
6703
(1L << 0x0020)) >> ch) & 1L) != 0);
6704
}
6705
6706
6707
/**
6708
* Determines if the specified character is a Unicode space character.
6709
* A character is considered to be a space character if and only if
6710
* it is specified to be a space character by the Unicode Standard. This
6711
* method returns true if the character's general category type is any of
6712
* the following:
6713
* <ul>
6714
* <li> {@code SPACE_SEPARATOR}
6715
* <li> {@code LINE_SEPARATOR}
6716
* <li> {@code PARAGRAPH_SEPARATOR}
6717
* </ul>
6718
*
6719
* <p><b>Note:</b> This method cannot handle <a
6720
* href="#supplementary"> supplementary characters</a>. To support
6721
* all Unicode characters, including supplementary characters, use
6722
* the {@link #isSpaceChar(int)} method.
6723
*
6724
* @param ch the character to be tested.
6725
* @return {@code true} if the character is a space character;
6726
* {@code false} otherwise.
6727
* @see Character#isWhitespace(char)
6728
* @since 1.1
6729
*/
6730
public static boolean isSpaceChar(char ch) {
6731
return isSpaceChar((int)ch);
6732
}
6733
6734
/**
6735
* Determines if the specified character (Unicode code point) is a
6736
* Unicode space character. A character is considered to be a
6737
* space character if and only if it is specified to be a space
6738
* character by the Unicode Standard. This method returns true if
6739
* the character's general category type is any of the following:
6740
*
6741
* <ul>
6742
* <li> {@link #SPACE_SEPARATOR}
6743
* <li> {@link #LINE_SEPARATOR}
6744
* <li> {@link #PARAGRAPH_SEPARATOR}
6745
* </ul>
6746
*
6747
* @param codePoint the character (Unicode code point) to be tested.
6748
* @return {@code true} if the character is a space character;
6749
* {@code false} otherwise.
6750
* @see Character#isWhitespace(int)
6751
* @since 1.5
6752
*/
6753
public static boolean isSpaceChar(int codePoint) {
6754
return ((((1 << Character.SPACE_SEPARATOR) |
6755
(1 << Character.LINE_SEPARATOR) |
6756
(1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
6757
!= 0;
6758
}
6759
6760
/**
6761
* Determines if the specified character is white space according to Java.
6762
* A character is a Java whitespace character if and only if it satisfies
6763
* one of the following criteria:
6764
* <ul>
6765
* <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6766
* {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6767
* but is not also a non-breaking space ({@code '\u005Cu00A0'},
6768
* {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6769
* <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6770
* <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6771
* <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6772
* <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6773
* <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6774
* <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6775
* <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6776
* <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6777
* <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6778
* </ul>
6779
*
6780
* <p><b>Note:</b> This method cannot handle <a
6781
* href="#supplementary"> supplementary characters</a>. To support
6782
* all Unicode characters, including supplementary characters, use
6783
* the {@link #isWhitespace(int)} method.
6784
*
6785
* @param ch the character to be tested.
6786
* @return {@code true} if the character is a Java whitespace
6787
* character; {@code false} otherwise.
6788
* @see Character#isSpaceChar(char)
6789
* @since 1.1
6790
*/
6791
public static boolean isWhitespace(char ch) {
6792
return isWhitespace((int)ch);
6793
}
6794
6795
/**
6796
* Determines if the specified character (Unicode code point) is
6797
* white space according to Java. A character is a Java
6798
* whitespace character if and only if it satisfies one of the
6799
* following criteria:
6800
* <ul>
6801
* <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6802
* {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6803
* but is not also a non-breaking space ({@code '\u005Cu00A0'},
6804
* {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6805
* <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6806
* <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6807
* <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6808
* <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6809
* <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6810
* <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6811
* <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6812
* <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6813
* <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6814
* </ul>
6815
* <p>
6816
*
6817
* @param codePoint the character (Unicode code point) to be tested.
6818
* @return {@code true} if the character is a Java whitespace
6819
* character; {@code false} otherwise.
6820
* @see Character#isSpaceChar(int)
6821
* @since 1.5
6822
*/
6823
public static boolean isWhitespace(int codePoint) {
6824
return CharacterData.of(codePoint).isWhitespace(codePoint);
6825
}
6826
6827
/**
6828
* Determines if the specified character is an ISO control
6829
* character. A character is considered to be an ISO control
6830
* character if its code is in the range {@code '\u005Cu0000'}
6831
* through {@code '\u005Cu001F'} or in the range
6832
* {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6833
*
6834
* <p><b>Note:</b> This method cannot handle <a
6835
* href="#supplementary"> supplementary characters</a>. To support
6836
* all Unicode characters, including supplementary characters, use
6837
* the {@link #isISOControl(int)} method.
6838
*
6839
* @param ch the character to be tested.
6840
* @return {@code true} if the character is an ISO control character;
6841
* {@code false} otherwise.
6842
*
6843
* @see Character#isSpaceChar(char)
6844
* @see Character#isWhitespace(char)
6845
* @since 1.1
6846
*/
6847
public static boolean isISOControl(char ch) {
6848
return isISOControl((int)ch);
6849
}
6850
6851
/**
6852
* Determines if the referenced character (Unicode code point) is an ISO control
6853
* character. A character is considered to be an ISO control
6854
* character if its code is in the range {@code '\u005Cu0000'}
6855
* through {@code '\u005Cu001F'} or in the range
6856
* {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6857
*
6858
* @param codePoint the character (Unicode code point) to be tested.
6859
* @return {@code true} if the character is an ISO control character;
6860
* {@code false} otherwise.
6861
* @see Character#isSpaceChar(int)
6862
* @see Character#isWhitespace(int)
6863
* @since 1.5
6864
*/
6865
public static boolean isISOControl(int codePoint) {
6866
// Optimized form of:
6867
// (codePoint >= 0x00 && codePoint <= 0x1F) ||
6868
// (codePoint >= 0x7F && codePoint <= 0x9F);
6869
return codePoint <= 0x9F &&
6870
(codePoint >= 0x7F || (codePoint >>> 5 == 0));
6871
}
6872
6873
/**
6874
* Returns a value indicating a character's general category.
6875
*
6876
* <p><b>Note:</b> This method cannot handle <a
6877
* href="#supplementary"> supplementary characters</a>. To support
6878
* all Unicode characters, including supplementary characters, use
6879
* the {@link #getType(int)} method.
6880
*
6881
* @param ch the character to be tested.
6882
* @return a value of type {@code int} representing the
6883
* character's general category.
6884
* @see Character#COMBINING_SPACING_MARK
6885
* @see Character#CONNECTOR_PUNCTUATION
6886
* @see Character#CONTROL
6887
* @see Character#CURRENCY_SYMBOL
6888
* @see Character#DASH_PUNCTUATION
6889
* @see Character#DECIMAL_DIGIT_NUMBER
6890
* @see Character#ENCLOSING_MARK
6891
* @see Character#END_PUNCTUATION
6892
* @see Character#FINAL_QUOTE_PUNCTUATION
6893
* @see Character#FORMAT
6894
* @see Character#INITIAL_QUOTE_PUNCTUATION
6895
* @see Character#LETTER_NUMBER
6896
* @see Character#LINE_SEPARATOR
6897
* @see Character#LOWERCASE_LETTER
6898
* @see Character#MATH_SYMBOL
6899
* @see Character#MODIFIER_LETTER
6900
* @see Character#MODIFIER_SYMBOL
6901
* @see Character#NON_SPACING_MARK
6902
* @see Character#OTHER_LETTER
6903
* @see Character#OTHER_NUMBER
6904
* @see Character#OTHER_PUNCTUATION
6905
* @see Character#OTHER_SYMBOL
6906
* @see Character#PARAGRAPH_SEPARATOR
6907
* @see Character#PRIVATE_USE
6908
* @see Character#SPACE_SEPARATOR
6909
* @see Character#START_PUNCTUATION
6910
* @see Character#SURROGATE
6911
* @see Character#TITLECASE_LETTER
6912
* @see Character#UNASSIGNED
6913
* @see Character#UPPERCASE_LETTER
6914
* @since 1.1
6915
*/
6916
public static int getType(char ch) {
6917
return getType((int)ch);
6918
}
6919
6920
/**
6921
* Returns a value indicating a character's general category.
6922
*
6923
* @param codePoint the character (Unicode code point) to be tested.
6924
* @return a value of type {@code int} representing the
6925
* character's general category.
6926
* @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
6927
* @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
6928
* @see Character#CONTROL CONTROL
6929
* @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
6930
* @see Character#DASH_PUNCTUATION DASH_PUNCTUATION
6931
* @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
6932
* @see Character#ENCLOSING_MARK ENCLOSING_MARK
6933
* @see Character#END_PUNCTUATION END_PUNCTUATION
6934
* @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
6935
* @see Character#FORMAT FORMAT
6936
* @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
6937
* @see Character#LETTER_NUMBER LETTER_NUMBER
6938
* @see Character#LINE_SEPARATOR LINE_SEPARATOR
6939
* @see Character#LOWERCASE_LETTER LOWERCASE_LETTER
6940
* @see Character#MATH_SYMBOL MATH_SYMBOL
6941
* @see Character#MODIFIER_LETTER MODIFIER_LETTER
6942
* @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
6943
* @see Character#NON_SPACING_MARK NON_SPACING_MARK
6944
* @see Character#OTHER_LETTER OTHER_LETTER
6945
* @see Character#OTHER_NUMBER OTHER_NUMBER
6946
* @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
6947
* @see Character#OTHER_SYMBOL OTHER_SYMBOL
6948
* @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
6949
* @see Character#PRIVATE_USE PRIVATE_USE
6950
* @see Character#SPACE_SEPARATOR SPACE_SEPARATOR
6951
* @see Character#START_PUNCTUATION START_PUNCTUATION
6952
* @see Character#SURROGATE SURROGATE
6953
* @see Character#TITLECASE_LETTER TITLECASE_LETTER
6954
* @see Character#UNASSIGNED UNASSIGNED
6955
* @see Character#UPPERCASE_LETTER UPPERCASE_LETTER
6956
* @since 1.5
6957
*/
6958
public static int getType(int codePoint) {
6959
return CharacterData.of(codePoint).getType(codePoint);
6960
}
6961
6962
/**
6963
* Determines the character representation for a specific digit in
6964
* the specified radix. If the value of {@code radix} is not a
6965
* valid radix, or the value of {@code digit} is not a valid
6966
* digit in the specified radix, the null character
6967
* ({@code '\u005Cu0000'}) is returned.
6968
* <p>
6969
* The {@code radix} argument is valid if it is greater than or
6970
* equal to {@code MIN_RADIX} and less than or equal to
6971
* {@code MAX_RADIX}. The {@code digit} argument is valid if
6972
* {@code 0 <= digit < radix}.
6973
* <p>
6974
* If the digit is less than 10, then
6975
* {@code '0' + digit} is returned. Otherwise, the value
6976
* {@code 'a' + digit - 10} is returned.
6977
*
6978
* @param digit the number to convert to a character.
6979
* @param radix the radix.
6980
* @return the {@code char} representation of the specified digit
6981
* in the specified radix.
6982
* @see Character#MIN_RADIX
6983
* @see Character#MAX_RADIX
6984
* @see Character#digit(char, int)
6985
*/
6986
public static char forDigit(int digit, int radix) {
6987
if ((digit >= radix) || (digit < 0)) {
6988
return '\0';
6989
}
6990
if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
6991
return '\0';
6992
}
6993
if (digit < 10) {
6994
return (char)('0' + digit);
6995
}
6996
return (char)('a' - 10 + digit);
6997
}
6998
6999
/**
7000
* Returns the Unicode directionality property for the given
7001
* character. Character directionality is used to calculate the
7002
* visual ordering of text. The directionality value of undefined
7003
* {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
7004
*
7005
* <p><b>Note:</b> This method cannot handle <a
7006
* href="#supplementary"> supplementary characters</a>. To support
7007
* all Unicode characters, including supplementary characters, use
7008
* the {@link #getDirectionality(int)} method.
7009
*
7010
* @param ch {@code char} for which the directionality property
7011
* is requested.
7012
* @return the directionality property of the {@code char} value.
7013
*
7014
* @see Character#DIRECTIONALITY_UNDEFINED
7015
* @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
7016
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
7017
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7018
* @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
7019
* @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7020
* @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7021
* @see Character#DIRECTIONALITY_ARABIC_NUMBER
7022
* @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7023
* @see Character#DIRECTIONALITY_NONSPACING_MARK
7024
* @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
7025
* @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
7026
* @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
7027
* @see Character#DIRECTIONALITY_WHITESPACE
7028
* @see Character#DIRECTIONALITY_OTHER_NEUTRALS
7029
* @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7030
* @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7031
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7032
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7033
* @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7034
* @since 1.4
7035
*/
7036
public static byte getDirectionality(char ch) {
7037
return getDirectionality((int)ch);
7038
}
7039
7040
/**
7041
* Returns the Unicode directionality property for the given
7042
* character (Unicode code point). Character directionality is
7043
* used to calculate the visual ordering of text. The
7044
* directionality value of undefined character is {@link
7045
* #DIRECTIONALITY_UNDEFINED}.
7046
*
7047
* @param codePoint the character (Unicode code point) for which
7048
* the directionality property is requested.
7049
* @return the directionality property of the character.
7050
*
7051
* @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7052
* @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7053
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7054
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7055
* @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7056
* @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7057
* @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7058
* @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7059
* @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7060
* @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7061
* @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7062
* @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7063
* @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7064
* @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7065
* @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7066
* @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7067
* @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7068
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7069
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7070
* @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7071
* @since 1.5
7072
*/
7073
public static byte getDirectionality(int codePoint) {
7074
return CharacterData.of(codePoint).getDirectionality(codePoint);
7075
}
7076
7077
/**
7078
* Determines whether the character is mirrored according to the
7079
* Unicode specification. Mirrored characters should have their
7080
* glyphs horizontally mirrored when displayed in text that is
7081
* right-to-left. For example, {@code '\u005Cu0028'} LEFT
7082
* PARENTHESIS is semantically defined to be an <i>opening
7083
* parenthesis</i>. This will appear as a "(" in text that is
7084
* left-to-right but as a ")" in text that is right-to-left.
7085
*
7086
* <p><b>Note:</b> This method cannot handle <a
7087
* href="#supplementary"> supplementary characters</a>. To support
7088
* all Unicode characters, including supplementary characters, use
7089
* the {@link #isMirrored(int)} method.
7090
*
7091
* @param ch {@code char} for which the mirrored property is requested
7092
* @return {@code true} if the char is mirrored, {@code false}
7093
* if the {@code char} is not mirrored or is not defined.
7094
* @since 1.4
7095
*/
7096
public static boolean isMirrored(char ch) {
7097
return isMirrored((int)ch);
7098
}
7099
7100
/**
7101
* Determines whether the specified character (Unicode code point)
7102
* is mirrored according to the Unicode specification. Mirrored
7103
* characters should have their glyphs horizontally mirrored when
7104
* displayed in text that is right-to-left. For example,
7105
* {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
7106
* defined to be an <i>opening parenthesis</i>. This will appear
7107
* as a "(" in text that is left-to-right but as a ")" in text
7108
* that is right-to-left.
7109
*
7110
* @param codePoint the character (Unicode code point) to be tested.
7111
* @return {@code true} if the character is mirrored, {@code false}
7112
* if the character is not mirrored or is not defined.
7113
* @since 1.5
7114
*/
7115
public static boolean isMirrored(int codePoint) {
7116
return CharacterData.of(codePoint).isMirrored(codePoint);
7117
}
7118
7119
/**
7120
* Compares two {@code Character} objects numerically.
7121
*
7122
* @param anotherCharacter the {@code Character} to be compared.
7123
7124
* @return the value {@code 0} if the argument {@code Character}
7125
* is equal to this {@code Character}; a value less than
7126
* {@code 0} if this {@code Character} is numerically less
7127
* than the {@code Character} argument; and a value greater than
7128
* {@code 0} if this {@code Character} is numerically greater
7129
* than the {@code Character} argument (unsigned comparison).
7130
* Note that this is strictly a numerical comparison; it is not
7131
* locale-dependent.
7132
* @since 1.2
7133
*/
7134
public int compareTo(Character anotherCharacter) {
7135
return compare(this.value, anotherCharacter.value);
7136
}
7137
7138
/**
7139
* Compares two {@code char} values numerically.
7140
* The value returned is identical to what would be returned by:
7141
* <pre>
7142
* Character.valueOf(x).compareTo(Character.valueOf(y))
7143
* </pre>
7144
*
7145
* @param x the first {@code char} to compare
7146
* @param y the second {@code char} to compare
7147
* @return the value {@code 0} if {@code x == y};
7148
* a value less than {@code 0} if {@code x < y}; and
7149
* a value greater than {@code 0} if {@code x > y}
7150
* @since 1.7
7151
*/
7152
public static int compare(char x, char y) {
7153
return x - y;
7154
}
7155
7156
/**
7157
* Converts the character (Unicode code point) argument to uppercase using
7158
* information from the UnicodeData file.
7159
* <p>
7160
*
7161
* @param codePoint the character (Unicode code point) to be converted.
7162
* @return either the uppercase equivalent of the character, if
7163
* any, or an error flag ({@code Character.ERROR})
7164
* that indicates that a 1:M {@code char} mapping exists.
7165
* @see Character#isLowerCase(char)
7166
* @see Character#isUpperCase(char)
7167
* @see Character#toLowerCase(char)
7168
* @see Character#toTitleCase(char)
7169
* @since 1.4
7170
*/
7171
static int toUpperCaseEx(int codePoint) {
7172
assert isValidCodePoint(codePoint);
7173
return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
7174
}
7175
7176
/**
7177
* Converts the character (Unicode code point) argument to uppercase using case
7178
* mapping information from the SpecialCasing file in the Unicode
7179
* specification. If a character has no explicit uppercase
7180
* mapping, then the {@code char} itself is returned in the
7181
* {@code char[]}.
7182
*
7183
* @param codePoint the character (Unicode code point) to be converted.
7184
* @return a {@code char[]} with the uppercased character.
7185
* @since 1.4
7186
*/
7187
static char[] toUpperCaseCharArray(int codePoint) {
7188
// As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
7189
assert isBmpCodePoint(codePoint);
7190
return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
7191
}
7192
7193
/**
7194
* The number of bits used to represent a {@code char} value in unsigned
7195
* binary form, constant {@code 16}.
7196
*
7197
* @since 1.5
7198
*/
7199
public static final int SIZE = 16; // width of a char in bits; BYTES is derived from this value
7200
7201
/**
7202
* The number of bytes used to represent a {@code char} value in unsigned
7203
* binary form.
7204
*
7205
* @since 1.8
7206
*/
7207
public static final int BYTES = SIZE / Byte.SIZE; // SIZE is in bits; Byte.SIZE is the number of bits per byte
7208
7209
/**
7210
* Returns the value obtained by reversing the order of the bytes in the
7211
* specified <tt>char</tt> value.
7212
*
7213
* @param ch The {@code char} of which to reverse the byte order.
7214
* @return the value obtained by reversing (or, equivalently, swapping)
7215
* the bytes in the specified <tt>char</tt> value.
7216
* @since 1.5
7217
*/
7218
public static char reverseBytes(char ch) {
7219
return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7220
}
7221
7222
/**
7223
* Returns the Unicode name of the specified character
7224
* {@code codePoint}, or null if the code point is
7225
* {@link #UNASSIGNED unassigned}.
7226
* <p>
7227
* Note: if the specified character is not assigned a name by
7228
* the <i>UnicodeData</i> file (part of the Unicode Character
7229
* Database maintained by the Unicode Consortium), the returned
7230
* name is the same as the result of expression.
7231
*
7232
* <blockquote>{@code
7233
* Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7234
* + " "
7235
* + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7236
*
7237
* }</blockquote>
7238
*
7239
* @param codePoint the character (Unicode code point)
7240
*
7241
* @return the Unicode name of the specified character, or null if
7242
* the code point is unassigned.
7243
*
7244
* @exception IllegalArgumentException if the specified
7245
* {@code codePoint} is not a valid Unicode
7246
* code point.
7247
*
7248
* @since 1.7
7249
*/
7250
public static String getName(int codePoint) {
7251
if (!isValidCodePoint(codePoint)) {
7252
throw new IllegalArgumentException();
7253
}
7254
String name = CharacterName.get(codePoint);
7255
if (name != null)
7256
return name;
7257
if (getType(codePoint) == UNASSIGNED)
7258
return null;
7259
UnicodeBlock block = UnicodeBlock.of(codePoint);
7260
if (block != null)
7261
return block.toString().replace('_', ' ') + " "
7262
+ Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7263
// should never come here
7264
return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7265
}
7266
}
7267
7268