CoCalc -- UnicodeSpec.java

GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/make/src/classes/build/tools/generatecharacter/UnicodeSpec.java
³²²⁸⁷ views
1
/*
2
 * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.  Oracle designates this
8
 * particular file as subject to the "Classpath" exception as provided
9
 * by Oracle in the LICENSE file that accompanied this code.
10
 *
11
 * This code is distributed in the hope that it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
 * version 2 for more details (a copy is included in the LICENSE file that
15
 * accompanied this code).
16
 *
17
 * You should have received a copy of the GNU General Public License version
18
 * 2 along with this work; if not, write to the Free Software Foundation,
19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
 *
21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
 * or visit www.oracle.com if you need additional information or have any
23
 * questions.
24
 */
25

26
package build.tools.generatecharacter;
27

28
import java.io.BufferedReader;
29
import java.io.FileReader;
30
import java.io.FileNotFoundException;
31
import java.io.IOException;
32
import java.util.StringTokenizer;
33
import java.io.File;
34
import java.util.regex.Pattern;
35
import java.util.ArrayList;
36

37
/**
38
 * The UnicodeSpec class provides a way to read in Unicode character
39
 * properties from a Unicode data file.  One instance of class UnicodeSpec
40
 * holds a decoded version of one line of the data file.  The file may
41
 * be obtained from www.unicode.org.  The method readSpecFile returns an array
42
 * of UnicodeSpec objects.
43
 * @author      Guy Steele
44
 * @author  John O'Conner
45
 */
46

47
public class UnicodeSpec {
48

49
    private static final int MAP_UNDEFINED = 0xFFFFFFFF;
50

51
        /**
52
         * Construct a default UnicodeSpec object, with a default
53
         * code point value 0xFFFF.
54
         *
55
         */
56
    public UnicodeSpec() {
57
                this(0xffff);
58
    }
59

60
        /**
61
         * Construct a UnicodeSpec object for the given <code>codePoint<code>
62
         * argument. Provide default properties.
63
         * @param codePoint a Unicode code point between 0x0000 and 0x10FFFF
64
         */
65
    public UnicodeSpec(int codePoint) {
66
        this.codePoint = codePoint;
67
        generalCategory = UNASSIGNED;
68
        bidiCategory = DIRECTIONALITY_UNDEFINED;
69
        mirrored = false;
70
        titleMap = MAP_UNDEFINED;
71
        upperMap = MAP_UNDEFINED;
72
        lowerMap = MAP_UNDEFINED;
73
        decimalValue = -1;
74
        digitValue = -1;
75
        numericValue = "";
76
                oldName = null;
77
                comment = null;
78
                name = null;
79
    }
80

81
        /**
82
         * Create a String representation of this UnicodeSpec object.
83
         * The string will contain the code point and all its case mappings
84
         * if available.
85
         */
86
    public String toString() {
87
        StringBuffer result = new StringBuffer(hex6(codePoint));
88
        if (getUpperMap() != MAP_UNDEFINED) {
89
            result.append(", upper=").append(hex6(upperMap));
90
        }
91
        if (getLowerMap() != MAP_UNDEFINED) {
92
            result.append(", lower=").append(hex6(lowerMap));
93
        }
94
        if (getTitleMap() != MAP_UNDEFINED) {
95
            result.append(", title=").append(hex6(titleMap));
96
        }
97
        return result.toString();
98
    }
99

100
    static String hex4(int n) {
101
        String q = Integer.toHexString(n & 0xFFFF).toUpperCase();
102
        return "0000".substring(Math.min(4, q.length())) + q;
103
    }
104

105
        static String hex6(int n) {
106
                String str = Integer.toHexString(n & 0xFFFFFF).toUpperCase();
107
                return "000000".substring(Math.min(6, str.length())) + str;
108

109
        }
110

111

112
    /**
113
    * Given one line of a Unicode data file as a String, parse the line
114
    * and return a UnicodeSpec object that contains the same character information.
115
    *
116
    * @param s a line of the Unicode data file to be parsed
117
    * @return a UnicodeSpec object, or null if the parsing process failed for some reason
118
    */
119
    public static UnicodeSpec parse(String s) {
120
        UnicodeSpec spec = null;
121
        String[] tokens = null;
122

123
        try {
124
                        tokens = tokenSeparator.split(s, REQUIRED_FIELDS);
125
            spec = new UnicodeSpec();
126
            spec.setCodePoint(parseCodePoint(tokens[FIELD_VALUE]));
127
            spec.setName(parseName(tokens[FIELD_NAME]));
128
            spec.setGeneralCategory(parseGeneralCategory(tokens[FIELD_CATEGORY]));
129
            spec.setBidiCategory(parseBidiCategory(tokens[FIELD_BIDI]));
130
            spec.setCombiningClass(parseCombiningClass(tokens[FIELD_CLASS]));
131
            spec.setDecomposition(parseDecomposition(tokens[FIELD_DECOMPOSITION]));
132
            spec.setDecimalValue(parseDecimalValue(tokens[FIELD_DECIMAL]));
133
            spec.setDigitValue(parseDigitValue(tokens[FIELD_DIGIT]));
134
            spec.setNumericValue(parseNumericValue(tokens[FIELD_NUMERIC]));
135
            spec.setMirrored(parseMirrored(tokens[FIELD_MIRRORED]));
136
            spec.setOldName(parseOldName(tokens[FIELD_OLDNAME]));
137
            spec.setComment(parseComment(tokens[FIELD_COMMENT]));
138
            spec.setUpperMap(parseUpperMap(tokens[FIELD_UPPERCASE]));
139
            spec.setLowerMap(parseLowerMap(tokens[FIELD_LOWERCASE]));
140
            spec.setTitleMap(parseTitleMap(tokens[FIELD_TITLECASE]));
141
        }
142

143
        catch(Exception e) {
144
            spec = null;
145
            System.out.println("Error parsing spec line.");
146
        }
147
        return spec;
148
    }
149

150
    /**
151
    * Parse the codePoint attribute for a Unicode character.  If the parse succeeds,
152
    * the codePoint field of this UnicodeSpec object is updated and false is returned.
153
    *
154
    * The codePoint attribute should be a four to six digit hexadecimal integer.
155
    *
156
    * @param s   the codePoint attribute extracted from a line of the Unicode data file
157
    * @return   code point if successful
158
    * @exception NumberFormatException if unable to parse argument
159
    */
160
    public static int parseCodePoint(String s) throws NumberFormatException {
161
        return Integer.parseInt(s, 16);
162
    }
163

164
    public static String parseName(String s) throws Exception {
165
        if (s==null) throw new Exception("Cannot parse name.");
166
        return s;
167
    }
168

169
    public static byte parseGeneralCategory(String s) throws Exception {
170
        byte category = GENERAL_CATEGORY_COUNT;
171

172
        for (byte x=0; x<generalCategoryList.length; x++) {
173
            if (s.equals(generalCategoryList[x][SHORT])) {
174
                category = x;
175
                break;
176
            }
177
        }
178
        if (category >= GENERAL_CATEGORY_COUNT) {
179
            throw new Exception("Could not parse general category.");
180
        }
181
        return category;
182
    }
183

184
    public static byte parseBidiCategory(String s) throws Exception {
185
        byte category = DIRECTIONALITY_CATEGORY_COUNT;
186

187
        for (byte x=0; x<bidiCategoryList.length; x++) {
188
            if (s.equals(bidiCategoryList[x][SHORT])) {
189
                category = x;
190
                break;
191
            }
192
        }
193
        if (category >= DIRECTIONALITY_CATEGORY_COUNT) {
194
            throw new Exception("Could not parse bidi category.");
195
        }
196
        return category;
197
    }
198

199

200
    /**
201
    * Parse the combining attribute for a Unicode character.  If there is a combining
202
    * attribute and the parse succeeds, then the hasCombining field is set to true,
203
    * the combining field of this UnicodeSpec object is updated, and false is returned.
204
    * If the combining attribute is an empty string, the parse succeeds but the
205
    * hasCombining field is set to false. (and false is returned).
206
    *
207
    * The combining attribute, if any, should be a nonnegative decimal integer.
208
    *
209
    * @param s   the combining attribute extracted from a line of the Unicode data file
210
    * @return   the combining class value if any, -1 if property not defined
211
    * @exception Exception if can't parse the combining class
212
    */
213

214
    public static int parseCombiningClass(String s) throws Exception {
215
        int combining = -1;
216
        if (s.length()>0) {
217
            combining = Integer.parseInt(s, 10);
218
        }
219
        return combining;
220
    }
221

222
    /**
223
    * Parse the decomposition attribute for a Unicode character.  If the parse succeeds,
224
    * the decomposition field of this UnicodeSpec object is updated and false is returned.
225
    *
226
    * The decomposition attribute is complicated; for now, it is treated as a string.
227
    *
228
    * @param s   the decomposition attribute extracted from a line of the Unicode data file
229
    * @return   true if the parse failed; otherwise false
230
    */
231

232
    public static String parseDecomposition(String s) throws Exception {
233
        if (s==null) throw new Exception("Cannot parse decomposition.");
234
        return s;
235
    }
236

237

238
    /**
239
    * Parse the decimal value attribute for a Unicode character.  If there is a decimal value
240
    * attribute and the parse succeeds, then the hasDecimalValue field is set to true,
241
    * the decimalValue field of this UnicodeSpec object is updated, and false is returned.
242
    * If the decimal value attribute is an empty string, the parse succeeds but the
243
    * hasDecimalValue field is set to false. (and false is returned).
244
    *
245
    * The decimal value attribute, if any, should be a nonnegative decimal integer.
246
    *
247
    * @param s   the decimal value attribute extracted from a line of the Unicode data file
248
    * @return   the decimal value as an int, -1 if no decimal value defined
249
    * @exception NumberFormatException if the parse fails
250
    */
251
    public static int parseDecimalValue(String s) throws NumberFormatException {
252
        int value = -1;
253

254
        if (s.length() > 0) {
255
            value = Integer.parseInt(s, 10);
256
        }
257
        return value;
258
    }
259

260
    /**
261
    * Parse the digit value attribute for a Unicode character.  If there is a digit value
262
    * attribute and the parse succeeds, then the hasDigitValue field is set to true,
263
    * the digitValue field of this UnicodeSpec object is updated, and false is returned.
264
    * If the digit value attribute is an empty string, the parse succeeds but the
265
    * hasDigitValue field is set to false. (and false is returned).
266
    *
267
    * The digit value attribute, if any, should be a nonnegative decimal integer.
268
    *
269
    * @param s   the digit value attribute extracted from a line of the Unicode data file
270
    * @return   the digit value as an non-negative int, or -1 if no digit property defined
271
    * @exception NumberFormatException if the parse fails
272
    */
273
    public static int parseDigitValue(String s) throws NumberFormatException {
274
        int value = -1;
275

276
        if (s.length() > 0) {
277
            value = Integer.parseInt(s, 10);
278
        }
279
        return value;
280
    }
281

282
    public static String parseNumericValue(String s) throws Exception {
283
        if (s == null) throw new Exception("Cannot parse numeric value.");
284
        return s;
285
    }
286

287
    public static String parseComment(String s) throws Exception {
288
        if (s == null) throw new Exception("Cannot parse comment.");
289
        return s;
290
    }
291

292
    public static boolean parseMirrored(String s) throws Exception {
293
        boolean mirrored;
294
        if (s.length() == 1) {
295
            if (s.charAt(0) == 'Y') {mirrored = true;}
296
            else if (s.charAt(0) == 'N') {mirrored = false;}
297
            else {throw new Exception("Cannot parse mirrored property.");}
298
        }
299
        else { throw new Exception("Cannot parse mirrored property.");}
300
        return mirrored;
301
    }
302

303
    public static String parseOldName(String s) throws Exception {
304
        if (s == null) throw new Exception("Cannot parse old name");
305
        return s;
306
    }
307

308
    /**
309
    * Parse the uppercase mapping attribute for a Unicode character.  If there is a uppercase
310
    * mapping attribute and the parse succeeds, then the hasUpperMap field is set to true,
311
    * the upperMap field of this UnicodeSpec object is updated, and false is returned.
312
    * If the uppercase mapping attribute is an empty string, the parse succeeds but the
313
    * hasUpperMap field is set to false. (and false is returned).
314
    *
315
    * The uppercase mapping attribute should be a four to six digit hexadecimal integer.
316
    *
317
    * @param s   the uppercase mapping attribute extracted from a line of the Unicode data file
318
    * @return   simple uppercase character mapping if defined, MAP_UNDEFINED otherwise
319
    * @exception NumberFormatException if parse fails
320
    */
321
    public static int parseUpperMap(String s) throws NumberFormatException {
322
        int upperCase = MAP_UNDEFINED;
323

324
                int length = s.length();
325
        if (length >= 4 && length <=6) {
326
            upperCase = Integer.parseInt(s, 16);
327
        }
328
        else if (s.length() != 0) {
329
            throw new NumberFormatException();
330
        }
331
        return upperCase;
332
    }
333

334
    /**
335
    * Parse the lowercase mapping attribute for a Unicode character.  If there is a lowercase
336
    * mapping attribute and the parse succeeds, then the hasLowerMap field is set to true,
337
    * the lowerMap field of this UnicodeSpec object is updated, and false is returned.
338
    * If the lowercase mapping attribute is an empty string, the parse succeeds but the
339
     * hasLowerMap field is set to false. (and false is returned).
340
    *
341
    * The lowercase mapping attribute should be a four to six digit hexadecimal integer.
342
    *
343
    * @param s   the lowercase mapping attribute extracted from a line of the Unicode data file
344
    * @return   simple lowercase character mapping if defined, MAP_UNDEFINED otherwise
345
    * @exception NumberFormatException if parse fails
346
    */
347
    public static int parseLowerMap(String s) throws NumberFormatException {
348
        int lowerCase = MAP_UNDEFINED;
349
                int length = s.length();
350
        if (length >= 4 && length <= 6) {
351
            lowerCase = Integer.parseInt(s, 16);
352
        }
353
        else if (s.length() != 0) {
354
            throw new NumberFormatException();
355
        }
356
        return lowerCase;
357
    }
358

359
    /**
360
    * Parse the titlecase mapping attribute for a Unicode character.  If there is a titlecase
361
    * mapping attribute and the parse succeeds, then the hasTitleMap field is set to true,
362
    * the titleMap field of this UnicodeSpec object is updated, and false is returned.
363
    * If the titlecase mapping attribute is an empty string, the parse succeeds but the
364
    * hasTitleMap field is set to false. (and false is returned).
365
    *
366
    * The titlecase mapping attribute should be a four to six digit hexadecimal integer.
367
    *
368
    * @param s   the titlecase mapping attribute extracted from a line of the Unicode data file
369
    * @return   simple title case char mapping if defined, MAP_UNDEFINED otherwise
370
    * @exception NumberFormatException if parse fails
371
    */
372
    public static int parseTitleMap(String s) throws NumberFormatException {
373
        int titleCase = MAP_UNDEFINED;
374
                int length = s.length();
375
        if (length >= 4 && length <= 6) {
376
            titleCase = Integer.parseInt(s, 16);
377
        }
378
        else if (s.length() != 0) {
379
            throw new NumberFormatException();
380
        }
381
        return titleCase;
382
    }
383

384
    /**
385
    * Read and parse a Unicode data file.
386
    *
387
    * @param file   a file specifying the Unicode data file to be read
388
    * @return   an array of UnicodeSpec objects, one for each line of the
389
    *           Unicode data file that could be successfully parsed as
390
    *           specifying Unicode character attributes
391
    */
392

393
    public static UnicodeSpec[] readSpecFile(File file, int plane) throws FileNotFoundException {
394
        ArrayList<UnicodeSpec> list = new ArrayList<>(3000);
395
        UnicodeSpec[] result = null;
396
        int count = 0;
397
        BufferedReader f = new BufferedReader(new FileReader(file));
398
        String line = null;
399
        loop:
400
        while(true) {
401
            try {
402
                line = f.readLine();
403
            }
404
            catch (IOException e) {
405
                break loop;
406
            }
407
            if (line == null) break loop;
408
            UnicodeSpec item = parse(line.trim());
409
            int specPlane = item.getCodePoint() >>> 16;
410
            if (specPlane < plane) continue;
411
            if (specPlane > plane) break;
412

413
            if (item != null) {
414
                list.add(item);
415
            }
416
        }
417
        result = new UnicodeSpec[list.size()];
418
        list.toArray(result);
419
        return result;
420
    }
421

422
    void setCodePoint(int value) {
423
        codePoint = value;
424
    }
425

426
    /**
427
     * Return the code point in this Unicode specification
428
     * @return the char code point representing by the specification
429
     */
430
    public int getCodePoint() {
431
        return codePoint;
432
    }
433

434
    void setName(String name) {
435
        this.name = name;
436
    }
437

438
    public String getName() {
439
        return name;
440
    }
441

442
    void setGeneralCategory(byte category) {
443
        generalCategory = category;
444
    }
445

446
    public byte getGeneralCategory() {
447
        return generalCategory;
448
    }
449

450
    void setBidiCategory(byte category) {
451
        bidiCategory = category;
452
    }
453

454
    public byte getBidiCategory() {
455
        return bidiCategory;
456
    }
457

458
    void setCombiningClass(int combiningClass) {
459
        this.combiningClass = combiningClass;
460
    }
461

462
    public int getCombiningClass() {
463
        return combiningClass;
464
    }
465

466
    void setDecomposition(String decomposition) {
467
        this.decomposition = decomposition;
468
    }
469

470
    public String getDecomposition() {
471
         return decomposition;
472
    }
473

474
    void setDecimalValue(int value) {
475
        decimalValue = value;
476
    }
477

478
    public int getDecimalValue() {
479
        return decimalValue;
480
    }
481

482
    public boolean isDecimalValue() {
483
        return decimalValue != -1;
484
    }
485

486
    void setDigitValue(int value) {
487
        digitValue = value;
488
    }
489

490
    public int getDigitValue() {
491
        return digitValue;
492
    }
493

494
    public boolean isDigitValue() {
495
        return digitValue != -1;
496
    }
497

498
    void setNumericValue(String value) {
499
        numericValue = value;
500
    }
501

502
    public String getNumericValue() {
503
        return numericValue;
504
    }
505

506
    public boolean isNumericValue() {
507
        return numericValue.length() > 0;
508
    }
509

510
    void setMirrored(boolean value) {
511
        mirrored = value;
512
    }
513

514
    public boolean isMirrored() {
515
        return mirrored;
516
    }
517

518
    void setOldName(String name) {
519
        oldName = name;
520
    }
521

522
    public String getOldName() {
523
        return oldName;
524
    }
525

526
    void setComment(String comment) {
527
        this.comment = comment;
528
    }
529

530
    public String getComment() {
531
        return comment;
532
    }
533

534
    void setUpperMap(int ch) {
535
        upperMap = ch;
536
    };
537

538
    public int getUpperMap() {
539
        return upperMap;
540
    }
541

542
    public boolean hasUpperMap() {
543
        return upperMap != MAP_UNDEFINED;
544
    }
545

546
    void setLowerMap(int ch) {
547
        lowerMap = ch;
548
    }
549

550
    public int getLowerMap() {
551
        return lowerMap;
552
    }
553

554
    public boolean hasLowerMap() {
555
        return lowerMap != MAP_UNDEFINED;
556
    }
557

558
    void setTitleMap(int ch) {
559
        titleMap = ch;
560
    }
561

562
    public int getTitleMap() {
563
        return titleMap;
564
    }
565

566
    public boolean hasTitleMap() {
567
        return titleMap != MAP_UNDEFINED;
568
    }
569

570
    int codePoint;         // the characters UTF-32 code value
571
    String name;            // the ASCII name
572
    byte generalCategory;   // general category, available via Characte.getType()
573
    byte bidiCategory;      // available via Character.getBidiType()
574
    int combiningClass;     // not used in Character
575
    String decomposition;   // not used in Character
576
    int decimalValue;       // decimal digit value
577
    int digitValue;         // not all digits are decimal
578
    String numericValue;    // numeric value if digit or non-digit
579
    boolean mirrored;       //
580
    String oldName;
581
    String comment;
582
    int upperMap;
583
    int lowerMap;
584
    int titleMap;
585

586
    // this is the number of fields in one line of the UnicodeData.txt file
587
    // each field is separated by a semicolon (a token)
588
    static final int REQUIRED_FIELDS = 15;
589

590
    /**
591
     * General category types
592
     * To preserve compatibility, these values cannot be changed
593
     */
594
    public static final byte
595
        UNASSIGNED                  =  0, // Cn normative
596
        UPPERCASE_LETTER            =  1, // Lu normative
597
        LOWERCASE_LETTER            =  2, // Ll normative
598
        TITLECASE_LETTER            =  3, // Lt normative
599
        MODIFIER_LETTER             =  4, // Lm normative
600
        OTHER_LETTER                =  5, // Lo normative
601
        NON_SPACING_MARK            =  6, // Mn informative
602
        ENCLOSING_MARK              =  7, // Me informative
603
        COMBINING_SPACING_MARK      =  8, // Mc normative
604
        DECIMAL_DIGIT_NUMBER        =  9, // Nd normative
605
        LETTER_NUMBER               = 10, // Nl normative
606
        OTHER_NUMBER                = 11, // No normative
607
        SPACE_SEPARATOR             = 12, // Zs normative
608
        LINE_SEPARATOR              = 13, // Zl normative
609
        PARAGRAPH_SEPARATOR         = 14, // Zp normative
610
        CONTROL                     = 15, // Cc normative
611
        FORMAT                      = 16, // Cf normative
612
        // 17 is unused for no apparent reason,
613
        // but must preserve forward compatibility
614
        PRIVATE_USE                 = 18, // Co normative
615
        SURROGATE                   = 19, // Cs normative
616
        DASH_PUNCTUATION            = 20, // Pd informative
617
        START_PUNCTUATION           = 21, // Ps informative
618
        END_PUNCTUATION             = 22, // Pe informative
619
        CONNECTOR_PUNCTUATION       = 23, // Pc informative
620
        OTHER_PUNCTUATION           = 24, // Po informative
621
        MATH_SYMBOL                 = 25, // Sm informative
622
        CURRENCY_SYMBOL             = 26, // Sc informative
623
        MODIFIER_SYMBOL             = 27, // Sk informative
624
        OTHER_SYMBOL                = 28, // So informative
625
        INITIAL_QUOTE_PUNCTUATION   = 29, // Pi informative
626
        FINAL_QUOTE_PUNCTUATION     = 30, // Pf informative
627

628
        // this value is only used in the character generation tool
629
        // it can change to accommodate the addition of new categories.
630
        GENERAL_CATEGORY_COUNT      = 31; // sentinel value
631

632
    static final byte SHORT = 0, LONG = 1;
633
    // general category type strings
634
    // NOTE: The order of this category array is dependent on the assignment of
635
    // category constants above. We want to access this array using constants above.
636
    // [][SHORT] is the SHORT name, [][LONG] is the LONG name
637
    static final String[][] generalCategoryList = {
638
        {"Cn", "UNASSIGNED"},
639
        {"Lu", "UPPERCASE_LETTER"},
640
        {"Ll", "LOWERCASE_LETTER"},
641
        {"Lt", "TITLECASE_LETTER"},
642
        {"Lm", "MODIFIER_LETTER"},
643
        {"Lo", "OTHER_LETTER"},
644
        {"Mn", "NON_SPACING_MARK"},
645
        {"Me", "ENCLOSING_MARK"},
646
        {"Mc", "COMBINING_SPACING_MARK"},
647
        {"Nd", "DECIMAL_DIGIT_NUMBER"},
648
        {"Nl", "LETTER_NUMBER"},
649
        {"No", "OTHER_NUMBER"},
650
        {"Zs", "SPACE_SEPARATOR"},
651
        {"Zl", "LINE_SEPARATOR"},
652
        {"Zp", "PARAGRAPH_SEPARATOR"},
653
        {"Cc", "CONTROL"},
654
        {"Cf", "FORMAT"},
655
        {"xx", "unused"},
656
        {"Co", "PRIVATE_USE"},
657
        {"Cs", "SURROGATE"},
658
        {"Pd", "DASH_PUNCTUATION"},
659
        {"Ps", "START_PUNCTUATION"},
660
        {"Pe", "END_PUNCTUATION"},
661
        {"Pc", "CONNECTOR_PUNCTUATION"},
662
        {"Po", "OTHER_PUNCTUATION"},
663
        {"Sm", "MATH_SYMBOL"},
664
        {"Sc", "CURRENCY_SYMBOL"},
665
        {"Sk", "MODIFIER_SYMBOL"},
666
        {"So", "OTHER_SYMBOL"},
667
        {"Pi", "INITIAL_QUOTE_PUNCTUATION"},
668
        {"Pf", "FINAL_QUOTE_PUNCTUATION"}
669
    };
670

671
    /**
672
     * Bidirectional categories
673
     */
674
    public static final byte
675
                DIRECTIONALITY_UNDEFINED                  = -1,
676
        // Strong category
677
        DIRECTIONALITY_LEFT_TO_RIGHT              =  0, // L
678
        DIRECTIONALITY_RIGHT_TO_LEFT              =  1, // R
679
        DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC       =  2, // AL
680
        // Weak category
681
        DIRECTIONALITY_EUROPEAN_NUMBER            =  3, // EN
682
        DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR  =  4, // ES
683
        DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR =  5, // ET
684
        DIRECTIONALITY_ARABIC_NUMBER              =  6, // AN
685
        DIRECTIONALITY_COMMON_NUMBER_SEPARATOR    =  7, // CS
686
        DIRECTIONALITY_NONSPACING_MARK            =  8, // NSM
687
        DIRECTIONALITY_BOUNDARY_NEUTRAL           =  9, // BN
688
        // Neutral category
689
        DIRECTIONALITY_PARAGRAPH_SEPARATOR        = 10, // B
690
        DIRECTIONALITY_SEGMENT_SEPARATOR          = 11, // S
691
        DIRECTIONALITY_WHITESPACE                 = 12, // WS
692
        DIRECTIONALITY_OTHER_NEUTRALS              = 13, // ON
693

694
        DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING    = 14, // LRE
695
        DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE     = 15, // LRO
696
        DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING    = 16, // RLE
697
        DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE     = 17, // RLO
698
        DIRECTIONALITY_POP_DIRECTIONAL_FORMAT     = 18, // PDF
699

700
        DIRECTIONALITY_CATEGORY_COUNT             = 19; // sentinel value
701

702
    // If changes are made to the above bidi category assignments, this
703
    // list of bidi category names must be changed to keep their order in synch.
704
    // Access this list using the bidi category constants above.
705
    static final String[][] bidiCategoryList = {
706
        {"L", "DIRECTIONALITY_LEFT_TO_RIGHT"},
707
        {"R", "DIRECTIONALITY_RIGHT_TO_LEFT"},
708
        {"AL", "DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC"},
709
        {"EN", "DIRECTIONALITY_EUROPEAN_NUMBER"},
710
        {"ES", "DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR"},
711
        {"ET", "DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR"},
712
        {"AN", "DIRECTIONALITY_ARABIC_NUMBER"},
713
        {"CS", "DIRECTIONALITY_COMMON_NUMBER_SEPARATOR"},
714
        {"NSM", "DIRECTIONALITY_NONSPACING_MARK"},
715
        {"BN", "DIRECTIONALITY_BOUNDARY_NEUTRAL"},
716
        {"B", "DIRECTIONALITY_PARAGRAPH_SEPARATOR"},
717
        {"S", "DIRECTIONALITY_SEGMENT_SEPARATOR"},
718
        {"WS", "DIRECTIONALITY_WHITESPACE"},
719
        {"ON", "DIRECTIONALITY_OTHER_NEUTRALS"},
720
        {"LRE", "DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING"},
721
        {"LRO", "DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE"},
722
        {"RLE", "DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING"},
723
        {"RLO", "DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE"},
724
        {"PDF", "DIRECTIONALITY_POP_DIRECTIONAL_FORMAT"},
725

726
    };
727

728
    // Unicode specification lines have fields in this order.
729
    static final byte
730
        FIELD_VALUE         = 0,
731
        FIELD_NAME          = 1,
732
        FIELD_CATEGORY      = 2,
733
        FIELD_CLASS         = 3,
734
        FIELD_BIDI          = 4,
735
        FIELD_DECOMPOSITION = 5,
736
        FIELD_DECIMAL       = 6,
737
        FIELD_DIGIT         = 7,
738
        FIELD_NUMERIC       = 8,
739
        FIELD_MIRRORED      = 9,
740
        FIELD_OLDNAME       = 10,
741
        FIELD_COMMENT       = 11,
742
        FIELD_UPPERCASE     = 12,
743
        FIELD_LOWERCASE     = 13,
744
        FIELD_TITLECASE     = 14;
745

746
        static final Pattern tokenSeparator = Pattern.compile(";");
747

748
        public static void main(String[] args) {
749
                UnicodeSpec[] spec = null;
750
                if (args.length == 2 ) {
751
                        try {
752
                                File file = new File(args[0]);
753
                                int plane = Integer.parseInt(args[1]);
754
                                spec = UnicodeSpec.readSpecFile(file, plane);
755
                                System.out.println("UnicodeSpec[" + spec.length + "]:");
756
                                for (int x=0; x<spec.length; x++) {
757
                                        System.out.println(spec[x].toString());
758
                                }
759
                        }
760
                        catch(Exception e) {
761
                                e.printStackTrace();
762
                        }
763
                }
764

765
        }
766

767
}
768

769
Product

Resources

Company