Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/make/src/classes/build/tools/generatecharacter/SpecialCaseMap.java
32287 views
/*
 * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package build.tools.generatecharacter;

import java.io.*;
import java.util.*;
import java.lang.*;

/**
 * SpecialCaseMap has the responsibility of storing the
 * 1:M, locale-sensitive, and context sensitive case mappings
 * that occur when uppercasing Unicode 4.0 characters.
/**
 * Stores the special (1:M) case mappings of a single character and knows how
 * to read them from a Unicode SpecialCasing.txt data file.
 * <p>
 * Only lines without a condition list are turned into objects: when the text
 * after the fourth field separator still contains a {@code ';'},
 * {@link #parse(String)} returns {@code null} for that line.
 * <p>
 * A single SpecialCaseMap contains the mapping for one character. Instances
 * are ordered, compared and hashed by their source code point alone.
 *
 * @author John O'Conner
 */
public class SpecialCaseMap implements Comparable<SpecialCaseMap> {

    /**
     * Fields that can be found in the SpecialCasing.txt file.
     */
    static final int REQUIRED_FIELDS = 5;
    static final int FIELD_SOURCE = 0;
    static final int FIELD_LOWERCASE = 1;
    static final int FIELD_TITLECASE = 2;
    static final int FIELD_UPPERCASE = 3;
    static final int FIELD_CONDITIONS = 4;

    /**
     * Context values
     */
    static final String CONTEXT_FINAL = "FINAL";
    static final String CONTEXT_NONFINAL = "NON_FINAL";
    static final String CONTEXT_MODERN = "MODERN";
    static final String CONTEXT_NONMODERN = "NON_MODERN";

    int chSource;
    Locale locale;
    char[] lowerCaseMap;
    char[] upperCaseMap;
    char[] titleCaseMap;
    String[] context;

    /**
     * Creates an entry whose source code point is initialised to 0xFFFF and
     * whose mappings, locale and context are all {@code null}.
     */
    SpecialCaseMap() {
        chSource = 0xFFFF;
    }

    /**
     * Read and parse a Unicode special case map file.
     * <p>
     * Entries whose plane (source code point {@code >> 16}) is lower than
     * {@code plane} are skipped; reading stops at the first entry whose plane
     * is higher. If reading fails part-way through, a message is printed on
     * standard error and the entries parsed so far are returned.
     *
     * @param file a file specifying the Unicode special case mappings
     * @param plane the Unicode plane whose entries are wanted
     * @return an array of SpecialCaseMap objects sorted by source code point,
     *         one for each line of the special case map data file that could
     *         be successfully parsed and belongs to {@code plane}
     * @throws FileNotFoundException if {@code file} cannot be opened
     */
    public static SpecialCaseMap[] readSpecFile(File file, int plane) throws FileNotFoundException {
        List<SpecialCaseMap> caseMaps = new ArrayList<>(150);
        // try-with-resources: the reader is closed on every exit path.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                SpecialCaseMap item = parse(line.trim());
                if (item == null) {
                    continue;
                }
                int itemPlane = item.getCharSource() >> 16;
                if (itemPlane < plane) {
                    continue;
                }
                if (itemPlane > plane) {
                    break;
                }
                caseMaps.add(item);
            }
        } catch (FileNotFoundException e) {
            // Opening the file failed: this is the one error callers are told about.
            throw e;
        } catch (IOException e) {
            // Best effort, as before: keep what was parsed, but no longer silently.
            System.err.println("Error reading " + file + ": " + e);
        }
        SpecialCaseMap[] result = caseMaps.toArray(new SpecialCaseMap[0]);
        Arrays.sort(result);
        return result;
    }

    /**
     * Given one line of a Unicode special casing data file as a String, parse the line
     * and return a SpecialCaseMap object that contains the case mapping.
     * <p>
     * Returns {@code null} without any message for a {@code null} or empty
     * line, for a line starting with {@code '#'}, and for a line whose last
     * field still contains a {@code ';'}. A line that cannot be split into
     * the required fields, or whose hexadecimal values cannot be parsed,
     * prints "Error parsing spec line." on standard output and yields
     * {@code null}.
     *
     * @param s a line of the Unicode special case map data file to be parsed
     * @return a SpecialCaseMap object, or null if the parsing process failed for some reason
     */
    public static SpecialCaseMap parse(String s) {
        if (s == null || s.length() == 0 || s.charAt(0) == '#') {
            return null;
        }
        try {
            String[] tokens = splitFields(s);
            if (tokens == null) {
                System.out.println("Error parsing spec line.");
                return null;
            }
            if (tokens[FIELD_CONDITIONS].indexOf(';') != -1) {
                // A further separator means extra fields precede the comment; such lines are skipped.
                return null;
            }
            SpecialCaseMap spec = new SpecialCaseMap();
            spec.setCharSource(parseChar(tokens[FIELD_SOURCE].trim()));
            spec.setUpperCaseMap(parseCaseMap(tokens[FIELD_UPPERCASE]));
            spec.setLowerCaseMap(parseCaseMap(tokens[FIELD_LOWERCASE]));
            spec.setTitleCaseMap(parseCaseMap(tokens[FIELD_TITLECASE]));
            spec.setLocale(parseLocale(tokens[FIELD_CONDITIONS]));
            spec.setContext(parseContext(tokens[FIELD_CONDITIONS]));
            return spec;
        } catch (RuntimeException e) {
            System.out.println("Error parsing spec line.");
            return null;
        }
    }

    /**
     * Splits a data line at its first {@code REQUIRED_FIELDS - 1} semicolons.
     * The last element receives everything after the final separator used.
     *
     * @return the fields, or {@code null} if the line has too few separators
     */
    private static String[] splitFields(String s) {
        String[] tokens = new String[REQUIRED_FIELDS];
        int tokenStart = 0;
        for (int x = 0; x < REQUIRED_FIELDS - 1; x++) {
            int tokenEnd = s.indexOf(';', tokenStart);
            if (tokenEnd < 0) {
                return null;
            }
            tokens[x] = s.substring(tokenStart, tokenEnd);
            tokenStart = tokenEnd + 1;
        }
        tokens[REQUIRED_FIELDS - 1] = s.substring(tokenStart);
        return tokens;
    }

    /**
     * Parses a hexadecimal code point value.
     *
     * @param token hexadecimal digits without prefix or surrounding whitespace
     * @return the parsed value
     * @throws NumberFormatException if {@code token} is not a hexadecimal integer
     */
    static int parseChar(String token) throws NumberFormatException {
        return Integer.parseInt(token, 16);
    }

    /**
     * Parses a space-separated list of hexadecimal code points into UTF-16
     * chars. Values above 0xFFFF are stored as a surrogate pair. Leading,
     * trailing and repeated space characters are ignored; an empty or
     * all-space token yields an empty array.
     *
     * @param token the field text, e.g. {@code " 0053 0073"}
     * @return the mapped characters
     * @throws NumberFormatException if an item is not a hexadecimal integer
     */
    static char[] parseCaseMap(String token) throws NumberFormatException {
        StringBuilder buff = new StringBuilder();
        int length = token.length();
        int pos = 0;
        while (pos < length) {
            // Bounds-checked skip: trailing spaces must not run past the end of the token.
            while (pos < length && Character.isSpaceChar(token.charAt(pos))) {
                pos++;
            }
            if (pos == length) {
                break;
            }
            int start = pos;
            while (pos < length && !Character.isSpaceChar(token.charAt(pos))) {
                pos++;
            }
            int ch = parseChar(token.substring(start, pos));
            if (ch > 0xFFFF) {
                buff.append(getHighSurrogate(ch));
                buff.append(getLowSurrogate(ch));
            } else {
                buff.append((char) ch);
            }
        }
        return buff.toString().toCharArray();
    }

    /**
     * Placeholder: locale conditions are not interpreted.
     *
     * @return always {@code null}
     */
    static Locale parseLocale(String token) {
        return null;
    }

    /**
     * Placeholder: context conditions are not interpreted.
     *
     * @return always {@code null}
     */
    static String[] parseContext(String token) {
        return null;
    }

    /**
     * Binary-searches {@code map} for an entry whose source code point is
     * {@code ch}. The search only gives meaningful results when the array is
     * in ascending order of source code point.
     *
     * @param ch the code point to look for
     * @param map the entries to search; may be {@code null} or empty
     * @return the index of a matching entry, or -1 if there is none
     */
    static int find(int ch, SpecialCaseMap[] map) {
        if ((map == null) || (map.length == 0)) {
            return -1;
        }
        int bottom = 0;
        int top = map.length;
        int current = top / 2;
        // Loop narrows [bottom, top) until one candidate index is left.
        while (top - bottom > 1) {
            if (ch >= map[current].getCharSource()) {
                bottom = current;
            } else {
                top = current;
            }
            current = (top + bottom) >>> 1;
        }
        return (ch == map[current].getCharSource()) ? current : -1;
    }

    /**
     * Extracts and returns the high surrogate value from a UTF-32 code point.
     * If argument is a BMP character, then it is converted to a char and returned;
     * otherwise the high surrogate value is extracted.
     *
     * @param codePoint a UTF-32 code point
     * @return the high surrogate value that helps create <code>codePoint</code>; else
     *         the char representation of <code>codePoint</code> if it is a BMP character.
     * @since 1.5
     */
    static char getHighSurrogate(int codePoint) {
        if (codePoint > 0xFFFF) {
            return (char) ((codePoint - 0x10000) / 0x0400 + 0xD800);
        }
        return (char) codePoint;
    }

    /**
     * Extracts and returns the low surrogate value from a UTF-32 code point.
     * If argument is a BMP character, then it is converted to a char and returned;
     * otherwise the low surrogate value is extracted.
     *
     * @param codePoint a UTF-32 code point
     * @return the low surrogate value that helps create <code>codePoint</code>; else
     *         the char representation of <code>codePoint</code> if it is a BMP character.
     * @since 1.5
     */
    static char getLowSurrogate(int codePoint) {
        if (codePoint > 0xFFFF) {
            return (char) ((codePoint - 0x10000) % 0x0400 + 0xDC00);
        }
        return (char) codePoint;
    }

    /**
     * Formats the low 24 bits of {@code n} as exactly six upper-case
     * hexadecimal digits.
     */
    static String hex6(int n) {
        String str = Integer.toHexString(n & 0xFFFFFF).toUpperCase();
        return "000000".substring(Math.min(6, str.length())) + str;
    }

    /**
     * Formats every char of {@code map} with {@link #hex6(int)}, separated by
     * single spaces.
     *
     * @param map the chars to format; may be {@code null} or empty
     * @return the formatted list, or an empty string when there is nothing to format
     */
    static String hex6(char[] map) {
        if (map == null || map.length == 0) {
            return "";
        }
        StringBuilder buff = new StringBuilder(hex6(map[0]));
        for (int x = 1; x < map.length; x++) {
            buff.append(' ').append(hex6(map[x]));
        }
        return buff.toString();
    }

    void setCharSource(int ch) {
        chSource = ch;
    }

    void setLowerCaseMap(char[] map) {
        lowerCaseMap = map;
    }

    void setUpperCaseMap(char[] map) {
        upperCaseMap = map;
    }

    void setTitleCaseMap(char[] map) {
        titleCaseMap = map;
    }

    void setLocale(Locale locale) {
        this.locale = locale;
    }

    void setContext(String[] context) {
        this.context = context;
    }

    /** @return the source code point this entry maps from */
    public int getCharSource() {
        return chSource;
    }

    /** @return the lower case mapping; the internal array, not a copy */
    public char[] getLowerCaseMap() {
        return lowerCaseMap;
    }

    /** @return the upper case mapping; the internal array, not a copy */
    public char[] getUpperCaseMap() {
        return upperCaseMap;
    }

    /** @return the title case mapping; the internal array, not a copy */
    public char[] getTitleCaseMap() {
        return titleCaseMap;
    }

    /** @return the locale condition, or {@code null} if none was set */
    public Locale getLocale() {
        return locale;
    }

    /** @return the context conditions, or {@code null} if none were set */
    public String[] getContext() {
        return context;
    }

    /**
     * Orders entries by source code point.
     */
    @Override
    public int compareTo(SpecialCaseMap otherObject) {
        return Integer.compare(chSource, otherObject.chSource);
    }

    /**
     * Two entries are equal when they have the same source code point; the
     * mappings themselves are not compared.
     */
    @Override
    public boolean equals(Object o1) {
        if (this == o1) {
            return true;
        }
        if (!(o1 instanceof SpecialCaseMap)) {
            return false;
        }
        return compareTo((SpecialCaseMap) o1) == 0;
    }

    /**
     * Returns {@code source|lower|upper|title|context}, with code units in
     * six-digit hexadecimal. Unset mappings are rendered as empty strings.
     */
    @Override
    public String toString() {
        StringBuilder buff = new StringBuilder();
        buff.append(hex6(getCharSource()));
        buff.append('|').append(hex6(lowerCaseMap));
        buff.append('|').append(hex6(upperCaseMap));
        buff.append('|').append(hex6(titleCaseMap));
        // Arrays.toString prints "null" for a null array and the contents otherwise.
        buff.append('|').append(Arrays.toString(context));
        return buff.toString();
    }

    @Override
    public int hashCode() {
        return chSource;
    }

    /**
     * Prints the entries of one plane of a special casing file.
     *
     * @param args the data file path followed by the plane number
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("Usage: SpecialCaseMap <SpecialCasing.txt> <plane>");
            return;
        }
        try {
            File file = new File(args[0]);
            int plane = Integer.parseInt(args[1]);
            SpecialCaseMap[] spec = SpecialCaseMap.readSpecFile(file, plane);
            System.out.println("SpecialCaseMap[" + spec.length + "]:");
            for (SpecialCaseMap entry : spec) {
                System.out.println(entry.toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}