// Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/make/src/classes/build/tools/generatecharacter/CharacterScript.java
// 32287 views
package build.tools.generatecharacter;12import java.util.regex.*;3import java.util.*;4import java.io.*;56public class CharacterScript {78// generate the code needed for j.l.C.UnicodeScript9static void fortest(String fmt, Object... o) {10//System.out.printf(fmt, o);11}1213static void print(String fmt, Object... o) {14System.out.printf(fmt, o);15}1617static void debug(String fmt, Object... o) {18//System.out.printf(fmt, o);19}2021public static void main(String args[]){22try {23if (args.length != 1) {24System.out.println("java CharacterScript script.txt out");25System.exit(1);26}2728int i, j;29BufferedReader sbfr = new BufferedReader(new FileReader(args[0]));30HashMap<String,Integer> scriptMap = new HashMap<String,Integer>();31String line = null;3233Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");3435int prevS = -1;36int prevE = -1;37String prevN = null;38int[][] scripts = new int[1024][3];39int scriptSize = 0;4041while ((line = sbfr.readLine()) != null) {42if (line.length() <= 1 || line.charAt(0) == '#') {43continue;44}45m.reset(line);46if (m.matches()) {47int start = Integer.parseInt(m.group(1), 16);48int end = (m.group(2)==null)?start49:Integer.parseInt(m.group(2), 16);50String name = m.group(3);51if (name.equals(prevN) && start == prevE + 1) {52prevE = end;53} else {54if (prevS != -1) {55if (scriptMap.get(prevN) == null) {56scriptMap.put(prevN, scriptMap.size());57}58scripts[scriptSize][0] = prevS;59scripts[scriptSize][1] = prevE;60scripts[scriptSize][2] = scriptMap.get(prevN);61scriptSize++;62}63debug("%x-%x\t%s%n", prevS, prevE, prevN);64prevS = start; prevE = end; prevN = name;65}66} else {67debug("Warning: Unrecognized line <%s>%n", line);68}69}7071//last one.72if (scriptMap.get(prevN) == null) {73scriptMap.put(prevN, scriptMap.size());74}75scripts[scriptSize][0] = prevS;76scripts[scriptSize][1] = prevE;77scripts[scriptSize][2] = scriptMap.get(prevN);78scriptSize++;7980debug("%x-%x\t%s%n", prevS, prevE, 
prevN);81debug("-----------------%n");82debug("Total scripts=%s%n", scriptMap.size());83debug("-----------------%n%n");8485String[] names = new String[scriptMap.size()];86for (String name: scriptMap.keySet()) {87names[scriptMap.get(name).intValue()] = name;88}8990for (j = 0; j < scriptSize; j++) {91for (int cp = scripts[j][0]; cp <= scripts[j][1]; cp++) {92String name = names[scripts[j][2]].toUpperCase(Locale.ENGLISH);;93if (cp > 0xffff)94System.out.printf("%05X %s%n", cp, name);95else96System.out.printf("%05X %s%n", cp, name);97}98}99100Arrays.sort(scripts, 0, scriptSize,101new Comparator<int[]>() {102public int compare(int[] a1, int[] a2) {103return a1[0] - a2[0];104}105public boolean compare(Object obj) {106return obj == this;107}108});109110111112// Consolidation: there are lots of "reserved" code points113// embedded in those otherwise "sequential" blocks.114// To make the lookup table smaller, we combine those115// separated segments with the assumption that the lookup116// implementation checks117// Character.getType() != Character.UNASSIGNED118// first (return UNKNOWN for unassigned)119120ArrayList<int[]> list = new ArrayList();121list.add(scripts[0]);122123int[] last = scripts[0];124for (i = 1; i < scriptSize; i++) {125if (scripts[i][0] != (last[1] + 1)) {126127boolean isNotUnassigned = false;128for (int cp = last[1] + 1; cp < scripts[i][0]; cp++) {129if (Character.getType(cp) != Character.UNASSIGNED) {130isNotUnassigned = true;131debug("Warning: [%x] is ASSIGNED but in NON script%n", cp);132break;133}134}135if (isNotUnassigned) {136// surrogates only?137int[] a = new int[3];138a[0] = last[1] + 1;139a[1] = scripts[i][0] - 1;140a[2] = -1; // unknown141list.add(a);142} else {143if (last[2] == scripts[i][2]) {144//combine145last[1] = scripts[i][1];146continue;147} else {148// expand last149last[1] = scripts[i][0] - 1;150}151}152}153list.add(scripts[i]);154last = scripts[i];155}156157for (i = 0; i < list.size(); i++) {158int[] a = (int[])list.get(i);159String 
name = "UNKNOWN";160if (a[2] != -1)161name = names[a[2]].toUpperCase(Locale.US);162debug("0x%05x, 0x%05x %s%n", a[0], a[1], name);163}164debug("--->total=%d%n", list.size());165166167//////////////////OUTPUT//////////////////////////////////168print("public class Scripts {%n%n");169print(" public static enum UnicodeScript {%n");170for (i = 0; i < names.length; i++) {171print(" /**%n * Unicode script \"%s\".%n */%n", names[i]);172print(" %s,%n%n", names[i].toUpperCase(Locale.US));173}174print(" /**%n * Unicode script \"Unknown\".%n */%n UNKNOWN;%n%n");175176177// lookup table178print(" private static final int[] scriptStarts = {%n");179for (int[] a : list) {180String name = "UNKNOWN";181if (a[2] != -1)182name = names[a[2]].toUpperCase(Locale.US);183if (a[0] < 0x10000)184print(" 0x%04X, // %04X..%04X; %s%n",185a[0], a[0], a[1], name);186else187print(" 0x%05X, // %05X..%05X; %s%n",188a[0], a[0], a[1], name);189}190last = list.get(list.size() -1);191if (last[1] != Character.MAX_CODE_POINT)192print(" 0x%05X // %05X..%06X; %s%n",193last[1] + 1, last[1] + 1, Character.MAX_CODE_POINT,194"UNKNOWN");195print("%n };%n%n");196197print(" private static final UnicodeScript[] scripts = {%n");198for (int[] a : list) {199String name = "UNKNOWN";200if (a[2] != -1)201name = names[a[2]].toUpperCase(Locale.US);202print(" %s,%n", name);203}204205if (last[1] != Character.MAX_CODE_POINT)206print(" UNKNOWN%n");207print(" };%n");208print(" }%n");209print("}%n");210211} catch (Exception e) {212e.printStackTrace();213}214}215}216217218