Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/make/src/classes/build/tools/generatecharacter/Utility.java
32287 views
/*1* Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425package build.tools.generatecharacter;2627import java.text.*;28import java.util.*;2930public class Utility {31static byte peekByte(String s, int index) {32char c = s.charAt(index/2);33return ((index&1)==0)?(byte)(c>>8):(byte)c;34}3536static short peekShort(String s, int index) {37return (short)s.charAt(index);38}3940static int peekInt(String s, int index) {41index *= 2;42return (((int)s.charAt(index)) << 16) | s.charAt(index+1);43}4445static void poke(String s, int index, byte value) {46int mask = 0xFF00;47int ivalue = value;48if ((index&1)==0) {49ivalue <<= 8;50mask = 0x00FF;51}52index /= 2;53if (index == s.length()) {54s = s + (char)ivalue;55}56else if (index == 0) {57s = (char)(ivalue|(s.charAt(0)&mask)) + s.substring(1);58}59else {60s = s.substring(0, index) + (char)(ivalue|(s.charAt(index)&mask))61+ s.substring(index+1);62}63}6465static void poke(String s, int index, short value) {66if (index == s.length()) {67s = s + (char)value;68}69else if (index == 0) {70s = (char)value + s.substring(1);71}72else {73s = s.substring(0, index) + (char)value + s.substring(index+1);74}75}7677static void poke(String s, int index, int value) {78index *= 2;79char hi = (char)(value >> 16);80if (index == s.length()) {81s = s + hi + (char)value;82}83else if (index == 0) {84s = hi + (char)value + s.substring(2);85}86else {87s = s.substring(0, index) + hi + (char)value + s.substring(index+2);88}89}9091/**92* The ESCAPE character is used during run-length encoding. It signals93* a run of identical chars.94*/95static final char ESCAPE = '\uA5A5';9697/**98* The ESCAPE_BYTE character is used during run-length encoding. It signals99* a run of identical bytes.100*/101static final byte ESCAPE_BYTE = (byte)0xA5;102103/**104* Construct a string representing a short array. Use run-length encoding.105* A character represents itself, unless it is the ESCAPE character. Then106* the following notations are possible:107* ESCAPE ESCAPE ESCAPE literal108* ESCAPE n c n instances of character c109* Since an encoded run occupies 3 characters, we only encode runs of 4 or110* more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.111* If we encounter a run where n == ESCAPE, we represent this as:112* c ESCAPE n-1 c113* The ESCAPE value is chosen so as not to collide with commonly114* seen values.115*/116static final String arrayToRLEString(short[] a) {117StringBuffer buffer = new StringBuffer();118// for (int i=0; i<a.length; ++i) buffer.append((char) a[i]);119buffer.append((char) (a.length >> 16));120buffer.append((char) a.length);121short runValue = a[0];122int runLength = 1;123for (int i=1; i<a.length; ++i) {124short s = a[i];125if (s == runValue && runLength < 0xFFFF) ++runLength;126else {127encodeRun(buffer, runValue, runLength);128runValue = s;129runLength = 1;130}131}132encodeRun(buffer, runValue, runLength);133return buffer.toString();134}135136/**137* Construct a string representing a byte array. Use run-length encoding.138* Two bytes are packed into a single char, with a single extra zero byte at139* the end if needed. A byte represents itself, unless it is the140* ESCAPE_BYTE. Then the following notations are possible:141* ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal142* ESCAPE_BYTE n b n instances of byte b143* Since an encoded run occupies 3 bytes, we only encode runs of 4 or144* more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.145* If we encounter a run where n == ESCAPE_BYTE, we represent this as:146* b ESCAPE_BYTE n-1 b147* The ESCAPE_BYTE value is chosen so as not to collide with commonly148* seen values.149*/150static final String arrayToRLEString(byte[] a) {151StringBuffer buffer = new StringBuffer();152buffer.append((char) (a.length >> 16));153buffer.append((char) a.length);154byte runValue = a[0];155int runLength = 1;156byte[] state = new byte[2];157for (int i=1; i<a.length; ++i) {158byte b = a[i];159if (b == runValue && runLength < 0xFF) ++runLength;160else {161encodeRun(buffer, runValue, runLength, state);162runValue = b;163runLength = 1;164}165}166encodeRun(buffer, runValue, runLength, state);167168// We must save the final byte, if there is one, by padding169// an extra zero.170if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state);171172return buffer.toString();173}174175/**176* Encode a run, possibly a degenerate run (of < 4 values).177* @param length The length of the run; must be > 0 && <= 0xFFFF.178*/179private static final void encodeRun(StringBuffer buffer, short value, int length) {180if (length < 4) {181for (int j=0; j<length; ++j) {182if (value == (int) ESCAPE) buffer.append(ESCAPE);183buffer.append((char) value);184}185}186else {187if (length == (int) ESCAPE) {188if (value == (int) ESCAPE) buffer.append(ESCAPE);189buffer.append((char) value);190--length;191}192buffer.append(ESCAPE);193buffer.append((char) length);194buffer.append((char) value); // Don't need to escape this value195}196}197198/**199* Encode a run, possibly a degenerate run (of < 4 values).200* @param length The length of the run; must be > 0 && <= 0xFF.201*/202private static final void encodeRun(StringBuffer buffer, byte value, int length,203byte[] state) {204if (length < 4) {205for (int j=0; j<length; ++j) {206if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);207appendEncodedByte(buffer, value, state);208}209}210else {211if (length == ESCAPE_BYTE) {212if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);213appendEncodedByte(buffer, value, state);214--length;215}216appendEncodedByte(buffer, ESCAPE_BYTE, state);217appendEncodedByte(buffer, (byte)length, state);218appendEncodedByte(buffer, value, state); // Don't need to escape this value219}220}221222/**223* Append a byte to the given StringBuffer, packing two bytes into each224* character. The state parameter maintains intermediary data between225* calls.226* @param state A two-element array, with state[0] == 0 if this is the227* first byte of a pair, or state[0] != 0 if this is the second byte228* of a pair, in which case state[1] is the first byte.229*/230private static final void appendEncodedByte(StringBuffer buffer, byte value,231byte[] state) {232if (state[0] != 0) {233char c = (char) ((state[1] << 8) | (((int) value) & 0xFF));234buffer.append(c);235state[0] = 0;236}237else {238state[0] = 1;239state[1] = value;240}241}242243/**244* Construct an array of shorts from a run-length encoded string.245*/246static final short[] RLEStringToShortArray(String s) {247int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));248short[] array = new short[length];249int ai = 0;250for (int i=2; i<s.length(); ++i) {251char c = s.charAt(i);252if (c == ESCAPE) {253c = s.charAt(++i);254if (c == ESCAPE) array[ai++] = (short) c;255else {256int runLength = (int) c;257short runValue = (short) s.charAt(++i);258for (int j=0; j<runLength; ++j) array[ai++] = runValue;259}260}261else {262array[ai++] = (short) c;263}264}265266if (ai != length)267throw new InternalError("Bad run-length encoded short array");268269return array;270}271272/**273* Construct an array of bytes from a run-length encoded string.274*/275static final byte[] RLEStringToByteArray(String s) {276int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));277byte[] array = new byte[length];278boolean nextChar = true;279char c = 0;280int node = 0;281int runLength = 0;282int i = 2;283for (int ai=0; ai<length; ) {284// This part of the loop places the next byte into the local285// variable 'b' each time through the loop. It keeps the286// current character in 'c' and uses the boolean 'nextChar'287// to see if we've taken both bytes out of 'c' yet.288byte b;289if (nextChar) {290c = s.charAt(i++);291b = (byte) (c >> 8);292nextChar = false;293}294else {295b = (byte) (c & 0xFF);296nextChar = true;297}298299// This part of the loop is a tiny state machine which handles300// the parsing of the run-length encoding. This would be simpler301// if we could look ahead, but we can't, so we use 'node' to302// move between three nodes in the state machine.303switch (node) {304case 0:305// Normal idle node306if (b == ESCAPE_BYTE) {307node = 1;308}309else {310array[ai++] = b;311}312break;313case 1:314// We have seen one ESCAPE_BYTE; we expect either a second315// one, or a run length and value.316if (b == ESCAPE_BYTE) {317array[ai++] = ESCAPE_BYTE;318node = 0;319}320else {321runLength = b;322// Interpret signed byte as unsigned323if (runLength < 0) runLength += 0x100;324node = 2;325}326break;327case 2:328// We have seen an ESCAPE_BYTE and length byte. We interpret329// the next byte as the value to be repeated.330for (int j=0; j<runLength; ++j) array[ai++] = b;331node = 0;332break;333}334}335336if (node != 0)337throw new InternalError("Bad run-length encoded byte array");338339if (i != s.length())340throw new InternalError("Excess data in RLE byte array string");341342return array;343}344345/**346* Format a String for representation in a source file. This includes347* breaking it into lines escaping characters using octal notation348* when necessary (control characters and double quotes).349*/350static final String formatForSource(String s) {351return formatForSource(s, " ");352}353354/**355* Format a String for representation in a source file. This includes356* breaking it into lines escaping characters using octal notation357* when necessary (control characters and double quotes).358*/359static final String formatForSource(String s, String indent) {360StringBuffer buffer = new StringBuffer();361for (int i=0; i<s.length();) {362if (i > 0) buffer.append("+\n");363int limit = buffer.length() + 78; // Leave 2 for trailing <"+>364buffer.append(indent + '"');365while (i<s.length() && buffer.length()<limit) {366char c = s.charAt(i++);367/* This works too but it's kind of unnecessary; might as368well keep things simple.369if (c == '\\' || c == '"') {370// Escape backslash and double-quote. Don't need to371// escape single-quote.372buffer.append("\\" + c);373}374else if (c >= '\u0020' && c <= '\u007E') {375// Printable ASCII ranges from ' ' to '~'376buffer.append(c);377}378else379*/380if (c <= '\377') {381// Represent control characters382// using octal notation; otherwise the string we form383// won't compile, since Unicode escape sequences are384// processed before tokenization.385buffer.append('\\');386buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal387buffer.append(HEX_DIGIT[(c & 0070) >> 3]);388buffer.append(HEX_DIGIT[(c & 0007)]);389}390else {391// Handle the rest with Unicode392buffer.append("\\u");393buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);394buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);395buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);396buffer.append(HEX_DIGIT[(c & 0x000F)]);397}398}399buffer.append('"');400}401return buffer.toString();402}403404static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7',405'8','9','A','B','C','D','E','F'};406}407408409