Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/sun/nio/cs/CharsetMapping.java
38918 views
/*1* Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425package sun.nio.cs;2627import java.io.InputStream;28import java.io.InputStreamReader;29import java.io.OutputStream;30import java.io.BufferedReader;31import java.io.IOException;32import java.util.regex.Matcher;33import java.util.regex.Pattern;34import java.util.*;35import java.security.*;3637public class CharsetMapping {38public final static char UNMAPPABLE_DECODING = '\uFFFD';39public final static int UNMAPPABLE_ENCODING = 0xFFFD;4041char[] b2cSB; //singlebyte b->c42char[] b2cDB1; //dobulebyte b->c /db143char[] b2cDB2; //dobulebyte b->c /db24445int b2Min, b2Max; //min/max(start/end) value of 2nd byte46int b1MinDB1, b1MaxDB1; //min/Max(start/end) value of 1st byte/db147int b1MinDB2, b1MaxDB2; //min/Max(start/end) value of 1st byte/db248int dbSegSize;4950char[] c2b;51char[] c2bIndex;5253// Supplementary54char[] b2cSupp;55char[] c2bSupp;5657// Composite58Entry[] b2cComp;59Entry[] c2bComp;6061public char decodeSingle(int b) {62return b2cSB[b];63}6465public char decodeDouble(int b1, int b2) {66if (b2 >= b2Min && b2 < b2Max) {67b2 -= b2Min;68if (b1 >= b1MinDB1 && b1 <= b1MaxDB1) {69b1 -= b1MinDB1;70return b2cDB1[b1 * dbSegSize + b2];71}72if (b1 >= b1MinDB2 && b1 <= b1MaxDB2) {73b1 -= b1MinDB2;74return b2cDB2[b1 * dbSegSize + b2];75}76}77return UNMAPPABLE_DECODING;78}7980// for jis0213 all supplementary characters are in 0x2xxxx range,81// so only the xxxx part is now stored, should actually store the82// codepoint value instead.83public char[] decodeSurrogate(int db, char[] cc) {84int end = b2cSupp.length / 2;85int i = Arrays.binarySearch(b2cSupp, 0, end, (char)db);86if (i >= 0) {87Character.toChars(b2cSupp[end + i] + 0x20000, cc, 0);88return cc;89}90return null;91}9293public char[] decodeComposite(Entry comp, char[] cc) {94int i = findBytes(b2cComp, comp);95if (i >= 0) {96cc[0] = (char)b2cComp[i].cp;97cc[1] = (char)b2cComp[i].cp2;98return cc;99}100return null;101}102103public int encodeChar(char ch) {104int index = c2bIndex[ch >> 8];105if (index == 0xffff)106return UNMAPPABLE_ENCODING;107return c2b[index + (ch & 0xff)];108}109110public int encodeSurrogate(char hi, char lo) {111int cp = Character.toCodePoint(hi, lo);112if (cp < 0x20000 || cp >= 0x30000)113return UNMAPPABLE_ENCODING;114int end = c2bSupp.length / 2;115int i = Arrays.binarySearch(c2bSupp, 0, end, (char)cp);116if (i >= 0)117return c2bSupp[end + i];118return UNMAPPABLE_ENCODING;119}120121public boolean isCompositeBase(Entry comp) {122if (comp.cp <= 0x31f7 && comp.cp >= 0xe6) {123return (findCP(c2bComp, comp) >= 0);124}125return false;126}127128public int encodeComposite(Entry comp) {129int i = findComp(c2bComp, comp);130if (i >= 0)131return c2bComp[i].bs;132return UNMAPPABLE_ENCODING;133}134135// init the CharsetMapping object from the .dat binary file136public static CharsetMapping get(final InputStream is) {137return AccessController.doPrivileged(new PrivilegedAction<CharsetMapping>() {138public CharsetMapping run() {139return new CharsetMapping().load(is);140}141});142}143144public static class Entry {145public int bs; //byte sequence reps146public int cp; //Unicode codepoint147public int cp2; //CC of composite148}149150static Comparator<Entry> comparatorBytes =151new Comparator<Entry>() {152public int compare(Entry m1, Entry m2) {153return m1.bs - m2.bs;154}155public boolean equals(Object obj) {156return this == obj;157}158};159160static Comparator<Entry> comparatorCP =161new Comparator<Entry>() {162public int compare(Entry m1, Entry m2) {163return m1.cp - m2.cp;164}165public boolean equals(Object obj) {166return this == obj;167}168};169170static Comparator<Entry> comparatorComp =171new Comparator<Entry>() {172public int compare(Entry m1, Entry m2) {173int v = m1.cp - m2.cp;174if (v == 0)175v = m1.cp2 - m2.cp2;176return v;177}178public boolean equals(Object obj) {179return this == obj;180}181};182183static int findBytes(Entry[] a, Entry k) {184return Arrays.binarySearch(a, 0, a.length, k, comparatorBytes);185}186187static int findCP(Entry[] a, Entry k) {188return Arrays.binarySearch(a, 0, a.length, k, comparatorCP);189}190191static int findComp(Entry[] a, Entry k) {192return Arrays.binarySearch(a, 0, a.length, k, comparatorComp);193}194195/*****************************************************************************/196// tags of different charset mapping tables197private final static int MAP_SINGLEBYTE = 0x1; // 0..256 : c198private final static int MAP_DOUBLEBYTE1 = 0x2; // min..max: c199private final static int MAP_DOUBLEBYTE2 = 0x3; // min..max: c [DB2]200private final static int MAP_SUPPLEMENT = 0x5; // db,c201private final static int MAP_SUPPLEMENT_C2B = 0x6; // c,db202private final static int MAP_COMPOSITE = 0x7; // db,base,cc203private final static int MAP_INDEXC2B = 0x8; // index table of c->bb204205private static final boolean readNBytes(InputStream in, byte[] bb, int N)206throws IOException207{208int off = 0;209while (N > 0) {210int n = in.read(bb, off, N);211if (n == -1)212return false;213N = N - n;214off += n;215}216return true;217}218219int off = 0;220byte[] bb;221private char[] readCharArray() {222// first 2 bytes are the number of "chars" stored in this table223int size = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);224char [] cc = new char[size];225for (int i = 0; i < size; i++) {226cc[i] = (char)(((bb[off++]&0xff)<<8) | (bb[off++]&0xff));227}228return cc;229}230231void readSINGLEBYTE() {232char[] map = readCharArray();233for (int i = 0; i < map.length; i++) {234char c = map[i];235if (c != UNMAPPABLE_DECODING) {236c2b[c2bIndex[c >> 8] + (c&0xff)] = (char)i;237}238}239b2cSB = map;240}241242void readINDEXC2B() {243char[] map = readCharArray();244for (int i = map.length - 1; i >= 0; i--) {245if (c2b == null && map[i] != -1) {246c2b = new char[map[i] + 256];247Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);248break;249}250}251c2bIndex = map;252}253254char[] readDB(int b1Min, int b2Min, int segSize) {255char[] map = readCharArray();256for (int i = 0; i < map.length; i++) {257char c = map[i];258if (c != UNMAPPABLE_DECODING) {259int b1 = i / segSize;260int b2 = i % segSize;261int b = (b1 + b1Min)* 256 + (b2 + b2Min);262//System.out.printf(" DB %x\t%x%n", b, c & 0xffff);263c2b[c2bIndex[c >> 8] + (c&0xff)] = (char)(b);264}265}266return map;267}268269void readDOUBLEBYTE1() {270b1MinDB1 = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);271b1MaxDB1 = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);272b2Min = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);273b2Max = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);274dbSegSize = b2Max - b2Min + 1;275b2cDB1 = readDB(b1MinDB1, b2Min, dbSegSize);276}277278void readDOUBLEBYTE2() {279b1MinDB2 = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);280b1MaxDB2 = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);281b2Min = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);282b2Max = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);283dbSegSize = b2Max - b2Min + 1;284b2cDB2 = readDB(b1MinDB2, b2Min, dbSegSize);285}286287void readCOMPOSITE() {288char[] map = readCharArray();289int mLen = map.length/3;290b2cComp = new Entry[mLen];291c2bComp = new Entry[mLen];292for (int i = 0, j= 0; i < mLen; i++) {293Entry m = new Entry();294m.bs = map[j++];295m.cp = map[j++];296m.cp2 = map[j++];297b2cComp[i] = m;298c2bComp[i] = m;299}300Arrays.sort(c2bComp, 0, c2bComp.length, comparatorComp);301}302303CharsetMapping load(InputStream in) {304try {305// The first 4 bytes are the size of the total data followed in306// this .dat file.307int len = ((in.read()&0xff) << 24) | ((in.read()&0xff) << 16) |308((in.read()&0xff) << 8) | (in.read()&0xff);309bb = new byte[len];310off = 0;311//System.out.printf("In : Total=%d%n", len);312// Read in all bytes313if (!readNBytes(in, bb, len))314throw new RuntimeException("Corrupted data file");315in.close();316317while (off < len) {318int type = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);319switch(type) {320case MAP_INDEXC2B:321readINDEXC2B();322break;323case MAP_SINGLEBYTE:324readSINGLEBYTE();325break;326case MAP_DOUBLEBYTE1:327readDOUBLEBYTE1();328break;329case MAP_DOUBLEBYTE2:330readDOUBLEBYTE2();331break;332case MAP_SUPPLEMENT:333b2cSupp = readCharArray();334break;335case MAP_SUPPLEMENT_C2B:336c2bSupp = readCharArray();337break;338case MAP_COMPOSITE:339readCOMPOSITE();340break;341default:342throw new RuntimeException("Corrupted data file");343}344}345bb = null;346return this;347} catch (IOException x) {348x.printStackTrace();349return null;350}351}352}353354355