// Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/sun/nio/cs/CESU_8.java
// 38918 views
/*
 * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.nio.cs;

import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;

/* Legal CESU-8 Byte Sequences
 *
 * #    Code Points      Bits   Bit/Byte pattern
 * 1                     7      0xxxxxxx
 *      U+0000..U+007F          00..7F
 *
 * 2                     11     110xxxxx    10xxxxxx
 *      U+0080..U+07FF          C2..DF      80..BF
 *
 * 3                     16     1110xxxx    10xxxxxx    10xxxxxx
 *      U+0800..U+0FFF          E0          A0..BF      80..BF
 *      U+1000..U+FFFF          E1..EF      80..BF      80..BF
 *
 */

/**
 * The "CESU-8" charset.
 *
 * <p> Every {@code char} is coded on its own in one, two or three bytes.
 * The encoder writes a supplementary code point as two three-byte
 * sequences, one for each surrogate of the pair; the decoder turns every
 * well-formed three-byte sequence into a single {@code char} and therefore
 * hands surrogates through unchanged. There is no four-byte form.
 */
class CESU_8 extends Unicode
{
    public CESU_8() {
        super("CESU-8", StandardCharsets.aliases_CESU_8);
    }

    public String historicalName() {
        return "CESU8";
    }

    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    public CharsetEncoder newEncoder() {
        return new Encoder(this);
    }

    /**
     * Converts the absolute backing-array indices {@code sp} and {@code dp}
     * back to buffer positions and stores them in {@code src} and
     * {@code dst}.
     */
    private static void updatePositions(Buffer src, int sp,
                                        Buffer dst, int dp) {
        src.position(sp - src.arrayOffset());
        dst.position(dp - dst.arrayOffset());
    }

    private static class Decoder extends CharsetDecoder
                                 implements ArrayDecoder {

        private Decoder(Charset cs) {
            super(cs, 1.0f, 1.0f);
        }

        /** Returns true if {@code b} is not of the form 10xxxxxx. */
        private static boolean isNotContinuation(int b) {
            return (b & 0xc0) != 0x80;
        }

        //  [E0]     [A0..BF] [80..BF]
        //  [E1..EF] [80..BF] [80..BF]
        /**
         * Returns true if {@code b1 b2 b3} is not a legal three-byte
         * sequence. {@code b1} is expected as a sign-extended byte value
         * (it is compared against {@code (byte)0xe0}).
         */
        private static boolean isMalformed3(int b1, int b2, int b3) {
            return isMalformed3_2(b1, b2) || isNotContinuation(b3);
        }

        // only used when there is only one byte left in src buffer
        /**
         * Returns true if the first two bytes already rule out a legal
         * three-byte sequence: {@code E0} followed by {@code 80..9F}
         * (overlong form), or a second byte that is not a continuation.
         */
        private static boolean isMalformed3_2(int b1, int b2) {
            return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
                   isNotContinuation(b2);
        }

        /**
         * Assembles the char coded by a two-byte sequence. Both arguments
         * are sign-extended byte values; XOR-ing with the identically
         * shifted, sign-extended marker bytes C0 and 80 cancels the marker
         * bits (and the sign extension) and leaves the 11 payload bits.
         */
        private static char char2(int b1, int b2) {
            return (char) (((b1 << 6) ^ b2)
                           ^
                           (((byte) 0xC0 << 6) ^
                            ((byte) 0x80 << 0)));
        }

        /**
         * Assembles the char coded by a three-byte sequence; same
         * technique as {@link #char2} with the marker bytes E0, 80, 80.
         */
        private static char char3(int b1, int b2, int b3) {
            return (char)
                ((b1 << 12) ^
                 (b2 <<  6) ^
                 (b3 ^
                  (((byte) 0xE0 << 12) ^
                   ((byte) 0x80 <<  6) ^
                   ((byte) 0x80 <<  0))));
        }

        /**
         * Builds the malformed-input result for a rejected sequence whose
         * nominal length is {@code nb}. For {@code nb == 3} the first two
         * bytes are consumed from {@code src} (callers reset the position
         * afterwards) to decide whether one or two bytes are malformed.
         */
        private static CoderResult malformedN(ByteBuffer src, int nb) {
            switch (nb) {
            case 1:
            case 2:                    // always 1
                return CoderResult.malformedForLength(1);
            case 3:
                int b1 = src.get();
                int b2 = src.get();    // no need to lookup b3
                return CoderResult.malformedForLength(
                    isMalformed3_2(b1, b2) ? 1 : 2);
            default:
                assert false;
                return null;
            }
        }

        /**
         * Array-loop variant: {@code sp} and {@code dp} are absolute
         * backing-array indices. Positions {@code src} at the start of the
         * bad sequence so that {@link #malformedN} can inspect it, then
         * stores the final positions in both buffers.
         */
        private static CoderResult malformed(ByteBuffer src, int sp,
                                             CharBuffer dst, int dp,
                                             int nb)
        {
            src.position(sp - src.arrayOffset());
            CoderResult cr = malformedN(src, nb);
            updatePositions(src, sp, dst, dp);
            return cr;
        }

        /**
         * Buffer-loop variant: {@code mark} is the buffer position of the
         * first byte of the bad sequence; {@code src} is left there.
         */
        private static CoderResult malformed(ByteBuffer src,
                                             int mark, int nb)
        {
            src.position(mark);
            CoderResult cr = malformedN(src, nb);
            src.position(mark);
            return cr;
        }

        /**
         * Stores the positions (array-loop indices) and returns a
         * malformed-input result of the given length.
         */
        private static CoderResult malformedForLength(ByteBuffer src,
                                                      int sp,
                                                      CharBuffer dst,
                                                      int dp,
                                                      int malformedNB)
        {
            updatePositions(src, sp, dst, dp);
            return CoderResult.malformedForLength(malformedNB);
        }

        /**
         * Resets {@code src} to {@code mark} and returns a malformed-input
         * result of the given length.
         */
        private static CoderResult malformedForLength(ByteBuffer src,
                                                      int mark,
                                                      int malformedNB)
        {
            src.position(mark);
            return CoderResult.malformedForLength(malformedNB);
        }

        /**
         * Stores the positions and reports why the array loop stopped:
         * UNDERFLOW when {@code nb} is 0 or fewer than {@code nb} source
         * bytes remain ({@code sl - sp < nb}), otherwise OVERFLOW.
         */
        private static CoderResult xflow(Buffer src, int sp, int sl,
                                         Buffer dst, int dp, int nb) {
            updatePositions(src, sp, dst, dp);
            return (nb == 0 || sl - sp < nb)
                   ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
        }

        /**
         * Resets {@code src} to {@code mark} and reports why the buffer
         * loop stopped: UNDERFLOW when {@code nb} is 0 or fewer than
         * {@code nb} source bytes remain, otherwise OVERFLOW.
         */
        private static CoderResult xflow(Buffer src, int mark, int nb) {
            src.position(mark);
            return (nb == 0 || src.remaining() < nb)
                   ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
        }

        /**
         * Decodes directly on the backing arrays of {@code src} and
         * {@code dst}; buffer positions are written back on every exit.
         */
        private CoderResult decodeArrayLoop(ByteBuffer src,
                                            CharBuffer dst)
        {
            // This method is optimized for ASCII input.
            byte[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();

            char[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();
            int dlASCII = dp + Math.min(sl - sp, dl - dp);

            // ASCII only loop
            while (dp < dlASCII && sa[sp] >= 0) {
                da[dp++] = (char) sa[sp++];
            }
            while (sp < sl) {
                int b1 = sa[sp];
                if (b1 >= 0) {
                    // 1 byte, 7 bits: 0xxxxxxx
                    if (dp >= dl) {
                        return xflow(src, sp, sl, dst, dp, 1);
                    }
                    da[dp++] = (char) b1;
                    sp++;
                } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
                    // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
                    // (the 0x1e test rejects the overlong leads C0 and C1)
                    if (sl - sp < 2 || dp >= dl) {
                        return xflow(src, sp, sl, dst, dp, 2);
                    }
                    int b2 = sa[sp + 1];
                    if (isNotContinuation(b2)) {
                        return malformedForLength(src, sp, dst, dp, 1);
                    }
                    da[dp++] = char2(b1, b2);
                    sp += 2;
                } else if ((b1 >> 4) == -2) {
                    // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
                    int srcRemaining = sl - sp;
                    if (srcRemaining < 3 || dp >= dl) {
                        // report a bad second byte now instead of
                        // waiting for more input
                        if (srcRemaining > 1 && isMalformed3_2(b1, sa[sp + 1])) {
                            return malformedForLength(src, sp, dst, dp, 1);
                        }
                        return xflow(src, sp, sl, dst, dp, 3);
                    }
                    int b2 = sa[sp + 1];
                    int b3 = sa[sp + 2];
                    if (isMalformed3(b1, b2, b3)) {
                        return malformed(src, sp, dst, dp, 3);
                    }
                    da[dp++] = char3(b1, b2, b3);
                    sp += 3;
                } else {
                    return malformed(src, sp, dst, dp, 1);
                }
            }
            return xflow(src, sp, sl, dst, dp, 0);
        }

        /**
         * Decodes through the relative get/put methods of the buffers.
         * {@code mark} tracks the position of the first byte that has not
         * been fully decoded; {@code src} is reset to it on every exit.
         */
        private CoderResult decodeBufferLoop(ByteBuffer src,
                                             CharBuffer dst)
        {
            int mark = src.position();
            int limit = src.limit();
            while (mark < limit) {
                int b1 = src.get();
                if (b1 >= 0) {
                    // 1 byte, 7 bits: 0xxxxxxx
                    if (dst.remaining() < 1) {
                        return xflow(src, mark, 1); // overflow
                    }
                    dst.put((char) b1);
                    mark++;
                } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
                    // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
                    if (limit - mark < 2 || dst.remaining() < 1) {
                        return xflow(src, mark, 2);
                    }
                    int b2 = src.get();
                    if (isNotContinuation(b2)) {
                        return malformedForLength(src, mark, 1);
                    }
                    dst.put(char2(b1, b2));
                    mark += 2;
                } else if ((b1 >> 4) == -2) {
                    // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
                    int srcRemaining = limit - mark;
                    if (srcRemaining < 3 || dst.remaining() < 1) {
                        if (srcRemaining > 1 && isMalformed3_2(b1, src.get())) {
                            return malformedForLength(src, mark, 1);
                        }
                        return xflow(src, mark, 3);
                    }
                    int b2 = src.get();
                    int b3 = src.get();
                    if (isMalformed3(b1, b2, b3)) {
                        return malformed(src, mark, 3);
                    }
                    dst.put(char3(b1, b2, b3));
                    mark += 3;
                } else {
                    return malformed(src, mark, 1);
                }
            }
            return xflow(src, mark, 0);
        }

        /**
         * Uses the array loop when both buffers expose a backing array,
         * the buffer loop otherwise.
         */
        @Override
        protected CoderResult decodeLoop(ByteBuffer src,
                                         CharBuffer dst)
        {
            if (src.hasArray() && dst.hasArray()) {
                return decodeArrayLoop(src, dst);
            } else {
                return decodeBufferLoop(src, dst);
            }
        }

        /**
         * Returns {@code bb} positioned at {@code sp}, wrapping {@code ba}
         * first when {@code bb} is still null.
         */
        private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp)
        {
            if (bb == null) {
                bb = ByteBuffer.wrap(ba);
            }
            bb.position(sp);
            return bb;
        }

        // returns -1 if there is/are malformed byte(s) and the
        // "action" for malformed input is not REPLACE.
        /**
         * Decodes {@code len} bytes of {@code sa}, starting at {@code sp},
         * into {@code da} starting at index 0.
         *
         * <p> No bounds checks are made on {@code da}; the caller must
         * supply an array that is large enough. Each malformed sequence is
         * replaced by the first char of {@code replacement()}.
         *
         * @return the number of chars written to {@code da}, or -1 if
         *         malformed input was found and the malformed-input action
         *         is not REPLACE
         */
        public int decode(byte[] sa, int sp, int len, char[] da) {
            final int sl = sp + len;
            int dp = 0;
            int dlASCII = Math.min(len, da.length);
            ByteBuffer bb = null;  // only necessary if malformed

            // ASCII only optimized loop
            while (dp < dlASCII && sa[sp] >= 0) {
                da[dp++] = (char) sa[sp++];
            }

            while (sp < sl) {
                int b1 = sa[sp++];
                if (b1 >= 0) {
                    // 1 byte, 7 bits: 0xxxxxxx
                    da[dp++] = (char) b1;
                } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
                    // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
                    if (sp < sl) {
                        int b2 = sa[sp++];
                        if (isNotContinuation(b2)) {
                            if (malformedInputAction() != CodingErrorAction.REPLACE) {
                                return -1;
                            }
                            da[dp++] = replacement().charAt(0);
                            sp--;            // malformedN(bb, 2) always returns 1
                        } else {
                            da[dp++] = char2(b1, b2);
                        }
                        continue;
                    }
                    // lead byte is the last byte of the input
                    if (malformedInputAction() != CodingErrorAction.REPLACE) {
                        return -1;
                    }
                    da[dp++] = replacement().charAt(0);
                    return dp;
                } else if ((b1 >> 4) == -2) {
                    // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
                    if (sp + 1 < sl) {
                        int b2 = sa[sp++];
                        int b3 = sa[sp++];
                        if (isMalformed3(b1, b2, b3)) {
                            if (malformedInputAction() != CodingErrorAction.REPLACE) {
                                return -1;
                            }
                            da[dp++] = replacement().charAt(0);
                            // rewind to the lead byte, then skip only the
                            // bytes malformedN reports as malformed
                            sp -= 3;
                            bb = getByteBuffer(bb, sa, sp);
                            sp += malformedN(bb, 3).length();
                        } else {
                            da[dp++] = char3(b1, b2, b3);
                        }
                        continue;
                    }
                    // fewer than two bytes follow the lead byte
                    if (malformedInputAction() != CodingErrorAction.REPLACE) {
                        return -1;
                    }
                    if (sp < sl && isMalformed3_2(b1, sa[sp])) {
                        // bad second byte: replace the lead byte only and
                        // decode the second byte again on its own
                        da[dp++] = replacement().charAt(0);
                        continue;
                    }
                    da[dp++] = replacement().charAt(0);
                    return dp;
                } else {
                    if (malformedInputAction() != CodingErrorAction.REPLACE) {
                        return -1;
                    }
                    da[dp++] = replacement().charAt(0);
                }
            }
            return dp;
        }
    }

    private static class Encoder extends CharsetEncoder
                                 implements ArrayEncoder {

        private Encoder(Charset cs) {
            super(cs, 1.1f, 3.0f);
        }

        /** A single char can be encoded unless it is a surrogate. */
        @Override
        public boolean canEncode(char c) {
            return !Character.isSurrogate(c);
        }

        /**
         * Accepts any single non-negative (ASCII) byte without further
         * checks and defers to the superclass for everything else.
         */
        @Override
        public boolean isLegalReplacement(byte[] repl) {
            return ((repl.length == 1 && repl[0] >= 0) ||
                    super.isLegalReplacement(repl));
        }

        /** Stores the positions (array-loop indices) and returns OVERFLOW. */
        private static CoderResult overflow(CharBuffer src, int sp,
                                            ByteBuffer dst, int dp) {
            updatePositions(src, sp, dst, dp);
            return CoderResult.OVERFLOW;
        }

        /** Resets {@code src} to {@code mark} and returns OVERFLOW. */
        private static CoderResult overflow(CharBuffer src, int mark) {
            src.position(mark);
            return CoderResult.OVERFLOW;
        }

        /** Writes {@code c} as a three-byte sequence at {@code da[dp..dp+2]}. */
        private static void to3Bytes(byte[] da, int dp, char c) {
            da[dp] = (byte)(0xe0 | ((c >> 12)));
            da[dp + 1] = (byte)(0x80 | ((c >>  6) & 0x3f));
            da[dp + 2] = (byte)(0x80 | (c & 0x3f));
        }

        /** Puts {@code c} into {@code dst} as a three-byte sequence. */
        private static void to3Bytes(ByteBuffer dst, char c) {
            dst.put((byte)(0xe0 | ((c >> 12))));
            dst.put((byte)(0x80 | ((c >>  6) & 0x3f)));
            dst.put((byte)(0x80 | (c & 0x3f)));
        }

        // created on first use, when a surrogate is met in the input
        private Surrogate.Parser sgp;

        /**
         * Encodes directly on the backing arrays of {@code src} and
         * {@code dst}; buffer positions are written back on every exit.
         */
        private CoderResult encodeArrayLoop(CharBuffer src,
                                            ByteBuffer dst)
        {
            char[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();

            byte[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();
            int dlASCII = dp + Math.min(sl - sp, dl - dp);

            // ASCII only loop
            while (dp < dlASCII && sa[sp] < '\u0080') {
                da[dp++] = (byte) sa[sp++];
            }
            while (sp < sl) {
                char c = sa[sp];
                if (c < 0x80) {
                    // Have at most seven bits
                    if (dp >= dl) {
                        return overflow(src, sp, dst, dp);
                    }
                    da[dp++] = (byte)c;
                } else if (c < 0x800) {
                    // 2 bytes, 11 bits
                    if (dl - dp < 2) {
                        return overflow(src, sp, dst, dp);
                    }
                    da[dp++] = (byte)(0xc0 | (c >> 6));
                    da[dp++] = (byte)(0x80 | (c & 0x3f));
                } else if (Character.isSurrogate(c)) {
                    // Have a surrogate pair
                    if (sgp == null) {
                        sgp = new Surrogate.Parser();
                    }
                    int uc = sgp.parse(c, sa, sp, sl);
                    if (uc < 0) {
                        updatePositions(src, sp, dst, dp);
                        return sgp.error();
                    }
                    // each surrogate takes its own 3-byte sequence
                    if (dl - dp < 6) {
                        return overflow(src, sp, dst, dp);
                    }
                    to3Bytes(da, dp, Character.highSurrogate(uc));
                    dp += 3;
                    to3Bytes(da, dp, Character.lowSurrogate(uc));
                    dp += 3;
                    sp++;  // 2 chars
                } else {
                    // 3 bytes, 16 bits
                    if (dl - dp < 3) {
                        return overflow(src, sp, dst, dp);
                    }
                    to3Bytes(da, dp, c);
                    dp += 3;
                }
                sp++;
            }
            updatePositions(src, sp, dst, dp);
            return CoderResult.UNDERFLOW;
        }

        /**
         * Encodes through the relative get/put methods of the buffers.
         * {@code mark} tracks the position of the first char that has not
         * been encoded; {@code src} is reset to it on every exit.
         */
        private CoderResult encodeBufferLoop(CharBuffer src,
                                             ByteBuffer dst)
        {
            int mark = src.position();
            while (src.hasRemaining()) {
                char c = src.get();
                if (c < 0x80) {
                    // Have at most seven bits
                    if (!dst.hasRemaining()) {
                        return overflow(src, mark);
                    }
                    dst.put((byte)c);
                } else if (c < 0x800) {
                    // 2 bytes, 11 bits
                    if (dst.remaining() < 2) {
                        return overflow(src, mark);
                    }
                    dst.put((byte)(0xc0 | (c >> 6)));
                    dst.put((byte)(0x80 | (c & 0x3f)));
                } else if (Character.isSurrogate(c)) {
                    // Have a surrogate pair
                    if (sgp == null) {
                        sgp = new Surrogate.Parser();
                    }
                    int uc = sgp.parse(c, src);
                    if (uc < 0) {
                        src.position(mark);
                        return sgp.error();
                    }
                    // each surrogate takes its own 3-byte sequence
                    if (dst.remaining() < 6) {
                        return overflow(src, mark);
                    }
                    to3Bytes(dst, Character.highSurrogate(uc));
                    to3Bytes(dst, Character.lowSurrogate(uc));
                    mark++;  // 2 chars
                } else {
                    // 3 bytes, 16 bits
                    if (dst.remaining() < 3) {
                        return overflow(src, mark);
                    }
                    to3Bytes(dst, c);
                }
                mark++;
            }
            src.position(mark);
            return CoderResult.UNDERFLOW;
        }

        /**
         * Uses the array loop when both buffers expose a backing array,
         * the buffer loop otherwise.
         */
        @Override
        protected final CoderResult encodeLoop(CharBuffer src,
                                               ByteBuffer dst)
        {
            if (src.hasArray() && dst.hasArray()) {
                return encodeArrayLoop(src, dst);
            } else {
                return encodeBufferLoop(src, dst);
            }
        }

        // returns -1 if there is malformed char(s) and the
        // "action" for malformed input is not REPLACE.
        /**
         * Encodes {@code len} chars of {@code sa}, starting at {@code sp},
         * into {@code da} starting at index 0.
         *
         * <p> No bounds checks are made on {@code da}; the caller must
         * supply an array that is large enough. A surrogate that the
         * parser rejects is replaced by the first byte of
         * {@code replacement()}.
         *
         * @return the number of bytes written to {@code da}, or -1 if a
         *         malformed surrogate was found and the malformed-input
         *         action is not REPLACE
         */
        public int encode(char[] sa, int sp, int len, byte[] da) {
            int sl = sp + len;
            int dp = 0;
            int dlASCII = dp + Math.min(len, da.length);

            // ASCII only optimized loop
            while (dp < dlASCII && sa[sp] < '\u0080') {
                da[dp++] = (byte) sa[sp++];
            }

            while (sp < sl) {
                char c = sa[sp++];
                if (c < 0x80) {
                    // Have at most seven bits
                    da[dp++] = (byte)c;
                } else if (c < 0x800) {
                    // 2 bytes, 11 bits
                    da[dp++] = (byte)(0xc0 | (c >> 6));
                    da[dp++] = (byte)(0x80 | (c & 0x3f));
                } else if (Character.isSurrogate(c)) {
                    if (sgp == null) {
                        sgp = new Surrogate.Parser();
                    }
                    // sp was already advanced past c, hence sp - 1
                    int uc = sgp.parse(c, sa, sp - 1, sl);
                    if (uc < 0) {
                        if (malformedInputAction() != CodingErrorAction.REPLACE) {
                            return -1;
                        }
                        da[dp++] = replacement()[0];
                    } else {
                        to3Bytes(da, dp, Character.highSurrogate(uc));
                        dp += 3;
                        to3Bytes(da, dp, Character.lowSurrogate(uc));
                        dp += 3;
                        sp++;  // 2 chars
                    }
                } else {
                    // 3 bytes, 16 bits
                    to3Bytes(da, dp, c);
                    dp += 3;
                }
            }
            return dp;
        }
    }
}