// Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/sun/nio/cs/UTF_8.java
// 38918 views
/*1* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425package sun.nio.cs;2627import java.nio.Buffer;28import java.nio.ByteBuffer;29import java.nio.CharBuffer;30import java.nio.charset.Charset;31import java.nio.charset.CharsetDecoder;32import java.nio.charset.CharsetEncoder;33import java.nio.charset.CoderResult;34import java.nio.charset.CodingErrorAction;3536/* Legal UTF-8 Byte Sequences37*38* # Code Points Bits Bit/Byte pattern39* 1 7 0xxxxxxx40* U+0000..U+007F 00..7F41*42* 2 11 110xxxxx 10xxxxxx43* U+0080..U+07FF C2..DF 80..BF44*45* 3 16 1110xxxx 10xxxxxx 10xxxxxx46* U+0800..U+0FFF E0 A0..BF 80..BF47* U+1000..U+FFFF E1..EF 80..BF 80..BF48*49* 4 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx50* U+10000..U+3FFFF F0 90..BF 80..BF 80..BF51* U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF52* U+100000..U10FFFF F4 80..8F 80..BF 80..BF53*54*/5556class 
UTF_8 extends Unicode57{58public UTF_8() {59super("UTF-8", StandardCharsets.aliases_UTF_8);60}6162public String historicalName() {63return "UTF8";64}6566public CharsetDecoder newDecoder() {67return new Decoder(this);68}6970public CharsetEncoder newEncoder() {71return new Encoder(this);72}7374private static final void updatePositions(Buffer src, int sp,75Buffer dst, int dp) {76src.position(sp - src.arrayOffset());77dst.position(dp - dst.arrayOffset());78}7980private static class Decoder extends CharsetDecoder81implements ArrayDecoder {82private Decoder(Charset cs) {83super(cs, 1.0f, 1.0f);84}8586private static boolean isNotContinuation(int b) {87return (b & 0xc0) != 0x80;88}8990// [E0] [A0..BF] [80..BF]91// [E1..EF] [80..BF] [80..BF]92private static boolean isMalformed3(int b1, int b2, int b3) {93return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||94(b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;95}9697// only used when there is only one byte left in src buffer98private static boolean isMalformed3_2(int b1, int b2) {99return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||100(b2 & 0xc0) != 0x80;101}102103// [F0] [90..BF] [80..BF] [80..BF]104// [F1..F3] [80..BF] [80..BF] [80..BF]105// [F4] [80..8F] [80..BF] [80..BF]106// only check 80-be range here, the [0xf0,0x80...] 
and [0xf4,0x90-...]107// will be checked by Character.isSupplementaryCodePoint(uc)108private static boolean isMalformed4(int b2, int b3, int b4) {109return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||110(b4 & 0xc0) != 0x80;111}112113// only used when there is less than 4 bytes left in src buffer.114// both b1 and b2 should be "& 0xff" before passed in.115private static boolean isMalformed4_2(int b1, int b2) {116return (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||117(b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||118(b2 & 0xc0) != 0x80;119}120121// tests if b1 and b2 are malformed as the first 2 bytes of a122// legal`4-byte utf-8 byte sequence.123// only used when there is less than 4 bytes left in src buffer,124// after isMalformed4_2 has been invoked.125private static boolean isMalformed4_3(int b3) {126return (b3 & 0xc0) != 0x80;127}128129private static CoderResult lookupN(ByteBuffer src, int n)130{131for (int i = 1; i < n; i++) {132if (isNotContinuation(src.get()))133return CoderResult.malformedForLength(i);134}135return CoderResult.malformedForLength(n);136}137138private static CoderResult malformedN(ByteBuffer src, int nb) {139switch (nb) {140case 1:141case 2: // always 1142return CoderResult.malformedForLength(1);143case 3:144int b1 = src.get();145int b2 = src.get(); // no need to lookup b3146return CoderResult.malformedForLength(147((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||148isNotContinuation(b2)) ? 
1 : 2);149case 4: // we don't care the speed here150b1 = src.get() & 0xff;151b2 = src.get() & 0xff;152if (b1 > 0xf4 ||153(b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||154(b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||155isNotContinuation(b2))156return CoderResult.malformedForLength(1);157if (isNotContinuation(src.get()))158return CoderResult.malformedForLength(2);159return CoderResult.malformedForLength(3);160default:161assert false;162return null;163}164}165166private static CoderResult malformed(ByteBuffer src, int sp,167CharBuffer dst, int dp,168int nb)169{170src.position(sp - src.arrayOffset());171CoderResult cr = malformedN(src, nb);172updatePositions(src, sp, dst, dp);173return cr;174}175176177private static CoderResult malformed(ByteBuffer src,178int mark, int nb)179{180src.position(mark);181CoderResult cr = malformedN(src, nb);182src.position(mark);183return cr;184}185186private static CoderResult malformedForLength(ByteBuffer src,187int sp,188CharBuffer dst,189int dp,190int malformedNB)191{192updatePositions(src, sp, dst, dp);193return CoderResult.malformedForLength(malformedNB);194}195196private static CoderResult malformedForLength(ByteBuffer src,197int mark,198int malformedNB)199{200src.position(mark);201return CoderResult.malformedForLength(malformedNB);202}203204205private static CoderResult xflow(Buffer src, int sp, int sl,206Buffer dst, int dp, int nb) {207updatePositions(src, sp, dst, dp);208return (nb == 0 || sl - sp < nb)209? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;210}211212private static CoderResult xflow(Buffer src, int mark, int nb) {213src.position(mark);214return (nb == 0 || src.remaining() < nb)215? 
CoderResult.UNDERFLOW : CoderResult.OVERFLOW;216}217218private CoderResult decodeArrayLoop(ByteBuffer src,219CharBuffer dst)220{221// This method is optimized for ASCII input.222byte[] sa = src.array();223int sp = src.arrayOffset() + src.position();224int sl = src.arrayOffset() + src.limit();225226char[] da = dst.array();227int dp = dst.arrayOffset() + dst.position();228int dl = dst.arrayOffset() + dst.limit();229int dlASCII = dp + Math.min(sl - sp, dl - dp);230231// ASCII only loop232while (dp < dlASCII && sa[sp] >= 0)233da[dp++] = (char) sa[sp++];234while (sp < sl) {235int b1 = sa[sp];236if (b1 >= 0) {237// 1 byte, 7 bits: 0xxxxxxx238if (dp >= dl)239return xflow(src, sp, sl, dst, dp, 1);240da[dp++] = (char) b1;241sp++;242} else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {243// 2 bytes, 11 bits: 110xxxxx 10xxxxxx244// [C2..DF] [80..BF]245if (sl - sp < 2 || dp >= dl)246return xflow(src, sp, sl, dst, dp, 2);247int b2 = sa[sp + 1];248// Now we check the first byte of 2-byte sequence as249// if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0)250// no longer need to check b1 against c1 & c0 for251// malformed as we did in previous version252// (b1 & 0x1e) == 0x0 || (b2 & 0xc0) != 0x80;253// only need to check the second byte b2.254if (isNotContinuation(b2))255return malformedForLength(src, sp, dst, dp, 1);256da[dp++] = (char) (((b1 << 6) ^ b2)257^258(((byte) 0xC0 << 6) ^259((byte) 0x80 << 0)));260sp += 2;261} else if ((b1 >> 4) == -2) {262// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx263int srcRemaining = sl - sp;264if (srcRemaining < 3 || dp >= dl) {265if (srcRemaining > 1 && isMalformed3_2(b1, sa[sp + 1]))266return malformedForLength(src, sp, dst, dp, 1);267return xflow(src, sp, sl, dst, dp, 3);268}269int b2 = sa[sp + 1];270int b3 = sa[sp + 2];271if (isMalformed3(b1, b2, b3))272return malformed(src, sp, dst, dp, 3);273char c = (char)274((b1 << 12) ^275(b2 << 6) ^276(b3 ^277(((byte) 0xE0 << 12) ^278((byte) 0x80 << 6) ^279((byte) 0x80 << 0))));280if 
(Character.isSurrogate(c))281return malformedForLength(src, sp, dst, dp, 3);282da[dp++] = c;283sp += 3;284} else if ((b1 >> 3) == -2) {285// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx286int srcRemaining = sl - sp;287if (srcRemaining < 4 || dl - dp < 2) {288b1 &= 0xff;289if (b1 > 0xf4 ||290srcRemaining > 1 && isMalformed4_2(b1, sa[sp + 1] & 0xff))291return malformedForLength(src, sp, dst, dp, 1);292if (srcRemaining > 2 && isMalformed4_3(sa[sp + 2]))293return malformedForLength(src, sp, dst, dp, 2);294return xflow(src, sp, sl, dst, dp, 4);295}296int b2 = sa[sp + 1];297int b3 = sa[sp + 2];298int b4 = sa[sp + 3];299int uc = ((b1 << 18) ^300(b2 << 12) ^301(b3 << 6) ^302(b4 ^303(((byte) 0xF0 << 18) ^304((byte) 0x80 << 12) ^305((byte) 0x80 << 6) ^306((byte) 0x80 << 0))));307if (isMalformed4(b2, b3, b4) ||308// shortest form check309!Character.isSupplementaryCodePoint(uc)) {310return malformed(src, sp, dst, dp, 4);311}312da[dp++] = Character.highSurrogate(uc);313da[dp++] = Character.lowSurrogate(uc);314sp += 4;315} else316return malformed(src, sp, dst, dp, 1);317}318return xflow(src, sp, sl, dst, dp, 0);319}320321private CoderResult decodeBufferLoop(ByteBuffer src,322CharBuffer dst)323{324int mark = src.position();325int limit = src.limit();326while (mark < limit) {327int b1 = src.get();328if (b1 >= 0) {329// 1 byte, 7 bits: 0xxxxxxx330if (dst.remaining() < 1)331return xflow(src, mark, 1); // overflow332dst.put((char) b1);333mark++;334} else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {335// 2 bytes, 11 bits: 110xxxxx 10xxxxxx336if (limit - mark < 2|| dst.remaining() < 1)337return xflow(src, mark, 2);338int b2 = src.get();339if (isNotContinuation(b2))340return malformedForLength(src, mark, 1);341dst.put((char) (((b1 << 6) ^ b2)342^343(((byte) 0xC0 << 6) ^344((byte) 0x80 << 0))));345mark += 2;346} else if ((b1 >> 4) == -2) {347// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx348int srcRemaining = limit - mark;349if (srcRemaining < 3 || dst.remaining() < 1) {350if 
(srcRemaining > 1 && isMalformed3_2(b1, src.get()))351return malformedForLength(src, mark, 1);352return xflow(src, mark, 3);353}354int b2 = src.get();355int b3 = src.get();356if (isMalformed3(b1, b2, b3))357return malformed(src, mark, 3);358char c = (char)359((b1 << 12) ^360(b2 << 6) ^361(b3 ^362(((byte) 0xE0 << 12) ^363((byte) 0x80 << 6) ^364((byte) 0x80 << 0))));365if (Character.isSurrogate(c))366return malformedForLength(src, mark, 3);367dst.put(c);368mark += 3;369} else if ((b1 >> 3) == -2) {370// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx371int srcRemaining = limit - mark;372if (srcRemaining < 4 || dst.remaining() < 2) {373b1 &= 0xff;374if (b1 > 0xf4 ||375srcRemaining > 1 && isMalformed4_2(b1, src.get() & 0xff))376return malformedForLength(src, mark, 1);377if (srcRemaining > 2 && isMalformed4_3(src.get()))378return malformedForLength(src, mark, 2);379return xflow(src, mark, 4);380}381int b2 = src.get();382int b3 = src.get();383int b4 = src.get();384int uc = ((b1 << 18) ^385(b2 << 12) ^386(b3 << 6) ^387(b4 ^388(((byte) 0xF0 << 18) ^389((byte) 0x80 << 12) ^390((byte) 0x80 << 6) ^391((byte) 0x80 << 0))));392if (isMalformed4(b2, b3, b4) ||393// shortest form check394!Character.isSupplementaryCodePoint(uc)) {395return malformed(src, mark, 4);396}397dst.put(Character.highSurrogate(uc));398dst.put(Character.lowSurrogate(uc));399mark += 4;400} else {401return malformed(src, mark, 1);402}403}404return xflow(src, mark, 0);405}406407protected CoderResult decodeLoop(ByteBuffer src,408CharBuffer dst)409{410if (src.hasArray() && dst.hasArray())411return decodeArrayLoop(src, dst);412else413return decodeBufferLoop(src, dst);414}415416private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp)417{418if (bb == null)419bb = ByteBuffer.wrap(ba);420bb.position(sp);421return bb;422}423424// returns -1 if there is/are malformed byte(s) and the425// "action" for malformed input is not REPLACE.426public int decode(byte[] sa, int sp, int len, char[] da) 
{427final int sl = sp + len;428int dp = 0;429int dlASCII = Math.min(len, da.length);430ByteBuffer bb = null; // only necessary if malformed431432// ASCII only optimized loop433while (dp < dlASCII && sa[sp] >= 0)434da[dp++] = (char) sa[sp++];435436while (sp < sl) {437int b1 = sa[sp++];438if (b1 >= 0) {439// 1 byte, 7 bits: 0xxxxxxx440da[dp++] = (char) b1;441} else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {442// 2 bytes, 11 bits: 110xxxxx 10xxxxxx443if (sp < sl) {444int b2 = sa[sp++];445if (isNotContinuation(b2)) {446if (malformedInputAction() != CodingErrorAction.REPLACE)447return -1;448da[dp++] = replacement().charAt(0);449sp--; // malformedN(bb, 2) always returns 1450} else {451da[dp++] = (char) (((b1 << 6) ^ b2)^452(((byte) 0xC0 << 6) ^453((byte) 0x80 << 0)));454}455continue;456}457if (malformedInputAction() != CodingErrorAction.REPLACE)458return -1;459da[dp++] = replacement().charAt(0);460return dp;461} else if ((b1 >> 4) == -2) {462// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx463if (sp + 1 < sl) {464int b2 = sa[sp++];465int b3 = sa[sp++];466if (isMalformed3(b1, b2, b3)) {467if (malformedInputAction() != CodingErrorAction.REPLACE)468return -1;469da[dp++] = replacement().charAt(0);470sp -= 3;471bb = getByteBuffer(bb, sa, sp);472sp += malformedN(bb, 3).length();473} else {474char c = (char)((b1 << 12) ^475(b2 << 6) ^476(b3 ^477(((byte) 0xE0 << 12) ^478((byte) 0x80 << 6) ^479((byte) 0x80 << 0))));480if (Character.isSurrogate(c)) {481if (malformedInputAction() != CodingErrorAction.REPLACE)482return -1;483da[dp++] = replacement().charAt(0);484} else {485da[dp++] = c;486}487}488continue;489}490if (malformedInputAction() != CodingErrorAction.REPLACE)491return -1;492if (sp < sl && isMalformed3_2(b1, sa[sp])) {493da[dp++] = replacement().charAt(0);494continue;495496}497da[dp++] = replacement().charAt(0);498return dp;499} else if ((b1 >> 3) == -2) {500// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx501if (sp + 2 < sl) {502int b2 = sa[sp++];503int b3 = 
sa[sp++];504int b4 = sa[sp++];505int uc = ((b1 << 18) ^506(b2 << 12) ^507(b3 << 6) ^508(b4 ^509(((byte) 0xF0 << 18) ^510((byte) 0x80 << 12) ^511((byte) 0x80 << 6) ^512((byte) 0x80 << 0))));513if (isMalformed4(b2, b3, b4) ||514// shortest form check515!Character.isSupplementaryCodePoint(uc)) {516if (malformedInputAction() != CodingErrorAction.REPLACE)517return -1;518da[dp++] = replacement().charAt(0);519sp -= 4;520bb = getByteBuffer(bb, sa, sp);521sp += malformedN(bb, 4).length();522} else {523da[dp++] = Character.highSurrogate(uc);524da[dp++] = Character.lowSurrogate(uc);525}526continue;527}528if (malformedInputAction() != CodingErrorAction.REPLACE)529return -1;530b1 &= 0xff;531if (b1 > 0xf4 ||532sp < sl && isMalformed4_2(b1, sa[sp] & 0xff)) {533da[dp++] = replacement().charAt(0);534continue;535}536sp++;537if (sp < sl && isMalformed4_3(sa[sp])) {538da[dp++] = replacement().charAt(0);539continue;540}541da[dp++] = replacement().charAt(0);542return dp;543} else {544if (malformedInputAction() != CodingErrorAction.REPLACE)545return -1;546da[dp++] = replacement().charAt(0);547}548}549return dp;550}551}552553private static final class Encoder extends CharsetEncoder554implements ArrayEncoder {555556private Encoder(Charset cs) {557super(cs, 1.1f, 3.0f);558}559560public boolean canEncode(char c) {561return !Character.isSurrogate(c);562}563564public boolean isLegalReplacement(byte[] repl) {565return ((repl.length == 1 && repl[0] >= 0) ||566super.isLegalReplacement(repl));567}568569private static CoderResult overflow(CharBuffer src, int sp,570ByteBuffer dst, int dp) {571updatePositions(src, sp, dst, dp);572return CoderResult.OVERFLOW;573}574575private static CoderResult overflow(CharBuffer src, int mark) {576src.position(mark);577return CoderResult.OVERFLOW;578}579580private Surrogate.Parser sgp;581private CoderResult encodeArrayLoop(CharBuffer src,582ByteBuffer dst)583{584char[] sa = src.array();585int sp = src.arrayOffset() + src.position();586int sl = src.arrayOffset() + 
src.limit();587588byte[] da = dst.array();589int dp = dst.arrayOffset() + dst.position();590int dl = dst.arrayOffset() + dst.limit();591int dlASCII = dp + Math.min(sl - sp, dl - dp);592593// ASCII only loop594while (dp < dlASCII && sa[sp] < '\u0080')595da[dp++] = (byte) sa[sp++];596while (sp < sl) {597char c = sa[sp];598if (c < 0x80) {599// Have at most seven bits600if (dp >= dl)601return overflow(src, sp, dst, dp);602da[dp++] = (byte)c;603} else if (c < 0x800) {604// 2 bytes, 11 bits605if (dl - dp < 2)606return overflow(src, sp, dst, dp);607da[dp++] = (byte)(0xc0 | (c >> 6));608da[dp++] = (byte)(0x80 | (c & 0x3f));609} else if (Character.isSurrogate(c)) {610// Have a surrogate pair611if (sgp == null)612sgp = new Surrogate.Parser();613int uc = sgp.parse(c, sa, sp, sl);614if (uc < 0) {615updatePositions(src, sp, dst, dp);616return sgp.error();617}618if (dl - dp < 4)619return overflow(src, sp, dst, dp);620da[dp++] = (byte)(0xf0 | ((uc >> 18)));621da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));622da[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));623da[dp++] = (byte)(0x80 | (uc & 0x3f));624sp++; // 2 chars625} else {626// 3 bytes, 16 bits627if (dl - dp < 3)628return overflow(src, sp, dst, dp);629da[dp++] = (byte)(0xe0 | ((c >> 12)));630da[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f));631da[dp++] = (byte)(0x80 | (c & 0x3f));632}633sp++;634}635updatePositions(src, sp, dst, dp);636return CoderResult.UNDERFLOW;637}638639private CoderResult encodeBufferLoop(CharBuffer src,640ByteBuffer dst)641{642int mark = src.position();643while (src.hasRemaining()) {644char c = src.get();645if (c < 0x80) {646// Have at most seven bits647if (!dst.hasRemaining())648return overflow(src, mark);649dst.put((byte)c);650} else if (c < 0x800) {651// 2 bytes, 11 bits652if (dst.remaining() < 2)653return overflow(src, mark);654dst.put((byte)(0xc0 | (c >> 6)));655dst.put((byte)(0x80 | (c & 0x3f)));656} else if (Character.isSurrogate(c)) {657// Have a surrogate pair658if (sgp == null)659sgp = new 
Surrogate.Parser();660int uc = sgp.parse(c, src);661if (uc < 0) {662src.position(mark);663return sgp.error();664}665if (dst.remaining() < 4)666return overflow(src, mark);667dst.put((byte)(0xf0 | ((uc >> 18))));668dst.put((byte)(0x80 | ((uc >> 12) & 0x3f)));669dst.put((byte)(0x80 | ((uc >> 6) & 0x3f)));670dst.put((byte)(0x80 | (uc & 0x3f)));671mark++; // 2 chars672} else {673// 3 bytes, 16 bits674if (dst.remaining() < 3)675return overflow(src, mark);676dst.put((byte)(0xe0 | ((c >> 12))));677dst.put((byte)(0x80 | ((c >> 6) & 0x3f)));678dst.put((byte)(0x80 | (c & 0x3f)));679}680mark++;681}682src.position(mark);683return CoderResult.UNDERFLOW;684}685686protected final CoderResult encodeLoop(CharBuffer src,687ByteBuffer dst)688{689if (src.hasArray() && dst.hasArray())690return encodeArrayLoop(src, dst);691else692return encodeBufferLoop(src, dst);693}694695private byte repl = (byte)'?';696protected void implReplaceWith(byte[] newReplacement) {697repl = newReplacement[0];698}699700// returns -1 if there is malformed char(s) and the701// "action" for malformed input is not REPLACE.702public int encode(char[] sa, int sp, int len, byte[] da) {703int sl = sp + len;704int dp = 0;705int dlASCII = dp + Math.min(len, da.length);706707// ASCII only optimized loop708while (dp < dlASCII && sa[sp] < '\u0080')709da[dp++] = (byte) sa[sp++];710711while (sp < sl) {712char c = sa[sp++];713if (c < 0x80) {714// Have at most seven bits715da[dp++] = (byte)c;716} else if (c < 0x800) {717// 2 bytes, 11 bits718da[dp++] = (byte)(0xc0 | (c >> 6));719da[dp++] = (byte)(0x80 | (c & 0x3f));720} else if (Character.isSurrogate(c)) {721if (sgp == null)722sgp = new Surrogate.Parser();723int uc = sgp.parse(c, sa, sp - 1, sl);724if (uc < 0) {725if (malformedInputAction() != CodingErrorAction.REPLACE)726return -1;727da[dp++] = repl;728} else {729da[dp++] = (byte)(0xf0 | ((uc >> 18)));730da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));731da[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));732da[dp++] = (byte)(0x80 | 
(uc & 0x3f));733sp++; // 2 chars734}735} else {736// 3 bytes, 16 bits737da[dp++] = (byte)(0xe0 | ((c >> 12)));738da[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f));739da[dp++] = (byte)(0x80 | (c & 0x3f));740}741}742return dp;743}744}745}746747748