CoCalc -- DoubleByte.java

GitHub Repository: PojavLauncherTeam/jdk17u
Path: blob/master/src/java.base/share/classes/sun/nio/cs/DoubleByte.java
⁶⁷⁸⁶² views
1
/*
2
 * Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.  Oracle designates this
8
 * particular file as subject to the "Classpath" exception as provided
9
 * by Oracle in the LICENSE file that accompanied this code.
10
 *
11
 * This code is distributed in the hope that it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
 * version 2 for more details (a copy is included in the LICENSE file that
15
 * accompanied this code).
16
 *
17
 * You should have received a copy of the GNU General Public License version
18
 * 2 along with this work; if not, write to the Free Software Foundation,
19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
 *
21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
 * or visit www.oracle.com if you need additional information or have any
23
 * questions.
24
 */
25

26
package sun.nio.cs;
27

28
import java.nio.ByteBuffer;
29
import java.nio.CharBuffer;
30
import java.nio.charset.Charset;
31
import java.nio.charset.CharsetDecoder;
32
import java.nio.charset.CharsetEncoder;
33
import java.nio.charset.CoderResult;
34
import java.util.Arrays;
35

36
import jdk.internal.access.JavaLangAccess;
37
import jdk.internal.access.SharedSecrets;
38
import sun.nio.cs.Surrogate;
39
import sun.nio.cs.ArrayDecoder;
40
import sun.nio.cs.ArrayEncoder;
41
import static sun.nio.cs.CharsetMapping.*;
42

43
/*
44
 * Four types of "DoubleByte" charsets are implemented in this class
45
 * (1)DoubleByte
46
 *    The "most widely used" multibyte charset, a combination of
47
 *    a singlebyte character set (usually the ASCII charset) and a
48
 *    doublebyte character set. The codepoint values of singlebyte
49
 *    and doublebyte don't overlap. Microsoft's multibyte charsets
50
 *    and IBM's "DBCS_ASCII" charsets, such as IBM1381, 942, 943,
51
 *    948, 949 and 950 are such charsets.
52
 *
53
 * (2)DoubleByte_EBCDIC
54
 *    IBM EBCDIC Mix multibyte charset. Use SO and SI to shift (switch)
55
 *    in and out between the singlebyte character set and doublebyte
56
 *    character set.
57
 *
58
 * (3)DoubleByte_SIMPLE_EUC
59
 *    It's a "simple" form of the EUC encoding scheme: only the
60
 *    singlebyte character set G0 and one doublebyte character set
61
 *    G1 are defined, G2 (with SS2) and G3 (with SS3) are not used.
62
 *    So it is actually the same as the "typical" type (1) mentioned
63
 *    above, except it returns "malformed" for the SS2 and SS3 when
64
 *    decoding.
65
 *
66
 * (4)DoubleByte ONLY
67
 *    A "pure" doublebyte only character set. From implementation
68
 *    point of view, this is the type (1) whose "decodeSingle" always
69
 *    returns unmappable.
70
 *
71
 * For simplicity, all implementations share the same decoding and
72
 * encoding data structure.
73
 *
74
 * Decoding:
75
 *
76
 *    char[][] b2c;
77
 *    char[] b2cSB;
78
 *    int b2Min, b2Max
79
 *
80
 *    public char decodeSingle(int b) {
81
 *        return b2cSB[b];
82
 *    }
83
 *
84
 *    public char decodeDouble(int b1, int b2) {
85
 *        if (b2 < b2Min || b2 > b2Max)
86
 *            return UNMAPPABLE_DECODING;
87
 *         return b2c[b1][b2 - b2Min];
88
 *    }
89
 *
90
 *    (1)b2Min, b2Max are the corresponding min and max value of the
91
 *       low-half of the double-byte.
92
 *    (2)The high 8-bit/b1 of the double-byte are used to index into
93
 *       b2c array.
94
 *
95
 * Encoding:
96
 *
97
 *    char[] c2b;
98
 *    char[] c2bIndex;
99
 *
100
 *    public int encodeChar(char ch) {
101
 *        return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
102
 *    }
103
 *
104
 */
105

106
public class DoubleByte {
107

108
    /**
     * Shared 256-entry row whose every slot holds UNMAPPABLE_DECODING.
     * Decoder.crMalformedOrUnmappable() compares b2c rows against this
     * array by identity to tell whether a byte is a lead byte.
     */
    public static final char[] B2C_UNMAPPABLE;
    static {
        char[] row = new char[0x100];
        Arrays.fill(row, UNMAPPABLE_DECODING);
        B2C_UNMAPPABLE = row;
    }
113

114
    private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
115

116
    public static class Decoder extends CharsetDecoder
117
                                implements DelegatableDecoder, ArrayDecoder
118
    {
119
        final char[][] b2c;
120
        final char[] b2cSB;
121
        final int b2Min;
122
        final int b2Max;
123
        final boolean isASCIICompatible;
124

125
        // for SimpleEUC override
126
        protected CoderResult crMalformedOrUnderFlow(int b) {
127
            return CoderResult.UNDERFLOW;
128
        }
129

130
        protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
131
            if (b2c[b1] == B2C_UNMAPPABLE ||                // isNotLeadingByte(b1)
132
                b2c[b2] != B2C_UNMAPPABLE ||                // isLeadingByte(b2)
133
                decodeSingle(b2) != UNMAPPABLE_DECODING) {  // isSingle(b2)
134
                return CoderResult.malformedForLength(1);
135
            }
136
            return CoderResult.unmappableForLength(2);
137
        }
138

139
        /**
         * Full constructor.
         *
         * @param cs      the charset this decoder belongs to
         * @param avgcpb  average chars-per-byte, forwarded to CharsetDecoder
         * @param maxcpb  maximum chars-per-byte, forwarded to CharsetDecoder
         * @param b2c     double-byte table, indexed by lead byte and then
         *                by (second byte - b2Min)
         * @param b2cSB   single-byte table, indexed by the unsigned byte
         * @param b2Min   smallest accepted second-byte value
         * @param b2Max   largest accepted second-byte value
         * @param isASCIICompatible whether the array decode loop may run
         *                the ASCII fast path first
         */
        public Decoder(Charset cs, float avgcpb, float maxcpb,
                       char[][] b2c, char[] b2cSB,
                       int b2Min, int b2Max,
                       boolean isASCIICompatible) {
            super(cs, avgcpb, maxcpb);
            this.b2cSB = b2cSB;
            this.b2c = b2c;
            this.b2Max = b2Max;
            this.b2Min = b2Min;
            this.isASCIICompatible = isASCIICompatible;
        }

        /** Same as the full constructor with avgcpb 0.5 and maxcpb 1.0. */
        public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
                       boolean isASCIICompatible) {
            this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
        }

        /** Same as above, for a decoder that is not ASCII compatible. */
        public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            this(cs, b2c, b2cSB, b2Min, b2Max, false);
        }
159

160
        protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
161
            byte[] sa = src.array();
162
            int soff = src.arrayOffset();
163
            int sp = soff + src.position();
164
            int sl = soff + src.limit();
165

166
            char[] da = dst.array();
167
            int doff = dst.arrayOffset();
168
            int dp = doff + dst.position();
169
            int dl = doff + dst.limit();
170

171
            try {
172
                if (isASCIICompatible) {
173
                    int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(dl - dp, sl - sp));
174
                    dp += n;
175
                    sp += n;
176
                }
177
                while (sp < sl && dp < dl) {
178
                    // inline the decodeSingle/Double() for better performance
179
                    int inSize = 1;
180
                    int b1 = sa[sp] & 0xff;
181
                    char c = b2cSB[b1];
182
                    if (c == UNMAPPABLE_DECODING) {
183
                        if (sl - sp < 2)
184
                            return crMalformedOrUnderFlow(b1);
185
                        int b2 = sa[sp + 1] & 0xff;
186
                        if (b2 < b2Min || b2 > b2Max ||
187
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
188
                            return crMalformedOrUnmappable(b1, b2);
189
                        }
190
                        inSize++;
191
                    }
192
                    da[dp++] = c;
193
                    sp += inSize;
194
                }
195
                return (sp >= sl) ? CoderResult.UNDERFLOW
196
                                  : CoderResult.OVERFLOW;
197
            } finally {
198
                src.position(sp - soff);
199
                dst.position(dp - doff);
200
            }
201
        }
202

203
        protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
204
            int mark = src.position();
205
            try {
206

207
                while (src.hasRemaining() && dst.hasRemaining()) {
208
                    int b1 = src.get() & 0xff;
209
                    char c = b2cSB[b1];
210
                    int inSize = 1;
211
                    if (c == UNMAPPABLE_DECODING) {
212
                        if (src.remaining() < 1)
213
                            return crMalformedOrUnderFlow(b1);
214
                        int b2 = src.get() & 0xff;
215
                        if (b2 < b2Min || b2 > b2Max ||
216
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING)
217
                            return crMalformedOrUnmappable(b1, b2);
218
                        inSize++;
219
                    }
220
                    dst.put(c);
221
                    mark += inSize;
222
                }
223
                return src.hasRemaining()? CoderResult.OVERFLOW
224
                                         : CoderResult.UNDERFLOW;
225
            } finally {
226
                src.position(mark);
227
            }
228
        }
229

230
        // Make some protected methods public for use by JISAutoDetect
231
        public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
232
            if (src.hasArray() && dst.hasArray())
233
                return decodeArrayLoop(src, dst);
234
            else
235
                return decodeBufferLoop(src, dst);
236
        }
237

238
        @Override
239
        public int decode(byte[] src, int sp, int len, char[] dst) {
240
            int dp = 0;
241
            int sl = sp + len;
242
            char repl = replacement().charAt(0);
243
            while (sp < sl) {
244
                int b1 = src[sp++] & 0xff;
245
                char c = b2cSB[b1];
246
                if (c == UNMAPPABLE_DECODING) {
247
                    if (sp < sl) {
248
                        int b2 = src[sp++] & 0xff;
249
                        if (b2 < b2Min || b2 > b2Max ||
250
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
251
                            if (crMalformedOrUnmappable(b1, b2).length() == 1) {
252
                                sp--;
253
                            }
254
                        }
255
                    }
256
                    if (c == UNMAPPABLE_DECODING) {
257
                         c = repl;
258
                    }
259
                }
260
                dst[dp++] = c;
261
            }
262
            return dp;
263
        }
264

265
        @Override
266
        public boolean isASCIICompatible() {
267
            return isASCIICompatible;
268
        }
269

270
        public void implReset() {
271
            super.implReset();
272
        }
273

274
        public CoderResult implFlush(CharBuffer out) {
275
            return super.implFlush(out);
276
        }
277

278
        // decode loops are not using decodeSingle/Double() for performance
279
        // reason.
280
        public char decodeSingle(int b) {
281
            return b2cSB[b];
282
        }
283

284
        public char decodeDouble(int b1, int b2) {
285
            if (b1 < 0 || b1 > b2c.length ||
286
                b2 < b2Min || b2 > b2Max)
287
                return UNMAPPABLE_DECODING;
288
            return  b2c[b1][b2 - b2Min];
289
        }
290
    }
291

292
    // IBM_EBCDIC_DBCS
293
    public static class Decoder_EBCDIC extends Decoder {
294
        private static final int SBCS = 0;
295
        private static final int DBCS = 1;
296
        private static final int SO = 0x0e;
297
        private static final int SI = 0x0f;
298
        private int  currentState;
299

300
        /**
         * Creates an EBCDIC-mix decoder; all arguments are handed to the
         * Decoder constructor unchanged.
         */
        public Decoder_EBCDIC(Charset cs,
                              char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
                              boolean isASCIICompatible) {
            super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
        }

        /** Same as above, with isASCIICompatible fixed to false. */
        public Decoder_EBCDIC(Charset cs,
                              char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            this(cs, b2c, b2cSB, b2Min, b2Max, false);
        }

        /** Puts the shift state back to single-byte mode. */
        @Override
        public void implReset() {
            this.currentState = SBCS;
        }
314

315
        // Check validity of dbcs ebcdic byte pair values
316
        //
317
        // First byte : 0x41 -- 0xFE
318
        // Second byte: 0x41 -- 0xFE
319
        // Doublebyte blank: 0x4040
320
        //
321
        // The validation implementation in "old" DBCS_IBM_EBCDIC and sun.io
322
        // as
323
        //            if ((b1 != 0x40 || b2 != 0x40) &&
324
        //                (b2 < 0x41 || b2 > 0xfe)) {...}
325
        // is not correct/complete (range check for b1)
326
        //
327
        private static boolean isDoubleByte(int b1, int b2) {
328
            return (0x41 <= b1 && b1 <= 0xfe && 0x41 <= b2 && b2 <= 0xfe)
329
                   || (b1 == 0x40 && b2 == 0x40); // DBCS-HOST SPACE
330
        }
331

332
        protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
333
            byte[] sa = src.array();
334
            int sp = src.arrayOffset() + src.position();
335
            int sl = src.arrayOffset() + src.limit();
336
            char[] da = dst.array();
337
            int dp = dst.arrayOffset() + dst.position();
338
            int dl = dst.arrayOffset() + dst.limit();
339

340
            try {
341
                // don't check dp/dl together here, it's possible to
342
                // decdoe a SO/SI without space in output buffer.
343
                while (sp < sl) {
344
                    int b1 = sa[sp] & 0xff;
345
                    int inSize = 1;
346
                    if (b1 == SO) {  // Shift out
347
                        if (currentState != SBCS)
348
                            return CoderResult.malformedForLength(1);
349
                        else
350
                            currentState = DBCS;
351
                    } else if (b1 == SI) {
352
                        if (currentState != DBCS)
353
                            return CoderResult.malformedForLength(1);
354
                        else
355
                            currentState = SBCS;
356
                    } else {
357
                        char c;
358
                        if (currentState == SBCS) {
359
                            c = b2cSB[b1];
360
                            if (c == UNMAPPABLE_DECODING)
361
                                return CoderResult.unmappableForLength(1);
362
                        } else {
363
                            if (sl - sp < 2)
364
                                return CoderResult.UNDERFLOW;
365
                            int b2 = sa[sp + 1] & 0xff;
366
                            if (b2 < b2Min || b2 > b2Max ||
367
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
368
                                if (!isDoubleByte(b1, b2))
369
                                    return CoderResult.malformedForLength(2);
370
                                return CoderResult.unmappableForLength(2);
371
                            }
372
                            inSize++;
373
                        }
374
                        if (dl - dp < 1)
375
                            return CoderResult.OVERFLOW;
376

377
                        da[dp++] = c;
378
                    }
379
                    sp += inSize;
380
                }
381
                return CoderResult.UNDERFLOW;
382
            } finally {
383
                src.position(sp - src.arrayOffset());
384
                dst.position(dp - dst.arrayOffset());
385
            }
386
        }
387

388
        protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
389
            int mark = src.position();
390
            try {
391
                while (src.hasRemaining()) {
392
                    int b1 = src.get() & 0xff;
393
                    int inSize = 1;
394
                    if (b1 == SO) {  // Shift out
395
                        if (currentState != SBCS)
396
                            return CoderResult.malformedForLength(1);
397
                        else
398
                            currentState = DBCS;
399
                    } else if (b1 == SI) {
400
                        if (currentState != DBCS)
401
                            return CoderResult.malformedForLength(1);
402
                        else
403
                            currentState = SBCS;
404
                    } else {
405
                        char c = UNMAPPABLE_DECODING;
406
                        if (currentState == SBCS) {
407
                            c = b2cSB[b1];
408
                            if (c == UNMAPPABLE_DECODING)
409
                                return CoderResult.unmappableForLength(1);
410
                        } else {
411
                            if (src.remaining() < 1)
412
                                return CoderResult.UNDERFLOW;
413
                            int b2 = src.get()&0xff;
414
                            if (b2 < b2Min || b2 > b2Max ||
415
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
416
                                if (!isDoubleByte(b1, b2))
417
                                    return CoderResult.malformedForLength(2);
418
                                return CoderResult.unmappableForLength(2);
419
                            }
420
                            inSize++;
421
                        }
422

423
                        if (dst.remaining() < 1)
424
                            return CoderResult.OVERFLOW;
425

426
                        dst.put(c);
427
                    }
428
                    mark += inSize;
429
                }
430
                return CoderResult.UNDERFLOW;
431
            } finally {
432
                src.position(mark);
433
            }
434
        }
435

436
        @Override
437
        public int decode(byte[] src, int sp, int len, char[] dst) {
438
            int dp = 0;
439
            int sl = sp + len;
440
            currentState = SBCS;
441
            char repl = replacement().charAt(0);
442
            while (sp < sl) {
443
                int b1 = src[sp++] & 0xff;
444
                if (b1 == SO) {  // Shift out
445
                    if (currentState != SBCS)
446
                        dst[dp++] = repl;
447
                    else
448
                        currentState = DBCS;
449
                } else if (b1 == SI) {
450
                    if (currentState != DBCS)
451
                        dst[dp++] = repl;
452
                    else
453
                        currentState = SBCS;
454
                } else {
455
                    char c =  UNMAPPABLE_DECODING;
456
                    if (currentState == SBCS) {
457
                        c = b2cSB[b1];
458
                        if (c == UNMAPPABLE_DECODING)
459
                            c = repl;
460
                    } else {
461
                        if (sl == sp) {
462
                            c = repl;
463
                        } else {
464
                            int b2 = src[sp++] & 0xff;
465
                            if (b2 < b2Min || b2 > b2Max ||
466
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
467
                                c = repl;
468
                            }
469
                        }
470
                    }
471
                    dst[dp++] = c;
472
                }
473
            }
474
            return dp;
475
        }
476
    }
477

478
    // DBCS_ONLY
479
    public static class Decoder_DBCSONLY extends Decoder {
480
        static final char[] b2cSB_UNMAPPABLE;
481
        static {
482
            b2cSB_UNMAPPABLE = new char[0x100];
483
            Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING);
484
        }
485

486
        // always returns unmappableForLenth(2) for doublebyte_only
487
        @Override
488
        protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
489
            return CoderResult.unmappableForLength(2);
490
        }
491

492
        public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
493
                                boolean isASCIICompatible) {
494
            super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, isASCIICompatible);
495
        }
496

497
        public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
498
            super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, false);
499
        }
500
    }
501

502
    // EUC_SIMPLE
503
    // The only thing we need to "override" is to check SS2/SS3 and
504
    // return "malformed" if found
505
    public static class Decoder_EUC_SIM extends Decoder {
506
        private final int SS2 =  0x8E;
507
        private final int SS3 =  0x8F;
508

509
        public Decoder_EUC_SIM(Charset cs,
510
                               char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
511
                               boolean isASCIICompatible) {
512
            super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
513
        }
514

515
        // No support provided for G2/G3 for SimpleEUC
516
        protected CoderResult crMalformedOrUnderFlow(int b) {
517
            if (b == SS2 || b == SS3 )
518
                return CoderResult.malformedForLength(1);
519
            return CoderResult.UNDERFLOW;
520
        }
521

522
        protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
523
            if (b1 == SS2 || b1 == SS3 )
524
                return CoderResult.malformedForLength(1);
525
            return CoderResult.unmappableForLength(2);
526
        }
527

528
        @Override
529
        public int decode(byte[] src, int sp, int len, char[] dst) {
530
            int dp = 0;
531
            int sl = sp + len;
532
            char repl = replacement().charAt(0);
533
            while (sp < sl) {
534
                int b1 = src[sp++] & 0xff;
535
                char c = b2cSB[b1];
536
                if (c == UNMAPPABLE_DECODING) {
537
                    if (sp < sl) {
538
                        int b2 = src[sp++] & 0xff;
539
                        if (b2 < b2Min || b2 > b2Max ||
540
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
541
                            if (b1 == SS2 || b1 == SS3) {
542
                                sp--;
543
                            }
544
                            c = repl;
545
                        }
546
                    } else {
547
                        c = repl;
548
                    }
549
                }
550
                dst[dp++] = c;
551
            }
552
            return dp;
553
        }
554
    }
555

556
    public static class Encoder extends CharsetEncoder
557
                                implements ArrayEncoder
558
    {
559
        protected final int MAX_SINGLEBYTE = 0xff;
560
        private final char[] c2b;
561
        private final char[] c2bIndex;
562
        protected Surrogate.Parser sgp;
563
        final boolean isASCIICompatible;
564

565
        /** Creates an encoder that is not ASCII compatible. */
        public Encoder(Charset cs, char[] c2b, char[] c2bIndex) {
            this(cs, c2b, c2bIndex, false);
        }

        /**
         * Creates an encoder whose average and maximum bytes-per-char are
         * both 2.0.
         *
         * @param cs       the charset this encoder belongs to
         * @param c2b      char-to-bytes table read by encodeChar
         * @param c2bIndex per-high-byte offsets into {@code c2b}
         * @param isASCIICompatible whether the ASCII fast path may be used
         */
        public Encoder(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible) {
            super(cs, 2.0f, 2.0f);
            this.c2bIndex = c2bIndex;
            this.c2b = c2b;
            this.isASCIICompatible = isASCIICompatible;
        }

        /**
         * Creates an encoder with caller-supplied bytes-per-char figures
         * and replacement bytes, all forwarded to CharsetEncoder.
         */
        public Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex,
                       boolean isASCIICompatible) {
            super(cs, avg, max, repl);
            this.c2bIndex = c2bIndex;
            this.c2b = c2b;
            this.isASCIICompatible = isASCIICompatible;
        }
583

584
        public boolean canEncode(char c) {
585
            return encodeChar(c) != UNMAPPABLE_ENCODING;
586
        }
587

588
        protected Surrogate.Parser sgp() {
589
            if (sgp == null)
590
                sgp = new Surrogate.Parser();
591
            return sgp;
592
        }
593

594
        protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
595
            char[] sa = src.array();
596
            int sp = src.arrayOffset() + src.position();
597
            int sl = src.arrayOffset() + src.limit();
598

599
            byte[] da = dst.array();
600
            int dp = dst.arrayOffset() + dst.position();
601
            int dl = dst.arrayOffset() + dst.limit();
602

603
            try {
604
                if (isASCIICompatible) {
605
                    int n = JLA.encodeASCII(sa, sp, da, dp, Math.min(dl - dp, sl - sp));
606
                    sp += n;
607
                    dp += n;
608
                }
609
                while (sp < sl) {
610
                    char c = sa[sp];
611
                    int bb = encodeChar(c);
612
                    if (bb == UNMAPPABLE_ENCODING) {
613
                        if (Character.isSurrogate(c)) {
614
                            if (sgp().parse(c, sa, sp, sl) < 0)
615
                                return sgp.error();
616
                            return sgp.unmappableResult();
617
                        }
618
                        return CoderResult.unmappableForLength(1);
619
                    }
620

621
                    if (bb > MAX_SINGLEBYTE) {    // DoubleByte
622
                        if (dl - dp < 2)
623
                            return CoderResult.OVERFLOW;
624
                        da[dp++] = (byte)(bb >> 8);
625
                        da[dp++] = (byte)bb;
626
                    } else {                      // SingleByte
627
                        if (dl - dp < 1)
628
                            return CoderResult.OVERFLOW;
629
                        da[dp++] = (byte)bb;
630
                    }
631

632
                    sp++;
633
                }
634
                return CoderResult.UNDERFLOW;
635
            } finally {
636
                src.position(sp - src.arrayOffset());
637
                dst.position(dp - dst.arrayOffset());
638
            }
639
        }
640

641
        protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
642
            int mark = src.position();
643
            try {
644
                while (src.hasRemaining()) {
645
                    char c = src.get();
646
                    int bb = encodeChar(c);
647
                    if (bb == UNMAPPABLE_ENCODING) {
648
                        if (Character.isSurrogate(c)) {
649
                            if (sgp().parse(c, src) < 0)
650
                                return sgp.error();
651
                            return sgp.unmappableResult();
652
                        }
653
                        return CoderResult.unmappableForLength(1);
654
                    }
655
                    if (bb > MAX_SINGLEBYTE) {  // DoubleByte
656
                        if (dst.remaining() < 2)
657
                            return CoderResult.OVERFLOW;
658
                        dst.put((byte)(bb >> 8));
659
                        dst.put((byte)(bb));
660
                    } else {
661
                        if (dst.remaining() < 1)
662
                        return CoderResult.OVERFLOW;
663
                        dst.put((byte)bb);
664
                    }
665
                    mark++;
666
                }
667
                return CoderResult.UNDERFLOW;
668
            } finally {
669
                src.position(mark);
670
            }
671
        }
672

673
        protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
674
            if (src.hasArray() && dst.hasArray())
675
                return encodeArrayLoop(src, dst);
676
            else
677
                return encodeBufferLoop(src, dst);
678
        }
679

680
        protected byte[] repl = replacement();
681
        protected void implReplaceWith(byte[] newReplacement) {
682
            repl = newReplacement;
683
        }
684

685
        @Override
686
        public int encode(char[] src, int sp, int len, byte[] dst) {
687
            int dp = 0;
688
            int sl = sp + len;
689
            if (isASCIICompatible) {
690
                int n = JLA.encodeASCII(src, sp, dst, dp, len);
691
                sp += n;
692
                dp += n;
693
            }
694
            while (sp < sl) {
695
                char c = src[sp++];
696
                int bb = encodeChar(c);
697
                if (bb == UNMAPPABLE_ENCODING) {
698
                    if (Character.isHighSurrogate(c) && sp < sl &&
699
                        Character.isLowSurrogate(src[sp])) {
700
                        sp++;
701
                    }
702
                    dst[dp++] = repl[0];
703
                    if (repl.length > 1)
704
                        dst[dp++] = repl[1];
705
                    continue;
706
                } //else
707
                if (bb > MAX_SINGLEBYTE) { // DoubleByte
708
                    dst[dp++] = (byte)(bb >> 8);
709
                    dst[dp++] = (byte)bb;
710
                } else {                          // SingleByte
711
                    dst[dp++] = (byte)bb;
712
                }
713
            }
714
            return dp;
715
        }
716

717
        @Override
718
        public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
719
            int dp = 0;
720
            int sl = sp + len;
721
            while (sp < sl) {
722
                char c = (char)(src[sp++] & 0xff);
723
                int bb = encodeChar(c);
724
                if (bb == UNMAPPABLE_ENCODING) {
725
                    // no surrogate pair in latin1 string
726
                    dst[dp++] = repl[0];
727
                    if (repl.length > 1) {
728
                        dst[dp++] = repl[1];
729
                    }
730
                    continue;
731
                } //else
732
                if (bb > MAX_SINGLEBYTE) { // DoubleByte
733
                    dst[dp++] = (byte)(bb >> 8);
734
                    dst[dp++] = (byte)bb;
735
                } else {                   // SingleByte
736
                    dst[dp++] = (byte)bb;
737
                }
738

739
            }
740
            return dp;
741
        }
742

743
        @Override
744
        public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
745
            int dp = 0;
746
            int sl = sp + len;
747
            while (sp < sl) {
748
                char c = StringUTF16.getChar(src, sp++);
749
                int bb = encodeChar(c);
750
                if (bb == UNMAPPABLE_ENCODING) {
751
                    if (Character.isHighSurrogate(c) && sp < sl &&
752
                        Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
753
                        sp++;
754
                    }
755
                    dst[dp++] = repl[0];
756
                    if (repl.length > 1) {
757
                        dst[dp++] = repl[1];
758
                    }
759
                    continue;
760
                } //else
761
                if (bb > MAX_SINGLEBYTE) { // DoubleByte
762
                    dst[dp++] = (byte)(bb >> 8);
763
                    dst[dp++] = (byte)bb;
764
                } else {                   // SingleByte
765
                    dst[dp++] = (byte)bb;
766
                }
767
            }
768
            return dp;
769
        }
770

771
        @Override
772
        public boolean isASCIICompatible() {
773
            return isASCIICompatible;
774
        }
775

776
        public int encodeChar(char ch) {
777
            return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
778
        }
779

780
        // init the c2b and c2bIndex tables from b2c.
781
        public static void initC2B(String[] b2c, String b2cSB, String b2cNR,  String c2bNR,
782
                            int b2Min, int b2Max,
783
                            char[] c2b, char[] c2bIndex)
784
        {
785
            Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
786
            int off = 0x100;
787

788
            char[][] b2c_ca = new char[b2c.length][];
789
            char[] b2cSB_ca = null;
790
            if (b2cSB != null)
791
                b2cSB_ca = b2cSB.toCharArray();
792

793
            for (int i = 0; i < b2c.length; i++) {
794
                if (b2c[i] == null)
795
                    continue;
796
                b2c_ca[i] = b2c[i].toCharArray();
797
            }
798

799
            if (b2cNR != null) {
800
                int j = 0;
801
                while (j < b2cNR.length()) {
802
                    char b  = b2cNR.charAt(j++);
803
                    char c  = b2cNR.charAt(j++);
804
                    if (b < 0x100 && b2cSB_ca != null) {
805
                        if (b2cSB_ca[b] == c)
806
                            b2cSB_ca[b] = UNMAPPABLE_DECODING;
807
                    } else {
808
                        if (b2c_ca[b >> 8][(b & 0xff) - b2Min] == c)
809
                            b2c_ca[b >> 8][(b & 0xff) - b2Min] = UNMAPPABLE_DECODING;
810
                    }
811
                }
812
            }
813

814
            if (b2cSB_ca != null) {      // SingleByte
815
                for (int b = 0; b < b2cSB_ca.length; b++) {
816
                    char c = b2cSB_ca[b];
817
                    if (c == UNMAPPABLE_DECODING)
818
                        continue;
819
                    int index = c2bIndex[c >> 8];
820
                    if (index == 0) {
821
                        index = off;
822
                        off += 0x100;
823
                        c2bIndex[c >> 8] = (char)index;
824
                    }
825
                    c2b[index + (c & 0xff)] = (char)b;
826
                }
827
            }
828

829
            for (int b1 = 0; b1 < b2c.length; b1++) {  // DoubleByte
830
                char[] db = b2c_ca[b1];
831
                if (db == null)
832
                    continue;
833
                for (int b2 = b2Min; b2 <= b2Max; b2++) {
834
                    char c = db[b2 - b2Min];
835
                    if (c == UNMAPPABLE_DECODING)
836
                        continue;
837
                    int index = c2bIndex[c >> 8];
838
                    if (index == 0) {
839
                        index = off;
840
                        off += 0x100;
841
                        c2bIndex[c >> 8] = (char)index;
842
                    }
843
                    c2b[index + (c & 0xff)] = (char)((b1 << 8) | b2);
844
                }
845
            }
846

847
            if (c2bNR != null) {
848
                // add c->b only nr entries
849
                for (int i = 0; i < c2bNR.length(); i += 2) {
850
                    char b = c2bNR.charAt(i);
851
                    char c = c2bNR.charAt(i + 1);
852
                    int index = (c >> 8);
853
                    if (c2bIndex[index] == 0) {
854
                        c2bIndex[index] = (char)off;
855
                        off += 0x100;
856
                    }
857
                    index = c2bIndex[index] + (c & 0xff);
858
                    c2b[index] = b;
859
                }
860
            }
861
        }
862
    }
863

864
    public static class Encoder_DBCSONLY extends Encoder {
865

866
        public Encoder_DBCSONLY(Charset cs, byte[] repl,
867
                                char[] c2b, char[] c2bIndex,
868
                                boolean isASCIICompatible) {
869
            super(cs, 2.0f, 2.0f, repl, c2b, c2bIndex, isASCIICompatible);
870
        }
871

872
        public int encodeChar(char ch) {
873
            int bb = super.encodeChar(ch);
874
            if (bb <= MAX_SINGLEBYTE)
875
                return UNMAPPABLE_ENCODING;
876
            return bb;
877
        }
878
    }
879

880
    public static class Encoder_EBCDIC extends Encoder {
881
        static final int SBCS = 0;
882
        static final int DBCS = 1;
883
        static final byte SO = 0x0e;
884
        static final byte SI = 0x0f;
885

886
        protected int  currentState = SBCS;
887

888
        public Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex,
889
                              boolean isASCIICompatible) {
890
            super(cs, 4.0f, 5.0f, new byte[] {(byte)0x6f}, c2b, c2bIndex, isASCIICompatible);
891
        }
892

893
        protected void implReset() {
894
            currentState = SBCS;
895
        }
896

897
        protected CoderResult implFlush(ByteBuffer out) {
898
            if (currentState == DBCS) {
899
                if (out.remaining() < 1)
900
                    return CoderResult.OVERFLOW;
901
                out.put(SI);
902
            }
903
            implReset();
904
            return CoderResult.UNDERFLOW;
905
        }
906

907
        protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
908
            char[] sa = src.array();
909
            int sp = src.arrayOffset() + src.position();
910
            int sl = src.arrayOffset() + src.limit();
911
            byte[] da = dst.array();
912
            int dp = dst.arrayOffset() + dst.position();
913
            int dl = dst.arrayOffset() + dst.limit();
914

915
            try {
916
                while (sp < sl) {
917
                    char c = sa[sp];
918
                    int bb = encodeChar(c);
919
                    if (bb == UNMAPPABLE_ENCODING) {
920
                        if (Character.isSurrogate(c)) {
921
                            if (sgp().parse(c, sa, sp, sl) < 0)
922
                                return sgp.error();
923
                            return sgp.unmappableResult();
924
                        }
925
                        return CoderResult.unmappableForLength(1);
926
                    }
927
                    if (bb > MAX_SINGLEBYTE) {  // DoubleByte
928
                        if (currentState == SBCS) {
929
                            if (dl - dp < 1)
930
                                return CoderResult.OVERFLOW;
931
                            currentState = DBCS;
932
                            da[dp++] = SO;
933
                        }
934
                        if (dl - dp < 2)
935
                            return CoderResult.OVERFLOW;
936
                        da[dp++] = (byte)(bb >> 8);
937
                        da[dp++] = (byte)bb;
938
                    } else {                    // SingleByte
939
                        if (currentState == DBCS) {
940
                            if (dl - dp < 1)
941
                                return CoderResult.OVERFLOW;
942
                            currentState = SBCS;
943
                            da[dp++] = SI;
944
                        }
945
                        if (dl - dp < 1)
946
                            return CoderResult.OVERFLOW;
947
                        da[dp++] = (byte)bb;
948

949
                    }
950
                    sp++;
951
                }
952
                return CoderResult.UNDERFLOW;
953
            } finally {
954
                src.position(sp - src.arrayOffset());
955
                dst.position(dp - dst.arrayOffset());
956
            }
957
        }
958

959
        protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
960
            int mark = src.position();
961
            try {
962
                while (src.hasRemaining()) {
963
                    char c = src.get();
964
                    int bb = encodeChar(c);
965
                    if (bb == UNMAPPABLE_ENCODING) {
966
                        if (Character.isSurrogate(c)) {
967
                            if (sgp().parse(c, src) < 0)
968
                                return sgp.error();
969
                            return sgp.unmappableResult();
970
                        }
971
                        return CoderResult.unmappableForLength(1);
972
                    }
973
                    if (bb > MAX_SINGLEBYTE) {  // DoubleByte
974
                        if (currentState == SBCS) {
975
                            if (dst.remaining() < 1)
976
                                return CoderResult.OVERFLOW;
977
                            currentState = DBCS;
978
                            dst.put(SO);
979
                        }
980
                        if (dst.remaining() < 2)
981
                            return CoderResult.OVERFLOW;
982
                        dst.put((byte)(bb >> 8));
983
                        dst.put((byte)(bb));
984
                    } else {                  // Single-byte
985
                        if (currentState == DBCS) {
986
                            if (dst.remaining() < 1)
987
                                return CoderResult.OVERFLOW;
988
                            currentState = SBCS;
989
                            dst.put(SI);
990
                        }
991
                        if (dst.remaining() < 1)
992
                            return CoderResult.OVERFLOW;
993
                        dst.put((byte)bb);
994
                    }
995
                    mark++;
996
                }
997
                return CoderResult.UNDERFLOW;
998
            } finally {
999
                src.position(mark);
1000
            }
1001
        }
1002

1003
        @Override
1004
        public int encode(char[] src, int sp, int len, byte[] dst) {
1005
            int dp = 0;
1006
            int sl = sp + len;
1007
            while (sp < sl) {
1008
                char c = src[sp++];
1009
                int bb = encodeChar(c);
1010

1011
                if (bb == UNMAPPABLE_ENCODING) {
1012
                    if (Character.isHighSurrogate(c) && sp < sl &&
1013
                        Character.isLowSurrogate(src[sp])) {
1014
                        sp++;
1015
                    }
1016
                    dst[dp++] = repl[0];
1017
                    if (repl.length > 1)
1018
                        dst[dp++] = repl[1];
1019
                    continue;
1020
                } //else
1021
                if (bb > MAX_SINGLEBYTE) {           // DoubleByte
1022
                    if (currentState == SBCS) {
1023
                        currentState = DBCS;
1024
                        dst[dp++] = SO;
1025
                    }
1026
                    dst[dp++] = (byte)(bb >> 8);
1027
                    dst[dp++] = (byte)bb;
1028
                } else {                             // SingleByte
1029
                    if (currentState == DBCS) {
1030
                         currentState = SBCS;
1031
                         dst[dp++] = SI;
1032
                    }
1033
                    dst[dp++] = (byte)bb;
1034
                }
1035
            }
1036

1037
            if (currentState == DBCS) {
1038
                 currentState = SBCS;
1039
                 dst[dp++] = SI;
1040
            }
1041
            return dp;
1042
        }
1043

1044
        @Override
1045
        public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
1046
            int dp = 0;
1047
            int sl = sp + len;
1048
            while (sp < sl) {
1049
                char c = (char)(src[sp++] & 0xff);
1050
                int bb = encodeChar(c);
1051
                if (bb == UNMAPPABLE_ENCODING) {
1052
                    // no surrogate pair in latin1 string
1053
                    dst[dp++] = repl[0];
1054
                    if (repl.length > 1)
1055
                        dst[dp++] = repl[1];
1056
                    continue;
1057
                } //else
1058
                if (bb > MAX_SINGLEBYTE) {           // DoubleByte
1059
                    if (currentState == SBCS) {
1060
                        currentState = DBCS;
1061
                        dst[dp++] = SO;
1062
                    }
1063
                    dst[dp++] = (byte)(bb >> 8);
1064
                    dst[dp++] = (byte)bb;
1065
                } else {                             // SingleByte
1066
                    if (currentState == DBCS) {
1067
                         currentState = SBCS;
1068
                         dst[dp++] = SI;
1069
                    }
1070
                    dst[dp++] = (byte)bb;
1071
                }
1072
            }
1073
            if (currentState == DBCS) {
1074
                 currentState = SBCS;
1075
                 dst[dp++] = SI;
1076
            }
1077
            return dp;
1078
        }
1079

1080
        @Override
1081
        public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
1082
            int dp = 0;
1083
            int sl = sp + len;
1084
            while (sp < sl) {
1085
                char c = StringUTF16.getChar(src, sp++);
1086
                int bb = encodeChar(c);
1087
                if (bb == UNMAPPABLE_ENCODING) {
1088
                    if (Character.isHighSurrogate(c) && sp < sl &&
1089
                        Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
1090
                        sp++;
1091
                    }
1092
                    dst[dp++] = repl[0];
1093
                    if (repl.length > 1)
1094
                        dst[dp++] = repl[1];
1095
                    continue;
1096
                } //else
1097
                if (bb > MAX_SINGLEBYTE) {           // DoubleByte
1098
                    if (currentState == SBCS) {
1099
                        currentState = DBCS;
1100
                        dst[dp++] = SO;
1101
                    }
1102
                    dst[dp++] = (byte)(bb >> 8);
1103
                    dst[dp++] = (byte)bb;
1104
                } else {                             // SingleByte
1105
                    if (currentState == DBCS) {
1106
                         currentState = SBCS;
1107
                         dst[dp++] = SI;
1108
                    }
1109
                    dst[dp++] = (byte)bb;
1110
                }
1111
            }
1112
            if (currentState == DBCS) {
1113
                 currentState = SBCS;
1114
                 dst[dp++] = SI;
1115
            }
1116
            return dp;
1117
        }
1118
    }
1119

1120
    // EUC_SIMPLE
1121
    public static class Encoder_EUC_SIM extends Encoder {
1122
        public Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex,
1123
                               boolean isASCIICompatible) {
1124
            super(cs, c2b, c2bIndex, isASCIICompatible);
1125
        }
1126
    }
1127

1128
}
1129

1130
Product

Resources

Company