Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/jdk17u
Path: blob/master/src/java.base/share/classes/sun/nio/cs/DoubleByte.java
67862 views
1
/*
2
* Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
26
package sun.nio.cs;
27
28
import java.nio.ByteBuffer;
29
import java.nio.CharBuffer;
30
import java.nio.charset.Charset;
31
import java.nio.charset.CharsetDecoder;
32
import java.nio.charset.CharsetEncoder;
33
import java.nio.charset.CoderResult;
34
import java.util.Arrays;
35
36
import jdk.internal.access.JavaLangAccess;
37
import jdk.internal.access.SharedSecrets;
38
import sun.nio.cs.Surrogate;
39
import sun.nio.cs.ArrayDecoder;
40
import sun.nio.cs.ArrayEncoder;
41
import static sun.nio.cs.CharsetMapping.*;
42
43
/*
44
* Four types of "DoubleByte" charsets are implemented in this class
45
* (1)DoubleByte
46
* The "most widely used" multibyte charset, a combination of
47
* a singlebyte character set (usually the ASCII charset) and a
48
* doublebyte character set. The codepoint values of singlebyte
49
* and doublebyte don't overlap. Microsoft's multibyte charsets
50
* and IBM's "DBCS_ASCII" charsets, such as IBM1381, 942, 943,
51
* 948, 949 and 950 are such charsets.
52
*
53
* (2)DoubleByte_EBCDIC
54
* IBM EBCDIC Mix multibyte charset. Use SO and SI to shift (switch)
55
* in and out between the singlebyte character set and doublebyte
56
* character set.
57
*
58
* (3)DoubleByte_SIMPLE_EUC
59
* It's a "simple" form of the EUC encoding scheme: only the
* singlebyte character set G0 and one doublebyte character set
* G1 are defined; G2 (with SS2) and G3 (with SS3) are not used.
* So it is actually the same as the "typical" type (1) mentioned
* above, except that it returns "malformed" for SS2 and SS3 when
* decoding.
65
*
66
* (4)DoubleByte ONLY
67
* A "pure" doublebyte only character set. From implementation
68
* point of view, this is the type (1) with "decodeSingle" always
* returning unmappable.
70
*
71
* For simplicity, all implementations share the same decoding and
72
* encoding data structure.
73
*
74
* Decoding:
75
*
76
* char[][] b2c;
77
* char[] b2cSB;
78
* int b2Min, b2Max
79
*
80
* public char decodeSingle(int b) {
81
* return b2cSB[b];
82
* }
83
*
84
* public char decodeDouble(int b1, int b2) {
85
* if (b2 < b2Min || b2 > b2Max)
86
* return UNMAPPABLE_DECODING;
87
* return b2c[b1][b2 - b2Min];
88
* }
89
*
90
* (1)b2Min, b2Max are the corresponding min and max value of the
91
* low-half of the double-byte.
92
* (2)The high 8-bit/b1 of the double-byte is used to index into the
* b2c array.
94
*
95
* Encoding:
96
*
97
* char[] c2b;
98
* char[] c2bIndex;
99
*
100
* public int encodeChar(char ch) {
101
* return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
102
* }
103
*
104
*/
105
106
public class DoubleByte {
107
108
/**
 * A 0x100-entry row in which every slot holds {@code UNMAPPABLE_DECODING}.
 * {@code Decoder.crMalformedOrUnmappable} compares rows of {@code b2c}
 * against this array by identity to tell lead bytes from non-lead bytes.
 */
public static final char[] B2C_UNMAPPABLE = new char[0x100];

static {
    Arrays.fill(B2C_UNMAPPABLE, UNMAPPABLE_DECODING);
}
113
114
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
115
116
public static class Decoder extends CharsetDecoder
117
implements DelegatableDecoder, ArrayDecoder
118
{
119
final char[][] b2c;
120
final char[] b2cSB;
121
final int b2Min;
122
final int b2Max;
123
final boolean isASCIICompatible;
124
125
// for SimpleEUC override
126
protected CoderResult crMalformedOrUnderFlow(int b) {
127
return CoderResult.UNDERFLOW;
128
}
129
130
protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
131
if (b2c[b1] == B2C_UNMAPPABLE || // isNotLeadingByte(b1)
132
b2c[b2] != B2C_UNMAPPABLE || // isLeadingByte(b2)
133
decodeSingle(b2) != UNMAPPABLE_DECODING) { // isSingle(b2)
134
return CoderResult.malformedForLength(1);
135
}
136
return CoderResult.unmappableForLength(2);
137
}
138
139
/**
 * Full constructor.
 *
 * @param cs                the charset this decoder belongs to
 * @param avgcpb            average chars-per-byte passed to the superclass
 * @param maxcpb            maximum chars-per-byte passed to the superclass
 * @param b2c               double-byte decode table
 * @param b2cSB             single-byte decode table
 * @param b2Min             lowest accepted second-byte value
 * @param b2Max             highest accepted second-byte value
 * @param isASCIICompatible whether the ASCII fast path may be used
 */
public Decoder(Charset cs, float avgcpb, float maxcpb,
               char[][] b2c, char[] b2cSB,
               int b2Min, int b2Max,
               boolean isASCIICompatible) {
    super(cs, avgcpb, maxcpb);
    this.isASCIICompatible = isASCIICompatible;
    this.b2Min = b2Min;
    this.b2Max = b2Max;
    this.b2cSB = b2cSB;
    this.b2c = b2c;
}

/**
 * Same as the full constructor with 0.5 average and 1.0 maximum
 * chars-per-byte.
 */
public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
               boolean isASCIICompatible) {
    this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
}

/**
 * Same as the six-argument constructor with {@code isASCIICompatible}
 * set to {@code false}.
 */
public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
    this(cs, b2c, b2cSB, b2Min, b2Max, false);
}
159
160
protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
161
byte[] sa = src.array();
162
int soff = src.arrayOffset();
163
int sp = soff + src.position();
164
int sl = soff + src.limit();
165
166
char[] da = dst.array();
167
int doff = dst.arrayOffset();
168
int dp = doff + dst.position();
169
int dl = doff + dst.limit();
170
171
try {
172
if (isASCIICompatible) {
173
int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(dl - dp, sl - sp));
174
dp += n;
175
sp += n;
176
}
177
while (sp < sl && dp < dl) {
178
// inline the decodeSingle/Double() for better performance
179
int inSize = 1;
180
int b1 = sa[sp] & 0xff;
181
char c = b2cSB[b1];
182
if (c == UNMAPPABLE_DECODING) {
183
if (sl - sp < 2)
184
return crMalformedOrUnderFlow(b1);
185
int b2 = sa[sp + 1] & 0xff;
186
if (b2 < b2Min || b2 > b2Max ||
187
(c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
188
return crMalformedOrUnmappable(b1, b2);
189
}
190
inSize++;
191
}
192
da[dp++] = c;
193
sp += inSize;
194
}
195
return (sp >= sl) ? CoderResult.UNDERFLOW
196
: CoderResult.OVERFLOW;
197
} finally {
198
src.position(sp - soff);
199
dst.position(dp - doff);
200
}
201
}
202
203
protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
204
int mark = src.position();
205
try {
206
207
while (src.hasRemaining() && dst.hasRemaining()) {
208
int b1 = src.get() & 0xff;
209
char c = b2cSB[b1];
210
int inSize = 1;
211
if (c == UNMAPPABLE_DECODING) {
212
if (src.remaining() < 1)
213
return crMalformedOrUnderFlow(b1);
214
int b2 = src.get() & 0xff;
215
if (b2 < b2Min || b2 > b2Max ||
216
(c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING)
217
return crMalformedOrUnmappable(b1, b2);
218
inSize++;
219
}
220
dst.put(c);
221
mark += inSize;
222
}
223
return src.hasRemaining()? CoderResult.OVERFLOW
224
: CoderResult.UNDERFLOW;
225
} finally {
226
src.position(mark);
227
}
228
}
229
230
// Make some protected methods public for use by JISAutoDetect
231
public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
232
if (src.hasArray() && dst.hasArray())
233
return decodeArrayLoop(src, dst);
234
else
235
return decodeBufferLoop(src, dst);
236
}
237
238
@Override
239
public int decode(byte[] src, int sp, int len, char[] dst) {
240
int dp = 0;
241
int sl = sp + len;
242
char repl = replacement().charAt(0);
243
while (sp < sl) {
244
int b1 = src[sp++] & 0xff;
245
char c = b2cSB[b1];
246
if (c == UNMAPPABLE_DECODING) {
247
if (sp < sl) {
248
int b2 = src[sp++] & 0xff;
249
if (b2 < b2Min || b2 > b2Max ||
250
(c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
251
if (crMalformedOrUnmappable(b1, b2).length() == 1) {
252
sp--;
253
}
254
}
255
}
256
if (c == UNMAPPABLE_DECODING) {
257
c = repl;
258
}
259
}
260
dst[dp++] = c;
261
}
262
return dp;
263
}
264
265
@Override
266
public boolean isASCIICompatible() {
267
return isASCIICompatible;
268
}
269
270
public void implReset() {
271
super.implReset();
272
}
273
274
public CoderResult implFlush(CharBuffer out) {
275
return super.implFlush(out);
276
}
277
278
// decode loops are not using decodeSingle/Double() for performance
279
// reason.
280
public char decodeSingle(int b) {
281
return b2cSB[b];
282
}
283
284
public char decodeDouble(int b1, int b2) {
285
if (b1 < 0 || b1 > b2c.length ||
286
b2 < b2Min || b2 > b2Max)
287
return UNMAPPABLE_DECODING;
288
return b2c[b1][b2 - b2Min];
289
}
290
}
291
292
// IBM_EBCDIC_DBCS
293
public static class Decoder_EBCDIC extends Decoder {
294
// Decoder modes
private static final int SBCS = 0;   // single-byte mode
private static final int DBCS = 1;   // double-byte mode

// Shift control bytes
private static final int SO = 0x0e;  // shift out: SBCS -> DBCS
private static final int SI = 0x0f;  // shift in:  DBCS -> SBCS

/** Current mode, either SBCS or DBCS. */
private int currentState;

/**
 * @param cs                the charset this decoder belongs to
 * @param b2c               double-byte decode table
 * @param b2cSB             single-byte decode table
 * @param b2Min             lowest accepted second-byte value
 * @param b2Max             highest accepted second-byte value
 * @param isASCIICompatible forwarded unchanged to the base decoder
 */
public Decoder_EBCDIC(Charset cs,
                      char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
                      boolean isASCIICompatible) {
    super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
}

/**
 * Same as the six-argument constructor with {@code isASCIICompatible}
 * set to {@code false}.
 */
public Decoder_EBCDIC(Charset cs,
                      char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
    this(cs, b2c, b2cSB, b2Min, b2Max, false);
}
310
311
public void implReset() {
312
currentState = SBCS;
313
}
314
315
// Check validity of dbcs ebcdic byte pair values
316
//
317
// First byte : 0x41 -- 0xFE
318
// Second byte: 0x41 -- 0xFE
319
// Doublebyte blank: 0x4040
320
//
321
// The validation implementation in "old" DBCS_IBM_EBCDIC and sun.io
322
// as
323
// if ((b1 != 0x40 || b2 != 0x40) &&
324
// (b2 < 0x41 || b2 > 0xfe)) {...}
325
// is not correct/complete (range check for b1)
326
//
327
private static boolean isDoubleByte(int b1, int b2) {
328
return (0x41 <= b1 && b1 <= 0xfe && 0x41 <= b2 && b2 <= 0xfe)
329
|| (b1 == 0x40 && b2 == 0x40); // DBCS-HOST SPACE
330
}
331
332
protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
333
byte[] sa = src.array();
334
int sp = src.arrayOffset() + src.position();
335
int sl = src.arrayOffset() + src.limit();
336
char[] da = dst.array();
337
int dp = dst.arrayOffset() + dst.position();
338
int dl = dst.arrayOffset() + dst.limit();
339
340
try {
341
// don't check dp/dl together here, it's possible to
342
// decdoe a SO/SI without space in output buffer.
343
while (sp < sl) {
344
int b1 = sa[sp] & 0xff;
345
int inSize = 1;
346
if (b1 == SO) { // Shift out
347
if (currentState != SBCS)
348
return CoderResult.malformedForLength(1);
349
else
350
currentState = DBCS;
351
} else if (b1 == SI) {
352
if (currentState != DBCS)
353
return CoderResult.malformedForLength(1);
354
else
355
currentState = SBCS;
356
} else {
357
char c;
358
if (currentState == SBCS) {
359
c = b2cSB[b1];
360
if (c == UNMAPPABLE_DECODING)
361
return CoderResult.unmappableForLength(1);
362
} else {
363
if (sl - sp < 2)
364
return CoderResult.UNDERFLOW;
365
int b2 = sa[sp + 1] & 0xff;
366
if (b2 < b2Min || b2 > b2Max ||
367
(c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
368
if (!isDoubleByte(b1, b2))
369
return CoderResult.malformedForLength(2);
370
return CoderResult.unmappableForLength(2);
371
}
372
inSize++;
373
}
374
if (dl - dp < 1)
375
return CoderResult.OVERFLOW;
376
377
da[dp++] = c;
378
}
379
sp += inSize;
380
}
381
return CoderResult.UNDERFLOW;
382
} finally {
383
src.position(sp - src.arrayOffset());
384
dst.position(dp - dst.arrayOffset());
385
}
386
}
387
388
protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
389
int mark = src.position();
390
try {
391
while (src.hasRemaining()) {
392
int b1 = src.get() & 0xff;
393
int inSize = 1;
394
if (b1 == SO) { // Shift out
395
if (currentState != SBCS)
396
return CoderResult.malformedForLength(1);
397
else
398
currentState = DBCS;
399
} else if (b1 == SI) {
400
if (currentState != DBCS)
401
return CoderResult.malformedForLength(1);
402
else
403
currentState = SBCS;
404
} else {
405
char c = UNMAPPABLE_DECODING;
406
if (currentState == SBCS) {
407
c = b2cSB[b1];
408
if (c == UNMAPPABLE_DECODING)
409
return CoderResult.unmappableForLength(1);
410
} else {
411
if (src.remaining() < 1)
412
return CoderResult.UNDERFLOW;
413
int b2 = src.get()&0xff;
414
if (b2 < b2Min || b2 > b2Max ||
415
(c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
416
if (!isDoubleByte(b1, b2))
417
return CoderResult.malformedForLength(2);
418
return CoderResult.unmappableForLength(2);
419
}
420
inSize++;
421
}
422
423
if (dst.remaining() < 1)
424
return CoderResult.OVERFLOW;
425
426
dst.put(c);
427
}
428
mark += inSize;
429
}
430
return CoderResult.UNDERFLOW;
431
} finally {
432
src.position(mark);
433
}
434
}
435
436
@Override
437
public int decode(byte[] src, int sp, int len, char[] dst) {
438
int dp = 0;
439
int sl = sp + len;
440
currentState = SBCS;
441
char repl = replacement().charAt(0);
442
while (sp < sl) {
443
int b1 = src[sp++] & 0xff;
444
if (b1 == SO) { // Shift out
445
if (currentState != SBCS)
446
dst[dp++] = repl;
447
else
448
currentState = DBCS;
449
} else if (b1 == SI) {
450
if (currentState != DBCS)
451
dst[dp++] = repl;
452
else
453
currentState = SBCS;
454
} else {
455
char c = UNMAPPABLE_DECODING;
456
if (currentState == SBCS) {
457
c = b2cSB[b1];
458
if (c == UNMAPPABLE_DECODING)
459
c = repl;
460
} else {
461
if (sl == sp) {
462
c = repl;
463
} else {
464
int b2 = src[sp++] & 0xff;
465
if (b2 < b2Min || b2 > b2Max ||
466
(c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
467
c = repl;
468
}
469
}
470
}
471
dst[dp++] = c;
472
}
473
}
474
return dp;
475
}
476
}
477
478
// DBCS_ONLY
479
public static class Decoder_DBCSONLY extends Decoder {
480
static final char[] b2cSB_UNMAPPABLE;
481
static {
482
b2cSB_UNMAPPABLE = new char[0x100];
483
Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING);
484
}
485
486
// always returns unmappableForLenth(2) for doublebyte_only
487
@Override
488
protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
489
return CoderResult.unmappableForLength(2);
490
}
491
492
public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
493
boolean isASCIICompatible) {
494
super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, isASCIICompatible);
495
}
496
497
public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
498
super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, false);
499
}
500
}
501
502
// EUC_SIMPLE
503
// The only thing we need to "override" is to check SS2/SS3 and
504
// return "malformed" if found
505
public static class Decoder_EUC_SIM extends Decoder {
506
private final int SS2 = 0x8E;
507
private final int SS3 = 0x8F;
508
509
public Decoder_EUC_SIM(Charset cs,
510
char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
511
boolean isASCIICompatible) {
512
super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
513
}
514
515
// No support provided for G2/G3 for SimpleEUC
516
protected CoderResult crMalformedOrUnderFlow(int b) {
517
if (b == SS2 || b == SS3 )
518
return CoderResult.malformedForLength(1);
519
return CoderResult.UNDERFLOW;
520
}
521
522
protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
523
if (b1 == SS2 || b1 == SS3 )
524
return CoderResult.malformedForLength(1);
525
return CoderResult.unmappableForLength(2);
526
}
527
528
@Override
529
public int decode(byte[] src, int sp, int len, char[] dst) {
530
int dp = 0;
531
int sl = sp + len;
532
char repl = replacement().charAt(0);
533
while (sp < sl) {
534
int b1 = src[sp++] & 0xff;
535
char c = b2cSB[b1];
536
if (c == UNMAPPABLE_DECODING) {
537
if (sp < sl) {
538
int b2 = src[sp++] & 0xff;
539
if (b2 < b2Min || b2 > b2Max ||
540
(c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
541
if (b1 == SS2 || b1 == SS3) {
542
sp--;
543
}
544
c = repl;
545
}
546
} else {
547
c = repl;
548
}
549
}
550
dst[dp++] = c;
551
}
552
return dp;
553
}
554
}
555
556
public static class Encoder extends CharsetEncoder
557
implements ArrayEncoder
558
{
559
/** Largest encoded value that is written as a single byte. */
protected final int MAX_SINGLEBYTE = 0xff;

/** Encode table, addressed as c2b[c2bIndex[ch >> 8] + (ch & 0xff)]. */
private final char[] c2b;

/** Offset into c2b for each high byte of a char. */
private final char[] c2bIndex;

/** Surrogate parser, created on first use by sgp(). */
protected Surrogate.Parser sgp;

/** When true, the array encoders first run the JLA.encodeASCII fast path. */
final boolean isASCIICompatible;

/**
 * Same as the four-argument constructor with {@code isASCIICompatible}
 * set to {@code false}.
 */
public Encoder(Charset cs, char[] c2b, char[] c2bIndex) {
    this(cs, c2b, c2bIndex, false);
}

/**
 * Creates an encoder with 2.0 average and 2.0 maximum bytes-per-char.
 *
 * @param cs                the charset this encoder belongs to
 * @param c2b               encode table
 * @param c2bIndex          index into the encode table
 * @param isASCIICompatible whether the ASCII fast path may be used
 */
public Encoder(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible) {
    super(cs, 2.0f, 2.0f);
    this.isASCIICompatible = isASCIICompatible;
    this.c2bIndex = c2bIndex;
    this.c2b = c2b;
}

/**
 * Creates an encoder with explicit sizing factors and replacement bytes.
 *
 * @param cs                the charset this encoder belongs to
 * @param avg               average bytes-per-char passed to the superclass
 * @param max               maximum bytes-per-char passed to the superclass
 * @param repl              replacement bytes passed to the superclass
 * @param c2b               encode table
 * @param c2bIndex          index into the encode table
 * @param isASCIICompatible whether the ASCII fast path may be used
 */
public Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex,
               boolean isASCIICompatible) {
    super(cs, avg, max, repl);
    this.isASCIICompatible = isASCIICompatible;
    this.c2bIndex = c2bIndex;
    this.c2b = c2b;
}
583
584
public boolean canEncode(char c) {
585
return encodeChar(c) != UNMAPPABLE_ENCODING;
586
}
587
588
protected Surrogate.Parser sgp() {
589
if (sgp == null)
590
sgp = new Surrogate.Parser();
591
return sgp;
592
}
593
594
protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
595
char[] sa = src.array();
596
int sp = src.arrayOffset() + src.position();
597
int sl = src.arrayOffset() + src.limit();
598
599
byte[] da = dst.array();
600
int dp = dst.arrayOffset() + dst.position();
601
int dl = dst.arrayOffset() + dst.limit();
602
603
try {
604
if (isASCIICompatible) {
605
int n = JLA.encodeASCII(sa, sp, da, dp, Math.min(dl - dp, sl - sp));
606
sp += n;
607
dp += n;
608
}
609
while (sp < sl) {
610
char c = sa[sp];
611
int bb = encodeChar(c);
612
if (bb == UNMAPPABLE_ENCODING) {
613
if (Character.isSurrogate(c)) {
614
if (sgp().parse(c, sa, sp, sl) < 0)
615
return sgp.error();
616
return sgp.unmappableResult();
617
}
618
return CoderResult.unmappableForLength(1);
619
}
620
621
if (bb > MAX_SINGLEBYTE) { // DoubleByte
622
if (dl - dp < 2)
623
return CoderResult.OVERFLOW;
624
da[dp++] = (byte)(bb >> 8);
625
da[dp++] = (byte)bb;
626
} else { // SingleByte
627
if (dl - dp < 1)
628
return CoderResult.OVERFLOW;
629
da[dp++] = (byte)bb;
630
}
631
632
sp++;
633
}
634
return CoderResult.UNDERFLOW;
635
} finally {
636
src.position(sp - src.arrayOffset());
637
dst.position(dp - dst.arrayOffset());
638
}
639
}
640
641
protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
642
int mark = src.position();
643
try {
644
while (src.hasRemaining()) {
645
char c = src.get();
646
int bb = encodeChar(c);
647
if (bb == UNMAPPABLE_ENCODING) {
648
if (Character.isSurrogate(c)) {
649
if (sgp().parse(c, src) < 0)
650
return sgp.error();
651
return sgp.unmappableResult();
652
}
653
return CoderResult.unmappableForLength(1);
654
}
655
if (bb > MAX_SINGLEBYTE) { // DoubleByte
656
if (dst.remaining() < 2)
657
return CoderResult.OVERFLOW;
658
dst.put((byte)(bb >> 8));
659
dst.put((byte)(bb));
660
} else {
661
if (dst.remaining() < 1)
662
return CoderResult.OVERFLOW;
663
dst.put((byte)bb);
664
}
665
mark++;
666
}
667
return CoderResult.UNDERFLOW;
668
} finally {
669
src.position(mark);
670
}
671
}
672
673
protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
674
if (src.hasArray() && dst.hasArray())
675
return encodeArrayLoop(src, dst);
676
else
677
return encodeBufferLoop(src, dst);
678
}
679
680
protected byte[] repl = replacement();
681
protected void implReplaceWith(byte[] newReplacement) {
682
repl = newReplacement;
683
}
684
685
@Override
686
public int encode(char[] src, int sp, int len, byte[] dst) {
687
int dp = 0;
688
int sl = sp + len;
689
if (isASCIICompatible) {
690
int n = JLA.encodeASCII(src, sp, dst, dp, len);
691
sp += n;
692
dp += n;
693
}
694
while (sp < sl) {
695
char c = src[sp++];
696
int bb = encodeChar(c);
697
if (bb == UNMAPPABLE_ENCODING) {
698
if (Character.isHighSurrogate(c) && sp < sl &&
699
Character.isLowSurrogate(src[sp])) {
700
sp++;
701
}
702
dst[dp++] = repl[0];
703
if (repl.length > 1)
704
dst[dp++] = repl[1];
705
continue;
706
} //else
707
if (bb > MAX_SINGLEBYTE) { // DoubleByte
708
dst[dp++] = (byte)(bb >> 8);
709
dst[dp++] = (byte)bb;
710
} else { // SingleByte
711
dst[dp++] = (byte)bb;
712
}
713
}
714
return dp;
715
}
716
717
@Override
718
public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
719
int dp = 0;
720
int sl = sp + len;
721
while (sp < sl) {
722
char c = (char)(src[sp++] & 0xff);
723
int bb = encodeChar(c);
724
if (bb == UNMAPPABLE_ENCODING) {
725
// no surrogate pair in latin1 string
726
dst[dp++] = repl[0];
727
if (repl.length > 1) {
728
dst[dp++] = repl[1];
729
}
730
continue;
731
} //else
732
if (bb > MAX_SINGLEBYTE) { // DoubleByte
733
dst[dp++] = (byte)(bb >> 8);
734
dst[dp++] = (byte)bb;
735
} else { // SingleByte
736
dst[dp++] = (byte)bb;
737
}
738
739
}
740
return dp;
741
}
742
743
@Override
744
public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
745
int dp = 0;
746
int sl = sp + len;
747
while (sp < sl) {
748
char c = StringUTF16.getChar(src, sp++);
749
int bb = encodeChar(c);
750
if (bb == UNMAPPABLE_ENCODING) {
751
if (Character.isHighSurrogate(c) && sp < sl &&
752
Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
753
sp++;
754
}
755
dst[dp++] = repl[0];
756
if (repl.length > 1) {
757
dst[dp++] = repl[1];
758
}
759
continue;
760
} //else
761
if (bb > MAX_SINGLEBYTE) { // DoubleByte
762
dst[dp++] = (byte)(bb >> 8);
763
dst[dp++] = (byte)bb;
764
} else { // SingleByte
765
dst[dp++] = (byte)bb;
766
}
767
}
768
return dp;
769
}
770
771
@Override
772
public boolean isASCIICompatible() {
773
return isASCIICompatible;
774
}
775
776
public int encodeChar(char ch) {
777
return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
778
}
779
780
// init the c2b and c2bIndex tables from b2c.
781
public static void initC2B(String[] b2c, String b2cSB, String b2cNR, String c2bNR,
782
int b2Min, int b2Max,
783
char[] c2b, char[] c2bIndex)
784
{
785
Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
786
int off = 0x100;
787
788
char[][] b2c_ca = new char[b2c.length][];
789
char[] b2cSB_ca = null;
790
if (b2cSB != null)
791
b2cSB_ca = b2cSB.toCharArray();
792
793
for (int i = 0; i < b2c.length; i++) {
794
if (b2c[i] == null)
795
continue;
796
b2c_ca[i] = b2c[i].toCharArray();
797
}
798
799
if (b2cNR != null) {
800
int j = 0;
801
while (j < b2cNR.length()) {
802
char b = b2cNR.charAt(j++);
803
char c = b2cNR.charAt(j++);
804
if (b < 0x100 && b2cSB_ca != null) {
805
if (b2cSB_ca[b] == c)
806
b2cSB_ca[b] = UNMAPPABLE_DECODING;
807
} else {
808
if (b2c_ca[b >> 8][(b & 0xff) - b2Min] == c)
809
b2c_ca[b >> 8][(b & 0xff) - b2Min] = UNMAPPABLE_DECODING;
810
}
811
}
812
}
813
814
if (b2cSB_ca != null) { // SingleByte
815
for (int b = 0; b < b2cSB_ca.length; b++) {
816
char c = b2cSB_ca[b];
817
if (c == UNMAPPABLE_DECODING)
818
continue;
819
int index = c2bIndex[c >> 8];
820
if (index == 0) {
821
index = off;
822
off += 0x100;
823
c2bIndex[c >> 8] = (char)index;
824
}
825
c2b[index + (c & 0xff)] = (char)b;
826
}
827
}
828
829
for (int b1 = 0; b1 < b2c.length; b1++) { // DoubleByte
830
char[] db = b2c_ca[b1];
831
if (db == null)
832
continue;
833
for (int b2 = b2Min; b2 <= b2Max; b2++) {
834
char c = db[b2 - b2Min];
835
if (c == UNMAPPABLE_DECODING)
836
continue;
837
int index = c2bIndex[c >> 8];
838
if (index == 0) {
839
index = off;
840
off += 0x100;
841
c2bIndex[c >> 8] = (char)index;
842
}
843
c2b[index + (c & 0xff)] = (char)((b1 << 8) | b2);
844
}
845
}
846
847
if (c2bNR != null) {
848
// add c->b only nr entries
849
for (int i = 0; i < c2bNR.length(); i += 2) {
850
char b = c2bNR.charAt(i);
851
char c = c2bNR.charAt(i + 1);
852
int index = (c >> 8);
853
if (c2bIndex[index] == 0) {
854
c2bIndex[index] = (char)off;
855
off += 0x100;
856
}
857
index = c2bIndex[index] + (c & 0xff);
858
c2b[index] = b;
859
}
860
}
861
}
862
}
863
864
public static class Encoder_DBCSONLY extends Encoder {
865
866
public Encoder_DBCSONLY(Charset cs, byte[] repl,
867
char[] c2b, char[] c2bIndex,
868
boolean isASCIICompatible) {
869
super(cs, 2.0f, 2.0f, repl, c2b, c2bIndex, isASCIICompatible);
870
}
871
872
public int encodeChar(char ch) {
873
int bb = super.encodeChar(ch);
874
if (bb <= MAX_SINGLEBYTE)
875
return UNMAPPABLE_ENCODING;
876
return bb;
877
}
878
}
879
880
public static class Encoder_EBCDIC extends Encoder {
881
// Encoder modes
static final int SBCS = 0;      // single-byte mode
static final int DBCS = 1;      // double-byte mode

// Shift control bytes
static final byte SO = 0x0e;    // written before switching to double-byte output
static final byte SI = 0x0f;    // written before switching back to single-byte output

/** Current output mode; starts in SBCS. */
protected int currentState = SBCS;

/**
 * Creates an encoder with 4.0 average and 5.0 maximum bytes-per-char and
 * the single replacement byte 0x6f.
 *
 * @param cs                the charset this encoder belongs to
 * @param c2b               encode table
 * @param c2bIndex          index into the encode table
 * @param isASCIICompatible forwarded unchanged to the base encoder
 */
public Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex,
                      boolean isASCIICompatible) {
    super(cs, 4.0f, 5.0f, new byte[] {(byte)0x6f}, c2b, c2bIndex, isASCIICompatible);
}
892
893
protected void implReset() {
894
currentState = SBCS;
895
}
896
897
protected CoderResult implFlush(ByteBuffer out) {
898
if (currentState == DBCS) {
899
if (out.remaining() < 1)
900
return CoderResult.OVERFLOW;
901
out.put(SI);
902
}
903
implReset();
904
return CoderResult.UNDERFLOW;
905
}
906
907
protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
908
char[] sa = src.array();
909
int sp = src.arrayOffset() + src.position();
910
int sl = src.arrayOffset() + src.limit();
911
byte[] da = dst.array();
912
int dp = dst.arrayOffset() + dst.position();
913
int dl = dst.arrayOffset() + dst.limit();
914
915
try {
916
while (sp < sl) {
917
char c = sa[sp];
918
int bb = encodeChar(c);
919
if (bb == UNMAPPABLE_ENCODING) {
920
if (Character.isSurrogate(c)) {
921
if (sgp().parse(c, sa, sp, sl) < 0)
922
return sgp.error();
923
return sgp.unmappableResult();
924
}
925
return CoderResult.unmappableForLength(1);
926
}
927
if (bb > MAX_SINGLEBYTE) { // DoubleByte
928
if (currentState == SBCS) {
929
if (dl - dp < 1)
930
return CoderResult.OVERFLOW;
931
currentState = DBCS;
932
da[dp++] = SO;
933
}
934
if (dl - dp < 2)
935
return CoderResult.OVERFLOW;
936
da[dp++] = (byte)(bb >> 8);
937
da[dp++] = (byte)bb;
938
} else { // SingleByte
939
if (currentState == DBCS) {
940
if (dl - dp < 1)
941
return CoderResult.OVERFLOW;
942
currentState = SBCS;
943
da[dp++] = SI;
944
}
945
if (dl - dp < 1)
946
return CoderResult.OVERFLOW;
947
da[dp++] = (byte)bb;
948
949
}
950
sp++;
951
}
952
return CoderResult.UNDERFLOW;
953
} finally {
954
src.position(sp - src.arrayOffset());
955
dst.position(dp - dst.arrayOffset());
956
}
957
}
958
959
protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
960
int mark = src.position();
961
try {
962
while (src.hasRemaining()) {
963
char c = src.get();
964
int bb = encodeChar(c);
965
if (bb == UNMAPPABLE_ENCODING) {
966
if (Character.isSurrogate(c)) {
967
if (sgp().parse(c, src) < 0)
968
return sgp.error();
969
return sgp.unmappableResult();
970
}
971
return CoderResult.unmappableForLength(1);
972
}
973
if (bb > MAX_SINGLEBYTE) { // DoubleByte
974
if (currentState == SBCS) {
975
if (dst.remaining() < 1)
976
return CoderResult.OVERFLOW;
977
currentState = DBCS;
978
dst.put(SO);
979
}
980
if (dst.remaining() < 2)
981
return CoderResult.OVERFLOW;
982
dst.put((byte)(bb >> 8));
983
dst.put((byte)(bb));
984
} else { // Single-byte
985
if (currentState == DBCS) {
986
if (dst.remaining() < 1)
987
return CoderResult.OVERFLOW;
988
currentState = SBCS;
989
dst.put(SI);
990
}
991
if (dst.remaining() < 1)
992
return CoderResult.OVERFLOW;
993
dst.put((byte)bb);
994
}
995
mark++;
996
}
997
return CoderResult.UNDERFLOW;
998
} finally {
999
src.position(mark);
1000
}
1001
}
1002
1003
@Override
1004
public int encode(char[] src, int sp, int len, byte[] dst) {
1005
int dp = 0;
1006
int sl = sp + len;
1007
while (sp < sl) {
1008
char c = src[sp++];
1009
int bb = encodeChar(c);
1010
1011
if (bb == UNMAPPABLE_ENCODING) {
1012
if (Character.isHighSurrogate(c) && sp < sl &&
1013
Character.isLowSurrogate(src[sp])) {
1014
sp++;
1015
}
1016
dst[dp++] = repl[0];
1017
if (repl.length > 1)
1018
dst[dp++] = repl[1];
1019
continue;
1020
} //else
1021
if (bb > MAX_SINGLEBYTE) { // DoubleByte
1022
if (currentState == SBCS) {
1023
currentState = DBCS;
1024
dst[dp++] = SO;
1025
}
1026
dst[dp++] = (byte)(bb >> 8);
1027
dst[dp++] = (byte)bb;
1028
} else { // SingleByte
1029
if (currentState == DBCS) {
1030
currentState = SBCS;
1031
dst[dp++] = SI;
1032
}
1033
dst[dp++] = (byte)bb;
1034
}
1035
}
1036
1037
if (currentState == DBCS) {
1038
currentState = SBCS;
1039
dst[dp++] = SI;
1040
}
1041
return dp;
1042
}
1043
1044
@Override
1045
public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
1046
int dp = 0;
1047
int sl = sp + len;
1048
while (sp < sl) {
1049
char c = (char)(src[sp++] & 0xff);
1050
int bb = encodeChar(c);
1051
if (bb == UNMAPPABLE_ENCODING) {
1052
// no surrogate pair in latin1 string
1053
dst[dp++] = repl[0];
1054
if (repl.length > 1)
1055
dst[dp++] = repl[1];
1056
continue;
1057
} //else
1058
if (bb > MAX_SINGLEBYTE) { // DoubleByte
1059
if (currentState == SBCS) {
1060
currentState = DBCS;
1061
dst[dp++] = SO;
1062
}
1063
dst[dp++] = (byte)(bb >> 8);
1064
dst[dp++] = (byte)bb;
1065
} else { // SingleByte
1066
if (currentState == DBCS) {
1067
currentState = SBCS;
1068
dst[dp++] = SI;
1069
}
1070
dst[dp++] = (byte)bb;
1071
}
1072
}
1073
if (currentState == DBCS) {
1074
currentState = SBCS;
1075
dst[dp++] = SI;
1076
}
1077
return dp;
1078
}
1079
1080
@Override
1081
public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
1082
int dp = 0;
1083
int sl = sp + len;
1084
while (sp < sl) {
1085
char c = StringUTF16.getChar(src, sp++);
1086
int bb = encodeChar(c);
1087
if (bb == UNMAPPABLE_ENCODING) {
1088
if (Character.isHighSurrogate(c) && sp < sl &&
1089
Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
1090
sp++;
1091
}
1092
dst[dp++] = repl[0];
1093
if (repl.length > 1)
1094
dst[dp++] = repl[1];
1095
continue;
1096
} //else
1097
if (bb > MAX_SINGLEBYTE) { // DoubleByte
1098
if (currentState == SBCS) {
1099
currentState = DBCS;
1100
dst[dp++] = SO;
1101
}
1102
dst[dp++] = (byte)(bb >> 8);
1103
dst[dp++] = (byte)bb;
1104
} else { // SingleByte
1105
if (currentState == DBCS) {
1106
currentState = SBCS;
1107
dst[dp++] = SI;
1108
}
1109
dst[dp++] = (byte)bb;
1110
}
1111
}
1112
if (currentState == DBCS) {
1113
currentState = SBCS;
1114
dst[dp++] = SI;
1115
}
1116
return dp;
1117
}
1118
}
1119
1120
// EUC_SIMPLE
1121
public static class Encoder_EUC_SIM extends Encoder {
1122
public Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex,
1123
boolean isASCIICompatible) {
1124
super(cs, c2b, c2bIndex, isASCIICompatible);
1125
}
1126
}
1127
1128
}
1129
1130