Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Avatar for KuCalc : devops.
Download
50663 views
1
// If we're in node require encoding-indexes and attach it to the global.
2
if (typeof module !== "undefined" && module.exports) {
3
this["encoding-indexes"] = require("./encoding-indexes.js")["encoding-indexes"];
4
}
5
6
(function(global) {
7
'use strict';
8
9
//
10
// Utilities
11
//
12
13
/**
 * Tests whether a number lies inside a closed interval.
 * @param {number} a The number to test.
 * @param {number} min Lower bound of the interval, inclusive.
 * @param {number} max Upper bound of the interval, inclusive.
 * @return {boolean} True when min <= a and a <= max.
 */
function inRange(a, min, max) {
  if (!(min <= a)) {
    return false;
  }
  return a <= max;
}
22
23
/**
 * Integer division that rounds toward negative infinity.
 * @param {number} n The numerator.
 * @param {number} d The denominator.
 * @return {number} The floor of n divided by d.
 */
function div(n, d) {
  var quotient = n / d;
  return Math.floor(quotient);
}
31
32
33
//
34
// Implementation of Encoding specification
35
// http://dvcs.w3.org/hg/encoding/raw-file/tip/Overview.html
36
//
37
38
//
39
// 3. Terminology
40
//
41
42
//
43
// 4. Encodings
44
//
45
46
// Sentinel returned by ByteInputStream.get() once the read position reaches the end of the bytes.
/** @const */ var EOF_byte = -1;
47
// Sentinel returned by CodePointInputStream.get() once every code point has been consumed.
/** @const */ var EOF_code_point = -1;
48
49
/**
 * Read cursor over a fixed array of bytes.
 * @constructor
 * @param {Uint8Array} bytes Array of bytes that provide the stream.
 */
function ByteInputStream(bytes) {
  /** @type {number} Index of the next byte to read. */
  var pos = 0;

  /**
   * Peeks at the byte under the cursor without advancing it.
   * @this {ByteInputStream}
   * @return {number} The next byte from the stream, or EOF_byte when the
   *     cursor is at or beyond the end of the bytes.
   */
  this.get = function() {
    return (pos >= bytes.length) ? EOF_byte : Number(bytes[pos]);
  };

  /**
   * Moves the cursor relative to its current position.
   * @param {number} n Number (positive or negative) by which to
   *     offset the byte pointer.
   * @throws {Error} If the cursor ends up before the start of the bytes or
   *     more than one position past the last byte.
   */
  this.offset = function(n) {
    pos += n;
    if (pos < 0) {
      throw new Error('Seeking past start of the buffer');
    }
    if (pos > bytes.length) {
      throw new Error('Seeking past EOF');
    }
  };

  /**
   * Compares the bytes at the cursor with an expected sequence, without
   * moving the cursor.
   * @param {Array.<number>} test Array of bytes to compare against.
   * @return {boolean} True if the bytes starting at the cursor match the
   *     test bytes.
   */
  this.match = function(test) {
    // Only the bytes remaining after the cursor can match. The earlier
    // check added pos instead of subtracting it and so never rejected early.
    if (test.length > bytes.length - pos) {
      return false;
    }
    var i;
    for (i = 0; i < test.length; i += 1) {
      if (Number(bytes[pos + i]) !== test[i]) {
        return false;
      }
    }
    return true;
  };
}
95
96
/**
 * Append-only writer that stores bytes into a caller-supplied array.
 * @constructor
 * @param {Array.<number>} bytes The array to write bytes into.
 */
function ByteOutputStream(bytes) {
  /** @type {number} Index at which the next byte is stored. */
  var cursor = 0;

  /**
   * Writes each argument, coerced to a number, at consecutive positions.
   * @param {...number} var_args The byte or bytes to emit into the stream.
   * @return {number} The last byte emitted, or EOF_byte when called with
   *     no arguments.
   */
  this.emit = function(var_args) {
    var total = arguments.length;
    /** @type {number} */
    var latest = EOF_byte;
    var k = 0;
    while (k < total) {
      latest = Number(arguments[k]);
      bytes[cursor] = latest;
      cursor += 1;
      k += 1;
    }
    return latest;
  };
}
119
120
/**
 * Read cursor over the code points of a string.
 * @constructor
 * @param {string} string The source of code units for the stream.
 */
function CodePointInputStream(string) {
  /**
   * Converts UTF-16 code units to code points. Valid surrogate pairs are
   * combined; any unpaired surrogate becomes U+FFFD.
   * Based on http://www.w3.org/TR/WebIDL/#idl-DOMString
   * @param {string} string Input string of UTF-16 code units.
   * @return {Array.<number>} Code points.
   */
  function stringToCodePoints(string) {
    /** @type {Array.<number>} */
    var result = [];
    var length = string.length;
    var idx;
    for (idx = 0; idx < string.length; idx += 1) {
      var unit = string.charCodeAt(idx);
      if (!inRange(unit, 0xD800, 0xDFFF)) {
        // Not a surrogate: the code unit is the code point.
        result.push(unit);
        continue;
      }
      if (inRange(unit, 0xDC00, 0xDFFF) || idx === length - 1) {
        // Trail surrogate with no lead, or lead surrogate at end of input.
        result.push(0xFFFD);
        continue;
      }
      var next = string.charCodeAt(idx + 1);
      if (!inRange(next, 0xDC00, 0xDFFF)) {
        // Lead surrogate not followed by a trail; the following unit is
        // handled on its own in the next iteration.
        result.push(0xFFFD);
        continue;
      }
      // Consume the trail surrogate as well.
      idx += 1;
      result.push(0x10000 + ((unit & 0x3FF) << 10) + (next & 0x3FF));
    }
    return result;
  }

  /** @type {number} Index of the next code point to read. */
  var pos = 0;
  /** @type {Array.<number>} */
  var cps = stringToCodePoints(string);

  /**
   * Moves the cursor relative to its current position.
   * @param {number} n The number of code points (positive or negative)
   *     to advance the code point pointer by.
   */
  this.offset = function(n) {
    pos += n;
    if (pos < 0) {
      throw new Error('Seeking past start of the buffer');
    }
    if (pos > cps.length) {
      throw new Error('Seeking past EOF');
    }
  };

  /** @return {number} The code point under the cursor, or EOF_code_point. */
  this.get = function() {
    return pos >= cps.length ? EOF_code_point : cps[pos];
  };
}
186
187
/**
 * Accumulates code points into a JavaScript (UTF-16) string.
 * @constructor
 */
function CodePointOutputStream() {
  /** @type {string} */
  var buffer = '';

  /** @return {string} The accumulated string. */
  this.string = function() {
    return buffer;
  };

  /**
   * Appends one code point; values above U+FFFF are written as a
   * surrogate pair.
   * @param {number} c The code point to encode into the stream.
   */
  this.emit = function(c) {
    if (c <= 0xFFFF) {
      buffer += String.fromCharCode(c);
      return;
    }
    var astral = c - 0x10000;
    var lead = 0xD800 + ((astral >> 10) & 0x3ff);
    var trail = 0xDC00 + (astral & 0x3ff);
    buffer += String.fromCharCode(lead);
    buffer += String.fromCharCode(trail);
  };
}
210
211
/**
 * Error type thrown when a byte sequence cannot be decoded (fatal mode) or
 * a code point cannot be encoded.
 * @constructor
 * @param {string} message Description of the error.
 */
function EncodingError(message) {
  this.name = 'EncodingError';
  this.message = message;
  this.code = 0;
}
// Inherit from Error through a fresh prototype object. Assigning
// Error.prototype directly would make every Error pass
// `instanceof EncodingError`.
EncodingError.prototype = Object.create(Error.prototype, {
  constructor: {
    value: EncodingError,
    writable: true,
    configurable: true
  }
});
221
222
/**
 * Reports a decoding failure: throws in fatal mode, otherwise yields the
 * code point to substitute.
 * @param {boolean} fatal If true, decoding errors raise an exception.
 * @param {number=} opt_code_point Override the standard fallback code point.
 * @return {number} The code point to insert on a decoding error; U+FFFD
 *     when no (truthy) override is given.
 */
function decoderError(fatal, opt_code_point) {
  if (!fatal) {
    return opt_code_point || 0xFFFD;
  }
  throw new EncodingError('Decoder error');
}
233
234
/**
 * Reports that a code point has no representation in the target encoding.
 * @param {number} code_point The code point that could not be encoded.
 * @return {number} Always throws, no value is actually returned.
 */
function encoderError(code_point) {
  var message = 'The code point ' + code_point + ' could not be encoded.';
  throw new EncodingError(message);
}
242
243
/**
 * Looks up an encoding by label, ignoring surrounding whitespace and case.
 * @param {string} label The encoding label.
 * @return {?{name:string,labels:Array.<string>}} The matching encoding
 *     record, or null when the label is not registered.
 */
function getEncoding(label) {
  var key = String(label).trim().toLowerCase();
  // Own-property test so inherited names such as "toString" never match.
  var known = Object.prototype.hasOwnProperty.call(label_to_encoding, key);
  return known ? label_to_encoding[key] : null;
}
254
255
/**
 * Catalogue of encodings, grouped under a heading. Each encoding record
 * carries its name and the list of labels that resolve to it.
 * @type {Array.<{encodings: Array.<{name:string,labels:Array.<string>}>,
 *     heading: string}>}
 */
var encodings = (function() {
  /** Builds one encoding record (key order: labels, name). */
  function encoding(name, labels) {
    return { "labels": labels, "name": name };
  }

  /** Builds one group of encodings (key order: encodings, heading). */
  function category(heading, list) {
    return { "encodings": list, "heading": heading };
  }

  return [
    category("The Encoding", [
      encoding("utf-8", ["unicode-1-1-utf-8", "utf-8", "utf8"])
    ]),
    category("Legacy single-byte encodings", [
      encoding("ibm866", ["866", "cp866", "csibm866", "ibm866"]),
      encoding("iso-8859-2", [
        "csisolatin2", "iso-8859-2", "iso-ir-101", "iso8859-2", "iso88592",
        "iso_8859-2", "iso_8859-2:1987", "l2", "latin2"
      ]),
      encoding("iso-8859-3", [
        "csisolatin3", "iso-8859-3", "iso-ir-109", "iso8859-3", "iso88593",
        "iso_8859-3", "iso_8859-3:1988", "l3", "latin3"
      ]),
      encoding("iso-8859-4", [
        "csisolatin4", "iso-8859-4", "iso-ir-110", "iso8859-4", "iso88594",
        "iso_8859-4", "iso_8859-4:1988", "l4", "latin4"
      ]),
      encoding("iso-8859-5", [
        "csisolatincyrillic", "cyrillic", "iso-8859-5", "iso-ir-144",
        "iso8859-5", "iso88595", "iso_8859-5", "iso_8859-5:1988"
      ]),
      encoding("iso-8859-6", [
        "arabic", "asmo-708", "csiso88596e", "csiso88596i",
        "csisolatinarabic", "ecma-114", "iso-8859-6", "iso-8859-6-e",
        "iso-8859-6-i", "iso-ir-127", "iso8859-6", "iso88596", "iso_8859-6",
        "iso_8859-6:1987"
      ]),
      encoding("iso-8859-7", [
        "csisolatingreek", "ecma-118", "elot_928", "greek", "greek8",
        "iso-8859-7", "iso-ir-126", "iso8859-7", "iso88597", "iso_8859-7",
        "iso_8859-7:1987", "sun_eu_greek"
      ]),
      encoding("iso-8859-8", [
        "csiso88598e", "csisolatinhebrew", "hebrew", "iso-8859-8",
        "iso-8859-8-e", "iso-ir-138", "iso8859-8", "iso88598", "iso_8859-8",
        "iso_8859-8:1988", "visual"
      ]),
      encoding("iso-8859-8-i", ["csiso88598i", "iso-8859-8-i", "logical"]),
      encoding("iso-8859-10", [
        "csisolatin6", "iso-8859-10", "iso-ir-157", "iso8859-10", "iso885910",
        "l6", "latin6"
      ]),
      encoding("iso-8859-13", ["iso-8859-13", "iso8859-13", "iso885913"]),
      encoding("iso-8859-14", ["iso-8859-14", "iso8859-14", "iso885914"]),
      encoding("iso-8859-15", [
        "csisolatin9", "iso-8859-15", "iso8859-15", "iso885915",
        "iso_8859-15", "l9"
      ]),
      encoding("iso-8859-16", ["iso-8859-16"]),
      encoding("koi8-r", ["cskoi8r", "koi", "koi8", "koi8-r", "koi8_r"]),
      encoding("koi8-u", ["koi8-u"]),
      encoding("macintosh", ["csmacintosh", "mac", "macintosh", "x-mac-roman"]),
      encoding("windows-874", [
        "dos-874", "iso-8859-11", "iso8859-11", "iso885911", "tis-620",
        "windows-874"
      ]),
      encoding("windows-1250", ["cp1250", "windows-1250", "x-cp1250"]),
      encoding("windows-1251", ["cp1251", "windows-1251", "x-cp1251"]),
      encoding("windows-1252", [
        "ansi_x3.4-1968", "ascii", "cp1252", "cp819", "csisolatin1", "ibm819",
        "iso-8859-1", "iso-ir-100", "iso8859-1", "iso88591", "iso_8859-1",
        "iso_8859-1:1987", "l1", "latin1", "us-ascii", "windows-1252",
        "x-cp1252"
      ]),
      encoding("windows-1253", ["cp1253", "windows-1253", "x-cp1253"]),
      encoding("windows-1254", [
        "cp1254", "csisolatin5", "iso-8859-9", "iso-ir-148", "iso8859-9",
        "iso88599", "iso_8859-9", "iso_8859-9:1989", "l5", "latin5",
        "windows-1254", "x-cp1254"
      ]),
      encoding("windows-1255", ["cp1255", "windows-1255", "x-cp1255"]),
      encoding("windows-1256", ["cp1256", "windows-1256", "x-cp1256"]),
      encoding("windows-1257", ["cp1257", "windows-1257", "x-cp1257"]),
      encoding("windows-1258", ["cp1258", "windows-1258", "x-cp1258"]),
      encoding("x-mac-cyrillic", ["x-mac-cyrillic", "x-mac-ukrainian"])
    ]),
    category("Legacy multi-byte Chinese (simplified) encodings", [
      encoding("gb18030", [
        "chinese", "csgb2312", "csiso58gb231280", "gb18030", "gb2312",
        "gb_2312", "gb_2312-80", "gbk", "iso-ir-58", "x-gbk"
      ]),
      encoding("hz-gb-2312", ["hz-gb-2312"])
    ]),
    category("Legacy multi-byte Chinese (traditional) encodings", [
      encoding("big5", ["big5", "big5-hkscs", "cn-big5", "csbig5", "x-x-big5"])
    ]),
    category("Legacy multi-byte Japanese encodings", [
      encoding("euc-jp", ["cseucpkdfmtjapanese", "euc-jp", "x-euc-jp"]),
      encoding("iso-2022-jp", ["csiso2022jp", "iso-2022-jp"]),
      encoding("shift_jis", [
        "csshiftjis", "ms_kanji", "shift-jis", "shift_jis", "sjis",
        "windows-31j", "x-sjis"
      ])
    ]),
    category("Legacy multi-byte Korean encodings", [
      encoding("euc-kr", [
        "cseuckr", "csksc56011987", "euc-kr", "iso-ir-149", "korean",
        "ks_c_5601-1987", "ks_c_5601-1989", "ksc5601", "ksc_5601",
        "windows-949"
      ])
    ]),
    category("Legacy miscellaneous encodings", [
      encoding("replacement", [
        "csiso2022kr", "iso-2022-cn", "iso-2022-cn-ext", "iso-2022-kr"
      ]),
      encoding("utf-16be", ["utf-16be"]),
      encoding("utf-16le", ["utf-16", "utf-16le"]),
      encoding("x-user-defined", ["x-user-defined"])
    ])
  ];
}());
710
711
// Lookup tables derived from `encodings`: one keyed by encoding name, one
// keyed by every label. Both map to the same shared encoding records.
var name_to_encoding = {};
var label_to_encoding = {};
(function() {
  var c, e, l;
  for (c = 0; c < encodings.length; c += 1) {
    var members = encodings[c]['encodings'];
    for (e = 0; e < members.length; e += 1) {
      var record = members[e];
      name_to_encoding[record['name']] = record;
      var aliases = record['labels'];
      for (l = 0; l < aliases.length; l += 1) {
        label_to_encoding[aliases[l]] = record;
      }
    }
  }
}());
721
722
//
723
// 5. Indexes
724
//
725
726
/**
 * Reads the code point stored at a pointer in an index.
 * @param {number} pointer The |pointer| to search for.
 * @param {Array.<?number>|undefined} index The |index| to search within.
 * @return {?number} The code point corresponding to |pointer| in |index|,
 *     or null if the index is missing or holds no (truthy) entry there.
 */
function indexCodePointFor(pointer, index) {
  if (index) {
    var found = index[pointer];
    return found || null;
  }
  return null;
}
736
737
/**
 * Finds the position of a code point within an index.
 * @param {number} code_point The |code point| to search for.
 * @param {Array.<?number>} index The |index| to search within.
 * @return {?number} The first pointer corresponding to |code point| in
 *     |index|, or null if |code point| is not in |index|.
 */
function indexPointerFor(code_point, index) {
  var position = index.indexOf(code_point);
  if (position === -1) {
    return null;
  }
  return position;
}
747
748
/**
 * Fetches a named index table from the 'encoding-indexes' property of the
 * global object.
 * @param {string} name Name of the index.
 * @return {(Array.<number>|Array.<Array.<number>>)} The table stored under
 *     |name|.
 * @throws {Error} If the global object has no 'encoding-indexes' property.
 */
function index(name) {
  var loaded = 'encoding-indexes' in global;
  if (!loaded) {
    throw new Error("Indexes missing. Did you forget to include encoding-indexes.js?");
  }
  var tables = global['encoding-indexes'];
  return tables[name];
}
757
758
/**
 * Maps a four-byte gb18030 pointer to a code point using the range table
 * returned by index('gb18030'), whose entries are [pointer, code point]
 * pairs in ascending order.
 * @param {number} pointer The |pointer| to search for in the gb18030 index.
 * @return {?number} The code point corresponding to |pointer| in |index|,
 *     or null if |pointer| falls outside the ranges the index covers.
 */
function indexGB18030CodePointFor(pointer) {
  var inGap = pointer > 39419 && pointer < 189000;
  if (inGap || pointer > 1237575) {
    return null;
  }
  /** @type {Array.<Array.<number>>} */
  var ranges = index('gb18030');
  /** @type {number} */
  var pointerBase = 0;
  /** @type {number} */
  var codePointBase = 0;
  // Keep the last range whose starting pointer does not exceed |pointer|.
  var k;
  for (k = 0; k < ranges.length && ranges[k][0] <= pointer; k += 1) {
    pointerBase = ranges[k][0];
    codePointBase = ranges[k][1];
  }
  return codePointBase + pointer - pointerBase;
}
782
783
/**
 * Maps a code point to a four-byte gb18030 pointer using the range table
 * returned by index('gb18030'), whose entries are [pointer, code point]
 * pairs.
 * @param {number} code_point The |code point| to locate in the gb18030 index.
 * @return {number} The first pointer corresponding to |code point| in the
 *     gb18030 index.
 */
function indexGB18030PointerFor(code_point) {
  /** @type {Array.<Array.<number>>} */
  var ranges = index('gb18030');
  /** @type {number} */
  var codePointBase = 0;
  /** @type {number} */
  var pointerBase = 0;
  // Keep the last range whose starting code point does not exceed the input.
  var k;
  for (k = 0; k < ranges.length && ranges[k][1] <= code_point; k += 1) {
    codePointBase = ranges[k][1];
    pointerBase = ranges[k][0];
  }
  return pointerBase + code_point - codePointBase;
}
804
805
806
//
807
// 7. API
808
//
809
810
// Label used by the TextDecoder and TextEncoder constructors when the caller passes no (truthy) encoding.
/** @const */ var DEFAULT_ENCODING = 'utf-8';
811
812
// 7.1 Interface TextDecoder
813
814
/**
 * Decodes bytes in a given encoding into strings. May be called with or
 * without `new`.
 * @constructor
 * @param {string=} opt_encoding The label of the encoding;
 *     defaults to 'utf-8'.
 * @param {{fatal: boolean}=} options
 * @throws {TypeError} If the label is unknown or names the 'replacement'
 *     encoding.
 * @throws {Error} If the encoding record has no getDecoder factory.
 */
function TextDecoder(opt_encoding, options) {
  if (!(this instanceof TextDecoder)) {
    return new TextDecoder(opt_encoding, options);
  }
  var label = opt_encoding ? String(opt_encoding) : DEFAULT_ENCODING;
  var settings = Object(options);
  var resolved = getEncoding(label);

  /** @private */
  this._encoding = resolved;
  if (resolved === null || resolved.name === 'replacement') {
    throw new TypeError('Unknown encoding: ' + label);
  }
  if (!resolved.getDecoder) {
    throw new Error('Decoder not present. Did you forget to include encoding-indexes.js?');
  }

  /** @private @type {boolean} */
  this._streaming = false;
  /** @private @type {boolean} */
  this._BOMseen = false;
  /** @private */
  this._decoder = null;
  /** @private @type {{fatal: boolean}=} */
  this._options = { fatal: Boolean(settings.fatal) };

  // Expose `encoding` as a getter where supported, else as a plain property.
  if (Object.defineProperty) {
    Object.defineProperty(this, 'encoding', {
      get: function() { return this._encoding.name; }
    });
  } else {
    this.encoding = this._encoding.name;
  }

  return this;
}
853
854
// TODO: Issue if input byte stream is offset by decoder
855
// TODO: BOM detection will not work if stream header spans multiple calls
856
// (last N bytes of previous stream may need to be retained?)
857
TextDecoder.prototype = {
  /**
   * Decodes the bytes of a view and returns the resulting string. While
   * options.stream is true the decoder object is retained between calls;
   * otherwise it is flushed and discarded at the end of the call.
   * @param {ArrayBufferView=} opt_view The buffer of bytes to decode.
   * @param {{stream: boolean}=} options
   * @return {string} The decoded text, with a leading U+FEFF removed for
   *     utf-8 / utf-16le / utf-16be if no output had been produced before.
   * @throws {TypeError} If opt_view is truthy but lacks buffer, byteOffset
   *     or byteLength.
   */
  decode: function decode(opt_view, options) {
    var view = opt_view;
    if (!view) {
      view = new Uint8Array(0);
    } else if (!('buffer' in view && 'byteOffset' in view &&
                 'byteLength' in view)) {
      throw new TypeError('Expected ArrayBufferView');
    }
    var settings = Object(options);

    // A fresh decoder is created unless the previous call was streaming.
    if (!this._streaming) {
      this._decoder = this._encoding.getDecoder(this._options);
      this._BOMseen = false;
    }
    this._streaming = Boolean(settings.stream);

    var source = new ByteInputStream(
        new Uint8Array(view.buffer, view.byteOffset, view.byteLength));
    var sink = new CodePointOutputStream();

    /** @type {number} */
    var cp;

    while (source.get() !== EOF_byte) {
      cp = this._decoder.decode(source);
      if (cp !== null && cp !== EOF_code_point) {
        sink.emit(cp);
      }
    }
    if (!this._streaming) {
      // Flush: give the decoder a chance to report a truncated sequence.
      do {
        cp = this._decoder.decode(source);
        if (cp !== null && cp !== EOF_code_point) {
          sink.emit(cp);
        }
      } while (cp !== EOF_code_point && source.get() !== EOF_byte);
      this._decoder = null;
    }

    var text = sink.string();
    if (!this._BOMseen && text.length) {
      this._BOMseen = true;
      var bomAware = ['utf-8', 'utf-16le', 'utf-16be'];
      if (bomAware.indexOf(this.encoding) !== -1 &&
          text.charCodeAt(0) === 0xFEFF) {
        text = text.substring(1);
      }
    }

    return text;
  }
};
916
917
// 7.2 Interface TextEncoder
918
919
/**
 * Encodes strings into bytes in a given encoding. May be called with or
 * without `new`. Only utf-8, utf-16le and utf-16be are accepted unless
 * options.NONSTANDARD_allowLegacyEncoding is truthy.
 * @constructor
 * @param {string=} opt_encoding The label of the encoding;
 *     defaults to 'utf-8'.
 * @param {{fatal: boolean}=} options
 * @throws {TypeError} If the label is unknown, or names a legacy encoding
 *     without the opt-in flag.
 * @throws {Error} If the encoding record has no getEncoder factory.
 */
function TextEncoder(opt_encoding, options) {
  if (!(this instanceof TextEncoder)) {
    return new TextEncoder(opt_encoding, options);
  }
  opt_encoding = opt_encoding ? String(opt_encoding) : DEFAULT_ENCODING;
  options = Object(options);
  /** @private */
  this._encoding = getEncoding(opt_encoding);

  // Reject unknown labels before reading _encoding.name. Doing it after
  // would fail on the null dereference instead of reporting the bad label.
  if (this._encoding === null) {
    throw new TypeError('Unknown encoding: ' + opt_encoding);
  }

  var allowLegacyEncoding = options.NONSTANDARD_allowLegacyEncoding;
  var isLegacyEncoding = (this._encoding.name !== 'utf-8' &&
                          this._encoding.name !== 'utf-16le' &&
                          this._encoding.name !== 'utf-16be');
  if (isLegacyEncoding && !allowLegacyEncoding) {
    throw new TypeError('Unknown encoding: ' + opt_encoding);
  }

  if (!this._encoding.getEncoder) {
    throw new Error('Encoder not present. Did you forget to include encoding-indexes.js?');
  }

  /** @private @type {boolean} */
  this._streaming = false;
  /** @private */
  this._encoder = null;
  /** @private @type {{fatal: boolean}=} */
  this._options = { fatal: Boolean(options.fatal) };

  // Expose `encoding` as a getter where supported, else as a plain property.
  if (Object.defineProperty) {
    Object.defineProperty(
        this, 'encoding',
        { get: function() { return this._encoding.name; } });
  } else {
    this.encoding = this._encoding.name;
  }

  return this;
}
961
962
TextEncoder.prototype = {
  /**
   * Encodes a string and returns the resulting bytes. While options.stream
   * is true the encoder object is retained between calls; otherwise it is
   * flushed and discarded at the end of the call.
   * @param {string=} opt_string The string to encode.
   * @param {{stream: boolean}=} options
   * @return {Uint8Array} The encoded bytes.
   */
  encode: function encode(opt_string, options) {
    var text = opt_string ? String(opt_string) : '';
    var settings = Object(options);
    // TODO: any options?
    // A fresh encoder is created unless the previous call was streaming.
    if (!this._streaming) {
      this._encoder = this._encoding.getEncoder(this._options);
    }
    this._streaming = Boolean(settings.stream);

    var collected = [];
    var sink = new ByteOutputStream(collected);
    var source = new CodePointInputStream(text);
    while (source.get() !== EOF_code_point) {
      this._encoder.encode(sink, source);
    }
    if (!this._streaming) {
      // Flush: keep calling until the encoder reports end of input.
      while (this._encoder.encode(sink, source) !== EOF_byte) {
        // Nothing to do; the encoder writes straight into the sink.
      }
      this._encoder = null;
    }
    return new Uint8Array(collected);
  }
};
993
994
995
//
996
// 8. The encoding
997
//
998
999
// 8.1 utf-8
1000
1001
/**
 * Stateful UTF-8 decoder that is fed one byte per call.
 * @constructor
 * @param {{fatal: boolean}} options
 */
function UTF8Decoder(options) {
  var fatal = options.fatal;
  /** @type {number} Code point assembled so far. */
  var pending = 0;
  /** @type {number} Continuation bytes the current sequence requires. */
  var needed = 0;
  /** @type {number} Continuation bytes consumed so far. */
  var seen = 0;
  /** @type {number} Smallest code point allowed for this sequence length. */
  var floor = 0;

  /** Returns the decoder to its between-sequences state. */
  function reset() {
    pending = 0;
    needed = 0;
    seen = 0;
    floor = 0;
  }

  /**
   * @param {ByteInputStream} byte_pointer The byte stream to decode.
   * @return {?number} The next code point decoded, or null if not enough
   *     data exists in the input stream to decode a complete code point.
   */
  this.decode = function(byte_pointer) {
    var bite = byte_pointer.get();
    if (bite === EOF_byte) {
      // End of input in the middle of a sequence is an error.
      return needed !== 0 ? decoderError(fatal) : EOF_code_point;
    }
    byte_pointer.offset(1);

    if (needed === 0) {
      if (inRange(bite, 0x00, 0x7F)) {
        return bite;
      }
      var marker;
      if (inRange(bite, 0xC2, 0xDF)) {
        needed = 1;
        floor = 0x80;
        marker = 0xC0;
      } else if (inRange(bite, 0xE0, 0xEF)) {
        needed = 2;
        floor = 0x800;
        marker = 0xE0;
      } else if (inRange(bite, 0xF0, 0xF4)) {
        needed = 3;
        floor = 0x10000;
        marker = 0xF0;
      } else {
        return decoderError(fatal);
      }
      // Shift the lead byte's payload above the continuation bytes to come.
      pending = (bite - marker) * Math.pow(64, needed);
      return null;
    }

    if (!inRange(bite, 0x80, 0xBF)) {
      // Not a continuation byte: abandon the sequence and step back so the
      // byte is decoded again as the start of a new one.
      reset();
      byte_pointer.offset(-1);
      return decoderError(fatal);
    }

    seen += 1;
    pending += (bite - 0x80) * Math.pow(64, needed - seen);
    if (seen !== needed) {
      return null;
    }

    var code_point = pending;
    var minimum = floor;
    reset();
    // Reject overlong forms, values above U+10FFFF and surrogates.
    var valid = inRange(code_point, minimum, 0x10FFFF) &&
        !inRange(code_point, 0xD800, 0xDFFF);
    return valid ? code_point : decoderError(fatal);
  };
}
1076
1077
/**
 * UTF-8 encoder that consumes one code point per call.
 * @constructor
 * @param {{fatal: boolean}} options
 */
function UTF8Encoder(options) {
  var fatal = options.fatal;
  /**
   * @param {ByteOutputStream} output_byte_stream Output byte stream.
   * @param {CodePointInputStream} code_point_pointer Input stream.
   * @return {number} The last byte emitted, or EOF_byte at end of input.
   */
  this.encode = function(output_byte_stream, code_point_pointer) {
    /** @type {number} */
    var cp = code_point_pointer.get();
    if (cp === EOF_code_point) {
      return EOF_byte;
    }
    code_point_pointer.offset(1);
    if (inRange(cp, 0xD800, 0xDFFF)) {
      return encoderError(cp);
    }
    if (inRange(cp, 0x0000, 0x007f)) {
      return output_byte_stream.emit(cp);
    }

    // Number of continuation bytes and the marker bits of the lead byte.
    var trailing, leadMarker;
    if (inRange(cp, 0x0080, 0x07FF)) {
      trailing = 1;
      leadMarker = 0xC0;
    } else if (inRange(cp, 0x0800, 0xFFFF)) {
      trailing = 2;
      leadMarker = 0xE0;
    } else if (inRange(cp, 0x10000, 0x10FFFF)) {
      trailing = 3;
      leadMarker = 0xF0;
    }

    var last = output_byte_stream.emit(
        div(cp, Math.pow(64, trailing)) + leadMarker);
    // Emit six bits per continuation byte, most significant group first.
    var group;
    for (group = trailing - 1; group >= 0; group -= 1) {
      var shifted = div(cp, Math.pow(64, group));
      last = output_byte_stream.emit(0x80 + (shifted % 64));
    }
    return last;
  };
}
1122
1123
// Attach the codec factories to the utf-8 encoding record.
(function(utf8) {
  /** @param {{fatal: boolean}} options */
  utf8.getEncoder = function(options) {
    return new UTF8Encoder(options);
  };
  /** @param {{fatal: boolean}} options */
  utf8.getDecoder = function(options) {
    return new UTF8Decoder(options);
  };
}(name_to_encoding['utf-8']));
1131
1132
//
1133
// 9. Legacy single-byte encodings
1134
//
1135
1136
/**
 * Decoder for encodings in which every byte maps to one code point.
 * @constructor
 * @param {Array.<number>} index The encoding index.
 * @param {{fatal: boolean}} options
 */
function SingleByteDecoder(index, options) {
  var fatal = options.fatal;
  /**
   * @param {ByteInputStream} byte_pointer The byte stream to decode.
   * @return {?number} The next code point decoded, or null if not enough
   *     data exists in the input stream to decode a complete code point.
   */
  this.decode = function(byte_pointer) {
    var bite = byte_pointer.get();
    if (bite === EOF_byte) {
      return EOF_code_point;
    }
    byte_pointer.offset(1);
    // Bytes 0x00-0x7F map to themselves; the index covers 0x80 upward.
    if (inRange(bite, 0x00, 0x7F)) {
      return bite;
    }
    var mapped = index[bite - 0x80];
    return mapped === null ? decoderError(fatal) : mapped;
  };
}
1164
1165
/**
 * Encoder for encodings in which every code point maps to one byte.
 * @constructor
 * @param {Array.<?number>} index The encoding index.
 * @param {{fatal: boolean}} options
 */
function SingleByteEncoder(index, options) {
  var fatal = options.fatal;
  /**
   * @param {ByteOutputStream} output_byte_stream Output byte stream.
   * @param {CodePointInputStream} code_point_pointer Input stream.
   * @return {number} The last byte emitted, or EOF_byte at end of input.
   */
  this.encode = function(output_byte_stream, code_point_pointer) {
    var cp = code_point_pointer.get();
    if (cp === EOF_code_point) {
      return EOF_byte;
    }
    code_point_pointer.offset(1);
    // Code points 0x00-0x7F are written as-is.
    if (inRange(cp, 0x0000, 0x007F)) {
      return output_byte_stream.emit(cp);
    }
    var slot = indexPointerFor(cp, index);
    if (slot === null) {
      encoderError(cp);
    }
    // Index position 0 corresponds to byte 0x80.
    return output_byte_stream.emit(slot + 0x80);
  };
}
1193
1194
// Attach codec factories to every legacy single-byte encoding. This is
// skipped when the index tables have not been loaded.
(function() {
  if (!('encoding-indexes' in global)) {
    return;
  }

  /**
   * Gives one encoding record factories bound to its own index table.
   * @param {{name:string,labels:Array.<string>}} encoding
   */
  function attachCodecs(encoding) {
    var table = index(encoding['name']);
    /** @param {{fatal: boolean}} options */
    encoding.getDecoder = function(options) {
      return new SingleByteDecoder(table, options);
    };
    /** @param {{fatal: boolean}} options */
    encoding.getEncoder = function(options) {
      return new SingleByteEncoder(table, options);
    };
  }

  var c, e;
  for (c = 0; c < encodings.length; c += 1) {
    var category = encodings[c];
    if (category['heading'] === 'Legacy single-byte encodings') {
      var members = category['encodings'];
      for (e = 0; e < members.length; e += 1) {
        attachCodecs(members[e]);
      }
    }
  }
}());
1213
1214
//
1215
// 10. Legacy multi-byte Chinese (simplified) encodings
1216
//
1217
1218
// 10.1 gb18030
1219
1220
/**
 * Stateful gb18030 decoder that is fed one byte per call. It handles
 * single bytes, two-byte sequences and four-byte sequences.
 * @constructor
 * @param {{fatal: boolean}} options
 */
function GB18030Decoder(options) {
  var fatal = options.fatal;
  // Bytes of the sequence currently being assembled; 0x00 means "not set".
  var /** @type {number} */ gb18030_first = 0x00,
      /** @type {number} */ gb18030_second = 0x00,
      /** @type {number} */ gb18030_third = 0x00;
  /**
   * @param {ByteInputStream} byte_pointer The byte stream to decode.
   * @return {?number} The next code point decoded, or null if not enough
   *     data exists in the input stream to decode a complete code point.
   */
  this.decode = function(byte_pointer) {
    var bite = byte_pointer.get();
    if (bite === EOF_byte) {
      if (gb18030_first === 0x00 && gb18030_second === 0x00 &&
          gb18030_third === 0x00) {
        return EOF_code_point;
      }
      // Input ended inside a multi-byte sequence. Return here: falling
      // through would advance the stream past its end and throw
      // 'Seeking past EOF' even in non-fatal mode.
      gb18030_first = 0x00;
      gb18030_second = 0x00;
      gb18030_third = 0x00;
      return decoderError(fatal);
    }
    byte_pointer.offset(1);
    var code_point;

    // Fourth byte of a four-byte sequence.
    if (gb18030_third !== 0x00) {
      code_point = null;
      if (inRange(bite, 0x30, 0x39)) {
        code_point = indexGB18030CodePointFor(
            (((gb18030_first - 0x81) * 10 + (gb18030_second - 0x30)) * 126 +
             (gb18030_third - 0x81)) * 10 + bite - 0x30);
      }
      gb18030_first = 0x00;
      gb18030_second = 0x00;
      gb18030_third = 0x00;
      if (code_point === null) {
        // Rewind so the bytes after the lead are decoded again.
        byte_pointer.offset(-3);
        return decoderError(fatal);
      }
      return code_point;
    }

    // Third byte of a four-byte sequence.
    if (gb18030_second !== 0x00) {
      if (inRange(bite, 0x81, 0xFE)) {
        gb18030_third = bite;
        return null;
      }
      byte_pointer.offset(-2);
      gb18030_first = 0x00;
      gb18030_second = 0x00;
      return decoderError(fatal);
    }

    // Second byte: either continues a four-byte sequence (0x30-0x39) or
    // completes a two-byte sequence.
    if (gb18030_first !== 0x00) {
      if (inRange(bite, 0x30, 0x39)) {
        gb18030_second = bite;
        return null;
      }
      var lead = gb18030_first;
      var pointer = null;
      gb18030_first = 0x00;
      // Trail bytes skip 0x7F, hence the different offset above it.
      var offset = bite < 0x7F ? 0x40 : 0x41;
      if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFE)) {
        pointer = (lead - 0x81) * 190 + (bite - offset);
      }
      code_point = pointer === null ? null :
          indexCodePointFor(pointer, index('gb18030'));
      if (pointer === null) {
        // Invalid trail byte: step back so it is decoded on its own.
        byte_pointer.offset(-1);
      }
      if (code_point === null) {
        return decoderError(fatal);
      }
      return code_point;
    }

    // First byte of a sequence.
    if (inRange(bite, 0x00, 0x7F)) {
      return bite;
    }
    if (bite === 0x80) {
      return 0x20AC;
    }
    if (inRange(bite, 0x81, 0xFE)) {
      gb18030_first = bite;
      return null;
    }
    return decoderError(fatal);
  };
}
1310
1311
/**
 * gb18030 encoder that consumes one code point per call. Code points found
 * in the two-byte index are written as two bytes; all other non-ASCII code
 * points are written as four bytes via the ranges table.
 * @constructor
 * @param {{fatal: boolean}} options
 */
function GB18030Encoder(options) {
  var fatal = options.fatal;
  /**
   * @param {ByteOutputStream} output_byte_stream Output byte stream.
   * @param {CodePointInputStream} code_point_pointer Input stream.
   * @return {number} The last byte emitted, or EOF_byte at end of input.
   */
  this.encode = function(output_byte_stream, code_point_pointer) {
    var cp = code_point_pointer.get();
    if (cp === EOF_code_point) {
      return EOF_byte;
    }
    code_point_pointer.offset(1);
    if (inRange(cp, 0x0000, 0x007F)) {
      return output_byte_stream.emit(cp);
    }

    var twoByte = indexPointerFor(cp, index('gb18030'));
    if (twoByte !== null) {
      var lead = div(twoByte, 190) + 0x81;
      var trail = twoByte % 190;
      // Trail bytes skip 0x7F, so positions from 0x3F up shift by one.
      var trailBase = trail < 0x3F ? 0x40 : 0x41;
      return output_byte_stream.emit(lead, trail + trailBase);
    }

    // Split the four-byte pointer into its digits (radices 10, 126, 10).
    var rest = indexGB18030PointerFor(cp);
    var b1 = div(div(div(rest, 10), 126), 10);
    rest = rest - b1 * 10 * 126 * 10;
    var b2 = div(div(rest, 10), 126);
    rest = rest - b2 * 10 * 126;
    var b3 = div(rest, 10);
    var b4 = rest - b3 * 10;
    return output_byte_stream.emit(b1 + 0x81,
                                   b2 + 0x30,
                                   b3 + 0x81,
                                   b4 + 0x30);
  };
}
1351
1352
/**
 * Creates the encoder registered for the 'gb18030' encoding.
 * @param {{fatal: boolean}} options
 * @return {GB18030Encoder}
 */
name_to_encoding['gb18030'].getEncoder = function(options) {
  return new GB18030Encoder(options);
};
/**
 * Creates the decoder registered for the 'gb18030' encoding.
 * @param {{fatal: boolean}} options
 * @return {GB18030Decoder}
 */
name_to_encoding['gb18030'].getDecoder = function(options) {
  return new GB18030Decoder(options);
};
1360
1361
// 10.2 hz-gb-2312
1362
1363
/**
 * Decoder for hz-gb-2312. Bytes are ASCII until the escape "~{" (0x7E 0x7B)
 * switches to two-byte mode; "~}" (0x7E 0x7D) switches back, "~~" yields a
 * literal U+007E and "~" followed by a line feed yields nothing.
 *
 * @constructor
 * @param {{fatal: boolean}} options
 */
function HZGB2312Decoder(options) {
  var fatal = options.fatal;
  var /** @type {boolean} */ hzgb2312 = false,
      /** @type {number} */ hzgb2312_lead = 0x00;
  /**
   * @param {ByteInputStream} byte_pointer The byte stream to decode.
   * @return {?number} The next code point decoded, or null if not enough
   *     data exists in the input stream to decode a complete code point.
   */
  this.decode = function(byte_pointer) {
    var bite = byte_pointer.get();
    if (bite === EOF_byte && hzgb2312_lead === 0x00) {
      return EOF_code_point;
    }
    if (bite === EOF_byte && hzgb2312_lead !== 0x00) {
      // Input ended in the middle of a sequence.
      hzgb2312_lead = 0x00;
      return decoderError(fatal);
    }
    byte_pointer.offset(1);
    if (hzgb2312_lead === 0x7E) {
      // Second byte of a "~" escape.
      hzgb2312_lead = 0x00;
      if (bite === 0x7B) {
        hzgb2312 = true;
        return null;
      }
      if (bite === 0x7D) {
        hzgb2312 = false;
        return null;
      }
      if (bite === 0x7E) {
        return 0x007E;
      }
      if (bite === 0x0A) {
        return null;
      }
      // Not a recognised escape: put the byte back and report an error.
      byte_pointer.offset(-1);
      return decoderError(fatal);
    }
    if (hzgb2312_lead !== 0x00) {
      // Second byte of a two-byte character.
      var lead = hzgb2312_lead;
      hzgb2312_lead = 0x00;
      var code_point = null;
      if (inRange(bite, 0x21, 0x7E)) {
        code_point = indexCodePointFor((lead - 1) * 190 +
                                       (bite + 0x3F), index('gb18030'));
      }
      if (bite === 0x0A) {
        // A line feed always drops back to ASCII mode.
        hzgb2312 = false;
      }
      if (code_point === null) {
        return decoderError(fatal);
      }
      return code_point;
    }
    if (bite === 0x7E) {
      hzgb2312_lead = 0x7E;
      return null;
    }
    if (hzgb2312) {
      if (inRange(bite, 0x20, 0x7F)) {
        hzgb2312_lead = bite;
        return null;
      }
      if (bite === 0x0A) {
        hzgb2312 = false;
      }
      return decoderError(fatal);
    }
    if (inRange(bite, 0x00, 0x7F)) {
      return bite;
    }
    return decoderError(fatal);
  };
}
1441
1442
/**
 * Encoder for hz-gb-2312. Tracks whether the output is currently in
 * two-byte mode and emits the "~{" / "~}" escapes whenever the next code
 * point requires the other mode.
 *
 * @constructor
 * @param {{fatal: boolean}} options
 */
function HZGB2312Encoder(options) {
  var fatal = options.fatal;
  /** @type {boolean} True while the output is in two-byte mode. */
  var in_two_byte_mode = false;
  /**
   * @param {ByteOutputStream} output_byte_stream Output byte stream.
   * @param {CodePointInputStream} code_point_pointer Input stream.
   * @return {number} The last byte emitted.
   */
  this.encode = function(output_byte_stream, code_point_pointer) {
    var cp = code_point_pointer.get();
    if (cp === EOF_code_point) {
      return EOF_byte;
    }
    code_point_pointer.offset(1);

    if (inRange(cp, 0x0000, 0x007F)) {
      if (in_two_byte_mode) {
        // Leave two-byte mode first; the code point is re-read next call.
        code_point_pointer.offset(-1);
        in_two_byte_mode = false;
        return output_byte_stream.emit(0x7E, 0x7D);
      }
      // "~" is the escape byte, so a literal tilde is doubled.
      return cp === 0x007E ?
          output_byte_stream.emit(0x7E, 0x7E) :
          output_byte_stream.emit(cp);
    }

    if (!in_two_byte_mode) {
      // Enter two-byte mode first; the code point is re-read next call.
      code_point_pointer.offset(-1);
      in_two_byte_mode = true;
      return output_byte_stream.emit(0x7E, 0x7B);
    }

    var gb_pointer = indexPointerFor(cp, index('gb18030'));
    if (gb_pointer === null) {
      return encoderError(cp);
    }
    var first = div(gb_pointer, 190) + 1;
    var second = gb_pointer % 190 - 0x3F;
    var representable = inRange(first, 0x21, 0x7E) &&
                        inRange(second, 0x21, 0x7E);
    if (!representable) {
      return encoderError(cp);
    }
    return output_byte_stream.emit(first, second);
  };
}
1489
1490
/**
 * Creates the encoder registered for the 'hz-gb-2312' encoding.
 * @param {{fatal: boolean}} options
 * @return {HZGB2312Encoder}
 */
name_to_encoding['hz-gb-2312'].getEncoder = function(options) {
  return new HZGB2312Encoder(options);
};
/**
 * Creates the decoder registered for the 'hz-gb-2312' encoding.
 * @param {{fatal: boolean}} options
 * @return {HZGB2312Decoder}
 */
name_to_encoding['hz-gb-2312'].getDecoder = function(options) {
  return new HZGB2312Decoder(options);
};
1498
1499
//
1500
// 11. Legacy multi-byte Chinese (traditional) encodings
1501
//
1502
1503
// 11.1 big5
1504
1505
/**
 * Decoder for big5. A lead byte (0x81-0xFE) followed by a trail byte
 * (0x40-0x7E or 0xA1-0xFE) is looked up in the 'big5' index. Four pointers
 * decode to two code points each; the second one is held in big5_pending
 * and returned by the following decode() call.
 *
 * @constructor
 * @param {{fatal: boolean}} options
 */
function Big5Decoder(options) {
  var fatal = options.fatal;
  var /** @type {number} */ big5_lead = 0x00,
      /** @type {?number} */ big5_pending = null;

  /**
   * @param {ByteInputStream} byte_pointer The byte stream to decode.
   * @return {?number} The next code point decoded, or null if not enough
   *     data exists in the input stream to decode a complete code point.
   */
  this.decode = function(byte_pointer) {
    // NOTE: Hack to support emitting two code points
    if (big5_pending !== null) {
      var pending = big5_pending;
      big5_pending = null;
      return pending;
    }
    var bite = byte_pointer.get();
    if (bite === EOF_byte && big5_lead === 0x00) {
      return EOF_code_point;
    }
    if (bite === EOF_byte && big5_lead !== 0x00) {
      // Input ended after a lead byte.
      big5_lead = 0x00;
      return decoderError(fatal);
    }
    byte_pointer.offset(1);
    if (big5_lead !== 0x00) {
      var lead = big5_lead;
      var pointer = null;
      big5_lead = 0x00;
      // Trail bytes form two runs (0x40-0x7E, 0xA1-0xFE); the offset closes
      // the gap between them so each lead byte spans 157 pointers.
      var offset = bite < 0x7F ? 0x40 : 0x62;
      if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0xA1, 0xFE)) {
        pointer = (lead - 0x81) * 157 + (bite - offset);
      }
      // Pointers that decode to a base letter plus a combining mark.
      if (pointer === 1133) {
        big5_pending = 0x0304;
        return 0x00CA;
      }
      if (pointer === 1135) {
        big5_pending = 0x030C;
        return 0x00CA;
      }
      if (pointer === 1164) {
        big5_pending = 0x0304;
        return 0x00EA;
      }
      if (pointer === 1166) {
        big5_pending = 0x030C;
        return 0x00EA;
      }
      var code_point = (pointer === null) ? null :
          indexCodePointFor(pointer, index('big5'));
      if (pointer === null) {
        // Invalid trail byte: put it back so it is decoded on its own.
        byte_pointer.offset(-1);
      }
      if (code_point === null) {
        return decoderError(fatal);
      }
      return code_point;
    }
    if (inRange(bite, 0x00, 0x7F)) {
      return bite;
    }
    if (inRange(bite, 0x81, 0xFE)) {
      big5_lead = bite;
      return null;
    }
    return decoderError(fatal);
  };
}
1579
1580
/**
 * Encoder for big5. ASCII is passed through; any other code point is looked
 * up in the 'big5' index and emitted as a lead/trail byte pair.
 *
 * @constructor
 * @param {{fatal: boolean}} options
 */
function Big5Encoder(options) {
  var fatal = options.fatal;
  /**
   * @param {ByteOutputStream} output_byte_stream Output byte stream.
   * @param {CodePointInputStream} code_point_pointer Input stream.
   * @return {number} The last byte emitted.
   */
  this.encode = function(output_byte_stream, code_point_pointer) {
    var cp = code_point_pointer.get();
    if (cp === EOF_code_point) {
      return EOF_byte;
    }
    code_point_pointer.offset(1);

    if (inRange(cp, 0x0000, 0x007F)) {
      return output_byte_stream.emit(cp);
    }

    var big5_pointer = indexPointerFor(cp, index('big5'));
    if (big5_pointer === null) {
      return encoderError(cp);
    }

    // NOTE: a check rejecting lead bytes below 0xA1 is disabled (commented
    // out) in this implementation, so leads from 0x81 upward are emitted.
    var row = div(big5_pointer, 157);
    var cell = big5_pointer % 157;
    // Cells 0x3F and above map to the second trail-byte run (0xA1-0xFE).
    var trail_byte = cell + (cell < 0x3F ? 0x40 : 0x62);
    return output_byte_stream.emit(row + 0x81, trail_byte);
  };
}
1613
1614
/**
 * Creates the encoder registered for the 'big5' encoding.
 * @param {{fatal: boolean}} options
 * @return {Big5Encoder}
 */
name_to_encoding['big5'].getEncoder = function(options) {
  return new Big5Encoder(options);
};
/**
 * Creates the decoder registered for the 'big5' encoding.
 * @param {{fatal: boolean}} options
 * @return {Big5Decoder}
 */
name_to_encoding['big5'].getDecoder = function(options) {
  return new Big5Decoder(options);
};
1622
1623
1624
//
1625
// 12. Legacy multi-byte Japanese encodings
1626
//
1627
1628
// 12.1 euc-jp
1629
1630
/**
 * Decoder for euc-jp. Handles ASCII, half-width katakana introduced by
 * 0x8E, three-byte sequences introduced by 0x8F (looked up in 'jis0212')
 * and two-byte sequences (looked up in 'jis0208').
 *
 * @constructor
 * @param {{fatal: boolean}} options
 */
function EUCJPDecoder(options) {
  var fatal = options.fatal;
  var /** @type {number} */ eucjp_first = 0x00,
      /** @type {number} */ eucjp_second = 0x00;
  /**
   * @param {ByteInputStream} byte_pointer The byte stream to decode.
   * @return {?number} The next code point decoded, or null if not enough
   *     data exists in the input stream to decode a complete code point.
   */
  this.decode = function(byte_pointer) {
    var bite = byte_pointer.get();
    if (bite === EOF_byte) {
      if (eucjp_first === 0x00 && eucjp_second === 0x00) {
        return EOF_code_point;
      }
      // Input ended in the middle of a sequence.
      eucjp_first = 0x00;
      eucjp_second = 0x00;
      return decoderError(fatal);
    }
    byte_pointer.offset(1);

    var lead, code_point;
    if (eucjp_second !== 0x00) {
      // Third byte of a 0x8F sequence.
      lead = eucjp_second;
      eucjp_second = 0x00;
      code_point = null;
      if (inRange(lead, 0xA1, 0xFE) && inRange(bite, 0xA1, 0xFE)) {
        code_point = indexCodePointFor((lead - 0xA1) * 94 + bite - 0xA1,
                                       index('jis0212'));
      }
      if (!inRange(bite, 0xA1, 0xFE)) {
        // Invalid trail byte: put it back so it is decoded on its own.
        byte_pointer.offset(-1);
      }
      if (code_point === null) {
        return decoderError(fatal);
      }
      return code_point;
    }
    if (eucjp_first === 0x8E && inRange(bite, 0xA1, 0xDF)) {
      eucjp_first = 0x00;
      return 0xFF61 + bite - 0xA1;
    }
    if (eucjp_first === 0x8F && inRange(bite, 0xA1, 0xFE)) {
      eucjp_first = 0x00;
      eucjp_second = bite;
      return null;
    }
    if (eucjp_first !== 0x00) {
      // Second byte of a two-byte sequence.
      lead = eucjp_first;
      eucjp_first = 0x00;
      code_point = null;
      if (inRange(lead, 0xA1, 0xFE) && inRange(bite, 0xA1, 0xFE)) {
        code_point = indexCodePointFor((lead - 0xA1) * 94 + bite - 0xA1,
                                       index('jis0208'));
      }
      if (!inRange(bite, 0xA1, 0xFE)) {
        // Invalid trail byte: put it back so it is decoded on its own.
        byte_pointer.offset(-1);
      }
      if (code_point === null) {
        return decoderError(fatal);
      }
      return code_point;
    }
    if (inRange(bite, 0x00, 0x7F)) {
      return bite;
    }
    if (bite === 0x8E || bite === 0x8F || (inRange(bite, 0xA1, 0xFE))) {
      eucjp_first = bite;
      return null;
    }
    return decoderError(fatal);
  };
}
1707
1708
/**
 * Encoder for euc-jp. ASCII, U+00A5 and U+203E become a single byte,
 * half-width katakana become 0x8E plus one byte, and everything else is
 * looked up in the 'jis0208' index and emitted as two bytes.
 *
 * @constructor
 * @param {{fatal: boolean}} options
 */
function EUCJPEncoder(options) {
  var fatal = options.fatal;
  /**
   * @param {ByteOutputStream} output_byte_stream Output byte stream.
   * @param {CodePointInputStream} code_point_pointer Input stream.
   * @return {number} The last byte emitted.
   */
  this.encode = function(output_byte_stream, code_point_pointer) {
    var cp = code_point_pointer.get();
    if (cp === EOF_code_point) {
      return EOF_byte;
    }
    code_point_pointer.offset(1);

    // Code points that map to exactly one output byte.
    var single_byte = null;
    if (inRange(cp, 0x0000, 0x007F)) {
      single_byte = cp;
    } else if (cp === 0x00A5) {
      single_byte = 0x5C;
    } else if (cp === 0x203E) {
      single_byte = 0x7E;
    }
    if (single_byte !== null) {
      return output_byte_stream.emit(single_byte);
    }

    if (inRange(cp, 0xFF61, 0xFF9F)) {
      return output_byte_stream.emit(0x8E, cp - 0xFF61 + 0xA1);
    }

    var jis_pointer = indexPointerFor(cp, index('jis0208'));
    if (jis_pointer === null) {
      return encoderError(cp);
    }
    var first = div(jis_pointer, 94) + 0xA1;
    var second = jis_pointer % 94 + 0xA1;
    return output_byte_stream.emit(first, second);
  };
}
1747
1748
/**
 * Creates the encoder registered for the 'euc-jp' encoding.
 * @param {{fatal: boolean}} options
 * @return {EUCJPEncoder}
 */
name_to_encoding['euc-jp'].getEncoder = function(options) {
  return new EUCJPEncoder(options);
};
/**
 * Creates the decoder registered for the 'euc-jp' encoding.
 * @param {{fatal: boolean}} options
 * @return {EUCJPDecoder}
 */
name_to_encoding['euc-jp'].getDecoder = function(options) {
  return new EUCJPDecoder(options);
};
1756
1757
// 12.2 iso-2022-jp
1758
1759
/**
 * Decoder for iso-2022-jp, implemented as a state machine. ESC (0x1B)
 * sequences switch between ASCII, half-width katakana and two-byte
 * (jis0208 / jis0212) modes.
 *
 * @constructor
 * @param {{fatal: boolean}} options
 */
function ISO2022JPDecoder(options) {
  var fatal = options.fatal;
  /** @enum */
  var state = {
    ASCII: 0,
    escape_start: 1,
    escape_middle: 2,
    escape_final: 3,
    lead: 4,
    trail: 5,
    Katakana: 6
  };
  var /** @type {number} */ iso2022jp_state = state.ASCII,
      /** @type {boolean} */ iso2022jp_jis0212 = false,
      /** @type {number} */ iso2022jp_lead = 0x00;
  /**
   * @param {ByteInputStream} byte_pointer The byte stream to decode.
   * @return {?number} The next code point decoded, or null if not enough
   *     data exists in the input stream to decode a complete code point.
   */
  this.decode = function(byte_pointer) {
    var bite = byte_pointer.get();
    if (bite !== EOF_byte) {
      byte_pointer.offset(1);
    }
    switch (iso2022jp_state) {
      default:
      case state.ASCII:
        if (bite === 0x1B) {
          iso2022jp_state = state.escape_start;
          return null;
        }
        if (inRange(bite, 0x00, 0x7F)) {
          return bite;
        }
        if (bite === EOF_byte) {
          return EOF_code_point;
        }
        return decoderError(fatal);

      case state.escape_start:
        if (bite === 0x24 || bite === 0x28) {
          iso2022jp_lead = bite;
          iso2022jp_state = state.escape_middle;
          return null;
        }
        // Not an escape sequence: re-read the byte (if any) as ASCII.
        if (bite !== EOF_byte) {
          byte_pointer.offset(-1);
        }
        iso2022jp_state = state.ASCII;
        return decoderError(fatal);

      case state.escape_middle:
        var lead = iso2022jp_lead;
        iso2022jp_lead = 0x00;
        if (lead === 0x24 && (bite === 0x40 || bite === 0x42)) {
          iso2022jp_jis0212 = false;
          iso2022jp_state = state.lead;
          return null;
        }
        if (lead === 0x24 && bite === 0x28) {
          iso2022jp_state = state.escape_final;
          return null;
        }
        if (lead === 0x28 && (bite === 0x42 || bite === 0x4A)) {
          iso2022jp_state = state.ASCII;
          return null;
        }
        if (lead === 0x28 && bite === 0x49) {
          iso2022jp_state = state.Katakana;
          return null;
        }
        // Unknown sequence: rewind over the bytes consumed after ESC. At
        // EOF the pointer was not advanced for this call, hence one less.
        if (bite === EOF_byte) {
          byte_pointer.offset(-1);
        } else {
          byte_pointer.offset(-2);
        }
        iso2022jp_state = state.ASCII;
        return decoderError(fatal);

      case state.escape_final:
        if (bite === 0x44) {
          iso2022jp_jis0212 = true;
          iso2022jp_state = state.lead;
          return null;
        }
        // Unknown sequence: rewind over the bytes consumed after ESC.
        if (bite === EOF_byte) {
          byte_pointer.offset(-2);
        } else {
          byte_pointer.offset(-3);
        }
        iso2022jp_state = state.ASCII;
        return decoderError(fatal);

      case state.lead:
        if (bite === 0x0A) {
          iso2022jp_state = state.ASCII;
          return decoderError(fatal, 0x000A);
        }
        if (bite === 0x1B) {
          iso2022jp_state = state.escape_start;
          return null;
        }
        if (bite === EOF_byte) {
          return EOF_code_point;
        }
        iso2022jp_lead = bite;
        iso2022jp_state = state.trail;
        return null;

      case state.trail:
        iso2022jp_state = state.lead;
        if (bite === EOF_byte) {
          return decoderError(fatal);
        }
        var code_point = null;
        var pointer = (iso2022jp_lead - 0x21) * 94 + bite - 0x21;
        if (inRange(iso2022jp_lead, 0x21, 0x7E) &&
            inRange(bite, 0x21, 0x7E)) {
          code_point = (iso2022jp_jis0212 === false) ?
              indexCodePointFor(pointer, index('jis0208')) :
              indexCodePointFor(pointer, index('jis0212'));
        }
        if (code_point === null) {
          return decoderError(fatal);
        }
        return code_point;

      case state.Katakana:
        if (bite === 0x1B) {
          iso2022jp_state = state.escape_start;
          return null;
        }
        if (inRange(bite, 0x21, 0x5F)) {
          return 0xFF61 + bite - 0x21;
        }
        if (bite === EOF_byte) {
          return EOF_code_point;
        }
        return decoderError(fatal);
    }
  };
}
1906
1907
/**
 * Encoder for iso-2022-jp. Tracks the current output mode (ASCII,
 * half-width katakana, or two-byte jis0208) and emits the matching ESC
 * sequence whenever the next code point needs a different mode.
 *
 * @constructor
 * @param {{fatal: boolean}} options
 */
function ISO2022JPEncoder(options) {
  var fatal = options.fatal;
  /** @enum */
  var state = {
    ASCII: 0,
    lead: 1,
    Katakana: 2
  };
  var /** @type {number} */ iso2022jp_state = state.ASCII;
  /**
   * @param {ByteOutputStream} output_byte_stream Output byte stream.
   * @param {CodePointInputStream} code_point_pointer Input stream.
   * @return {number} The last byte emitted.
   */
  this.encode = function(output_byte_stream, code_point_pointer) {
    var code_point = code_point_pointer.get();
    if (code_point === EOF_code_point) {
      return EOF_byte;
    }
    code_point_pointer.offset(1);
    if ((inRange(code_point, 0x0000, 0x007F) ||
         code_point === 0x00A5 || code_point === 0x203E) &&
        iso2022jp_state !== state.ASCII) {
      // Switch to ASCII first; the code point is re-read on the next call.
      code_point_pointer.offset(-1);
      iso2022jp_state = state.ASCII;
      return output_byte_stream.emit(0x1B, 0x28, 0x42);
    }
    if (inRange(code_point, 0x0000, 0x007F)) {
      return output_byte_stream.emit(code_point);
    }
    if (code_point === 0x00A5) {
      return output_byte_stream.emit(0x5C);
    }
    if (code_point === 0x203E) {
      return output_byte_stream.emit(0x7E);
    }
    if (inRange(code_point, 0xFF61, 0xFF9F) &&
        iso2022jp_state !== state.Katakana) {
      // Switch to katakana first; the code point is re-read next call.
      code_point_pointer.offset(-1);
      iso2022jp_state = state.Katakana;
      return output_byte_stream.emit(0x1B, 0x28, 0x49);
    }
    if (inRange(code_point, 0xFF61, 0xFF9F)) {
      // Inverse of the decoder's katakana mapping (0xFF61 + byte - 0x21),
      // so U+FF61..U+FF9F become bytes 0x21..0x5F.
      return output_byte_stream.emit(code_point - 0xFF61 + 0x21);
    }
    if (iso2022jp_state !== state.lead) {
      // Switch to two-byte mode first; the code point is re-read next call.
      code_point_pointer.offset(-1);
      iso2022jp_state = state.lead;
      return output_byte_stream.emit(0x1B, 0x24, 0x42);
    }
    var pointer = indexPointerFor(code_point, index('jis0208'));
    if (pointer === null) {
      return encoderError(code_point);
    }
    var lead = div(pointer, 94) + 0x21;
    var trail = pointer % 94 + 0x21;
    return output_byte_stream.emit(lead, trail);
  };
}
1970
1971
/**
 * Creates the encoder registered for the 'iso-2022-jp' encoding.
 * @param {{fatal: boolean}} options
 * @return {ISO2022JPEncoder}
 */
name_to_encoding['iso-2022-jp'].getEncoder = function(options) {
  return new ISO2022JPEncoder(options);
};
/**
 * Creates the decoder registered for the 'iso-2022-jp' encoding.
 * @param {{fatal: boolean}} options
 * @return {ISO2022JPDecoder}
 */
name_to_encoding['iso-2022-jp'].getDecoder = function(options) {
  return new ISO2022JPDecoder(options);
};
1979
1980
// 12.3 shift_jis
1981
1982
/**
 * Decoder for shift_jis. Bytes 0x00-0x80 map to themselves, 0xA1-0xDF to
 * half-width katakana, and a lead byte (0x81-0x9F or 0xE0-0xFC) followed by
 * a trail byte is looked up in the 'jis0208' index.
 *
 * @constructor
 * @param {{fatal: boolean}} options
 */
function ShiftJISDecoder(options) {
  var fatal = options.fatal;
  var /** @type {number} */ shiftjis_lead = 0x00;
  /**
   * @param {ByteInputStream} byte_pointer The byte stream to decode.
   * @return {?number} The next code point decoded, or null if not enough
   *     data exists in the input stream to decode a complete code point.
   */
  this.decode = function(byte_pointer) {
    var bite = byte_pointer.get();
    if (bite === EOF_byte && shiftjis_lead === 0x00) {
      return EOF_code_point;
    }
    if (bite === EOF_byte && shiftjis_lead !== 0x00) {
      // Input ended after a lead byte.
      shiftjis_lead = 0x00;
      return decoderError(fatal);
    }
    byte_pointer.offset(1);
    if (shiftjis_lead !== 0x00) {
      var lead = shiftjis_lead;
      shiftjis_lead = 0x00;
      if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFC)) {
        // The offsets close the gaps in the trail (0x7F) and lead
        // (0xA0-0xDF) byte ranges so each lead spans 188 pointers.
        var offset = (bite < 0x7F) ? 0x40 : 0x41;
        var lead_offset = (lead < 0xA0) ? 0x81 : 0xC1;
        var code_point = indexCodePointFor((lead - lead_offset) * 188 +
                                           bite - offset, index('jis0208'));
        if (code_point === null) {
          return decoderError(fatal);
        }
        return code_point;
      }
      // Invalid trail byte: put it back so it is decoded on its own.
      byte_pointer.offset(-1);
      return decoderError(fatal);
    }
    if (inRange(bite, 0x00, 0x80)) {
      return bite;
    }
    if (inRange(bite, 0xA1, 0xDF)) {
      return 0xFF61 + bite - 0xA1;
    }
    if (inRange(bite, 0x81, 0x9F) || inRange(bite, 0xE0, 0xFC)) {
      shiftjis_lead = bite;
      return null;
    }
    return decoderError(fatal);
  };
}
2033
2034
/**
 * Encoder for shift_jis. Code points up to U+0080, U+00A5, U+203E and the
 * half-width katakana each become one byte; everything else is looked up
 * in the 'jis0208' index and emitted as a lead/trail byte pair.
 *
 * @constructor
 * @param {{fatal: boolean}} options
 */
function ShiftJISEncoder(options) {
  var fatal = options.fatal;
  /**
   * @param {ByteOutputStream} output_byte_stream Output byte stream.
   * @param {CodePointInputStream} code_point_pointer Input stream.
   * @return {number} The last byte emitted.
   */
  this.encode = function(output_byte_stream, code_point_pointer) {
    var cp = code_point_pointer.get();
    if (cp === EOF_code_point) {
      return EOF_byte;
    }
    code_point_pointer.offset(1);

    // Code points that map to exactly one output byte.
    var single_byte = null;
    if (inRange(cp, 0x0000, 0x0080)) {
      single_byte = cp;
    } else if (cp === 0x00A5) {
      single_byte = 0x5C;
    } else if (cp === 0x203E) {
      single_byte = 0x7E;
    } else if (inRange(cp, 0xFF61, 0xFF9F)) {
      single_byte = cp - 0xFF61 + 0xA1;
    }
    if (single_byte !== null) {
      return output_byte_stream.emit(single_byte);
    }

    var jis_pointer = indexPointerFor(cp, index('jis0208'));
    if (jis_pointer === null) {
      return encoderError(cp);
    }
    var row = div(jis_pointer, 188);
    var cell = jis_pointer % 188;
    // Rows from 0x1F upward use the second lead-byte run (0xE0 upward);
    // cells from 0x3F upward skip the trail byte 0x7F.
    var lead_byte = row + (row < 0x1F ? 0x81 : 0xC1);
    var trail_byte = cell + (cell < 0x3F ? 0x40 : 0x41);
    return output_byte_stream.emit(lead_byte, trail_byte);
  };
}
2074
2075
/**
 * Creates the encoder registered for the 'shift_jis' encoding.
 * @param {{fatal: boolean}} options
 * @return {ShiftJISEncoder}
 */
name_to_encoding['shift_jis'].getEncoder = function(options) {
  return new ShiftJISEncoder(options);
};
/**
 * Creates the decoder registered for the 'shift_jis' encoding.
 * @param {{fatal: boolean}} options
 * @return {ShiftJISDecoder}
 */
name_to_encoding['shift_jis'].getDecoder = function(options) {
  return new ShiftJISDecoder(options);
};
2083
2084
//
2085
// 13. Legacy multi-byte Korean encodings
2086
//
2087
2088
// 13.1 euc-kr
2089
2090
/**
 * Decoder for euc-kr. ASCII is passed through; a lead byte (0x81-0xFD)
 * followed by a valid trail byte is looked up in the 'euc-kr' index.
 *
 * @constructor
 * @param {{fatal: boolean}} options
 */
function EUCKRDecoder(options) {
  var fatal = options.fatal;
  var /** @type {number} */ euckr_lead = 0x00;
  /**
   * @param {ByteInputStream} byte_pointer The byte stream to decode.
   * @return {?number} The next code point decoded, or null if not enough
   *     data exists in the input stream to decode a complete code point.
   */
  this.decode = function(byte_pointer) {
    var bite = byte_pointer.get();
    if (bite === EOF_byte && euckr_lead === 0) {
      return EOF_code_point;
    }
    if (bite === EOF_byte && euckr_lead !== 0) {
      // Input ended after a lead byte.
      euckr_lead = 0x00;
      return decoderError(fatal);
    }
    byte_pointer.offset(1);
    if (euckr_lead !== 0x00) {
      var lead = euckr_lead;
      var pointer = null;
      euckr_lead = 0x00;

      // Leads 0x81-0xC6: each lead spans 26 + 26 + 126 pointers, one run
      // per trail-byte range (0x41-0x5A, 0x61-0x7A, 0x81-0xFE).
      if (inRange(lead, 0x81, 0xC6)) {
        var temp = (26 + 26 + 126) * (lead - 0x81);
        if (inRange(bite, 0x41, 0x5A)) {
          pointer = temp + bite - 0x41;
        } else if (inRange(bite, 0x61, 0x7A)) {
          pointer = temp + 26 + bite - 0x61;
        } else if (inRange(bite, 0x81, 0xFE)) {
          pointer = temp + 26 + 26 + bite - 0x81;
        }
      }

      // Leads 0xC7-0xFD: 94 pointers per lead, placed after the block
      // covered by the lower leads.
      if (inRange(lead, 0xC7, 0xFD) && inRange(bite, 0xA1, 0xFE)) {
        pointer = (26 + 26 + 126) * (0xC7 - 0x81) + (lead - 0xC7) * 94 +
            (bite - 0xA1);
      }

      var code_point = (pointer === null) ? null :
          indexCodePointFor(pointer, index('euc-kr'));
      if (pointer === null) {
        // Invalid trail byte: put it back so it is decoded on its own.
        byte_pointer.offset(-1);
      }
      if (code_point === null) {
        return decoderError(fatal);
      }
      return code_point;
    }

    if (inRange(bite, 0x00, 0x7F)) {
      return bite;
    }

    if (inRange(bite, 0x81, 0xFD)) {
      euckr_lead = bite;
      return null;
    }

    return decoderError(fatal);
  };
}
2156
2157
/**
 * Encoder for euc-kr. ASCII is passed through; any other code point is
 * looked up in the 'euc-kr' index and emitted as a lead/trail byte pair.
 *
 * @constructor
 * @param {{fatal: boolean}} options
 */
function EUCKREncoder(options) {
  var fatal = options.fatal;
  /**
   * @param {ByteOutputStream} output_byte_stream Output byte stream.
   * @param {CodePointInputStream} code_point_pointer Input stream.
   * @return {number} The last byte emitted.
   */
  this.encode = function(output_byte_stream, code_point_pointer) {
    var cp = code_point_pointer.get();
    if (cp === EOF_code_point) {
      return EOF_byte;
    }
    code_point_pointer.offset(1);

    if (inRange(cp, 0x0000, 0x007F)) {
      return output_byte_stream.emit(cp);
    }

    var kr_pointer = indexPointerFor(cp, index('euc-kr'));
    if (kr_pointer === null) {
      return encoderError(cp);
    }

    // Pointers per lead byte in the lower block (leads 0x81-0xC6), and the
    // first pointer of the upper block (leads 0xC7 upward).
    var row_width = 26 + 26 + 126;
    var upper_start = row_width * (0xC7 - 0x81);

    if (kr_pointer >= upper_start) {
      var rest = kr_pointer - upper_start;
      return output_byte_stream.emit(div(rest, 94) + 0xC7, rest % 94 + 0xA1);
    }

    var lead_byte = div(kr_pointer, row_width) + 0x81;
    var cell = kr_pointer % row_width;
    // Each run of cells starts at a different trail byte:
    // 0..25 -> 0x41.., 26..51 -> 0x61.., 52.. -> 0x81..
    var shift;
    if (cell < 26) {
      shift = 0x41;
    } else if (cell < 26 + 26) {
      shift = 0x47;
    } else {
      shift = 0x4D;
    }
    return output_byte_stream.emit(lead_byte, cell + shift);
  };
}
2194
2195
/**
 * Creates the encoder registered for the 'euc-kr' encoding.
 * @param {{fatal: boolean}} options
 * @return {EUCKREncoder}
 */
name_to_encoding['euc-kr'].getEncoder = function(options) {
  return new EUCKREncoder(options);
};
/**
 * Creates the decoder registered for the 'euc-kr' encoding.
 * @param {{fatal: boolean}} options
 * @return {EUCKRDecoder}
 */
name_to_encoding['euc-kr'].getDecoder = function(options) {
  return new EUCKRDecoder(options);
};
2203
2204
2205
//
2206
// 14. Legacy miscellaneous encodings
2207
//
2208
2209
// 14.1 replacement
2210
2211
// Not needed - API throws TypeError
2212
2213
// 14.2 utf-16
2214
2215
/**
 * Decoder for utf-16 (either byte order). Bytes are paired into 16-bit code
 * units; a lead surrogate is held until the following code unit shows
 * whether it completes a surrogate pair.
 *
 * @constructor
 * @param {boolean} utf16_be True if big-endian, false if little-endian.
 * @param {{fatal: boolean}} options
 */
function UTF16Decoder(utf16_be, options) {
  var fatal = options.fatal;
  var /** @type {?number} */ utf16_lead_byte = null,
      /** @type {?number} */ utf16_lead_surrogate = null;
  /**
   * @param {ByteInputStream} byte_pointer The byte stream to decode.
   * @return {?number} The next code point decoded, or null if not enough
   *     data exists in the input stream to decode a complete code point.
   */
  this.decode = function(byte_pointer) {
    var bite = byte_pointer.get();
    if (bite === EOF_byte) {
      if (utf16_lead_byte === null && utf16_lead_surrogate === null) {
        return EOF_code_point;
      }
      // Input ended mid code unit or after an unpaired lead surrogate.
      // Clear the pending state (as the other decoders do) so the error is
      // reported once and the next call returns EOF.
      utf16_lead_byte = null;
      utf16_lead_surrogate = null;
      return decoderError(fatal);
    }
    byte_pointer.offset(1);
    if (utf16_lead_byte === null) {
      utf16_lead_byte = bite;
      return null;
    }
    var code_point;
    if (utf16_be) {
      code_point = (utf16_lead_byte << 8) + bite;
    } else {
      code_point = (bite << 8) + utf16_lead_byte;
    }
    utf16_lead_byte = null;
    if (utf16_lead_surrogate !== null) {
      var lead_surrogate = utf16_lead_surrogate;
      utf16_lead_surrogate = null;
      if (inRange(code_point, 0xDC00, 0xDFFF)) {
        return 0x10000 + (lead_surrogate - 0xD800) * 0x400 +
            (code_point - 0xDC00);
      }
      // Not a trail surrogate: rewind so this code unit is decoded again
      // on its own, and report the unpaired lead surrogate.
      byte_pointer.offset(-2);
      return decoderError(fatal);
    }
    if (inRange(code_point, 0xD800, 0xDBFF)) {
      utf16_lead_surrogate = code_point;
      return null;
    }
    if (inRange(code_point, 0xDC00, 0xDFFF)) {
      // Trail surrogate without a preceding lead surrogate.
      return decoderError(fatal);
    }
    return code_point;
  };
}
2271
2272
/**
 * Encoder for utf-16 (either byte order). Code points up to U+FFFF become
 * one 16-bit code unit; higher code points become a surrogate pair.
 * Surrogate code points themselves are reported via encoderError().
 *
 * @constructor
 * @param {boolean} utf16_be True if big-endian, false if little-endian.
 * @param {{fatal: boolean}} options
 */
function UTF16Encoder(utf16_be, options) {
  var fatal = options.fatal;
  /**
   * @param {ByteOutputStream} output_byte_stream Output byte stream.
   * @param {CodePointInputStream} code_point_pointer Input stream.
   * @return {number} The last byte emitted.
   */
  this.encode = function(output_byte_stream, code_point_pointer) {
    /**
     * Emits one 16-bit code unit in the configured byte order.
     * @param {number} code_unit
     * @return {number} last byte emitted
     */
    function convert_to_bytes(code_unit) {
      var byte1 = code_unit >> 8;
      var byte2 = code_unit & 0x00FF;
      if (utf16_be) {
        return output_byte_stream.emit(byte1, byte2);
      }
      return output_byte_stream.emit(byte2, byte1);
    }
    var code_point = code_point_pointer.get();
    if (code_point === EOF_code_point) {
      return EOF_byte;
    }
    code_point_pointer.offset(1);
    if (inRange(code_point, 0xD800, 0xDFFF)) {
      // Return the error result, as the other encoders do, so a surrogate
      // is never emitted even if encoderError() returns instead of
      // throwing.
      return encoderError(code_point);
    }
    if (code_point <= 0xFFFF) {
      return convert_to_bytes(code_point);
    }
    var lead = div((code_point - 0x10000), 0x400) + 0xD800;
    var trail = ((code_point - 0x10000) % 0x400) + 0xDC00;
    convert_to_bytes(lead);
    return convert_to_bytes(trail);
  };
}
2314
2315
// 14.3 utf-16be
2316
/**
 * Creates the big-endian UTF-16 encoder registered for 'utf-16be'.
 * @param {{fatal: boolean}} options
 * @return {UTF16Encoder}
 */
name_to_encoding['utf-16be'].getEncoder = function(options) {
  return new UTF16Encoder(true, options);
};
/**
 * Creates the big-endian UTF-16 decoder registered for 'utf-16be'.
 * @param {{fatal: boolean}} options
 * @return {UTF16Decoder}
 */
name_to_encoding['utf-16be'].getDecoder = function(options) {
  return new UTF16Decoder(true, options);
};
2324
2325
// 14.4 utf-16le
2326
/**
 * Creates the little-endian UTF-16 encoder registered for 'utf-16le'.
 * @param {{fatal: boolean}} options
 * @return {UTF16Encoder}
 */
name_to_encoding['utf-16le'].getEncoder = function(options) {
  return new UTF16Encoder(false, options);
};
/**
 * Creates the little-endian UTF-16 decoder registered for 'utf-16le'.
 * @param {{fatal: boolean}} options
 * @return {UTF16Decoder}
 */
name_to_encoding['utf-16le'].getDecoder = function(options) {
  return new UTF16Decoder(false, options);
};
2334
2335
// 14.5 x-user-defined
2336
2337
/**
 * Decoder for x-user-defined: bytes 0x00-0x7F decode to themselves and
 * every other byte is shifted to start at U+F780.
 *
 * @constructor
 * @param {{fatal: boolean}} options
 */
function XUserDefinedDecoder(options) {
  var fatal = options.fatal;
  /**
   * @param {ByteInputStream} byte_pointer The byte stream to decode.
   * @return {?number} The next code point decoded, or null if not enough
   *     data exists in the input stream to decode a complete code point.
   */
  this.decode = function(byte_pointer) {
    var next = byte_pointer.get();
    if (next === EOF_byte) {
      return EOF_code_point;
    }
    byte_pointer.offset(1);
    return inRange(next, 0x00, 0x7F) ? next : 0xF780 + next - 0x80;
  };
}
2360
2361
/**
 * Encoder for x-user-defined: ASCII code points are emitted unchanged and
 * U+F780..U+F7FF are emitted as bytes 0x80..0xFF. Everything else is
 * reported via encoderError().
 *
 * @constructor
 * @param {*} index Unused by this encoder; kept so existing callers that
 *     pass two arguments continue to work.
 * @param {{fatal: boolean}} options
 */
function XUserDefinedEncoder(index, options) {
  var fatal = options.fatal;
  /**
   * @param {ByteOutputStream} output_byte_stream Output byte stream.
   * @param {CodePointInputStream} code_point_pointer Input stream.
   * @return {number} The last byte emitted.
   */
  this.encode = function(output_byte_stream, code_point_pointer) {
    var code_point = code_point_pointer.get();
    if (code_point === EOF_code_point) {
      return EOF_byte;
    }
    code_point_pointer.offset(1);
    if (inRange(code_point, 0x0000, 0x007F)) {
      return output_byte_stream.emit(code_point);
    }
    if (inRange(code_point, 0xF780, 0xF7FF)) {
      return output_byte_stream.emit(code_point - 0xF780 + 0x80);
    }
    // Return the error result, as the other encoders do, instead of
    // falling off the end of the function.
    return encoderError(code_point);
  };
}
2387
2388
/**
 * Creates the encoder registered for the 'x-user-defined' encoding.
 * XUserDefinedEncoder takes a leading (unused) argument before the options.
 * @param {{fatal: boolean}} options
 * @return {XUserDefinedEncoder}
 */
name_to_encoding['x-user-defined'].getEncoder = function(options) {
  return new XUserDefinedEncoder(false, options);
};
/**
 * Creates the decoder registered for the 'x-user-defined' encoding.
 * XUserDefinedDecoder takes the options as its only argument, so they are
 * passed directly; passing `false` first would make it read `fatal` from
 * `false` and drop the caller's setting.
 * @param {{fatal: boolean}} options
 * @return {XUserDefinedDecoder}
 */
name_to_encoding['x-user-defined'].getDecoder = function(options) {
  return new XUserDefinedDecoder(options);
};
2396
2397
// NOTE: currently unused
/**
 * Checks the stream for a leading byte order mark. When one is found it is
 * skipped and the encoding it identifies is returned; otherwise the
 * supplied label is returned and the stream is left untouched.
 *
 * @param {string} label The encoding label.
 * @param {ByteInputStream} input_stream The byte stream to test.
 * @return {string} The detected encoding name, or label if no mark matched.
 */
function detectEncoding(label, input_stream) {
  // Tested in this order; the first matching mark wins.
  var marks = [
    {bytes: [0xFF, 0xFE], encoding: 'utf-16le'},
    {bytes: [0xFE, 0xFF], encoding: 'utf-16be'},
    {bytes: [0xEF, 0xBB, 0xBF], encoding: 'utf-8'}
  ];
  for (var i = 0; i < marks.length; ++i) {
    var mark = marks[i];
    if (input_stream.match(mark.bytes)) {
      input_stream.offset(mark.bytes.length);
      return mark.encoding;
    }
  }
  return label;
}
2417
2418
// Install each constructor on the global object only when the environment
// does not already provide one under that name.
if (!('TextEncoder' in global)) {
  global['TextEncoder'] = TextEncoder;
}
if (!('TextDecoder' in global)) {
  global['TextDecoder'] = TextDecoder;
}
2420
}(this));
2421
2422