Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/xml2/encoding.c
4389 views
1
/*
2
* encoding.c : implements the encoding conversion functions needed for XML
3
*
4
* Related specs:
5
* rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6
* rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7
* [ISO-10646] UTF-8 and UTF-16 in Annexes
8
* [ISO-8859-1] ISO Latin-1 characters codes.
9
* [UNICODE] The Unicode Consortium, "The Unicode Standard --
10
* Worldwide Character Encoding -- Version 1.0", Addison-
11
* Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12
* described in Unicode Technical Report #4.
13
* [US-ASCII] Coded Character Set--7-bit American Standard Code for
14
* Information Interchange, ANSI X3.4-1986.
15
*
16
* See Copyright for the status of this software.
17
*
18
* [email protected]
19
*
20
* Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <[email protected]>
21
*/
22
23
#define IN_LIBXML
24
#include "libxml.h"
25
26
#include <string.h>
27
#include <limits.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
31
#ifdef LIBXML_ICONV_ENABLED
32
#include <errno.h>
33
#endif
34
35
#include <libxml/encoding.h>
36
#include <libxml/xmlmemory.h>
37
#include <libxml/parser.h>
38
#ifdef LIBXML_HTML_ENABLED
39
#include <libxml/HTMLparser.h>
40
#endif
41
#include <libxml/xmlerror.h>
42
43
#include "private/buf.h"
44
#include "private/enc.h"
45
#include "private/error.h"
46
47
#ifdef LIBXML_ICU_ENABLED
48
#include <unicode/ucnv.h>
49
/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50
#define ICU_PIVOT_BUF_SIZE 1024
51
typedef struct _uconv_t uconv_t;
52
struct _uconv_t {
53
UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54
UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55
UChar pivot_buf[ICU_PIVOT_BUF_SIZE];
56
UChar *pivot_source;
57
UChar *pivot_target;
58
};
59
#endif
60
61
/*
 * One entry of the alias table: xmlGetEncodingAlias() compares the
 * upper-cased lookup key against @alias and hands back @name.
 */
typedef struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
} xmlCharEncodingAlias, *xmlCharEncodingAliasPtr;

/* Dynamically grown alias table, its used entry count and its capacity. */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;

/* Endianness flag consulted by the UTF-16 converters; starts out as 1. */
static int xmlLittleEndian = 1;
73
74
#ifdef LIBXML_ICU_ENABLED
75
static uconv_t*
76
openIcuConverter(const char* name, int toUnicode)
77
{
78
UErrorCode status = U_ZERO_ERROR;
79
uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
80
if (conv == NULL)
81
return NULL;
82
83
conv->pivot_source = conv->pivot_buf;
84
conv->pivot_target = conv->pivot_buf;
85
86
conv->uconv = ucnv_open(name, &status);
87
if (U_FAILURE(status))
88
goto error;
89
90
status = U_ZERO_ERROR;
91
if (toUnicode) {
92
ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
93
NULL, NULL, NULL, &status);
94
}
95
else {
96
ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
97
NULL, NULL, NULL, &status);
98
}
99
if (U_FAILURE(status))
100
goto error;
101
102
status = U_ZERO_ERROR;
103
conv->utf8 = ucnv_open("UTF-8", &status);
104
if (U_SUCCESS(status))
105
return conv;
106
107
error:
108
if (conv->uconv)
109
ucnv_close(conv->uconv);
110
xmlFree(conv);
111
return NULL;
112
}
113
114
static void
115
closeIcuConverter(uconv_t *conv)
116
{
117
if (conv != NULL) {
118
ucnv_close(conv->uconv);
119
ucnv_close(conv->utf8);
120
xmlFree(conv);
121
}
122
}
123
#endif /* LIBXML_ICU_ENABLED */
124
125
/************************************************************************
126
* *
127
* Conversions To/From UTF8 encoding *
128
* *
129
************************************************************************/
130
131
/**
132
* asciiToUTF8:
133
* @out: a pointer to an array of bytes to store the result
134
* @outlen: the length of @out
135
* @in: a pointer to an array of ASCII chars
136
* @inlen: the length of @in
137
*
138
* Take a block of ASCII chars in and try to convert it to an UTF-8
139
* block of chars out.
140
*
141
* Returns the number of bytes written or an XML_ENC_ERR code.
142
*
143
* The value of @inlen after return is the number of octets consumed
144
* if the return value is positive, else unpredictable.
145
* The value of @outlen after return is the number of octets produced.
146
*/
147
static int
148
asciiToUTF8(unsigned char* out, int *outlen,
149
const unsigned char* in, int *inlen) {
150
unsigned char* outstart = out;
151
const unsigned char* base = in;
152
const unsigned char* processed = in;
153
unsigned char* outend = out + *outlen;
154
const unsigned char* inend;
155
unsigned int c;
156
157
inend = in + (*inlen);
158
while ((in < inend) && (out - outstart + 5 < *outlen)) {
159
c= *in++;
160
161
if (out >= outend)
162
break;
163
if (c < 0x80) {
164
*out++ = c;
165
} else {
166
*outlen = out - outstart;
167
*inlen = processed - base;
168
return(XML_ENC_ERR_INPUT);
169
}
170
171
processed = (const unsigned char*) in;
172
}
173
*outlen = out - outstart;
174
*inlen = processed - base;
175
return(*outlen);
176
}
177
178
#ifdef LIBXML_OUTPUT_ENABLED
179
/**
180
* UTF8Toascii:
181
* @out: a pointer to an array of bytes to store the result
182
* @outlen: the length of @out
183
* @in: a pointer to an array of UTF-8 chars
184
* @inlen: the length of @in
185
*
186
* Take a block of UTF-8 chars in and try to convert it to an ASCII
187
* block of chars out.
188
*
189
* Returns the number of bytes written or an XML_ENC_ERR code.
190
*
191
* The value of @inlen after return is the number of octets consumed
192
* if the return value is positive, else unpredictable.
193
* The value of @outlen after return is the number of octets produced.
194
*/
195
static int
196
UTF8Toascii(unsigned char* out, int *outlen,
197
const unsigned char* in, int *inlen) {
198
const unsigned char* processed = in;
199
const unsigned char* outend;
200
const unsigned char* outstart = out;
201
const unsigned char* instart = in;
202
const unsigned char* inend;
203
unsigned int c, d;
204
int trailing;
205
206
if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
207
return(XML_ENC_ERR_INTERNAL);
208
if (in == NULL) {
209
/*
210
* initialization nothing to do
211
*/
212
*outlen = 0;
213
*inlen = 0;
214
return(0);
215
}
216
inend = in + (*inlen);
217
outend = out + (*outlen);
218
while (in < inend) {
219
d = *in++;
220
if (d < 0x80) { c= d; trailing= 0; }
221
else if (d < 0xC0) {
222
/* trailing byte in leading position */
223
*outlen = out - outstart;
224
*inlen = processed - instart;
225
return(XML_ENC_ERR_INPUT);
226
} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
227
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
228
else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
229
else {
230
/* no chance for this in Ascii */
231
*outlen = out - outstart;
232
*inlen = processed - instart;
233
return(XML_ENC_ERR_INPUT);
234
}
235
236
if (inend - in < trailing) {
237
break;
238
}
239
240
for ( ; trailing; trailing--) {
241
if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
242
break;
243
c <<= 6;
244
c |= d & 0x3F;
245
}
246
247
/* assertion: c is a single UTF-4 value */
248
if (c < 0x80) {
249
if (out >= outend)
250
break;
251
*out++ = c;
252
} else {
253
/* no chance for this in Ascii */
254
*outlen = out - outstart;
255
*inlen = processed - instart;
256
return(XML_ENC_ERR_INPUT);
257
}
258
processed = in;
259
}
260
*outlen = out - outstart;
261
*inlen = processed - instart;
262
return(*outlen);
263
}
264
#endif /* LIBXML_OUTPUT_ENABLED */
265
266
/**
267
* isolat1ToUTF8:
268
* @out: a pointer to an array of bytes to store the result
269
* @outlen: the length of @out
270
* @in: a pointer to an array of ISO Latin 1 chars
271
* @inlen: the length of @in
272
*
273
* Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
274
* block of chars out.
275
*
276
* Returns the number of bytes written or an XML_ENC_ERR code.
277
*
278
* The value of @inlen after return is the number of octets consumed
279
* if the return value is positive, else unpredictable.
280
* The value of @outlen after return is the number of octets produced.
281
*/
282
int
283
isolat1ToUTF8(unsigned char* out, int *outlen,
284
const unsigned char* in, int *inlen) {
285
unsigned char* outstart = out;
286
const unsigned char* base = in;
287
unsigned char* outend;
288
const unsigned char* inend;
289
const unsigned char* instop;
290
291
if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
292
return(XML_ENC_ERR_INTERNAL);
293
294
outend = out + *outlen;
295
inend = in + (*inlen);
296
instop = inend;
297
298
while ((in < inend) && (out < outend - 1)) {
299
if (*in >= 0x80) {
300
*out++ = (((*in) >> 6) & 0x1F) | 0xC0;
301
*out++ = ((*in) & 0x3F) | 0x80;
302
++in;
303
}
304
if ((instop - in) > (outend - out)) instop = in + (outend - out);
305
while ((in < instop) && (*in < 0x80)) {
306
*out++ = *in++;
307
}
308
}
309
if ((in < inend) && (out < outend) && (*in < 0x80)) {
310
*out++ = *in++;
311
}
312
*outlen = out - outstart;
313
*inlen = in - base;
314
return(*outlen);
315
}
316
317
/**
318
* UTF8ToUTF8:
319
* @out: a pointer to an array of bytes to store the result
320
* @outlen: the length of @out
321
* @inb: a pointer to an array of UTF-8 chars
322
* @inlenb: the length of @in in UTF-8 chars
323
*
324
* No op copy operation for UTF8 handling.
325
*
326
* Returns the number of bytes written or an XML_ENC_ERR code.
327
*
328
* The value of *inlen after return is the number of octets consumed
329
* if the return value is positive, else unpredictable.
330
*/
331
static int
332
UTF8ToUTF8(unsigned char* out, int *outlen,
333
const unsigned char* inb, int *inlenb)
334
{
335
int len;
336
337
if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
338
return(XML_ENC_ERR_INTERNAL);
339
if (inb == NULL) {
340
/* inb == NULL means output is initialized. */
341
*outlen = 0;
342
*inlenb = 0;
343
return(0);
344
}
345
if (*outlen > *inlenb) {
346
len = *inlenb;
347
} else {
348
len = *outlen;
349
}
350
if (len < 0)
351
return(XML_ENC_ERR_INTERNAL);
352
353
/*
354
* FIXME: Conversion functions must assure valid UTF-8, so we have
355
* to check for UTF-8 validity. Preferably, this converter shouldn't
356
* be used at all.
357
*/
358
memcpy(out, inb, len);
359
360
*outlen = len;
361
*inlenb = len;
362
return(*outlen);
363
}
364
365
366
#ifdef LIBXML_OUTPUT_ENABLED
367
/**
368
* UTF8Toisolat1:
369
* @out: a pointer to an array of bytes to store the result
370
* @outlen: the length of @out
371
* @in: a pointer to an array of UTF-8 chars
372
* @inlen: the length of @in
373
*
374
* Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
375
* block of chars out.
376
*
377
* Returns the number of bytes written or an XML_ENC_ERR code.
378
*
379
* The value of @inlen after return is the number of octets consumed
380
* if the return value is positive, else unpredictable.
381
* The value of @outlen after return is the number of octets produced.
382
*/
383
int
384
UTF8Toisolat1(unsigned char* out, int *outlen,
385
const unsigned char* in, int *inlen) {
386
const unsigned char* processed = in;
387
const unsigned char* outend;
388
const unsigned char* outstart = out;
389
const unsigned char* instart = in;
390
const unsigned char* inend;
391
unsigned int c, d;
392
int trailing;
393
394
if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
395
return(XML_ENC_ERR_INTERNAL);
396
if (in == NULL) {
397
/*
398
* initialization nothing to do
399
*/
400
*outlen = 0;
401
*inlen = 0;
402
return(0);
403
}
404
inend = in + (*inlen);
405
outend = out + (*outlen);
406
while (in < inend) {
407
d = *in++;
408
if (d < 0x80) { c= d; trailing= 0; }
409
else if (d < 0xC0) {
410
/* trailing byte in leading position */
411
*outlen = out - outstart;
412
*inlen = processed - instart;
413
return(XML_ENC_ERR_INPUT);
414
} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
415
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
416
else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
417
else {
418
/* no chance for this in IsoLat1 */
419
*outlen = out - outstart;
420
*inlen = processed - instart;
421
return(XML_ENC_ERR_INPUT);
422
}
423
424
if (inend - in < trailing) {
425
break;
426
}
427
428
for ( ; trailing; trailing--) {
429
if (in >= inend)
430
break;
431
if (((d= *in++) & 0xC0) != 0x80) {
432
*outlen = out - outstart;
433
*inlen = processed - instart;
434
return(XML_ENC_ERR_INPUT);
435
}
436
c <<= 6;
437
c |= d & 0x3F;
438
}
439
440
/* assertion: c is a single UTF-4 value */
441
if (c <= 0xFF) {
442
if (out >= outend)
443
break;
444
*out++ = c;
445
} else {
446
/* no chance for this in IsoLat1 */
447
*outlen = out - outstart;
448
*inlen = processed - instart;
449
return(XML_ENC_ERR_INPUT);
450
}
451
processed = in;
452
}
453
*outlen = out - outstart;
454
*inlen = processed - instart;
455
return(*outlen);
456
}
457
#endif /* LIBXML_OUTPUT_ENABLED */
458
459
/**
460
* UTF16LEToUTF8:
461
* @out: a pointer to an array of bytes to store the result
462
* @outlen: the length of @out
463
* @inb: a pointer to an array of UTF-16LE passwd as a byte array
464
* @inlenb: the length of @in in UTF-16LE chars
465
*
466
* Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
467
* block of chars out. This function assumes the endian property
468
* is the same between the native type of this machine and the
469
* inputed one.
470
*
471
* Returns the number of bytes written or an XML_ENC_ERR code.
472
*
473
* The value of *inlen after return is the number of octets consumed
474
* if the return value is positive, else unpredictable.
475
*/
476
static int
477
UTF16LEToUTF8(unsigned char* out, int *outlen,
478
const unsigned char* inb, int *inlenb)
479
{
480
unsigned char* outstart = out;
481
const unsigned char* processed = inb;
482
unsigned char* outend;
483
unsigned short* in = (unsigned short *) (void *) inb;
484
unsigned short* inend;
485
unsigned int c, d, inlen;
486
unsigned char *tmp;
487
int bits;
488
489
if (*outlen == 0) {
490
*inlenb = 0;
491
return(0);
492
}
493
outend = out + *outlen;
494
if ((*inlenb % 2) == 1)
495
(*inlenb)--;
496
inlen = *inlenb / 2;
497
inend = in + inlen;
498
while ((in < inend) && (out - outstart + 5 < *outlen)) {
499
if (xmlLittleEndian) {
500
c= *in++;
501
} else {
502
tmp = (unsigned char *) in;
503
c = *tmp++;
504
c = c | (*tmp << 8);
505
in++;
506
}
507
if ((c & 0xFC00) == 0xD800) { /* surrogates */
508
if (in >= inend) { /* handle split mutli-byte characters */
509
break;
510
}
511
if (xmlLittleEndian) {
512
d = *in++;
513
} else {
514
tmp = (unsigned char *) in;
515
d = *tmp++;
516
d = d | (*tmp << 8);
517
in++;
518
}
519
if ((d & 0xFC00) == 0xDC00) {
520
c &= 0x03FF;
521
c <<= 10;
522
c |= d & 0x03FF;
523
c += 0x10000;
524
}
525
else {
526
*outlen = out - outstart;
527
*inlenb = processed - inb;
528
return(XML_ENC_ERR_INPUT);
529
}
530
}
531
532
/* assertion: c is a single UTF-4 value */
533
if (out >= outend)
534
break;
535
if (c < 0x80) { *out++= c; bits= -6; }
536
else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
537
else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
538
else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
539
540
for ( ; bits >= 0; bits-= 6) {
541
if (out >= outend)
542
break;
543
*out++= ((c >> bits) & 0x3F) | 0x80;
544
}
545
processed = (const unsigned char*) in;
546
}
547
*outlen = out - outstart;
548
*inlenb = processed - inb;
549
return(*outlen);
550
}
551
552
#ifdef LIBXML_OUTPUT_ENABLED
553
/**
554
* UTF8ToUTF16LE:
555
* @outb: a pointer to an array of bytes to store the result
556
* @outlen: the length of @outb
557
* @in: a pointer to an array of UTF-8 chars
558
* @inlen: the length of @in
559
*
560
* Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
561
* block of chars out.
562
*
563
* Returns the number of bytes written or an XML_ENC_ERR code.
564
*/
565
static int
566
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
567
const unsigned char* in, int *inlen)
568
{
569
unsigned short* out = (unsigned short *) (void *) outb;
570
const unsigned char* processed = in;
571
const unsigned char *const instart = in;
572
unsigned short* outstart= out;
573
unsigned short* outend;
574
const unsigned char* inend;
575
unsigned int c, d;
576
int trailing;
577
unsigned char *tmp;
578
unsigned short tmp1, tmp2;
579
580
/* UTF16LE encoding has no BOM */
581
if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
582
return(XML_ENC_ERR_INTERNAL);
583
if (in == NULL) {
584
*outlen = 0;
585
*inlen = 0;
586
return(0);
587
}
588
inend= in + *inlen;
589
outend = out + (*outlen / 2);
590
while (in < inend) {
591
d= *in++;
592
if (d < 0x80) { c= d; trailing= 0; }
593
else if (d < 0xC0) {
594
/* trailing byte in leading position */
595
*outlen = (out - outstart) * 2;
596
*inlen = processed - instart;
597
return(XML_ENC_ERR_INPUT);
598
} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
599
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
600
else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
601
else {
602
/* no chance for this in UTF-16 */
603
*outlen = (out - outstart) * 2;
604
*inlen = processed - instart;
605
return(XML_ENC_ERR_INPUT);
606
}
607
608
if (inend - in < trailing) {
609
break;
610
}
611
612
for ( ; trailing; trailing--) {
613
if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
614
break;
615
c <<= 6;
616
c |= d & 0x3F;
617
}
618
619
/* assertion: c is a single UTF-4 value */
620
if (c < 0x10000) {
621
if (out >= outend)
622
break;
623
if (xmlLittleEndian) {
624
*out++ = c;
625
} else {
626
tmp = (unsigned char *) out;
627
*tmp = (unsigned char) c; /* Explicit truncation */
628
*(tmp + 1) = c >> 8 ;
629
out++;
630
}
631
}
632
else if (c < 0x110000) {
633
if (out+1 >= outend)
634
break;
635
c -= 0x10000;
636
if (xmlLittleEndian) {
637
*out++ = 0xD800 | (c >> 10);
638
*out++ = 0xDC00 | (c & 0x03FF);
639
} else {
640
tmp1 = 0xD800 | (c >> 10);
641
tmp = (unsigned char *) out;
642
*tmp = (unsigned char) tmp1; /* Explicit truncation */
643
*(tmp + 1) = tmp1 >> 8;
644
out++;
645
646
tmp2 = 0xDC00 | (c & 0x03FF);
647
tmp = (unsigned char *) out;
648
*tmp = (unsigned char) tmp2; /* Explicit truncation */
649
*(tmp + 1) = tmp2 >> 8;
650
out++;
651
}
652
}
653
else
654
break;
655
processed = in;
656
}
657
*outlen = (out - outstart) * 2;
658
*inlen = processed - instart;
659
return(*outlen);
660
}
661
662
/**
 * UTF8ToUTF16:
 * @outb:  buffer receiving the UTF-16 bytes
 * @outlen:  in: capacity of @outb in bytes; out: bytes produced
 * @in:  UTF-8 input, or NULL for initialization
 * @inlen:  in: number of bytes in @in; out: number of bytes consumed
 *
 * With @in == NULL, writes the little-endian byte order mark (FF FE)
 * if @outb has room for it.  Otherwise the data is converted by
 * UTF8ToUTF16LE().
 *
 * Returns the number of bytes written or an XML_ENC_ERR code.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    int bomLen = 0;

    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /*
     * initialization, add the Byte Order Mark for UTF-16LE
     */
    if (*outlen >= 2) {
        outb[0] = 0xFF;
        outb[1] = 0xFE;
        bomLen = 2;
    }
    *outlen = bomLen;
    *inlen = 0;
    return(bomLen);
}
695
#endif /* LIBXML_OUTPUT_ENABLED */
696
697
/**
698
* UTF16BEToUTF8:
699
* @out: a pointer to an array of bytes to store the result
700
* @outlen: the length of @out
701
* @inb: a pointer to an array of UTF-16 passed as a byte array
702
* @inlenb: the length of @in in UTF-16 chars
703
*
704
* Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
705
* block of chars out. This function assumes the endian property
706
* is the same between the native type of this machine and the
707
* inputed one.
708
*
709
* Returns the number of bytes written or an XML_ENC_ERR code.
710
*
711
* The value of *inlen after return is the number of octets consumed
712
* if the return value is positive, else unpredictable.
713
*/
714
static int
715
UTF16BEToUTF8(unsigned char* out, int *outlen,
716
const unsigned char* inb, int *inlenb)
717
{
718
unsigned char* outstart = out;
719
const unsigned char* processed = inb;
720
unsigned char* outend;
721
unsigned short* in = (unsigned short *) (void *) inb;
722
unsigned short* inend;
723
unsigned int c, d, inlen;
724
unsigned char *tmp;
725
int bits;
726
727
if (*outlen == 0) {
728
*inlenb = 0;
729
return(0);
730
}
731
outend = out + *outlen;
732
if ((*inlenb % 2) == 1)
733
(*inlenb)--;
734
inlen = *inlenb / 2;
735
inend= in + inlen;
736
while ((in < inend) && (out - outstart + 5 < *outlen)) {
737
if (xmlLittleEndian) {
738
tmp = (unsigned char *) in;
739
c = *tmp++;
740
c = (c << 8) | *tmp;
741
in++;
742
} else {
743
c= *in++;
744
}
745
if ((c & 0xFC00) == 0xD800) { /* surrogates */
746
if (in >= inend) { /* handle split mutli-byte characters */
747
break;
748
}
749
if (xmlLittleEndian) {
750
tmp = (unsigned char *) in;
751
d = *tmp++;
752
d = (d << 8) | *tmp;
753
in++;
754
} else {
755
d= *in++;
756
}
757
if ((d & 0xFC00) == 0xDC00) {
758
c &= 0x03FF;
759
c <<= 10;
760
c |= d & 0x03FF;
761
c += 0x10000;
762
}
763
else {
764
*outlen = out - outstart;
765
*inlenb = processed - inb;
766
return(XML_ENC_ERR_INPUT);
767
}
768
}
769
770
/* assertion: c is a single UTF-4 value */
771
if (out >= outend)
772
break;
773
if (c < 0x80) { *out++= c; bits= -6; }
774
else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
775
else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
776
else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
777
778
for ( ; bits >= 0; bits-= 6) {
779
if (out >= outend)
780
break;
781
*out++= ((c >> bits) & 0x3F) | 0x80;
782
}
783
processed = (const unsigned char*) in;
784
}
785
*outlen = out - outstart;
786
*inlenb = processed - inb;
787
return(*outlen);
788
}
789
790
#ifdef LIBXML_OUTPUT_ENABLED
791
/**
792
* UTF8ToUTF16BE:
793
* @outb: a pointer to an array of bytes to store the result
794
* @outlen: the length of @outb
795
* @in: a pointer to an array of UTF-8 chars
796
* @inlen: the length of @in
797
*
798
* Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
799
* block of chars out.
800
*
801
* Returns the number of bytes written or an XML_ENC_ERR code.
802
*/
803
static int
804
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
805
const unsigned char* in, int *inlen)
806
{
807
unsigned short* out = (unsigned short *) (void *) outb;
808
const unsigned char* processed = in;
809
const unsigned char *const instart = in;
810
unsigned short* outstart= out;
811
unsigned short* outend;
812
const unsigned char* inend;
813
unsigned int c, d;
814
int trailing;
815
unsigned char *tmp;
816
unsigned short tmp1, tmp2;
817
818
/* UTF-16BE has no BOM */
819
if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
820
return(XML_ENC_ERR_INTERNAL);
821
if (in == NULL) {
822
*outlen = 0;
823
*inlen = 0;
824
return(0);
825
}
826
inend= in + *inlen;
827
outend = out + (*outlen / 2);
828
while (in < inend) {
829
d= *in++;
830
if (d < 0x80) { c= d; trailing= 0; }
831
else if (d < 0xC0) {
832
/* trailing byte in leading position */
833
*outlen = out - outstart;
834
*inlen = processed - instart;
835
return(XML_ENC_ERR_INPUT);
836
} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
837
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
838
else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
839
else {
840
/* no chance for this in UTF-16 */
841
*outlen = out - outstart;
842
*inlen = processed - instart;
843
return(XML_ENC_ERR_INPUT);
844
}
845
846
if (inend - in < trailing) {
847
break;
848
}
849
850
for ( ; trailing; trailing--) {
851
if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break;
852
c <<= 6;
853
c |= d & 0x3F;
854
}
855
856
/* assertion: c is a single UTF-4 value */
857
if (c < 0x10000) {
858
if (out >= outend) break;
859
if (xmlLittleEndian) {
860
tmp = (unsigned char *) out;
861
*tmp = c >> 8;
862
*(tmp + 1) = (unsigned char) c; /* Explicit truncation */
863
out++;
864
} else {
865
*out++ = c;
866
}
867
}
868
else if (c < 0x110000) {
869
if (out+1 >= outend) break;
870
c -= 0x10000;
871
if (xmlLittleEndian) {
872
tmp1 = 0xD800 | (c >> 10);
873
tmp = (unsigned char *) out;
874
*tmp = tmp1 >> 8;
875
*(tmp + 1) = (unsigned char) tmp1; /* Explicit truncation */
876
out++;
877
878
tmp2 = 0xDC00 | (c & 0x03FF);
879
tmp = (unsigned char *) out;
880
*tmp = tmp2 >> 8;
881
*(tmp + 1) = (unsigned char) tmp2; /* Explicit truncation */
882
out++;
883
} else {
884
*out++ = 0xD800 | (c >> 10);
885
*out++ = 0xDC00 | (c & 0x03FF);
886
}
887
}
888
else
889
break;
890
processed = in;
891
}
892
*outlen = (out - outstart) * 2;
893
*inlen = processed - instart;
894
return(*outlen);
895
}
896
#endif /* LIBXML_OUTPUT_ENABLED */
897
898
/************************************************************************
899
* *
900
* Generic encoding handling routines *
901
* *
902
************************************************************************/
903
904
/**
905
* xmlDetectCharEncoding:
906
* @in: a pointer to the first bytes of the XML entity, must be at least
907
* 2 bytes long (at least 4 if encoding is UTF4 variant).
908
* @len: pointer to the length of the buffer
909
*
910
* Guess the encoding of the entity using the first bytes of the entity content
911
* according to the non-normative appendix F of the XML-1.0 recommendation.
912
*
913
* Returns one of the XML_CHAR_ENCODING_... values.
914
*/
915
xmlCharEncoding
916
xmlDetectCharEncoding(const unsigned char* in, int len)
917
{
918
if (in == NULL)
919
return(XML_CHAR_ENCODING_NONE);
920
if (len >= 4) {
921
if ((in[0] == 0x00) && (in[1] == 0x00) &&
922
(in[2] == 0x00) && (in[3] == 0x3C))
923
return(XML_CHAR_ENCODING_UCS4BE);
924
if ((in[0] == 0x3C) && (in[1] == 0x00) &&
925
(in[2] == 0x00) && (in[3] == 0x00))
926
return(XML_CHAR_ENCODING_UCS4LE);
927
if ((in[0] == 0x00) && (in[1] == 0x00) &&
928
(in[2] == 0x3C) && (in[3] == 0x00))
929
return(XML_CHAR_ENCODING_UCS4_2143);
930
if ((in[0] == 0x00) && (in[1] == 0x3C) &&
931
(in[2] == 0x00) && (in[3] == 0x00))
932
return(XML_CHAR_ENCODING_UCS4_3412);
933
if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
934
(in[2] == 0xA7) && (in[3] == 0x94))
935
return(XML_CHAR_ENCODING_EBCDIC);
936
if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
937
(in[2] == 0x78) && (in[3] == 0x6D))
938
return(XML_CHAR_ENCODING_UTF8);
939
/*
940
* Although not part of the recommendation, we also
941
* attempt an "auto-recognition" of UTF-16LE and
942
* UTF-16BE encodings.
943
*/
944
if ((in[0] == 0x3C) && (in[1] == 0x00) &&
945
(in[2] == 0x3F) && (in[3] == 0x00))
946
return(XML_CHAR_ENCODING_UTF16LE);
947
if ((in[0] == 0x00) && (in[1] == 0x3C) &&
948
(in[2] == 0x00) && (in[3] == 0x3F))
949
return(XML_CHAR_ENCODING_UTF16BE);
950
}
951
if (len >= 3) {
952
/*
953
* Errata on XML-1.0 June 20 2001
954
* We now allow an UTF8 encoded BOM
955
*/
956
if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
957
(in[2] == 0xBF))
958
return(XML_CHAR_ENCODING_UTF8);
959
}
960
/* For UTF-16 we can recognize by the BOM */
961
if (len >= 2) {
962
if ((in[0] == 0xFE) && (in[1] == 0xFF))
963
return(XML_CHAR_ENCODING_UTF16BE);
964
if ((in[0] == 0xFF) && (in[1] == 0xFE))
965
return(XML_CHAR_ENCODING_UTF16LE);
966
}
967
return(XML_CHAR_ENCODING_NONE);
968
}
969
970
/**
971
* xmlCleanupEncodingAliases:
972
*
973
* Unregisters all aliases
974
*/
975
void
976
xmlCleanupEncodingAliases(void) {
977
int i;
978
979
if (xmlCharEncodingAliases == NULL)
980
return;
981
982
for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
983
if (xmlCharEncodingAliases[i].name != NULL)
984
xmlFree((char *) xmlCharEncodingAliases[i].name);
985
if (xmlCharEncodingAliases[i].alias != NULL)
986
xmlFree((char *) xmlCharEncodingAliases[i].alias);
987
}
988
xmlCharEncodingAliasesNb = 0;
989
xmlCharEncodingAliasesMax = 0;
990
xmlFree(xmlCharEncodingAliases);
991
xmlCharEncodingAliases = NULL;
992
}
993
994
/**
995
* xmlGetEncodingAlias:
996
* @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
997
*
998
* Lookup an encoding name for the given alias.
999
*
1000
* Returns NULL if not found, otherwise the original name
1001
*/
1002
const char *
1003
xmlGetEncodingAlias(const char *alias) {
1004
int i;
1005
char upper[100];
1006
1007
if (alias == NULL)
1008
return(NULL);
1009
1010
if (xmlCharEncodingAliases == NULL)
1011
return(NULL);
1012
1013
for (i = 0;i < 99;i++) {
1014
upper[i] = (char) toupper((unsigned char) alias[i]);
1015
if (upper[i] == 0) break;
1016
}
1017
upper[i] = 0;
1018
1019
/*
1020
* Walk down the list looking for a definition of the alias
1021
*/
1022
for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1023
if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1024
return(xmlCharEncodingAliases[i].name);
1025
}
1026
}
1027
return(NULL);
1028
}
1029
1030
/**
1031
* xmlAddEncodingAlias:
1032
* @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1033
* @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1034
*
1035
* Registers an alias @alias for an encoding named @name. Existing alias
1036
* will be overwritten.
1037
*
1038
* Returns 0 in case of success, -1 in case of error
1039
*/
1040
int
1041
xmlAddEncodingAlias(const char *name, const char *alias) {
1042
int i;
1043
char upper[100];
1044
char *nameCopy, *aliasCopy;
1045
1046
if ((name == NULL) || (alias == NULL))
1047
return(-1);
1048
1049
for (i = 0;i < 99;i++) {
1050
upper[i] = (char) toupper((unsigned char) alias[i]);
1051
if (upper[i] == 0) break;
1052
}
1053
upper[i] = 0;
1054
1055
if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1056
xmlCharEncodingAliasPtr tmp;
1057
size_t newSize = xmlCharEncodingAliasesMax ?
1058
xmlCharEncodingAliasesMax * 2 :
1059
20;
1060
1061
tmp = (xmlCharEncodingAliasPtr)
1062
xmlRealloc(xmlCharEncodingAliases,
1063
newSize * sizeof(xmlCharEncodingAlias));
1064
if (tmp == NULL)
1065
return(-1);
1066
xmlCharEncodingAliases = tmp;
1067
xmlCharEncodingAliasesMax = newSize;
1068
}
1069
1070
/*
1071
* Walk down the list looking for a definition of the alias
1072
*/
1073
for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1074
if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1075
/*
1076
* Replace the definition.
1077
*/
1078
nameCopy = xmlMemStrdup(name);
1079
if (nameCopy == NULL)
1080
return(-1);
1081
xmlFree((char *) xmlCharEncodingAliases[i].name);
1082
xmlCharEncodingAliases[i].name = nameCopy;
1083
return(0);
1084
}
1085
}
1086
/*
1087
* Add the definition
1088
*/
1089
nameCopy = xmlMemStrdup(name);
1090
if (nameCopy == NULL)
1091
return(-1);
1092
aliasCopy = xmlMemStrdup(upper);
1093
if (aliasCopy == NULL) {
1094
xmlFree(nameCopy);
1095
return(-1);
1096
}
1097
xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = nameCopy;
1098
xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = aliasCopy;
1099
xmlCharEncodingAliasesNb++;
1100
return(0);
1101
}
1102
1103
/**
1104
* xmlDelEncodingAlias:
1105
* @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1106
*
1107
* Unregisters an encoding alias @alias
1108
*
1109
* Returns 0 in case of success, -1 in case of error
1110
*/
1111
int
1112
xmlDelEncodingAlias(const char *alias) {
1113
int i;
1114
1115
if (alias == NULL)
1116
return(-1);
1117
1118
if (xmlCharEncodingAliases == NULL)
1119
return(-1);
1120
/*
1121
* Walk down the list looking for a definition of the alias
1122
*/
1123
for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1124
if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1125
xmlFree((char *) xmlCharEncodingAliases[i].name);
1126
xmlFree((char *) xmlCharEncodingAliases[i].alias);
1127
xmlCharEncodingAliasesNb--;
1128
memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1129
sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1130
return(0);
1131
}
1132
}
1133
return(-1);
1134
}
1135
1136
/**
1137
* xmlParseCharEncoding:
1138
* @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1139
*
1140
* Compare the string to the encoding schemes already known. Note
1141
* that the comparison is case insensitive accordingly to the section
1142
* [XML] 4.3.3 Character Encoding in Entities.
1143
*
1144
* Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1145
* if not recognized.
1146
*/
1147
xmlCharEncoding
1148
xmlParseCharEncoding(const char* name)
1149
{
1150
const char *alias;
1151
char upper[500];
1152
int i;
1153
1154
if (name == NULL)
1155
return(XML_CHAR_ENCODING_NONE);
1156
1157
/*
1158
* Do the alias resolution
1159
*/
1160
alias = xmlGetEncodingAlias(name);
1161
if (alias != NULL)
1162
name = alias;
1163
1164
for (i = 0;i < 499;i++) {
1165
upper[i] = (char) toupper((unsigned char) name[i]);
1166
if (upper[i] == 0) break;
1167
}
1168
upper[i] = 0;
1169
1170
if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1171
if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1172
if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1173
1174
/*
1175
* NOTE: if we were able to parse this, the endianness of UTF16 is
1176
* already found and in use
1177
*/
1178
if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1179
if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1180
1181
if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1182
if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1183
if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1184
1185
/*
1186
* NOTE: if we were able to parse this, the endianness of UCS4 is
1187
* already found and in use
1188
*/
1189
if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1190
if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1191
if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1192
1193
1194
if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1195
if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1196
if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1197
1198
if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1199
if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1200
if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1201
1202
if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1203
if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1204
if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1205
if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1206
if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1207
if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1208
if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1209
1210
if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1211
if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1212
if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1213
1214
return(XML_CHAR_ENCODING_ERROR);
1215
}
1216
1217
/**
1218
* xmlGetCharEncodingName:
1219
* @enc: the encoding
1220
*
1221
* The "canonical" name for XML encoding.
1222
* C.f. http://www.w3.org/TR/REC-xml#charencoding
1223
* Section 4.3.3 Character Encoding in Entities
1224
*
1225
* Returns the canonical name for the given encoding
1226
*/
1227
1228
const char*
1229
xmlGetCharEncodingName(xmlCharEncoding enc) {
1230
switch (enc) {
1231
case XML_CHAR_ENCODING_ERROR:
1232
return(NULL);
1233
case XML_CHAR_ENCODING_NONE:
1234
return(NULL);
1235
case XML_CHAR_ENCODING_UTF8:
1236
return("UTF-8");
1237
case XML_CHAR_ENCODING_UTF16LE:
1238
return("UTF-16");
1239
case XML_CHAR_ENCODING_UTF16BE:
1240
return("UTF-16");
1241
case XML_CHAR_ENCODING_EBCDIC:
1242
return("EBCDIC");
1243
case XML_CHAR_ENCODING_UCS4LE:
1244
return("ISO-10646-UCS-4");
1245
case XML_CHAR_ENCODING_UCS4BE:
1246
return("ISO-10646-UCS-4");
1247
case XML_CHAR_ENCODING_UCS4_2143:
1248
return("ISO-10646-UCS-4");
1249
case XML_CHAR_ENCODING_UCS4_3412:
1250
return("ISO-10646-UCS-4");
1251
case XML_CHAR_ENCODING_UCS2:
1252
return("ISO-10646-UCS-2");
1253
case XML_CHAR_ENCODING_8859_1:
1254
return("ISO-8859-1");
1255
case XML_CHAR_ENCODING_8859_2:
1256
return("ISO-8859-2");
1257
case XML_CHAR_ENCODING_8859_3:
1258
return("ISO-8859-3");
1259
case XML_CHAR_ENCODING_8859_4:
1260
return("ISO-8859-4");
1261
case XML_CHAR_ENCODING_8859_5:
1262
return("ISO-8859-5");
1263
case XML_CHAR_ENCODING_8859_6:
1264
return("ISO-8859-6");
1265
case XML_CHAR_ENCODING_8859_7:
1266
return("ISO-8859-7");
1267
case XML_CHAR_ENCODING_8859_8:
1268
return("ISO-8859-8");
1269
case XML_CHAR_ENCODING_8859_9:
1270
return("ISO-8859-9");
1271
case XML_CHAR_ENCODING_2022_JP:
1272
return("ISO-2022-JP");
1273
case XML_CHAR_ENCODING_SHIFT_JIS:
1274
return("Shift-JIS");
1275
case XML_CHAR_ENCODING_EUC_JP:
1276
return("EUC-JP");
1277
case XML_CHAR_ENCODING_ASCII:
1278
return(NULL);
1279
}
1280
return(NULL);
1281
}
1282
1283
/************************************************************************
1284
* *
1285
* Char encoding handlers *
1286
* *
1287
************************************************************************/
1288
1289
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
    defined(LIBXML_ISO8859X_ENABLED)

/*
 * Forward declarations of the ISO-8859-n converters, needed so that the
 * default handler table below can reference them. They are only
 * declared when neither iconv nor ICU support is compiled in.
 *
 * DECLARE_ISO_FUNCS(n) declares the pair
 *   ISO8859_<n>ToUTF8() and UTF8ToISO8859_<n>().
 */
#define DECLARE_ISO_FUNCS(n) \
    static int ISO8859_##n##ToUTF8(unsigned char* out, int *outlen, \
                                   const unsigned char* in, int *inlen); \
    static int UTF8ToISO8859_##n(unsigned char* out, int *outlen, \
                                 const unsigned char* in, int *inlen);

/** DOC_DISABLE */
DECLARE_ISO_FUNCS(2)
DECLARE_ISO_FUNCS(3)
DECLARE_ISO_FUNCS(4)
DECLARE_ISO_FUNCS(5)
DECLARE_ISO_FUNCS(6)
DECLARE_ISO_FUNCS(7)
DECLARE_ISO_FUNCS(8)
DECLARE_ISO_FUNCS(9)
DECLARE_ISO_FUNCS(10)
DECLARE_ISO_FUNCS(11)
DECLARE_ISO_FUNCS(13)
DECLARE_ISO_FUNCS(14)
DECLARE_ISO_FUNCS(15)
DECLARE_ISO_FUNCS(16)
/** DOC_ENABLE */

#endif /* LIBXML_ISO8859X_ENABLED */
1316
1317
/*
 * Trailing initializers for the iconv members of a statically defined
 * handler; expands to nothing when iconv support is compiled out.
 */
#ifdef LIBXML_ICONV_ENABLED
#define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
#else
#define EMPTY_ICONV
#endif

/*
 * Trailing initializers for the ICU members of a statically defined
 * handler; expands to nothing when ICU support is compiled out.
 */
#ifdef LIBXML_ICU_ENABLED
#define EMPTY_UCONV , NULL, NULL
#else
#define EMPTY_UCONV
#endif

/*
 * Brace initializer for one xmlCharEncodingHandler: encoding name,
 * input converter, output converter, then the optional empty
 * iconv/ICU members.
 */
#define MAKE_HANDLER(name, in, out) \
    { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1331
1332
static const xmlCharEncodingHandler defaultHandlers[] = {
1333
MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
1334
#ifdef LIBXML_OUTPUT_ENABLED
1335
,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
1336
,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
1337
,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
1338
,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
1339
,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
1340
,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
1341
#ifdef LIBXML_HTML_ENABLED
1342
,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
1343
#endif
1344
#else
1345
,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
1346
,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
1347
,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
1348
,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
1349
,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
1350
,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
1351
#endif /* LIBXML_OUTPUT_ENABLED */
1352
1353
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1354
defined(LIBXML_ISO8859X_ENABLED)
1355
,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
1356
,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
1357
,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
1358
,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
1359
,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
1360
,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
1361
,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
1362
,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
1363
,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
1364
,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
1365
,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
1366
,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
1367
,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
1368
,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
1369
#endif
1370
};
1371
1372
#define NUM_DEFAULT_HANDLERS \
1373
(sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
1374
1375
static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
1376
static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];
1377
1378
/* the size should be growable, but it's not a big deal ... */
1379
#define MAX_ENCODING_HANDLERS 50
1380
static xmlCharEncodingHandlerPtr *handlers = NULL;
1381
static int nbCharEncodingHandler = 0;
1382
1383
/**
1384
* xmlNewCharEncodingHandler:
1385
* @name: the encoding name, in UTF-8 format (ASCII actually)
1386
* @input: the xmlCharEncodingInputFunc to read that encoding
1387
* @output: the xmlCharEncodingOutputFunc to write that encoding
1388
*
1389
* Create and registers an xmlCharEncodingHandler.
1390
*
1391
* Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1392
*/
1393
xmlCharEncodingHandlerPtr
1394
xmlNewCharEncodingHandler(const char *name,
1395
xmlCharEncodingInputFunc input,
1396
xmlCharEncodingOutputFunc output) {
1397
xmlCharEncodingHandlerPtr handler;
1398
const char *alias;
1399
char upper[500];
1400
int i;
1401
char *up = NULL;
1402
1403
/*
1404
* Do the alias resolution
1405
*/
1406
alias = xmlGetEncodingAlias(name);
1407
if (alias != NULL)
1408
name = alias;
1409
1410
/*
1411
* Keep only the uppercase version of the encoding.
1412
*/
1413
if (name == NULL)
1414
return(NULL);
1415
for (i = 0;i < 499;i++) {
1416
upper[i] = (char) toupper((unsigned char) name[i]);
1417
if (upper[i] == 0) break;
1418
}
1419
upper[i] = 0;
1420
up = xmlMemStrdup(upper);
1421
if (up == NULL)
1422
return(NULL);
1423
1424
/*
1425
* allocate and fill-up an handler block.
1426
*/
1427
handler = (xmlCharEncodingHandlerPtr)
1428
xmlMalloc(sizeof(xmlCharEncodingHandler));
1429
if (handler == NULL) {
1430
xmlFree(up);
1431
return(NULL);
1432
}
1433
memset(handler, 0, sizeof(xmlCharEncodingHandler));
1434
handler->input = input;
1435
handler->output = output;
1436
handler->name = up;
1437
1438
#ifdef LIBXML_ICONV_ENABLED
1439
handler->iconv_in = NULL;
1440
handler->iconv_out = NULL;
1441
#endif
1442
#ifdef LIBXML_ICU_ENABLED
1443
handler->uconv_in = NULL;
1444
handler->uconv_out = NULL;
1445
#endif
1446
1447
/*
1448
* registers and returns the handler.
1449
*/
1450
xmlRegisterCharEncodingHandler(handler);
1451
return(handler);
1452
}
1453
1454
/**
 * xmlInitCharEncodingHandlers:
 *
 * DEPRECATED: Alias for xmlInitParser.
 *
 * Kept for API compatibility; it only forwards to xmlInitParser().
 */
void
xmlInitCharEncodingHandlers(void)
{
    xmlInitParser();
}
1463
1464
/**
1465
* xmlInitEncodingInternal:
1466
*
1467
* Initialize the char encoding support.
1468
*/
1469
void
1470
xmlInitEncodingInternal(void) {
1471
unsigned short int tst = 0x1234;
1472
unsigned char *ptr = (unsigned char *) &tst;
1473
1474
if (*ptr == 0x12) xmlLittleEndian = 0;
1475
else xmlLittleEndian = 1;
1476
}
1477
1478
/**
1479
* xmlCleanupCharEncodingHandlers:
1480
*
1481
* DEPRECATED: This function will be made private. Call xmlCleanupParser
1482
* to free global state but see the warnings there. xmlCleanupParser
1483
* should be only called once at program exit. In most cases, you don't
1484
* have call cleanup functions at all.
1485
*
1486
* Cleanup the memory allocated for the char encoding support, it
1487
* unregisters all the encoding handlers and the aliases.
1488
*/
1489
void
1490
xmlCleanupCharEncodingHandlers(void) {
1491
xmlCleanupEncodingAliases();
1492
1493
if (handlers == NULL) return;
1494
1495
for (;nbCharEncodingHandler > 0;) {
1496
nbCharEncodingHandler--;
1497
if (handlers[nbCharEncodingHandler] != NULL) {
1498
if (handlers[nbCharEncodingHandler]->name != NULL)
1499
xmlFree(handlers[nbCharEncodingHandler]->name);
1500
xmlFree(handlers[nbCharEncodingHandler]);
1501
}
1502
}
1503
xmlFree(handlers);
1504
handlers = NULL;
1505
nbCharEncodingHandler = 0;
1506
}
1507
1508
/**
1509
* xmlRegisterCharEncodingHandler:
1510
* @handler: the xmlCharEncodingHandlerPtr handler block
1511
*
1512
* Register the char encoding handler, surprising, isn't it ?
1513
*/
1514
void
1515
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1516
if (handler == NULL)
1517
return;
1518
if (handlers == NULL) {
1519
handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1520
if (handlers == NULL)
1521
goto free_handler;
1522
}
1523
1524
if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS)
1525
goto free_handler;
1526
handlers[nbCharEncodingHandler++] = handler;
1527
return;
1528
1529
free_handler:
1530
if (handler != NULL) {
1531
if (handler->name != NULL) {
1532
xmlFree(handler->name);
1533
}
1534
xmlFree(handler);
1535
}
1536
}
1537
1538
/**
1539
* xmlGetCharEncodingHandler:
1540
* @enc: an xmlCharEncoding value.
1541
*
1542
* Search in the registered set the handler able to read/write that encoding.
1543
*
1544
* Returns the handler or NULL if not found
1545
*/
1546
xmlCharEncodingHandlerPtr
1547
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1548
xmlCharEncodingHandlerPtr handler;
1549
1550
switch (enc) {
1551
case XML_CHAR_ENCODING_ERROR:
1552
return(NULL);
1553
case XML_CHAR_ENCODING_NONE:
1554
return(NULL);
1555
case XML_CHAR_ENCODING_UTF8:
1556
return(NULL);
1557
case XML_CHAR_ENCODING_UTF16LE:
1558
return((xmlCharEncodingHandlerPtr) xmlUTF16LEHandler);
1559
case XML_CHAR_ENCODING_UTF16BE:
1560
return((xmlCharEncodingHandlerPtr) xmlUTF16BEHandler);
1561
case XML_CHAR_ENCODING_EBCDIC:
1562
handler = xmlFindCharEncodingHandler("EBCDIC");
1563
if (handler != NULL) return(handler);
1564
handler = xmlFindCharEncodingHandler("ebcdic");
1565
if (handler != NULL) return(handler);
1566
handler = xmlFindCharEncodingHandler("EBCDIC-US");
1567
if (handler != NULL) return(handler);
1568
handler = xmlFindCharEncodingHandler("IBM-037");
1569
if (handler != NULL) return(handler);
1570
break;
1571
case XML_CHAR_ENCODING_UCS4BE:
1572
handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1573
if (handler != NULL) return(handler);
1574
handler = xmlFindCharEncodingHandler("UCS-4");
1575
if (handler != NULL) return(handler);
1576
handler = xmlFindCharEncodingHandler("UCS4");
1577
if (handler != NULL) return(handler);
1578
break;
1579
case XML_CHAR_ENCODING_UCS4LE:
1580
handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1581
if (handler != NULL) return(handler);
1582
handler = xmlFindCharEncodingHandler("UCS-4");
1583
if (handler != NULL) return(handler);
1584
handler = xmlFindCharEncodingHandler("UCS4");
1585
if (handler != NULL) return(handler);
1586
break;
1587
case XML_CHAR_ENCODING_UCS4_2143:
1588
break;
1589
case XML_CHAR_ENCODING_UCS4_3412:
1590
break;
1591
case XML_CHAR_ENCODING_UCS2:
1592
handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1593
if (handler != NULL) return(handler);
1594
handler = xmlFindCharEncodingHandler("UCS-2");
1595
if (handler != NULL) return(handler);
1596
handler = xmlFindCharEncodingHandler("UCS2");
1597
if (handler != NULL) return(handler);
1598
break;
1599
1600
/*
1601
* We used to keep ISO Latin encodings native in the
1602
* generated data. This led to so many problems that
1603
* this has been removed. One can still change this
1604
* back by registering no-ops encoders for those
1605
*/
1606
case XML_CHAR_ENCODING_8859_1:
1607
handler = xmlFindCharEncodingHandler("ISO-8859-1");
1608
if (handler != NULL) return(handler);
1609
break;
1610
case XML_CHAR_ENCODING_8859_2:
1611
handler = xmlFindCharEncodingHandler("ISO-8859-2");
1612
if (handler != NULL) return(handler);
1613
break;
1614
case XML_CHAR_ENCODING_8859_3:
1615
handler = xmlFindCharEncodingHandler("ISO-8859-3");
1616
if (handler != NULL) return(handler);
1617
break;
1618
case XML_CHAR_ENCODING_8859_4:
1619
handler = xmlFindCharEncodingHandler("ISO-8859-4");
1620
if (handler != NULL) return(handler);
1621
break;
1622
case XML_CHAR_ENCODING_8859_5:
1623
handler = xmlFindCharEncodingHandler("ISO-8859-5");
1624
if (handler != NULL) return(handler);
1625
break;
1626
case XML_CHAR_ENCODING_8859_6:
1627
handler = xmlFindCharEncodingHandler("ISO-8859-6");
1628
if (handler != NULL) return(handler);
1629
break;
1630
case XML_CHAR_ENCODING_8859_7:
1631
handler = xmlFindCharEncodingHandler("ISO-8859-7");
1632
if (handler != NULL) return(handler);
1633
break;
1634
case XML_CHAR_ENCODING_8859_8:
1635
handler = xmlFindCharEncodingHandler("ISO-8859-8");
1636
if (handler != NULL) return(handler);
1637
break;
1638
case XML_CHAR_ENCODING_8859_9:
1639
handler = xmlFindCharEncodingHandler("ISO-8859-9");
1640
if (handler != NULL) return(handler);
1641
break;
1642
1643
1644
case XML_CHAR_ENCODING_2022_JP:
1645
handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1646
if (handler != NULL) return(handler);
1647
break;
1648
case XML_CHAR_ENCODING_SHIFT_JIS:
1649
handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1650
if (handler != NULL) return(handler);
1651
handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1652
if (handler != NULL) return(handler);
1653
handler = xmlFindCharEncodingHandler("Shift_JIS");
1654
if (handler != NULL) return(handler);
1655
break;
1656
case XML_CHAR_ENCODING_EUC_JP:
1657
handler = xmlFindCharEncodingHandler("EUC-JP");
1658
if (handler != NULL) return(handler);
1659
break;
1660
default:
1661
break;
1662
}
1663
1664
return(NULL);
1665
}
1666
1667
/**
1668
* xmlFindCharEncodingHandler:
1669
* @name: a string describing the char encoding.
1670
*
1671
* Search in the registered set the handler able to read/write that encoding
1672
* or create a new one.
1673
*
1674
* Returns the handler or NULL if not found
1675
*/
1676
xmlCharEncodingHandlerPtr
1677
xmlFindCharEncodingHandler(const char *name) {
1678
const char *nalias;
1679
const char *norig;
1680
xmlCharEncoding alias;
1681
#ifdef LIBXML_ICONV_ENABLED
1682
xmlCharEncodingHandlerPtr enc;
1683
iconv_t icv_in, icv_out;
1684
#endif /* LIBXML_ICONV_ENABLED */
1685
#ifdef LIBXML_ICU_ENABLED
1686
xmlCharEncodingHandlerPtr encu;
1687
uconv_t *ucv_in, *ucv_out;
1688
#endif /* LIBXML_ICU_ENABLED */
1689
char upper[100];
1690
int i;
1691
1692
if (name == NULL) return(NULL);
1693
if (name[0] == 0) return(NULL);
1694
1695
/*
1696
* Do the alias resolution
1697
*/
1698
norig = name;
1699
nalias = xmlGetEncodingAlias(name);
1700
if (nalias != NULL)
1701
name = nalias;
1702
1703
/*
1704
* Check first for directly registered encoding names
1705
*/
1706
for (i = 0;i < 99;i++) {
1707
upper[i] = (char) toupper((unsigned char) name[i]);
1708
if (upper[i] == 0) break;
1709
}
1710
upper[i] = 0;
1711
1712
for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1713
if (strcmp(upper, defaultHandlers[i].name) == 0)
1714
return((xmlCharEncodingHandlerPtr) &defaultHandlers[i]);
1715
}
1716
1717
if (handlers != NULL) {
1718
for (i = 0;i < nbCharEncodingHandler; i++) {
1719
if (!strcmp(upper, handlers[i]->name)) {
1720
return(handlers[i]);
1721
}
1722
}
1723
}
1724
1725
#ifdef LIBXML_ICONV_ENABLED
1726
/* check whether iconv can handle this */
1727
icv_in = iconv_open("UTF-8", name);
1728
icv_out = iconv_open(name, "UTF-8");
1729
if (icv_in == (iconv_t) -1) {
1730
icv_in = iconv_open("UTF-8", upper);
1731
}
1732
if (icv_out == (iconv_t) -1) {
1733
icv_out = iconv_open(upper, "UTF-8");
1734
}
1735
if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1736
enc = (xmlCharEncodingHandlerPtr)
1737
xmlMalloc(sizeof(xmlCharEncodingHandler));
1738
if (enc == NULL) {
1739
iconv_close(icv_in);
1740
iconv_close(icv_out);
1741
return(NULL);
1742
}
1743
memset(enc, 0, sizeof(xmlCharEncodingHandler));
1744
enc->name = xmlMemStrdup(name);
1745
if (enc->name == NULL) {
1746
xmlFree(enc);
1747
iconv_close(icv_in);
1748
iconv_close(icv_out);
1749
return(NULL);
1750
}
1751
enc->input = NULL;
1752
enc->output = NULL;
1753
enc->iconv_in = icv_in;
1754
enc->iconv_out = icv_out;
1755
return enc;
1756
} else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1757
if (icv_in != (iconv_t) -1)
1758
iconv_close(icv_in);
1759
else
1760
iconv_close(icv_out);
1761
}
1762
#endif /* LIBXML_ICONV_ENABLED */
1763
#ifdef LIBXML_ICU_ENABLED
1764
/* check whether icu can handle this */
1765
ucv_in = openIcuConverter(name, 1);
1766
ucv_out = openIcuConverter(name, 0);
1767
if (ucv_in != NULL && ucv_out != NULL) {
1768
encu = (xmlCharEncodingHandlerPtr)
1769
xmlMalloc(sizeof(xmlCharEncodingHandler));
1770
if (encu == NULL) {
1771
closeIcuConverter(ucv_in);
1772
closeIcuConverter(ucv_out);
1773
return(NULL);
1774
}
1775
memset(encu, 0, sizeof(xmlCharEncodingHandler));
1776
encu->name = xmlMemStrdup(name);
1777
if (encu->name == NULL) {
1778
xmlFree(encu);
1779
closeIcuConverter(ucv_in);
1780
closeIcuConverter(ucv_out);
1781
return(NULL);
1782
}
1783
encu->input = NULL;
1784
encu->output = NULL;
1785
encu->uconv_in = ucv_in;
1786
encu->uconv_out = ucv_out;
1787
return encu;
1788
} else if (ucv_in != NULL || ucv_out != NULL) {
1789
closeIcuConverter(ucv_in);
1790
closeIcuConverter(ucv_out);
1791
}
1792
#endif /* LIBXML_ICU_ENABLED */
1793
1794
/*
1795
* Fallback using the canonical names
1796
*/
1797
alias = xmlParseCharEncoding(norig);
1798
if (alias != XML_CHAR_ENCODING_ERROR) {
1799
const char* canon;
1800
canon = xmlGetCharEncodingName(alias);
1801
if ((canon != NULL) && (strcmp(name, canon))) {
1802
return(xmlFindCharEncodingHandler(canon));
1803
}
1804
}
1805
1806
/* If "none of the above", give up */
1807
return(NULL);
1808
}
1809
1810
/************************************************************************
1811
* *
1812
* ICONV based generic conversion functions *
1813
* *
1814
************************************************************************/
1815
1816
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Runs a single iconv() call over the given buffers and translates its
 * errno into an XML_ENC_ERR code: EILSEQ becomes XML_ENC_ERR_INPUT,
 * E2BIG becomes XML_ENC_ERR_SPACE, EINVAL becomes XML_ENC_ERR_PARTIAL
 * and anything else XML_ENC_ERR_INTERNAL.
 *
 * Returns an XML_ENC_ERR code.
 *
 * On return, @inlen and @outlen are reduced by the byte counts iconv
 * reports as left over, i.e. they hold the number of octets consumed
 * and produced. If a pointer argument is NULL, XML_ENC_ERR_INTERNAL is
 * returned and @outlen (when not NULL) is set to 0.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t srcLeft, dstLeft;
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(XML_ENC_ERR_INTERNAL);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    /*
     * Some versions take const, other versions take non-const input.
     */
    rc = iconv(cd, (void *) &src, &srcLeft, &dst, &dstLeft);
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if (rc != (size_t) -1)
        return(XML_ENC_ERR_SUCCESS);

    switch (errno) {
        case EILSEQ:
            return(XML_ENC_ERR_INPUT);
        case E2BIG:
            return(XML_ENC_ERR_SPACE);
        case EINVAL:
            return(XML_ENC_ERR_PARTIAL);
        default:
            return(XML_ENC_ERR_INTERNAL);
    }
}
#endif /* LIBXML_ICONV_ENABLED */
1863
1864
/************************************************************************
1865
* *
1866
* ICU based generic conversion functions *
1867
* *
1868
************************************************************************/
1869
1870
#ifdef LIBXML_ICU_ENABLED
/**
 * xmlUconvWrapper:
 * @cd:  ICU uconverter data structure
 * @toUnicode:  non-zero if toUnicode. 0 otherwise.
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Converts between the handler's encoding and UTF-8 through the UTF-16
 * pivot buffer stored in @cd. The direction is selected by @toUnicode.
 *
 * Returns an XML_ENC_ERR code.
 *
 * On return, @inlen holds the number of octets consumed and @outlen the
 * number of octets produced. If a pointer argument is NULL,
 * XML_ENC_ERR_INTERNAL is returned and @outlen (when not NULL) is set
 * to 0.
 */
static int
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    UConverter *targetCnv;
    UConverter *sourceCnv;
    UErrorCode err = U_ZERO_ERROR;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(XML_ENC_ERR_INTERNAL);
    }

    if (toUnicode) {
        /* encoding => UTF-16 => UTF-8 */
        targetCnv = cd->utf8;
        sourceCnv = cd->uconv;
    } else {
        /* UTF-8 => UTF-16 => encoding */
        targetCnv = cd->uconv;
        sourceCnv = cd->utf8;
    }

    /*
     * Note that the ICU API is stateful. It can always consume a certain
     * amount of input even if the output buffer would overflow. The
     * remaining input must be processed by calling ucnv_convertEx with a
     * possibly empty input buffer.
     *
     * ucnv_convertEx is always called with reset and flush set to 0,
     * so we don't mess up the state. This should never generate
     * U_TRUNCATED_CHAR_FOUND errors.
     *
     * This also means that ICU xmlCharEncodingHandlers should never be
     * reused. It would be a lot nicer if there was a way to emulate the
     * stateless iconv API.
     */
    ucnv_convertEx(targetCnv, sourceCnv, &dst, dst + *outlen,
                   &src, src + *inlen, cd->pivot_buf,
                   &cd->pivot_source, &cd->pivot_target,
                   cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, 0, &err);

    *inlen = src - (const char*) in;
    *outlen = dst - (char *) out;

    if (U_SUCCESS(err))
        return(XML_ENC_ERR_SUCCESS);
    if (err == U_BUFFER_OVERFLOW_ERROR)
        return(XML_ENC_ERR_SPACE);
    if ((err == U_INVALID_CHAR_FOUND) || (err == U_ILLEGAL_CHAR_FOUND))
        return(XML_ENC_ERR_INPUT);
    return(XML_ENC_ERR_PARTIAL);
}
#endif /* LIBXML_ICU_ENABLED */
1937
1938
/************************************************************************
1939
* *
1940
* The real API used by libxml for on-the-fly conversion *
1941
* *
1942
************************************************************************/
1943
1944
/**
1945
* xmlEncConvertError:
1946
* @code: XML_ENC_ERR code
1947
*
1948
* Convert XML_ENC_ERR to libxml2 error codes.
1949
*/
1950
static int
1951
xmlEncConvertError(int code) {
1952
int ret;
1953
1954
switch (code) {
1955
case XML_ENC_ERR_SUCCESS:
1956
ret = XML_ERR_OK;
1957
break;
1958
case XML_ENC_ERR_INPUT:
1959
ret = XML_ERR_INVALID_ENCODING;
1960
break;
1961
case XML_ENC_ERR_MEMORY:
1962
ret = XML_ERR_NO_MEMORY;
1963
break;
1964
default:
1965
ret = XML_ERR_INTERNAL_ERROR;
1966
break;
1967
}
1968
1969
return(ret);
1970
}
1971
1972
/**
1973
* xmlEncInputChunk:
1974
* @handler: encoding handler
1975
* @out: a pointer to an array of bytes to store the result
1976
* @outlen: the length of @out
1977
* @in: a pointer to an array of input bytes
1978
* @inlen: the length of @in
1979
*
1980
* The value of @inlen after return is the number of octets consumed
1981
* as the return value is 0, else unpredictable.
1982
* The value of @outlen after return is the number of octets produced.
1983
*
1984
* Returns an XML_ENC_ERR code.
1985
*/
1986
int
1987
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1988
int *outlen, const unsigned char *in, int *inlen) {
1989
int ret;
1990
1991
if (handler->input != NULL) {
1992
int oldinlen = *inlen;
1993
1994
ret = handler->input(out, outlen, in, inlen);
1995
if (ret >= 0) {
1996
/*
1997
* The built-in converters don't signal XML_ENC_ERR_SPACE.
1998
*/
1999
if (*inlen < oldinlen) {
2000
if (*outlen > 0)
2001
ret = XML_ENC_ERR_SPACE;
2002
else
2003
ret = XML_ENC_ERR_PARTIAL;
2004
} else {
2005
ret = XML_ENC_ERR_SUCCESS;
2006
}
2007
}
2008
}
2009
#ifdef LIBXML_ICONV_ENABLED
2010
else if (handler->iconv_in != NULL) {
2011
ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2012
}
2013
#endif /* LIBXML_ICONV_ENABLED */
2014
#ifdef LIBXML_ICU_ENABLED
2015
else if (handler->uconv_in != NULL) {
2016
ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen);
2017
}
2018
#endif /* LIBXML_ICU_ENABLED */
2019
else {
2020
*outlen = 0;
2021
*inlen = 0;
2022
ret = XML_ENC_ERR_INTERNAL;
2023
}
2024
2025
/* Ignore partial errors when reading. */
2026
if (ret == XML_ENC_ERR_PARTIAL)
2027
ret = XML_ENC_ERR_SUCCESS;
2028
2029
return(ret);
2030
}
2031
2032
/**
2033
* xmlEncOutputChunk:
2034
* @handler: encoding handler
2035
* @out: a pointer to an array of bytes to store the result
2036
* @outlen: the length of @out
2037
* @in: a pointer to an array of input bytes
2038
* @inlen: the length of @in
2039
*
2040
* Returns an XML_ENC_ERR code.
2041
*
2042
* The value of @inlen after return is the number of octets consumed
2043
* as the return value is 0, else unpredictable.
2044
* The value of @outlen after return is the number of octets produced.
2045
*/
2046
static int
2047
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2048
int *outlen, const unsigned char *in, int *inlen) {
2049
int ret;
2050
2051
if (handler->output != NULL) {
2052
int oldinlen = *inlen;
2053
2054
ret = handler->output(out, outlen, in, inlen);
2055
if (ret >= 0) {
2056
/*
2057
* The built-in converters don't signal XML_ENC_ERR_SPACE.
2058
*/
2059
if (*inlen < oldinlen) {
2060
if (*outlen > 0)
2061
ret = XML_ENC_ERR_SPACE;
2062
else
2063
ret = XML_ENC_ERR_PARTIAL;
2064
} else {
2065
ret = XML_ENC_ERR_SUCCESS;
2066
}
2067
}
2068
}
2069
#ifdef LIBXML_ICONV_ENABLED
2070
else if (handler->iconv_out != NULL) {
2071
ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2072
}
2073
#endif /* LIBXML_ICONV_ENABLED */
2074
#ifdef LIBXML_ICU_ENABLED
2075
else if (handler->uconv_out != NULL) {
2076
ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen);
2077
}
2078
#endif /* LIBXML_ICU_ENABLED */
2079
else {
2080
*outlen = 0;
2081
*inlen = 0;
2082
ret = XML_ENC_ERR_INTERNAL;
2083
}
2084
2085
/* We shouldn't generate partial sequences when writing. */
2086
if (ret == XML_ENC_ERR_PARTIAL)
2087
ret = XML_ENC_ERR_INTERNAL;
2088
2089
return(ret);
2090
}
2091
2092
/**
2093
* xmlCharEncFirstLine:
2094
* @handler: char encoding transformation data structure
2095
* @out: an xmlBuffer for the output.
2096
* @in: an xmlBuffer for the input
2097
*
2098
* DEPERECATED: Don't use.
2099
*
2100
* Returns the number of bytes written or an XML_ENC_ERR code.
2101
*/
2102
int
2103
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2104
xmlBufferPtr in) {
2105
return(xmlCharEncInFunc(handler, out, in));
2106
}
2107
2108
/**
2109
* xmlCharEncInput:
2110
* @input: a parser input buffer
2111
*
2112
* Generic front-end for the encoding handler on parser input
2113
*
2114
* Returns the number of bytes written or an XML_ENC_ERR code.
2115
*/
2116
int
2117
xmlCharEncInput(xmlParserInputBufferPtr input)
2118
{
2119
int ret;
2120
size_t avail;
2121
size_t toconv;
2122
int c_in;
2123
int c_out;
2124
xmlBufPtr in;
2125
xmlBufPtr out;
2126
const xmlChar *inData;
2127
size_t inTotal = 0;
2128
2129
if ((input == NULL) || (input->encoder == NULL) ||
2130
(input->buffer == NULL) || (input->raw == NULL))
2131
return(XML_ENC_ERR_INTERNAL);
2132
out = input->buffer;
2133
in = input->raw;
2134
2135
toconv = xmlBufUse(in);
2136
if (toconv == 0)
2137
return (0);
2138
inData = xmlBufContent(in);
2139
inTotal = 0;
2140
2141
do {
2142
c_in = toconv > INT_MAX / 2 ? INT_MAX / 2 : toconv;
2143
2144
avail = xmlBufAvail(out);
2145
if (avail > INT_MAX)
2146
avail = INT_MAX;
2147
if (avail < 4096) {
2148
if (xmlBufGrow(out, 4096) < 0) {
2149
input->error = XML_ERR_NO_MEMORY;
2150
return(XML_ENC_ERR_MEMORY);
2151
}
2152
avail = xmlBufAvail(out);
2153
}
2154
2155
c_in = toconv;
2156
c_out = avail;
2157
ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2158
inData, &c_in);
2159
inTotal += c_in;
2160
inData += c_in;
2161
toconv -= c_in;
2162
xmlBufAddLen(out, c_out);
2163
} while (ret == XML_ENC_ERR_SPACE);
2164
2165
xmlBufShrink(in, inTotal);
2166
2167
if (input->rawconsumed > ULONG_MAX - (unsigned long)c_in)
2168
input->rawconsumed = ULONG_MAX;
2169
else
2170
input->rawconsumed += c_in;
2171
2172
if ((c_out == 0) && (ret != 0)) {
2173
if (input->error == 0)
2174
input->error = xmlEncConvertError(ret);
2175
return(ret);
2176
}
2177
2178
return (c_out);
2179
}
2180
2181
/**
2182
* xmlCharEncInFunc:
2183
* @handler: char encoding transformation data structure
2184
* @out: an xmlBuffer for the output.
2185
* @in: an xmlBuffer for the input
2186
*
2187
* Generic front-end for the encoding handler input function
2188
*
2189
* Returns the number of bytes written or an XML_ENC_ERR code.
2190
*/
2191
int
2192
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2193
xmlBufferPtr in)
2194
{
2195
int ret;
2196
int written;
2197
int toconv;
2198
2199
if (handler == NULL)
2200
return(XML_ENC_ERR_INTERNAL);
2201
if (out == NULL)
2202
return(XML_ENC_ERR_INTERNAL);
2203
if (in == NULL)
2204
return(XML_ENC_ERR_INTERNAL);
2205
2206
toconv = in->use;
2207
if (toconv == 0)
2208
return (0);
2209
written = out->size - out->use -1; /* count '\0' */
2210
if (toconv * 2 >= written) {
2211
xmlBufferGrow(out, out->size + toconv * 2);
2212
written = out->size - out->use - 1;
2213
}
2214
ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2215
in->content, &toconv);
2216
xmlBufferShrink(in, toconv);
2217
out->use += written;
2218
out->content[out->use] = 0;
2219
2220
return (written? written : ret);
2221
}
2222
2223
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlCharEncOutput:
 * @output: a parser output buffer
 * @init: is this an initialization call without data
 *
 * Generic front-end for the encoding handler on parser output
 * a first call with @init == 1 has to be made first to initiate the
 * output in case of non-stateless encoding needing to initiate their
 * state or the output (like the BOM in UTF16).
 * In case of UTF8 sequence conversion errors for the given encoder,
 * the content will be automatically remapped to a CharRef sequence.
 *
 * Returns the number of bytes written or an XML_ENC_ERR code.
 */
int
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
{
    int ret;
    size_t written;
    int writtentot = 0;
    size_t toconv;
    int c_in;
    int c_out;
    xmlBufPtr in;
    xmlBufPtr out;

    if ((output == NULL) || (output->encoder == NULL) ||
        (output->buffer == NULL) || (output->conv == NULL))
        return(XML_ENC_ERR_INTERNAL);
    out = output->conv;
    in = output->buffer;

retry:

    written = xmlBufAvail(out);

    /*
     * First specific handling of the initialization call
     */
    if (init) {
        c_in = 0;
        c_out = (written > INT_MAX) ? INT_MAX : (int) written;
        /* TODO: Check return value. */
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                          NULL, &c_in);
        xmlBufAddLen(out, c_out);
        return(c_out);
    }

    /*
     * Conversion itself: at most 64 KiB of input and 256 KiB of output
     * per pass.
     */
    toconv = xmlBufUse(in);
    if (toconv > 64 * 1024)
        toconv = 64 * 1024;
    if (toconv * 4 >= written) {
        /*
         * Without this check a failed grow leaves the buffer too small
         * and the XML_ENC_ERR_SPACE retry below could spin forever.
         */
        if (xmlBufGrow(out, toconv * 4) < 0) {
            ret = XML_ENC_ERR_MEMORY;
            goto error;
        }
        written = xmlBufAvail(out);
    }
    if (written > 256 * 1024)
        written = 256 * 1024;

    c_in = toconv;
    c_out = written;
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                            xmlBufContent(in), &c_in);
    xmlBufShrink(in, c_in);
    xmlBufAddLen(out, c_out);
    writtentot += c_out;

    if (ret == XML_ENC_ERR_SPACE)
        goto retry;

    /*
     * Attempt to handle error cases
     */
    if (ret == XML_ENC_ERR_INPUT) {
        xmlChar charref[20];
        size_t pending = xmlBufUse(in);
        size_t room;
        int len = (pending > INT_MAX) ? INT_MAX : (int) pending;
        xmlChar *content = xmlBufContent(in);
        int cur, charrefLen;

        cur = xmlGetUTF8Char(content, &len);
        if (cur <= 0)
            goto error;

        /*
         * Removes the UTF8 sequence, and replace it by a charref
         * and continue the transcoding phase, hoping the error
         * did not mangle the encoder state.
         */
        charrefLen = snprintf((char *) &charref[0], sizeof(charref),
                              "&#%d;", cur);
        xmlBufShrink(in, len);
        if (xmlBufGrow(out, charrefLen * 4) < 0) {
            ret = XML_ENC_ERR_MEMORY;
            goto error;
        }
        room = xmlBufAvail(out);
        c_out = (room > INT_MAX) ? INT_MAX : (int) room;
        c_in = charrefLen;
        ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                                charref, &c_in);
        if ((ret < 0) || (c_in != charrefLen)) {
            ret = XML_ENC_ERR_INTERNAL;
            goto error;
        }

        xmlBufAddLen(out, c_out);
        writtentot += c_out;
        goto retry;
    }

error:
    /* Memory errors are reported even if some output was produced. */
    if (((writtentot <= 0) && (ret != 0)) ||
        (ret == XML_ENC_ERR_MEMORY)) {
        if (output->error == 0)
            output->error = xmlEncConvertError(ret);
        return(ret);
    }

    return(writtentot);
}
#endif
2343
2344
/**
2345
* xmlCharEncOutFunc:
2346
* @handler: char encoding transformation data structure
2347
* @out: an xmlBuffer for the output.
2348
* @in: an xmlBuffer for the input
2349
*
2350
* Generic front-end for the encoding handler output function
2351
* a first call with @in == NULL has to be made firs to initiate the
2352
* output in case of non-stateless encoding needing to initiate their
2353
* state or the output (like the BOM in UTF16).
2354
* In case of UTF8 sequence conversion errors for the given encoder,
2355
* the content will be automatically remapped to a CharRef sequence.
2356
*
2357
* Returns the number of bytes written or an XML_ENC_ERR code.
2358
*/
2359
int
2360
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2361
xmlBufferPtr in) {
2362
int ret;
2363
int written;
2364
int writtentot = 0;
2365
int toconv;
2366
2367
if (handler == NULL) return(XML_ENC_ERR_INTERNAL);
2368
if (out == NULL) return(XML_ENC_ERR_INTERNAL);
2369
2370
retry:
2371
2372
written = out->size - out->use;
2373
2374
if (written > 0)
2375
written--; /* Gennady: count '/0' */
2376
2377
/*
2378
* First specific handling of in = NULL, i.e. the initialization call
2379
*/
2380
if (in == NULL) {
2381
toconv = 0;
2382
/* TODO: Check return value. */
2383
xmlEncOutputChunk(handler, &out->content[out->use], &written,
2384
NULL, &toconv);
2385
out->use += written;
2386
out->content[out->use] = 0;
2387
return(0);
2388
}
2389
2390
/*
2391
* Conversion itself.
2392
*/
2393
toconv = in->use;
2394
if (toconv * 4 >= written) {
2395
xmlBufferGrow(out, toconv * 4);
2396
written = out->size - out->use - 1;
2397
}
2398
ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2399
in->content, &toconv);
2400
xmlBufferShrink(in, toconv);
2401
out->use += written;
2402
writtentot += written;
2403
out->content[out->use] = 0;
2404
2405
if (ret == XML_ENC_ERR_SPACE)
2406
goto retry;
2407
2408
/*
2409
* Attempt to handle error cases
2410
*/
2411
if (ret == XML_ENC_ERR_INPUT) {
2412
xmlChar charref[20];
2413
int len = in->use;
2414
const xmlChar *utf = (const xmlChar *) in->content;
2415
int cur, charrefLen;
2416
2417
cur = xmlGetUTF8Char(utf, &len);
2418
if (cur <= 0)
2419
return(ret);
2420
2421
/*
2422
* Removes the UTF8 sequence, and replace it by a charref
2423
* and continue the transcoding phase, hoping the error
2424
* did not mangle the encoder state.
2425
*/
2426
charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2427
"&#%d;", cur);
2428
xmlBufferShrink(in, len);
2429
xmlBufferGrow(out, charrefLen * 4);
2430
written = out->size - out->use - 1;
2431
toconv = charrefLen;
2432
ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2433
charref, &toconv);
2434
if ((ret < 0) || (toconv != charrefLen))
2435
return(XML_ENC_ERR_INTERNAL);
2436
2437
out->use += written;
2438
writtentot += written;
2439
out->content[out->use] = 0;
2440
goto retry;
2441
}
2442
return(writtentot ? writtentot : ret);
2443
}
2444
2445
/**
2446
* xmlCharEncCloseFunc:
2447
* @handler: char encoding transformation data structure
2448
*
2449
* Generic front-end for encoding handler close function
2450
*
2451
* Returns 0 if success, or -1 in case of error
2452
*/
2453
int
2454
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2455
int ret = 0;
2456
int tofree = 0;
2457
int i = 0;
2458
2459
if (handler == NULL) return(-1);
2460
2461
for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2462
if (handler == &defaultHandlers[i])
2463
return(0);
2464
}
2465
2466
if (handlers != NULL) {
2467
for (i = 0;i < nbCharEncodingHandler; i++) {
2468
if (handler == handlers[i])
2469
return(0);
2470
}
2471
}
2472
#ifdef LIBXML_ICONV_ENABLED
2473
/*
2474
* Iconv handlers can be used only once, free the whole block.
2475
* and the associated icon resources.
2476
*/
2477
if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2478
tofree = 1;
2479
if (handler->iconv_out != NULL) {
2480
if (iconv_close(handler->iconv_out))
2481
ret = -1;
2482
handler->iconv_out = NULL;
2483
}
2484
if (handler->iconv_in != NULL) {
2485
if (iconv_close(handler->iconv_in))
2486
ret = -1;
2487
handler->iconv_in = NULL;
2488
}
2489
}
2490
#endif /* LIBXML_ICONV_ENABLED */
2491
#ifdef LIBXML_ICU_ENABLED
2492
if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2493
tofree = 1;
2494
if (handler->uconv_out != NULL) {
2495
closeIcuConverter(handler->uconv_out);
2496
handler->uconv_out = NULL;
2497
}
2498
if (handler->uconv_in != NULL) {
2499
closeIcuConverter(handler->uconv_in);
2500
handler->uconv_in = NULL;
2501
}
2502
}
2503
#endif
2504
if (tofree) {
2505
/* free up only dynamic handlers iconv/uconv */
2506
if (handler->name != NULL)
2507
xmlFree(handler->name);
2508
handler->name = NULL;
2509
xmlFree(handler);
2510
}
2511
2512
return(ret);
2513
}
2514
2515
/**
2516
* xmlByteConsumed:
2517
* @ctxt: an XML parser context
2518
*
2519
* This function provides the current index of the parser relative
2520
* to the start of the current entity. This function is computed in
2521
* bytes from the beginning starting at zero and finishing at the
2522
* size in byte of the file if parsing a file. The function is
2523
* of constant cost if the input is UTF-8 but can be costly if run
2524
* on non-UTF-8 input.
2525
*
2526
* Returns the index in bytes from the beginning of the entity or -1
2527
* in case the index could not be computed.
2528
*/
2529
long
2530
xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2531
xmlParserInputPtr in;
2532
2533
if (ctxt == NULL) return(-1);
2534
in = ctxt->input;
2535
if (in == NULL) return(-1);
2536
if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2537
unsigned int unused = 0;
2538
xmlCharEncodingHandler * handler = in->buf->encoder;
2539
/*
2540
* Encoding conversion, compute the number of unused original
2541
* bytes from the input not consumed and subtract that from
2542
* the raw consumed value, this is not a cheap operation
2543
*/
2544
if (in->end - in->cur > 0) {
2545
unsigned char convbuf[32000];
2546
const unsigned char *cur = (const unsigned char *)in->cur;
2547
int toconv = in->end - in->cur, written = 32000;
2548
2549
int ret;
2550
2551
do {
2552
toconv = in->end - cur;
2553
written = 32000;
2554
ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2555
cur, &toconv);
2556
if ((ret != XML_ENC_ERR_SUCCESS) && (ret != XML_ENC_ERR_SPACE))
2557
return(-1);
2558
unused += written;
2559
cur += toconv;
2560
} while (ret == XML_ENC_ERR_SPACE);
2561
}
2562
if (in->buf->rawconsumed < unused)
2563
return(-1);
2564
return(in->buf->rawconsumed - unused);
2565
}
2566
return(in->consumed + (in->cur - in->base));
2567
}
2568
2569
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2570
#ifdef LIBXML_ISO8859X_ENABLED
2571
2572
/**
2573
* UTF8ToISO8859x:
2574
* @out: a pointer to an array of bytes to store the result
2575
* @outlen: the length of @out
2576
* @in: a pointer to an array of UTF-8 chars
2577
* @inlen: the length of @in
2578
* @xlattable: the 2-level transcoding table
2579
*
2580
* Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2581
* block of chars out.
2582
*
2583
* Returns the number of bytes written or an XML_ENC_ERR code.
2584
*
2585
* The value of @inlen after return is the number of octets consumed
2586
* as the return value is positive, else unpredictable.
2587
* The value of @outlen after return is the number of octets consumed.
2588
*/
2589
static int
2590
UTF8ToISO8859x(unsigned char* out, int *outlen,
2591
const unsigned char* in, int *inlen,
2592
const unsigned char* const xlattable) {
2593
const unsigned char* outstart = out;
2594
const unsigned char* inend;
2595
const unsigned char* instart = in;
2596
const unsigned char* processed = in;
2597
2598
if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2599
(xlattable == NULL))
2600
return(XML_ENC_ERR_INTERNAL);
2601
if (in == NULL) {
2602
/*
2603
* initialization nothing to do
2604
*/
2605
*outlen = 0;
2606
*inlen = 0;
2607
return(0);
2608
}
2609
inend = in + (*inlen);
2610
while (in < inend) {
2611
unsigned char d = *in++;
2612
if (d < 0x80) {
2613
*out++ = d;
2614
} else if (d < 0xC0) {
2615
/* trailing byte in leading position */
2616
*outlen = out - outstart;
2617
*inlen = processed - instart;
2618
return(XML_ENC_ERR_INPUT);
2619
} else if (d < 0xE0) {
2620
unsigned char c;
2621
if (!(in < inend)) {
2622
/* trailing byte not in input buffer */
2623
*outlen = out - outstart;
2624
*inlen = processed - instart;
2625
return(XML_ENC_ERR_PARTIAL);
2626
}
2627
c = *in++;
2628
if ((c & 0xC0) != 0x80) {
2629
/* not a trailing byte */
2630
*outlen = out - outstart;
2631
*inlen = processed - instart;
2632
return(XML_ENC_ERR_INPUT);
2633
}
2634
c = c & 0x3F;
2635
d = d & 0x1F;
2636
d = xlattable [48 + c + xlattable [d] * 64];
2637
if (d == 0) {
2638
/* not in character set */
2639
*outlen = out - outstart;
2640
*inlen = processed - instart;
2641
return(XML_ENC_ERR_INPUT);
2642
}
2643
*out++ = d;
2644
} else if (d < 0xF0) {
2645
unsigned char c1;
2646
unsigned char c2;
2647
if (!(in < inend - 1)) {
2648
/* trailing bytes not in input buffer */
2649
*outlen = out - outstart;
2650
*inlen = processed - instart;
2651
return(XML_ENC_ERR_PARTIAL);
2652
}
2653
c1 = *in++;
2654
if ((c1 & 0xC0) != 0x80) {
2655
/* not a trailing byte (c1) */
2656
*outlen = out - outstart;
2657
*inlen = processed - instart;
2658
return(XML_ENC_ERR_INPUT);
2659
}
2660
c2 = *in++;
2661
if ((c2 & 0xC0) != 0x80) {
2662
/* not a trailing byte (c2) */
2663
*outlen = out - outstart;
2664
*inlen = processed - instart;
2665
return(XML_ENC_ERR_INPUT);
2666
}
2667
c1 = c1 & 0x3F;
2668
c2 = c2 & 0x3F;
2669
d = d & 0x0F;
2670
d = xlattable [48 + c2 + xlattable [48 + c1 +
2671
xlattable [32 + d] * 64] * 64];
2672
if (d == 0) {
2673
/* not in character set */
2674
*outlen = out - outstart;
2675
*inlen = processed - instart;
2676
return(XML_ENC_ERR_INPUT);
2677
}
2678
*out++ = d;
2679
} else {
2680
/* cannot transcode >= U+010000 */
2681
*outlen = out - outstart;
2682
*inlen = processed - instart;
2683
return(XML_ENC_ERR_INPUT);
2684
}
2685
processed = in;
2686
}
2687
*outlen = out - outstart;
2688
*inlen = processed - instart;
2689
return(*outlen);
2690
}
2691
2692
/**
2693
* ISO8859xToUTF8
2694
* @out: a pointer to an array of bytes to store the result
2695
* @outlen: the length of @out
2696
* @in: a pointer to an array of ISO Latin 1 chars
2697
* @inlen: the length of @in
2698
*
2699
* Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
2700
* block of chars out.
2701
*
2702
* Returns the number of bytes written or an XML_ENC_ERR code.
2703
*
2704
* The value of @inlen after return is the number of octets consumed
2705
* The value of @outlen after return is the number of octets produced.
2706
*/
2707
static int
2708
ISO8859xToUTF8(unsigned char* out, int *outlen,
2709
const unsigned char* in, int *inlen,
2710
unsigned short const *unicodetable) {
2711
unsigned char* outstart = out;
2712
unsigned char* outend;
2713
const unsigned char* instart = in;
2714
const unsigned char* inend;
2715
const unsigned char* instop;
2716
unsigned int c;
2717
2718
if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2719
(in == NULL) || (unicodetable == NULL))
2720
return(XML_ENC_ERR_INTERNAL);
2721
outend = out + *outlen;
2722
inend = in + *inlen;
2723
instop = inend;
2724
2725
while ((in < inend) && (out < outend - 2)) {
2726
if (*in >= 0x80) {
2727
c = unicodetable [*in - 0x80];
2728
if (c == 0) {
2729
/* undefined code point */
2730
*outlen = out - outstart;
2731
*inlen = in - instart;
2732
return(XML_ENC_ERR_INPUT);
2733
}
2734
if (c < 0x800) {
2735
*out++ = ((c >> 6) & 0x1F) | 0xC0;
2736
*out++ = (c & 0x3F) | 0x80;
2737
} else {
2738
*out++ = ((c >> 12) & 0x0F) | 0xE0;
2739
*out++ = ((c >> 6) & 0x3F) | 0x80;
2740
*out++ = (c & 0x3F) | 0x80;
2741
}
2742
++in;
2743
}
2744
if (instop - in > outend - out) instop = in + (outend - out);
2745
while ((*in < 0x80) && (in < instop)) {
2746
*out++ = *in++;
2747
}
2748
}
2749
if ((in < inend) && (out < outend) && (*in < 0x80)) {
2750
*out++ = *in++;
2751
}
2752
if ((in < inend) && (out < outend) && (*in < 0x80)) {
2753
*out++ = *in++;
2754
}
2755
*outlen = out - outstart;
2756
*inlen = in - instart;
2757
return (*outlen);
2758
}
2759
2760
2761
/************************************************************************
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
 ************************************************************************/
2764
2765
/*
 * Code points for the ISO-8859-2 bytes 0x80..0xFF, in the layout read by
 * ISO8859xToUTF8(): index = byte - 0x80, a value of 0 marks an undefined
 * byte.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
2783
2784
/*
 * UTF-8 to ISO-8859-2 lookup table in the layout read by UTF8ToISO8859x():
 *   bytes  0..31  block number for a 2-byte lead (lead & 0x1F)
 *   bytes 32..47  block number for a 3-byte lead (lead & 0x0F)
 *   bytes 48..    64-byte blocks indexed by (trailing byte & 0x3F)
 * Block 0 is all zero; a final value of 0 means "not in character set".
 */
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
2813
2814
/*
 * Code points for the ISO-8859-3 bytes 0x80..0xFF, in the layout read by
 * ISO8859xToUTF8(): index = byte - 0x80, a value of 0 marks an undefined
 * byte.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
2832
2833
/*
 * UTF-8 to ISO-8859-3 lookup table in the layout read by UTF8ToISO8859x():
 *   bytes  0..31  block number for a 2-byte lead (lead & 0x1F)
 *   bytes 32..47  block number for a 3-byte lead (lead & 0x0F)
 *   bytes 48..    64-byte blocks indexed by (trailing byte & 0x3F)
 * A final value of 0 means "not in character set".
 */
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
2866
2867
/*
 * Code points for the ISO-8859-4 bytes 0x80..0xFF, in the layout read by
 * ISO8859xToUTF8(): index = byte - 0x80, a value of 0 marks an undefined
 * byte.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
2885
2886
/*
 * UTF-8 to ISO-8859-4 lookup table in the layout read by UTF8ToISO8859x():
 *   bytes  0..31  block number for a 2-byte lead (lead & 0x1F)
 *   bytes 32..47  block number for a 3-byte lead (lead & 0x0F)
 *   bytes 48..    64-byte blocks indexed by (trailing byte & 0x3F)
 * Block 0 is all zero; a final value of 0 means "not in character set".
 */
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
2915
2916
/*
 * Code points for the ISO-8859-5 bytes 0x80..0xFF, in the layout read by
 * ISO8859xToUTF8(): index = byte - 0x80, a value of 0 marks an undefined
 * byte.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
2934
2935
/*
 * UTF-8 to ISO-8859-5 lookup table in the layout read by UTF8ToISO8859x():
 *   bytes  0..31  block number for a 2-byte lead (lead & 0x1F)
 *   bytes 32..47  block number for a 3-byte lead (lead & 0x0F)
 *   bytes 48..    64-byte blocks indexed by (trailing byte & 0x3F); for
 *                 3-byte sequences the first trailing byte selects a
 *                 second block number, the second one the final value
 * Block 0 is all zero; a final value of 0 means "not in character set".
 */
static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2964
2965
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2966
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2967
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2968
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2969
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2970
0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2971
0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2972
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2973
0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2974
0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2975
0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2976
0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2977
0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2978
0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2979
0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2980
0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2981
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2982
};
2983
2984
static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2985
"\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2986
"\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2987
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2988
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2989
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2990
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2991
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2992
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2993
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2994
"\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2995
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2996
"\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2997
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2998
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3000
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3001
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3002
"\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3003
"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3004
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3005
"\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3006
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3007
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008
};
3009
3010
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3011
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3012
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3013
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3014
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3015
0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3016
0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3017
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3018
0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3019
0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3020
0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3021
0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3022
0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3023
0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3024
0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3025
0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3026
0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3027
};
3028
3029
static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3030
"\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3031
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3032
"\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3033
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3034
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3035
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3036
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3037
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3038
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3039
"\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3040
"\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3041
"\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3042
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3043
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3044
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3045
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3046
"\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3047
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3048
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3049
"\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3050
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3051
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3052
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3053
"\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3054
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3055
"\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3056
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3057
"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3058
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061
};
3062
3063
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3064
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3065
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3066
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3067
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3068
0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3069
0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3070
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3071
0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3072
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3073
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3074
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3075
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3076
0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3077
0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3078
0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3079
0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3080
};
3081
3082
static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3083
"\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3084
"\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3085
"\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3086
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3087
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3088
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3089
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3090
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3091
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3092
"\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3093
"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3094
"\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3095
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3096
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3097
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3098
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3099
"\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3100
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3101
"\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3102
"\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3103
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3104
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3105
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3106
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3107
"\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3108
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3109
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3112
"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3113
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3114
};
3115
3116
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3117
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3118
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3119
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3120
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3121
0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3122
0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3123
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3124
0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3125
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3126
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3127
0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3128
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3129
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3130
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3131
0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3132
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3133
};
3134
3135
static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3136
"\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3137
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3138
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3139
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3140
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3141
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3142
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3143
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3144
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3145
"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3146
"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3147
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3148
"\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3149
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3150
"\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3151
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3153
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3154
"\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3155
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3157
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3158
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3159
};
3160
3161
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3162
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3163
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3164
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3165
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3166
0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3167
0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3168
0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3169
0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3170
0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3171
0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3172
0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3173
0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3174
0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3175
0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3176
0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3177
0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3178
};
3179
3180
static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3181
"\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3182
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3183
"\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3185
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3187
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3188
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3189
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3190
"\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3191
"\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3192
"\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3193
"\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3194
"\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3195
"\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3196
"\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3197
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3198
"\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3199
"\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3200
"\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3201
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3202
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3203
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3204
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205
"\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208
"\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3209
"\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3210
"\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3211
"\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3212
};
3213
3214
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3215
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3216
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3217
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3218
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3219
0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3220
0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3221
0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3222
0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3223
0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3224
0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3225
0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3226
0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3227
0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3228
0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3229
0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3230
0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3231
};
3232
3233
static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3234
"\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3235
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3236
"\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3237
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3238
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3239
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3240
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3241
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3242
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3243
"\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3247
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3248
"\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3249
"\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3250
"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3251
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3252
"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3253
"\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3254
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3255
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3256
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3258
"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3259
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261
};
3262
3263
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3264
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3265
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3266
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3267
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3268
0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3269
0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3270
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3271
0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3272
0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3273
0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3274
0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3275
0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3276
0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3277
0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3278
0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3279
0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3280
};
3281
3282
static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3283
"\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3284
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3285
"\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3286
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3287
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3288
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3289
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3291
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3292
"\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3293
"\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3294
"\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3298
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3300
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3301
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3302
"\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3303
"\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3304
"\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3305
"\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3306
"\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3307
"\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3308
"\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3309
"\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3310
"\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3311
"\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3312
"\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3313
"\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3314
};
3315
3316
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3317
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3318
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3319
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3320
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3321
0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3322
0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3323
0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3324
0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3325
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3326
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3327
0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3328
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3329
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3330
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3331
0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3332
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3333
};
3334
3335
static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3336
"\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3337
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3338
"\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3339
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3340
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3341
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3342
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3343
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3344
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3345
"\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3346
"\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3347
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3348
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3349
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350
"\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3351
"\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3352
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3353
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3354
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3355
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3356
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3357
"\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3358
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359
"\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366
"\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370
"\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3371
"\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372
"\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3373
"\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3374
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3376
"\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3377
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3378
"\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3379
};
3380
3381
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3382
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3383
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3384
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3385
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3386
0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3387
0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3388
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3389
0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3390
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3391
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3392
0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3393
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3394
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3395
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3396
0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3397
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3398
};
3399
3400
static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3401
"\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3402
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3403
"\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3404
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3405
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3406
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3409
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3410
"\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3411
"\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3412
"\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3419
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3421
"\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3422
"\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423
"\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3424
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3425
"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3426
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3427
"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3428
};
3429
3430
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3431
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3432
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3433
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3434
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3435
0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3436
0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3437
0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3438
0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3439
0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3440
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3441
0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3442
0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3443
0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3444
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3445
0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3446
0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3447
};
3448
3449
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_16. As the
 * declared size says, it consists of 48 leading bytes followed by
 * 9 blocks of 64 bytes each (624 bytes in total; the string literal's
 * implicit terminator does not fit and is not stored).
 * NOTE(review): the 48-byte header presumably holds block numbers and
 * the blocks hold either further block numbers or the resulting
 * ISO-8859-16 byte, with 0 meaning "no mapping" -- confirm against
 * UTF8ToISO8859x.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    /* header: bytes 0..47 */
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"

    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"

    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"

    /* block 2 */
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"

    /* block 3 */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"

    /* block 4 */
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"

    /* block 5 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"

    /* block 6 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"

    /* block 7 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"

    /* block 8 */
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3490
3491
3492
/*
3493
* auto-generated functions for ISO-8859-2 .. ISO-8859-16
3494
*/
3495
3496
static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3497
const unsigned char* in, int *inlen) {
3498
return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3499
}
3500
static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3501
const unsigned char* in, int *inlen) {
3502
return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3503
}
3504
3505
static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3506
const unsigned char* in, int *inlen) {
3507
return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3508
}
3509
static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3510
const unsigned char* in, int *inlen) {
3511
return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3512
}
3513
3514
static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3515
const unsigned char* in, int *inlen) {
3516
return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3517
}
3518
static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3519
const unsigned char* in, int *inlen) {
3520
return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3521
}
3522
3523
static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3524
const unsigned char* in, int *inlen) {
3525
return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3526
}
3527
static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3528
const unsigned char* in, int *inlen) {
3529
return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3530
}
3531
3532
static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3533
const unsigned char* in, int *inlen) {
3534
return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3535
}
3536
static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3537
const unsigned char* in, int *inlen) {
3538
return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3539
}
3540
3541
static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3542
const unsigned char* in, int *inlen) {
3543
return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3544
}
3545
static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3546
const unsigned char* in, int *inlen) {
3547
return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3548
}
3549
3550
static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3551
const unsigned char* in, int *inlen) {
3552
return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3553
}
3554
static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3555
const unsigned char* in, int *inlen) {
3556
return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3557
}
3558
3559
static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3560
const unsigned char* in, int *inlen) {
3561
return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3562
}
3563
static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3564
const unsigned char* in, int *inlen) {
3565
return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3566
}
3567
3568
static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3569
const unsigned char* in, int *inlen) {
3570
return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3571
}
3572
static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3573
const unsigned char* in, int *inlen) {
3574
return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3575
}
3576
3577
static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3578
const unsigned char* in, int *inlen) {
3579
return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3580
}
3581
static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3582
const unsigned char* in, int *inlen) {
3583
return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3584
}
3585
3586
static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3587
const unsigned char* in, int *inlen) {
3588
return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3589
}
3590
static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3591
const unsigned char* in, int *inlen) {
3592
return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3593
}
3594
3595
static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3596
const unsigned char* in, int *inlen) {
3597
return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3598
}
3599
static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3600
const unsigned char* in, int *inlen) {
3601
return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3602
}
3603
3604
static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3605
const unsigned char* in, int *inlen) {
3606
return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3607
}
3608
static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3609
const unsigned char* in, int *inlen) {
3610
return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3611
}
3612
3613
static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3614
const unsigned char* in, int *inlen) {
3615
return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3616
}
3617
static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3618
const unsigned char* in, int *inlen) {
3619
return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3620
}
3621
3622
#endif
3623
#endif
3624
3625