Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/sdl/stdlib/SDL_iconv.c
9903 views
1
/*
  Simple DirectMedia Layer
  Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org>

  This software is provided 'as-is', without any express or implied
  warranty. In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/
21
#include "SDL_internal.h"
22
23
// This file contains portable iconv functions for SDL
24
25
#if defined(HAVE_ICONV) && defined(HAVE_ICONV_H)
26
#ifndef SDL_USE_LIBICONV
27
// Define LIBICONV_PLUG to use iconv from the base instead of ports and avoid linker errors.
28
#define LIBICONV_PLUG 1
29
#endif
30
#include <iconv.h>
31
#include <errno.h>
32
33
SDL_COMPILE_TIME_ASSERT(iconv_t, sizeof(iconv_t) <= sizeof(SDL_iconv_t));
34
35
SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
36
{
37
return (SDL_iconv_t)((uintptr_t)iconv_open(tocode, fromcode));
38
}
39
40
/* Close a conversion descriptor obtained from SDL_iconv_open().
 *
 * Returns -1 without touching the C library when `cd` is the error handle
 * (SDL_ICONV_ERROR); otherwise returns the result of iconv_close().
 */
int SDL_iconv_close(SDL_iconv_t cd)
{
    if ((size_t)cd == SDL_ICONV_ERROR) {
        return -1;
    }
    return iconv_close((iconv_t)((uintptr_t)cd));
}
47
48
/* Convert a chunk of text by forwarding to the C library's iconv().
 *
 * Returns SDL_ICONV_ERROR immediately when `cd` is the error handle.
 * On success the value returned by iconv() is passed through. When iconv()
 * fails, errno is translated to the matching SDL code:
 *   E2BIG  -> SDL_ICONV_E2BIG
 *   EILSEQ -> SDL_ICONV_EILSEQ
 *   EINVAL -> SDL_ICONV_EINVAL
 *   other  -> SDL_ICONV_ERROR
 */
size_t SDL_iconv(SDL_iconv_t cd,
                 const char **inbuf, size_t *inbytesleft,
                 char **outbuf, size_t *outbytesleft)
{
    if ((size_t)cd == SDL_ICONV_ERROR) {
        return SDL_ICONV_ERROR;
    }
    /* iconv's second parameter may or may not be `const char **` depending on the
       C runtime's whims. Casting to void * seems to make everyone happy, though. */
    const size_t retCode = iconv((iconv_t)((uintptr_t)cd), (void *)inbuf, inbytesleft, outbuf, outbytesleft);
    if (retCode == (size_t)-1) {
        // Snapshot errno right away so nothing in between can clobber it.
        const int err = errno;
        switch (err) {
        case E2BIG:
            return SDL_ICONV_E2BIG;
        case EILSEQ:
            return SDL_ICONV_EILSEQ;
        case EINVAL:
            return SDL_ICONV_EINVAL;
        default:
            return SDL_ICONV_ERROR;
        }
    }
    return retCode;
}
72
73
#else
74
75
/* Lots of useful information on Unicode at:
    http://www.cl.cam.ac.uk/~mgk25/unicode.html
*/

// Code point written in front of UTF-16 / UTF-32 output when no byte order was requested
#define UNICODE_BOM 0xFEFF

// Substitutes emitted when a character cannot be decoded or represented
#define UNKNOWN_ASCII   '?'
#define UNKNOWN_UNICODE 0xFFFD
83
84
// Encodings understood by the built-in converter.
// ENCODING_UNKNOWN is the "no match" value used while looking up encoding names.
enum
{
    ENCODING_UNKNOWN,
    ENCODING_ASCII,
    ENCODING_LATIN1,
    ENCODING_UTF8,
    ENCODING_UTF16, // Needs byte order marker
    ENCODING_UTF16BE,
    ENCODING_UTF16LE,
    ENCODING_UTF32, // Needs byte order marker
    ENCODING_UTF32BE,
    ENCODING_UTF32LE,
    ENCODING_UCS2BE,
    ENCODING_UCS2LE,
    ENCODING_UCS4BE,
    ENCODING_UCS4LE,
};
101
// Host-byte-order aliases for the endian-specific encodings
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE ENCODING_UTF32BE
#define ENCODING_UCS2NATIVE  ENCODING_UCS2BE
#define ENCODING_UCS4NATIVE  ENCODING_UCS4BE
#else
#define ENCODING_UTF16NATIVE ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE ENCODING_UTF32LE
#define ENCODING_UCS2NATIVE  ENCODING_UCS2LE
#define ENCODING_UCS4NATIVE  ENCODING_UCS4LE
#endif
112
113
// Conversion state behind an SDL_iconv_t handle in the built-in converter.
struct SDL_iconv_data_t
{
    int src_fmt; // ENCODING_* value of the input text
    int dst_fmt; // ENCODING_* value of the output text
};
118
119
static struct
120
{
121
const char *name;
122
int format;
123
} encodings[] = {
124
/* *INDENT-OFF* */ // clang-format off
125
{ "ASCII", ENCODING_ASCII },
126
{ "US-ASCII", ENCODING_ASCII },
127
{ "8859-1", ENCODING_LATIN1 },
128
{ "ISO-8859-1", ENCODING_LATIN1 },
129
#if defined(SDL_PLATFORM_WINDOWS) || defined(SDL_PLATFORM_OS2)
130
{ "WCHAR_T", ENCODING_UTF16LE },
131
#else
132
{ "WCHAR_T", ENCODING_UCS4NATIVE },
133
#endif
134
{ "UTF8", ENCODING_UTF8 },
135
{ "UTF-8", ENCODING_UTF8 },
136
{ "UTF16", ENCODING_UTF16 },
137
{ "UTF-16", ENCODING_UTF16 },
138
{ "UTF16BE", ENCODING_UTF16BE },
139
{ "UTF-16BE", ENCODING_UTF16BE },
140
{ "UTF16LE", ENCODING_UTF16LE },
141
{ "UTF-16LE", ENCODING_UTF16LE },
142
{ "UTF32", ENCODING_UTF32 },
143
{ "UTF-32", ENCODING_UTF32 },
144
{ "UTF32BE", ENCODING_UTF32BE },
145
{ "UTF-32BE", ENCODING_UTF32BE },
146
{ "UTF32LE", ENCODING_UTF32LE },
147
{ "UTF-32LE", ENCODING_UTF32LE },
148
{ "UCS2", ENCODING_UCS2BE },
149
{ "UCS-2", ENCODING_UCS2BE },
150
{ "UCS-2LE", ENCODING_UCS2LE },
151
{ "UCS-2BE", ENCODING_UCS2BE },
152
{ "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
153
{ "UCS4", ENCODING_UCS4BE },
154
{ "UCS-4", ENCODING_UCS4BE },
155
{ "UCS-4LE", ENCODING_UCS4LE },
156
{ "UCS-4BE", ENCODING_UCS4BE },
157
{ "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
158
/* *INDENT-ON* */ // clang-format on
159
};
160
161
/* Derive an encoding name from the environment and store it in `buffer`.
 *
 * The first of LC_ALL, LC_CTYPE, LC_MESSAGES and LANG that is set (even if
 * empty) is used. If none is set, or the value is empty or "C", the result
 * is "ASCII". A value such as "en_US.UTF-8@blah" is reduced to "UTF-8":
 * everything up to the first '.' and everything from the first '@' is dropped.
 *
 * The copy into `buffer` is bounded by `bufsize`. Returns `buffer`.
 */
static const char *getlocale(char *buffer, size_t bufsize)
{
    const char *lang;
    char *ptr;

    lang = SDL_getenv("LC_ALL");
    if (!lang) {
        lang = SDL_getenv("LC_CTYPE");
    }
    if (!lang) {
        lang = SDL_getenv("LC_MESSAGES");
    }
    if (!lang) {
        lang = SDL_getenv("LANG");
    }
    if (!lang || !*lang || SDL_strcmp(lang, "C") == 0) {
        lang = "ASCII";
    }

    // We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8"
    ptr = SDL_strchr(lang, '.');
    if (ptr) {
        lang = ptr + 1;
    }

    SDL_strlcpy(buffer, lang, bufsize);
    ptr = SDL_strchr(buffer, '@');
    if (ptr) {
        *ptr = '\0'; // chop end of string.
    }

    return buffer;
}
194
195
SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
196
{
197
int src_fmt = ENCODING_UNKNOWN;
198
int dst_fmt = ENCODING_UNKNOWN;
199
int i;
200
char fromcode_buffer[64];
201
char tocode_buffer[64];
202
203
if (!fromcode || !*fromcode) {
204
fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
205
}
206
if (!tocode || !*tocode) {
207
tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
208
}
209
for (i = 0; i < SDL_arraysize(encodings); ++i) {
210
if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
211
src_fmt = encodings[i].format;
212
if (dst_fmt != ENCODING_UNKNOWN) {
213
break;
214
}
215
}
216
if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
217
dst_fmt = encodings[i].format;
218
if (src_fmt != ENCODING_UNKNOWN) {
219
break;
220
}
221
}
222
}
223
if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
224
SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd));
225
if (cd) {
226
cd->src_fmt = src_fmt;
227
cd->dst_fmt = dst_fmt;
228
return cd;
229
}
230
}
231
return (SDL_iconv_t)-1;
232
}
233
234
/* Convert text with the built-in converter.
 *
 * Each input character is decoded to a code point (UCS-4) according to
 * cd->src_fmt and then re-encoded according to cd->dst_fmt. After every
 * converted character *inbuf, *inbytesleft, *outbuf and *outbytesleft are
 * advanced, so on an error return they describe the state after the last
 * character that was fully converted.
 *
 * Byte-order handling:
 *  - A source of ENCODING_UTF16 / ENCODING_UTF32 is resolved on first use by
 *    scanning the input for a byte order mark (FE FF = big endian,
 *    FF FE = little endian); without one, host order is assumed. The choice
 *    is stored in `cd`.
 *  - A destination of ENCODING_UTF16 / ENCODING_UTF32 gets a host-order BOM
 *    written first and is then treated as the host-order variant. The BOM is
 *    committed to *outbuf / *outbytesleft immediately so that it survives an
 *    early return.
 *
 * Undecodable or unrepresentable characters are replaced (U+FFFD, or '?' for
 * ASCII / Latin-1 output) instead of failing.
 *
 * Returns:
 *  - the number of characters converted on success;
 *  - 0 if `inbuf` or `*inbuf` is NULL (treated as a context reset);
 *  - SDL_ICONV_ERROR  if `cd` is the error handle;
 *  - SDL_ICONV_E2BIG  if there is no output buffer or it is too small;
 *  - SDL_ICONV_EINVAL if the input ends in the middle of a character.
 */
size_t SDL_iconv(SDL_iconv_t cd,
                 const char **inbuf, size_t *inbytesleft,
                 char **outbuf, size_t *outbytesleft)
{
    // For simplicity, we'll convert everything to and from UCS-4
    const char *src;
    char *dst;
    size_t srclen, dstlen;
    Uint32 ch = 0;
    size_t total;

    if ((size_t)cd == SDL_ICONV_ERROR) {
        return SDL_ICONV_ERROR;
    }
    if (!inbuf || !*inbuf) {
        // Reset the context
        return 0;
    }
    if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
        return SDL_ICONV_E2BIG;
    }
    src = *inbuf;
    srclen = (inbytesleft ? *inbytesleft : 0);
    dst = *outbuf;
    dstlen = *outbytesleft;

    switch (cd->src_fmt) {
    case ENCODING_UTF16:
        // Scan for a byte order marker
        {
            const Uint8 *p = (const Uint8 *)src;
            size_t n = srclen / 2;
            while (n) {
                // U+FEFF serialized big-endian is FE FF, little-endian is FF FE
                if (p[0] == 0xFE && p[1] == 0xFF) {
                    cd->src_fmt = ENCODING_UTF16BE;
                    break;
                } else if (p[0] == 0xFF && p[1] == 0xFE) {
                    cd->src_fmt = ENCODING_UTF16LE;
                    break;
                }
                p += 2;
                --n;
            }
            if (n == 0) {
                // We can't tell, default to host order
                cd->src_fmt = ENCODING_UTF16NATIVE;
            }
        }
        break;
    case ENCODING_UTF32:
        // Scan for a byte order marker
        {
            const Uint8 *p = (const Uint8 *)src;
            size_t n = srclen / 4;
            while (n) {
                // U+0000FEFF serialized big-endian is 00 00 FE FF, little-endian is FF FE 00 00
                if (p[0] == 0x00 && p[1] == 0x00 &&
                    p[2] == 0xFE && p[3] == 0xFF) {
                    cd->src_fmt = ENCODING_UTF32BE;
                    break;
                } else if (p[0] == 0xFF && p[1] == 0xFE &&
                           p[2] == 0x00 && p[3] == 0x00) {
                    cd->src_fmt = ENCODING_UTF32LE;
                    break;
                }
                p += 4;
                --n;
            }
            if (n == 0) {
                // We can't tell, default to host order
                cd->src_fmt = ENCODING_UTF32NATIVE;
            }
        }
        break;
    }

    switch (cd->dst_fmt) {
    case ENCODING_UTF16:
        // Default to host order, need to add byte order marker
        if (dstlen < 2) {
            return SDL_ICONV_E2BIG;
        }
        {
            // Copy bytewise: dst is a char buffer with no alignment guarantee
            const Uint16 bom = UNICODE_BOM;
            SDL_memcpy(dst, &bom, sizeof(bom));
        }
        dst += 2;
        dstlen -= 2;
        cd->dst_fmt = ENCODING_UTF16NATIVE;
        // Commit the BOM now; dst_fmt has changed, so it will not be written again
        *outbuf = dst;
        *outbytesleft = dstlen;
        break;
    case ENCODING_UTF32:
        // Default to host order, need to add byte order marker
        if (dstlen < 4) {
            return SDL_ICONV_E2BIG;
        }
        {
            // Copy bytewise: dst is a char buffer with no alignment guarantee
            const Uint32 bom = UNICODE_BOM;
            SDL_memcpy(dst, &bom, sizeof(bom));
        }
        dst += 4;
        dstlen -= 4;
        cd->dst_fmt = ENCODING_UTF32NATIVE;
        // Commit the BOM now; dst_fmt has changed, so it will not be written again
        *outbuf = dst;
        *outbytesleft = dstlen;
        break;
    }

    total = 0;
    while (srclen > 0) {
        // Decode a character
        switch (cd->src_fmt) {
        case ENCODING_ASCII:
        {
            const Uint8 *p = (const Uint8 *)src;
            ch = (Uint32)(p[0] & 0x7F);
            ++src;
            --srclen;
        } break;
        case ENCODING_LATIN1:
        {
            const Uint8 *p = (const Uint8 *)src;
            ch = (Uint32)p[0];
            ++src;
            --srclen;
        } break;
        case ENCODING_UTF8: // RFC 3629
        {
            const Uint8 *p = (const Uint8 *)src;
            size_t left = 0; // continuation bytes still expected
            bool overlong = false;
            if (p[0] >= 0xF0) {
                if ((p[0] & 0xF8) != 0xF0) {
                    // Illegal lead byte: substitute instead of failing
                    ch = UNKNOWN_UNICODE;
                } else {
                    if (p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80) {
                        overlong = true;
                    }
                    ch = (Uint32)(p[0] & 0x07);
                    left = 3;
                }
            } else if (p[0] >= 0xE0) {
                if ((p[0] & 0xF0) != 0xE0) {
                    // Illegal lead byte: substitute instead of failing
                    ch = UNKNOWN_UNICODE;
                } else {
                    if (p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80) {
                        overlong = true;
                    }
                    ch = (Uint32)(p[0] & 0x0F);
                    left = 2;
                }
            } else if (p[0] >= 0xC0) {
                if ((p[0] & 0xE0) != 0xC0) {
                    // Illegal lead byte: substitute instead of failing
                    ch = UNKNOWN_UNICODE;
                } else {
                    if ((p[0] & 0xDE) == 0xC0) {
                        overlong = true;
                    }
                    ch = (Uint32)(p[0] & 0x1F);
                    left = 1;
                }
            } else {
                if (p[0] & 0x80) {
                    // Stray continuation byte: substitute instead of failing
                    ch = UNKNOWN_UNICODE;
                } else {
                    ch = (Uint32)p[0];
                }
            }
            ++src;
            --srclen;
            if (srclen < left) {
                return SDL_ICONV_EINVAL;
            }
            while (left--) {
                ++p;
                if ((p[0] & 0xC0) != 0x80) {
                    // Bad continuation byte: substitute and leave it for the next round
                    ch = UNKNOWN_UNICODE;
                    break;
                }
                ch <<= 6;
                ch |= (p[0] & 0x3F);
                ++src;
                --srclen;
            }
            if (overlong) {
                // Overlong forms are a potential security risk: substitute
                ch = UNKNOWN_UNICODE;
            }
            if ((ch >= 0xD800 && ch <= 0xDFFF) ||
                (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
                // Surrogates, noncharacters and out-of-range values: substitute
                ch = UNKNOWN_UNICODE;
            }
        } break;
        case ENCODING_UTF16BE: // RFC 2781
        {
            const Uint8 *p = (const Uint8 *)src;
            Uint16 W1, W2;
            if (srclen < 2) {
                return SDL_ICONV_EINVAL;
            }
            W1 = ((Uint16)p[0] << 8) | (Uint16)p[1];
            src += 2;
            srclen -= 2;
            if (W1 < 0xD800 || W1 > 0xDFFF) {
                ch = (Uint32)W1;
                break;
            }
            if (W1 > 0xDBFF) {
                // Low surrogate without a preceding high surrogate: substitute
                ch = UNKNOWN_UNICODE;
                break;
            }
            if (srclen < 2) {
                return SDL_ICONV_EINVAL;
            }
            p = (const Uint8 *)src;
            W2 = ((Uint16)p[0] << 8) | (Uint16)p[1];
            src += 2;
            srclen -= 2;
            if (W2 < 0xDC00 || W2 > 0xDFFF) {
                // High surrogate not followed by a low surrogate: substitute
                ch = UNKNOWN_UNICODE;
                break;
            }
            ch = (((Uint32)(W1 & 0x3FF) << 10) |
                  (Uint32)(W2 & 0x3FF)) +
                 0x10000;
        } break;
        case ENCODING_UTF16LE: // RFC 2781
        {
            const Uint8 *p = (const Uint8 *)src;
            Uint16 W1, W2;
            if (srclen < 2) {
                return SDL_ICONV_EINVAL;
            }
            W1 = ((Uint16)p[1] << 8) | (Uint16)p[0];
            src += 2;
            srclen -= 2;
            if (W1 < 0xD800 || W1 > 0xDFFF) {
                ch = (Uint32)W1;
                break;
            }
            if (W1 > 0xDBFF) {
                // Low surrogate without a preceding high surrogate: substitute
                ch = UNKNOWN_UNICODE;
                break;
            }
            if (srclen < 2) {
                return SDL_ICONV_EINVAL;
            }
            p = (const Uint8 *)src;
            W2 = ((Uint16)p[1] << 8) | (Uint16)p[0];
            src += 2;
            srclen -= 2;
            if (W2 < 0xDC00 || W2 > 0xDFFF) {
                // High surrogate not followed by a low surrogate: substitute
                ch = UNKNOWN_UNICODE;
                break;
            }
            ch = (((Uint32)(W1 & 0x3FF) << 10) |
                  (Uint32)(W2 & 0x3FF)) +
                 0x10000;
        } break;
        case ENCODING_UCS2LE:
        {
            const Uint8 *p = (const Uint8 *)src;
            if (srclen < 2) {
                return SDL_ICONV_EINVAL;
            }
            ch = ((Uint32)p[1] << 8) | (Uint32)p[0];
            src += 2;
            srclen -= 2;
        } break;
        case ENCODING_UCS2BE:
        {
            const Uint8 *p = (const Uint8 *)src;
            if (srclen < 2) {
                return SDL_ICONV_EINVAL;
            }
            ch = ((Uint32)p[0] << 8) | (Uint32)p[1];
            src += 2;
            srclen -= 2;
        } break;
        case ENCODING_UCS4BE:
        case ENCODING_UTF32BE:
        {
            const Uint8 *p = (const Uint8 *)src;
            if (srclen < 4) {
                return SDL_ICONV_EINVAL;
            }
            ch = ((Uint32)p[0] << 24) |
                 ((Uint32)p[1] << 16) |
                 ((Uint32)p[2] << 8) | (Uint32)p[3];
            src += 4;
            srclen -= 4;
        } break;
        case ENCODING_UCS4LE:
        case ENCODING_UTF32LE:
        {
            const Uint8 *p = (const Uint8 *)src;
            if (srclen < 4) {
                return SDL_ICONV_EINVAL;
            }
            ch = ((Uint32)p[3] << 24) |
                 ((Uint32)p[2] << 16) |
                 ((Uint32)p[1] << 8) | (Uint32)p[0];
            src += 4;
            srclen -= 4;
        } break;
        }

        // Encode a character
        switch (cd->dst_fmt) {
        case ENCODING_ASCII:
        {
            Uint8 *p = (Uint8 *)dst;
            if (dstlen < 1) {
                return SDL_ICONV_E2BIG;
            }
            if (ch > 0x7F) {
                *p = UNKNOWN_ASCII;
            } else {
                *p = (Uint8)ch;
            }
            ++dst;
            --dstlen;
        } break;
        case ENCODING_LATIN1:
        {
            Uint8 *p = (Uint8 *)dst;
            if (dstlen < 1) {
                return SDL_ICONV_E2BIG;
            }
            if (ch > 0xFF) {
                *p = UNKNOWN_ASCII;
            } else {
                *p = (Uint8)ch;
            }
            ++dst;
            --dstlen;
        } break;
        case ENCODING_UTF8: // RFC 3629
        {
            Uint8 *p = (Uint8 *)dst;
            if (ch > 0x10FFFF) {
                ch = UNKNOWN_UNICODE;
            }
            if (ch <= 0x7F) {
                if (dstlen < 1) {
                    return SDL_ICONV_E2BIG;
                }
                *p = (Uint8)ch;
                ++dst;
                --dstlen;
            } else if (ch <= 0x7FF) {
                if (dstlen < 2) {
                    return SDL_ICONV_E2BIG;
                }
                p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F);
                p[1] = 0x80 | (Uint8)(ch & 0x3F);
                dst += 2;
                dstlen -= 2;
            } else if (ch <= 0xFFFF) {
                if (dstlen < 3) {
                    return SDL_ICONV_E2BIG;
                }
                p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F);
                p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
                p[2] = 0x80 | (Uint8)(ch & 0x3F);
                dst += 3;
                dstlen -= 3;
            } else {
                if (dstlen < 4) {
                    return SDL_ICONV_E2BIG;
                }
                p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07);
                p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
                p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
                p[3] = 0x80 | (Uint8)(ch & 0x3F);
                dst += 4;
                dstlen -= 4;
            }
        } break;
        case ENCODING_UTF16BE: // RFC 2781
        {
            Uint8 *p = (Uint8 *)dst;
            if (ch > 0x10FFFF) {
                ch = UNKNOWN_UNICODE;
            }
            if (ch < 0x10000) {
                if (dstlen < 2) {
                    return SDL_ICONV_E2BIG;
                }
                p[0] = (Uint8)(ch >> 8);
                p[1] = (Uint8)ch;
                dst += 2;
                dstlen -= 2;
            } else {
                Uint16 W1, W2;
                if (dstlen < 4) {
                    return SDL_ICONV_E2BIG;
                }
                // Split into a surrogate pair
                ch = ch - 0x10000;
                W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
                W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
                p[0] = (Uint8)(W1 >> 8);
                p[1] = (Uint8)W1;
                p[2] = (Uint8)(W2 >> 8);
                p[3] = (Uint8)W2;
                dst += 4;
                dstlen -= 4;
            }
        } break;
        case ENCODING_UTF16LE: // RFC 2781
        {
            Uint8 *p = (Uint8 *)dst;
            if (ch > 0x10FFFF) {
                ch = UNKNOWN_UNICODE;
            }
            if (ch < 0x10000) {
                if (dstlen < 2) {
                    return SDL_ICONV_E2BIG;
                }
                p[1] = (Uint8)(ch >> 8);
                p[0] = (Uint8)ch;
                dst += 2;
                dstlen -= 2;
            } else {
                Uint16 W1, W2;
                if (dstlen < 4) {
                    return SDL_ICONV_E2BIG;
                }
                // Split into a surrogate pair
                ch = ch - 0x10000;
                W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
                W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
                p[1] = (Uint8)(W1 >> 8);
                p[0] = (Uint8)W1;
                p[3] = (Uint8)(W2 >> 8);
                p[2] = (Uint8)W2;
                dst += 4;
                dstlen -= 4;
            }
        } break;
        case ENCODING_UCS2BE:
        {
            Uint8 *p = (Uint8 *)dst;
            if (ch > 0xFFFF) {
                ch = UNKNOWN_UNICODE;
            }
            if (dstlen < 2) {
                return SDL_ICONV_E2BIG;
            }
            p[0] = (Uint8)(ch >> 8);
            p[1] = (Uint8)ch;
            dst += 2;
            dstlen -= 2;
        } break;
        case ENCODING_UCS2LE:
        {
            Uint8 *p = (Uint8 *)dst;
            if (ch > 0xFFFF) {
                ch = UNKNOWN_UNICODE;
            }
            if (dstlen < 2) {
                return SDL_ICONV_E2BIG;
            }
            p[1] = (Uint8)(ch >> 8);
            p[0] = (Uint8)ch;
            dst += 2;
            dstlen -= 2;
        } break;
        case ENCODING_UTF32BE:
            if (ch > 0x10FFFF) {
                ch = UNKNOWN_UNICODE;
            }
            SDL_FALLTHROUGH;
        case ENCODING_UCS4BE:
            if (ch > 0x7FFFFFFF) {
                ch = UNKNOWN_UNICODE;
            }
            {
                Uint8 *p = (Uint8 *)dst;
                if (dstlen < 4) {
                    return SDL_ICONV_E2BIG;
                }
                p[0] = (Uint8)(ch >> 24);
                p[1] = (Uint8)(ch >> 16);
                p[2] = (Uint8)(ch >> 8);
                p[3] = (Uint8)ch;
                dst += 4;
                dstlen -= 4;
            }
            break;
        case ENCODING_UTF32LE:
            if (ch > 0x10FFFF) {
                ch = UNKNOWN_UNICODE;
            }
            SDL_FALLTHROUGH;
        case ENCODING_UCS4LE:
            if (ch > 0x7FFFFFFF) {
                ch = UNKNOWN_UNICODE;
            }
            {
                Uint8 *p = (Uint8 *)dst;
                if (dstlen < 4) {
                    return SDL_ICONV_E2BIG;
                }
                p[3] = (Uint8)(ch >> 24);
                p[2] = (Uint8)(ch >> 16);
                p[1] = (Uint8)(ch >> 8);
                p[0] = (Uint8)ch;
                dst += 4;
                dstlen -= 4;
            }
            break;
        }

        // Update state (inbytesleft is non-NULL here, otherwise srclen would be 0)
        *inbuf = src;
        *inbytesleft = srclen;
        *outbuf = dst;
        *outbytesleft = dstlen;
        ++total;
    }
    return total;
}
778
779
/* Release a descriptor created by the built-in SDL_iconv_open().
 *
 * Returns -1 when `cd` is the error handle ((SDL_iconv_t)-1); otherwise frees
 * the descriptor and returns 0.
 */
int SDL_iconv_close(SDL_iconv_t cd)
{
    if (cd == (SDL_iconv_t)-1) {
        return -1;
    }
    SDL_free(cd);
    return 0;
}
787
788
#endif // !HAVE_ICONV
789
790
char *SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft)
791
{
792
SDL_iconv_t cd;
793
char *string;
794
size_t stringsize;
795
char *outbuf;
796
size_t outbytesleft;
797
size_t retCode = 0;
798
799
if (!tocode || !*tocode) {
800
tocode = "UTF-8";
801
}
802
if (!fromcode || !*fromcode) {
803
fromcode = "UTF-8";
804
}
805
cd = SDL_iconv_open(tocode, fromcode);
806
if (cd == (SDL_iconv_t)-1) {
807
return NULL;
808
}
809
810
stringsize = inbytesleft;
811
string = (char *)SDL_malloc(stringsize + sizeof(Uint32));
812
if (!string) {
813
SDL_iconv_close(cd);
814
return NULL;
815
}
816
outbuf = string;
817
outbytesleft = stringsize;
818
SDL_memset(outbuf, 0, sizeof(Uint32));
819
820
while (inbytesleft > 0) {
821
const size_t oldinbytesleft = inbytesleft;
822
retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
823
switch (retCode) {
824
case SDL_ICONV_E2BIG:
825
{
826
const ptrdiff_t diff = (ptrdiff_t) (outbuf - string);
827
char *oldstring = string;
828
stringsize *= 2;
829
string = (char *)SDL_realloc(string, stringsize + sizeof(Uint32));
830
if (!string) {
831
SDL_free(oldstring);
832
SDL_iconv_close(cd);
833
return NULL;
834
}
835
outbuf = string + diff;
836
outbytesleft = stringsize - diff;
837
SDL_memset(outbuf, 0, sizeof(Uint32));
838
continue;
839
}
840
case SDL_ICONV_EILSEQ:
841
// Try skipping some input data - not perfect, but...
842
++inbuf;
843
--inbytesleft;
844
break;
845
case SDL_ICONV_EINVAL:
846
case SDL_ICONV_ERROR:
847
// We can't continue...
848
inbytesleft = 0;
849
break;
850
}
851
// Avoid infinite loops when nothing gets converted
852
if (oldinbytesleft == inbytesleft) {
853
break;
854
}
855
}
856
SDL_memset(outbuf, 0, sizeof(Uint32));
857
SDL_iconv_close(cd);
858
859
return string;
860
}
861
862