// Scraped from a CoCalc mirror page of the GitHub repository
// PojavLauncherTeam/openjdk-multiarch-jdk8u,
// path blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/tinyiconv/iconv.cpp.
/*
 * Copyright (C) 2017 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
28
29
#ifdef __ANDROID__
30
31
#include <stdint.h>
32
33
// for char16_t and char32_t
34
typedef uint32_t char32_t;
35
typedef uint16_t char16_t;
36
37
#include <ctype.h>
38
39
#include "iconv.h"
40
#include <endian.h>
41
#include <errno.h>
42
#include <stdlib.h>
43
#include <string.h>
44
#include <uchar.h>
45
46
#include "bionic_mbstate.h"
47
48
49
// Sentinel handle value: iconv_open() returns it on failure, and iconv() and
// iconv_close() reject it with EBADF. The expansion is fully parenthesized so
// it stays a single operand wherever the macro is used.
#ifdef __cplusplus
# define INVALID_ICONV_T (reinterpret_cast<iconv_t>(-1))
#else // !__cplusplus
# define INVALID_ICONV_T ((iconv_t)(-1))
#endif // __cplusplus
54
55
// Ideally we'd use icu4c, but the API mismatch seems too great, so we just offer something
// equivalent to (but slightly easier to use for runs of text than) <uchar.h>. If you're
// here to add more encodings, consider working on finishing the icu4c NDK wrappers instead.
58
59
// The character encodings this converter understands. __parse_encoding() maps
// a user-supplied name to one of these, and __iconv_t switches on them when
// decoding and encoding.
enum Encoding {
  US_ASCII,
  UTF_8,
  UTF_16_LE,
  UTF_16_BE,
  UTF_32_LE,
  UTF_32_BE,
  WCHAR_T,
};
#ifndef __cplusplus
// C needs a typedef before the bare name `Encoding` can be used as a type.
typedef enum Encoding Encoding;
#endif // !__cplusplus
77
78
// What to do with a character that cannot be decoded or encoded:
//   ERROR    - fail the conversion (the default set by __iconv_t's constructor).
//   IGNORE   - skip the character; selected by a "//IGNORE" suffix.
//   TRANSLIT - substitute '?'; selected by a "//TRANSLIT" suffix.
enum Mode {
  ERROR,
  IGNORE,
  TRANSLIT,
};
#ifndef __cplusplus
// C needs a typedef before the bare name `Mode` can be used as a type.
typedef enum Mode Mode;
#endif // !__cplusplus
92
93
// Returns true if the caller-supplied encoding name `lhs` matches `rhs`.
// Every caller in this file passes `rhs` as lowercase alphanumerics only
// (e.g. "utf16le").
//
// This matching is strange but true.
// See http://www.unicode.org/reports/tr22/#Charset_Alias_Matching.
//   * Non-alphanumeric characters in `lhs` are skipped, so "UTF-8", "UTF_8",
//     "UTF8" and "UTF 8" are all equivalent.
//   * Case doesn't matter.
//   * A '0' that begins a run of digits is skipped ("UTF-08" matches "utf8"),
//     but a '0' that directly follows another digit is significant, so
//     "UTF-106LE" does NOT match "utf16le".
//   * As a special case, the GNU "//" extensions (such as "//IGNORE") are
//     treated as the end of `lhs`.
static bool __match_encoding(const char* lhs, const char* rhs) {
  // True while the character most recently consumed from lhs was a digit.
  bool after_digit = false;
  while (*lhs && *rhs) {
    // Advance lhs to its next significant character (or to the terminator).
    for (; *lhs; ++lhs) {
      // <ctype.h> functions are only defined for unsigned char values and EOF.
      unsigned char c = (unsigned char)*lhs;
      if (!isalnum(c)) {
        after_digit = false;
        continue;
      }
      // Drop only a leading zero: not after a digit, and with a digit next.
      if (c == '0' && !after_digit && isdigit((unsigned char)lhs[1])) continue;
      break;
    }
    if (tolower((unsigned char)*lhs) != tolower((unsigned char)*rhs)) break;
    after_digit = isdigit((unsigned char)*lhs) != 0;
    ++lhs;
    ++rhs;
  }
  // Both names must be exhausted; lhs may instead stop at a "//" suffix.
  return (*lhs == '\0' || strncmp(lhs, "//", 2) == 0) && *rhs == '\0';
}
111
112
static bool __parse_encoding(const char* s, Encoding* encoding, Mode* mode) {
113
const char* suffix = strstr(s, "//");
114
if (suffix) {
115
if (!mode) return false;
116
if (strcmp(suffix, "//IGNORE") == 0) {
117
*mode = IGNORE;
118
} else if (strcmp(suffix, "//TRANSLIT") == 0) {
119
*mode = TRANSLIT;
120
} else {
121
return false;
122
}
123
}
124
if (__match_encoding(s, "utf8")) {
125
*encoding = UTF_8;
126
} else if (__match_encoding(s, "ascii") || __match_encoding(s, "usascii")) {
127
*encoding = US_ASCII;
128
} else if (__match_encoding(s, "utf16le")) {
129
*encoding = UTF_16_LE;
130
} else if (__match_encoding(s, "utf16be")) {
131
*encoding = UTF_16_BE;
132
} else if (__match_encoding(s, "utf32le")) {
133
*encoding = UTF_32_LE;
134
} else if (__match_encoding(s, "utf32be")) {
135
*encoding = UTF_32_BE;
136
} else if (__match_encoding(s, "wchart")) {
137
*encoding = WCHAR_T;
138
} else {
139
return false;
140
}
141
return true;
142
}
143
144
struct __iconv_t {
145
Encoding src_encoding;
146
Encoding dst_encoding;
147
Mode mode;
148
__iconv_t() : mode(ERROR) {
149
}
150
int Convert(char** src_buf0, size_t* src_bytes_left0, char** dst_buf0, size_t* dst_bytes_left0) {
151
// Reset state.
152
wc = 0;
153
memset(&ps, 0, sizeof(ps));
154
replacement_count = 0;
155
ignored = false;
156
src_buf = src_buf0;
157
src_bytes_left = src_bytes_left0;
158
dst_buf = dst_buf0;
159
dst_bytes_left = dst_bytes_left0;
160
while (*src_bytes_left > 0) {
161
if (!GetNext() || !Convert()) return -1;
162
}
163
return Done();
164
}
165
private:
166
char32_t wc;
167
char buf[16];
168
size_t src_bytes_used;
169
size_t dst_bytes_used;
170
mbstate_t ps;
171
size_t replacement_count;
172
bool ignored;
173
char** src_buf;
174
size_t* src_bytes_left;
175
char** dst_buf;
176
size_t* dst_bytes_left;
177
bool GetNext() {
178
errno = 0;
179
switch (src_encoding) {
180
case US_ASCII:
181
wc = **src_buf;
182
src_bytes_used = 1;
183
if (wc > 0x7f) errno = EILSEQ;
184
break;
185
case UTF_8:
186
src_bytes_used = mbrtoc32(&wc, *src_buf, *src_bytes_left, &ps);
187
if (src_bytes_used == __MB_ERR_ILLEGAL_SEQUENCE) {
188
break; // EILSEQ already set.
189
} else if (src_bytes_used == __MB_ERR_INCOMPLETE_SEQUENCE) {
190
errno = EINVAL;
191
return false;
192
}
193
break;
194
case UTF_16_BE:
195
case UTF_16_LE: {
196
if (*src_bytes_left < 2) {
197
errno = EINVAL;
198
return false;
199
}
200
bool swap = (src_encoding == UTF_16_BE);
201
wc = In16(*src_buf, swap);
202
// 0xd800-0xdbff: high surrogates
203
// 0xdc00-0xdfff: low surrogates
204
if (wc >= 0xd800 && wc <= 0xdfff) {
205
if (wc >= 0xdc00) { // Low surrogate before high surrogate.
206
errno = EILSEQ;
207
return false;
208
}
209
if (*src_bytes_left < 4) {
210
errno = EINVAL;
211
return false;
212
}
213
uint16_t hi = wc;
214
uint16_t lo = In16(*src_buf + 2, swap);
215
wc = 0x10000 + ((hi - 0xd800) << 10) + (lo - 0xdc00);
216
src_bytes_used = 4;
217
}
218
break;
219
}
220
case UTF_32_BE:
221
case UTF_32_LE:
222
case WCHAR_T:
223
if (*src_bytes_left < 4) {
224
errno = EINVAL;
225
return false;
226
}
227
wc = In32(*src_buf, (src_encoding == UTF_32_BE));
228
break;
229
}
230
if (errno == EILSEQ) {
231
switch (mode) {
232
case ERROR:
233
return false;
234
case IGNORE:
235
*src_buf += src_bytes_used;
236
*src_bytes_left -= src_bytes_used;
237
ignored = true;
238
return GetNext();
239
case TRANSLIT:
240
wc = '?';
241
++replacement_count;
242
return true;
243
}
244
}
245
return true;
246
}
247
248
bool Convert() {
249
errno = 0;
250
switch (dst_encoding) {
251
case US_ASCII:
252
buf[0] = wc;
253
dst_bytes_used = 1;
254
if (wc > 0x7f) errno = EILSEQ;
255
break;
256
case UTF_8:
257
dst_bytes_used = c32rtomb(buf, wc, &ps);
258
if (dst_bytes_used == __MB_ERR_ILLEGAL_SEQUENCE) {
259
break; // EILSEQ already set.
260
} else if (dst_bytes_used == __MB_ERR_INCOMPLETE_SEQUENCE) {
261
errno = EINVAL;
262
return false;
263
}
264
break;
265
case UTF_16_BE:
266
case UTF_16_LE: {
267
bool swap = (dst_encoding == UTF_16_BE);
268
if (wc < 0x10000) { // BMP.
269
Out16(buf, wc, swap);
270
} else { // Supplementary plane; output surrogate pair.
271
wc -= 0x10000;
272
char16_t hi = 0xd800 | (wc >> 10);
273
char16_t lo = 0xdc00 | (wc & 0x3ff);
274
Out16(buf + 0, hi, swap);
275
Out16(buf + 2, lo, swap);
276
dst_bytes_used = 4;
277
}
278
} break;
279
case UTF_32_BE:
280
case UTF_32_LE:
281
case WCHAR_T:
282
Out32(wc, (dst_encoding == UTF_32_BE));
283
break;
284
}
285
if (errno == EILSEQ) {
286
if (mode == IGNORE) {
287
*src_buf += src_bytes_used;
288
*src_bytes_left -= src_bytes_used;
289
ignored = true;
290
return true;
291
} else if (mode == TRANSLIT) {
292
wc = '?';
293
++replacement_count;
294
return Convert();
295
}
296
return false;
297
}
298
return Emit();
299
}
300
301
uint16_t In16(const char* buf, bool swap) {
302
#ifdef __cplusplus
303
const uint8_t* src = reinterpret_cast<const uint8_t*>(buf);
304
#else // !__cplusplus
305
const uint8_t* src = (const uint8_t*)(buf);
306
#endif // __cplusplus
307
uint16_t wc = (src[0]) | (src[1] << 8);
308
if (swap) wc = __swap16(wc);
309
src_bytes_used = 2;
310
return wc;
311
}
312
313
uint32_t In32(const char* buf, bool swap) {
314
#ifdef __cplusplus
315
const uint8_t* src = reinterpret_cast<const uint8_t*>(buf);
316
#else // !__cplusplus
317
const uint8_t* src = (const uint8_t*)(buf);
318
#endif // __cplusplus
319
uint32_t wc = (src[0]) | (src[1] << 8) | (src[2] << 16) | (src[3] << 24);
320
if (swap) wc = __swap32(wc);
321
src_bytes_used = 4;
322
return wc;
323
}
324
325
void Out16(char* dst, char16_t ch, bool swap) {
326
if (swap) ch = __swap16(ch);
327
dst[0] = ch;
328
dst[1] = ch >> 8;
329
dst_bytes_used = 2;
330
}
331
332
void Out32(char32_t ch, bool swap) {
333
if (swap) ch = __swap32(ch);
334
buf[0] = ch;
335
buf[1] = ch >> 8;
336
buf[2] = ch >> 16;
337
buf[3] = ch >> 24;
338
dst_bytes_used = 4;
339
}
340
341
bool Emit() {
342
if (dst_bytes_used > *dst_bytes_left) {
343
errno = E2BIG;
344
return false;
345
}
346
memcpy(*dst_buf, buf, dst_bytes_used);
347
*src_buf += src_bytes_used;
348
*src_bytes_left -= src_bytes_used;
349
*dst_buf += dst_bytes_used;
350
*dst_bytes_left -= dst_bytes_used;
351
return true;
352
}
353
354
int Done() {
355
if (mode == TRANSLIT) return replacement_count;
356
if (ignored) {
357
errno = EILSEQ;
358
return -1;
359
}
360
return 0;
361
}
362
};
363
364
// Creates a converter from __src_encoding to __dst_encoding.
//
// Only the destination name may carry a "//IGNORE" or "//TRANSLIT" suffix; a
// suffix on the source name is rejected. Returns the new converter, to be
// released with iconv_close(). Returns INVALID_ICONV_T with errno set to
// EINVAL if either name is null or not recognized.
iconv_t iconv_open(const char* __dst_encoding, const char* __src_encoding) {
  Encoding src_encoding;
  Encoding dst_encoding;
  Mode mode = ERROR;

  // Validate everything before allocating, so the failure path has nothing to
  // clean up.
  if (!__dst_encoding || !__src_encoding ||
      !__parse_encoding(__src_encoding, &src_encoding, 0 /* nullptr */) ||
      !__parse_encoding(__dst_encoding, &dst_encoding, &mode)) {
    errno = EINVAL;
    return INVALID_ICONV_T;
  }

  iconv_t result = new __iconv_t;
  result->src_encoding = src_encoding;
  result->dst_encoding = dst_encoding;
  result->mode = mode;
  return result;
}
374
375
size_t iconv(iconv_t __converter,
376
char** __src_buf, size_t* __src_bytes_left,
377
char** __dst_buf, size_t* __dst_bytes_left) {
378
if (__converter == INVALID_ICONV_T) {
379
errno = EBADF;
380
return -1;
381
}
382
return __converter->Convert(__src_buf, __src_bytes_left, __dst_buf, __dst_bytes_left);
383
}
384
385
// Destroys a converter created by iconv_open().
//
// Returns 0 on success. Returns -1 with errno set to EBADF if __converter is
// the INVALID_ICONV_T failure sentinel.
int iconv_close(iconv_t __converter) {
  if (__converter == INVALID_ICONV_T) {
    errno = EBADF;
    return -1;
  }
  delete __converter;
  return 0;
}
393
394
#endif // __ANDROID__
395
396