Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Kitware
GitHub Repository: Kitware/CMake
Path: blob/master/Utilities/cmlibuv/src/idna.c
3153 views
1
/* Copyright (c) 2011, 2018 Ben Noordhuis <[email protected]>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
15
16
/* Derived from https://github.com/bnoordhuis/punycode
 * but updated to support IDNA 2008.
 */
19
20
#include "uv.h"
21
#include "idna.h"
22
#include <assert.h>
23
#include <string.h>
24
#include <limits.h> /* UINT_MAX */
25
26
/* Decode the tail of a multi-byte UTF-8 sequence.
 *
 * |a| is the lead byte, already consumed by the caller. |*p| points at the
 * first byte after the lead byte and |pe| points one past the end of input.
 *
 * Returns the decoded code point, or (unsigned) -1 when the sequence is
 * invalid: a lead byte above 0xF7, a continuation byte in lead position,
 * input too short for the lead byte, a trailing byte that is not of the form
 * 10xxxxxx, an overlong encoding, a value above U+10FFFF, or a surrogate.
 *
 * |*p| is advanced past every trailing byte that was read. Nothing is read
 * when the lead byte is rejected or the input is too short.
 */
static unsigned uv__utf8_decode1_slow(const char** p,
                                      const char* pe,
                                      unsigned a) {
  unsigned b;
  unsigned c;
  unsigned d;
  unsigned min;
  int need;

  if (a > 0xF7)
    return -1;

  /* Number of continuation bytes the lead byte calls for. */
  if (a > 0xEF)
    need = 3;
  else if (a > 0xDF)
    need = 2;
  else if (a > 0xBF)
    need = 1;
  else
    return -1;  /* Invalid continuation byte. */

  /* A truncated sequence must not be reinterpreted as a shorter one. */
  if (pe - *p < need)
    return -1;

  /* Normalize every form to the four-byte layout a/b/c/d. Slots that have no
   * real input byte are synthesized with the 10xxxxxx marker so the shared
   * validation and bit-packing below work for all three lengths.
   */
  switch (need) {
  case 3:
    min = 0x10000;
    a = a & 7;
    b = (unsigned char) *(*p)++;
    c = (unsigned char) *(*p)++;
    d = (unsigned char) *(*p)++;
    break;
  case 2:
    min = 0x800;
    b = 0x80 | (a & 15);
    c = (unsigned char) *(*p)++;
    d = (unsigned char) *(*p)++;
    a = 0;
    break;
  default:
    min = 0x80;
    b = 0x80;
    c = 0x80 | (a & 31);
    d = (unsigned char) *(*p)++;
    a = 0;
    break;
  }

  /* Check each byte on its own. XOR-ing the three bytes together lets pairs
   * of equally malformed bytes cancel each other out.
   */
  if ((b & 0xC0) != 0x80 || (c & 0xC0) != 0x80 || (d & 0xC0) != 0x80)
    return -1;  /* Invalid sequence. */

  b &= 63;
  c &= 63;
  d &= 63;
  a = (a << 18) | (b << 12) | (c << 6) | d;

  if (a < min)
    return -1;  /* Overlong sequence. */

  if (a > 0x10FFFF)
    return -1;  /* Four-byte sequence > U+10FFFF. */

  if (a >= 0xD800 && a <= 0xDFFF)
    return -1;  /* Surrogate pair. */

  return a;
}
91
92
/* Decode a single UTF-8 code point starting at |*p|.
 *
 * The caller must guarantee that |*p| is before |pe|; this is asserted.
 * |*p| is advanced past the lead byte here. Bytes below 128 are returned
 * directly; anything else is handed to uv__utf8_decode1_slow(), whose result
 * is returned unchanged.
 */
unsigned uv__utf8_decode1(const char** p, const char* pe) {
  unsigned lead;

  assert(*p < pe);

  lead = (unsigned char) **p;
  *p += 1;

  /* Multi-byte sequences take the slow path. */
  if (lead >= 128)
    return uv__utf8_decode1_slow(p, pe, lead);

  return lead;  /* ASCII, common case. */
}
104
105
static int uv__idna_toascii_label(const char* s, const char* se,
106
char** d, char* de) {
107
static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
108
const char* ss;
109
unsigned c;
110
unsigned h;
111
unsigned k;
112
unsigned n;
113
unsigned m;
114
unsigned q;
115
unsigned t;
116
unsigned x;
117
unsigned y;
118
unsigned bias;
119
unsigned delta;
120
unsigned todo;
121
int first;
122
123
h = 0;
124
ss = s;
125
todo = 0;
126
127
/* Note: after this loop we've visited all UTF-8 characters and know
128
* they're legal so we no longer need to check for decode errors.
129
*/
130
while (s < se) {
131
c = uv__utf8_decode1(&s, se);
132
133
if (c == UINT_MAX)
134
return UV_EINVAL;
135
136
if (c < 128)
137
h++;
138
else
139
todo++;
140
}
141
142
/* Only write "xn--" when there are non-ASCII characters. */
143
if (todo > 0) {
144
if (*d < de) *(*d)++ = 'x';
145
if (*d < de) *(*d)++ = 'n';
146
if (*d < de) *(*d)++ = '-';
147
if (*d < de) *(*d)++ = '-';
148
}
149
150
/* Write ASCII characters. */
151
x = 0;
152
s = ss;
153
while (s < se) {
154
c = uv__utf8_decode1(&s, se);
155
assert(c != UINT_MAX);
156
157
if (c > 127)
158
continue;
159
160
if (*d < de)
161
*(*d)++ = c;
162
163
if (++x == h)
164
break; /* Visited all ASCII characters. */
165
}
166
167
if (todo == 0)
168
return h;
169
170
/* Only write separator when we've written ASCII characters first. */
171
if (h > 0)
172
if (*d < de)
173
*(*d)++ = '-';
174
175
n = 128;
176
bias = 72;
177
delta = 0;
178
first = 1;
179
180
while (todo > 0) {
181
m = -1;
182
s = ss;
183
184
while (s < se) {
185
c = uv__utf8_decode1(&s, se);
186
assert(c != UINT_MAX);
187
188
if (c >= n)
189
if (c < m)
190
m = c;
191
}
192
193
x = m - n;
194
y = h + 1;
195
196
if (x > ~delta / y)
197
return UV_E2BIG; /* Overflow. */
198
199
delta += x * y;
200
n = m;
201
202
s = ss;
203
while (s < se) {
204
c = uv__utf8_decode1(&s, se);
205
assert(c != UINT_MAX);
206
207
if (c < n)
208
if (++delta == 0)
209
return UV_E2BIG; /* Overflow. */
210
211
if (c != n)
212
continue;
213
214
for (k = 36, q = delta; /* empty */; k += 36) {
215
t = 1;
216
217
if (k > bias)
218
t = k - bias;
219
220
if (t > 26)
221
t = 26;
222
223
if (q < t)
224
break;
225
226
/* TODO(bnoordhuis) Since 1 <= t <= 26 and therefore
227
* 10 <= y <= 35, we can optimize the long division
228
* into a table-based reciprocal multiplication.
229
*/
230
x = q - t;
231
y = 36 - t; /* 10 <= y <= 35 since 1 <= t <= 26. */
232
q = x / y;
233
t = t + x % y; /* 1 <= t <= 35 because of y. */
234
235
if (*d < de)
236
*(*d)++ = alphabet[t];
237
}
238
239
if (*d < de)
240
*(*d)++ = alphabet[q];
241
242
delta /= 2;
243
244
if (first) {
245
delta /= 350;
246
first = 0;
247
}
248
249
/* No overflow check is needed because |delta| was just
250
* divided by 2 and |delta+delta >= delta + delta/h|.
251
*/
252
h++;
253
delta += delta / h;
254
255
for (bias = 0; delta > 35 * 26 / 2; bias += 36)
256
delta /= 35;
257
258
bias += 36 * delta / (delta + 38);
259
delta = 0;
260
todo--;
261
}
262
263
delta++;
264
n++;
265
}
266
267
return 0;
268
}
269
270
long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
271
const char* si;
272
const char* st;
273
unsigned c;
274
char* ds;
275
int rc;
276
277
if (s == se)
278
return UV_EINVAL;
279
280
ds = d;
281
282
si = s;
283
while (si < se) {
284
st = si;
285
c = uv__utf8_decode1(&si, se);
286
287
if (c == UINT_MAX)
288
return UV_EINVAL;
289
290
if (c != '.')
291
if (c != 0x3002) /* 。 */
292
if (c != 0xFF0E) /* . */
293
if (c != 0xFF61) /* 。 */
294
continue;
295
296
rc = uv__idna_toascii_label(s, st, &d, de);
297
298
if (rc < 0)
299
return rc;
300
301
if (d < de)
302
*d++ = '.';
303
304
s = si;
305
}
306
307
if (s < se) {
308
rc = uv__idna_toascii_label(s, se, &d, de);
309
310
if (rc < 0)
311
return rc;
312
}
313
314
if (d >= de)
315
return UV_EINVAL;
316
317
*d++ = '\0';
318
return d - ds; /* Number of bytes written. */
319
}
320
321