Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/crypto/heimdal/lib/wind/normalize.c
34889 views
1
/*
2
* Copyright (c) 2004 Kungliga Tekniska Högskolan
3
* (Royal Institute of Technology, Stockholm, Sweden).
4
* All rights reserved.
5
*
6
* Redistribution and use in source and binary forms, with or without
7
* modification, are permitted provided that the following conditions
8
* are met:
9
*
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
*
13
* 2. Redistributions in binary form must reproduce the above copyright
14
* notice, this list of conditions and the following disclaimer in the
15
* documentation and/or other materials provided with the distribution.
16
*
17
* 3. Neither the name of the Institute nor the names of its contributors
18
* may be used to endorse or promote products derived from this software
19
* without specific prior written permission.
20
*
21
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
22
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
25
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31
* SUCH DAMAGE.
32
*/
33
34
#ifdef HAVE_CONFIG_H
35
#include <config.h>
36
#endif
37
#include "windlocl.h"
38
39
#include <assert.h>
40
#include <stdlib.h>
41
#include <errno.h>
42
#include <stdio.h>
43
44
#include "roken.h"
45
46
#include "normalize_table.h"
47
48
static int
49
translation_cmp(const void *key, const void *data)
50
{
51
const struct translation *t1 = (const struct translation *)key;
52
const struct translation *t2 = (const struct translation *)data;
53
54
return t1->key - t2->key;
55
}
56
57
enum { s_base = 0xAC00};
58
enum { s_count = 11172};
59
enum { l_base = 0x1100};
60
enum { l_count = 19};
61
enum { v_base = 0x1161};
62
enum { v_count = 21};
63
enum { t_base = 0x11A7};
64
enum { t_count = 28};
65
enum { n_count = v_count * t_count};
66
67
static int
68
hangul_decomp(const uint32_t *in, size_t in_len,
69
uint32_t *out, size_t *out_len)
70
{
71
uint32_t u = *in;
72
unsigned s_index;
73
unsigned l, v, t;
74
unsigned o;
75
76
if (u < s_base || u >= s_base + s_count)
77
return 0;
78
s_index = u - s_base;
79
l = l_base + s_index / n_count;
80
v = v_base + (s_index % n_count) / t_count;
81
t = t_base + s_index % t_count;
82
o = 2;
83
if (t != t_base)
84
++o;
85
if (*out_len < o)
86
return WIND_ERR_OVERRUN;
87
out[0] = l;
88
out[1] = v;
89
if (t != t_base)
90
out[2] = t;
91
*out_len = o;
92
return 1;
93
}
94
95
static uint32_t
96
hangul_composition(const uint32_t *in, size_t in_len)
97
{
98
if (in_len < 2)
99
return 0;
100
if (in[0] >= l_base && in[0] < l_base + l_count) {
101
unsigned l_index = in[0] - l_base;
102
unsigned v_index;
103
104
if (in[1] < v_base || in[1] >= v_base + v_count)
105
return 0;
106
v_index = in[1] - v_base;
107
return (l_index * v_count + v_index) * t_count + s_base;
108
} else if (in[0] >= s_base && in[0] < s_base + s_count) {
109
unsigned s_index = in[0] - s_base;
110
unsigned t_index;
111
112
if (s_index % t_count != 0)
113
return 0;
114
if (in[1] < t_base || in[1] >= t_base + t_count)
115
return 0;
116
t_index = in[1] - t_base;
117
return in[0] + t_index;
118
}
119
return 0;
120
}
121
122
static int
123
compat_decomp(const uint32_t *in, size_t in_len,
124
uint32_t *out, size_t *out_len)
125
{
126
unsigned i;
127
unsigned o = 0;
128
129
for (i = 0; i < in_len; ++i) {
130
struct translation ts = {in[i]};
131
size_t sub_len = *out_len - o;
132
int ret;
133
134
ret = hangul_decomp(in + i, in_len - i,
135
out + o, &sub_len);
136
if (ret) {
137
if (ret == WIND_ERR_OVERRUN)
138
return ret;
139
o += sub_len;
140
} else {
141
void *s = bsearch(&ts,
142
_wind_normalize_table,
143
_wind_normalize_table_size,
144
sizeof(_wind_normalize_table[0]),
145
translation_cmp);
146
if (s != NULL) {
147
const struct translation *t = (const struct translation *)s;
148
149
ret = compat_decomp(_wind_normalize_val_table + t->val_offset,
150
t->val_len,
151
out + o, &sub_len);
152
if (ret)
153
return ret;
154
o += sub_len;
155
} else {
156
if (o >= *out_len)
157
return WIND_ERR_OVERRUN;
158
out[o++] = in[i];
159
160
}
161
}
162
}
163
*out_len = o;
164
return 0;
165
}
166
167
static void
168
swap_char(uint32_t * a, uint32_t * b)
169
{
170
uint32_t t;
171
t = *a;
172
*a = *b;
173
*b = t;
174
}
175
176
/* Unicode 5.2.0 D109 Canonical Ordering for a sequence of code points
177
* that all have Canonical_Combining_Class > 0 */
178
static void
179
canonical_reorder_sequence(uint32_t * a, size_t len)
180
{
181
size_t i, j;
182
183
if (len <= 1)
184
return;
185
186
for (i = 1; i < len; i++) {
187
for (j = i;
188
j > 0 &&
189
_wind_combining_class(a[j]) < _wind_combining_class(a[j-1]);
190
j--)
191
swap_char(&a[j], &a[j-1]);
192
}
193
}
194
195
static void
196
canonical_reorder(uint32_t *tmp, size_t tmp_len)
197
{
198
size_t i;
199
200
for (i = 0; i < tmp_len; ++i) {
201
int cc = _wind_combining_class(tmp[i]);
202
if (cc) {
203
size_t j;
204
for (j = i + 1;
205
j < tmp_len && _wind_combining_class(tmp[j]);
206
++j)
207
;
208
canonical_reorder_sequence(&tmp[i], j - i);
209
i = j;
210
}
211
}
212
}
213
214
static uint32_t
215
find_composition(const uint32_t *in, unsigned in_len)
216
{
217
unsigned short canon_index = 0;
218
uint32_t cur;
219
unsigned n = 0;
220
221
cur = hangul_composition(in, in_len);
222
if (cur)
223
return cur;
224
225
do {
226
const struct canon_node *c = &_wind_canon_table[canon_index];
227
unsigned i;
228
229
if (n % 5 == 0) {
230
if (in_len-- == 0)
231
return c->val;
232
cur = *in++;
233
}
234
235
i = cur >> 16;
236
if (i < c->next_start || i >= c->next_end)
237
canon_index = 0;
238
else
239
canon_index =
240
_wind_canon_next_table[c->next_offset + i - c->next_start];
241
if (canon_index != 0) {
242
cur = (cur << 4) & 0xFFFFF;
243
++n;
244
}
245
} while (canon_index != 0);
246
return 0;
247
}
248
249
static int
250
combine(const uint32_t *in, size_t in_len,
251
uint32_t *out, size_t *out_len)
252
{
253
unsigned i;
254
int ostarter;
255
unsigned o = 0;
256
int old_cc;
257
258
for (i = 0; i < in_len;) {
259
while (i < in_len && _wind_combining_class(in[i]) != 0) {
260
out[o++] = in[i++];
261
}
262
if (i < in_len) {
263
if (o >= *out_len)
264
return WIND_ERR_OVERRUN;
265
ostarter = o;
266
out[o++] = in[i++];
267
old_cc = -1;
268
269
while (i < in_len) {
270
uint32_t comb;
271
uint32_t v[2];
272
int cc;
273
274
v[0] = out[ostarter];
275
v[1] = in[i];
276
277
cc = _wind_combining_class(in[i]);
278
if (old_cc != cc && (comb = find_composition(v, 2))) {
279
out[ostarter] = comb;
280
} else if (cc == 0) {
281
break;
282
} else {
283
if (o >= *out_len)
284
return WIND_ERR_OVERRUN;
285
out[o++] = in[i];
286
old_cc = cc;
287
}
288
++i;
289
}
290
}
291
}
292
*out_len = o;
293
return 0;
294
}
295
296
int
297
_wind_stringprep_normalize(const uint32_t *in, size_t in_len,
298
uint32_t *out, size_t *out_len)
299
{
300
size_t tmp_len;
301
uint32_t *tmp;
302
int ret;
303
304
if (in_len == 0) {
305
*out_len = 0;
306
return 0;
307
}
308
309
tmp_len = in_len * 4;
310
if (tmp_len < MAX_LENGTH_CANON)
311
tmp_len = MAX_LENGTH_CANON;
312
tmp = malloc(tmp_len * sizeof(uint32_t));
313
if (tmp == NULL)
314
return ENOMEM;
315
316
ret = compat_decomp(in, in_len, tmp, &tmp_len);
317
if (ret) {
318
free(tmp);
319
return ret;
320
}
321
canonical_reorder(tmp, tmp_len);
322
ret = combine(tmp, tmp_len, out, out_len);
323
free(tmp);
324
return ret;
325
}
326
327