CoCalc -- t

GitHub Repository: freebsd/freebsd-src
Path: blob/main/crypto/krb5/src/util/support/t_utf8.c
¹⁰⁵²⁴⁷ views
1
/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* util/support/t_utf8.c - test UTF-8 boundary conditions */
/*
 * Copyright (C) 2015 by the Massachusetts Institute of Technology.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in
 *   the documentation and/or other materials provided with the
 *   distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */
32

33
#include <stdio.h>
34
#include <string.h>
35

36
#include "k5-platform.h"
37
#include "k5-utf8.h"
38

39
/*
 * Convenience macro to allow testing of old encodings.
 *
 * "Old" means ISO/IEC 10646 prior to 2011, when the highest valid code point
 * was U+7FFFFFFF instead of U+10FFFF.
 *
 * L(x) expands to x when OLDENCODINGS is defined and to 0 otherwise.  The
 * test table uses it for the expected length of sequences whose prefix is
 * only acceptable under the old rules.
 */
#ifdef OLDENCODINGS
#define L(x) (x)
#else
#define L(x) 0
#endif
50

51
/*
52
 * len is 0 for invalid encoding prefixes (KRB5_UTF8_CHARLEN2() partially
53
 * enforces the validity of the first two bytes, based on masking the second
54
 * byte.  It doesn't check whether bit 6 is 0, though, and doesn't catch the
55
 * range between U+110000 and U+13FFFF).
56
 *
57
 * ucs is 0 for invalid encodings (including ones with valid prefixes according
58
 * to KRB5_UTF8_CHARLEN2(); krb5int_utf8_to_ucs4() will still fail on them
59
 * because it checks more things.)  Code points above U+10FFFF are excluded by
60
 * the actual test code and remain in the table for possibly testing the old
61
 * implementation that didn't exclude them.
62
 *
63
 * Neither krb5int_ucs4_to_utf8() nor krb5int_utf8_to_ucs4() excludes the
64
 * surrogate pair range.
65
 */
66
struct testcase {
67
    const char *p;
68
    krb5_ucs4 ucs;
69
    int len;
70
} testcases[] = {
71
    { "\x7f", 0x0000007f, 1 },             /* Lowest 1-byte encoding */
72
    { "\xc0\x80", 0x00000000, 0 },         /* Invalid 2-byte encoding */
73
    { "\xc2\x80", 0x00000080, 2 },         /* Lowest valid 2-byte encoding */
74
    { "\xdf\xbf", 0x000007ff, 2 },         /* Highest valid 2-byte encoding*/
75
    { "\xdf\xff", 0x00000000, 2 },         /* Invalid 2-byte encoding*/
76
    { "\xe0\x80\x80", 0x00000000, 0 },     /* Invalid 3-byte encoding */
77
    { "\xe0\xa0\x80", 0x00000800, 3 },     /* Lowest valid 3-byte encoding */
78
    { "\xef\xbf\xbf", 0x0000ffff, 3 },     /* Highest valid 3-byte encoding */
79
    { "\xef\xff\xff", 0x00000000, 3 },     /* Invalid 3-byte encoding */
80
    { "\xf0\x80\x80\x80", 0x00000000, 0 }, /* Invalid 4-byte encoding */
81
    { "\xf0\x90\x80\x80", 0x00010000, 4 }, /* Lowest valid 4-byte encoding */
82
    { "\xf4\x8f\xbf\xbf", 0x0010ffff, 4 }, /* Highest valid 4-byte encoding */
83
    /* Next higher 4-byte encoding (old) */
84
    { "\xf4\x90\x80\x80", 0x00110000, 4 },
85
    /* Highest 4-byte encoding starting with 0xf4 (old) */
86
    { "\xf4\xbf\xbf\xbf", 0x0013ffff, 4 },
87
    /* Next higher 4-byte prefix byte (old) */
88
    { "\xf5\x80\x80\x80", 0x00140000, L(4) },
89
    /* Highest valid 4-byte encoding (old) */
90
    { "\xf7\xbf\xbf\xbf", 0x001fffff, L(4) },
91
    /* Invalid 4-byte encoding */
92
    { "\xf7\xff\xff\xff", 0x00000000, L(4) },
93
    /* Invalid 5-byte encoding */
94
    { "\xf8\x80\x80\x80\x80", 0x00000000, 0 },
95
    /* Lowest valid 5-byte encoding (old) */
96
    { "\xf8\x88\x80\x80\x80", 0x00200000, L(5) },
97
    /* Highest valid 5-byte encoding (old) */
98
    { "\xfb\xbf\xbf\xbf\xbf", 0x03ffffff, L(5) },
99
    /* Invalid 5-byte encoding */
100
    { "\xfb\xff\xff\xff\xff", 0x00000000, L(5) },
101
    /* Invalid 6-byte encoding */
102
    { "\xfc\x80\x80\x80\x80\x80", 0x00000000, 0 },
103
    /* Lowest valid 6-byte encoding (old) */
104
    { "\xfc\x84\x80\x80\x80\x80", 0x04000000, L(6) },
105
    /* Highest valid 6-byte encoding (old) */
106
    { "\xfd\xbf\xbf\xbf\xbf\xbf", 0x7fffffff, L(6) },
107
    /* Invalid 6-byte encoding */
108
    { "\xfd\xff\xff\xff\xff\xff", 0x00000000, L(6) },
109
};
110

111
/*
 * Print each byte of the NUL-terminated string p to stdout as two lowercase
 * hex digits followed by a space.  No newline is written.
 */
static void
printhex(const char *p)
{
    for (; *p != '\0'; p++) {
        /*
         * Convert through unsigned char so bytes >= 0x80 are not
         * sign-extended, then to unsigned int to match the %x conversion.
         */
        printf("%02x ", (unsigned int)(unsigned char)*p);
    }
}
118

119
static void
120
printtest(struct testcase *t)
121
{
122
    printhex(t->p);
123
    printf("0x%08lx, %d\n", (unsigned long)t->ucs, t->len);
124
}
125

126
static int
127
test_decode(struct testcase *t, int high4)
128
{
129
    int len, status = 0;
130
    krb5_ucs4 u = 0;
131

132
    len = KRB5_UTF8_CHARLEN2(t->p, len);
133
    if (len != t->len) {
134
        printf("expected len=%d, got len=%d\n", t->len, len);
135
        status = 1;
136
    }
137
    if ((t->len == 0 || high4) && krb5int_utf8_to_ucs4(t->p, &u) != -1) {
138
        printf("unexpected success in utf8_to_ucs4\n");
139
        status = 1;
140
    }
141
    if (krb5int_utf8_to_ucs4(t->p, &u) != 0 && t->ucs != 0 && !high4) {
142
        printf("unexpected failure in utf8_to_ucs4\n");
143
        status = 1;
144
    }
145
    if (t->ucs != u && !high4) {
146
        printf("expected 0x%08lx, got 0x%08lx\n", (unsigned long)t->ucs,
147
               (unsigned long)u);
148
        status = 1;
149
    }
150
    return status;
151
}
152

153
static int
154
test_encode(struct testcase *t, int high4)
155
{
156
    size_t size;
157
    char buf[7];
158

159
    memset(buf, 0, sizeof(buf));
160
    size = krb5int_ucs4_to_utf8(t->ucs, buf);
161
    if (high4 && size != 0) {
162
        printf("unexpected success beyond U+10FFFF\n");
163
        return 1;
164
    }
165
    if (!high4 && size == 0) {
166
        printf("unexpected zero size on encode\n");
167
        return 1;
168
    }
169
    if (size != 0 && strcmp(t->p, buf) != 0) {
170
        printf("expected ");
171
        printhex(t->p);
172
        printf("got ");
173
        printhex(buf);
174
        printf("\n");
175
        return 1;
176
    }
177
    return 0;
178
}
179

180
int
181
main(int argc, char **argv)
182
{
183
    size_t ncases = sizeof(testcases) / sizeof(testcases[0]);
184
    size_t i;
185
    struct testcase *t;
186
    int status = 0, verbose = 0;
187
    /* Is this a "high" 4-byte encoding above U+10FFFF? */
188
    int high4;
189

190
    if (argc == 2 && strcmp(argv[1], "-v") == 0)
191
        verbose = 1;
192
    for (i = 0; i < ncases; i++) {
193
        t = &testcases[i];
194
        if (verbose)
195
            printtest(t);
196
#ifndef OLDENCODINGS
197
        high4 = t->ucs > 0x10ffff;
198
#else
199
        high4 = 0;
200
#endif
201
        if (test_decode(t, high4) != 0)
202
            status = 1;
203
        if (t->ucs == 0)
204
            continue;
205
        if (test_encode(t, high4) != 0)
206
            status = 1;
207
    }
208
    return status;
209
}
210

211
Product

Resources

Company