CoCalc -- t_utf16.c

GitHub Repository: freebsd/freebsd-src
Path: blob/main/crypto/krb5/src/util/support/t_utf16.c
105420 views
1
/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
/* util/support/t_utf16.c - test UTF-16 conversion functions */
3
/*
4
 * Copyright (C) 2017 by the Massachusetts Institute of Technology.
5
 * All rights reserved.
6
 *
7
 * Redistribution and use in source and binary forms, with or without
8
 * modification, are permitted provided that the following conditions
9
 * are met:
10
 *
11
 * * Redistributions of source code must retain the above copyright
12
 *   notice, this list of conditions and the following disclaimer.
13
 *
14
 * * Redistributions in binary form must reproduce the above copyright
15
 *   notice, this list of conditions and the following disclaimer in
16
 *   the documentation and/or other materials provided with the
17
 *   distribution.
18
 *
19
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
24
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
30
 * OF THE POSSIBILITY OF SUCH DAMAGE.
31
 */
32

33
/*
34
 * This program tests conversions between UTF-8 and little-endian UTF-16, with
35
 * an eye mainly towards covering UTF-16 edge cases and UTF-8 decoding results
36
 * which we detect as invalid in utf8_conv.c.  t_utf8.c covers more UTF-8 edge
37
 * cases.
38
 */
39

40
#include <stdio.h>
41
#include <string.h>
42

43
#include "k5-platform.h"
44
#include "k5-utf8.h"
45

46
/*
 * One conversion test case.  The driver in main() converts in each direction
 * for which the source field is non-NULL; if the destination field is NULL,
 * the conversion is expected to fail with EINVAL.
 */
struct test {
    /* UTF-8 text as a C string, or NULL if the UTF-16 input is invalid. */
    const char *utf8;
    /* Little-endian UTF-16 bytes, or NULL if the UTF-8 input is invalid. */
    const char *utf16;
    /* Length of utf16 in bytes (0 when utf16 is NULL). */
    size_t utf16len;
};

struct test tests[] = {
    /* Empty string and plain ASCII */
    { "", "", 0 },
    { "abcd", "a\0b\0c\0d\0", 8 },

    /* Example taken from RFC 2781: code point 0x12345 followed by ASCII */
    { "\xF0\x92\x8D\x85=Ra", "\x08\xD8\x45\xDF=\0R\0a\0", 10 },

    /* First and last code points of the Supplementary Planes */
    { "\xF0\x90\x80\x80 \xF4\x8F\xBF\xBF",
      "\x00\xD8\x00\xDC \0\xFF\xDB\xFF\xDF", 10 },

    /* BMP code points just below and above the surrogate range */
    { "\xED\x9F\xBF", "\xFF\xD7", 2 },
    { "\xEE\x80\x80 \xEE\xBF\xBF", "\x00\xE0 \0\xFF\xEF", 6 },

    /* Invalid UTF-8: the decoded value lies within the surrogate range */
    { "\xED\xA0\x80", NULL, 0 }, /* 0xD800 */
    { "\xED\xAF\xBF", NULL, 0 }, /* 0xDBFF */
    { "\xED\xB0\x80", NULL, 0 }, /* 0xDC00 */
    { "\xED\xBF\xBF", NULL, 0 }, /* 0xDFFF */

    /* Invalid UTF-8: the decoded value lies above the Unicode range */
    { "\xF4\x90\x80\x80", NULL, 0 },
    { "\xF4\xBF\xBF\xBF", NULL, 0 },
    { "\xF5\x80\x80\x80", NULL, 0 }, /* thrown out early due to first byte */

    /* Invalid UTF-16: byte count is odd */
    { NULL, "\x00", 1 },
    { NULL, "\x01\x00\x02", 3 },

    /* Invalid UTF-16: high surrogate not followed by a low surrogate */
    { NULL, "\x00\xD8\x00\x00", 4 },
    { NULL, "\x00\xD8\xFF\xDB", 4 },
    { NULL, "\xFF\xDB", 2 },

    /* Invalid UTF-16: low surrogate not preceded by a high surrogate */
    { NULL, "\x61\x00\x00\xDC", 4 },
    { NULL, "\xFF\xDF\xFF\xDB", 4 },
};
81

82
int
83
main(int argc, char **argv)
84
{
85
    int ret;
86
    struct test *t;
87
    size_t i, utf16len;
88
    uint8_t *utf16;
89
    char *utf8;
90

91
    for (i = 0; i < sizeof(tests) / sizeof(*tests); i++) {
92
        t = &tests[i];
93
        if (t->utf8 != NULL) {
94
            ret = k5_utf8_to_utf16le(t->utf8, &utf16, &utf16len);
95
            if (t->utf16 == NULL) {
96
                assert(ret == EINVAL);
97
            } else {
98
                assert(ret == 0);
99
                assert(t->utf16len == utf16len);
100
                assert(memcmp(t->utf16, utf16, utf16len) == 0);
101
                free(utf16);
102
            }
103
        }
104

105
        if (t->utf16 != NULL) {
106
            ret = k5_utf16le_to_utf8((uint8_t *)t->utf16, t->utf16len, &utf8);
107
            if (t->utf8 == NULL) {
108
                assert(ret == EINVAL);
109
            } else {
110
                assert(ret == 0);
111
                assert(strcmp(t->utf8, utf8) == 0);
112
                free(utf8);
113
            }
114
        }
115
    }
116
    return 0;
117
}
118

119
Product

Resources

Company