/* Path: blob/main/crypto/krb5/src/util/support/t_utf16.c (34889 views) */
/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */1/* util/support/t_utf16.c - test UTF-16 conversion functions */2/*3* Copyright (C) 2017 by the Massachusetts Institute of Technology.4* All rights reserved.5*6* Redistribution and use in source and binary forms, with or without7* modification, are permitted provided that the following conditions8* are met:9*10* * Redistributions of source code must retain the above copyright11* notice, this list of conditions and the following disclaimer.12*13* * Redistributions in binary form must reproduce the above copyright14* notice, this list of conditions and the following disclaimer in15* the documentation and/or other materials provided with the16* distribution.17*18* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS19* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT20* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS21* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE22* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,23* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES24* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR25* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)26* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,27* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)28* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED29* OF THE POSSIBILITY OF SUCH DAMAGE.30*/3132/*33* This program tests conversions between UTF-8 and little-endian UTF-16, with34* an eye mainly towards covering UTF-16 edge cases and UTF-8 decoding results35* which we detect as invalid in utf8_conv.c. 
t_utf8.c covers more UTF-8 edge36* cases.37*/3839#include <stdio.h>40#include <string.h>4142#include "k5-platform.h"43#include "k5-utf8.h"4445struct test {46const char *utf8;47const char *utf16;48size_t utf16len;49} tests[] = {50{ "", "", 0 },51{ "abcd", "a\0b\0c\0d\0", 8 },52/* From RFC 2781 (tests code point 0x12345 and some ASCII) */53{ "\xF0\x92\x8D\x85=Ra", "\x08\xD8\x45\xDF=\0R\0a\0", 10 },54/* Lowest and highest Supplementary Plane code points */55{ "\xF0\x90\x80\x80 \xF4\x8F\xBF\xBF",56"\x00\xD8\x00\xDC \0\xFF\xDB\xFF\xDF", 10 },57/* Basic Multilingual Plane code points near and above surrogate range */58{ "\xED\x9F\xBF", "\xFF\xD7", 2 },59{ "\xEE\x80\x80 \xEE\xBF\xBF", "\x00\xE0 \0\xFF\xEF", 6 },60/* Invalid UTF-8: decodes to value in surrogate pair range */61{ "\xED\xA0\x80", NULL, 0 }, /* 0xD800 */62{ "\xED\xAF\xBF", NULL, 0 }, /* 0xDBFF */63{ "\xED\xB0\x80", NULL, 0 }, /* 0xDC00 */64{ "\xED\xBF\xBF", NULL, 0 }, /* 0xDFFF */65/* Invalid UTF-8: decodes to value above Unicode range */66{ "\xF4\x90\x80\x80", NULL, 0 },67{ "\xF4\xBF\xBF\xBF", NULL, 0 },68{ "\xF5\x80\x80\x80", NULL, 0 }, /* thrown out early due to first byte */69/* Invalid UTF-16: odd numbers of UTF-16 bytes */70{ NULL, "\x00", 1 },71{ NULL, "\x01\x00\x02", 3 },72/* Invalid UTF-16: high surrogate without a following low surrogate */73{ NULL, "\x00\xD8\x00\x00", 4 },74{ NULL, "\x00\xD8\xFF\xDB", 4 },75{ NULL, "\xFF\xDB", 2 },76/* Invalid UTF-16: low surrogate without a preceding high surrogate */77{ NULL, "\x61\x00\x00\xDC", 4 },78{ NULL, "\xFF\xDF\xFF\xDB", 4 },79};8081int82main(int argc, char **argv)83{84int ret;85struct test *t;86size_t i, utf16len;87uint8_t *utf16;88char *utf8;8990for (i = 0; i < sizeof(tests) / sizeof(*tests); i++) {91t = &tests[i];92if (t->utf8 != NULL) {93ret = k5_utf8_to_utf16le(t->utf8, &utf16, &utf16len);94if (t->utf16 == NULL) {95assert(ret == EINVAL);96} else {97assert(ret == 0);98assert(t->utf16len == utf16len);99assert(memcmp(t->utf16, utf16, utf16len) == 
0);100free(utf16);101}102}103104if (t->utf16 != NULL) {105ret = k5_utf16le_to_utf8((uint8_t *)t->utf16, t->utf16len, &utf8);106if (t->utf8 == NULL) {107assert(ret == EINVAL);108} else {109assert(ret == 0);110assert(strcmp(t->utf8, utf8) == 0);111free(utf8);112}113}114}115return 0;116}117118119