/* Path: blob/main/tools/test/iconv/tablegen/tablegen.c (39566 views) */
/*-1* Copyright (C) 2009, 2010 Gabor Kovesdan <[email protected]>2* All rights reserved.3*4* Redistribution and use in source and binary forms, with or without5* modification, are permitted provided that the following conditions6* are met:7* 1. Redistributions of source code must retain the above copyright8* notice, this list of conditions and the following disclaimer.9* 2. Redistributions in binary form must reproduce the above copyright10* notice, this list of conditions and the following disclaimer in the11* documentation and/or other materials provided with the distribution.12*13* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND14* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE15* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE16* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE17* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL18* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS19* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)20* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT21* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY22* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF23* SUCH DAMAGE.24*/2526#include <sys/cdefs.h>27#include <sys/endian.h>28#include <sys/types.h>2930#include <err.h>31#include <errno.h>32#include <getopt.h>33#include <iconv.h>34#include <stdbool.h>35#include <stdio.h>36#include <stdlib.h>3738#define UC_TO_MB_FLAG 139#define MB_TO_WC_FLAG 240#define MB_TO_UC_FLAG 441#define WC_TO_MB_FLAG 84243#define MAX(a,b) ((a) < (b) ? 
(b) : (a))4445extern char *__progname;4647static const char *optstr = "cdilrt";48static const char *citrus_common = "SRC_ZONE\t0x0000-0xFFFF\n"49"OOB_MODE\tILSEQ\n"50"DST_ILSEQ\t0xFFFE\n"51"DST_UNIT_BITS\t32\n\n"52"BEGIN_MAP\n"53"#\n# Generated with Citrus iconv (FreeBSD)\n#\n";54bool cflag;55bool dflag;56bool iflag;57bool lflag;58bool tflag;59bool rflag;60int fb_flags;6162static void do_conv(iconv_t, bool);63void mb_to_uc_fb(const char*, size_t,64void (*write_replacement)(const unsigned int *,65size_t, void *), void *, void *);66void mb_to_wc_fb(const char*, size_t,67void (*write_replacement) (const wchar_t *, size_t, void *),68void *, void *);69void uc_to_mb_fb(unsigned int,70void (*write_replacement) (const char *, size_t, void *), void *,71void *);72void wc_to_mb_fb(wchar_t,73void (*write_replacement)(const char *,74size_t, void *), void *, void *);7576struct option long_options[] =77{78{"citrus", no_argument, NULL, 'c'},79{"diagnostic", no_argument, NULL, 'd'},80{"ignore", no_argument, NULL, 'i'},81{"long", no_argument, NULL, 'l'},82{"reverse", no_argument, NULL, 'r'},83{"translit", no_argument, NULL, 't'},84{NULL, no_argument, NULL, 0}85};8687static void88usage(void) {8990fprintf(stderr, "Usage: %s [-cdilrt] ENCODING\n", __progname);91exit(EXIT_FAILURE);92}9394static void95format_diag(int errcode)96{97const char *errstr;98const char *u2m, *m2u, *m2w, *w2m;99100switch (errcode) {101case EINVAL:102errstr = "EINVAL ";103break;104case EILSEQ:105errstr = "EILSEQ ";106break;107case E2BIG:108errstr = "E2BIG ";109break;110default:111errstr = "UNKNOWN ";112break;113}114115u2m = (fb_flags & UC_TO_MB_FLAG) ? "U2M " : "";116m2w = (fb_flags & MB_TO_WC_FLAG) ? "M2W " : "";117m2u = (fb_flags & MB_TO_UC_FLAG) ? "M2U " : "";118w2m = (fb_flags & WC_TO_MB_FLAG) ? 
"W2M " : "";119120printf("%s%s%s%s%s", errstr, u2m, m2w, m2u, w2m);121}122123static int124magnitude(const uint32_t p)125{126127if (p >> 8 == 0)128return (1);129else if (p >> 16 == 0)130return (2);131else132return (p >> 24 == 0 ? 3 : 4);133}134135static void136format(const uint32_t data)137{138139/* XXX: could be simpler, something like this but with leading 0s?140141printf("0x%.*X", magnitude(data), data);142*/143144switch (magnitude(data)) {145default:146case 2:147printf("0x%04X", data);148break;149case 3:150printf("0x%06X", data);151break;152case 4:153printf("0x%08X", data);154break;155}156}157158void159uc_to_mb_fb(unsigned int code,160void (*write_replacement)(const char *buf, size_t buflen,161void* callback_arg), void* callback_arg, void* data)162{163164fb_flags |= UC_TO_MB_FLAG;165}166167void168mb_to_wc_fb(const char* inbuf, size_t inbufsize,169void (*write_replacement)(const wchar_t *buf, size_t buflen,170void* callback_arg), void* callback_arg, void* data)171{172173fb_flags |= MB_TO_WC_FLAG;174}175176void177mb_to_uc_fb(const char* inbuf, size_t inbufsize,178void (*write_replacement)(const unsigned int *buf, size_t buflen,179void* callback_arg), void* callback_arg, void* data)180{181182fb_flags |= MB_TO_UC_FLAG;183}184185void186wc_to_mb_fb(wchar_t wc,187void (*write_replacement)(const char *buf, size_t buflen,188void* callback_arg), void* callback_arg, void* data)189{190191fb_flags |= WC_TO_MB_FLAG;192}193194int195main (int argc, char *argv[])196{197struct iconv_fallbacks fbs;198iconv_t cd;199char *tocode;200int c;201202while (((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1)) {203switch (c) {204case 'c':205cflag = true;206break;207case 'd':208dflag = true;209break;210case 'i':211iflag = true;212break;213case 'l':214lflag = true;215break;216case 'r':217rflag = true;218break;219case 't':220tflag = true;221break;222}223}224argc -= optind;225argv += optind;226227if (argc < 1)228usage();229230fbs.uc_to_mb_fallback = 
uc_to_mb_fb;231fbs.mb_to_wc_fallback = mb_to_wc_fb;232fbs.mb_to_uc_fallback = mb_to_uc_fb;233fbs.wc_to_mb_fallback = wc_to_mb_fb;234fbs.data = NULL;235236if (argc == 2) {237asprintf(&tocode, "%s%s%s", argv[1], tflag ? "//TRASNLIT" : "",238iflag ? "//IGNORE" : "");239240if ((cd = iconv_open(tocode, argv[0])) == (iconv_t)-1)241err(1, NULL);242if (dflag) {243if (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)244err(1, NULL);245}246do_conv(cd, false);247} else if (rflag) {248asprintf(&tocode, "%s%s%s", argv[0], tflag ? "//TRANSLIT" : "",249iflag ? "//IGNORE" : "");250251if ((cd = iconv_open(tocode, "UTF-32LE")) == (iconv_t)-1)252err(1, NULL);253if (dflag && iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)254err(1, NULL);255if (cflag) {256printf("TYPE\t\tROWCOL\n");257printf("NAME\t\tUCS/%s\n", argv[0]);258printf("%s", citrus_common);259}260do_conv(cd, true);261} else {262if ((cd = iconv_open("UTF-32LE//TRANSLIT", argv[0])) == (iconv_t)-1)263err(1, NULL);264if (dflag && (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0))265err(1, NULL);266if (cflag) {267printf("TYPE\t\tROWCOL\n");268printf("NAME\t\t%s/UCS\n", argv[0]);269printf("%s", citrus_common);270}271do_conv(cd, false);272}273274if (iconv_close(cd) != 0)275err(1, NULL);276277return (EXIT_SUCCESS);278}279280static void281do_conv(iconv_t cd, bool uniinput) {282size_t inbytesleft, outbytesleft, ret;283uint32_t outbuf;284uint32_t inbuf;285char *inbuf_;286char *outbuf_;287288for (inbuf = 0; inbuf < (lflag ? 0x100000 : 0x10000); inbuf += 1) {289if (uniinput && (inbuf >= 0xD800) && (inbuf <= 0xDF00))290continue;291inbytesleft = uniinput ? 
4 : magnitude(inbuf);292outbytesleft = 4;293outbuf = 0x00000000;294outbuf_ = (char *)&outbuf;295inbuf_ = (char *)&inbuf;296iconv(cd, NULL, NULL, NULL, NULL);297fb_flags = 0;298errno = 0;299ret = iconv(cd, &inbuf_, &inbytesleft, &outbuf_, &outbytesleft);300if (ret == (size_t)-1) {301if (dflag) {302format(inbuf);303printf(" = ");304format_diag(errno);305printf("\n");306}307continue;308}309format(inbuf);310printf(" = ");311format(outbuf);312printf("\n");313}314if (cflag)315printf("END_MAP\n");316}317318319