Path: blob/master/libs/mpg123/src/libmpg123/icy2utf8.c
4394 views
/* mpg123 note: This is BSD-licensed code that is no problem for mpg123 usage under LGPL.1It's Free, understood? ;-) */23/* Another note: This code is basically written by Thorsten Glaser,4Thomas Orgis did just some rearrangements and comments. */56/*-7* Copyright (c) 20088* Thorsten Glaser <[email protected]>9*10* Provided that these terms and disclaimer and all copyright notices11* are retained or reproduced in an accompanying document, permission12* is granted to deal in this work without restriction, including un-13* limited rights to use, publicly perform, distribute, sell, modify,14* merge, give away, or sublicence.15*16* This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to17* the utmost extent permitted by applicable law, neither express nor18* implied; without malicious intent or gross negligence. In no event19* may a licensor, author or contributor be held liable for indirect,20* direct, other damage, loss, or other issues arising in any way out21* of dealing in the work, even if advised of the possibility of such22* damage or existence of a defect, except proven that it results out23* of said person's immediate fault when using the work as intended.24*-25* Convert from ICY encoding (windows-1252 codepage) to UTF-826*/2728#include "config.h"29/* Includes string and stdlib headers... */30#include "../compat/compat.h"3132/* ThOr: too lazy for this type check; also we use char/short all around anyway.33Of cource, it would be the proper way to use _these_ kind of types all around. */34#define uint8_t unsigned char35#define uint16_t unsigned short3637static const uint8_t cp1252_utf8[] = {38/* 0x00 @ 0 */ 0x00,39/* 0x01 @ 1 */ 0x01,40/* 0x02 @ 2 */ 0x02,41/* 0x03 @ 3 */ 0x03,42/* 0x04 @ 4 */ 0x04,43/* 0x05 @ 5 */ 0x05,44/* 0x06 @ 6 */ 0x06,45/* 0x07 @ 7 */ 0x07,46/* 0x08 @ 8 */ 0x08,47/* 0x09 @ 9 */ 0x09,48/* 0x0A @ 10 */ 0x0A,49/* 0x0B @ 11 */ 0x0B,50/* 0x0C @ 12 */ 0x0C,51/* 0x0D @ 13 */ 0x0D,52/* 0x0E @ 14 */ 0x0E,53/* 0x0F @ 15 */ 0x0F,54/* 0x10 @ 16 */ 0x10,55/* 0x11 @ 17 */ 0x11,56/* 0x12 @ 18 */ 0x12,57/* 0x13 @ 19 */ 0x13,58/* 0x14 @ 20 */ 0x14,59/* 0x15 @ 21 */ 0x15,60/* 0x16 @ 22 */ 0x16,61/* 0x17 @ 23 */ 0x17,62/* 0x18 @ 24 */ 0x18,63/* 0x19 @ 25 */ 0x19,64/* 0x1A @ 26 */ 0x1A,65/* 0x1B @ 27 */ 0x1B,66/* 0x1C @ 28 */ 0x1C,67/* 0x1D @ 29 */ 0x1D,68/* 0x1E @ 30 */ 0x1E,69/* 0x1F @ 31 */ 0x1F,70/* 0x20 @ 32 */ 0x20,71/* 0x21 @ 33 */ 0x21,72/* 0x22 @ 34 */ 0x22,73/* 0x23 @ 35 */ 0x23,74/* 0x24 @ 36 */ 0x24,75/* 0x25 @ 37 */ 0x25,76/* 0x26 @ 38 */ 0x26,77/* 0x27 @ 39 */ 0x27,78/* 0x28 @ 40 */ 0x28,79/* 0x29 @ 41 */ 0x29,80/* 0x2A @ 42 */ 0x2A,81/* 0x2B @ 43 */ 0x2B,82/* 0x2C @ 44 */ 0x2C,83/* 0x2D @ 45 */ 0x2D,84/* 0x2E @ 46 */ 0x2E,85/* 0x2F @ 47 */ 0x2F,86/* 0x30 @ 48 */ 0x30,87/* 0x31 @ 49 */ 0x31,88/* 0x32 @ 50 */ 0x32,89/* 0x33 @ 51 */ 0x33,90/* 0x34 @ 52 */ 0x34,91/* 0x35 @ 53 */ 0x35,92/* 0x36 @ 54 */ 0x36,93/* 0x37 @ 55 */ 0x37,94/* 0x38 @ 56 */ 0x38,95/* 0x39 @ 57 */ 0x39,96/* 0x3A @ 58 */ 0x3A,97/* 0x3B @ 59 */ 0x3B,98/* 0x3C @ 60 */ 0x3C,99/* 0x3D @ 61 */ 0x3D,100/* 0x3E @ 62 */ 0x3E,101/* 0x3F @ 63 */ 0x3F,102/* 0x40 @ 64 */ 0x40,103/* 0x41 @ 65 */ 0x41,104/* 0x42 @ 66 */ 0x42,105/* 0x43 @ 67 */ 0x43,106/* 0x44 @ 68 */ 0x44,107/* 0x45 @ 69 */ 0x45,108/* 0x46 @ 70 */ 0x46,109/* 0x47 @ 71 */ 0x47,110/* 0x48 @ 72 */ 0x48,111/* 0x49 @ 73 */ 0x49,112/* 0x4A @ 74 */ 0x4A,113/* 0x4B @ 75 */ 0x4B,114/* 0x4C @ 76 */ 0x4C,115/* 0x4D @ 77 */ 0x4D,116/* 0x4E @ 78 */ 0x4E,117/* 0x4F @ 79 */ 0x4F,118/* 0x50 @ 80 */ 0x50,119/* 0x51 @ 81 */ 0x51,120/* 0x52 @ 82 */ 0x52,121/* 0x53 @ 83 */ 0x53,122/* 0x54 @ 84 */ 0x54,123/* 0x55 @ 85 */ 0x55,124/* 0x56 @ 86 */ 0x56,125/* 0x57 @ 87 */ 0x57,126/* 0x58 @ 88 */ 0x58,127/* 0x59 @ 89 */ 0x59,128/* 0x5A @ 90 */ 0x5A,129/* 0x5B @ 91 */ 0x5B,130/* 0x5C @ 92 */ 0x5C,131/* 0x5D @ 93 */ 0x5D,132/* 0x5E @ 94 */ 0x5E,133/* 0x5F @ 95 */ 0x5F,134/* 0x60 @ 96 */ 0x60,135/* 0x61 @ 97 */ 0x61,136/* 0x62 @ 98 */ 0x62,137/* 0x63 @ 99 */ 0x63,138/* 0x64 @ 100 */ 0x64,139/* 0x65 @ 101 */ 0x65,140/* 0x66 @ 102 */ 0x66,141/* 0x67 @ 103 */ 0x67,142/* 0x68 @ 104 */ 0x68,143/* 0x69 @ 105 */ 0x69,144/* 0x6A @ 106 */ 0x6A,145/* 0x6B @ 107 */ 0x6B,146/* 0x6C @ 108 */ 0x6C,147/* 0x6D @ 109 */ 0x6D,148/* 0x6E @ 110 */ 0x6E,149/* 0x6F @ 111 */ 0x6F,150/* 0x70 @ 112 */ 0x70,151/* 0x71 @ 113 */ 0x71,152/* 0x72 @ 114 */ 0x72,153/* 0x73 @ 115 */ 0x73,154/* 0x74 @ 116 */ 0x74,155/* 0x75 @ 117 */ 0x75,156/* 0x76 @ 118 */ 0x76,157/* 0x77 @ 119 */ 0x77,158/* 0x78 @ 120 */ 0x78,159/* 0x79 @ 121 */ 0x79,160/* 0x7A @ 122 */ 0x7A,161/* 0x7B @ 123 */ 0x7B,162/* 0x7C @ 124 */ 0x7C,163/* 0x7D @ 125 */ 0x7D,164/* 0x7E @ 126 */ 0x7E,165/* 0x7F @ 127 */ 0x7F,166/* 0x80 @ 128 */ 0xE2, 0x82, 0xAC,167/* 0x81 @ 131 */ 0xEF, 0xBF, 0xBD,168/* 0x82 @ 134 */ 0xE2, 0x80, 0x9A,169/* 0x83 @ 137 */ 0xC6, 0x92,170/* 0x84 @ 139 */ 0xE2, 0x80, 0x9E,171/* 0x85 @ 142 */ 0xE2, 0x80, 0xA6,172/* 0x86 @ 145 */ 0xE2, 0x80, 0xA0,173/* 0x87 @ 148 */ 0xE2, 0x80, 0xA1,174/* 0x88 @ 151 */ 0xCB, 0x86,175/* 0x89 @ 153 */ 0xE2, 0x80, 0xB0,176/* 0x8A @ 156 */ 0xC5, 0xA0,177/* 0x8B @ 158 */ 0xE2, 0x80, 0xB9,178/* 0x8C @ 161 */ 0xC5, 0x92,179/* 0x8D @ 163 */ 0xEF, 0xBF, 0xBD,180/* 0x8E @ 166 */ 0xC5, 0xBD,181/* 0x8F @ 168 */ 0xEF, 0xBF, 0xBD,182/* 0x90 @ 171 */ 0xEF, 0xBF, 0xBD,183/* 0x91 @ 174 */ 0xE2, 0x80, 0x98,184/* 0x92 @ 177 */ 0xE2, 0x80, 0x99,185/* 0x93 @ 180 */ 0xE2, 0x80, 0x9C,186/* 0x94 @ 183 */ 0xE2, 0x80, 0x9D,187/* 0x95 @ 186 */ 0xE2, 0x80, 0xA2,188/* 0x96 @ 189 */ 0xE2, 0x80, 0x93,189/* 0x97 @ 192 */ 0xE2, 0x80, 0x94,190/* 0x98 @ 195 */ 0xCB, 0x9C,191/* 0x99 @ 197 */ 0xE2, 0x84, 0xA2,192/* 0x9A @ 200 */ 0xC5, 0xA1,193/* 0x9B @ 202 */ 0xE2, 0x80, 0xBA,194/* 0x9C @ 205 */ 0xC5, 0x93,195/* 0x9D @ 207 */ 0xEF, 0xBF, 0xBD,196/* 0x9E @ 210 */ 0xC5, 0xBE,197/* 0x9F @ 212 */ 0xC5, 0xB8,198/* 0xA0 @ 214 */ 0xC2, 0xA0,199/* 0xA1 @ 216 */ 0xC2, 0xA1,200/* 0xA2 @ 218 */ 0xC2, 0xA2,201/* 0xA3 @ 220 */ 0xC2, 0xA3,202/* 0xA4 @ 222 */ 0xC2, 0xA4,203/* 0xA5 @ 224 */ 0xC2, 0xA5,204/* 0xA6 @ 226 */ 0xC2, 0xA6,205/* 0xA7 @ 228 */ 0xC2, 0xA7,206/* 0xA8 @ 230 */ 0xC2, 0xA8,207/* 0xA9 @ 232 */ 0xC2, 0xA9,208/* 0xAA @ 234 */ 0xC2, 0xAA,209/* 0xAB @ 236 */ 0xC2, 0xAB,210/* 0xAC @ 238 */ 0xC2, 0xAC,211/* 0xAD @ 240 */ 0xC2, 0xAD,212/* 0xAE @ 242 */ 0xC2, 0xAE,213/* 0xAF @ 244 */ 0xC2, 0xAF,214/* 0xB0 @ 246 */ 0xC2, 0xB0,215/* 0xB1 @ 248 */ 0xC2, 0xB1,216/* 0xB2 @ 250 */ 0xC2, 0xB2,217/* 0xB3 @ 252 */ 0xC2, 0xB3,218/* 0xB4 @ 254 */ 0xC2, 0xB4,219/* 0xB5 @ 256 */ 0xC2, 0xB5,220/* 0xB6 @ 258 */ 0xC2, 0xB6,221/* 0xB7 @ 260 */ 0xC2, 0xB7,222/* 0xB8 @ 262 */ 0xC2, 0xB8,223/* 0xB9 @ 264 */ 0xC2, 0xB9,224/* 0xBA @ 266 */ 0xC2, 0xBA,225/* 0xBB @ 268 */ 0xC2, 0xBB,226/* 0xBC @ 270 */ 0xC2, 0xBC,227/* 0xBD @ 272 */ 0xC2, 0xBD,228/* 0xBE @ 274 */ 0xC2, 0xBE,229/* 0xBF @ 276 */ 0xC2, 0xBF,230/* 0xC0 @ 278 */ 0xC3, 0x80,231/* 0xC1 @ 280 */ 0xC3, 0x81,232/* 0xC2 @ 282 */ 0xC3, 0x82,233/* 0xC3 @ 284 */ 0xC3, 0x83,234/* 0xC4 @ 286 */ 0xC3, 0x84,235/* 0xC5 @ 288 */ 0xC3, 0x85,236/* 0xC6 @ 290 */ 0xC3, 0x86,237/* 0xC7 @ 292 */ 0xC3, 0x87,238/* 0xC8 @ 294 */ 0xC3, 0x88,239/* 0xC9 @ 296 */ 0xC3, 0x89,240/* 0xCA @ 298 */ 0xC3, 0x8A,241/* 0xCB @ 300 */ 0xC3, 0x8B,242/* 0xCC @ 302 */ 0xC3, 0x8C,243/* 0xCD @ 304 */ 0xC3, 0x8D,244/* 0xCE @ 306 */ 0xC3, 0x8E,245/* 0xCF @ 308 */ 0xC3, 0x8F,246/* 0xD0 @ 310 */ 0xC3, 0x90,247/* 0xD1 @ 312 */ 0xC3, 0x91,248/* 0xD2 @ 314 */ 0xC3, 0x92,249/* 0xD3 @ 316 */ 0xC3, 0x93,250/* 0xD4 @ 318 */ 0xC3, 0x94,251/* 0xD5 @ 320 */ 0xC3, 0x95,252/* 0xD6 @ 322 */ 0xC3, 0x96,253/* 0xD7 @ 324 */ 0xC3, 0x97,254/* 0xD8 @ 326 */ 0xC3, 0x98,255/* 0xD9 @ 328 */ 0xC3, 0x99,256/* 0xDA @ 330 */ 0xC3, 0x9A,257/* 0xDB @ 332 */ 0xC3, 0x9B,258/* 0xDC @ 334 */ 0xC3, 0x9C,259/* 0xDD @ 336 */ 0xC3, 0x9D,260/* 0xDE @ 338 */ 0xC3, 0x9E,261/* 0xDF @ 340 */ 0xC3, 0x9F,262/* 0xE0 @ 342 */ 0xC3, 0xA0,263/* 0xE1 @ 344 */ 0xC3, 0xA1,264/* 0xE2 @ 346 */ 0xC3, 0xA2,265/* 0xE3 @ 348 */ 0xC3, 0xA3,266/* 0xE4 @ 350 */ 0xC3, 0xA4,267/* 0xE5 @ 352 */ 0xC3, 0xA5,268/* 0xE6 @ 354 */ 0xC3, 0xA6,269/* 0xE7 @ 356 */ 0xC3, 0xA7,270/* 0xE8 @ 358 */ 0xC3, 0xA8,271/* 0xE9 @ 360 */ 0xC3, 0xA9,272/* 0xEA @ 362 */ 0xC3, 0xAA,273/* 0xEB @ 364 */ 0xC3, 0xAB,274/* 0xEC @ 366 */ 0xC3, 0xAC,275/* 0xED @ 368 */ 0xC3, 0xAD,276/* 0xEE @ 370 */ 0xC3, 0xAE,277/* 0xEF @ 372 */ 0xC3, 0xAF,278/* 0xF0 @ 374 */ 0xC3, 0xB0,279/* 0xF1 @ 376 */ 0xC3, 0xB1,280/* 0xF2 @ 378 */ 0xC3, 0xB2,281/* 0xF3 @ 380 */ 0xC3, 0xB3,282/* 0xF4 @ 382 */ 0xC3, 0xB4,283/* 0xF5 @ 384 */ 0xC3, 0xB5,284/* 0xF6 @ 386 */ 0xC3, 0xB6,285/* 0xF7 @ 388 */ 0xC3, 0xB7,286/* 0xF8 @ 390 */ 0xC3, 0xB8,287/* 0xF9 @ 392 */ 0xC3, 0xB9,288/* 0xFA @ 394 */ 0xC3, 0xBA,289/* 0xFB @ 396 */ 0xC3, 0xBB,290/* 0xFC @ 398 */ 0xC3, 0xBC,291/* 0xFD @ 400 */ 0xC3, 0xBD,292/* 0xFE @ 402 */ 0xC3, 0xBE,293/* 0xFF @ 404 */ 0xC3, 0xBF,294};295296static const uint16_t tblofs[257] = {297/* 0x00 */ 0, 1, 2, 3, 4, 5, 6, 7,298/* 0x08 */ 8, 9, 10, 11, 12, 13, 14, 15,299/* 0x10 */ 16, 17, 18, 19, 20, 21, 22, 23,300/* 0x18 */ 24, 25, 26, 27, 28, 29, 30, 31,301/* 0x20 */ 32, 33, 34, 35, 36, 37, 38, 39,302/* 0x28 */ 40, 41, 42, 43, 44, 45, 46, 47,303/* 0x30 */ 48, 49, 50, 51, 52, 53, 54, 55,304/* 0x38 */ 56, 57, 58, 59, 60, 61, 62, 63,305/* 0x40 */ 64, 65, 66, 67, 68, 69, 70, 71,306/* 0x48 */ 72, 73, 74, 75, 76, 77, 78, 79,307/* 0x50 */ 80, 81, 82, 83, 84, 85, 86, 87,308/* 0x58 */ 88, 89, 90, 91, 92, 93, 94, 95,309/* 0x60 */ 96, 97, 98, 99, 100, 101, 102, 103,310/* 0x68 */ 104, 105, 106, 107, 108, 109, 110, 111,311/* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119,312/* 0x78 */ 120, 121, 122, 123, 124, 125, 126, 127,313/* 0x80 */ 128, 131, 134, 137, 139, 142, 145, 148,314/* 0x88 */ 151, 153, 156, 158, 161, 163, 166, 168,315/* 0x90 */ 171, 174, 177, 180, 183, 186, 189, 192,316/* 0x98 */ 195, 197, 200, 202, 205, 207, 210, 212,317/* 0xA0 */ 214, 216, 218, 220, 222, 224, 226, 228,318/* 0xA8 */ 230, 232, 234, 236, 238, 240, 242, 244,319/* 0xB0 */ 246, 248, 250, 252, 254, 256, 258, 260,320/* 0xB8 */ 262, 264, 266, 268, 270, 272, 274, 276,321/* 0xC0 */ 278, 280, 282, 284, 286, 288, 290, 292,322/* 0xC8 */ 294, 296, 298, 300, 302, 304, 306, 308,323/* 0xD0 */ 310, 312, 314, 316, 318, 320, 322, 324,324/* 0xD8 */ 326, 328, 330, 332, 334, 336, 338, 340,325/* 0xE0 */ 342, 344, 346, 348, 350, 352, 354, 356,326/* 0xE8 */ 358, 360, 362, 364, 366, 368, 370, 372,327/* 0xF0 */ 374, 376, 378, 380, 382, 384, 386, 388,328/* 0xF8 */ 390, 392, 394, 396, 398, 400, 402, 404,329/* sizeof (cp1252_utf8) */ 406330};331332/* Check if a string qualifies as UTF-8. */333static int334is_utf8(const char* src)335{336uint8_t ch;337size_t i;338const uint8_t* s = (const uint8_t*) src;339340/* We make a loop over every character, until we find a null one.341Remember: The string is supposed to end with a NUL, so ahead checks are safe. */342while ((ch = *s++)) {343/* Ye olde 7bit ASCII chars 'rr fine for anything */344if(ch < 0x80) continue;345346/* Now, we watch out for non-UTF conform sequences. */347else if ((ch < 0xC2) || (ch > 0xFD))348return 0;349/* check for some misformed sequences */350if (((ch == 0xC2) && (s[0] < 0xA0)) ||351((ch == 0xEF) && (s[0] == 0xBF) && (s[1] > 0xBD)))352/* XXX add more for outside the BMP */353return 0;354355/* Check the continuation bytes. */356if (ch < 0xE0) i = 1;357else if (ch < 0xF0) i = 2;358else if (ch < 0xF8) i = 3;359else if (ch < 0xFC) i = 4;360else361i = 5;362363while (i--)364if ((*s++ & 0xC0) != 0x80)365return 0;366}367368/* If no check failed, the string indeed looks like valid UTF-8. */369return 1;370}371372/* The main conversion routine.373ICY in CP-1252 (or UTF-8 alreay) to UTF-8 encoded string.374If force is applied, it will always encode to UTF-8, without checking. */375char *376INT123_icy2utf8(const char *src, int force)377{378const uint8_t *s = (const uint8_t *)src;379size_t srclen, dstlen, i, k;380uint8_t ch, *d;381char *dst;382383/* Some funny streams from Apple/iTunes give ICY info in UTF-8 already.384So, be prepared and don't try to re-encode such. Unless forced. */385if(!force && is_utf8(src)) return (INT123_compat_strdup(src));386387srclen = strlen(src) + 1;388/* allocate conservatively */389if ((d = malloc(srclen * 3)) == NULL)390return (NULL);391392i = 0;393dstlen = 0;394while (i < srclen) {395ch = s[i++];396k = tblofs[ch];397while (k < tblofs[ch + 1])398d[dstlen++] = cp1252_utf8[k++];399}400401/* dstlen includes trailing NUL since srclen also does */402if ((dst = realloc(d, dstlen)) == NULL) {403free(d);404return (NULL);405}406return (dst);407}408409/* This stuff is for testing only. */410#ifdef TEST411static const char intext[] = "\225 Gr\374\337e kosten 0,55 \200\205";412413#include <stdio.h>414415int416main(void)417{418char *t, *t2;419420if ((t = INT123_icy2utf8(intext, 0)) == NULL) {421fprintf(stderr, "out of memory\n");422return (1);423}424425/* make sure it won't be converted twice */426if ((t2 = INT123_icy2utf8(t), 0) == NULL) {427fprintf(stderr, "out of memory\n");428return (1);429}430431printf("Result is:\t\343\200\214%s\343\200\215\n"432"\t\t\343\200\214%s\343\200\215\n", t, t2);433434free(t);435free(t2);436return (0);437}438#endif439440441