/*1* The authors of this software are Rob Pike and Ken Thompson.2* Copyright (c) 2002 by Lucent Technologies.3* Permission to use, copy, modify, and distribute this software for any4* purpose without fee is hereby granted, provided that this entire notice5* is included in all copies of any software which is or includes a copy6* or modification of this software and in all copies of the supporting7* documentation for such software.8* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED9* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE10* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY11* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.12*/13#include <stdlib.h>14#include <string.h>1516#include "utf.h"1718typedef unsigned char uchar;1920enum21{22Bit1 = 7,23Bitx = 6,24Bit2 = 5,25Bit3 = 4,26Bit4 = 3,2728T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */29Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */30T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */31T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */32T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */3334Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */35Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */36Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */3738Maskx = (1<<Bitx)-1, /* 0011 1111 */39Testx = Maskx ^ 0xFF, /* 1100 0000 */4041Bad = Runeerror,42};4344unsigned int45chartorune(Rune *rune, const char *str)46{47int c, c1, c2;48int l;4950/*51* one character sequence52* 00000-0007F => T153*/54c = *(uchar*)str;55if(c < Tx) {56*rune = c;57return 1;58}5960/*61* two character sequence62* 0080-07FF => T2 Tx63*/64c1 = *(uchar*)(str+1) ^ Tx;65if(c1 & Testx)66goto bad;67if(c < T3) {68if(c < T2)69goto bad;70l = ((c << Bitx) | c1) & Rune2;71if(l <= Rune1)72goto bad;73*rune = l;74return 2;75}7677/*78* three character sequence79* 0800-FFFF => T3 Tx Tx80*/81c2 = *(uchar*)(str+2) ^ Tx;82if(c2 & Testx)83goto bad;84if(c < T4) {85l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;86if(l <= Rune2)87goto bad;88*rune = l;89return 3;90}9192/*93* bad decoding94*/95bad:96*rune = Bad;97return 1;98}99100unsigned int101runetochar(char *str, const Rune *rune)102{103unsigned int c;104105/*106* one character sequence107* 00000-0007F => 00-7F108*/109c = *rune;110if(c <= Rune1) {111str[0] = c;112return 1;113}114115/*116* two character sequence117* 0080-07FF => T2 Tx118*/119if(c <= Rune2) {120str[0] = T2 | (c >> 1*Bitx);121str[1] = Tx | (c & Maskx);122return 2;123}124125/*126* three character sequence127* 0800-FFFF => T3 Tx Tx128*/129str[0] = T3 | (c >> 2*Bitx);130str[1] = Tx | ((c >> 1*Bitx) & Maskx);131str[2] = Tx | (c & Maskx);132return 3;133}134135unsigned int136runelen(int c)137{138Rune rune;139char str[10];140141rune = c;142return runetochar(str, &rune);143}144145unsigned int146utflen(const char *s)147{148unsigned int c;149unsigned int n;150Rune rune;151152n = 0;153for(;;) {154c = *(uchar*)s;155if(c < Runeself) {156if(c == 0)157return n;158s++;159} else160s += chartorune(&rune, s);161n++;162}163}164165166