Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
7641 views
1
/*
2
* The authors of this software are Rob Pike and Ken Thompson.
3
* Copyright (c) 2002 by Lucent Technologies.
4
* Permission to use, copy, modify, and distribute this software for any
5
* purpose without fee is hereby granted, provided that this entire notice
6
* is included in all copies of any software which is or includes a copy
7
* or modification of this software and in all copies of the supporting
8
* documentation for such software.
9
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
10
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
11
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
12
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
13
*/
14
#include <stdlib.h>
15
#include <string.h>
16
17
#include "utf.h"
18
19
typedef unsigned char uchar;
20
21
enum
22
{
23
Bit1 = 7,
24
Bitx = 6,
25
Bit2 = 5,
26
Bit3 = 4,
27
Bit4 = 3,
28
29
T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
30
Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
31
T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
32
T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
33
T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
34
35
Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
36
Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
37
Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
38
39
Maskx = (1<<Bitx)-1, /* 0011 1111 */
40
Testx = Maskx ^ 0xFF, /* 1100 0000 */
41
42
Bad = Runeerror,
43
};
44
45
unsigned int
46
chartorune(Rune *rune, const char *str)
47
{
48
int c, c1, c2;
49
int l;
50
51
/*
52
* one character sequence
53
* 00000-0007F => T1
54
*/
55
c = *(uchar*)str;
56
if(c < Tx) {
57
*rune = c;
58
return 1;
59
}
60
61
/*
62
* two character sequence
63
* 0080-07FF => T2 Tx
64
*/
65
c1 = *(uchar*)(str+1) ^ Tx;
66
if(c1 & Testx)
67
goto bad;
68
if(c < T3) {
69
if(c < T2)
70
goto bad;
71
l = ((c << Bitx) | c1) & Rune2;
72
if(l <= Rune1)
73
goto bad;
74
*rune = l;
75
return 2;
76
}
77
78
/*
79
* three character sequence
80
* 0800-FFFF => T3 Tx Tx
81
*/
82
c2 = *(uchar*)(str+2) ^ Tx;
83
if(c2 & Testx)
84
goto bad;
85
if(c < T4) {
86
l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
87
if(l <= Rune2)
88
goto bad;
89
*rune = l;
90
return 3;
91
}
92
93
/*
94
* bad decoding
95
*/
96
bad:
97
*rune = Bad;
98
return 1;
99
}
100
101
unsigned int
102
runetochar(char *str, const Rune *rune)
103
{
104
unsigned int c;
105
106
/*
107
* one character sequence
108
* 00000-0007F => 00-7F
109
*/
110
c = *rune;
111
if(c <= Rune1) {
112
str[0] = c;
113
return 1;
114
}
115
116
/*
117
* two character sequence
118
* 0080-07FF => T2 Tx
119
*/
120
if(c <= Rune2) {
121
str[0] = T2 | (c >> 1*Bitx);
122
str[1] = Tx | (c & Maskx);
123
return 2;
124
}
125
126
/*
127
* three character sequence
128
* 0800-FFFF => T3 Tx Tx
129
*/
130
str[0] = T3 | (c >> 2*Bitx);
131
str[1] = Tx | ((c >> 1*Bitx) & Maskx);
132
str[2] = Tx | (c & Maskx);
133
return 3;
134
}
135
136
unsigned int
137
runelen(int c)
138
{
139
Rune rune;
140
char str[10];
141
142
rune = c;
143
return runetochar(str, &rune);
144
}
145
146
unsigned int
147
utflen(const char *s)
148
{
149
unsigned int c;
150
unsigned int n;
151
Rune rune;
152
153
n = 0;
154
for(;;) {
155
c = *(uchar*)s;
156
if(c < Runeself) {
157
if(c == 0)
158
return n;
159
s++;
160
} else
161
s += chartorune(&rune, s);
162
n++;
163
}
164
}
165
166