Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
tpruvot
GitHub Repository: tpruvot/cpuminer-multi
Path: blob/linux/compat/jansson/utf.c
1201 views
1
/*
2
* Copyright (c) 2009-2013 Petri Lehtinen <petri@digip.org>
3
*
4
* Jansson is free software; you can redistribute it and/or modify
5
* it under the terms of the MIT license. See LICENSE for details.
6
*/
7
8
#include <string.h>
9
#include "utf.h"
10
11
/*
 * Encode a single Unicode code point as UTF-8.
 *
 * codepoint: the value to encode.
 * buffer:    destination for the encoded bytes; it must have room for the
 *            whole sequence (at most 4 bytes). No NUL terminator is written.
 * size:      receives the number of bytes stored in buffer (1 to 4).
 *
 * Returns 0 on success. Returns -1, leaving buffer and *size untouched,
 * when codepoint is negative, greater than 0x10FFFF, or a UTF-16 surrogate
 * half (0xD800..0xDFFF). Surrogates are refused because they are not valid
 * in UTF-8 and utf8_check_full() rejects their encoded form, so the encoder
 * must never produce them.
 */
int utf8_encode(int32_t codepoint, char *buffer, int *size)
{
    if(codepoint < 0 || codepoint > 0x10FFFF)
        return -1;

    if(0xD800 <= codepoint && codepoint <= 0xDFFF)
        return -1;

    if(codepoint < 0x80)
    {
        /* 0xxxxxxx */
        buffer[0] = (char)codepoint;
        *size = 1;
    }
    else if(codepoint < 0x800)
    {
        /* 110xxxxx 10xxxxxx */
        buffer[0] = (char)(0xC0 | (codepoint >> 6));
        buffer[1] = (char)(0x80 | (codepoint & 0x3F));
        *size = 2;
    }
    else if(codepoint < 0x10000)
    {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        buffer[0] = (char)(0xE0 | (codepoint >> 12));
        buffer[1] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
        buffer[2] = (char)(0x80 | (codepoint & 0x3F));
        *size = 3;
    }
    else
    {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        buffer[0] = (char)(0xF0 | (codepoint >> 18));
        buffer[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F));
        buffer[2] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
        buffer[3] = (char)(0x80 | (codepoint & 0x3F));
        *size = 4;
    }

    return 0;
}
46
47
/*
 * Classify the first byte of a UTF-8 sequence.
 *
 * Returns the total length in bytes (1 to 4) of the sequence that the byte
 * introduces, or 0 when the byte cannot start a sequence.
 */
int utf8_check_first(char byte)
{
    const unsigned int lead = (unsigned char)byte;

    /* The ranges are tested in ascending order, so each test only needs
       an upper bound. */
    if(lead <= 0x7F)
        return 1;   /* plain ASCII */

    if(lead <= 0xBF)
        return 0;   /* continuation byte: second, third or fourth byte */

    if(lead <= 0xC1)
        return 0;   /* 0xC0, 0xC1: overlong encoding of an ASCII byte */

    if(lead <= 0xDF)
        return 2;   /* 2-byte sequence */

    if(lead <= 0xEF)
        return 3;   /* 3-byte sequence */

    if(lead <= 0xF4)
        return 4;   /* 4-byte sequence */

    /* 0xF5 and above: restricted (4-, 5- or 6-byte start) or invalid */
    return 0;
}
82
83
/*
 * Validate and decode one multi-byte UTF-8 sequence.
 *
 * buffer:    the first byte of the sequence; buffer[0..size-1] are read.
 * size:      the sequence length in bytes; only 2, 3 and 4 are accepted.
 * codepoint: if non-NULL, receives the decoded value on success only.
 *
 * Returns 1 for a well-formed sequence. Returns 0 when size is not 2, 3
 * or 4, when a trailing byte is not a continuation byte, or when the
 * decoded value is overlong for its length, a UTF-16 surrogate half, or
 * above 0x10FFFF.
 */
int utf8_check_full(const char *buffer, int size, int32_t *codepoint)
{
    int32_t decoded = (unsigned char)buffer[0];
    int32_t minimum;   /* smallest value that needs `size` bytes */
    int pos;

    /* Keep only the payload bits of the leading byte. */
    switch(size)
    {
        case 2:
            decoded &= 0x1F;
            minimum = 0x80;
            break;
        case 3:
            decoded &= 0x0F;
            minimum = 0x800;
            break;
        case 4:
            decoded &= 0x07;
            minimum = 0x10000;
            break;
        default:
            return 0;
    }

    for(pos = 1; pos < size; pos++)
    {
        const unsigned char next = (unsigned char)buffer[pos];

        /* Continuation bytes have the form 10xxxxxx. */
        if((next & 0xC0) != 0x80)
            return 0;

        decoded = (decoded << 6) | (next & 0x3F);
    }

    /* Reject overlong encodings, values outside the Unicode range and
       UTF-16 surrogate halves. */
    if(decoded < minimum ||
       decoded > 0x10FFFF ||
       (decoded >= 0xD800 && decoded <= 0xDFFF))
        return 0;

    if(codepoint)
        *codepoint = decoded;

    return 1;
}
138
139
/*
 * Decode the UTF-8 sequence at the start of a NUL-terminated string.
 *
 * buffer:    the current position in the string.
 * codepoint: if non-NULL, receives the decoded value when a sequence is
 *            consumed.
 *
 * Returns a pointer just past the consumed sequence. At the terminating
 * NUL, buffer itself is returned and *codepoint is left untouched.
 * Returns NULL if the bytes at buffer are not valid UTF-8.
 */
const char *utf8_iterate(const char *buffer, int32_t *codepoint)
{
    int32_t decoded;
    int width;

    if(*buffer == '\0')
        return buffer;

    width = utf8_check_first(buffer[0]);
    if(width <= 0)
        return NULL;

    if(width > 1)
    {
        /* Multi-byte sequence: validate the trailing bytes and decode. */
        if(!utf8_check_full(buffer, width, &decoded))
            return NULL;
    }
    else
    {
        /* A single ASCII byte is its own code point. */
        decoded = (unsigned char)buffer[0];
    }

    if(codepoint != NULL)
        *codepoint = decoded;

    return buffer + width;
}
164
165
/*
 * Check that a byte string is entirely valid UTF-8.
 *
 * string: the bytes to validate.
 * length: the number of bytes to examine, or -1 to use strlen(string).
 *
 * Returns 1 when every sequence in the examined range is well-formed and
 * complete, 0 otherwise.
 */
int utf8_check_string(const char *string, int length)
{
    int pos = 0;

    if(length == -1)
        length = strlen(string);

    while(pos < length)
    {
        const int width = utf8_check_first(string[pos]);

        if(width == 0)
            return 0;

        if(width > 1)
        {
            /* The sequence must not run past the end of the input. */
            if(pos + width > length)
                return 0;

            if(!utf8_check_full(string + pos, width, NULL))
                return 0;
        }

        /* Move to the first byte of the next sequence. */
        pos += width;
    }

    return 1;
}
191
192