Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
alexbevi
GitHub Repository: alexbevi/BizHawk
Path: blob/master/waterbox/libc/internals/_PDCLIB_encoding.h
2 views
1
/* Encoding support <_PDCLIB_encoding.h>
2
3
This file is part of the Public Domain C Library (PDCLib).
4
Permission is granted to use, modify, and / or redistribute at will.
5
*/
6
7
#ifndef __PDCLIB_ENCODING_H
8
#define __PDCLIB_ENCODING_H __PDCLIB_ENCODING_H
9
10
#include <uchar.h>
11
12
/* Must be cauued with bufsize >= 1, in != NULL, out != NULL, ps != NULL
13
*
14
* Converts a UTF-16 (char16_t) to a UCS4 (char32_t) value. Returns
15
* 1, 2 : Valid character (converted to UCS-4)
16
* -1 : Encoding error
17
* -2 : Partial character (only lead surrogate in buffer)
18
*/
19
static inline int _PDCLIB_c16rtoc32(
20
_PDCLIB_char32_t *_PDCLIB_restrict out,
21
const _PDCLIB_char16_t *_PDCLIB_restrict in,
22
_PDCLIB_size_t bufsize,
23
_PDCLIB_mbstate_t *_PDCLIB_restrict ps
24
)
25
{
26
if(ps->_Surrogate) {
27
// We already have a lead surrogate
28
if((*in & ~0x3FF) != 0xDC00) {
29
// Encoding error
30
return -1;
31
} else {
32
// Decode and reset state
33
*out = (ps->_Surrogate & 0x3FF) << 10 | (*in & 0x3FF);
34
ps->_Surrogate = 0;
35
return 1;
36
}
37
} if((*in & ~0x3FF) == 0xD800) {
38
// Lead surrogate
39
if(bufsize >= 2) {
40
// Buffer big enough
41
if((in[1] & ~0x3FF) != 0xDC00) {
42
// Encoding error
43
return -1;
44
} else {
45
*out = (in[0] & 0x3FF) << 10 | (in[1] & 0x3FF);
46
return 2;
47
}
48
} else {
49
// Buffer too small - update state
50
ps->_Surrogate = *in;
51
return -2;
52
}
53
} else {
54
// BMP character
55
*out = *in;
56
return 1;
57
}
58
}
59
60
static inline _PDCLIB_size_t _PDCLIB_c32rtoc16(
61
_PDCLIB_wchar_t *_PDCLIB_restrict out,
62
const _PDCLIB_char32_t *_PDCLIB_restrict in,
63
_PDCLIB_size_t bufsize,
64
_PDCLIB_mbstate_t *_PDCLIB_restrict ps
65
)
66
{
67
if(ps->_Surrogate) {
68
*out = ps->_Surrogate;
69
ps->_Surrogate = 0;
70
return 0;
71
}
72
73
if(*in <= 0xFFFF) {
74
// BMP character
75
*out = *in;
76
return 1;
77
} else {
78
// Supplementary plane character
79
*out = 0xD800 | (*in >> 10);
80
if(bufsize >= 2) {
81
out[1] = 0xDC00 | (*in & 0x3FF);
82
return 2;
83
} else {
84
ps->_Surrogate = 0xDC00 | (*in & 0x3FF);
85
return 1;
86
}
87
}
88
}
89
90
struct _PDCLIB_charcodec_t {
91
/* Reads at most *_P_insz code units from *_P_inbuf and writes the result
92
* into *_P_outbuf, writing at most *_P_outsz code units. Updates
93
* *_P_outbuf, *_P_outsz, *_P_inbuf, *_P_outsz with the resulting state
94
*
95
* If _P_outbuf is NULL, then the input must be processed but no output
96
* generated. _P_outsz may be processed as normal.
97
*
98
* Returns true if the conversion completed successfully (i.e. one of
99
* _P_outsize or _P_insize reached zero and no coding errors were
100
* encountered), else return false.
101
*/
102
103
/* mbsinit. Mandatory. */
104
_PDCLIB_bool (*__mbsinit)(const _PDCLIB_mbstate_t *_P_ps);
105
106
/* UCS-4 variants. Mandatory. */
107
108
_PDCLIB_bool (*__mbstoc32s)(
109
_PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
110
_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
111
const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
112
_PDCLIB_size_t *_PDCLIB_restrict _P_insz,
113
_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
114
);
115
116
_PDCLIB_bool (*__c32stombs)(
117
char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
118
_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
119
const _PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
120
_PDCLIB_size_t *_PDCLIB_restrict _P_insz,
121
_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
122
);
123
124
/* UTF-16 variants; same as above except optional.
125
*
126
* If not provided, _PDCLib will internally synthesize on top of the UCS-4
127
* variants above, albeit at a performance cost.
128
*/
129
130
_PDCLIB_bool (*__mbstoc16s)(
131
_PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
132
_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
133
const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
134
_PDCLIB_size_t *_PDCLIB_restrict _P_insz,
135
_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
136
);
137
138
_PDCLIB_bool (*__c16stombs)(
139
char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
140
_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
141
const _PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
142
_PDCLIB_size_t *_PDCLIB_restrict _P_insz,
143
_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
144
);
145
146
size_t __mb_max;
147
};
148
149
/* Values for the _PendState member of mbstate */
enum {
    _PendClear  = 0,    /* Nothing pending; _PendChar ignored */
    _PendPrefix = 1     /* Process the character stored in _PendChar before
                         * reading the buffer passed for the conversion */
};
159
160
/* XXX Defining these here is temporary - will move to xlocale in future */
161
size_t mbrtoc16_l(
162
char16_t *_PDCLIB_restrict pc16,
163
const char *_PDCLIB_restrict s,
164
size_t n,
165
mbstate_t *_PDCLIB_restrict ps,
166
_PDCLIB_locale_t _PDCLIB_restrict l);
167
168
size_t c16rtomb_l(
169
char *_PDCLIB_restrict s,
170
char16_t c16,
171
mbstate_t *_PDCLIB_restrict ps,
172
_PDCLIB_locale_t _PDCLIB_restrict l);
173
174
size_t mbrtoc32_l(
175
char32_t *_PDCLIB_restrict pc32,
176
const char *_PDCLIB_restrict s,
177
size_t n,
178
mbstate_t *_PDCLIB_restrict ps,
179
_PDCLIB_locale_t _PDCLIB_restrict l);
180
181
size_t c32rtomb_l(
182
char *_PDCLIB_restrict s,
183
char32_t c32,
184
mbstate_t *_PDCLIB_restrict ps,
185
_PDCLIB_locale_t _PDCLIB_restrict l);
186
187
/* Values that _PDCLIB_WCHAR_ENCODING may be defined to */
#define _PDCLIB_WCHAR_ENCODING_UTF16 16
#define _PDCLIB_WCHAR_ENCODING_UCS4 32

/* An unset encoding becomes 0, which matches neither value above and is
 * therefore rejected by the #error branch below.
 */
#ifndef _PDCLIB_WCHAR_ENCODING
#define _PDCLIB_WCHAR_ENCODING 0
#endif

/* Map the "cwc" conversion names onto the char32_t or char16_t functions,
 * depending on the selected encoding.
 */
#if _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UCS4
#define _PDCLIB_mbrtocwc_l mbrtoc32_l
#define _PDCLIB_mbrtocwc mbrtoc32
#define _PDCLIB_cwcrtomb_l c32rtomb_l
#define _PDCLIB_cwcrtomb c32rtomb
#elif _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UTF16
#define _PDCLIB_mbrtocwc_l mbrtoc16_l
#define _PDCLIB_mbrtocwc mbrtoc16
#define _PDCLIB_cwcrtomb_l c16rtomb_l
#define _PDCLIB_cwcrtomb c16rtomb
#else
#error _PDCLIB_WCHAR_ENCODING not defined correctly
#error Define to one of _PDCLIB_WCHAR_ENCODING_UCS4 or _PDCLIB_WCHAR_ENCODING_UTF16
#endif
208
209
#endif
210
211