Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/libedit/chartype.c
39475 views
1
/* $NetBSD: chartype.c,v 1.37 2023/08/10 20:38:00 mrg Exp $ */
2
3
/*-
4
* Copyright (c) 2009 The NetBSD Foundation, Inc.
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26
* POSSIBILITY OF SUCH DAMAGE.
27
*/
28
29
/*
30
* chartype.c: character classification and meta information
31
*/
32
#include "config.h"
33
#if !defined(lint) && !defined(SCCSID)
34
__RCSID("$NetBSD: chartype.c,v 1.37 2023/08/10 20:38:00 mrg Exp $");
35
#endif /* not lint && not SCCSID */
36
37
#include <ctype.h>
38
#include <limits.h>
39
#include <stdlib.h>
40
#include <string.h>
41
42
#include "el.h"
43
44
#define CT_BUFSIZ ((size_t)1024)
45
46
static int ct_conv_cbuff_resize(ct_buffer_t *, size_t);
47
static int ct_conv_wbuff_resize(ct_buffer_t *, size_t);
48
49
static int
50
ct_conv_cbuff_resize(ct_buffer_t *conv, size_t csize)
51
{
52
void *p;
53
54
if (csize <= conv->csize)
55
return 0;
56
57
conv->csize = csize;
58
59
p = el_realloc(conv->cbuff, conv->csize * sizeof(*conv->cbuff));
60
if (p == NULL) {
61
conv->csize = 0;
62
el_free(conv->cbuff);
63
conv->cbuff = NULL;
64
return -1;
65
}
66
conv->cbuff = p;
67
return 0;
68
}
69
70
static int
71
ct_conv_wbuff_resize(ct_buffer_t *conv, size_t wsize)
72
{
73
void *p;
74
75
if (wsize <= conv->wsize)
76
return 0;
77
78
conv->wsize = wsize;
79
80
p = el_realloc(conv->wbuff, conv->wsize * sizeof(*conv->wbuff));
81
if (p == NULL) {
82
conv->wsize = 0;
83
el_free(conv->wbuff);
84
conv->wbuff = NULL;
85
return -1;
86
}
87
conv->wbuff = p;
88
return 0;
89
}
90
91
92
char *
93
ct_encode_string(const wchar_t *s, ct_buffer_t *conv)
94
{
95
char *dst;
96
ssize_t used;
97
98
if (!s)
99
return NULL;
100
101
dst = conv->cbuff;
102
for (;;) {
103
used = (ssize_t)(dst - conv->cbuff);
104
if ((conv->csize - (size_t)used) < 5) {
105
if (ct_conv_cbuff_resize(conv,
106
conv->csize + CT_BUFSIZ) == -1)
107
return NULL;
108
dst = conv->cbuff + used;
109
}
110
if (!*s)
111
break;
112
used = ct_encode_char(dst, (size_t)5, *s);
113
if (used == -1) /* failed to encode, need more buffer space */
114
abort();
115
++s;
116
dst += used;
117
}
118
*dst = '\0';
119
return conv->cbuff;
120
}
121
122
wchar_t *
123
ct_decode_string(const char *s, ct_buffer_t *conv)
124
{
125
size_t len;
126
127
if (!s)
128
return NULL;
129
130
len = mbstowcs(NULL, s, (size_t)0);
131
if (len == (size_t)-1)
132
return NULL;
133
134
if (conv->wsize < ++len)
135
if (ct_conv_wbuff_resize(conv, len + CT_BUFSIZ) == -1)
136
return NULL;
137
138
mbstowcs(conv->wbuff, s, conv->wsize);
139
return conv->wbuff;
140
}
141
142
143
/*
 * Decode an argument vector of multibyte strings into wide strings.
 *
 * The decoded strings are packed back to back in conv->wbuff; the
 * returned vector, allocated with el_calloc(), holds pointers into that
 * buffer and is terminated by a NULL entry.  NULL entries in `argv'
 * stay NULL in the result.
 *
 * Returns NULL if the buffer or the vector cannot be allocated, or if
 * any argument fails to convert (the vector is freed in that case).
 */
libedit_private wchar_t **
ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv)
{
	wchar_t **wargv;
	wchar_t *cursor;
	size_t room;
	ssize_t nconv;
	int n;

	/*
	 * Size the conversion buffer from the byte lengths of all the
	 * arguments, terminators included, plus one.
	 */
	room = 0;
	for (n = 0; n < argc; n++) {
		if (argv[n] != NULL)
			room += strlen(argv[n]) + 1;
	}
	room++;
	if (room > conv->wsize &&
	    ct_conv_wbuff_resize(conv, room + CT_BUFSIZ) == -1)
		return NULL;

	wargv = el_calloc((size_t)(argc + 1), sizeof(*wargv));
	if (wargv == NULL)
		return NULL;

	cursor = conv->wbuff;
	for (n = 0; n < argc; n++) {
		/* mbstowcs() must not be handed a null pointer */
		if (argv[n] == NULL) {
			wargv[n] = NULL;
			continue;
		}

		wargv[n] = cursor;
		nconv = (ssize_t)mbstowcs(cursor, argv[n], room);
		if (nconv == -1) {
			el_free(wargv);
			return NULL;
		}

		nconv++;		/* account for the L'\0' as well */
		room -= (size_t)nconv;
		cursor += nconv;
	}
	wargv[n] = NULL;

	return wargv;
}
184
185
186
libedit_private size_t
187
ct_enc_width(wchar_t c)
188
{
189
mbstate_t mbs;
190
char buf[MB_LEN_MAX];
191
size_t size;
192
memset(&mbs, 0, sizeof(mbs));
193
194
if ((size = wcrtomb(buf, c, &mbs)) == (size_t)-1)
195
return 0;
196
return size;
197
}
198
199
libedit_private ssize_t
200
ct_encode_char(char *dst, size_t len, wchar_t c)
201
{
202
ssize_t l = 0;
203
if (len < ct_enc_width(c))
204
return -1;
205
l = wctomb(dst, c);
206
207
if (l < 0) {
208
wctomb(NULL, L'\0');
209
l = 0;
210
}
211
return l;
212
}
213
214
libedit_private const wchar_t *
215
ct_visual_string(const wchar_t *s, ct_buffer_t *conv)
216
{
217
wchar_t *dst;
218
ssize_t used;
219
220
if (!s)
221
return NULL;
222
223
if (ct_conv_wbuff_resize(conv, CT_BUFSIZ) == -1)
224
return NULL;
225
226
used = 0;
227
dst = conv->wbuff;
228
while (*s) {
229
used = ct_visual_char(dst,
230
conv->wsize - (size_t)(dst - conv->wbuff), *s);
231
if (used != -1) {
232
++s;
233
dst += used;
234
continue;
235
}
236
237
/* failed to encode, need more buffer space */
238
uintptr_t sused = (uintptr_t)dst - (uintptr_t)conv->wbuff;
239
if (ct_conv_wbuff_resize(conv, conv->wsize + CT_BUFSIZ) == -1)
240
return NULL;
241
dst = conv->wbuff + sused;
242
}
243
244
if (dst >= (conv->wbuff + conv->wsize)) { /* sigh */
245
uintptr_t sused = (uintptr_t)dst - (uintptr_t)conv->wbuff;
246
if (ct_conv_wbuff_resize(conv, conv->wsize + CT_BUFSIZ) == -1)
247
return NULL;
248
dst = conv->wbuff + sused;
249
}
250
251
*dst = L'\0';
252
return conv->wbuff;
253
}
254
255
256
257
libedit_private int
258
ct_visual_width(wchar_t c)
259
{
260
int t = ct_chr_class(c);
261
switch (t) {
262
case CHTYPE_ASCIICTL:
263
return 2; /* ^@ ^? etc. */
264
case CHTYPE_TAB:
265
return 1; /* Hmm, this really need to be handled outside! */
266
case CHTYPE_NL:
267
return 0; /* Should this be 1 instead? */
268
case CHTYPE_PRINT:
269
return wcwidth(c);
270
case CHTYPE_NONPRINT:
271
if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
272
return 8; /* \U+12345 */
273
else
274
return 7; /* \U+1234 */
275
default:
276
return 0; /* should not happen */
277
}
278
}
279
280
281
libedit_private ssize_t
282
ct_visual_char(wchar_t *dst, size_t len, wchar_t c)
283
{
284
int t = ct_chr_class(c);
285
switch (t) {
286
case CHTYPE_TAB:
287
case CHTYPE_NL:
288
case CHTYPE_ASCIICTL:
289
if (len < 2)
290
return -1; /* insufficient space */
291
*dst++ = '^';
292
if (c == '\177')
293
*dst = '?'; /* DEL -> ^? */
294
else
295
*dst = c | 0100; /* uncontrolify it */
296
return 2;
297
case CHTYPE_PRINT:
298
if (len < 1)
299
return -1; /* insufficient space */
300
*dst = c;
301
return 1;
302
case CHTYPE_NONPRINT:
303
/* we only use single-width glyphs for display,
304
* so this is right */
305
if ((ssize_t)len < ct_visual_width(c))
306
return -1; /* insufficient space */
307
*dst++ = '\\';
308
*dst++ = 'U';
309
*dst++ = '+';
310
#define tohexdigit(v) "0123456789ABCDEF"[v]
311
if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
312
*dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf);
313
*dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf);
314
*dst++ = tohexdigit(((unsigned int) c >> 8) & 0xf);
315
*dst++ = tohexdigit(((unsigned int) c >> 4) & 0xf);
316
*dst = tohexdigit(((unsigned int) c ) & 0xf);
317
return c > 0xffff ? 8 : 7;
318
/*FALLTHROUGH*/
319
/* these two should be handled outside this function */
320
default: /* we should never hit the default */
321
return 0;
322
}
323
}
324
325
326
327
328
libedit_private int
329
ct_chr_class(wchar_t c)
330
{
331
if (c == '\t')
332
return CHTYPE_TAB;
333
else if (c == '\n')
334
return CHTYPE_NL;
335
else if (c < 0x100 && iswcntrl(c))
336
return CHTYPE_ASCIICTL;
337
else if (iswprint(c))
338
return CHTYPE_PRINT;
339
else
340
return CHTYPE_NONPRINT;
341
}
342
343