Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Roblox
GitHub Repository: Roblox/luau
Path: blob/master/extern/isocline/src/stringbuf.c
2727 views
1
/* ----------------------------------------------------------------------------
2
Copyright (c) 2021, Daan Leijen
3
This is free software; you can redistribute it and/or modify it
4
under the terms of the MIT License. A copy of the license can be
5
found in the "LICENSE" file at the root of this distribution.
6
-----------------------------------------------------------------------------*/
7
8
// get `wcwidth` for the column width of unicode characters
9
// note: for now the OS provided one is unused as we see quite a bit of variation
10
// among platforms and including our own seems more reliable.
11
/*
12
#if defined(__linux__) || defined(__freebsd__)
13
// use the system supplied one
14
#if !defined(_XOPEN_SOURCE)
15
#define _XOPEN_SOURCE 700 // so wcwidth is visible
16
#endif
17
#include <wchar.h>
18
#else
19
*/
20
// use our own (also on APPLE as that fails within vscode)
21
#define wcwidth(c) mk_wcwidth(c)
22
#include "wcwidth.c"
23
// #endif
24
25
#include <stdio.h>
26
#include <string.h>
27
#include <inttypes.h>
28
29
#include "common.h"
30
#include "stringbuf.h"
31
32
//-------------------------------------------------------------
33
// In place growable utf-8 strings
34
//-------------------------------------------------------------
35
36
struct stringbuf_s {
37
char* buf;
38
ssize_t buflen;
39
ssize_t count;
40
alloc_t* mem;
41
};
42
43
44
//-------------------------------------------------------------
45
// String column width
46
//-------------------------------------------------------------
47
48
// Column width of a single utf-8 encoded character of `n` bytes at `s`.
// The encoding is not validated: a malformed or truncated sequence counts as width 1.
static ssize_t utf8_char_width( const char* s, ssize_t n ) {
  if (n <= 0) return 0;

  const uint8_t lead = (uint8_t)s[0];
  if (lead < ' ')   return 0;  // control character
  if (lead <= 0x7F) return 1;  // plain ascii
  if (lead <= 0xC1) return 1;  // stray continuation byte or invalid 0xC0, 0xC1 (check is strictly not necessary as we don't validate..)

  if (lead <= 0xDF && n >= 2) {
    // 2 bytes (lead >= 0xC2)
    const int32_t cp = (((lead & 0x1F) << 6) | (s[1] & 0x3F));
    assert(cp < 0xD800 || cp > 0xDFFF);
    const int w = wcwidth(cp);
    return w;
  }
  if (lead <= 0xEF && n >= 3) {
    // 3 bytes (lead >= 0xE0)
    const int32_t cp = (((lead & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F));
    return wcwidth(cp);
  }
  if (lead <= 0xF4 && n >= 4) {
    // 4 bytes (lead >= 0xF0)
    const int32_t cp = (((lead & 0x07) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F));
    return wcwidth(cp);
  }
  // failed: too few bytes for the lead byte, or a lead byte above 0xF4
  return 1;
}
82
83
84
// Column width of the codepoint of `n` bytes at `s`.
// Returns 0 for NULL/empty input and for control characters (this includes the
// ESC byte that starts an escape sequence); otherwise the utf-8 character width.
static ssize_t char_column_width( const char* s, ssize_t n ) {
  if (s == NULL || n <= 0) return 0;
  if ((uint8_t)(*s) < ' ') return 0;  // also for CSI escape sequences

  const ssize_t width = utf8_char_width(s, n);
#ifdef _WIN32
  return (width <= 0 ? 1 : width);  // windows console seems to use at least one column
#else
  return width;
#endif
}
97
98
// Total column width of the first `len` bytes of `s`; stops early at a zero byte.
static ssize_t str_column_width_n( const char* s, ssize_t len ) {
  if (s == NULL || len <= 0) return 0;
  ssize_t total = 0;
  ssize_t i = 0;
  while (s[i] != 0) {
    ssize_t w;
    const ssize_t step = str_next_ofs(s, len, i, &w);
    if (step <= 0) break;
    total += w;
    i += step;
  }
  return total;
}
110
111
// Column width of the zero-terminated string `s`.
ic_private ssize_t str_column_width( const char* s ) {
  const ssize_t len = ic_strlen(s);
  return str_column_width_n( s, len );
}
114
115
// Byte offset into `s` from which the remaining text fits in `max_width` columns:
// leading characters are dropped one at a time until the rest is narrow enough.
ic_private ssize_t str_skip_until_fit( const char* s, ssize_t max_width ) {
  if (s == NULL) return 0;
  ssize_t remaining = str_column_width(s);
  const ssize_t len = ic_strlen(s);
  ssize_t start = 0;
  while (remaining > max_width) {
    ssize_t w;
    const ssize_t step = str_next_ofs(s, len, start, &w);
    if (step <= 0) break;
    remaining -= w;
    start += step;
  }
  return start;
}
128
129
// Length in bytes of the longest prefix of `s` whose column width does not exceed `max_width`.
ic_private ssize_t str_take_while_fit( const char* s, ssize_t max_width) {
  if (s == NULL) return 0;
  const ssize_t len = ic_strlen(s);
  ssize_t used = 0;   // columns taken so far
  ssize_t end  = 0;   // byte length of the prefix so far
  for (;;) {
    ssize_t w;
    const ssize_t step = str_next_ofs(s, len, end, &w);
    if (step <= 0) break;
    if (used + w > max_width) break;
    used += w;
    end  += step;
  }
  return end;
}
143
144
145
//-------------------------------------------------------------
146
// String navigation
147
//-------------------------------------------------------------
148
149
// get offset of the previous codepoint. does not skip back over CSI sequences.
150
ic_private ssize_t str_prev_ofs( const char* s, ssize_t pos, ssize_t* width ) {
151
ssize_t ofs = 0;
152
if (s != NULL && pos > 0) {
153
ofs = 1;
154
while (pos > ofs) {
155
uint8_t u = (uint8_t)s[pos - ofs];
156
if (u < 0x80 || u > 0xBF) break; // continue while follower
157
ofs++;
158
}
159
}
160
if (width != NULL) *width = char_column_width( s+(pos-ofs), ofs );
161
return ofs;
162
}
163
164
// skip an escape sequence
165
// <https://www.xfree86.org/current/ctlseqs.html>
166
ic_private bool skip_esc( const char* s, ssize_t len, ssize_t* esclen ) {
167
if (s == NULL || len <= 1 || s[0] != '\x1B') return false;
168
if (esclen != NULL) *esclen = 0;
169
if (strchr("[PX^_]",s[1]) != NULL) {
170
// CSI (ESC [), DCS (ESC P), SOS (ESC X), PM (ESC ^), APC (ESC _), and OSC (ESC ]): terminated with a special sequence
171
bool finalCSI = (s[1] == '['); // CSI terminates with 0x40-0x7F; otherwise ST (bell or ESC \)
172
ssize_t n = 2;
173
while (len > n) {
174
char c = s[n++];
175
if ((finalCSI && (uint8_t)c >= 0x40 && (uint8_t)c <= 0x7F) || // terminating byte: @A–Z[\]^_`a–z{|}~
176
(!finalCSI && c == '\x07') || // bell
177
(c == '\x02')) // STX terminates as well
178
{
179
if (esclen != NULL) *esclen = n;
180
return true;
181
}
182
else if (!finalCSI && c == '\x1B' && len > n && s[n] == '\\') { // ST (ESC \)
183
n++;
184
if (esclen != NULL) *esclen = n;
185
return true;
186
}
187
}
188
}
189
if (strchr(" #%()*+",s[1]) != NULL) {
190
// assume escape sequence of length 3 (like ESC % G)
191
if (esclen != NULL) *esclen = 2;
192
return true;
193
}
194
else {
195
// assume single character escape code (like ESC 7)
196
if (esclen != NULL) *esclen = 2;
197
return true;
198
}
199
return false;
200
}
201
202
// Offset to the next codepoint, treats CSI escape sequences as a single code point.
203
ic_private ssize_t str_next_ofs( const char* s, ssize_t len, ssize_t pos, ssize_t* cwidth ) {
204
ssize_t ofs = 0;
205
if (s != NULL && len > pos) {
206
if (skip_esc(s+pos,len-pos,&ofs)) {
207
// skip escape sequence
208
}
209
else {
210
ofs = 1;
211
// utf8 extended character?
212
while(len > pos + ofs) {
213
uint8_t u = (uint8_t)s[pos + ofs];
214
if (u < 0x80 || u > 0xBF) break; // break if not a follower
215
ofs++;
216
}
217
}
218
}
219
if (cwidth != NULL) *cwidth = char_column_width( s+pos, ofs );
220
return ofs;
221
}
222
223
// Length of `s` limited to `n`: the index of the first zero byte, or `n` if
// no zero byte occurs before it (0 when `n` is not positive).
static ssize_t str_limit_to_length( const char* s, ssize_t n ) {
  ssize_t used = 0;
  while (used < n && s[used] != 0) {
    used++;
  }
  return used;
}
228
229
230
//-------------------------------------------------------------
231
// String searching prev/next word, line, ws_word
232
//-------------------------------------------------------------
233
234
235
// Search backward from `pos` (clamped to 0..len) for a character accepted by `match`.
// Returns the offset just past the matching character, or -1 if there is none.
// With `skip_immediate_matches`, the run of matching characters directly before
// `pos` is stepped over first.
static ssize_t str_find_backward( const char* s, ssize_t len, ssize_t pos, ic_is_char_class_fun_t* match, bool skip_immediate_matches ) {
  if (pos > len) pos = len;
  if (pos < 0) pos = 0;
  ssize_t cur = pos;
  // skip matching first (say, whitespace in case of the previous start-of-word)
  if (skip_immediate_matches) {
    while (cur > 0) {
      const ssize_t back = str_prev_ofs(s, cur, NULL);
      if (back <= 0) break;
      assert(cur - back >= 0);
      if (!match(s + cur - back, (long)back)) break;
      cur -= back;
    }
  }
  // find match
  while (cur > 0) {
    const ssize_t back = str_prev_ofs(s, cur, NULL);
    if (back <= 0) break;
    assert(cur - back >= 0);
    if (match(s + cur - back, (long)back)) return cur;  // found
    cur -= back;
  }
  return -1;  // not found
}
261
262
// Search forward from `pos` (clamped to 0..len) for a character accepted by `match`.
// Returns the offset of the matching character, or -1 if there is none
// (also when `s` is NULL or `len` is negative).
// With `skip_immediate_matches`, the run of matching characters starting at
// `pos` is stepped over first.
static ssize_t str_find_forward( const char* s, ssize_t len, ssize_t pos, ic_is_char_class_fun_t* match, bool skip_immediate_matches ) {
  if (s == NULL || len < 0) return -1;
  if (pos > len) pos = len;
  if (pos < 0) pos = 0;
  ssize_t cur = pos;
  // skip matching first (say, whitespace in case of the next end-of-word)
  if (skip_immediate_matches) {
    while (cur < len) {
      const ssize_t step = str_next_ofs(s, len, cur, NULL);
      if (step <= 0) break;
      assert( cur + step <= len);
      if (!match(s + cur, (long)step)) break;
      cur += step;
    }
  }
  // and then look
  while (cur < len) {
    const ssize_t step = str_next_ofs(s, len, cur, NULL);
    if (step <= 0) break;
    assert( cur + step <= len);
    if (match(s + cur, (long)step)) return cur;  // found
    cur += step;
  }
  return -1;
}
290
291
// Character class: a single byte that is a newline or a zero byte.
static bool char_is_linefeed( const char* s, long n ) {
  if (n != 1) return false;
  return (*s == '\n' || *s == 0);
}
294
295
static ssize_t str_find_line_start( const char* s, ssize_t len, ssize_t pos) {
296
ssize_t start = str_find_backward(s,len,pos,&char_is_linefeed,false /* don't skip immediate matches */);
297
return (start < 0 ? 0 : start);
298
}
299
300
static ssize_t str_find_line_end( const char* s, ssize_t len, ssize_t pos) {
301
ssize_t end = str_find_forward(s,len,pos, &char_is_linefeed, false);
302
return (end < 0 ? len : end);
303
}
304
305
static ssize_t str_find_word_start( const char* s, ssize_t len, ssize_t pos) {
306
ssize_t start = str_find_backward(s,len,pos, &ic_char_is_idletter,true /* skip immediate matches */);
307
return (start < 0 ? 0 : start);
308
}
309
310
static ssize_t str_find_word_end( const char* s, ssize_t len, ssize_t pos) {
311
ssize_t end = str_find_forward(s,len,pos,&ic_char_is_idletter,true /* skip immediate matches */);
312
return (end < 0 ? len : end);
313
}
314
315
static ssize_t str_find_ws_word_start( const char* s, ssize_t len, ssize_t pos) {
316
ssize_t start = str_find_backward(s,len,pos,&ic_char_is_white,true /* skip immediate matches */);
317
return (start < 0 ? 0 : start);
318
}
319
320
static ssize_t str_find_ws_word_end( const char* s, ssize_t len, ssize_t pos) {
321
ssize_t end = str_find_forward(s,len,pos,&ic_char_is_white,true /* skip immediate matches */);
322
return (end < 0 ? len : end);
323
}
324
325
326
//-------------------------------------------------------------
327
// String row/column iteration
328
//-------------------------------------------------------------
329
330
// invoke a function for each terminal row; returns total row count.
331
static ssize_t str_for_each_row( const char* s, ssize_t len, ssize_t termw, ssize_t promptw, ssize_t cpromptw,
332
row_fun_t* fun, const void* arg, void* res )
333
{
334
if (s == NULL) s = "";
335
ssize_t i;
336
ssize_t rcount = 0;
337
ssize_t rcol = 0;
338
ssize_t rstart = 0;
339
ssize_t startw = promptw;
340
for(i = 0; i < len; ) {
341
ssize_t w;
342
ssize_t next = str_next_ofs(s, len, i, &w);
343
if (next <= 0) {
344
debug_msg("str: foreach row: next<=0: len %zd, i %zd, w %zd, buf %s\n", len, i, w, s );
345
assert(false);
346
break;
347
}
348
startw = (rcount == 0 ? promptw : cpromptw);
349
ssize_t termcol = rcol + w + startw + 1 /* for the cursor */;
350
if (termw != 0 && i != 0 && termcol >= termw) {
351
// wrap
352
if (fun != NULL) {
353
if (fun(s,rcount,rstart,i - rstart,startw,true,arg,res)) return rcount;
354
}
355
rcount++;
356
rstart = i;
357
rcol = 0;
358
}
359
if (s[i] == '\n') {
360
// newline
361
if (fun != NULL) {
362
if (fun(s,rcount,rstart,i - rstart,startw,false,arg,res)) return rcount;
363
}
364
rcount++;
365
rstart = i+1;
366
rcol = 0;
367
}
368
assert (s[i] != 0);
369
i += next;
370
rcol += w;
371
}
372
if (fun != NULL) {
373
if (fun(s,rcount,rstart,i - rstart,startw,false,arg,res)) return rcount;
374
}
375
return rcount+1;
376
}
377
378
//-------------------------------------------------------------
379
// String: get row/column position
380
//-------------------------------------------------------------
381
382
383
static bool str_get_current_pos_iter(
384
const char* s,
385
ssize_t row, ssize_t row_start, ssize_t row_len,
386
ssize_t startw, bool is_wrap, const void* arg, void* res)
387
{
388
ic_unused(is_wrap); ic_unused(startw);
389
rowcol_t* rc = (rowcol_t*)res;
390
ssize_t pos = *((ssize_t*)arg);
391
392
if (pos >= row_start && pos <= (row_start + row_len)) {
393
// found the cursor row
394
rc->row_start = row_start;
395
rc->row_len = row_len;
396
rc->row = row;
397
rc->col = str_column_width_n( s + row_start, pos - row_start );
398
rc->first_on_row = (pos == row_start);
399
if (is_wrap) {
400
// if wrapped, we check if the next character is at row_len
401
ssize_t next = str_next_ofs(s, row_start + row_len, pos, NULL);
402
rc->last_on_row = (pos + next >= row_start + row_len);
403
}
404
else {
405
// normal last position is right after the last character
406
rc->last_on_row = (pos >= row_start + row_len);
407
}
408
// debug_msg("edit; pos iter: pos: %zd (%c), row_start: %zd, rowlen: %zd\n", pos, s[pos], row_start, row_len);
409
}
410
return false; // always continue to count all rows
411
}
412
413
// Compute the row/column information for byte position `pos` into `rc` (zeroed first).
// Returns the row count reported by `str_for_each_row`.
static ssize_t str_get_rc_at_pos(const char* s, ssize_t len, ssize_t termw, ssize_t promptw, ssize_t cpromptw, ssize_t pos, rowcol_t* rc) {
  memset(rc, 0, sizeof(*rc));
  const ssize_t row_count = str_for_each_row(s, len, termw, promptw, cpromptw, &str_get_current_pos_iter, &pos, rc);
  // debug_msg("edit: current pos: (%d, %d) %s %s\n", rc->row, rc->col, rc->first_on_row ? "first" : "", rc->last_on_row ? "last" : "");
  return row_count;
}
419
420
421
422
//-------------------------------------------------------------
423
// String: get row/column position for a resized terminal
424
// with potentially "hard-wrapped" rows
425
//-------------------------------------------------------------
426
// Input for `str_get_current_wrapped_pos_iter`.
typedef struct wrapped_arg_s {
  ssize_t pos;       // byte position to locate
  ssize_t newtermw;  // terminal width used to detect hard-wraps
} wrapped_arg_t;
430
431
typedef struct wrowcol_s {
432
rowcol_t rc;
433
ssize_t hrows; // count of hard-wrapped extra rows
434
} wrowcol_t;
435
436
static bool str_get_current_wrapped_pos_iter(
437
const char* s,
438
ssize_t row, ssize_t row_start, ssize_t row_len,
439
ssize_t startw, bool is_wrap, const void* arg, void* res)
440
{
441
ic_unused(is_wrap);
442
wrowcol_t* wrc = (wrowcol_t*)res;
443
const wrapped_arg_t* warg = (const wrapped_arg_t*)arg;
444
445
// iterate through the row and record the postion and hard-wraps
446
ssize_t hwidth = startw;
447
ssize_t i = 0;
448
while( i <= row_len ) { // include rowlen as the cursor position can be just after the last character
449
// get next position and column width
450
ssize_t cw;
451
ssize_t next;
452
bool is_cursor = (warg->pos == row_start+i);
453
if (i < row_len) {
454
next = str_next_ofs(s + row_start, row_len, i, &cw);
455
}
456
else {
457
// end of row: take wrap or cursor into account
458
// (wrap has width 2 as it displays a back-arrow but also has an invisible newline that wraps)
459
cw = (is_wrap ? 2 : (is_cursor ? 1 : 0));
460
next = 1;
461
}
462
463
if (next > 0) {
464
if (hwidth + cw > warg->newtermw) {
465
// hardwrap
466
hwidth = 0;
467
wrc->hrows++;
468
debug_msg("str: found hardwrap: row: %zd, hrows: %zd\n", row, wrc->hrows);
469
}
470
}
471
else {
472
next++; // ensure we terminate (as we go up to rowlen)
473
}
474
475
// did we find our position?
476
if (is_cursor) {
477
debug_msg("str: found position: row: %zd, hrows: %zd\n", row, wrc->hrows);
478
wrc->rc.row_start = row_start;
479
wrc->rc.row_len = row_len;
480
wrc->rc.row = wrc->hrows + row;
481
wrc->rc.col = hwidth;
482
wrc->rc.first_on_row = (i==0);
483
wrc->rc.last_on_row = (i+next >= row_len - (is_wrap ? 1 : 0));
484
}
485
486
// advance
487
hwidth += cw;
488
i += next;
489
}
490
return false; // always continue to count all rows
491
}
492
493
494
// Compute the row/column information of `pos` into `rc`, where rows laid out for width
// `termw` are additionally hard-wrapped at `newtermw`.
// Returns the row count from `str_for_each_row` plus the number of hard-wrapped rows.
static ssize_t str_get_wrapped_rc_at_pos(const char* s, ssize_t len, ssize_t termw, ssize_t newtermw, ssize_t promptw, ssize_t cpromptw, ssize_t pos, rowcol_t* rc) {
  wrapped_arg_t in;
  in.pos = pos;
  in.newtermw = newtermw;
  wrowcol_t out;
  memset(&out,0,sizeof(out));
  const ssize_t rows = str_for_each_row(s, len, termw, promptw, cpromptw, &str_get_current_wrapped_pos_iter, &in, &out);
  debug_msg("edit: wrapped pos: (%zd,%zd) rows %zd %s %s, hrows: %zd\n", out.rc.row, out.rc.col, rows, out.rc.first_on_row ? "first" : "", out.rc.last_on_row ? "last" : "", out.hrows);
  *rc = out.rc;
  return (rows + out.hrows);
}
505
506
507
//-------------------------------------------------------------
508
// Set position
509
//-------------------------------------------------------------
510
511
static bool str_set_pos_iter(
512
const char* s,
513
ssize_t row, ssize_t row_start, ssize_t row_len,
514
ssize_t startw, bool is_wrap, const void* arg, void* res)
515
{
516
ic_unused(arg); ic_unused(is_wrap); ic_unused(startw);
517
rowcol_t* rc = (rowcol_t*)arg;
518
if (rc->row != row) return false; // keep searching
519
// we found our row
520
ssize_t col = 0;
521
ssize_t i = row_start;
522
ssize_t end = row_start + row_len;
523
while (col < rc->col && i < end) {
524
ssize_t cw;
525
ssize_t next = str_next_ofs(s, row_start + row_len, i, &cw);
526
if (next <= 0) break;
527
i += next;
528
col += cw;
529
}
530
*((ssize_t*)res) = i;
531
return true; // stop iteration
532
}
533
534
static ssize_t str_get_pos_at_rc(const char* s, ssize_t len, ssize_t termw, ssize_t promptw, ssize_t cpromptw, ssize_t row, ssize_t col /* without prompt */) {
535
rowcol_t rc;
536
memset(&rc,0,ssizeof(rc));
537
rc.row = row;
538
rc.col = col;
539
ssize_t pos = -1;
540
str_for_each_row(s,len,termw,promptw,cpromptw,&str_set_pos_iter,&rc,&pos);
541
return pos;
542
}
543
544
545
//-------------------------------------------------------------
546
// String buffer
547
//-------------------------------------------------------------
548
// Make sure there is room for `extra` more bytes beyond the current count.
// Returns false (leaving the buffer as it was) if the reallocation fails.
static bool sbuf_ensure_extra(stringbuf_t* s, ssize_t extra)
{
  const ssize_t required = s->count + extra;
  if (s->buflen >= required) return true;

  // reallocate; pick good initial size and multiples to increase reuse on allocation
  ssize_t newlen;
  if (s->buflen <= 0)        newlen = 120;
  else if (s->buflen > 1000) newlen = s->buflen + 1000;
  else                       newlen = 2*s->buflen;
  if (newlen < required) newlen = required;
  if (s->buflen > 0) {
    debug_msg("stringbuf: reallocate: old %zd, new %zd\n", s->buflen, newlen);
  }
  char* const grown = mem_realloc_tp(s->mem, char, s->buf, newlen+1);  // one more for terminating zero
  if (grown == NULL) {
    assert(false);
    return false;
  }
  s->buf = grown;
  s->buflen = newlen;
  // terminate both the contents and the very end of the storage
  s->buf[s->count] = s->buf[s->buflen] = 0;
  assert(s->buflen >= s->count + extra);
  return true;
}
568
569
// Initialize `sbuf` as empty, without storage, allocating through `mem`.
static void sbuf_init( stringbuf_t* sbuf, alloc_t* mem ) {
  sbuf->buf    = NULL;
  sbuf->buflen = 0;
  sbuf->count  = 0;
  sbuf->mem    = mem;
}
575
576
// Release the character storage of `sbuf` and reset it to empty; `sbuf` itself is kept.
static void sbuf_done( stringbuf_t* sbuf ) {
  mem_free( sbuf->mem, sbuf->buf );
  sbuf->count  = 0;
  sbuf->buflen = 0;
  sbuf->buf    = NULL;
}
582
583
584
// Free the character storage and then `sbuf` itself; a NULL `sbuf` is ignored.
ic_private void sbuf_free( stringbuf_t* sbuf ) {
  if (sbuf != NULL) {
    sbuf_done(sbuf);
    mem_free(sbuf->mem, sbuf);
  }
}
589
590
// Allocate and initialize a new empty string buffer; NULL if the allocation fails.
ic_private stringbuf_t* sbuf_new( alloc_t* mem ) {
  stringbuf_t* const sbuf = mem_zalloc_tp(mem,stringbuf_t);
  if (sbuf != NULL) {
    sbuf_init(sbuf,mem);
  }
  return sbuf;
}
596
597
// free the sbuf and return the current string buffer as the result
598
ic_private char* sbuf_free_dup(stringbuf_t* sbuf) {
599
if (sbuf == NULL) return NULL;
600
char* s = NULL;
601
if (sbuf->buf != NULL) {
602
s = mem_realloc_tp(sbuf->mem, char, sbuf->buf, sbuf_len(sbuf)+1);
603
if (s == NULL) { s = sbuf->buf; }
604
sbuf->buf = 0;
605
sbuf->buflen = 0;
606
sbuf->count = 0;
607
}
608
sbuf_free(sbuf);
609
return s;
610
}
611
612
// The string contents starting at byte `pos`: NULL if `pos` is negative or beyond the
// count, and "" if no storage has been allocated.
ic_private const char* sbuf_string_at( stringbuf_t* sbuf, ssize_t pos ) {
  const bool in_range = (pos >= 0 && pos <= sbuf->count);
  if (!in_range) return NULL;
  if (sbuf->buf == NULL) return "";
  assert(sbuf->buf[sbuf->count] == 0);
  return &sbuf->buf[pos];
}
618
619
// The full string contents (same as `sbuf_string_at` with position 0).
ic_private const char* sbuf_string( stringbuf_t* sbuf ) {
  const char* const str = sbuf_string_at( sbuf, 0 );
  return str;
}
622
623
// The byte at `pos`, or 0 if there is no storage or `pos` is negative or beyond the count.
ic_private char sbuf_char_at(stringbuf_t* sbuf, ssize_t pos) {
  if (sbuf->buf == NULL) return 0;
  if (pos < 0 || pos > sbuf->count) return 0;
  return sbuf->buf[pos];
}
627
628
// Duplicate the contents starting at byte `pos` using the buffer's allocator.
ic_private char* sbuf_strdup_at( stringbuf_t* sbuf, ssize_t pos ) {
  const char* const tail = sbuf_string_at(sbuf,pos);
  return mem_strdup(sbuf->mem, tail);
}
631
632
// Duplicate the full contents using the buffer's allocator.
ic_private char* sbuf_strdup( stringbuf_t* sbuf ) {
  const char* const str = sbuf_string(sbuf);
  return mem_strdup(sbuf->mem, str);
}
635
636
// Number of bytes in the buffer; 0 for a NULL buffer.
ic_private ssize_t sbuf_len(const stringbuf_t* s) {
  return (s == NULL ? 0 : s->count);
}
640
641
// Append formatted output (as `vsnprintf`) to `sb`. Returns the new length of the
// buffer; on allocation failure the length is returned unchanged.
ic_private ssize_t sbuf_append_vprintf(stringbuf_t* sb, const char* fmt, va_list args) {
  const ssize_t min_needed = ic_strlen(fmt);
  if (!sbuf_ensure_extra(sb,min_needed + 16)) return sb->count;
  ssize_t avail = sb->buflen - sb->count;
  va_list args0;
  va_copy(args0, args);  // `args` is needed again if a second pass is required
  // The storage holds `buflen + 1` bytes, so `avail + 1` bytes can be written including
  // the terminating zero. Passing just `avail` would truncate output of exactly `avail`
  // bytes without the check below noticing.
  ssize_t needed = vsnprintf(sb->buf + sb->count, to_size_t(avail + 1), fmt, args0);
  va_end(args0);
  if (needed > avail) {
    // did not fit: undo the partial output, grow, and format again
    sb->buf[sb->count] = 0;
    if (!sbuf_ensure_extra(sb, needed)) return sb->count;
    avail = sb->buflen - sb->count;
    needed = vsnprintf(sb->buf + sb->count, to_size_t(avail + 1), fmt, args);
  }
  assert(needed <= avail);
  // a negative result signals an output error: append nothing in that case
  sb->count += (needed > avail ? avail : (needed >= 0 ? needed : 0));
  assert(sb->count <= sb->buflen);
  sb->buf[sb->count] = 0;
  return sb->count;
}
660
661
// Append formatted output (as `printf`) to `sb`; returns the result of `sbuf_append_vprintf`.
ic_private ssize_t sbuf_appendf(stringbuf_t* sb, const char* fmt, ...) {
  va_list ap;
  va_start( ap, fmt);
  const ssize_t newlen = sbuf_append_vprintf( sb, fmt, ap );
  va_end(ap);
  return newlen;
}
668
669
670
// Insert at most `n` bytes of `s` (fewer if `s` ends earlier) at byte `pos`.
// Returns the position just after the inserted text, or `pos` unchanged if nothing was
// inserted (invalid `pos`, NULL `s`, nothing to insert, or allocation failure).
ic_private ssize_t sbuf_insert_at_n(stringbuf_t* sbuf, const char* s, ssize_t n, ssize_t pos ) {
  if (s == NULL || pos < 0 || pos > sbuf->count) return pos;
  const ssize_t add = str_limit_to_length(s,n);
  if (add <= 0) return pos;
  if (!sbuf_ensure_extra(sbuf,add)) return pos;
  char* const at = sbuf->buf + pos;             // taken after a possible reallocation
  ic_memmove(at + add, at, sbuf->count - pos);  // shift the tail up
  ic_memcpy(at, s, add);
  sbuf->count += add;
  sbuf->buf[sbuf->count] = 0;
  return (pos + add);
}
680
681
// Split `sb` at byte `pos`: the contents from `pos` on are moved into a newly
// allocated string buffer, which is returned, and `sb` is truncated to `pos` bytes.
// If `pos` is at or beyond the end, `sb` is unchanged and the new buffer is empty.
// Returns NULL if `pos` is negative or the new buffer cannot be allocated.
ic_private stringbuf_t* sbuf_split_at( stringbuf_t* sb, ssize_t pos ) {
  if (pos < 0) return NULL;  // checked before allocating so that nothing is leaked
  stringbuf_t* res = sbuf_new(sb->mem);
  if (res == NULL) return NULL;
  if (pos < sb->count) {
    sbuf_append_n(res, sb->buf + pos, sb->count - pos);
    sb->count = pos;
    sb->buf[pos] = 0;        // keep `sb` zero terminated at its new length
  }
  return res;
}
690
691
// Insert the zero-terminated string `s` at byte `pos`; see `sbuf_insert_at_n` for the result.
ic_private ssize_t sbuf_insert_at(stringbuf_t* sbuf, const char* s, ssize_t pos ) {
  const ssize_t n = ic_strlen(s);
  return sbuf_insert_at_n( sbuf, s, n, pos );
}
694
695
// Insert the single byte `c` at byte `pos`; see `sbuf_insert_at_n` for the result.
ic_private ssize_t sbuf_insert_char_at(stringbuf_t* sbuf, char c, ssize_t pos ) {
  const char one[2] = { c, 0 };
  return sbuf_insert_at_n( sbuf, one, 1, pos);
}
701
702
// Insert the code point `u` at byte `pos`, encoded by `unicode_to_qutf8`.
// NOTE(review): relies on `unicode_to_qutf8` writing a zero-terminated sequence of at
// most 5 bytes into the local buffer -- confirm against its definition.
ic_private ssize_t sbuf_insert_unicode_at(stringbuf_t* sbuf, unicode_t u, ssize_t pos) {
  uint8_t encoded[5];
  unicode_to_qutf8(u, encoded);
  return sbuf_insert_at(sbuf, (const char*)encoded, pos);
}
707
708
709
710
// Delete `count` bytes starting at byte `pos`; `count` is clipped to the end of the
// contents. Does nothing if `pos` is out of range or `count` is not positive.
ic_private void sbuf_delete_at( stringbuf_t* sbuf, ssize_t pos, ssize_t count ) {
  if (pos < 0 || pos >= sbuf->count) return;
  // a negative count would grow `sbuf->count` and move memory from before `pos`
  if (count <= 0) return;
  if (pos + count > sbuf->count) count = sbuf->count - pos;
  ic_memmove(sbuf->buf + pos, sbuf->buf + pos + count, sbuf->count - pos - count);
  sbuf->count -= count;
  sbuf->buf[sbuf->count] = 0;
}
717
718
// Delete the bytes in the range `pos` up to (not including) `end`; an empty or
// reversed range is ignored.
ic_private void sbuf_delete_from_to( stringbuf_t* sbuf, ssize_t pos, ssize_t end ) {
  if (end > pos) {
    sbuf_delete_at( sbuf, pos, end - pos);
  }
}
722
723
// Delete everything from byte `pos` to the end of the contents.
ic_private void sbuf_delete_from(stringbuf_t* sbuf, ssize_t pos ) {
  const ssize_t tail = sbuf_len(sbuf) - pos;
  sbuf_delete_at(sbuf, pos, tail );
}
726
727
728
// Remove all contents (the storage is kept).
ic_private void sbuf_clear( stringbuf_t* sbuf ) {
  const ssize_t all = sbuf_len(sbuf);
  sbuf_delete_at(sbuf, 0, all);
}
731
732
// Append at most `n` bytes of `s` at the end; see `sbuf_insert_at_n` for the result.
ic_private ssize_t sbuf_append_n( stringbuf_t* sbuf, const char* s, ssize_t n ) {
  const ssize_t end = sbuf_len(sbuf);
  return sbuf_insert_at_n( sbuf, s, n, end);
}
735
736
// Append the zero-terminated string `s` at the end; see `sbuf_insert_at` for the result.
ic_private ssize_t sbuf_append( stringbuf_t* sbuf, const char* s ) {
  const ssize_t end = sbuf_len(sbuf);
  return sbuf_insert_at( sbuf, s, end);
}
739
740
// Append the single byte `c` (as a one character string) at the end.
ic_private ssize_t sbuf_append_char( stringbuf_t* sbuf, char c ) {
  const char one[2] = { c, 0 };
  return sbuf_append( sbuf, one );
}
746
747
// Replace the contents by the zero-terminated string `s`: clear first, then append.
ic_private void sbuf_replace(stringbuf_t* sbuf, const char* s) {
  sbuf_clear(sbuf);
  (void)sbuf_append(sbuf,s);  // the returned position is not needed here
}
751
752
// `str_next_ofs` applied to the buffer contents.
ic_private ssize_t sbuf_next_ofs( stringbuf_t* sbuf, ssize_t pos, ssize_t* cwidth ) {
  const ssize_t step = str_next_ofs( sbuf->buf, sbuf->count, pos, cwidth);
  return step;
}
755
756
// `str_prev_ofs` applied to the buffer contents.
ic_private ssize_t sbuf_prev_ofs( stringbuf_t* sbuf, ssize_t pos, ssize_t* cwidth ) {
  const ssize_t back = str_prev_ofs( sbuf->buf, pos, cwidth);
  return back;
}
759
760
// Position of the codepoint following the one at `pos`, or -1 if there is none.
// `cwidth` (if not NULL) receives the column width of the codepoint at `pos`.
ic_private ssize_t sbuf_next( stringbuf_t* sbuf, ssize_t pos, ssize_t* cwidth) {
  const ssize_t step = sbuf_next_ofs(sbuf,pos,cwidth);
  if (step > 0) {
    assert(pos + step <= sbuf->count);
    return pos + step;
  }
  return -1;
}
766
767
// Position of the codepoint before `pos`, or -1 if there is none.
// `cwidth` (if not NULL) receives the column width of that codepoint.
ic_private ssize_t sbuf_prev( stringbuf_t* sbuf, ssize_t pos, ssize_t* cwidth) {
  const ssize_t back = sbuf_prev_ofs(sbuf,pos,cwidth);
  if (back > 0) {
    assert(pos - back >= 0);
    return pos - back;
  }
  return -1;
}
773
774
// Delete the codepoint before `pos`. Returns the position where it started,
// or 0 if there was nothing to delete.
ic_private ssize_t sbuf_delete_char_before( stringbuf_t* sbuf, ssize_t pos ) {
  const ssize_t back = sbuf_prev_ofs(sbuf, pos, NULL);
  if (back <= 0) return 0;
  const ssize_t start = pos - back;
  assert( start >= 0 );
  sbuf_delete_at(sbuf, start, back);
  return start;
}
781
782
// Delete the codepoint (or escape sequence) at `pos`, if there is one.
ic_private void sbuf_delete_char_at( stringbuf_t* sbuf, ssize_t pos ) {
  const ssize_t step = sbuf_next_ofs(sbuf, pos, NULL);
  if (step > 0) {
    assert( pos + step <= sbuf->count );
    sbuf_delete_at(sbuf, pos, step);
  }
}
789
790
// Swap the codepoint before `pos` with the codepoint at `pos`.
// Returns the position where the swapped pair starts, or 0 if nothing was swapped
// (no codepoint on one side, or the preceding one is 63 bytes or longer).
ic_private ssize_t sbuf_swap_char( stringbuf_t* sbuf, ssize_t pos ) {
  const ssize_t next = sbuf_next_ofs(sbuf, pos, NULL);
  if (next <= 0) return 0;
  const ssize_t prev = sbuf_prev_ofs(sbuf, pos, NULL);
  if (prev <= 0) return 0;
  char saved[64];
  if (prev >= 63) return 0;  // the preceding codepoint must fit in `saved`
  char* const first = sbuf->buf + pos - prev;
  ic_memcpy(saved, first, prev );
  ic_memmove(first, sbuf->buf + pos, next);
  ic_memmove(first + next, saved, prev);
  return pos - prev;
}
802
803
// `str_find_line_start` applied to the buffer contents.
ic_private ssize_t sbuf_find_line_start( stringbuf_t* sbuf, ssize_t pos ) {
  const ssize_t start = str_find_line_start( sbuf->buf, sbuf->count, pos);
  return start;
}
806
807
// `str_find_line_end` applied to the buffer contents.
ic_private ssize_t sbuf_find_line_end( stringbuf_t* sbuf, ssize_t pos ) {
  const ssize_t end = str_find_line_end( sbuf->buf, sbuf->count, pos);
  return end;
}
810
811
// `str_find_word_start` applied to the buffer contents.
ic_private ssize_t sbuf_find_word_start( stringbuf_t* sbuf, ssize_t pos ) {
  const ssize_t start = str_find_word_start( sbuf->buf, sbuf->count, pos);
  return start;
}
814
815
// `str_find_word_end` applied to the buffer contents.
ic_private ssize_t sbuf_find_word_end( stringbuf_t* sbuf, ssize_t pos ) {
  const ssize_t end = str_find_word_end( sbuf->buf, sbuf->count, pos);
  return end;
}
818
819
// `str_find_ws_word_start` applied to the buffer contents.
ic_private ssize_t sbuf_find_ws_word_start( stringbuf_t* sbuf, ssize_t pos ) {
  const ssize_t start = str_find_ws_word_start( sbuf->buf, sbuf->count, pos);
  return start;
}
822
823
// `str_find_ws_word_end` applied to the buffer contents.
ic_private ssize_t sbuf_find_ws_word_end( stringbuf_t* sbuf, ssize_t pos ) {
  const ssize_t end = str_find_ws_word_end( sbuf->buf, sbuf->count, pos);
  return end;
}
826
827
// find row/col position
828
ic_private ssize_t sbuf_get_pos_at_rc( stringbuf_t* sbuf, ssize_t termw, ssize_t promptw, ssize_t cpromptw, ssize_t row, ssize_t col ) {
829
return str_get_pos_at_rc( sbuf->buf, sbuf->count, termw, promptw, cpromptw, row, col);
830
}
831
832
// get row/col for a given position
833
ic_private ssize_t sbuf_get_rc_at_pos( stringbuf_t* sbuf, ssize_t termw, ssize_t promptw, ssize_t cpromptw, ssize_t pos, rowcol_t* rc ) {
834
return str_get_rc_at_pos( sbuf->buf, sbuf->count, termw, promptw, cpromptw, pos, rc);
835
}
836
837
// `str_get_wrapped_rc_at_pos` applied to the buffer contents.
ic_private ssize_t sbuf_get_wrapped_rc_at_pos( stringbuf_t* sbuf, ssize_t termw, ssize_t newtermw, ssize_t promptw, ssize_t cpromptw, ssize_t pos, rowcol_t* rc ) {
  const ssize_t rows = str_get_wrapped_rc_at_pos( sbuf->buf, sbuf->count, termw, newtermw, promptw, cpromptw, pos, rc);
  return rows;
}
840
841
// `str_for_each_row` applied to the buffer contents; returns 0 for a NULL buffer.
ic_private ssize_t sbuf_for_each_row( stringbuf_t* sbuf, ssize_t termw, ssize_t promptw, ssize_t cpromptw, row_fun_t* fun, void* arg, void* res ) {
  if (sbuf != NULL) {
    return str_for_each_row( sbuf->buf, sbuf->count, termw, promptw, cpromptw, fun, arg, res);
  }
  return 0;
}
845
846
847
// Duplicate and decode from utf-8 (for non-utf8 terminals)
848
ic_private char* sbuf_strdup_from_utf8(stringbuf_t* sbuf) {
849
ssize_t len = sbuf_len(sbuf);
850
if (sbuf == NULL || len <= 0) return NULL;
851
char* s = mem_zalloc_tp_n(sbuf->mem, char, len);
852
if (s == NULL) return NULL;
853
ssize_t dest = 0;
854
for (ssize_t i = 0; i < len; ) {
855
ssize_t ofs = sbuf_next_ofs(sbuf, i, NULL);
856
if (ofs <= 0) {
857
// invalid input
858
break;
859
}
860
else if (ofs == 1) {
861
// regular character
862
s[dest++] = sbuf->buf[i];
863
}
864
else if (sbuf->buf[i] == '\x1B') {
865
// skip escape sequences
866
}
867
else {
868
// decode unicode
869
ssize_t nread;
870
unicode_t uchr = unicode_from_qutf8( (const uint8_t*)(sbuf->buf + i), ofs, &nread);
871
uint8_t c;
872
if (unicode_is_raw(uchr, &c)) {
873
// raw byte, output as is (this will take care of locale specific input)
874
s[dest++] = (char)c;
875
}
876
else if (uchr <= 0x7F) {
877
// allow ascii
878
s[dest++] = (char)uchr;
879
}
880
else {
881
// skip unknown unicode characters..
882
// todo: convert according to locale?
883
}
884
}
885
i += ofs;
886
}
887
assert(dest <= len);
888
s[dest] = 0;
889
return s;
890
}
891
892
//-------------------------------------------------------------
893
// String helpers
894
//-------------------------------------------------------------
895
896
// Position of the codepoint before `pos` in `s`, or -1 if `pos` is out of range
// or there is no previous codepoint.
ic_public long ic_prev_char( const char* s, long pos ) {
  const ssize_t len = ic_strlen(s);
  if (pos < 0 || pos > len) return -1;
  const ssize_t back = str_prev_ofs( s, pos, NULL );
  return (back <= 0 ? -1 : (long)(pos - back));
}
903
904
// Position of the codepoint after the one at `pos` in `s`, or -1 if `pos` is out of
// range or there is no codepoint at `pos`.
ic_public long ic_next_char( const char* s, long pos ) {
  const ssize_t len = ic_strlen(s);
  if (pos < 0 || pos > len) return -1;
  const ssize_t step = str_next_ofs( s, len, pos, NULL );
  return (step <= 0 ? -1 : (long)(pos + step));
}
911
912
913
// parse a decimal (leave pi unchanged on error)
914
ic_private bool ic_atoz(const char* s, ssize_t* pi) {
915
return (sscanf(s, "%zd", pi) == 1);
916
}
917
918
// parse two decimals separated by a semicolon
919
ic_private bool ic_atoz2(const char* s, ssize_t* pi, ssize_t* pj) {
920
return (sscanf(s, "%zd;%zd", pi, pj) == 2);
921
}
922
923
// parse unsigned 32-bit (leave pu unchanged on error)
924
ic_private bool ic_atou32(const char* s, uint32_t* pu) {
925
return (sscanf(s, "%" SCNu32, pu) == 1);
926
}
927
928
929
// Convenience: character class for whitespace `[ \t\r\n]`.
930
ic_public bool ic_char_is_white(const char* s, long len) {
931
if (s == NULL || len != 1) return false;
932
const char c = *s;
933
return (c==' ' || c == '\t' || c == '\n' || c == '\r');
934
}
935
936
// Convenience: character class for non-whitespace `[^ \t\r\n]`.
937
ic_public bool ic_char_is_nonwhite(const char* s, long len) {
938
return !ic_char_is_white(s, len);
939
}
940
941
// Convenience: character class for separators `[ \t\r\n,.;:/\\\(\)\{\}\[\]]`.
942
ic_public bool ic_char_is_separator(const char* s, long len) {
943
if (s == NULL || len != 1) return false;
944
const char c = *s;
945
return (strchr(" \t\r\n,.;:/\\(){}[]", c) != NULL);
946
}
947
948
// Convenience: character class for non-separators.
949
ic_public bool ic_char_is_nonseparator(const char* s, long len) {
950
return !ic_char_is_separator(s, len);
951
}
952
953
954
// Convenience: character class for digits (`[0-9]`).
955
ic_public bool ic_char_is_digit(const char* s, long len) {
956
if (s == NULL || len != 1) return false;
957
const char c = *s;
958
return (c >= '0' && c <= '9');
959
}
960
961
// Convenience: character class for hexadecimal digits (`[A-Fa-f0-9]`).
962
ic_public bool ic_char_is_hexdigit(const char* s, long len) {
963
if (s == NULL || len != 1) return false;
964
const char c = *s;
965
return ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
966
}
967
968
// Convenience: character class for letters (`[A-Za-z]` and any unicode > 0x80).
969
ic_public bool ic_char_is_letter(const char* s, long len) {
970
if (s == NULL || len <= 0) return false;
971
const char c = *s;
972
return ((uint8_t)c >= 0x80 || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'));
973
}
974
975
// Convenience: character class for identifier letters (`[A-Za-z0-9_-]` and any unicode > 0x80).
976
ic_public bool ic_char_is_idletter(const char* s, long len) {
977
if (s == NULL || len <= 0) return false;
978
const char c = *s;
979
return ((uint8_t)c >= 0x80 || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c == '_') || (c == '-'));
980
}
981
982
// Convenience: character class for filename letters (`[^ \t\r\n`@$><=;|&{(]`).
983
ic_public bool ic_char_is_filename_letter(const char* s, long len) {
984
if (s == NULL || len <= 0) return false;
985
const char c = *s;
986
return ((uint8_t)c >= 0x80 || (strchr(" \t\r\n`@$><=;|&{}()[]", c) == NULL));
987
}
988
989
// Convenience: If this is a token start, returns the length (or <= 0 if not found).
990
ic_public long ic_is_token(const char* s, long pos, ic_is_char_class_fun_t* is_token_char) {
991
if (s == NULL || pos < 0 || is_token_char == NULL) return -1;
992
ssize_t len = ic_strlen(s);
993
if (pos >= len) return -1;
994
if (pos > 0 && is_token_char(s + pos -1, 1)) return -1; // token start?
995
ssize_t i = pos;
996
while ( i < len ) {
997
ssize_t next = str_next_ofs(s, len, i, NULL);
998
if (next <= 0) return -1;
999
if (!is_token_char(s + i, (long)next)) break;
1000
i += next;
1001
}
1002
return (long)(i - pos);
1003
}
1004
1005
1006
// `strncmp` taking a signed length, which is converted with `to_size_t`.
static int ic_strncmp(const char* s1, const char* s2, ssize_t n) {
  const size_t count = to_size_t(n);
  return strncmp(s1, s2, count);
}
1009
1010
// Convenience: Does this match the specified token?
1011
// Ensures not to match prefixes or suffixes, and returns the length of the match (in bytes).
1012
// E.g. `ic_match_token("function",0,&ic_char_is_letter,"fun")` returns 0.
1013
ic_public long ic_match_token(const char* s, long pos, ic_is_char_class_fun_t* is_token_char, const char* token) {
1014
long n = ic_is_token(s, pos, is_token_char);
1015
if (n > 0 && token != NULL && n == ic_strlen(token) && ic_strncmp(s + pos, token, n) == 0) {
1016
return n;
1017
}
1018
else {
1019
return 0;
1020
}
1021
}
1022
1023
1024
// Convenience: Do any of the specified tokens match?
1025
// Ensures not to match prefixes or suffixes, and returns the length of the match (in bytes).
1026
// Ensures not to match prefixes or suffixes.
1027
// E.g. `ic_match_any_token("function",0,&ic_char_is_letter,{"fun","func",NULL})` returns 0.
1028
ic_public long ic_match_any_token(const char* s, long pos, ic_is_char_class_fun_t* is_token_char, const char** tokens) {
1029
long n = ic_is_token(s, pos, is_token_char);
1030
if (n <= 0 || tokens == NULL) return 0;
1031
for (const char** token = tokens; *token != NULL; token++) {
1032
if (n == ic_strlen(*token) && ic_strncmp(s + pos, *token, n) == 0) {
1033
return n;
1034
}
1035
}
1036
return 0;
1037
}
1038
1039
1040