/*
 * Source: GitHub repository freebsd/freebsd-src,
 * path lib/libc/regex/grot/split.c (main branch).
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "split.ih"

/*
 - split - divide a string into fields, like awk split()
 == int split(char *string, char *fields[], int nfields, char *sep);
 - fields: list is not NULL-terminated
 - nfields: number of entries available in fields[]
 - sep: "" white, "c" single char, "ab" [ab]+
 *
 * The string is cut up in place: each stored field is a pointer into
 * string, and the separator that ends a stored field is overwritten
 * with a NUL.  Three separator modes exist:
 *	""	fields are separated by runs of blanks and tabs; leading
 *		and trailing white space is ignored
 *	"c"	every occurrence of c ends a field, so adjacent
 *		separators produce empty fields
 *	"ab.."	any run of the listed characters separates two fields
 *
 * At most nfields pointers are stored.  When the string holds more
 * fields than that, the last stored field keeps the unsplit rest of the
 * string and the surplus fields are only counted.  An empty string (or,
 * in "" mode, an all-white one) yields 0.
 *
 * If nfields is less than 1 nothing is stored in fields[] (it may even
 * be NULL), the string is left unmodified, and only the count is
 * returned.
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;
	char **fp = fields;
	char *sepp;
	int trimtrail;
	int countonly = 0;	/* caller gave us no room in fields[] */
	char *scratch[1];	/* stand-in vector for the count-only case */

	/*
	 * The one- and two-separator loops below always store the first
	 * field before looking at the remaining room.  Point them at a
	 * private one-entry vector when the caller has none, so they count
	 * fields without ever touching fields[].
	 */
	if (nfields < 1) {
		countonly = 1;
		fp = scratch;
		nfields = 1;
	}

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		fn = nfields;	/* counts down the free entries */
		for (;;) {
			*fp++ = p;
			fn--;
			if (fn == 0)
				break;	/* last entry keeps the rest unsplit */
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(nfields - fn);
			*(p-1) = '\0';
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		fn = nfields;	/* counts down the free entries */
		for (;;) {
			*fp++ = p;
			fn--;
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0') {
					/* an empty last field is just trailing white */
					if (trimtrail && **(fp-1) == '\0')
						fn++;
					return(nfields - fn);
				}
			if (fn == 0)
				break;	/* last entry keeps the rest unsplit */
			*(p-1) = '\0';
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;		/* back onto the terminating NUL */
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;		/* first char of the trailing white, if any */
			if (*p != '\0') {
				/*
				 * The final "field" counted above was empty.
				 * If it was the only overflow, the trailing
				 * white belongs to the last stored field and
				 * is cut off there.
				 */
				if (fn == nfields+1 && !countonly)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;			/* counts up the fields seen */
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		/* find the end of this field */
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		/* skip the rest of the separator run */
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}

#ifdef TEST_SPLIT

/*
 * test program
 *	pgm		runs regression
 *	pgm sep		splits stdin lines by sep
 *	pgm str sep	splits str by sep
 *	pgm str sep n	splits str by sep n times
 *
 * With one more argument after n, the n iterations only copy str and
 * never call split() (presumably a baseline for timing the loop above).
 * Always exits with status 0 unless str does not fit the work buffer.
 */

/* the helpers are defined after main(), so declare them first */
void dosplit(char *string, char *seps);
void print(int nf, int nfp, char *fields[]);
void regress(void);

int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
#	define	MNF	10
	char *fields[MNF];

	/* the repeat modes copy argv[1] into buf, so it has to fit */
	if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
		fprintf(stderr, "string too long (limit %d)\n",
		    (int)sizeof(buf) - 1);
		exit(1);
	}

	if (argc > 4)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
		}
	else if (argc > 3)
		for (n = atoi(argv[3]); n > 0; n--) {
			/* split() chops its input up, so start from a fresh copy */
			(void) strcpy(buf, argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	else if (argc > 2)
		dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/*
			 * Stomp the newline if there is one; a final line
			 * without a newline, or an over-long line, keeps
			 * all of its characters.
			 */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	else
		regress();

	exit(0);
}

/*
 * dosplit - split one string and print the outcome
 *
 * Splits string by seps into at most NF stored fields and hands the
 * field count and the field vector to print().  The string is modified
 * in place by split().
 */
void
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *fields[NF];
	int nf;

	nf = split(string, fields, NF, seps);
	print(nf, NF, fields);
}

/*
 * print - show the result of a split() call on stdout
 *
 * nf is the field count split() returned, nfp the capacity of fields[].
 * Output is the count, a tab, and the stored fields (at most nfp of
 * them) in double quotes separated by ", ".  The line is always
 * terminated with a newline, including when there are no fields or when
 * nf exceeds nfp.
 */
void
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	/* only the first nfp fields were actually stored */
	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("%s\"%s\"", (fn > 0) ? ", " : "", fields[fn]);
	printf("\n");
}

#define	RNF	5		/* some table entries know this */

/*
 * Regression table used by regress(): str is split by seps into a
 * vector of RNF entries; nf is the expected return value and fi[] holds
 * the expected contents of the stored fields.  The table ends at the
 * entry whose str is NULL.
 *
 * Blanks inside the strings are significant.  The single-separator
 * group needs the doubled blanks to produce its empty fields, and the
 * group with the two-blank separator drives the two-separator code with
 * both separators equal.
 */
struct {
	char *str;
	char *seps;
	int nf;
	char *fi[RNF];
} tests[] = {
	/* single separator */
	{ "",			" ",	0,	{ "" } },
	{ " ",			" ",	2,	{ "", "" } },
	{ "x",			" ",	1,	{ "x" } },
	{ "xy",			" ",	1,	{ "xy" } },
	{ "x y",		" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",		" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" ",	6,	{ "", "a", "b", "c", "d " } },

	/* two separators */
	{ "",			" _",	0,	{ "" } },
	{ " ",			" _",	2,	{ "", "" } },
	{ "x",			" _",	1,	{ "x" } },
	{ "x y",		" _",	2,	{ "x", "y" } },
	{ "ab _ cd",		" _",	2,	{ "ab", "cd" } },
	{ " a_b c ",		" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" _",	6,	{ "", "a", "b", "c", "d " } },

	/* three separators */
	{ "",			" _~",	0,	{ "" } },
	{ " ",			" _~",	2,	{ "", "" } },
	{ "x",			" _~",	1,	{ "x" } },
	{ "x y",		" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",		" _~",	2,	{ "ab", "cd" } },
	{ " a_b c~",		" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",		" _~",	6,	{ "", "a", "b", "c", "d " } },

	/* four separators */
	{ "",			" _~-",	0,	{ "" } },
	{ " ",			" _~-",	2,	{ "", "" } },
	{ "x",			" _~-",	1,	{ "x" } },
	{ "x y",		" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",		" _~-",	2,	{ "ab", "cd" } },
	{ " a_b c~",		" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",		" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/* two separators, both a blank */
	{ "",			"  ",	0,	{ "" } },
	{ " ",			"  ",	2,	{ "", "" } },
	{ "x",			"  ",	1,	{ "x" } },
	{ "xy",			"  ",	1,	{ "xy" } },
	{ "x y",		"  ",	2,	{ "x", "y" } },
	{ "abc def  g ",	"  ",	4,	{ "abc", "def", "g", "" } },
	{ "  a bcd",		"  ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	"  ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		"  ",	6,	{ "", "a", "b", "c", "d " } },

	/* white space */
	{ "",			"",	0,	{ "" } },
	{ " ",			"",	0,	{ "" } },
	{ "x",			"",	1,	{ "x" } },
	{ "xy",			"",	1,	{ "xy" } },
	{ "x y",		"",	2,	{ "x", "y" } },
	{ "abc def g ",		"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",		"",	2,	{ "a", "bcd" } },
	{ " a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",		"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	{ NULL,			NULL,	0,	{ NULL } },
};

273
void
274
regress(void)
275
{
276
char buf[512];
277
int n;
278
char *fields[RNF+1];
279
int nf;
280
int i;
281
int printit;
282
char *f;
283
284
for (n = 0; tests[n].str != NULL; n++) {
285
(void) strcpy(buf, tests[n].str);
286
fields[RNF] = NULL;
287
nf = split(buf, fields, RNF, tests[n].seps);
288
printit = 0;
289
if (nf != tests[n].nf) {
290
printf("split `%s' by `%s' gave %d fields, not %d\n",
291
tests[n].str, tests[n].seps, nf, tests[n].nf);
292
printit = 1;
293
} else if (fields[RNF] != NULL) {
294
printf("split() went beyond array end\n");
295
printit = 1;
296
} else {
297
for (i = 0; i < nf && i < RNF; i++) {
298
f = fields[i];
299
if (f == NULL)
300
f = "(NULL)";
301
if (strcmp(f, tests[n].fi[i]) != 0) {
302
printf("split `%s' by `%s' field %d is `%s', not `%s'\n",
303
tests[n].str, tests[n].seps,
304
i, fields[i], tests[n].fi[i]);
305
printit = 1;
306
}
307
}
308
}
309
if (printit)
310
print(nf, RNF, fields);
311
}
312
}
#endif