Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/usr.bin/cut/cut.c
34677 views
1
/*
2
* SPDX-License-Identifier: BSD-3-Clause
3
*
4
* Copyright (c) 1989, 1993
5
* The Regents of the University of California. All rights reserved.
6
*
7
* This code is derived from software contributed to Berkeley by
8
* Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
9
*
10
* Redistribution and use in source and binary forms, with or without
11
* modification, are permitted provided that the following conditions
12
* are met:
13
* 1. Redistributions of source code must retain the above copyright
14
* notice, this list of conditions and the following disclaimer.
15
* 2. Redistributions in binary form must reproduce the above copyright
16
* notice, this list of conditions and the following disclaimer in the
17
* documentation and/or other materials provided with the distribution.
18
* 3. Neither the name of the University nor the names of its contributors
19
* may be used to endorse or promote products derived from this software
20
* without specific prior written permission.
21
*
22
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
* SUCH DAMAGE.
33
*/
34
35
#include <ctype.h>
36
#include <err.h>
37
#include <errno.h>
38
#include <limits.h>
39
#include <locale.h>
40
#include <stdio.h>
41
#include <stdlib.h>
42
#include <string.h>
43
#include <unistd.h>
44
#include <wchar.h>
45
46
/* Option flags; main() sets each one to 1 when the option is given. */
static int	bflag;			/* -b list */
static int	cflag;			/* -c list */
static int	dflag;			/* -d delim */
static int	fflag;			/* -f list */
static int	nflag;			/* -n */
static int	sflag;			/* -s */
static int	wflag;			/* -w */

/* Field delimiter: as a wide character and as the multibyte string given. */
static wchar_t	dchar;
static char	dcharmb[MB_LEN_MAX + 1];

/*
 * Selection state built by get_list():
 *   positions	byte map indexed from 1; non-zero marks a selected position
 *   maxval	highest position the cutting routines look up in positions
 *   autostart	largest N given as a leading "-N" range (0 if none)
 *   autostop	smallest N given as an open-ended "N-" range (0 if none)
 */
static size_t	autostart;
static size_t	autostop;
static size_t	maxval;
static char	*positions;

/* Cutting routines; each processes one stream and returns 0 or 1. */
static int	b_cut(FILE *fp, const char *fname);
static int	b_n_cut(FILE *fp, const char *fname);
static int	c_cut(FILE *fp, const char *fname);
static int	f_cut(FILE *fp, const char *fname);

/* Helpers. */
static void	get_list(char *list);
static int	is_delim(wchar_t ch);
static void	needpos(size_t n);
static void	usage(void);
67
68
int
69
main(int argc, char *argv[])
70
{
71
FILE *fp;
72
int (*fcn)(FILE *, const char *);
73
int ch, rval;
74
size_t n;
75
76
setlocale(LC_ALL, "");
77
78
fcn = NULL;
79
dchar = '\t'; /* default delimiter is \t */
80
strcpy(dcharmb, "\t");
81
82
while ((ch = getopt(argc, argv, "b:c:d:f:snw")) != -1)
83
switch(ch) {
84
case 'b':
85
get_list(optarg);
86
bflag = 1;
87
break;
88
case 'c':
89
get_list(optarg);
90
cflag = 1;
91
break;
92
case 'd':
93
n = mbrtowc(&dchar, optarg, MB_LEN_MAX, NULL);
94
if (dchar == '\0' || n != strlen(optarg))
95
errx(1, "bad delimiter");
96
strcpy(dcharmb, optarg);
97
dflag = 1;
98
break;
99
case 'f':
100
get_list(optarg);
101
fflag = 1;
102
break;
103
case 's':
104
sflag = 1;
105
break;
106
case 'n':
107
nflag = 1;
108
break;
109
case 'w':
110
wflag = 1;
111
break;
112
case '?':
113
default:
114
usage();
115
}
116
argc -= optind;
117
argv += optind;
118
119
if (fflag) {
120
if (bflag || cflag || nflag || (wflag && dflag))
121
usage();
122
} else if (!(bflag || cflag) || dflag || sflag || wflag)
123
usage();
124
else if (!bflag && nflag)
125
usage();
126
127
if (fflag)
128
fcn = f_cut;
129
else if (cflag)
130
fcn = MB_CUR_MAX > 1 ? c_cut : b_cut;
131
else if (bflag)
132
fcn = nflag && MB_CUR_MAX > 1 ? b_n_cut : b_cut;
133
134
rval = 0;
135
if (*argv)
136
for (; *argv; ++argv) {
137
if (strcmp(*argv, "-") == 0)
138
rval |= fcn(stdin, "stdin");
139
else {
140
if (!(fp = fopen(*argv, "r"))) {
141
warn("%s", *argv);
142
rval = 1;
143
continue;
144
}
145
fcn(fp, *argv);
146
(void)fclose(fp);
147
}
148
}
149
else
150
rval = fcn(stdin, "stdin");
151
exit(rval);
152
}
153
154
static void
155
get_list(char *list)
156
{
157
size_t setautostart, start, stop;
158
char *pos;
159
char *p;
160
161
/*
162
* set a byte in the positions array to indicate if a field or
163
* column is to be selected; use +1, it's 1-based, not 0-based.
164
* Numbers and number ranges may be overlapping, repeated, and in
165
* any order. We handle "-3-5" although there's no real reason to.
166
*/
167
for (; (p = strsep(&list, ", \t")) != NULL;) {
168
setautostart = start = stop = 0;
169
if (*p == '-') {
170
++p;
171
setautostart = 1;
172
}
173
if (isdigit((unsigned char)*p)) {
174
start = stop = strtol(p, &p, 10);
175
if (setautostart && start > autostart)
176
autostart = start;
177
}
178
if (*p == '-') {
179
if (isdigit((unsigned char)p[1]))
180
stop = strtol(p + 1, &p, 10);
181
if (*p == '-') {
182
++p;
183
if (!autostop || autostop > stop)
184
autostop = stop;
185
}
186
}
187
if (*p)
188
errx(1, "[-bcf] list: illegal list value");
189
if (!stop || !start)
190
errx(1, "[-bcf] list: values may not include zero");
191
if (maxval < stop) {
192
maxval = stop;
193
needpos(maxval + 1);
194
}
195
for (pos = positions + start; start++ <= stop; *pos++ = 1);
196
}
197
198
/* overlapping ranges */
199
if (autostop && maxval > autostop) {
200
maxval = autostop;
201
needpos(maxval + 1);
202
}
203
204
/* reversed range with autostart */
205
if (maxval < autostart) {
206
maxval = autostart;
207
needpos(maxval + 1);
208
}
209
210
/* set autostart */
211
if (autostart)
212
memset(positions + 1, '1', autostart);
213
}
214
215
static void
216
needpos(size_t n)
217
{
218
static size_t npos;
219
size_t oldnpos;
220
221
/* Grow the positions array to at least the specified size. */
222
if (n > npos) {
223
oldnpos = npos;
224
if (npos == 0)
225
npos = n;
226
while (n > npos)
227
npos *= 2;
228
if ((positions = realloc(positions, npos)) == NULL)
229
err(1, "realloc");
230
memset((char *)positions + oldnpos, 0, npos - oldnpos);
231
}
232
}
233
234
static int
235
b_cut(FILE *fp, const char *fname __unused)
236
{
237
int ch, col;
238
char *pos;
239
240
ch = 0;
241
for (;;) {
242
pos = positions + 1;
243
for (col = maxval; col; --col) {
244
if ((ch = getc(fp)) == EOF)
245
return (0);
246
if (ch == '\n')
247
break;
248
if (*pos++)
249
(void)putchar(ch);
250
}
251
if (ch != '\n') {
252
if (autostop)
253
while ((ch = getc(fp)) != EOF && ch != '\n')
254
(void)putchar(ch);
255
else
256
while ((ch = getc(fp)) != EOF && ch != '\n');
257
}
258
(void)putchar('\n');
259
}
260
return (0);
261
}
262
263
/*
264
* Cut based on byte positions, taking care not to split multibyte characters.
265
* Although this function also handles the case where -n is not specified,
266
* b_cut() ought to be much faster.
267
*/
268
static int
269
b_n_cut(FILE *fp, const char *fname)
270
{
271
size_t col, i, bufsize = 0;
272
ssize_t lbuflen;
273
char *lbuf = NULL;
274
int canwrite, clen, warned;
275
mbstate_t mbs;
276
277
memset(&mbs, 0, sizeof(mbs));
278
warned = 0;
279
while ((lbuflen = getline(&lbuf, &bufsize, fp)) >= 0) {
280
for (col = 0; lbuflen > 0; col += clen) {
281
if ((clen = mbrlen(lbuf, lbuflen, &mbs)) < 0) {
282
if (!warned) {
283
warn("%s", fname);
284
warned = 1;
285
}
286
memset(&mbs, 0, sizeof(mbs));
287
clen = 1;
288
}
289
if (clen == 0 || *lbuf == '\n')
290
break;
291
if (col < maxval && !positions[1 + col]) {
292
/*
293
* Print the character if (1) after an initial
294
* segment of un-selected bytes, the rest of
295
* it is selected, and (2) the last byte is
296
* selected.
297
*/
298
i = col;
299
while (i < col + clen && i < maxval &&
300
!positions[1 + i])
301
i++;
302
canwrite = i < col + clen;
303
for (; i < col + clen && i < maxval; i++)
304
canwrite &= positions[1 + i];
305
if (canwrite)
306
fwrite(lbuf, 1, clen, stdout);
307
} else {
308
/*
309
* Print the character if all of it has
310
* been selected.
311
*/
312
canwrite = 1;
313
for (i = col; i < col + clen; i++)
314
if ((i >= maxval && !autostop) ||
315
(i < maxval && !positions[1 + i])) {
316
canwrite = 0;
317
break;
318
}
319
if (canwrite)
320
fwrite(lbuf, 1, clen, stdout);
321
}
322
lbuf += clen;
323
lbuflen -= clen;
324
}
325
if (lbuflen > 0)
326
putchar('\n');
327
}
328
free(lbuf);
329
return (warned);
330
}
331
332
static int
333
c_cut(FILE *fp, const char *fname)
334
{
335
wint_t ch;
336
int col;
337
char *pos;
338
339
ch = 0;
340
for (;;) {
341
pos = positions + 1;
342
for (col = maxval; col; --col) {
343
if ((ch = getwc(fp)) == WEOF)
344
goto out;
345
if (ch == '\n')
346
break;
347
if (*pos++)
348
(void)putwchar(ch);
349
}
350
if (ch != '\n') {
351
if (autostop)
352
while ((ch = getwc(fp)) != WEOF && ch != '\n')
353
(void)putwchar(ch);
354
else
355
while ((ch = getwc(fp)) != WEOF && ch != '\n');
356
}
357
(void)putwchar('\n');
358
}
359
out:
360
if (ferror(fp)) {
361
warn("%s", fname);
362
return (1);
363
}
364
return (0);
365
}
366
367
static int
368
is_delim(wchar_t ch)
369
{
370
if (wflag) {
371
if (ch == ' ' || ch == '\t')
372
return 1;
373
} else {
374
if (ch == dchar)
375
return 1;
376
}
377
return 0;
378
}
379
380
static int
381
f_cut(FILE *fp, const char *fname)
382
{
383
wchar_t ch;
384
int field, i, isdelim;
385
char *pos, *p;
386
int output;
387
char *lbuf = NULL;
388
size_t clen, bufsize = 0, reallen;
389
ssize_t lbuflen;
390
391
while ((lbuflen = getline(&lbuf, &bufsize, fp)) >= 0) {
392
reallen = lbuflen;
393
/* Assert EOL has a newline. */
394
if (lbuflen > 0 && *(lbuf + lbuflen - 1) != '\n') {
395
/* Can't have > 1 line with no trailing newline. */
396
if ((ssize_t)bufsize < (lbuflen + 1)) {
397
bufsize = lbuflen + 1;
398
lbuf = realloc(lbuf, bufsize);
399
}
400
if (lbuf == NULL)
401
err(1, "realloc");
402
lbuf[lbuflen] = '\n';
403
reallen++;
404
}
405
output = 0;
406
for (isdelim = 0, p = lbuf;; p += clen) {
407
clen = mbrtowc(&ch, p, lbuf + reallen - p, NULL);
408
if (clen == (size_t)-1 || clen == (size_t)-2) {
409
warnc(EILSEQ, "%s", fname);
410
free(lbuf);
411
return (1);
412
}
413
if (clen == 0)
414
clen = 1;
415
/* this should work if newline is delimiter */
416
if (is_delim(ch))
417
isdelim = 1;
418
if (ch == '\n') {
419
if (!isdelim && !sflag)
420
(void)fwrite(lbuf, lbuflen, 1, stdout);
421
break;
422
}
423
}
424
if (!isdelim)
425
continue;
426
427
pos = positions + 1;
428
for (field = maxval, p = lbuf; field; --field, ++pos) {
429
if (*pos && output++)
430
for (i = 0; dcharmb[i] != '\0'; i++)
431
putchar(dcharmb[i]);
432
for (;;) {
433
clen = mbrtowc(&ch, p, lbuf + reallen - p,
434
NULL);
435
if (clen == (size_t)-1 || clen == (size_t)-2) {
436
warnc(EILSEQ, "%s", fname);
437
free(lbuf);
438
return (1);
439
}
440
if (clen == 0)
441
clen = 1;
442
p += clen;
443
if (ch == '\n' || is_delim(ch)) {
444
/* compress whitespace */
445
if (wflag && ch != '\n')
446
while (is_delim(*p))
447
p++;
448
break;
449
}
450
if (*pos)
451
for (i = 0; i < (int)clen; i++)
452
putchar(p[i - clen]);
453
}
454
if (ch == '\n')
455
break;
456
}
457
if (ch != '\n') {
458
if (autostop) {
459
if (output)
460
for (i = 0; dcharmb[i] != '\0'; i++)
461
putchar(dcharmb[i]);
462
for (; (ch = *p) != '\n'; ++p)
463
(void)putchar(ch);
464
} else
465
for (; (ch = *p) != '\n'; ++p);
466
}
467
(void)putchar('\n');
468
}
469
free(lbuf);
470
return (0);
471
}
472
473
/*
 * Print the three synopsis forms to stderr and exit with status 1.
 * The continuation lines are padded so that each "cut" lines up under
 * the one following "usage: ".
 */
static void
usage(void)
{
	(void)fprintf(stderr, "%s\n%s\n%s\n",
	    "usage: cut -b list [-n] [file ...]",
	    "       cut -c list [file ...]",
	    "       cut -f list [-s] [-w | -d delim] [file ...]");
	exit(1);
}
482
483