Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/usr.bin/csplit/csplit.c
34677 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2002 Tim J. Robbins.
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE.
27
*/
28
29
/*
 * csplit -- split files based on context
 *
 * This utility splits its input into numbered output files by line number
 * or by a regular expression. Regular expression matches have an optional
 * offset with them, allowing the split to occur a specified number of
 * lines before or after the match.
 *
 * To handle negative offsets, we stop reading when the match occurs and
 * store the offset that the file should have been split at, then use
 * this output file as input until all the "overflowed" lines have been read.
 * The file is then closed and truncated to the correct length.
 *
 * We assume that the output files can be seeked upon (i.e. they cannot be
 * symlinks to named pipes or character devices), but make no such
 * assumption about the input.
 */
46
47
#include <sys/types.h>
48
49
#include <ctype.h>
50
#include <err.h>
51
#include <errno.h>
52
#include <limits.h>
53
#include <locale.h>
54
#include <regex.h>
55
#include <signal.h>
56
#include <stdint.h>
57
#include <stdio.h>
58
#include <stdlib.h>
59
#include <string.h>
60
#include <unistd.h>
61
62
static void cleanup(void);
63
static void do_lineno(const char *);
64
static void do_rexp(const char *);
65
static char *get_line(void);
66
static void handlesig(int);
67
static FILE *newfile(void);
68
static void toomuch(FILE *, long);
69
static void usage(void);
70
71
/*
72
* Command line options
73
*/
74
static const char *prefix; /* File name prefix */
75
static long sufflen; /* Number of decimal digits for suffix */
76
static int sflag; /* Suppress output of file names */
77
static int kflag; /* Keep output if error occurs */
78
79
/*
80
* Other miscellaneous globals (XXX too many)
81
*/
82
static long lineno; /* Current line number in input file */
83
static long reps; /* Number of repetitions for this pattern */
84
static long nfiles; /* Number of files output so far */
85
static long maxfiles; /* Maximum number of files we can create */
86
static char currfile[PATH_MAX]; /* Current output file */
87
static const char *infn; /* Name of the input file */
88
static FILE *infile; /* Input file handle */
89
static FILE *overfile; /* Overflow file for toomuch() */
90
static off_t truncofs; /* Offset this file should be truncated at */
91
static int doclean; /* Should cleanup() remove output? */
92
93
int
94
main(int argc, char *argv[])
95
{
96
struct sigaction sa;
97
long i;
98
int ch;
99
const char *expr;
100
char *ep, *p;
101
FILE *ofp;
102
103
setlocale(LC_ALL, "");
104
105
kflag = sflag = 0;
106
prefix = "xx";
107
sufflen = 2;
108
while ((ch = getopt(argc, argv, "ksf:n:")) > 0) {
109
switch (ch) {
110
case 'f':
111
prefix = optarg;
112
break;
113
case 'k':
114
kflag = 1;
115
break;
116
case 'n':
117
errno = 0;
118
sufflen = strtol(optarg, &ep, 10);
119
if (sufflen <= 0 || *ep != '\0' || errno != 0)
120
errx(1, "%s: bad suffix length", optarg);
121
break;
122
case 's':
123
sflag = 1;
124
break;
125
default:
126
usage();
127
/*NOTREACHED*/
128
}
129
}
130
131
if (sufflen + strlen(prefix) >= PATH_MAX)
132
errx(1, "name too long");
133
134
argc -= optind;
135
argv += optind;
136
137
if ((infn = *argv++) == NULL)
138
usage();
139
if (strcmp(infn, "-") == 0) {
140
infile = stdin;
141
infn = "stdin";
142
} else if ((infile = fopen(infn, "r")) == NULL)
143
err(1, "%s", infn);
144
145
if (!kflag) {
146
doclean = 1;
147
atexit(cleanup);
148
sa.sa_flags = 0;
149
sa.sa_handler = handlesig;
150
sigemptyset(&sa.sa_mask);
151
sigaddset(&sa.sa_mask, SIGHUP);
152
sigaddset(&sa.sa_mask, SIGINT);
153
sigaddset(&sa.sa_mask, SIGTERM);
154
sigaction(SIGHUP, &sa, NULL);
155
sigaction(SIGINT, &sa, NULL);
156
sigaction(SIGTERM, &sa, NULL);
157
}
158
159
lineno = 0;
160
nfiles = 0;
161
truncofs = 0;
162
overfile = NULL;
163
164
/* Ensure 10^sufflen < LONG_MAX. */
165
for (maxfiles = 1, i = 0; i < sufflen; i++) {
166
if (maxfiles > LONG_MAX / 10)
167
errx(1, "%ld: suffix too long (limit %ld)",
168
sufflen, i);
169
maxfiles *= 10;
170
}
171
172
/* Create files based on supplied patterns. */
173
while (nfiles < maxfiles - 1 && (expr = *argv++) != NULL) {
174
/* Look ahead & see if this pattern has any repetitions. */
175
if (*argv != NULL && **argv == '{') {
176
errno = 0;
177
reps = strtol(*argv + 1, &ep, 10);
178
if (reps < 0 || *ep != '}' || errno != 0)
179
errx(1, "%s: bad repetition count", *argv + 1);
180
argv++;
181
} else
182
reps = 0;
183
184
if (*expr == '/' || *expr == '%') {
185
do
186
do_rexp(expr);
187
while (reps-- != 0 && nfiles < maxfiles - 1);
188
} else if (isdigit((unsigned char)*expr))
189
do_lineno(expr);
190
else
191
errx(1, "%s: unrecognised pattern", expr);
192
}
193
194
/* Copy the rest into a new file. */
195
if (!feof(infile)) {
196
ofp = newfile();
197
while ((p = get_line()) != NULL && fputs(p, ofp) != EOF)
198
;
199
if (!sflag)
200
printf("%jd\n", (intmax_t)ftello(ofp));
201
if (fclose(ofp) != 0)
202
err(1, "%s", currfile);
203
}
204
205
toomuch(NULL, 0);
206
doclean = 0;
207
208
return (0);
209
}
210
211
/* Print the synopsis on standard error and exit with status 1. */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: csplit [-ks] [-f prefix] [-n number] file args ...\n";

	fputs(synopsis, stderr);
	exit(1);
}
219
220
static void
221
handlesig(int sig __unused)
222
{
223
const char msg[] = "csplit: caught signal, cleaning up\n";
224
225
write(STDERR_FILENO, msg, sizeof(msg) - 1);
226
cleanup();
227
_exit(2);
228
}
229
230
/* Create a new output file. */
231
static FILE *
232
newfile(void)
233
{
234
FILE *fp;
235
236
if ((size_t)snprintf(currfile, sizeof(currfile), "%s%0*ld", prefix,
237
(int)sufflen, nfiles) >= sizeof(currfile))
238
errc(1, ENAMETOOLONG, NULL);
239
if ((fp = fopen(currfile, "w+")) == NULL)
240
err(1, "%s", currfile);
241
nfiles++;
242
243
return (fp);
244
}
245
246
/* Remove partial output, called before exiting. */
247
static void
248
cleanup(void)
249
{
250
char fnbuf[PATH_MAX];
251
long i;
252
253
if (!doclean)
254
return;
255
256
/*
257
* NOTE: One cannot portably assume to be able to call snprintf()
258
* from inside a signal handler. It does, however, appear to be safe
259
* to do on FreeBSD. The solution to this problem is worse than the
260
* problem itself.
261
*/
262
263
for (i = 0; i < nfiles; i++) {
264
snprintf(fnbuf, sizeof(fnbuf), "%s%0*ld", prefix,
265
(int)sufflen, i);
266
unlink(fnbuf);
267
}
268
}
269
270
/* Read a line from the input into a static buffer. */
271
static char *
272
get_line(void)
273
{
274
static char lbuf[LINE_MAX];
275
FILE *src;
276
277
src = overfile != NULL ? overfile : infile;
278
279
again: if (fgets(lbuf, sizeof(lbuf), src) == NULL) {
280
if (src == overfile) {
281
src = infile;
282
goto again;
283
}
284
return (NULL);
285
}
286
if (ferror(src))
287
err(1, "%s", infn);
288
lineno++;
289
290
return (lbuf);
291
}
292
293
/* Conceptually rewind the input (as obtained by get_line()) back `n' lines. */
294
static void
295
toomuch(FILE *ofp, long n)
296
{
297
char buf[BUFSIZ];
298
size_t i, nread;
299
300
if (overfile != NULL) {
301
/*
302
* Truncate the previous file we overflowed into back to
303
* the correct length, close it.
304
*/
305
if (fflush(overfile) != 0)
306
err(1, "overflow");
307
if (ftruncate(fileno(overfile), truncofs) != 0)
308
err(1, "overflow");
309
if (fclose(overfile) != 0)
310
err(1, "overflow");
311
overfile = NULL;
312
}
313
314
if (n == 0)
315
/* Just tidying up */
316
return;
317
318
lineno -= n;
319
320
/*
321
* Wind the overflow file backwards to `n' lines before the
322
* current one.
323
*/
324
do {
325
if (ftello(ofp) < (off_t)sizeof(buf))
326
rewind(ofp);
327
else
328
fseeko(ofp, -(off_t)sizeof(buf), SEEK_CUR);
329
if (ferror(ofp))
330
errx(1, "%s: can't seek", currfile);
331
if ((nread = fread(buf, 1, sizeof(buf), ofp)) == 0)
332
errx(1, "can't read overflowed output");
333
if (fseeko(ofp, -(off_t)nread, SEEK_CUR) != 0)
334
err(1, "%s", currfile);
335
for (i = 1; i <= nread; i++)
336
if (buf[nread - i] == '\n' && n-- == 0)
337
break;
338
if (ftello(ofp) == 0)
339
break;
340
} while (n > 0);
341
if (fseeko(ofp, nread - i + 1, SEEK_CUR) != 0)
342
err(1, "%s", currfile);
343
344
/*
345
* get_line() will read from here. Next call will truncate to
346
* truncofs in this file.
347
*/
348
overfile = ofp;
349
truncofs = ftello(overfile);
350
}
351
352
/* Handle splits for /regexp/ and %regexp% patterns. */
353
static void
354
do_rexp(const char *expr)
355
{
356
regex_t cre;
357
intmax_t nwritten;
358
long ofs;
359
int first;
360
char *ecopy, *ep, *p, *pofs, *re;
361
FILE *ofp;
362
363
if ((ecopy = strdup(expr)) == NULL)
364
err(1, "strdup");
365
366
re = ecopy + 1;
367
if ((pofs = strrchr(ecopy, *expr)) == NULL || pofs[-1] == '\\')
368
errx(1, "%s: missing trailing %c", expr, *expr);
369
*pofs++ = '\0';
370
371
if (*pofs != '\0') {
372
errno = 0;
373
ofs = strtol(pofs, &ep, 10);
374
if (*ep != '\0' || errno != 0)
375
errx(1, "%s: bad offset", pofs);
376
} else
377
ofs = 0;
378
379
if (regcomp(&cre, re, REG_BASIC|REG_NOSUB) != 0)
380
errx(1, "%s: bad regular expression", re);
381
382
if (*expr == '/')
383
/* /regexp/: Save results to a file. */
384
ofp = newfile();
385
else {
386
/* %regexp%: Make a temporary file for overflow. */
387
if ((ofp = tmpfile()) == NULL)
388
err(1, "tmpfile");
389
}
390
391
/* Read and output lines until we get a match. */
392
first = 1;
393
while ((p = get_line()) != NULL) {
394
if (fputs(p, ofp) == EOF)
395
break;
396
if (!first && regexec(&cre, p, 0, NULL, 0) == 0)
397
break;
398
first = 0;
399
}
400
401
if (p == NULL) {
402
toomuch(NULL, 0);
403
errx(1, "%s: no match", re);
404
}
405
406
if (ofs <= 0) {
407
/*
408
* Negative (or zero) offset: throw back any lines we should
409
* not have read yet.
410
*/
411
if (p != NULL) {
412
toomuch(ofp, -ofs + 1);
413
nwritten = (intmax_t)truncofs;
414
} else
415
nwritten = (intmax_t)ftello(ofp);
416
} else {
417
/*
418
* Positive offset: copy the requested number of lines
419
* after the match.
420
*/
421
while (--ofs > 0 && (p = get_line()) != NULL)
422
fputs(p, ofp);
423
toomuch(NULL, 0);
424
nwritten = (intmax_t)ftello(ofp);
425
if (fclose(ofp) != 0)
426
err(1, "%s", currfile);
427
}
428
429
if (!sflag && *expr == '/')
430
printf("%jd\n", nwritten);
431
432
regfree(&cre);
433
free(ecopy);
434
}
435
436
/* Handle splits based on line number. */
437
static void
438
do_lineno(const char *expr)
439
{
440
long lastline, tgtline;
441
char *ep, *p;
442
FILE *ofp;
443
444
errno = 0;
445
tgtline = strtol(expr, &ep, 10);
446
if (tgtline <= 0 || errno != 0 || *ep != '\0')
447
errx(1, "%s: bad line number", expr);
448
lastline = tgtline;
449
if (lastline <= lineno)
450
errx(1, "%s: can't go backwards", expr);
451
452
while (nfiles < maxfiles - 1) {
453
ofp = newfile();
454
while (lineno + 1 != lastline) {
455
if ((p = get_line()) == NULL)
456
errx(1, "%ld: out of range", lastline);
457
if (fputs(p, ofp) == EOF)
458
break;
459
}
460
if (!sflag)
461
printf("%jd\n", (intmax_t)ftello(ofp));
462
if (fclose(ofp) != 0)
463
err(1, "%s", currfile);
464
if (reps-- == 0)
465
break;
466
lastline += tgtline;
467
}
468
}
469
470