Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libcmd/fmt.c
1808 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 1992-2012 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Glenn Fowler <gsf@research.att.com> *
18
* David Korn <dgk@research.att.com> *
19
* *
20
***********************************************************************/
21
#pragma prototyped
22
23
static const char usage[] =
24
"[-?\n@(#)$Id: fmt (AT&T Research) 2007-01-02 $\n]"
25
USAGE_LICENSE
26
"[+NAME?fmt - simple text formatter]"
27
"[+DESCRIPTION?\bfmt\b reads the input files and left justifies space "
28
"separated words into lines \awidth\a characters or less in length and "
29
"writes the lines to the standard output. The standard input is read if "
30
"\b-\b or no files are specified. Blank lines and interword spacing are "
31
"preserved in the output. Indentation is preserved, and lines with "
32
"identical indentation are joined and justified.]"
33
"[+?\bfmt\b is meant to format mail messages prior to sending, but may "
34
"also be useful for other simple tasks. For example, in \bvi\b(1) the "
35
"command \b:!}fmt\b will justify the lines in the current paragraph.]"
36
"[c:crown-margin?Preserve the indentation of the first two lines within "
37
"a paragraph, and align the left margin of each subsequent line with "
38
"that of the second line.]"
39
"[o:optget?Format concatenated \boptget\b(3) usage strings.]"
40
"[s:split-only?Split lines only; do not join short lines to form longer "
41
"ones.]"
42
"[u:uniform-spacing?One space between words, two after sentences.]"
43
"[w:width?Set the output line width to \acolumns\a.]#[columns:=72]"
44
"\n\n"
45
"[ file ... ]"
46
"\n\n"
47
"[+SEE ALSO?\bmailx\b(1), \bnroff\b(1), \btroff\b(1), \bvi\b(1), "
48
"\boptget\b(3)]"
49
;
50
51
#include <cmd.h>
52
#include <ctype.h>
53
54
typedef struct Fmt_s
55
{
56
long flags;
57
char* outp;
58
char* outbuf;
59
char* endbuf;
60
Sfio_t* in;
61
Sfio_t* out;
62
int indent;
63
int nextdent;
64
int nwords;
65
int prefix;
66
int quote;
67
int retain;
68
int section;
69
} Fmt_t;
70
71
/*
 * layout constants: INDENT is the -o indentation unit,
 * TABSZ the tab stop interval
 */

#define INDENT			4
#define TABSZ			8

/*
 * option flags are kept in (fp)->flags, one bit per lower case
 * option letter: 'a' is bit 0, 'b' is bit 1, and so on
 */

#define optionbit(c)		(1L<<((c)-'a'))

#define isoption(fp,c)		((fp)->flags&optionbit(c))
#define setoption(fp,c)		((fp)->flags|=optionbit(c))
#define clroption(fp,c)		((fp)->flags&=~optionbit(c))
77
78
static void
79
outline(Fmt_t* fp)
80
{
81
register char* cp = fp->outbuf;
82
int n = 0;
83
int c;
84
int d;
85
86
if (!fp->outp)
87
return;
88
while (fp->outp[-1] == ' ')
89
fp->outp--;
90
*fp->outp = 0;
91
while (*cp++ == ' ')
92
n++;
93
if (n >= TABSZ)
94
{
95
n /= TABSZ;
96
cp = &fp->outbuf[TABSZ*n];
97
while (n--)
98
*--cp = '\t';
99
}
100
else
101
cp = fp->outbuf;
102
fp->nwords = 0;
103
if (!isoption(fp, 'o'))
104
sfputr(fp->out, cp, '\n');
105
else if (*cp)
106
{
107
n = fp->indent;
108
if (*cp != '[')
109
{
110
if (*cp == ' ')
111
cp++;
112
n += INDENT;
113
}
114
while (n--)
115
sfputc(fp->out, ' ');
116
if (fp->quote)
117
{
118
if ((d = (fp->outp - cp)) <= 0)
119
c = 0;
120
else if ((c = fp->outp[-1]) == 'n' && d > 1 && fp->outp[-2] == '\\')
121
c = '}';
122
sfprintf(fp->out, "\"%s%s\"\n", cp, c == ']' || c == '{' || c == '}' ? "" : " ");
123
}
124
else
125
sfputr(fp->out, cp, '\n');
126
if (fp->nextdent)
127
{
128
fp->indent += fp->nextdent;
129
fp->endbuf -= fp->nextdent;
130
fp->nextdent = 0;
131
}
132
}
133
fp->outp = 0;
134
}
135
136
static void
137
split(Fmt_t* fp, char* buf, int splice)
138
{
139
register char* cp;
140
register char* ep;
141
register char* qp;
142
register int c = 1;
143
register int q = 0;
144
register int n;
145
int prefix;
146
147
for (ep = buf; *ep == ' '; ep++);
148
prefix = ep - buf;
149
150
/*
151
* preserve blank lines
152
*/
153
154
if ((*ep == 0 || *buf == '.') && !isoption(fp, 'o'))
155
{
156
if (*ep)
157
prefix = strlen(buf);
158
outline(fp);
159
strcpy(fp->outbuf, buf);
160
fp->outp = fp->outbuf+prefix;
161
outline(fp);
162
return;
163
}
164
if (fp->prefix < prefix && !isoption(fp, 'c'))
165
outline(fp);
166
if (!fp->outp || prefix < fp->prefix)
167
fp->prefix = prefix;
168
while (c)
169
{
170
cp = ep;
171
while (*ep == ' ')
172
ep++;
173
if (cp != ep && isoption(fp, 'u'))
174
cp = ep-1;
175
while (c = *ep)
176
{
177
if (c == ' ')
178
break;
179
ep++;
180
181
/*
182
* skip over \space
183
*/
184
185
if (c == '\\' && *ep)
186
ep++;
187
}
188
n = (ep-cp);
189
if (n && isoption(fp, 'o'))
190
{
191
for (qp = cp; qp < ep; qp++)
192
if (*qp == '\\')
193
qp++;
194
else if (*qp == '"')
195
q = !q;
196
if (*(ep-1) == '"')
197
goto skip;
198
}
199
if (fp->nwords > 0 && &fp->outp[n] >= fp->endbuf && !fp->retain && !q)
200
outline(fp);
201
skip:
202
if (fp->nwords == 0)
203
{
204
if (fp->prefix)
205
memset(fp->outbuf, ' ', fp->prefix);
206
fp->outp = &fp->outbuf[fp->prefix];
207
while (*cp == ' ')
208
cp++;
209
n = (ep-cp);
210
}
211
memcpy(fp->outp, cp, n);
212
fp->outp += n;
213
fp->nwords++;
214
}
215
if (isoption(fp, 's') || *buf == 0)
216
outline(fp);
217
else if (fp->outp)
218
{
219
/*
220
* two spaces at ends of sentences
221
*/
222
223
if (!isoption(fp, 'o') && strchr(".:!?", fp->outp[-1]))
224
*fp->outp++ = ' ';
225
if (!splice && !fp->retain && (!fp->quote || (fp->outp - fp->outbuf) < 2 || fp->outp[-2] != '\\' || fp->outp[-1] != 'n' && fp->outp[-1] != 't' && fp->outp[-1] != ' '))
226
*fp->outp++ = ' ';
227
}
228
}
229
230
static int
231
dofmt(Fmt_t* fp)
232
{
233
register int c;
234
int b;
235
int x;
236
int splice;
237
char* cp;
238
char* dp;
239
char* ep;
240
char* lp;
241
char* tp;
242
char buf[8192];
243
244
cp = 0;
245
while (cp || (cp = sfgetr(fp->in, '\n', 0)) && !(splice = 0) && (lp = cp + sfvalue(fp->in) - 1) || (cp = sfgetr(fp->in, '\n', SF_LASTR)) && (splice = 1) && (lp = cp + sfvalue(fp->in)))
246
{
247
if (isoption(fp, 'o'))
248
{
249
if (!isoption(fp, 'i'))
250
{
251
setoption(fp, 'i');
252
b = 0;
253
while (cp < lp)
254
{
255
if (*cp == ' ')
256
b += 1;
257
else if (*cp == '\t')
258
b += INDENT;
259
else
260
break;
261
cp++;
262
}
263
fp->indent = roundof(b, INDENT);
264
}
265
else
266
while (cp < lp && (*cp == ' ' || *cp == '\t'))
267
cp++;
268
if (!isoption(fp, 'q') && cp < lp)
269
{
270
setoption(fp, 'q');
271
if (*cp == '"')
272
{
273
ep = lp;
274
while (--ep > cp)
275
if (*ep == '"')
276
{
277
fp->quote = 1;
278
break;
279
}
280
else if (*ep != ' ' && *ep != '\t')
281
break;
282
}
283
}
284
}
285
again:
286
dp = buf;
287
ep = 0;
288
for (b = 1;; b = 0)
289
{
290
if (cp >= lp)
291
{
292
cp = 0;
293
break;
294
}
295
c = *cp++;
296
if (isoption(fp, 'o'))
297
{
298
if (c == '\\')
299
{
300
x = 0;
301
c = ' ';
302
cp--;
303
while (cp < lp)
304
{
305
if (*cp == '\\')
306
{
307
cp++;
308
if ((lp - cp) < 1)
309
{
310
c = '\\';
311
break;
312
}
313
if (*cp == 'n')
314
{
315
cp++;
316
c = '\n';
317
if ((lp - cp) > 2)
318
{
319
if (*cp == ']' || *cp == '@' && *(cp + 1) == '(')
320
{
321
*dp++ = '\\';
322
*dp++ = 'n';
323
c = *cp++;
324
break;
325
}
326
if (*cp == '\\' && *(cp + 1) == 'n')
327
{
328
cp += 2;
329
*dp++ = '\n';
330
break;
331
}
332
}
333
}
334
else if (*cp == 't' || *cp == ' ')
335
{
336
cp++;
337
x = 1;
338
c = ' ';
339
}
340
else
341
{
342
if (x && dp != buf && *(dp - 1) != ' ')
343
*dp++ = ' ';
344
*dp++ = '\\';
345
c = *cp++;
346
break;
347
}
348
}
349
else if (*cp == ' ' || *cp == '\t')
350
{
351
cp++;
352
c = ' ';
353
x = 1;
354
}
355
else
356
{
357
if (x && c != '\n' && dp != buf && *(dp - 1) != ' ')
358
*dp++ = ' ';
359
break;
360
}
361
}
362
if (c == '\n')
363
{
364
c = 0;
365
goto flush;
366
}
367
if (c == ' ' && (dp == buf || *(dp - 1) == ' '))
368
continue;
369
}
370
else if (c == '"')
371
{
372
if (b || cp >= lp)
373
{
374
if (fp->quote)
375
continue;
376
fp->section = 0;
377
}
378
}
379
else if (c == '\a')
380
{
381
*dp++ = '\\';
382
c = 'a';
383
}
384
else if (c == '\b')
385
{
386
*dp++ = '\\';
387
c = 'b';
388
}
389
else if (c == '\f')
390
{
391
*dp++ = '\\';
392
c = 'f';
393
}
394
else if (c == '\v')
395
{
396
*dp++ = '\\';
397
c = 'v';
398
}
399
else if (c == ']' && (cp >= lp || *cp != ':' && *cp != '#' && *cp != '!'))
400
{
401
if (cp < lp && *cp == ']')
402
{
403
cp++;
404
*dp++ = c;
405
}
406
else
407
{
408
fp->section = 1;
409
fp->retain = 0;
410
flush:
411
*dp++ = c;
412
*dp = 0;
413
split(fp, buf, 0);
414
outline(fp);
415
goto again;
416
}
417
}
418
else if (fp->section)
419
{
420
if (c == '[')
421
{
422
if (b)
423
fp->retain = 1;
424
else
425
{
426
cp--;
427
c = 0;
428
goto flush;
429
}
430
fp->section = 0;
431
}
432
else if (c == '{')
433
{
434
x = 1;
435
for (tp = cp; tp < lp; tp++)
436
{
437
if (*tp == '[' || *tp == '\n')
438
break;
439
if (*tp == ' ' || *tp == '\t' || *tp == '"')
440
continue;
441
if (*tp == '\\' && (lp - tp) > 1)
442
{
443
if (*++tp == 'n')
444
break;
445
if (*tp == 't' || *tp == '\n')
446
continue;
447
}
448
x = 0;
449
break;
450
}
451
if (x)
452
{
453
if (fp->endbuf > (fp->outbuf + fp->indent + 2*INDENT))
454
fp->nextdent = 2*INDENT;
455
goto flush;
456
}
457
else
458
fp->section = 0;
459
}
460
else if (c == '}')
461
{
462
if (fp->indent && (b || *(cp - 2) != 'f'))
463
{
464
if (b)
465
{
466
fp->indent -= 2*INDENT;
467
fp->endbuf += 2*INDENT;
468
}
469
else
470
{
471
cp--;
472
c = 0;
473
}
474
goto flush;
475
}
476
else
477
fp->section = 0;
478
}
479
else if (c == ' ' || c == '\t')
480
continue;
481
else
482
fp->section = 0;
483
}
484
else if (c == '?' && (cp >= lp || *cp != '?'))
485
{
486
if (fp->retain)
487
{
488
cp--;
489
while (cp < lp && *cp != ' ' && *cp != '\t' && *cp != ']' && dp < &buf[sizeof(buf)-3])
490
*dp++ = *cp++;
491
if (cp < lp && (*cp == ' ' || *cp == '\t'))
492
*dp++ = *cp++;
493
*dp = 0;
494
split(fp, buf, 0);
495
dp = buf;
496
ep = 0;
497
fp->retain = 0;
498
if (fp->outp >= fp->endbuf)
499
outline(fp);
500
continue;
501
}
502
}
503
else if (c == ' ' || c == '\t')
504
for (c = ' '; *cp == ' ' || *cp == '\t'; cp++);
505
}
506
else if (c == '\b')
507
{
508
if (dp > buf)
509
{
510
dp--;
511
if (ep)
512
ep--;
513
}
514
continue;
515
}
516
else if (c == '\t')
517
{
518
/*
519
* expand tabs
520
*/
521
522
if (!ep)
523
ep = dp;
524
c = isoption(fp, 'o') ? 1 : TABSZ - (dp - buf) % TABSZ;
525
if (dp >= &buf[sizeof(buf) - c - 3])
526
{
527
cp--;
528
break;
529
}
530
while (c-- > 0)
531
*dp++ = ' ';
532
continue;
533
}
534
else if (!isprint(c))
535
continue;
536
if (dp >= &buf[sizeof(buf) - 3])
537
{
538
tp = dp;
539
while (--tp > buf)
540
if (isspace(*tp))
541
{
542
cp -= dp - tp;
543
dp = tp;
544
break;
545
}
546
ep = 0;
547
break;
548
}
549
if (c != ' ')
550
ep = 0;
551
else if (!ep)
552
ep = dp;
553
*dp++ = c;
554
}
555
if (ep)
556
*ep = 0;
557
else
558
*dp = 0;
559
split(fp, buf, splice);
560
}
561
return 0;
562
}
563
564
int
565
b_fmt(int argc, char** argv, Shbltin_t* context)
566
{
567
register int n;
568
char* cp;
569
Fmt_t fmt;
570
char outbuf[8 * 1024];
571
572
fmt.flags = 0;
573
fmt.out = sfstdout;
574
fmt.outbuf = outbuf;
575
fmt.outp = 0;
576
fmt.endbuf = &outbuf[72];
577
fmt.indent = 0;
578
fmt.nextdent = 0;
579
fmt.nwords = 0;
580
fmt.prefix = 0;
581
fmt.quote = 0;
582
fmt.retain = 0;
583
fmt.section = 1;
584
cmdinit(argc, argv, context, ERROR_CATALOG, 0);
585
for (;;)
586
{
587
switch (n = optget(argv, usage))
588
{
589
case 'c':
590
case 'o':
591
case 's':
592
case 'u':
593
setoption(&fmt, n);
594
continue;
595
case 'w':
596
if (opt_info.num < TABSZ || opt_info.num>= sizeof(outbuf))
597
error(2, "width out of range");
598
fmt.endbuf = &outbuf[opt_info.num];
599
continue;
600
case ':':
601
error(2, "%s", opt_info.arg);
602
break;
603
case '?':
604
error(ERROR_usage(2), "%s", opt_info.arg);
605
break;
606
}
607
break;
608
}
609
argv += opt_info.index;
610
if (error_info.errors)
611
error(ERROR_usage(2), "%s", optusage(NiL));
612
if (isoption(&fmt, 'o'))
613
setoption(&fmt, 'c');
614
if (isoption(&fmt, 's'))
615
clroption(&fmt, 'u');
616
if (cp = *argv)
617
argv++;
618
do {
619
if (!cp || streq(cp, "-"))
620
fmt.in = sfstdin;
621
else if (!(fmt.in = sfopen(NiL, cp, "r")))
622
{
623
error(ERROR_system(0), "%s: cannot open", cp);
624
error_info.errors = 1;
625
continue;
626
}
627
dofmt(&fmt);
628
if (fmt.in != sfstdin)
629
sfclose(fmt.in);
630
} while (cp = *argv++);
631
outline(&fmt);
632
if (sfsync(sfstdout))
633
error(ERROR_system(0), "write error");
634
return error_info.errors != 0;
635
}
636
637