/*
 * Source: GitHub repository att/ast
 * Path: blob/master/src/cmd/sortlib/sum/sortsum.c
 */
/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 2003-2011 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Eclipse Public License, Version 1.0                 *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*          http://www.eclipse.org/org/documents/epl-v10.html           *
*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <[email protected]>                  *
*                                                                      *
***********************************************************************/
#pragma prototyped

/*
 * sort uniq summary discipline
 */

static const char usage[] =
27
"[-1lp0s5P?\n@(#)$Id: sum (AT&T Research) 2011-08-23 $\n]"
28
USAGE_LICENSE
29
"[+PLUGIN?sum - sort uniq summary discipline]"
30
"[+DESCRIPTION?The \bsum\b \bsort\b(1) discipline applies "
31
"summarization operations to selected fields in records that compare "
32
"equal. The discipline sets the \bsort\b \b--unique\b option. Summary "
33
"fields in non-unique records are modified according to the operations "
34
"specified in the \bop\b discipline option.]"
35
"[l:library?Load the \bdss\b(1) type library \alibrary\a. Types are used "
36
"by the \bop\b option. The \bnum_t\b library is loaded by default. \vdss "
37
"--plugin=man\v lists the information on all \bdss\b libraries and \vdss "
38
"--plugin=man\v \aname\a lists the information for the \aname\a "
39
"library.]:[library]"
40
"[o:op?A field summary operation. \aarg\a is a \bdss\b(1) type name for "
41
"all but the \bset\b \aop\a, either from the \bnum_t\b library or from a "
42
"library loaded by the \blibrary\b option. \atype\a may also contain one "
43
"or more \b:\b separated attributes. \akey\a is a \bsort\b(1) \b-k\b "
44
"style field specification. \aop\a\b:\b\aarg\a may be specified multiple "
45
"times; \aop\a and \aarg\a are inherited across \akey\a values from left "
46
"to right. The default type is native character set \binteger\b; some "
47
"operations may ignore the type. Spaces may be used in place of the "
48
"\b:\b. \aop\a may be one of:]:[op::[arg...]]::key[...]]]"
49
"{"
50
"[+max (M)?maximum value]"
51
"[+min (m)?minimum value]"
52
"[+average (a)?average value]"
53
"[+sum (s)?sum]"
54
"[+count (c)?multiply subsequent field values and increment the "
55
"total count by this value]"
56
"[+set (s)?set all field field bytes to the first character of "
57
"\aarg\a, which may be a C-style escape sequence]"
58
"}"
59
"[r:regress?Massage output for regression testing.]"
60
"[d:debug?List the field operations on the standard error.]"
61
"[+EXAMPLES]"
62
"{"
63
"[+sort -k.2.1 -lsum,op=sum::integer::.6.2?Sorts on the 1 byte "
64
"fixed width field starting at byte position 2 (counting from 1) "
65
"and computes the sum of the integers in the 2 byte fixed width "
66
"field starting at byte position 6.]"
67
"[+dlls --base dss | grep '_t$'?Lists the \bdss\b(1) type "
68
"library names.]"
69
"[+dss --plugin=man num_t?Lists the \bdss\b(1) \bnum_t\b type "
70
"library description in the \b--man\b style.]"
71
"}"
72
"[+SEE ALSO?\bdss\b(1), \bsort\b(1)]"
73
"\n\n--library=sum[,option[=value]...]\n\n"
74
;
75
76
#include <ast.h>
#include <ctype.h>
#include <ccode.h>
#include <dss.h>
#include <error.h>
#include <recsort.h>
#include <recfmt.h>
#include <vmalloc.h>

struct Library_s; typedef struct Library_s Library_t;
86
struct Summary_s; typedef struct Summary_s Summary_t;
87
88
struct Library_s
89
{
90
Library_t* next;
91
Cxtype_t* types;
92
};
93
94
/*
 * one end of a summary field specification
 * rs_disc() stores both members zero based (user value - 1); for an end
 * position built from a .size suffix, index is beg.index + size
 */

typedef struct Position_s
{
	short		field;		/* field number			*/
	short		index;		/* byte offset within the field	*/
} Position_t;

struct Summary_s
101
{
102
Summary_t* next;
103
Cxtype_t* type;
104
unsigned char* map;
105
unsigned char* pam;
106
Cxformat_t format;
107
Position_t beg;
108
Position_t end;
109
int op;
110
int set;
111
int fixed;
112
int width;
113
Sflong_t count;
114
Sfdouble_t value;
115
};
116
117
/*
 * growable byte buffer -- see assure()
 */

typedef struct Buffer_s
{
	unsigned char*	buf;		/* buffer base, 0 until first use	*/
	size_t		siz;		/* allocated size of buf in bytes	*/
} Buffer_t;

typedef struct State_s
124
{
125
Rsdisc_t disc;
126
Dss_t* dss;
127
Summary_t* sum;
128
Sflong_t records;
129
Recfmt_t fmt;
130
unsigned char* tab;
131
unsigned char delim[256];
132
int alt;
133
int regress;
134
Buffer_t tmp;
135
Buffer_t buf[2];
136
} State_t;
137
138
#define ASSURE(s,b,z) do{if(((b)->siz<(z))&&assure(s,b,z))return -1;}while(0)
139
140
static int
141
assure(State_t* state, Buffer_t* b, size_t z)
142
{
143
if (b->siz < z)
144
{
145
b->siz = roundof(z, 32);
146
if (!(b->buf = vmnewof(state->dss->vm, b->buf, unsigned char, b->siz, 0)))
147
{
148
error(ERROR_SYSTEM|3, "out of space extending to %I*u", sizeof(b->siz), b->siz);
149
return -1;
150
}
151
}
152
return 0;
153
}
154
155
static int
156
record(register State_t* state, register Rsobj_t* r, int op)
157
{
158
Cx_t* cx = state->dss->cx;
159
register Summary_t* sum;
160
register unsigned char* s;
161
register unsigned char* e;
162
register unsigned char* a;
163
register unsigned char* z;
164
register unsigned char* del;
165
register const unsigned char* map;
166
unsigned char* x;
167
unsigned char* tab;
168
Buffer_t* ext;
169
int beg;
170
int end;
171
int t;
172
int c;
173
size_t count;
174
size_t w;
175
size_t y;
176
ssize_t n;
177
Cxoperand_t v;
178
179
state->records++;
180
s = r->data;
181
e = s + r->datalen - (RECTYPE(state->fmt) == REC_delimited);
182
beg = end = 0;
183
count = 1;
184
tab = state->tab;
185
t = *tab++;
186
if (!*tab)
187
tab = 0;
188
del = state->delim;
189
for (sum = state->sum; sum; sum = sum->next)
190
{
191
while (beg < sum->beg.field)
192
{
193
tab1:
194
while (s < e)
195
if (del[*s++])
196
{
197
if (tab)
198
{
199
for (c = 0; (s + c) < e; c++)
200
if (!tab[c])
201
{
202
s += c;
203
break;
204
}
205
else if (tab[c] != s[c])
206
goto tab1;
207
}
208
else if (t == ' ')
209
while (s < e && del[*s])
210
s++;
211
break;
212
}
213
end = ++beg;
214
}
215
if (sum->beg.index < (e - s))
216
{
217
a = s + sum->beg.index;
218
while (end < sum->end.field)
219
{
220
tab2:
221
while (s < e)
222
if (del[*s++])
223
{
224
if (tab)
225
{
226
for (c = 0; (s + c) < e; c++)
227
if (!tab[c])
228
{
229
s += c;
230
break;
231
}
232
else if (tab[c] != s[c])
233
goto tab2;
234
}
235
else if (t == ' ')
236
while (s < e && del[*s])
237
s++;
238
break;
239
}
240
end++;
241
}
242
if (!sum->end.index)
243
{
244
tab3:
245
while (s < e)
246
if (del[*s++])
247
{
248
if (tab)
249
{
250
for (c = 0; (s + c) < e; c++)
251
if (!tab[c])
252
break;
253
else if (tab[c] != s[c])
254
goto tab3;
255
}
256
else if (t == ' ')
257
while (s < e && del[*s])
258
s++;
259
s--;
260
break;
261
}
262
z = s;
263
}
264
else if (sum->end.index <= (e - s))
265
z = s + sum->end.index;
266
else
267
z = a;
268
}
269
else
270
a = z = s;
271
w = z - a;
272
if (!sum->width)
273
sum->format.width = RECTYPE(state->fmt) == REC_fixed ? w : (!(sum->format.flags & CX_FLOAT) || sum->end.index || w >= 8) ? 0 : 8;
274
if (map = sum->map)
275
{
276
ASSURE(state, &state->tmp, w + 2);
277
for (x = state->tmp.buf; a < z; *a++ = map[*x++]);
278
map = sum->pam;
279
x = state->tmp.buf;
280
a -= w;
281
}
282
else
283
x = a;
284
if (sum->op == 'v' || (*sum->type->internalf)(cx, sum->type, NiL, &sum->format, &v, (char*)x, w, cx->rm, cx->disc) < 0)
285
v.value.number = 0;
286
else if (state->regress && (sum->format.flags & CX_FLOAT))
287
{
288
n = v.value.number * 1000.0;
289
n /= 10;
290
v.value.number = n;
291
}
292
if (op < 0)
293
{
294
sum->value = v.value.number;
295
sum->count = 1;
296
}
297
else
298
{
299
if (count != 1)
300
v.value.number *= count;
301
switch (sum->op)
302
{
303
case 'a':
304
sum->value += v.value.number;
305
sum->count += count;
306
break;
307
case 'c':
308
count = v.value.number;
309
continue;
310
case 'M':
311
if (sum->value < v.value.number)
312
sum->value = v.value.number;
313
break;
314
case 'm':
315
if (sum->value > v.value.number)
316
sum->value = v.value.number;
317
break;
318
case 's':
319
sum->value += v.value.number;
320
break;
321
}
322
if (op > 0)
323
{
324
v.value.number = sum->value;
325
switch (sum->op)
326
{
327
case 'a':
328
v.value.number /= sum->count;
329
break;
330
case 'v':
331
while (a < z)
332
*a++ = sum->set;
333
continue;
334
}
335
n = (RECTYPE(state->fmt) == REC_fixed || w < 7) ? 7 : w;
336
for (;;)
337
{
338
y = n + 1;
339
ASSURE(state, &state->tmp, y);
340
if ((n = (*sum->type->externalf)(cx, sum->type, NiL, &sum->format, &v.value, (char*)state->tmp.buf, y, cx->disc)) < 0)
341
{
342
error(2, "%s value %I*g conversion error", sum->type->name, sizeof(v.value.number), v.value.number);
343
return -1;
344
}
345
if (n < y)
346
break;
347
}
348
if (n > w)
349
{
350
if (sum->end.index || RECTYPE(state->fmt) == REC_fixed)
351
{
352
error(2, "%s value %I*g width exceeds %d", sum->type->name, sizeof(v.value.number), v.value.number, w);
353
return -1;
354
}
355
ext = &state->buf[state->alt = !state->alt];
356
ASSURE(state, ext, r->datalen + (n - w));
357
memcpy(ext->buf, r->data, a - r->data);
358
memcpy(ext->buf + (a - r->data) + n, a + w, r->datalen - (w + (a - r->data)));
359
s = ext->buf + (s - r->data);
360
a = ext->buf + (a - r->data);
361
z = ext->buf + (z - r->data) + (n - w);
362
r->data = ext->buf;
363
r->datalen += n - w;
364
e = s + r->datalen - (RECTYPE(state->fmt) == REC_delimited);
365
}
366
if (map)
367
{
368
if (n < w)
369
{
370
c = (sum->type->format.flags & CX_BINARY) ? 0 : map[' '];
371
while (n++ < w)
372
*a++ = c;
373
}
374
for (x = state->tmp.buf; a < z; *a++ = map[*x++]);
375
}
376
else
377
{
378
if (n < w)
379
{
380
c = (sum->type->format.flags & CX_BINARY) ? 0 : ' ';
381
while (n++ < w)
382
*a++ = c;
383
}
384
for (x = state->tmp.buf; a < z; *a++ = *x++);
385
}
386
}
387
}
388
}
389
return 0;
390
}
391
392
static int
393
summary(Rs_t* rs, int op, Void_t* data, Void_t* arg, Rsdisc_t* disc)
394
{
395
State_t* state = (State_t*)disc;
396
register Rsobj_t* r;
397
register Rsobj_t* q;
398
399
switch (op)
400
{
401
case RS_POP:
402
dssclose(state->dss);
403
break;
404
case RS_SUMMARY:
405
r = (Rsobj_t*)data;
406
for (op = -1, q = r->equal; q; op = 0, q = q->right)
407
if (record(state, q, op))
408
return -1;
409
if (record(state, r, 1))
410
return -1;
411
break;
412
default:
413
return -1;
414
}
415
return 0;
416
}
417
418
Rsdisc_t*
419
rs_disc(Rskey_t* key, const char* options)
420
{
421
register Summary_t* sum;
422
char* s;
423
char* t;
424
char* b;
425
char* loc;
426
State_t* state;
427
Cxtype_t* type;
428
Dss_t* dss;
429
Position_t* pos;
430
Summary_t* cur;
431
Summary_t* def;
432
Summary_t* prv;
433
int tok;
434
int n;
435
int debug;
436
char chr;
437
438
static Dssdisc_t disc;
439
440
dssinit(&disc, errorf);
441
if (!(dss = dssopen(0, 0, &disc, dssmeth("dss", &disc))))
442
return 0;
443
if (!(state = vmnewof(dss->vm, 0, State_t, 1, 0)))
444
error(ERROR_SYSTEM|3, "out of space");
445
state->dss = dss;
446
if (!dssload("num_t", dss->disc))
447
goto drop;
448
debug = 0;
449
if (options)
450
{
451
for (;;)
452
{
453
switch (optstr(options, usage))
454
{
455
case 0:
456
break;
457
case 'd':
458
debug = 1;
459
continue;
460
case 'l':
461
if (!dssload(opt_info.arg, dss->disc))
462
goto drop;
463
continue;
464
case 'o':
465
def = 0;
466
s = opt_info.arg;
467
for (;;)
468
{
469
while (*s == ':' || isspace(*s))
470
s++;
471
if (!*s)
472
break;
473
if (!(sum = vmnewof(dss->vm, 0, Summary_t, 1, 0)))
474
error(ERROR_SYSTEM|3, "out of space");
475
sum->beg.field = -1;
476
if (def)
477
{
478
sum->type = def->type;
479
sum->format = def->format;
480
sum->op = def->op;
481
sum->set = def->set;
482
}
483
else
484
sum->format.code = key->code;
485
def = sum;
486
b = s;
487
tok = 0;
488
/*UNDENT...*/
489
for (;;)
490
{
491
if (*s == '.' || isdigit(*s))
492
{
493
pos = 0;
494
while (*s == '.' || isdigit(*s))
495
{
496
if (!pos)
497
{
498
pos = &sum->beg;
499
loc = "begin";
500
}
501
else if (pos == &sum->beg)
502
{
503
pos = &sum->end;
504
loc = "end";
505
}
506
else
507
{
508
error(2, "%s: invalid summary field position", s);
509
goto drop;
510
}
511
if (*s == '.')
512
n = 1;
513
else
514
for (n = 0; *s >= '0' && *s <= '9'; n = n * 10 + (*s++ - '0'));
515
if ((pos->field = n - 1) < 0)
516
{
517
error(2, "%d: invalid summary field %s position", n, loc);
518
goto drop;
519
}
520
switch (*s)
521
{
522
case '.':
523
for (n = 0; *++s >= '0' && *s <= '9'; n = n * 10 + (*s - '0'));
524
if ((pos->index = n - 1) < 0)
525
{
526
error(2, "%d: invalid summary field %s offset", n, loc);
527
goto drop;
528
}
529
if (*s == '.')
530
{
531
n = 0;
532
if (pos == &sum->beg)
533
for (n = 0; *++s >= '0' && *s <= '9'; n = n * 10 + (*s - '0'));
534
if (n <= 0)
535
{
536
error(2, "%d: invalid summary field %s size", n, loc);
537
goto drop;
538
}
539
sum->end.field = sum->beg.field;
540
sum->end.index = sum->beg.index + n;
541
}
542
break;
543
case 'C':
544
s++;
545
switch (*s++)
546
{
547
case 'a':
548
n = CC_ASCII;
549
break;
550
case 'e':
551
n = CC_EBCDIC_E;
552
break;
553
case 'i':
554
n = CC_EBCDIC_I;
555
break;
556
case 'o':
557
n = CC_EBCDIC_O;
558
break;
559
case 'n':
560
n = CC_NATIVE;
561
break;
562
default:
563
error(2, "%s: invalid code set", s - 1);
564
goto drop;
565
}
566
switch (*s++)
567
{
568
case 'a':
569
n = CCOP(n, CC_ASCII);
570
break;
571
case 'e':
572
n = CCOP(n, CC_EBCDIC_E);
573
break;
574
case 'i':
575
n = CCOP(n, CC_EBCDIC_I);
576
break;
577
case 'o':
578
n = CCOP(n, CC_EBCDIC_O);
579
break;
580
case 'n':
581
n = CCOP(n, CC_NATIVE);
582
break;
583
default:
584
s--;
585
break;
586
}
587
if (n && n != CC_NATIVE && CCIN(n) != CCOUT(n))
588
sum->format.code = n;
589
break;
590
default:
591
if (isalpha(*s))
592
{
593
error(2, "%s: invalid summary field attribute", s);
594
goto drop;
595
}
596
break;
597
}
598
}
599
break;
600
}
601
switch (tok)
602
{
603
case 0:
604
switch (sum->op = *s++)
605
{
606
case 'a':
607
case 'c':
608
break;
609
case 'M':
610
if (*s == 'I')
611
sum->op = 'm';
612
break;
613
case 'm':
614
if (*s == 'a')
615
sum->op = 'M';
616
break;
617
case 's':
618
if (*s != 'e')
619
break;
620
sum->op = 'v';
621
/*FALLTHROUGH*/
622
case 'v':
623
t = s - 1;
624
while (isalnum(*s))
625
s++;
626
if (*s != ':' || !*++s)
627
{
628
error(2, "%s: summary field character value expected", t);
629
goto drop;
630
}
631
sum->set = chresc(s, &s);
632
break;
633
default:
634
error(2, "%s: invalid summary field operation", s - 1);
635
goto drop;
636
}
637
while (isalnum(*s))
638
s++;
639
tok++;
640
break;
641
case 1:
642
if (type = cxattr(dss->cx, s, &t, &sum->format, dss->cx->disc))
643
{
644
s = t;
645
sum->type = type;
646
sum->width = sum->format.width;
647
tok++;
648
break;
649
}
650
/*FALLTHROUGH*/
651
default:
652
error(2, "%s: invalid summary field specification", s);
653
goto drop;
654
}
655
while (*s == ':' || isspace(*s))
656
s++;
657
if (!*s)
658
break;
659
}
660
/*...INDENT*/
661
if (sum->beg.field < 0)
662
{
663
error(2, "%s: field position expected", b);
664
goto drop;
665
}
666
if (!sum->type)
667
sum->type = cxattr(dss->cx, "integer", NiL, &sum->format, dss->cx->disc);
668
for (prv = 0, cur = state->sum; cur; cur = (prv = cur)->next)
669
if (sum->beg.field < cur->beg.field || sum->beg.field == cur->beg.field && sum->end.field < cur->end.field)
670
break;
671
if (prv)
672
prv->next = sum;
673
else
674
state->sum = sum;
675
sum->next = cur;
676
}
677
continue;
678
case 'r':
679
state->regress = 1;
680
continue;
681
case '?':
682
error(ERROR_USAGE|4, "%s", opt_info.arg);
683
goto drop;
684
case ':':
685
error(2, "%s", opt_info.arg);
686
goto drop;
687
}
688
break;
689
}
690
}
691
key->type &= ~RS_DATA;
692
key->type |= RS_UNIQ;
693
state->fmt = key->disc->data;
694
if (!*key->tab || *key->tab == ' ')
695
{
696
state->tab = (unsigned char*)" ";
697
for (n = 0; n < elementsof(state->delim); n++)
698
if (isspace(n))
699
state->delim[n] = 1;
700
}
701
else
702
state->delim[*(state->tab = key->tab)] = 1;
703
state->disc.eventf = summary;
704
state->disc.events = RS_SUMMARY|RS_POP;
705
for (sum = state->sum; sum; sum = sum->next)
706
if (sum->format.code)
707
{
708
if (!CCCONVERT(sum->format.code))
709
{
710
if (sum->format.code == CC_NATIVE || (sum->type->format.flags & CX_BINARY))
711
sum->format.code = 0;
712
else
713
sum->format.code = CCOP(sum->format.code, CC_NATIVE);
714
}
715
if (sum->format.code)
716
{
717
sum->map = ccmap(CCIN(sum->format.code), CCOUT(sum->format.code));
718
sum->pam = ccmap(CCOUT(sum->format.code), CCIN(sum->format.code));
719
}
720
}
721
if (debug || key->verbose)
722
for (n = 1, sum = state->sum; sum; n++, sum = sum->next)
723
{
724
sfprintf(sfstderr, "op %d ", n);
725
if (sum->beg.field == sum->end.field)
726
sfprintf(sfstderr, ".%d.%d", sum->beg.index + 1, sum->end.index - sum->beg.index);
727
else
728
sfprintf(sfstderr, "%d.%d,%d.%d", sum->beg.field + 1, sum->beg.index + 1, sum->end.field + 1, sum->end.index);
729
sfprintf(sfstderr, " %c", sum->op);
730
if (sum->format.code)
731
sfprintf(sfstderr, " %d=>%d ", CCIN(sum->format.code), CCOUT(sum->format.code));
732
else
733
sfprintf(sfstderr, " ");
734
if (sum->op == 'v')
735
{
736
chr = sum->set;
737
sfprintf(sfstderr, "'%s'", fmtquote(&chr, NiL, "'", 1, 0));
738
}
739
else
740
sfprintf(sfstderr, "%s", sum->type->name);
741
sfprintf(sfstderr, "\n");
742
}
743
return &state->disc;
744
drop:
745
dssclose(dss);
746
return 0;
747
}
748
749
SORTLIB(sum)
750
751