Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/librecsort/rskey.c
1808 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 1996-2012 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Phong Vo <[email protected]> *
18
* Glenn Fowler <[email protected]> *
19
* *
20
***********************************************************************/
21
#pragma prototyped
22
/*
23
* Glenn Fowler
24
* AT&T Research
25
*
26
* rskey coding for recsort
27
*
28
* Doug McIlroy did the hard part here
29
* (and with regression tests too)
30
*/
31
32
#include "rskeyhdr.h"
33
34
#include <tm.h>
35
#include <hashpart.h>
36
37
#if _sys_resource && _lib_getrlimit
38
39
#include <times.h>
40
#include <sys/resource.h>
41
42
/*
 * return the soft RLIMIT_DATA limit in bytes
 * 128Mi is returned if the limit cannot be queried
 * a limit too large for size_t is returned as the largest size_t
 */

static size_t
datasize(void)
{
	struct rlimit	rlim;
	size_t		z;

	/* on failure rlim is indeterminate and must not be read */
	if (getrlimit(RLIMIT_DATA, &rlim))
		return (size_t)(128*1024*1024);
	z = (size_t)rlim.rlim_cur;
	/* rlim_t may be wider than size_t: saturate instead of truncating */
	if ((rlim_t)z != rlim.rlim_cur)
		z = ~(size_t)0;
	return z;
}
50
51
#else
52
53
#define datasize() (size_t)(128*1024*1024)
54
55
#endif
56
57
/*
58
* Canonicalize the number string pointed to by dp, of length
59
* len. Put the result in kp.
60
*
61
* A field of length zero, or all blank, is regarded as 0.
62
* Over/underflow is rendered as huge or zero and properly signed.
63
* Over/underflow occurs at about 1e+-1022.
64
*
65
* Canonicalized strings may be compared as strings of unsigned
66
* chars. For good measure, a canonical string has no zero bytes.
67
*
68
* Syntax: optionally signed floating point, with optional
69
* leading spaces. A syntax deviation ends the number.
70
*
71
* Form of output: packed in 4-bit nibbles. First
72
* 3 nibbles count the number N of significant digits
73
* before the decimal point. The quantity actually stored
74
* is 2048+sign(x)*(N+1024). Further nibbles contain
75
* 1 decimal digit d each, stored as d+2 if x is positive
76
* and as 10-d if x is negative. Leading and trailing
77
* zeros are stripped, and a trailing "digit" d = -1
78
* is appended. (The trailing digit handled like all others,
79
* so encodes as 1 or 0xb according to the sign of x.)
80
* An odd number of nibbles is padded with zero.
81
*
82
* Buglet: overflow is reported if output is exactly filled.
83
*/
84
85
/*
 * encode(x): key nibble for decimal digit x (or the -1 terminator)
 *	x+2 when neg is 0, 10-x when neg is non-zero
 * putdig(x): store the nibble for x at dig
 *	nib!=0: start a byte by assigning the high nibble, clear nib
 *	nib==0: OR the nibble into the low half of *dig, advance dig, set nib
 * both macros read and update the caller's local neg, nib and dig
 */

#define encode(x)	(neg ? (10 - (x)) : ((x) + 2))
#define putdig(x)	(nib ? (*dig = encode(x) << 4, nib = 0) : (*dig++ |= encode(x), nib = 1))
87
88
static ssize_t
89
#if __STD_C
90
key_n_code(Rskey_t* kp, Rskeyfield_t* f, unsigned char* dp, size_t len, unsigned char* cp, unsigned char* zp)
91
#else
92
key_n_code(kp, f, dp, len, cp, zp)
93
Rskey_t* kp;
94
Rskeyfield_t* f;
95
unsigned char* dp;
96
size_t len;
97
unsigned char* cp;
98
unsigned char* zp;
99
#endif
100
{
101
unsigned char* dig = cp + 1; /* byte for next digit */
102
int nib = 0; /* high nibble 1, low nibble 0 */
103
unsigned char* xp = dp;
104
unsigned char* ep = xp + len; /* end pointer */
105
unsigned char* trans = f->trans;
106
int zeros = 0; /* count zeros seen but not installed */
107
int sigdig = 1024;
108
int neg = f->rflag; /* 0 for +, 1 for - */
109
int decimal = 0;
110
int n;
111
int inv;
112
113
cp[1] = 0;
114
115
/*
116
* eat blanks
117
*/
118
119
while (xp < ep && blank(trans[*xp])) xp++;
120
121
/*
122
* eat sign
123
*/
124
125
if (xp < ep)
126
switch (trans[*xp])
127
{
128
case '-':
129
neg ^= 1;
130
/*FALLTHROUGH*/
131
case '+':
132
xp++;
133
break;
134
}
135
136
/*
137
* eat leading zeros
138
*/
139
140
while (xp < ep && trans[*xp] == '0') xp++;
141
if (xp < ep && trans[*xp] == '.')
142
{
143
decimal++;
144
for (xp++; xp < ep && trans[*xp] == '0'; xp++)
145
sigdig--;
146
}
147
if (xp >= ep || trans[*xp] > '9' || trans[*xp] < '0')
148
{
149
/*
150
* no significant digit
151
*/
152
153
sigdig = 0;
154
neg = 0;
155
goto retzero;
156
}
157
for (; xp < ep; xp++)
158
{
159
switch (trans[*xp])
160
{
161
case '.':
162
if (decimal)
163
goto out;
164
decimal++;
165
continue;
166
case '0':
167
zeros++;
168
if (!decimal)
169
sigdig++;
170
continue;
171
case '1': case '2': case '3': case '4': case '5':
172
case '6': case '7': case '8': case '9':
173
for (; zeros > 0; zeros--)
174
putdig(0);
175
n = trans[*xp] - '0';
176
putdig(n);
177
if (!decimal)
178
sigdig++;
179
continue;
180
case 'k':
181
case 'K':
182
if (f->flag == 'h')
183
sigdig += 3;
184
goto out;
185
case 'M':
186
if (f->flag == 'h')
187
sigdig += 6;
188
goto out;
189
case 'G':
190
if (f->flag == 'h')
191
sigdig += 9;
192
goto out;
193
case 'T':
194
if (f->flag == 'h')
195
sigdig += 12;
196
goto out;
197
case 'P':
198
if (f->flag == 'h')
199
sigdig += 15;
200
goto out;
201
case 'E':
202
if (f->flag == 'h')
203
{
204
sigdig += 18;
205
goto out;
206
}
207
/*FALLTHROUGH*/
208
case 'e':
209
if (f->flag != 'g')
210
goto out;
211
inv = 1;
212
if (xp < ep)
213
switch(trans[*++xp])
214
{
215
case '-':
216
inv = -1;
217
/*FALLTHROUGH*/
218
case '+':
219
xp++;
220
break;
221
}
222
if (xp >= ep || trans[*xp] > '9' || trans[*xp] < '0')
223
goto out;
224
for (n = 0; xp < ep; xp++)
225
{
226
int c = trans[*xp];
227
228
if (c < '0' || c > '9')
229
break;
230
if ((n = 10 * n + c - '0') >= 0)
231
continue;
232
sigdig = 2047 * inv;
233
goto out;
234
}
235
sigdig += n * inv;
236
goto out;
237
default:
238
goto out;
239
}
240
}
241
out:
242
if (sigdig < 0 || sigdig >= 2047)
243
{
244
sigdig = sigdig < 0 ? 0 : 2047;
245
if (kp->keydisc->errorf)
246
(*kp->keydisc->errorf)(kp, kp->keydisc, 1, "%-.*s: numeric field overflow", len, dp);
247
dig = cp + 1;
248
*dig = 0;
249
nib = 0;
250
}
251
retzero:
252
if (neg)
253
sigdig = 2048 - sigdig;
254
else
255
sigdig = 2048 + sigdig;
256
cp[0] = sigdig >> 4;
257
cp[1] |= sigdig << 4;
258
putdig(-1);
259
return dig - cp + 1 - nib;
260
}
261
262
/*
263
* packed decimal (bcd)
264
*/
265
266
static ssize_t
267
#if __STD_C
268
key_p_code(Rskey_t* kp, Rskeyfield_t* f, unsigned char* dp, size_t len, unsigned char* cp, unsigned char* zp)
269
#else
270
key_p_code(kp, f, dp, len, cp, zp)
271
Rskey_t* kp;
272
Rskeyfield_t* f;
273
unsigned char* dp;
274
size_t len;
275
unsigned char* cp;
276
unsigned char* zp;
277
#endif
278
{
279
unsigned char* dig = cp + 1; /* byte for next digit */
280
int nib = 0; /* high nibble 1, low nibble 0 */
281
unsigned char* xp = dp;
282
unsigned char* ep = xp + len; /* end pointer */
283
unsigned char* trans = f->trans;
284
int sigdig = 1024;
285
int neg = f->rflag; /* 0 for +, 1 for - */
286
int n;
287
int c;
288
289
cp[1] = 0;
290
291
/*
292
* sign
293
*/
294
295
if ((trans[*(ep - 1)] & 0xF) == 0xD)
296
neg ^= 1;
297
while (xp < ep)
298
{
299
c = trans[*xp++];
300
n = (c >> 4) & 0xF;
301
putdig(n);
302
sigdig++;
303
n = c & 0xF;
304
if (n > 0x9)
305
break;
306
putdig(n);
307
sigdig++;
308
}
309
if (sigdig >= 2047)
310
{
311
sigdig = 2047;
312
if (kp->keydisc->errorf)
313
(*kp->keydisc->errorf)(kp, kp->keydisc, 1, "%-.*s: numeric field overflow", dp);
314
dig = cp + 1;
315
*dig = 0;
316
nib = 0;
317
}
318
if (neg)
319
sigdig = 2048 - sigdig;
320
else
321
sigdig = 2048 + sigdig;
322
cp[0] = sigdig >> 4;
323
cp[1] |= sigdig << 4;
324
putdig(-1);
325
return dig - cp + 1 - nib;
326
}
327
328
/*
329
* zoned decimal
330
*/
331
332
static ssize_t
333
#if __STD_C
334
key_z_code(Rskey_t* kp, Rskeyfield_t* f, unsigned char* dp, size_t len, unsigned char* cp, unsigned char* zp)
335
#else
336
key_z_code(kp, f, dp, len, cp, zp)
337
Rskey_t* kp;
338
Rskeyfield_t* f;
339
unsigned char* dp;
340
size_t len;
341
unsigned char* cp;
342
unsigned char* zp;
343
#endif
344
{
345
unsigned char* dig = cp + 1; /* byte for next digit */
346
int nib = 0; /* high nibble 1, low nibble 0 */
347
unsigned char* xp = dp;
348
unsigned char* ep = xp + len; /* end pointer */
349
unsigned char* trans = f->trans;
350
int sigdig = 1024;
351
int neg = f->rflag; /* 0 for +, 1 for - */
352
int n;
353
int c;
354
355
cp[1] = 0;
356
357
/*
358
* sign
359
*/
360
361
switch (trans[*(ep - 1)] & 0xF0)
362
{
363
case 0x70: /* ascii */
364
case 0xB0: /* ebcdic alternate */
365
case 0xD0: /* ebcdic preferred */
366
neg ^= 1;
367
break;
368
}
369
while (xp < ep)
370
{
371
c = trans[*xp++];
372
n = c & 0xF;
373
putdig(n);
374
sigdig++;
375
}
376
if (sigdig >= 2047)
377
{
378
sigdig = 2047;
379
if (kp->keydisc->errorf)
380
(*kp->keydisc->errorf)(kp, kp->keydisc, 1, "%-.*s: numeric field overflow", dp);
381
dig = cp + 1;
382
*dig = 0;
383
nib = 0;
384
}
385
if (neg)
386
sigdig = 2048 - sigdig;
387
else
388
sigdig = 2048 + sigdig;
389
cp[0] = sigdig >> 4;
390
cp[1] |= sigdig << 4;
391
putdig(-1);
392
return dig - cp + 1 - nib;
393
}
394
395
/*
396
* random shuffle
397
*/
398
399
static ssize_t
400
#if __STD_C
401
key_j_code(Rskey_t* kp, Rskeyfield_t* f, unsigned char* dp, size_t len, unsigned char* cp, unsigned char* zp)
402
#else
403
key_j_code(kp, f, dp, len, cp, zp)
404
Rskey_t* kp;
405
Rskeyfield_t* f;
406
unsigned char* dp;
407
size_t len;
408
unsigned char* cp;
409
unsigned char* zp;
410
#endif
411
{
412
unsigned char* xp = cp;
413
int c;
414
415
while (len--)
416
{
417
c = *dp++;
418
HASHPART(kp->shuffle, c);
419
*xp++ = (kp->shuffle >> 4) & 0xff;
420
}
421
return xp - cp;
422
}
423
424
/*
425
* Encode text field subject to options -r -fdi -b.
426
* Fields are separated by 0 (or 255 if rflag is set)
427
* the anti-ambiguity stuff prevents such codes from
428
* happening otherwise by coding real zeros and ones
429
* as 0x0101 and 0x0102, and similarly for complements
430
*/
431
432
static ssize_t
433
#if __STD_C
434
key_t_code(Rskey_t* kp, Rskeyfield_t* f, unsigned char* dp, size_t len, unsigned char* cp, unsigned char* zp)
435
#else
436
key_t_code(kp, f, dp, len, cp, zp)
437
Rskey_t* kp;
438
Rskeyfield_t* f;
439
unsigned char* dp;
440
size_t len;
441
unsigned char* cp;
442
unsigned char* zp;
443
#endif
444
{
445
unsigned char* xp = cp;
446
int c;
447
int i;
448
int n;
449
int m;
450
unsigned char* keep = f->keep;
451
unsigned char* trans = f->trans;
452
unsigned char* bp;
453
int reverse = f->rflag ? ~0: 0;
454
455
if (kp->xfrmbuf && len)
456
{
457
n = ((len + 1) * 4);
458
for (;;)
459
{
460
if (kp->xfrmsiz < n)
461
{
462
kp->xfrmsiz = n = roundof(n, 256);
463
if (!(kp->xfrmbuf = vmnewof(Vmheap, kp->xfrmbuf, unsigned char, n, 0)))
464
{
465
if (kp->keydisc->errorf)
466
(*kp->keydisc->errorf)(kp, kp->keydisc, 1, "%-.*s: multibyte field overflow -- falling back to native collation", dp);
467
goto native;
468
}
469
}
470
bp = kp->xfrmbuf;
471
for (i = 0; i < len; i++)
472
if (keep[c = dp[i]])
473
*bp++ = trans[c];
474
*bp++ = 0;
475
m = kp->xfrmsiz - (bp - kp->xfrmbuf);
476
if ((n = mbxfrm(bp, kp->xfrmbuf, m)) < m)
477
{
478
dp = bp;
479
break;
480
}
481
n += n - m + (bp - kp->xfrmbuf);
482
}
483
bp = dp;
484
m = 0;
485
while (--n >= 0)
486
{
487
c = *dp++;
488
if (c <= 1)
489
{
490
/*
491
* anti-ambiguity
492
*/
493
494
if (xp < zp)
495
*xp++ = 1 ^ reverse;
496
else
497
m++;
498
c++;
499
}
500
else if (c >= 254)
501
{
502
if (xp < zp)
503
*xp++ = 255 ^ reverse;
504
else
505
m++;
506
c--;
507
}
508
if (xp < zp)
509
*xp++ = c ^ reverse;
510
else
511
m++;
512
}
513
if (m)
514
{
515
if (kp->keydisc->errorf)
516
(*kp->keydisc->errorf)(kp, kp->keydisc, 1, "key coder collation overflow (%d/%I*u) -- falling back to native collation", m, sizeof(zp - cp), zp - cp);
517
dp = bp;
518
goto native;
519
}
520
}
521
else
522
{
523
native:
524
while (len-- > 0)
525
{
526
c = *dp++;
527
if (keep[c])
528
{
529
c = trans[c];
530
if (c <= 1)
531
{
532
/*
533
* anti-ambiguity
534
*/
535
536
*xp++ = 1 ^ reverse;
537
c++;
538
}
539
else if (c >= 254)
540
{
541
*xp++ = 255 ^ reverse;
542
c--;
543
}
544
*xp++ = c ^ reverse;
545
}
546
}
547
}
548
*xp++ = reverse;
549
return xp - cp;
550
}
551
552
static ssize_t
553
#if __STD_C
554
key_m_code(Rskey_t* kp, Rskeyfield_t* f, unsigned char* dp, size_t len, unsigned char* cp, unsigned char* zp)
555
#else
556
key_m_code(kp, f, dp, len, cp, zp)
557
Rskey_t* kp;
558
Rskeyfield_t* f;
559
unsigned char* dp;
560
size_t len;
561
unsigned char* cp;
562
unsigned char* zp;
563
#endif
564
{
565
register int c;
566
int j = -1;
567
int i;
568
unsigned char* mp;
569
unsigned char* trans = f->trans;
570
char** month = (char**)f->data;
571
572
for (; len > 0 && blank(trans[*dp]); dp++, len--);
573
if (len > 0)
574
while (++j < 12)
575
{
576
mp = (unsigned char*)month[j];
577
for (i = 0; mp[i] && i < len; i++)
578
{
579
c = trans[dp[i]];
580
if (c != mp[i])
581
{
582
if (isupper(c))
583
c = tolower(c);
584
else if (islower(c))
585
c = toupper(c);
586
else
587
break;
588
if (c != mp[i])
589
break;
590
}
591
}
592
if (!mp[i])
593
break;
594
}
595
*cp = j >= 12 ? 0 : j + 1;
596
if (f->rflag)
597
*cp ^= ~0;
598
return 1;
599
}
600
601
/*
602
* the recsort defkeyf
603
* return encoded key for dat,datlen in key,keylen
604
*/
605
606
static ssize_t
607
#if __STD_C
608
code(Rs_t* rs, unsigned char* dat, size_t datlen, unsigned char* key, size_t keylen, Rsdisc_t* disc)
609
#else
610
code(rs, dat, datlen, key, keylen, disc)
611
Rs_t* rs;
612
unsigned char* dat;
613
size_t datlen;
614
unsigned char* key;
615
size_t keylen;
616
Rsdisc_t* disc;
617
#endif
618
{
619
Rskey_t* kp = rs ? rs->key : (Rskey_t*)((char*)disc - sizeof(Rskey_t));
620
unsigned char* cp;
621
Rskeyfield_t* fp;
622
unsigned char* ep;
623
unsigned char* op = key;
624
unsigned char* zp = key + keylen;
625
unsigned char* xp = dat + datlen;
626
unsigned char* tp;
627
int n;
628
int t;
629
int np;
630
int m = kp->field.maxfield;
631
unsigned char** pp = kp->field.positions;
632
633
pp[0] = dat;
634
np = 1;
635
switch (t = kp->tab[0])
636
{
637
case 0:
638
for (cp = dat; cp < xp && np < m;)
639
{
640
while (cp < xp && blank(*cp))
641
cp++;
642
while (cp < xp && !blank(*cp))
643
cp++;
644
pp[np++] = cp;
645
}
646
break;
647
case '\n':
648
break;
649
default:
650
tp = kp->tab[1] ? (kp->tab + 1) : 0;
651
for (cp = dat; cp < xp && np < m;)
652
if (*cp++ == t)
653
{
654
if (!tp)
655
pp[np++] = cp;
656
else
657
for (n = 0; (cp + n) < xp; n++)
658
if (!tp[n])
659
{
660
pp[np++] = cp + n;
661
break;
662
}
663
else if (tp[n] != cp[n])
664
break;
665
}
666
break;
667
}
668
for (fp = kp->head; fp; fp = fp->next)
669
{
670
n = fp->begin.field;
671
if (n < np)
672
{
673
cp = pp[n];
674
if (fp->bflag && kp->field.global.next)
675
while (cp < xp && blank(*cp))
676
cp++;
677
cp += fp->begin.index;
678
if (cp > xp)
679
cp = xp;
680
}
681
else
682
cp = xp;
683
n = fp->end.field;
684
if (n < np)
685
{
686
if (fp->end.index < 0)
687
{
688
if (n >= np - 1)
689
ep = xp;
690
else
691
{
692
ep = pp[n + 1];
693
if (t)
694
ep--;
695
}
696
}
697
else
698
{
699
ep = pp[n];
700
if (fp->eflag)
701
while(ep < xp && blank(*ep))
702
ep++;
703
ep += fp->end.index;
704
}
705
if (ep > xp)
706
ep = xp;
707
else if (ep < cp)
708
ep = cp;
709
}
710
else
711
ep = xp;
712
op += (*fp->coder)(kp, fp, cp, ep - cp, op, zp);
713
}
714
return op - key;
715
}
716
717
/*
718
* conflict message
719
*/
720
721
static void
722
#if __STD_C
723
conflict(Rskey_t* kp, int c)
724
#else
725
conflict(kp, c)
726
Rskey_t* kp;
727
int c;
728
#endif
729
{
730
(*kp->keydisc->errorf)(kp, kp->keydisc, 1, "%c: key type conflicts with previous value", c);
731
}
732
733
/*
734
* nice band
735
*/
736
737
static int
738
#if __STD_C
739
checkfield(Rskey_t* kp, Rskeyfield_t* fp, const char* key, int c)
740
#else
741
checkfield(kp, fp, key, c)
742
Rskey_t* kp;
743
Rskeyfield_t* fp;
744
char* key;
745
int c;
746
#endif
747
{
748
if (c || fp->begin.field < 0 || fp->end.field < 0 || fp->begin.index < 0 || fp->end.index < -1)
749
{
750
if (kp->keydisc->errorf)
751
{
752
if (key)
753
(*kp->keydisc->errorf)(kp, kp->keydisc, 2, "%s: invalid key field specification", key);
754
else
755
(*kp->keydisc->errorf)(kp, kp->keydisc, 2, "field[%d]: invalid key field specification", fp->index);
756
}
757
kp->keydisc->flags |= RSKEY_ERROR;
758
return -1;
759
}
760
if (kp->keydisc->errorf && fp->coder == key_n_code && fp->keep)
761
(*kp->keydisc->errorf)(kp, kp->keydisc, 1, "n: key type conflicts with d,i");
762
return 0;
763
}
764
765
/*
766
* add coding function
767
*/
768
769
static void
770
#if __STD_C
771
addcoder(Rskey_t* kp, Rskeyfield_t* fp, Rskeycode_f np, int c, int b)
772
#else
773
addcoder(kp, fp, np, c, b)
774
Rskey_t* kp;
775
Rskeyfield_t* fp;
776
Rskeycode_f np;
777
int c;
778
int b;
779
#endif
780
{
781
NoP(kp);
782
if (kp->keydisc->errorf && fp->coder && fp->coder != np)
783
conflict(kp, c);
784
fp->coder = np;
785
fp->flag = c;
786
fp->binary = b;
787
}
788
789
/*
790
* add translation table
791
*/
792
793
static void
794
#if __STD_C
795
addtable(Rskey_t* kp, int c, unsigned char** op, unsigned char* np)
796
#else
797
addtable(kp, c, op, np)
798
Rskey_t* kp;
799
int c;
800
unsigned char** op;
801
unsigned char* np;
802
#endif
803
{
804
NoP(kp);
805
if (kp->keydisc->errorf && *op && *op != np)
806
conflict(kp, c);
807
*op = np;
808
}
809
810
/*
811
* add a sort key field option c
812
*/
813
814
static int
815
#if __STD_C
816
addopt(Rskey_t* kp, register Rskeyfield_t* fp, register char* s, int end)
817
#else
818
addopt(kp, fp, s, end)
819
Rskey_t* kp;
820
register Rskeyfield_t* fp;
821
register char* s;
822
int end;
823
#endif
824
{
825
char* b = s;
826
char* e;
827
int c;
828
int x;
829
830
switch (c = *s++)
831
{
832
case 0:
833
return 0;
834
case 'a':
835
if (!fp->aflag)
836
{
837
fp->aflag = 1;
838
if (!kp->field.prev)
839
{
840
if (kp->keydisc->errorf)
841
(*kp->keydisc->errorf)(kp, kp->keydisc, 2, "field[%d]: global accumulate invalid", fp->index);
842
kp->keydisc->flags |= RSKEY_ERROR;
843
return 0;
844
}
845
(kp->tail = kp->field.prev)->next = 0;
846
kp->field.prev = 0;
847
if (kp->accumulate.tail)
848
kp->accumulate.tail->next = fp;
849
else
850
kp->accumulate.head = kp->accumulate.tail = fp;
851
}
852
return s - b;
853
case 'b':
854
if (end)
855
fp->eflag = 1;
856
else
857
fp->bflag = 1;
858
return s - b;
859
case 'd':
860
addtable(kp, c, &fp->keep, kp->state->dict);
861
break;
862
case 'E':
863
switch (*s++)
864
{
865
case 'a':
866
x = CC_ASCII;
867
break;
868
case 'e':
869
x = CC_EBCDIC_E;
870
break;
871
case 'i':
872
x = CC_EBCDIC_I;
873
break;
874
case 'o':
875
x = CC_EBCDIC_O;
876
break;
877
case 'x':
878
x = CC_NATIVE;
879
break;
880
default:
881
if (kp->keydisc->errorf)
882
(*kp->keydisc->errorf)(kp, kp->keydisc, 2, "field[%d]: %s: invalid code set", fp->index, s - 1);
883
kp->keydisc->flags |= RSKEY_ERROR;
884
return 0;
885
}
886
if (*s == ':')
887
s++;
888
switch (*s++)
889
{
890
case 'a':
891
x = CCOP(x, CC_ASCII);
892
break;
893
case 'e':
894
x = CCOP(x, CC_EBCDIC_E);
895
break;
896
case 'i':
897
x = CCOP(x, CC_EBCDIC_I);
898
break;
899
case 'o':
900
x = CCOP(x, CC_EBCDIC_O);
901
break;
902
case 'x':
903
x = CCOP(x, CC_NATIVE);
904
break;
905
default:
906
s--;
907
break;
908
}
909
if (x != CC_NATIVE && CCIN(x) != CCOUT(x))
910
{
911
fp->code = x;
912
if (fp == kp->head)
913
kp->code = fp->code;
914
}
915
return s - b;
916
case 'f':
917
addtable(kp, c, &fp->trans, kp->state->fold);
918
break;
919
case 'g':
920
case 'n':
921
addcoder(kp, fp, key_n_code, c, 0);
922
break;
923
case 'h':
924
addcoder(kp, fp, key_n_code, c, 0);
925
break;
926
case 'i':
927
addtable(kp, c, &fp->keep, kp->state->print);
928
break;
929
case 'J':
930
kp->shuffle = strtoul(s, &e, 0);
931
s = e;
932
if (!kp->shuffle)
933
kp->shuffle = (unsigned long)time(NiL) * (unsigned long)getpid();
934
addcoder(kp, fp, key_j_code, c, 0);
935
break;
936
case 'M':
937
tminit(NiL);
938
fp->data = tm_info.format + TM_MONTH_ABBREV;
939
addcoder(kp, fp, key_m_code, c, 0);
940
break;
941
case 'p':
942
addcoder(kp, fp, key_p_code, c, 1);
943
break;
944
case 'r':
945
fp->rflag = 1;
946
return s - b;
947
case 'Z':
948
addcoder(kp, fp, key_z_code, c, 1);
949
break;
950
default:
951
return 0;
952
}
953
kp->coded = 1;
954
if (kp->keydisc->errorf && fp != kp->tail)
955
(*kp->keydisc->errorf)(kp, kp->keydisc, 1, "field spec precedes global option %c", c);
956
return s - b;
957
}
958
959
/*
960
* add sort key options in s
961
* all!=0 applies to all fields,
962
* otherwise the current field
963
*/
964
965
int
966
#if __STD_C
967
rskeyopt(Rskey_t* kp, const char* key, int all)
968
#else
969
rskeyopt(kp, key, all)
970
Rskey_t* kp;
971
char* key;
972
int all;
973
#endif
974
{
975
register Rskeyfield_t* fp;
976
register int i;
977
char* s;
978
979
fp = all ? kp->head : kp->tail;
980
s = (char*)key;
981
while (i = addopt(kp, fp, s, 0))
982
s += i;
983
if (fp->standard && (*s == ',' || *s == ' '))
984
{
985
s++;
986
if ((fp->end.field = (int)strtol(s, (char**)&s, 10) - 1) > kp->field.maxfield)
987
kp->field.maxfield = fp->end.field;
988
if (*s == '.' && !(fp->end.index = (int)strtol(s + 1, &s, 10)))
989
fp->end.index = -1;
990
while (i = addopt(kp, fp, s, 1))
991
s += i;
992
}
993
return checkfield(kp, fp, key, *s);
994
}
995
996
/*
997
* add a sort key
998
*/
999
1000
int
1001
#if __STD_C
1002
rskey(Rskey_t* kp, const char* key, int obsolete)
1003
#else
1004
rskey(kp, key, obsolete)
1005
Rskey_t* kp;
1006
char* key;
1007
int obsolete;
1008
#endif
1009
{
1010
register Rskeyfield_t* fp;
1011
int o;
1012
int n;
1013
int standard;
1014
char* s;
1015
char* t;
1016
char buf[32];
1017
1018
kp->keydisc->flags |= RSKEY_KEYS;
1019
s = (char*)key;
1020
if (*s == '.')
1021
{
1022
n = (int)strtol(s + 1, &t, 10);
1023
if (!*t)
1024
{
1025
if (n != kp->fixed && kp->fixed)
1026
{
1027
if (kp->keydisc->errorf)
1028
(*kp->keydisc->errorf)(kp, kp->keydisc, 2, "%s: fixed record length mismatch -- %d expected", key, kp->fixed);
1029
kp->keydisc->flags |= RSKEY_ERROR;
1030
return -1;
1031
}
1032
kp->fixed = n;
1033
kp->disc->data = REC_F_TYPE(n);
1034
return 0;
1035
}
1036
}
1037
n = (int)strtol(s, &t, 10);
1038
if (s == t)
1039
n = *s != ':';
1040
else
1041
s = t;
1042
if ((standard = !obsolete) && *s == ':')
1043
{
1044
if (n)
1045
{
1046
if (n != kp->fixed && kp->fixed)
1047
{
1048
if (kp->keydisc->errorf)
1049
(*kp->keydisc->errorf)(kp, kp->keydisc, 2, "%s: fixed record key length mismatch -- %d expected", key, kp->fixed);
1050
kp->keydisc->flags |= RSKEY_ERROR;
1051
return -1;
1052
}
1053
kp->fixed = n;
1054
kp->disc->data = REC_F_TYPE(n);
1055
}
1056
if (!*++s)
1057
return 0;
1058
n = (int)strtol(s, &s, 10);
1059
o = *s == ':' ? (int)(strtol(s + 1, &s, 10) + 1) : 1;
1060
if (kp->fixed && (o + n) > kp->fixed)
1061
{
1062
if (kp->keydisc->errorf)
1063
(*kp->keydisc->errorf)(kp, kp->keydisc, 2, "%s: fixed field exceeds record length %d", key, kp->fixed);
1064
kp->keydisc->flags |= RSKEY_ERROR;
1065
return -1;
1066
}
1067
key = (const char*)(s = buf);
1068
sfsprintf(s, sizeof(buf), ".%d,1.%d", o, o + n - 1);
1069
n = 1;
1070
}
1071
if (obsolete == '-')
1072
{
1073
if (!kp->field.global.next && rskey(kp, "0", 1))
1074
return -1;
1075
s = (char*)key;
1076
if ((kp->tail->end.field = *s == '.' ? kp->tail->begin.field : (int)strtol(s, &s, 10)) > kp->field.maxfield)
1077
kp->field.maxfield = kp->tail->end.field;
1078
if (*s == '.')
1079
kp->tail->end.index = (int)strtol(s + 1, &s, 10);
1080
else
1081
kp->tail->end.field--;
1082
if (!kp->tail->end.index)
1083
kp->tail->end.index = -1;
1084
}
1085
else if (!(fp = vmnewof(Vmheap, 0, Rskeyfield_t, 1, 0)))
1086
{
1087
if (kp->keydisc->errorf)
1088
(*kp->keydisc->errorf)(kp, kp->keydisc, 2, "out of space [field]");
1089
kp->keydisc->flags |= RSKEY_ERROR;
1090
return -1;
1091
}
1092
else
1093
{
1094
fp->index = ++kp->field.index;
1095
kp->field.prev = kp->tail;
1096
kp->tail = kp->tail->next = fp;
1097
fp->bflag = fp->eflag = 0;
1098
fp->standard = standard;
1099
if ((fp->begin.field = n - fp->standard) > kp->field.maxfield)
1100
kp->field.maxfield = fp->begin.field;
1101
fp->end.field = MAXFIELD;
1102
fp->code = kp->head->code;
1103
if (*s == '.')
1104
{
1105
fp->begin.index = (int)strtol(s + 1, &s, 10) - fp->standard;
1106
if (*s == '.')
1107
{
1108
fp->end.field = fp->begin.field;
1109
fp->end.index = fp->begin.index + (int)strtol(s + 1, &s, 10);
1110
}
1111
}
1112
}
1113
return *s ? rskeyopt(kp, s, 0) : 0;
1114
}
1115
1116
/*
1117
* set up field character transform
1118
*/
1119
1120
static int
1121
#if __STD_C
1122
transform(Rskey_t* kp, register Rskeyfield_t* fp)
1123
#else
1124
transform(kp, fp)
1125
Rskey_t* kp;
1126
register Rskeyfield_t* fp;
1127
#endif
1128
{
1129
register unsigned char* m;
1130
register unsigned char* t;
1131
register unsigned char* x;
1132
register int c;
1133
1134
if (fp->code)
1135
{
1136
if (fp->binary)
1137
{
1138
if (CCCONVERT(fp->code))
1139
fp->trans = ccmap(fp->code, 0);
1140
}
1141
else if (m = ccmap(fp->code, CC_NATIVE))
1142
{
1143
if (!fp->trans)
1144
fp->trans = m;
1145
else if (!(x = vmnewof(Vmheap, 0, unsigned char, UCHAR_MAX, 1)))
1146
{
1147
if (kp->keydisc->errorf)
1148
(*kp->keydisc->errorf)(kp, kp->keydisc, 2, "out of space");
1149
kp->keydisc->flags |= RSKEY_ERROR;
1150
return -1;
1151
}
1152
else
1153
{
1154
t = fp->trans;
1155
for (c = 0; c <= UCHAR_MAX; c++)
1156
x[c] = t[m[c]];
1157
fp->trans = x;
1158
fp->freetrans = 1;
1159
}
1160
}
1161
}
1162
if (!fp->trans)
1163
fp->trans = kp->state->ident;
1164
return 0;
1165
}
1166
1167
/*
1168
* initialize key info after all rskey() calls
1169
*/
1170
1171
int
1172
#if __STD_C
1173
rskeyinit(register Rskey_t* kp)
1174
#else
1175
rskeyinit(kp)
1176
register Rskey_t* kp;
1177
#endif
1178
{
1179
register long n;
1180
register Rskeyfield_t* fp;
1181
long m;
1182
size_t z;
1183
1184
static char* in[] = { "-", 0 };
1185
1186
/*
1187
* finalize the fields
1188
*/
1189
1190
if (checkfield(kp, kp->tail, NiL, 0))
1191
return -1;
1192
fp = kp->head;
1193
if (!fp->coder)
1194
{
1195
fp->coder = key_t_code;
1196
fp->flag = 't';
1197
}
1198
if (transform(kp, fp))
1199
return -1;
1200
if (!fp->keep)
1201
fp->keep = kp->state->all;
1202
if (fp->rflag)
1203
{
1204
fp->rflag = 0;
1205
kp->type |= RS_REVERSE;
1206
}
1207
kp->code = fp->code;
1208
while (fp = fp->next)
1209
{
1210
n = 0;
1211
if (!fp->coder)
1212
{
1213
fp->coder = key_t_code;
1214
fp->flag = 't';
1215
}
1216
else
1217
n = 1;
1218
if(!fp->keep)
1219
fp->keep = kp->state->all;
1220
else
1221
n = 1;
1222
if (!n && !fp->trans && !fp->bflag && !fp->eflag && !fp->rflag)
1223
{
1224
fp->coder = kp->field.global.coder;
1225
fp->code = kp->field.global.code;
1226
fp->flag = kp->field.global.flag;
1227
fp->trans = kp->field.global.trans;
1228
fp->keep = kp->field.global.keep;
1229
fp->rflag = kp->field.global.rflag;
1230
fp->bflag = kp->field.global.bflag;
1231
if (fp->standard)
1232
fp->eflag = kp->field.global.bflag;
1233
}
1234
else
1235
{
1236
if (transform(kp, fp))
1237
return -1;
1238
if (kp->type & RS_REVERSE)
1239
fp->rflag = !fp->rflag;
1240
}
1241
if (fp->standard)
1242
{
1243
if (!fp->end.index)
1244
fp->end.index--;
1245
}
1246
else if (!fp->end.index && fp->end.field)
1247
{
1248
if (kp->tab[0] && fp->eflag)
1249
{
1250
if (kp->keydisc->errorf)
1251
(*kp->keydisc->errorf)(kp, kp->keydisc, 2, "skipping blanks right after tab-char is ill-defined");
1252
kp->keydisc->flags |= RSKEY_ERROR;
1253
return -1;
1254
}
1255
fp->end.index--;
1256
}
1257
if (kp->fixed)
1258
{
1259
if (fp->begin.index > kp->fixed)
1260
{
1261
if (kp->keydisc->errorf)
1262
(*kp->keydisc->errorf)(kp, kp->keydisc, 2, "field[%d]: begin index %d is greater than fixed record size", fp->index, fp->begin.index);
1263
kp->keydisc->flags |= RSKEY_ERROR;
1264
return -1;
1265
}
1266
if (fp->end.index > kp->fixed)
1267
{
1268
if (kp->keydisc->errorf)
1269
(*kp->keydisc->errorf)(kp, kp->keydisc, 2, "field[%d]: end index %d is greater than fixed record size", fp->index, fp->end.index);
1270
kp->keydisc->flags |= RSKEY_ERROR;
1271
return -1;
1272
}
1273
}
1274
}
1275
fp = kp->head;
1276
if (fp = fp->next)
1277
{
1278
kp->head = fp;
1279
if (!fp->next && !kp->tab[0] && !fp->begin.field && !fp->end.field && fp->end.index > 0 && fp->flag == 't' && fp->trans == kp->state->ident && fp->keep == kp->state->all && !fp->bflag && !fp->eflag && !fp->rflag)
1280
{
1281
kp->disc->type |= RS_KSAMELEN;
1282
kp->disc->key = fp->begin.index;
1283
kp->disc->keylen = fp->end.index - fp->begin.index;
1284
}
1285
else
1286
kp->coded = 1;
1287
}
1288
else if (kp->head->flag == 't' && kp->xfrmbuf)
1289
kp->coded = 1;
1290
if (kp->coded)
1291
{
1292
kp->field.maxfield += 2;
1293
kp->disc->defkeyf = code;
1294
kp->disc->key = (mbcoll() ? 32 : 2) * kp->field.maxfield;
1295
if (!(kp->field.positions = vmnewof(Vmheap, 0, unsigned char*, kp->field.maxfield, 0)))
1296
{
1297
if (kp->keydisc->errorf)
1298
(*kp->keydisc->errorf)(kp, kp->keydisc, 2, "out of space [positions]");
1299
kp->keydisc->flags |= RSKEY_ERROR;
1300
return -1;
1301
}
1302
}
1303
if (kp->fixed)
1304
{
1305
kp->disc->type |= RS_DSAMELEN;
1306
kp->disc->data = kp->fixed;
1307
if (kp->disc->keylen < 0)
1308
kp->disc->keylen = 0;
1309
}
1310
1311
/*
1312
* limit the sizes
1313
*/
1314
1315
z = datasize() / 3;
1316
if (kp->nproc > 1)
1317
z /= 2;
1318
if (kp->insize > z)
1319
kp->insize = z;
1320
if (kp->outsize > z)
1321
kp->outsize = z;
1322
1323
/*
1324
* reconcile the sizes
1325
*/
1326
1327
if (!(n = kp->alignsize))
1328
n = SF_BUFSIZE;
1329
if (n & (n - 1))
1330
{
1331
for (m = 1; m < n; m <<= 1)
1332
if (m >= (LONG_MAX >> CHAR_BIT))
1333
{
1334
if (kp->keydisc->errorf)
1335
(*kp->keydisc->errorf)(kp, kp->keydisc, 2, "%ld: invalid alignment size", n);
1336
kp->keydisc->flags |= RSKEY_ERROR;
1337
return -1;
1338
}
1339
if (kp->keydisc->errorf)
1340
(*kp->keydisc->errorf)(kp, kp->keydisc, 1, "%ld: alignment size rounded to %ld", n, m);
1341
n = m;
1342
}
1343
kp->alignsize = n--;
1344
kp->insize = (kp->insize < kp->alignsize) ? kp->alignsize : roundof(kp->insize, kp->alignsize);
1345
kp->outsize = (kp->outsize && kp->outsize < kp->alignsize) ? kp->alignsize : roundof(kp->outsize, kp->alignsize);
1346
kp->procsize = (kp->procsize < kp->alignsize) ? kp->alignsize : roundof(kp->procsize, kp->alignsize);
1347
if (kp->procsize > kp->insize)
1348
kp->procsize = kp->insize;
1349
if (kp->insize == kp->alignsize && kp->alignsize > 1)
1350
kp->alignsize /= 2;
1351
1352
/*
1353
* no input files equivalent to "-"
1354
*/
1355
1356
if (!kp->input || !*kp->input)
1357
kp->input = in;
1358
return (kp->keydisc->flags & RSKEY_ERROR) ? -1 : 0;
1359
}
1360
1361