Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libbz/blocksort.c
1808 views
1
#pragma prototyped
2
3
/*-------------------------------------------------------------*/
4
/*--- Block sorting machinery ---*/
5
/*--- blocksort.c ---*/
6
/*-------------------------------------------------------------*/
7
8
/*--
9
This file is a part of bzip2 and/or libbzip2, a program and
10
library for lossless, block-sorting data compression.
11
12
Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
13
14
Redistribution and use in source and binary forms, with or without
15
modification, are permitted provided that the following conditions
16
are met:
17
18
1. Redistributions of source code must retain the above copyright
19
notice, this list of conditions and the following disclaimer.
20
21
2. The origin of this software must not be misrepresented; you must
22
not claim that you wrote the original software. If you use this
23
software in a product, an acknowledgment in the product
24
documentation would be appreciated but is not required.
25
26
3. Altered source versions must be plainly marked as such, and must
27
not be misrepresented as being the original software.
28
29
4. The name of the author may not be used to endorse or promote
30
products derived from this software without specific prior written
31
permission.
32
33
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
39
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
41
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
42
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
43
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44
45
Julian Seward, Guildford, Surrey, UK.
46
[email protected]
47
bzip2/libbzip2 version 0.9.0c of 18 October 1998
48
49
This program is based on (at least) the work of:
50
Mike Burrows
51
David Wheeler
52
Peter Fenwick
53
Alistair Moffat
54
Radford Neal
55
Ian H. Witten
56
Robert Sedgewick
57
Jon L. Bentley
58
59
For more information on these sources, see the manual.
60
--*/
61
62
63
#include "bzhdr.h"
64
65
/*---------------------------------------------*/
66
/*--
67
Compare two strings in block. We assume (see
68
discussion above) that i1 and i2 have a max
69
offset of 10 on entry, and that the first
70
bytes of both block and quadrant have been
71
copied into the "overshoot area", ie
72
into the subscript range
73
[nblock .. nblock+NUM_OVERSHOOT_BYTES-1].
74
--*/
75
static __inline__ Bool fullGtU ( UChar* block,
76
UInt16* quadrant,
77
UInt32 nblock,
78
Int32* workDone,
79
Int32 i1,
80
Int32 i2
81
)
82
{
83
Int32 k;
84
UChar c1, c2;
85
UInt16 s1, s2;
86
87
AssertD ( i1 != i2, "fullGtU(1)" );
88
89
c1 = block[i1];
90
c2 = block[i2];
91
if (c1 != c2) return (c1 > c2);
92
i1++; i2++;
93
94
c1 = block[i1];
95
c2 = block[i2];
96
if (c1 != c2) return (c1 > c2);
97
i1++; i2++;
98
99
c1 = block[i1];
100
c2 = block[i2];
101
if (c1 != c2) return (c1 > c2);
102
i1++; i2++;
103
104
c1 = block[i1];
105
c2 = block[i2];
106
if (c1 != c2) return (c1 > c2);
107
i1++; i2++;
108
109
c1 = block[i1];
110
c2 = block[i2];
111
if (c1 != c2) return (c1 > c2);
112
i1++; i2++;
113
114
c1 = block[i1];
115
c2 = block[i2];
116
if (c1 != c2) return (c1 > c2);
117
i1++; i2++;
118
119
k = nblock;
120
121
do {
122
123
c1 = block[i1];
124
c2 = block[i2];
125
if (c1 != c2) return (c1 > c2);
126
s1 = quadrant[i1];
127
s2 = quadrant[i2];
128
if (s1 != s2) return (s1 > s2);
129
i1++; i2++;
130
131
c1 = block[i1];
132
c2 = block[i2];
133
if (c1 != c2) return (c1 > c2);
134
s1 = quadrant[i1];
135
s2 = quadrant[i2];
136
if (s1 != s2) return (s1 > s2);
137
i1++; i2++;
138
139
c1 = block[i1];
140
c2 = block[i2];
141
if (c1 != c2) return (c1 > c2);
142
s1 = quadrant[i1];
143
s2 = quadrant[i2];
144
if (s1 != s2) return (s1 > s2);
145
i1++; i2++;
146
147
c1 = block[i1];
148
c2 = block[i2];
149
if (c1 != c2) return (c1 > c2);
150
s1 = quadrant[i1];
151
s2 = quadrant[i2];
152
if (s1 != s2) return (s1 > s2);
153
i1++; i2++;
154
155
if (i1 >= nblock) i1 -= nblock;
156
if (i2 >= nblock) i2 -= nblock;
157
158
k -= 4;
159
(*workDone)++;
160
}
161
while (k >= 0);
162
163
return False;
164
}
165
166
/*---------------------------------------------*/
167
/*--
168
Knuth's increments seem to work better
169
than Incerpi-Sedgewick here. Possibly
170
because the number of elems to sort is
171
usually small, typically <= 20.
172
--*/
173
static Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
174
9841, 29524, 88573, 265720,
175
797161, 2391484 };
176
177
static void simpleSort ( EState* s, Int32 lo, Int32 hi, Int32 d )
178
{
179
Int32 i, j, h, bigN, hp;
180
Int32 v;
181
182
UChar* block = s->block;
183
UInt32* zptr = s->zptr;
184
UInt16* quadrant = s->quadrant;
185
Int32* workDone = &(s->workDone);
186
Int32 nblock = s->nblock;
187
Int32 workLimit = s->workLimit;
188
Bool firstAttempt = s->firstAttempt;
189
190
bigN = hi - lo + 1;
191
if (bigN < 2) return;
192
193
hp = 0;
194
while (incs[hp] < bigN) hp++;
195
hp--;
196
197
for (; hp >= 0; hp--) {
198
h = incs[hp];
199
i = lo + h;
200
while (True) {
201
202
/*-- copy 1 --*/
203
if (i > hi) break;
204
v = zptr[i];
205
j = i;
206
while ( fullGtU ( block, quadrant, nblock, workDone,
207
zptr[j-h]+d, v+d ) ) {
208
zptr[j] = zptr[j-h];
209
j = j - h;
210
if (j <= (lo + h - 1)) break;
211
}
212
zptr[j] = v;
213
i++;
214
215
/*-- copy 2 --*/
216
if (i > hi) break;
217
v = zptr[i];
218
j = i;
219
while ( fullGtU ( block, quadrant, nblock, workDone,
220
zptr[j-h]+d, v+d ) ) {
221
zptr[j] = zptr[j-h];
222
j = j - h;
223
if (j <= (lo + h - 1)) break;
224
}
225
zptr[j] = v;
226
i++;
227
228
/*-- copy 3 --*/
229
if (i > hi) break;
230
v = zptr[i];
231
j = i;
232
while ( fullGtU ( block, quadrant, nblock, workDone,
233
zptr[j-h]+d, v+d ) ) {
234
zptr[j] = zptr[j-h];
235
j = j - h;
236
if (j <= (lo + h - 1)) break;
237
}
238
zptr[j] = v;
239
i++;
240
241
if (*workDone > workLimit && firstAttempt) return;
242
}
243
}
244
}
245
246
247
/*---------------------------------------------*/
248
/*--
249
The following is an implementation of
250
an elegant 3-way quicksort for strings,
251
described in a paper "Fast Algorithms for
252
Sorting and Searching Strings", by Robert
253
Sedgewick and Jon L. Bentley.
254
--*/
255
256
#define swap(lv1, lv2) \
257
{ Int32 tmp = lv1; lv1 = lv2; lv2 = tmp; }
258
259
static void vswap ( UInt32* zptr, Int32 p1, Int32 p2, Int32 n )
260
{
261
while (n > 0) {
262
swap(zptr[p1], zptr[p2]);
263
p1++; p2++; n--;
264
}
265
}
266
267
static UChar med3 ( UChar a, UChar b, UChar c )
268
{
269
UChar t;
270
if (a > b) { t = a; a = b; b = t; };
271
if (b > c) { t = b; b = c; c = t; };
272
if (a > b) b = a;
273
return b;
274
}
275
276
277
#define min(a,b) ((a) < (b)) ? (a) : (b)
278
279
typedef
280
struct { Int32 ll; Int32 hh; Int32 dd; }
281
StackElem;
282
283
#define push(lz,hz,dz) { stack[sp].ll = lz; \
284
stack[sp].hh = hz; \
285
stack[sp].dd = dz; \
286
sp++; }
287
288
#define pop(lz,hz,dz) { sp--; \
289
lz = stack[sp].ll; \
290
hz = stack[sp].hh; \
291
dz = stack[sp].dd; }
292
293
#define SMALL_THRESH 20
294
#define DEPTH_THRESH 10
295
296
/*--
297
If you are ever unlucky/improbable enough
298
to get a stack overflow whilst sorting,
299
increase the following constant and try
300
again. In practice I have never seen the
301
stack go above 27 elems, so the following
302
limit seems very generous.
303
--*/
304
#define QSORT_STACK_SIZE 1000
305
306
307
static void qSort3 ( EState* s, Int32 loSt, Int32 hiSt, Int32 dSt )
308
{
309
Int32 unLo, unHi, ltLo, gtHi, med, n, m;
310
Int32 sp, lo, hi, d;
311
StackElem stack[QSORT_STACK_SIZE];
312
313
UChar* block = s->block;
314
UInt32* zptr = s->zptr;
315
Int32* workDone = &(s->workDone);
316
Int32 workLimit = s->workLimit;
317
Bool firstAttempt = s->firstAttempt;
318
319
sp = 0;
320
push ( loSt, hiSt, dSt );
321
322
while (sp > 0) {
323
324
AssertH ( sp < QSORT_STACK_SIZE, 1001 );
325
326
pop ( lo, hi, d );
327
328
if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
329
simpleSort ( s, lo, hi, d );
330
if (*workDone > workLimit && firstAttempt) return;
331
continue;
332
}
333
334
med = med3 ( block[zptr[ lo ]+d],
335
block[zptr[ hi ]+d],
336
block[zptr[ (lo+hi)>>1 ]+d] );
337
338
unLo = ltLo = lo;
339
unHi = gtHi = hi;
340
341
while (True) {
342
while (True) {
343
if (unLo > unHi) break;
344
n = ((Int32)block[zptr[unLo]+d]) - med;
345
if (n == 0) { swap(zptr[unLo], zptr[ltLo]); ltLo++; unLo++; continue; };
346
if (n > 0) break;
347
unLo++;
348
}
349
while (True) {
350
if (unLo > unHi) break;
351
n = ((Int32)block[zptr[unHi]+d]) - med;
352
if (n == 0) { swap(zptr[unHi], zptr[gtHi]); gtHi--; unHi--; continue; };
353
if (n < 0) break;
354
unHi--;
355
}
356
if (unLo > unHi) break;
357
swap(zptr[unLo], zptr[unHi]); unLo++; unHi--;
358
}
359
360
AssertD ( unHi == unLo-1, "bad termination in qSort3" );
361
362
if (gtHi < ltLo) {
363
push(lo, hi, d+1 );
364
continue;
365
}
366
367
n = min(ltLo-lo, unLo-ltLo); vswap(zptr, lo, unLo-n, n);
368
m = min(hi-gtHi, gtHi-unHi); vswap(zptr, unLo, hi-m+1, m);
369
370
n = lo + unLo - ltLo - 1;
371
m = hi - (gtHi - unHi) + 1;
372
373
push ( lo, n, d );
374
push ( n+1, m-1, d+1 );
375
push ( m, hi, d );
376
}
377
}
378
379
380
/*---------------------------------------------*/
381
382
#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
383
384
#define SETMASK (1 << 21)
385
#define CLEARMASK (~(SETMASK))
386
387
static void sortMain ( EState* s )
388
{
389
Int32 i, j, k, ss, sb;
390
Int32 runningOrder[256];
391
Int32 copy[256];
392
Bool bigDone[256];
393
UChar c1, c2;
394
Int32 numQSorted;
395
396
UChar* block = s->block;
397
UInt32* zptr = s->zptr;
398
UInt16* quadrant = s->quadrant;
399
Int32* ftab = s->ftab;
400
Int32* workDone = &(s->workDone);
401
Int32 nblock = s->nblock;
402
Int32 workLimit = s->workLimit;
403
Bool firstAttempt = s->firstAttempt;
404
405
/*--
406
In the various block-sized structures, live data runs
407
from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
408
set up the overshoot area for block.
409
--*/
410
411
if (s->verbosity >= 4)
412
VPrintf0( " sort initialise ...\n" );
413
414
for (i = 0; i < BZ_NUM_OVERSHOOT_BYTES; i++)
415
block[nblock+i] = block[i % nblock];
416
for (i = 0; i < nblock+BZ_NUM_OVERSHOOT_BYTES; i++)
417
quadrant[i] = 0;
418
419
420
if (nblock <= 4000) {
421
422
/*--
423
Use simpleSort(), since the full sorting mechanism
424
has quite a large constant overhead.
425
--*/
426
if (s->verbosity >= 4) VPrintf0( " simpleSort ...\n" );
427
for (i = 0; i < nblock; i++) zptr[i] = i;
428
firstAttempt = False;
429
*workDone = workLimit = 0;
430
simpleSort ( s, 0, nblock-1, 0 );
431
if (s->verbosity >= 4) VPrintf0( " simpleSort done.\n" );
432
433
} else {
434
435
numQSorted = 0;
436
for (i = 0; i <= 255; i++) bigDone[i] = False;
437
438
if (s->verbosity >= 4) VPrintf0( " bucket sorting ...\n" );
439
440
for (i = 0; i <= 65536; i++) ftab[i] = 0;
441
442
c1 = block[nblock-1];
443
for (i = 0; i < nblock; i++) {
444
c2 = block[i];
445
ftab[(c1 << 8) + c2]++;
446
c1 = c2;
447
}
448
449
for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
450
451
c1 = block[0];
452
for (i = 0; i < nblock-1; i++) {
453
c2 = block[i+1];
454
j = (c1 << 8) + c2;
455
c1 = c2;
456
ftab[j]--;
457
zptr[ftab[j]] = i;
458
}
459
j = (block[nblock-1] << 8) + block[0];
460
ftab[j]--;
461
zptr[ftab[j]] = nblock-1;
462
463
/*--
464
Now ftab contains the first loc of every small bucket.
465
Calculate the running order, from smallest to largest
466
big bucket.
467
--*/
468
469
for (i = 0; i <= 255; i++) runningOrder[i] = i;
470
471
{
472
Int32 vv;
473
Int32 h = 1;
474
do h = 3 * h + 1; while (h <= 256);
475
do {
476
h = h / 3;
477
for (i = h; i <= 255; i++) {
478
vv = runningOrder[i];
479
j = i;
480
while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
481
runningOrder[j] = runningOrder[j-h];
482
j = j - h;
483
if (j <= (h - 1)) goto zero;
484
}
485
zero:
486
runningOrder[j] = vv;
487
}
488
} while (h != 1);
489
}
490
491
/*--
492
The main sorting loop.
493
--*/
494
495
for (i = 0; i <= 255; i++) {
496
497
/*--
498
Process big buckets, starting with the least full.
499
Basically this is a 4-step process in which we call
500
qSort3 to sort the small buckets [ss, j], but
501
also make a big effort to avoid the calls if we can.
502
--*/
503
ss = runningOrder[i];
504
505
/*--
506
Step 1:
507
Complete the big bucket [ss] by quicksorting
508
any unsorted small buckets [ss, j], for j != ss.
509
Hopefully previous pointer-scanning phases have already
510
completed many of the small buckets [ss, j], so
511
we don't have to sort them at all.
512
--*/
513
for (j = 0; j <= 255; j++) {
514
if (j != ss) {
515
sb = (ss << 8) + j;
516
if ( ! (ftab[sb] & SETMASK) ) {
517
Int32 lo = ftab[sb] & CLEARMASK;
518
Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
519
if (hi > lo) {
520
if (s->verbosity >= 4)
521
VPrintf4( " qsort [0x%x, 0x%x] done %d this %d\n",
522
ss, j, numQSorted, hi - lo + 1 );
523
qSort3 ( s, lo, hi, 2 );
524
numQSorted += ( hi - lo + 1 );
525
if (*workDone > workLimit && firstAttempt) return;
526
}
527
}
528
ftab[sb] |= SETMASK;
529
}
530
}
531
532
/*--
533
Step 2:
534
Deal specially with case [ss, ss]. This establishes the
535
sorted order for [ss, ss] without any comparisons.
536
A clever trick, cryptically described as steps Q6b and Q6c
537
in SRC-124 (aka BW94). This makes it entirely practical to
538
not use a preliminary run-length coder, but unfortunately
539
we are now stuck with the .bz2 file format.
540
--*/
541
{
542
Int32 put0, get0, put1, get1;
543
Int32 sbn = (ss << 8) + ss;
544
Int32 lo = ftab[sbn] & CLEARMASK;
545
Int32 hi = (ftab[sbn+1] & CLEARMASK) - 1;
546
UChar ssc = (UChar)ss;
547
put0 = lo;
548
get0 = ftab[ss << 8] & CLEARMASK;
549
put1 = hi;
550
get1 = (ftab[(ss+1) << 8] & CLEARMASK) - 1;
551
while (get0 < put0) {
552
j = zptr[get0]-1; if (j < 0) j += nblock;
553
c1 = block[j];
554
if (c1 == ssc) { zptr[put0] = j; put0++; };
555
get0++;
556
}
557
while (get1 > put1) {
558
j = zptr[get1]-1; if (j < 0) j += nblock;
559
c1 = block[j];
560
if (c1 == ssc) { zptr[put1] = j; put1--; };
561
get1--;
562
}
563
ftab[sbn] |= SETMASK;
564
}
565
566
/*--
567
Step 3:
568
The [ss] big bucket is now done. Record this fact,
569
and update the quadrant descriptors. Remember to
570
update quadrants in the overshoot area too, if
571
necessary. The "if (i < 255)" test merely skips
572
this updating for the last bucket processed, since
573
updating for the last bucket is pointless.
574
575
The quadrant array provides a way to incrementally
576
cache sort orderings, as they appear, so as to
577
make subsequent comparisons in fullGtU() complete
578
faster. For repetitive blocks this makes a big
579
difference (but not big enough to be able to avoid
580
randomisation for very repetitive data.)
581
582
The precise meaning is: at all times:
583
584
for 0 <= i < nblock and 0 <= j <= nblock
585
586
if block[i] != block[j],
587
588
then the relative values of quadrant[i] and
589
quadrant[j] are meaningless.
590
591
else {
592
if quadrant[i] < quadrant[j]
593
then the string starting at i lexicographically
594
precedes the string starting at j
595
596
else if quadrant[i] > quadrant[j]
597
then the string starting at j lexicographically
598
precedes the string starting at i
599
600
else
601
the relative ordering of the strings starting
602
at i and j has not yet been determined.
603
}
604
--*/
605
bigDone[ss] = True;
606
607
if (i < 255) {
608
Int32 bbStart = ftab[ss << 8] & CLEARMASK;
609
Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
610
Int32 shifts = 0;
611
612
while ((bbSize >> shifts) > 65534) shifts++;
613
614
for (j = 0; j < bbSize; j++) {
615
Int32 a2update = zptr[bbStart + j];
616
UInt16 qVal = (UInt16)(j >> shifts);
617
quadrant[a2update] = qVal;
618
if (a2update < BZ_NUM_OVERSHOOT_BYTES)
619
quadrant[a2update + nblock] = qVal;
620
}
621
622
AssertH ( ( ((bbSize-1) >> shifts) <= 65535 ), 1002 );
623
}
624
625
/*--
626
Step 4:
627
Now scan this big bucket [ss] so as to synthesise the
628
sorted order for small buckets [t, ss] for all t != ss.
629
This will avoid doing Real Work in subsequent Step 1's.
630
--*/
631
for (j = 0; j <= 255; j++)
632
copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
633
634
for (j = ftab[ss << 8] & CLEARMASK;
635
j < (ftab[(ss+1) << 8] & CLEARMASK);
636
j++) {
637
k = zptr[j]-1; if (k < 0) k += nblock;
638
c1 = block[k];
639
if ( ! bigDone[c1] ) {
640
zptr[copy[c1]] = k;
641
copy[c1] ++;
642
}
643
}
644
645
for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
646
}
647
if (s->verbosity >= 4)
648
VPrintf3( " %d pointers, %d sorted, %d scanned\n",
649
nblock, numQSorted, nblock - numQSorted );
650
}
651
}
652
653
654
/*---------------------------------------------*/
655
static void randomiseBlock ( EState* s )
656
{
657
Int32 i;
658
BZ_RAND_INIT_MASK;
659
for (i = 0; i < 256; i++) s->inUse[i] = False;
660
661
for (i = 0; i < s->nblock; i++) {
662
BZ_RAND_UPD_MASK;
663
s->block[i] ^= BZ_RAND_MASK;
664
s->inUse[s->block[i]] = True;
665
}
666
}
667
668
669
/*---------------------------------------------*/
670
void blockSort ( EState* s )
671
{
672
Int32 i;
673
674
s->workLimit = s->workFactor * (s->nblock - 1);
675
s->workDone = 0;
676
s->blockRandomised = False;
677
s->firstAttempt = True;
678
679
sortMain ( s );
680
681
if (s->verbosity >= 3)
682
VPrintf3( " %d work, %d block, ratio %5.2f\n",
683
s->workDone, s->nblock-1,
684
(float)(s->workDone) / (float)(s->nblock-1) );
685
686
if (s->workDone > s->workLimit && s->firstAttempt) {
687
if (s->verbosity >= 2)
688
VPrintf0( " sorting aborted; randomising block\n" );
689
randomiseBlock ( s );
690
s->workLimit = s->workDone = 0;
691
s->blockRandomised = True;
692
s->firstAttempt = False;
693
sortMain ( s );
694
if (s->verbosity >= 3)
695
VPrintf3( " %d work, %d block, ratio %f\n",
696
s->workDone, s->nblock-1,
697
(float)(s->workDone) / (float)(s->nblock-1) );
698
}
699
700
s->origPtr = -1;
701
for (i = 0; i < s->nblock; i++)
702
if (s->zptr[i] == 0)
703
{ s->origPtr = i; break; };
704
705
AssertH( s->origPtr != -1, 1003 );
706
}
707
708
/*-------------------------------------------------------------*/
709
/*--- end blocksort.c ---*/
710
/*-------------------------------------------------------------*/
711
712