Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Kitware
GitHub Repository: Kitware/CMake
Path: blob/master/Utilities/cmbzip2/blocksort.c
3148 views
1
2
/*-------------------------------------------------------------*/
3
/*--- Block sorting machinery ---*/
4
/*--- blocksort.c ---*/
5
/*-------------------------------------------------------------*/
6
7
/* ------------------------------------------------------------------
8
This file is part of bzip2/libbzip2, a program and library for
9
lossless, block-sorting data compression.
10
11
bzip2/libbzip2 version 1.0.8 of 13 July 2019
12
Copyright (C) 1996-2019 Julian Seward <[email protected]>
13
14
Please read the WARNING, DISCLAIMER and PATENTS sections in the
15
README file.
16
17
This program is released under the terms of the license contained
18
in the file LICENSE.
19
------------------------------------------------------------------ */
20
21
22
#include "bzlib_private.h"
23
24
/*---------------------------------------------*/
25
/*--- Fallback O(N log(N)^2) sorting ---*/
26
/*--- algorithm, for repetitive blocks ---*/
27
/*---------------------------------------------*/
28
29
/*---------------------------------------------*/
30
static
31
__inline__
32
void fallbackSimpleSort ( UInt32* fmap,
33
UInt32* eclass,
34
Int32 lo,
35
Int32 hi )
36
{
37
Int32 i, j, tmp;
38
UInt32 ec_tmp;
39
40
if (lo == hi) return;
41
42
if (hi - lo > 3) {
43
for ( i = hi-4; i >= lo; i-- ) {
44
tmp = fmap[i];
45
ec_tmp = eclass[tmp];
46
for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
47
fmap[j-4] = fmap[j];
48
fmap[j-4] = tmp;
49
}
50
}
51
52
for ( i = hi-1; i >= lo; i-- ) {
53
tmp = fmap[i];
54
ec_tmp = eclass[tmp];
55
for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
56
fmap[j-1] = fmap[j];
57
fmap[j-1] = tmp;
58
}
59
}
60
61
62
/*---------------------------------------------*/
63
#define fswap(zz1, zz2) \
64
{ Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
65
66
#define fvswap(zzp1, zzp2, zzn) \
67
{ \
68
Int32 yyp1 = (zzp1); \
69
Int32 yyp2 = (zzp2); \
70
Int32 yyn = (zzn); \
71
while (yyn > 0) { \
72
fswap(fmap[yyp1], fmap[yyp2]); \
73
yyp1++; yyp2++; yyn--; \
74
} \
75
}
76
77
78
#define fmin(a,b) ((a) < (b)) ? (a) : (b)
79
80
#define fpush(lz,hz) { stackLo[sp] = lz; \
81
stackHi[sp] = hz; \
82
sp++; }
83
84
#define fpop(lz,hz) { sp--; \
85
lz = stackLo[sp]; \
86
hz = stackHi[sp]; }
87
88
#define FALLBACK_QSORT_SMALL_THRESH 10
89
#define FALLBACK_QSORT_STACK_SIZE 100
90
91
92
static
93
void fallbackQSort3 ( UInt32* fmap,
94
UInt32* eclass,
95
Int32 loSt,
96
Int32 hiSt )
97
{
98
Int32 unLo, unHi, ltLo, gtHi, n, m;
99
Int32 sp, lo, hi;
100
UInt32 med, r, r3;
101
Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
102
Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
103
104
r = 0;
105
106
sp = 0;
107
fpush ( loSt, hiSt );
108
109
while (sp > 0) {
110
111
AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
112
113
fpop ( lo, hi );
114
if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
115
fallbackSimpleSort ( fmap, eclass, lo, hi );
116
continue;
117
}
118
119
/* Random partitioning. Median of 3 sometimes fails to
120
avoid bad cases. Median of 9 seems to help but
121
looks rather expensive. This too seems to work but
122
is cheaper. Guidance for the magic constants
123
7621 and 32768 is taken from Sedgewick's algorithms
124
book, chapter 35.
125
*/
126
r = ((r * 7621) + 1) % 32768;
127
r3 = r % 3;
128
if (r3 == 0) med = eclass[fmap[lo]]; else
129
if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
130
med = eclass[fmap[hi]];
131
132
unLo = ltLo = lo;
133
unHi = gtHi = hi;
134
135
while (1) {
136
while (1) {
137
if (unLo > unHi) break;
138
n = (Int32)eclass[fmap[unLo]] - (Int32)med;
139
if (n == 0) {
140
fswap(fmap[unLo], fmap[ltLo]);
141
ltLo++; unLo++;
142
continue;
143
};
144
if (n > 0) break;
145
unLo++;
146
}
147
while (1) {
148
if (unLo > unHi) break;
149
n = (Int32)eclass[fmap[unHi]] - (Int32)med;
150
if (n == 0) {
151
fswap(fmap[unHi], fmap[gtHi]);
152
gtHi--; unHi--;
153
continue;
154
};
155
if (n < 0) break;
156
unHi--;
157
}
158
if (unLo > unHi) break;
159
fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
160
}
161
162
AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
163
164
if (gtHi < ltLo) continue;
165
166
n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
167
m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
168
169
n = lo + unLo - ltLo - 1;
170
m = hi - (gtHi - unHi) + 1;
171
172
if (n - lo > hi - m) {
173
fpush ( lo, n );
174
fpush ( m, hi );
175
} else {
176
fpush ( m, hi );
177
fpush ( lo, n );
178
}
179
}
180
}
181
182
#undef fmin
183
#undef fpush
184
#undef fpop
185
#undef fswap
186
#undef fvswap
187
#undef FALLBACK_QSORT_SMALL_THRESH
188
#undef FALLBACK_QSORT_STACK_SIZE
189
190
191
/*---------------------------------------------*/
192
/* Pre:
193
nblock > 0
194
eclass exists for [0 .. nblock-1]
195
((UChar*)eclass) [0 .. nblock-1] holds block
196
ptr exists for [0 .. nblock-1]
197
198
Post:
199
((UChar*)eclass) [0 .. nblock-1] holds block
200
All other areas of eclass destroyed
201
fmap [0 .. nblock-1] holds sorted order
202
bhtab [ 0 .. 2+(nblock/32) ] destroyed
203
*/
204
205
#define SET_BH(zz) bhtab[(zz) >> 5] |= ((UInt32)1 << ((zz) & 31))
206
#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~((UInt32)1 << ((zz) & 31))
207
#define ISSET_BH(zz) (bhtab[(zz) >> 5] & ((UInt32)1 << ((zz) & 31)))
208
#define WORD_BH(zz) bhtab[(zz) >> 5]
209
#define UNALIGNED_BH(zz) ((zz) & 0x01f)
210
211
static
212
void fallbackSort ( UInt32* fmap,
213
UInt32* eclass,
214
UInt32* bhtab,
215
Int32 nblock,
216
Int32 verb )
217
{
218
Int32 ftab[257];
219
Int32 ftabCopy[256];
220
Int32 H, i, j, k, l, r, cc, cc1;
221
Int32 nNotDone;
222
Int32 nBhtab;
223
UChar* eclass8 = (UChar*)eclass;
224
225
/*--
226
Initial 1-char radix sort to generate
227
initial fmap and initial BH bits.
228
--*/
229
if (verb >= 4)
230
VPrintf0 ( " bucket sorting ...\n" );
231
for (i = 0; i < 257; i++) ftab[i] = 0;
232
for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
233
for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
234
for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
235
236
for (i = 0; i < nblock; i++) {
237
j = eclass8[i];
238
k = ftab[j] - 1;
239
ftab[j] = k;
240
fmap[k] = i;
241
}
242
243
nBhtab = 2 + (nblock / 32);
244
for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
245
for (i = 0; i < 256; i++) SET_BH(ftab[i]);
246
247
/*--
248
Inductively refine the buckets. Kind-of an
249
"exponential radix sort" (!), inspired by the
250
Manber-Myers suffix array construction algorithm.
251
--*/
252
253
/*-- set sentinel bits for block-end detection --*/
254
for (i = 0; i < 32; i++) {
255
SET_BH(nblock + 2*i);
256
CLEAR_BH(nblock + 2*i + 1);
257
}
258
259
/*-- the log(N) loop --*/
260
H = 1;
261
while (1) {
262
263
if (verb >= 4)
264
VPrintf1 ( " depth %6d has ", H );
265
266
j = 0;
267
for (i = 0; i < nblock; i++) {
268
if (ISSET_BH(i)) j = i;
269
k = fmap[i] - H; if (k < 0) k += nblock;
270
eclass[k] = j;
271
}
272
273
nNotDone = 0;
274
r = -1;
275
while (1) {
276
277
/*-- find the next non-singleton bucket --*/
278
k = r + 1;
279
while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
280
if (ISSET_BH(k)) {
281
while (WORD_BH(k) == 0xffffffff) k += 32;
282
while (ISSET_BH(k)) k++;
283
}
284
l = k - 1;
285
if (l >= nblock) break;
286
while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
287
if (!ISSET_BH(k)) {
288
while (WORD_BH(k) == 0x00000000) k += 32;
289
while (!ISSET_BH(k)) k++;
290
}
291
r = k - 1;
292
if (r >= nblock) break;
293
294
/*-- now [l, r] bracket current bucket --*/
295
if (r > l) {
296
nNotDone += (r - l + 1);
297
fallbackQSort3 ( fmap, eclass, l, r );
298
299
/*-- scan bucket and generate header bits-- */
300
cc = -1;
301
for (i = l; i <= r; i++) {
302
cc1 = eclass[fmap[i]];
303
if (cc != cc1) { SET_BH(i); cc = cc1; };
304
}
305
}
306
}
307
308
if (verb >= 4)
309
VPrintf1 ( "%6d unresolved strings\n", nNotDone );
310
311
H *= 2;
312
if (H > nblock || nNotDone == 0) break;
313
}
314
315
/*--
316
Reconstruct the original block in
317
eclass8 [0 .. nblock-1], since the
318
previous phase destroyed it.
319
--*/
320
if (verb >= 4)
321
VPrintf0 ( " reconstructing block ...\n" );
322
j = 0;
323
for (i = 0; i < nblock; i++) {
324
while (ftabCopy[j] == 0) j++;
325
ftabCopy[j]--;
326
eclass8[fmap[i]] = (UChar)j;
327
}
328
AssertH ( j < 256, 1005 );
329
}
330
331
#undef SET_BH
332
#undef CLEAR_BH
333
#undef ISSET_BH
334
#undef WORD_BH
335
#undef UNALIGNED_BH
336
337
338
/*---------------------------------------------*/
339
/*--- The main, O(N^2 log(N)) sorting ---*/
340
/*--- algorithm. Faster for "normal" ---*/
341
/*--- non-repetitive blocks. ---*/
342
/*---------------------------------------------*/
343
344
/*---------------------------------------------*/
345
static
346
__inline__
347
Bool mainGtU ( UInt32 i1,
348
UInt32 i2,
349
UChar* block,
350
UInt16* quadrant,
351
UInt32 nblock,
352
Int32* budget )
353
{
354
Int32 k;
355
UChar c1, c2;
356
UInt16 s1, s2;
357
358
AssertD ( i1 != i2, "mainGtU" );
359
/* 1 */
360
c1 = block[i1]; c2 = block[i2];
361
if (c1 != c2) return (c1 > c2);
362
i1++; i2++;
363
/* 2 */
364
c1 = block[i1]; c2 = block[i2];
365
if (c1 != c2) return (c1 > c2);
366
i1++; i2++;
367
/* 3 */
368
c1 = block[i1]; c2 = block[i2];
369
if (c1 != c2) return (c1 > c2);
370
i1++; i2++;
371
/* 4 */
372
c1 = block[i1]; c2 = block[i2];
373
if (c1 != c2) return (c1 > c2);
374
i1++; i2++;
375
/* 5 */
376
c1 = block[i1]; c2 = block[i2];
377
if (c1 != c2) return (c1 > c2);
378
i1++; i2++;
379
/* 6 */
380
c1 = block[i1]; c2 = block[i2];
381
if (c1 != c2) return (c1 > c2);
382
i1++; i2++;
383
/* 7 */
384
c1 = block[i1]; c2 = block[i2];
385
if (c1 != c2) return (c1 > c2);
386
i1++; i2++;
387
/* 8 */
388
c1 = block[i1]; c2 = block[i2];
389
if (c1 != c2) return (c1 > c2);
390
i1++; i2++;
391
/* 9 */
392
c1 = block[i1]; c2 = block[i2];
393
if (c1 != c2) return (c1 > c2);
394
i1++; i2++;
395
/* 10 */
396
c1 = block[i1]; c2 = block[i2];
397
if (c1 != c2) return (c1 > c2);
398
i1++; i2++;
399
/* 11 */
400
c1 = block[i1]; c2 = block[i2];
401
if (c1 != c2) return (c1 > c2);
402
i1++; i2++;
403
/* 12 */
404
c1 = block[i1]; c2 = block[i2];
405
if (c1 != c2) return (c1 > c2);
406
i1++; i2++;
407
408
k = nblock + 8;
409
410
do {
411
/* 1 */
412
c1 = block[i1]; c2 = block[i2];
413
if (c1 != c2) return (c1 > c2);
414
s1 = quadrant[i1]; s2 = quadrant[i2];
415
if (s1 != s2) return (s1 > s2);
416
i1++; i2++;
417
/* 2 */
418
c1 = block[i1]; c2 = block[i2];
419
if (c1 != c2) return (c1 > c2);
420
s1 = quadrant[i1]; s2 = quadrant[i2];
421
if (s1 != s2) return (s1 > s2);
422
i1++; i2++;
423
/* 3 */
424
c1 = block[i1]; c2 = block[i2];
425
if (c1 != c2) return (c1 > c2);
426
s1 = quadrant[i1]; s2 = quadrant[i2];
427
if (s1 != s2) return (s1 > s2);
428
i1++; i2++;
429
/* 4 */
430
c1 = block[i1]; c2 = block[i2];
431
if (c1 != c2) return (c1 > c2);
432
s1 = quadrant[i1]; s2 = quadrant[i2];
433
if (s1 != s2) return (s1 > s2);
434
i1++; i2++;
435
/* 5 */
436
c1 = block[i1]; c2 = block[i2];
437
if (c1 != c2) return (c1 > c2);
438
s1 = quadrant[i1]; s2 = quadrant[i2];
439
if (s1 != s2) return (s1 > s2);
440
i1++; i2++;
441
/* 6 */
442
c1 = block[i1]; c2 = block[i2];
443
if (c1 != c2) return (c1 > c2);
444
s1 = quadrant[i1]; s2 = quadrant[i2];
445
if (s1 != s2) return (s1 > s2);
446
i1++; i2++;
447
/* 7 */
448
c1 = block[i1]; c2 = block[i2];
449
if (c1 != c2) return (c1 > c2);
450
s1 = quadrant[i1]; s2 = quadrant[i2];
451
if (s1 != s2) return (s1 > s2);
452
i1++; i2++;
453
/* 8 */
454
c1 = block[i1]; c2 = block[i2];
455
if (c1 != c2) return (c1 > c2);
456
s1 = quadrant[i1]; s2 = quadrant[i2];
457
if (s1 != s2) return (s1 > s2);
458
i1++; i2++;
459
460
if (i1 >= nblock) i1 -= nblock;
461
if (i2 >= nblock) i2 -= nblock;
462
463
k -= 8;
464
(*budget)--;
465
}
466
while (k >= 0);
467
468
return False;
469
}
470
471
472
/*---------------------------------------------*/
473
/*--
474
Knuth's increments seem to work better
475
than Incerpi-Sedgewick here. Possibly
476
because the number of elems to sort is
477
usually small, typically <= 20.
478
--*/
479
static
480
Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
481
9841, 29524, 88573, 265720,
482
797161, 2391484 };
483
484
static
485
void mainSimpleSort ( UInt32* ptr,
486
UChar* block,
487
UInt16* quadrant,
488
Int32 nblock,
489
Int32 lo,
490
Int32 hi,
491
Int32 d,
492
Int32* budget )
493
{
494
Int32 i, j, h, bigN, hp;
495
UInt32 v;
496
497
bigN = hi - lo + 1;
498
if (bigN < 2) return;
499
500
hp = 0;
501
while (incs[hp] < bigN) hp++;
502
hp--;
503
504
for (; hp >= 0; hp--) {
505
h = incs[hp];
506
507
i = lo + h;
508
while (True) {
509
510
/*-- copy 1 --*/
511
if (i > hi) break;
512
v = ptr[i];
513
j = i;
514
while ( mainGtU (
515
ptr[j-h]+d, v+d, block, quadrant, nblock, budget
516
) ) {
517
ptr[j] = ptr[j-h];
518
j = j - h;
519
if (j <= (lo + h - 1)) break;
520
}
521
ptr[j] = v;
522
i++;
523
524
/*-- copy 2 --*/
525
if (i > hi) break;
526
v = ptr[i];
527
j = i;
528
while ( mainGtU (
529
ptr[j-h]+d, v+d, block, quadrant, nblock, budget
530
) ) {
531
ptr[j] = ptr[j-h];
532
j = j - h;
533
if (j <= (lo + h - 1)) break;
534
}
535
ptr[j] = v;
536
i++;
537
538
/*-- copy 3 --*/
539
if (i > hi) break;
540
v = ptr[i];
541
j = i;
542
while ( mainGtU (
543
ptr[j-h]+d, v+d, block, quadrant, nblock, budget
544
) ) {
545
ptr[j] = ptr[j-h];
546
j = j - h;
547
if (j <= (lo + h - 1)) break;
548
}
549
ptr[j] = v;
550
i++;
551
552
if (*budget < 0) return;
553
}
554
}
555
}
556
557
558
/*---------------------------------------------*/
559
/*--
560
The following is an implementation of
561
an elegant 3-way quicksort for strings,
562
described in a paper "Fast Algorithms for
563
Sorting and Searching Strings", by Robert
564
Sedgewick and Jon L. Bentley.
565
--*/
566
567
#define mswap(zz1, zz2) \
568
{ Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
569
570
#define mvswap(zzp1, zzp2, zzn) \
571
{ \
572
Int32 yyp1 = (zzp1); \
573
Int32 yyp2 = (zzp2); \
574
Int32 yyn = (zzn); \
575
while (yyn > 0) { \
576
mswap(ptr[yyp1], ptr[yyp2]); \
577
yyp1++; yyp2++; yyn--; \
578
} \
579
}
580
581
static
582
__inline__
583
UChar mmed3 ( UChar a, UChar b, UChar c )
584
{
585
UChar t;
586
if (a > b) { t = a; a = b; b = t; };
587
if (b > c) {
588
b = c;
589
if (a > b) b = a;
590
}
591
return b;
592
}
593
594
#define mmin(a,b) ((a) < (b)) ? (a) : (b)
595
596
#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
597
stackHi[sp] = hz; \
598
stackD [sp] = dz; \
599
sp++; }
600
601
#define mpop(lz,hz,dz) { sp--; \
602
lz = stackLo[sp]; \
603
hz = stackHi[sp]; \
604
dz = stackD [sp]; }
605
606
607
#define mnextsize(az) (nextHi[az]-nextLo[az])
608
609
#define mnextswap(az,bz) \
610
{ Int32 tz; \
611
tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
612
tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
613
tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
614
615
616
#define MAIN_QSORT_SMALL_THRESH 20
617
#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
618
#define MAIN_QSORT_STACK_SIZE 100
619
620
static
621
void mainQSort3 ( UInt32* ptr,
622
UChar* block,
623
UInt16* quadrant,
624
Int32 nblock,
625
Int32 loSt,
626
Int32 hiSt,
627
Int32 dSt,
628
Int32* budget )
629
{
630
Int32 unLo, unHi, ltLo, gtHi, n, m, med;
631
Int32 sp, lo, hi, d;
632
633
Int32 stackLo[MAIN_QSORT_STACK_SIZE];
634
Int32 stackHi[MAIN_QSORT_STACK_SIZE];
635
Int32 stackD [MAIN_QSORT_STACK_SIZE];
636
637
Int32 nextLo[3];
638
Int32 nextHi[3];
639
Int32 nextD [3];
640
641
sp = 0;
642
mpush ( loSt, hiSt, dSt );
643
644
while (sp > 0) {
645
646
AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
647
648
mpop ( lo, hi, d );
649
if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
650
d > MAIN_QSORT_DEPTH_THRESH) {
651
mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
652
if (*budget < 0) return;
653
continue;
654
}
655
656
med = (Int32)
657
mmed3 ( block[ptr[ lo ]+d],
658
block[ptr[ hi ]+d],
659
block[ptr[ (lo+hi)>>1 ]+d] );
660
661
unLo = ltLo = lo;
662
unHi = gtHi = hi;
663
664
while (True) {
665
while (True) {
666
if (unLo > unHi) break;
667
n = ((Int32)block[ptr[unLo]+d]) - med;
668
if (n == 0) {
669
mswap(ptr[unLo], ptr[ltLo]);
670
ltLo++; unLo++; continue;
671
};
672
if (n > 0) break;
673
unLo++;
674
}
675
while (True) {
676
if (unLo > unHi) break;
677
n = ((Int32)block[ptr[unHi]+d]) - med;
678
if (n == 0) {
679
mswap(ptr[unHi], ptr[gtHi]);
680
gtHi--; unHi--; continue;
681
};
682
if (n < 0) break;
683
unHi--;
684
}
685
if (unLo > unHi) break;
686
mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
687
}
688
689
AssertD ( unHi == unLo-1, "mainQSort3(2)" );
690
691
if (gtHi < ltLo) {
692
mpush(lo, hi, d+1 );
693
continue;
694
}
695
696
n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
697
m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
698
699
n = lo + unLo - ltLo - 1;
700
m = hi - (gtHi - unHi) + 1;
701
702
nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
703
nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
704
nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
705
706
if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
707
if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
708
if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
709
710
AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
711
AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
712
713
mpush (nextLo[0], nextHi[0], nextD[0]);
714
mpush (nextLo[1], nextHi[1], nextD[1]);
715
mpush (nextLo[2], nextHi[2], nextD[2]);
716
}
717
}
718
719
#undef mswap
720
#undef mvswap
721
#undef mpush
722
#undef mpop
723
#undef mmin
724
#undef mnextsize
725
#undef mnextswap
726
#undef MAIN_QSORT_SMALL_THRESH
727
#undef MAIN_QSORT_DEPTH_THRESH
728
#undef MAIN_QSORT_STACK_SIZE
729
730
731
/*---------------------------------------------*/
732
/* Pre:
733
nblock > N_OVERSHOOT
734
block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
735
((UChar*)block32) [0 .. nblock-1] holds block
736
ptr exists for [0 .. nblock-1]
737
738
Post:
739
((UChar*)block32) [0 .. nblock-1] holds block
740
All other areas of block32 destroyed
741
ftab [0 .. 65536 ] destroyed
742
ptr [0 .. nblock-1] holds sorted order
743
if (*budget < 0), sorting was abandoned
744
*/
745
746
#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
747
#define SETMASK (1 << 21)
748
#define CLEARMASK (~(SETMASK))
749
750
static
751
void mainSort ( UInt32* ptr,
752
UChar* block,
753
UInt16* quadrant,
754
UInt32* ftab,
755
Int32 nblock,
756
Int32 verb,
757
Int32* budget )
758
{
759
Int32 i, j, k, ss, sb;
760
Int32 runningOrder[256];
761
Bool bigDone[256];
762
Int32 copyStart[256];
763
Int32 copyEnd [256];
764
UChar c1;
765
Int32 numQSorted;
766
UInt16 s;
767
if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
768
769
/*-- set up the 2-byte frequency table --*/
770
for (i = 65536; i >= 0; i--) ftab[i] = 0;
771
772
j = block[0] << 8;
773
i = nblock-1;
774
for (; i >= 3; i -= 4) {
775
quadrant[i] = 0;
776
j = (j >> 8) | ( ((UInt16)block[i]) << 8);
777
ftab[j]++;
778
quadrant[i-1] = 0;
779
j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
780
ftab[j]++;
781
quadrant[i-2] = 0;
782
j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
783
ftab[j]++;
784
quadrant[i-3] = 0;
785
j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
786
ftab[j]++;
787
}
788
for (; i >= 0; i--) {
789
quadrant[i] = 0;
790
j = (j >> 8) | ( ((UInt16)block[i]) << 8);
791
ftab[j]++;
792
}
793
794
/*-- (emphasises close relationship of block & quadrant) --*/
795
for (i = 0; i < BZ_N_OVERSHOOT; i++) {
796
block [nblock+i] = block[i];
797
quadrant[nblock+i] = 0;
798
}
799
800
if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
801
802
/*-- Complete the initial radix sort --*/
803
for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
804
805
s = block[0] << 8;
806
i = nblock-1;
807
for (; i >= 3; i -= 4) {
808
s = (s >> 8) | (block[i] << 8);
809
j = ftab[s] -1;
810
ftab[s] = j;
811
ptr[j] = i;
812
s = (s >> 8) | (block[i-1] << 8);
813
j = ftab[s] -1;
814
ftab[s] = j;
815
ptr[j] = i-1;
816
s = (s >> 8) | (block[i-2] << 8);
817
j = ftab[s] -1;
818
ftab[s] = j;
819
ptr[j] = i-2;
820
s = (s >> 8) | (block[i-3] << 8);
821
j = ftab[s] -1;
822
ftab[s] = j;
823
ptr[j] = i-3;
824
}
825
for (; i >= 0; i--) {
826
s = (s >> 8) | (block[i] << 8);
827
j = ftab[s] -1;
828
ftab[s] = j;
829
ptr[j] = i;
830
}
831
832
/*--
833
Now ftab contains the first loc of every small bucket.
834
Calculate the running order, from smallest to largest
835
big bucket.
836
--*/
837
for (i = 0; i <= 255; i++) {
838
bigDone [i] = False;
839
runningOrder[i] = i;
840
}
841
842
{
843
Int32 vv;
844
Int32 h = 1;
845
do h = 3 * h + 1; while (h <= 256);
846
do {
847
h = h / 3;
848
for (i = h; i <= 255; i++) {
849
vv = runningOrder[i];
850
j = i;
851
while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
852
runningOrder[j] = runningOrder[j-h];
853
j = j - h;
854
if (j <= (h - 1)) goto zero;
855
}
856
zero:
857
runningOrder[j] = vv;
858
}
859
} while (h != 1);
860
}
861
862
/*--
863
The main sorting loop.
864
--*/
865
866
numQSorted = 0;
867
868
for (i = 0; i <= 255; i++) {
869
870
/*--
871
Process big buckets, starting with the least full.
872
Basically this is a 3-step process in which we call
873
mainQSort3 to sort the small buckets [ss, j], but
874
also make a big effort to avoid the calls if we can.
875
--*/
876
ss = runningOrder[i];
877
878
/*--
879
Step 1:
880
Complete the big bucket [ss] by quicksorting
881
any unsorted small buckets [ss, j], for j != ss.
882
Hopefully previous pointer-scanning phases have already
883
completed many of the small buckets [ss, j], so
884
we don't have to sort them at all.
885
--*/
886
for (j = 0; j <= 255; j++) {
887
if (j != ss) {
888
sb = (ss << 8) + j;
889
if ( ! (ftab[sb] & SETMASK) ) {
890
Int32 lo = ftab[sb] & CLEARMASK;
891
Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
892
if (hi > lo) {
893
if (verb >= 4)
894
VPrintf4 ( " qsort [0x%x, 0x%x] "
895
"done %d this %d\n",
896
ss, j, numQSorted, hi - lo + 1 );
897
mainQSort3 (
898
ptr, block, quadrant, nblock,
899
lo, hi, BZ_N_RADIX, budget
900
);
901
numQSorted += (hi - lo + 1);
902
if (*budget < 0) return;
903
}
904
}
905
ftab[sb] |= SETMASK;
906
}
907
}
908
909
AssertH ( !bigDone[ss], 1006 );
910
911
/*--
912
Step 2:
913
Now scan this big bucket [ss] so as to synthesise the
914
sorted order for small buckets [t, ss] for all t,
915
including, magically, the bucket [ss,ss] too.
916
This will avoid doing Real Work in subsequent Step 1's.
917
--*/
918
{
919
for (j = 0; j <= 255; j++) {
920
copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
921
copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
922
}
923
for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
924
k = ptr[j]-1; if (k < 0) k += nblock;
925
c1 = block[k];
926
if (!bigDone[c1])
927
ptr[ copyStart[c1]++ ] = k;
928
}
929
for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
930
k = ptr[j]-1; if (k < 0) k += nblock;
931
c1 = block[k];
932
if (!bigDone[c1])
933
ptr[ copyEnd[c1]-- ] = k;
934
}
935
}
936
937
AssertH ( (copyStart[ss]-1 == copyEnd[ss])
938
||
939
/* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
940
Necessity for this case is demonstrated by compressing
941
a sequence of approximately 48.5 million of character
942
251; 1.0.0/1.0.1 will then die here. */
943
(copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
944
1007 )
945
946
for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
947
948
/*--
949
Step 3:
950
The [ss] big bucket is now done. Record this fact,
951
and update the quadrant descriptors. Remember to
952
update quadrants in the overshoot area too, if
953
necessary. The "if (i < 255)" test merely skips
954
this updating for the last bucket processed, since
955
updating for the last bucket is pointless.
956
957
The quadrant array provides a way to incrementally
958
cache sort orderings, as they appear, so as to
959
make subsequent comparisons in fullGtU() complete
960
faster. For repetitive blocks this makes a big
961
difference (but not big enough to be able to avoid
962
the fallback sorting mechanism, exponential radix sort).
963
964
The precise meaning is: at all times:
965
966
for 0 <= i < nblock and 0 <= j <= nblock
967
968
if block[i] != block[j],
969
970
then the relative values of quadrant[i] and
971
quadrant[j] are meaningless.
972
973
else {
974
if quadrant[i] < quadrant[j]
975
then the string starting at i lexicographically
976
precedes the string starting at j
977
978
else if quadrant[i] > quadrant[j]
979
then the string starting at j lexicographically
980
precedes the string starting at i
981
982
else
983
the relative ordering of the strings starting
984
at i and j has not yet been determined.
985
}
986
--*/
987
bigDone[ss] = True;
988
989
if (i < 255) {
990
Int32 bbStart = ftab[ss << 8] & CLEARMASK;
991
Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
992
Int32 shifts = 0;
993
994
while ((bbSize >> shifts) > 65534) shifts++;
995
996
for (j = bbSize-1; j >= 0; j--) {
997
Int32 a2update = ptr[bbStart + j];
998
UInt16 qVal = (UInt16)(j >> shifts);
999
quadrant[a2update] = qVal;
1000
if (a2update < BZ_N_OVERSHOOT)
1001
quadrant[a2update + nblock] = qVal;
1002
}
1003
AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1004
}
1005
1006
}
1007
1008
if (verb >= 4)
1009
VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1010
nblock, numQSorted, nblock - numQSorted );
1011
}
1012
1013
#undef BIGFREQ
1014
#undef SETMASK
1015
#undef CLEARMASK
1016
1017
1018
/*---------------------------------------------*/
1019
/* Pre:
1020
nblock > 0
1021
arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1022
((UChar*)arr2) [0 .. nblock-1] holds block
1023
arr1 exists for [0 .. nblock-1]
1024
1025
Post:
1026
((UChar*)arr2) [0 .. nblock-1] holds block
1027
All other areas of block destroyed
1028
ftab [ 0 .. 65536 ] destroyed
1029
arr1 [0 .. nblock-1] holds sorted order
1030
*/
1031
void BZ2_blockSort ( EState* s )
1032
{
1033
UInt32* ptr = s->ptr;
1034
UChar* block = s->block;
1035
UInt32* ftab = s->ftab;
1036
Int32 nblock = s->nblock;
1037
Int32 verb = s->verbosity;
1038
Int32 wfact = s->workFactor;
1039
UInt16* quadrant;
1040
Int32 budget;
1041
Int32 budgetInit;
1042
Int32 i;
1043
1044
if (nblock < 10000) {
1045
fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1046
} else {
1047
/* Calculate the location for quadrant, remembering to get
1048
the alignment right. Assumes that &(block[0]) is at least
1049
2-byte aligned -- this should be ok since block is really
1050
the first section of arr2.
1051
*/
1052
i = nblock+BZ_N_OVERSHOOT;
1053
if (i & 1) i++;
1054
quadrant = (UInt16*)(&(block[i]));
1055
1056
/* (wfact-1) / 3 puts the default-factor-30
1057
transition point at very roughly the same place as
1058
with v0.1 and v0.9.0.
1059
Not that it particularly matters any more, since the
1060
resulting compressed stream is now the same regardless
1061
of whether or not we use the main sort or fallback sort.
1062
*/
1063
if (wfact < 1 ) wfact = 1;
1064
if (wfact > 100) wfact = 100;
1065
budgetInit = nblock * ((wfact-1) / 3);
1066
budget = budgetInit;
1067
1068
mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1069
if (verb >= 3)
1070
VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1071
budgetInit - budget,
1072
nblock,
1073
(float)(budgetInit - budget) /
1074
(float)(nblock==0 ? 1 : nblock) );
1075
if (budget < 0) {
1076
if (verb >= 2)
1077
VPrintf0 ( " too repetitive; using fallback"
1078
" sorting algorithm\n" );
1079
fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1080
}
1081
}
1082
1083
s->origPtr = -1;
1084
for (i = 0; i < s->nblock; i++)
1085
if (ptr[i] == 0)
1086
{ s->origPtr = i; break; };
1087
1088
AssertH( s->origPtr != -1, 1003 );
1089
}
1090
1091
1092
/*-------------------------------------------------------------*/
1093
/*--- end blocksort.c ---*/
1094
/*-------------------------------------------------------------*/
1095
1096