Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libbz/compress.c
1808 views
1
#pragma prototyped
2
3
/*-------------------------------------------------------------*/
4
/*--- Compression machinery (not incl block sorting) ---*/
5
/*--- compress.c ---*/
6
/*-------------------------------------------------------------*/
7
8
/*--
9
This file is a part of bzip2 and/or libbzip2, a program and
10
library for lossless, block-sorting data compression.
11
12
Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
13
14
Redistribution and use in source and binary forms, with or without
15
modification, are permitted provided that the following conditions
16
are met:
17
18
1. Redistributions of source code must retain the above copyright
19
notice, this list of conditions and the following disclaimer.
20
21
2. The origin of this software must not be misrepresented; you must
22
not claim that you wrote the original software. If you use this
23
software in a product, an acknowledgment in the product
24
documentation would be appreciated but is not required.
25
26
3. Altered source versions must be plainly marked as such, and must
27
not be misrepresented as being the original software.
28
29
4. The name of the author may not be used to endorse or promote
30
products derived from this software without specific prior written
31
permission.
32
33
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
39
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
41
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
42
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
43
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44
45
Julian Seward, Guildford, Surrey, UK.
46
[email protected]
47
bzip2/libbzip2 version 0.9.0 of 28 June 1998
48
49
This program is based on (at least) the work of:
50
Mike Burrows
51
David Wheeler
52
Peter Fenwick
53
Alistair Moffat
54
Radford Neal
55
Ian H. Witten
56
Robert Sedgewick
57
Jon L. Bentley
58
59
For more information on these sources, see the manual.
60
--*/
61
62
/*--
63
CHANGES
64
~~~~~~~
65
0.9.0 -- original version.
66
67
0.9.0a/b -- no changes in this file.
68
69
0.9.0c
70
* changed setting of nGroups in sendMTFValues() so as to
71
do a bit better on small files
72
--*/
73
74
#include "bzhdr.h"
75
76
77
/*---------------------------------------------------*/
78
/*--- Bit stream I/O ---*/
79
/*---------------------------------------------------*/
80
81
/*---------------------------------------------------*/
82
void bsInitWrite ( EState* s )
83
{
84
s->bsLive = 0;
85
s->bsBuff = 0;
86
}
87
88
89
/*---------------------------------------------------*/
90
static
91
void bsFinishWrite ( EState* s )
92
{
93
while (s->bsLive > 0) {
94
((UChar*)(s->quadrant))[s->numZ] = (UChar)(s->bsBuff >> 24);
95
s->numZ++;
96
s->bsBuff <<= 8;
97
s->bsLive -= 8;
98
}
99
}
100
101
102
/*---------------------------------------------------*/
103
#define bsNEEDW(nz) \
104
{ \
105
while (s->bsLive >= 8) { \
106
((UChar*)(s->quadrant))[s->numZ] \
107
= (UChar)(s->bsBuff >> 24); \
108
s->numZ++; \
109
s->bsBuff <<= 8; \
110
s->bsLive -= 8; \
111
} \
112
}
113
114
115
/*---------------------------------------------------*/
116
static
117
void bsW ( EState* s, Int32 n, UInt32 v )
118
{
119
bsNEEDW ( n );
120
s->bsBuff |= (v << (32 - s->bsLive - n));
121
s->bsLive += n;
122
}
123
124
125
/*---------------------------------------------------*/
126
static
127
void bsPutUInt32 ( EState* s, UInt32 u )
128
{
129
bsW ( s, 8, (u >> 24) & 0xffL );
130
bsW ( s, 8, (u >> 16) & 0xffL );
131
bsW ( s, 8, (u >> 8) & 0xffL );
132
bsW ( s, 8, u & 0xffL );
133
}
134
135
136
/*---------------------------------------------------*/
137
static
138
void bsPutUChar ( EState* s, UChar c )
139
{
140
bsW( s, 8, (UInt32)c );
141
}
142
143
144
/*---------------------------------------------------*/
145
/*--- The back end proper ---*/
146
/*---------------------------------------------------*/
147
148
/*---------------------------------------------------*/
149
static
150
void makeMaps_e ( EState* s )
151
{
152
Int32 i;
153
s->nInUse = 0;
154
for (i = 0; i < 256; i++)
155
if (s->inUse[i]) {
156
s->unseqToSeq[i] = s->nInUse;
157
s->nInUse++;
158
}
159
}
160
161
162
/*---------------------------------------------------*/
163
static
164
void generateMTFValues ( EState* s )
165
{
166
UChar yy[256];
167
Int32 i, j;
168
UChar tmp;
169
UChar tmp2;
170
Int32 zPend;
171
Int32 wr;
172
Int32 EOB;
173
174
makeMaps_e ( s );
175
EOB = s->nInUse+1;
176
177
for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
178
179
wr = 0;
180
zPend = 0;
181
for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
182
183
for (i = 0; i < s->nblock; i++) {
184
UChar ll_i;
185
186
AssertD ( wr <= i, "generateMTFValues(1)" );
187
j = s->zptr[i]-1; if (j < 0) j += s->nblock;
188
ll_i = s->unseqToSeq[s->block[j]];
189
AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
190
191
j = 0;
192
tmp = yy[j];
193
while ( ll_i != tmp ) {
194
j++;
195
tmp2 = tmp;
196
tmp = yy[j];
197
yy[j] = tmp2;
198
};
199
yy[0] = tmp;
200
201
if (j == 0) {
202
zPend++;
203
} else {
204
if (zPend > 0) {
205
zPend--;
206
while (True) {
207
switch (zPend % 2) {
208
case 0: s->szptr[wr] = BZ_RUNA; wr++; s->mtfFreq[BZ_RUNA]++; break;
209
case 1: s->szptr[wr] = BZ_RUNB; wr++; s->mtfFreq[BZ_RUNB]++; break;
210
};
211
if (zPend < 2) break;
212
zPend = (zPend - 2) / 2;
213
};
214
zPend = 0;
215
}
216
s->szptr[wr] = j+1; wr++; s->mtfFreq[j+1]++;
217
}
218
}
219
220
if (zPend > 0) {
221
zPend--;
222
while (True) {
223
switch (zPend % 2) {
224
case 0: s->szptr[wr] = BZ_RUNA; wr++; s->mtfFreq[BZ_RUNA]++; break;
225
case 1: s->szptr[wr] = BZ_RUNB; wr++; s->mtfFreq[BZ_RUNB]++; break;
226
};
227
if (zPend < 2) break;
228
zPend = (zPend - 2) / 2;
229
};
230
}
231
232
s->szptr[wr] = EOB; wr++; s->mtfFreq[EOB]++;
233
234
s->nMTF = wr;
235
}
236
237
238
/*---------------------------------------------------*/
239
#define BZ_LESSER_ICOST 0
240
#define BZ_GREATER_ICOST 15
241
242
static
243
void sendMTFValues ( EState* s )
244
{
245
Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
246
Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
247
Int32 nGroups, nBytes;
248
249
/*--
250
UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
251
is a global since the decoder also needs it.
252
253
Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
254
Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
255
are also globals only used in this proc.
256
Made global to keep stack frame size small.
257
--*/
258
259
260
UInt16 cost[BZ_N_GROUPS];
261
Int32 fave[BZ_N_GROUPS];
262
263
if (s->verbosity >= 3)
264
VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
265
"%d+2 syms in use\n",
266
s->nblock, s->nMTF, s->nInUse );
267
268
alphaSize = s->nInUse+2;
269
for (t = 0; t < BZ_N_GROUPS; t++)
270
for (v = 0; v < alphaSize; v++)
271
s->len[t][v] = BZ_GREATER_ICOST;
272
273
/*--- Decide how many coding tables to use ---*/
274
AssertH ( s->nMTF > 0, 3001 );
275
if (s->nMTF < 200) nGroups = 2; else
276
if (s->nMTF < 600) nGroups = 3; else
277
if (s->nMTF < 1200) nGroups = 4; else
278
if (s->nMTF < 2400) nGroups = 5; else
279
nGroups = 6;
280
281
/*--- Generate an initial set of coding tables ---*/
282
{
283
Int32 nPart, remF, tFreq, aFreq;
284
285
nPart = nGroups;
286
remF = s->nMTF;
287
gs = 0;
288
while (nPart > 0) {
289
tFreq = remF / nPart;
290
ge = gs-1;
291
aFreq = 0;
292
while (aFreq < tFreq && ge < alphaSize-1) {
293
ge++;
294
aFreq += s->mtfFreq[ge];
295
}
296
297
if (ge > gs
298
&& nPart != nGroups && nPart != 1
299
&& ((nGroups-nPart) % 2 == 1)) {
300
aFreq -= s->mtfFreq[ge];
301
ge--;
302
}
303
304
if (s->verbosity >= 3)
305
VPrintf5( " initial group %d, [%d .. %d], "
306
"has %d syms (%4.1f%%)\n",
307
nPart, gs, ge, aFreq,
308
(100.0 * (float)aFreq) / (float)(s->nMTF) );
309
310
for (v = 0; v < alphaSize; v++)
311
if (v >= gs && v <= ge)
312
s->len[nPart-1][v] = BZ_LESSER_ICOST; else
313
s->len[nPart-1][v] = BZ_GREATER_ICOST;
314
315
nPart--;
316
gs = ge+1;
317
remF -= aFreq;
318
}
319
}
320
321
/*---
322
Iterate up to BZ_N_ITERS times to improve the tables.
323
---*/
324
for (iter = 0; iter < BZ_N_ITERS; iter++) {
325
326
for (t = 0; t < nGroups; t++) fave[t] = 0;
327
328
for (t = 0; t < nGroups; t++)
329
for (v = 0; v < alphaSize; v++)
330
s->rfreq[t][v] = 0;
331
332
nSelectors = 0;
333
totc = 0;
334
gs = 0;
335
while (True) {
336
337
/*--- Set group start & end marks. --*/
338
if (gs >= s->nMTF) break;
339
ge = gs + BZ_G_SIZE - 1;
340
if (ge >= s->nMTF) ge = s->nMTF-1;
341
342
/*--
343
Calculate the cost of this group as coded
344
by each of the coding tables.
345
--*/
346
for (t = 0; t < nGroups; t++) cost[t] = 0;
347
348
if (nGroups == 6) {
349
register UInt16 cost0, cost1, cost2, cost3, cost4, cost5;
350
cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
351
for (i = gs; i <= ge; i++) {
352
UInt16 icv = s->szptr[i];
353
cost0 += s->len[0][icv];
354
cost1 += s->len[1][icv];
355
cost2 += s->len[2][icv];
356
cost3 += s->len[3][icv];
357
cost4 += s->len[4][icv];
358
cost5 += s->len[5][icv];
359
}
360
cost[0] = cost0; cost[1] = cost1; cost[2] = cost2;
361
cost[3] = cost3; cost[4] = cost4; cost[5] = cost5;
362
} else {
363
for (i = gs; i <= ge; i++) {
364
UInt16 icv = s->szptr[i];
365
for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
366
}
367
}
368
369
/*--
370
Find the coding table which is best for this group,
371
and record its identity in the selector table.
372
--*/
373
bc = 999999999; bt = -1;
374
for (t = 0; t < nGroups; t++)
375
if (cost[t] < bc) { bc = cost[t]; bt = t; };
376
totc += bc;
377
fave[bt]++;
378
s->selector[nSelectors] = bt;
379
nSelectors++;
380
381
/*--
382
Increment the symbol frequencies for the selected table.
383
--*/
384
for (i = gs; i <= ge; i++)
385
s->rfreq[bt][ s->szptr[i] ]++;
386
387
gs = ge+1;
388
}
389
if (s->verbosity >= 3) {
390
VPrintf2 ( " pass %d: size is %d, grp uses are ",
391
iter+1, totc/8 );
392
for (t = 0; t < nGroups; t++)
393
VPrintf1 ( "%d ", fave[t] );
394
VPrintf0 ( "\n" );
395
}
396
397
/*--
398
Recompute the tables based on the accumulated frequencies.
399
--*/
400
for (t = 0; t < nGroups; t++)
401
hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
402
alphaSize, 20 );
403
}
404
405
406
AssertH( nGroups < 8, 3002 );
407
AssertH( nSelectors < 32768 &&
408
nSelectors <= (2 + (900000 / BZ_G_SIZE)),
409
3003 );
410
411
412
/*--- Compute MTF values for the selectors. ---*/
413
{
414
UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
415
for (i = 0; i < nGroups; i++) pos[i] = i;
416
for (i = 0; i < nSelectors; i++) {
417
ll_i = s->selector[i];
418
j = 0;
419
tmp = pos[j];
420
while ( ll_i != tmp ) {
421
j++;
422
tmp2 = tmp;
423
tmp = pos[j];
424
pos[j] = tmp2;
425
};
426
pos[0] = tmp;
427
s->selectorMtf[i] = j;
428
}
429
};
430
431
/*--- Assign actual codes for the tables. --*/
432
for (t = 0; t < nGroups; t++) {
433
minLen = 32;
434
maxLen = 0;
435
for (i = 0; i < alphaSize; i++) {
436
if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
437
if (s->len[t][i] < minLen) minLen = s->len[t][i];
438
}
439
AssertH ( !(maxLen > 20), 3004 );
440
AssertH ( !(minLen < 1), 3005 );
441
hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
442
minLen, maxLen, alphaSize );
443
}
444
445
/*--- Transmit the mapping table. ---*/
446
{
447
Bool inUse16[16];
448
for (i = 0; i < 16; i++) {
449
inUse16[i] = False;
450
for (j = 0; j < 16; j++)
451
if (s->inUse[i * 16 + j]) inUse16[i] = True;
452
}
453
454
nBytes = s->numZ;
455
for (i = 0; i < 16; i++)
456
if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
457
458
for (i = 0; i < 16; i++)
459
if (inUse16[i])
460
for (j = 0; j < 16; j++) {
461
if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
462
}
463
464
if (s->verbosity >= 3)
465
VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes );
466
}
467
468
/*--- Now the selectors. ---*/
469
nBytes = s->numZ;
470
bsW ( s, 3, nGroups );
471
bsW ( s, 15, nSelectors );
472
for (i = 0; i < nSelectors; i++) {
473
for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
474
bsW(s,1,0);
475
}
476
if (s->verbosity >= 3)
477
VPrintf1( "selectors %d, ", s->numZ-nBytes );
478
479
/*--- Now the coding tables. ---*/
480
nBytes = s->numZ;
481
482
for (t = 0; t < nGroups; t++) {
483
Int32 curr = s->len[t][0];
484
bsW ( s, 5, curr );
485
for (i = 0; i < alphaSize; i++) {
486
while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
487
while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
488
bsW ( s, 1, 0 );
489
}
490
}
491
492
if (s->verbosity >= 3)
493
VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
494
495
/*--- And finally, the block data proper ---*/
496
nBytes = s->numZ;
497
selCtr = 0;
498
gs = 0;
499
while (True) {
500
if (gs >= s->nMTF) break;
501
ge = gs + BZ_G_SIZE - 1;
502
if (ge >= s->nMTF) ge = s->nMTF-1;
503
for (i = gs; i <= ge; i++) {
504
AssertH ( s->selector[selCtr] < nGroups, 3006 );
505
bsW ( s,
506
s->len [s->selector[selCtr]] [s->szptr[i]],
507
s->code [s->selector[selCtr]] [s->szptr[i]] );
508
}
509
510
gs = ge+1;
511
selCtr++;
512
}
513
AssertH( selCtr == nSelectors, 3007 );
514
515
if (s->verbosity >= 3)
516
VPrintf1( "codes %d\n", s->numZ-nBytes );
517
}
518
519
520
/*---------------------------------------------------*/
521
void compressBlock ( EState* s, Bool is_last_block )
522
{
523
if (s->nblock > 0) {
524
525
BZ_FINALISE_CRC ( s->blockCRC );
526
s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
527
s->combinedCRC ^= s->blockCRC;
528
if (s->blockNo > 1) s->numZ = 0;
529
530
if (s->verbosity >= 2)
531
VPrintf4( " block %d: crc = 0x%8x, "
532
"combined CRC = 0x%8x, size = %d\n",
533
s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
534
535
blockSort ( s );
536
}
537
538
/*-- If this is the first block, create the stream header. --*/
539
if (s->blockNo == 1) {
540
bsInitWrite ( s );
541
bsPutUChar ( s, 'B' );
542
bsPutUChar ( s, 'Z' );
543
bsPutUChar ( s, 'h' );
544
bsPutUChar ( s, '0' + s->blockSize100k );
545
}
546
547
if (s->nblock > 0) {
548
549
bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
550
bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
551
bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
552
553
/*-- Now the block's CRC, so it is in a known place. --*/
554
bsPutUInt32 ( s, s->blockCRC );
555
556
/*-- Now a single bit indicating randomisation. --*/
557
if (s->blockRandomised) {
558
bsW(s,1,1); s->nBlocksRandomised++;
559
} else
560
bsW(s,1,0);
561
562
bsW ( s, 24, s->origPtr );
563
generateMTFValues ( s );
564
sendMTFValues ( s );
565
}
566
567
568
/*-- If this is the last block, add the stream trailer. --*/
569
if (is_last_block) {
570
571
if (s->verbosity >= 2 && s->nBlocksRandomised > 0)
572
VPrintf2 ( " %d block%s needed randomisation\n",
573
s->nBlocksRandomised,
574
s->nBlocksRandomised == 1 ? "" : "s" );
575
576
bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
577
bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
578
bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
579
bsPutUInt32 ( s, s->combinedCRC );
580
if (s->verbosity >= 2)
581
VPrintf1( " final combined CRC = 0x%x\n ", s->combinedCRC );
582
bsFinishWrite ( s );
583
}
584
}
585
586
587
/*-------------------------------------------------------------*/
588
/*--- end compress.c ---*/
589
/*-------------------------------------------------------------*/
590
591