Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Kitware
GitHub Repository: Kitware/CMake
Path: blob/master/Utilities/cmbzip2/bzip2recover.c
3150 views
1
/*-----------------------------------------------------------*/
2
/*--- Block recoverer program for bzip2 ---*/
3
/*--- bzip2recover.c ---*/
4
/*-----------------------------------------------------------*/
5
6
/* ------------------------------------------------------------------
   This file is part of bzip2/libbzip2, a program and library for
   lossless, block-sorting data compression.

   bzip2/libbzip2 version 1.0.8 of 13 July 2019
   Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>

   Please read the WARNING, DISCLAIMER and PATENTS sections in the
   README file.

   This program is released under the terms of the license contained
   in the file LICENSE.
   ------------------------------------------------------------------ */
19
20
/* This program is a complete hack and should be rewritten properly.
21
It isn't very complicated. */
22
23
#include <stdio.h>
24
#include <errno.h>
25
#include <stdlib.h>
26
#include <string.h>
27
28
29
/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms, we take advantage of the 64-bit
   int support to circumvent this problem.  Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.
*/
38
/* MaybeUInt64 is the type used for bit positions and byte counters:
   64 bits wide where the compiler is known to offer such a type,
   otherwise plain unsigned int.  MaybeUInt64_FMT is the printf
   conversion that matches the chosen type. */
#if defined(__GNUC__)
   typedef unsigned long long int MaybeUInt64;
   /* "ll" is the standard length modifier for (unsigned) long long;
      the C standard defines "L" only for long double conversions. */
#  define MaybeUInt64_FMT "%llu"
#elif defined(_MSC_VER)
   typedef unsigned __int64 MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   /* No 64-bit type known for this compiler. */
   typedef unsigned int MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
50
51
typedef unsigned int UInt32;
52
typedef int Int32;
53
typedef unsigned char UChar;
54
typedef char Char;
55
typedef unsigned char Bool;
56
#define True ((Bool)1)
57
#define False ((Bool)0)
58
59
60
#define BZ_MAX_FILENAME 2000
61
62
Char inFileName[BZ_MAX_FILENAME];
63
Char outFileName[BZ_MAX_FILENAME];
64
Char progName[BZ_MAX_FILENAME];
65
66
MaybeUInt64 bytesOut = 0;
67
MaybeUInt64 bytesIn = 0;
68
69
70
/*---------------------------------------------------*/
71
/*--- Header bytes ---*/
72
/*---------------------------------------------------*/
73
74
#define BZ_HDR_B 0x42 /* 'B' */
75
#define BZ_HDR_Z 0x5a /* 'Z' */
76
#define BZ_HDR_h 0x68 /* 'h' */
77
#define BZ_HDR_0 0x30 /* '0' */
78
79
80
/*---------------------------------------------------*/
81
/*--- I/O errors ---*/
82
/*---------------------------------------------------*/
83
84
/*---------------------------------------------*/
85
static void readError ( void )
86
{
87
fprintf ( stderr,
88
"%s: I/O error reading `%s', possible reason follows.\n",
89
progName, inFileName );
90
perror ( progName );
91
fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
92
progName );
93
exit ( 1 );
94
}
95
96
97
/*---------------------------------------------*/
98
static void writeError ( void )
99
{
100
fprintf ( stderr,
101
"%s: I/O error reading `%s', possible reason follows.\n",
102
progName, inFileName );
103
perror ( progName );
104
fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
105
progName );
106
exit ( 1 );
107
}
108
109
110
/*---------------------------------------------*/
111
/* Report that an allocation of n bytes failed, then terminate.
   Exits with status 1; never returns. */
static void mallocFail ( Int32 n )
{
   FILE* err = stderr;

   fprintf(err,
           "%s: malloc failed on request for %d bytes.\n",
           progName, n);
   fprintf(err, "%s: warning: output file(s) may be incomplete.\n",
           progName);
   exit(1);
}
120
121
122
/*---------------------------------------------*/
123
/* Report that the input holds more blocks than the fixed-size tables
   can record (max_handled_blocks is the limit printed), then terminate.
   Exits with status 1; never returns. */
static void tooManyBlocks ( Int32 max_handled_blocks )
{
   FILE* err = stderr;

   fprintf(err,
           "%s: `%s' appears to contain more than %d blocks\n",
           progName, inFileName, max_handled_blocks);
   fprintf(err,
           "%s: and cannot be handled. To fix, increase\n",
           progName);
   fprintf(err,
           "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
           progName);
   exit(1);
}
136
137
138
139
/*---------------------------------------------------*/
140
/*--- Bit stream I/O ---*/
141
/*---------------------------------------------------*/
142
143
typedef
144
struct {
145
FILE* handle;
146
Int32 buffer;
147
Int32 buffLive;
148
Char mode;
149
}
150
BitStream;
151
152
153
/*---------------------------------------------*/
154
static BitStream* bsOpenReadStream ( FILE* stream )
155
{
156
BitStream *bs = malloc ( sizeof(BitStream) );
157
if (bs == NULL) mallocFail ( sizeof(BitStream) );
158
bs->handle = stream;
159
bs->buffer = 0;
160
bs->buffLive = 0;
161
bs->mode = 'r';
162
return bs;
163
}
164
165
166
/*---------------------------------------------*/
167
static BitStream* bsOpenWriteStream ( FILE* stream )
168
{
169
BitStream *bs = malloc ( sizeof(BitStream) );
170
if (bs == NULL) mallocFail ( sizeof(BitStream) );
171
bs->handle = stream;
172
bs->buffer = 0;
173
bs->buffLive = 0;
174
bs->mode = 'w';
175
return bs;
176
}
177
178
179
/*---------------------------------------------*/
180
/* Append the low bit of `bit` to a write stream.
   Bits are collected most-significant first.  A completed byte is not
   written when its eighth bit arrives but on the following call, so
   the final byte is always left in the buffer for bsClose() to emit. */
static void bsPutBit ( BitStream* bs, Int32 bit )
{
   const Int32 lowBit = bit & 0x1;

   if (bs->buffLive != 8) {
      /* Room left in the current byte: shift the new bit in. */
      bs->buffer = ( (bs->buffer << 1) | lowBit );
      bs->buffLive++;
      return;
   }

   /* Buffer holds a full byte: write it, then start the next byte
      with the new bit. */
   if (putc((UChar) bs->buffer, bs->handle) == EOF)
      writeError();
   bytesOut++;
   bs->buffer   = lowBit;
   bs->buffLive = 1;
}
193
194
195
/*---------------------------------------------*/
196
/*--
197
Returns 0 or 1, or 2 to indicate EOF.
198
--*/
199
static Int32 bsGetBit ( BitStream* bs )
200
{
201
if (bs->buffLive > 0) {
202
bs->buffLive --;
203
return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
204
} else {
205
Int32 retVal = getc ( bs->handle );
206
if ( retVal == EOF ) {
207
if (errno != 0) readError();
208
return 2;
209
}
210
bs->buffLive = 7;
211
bs->buffer = retVal;
212
return ( ((bs->buffer) >> 7) & 0x1 );
213
}
214
}
215
216
217
/*---------------------------------------------*/
218
/* Finish with a bit stream: for a write stream, pad the pending byte
   with zero bits, write and flush it; then close the underlying file
   and release the BitStream.  Any failure is reported through
   writeError() or readError() (chosen by the stream's mode), neither
   of which returns. */
static void bsClose ( BitStream* bs )
{
   const Int32 isWriter = (bs->mode == 'w');

   if (isWriter) {
      /* Left-align the bits collected so far within the byte. */
      for (; bs->buffLive < 8; bs->buffLive++)
         bs->buffer <<= 1;

      if (putc((UChar) (bs->buffer), bs->handle) == EOF)
         writeError();
      bytesOut++;

      if (fflush(bs->handle) == EOF)
         writeError();
   }

   if (fclose(bs->handle) == EOF) {
      if (isWriter)
         writeError();
      else
         readError();
   }

   free(bs);
}
239
240
241
/*---------------------------------------------*/
242
/* Write the eight bits of c to the stream, bit 7 first. */
static void bsPutUChar ( BitStream* bs, UChar c )
{
   const UInt32 value = (UInt32) c;
   Int32 shift = 8;

   while (shift-- > 0)
      bsPutBit(bs, (value >> shift) & 0x1);
}
248
249
250
/*---------------------------------------------*/
251
/* Write the low 32 bits of c to the stream, bit 31 first. */
static void bsPutUInt32 ( BitStream* bs, UInt32 c )
{
   Int32 shift = 32;

   while (shift-- > 0)
      bsPutBit(bs, (c >> shift) & 0x1);
}
258
259
260
/*---------------------------------------------*/
261
/* Return True when name ends in ".bz2" and has at least one character
   in front of that suffix; otherwise return False. */
static Bool endsInBz2 ( Char* name )
{
   Int32 len = strlen(name);

   /* Names of four characters or fewer are rejected outright. */
   if (len <= 4)
      return False;

   return (Bool) (strcmp(name + len - 4, ".bz2") == 0);
}
271
272
273
/*---------------------------------------------------*/
274
/*--- ---*/
275
/*---------------------------------------------------*/
276
277
/* This logic isn't really right when it comes to Cygwin. */
278
#ifdef _WIN32
279
# define BZ_SPLIT_SYM '\\' /* path splitter on Windows platform */
280
#else
281
# define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */
282
#endif
283
284
#define BLOCK_HEADER_HI 0x00003141UL
285
#define BLOCK_HEADER_LO 0x59265359UL
286
287
#define BLOCK_ENDMARK_HI 0x00001772UL
288
#define BLOCK_ENDMARK_LO 0x45385090UL
289
290
/* Increase if necessary. However, a .bz2 file with > 50000 blocks
291
would have an uncompressed size of at least 40GB, so the chances
292
are low you'll need to up this.
293
*/
294
#define BZ_MAX_HANDLED_BLOCKS 50000
295
296
MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
297
MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS];
298
MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
299
MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS];
300
301
/* Entry point.  Expects exactly one argument: the damaged .bz2 file.

   Pass 1 scans the file bit by bit for the 48-bit block-header and
   end-of-stream patterns and records, in rbStart[]/rbEnd[], the bit
   range of every block that is long enough to be worth recovering.

   Pass 2 re-reads the file and copies each recorded range into its own
   output file, named by inserting "recNNNNN" in front of the input's
   base name (and appending ".bz2" if missing).  Each output gets a
   fresh stream header, block header, end-of-stream marker and the
   block's CRC as the stream CRC.

   Returns 0 on success; every failure path calls exit(1). */
Int32 main ( Int32 argc, Char** argv )
{
   FILE*       inFile;
   FILE*       outFile;
   BitStream*  bsIn, *bsWr;
   Int32       b, wrBlock, currBlock, rbCtr;
   MaybeUInt64 bitsRead;

   UInt32      buffHi, buffLo, blockCRC;
   Char*       p;

   /* argv[0] is not guaranteed to be present; fall back to a fixed
      name rather than handing a null pointer to strncpy. */
   strncpy ( progName,
             (argc > 0 && argv[0] != NULL) ? argv[0] : "bzip2recover",
             BZ_MAX_FILENAME-1 );
   progName[BZ_MAX_FILENAME-1]='\0';
   inFileName[0] = outFileName[0] = 0;

   fprintf ( stderr,
             "bzip2recover 1.0.8: extracts blocks from damaged .bz2 files.\n" );

   if (argc != 2) {
      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
                        progName, progName );
      switch (sizeof(MaybeUInt64)) {
         case 8:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: None\n");
            break;
         case 4:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: 512 MB\n");
            fprintf(stderr,
                    "\tto circumvent, recompile with MaybeUInt64 as an\n"
                    "\tunsigned 64-bit int.\n");
            break;
         default:
            fprintf(stderr,
                    "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
                    "configuration error.\n");
            break;
      }
      exit(1);
   }

   /* Leave 20 bytes of headroom: the output name adds "recNNNNN" and
      possibly ".bz2" to the input name. */
   if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
      fprintf ( stderr,
                "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
                progName, (int)strlen(argv[1]) );
      exit(1);
   }

   strcpy ( inFileName, argv[1] );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
      exit(1);
   }

   /*-- Pass 1: locate block boundaries. --*/

   bsIn = bsOpenReadStream ( inFile );
   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );

   bitsRead = 0;
   buffHi = buffLo = 0;
   currBlock = 0;
   bStart[currBlock] = 0;

   rbCtr = 0;

   while (True) {
      b = bsGetBit ( bsIn );
      bitsRead++;
      if (b == 2) {
         /* End of input: note the trailing, unterminated block if it
            is at least 40 bits long. */
         if (bitsRead >= bStart[currBlock] &&
            (bitsRead - bStart[currBlock]) >= 40) {
            bEnd[currBlock] = bitsRead-1;
            if (currBlock > 0)
               fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
                                 " to " MaybeUInt64_FMT " (incomplete)\n",
                         currBlock, bStart[currBlock], bEnd[currBlock] );
         } else
            currBlock--;
         break;
      }

      /* buffHi:buffLo is a 64-bit shift register of the latest bits;
         only its low 48 bits take part in the comparison. */
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
             && buffLo == BLOCK_HEADER_LO)
           ||
           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
             && buffLo == BLOCK_ENDMARK_LO)
         ) {
         /* The marker just matched occupies the last 48 bits, so the
            previous block ended 49 bits back. */
         if (bitsRead > 49) {
            bEnd[currBlock] = bitsRead-49;
         } else {
            bEnd[currBlock] = 0;
         }
         if (currBlock > 0 &&
             (bEnd[currBlock] - bStart[currBlock]) >= 130) {
            fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
                              " to " MaybeUInt64_FMT "\n",
                      rbCtr+1, bStart[currBlock], bEnd[currBlock] );
            rbStart[rbCtr] = bStart[currBlock];
            rbEnd[rbCtr] = bEnd[currBlock];
            rbCtr++;
         }
         /* bStart[] is written at index currBlock+1 just below, so
            currBlock must stop one short of the table size. */
         if (currBlock >= BZ_MAX_HANDLED_BLOCKS-1)
            tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
         currBlock++;

         bStart[currBlock] = bitsRead;
      }
   }

   bsClose ( bsIn );

   /*-- identified blocks run from 1 to rbCtr inclusive. --*/

   if (rbCtr < 1) {
      fprintf ( stderr,
                "%s: sorry, I couldn't find any block boundaries.\n",
                progName );
      exit(1);
   }

   /*-- Pass 2: copy each recorded block into its own file. --*/

   fprintf ( stderr, "%s: splitting into blocks\n", progName );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
      exit(1);
   }
   bsIn = bsOpenReadStream ( inFile );

   /*-- placate gcc's dataflow analyser --*/
   blockCRC = 0; bsWr = NULL;

   bitsRead = 0;
   outFile = NULL;
   wrBlock = 0;
   while (True) {
      b = bsGetBit(bsIn);
      if (b == 2) break;
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);

      /* 48 bits into the block, the most recent 32 bits in the shift
         register are the block's CRC. */
      if (bitsRead == 47+rbStart[wrBlock])
         blockCRC = (buffHi << 16) | (buffLo >> 16);

      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
                          && bitsRead <= rbEnd[wrBlock]) {
         bsPutBit ( bsWr, b );
      }

      bitsRead++;

      if (bitsRead == rbEnd[wrBlock]+1) {
         if (outFile != NULL) {
            /* End-of-stream marker followed by the stream CRC. */
            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
            bsPutUInt32 ( bsWr, blockCRC );
            bsClose ( bsWr );
            outFile = NULL;
         }
         /* Stop after the last recorded block: entries at index rbCtr
            and beyond were never filled in, and nothing further in the
            input is needed. */
         if (wrBlock+1 >= rbCtr) break;
         wrBlock++;
      } else
      if (bitsRead == rbStart[wrBlock]) {
         /* Create the output file name, correctly handling leading paths.
            (31.10.2001 by Sergey E. Kusikov) */
         Char* split;
         Int32 ofs, k;
         for (k = 0; k < BZ_MAX_FILENAME; k++)
            outFileName[k] = 0;
         strcpy (outFileName, inFileName);
         split = strrchr (outFileName, BZ_SPLIT_SYM);
         if (split == NULL) {
            split = outFileName;
         } else {
            ++split;
         }
         /* Now split points to the start of the basename. */
         ofs = split - outFileName;
         sprintf (split, "rec%5d", wrBlock+1);
         /* %5d pads with spaces; turn the padding into zeros. */
         for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
         strcat (outFileName, inFileName + ofs);

         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );

         fprintf ( stderr, " writing block %d to `%s' ...\n",
                           wrBlock+1, outFileName );

         outFile = fopen ( outFileName, "wb" );
         if (outFile == NULL) {
            fprintf ( stderr, "%s: can't write `%s'\n",
                      progName, outFileName );
            exit(1);
         }
         bsWr = bsOpenWriteStream ( outFile );
         /* Stream header "BZh9", then the block-header marker. */
         bsPutUChar ( bsWr, BZ_HDR_B );
         bsPutUChar ( bsWr, BZ_HDR_Z );
         bsPutUChar ( bsWr, BZ_HDR_h );
         bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
      }
   }

   /* Release the input stream opened for the second pass. */
   bsClose ( bsIn );

   fprintf ( stderr, "%s: finished\n", progName );
   return 0;
}
511
512
513
514
/*-----------------------------------------------------------*/
515
/*--- end bzip2recover.c ---*/
516
/*-----------------------------------------------------------*/
517
518