Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/file/src/compress.c
39478 views
1
/*
2
* Copyright (c) Ian F. Darwin 1986-1995.
3
* Software written by Ian F. Darwin and others;
4
* maintained 1995-present by Christos Zoulas and others.
5
*
6
* Redistribution and use in source and binary forms, with or without
7
* modification, are permitted provided that the following conditions
8
* are met:
9
* 1. Redistributions of source code must retain the above copyright
10
* notice immediately at the beginning of the file, without modification,
11
* this list of conditions, and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE.
27
*/
28
/*
29
* compress routines:
30
* zmagic() - returns 0 if not recognized, uncompresses and prints
31
* information if recognized
32
* uncompress(method, old, n, newch) - uncompress old into new,
33
* using method, return sizeof new
34
*/
35
#include "file.h"
36
37
#ifndef lint
38
FILE_RCSID("@(#)$File: compress.c,v 1.158 2024/11/10 16:52:27 christos Exp $")
39
#endif
40
41
#include "magic.h"
42
#include <stdlib.h>
43
#ifdef HAVE_UNISTD_H
44
#include <unistd.h>
45
#endif
46
#ifdef HAVE_SPAWN_H
47
#include <spawn.h>
48
#endif
49
#include <stdio.h>
50
#include <string.h>
51
#include <errno.h>
52
#include <ctype.h>
53
#include <stdarg.h>
54
#include <signal.h>
55
#ifndef HAVE_SIG_T
56
typedef void (*sig_t)(int);
57
#endif /* HAVE_SIG_T */
58
#ifdef HAVE_SYS_IOCTL_H
59
#include <sys/ioctl.h>
60
#endif
61
#ifdef HAVE_SYS_WAIT_H
62
#include <sys/wait.h>
63
#endif
64
#if defined(HAVE_SYS_TIME_H)
65
#include <sys/time.h>
66
#endif
67
68
#if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
69
#define BUILTIN_DECOMPRESS
70
#include <zlib.h>
71
#endif
72
73
#if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
74
#define BUILTIN_BZLIB
75
#include <bzlib.h>
76
#endif
77
78
#if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
79
#define BUILTIN_XZLIB
80
#include <lzma.h>
81
#endif
82
83
#if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
84
#define BUILTIN_ZSTDLIB
85
#include <zstd.h>
86
#include <zstd_errors.h>
87
#endif
88
89
#if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
90
#define BUILTIN_LZLIB
91
#include <lzlib.h>
92
#endif
93
94
#ifdef notyet
95
#if defined(HAVE_LRZIP_H) && defined(LRZIPLIBSUPPORT)
96
#define BUILTIN_LRZIP
97
#include <Lrzip.h>
98
#endif
99
#endif
100
101
#ifdef DEBUG
102
int tty = -1;
103
#define DPRINTF(...) do { \
104
if (tty == -1) \
105
tty = open("/dev/tty", O_RDWR); \
106
if (tty == -1) \
107
abort(); \
108
dprintf(tty, __VA_ARGS__); \
109
} while (/*CONSTCOND*/0)
110
#else
111
#define DPRINTF(...)
112
#endif
113
114
#ifdef ZLIBSUPPORT
115
/*
116
* The following python code is not really used because ZLIBSUPPORT is only
117
* defined if we have a built-in zlib, and the built-in zlib handles that.
118
* That is not true for android where we have zlib.h and not -lz.
119
*/
120
static const char zlibcode[] =
121
"import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
122
123
static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
124
125
/*
 * Decide whether buf starts with a plausible two-byte zlib header.
 *
 * buf must have at least 2 readable bytes (compr[] registers this
 * matcher with a magic length of -2).
 *
 * Returns 1 when the low nibble of the first byte is 8, its top bit is
 * clear, and the two bytes taken as a big-endian 16-bit number are a
 * multiple of 31; returns 0 otherwise.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int check;

	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;
	/*
	 * The check value is defined on the byte sequence, first byte most
	 * significant.  Shifts operate on values, not on memory layout, so
	 * no host endianness test is needed (the previous one swapped the
	 * bytes on big-endian hosts and broke the test there).
	 */
	check = ((unsigned int)buf[0] << 8) | buf[1];
	return check % 31 == 0;
}
141
#endif
142
143
/*
 * Matcher for the "lzma" entry of compr[].  buf must have at least 13
 * readable bytes (the entry is registered with a magic length of -13).
 *
 * Returns 1 when byte 0 is 0x5d, bytes 1 and 2 are zero, and byte 12 is
 * either 0x00 or 0xff; returns 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	const int head_ok = buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0;

	if (!head_ok)
		return 0;
	return buf[12] == 0 || buf[12] == 0xff;
}
152
153
/* Command-line flags shared by the gzip-style tools below. */
#define gzip_flags "-cd"
#define lzip_flags gzip_flags

/*
 * NULL-terminated argument vectors for the external decompressors that
 * compr[] refers to; element 0 is the program name.
 */
static const char *gzip_args[] = { "gzip", gzip_flags, NULL };
static const char *uncompress_args[] = { "uncompress", "-c", NULL };
static const char *bzip2_args[] = { "bzip2", "-cd", NULL };
static const char *lzip_args[] = { "lzip", lzip_flags, NULL };
static const char *xz_args[] = { "xz", "-cd", NULL };
static const char *lrzip_args[] = { "lrzip", "-qdf", "-", NULL };
static const char *lz4_args[] = { "lz4", "-cd", NULL };
static const char *zstd_args[] = { "zstd", "-cd", NULL };

/* Placeholder values stored in the `unused' member of compr[]. */
#define do_zlib		NULL
#define do_bzlib	NULL
183
184
file_private const struct {
185
union {
186
const char *magic;
187
int (*func)(const unsigned char *);
188
} u;
189
int maglen;
190
const char **argv;
191
void *unused;
192
} compr[] = {
193
#define METH_FROZEN 2
194
#define METH_BZIP 7
195
#define METH_XZ 9
196
#define METH_LZIP 8
197
#define METH_LRZIP 10
198
#define METH_ZSTD 12
199
#define METH_LZMA 13
200
#define METH_ZLIB 14
201
{ { .magic = "\037\235" }, 2, gzip_args, NULL }, /* 0, compressed */
202
/* Uncompress can get stuck; so use gzip first if we have it
203
* Idea from Damien Clark, thanks! */
204
{ { .magic = "\037\235" }, 2, uncompress_args, NULL },/* 1, compressed */
205
{ { .magic = "\037\213" }, 2, gzip_args, do_zlib },/* 2, gzipped */
206
{ { .magic = "\037\236" }, 2, gzip_args, NULL }, /* 3, frozen */
207
{ { .magic = "\037\240" }, 2, gzip_args, NULL }, /* 4, SCO LZH */
208
/* the standard pack utilities do not accept standard input */
209
{ { .magic = "\037\036" }, 2, gzip_args, NULL }, /* 5, packed */
210
{ { .magic = "PK\3\4" }, 4, gzip_args, NULL }, /* 6, pkziped */
211
/* ...only first file examined */
212
{ { .magic = "BZh" }, 3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
213
{ { .magic = "LZIP" }, 4, lzip_args, NULL }, /* 8, lzip-ed */
214
{ { .magic = "\3757zXZ\0" },6, xz_args, NULL }, /* 9, XZ Util */
215
{ { .magic = "LRZI" }, 4, lrzip_args, NULL }, /* 10, LRZIP */
216
{ { .magic = "\004\"M\030" },4, lz4_args, NULL }, /* 11, LZ4 */
217
{ { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
218
{ { .func = lzmacmp }, -13, xz_args, NULL }, /* 13, lzma */
219
#ifdef ZLIBSUPPORT
220
{ { .func = zlibcmp }, -2, zlib_args, NULL }, /* 14, zlib */
221
#endif
222
};
223
224
#define OKDATA 0
225
#define NODATA 1
226
#define ERRDATA 2
227
228
file_private ssize_t swrite(int, const void *, size_t);
229
#if HAVE_FORK
230
file_private size_t ncompr = __arraycount(compr);
231
file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
232
unsigned char **, size_t *);
233
#ifdef BUILTIN_DECOMPRESS
234
file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
235
size_t *, int);
236
file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
237
size_t *, int);
238
#endif
239
#ifdef BUILTIN_BZLIB
240
file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
241
size_t *, int);
242
#endif
243
#ifdef BUILTIN_XZLIB
244
file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
245
size_t *, int);
246
#endif
247
#ifdef BUILTIN_ZSTDLIB
248
file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
249
size_t *, int);
250
#endif
251
#ifdef BUILTIN_LZLIB
252
file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
253
size_t *, int);
254
#endif
255
#ifdef BUILTIN_LRZIP
256
file_private int uncompresslrzip(const unsigned char *, unsigned char **, size_t,
257
size_t *, int);
258
#endif
259
260
261
static int makeerror(unsigned char **, size_t *, const char *, ...)
262
__attribute__((__format__(__printf__, 3, 4)));
263
file_private const char *methodname(size_t);
264
265
file_private int
266
format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
267
{
268
unsigned char *p;
269
int mime = ms->flags & MAGIC_MIME;
270
271
if (!mime)
272
return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
273
274
for (p = buf; *p; p++)
275
if (!isalnum(*p))
276
*p = '-';
277
278
return file_printf(ms, "application/x-decompression-error-%s-%s",
279
methodname(i), buf);
280
}
281
282
file_protected int
283
file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
284
{
285
unsigned char *newbuf = NULL;
286
size_t i, nsz;
287
char *rbuf;
288
file_pushbuf_t *pb;
289
int urv, prv, rv = 0;
290
int mime = ms->flags & MAGIC_MIME;
291
int fd = b->fd;
292
const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
293
size_t nbytes = b->flen;
294
int sa_saved = 0;
295
struct sigaction sig_act;
296
297
if ((ms->flags & MAGIC_COMPRESS) == 0)
298
return 0;
299
300
for (i = 0; i < ncompr; i++) {
301
int zm;
302
if (nbytes < CAST(size_t, abs(compr[i].maglen)))
303
continue;
304
if (compr[i].maglen < 0) {
305
zm = (*compr[i].u.func)(buf);
306
} else {
307
zm = memcmp(buf, compr[i].u.magic,
308
CAST(size_t, compr[i].maglen)) == 0;
309
}
310
311
if (!zm)
312
continue;
313
314
/* Prevent SIGPIPE death if child dies unexpectedly */
315
if (!sa_saved) {
316
//We can use sig_act for both new and old, but
317
struct sigaction new_act;
318
memset(&new_act, 0, sizeof(new_act));
319
new_act.sa_handler = SIG_IGN;
320
sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
321
}
322
323
nsz = nbytes;
324
free(newbuf);
325
urv = uncompressbuf(fd, ms->bytes_max, i,
326
(ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
327
DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
328
(char *)newbuf, nsz);
329
switch (urv) {
330
case OKDATA:
331
case ERRDATA:
332
ms->flags &= ~MAGIC_COMPRESS;
333
if (urv == ERRDATA)
334
prv = format_decompression_error(ms, i, newbuf);
335
else
336
prv = file_buffer(ms, -1, NULL, name, newbuf,
337
nsz);
338
if (prv == -1)
339
goto error;
340
rv = 1;
341
if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
342
goto out;
343
if (mime != MAGIC_MIME && mime != 0)
344
goto out;
345
if ((file_printf(ms,
346
mime ? " compressed-encoding=" : " (")) == -1)
347
goto error;
348
if ((pb = file_push_buffer(ms)) == NULL)
349
goto error;
350
/*
351
* XXX: If file_buffer fails here, we overwrite
352
* the compressed text. FIXME.
353
*/
354
if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1)
355
{
356
if (file_pop_buffer(ms, pb) != NULL)
357
abort();
358
goto error;
359
}
360
if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
361
if (file_printf(ms, "%s", rbuf) == -1) {
362
free(rbuf);
363
goto error;
364
}
365
free(rbuf);
366
}
367
if (!mime && file_printf(ms, ")") == -1)
368
goto error;
369
/*FALLTHROUGH*/
370
case NODATA:
371
break;
372
default:
373
abort();
374
/*NOTREACHED*/
375
error:
376
rv = -1;
377
break;
378
}
379
}
380
out:
381
DPRINTF("rv = %d\n", rv);
382
383
if (sa_saved && sig_act.sa_handler != SIG_IGN)
384
(void)sigaction(SIGPIPE, &sig_act, NULL);
385
386
free(newbuf);
387
ms->flags |= MAGIC_COMPRESS;
388
DPRINTF("Zmagic returns %d\n", rv);
389
return rv;
390
}
391
#endif
392
/*
393
* `safe' write for sockets and pipes.
394
*/
395
file_private ssize_t
396
swrite(int fd, const void *buf, size_t n)
397
{
398
ssize_t rv;
399
size_t rn = n;
400
401
do
402
switch (rv = write(fd, buf, n)) {
403
case -1:
404
if (errno == EINTR)
405
continue;
406
return -1;
407
default:
408
n -= rv;
409
buf = CAST(const char *, buf) + rv;
410
break;
411
}
412
while (n > 0);
413
return rn;
414
}
415
416
417
/*
418
* `safe' read for sockets and pipes.
419
*/
420
file_protected ssize_t
421
sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
422
{
423
ssize_t rv;
424
#if defined(FIONREAD) && !defined(__MINGW32__)
425
int t = 0;
426
#endif
427
size_t rn = n;
428
429
if (fd == STDIN_FILENO)
430
goto nocheck;
431
432
#if defined(FIONREAD) && !defined(__MINGW32__)
433
if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
434
#ifdef FD_ZERO
435
ssize_t cnt;
436
for (cnt = 0;; cnt++) {
437
fd_set check;
438
struct timeval tout = {0, 100 * 1000};
439
int selrv;
440
441
FD_ZERO(&check);
442
FD_SET(fd, &check);
443
444
/*
445
* Avoid soft deadlock: do not read if there
446
* is nothing to read from sockets and pipes.
447
*/
448
selrv = select(fd + 1, &check, NULL, NULL, &tout);
449
if (selrv == -1) {
450
if (errno == EINTR || errno == EAGAIN)
451
continue;
452
} else if (selrv == 0 && cnt >= 5) {
453
return 0;
454
} else
455
break;
456
}
457
#endif
458
(void)ioctl(fd, FIONREAD, &t);
459
}
460
461
if (t > 0 && CAST(size_t, t) < n) {
462
n = t;
463
rn = n;
464
}
465
#endif
466
467
nocheck:
468
do
469
switch ((rv = read(fd, buf, n))) {
470
case -1:
471
if (errno == EINTR)
472
continue;
473
return -1;
474
case 0:
475
return rn - n;
476
default:
477
n -= rv;
478
buf = CAST(char *, CCAST(void *, buf)) + rv;
479
break;
480
}
481
while (n > 0);
482
return rn;
483
}
484
485
file_protected int
486
file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
487
size_t nbytes)
488
{
489
char buf[4096];
490
ssize_t r;
491
int tfd;
492
493
#ifdef WIN32
494
const char *t;
495
buf[0] = '\0';
496
if ((t = getenv("TEMP")) != NULL)
497
(void)strlcpy(buf, t, sizeof(buf));
498
else if ((t = getenv("TMP")) != NULL)
499
(void)strlcpy(buf, t, sizeof(buf));
500
else if ((t = getenv("TMPDIR")) != NULL)
501
(void)strlcpy(buf, t, sizeof(buf));
502
if (buf[0] != '\0')
503
(void)strlcat(buf, "/", sizeof(buf));
504
(void)strlcat(buf, "file.XXXXXX", sizeof(buf));
505
#else
506
(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
507
#endif
508
#ifndef HAVE_MKSTEMP
509
{
510
char *ptr = mktemp(buf);
511
tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
512
r = errno;
513
(void)unlink(ptr);
514
errno = r;
515
}
516
#else
517
{
518
int te;
519
mode_t ou = umask(0);
520
tfd = mkstemp(buf);
521
(void)umask(ou);
522
te = errno;
523
(void)unlink(buf);
524
errno = te;
525
}
526
#endif
527
if (tfd == -1) {
528
file_error(ms, errno,
529
"cannot create temporary file for pipe copy");
530
return -1;
531
}
532
533
if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
534
r = 1;
535
else {
536
while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
537
if (swrite(tfd, buf, CAST(size_t, r)) != r)
538
break;
539
}
540
541
switch (r) {
542
case -1:
543
file_error(ms, errno, "error copying from pipe to temp file");
544
return -1;
545
case 0:
546
break;
547
default:
548
file_error(ms, errno, "error while writing to temp file");
549
return -1;
550
}
551
552
/*
553
* We duplicate the file descriptor, because fclose on a
554
* tmpfile will delete the file, but any open descriptors
555
* can still access the phantom inode.
556
*/
557
if ((fd = dup2(tfd, fd)) == -1) {
558
file_error(ms, errno, "could not dup descriptor for temp file");
559
return -1;
560
}
561
(void)close(tfd);
562
if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
563
file_badseek(ms);
564
return -1;
565
}
566
return fd;
567
}
568
#if HAVE_FORK
569
#ifdef BUILTIN_DECOMPRESS
570
571
#define FHCRC (1 << 1)
572
#define FEXTRA (1 << 2)
573
#define FNAME (1 << 3)
574
#define FCOMMENT (1 << 4)
575
576
577
file_private int
578
uncompressgzipped(const unsigned char *old, unsigned char **newch,
579
size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
580
{
581
unsigned char flg;
582
size_t data_start = 10;
583
584
if (*n < 4) {
585
goto err;
586
}
587
588
flg = old[3];
589
590
if (flg & FEXTRA) {
591
if (data_start + 1 >= *n)
592
goto err;
593
data_start += 2 + old[data_start] + old[data_start + 1] * 256;
594
}
595
if (flg & FNAME) {
596
while(data_start < *n && old[data_start])
597
data_start++;
598
data_start++;
599
}
600
if (flg & FCOMMENT) {
601
while(data_start < *n && old[data_start])
602
data_start++;
603
data_start++;
604
}
605
if (flg & FHCRC)
606
data_start += 2;
607
608
if (data_start >= *n)
609
goto err;
610
611
*n -= data_start;
612
old += data_start;
613
return uncompresszlib(old, newch, bytes_max, n, 0);
614
err:
615
return makeerror(newch, n, "File too short");
616
}
617
618
file_private int
619
uncompresszlib(const unsigned char *old, unsigned char **newch,
620
size_t bytes_max, size_t *n, int zlib)
621
{
622
int rc;
623
z_stream z;
624
625
DPRINTF("builtin zlib decompression\n");
626
z.next_in = CCAST(Bytef *, old);
627
z.avail_in = CAST(uint32_t, *n);
628
z.next_out = *newch;
629
z.avail_out = CAST(unsigned int, bytes_max);
630
z.zalloc = Z_NULL;
631
z.zfree = Z_NULL;
632
z.opaque = Z_NULL;
633
634
/* LINTED bug in header macro */
635
rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
636
if (rc != Z_OK)
637
goto err;
638
639
rc = inflate(&z, Z_SYNC_FLUSH);
640
if (rc != Z_OK && rc != Z_STREAM_END) {
641
inflateEnd(&z);
642
goto err;
643
}
644
645
*n = CAST(size_t, z.total_out);
646
rc = inflateEnd(&z);
647
if (rc != Z_OK)
648
goto err;
649
650
/* let's keep the nul-terminate tradition */
651
(*newch)[*n] = '\0';
652
653
return OKDATA;
654
err:
655
return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
656
}
657
#endif
658
659
#ifdef BUILTIN_BZLIB
660
file_private int
661
uncompressbzlib(const unsigned char *old, unsigned char **newch,
662
size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
663
{
664
int rc;
665
bz_stream bz;
666
667
DPRINTF("builtin bzlib decompression\n");
668
memset(&bz, 0, sizeof(bz));
669
rc = BZ2_bzDecompressInit(&bz, 0, 0);
670
if (rc != BZ_OK)
671
goto err;
672
673
bz.next_in = CCAST(char *, RCAST(const char *, old));
674
bz.avail_in = CAST(uint32_t, *n);
675
bz.next_out = RCAST(char *, *newch);
676
bz.avail_out = CAST(unsigned int, bytes_max);
677
678
rc = BZ2_bzDecompress(&bz);
679
if (rc != BZ_OK && rc != BZ_STREAM_END) {
680
BZ2_bzDecompressEnd(&bz);
681
goto err;
682
}
683
684
/* Assume byte_max is within 32bit */
685
/* assert(bz.total_out_hi32 == 0); */
686
*n = CAST(size_t, bz.total_out_lo32);
687
rc = BZ2_bzDecompressEnd(&bz);
688
if (rc != BZ_OK)
689
goto err;
690
691
/* let's keep the nul-terminate tradition */
692
(*newch)[*n] = '\0';
693
694
return OKDATA;
695
err:
696
return makeerror(newch, n, "bunzip error %d", rc);
697
}
698
#endif
699
700
#ifdef BUILTIN_XZLIB
701
file_private int
702
uncompressxzlib(const unsigned char *old, unsigned char **newch,
703
size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
704
{
705
int rc;
706
lzma_stream xz;
707
708
DPRINTF("builtin xzlib decompression\n");
709
memset(&xz, 0, sizeof(xz));
710
rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
711
if (rc != LZMA_OK)
712
goto err;
713
714
xz.next_in = CCAST(const uint8_t *, old);
715
xz.avail_in = CAST(uint32_t, *n);
716
xz.next_out = RCAST(uint8_t *, *newch);
717
xz.avail_out = CAST(unsigned int, bytes_max);
718
719
rc = lzma_code(&xz, LZMA_RUN);
720
if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
721
lzma_end(&xz);
722
goto err;
723
}
724
725
*n = CAST(size_t, xz.total_out);
726
727
lzma_end(&xz);
728
729
/* let's keep the nul-terminate tradition */
730
(*newch)[*n] = '\0';
731
732
return OKDATA;
733
err:
734
return makeerror(newch, n, "unxz error %d", rc);
735
}
736
#endif
737
738
#ifdef BUILTIN_ZSTDLIB
739
file_private int
740
uncompresszstd(const unsigned char *old, unsigned char **newch,
741
size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
742
{
743
size_t rc;
744
ZSTD_DStream *zstd;
745
ZSTD_inBuffer in;
746
ZSTD_outBuffer out;
747
748
DPRINTF("builtin zstd decompression\n");
749
if ((zstd = ZSTD_createDStream()) == NULL) {
750
return makeerror(newch, n, "No ZSTD decompression stream, %s",
751
strerror(errno));
752
}
753
754
rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
755
if (ZSTD_isError(rc))
756
goto err;
757
758
in.src = CCAST(const void *, old);
759
in.size = *n;
760
in.pos = 0;
761
out.dst = RCAST(void *, *newch);
762
out.size = bytes_max;
763
out.pos = 0;
764
765
rc = ZSTD_decompressStream(zstd, &out, &in);
766
if (ZSTD_isError(rc))
767
goto err;
768
769
*n = out.pos;
770
771
ZSTD_freeDStream(zstd);
772
773
/* let's keep the nul-terminate tradition */
774
(*newch)[*n] = '\0';
775
776
return OKDATA;
777
err:
778
ZSTD_freeDStream(zstd);
779
return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
780
}
781
#endif
782
783
#ifdef BUILTIN_LZLIB
784
file_private int
785
uncompresslzlib(const unsigned char *old, unsigned char **newch,
786
size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
787
{
788
enum LZ_Errno err;
789
size_t old_remaining = *n;
790
size_t new_remaining = bytes_max;
791
size_t total_read = 0;
792
unsigned char *bufp;
793
struct LZ_Decoder *dec;
794
795
bufp = *newch;
796
797
DPRINTF("builtin lzlib decompression\n");
798
dec = LZ_decompress_open();
799
if (!dec) {
800
return makeerror(newch, n, "unable to allocate LZ_Decoder");
801
}
802
if (LZ_decompress_errno(dec) != LZ_ok)
803
goto err;
804
805
for (;;) {
806
// LZ_decompress_read() stops at member boundaries, so we may
807
// have more than one successful read after writing all data
808
// we have.
809
if (old_remaining > 0) {
810
int wr = LZ_decompress_write(dec, old, old_remaining);
811
if (wr < 0)
812
goto err;
813
old_remaining -= wr;
814
old += wr;
815
}
816
817
int rd = LZ_decompress_read(dec, bufp, new_remaining);
818
if (rd > 0) {
819
new_remaining -= rd;
820
bufp += rd;
821
total_read += rd;
822
}
823
824
if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
825
goto err;
826
if (new_remaining == 0)
827
break;
828
if (old_remaining == 0 && rd == 0)
829
break;
830
}
831
832
LZ_decompress_close(dec);
833
*n = total_read;
834
835
/* let's keep the nul-terminate tradition */
836
*bufp = '\0';
837
838
return OKDATA;
839
err:
840
err = LZ_decompress_errno(dec);
841
LZ_decompress_close(dec);
842
return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
843
}
844
#endif
845
846
#ifdef BUILTIN_LRZIP
847
file_private int
848
uncompresslrzip(const unsigned char *old, unsigned char **newch,
849
size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
850
{
851
Lrzip *lr;
852
FILE *in, *out;
853
int res = OKDATA;
854
855
DPRINTF("builtin rlzip decompression\n");
856
lr = lrzip_new(LRZIP_MODE_DECOMPRESS);
857
if (lr == NULL) {
858
res = makeerror(newch, n, "unable to create an lrzip decoder");
859
goto out0;
860
}
861
lrzip_config_env(lr);
862
in = fmemopen(RCAST(void *, old), bytes_max, "r");
863
if (in == NULL) {
864
res = makeerror(newch, n, "unable to construct input file");
865
goto out1;
866
}
867
if (!lrzip_file_add(lr, in)) {
868
res = makeerror(newch, n, "unable to add input file");
869
goto out2;
870
}
871
*newch = calloc(*n = 2 * bytes_max, 1);
872
if (*newch == NULL) {
873
res = makeerror(newch, n, "unable to allocate output buffer");
874
goto out2;
875
}
876
out = fmemopen(*newch, *n, "w");
877
if (out == NULL) {
878
free(*newch);
879
res = makeerror(newch, n, "unable to allocate output file");
880
goto out2;
881
}
882
lrzip_outfile_set(lr, out);
883
if (lrzip_run(lr)) {
884
free(*newch);
885
res = makeerror(newch, n, "unable to decompress file");
886
goto out3;
887
}
888
*n = (size_t)ftell(out);
889
out3:
890
fclose(out);
891
out2:
892
fclose(in);
893
out1:
894
lrzip_free(lr);
895
out0:
896
return res;
897
}
898
#endif
899
900
static int
901
makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
902
{
903
char *msg;
904
va_list ap;
905
int rv;
906
907
DPRINTF("Makeerror %s\n", fmt);
908
free(*buf);
909
va_start(ap, fmt);
910
rv = vasprintf(&msg, fmt, ap);
911
va_end(ap);
912
if (rv < 0) {
913
DPRINTF("Makeerror failed");
914
*buf = NULL;
915
*len = 0;
916
return NODATA;
917
}
918
*buf = RCAST(unsigned char *, msg);
919
*len = strlen(msg);
920
return ERRDATA;
921
}
922
923
/*
 * Close descriptor fd[i] if it is open (not -1) and mark the slot as
 * closed by storing -1 in it.  Calling it again on the same slot is a
 * no-op.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
931
932
/*
 * Close both ends of a pipe pair (fd[0] and fd[1]) via closefd(), which
 * leaves -1 in each slot.
 */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
939
940
/*
 * Arrange for descriptor fd to become descriptor i and for the original
 * fd to be closed.  Nothing happens when fd already equals i.
 *
 * With HAVE_POSIX_SPAWNP, v is a posix_spawn_file_actions_t * and the
 * dup2/close are only recorded for the spawned child.  Otherwise v is
 * unused and the dup2/close are performed immediately in the calling
 * process; handledesc() calls this from the vfork() child, so a failure
 * terminates with _exit() rather than exit().
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd == i)
		return; /* "no dup was necessary" */
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v);
	posix_spawn_file_actions_adddup2(fa, fd, i);
	posix_spawn_file_actions_addclose(fa, fd);
#else
	(void)v;
	if (dup2(fd, i) == -1) {
		DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
		/*
		 * _exit(), not exit(): after vfork the child must not run
		 * atexit handlers or flush stdio state it shares with the
		 * parent.
		 */
		_exit(EXIT_FAILURE);
	}
	close(fd);
#endif
}
957
958
/*
 * Close fd on behalf of the child: with HAVE_POSIX_SPAWNP the close is
 * recorded in the file-actions object v points to; otherwise fd is
 * closed immediately and v is ignored.
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_addclose(
	    RCAST(posix_spawn_file_actions_t *, v), fd);
#else
	(void)v;
	close(fd);
#endif
}
968
969
static void
970
handledesc(void *v, int fd, int fdp[3][2])
971
{
972
if (fd != -1) {
973
(void) lseek(fd, CAST(off_t, 0), SEEK_SET);
974
movedesc(v, STDIN_FILENO, fd);
975
} else {
976
movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
977
if (fdp[STDIN_FILENO][1] > 2)
978
closedesc(v, fdp[STDIN_FILENO][1]);
979
}
980
981
file_clear_closexec(STDIN_FILENO);
982
983
///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
984
movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
985
if (fdp[STDOUT_FILENO][0] > 2)
986
closedesc(v, fdp[STDOUT_FILENO][0]);
987
988
file_clear_closexec(STDOUT_FILENO);
989
990
movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
991
if (fdp[STDERR_FILENO][0] > 2)
992
closedesc(v, fdp[STDERR_FILENO][0]);
993
994
file_clear_closexec(STDERR_FILENO);
995
}
996
997
static pid_t
998
writechild(int fd, const void *old, size_t n)
999
{
1000
pid_t pid;
1001
1002
/*
1003
* fork again, to avoid blocking because both
1004
* pipes filled
1005
*/
1006
pid = fork();
1007
if (pid == -1) {
1008
DPRINTF("Fork failed (%s)\n", strerror(errno));
1009
return -1;
1010
}
1011
if (pid == 0) {
1012
/* child */
1013
if (swrite(fd, old, n) != CAST(ssize_t, n)) {
1014
DPRINTF("Write failed (%s)\n", strerror(errno));
1015
exit(EXIT_FAILURE);
1016
}
1017
exit(EXIT_SUCCESS);
1018
}
1019
/* parent */
1020
return pid;
1021
}
1022
1023
static ssize_t
1024
filter_error(unsigned char *ubuf, ssize_t n)
1025
{
1026
char *p;
1027
char *buf;
1028
1029
ubuf[n] = '\0';
1030
buf = RCAST(char *, ubuf);
1031
while (isspace(CAST(unsigned char, *buf)))
1032
buf++;
1033
DPRINTF("Filter error[[[%s]]]\n", buf);
1034
if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
1035
*p = '\0';
1036
if ((p = strchr(CAST(char *, buf), ';')) != NULL)
1037
*p = '\0';
1038
if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
1039
++p;
1040
while (isspace(CAST(unsigned char, *p)))
1041
p++;
1042
n = strlen(p);
1043
memmove(ubuf, p, CAST(size_t, n + 1));
1044
}
1045
DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
1046
if (islower(*ubuf))
1047
*ubuf = toupper(*ubuf);
1048
return n;
1049
}
1050
1051
file_private const char *
1052
methodname(size_t method)
1053
{
1054
switch (method) {
1055
#ifdef BUILTIN_DECOMPRESS
1056
case METH_FROZEN:
1057
case METH_ZLIB:
1058
return "zlib";
1059
#endif
1060
#ifdef BUILTIN_BZLIB
1061
case METH_BZIP:
1062
return "bzlib";
1063
#endif
1064
#ifdef BUILTIN_XZLIB
1065
case METH_XZ:
1066
case METH_LZMA:
1067
return "xzlib";
1068
#endif
1069
#ifdef BUILTIN_ZSTDLIB
1070
case METH_ZSTD:
1071
return "zstd";
1072
#endif
1073
#ifdef BUILTIN_LZLIB
1074
case METH_LZIP:
1075
return "lzlib";
1076
#endif
1077
#ifdef BUILTIN_LRZIP
1078
case METH_LRZIP:
1079
return "lrzip";
1080
#endif
1081
default:
1082
return compr[method].argv[0];
1083
}
1084
}
1085
1086
/*
 * Return the built-in decompression routine compiled in for table index
 * `method', or NULL when there is none and an external command has to be
 * used instead.
 */
file_private int (*
getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t,
    size_t *, int)
{
#ifdef BUILTIN_DECOMPRESS
	if (method == METH_FROZEN)
		return uncompressgzipped;
	if (method == METH_ZLIB)
		return uncompresszlib;
#endif
#ifdef BUILTIN_BZLIB
	if (method == METH_BZIP)
		return uncompressbzlib;
#endif
#ifdef BUILTIN_XZLIB
	if (method == METH_XZ || method == METH_LZMA)
		return uncompressxzlib;
#endif
#ifdef BUILTIN_ZSTDLIB
	if (method == METH_ZSTD)
		return uncompresszstd;
#endif
#ifdef BUILTIN_LZLIB
	if (method == METH_LZIP)
		return uncompresslzlib;
#endif
#ifdef BUILTIN_LRZIP
	if (method == METH_LRZIP)
		return uncompresslrzip;
#endif
	(void)method;
	return NULL;
}
1122
1123
file_private int
1124
uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
1125
const unsigned char *old, unsigned char **newch, size_t* n)
1126
{
1127
int fdp[3][2];
1128
int status, rv, w;
1129
pid_t pid;
1130
pid_t writepid = -1;
1131
size_t i;
1132
ssize_t r, re;
1133
char *const *args;
1134
#ifdef HAVE_POSIX_SPAWNP
1135
posix_spawn_file_actions_t fa;
1136
#endif
1137
int (*decompress)(const unsigned char *, unsigned char **,
1138
size_t, size_t *, int) = getdecompressor(method);
1139
1140
*newch = CAST(unsigned char *, malloc(bytes_max + 1));
1141
if (*newch == NULL)
1142
return makeerror(newch, n, "No buffer, %s", strerror(errno));
1143
1144
if (decompress) {
1145
if (nofork) {
1146
return makeerror(newch, n,
1147
"Fork is required to uncompress, but disabled");
1148
}
1149
return (*decompress)(old, newch, bytes_max, n, 1);
1150
}
1151
1152
(void)fflush(stdout);
1153
(void)fflush(stderr);
1154
1155
for (i = 0; i < __arraycount(fdp); i++)
1156
fdp[i][0] = fdp[i][1] = -1;
1157
1158
/*
1159
* There are multithreaded users who run magic_file()
1160
* from dozens of threads. If two parallel magic_file() calls
1161
* analyze two large compressed files, both will spawn
1162
* an uncompressing child here, which writes out uncompressed data.
1163
* We read some portion, then close the pipe, then waitpid() the child.
1164
* If uncompressed data is larger, child should get EPIPE and exit.
1165
* However, with *parallel* calls OTHER child may unintentionally
1166
* inherit pipe fds, thus keeping pipe open and making writes in
1167
* our child block instead of failing with EPIPE!
1168
* (For the bug to occur, two threads must mutually inherit their pipes,
1169
* and both must have large outputs. Thus it happens not that often).
1170
* To avoid this, be sure to create pipes with O_CLOEXEC.
1171
*/
1172
if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
1173
file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
1174
file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
1175
closep(fdp[STDIN_FILENO]);
1176
closep(fdp[STDOUT_FILENO]);
1177
return makeerror(newch, n, "Cannot create pipe, %s",
1178
strerror(errno));
1179
}
1180
1181
args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
1182
#ifdef HAVE_POSIX_SPAWNP
1183
posix_spawn_file_actions_init(&fa);
1184
1185
handledesc(&fa, fd, fdp);
1186
1187
DPRINTF("Executing %s\n", compr[method].argv[0]);
1188
status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
1189
args, NULL);
1190
1191
posix_spawn_file_actions_destroy(&fa);
1192
1193
if (status == -1) {
1194
return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
1195
compr[method].argv[0], strerror(errno));
1196
}
1197
#else
1198
/* For processes with large mapped virtual sizes, vfork
1199
* may be _much_ faster (10-100 times) than fork.
1200
*/
1201
pid = vfork();
1202
if (pid == -1) {
1203
return makeerror(newch, n, "Cannot vfork, %s",
1204
strerror(errno));
1205
}
1206
if (pid == 0) {
1207
/* child */
1208
/* Note: we are after vfork, do not modify memory
1209
* in a way which confuses parent. In particular,
1210
* do not modify fdp[i][j].
1211
*/
1212
handledesc(NULL, fd, fdp);
1213
DPRINTF("Executing %s\n", compr[method].argv[0]);
1214
1215
(void)execvp(compr[method].argv[0], args);
1216
dprintf(STDERR_FILENO, "exec `%s' failed, %s",
1217
compr[method].argv[0], strerror(errno));
1218
_exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
1219
}
1220
#endif
1221
/* parent */
1222
/* Close write sides of child stdout/err pipes */
1223
for (i = 1; i < __arraycount(fdp); i++)
1224
closefd(fdp[i], 1);
1225
/* Write the buffer data to child stdin, if we don't have fd */
1226
if (fd == -1) {
1227
closefd(fdp[STDIN_FILENO], 0);
1228
writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
1229
if (writepid == (pid_t)-1) {
1230
rv = makeerror(newch, n, "Write to child failed, %s",
1231
strerror(errno));
1232
DPRINTF("Write to child failed\n");
1233
goto err;
1234
}
1235
closefd(fdp[STDIN_FILENO], 1);
1236
}
1237
1238
rv = OKDATA;
1239
r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
1240
DPRINTF("read got %zd\n", r);
1241
if (r < 0) {
1242
rv = ERRDATA;
1243
DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
1244
strerror(errno));
1245
goto err;
1246
}
1247
if (CAST(size_t, r) == bytes_max) {
1248
/*
1249
* close fd so that the child exits with sigpipe and ignore
1250
* errors, otherwise we risk the child blocking and never
1251
* exiting.
1252
*/
1253
DPRINTF("Closing stdout for bytes_max\n");
1254
closefd(fdp[STDOUT_FILENO], 0);
1255
goto ok;
1256
}
1257
if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) {
1258
DPRINTF("Got stuff from stderr %s\n", *newch);
1259
rv = ERRDATA;
1260
r = filter_error(*newch, r);
1261
goto ok;
1262
}
1263
if (re == 0)
1264
goto ok;
1265
rv = makeerror(newch, n, "Read stderr failed, %s",
1266
strerror(errno));
1267
goto err;
1268
ok:
1269
*n = r;
1270
/* NUL terminate, as every buffer is handled here. */
1271
(*newch)[*n] = '\0';
1272
err:
1273
closefd(fdp[STDIN_FILENO], 1);
1274
closefd(fdp[STDOUT_FILENO], 0);
1275
closefd(fdp[STDERR_FILENO], 0);
1276
1277
w = waitpid(pid, &status, 0);
1278
wait_err:
1279
if (w == -1) {
1280
rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
1281
DPRINTF("Child wait return %#x\n", status);
1282
} else if (!WIFEXITED(status)) {
1283
DPRINTF("Child not exited (%#x)\n", status);
1284
} else if (WEXITSTATUS(status) != 0) {
1285
DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
1286
}
1287
if (writepid > 0) {
1288
/* _After_ we know decompressor has exited, our input writer
1289
* definitely will exit now (at worst, writing fails in it,
1290
* since output fd is closed now on the reading size).
1291
*/
1292
w = waitpid(writepid, &status, 0);
1293
writepid = -1;
1294
goto wait_err;
1295
}
1296
1297
closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
1298
DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
1299
1300
return rv;
1301
}
1302
#endif
1303
1304