// SPDX-License-Identifier: BSD-2-Clause
/*
   LZ4 - Fast LZ compression algorithm
   Copyright (C) 2011-present, Yann Collet.

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
    - LZ4 homepage : http://www.lz4.org
    - LZ4 source repository : https://github.com/lz4/lz4
*/

/*
 * This file contains unmodified code from lz4 1.9.3's decompressor, plus
 * associated macros and constants.
 *
 * It also contains a couple of defines from the old lz4.c to make things
 * fit together smoothly.
 *
 */

#include <sys/zfs_context.h>

int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm give up more quickly on data segments considered
 * "incompressible". This may decrease compression ratio dramatically,
 * but will be faster on incompressible data. Increasing this value will
 * make the algorithm search more before declaring a segment
 * "incompressible". This could improve compression a bit, but will be
 * slower on incompressible data. The default value (6) is recommended.
 */
#define NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*-************************************
 * CPU Feature Detection
 **************************************/
/* LZ4_FORCE_MEMORY_ACCESS
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The switches below allow selecting a different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable but violates the C standard.
 *            It can generate buggy code on targets whose assembly generation depends on alignment.
 *            But in some circumstances, it's the only known way to get the most performance (e.g., GCC + ARMv6).
 * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2)
 */
#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
#  if defined(__GNUC__) && \
    ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
    || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define LZ4_FORCE_MEMORY_ACCESS 2
#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
#    define LZ4_FORCE_MEMORY_ACCESS 1
#  endif
#endif

/*
 * LZ4_FORCE_SW_BITCOUNT
 * Define this parameter if your target system or compiler does not support hardware bit count
 */
/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#undef LZ4_FORCE_SW_BITCOUNT
#if defined(__sunos__)
#define LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifndef LZ4_FORCE_INLINE
#  ifdef _MSC_VER    /* Visual Studio */
#    define LZ4_FORCE_INLINE static __forceinline
#  else
#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
#      ifdef __GNUC__
#        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
#      else
#        define LZ4_FORCE_INLINE static inline
#      endif
#    else
#      define LZ4_FORCE_INLINE static
#    endif /* __STDC_VERSION__ */
#  endif   /* _MSC_VER */
#endif /* LZ4_FORCE_INLINE */

/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
 * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
 * together with a simple 8-byte copy loop as a fall-back path.
 * However, this optimization hurts the decompression speed by >30%,
 * because the execution does not go to the optimized loop
 * for typical compressible data, and all of the preamble checks
 * before going to the fall-back path become useless overhead.
 * This optimization happens only with the -O3 flag, and -O2 generates
 * a simple 8-byte copy loop.
 * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
 * functions are annotated with __attribute__((optimize("O2"))),
 * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
 * of LZ4_wildCopy8 does not affect the compression speed.
 */
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
#  define LZ4_FORCE_O2 __attribute__((optimize("O2")))
#  undef LZ4_FORCE_INLINE
#  define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline))
#else
#  define LZ4_FORCE_O2
#endif

#ifndef expect
#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif
#endif

#ifndef likely
#define likely(expr)     expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define unlikely(expr)   expect((expr) != 0, 0)
#endif

#ifndef _KERNEL
#include <stdlib.h>   /* malloc, calloc, free */
#include <string.h>   /* memset, memcpy */
#endif
#define ALLOC(s)          malloc(s)
#define ALLOC_AND_ZERO(s) calloc(1,s)
#define FREEMEM(p)        free(p)

#define MEM_INIT(p,v,s)   memset((p),(v),(s))

/*-************************************
 * Common Constants
 **************************************/
#define MINMATCH 4

#define WILDCOPYLENGTH 8
#define LASTLITERALS 5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MFLIMIT 12       /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
#define FASTLOOP_SAFE_DISTANCE 64

#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

#ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
#  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
#endif

#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
#endif

#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
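
/*
 * Editorial note (not from upstream): each LZ4 sequence starts with a one-byte
 * token. The high RUN_BITS bits hold the literal length and the low ML_BITS
 * bits hold the match length minus MINMATCH; a field equal to
 * RUN_MASK/ML_MASK (15) means additional length bytes follow (see
 * read_variable_length() below).
 */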

#define DEBUGLOG(l, ...) {}   /* disabled */

#ifndef assert
#define assert ASSERT
#endif

/*-************************************
 * Types
 **************************************/
#ifndef _KERNEL
#include <limits.h>
#endif
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#ifndef _KERNEL
#include <stdint.h>
#endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
typedef uintptr_t uptrval;
#else
#  if UINT_MAX != 4294967295UL
#    error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
#  endif
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64;
typedef size_t uptrval;   /* generally true, except OpenVMS-64 */
#endif

#if defined(__x86_64__)
typedef U64 reg_t;    /* 64-bits in x32 mode */
#else
typedef size_t reg_t; /* 32-bits in x32 mode */
#endif

typedef enum {
    notLimited = 0,
    limitedOutput = 1,
    fillOutput = 2
} limitedOutput_directive;

/*-************************************
 * Reading and writing into memory
 **************************************/

/**
 * LZ4 relies on memcpy with a constant size being inlined. In freestanding
 * environments, the compiler can't assume the implementation of memcpy() is
 * standard compliant, so it can't apply its specialized memcpy() inlining
 * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
 * memcpy() as if it were standard compliant, so it can inline it in freestanding
 * environments. This is needed when decompressing the Linux Kernel, for example.
 */
#if defined(__GNUC__) && (__GNUC__ >= 4)
#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
#else
#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
#endif

static unsigned LZ4_isLittleEndian(void)
{
    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
    return one.c[0];
}

#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
/* lie to the compiler about data alignment; use with caution */

static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }

static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }

#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;

static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }

static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }

#else  /* safe and portable access using memcpy() */

static U16 LZ4_read16(const void* memPtr)
{
    U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
}

static void LZ4_write32(void* memPtr, U32 value)
{
    LZ4_memcpy(memPtr, &value, sizeof(value));
}

#endif /* LZ4_FORCE_MEMORY_ACCESS */

static U16 LZ4_readLE16(const void* memPtr)
{
    if (LZ4_isLittleEndian()) {
        return LZ4_read16(memPtr);
    } else {
        const BYTE* p = (const BYTE*)memPtr;
        return (U16)((U16)p[0] + (p[1]<<8));
    }
}
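
/*
 * Editorial note (not from upstream): match offsets in the LZ4 block format
 * are stored as two little-endian bytes, so e.g. the byte pair {0x34, 0x12}
 * decodes to offset 0x1234 regardless of host endianness.
 */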

/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
LZ4_FORCE_INLINE
void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

    do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
}

static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
static const int      dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
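
/*
 * Editorial note (not from upstream): these tables handle overlapping matches
 * with offsets smaller than 8. After the first four bytes are copied one by
 * one, inc32table[offset] advances the source so that a 4-byte copy continues
 * the repeating pattern, and dec64table[offset] then rewinds it so that the
 * source again trails the destination by a multiple of the offset that is at
 * least 8 bytes, letting the following 8-byte copies proceed without overlap.
 */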

#ifndef LZ4_FAST_DEC_LOOP
#  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
#    define LZ4_FAST_DEC_LOOP 1
#  elif defined(__aarch64__) && !defined(__clang__)
     /* On aarch64, we disable this optimization for clang because on certain
      * mobile chipsets, performance is reduced with clang. For information
      * refer to https://github.com/lz4/lz4/pull/707 */
#    define LZ4_FAST_DEC_LOOP 1
#  else
#    define LZ4_FAST_DEC_LOOP 0
#  endif
#endif

#if LZ4_FAST_DEC_LOOP

LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    assert(srcPtr + offset == dstPtr);
    if (offset < 8) {
        LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
        dstPtr[0] = srcPtr[0];
        dstPtr[1] = srcPtr[1];
        dstPtr[2] = srcPtr[2];
        dstPtr[3] = srcPtr[3];
        srcPtr += inc32table[offset];
        LZ4_memcpy(dstPtr+4, srcPtr, 4);
        srcPtr -= dec64table[offset];
        dstPtr += 8;
    } else {
        LZ4_memcpy(dstPtr, srcPtr, 8);
        dstPtr += 8;
        srcPtr += 8;
    }

    LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
}

/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
 * this version copies two times 16 bytes (instead of one time 32 bytes)
 * because it must be compatible with offsets >= 16. */
LZ4_FORCE_INLINE void
LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

    do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
}

/* LZ4_memcpy_using_offset() presumes :
 * - dstEnd >= dstPtr + MINMATCH
 * - there are at least 8 bytes available to write after dstEnd */
LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    BYTE v[8];

    assert(dstEnd >= dstPtr + MINMATCH);

    switch(offset) {
    case 1:
        MEM_INIT(v, *srcPtr, 8);
        break;
    case 2:
        LZ4_memcpy(v, srcPtr, 2);
        LZ4_memcpy(&v[2], srcPtr, 2);
        LZ4_memcpy(&v[4], v, 4);
        break;
    case 4:
        LZ4_memcpy(v, srcPtr, 4);
        LZ4_memcpy(&v[4], srcPtr, 4);
        break;
    default:
        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
        return;
    }

    LZ4_memcpy(dstPtr, v, 8);
    dstPtr += 8;
    while (dstPtr < dstEnd) {
        LZ4_memcpy(dstPtr, v, 8);
        dstPtr += 8;
    }
}
#endif
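
/*
 * Editorial note (not from upstream): for the small offsets special-cased
 * above, the match degenerates into a short repeating pattern, e.g. offset 1
 * replicates a single byte (run-length style) and offset 2 repeats a two-byte
 * pair, so the pattern is first expanded into the 8-byte staging buffer v[]
 * and then stamped across the destination in 8-byte stores.
 */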

/*-************************************
 * Local Structures and types
 **************************************/
typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;

/**
 * This enum distinguishes several different modes of accessing previous
 * content in the stream.
 *
 * - noDict        : There is no preceding content.
 * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
 *                   being compressed are valid and refer to the preceding
 *                   content (of length ctx->dictSize), which is available
 *                   contiguously preceding in memory the content currently
 *                   being compressed.
 * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
 *                   else in memory, starting at ctx->dictionary with length
 *                   ctx->dictSize.
 * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
 *                   content is in a separate context, pointed to by
 *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
 *                   entries in the current context that refer to positions
 *                   preceding the beginning of the current compression are
 *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
 *                   ->dictSize describe the location and size of the preceding
 *                   content, and matches are found by looking in the ctx
 *                   ->dictCtx->hashTable.
 */
typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;

/*-*******************************
 * Decompression functions
 ********************************/

typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;

typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;

LZ4_FORCE_INLINE unsigned
read_variable_length(const BYTE**ip, const BYTE* lencheck,
                     int loop_check, int initial_check,
                     variable_length_error* error)
{
    U32 length = 0;
    U32 s;
    if (initial_check && unlikely((*ip) >= lencheck)) {   /* overflow detection */
        *error = initial_error;
        return length;
    }
    do {
        s = **ip;
        (*ip)++;
        length += s;
        if (loop_check && unlikely((*ip) >= lencheck)) {   /* overflow detection */
            *error = loop_error;
            return length;
        }
    } while (s==255);

    return length;
}
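
/*
 * Editorial note (not from upstream): a worked example of the length encoding
 * consumed above. A literal length of 537 is stored as the token nibble 15
 * (RUN_MASK) followed by the bytes 255, 255, 12; the loop stops at the first
 * byte below 255 and returns 255 + 255 + 12 = 522, which the caller adds to
 * the nibble value: 15 + 522 = 537.
 */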

#define LZ4_STATIC_ASSERT(c) ASSERT(c)


/*! LZ4_decompress_generic() :
 *  This generic decompression function covers all use cases.
 *  It shall be instantiated several times, using different sets of directives.
 *  Note that it is important for performance that this function really get inlined,
 *  in order to remove useless branches during compilation optimization.
 */
LZ4_FORCE_INLINE int
LZ4_decompress_generic(
                 const char* const src,
                 char* const dst,
                 int srcSize,
                 int outputSize,              /* If endOnInput==endOnInputSize, this value is `dstCapacity` */

                 endCondition_directive endOnInput,   /* endOnOutputSize, endOnInputSize */
                 earlyEnd_directive partialDecoding,  /* full, partial */
                 dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
                 const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
                 const size_t dictSize         /* note : = 0 if noDict */
                 )
{
    if ((src == NULL) || (outputSize < 0)) { return -1; }

    {   const BYTE* ip = (const BYTE*) src;
        const BYTE* const iend = ip + srcSize;

        BYTE* op = (BYTE*) dst;
        BYTE* const oend = op + outputSize;
        BYTE* cpy;

        const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;

        const int safeDecode = (endOnInput==endOnInputSize);
        const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));


        /* Set up the "end" pointers for the shortcut. */
        const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
        const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;

        const BYTE* match;
        size_t offset;
        unsigned token;
        size_t length;


        DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);

        /* Special cases */
        assert(lowPrefix <= op);
        if ((endOnInput) && (unlikely(outputSize==0))) {
            /* Empty output buffer */
            if (partialDecoding) return 0;
            return ((srcSize==1) && (*ip==0)) ? 0 : -1;
        }
        if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
        if ((endOnInput) && unlikely(srcSize==0)) { return -1; }

        /* Currently the fast loop shows a regression on qualcomm arm chips. */
#if LZ4_FAST_DEC_LOOP
        if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
            DEBUGLOG(6, "skip fast decode loop");
            goto safe_decode;
        }

        /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */
        while (1) {
            /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
            assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
            if (endOnInput) { assert(ip < iend); }
            token = *ip++;
            length = token >> ML_BITS;   /* literal length */

            assert(!endOnInput || ip <= iend);   /* ip < iend before the increment */

            /* decode literal length */
            if (length == RUN_MASK) {
                variable_length_error error = ok;
                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
                if (error == initial_error) { goto _output_error; }
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; }   /* overflow detection */
                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; }   /* overflow detection */

                /* copy literals */
                cpy = op+length;
                LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
                if (endOnInput) {   /* LZ4_decompress_safe() */
                    if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
                    LZ4_wildCopy32(op, ip, cpy);
                } else {   /* LZ4_decompress_fast() */
                    if (cpy>oend-8) { goto safe_literal_copy; }
                    LZ4_wildCopy8(op, ip, cpy);   /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
                                                   * it doesn't know input length, and only relies on end-of-block properties */
                }
                ip += length; op = cpy;
            } else {
                cpy = op+length;
                if (endOnInput) {   /* LZ4_decompress_safe() */
                    DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
                    /* We don't need to check oend, since we check it once for each loop below */
                    if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
                    /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
                    LZ4_memcpy(op, ip, 16);
                } else {   /* LZ4_decompress_fast() */
                    /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
                     * it doesn't know input length, and relies on end-of-block properties */
                    LZ4_memcpy(op, ip, 8);
                    if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
                }
                ip += length; op = cpy;
            }

            /* get offset */
            offset = LZ4_readLE16(ip); ip+=2;
            match = op - offset;
            assert(match <= op);

            /* get matchlength */
            length = token & ML_MASK;

            if (length == ML_MASK) {
                variable_length_error error = ok;
                if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; }   /* Error : offset outside buffers */
                length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
                if (error != ok) { goto _output_error; }
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; }   /* overflow detection */
                length += MINMATCH;
                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
                    goto safe_match_copy;
                }
            } else {
                length += MINMATCH;
                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
                    goto safe_match_copy;
                }

                /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
                if ((dict == withPrefix64k) || (match >= lowPrefix)) {
                    if (offset >= 8) {
                        assert(match >= lowPrefix);
                        assert(match <= op);
                        assert(op + 18 <= oend);

                        LZ4_memcpy(op, match, 8);
                        LZ4_memcpy(op+8, match+8, 8);
                        LZ4_memcpy(op+16, match+16, 2);
                        op += length;
                        continue;
            }   }   }

            if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; }   /* Error : offset outside buffers */
            /* match starting within external dictionary */
            if ((dict==usingExtDict) && (match < lowPrefix)) {
                if (unlikely(op+length > oend-LASTLITERALS)) {
                    if (partialDecoding) {
                        DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
                        length = MIN(length, (size_t)(oend-op));
                    } else {
                        goto _output_error;   /* end-of-block condition violated */
                }   }

                if (length <= (size_t)(lowPrefix-match)) {
                    /* match fits entirely within external dictionary : just copy */
                    memmove(op, dictEnd - (lowPrefix-match), length);
                    op += length;
                } else {
                    /* match stretches into both external dictionary and current block */
                    size_t const copySize = (size_t)(lowPrefix - match);
                    size_t const restSize = length - copySize;
                    LZ4_memcpy(op, dictEnd - copySize, copySize);
                    op += copySize;
                    if (restSize > (size_t)(op - lowPrefix)) {   /* overlap copy */
                        BYTE* const endOfMatch = op + restSize;
                        const BYTE* copyFrom = lowPrefix;
                        while (op < endOfMatch) { *op++ = *copyFrom++; }
                    } else {
                        LZ4_memcpy(op, lowPrefix, restSize);
                        op += restSize;
                }   }
                continue;
            }

            /* copy match within block */
            cpy = op + length;

            assert((op <= oend) && (oend-op >= 32));
            if (unlikely(offset<16)) {
                LZ4_memcpy_using_offset(op, match, cpy, offset);
            } else {
                LZ4_wildCopy32(op, match, cpy);
            }

            op = cpy;   /* wildcopy correction */
        }
    safe_decode:
#endif

        /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
        while (1) {
            token = *ip++;
            length = token >> ML_BITS;   /* literal length */

            assert(!endOnInput || ip <= iend);   /* ip < iend before the increment */

            /* A two-stage shortcut for the most common case:
             * 1) If the literal length is 0..14, and there is enough space,
             * enter the shortcut and copy 16 bytes on behalf of the literals
             * (in the fast mode, only 8 bytes can be safely copied this way).
             * 2) Further if the match length is 4..18, copy 18 bytes in a similar
             * manner; but we ensure that there's enough space in the output for
             * those 18 bytes earlier, upon entering the shortcut (in other words,
             * there is a combined check for both stages).
             */
            if ( (endOnInput ? length != RUN_MASK : length <= 8)
                /* strictly "less than" on input, to re-enter the loop with at least one byte */
              && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
                /* Copy the literals */
                LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
                op += length; ip += length;

                /* The second stage: prepare for match copying, decode full info.
                 * If it doesn't work out, the info won't be wasted. */
                length = token & ML_MASK;   /* match length */
                offset = LZ4_readLE16(ip); ip += 2;
                match = op - offset;
                assert(match <= op);   /* check overflow */

                /* Do not deal with overlapping matches. */
                if ( (length != ML_MASK)
                  && (offset >= 8)
                  && (dict==withPrefix64k || match >= lowPrefix) ) {
                    /* Copy the match. */
                    LZ4_memcpy(op + 0, match + 0, 8);
                    LZ4_memcpy(op + 8, match + 8, 8);
                    LZ4_memcpy(op +16, match +16, 2);
                    op += length + MINMATCH;
                    /* Both stages worked, load the next token. */
                    continue;
                }

                /* The second stage didn't work out, but the info is ready.
                 * Propel it right to the point of match copying. */
                goto _copy_match;
            }

            /* decode literal length */
            if (length == RUN_MASK) {
                variable_length_error error = ok;
                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
                if (error == initial_error) { goto _output_error; }
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; }   /* overflow detection */
                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; }   /* overflow detection */
            }

            /* copy literals */
            cpy = op+length;
#if LZ4_FAST_DEC_LOOP
        safe_literal_copy:
#endif
            LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
            if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
              || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
            {
                /* We've either hit the input parsing restriction or the output parsing restriction.
                 * In the normal scenario, decoding a full block, it must be the last sequence,
                 * otherwise it's an error (invalid input or dimensions).
                 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
                 */
                if (partialDecoding) {
                    /* Since we are partial decoding we may be in this block because of the output parsing
                     * restriction, which is not valid since the output buffer is allowed to be undersized.
                     */
                    assert(endOnInput);
                    DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
                    DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
                    DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
                    DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
                    /* Finishing in the middle of a literals segment,
                     * due to lack of input.
                     */
                    if (ip+length > iend) {
                        length = (size_t)(iend-ip);
                        cpy = op + length;
                    }
                    /* Finishing in the middle of a literals segment,
                     * due to lack of output space.
                     */
                    if (cpy > oend) {
                        cpy = oend;
                        assert(op<=oend);
                        length = (size_t)(oend-op);
                    }
                } else {
                    /* We must be on the last sequence because of the parsing limitations so check
                     * that we exactly regenerate the original size (must be exact when !endOnInput).
                     */
                    if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
                    /* We must be on the last sequence (or invalid) because of the parsing limitations
                     * so check that we exactly consume the input and don't overrun the output buffer.
                     */
                    if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
                        DEBUGLOG(6, "should have been last run of literals")
                        DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
                        DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
                        goto _output_error;
                    }
                }
                memmove(op, ip, length);   /* supports overlapping memory regions; only matters for in-place decompression scenarios */
                ip += length;
                op += length;
                /* Necessarily EOF when !partialDecoding.
                 * When partialDecoding, it is EOF if we've either
                 * filled the output buffer or
                 * can't proceed with reading an offset for following match.
                 */
                if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
                    break;
                }
            } else {
                LZ4_wildCopy8(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
                ip += length; op = cpy;
            }

            /* get offset */
            offset = LZ4_readLE16(ip); ip+=2;
            match = op - offset;

            /* get matchlength */
            length = token & ML_MASK;

    _copy_match:
            if (length == ML_MASK) {
                variable_length_error error = ok;
                length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
                if (error != ok) goto _output_error;
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
            }
            length += MINMATCH;

#if LZ4_FAST_DEC_LOOP
        safe_match_copy:
#endif
            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
            /* match starting within external dictionary */
            if ((dict==usingExtDict) && (match < lowPrefix)) {
                if (unlikely(op+length > oend-LASTLITERALS)) {
                    if (partialDecoding) length = MIN(length, (size_t)(oend-op));
                    else goto _output_error;   /* doesn't respect parsing restriction */
                }

                if (length <= (size_t)(lowPrefix-match)) {
                    /* match fits entirely within external dictionary : just copy */
                    memmove(op, dictEnd - (lowPrefix-match), length);
                    op += length;
                } else {
                    /* match stretches into both external dictionary and current block */
                    size_t const copySize = (size_t)(lowPrefix - match);
                    size_t const restSize = length - copySize;
                    LZ4_memcpy(op, dictEnd - copySize, copySize);
                    op += copySize;
                    if (restSize > (size_t)(op - lowPrefix)) {   /* overlap copy */
                        BYTE* const endOfMatch = op + restSize;
                        const BYTE* copyFrom = lowPrefix;
                        while (op < endOfMatch) *op++ = *copyFrom++;
                    } else {
                        LZ4_memcpy(op, lowPrefix, restSize);
                        op += restSize;
                }   }
                continue;
            }
            assert(match >= lowPrefix);

            /* copy match within block */
            cpy = op + length;

            /* partialDecoding : may end anywhere within the block */
            assert(op<=oend);
            if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
                size_t const mlen = MIN(length, (size_t)(oend-op));
                const BYTE* const matchEnd = match + mlen;
                BYTE* const copyEnd = op + mlen;
                if (matchEnd > op) {   /* overlap copy */
                    while (op < copyEnd) { *op++ = *match++; }
                } else {
                    LZ4_memcpy(op, match, mlen);
                }
                op = copyEnd;
                if (op == oend) { break; }
                continue;
            }

            if (unlikely(offset<8)) {
                LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
                op[0] = match[0];
                op[1] = match[1];
                op[2] = match[2];
                op[3] = match[3];
                match += inc32table[offset];
                LZ4_memcpy(op+4, match, 4);
                match -= dec64table[offset];
            } else {
                LZ4_memcpy(op, match, 8);
                match += 8;
            }
            op += 8;

            if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
                BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
                if (cpy > oend-LASTLITERALS) { goto _output_error; }   /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
                if (op < oCopyLimit) {
                    LZ4_wildCopy8(op, match, oCopyLimit);
                    match += oCopyLimit - op;
                    op = oCopyLimit;
                }
                while (op < cpy) { *op++ = *match++; }
            } else {
                LZ4_memcpy(op, match, 8);
                if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
            }
            op = cpy;   /* wildcopy correction */
        }

        /* end of decoding */
        if (endOnInput) {
            DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
            return (int) (((char*)op)-dst);   /* Nb of output bytes decoded */
        } else {
            return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
        }

        /* Overflow error detected */
    _output_error:
        return (int) (-(((const char*)ip)-src))-1;
    }
}

/*
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *		already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *		(necessarily <= maxOutputSize). If the source stream is
 *		malformed, the function will stop decoding and return a
 *		negative result, indicating the byte position of the faulty
 *		instruction. This function never writes beyond dest +
 *		maxOutputSize, and is therefore protected against malicious
 *		data packets.
 *	note : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress()
 *
 */

/*
 * Note: In upstream code, LZ4_uncompress_unknownOutputSize is now a legacy
 * wrapper for LZ4_decompress_safe which is a wrapper for
 * LZ4_decompress_generic; this wrapper flattens that, rather than
 * rewriting the callers.
 */
int LZ4_uncompress_unknownOutputSize(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
{
    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
        endOnInputSize, decode_full_block, noDict,
        (BYTE*)dest, NULL, 0);
}
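
/*
 * Editorial sketch (not part of the upstream file): a minimal illustration of
 * how a caller might drive this entry point, assuming it already holds a
 * compressed block `src` of `srclen` bytes and a destination buffer sized for
 * the original data. The names below (example_decompress, dst, dstlen,
 * decoded) are hypothetical and for illustration only.
 */
#if 0
static int
example_decompress(const char *src, int srclen, char *dst, int dstlen)
{
    /* Returns the decoded byte count, or a negative value on malformed input. */
    int decoded = LZ4_uncompress_unknownOutputSize(src, dst, srclen, dstlen);

    if (decoded < 0)
        return (-1);   /* corrupt or truncated block */
    return (decoded);  /* number of bytes written to dst, <= dstlen */
}
#endif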