Path: blob/main/sys/contrib/openzfs/module/zfs/lz4.c
// SPDX-License-Identifier: BSD-2-Clause
/*
   LZ4 - Fast LZ compression algorithm
   Copyright (C) 2011-present, Yann Collet.

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

   * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
   - LZ4 homepage : http://www.lz4.org
   - LZ4 source repository : https://github.com/lz4/lz4
*/

/*
 * This file contains unmodified code from lz4 1.9.3's decompressor, plus
 * associated macros and constants.
 *
 * It also contains a couple of defines from the old lz4.c to make things
 * fit together smoothly.
 *
 */

#include <sys/zfs_context.h>

int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio
 *	Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip faster data segments considered "incompressible".
 *	This may decrease compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value will make
 *	the algorithm search more before declaring a segment "incompressible".
 *	This could improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define NOTCOMPRESSIBLE_CONFIRMATION 6
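
/*
 * Added note, not part of the upstream lz4 sources: per the memory usage
 * formula N -> 2^(N+2) bytes quoted above, COMPRESSIONLEVEL 12 corresponds
 * to a 2^14 = 16384-byte (16 KB) table, which is the "12 -> 16KB" example.
 * The never-compiled check below is only a hypothetical illustration of
 * that arithmetic.
 */
#if 0   /* illustration only */
_Static_assert((1u << (COMPRESSIONLEVEL + 2)) == 16384,
    "COMPRESSIONLEVEL 12 implies a 16 KB table");
#endif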

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian format
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*-************************************
 *  CPU Feature Detection
 **************************************/
/* LZ4_FORCE_MEMORY_ACCESS
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The below switch allow to select different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable but violate C standard.
 *            It can generate buggy code on targets which assembly generation depends on alignment.
 *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
 * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2)
 */
#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
# if defined(__GNUC__) && \
    ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
    || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define LZ4_FORCE_MEMORY_ACCESS 2
# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
# define LZ4_FORCE_MEMORY_ACCESS 1
# endif
#endif

/*
 * LZ4_FORCE_SW_BITCOUNT
 * Define this parameter if your target system or compiler does not support hardware bit count
 */
/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 *	kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 *	kernel
 */
#undef LZ4_FORCE_SW_BITCOUNT
#if defined(__sunos__)
#define LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifndef LZ4_FORCE_INLINE
# ifdef _MSC_VER    /* Visual Studio */
#  define LZ4_FORCE_INLINE static __forceinline
# else
#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
#   ifdef __GNUC__
#    define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
#   else
#    define LZ4_FORCE_INLINE static inline
#   endif
#  else
#   define LZ4_FORCE_INLINE static
#  endif /* __STDC_VERSION__ */
# endif  /* _MSC_VER */
#endif /* LZ4_FORCE_INLINE */
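
/*
 * Added note, not part of the upstream lz4 sources: LZ4_FORCE_INLINE is
 * applied below to the hot helpers (e.g. LZ4_wildCopy8()) and to
 * LZ4_decompress_generic() itself, so that each specialized decoder variant
 * is fully inlined and its constant directive arguments fold away at compile
 * time, as the comment above LZ4_decompress_generic() explains.
 */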

/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
 * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
 * together with a simple 8-byte copy loop as a fall-back path.
 * However, this optimization hurts the decompression speed by >30%,
 * because the execution does not go to the optimized loop
 * for typical compressible data, and all of the preamble checks
 * before going to the fall-back path become useless overhead.
 * This optimization happens only with the -O3 flag, and -O2 generates
 * a simple 8-byte copy loop.
 * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
 * functions are annotated with __attribute__((optimize("O2"))),
 * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
 * of LZ4_wildCopy8 does not affect the compression speed.
 */
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
# define LZ4_FORCE_O2 __attribute__((optimize("O2")))
# undef LZ4_FORCE_INLINE
# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline))
#else
# define LZ4_FORCE_O2
#endif

#ifndef expect
#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
# define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
# define expect(expr,value)    (expr)
#endif
#endif

#ifndef likely
#define likely(expr) expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define unlikely(expr) expect((expr) != 0, 0)
#endif

#ifndef _KERNEL
#include <stdlib.h>   /* malloc, calloc, free */
#include <string.h>   /* memset, memcpy */
#endif
#define ALLOC(s)          malloc(s)
#define ALLOC_AND_ZERO(s) calloc(1,s)
#define FREEMEM(p)        free(p)

#define MEM_INIT(p,v,s)   memset((p),(v),(s))


/*-************************************
 *  Common Constants
 **************************************/
#define MINMATCH 4

#define WILDCOPYLENGTH 8
#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
#define FASTLOOP_SAFE_DISTANCE 64

#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

#ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
# define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
#endif

#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
# error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
#endif

#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
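
/*
 * Added note, not part of the upstream lz4 sources: a sequence token packs
 * the literal length in its high RUN_BITS bits and the match-length code in
 * its low ML_BITS bits; the decoder below extracts them as (token >> ML_BITS)
 * and (token & ML_MASK).  For example, a token of 0x52 means 5 literals
 * followed by a match of 2 + MINMATCH = 6 bytes.  The never-compiled block
 * below only illustrates that arithmetic.
 */
#if 0   /* illustration only */
static void
lz4_token_example(void)
{
	const unsigned token = 0x52;

	ASSERT3U(token >> ML_BITS, ==, 5);              /* literal length */
	ASSERT3U((token & ML_MASK) + MINMATCH, ==, 6);  /* match length */
}
#endif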

#define DEBUGLOG(l, ...) {}   /* disabled */

#ifndef assert
#define assert ASSERT
#endif

/*-************************************
 *  Types
 **************************************/
#ifndef _KERNEL
#include <limits.h>
#endif
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#ifndef _KERNEL
#include <stdint.h>
#endif
typedef uint8_t  BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t  S32;
typedef uint64_t U64;
typedef uintptr_t uptrval;
#else
# if UINT_MAX != 4294967295UL
#  error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
# endif
typedef unsigned char       BYTE;
typedef unsigned short      U16;
typedef unsigned int        U32;
typedef signed int          S32;
typedef unsigned long long  U64;
typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
#endif

#if defined(__x86_64__)
typedef U64    reg_t;   /* 64-bits in x32 mode */
#else
typedef size_t reg_t;   /* 32-bits in x32 mode */
#endif

typedef enum {
    notLimited = 0,
    limitedOutput = 1,
    fillOutput = 2
} limitedOutput_directive;


/*-************************************
 *  Reading and writing into memory
 **************************************/

/**
 * LZ4 relies on memcpy with a constant size being inlined. In freestanding
 * environments, the compiler can't assume the implementation of memcpy() is
 * standard compliant, so it can't apply its specialized memcpy() inlining
 * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
 * memcpy() as if it were standard compliant, so it can inline it in freestanding
 * environments. This is needed when decompressing the Linux Kernel, for example.
 */
#if defined(__GNUC__) && (__GNUC__ >= 4)
#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
#else
#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
#endif

static unsigned LZ4_isLittleEndian(void)
{
    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
    return one.c[0];
}


#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
/* lie to the compiler about data alignment; use with caution */

static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }

static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }

#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;

static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }

static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }

#else  /* safe and portable access using memcpy() */

static U16 LZ4_read16(const void* memPtr)
{
    U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
}

static void LZ4_write32(void* memPtr, U32 value)
{
    LZ4_memcpy(memPtr, &value, sizeof(value));
}

#endif /* LZ4_FORCE_MEMORY_ACCESS */

static U16 LZ4_readLE16(const void* memPtr)
{
    if (LZ4_isLittleEndian()) {
        return LZ4_read16(memPtr);
    } else {
        const BYTE* p = (const BYTE*)memPtr;
        return (U16)((U16)p[0] + (p[1]<<8));
    }
}
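
/*
 * Added note, not part of the upstream lz4 sources: LZ4 match offsets are
 * stored little-endian, so LZ4_readLE16() must return the same value on
 * big- and little-endian hosts.  Given the two raw bytes 0x34 0x12, the
 * decoded offset is 0x1234 either way, as the never-compiled sketch below
 * illustrates.
 */
#if 0   /* illustration only */
static void
lz4_readle16_example(void)
{
	const BYTE raw[2] = { 0x34, 0x12 };

	ASSERT3U(LZ4_readLE16(raw), ==, 0x1234);
}
#endif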

/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
LZ4_FORCE_INLINE
void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

    do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
}

static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
static const int      dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};


#ifndef LZ4_FAST_DEC_LOOP
# if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
#  define LZ4_FAST_DEC_LOOP 1
# elif defined(__aarch64__) && !defined(__clang__)
     /* On aarch64, we disable this optimization for clang because on certain
      * mobile chipsets, performance is reduced with clang. For information
      * refer to https://github.com/lz4/lz4/pull/707 */
#  define LZ4_FAST_DEC_LOOP 1
# else
#  define LZ4_FAST_DEC_LOOP 0
# endif
#endif

#if LZ4_FAST_DEC_LOOP

LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    assert(srcPtr + offset == dstPtr);
    if (offset < 8) {
        LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
        dstPtr[0] = srcPtr[0];
        dstPtr[1] = srcPtr[1];
        dstPtr[2] = srcPtr[2];
        dstPtr[3] = srcPtr[3];
        srcPtr += inc32table[offset];
        LZ4_memcpy(dstPtr+4, srcPtr, 4);
        srcPtr -= dec64table[offset];
        dstPtr += 8;
    } else {
        LZ4_memcpy(dstPtr, srcPtr, 8);
        dstPtr += 8;
        srcPtr += 8;
    }

    LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
}

/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
 * this version copies two times 16 bytes (instead of one time 32 bytes)
 * because it must be compatible with offsets >= 16. */
LZ4_FORCE_INLINE void
LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

    do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
}

/* LZ4_memcpy_using_offset()  presumes :
 * - dstEnd >= dstPtr + MINMATCH
 * - there is at least 8 bytes available to write after dstEnd */
LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    BYTE v[8];

    assert(dstEnd >= dstPtr + MINMATCH);

    switch(offset) {
    case 1:
        MEM_INIT(v, *srcPtr, 8);
        break;
    case 2:
        LZ4_memcpy(v, srcPtr, 2);
        LZ4_memcpy(&v[2], srcPtr, 2);
        LZ4_memcpy(&v[4], v, 4);
        break;
    case 4:
        LZ4_memcpy(v, srcPtr, 4);
        LZ4_memcpy(&v[4], srcPtr, 4);
        break;
    default:
        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
        return;
    }

    LZ4_memcpy(dstPtr, v, 8);
    dstPtr += 8;
    while (dstPtr < dstEnd) {
        LZ4_memcpy(dstPtr, v, 8);
        dstPtr += 8;
    }
}
#endif
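
/*
 * Added note, not part of the upstream lz4 sources: the small-offset helpers
 * above exist because a match may overlap its own output.  With offset == 1
 * the match simply repeats the previous byte, so the offset == 1 case of
 * LZ4_memcpy_using_offset() (available only when LZ4_FAST_DEC_LOOP is set)
 * prepares an 8-byte pattern of that single byte.  The never-compiled sketch
 * below illustrates the idea; the buffer is hypothetical.
 */
#if 0   /* illustration only */
static void
lz4_overlap_copy_example(void)
{
	BYTE buf[32] = { 'A' };		/* one literal already decoded */
	BYTE *op = buf + 1;

	/* an 8-byte match at offset 1 repeats 'A' eight more times */
	LZ4_memcpy_using_offset(op, op - 1, op + 8, 1);
}
#endif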


/*-************************************
 *  Local Structures and types
 **************************************/
typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;

/**
 * This enum distinguishes several different modes of accessing previous
 * content in the stream.
 *
 * - noDict        : There is no preceding content.
 * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
 *                   blob being compressed are valid and refer to the preceding
 *                   content (of length ctx->dictSize), which is available
 *                   contiguously preceding in memory the content currently
 *                   being compressed.
 * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
 *                   else in memory, starting at ctx->dictionary with length
 *                   ctx->dictSize.
 * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
 *                   content is in a separate context, pointed to by
 *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
 *                   entries in the current context that refer to positions
 *                   preceding the beginning of the current compression are
 *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
 *                   ->dictSize describe the location and size of the preceding
 *                   content, and matches are found by looking in the ctx
 *                   ->dictCtx->hashTable.
 */
typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;

/*-*******************************
 *  Decompression functions
 ********************************/

typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;

typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;

LZ4_FORCE_INLINE unsigned
read_variable_length(const BYTE**ip, const BYTE* lencheck,
                     int loop_check, int initial_check,
                     variable_length_error* error)
{
    U32 length = 0;
    U32 s;
    if (initial_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
        *error = initial_error;
        return length;
    }
    do {
        s = **ip;
        (*ip)++;
        length += s;
        if (loop_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
            *error = loop_error;
            return length;
        }
    } while (s==255);

    return length;
}

#define LZ4_STATIC_ASSERT(c)   ASSERT(c)


/*! LZ4_decompress_generic() :
 *  This generic decompression function covers all use cases.
 *  It shall be instantiated several times, using different sets of directives.
 *  Note that it is important for performance that this function really get inlined,
 *  in order to remove useless branches during compilation optimization.
 */
LZ4_FORCE_INLINE int
LZ4_decompress_generic(
                 const char* const src,
                 char* const dst,
                 int srcSize,
                 int outputSize,         /* If endOnInput==endOnInputSize, this value is `dstCapacity` */

                 endCondition_directive endOnInput,   /* endOnOutputSize, endOnInputSize */
                 earlyEnd_directive partialDecoding,  /* full, partial */
                 dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
                 const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
                 const size_t dictSize         /* note : = 0 if noDict */
                 )
{
    if ((src == NULL) || (outputSize < 0)) { return -1; }

    {   const BYTE* ip = (const BYTE*) src;
        const BYTE* const iend = ip + srcSize;

        BYTE* op = (BYTE*) dst;
        BYTE* const oend = op + outputSize;
        BYTE* cpy;

        const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;

        const int safeDecode = (endOnInput==endOnInputSize);
        const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
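
        /*
         * Added note, not part of the upstream lz4 sources: the offset
         * validation below ("checkOffset") is only needed for safe decoding
         * when the prefix + dictionary in front of the output is smaller
         * than 64 KB; otherwise no 16-bit offset can reach in front of it,
         * so the per-match check can be skipped entirely.
         */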


        /* Set up the "end" pointers for the shortcut. */
        const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
        const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;

        const BYTE* match;
        size_t offset;
        unsigned token;
        size_t length;


        DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);

        /* Special cases */
        assert(lowPrefix <= op);
        if ((endOnInput) && (unlikely(outputSize==0))) {
            /* Empty output buffer */
            if (partialDecoding) return 0;
            return ((srcSize==1) && (*ip==0)) ? 0 : -1;
        }
        if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
        if ((endOnInput) && unlikely(srcSize==0)) { return -1; }

        /* Currently the fast loop shows a regression on qualcomm arm chips. */
#if LZ4_FAST_DEC_LOOP
        if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
            DEBUGLOG(6, "skip fast decode loop");
            goto safe_decode;
        }

        /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */
        while (1) {
            /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
            assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
            if (endOnInput) { assert(ip < iend); }
            token = *ip++;
            length = token >> ML_BITS;  /* literal length */

            assert(!endOnInput || ip <= iend); /* ip < iend before the increment */

            /* decode literal length */
            if (length == RUN_MASK) {
                variable_length_error error = ok;
                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
                if (error == initial_error) { goto _output_error; }
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */

                /* copy literals */
                cpy = op+length;
                LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
                if (endOnInput) {  /* LZ4_decompress_safe() */
                    if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
                    LZ4_wildCopy32(op, ip, cpy);
                } else {   /* LZ4_decompress_fast() */
                    if (cpy>oend-8) { goto safe_literal_copy; }
                    LZ4_wildCopy8(op, ip, cpy);   /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
                                                   * it doesn't know input length, and only relies on end-of-block properties */
                }
                ip += length; op = cpy;
            } else {
                cpy = op+length;
                if (endOnInput) {  /* LZ4_decompress_safe() */
                    DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
                    /* We don't need to check oend, since we check it once for each loop below */
                    if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
                    /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
                    LZ4_memcpy(op, ip, 16);
                } else {  /* LZ4_decompress_fast() */
                    /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
                     * it doesn't know input length, and relies on end-of-block properties */
                    LZ4_memcpy(op, ip, 8);
                    if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
                }
                ip += length; op = cpy;
            }

            /* get offset */
            offset = LZ4_readLE16(ip); ip+=2;
            match = op - offset;
            assert(match <= op);

            /* get matchlength */
            length = token & ML_MASK;

            if (length == ML_MASK) {
                variable_length_error error = ok;
                if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
                length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
                if (error != ok) { goto _output_error; }
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
                length += MINMATCH;
                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
                    goto safe_match_copy;
                }
            } else {
                length += MINMATCH;
                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
                    goto safe_match_copy;
                }

                /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
                if ((dict == withPrefix64k) || (match >= lowPrefix)) {
                    if (offset >= 8) {
                        assert(match >= lowPrefix);
                        assert(match <= op);
                        assert(op + 18 <= oend);

                        LZ4_memcpy(op, match, 8);
                        LZ4_memcpy(op+8, match+8, 8);
                        LZ4_memcpy(op+16, match+16, 2);
                        op += length;
                        continue;
            }   }   }

            if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; }   /* Error : offset outside buffers */
            /* match starting within external dictionary */
            if ((dict==usingExtDict) && (match < lowPrefix)) {
                if (unlikely(op+length > oend-LASTLITERALS)) {
                    if (partialDecoding) {
                        DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
                        length = MIN(length, (size_t)(oend-op));
                    } else {
                        goto _output_error;  /* end-of-block condition violated */
                }   }

                if (length <= (size_t)(lowPrefix-match)) {
                    /* match fits entirely within external dictionary : just copy */
                    memmove(op, dictEnd - (lowPrefix-match), length);
                    op += length;
                } else {
                    /* match stretches into both external dictionary and current block */
                    size_t const copySize = (size_t)(lowPrefix - match);
                    size_t const restSize = length - copySize;
                    LZ4_memcpy(op, dictEnd - copySize, copySize);
                    op += copySize;
                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
                        BYTE* const endOfMatch = op + restSize;
                        const BYTE* copyFrom = lowPrefix;
                        while (op < endOfMatch) { *op++ = *copyFrom++; }
                    } else {
                        LZ4_memcpy(op, lowPrefix, restSize);
                        op += restSize;
                }   }
                continue;
            }

            /* copy match within block */
            cpy = op + length;

            assert((op <= oend) && (oend-op >= 32));
            if (unlikely(offset<16)) {
                LZ4_memcpy_using_offset(op, match, cpy, offset);
            } else {
                LZ4_wildCopy32(op, match, cpy);
            }

            op = cpy;   /* wildcopy correction */
        }
    safe_decode:
#endif

        /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
        while (1) {
            token = *ip++;
            length = token >> ML_BITS;  /* literal length */

            assert(!endOnInput || ip <= iend); /* ip < iend before the increment */

            /* A two-stage shortcut for the most common case:
             * 1) If the literal length is 0..14, and there is enough space,
             * enter the shortcut and copy 16 bytes on behalf of the literals
             * (in the fast mode, only 8 bytes can be safely copied this way).
             * 2) Further if the match length is 4..18, copy 18 bytes in a similar
             * manner; but we ensure that there's enough space in the output for
             * those 18 bytes earlier, upon entering the shortcut (in other words,
             * there is a combined check for both stages).
             */
            if ( (endOnInput ? length != RUN_MASK : length <= 8)
                /* strictly "less than" on input, to re-enter the loop with at least one byte */
              && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
                /* Copy the literals */
                LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
                op += length; ip += length;

                /* The second stage: prepare for match copying, decode full info.
                 * If it doesn't work out, the info won't be wasted. */
                length = token & ML_MASK; /* match length */
                offset = LZ4_readLE16(ip); ip += 2;
                match = op - offset;
                assert(match <= op); /* check overflow */

                /* Do not deal with overlapping matches. */
                if ( (length != ML_MASK)
                  && (offset >= 8)
                  && (dict==withPrefix64k || match >= lowPrefix) ) {
                    /* Copy the match. */
                    LZ4_memcpy(op + 0, match + 0, 8);
                    LZ4_memcpy(op + 8, match + 8, 8);
                    LZ4_memcpy(op +16, match +16, 2);
                    op += length + MINMATCH;
                    /* Both stages worked, load the next token. */
                    continue;
                }

                /* The second stage didn't work out, but the info is ready.
                 * Propel it right to the point of match copying. */
                goto _copy_match;
            }
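
            /*
             * Added note, not part of the upstream lz4 sources: the 16-byte
             * literal copy and 18-byte match copy in the shortcut above are
             * unconditional, which is safe because entering the shortcut
             * requires ip < shortiend and op <= shortoend, and those bounds
             * were derived with margins of 14 bytes (maximum literal run
             * here), 2 bytes (offset field) and 18 bytes (maximum match
             * here, (ML_MASK-1) + MINMATCH).
             */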

            /* decode literal length */
            if (length == RUN_MASK) {
                variable_length_error error = ok;
                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
                if (error == initial_error) { goto _output_error; }
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
            }

            /* copy literals */
            cpy = op+length;
#if LZ4_FAST_DEC_LOOP
        safe_literal_copy:
#endif
            LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
            if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
              || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
            {
                /* We've either hit the input parsing restriction or the output parsing restriction.
                 * In the normal scenario, decoding a full block, it must be the last sequence,
                 * otherwise it's an error (invalid input or dimensions).
                 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
                 */
                if (partialDecoding) {
                    /* Since we are partial decoding we may be in this block because of the output parsing
                     * restriction, which is not valid since the output buffer is allowed to be undersized.
                     */
                    assert(endOnInput);
                    DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
                    DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
                    DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
                    DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
                    /* Finishing in the middle of a literals segment,
                     * due to lack of input.
                     */
                    if (ip+length > iend) {
                        length = (size_t)(iend-ip);
                        cpy = op + length;
                    }
                    /* Finishing in the middle of a literals segment,
                     * due to lack of output space.
                     */
                    if (cpy > oend) {
                        cpy = oend;
                        assert(op<=oend);
                        length = (size_t)(oend-op);
                    }
                } else {
                    /* We must be on the last sequence because of the parsing limitations so check
                     * that we exactly regenerate the original size (must be exact when !endOnInput).
                     */
                    if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
                    /* We must be on the last sequence (or invalid) because of the parsing limitations
                     * so check that we exactly consume the input and don't overrun the output buffer.
                     */
                    if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
                        DEBUGLOG(6, "should have been last run of literals")
                        DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
                        DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
                        goto _output_error;
                    }
                }
                memmove(op, ip, length);  /* supports overlapping memory regions; only matters for in-place decompression scenarios */
                ip += length;
                op += length;
                /* Necessarily EOF when !partialDecoding.
                 * When partialDecoding, it is EOF if we've either
                 * filled the output buffer or
                 * can't proceed with reading an offset for following match.
                 */
                if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
                    break;
                }
            } else {
                LZ4_wildCopy8(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
                ip += length; op = cpy;
            }

            /* get offset */
            offset = LZ4_readLE16(ip); ip+=2;
            match = op - offset;

            /* get matchlength */
            length = token & ML_MASK;

    _copy_match:
            if (length == ML_MASK) {
                variable_length_error error = ok;
                length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
                if (error != ok) goto _output_error;
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
            }
            length += MINMATCH;

#if LZ4_FAST_DEC_LOOP
        safe_match_copy:
#endif
            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
            /* match starting within external dictionary */
            if ((dict==usingExtDict) && (match < lowPrefix)) {
                if (unlikely(op+length > oend-LASTLITERALS)) {
                    if (partialDecoding) length = MIN(length, (size_t)(oend-op));
                    else goto _output_error;   /* doesn't respect parsing restriction */
                }

                if (length <= (size_t)(lowPrefix-match)) {
                    /* match fits entirely within external dictionary : just copy */
                    memmove(op, dictEnd - (lowPrefix-match), length);
                    op += length;
                } else {
                    /* match stretches into both external dictionary and current block */
                    size_t const copySize = (size_t)(lowPrefix - match);
                    size_t const restSize = length - copySize;
                    LZ4_memcpy(op, dictEnd - copySize, copySize);
                    op += copySize;
                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
                        BYTE* const endOfMatch = op + restSize;
                        const BYTE* copyFrom = lowPrefix;
                        while (op < endOfMatch) *op++ = *copyFrom++;
                    } else {
                        LZ4_memcpy(op, lowPrefix, restSize);
                        op += restSize;
                }   }
                continue;
            }
            assert(match >= lowPrefix);

            /* copy match within block */
            cpy = op + length;

            /* partialDecoding : may end anywhere within the block */
            assert(op<=oend);
            if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
                size_t const mlen = MIN(length, (size_t)(oend-op));
                const BYTE* const matchEnd = match + mlen;
                BYTE* const copyEnd = op + mlen;
                if (matchEnd > op) {   /* overlap copy */
                    while (op < copyEnd) { *op++ = *match++; }
                } else {
                    LZ4_memcpy(op, match, mlen);
                }
                op = copyEnd;
                if (op == oend) { break; }
                continue;
            }

            if (unlikely(offset<8)) {
                LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
                op[0] = match[0];
                op[1] = match[1];
                op[2] = match[2];
                op[3] = match[3];
                match += inc32table[offset];
                LZ4_memcpy(op+4, match, 4);
                match -= dec64table[offset];
            } else {
                LZ4_memcpy(op, match, 8);
                match += 8;
            }
            op += 8;

            if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
                BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
                if (cpy > oend-LASTLITERALS) { goto _output_error; }   /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
                if (op < oCopyLimit) {
                    LZ4_wildCopy8(op, match, oCopyLimit);
                    match += oCopyLimit - op;
                    op = oCopyLimit;
                }
                while (op < cpy) { *op++ = *match++; }
            } else {
                LZ4_memcpy(op, match, 8);
                if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
            }
            op = cpy;   /* wildcopy correction */
        }

        /* end of decoding */
        if (endOnInput) {
            DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
            return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
        } else {
            return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
        }

        /* Overflow error detected */
    _output_error:
        return (int) (-(((const char*)ip)-src))-1;
    }
}

/*
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *		already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *		(necessarily <= maxOutputSize). If the source stream is
 *		malformed, the function will stop decoding and return a
 *		negative result, indicating the byte position of the faulty
 *		instruction. This function never writes beyond dest +
 *		maxOutputSize, and is therefore protected against malicious
 *		data packets.
 *	note : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress()
 *
 */

/*
 * Note: In upstream code, LZ4_uncompress_unknownOutputSize is now a legacy
 *       wrapper for LZ4_decompress_safe which is a wrapper for
 *       LZ4_decompress_generic; this wrapper flattens that, rather than
 *       rewriting the callers.
 */
int LZ4_uncompress_unknownOutputSize(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
{
    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
        endOnInputSize, decode_full_block, noDict,
        (BYTE*)dest, NULL, 0);
}
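
/*
 * Illustrative usage sketch, not part of the upstream file: roughly how a
 * caller is expected to drive this decompressor, per the contract described
 * above.  The function and buffer names below are hypothetical.
 */
#if 0   /* example only, never compiled */
static int
lz4_decompress_example(const char *cbuf, int csize, char *dbuf, int dsize)
{
	int decoded;

	/*
	 * Returns the number of bytes written into dbuf, or a negative
	 * value if cbuf is malformed; it never writes past dbuf + dsize.
	 */
	decoded = LZ4_uncompress_unknownOutputSize(cbuf, dbuf, csize, dsize);

	return (decoded < 0 ? -1 : decoded);
}
#endif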