Path: blob/main/sys/contrib/zstd/lib/decompress/huf_decompress.c
48375 views
/* ******************************************************************1* huff0 huffman decoder,2* part of Finite State Entropy library3* Copyright (c) Yann Collet, Facebook, Inc.4*5* You can contact the author at :6* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy7*8* This source code is licensed under both the BSD-style license (found in the9* LICENSE file in the root directory of this source tree) and the GPLv2 (found10* in the COPYING file in the root directory of this source tree).11* You may select, at your option, one of the above-listed licenses.12****************************************************************** */1314/* **************************************************************15* Dependencies16****************************************************************/17#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */18#include "../common/compiler.h"19#include "../common/bitstream.h" /* BIT_* */20#include "../common/fse.h" /* to compress headers */21#define HUF_STATIC_LINKING_ONLY22#include "../common/huf.h"23#include "../common/error_private.h"24#include "../common/zstd_internal.h"2526/* **************************************************************27* Constants28****************************************************************/2930#define HUF_DECODER_FAST_TABLELOG 113132/* **************************************************************33* Macros34****************************************************************/3536/* These two optional macros force the use one way or another of the two37* Huffman decompression implementations. You can't force in both directions38* at the same time.39*/40#if defined(HUF_FORCE_DECOMPRESS_X1) && \41defined(HUF_FORCE_DECOMPRESS_X2)42#error "Cannot force the use of the X1 and X2 decoders at the same time!"43#endif4445#if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI246# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE47#else48# define HUF_ASM_X86_64_BMI2_ATTRS49#endif5051#ifdef __cplusplus52# define HUF_EXTERN_C extern "C"53#else54# define HUF_EXTERN_C55#endif56#define HUF_ASM_DECL HUF_EXTERN_C5758#if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))59# define HUF_NEED_BMI2_FUNCTION 160#else61# define HUF_NEED_BMI2_FUNCTION 062#endif6364#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))65# define HUF_NEED_DEFAULT_FUNCTION 166#else67# define HUF_NEED_DEFAULT_FUNCTION 068#endif6970/* **************************************************************71* Error Management72****************************************************************/73#define HUF_isError ERR_isError747576/* **************************************************************77* Byte alignment for workSpace management78****************************************************************/79#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)80#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))818283/* **************************************************************84* BMI2 Variant Wrappers85****************************************************************/86#if DYNAMIC_BMI28788#define HUF_DGEN(fn) \89\90static size_t fn##_default( \91void* dst, size_t dstSize, \92const void* cSrc, size_t cSrcSize, \93const HUF_DTable* DTable) \94{ \95return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \96} \97\98static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \99void* dst, size_t dstSize, \100const void* cSrc, size_t cSrcSize, \101const HUF_DTable* DTable) \102{ \103return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \104} \105\106static size_t fn(void* dst, size_t dstSize, void const* cSrc, \107size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \108{ \109if (bmi2) { \110return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \111} \112return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \113}114115#else116117#define HUF_DGEN(fn) \118static size_t fn(void* dst, size_t dstSize, void const* cSrc, \119size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \120{ \121(void)bmi2; \122return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \123}124125#endif126127128/*-***************************/129/* generic DTableDesc */130/*-***************************/131typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;132133static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)134{135DTableDesc dtd;136ZSTD_memcpy(&dtd, table, sizeof(dtd));137return dtd;138}139140#if ZSTD_ENABLE_ASM_X86_64_BMI2141142static size_t HUF_initDStream(BYTE const* ip) {143BYTE const lastByte = ip[7];144size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;145size_t const value = MEM_readLEST(ip) | 1;146assert(bitsConsumed <= 8);147return value << bitsConsumed;148}149typedef struct {150BYTE const* ip[4];151BYTE* op[4];152U64 bits[4];153void const* dt;154BYTE const* ilimit;155BYTE* oend;156BYTE const* iend[4];157} HUF_DecompressAsmArgs;158159/**160* Initializes args for the asm decoding loop.161* @returns 0 on success162* 1 if the fallback implementation should be used.163* Or an error code on failure.164*/165static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)166{167void const* dt = DTable + 1;168U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;169170const BYTE* const ilimit = (const BYTE*)src + 6 + 8;171172BYTE* const oend = (BYTE*)dst + dstSize;173174/* The following condition is false on x32 platform,175* but HUF_asm is not compatible with this ABI */176if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;177178/* strict minimum : jump table + 1 byte per stream */179if (srcSize < 10)180return ERROR(corruption_detected);181182/* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.183* If table log is not correct at this point, fallback to the old decoder.184* On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.185*/186if (dtLog != HUF_DECODER_FAST_TABLELOG)187return 1;188189/* Read the jump table. */190{191const BYTE* const istart = (const BYTE*)src;192size_t const length1 = MEM_readLE16(istart);193size_t const length2 = MEM_readLE16(istart+2);194size_t const length3 = MEM_readLE16(istart+4);195size_t const length4 = srcSize - (length1 + length2 + length3 + 6);196args->iend[0] = istart + 6; /* jumpTable */197args->iend[1] = args->iend[0] + length1;198args->iend[2] = args->iend[1] + length2;199args->iend[3] = args->iend[2] + length3;200201/* HUF_initDStream() requires this, and this small of an input202* won't benefit from the ASM loop anyways.203* length1 must be >= 16 so that ip[0] >= ilimit before the loop204* starts.205*/206if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)207return 1;208if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */209}210/* ip[] contains the position that is currently loaded into bits[]. */211args->ip[0] = args->iend[1] - sizeof(U64);212args->ip[1] = args->iend[2] - sizeof(U64);213args->ip[2] = args->iend[3] - sizeof(U64);214args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);215216/* op[] contains the output pointers. */217args->op[0] = (BYTE*)dst;218args->op[1] = args->op[0] + (dstSize+3)/4;219args->op[2] = args->op[1] + (dstSize+3)/4;220args->op[3] = args->op[2] + (dstSize+3)/4;221222/* No point to call the ASM loop for tiny outputs. */223if (args->op[3] >= oend)224return 1;225226/* bits[] is the bit container.227* It is read from the MSB down to the LSB.228* It is shifted left as it is read, and zeros are229* shifted in. After the lowest valid bit a 1 is230* set, so that CountTrailingZeros(bits[]) can be used231* to count how many bits we've consumed.232*/233args->bits[0] = HUF_initDStream(args->ip[0]);234args->bits[1] = HUF_initDStream(args->ip[1]);235args->bits[2] = HUF_initDStream(args->ip[2]);236args->bits[3] = HUF_initDStream(args->ip[3]);237238/* If ip[] >= ilimit, it is guaranteed to be safe to239* reload bits[]. It may be beyond its section, but is240* guaranteed to be valid (>= istart).241*/242args->ilimit = ilimit;243244args->oend = oend;245args->dt = dt;246247return 0;248}249250static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)251{252/* Validate that we haven't overwritten. */253if (args->op[stream] > segmentEnd)254return ERROR(corruption_detected);255/* Validate that we haven't read beyond iend[].256* Note that ip[] may be < iend[] because the MSB is257* the next bit to read, and we may have consumed 100%258* of the stream, so down to iend[i] - 8 is valid.259*/260if (args->ip[stream] < args->iend[stream] - 8)261return ERROR(corruption_detected);262263/* Construct the BIT_DStream_t. */264bit->bitContainer = MEM_readLE64(args->ip[stream]);265bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);266bit->start = (const char*)args->iend[0];267bit->limitPtr = bit->start + sizeof(size_t);268bit->ptr = (const char*)args->ip[stream];269270return 0;271}272#endif273274275#ifndef HUF_FORCE_DECOMPRESS_X2276277/*-***************************/278/* single-symbol decoding */279/*-***************************/280typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decoding */281282/**283* Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at284* a time.285*/286static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {287U64 D4;288if (MEM_isLittleEndian()) {289D4 = (symbol << 8) + nbBits;290} else {291D4 = symbol + (nbBits << 8);292}293D4 *= 0x0001000100010001ULL;294return D4;295}296297/**298* Increase the tableLog to targetTableLog and rescales the stats.299* If tableLog > targetTableLog this is a no-op.300* @returns New tableLog301*/302static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)303{304if (tableLog > targetTableLog)305return tableLog;306if (tableLog < targetTableLog) {307U32 const scale = targetTableLog - tableLog;308U32 s;309/* Increase the weight for all non-zero probability symbols by scale. */310for (s = 0; s < nbSymbols; ++s) {311huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);312}313/* Update rankVal to reflect the new weights.314* All weights except 0 get moved to weight + scale.315* Weights [1, scale] are empty.316*/317for (s = targetTableLog; s > scale; --s) {318rankVal[s] = rankVal[s - scale];319}320for (s = scale; s > 0; --s) {321rankVal[s] = 0;322}323}324return targetTableLog;325}326327typedef struct {328U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];329U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];330U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];331BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];332BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];333} HUF_ReadDTableX1_Workspace;334335336size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)337{338return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);339}340341size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)342{343U32 tableLog = 0;344U32 nbSymbols = 0;345size_t iSize;346void* const dtPtr = DTable + 1;347HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;348HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;349350DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));351if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);352353DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));354/* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */355356iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);357if (HUF_isError(iSize)) return iSize;358359360/* Table header */361{ DTableDesc dtd = HUF_getDTableDesc(DTable);362U32 const maxTableLog = dtd.maxTableLog + 1;363U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);364tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);365if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */366dtd.tableType = 0;367dtd.tableLog = (BYTE)tableLog;368ZSTD_memcpy(DTable, &dtd, sizeof(dtd));369}370371/* Compute symbols and rankStart given rankVal:372*373* rankVal already contains the number of values of each weight.374*375* symbols contains the symbols ordered by weight. First are the rankVal[0]376* weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.377* symbols[0] is filled (but unused) to avoid a branch.378*379* rankStart contains the offset where each rank belongs in the DTable.380* rankStart[0] is not filled because there are no entries in the table for381* weight 0.382*/383{384int n;385int nextRankStart = 0;386int const unroll = 4;387int const nLimit = (int)nbSymbols - unroll + 1;388for (n=0; n<(int)tableLog+1; n++) {389U32 const curr = nextRankStart;390nextRankStart += wksp->rankVal[n];391wksp->rankStart[n] = curr;392}393for (n=0; n < nLimit; n += unroll) {394int u;395for (u=0; u < unroll; ++u) {396size_t const w = wksp->huffWeight[n+u];397wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);398}399}400for (; n < (int)nbSymbols; ++n) {401size_t const w = wksp->huffWeight[n];402wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;403}404}405406/* fill DTable407* We fill all entries of each weight in order.408* That way length is a constant for each iteration of the outer loop.409* We can switch based on the length to a different inner loop which is410* optimized for that particular case.411*/412{413U32 w;414int symbol=wksp->rankVal[0];415int rankStart=0;416for (w=1; w<tableLog+1; ++w) {417int const symbolCount = wksp->rankVal[w];418int const length = (1 << w) >> 1;419int uStart = rankStart;420BYTE const nbBits = (BYTE)(tableLog + 1 - w);421int s;422int u;423switch (length) {424case 1:425for (s=0; s<symbolCount; ++s) {426HUF_DEltX1 D;427D.byte = wksp->symbols[symbol + s];428D.nbBits = nbBits;429dt[uStart] = D;430uStart += 1;431}432break;433case 2:434for (s=0; s<symbolCount; ++s) {435HUF_DEltX1 D;436D.byte = wksp->symbols[symbol + s];437D.nbBits = nbBits;438dt[uStart+0] = D;439dt[uStart+1] = D;440uStart += 2;441}442break;443case 4:444for (s=0; s<symbolCount; ++s) {445U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);446MEM_write64(dt + uStart, D4);447uStart += 4;448}449break;450case 8:451for (s=0; s<symbolCount; ++s) {452U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);453MEM_write64(dt + uStart, D4);454MEM_write64(dt + uStart + 4, D4);455uStart += 8;456}457break;458default:459for (s=0; s<symbolCount; ++s) {460U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);461for (u=0; u < length; u += 16) {462MEM_write64(dt + uStart + u + 0, D4);463MEM_write64(dt + uStart + u + 4, D4);464MEM_write64(dt + uStart + u + 8, D4);465MEM_write64(dt + uStart + u + 12, D4);466}467assert(u == length);468uStart += length;469}470break;471}472symbol += symbolCount;473rankStart += symbolCount * length;474}475}476return iSize;477}478479FORCE_INLINE_TEMPLATE BYTE480HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)481{482size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */483BYTE const c = dt[val].byte;484BIT_skipBits(Dstream, dt[val].nbBits);485return c;486}487488#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \489*ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)490491#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \492if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \493HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)494495#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \496if (MEM_64bits()) \497HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)498499HINT_INLINE size_t500HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)501{502BYTE* const pStart = p;503504/* up to 4 symbols at a time */505if ((pEnd - p) > 3) {506while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {507HUF_DECODE_SYMBOLX1_2(p, bitDPtr);508HUF_DECODE_SYMBOLX1_1(p, bitDPtr);509HUF_DECODE_SYMBOLX1_2(p, bitDPtr);510HUF_DECODE_SYMBOLX1_0(p, bitDPtr);511}512} else {513BIT_reloadDStream(bitDPtr);514}515516/* [0-3] symbols remaining */517if (MEM_32bits())518while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))519HUF_DECODE_SYMBOLX1_0(p, bitDPtr);520521/* no more data to retrieve from bitstream, no need to reload */522while (p < pEnd)523HUF_DECODE_SYMBOLX1_0(p, bitDPtr);524525return pEnd-pStart;526}527528FORCE_INLINE_TEMPLATE size_t529HUF_decompress1X1_usingDTable_internal_body(530void* dst, size_t dstSize,531const void* cSrc, size_t cSrcSize,532const HUF_DTable* DTable)533{534BYTE* op = (BYTE*)dst;535BYTE* const oend = op + dstSize;536const void* dtPtr = DTable + 1;537const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;538BIT_DStream_t bitD;539DTableDesc const dtd = HUF_getDTableDesc(DTable);540U32 const dtLog = dtd.tableLog;541542CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );543544HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);545546if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);547548return dstSize;549}550551FORCE_INLINE_TEMPLATE size_t552HUF_decompress4X1_usingDTable_internal_body(553void* dst, size_t dstSize,554const void* cSrc, size_t cSrcSize,555const HUF_DTable* DTable)556{557/* Check */558if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */559560{ const BYTE* const istart = (const BYTE*) cSrc;561BYTE* const ostart = (BYTE*) dst;562BYTE* const oend = ostart + dstSize;563BYTE* const olimit = oend - 3;564const void* const dtPtr = DTable + 1;565const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;566567/* Init */568BIT_DStream_t bitD1;569BIT_DStream_t bitD2;570BIT_DStream_t bitD3;571BIT_DStream_t bitD4;572size_t const length1 = MEM_readLE16(istart);573size_t const length2 = MEM_readLE16(istart+2);574size_t const length3 = MEM_readLE16(istart+4);575size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);576const BYTE* const istart1 = istart + 6; /* jumpTable */577const BYTE* const istart2 = istart1 + length1;578const BYTE* const istart3 = istart2 + length2;579const BYTE* const istart4 = istart3 + length3;580const size_t segmentSize = (dstSize+3) / 4;581BYTE* const opStart2 = ostart + segmentSize;582BYTE* const opStart3 = opStart2 + segmentSize;583BYTE* const opStart4 = opStart3 + segmentSize;584BYTE* op1 = ostart;585BYTE* op2 = opStart2;586BYTE* op3 = opStart3;587BYTE* op4 = opStart4;588DTableDesc const dtd = HUF_getDTableDesc(DTable);589U32 const dtLog = dtd.tableLog;590U32 endSignal = 1;591592if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */593if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */594CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );595CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );596CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );597CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );598599/* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */600if ((size_t)(oend - op4) >= sizeof(size_t)) {601for ( ; (endSignal) & (op4 < olimit) ; ) {602HUF_DECODE_SYMBOLX1_2(op1, &bitD1);603HUF_DECODE_SYMBOLX1_2(op2, &bitD2);604HUF_DECODE_SYMBOLX1_2(op3, &bitD3);605HUF_DECODE_SYMBOLX1_2(op4, &bitD4);606HUF_DECODE_SYMBOLX1_1(op1, &bitD1);607HUF_DECODE_SYMBOLX1_1(op2, &bitD2);608HUF_DECODE_SYMBOLX1_1(op3, &bitD3);609HUF_DECODE_SYMBOLX1_1(op4, &bitD4);610HUF_DECODE_SYMBOLX1_2(op1, &bitD1);611HUF_DECODE_SYMBOLX1_2(op2, &bitD2);612HUF_DECODE_SYMBOLX1_2(op3, &bitD3);613HUF_DECODE_SYMBOLX1_2(op4, &bitD4);614HUF_DECODE_SYMBOLX1_0(op1, &bitD1);615HUF_DECODE_SYMBOLX1_0(op2, &bitD2);616HUF_DECODE_SYMBOLX1_0(op3, &bitD3);617HUF_DECODE_SYMBOLX1_0(op4, &bitD4);618endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;619endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;620endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;621endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;622}623}624625/* check corruption */626/* note : should not be necessary : op# advance in lock step, and we control op4.627* but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */628if (op1 > opStart2) return ERROR(corruption_detected);629if (op2 > opStart3) return ERROR(corruption_detected);630if (op3 > opStart4) return ERROR(corruption_detected);631/* note : op4 supposed already verified within main loop */632633/* finish bitStreams one by one */634HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);635HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);636HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);637HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);638639/* check */640{ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);641if (!endCheck) return ERROR(corruption_detected); }642643/* decoded size */644return dstSize;645}646}647648#if HUF_NEED_BMI2_FUNCTION649static BMI2_TARGET_ATTRIBUTE650size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,651size_t cSrcSize, HUF_DTable const* DTable) {652return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);653}654#endif655656#if HUF_NEED_DEFAULT_FUNCTION657static658size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,659size_t cSrcSize, HUF_DTable const* DTable) {660return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);661}662#endif663664#if ZSTD_ENABLE_ASM_X86_64_BMI2665666HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;667668static HUF_ASM_X86_64_BMI2_ATTRS669size_t670HUF_decompress4X1_usingDTable_internal_bmi2_asm(671void* dst, size_t dstSize,672const void* cSrc, size_t cSrcSize,673const HUF_DTable* DTable)674{675void const* dt = DTable + 1;676const BYTE* const iend = (const BYTE*)cSrc + 6;677BYTE* const oend = (BYTE*)dst + dstSize;678HUF_DecompressAsmArgs args;679{680size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);681FORWARD_IF_ERROR(ret, "Failed to init asm args");682if (ret != 0)683return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);684}685686assert(args.ip[0] >= args.ilimit);687HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);688689/* Our loop guarantees that ip[] >= ilimit and that we haven't690* overwritten any op[].691*/692assert(args.ip[0] >= iend);693assert(args.ip[1] >= iend);694assert(args.ip[2] >= iend);695assert(args.ip[3] >= iend);696assert(args.op[3] <= oend);697(void)iend;698699/* finish bit streams one by one. */700{701size_t const segmentSize = (dstSize+3) / 4;702BYTE* segmentEnd = (BYTE*)dst;703int i;704for (i = 0; i < 4; ++i) {705BIT_DStream_t bit;706if (segmentSize <= (size_t)(oend - segmentEnd))707segmentEnd += segmentSize;708else709segmentEnd = oend;710FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");711/* Decompress and validate that we've produced exactly the expected length. */712args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG);713if (args.op[i] != segmentEnd) return ERROR(corruption_detected);714}715}716717/* decoded size */718return dstSize;719}720#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */721722typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,723const void *cSrc,724size_t cSrcSize,725const HUF_DTable *DTable);726727HUF_DGEN(HUF_decompress1X1_usingDTable_internal)728729static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,730size_t cSrcSize, HUF_DTable const* DTable, int bmi2)731{732#if DYNAMIC_BMI2733if (bmi2) {734# if ZSTD_ENABLE_ASM_X86_64_BMI2735return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);736# else737return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);738# endif739}740#else741(void)bmi2;742#endif743744#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)745return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);746#else747return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);748#endif749}750751752size_t HUF_decompress1X1_usingDTable(753void* dst, size_t dstSize,754const void* cSrc, size_t cSrcSize,755const HUF_DTable* DTable)756{757DTableDesc dtd = HUF_getDTableDesc(DTable);758if (dtd.tableType != 0) return ERROR(GENERIC);759return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);760}761762size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,763const void* cSrc, size_t cSrcSize,764void* workSpace, size_t wkspSize)765{766const BYTE* ip = (const BYTE*) cSrc;767768size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);769if (HUF_isError(hSize)) return hSize;770if (hSize >= cSrcSize) return ERROR(srcSize_wrong);771ip += hSize; cSrcSize -= hSize;772773return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);774}775776777size_t HUF_decompress4X1_usingDTable(778void* dst, size_t dstSize,779const void* cSrc, size_t cSrcSize,780const HUF_DTable* DTable)781{782DTableDesc dtd = HUF_getDTableDesc(DTable);783if (dtd.tableType != 0) return ERROR(GENERIC);784return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);785}786787static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,788const void* cSrc, size_t cSrcSize,789void* workSpace, size_t wkspSize, int bmi2)790{791const BYTE* ip = (const BYTE*) cSrc;792793size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);794if (HUF_isError(hSize)) return hSize;795if (hSize >= cSrcSize) return ERROR(srcSize_wrong);796ip += hSize; cSrcSize -= hSize;797798return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);799}800801size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,802const void* cSrc, size_t cSrcSize,803void* workSpace, size_t wkspSize)804{805return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);806}807808809#endif /* HUF_FORCE_DECOMPRESS_X2 */810811812#ifndef HUF_FORCE_DECOMPRESS_X1813814/* *************************/815/* double-symbols decoding */816/* *************************/817818typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */819typedef struct { BYTE symbol; } sortedSymbol_t;820typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];821typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];822823/**824* Constructs a HUF_DEltX2 in a U32.825*/826static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)827{828U32 seq;829DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);830DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);831DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);832DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));833if (MEM_isLittleEndian()) {834seq = level == 1 ? symbol : (baseSeq + (symbol << 8));835return seq + (nbBits << 16) + ((U32)level << 24);836} else {837seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);838return (seq << 16) + (nbBits << 8) + (U32)level;839}840}841842/**843* Constructs a HUF_DEltX2.844*/845static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)846{847HUF_DEltX2 DElt;848U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);849DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));850ZSTD_memcpy(&DElt, &val, sizeof(val));851return DElt;852}853854/**855* Constructs 2 HUF_DEltX2s and packs them into a U64.856*/857static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)858{859U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);860return (U64)DElt + ((U64)DElt << 32);861}862863/**864* Fills the DTable rank with all the symbols from [begin, end) that are each865* nbBits long.866*867* @param DTableRank The start of the rank in the DTable.868* @param begin The first symbol to fill (inclusive).869* @param end The last symbol to fill (exclusive).870* @param nbBits Each symbol is nbBits long.871* @param tableLog The table log.872* @param baseSeq If level == 1 { 0 } else { the first level symbol }873* @param level The level in the table. Must be 1 or 2.874*/875static void HUF_fillDTableX2ForWeight(876HUF_DEltX2* DTableRank,877sortedSymbol_t const* begin, sortedSymbol_t const* end,878U32 nbBits, U32 tableLog,879U16 baseSeq, int const level)880{881U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);882const sortedSymbol_t* ptr;883assert(level >= 1 && level <= 2);884switch (length) {885case 1:886for (ptr = begin; ptr != end; ++ptr) {887HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);888*DTableRank++ = DElt;889}890break;891case 2:892for (ptr = begin; ptr != end; ++ptr) {893HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);894DTableRank[0] = DElt;895DTableRank[1] = DElt;896DTableRank += 2;897}898break;899case 4:900for (ptr = begin; ptr != end; ++ptr) {901U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);902ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));903ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));904DTableRank += 4;905}906break;907case 8:908for (ptr = begin; ptr != end; ++ptr) {909U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);910ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));911ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));912ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));913ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));914DTableRank += 8;915}916break;917default:918for (ptr = begin; ptr != end; ++ptr) {919U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);920HUF_DEltX2* const DTableRankEnd = DTableRank + length;921for (; DTableRank != DTableRankEnd; DTableRank += 8) {922ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));923ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));924ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));925ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));926}927}928break;929}930}931932/* HUF_fillDTableX2Level2() :933* `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */934static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,935const U32* rankVal, const int minWeight, const int maxWeight1,936const sortedSymbol_t* sortedSymbols, U32 const* rankStart,937U32 nbBitsBaseline, U16 baseSeq)938{939/* Fill skipped values (all positions up to rankVal[minWeight]).940* These are positions only get a single symbol because the combined weight941* is too large.942*/943if (minWeight>1) {944U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);945U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);946int const skipSize = rankVal[minWeight];947assert(length > 1);948assert((U32)skipSize < length);949switch (length) {950case 2:951assert(skipSize == 1);952ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));953break;954case 4:955assert(skipSize <= 4);956ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));957ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));958break;959default:960{961int i;962for (i = 0; i < skipSize; i += 8) {963ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));964ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));965ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));966ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));967}968}969}970}971972/* Fill each of the second level symbols by weight. */973{974int w;975for (w = minWeight; w < maxWeight1; ++w) {976int const begin = rankStart[w];977int const end = rankStart[w+1];978U32 const nbBits = nbBitsBaseline - w;979U32 const totalBits = nbBits + consumedBits;980HUF_fillDTableX2ForWeight(981DTable + rankVal[w],982sortedSymbols + begin, sortedSymbols + end,983totalBits, targetLog,984baseSeq, /* level */ 2);985}986}987}988989static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,990const sortedSymbol_t* sortedList,991const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,992const U32 nbBitsBaseline)993{994U32* const rankVal = rankValOrigin[0];995const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */996const U32 minBits = nbBitsBaseline - maxWeight;997int w;998int const wEnd = (int)maxWeight + 1;9991000/* Fill DTable in order of weight. */1001for (w = 1; w < wEnd; ++w) {1002int const begin = (int)rankStart[w];1003int const end = (int)rankStart[w+1];1004U32 const nbBits = nbBitsBaseline - w;10051006if (targetLog-nbBits >= minBits) {1007/* Enough room for a second symbol. */1008int start = rankVal[w];1009U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);1010int minWeight = nbBits + scaleLog;1011int s;1012if (minWeight < 1) minWeight = 1;1013/* Fill the DTable for every symbol of weight w.1014* These symbols get at least 1 second symbol.1015*/1016for (s = begin; s != end; ++s) {1017HUF_fillDTableX2Level2(1018DTable + start, targetLog, nbBits,1019rankValOrigin[nbBits], minWeight, wEnd,1020sortedList, rankStart,1021nbBitsBaseline, sortedList[s].symbol);1022start += length;1023}1024} else {1025/* Only a single symbol. */1026HUF_fillDTableX2ForWeight(1027DTable + rankVal[w],1028sortedList + begin, sortedList + end,1029nbBits, targetLog,1030/* baseSeq */ 0, /* level */ 1);1031}1032}1033}10341035typedef struct {1036rankValCol_t rankVal[HUF_TABLELOG_MAX];1037U32 rankStats[HUF_TABLELOG_MAX + 1];1038U32 rankStart0[HUF_TABLELOG_MAX + 3];1039sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];1040BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];1041U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];1042} HUF_ReadDTableX2_Workspace;10431044size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,1045const void* src, size_t srcSize,1046void* workSpace, size_t wkspSize)1047{1048return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);1049}10501051size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,1052const void* src, size_t srcSize,1053void* workSpace, size_t wkspSize, int bmi2)1054{1055U32 tableLog, maxW, nbSymbols;1056DTableDesc dtd = HUF_getDTableDesc(DTable);1057U32 maxTableLog = dtd.maxTableLog;1058size_t iSize;1059void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */1060HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;1061U32 *rankStart;10621063HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;10641065if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);10661067rankStart = wksp->rankStart0 + 1;1068ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));1069ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));10701071DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */1072if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);1073/* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */10741075iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2);1076if (HUF_isError(iSize)) return iSize;10771078/* check result */1079if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */1080if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;10811082/* find maxWeight */1083for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */10841085/* Get start index of each weight */1086{ U32 w, nextRankStart = 0;1087for (w=1; w<maxW+1; w++) {1088U32 curr = nextRankStart;1089nextRankStart += wksp->rankStats[w];1090rankStart[w] = curr;1091}1092rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/1093rankStart[maxW+1] = nextRankStart;1094}10951096/* sort symbols by weight */1097{ U32 s;1098for (s=0; s<nbSymbols; s++) {1099U32 const w = wksp->weightList[s];1100U32 const r = rankStart[w]++;1101wksp->sortedSymbol[r].symbol = (BYTE)s;1102}1103rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */1104}11051106/* Build rankVal */1107{ U32* const rankVal0 = wksp->rankVal[0];1108{ int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */1109U32 nextRankVal = 0;1110U32 w;1111for (w=1; w<maxW+1; w++) {1112U32 curr = nextRankVal;1113nextRankVal += wksp->rankStats[w] << (w+rescale);1114rankVal0[w] = curr;1115} }1116{ U32 const minBits = tableLog+1 - maxW;1117U32 consumed;1118for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {1119U32* const rankValPtr = wksp->rankVal[consumed];1120U32 w;1121for (w = 1; w < maxW+1; w++) {1122rankValPtr[w] = rankVal0[w] >> consumed;1123} } } }11241125HUF_fillDTableX2(dt, maxTableLog,1126wksp->sortedSymbol,1127wksp->rankStart0, wksp->rankVal, maxW,1128tableLog+1);11291130dtd.tableLog = (BYTE)maxTableLog;1131dtd.tableType = 1;1132ZSTD_memcpy(DTable, &dtd, sizeof(dtd));1133return iSize;1134}113511361137FORCE_INLINE_TEMPLATE U321138HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)1139{1140size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */1141ZSTD_memcpy(op, &dt[val].sequence, 2);1142BIT_skipBits(DStream, dt[val].nbBits);1143return dt[val].length;1144}11451146FORCE_INLINE_TEMPLATE U321147HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)1148{1149size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */1150ZSTD_memcpy(op, &dt[val].sequence, 1);1151if (dt[val].length==1) {1152BIT_skipBits(DStream, dt[val].nbBits);1153} else {1154if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {1155BIT_skipBits(DStream, dt[val].nbBits);1156if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))1157/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */1158DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);1159}1160}1161return 1;1162}11631164#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \1165ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)11661167#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \1168if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \1169ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)11701171#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \1172if (MEM_64bits()) \1173ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)11741175HINT_INLINE size_t1176HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,1177const HUF_DEltX2* const dt, const U32 dtLog)1178{1179BYTE* const pStart = p;11801181/* up to 8 symbols at a time */1182if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {1183if (dtLog <= 11 && MEM_64bits()) {1184/* up to 10 symbols at a time */1185while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {1186HUF_DECODE_SYMBOLX2_0(p, bitDPtr);1187HUF_DECODE_SYMBOLX2_0(p, bitDPtr);1188HUF_DECODE_SYMBOLX2_0(p, bitDPtr);1189HUF_DECODE_SYMBOLX2_0(p, bitDPtr);1190HUF_DECODE_SYMBOLX2_0(p, bitDPtr);1191}1192} else {1193/* up to 8 symbols at a time */1194while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {1195HUF_DECODE_SYMBOLX2_2(p, bitDPtr);1196HUF_DECODE_SYMBOLX2_1(p, bitDPtr);1197HUF_DECODE_SYMBOLX2_2(p, bitDPtr);1198HUF_DECODE_SYMBOLX2_0(p, bitDPtr);1199}1200}1201} else {1202BIT_reloadDStream(bitDPtr);1203}12041205/* closer to end : up to 2 symbols at a time */1206if ((size_t)(pEnd - p) >= 2) {1207while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))1208HUF_DECODE_SYMBOLX2_0(p, bitDPtr);12091210while (p <= pEnd-2)1211HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */1212}12131214if (p < pEnd)1215p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);12161217return p-pStart;1218}12191220FORCE_INLINE_TEMPLATE size_t1221HUF_decompress1X2_usingDTable_internal_body(1222void* dst, size_t dstSize,1223const void* cSrc, size_t cSrcSize,1224const HUF_DTable* DTable)1225{1226BIT_DStream_t bitD;12271228/* Init */1229CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );12301231/* decode */1232{ BYTE* const ostart = (BYTE*) dst;1233BYTE* const oend = ostart + dstSize;1234const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */1235const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;1236DTableDesc const dtd = HUF_getDTableDesc(DTable);1237HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);1238}12391240/* check */1241if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);12421243/* decoded size */1244return dstSize;1245}1246FORCE_INLINE_TEMPLATE size_t1247HUF_decompress4X2_usingDTable_internal_body(1248void* dst, size_t dstSize,1249const void* cSrc, size_t cSrcSize,1250const HUF_DTable* DTable)1251{1252if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */12531254{ const BYTE* const istart = (const BYTE*) cSrc;1255BYTE* const ostart = (BYTE*) dst;1256BYTE* const oend = ostart + dstSize;1257BYTE* const olimit = oend - (sizeof(size_t)-1);1258const void* const dtPtr = DTable+1;1259const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;12601261/* Init */1262BIT_DStream_t bitD1;1263BIT_DStream_t bitD2;1264BIT_DStream_t bitD3;1265BIT_DStream_t bitD4;1266size_t const length1 = MEM_readLE16(istart);1267size_t const length2 = MEM_readLE16(istart+2);1268size_t const length3 = MEM_readLE16(istart+4);1269size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);1270const BYTE* const istart1 = istart + 6; /* jumpTable */1271const BYTE* const istart2 = istart1 + length1;1272const BYTE* const istart3 = istart2 + length2;1273const BYTE* const istart4 = istart3 + length3;1274size_t const segmentSize = (dstSize+3) / 4;1275BYTE* const opStart2 = ostart + segmentSize;1276BYTE* const opStart3 = opStart2 + segmentSize;1277BYTE* const opStart4 = opStart3 + segmentSize;1278BYTE* op1 = ostart;1279BYTE* op2 = opStart2;1280BYTE* op3 = opStart3;1281BYTE* op4 = opStart4;1282U32 endSignal = 1;1283DTableDesc const dtd = HUF_getDTableDesc(DTable);1284U32 const dtLog = dtd.tableLog;12851286if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */1287if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */1288CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );1289CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );1290CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );1291CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );12921293/* 16-32 symbols per loop (4-8 symbols per stream) */1294if ((size_t)(oend - op4) >= sizeof(size_t)) {1295for ( ; (endSignal) & (op4 < olimit); ) {1296#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))1297HUF_DECODE_SYMBOLX2_2(op1, &bitD1);1298HUF_DECODE_SYMBOLX2_1(op1, &bitD1);1299HUF_DECODE_SYMBOLX2_2(op1, &bitD1);1300HUF_DECODE_SYMBOLX2_0(op1, &bitD1);1301HUF_DECODE_SYMBOLX2_2(op2, &bitD2);1302HUF_DECODE_SYMBOLX2_1(op2, &bitD2);1303HUF_DECODE_SYMBOLX2_2(op2, &bitD2);1304HUF_DECODE_SYMBOLX2_0(op2, &bitD2);1305endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;1306endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;1307HUF_DECODE_SYMBOLX2_2(op3, &bitD3);1308HUF_DECODE_SYMBOLX2_1(op3, &bitD3);1309HUF_DECODE_SYMBOLX2_2(op3, &bitD3);1310HUF_DECODE_SYMBOLX2_0(op3, &bitD3);1311HUF_DECODE_SYMBOLX2_2(op4, &bitD4);1312HUF_DECODE_SYMBOLX2_1(op4, &bitD4);1313HUF_DECODE_SYMBOLX2_2(op4, &bitD4);1314HUF_DECODE_SYMBOLX2_0(op4, &bitD4);1315endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;1316endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;1317#else1318HUF_DECODE_SYMBOLX2_2(op1, &bitD1);1319HUF_DECODE_SYMBOLX2_2(op2, &bitD2);1320HUF_DECODE_SYMBOLX2_2(op3, &bitD3);1321HUF_DECODE_SYMBOLX2_2(op4, &bitD4);1322HUF_DECODE_SYMBOLX2_1(op1, &bitD1);1323HUF_DECODE_SYMBOLX2_1(op2, &bitD2);1324HUF_DECODE_SYMBOLX2_1(op3, &bitD3);1325HUF_DECODE_SYMBOLX2_1(op4, &bitD4);1326HUF_DECODE_SYMBOLX2_2(op1, &bitD1);1327HUF_DECODE_SYMBOLX2_2(op2, &bitD2);1328HUF_DECODE_SYMBOLX2_2(op3, &bitD3);1329HUF_DECODE_SYMBOLX2_2(op4, &bitD4);1330HUF_DECODE_SYMBOLX2_0(op1, &bitD1);1331HUF_DECODE_SYMBOLX2_0(op2, &bitD2);1332HUF_DECODE_SYMBOLX2_0(op3, &bitD3);1333HUF_DECODE_SYMBOLX2_0(op4, &bitD4);1334endSignal = (U32)LIKELY((U32)1335(BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)1336& (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)1337& (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)1338& (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));1339#endif1340}1341}13421343/* check corruption */1344if (op1 > opStart2) return ERROR(corruption_detected);1345if (op2 > opStart3) return ERROR(corruption_detected);1346if (op3 > opStart4) return ERROR(corruption_detected);1347/* note : op4 already verified within main loop */13481349/* finish bitStreams one by one */1350HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);1351HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);1352HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);1353HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);13541355/* check */1356{ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);1357if (!endCheck) return ERROR(corruption_detected); }13581359/* decoded size */1360return dstSize;1361}1362}13631364#if HUF_NEED_BMI2_FUNCTION1365static BMI2_TARGET_ATTRIBUTE1366size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,1367size_t cSrcSize, HUF_DTable const* DTable) {1368return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);1369}1370#endif13711372#if HUF_NEED_DEFAULT_FUNCTION1373static1374size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,1375size_t cSrcSize, HUF_DTable const* DTable) {1376return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);1377}1378#endif13791380#if ZSTD_ENABLE_ASM_X86_64_BMI213811382HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;13831384static HUF_ASM_X86_64_BMI2_ATTRS size_t1385HUF_decompress4X2_usingDTable_internal_bmi2_asm(1386void* dst, size_t dstSize,1387const void* cSrc, size_t cSrcSize,1388const HUF_DTable* DTable) {1389void const* dt = DTable + 1;1390const BYTE* const iend = (const BYTE*)cSrc + 6;1391BYTE* const oend = (BYTE*)dst + dstSize;1392HUF_DecompressAsmArgs args;1393{1394size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);1395FORWARD_IF_ERROR(ret, "Failed to init asm args");1396if (ret != 0)1397return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);1398}13991400assert(args.ip[0] >= args.ilimit);1401HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);14021403/* note : op4 already verified within main loop */1404assert(args.ip[0] >= iend);1405assert(args.ip[1] >= iend);1406assert(args.ip[2] >= iend);1407assert(args.ip[3] >= iend);1408assert(args.op[3] <= oend);1409(void)iend;14101411/* finish bitStreams one by one */1412{1413size_t const segmentSize = (dstSize+3) / 4;1414BYTE* segmentEnd = (BYTE*)dst;1415int i;1416for (i = 0; i < 4; ++i) {1417BIT_DStream_t bit;1418if (segmentSize <= (size_t)(oend - segmentEnd))1419segmentEnd += segmentSize;1420else1421segmentEnd = oend;1422FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");1423args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG);1424if (args.op[i] != segmentEnd)1425return ERROR(corruption_detected);1426}1427}14281429/* decoded size */1430return dstSize;1431}1432#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */14331434static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,1435size_t cSrcSize, HUF_DTable const* DTable, int bmi2)1436{1437#if DYNAMIC_BMI21438if (bmi2) {1439# if ZSTD_ENABLE_ASM_X86_64_BMI21440return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);1441# else1442return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);1443# endif1444}1445#else1446(void)bmi2;1447#endif14481449#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)1450return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);1451#else1452return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);1453#endif1454}14551456HUF_DGEN(HUF_decompress1X2_usingDTable_internal)14571458size_t HUF_decompress1X2_usingDTable(1459void* dst, size_t dstSize,1460const void* cSrc, size_t cSrcSize,1461const HUF_DTable* DTable)1462{1463DTableDesc dtd = HUF_getDTableDesc(DTable);1464if (dtd.tableType != 1) return ERROR(GENERIC);1465return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);1466}14671468size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,1469const void* cSrc, size_t cSrcSize,1470void* workSpace, size_t wkspSize)1471{1472const BYTE* ip = (const BYTE*) cSrc;14731474size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,1475workSpace, wkspSize);1476if (HUF_isError(hSize)) return hSize;1477if (hSize >= cSrcSize) return ERROR(srcSize_wrong);1478ip += hSize; cSrcSize -= hSize;14791480return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);1481}148214831484size_t HUF_decompress4X2_usingDTable(1485void* dst, size_t dstSize,1486const void* cSrc, size_t cSrcSize,1487const HUF_DTable* DTable)1488{1489DTableDesc dtd = HUF_getDTableDesc(DTable);1490if (dtd.tableType != 1) return ERROR(GENERIC);1491return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);1492}14931494static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,1495const void* cSrc, size_t cSrcSize,1496void* workSpace, size_t wkspSize, int bmi2)1497{1498const BYTE* ip = (const BYTE*) cSrc;14991500size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,1501workSpace, wkspSize);1502if (HUF_isError(hSize)) return hSize;1503if (hSize >= cSrcSize) return ERROR(srcSize_wrong);1504ip += hSize; cSrcSize -= hSize;15051506return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);1507}15081509size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,1510const void* cSrc, size_t cSrcSize,1511void* workSpace, size_t wkspSize)1512{1513return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);1514}151515161517#endif /* HUF_FORCE_DECOMPRESS_X1 */151815191520/* ***********************************/1521/* Universal decompression selectors */1522/* ***********************************/15231524size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,1525const void* cSrc, size_t cSrcSize,1526const HUF_DTable* DTable)1527{1528DTableDesc const dtd = HUF_getDTableDesc(DTable);1529#if defined(HUF_FORCE_DECOMPRESS_X1)1530(void)dtd;1531assert(dtd.tableType == 0);1532return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);1533#elif defined(HUF_FORCE_DECOMPRESS_X2)1534(void)dtd;1535assert(dtd.tableType == 1);1536return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);1537#else1538return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :1539HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);1540#endif1541}15421543size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,1544const void* cSrc, size_t cSrcSize,1545const HUF_DTable* DTable)1546{1547DTableDesc const dtd = HUF_getDTableDesc(DTable);1548#if defined(HUF_FORCE_DECOMPRESS_X1)1549(void)dtd;1550assert(dtd.tableType == 0);1551return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);1552#elif defined(HUF_FORCE_DECOMPRESS_X2)1553(void)dtd;1554assert(dtd.tableType == 1);1555return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);1556#else1557return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :1558HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);1559#endif1560}156115621563#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)1564typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;1565static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =1566{1567/* single, double, quad */1568{{0,0}, {1,1}}, /* Q==0 : impossible */1569{{0,0}, {1,1}}, /* Q==1 : impossible */1570{{ 150,216}, { 381,119}}, /* Q == 2 : 12-18% */1571{{ 170,205}, { 514,112}}, /* Q == 3 : 18-25% */1572{{ 177,199}, { 539,110}}, /* Q == 4 : 25-32% */1573{{ 197,194}, { 644,107}}, /* Q == 5 : 32-38% */1574{{ 221,192}, { 735,107}}, /* Q == 6 : 38-44% */1575{{ 256,189}, { 881,106}}, /* Q == 7 : 44-50% */1576{{ 359,188}, {1167,109}}, /* Q == 8 : 50-56% */1577{{ 582,187}, {1570,114}}, /* Q == 9 : 56-62% */1578{{ 688,187}, {1712,122}}, /* Q ==10 : 62-69% */1579{{ 825,186}, {1965,136}}, /* Q ==11 : 69-75% */1580{{ 976,185}, {2131,150}}, /* Q ==12 : 75-81% */1581{{1180,186}, {2070,175}}, /* Q ==13 : 81-87% */1582{{1377,185}, {1731,202}}, /* Q ==14 : 87-93% */1583{{1412,185}, {1695,202}}, /* Q ==15 : 93-99% */1584};1585#endif15861587/** HUF_selectDecoder() :1588* Tells which decoder is likely to decode faster,1589* based on a set of pre-computed metrics.1590* @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .1591* Assumption : 0 < dstSize <= 128 KB */1592U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)1593{1594assert(dstSize > 0);1595assert(dstSize <= 128*1024);1596#if defined(HUF_FORCE_DECOMPRESS_X1)1597(void)dstSize;1598(void)cSrcSize;1599return 0;1600#elif defined(HUF_FORCE_DECOMPRESS_X2)1601(void)dstSize;1602(void)cSrcSize;1603return 1;1604#else1605/* decoder timing evaluation */1606{ U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */1607U32 const D256 = (U32)(dstSize >> 8);1608U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);1609U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);1610DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */1611return DTime1 < DTime0;1612}1613#endif1614}161516161617size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,1618size_t dstSize, const void* cSrc,1619size_t cSrcSize, void* workSpace,1620size_t wkspSize)1621{1622/* validation checks */1623if (dstSize == 0) return ERROR(dstSize_tooSmall);1624if (cSrcSize == 0) return ERROR(corruption_detected);16251626{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);1627#if defined(HUF_FORCE_DECOMPRESS_X1)1628(void)algoNb;1629assert(algoNb == 0);1630return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);1631#elif defined(HUF_FORCE_DECOMPRESS_X2)1632(void)algoNb;1633assert(algoNb == 1);1634return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);1635#else1636return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,1637cSrcSize, workSpace, wkspSize):1638HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);1639#endif1640}1641}16421643size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,1644const void* cSrc, size_t cSrcSize,1645void* workSpace, size_t wkspSize)1646{1647/* validation checks */1648if (dstSize == 0) return ERROR(dstSize_tooSmall);1649if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */1650if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */1651if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */16521653{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);1654#if defined(HUF_FORCE_DECOMPRESS_X1)1655(void)algoNb;1656assert(algoNb == 0);1657return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,1658cSrcSize, workSpace, wkspSize);1659#elif defined(HUF_FORCE_DECOMPRESS_X2)1660(void)algoNb;1661assert(algoNb == 1);1662return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,1663cSrcSize, workSpace, wkspSize);1664#else1665return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,1666cSrcSize, workSpace, wkspSize):1667HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,1668cSrcSize, workSpace, wkspSize);1669#endif1670}1671}167216731674size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)1675{1676DTableDesc const dtd = HUF_getDTableDesc(DTable);1677#if defined(HUF_FORCE_DECOMPRESS_X1)1678(void)dtd;1679assert(dtd.tableType == 0);1680return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);1681#elif defined(HUF_FORCE_DECOMPRESS_X2)1682(void)dtd;1683assert(dtd.tableType == 1);1684return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);1685#else1686return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :1687HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);1688#endif1689}16901691#ifndef HUF_FORCE_DECOMPRESS_X21692size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)1693{1694const BYTE* ip = (const BYTE*) cSrc;16951696size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);1697if (HUF_isError(hSize)) return hSize;1698if (hSize >= cSrcSize) return ERROR(srcSize_wrong);1699ip += hSize; cSrcSize -= hSize;17001701return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);1702}1703#endif17041705size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)1706{1707DTableDesc const dtd = HUF_getDTableDesc(DTable);1708#if defined(HUF_FORCE_DECOMPRESS_X1)1709(void)dtd;1710assert(dtd.tableType == 0);1711return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);1712#elif defined(HUF_FORCE_DECOMPRESS_X2)1713(void)dtd;1714assert(dtd.tableType == 1);1715return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);1716#else1717return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :1718HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);1719#endif1720}17211722size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)1723{1724/* validation checks */1725if (dstSize == 0) return ERROR(dstSize_tooSmall);1726if (cSrcSize == 0) return ERROR(corruption_detected);17271728{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);1729#if defined(HUF_FORCE_DECOMPRESS_X1)1730(void)algoNb;1731assert(algoNb == 0);1732return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);1733#elif defined(HUF_FORCE_DECOMPRESS_X2)1734(void)algoNb;1735assert(algoNb == 1);1736return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);1737#else1738return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :1739HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);1740#endif1741}1742}17431744#ifndef ZSTD_NO_UNUSED_FUNCTIONS1745#ifndef HUF_FORCE_DECOMPRESS_X21746size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)1747{1748U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];1749return HUF_readDTableX1_wksp(DTable, src, srcSize,1750workSpace, sizeof(workSpace));1751}17521753size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,1754const void* cSrc, size_t cSrcSize)1755{1756U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];1757return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,1758workSpace, sizeof(workSpace));1759}17601761size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)1762{1763HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);1764return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);1765}1766#endif17671768#ifndef HUF_FORCE_DECOMPRESS_X11769size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)1770{1771U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];1772return HUF_readDTableX2_wksp(DTable, src, srcSize,1773workSpace, sizeof(workSpace));1774}17751776size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,1777const void* cSrc, size_t cSrcSize)1778{1779U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];1780return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,1781workSpace, sizeof(workSpace));1782}17831784size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)1785{1786HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);1787return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);1788}1789#endif17901791#ifndef HUF_FORCE_DECOMPRESS_X21792size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)1793{1794U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];1795return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,1796workSpace, sizeof(workSpace));1797}1798size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)1799{1800HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);1801return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);1802}1803#endif18041805#ifndef HUF_FORCE_DECOMPRESS_X11806size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,1807const void* cSrc, size_t cSrcSize)1808{1809U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];1810return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,1811workSpace, sizeof(workSpace));1812}18131814size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)1815{1816HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);1817return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);1818}1819#endif18201821typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);18221823size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)1824{1825#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)1826static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };1827#endif18281829/* validation checks */1830if (dstSize == 0) return ERROR(dstSize_tooSmall);1831if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */1832if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */1833if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */18341835{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);1836#if defined(HUF_FORCE_DECOMPRESS_X1)1837(void)algoNb;1838assert(algoNb == 0);1839return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);1840#elif defined(HUF_FORCE_DECOMPRESS_X2)1841(void)algoNb;1842assert(algoNb == 1);1843return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);1844#else1845return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);1846#endif1847}1848}18491850size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)1851{1852/* validation checks */1853if (dstSize == 0) return ERROR(dstSize_tooSmall);1854if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */1855if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */1856if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */18571858{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);1859#if defined(HUF_FORCE_DECOMPRESS_X1)1860(void)algoNb;1861assert(algoNb == 0);1862return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);1863#elif defined(HUF_FORCE_DECOMPRESS_X2)1864(void)algoNb;1865assert(algoNb == 1);1866return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);1867#else1868return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :1869HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;1870#endif1871}1872}18731874size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)1875{1876U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];1877return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,1878workSpace, sizeof(workSpace));1879}18801881size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,1882const void* cSrc, size_t cSrcSize)1883{1884U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];1885return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,1886workSpace, sizeof(workSpace));1887}1888#endif188918901891