Path: blob/main/sys/contrib/openzfs/module/zstd/lib/decompress/huf_decompress.c
48774 views
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only1/* ******************************************************************2* huff0 huffman decoder,3* part of Finite State Entropy library4* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.5*6* You can contact the author at :7* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy8*9* This source code is licensed under both the BSD-style license (found in the10* LICENSE file in the root directory of this source tree) and the GPLv2 (found11* in the COPYING file in the root directory of this source tree).12* You may select, at your option, one of the above-listed licenses.13****************************************************************** */1415/* **************************************************************16* Dependencies17****************************************************************/18#include <string.h> /* memcpy, memset */19#include "../common/compiler.h"20#include "../common/bitstream.h" /* BIT_* */21#include "../common/fse.h" /* to compress headers */22#define HUF_STATIC_LINKING_ONLY23#include "../common/huf.h"24#include "../common/error_private.h"2526/* **************************************************************27* Macros28****************************************************************/2930/* These two optional macros force the use one way or another of the two31* Huffman decompression implementations. You can't force in both directions32* at the same time.33*/34#if defined(HUF_FORCE_DECOMPRESS_X1) && \35defined(HUF_FORCE_DECOMPRESS_X2)36#error "Cannot force the use of the X1 and X2 decoders at the same time!"37#endif383940/* **************************************************************41* Error Management42****************************************************************/43#define HUF_isError ERR_isError444546/* **************************************************************47* Byte alignment for workSpace management48****************************************************************/49#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)50#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))515253/* **************************************************************54* BMI2 Variant Wrappers55****************************************************************/56#if DYNAMIC_BMI25758#define HUF_DGEN(fn) \59\60static size_t fn##_default( \61void* dst, size_t dstSize, \62const void* cSrc, size_t cSrcSize, \63const HUF_DTable* DTable) \64{ \65return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \66} \67\68static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \69void* dst, size_t dstSize, \70const void* cSrc, size_t cSrcSize, \71const HUF_DTable* DTable) \72{ \73return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \74} \75\76static size_t fn(void* dst, size_t dstSize, void const* cSrc, \77size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \78{ \79if (bmi2) { \80return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \81} \82return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \83}8485#else8687#define HUF_DGEN(fn) \88static size_t fn(void* dst, size_t dstSize, void const* cSrc, \89size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \90{ \91(void)bmi2; \92return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \93}9495#endif969798/*-***************************/99/* generic DTableDesc */100/*-***************************/101typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;102103static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)104{105DTableDesc dtd;106memcpy(&dtd, table, sizeof(dtd));107return dtd;108}109110111#ifndef HUF_FORCE_DECOMPRESS_X2112113/*-***************************/114/* single-symbol decoding */115/*-***************************/116typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */117118size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)119{120U32 tableLog = 0;121U32 nbSymbols = 0;122size_t iSize;123void* const dtPtr = DTable + 1;124HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;125126U32* rankVal;127BYTE* huffWeight;128size_t spaceUsed32 = 0;129130rankVal = (U32 *)workSpace + spaceUsed32;131spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;132huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);133spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;134135if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);136137DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));138/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */139140iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);141if (HUF_isError(iSize)) return iSize;142143/* Table header */144{ DTableDesc dtd = HUF_getDTableDesc(DTable);145if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */146dtd.tableType = 0;147dtd.tableLog = (BYTE)tableLog;148memcpy(DTable, &dtd, sizeof(dtd));149}150151/* Calculate starting value for each rank */152{ U32 n, nextRankStart = 0;153for (n=1; n<tableLog+1; n++) {154U32 const current = nextRankStart;155nextRankStart += (rankVal[n] << (n-1));156rankVal[n] = current;157} }158159/* fill DTable */160{ U32 n;161size_t const nEnd = nbSymbols;162for (n=0; n<nEnd; n++) {163size_t const w = huffWeight[n];164size_t const length = (1 << w) >> 1;165size_t const uStart = rankVal[w];166size_t const uEnd = uStart + length;167size_t u;168HUF_DEltX1 D;169D.byte = (BYTE)n;170D.nbBits = (BYTE)(tableLog + 1 - w);171rankVal[w] = (U32)uEnd;172if (length < 4) {173/* Use length in the loop bound so the compiler knows it is short. */174for (u = 0; u < length; ++u)175dt[uStart + u] = D;176} else {177/* Unroll the loop 4 times, we know it is a power of 2. */178for (u = uStart; u < uEnd; u += 4) {179dt[u + 0] = D;180dt[u + 1] = D;181dt[u + 2] = D;182dt[u + 3] = D;183} } } }184return iSize;185}186187size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)188{189U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];190return HUF_readDTableX1_wksp(DTable, src, srcSize,191workSpace, sizeof(workSpace));192}193194FORCE_INLINE_TEMPLATE BYTE195HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)196{197size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */198BYTE const c = dt[val].byte;199BIT_skipBits(Dstream, dt[val].nbBits);200return c;201}202203#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \204*ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)205206#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \207if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \208HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)209210#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \211if (MEM_64bits()) \212HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)213214HINT_INLINE size_t215HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)216{217BYTE* const pStart = p;218219/* up to 4 symbols at a time */220while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {221HUF_DECODE_SYMBOLX1_2(p, bitDPtr);222HUF_DECODE_SYMBOLX1_1(p, bitDPtr);223HUF_DECODE_SYMBOLX1_2(p, bitDPtr);224HUF_DECODE_SYMBOLX1_0(p, bitDPtr);225}226227/* [0-3] symbols remaining */228if (MEM_32bits())229while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))230HUF_DECODE_SYMBOLX1_0(p, bitDPtr);231232/* no more data to retrieve from bitstream, no need to reload */233while (p < pEnd)234HUF_DECODE_SYMBOLX1_0(p, bitDPtr);235236return pEnd-pStart;237}238239FORCE_INLINE_TEMPLATE size_t240HUF_decompress1X1_usingDTable_internal_body(241void* dst, size_t dstSize,242const void* cSrc, size_t cSrcSize,243const HUF_DTable* DTable)244{245BYTE* op = (BYTE*)dst;246BYTE* const oend = op + dstSize;247const void* dtPtr = DTable + 1;248const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;249BIT_DStream_t bitD;250DTableDesc const dtd = HUF_getDTableDesc(DTable);251U32 const dtLog = dtd.tableLog;252253CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );254255HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);256257if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);258259return dstSize;260}261262FORCE_INLINE_TEMPLATE size_t263HUF_decompress4X1_usingDTable_internal_body(264void* dst, size_t dstSize,265const void* cSrc, size_t cSrcSize,266const HUF_DTable* DTable)267{268/* Check */269if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */270271{ const BYTE* const istart = (const BYTE*) cSrc;272BYTE* const ostart = (BYTE*) dst;273BYTE* const oend = ostart + dstSize;274BYTE* const olimit = oend - 3;275const void* const dtPtr = DTable + 1;276const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;277278/* Init */279BIT_DStream_t bitD1;280BIT_DStream_t bitD2;281BIT_DStream_t bitD3;282BIT_DStream_t bitD4;283size_t const length1 = MEM_readLE16(istart);284size_t const length2 = MEM_readLE16(istart+2);285size_t const length3 = MEM_readLE16(istart+4);286size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);287const BYTE* const istart1 = istart + 6; /* jumpTable */288const BYTE* const istart2 = istart1 + length1;289const BYTE* const istart3 = istart2 + length2;290const BYTE* const istart4 = istart3 + length3;291const size_t segmentSize = (dstSize+3) / 4;292BYTE* const opStart2 = ostart + segmentSize;293BYTE* const opStart3 = opStart2 + segmentSize;294BYTE* const opStart4 = opStart3 + segmentSize;295BYTE* op1 = ostart;296BYTE* op2 = opStart2;297BYTE* op3 = opStart3;298BYTE* op4 = opStart4;299DTableDesc const dtd = HUF_getDTableDesc(DTable);300U32 const dtLog = dtd.tableLog;301U32 endSignal = 1;302303if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */304CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );305CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );306CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );307CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );308309/* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */310for ( ; (endSignal) & (op4 < olimit) ; ) {311HUF_DECODE_SYMBOLX1_2(op1, &bitD1);312HUF_DECODE_SYMBOLX1_2(op2, &bitD2);313HUF_DECODE_SYMBOLX1_2(op3, &bitD3);314HUF_DECODE_SYMBOLX1_2(op4, &bitD4);315HUF_DECODE_SYMBOLX1_1(op1, &bitD1);316HUF_DECODE_SYMBOLX1_1(op2, &bitD2);317HUF_DECODE_SYMBOLX1_1(op3, &bitD3);318HUF_DECODE_SYMBOLX1_1(op4, &bitD4);319HUF_DECODE_SYMBOLX1_2(op1, &bitD1);320HUF_DECODE_SYMBOLX1_2(op2, &bitD2);321HUF_DECODE_SYMBOLX1_2(op3, &bitD3);322HUF_DECODE_SYMBOLX1_2(op4, &bitD4);323HUF_DECODE_SYMBOLX1_0(op1, &bitD1);324HUF_DECODE_SYMBOLX1_0(op2, &bitD2);325HUF_DECODE_SYMBOLX1_0(op3, &bitD3);326HUF_DECODE_SYMBOLX1_0(op4, &bitD4);327endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;328endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;329endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;330endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;331}332333/* check corruption */334/* note : should not be necessary : op# advance in lock step, and we control op4.335* but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */336if (op1 > opStart2) return ERROR(corruption_detected);337if (op2 > opStart3) return ERROR(corruption_detected);338if (op3 > opStart4) return ERROR(corruption_detected);339/* note : op4 supposed already verified within main loop */340341/* finish bitStreams one by one */342HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);343HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);344HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);345HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);346347/* check */348{ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);349if (!endCheck) return ERROR(corruption_detected); }350351/* decoded size */352return dstSize;353}354}355356357typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,358const void *cSrc,359size_t cSrcSize,360const HUF_DTable *DTable);361362HUF_DGEN(HUF_decompress1X1_usingDTable_internal)363HUF_DGEN(HUF_decompress4X1_usingDTable_internal)364365366367size_t HUF_decompress1X1_usingDTable(368void* dst, size_t dstSize,369const void* cSrc, size_t cSrcSize,370const HUF_DTable* DTable)371{372DTableDesc dtd = HUF_getDTableDesc(DTable);373if (dtd.tableType != 0) return ERROR(GENERIC);374return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);375}376377size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,378const void* cSrc, size_t cSrcSize,379void* workSpace, size_t wkspSize)380{381const BYTE* ip = (const BYTE*) cSrc;382383size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);384if (HUF_isError(hSize)) return hSize;385if (hSize >= cSrcSize) return ERROR(srcSize_wrong);386ip += hSize; cSrcSize -= hSize;387388return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);389}390391392size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,393const void* cSrc, size_t cSrcSize)394{395U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];396return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,397workSpace, sizeof(workSpace));398}399400size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)401{402HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);403return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);404}405406size_t HUF_decompress4X1_usingDTable(407void* dst, size_t dstSize,408const void* cSrc, size_t cSrcSize,409const HUF_DTable* DTable)410{411DTableDesc dtd = HUF_getDTableDesc(DTable);412if (dtd.tableType != 0) return ERROR(GENERIC);413return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);414}415416static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,417const void* cSrc, size_t cSrcSize,418void* workSpace, size_t wkspSize, int bmi2)419{420const BYTE* ip = (const BYTE*) cSrc;421422size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,423workSpace, wkspSize);424if (HUF_isError(hSize)) return hSize;425if (hSize >= cSrcSize) return ERROR(srcSize_wrong);426ip += hSize; cSrcSize -= hSize;427428return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);429}430431size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,432const void* cSrc, size_t cSrcSize,433void* workSpace, size_t wkspSize)434{435return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);436}437438439size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)440{441U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];442return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,443workSpace, sizeof(workSpace));444}445size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)446{447HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);448return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);449}450451#endif /* HUF_FORCE_DECOMPRESS_X2 */452453454#ifndef HUF_FORCE_DECOMPRESS_X1455456/* *************************/457/* double-symbols decoding */458/* *************************/459460typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */461typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;462typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];463typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];464465466/* HUF_fillDTableX2Level2() :467* `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */468static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,469const U32* rankValOrigin, const int minWeight,470const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,471U32 nbBitsBaseline, U16 baseSeq)472{473HUF_DEltX2 DElt;474U32 rankVal[HUF_TABLELOG_MAX + 1];475476/* get pre-calculated rankVal */477memcpy(rankVal, rankValOrigin, sizeof(rankVal));478479/* fill skipped values */480if (minWeight>1) {481U32 i, skipSize = rankVal[minWeight];482MEM_writeLE16(&(DElt.sequence), baseSeq);483DElt.nbBits = (BYTE)(consumed);484DElt.length = 1;485for (i = 0; i < skipSize; i++)486DTable[i] = DElt;487}488489/* fill DTable */490{ U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */491const U32 symbol = sortedSymbols[s].symbol;492const U32 weight = sortedSymbols[s].weight;493const U32 nbBits = nbBitsBaseline - weight;494const U32 length = 1 << (sizeLog-nbBits);495const U32 start = rankVal[weight];496U32 i = start;497const U32 end = start + length;498499MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));500DElt.nbBits = (BYTE)(nbBits + consumed);501DElt.length = 2;502do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */503504rankVal[weight] += length;505} }506}507508509static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,510const sortedSymbol_t* sortedList, const U32 sortedListSize,511const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,512const U32 nbBitsBaseline)513{514U32 rankVal[HUF_TABLELOG_MAX + 1];515const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */516const U32 minBits = nbBitsBaseline - maxWeight;517U32 s;518519memcpy(rankVal, rankValOrigin, sizeof(rankVal));520521/* fill DTable */522for (s=0; s<sortedListSize; s++) {523const U16 symbol = sortedList[s].symbol;524const U32 weight = sortedList[s].weight;525const U32 nbBits = nbBitsBaseline - weight;526const U32 start = rankVal[weight];527const U32 length = 1 << (targetLog-nbBits);528529if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */530U32 sortedRank;531int minWeight = nbBits + scaleLog;532if (minWeight < 1) minWeight = 1;533sortedRank = rankStart[minWeight];534HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,535rankValOrigin[nbBits], minWeight,536sortedList+sortedRank, sortedListSize-sortedRank,537nbBitsBaseline, symbol);538} else {539HUF_DEltX2 DElt;540MEM_writeLE16(&(DElt.sequence), symbol);541DElt.nbBits = (BYTE)(nbBits);542DElt.length = 1;543{ U32 const end = start + length;544U32 u;545for (u = start; u < end; u++) DTable[u] = DElt;546} }547rankVal[weight] += length;548}549}550551size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,552const void* src, size_t srcSize,553void* workSpace, size_t wkspSize)554{555U32 tableLog, maxW, sizeOfSort, nbSymbols;556DTableDesc dtd = HUF_getDTableDesc(DTable);557U32 const maxTableLog = dtd.maxTableLog;558size_t iSize;559void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */560HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;561U32 *rankStart;562563rankValCol_t* rankVal;564U32* rankStats;565U32* rankStart0;566sortedSymbol_t* sortedSymbol;567BYTE* weightList;568size_t spaceUsed32 = 0;569570rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);571spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;572rankStats = (U32 *)workSpace + spaceUsed32;573spaceUsed32 += HUF_TABLELOG_MAX + 1;574rankStart0 = (U32 *)workSpace + spaceUsed32;575spaceUsed32 += HUF_TABLELOG_MAX + 2;576sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);577spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;578weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);579spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;580581if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);582583rankStart = rankStart0 + 1;584memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));585586DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */587if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);588/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */589590iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);591if (HUF_isError(iSize)) return iSize;592593/* check result */594if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */595596/* find maxWeight */597for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */598599/* Get start index of each weight */600{ U32 w, nextRankStart = 0;601for (w=1; w<maxW+1; w++) {602U32 current = nextRankStart;603nextRankStart += rankStats[w];604rankStart[w] = current;605}606rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/607sizeOfSort = nextRankStart;608}609610/* sort symbols by weight */611{ U32 s;612for (s=0; s<nbSymbols; s++) {613U32 const w = weightList[s];614U32 const r = rankStart[w]++;615sortedSymbol[r].symbol = (BYTE)s;616sortedSymbol[r].weight = (BYTE)w;617}618rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */619}620621/* Build rankVal */622{ U32* const rankVal0 = rankVal[0];623{ int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */624U32 nextRankVal = 0;625U32 w;626for (w=1; w<maxW+1; w++) {627U32 current = nextRankVal;628nextRankVal += rankStats[w] << (w+rescale);629rankVal0[w] = current;630} }631{ U32 const minBits = tableLog+1 - maxW;632U32 consumed;633for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {634U32* const rankValPtr = rankVal[consumed];635U32 w;636for (w = 1; w < maxW+1; w++) {637rankValPtr[w] = rankVal0[w] >> consumed;638} } } }639640HUF_fillDTableX2(dt, maxTableLog,641sortedSymbol, sizeOfSort,642rankStart0, rankVal, maxW,643tableLog+1);644645dtd.tableLog = (BYTE)maxTableLog;646dtd.tableType = 1;647memcpy(DTable, &dtd, sizeof(dtd));648return iSize;649}650651size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)652{653U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];654return HUF_readDTableX2_wksp(DTable, src, srcSize,655workSpace, sizeof(workSpace));656}657658659FORCE_INLINE_TEMPLATE U32660HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)661{662size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */663memcpy(op, dt+val, 2);664BIT_skipBits(DStream, dt[val].nbBits);665return dt[val].length;666}667668FORCE_INLINE_TEMPLATE U32669HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)670{671size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */672memcpy(op, dt+val, 1);673if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);674else {675if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {676BIT_skipBits(DStream, dt[val].nbBits);677if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))678/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */679DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);680} }681return 1;682}683684#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \685ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)686687#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \688if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \689ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)690691#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \692if (MEM_64bits()) \693ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)694695HINT_INLINE size_t696HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,697const HUF_DEltX2* const dt, const U32 dtLog)698{699BYTE* const pStart = p;700701/* up to 8 symbols at a time */702while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {703HUF_DECODE_SYMBOLX2_2(p, bitDPtr);704HUF_DECODE_SYMBOLX2_1(p, bitDPtr);705HUF_DECODE_SYMBOLX2_2(p, bitDPtr);706HUF_DECODE_SYMBOLX2_0(p, bitDPtr);707}708709/* closer to end : up to 2 symbols at a time */710while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))711HUF_DECODE_SYMBOLX2_0(p, bitDPtr);712713while (p <= pEnd-2)714HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */715716if (p < pEnd)717p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);718719return p-pStart;720}721722FORCE_INLINE_TEMPLATE size_t723HUF_decompress1X2_usingDTable_internal_body(724void* dst, size_t dstSize,725const void* cSrc, size_t cSrcSize,726const HUF_DTable* DTable)727{728BIT_DStream_t bitD;729730/* Init */731CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );732733/* decode */734{ BYTE* const ostart = (BYTE*) dst;735BYTE* const oend = ostart + dstSize;736const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */737const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;738DTableDesc const dtd = HUF_getDTableDesc(DTable);739HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);740}741742/* check */743if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);744745/* decoded size */746return dstSize;747}748749FORCE_INLINE_TEMPLATE size_t750HUF_decompress4X2_usingDTable_internal_body(751void* dst, size_t dstSize,752const void* cSrc, size_t cSrcSize,753const HUF_DTable* DTable)754{755if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */756757{ const BYTE* const istart = (const BYTE*) cSrc;758BYTE* const ostart = (BYTE*) dst;759BYTE* const oend = ostart + dstSize;760BYTE* const olimit = oend - (sizeof(size_t)-1);761const void* const dtPtr = DTable+1;762const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;763764/* Init */765BIT_DStream_t bitD1;766BIT_DStream_t bitD2;767BIT_DStream_t bitD3;768BIT_DStream_t bitD4;769size_t const length1 = MEM_readLE16(istart);770size_t const length2 = MEM_readLE16(istart+2);771size_t const length3 = MEM_readLE16(istart+4);772size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);773const BYTE* const istart1 = istart + 6; /* jumpTable */774const BYTE* const istart2 = istart1 + length1;775const BYTE* const istart3 = istart2 + length2;776const BYTE* const istart4 = istart3 + length3;777size_t const segmentSize = (dstSize+3) / 4;778BYTE* const opStart2 = ostart + segmentSize;779BYTE* const opStart3 = opStart2 + segmentSize;780BYTE* const opStart4 = opStart3 + segmentSize;781BYTE* op1 = ostart;782BYTE* op2 = opStart2;783BYTE* op3 = opStart3;784BYTE* op4 = opStart4;785U32 endSignal = 1;786DTableDesc const dtd = HUF_getDTableDesc(DTable);787U32 const dtLog = dtd.tableLog;788789if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */790CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );791CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );792CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );793CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );794795/* 16-32 symbols per loop (4-8 symbols per stream) */796for ( ; (endSignal) & (op4 < olimit); ) {797#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))798HUF_DECODE_SYMBOLX2_2(op1, &bitD1);799HUF_DECODE_SYMBOLX2_1(op1, &bitD1);800HUF_DECODE_SYMBOLX2_2(op1, &bitD1);801HUF_DECODE_SYMBOLX2_0(op1, &bitD1);802HUF_DECODE_SYMBOLX2_2(op2, &bitD2);803HUF_DECODE_SYMBOLX2_1(op2, &bitD2);804HUF_DECODE_SYMBOLX2_2(op2, &bitD2);805HUF_DECODE_SYMBOLX2_0(op2, &bitD2);806endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;807endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;808HUF_DECODE_SYMBOLX2_2(op3, &bitD3);809HUF_DECODE_SYMBOLX2_1(op3, &bitD3);810HUF_DECODE_SYMBOLX2_2(op3, &bitD3);811HUF_DECODE_SYMBOLX2_0(op3, &bitD3);812HUF_DECODE_SYMBOLX2_2(op4, &bitD4);813HUF_DECODE_SYMBOLX2_1(op4, &bitD4);814HUF_DECODE_SYMBOLX2_2(op4, &bitD4);815HUF_DECODE_SYMBOLX2_0(op4, &bitD4);816endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;817endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;818#else819HUF_DECODE_SYMBOLX2_2(op1, &bitD1);820HUF_DECODE_SYMBOLX2_2(op2, &bitD2);821HUF_DECODE_SYMBOLX2_2(op3, &bitD3);822HUF_DECODE_SYMBOLX2_2(op4, &bitD4);823HUF_DECODE_SYMBOLX2_1(op1, &bitD1);824HUF_DECODE_SYMBOLX2_1(op2, &bitD2);825HUF_DECODE_SYMBOLX2_1(op3, &bitD3);826HUF_DECODE_SYMBOLX2_1(op4, &bitD4);827HUF_DECODE_SYMBOLX2_2(op1, &bitD1);828HUF_DECODE_SYMBOLX2_2(op2, &bitD2);829HUF_DECODE_SYMBOLX2_2(op3, &bitD3);830HUF_DECODE_SYMBOLX2_2(op4, &bitD4);831HUF_DECODE_SYMBOLX2_0(op1, &bitD1);832HUF_DECODE_SYMBOLX2_0(op2, &bitD2);833HUF_DECODE_SYMBOLX2_0(op3, &bitD3);834HUF_DECODE_SYMBOLX2_0(op4, &bitD4);835endSignal = (U32)LIKELY(836(BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)837& (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)838& (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)839& (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));840#endif841}842843/* check corruption */844if (op1 > opStart2) return ERROR(corruption_detected);845if (op2 > opStart3) return ERROR(corruption_detected);846if (op3 > opStart4) return ERROR(corruption_detected);847/* note : op4 already verified within main loop */848849/* finish bitStreams one by one */850HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);851HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);852HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);853HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);854855/* check */856{ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);857if (!endCheck) return ERROR(corruption_detected); }858859/* decoded size */860return dstSize;861}862}863864HUF_DGEN(HUF_decompress1X2_usingDTable_internal)865HUF_DGEN(HUF_decompress4X2_usingDTable_internal)866867size_t HUF_decompress1X2_usingDTable(868void* dst, size_t dstSize,869const void* cSrc, size_t cSrcSize,870const HUF_DTable* DTable)871{872DTableDesc dtd = HUF_getDTableDesc(DTable);873if (dtd.tableType != 1) return ERROR(GENERIC);874return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);875}876877size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,878const void* cSrc, size_t cSrcSize,879void* workSpace, size_t wkspSize)880{881const BYTE* ip = (const BYTE*) cSrc;882883size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,884workSpace, wkspSize);885if (HUF_isError(hSize)) return hSize;886if (hSize >= cSrcSize) return ERROR(srcSize_wrong);887ip += hSize; cSrcSize -= hSize;888889return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);890}891892893size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,894const void* cSrc, size_t cSrcSize)895{896U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];897return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,898workSpace, sizeof(workSpace));899}900901size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)902{903HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);904return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);905}906907size_t HUF_decompress4X2_usingDTable(908void* dst, size_t dstSize,909const void* cSrc, size_t cSrcSize,910const HUF_DTable* DTable)911{912DTableDesc dtd = HUF_getDTableDesc(DTable);913if (dtd.tableType != 1) return ERROR(GENERIC);914return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);915}916917static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,918const void* cSrc, size_t cSrcSize,919void* workSpace, size_t wkspSize, int bmi2)920{921const BYTE* ip = (const BYTE*) cSrc;922923size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,924workSpace, wkspSize);925if (HUF_isError(hSize)) return hSize;926if (hSize >= cSrcSize) return ERROR(srcSize_wrong);927ip += hSize; cSrcSize -= hSize;928929return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);930}931932size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,933const void* cSrc, size_t cSrcSize,934void* workSpace, size_t wkspSize)935{936return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);937}938939940size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,941const void* cSrc, size_t cSrcSize)942{943U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];944return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,945workSpace, sizeof(workSpace));946}947948size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)949{950HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);951return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);952}953954#endif /* HUF_FORCE_DECOMPRESS_X1 */955956957/* ***********************************/958/* Universal decompression selectors */959/* ***********************************/960961size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,962const void* cSrc, size_t cSrcSize,963const HUF_DTable* DTable)964{965DTableDesc const dtd = HUF_getDTableDesc(DTable);966#if defined(HUF_FORCE_DECOMPRESS_X1)967(void)dtd;968assert(dtd.tableType == 0);969return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);970#elif defined(HUF_FORCE_DECOMPRESS_X2)971(void)dtd;972assert(dtd.tableType == 1);973return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);974#else975return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :976HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);977#endif978}979980size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,981const void* cSrc, size_t cSrcSize,982const HUF_DTable* DTable)983{984DTableDesc const dtd = HUF_getDTableDesc(DTable);985#if defined(HUF_FORCE_DECOMPRESS_X1)986(void)dtd;987assert(dtd.tableType == 0);988return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);989#elif defined(HUF_FORCE_DECOMPRESS_X2)990(void)dtd;991assert(dtd.tableType == 1);992return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);993#else994return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :995HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);996#endif997}9989991000#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)1001typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;1002static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =1003{1004/* single, double, quad */1005{{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */1006{{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */1007{{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */1008{{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */1009{{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */1010{{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */1011{{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */1012{{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */1013{{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */1014{{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */1015{{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */1016{{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */1017{{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */1018{{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */1019{{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */1020{{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */1021};1022#endif10231024/** HUF_selectDecoder() :1025* Tells which decoder is likely to decode faster,1026* based on a set of pre-computed metrics.1027* @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .1028* Assumption : 0 < dstSize <= 128 KB */1029U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)1030{1031assert(dstSize > 0);1032assert(dstSize <= 128*1024);1033#if defined(HUF_FORCE_DECOMPRESS_X1)1034(void)dstSize;1035(void)cSrcSize;1036return 0;1037#elif defined(HUF_FORCE_DECOMPRESS_X2)1038(void)dstSize;1039(void)cSrcSize;1040return 1;1041#else1042/* decoder timing evaluation */1043{ U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */1044U32 const D256 = (U32)(dstSize >> 8);1045U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);1046U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);1047DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */1048return DTime1 < DTime0;1049}1050#endif1051}105210531054typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);10551056size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)1057{1058#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)1059static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };1060#endif10611062/* validation checks */1063if (dstSize == 0) return ERROR(dstSize_tooSmall);1064if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */1065if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */1066if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */10671068{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);1069#if defined(HUF_FORCE_DECOMPRESS_X1)1070(void)algoNb;1071assert(algoNb == 0);1072return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);1073#elif defined(HUF_FORCE_DECOMPRESS_X2)1074(void)algoNb;1075assert(algoNb == 1);1076return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);1077#else1078return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);1079#endif1080}1081}10821083size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)1084{1085/* validation checks */1086if (dstSize == 0) return ERROR(dstSize_tooSmall);1087if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */1088if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */1089if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */10901091{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);1092#if defined(HUF_FORCE_DECOMPRESS_X1)1093(void)algoNb;1094assert(algoNb == 0);1095return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);1096#elif defined(HUF_FORCE_DECOMPRESS_X2)1097(void)algoNb;1098assert(algoNb == 1);1099return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);1100#else1101return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :1102HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;1103#endif1104}1105}11061107size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)1108{1109U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];1110return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,1111workSpace, sizeof(workSpace));1112}111311141115size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,1116size_t dstSize, const void* cSrc,1117size_t cSrcSize, void* workSpace,1118size_t wkspSize)1119{1120/* validation checks */1121if (dstSize == 0) return ERROR(dstSize_tooSmall);1122if (cSrcSize == 0) return ERROR(corruption_detected);11231124{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);1125#if defined(HUF_FORCE_DECOMPRESS_X1)1126(void)algoNb;1127assert(algoNb == 0);1128return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);1129#elif defined(HUF_FORCE_DECOMPRESS_X2)1130(void)algoNb;1131assert(algoNb == 1);1132return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);1133#else1134return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,1135cSrcSize, workSpace, wkspSize):1136HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);1137#endif1138}1139}11401141size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,1142const void* cSrc, size_t cSrcSize,1143void* workSpace, size_t wkspSize)1144{1145/* validation checks */1146if (dstSize == 0) return ERROR(dstSize_tooSmall);1147if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */1148if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */1149if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */11501151{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);1152#if defined(HUF_FORCE_DECOMPRESS_X1)1153(void)algoNb;1154assert(algoNb == 0);1155return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,1156cSrcSize, workSpace, wkspSize);1157#elif defined(HUF_FORCE_DECOMPRESS_X2)1158(void)algoNb;1159assert(algoNb == 1);1160return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,1161cSrcSize, workSpace, wkspSize);1162#else1163return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,1164cSrcSize, workSpace, wkspSize):1165HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,1166cSrcSize, workSpace, wkspSize);1167#endif1168}1169}11701171size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,1172const void* cSrc, size_t cSrcSize)1173{1174U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];1175return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,1176workSpace, sizeof(workSpace));1177}117811791180size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)1181{1182DTableDesc const dtd = HUF_getDTableDesc(DTable);1183#if defined(HUF_FORCE_DECOMPRESS_X1)1184(void)dtd;1185assert(dtd.tableType == 0);1186return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);1187#elif defined(HUF_FORCE_DECOMPRESS_X2)1188(void)dtd;1189assert(dtd.tableType == 1);1190return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);1191#else1192return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :1193HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);1194#endif1195}11961197#ifndef HUF_FORCE_DECOMPRESS_X21198size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)1199{1200const BYTE* ip = (const BYTE*) cSrc;12011202size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);1203if (HUF_isError(hSize)) return hSize;1204if (hSize >= cSrcSize) return ERROR(srcSize_wrong);1205ip += hSize; cSrcSize -= hSize;12061207return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);1208}1209#endif12101211size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)1212{1213DTableDesc const dtd = HUF_getDTableDesc(DTable);1214#if defined(HUF_FORCE_DECOMPRESS_X1)1215(void)dtd;1216assert(dtd.tableType == 0);1217return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);1218#elif defined(HUF_FORCE_DECOMPRESS_X2)1219(void)dtd;1220assert(dtd.tableType == 1);1221return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);1222#else1223return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :1224HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);1225#endif1226}12271228size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)1229{1230/* validation checks */1231if (dstSize == 0) return ERROR(dstSize_tooSmall);1232if (cSrcSize == 0) return ERROR(corruption_detected);12331234{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);1235#if defined(HUF_FORCE_DECOMPRESS_X1)1236(void)algoNb;1237assert(algoNb == 0);1238return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);1239#elif defined(HUF_FORCE_DECOMPRESS_X2)1240(void)algoNb;1241assert(algoNb == 1);1242return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);1243#else1244return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :1245HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);1246#endif1247}1248}124912501251