/* Path: blob/master/Utilities/cmzstd/lib/compress/huf_compress.c
   (5028 views) */
/* ******************************************************************1* Huffman encoder, part of New Generation Entropy library2* Copyright (c) Meta Platforms, Inc. and affiliates.3*4* You can contact the author at :5* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy6* - Public forum : https://groups.google.com/forum/#!forum/lz4c7*8* This source code is licensed under both the BSD-style license (found in the9* LICENSE file in the root directory of this source tree) and the GPLv2 (found10* in the COPYING file in the root directory of this source tree).11* You may select, at your option, one of the above-listed licenses.12****************************************************************** */1314/* **************************************************************15* Compiler specifics16****************************************************************/17#ifdef _MSC_VER /* Visual Studio */18# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */19#endif202122/* **************************************************************23* Includes24****************************************************************/25#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */26#include "../common/compiler.h"27#include "../common/bitstream.h"28#include "hist.h"29#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */30#include "../common/fse.h" /* header compression */31#include "../common/huf.h"32#include "../common/error_private.h"33#include "../common/bits.h" /* ZSTD_highbit32 */343536/* **************************************************************37* Error Management38****************************************************************/39#define HUF_isError ERR_isError40#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */414243/* **************************************************************44* Required 
declarations
****************************************************************/
/* nodeElt : one node of the Huffman tree built by HUF_buildTree().
 * Leaves carry a symbol (`byte`); internal nodes only use count/parent/nbBits. */
typedef struct nodeElt_s {
    U32 count;    /* symbol frequency (or merged frequency for internal nodes) */
    U16 parent;   /* index of parent node within the huffNode table */
    BYTE byte;    /* symbol value (leaves only) */
    BYTE nbBits;  /* code length assigned to this node */
} nodeElt;


/* **************************************************************
*  Debug Traces
****************************************************************/

#if DEBUGLEVEL >= 2

/* showU32() : debug helper; logs @size values from @arr at verbosity 6. */
static size_t showU32(const U32* arr, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %u", arr[u]); (void)arr;
    }
    RAWLOG(6, " \n");
    return size;
}

static size_t HUF_getNbBits(HUF_CElt elt);

/* showCTableBits() : debug helper; logs the nbBits field of each CTable entry. */
static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
    }
    RAWLOG(6, " \n");
    return size;

}

/* showHNodeSymbols() : debug helper; logs the symbol of each huffNode entry. */
static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
    }
    RAWLOG(6, " \n");
    return size;
}

/* showHNodeBits() : debug helper; logs the code length of each huffNode entry. */
static size_t showHNodeBits(const nodeElt* hnode, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
    }
    RAWLOG(6, " \n");
    return size;
}

#endif


/* *******************************************************
*  HUF : Huffman block compression
*********************************************************/
#define HUF_WORKSPACE_MAX_ALIGNMENT 8

/* HUF_alignUpWorkspace() :
 * Rounds @workspace up to the next @align-aligned address (align must be a
 * power of 2, <= HUF_WORKSPACE_MAX_ALIGNMENT), and shrinks *workspaceSizePtr
 * by the number of bytes skipped.
 * @return the aligned pointer, or NULL (with *workspaceSizePtr set to 0)
 *         if the workspace is too small to absorb the alignment padding. */
static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
{
    size_t const mask = align - 1;
    size_t const rem = (size_t)workspace & mask;
    size_t const add = (align - rem) & mask;
    BYTE* const aligned = (BYTE*)workspace + add;
    assert((align & (align - 1)) == 0); /* pow 2 */
    assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
    if (*workspaceSizePtr >= add) {
        assert(add < align);
        assert(((size_t)aligned & mask) == 0);
        *workspaceSizePtr -= add;
        return aligned;
    } else {
        *workspaceSizePtr = 0;
        return NULL;
    }
}


/* HUF_compressWeights() :
 * Same
 as FSE_compress(), but dedicated to huff0's weights compression.
 * The use case needs much less stack memory.
 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
 */
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6

/* Scratch space for HUF_compressWeights() : FSE table + build workspace + histogram. */
typedef struct {
    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
    unsigned count[HUF_TABLELOG_MAX+1];
    S16 norm[HUF_TABLELOG_MAX+1];
} HUF_CompressWeightsWksp;

/* Compresses the table of Huffman weights using FSE.
 * @return compressed size written into @dst,
 *         1 if all weights are identical (caller emits RLE),
 *         0 if not compressible, or an error code. */
static size_t
HUF_compressWeights(void* dst, size_t dstSize,
                    const void* weightTable, size_t wtSize,
                    void* workspace, size_t workspaceSize)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + dstSize;

    unsigned maxSymbolValue = HUF_TABLELOG_MAX;
    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);

    /* init conditions */
    if (wtSize <= 1) return 0;  /* Not compressible */

    /* Scan input and build symbol stats */
    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
    }

    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );

    /* Write table description header */
    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
        op += hSize;
    }

    /* Compress */
    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog,
wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
        if (cSize == 0) return 0;   /* not enough space for compressed data */
        op += cSize;
    }

    return (size_t)(op-ostart);
}

/* HUF_CElt layout : low 8 bits = nbBits, top nbBits bits = code value.
 * The accessors below keep that encoding in one place. */

/* nbBits of a CTable element (low byte). */
static size_t HUF_getNbBits(HUF_CElt elt)
{
    return elt & 0xFF;
}

/* Like HUF_getNbBits(), but skips the mask : callers must only use the low 8 bits. */
static size_t HUF_getNbBitsFast(HUF_CElt elt)
{
    return elt;
}

/* Code value of a CTable element (nbBits field cleared). */
static size_t HUF_getValue(HUF_CElt elt)
{
    return elt & ~(size_t)0xFF;
}

/* Like HUF_getValue(), but keeps the (dirty) low byte : callers must tolerate it. */
static size_t HUF_getValueFast(HUF_CElt elt)
{
    return elt;
}

/* Stores nbBits, resetting the whole element (value must be set afterwards). */
static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
{
    assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
    *elt = nbBits;
}

/* Ors @value into the top nbBits bits of the element; requires nbBits already set. */
static void HUF_setValue(HUF_CElt* elt, size_t value)
{
    size_t const nbBits = HUF_getNbBits(*elt);
    if (nbBits > 0) {
        assert((value >> nbBits) == 0);
        *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
    }
}

/* Reads the header (tableLog, maxSymbolValue) stored in CTable[0]. */
HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
{
    HUF_CTableHeader header;
    ZSTD_memcpy(&header, ctable, sizeof(header));
    return header;
}

/* Writes (tableLog, maxSymbolValue) into CTable[0]; both must fit in a byte. */
static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
{
    HUF_CTableHeader header;
    HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header));
    ZSTD_memset(&header, 0, sizeof(header));
    assert(tableLog < 256);
    header.tableLog = (BYTE)tableLog;
    assert(maxSymbolValue < 256);
    header.maxSymbolValue = (BYTE)maxSymbolValue;
    ZSTD_memcpy(ctable, &header, sizeof(header));
}

/* Scratch space for HUF_writeCTable_wksp(). */
typedef struct {
    HUF_CompressWeightsWksp wksp;
    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
} HUF_WriteCTableWksp;

/* Serializes the CTable as a table of weights (FSE-compressed, or raw 4-bit pairs).
 * @return number of bytes written into @dst, or an error code. */
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
                            void* workspace, size_t workspaceSize)
{
    HUF_CElt const* const ct = CTable + 1;
    BYTE* op = (BYTE*)dst;
    U32
n;
    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

    HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));

    assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
    assert(HUF_readCTableHeader(CTable).tableLog == huffLog);

    /* check conditions */
    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);

    /* convert to weight : weight = huffLog + 1 - nbBits (0 for unused symbols) */
    wksp->bitsToWeight[0] = 0;
    for (n=1; n<huffLog+1; n++)
        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
    for (n=0; n<maxSymbolValue; n++)
        wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];

    /* attempt weights compression by FSE */
    if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
            op[0] = (BYTE)hSize;
            return hSize+1;
    }   }

    /* write raw values as 4-bits (max : 15) */
    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
    for (n=0; n<maxSymbolValue; n+=2)
        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
    return ((maxSymbolValue+1)/2) + 1;
}


/* HUF_readCTable() :
 * Rebuilds a compression CTable from serialized weights (inverse of HUF_writeCTable_wksp).
 * On success, *maxSymbolValuePtr is updated to the actual last symbol,
 * and *hasZeroWeights reports whether any symbol has weight 0.
 * @return number of bytes read from @src, or an error code. */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
    U32
rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    HUF_CElt* const ct = CTable + 1;

    /* get symbol weights */
    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
    *hasZeroWeights = (rankVal[0] > 0);

    /* check result */
    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);

    *maxSymbolValuePtr = nbSymbols - 1;

    HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);

    /* Prepare base value per rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<=tableLog; n++) {
            U32 curr = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));
            rankVal[n] = curr;
    }   }

    /* fill nbBits : nbBits = tableLog + 1 - weight, or 0 for weight 0 */
    {   U32 n; for (n=0; n<nbSymbols; n++) {
            const U32 w = huffWeight[n];
            HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
    }   }

    /* fill val */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0};   /* support w=0=>n=tableLog+1 */
        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
        /* determine starting value per rank */
        valPerRank[tableLog+1] = 0;   /* for w==0 */
        {   U16 min = 0;
            U32 n; for (n=tableLog; n>0; n--) {   /* start at n=tablelog <-> w=1 */
                valPerRank[n] = min;      /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        /* assign value within rank, symbol order */
        { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
    }

    return readSize;
}

/* HUF_getNbBitsFromCTable() :
 * @return code length of @symbolValue in @CTable, or 0 if the symbol is
 *         beyond the table's maxSymbolValue. */
U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{
    const HUF_CElt* const ct = CTable + 1;
    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
    if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue)
        return 0;
    return (U32)HUF_getNbBits(ct[symbolValue]);
}


/**
 * HUF_setMaxHeight():
 * Try to
enforce @targetNbBits on the Huffman tree described in @huffNode.
 *
 * It attempts to convert all nodes with nbBits > @targetNbBits
 * to employ @targetNbBits instead. Then it adjusts the tree
 * so that it remains a valid canonical Huffman tree.
 *
 * @pre               The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits == huffNode[lastNonNull].nbBits.
 * @post              The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits is the return value (expected <= targetNbBits).
 *
 * @param huffNode    The Huffman tree modified in place to enforce targetNbBits.
 *                    It's presumed sorted, from most frequent to rarest symbol.
 * @param lastNonNull The symbol with the lowest count in the Huffman tree.
 * @param targetNbBits  The allowed number of bits, which the Huffman tree
 *                    may not respect. After this function the Huffman tree will
 *                    respect targetNbBits.
 * @return            The maximum number of bits of the Huffman tree after adjustment.
 */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
{
    const U32 largestBits = huffNode[lastNonNull].nbBits;
    /* early exit : no elt > targetNbBits, so the tree is already valid. */
    if (largestBits <= targetNbBits) return largestBits;

    DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);

    /* there are several too large elements (at least >= 2) */
    {   int totalCost = 0;
        const U32 baseCost = 1 << (largestBits - targetNbBits);
        int n = (int)lastNonNull;

        /* Adjust any ranks > targetNbBits to targetNbBits.
         * Compute totalCost, which is how far over 2^largestBits the sum of
         * the ranks is after adjusting the offending ranks.
         */
        while (huffNode[n].nbBits > targetNbBits) {
            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
            huffNode[n].nbBits = (BYTE)targetNbBits;
            n--;
        }
        /* n stops at huffNode[n].nbBits <= targetNbBits */
        assert(huffNode[n].nbBits <= targetNbBits);
        /* n end at index of smallest symbol using < targetNbBits */
        while (huffNode[n].nbBits == targetNbBits) --n;

        /* renorm totalCost from 2^largestBits to 2^targetNbBits
         * note : totalCost is necessarily a multiple of baseCost */
        assert(((U32)totalCost & (baseCost - 1)) == 0);
        totalCost >>= (largestBits - targetNbBits);
        assert(totalCost > 0);

        /* repay normalized cost */
        {   U32 const noSymbol = 0xF0F0F0F0;
            U32 rankLast[HUF_TABLELOG_MAX+2];

            /* Get pos of last (smallest = lowest cum. count) symbol per rank */
            ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
            {   U32 currentNbBits = targetNbBits;
                int pos;
                for (pos=n ; pos >= 0; pos--) {
                    if (huffNode[pos].nbBits >= currentNbBits) continue;
                    currentNbBits = huffNode[pos].nbBits;   /* < targetNbBits */
                    rankLast[targetNbBits-currentNbBits] = (U32)pos;
            }   }

            while (totalCost > 0) {
                /* Try to reduce the next power of 2 above totalCost because we
                 * gain back half the rank.
                 */
                U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                    U32 const highPos = rankLast[nBitsToDecrease];
                    U32 const lowPos = rankLast[nBitsToDecrease-1];
                    if (highPos == noSymbol) continue;
                    /* Decrease highPos if no symbols of lowPos or if it is
                     * not cheaper to remove 2 lowPos than highPos.
                     */
                    if (lowPos == noSymbol) break;
                    {   U32 const highTotal = huffNode[highPos].count;
                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
                assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                    nBitsToDecrease++;
                assert(rankLast[nBitsToDecrease] != noSymbol);
                /* Increase the number of bits to gain back half the rank cost. */
                totalCost -= 1 << (nBitsToDecrease-1);
                huffNode[rankLast[nBitsToDecrease]].nbBits++;

                /* Fix up the new rank.
                 * If the new rank was empty, this symbol is now its smallest.
                 * Otherwise, this symbol will be the largest in the new rank so no adjustment.
                 */
                if (rankLast[nBitsToDecrease-1] == noSymbol)
                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
                /* Fix up the old rank.
                 * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
                 * it must be the only symbol in its rank, so the old rank now has no symbols.
                 * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
                 * the smallest node in the rank. If the previous position belongs to a different rank,
                 * then the rank is now empty.
                 */
                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
                    rankLast[nBitsToDecrease] = noSymbol;
                else {
                    rankLast[nBitsToDecrease]--;
                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
                }
            }   /* while (totalCost > 0) */

            /* If we've removed too much weight, then we have to add it back.
             * To avoid overshooting again, we only adjust the smallest rank.
             * We take the largest nodes from the lowest rank 0 and move them
             * to rank 1. There's guaranteed to be enough rank 0 symbols because
             * TODO.
             */
            while (totalCost < 0) {   /* Sometimes, cost correction overshoot */
                /* special case : no rank 1 symbol (using targetNbBits-1);
                 * let's create one from largest rank 0 (using targetNbBits).
                 */
                if (rankLast[1] == noSymbol) {
                    while (huffNode[n].nbBits == targetNbBits) n--;
                    huffNode[n+1].nbBits--;
                    assert(n >= 0);
                    rankLast[1] = (U32)(n+1);
                    totalCost++;
                    continue;
                }
                huffNode[ rankLast[1] + 1 ].nbBits--;
                rankLast[1]++;
                totalCost ++;
            }
        }   /* repay normalized cost */
    }   /* there are several too large elements (at least >= 2) */

    return targetNbBits;
}

/* rankPos : one bucket of the HUF_sort() counting sort.
 * base = first output index of the bucket, curr = next free slot while filling. */
typedef struct {
    U16 base;
    U16 curr;
} rankPos;

typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];

/* Number of buckets available for HUF_sort() */
#define RANK_POSITION_TABLE_SIZE 192

typedef struct {
  huffNodeTable huffNodeTbl;
  rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
} HUF_buildCTable_wksp_tables;

/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
 * Strategy is to use as many buckets as possible for representing distinct
 * counts while using the remainder to represent all "large" counts.
 *
 * To satisfy this requirement for 192 buckets, we can do the following:
 * Let buckets 0-166 represent distinct counts of [0, 166]
 * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
 */
#define RANK_POSITION_MAX_COUNT_LOG 32
#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)

/* Return the appropriate bucket index for a given count.
See definition of527* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.528*/529static U32 HUF_getIndex(U32 const count) {530return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)531? count532: ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;533}534535/* Helper swap function for HUF_quickSortPartition() */536static void HUF_swapNodes(nodeElt* a, nodeElt* b) {537nodeElt tmp = *a;538*a = *b;539*b = tmp;540}541542/* Returns 0 if the huffNode array is not sorted by descending count */543MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {544U32 i;545for (i = 1; i < maxSymbolValue1; ++i) {546if (huffNode[i].count > huffNode[i-1].count) {547return 0;548}549}550return 1;551}552553/* Insertion sort by descending order */554HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {555int i;556int const size = high-low+1;557huffNode += low;558for (i = 1; i < size; ++i) {559nodeElt const key = huffNode[i];560int j = i - 1;561while (j >= 0 && huffNode[j].count < key.count) {562huffNode[j + 1] = huffNode[j];563j--;564}565huffNode[j + 1] = key;566}567}568569/* Pivot helper function for quicksort. */570static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {571/* Simply select rightmost element as pivot. 
"Better" selectors like572* median-of-three don't experimentally appear to have any benefit.573*/574U32 const pivot = arr[high].count;575int i = low - 1;576int j = low;577for ( ; j < high; j++) {578if (arr[j].count > pivot) {579i++;580HUF_swapNodes(&arr[i], &arr[j]);581}582}583HUF_swapNodes(&arr[i + 1], &arr[high]);584return i + 1;585}586587/* Classic quicksort by descending with partially iterative calls588* to reduce worst case callstack size.589*/590static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {591int const kInsertionSortThreshold = 8;592if (high - low < kInsertionSortThreshold) {593HUF_insertionSort(arr, low, high);594return;595}596while (low < high) {597int const idx = HUF_quickSortPartition(arr, low, high);598if (idx - low < high - idx) {599HUF_simpleQuickSort(arr, low, idx - 1);600low = idx + 1;601} else {602HUF_simpleQuickSort(arr, idx + 1, high);603high = idx - 1;604}605}606}607608/**609* HUF_sort():610* Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.611* This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.612*613* @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.614* Must have (maxSymbolValue + 1) entries.615* @param[in] count Histogram of the symbols.616* @param[in] maxSymbolValue Maximum symbol value.617* @param rankPosition This is a scratch workspace. 
Must have RANK_POSITION_TABLE_SIZE entries.
 */
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
    U32 n;
    U32 const maxSymbolValue1 = maxSymbolValue+1;

    /* Compute base and set curr to base.
     * For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
     * See HUF_getIndex to see bucketing strategy.
     * We attribute each symbol to lowerRank's base value, because we want to know where
     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
     */
    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 lowerRank = HUF_getIndex(count[n]);
        assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
        rankPosition[lowerRank].base++;
    }

    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
    /* Set up the rankPosition table : suffix-sum the bucket counts so each
     * bucket knows its first output index. */
    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
        rankPosition[n-1].base += rankPosition[n].base;
        rankPosition[n-1].curr = rankPosition[n-1].base;
    }

    /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 const c = count[n];
        U32 const r = HUF_getIndex(c) + 1;
        U32 const pos = rankPosition[r].curr++;
        assert(pos < maxSymbolValue1);
        huffNode[pos].count = c;
        huffNode[pos].byte  = (BYTE)n;
    }

    /* Sort each bucket : only the shared log2 buckets can hold mixed counts. */
    for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
        int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
        U32 const bucketStartIdx = rankPosition[n].base;
        if (bucketSize > 1) {
            assert(bucketStartIdx < maxSymbolValue1);
            HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
        }
    }

    assert(HUF_isSorted(huffNode, maxSymbolValue1));
}


/** HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
 */
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)

/* HUF_buildTree():
 * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
 *
 * @param huffNode        The array sorted by HUF_sort(). Builds the Huffman tree in this array.
 * @param maxSymbolValue  The maximum symbol value.
 * @return                The smallest node in the Huffman tree (by count).
 */
static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
{
    nodeElt* const huffNode0 = huffNode - 1;
    int nonNullRank;
    int lowS, lowN;
    int nodeNb = STARTNODE;
    int n, nodeRoot;
    DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
    /* init for parents : merge the two rarest symbols into the first internal node */
    nonNullRank = (int)maxSymbolValue;
    while(huffNode[nonNullRank].count == 0) nonNullRank--;
    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
    nodeNb++; lowS-=2;
    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */

    /* create parents : repeatedly merge the two smallest remaining nodes */
    while (nodeNb <= nodeRoot) {
        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ?
lowS-- : lowN++;
        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
        nodeNb++;
    }

    /* distribute weights (unlimited tree height) : depth of each node = parent depth + 1 */
    huffNode[nodeRoot].nbBits = 0;
    for (n=nodeRoot-1; n>=STARTNODE; n--)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
    for (n=0; n<=nonNullRank; n++)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;

    DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));

    return nonNullRank;
}

/**
 * HUF_buildCTableFromTree():
 * Build the CTable given the Huffman tree in huffNode.
 *
 * @param[out] CTable      The output Huffman CTable.
 * @param      huffNode    The Huffman tree.
 * @param      nonNullRank The last and smallest node in the Huffman tree.
 * @param      maxSymbolValue The maximum symbol value.
 * @param      maxNbBits   The exact maximum number of bits used in the Huffman tree.
 */
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
{
    HUF_CElt* const ct = CTable + 1;
    /* fill result into ctable (val, nbBits) */
    int n;
    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
    int const alphabetSize = (int)(maxSymbolValue + 1);
    for (n=0; n<=nonNullRank; n++)
        nbPerRank[huffNode[n].nbBits]++;
    /* determine starting value per rank */
    {   U16 min = 0;
        for (n=(int)maxNbBits; n>0; n--) {
            valPerRank[n] = min;      /* get starting value within each rank */
            min += nbPerRank[n];
            min >>= 1;
    }   }
    for (n=0; n<alphabetSize; n++)
        HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
    for (n=0; n<alphabetSize; n++)
        HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */

    HUF_writeCTableHeader(CTable, maxNbBits,
maxSymbolValue);
}

/* Builds a Huffman CTable from a symbol histogram, limited to @maxNbBits.
 * `workSpace` must hold at least sizeof(HUF_buildCTable_wksp_tables) bytes.
 * @return the tableLog actually used, or an error code. */
size_t
HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
                     void* workSpace, size_t wkspSize)
{
    HUF_buildCTable_wksp_tables* const wksp_tables =
        (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
    nodeElt* const huffNode = huffNode0+1;
    int nonNullRank;

    HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));

    DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);

    /* safety checks */
    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
        return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
        return ERROR(maxSymbolValue_tooLarge);
    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));

    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
    DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));

    /* build tree */
    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);

    /* determine and enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */

    HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);

    return maxNbBits;
}

/* Estimates the compressed size (in bytes) of @count encoded with @CTable. */
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
    HUF_CElt const* ct = CTable + 1;
    size_t nbBits = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        nbBits += HUF_getNbBits(ct[s]) * count[s];
    }
    return nbBits >> 3;
}

/* Returns 1 if @CTable can encode every symbol present in @count, else 0. */
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
    HUF_CTableHeader header = HUF_readCTableHeader(CTable);
    HUF_CElt const* ct
= CTable + 1;
    int bad = 0;
    int s;

    assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);

    if (header.maxSymbolValue < maxSymbolValue)
        return 0;

    /* A symbol is unencodable if it occurs but has no code (nbBits == 0). */
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
    }
    return !bad;
}

size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

/** HUF_CStream_t:
 * Huffman uses its own BIT_CStream_t implementation.
 * There are three major differences from BIT_CStream_t:
 *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
 *      the pair (nbBits, value) in the format:
 *      format:
 *        - Bits [0, 4)            = nbBits
 *        - Bits [4, 64 - nbBits)  = 0
 *        - Bits [64 - nbBits, 64) = value
 *   2. The bitContainer is built from the upper bits and
 *      right shifted. E.g. to add a new value of N bits
 *      you right shift the bitContainer by N, then or in
 *      the new value into the N upper bits.
 *   3. The bitstream has two bit containers. You can add
 *      bits to the second container and merge them into
 *      the first container.
 */

#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)

typedef struct {
    size_t bitContainer[2];
    size_t bitPos[2];

    BYTE* startPtr;
    BYTE* ptr;
    BYTE* endPtr;
} HUF_CStream_t;

/**! HUF_initCStream():
 * Initializes the bitstream.
 * @returns 0 or an error code.
 */
static size_t HUF_initCStream(HUF_CStream_t* bitC,
                              void* startPtr, size_t dstCapacity)
{
    ZSTD_memset(bitC, 0, sizeof(*bitC));
    bitC->startPtr = (BYTE*)startPtr;
    bitC->ptr = bitC->startPtr;
    /* endPtr leaves room for one full container-sized write at the end. */
    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
    if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
    return 0;
}

/*! HUF_addBits():
 * Adds the symbol stored in HUF_CElt elt to the bitstream.
 *
 * @param elt The element we're adding.
This is a (nbBits, value) pair.870* See the HUF_CStream_t docs for the format.871* @param idx Insert into the bitstream at this idx.872* @param kFast This is a template parameter. If the bitstream is guaranteed873* to have at least 4 unused bits after this call it may be 1,874* otherwise it must be 0. HUF_addBits() is faster when fast is set.875*/876FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)877{878assert(idx <= 1);879assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);880/* This is efficient on x86-64 with BMI2 because shrx881* only reads the low 6 bits of the register. The compiler882* knows this and elides the mask. When fast is set,883* every operation can use the same value loaded from elt.884*/885bitC->bitContainer[idx] >>= HUF_getNbBits(elt);886bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);887/* We only read the low 8 bits of bitC->bitPos[idx] so it888* doesn't matter that the high bits have noise from the value.889*/890bitC->bitPos[idx] += HUF_getNbBitsFast(elt);891assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);892/* The last 4-bits of elt are dirty if fast is set,893* so we must not be overwriting bits that have already been894* inserted into the bit container.895*/896#if DEBUGLEVEL >= 1897{898size_t const nbBits = HUF_getNbBits(elt);899size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;900(void)dirtyBits;901/* Middle bits are 0. */902assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);903/* We didn't overwrite any bits in the bit container. */904assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);905(void)dirtyBits;906}907#endif908}909910FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)911{912bitC->bitContainer[1] = 0;913bitC->bitPos[1] = 0;914}915916/*! 
HUF_mergeIndex1() :
 * Merges the bit container @ index 1 into the bit container @ index 0
 * and zeros the bit container @ index 1.
 */
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
{
    assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
    /* Make room at the bottom of container 0, then or in container 1,
     * whose bits already sit in the top bitPos[1] positions. */
    bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
    bitC->bitContainer[0] |= bitC->bitContainer[1];
    bitC->bitPos[0] += bitC->bitPos[1];
    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
}

/*! HUF_flushBits() :
 * Flushes the bits in the bit container @ index 0.
 *
 * @post bitPos will be < 8.
 * @param kFast If kFast is set then we must know a-priori that
 *              the bit container will not overflow.
 */
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
{
    /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
    size_t const nbBits = bitC->bitPos[0] & 0xFF;
    size_t const nbBytes = nbBits >> 3;
    /* The top nbBits bits of bitContainer are the ones we need. */
    size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
    /* Mask bitPos to account for the bytes we consumed. */
    bitC->bitPos[0] &= 7;
    /* nbBits == 0 would make the shift above equal to the container width,
     * which is undefined; callers guarantee at least one pending bit. */
    assert(nbBits > 0);
    assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
    assert(bitC->ptr <= bitC->endPtr);
    MEM_writeLEST(bitC->ptr, bitContainer);
    bitC->ptr += nbBytes;
    assert(!kFast || bitC->ptr <= bitC->endPtr);
    /* Slow path only: clamp so later stores stay inside the buffer. */
    if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
    /* bitContainer doesn't need to be modified because the leftover
     * bits are already the top bitPos bits. And we don't care about
     * noise in the lower values.
     */
}

/*! HUF_endMark()
 * @returns The Huffman stream end mark: A 1-bit value = 1.
 */
static HUF_CElt HUF_endMark(void)
{
    HUF_CElt endMark;
    HUF_setNbBits(&endMark, 1);
    HUF_setValue(&endMark, 1);
    return endMark;
}

/*!
HUF_closeCStream() :
 * @return Size of CStream, in bytes,
 *         or 0 if it could not fit into dstBuffer */
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{
    /* Terminate the stream with the 1-bit end mark, then flush. */
    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
    HUF_flushBits(bitC, /* kFast */ 0);
    {
        size_t const nbBits = bitC->bitPos[0] & 0xFF;
        if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
        /* +1 byte if there are leftover bits not yet written out. */
        return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
    }
}

/* Looks up the symbol's (nbBits, value) pair and appends it to the stream. */
FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
{
    HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
}

/* Encodes the whole input back-to-front, kUnroll symbols at a time.
 * kUnroll / kFastFlush / kLastFast are template parameters: they are
 * compile-time constants at each instantiation site, so the loops unroll. */
FORCE_INLINE_TEMPLATE void
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
                                              const BYTE* ip, size_t srcSize,
                                              const HUF_CElt* ct,
                                              int kUnroll, int kFastFlush, int kLastFast)
{
    /* Join to kUnroll */
    int n = (int)srcSize;
    int rem = n % kUnroll;
    if (rem > 0) {
        for (; rem > 0; --rem) {
            HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
        }
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n % kUnroll == 0);

    /* Join to 2 * kUnroll */
    if (n % (2 * kUnroll)) {
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        n -= kUnroll;
    }
    assert(n % (2 * kUnroll) == 0);

    for (; n>0; n-= 2 * kUnroll) {
        /* Encode kUnroll symbols into the bitstream @ index 0. */
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        /* Encode kUnroll symbols into the bitstream @ index 1.
         * This allows us to start filling the bit container
         * without any data dependencies.
         */
        HUF_zeroIndex1(bitC);
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
        /* Merge bitstream @ index 1 into the bitstream @ index 0 */
        HUF_mergeIndex1(bitC);
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n == 0);

}

/**
 * Returns a tight upper bound on the output space needed by Huffman
 * with 8 bytes buffer to handle over-writes. If the output is at least
 * this large we don't need to do bounds checks during Huffman encoding.
 */
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
{
    return ((srcSize * tableLog) >> 3) + 8;
}


FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                         const void* src, size_t srcSize,
                                         const HUF_CElt* CTable)
{
    U32 const tableLog = HUF_readCTableHeader(CTable).tableLog;
    HUF_CElt const* ct = CTable + 1;   /* element 0 is the table header */
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    HUF_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    { BYTE* op = ostart;
      size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
      if (HUF_isError(initErr)) return 0; }

    /* Use the bounds-checked (slow) loop when the destination might be too
     * small for unchecked writes, or when tableLog exceeds the fast path's
     * supported maximum of 11 bits per symbol. */
    if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
        HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ?
2 : 4, /* kFastFlush */ 0, /* kLastFast */ 0);
    else {
        /* Fast path: pick an unroll factor so that kUnroll * tableLog bits
         * fit in one container, enabling unchecked flushes. */
        if (MEM_32bits()) {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10: ZSTD_FALLTHROUGH;
            case 9: ZSTD_FALLTHROUGH;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 7: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        } else {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 9:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 7:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 6: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        }
    }
    assert(bitC.ptr <= bitC.endPtr);

    return HUF_closeCStream(&bitC);
}

#if DYNAMIC_BMI2

/* BMI2-targeted instantiation; selected at runtime via flags. */
static BMI2_TARGET_ATTRIBUTE size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                         const void* src, size_t srcSize,
                                         const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize,
CTable);
}

/* Portable instantiation, used when BMI2 is not requested. */
static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
                                            const void* src, size_t srcSize,
                                            const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

/* Runtime dispatch between the BMI2 and default instantiations. */
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                                    const void* src, size_t srcSize,
                                    const HUF_CElt* CTable, const int flags)
{
    if (flags & HUF_flags_bmi2) {
        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
    }
    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
}

#else

/* DYNAMIC_BMI2 disabled: only one instantiation exists; flags is unused. */
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                                    const void* src, size_t srcSize,
                                    const HUF_CElt* CTable, const int flags)
{
    (void)flags;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

#endif

size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
}

/* Compresses src as 4 independent streams, prefixed by a 6-byte jump table
 * holding the first three compressed segment sizes as little-endian U16. */
static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
                                    const void* src, size_t srcSize,
                                    const HUF_CElt* CTable, int flags)
{
    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    BYTE* const ostart = (BYTE*) dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
    if (srcSize < 12) return 0;   /* no saving possible : too small input */
    op += 6;   /* jumpTable */

    /* Segment 1 */
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
        if (cSize == 0 || cSize > 65535)
return 0;
        MEM_writeLE16(ostart, (U16)cSize);   /* jump table entry 0 */
        op += cSize;
    }

    /* Segment 2 */
    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+2, (U16)cSize);   /* jump table entry 1 */
        op += cSize;
    }

    /* Segment 3 */
    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+4, (U16)cSize);   /* jump table entry 2 */
        op += cSize;
    }

    /* Segment 4: whatever remains; its size is implied, so no table entry. */
    ip += segmentSize;
    assert(op <= oend);
    assert(ip <= iend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
        if (cSize == 0 || cSize > 65535) return 0;
        op += cSize;
    }

    return (size_t)(op-ostart);
}

size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{
    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
}

typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;

/* Runs the 1-stream or 4-stream encoder and rejects the result when it
 * fails to actually shrink the input. Returns 0 for "not compressible". */
static size_t HUF_compressCTable_internal(
                BYTE* const ostart, BYTE* op, BYTE* const oend,
                const void* src, size_t srcSize,
                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
{
    size_t const cSize = (nbStreams==HUF_singleStream) ?
                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
    if (HUF_isError(cSize)) { return cSize; }
    if (cSize==0) { return 0; }   /* uncompressible */
    op += cSize;
    /* check compressibility */
    assert(op >= ostart);
    /* NOTE(review): srcSize-1 assumes srcSize > 0; callers check this — confirm. */
    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
    return (size_t)(op-ostart);
}

/* Scratch space shared by the compression entry points: histogram,
 * CTable under construction, and a union of single-use workspaces. */
typedef struct {
    unsigned
count[HUF_SYMBOLVALUE_MAX + 1];
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
    union {
        HUF_buildCTable_wksp_tables buildCTable_wksp;
        HUF_WriteCTableWksp writeCTable_wksp;
        U32 hist_wksp[HIST_WKSP_SIZE_U32];
    } wksps;
} HUF_compress_tables_t;

#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10  /* Must be >= 2 */

/* Counts how many distinct symbols actually occur in the histogram. */
unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
{
    unsigned cardinality = 0;
    unsigned i;

    for (i = 0; i < maxSymbolValue + 1; i++) {
        if (count[i] != 0) cardinality += 1;
    }

    return cardinality;
}

/* Smallest tableLog able to give each used symbol a distinct code. */
unsigned HUF_minTableLog(unsigned symbolCardinality)
{
    U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
    return minBitsSymbols;
}

/* Selects the tableLog producing the smallest (header + payload) estimate.
 * Without HUF_flags_optimalDepth, falls back to the cheap FSE heuristic;
 * otherwise probes each candidate log and keeps the best measured size. */
unsigned HUF_optimalTableLog(
            unsigned maxTableLog,
            size_t srcSize,
            unsigned maxSymbolValue,
            void* workSpace, size_t wkspSize,
            HUF_CElt* table,
            const unsigned* count,
            int flags)
{
    assert(srcSize > 1);   /* Not supported, RLE should be used instead */
    assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));

    if (!(flags & HUF_flags_optimalDepth)) {
        /* cheap evaluation, based on FSE */
        return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
    }

    {   BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
        size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
        size_t hSize, newSize;
        const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
        const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
        size_t optSize = ((size_t) ~0) - 1;
        unsigned optLog = maxTableLog, optLogGuess;

        DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);

        /* Search until size increases */
        for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
            DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);

            {   size_t
maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
                /* On build failure this `continue` also skips the hSize
                 * check below, so hSize is never read before assignment. */
                if (ERR_isError(maxBits)) continue;

                if (maxBits < optLogGuess && optLogGuess > minTableLog) break;

                hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
            }

            if (ERR_isError(hSize)) continue;

            newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;

            /* Stop once the estimate clearly starts growing again. */
            if (newSize > optSize + 1) {
                break;
            }

            if (newSize < optSize) {
                optSize = newSize;
                optLog = optLogGuess;
            }
        }
        assert(optLog <= HUF_TABLELOG_MAX);
        return optLog;
    }
}

/* HUF_compress_internal() :
 * `workSpace_align4` must be aligned on 4-bytes boundaries,
 * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
static size_t
HUF_compress_internal (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       HUF_nbStreams_e nbStreams,
                       void* workSpace, size_t wkspSize,
                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
{
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
    HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);

    /* checks & inits */
    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
    if (!srcSize) return 0;  /* Uncompressed */
    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    if (!maxSymbolValue) maxSymbolValue =
HUF_SYMBOLVALUE_MAX;
    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;

    /* Heuristic : If old table is valid, use it for small inputs */
    if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, flags);
    }

    /* If uncompressible data is suspected, do a smaller sampling first */
    DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
    if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
        size_t largestTotal = 0;
        DEBUGLOG(5, "input suspected incompressible : sampling to check");
        /* Histogram a sample at the start and another at the end of src. */
        {   unsigned maxSymbolValueBegin = maxSymbolValue;
            CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestBegin;
        }
        {   unsigned maxSymbolValueEnd = maxSymbolValue;
            CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestEnd;
        }
        if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Scan input and build symbol stats */
    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }
    DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));

    /* Check validity of previous table */
    if ( repeat
      && *repeat == HUF_repeat_check
      && !HUF_validateCTable(oldHufTable,
table->count, maxSymbolValue)) {
        *repeat = HUF_repeat_none;
    }
    /* Heuristic : use existing table for small inputs */
    if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, flags);
    }

    /* Build Huffman Tree */
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
    {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
                                                    maxSymbolValue, huffLog,
                                                    &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
        CHECK_F(maxBits);
        huffLog = (U32)maxBits;   /* builder may have lowered the depth */
        DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
    }

    /* Write table description header */
    {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
                                              &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
        /* Check if using previous huffman table is beneficial */
        if (repeat && *repeat != HUF_repeat_none) {
            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
            /* Reuse the old table when it beats (new table + header cost). */
            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
                return HUF_compressCTable_internal(ostart, op, oend,
                                                   src, srcSize,
                                                   nbStreams, oldHufTable, flags);
        }   }

        /* Use the new huffman table */
        if (hSize + 12ul >= srcSize) { return 0; }
        op += hSize;
        if (repeat) { *repeat = HUF_repeat_none; }
        if (oldHufTable)
            ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable));   /* Save new table */
    }
    return HUF_compressCTable_internal(ostart, op, oend,
                                       src, srcSize,
                                       nbStreams, table->CTable, flags);
}

/* HUF_compress1X_repeat():
 * Single-stream compression with optional reuse of a previous table. */
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src,
size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{
    DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize, hufTable,
                                 repeat, flags);
}

/* HUF_compress4X_repeat():
 * compress input using 4 streams.
 * consider skipping quickly
 * reuse an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{
    DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 hufTable, repeat, flags);
}