Path: blob/master/Utilities/cmzstd/lib/compress/huf_compress.c
3158 views
/* ******************************************************************1* Huffman encoder, part of New Generation Entropy library2* Copyright (c) Meta Platforms, Inc. and affiliates.3*4* You can contact the author at :5* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy6* - Public forum : https://groups.google.com/forum/#!forum/lz4c7*8* This source code is licensed under both the BSD-style license (found in the9* LICENSE file in the root directory of this source tree) and the GPLv2 (found10* in the COPYING file in the root directory of this source tree).11* You may select, at your option, one of the above-listed licenses.12****************************************************************** */1314/* **************************************************************15* Compiler specifics16****************************************************************/17#ifdef _MSC_VER /* Visual Studio */18# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */19#endif202122/* **************************************************************23* Includes24****************************************************************/25#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */26#include "../common/compiler.h"27#include "../common/bitstream.h"28#include "hist.h"29#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */30#include "../common/fse.h" /* header compression */31#include "../common/huf.h"32#include "../common/error_private.h"33#include "../common/bits.h" /* ZSTD_highbit32 */343536/* **************************************************************37* Error Management38****************************************************************/39#define HUF_isError ERR_isError40#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */414243/* **************************************************************44* Required declarations45****************************************************************/46typedef struct nodeElt_s {47U32 count;48U16 parent;49BYTE byte;50BYTE nbBits;51} nodeElt;525354/* **************************************************************55* Debug Traces56****************************************************************/5758#if DEBUGLEVEL >= 25960static size_t showU32(const U32* arr, size_t size)61{62size_t u;63for (u=0; u<size; u++) {64RAWLOG(6, " %u", arr[u]); (void)arr;65}66RAWLOG(6, " \n");67return size;68}6970static size_t HUF_getNbBits(HUF_CElt elt);7172static size_t showCTableBits(const HUF_CElt* ctable, size_t size)73{74size_t u;75for (u=0; u<size; u++) {76RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;77}78RAWLOG(6, " \n");79return size;8081}8283static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)84{85size_t u;86for (u=0; u<size; u++) {87RAWLOG(6, " %u", hnode[u].byte); (void)hnode;88}89RAWLOG(6, " \n");90return size;91}9293static size_t showHNodeBits(const nodeElt* hnode, size_t size)94{95size_t u;96for (u=0; u<size; u++) {97RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;98}99RAWLOG(6, " \n");100return size;101}102103#endif104105106/* *******************************************************107* HUF : Huffman block compression108*********************************************************/109#define HUF_WORKSPACE_MAX_ALIGNMENT 8110111static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)112{113size_t const mask = align - 1;114size_t const rem = (size_t)workspace & mask;115size_t const add = (align - rem) & mask;116BYTE* const aligned = (BYTE*)workspace + add;117assert((align & (align - 1)) == 0); /* pow 2 */118assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);119if (*workspaceSizePtr >= add) {120assert(add < align);121assert(((size_t)aligned & mask) == 0);122*workspaceSizePtr -= add;123return aligned;124} else {125*workspaceSizePtr = 0;126return NULL;127}128}129130131/* HUF_compressWeights() :132* Same as FSE_compress(), but dedicated to huff0's weights compression.133* The use case needs much less stack memory.134* Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.135*/136#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6137138typedef struct {139FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];140U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];141unsigned count[HUF_TABLELOG_MAX+1];142S16 norm[HUF_TABLELOG_MAX+1];143} HUF_CompressWeightsWksp;144145static size_t146HUF_compressWeights(void* dst, size_t dstSize,147const void* weightTable, size_t wtSize,148void* workspace, size_t workspaceSize)149{150BYTE* const ostart = (BYTE*) dst;151BYTE* op = ostart;152BYTE* const oend = ostart + dstSize;153154unsigned maxSymbolValue = HUF_TABLELOG_MAX;155U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;156HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));157158if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);159160/* init conditions */161if (wtSize <= 1) return 0; /* Not compressible */162163/* Scan input and build symbol stats */164{ unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */165if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */166if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */167}168169tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);170CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );171172/* Write table description header */173{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );174op += hSize;175}176177/* Compress */178CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );179{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );180if (cSize == 0) return 0; /* not enough space for compressed data */181op += cSize;182}183184return (size_t)(op-ostart);185}186187static size_t HUF_getNbBits(HUF_CElt elt)188{189return elt & 0xFF;190}191192static size_t HUF_getNbBitsFast(HUF_CElt elt)193{194return elt;195}196197static size_t HUF_getValue(HUF_CElt elt)198{199return elt & ~(size_t)0xFF;200}201202static size_t HUF_getValueFast(HUF_CElt elt)203{204return elt;205}206207static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)208{209assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);210*elt = nbBits;211}212213static void HUF_setValue(HUF_CElt* elt, size_t value)214{215size_t const nbBits = HUF_getNbBits(*elt);216if (nbBits > 0) {217assert((value >> nbBits) == 0);218*elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);219}220}221222typedef struct {223HUF_CompressWeightsWksp wksp;224BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */225BYTE huffWeight[HUF_SYMBOLVALUE_MAX];226} HUF_WriteCTableWksp;227228size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,229const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,230void* workspace, size_t workspaceSize)231{232HUF_CElt const* const ct = CTable + 1;233BYTE* op = (BYTE*)dst;234U32 n;235HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));236237HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));238239/* check conditions */240if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);241if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);242243/* convert to weight */244wksp->bitsToWeight[0] = 0;245for (n=1; n<huffLog+1; n++)246wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);247for (n=0; n<maxSymbolValue; n++)248wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];249250/* attempt weights compression by FSE */251if (maxDstSize < 1) return ERROR(dstSize_tooSmall);252{ CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );253if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */254op[0] = (BYTE)hSize;255return hSize+1;256} }257258/* write raw values as 4-bits (max : 15) */259if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */260if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */261op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));262wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */263for (n=0; n<maxSymbolValue; n+=2)264op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);265return ((maxSymbolValue+1)/2) + 1;266}267268269size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)270{271BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */272U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */273U32 tableLog = 0;274U32 nbSymbols = 0;275HUF_CElt* const ct = CTable + 1;276277/* get symbol weights */278CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));279*hasZeroWeights = (rankVal[0] > 0);280281/* check result */282if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);283if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);284285CTable[0] = tableLog;286287/* Prepare base value per rank */288{ U32 n, nextRankStart = 0;289for (n=1; n<=tableLog; n++) {290U32 curr = nextRankStart;291nextRankStart += (rankVal[n] << (n-1));292rankVal[n] = curr;293} }294295/* fill nbBits */296{ U32 n; for (n=0; n<nbSymbols; n++) {297const U32 w = huffWeight[n];298HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));299} }300301/* fill val */302{ U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */303U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};304{ U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }305/* determine stating value per rank */306valPerRank[tableLog+1] = 0; /* for w==0 */307{ U16 min = 0;308U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */309valPerRank[n] = min; /* get starting value within each rank */310min += nbPerRank[n];311min >>= 1;312} }313/* assign value within rank, symbol order */314{ U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }315}316317*maxSymbolValuePtr = nbSymbols - 1;318return readSize;319}320321U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)322{323const HUF_CElt* const ct = CTable + 1;324assert(symbolValue <= HUF_SYMBOLVALUE_MAX);325return (U32)HUF_getNbBits(ct[symbolValue]);326}327328329/**330* HUF_setMaxHeight():331* Try to enforce @targetNbBits on the Huffman tree described in @huffNode.332*333* It attempts to convert all nodes with nbBits > @targetNbBits334* to employ @targetNbBits instead. Then it adjusts the tree335* so that it remains a valid canonical Huffman tree.336*337* @pre The sum of the ranks of each symbol == 2^largestBits,338* where largestBits == huffNode[lastNonNull].nbBits.339* @post The sum of the ranks of each symbol == 2^largestBits,340* where largestBits is the return value (expected <= targetNbBits).341*342* @param huffNode The Huffman tree modified in place to enforce targetNbBits.343* It's presumed sorted, from most frequent to rarest symbol.344* @param lastNonNull The symbol with the lowest count in the Huffman tree.345* @param targetNbBits The allowed number of bits, which the Huffman tree346* may not respect. After this function the Huffman tree will347* respect targetNbBits.348* @return The maximum number of bits of the Huffman tree after adjustment.349*/350static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)351{352const U32 largestBits = huffNode[lastNonNull].nbBits;353/* early exit : no elt > targetNbBits, so the tree is already valid. */354if (largestBits <= targetNbBits) return largestBits;355356DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);357358/* there are several too large elements (at least >= 2) */359{ int totalCost = 0;360const U32 baseCost = 1 << (largestBits - targetNbBits);361int n = (int)lastNonNull;362363/* Adjust any ranks > targetNbBits to targetNbBits.364* Compute totalCost, which is how far the sum of the ranks is365* we are over 2^largestBits after adjust the offending ranks.366*/367while (huffNode[n].nbBits > targetNbBits) {368totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));369huffNode[n].nbBits = (BYTE)targetNbBits;370n--;371}372/* n stops at huffNode[n].nbBits <= targetNbBits */373assert(huffNode[n].nbBits <= targetNbBits);374/* n end at index of smallest symbol using < targetNbBits */375while (huffNode[n].nbBits == targetNbBits) --n;376377/* renorm totalCost from 2^largestBits to 2^targetNbBits378* note : totalCost is necessarily a multiple of baseCost */379assert(((U32)totalCost & (baseCost - 1)) == 0);380totalCost >>= (largestBits - targetNbBits);381assert(totalCost > 0);382383/* repay normalized cost */384{ U32 const noSymbol = 0xF0F0F0F0;385U32 rankLast[HUF_TABLELOG_MAX+2];386387/* Get pos of last (smallest = lowest cum. count) symbol per rank */388ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));389{ U32 currentNbBits = targetNbBits;390int pos;391for (pos=n ; pos >= 0; pos--) {392if (huffNode[pos].nbBits >= currentNbBits) continue;393currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */394rankLast[targetNbBits-currentNbBits] = (U32)pos;395} }396397while (totalCost > 0) {398/* Try to reduce the next power of 2 above totalCost because we399* gain back half the rank.400*/401U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;402for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {403U32 const highPos = rankLast[nBitsToDecrease];404U32 const lowPos = rankLast[nBitsToDecrease-1];405if (highPos == noSymbol) continue;406/* Decrease highPos if no symbols of lowPos or if it is407* not cheaper to remove 2 lowPos than highPos.408*/409if (lowPos == noSymbol) break;410{ U32 const highTotal = huffNode[highPos].count;411U32 const lowTotal = 2 * huffNode[lowPos].count;412if (highTotal <= lowTotal) break;413} }414/* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */415assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);416/* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */417while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))418nBitsToDecrease++;419assert(rankLast[nBitsToDecrease] != noSymbol);420/* Increase the number of bits to gain back half the rank cost. */421totalCost -= 1 << (nBitsToDecrease-1);422huffNode[rankLast[nBitsToDecrease]].nbBits++;423424/* Fix up the new rank.425* If the new rank was empty, this symbol is now its smallest.426* Otherwise, this symbol will be the largest in the new rank so no adjustment.427*/428if (rankLast[nBitsToDecrease-1] == noSymbol)429rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];430/* Fix up the old rank.431* If the symbol was at position 0, meaning it was the highest weight symbol in the tree,432* it must be the only symbol in its rank, so the old rank now has no symbols.433* Otherwise, since the Huffman nodes are sorted by count, the previous position is now434* the smallest node in the rank. If the previous position belongs to a different rank,435* then the rank is now empty.436*/437if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */438rankLast[nBitsToDecrease] = noSymbol;439else {440rankLast[nBitsToDecrease]--;441if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)442rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */443}444} /* while (totalCost > 0) */445446/* If we've removed too much weight, then we have to add it back.447* To avoid overshooting again, we only adjust the smallest rank.448* We take the largest nodes from the lowest rank 0 and move them449* to rank 1. There's guaranteed to be enough rank 0 symbols because450* TODO.451*/452while (totalCost < 0) { /* Sometimes, cost correction overshoot */453/* special case : no rank 1 symbol (using targetNbBits-1);454* let's create one from largest rank 0 (using targetNbBits).455*/456if (rankLast[1] == noSymbol) {457while (huffNode[n].nbBits == targetNbBits) n--;458huffNode[n+1].nbBits--;459assert(n >= 0);460rankLast[1] = (U32)(n+1);461totalCost++;462continue;463}464huffNode[ rankLast[1] + 1 ].nbBits--;465rankLast[1]++;466totalCost ++;467}468} /* repay normalized cost */469} /* there are several too large elements (at least >= 2) */470471return targetNbBits;472}473474typedef struct {475U16 base;476U16 curr;477} rankPos;478479typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];480481/* Number of buckets available for HUF_sort() */482#define RANK_POSITION_TABLE_SIZE 192483484typedef struct {485huffNodeTable huffNodeTbl;486rankPos rankPosition[RANK_POSITION_TABLE_SIZE];487} HUF_buildCTable_wksp_tables;488489/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.490* Strategy is to use as many buckets as possible for representing distinct491* counts while using the remainder to represent all "large" counts.492*493* To satisfy this requirement for 192 buckets, we can do the following:494* Let buckets 0-166 represent distinct counts of [0, 166]495* Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.496*/497#define RANK_POSITION_MAX_COUNT_LOG 32498#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)499#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)500501/* Return the appropriate bucket index for a given count. See definition of502* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.503*/504static U32 HUF_getIndex(U32 const count) {505return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)506? count507: ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;508}509510/* Helper swap function for HUF_quickSortPartition() */511static void HUF_swapNodes(nodeElt* a, nodeElt* b) {512nodeElt tmp = *a;513*a = *b;514*b = tmp;515}516517/* Returns 0 if the huffNode array is not sorted by descending count */518MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {519U32 i;520for (i = 1; i < maxSymbolValue1; ++i) {521if (huffNode[i].count > huffNode[i-1].count) {522return 0;523}524}525return 1;526}527528/* Insertion sort by descending order */529HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {530int i;531int const size = high-low+1;532huffNode += low;533for (i = 1; i < size; ++i) {534nodeElt const key = huffNode[i];535int j = i - 1;536while (j >= 0 && huffNode[j].count < key.count) {537huffNode[j + 1] = huffNode[j];538j--;539}540huffNode[j + 1] = key;541}542}543544/* Pivot helper function for quicksort. */545static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {546/* Simply select rightmost element as pivot. "Better" selectors like547* median-of-three don't experimentally appear to have any benefit.548*/549U32 const pivot = arr[high].count;550int i = low - 1;551int j = low;552for ( ; j < high; j++) {553if (arr[j].count > pivot) {554i++;555HUF_swapNodes(&arr[i], &arr[j]);556}557}558HUF_swapNodes(&arr[i + 1], &arr[high]);559return i + 1;560}561562/* Classic quicksort by descending with partially iterative calls563* to reduce worst case callstack size.564*/565static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {566int const kInsertionSortThreshold = 8;567if (high - low < kInsertionSortThreshold) {568HUF_insertionSort(arr, low, high);569return;570}571while (low < high) {572int const idx = HUF_quickSortPartition(arr, low, high);573if (idx - low < high - idx) {574HUF_simpleQuickSort(arr, low, idx - 1);575low = idx + 1;576} else {577HUF_simpleQuickSort(arr, idx + 1, high);578high = idx - 1;579}580}581}582583/**584* HUF_sort():585* Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.586* This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.587*588* @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.589* Must have (maxSymbolValue + 1) entries.590* @param[in] count Histogram of the symbols.591* @param[in] maxSymbolValue Maximum symbol value.592* @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.593*/594static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {595U32 n;596U32 const maxSymbolValue1 = maxSymbolValue+1;597598/* Compute base and set curr to base.599* For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.600* See HUF_getIndex to see bucketing strategy.601* We attribute each symbol to lowerRank's base value, because we want to know where602* each rank begins in the output, so for rank R we want to count ranks R+1 and above.603*/604ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);605for (n = 0; n < maxSymbolValue1; ++n) {606U32 lowerRank = HUF_getIndex(count[n]);607assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);608rankPosition[lowerRank].base++;609}610611assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);612/* Set up the rankPosition table */613for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {614rankPosition[n-1].base += rankPosition[n].base;615rankPosition[n-1].curr = rankPosition[n-1].base;616}617618/* Insert each symbol into their appropriate bucket, setting up rankPosition table. */619for (n = 0; n < maxSymbolValue1; ++n) {620U32 const c = count[n];621U32 const r = HUF_getIndex(c) + 1;622U32 const pos = rankPosition[r].curr++;623assert(pos < maxSymbolValue1);624huffNode[pos].count = c;625huffNode[pos].byte = (BYTE)n;626}627628/* Sort each bucket. */629for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {630int const bucketSize = rankPosition[n].curr - rankPosition[n].base;631U32 const bucketStartIdx = rankPosition[n].base;632if (bucketSize > 1) {633assert(bucketStartIdx < maxSymbolValue1);634HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);635}636}637638assert(HUF_isSorted(huffNode, maxSymbolValue1));639}640641642/** HUF_buildCTable_wksp() :643* Same as HUF_buildCTable(), but using externally allocated scratch buffer.644* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).645*/646#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)647648/* HUF_buildTree():649* Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.650*651* @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array.652* @param maxSymbolValue The maximum symbol value.653* @return The smallest node in the Huffman tree (by count).654*/655static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)656{657nodeElt* const huffNode0 = huffNode - 1;658int nonNullRank;659int lowS, lowN;660int nodeNb = STARTNODE;661int n, nodeRoot;662DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);663/* init for parents */664nonNullRank = (int)maxSymbolValue;665while(huffNode[nonNullRank].count == 0) nonNullRank--;666lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;667huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;668huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;669nodeNb++; lowS-=2;670for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);671huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */672673/* create parents */674while (nodeNb <= nodeRoot) {675int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;676int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;677huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;678huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;679nodeNb++;680}681682/* distribute weights (unlimited tree height) */683huffNode[nodeRoot].nbBits = 0;684for (n=nodeRoot-1; n>=STARTNODE; n--)685huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;686for (n=0; n<=nonNullRank; n++)687huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;688689DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));690691return nonNullRank;692}693694/**695* HUF_buildCTableFromTree():696* Build the CTable given the Huffman tree in huffNode.697*698* @param[out] CTable The output Huffman CTable.699* @param huffNode The Huffman tree.700* @param nonNullRank The last and smallest node in the Huffman tree.701* @param maxSymbolValue The maximum symbol value.702* @param maxNbBits The exact maximum number of bits used in the Huffman tree.703*/704static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)705{706HUF_CElt* const ct = CTable + 1;707/* fill result into ctable (val, nbBits) */708int n;709U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};710U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};711int const alphabetSize = (int)(maxSymbolValue + 1);712for (n=0; n<=nonNullRank; n++)713nbPerRank[huffNode[n].nbBits]++;714/* determine starting value per rank */715{ U16 min = 0;716for (n=(int)maxNbBits; n>0; n--) {717valPerRank[n] = min; /* get starting value within each rank */718min += nbPerRank[n];719min >>= 1;720} }721for (n=0; n<alphabetSize; n++)722HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */723for (n=0; n<alphabetSize; n++)724HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */725CTable[0] = maxNbBits;726}727728size_t729HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,730void* workSpace, size_t wkspSize)731{732HUF_buildCTable_wksp_tables* const wksp_tables =733(HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));734nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;735nodeElt* const huffNode = huffNode0+1;736int nonNullRank;737738HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));739740DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);741742/* safety checks */743if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))744return ERROR(workSpace_tooSmall);745if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;746if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)747return ERROR(maxSymbolValue_tooLarge);748ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));749750/* sort, decreasing order */751HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);752DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));753754/* build tree */755nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);756757/* determine and enforce maxTableLog */758maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);759if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */760761HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);762763return maxNbBits;764}765766size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)767{768HUF_CElt const* ct = CTable + 1;769size_t nbBits = 0;770int s;771for (s = 0; s <= (int)maxSymbolValue; ++s) {772nbBits += HUF_getNbBits(ct[s]) * count[s];773}774return nbBits >> 3;775}776777int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {778HUF_CElt const* ct = CTable + 1;779int bad = 0;780int s;781for (s = 0; s <= (int)maxSymbolValue; ++s) {782bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);783}784return !bad;785}786787size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }788789/** HUF_CStream_t:790* Huffman uses its own BIT_CStream_t implementation.791* There are three major differences from BIT_CStream_t:792* 1. HUF_addBits() takes a HUF_CElt (size_t) which is793* the pair (nbBits, value) in the format:794* format:795* - Bits [0, 4) = nbBits796* - Bits [4, 64 - nbBits) = 0797* - Bits [64 - nbBits, 64) = value798* 2. The bitContainer is built from the upper bits and799* right shifted. E.g. to add a new value of N bits800* you right shift the bitContainer by N, then or in801* the new value into the N upper bits.802* 3. The bitstream has two bit containers. You can add803* bits to the second container and merge them into804* the first container.805*/806807#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)808809typedef struct {810size_t bitContainer[2];811size_t bitPos[2];812813BYTE* startPtr;814BYTE* ptr;815BYTE* endPtr;816} HUF_CStream_t;817818/**! HUF_initCStream():819* Initializes the bitstream.820* @returns 0 or an error code.821*/822static size_t HUF_initCStream(HUF_CStream_t* bitC,823void* startPtr, size_t dstCapacity)824{825ZSTD_memset(bitC, 0, sizeof(*bitC));826bitC->startPtr = (BYTE*)startPtr;827bitC->ptr = bitC->startPtr;828bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);829if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);830return 0;831}832833/*! HUF_addBits():834* Adds the symbol stored in HUF_CElt elt to the bitstream.835*836* @param elt The element we're adding. This is a (nbBits, value) pair.837* See the HUF_CStream_t docs for the format.838* @param idx Insert into the bitstream at this idx.839* @param kFast This is a template parameter. If the bitstream is guaranteed840* to have at least 4 unused bits after this call it may be 1,841* otherwise it must be 0. HUF_addBits() is faster when fast is set.842*/843FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)844{845assert(idx <= 1);846assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);847/* This is efficient on x86-64 with BMI2 because shrx848* only reads the low 6 bits of the register. The compiler849* knows this and elides the mask. When fast is set,850* every operation can use the same value loaded from elt.851*/852bitC->bitContainer[idx] >>= HUF_getNbBits(elt);853bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);854/* We only read the low 8 bits of bitC->bitPos[idx] so it855* doesn't matter that the high bits have noise from the value.856*/857bitC->bitPos[idx] += HUF_getNbBitsFast(elt);858assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);859/* The last 4-bits of elt are dirty if fast is set,860* so we must not be overwriting bits that have already been861* inserted into the bit container.862*/863#if DEBUGLEVEL >= 1864{865size_t const nbBits = HUF_getNbBits(elt);866size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;867(void)dirtyBits;868/* Middle bits are 0. */869assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);870/* We didn't overwrite any bits in the bit container. */871assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);872(void)dirtyBits;873}874#endif875}876877FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)878{879bitC->bitContainer[1] = 0;880bitC->bitPos[1] = 0;881}882883/*! HUF_mergeIndex1() :884* Merges the bit container @ index 1 into the bit container @ index 0885* and zeros the bit container @ index 1.886*/887FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)888{889assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);890bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);891bitC->bitContainer[0] |= bitC->bitContainer[1];892bitC->bitPos[0] += bitC->bitPos[1];893assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);894}895896/*! HUF_flushBits() :897* Flushes the bits in the bit container @ index 0.898*899* @post bitPos will be < 8.900* @param kFast If kFast is set then we must know a-priori that901* the bit container will not overflow.902*/903FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)904{905/* The upper bits of bitPos are noisy, so we must mask by 0xFF. */906size_t const nbBits = bitC->bitPos[0] & 0xFF;907size_t const nbBytes = nbBits >> 3;908/* The top nbBits bits of bitContainer are the ones we need. */909size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);910/* Mask bitPos to account for the bytes we consumed. */911bitC->bitPos[0] &= 7;912assert(nbBits > 0);913assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);914assert(bitC->ptr <= bitC->endPtr);915MEM_writeLEST(bitC->ptr, bitContainer);916bitC->ptr += nbBytes;917assert(!kFast || bitC->ptr <= bitC->endPtr);918if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;919/* bitContainer doesn't need to be modified because the leftover920* bits are already the top bitPos bits. And we don't care about921* noise in the lower values.922*/923}924925/*! HUF_endMark()926* @returns The Huffman stream end mark: A 1-bit value = 1.927*/928static HUF_CElt HUF_endMark(void)929{930HUF_CElt endMark;931HUF_setNbBits(&endMark, 1);932HUF_setValue(&endMark, 1);933return endMark;934}935936/*! HUF_closeCStream() :937* @return Size of CStream, in bytes,938* or 0 if it could not fit into dstBuffer */939static size_t HUF_closeCStream(HUF_CStream_t* bitC)940{941HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);942HUF_flushBits(bitC, /* kFast */ 0);943{944size_t const nbBits = bitC->bitPos[0] & 0xFF;945if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */946return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);947}948}949950FORCE_INLINE_TEMPLATE void951HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)952{953HUF_addBits(bitCPtr, CTable[symbol], idx, fast);954}955956FORCE_INLINE_TEMPLATE void957HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,958const BYTE* ip, size_t srcSize,959const HUF_CElt* ct,960int kUnroll, int kFastFlush, int kLastFast)961{962/* Join to kUnroll */963int n = (int)srcSize;964int rem = n % kUnroll;965if (rem > 0) {966for (; rem > 0; --rem) {967HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);968}969HUF_flushBits(bitC, kFastFlush);970}971assert(n % kUnroll == 0);972973/* Join to 2 * kUnroll */974if (n % (2 * kUnroll)) {975int u;976for (u = 1; u < kUnroll; ++u) {977HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);978}979HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);980HUF_flushBits(bitC, kFastFlush);981n -= kUnroll;982}983assert(n % (2 * kUnroll) == 0);984985for (; n>0; n-= 2 * kUnroll) {986/* Encode kUnroll symbols into the bitstream @ index 0. */987int u;988for (u = 1; u < kUnroll; ++u) {989HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);990}991HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);992HUF_flushBits(bitC, kFastFlush);993/* Encode kUnroll symbols into the bitstream @ index 1.994* This allows us to start filling the bit container995* without any data dependencies.996*/997HUF_zeroIndex1(bitC);998for (u = 1; u < kUnroll; ++u) {999HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);1000}1001HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);1002/* Merge bitstream @ index 1 into the bitstream @ index 0 */1003HUF_mergeIndex1(bitC);1004HUF_flushBits(bitC, kFastFlush);1005}1006assert(n == 0);10071008}10091010/**1011* Returns a tight upper bound on the output space needed by Huffman1012* with 8 bytes buffer to handle over-writes. If the output is at least1013* this large we don't need to do bounds checks during Huffman encoding.1014*/1015static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)1016{1017return ((srcSize * tableLog) >> 3) + 8;1018}101910201021FORCE_INLINE_TEMPLATE size_t1022HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,1023const void* src, size_t srcSize,1024const HUF_CElt* CTable)1025{1026U32 const tableLog = (U32)CTable[0];1027HUF_CElt const* ct = CTable + 1;1028const BYTE* ip = (const BYTE*) src;1029BYTE* const ostart = (BYTE*)dst;1030BYTE* const oend = ostart + dstSize;1031BYTE* op = ostart;1032HUF_CStream_t bitC;10331034/* init */1035if (dstSize < 8) return 0; /* not enough space to compress */1036{ size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));1037if (HUF_isError(initErr)) return 0; }10381039if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)1040HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);1041else {1042if (MEM_32bits()) {1043switch (tableLog) {1044case 11:1045HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);1046break;1047case 10: ZSTD_FALLTHROUGH;1048case 9: ZSTD_FALLTHROUGH;1049case 8:1050HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);1051break;1052case 7: ZSTD_FALLTHROUGH;1053default:1054HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);1055break;1056}1057} else {1058switch (tableLog) {1059case 11:1060HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);1061break;1062case 10:1063HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);1064break;1065case 9:1066HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);1067break;1068case 8:1069HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);1070break;1071case 7:1072HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);1073break;1074case 6: ZSTD_FALLTHROUGH;1075default:1076HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);1077break;1078}1079}1080}1081assert(bitC.ptr <= bitC.endPtr);10821083return HUF_closeCStream(&bitC);1084}10851086#if DYNAMIC_BMI210871088static BMI2_TARGET_ATTRIBUTE size_t1089HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,1090const void* src, size_t srcSize,1091const HUF_CElt* CTable)1092{1093return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);1094}10951096static size_t1097HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,1098const void* src, size_t srcSize,1099const HUF_CElt* CTable)1100{1101return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);1102}11031104static size_t1105HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,1106const void* src, size_t srcSize,1107const HUF_CElt* CTable, const int flags)1108{1109if (flags & HUF_flags_bmi2) {1110return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);1111}1112return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);1113}11141115#else11161117static size_t1118HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,1119const void* src, size_t srcSize,1120const HUF_CElt* CTable, const int flags)1121{1122(void)flags;1123return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);1124}11251126#endif11271128size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)1129{1130return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);1131}11321133static size_t1134HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,1135const void* src, size_t srcSize,1136const HUF_CElt* CTable, int flags)1137{1138size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */1139const BYTE* ip = (const BYTE*) src;1140const BYTE* const iend = ip + srcSize;1141BYTE* const ostart = (BYTE*) dst;1142BYTE* const oend = ostart + dstSize;1143BYTE* op = ostart;11441145if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */1146if (srcSize < 12) return 0; /* no saving possible : too small input */1147op += 6; /* jumpTable */11481149assert(op <= oend);1150{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );1151if (cSize == 0 || cSize > 65535) return 0;1152MEM_writeLE16(ostart, (U16)cSize);1153op += cSize;1154}11551156ip += segmentSize;1157assert(op <= oend);1158{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );1159if (cSize == 0 || cSize > 65535) return 0;1160MEM_writeLE16(ostart+2, (U16)cSize);1161op += cSize;1162}11631164ip += segmentSize;1165assert(op <= oend);1166{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );1167if (cSize == 0 || cSize > 65535) return 0;1168MEM_writeLE16(ostart+4, (U16)cSize);1169op += cSize;1170}11711172ip += segmentSize;1173assert(op <= oend);1174assert(ip <= iend);1175{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );1176if (cSize == 0 || cSize > 65535) return 0;1177op += cSize;1178}11791180return (size_t)(op-ostart);1181}11821183size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)1184{1185return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);1186}11871188typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;11891190static size_t HUF_compressCTable_internal(1191BYTE* const ostart, BYTE* op, BYTE* const oend,1192const void* src, size_t srcSize,1193HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)1194{1195size_t const cSize = (nbStreams==HUF_singleStream) ?1196HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :1197HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);1198if (HUF_isError(cSize)) { return cSize; }1199if (cSize==0) { return 0; } /* uncompressible */1200op += cSize;1201/* check compressibility */1202assert(op >= ostart);1203if ((size_t)(op-ostart) >= srcSize-1) { return 0; }1204return (size_t)(op-ostart);1205}12061207typedef struct {1208unsigned count[HUF_SYMBOLVALUE_MAX + 1];1209HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];1210union {1211HUF_buildCTable_wksp_tables buildCTable_wksp;1212HUF_WriteCTableWksp writeCTable_wksp;1213U32 hist_wksp[HIST_WKSP_SIZE_U32];1214} wksps;1215} HUF_compress_tables_t;12161217#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 40961218#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */12191220unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)1221{1222unsigned cardinality = 0;1223unsigned i;12241225for (i = 0; i < maxSymbolValue + 1; i++) {1226if (count[i] != 0) cardinality += 1;1227}12281229return cardinality;1230}12311232unsigned HUF_minTableLog(unsigned symbolCardinality)1233{1234U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;1235return minBitsSymbols;1236}12371238unsigned HUF_optimalTableLog(1239unsigned maxTableLog,1240size_t srcSize,1241unsigned maxSymbolValue,1242void* workSpace, size_t wkspSize,1243HUF_CElt* table,1244const unsigned* count,1245int flags)1246{1247assert(srcSize > 1); /* Not supported, RLE should be used instead */1248assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));12491250if (!(flags & HUF_flags_optimalDepth)) {1251/* cheap evaluation, based on FSE */1252return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);1253}12541255{ BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);1256size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);1257size_t maxBits, hSize, newSize;1258const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);1259const unsigned minTableLog = HUF_minTableLog(symbolCardinality);1260size_t optSize = ((size_t) ~0) - 1;1261unsigned optLog = maxTableLog, optLogGuess;12621263DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);12641265/* Search until size increases */1266for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {1267DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);1268maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);1269if (ERR_isError(maxBits)) continue;12701271if (maxBits < optLogGuess && optLogGuess > minTableLog) break;12721273hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);12741275if (ERR_isError(hSize)) continue;12761277newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;12781279if (newSize > optSize + 1) {1280break;1281}12821283if (newSize < optSize) {1284optSize = newSize;1285optLog = optLogGuess;1286}1287}1288assert(optLog <= HUF_TABLELOG_MAX);1289return optLog;1290}1291}12921293/* HUF_compress_internal() :1294* `workSpace_align4` must be aligned on 4-bytes boundaries,1295* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */1296static size_t1297HUF_compress_internal (void* dst, size_t dstSize,1298const void* src, size_t srcSize,1299unsigned maxSymbolValue, unsigned huffLog,1300HUF_nbStreams_e nbStreams,1301void* workSpace, size_t wkspSize,1302HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)1303{1304HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));1305BYTE* const ostart = (BYTE*)dst;1306BYTE* const oend = ostart + dstSize;1307BYTE* op = ostart;13081309DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);1310HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);13111312/* checks & inits */1313if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);1314if (!srcSize) return 0; /* Uncompressed */1315if (!dstSize) return 0; /* cannot fit anything within dst budget */1316if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */1317if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);1318if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);1319if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;1320if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;13211322/* Heuristic : If old table is valid, use it for small inputs */1323if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {1324return HUF_compressCTable_internal(ostart, op, oend,1325src, srcSize,1326nbStreams, oldHufTable, flags);1327}13281329/* If uncompressible data is suspected, do a smaller sampling first */1330DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);1331if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {1332size_t largestTotal = 0;1333DEBUGLOG(5, "input suspected incompressible : sampling to check");1334{ unsigned maxSymbolValueBegin = maxSymbolValue;1335CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );1336largestTotal += largestBegin;1337}1338{ unsigned maxSymbolValueEnd = maxSymbolValue;1339CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );1340largestTotal += largestEnd;1341}1342if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */1343}13441345/* Scan input and build symbol stats */1346{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );1347if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */1348if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */1349}1350DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));13511352/* Check validity of previous table */1353if ( repeat1354&& *repeat == HUF_repeat_check1355&& !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {1356*repeat = HUF_repeat_none;1357}1358/* Heuristic : use existing table for small inputs */1359if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {1360return HUF_compressCTable_internal(ostart, op, oend,1361src, srcSize,1362nbStreams, oldHufTable, flags);1363}13641365/* Build Huffman Tree */1366huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);1367{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,1368maxSymbolValue, huffLog,1369&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));1370CHECK_F(maxBits);1371huffLog = (U32)maxBits;1372DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));1373}1374/* Zero unused symbols in CTable, so we can check it for validity */1375{1376size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);1377size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);1378ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);1379}13801381/* Write table description header */1382{ CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,1383&table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );1384/* Check if using previous huffman table is beneficial */1385if (repeat && *repeat != HUF_repeat_none) {1386size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);1387size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);1388if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {1389return HUF_compressCTable_internal(ostart, op, oend,1390src, srcSize,1391nbStreams, oldHufTable, flags);1392} }13931394/* Use the new huffman table */1395if (hSize + 12ul >= srcSize) { return 0; }1396op += hSize;1397if (repeat) { *repeat = HUF_repeat_none; }1398if (oldHufTable)1399ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */1400}1401return HUF_compressCTable_internal(ostart, op, oend,1402src, srcSize,1403nbStreams, table->CTable, flags);1404}14051406size_t HUF_compress1X_repeat (void* dst, size_t dstSize,1407const void* src, size_t srcSize,1408unsigned maxSymbolValue, unsigned huffLog,1409void* workSpace, size_t wkspSize,1410HUF_CElt* hufTable, HUF_repeat* repeat, int flags)1411{1412DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);1413return HUF_compress_internal(dst, dstSize, src, srcSize,1414maxSymbolValue, huffLog, HUF_singleStream,1415workSpace, wkspSize, hufTable,1416repeat, flags);1417}14181419/* HUF_compress4X_repeat():1420* compress input using 4 streams.1421* consider skipping quickly1422* re-use an existing huffman compression table */1423size_t HUF_compress4X_repeat (void* dst, size_t dstSize,1424const void* src, size_t srcSize,1425unsigned maxSymbolValue, unsigned huffLog,1426void* workSpace, size_t wkspSize,1427HUF_CElt* hufTable, HUF_repeat* repeat, int flags)1428{1429DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);1430return HUF_compress_internal(dst, dstSize, src, srcSize,1431maxSymbolValue, huffLog, HUF_fourStreams,1432workSpace, wkspSize,1433hufTable, repeat, flags);1434}143514361437