Path: blob/master/Utilities/cmzstd/lib/compress/fse_compress.c
3158 views
/* ******************************************************************1* FSE : Finite State Entropy encoder2* Copyright (c) Meta Platforms, Inc. and affiliates.3*4* You can contact the author at :5* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy6* - Public forum : https://groups.google.com/forum/#!forum/lz4c7*8* This source code is licensed under both the BSD-style license (found in the9* LICENSE file in the root directory of this source tree) and the GPLv2 (found10* in the COPYING file in the root directory of this source tree).11* You may select, at your option, one of the above-listed licenses.12****************************************************************** */1314/* **************************************************************15* Includes16****************************************************************/17#include "../common/compiler.h"18#include "../common/mem.h" /* U32, U16, etc. */19#include "../common/debug.h" /* assert, DEBUGLOG */20#include "hist.h" /* HIST_count_wksp */21#include "../common/bitstream.h"22#define FSE_STATIC_LINKING_ONLY23#include "../common/fse.h"24#include "../common/error_private.h"25#define ZSTD_DEPS_NEED_MALLOC26#define ZSTD_DEPS_NEED_MATH6427#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */28#include "../common/bits.h" /* ZSTD_highbit32 */293031/* **************************************************************32* Error Management33****************************************************************/34#define FSE_isError ERR_isError353637/* **************************************************************38* Templates39****************************************************************/40/*41designed to be included42for type-specific functions (template emulation in C)43Objective is to write these functions only once, for improved maintenance44*/4546/* safety checks */47#ifndef FSE_FUNCTION_EXTENSION48# error "FSE_FUNCTION_EXTENSION must be defined"49#endif50#ifndef FSE_FUNCTION_TYPE51# error "FSE_FUNCTION_TYPE must be defined"52#endif5354/* Function names */55#define FSE_CAT(X,Y) X##Y56#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)57#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)585960/* Function templates */6162/* FSE_buildCTable_wksp() :63* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).64* wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`65* workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements66*/67size_t FSE_buildCTable_wksp(FSE_CTable* ct,68const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,69void* workSpace, size_t wkspSize)70{71U32 const tableSize = 1 << tableLog;72U32 const tableMask = tableSize - 1;73void* const ptr = ct;74U16* const tableU16 = ( (U16*) ptr) + 2;75void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;76FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);77U32 const step = FSE_TABLESTEP(tableSize);78U32 const maxSV1 = maxSymbolValue+1;7980U16* cumul = (U16*)workSpace; /* size = maxSV1 */81FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */8283U32 highThreshold = tableSize-1;8485assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */86if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);87/* CTable header */88tableU16[-2] = (U16) tableLog;89tableU16[-1] = (U16) maxSymbolValue;90assert(tableLog < 16); /* required for threshold strategy to work */9192/* For explanations on how to distribute symbol values over the table :93* https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */9495#ifdef __clang_analyzer__96ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */97#endif9899/* symbol start positions */100{ U32 u;101cumul[0] = 0;102for (u=1; u <= maxSV1; u++) {103if (normalizedCounter[u-1]==-1) { /* Low proba symbol */104cumul[u] = cumul[u-1] + 1;105tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);106} else {107assert(normalizedCounter[u-1] >= 0);108cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];109assert(cumul[u] >= cumul[u-1]); /* no overflow */110} }111cumul[maxSV1] = (U16)(tableSize+1);112}113114/* Spread symbols */115if (highThreshold == tableSize - 1) {116/* Case for no low prob count symbols. Lay down 8 bytes at a time117* to reduce branch misses since we are operating on a small block118*/119BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */120{ U64 const add = 0x0101010101010101ull;121size_t pos = 0;122U64 sv = 0;123U32 s;124for (s=0; s<maxSV1; ++s, sv += add) {125int i;126int const n = normalizedCounter[s];127MEM_write64(spread + pos, sv);128for (i = 8; i < n; i += 8) {129MEM_write64(spread + pos + i, sv);130}131assert(n>=0);132pos += (size_t)n;133}134}135/* Spread symbols across the table. Lack of lowprob symbols means that136* we don't need variable sized inner loop, so we can unroll the loop and137* reduce branch misses.138*/139{ size_t position = 0;140size_t s;141size_t const unroll = 2; /* Experimentally determined optimal unroll */142assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */143for (s = 0; s < (size_t)tableSize; s += unroll) {144size_t u;145for (u = 0; u < unroll; ++u) {146size_t const uPosition = (position + (u * step)) & tableMask;147tableSymbol[uPosition] = spread[s + u];148}149position = (position + (unroll * step)) & tableMask;150}151assert(position == 0); /* Must have initialized all positions */152}153} else {154U32 position = 0;155U32 symbol;156for (symbol=0; symbol<maxSV1; symbol++) {157int nbOccurrences;158int const freq = normalizedCounter[symbol];159for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {160tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;161position = (position + step) & tableMask;162while (position > highThreshold)163position = (position + step) & tableMask; /* Low proba area */164} }165assert(position==0); /* Must have initialized all positions */166}167168/* Build table */169{ U32 u; for (u=0; u<tableSize; u++) {170FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */171tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */172} }173174/* Build Symbol Transformation Table */175{ unsigned total = 0;176unsigned s;177for (s=0; s<=maxSymbolValue; s++) {178switch (normalizedCounter[s])179{180case 0:181/* filling nonetheless, for compatibility with FSE_getMaxNbBits() */182symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);183break;184185case -1:186case 1:187symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);188assert(total <= INT_MAX);189symbolTT[s].deltaFindState = (int)(total - 1);190total ++;191break;192default :193assert(normalizedCounter[s] > 1);194{ U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);195U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;196symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;197symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);198total += (unsigned)normalizedCounter[s];199} } } }200201#if 0 /* debug : symbol costs */202DEBUGLOG(5, "\n --- table statistics : ");203{ U32 symbol;204for (symbol=0; symbol<=maxSymbolValue; symbol++) {205DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",206symbol, normalizedCounter[symbol],207FSE_getMaxNbBits(symbolTT, symbol),208(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);209} }210#endif211212return 0;213}214215216217#ifndef FSE_COMMONDEFS_ONLY218219/*-**************************************************************220* FSE NCount encoding221****************************************************************/222size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)223{224size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog225+ 4 /* bitCount initialized at 4 */226+ 2 /* first two symbols may use one additional bit each */) / 8)227+ 1 /* round up to whole nb bytes */228+ 2 /* additional two bytes for bitstream flush */;229return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */230}231232static size_t233FSE_writeNCount_generic (void* header, size_t headerBufferSize,234const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,235unsigned writeIsSafe)236{237BYTE* const ostart = (BYTE*) header;238BYTE* out = ostart;239BYTE* const oend = ostart + headerBufferSize;240int nbBits;241const int tableSize = 1 << tableLog;242int remaining;243int threshold;244U32 bitStream = 0;245int bitCount = 0;246unsigned symbol = 0;247unsigned const alphabetSize = maxSymbolValue + 1;248int previousIs0 = 0;249250/* Table Size */251bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;252bitCount += 4;253254/* Init */255remaining = tableSize+1; /* +1 for extra accuracy */256threshold = tableSize;257nbBits = tableLog+1;258259while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */260if (previousIs0) {261unsigned start = symbol;262while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;263if (symbol == alphabetSize) break; /* incorrect distribution */264while (symbol >= start+24) {265start+=24;266bitStream += 0xFFFFU << bitCount;267if ((!writeIsSafe) && (out > oend-2))268return ERROR(dstSize_tooSmall); /* Buffer overflow */269out[0] = (BYTE) bitStream;270out[1] = (BYTE)(bitStream>>8);271out+=2;272bitStream>>=16;273}274while (symbol >= start+3) {275start+=3;276bitStream += 3 << bitCount;277bitCount += 2;278}279bitStream += (symbol-start) << bitCount;280bitCount += 2;281if (bitCount>16) {282if ((!writeIsSafe) && (out > oend - 2))283return ERROR(dstSize_tooSmall); /* Buffer overflow */284out[0] = (BYTE)bitStream;285out[1] = (BYTE)(bitStream>>8);286out += 2;287bitStream >>= 16;288bitCount -= 16;289} }290{ int count = normalizedCounter[symbol++];291int const max = (2*threshold-1) - remaining;292remaining -= count < 0 ? -count : count;293count++; /* +1 for extra accuracy */294if (count>=threshold)295count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */296bitStream += count << bitCount;297bitCount += nbBits;298bitCount -= (count<max);299previousIs0 = (count==1);300if (remaining<1) return ERROR(GENERIC);301while (remaining<threshold) { nbBits--; threshold>>=1; }302}303if (bitCount>16) {304if ((!writeIsSafe) && (out > oend - 2))305return ERROR(dstSize_tooSmall); /* Buffer overflow */306out[0] = (BYTE)bitStream;307out[1] = (BYTE)(bitStream>>8);308out += 2;309bitStream >>= 16;310bitCount -= 16;311} }312313if (remaining != 1)314return ERROR(GENERIC); /* incorrect normalized distribution */315assert(symbol <= alphabetSize);316317/* flush remaining bitStream */318if ((!writeIsSafe) && (out > oend - 2))319return ERROR(dstSize_tooSmall); /* Buffer overflow */320out[0] = (BYTE)bitStream;321out[1] = (BYTE)(bitStream>>8);322out+= (bitCount+7) /8;323324return (out-ostart);325}326327328size_t FSE_writeNCount (void* buffer, size_t bufferSize,329const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)330{331if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */332if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */333334if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))335return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);336337return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);338}339340341/*-**************************************************************342* FSE Compression Code343****************************************************************/344345/* provides the minimum logSize to safely represent a distribution */346static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)347{348U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;349U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;350U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;351assert(srcSize > 1); /* Not supported, RLE should be used instead */352return minBits;353}354355unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)356{357U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;358U32 tableLog = maxTableLog;359U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);360assert(srcSize > 1); /* Not supported, RLE should be used instead */361if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;362if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */363if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */364if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;365if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;366return tableLog;367}368369unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)370{371return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);372}373374/* Secondary normalization method.375To be used when primary method fails. */376377static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)378{379short const NOT_YET_ASSIGNED = -2;380U32 s;381U32 distributed = 0;382U32 ToDistribute;383384/* Init */385U32 const lowThreshold = (U32)(total >> tableLog);386U32 lowOne = (U32)((total * 3) >> (tableLog + 1));387388for (s=0; s<=maxSymbolValue; s++) {389if (count[s] == 0) {390norm[s]=0;391continue;392}393if (count[s] <= lowThreshold) {394norm[s] = lowProbCount;395distributed++;396total -= count[s];397continue;398}399if (count[s] <= lowOne) {400norm[s] = 1;401distributed++;402total -= count[s];403continue;404}405406norm[s]=NOT_YET_ASSIGNED;407}408ToDistribute = (1 << tableLog) - distributed;409410if (ToDistribute == 0)411return 0;412413if ((total / ToDistribute) > lowOne) {414/* risk of rounding to zero */415lowOne = (U32)((total * 3) / (ToDistribute * 2));416for (s=0; s<=maxSymbolValue; s++) {417if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {418norm[s] = 1;419distributed++;420total -= count[s];421continue;422} }423ToDistribute = (1 << tableLog) - distributed;424}425426if (distributed == maxSymbolValue+1) {427/* all values are pretty poor;428probably incompressible data (should have already been detected);429find max, then give all remaining points to max */430U32 maxV = 0, maxC = 0;431for (s=0; s<=maxSymbolValue; s++)432if (count[s] > maxC) { maxV=s; maxC=count[s]; }433norm[maxV] += (short)ToDistribute;434return 0;435}436437if (total == 0) {438/* all of the symbols were low enough for the lowOne or lowThreshold */439for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))440if (norm[s] > 0) { ToDistribute--; norm[s]++; }441return 0;442}443444{ U64 const vStepLog = 62 - tableLog;445U64 const mid = (1ULL << (vStepLog-1)) - 1;446U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */447U64 tmpTotal = mid;448for (s=0; s<=maxSymbolValue; s++) {449if (norm[s]==NOT_YET_ASSIGNED) {450U64 const end = tmpTotal + (count[s] * rStep);451U32 const sStart = (U32)(tmpTotal >> vStepLog);452U32 const sEnd = (U32)(end >> vStepLog);453U32 const weight = sEnd - sStart;454if (weight < 1)455return ERROR(GENERIC);456norm[s] = (short)weight;457tmpTotal = end;458} } }459460return 0;461}462463size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,464const unsigned* count, size_t total,465unsigned maxSymbolValue, unsigned useLowProbCount)466{467/* Sanity checks */468if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;469if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */470if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */471if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */472473{ static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };474short const lowProbCount = useLowProbCount ? -1 : 1;475U64 const scale = 62 - tableLog;476U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! */477U64 const vStep = 1ULL<<(scale-20);478int stillToDistribute = 1<<tableLog;479unsigned s;480unsigned largest=0;481short largestP=0;482U32 lowThreshold = (U32)(total >> tableLog);483484for (s=0; s<=maxSymbolValue; s++) {485if (count[s] == total) return 0; /* rle special case */486if (count[s] == 0) { normalizedCounter[s]=0; continue; }487if (count[s] <= lowThreshold) {488normalizedCounter[s] = lowProbCount;489stillToDistribute--;490} else {491short proba = (short)((count[s]*step) >> scale);492if (proba<8) {493U64 restToBeat = vStep * rtbTable[proba];494proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;495}496if (proba > largestP) { largestP=proba; largest=s; }497normalizedCounter[s] = proba;498stillToDistribute -= proba;499} }500if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {501/* corner case, need another normalization method */502size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);503if (FSE_isError(errorCode)) return errorCode;504}505else normalizedCounter[largest] += (short)stillToDistribute;506}507508#if 0509{ /* Print Table (debug) */510U32 s;511U32 nTotal = 0;512for (s=0; s<=maxSymbolValue; s++)513RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);514for (s=0; s<=maxSymbolValue; s++)515nTotal += abs(normalizedCounter[s]);516if (nTotal != (1U<<tableLog))517RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);518getchar();519}520#endif521522return tableLog;523}524525/* fake FSE_CTable, for rle input (always same symbol) */526size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)527{528void* ptr = ct;529U16* tableU16 = ( (U16*) ptr) + 2;530void* FSCTptr = (U32*)ptr + 2;531FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;532533/* header */534tableU16[-2] = (U16) 0;535tableU16[-1] = (U16) symbolValue;536537/* Build table */538tableU16[0] = 0;539tableU16[1] = 0; /* just in case */540541/* Build Symbol Transformation Table */542symbolTT[symbolValue].deltaNbBits = 0;543symbolTT[symbolValue].deltaFindState = 0;544545return 0;546}547548549static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,550const void* src, size_t srcSize,551const FSE_CTable* ct, const unsigned fast)552{553const BYTE* const istart = (const BYTE*) src;554const BYTE* const iend = istart + srcSize;555const BYTE* ip=iend;556557BIT_CStream_t bitC;558FSE_CState_t CState1, CState2;559560/* init */561if (srcSize <= 2) return 0;562{ size_t const initError = BIT_initCStream(&bitC, dst, dstSize);563if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }564565#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))566567if (srcSize & 1) {568FSE_initCState2(&CState1, ct, *--ip);569FSE_initCState2(&CState2, ct, *--ip);570FSE_encodeSymbol(&bitC, &CState1, *--ip);571FSE_FLUSHBITS(&bitC);572} else {573FSE_initCState2(&CState2, ct, *--ip);574FSE_initCState2(&CState1, ct, *--ip);575}576577/* join to mod 4 */578srcSize -= 2;579if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */580FSE_encodeSymbol(&bitC, &CState2, *--ip);581FSE_encodeSymbol(&bitC, &CState1, *--ip);582FSE_FLUSHBITS(&bitC);583}584585/* 2 or 4 encoding per loop */586while ( ip>istart ) {587588FSE_encodeSymbol(&bitC, &CState2, *--ip);589590if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */591FSE_FLUSHBITS(&bitC);592593FSE_encodeSymbol(&bitC, &CState1, *--ip);594595if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */596FSE_encodeSymbol(&bitC, &CState2, *--ip);597FSE_encodeSymbol(&bitC, &CState1, *--ip);598}599600FSE_FLUSHBITS(&bitC);601}602603FSE_flushCState(&bitC, &CState2);604FSE_flushCState(&bitC, &CState1);605return BIT_closeCStream(&bitC);606}607608size_t FSE_compress_usingCTable (void* dst, size_t dstSize,609const void* src, size_t srcSize,610const FSE_CTable* ct)611{612unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));613614if (fast)615return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);616else617return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);618}619620621size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }622623#endif /* FSE_COMMONDEFS_ONLY */624625626