Path: blob/master/Utilities/cmzstd/lib/compress/fse_compress.c
5029 views
/* ******************************************************************1* FSE : Finite State Entropy encoder2* Copyright (c) Meta Platforms, Inc. and affiliates.3*4* You can contact the author at :5* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy6* - Public forum : https://groups.google.com/forum/#!forum/lz4c7*8* This source code is licensed under both the BSD-style license (found in the9* LICENSE file in the root directory of this source tree) and the GPLv2 (found10* in the COPYING file in the root directory of this source tree).11* You may select, at your option, one of the above-listed licenses.12****************************************************************** */1314/* **************************************************************15* Includes16****************************************************************/17#include "../common/compiler.h"18#include "../common/mem.h" /* U32, U16, etc. */19#include "../common/debug.h" /* assert, DEBUGLOG */20#include "hist.h" /* HIST_count_wksp */21#include "../common/bitstream.h"22#define FSE_STATIC_LINKING_ONLY23#include "../common/fse.h"24#include "../common/error_private.h"25#define ZSTD_DEPS_NEED_MALLOC26#define ZSTD_DEPS_NEED_MATH6427#include "../common/zstd_deps.h" /* ZSTD_memset */28#include "../common/bits.h" /* ZSTD_highbit32 */293031/* **************************************************************32* Error Management33****************************************************************/34#define FSE_isError ERR_isError353637/* **************************************************************38* Templates39****************************************************************/40/*41designed to be included42for type-specific functions (template emulation in C)43Objective is to write these functions only once, for improved maintenance44*/4546/* safety checks */47#ifndef FSE_FUNCTION_EXTENSION48# error "FSE_FUNCTION_EXTENSION must be defined"49#endif50#ifndef FSE_FUNCTION_TYPE51# error "FSE_FUNCTION_TYPE must be defined"52#endif5354/* Function names */55#define FSE_CAT(X,Y) X##Y56#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)57#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)585960/* Function templates */6162/* FSE_buildCTable_wksp() :63* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).64* wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`65* workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements66*/67size_t FSE_buildCTable_wksp(FSE_CTable* ct,68const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,69void* workSpace, size_t wkspSize)70{71U32 const tableSize = 1 << tableLog;72U32 const tableMask = tableSize - 1;73void* const ptr = ct;74U16* const tableU16 = ( (U16*) ptr) + 2;75void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;76FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);77U32 const step = FSE_TABLESTEP(tableSize);78U32 const maxSV1 = maxSymbolValue+1;7980U16* cumul = (U16*)workSpace; /* size = maxSV1 */81FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */8283U32 highThreshold = tableSize-1;8485assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */86if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);87/* CTable header */88tableU16[-2] = (U16) tableLog;89tableU16[-1] = (U16) maxSymbolValue;90assert(tableLog < 16); /* required for threshold strategy to work */9192/* For explanations on how to distribute symbol values over the table :93* https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */9495#ifdef __clang_analyzer__96ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */97#endif9899/* symbol start positions */100{ U32 u;101cumul[0] = 0;102for (u=1; u <= maxSV1; u++) {103if (normalizedCounter[u-1]==-1) { /* Low proba symbol */104cumul[u] = cumul[u-1] + 1;105tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);106} else {107assert(normalizedCounter[u-1] >= 0);108cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];109assert(cumul[u] >= cumul[u-1]); /* no overflow */110} }111cumul[maxSV1] = (U16)(tableSize+1);112}113114/* Spread symbols */115if (highThreshold == tableSize - 1) {116/* Case for no low prob count symbols. Lay down 8 bytes at a time117* to reduce branch misses since we are operating on a small block118*/119BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */120{ U64 const add = 0x0101010101010101ull;121size_t pos = 0;122U64 sv = 0;123U32 s;124for (s=0; s<maxSV1; ++s, sv += add) {125int i;126int const n = normalizedCounter[s];127MEM_write64(spread + pos, sv);128for (i = 8; i < n; i += 8) {129MEM_write64(spread + pos + i, sv);130}131assert(n>=0);132pos += (size_t)n;133}134}135/* Spread symbols across the table. Lack of lowprob symbols means that136* we don't need variable sized inner loop, so we can unroll the loop and137* reduce branch misses.138*/139{ size_t position = 0;140size_t s;141size_t const unroll = 2; /* Experimentally determined optimal unroll */142assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */143for (s = 0; s < (size_t)tableSize; s += unroll) {144size_t u;145for (u = 0; u < unroll; ++u) {146size_t const uPosition = (position + (u * step)) & tableMask;147tableSymbol[uPosition] = spread[s + u];148}149position = (position + (unroll * step)) & tableMask;150}151assert(position == 0); /* Must have initialized all positions */152}153} else {154U32 position = 0;155U32 symbol;156for (symbol=0; symbol<maxSV1; symbol++) {157int nbOccurrences;158int const freq = normalizedCounter[symbol];159for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {160tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;161position = (position + step) & tableMask;162while (position > highThreshold)163position = (position + step) & tableMask; /* Low proba area */164} }165assert(position==0); /* Must have initialized all positions */166}167168/* Build table */169{ U32 u; for (u=0; u<tableSize; u++) {170FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */171tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */172} }173174/* Build Symbol Transformation Table */175{ unsigned total = 0;176unsigned s;177for (s=0; s<=maxSymbolValue; s++) {178switch (normalizedCounter[s])179{180case 0:181/* filling nonetheless, for compatibility with FSE_getMaxNbBits() */182symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);183break;184185case -1:186case 1:187symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);188assert(total <= INT_MAX);189symbolTT[s].deltaFindState = (int)(total - 1);190total ++;191break;192default :193assert(normalizedCounter[s] > 1);194{ U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);195U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;196symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;197symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);198total += (unsigned)normalizedCounter[s];199} } } }200201#if 0 /* debug : symbol costs */202DEBUGLOG(5, "\n --- table statistics : ");203{ U32 symbol;204for (symbol=0; symbol<=maxSymbolValue; symbol++) {205DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",206symbol, normalizedCounter[symbol],207FSE_getMaxNbBits(symbolTT, symbol),208(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);209} }210#endif211212return 0;213}214215216217#ifndef FSE_COMMONDEFS_ONLY218219/*-**************************************************************220* FSE NCount encoding221****************************************************************/222size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)223{224size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog225+ 4 /* bitCount initialized at 4 */226+ 2 /* first two symbols may use one additional bit each */) / 8)227+ 1 /* round up to whole nb bytes */228+ 2 /* additional two bytes for bitstream flush */;229return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */230}231232static size_t233FSE_writeNCount_generic (void* header, size_t headerBufferSize,234const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,235unsigned writeIsSafe)236{237BYTE* const ostart = (BYTE*) header;238BYTE* out = ostart;239BYTE* const oend = ostart + headerBufferSize;240int nbBits;241const int tableSize = 1 << tableLog;242int remaining;243int threshold;244U32 bitStream = 0;245int bitCount = 0;246unsigned symbol = 0;247unsigned const alphabetSize = maxSymbolValue + 1;248int previousIs0 = 0;249250/* Table Size */251bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;252bitCount += 4;253254/* Init */255remaining = tableSize+1; /* +1 for extra accuracy */256threshold = tableSize;257nbBits = (int)tableLog+1;258259while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */260if (previousIs0) {261unsigned start = symbol;262while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;263if (symbol == alphabetSize) break; /* incorrect distribution */264while (symbol >= start+24) {265start+=24;266bitStream += 0xFFFFU << bitCount;267if ((!writeIsSafe) && (out > oend-2))268return ERROR(dstSize_tooSmall); /* Buffer overflow */269out[0] = (BYTE) bitStream;270out[1] = (BYTE)(bitStream>>8);271out+=2;272bitStream>>=16;273}274while (symbol >= start+3) {275start+=3;276bitStream += 3U << bitCount;277bitCount += 2;278}279bitStream += (symbol-start) << bitCount;280bitCount += 2;281if (bitCount>16) {282if ((!writeIsSafe) && (out > oend - 2))283return ERROR(dstSize_tooSmall); /* Buffer overflow */284out[0] = (BYTE)bitStream;285out[1] = (BYTE)(bitStream>>8);286out += 2;287bitStream >>= 16;288bitCount -= 16;289} }290{ int count = normalizedCounter[symbol++];291int const max = (2*threshold-1) - remaining;292remaining -= count < 0 ? -count : count;293count++; /* +1 for extra accuracy */294if (count>=threshold)295count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */296bitStream += (U32)count << bitCount;297bitCount += nbBits;298bitCount -= (count<max);299previousIs0 = (count==1);300if (remaining<1) return ERROR(GENERIC);301while (remaining<threshold) { nbBits--; threshold>>=1; }302}303if (bitCount>16) {304if ((!writeIsSafe) && (out > oend - 2))305return ERROR(dstSize_tooSmall); /* Buffer overflow */306out[0] = (BYTE)bitStream;307out[1] = (BYTE)(bitStream>>8);308out += 2;309bitStream >>= 16;310bitCount -= 16;311} }312313if (remaining != 1)314return ERROR(GENERIC); /* incorrect normalized distribution */315assert(symbol <= alphabetSize);316317/* flush remaining bitStream */318if ((!writeIsSafe) && (out > oend - 2))319return ERROR(dstSize_tooSmall); /* Buffer overflow */320out[0] = (BYTE)bitStream;321out[1] = (BYTE)(bitStream>>8);322out+= (bitCount+7) /8;323324assert(out >= ostart);325return (size_t)(out-ostart);326}327328329size_t FSE_writeNCount (void* buffer, size_t bufferSize,330const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)331{332if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */333if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */334335if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))336return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);337338return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);339}340341342/*-**************************************************************343* FSE Compression Code344****************************************************************/345346/* provides the minimum logSize to safely represent a distribution */347static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)348{349U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;350U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;351U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;352assert(srcSize > 1); /* Not supported, RLE should be used instead */353return minBits;354}355356unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)357{358U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;359U32 tableLog = maxTableLog;360U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);361assert(srcSize > 1); /* Not supported, RLE should be used instead */362if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;363if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */364if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */365if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;366if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;367return tableLog;368}369370unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)371{372return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);373}374375/* Secondary normalization method.376To be used when primary method fails. */377378static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)379{380short const NOT_YET_ASSIGNED = -2;381U32 s;382U32 distributed = 0;383U32 ToDistribute;384385/* Init */386U32 const lowThreshold = (U32)(total >> tableLog);387U32 lowOne = (U32)((total * 3) >> (tableLog + 1));388389for (s=0; s<=maxSymbolValue; s++) {390if (count[s] == 0) {391norm[s]=0;392continue;393}394if (count[s] <= lowThreshold) {395norm[s] = lowProbCount;396distributed++;397total -= count[s];398continue;399}400if (count[s] <= lowOne) {401norm[s] = 1;402distributed++;403total -= count[s];404continue;405}406407norm[s]=NOT_YET_ASSIGNED;408}409ToDistribute = (1 << tableLog) - distributed;410411if (ToDistribute == 0)412return 0;413414if ((total / ToDistribute) > lowOne) {415/* risk of rounding to zero */416lowOne = (U32)((total * 3) / (ToDistribute * 2));417for (s=0; s<=maxSymbolValue; s++) {418if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {419norm[s] = 1;420distributed++;421total -= count[s];422continue;423} }424ToDistribute = (1 << tableLog) - distributed;425}426427if (distributed == maxSymbolValue+1) {428/* all values are pretty poor;429probably incompressible data (should have already been detected);430find max, then give all remaining points to max */431U32 maxV = 0, maxC = 0;432for (s=0; s<=maxSymbolValue; s++)433if (count[s] > maxC) { maxV=s; maxC=count[s]; }434norm[maxV] += (short)ToDistribute;435return 0;436}437438if (total == 0) {439/* all of the symbols were low enough for the lowOne or lowThreshold */440for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))441if (norm[s] > 0) { ToDistribute--; norm[s]++; }442return 0;443}444445{ U64 const vStepLog = 62 - tableLog;446U64 const mid = (1ULL << (vStepLog-1)) - 1;447U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */448U64 tmpTotal = mid;449for (s=0; s<=maxSymbolValue; s++) {450if (norm[s]==NOT_YET_ASSIGNED) {451U64 const end = tmpTotal + (count[s] * rStep);452U32 const sStart = (U32)(tmpTotal >> vStepLog);453U32 const sEnd = (U32)(end >> vStepLog);454U32 const weight = sEnd - sStart;455if (weight < 1)456return ERROR(GENERIC);457norm[s] = (short)weight;458tmpTotal = end;459} } }460461return 0;462}463464size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,465const unsigned* count, size_t total,466unsigned maxSymbolValue, unsigned useLowProbCount)467{468/* Sanity checks */469if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;470if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */471if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */472if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */473474{ static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };475short const lowProbCount = useLowProbCount ? -1 : 1;476U64 const scale = 62 - tableLog;477U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! */478U64 const vStep = 1ULL<<(scale-20);479int stillToDistribute = 1<<tableLog;480unsigned s;481unsigned largest=0;482short largestP=0;483U32 lowThreshold = (U32)(total >> tableLog);484485for (s=0; s<=maxSymbolValue; s++) {486if (count[s] == total) return 0; /* rle special case */487if (count[s] == 0) { normalizedCounter[s]=0; continue; }488if (count[s] <= lowThreshold) {489normalizedCounter[s] = lowProbCount;490stillToDistribute--;491} else {492short proba = (short)((count[s]*step) >> scale);493if (proba<8) {494U64 restToBeat = vStep * rtbTable[proba];495proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;496}497if (proba > largestP) { largestP=proba; largest=s; }498normalizedCounter[s] = proba;499stillToDistribute -= proba;500} }501if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {502/* corner case, need another normalization method */503size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);504if (FSE_isError(errorCode)) return errorCode;505}506else normalizedCounter[largest] += (short)stillToDistribute;507}508509#if 0510{ /* Print Table (debug) */511U32 s;512U32 nTotal = 0;513for (s=0; s<=maxSymbolValue; s++)514RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);515for (s=0; s<=maxSymbolValue; s++)516nTotal += abs(normalizedCounter[s]);517if (nTotal != (1U<<tableLog))518RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);519getchar();520}521#endif522523return tableLog;524}525526/* fake FSE_CTable, for rle input (always same symbol) */527size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)528{529void* ptr = ct;530U16* tableU16 = ( (U16*) ptr) + 2;531void* FSCTptr = (U32*)ptr + 2;532FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;533534/* header */535tableU16[-2] = (U16) 0;536tableU16[-1] = (U16) symbolValue;537538/* Build table */539tableU16[0] = 0;540tableU16[1] = 0; /* just in case */541542/* Build Symbol Transformation Table */543symbolTT[symbolValue].deltaNbBits = 0;544symbolTT[symbolValue].deltaFindState = 0;545546return 0;547}548549550static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,551const void* src, size_t srcSize,552const FSE_CTable* ct, const unsigned fast)553{554const BYTE* const istart = (const BYTE*) src;555const BYTE* const iend = istart + srcSize;556const BYTE* ip=iend;557558BIT_CStream_t bitC;559FSE_CState_t CState1, CState2;560561/* init */562if (srcSize <= 2) return 0;563{ size_t const initError = BIT_initCStream(&bitC, dst, dstSize);564if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }565566#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))567568if (srcSize & 1) {569FSE_initCState2(&CState1, ct, *--ip);570FSE_initCState2(&CState2, ct, *--ip);571FSE_encodeSymbol(&bitC, &CState1, *--ip);572FSE_FLUSHBITS(&bitC);573} else {574FSE_initCState2(&CState2, ct, *--ip);575FSE_initCState2(&CState1, ct, *--ip);576}577578/* join to mod 4 */579srcSize -= 2;580if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */581FSE_encodeSymbol(&bitC, &CState2, *--ip);582FSE_encodeSymbol(&bitC, &CState1, *--ip);583FSE_FLUSHBITS(&bitC);584}585586/* 2 or 4 encoding per loop */587while ( ip>istart ) {588589FSE_encodeSymbol(&bitC, &CState2, *--ip);590591if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */592FSE_FLUSHBITS(&bitC);593594FSE_encodeSymbol(&bitC, &CState1, *--ip);595596if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */597FSE_encodeSymbol(&bitC, &CState2, *--ip);598FSE_encodeSymbol(&bitC, &CState1, *--ip);599}600601FSE_FLUSHBITS(&bitC);602}603604FSE_flushCState(&bitC, &CState2);605FSE_flushCState(&bitC, &CState1);606return BIT_closeCStream(&bitC);607}608609size_t FSE_compress_usingCTable (void* dst, size_t dstSize,610const void* src, size_t srcSize,611const FSE_CTable* ct)612{613unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));614615if (fast)616return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);617else618return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);619}620621622size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }623624#endif /* FSE_COMMONDEFS_ONLY */625626627