Path: blob/main/sys/contrib/zstd/lib/common/fse_decompress.c
48378 views
/* ******************************************************************1* FSE : Finite State Entropy decoder2* Copyright (c) Yann Collet, Facebook, Inc.3*4* You can contact the author at :5* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy6* - Public forum : https://groups.google.com/forum/#!forum/lz4c7*8* This source code is licensed under both the BSD-style license (found in the9* LICENSE file in the root directory of this source tree) and the GPLv2 (found10* in the COPYING file in the root directory of this source tree).11* You may select, at your option, one of the above-listed licenses.12****************************************************************** */131415/* **************************************************************16* Includes17****************************************************************/18#include "debug.h" /* assert */19#include "bitstream.h"20#include "compiler.h"21#define FSE_STATIC_LINKING_ONLY22#include "fse.h"23#include "error_private.h"24#define ZSTD_DEPS_NEED_MALLOC25#include "zstd_deps.h"262728/* **************************************************************29* Error Management30****************************************************************/31#define FSE_isError ERR_isError32#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */333435/* **************************************************************36* Templates37****************************************************************/38/*39designed to be included40for type-specific functions (template emulation in C)41Objective is to write these functions only once, for improved maintenance42*/4344/* safety checks */45#ifndef FSE_FUNCTION_EXTENSION46# error "FSE_FUNCTION_EXTENSION must be defined"47#endif48#ifndef FSE_FUNCTION_TYPE49# error "FSE_FUNCTION_TYPE must be defined"50#endif5152/* Function names */53#define FSE_CAT(X,Y) X##Y54#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)55#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)565758/* Function templates */59FSE_DTable* FSE_createDTable (unsigned tableLog)60{61if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;62return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );63}6465void FSE_freeDTable (FSE_DTable* dt)66{67ZSTD_free(dt);68}6970static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)71{72void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */73FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);74U16* symbolNext = (U16*)workSpace;75BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);7677U32 const maxSV1 = maxSymbolValue + 1;78U32 const tableSize = 1 << tableLog;79U32 highThreshold = tableSize-1;8081/* Sanity Checks */82if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);83if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);84if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);8586/* Init, lay down lowprob symbols */87{ FSE_DTableHeader DTableH;88DTableH.tableLog = (U16)tableLog;89DTableH.fastMode = 1;90{ S16 const largeLimit= (S16)(1 << (tableLog-1));91U32 s;92for (s=0; s<maxSV1; s++) {93if (normalizedCounter[s]==-1) {94tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;95symbolNext[s] = 1;96} else {97if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;98symbolNext[s] = normalizedCounter[s];99} } }100ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));101}102103/* Spread symbols */104if (highThreshold == tableSize - 1) {105size_t const tableMask = tableSize-1;106size_t const step = FSE_TABLESTEP(tableSize);107/* First lay down the symbols in order.108* We use a uint64_t to lay down 8 bytes at a time. This reduces branch109* misses since small blocks generally have small table logs, so nearly110* all symbols have counts <= 8. We ensure we have 8 bytes at the end of111* our buffer to handle the over-write.112*/113{114U64 const add = 0x0101010101010101ull;115size_t pos = 0;116U64 sv = 0;117U32 s;118for (s=0; s<maxSV1; ++s, sv += add) {119int i;120int const n = normalizedCounter[s];121MEM_write64(spread + pos, sv);122for (i = 8; i < n; i += 8) {123MEM_write64(spread + pos + i, sv);124}125pos += n;126}127}128/* Now we spread those positions across the table.129* The benefit of doing it in two stages is that we avoid the the130* variable size inner loop, which caused lots of branch misses.131* Now we can run through all the positions without any branch misses.132* We unroll the loop twice, since that is what emperically worked best.133*/134{135size_t position = 0;136size_t s;137size_t const unroll = 2;138assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */139for (s = 0; s < (size_t)tableSize; s += unroll) {140size_t u;141for (u = 0; u < unroll; ++u) {142size_t const uPosition = (position + (u * step)) & tableMask;143tableDecode[uPosition].symbol = spread[s + u];144}145position = (position + (unroll * step)) & tableMask;146}147assert(position == 0);148}149} else {150U32 const tableMask = tableSize-1;151U32 const step = FSE_TABLESTEP(tableSize);152U32 s, position = 0;153for (s=0; s<maxSV1; s++) {154int i;155for (i=0; i<normalizedCounter[s]; i++) {156tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;157position = (position + step) & tableMask;158while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */159} }160if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */161}162163/* Build Decoding table */164{ U32 u;165for (u=0; u<tableSize; u++) {166FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);167U32 const nextState = symbolNext[symbol]++;168tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );169tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);170} }171172return 0;173}174175size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)176{177return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);178}179180181#ifndef FSE_COMMONDEFS_ONLY182183/*-*******************************************************184* Decompression (Byte symbols)185*********************************************************/186size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)187{188void* ptr = dt;189FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;190void* dPtr = dt + 1;191FSE_decode_t* const cell = (FSE_decode_t*)dPtr;192193DTableH->tableLog = 0;194DTableH->fastMode = 0;195196cell->newState = 0;197cell->symbol = symbolValue;198cell->nbBits = 0;199200return 0;201}202203204size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)205{206void* ptr = dt;207FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;208void* dPtr = dt + 1;209FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;210const unsigned tableSize = 1 << nbBits;211const unsigned tableMask = tableSize - 1;212const unsigned maxSV1 = tableMask+1;213unsigned s;214215/* Sanity checks */216if (nbBits < 1) return ERROR(GENERIC); /* min size */217218/* Build Decoding Table */219DTableH->tableLog = (U16)nbBits;220DTableH->fastMode = 1;221for (s=0; s<maxSV1; s++) {222dinfo[s].newState = 0;223dinfo[s].symbol = (BYTE)s;224dinfo[s].nbBits = (BYTE)nbBits;225}226227return 0;228}229230FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(231void* dst, size_t maxDstSize,232const void* cSrc, size_t cSrcSize,233const FSE_DTable* dt, const unsigned fast)234{235BYTE* const ostart = (BYTE*) dst;236BYTE* op = ostart;237BYTE* const omax = op + maxDstSize;238BYTE* const olimit = omax-3;239240BIT_DStream_t bitD;241FSE_DState_t state1;242FSE_DState_t state2;243244/* Init */245CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));246247FSE_initDState(&state1, &bitD, dt);248FSE_initDState(&state2, &bitD, dt);249250#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)251252/* 4 symbols per loop */253for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) {254op[0] = FSE_GETSYMBOL(&state1);255256if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */257BIT_reloadDStream(&bitD);258259op[1] = FSE_GETSYMBOL(&state2);260261if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */262{ if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }263264op[2] = FSE_GETSYMBOL(&state1);265266if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */267BIT_reloadDStream(&bitD);268269op[3] = FSE_GETSYMBOL(&state2);270}271272/* tail */273/* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */274while (1) {275if (op>(omax-2)) return ERROR(dstSize_tooSmall);276*op++ = FSE_GETSYMBOL(&state1);277if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {278*op++ = FSE_GETSYMBOL(&state2);279break;280}281282if (op>(omax-2)) return ERROR(dstSize_tooSmall);283*op++ = FSE_GETSYMBOL(&state2);284if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {285*op++ = FSE_GETSYMBOL(&state1);286break;287} }288289return op-ostart;290}291292293size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,294const void* cSrc, size_t cSrcSize,295const FSE_DTable* dt)296{297const void* ptr = dt;298const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;299const U32 fastMode = DTableH->fastMode;300301/* select fast mode (static) */302if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);303return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);304}305306307size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)308{309return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);310}311312typedef struct {313short ncount[FSE_MAX_SYMBOL_VALUE + 1];314FSE_DTable dtable[1]; /* Dynamically sized */315} FSE_DecompressWksp;316317318FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(319void* dst, size_t dstCapacity,320const void* cSrc, size_t cSrcSize,321unsigned maxLog, void* workSpace, size_t wkspSize,322int bmi2)323{324const BYTE* const istart = (const BYTE*)cSrc;325const BYTE* ip = istart;326unsigned tableLog;327unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;328FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;329330DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);331if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);332333/* normal FSE decoding mode */334{335size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);336if (FSE_isError(NCountLength)) return NCountLength;337if (tableLog > maxLog) return ERROR(tableLog_tooLarge);338assert(NCountLength <= cSrcSize);339ip += NCountLength;340cSrcSize -= NCountLength;341}342343if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);344workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);345wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);346347CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );348349{350const void* ptr = wksp->dtable;351const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;352const U32 fastMode = DTableH->fastMode;353354/* select fast mode (static) */355if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);356return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);357}358}359360/* Avoids the FORCE_INLINE of the _body() function. */361static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)362{363return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);364}365366#if DYNAMIC_BMI2367BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)368{369return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);370}371#endif372373size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)374{375#if DYNAMIC_BMI2376if (bmi2) {377return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);378}379#endif380(void)bmi2;381return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);382}383384385typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];386387#ifndef ZSTD_NO_UNUSED_FUNCTIONS388size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) {389U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)];390return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp));391}392393size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)394{395/* Static analyzer seems unable to understand this table will be properly initialized later */396U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];397return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp));398}399#endif400401402#endif /* FSE_COMMONDEFS_ONLY */403404405