Path: blob/main/sys/contrib/openzfs/module/zstd/lib/common/entropy_common.c
48774 views
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only1/* ******************************************************************2* Common functions of New Generation Entropy library3* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.4*5* You can contact the author at :6* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy7* - Public forum : https://groups.google.com/forum/#!forum/lz4c8*9* This source code is licensed under both the BSD-style license (found in the10* LICENSE file in the root directory of this source tree) and the GPLv2 (found11* in the COPYING file in the root directory of this source tree).12* You may select, at your option, one of the above-listed licenses.13****************************************************************** */1415/* *************************************16* Dependencies17***************************************/18#include "mem.h"19#include "error_private.h" /* ERR_*, ERROR */20#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */21#include "fse.h"22#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */23#include "huf.h"242526/*=== Version ===*/27unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }282930/*=== Error Management ===*/31unsigned FSE_isError(size_t code) { return ERR_isError(code); }32const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }3334unsigned HUF_isError(size_t code) { return ERR_isError(code); }35const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }363738/*-**************************************************************39* FSE NCount encoding-decoding40****************************************************************/41size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,42const void* headerBuffer, size_t hbSize)43{44const BYTE* const istart = (const BYTE*) headerBuffer;45const BYTE* const iend = istart + hbSize;46const BYTE* ip = istart;47int nbBits;48int remaining;49int threshold;50U32 bitStream;51int bitCount;52unsigned charnum = 0;53int previous0 = 0;5455if (hbSize < 4) {56/* This function only works when hbSize >= 4 */57char buffer[4];58memset(buffer, 0, sizeof(buffer));59memcpy(buffer, headerBuffer, hbSize);60{ size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,61buffer, sizeof(buffer));62if (FSE_isError(countSize)) return countSize;63if (countSize > hbSize) return ERROR(corruption_detected);64return countSize;65} }66assert(hbSize >= 4);6768/* init */69memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */70bitStream = MEM_readLE32(ip);71nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */72if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);73bitStream >>= 4;74bitCount = 4;75*tableLogPtr = nbBits;76remaining = (1<<nbBits)+1;77threshold = 1<<nbBits;78nbBits++;7980while ((remaining>1) & (charnum<=*maxSVPtr)) {81if (previous0) {82unsigned n0 = charnum;83while ((bitStream & 0xFFFF) == 0xFFFF) {84n0 += 24;85if (ip < iend-5) {86ip += 2;87bitStream = MEM_readLE32(ip) >> bitCount;88} else {89bitStream >>= 16;90bitCount += 16;91} }92while ((bitStream & 3) == 3) {93n0 += 3;94bitStream >>= 2;95bitCount += 2;96}97n0 += bitStream & 3;98bitCount += 2;99if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);100while (charnum < n0) normalizedCounter[charnum++] = 0;101if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {102assert((bitCount >> 3) <= 3); /* For first condition to work */103ip += bitCount>>3;104bitCount &= 7;105bitStream = MEM_readLE32(ip) >> bitCount;106} else {107bitStream >>= 2;108} }109{ int const max = (2*threshold-1) - remaining;110int count;111112if ((bitStream & (threshold-1)) < (U32)max) {113count = bitStream & (threshold-1);114bitCount += nbBits-1;115} else {116count = bitStream & (2*threshold-1);117if (count >= threshold) count -= max;118bitCount += nbBits;119}120121count--; /* extra accuracy */122remaining -= count < 0 ? -count : count; /* -1 means +1 */123normalizedCounter[charnum++] = (short)count;124previous0 = !count;125while (remaining < threshold) {126nbBits--;127threshold >>= 1;128}129130if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {131ip += bitCount>>3;132bitCount &= 7;133} else {134bitCount -= (int)(8 * (iend - 4 - ip));135ip = iend - 4;136}137bitStream = MEM_readLE32(ip) >> (bitCount & 31);138} } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */139if (remaining != 1) return ERROR(corruption_detected);140if (bitCount > 32) return ERROR(corruption_detected);141*maxSVPtr = charnum-1;142143ip += (bitCount+7)>>3;144return ip-istart;145}146147148/*! HUF_readStats() :149Read compact Huffman tree, saved by HUF_writeCTable().150`huffWeight` is destination buffer.151`rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.152@return : size read from `src` , or an error Code .153Note : Needed by HUF_readCTable() and HUF_readDTableX?() .154*/155size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,156U32* nbSymbolsPtr, U32* tableLogPtr,157const void* src, size_t srcSize)158{159U32 weightTotal;160const BYTE* ip = (const BYTE*) src;161size_t iSize;162size_t oSize;163164if (!srcSize) return ERROR(srcSize_wrong);165iSize = ip[0];166/* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */167168if (iSize >= 128) { /* special header */169oSize = iSize - 127;170iSize = ((oSize+1)/2);171if (iSize+1 > srcSize) return ERROR(srcSize_wrong);172if (oSize >= hwSize) return ERROR(corruption_detected);173ip += 1;174{ U32 n;175for (n=0; n<oSize; n+=2) {176huffWeight[n] = ip[n/2] >> 4;177huffWeight[n+1] = ip[n/2] & 15;178} } }179else { /* header compressed with FSE (normal case) */180FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */181if (iSize+1 > srcSize) return ERROR(srcSize_wrong);182oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */183if (FSE_isError(oSize)) return oSize;184}185186/* collect weight stats */187memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));188weightTotal = 0;189{ U32 n; for (n=0; n<oSize; n++) {190if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);191rankStats[huffWeight[n]]++;192weightTotal += (1 << huffWeight[n]) >> 1;193} }194if (weightTotal == 0) return ERROR(corruption_detected);195196/* get last non-null symbol weight (implied, total must be 2^n) */197{ U32 const tableLog = BIT_highbit32(weightTotal) + 1;198if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);199*tableLogPtr = tableLog;200/* determine last weight */201{ U32 const total = 1 << tableLog;202U32 const rest = total - weightTotal;203U32 const verif = 1 << BIT_highbit32(rest);204U32 const lastWeight = BIT_highbit32(rest) + 1;205if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */206huffWeight[oSize] = (BYTE)lastWeight;207rankStats[lastWeight]++;208} }209210/* check tree construction validity */211if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */212213/* results */214*nbSymbolsPtr = (U32)(oSize+1);215return iSize+1;216}217218219