Path: blob/master/Utilities/cmzstd/lib/common/bitstream.h
4998 views
/* ******************************************************************1* bitstream2* Part of FSE library3* Copyright (c) Meta Platforms, Inc. and affiliates.4*5* You can contact the author at :6* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy7*8* This source code is licensed under both the BSD-style license (found in the9* LICENSE file in the root directory of this source tree) and the GPLv2 (found10* in the COPYING file in the root directory of this source tree).11* You may select, at your option, one of the above-listed licenses.12****************************************************************** */13#ifndef BITSTREAM_H_MODULE14#define BITSTREAM_H_MODULE1516#include <assert.h>1718/*19* This API consists of small unitary functions, which must be inlined for best performance.20* Since link-time-optimization is not available for all compilers,21* these functions are defined into a .h to be included.22*/2324/*-****************************************25* Dependencies26******************************************/27#include "mem.h" /* unaligned access routines */28#include "compiler.h" /* UNLIKELY() */29#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */30#include "error_private.h" /* error codes and messages */31#include "bits.h" /* ZSTD_highbit32 */3233/*=========================================34* Target specific35=========================================*/36#ifndef ZSTD_NO_INTRINSICS37# if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)38# include <immintrin.h> /* support for bextr (experimental)/bzhi */39# elif defined(__ICCARM__)40# include <intrinsics.h>41# endif42#endif4344#define STREAM_ACCUMULATOR_MIN_32 2545#define STREAM_ACCUMULATOR_MIN_64 5746#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))474849/*-******************************************50* bitStream encoding API (write forward)51********************************************/52typedef size_t BitContainerType;53/* bitStream can mix input from multiple sources.54* A critical property of these streams is that they encode and decode in **reverse** direction.55* So the first bit sequence you add will be the last to be read, like a LIFO stack.56*/57typedef struct {58BitContainerType bitContainer;59unsigned bitPos;60char* startPtr;61char* ptr;62char* endPtr;63} BIT_CStream_t;6465MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);66MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);67MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);68MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);6970/* Start with initCStream, providing the size of buffer to write into.71* bitStream will never write outside of this buffer.72* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.73*74* bits are first added to a local register.75* Local register is BitContainerType, 64-bits on 64-bits systems, or 32-bits on 32-bits systems.76* Writing data into memory is an explicit operation, performed by the flushBits function.77* Hence keep track how many bits are potentially stored into local register to avoid register overflow.78* After a flushBits, a maximum of 7 bits might still be stored into local register.79*80* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.81*82* Last operation is to close the bitStream.83* The function returns the final size of CStream in bytes.84* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)85*/868788/*-********************************************89* bitStream decoding API (read backward)90**********************************************/91typedef struct {92BitContainerType bitContainer;93unsigned bitsConsumed;94const char* ptr;95const char* start;96const char* limitPtr;97} BIT_DStream_t;9899typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */100BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */101BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */102BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */103} BIT_DStream_status; /* result of BIT_reloadDStream() */104105MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);106MEM_STATIC BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);107MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);108MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);109110111/* Start by invoking BIT_initDStream().112* A chunk of the bitStream is then stored into a local register.113* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).114* You can then retrieve bitFields stored into the local register, **in reverse order**.115* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.116* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.117* Otherwise, it can be less than that, so proceed accordingly.118* Checking if DStream has reached its end can be performed with BIT_endOfDStream().119*/120121122/*-****************************************123* unsafe API124******************************************/125MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);126/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */127128MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);129/* unsafe version; does not check buffer overflow */130131MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);132/* faster, but works only if nbBits >= 1 */133134/*===== Local Constants =====*/135static const unsigned BIT_mask[] = {1360, 1, 3, 7, 0xF, 0x1F,1370x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,1380xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF,1390x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,1400xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,1410x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */142#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))143144/*-**************************************************************145* bitStream encoding146****************************************************************/147/*! BIT_initCStream() :148* `dstCapacity` must be > sizeof(size_t)149* @return : 0 if success,150* otherwise an error code (can be tested using ERR_isError()) */151MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,152void* startPtr, size_t dstCapacity)153{154bitC->bitContainer = 0;155bitC->bitPos = 0;156bitC->startPtr = (char*)startPtr;157bitC->ptr = bitC->startPtr;158bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);159if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);160return 0;161}162163FORCE_INLINE_TEMPLATE BitContainerType BIT_getLowerBits(BitContainerType bitContainer, U32 const nbBits)164{165#if STATIC_BMI2 && !defined(ZSTD_NO_INTRINSICS)166# if (defined(__x86_64__) || defined(_M_X64)) && !defined(__ILP32__)167return _bzhi_u64(bitContainer, nbBits);168# else169DEBUG_STATIC_ASSERT(sizeof(bitContainer) == sizeof(U32));170return _bzhi_u32(bitContainer, nbBits);171# endif172#else173assert(nbBits < BIT_MASK_SIZE);174return bitContainer & BIT_mask[nbBits];175#endif176}177178/*! BIT_addBits() :179* can add up to 31 bits into `bitC`.180* Note : does not check for register overflow ! */181MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,182BitContainerType value, unsigned nbBits)183{184DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);185assert(nbBits < BIT_MASK_SIZE);186assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);187bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;188bitC->bitPos += nbBits;189}190191/*! BIT_addBitsFast() :192* works only if `value` is _clean_,193* meaning all high bits above nbBits are 0 */194MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,195BitContainerType value, unsigned nbBits)196{197assert((value>>nbBits) == 0);198assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);199bitC->bitContainer |= value << bitC->bitPos;200bitC->bitPos += nbBits;201}202203/*! BIT_flushBitsFast() :204* assumption : bitContainer has not overflowed205* unsafe version; does not check buffer overflow */206MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)207{208size_t const nbBytes = bitC->bitPos >> 3;209assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);210assert(bitC->ptr <= bitC->endPtr);211MEM_writeLEST(bitC->ptr, bitC->bitContainer);212bitC->ptr += nbBytes;213bitC->bitPos &= 7;214bitC->bitContainer >>= nbBytes*8;215}216217/*! BIT_flushBits() :218* assumption : bitContainer has not overflowed219* safe version; check for buffer overflow, and prevents it.220* note : does not signal buffer overflow.221* overflow will be revealed later on using BIT_closeCStream() */222MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)223{224size_t const nbBytes = bitC->bitPos >> 3;225assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);226assert(bitC->ptr <= bitC->endPtr);227MEM_writeLEST(bitC->ptr, bitC->bitContainer);228bitC->ptr += nbBytes;229if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;230bitC->bitPos &= 7;231bitC->bitContainer >>= nbBytes*8;232}233234/*! BIT_closeCStream() :235* @return : size of CStream, in bytes,236* or 0 if it could not fit into dstBuffer */237MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)238{239BIT_addBitsFast(bitC, 1, 1); /* endMark */240BIT_flushBits(bitC);241if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */242return (size_t)(bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);243}244245246/*-********************************************************247* bitStream decoding248**********************************************************/249/*! BIT_initDStream() :250* Initialize a BIT_DStream_t.251* `bitD` : a pointer to an already allocated BIT_DStream_t structure.252* `srcSize` must be the *exact* size of the bitStream, in bytes.253* @return : size of stream (== srcSize), or an errorCode if a problem is detected254*/255MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)256{257if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }258259bitD->start = (const char*)srcBuffer;260bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);261262if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */263bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);264bitD->bitContainer = MEM_readLEST(bitD->ptr);265{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];266bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */267if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }268} else {269bitD->ptr = bitD->start;270bitD->bitContainer = *(const BYTE*)(bitD->start);271switch(srcSize)272{273case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);274ZSTD_FALLTHROUGH;275276case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);277ZSTD_FALLTHROUGH;278279case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);280ZSTD_FALLTHROUGH;281282case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;283ZSTD_FALLTHROUGH;284285case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;286ZSTD_FALLTHROUGH;287288case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;289ZSTD_FALLTHROUGH;290291default: break;292}293{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];294bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;295if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */296}297bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;298}299300return srcSize;301}302303FORCE_INLINE_TEMPLATE BitContainerType BIT_getUpperBits(BitContainerType bitContainer, U32 const start)304{305return bitContainer >> start;306}307308FORCE_INLINE_TEMPLATE BitContainerType BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)309{310U32 const regMask = sizeof(bitContainer)*8 - 1;311/* if start > regMask, bitstream is corrupted, and result is undefined */312assert(nbBits < BIT_MASK_SIZE);313/* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better314* than accessing memory. When bmi2 instruction is not present, we consider315* such cpus old (pre-Haswell, 2013) and their performance is not of that316* importance.317*/318#if defined(__x86_64__) || defined(_M_X64)319return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);320#else321return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];322#endif323}324325/*! BIT_lookBits() :326* Provides next n bits from local register.327* local register is not modified.328* On 32-bits, maxNbBits==24.329* On 64-bits, maxNbBits==56.330* @return : value extracted */331FORCE_INLINE_TEMPLATE BitContainerType BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)332{333/* arbitrate between double-shift and shift+mask */334#if 1335/* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,336* bitstream is likely corrupted, and result is undefined */337return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);338#else339/* this code path is slower on my os-x laptop */340U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;341return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);342#endif343}344345/*! BIT_lookBitsFast() :346* unsafe version; only works if nbBits >= 1 */347MEM_STATIC BitContainerType BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)348{349U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;350assert(nbBits >= 1);351return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);352}353354FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)355{356bitD->bitsConsumed += nbBits;357}358359/*! BIT_readBits() :360* Read (consume) next n bits from local register and update.361* Pay attention to not read more than nbBits contained into local register.362* @return : extracted value. */363FORCE_INLINE_TEMPLATE BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)364{365BitContainerType const value = BIT_lookBits(bitD, nbBits);366BIT_skipBits(bitD, nbBits);367return value;368}369370/*! BIT_readBitsFast() :371* unsafe version; only works if nbBits >= 1 */372MEM_STATIC BitContainerType BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)373{374BitContainerType const value = BIT_lookBitsFast(bitD, nbBits);375assert(nbBits >= 1);376BIT_skipBits(bitD, nbBits);377return value;378}379380/*! BIT_reloadDStream_internal() :381* Simple variant of BIT_reloadDStream(), with two conditions:382* 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8383* 2. look window is valid after shifted down : bitD->ptr >= bitD->start384*/385MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)386{387assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);388bitD->ptr -= bitD->bitsConsumed >> 3;389assert(bitD->ptr >= bitD->start);390bitD->bitsConsumed &= 7;391bitD->bitContainer = MEM_readLEST(bitD->ptr);392return BIT_DStream_unfinished;393}394395/*! BIT_reloadDStreamFast() :396* Similar to BIT_reloadDStream(), but with two differences:397* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!398* 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this399* point you must use BIT_reloadDStream() to reload.400*/401MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)402{403if (UNLIKELY(bitD->ptr < bitD->limitPtr))404return BIT_DStream_overflow;405return BIT_reloadDStream_internal(bitD);406}407408/*! BIT_reloadDStream() :409* Refill `bitD` from buffer previously set in BIT_initDStream() .410* This function is safe, it guarantees it will not never beyond src buffer.411* @return : status of `BIT_DStream_t` internal register.412* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */413FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)414{415/* note : once in overflow mode, a bitstream remains in this mode until it's reset */416if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {417static const BitContainerType zeroFilled = 0;418bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */419/* overflow detected, erroneous scenario or end of stream: no update */420return BIT_DStream_overflow;421}422423assert(bitD->ptr >= bitD->start);424425if (bitD->ptr >= bitD->limitPtr) {426return BIT_reloadDStream_internal(bitD);427}428if (bitD->ptr == bitD->start) {429/* reached end of bitStream => no update */430if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;431return BIT_DStream_completed;432}433/* start < ptr < limitPtr => cautious update */434{ U32 nbBytes = bitD->bitsConsumed >> 3;435BIT_DStream_status result = BIT_DStream_unfinished;436if (bitD->ptr - nbBytes < bitD->start) {437nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */438result = BIT_DStream_endOfBuffer;439}440bitD->ptr -= nbBytes;441bitD->bitsConsumed -= nbBytes*8;442bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */443return result;444}445}446447/*! BIT_endOfDStream() :448* @return : 1 if DStream has _exactly_ reached its end (all bits consumed).449*/450MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)451{452return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));453}454455#endif /* BITSTREAM_H_MODULE */456457458