Path: blob/main/sys/contrib/zstd/lib/common/bitstream.h
48378 views
/* ******************************************************************1* bitstream2* Part of FSE library3* Copyright (c) Yann Collet, Facebook, Inc.4*5* You can contact the author at :6* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy7*8* This source code is licensed under both the BSD-style license (found in the9* LICENSE file in the root directory of this source tree) and the GPLv2 (found10* in the COPYING file in the root directory of this source tree).11* You may select, at your option, one of the above-listed licenses.12****************************************************************** */13#ifndef BITSTREAM_H_MODULE14#define BITSTREAM_H_MODULE1516#if defined (__cplusplus)17extern "C" {18#endif19/*20* This API consists of small unitary functions, which must be inlined for best performance.21* Since link-time-optimization is not available for all compilers,22* these functions are defined into a .h to be included.23*/2425/*-****************************************26* Dependencies27******************************************/28#include "mem.h" /* unaligned access routines */29#include "compiler.h" /* UNLIKELY() */30#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */31#include "error_private.h" /* error codes and messages */323334/*=========================================35* Target specific36=========================================*/37#ifndef ZSTD_NO_INTRINSICS38# if defined(__BMI__) && defined(__GNUC__)39# include <immintrin.h> /* support for bextr (experimental) */40# elif defined(__ICCARM__)41# include <intrinsics.h>42# endif43#endif4445#define STREAM_ACCUMULATOR_MIN_32 2546#define STREAM_ACCUMULATOR_MIN_64 5747#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))484950/*-******************************************51* bitStream encoding API (write forward)52********************************************/53/* bitStream can mix input from multiple sources.54* A critical property of these streams is that they encode and decode in **reverse** direction.55* So the first bit sequence you add will be the last to be read, like a LIFO stack.56*/57typedef struct {58size_t bitContainer;59unsigned bitPos;60char* startPtr;61char* ptr;62char* endPtr;63} BIT_CStream_t;6465MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);66MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);67MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);68MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);6970/* Start with initCStream, providing the size of buffer to write into.71* bitStream will never write outside of this buffer.72* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.73*74* bits are first added to a local register.75* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.76* Writing data into memory is an explicit operation, performed by the flushBits function.77* Hence keep track how many bits are potentially stored into local register to avoid register overflow.78* After a flushBits, a maximum of 7 bits might still be stored into local register.79*80* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.81*82* Last operation is to close the bitStream.83* The function returns the final size of CStream in bytes.84* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)85*/868788/*-********************************************89* bitStream decoding API (read backward)90**********************************************/91typedef struct {92size_t bitContainer;93unsigned bitsConsumed;94const char* ptr;95const char* start;96const char* limitPtr;97} BIT_DStream_t;9899typedef enum { BIT_DStream_unfinished = 0,100BIT_DStream_endOfBuffer = 1,101BIT_DStream_completed = 2,102BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */103/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */104105MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);106MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);107MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);108MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);109110111/* Start by invoking BIT_initDStream().112* A chunk of the bitStream is then stored into a local register.113* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).114* You can then retrieve bitFields stored into the local register, **in reverse order**.115* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.116* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.117* Otherwise, it can be less than that, so proceed accordingly.118* Checking if DStream has reached its end can be performed with BIT_endOfDStream().119*/120121122/*-****************************************123* unsafe API124******************************************/125MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);126/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */127128MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);129/* unsafe version; does not check buffer overflow */130131MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);132/* faster, but works only if nbBits >= 1 */133134135136/*-**************************************************************137* Internal functions138****************************************************************/139MEM_STATIC unsigned BIT_highbit32 (U32 val)140{141assert(val != 0);142{143# if defined(_MSC_VER) /* Visual */144# if STATIC_BMI2 == 1145return _lzcnt_u32(val) ^ 31;146# else147if (val != 0) {148unsigned long r;149_BitScanReverse(&r, val);150return (unsigned)r;151} else {152/* Should not reach this code path */153__assume(0);154}155# endif156# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */157return __builtin_clz (val) ^ 31;158# elif defined(__ICCARM__) /* IAR Intrinsic */159return 31 - __CLZ(val);160# else /* Software version */161static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,16211, 14, 16, 18, 22, 25, 3, 30,1638, 12, 20, 28, 15, 17, 24, 7,16419, 27, 23, 6, 26, 5, 4, 31 };165U32 v = val;166v |= v >> 1;167v |= v >> 2;168v |= v >> 4;169v |= v >> 8;170v |= v >> 16;171return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];172# endif173}174}175176/*===== Local Constants =====*/177static const unsigned BIT_mask[] = {1780, 1, 3, 7, 0xF, 0x1F,1790x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,1800xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF,1810x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,1820xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,1830x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */184#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))185186/*-**************************************************************187* bitStream encoding188****************************************************************/189/*! BIT_initCStream() :190* `dstCapacity` must be > sizeof(size_t)191* @return : 0 if success,192* otherwise an error code (can be tested using ERR_isError()) */193MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,194void* startPtr, size_t dstCapacity)195{196bitC->bitContainer = 0;197bitC->bitPos = 0;198bitC->startPtr = (char*)startPtr;199bitC->ptr = bitC->startPtr;200bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);201if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);202return 0;203}204205/*! BIT_addBits() :206* can add up to 31 bits into `bitC`.207* Note : does not check for register overflow ! */208MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,209size_t value, unsigned nbBits)210{211DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);212assert(nbBits < BIT_MASK_SIZE);213assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);214bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;215bitC->bitPos += nbBits;216}217218/*! BIT_addBitsFast() :219* works only if `value` is _clean_,220* meaning all high bits above nbBits are 0 */221MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,222size_t value, unsigned nbBits)223{224assert((value>>nbBits) == 0);225assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);226bitC->bitContainer |= value << bitC->bitPos;227bitC->bitPos += nbBits;228}229230/*! BIT_flushBitsFast() :231* assumption : bitContainer has not overflowed232* unsafe version; does not check buffer overflow */233MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)234{235size_t const nbBytes = bitC->bitPos >> 3;236assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);237assert(bitC->ptr <= bitC->endPtr);238MEM_writeLEST(bitC->ptr, bitC->bitContainer);239bitC->ptr += nbBytes;240bitC->bitPos &= 7;241bitC->bitContainer >>= nbBytes*8;242}243244/*! BIT_flushBits() :245* assumption : bitContainer has not overflowed246* safe version; check for buffer overflow, and prevents it.247* note : does not signal buffer overflow.248* overflow will be revealed later on using BIT_closeCStream() */249MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)250{251size_t const nbBytes = bitC->bitPos >> 3;252assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);253assert(bitC->ptr <= bitC->endPtr);254MEM_writeLEST(bitC->ptr, bitC->bitContainer);255bitC->ptr += nbBytes;256if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;257bitC->bitPos &= 7;258bitC->bitContainer >>= nbBytes*8;259}260261/*! BIT_closeCStream() :262* @return : size of CStream, in bytes,263* or 0 if it could not fit into dstBuffer */264MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)265{266BIT_addBitsFast(bitC, 1, 1); /* endMark */267BIT_flushBits(bitC);268if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */269return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);270}271272273/*-********************************************************274* bitStream decoding275**********************************************************/276/*! BIT_initDStream() :277* Initialize a BIT_DStream_t.278* `bitD` : a pointer to an already allocated BIT_DStream_t structure.279* `srcSize` must be the *exact* size of the bitStream, in bytes.280* @return : size of stream (== srcSize), or an errorCode if a problem is detected281*/282MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)283{284if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }285286bitD->start = (const char*)srcBuffer;287bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);288289if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */290bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);291bitD->bitContainer = MEM_readLEST(bitD->ptr);292{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];293bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */294if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }295} else {296bitD->ptr = bitD->start;297bitD->bitContainer = *(const BYTE*)(bitD->start);298switch(srcSize)299{300case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);301ZSTD_FALLTHROUGH;302303case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);304ZSTD_FALLTHROUGH;305306case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);307ZSTD_FALLTHROUGH;308309case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;310ZSTD_FALLTHROUGH;311312case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;313ZSTD_FALLTHROUGH;314315case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;316ZSTD_FALLTHROUGH;317318default: break;319}320{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];321bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;322if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */323}324bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;325}326327return srcSize;328}329330MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)331{332return bitContainer >> start;333}334335MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)336{337U32 const regMask = sizeof(bitContainer)*8 - 1;338/* if start > regMask, bitstream is corrupted, and result is undefined */339assert(nbBits < BIT_MASK_SIZE);340/* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better341* than accessing memory. When bmi2 instruction is not present, we consider342* such cpus old (pre-Haswell, 2013) and their performance is not of that343* importance.344*/345#if defined(__x86_64__) || defined(_M_X86)346return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);347#else348return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];349#endif350}351352MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)353{354#if defined(STATIC_BMI2) && STATIC_BMI2 == 1355return _bzhi_u64(bitContainer, nbBits);356#else357assert(nbBits < BIT_MASK_SIZE);358return bitContainer & BIT_mask[nbBits];359#endif360}361362/*! BIT_lookBits() :363* Provides next n bits from local register.364* local register is not modified.365* On 32-bits, maxNbBits==24.366* On 64-bits, maxNbBits==56.367* @return : value extracted */368MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)369{370/* arbitrate between double-shift and shift+mask */371#if 1372/* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,373* bitstream is likely corrupted, and result is undefined */374return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);375#else376/* this code path is slower on my os-x laptop */377U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;378return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);379#endif380}381382/*! BIT_lookBitsFast() :383* unsafe version; only works if nbBits >= 1 */384MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)385{386U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;387assert(nbBits >= 1);388return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);389}390391MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)392{393bitD->bitsConsumed += nbBits;394}395396/*! BIT_readBits() :397* Read (consume) next n bits from local register and update.398* Pay attention to not read more than nbBits contained into local register.399* @return : extracted value. */400MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)401{402size_t const value = BIT_lookBits(bitD, nbBits);403BIT_skipBits(bitD, nbBits);404return value;405}406407/*! BIT_readBitsFast() :408* unsafe version; only works only if nbBits >= 1 */409MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)410{411size_t const value = BIT_lookBitsFast(bitD, nbBits);412assert(nbBits >= 1);413BIT_skipBits(bitD, nbBits);414return value;415}416417/*! BIT_reloadDStreamFast() :418* Similar to BIT_reloadDStream(), but with two differences:419* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!420* 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this421* point you must use BIT_reloadDStream() to reload.422*/423MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)424{425if (UNLIKELY(bitD->ptr < bitD->limitPtr))426return BIT_DStream_overflow;427assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);428bitD->ptr -= bitD->bitsConsumed >> 3;429bitD->bitsConsumed &= 7;430bitD->bitContainer = MEM_readLEST(bitD->ptr);431return BIT_DStream_unfinished;432}433434/*! BIT_reloadDStream() :435* Refill `bitD` from buffer previously set in BIT_initDStream() .436* This function is safe, it guarantees it will not read beyond src buffer.437* @return : status of `BIT_DStream_t` internal register.438* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */439MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)440{441if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */442return BIT_DStream_overflow;443444if (bitD->ptr >= bitD->limitPtr) {445return BIT_reloadDStreamFast(bitD);446}447if (bitD->ptr == bitD->start) {448if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;449return BIT_DStream_completed;450}451/* start < ptr < limitPtr */452{ U32 nbBytes = bitD->bitsConsumed >> 3;453BIT_DStream_status result = BIT_DStream_unfinished;454if (bitD->ptr - nbBytes < bitD->start) {455nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */456result = BIT_DStream_endOfBuffer;457}458bitD->ptr -= nbBytes;459bitD->bitsConsumed -= nbBytes*8;460bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */461return result;462}463}464465/*! BIT_endOfDStream() :466* @return : 1 if DStream has _exactly_ reached its end (all bits consumed).467*/468MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)469{470return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));471}472473#if defined (__cplusplus)474}475#endif476477#endif /* BITSTREAM_H_MODULE */478479480