Path: blob/master/Utilities/cmzstd/lib/common/bitstream.h
3158 views
/* ******************************************************************1* bitstream2* Part of FSE library3* Copyright (c) Meta Platforms, Inc. and affiliates.4*5* You can contact the author at :6* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy7*8* This source code is licensed under both the BSD-style license (found in the9* LICENSE file in the root directory of this source tree) and the GPLv2 (found10* in the COPYING file in the root directory of this source tree).11* You may select, at your option, one of the above-listed licenses.12****************************************************************** */13#ifndef BITSTREAM_H_MODULE14#define BITSTREAM_H_MODULE1516#include <assert.h>1718#if defined (__cplusplus)19extern "C" {20#endif21/*22* This API consists of small unitary functions, which must be inlined for best performance.23* Since link-time-optimization is not available for all compilers,24* these functions are defined into a .h to be included.25*/2627/*-****************************************28* Dependencies29******************************************/30#include "mem.h" /* unaligned access routines */31#include "compiler.h" /* UNLIKELY() */32#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */33#include "error_private.h" /* error codes and messages */34#include "bits.h" /* ZSTD_highbit32 */353637/*=========================================38* Target specific39=========================================*/40#ifndef ZSTD_NO_INTRINSICS41# if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)42# include <immintrin.h> /* support for bextr (experimental)/bzhi */43# elif defined(__ICCARM__)44# include <intrinsics.h>45# endif46#endif4748#define STREAM_ACCUMULATOR_MIN_32 2549#define STREAM_ACCUMULATOR_MIN_64 5750#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))515253/*-******************************************54* bitStream encoding API (write forward)55********************************************/56/* bitStream can mix input from multiple sources.57* A critical property of these streams is that they encode and decode in **reverse** direction.58* So the first bit sequence you add will be the last to be read, like a LIFO stack.59*/60typedef struct {61size_t bitContainer;62unsigned bitPos;63char* startPtr;64char* ptr;65char* endPtr;66} BIT_CStream_t;6768MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);69MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);70MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);71MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);7273/* Start with initCStream, providing the size of buffer to write into.74* bitStream will never write outside of this buffer.75* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.76*77* bits are first added to a local register.78* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.79* Writing data into memory is an explicit operation, performed by the flushBits function.80* Hence keep track how many bits are potentially stored into local register to avoid register overflow.81* After a flushBits, a maximum of 7 bits might still be stored into local register.82*83* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.84*85* Last operation is to close the bitStream.86* The function returns the final size of CStream in bytes.87* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)88*/899091/*-********************************************92* bitStream decoding API (read backward)93**********************************************/94typedef struct {95size_t bitContainer;96unsigned bitsConsumed;97const char* ptr;98const char* start;99const char* limitPtr;100} BIT_DStream_t;101102typedef enum { BIT_DStream_unfinished = 0,103BIT_DStream_endOfBuffer = 1,104BIT_DStream_completed = 2,105BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */106/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */107108MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);109MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);110MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);111MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);112113114/* Start by invoking BIT_initDStream().115* A chunk of the bitStream is then stored into a local register.116* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).117* You can then retrieve bitFields stored into the local register, **in reverse order**.118* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.119* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.120* Otherwise, it can be less than that, so proceed accordingly.121* Checking if DStream has reached its end can be performed with BIT_endOfDStream().122*/123124125/*-****************************************126* unsafe API127******************************************/128MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);129/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */130131MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);132/* unsafe version; does not check buffer overflow */133134MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);135/* faster, but works only if nbBits >= 1 */136137/*===== Local Constants =====*/138static const unsigned BIT_mask[] = {1390, 1, 3, 7, 0xF, 0x1F,1400x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,1410xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF,1420x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,1430xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,1440x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */145#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))146147/*-**************************************************************148* bitStream encoding149****************************************************************/150/*! BIT_initCStream() :151* `dstCapacity` must be > sizeof(size_t)152* @return : 0 if success,153* otherwise an error code (can be tested using ERR_isError()) */154MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,155void* startPtr, size_t dstCapacity)156{157bitC->bitContainer = 0;158bitC->bitPos = 0;159bitC->startPtr = (char*)startPtr;160bitC->ptr = bitC->startPtr;161bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);162if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);163return 0;164}165166MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)167{168#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)169return _bzhi_u64(bitContainer, nbBits);170#else171assert(nbBits < BIT_MASK_SIZE);172return bitContainer & BIT_mask[nbBits];173#endif174}175176/*! BIT_addBits() :177* can add up to 31 bits into `bitC`.178* Note : does not check for register overflow ! */179MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,180size_t value, unsigned nbBits)181{182DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);183assert(nbBits < BIT_MASK_SIZE);184assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);185bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;186bitC->bitPos += nbBits;187}188189/*! BIT_addBitsFast() :190* works only if `value` is _clean_,191* meaning all high bits above nbBits are 0 */192MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,193size_t value, unsigned nbBits)194{195assert((value>>nbBits) == 0);196assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);197bitC->bitContainer |= value << bitC->bitPos;198bitC->bitPos += nbBits;199}200201/*! BIT_flushBitsFast() :202* assumption : bitContainer has not overflowed203* unsafe version; does not check buffer overflow */204MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)205{206size_t const nbBytes = bitC->bitPos >> 3;207assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);208assert(bitC->ptr <= bitC->endPtr);209MEM_writeLEST(bitC->ptr, bitC->bitContainer);210bitC->ptr += nbBytes;211bitC->bitPos &= 7;212bitC->bitContainer >>= nbBytes*8;213}214215/*! BIT_flushBits() :216* assumption : bitContainer has not overflowed217* safe version; check for buffer overflow, and prevents it.218* note : does not signal buffer overflow.219* overflow will be revealed later on using BIT_closeCStream() */220MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)221{222size_t const nbBytes = bitC->bitPos >> 3;223assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);224assert(bitC->ptr <= bitC->endPtr);225MEM_writeLEST(bitC->ptr, bitC->bitContainer);226bitC->ptr += nbBytes;227if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;228bitC->bitPos &= 7;229bitC->bitContainer >>= nbBytes*8;230}231232/*! BIT_closeCStream() :233* @return : size of CStream, in bytes,234* or 0 if it could not fit into dstBuffer */235MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)236{237BIT_addBitsFast(bitC, 1, 1); /* endMark */238BIT_flushBits(bitC);239if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */240return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);241}242243244/*-********************************************************245* bitStream decoding246**********************************************************/247/*! BIT_initDStream() :248* Initialize a BIT_DStream_t.249* `bitD` : a pointer to an already allocated BIT_DStream_t structure.250* `srcSize` must be the *exact* size of the bitStream, in bytes.251* @return : size of stream (== srcSize), or an errorCode if a problem is detected252*/253MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)254{255if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }256257bitD->start = (const char*)srcBuffer;258bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);259260if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */261bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);262bitD->bitContainer = MEM_readLEST(bitD->ptr);263{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];264bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */265if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }266} else {267bitD->ptr = bitD->start;268bitD->bitContainer = *(const BYTE*)(bitD->start);269switch(srcSize)270{271case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);272ZSTD_FALLTHROUGH;273274case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);275ZSTD_FALLTHROUGH;276277case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);278ZSTD_FALLTHROUGH;279280case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;281ZSTD_FALLTHROUGH;282283case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;284ZSTD_FALLTHROUGH;285286case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;287ZSTD_FALLTHROUGH;288289default: break;290}291{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];292bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;293if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */294}295bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;296}297298return srcSize;299}300301MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)302{303return bitContainer >> start;304}305306MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)307{308U32 const regMask = sizeof(bitContainer)*8 - 1;309/* if start > regMask, bitstream is corrupted, and result is undefined */310assert(nbBits < BIT_MASK_SIZE);311/* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better312* than accessing memory. When bmi2 instruction is not present, we consider313* such cpus old (pre-Haswell, 2013) and their performance is not of that314* importance.315*/316#if defined(__x86_64__) || defined(_M_X86)317return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);318#else319return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];320#endif321}322323/*! BIT_lookBits() :324* Provides next n bits from local register.325* local register is not modified.326* On 32-bits, maxNbBits==24.327* On 64-bits, maxNbBits==56.328* @return : value extracted */329MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)330{331/* arbitrate between double-shift and shift+mask */332#if 1333/* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,334* bitstream is likely corrupted, and result is undefined */335return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);336#else337/* this code path is slower on my os-x laptop */338U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;339return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);340#endif341}342343/*! BIT_lookBitsFast() :344* unsafe version; only works if nbBits >= 1 */345MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)346{347U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;348assert(nbBits >= 1);349return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);350}351352MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)353{354bitD->bitsConsumed += nbBits;355}356357/*! BIT_readBits() :358* Read (consume) next n bits from local register and update.359* Pay attention to not read more than nbBits contained into local register.360* @return : extracted value. */361MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)362{363size_t const value = BIT_lookBits(bitD, nbBits);364BIT_skipBits(bitD, nbBits);365return value;366}367368/*! BIT_readBitsFast() :369* unsafe version; only works if nbBits >= 1 */370MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)371{372size_t const value = BIT_lookBitsFast(bitD, nbBits);373assert(nbBits >= 1);374BIT_skipBits(bitD, nbBits);375return value;376}377378/*! BIT_reloadDStreamFast() :379* Similar to BIT_reloadDStream(), but with two differences:380* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!381* 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this382* point you must use BIT_reloadDStream() to reload.383*/384MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)385{386if (UNLIKELY(bitD->ptr < bitD->limitPtr))387return BIT_DStream_overflow;388assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);389bitD->ptr -= bitD->bitsConsumed >> 3;390bitD->bitsConsumed &= 7;391bitD->bitContainer = MEM_readLEST(bitD->ptr);392return BIT_DStream_unfinished;393}394395/*! BIT_reloadDStream() :396* Refill `bitD` from buffer previously set in BIT_initDStream() .397* This function is safe, it guarantees it will not read beyond src buffer.398* @return : status of `BIT_DStream_t` internal register.399* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */400MEM_STATIC FORCE_INLINE_ATTR BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)401{402if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */403return BIT_DStream_overflow;404405if (bitD->ptr >= bitD->limitPtr) {406return BIT_reloadDStreamFast(bitD);407}408if (bitD->ptr == bitD->start) {409if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;410return BIT_DStream_completed;411}412/* start < ptr < limitPtr */413{ U32 nbBytes = bitD->bitsConsumed >> 3;414BIT_DStream_status result = BIT_DStream_unfinished;415if (bitD->ptr - nbBytes < bitD->start) {416nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */417result = BIT_DStream_endOfBuffer;418}419bitD->ptr -= nbBytes;420bitD->bitsConsumed -= nbBytes*8;421bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */422return result;423}424}425426/*! BIT_endOfDStream() :427* @return : 1 if DStream has _exactly_ reached its end (all bits consumed).428*/429MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)430{431return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));432}433434#if defined (__cplusplus)435}436#endif437438#endif /* BITSTREAM_H_MODULE */439440441