Path: blob/main/sys/contrib/openzfs/module/zstd/lib/common/bitstream.h
48774 views
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only1/* ******************************************************************2* bitstream3* Part of FSE library4* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.5*6* You can contact the author at :7* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy8*9* This source code is licensed under both the BSD-style license (found in the10* LICENSE file in the root directory of this source tree) and the GPLv2 (found11* in the COPYING file in the root directory of this source tree).12* You may select, at your option, one of the above-listed licenses.13****************************************************************** */14#ifndef BITSTREAM_H_MODULE15#define BITSTREAM_H_MODULE1617#if defined (__cplusplus)18extern "C" {19#endif2021/*22* This API consists of small unitary functions, which must be inlined for best performance.23* Since link-time-optimization is not available for all compilers,24* these functions are defined into a .h to be included.25*/2627/*-****************************************28* Dependencies29******************************************/30#include "mem.h" /* unaligned access routines */31#include "compiler.h" /* UNLIKELY() */32#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */33#include "error_private.h" /* error codes and messages */343536/*=========================================37* Target specific38=========================================*/39#if defined(__BMI__) && defined(__GNUC__)40# include <immintrin.h> /* support for bextr (experimental) */41#elif defined(__ICCARM__)42# include <intrinsics.h>43#endif4445#define STREAM_ACCUMULATOR_MIN_32 2546#define STREAM_ACCUMULATOR_MIN_64 5747#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))484950/*-******************************************51* bitStream encoding API (write forward)52********************************************/53/* bitStream can mix input from multiple sources.54* A critical property of these streams is that they encode and decode in **reverse** direction.55* So the first bit sequence you add will be the last to be read, like a LIFO stack.56*/57typedef struct {58size_t bitContainer;59unsigned bitPos;60char* startPtr;61char* ptr;62char* endPtr;63} BIT_CStream_t;6465MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);66MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);67MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);68MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);6970/* Start with initCStream, providing the size of buffer to write into.71* bitStream will never write outside of this buffer.72* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.73*74* bits are first added to a local register.75* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.76* Writing data into memory is an explicit operation, performed by the flushBits function.77* Hence keep track how many bits are potentially stored into local register to avoid register overflow.78* After a flushBits, a maximum of 7 bits might still be stored into local register.79*80* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.81*82* Last operation is to close the bitStream.83* The function returns the final size of CStream in bytes.84* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)85*/868788/*-********************************************89* bitStream decoding API (read backward)90**********************************************/91typedef struct {92size_t bitContainer;93unsigned bitsConsumed;94const char* ptr;95const char* start;96const char* limitPtr;97} BIT_DStream_t;9899typedef enum { BIT_DStream_unfinished = 0,100BIT_DStream_endOfBuffer = 1,101BIT_DStream_completed = 2,102BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */103/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */104105MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);106MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);107MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);108MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);109110111/* Start by invoking BIT_initDStream().112* A chunk of the bitStream is then stored into a local register.113* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).114* You can then retrieve bitFields stored into the local register, **in reverse order**.115* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.116* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.117* Otherwise, it can be less than that, so proceed accordingly.118* Checking if DStream has reached its end can be performed with BIT_endOfDStream().119*/120121122/*-****************************************123* unsafe API124******************************************/125MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);126/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */127128MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);129/* unsafe version; does not check buffer overflow */130131MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);132/* faster, but works only if nbBits >= 1 */133134135136/*-**************************************************************137* Internal functions138****************************************************************/139MEM_STATIC unsigned BIT_highbit32 (U32 val)140{141assert(val != 0);142{143# if defined(_MSC_VER) /* Visual */144unsigned long r=0;145return _BitScanReverse ( &r, val ) ? (unsigned)r : 0;146# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */147return __builtin_clz (val) ^ 31;148# elif defined(__ICCARM__) /* IAR Intrinsic */149return 31 - __CLZ(val);150# else /* Software version */151static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,15211, 14, 16, 18, 22, 25, 3, 30,1538, 12, 20, 28, 15, 17, 24, 7,15419, 27, 23, 6, 26, 5, 4, 31 };155U32 v = val;156v |= v >> 1;157v |= v >> 2;158v |= v >> 4;159v |= v >> 8;160v |= v >> 16;161return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];162# endif163}164}165166/*===== Local Constants =====*/167static const unsigned BIT_mask[] = {1680, 1, 3, 7, 0xF, 0x1F,1690x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,1700xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF,1710x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,1720xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,1730x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */174#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))175176/*-**************************************************************177* bitStream encoding178****************************************************************/179/*! BIT_initCStream() :180* `dstCapacity` must be > sizeof(size_t)181* @return : 0 if success,182* otherwise an error code (can be tested using ERR_isError()) */183MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,184void* startPtr, size_t dstCapacity)185{186bitC->bitContainer = 0;187bitC->bitPos = 0;188bitC->startPtr = (char*)startPtr;189bitC->ptr = bitC->startPtr;190bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);191if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);192return 0;193}194195/*! BIT_addBits() :196* can add up to 31 bits into `bitC`.197* Note : does not check for register overflow ! */198MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,199size_t value, unsigned nbBits)200{201MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);202assert(nbBits < BIT_MASK_SIZE);203assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);204bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;205bitC->bitPos += nbBits;206}207208/*! BIT_addBitsFast() :209* works only if `value` is _clean_,210* meaning all high bits above nbBits are 0 */211MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,212size_t value, unsigned nbBits)213{214assert((value>>nbBits) == 0);215assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);216bitC->bitContainer |= value << bitC->bitPos;217bitC->bitPos += nbBits;218}219220/*! BIT_flushBitsFast() :221* assumption : bitContainer has not overflowed222* unsafe version; does not check buffer overflow */223MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)224{225size_t const nbBytes = bitC->bitPos >> 3;226assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);227assert(bitC->ptr <= bitC->endPtr);228MEM_writeLEST(bitC->ptr, bitC->bitContainer);229bitC->ptr += nbBytes;230bitC->bitPos &= 7;231bitC->bitContainer >>= nbBytes*8;232}233234/*! BIT_flushBits() :235* assumption : bitContainer has not overflowed236* safe version; check for buffer overflow, and prevents it.237* note : does not signal buffer overflow.238* overflow will be revealed later on using BIT_closeCStream() */239MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)240{241size_t const nbBytes = bitC->bitPos >> 3;242assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);243assert(bitC->ptr <= bitC->endPtr);244MEM_writeLEST(bitC->ptr, bitC->bitContainer);245bitC->ptr += nbBytes;246if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;247bitC->bitPos &= 7;248bitC->bitContainer >>= nbBytes*8;249}250251/*! BIT_closeCStream() :252* @return : size of CStream, in bytes,253* or 0 if it could not fit into dstBuffer */254MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)255{256BIT_addBitsFast(bitC, 1, 1); /* endMark */257BIT_flushBits(bitC);258if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */259return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);260}261262263/*-********************************************************264* bitStream decoding265**********************************************************/266/*! BIT_initDStream() :267* Initialize a BIT_DStream_t.268* `bitD` : a pointer to an already allocated BIT_DStream_t structure.269* `srcSize` must be the *exact* size of the bitStream, in bytes.270* @return : size of stream (== srcSize), or an errorCode if a problem is detected271*/272MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)273{274if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }275276bitD->start = (const char*)srcBuffer;277bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);278279if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */280bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);281bitD->bitContainer = MEM_readLEST(bitD->ptr);282{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];283bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */284if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }285} else {286bitD->ptr = bitD->start;287bitD->bitContainer = *(const BYTE*)(bitD->start);288switch(srcSize)289{290case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);291/* fall-through */292293case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);294/* fall-through */295296case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);297/* fall-through */298299case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;300/* fall-through */301302case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;303/* fall-through */304305case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;306/* fall-through */307308default: break;309}310{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];311bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;312if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */313}314bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;315}316317return srcSize;318}319320MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)321{322return bitContainer >> start;323}324325MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)326{327U32 const regMask = sizeof(bitContainer)*8 - 1;328/* if start > regMask, bitstream is corrupted, and result is undefined */329assert(nbBits < BIT_MASK_SIZE);330return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];331}332333MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)334{335assert(nbBits < BIT_MASK_SIZE);336return bitContainer & BIT_mask[nbBits];337}338339/*! BIT_lookBits() :340* Provides next n bits from local register.341* local register is not modified.342* On 32-bits, maxNbBits==24.343* On 64-bits, maxNbBits==56.344* @return : value extracted */345MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)346{347/* arbitrate between double-shift and shift+mask */348#if 1349/* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,350* bitstream is likely corrupted, and result is undefined */351return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);352#else353/* this code path is slower on my os-x laptop */354U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;355return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);356#endif357}358359/*! BIT_lookBitsFast() :360* unsafe version; only works if nbBits >= 1 */361MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)362{363U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;364assert(nbBits >= 1);365return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);366}367368MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)369{370bitD->bitsConsumed += nbBits;371}372373/*! BIT_readBits() :374* Read (consume) next n bits from local register and update.375* Pay attention to not read more than nbBits contained into local register.376* @return : extracted value. */377MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)378{379size_t const value = BIT_lookBits(bitD, nbBits);380BIT_skipBits(bitD, nbBits);381return value;382}383384/*! BIT_readBitsFast() :385* unsafe version; only works only if nbBits >= 1 */386MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)387{388size_t const value = BIT_lookBitsFast(bitD, nbBits);389assert(nbBits >= 1);390BIT_skipBits(bitD, nbBits);391return value;392}393394/*! BIT_reloadDStreamFast() :395* Similar to BIT_reloadDStream(), but with two differences:396* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!397* 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this398* point you must use BIT_reloadDStream() to reload.399*/400MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)401{402if (UNLIKELY(bitD->ptr < bitD->limitPtr))403return BIT_DStream_overflow;404assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);405bitD->ptr -= bitD->bitsConsumed >> 3;406bitD->bitsConsumed &= 7;407bitD->bitContainer = MEM_readLEST(bitD->ptr);408return BIT_DStream_unfinished;409}410411/*! BIT_reloadDStream() :412* Refill `bitD` from buffer previously set in BIT_initDStream() .413* This function is safe, it guarantees it will not read beyond src buffer.414* @return : status of `BIT_DStream_t` internal register.415* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */416MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)417{418if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */419return BIT_DStream_overflow;420421if (bitD->ptr >= bitD->limitPtr) {422return BIT_reloadDStreamFast(bitD);423}424if (bitD->ptr == bitD->start) {425if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;426return BIT_DStream_completed;427}428/* start < ptr < limitPtr */429{ U32 nbBytes = bitD->bitsConsumed >> 3;430BIT_DStream_status result = BIT_DStream_unfinished;431if (bitD->ptr - nbBytes < bitD->start) {432nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */433result = BIT_DStream_endOfBuffer;434}435bitD->ptr -= nbBytes;436bitD->bitsConsumed -= nbBytes*8;437bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */438return result;439}440}441442/*! BIT_endOfDStream() :443* @return : 1 if DStream has _exactly_ reached its end (all bits consumed).444*/445MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)446{447return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));448}449450#if defined (__cplusplus)451}452#endif453454#endif /* BITSTREAM_H_MODULE */455456457