Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Kitware
GitHub Repository: Kitware/CMake
Path: blob/master/Utilities/cmzstd/lib/common/bitstream.h
4998 views
1
/* ******************************************************************
2
* bitstream
3
* Part of FSE library
4
* Copyright (c) Meta Platforms, Inc. and affiliates.
5
*
6
* You can contact the author at :
7
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
*
9
* This source code is licensed under both the BSD-style license (found in the
10
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
* in the COPYING file in the root directory of this source tree).
12
* You may select, at your option, one of the above-listed licenses.
13
****************************************************************** */
14
#ifndef BITSTREAM_H_MODULE
15
#define BITSTREAM_H_MODULE
16
17
#include <assert.h>
18
19
/*
20
* This API consists of small unitary functions, which must be inlined for best performance.
21
* Since link-time-optimization is not available for all compilers,
22
* these functions are defined into a .h to be included.
23
*/
24
25
/*-****************************************
26
* Dependencies
27
******************************************/
28
#include "mem.h" /* unaligned access routines */
29
#include "compiler.h" /* UNLIKELY() */
30
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
31
#include "error_private.h" /* error codes and messages */
32
#include "bits.h" /* ZSTD_highbit32 */
33
34
/*=========================================
35
* Target specific
36
=========================================*/
37
#ifndef ZSTD_NO_INTRINSICS
38
# if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
39
# include <immintrin.h> /* support for bextr (experimental)/bzhi */
40
# elif defined(__ICCARM__)
41
# include <intrinsics.h>
42
# endif
43
#endif
44
45
#define STREAM_ACCUMULATOR_MIN_32 25
46
#define STREAM_ACCUMULATOR_MIN_64 57
47
#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
48
49
50
/*-******************************************
51
* bitStream encoding API (write forward)
52
********************************************/
53
typedef size_t BitContainerType;
54
/* bitStream can mix input from multiple sources.
55
* A critical property of these streams is that they encode and decode in **reverse** direction.
56
* So the first bit sequence you add will be the last to be read, like a LIFO stack.
57
*/
58
typedef struct {
59
BitContainerType bitContainer;
60
unsigned bitPos;
61
char* startPtr;
62
char* ptr;
63
char* endPtr;
64
} BIT_CStream_t;
65
66
MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
67
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
68
MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
69
MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
70
71
/* Start with initCStream, providing the size of buffer to write into.
72
* bitStream will never write outside of this buffer.
73
* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
74
*
75
* bits are first added to a local register.
76
* Local register is BitContainerType, 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
77
* Writing data into memory is an explicit operation, performed by the flushBits function.
78
* Hence keep track how many bits are potentially stored into local register to avoid register overflow.
79
* After a flushBits, a maximum of 7 bits might still be stored into local register.
80
*
81
* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
82
*
83
* Last operation is to close the bitStream.
84
* The function returns the final size of CStream in bytes.
85
* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
86
*/
87
88
89
/*-********************************************
90
* bitStream decoding API (read backward)
91
**********************************************/
92
typedef struct {
93
BitContainerType bitContainer;
94
unsigned bitsConsumed;
95
const char* ptr;
96
const char* start;
97
const char* limitPtr;
98
} BIT_DStream_t;
99
100
typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
101
BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
102
BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */
103
BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */
104
} BIT_DStream_status; /* result of BIT_reloadDStream() */
105
106
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
107
MEM_STATIC BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
108
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
109
MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
110
111
112
/* Start by invoking BIT_initDStream().
113
* A chunk of the bitStream is then stored into a local register.
114
* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
115
* You can then retrieve bitFields stored into the local register, **in reverse order**.
116
* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
117
* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
118
* Otherwise, it can be less than that, so proceed accordingly.
119
* Checking if DStream has reached its end can be performed with BIT_endOfDStream().
120
*/
121
122
123
/*-****************************************
124
* unsafe API
125
******************************************/
126
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
127
/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
128
129
MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
130
/* unsafe version; does not check buffer overflow */
131
132
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
133
/* faster, but works only if nbBits >= 1 */
134
135
/*===== Local Constants =====*/
136
static const unsigned BIT_mask[] = {
137
0, 1, 3, 7, 0xF, 0x1F,
138
0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
139
0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF,
140
0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
141
0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
142
0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
143
#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
144
145
/*-**************************************************************
146
* bitStream encoding
147
****************************************************************/
148
/*! BIT_initCStream() :
149
* `dstCapacity` must be > sizeof(size_t)
150
* @return : 0 if success,
151
* otherwise an error code (can be tested using ERR_isError()) */
152
MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
153
void* startPtr, size_t dstCapacity)
154
{
155
bitC->bitContainer = 0;
156
bitC->bitPos = 0;
157
bitC->startPtr = (char*)startPtr;
158
bitC->ptr = bitC->startPtr;
159
bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
160
if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
161
return 0;
162
}
163
164
FORCE_INLINE_TEMPLATE BitContainerType BIT_getLowerBits(BitContainerType bitContainer, U32 const nbBits)
165
{
166
#if STATIC_BMI2 && !defined(ZSTD_NO_INTRINSICS)
167
# if (defined(__x86_64__) || defined(_M_X64)) && !defined(__ILP32__)
168
return _bzhi_u64(bitContainer, nbBits);
169
# else
170
DEBUG_STATIC_ASSERT(sizeof(bitContainer) == sizeof(U32));
171
return _bzhi_u32(bitContainer, nbBits);
172
# endif
173
#else
174
assert(nbBits < BIT_MASK_SIZE);
175
return bitContainer & BIT_mask[nbBits];
176
#endif
177
}
178
179
/*! BIT_addBits() :
180
* can add up to 31 bits into `bitC`.
181
* Note : does not check for register overflow ! */
182
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
183
BitContainerType value, unsigned nbBits)
184
{
185
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
186
assert(nbBits < BIT_MASK_SIZE);
187
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
188
bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
189
bitC->bitPos += nbBits;
190
}
191
192
/*! BIT_addBitsFast() :
193
* works only if `value` is _clean_,
194
* meaning all high bits above nbBits are 0 */
195
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
196
BitContainerType value, unsigned nbBits)
197
{
198
assert((value>>nbBits) == 0);
199
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
200
bitC->bitContainer |= value << bitC->bitPos;
201
bitC->bitPos += nbBits;
202
}
203
204
/*! BIT_flushBitsFast() :
205
* assumption : bitContainer has not overflowed
206
* unsafe version; does not check buffer overflow */
207
MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
208
{
209
size_t const nbBytes = bitC->bitPos >> 3;
210
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
211
assert(bitC->ptr <= bitC->endPtr);
212
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
213
bitC->ptr += nbBytes;
214
bitC->bitPos &= 7;
215
bitC->bitContainer >>= nbBytes*8;
216
}
217
218
/*! BIT_flushBits() :
219
* assumption : bitContainer has not overflowed
220
* safe version; check for buffer overflow, and prevents it.
221
* note : does not signal buffer overflow.
222
* overflow will be revealed later on using BIT_closeCStream() */
223
MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
224
{
225
size_t const nbBytes = bitC->bitPos >> 3;
226
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
227
assert(bitC->ptr <= bitC->endPtr);
228
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
229
bitC->ptr += nbBytes;
230
if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
231
bitC->bitPos &= 7;
232
bitC->bitContainer >>= nbBytes*8;
233
}
234
235
/*! BIT_closeCStream() :
236
* @return : size of CStream, in bytes,
237
* or 0 if it could not fit into dstBuffer */
238
MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
239
{
240
BIT_addBitsFast(bitC, 1, 1); /* endMark */
241
BIT_flushBits(bitC);
242
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
243
return (size_t)(bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
244
}
245
246
247
/*-********************************************************
248
* bitStream decoding
249
**********************************************************/
250
/*! BIT_initDStream() :
251
* Initialize a BIT_DStream_t.
252
* `bitD` : a pointer to an already allocated BIT_DStream_t structure.
253
* `srcSize` must be the *exact* size of the bitStream, in bytes.
254
* @return : size of stream (== srcSize), or an errorCode if a problem is detected
255
*/
256
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
257
{
258
if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
259
260
bitD->start = (const char*)srcBuffer;
261
bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
262
263
if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
264
bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
265
bitD->bitContainer = MEM_readLEST(bitD->ptr);
266
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
267
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
268
if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
269
} else {
270
bitD->ptr = bitD->start;
271
bitD->bitContainer = *(const BYTE*)(bitD->start);
272
switch(srcSize)
273
{
274
case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
275
ZSTD_FALLTHROUGH;
276
277
case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
278
ZSTD_FALLTHROUGH;
279
280
case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
281
ZSTD_FALLTHROUGH;
282
283
case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
284
ZSTD_FALLTHROUGH;
285
286
case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
287
ZSTD_FALLTHROUGH;
288
289
case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;
290
ZSTD_FALLTHROUGH;
291
292
default: break;
293
}
294
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
295
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
296
if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
297
}
298
bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
299
}
300
301
return srcSize;
302
}
303
304
FORCE_INLINE_TEMPLATE BitContainerType BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
305
{
306
return bitContainer >> start;
307
}
308
309
FORCE_INLINE_TEMPLATE BitContainerType BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
310
{
311
U32 const regMask = sizeof(bitContainer)*8 - 1;
312
/* if start > regMask, bitstream is corrupted, and result is undefined */
313
assert(nbBits < BIT_MASK_SIZE);
314
/* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
315
* than accessing memory. When bmi2 instruction is not present, we consider
316
* such cpus old (pre-Haswell, 2013) and their performance is not of that
317
* importance.
318
*/
319
#if defined(__x86_64__) || defined(_M_X64)
320
return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
321
#else
322
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
323
#endif
324
}
325
326
/*! BIT_lookBits() :
327
* Provides next n bits from local register.
328
* local register is not modified.
329
* On 32-bits, maxNbBits==24.
330
* On 64-bits, maxNbBits==56.
331
* @return : value extracted */
332
FORCE_INLINE_TEMPLATE BitContainerType BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
333
{
334
/* arbitrate between double-shift and shift+mask */
335
#if 1
336
/* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
337
* bitstream is likely corrupted, and result is undefined */
338
return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
339
#else
340
/* this code path is slower on my os-x laptop */
341
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
342
return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
343
#endif
344
}
345
346
/*! BIT_lookBitsFast() :
347
* unsafe version; only works if nbBits >= 1 */
348
MEM_STATIC BitContainerType BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
349
{
350
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
351
assert(nbBits >= 1);
352
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
353
}
354
355
FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
356
{
357
bitD->bitsConsumed += nbBits;
358
}
359
360
/*! BIT_readBits() :
361
* Read (consume) next n bits from local register and update.
362
* Pay attention to not read more than nbBits contained into local register.
363
* @return : extracted value. */
364
FORCE_INLINE_TEMPLATE BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
365
{
366
BitContainerType const value = BIT_lookBits(bitD, nbBits);
367
BIT_skipBits(bitD, nbBits);
368
return value;
369
}
370
371
/*! BIT_readBitsFast() :
372
* unsafe version; only works if nbBits >= 1 */
373
MEM_STATIC BitContainerType BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
374
{
375
BitContainerType const value = BIT_lookBitsFast(bitD, nbBits);
376
assert(nbBits >= 1);
377
BIT_skipBits(bitD, nbBits);
378
return value;
379
}
380
381
/*! BIT_reloadDStream_internal() :
382
* Simple variant of BIT_reloadDStream(), with two conditions:
383
* 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
384
* 2. look window is valid after shifted down : bitD->ptr >= bitD->start
385
*/
386
MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
387
{
388
assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
389
bitD->ptr -= bitD->bitsConsumed >> 3;
390
assert(bitD->ptr >= bitD->start);
391
bitD->bitsConsumed &= 7;
392
bitD->bitContainer = MEM_readLEST(bitD->ptr);
393
return BIT_DStream_unfinished;
394
}
395
396
/*! BIT_reloadDStreamFast() :
397
* Similar to BIT_reloadDStream(), but with two differences:
398
* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
399
* 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
400
* point you must use BIT_reloadDStream() to reload.
401
*/
402
MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
403
{
404
if (UNLIKELY(bitD->ptr < bitD->limitPtr))
405
return BIT_DStream_overflow;
406
return BIT_reloadDStream_internal(bitD);
407
}
408
409
/*! BIT_reloadDStream() :
410
* Refill `bitD` from buffer previously set in BIT_initDStream() .
411
* This function is safe, it guarantees it will not never beyond src buffer.
412
* @return : status of `BIT_DStream_t` internal register.
413
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
414
FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
415
{
416
/* note : once in overflow mode, a bitstream remains in this mode until it's reset */
417
if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
418
static const BitContainerType zeroFilled = 0;
419
bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
420
/* overflow detected, erroneous scenario or end of stream: no update */
421
return BIT_DStream_overflow;
422
}
423
424
assert(bitD->ptr >= bitD->start);
425
426
if (bitD->ptr >= bitD->limitPtr) {
427
return BIT_reloadDStream_internal(bitD);
428
}
429
if (bitD->ptr == bitD->start) {
430
/* reached end of bitStream => no update */
431
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
432
return BIT_DStream_completed;
433
}
434
/* start < ptr < limitPtr => cautious update */
435
{ U32 nbBytes = bitD->bitsConsumed >> 3;
436
BIT_DStream_status result = BIT_DStream_unfinished;
437
if (bitD->ptr - nbBytes < bitD->start) {
438
nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
439
result = BIT_DStream_endOfBuffer;
440
}
441
bitD->ptr -= nbBytes;
442
bitD->bitsConsumed -= nbBytes*8;
443
bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
444
return result;
445
}
446
}
447
448
/*! BIT_endOfDStream() :
449
* @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
450
*/
451
MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
452
{
453
return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
454
}
455
456
#endif /* BITSTREAM_H_MODULE */
457
458