Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/bitmap/mutable.rs
6939 views
1
#![allow(unsafe_op_in_unsafe_fn)]
2
use std::hint::unreachable_unchecked;
3
4
use polars_error::{PolarsResult, polars_bail};
5
use polars_utils::vec::PushUnchecked;
6
7
use super::bitmask::BitMask;
8
use super::utils::{BitChunk, BitChunks, BitChunksExactMut, BitmapIter, count_zeros, fmt};
9
use super::{Bitmap, intersects_with_mut};
10
use crate::bitmap::utils::{get_bit_unchecked, merge_reversed, set_bit_in_byte};
11
use crate::storage::SharedStorage;
12
use crate::trusted_len::TrustedLen;
13
14
/// A container of booleans. [`MutableBitmap`] is semantically equivalent
15
/// to [`Vec<bool>`].
16
///
17
/// The two main differences against [`Vec<bool>`] is that each element stored as a single bit,
18
/// thereby:
19
/// * it uses 8x less memory
20
/// * it cannot be represented as `&[bool]` (i.e. no pointer arithmetics).
21
///
22
/// A [`MutableBitmap`] can be converted to a [`Bitmap`] at `O(1)`.
23
/// # Examples
24
/// ```
25
/// use polars_arrow::bitmap::MutableBitmap;
26
///
27
/// let bitmap = MutableBitmap::from([true, false, true]);
28
/// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true]);
29
///
30
/// // creation directly from bytes
31
/// let mut bitmap = MutableBitmap::try_new(vec![0b00001101], 5).unwrap();
32
/// // note: the first bit is the left-most of the first byte
33
/// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true, true, false]);
34
/// // we can also get the slice:
35
/// assert_eq!(bitmap.as_slice(), [0b00001101u8].as_ref());
36
/// // debug helps :)
37
/// assert_eq!(format!("{:?}", bitmap), "Bitmap { len: 5, offset: 0, bytes: [0b___01101] }");
38
///
39
/// // It supports mutation in place
40
/// bitmap.set(0, false);
41
/// assert_eq!(format!("{:?}", bitmap), "Bitmap { len: 5, offset: 0, bytes: [0b___01100] }");
42
/// // and `O(1)` random access
43
/// assert_eq!(bitmap.get(0), false);
44
/// ```
45
/// # Implementation
46
/// This container is internally a [`Vec<u8>`].
47
#[derive(Clone)]
48
pub struct MutableBitmap {
49
buffer: Vec<u8>,
50
// invariant: length.saturating_add(7) / 8 == buffer.len();
51
length: usize,
52
}
53
54
impl std::fmt::Debug for MutableBitmap {
55
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56
fmt(&self.buffer, 0, self.len(), f)
57
}
58
}
59
60
impl PartialEq for MutableBitmap {
61
fn eq(&self, other: &Self) -> bool {
62
self.iter().eq(other.iter())
63
}
64
}
65
66
impl MutableBitmap {
67
/// Initializes an empty [`MutableBitmap`].
68
#[inline]
69
pub fn new() -> Self {
70
Self {
71
buffer: Vec::new(),
72
length: 0,
73
}
74
}
75
76
/// Initializes a new [`MutableBitmap`] from a [`Vec<u8>`] and a length.
77
/// # Errors
78
/// This function errors iff `length > bytes.len() * 8`
79
#[inline]
80
pub fn try_new(mut bytes: Vec<u8>, length: usize) -> PolarsResult<Self> {
81
if length > bytes.len().saturating_mul(8) {
82
polars_bail!(InvalidOperation:
83
"The length of the bitmap ({}) must be `<=` to the number of bytes times 8 ({})",
84
length,
85
bytes.len().saturating_mul(8)
86
)
87
}
88
89
// Ensure invariant holds.
90
let min_byte_length_needed = length.div_ceil(8);
91
bytes.drain(min_byte_length_needed..);
92
Ok(Self {
93
length,
94
buffer: bytes,
95
})
96
}
97
98
/// Initializes a [`MutableBitmap`] from a [`Vec<u8>`] and a length.
99
/// This function is `O(1)`.
100
/// # Panic
101
/// Panics iff the length is larger than the length of the buffer times 8.
102
#[inline]
103
pub fn from_vec(buffer: Vec<u8>, length: usize) -> Self {
104
Self::try_new(buffer, length).unwrap()
105
}
106
107
/// Initializes a pre-allocated [`MutableBitmap`] with capacity for `capacity` bits.
108
#[inline]
109
pub fn with_capacity(capacity: usize) -> Self {
110
Self {
111
buffer: Vec::with_capacity(capacity.saturating_add(7) / 8),
112
length: 0,
113
}
114
}
115
116
/// Pushes a new bit to the [`MutableBitmap`], re-sizing it if necessary.
117
#[inline]
118
pub fn push(&mut self, value: bool) {
119
if self.length.is_multiple_of(8) {
120
self.buffer.push(0);
121
}
122
let byte = unsafe { self.buffer.last_mut().unwrap_unchecked() };
123
*byte = set_bit_in_byte(*byte, self.length % 8, value);
124
self.length += 1;
125
}
126
127
/// Pop the last bit from the [`MutableBitmap`].
128
/// Note if the [`MutableBitmap`] is empty, this method will return None.
129
#[inline]
130
pub fn pop(&mut self) -> Option<bool> {
131
if self.is_empty() {
132
return None;
133
}
134
135
self.length -= 1;
136
let value = unsafe { self.get_unchecked(self.length) };
137
if self.length.is_multiple_of(8) {
138
self.buffer.pop();
139
}
140
Some(value)
141
}
142
143
/// Returns whether the position `index` is set.
144
/// # Panics
145
/// Panics iff `index >= self.len()`.
146
#[inline]
147
pub fn get(&self, index: usize) -> bool {
148
assert!(index < self.len());
149
unsafe { self.get_unchecked(index) }
150
}
151
152
/// Returns whether the position `index` is set.
153
///
154
/// # Safety
155
/// The caller must ensure `index < self.len()`.
156
#[inline]
157
pub unsafe fn get_unchecked(&self, index: usize) -> bool {
158
get_bit_unchecked(&self.buffer, index)
159
}
160
161
/// Sets the position `index` to `value`
162
/// # Panics
163
/// Panics iff `index >= self.len()`.
164
#[inline]
165
pub fn set(&mut self, index: usize, value: bool) {
166
assert!(index < self.len());
167
unsafe {
168
self.set_unchecked(index, value);
169
}
170
}
171
172
/// Sets the position `index` to the OR of its original value and `value`.
173
///
174
/// # Safety
175
/// It's undefined behavior if index >= self.len().
176
#[inline]
177
pub unsafe fn or_pos_unchecked(&mut self, index: usize, value: bool) {
178
*self.buffer.get_unchecked_mut(index / 8) |= (value as u8) << (index % 8);
179
}
180
181
/// Sets the position `index` to the AND of its original value and `value`.
182
///
183
/// # Safety
184
/// It's undefined behavior if index >= self.len().
185
#[inline]
186
pub unsafe fn and_pos_unchecked(&mut self, index: usize, value: bool) {
187
*self.buffer.get_unchecked_mut(index / 8) &=
188
(0xFE | u8::from(value)).rotate_left(index as u32);
189
}
190
191
/// Sets the position `index` to the XOR of its original value and `value`.
192
///
193
/// # Safety
194
/// It's undefined behavior if index >= self.len().
195
#[inline]
196
pub unsafe fn xor_pos_unchecked(&mut self, index: usize, value: bool) {
197
*self.buffer.get_unchecked_mut(index / 8) ^= (value as u8) << (index % 8);
198
}
199
200
/// constructs a new iterator over the bits of [`MutableBitmap`].
201
pub fn iter(&self) -> BitmapIter<'_> {
202
BitmapIter::new(&self.buffer, 0, self.length)
203
}
204
205
/// Empties the [`MutableBitmap`].
206
#[inline]
207
pub fn clear(&mut self) {
208
self.length = 0;
209
self.buffer.clear();
210
}
211
212
/// Extends [`MutableBitmap`] by `additional` values of constant `value`.
213
/// # Implementation
214
/// This function is an order of magnitude faster than pushing element by element.
215
#[inline]
216
pub fn extend_constant(&mut self, additional: usize, value: bool) {
217
if additional == 0 {
218
return;
219
}
220
221
if value {
222
self.extend_set(additional)
223
} else {
224
self.extend_unset(additional)
225
}
226
}
227
228
/// Resizes the [`MutableBitmap`] to the specified length, inserting value
229
/// if the length is bigger than the current length.
230
pub fn resize(&mut self, length: usize, value: bool) {
231
if let Some(additional) = length.checked_sub(self.len()) {
232
self.extend_constant(additional, value);
233
} else {
234
self.buffer.truncate(length.saturating_add(7) / 8);
235
self.length = length;
236
}
237
}
238
239
/// Initializes a zeroed [`MutableBitmap`].
240
#[inline]
241
pub fn from_len_zeroed(length: usize) -> Self {
242
Self {
243
buffer: vec![0; length.saturating_add(7) / 8],
244
length,
245
}
246
}
247
248
/// Initializes a [`MutableBitmap`] with all values set to valid/ true.
249
#[inline]
250
pub fn from_len_set(length: usize) -> Self {
251
Self {
252
buffer: vec![u8::MAX; length.saturating_add(7) / 8],
253
length,
254
}
255
}
256
257
/// Reserves `additional` bits in the [`MutableBitmap`], potentially re-allocating its buffer.
258
#[inline(always)]
259
pub fn reserve(&mut self, additional: usize) {
260
self.buffer
261
.reserve((self.length + additional).saturating_add(7) / 8 - self.buffer.len())
262
}
263
264
/// Returns the capacity of [`MutableBitmap`] in number of bits.
265
#[inline]
266
pub fn capacity(&self) -> usize {
267
self.buffer.capacity() * 8
268
}
269
270
/// Pushes a new bit to the [`MutableBitmap`]
271
///
272
/// # Safety
273
/// The caller must ensure that the [`MutableBitmap`] has sufficient capacity.
274
#[inline]
275
pub unsafe fn push_unchecked(&mut self, value: bool) {
276
if self.length.is_multiple_of(8) {
277
self.buffer.push_unchecked(0);
278
}
279
let byte = self.buffer.last_mut().unwrap_unchecked();
280
*byte = set_bit_in_byte(*byte, self.length % 8, value);
281
self.length += 1;
282
}
283
284
/// Returns the number of unset bits on this [`MutableBitmap`].
285
///
286
/// Guaranteed to be `<= self.len()`.
287
/// # Implementation
288
/// This function is `O(N)`
289
pub fn unset_bits(&self) -> usize {
290
count_zeros(&self.buffer, 0, self.length)
291
}
292
293
/// Returns the number of set bits on this [`MutableBitmap`].
294
///
295
/// Guaranteed to be `<= self.len()`.
296
/// # Implementation
297
/// This function is `O(N)`
298
pub fn set_bits(&self) -> usize {
299
self.length - self.unset_bits()
300
}
301
302
/// Returns the length of the [`MutableBitmap`].
303
#[inline]
304
pub fn len(&self) -> usize {
305
self.length
306
}
307
308
/// Returns whether [`MutableBitmap`] is empty.
309
#[inline]
310
pub fn is_empty(&self) -> bool {
311
self.len() == 0
312
}
313
314
/// # Safety
315
/// The caller must ensure that the [`MutableBitmap`] was properly initialized up to `len`.
316
#[inline]
317
pub(crate) unsafe fn set_len(&mut self, len: usize) {
318
self.buffer.set_len(len.saturating_add(7) / 8);
319
self.length = len;
320
}
321
322
fn extend_set(&mut self, mut additional: usize) {
323
let offset = self.length % 8;
324
let added = if offset != 0 {
325
// offset != 0 => at least one byte in the buffer
326
let last_index = self.buffer.len() - 1;
327
let last = &mut self.buffer[last_index];
328
329
let remaining = 0b11111111u8;
330
let remaining = remaining >> 8usize.saturating_sub(additional);
331
let remaining = remaining << offset;
332
*last |= remaining;
333
std::cmp::min(additional, 8 - offset)
334
} else {
335
0
336
};
337
self.length += added;
338
additional = additional.saturating_sub(added);
339
if additional > 0 {
340
debug_assert_eq!(self.length % 8, 0);
341
let existing = self.length.saturating_add(7) / 8;
342
let required = (self.length + additional).saturating_add(7) / 8;
343
// add remaining as full bytes
344
self.buffer
345
.extend(std::iter::repeat_n(0b11111111u8, required - existing));
346
self.length += additional;
347
}
348
}
349
350
fn extend_unset(&mut self, mut additional: usize) {
351
let offset = self.length % 8;
352
let added = if offset != 0 {
353
// offset != 0 => at least one byte in the buffer
354
let last_index = self.buffer.len() - 1;
355
let last = &mut self.buffer[last_index];
356
*last &= 0b11111111u8 >> (8 - offset); // unset them
357
std::cmp::min(additional, 8 - offset)
358
} else {
359
0
360
};
361
self.length += added;
362
additional = additional.saturating_sub(added);
363
if additional > 0 {
364
debug_assert_eq!(self.length % 8, 0);
365
self.buffer
366
.resize((self.length + additional).saturating_add(7) / 8, 0);
367
self.length += additional;
368
}
369
}
370
371
/// Sets the position `index` to `value`
372
///
373
/// # Safety
374
/// Caller must ensure that `index < self.len()`
375
#[inline]
376
pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
377
debug_assert!(index < self.len());
378
let byte = self.buffer.get_unchecked_mut(index / 8);
379
*byte = set_bit_in_byte(*byte, index % 8, value);
380
}
381
382
/// Shrinks the capacity of the [`MutableBitmap`] to fit its current length.
383
pub fn shrink_to_fit(&mut self) {
384
self.buffer.shrink_to_fit();
385
}
386
387
/// Returns an iterator over bits in bit chunks [`BitChunk`].
388
///
389
/// This iterator is useful to operate over multiple bits via e.g. bitwise.
390
pub fn chunks<T: BitChunk>(&self) -> BitChunks<'_, T> {
391
BitChunks::new(&self.buffer, 0, self.length)
392
}
393
394
/// Returns an iterator over mutable slices, [`BitChunksExactMut`]
395
pub(crate) fn bitchunks_exact_mut<T: BitChunk>(&mut self) -> BitChunksExactMut<'_, T> {
396
BitChunksExactMut::new(&mut self.buffer, self.length)
397
}
398
399
pub fn intersects_with(&self, other: &Self) -> bool {
400
intersects_with_mut(self, other)
401
}
402
403
pub fn freeze(self) -> Bitmap {
404
self.into()
405
}
406
}
407
408
impl From<MutableBitmap> for Bitmap {
409
#[inline]
410
fn from(buffer: MutableBitmap) -> Self {
411
Bitmap::try_new(buffer.buffer, buffer.length).unwrap()
412
}
413
}
414
415
impl From<MutableBitmap> for Option<Bitmap> {
416
#[inline]
417
fn from(buffer: MutableBitmap) -> Self {
418
let unset_bits = buffer.unset_bits();
419
if unset_bits > 0 {
420
// SAFETY: invariants of the `MutableBitmap` equal that of `Bitmap`.
421
let bitmap = unsafe {
422
Bitmap::from_inner_unchecked(
423
SharedStorage::from_vec(buffer.buffer),
424
0,
425
buffer.length,
426
Some(unset_bits),
427
)
428
};
429
Some(bitmap)
430
} else {
431
None
432
}
433
}
434
}
435
436
impl<P: AsRef<[bool]>> From<P> for MutableBitmap {
437
#[inline]
438
fn from(slice: P) -> Self {
439
MutableBitmap::from_trusted_len_iter(slice.as_ref().iter().copied())
440
}
441
}
442
443
impl Extend<bool> for MutableBitmap {
444
fn extend<T: IntoIterator<Item = bool>>(&mut self, iter: T) {
445
let mut iterator = iter.into_iter();
446
447
let mut buffer = std::mem::take(&mut self.buffer);
448
let mut length = std::mem::take(&mut self.length);
449
450
let byte_capacity: usize = iterator.size_hint().0.saturating_add(7) / 8;
451
buffer.reserve(byte_capacity);
452
453
loop {
454
let mut exhausted = false;
455
let mut byte_accum: u8 = 0;
456
let mut mask: u8 = 1;
457
458
//collect (up to) 8 bits into a byte
459
while mask != 0 {
460
if let Some(value) = iterator.next() {
461
length += 1;
462
byte_accum |= match value {
463
true => mask,
464
false => 0,
465
};
466
mask <<= 1;
467
} else {
468
exhausted = true;
469
break;
470
}
471
}
472
473
// break if the iterator was exhausted before it provided a bool for this byte
474
if exhausted && mask == 1 {
475
break;
476
}
477
478
//ensure we have capacity to write the byte
479
if buffer.len() == buffer.capacity() {
480
//no capacity for new byte, allocate 1 byte more (plus however many more the iterator advertises)
481
let additional_byte_capacity = 1usize.saturating_add(
482
iterator.size_hint().0.saturating_add(7) / 8, //convert bit count to byte count, rounding up
483
);
484
buffer.reserve(additional_byte_capacity)
485
}
486
487
// Soundness: capacity was allocated above
488
buffer.push(byte_accum);
489
if exhausted {
490
break;
491
}
492
}
493
494
self.buffer = buffer;
495
self.length = length;
496
}
497
}
498
499
impl FromIterator<bool> for MutableBitmap {
500
fn from_iter<I>(iter: I) -> Self
501
where
502
I: IntoIterator<Item = bool>,
503
{
504
let mut bm = Self::new();
505
bm.extend(iter);
506
bm
507
}
508
}
509
510
// [7, 6, 5, 4, 3, 2, 1, 0], [15, 14, 13, 12, 11, 10, 9, 8]
511
// [00000001_00000000_00000000_00000000_...] // u64
512
/// # Safety
513
/// The iterator must be trustedLen and its len must be least `len`.
514
#[inline]
515
unsafe fn get_chunk_unchecked(iterator: &mut impl Iterator<Item = bool>) -> u64 {
516
let mut byte = 0u64;
517
let mut mask;
518
for i in 0..8 {
519
mask = 1u64 << (8 * i);
520
for _ in 0..8 {
521
let value = match iterator.next() {
522
Some(value) => value,
523
None => unsafe { unreachable_unchecked() },
524
};
525
526
byte |= match value {
527
true => mask,
528
false => 0,
529
};
530
mask <<= 1;
531
}
532
}
533
byte
534
}
535
536
/// # Safety
537
/// The iterator must be trustedLen and its len must be least `len`.
538
#[inline]
539
unsafe fn get_byte_unchecked(len: usize, iterator: &mut impl Iterator<Item = bool>) -> u8 {
540
let mut byte_accum: u8 = 0;
541
let mut mask: u8 = 1;
542
for _ in 0..len {
543
let value = match iterator.next() {
544
Some(value) => value,
545
None => unsafe { unreachable_unchecked() },
546
};
547
548
byte_accum |= match value {
549
true => mask,
550
false => 0,
551
};
552
mask <<= 1;
553
}
554
byte_accum
555
}
556
557
/// Extends the [`Vec<u8>`] from `iterator`
558
/// # Safety
559
/// The iterator MUST be [`TrustedLen`].
560
#[inline]
561
unsafe fn extend_aligned_trusted_iter_unchecked(
562
buffer: &mut Vec<u8>,
563
mut iterator: impl Iterator<Item = bool>,
564
) -> usize {
565
let additional_bits = iterator.size_hint().1.unwrap();
566
let chunks = additional_bits / 64;
567
let remainder = additional_bits % 64;
568
569
let additional = additional_bits.div_ceil(8);
570
assert_eq!(
571
additional,
572
// a hint of how the following calculation will be done
573
chunks * 8 + remainder / 8 + !remainder.is_multiple_of(8) as usize
574
);
575
buffer.reserve(additional);
576
577
// chunks of 64 bits
578
for _ in 0..chunks {
579
let chunk = get_chunk_unchecked(&mut iterator);
580
buffer.extend_from_slice(&chunk.to_le_bytes());
581
}
582
583
// remaining complete bytes
584
for _ in 0..(remainder / 8) {
585
let byte = unsafe { get_byte_unchecked(8, &mut iterator) };
586
buffer.push(byte)
587
}
588
589
// remaining bits
590
let remainder = remainder % 8;
591
if remainder > 0 {
592
let byte = unsafe { get_byte_unchecked(remainder, &mut iterator) };
593
buffer.push(byte)
594
}
595
additional_bits
596
}
597
598
impl MutableBitmap {
599
/// Extends `self` from a [`TrustedLen`] iterator.
600
#[inline]
601
pub fn extend_from_trusted_len_iter<I: TrustedLen<Item = bool>>(&mut self, iterator: I) {
602
// SAFETY: I: TrustedLen
603
unsafe { self.extend_from_trusted_len_iter_unchecked(iterator) }
604
}
605
606
/// Extends `self` from an iterator of trusted len.
607
///
608
/// # Safety
609
/// The caller must guarantee that the iterator has a trusted len.
610
#[inline]
611
pub unsafe fn extend_from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(
612
&mut self,
613
mut iterator: I,
614
) {
615
// the length of the iterator throughout this function.
616
let mut length = iterator.size_hint().1.unwrap();
617
618
let bit_offset = self.length % 8;
619
620
if length < 8 - bit_offset {
621
if bit_offset == 0 {
622
self.buffer.push(0);
623
}
624
// the iterator will not fill the last byte
625
let byte = self.buffer.last_mut().unwrap();
626
let mut i = bit_offset;
627
for value in iterator {
628
*byte = set_bit_in_byte(*byte, i, value);
629
i += 1;
630
}
631
self.length += length;
632
return;
633
}
634
635
// at this point we know that length will hit a byte boundary and thus
636
// increase the buffer.
637
638
if bit_offset != 0 {
639
// we are in the middle of a byte; lets finish it
640
let byte = self.buffer.last_mut().unwrap();
641
(bit_offset..8).for_each(|i| {
642
*byte = set_bit_in_byte(*byte, i, iterator.next().unwrap());
643
});
644
self.length += 8 - bit_offset;
645
length -= 8 - bit_offset;
646
}
647
648
// everything is aligned; proceed with the bulk operation
649
debug_assert_eq!(self.length % 8, 0);
650
651
unsafe { extend_aligned_trusted_iter_unchecked(&mut self.buffer, iterator) };
652
self.length += length;
653
}
654
655
/// Creates a new [`MutableBitmap`] from an iterator of booleans.
656
///
657
/// # Safety
658
/// The iterator must report an accurate length.
659
#[inline]
660
pub unsafe fn from_trusted_len_iter_unchecked<I>(iterator: I) -> Self
661
where
662
I: Iterator<Item = bool>,
663
{
664
let mut buffer = Vec::<u8>::new();
665
666
let length = extend_aligned_trusted_iter_unchecked(&mut buffer, iterator);
667
668
Self { buffer, length }
669
}
670
671
/// Creates a new [`MutableBitmap`] from an iterator of booleans.
672
#[inline]
673
pub fn from_trusted_len_iter<I>(iterator: I) -> Self
674
where
675
I: TrustedLen<Item = bool>,
676
{
677
// SAFETY: Iterator is `TrustedLen`
678
unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
679
}
680
681
/// Creates a new [`MutableBitmap`] from an iterator of booleans.
682
pub fn try_from_trusted_len_iter<E, I>(iterator: I) -> std::result::Result<Self, E>
683
where
684
I: TrustedLen<Item = std::result::Result<bool, E>>,
685
{
686
unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
687
}
688
689
/// Creates a new [`MutableBitmap`] from an falible iterator of booleans.
690
///
691
/// # Safety
692
/// The caller must guarantee that the iterator is `TrustedLen`.
693
pub unsafe fn try_from_trusted_len_iter_unchecked<E, I>(
694
mut iterator: I,
695
) -> std::result::Result<Self, E>
696
where
697
I: Iterator<Item = std::result::Result<bool, E>>,
698
{
699
let length = iterator.size_hint().1.unwrap();
700
701
let mut buffer = vec![0u8; length.div_ceil(8)];
702
703
let chunks = length / 8;
704
let reminder = length % 8;
705
706
let data = buffer.as_mut_slice();
707
data[..chunks].iter_mut().try_for_each(|byte| {
708
(0..8).try_for_each(|i| {
709
*byte = set_bit_in_byte(*byte, i, iterator.next().unwrap()?);
710
Ok(())
711
})
712
})?;
713
714
if reminder != 0 {
715
let last = &mut data[chunks];
716
iterator.enumerate().try_for_each(|(i, value)| {
717
*last = set_bit_in_byte(*last, i, value?);
718
Ok(())
719
})?;
720
}
721
722
Ok(Self { buffer, length })
723
}
724
725
fn extend_unaligned(&mut self, slice: &[u8], offset: usize, length: usize) {
726
// e.g.
727
// [a, b, --101010] <- to be extended
728
// [00111111, 11010101] <- to extend
729
// [a, b, 11101010, --001111] expected result
730
731
let aligned_offset = offset / 8;
732
let own_offset = self.length % 8;
733
debug_assert_eq!(offset % 8, 0); // assumed invariant
734
debug_assert!(own_offset != 0); // assumed invariant
735
736
let bytes_len = length.saturating_add(7) / 8;
737
let items = &slice[aligned_offset..aligned_offset + bytes_len];
738
// self has some offset => we need to shift all `items`, and merge the first
739
let buffer = self.buffer.as_mut_slice();
740
let last = &mut buffer[buffer.len() - 1];
741
742
// --101010 | 00111111 << 6 = 11101010
743
// erase previous
744
*last &= 0b11111111u8 >> (8 - own_offset); // unset before setting
745
*last |= items[0] << own_offset;
746
747
if length + own_offset <= 8 {
748
// no new bytes needed
749
self.length += length;
750
return;
751
}
752
let additional = length - (8 - own_offset);
753
754
let remaining = [items[items.len() - 1], 0];
755
let bytes = items
756
.windows(2)
757
.chain(std::iter::once(remaining.as_ref()))
758
.map(|w| merge_reversed(w[0], w[1], 8 - own_offset))
759
.take(additional.saturating_add(7) / 8);
760
self.buffer.extend(bytes);
761
762
self.length += length;
763
}
764
765
fn extend_aligned(&mut self, slice: &[u8], offset: usize, length: usize) {
766
let aligned_offset = offset / 8;
767
let bytes_len = length.saturating_add(7) / 8;
768
let items = &slice[aligned_offset..aligned_offset + bytes_len];
769
self.buffer.extend_from_slice(items);
770
self.length += length;
771
}
772
773
/// Extends the [`MutableBitmap`] from a slice of bytes with optional offset.
774
/// This is the fastest way to extend a [`MutableBitmap`].
775
/// # Implementation
776
/// When both [`MutableBitmap`]'s length and `offset` are both multiples of 8,
777
/// this function performs a memcopy. Else, it first aligns bit by bit and then performs a memcopy.
778
///
779
/// # Safety
780
/// Caller must ensure `offset + length <= slice.len() * 8`
781
#[inline]
782
pub unsafe fn extend_from_slice_unchecked(
783
&mut self,
784
slice: &[u8],
785
offset: usize,
786
length: usize,
787
) {
788
if length == 0 {
789
return;
790
};
791
let is_aligned = self.length.is_multiple_of(8);
792
let other_is_aligned = offset.is_multiple_of(8);
793
match (is_aligned, other_is_aligned) {
794
(true, true) => self.extend_aligned(slice, offset, length),
795
(false, true) => self.extend_unaligned(slice, offset, length),
796
// todo: further optimize the other branches.
797
_ => self.extend_from_trusted_len_iter(BitmapIter::new(slice, offset, length)),
798
}
799
// internal invariant:
800
debug_assert_eq!(self.length.saturating_add(7) / 8, self.buffer.len());
801
}
802
803
/// Extends the [`MutableBitmap`] from a slice of bytes with optional offset.
804
/// This is the fastest way to extend a [`MutableBitmap`].
805
/// # Implementation
806
/// When both [`MutableBitmap`]'s length and `offset` are both multiples of 8,
807
/// this function performs a memcopy. Else, it first aligns bit by bit and then performs a memcopy.
808
#[inline]
809
pub fn extend_from_slice(&mut self, slice: &[u8], offset: usize, length: usize) {
810
assert!(offset + length <= slice.len() * 8);
811
// SAFETY: invariant is asserted
812
unsafe { self.extend_from_slice_unchecked(slice, offset, length) }
813
}
814
815
#[inline]
816
pub fn extend_from_bitmask(&mut self, bitmask: BitMask<'_>) {
817
let (slice, offset, length) = bitmask.inner();
818
self.extend_from_slice(slice, offset, length)
819
}
820
821
/// Extends the [`MutableBitmap`] from a [`Bitmap`].
822
#[inline]
823
pub fn extend_from_bitmap(&mut self, bitmap: &Bitmap) {
824
let (slice, offset, length) = bitmap.as_slice();
825
// SAFETY: bitmap.as_slice adheres to the invariant
826
unsafe {
827
self.extend_from_slice_unchecked(slice, offset, length);
828
}
829
}
830
831
/// Returns the slice of bytes of this [`MutableBitmap`].
832
/// Note that the last byte may not be fully used.
833
#[inline]
834
pub fn as_slice(&self) -> &[u8] {
835
let len = (self.length).saturating_add(7) / 8;
836
&self.buffer[..len]
837
}
838
839
/// Returns the slice of bytes of this [`MutableBitmap`].
840
/// Note that the last byte may not be fully used.
841
#[inline]
842
pub fn as_mut_slice(&mut self) -> &mut [u8] {
843
let len = (self.length).saturating_add(7) / 8;
844
&mut self.buffer[..len]
845
}
846
}
847
848
impl Default for MutableBitmap {
849
fn default() -> Self {
850
Self::new()
851
}
852
}
853
854
impl<'a> IntoIterator for &'a MutableBitmap {
855
type Item = bool;
856
type IntoIter = BitmapIter<'a>;
857
858
fn into_iter(self) -> Self::IntoIter {
859
BitmapIter::<'a>::new(&self.buffer, 0, self.length)
860
}
861
}
862
863