GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/bitmap/immutable.rs
#![allow(unsafe_op_in_unsafe_fn)]
use std::ops::Deref;
use std::sync::LazyLock;

use either::Either;
use polars_error::{PolarsResult, polars_bail};
use polars_utils::relaxed_cell::RelaxedCell;

use super::utils::{self, BitChunk, BitChunks, BitmapIter, count_zeros, fmt, get_bit_unchecked};
use super::{IntoIter, MutableBitmap, chunk_iter_to_vec, num_intersections_with};
use crate::array::Splitable;
use crate::bitmap::aligned::AlignedBitmapSlice;
use crate::bitmap::iterator::{
    FastU32BitmapIter, FastU56BitmapIter, FastU64BitmapIter, TrueIdxIter,
};
use crate::bitmap::utils::bytes_for;
use crate::legacy::utils::FromTrustedLenIterator;
use crate::storage::SharedStorage;
use crate::trusted_len::TrustedLen;

const UNKNOWN_BIT_COUNT: u64 = u64::MAX;

/// An immutable container semantically equivalent to `Arc<Vec<bool>>` but represented as `Arc<Vec<u8>>` where
/// each boolean is represented as a single bit.
///
/// # Examples
/// ```
/// use polars_arrow::bitmap::{Bitmap, MutableBitmap};
///
/// let bitmap = Bitmap::from([true, false, true]);
/// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true]);
///
/// // creation directly from bytes
/// let bitmap = Bitmap::try_new(vec![0b00001101], 5).unwrap();
/// // note: bits are LSB-first, so the first bit is the right-most bit of the first byte
/// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true, true, false]);
/// // we can also get the slice:
/// assert_eq!(bitmap.as_slice(), ([0b00001101u8].as_ref(), 0, 5));
/// // debug helps :)
/// assert_eq!(format!("{:?}", bitmap), "Bitmap { len: 5, offset: 0, bytes: [0b___01101] }");
///
/// // it supports copy-on-write semantics (to a `MutableBitmap`)
/// let bitmap: MutableBitmap = bitmap.into_mut().right().unwrap();
/// assert_eq!(bitmap, MutableBitmap::from([true, false, true, true, false]));
///
/// // slicing is `O(1)` (data is shared)
/// let bitmap = Bitmap::try_new(vec![0b00001101], 5).unwrap();
/// let mut sliced = bitmap.clone();
/// sliced.slice(1, 4);
/// assert_eq!(sliced.as_slice(), ([0b00001101u8].as_ref(), 1, 4)); // 1 here is the bit offset:
/// assert_eq!(format!("{:?}", sliced), "Bitmap { len: 4, offset: 1, bytes: [0b___0110_] }");
/// // when sliced (or cloned), it is no longer possible to `into_mut`.
/// let same: Bitmap = sliced.into_mut().left().unwrap();
/// ```
#[derive(Default, Clone)]
pub struct Bitmap {
    storage: SharedStorage<u8>,
    // Both offset and length are measured in bits. They are used to bound the
    // bitmap to a region of the underlying bytes.
    offset: usize,
    length: usize,

    // A bit field that contains our cache for the number of unset bits.
    // If it is u64::MAX, we have no known value at all.
    // Other bit patterns where the top bit is set are reserved for future use.
    // If the top bit is not set we have an exact count.
    unset_bit_count_cache: RelaxedCell<u64>,
}

#[inline(always)]
fn has_cached_unset_bit_count(ubcc: u64) -> bool {
    ubcc >> 63 == 0
}

impl std::fmt::Debug for Bitmap {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (bytes, offset, len) = self.as_slice();
        fmt(bytes, offset, len, f)
    }
}

pub(super) fn check(bytes: &[u8], offset: usize, length: usize) -> PolarsResult<()> {
    if offset + length > bytes.len().saturating_mul(8) {
        polars_bail!(InvalidOperation:
            "The offset + length of the bitmap ({}) must be `<=` the number of bytes times 8 ({})",
            offset + length,
            bytes.len().saturating_mul(8)
        );
    }
    Ok(())
}

impl Bitmap {
    /// Initializes an empty [`Bitmap`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Initializes a new [`Bitmap`] from a vector of bytes and a length.
    /// # Errors
    /// This function errors iff `length > bytes.len() * 8`.
    #[inline]
    pub fn try_new(bytes: Vec<u8>, length: usize) -> PolarsResult<Self> {
        check(&bytes, 0, length)?;
        Ok(Self {
            storage: SharedStorage::from_vec(bytes),
            length,
            offset: 0,
            unset_bit_count_cache: RelaxedCell::from(if length == 0 {
                0
            } else {
                UNKNOWN_BIT_COUNT
            }),
        })
    }

    /// Returns the length of the [`Bitmap`].
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }

    /// Returns whether the [`Bitmap`] is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns a new iterator of `bool` over this bitmap.
    pub fn iter(&self) -> BitmapIter<'_> {
        BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns an iterator over bits in bit chunks [`BitChunk`].
    ///
    /// This iterator is useful for operating over multiple bits at once, e.g. via bitwise operations.
    pub fn chunks<T: BitChunk>(&self) -> BitChunks<'_, T> {
        BitChunks::new(&self.storage, self.offset, self.length)
    }
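
    // Example (editor's sketch, not from the original source): consuming
    // `chunks::<u64>()` by popcounting the full chunks and then the remainder
    // chunk, mirroring how `make_mut` below drains the same iterator.
    //
    //     let bitmap = Bitmap::from([true; 100]);
    //     let chunks = bitmap.chunks::<u64>();
    //     let remainder = chunks.remainder();
    //     let ones = chunks.map(|c| c.count_ones()).sum::<u32>() + remainder.count_ones();
    //     assert_eq!(ones as usize, bitmap.set_bits());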

    /// Returns a fast iterator that gives 32 bits at a time.
    /// Has a remainder that must be handled separately.
    pub fn fast_iter_u32(&self) -> FastU32BitmapIter<'_> {
        FastU32BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns a fast iterator that gives 56 bits at a time.
    /// Has a remainder that must be handled separately.
    pub fn fast_iter_u56(&self) -> FastU56BitmapIter<'_> {
        FastU56BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns a fast iterator that gives 64 bits at a time.
    /// Has a remainder that must be handled separately.
    pub fn fast_iter_u64(&self) -> FastU64BitmapIter<'_> {
        FastU64BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns an iterator that only iterates over the set bits.
    pub fn true_idx_iter(&self) -> TrueIdxIter<'_> {
        TrueIdxIter::new(self.len(), Some(self))
    }
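
    // Example (editor's sketch, not from the original source): `TrueIdxIter`
    // yields the indices of set bits, which is efficient for sparse masks.
    //
    //     let bitmap = Bitmap::from([true, false, false, true]);
    //     let idxs: Vec<usize> = bitmap.true_idx_iter().collect();
    //     assert_eq!(idxs, vec![0, 3]);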

    /// Returns the bits of this [`Bitmap`] as an [`AlignedBitmapSlice`].
    pub fn aligned<T: BitChunk>(&self) -> AlignedBitmapSlice<'_, T> {
        AlignedBitmapSlice::new(&self.storage, self.offset, self.length)
    }

    /// Returns the byte slice of this [`Bitmap`].
    ///
    /// The returned tuple contains:
    /// * `.0`: The byte slice, truncated to the byte containing the first bit. So the start of
    ///   the slice is within the first 8 bits.
    /// * `.1`: The start offset in bits, in the range `0 <= offset < 8`.
    /// * `.2`: The length in number of bits.
    #[inline]
    pub fn as_slice(&self) -> (&[u8], usize, usize) {
        let start = self.offset / 8;
        let len = (self.offset % 8 + self.length).saturating_add(7) / 8;
        (
            &self.storage[start..start + len],
            self.offset % 8,
            self.length,
        )
    }
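
    // Example (editor's sketch, not from the original source): the tuple
    // returned by `as_slice` for the sliced bitmap from the type-level docs;
    // `.0` is the bytes, `.1` the bit offset, `.2` the bit length.
    //
    //     let bitmap = Bitmap::try_new(vec![0b00001101], 5).unwrap().sliced(1, 4);
    //     assert_eq!(bitmap.as_slice(), ([0b00001101u8].as_ref(), 1, 4));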

    /// Returns the number of set bits on this [`Bitmap`].
    ///
    /// See `unset_bits` for details.
    #[inline]
    pub fn set_bits(&self) -> usize {
        self.length - self.unset_bits()
    }

    /// Returns the number of set bits on this [`Bitmap`] if it is known.
    ///
    /// See `lazy_unset_bits` for details.
    #[inline]
    pub fn lazy_set_bits(&self) -> Option<usize> {
        Some(self.length - self.lazy_unset_bits()?)
    }

    /// Returns the number of unset bits on this [`Bitmap`].
    ///
    /// Guaranteed to be `<= self.len()`.
    ///
    /// # Implementation
    ///
    /// This function counts the number of unset bits if it is not already
    /// computed. Repeated calls use the cached bitcount.
    pub fn unset_bits(&self) -> usize {
        self.lazy_unset_bits().unwrap_or_else(|| {
            let zeros = count_zeros(&self.storage, self.offset, self.length);
            self.unset_bit_count_cache.store(zeros as u64);
            zeros
        })
    }

    /// Returns the number of unset bits on this [`Bitmap`] if it is known.
    ///
    /// Guaranteed to be `<= self.len()`.
    pub fn lazy_unset_bits(&self) -> Option<usize> {
        let cache = self.unset_bit_count_cache.load();
        has_cached_unset_bit_count(cache).then_some(cache as usize)
    }
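
    // Example (editor's sketch, not from the original source): the lazy
    // counting contract. `lazy_unset_bits` only reports a cached value;
    // `unset_bits` computes on first use and caches for repeated calls.
    //
    //     let bitmap = Bitmap::try_new(vec![0b00001101], 5).unwrap();
    //     assert_eq!(bitmap.lazy_unset_bits(), None); // nothing cached yet
    //     assert_eq!(bitmap.unset_bits(), 2);         // counts and caches
    //     assert_eq!(bitmap.lazy_unset_bits(), Some(2));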

    /// Updates the cached count of set bits on this [`Bitmap`].
    ///
    /// # Safety
    ///
    /// The number of set bits must be correct.
    pub unsafe fn update_bit_count(&mut self, bits_set: usize) {
        assert!(bits_set <= self.length);
        let zeros = self.length - bits_set;
        self.unset_bit_count_cache.store(zeros as u64);
    }

    /// Slices `self`, offsetting by `offset` and truncating up to `length` bits.
    /// # Panics
    /// Panics iff `offset + length > self.length`, i.e. if `offset` and `length`
    /// exceed the allocated capacity of `self`.
    #[inline]
    pub fn slice(&mut self, offset: usize, length: usize) {
        assert!(offset + length <= self.length);
        unsafe { self.slice_unchecked(offset, length) }
    }

    /// Slices `self`, offsetting by `offset` and truncating up to `length` bits.
    ///
    /// # Safety
    /// The caller must ensure that `self.offset + offset + length <= self.len()`.
    #[inline]
    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
        // Fast path: no-op slice.
        if offset == 0 && length == self.length {
            return;
        }

        // Fast path: we have no nulls or are full-null.
        let unset_bit_count_cache = self.unset_bit_count_cache.get_mut();
        if *unset_bit_count_cache == 0 || *unset_bit_count_cache == self.length as u64 {
            let new_count = if *unset_bit_count_cache > 0 {
                length as u64
            } else {
                0
            };
            *unset_bit_count_cache = new_count;
            self.offset += offset;
            self.length = length;
            return;
        }

        if has_cached_unset_bit_count(*unset_bit_count_cache) {
            // If we keep all but a small portion of the array it is worth
            // doing an eager re-count since we can reuse the old count via the
            // inclusion-exclusion principle.
            let small_portion = (self.length / 5).max(32);
            if length + small_portion >= self.length {
                // Subtract the null count of the chunks we slice off.
                let slice_end = self.offset + offset + length;
                let head_count = count_zeros(&self.storage, self.offset, offset);
                let tail_count =
                    count_zeros(&self.storage, slice_end, self.length - length - offset);
                let new_count = *unset_bit_count_cache - head_count as u64 - tail_count as u64;
                *unset_bit_count_cache = new_count;
            } else {
                *unset_bit_count_cache = UNKNOWN_BIT_COUNT;
            }
        }

        self.offset += offset;
        self.length = length;
    }

    /// Slices `self`, offsetting by `offset` and truncating up to `length` bits.
    /// # Panics
    /// Panics iff `offset + length > self.length`, i.e. if `offset` and `length`
    /// exceed the allocated capacity of `self`.
    #[inline]
    #[must_use]
    pub fn sliced(self, offset: usize, length: usize) -> Self {
        assert!(offset + length <= self.length);
        unsafe { self.sliced_unchecked(offset, length) }
    }

    /// Slices `self`, offsetting by `offset` and truncating up to `length` bits.
    ///
    /// # Safety
    /// The caller must ensure that `self.offset + offset + length <= self.len()`.
    #[inline]
    #[must_use]
    pub unsafe fn sliced_unchecked(mut self, offset: usize, length: usize) -> Self {
        self.slice_unchecked(offset, length);
        self
    }
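
    // Example (editor's sketch, not from the original source): `sliced` is
    // the by-value counterpart of `slice`, so it composes in builder style.
    //
    //     let bitmap = Bitmap::from([true, false, true, true]).sliced(1, 2);
    //     assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![false, true]);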

    /// Returns whether the bit at position `i` is set.
    /// # Panics
    /// Panics iff `i >= self.len()`.
    #[inline]
    pub fn get_bit(&self, i: usize) -> bool {
        assert!(i < self.len());
        unsafe { self.get_bit_unchecked(i) }
    }

    /// Unsafely returns whether the bit at position `i` is set.
    ///
    /// # Safety
    /// Unsound iff `i >= self.len()`.
    #[inline]
    pub unsafe fn get_bit_unchecked(&self, i: usize) -> bool {
        debug_assert!(i < self.len());
        get_bit_unchecked(&self.storage, self.offset + i)
    }

    /// Returns a pointer to the start of the underlying storage of this [`Bitmap`]
    /// (ignores `offset`). This pointer is allocated iff `self.len() > 0`.
    pub(crate) fn as_ptr(&self) -> *const u8 {
        self.storage.deref().as_ptr()
    }

    /// Returns the offset in bits of this [`Bitmap`] into its underlying storage.
    pub(crate) fn offset(&self) -> usize {
        self.offset
    }

    /// Converts this [`Bitmap`] to a [`MutableBitmap`], returning itself if the conversion
    /// is not possible.
    ///
    /// This operation returns a [`MutableBitmap`] iff:
    /// * this [`Bitmap`] is not a slice of another [`Bitmap`] at a non-zero offset
    /// * this [`Bitmap`] has not been cloned (i.e. [`Arc`]`::get_mut` yields [`Some`])
    /// * this [`Bitmap`] was not imported from the C data interface (FFI)
    pub fn into_mut(mut self) -> Either<Self, MutableBitmap> {
        match self.storage.try_into_vec() {
            Ok(v) => Either::Right(MutableBitmap::from_vec(v, self.length)),
            Err(storage) => {
                self.storage = storage;
                Either::Left(self)
            },
        }
    }

    /// Converts this [`Bitmap`] into a [`MutableBitmap`], cloning its internal
    /// buffer if required (clone-on-write).
    pub fn make_mut(self) -> MutableBitmap {
        match self.into_mut() {
            Either::Left(data) => {
                if data.offset > 0 {
                    // re-align the bits (remove the offset)
                    let chunks = data.chunks::<u64>();
                    let remainder = chunks.remainder();
                    let vec = chunk_iter_to_vec(chunks.chain(std::iter::once(remainder)));
                    MutableBitmap::from_vec(vec, data.length)
                } else {
                    let len = bytes_for(data.length);
                    MutableBitmap::from_vec(data.storage[0..len].to_vec(), data.length)
                }
            },
            Either::Right(data) => data,
        }
    }
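
    // Example (editor's sketch, not from the original source): the
    // copy-on-write split between `into_mut` and `make_mut`. A uniquely
    // owned bitmap converts in place; a shared one must be copied.
    //
    //     let bitmap = Bitmap::from([true, false, true]);
    //     let shared = bitmap.clone();
    //     // `bitmap` still holds a reference, so `into_mut` would return
    //     // Either::Left; `make_mut` falls back to cloning the buffer.
    //     let mutable: MutableBitmap = shared.make_mut();
    //     assert_eq!(mutable.len(), 3);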

    /// Initializes a new [`Bitmap`] filled with unset values.
    #[inline]
    pub fn new_zeroed(length: usize) -> Self {
        // We intentionally leak 1MiB of zeroed memory once so we don't have to
        // refcount it.
        const GLOBAL_ZERO_SIZE: usize = 1024 * 1024;
        static GLOBAL_ZEROES: LazyLock<SharedStorage<u8>> = LazyLock::new(|| {
            let mut ss = SharedStorage::from_vec(vec![0; GLOBAL_ZERO_SIZE]);
            ss.leak();
            ss
        });

        let bytes_needed = length.div_ceil(8);
        let storage = if bytes_needed <= GLOBAL_ZERO_SIZE {
            GLOBAL_ZEROES.clone()
        } else {
            SharedStorage::from_vec(vec![0; bytes_needed])
        };
        Self {
            storage,
            offset: 0,
            length,
            unset_bit_count_cache: RelaxedCell::from(length as u64),
        }
    }

    /// Initializes a new [`Bitmap`] filled with the given value.
    #[inline]
    pub fn new_with_value(value: bool, length: usize) -> Self {
        if !value {
            return Self::new_zeroed(length);
        }

        unsafe {
            Bitmap::from_inner_unchecked(
                SharedStorage::from_vec(vec![u8::MAX; length.saturating_add(7) / 8]),
                0,
                length,
                Some(0),
            )
        }
    }
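
    // Example (editor's sketch, not from the original source): constant
    // bitmaps carry an exact cached count, so the bit-count getters are free.
    //
    //     let ones = Bitmap::new_with_value(true, 10);
    //     assert_eq!(ones.lazy_set_bits(), Some(10));
    //     let zeros = Bitmap::new_zeroed(10);
    //     assert_eq!(zeros.lazy_unset_bits(), Some(10));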

    /// Counts the nulls (unset bits) starting from `offset` bits and for `length` bits.
    #[inline]
    pub fn null_count_range(&self, offset: usize, length: usize) -> usize {
        count_zeros(&self.storage, self.offset + offset, length)
    }

    /// Creates a new [`Bitmap`] from a slice and length.
    /// # Panics
    /// Panics iff `length > bytes.len() * 8`.
    #[inline]
    pub fn from_u8_slice<T: AsRef<[u8]>>(slice: T, length: usize) -> Self {
        Bitmap::try_new(slice.as_ref().to_vec(), length).unwrap()
    }

    /// Alias for `Bitmap::try_new().unwrap()`.
    /// This function is `O(1)`.
    /// # Panics
    /// This function panics iff `length > bytes.len() * 8`.
    #[inline]
    pub fn from_u8_vec(vec: Vec<u8>, length: usize) -> Self {
        Bitmap::try_new(vec, length).unwrap()
    }

    /// Returns whether the bit at position `i` is set, or [`None`] if `i` is out of bounds.
    #[inline]
    pub fn get(&self, i: usize) -> Option<bool> {
        if i < self.len() {
            Some(unsafe { self.get_bit_unchecked(i) })
        } else {
            None
        }
    }

    /// Creates a [`Bitmap`] from its internal representation.
    /// This is the inverse of [`Bitmap::into_inner`].
    ///
    /// # Safety
    /// Callers must ensure all invariants of this struct are upheld.
    pub unsafe fn from_inner_unchecked(
        storage: SharedStorage<u8>,
        offset: usize,
        length: usize,
        unset_bits: Option<usize>,
    ) -> Self {
        debug_assert!(check(&storage[..], offset, length).is_ok());

        let unset_bit_count_cache = if let Some(n) = unset_bits {
            RelaxedCell::from(n as u64)
        } else {
            RelaxedCell::from(UNKNOWN_BIT_COUNT)
        };
        Self {
            storage,
            offset,
            length,
            unset_bit_count_cache,
        }
    }

    /// Checks whether two [`Bitmap`]s share any set bits.
    ///
    /// This is an optimized version of `(self & other) != 0000..`.
    pub fn intersects_with(&self, other: &Self) -> bool {
        self.num_intersections_with(other) != 0
    }

    /// Calculates the number of shared set bits between two [`Bitmap`]s.
    pub fn num_intersections_with(&self, other: &Self) -> usize {
        num_intersections_with(self, other)
    }
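
    // Example (editor's sketch, not from the original source): intersection
    // helpers over two masks that overlap only at index 1.
    //
    //     let a = Bitmap::from([true, true, false]);
    //     let b = Bitmap::from([false, true, true]);
    //     assert_eq!(a.num_intersections_with(&b), 1);
    //     assert!(a.intersects_with(&b));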

    /// Select between `truthy` and `falsy` based on `self`.
    ///
    /// This essentially performs:
    ///
    /// `out[i] = if self[i] { truthy[i] } else { falsy[i] }`
    pub fn select(&self, truthy: &Self, falsy: &Self) -> Self {
        super::bitmap_ops::select(self, truthy, falsy)
    }

    /// Select between `truthy` and constant `falsy` based on `self`.
    ///
    /// This essentially performs:
    ///
    /// `out[i] = if self[i] { truthy[i] } else { falsy }`
    pub fn select_constant(&self, truthy: &Self, falsy: bool) -> Self {
        super::bitmap_ops::select_constant(self, truthy, falsy)
    }
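
    // Example (editor's sketch, not from the original source): `select` as a
    // bitwise if-then-else over three masks.
    //
    //     let mask = Bitmap::from([true, false, true]);
    //     let truthy = Bitmap::from([true, true, true]);
    //     let falsy = Bitmap::from([false, false, false]);
    //     let out = mask.select(&truthy, &falsy);
    //     assert_eq!(out.iter().collect::<Vec<_>>(), vec![true, false, true]);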

    /// Calculates the number of edges from `0 -> 1` and `1 -> 0`.
    pub fn num_edges(&self) -> usize {
        super::bitmap_ops::num_edges(self)
    }
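
    // Example (editor's sketch, not from the original source), assuming edges
    // are counted between consecutive bits: [true, false, false, true] has
    // one `1 -> 0` edge and one `0 -> 1` edge.
    //
    //     let bitmap = Bitmap::from([true, false, false, true]);
    //     assert_eq!(bitmap.num_edges(), 2);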

    /// Returns the number of zero bits from the start before a one bit is seen.
    pub fn leading_zeros(&self) -> usize {
        utils::leading_zeros(&self.storage, self.offset, self.length)
    }
    /// Returns the number of one bits from the start before a zero bit is seen.
    pub fn leading_ones(&self) -> usize {
        utils::leading_ones(&self.storage, self.offset, self.length)
    }
    /// Returns the number of zero bits from the back before a one bit is seen.
    pub fn trailing_zeros(&self) -> usize {
        utils::trailing_zeros(&self.storage, self.offset, self.length)
    }
    /// Returns the number of one bits from the back before a zero bit is seen.
    pub fn trailing_ones(&self) -> usize {
        utils::trailing_ones(&self.storage, self.offset, self.length)
    }
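
    // Example (editor's sketch, not from the original source): the
    // run-length helpers count from either end without mutating.
    //
    //     let bitmap = Bitmap::from([false, false, true, true, true]);
    //     assert_eq!(bitmap.leading_zeros(), 2);
    //     assert_eq!(bitmap.trailing_ones(), 3);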

    /// Takes all `0` bits at the start of the [`Bitmap`] before a `1` is seen, returning how many
    /// bits were taken.
    pub fn take_leading_zeros(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == self.length)
        {
            let leading_zeros = self.length;
            self.offset += self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return leading_zeros;
        }

        let leading_zeros = self.leading_zeros();
        self.offset += leading_zeros;
        self.length -= leading_zeros;
        if has_cached_unset_bit_count(*self.unset_bit_count_cache.get_mut()) {
            *self.unset_bit_count_cache.get_mut() -= leading_zeros as u64;
        }
        leading_zeros
    }
    /// Takes all `1` bits at the start of the [`Bitmap`] before a `0` is seen, returning how many
    /// bits were taken.
    pub fn take_leading_ones(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == 0)
        {
            let leading_ones = self.length;
            self.offset += self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return leading_ones;
        }

        let leading_ones = self.leading_ones();
        self.offset += leading_ones;
        self.length -= leading_ones;
        // @NOTE: the unset_bit_count_cache remains unchanged
        leading_ones
    }
    /// Takes all `0` bits at the back of the [`Bitmap`] before a `1` is seen, returning how many
    /// bits were taken.
    pub fn take_trailing_zeros(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == self.length)
        {
            let trailing_zeros = self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return trailing_zeros;
        }

        let trailing_zeros = self.trailing_zeros();
        self.length -= trailing_zeros;
        if has_cached_unset_bit_count(*self.unset_bit_count_cache.get_mut()) {
            *self.unset_bit_count_cache.get_mut() -= trailing_zeros as u64;
        }
        trailing_zeros
    }
    /// Takes all `1` bits at the back of the [`Bitmap`] before a `0` is seen, returning how many
    /// bits were taken.
    pub fn take_trailing_ones(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == 0)
        {
            let trailing_ones = self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return trailing_ones;
        }

        let trailing_ones = self.trailing_ones();
        self.length -= trailing_ones;
        // @NOTE: the unset_bit_count_cache remains unchanged
        trailing_ones
    }
}
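
// Example (editor's sketch, not from the original source): the take_*
// helpers shrink the bitmap in place from either end and return how many
// bits were removed.
//
//     let mut bitmap = Bitmap::from([false, false, true, true]);
//     assert_eq!(bitmap.take_leading_zeros(), 2);
//     assert_eq!(bitmap.len(), 2);
//     assert_eq!(bitmap.take_trailing_ones(), 2);
//     assert_eq!(bitmap.len(), 0);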

impl<P: AsRef<[bool]>> From<P> for Bitmap {
    fn from(slice: P) -> Self {
        Self::from_trusted_len_iter(slice.as_ref().iter().copied())
    }
}

impl FromIterator<bool> for Bitmap {
    fn from_iter<I>(iter: I) -> Self
    where
        I: IntoIterator<Item = bool>,
    {
        MutableBitmap::from_iter(iter).into()
    }
}

impl FromTrustedLenIterator<bool> for Bitmap {
    fn from_iter_trusted_length<T: IntoIterator<Item = bool>>(iter: T) -> Self
    where
        T::IntoIter: TrustedLen,
    {
        MutableBitmap::from_trusted_len_iter(iter.into_iter()).into()
    }
}

impl Bitmap {
    /// Creates a new [`Bitmap`] from an iterator of booleans.
    ///
    /// # Safety
    /// The iterator must report an accurate length.
    #[inline]
    pub unsafe fn from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(iterator: I) -> Self {
        MutableBitmap::from_trusted_len_iter_unchecked(iterator).into()
    }

    /// Creates a new [`Bitmap`] from an iterator of booleans.
    #[inline]
    pub fn from_trusted_len_iter<I: TrustedLen<Item = bool>>(iterator: I) -> Self {
        MutableBitmap::from_trusted_len_iter(iterator).into()
    }

    /// Creates a new [`Bitmap`] from a fallible iterator of booleans.
    #[inline]
    pub fn try_from_trusted_len_iter<E, I: TrustedLen<Item = std::result::Result<bool, E>>>(
        iterator: I,
    ) -> std::result::Result<Self, E> {
        Ok(MutableBitmap::try_from_trusted_len_iter(iterator)?.into())
    }

    /// Creates a new [`Bitmap`] from a fallible iterator of booleans.
    ///
    /// # Safety
    /// The iterator must report an accurate length.
    #[inline]
    pub unsafe fn try_from_trusted_len_iter_unchecked<
        E,
        I: Iterator<Item = std::result::Result<bool, E>>,
    >(
        iterator: I,
    ) -> std::result::Result<Self, E> {
        Ok(MutableBitmap::try_from_trusted_len_iter_unchecked(iterator)?.into())
    }
}

impl<'a> IntoIterator for &'a Bitmap {
    type Item = bool;
    type IntoIter = BitmapIter<'a>;

    fn into_iter(self) -> Self::IntoIter {
        BitmapIter::<'a>::new(&self.storage, self.offset, self.length)
    }
}

impl IntoIterator for Bitmap {
    type Item = bool;
    type IntoIter = IntoIter;

    fn into_iter(self) -> Self::IntoIter {
        IntoIter::new(self)
    }
}

impl Splitable for Bitmap {
    #[inline(always)]
    fn check_bound(&self, offset: usize) -> bool {
        offset <= self.len()
    }

    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        if offset == 0 {
            return (Self::new(), self.clone());
        }
        if offset == self.len() {
            return (self.clone(), Self::new());
        }

        let ubcc = self.unset_bit_count_cache.load();

        let lhs_length = offset;
        let rhs_length = self.length - offset;

        let mut lhs_ubcc = UNKNOWN_BIT_COUNT;
        let mut rhs_ubcc = UNKNOWN_BIT_COUNT;

        if has_cached_unset_bit_count(ubcc) {
            if ubcc == 0 {
                lhs_ubcc = 0;
                rhs_ubcc = 0;
            } else if ubcc == self.length as u64 {
                lhs_ubcc = offset as u64;
                rhs_ubcc = (self.length - offset) as u64;
            } else {
                // If we keep all but a small portion of the array it is worth
                // doing an eager re-count since we can reuse the old count via the
                // inclusion-exclusion principle.
                let small_portion = (self.length / 4).max(32);

                if lhs_length <= rhs_length {
                    if rhs_length + small_portion >= self.length {
                        let count = count_zeros(&self.storage, self.offset, lhs_length) as u64;
                        lhs_ubcc = count;
                        rhs_ubcc = ubcc - count;
                    }
                } else if lhs_length + small_portion >= self.length {
                    let count = count_zeros(&self.storage, self.offset + offset, rhs_length) as u64;
                    lhs_ubcc = ubcc - count;
                    rhs_ubcc = count;
                }
            }
        }

        debug_assert!(lhs_ubcc == UNKNOWN_BIT_COUNT || lhs_ubcc <= ubcc);
        debug_assert!(rhs_ubcc == UNKNOWN_BIT_COUNT || rhs_ubcc <= ubcc);

        (
            Self {
                storage: self.storage.clone(),
                offset: self.offset,
                length: lhs_length,
                unset_bit_count_cache: RelaxedCell::from(lhs_ubcc),
            },
            Self {
                storage: self.storage.clone(),
                offset: self.offset + offset,
                length: rhs_length,
                unset_bit_count_cache: RelaxedCell::from(rhs_ubcc),
            },
        )
    }
}
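
// Example (editor's sketch, not from the original source), assuming the
// `Splitable` trait exposes a safe `split_at` wrapper around
// `_split_at_unchecked` guarded by `check_bound`:
//
//     let bitmap = Bitmap::from([true, false, true, true]);
//     let (lhs, rhs) = bitmap.split_at(1);
//     assert_eq!(lhs.len(), 1);
//     assert_eq!(rhs.len(), 3);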