Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/boolean/mod.rs
6939 views
1
use either::Either;
2
use polars_error::{PolarsResult, polars_bail};
3
4
use super::{Array, Splitable};
5
use crate::array::iterator::NonNullValuesIter;
6
use crate::bitmap::utils::{BitmapIter, ZipValidity};
7
use crate::bitmap::{Bitmap, MutableBitmap};
8
use crate::compute::utils::{combine_validities_and, combine_validities_or};
9
use crate::datatypes::{ArrowDataType, PhysicalType};
10
use crate::trusted_len::TrustedLen;
11
12
mod ffi;
13
pub(super) mod fmt;
14
mod from;
15
mod iterator;
16
mod mutable;
17
pub use mutable::*;
18
mod builder;
19
pub use builder::*;
20
#[cfg(feature = "proptest")]
21
pub mod proptest;
22
23
/// A [`BooleanArray`] is Arrow's semantically equivalent of an immutable `Vec<Option<bool>>`.
24
/// It implements [`Array`].
25
///
26
/// One way to think about a [`BooleanArray`] is `(DataType, Arc<Vec<u8>>, Option<Arc<Vec<u8>>>)`
27
/// where:
28
/// * the first item is the array's logical type
29
/// * the second is the immutable values
30
/// * the third is the immutable validity (whether a value is null or not as a bitmap).
31
///
32
/// The size of this struct is `O(1)`, as all data is stored behind an [`std::sync::Arc`].
33
/// # Example
34
/// ```
35
/// use polars_arrow::array::BooleanArray;
36
/// use polars_arrow::bitmap::Bitmap;
37
/// use polars_arrow::buffer::Buffer;
38
///
39
/// let array = BooleanArray::from([Some(true), None, Some(false)]);
40
/// assert_eq!(array.value(0), true);
41
/// assert_eq!(array.iter().collect::<Vec<_>>(), vec![Some(true), None, Some(false)]);
42
/// assert_eq!(array.values_iter().collect::<Vec<_>>(), vec![true, false, false]);
43
/// // the underlying representation
44
/// assert_eq!(array.values(), &Bitmap::from([true, false, false]));
45
/// assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));
46
///
47
/// ```
48
#[derive(Clone)]
49
pub struct BooleanArray {
50
dtype: ArrowDataType,
51
values: Bitmap,
52
validity: Option<Bitmap>,
53
}
54
55
impl BooleanArray {
56
/// The canonical method to create a [`BooleanArray`] out of low-end APIs.
57
/// # Errors
58
/// This function errors iff:
59
/// * The validity is not `None` and its length is different from `values`'s length
60
/// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`].
61
pub fn try_new(
62
dtype: ArrowDataType,
63
values: Bitmap,
64
validity: Option<Bitmap>,
65
) -> PolarsResult<Self> {
66
if validity
67
.as_ref()
68
.is_some_and(|validity| validity.len() != values.len())
69
{
70
polars_bail!(ComputeError: "validity mask length must match the number of values")
71
}
72
73
if dtype.to_physical_type() != PhysicalType::Boolean {
74
polars_bail!(ComputeError: "BooleanArray can only be initialized with a DataType whose physical type is Boolean")
75
}
76
77
Ok(Self {
78
dtype,
79
values,
80
validity,
81
})
82
}
83
84
/// Alias to `Self::try_new().unwrap()`
85
pub fn new(dtype: ArrowDataType, values: Bitmap, validity: Option<Bitmap>) -> Self {
86
Self::try_new(dtype, values, validity).unwrap()
87
}
88
89
/// Returns an iterator over the optional values of this [`BooleanArray`].
90
#[inline]
91
pub fn iter(&self) -> ZipValidity<bool, BitmapIter<'_>, BitmapIter<'_>> {
92
ZipValidity::new_with_validity(self.values().iter(), self.validity())
93
}
94
95
/// Returns an iterator over the values of this [`BooleanArray`].
96
#[inline]
97
pub fn values_iter(&self) -> BitmapIter<'_> {
98
self.values().iter()
99
}
100
101
/// Returns an iterator of the non-null values.
102
#[inline]
103
pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, BooleanArray> {
104
NonNullValuesIter::new(self, self.validity())
105
}
106
107
/// Returns the length of this array
108
#[inline]
109
pub fn len(&self) -> usize {
110
self.values.len()
111
}
112
113
/// The values [`Bitmap`].
114
/// Values on null slots are undetermined (they can be anything).
115
#[inline]
116
pub fn values(&self) -> &Bitmap {
117
&self.values
118
}
119
120
/// Returns the optional validity.
121
#[inline]
122
pub fn validity(&self) -> Option<&Bitmap> {
123
self.validity.as_ref()
124
}
125
126
/// Returns the arrays' [`ArrowDataType`].
127
#[inline]
128
pub fn dtype(&self) -> &ArrowDataType {
129
&self.dtype
130
}
131
132
/// Returns the value at index `i`
133
/// # Panic
134
/// This function panics iff `i >= self.len()`.
135
#[inline]
136
pub fn value(&self, i: usize) -> bool {
137
self.values.get_bit(i)
138
}
139
140
/// Returns the element at index `i` as bool
141
///
142
/// # Safety
143
/// Caller must be sure that `i < self.len()`
144
#[inline]
145
pub unsafe fn value_unchecked(&self, i: usize) -> bool {
146
self.values.get_bit_unchecked(i)
147
}
148
149
/// Returns the element at index `i` or `None` if it is null
150
/// # Panics
151
/// iff `i >= self.len()`
152
#[inline]
153
pub fn get(&self, i: usize) -> Option<bool> {
154
if !self.is_null(i) {
155
// soundness: Array::is_null panics if i >= self.len
156
unsafe { Some(self.value_unchecked(i)) }
157
} else {
158
None
159
}
160
}
161
162
/// Slices this [`BooleanArray`].
163
/// # Implementation
164
/// This operation is `O(1)` as it amounts to increase up to two ref counts.
165
/// # Panic
166
/// This function panics iff `offset + length > self.len()`.
167
#[inline]
168
pub fn slice(&mut self, offset: usize, length: usize) {
169
assert!(
170
offset + length <= self.len(),
171
"the offset of the new Buffer cannot exceed the existing length"
172
);
173
unsafe { self.slice_unchecked(offset, length) }
174
}
175
176
/// Slices this [`BooleanArray`].
177
/// # Implementation
178
/// This operation is `O(1)` as it amounts to increase two ref counts.
179
///
180
/// # Safety
181
/// The caller must ensure that `offset + length <= self.len()`.
182
#[inline]
183
pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
184
self.validity = self
185
.validity
186
.take()
187
.map(|bitmap| bitmap.sliced_unchecked(offset, length))
188
.filter(|bitmap| bitmap.unset_bits() > 0);
189
self.values.slice_unchecked(offset, length);
190
}
191
192
impl_sliced!();
193
impl_mut_validity!();
194
impl_into_array!();
195
196
/// Returns a clone of this [`BooleanArray`] with new values.
197
/// # Panics
198
/// This function panics iff `values.len() != self.len()`.
199
#[must_use]
200
pub fn with_values(&self, values: Bitmap) -> Self {
201
let mut out = self.clone();
202
out.set_values(values);
203
out
204
}
205
206
/// Sets the values of this [`BooleanArray`].
207
/// # Panics
208
/// This function panics iff `values.len() != self.len()`.
209
pub fn set_values(&mut self, values: Bitmap) {
210
assert_eq!(
211
values.len(),
212
self.len(),
213
"values length must be equal to this arrays length"
214
);
215
self.values = values;
216
}
217
218
/// Applies a function `f` to the values of this array, cloning the values
219
/// iff they are being shared with others
220
///
221
/// This is an API to use clone-on-write
222
/// # Implementation
223
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
224
/// if it is being shared (since it results in a `O(N)` memcopy).
225
/// # Panics
226
/// This function panics if the function modifies the length of the [`MutableBitmap`].
227
pub fn apply_values_mut<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
228
let values = std::mem::take(&mut self.values);
229
let mut values = values.make_mut();
230
f(&mut values);
231
if let Some(validity) = &self.validity {
232
assert_eq!(validity.len(), values.len());
233
}
234
self.values = values.into();
235
}
236
237
/// Try to convert this [`BooleanArray`] to a [`MutableBooleanArray`]
238
pub fn into_mut(self) -> Either<Self, MutableBooleanArray> {
239
use Either::*;
240
241
if let Some(bitmap) = self.validity {
242
match bitmap.into_mut() {
243
Left(bitmap) => Left(BooleanArray::new(self.dtype, self.values, Some(bitmap))),
244
Right(mutable_bitmap) => match self.values.into_mut() {
245
Left(immutable) => Left(BooleanArray::new(
246
self.dtype,
247
immutable,
248
Some(mutable_bitmap.into()),
249
)),
250
Right(mutable) => Right(
251
MutableBooleanArray::try_new(self.dtype, mutable, Some(mutable_bitmap))
252
.unwrap(),
253
),
254
},
255
}
256
} else {
257
match self.values.into_mut() {
258
Left(immutable) => Left(BooleanArray::new(self.dtype, immutable, None)),
259
Right(mutable) => {
260
Right(MutableBooleanArray::try_new(self.dtype, mutable, None).unwrap())
261
},
262
}
263
}
264
}
265
266
/// Returns a new empty [`BooleanArray`].
267
pub fn new_empty(dtype: ArrowDataType) -> Self {
268
Self::new(dtype, Bitmap::new(), None)
269
}
270
271
/// Returns a new [`BooleanArray`] whose all slots are null / `None`.
272
pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
273
let bitmap = Bitmap::new_zeroed(length);
274
Self::new(dtype, bitmap.clone(), Some(bitmap))
275
}
276
277
/// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`.
278
#[inline]
279
pub fn from_trusted_len_values_iter<I: TrustedLen<Item = bool>>(iterator: I) -> Self {
280
MutableBooleanArray::from_trusted_len_values_iter(iterator).into()
281
}
282
283
/// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`.
284
/// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len
285
/// but this crate does not mark it as such.
286
///
287
/// # Safety
288
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
289
/// I.e. that `size_hint().1` correctly reports its length.
290
#[inline]
291
pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = bool>>(
292
iterator: I,
293
) -> Self {
294
MutableBooleanArray::from_trusted_len_values_iter_unchecked(iterator).into()
295
}
296
297
/// Creates a new [`BooleanArray`] from a slice of `bool`.
298
#[inline]
299
pub fn from_slice<P: AsRef<[bool]>>(slice: P) -> Self {
300
MutableBooleanArray::from_slice(slice).into()
301
}
302
303
/// Creates a [`BooleanArray`] from an iterator of trusted length.
304
/// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len
305
/// but this crate does not mark it as such.
306
///
307
/// # Safety
308
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
309
/// I.e. that `size_hint().1` correctly reports its length.
310
#[inline]
311
pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
312
where
313
P: std::borrow::Borrow<bool>,
314
I: Iterator<Item = Option<P>>,
315
{
316
MutableBooleanArray::from_trusted_len_iter_unchecked(iterator).into()
317
}
318
319
/// Creates a [`BooleanArray`] from a [`TrustedLen`].
320
#[inline]
321
pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
322
where
323
P: std::borrow::Borrow<bool>,
324
I: TrustedLen<Item = Option<P>>,
325
{
326
MutableBooleanArray::from_trusted_len_iter(iterator).into()
327
}
328
329
/// Creates a [`BooleanArray`] from an falible iterator of trusted length.
330
///
331
/// # Safety
332
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
333
/// I.e. that `size_hint().1` correctly reports its length.
334
#[inline]
335
pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(iterator: I) -> Result<Self, E>
336
where
337
P: std::borrow::Borrow<bool>,
338
I: Iterator<Item = Result<Option<P>, E>>,
339
{
340
Ok(MutableBooleanArray::try_from_trusted_len_iter_unchecked(iterator)?.into())
341
}
342
343
/// Creates a [`BooleanArray`] from a [`TrustedLen`].
344
#[inline]
345
pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> Result<Self, E>
346
where
347
P: std::borrow::Borrow<bool>,
348
I: TrustedLen<Item = Result<Option<P>, E>>,
349
{
350
Ok(MutableBooleanArray::try_from_trusted_len_iter(iterator)?.into())
351
}
352
353
pub fn true_and_valid(&self) -> Bitmap {
354
match &self.validity {
355
None => self.values.clone(),
356
Some(validity) => combine_validities_and(Some(&self.values), Some(validity)).unwrap(),
357
}
358
}
359
360
pub fn true_or_valid(&self) -> Bitmap {
361
match &self.validity {
362
None => self.values.clone(),
363
Some(validity) => combine_validities_or(Some(&self.values), Some(validity)).unwrap(),
364
}
365
}
366
367
/// Returns its internal representation
368
#[must_use]
369
pub fn into_inner(self) -> (ArrowDataType, Bitmap, Option<Bitmap>) {
370
let Self {
371
dtype,
372
values,
373
validity,
374
} = self;
375
(dtype, values, validity)
376
}
377
378
/// Creates a [`BooleanArray`] from its internal representation.
379
/// This is the inverted from [`BooleanArray::into_inner`]
380
///
381
/// # Safety
382
/// Callers must ensure all invariants of this struct are upheld.
383
pub unsafe fn from_inner_unchecked(
384
dtype: ArrowDataType,
385
values: Bitmap,
386
validity: Option<Bitmap>,
387
) -> Self {
388
Self {
389
dtype,
390
values,
391
validity,
392
}
393
}
394
}
395
396
impl Array for BooleanArray {
397
impl_common_array!();
398
399
fn validity(&self) -> Option<&Bitmap> {
400
self.validity.as_ref()
401
}
402
403
#[inline]
404
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
405
Box::new(self.clone().with_validity(validity))
406
}
407
}
408
409
/// [`Splitable`] implementation: splits the values and the validity at the same offset.
impl Splitable for BooleanArray {
    /// An offset is a valid split point when it does not exceed the array length.
    fn check_bound(&self, offset: usize) -> bool {
        self.len() >= offset
    }

    /// Splits the array into the parts before and after `offset`; both halves
    /// keep a clone of the data type.
    ///
    /// # Safety
    /// NOTE(review): the contract is defined by the `Splitable` trait, which is not
    /// visible here; presumably `offset` must satisfy `check_bound` — confirm.
    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        let (lhs_values, rhs_values) = unsafe { self.values.split_at_unchecked(offset) };
        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };

        let left = Self {
            dtype: self.dtype.clone(),
            values: lhs_values,
            validity: lhs_validity,
        };
        let right = Self {
            dtype: self.dtype.clone(),
            values: rhs_values,
            validity: rhs_validity,
        };
        (left, right)
    }
}
432
433
impl From<Bitmap> for BooleanArray {
434
fn from(values: Bitmap) -> Self {
435
Self {
436
dtype: ArrowDataType::Boolean,
437
values,
438
validity: None,
439
}
440
}
441
}
442
443