Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/boolean/mutable.rs
6939 views
1
use std::sync::Arc;
2
3
use polars_error::{PolarsResult, polars_bail};
4
5
use super::BooleanArray;
6
use crate::array::physical_binary::extend_validity;
7
use crate::array::{Array, MutableArray, TryExtend, TryExtendFromSelf, TryPush};
8
use crate::bitmap::MutableBitmap;
9
use crate::datatypes::{ArrowDataType, PhysicalType};
10
use crate::trusted_len::TrustedLen;
11
12
/// The Arrow's equivalent to `Vec<Option<bool>>`, but with `1/16` of its size.
13
/// Converting a [`MutableBooleanArray`] into a [`BooleanArray`] is `O(1)`.
14
/// # Implementation
15
/// This struct does not allocate a validity until one is required (i.e. push a null to it).
16
#[derive(Debug, Clone)]
17
pub struct MutableBooleanArray {
18
dtype: ArrowDataType,
19
values: MutableBitmap,
20
validity: Option<MutableBitmap>,
21
}
22
23
impl From<MutableBooleanArray> for BooleanArray {
24
fn from(other: MutableBooleanArray) -> Self {
25
BooleanArray::new(
26
other.dtype,
27
other.values.into(),
28
other.validity.map(|x| x.into()),
29
)
30
}
31
}
32
33
impl<P: AsRef<[Option<bool>]>> From<P> for MutableBooleanArray {
34
/// Creates a new [`MutableBooleanArray`] out of a slice of Optional `bool`.
35
fn from(slice: P) -> Self {
36
Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))
37
}
38
}
39
40
impl Default for MutableBooleanArray {
41
fn default() -> Self {
42
Self::new()
43
}
44
}
45
46
impl MutableBooleanArray {
47
/// Creates an new empty [`MutableBooleanArray`].
48
pub fn new() -> Self {
49
Self::with_capacity(0)
50
}
51
52
/// The canonical method to create a [`MutableBooleanArray`] out of low-end APIs.
53
/// # Errors
54
/// This function errors iff:
55
/// * The validity is not `None` and its length is different from `values`'s length
56
/// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`].
57
pub fn try_new(
58
dtype: ArrowDataType,
59
values: MutableBitmap,
60
validity: Option<MutableBitmap>,
61
) -> PolarsResult<Self> {
62
if validity
63
.as_ref()
64
.is_some_and(|validity| validity.len() != values.len())
65
{
66
polars_bail!(ComputeError:
67
"validity mask length must match the number of values",
68
)
69
}
70
71
if dtype.to_physical_type() != PhysicalType::Boolean {
72
polars_bail!(
73
oos = "MutableBooleanArray can only be initialized with a DataType whose physical type is Boolean",
74
)
75
}
76
77
Ok(Self {
78
dtype,
79
values,
80
validity,
81
})
82
}
83
84
/// Creates an new [`MutableBooleanArray`] with a capacity of values.
85
pub fn with_capacity(capacity: usize) -> Self {
86
Self {
87
dtype: ArrowDataType::Boolean,
88
values: MutableBitmap::with_capacity(capacity),
89
validity: None,
90
}
91
}
92
93
/// Reserves `additional` slots.
94
pub fn reserve(&mut self, additional: usize) {
95
self.values.reserve(additional);
96
if let Some(x) = self.validity.as_mut() {
97
x.reserve(additional)
98
}
99
}
100
101
#[inline]
102
pub fn push_value(&mut self, value: bool) {
103
self.values.push(value);
104
if let Some(validity) = &mut self.validity {
105
validity.push(true)
106
}
107
}
108
109
#[inline]
110
pub fn push_null(&mut self) {
111
self.values.push(false);
112
match &mut self.validity {
113
Some(validity) => validity.push(false),
114
None => self.init_validity(),
115
}
116
}
117
118
/// Pushes a new entry to [`MutableBooleanArray`].
119
#[inline]
120
pub fn push(&mut self, value: Option<bool>) {
121
match value {
122
Some(value) => self.push_value(value),
123
None => self.push_null(),
124
}
125
}
126
127
/// Pop an entry from [`MutableBooleanArray`].
128
/// Note If the values is empty, this method will return None.
129
pub fn pop(&mut self) -> Option<bool> {
130
let value = self.values.pop()?;
131
self.validity
132
.as_mut()
133
.map(|x| x.pop()?.then(|| value))
134
.unwrap_or_else(|| Some(value))
135
}
136
137
/// Extends the [`MutableBooleanArray`] from an iterator of values of trusted len.
138
/// This differs from `extend_trusted_len` which accepts in iterator of optional values.
139
#[inline]
140
pub fn extend_trusted_len_values<I>(&mut self, iterator: I)
141
where
142
I: TrustedLen<Item = bool>,
143
{
144
// SAFETY: `I` is `TrustedLen`
145
unsafe { self.extend_trusted_len_values_unchecked(iterator) }
146
}
147
148
/// Extends the [`MutableBooleanArray`] from an iterator of values of trusted len.
149
/// This differs from `extend_trusted_len_unchecked`, which accepts in iterator of optional values.
150
///
151
/// # Safety
152
/// The iterator must be trusted len.
153
#[inline]
154
pub unsafe fn extend_trusted_len_values_unchecked<I>(&mut self, iterator: I)
155
where
156
I: Iterator<Item = bool>,
157
{
158
let (_, upper) = iterator.size_hint();
159
let additional =
160
upper.expect("extend_trusted_len_values_unchecked requires an upper limit");
161
162
if let Some(validity) = self.validity.as_mut() {
163
validity.extend_constant(additional, true);
164
}
165
166
self.values.extend_from_trusted_len_iter_unchecked(iterator)
167
}
168
169
/// Extends the [`MutableBooleanArray`] from an iterator of trusted len.
170
#[inline]
171
pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
172
where
173
P: std::borrow::Borrow<bool>,
174
I: TrustedLen<Item = Option<P>>,
175
{
176
// SAFETY: `I` is `TrustedLen`
177
unsafe { self.extend_trusted_len_unchecked(iterator) }
178
}
179
180
/// Extends the [`MutableBooleanArray`] from an iterator of trusted len.
181
///
182
/// # Safety
183
/// The iterator must be trusted len.
184
#[inline]
185
pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
186
where
187
P: std::borrow::Borrow<bool>,
188
I: Iterator<Item = Option<P>>,
189
{
190
if let Some(validity) = self.validity.as_mut() {
191
extend_trusted_len_unzip(iterator, validity, &mut self.values);
192
} else {
193
let mut validity = MutableBitmap::new();
194
validity.extend_constant(self.len(), true);
195
196
extend_trusted_len_unzip(iterator, &mut validity, &mut self.values);
197
198
if validity.unset_bits() > 0 {
199
self.validity = Some(validity);
200
}
201
}
202
}
203
204
/// Extends `MutableBooleanArray` by additional values of constant value.
205
#[inline]
206
pub fn extend_constant(&mut self, additional: usize, value: Option<bool>) {
207
match value {
208
Some(value) => {
209
self.values.extend_constant(additional, value);
210
if let Some(validity) = self.validity.as_mut() {
211
validity.extend_constant(additional, true);
212
}
213
},
214
None => {
215
self.values.extend_constant(additional, false);
216
if let Some(validity) = self.validity.as_mut() {
217
validity.extend_constant(additional, false)
218
} else {
219
self.init_validity();
220
self.validity
221
.as_mut()
222
.unwrap()
223
.extend_constant(additional, false)
224
};
225
},
226
};
227
}
228
229
fn init_validity(&mut self) {
230
let mut validity = MutableBitmap::with_capacity(self.values.capacity());
231
validity.extend_constant(self.len(), true);
232
validity.set(self.len() - 1, false);
233
self.validity = Some(validity)
234
}
235
236
/// Converts itself into an [`Array`].
237
pub fn into_arc(self) -> Arc<dyn Array> {
238
let a: BooleanArray = self.into();
239
Arc::new(a)
240
}
241
242
pub fn freeze(self) -> BooleanArray {
243
self.into()
244
}
245
}
246
247
/// Getters
248
impl MutableBooleanArray {
249
/// Returns its values.
250
pub fn values(&self) -> &MutableBitmap {
251
&self.values
252
}
253
}
254
255
/// Setters
256
impl MutableBooleanArray {
257
/// Sets position `index` to `value`.
258
/// Note that if it is the first time a null appears in this array,
259
/// this initializes the validity bitmap (`O(N)`).
260
/// # Panic
261
/// Panics iff index is larger than `self.len()`.
262
pub fn set(&mut self, index: usize, value: Option<bool>) {
263
self.values.set(index, value.unwrap_or_default());
264
265
if value.is_none() && self.validity.is_none() {
266
// When the validity is None, all elements so far are valid. When one of the elements is set of null,
267
// the validity must be initialized.
268
self.validity = Some(MutableBitmap::from_trusted_len_iter(std::iter::repeat_n(
269
true,
270
self.len(),
271
)));
272
}
273
if let Some(x) = self.validity.as_mut() {
274
x.set(index, value.is_some())
275
}
276
}
277
}
278
279
/// From implementations
280
impl MutableBooleanArray {
281
/// Creates a new [`MutableBooleanArray`] from an [`TrustedLen`] of `bool`.
282
#[inline]
283
pub fn from_trusted_len_values_iter<I: TrustedLen<Item = bool>>(iterator: I) -> Self {
284
Self::try_new(
285
ArrowDataType::Boolean,
286
MutableBitmap::from_trusted_len_iter(iterator),
287
None,
288
)
289
.unwrap()
290
}
291
292
/// Creates a new [`MutableBooleanArray`] from an [`TrustedLen`] of `bool`.
293
/// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len
294
/// but this crate does not mark it as such.
295
///
296
/// # Safety
297
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
298
/// I.e. that `size_hint().1` correctly reports its length.
299
#[inline]
300
pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = bool>>(
301
iterator: I,
302
) -> Self {
303
let mut mutable = MutableBitmap::new();
304
mutable.extend_from_trusted_len_iter_unchecked(iterator);
305
MutableBooleanArray::try_new(ArrowDataType::Boolean, mutable, None).unwrap()
306
}
307
308
/// Creates a new [`MutableBooleanArray`] from a slice of `bool`.
309
#[inline]
310
pub fn from_slice<P: AsRef<[bool]>>(slice: P) -> Self {
311
Self::from_trusted_len_values_iter(slice.as_ref().iter().copied())
312
}
313
314
/// Creates a [`BooleanArray`] from an iterator of trusted length.
315
/// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len
316
/// but this crate does not mark it as such.
317
///
318
/// # Safety
319
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
320
/// I.e. that `size_hint().1` correctly reports its length.
321
#[inline]
322
pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
323
where
324
P: std::borrow::Borrow<bool>,
325
I: Iterator<Item = Option<P>>,
326
{
327
let (validity, values) = trusted_len_unzip(iterator);
328
329
Self::try_new(ArrowDataType::Boolean, values, validity).unwrap()
330
}
331
332
/// Creates a [`BooleanArray`] from a [`TrustedLen`].
333
#[inline]
334
pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
335
where
336
P: std::borrow::Borrow<bool>,
337
I: TrustedLen<Item = Option<P>>,
338
{
339
// SAFETY: `I` is `TrustedLen`
340
unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
341
}
342
343
/// Creates a [`BooleanArray`] from an falible iterator of trusted length.
344
///
345
/// # Safety
346
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
347
/// I.e. that `size_hint().1` correctly reports its length.
348
#[inline]
349
pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
350
iterator: I,
351
) -> std::result::Result<Self, E>
352
where
353
P: std::borrow::Borrow<bool>,
354
I: Iterator<Item = std::result::Result<Option<P>, E>>,
355
{
356
let (validity, values) = try_trusted_len_unzip(iterator)?;
357
358
let validity = if validity.unset_bits() > 0 {
359
Some(validity)
360
} else {
361
None
362
};
363
364
Ok(Self::try_new(ArrowDataType::Boolean, values, validity).unwrap())
365
}
366
367
/// Creates a [`BooleanArray`] from a [`TrustedLen`].
368
#[inline]
369
pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>
370
where
371
P: std::borrow::Borrow<bool>,
372
I: TrustedLen<Item = std::result::Result<Option<P>, E>>,
373
{
374
// SAFETY: `I` is `TrustedLen`
375
unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
376
}
377
378
/// Shrinks the capacity of the [`MutableBooleanArray`] to fit its current length.
379
pub fn shrink_to_fit(&mut self) {
380
self.values.shrink_to_fit();
381
if let Some(validity) = &mut self.validity {
382
validity.shrink_to_fit()
383
}
384
}
385
}
386
387
/// Creates a Bitmap and an optional [`MutableBitmap`] from an iterator of `Option<bool>`.
388
/// The first buffer corresponds to a bitmap buffer, the second one
389
/// corresponds to a values buffer.
390
/// # Safety
391
/// The caller must ensure that `iterator` is `TrustedLen`.
392
#[inline]
393
pub(crate) unsafe fn trusted_len_unzip<I, P>(iterator: I) -> (Option<MutableBitmap>, MutableBitmap)
394
where
395
P: std::borrow::Borrow<bool>,
396
I: Iterator<Item = Option<P>>,
397
{
398
let mut validity = MutableBitmap::new();
399
let mut values = MutableBitmap::new();
400
401
extend_trusted_len_unzip(iterator, &mut validity, &mut values);
402
403
let validity = if validity.unset_bits() > 0 {
404
Some(validity)
405
} else {
406
None
407
};
408
409
(validity, values)
410
}
411
412
/// Extends validity [`MutableBitmap`] and values [`MutableBitmap`] from an iterator of `Option`.
413
/// # Safety
414
/// The caller must ensure that `iterator` is `TrustedLen`.
415
#[inline]
416
pub(crate) unsafe fn extend_trusted_len_unzip<I, P>(
417
iterator: I,
418
validity: &mut MutableBitmap,
419
values: &mut MutableBitmap,
420
) where
421
P: std::borrow::Borrow<bool>,
422
I: Iterator<Item = Option<P>>,
423
{
424
let (_, upper) = iterator.size_hint();
425
let additional = upper.expect("extend_trusted_len_unzip requires an upper limit");
426
427
// Length of the array before new values are pushed,
428
// variable created for assertion post operation
429
let pre_length = values.len();
430
431
validity.reserve(additional);
432
values.reserve(additional);
433
434
for item in iterator {
435
let item = if let Some(item) = item {
436
validity.push_unchecked(true);
437
*item.borrow()
438
} else {
439
validity.push_unchecked(false);
440
bool::default()
441
};
442
values.push_unchecked(item);
443
}
444
445
debug_assert_eq!(
446
values.len(),
447
pre_length + additional,
448
"Trusted iterator length was not accurately reported"
449
);
450
}
451
452
/// # Safety
453
/// The caller must ensure that `iterator` is `TrustedLen`.
454
#[inline]
455
pub(crate) unsafe fn try_trusted_len_unzip<E, I, P>(
456
iterator: I,
457
) -> std::result::Result<(MutableBitmap, MutableBitmap), E>
458
where
459
P: std::borrow::Borrow<bool>,
460
I: Iterator<Item = std::result::Result<Option<P>, E>>,
461
{
462
let (_, upper) = iterator.size_hint();
463
let len = upper.expect("trusted_len_unzip requires an upper limit");
464
465
let mut null = MutableBitmap::with_capacity(len);
466
let mut values = MutableBitmap::with_capacity(len);
467
468
for item in iterator {
469
let item = if let Some(item) = item? {
470
null.push(true);
471
*item.borrow()
472
} else {
473
null.push(false);
474
false
475
};
476
values.push(item);
477
}
478
assert_eq!(
479
values.len(),
480
len,
481
"Trusted iterator length was not accurately reported"
482
);
483
values.set_len(len);
484
null.set_len(len);
485
486
Ok((null, values))
487
}
488
489
impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for MutableBooleanArray {
490
fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
491
let iter = iter.into_iter();
492
let (lower, _) = iter.size_hint();
493
494
let mut validity = MutableBitmap::with_capacity(lower);
495
496
let values: MutableBitmap = iter
497
.map(|item| {
498
if let Some(a) = item.borrow() {
499
validity.push(true);
500
*a
501
} else {
502
validity.push(false);
503
false
504
}
505
})
506
.collect();
507
508
let validity = if validity.unset_bits() > 0 {
509
Some(validity)
510
} else {
511
None
512
};
513
514
MutableBooleanArray::try_new(ArrowDataType::Boolean, values, validity).unwrap()
515
}
516
}
517
518
impl MutableArray for MutableBooleanArray {
519
fn len(&self) -> usize {
520
self.values.len()
521
}
522
523
fn validity(&self) -> Option<&MutableBitmap> {
524
self.validity.as_ref()
525
}
526
527
fn as_box(&mut self) -> Box<dyn Array> {
528
let array: BooleanArray = std::mem::take(self).into();
529
array.boxed()
530
}
531
532
fn as_arc(&mut self) -> Arc<dyn Array> {
533
let array: BooleanArray = std::mem::take(self).into();
534
array.arced()
535
}
536
537
fn dtype(&self) -> &ArrowDataType {
538
&self.dtype
539
}
540
541
fn as_any(&self) -> &dyn std::any::Any {
542
self
543
}
544
545
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
546
self
547
}
548
549
#[inline]
550
fn push_null(&mut self) {
551
self.push(None)
552
}
553
554
fn reserve(&mut self, additional: usize) {
555
self.reserve(additional)
556
}
557
558
fn shrink_to_fit(&mut self) {
559
self.shrink_to_fit()
560
}
561
}
562
563
impl Extend<Option<bool>> for MutableBooleanArray {
564
fn extend<I: IntoIterator<Item = Option<bool>>>(&mut self, iter: I) {
565
let iter = iter.into_iter();
566
self.reserve(iter.size_hint().0);
567
iter.for_each(|x| self.push(x))
568
}
569
}
570
571
impl TryExtend<Option<bool>> for MutableBooleanArray {
572
/// This is infalible and is implemented for consistency with all other types
573
fn try_extend<I: IntoIterator<Item = Option<bool>>>(&mut self, iter: I) -> PolarsResult<()> {
574
self.extend(iter);
575
Ok(())
576
}
577
}
578
579
impl TryPush<Option<bool>> for MutableBooleanArray {
580
/// This is infalible and is implemented for consistency with all other types
581
fn try_push(&mut self, item: Option<bool>) -> PolarsResult<()> {
582
self.push(item);
583
Ok(())
584
}
585
}
586
587
impl PartialEq for MutableBooleanArray {
588
fn eq(&self, other: &Self) -> bool {
589
self.iter().eq(other.iter())
590
}
591
}
592
593
impl TryExtendFromSelf for MutableBooleanArray {
594
fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
595
extend_validity(self.len(), &mut self.validity, &other.validity);
596
597
let slice = other.values.as_slice();
598
// SAFETY: invariant offset + length <= slice.len()
599
unsafe {
600
self.values
601
.extend_from_slice_unchecked(slice, 0, other.values.len());
602
}
603
Ok(())
604
}
605
}
606
607