Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/primitive/mutable.rs
6939 views
1
use std::sync::Arc;
2
3
use polars_error::PolarsResult;
4
5
use super::{PrimitiveArray, check};
6
use crate::array::physical_binary::extend_validity;
7
use crate::array::{Array, MutableArray, TryExtend, TryExtendFromSelf, TryPush};
8
use crate::bitmap::{Bitmap, MutableBitmap};
9
use crate::datatypes::ArrowDataType;
10
use crate::trusted_len::TrustedLen;
11
use crate::types::NativeType;
12
13
/// The Arrow's equivalent to `Vec<Option<T>>` where `T` is byte-size (e.g. `i32`).
14
/// Converting a [`MutablePrimitiveArray`] into a [`PrimitiveArray`] is `O(1)`.
15
#[derive(Debug, Clone)]
16
pub struct MutablePrimitiveArray<T: NativeType> {
17
dtype: ArrowDataType,
18
values: Vec<T>,
19
validity: Option<MutableBitmap>,
20
}
21
22
impl<T: NativeType> From<MutablePrimitiveArray<T>> for PrimitiveArray<T> {
23
fn from(other: MutablePrimitiveArray<T>) -> Self {
24
let validity = other.validity.and_then(|x| {
25
let bitmap: Bitmap = x.into();
26
if bitmap.unset_bits() == 0 {
27
None
28
} else {
29
Some(bitmap)
30
}
31
});
32
33
PrimitiveArray::<T>::new(other.dtype, other.values.into(), validity)
34
}
35
}
36
37
impl<T: NativeType, P: AsRef<[Option<T>]>> From<P> for MutablePrimitiveArray<T> {
    /// Builds a [`MutablePrimitiveArray`] from anything viewable as a slice of
    /// optional values; `None` entries become nulls.
    fn from(slice: P) -> Self {
        let items: &[Option<T>] = slice.as_ref();
        let optional_refs = items.iter().map(|item| item.as_ref());
        Self::from_trusted_len_iter(optional_refs)
    }
}
42
43
impl<T: NativeType> MutablePrimitiveArray<T> {
44
/// Creates a new empty [`MutablePrimitiveArray`].
45
pub fn new() -> Self {
46
Self::with_capacity(0)
47
}
48
49
/// Creates a new [`MutablePrimitiveArray`] with a capacity.
50
pub fn with_capacity(capacity: usize) -> Self {
51
Self::with_capacity_from(capacity, T::PRIMITIVE.into())
52
}
53
54
/// The canonical method to create a [`MutablePrimitiveArray`] out of its internal components.
55
/// # Implementation
56
/// This function is `O(1)`.
57
///
58
/// # Errors
59
/// This function errors iff:
60
/// * The validity is not `None` and its length is different from `values`'s length
61
/// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Primitive(T::PRIMITIVE)`]
62
pub fn try_new(
63
dtype: ArrowDataType,
64
values: Vec<T>,
65
validity: Option<MutableBitmap>,
66
) -> PolarsResult<Self> {
67
check(&dtype, &values, validity.as_ref().map(|x| x.len()))?;
68
Ok(Self {
69
dtype,
70
values,
71
validity,
72
})
73
}
74
75
/// Extract the low-end APIs from the [`MutablePrimitiveArray`].
76
pub fn into_inner(self) -> (ArrowDataType, Vec<T>, Option<MutableBitmap>) {
77
(self.dtype, self.values, self.validity)
78
}
79
80
/// Applies a function `f` to the values of this array, cloning the values
81
/// iff they are being shared with others
82
///
83
/// This is an API to use clone-on-write
84
/// # Implementation
85
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
86
/// if it is being shared (since it results in a `O(N)` memcopy).
87
/// # Panics
88
/// This function panics iff `f` panics
89
pub fn apply_values<F: Fn(&mut [T])>(&mut self, f: F) {
90
f(&mut self.values);
91
}
92
}
93
94
impl<T: NativeType> Default for MutablePrimitiveArray<T> {
95
fn default() -> Self {
96
Self::new()
97
}
98
}
99
100
impl<T: NativeType> From<ArrowDataType> for MutablePrimitiveArray<T> {
    /// Creates an empty [`MutablePrimitiveArray`] with the given data type.
    ///
    /// # Panics
    /// Panics iff the physical type of `dtype` is not the primitive type of `T`.
    fn from(dtype: ArrowDataType) -> Self {
        // A zero capacity yields the same empty, unallocated values buffer,
        // and `with_capacity_from` performs the same physical-type assertion.
        Self::with_capacity_from(0, dtype)
    }
}
110
111
impl<T: NativeType> MutablePrimitiveArray<T> {
112
/// Creates a new [`MutablePrimitiveArray`] from a capacity and [`ArrowDataType`].
113
pub fn with_capacity_from(capacity: usize, dtype: ArrowDataType) -> Self {
114
assert!(dtype.to_physical_type().eq_primitive(T::PRIMITIVE));
115
Self {
116
dtype,
117
values: Vec::<T>::with_capacity(capacity),
118
validity: None,
119
}
120
}
121
122
/// Reserves `additional` entries.
123
pub fn reserve(&mut self, additional: usize) {
124
self.values.reserve(additional);
125
if let Some(x) = self.validity.as_mut() {
126
x.reserve(additional)
127
}
128
}
129
130
#[inline]
131
pub fn push_value(&mut self, value: T) {
132
self.values.push(value);
133
if let Some(validity) = &mut self.validity {
134
validity.push(true)
135
}
136
}
137
138
/// Adds a new value to the array.
139
#[inline]
140
pub fn push(&mut self, value: Option<T>) {
141
match value {
142
Some(value) => self.push_value(value),
143
None => {
144
self.values.push(T::default());
145
match &mut self.validity {
146
Some(validity) => validity.push(false),
147
None => {
148
self.init_validity();
149
},
150
}
151
},
152
}
153
}
154
155
/// Pop a value from the array.
156
/// Note if the values is empty, this method will return None.
157
pub fn pop(&mut self) -> Option<T> {
158
let value = self.values.pop()?;
159
self.validity
160
.as_mut()
161
.map(|x| x.pop()?.then(|| value))
162
.unwrap_or_else(|| Some(value))
163
}
164
165
/// Extends the [`MutablePrimitiveArray`] with a constant
166
#[inline]
167
pub fn extend_constant(&mut self, additional: usize, value: Option<T>) {
168
if let Some(value) = value {
169
self.values.resize(self.values.len() + additional, value);
170
if let Some(validity) = &mut self.validity {
171
validity.extend_constant(additional, true)
172
}
173
} else {
174
if let Some(validity) = &mut self.validity {
175
validity.extend_constant(additional, false)
176
} else {
177
let mut validity = MutableBitmap::with_capacity(self.values.capacity());
178
validity.extend_constant(self.len(), true);
179
validity.extend_constant(additional, false);
180
self.validity = Some(validity)
181
}
182
self.values
183
.resize(self.values.len() + additional, T::default());
184
}
185
}
186
187
/// Extends the [`MutablePrimitiveArray`] from an iterator of trusted len.
188
#[inline]
189
pub fn extend_trusted_len<P, I>(&mut self, iterator: I)
190
where
191
P: std::borrow::Borrow<T>,
192
I: TrustedLen<Item = Option<P>>,
193
{
194
unsafe { self.extend_trusted_len_unchecked(iterator) }
195
}
196
197
/// Extends the [`MutablePrimitiveArray`] from an iterator of trusted len.
198
///
199
/// # Safety
200
/// The iterator must be trusted len.
201
#[inline]
202
pub unsafe fn extend_trusted_len_unchecked<P, I>(&mut self, iterator: I)
203
where
204
P: std::borrow::Borrow<T>,
205
I: Iterator<Item = Option<P>>,
206
{
207
if let Some(validity) = self.validity.as_mut() {
208
extend_trusted_len_unzip(iterator, validity, &mut self.values)
209
} else {
210
let mut validity = MutableBitmap::new();
211
validity.extend_constant(self.len(), true);
212
extend_trusted_len_unzip(iterator, &mut validity, &mut self.values);
213
self.validity = Some(validity);
214
}
215
}
216
/// Extends the [`MutablePrimitiveArray`] from an iterator of values of trusted len.
217
/// This differs from `extend_trusted_len` which accepts in iterator of optional values.
218
#[inline]
219
pub fn extend_trusted_len_values<I>(&mut self, iterator: I)
220
where
221
I: TrustedLen<Item = T>,
222
{
223
unsafe { self.extend_values(iterator) }
224
}
225
226
/// Extends the [`MutablePrimitiveArray`] from an iterator of values of trusted len.
227
/// This differs from `extend_trusted_len_unchecked` which accepts in iterator of optional values.
228
///
229
/// # Safety
230
/// The iterator must be trusted len.
231
#[inline]
232
pub fn extend_values<I>(&mut self, iterator: I)
233
where
234
I: Iterator<Item = T>,
235
{
236
self.values.extend(iterator);
237
self.update_all_valid();
238
}
239
240
#[inline]
241
/// Extends the [`MutablePrimitiveArray`] from a slice
242
pub fn extend_from_slice(&mut self, items: &[T]) {
243
self.values.extend_from_slice(items);
244
self.update_all_valid();
245
}
246
247
fn update_all_valid(&mut self) {
248
// get len before mutable borrow
249
let len = self.len();
250
if let Some(validity) = self.validity.as_mut() {
251
validity.extend_constant(len - validity.len(), true);
252
}
253
}
254
255
fn init_validity(&mut self) {
256
let mut validity = MutableBitmap::with_capacity(self.values.capacity());
257
validity.extend_constant(self.len(), true);
258
validity.set(self.len() - 1, false);
259
self.validity = Some(validity)
260
}
261
262
/// Changes the arrays' [`ArrowDataType`], returning a new [`MutablePrimitiveArray`].
263
/// Use to change the logical type without changing the corresponding physical Type.
264
/// # Implementation
265
/// This operation is `O(1)`.
266
#[inline]
267
pub fn to(self, dtype: ArrowDataType) -> Self {
268
Self::try_new(dtype, self.values, self.validity).unwrap()
269
}
270
271
/// Converts itself into an [`Array`].
272
pub fn into_arc(self) -> Arc<dyn Array> {
273
let a: PrimitiveArray<T> = self.into();
274
Arc::new(a)
275
}
276
277
/// Shrinks the capacity of the [`MutablePrimitiveArray`] to fit its current length.
278
pub fn shrink_to_fit(&mut self) {
279
self.values.shrink_to_fit();
280
if let Some(validity) = &mut self.validity {
281
validity.shrink_to_fit()
282
}
283
}
284
285
/// Returns the capacity of this [`MutablePrimitiveArray`].
286
pub fn capacity(&self) -> usize {
287
self.values.capacity()
288
}
289
290
pub fn freeze(self) -> PrimitiveArray<T> {
291
self.into()
292
}
293
294
/// Clears the array, removing all values.
295
///
296
/// Note that this method has no effect on the allocated capacity
297
/// of the array.
298
pub fn clear(&mut self) {
299
self.values.clear();
300
self.validity = None;
301
}
302
303
/// Apply a function that temporarily freezes this `MutableArray` into a `PrimitiveArray`.
304
pub fn with_freeze<K, F: FnOnce(&PrimitiveArray<T>) -> K>(&mut self, f: F) -> K {
305
let mutable = std::mem::take(self);
306
let arr = mutable.freeze();
307
let out = f(&arr);
308
*self = arr.into_mut().right().unwrap();
309
out
310
}
311
}
312
313
/// Accessors
314
impl<T: NativeType> MutablePrimitiveArray<T> {
315
/// Returns its values.
316
pub fn values(&self) -> &Vec<T> {
317
&self.values
318
}
319
320
/// Returns a mutable slice of values.
321
pub fn values_mut_slice(&mut self) -> &mut [T] {
322
self.values.as_mut_slice()
323
}
324
}
325
326
/// Setters
327
impl<T: NativeType> MutablePrimitiveArray<T> {
328
/// Sets position `index` to `value`.
329
/// Note that if it is the first time a null appears in this array,
330
/// this initializes the validity bitmap (`O(N)`).
331
/// # Panic
332
/// Panics iff `index >= self.len()`.
333
pub fn set(&mut self, index: usize, value: Option<T>) {
334
assert!(index < self.len());
335
// SAFETY:
336
// we just checked bounds
337
unsafe { self.set_unchecked(index, value) }
338
}
339
340
/// Sets position `index` to `value`.
341
/// Note that if it is the first time a null appears in this array,
342
/// this initializes the validity bitmap (`O(N)`).
343
///
344
/// # Safety
345
/// Caller must ensure `index < self.len()`
346
pub unsafe fn set_unchecked(&mut self, index: usize, value: Option<T>) {
347
*self.values.get_unchecked_mut(index) = value.unwrap_or_default();
348
349
if value.is_none() && self.validity.is_none() {
350
// When the validity is None, all elements so far are valid. When one of the elements is set of null,
351
// the validity must be initialized.
352
let mut validity = MutableBitmap::new();
353
validity.extend_constant(self.len(), true);
354
self.validity = Some(validity);
355
}
356
if let Some(x) = self.validity.as_mut() {
357
x.set_unchecked(index, value.is_some())
358
}
359
}
360
361
/// Sets the validity.
362
/// # Panic
363
/// Panics iff the validity's len is not equal to the existing values' length.
364
pub fn set_validity(&mut self, validity: Option<MutableBitmap>) {
365
if let Some(validity) = &validity {
366
assert_eq!(self.values.len(), validity.len())
367
}
368
self.validity = validity;
369
}
370
371
/// Sets values.
372
/// # Panic
373
/// Panics iff the values' length is not equal to the existing values' len.
374
pub fn set_values(&mut self, values: Vec<T>) {
375
assert_eq!(values.len(), self.values.len());
376
self.values = values;
377
}
378
}
379
380
impl<T: NativeType> Extend<Option<T>> for MutablePrimitiveArray<T> {
    /// Appends every item of `iter`, reserving space for the iterator's
    /// lower size bound up front.
    fn extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) {
        let items = iter.into_iter();
        let (lower_bound, _) = items.size_hint();
        self.reserve(lower_bound);
        for item in items {
            self.push(item);
        }
    }
}
387
388
impl<T: NativeType> TryExtend<Option<T>> for MutablePrimitiveArray<T> {
389
/// This is infallible and is implemented for consistency with all other types
390
fn try_extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) -> PolarsResult<()> {
391
self.extend(iter);
392
Ok(())
393
}
394
}
395
396
impl<T: NativeType> TryPush<Option<T>> for MutablePrimitiveArray<T> {
397
/// This is infalible and is implemented for consistency with all other types
398
#[inline]
399
fn try_push(&mut self, item: Option<T>) -> PolarsResult<()> {
400
self.push(item);
401
Ok(())
402
}
403
}
404
405
impl<T: NativeType> MutableArray for MutablePrimitiveArray<T> {
406
fn len(&self) -> usize {
407
self.values.len()
408
}
409
410
fn validity(&self) -> Option<&MutableBitmap> {
411
self.validity.as_ref()
412
}
413
414
fn as_box(&mut self) -> Box<dyn Array> {
415
PrimitiveArray::new(
416
self.dtype.clone(),
417
std::mem::take(&mut self.values).into(),
418
std::mem::take(&mut self.validity).map(|x| x.into()),
419
)
420
.boxed()
421
}
422
423
fn as_arc(&mut self) -> Arc<dyn Array> {
424
PrimitiveArray::new(
425
self.dtype.clone(),
426
std::mem::take(&mut self.values).into(),
427
std::mem::take(&mut self.validity).map(|x| x.into()),
428
)
429
.arced()
430
}
431
432
fn dtype(&self) -> &ArrowDataType {
433
&self.dtype
434
}
435
436
fn as_any(&self) -> &dyn std::any::Any {
437
self
438
}
439
440
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
441
self
442
}
443
444
fn push_null(&mut self) {
445
self.push(None)
446
}
447
448
fn reserve(&mut self, additional: usize) {
449
self.reserve(additional)
450
}
451
452
fn shrink_to_fit(&mut self) {
453
self.shrink_to_fit()
454
}
455
}
456
457
impl<T: NativeType> MutablePrimitiveArray<T> {
458
/// Creates a [`MutablePrimitiveArray`] from a slice of values.
459
pub fn from_slice<P: AsRef<[T]>>(slice: P) -> Self {
460
Self::from_trusted_len_values_iter(slice.as_ref().iter().copied())
461
}
462
463
/// Creates a [`MutablePrimitiveArray`] from an iterator of trusted length.
464
///
465
/// # Safety
466
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
467
/// I.e. `size_hint().1` correctly reports its length.
468
#[inline]
469
pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
470
where
471
P: std::borrow::Borrow<T>,
472
I: Iterator<Item = Option<P>>,
473
{
474
let (validity, values) = trusted_len_unzip(iterator);
475
476
Self {
477
dtype: T::PRIMITIVE.into(),
478
values,
479
validity,
480
}
481
}
482
483
/// Creates a [`MutablePrimitiveArray`] from a [`TrustedLen`].
484
#[inline]
485
pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
486
where
487
P: std::borrow::Borrow<T>,
488
I: TrustedLen<Item = Option<P>>,
489
{
490
unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
491
}
492
493
/// Creates a [`MutablePrimitiveArray`] from an fallible iterator of trusted length.
494
///
495
/// # Safety
496
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
497
/// I.e. that `size_hint().1` correctly reports its length.
498
#[inline]
499
pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
500
iter: I,
501
) -> std::result::Result<Self, E>
502
where
503
P: std::borrow::Borrow<T>,
504
I: IntoIterator<Item = std::result::Result<Option<P>, E>>,
505
{
506
let iterator = iter.into_iter();
507
508
let (validity, values) = try_trusted_len_unzip(iterator)?;
509
510
Ok(Self {
511
dtype: T::PRIMITIVE.into(),
512
values,
513
validity,
514
})
515
}
516
517
/// Creates a [`MutablePrimitiveArray`] from an fallible iterator of trusted length.
518
#[inline]
519
pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>
520
where
521
P: std::borrow::Borrow<T>,
522
I: TrustedLen<Item = std::result::Result<Option<P>, E>>,
523
{
524
unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
525
}
526
527
/// Creates a new [`MutablePrimitiveArray`] out an iterator over values
528
pub fn from_trusted_len_values_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {
529
Self {
530
dtype: T::PRIMITIVE.into(),
531
values: iter.collect(),
532
validity: None,
533
}
534
}
535
536
/// Creates a (non-null) [`MutablePrimitiveArray`] from a vector of values.
537
/// This does not have memcopy and is the fastest way to create a [`PrimitiveArray`].
538
pub fn from_vec(values: Vec<T>) -> Self {
539
Self::try_new(T::PRIMITIVE.into(), values, None).unwrap()
540
}
541
542
/// Creates a new [`MutablePrimitiveArray`] from an iterator over values
543
///
544
/// # Safety
545
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
546
/// I.e. that `size_hint().1` correctly reports its length.
547
pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = T>>(iter: I) -> Self {
548
Self {
549
dtype: T::PRIMITIVE.into(),
550
values: iter.collect(),
551
validity: None,
552
}
553
}
554
}
555
556
impl<T: NativeType, Ptr: std::borrow::Borrow<Option<T>>> FromIterator<Ptr>
557
for MutablePrimitiveArray<T>
558
{
559
fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
560
let iter = iter.into_iter();
561
let (lower, _) = iter.size_hint();
562
563
let mut validity = MutableBitmap::with_capacity(lower);
564
565
let values: Vec<T> = iter
566
.map(|item| {
567
if let Some(a) = item.borrow() {
568
validity.push(true);
569
*a
570
} else {
571
validity.push(false);
572
T::default()
573
}
574
})
575
.collect();
576
577
let validity = Some(validity);
578
579
Self {
580
dtype: T::PRIMITIVE.into(),
581
values,
582
validity,
583
}
584
}
585
}
586
587
/// Extends a [`MutableBitmap`] and a [`Vec`] from an iterator of `Option`.
588
/// The first buffer corresponds to a bitmap buffer, the second one
589
/// corresponds to a values buffer.
590
/// # Safety
591
/// The caller must ensure that `iterator` is `TrustedLen`.
592
#[inline]
593
pub(crate) unsafe fn extend_trusted_len_unzip<I, P, T>(
594
iterator: I,
595
validity: &mut MutableBitmap,
596
buffer: &mut Vec<T>,
597
) where
598
T: NativeType,
599
P: std::borrow::Borrow<T>,
600
I: Iterator<Item = Option<P>>,
601
{
602
let (_, upper) = iterator.size_hint();
603
let additional = upper.expect("trusted_len_unzip requires an upper limit");
604
605
validity.reserve(additional);
606
let values = iterator.map(|item| {
607
if let Some(item) = item {
608
validity.push_unchecked(true);
609
*item.borrow()
610
} else {
611
validity.push_unchecked(false);
612
T::default()
613
}
614
});
615
buffer.extend(values);
616
}
617
618
/// Creates a [`MutableBitmap`] and a [`Vec`] from an iterator of `Option`.
619
/// The first buffer corresponds to a bitmap buffer, the second one
620
/// corresponds to a values buffer.
621
/// # Safety
622
/// The caller must ensure that `iterator` is `TrustedLen`.
623
#[inline]
624
pub(crate) unsafe fn trusted_len_unzip<I, P, T>(iterator: I) -> (Option<MutableBitmap>, Vec<T>)
625
where
626
T: NativeType,
627
P: std::borrow::Borrow<T>,
628
I: Iterator<Item = Option<P>>,
629
{
630
let mut validity = MutableBitmap::new();
631
let mut buffer = Vec::<T>::new();
632
633
extend_trusted_len_unzip(iterator, &mut validity, &mut buffer);
634
635
let validity = Some(validity);
636
637
(validity, buffer)
638
}
639
640
/// # Safety
641
/// The caller must ensure that `iterator` is `TrustedLen`.
642
#[inline]
643
pub(crate) unsafe fn try_trusted_len_unzip<E, I, P, T>(
644
iterator: I,
645
) -> std::result::Result<(Option<MutableBitmap>, Vec<T>), E>
646
where
647
T: NativeType,
648
P: std::borrow::Borrow<T>,
649
I: Iterator<Item = std::result::Result<Option<P>, E>>,
650
{
651
let (_, upper) = iterator.size_hint();
652
let len = upper.expect("trusted_len_unzip requires an upper limit");
653
654
let mut null = MutableBitmap::with_capacity(len);
655
let mut buffer = Vec::<T>::with_capacity(len);
656
657
let mut dst = buffer.as_mut_ptr();
658
for item in iterator {
659
let item = if let Some(item) = item? {
660
null.push(true);
661
*item.borrow()
662
} else {
663
null.push(false);
664
T::default()
665
};
666
std::ptr::write(dst, item);
667
dst = dst.add(1);
668
}
669
assert_eq!(
670
dst.offset_from(buffer.as_ptr()) as usize,
671
len,
672
"Trusted iterator length was not accurately reported"
673
);
674
buffer.set_len(len);
675
null.set_len(len);
676
677
let validity = Some(null);
678
679
Ok((validity, buffer))
680
}
681
682
impl<T: NativeType> PartialEq for MutablePrimitiveArray<T> {
    /// Two arrays are equal when their element iterators yield equal sequences.
    fn eq(&self, other: &Self) -> bool {
        let lhs = self.iter();
        let rhs = other.iter();
        lhs.eq(rhs)
    }
}
687
688
impl<T: NativeType> TryExtendFromSelf for MutablePrimitiveArray<T> {
689
fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
690
extend_validity(self.len(), &mut self.validity, &other.validity);
691
692
let slice = other.values.as_slice();
693
self.values.extend_from_slice(slice);
694
Ok(())
695
}
696
}
697
698