Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/mod.rs
8398 views
1
//! Contains the [`Array`] and [`MutableArray`] trait objects declaring arrays,
2
//! as well as concrete arrays (such as [`Utf8Array`] and [`MutableUtf8Array`]).
3
//!
4
//! Fixed-length containers with optional values
5
//! that are laid in memory according to the Arrow specification.
6
//! Each array type has its own `struct`. The following are the main array types:
7
//! * [`PrimitiveArray`] and [`MutablePrimitiveArray`], an array of values with a fixed length such as integers, floats, etc.
8
//! * [`BooleanArray`] and [`MutableBooleanArray`], an array of boolean values (stored as a bitmap)
9
//! * [`Utf8Array`] and [`MutableUtf8Array`], an array of variable length utf8 values
10
//! * [`BinaryArray`] and [`MutableBinaryArray`], an array of opaque variable length values
11
//! * [`ListArray`] and [`MutableListArray`], an array of arrays (e.g. `[[1, 2], None, [], [None]]`)
12
//! * [`StructArray`] and [`MutableStructArray`], an array of arrays identified by a string (e.g. `{"a": [1, 2], "b": [true, false]}`)
13
//!
14
//! All immutable arrays implement the trait object [`Array`] and can be downcast
//! to a concrete struct based on [`PhysicalType`](crate::datatypes::PhysicalType) available from [`Array::dtype`].
16
//! All immutable arrays are backed by [`Buffer`](polars_buffer::Buffer) and thus cloning and slicing them is `O(1)`.
17
//!
18
//! Most arrays contain a [`MutableArray`] counterpart that is neither cloneable nor sliceable, but
19
//! can be operated in-place.
20
#![allow(unsafe_op_in_unsafe_fn)]
21
use std::any::Any;
22
use std::sync::Arc;
23
24
use crate::bitmap::{Bitmap, MutableBitmap};
25
use crate::datatypes::ArrowDataType;
26
27
pub mod physical_binary;
28
#[cfg(feature = "proptest")]
29
pub mod proptest;
30
31
/// Types that can be cut into two values of the same type at an offset.
///
/// Implementors supply [`Splitable::check_bound`] and
/// [`Splitable::_split_at_unchecked`]; the checked and unchecked entry points
/// are provided on top of those two.
pub trait Splitable: Sized {
    /// Returns whether `offset` is an acceptable split point for `self`.
    ///
    /// The provided methods use a `true` result as the precondition
    /// (`offset <= self.len()`) of [`Splitable::_split_at_unchecked`].
    fn check_bound(&self, offset: usize) -> bool;

    /// Split [`Self`] at `offset` where `offset <= self.len()`.
    ///
    /// # Panics
    ///
    /// Panics if [`Splitable::check_bound`] rejects `offset`.
    #[inline]
    #[must_use]
    fn split_at(&self, offset: usize) -> (Self, Self) {
        assert!(self.check_bound(offset));
        // SAFETY: `check_bound` accepted `offset` on the line above.
        unsafe { self._split_at_unchecked(offset) }
    }

    /// Split [`Self`] at `offset` without checking `offset <= self.len()`.
    ///
    /// The bound is still verified through a `debug_assert!`, so debug builds
    /// panic on a bad offset.
    ///
    /// # Safety
    ///
    /// Safe if `offset <= self.len()`.
    #[inline]
    #[must_use]
    unsafe fn split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        debug_assert!(self.check_bound(offset));
        // SAFETY: the caller upholds `offset <= self.len()`.
        unsafe { self._split_at_unchecked(offset) }
    }

    /// Internal implementation of `split_at_unchecked`. For any usage, prefer
    /// `split_at` or `split_at_unchecked`.
    ///
    /// # Safety
    ///
    /// Safe if `offset <= self.len()`.
    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self);
}
62
63
/// Splitting a [`Buffer`] clones the handle twice and slices each clone.
impl<T> Splitable for Buffer<T> {
    fn check_bound(&self, offset: usize) -> bool {
        self.len() >= offset
    }

    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        // Caller contract (`offset <= self.len()`) keeps both ranges in bounds.
        (
            self.clone().sliced_unchecked(..offset),
            self.clone().sliced_unchecked(offset..),
        )
    }
}
74
75
/// A trait representing an immutable Arrow array. Arrow arrays are trait objects
76
/// that are infallibly downcast to concrete types according to the [`Array::dtype`].
77
pub trait Array: Send + Sync + dyn_clone::DynClone + 'static {
78
/// Converts itself to a reference of [`Any`], which enables downcasting to concrete types.
79
fn as_any(&self) -> &dyn Any;
80
81
/// Converts itself to a mutable reference of [`Any`], which enables mutable downcasting to concrete types.
82
fn as_any_mut(&mut self) -> &mut dyn Any;
83
84
/// The length of the [`Array`]. Every array has a length corresponding to the number of
85
/// elements (slots).
86
fn len(&self) -> usize;
87
88
/// whether the array is empty
89
fn is_empty(&self) -> bool {
90
self.len() == 0
91
}
92
93
/// The [`ArrowDataType`] of the [`Array`]. In combination with [`Array::as_any`], this can be
94
/// used to downcast trait objects (`dyn Array`) to concrete arrays.
95
fn dtype(&self) -> &ArrowDataType;
96
97
fn dtype_mut(&mut self) -> &mut ArrowDataType;
98
99
/// The validity of the [`Array`]: every array has an optional [`Bitmap`] that, when available
100
/// specifies whether the array slot is valid or not (null).
101
/// When the validity is [`None`], all slots are valid.
102
fn validity(&self) -> Option<&Bitmap>;
103
104
/// The number of null slots on this [`Array`].
105
/// # Implementation
106
/// This is `O(1)` since the number of null elements is pre-computed.
107
#[inline]
108
fn null_count(&self) -> usize {
109
if self.dtype() == &ArrowDataType::Null {
110
return self.len();
111
};
112
self.validity()
113
.as_ref()
114
.map(|x| x.unset_bits())
115
.unwrap_or(0)
116
}
117
118
#[inline]
119
fn has_nulls(&self) -> bool {
120
self.null_count() > 0
121
}
122
123
/// Returns whether slot `i` is null.
124
/// # Panic
125
/// Panics iff `i >= self.len()`.
126
#[inline]
127
fn is_null(&self, i: usize) -> bool {
128
assert!(i < self.len());
129
unsafe { self.is_null_unchecked(i) }
130
}
131
132
/// Returns whether slot `i` is null.
133
///
134
/// # Safety
135
/// The caller must ensure `i < self.len()`
136
#[inline]
137
unsafe fn is_null_unchecked(&self, i: usize) -> bool {
138
self.validity()
139
.as_ref()
140
.map(|x| !x.get_bit_unchecked(i))
141
.unwrap_or(false)
142
}
143
144
/// Returns whether slot `i` is valid.
145
/// # Panic
146
/// Panics iff `i >= self.len()`.
147
#[inline]
148
fn is_valid(&self, i: usize) -> bool {
149
!self.is_null(i)
150
}
151
152
/// Split [`Self`] at `offset` into two boxed [`Array`]s where `offset <= self.len()`.
153
#[must_use]
154
fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>);
155
156
/// Split [`Self`] at `offset` into two boxed [`Array`]s without checking `offset <= self.len()`.
157
///
158
/// # Safety
159
///
160
/// Safe if `offset <= self.len()`.
161
#[must_use]
162
unsafe fn split_at_boxed_unchecked(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>);
163
164
/// Slices this [`Array`].
165
/// # Implementation
166
/// This operation is `O(1)` over `len`.
167
/// # Panic
168
/// This function panics iff `offset + length > self.len()`.
169
fn slice(&mut self, offset: usize, length: usize);
170
171
/// Slices the [`Array`].
172
/// # Implementation
173
/// This operation is `O(1)`.
174
///
175
/// # Safety
176
/// The caller must ensure that `offset + length <= self.len()`
177
unsafe fn slice_unchecked(&mut self, offset: usize, length: usize);
178
179
/// Returns a slice of this [`Array`].
180
/// # Implementation
181
/// This operation is `O(1)` over `len`.
182
/// # Panic
183
/// This function panics iff `offset + length > self.len()`.
184
#[must_use]
185
fn sliced(&self, offset: usize, length: usize) -> Box<dyn Array> {
186
if length == 0 {
187
return new_empty_array(self.dtype().clone());
188
}
189
let mut new = self.to_boxed();
190
new.slice(offset, length);
191
new
192
}
193
194
/// Returns a slice of this [`Array`].
195
/// # Implementation
196
/// This operation is `O(1)` over `len`, as it amounts to increase two ref counts
197
/// and moving the struct to the heap.
198
///
199
/// # Safety
200
/// The caller must ensure that `offset + length <= self.len()`
201
#[must_use]
202
unsafe fn sliced_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
203
debug_assert!(offset + length <= self.len());
204
let mut new = self.to_boxed();
205
new.slice_unchecked(offset, length);
206
new
207
}
208
209
/// Clones this [`Array`] with a new assigned bitmap.
210
/// # Panic
211
/// This function panics iff `validity.len() != self.len()`.
212
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array>;
213
214
/// Clone a `&dyn Array` to an owned `Box<dyn Array>`.
215
fn to_boxed(&self) -> Box<dyn Array>;
216
}
217
218
// Implements `Clone` for `Box<dyn Array>` through the `dyn_clone` crate.
dyn_clone::clone_trait_object!(Array);
219
220
pub trait IntoBoxedArray {
221
fn into_boxed(self) -> Box<dyn Array>;
222
}
223
224
/// Every concrete [`Array`] is boxed by moving it to the heap.
impl<A: Array> IntoBoxedArray for A {
    #[inline(always)]
    fn into_boxed(self) -> Box<dyn Array> {
        // The return type drives the unsizing coercion to `Box<dyn Array>`.
        Box::new(self)
    }
}
230
/// An already boxed array is returned untouched.
impl IntoBoxedArray for Box<dyn Array> {
    #[inline(always)]
    fn into_boxed(self) -> Box<dyn Array> {
        self
    }
}
236
237
/// A trait describing a mutable array; i.e. an array whose values can be changed.
238
///
239
/// Mutable arrays cannot be cloned but can be mutated in place,
240
/// thereby making them useful to perform numeric operations without allocations.
241
/// As in [`Array`], concrete arrays (such as [`MutablePrimitiveArray`]) implement how they are mutated.
242
pub trait MutableArray: std::fmt::Debug + Send + Sync {
243
/// The [`ArrowDataType`] of the array.
244
fn dtype(&self) -> &ArrowDataType;
245
246
/// The length of the array.
247
fn len(&self) -> usize;
248
249
/// Whether the array is empty.
250
fn is_empty(&self) -> bool {
251
self.len() == 0
252
}
253
254
/// The optional validity of the array.
255
fn validity(&self) -> Option<&MutableBitmap>;
256
257
/// Convert itself to an (immutable) [`Array`].
258
fn as_box(&mut self) -> Box<dyn Array>;
259
260
/// Convert itself to an (immutable) atomically reference counted [`Array`].
261
// This provided implementation has an extra allocation as it first
262
// boxes `self`, then converts the box into an `Arc`. Implementors may wish
263
// to avoid an allocation by skipping the box completely.
264
fn as_arc(&mut self) -> std::sync::Arc<dyn Array> {
265
self.as_box().into()
266
}
267
268
/// Convert to `Any`, to enable dynamic casting.
269
fn as_any(&self) -> &dyn Any;
270
271
/// Convert to mutable `Any`, to enable dynamic casting.
272
fn as_mut_any(&mut self) -> &mut dyn Any;
273
274
/// Adds a new null element to the array.
275
fn push_null(&mut self);
276
277
/// Whether `index` is valid / set.
278
/// # Panic
279
/// Panics if `index >= self.len()`.
280
#[inline]
281
fn is_valid(&self, index: usize) -> bool {
282
self.validity()
283
.as_ref()
284
.map(|x| x.get(index))
285
.unwrap_or(true)
286
}
287
288
/// Reserves additional slots to its capacity.
289
fn reserve(&mut self, additional: usize);
290
291
/// Shrink the array to fit its length.
292
fn shrink_to_fit(&mut self);
293
}
294
295
impl MutableArray for Box<dyn MutableArray> {
296
fn len(&self) -> usize {
297
self.as_ref().len()
298
}
299
300
fn validity(&self) -> Option<&MutableBitmap> {
301
self.as_ref().validity()
302
}
303
304
fn as_box(&mut self) -> Box<dyn Array> {
305
self.as_mut().as_box()
306
}
307
308
fn as_arc(&mut self) -> Arc<dyn Array> {
309
self.as_mut().as_arc()
310
}
311
312
fn dtype(&self) -> &ArrowDataType {
313
self.as_ref().dtype()
314
}
315
316
fn as_any(&self) -> &dyn std::any::Any {
317
self.as_ref().as_any()
318
}
319
320
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
321
self.as_mut().as_mut_any()
322
}
323
324
#[inline]
325
fn push_null(&mut self) {
326
self.as_mut().push_null()
327
}
328
329
fn shrink_to_fit(&mut self) {
330
self.as_mut().shrink_to_fit();
331
}
332
333
fn reserve(&mut self, additional: usize) {
334
self.as_mut().reserve(additional);
335
}
336
}
337
338
// Downcasts `$array` (anything exposing `as_any()`) to the concrete type `$ty`
// and applies `$f` to the resulting reference.
//
// Panics when `$array` is not a `$ty`; the message names the expected type so
// a dtype/concrete-type mismatch can be diagnosed.
macro_rules! general_dyn {
    ($array:expr, $ty:ty, $f:expr) => {{
        let concrete = $array
            .as_any()
            .downcast_ref::<$ty>()
            .expect(concat!("array is not a `", stringify!($ty), "`"));
        ($f)(concrete)
    }};
}
344
345
// Formats `$array` by downcasting it to `$ty` (via `general_dyn!`) and calling
// the concrete type's `fmt` with the formatter `$f`.
macro_rules! fmt_dyn {
    ($array:expr, $ty:ty, $f:expr) => {{
        // `mut`: the closure reborrows the formatter mutably on each call.
        let mut render = |concrete: &$ty| concrete.fmt($f);
        general_dyn!($array, $ty, render)
    }};
}
351
352
impl std::fmt::Debug for dyn Array + '_ {
353
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
354
use crate::datatypes::PhysicalType::*;
355
match self.dtype().to_physical_type() {
356
Null => fmt_dyn!(self, NullArray, f),
357
Boolean => fmt_dyn!(self, BooleanArray, f),
358
Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
359
fmt_dyn!(self, PrimitiveArray<$T>, f)
360
}),
361
BinaryView => fmt_dyn!(self, BinaryViewArray, f),
362
Utf8View => fmt_dyn!(self, Utf8ViewArray, f),
363
Binary => fmt_dyn!(self, BinaryArray<i32>, f),
364
LargeBinary => fmt_dyn!(self, BinaryArray<i64>, f),
365
FixedSizeBinary => fmt_dyn!(self, FixedSizeBinaryArray, f),
366
Utf8 => fmt_dyn!(self, Utf8Array::<i32>, f),
367
LargeUtf8 => fmt_dyn!(self, Utf8Array::<i64>, f),
368
List => fmt_dyn!(self, ListArray::<i32>, f),
369
LargeList => fmt_dyn!(self, ListArray::<i64>, f),
370
FixedSizeList => fmt_dyn!(self, FixedSizeListArray, f),
371
Struct => fmt_dyn!(self, StructArray, f),
372
Union => fmt_dyn!(self, UnionArray, f),
373
Dictionary(key_type) => {
374
match_integer_type!(key_type, |$T| {
375
fmt_dyn!(self, DictionaryArray::<$T>, f)
376
})
377
},
378
Map => fmt_dyn!(self, MapArray, f),
379
}
380
}
381
}
382
383
/// Creates a new [`Array`] with a [`Array::len`] of 0.
384
pub fn new_empty_array(dtype: ArrowDataType) -> Box<dyn Array> {
385
use crate::datatypes::PhysicalType::*;
386
match dtype.to_physical_type() {
387
Null => Box::new(NullArray::new_empty(dtype)),
388
Boolean => Box::new(BooleanArray::new_empty(dtype)),
389
Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
390
Box::new(PrimitiveArray::<$T>::new_empty(dtype))
391
}),
392
Binary => Box::new(BinaryArray::<i32>::new_empty(dtype)),
393
LargeBinary => Box::new(BinaryArray::<i64>::new_empty(dtype)),
394
FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_empty(dtype)),
395
Utf8 => Box::new(Utf8Array::<i32>::new_empty(dtype)),
396
LargeUtf8 => Box::new(Utf8Array::<i64>::new_empty(dtype)),
397
List => Box::new(ListArray::<i32>::new_empty(dtype)),
398
LargeList => Box::new(ListArray::<i64>::new_empty(dtype)),
399
FixedSizeList => Box::new(FixedSizeListArray::new_empty(dtype)),
400
Struct => Box::new(StructArray::new_empty(dtype)),
401
Union => Box::new(UnionArray::new_empty(dtype)),
402
Map => Box::new(MapArray::new_empty(dtype)),
403
Utf8View => Box::new(Utf8ViewArray::new_empty(dtype)),
404
BinaryView => Box::new(BinaryViewArray::new_empty(dtype)),
405
Dictionary(key_type) => {
406
match_integer_type!(key_type, |$T| {
407
Box::new(DictionaryArray::<$T>::new_empty(dtype))
408
})
409
},
410
}
411
}
412
413
/// Creates a new [`Array`] of [`ArrowDataType`] `dtype` and `length`.
414
///
415
/// The array is guaranteed to have [`Array::null_count`] equal to [`Array::len`]
416
/// for all types except Union, which does not have a validity.
417
pub fn new_null_array(dtype: ArrowDataType, length: usize) -> Box<dyn Array> {
418
use crate::datatypes::PhysicalType::*;
419
match dtype.to_physical_type() {
420
Null => Box::new(NullArray::new_null(dtype, length)),
421
Boolean => Box::new(BooleanArray::new_null(dtype, length)),
422
Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
423
Box::new(PrimitiveArray::<$T>::new_null(dtype, length))
424
}),
425
Binary => Box::new(BinaryArray::<i32>::new_null(dtype, length)),
426
LargeBinary => Box::new(BinaryArray::<i64>::new_null(dtype, length)),
427
FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_null(dtype, length)),
428
Utf8 => Box::new(Utf8Array::<i32>::new_null(dtype, length)),
429
LargeUtf8 => Box::new(Utf8Array::<i64>::new_null(dtype, length)),
430
List => Box::new(ListArray::<i32>::new_null(dtype, length)),
431
LargeList => Box::new(ListArray::<i64>::new_null(dtype, length)),
432
FixedSizeList => Box::new(FixedSizeListArray::new_null(dtype, length)),
433
Struct => Box::new(StructArray::new_null(dtype, length)),
434
Union => Box::new(UnionArray::new_null(dtype, length)),
435
Map => Box::new(MapArray::new_null(dtype, length)),
436
BinaryView => Box::new(BinaryViewArray::new_null(dtype, length)),
437
Utf8View => Box::new(Utf8ViewArray::new_null(dtype, length)),
438
Dictionary(key_type) => {
439
match_integer_type!(key_type, |$T| {
440
Box::new(DictionaryArray::<$T>::new_null(dtype, length))
441
})
442
},
443
}
444
}
445
446
// Downcasts `$array` to `$ty` (via `general_dyn!`) and returns a boxed clone
// of the concrete array.
macro_rules! clone_dyn {
    ($array:expr, $ty:ty) => {{
        let duplicate = |concrete: &$ty| Box::new(concrete.clone());
        general_dyn!($array, $ty, duplicate)
    }};
}
452
453
// Generates the by-value `sliced` / `sliced_unchecked` pair for an array type
// that already provides inherent `len` and `slice_unchecked` methods.
macro_rules! impl_sliced {
    () => {
        /// Consumes the array and returns the window `[offset, offset + length)`.
        /// # Implementation
        /// This function is `O(1)`.
        /// # Panics
        /// iff `offset + length > self.len()` (including when the sum overflows).
        #[inline]
        #[must_use]
        pub fn sliced(self, offset: usize, length: usize) -> Self {
            let end = offset
                .checked_add(length)
                .expect("offset + length overflowed");
            assert!(
                end <= self.len(),
                "the offset of the new Buffer cannot exceed the existing length"
            );
            // SAFETY: `offset + length <= self.len()` was asserted above.
            unsafe { self.sliced_unchecked(offset, length) }
        }

        /// Consumes the array and returns the window `[offset, offset + length)`
        /// without bounds checks.
        /// # Implementation
        /// This function is `O(1)`.
        ///
        /// # Safety
        /// The caller must ensure that `offset + length <= self.len()`.
        #[inline]
        #[must_use]
        pub unsafe fn sliced_unchecked(mut self, offset: usize, length: usize) -> Self {
            self.slice_unchecked(offset, length);
            self
        }
    };
}
488
489
// macro implementing `with_validity`, `set_validity` and `take_validity` for
// array types that have a `validity: Option<Bitmap>` field and a `len` method
macro_rules! impl_mut_validity {
    () => {
        /// Returns this array with a new validity.
        /// # Panics
        /// Panics iff `validity.len() != self.len()`.
        #[must_use]
        #[inline]
        pub fn with_validity(mut self, validity: Option<Bitmap>) -> Self {
            self.set_validity(validity);
            self
        }

        /// Sets the validity of this array.
        ///
        /// Passing `None` removes the validity and is always accepted.
        /// # Panics
        /// This function panics iff `validity.len() != self.len()`.
        #[inline]
        pub fn set_validity(&mut self, validity: Option<Bitmap>) {
            if let Some(bitmap) = &validity {
                assert!(
                    bitmap.len() == self.len(),
                    "validity must be equal to the array's length"
                );
            }
            self.validity = validity;
        }

        /// Takes the validity of this array, leaving it without a validity mask.
        #[inline]
        pub fn take_validity(&mut self) -> Option<Bitmap> {
            self.validity.take()
        }
    };
}
520
521
// macro implementing `with_validity`, `set_validity` and `apply_validity` for mutable arrays
// that have a `validity: Option<MutableBitmap>` field and a `len` method
macro_rules! impl_mutable_array_mut_validity {
    () => {
        /// Returns this array with a new validity.
        /// # Panics
        /// Panics iff `validity.len() != self.len()`.
        #[must_use]
        #[inline]
        pub fn with_validity(mut self, validity: Option<MutableBitmap>) -> Self {
            self.set_validity(validity);
            self
        }

        /// Sets the validity of this array.
        ///
        /// Passing `None` removes the validity and is always accepted.
        /// # Panics
        /// This function panics iff `validity.len() != self.len()`.
        #[inline]
        pub fn set_validity(&mut self, validity: Option<MutableBitmap>) {
            if let Some(bitmap) = &validity {
                assert!(
                    bitmap.len() == self.len(),
                    "validity must be equal to the array's length"
                );
            }
            self.validity = validity;
        }

        /// Applies a function `f` to the validity of this array.
        ///
        /// The bitmap is moved out of the array, passed to `f` by value, and
        /// the result is stored back through `set_validity`. Nothing happens
        /// (and `f` is not called) when the array has no validity.
        /// # Panics
        /// This function panics if the function `f` modifies the length of the
        /// [`MutableBitmap`]; the array is left without a validity in that case.
        #[inline]
        pub fn apply_validity<F: FnOnce(MutableBitmap) -> MutableBitmap>(&mut self, f: F) {
            if let Some(validity) = self.validity.take() {
                self.set_validity(Some(f(validity)))
            }
        }
    };
}
559
560
// Generates the `boxed` / `arced` conversions that move a concrete array
// behind a `dyn Array` pointer.
macro_rules! impl_into_array {
    () => {
        /// Moves this array into a [`Box<dyn Array>`].
        pub fn boxed(self) -> Box<dyn Array> {
            let boxed: Box<dyn Array> = Box::new(self);
            boxed
        }

        /// Moves this array into a [`std::sync::Arc<dyn Array>`].
        pub fn arced(self) -> std::sync::Arc<dyn Array> {
            let arced: std::sync::Arc<dyn Array> = std::sync::Arc::new(self);
            arced
        }
    };
}
574
575
// Generates the `Array` trait methods that every concrete array implements
// the same way. Meant to be expanded inside an `impl Array for X` block where
// `X` has a `dtype` field, is `Clone` and `Splitable`, and offers inherent
// `len`, `slice` and `slice_unchecked` methods. The method-call syntax below
// (`self.len()`, `self.slice(..)`) resolves to those inherent methods.
macro_rules! impl_common_array {
    () => {
        #[inline]
        fn as_any(&self) -> &dyn std::any::Any {
            self
        }

        #[inline]
        fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
            self
        }

        #[inline]
        fn len(&self) -> usize {
            self.len()
        }

        #[inline]
        fn dtype(&self) -> &ArrowDataType {
            &self.dtype
        }

        #[inline]
        fn dtype_mut(&mut self) -> &mut ArrowDataType {
            &mut self.dtype
        }

        #[inline]
        fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
            let (left, right) = $crate::array::Splitable::split_at(self, offset);
            (Box::new(left), Box::new(right))
        }

        #[inline]
        unsafe fn split_at_boxed_unchecked(
            &self,
            offset: usize,
        ) -> (Box<dyn Array>, Box<dyn Array>) {
            // Relies on the caller's guarantee that `offset <= self.len()`.
            let (left, right) =
                unsafe { $crate::array::Splitable::split_at_unchecked(self, offset) };
            (Box::new(left), Box::new(right))
        }

        #[inline]
        fn slice(&mut self, offset: usize, length: usize) {
            self.slice(offset, length);
        }

        #[inline]
        unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
            self.slice_unchecked(offset, length);
        }

        #[inline]
        fn to_boxed(&self) -> Box<dyn Array> {
            Box::new(self.clone())
        }
    };
}
634
635
/// Clones a dynamic [`Array`].
636
/// # Implementation
637
/// This operation is `O(1)` over `len`, as it amounts to increase two ref counts
638
/// and moving the concrete struct under a `Box`.
639
pub fn clone(array: &dyn Array) -> Box<dyn Array> {
640
use crate::datatypes::PhysicalType::*;
641
match array.dtype().to_physical_type() {
642
Null => clone_dyn!(array, NullArray),
643
Boolean => clone_dyn!(array, BooleanArray),
644
Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
645
clone_dyn!(array, PrimitiveArray<$T>)
646
}),
647
Binary => clone_dyn!(array, BinaryArray<i32>),
648
LargeBinary => clone_dyn!(array, BinaryArray<i64>),
649
FixedSizeBinary => clone_dyn!(array, FixedSizeBinaryArray),
650
Utf8 => clone_dyn!(array, Utf8Array::<i32>),
651
LargeUtf8 => clone_dyn!(array, Utf8Array::<i64>),
652
List => clone_dyn!(array, ListArray::<i32>),
653
LargeList => clone_dyn!(array, ListArray::<i64>),
654
FixedSizeList => clone_dyn!(array, FixedSizeListArray),
655
Struct => clone_dyn!(array, StructArray),
656
Union => clone_dyn!(array, UnionArray),
657
Map => clone_dyn!(array, MapArray),
658
BinaryView => clone_dyn!(array, BinaryViewArray),
659
Utf8View => clone_dyn!(array, Utf8ViewArray),
660
Dictionary(key_type) => {
661
match_integer_type!(key_type, |$T| {
662
clone_dyn!(array, DictionaryArray::<$T>)
663
})
664
},
665
}
666
}
667
668
// see https://users.rust-lang.org/t/generic-for-dyn-a-or-box-dyn-a-or-arc-dyn-a/69430/3
669
// for details
670
impl<'a> AsRef<dyn Array + 'a> for dyn Array {
671
fn as_ref(&self) -> &(dyn Array + 'a) {
672
self
673
}
674
}
675
676
mod binary;
677
mod boolean;
678
pub mod builder;
679
mod dictionary;
680
mod fixed_size_binary;
681
mod fixed_size_list;
682
mod list;
683
pub use list::LIST_VALUES_NAME;
684
mod map;
685
mod null;
686
mod primitive;
687
pub mod specification;
688
mod static_array;
689
mod static_array_collect;
690
mod struct_;
691
mod total_ord;
692
mod union;
693
mod utf8;
694
695
mod equal;
696
mod ffi;
697
mod fmt;
698
#[doc(hidden)]
699
pub mod indexable;
700
pub mod iterator;
701
702
mod binview;
703
mod values;
704
705
pub use binary::{
706
BinaryArray, BinaryArrayBuilder, BinaryValueIter, MutableBinaryArray, MutableBinaryValuesArray,
707
};
708
pub use binview::{
709
BinaryViewArray, BinaryViewArrayBuilder, BinaryViewArrayGeneric, BinaryViewArrayGenericBuilder,
710
MutableBinaryViewArray, MutablePlBinary, MutablePlString, Utf8ViewArray, Utf8ViewArrayBuilder,
711
View, ViewType,
712
};
713
pub use boolean::{BooleanArray, BooleanArrayBuilder, MutableBooleanArray};
714
pub use dictionary::{DictionaryArray, DictionaryKey, MutableDictionaryArray};
715
pub use equal::equal;
716
pub use fixed_size_binary::{
717
FixedSizeBinaryArray, FixedSizeBinaryArrayBuilder, MutableFixedSizeBinaryArray,
718
};
719
pub use fixed_size_list::{
720
FixedSizeListArray, FixedSizeListArrayBuilder, MutableFixedSizeListArray,
721
};
722
pub use fmt::{get_display, get_value_display};
723
pub(crate) use iterator::ArrayAccessor;
724
pub use iterator::ArrayValuesIter;
725
pub use list::{ListArray, ListArrayBuilder, ListValuesIter, MutableListArray};
726
pub use map::MapArray;
727
pub use null::{MutableNullArray, NullArray, NullArrayBuilder};
728
use polars_buffer::Buffer;
729
use polars_error::PolarsResult;
730
pub use primitive::*;
731
pub use static_array::{ParameterFreeDtypeStaticArray, StaticArray};
732
pub use static_array_collect::{ArrayCollectIterExt, ArrayFromIter, ArrayFromIterDtype};
733
pub use struct_::{StructArray, StructArrayBuilder};
734
pub use union::UnionArray;
735
pub use utf8::{MutableUtf8Array, MutableUtf8ValuesArray, Utf8Array, Utf8ValuesIter};
736
pub use values::ValueSize;
737
738
#[cfg(feature = "proptest")]
739
pub use self::boolean::proptest::boolean_array;
740
pub(crate) use self::ffi::{FromFfi, ToFfi, offset_buffers_children_dictionary};
741
use crate::{match_integer_type, with_match_primitive_type_full};
742
743
/// A trait describing the ability of a struct to create itself from a iterator.
744
/// This is similar to [`Extend`], but accepted the creation to error.
745
pub trait TryExtend<A> {
746
/// Fallible version of [`Extend::extend`].
747
fn try_extend<I: IntoIterator<Item = A>>(&mut self, iter: I) -> PolarsResult<()>;
748
}
749
750
/// A trait describing the ability of a struct to receive new items.
751
pub trait TryPush<A> {
752
/// Tries to push a new element.
753
fn try_push(&mut self, item: A) -> PolarsResult<()>;
754
}
755
756
/// A trait describing the ability of a struct to receive new items without
/// validating them.
pub trait PushUnchecked<A> {
    /// Push a new element that holds the invariants of the struct.
    ///
    /// # Safety
    /// The items must uphold the invariants of the struct.
    /// Read the specific implementation of the trait to understand what these are.
    unsafe fn push_unchecked(&mut self, item: A);
}
765
766
/// A trait describing the ability of a struct to extend from a reference of itself.
767
/// Specialization of [`TryExtend`].
768
pub trait TryExtendFromSelf {
769
/// Tries to extend itself with elements from `other`, failing only on overflow.
770
fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()>;
771
}
772
773
/// Trait that [`BinaryArray`] and [`Utf8Array`] implement for the purposes of DRY.
774
/// # Safety
775
/// The implementer must ensure that
776
/// 1. `offsets.len() > 0`
777
/// 2. `offsets[i] >= offsets[i-1] for all i`
778
/// 3. `offsets[i] < values.len() for all i`
779
pub unsafe trait GenericBinaryArray<O: crate::offset::Offset>: Array {
780
/// The values of the array
781
fn values(&self) -> &[u8];
782
/// The offsets of the array
783
fn offsets(&self) -> &[O];
784
}
785
786
/// Alias for an owned, boxed [`Array`] trait object.
pub type ArrayRef = Box<dyn Array>;
787
788
/// Splits an optional validity bitmap. A missing bitmap splits into two
/// missing bitmaps, and a half with no unset bits is returned as `None`.
impl Splitable for Option<Bitmap> {
    #[inline(always)]
    fn check_bound(&self, offset: usize) -> bool {
        match self {
            Some(bitmap) => offset <= bitmap.len(),
            // Without a bitmap there is nothing to bound-check against.
            None => true,
        }
    }

    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        let Some(bitmap) = self.as_ref() else {
            return (None, None);
        };

        // Relies on the caller's guarantee that `offset` is within bounds.
        let (lhs, rhs) = unsafe { bitmap.split_at_unchecked(offset) };

        // Keep a half only when it has at least one unset bit.
        let lhs = if lhs.unset_bits() > 0 { Some(lhs) } else { None };
        let rhs = if rhs.unset_bits() > 0 { Some(rhs) } else { None };
        (lhs, rhs)
    }
}
804
805