Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/mod.rs
6939 views
1
//! Contains the [`Array`] and [`MutableArray`] trait objects declaring arrays,
2
//! as well as concrete arrays (such as [`Utf8Array`] and [`MutableUtf8Array`]).
3
//!
4
//! Fixed-length containers with optional values
5
//! that are laid in memory according to the Arrow specification.
6
//! Each array type has its own `struct`. The following are the main array types:
7
//! * [`PrimitiveArray`] and [`MutablePrimitiveArray`], an array of values with a fixed length such as integers, floats, etc.
8
//! * [`BooleanArray`] and [`MutableBooleanArray`], an array of boolean values (stored as a bitmap)
9
//! * [`Utf8Array`] and [`MutableUtf8Array`], an array of variable length utf8 values
10
//! * [`BinaryArray`] and [`MutableBinaryArray`], an array of opaque variable length values
11
//! * [`ListArray`] and [`MutableListArray`], an array of arrays (e.g. `[[1, 2], None, [], [None]]`)
12
//! * [`StructArray`] and [`MutableStructArray`], an array of arrays identified by a string (e.g. `{"a": [1, 2], "b": [true, false]}`)
13
//!
14
//! All immutable arrays implement the trait object [`Array`] and can be downcast
15
//! to a concrete struct based on [`PhysicalType`](crate::datatypes::PhysicalType) available from [`Array::dtype`].
16
//! All immutable arrays are backed by [`Buffer`](crate::buffer::Buffer) and thus cloning and slicing them is `O(1)`.
17
//!
18
//! Most arrays contain a [`MutableArray`] counterpart that is neither cloneable nor sliceable, but
19
//! can be operated in-place.
20
#![allow(unsafe_op_in_unsafe_fn)]
21
use std::any::Any;
22
use std::sync::Arc;
23
24
use crate::bitmap::{Bitmap, MutableBitmap};
25
use crate::datatypes::ArrowDataType;
26
27
pub mod physical_binary;
28
#[cfg(feature = "proptest")]
29
pub mod proptest;
30
31
/// Types that can be cut into two values of the same type at a given position.
///
/// Implementors provide the bound check and the unchecked split; the checked and
/// debug-checked entry points are derived from those two.
pub trait Splitable: Sized {
    /// Returns `true` when `offset` is an acceptable split position for `self`.
    fn check_bound(&self, offset: usize) -> bool;

    /// Split [`Self`] at `offset` where `offset <= self.len()`.
    ///
    /// # Panics
    ///
    /// Panics if [`Splitable::check_bound`] returns `false` for `offset`.
    #[inline]
    #[must_use]
    fn split_at(&self, offset: usize) -> (Self, Self) {
        assert!(self.check_bound(offset));
        // SAFETY: `offset` passed the bound check above.
        unsafe { self._split_at_unchecked(offset) }
    }

    /// Split [`Self`] at `offset` without checking `offset <= self.len()`.
    ///
    /// The bound is still verified in debug builds through a `debug_assert!`.
    ///
    /// # Safety
    ///
    /// Safe if `offset <= self.len()`.
    #[inline]
    #[must_use]
    unsafe fn split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        debug_assert!(self.check_bound(offset));
        // SAFETY: the caller upholds `offset <= self.len()`.
        unsafe { self._split_at_unchecked(offset) }
    }

    /// Internal implementation of `split_at_unchecked`. For any usage, prefer
    /// `split_at` or `split_at_unchecked`.
    ///
    /// # Safety
    ///
    /// Safe if `offset <= self.len()`.
    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self);
}
62
63
/// A trait representing an immutable Arrow array. Arrow arrays are trait objects
64
/// that are infallibly downcast to concrete types according to the [`Array::dtype`].
65
pub trait Array: Send + Sync + dyn_clone::DynClone + 'static {
66
/// Converts itself to a reference of [`Any`], which enables downcasting to concrete types.
67
fn as_any(&self) -> &dyn Any;
68
69
/// Converts itself to a mutable reference of [`Any`], which enables mutable downcasting to concrete types.
70
fn as_any_mut(&mut self) -> &mut dyn Any;
71
72
/// The length of the [`Array`]. Every array has a length corresponding to the number of
73
/// elements (slots).
74
fn len(&self) -> usize;
75
76
/// whether the array is empty
77
fn is_empty(&self) -> bool {
78
self.len() == 0
79
}
80
81
/// The [`ArrowDataType`] of the [`Array`]. In combination with [`Array::as_any`], this can be
82
/// used to downcast trait objects (`dyn Array`) to concrete arrays.
83
fn dtype(&self) -> &ArrowDataType;
84
85
/// The validity of the [`Array`]: every array has an optional [`Bitmap`] that, when available
86
/// specifies whether the array slot is valid or not (null).
87
/// When the validity is [`None`], all slots are valid.
88
fn validity(&self) -> Option<&Bitmap>;
89
90
/// The number of null slots on this [`Array`].
91
/// # Implementation
92
/// This is `O(1)` since the number of null elements is pre-computed.
93
#[inline]
94
fn null_count(&self) -> usize {
95
if self.dtype() == &ArrowDataType::Null {
96
return self.len();
97
};
98
self.validity()
99
.as_ref()
100
.map(|x| x.unset_bits())
101
.unwrap_or(0)
102
}
103
104
#[inline]
105
fn has_nulls(&self) -> bool {
106
self.null_count() > 0
107
}
108
109
/// Returns whether slot `i` is null.
110
/// # Panic
111
/// Panics iff `i >= self.len()`.
112
#[inline]
113
fn is_null(&self, i: usize) -> bool {
114
assert!(i < self.len());
115
unsafe { self.is_null_unchecked(i) }
116
}
117
118
/// Returns whether slot `i` is null.
119
///
120
/// # Safety
121
/// The caller must ensure `i < self.len()`
122
#[inline]
123
unsafe fn is_null_unchecked(&self, i: usize) -> bool {
124
self.validity()
125
.as_ref()
126
.map(|x| !x.get_bit_unchecked(i))
127
.unwrap_or(false)
128
}
129
130
/// Returns whether slot `i` is valid.
131
/// # Panic
132
/// Panics iff `i >= self.len()`.
133
#[inline]
134
fn is_valid(&self, i: usize) -> bool {
135
!self.is_null(i)
136
}
137
138
/// Split [`Self`] at `offset` into two boxed [`Array`]s where `offset <= self.len()`.
139
#[must_use]
140
fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>);
141
142
/// Split [`Self`] at `offset` into two boxed [`Array`]s without checking `offset <= self.len()`.
143
///
144
/// # Safety
145
///
146
/// Safe if `offset <= self.len()`.
147
#[must_use]
148
unsafe fn split_at_boxed_unchecked(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>);
149
150
/// Slices this [`Array`].
151
/// # Implementation
152
/// This operation is `O(1)` over `len`.
153
/// # Panic
154
/// This function panics iff `offset + length > self.len()`.
155
fn slice(&mut self, offset: usize, length: usize);
156
157
/// Slices the [`Array`].
158
/// # Implementation
159
/// This operation is `O(1)`.
160
///
161
/// # Safety
162
/// The caller must ensure that `offset + length <= self.len()`
163
unsafe fn slice_unchecked(&mut self, offset: usize, length: usize);
164
165
/// Returns a slice of this [`Array`].
166
/// # Implementation
167
/// This operation is `O(1)` over `len`.
168
/// # Panic
169
/// This function panics iff `offset + length > self.len()`.
170
#[must_use]
171
fn sliced(&self, offset: usize, length: usize) -> Box<dyn Array> {
172
if length == 0 {
173
return new_empty_array(self.dtype().clone());
174
}
175
let mut new = self.to_boxed();
176
new.slice(offset, length);
177
new
178
}
179
180
/// Returns a slice of this [`Array`].
181
/// # Implementation
182
/// This operation is `O(1)` over `len`, as it amounts to increase two ref counts
183
/// and moving the struct to the heap.
184
///
185
/// # Safety
186
/// The caller must ensure that `offset + length <= self.len()`
187
#[must_use]
188
unsafe fn sliced_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
189
debug_assert!(offset + length <= self.len());
190
let mut new = self.to_boxed();
191
new.slice_unchecked(offset, length);
192
new
193
}
194
195
/// Clones this [`Array`] with a new assigned bitmap.
196
/// # Panic
197
/// This function panics iff `validity.len() != self.len()`.
198
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array>;
199
200
/// Clone a `&dyn Array` to an owned `Box<dyn Array>`.
201
fn to_boxed(&self) -> Box<dyn Array>;
202
}
203
204
// Makes `Box<dyn Array>` implement `Clone` through the `dyn_clone` crate.
dyn_clone::clone_trait_object!(Array);
205
206
pub trait IntoBoxedArray {
207
fn into_boxed(self) -> Box<dyn Array>;
208
}
209
210
impl<A: Array> IntoBoxedArray for A {
211
#[inline(always)]
212
fn into_boxed(self) -> Box<dyn Array> {
213
Box::new(self) as _
214
}
215
}
216
impl IntoBoxedArray for Box<dyn Array> {
217
#[inline(always)]
218
fn into_boxed(self) -> Box<dyn Array> {
219
self
220
}
221
}
222
223
/// A trait describing a mutable array; i.e. an array whose values can be changed.
224
///
225
/// Mutable arrays cannot be cloned but can be mutated in place,
226
/// thereby making them useful to perform numeric operations without allocations.
227
/// As in [`Array`], concrete arrays (such as [`MutablePrimitiveArray`]) implement how they are mutated.
228
pub trait MutableArray: std::fmt::Debug + Send + Sync {
229
/// The [`ArrowDataType`] of the array.
230
fn dtype(&self) -> &ArrowDataType;
231
232
/// The length of the array.
233
fn len(&self) -> usize;
234
235
/// Whether the array is empty.
236
fn is_empty(&self) -> bool {
237
self.len() == 0
238
}
239
240
/// The optional validity of the array.
241
fn validity(&self) -> Option<&MutableBitmap>;
242
243
/// Convert itself to an (immutable) [`Array`].
244
fn as_box(&mut self) -> Box<dyn Array>;
245
246
/// Convert itself to an (immutable) atomically reference counted [`Array`].
247
// This provided implementation has an extra allocation as it first
248
// boxes `self`, then converts the box into an `Arc`. Implementors may wish
249
// to avoid an allocation by skipping the box completely.
250
fn as_arc(&mut self) -> std::sync::Arc<dyn Array> {
251
self.as_box().into()
252
}
253
254
/// Convert to `Any`, to enable dynamic casting.
255
fn as_any(&self) -> &dyn Any;
256
257
/// Convert to mutable `Any`, to enable dynamic casting.
258
fn as_mut_any(&mut self) -> &mut dyn Any;
259
260
/// Adds a new null element to the array.
261
fn push_null(&mut self);
262
263
/// Whether `index` is valid / set.
264
/// # Panic
265
/// Panics if `index >= self.len()`.
266
#[inline]
267
fn is_valid(&self, index: usize) -> bool {
268
self.validity()
269
.as_ref()
270
.map(|x| x.get(index))
271
.unwrap_or(true)
272
}
273
274
/// Reserves additional slots to its capacity.
275
fn reserve(&mut self, additional: usize);
276
277
/// Shrink the array to fit its length.
278
fn shrink_to_fit(&mut self);
279
}
280
281
impl MutableArray for Box<dyn MutableArray> {
282
fn len(&self) -> usize {
283
self.as_ref().len()
284
}
285
286
fn validity(&self) -> Option<&MutableBitmap> {
287
self.as_ref().validity()
288
}
289
290
fn as_box(&mut self) -> Box<dyn Array> {
291
self.as_mut().as_box()
292
}
293
294
fn as_arc(&mut self) -> Arc<dyn Array> {
295
self.as_mut().as_arc()
296
}
297
298
fn dtype(&self) -> &ArrowDataType {
299
self.as_ref().dtype()
300
}
301
302
fn as_any(&self) -> &dyn std::any::Any {
303
self.as_ref().as_any()
304
}
305
306
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
307
self.as_mut().as_mut_any()
308
}
309
310
#[inline]
311
fn push_null(&mut self) {
312
self.as_mut().push_null()
313
}
314
315
fn shrink_to_fit(&mut self) {
316
self.as_mut().shrink_to_fit();
317
}
318
319
fn reserve(&mut self, additional: usize) {
320
self.as_mut().reserve(additional);
321
}
322
}
323
324
// Downcasts `$array` (anything exposing `as_any()`) to the concrete type `$ty` and applies
// `$f` to the resulting reference.
//
// Panics when the value behind `$array` is not a `$ty`; the panic message names the
// requested type so that a dtype/concrete-type mismatch is easy to diagnose.
macro_rules! general_dyn {
    ($array:expr, $ty:ty, $f:expr) => {{
        let concrete = $array.as_any().downcast_ref::<$ty>().expect(concat!(
            "failed to downcast array to `",
            stringify!($ty),
            "`"
        ));
        ($f)(concrete)
    }};
}
330
331
// Formats `$array` by downcasting it to `$ty` (through `general_dyn!`) and calling the
// concrete type's `fmt` with the formatter `$f`.
macro_rules! fmt_dyn {
    ($array:expr, $ty:ty, $f:expr) => {{
        let mut write_concrete = |concrete: &$ty| concrete.fmt($f);
        general_dyn!($array, $ty, write_concrete)
    }};
}
337
338
impl std::fmt::Debug for dyn Array + '_ {
339
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
340
use crate::datatypes::PhysicalType::*;
341
match self.dtype().to_physical_type() {
342
Null => fmt_dyn!(self, NullArray, f),
343
Boolean => fmt_dyn!(self, BooleanArray, f),
344
Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
345
fmt_dyn!(self, PrimitiveArray<$T>, f)
346
}),
347
BinaryView => fmt_dyn!(self, BinaryViewArray, f),
348
Utf8View => fmt_dyn!(self, Utf8ViewArray, f),
349
Binary => fmt_dyn!(self, BinaryArray<i32>, f),
350
LargeBinary => fmt_dyn!(self, BinaryArray<i64>, f),
351
FixedSizeBinary => fmt_dyn!(self, FixedSizeBinaryArray, f),
352
Utf8 => fmt_dyn!(self, Utf8Array::<i32>, f),
353
LargeUtf8 => fmt_dyn!(self, Utf8Array::<i64>, f),
354
List => fmt_dyn!(self, ListArray::<i32>, f),
355
LargeList => fmt_dyn!(self, ListArray::<i64>, f),
356
FixedSizeList => fmt_dyn!(self, FixedSizeListArray, f),
357
Struct => fmt_dyn!(self, StructArray, f),
358
Union => fmt_dyn!(self, UnionArray, f),
359
Dictionary(key_type) => {
360
match_integer_type!(key_type, |$T| {
361
fmt_dyn!(self, DictionaryArray::<$T>, f)
362
})
363
},
364
Map => fmt_dyn!(self, MapArray, f),
365
}
366
}
367
}
368
369
/// Creates a new [`Array`] with a [`Array::len`] of 0.
370
pub fn new_empty_array(dtype: ArrowDataType) -> Box<dyn Array> {
371
use crate::datatypes::PhysicalType::*;
372
match dtype.to_physical_type() {
373
Null => Box::new(NullArray::new_empty(dtype)),
374
Boolean => Box::new(BooleanArray::new_empty(dtype)),
375
Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
376
Box::new(PrimitiveArray::<$T>::new_empty(dtype))
377
}),
378
Binary => Box::new(BinaryArray::<i32>::new_empty(dtype)),
379
LargeBinary => Box::new(BinaryArray::<i64>::new_empty(dtype)),
380
FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_empty(dtype)),
381
Utf8 => Box::new(Utf8Array::<i32>::new_empty(dtype)),
382
LargeUtf8 => Box::new(Utf8Array::<i64>::new_empty(dtype)),
383
List => Box::new(ListArray::<i32>::new_empty(dtype)),
384
LargeList => Box::new(ListArray::<i64>::new_empty(dtype)),
385
FixedSizeList => Box::new(FixedSizeListArray::new_empty(dtype)),
386
Struct => Box::new(StructArray::new_empty(dtype)),
387
Union => Box::new(UnionArray::new_empty(dtype)),
388
Map => Box::new(MapArray::new_empty(dtype)),
389
Utf8View => Box::new(Utf8ViewArray::new_empty(dtype)),
390
BinaryView => Box::new(BinaryViewArray::new_empty(dtype)),
391
Dictionary(key_type) => {
392
match_integer_type!(key_type, |$T| {
393
Box::new(DictionaryArray::<$T>::new_empty(dtype))
394
})
395
},
396
}
397
}
398
399
/// Creates a new [`Array`] of [`ArrowDataType`] `dtype` and `length`.
400
///
401
/// The array is guaranteed to have [`Array::null_count`] equal to [`Array::len`]
402
/// for all types except Union, which does not have a validity.
403
pub fn new_null_array(dtype: ArrowDataType, length: usize) -> Box<dyn Array> {
404
use crate::datatypes::PhysicalType::*;
405
match dtype.to_physical_type() {
406
Null => Box::new(NullArray::new_null(dtype, length)),
407
Boolean => Box::new(BooleanArray::new_null(dtype, length)),
408
Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
409
Box::new(PrimitiveArray::<$T>::new_null(dtype, length))
410
}),
411
Binary => Box::new(BinaryArray::<i32>::new_null(dtype, length)),
412
LargeBinary => Box::new(BinaryArray::<i64>::new_null(dtype, length)),
413
FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_null(dtype, length)),
414
Utf8 => Box::new(Utf8Array::<i32>::new_null(dtype, length)),
415
LargeUtf8 => Box::new(Utf8Array::<i64>::new_null(dtype, length)),
416
List => Box::new(ListArray::<i32>::new_null(dtype, length)),
417
LargeList => Box::new(ListArray::<i64>::new_null(dtype, length)),
418
FixedSizeList => Box::new(FixedSizeListArray::new_null(dtype, length)),
419
Struct => Box::new(StructArray::new_null(dtype, length)),
420
Union => Box::new(UnionArray::new_null(dtype, length)),
421
Map => Box::new(MapArray::new_null(dtype, length)),
422
BinaryView => Box::new(BinaryViewArray::new_null(dtype, length)),
423
Utf8View => Box::new(Utf8ViewArray::new_null(dtype, length)),
424
Dictionary(key_type) => {
425
match_integer_type!(key_type, |$T| {
426
Box::new(DictionaryArray::<$T>::new_null(dtype, length))
427
})
428
},
429
}
430
}
431
432
// Downcasts `$array` to the concrete type `$ty` (through `general_dyn!`) and returns a boxed
// clone of it.
macro_rules! clone_dyn {
    ($array:expr, $ty:ty) => {{
        let duplicate = |concrete: &$ty| Box::new(concrete.clone());
        general_dyn!($array, $ty, duplicate)
    }};
}
438
439
// macro implementing `sliced` and `sliced_unchecked`
//
// It must be expanded inside an `impl` block of a type that provides `len(&self)` and
// `unsafe fn slice_unchecked(&mut self, offset, length)`.
macro_rules! impl_sliced {
    () => {
        /// Returns this array sliced.
        /// # Implementation
        /// This function is `O(1)`.
        /// # Panics
        /// iff `offset + length > self.len()`, or if `offset + length` overflows `usize`.
        #[inline]
        #[must_use]
        pub fn sliced(self, offset: usize, length: usize) -> Self {
            let total = offset
                .checked_add(length)
                .expect("offset + length overflowed");
            assert!(
                total <= self.len(),
                "the offset of the new Buffer cannot exceed the existing length"
            );
            // SAFETY: `offset + length <= self.len()` was asserted just above.
            unsafe { Self::sliced_unchecked(self, offset, length) }
        }

        /// Returns this array sliced.
        /// # Implementation
        /// This function is `O(1)`.
        ///
        /// # Safety
        /// The caller must ensure that `offset + length <= self.len()`.
        #[inline]
        #[must_use]
        pub unsafe fn sliced_unchecked(mut self, offset: usize, length: usize) -> Self {
            // SAFETY: the bound is forwarded unchanged from this function's own contract.
            unsafe { Self::slice_unchecked(&mut self, offset, length) };
            self
        }
    };
}
474
475
// macro implementing `with_validity`, `set_validity` and `take_validity`
//
// It must be expanded inside an `impl` block of a type that has a `validity: Option<Bitmap>`
// field and a `len(&self)` method.
macro_rules! impl_mut_validity {
    () => {
        /// Returns this array with a new validity.
        /// # Panic
        /// Panics iff `validity.len() != self.len()`.
        #[must_use]
        #[inline]
        pub fn with_validity(mut self, validity: Option<Bitmap>) -> Self {
            self.set_validity(validity);
            self
        }

        /// Sets the validity of this array.
        ///
        /// Passing `None` removes the validity; no length check applies in that case.
        /// # Panics
        /// This function panics iff `validity` is `Some` and `validity.len() != self.len()`.
        #[inline]
        pub fn set_validity(&mut self, validity: Option<Bitmap>) {
            if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
                panic!("validity must be equal to the array's length")
            }
            self.validity = validity;
        }

        /// Takes the validity of this array, leaving it without a validity mask.
        #[inline]
        pub fn take_validity(&mut self) -> Option<Bitmap> {
            self.validity.take()
        }
    };
}
506
507
// macro implementing `with_validity`, `set_validity` and `apply_validity` for mutable arrays
//
// It must be expanded inside an `impl` block of a type that has a
// `validity: Option<MutableBitmap>` field and a `len(&self)` method.
macro_rules! impl_mutable_array_mut_validity {
    () => {
        /// Returns this array with a new validity.
        /// # Panic
        /// Panics iff `validity.len() != self.len()`.
        #[must_use]
        #[inline]
        pub fn with_validity(mut self, validity: Option<MutableBitmap>) -> Self {
            self.set_validity(validity);
            self
        }

        /// Sets the validity of this array.
        ///
        /// Passing `None` removes the validity; no length check applies in that case.
        /// # Panics
        /// This function panics iff `validity` is `Some` and `validity.len() != self.len()`.
        #[inline]
        pub fn set_validity(&mut self, validity: Option<MutableBitmap>) {
            if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
                panic!("validity must be equal to the array's length")
            }
            self.validity = validity;
        }

        /// Applies a function `f` to the validity of this array.
        ///
        /// The current validity is moved out, passed to `f` by value, and the bitmap `f`
        /// returns is stored back. When the array has no validity, `f` is not called.
        /// # Panics
        /// This function panics if the function `f` modifies the length of the
        /// [`MutableBitmap`]. The validity has already been moved out at that point, so the
        /// array is left without one.
        #[inline]
        pub fn apply_validity<F: FnOnce(MutableBitmap) -> MutableBitmap>(&mut self, f: F) {
            if let Some(validity) = self.validity.take() {
                self.set_validity(Some(f(validity)))
            }
        }
    };
}
545
546
// macro implementing `boxed` and `arced`
//
// Both methods consume the array and return it behind a pointer to `dyn Array`.
macro_rules! impl_into_array {
    () => {
        /// Moves this array into a [`Box<dyn Array>`].
        pub fn boxed(self) -> Box<dyn Array> {
            Box::new(self)
        }

        /// Moves this array into a [`std::sync::Arc<dyn Array>`].
        pub fn arced(self) -> std::sync::Arc<dyn Array> {
            std::sync::Arc::new(self)
        }
    };
}
560
561
// macro implementing common methods of trait `Array`
//
// It is expanded inside `impl Array for T` blocks. The bodies call `len`, `slice` and
// `slice_unchecked` on `self` and read the `dtype` field; splitting goes through `Splitable`.
macro_rules! impl_common_array {
    () => {
        #[inline]
        fn as_any(&self) -> &dyn std::any::Any {
            self
        }

        #[inline]
        fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
            self
        }

        #[inline]
        fn len(&self) -> usize {
            self.len()
        }

        #[inline]
        fn dtype(&self) -> &ArrowDataType {
            &self.dtype
        }

        #[inline]
        fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
            let (head, tail) = $crate::array::Splitable::split_at(self, offset);
            (Box::new(head), Box::new(tail))
        }

        #[inline]
        unsafe fn split_at_boxed_unchecked(
            &self,
            offset: usize,
        ) -> (Box<dyn Array>, Box<dyn Array>) {
            // SAFETY: the bound on `offset` is forwarded from this function's own contract.
            let (head, tail) =
                unsafe { $crate::array::Splitable::split_at_unchecked(self, offset) };
            (Box::new(head), Box::new(tail))
        }

        #[inline]
        fn slice(&mut self, offset: usize, length: usize) {
            self.slice(offset, length);
        }

        #[inline]
        unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
            // SAFETY: the bounds are forwarded from this function's own contract.
            unsafe { self.slice_unchecked(offset, length) };
        }

        #[inline]
        fn to_boxed(&self) -> Box<dyn Array> {
            Box::new(self.clone())
        }
    };
}
615
616
/// Clones a dynamic [`Array`].
617
/// # Implementation
618
/// This operation is `O(1)` over `len`, as it amounts to increase two ref counts
619
/// and moving the concrete struct under a `Box`.
620
pub fn clone(array: &dyn Array) -> Box<dyn Array> {
621
use crate::datatypes::PhysicalType::*;
622
match array.dtype().to_physical_type() {
623
Null => clone_dyn!(array, NullArray),
624
Boolean => clone_dyn!(array, BooleanArray),
625
Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
626
clone_dyn!(array, PrimitiveArray<$T>)
627
}),
628
Binary => clone_dyn!(array, BinaryArray<i32>),
629
LargeBinary => clone_dyn!(array, BinaryArray<i64>),
630
FixedSizeBinary => clone_dyn!(array, FixedSizeBinaryArray),
631
Utf8 => clone_dyn!(array, Utf8Array::<i32>),
632
LargeUtf8 => clone_dyn!(array, Utf8Array::<i64>),
633
List => clone_dyn!(array, ListArray::<i32>),
634
LargeList => clone_dyn!(array, ListArray::<i64>),
635
FixedSizeList => clone_dyn!(array, FixedSizeListArray),
636
Struct => clone_dyn!(array, StructArray),
637
Union => clone_dyn!(array, UnionArray),
638
Map => clone_dyn!(array, MapArray),
639
BinaryView => clone_dyn!(array, BinaryViewArray),
640
Utf8View => clone_dyn!(array, Utf8ViewArray),
641
Dictionary(key_type) => {
642
match_integer_type!(key_type, |$T| {
643
clone_dyn!(array, DictionaryArray::<$T>)
644
})
645
},
646
}
647
}
648
649
// see https://users.rust-lang.org/t/generic-for-dyn-a-or-box-dyn-a-or-arc-dyn-a/69430/3
650
// for details
651
impl<'a> AsRef<dyn Array + 'a> for dyn Array {
652
fn as_ref(&self) -> &(dyn Array + 'a) {
653
self
654
}
655
}
656
657
mod binary;
658
mod boolean;
659
pub mod builder;
660
mod dictionary;
661
mod fixed_size_binary;
662
mod fixed_size_list;
663
mod list;
664
pub use list::LIST_VALUES_NAME;
665
mod map;
666
mod null;
667
mod primitive;
668
pub mod specification;
669
mod static_array;
670
mod static_array_collect;
671
mod struct_;
672
mod total_ord;
673
mod union;
674
mod utf8;
675
676
mod equal;
677
mod ffi;
678
mod fmt;
679
#[doc(hidden)]
680
pub mod indexable;
681
pub mod iterator;
682
683
mod binview;
684
mod values;
685
686
pub use binary::{
687
BinaryArray, BinaryArrayBuilder, BinaryValueIter, MutableBinaryArray, MutableBinaryValuesArray,
688
};
689
pub use binview::{
690
BinaryViewArray, BinaryViewArrayBuilder, BinaryViewArrayGeneric, BinaryViewArrayGenericBuilder,
691
MutableBinaryViewArray, MutablePlBinary, MutablePlString, Utf8ViewArray, Utf8ViewArrayBuilder,
692
View, ViewType,
693
};
694
pub use boolean::{BooleanArray, BooleanArrayBuilder, MutableBooleanArray};
695
pub use dictionary::{DictionaryArray, DictionaryKey, MutableDictionaryArray};
696
pub use equal::equal;
697
pub use fixed_size_binary::{
698
FixedSizeBinaryArray, FixedSizeBinaryArrayBuilder, MutableFixedSizeBinaryArray,
699
};
700
pub use fixed_size_list::{
701
FixedSizeListArray, FixedSizeListArrayBuilder, MutableFixedSizeListArray,
702
};
703
pub use fmt::{get_display, get_value_display};
704
pub(crate) use iterator::ArrayAccessor;
705
pub use iterator::ArrayValuesIter;
706
pub use list::{ListArray, ListArrayBuilder, ListValuesIter, MutableListArray};
707
pub use map::MapArray;
708
pub use null::{MutableNullArray, NullArray, NullArrayBuilder};
709
use polars_error::PolarsResult;
710
pub use primitive::*;
711
pub use static_array::{ParameterFreeDtypeStaticArray, StaticArray};
712
pub use static_array_collect::{ArrayCollectIterExt, ArrayFromIter, ArrayFromIterDtype};
713
pub use struct_::{StructArray, StructArrayBuilder};
714
pub use union::UnionArray;
715
pub use utf8::{MutableUtf8Array, MutableUtf8ValuesArray, Utf8Array, Utf8ValuesIter};
716
pub use values::ValueSize;
717
718
#[cfg(feature = "proptest")]
719
pub use self::boolean::proptest::boolean_array;
720
pub(crate) use self::ffi::{FromFfi, ToFfi, offset_buffers_children_dictionary};
721
use crate::{match_integer_type, with_match_primitive_type_full};
722
723
/// A trait describing the ability of a struct to create itself from a iterator.
724
/// This is similar to [`Extend`], but accepted the creation to error.
725
pub trait TryExtend<A> {
726
/// Fallible version of [`Extend::extend`].
727
fn try_extend<I: IntoIterator<Item = A>>(&mut self, iter: I) -> PolarsResult<()>;
728
}
729
730
/// A trait describing the ability of a struct to receive new items.
731
pub trait TryPush<A> {
732
/// Tries to push a new element.
733
fn try_push(&mut self, item: A) -> PolarsResult<()>;
734
}
735
736
/// A trait describing the ability of a struct to receive new items without validating them.
///
/// Unlike [`TryPush`], the push cannot report an error: upholding the container's
/// invariants is the caller's responsibility.
pub trait PushUnchecked<A> {
    /// Push a new element that holds the invariants of the struct.
    ///
    /// # Safety
    /// The items must uphold the invariants of the struct.
    /// Read the specific implementation of the trait to understand what these are.
    unsafe fn push_unchecked(&mut self, item: A);
}
745
746
/// A trait describing the ability of a struct to extend from a reference of itself.
747
/// Specialization of [`TryExtend`].
748
pub trait TryExtendFromSelf {
749
/// Tries to extend itself with elements from `other`, failing only on overflow.
750
fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()>;
751
}
752
753
/// Trait that [`BinaryArray`] and [`Utf8Array`] implement for the purposes of DRY.
754
/// # Safety
755
/// The implementer must ensure that
756
/// 1. `offsets.len() > 0`
757
/// 2. `offsets[i] >= offsets[i-1] for all i`
758
/// 3. `offsets[i] < values.len() for all i`
759
pub unsafe trait GenericBinaryArray<O: crate::offset::Offset>: Array {
760
/// The values of the array
761
fn values(&self) -> &[u8];
762
/// The offsets of the array
763
fn offsets(&self) -> &[O];
764
}
765
766
/// Alias for an owned, dynamically typed [`Array`] trait object.
pub type ArrayRef = Box<dyn Array>;
767
768
/// Splitting of an optional validity bitmap.
///
/// `None` splits into `(None, None)`. A present bitmap is split at `offset`, and each half
/// that has no unset bits is replaced by `None`.
impl Splitable for Option<Bitmap> {
    #[inline(always)]
    fn check_bound(&self, offset: usize) -> bool {
        match self {
            Some(bitmap) => offset <= bitmap.len(),
            // Without a bitmap there is nothing to bound-check.
            None => true,
        }
    }

    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        let Some(bitmap) = self else {
            return (None, None);
        };

        // SAFETY: the bound on `offset` is forwarded from this function's own contract.
        let (lhs, rhs) = unsafe { bitmap.split_at_unchecked(offset) };

        // Only keep a half when it actually marks at least one slot as null.
        let keep_if_has_nulls = |half: Bitmap| (half.unset_bits() > 0).then_some(half);
        (keep_if_has_nulls(lhs), keep_if_has_nulls(rhs))
    }
}
784
785