Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/list/mod.rs
6939 views
1
use super::specification::try_check_offsets_bounds;
2
use super::{Array, Splitable, new_empty_array};
3
use crate::bitmap::Bitmap;
4
use crate::datatypes::{ArrowDataType, Field};
5
use crate::offset::{Offset, Offsets, OffsetsBuffer};
6
7
mod builder;
8
pub use builder::*;
9
mod ffi;
10
pub(super) mod fmt;
11
mod iterator;
12
pub use iterator::*;
13
mod mutable;
14
pub use mutable::*;
15
use polars_error::{PolarsResult, polars_bail};
16
use polars_utils::pl_str::PlSmallStr;
17
#[cfg(feature = "proptest")]
18
pub mod proptest;
19
20
/// Name used for the values array within List/FixedSizeList arrays.
21
pub const LIST_VALUES_NAME: PlSmallStr = PlSmallStr::from_static("item");
22
23
/// An [`Array`] semantically equivalent to `Vec<Option<Vec<Option<T>>>>` with Arrow's in-memory.
24
#[derive(Clone)]
25
pub struct ListArray<O: Offset> {
26
dtype: ArrowDataType,
27
offsets: OffsetsBuffer<O>,
28
values: Box<dyn Array>,
29
validity: Option<Bitmap>,
30
}
31
32
impl<O: Offset> ListArray<O> {
33
/// Creates a new [`ListArray`].
34
///
35
/// # Errors
36
/// This function returns an error iff:
37
/// * `offsets.last()` is greater than `values.len()`.
38
/// * the validity's length is not equal to `offsets.len_proxy()`.
39
/// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`].
40
/// * The `dtype`'s inner field's data type is not equal to `values.dtype`.
41
/// # Implementation
42
/// This function is `O(1)`
43
pub fn try_new(
44
dtype: ArrowDataType,
45
offsets: OffsetsBuffer<O>,
46
values: Box<dyn Array>,
47
validity: Option<Bitmap>,
48
) -> PolarsResult<Self> {
49
try_check_offsets_bounds(&offsets, values.len())?;
50
51
if validity
52
.as_ref()
53
.is_some_and(|validity| validity.len() != offsets.len_proxy())
54
{
55
polars_bail!(ComputeError: "validity mask length must match the number of values")
56
}
57
58
let child_dtype = Self::try_get_child(&dtype)?.dtype();
59
let values_dtype = values.dtype();
60
if child_dtype != values_dtype {
61
polars_bail!(ComputeError: "ListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.");
62
}
63
64
Ok(Self {
65
dtype,
66
offsets,
67
values,
68
validity,
69
})
70
}
71
72
/// Creates a new [`ListArray`].
73
///
74
/// # Panics
75
/// This function panics iff:
76
/// * `offsets.last()` is greater than `values.len()`.
77
/// * the validity's length is not equal to `offsets.len_proxy()`.
78
/// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`].
79
/// * The `dtype`'s inner field's data type is not equal to `values.dtype`.
80
/// # Implementation
81
/// This function is `O(1)`
82
pub fn new(
83
dtype: ArrowDataType,
84
offsets: OffsetsBuffer<O>,
85
values: Box<dyn Array>,
86
validity: Option<Bitmap>,
87
) -> Self {
88
Self::try_new(dtype, offsets, values, validity).unwrap()
89
}
90
91
/// Returns a new empty [`ListArray`].
92
pub fn new_empty(dtype: ArrowDataType) -> Self {
93
let values = new_empty_array(Self::get_child_type(&dtype).clone());
94
Self::new(dtype, OffsetsBuffer::default(), values, None)
95
}
96
97
/// Returns a new null [`ListArray`].
98
#[inline]
99
pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
100
let child = Self::get_child_type(&dtype).clone();
101
Self::new(
102
dtype,
103
Offsets::new_zeroed(length).into(),
104
new_empty_array(child),
105
Some(Bitmap::new_zeroed(length)),
106
)
107
}
108
}
109
110
impl<O: Offset> ListArray<O> {
111
/// Slices this [`ListArray`].
112
/// # Panics
113
/// panics iff `offset + length > self.len()`
114
pub fn slice(&mut self, offset: usize, length: usize) {
115
assert!(
116
offset + length <= self.len(),
117
"the offset of the new Buffer cannot exceed the existing length"
118
);
119
unsafe { self.slice_unchecked(offset, length) }
120
}
121
122
/// Slices this [`ListArray`].
123
///
124
/// # Safety
125
/// The caller must ensure that `offset + length < self.len()`.
126
pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
127
self.validity = self
128
.validity
129
.take()
130
.map(|bitmap| bitmap.sliced_unchecked(offset, length))
131
.filter(|bitmap| bitmap.unset_bits() > 0);
132
self.offsets.slice_unchecked(offset, length + 1);
133
}
134
135
impl_sliced!();
136
impl_mut_validity!();
137
impl_into_array!();
138
}
139
140
// Accessors
141
impl<O: Offset> ListArray<O> {
142
/// Returns the length of this array
143
#[inline]
144
pub fn len(&self) -> usize {
145
self.offsets.len_proxy()
146
}
147
148
/// Returns the element at index `i`
149
/// # Panic
150
/// Panics iff `i >= self.len()`
151
#[inline]
152
pub fn value(&self, i: usize) -> Box<dyn Array> {
153
assert!(i < self.len());
154
// SAFETY: invariant of this function
155
unsafe { self.value_unchecked(i) }
156
}
157
158
/// Returns the element at index `i` as &str
159
///
160
/// # Safety
161
/// Assumes that the `i < self.len`.
162
#[inline]
163
pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
164
// SAFETY: the invariant of the function
165
let (start, end) = self.offsets.start_end_unchecked(i);
166
let length = end - start;
167
168
// SAFETY: the invariant of the struct
169
self.values.sliced_unchecked(start, length)
170
}
171
172
/// The optional validity.
173
#[inline]
174
pub fn validity(&self) -> Option<&Bitmap> {
175
self.validity.as_ref()
176
}
177
178
/// The offsets [`Buffer`].
179
#[inline]
180
pub fn offsets(&self) -> &OffsetsBuffer<O> {
181
&self.offsets
182
}
183
184
/// The values.
185
#[inline]
186
pub fn values(&self) -> &Box<dyn Array> {
187
&self.values
188
}
189
}
190
191
impl<O: Offset> ListArray<O> {
192
/// Returns a default [`ArrowDataType`]: inner field is named "item" and is nullable
193
pub fn default_datatype(dtype: ArrowDataType) -> ArrowDataType {
194
let field = Box::new(Field::new(LIST_VALUES_NAME, dtype, true));
195
if O::IS_LARGE {
196
ArrowDataType::LargeList(field)
197
} else {
198
ArrowDataType::List(field)
199
}
200
}
201
202
/// Returns a the inner [`Field`]
203
/// # Panics
204
/// Panics iff the logical type is not consistent with this struct.
205
pub fn get_child_field(dtype: &ArrowDataType) -> &Field {
206
Self::try_get_child(dtype).unwrap()
207
}
208
209
/// Returns a the inner [`Field`]
210
/// # Errors
211
/// Panics iff the logical type is not consistent with this struct.
212
pub fn try_get_child(dtype: &ArrowDataType) -> PolarsResult<&Field> {
213
if O::IS_LARGE {
214
match dtype.to_logical_type() {
215
ArrowDataType::LargeList(child) => Ok(child.as_ref()),
216
_ => polars_bail!(ComputeError: "ListArray<i64> expects DataType::LargeList"),
217
}
218
} else {
219
match dtype.to_logical_type() {
220
ArrowDataType::List(child) => Ok(child.as_ref()),
221
_ => polars_bail!(ComputeError: "ListArray<i32> expects DataType::List"),
222
}
223
}
224
}
225
226
/// Returns a the inner [`ArrowDataType`]
227
/// # Panics
228
/// Panics iff the logical type is not consistent with this struct.
229
pub fn get_child_type(dtype: &ArrowDataType) -> &ArrowDataType {
230
Self::get_child_field(dtype).dtype()
231
}
232
}
233
234
impl<O: Offset> Array for ListArray<O> {
235
impl_common_array!();
236
237
fn validity(&self) -> Option<&Bitmap> {
238
self.validity.as_ref()
239
}
240
241
#[inline]
242
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
243
Box::new(self.clone().with_validity(validity))
244
}
245
}
246
247
impl<O: Offset> Splitable for ListArray<O> {
248
fn check_bound(&self, offset: usize) -> bool {
249
offset <= self.len()
250
}
251
252
unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
253
let (lhs_offsets, rhs_offsets) = unsafe { self.offsets.split_at_unchecked(offset) };
254
let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
255
256
(
257
Self {
258
dtype: self.dtype.clone(),
259
offsets: lhs_offsets,
260
validity: lhs_validity,
261
values: self.values.clone(),
262
},
263
Self {
264
dtype: self.dtype.clone(),
265
offsets: rhs_offsets,
266
validity: rhs_validity,
267
values: self.values.clone(),
268
},
269
)
270
}
271
}
272
273