Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/struct_/mod.rs
6939 views
1
use super::{Array, Splitable, new_empty_array, new_null_array};
2
use crate::bitmap::Bitmap;
3
use crate::datatypes::{ArrowDataType, Field};
4
5
mod builder;
6
pub use builder::*;
7
mod ffi;
8
pub(super) mod fmt;
9
mod iterator;
10
use polars_error::{PolarsResult, polars_bail, polars_ensure};
11
#[cfg(feature = "proptest")]
12
pub mod proptest;
13
14
/// A [`StructArray`] is a nested [`Array`] with an optional validity representing
15
/// multiple [`Array`] with the same number of rows.
16
/// # Example
17
/// ```
18
/// use polars_arrow::array::*;
19
/// use polars_arrow::datatypes::*;
20
/// let boolean = BooleanArray::from_slice(&[false, false, true, true]).boxed();
21
/// let int = Int32Array::from_slice(&[42, 28, 19, 31]).boxed();
22
///
23
/// let fields = vec![
24
/// Field::new("b".into(), ArrowDataType::Boolean, false),
25
/// Field::new("c".into(), ArrowDataType::Int32, false),
26
/// ];
27
///
28
/// let array = StructArray::new(ArrowDataType::Struct(fields), 4, vec![boolean, int], None);
29
/// ```
30
#[derive(Clone)]
31
pub struct StructArray {
32
dtype: ArrowDataType,
33
// invariant: each array has the same length
34
values: Vec<Box<dyn Array>>,
35
// invariant: for each v in values: length == v.len()
36
length: usize,
37
validity: Option<Bitmap>,
38
}
39
40
impl StructArray {
41
/// Returns a new [`StructArray`].
42
/// # Errors
43
/// This function errors iff:
44
/// * `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Struct`].
45
/// * the children of `dtype` are empty
46
/// * the values's len is different from children's length
47
/// * any of the values's data type is different from its corresponding children' data type
48
/// * any element of values has a different length than the first element
49
/// * the validity's length is not equal to the length of the first element
50
pub fn try_new(
51
dtype: ArrowDataType,
52
length: usize,
53
values: Vec<Box<dyn Array>>,
54
validity: Option<Bitmap>,
55
) -> PolarsResult<Self> {
56
let fields = Self::try_get_fields(&dtype)?;
57
58
polars_ensure!(
59
fields.len() == values.len(),
60
ComputeError:
61
"a StructArray must have a number of fields in its DataType equal to the number of child values"
62
);
63
64
fields
65
.iter().map(|a| &a.dtype)
66
.zip(values.iter().map(|a| a.dtype()))
67
.enumerate()
68
.try_for_each(|(index, (dtype, child))| {
69
if dtype != child {
70
polars_bail!(ComputeError:
71
"The children DataTypes of a StructArray must equal the children data types.
72
However, the field {index} has data type {dtype:?} but the value has data type {child:?}"
73
)
74
} else {
75
Ok(())
76
}
77
})?;
78
79
values
80
.iter()
81
.map(|f| f.len())
82
.enumerate()
83
.try_for_each(|(index, f_length)| {
84
if f_length != length {
85
polars_bail!(ComputeError: "The children must have the given number of values.
86
However, the values at index {index} have a length of {f_length}, which is different from given length {length}.")
87
} else {
88
Ok(())
89
}
90
})?;
91
92
if validity
93
.as_ref()
94
.is_some_and(|validity| validity.len() != length)
95
{
96
polars_bail!(ComputeError:"The validity length of a StructArray must match its number of elements")
97
}
98
99
Ok(Self {
100
dtype,
101
length,
102
values,
103
validity,
104
})
105
}
106
107
/// Returns a new [`StructArray`]
108
/// # Panics
109
/// This function panics iff:
110
/// * `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Struct`].
111
/// * the children of `dtype` are empty
112
/// * the values's len is different from children's length
113
/// * any of the values's data type is different from its corresponding children' data type
114
/// * any element of values has a different length than the first element
115
/// * the validity's length is not equal to the length of the first element
116
pub fn new(
117
dtype: ArrowDataType,
118
length: usize,
119
values: Vec<Box<dyn Array>>,
120
validity: Option<Bitmap>,
121
) -> Self {
122
Self::try_new(dtype, length, values, validity).unwrap()
123
}
124
125
/// Creates an empty [`StructArray`].
126
pub fn new_empty(dtype: ArrowDataType) -> Self {
127
if let ArrowDataType::Struct(fields) = &dtype.to_logical_type() {
128
let values = fields
129
.iter()
130
.map(|field| new_empty_array(field.dtype().clone()))
131
.collect();
132
Self::new(dtype, 0, values, None)
133
} else {
134
panic!("StructArray must be initialized with DataType::Struct");
135
}
136
}
137
138
/// Creates a null [`StructArray`] of length `length`.
139
pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
140
if let ArrowDataType::Struct(fields) = &dtype {
141
let values = fields
142
.iter()
143
.map(|field| new_null_array(field.dtype().clone(), length))
144
.collect();
145
Self::new(dtype, length, values, Some(Bitmap::new_zeroed(length)))
146
} else {
147
panic!("StructArray must be initialized with DataType::Struct");
148
}
149
}
150
}
151
152
// must use
153
impl StructArray {
154
/// Deconstructs the [`StructArray`] into its individual components.
155
#[must_use]
156
pub fn into_data(self) -> (Vec<Field>, usize, Vec<Box<dyn Array>>, Option<Bitmap>) {
157
let Self {
158
dtype,
159
length,
160
values,
161
validity,
162
} = self;
163
let fields = if let ArrowDataType::Struct(fields) = dtype {
164
fields
165
} else {
166
unreachable!()
167
};
168
(fields, length, values, validity)
169
}
170
171
/// Slices this [`StructArray`].
172
/// # Panics
173
/// panics iff `offset + length > self.len()`
174
/// # Implementation
175
/// This operation is `O(F)` where `F` is the number of fields.
176
pub fn slice(&mut self, offset: usize, length: usize) {
177
assert!(
178
offset + length <= self.len(),
179
"offset + length may not exceed length of array"
180
);
181
unsafe { self.slice_unchecked(offset, length) }
182
}
183
184
/// Slices this [`StructArray`].
185
/// # Implementation
186
/// This operation is `O(F)` where `F` is the number of fields.
187
///
188
/// # Safety
189
/// The caller must ensure that `offset + length <= self.len()`.
190
pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
191
self.validity = self
192
.validity
193
.take()
194
.map(|bitmap| bitmap.sliced_unchecked(offset, length))
195
.filter(|bitmap| bitmap.unset_bits() > 0);
196
self.values
197
.iter_mut()
198
.for_each(|x| x.slice_unchecked(offset, length));
199
self.length = length;
200
}
201
202
impl_sliced!();
203
204
impl_mut_validity!();
205
206
impl_into_array!();
207
}
208
209
// Accessors
210
impl StructArray {
211
#[inline]
212
pub fn len(&self) -> usize {
213
if cfg!(debug_assertions) {
214
for arr in self.values.iter() {
215
assert_eq!(
216
arr.len(),
217
self.length,
218
"StructArray invariant: each array has same length"
219
);
220
}
221
}
222
223
self.length
224
}
225
226
/// The optional validity.
227
#[inline]
228
pub fn validity(&self) -> Option<&Bitmap> {
229
self.validity.as_ref()
230
}
231
232
/// Returns the values of this [`StructArray`].
233
pub fn values(&self) -> &[Box<dyn Array>] {
234
&self.values
235
}
236
237
/// Returns the fields of this [`StructArray`].
238
pub fn fields(&self) -> &[Field] {
239
let fields = Self::get_fields(&self.dtype);
240
debug_assert_eq!(self.values().len(), fields.len());
241
fields
242
}
243
}
244
245
impl StructArray {
246
/// Returns the fields the `DataType::Struct`.
247
pub(crate) fn try_get_fields(dtype: &ArrowDataType) -> PolarsResult<&[Field]> {
248
match dtype.to_logical_type() {
249
ArrowDataType::Struct(fields) => Ok(fields),
250
_ => {
251
polars_bail!(ComputeError: "Struct array must be created with a DataType whose physical type is Struct")
252
},
253
}
254
}
255
256
/// Returns the fields the `DataType::Struct`.
257
pub fn get_fields(dtype: &ArrowDataType) -> &[Field] {
258
Self::try_get_fields(dtype).unwrap()
259
}
260
}
261
262
impl Array for StructArray {
263
impl_common_array!();
264
265
fn validity(&self) -> Option<&Bitmap> {
266
self.validity.as_ref()
267
}
268
269
#[inline]
270
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
271
Box::new(self.clone().with_validity(validity))
272
}
273
}
274
275
impl Splitable for StructArray {
    /// Returns whether `offset` is a valid split point, i.e. `offset <= self.len()`.
    fn check_bound(&self, offset: usize) -> bool {
        offset <= self.len()
    }

    /// Splits the array into the rows before `offset` and the rows from `offset` on.
    ///
    /// The validity and every child array are split at the same `offset`.
    ///
    /// # Safety
    /// NOTE(review): presumably the caller must guarantee `offset <= self.len()`
    /// (see `check_bound`); confirm against the `Splitable` trait contract.
    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };

        // Split each child once and route the halves into the two sides.
        let (lhs_values, rhs_values): (Vec<_>, Vec<_>) = self
            .values
            .iter()
            .map(|child| unsafe { child.split_at_boxed_unchecked(offset) })
            .unzip();

        let lhs = Self {
            dtype: self.dtype.clone(),
            length: offset,
            values: lhs_values,
            validity: lhs_validity,
        };
        let rhs = Self {
            dtype: self.dtype.clone(),
            length: self.length - offset,
            values: rhs_values,
            validity: rhs_validity,
        };
        (lhs, rhs)
    }
}
308
309