// GitHub Repository: pola-rs/polars
// Path: blob/main/crates/polars-arrow/src/legacy/array/mod.rs
1
#![allow(unsafe_op_in_unsafe_fn)]
2
3
use crate::array::{
4
Array, BooleanArray, FixedSizeListArray, ListArray, MutableBinaryViewArray, PrimitiveArray,
5
StructArray, ViewType, new_null_array,
6
};
7
use crate::bitmap::BitmapBuilder;
8
use crate::datatypes::ArrowDataType;
9
use crate::legacy::utils::CustomIterTools;
10
use crate::offset::Offsets;
11
use crate::types::NativeType;
12
13
pub mod default_arrays;
14
#[cfg(feature = "dtype-array")]
15
pub mod fixed_size_list;
16
pub mod list;
17
pub mod null;
18
pub mod slice;
19
pub mod utf8;
20
21
pub use slice::*;
22
23
use crate::legacy::prelude::LargeListArray;
24
25
// Flattens an iterator of optional sub-iterators into a single collection of
// values while recording list bookkeeping on the side.
//
// For every outer item, one validity flag (`true` for `Some`, `false` for
// `None`) and one offset are pushed. The running length is advanced by the
// lower bound of the sub-iterator's `size_hint`, so the offsets are only
// correct when that hint is exact.
macro_rules! iter_to_values {
    ($iterator:expr, $validity:expr, $offsets:expr, $length_so_far:expr) => {{
        $iterator
            .filter_map(|opt_iter| {
                let inner = opt_iter.map(|x| x.into_iter());
                if let Some(it) = &inner {
                    // Only present sub-lists contribute to the running length.
                    $length_so_far += it.size_hint().0 as i64;
                }
                $validity.push(inner.is_some());
                // Null entries repeat the previous offset.
                $offsets.push($length_so_far);
                inner
            })
            .flatten()
            .collect()
    }};
}
46
47
pub trait ListFromIter {
48
/// Create a list-array from an iterator.
49
/// Used in group_by agg-list
50
///
51
/// # Safety
52
/// Will produce incorrect arrays if size hint is incorrect.
53
unsafe fn from_iter_primitive_trusted_len<T, P, I>(
54
iter: I,
55
dtype: ArrowDataType,
56
) -> ListArray<i64>
57
where
58
T: NativeType,
59
P: IntoIterator<Item = Option<T>>,
60
I: IntoIterator<Item = Option<P>>,
61
{
62
let iterator = iter.into_iter();
63
let (lower, _) = iterator.size_hint();
64
65
let mut validity = BitmapBuilder::with_capacity(lower);
66
let mut offsets = Vec::<i64>::with_capacity(lower + 1);
67
let mut length_so_far = 0i64;
68
offsets.push(length_so_far);
69
70
let values: PrimitiveArray<T> = iter_to_values!(iterator, validity, offsets, length_so_far);
71
72
// SAFETY:
73
// offsets are monotonically increasing
74
ListArray::new(
75
ListArray::<i64>::default_datatype(dtype.clone()),
76
Offsets::new_unchecked(offsets).into(),
77
Box::new(values.to(dtype)),
78
validity.into_opt_validity(),
79
)
80
}
81
82
/// Create a list-array from an iterator.
83
/// Used in group_by agg-list
84
///
85
/// # Safety
86
/// Will produce incorrect arrays if size hint is incorrect.
87
unsafe fn from_iter_bool_trusted_len<I, P>(iter: I) -> ListArray<i64>
88
where
89
I: IntoIterator<Item = Option<P>>,
90
P: IntoIterator<Item = Option<bool>>,
91
{
92
let iterator = iter.into_iter();
93
let (lower, _) = iterator.size_hint();
94
95
let mut validity = Vec::with_capacity(lower);
96
let mut offsets = Vec::<i64>::with_capacity(lower + 1);
97
let mut length_so_far = 0i64;
98
offsets.push(length_so_far);
99
100
let values: BooleanArray = iter_to_values!(iterator, validity, offsets, length_so_far);
101
102
// SAFETY:
103
// Offsets are monotonically increasing.
104
ListArray::new(
105
ListArray::<i64>::default_datatype(ArrowDataType::Boolean),
106
Offsets::new_unchecked(offsets).into(),
107
Box::new(values),
108
Some(validity.into()),
109
)
110
}
111
112
/// # Safety
113
/// Will produce incorrect arrays if size hint is incorrect.
114
unsafe fn from_iter_binview_trusted_len<I, P, Ref, T: ViewType + ?Sized>(
115
iter: I,
116
n_elements: usize,
117
) -> ListArray<i64>
118
where
119
I: IntoIterator<Item = Option<P>>,
120
P: IntoIterator<Item = Option<Ref>>,
121
Ref: AsRef<T>,
122
{
123
let iterator = iter.into_iter();
124
let (lower, _) = iterator.size_hint();
125
126
let mut validity = BitmapBuilder::with_capacity(lower);
127
let mut offsets = Vec::<i64>::with_capacity(lower + 1);
128
let mut length_so_far = 0i64;
129
offsets.push(length_so_far);
130
131
let values: MutableBinaryViewArray<T> = iterator
132
.filter_map(|opt_iter| match opt_iter {
133
Some(x) => {
134
let it = x.into_iter();
135
length_so_far += it.size_hint().0 as i64;
136
validity.push(true);
137
offsets.push(length_so_far);
138
Some(it)
139
},
140
None => {
141
validity.push(false);
142
offsets.push(length_so_far);
143
None
144
},
145
})
146
.flatten()
147
.trust_my_length(n_elements)
148
.collect();
149
150
// SAFETY:
151
// offsets are monotonically increasing
152
ListArray::new(
153
ListArray::<i64>::default_datatype(T::DATA_TYPE),
154
Offsets::new_unchecked(offsets).into(),
155
values.freeze().boxed(),
156
validity.into_opt_validity(),
157
)
158
}
159
160
/// Create a list-array from an iterator.
161
/// Used in group_by agg-list
162
///
163
/// # Safety
164
/// Will produce incorrect arrays if size hint is incorrect.
165
unsafe fn from_iter_utf8_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
166
where
167
I: IntoIterator<Item = Option<P>>,
168
P: IntoIterator<Item = Option<Ref>>,
169
Ref: AsRef<str>,
170
{
171
Self::from_iter_binview_trusted_len(iter, n_elements)
172
}
173
174
/// Create a list-array from an iterator.
175
/// Used in group_by agg-list
176
///
177
/// # Safety
178
/// Will produce incorrect arrays if size hint is incorrect.
179
unsafe fn from_iter_binary_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
180
where
181
I: IntoIterator<Item = Option<P>>,
182
P: IntoIterator<Item = Option<Ref>>,
183
Ref: AsRef<[u8]>,
184
{
185
Self::from_iter_binview_trusted_len(iter, n_elements)
186
}
187
}
188
// Empty impl: `ListArray<i64>` uses the provided bodies of every
// `ListFromIter` method as-is.
impl ListFromIter for ListArray<i64> {}
189
190
fn is_nested_null(dtype: &ArrowDataType) -> bool {
191
match dtype {
192
ArrowDataType::Null => true,
193
ArrowDataType::LargeList(field) => is_nested_null(field.dtype()),
194
ArrowDataType::FixedSizeList(field, _) => is_nested_null(field.dtype()),
195
ArrowDataType::Struct(fields) => fields.iter().all(|field| is_nested_null(field.dtype())),
196
_ => false,
197
}
198
}
199
200
/// Cast null arrays to inner type and ensure that all offsets remain correct
201
pub fn convert_inner_type(array: &dyn Array, dtype: &ArrowDataType) -> Box<dyn Array> {
202
match dtype {
203
ArrowDataType::LargeList(field) => {
204
let array = array.as_any().downcast_ref::<LargeListArray>().unwrap();
205
let inner = array.values();
206
let new_values = convert_inner_type(inner.as_ref(), field.dtype());
207
let dtype = LargeListArray::default_datatype(new_values.dtype().clone());
208
LargeListArray::new(
209
dtype,
210
array.offsets().clone(),
211
new_values,
212
array.validity().cloned(),
213
)
214
.boxed()
215
},
216
ArrowDataType::FixedSizeList(field, width) => {
217
let width = *width;
218
219
let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
220
let inner = array.values();
221
let length = if width == array.size() {
222
array.len()
223
} else {
224
assert!(!array.values().is_empty() || width != 0);
225
if width == 0 {
226
0
227
} else {
228
array.values().len() / width
229
}
230
};
231
let new_values = convert_inner_type(inner.as_ref(), field.dtype());
232
let dtype = FixedSizeListArray::default_datatype(new_values.dtype().clone(), width);
233
FixedSizeListArray::new(dtype, length, new_values, array.validity().cloned()).boxed()
234
},
235
ArrowDataType::Struct(fields) => {
236
let array = array.as_any().downcast_ref::<StructArray>().unwrap();
237
let inner = array.values();
238
let new_values = inner
239
.iter()
240
.zip(fields)
241
.map(|(arr, field)| convert_inner_type(arr.as_ref(), field.dtype()))
242
.collect::<Vec<_>>();
243
StructArray::new(
244
dtype.clone(),
245
array.len(),
246
new_values,
247
array.validity().cloned(),
248
)
249
.boxed()
250
},
251
_ => new_null_array(dtype.clone(), array.len()),
252
}
253
}
254
255