// Source: GitHub repository pola-rs/polars
// Path: blob/main/crates/polars-arrow/src/legacy/array/mod.rs
1
#![allow(unsafe_op_in_unsafe_fn)]
2
use crate::array::{
3
Array, BooleanArray, FixedSizeListArray, ListArray, MutableBinaryViewArray, PrimitiveArray,
4
StructArray, ViewType, new_null_array,
5
};
6
use crate::bitmap::BitmapBuilder;
7
use crate::datatypes::ArrowDataType;
8
use crate::legacy::utils::CustomIterTools;
9
use crate::offset::Offsets;
10
use crate::types::NativeType;
11
12
pub mod default_arrays;
13
#[cfg(feature = "dtype-array")]
14
pub mod fixed_size_list;
15
pub mod list;
16
pub mod null;
17
pub mod slice;
18
pub mod utf8;
19
20
pub use slice::*;
21
22
use crate::legacy::prelude::LargeListArray;
23
24
// Flattens an iterator of optional inner iterables into a single collection
// while recording list bookkeeping on the side.
//
// For every outer item, in this order:
//   * a `Some(inner)` item adds the lower bound of `inner`'s `size_hint` to
//     `$length_so_far`;
//   * one bit is pushed to `$validity` (`true` for `Some`, `false` for `None`);
//   * the current `$length_so_far` is pushed to `$offsets`.
//
// `None` items contribute no values. The result type is decided by the
// `collect()` at the call site.
macro_rules! iter_to_values {
    ($iterator:expr, $validity:expr, $offsets:expr, $length_so_far:expr) => {{
        $iterator
            .filter_map(|opt_iter| {
                let inner = opt_iter.map(IntoIterator::into_iter);
                if let Some(it) = &inner {
                    // The slot length comes from the size hint, not from
                    // counting the items that are actually yielded.
                    $length_so_far += it.size_hint().0 as i64;
                }
                $validity.push(inner.is_some());
                $offsets.push($length_so_far);
                inner
            })
            .flatten()
            .collect()
    }};
}
45
46
pub trait ListFromIter {
47
/// Create a list-array from an iterator.
48
/// Used in group_by agg-list
49
///
50
/// # Safety
51
/// Will produce incorrect arrays if size hint is incorrect.
52
unsafe fn from_iter_primitive_trusted_len<T, P, I>(
53
iter: I,
54
dtype: ArrowDataType,
55
) -> ListArray<i64>
56
where
57
T: NativeType,
58
P: IntoIterator<Item = Option<T>>,
59
I: IntoIterator<Item = Option<P>>,
60
{
61
let iterator = iter.into_iter();
62
let (lower, _) = iterator.size_hint();
63
64
let mut validity = BitmapBuilder::with_capacity(lower);
65
let mut offsets = Vec::<i64>::with_capacity(lower + 1);
66
let mut length_so_far = 0i64;
67
offsets.push(length_so_far);
68
69
let values: PrimitiveArray<T> = iter_to_values!(iterator, validity, offsets, length_so_far);
70
71
// SAFETY:
72
// offsets are monotonically increasing
73
ListArray::new(
74
ListArray::<i64>::default_datatype(dtype.clone()),
75
Offsets::new_unchecked(offsets).into(),
76
Box::new(values.to(dtype)),
77
validity.into_opt_validity(),
78
)
79
}
80
81
/// Create a list-array from an iterator.
82
/// Used in group_by agg-list
83
///
84
/// # Safety
85
/// Will produce incorrect arrays if size hint is incorrect.
86
unsafe fn from_iter_bool_trusted_len<I, P>(iter: I) -> ListArray<i64>
87
where
88
I: IntoIterator<Item = Option<P>>,
89
P: IntoIterator<Item = Option<bool>>,
90
{
91
let iterator = iter.into_iter();
92
let (lower, _) = iterator.size_hint();
93
94
let mut validity = Vec::with_capacity(lower);
95
let mut offsets = Vec::<i64>::with_capacity(lower + 1);
96
let mut length_so_far = 0i64;
97
offsets.push(length_so_far);
98
99
let values: BooleanArray = iter_to_values!(iterator, validity, offsets, length_so_far);
100
101
// SAFETY:
102
// Offsets are monotonically increasing.
103
ListArray::new(
104
ListArray::<i64>::default_datatype(ArrowDataType::Boolean),
105
Offsets::new_unchecked(offsets).into(),
106
Box::new(values),
107
Some(validity.into()),
108
)
109
}
110
111
/// # Safety
112
/// Will produce incorrect arrays if size hint is incorrect.
113
unsafe fn from_iter_binview_trusted_len<I, P, Ref, T: ViewType + ?Sized>(
114
iter: I,
115
n_elements: usize,
116
) -> ListArray<i64>
117
where
118
I: IntoIterator<Item = Option<P>>,
119
P: IntoIterator<Item = Option<Ref>>,
120
Ref: AsRef<T>,
121
{
122
let iterator = iter.into_iter();
123
let (lower, _) = iterator.size_hint();
124
125
let mut validity = BitmapBuilder::with_capacity(lower);
126
let mut offsets = Vec::<i64>::with_capacity(lower + 1);
127
let mut length_so_far = 0i64;
128
offsets.push(length_so_far);
129
130
let values: MutableBinaryViewArray<T> = iterator
131
.filter_map(|opt_iter| match opt_iter {
132
Some(x) => {
133
let it = x.into_iter();
134
length_so_far += it.size_hint().0 as i64;
135
validity.push(true);
136
offsets.push(length_so_far);
137
Some(it)
138
},
139
None => {
140
validity.push(false);
141
offsets.push(length_so_far);
142
None
143
},
144
})
145
.flatten()
146
.trust_my_length(n_elements)
147
.collect();
148
149
// SAFETY:
150
// offsets are monotonically increasing
151
ListArray::new(
152
ListArray::<i64>::default_datatype(T::DATA_TYPE),
153
Offsets::new_unchecked(offsets).into(),
154
values.freeze().boxed(),
155
validity.into_opt_validity(),
156
)
157
}
158
159
/// Create a list-array from an iterator.
160
/// Used in group_by agg-list
161
///
162
/// # Safety
163
/// Will produce incorrect arrays if size hint is incorrect.
164
unsafe fn from_iter_utf8_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
165
where
166
I: IntoIterator<Item = Option<P>>,
167
P: IntoIterator<Item = Option<Ref>>,
168
Ref: AsRef<str>,
169
{
170
Self::from_iter_binview_trusted_len(iter, n_elements)
171
}
172
173
/// Create a list-array from an iterator.
174
/// Used in group_by agg-list
175
///
176
/// # Safety
177
/// Will produce incorrect arrays if size hint is incorrect.
178
unsafe fn from_iter_binary_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
179
where
180
I: IntoIterator<Item = Option<P>>,
181
P: IntoIterator<Item = Option<Ref>>,
182
Ref: AsRef<[u8]>,
183
{
184
Self::from_iter_binview_trusted_len(iter, n_elements)
185
}
186
}
187
// `ListArray<i64>` takes every constructor from the trait's default methods;
// nothing is overridden here.
impl ListFromIter for ListArray<i64> {}
188
189
fn is_nested_null(dtype: &ArrowDataType) -> bool {
190
match dtype {
191
ArrowDataType::Null => true,
192
ArrowDataType::LargeList(field) => is_nested_null(field.dtype()),
193
ArrowDataType::FixedSizeList(field, _) => is_nested_null(field.dtype()),
194
ArrowDataType::Struct(fields) => fields.iter().all(|field| is_nested_null(field.dtype())),
195
_ => false,
196
}
197
}
198
199
/// Cast null arrays to inner type and ensure that all offsets remain correct
200
pub fn convert_inner_type(array: &dyn Array, dtype: &ArrowDataType) -> Box<dyn Array> {
201
match dtype {
202
ArrowDataType::LargeList(field) => {
203
let array = array.as_any().downcast_ref::<LargeListArray>().unwrap();
204
let inner = array.values();
205
let new_values = convert_inner_type(inner.as_ref(), field.dtype());
206
let dtype = LargeListArray::default_datatype(new_values.dtype().clone());
207
LargeListArray::new(
208
dtype,
209
array.offsets().clone(),
210
new_values,
211
array.validity().cloned(),
212
)
213
.boxed()
214
},
215
ArrowDataType::FixedSizeList(field, width) => {
216
let width = *width;
217
218
let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
219
let inner = array.values();
220
let length = if width == array.size() {
221
array.len()
222
} else {
223
assert!(!array.values().is_empty() || width != 0);
224
if width == 0 {
225
0
226
} else {
227
array.values().len() / width
228
}
229
};
230
let new_values = convert_inner_type(inner.as_ref(), field.dtype());
231
let dtype = FixedSizeListArray::default_datatype(new_values.dtype().clone(), width);
232
FixedSizeListArray::new(dtype, length, new_values, array.validity().cloned()).boxed()
233
},
234
ArrowDataType::Struct(fields) => {
235
let array = array.as_any().downcast_ref::<StructArray>().unwrap();
236
let inner = array.values();
237
let new_values = inner
238
.iter()
239
.zip(fields)
240
.map(|(arr, field)| convert_inner_type(arr.as_ref(), field.dtype()))
241
.collect::<Vec<_>>();
242
StructArray::new(
243
dtype.clone(),
244
array.len(),
245
new_values,
246
array.validity().cloned(),
247
)
248
.boxed()
249
},
250
_ => new_null_array(dtype.clone(), array.len()),
251
}
252
}
253
254