Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/legacy/array/list.rs
6939 views
1
use polars_error::PolarsResult;
2
3
use crate::array::{Array, ArrayRef, ListArray, NullArray, new_null_array};
4
use crate::bitmap::BitmapBuilder;
5
use crate::compute::concatenate;
6
use crate::datatypes::ArrowDataType;
7
use crate::legacy::array::is_nested_null;
8
use crate::legacy::prelude::*;
9
use crate::offset::Offsets;
10
11
pub struct AnonymousBuilder<'a> {
12
arrays: Vec<&'a dyn Array>,
13
offsets: Vec<i64>,
14
validity: Option<BitmapBuilder>,
15
size: i64,
16
}
17
18
impl<'a> AnonymousBuilder<'a> {
19
pub fn new(size: usize) -> Self {
20
let mut offsets = Vec::with_capacity(size + 1);
21
offsets.push(0i64);
22
Self {
23
arrays: Vec::with_capacity(size),
24
offsets,
25
validity: None,
26
size: 0,
27
}
28
}
29
#[inline]
30
fn last_offset(&self) -> i64 {
31
*self.offsets.last().unwrap()
32
}
33
34
pub fn is_empty(&self) -> bool {
35
self.offsets.len() == 1
36
}
37
38
pub fn offsets(&self) -> &[i64] {
39
&self.offsets
40
}
41
42
pub fn take_offsets(self) -> Offsets<i64> {
43
// SAFETY: offsets are correct
44
unsafe { Offsets::new_unchecked(self.offsets) }
45
}
46
47
#[inline]
48
pub fn push(&mut self, arr: &'a dyn Array) {
49
self.size += arr.len() as i64;
50
self.offsets.push(self.size);
51
self.arrays.push(arr);
52
53
if let Some(validity) = &mut self.validity {
54
validity.push(true)
55
}
56
}
57
58
pub fn push_multiple(&mut self, arrs: &'a [ArrayRef]) {
59
for arr in arrs {
60
self.size += arr.len() as i64;
61
self.arrays.push(arr.as_ref());
62
}
63
self.offsets.push(self.size);
64
self.update_validity()
65
}
66
67
#[inline]
68
pub fn push_null(&mut self) {
69
self.offsets.push(self.last_offset());
70
match &mut self.validity {
71
Some(validity) => validity.push(false),
72
None => self.init_validity(),
73
}
74
}
75
76
#[inline]
77
pub fn push_opt(&mut self, arr: Option<&'a dyn Array>) {
78
match arr {
79
None => self.push_null(),
80
Some(arr) => self.push(arr),
81
}
82
}
83
84
pub fn push_empty(&mut self) {
85
self.offsets.push(self.last_offset());
86
self.update_validity()
87
}
88
89
fn init_validity(&mut self) {
90
let len = self.offsets.len() - 1;
91
let mut validity = BitmapBuilder::with_capacity(self.offsets.capacity());
92
if len > 0 {
93
validity.extend_constant(len - 1, true);
94
validity.push(false);
95
}
96
self.validity = Some(validity)
97
}
98
99
fn update_validity(&mut self) {
100
if let Some(validity) = &mut self.validity {
101
validity.push(true)
102
}
103
}
104
105
pub fn finish(self, inner_dtype: Option<&ArrowDataType>) -> PolarsResult<ListArray<i64>> {
106
// SAFETY:
107
// offsets are monotonically increasing
108
let offsets = unsafe { Offsets::new_unchecked(self.offsets) };
109
let (inner_dtype, values) = if self.arrays.is_empty() {
110
let len = *offsets.last() as usize;
111
match inner_dtype {
112
None => {
113
let values = NullArray::new(ArrowDataType::Null, len).boxed();
114
(ArrowDataType::Null, values)
115
},
116
Some(inner_dtype) => {
117
let values = new_null_array(inner_dtype.clone(), len);
118
(inner_dtype.clone(), values)
119
},
120
}
121
} else {
122
let inner_dtype = inner_dtype.unwrap_or_else(|| self.arrays[0].dtype());
123
124
// check if there is a dtype that is not `Null`
125
// if we find it, we will convert the null arrays
126
// to empty arrays of this dtype, otherwise the concat kernel fails.
127
let mut non_null_dtype = None;
128
if is_nested_null(inner_dtype) {
129
for arr in &self.arrays {
130
if !is_nested_null(arr.dtype()) {
131
non_null_dtype = Some(arr.dtype());
132
break;
133
}
134
}
135
};
136
137
// there are null arrays found, ensure the types are correct.
138
if let Some(dtype) = non_null_dtype {
139
let arrays = self
140
.arrays
141
.iter()
142
.map(|arr| {
143
if is_nested_null(arr.dtype()) {
144
convert_inner_type(&**arr, dtype)
145
} else {
146
arr.to_boxed()
147
}
148
})
149
.collect::<Vec<_>>();
150
151
let values = concatenate::concatenate_unchecked(&arrays)?;
152
(dtype.clone(), values)
153
} else {
154
let values = concatenate::concatenate(&self.arrays)?;
155
(inner_dtype.clone(), values)
156
}
157
};
158
let dtype = ListArray::<i64>::default_datatype(inner_dtype);
159
Ok(ListArray::<i64>::new(
160
dtype,
161
offsets.into(),
162
values,
163
self.validity
164
.and_then(|validity| validity.into_opt_validity()),
165
))
166
}
167
}
168
169