Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/list/builder.rs
6939 views
1
use polars_utils::IdxSize;
2
3
use super::ListArray;
4
use crate::array::builder::{ArrayBuilder, ShareStrategy, StaticArrayBuilder};
5
use crate::bitmap::OptBitmapBuilder;
6
use crate::datatypes::ArrowDataType;
7
use crate::offset::{Offset, Offsets, OffsetsBuffer};
8
9
pub struct ListArrayBuilder<O: Offset, B: ArrayBuilder> {
10
dtype: ArrowDataType,
11
offsets: Offsets<O>,
12
inner_builder: B,
13
validity: OptBitmapBuilder,
14
}
15
16
impl<O: Offset, B: ArrayBuilder> ListArrayBuilder<O, B> {
17
pub fn new(dtype: ArrowDataType, inner_builder: B) -> Self {
18
Self {
19
dtype,
20
inner_builder,
21
offsets: Offsets::new(),
22
validity: OptBitmapBuilder::default(),
23
}
24
}
25
}
26
27
impl<O: Offset, B: ArrayBuilder> StaticArrayBuilder for ListArrayBuilder<O, B> {
28
type Array = ListArray<O>;
29
30
fn dtype(&self) -> &ArrowDataType {
31
&self.dtype
32
}
33
34
fn reserve(&mut self, additional: usize) {
35
self.offsets.reserve(additional);
36
self.validity.reserve(additional);
37
// No inner reserve, we have no idea how large it needs to be.
38
}
39
40
fn freeze(self) -> ListArray<O> {
41
let offsets = OffsetsBuffer::from(self.offsets);
42
let values = self.inner_builder.freeze();
43
let validity = self.validity.into_opt_validity();
44
ListArray::new(self.dtype, offsets, values, validity)
45
}
46
47
fn freeze_reset(&mut self) -> Self::Array {
48
let offsets = OffsetsBuffer::from(core::mem::take(&mut self.offsets));
49
let values = self.inner_builder.freeze_reset();
50
let validity = core::mem::take(&mut self.validity).into_opt_validity();
51
ListArray::new(self.dtype.clone(), offsets, values, validity)
52
}
53
54
fn len(&self) -> usize {
55
self.offsets.len_proxy()
56
}
57
58
fn extend_nulls(&mut self, length: usize) {
59
self.offsets.extend_constant(length);
60
self.validity.extend_constant(length, false);
61
}
62
63
fn subslice_extend(
64
&mut self,
65
other: &ListArray<O>,
66
start: usize,
67
length: usize,
68
share: ShareStrategy,
69
) {
70
let start_offset = other.offsets()[start].to_usize();
71
let stop_offset = other.offsets()[start + length].to_usize();
72
self.offsets
73
.try_extend_from_slice(other.offsets(), start, length)
74
.unwrap();
75
self.inner_builder.subslice_extend(
76
&**other.values(),
77
start_offset,
78
stop_offset - start_offset,
79
share,
80
);
81
self.validity
82
.subslice_extend_from_opt_validity(other.validity(), start, length);
83
}
84
85
fn subslice_extend_each_repeated(
86
&mut self,
87
other: &ListArray<O>,
88
start: usize,
89
length: usize,
90
repeats: usize,
91
share: ShareStrategy,
92
) {
93
let other_offsets = other.offsets();
94
let other_values = &**other.values();
95
96
let start_offset = other.offsets()[start].to_usize();
97
let stop_offset = other.offsets()[start + length].to_usize();
98
self.offsets.reserve(length * repeats);
99
self.inner_builder
100
.reserve((stop_offset - start_offset) * repeats);
101
for offset_idx in start..start + length {
102
let sublist_start = other_offsets[offset_idx].to_usize();
103
let sublist_stop = other_offsets[offset_idx + 1].to_usize();
104
for _ in 0..repeats {
105
self.offsets.try_push(sublist_stop - sublist_start).unwrap();
106
}
107
self.inner_builder.subslice_extend_repeated(
108
other_values,
109
sublist_start,
110
sublist_stop - sublist_start,
111
repeats,
112
share,
113
);
114
}
115
self.validity
116
.subslice_extend_each_repeated_from_opt_validity(
117
other.validity(),
118
start,
119
length,
120
repeats,
121
);
122
}
123
124
unsafe fn gather_extend(
125
&mut self,
126
other: &ListArray<O>,
127
idxs: &[IdxSize],
128
share: ShareStrategy,
129
) {
130
let other_values = &**other.values();
131
let other_offsets = other.offsets();
132
133
// Pre-compute proper length for reserve.
134
let total_len: usize = idxs
135
.iter()
136
.map(|i| {
137
let start = other_offsets.get_unchecked(*i as usize).to_usize();
138
let stop = other_offsets.get_unchecked(*i as usize + 1).to_usize();
139
stop - start
140
})
141
.sum();
142
self.inner_builder.reserve(total_len);
143
144
// Group consecutive indices into larger copies.
145
let mut group_start = 0;
146
while group_start < idxs.len() {
147
let start_idx = idxs[group_start] as usize;
148
let mut group_len = 1;
149
while group_start + group_len < idxs.len()
150
&& idxs[group_start + group_len] as usize == start_idx + group_len
151
{
152
group_len += 1;
153
}
154
155
let start_offset = other_offsets.get_unchecked(start_idx).to_usize();
156
let stop_offset = other_offsets
157
.get_unchecked(start_idx + group_len)
158
.to_usize();
159
self.offsets
160
.try_extend_from_slice(other_offsets, start_idx, group_len)
161
.unwrap();
162
self.inner_builder.subslice_extend(
163
other_values,
164
start_offset,
165
stop_offset - start_offset,
166
share,
167
);
168
group_start += group_len;
169
}
170
171
self.validity
172
.gather_extend_from_opt_validity(other.validity(), idxs);
173
}
174
175
fn opt_gather_extend(&mut self, other: &ListArray<O>, idxs: &[IdxSize], share: ShareStrategy) {
176
let other_values = &**other.values();
177
let other_offsets = other.offsets();
178
179
unsafe {
180
// Pre-compute proper length for reserve.
181
let total_len: usize = idxs
182
.iter()
183
.map(|idx| {
184
if (*idx as usize) < other.len() {
185
let start = other_offsets.get_unchecked(*idx as usize).to_usize();
186
let stop = other_offsets.get_unchecked(*idx as usize + 1).to_usize();
187
stop - start
188
} else {
189
0
190
}
191
})
192
.sum();
193
self.inner_builder.reserve(total_len);
194
195
// Group consecutive indices into larger copies.
196
let mut group_start = 0;
197
while group_start < idxs.len() {
198
let start_idx = idxs[group_start] as usize;
199
let mut group_len = 1;
200
let in_bounds = start_idx < other.len();
201
202
if in_bounds {
203
while group_start + group_len < idxs.len()
204
&& idxs[group_start + group_len] as usize == start_idx + group_len
205
&& start_idx + group_len < other.len()
206
{
207
group_len += 1;
208
}
209
210
let start_offset = other_offsets.get_unchecked(start_idx).to_usize();
211
let stop_offset = other_offsets
212
.get_unchecked(start_idx + group_len)
213
.to_usize();
214
self.offsets
215
.try_extend_from_slice(other_offsets, start_idx, group_len)
216
.unwrap();
217
self.inner_builder.subslice_extend(
218
other_values,
219
start_offset,
220
stop_offset - start_offset,
221
share,
222
);
223
} else {
224
while group_start + group_len < idxs.len()
225
&& idxs[group_start + group_len] as usize >= other.len()
226
{
227
group_len += 1;
228
}
229
self.offsets.extend_constant(group_len);
230
}
231
group_start += group_len;
232
}
233
234
self.validity
235
.opt_gather_extend_from_opt_validity(other.validity(), idxs, other.len());
236
}
237
}
238
}
239
240