Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/builder.rs
6939 views
1
use polars_utils::IdxSize;
2
3
use crate::array::binary::BinaryArrayBuilder;
4
use crate::array::binview::BinaryViewArrayGenericBuilder;
5
use crate::array::boolean::BooleanArrayBuilder;
6
use crate::array::fixed_size_binary::FixedSizeBinaryArrayBuilder;
7
use crate::array::fixed_size_list::FixedSizeListArrayBuilder;
8
use crate::array::list::ListArrayBuilder;
9
use crate::array::null::NullArrayBuilder;
10
use crate::array::struct_::StructArrayBuilder;
11
use crate::array::{Array, PrimitiveArrayBuilder};
12
use crate::datatypes::{ArrowDataType, PhysicalType};
13
use crate::with_match_primitive_type_full;
14
15
/// Used for arrays which can share buffers with input arrays to appends,
/// gathers, etc.
///
/// The strategy is a plain, copyable flag that is handed to every extend /
/// gather method of the builders. It can be compared with `==`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ShareStrategy {
    /// Presumably: do not share buffers with the input array — confirm
    /// against the individual builder implementations.
    Never,
    /// Presumably: share buffers with the input array where the builder
    /// supports it — confirm against the individual builder implementations.
    Always,
}
22
23
pub trait StaticArrayBuilder: Send {
24
type Array: Array;
25
26
fn dtype(&self) -> &ArrowDataType;
27
fn reserve(&mut self, additional: usize);
28
29
/// Consume this builder returning the built array.
30
fn freeze(self) -> Self::Array;
31
32
/// Return the built array and reset to an empty state.
33
fn freeze_reset(&mut self) -> Self::Array;
34
35
/// Returns the length of this builder (so far).
36
fn len(&self) -> usize;
37
38
/// Extend this builder with the given number of null elements.
39
fn extend_nulls(&mut self, length: usize);
40
41
/// Extends this builder with the contents of the given array. May panic if
42
/// other does not match the dtype of this array.
43
fn extend(&mut self, other: &Self::Array, share: ShareStrategy) {
44
self.subslice_extend(other, 0, other.len(), share);
45
}
46
47
/// Extends this builder with the contents of the given array subslice. May
48
/// panic if other does not match the dtype of this array.
49
fn subslice_extend(
50
&mut self,
51
other: &Self::Array,
52
start: usize,
53
length: usize,
54
share: ShareStrategy,
55
);
56
57
/// The same as subslice_extend, but repeats the extension `repeats` times.
58
fn subslice_extend_repeated(
59
&mut self,
60
other: &Self::Array,
61
start: usize,
62
length: usize,
63
repeats: usize,
64
share: ShareStrategy,
65
) {
66
self.reserve(length * repeats);
67
for _ in 0..repeats {
68
self.subslice_extend(other, start, length, share)
69
}
70
}
71
72
/// The same as subslice_extend, but repeats each element `repeats` times.
73
fn subslice_extend_each_repeated(
74
&mut self,
75
other: &Self::Array,
76
start: usize,
77
length: usize,
78
repeats: usize,
79
share: ShareStrategy,
80
);
81
82
/// Extends this builder with the contents of the given array at the given
83
/// indices. That is, `other[idxs[i]]` is appended to this array in order,
84
/// for each i=0..idxs.len(). May panic if other does not match the
85
/// dtype of this array.
86
///
87
/// # Safety
88
/// The indices must be in-bounds.
89
unsafe fn gather_extend(&mut self, other: &Self::Array, idxs: &[IdxSize], share: ShareStrategy);
90
91
/// Extends this builder with the contents of the given array at the given
92
/// indices. That is, `other[idxs[i]]` is appended to this array in order,
93
/// for each i=0..idxs.len(). May panic if other does not match the
94
/// dtype of this array. Out-of-bounds indices are mapped to nulls.
95
fn opt_gather_extend(&mut self, other: &Self::Array, idxs: &[IdxSize], share: ShareStrategy);
96
}
97
98
impl<T: StaticArrayBuilder> ArrayBuilder for T {
99
#[inline(always)]
100
fn dtype(&self) -> &ArrowDataType {
101
StaticArrayBuilder::dtype(self)
102
}
103
104
#[inline(always)]
105
fn reserve(&mut self, additional: usize) {
106
StaticArrayBuilder::reserve(self, additional)
107
}
108
109
#[inline(always)]
110
fn freeze(self) -> Box<dyn Array> {
111
Box::new(StaticArrayBuilder::freeze(self))
112
}
113
114
#[inline(always)]
115
fn freeze_reset(&mut self) -> Box<dyn Array> {
116
Box::new(StaticArrayBuilder::freeze_reset(self))
117
}
118
119
#[inline(always)]
120
fn len(&self) -> usize {
121
StaticArrayBuilder::len(self)
122
}
123
124
#[inline(always)]
125
fn extend_nulls(&mut self, length: usize) {
126
StaticArrayBuilder::extend_nulls(self, length);
127
}
128
129
#[inline(always)]
130
fn subslice_extend(
131
&mut self,
132
other: &dyn Array,
133
start: usize,
134
length: usize,
135
share: ShareStrategy,
136
) {
137
let other: &T::Array = other.as_any().downcast_ref().unwrap();
138
StaticArrayBuilder::subslice_extend(self, other, start, length, share);
139
}
140
141
#[inline(always)]
142
fn subslice_extend_repeated(
143
&mut self,
144
other: &dyn Array,
145
start: usize,
146
length: usize,
147
repeats: usize,
148
share: ShareStrategy,
149
) {
150
let other: &T::Array = other.as_any().downcast_ref().unwrap();
151
StaticArrayBuilder::subslice_extend_repeated(self, other, start, length, repeats, share);
152
}
153
154
#[inline(always)]
155
fn subslice_extend_each_repeated(
156
&mut self,
157
other: &dyn Array,
158
start: usize,
159
length: usize,
160
repeats: usize,
161
share: ShareStrategy,
162
) {
163
let other: &T::Array = other.as_any().downcast_ref().unwrap();
164
StaticArrayBuilder::subslice_extend_each_repeated(
165
self, other, start, length, repeats, share,
166
);
167
}
168
169
#[inline(always)]
170
unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy) {
171
let other: &T::Array = other.as_any().downcast_ref().unwrap();
172
StaticArrayBuilder::gather_extend(self, other, idxs, share);
173
}
174
175
#[inline(always)]
176
fn opt_gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy) {
177
let other: &T::Array = other.as_any().downcast_ref().unwrap();
178
StaticArrayBuilder::opt_gather_extend(self, other, idxs, share);
179
}
180
}
181
182
#[allow(private_bounds)]
183
pub trait ArrayBuilder: ArrayBuilderBoxedHelper + Send {
184
fn dtype(&self) -> &ArrowDataType;
185
fn reserve(&mut self, additional: usize);
186
187
/// Consume this builder returning the built array.
188
fn freeze(self) -> Box<dyn Array>;
189
190
/// Return the built array and reset to an empty state.
191
fn freeze_reset(&mut self) -> Box<dyn Array>;
192
193
/// Returns the length of this builder (so far).
194
fn len(&self) -> usize;
195
196
/// Extend this builder with the given number of null elements.
197
fn extend_nulls(&mut self, length: usize);
198
199
/// Extends this builder with the contents of the given array. May panic if
200
/// other does not match the dtype of this array.
201
fn extend(&mut self, other: &dyn Array, share: ShareStrategy) {
202
self.subslice_extend(other, 0, other.len(), share);
203
}
204
205
/// Extends this builder with the contents of the given array subslice. May
206
/// panic if other does not match the dtype of this array.
207
fn subslice_extend(
208
&mut self,
209
other: &dyn Array,
210
start: usize,
211
length: usize,
212
share: ShareStrategy,
213
);
214
215
/// The same as subslice_extend, but repeats the extension `repeats` times.
216
fn subslice_extend_repeated(
217
&mut self,
218
other: &dyn Array,
219
start: usize,
220
length: usize,
221
repeats: usize,
222
share: ShareStrategy,
223
);
224
225
/// The same as subslice_extend, but repeats each element `repeats` times.
226
fn subslice_extend_each_repeated(
227
&mut self,
228
other: &dyn Array,
229
start: usize,
230
length: usize,
231
repeats: usize,
232
share: ShareStrategy,
233
);
234
235
/// Extends this builder with the contents of the given array at the given
236
/// indices. That is, `other[idxs[i]]` is appended to this array in order,
237
/// for each i=0..idxs.len(). May panic if other does not match the
238
/// dtype of this array.
239
///
240
/// # Safety
241
/// The indices must be in-bounds.
242
unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy);
243
244
/// Extends this builder with the contents of the given array at the given
245
/// indices. That is, `other[idxs[i]]` is appended to this array in order,
246
/// for each i=0..idxs.len(). May panic if other does not match the
247
/// dtype of this array. Out-of-bounds indices are mapped to nulls.
248
fn opt_gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy);
249
}
250
251
/// A hack that lets us call the consuming `freeze` method on Box<dyn ArrayBuilder>.
252
trait ArrayBuilderBoxedHelper {
253
fn freeze_boxed(self: Box<Self>) -> Box<dyn Array>;
254
}
255
256
impl<T: ArrayBuilder> ArrayBuilderBoxedHelper for T {
257
fn freeze_boxed(self: Box<Self>) -> Box<dyn Array> {
258
self.freeze()
259
}
260
}
261
262
impl ArrayBuilder for Box<dyn ArrayBuilder> {
263
#[inline(always)]
264
fn dtype(&self) -> &ArrowDataType {
265
(**self).dtype()
266
}
267
268
#[inline(always)]
269
fn reserve(&mut self, additional: usize) {
270
(**self).reserve(additional)
271
}
272
273
#[inline(always)]
274
fn freeze(self) -> Box<dyn Array> {
275
self.freeze_boxed()
276
}
277
278
#[inline(always)]
279
fn freeze_reset(&mut self) -> Box<dyn Array> {
280
(**self).freeze_reset()
281
}
282
283
#[inline(always)]
284
fn len(&self) -> usize {
285
(**self).len()
286
}
287
288
#[inline(always)]
289
fn extend_nulls(&mut self, length: usize) {
290
(**self).extend_nulls(length);
291
}
292
293
#[inline(always)]
294
fn subslice_extend(
295
&mut self,
296
other: &dyn Array,
297
start: usize,
298
length: usize,
299
share: ShareStrategy,
300
) {
301
(**self).subslice_extend(other, start, length, share);
302
}
303
304
#[inline(always)]
305
fn subslice_extend_repeated(
306
&mut self,
307
other: &dyn Array,
308
start: usize,
309
length: usize,
310
repeats: usize,
311
share: ShareStrategy,
312
) {
313
(**self).subslice_extend_repeated(other, start, length, repeats, share);
314
}
315
316
#[inline(always)]
317
fn subslice_extend_each_repeated(
318
&mut self,
319
other: &dyn Array,
320
start: usize,
321
length: usize,
322
repeats: usize,
323
share: ShareStrategy,
324
) {
325
(**self).subslice_extend_each_repeated(other, start, length, repeats, share);
326
}
327
328
#[inline(always)]
329
unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy) {
330
(**self).gather_extend(other, idxs, share);
331
}
332
333
#[inline(always)]
334
fn opt_gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy) {
335
(**self).opt_gather_extend(other, idxs, share);
336
}
337
}
338
339
/// Construct an ArrayBuilder for the given type.
340
pub fn make_builder(dtype: &ArrowDataType) -> Box<dyn ArrayBuilder> {
341
use PhysicalType::*;
342
match dtype.to_physical_type() {
343
Null => Box::new(NullArrayBuilder::new(dtype.clone())),
344
Boolean => Box::new(BooleanArrayBuilder::new(dtype.clone())),
345
Primitive(prim_t) => with_match_primitive_type_full!(prim_t, |$T| {
346
Box::new(PrimitiveArrayBuilder::<$T>::new(dtype.clone()))
347
}),
348
LargeBinary => Box::new(BinaryArrayBuilder::<i64>::new(dtype.clone())),
349
FixedSizeBinary => Box::new(FixedSizeBinaryArrayBuilder::new(dtype.clone())),
350
LargeList => {
351
let ArrowDataType::LargeList(inner_dt) = dtype else {
352
unreachable!()
353
};
354
Box::new(ListArrayBuilder::<i64, _>::new(
355
dtype.clone(),
356
make_builder(inner_dt.dtype()),
357
))
358
},
359
FixedSizeList => {
360
let ArrowDataType::FixedSizeList(inner_dt, _) = dtype else {
361
unreachable!()
362
};
363
Box::new(FixedSizeListArrayBuilder::new(
364
dtype.clone(),
365
make_builder(inner_dt.dtype()),
366
))
367
},
368
Struct => {
369
let ArrowDataType::Struct(fields) = dtype else {
370
unreachable!()
371
};
372
let builders = fields.iter().map(|f| make_builder(f.dtype())).collect();
373
Box::new(StructArrayBuilder::new(dtype.clone(), builders))
374
},
375
BinaryView => Box::new(BinaryViewArrayGenericBuilder::<[u8]>::new(dtype.clone())),
376
Utf8View => Box::new(BinaryViewArrayGenericBuilder::<str>::new(dtype.clone())),
377
378
List | Binary | Utf8 | LargeUtf8 | Map | Union | Dictionary(_) => {
379
unimplemented!()
380
},
381
}
382
}
383
384