Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/static_array_collect.rs
6939 views
1
use std::borrow::Cow;
2
3
use polars_utils::no_call_const;
4
5
use crate::array::static_array::{ParameterFreeDtypeStaticArray, StaticArray};
6
use crate::array::{
7
Array, BinaryArray, BinaryViewArray, BooleanArray, FixedSizeListArray, ListArray,
8
MutableBinaryArray, MutableBinaryValuesArray, MutableBinaryViewArray, PrimitiveArray,
9
StructArray, Utf8Array, Utf8ViewArray,
10
};
11
use crate::bitmap::BitmapBuilder;
12
use crate::datatypes::ArrowDataType;
13
#[cfg(feature = "dtype-array")]
14
use crate::legacy::prelude::fixed_size_list::AnonymousBuilder as AnonymousFixedSizeListArrayBuilder;
15
use crate::legacy::prelude::list::AnonymousBuilder as AnonymousListArrayBuilder;
16
use crate::legacy::trusted_len::TrustedLenPush;
17
use crate::trusted_len::TrustedLen;
18
use crate::types::NativeType;
19
20
pub trait ArrayFromIterDtype<T>: Sized {
21
fn arr_from_iter_with_dtype<I: IntoIterator<Item = T>>(dtype: ArrowDataType, iter: I) -> Self;
22
23
#[inline(always)]
24
fn arr_from_iter_trusted_with_dtype<I>(dtype: ArrowDataType, iter: I) -> Self
25
where
26
I: IntoIterator<Item = T>,
27
I::IntoIter: TrustedLen,
28
{
29
Self::arr_from_iter_with_dtype(dtype, iter)
30
}
31
32
fn try_arr_from_iter_with_dtype<E, I: IntoIterator<Item = Result<T, E>>>(
33
dtype: ArrowDataType,
34
iter: I,
35
) -> Result<Self, E>;
36
37
#[inline(always)]
38
fn try_arr_from_iter_trusted_with_dtype<E, I>(dtype: ArrowDataType, iter: I) -> Result<Self, E>
39
where
40
I: IntoIterator<Item = Result<T, E>>,
41
I::IntoIter: TrustedLen,
42
{
43
Self::try_arr_from_iter_with_dtype(dtype, iter)
44
}
45
}
46
47
pub trait ArrayFromIter<T>: Sized {
48
fn arr_from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self;
49
50
#[inline(always)]
51
fn arr_from_iter_trusted<I>(iter: I) -> Self
52
where
53
I: IntoIterator<Item = T>,
54
I::IntoIter: TrustedLen,
55
{
56
Self::arr_from_iter(iter)
57
}
58
59
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<T, E>>>(iter: I) -> Result<Self, E>;
60
61
#[inline(always)]
62
fn try_arr_from_iter_trusted<E, I>(iter: I) -> Result<Self, E>
63
where
64
I: IntoIterator<Item = Result<T, E>>,
65
I::IntoIter: TrustedLen,
66
{
67
Self::try_arr_from_iter(iter)
68
}
69
}
70
71
impl<T, A: ParameterFreeDtypeStaticArray + ArrayFromIter<T>> ArrayFromIterDtype<T> for A {
72
#[inline(always)]
73
fn arr_from_iter_with_dtype<I: IntoIterator<Item = T>>(dtype: ArrowDataType, iter: I) -> Self {
74
// FIXME: currently some Object arrays have Unknown dtype, when this is fixed remove this bypass.
75
if dtype != ArrowDataType::Unknown {
76
debug_assert_eq!(
77
std::mem::discriminant(&dtype),
78
std::mem::discriminant(&A::get_dtype())
79
);
80
}
81
Self::arr_from_iter(iter)
82
}
83
84
#[inline(always)]
85
fn arr_from_iter_trusted_with_dtype<I>(dtype: ArrowDataType, iter: I) -> Self
86
where
87
I: IntoIterator<Item = T>,
88
I::IntoIter: TrustedLen,
89
{
90
// FIXME: currently some Object arrays have Unknown dtype, when this is fixed remove this bypass.
91
if dtype != ArrowDataType::Unknown {
92
debug_assert_eq!(
93
std::mem::discriminant(&dtype),
94
std::mem::discriminant(&A::get_dtype())
95
);
96
}
97
Self::arr_from_iter_trusted(iter)
98
}
99
100
#[inline(always)]
101
fn try_arr_from_iter_with_dtype<E, I: IntoIterator<Item = Result<T, E>>>(
102
dtype: ArrowDataType,
103
iter: I,
104
) -> Result<Self, E> {
105
// FIXME: currently some Object arrays have Unknown dtype, when this is fixed remove this bypass.
106
if dtype != ArrowDataType::Unknown {
107
debug_assert_eq!(
108
std::mem::discriminant(&dtype),
109
std::mem::discriminant(&A::get_dtype())
110
);
111
}
112
Self::try_arr_from_iter(iter)
113
}
114
115
#[inline(always)]
116
fn try_arr_from_iter_trusted_with_dtype<E, I>(dtype: ArrowDataType, iter: I) -> Result<Self, E>
117
where
118
I: IntoIterator<Item = Result<T, E>>,
119
I::IntoIter: TrustedLen,
120
{
121
// FIXME: currently some Object arrays have Unknown dtype, when this is fixed remove this bypass.
122
if dtype != ArrowDataType::Unknown {
123
debug_assert_eq!(
124
std::mem::discriminant(&dtype),
125
std::mem::discriminant(&A::get_dtype())
126
);
127
}
128
Self::try_arr_from_iter_trusted(iter)
129
}
130
}
131
132
pub trait ArrayCollectIterExt<A: StaticArray>: Iterator + Sized {
133
#[inline(always)]
134
fn collect_arr(self) -> A
135
where
136
A: ArrayFromIter<Self::Item>,
137
{
138
A::arr_from_iter(self)
139
}
140
141
#[inline(always)]
142
fn collect_arr_trusted(self) -> A
143
where
144
A: ArrayFromIter<Self::Item>,
145
Self: TrustedLen,
146
{
147
A::arr_from_iter_trusted(self)
148
}
149
150
#[inline(always)]
151
fn try_collect_arr<U, E>(self) -> Result<A, E>
152
where
153
A: ArrayFromIter<U>,
154
Self: Iterator<Item = Result<U, E>>,
155
{
156
A::try_arr_from_iter(self)
157
}
158
159
#[inline(always)]
160
fn try_collect_arr_trusted<U, E>(self) -> Result<A, E>
161
where
162
A: ArrayFromIter<U>,
163
Self: Iterator<Item = Result<U, E>> + TrustedLen,
164
{
165
A::try_arr_from_iter_trusted(self)
166
}
167
168
#[inline(always)]
169
fn collect_arr_with_dtype(self, dtype: ArrowDataType) -> A
170
where
171
A: ArrayFromIterDtype<Self::Item>,
172
{
173
A::arr_from_iter_with_dtype(dtype, self)
174
}
175
176
#[inline(always)]
177
fn collect_arr_trusted_with_dtype(self, dtype: ArrowDataType) -> A
178
where
179
A: ArrayFromIterDtype<Self::Item>,
180
Self: TrustedLen,
181
{
182
A::arr_from_iter_trusted_with_dtype(dtype, self)
183
}
184
185
#[inline(always)]
186
fn try_collect_arr_with_dtype<U, E>(self, dtype: ArrowDataType) -> Result<A, E>
187
where
188
A: ArrayFromIterDtype<U>,
189
Self: Iterator<Item = Result<U, E>>,
190
{
191
A::try_arr_from_iter_with_dtype(dtype, self)
192
}
193
194
#[inline(always)]
195
fn try_collect_arr_trusted_with_dtype<U, E>(self, dtype: ArrowDataType) -> Result<A, E>
196
where
197
A: ArrayFromIterDtype<U>,
198
Self: Iterator<Item = Result<U, E>> + TrustedLen,
199
{
200
A::try_arr_from_iter_trusted_with_dtype(dtype, self)
201
}
202
}
203
204
impl<A: StaticArray, I: Iterator> ArrayCollectIterExt<A> for I {}
205
206
// ---------------
207
// Implementations
208
// ---------------
209
210
impl<T: NativeType> ArrayFromIter<T> for PrimitiveArray<T> {
211
#[inline]
212
fn arr_from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
213
PrimitiveArray::from_vec(iter.into_iter().collect())
214
}
215
216
#[inline]
217
fn arr_from_iter_trusted<I>(iter: I) -> Self
218
where
219
I: IntoIterator<Item = T>,
220
I::IntoIter: TrustedLen,
221
{
222
PrimitiveArray::from_vec(Vec::from_trusted_len_iter(iter))
223
}
224
225
#[inline]
226
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<T, E>>>(iter: I) -> Result<Self, E> {
227
let v: Result<Vec<T>, E> = iter.into_iter().collect();
228
Ok(PrimitiveArray::from_vec(v?))
229
}
230
231
#[inline]
232
fn try_arr_from_iter_trusted<E, I>(iter: I) -> Result<Self, E>
233
where
234
I: IntoIterator<Item = Result<T, E>>,
235
I::IntoIter: TrustedLen,
236
{
237
let v = Vec::try_from_trusted_len_iter(iter);
238
Ok(PrimitiveArray::from_vec(v?))
239
}
240
}
241
242
impl<T: NativeType> ArrayFromIter<Option<T>> for PrimitiveArray<T> {
243
fn arr_from_iter<I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
244
let iter = iter.into_iter();
245
let n = iter.size_hint().0;
246
let mut buf = Vec::with_capacity(n);
247
let mut validity = BitmapBuilder::with_capacity(n);
248
unsafe {
249
for val in iter {
250
// Use one check for both capacities.
251
if buf.len() == buf.capacity() {
252
buf.reserve(1);
253
validity.reserve(buf.capacity() - buf.len());
254
}
255
buf.push_unchecked(val.unwrap_or_default());
256
validity.push_unchecked(val.is_some());
257
}
258
}
259
PrimitiveArray::new(
260
T::PRIMITIVE.into(),
261
buf.into(),
262
validity.into_opt_validity(),
263
)
264
}
265
266
fn arr_from_iter_trusted<I>(iter: I) -> Self
267
where
268
I: IntoIterator<Item = Option<T>>,
269
I::IntoIter: TrustedLen,
270
{
271
let iter = iter.into_iter();
272
let n = iter.size_hint().1.expect("must have an upper bound");
273
let mut buf = Vec::with_capacity(n);
274
let mut validity = BitmapBuilder::with_capacity(n);
275
unsafe {
276
for val in iter {
277
buf.push_unchecked(val.unwrap_or_default());
278
validity.push_unchecked(val.is_some());
279
}
280
}
281
PrimitiveArray::new(
282
T::PRIMITIVE.into(),
283
buf.into(),
284
validity.into_opt_validity(),
285
)
286
}
287
288
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<Option<T>, E>>>(
289
iter: I,
290
) -> Result<Self, E> {
291
let iter = iter.into_iter();
292
let n = iter.size_hint().0;
293
let mut buf = Vec::with_capacity(n);
294
let mut validity = BitmapBuilder::with_capacity(n);
295
unsafe {
296
for val in iter {
297
let val = val?;
298
// Use one check for both capacities.
299
if buf.len() == buf.capacity() {
300
buf.reserve(1);
301
validity.reserve(buf.capacity() - buf.len());
302
}
303
buf.push_unchecked(val.unwrap_or_default());
304
validity.push_unchecked(val.is_some());
305
}
306
}
307
Ok(PrimitiveArray::new(
308
T::PRIMITIVE.into(),
309
buf.into(),
310
validity.into_opt_validity(),
311
))
312
}
313
314
fn try_arr_from_iter_trusted<E, I>(iter: I) -> Result<Self, E>
315
where
316
I: IntoIterator<Item = Result<Option<T>, E>>,
317
I::IntoIter: TrustedLen,
318
{
319
let iter = iter.into_iter();
320
let n = iter.size_hint().1.expect("must have an upper bound");
321
let mut buf = Vec::with_capacity(n);
322
let mut validity = BitmapBuilder::with_capacity(n);
323
unsafe {
324
for val in iter {
325
let val = val?;
326
buf.push_unchecked(val.unwrap_or_default());
327
validity.push_unchecked(val.is_some());
328
}
329
}
330
Ok(PrimitiveArray::new(
331
T::PRIMITIVE.into(),
332
buf.into(),
333
validity.into_opt_validity(),
334
))
335
}
336
}
337
338
// `AsRef<[u8]>` is deliberately not used as the bound directly: it leads to
// conflicting-implementation errors, because Rust considers that
// `AsRef<[u8]>` for `Option<&[u8]>` could be implemented.

/// Conversion of an owned or borrowed byte-like value into something that can
/// be viewed as `&[u8]`.
trait IntoBytes {
    /// The byte-viewable type produced by [`IntoBytes::into_bytes`].
    type AsRefT: AsRef<[u8]>;

    /// Consumes `self` and returns its byte-viewable form.
    fn into_bytes(self) -> Self::AsRefT;
}

/// Marker for types that already are `AsRef<[u8]>` and convert to themselves.
trait TrivialIntoBytes: AsRef<[u8]> {}

// Identity conversion for every marked type.
impl<T> IntoBytes for T
where
    T: TrivialIntoBytes,
{
    type AsRefT = Self;

    fn into_bytes(self) -> Self {
        self
    }
}

impl TrivialIntoBytes for Vec<u8> {}
impl TrivialIntoBytes for Cow<'_, [u8]> {}
impl TrivialIntoBytes for &[u8] {}
impl TrivialIntoBytes for String {}
impl TrivialIntoBytes for &str {}

// `Cow<str>` gets a dedicated impl that maps it to `Cow<[u8]>`, keeping the
// borrowed/owned state of the input.
impl<'a> IntoBytes for Cow<'a, str> {
    type AsRefT = Cow<'a, [u8]>;

    fn into_bytes(self) -> Cow<'a, [u8]> {
        match self {
            Cow::Borrowed(text) => Cow::Borrowed(str::as_bytes(text)),
            Cow::Owned(text) => Cow::Owned(String::into_bytes(text)),
        }
    }
}
365
366
impl<T: IntoBytes> ArrayFromIter<T> for BinaryArray<i64> {
367
fn arr_from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
368
BinaryArray::from_iter_values(iter.into_iter().map(|s| s.into_bytes()))
369
}
370
371
fn arr_from_iter_trusted<I>(iter: I) -> Self
372
where
373
I: IntoIterator<Item = T>,
374
I::IntoIter: TrustedLen,
375
{
376
unsafe {
377
// SAFETY: our iterator is TrustedLen.
378
MutableBinaryArray::from_trusted_len_values_iter_unchecked(
379
iter.into_iter().map(|s| s.into_bytes()),
380
)
381
.into()
382
}
383
}
384
385
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<T, E>>>(iter: I) -> Result<Self, E> {
386
// No built-in for this?
387
let mut arr = MutableBinaryValuesArray::new();
388
let mut iter = iter.into_iter();
389
arr.reserve(iter.size_hint().0, 0);
390
iter.try_for_each(|x| -> Result<(), E> {
391
arr.push(x?.into_bytes());
392
Ok(())
393
})?;
394
Ok(arr.into())
395
}
396
397
// No faster implementation than this available, fall back to default.
398
// fn try_arr_from_iter_trusted<E, I>(iter: I) -> Result<Self, E>
399
}
400
401
impl<T: IntoBytes> ArrayFromIter<Option<T>> for BinaryArray<i64> {
402
#[inline]
403
fn arr_from_iter<I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
404
BinaryArray::from_iter(iter.into_iter().map(|s| Some(s?.into_bytes())))
405
}
406
407
#[inline]
408
fn arr_from_iter_trusted<I>(iter: I) -> Self
409
where
410
I: IntoIterator<Item = Option<T>>,
411
I::IntoIter: TrustedLen,
412
{
413
unsafe {
414
// SAFETY: the iterator is TrustedLen.
415
BinaryArray::from_trusted_len_iter_unchecked(
416
iter.into_iter().map(|s| Some(s?.into_bytes())),
417
)
418
}
419
}
420
421
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<Option<T>, E>>>(
422
iter: I,
423
) -> Result<Self, E> {
424
// No built-in for this?
425
let mut arr = MutableBinaryArray::new();
426
let mut iter = iter.into_iter();
427
arr.reserve(iter.size_hint().0, 0);
428
iter.try_for_each(|x| -> Result<(), E> {
429
arr.push(x?.map(|s| s.into_bytes()));
430
Ok(())
431
})?;
432
Ok(arr.into())
433
}
434
435
fn try_arr_from_iter_trusted<E, I>(iter: I) -> Result<Self, E>
436
where
437
I: IntoIterator<Item = Result<Option<T>, E>>,
438
I::IntoIter: TrustedLen,
439
{
440
unsafe {
441
// SAFETY: the iterator is TrustedLen.
442
BinaryArray::try_from_trusted_len_iter_unchecked(
443
iter.into_iter().map(|s| s.map(|s| Some(s?.into_bytes()))),
444
)
445
}
446
}
447
}
448
449
impl<T: IntoBytes> ArrayFromIter<T> for BinaryViewArray {
450
#[inline]
451
fn arr_from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
452
MutableBinaryViewArray::from_values_iter(iter.into_iter().map(|a| a.into_bytes())).into()
453
}
454
455
#[inline]
456
fn arr_from_iter_trusted<I>(iter: I) -> Self
457
where
458
I: IntoIterator<Item = T>,
459
I::IntoIter: TrustedLen,
460
{
461
Self::arr_from_iter(iter)
462
}
463
464
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<T, E>>>(iter: I) -> Result<Self, E> {
465
let mut iter = iter.into_iter();
466
let mut arr = MutableBinaryViewArray::with_capacity(iter.size_hint().0);
467
iter.try_for_each(|x| -> Result<(), E> {
468
arr.push_value_ignore_validity(x?.into_bytes());
469
Ok(())
470
})?;
471
Ok(arr.into())
472
}
473
474
// No faster implementation than this available, fall back to default.
475
// fn try_arr_from_iter_trusted<E, I>(iter: I) -> Result<Self, E>
476
}
477
478
impl<T: IntoBytes> ArrayFromIter<Option<T>> for BinaryViewArray {
479
#[inline]
480
fn arr_from_iter<I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
481
MutableBinaryViewArray::from_iter(
482
iter.into_iter().map(|opt_a| opt_a.map(|a| a.into_bytes())),
483
)
484
.into()
485
}
486
487
#[inline]
488
fn arr_from_iter_trusted<I>(iter: I) -> Self
489
where
490
I: IntoIterator<Item = Option<T>>,
491
I::IntoIter: TrustedLen,
492
{
493
Self::arr_from_iter(iter)
494
}
495
496
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<Option<T>, E>>>(
497
iter: I,
498
) -> Result<Self, E> {
499
let mut iter = iter.into_iter();
500
let mut arr = MutableBinaryViewArray::with_capacity(iter.size_hint().0);
501
iter.try_for_each(|x| -> Result<(), E> {
502
let x = x?;
503
arr.push(x.map(|x| x.into_bytes()));
504
Ok(())
505
})?;
506
Ok(arr.into())
507
}
508
509
// No faster implementation than this available, fall back to default.
510
// fn try_arr_from_iter_trusted<E, I>(iter: I) -> Result<Self, E>
511
}
512
513
/// We use this to reuse the binary collect implementation for strings.
514
/// # Safety
515
/// The array must be valid UTF-8.
516
unsafe fn into_utf8array(arr: BinaryArray<i64>) -> Utf8Array<i64> {
517
unsafe {
518
let (_dt, offsets, values, validity) = arr.into_inner();
519
Utf8Array::new_unchecked(ArrowDataType::LargeUtf8, offsets, values, validity)
520
}
521
}
522
523
trait StrIntoBytes: IntoBytes {}
524
impl StrIntoBytes for String {}
525
impl StrIntoBytes for &str {}
526
impl StrIntoBytes for Cow<'_, str> {}
527
528
impl<T: StrIntoBytes> ArrayFromIter<T> for Utf8ViewArray {
529
#[inline]
530
fn arr_from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
531
unsafe { BinaryViewArray::arr_from_iter(iter).to_utf8view_unchecked() }
532
}
533
534
#[inline]
535
fn arr_from_iter_trusted<I>(iter: I) -> Self
536
where
537
I: IntoIterator<Item = T>,
538
I::IntoIter: TrustedLen,
539
{
540
Self::arr_from_iter(iter)
541
}
542
543
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<T, E>>>(iter: I) -> Result<Self, E> {
544
unsafe { BinaryViewArray::try_arr_from_iter(iter).map(|arr| arr.to_utf8view_unchecked()) }
545
}
546
547
// No faster implementation than this available, fall back to default.
548
// fn try_arr_from_iter_trusted<E, I>(iter: I) -> Result<Self, E>
549
}
550
551
impl<T: StrIntoBytes> ArrayFromIter<Option<T>> for Utf8ViewArray {
552
#[inline]
553
fn arr_from_iter<I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
554
unsafe { BinaryViewArray::arr_from_iter(iter).to_utf8view_unchecked() }
555
}
556
557
#[inline]
558
fn arr_from_iter_trusted<I>(iter: I) -> Self
559
where
560
I: IntoIterator<Item = Option<T>>,
561
I::IntoIter: TrustedLen,
562
{
563
Self::arr_from_iter(iter)
564
}
565
566
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<Option<T>, E>>>(
567
iter: I,
568
) -> Result<Self, E> {
569
unsafe { BinaryViewArray::try_arr_from_iter(iter).map(|arr| arr.to_utf8view_unchecked()) }
570
}
571
572
// No faster implementation than this available, fall back to default.
573
// fn try_arr_from_iter_trusted<E, I>(iter: I) -> Result<Self, E>
574
}
575
576
impl<T: StrIntoBytes> ArrayFromIter<T> for Utf8Array<i64> {
577
#[inline(always)]
578
fn arr_from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
579
unsafe { into_utf8array(iter.into_iter().collect_arr()) }
580
}
581
582
#[inline(always)]
583
fn arr_from_iter_trusted<I>(iter: I) -> Self
584
where
585
I: IntoIterator<Item = T>,
586
I::IntoIter: TrustedLen,
587
{
588
unsafe { into_utf8array(iter.into_iter().collect_arr()) }
589
}
590
591
#[inline(always)]
592
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<T, E>>>(iter: I) -> Result<Self, E> {
593
let arr = iter.into_iter().try_collect_arr()?;
594
unsafe { Ok(into_utf8array(arr)) }
595
}
596
597
#[inline(always)]
598
fn try_arr_from_iter_trusted<E, I: IntoIterator<Item = Result<T, E>>>(
599
iter: I,
600
) -> Result<Self, E> {
601
let arr = iter.into_iter().try_collect_arr()?;
602
unsafe { Ok(into_utf8array(arr)) }
603
}
604
}
605
606
impl<T: StrIntoBytes> ArrayFromIter<Option<T>> for Utf8Array<i64> {
607
#[inline(always)]
608
fn arr_from_iter<I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
609
unsafe { into_utf8array(iter.into_iter().collect_arr()) }
610
}
611
612
#[inline(always)]
613
fn arr_from_iter_trusted<I>(iter: I) -> Self
614
where
615
I: IntoIterator<Item = Option<T>>,
616
I::IntoIter: TrustedLen,
617
{
618
unsafe { into_utf8array(iter.into_iter().collect_arr()) }
619
}
620
621
#[inline(always)]
622
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<Option<T>, E>>>(
623
iter: I,
624
) -> Result<Self, E> {
625
let arr = iter.into_iter().try_collect_arr()?;
626
unsafe { Ok(into_utf8array(arr)) }
627
}
628
629
#[inline(always)]
630
fn try_arr_from_iter_trusted<E, I: IntoIterator<Item = Result<Option<T>, E>>>(
631
iter: I,
632
) -> Result<Self, E> {
633
let arr = iter.into_iter().try_collect_arr()?;
634
unsafe { Ok(into_utf8array(arr)) }
635
}
636
}
637
638
impl ArrayFromIter<bool> for BooleanArray {
639
fn arr_from_iter<I: IntoIterator<Item = bool>>(iter: I) -> Self {
640
let iter = iter.into_iter();
641
let n = iter.size_hint().0;
642
let mut values = BitmapBuilder::with_capacity(n);
643
for val in iter {
644
values.push(val);
645
}
646
BooleanArray::new(ArrowDataType::Boolean, values.freeze(), None)
647
}
648
649
// TODO: are efficient trusted collects for booleans worth it?
650
// fn arr_from_iter_trusted<I>(iter: I) -> Self
651
652
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<bool, E>>>(iter: I) -> Result<Self, E> {
653
let iter = iter.into_iter();
654
let n = iter.size_hint().0;
655
let mut values = BitmapBuilder::with_capacity(n);
656
for val in iter {
657
values.push(val?);
658
}
659
Ok(BooleanArray::new(
660
ArrowDataType::Boolean,
661
values.freeze(),
662
None,
663
))
664
}
665
666
// fn try_arr_from_iter_trusted<E, I: IntoIterator<Item = Result<bool, E>>>(
667
}
668
669
impl ArrayFromIter<Option<bool>> for BooleanArray {
670
fn arr_from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
671
let iter = iter.into_iter();
672
let n = iter.size_hint().0;
673
let mut values = BitmapBuilder::with_capacity(n);
674
let mut validity = BitmapBuilder::with_capacity(n);
675
for val in iter {
676
values.push(val.unwrap_or(false));
677
validity.push(val.is_some());
678
}
679
BooleanArray::new(
680
ArrowDataType::Boolean,
681
values.freeze(),
682
validity.into_opt_validity(),
683
)
684
}
685
686
// fn arr_from_iter_trusted<I>(iter: I) -> Self
687
688
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<Option<bool>, E>>>(
689
iter: I,
690
) -> Result<Self, E> {
691
let iter = iter.into_iter();
692
let n = iter.size_hint().0;
693
let mut values = BitmapBuilder::with_capacity(n);
694
let mut validity = BitmapBuilder::with_capacity(n);
695
for val in iter {
696
let val = val?;
697
values.push(val.unwrap_or(false));
698
validity.push(val.is_some());
699
}
700
Ok(BooleanArray::new(
701
ArrowDataType::Boolean,
702
values.freeze(),
703
validity.into_opt_validity(),
704
))
705
}
706
707
// fn try_arr_from_iter_trusted<E, I: IntoIterator<Item = Result<Option<bool>, E>>>(
708
}
709
710
// We don't use AsRef here because it leads to problems with conflicting implementations,
711
// as Rust considers that AsRef<dyn Array> for Option<&dyn Array> could be implemented.
712
trait AsArray {
713
fn as_array(&self) -> &dyn Array;
714
#[cfg(feature = "dtype-array")]
715
fn into_boxed_array(self) -> Box<dyn Array>; // Prevents unnecessary re-boxing.
716
}
717
impl AsArray for Box<dyn Array> {
718
fn as_array(&self) -> &dyn Array {
719
self.as_ref()
720
}
721
#[cfg(feature = "dtype-array")]
722
fn into_boxed_array(self) -> Box<dyn Array> {
723
self
724
}
725
}
726
impl<'a> AsArray for &'a dyn Array {
727
fn as_array(&self) -> &'a dyn Array {
728
*self
729
}
730
#[cfg(feature = "dtype-array")]
731
fn into_boxed_array(self) -> Box<dyn Array> {
732
self.to_boxed()
733
}
734
}
735
736
// TODO: more efficient (fixed size) list collect routines.
737
impl<T: AsArray> ArrayFromIterDtype<T> for ListArray<i64> {
738
fn arr_from_iter_with_dtype<I: IntoIterator<Item = T>>(dtype: ArrowDataType, iter: I) -> Self {
739
let iter_values: Vec<T> = iter.into_iter().collect();
740
let mut builder = AnonymousListArrayBuilder::new(iter_values.len());
741
for arr in &iter_values {
742
builder.push(arr.as_array());
743
}
744
let inner = dtype
745
.inner_dtype()
746
.expect("expected nested type in ListArray collect");
747
builder
748
.finish(Some(&inner.underlying_physical_type()))
749
.unwrap()
750
}
751
752
fn try_arr_from_iter_with_dtype<E, I: IntoIterator<Item = Result<T, E>>>(
753
dtype: ArrowDataType,
754
iter: I,
755
) -> Result<Self, E> {
756
let iter_values = iter.into_iter().collect::<Result<Vec<_>, E>>()?;
757
Ok(Self::arr_from_iter_with_dtype(dtype, iter_values))
758
}
759
}
760
761
impl<T: AsArray> ArrayFromIterDtype<Option<T>> for ListArray<i64> {
762
fn arr_from_iter_with_dtype<I: IntoIterator<Item = Option<T>>>(
763
dtype: ArrowDataType,
764
iter: I,
765
) -> Self {
766
let iter_values: Vec<Option<T>> = iter.into_iter().collect();
767
let mut builder = AnonymousListArrayBuilder::new(iter_values.len());
768
for arr in &iter_values {
769
builder.push_opt(arr.as_ref().map(|a| a.as_array()));
770
}
771
let inner = dtype
772
.inner_dtype()
773
.expect("expected nested type in ListArray collect");
774
builder
775
.finish(Some(&inner.underlying_physical_type()))
776
.unwrap()
777
}
778
779
fn try_arr_from_iter_with_dtype<E, I: IntoIterator<Item = Result<Option<T>, E>>>(
780
dtype: ArrowDataType,
781
iter: I,
782
) -> Result<Self, E> {
783
let iter_values = iter.into_iter().collect::<Result<Vec<_>, E>>()?;
784
let mut builder = AnonymousListArrayBuilder::new(iter_values.len());
785
for arr in &iter_values {
786
builder.push_opt(arr.as_ref().map(|a| a.as_array()));
787
}
788
let inner = dtype
789
.inner_dtype()
790
.expect("expected nested type in ListArray collect");
791
Ok(builder
792
.finish(Some(&inner.underlying_physical_type()))
793
.unwrap())
794
}
795
}
796
797
impl<T: AsArray> ArrayFromIter<Option<T>> for ListArray<i64> {
798
fn arr_from_iter<I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
799
let iter = iter.into_iter();
800
let iter_values: Vec<Option<T>> = iter.into_iter().collect();
801
let mut builder = AnonymousListArrayBuilder::new(iter_values.len());
802
for arr in &iter_values {
803
builder.push_opt(arr.as_ref().map(|a| a.as_array()));
804
}
805
builder.finish(None).unwrap()
806
}
807
808
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<Option<T>, E>>>(
809
iter: I,
810
) -> Result<Self, E> {
811
let iter_values = iter.into_iter().collect::<Result<Vec<_>, E>>()?;
812
let mut builder = AnonymousListArrayBuilder::new(iter_values.len());
813
for arr in &iter_values {
814
builder.push_opt(arr.as_ref().map(|a| a.as_array()));
815
}
816
Ok(builder.finish(None).unwrap())
817
}
818
}
819
820
impl ArrayFromIterDtype<Box<dyn Array>> for FixedSizeListArray {
821
#[allow(unused_variables)]
822
fn arr_from_iter_with_dtype<I: IntoIterator<Item = Box<dyn Array>>>(
823
dtype: ArrowDataType,
824
iter: I,
825
) -> Self {
826
#[cfg(feature = "dtype-array")]
827
{
828
let ArrowDataType::FixedSizeList(_, width) = &dtype else {
829
panic!("FixedSizeListArray::arr_from_iter_with_dtype called with non-Array dtype");
830
};
831
let iter_values: Vec<_> = iter.into_iter().collect();
832
let mut builder = AnonymousFixedSizeListArrayBuilder::new(iter_values.len(), *width);
833
for arr in iter_values {
834
builder.push(arr.into_boxed_array());
835
}
836
let inner = dtype
837
.inner_dtype()
838
.expect("expected nested type in ListArray collect");
839
builder
840
.finish(Some(&inner.underlying_physical_type()))
841
.unwrap()
842
}
843
#[cfg(not(feature = "dtype-array"))]
844
panic!("activate 'dtype-array'")
845
}
846
847
fn try_arr_from_iter_with_dtype<E, I: IntoIterator<Item = Result<Box<dyn Array>, E>>>(
848
dtype: ArrowDataType,
849
iter: I,
850
) -> Result<Self, E> {
851
let iter_values = iter.into_iter().collect::<Result<Vec<_>, E>>()?;
852
Ok(Self::arr_from_iter_with_dtype(dtype, iter_values))
853
}
854
}
855
856
impl ArrayFromIterDtype<Option<Box<dyn Array>>> for FixedSizeListArray {
857
#[allow(unused_variables)]
858
fn arr_from_iter_with_dtype<I: IntoIterator<Item = Option<Box<dyn Array>>>>(
859
dtype: ArrowDataType,
860
iter: I,
861
) -> Self {
862
#[cfg(feature = "dtype-array")]
863
{
864
let ArrowDataType::FixedSizeList(_, width) = &dtype else {
865
panic!("FixedSizeListArray::arr_from_iter_with_dtype called with non-Array dtype");
866
};
867
let iter_values: Vec<_> = iter.into_iter().collect();
868
let mut builder = AnonymousFixedSizeListArrayBuilder::new(iter_values.len(), *width);
869
for arr in iter_values {
870
match arr {
871
Some(a) => builder.push(a.into_boxed_array()),
872
None => builder.push_null(),
873
}
874
}
875
let inner = dtype
876
.inner_dtype()
877
.expect("expected nested type in ListArray collect");
878
builder
879
.finish(Some(&inner.underlying_physical_type()))
880
.unwrap()
881
}
882
#[cfg(not(feature = "dtype-array"))]
883
panic!("activate 'dtype-array'")
884
}
885
886
fn try_arr_from_iter_with_dtype<
887
E,
888
I: IntoIterator<Item = Result<Option<Box<dyn Array>>, E>>,
889
>(
890
dtype: ArrowDataType,
891
iter: I,
892
) -> Result<Self, E> {
893
let iter_values = iter.into_iter().collect::<Result<Vec<_>, E>>()?;
894
Ok(Self::arr_from_iter_with_dtype(dtype, iter_values))
895
}
896
}
897
898
impl ArrayFromIter<Option<()>> for StructArray {
899
fn arr_from_iter<I: IntoIterator<Item = Option<()>>>(_iter: I) -> Self {
900
no_call_const!()
901
}
902
903
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<Option<()>, E>>>(
904
_iter: I,
905
) -> Result<Self, E> {
906
no_call_const!()
907
}
908
}
909
910
impl ArrayFromIter<()> for StructArray {
911
fn arr_from_iter<I: IntoIterator<Item = ()>>(_iter: I) -> Self {
912
no_call_const!()
913
}
914
915
fn try_arr_from_iter<E, I: IntoIterator<Item = Result<(), E>>>(_iter: I) -> Result<Self, E> {
916
no_call_const!()
917
}
918
}
919
920
impl ArrayFromIterDtype<()> for StructArray {
921
fn arr_from_iter_with_dtype<I: IntoIterator<Item = ()>>(
922
_dtype: ArrowDataType,
923
_iter: I,
924
) -> Self {
925
no_call_const!()
926
}
927
928
fn try_arr_from_iter_with_dtype<E, I: IntoIterator<Item = Result<(), E>>>(
929
_dtype: ArrowDataType,
930
_iter: I,
931
) -> Result<Self, E> {
932
no_call_const!()
933
}
934
}
935
936
impl ArrayFromIterDtype<Option<()>> for StructArray {
937
fn arr_from_iter_with_dtype<I: IntoIterator<Item = Option<()>>>(
938
_dtype: ArrowDataType,
939
_iter: I,
940
) -> Self {
941
no_call_const!()
942
}
943
944
fn try_arr_from_iter_with_dtype<E, I: IntoIterator<Item = Result<Option<()>, E>>>(
945
_dtype: ArrowDataType,
946
_iter: I,
947
) -> Result<Self, E> {
948
no_call_const!()
949
}
950
}
951
952