GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-ops/src/chunked_array/list/namespace.rs
use std::borrow::Cow;
use std::fmt::Write;

use arrow::array::ValueSize;
#[cfg(feature = "list_gather")]
use num_traits::ToPrimitive;
#[cfg(feature = "list_gather")]
use num_traits::{NumCast, Signed, Zero};
use polars_compute::gather::sublist::list::{index_is_oob, sublist_get};
use polars_core::chunked_array::builder::get_list_builder;
#[cfg(feature = "diff")]
use polars_core::series::ops::NullBehavior;
use polars_core::utils::try_get_supertype;

use super::*;
#[cfg(feature = "list_any_all")]
use crate::chunked_array::list::any_all::*;
use crate::chunked_array::list::min_max::{list_max_function, list_min_function};
use crate::chunked_array::list::sum_mean::sum_with_nulls;
#[cfg(feature = "diff")]
use crate::prelude::diff;
use crate::prelude::list::sum_mean::{mean_list_numerical, sum_list_numerical};
use crate::series::ArgAgg;

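/// Returns `true` if any sublist contains a null element, i.e. a null in the
/// flat values buffer of a chunk (top-level null list entries are not considered).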
pub(super) fn has_inner_nulls(ca: &ListChunked) -> bool {
    for arr in ca.downcast_iter() {
        if arr.values().null_count() > 0 {
            return true;
        }
    }
    false
}

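/// Coerces every right-hand column to `List(dtype)`: non-list columns are cast
/// to the inner type and reshaped into unit lists, and unit-length columns may
/// be broadcast to `length` when `allow_broadcast` is set.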
fn cast_rhs(
    other: &mut [Column],
    inner_type: &DataType,
    dtype: &DataType,
    length: usize,
    allow_broadcast: bool,
) -> PolarsResult<()> {
    for s in other.iter_mut() {
        // make sure that inner types match before we coerce into list
        if !matches!(s.dtype(), DataType::List(_)) {
            *s = s.cast(inner_type)?
        }
        if !matches!(s.dtype(), DataType::List(_)) && s.dtype() == inner_type {
            // coerce to list JIT
            *s = s
                .reshape_list(&[ReshapeDimension::Infer, ReshapeDimension::new_dimension(1)])
                .unwrap();
        }
        if s.dtype() != dtype {
            *s = s.cast(dtype).map_err(|e| {
                polars_err!(
                    SchemaMismatch:
                    "cannot concat `{}` into a list of `{}`: {}",
                    s.dtype(),
                    dtype,
                    e
                )
            })?;
        }

        if s.len() != length {
            polars_ensure!(
                s.len() == 1,
                ShapeMismatch: "series length {} does not match expected length of {}",
                s.len(), length
            );
            if allow_broadcast {
                // broadcast JIT
                *s = s.new_from_index(0, length)
            }
            // else do nothing
        }
    }
    Ok(())
}

pub trait ListNameSpaceImpl: AsList {
    /// If the inner dtype is [`DataType::String`], the individual items are joined
    /// into a single string separated by `separator`.
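    ///
    /// Hedged usage sketch (illustrative only; assumes `ca` is a `ListChunked`
    /// with `String` inner dtype):
    /// ```ignore
    /// // A unit-length separator broadcasts over all sublists.
    /// let sep = StringChunked::new("sep".into(), &["-"]);
    /// let joined: StringChunked = ca.lst_join(&sep, true)?;
    /// ```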
    fn lst_join(
        &self,
        separator: &StringChunked,
        ignore_nulls: bool,
    ) -> PolarsResult<StringChunked> {
        let ca = self.as_list();
        match ca.inner_dtype() {
            DataType::String => match separator.len() {
                1 => match separator.get(0) {
                    Some(separator) => self.join_literal(separator, ignore_nulls),
                    _ => Ok(StringChunked::full_null(ca.name().clone(), ca.len())),
                },
                _ => self.join_many(separator, ignore_nulls),
            },
            dt => polars_bail!(op = "`lst.join`", got = dt, expected = "String"),
        }
    }

    fn join_literal(&self, separator: &str, ignore_nulls: bool) -> PolarsResult<StringChunked> {
        let ca = self.as_list();
        // used to amortize heap allocs
        let mut buf = String::with_capacity(128);
        let mut builder = StringChunkedBuilder::new(ca.name().clone(), ca.len());

        ca.for_each_amortized(|opt_s| {
            let opt_val = opt_s.and_then(|s| {
                // make sure that we don't write values of previous iteration
                buf.clear();
                let ca = s.as_ref().str().unwrap();

                if ca.null_count() != 0 && !ignore_nulls {
                    return None;
                }

                for arr in ca.downcast_iter() {
                    for val in arr.non_null_values_iter() {
                        buf.write_str(val).unwrap();
                        buf.write_str(separator).unwrap();
                    }
                }

                // the last value should not have a separator, so slice that off;
                // saturating sub because there might have been nothing written.
                Some(&buf[..buf.len().saturating_sub(separator.len())])
            });
            builder.append_option(opt_val)
        });
        Ok(builder.finish())
    }

    fn join_many(
        &self,
        separator: &StringChunked,
        ignore_nulls: bool,
    ) -> PolarsResult<StringChunked> {
        let ca = self.as_list();
        // used to amortize heap allocs
        let mut buf = String::with_capacity(128);
        let mut builder = StringChunkedBuilder::new(ca.name().clone(), ca.len());
        {
            ca.amortized_iter()
                .zip(separator)
                .for_each(|(opt_s, opt_sep)| match opt_sep {
                    Some(separator) => {
                        let opt_val = opt_s.and_then(|s| {
                            // make sure that we don't write values of previous iteration
                            buf.clear();
                            let ca = s.as_ref().str().unwrap();

                            if ca.null_count() != 0 && !ignore_nulls {
                                return None;
                            }

                            for arr in ca.downcast_iter() {
                                for val in arr.non_null_values_iter() {
                                    buf.write_str(val).unwrap();
                                    buf.write_str(separator).unwrap();
                                }
                            }

                            // the last value should not have a separator, so slice that off;
                            // saturating sub because there might have been nothing written.
                            Some(&buf[..buf.len().saturating_sub(separator.len())])
                        });
                        builder.append_option(opt_val)
                    },
                    _ => builder.append_null(),
                })
        }
        Ok(builder.finish())
    }

    fn lst_max(&self) -> PolarsResult<Series> {
        list_max_function(self.as_list())
    }

    #[cfg(feature = "list_any_all")]
    fn lst_all(&self) -> PolarsResult<Series> {
        let ca = self.as_list();
        list_all(ca)
    }

    #[cfg(feature = "list_any_all")]
    fn lst_any(&self) -> PolarsResult<Series> {
        let ca = self.as_list();
        list_any(ca)
    }

    fn lst_min(&self) -> PolarsResult<Series> {
        list_min_function(self.as_list())
    }

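    /// Sums every sublist. Dispatches on the inner dtype: booleans count set
    /// bits, primitive numeric types take a vectorized kernel, and everything
    /// else (or lists containing inner nulls) falls back to a null-aware sum.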
    fn lst_sum(&self) -> PolarsResult<Series> {
        let ca = self.as_list();

        if has_inner_nulls(ca) {
            return sum_with_nulls(ca, ca.inner_dtype());
        };

        match ca.inner_dtype() {
            DataType::Boolean => Ok(count_boolean_bits(ca).into_series()),
            dt if dt.is_primitive_numeric() => Ok(sum_list_numerical(ca, dt)),
            dt => sum_with_nulls(ca, dt),
        }
    }

    fn lst_mean(&self) -> Series {
        let ca = self.as_list();

        if has_inner_nulls(ca) {
            return sum_mean::mean_with_nulls(ca);
        };

        match ca.inner_dtype() {
            dt if dt.is_primitive_numeric() => mean_list_numerical(ca, dt),
            _ => sum_mean::mean_with_nulls(ca),
        }
    }

    fn lst_median(&self) -> Series {
        let ca = self.as_list();
        dispersion::median_with_nulls(ca)
    }

    fn lst_std(&self, ddof: u8) -> Series {
        let ca = self.as_list();
        dispersion::std_with_nulls(ca, ddof)
    }

    fn lst_var(&self, ddof: u8) -> PolarsResult<Series> {
        let ca = self.as_list();
        dispersion::var_with_nulls(ca, ddof)
    }

    fn same_type(&self, out: ListChunked) -> ListChunked {
        let ca = self.as_list();
        let dtype = ca.dtype();
        if out.dtype() != dtype {
            out.cast(ca.dtype()).unwrap().list().unwrap().clone()
        } else {
            out
        }
    }

    fn lst_sort(&self, options: SortOptions) -> PolarsResult<ListChunked> {
        let ca = self.as_list();
        let out = ca.try_apply_amortized(|s| s.as_ref().sort_with(options))?;
        Ok(self.same_type(out))
    }

    #[must_use]
    fn lst_reverse(&self) -> ListChunked {
        let ca = self.as_list();
        let out = ca.apply_amortized(|s| s.as_ref().reverse());
        self.same_type(out)
    }

    fn lst_n_unique(&self) -> PolarsResult<IdxCa> {
        let ca = self.as_list();
        ca.try_apply_amortized_generic(|s| {
            let opt_v = s.map(|s| s.as_ref().n_unique()).transpose()?;
            Ok(opt_v.map(|idx| idx as IdxSize))
        })
    }

    fn lst_unique(&self) -> PolarsResult<ListChunked> {
        let ca = self.as_list();
        let out = ca.try_apply_amortized(|s| s.as_ref().unique())?;
        Ok(self.same_type(out))
    }

    fn lst_unique_stable(&self) -> PolarsResult<ListChunked> {
        let ca = self.as_list();
        let out = ca.try_apply_amortized(|s| s.as_ref().unique_stable())?;
        Ok(self.same_type(out))
    }

    fn lst_arg_min(&self) -> IdxCa {
        let ca = self.as_list();
        ca.apply_amortized_generic(|opt_s| {
            opt_s.and_then(|s| s.as_ref().arg_min().map(|idx| idx as IdxSize))
        })
    }

    fn lst_arg_max(&self) -> IdxCa {
        let ca = self.as_list();
        ca.apply_amortized_generic(|opt_s| {
            opt_s.and_then(|s| s.as_ref().arg_max().map(|idx| idx as IdxSize))
        })
    }

    #[cfg(feature = "diff")]
    fn lst_diff(&self, n: i64, null_behavior: NullBehavior) -> PolarsResult<ListChunked> {
        let ca = self.as_list();
        ca.try_apply_amortized(|s| diff(s.as_ref(), n, null_behavior))
    }

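    /// Shifts every sublist by `periods`. Either side may be unit length, in
    /// which case it is broadcast against the other; a null `periods` entry
    /// yields a null sublist.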
    fn lst_shift(&self, periods: &Column) -> PolarsResult<ListChunked> {
        let ca = self.as_list();
        let periods_s = periods.cast(&DataType::Int64)?;
        let periods = periods_s.i64()?;

        polars_ensure!(
            ca.len() == periods.len() || ca.len() == 1 || periods.len() == 1,
            length_mismatch = "list.shift",
            ca.len(),
            periods.len()
        );

        // Broadcast `self`
        let mut ca = Cow::Borrowed(ca);
        if ca.len() == 1 && periods.len() != 1 {
            // Optimize: Don't broadcast and instead have a special path.
            ca = Cow::Owned(ca.new_from_index(0, periods.len()));
        }
        let ca = ca.as_ref();

        let out = match periods.len() {
            1 => {
                if let Some(periods) = periods.get(0) {
                    ca.apply_amortized(|s| s.as_ref().shift(periods))
                } else {
                    ListChunked::full_null_with_dtype(ca.name().clone(), ca.len(), ca.inner_dtype())
                }
            },
            _ => ca.zip_and_apply_amortized(periods, |opt_s, opt_periods| {
                match (opt_s, opt_periods) {
                    (Some(s), Some(periods)) => Some(s.as_ref().shift(periods)),
                    _ => None,
                }
            }),
        };
        Ok(self.same_type(out))
    }

    fn lst_slice(&self, offset: i64, length: usize) -> ListChunked {
        let ca = self.as_list();
        let out = ca.apply_amortized(|s| s.as_ref().slice(offset, length));
        self.same_type(out)
    }

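    /// Returns the length of every sublist, computed from each chunk's offsets
    /// (`offsets[i + 1] - offsets[i]`). An entirely-null column short-circuits
    /// to a full-null result.
    ///
    /// Hedged sketch of the offsets arithmetic (illustrative only): offsets
    /// `[0, 2, 2, 5]` describe three sublists with lengths `[2, 0, 3]`.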
    fn lst_lengths(&self) -> IdxCa {
        let ca = self.as_list();

        let ca_validity = ca.rechunk_validity();

        if ca_validity.as_ref().is_some_and(|x| x.set_bits() == 0) {
            return IdxCa::full_null(ca.name().clone(), ca.len());
        }

        let mut lengths = Vec::with_capacity(ca.len());
        ca.downcast_iter().for_each(|arr| {
            let offsets = arr.offsets().as_slice();
            let mut last = offsets[0];
            for o in &offsets[1..] {
                lengths.push((*o - last) as IdxSize);
                last = *o;
            }
        });

        let arr = IdxArr::from_vec(lengths).with_validity(ca_validity);
        IdxCa::with_chunk(ca.name().clone(), arr)
    }

    /// Get the value by index in the sublists.
    /// Index `0` returns the first item of every sublist and index `-1`
    /// returns the last item of every sublist. If an index is out of bounds,
    /// the result is `None`.
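    ///
    /// Hedged usage sketch (illustrative only):
    /// ```ignore
    /// // last element of every sublist; nulls for empty sublists
    /// let lasts: Series = ca.lst_get(-1, true)?;
    /// ```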
    fn lst_get(&self, idx: i64, null_on_oob: bool) -> PolarsResult<Series> {
        let ca = self.as_list();
        if !null_on_oob && ca.downcast_iter().any(|arr| index_is_oob(arr, idx)) {
            polars_bail!(ComputeError: "get index is out of bounds");
        }

        let chunks = ca
            .downcast_iter()
            .map(|arr| sublist_get(arr, idx))
            .collect::<Vec<_>>();

        let s = Series::try_from((ca.name().clone(), chunks)).unwrap();
        // SAFETY: every element in list has dtype equal to its inner type
        unsafe { s.from_physical_unchecked(ca.inner_dtype()) }
    }

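    /// For every sublist, takes every `n`-th element starting at `offset`.
    /// `n` and `offset` must each have length 1 (broadcast) or the length of
    /// the list column; null parameters produce null sublists.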
    #[cfg(feature = "list_gather")]
    fn lst_gather_every(&self, n: &IdxCa, offset: &IdxCa) -> PolarsResult<Series> {
        let list_ca = self.as_list();
        let out = match (n.len(), offset.len()) {
            (1, 1) => match (n.get(0), offset.get(0)) {
                (Some(n), Some(offset)) => list_ca.try_apply_amortized(|s| {
                    s.as_ref().gather_every(n as usize, offset as usize)
                })?,
                _ => ListChunked::full_null_with_dtype(
                    list_ca.name().clone(),
                    list_ca.len(),
                    list_ca.inner_dtype(),
                ),
            },
            (1, len_offset) if len_offset == list_ca.len() => {
                if let Some(n) = n.get(0) {
                    list_ca.try_zip_and_apply_amortized(offset, |opt_s, opt_offset| {
                        match (opt_s, opt_offset) {
                            (Some(s), Some(offset)) => {
                                Ok(Some(s.as_ref().gather_every(n as usize, offset as usize)?))
                            },
                            _ => Ok(None),
                        }
                    })?
                } else {
                    ListChunked::full_null_with_dtype(
                        list_ca.name().clone(),
                        list_ca.len(),
                        list_ca.inner_dtype(),
                    )
                }
            },
            (len_n, 1) if len_n == list_ca.len() => {
                if let Some(offset) = offset.get(0) {
                    list_ca.try_zip_and_apply_amortized(n, |opt_s, opt_n| match (opt_s, opt_n) {
                        (Some(s), Some(n)) => {
                            Ok(Some(s.as_ref().gather_every(n as usize, offset as usize)?))
                        },
                        _ => Ok(None),
                    })?
                } else {
                    ListChunked::full_null_with_dtype(
                        list_ca.name().clone(),
                        list_ca.len(),
                        list_ca.inner_dtype(),
                    )
                }
            },
            (len_n, len_offset) if len_n == len_offset && len_n == list_ca.len() => list_ca
                .try_binary_zip_and_apply_amortized(
                    n,
                    offset,
                    |opt_s, opt_n, opt_offset| match (opt_s, opt_n, opt_offset) {
                        (Some(s), Some(n), Some(offset)) => {
                            Ok(Some(s.as_ref().gather_every(n as usize, offset as usize)?))
                        },
                        _ => Ok(None),
                    },
                )?,
            _ => {
                polars_bail!(ComputeError: "The lengths of `n` and `offset` should be 1 or equal to the length of list.")
            },
        };
        Ok(out.into_series())
    }

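    /// Gathers elements from each sublist by the indices in `idx` (a list
    /// column of integers; negative indices count from the back). Either side
    /// may be unit length and is broadcast against the other.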
    #[cfg(feature = "list_gather")]
    fn lst_gather(&self, idx: &Series, null_on_oob: bool) -> PolarsResult<Series> {
        let list_ca = self.as_list();
        let idx_ca = idx.list()?;

        polars_ensure!(
            idx_ca.inner_dtype().is_integer(),
            ComputeError: "cannot use dtype `{}` as an index", idx_ca.inner_dtype()
        );

        let index_typed_index = |idx: &Series| {
            let idx = idx.cast(&IDX_DTYPE).unwrap();
            {
                list_ca
                    .amortized_iter()
                    .map(|s| {
                        s.map(|s| {
                            let s = s.as_ref();
                            take_series(s, idx.clone(), null_on_oob)
                        })
                        .transpose()
                    })
                    .collect::<PolarsResult<ListChunked>>()
                    .map(|mut ca| {
                        ca.rename(list_ca.name().clone());
                        ca.into_series()
                    })
            }
        };

        match (list_ca.len(), idx_ca.len()) {
            (1, _) => {
                let mut out = if list_ca.has_nulls() {
                    ListChunked::full_null_with_dtype(
                        PlSmallStr::EMPTY,
                        idx.len(),
                        list_ca.inner_dtype(),
                    )
                } else {
                    let s = list_ca.explode(false)?;
                    idx_ca
                        .into_iter()
                        .map(|opt_idx| {
                            opt_idx
                                .map(|idx| take_series(&s, idx, null_on_oob))
                                .transpose()
                        })
                        .collect::<PolarsResult<ListChunked>>()?
                };
                out.rename(list_ca.name().clone());
                Ok(out.into_series())
            },
            (_, 1) => {
                let idx_ca = idx_ca.explode(false)?;

                use DataType as D;
                match idx_ca.dtype() {
                    D::UInt32 | D::UInt64 => index_typed_index(&idx_ca),
                    dt if dt.is_signed_integer() => {
                        if let Some(min) = idx_ca.min::<i64>().unwrap() {
                            if min >= 0 {
                                index_typed_index(&idx_ca)
                            } else {
                                let mut out = {
                                    list_ca
                                        .amortized_iter()
                                        .map(|opt_s| {
                                            opt_s
                                                .map(|s| {
                                                    take_series(
                                                        s.as_ref(),
                                                        idx_ca.clone(),
                                                        null_on_oob,
                                                    )
                                                })
                                                .transpose()
                                        })
                                        .collect::<PolarsResult<ListChunked>>()?
                                };
                                out.rename(list_ca.name().clone());
                                Ok(out.into_series())
                            }
                        } else {
                            polars_bail!(ComputeError: "all indices are null");
                        }
                    },
                    dt => polars_bail!(ComputeError: "cannot use dtype `{dt}` as an index"),
                }
            },
            (a, b) if a == b => {
                let mut out = {
                    list_ca
                        .amortized_iter()
                        .zip(idx_ca)
                        .map(|(opt_s, opt_idx)| {
                            {
                                match (opt_s, opt_idx) {
                                    (Some(s), Some(idx)) => {
                                        Some(take_series(s.as_ref(), idx, null_on_oob))
                                    },
                                    _ => None,
                                }
                            }
                            .transpose()
                        })
                        .collect::<PolarsResult<ListChunked>>()?
                };
                out.rename(list_ca.name().clone());
                Ok(out.into_series())
            },
            (a, b) => polars_bail!(length_mismatch = "list.gather", a, b),
        }
    }

    #[cfg(feature = "list_drop_nulls")]
    fn lst_drop_nulls(&self) -> ListChunked {
        let list_ca = self.as_list();

        list_ca.apply_amortized(|s| s.as_ref().drop_nulls())
    }

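    /// Samples `n` elements from every sublist. `n` may be unit length
    /// (broadcast) or match the list column's length; a null `n` yields a null
    /// sublist. `lst_sample_fraction` below follows the same pattern, deriving
    /// `n` per sublist as `(sublist_len as f64 * fraction) as usize`.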
    #[cfg(feature = "list_sample")]
    fn lst_sample_n(
        &self,
        n: &Series,
        with_replacement: bool,
        shuffle: bool,
        seed: Option<u64>,
    ) -> PolarsResult<ListChunked> {
        use std::borrow::Cow;

        let ca = self.as_list();

        let n_s = n.cast(&IDX_DTYPE)?;
        let n = n_s.idx()?;

        polars_ensure!(
            ca.len() == n.len() || ca.len() == 1 || n.len() == 1,
            length_mismatch = "list.sample(n)",
            ca.len(),
            n.len()
        );

        // Broadcast `self`
        let mut ca = Cow::Borrowed(ca);
        if ca.len() == 1 && n.len() != 1 {
            // Optimize: Don't broadcast and instead have a special path.
            ca = Cow::Owned(ca.new_from_index(0, n.len()));
        }
        let ca = ca.as_ref();

        let out = match n.len() {
            1 => {
                if let Some(n) = n.get(0) {
                    ca.try_apply_amortized(|s| {
                        s.as_ref()
                            .sample_n(n as usize, with_replacement, shuffle, seed)
                    })
                } else {
                    Ok(ListChunked::full_null_with_dtype(
                        ca.name().clone(),
                        ca.len(),
                        ca.inner_dtype(),
                    ))
                }
            },
            _ => ca.try_zip_and_apply_amortized(n, |opt_s, opt_n| match (opt_s, opt_n) {
                (Some(s), Some(n)) => s
                    .as_ref()
                    .sample_n(n as usize, with_replacement, shuffle, seed)
                    .map(Some),
                _ => Ok(None),
            }),
        };
        out.map(|ok| self.same_type(ok))
    }

    #[cfg(feature = "list_sample")]
    fn lst_sample_fraction(
        &self,
        fraction: &Series,
        with_replacement: bool,
        shuffle: bool,
        seed: Option<u64>,
    ) -> PolarsResult<ListChunked> {
        use std::borrow::Cow;

        let ca = self.as_list();

        let fraction_s = fraction.cast(&DataType::Float64)?;
        let fraction = fraction_s.f64()?;

        polars_ensure!(
            ca.len() == fraction.len() || ca.len() == 1 || fraction.len() == 1,
            length_mismatch = "list.sample(fraction)",
            ca.len(),
            fraction.len()
        );

        // Broadcast `self`
        let mut ca = Cow::Borrowed(ca);
        if ca.len() == 1 && fraction.len() != 1 {
            // Optimize: Don't broadcast and instead have a special path.
            ca = Cow::Owned(ca.new_from_index(0, fraction.len()));
        }
        let ca = ca.as_ref();

        let out = match fraction.len() {
            1 => {
                if let Some(fraction) = fraction.get(0) {
                    ca.try_apply_amortized(|s| {
                        let n = (s.as_ref().len() as f64 * fraction) as usize;
                        s.as_ref().sample_n(n, with_replacement, shuffle, seed)
                    })
                } else {
                    Ok(ListChunked::full_null_with_dtype(
                        ca.name().clone(),
                        ca.len(),
                        ca.inner_dtype(),
                    ))
                }
            },
            _ => ca.try_zip_and_apply_amortized(fraction, |opt_s, opt_n| match (opt_s, opt_n) {
                (Some(s), Some(fraction)) => {
                    let n = (s.as_ref().len() as f64 * fraction) as usize;
                    s.as_ref()
                        .sample_n(n, with_replacement, shuffle, seed)
                        .map(Some)
                },
                _ => Ok(None),
            }),
        };
        out.map(|ok| self.same_type(ok))
    }

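    /// Horizontally concatenates the elements of `other` onto every sublist.
    /// The inner supertype of all inputs is computed first and both sides are
    /// cast to it; unit-length right-hand columns take a broadcasting fast
    /// path that avoids materializing the expanded series.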
    fn lst_concat(&self, other: &[Column]) -> PolarsResult<ListChunked> {
        let ca = self.as_list();
        let other_len = other.len();
        let length = ca.len();
        let mut other = other.to_vec();
        let mut inner_super_type = ca.inner_dtype().clone();

        for s in &other {
            match s.dtype() {
                DataType::List(inner_type) => {
                    inner_super_type = try_get_supertype(&inner_super_type, inner_type)?;
                },
                dt => {
                    inner_super_type = try_get_supertype(&inner_super_type, dt)?;
                },
            }
        }

        // cast lhs
        let dtype = &DataType::List(Box::new(inner_super_type.clone()));
        let ca = ca.cast(dtype)?;
        let ca = ca.list().unwrap();

        // broadcasting path in case all unit length
        // this path will not expand the series, so saves memory
        let out = if other.iter().all(|s| s.len() == 1) && ca.len() != 1 {
            cast_rhs(&mut other, &inner_super_type, dtype, length, false)?;
            let to_append = other
                .iter()
                .filter_map(|s| {
                    let lst = s.list().unwrap();
                    // SAFETY: previous rhs_cast ensures the type is correct
                    unsafe {
                        lst.get_as_series(0)
                            .map(|s| s.from_physical_unchecked(&inner_super_type).unwrap())
                    }
                })
                .collect::<Vec<_>>();

            // there was a None, so all values will be None
            if to_append.len() != other_len {
                return Ok(ListChunked::full_null_with_dtype(
                    ca.name().clone(),
                    length,
                    &inner_super_type,
                ));
            }

            let vals_size_other = other
                .iter()
                .map(|s| s.list().unwrap().get_values_size())
                .sum::<usize>();

            let mut builder = get_list_builder(
                &inner_super_type,
                ca.get_values_size() + vals_size_other + 1,
                length,
                ca.name().clone(),
            );
            ca.into_iter().for_each(|opt_s| {
                let opt_s = opt_s.map(|mut s| {
                    for append in &to_append {
                        s.append(append).unwrap();
                    }
                    match inner_super_type {
                        // structs don't have chunks, so we must first rechunk the underlying series
                        #[cfg(feature = "dtype-struct")]
                        DataType::Struct(_) => s = s.rechunk(),
                        // nothing
                        _ => {},
                    }
                    s
                });
                builder.append_opt_series(opt_s.as_ref()).unwrap();
            });
            builder.finish()
        } else {
            // normal path which may contain same length list or unit length lists
            cast_rhs(&mut other, &inner_super_type, dtype, length, true)?;

            let vals_size_other = other
                .iter()
                .map(|s| s.list().unwrap().get_values_size())
                .sum::<usize>();
            let mut iters = Vec::with_capacity(other_len + 1);

            for s in other.iter_mut() {
                iters.push(s.list()?.amortized_iter())
            }
            let mut first_iter: Box<dyn PolarsIterator<Item = Option<Series>>> = ca.into_iter();
            let mut builder = get_list_builder(
                &inner_super_type,
                ca.get_values_size() + vals_size_other + 1,
                length,
                ca.name().clone(),
            );

            for _ in 0..ca.len() {
                let mut acc = match first_iter.next().unwrap() {
                    Some(s) => s,
                    None => {
                        builder.append_null();
                        // make sure that the iterators advance before we continue
                        for it in &mut iters {
                            it.next().unwrap();
                        }
                        continue;
                    },
                };

                let mut has_nulls = false;
                for it in &mut iters {
                    match it.next().unwrap() {
                        Some(s) => {
                            if !has_nulls {
                                acc.append(s.as_ref())?;
                            }
                        },
                        None => {
                            has_nulls = true;
                        },
                    }
                }
                if has_nulls {
                    builder.append_null();
                    continue;
                }

                match inner_super_type {
                    // structs don't have chunks, so we must first rechunk the underlying series
                    #[cfg(feature = "dtype-struct")]
                    DataType::Struct(_) => acc = acc.rechunk(),
                    // nothing
                    _ => {},
                }
                builder.append_series(&acc).unwrap();
            }
            builder.finish()
        };
        Ok(out)
    }
}

impl ListNameSpaceImpl for ListChunked {}

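/// Gathers elements from `s` by `idx` after normalizing the indices with
/// [`cast_index`]; out-of-bounds behavior is controlled by `null_on_oob`.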
#[cfg(feature = "list_gather")]
fn take_series(s: &Series, idx: Series, null_on_oob: bool) -> PolarsResult<Series> {
    let len = s.len();
    let idx = cast_index(idx, len, null_on_oob)?;
    let idx = idx.idx().unwrap();
    s.take(idx)
}

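/// Converts a signed index array to an index series, resolving negative
/// indices relative to `len` (e.g. `-1` maps to `len - 1`); indices that
/// cannot be resolved become null.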
#[cfg(feature = "list_gather")]
fn cast_signed_index_ca<T: PolarsNumericType>(idx: &ChunkedArray<T>, len: usize) -> Series
where
    T::Native: Copy + PartialOrd + PartialEq + NumCast + Signed + Zero,
{
    idx.iter()
        .map(|opt_idx| opt_idx.and_then(|idx| idx.negative_to_usize(len).map(|idx| idx as IdxSize)))
        .collect::<IdxCa>()
        .into_series()
}

#[cfg(feature = "list_gather")]
fn cast_unsigned_index_ca<T: PolarsNumericType>(idx: &ChunkedArray<T>, len: usize) -> Series
where
    T::Native: Copy + PartialOrd + ToPrimitive,
{
    idx.iter()
        .map(|opt_idx| {
            opt_idx.and_then(|idx| {
                let idx = idx.to_usize().unwrap();
                if idx >= len {
                    None
                } else {
                    Some(idx as IdxSize)
                }
            })
        })
        .collect::<IdxCa>()
        .into_series()
}

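/// Normalizes an index series to `IDX_DTYPE`. Unsigned dtypes are bounds-checked
/// (yielding nulls) only when `null_on_oob` is set; signed dtypes always go
/// through [`cast_signed_index_ca`] to resolve negatives. If normalization
/// introduced new nulls while `null_on_oob` is `false`, an out-of-bounds error
/// is raised.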
#[cfg(feature = "list_gather")]
fn cast_index(idx: Series, len: usize, null_on_oob: bool) -> PolarsResult<Series> {
    let idx_null_count = idx.null_count();
    use DataType::*;
    let out = match idx.dtype() {
        #[cfg(feature = "big_idx")]
        UInt32 => {
            if null_on_oob {
                let a = idx.u32().unwrap();
                cast_unsigned_index_ca(a, len)
            } else {
                idx.cast(&IDX_DTYPE).unwrap()
            }
        },
        #[cfg(feature = "big_idx")]
        UInt64 => {
            if null_on_oob {
                let a = idx.u64().unwrap();
                cast_unsigned_index_ca(a, len)
            } else {
                idx
            }
        },
        #[cfg(not(feature = "big_idx"))]
        UInt64 => {
            if null_on_oob {
                let a = idx.u64().unwrap();
                cast_unsigned_index_ca(a, len)
            } else {
                idx.cast(&IDX_DTYPE).unwrap()
            }
        },
        #[cfg(not(feature = "big_idx"))]
        UInt32 => {
            if null_on_oob {
                let a = idx.u32().unwrap();
                cast_unsigned_index_ca(a, len)
            } else {
                idx
            }
        },
        dt if dt.is_unsigned_integer() => idx.cast(&IDX_DTYPE).unwrap(),
        Int8 => {
            let a = idx.i8().unwrap();
            cast_signed_index_ca(a, len)
        },
        Int16 => {
            let a = idx.i16().unwrap();
            cast_signed_index_ca(a, len)
        },
        Int32 => {
            let a = idx.i32().unwrap();
            cast_signed_index_ca(a, len)
        },
        Int64 => {
            let a = idx.i64().unwrap();
            cast_signed_index_ca(a, len)
        },
        _ => {
            unreachable!()
        },
    };
    polars_ensure!(
        out.null_count() == idx_null_count || null_on_oob,
        OutOfBounds: "gather indices are out of bounds"
    );
    Ok(out)
}

// TODO: implement the above for ArrayChunked as well?