Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-ops/src/chunked_array/list/namespace.rs
8412 views
1
use std::borrow::Cow;
2
use std::fmt::Write;
3
4
use arrow::array::ValueSize;
5
#[cfg(feature = "list_gather")]
6
use num_traits::ToPrimitive;
7
#[cfg(feature = "list_gather")]
8
use num_traits::{NumCast, Signed, Zero};
9
use polars_compute::gather::sublist::list::{index_is_oob, sublist_get};
10
use polars_core::chunked_array::builder::get_list_builder;
11
#[cfg(feature = "diff")]
12
use polars_core::series::ops::NullBehavior;
13
use polars_core::utils::try_get_supertype;
14
15
use super::*;
16
#[cfg(feature = "list_any_all")]
17
use crate::chunked_array::list::any_all::*;
18
use crate::chunked_array::list::min_max::{list_max_function, list_min_function};
19
use crate::chunked_array::list::sum_mean::sum_with_nulls;
20
#[cfg(feature = "diff")]
21
use crate::prelude::diff;
22
use crate::prelude::list::sum_mean::{mean_list_numerical, sum_list_numerical};
23
use crate::series::ArgAgg;
24
25
pub(super) fn has_inner_nulls(ca: &ListChunked) -> bool {
26
for arr in ca.downcast_iter() {
27
if arr.values().null_count() > 0 {
28
return true;
29
}
30
}
31
false
32
}
33
34
/// Coerce every column in `other` (the right-hand sides of a list concat)
/// to the target list `dtype`, optionally broadcasting unit-length columns
/// to `length`.
///
/// The steps are order-sensitive:
/// 1. non-list columns are first cast to `inner_type`,
/// 2. then wrapped into a single-element list per row (reshape),
/// 3. then everything is cast to the final list `dtype`,
/// 4. finally unit-length columns are broadcast when `allow_broadcast`.
///
/// # Errors
/// - `SchemaMismatch` if a column cannot be cast to `dtype`.
/// - `ShapeMismatch` if a column is neither `length` nor unit length.
fn cast_rhs(
    other: &mut [Column],
    inner_type: &DataType,
    dtype: &DataType,
    length: usize,
    allow_broadcast: bool,
) -> PolarsResult<()> {
    for s in other.iter_mut() {
        // make sure that inner types match before we coerce into list
        if !matches!(s.dtype(), DataType::List(_)) {
            *s = s.cast(inner_type)?
        }
        if !matches!(s.dtype(), DataType::List(_)) && s.dtype() == inner_type {
            // coerce to list JIT: each scalar row becomes a 1-element sublist
            *s = s
                .reshape_list(&[ReshapeDimension::Infer, ReshapeDimension::new_dimension(1)])
                .unwrap();
        }
        if s.dtype() != dtype {
            *s = s.cast(dtype).map_err(|e| {
                polars_err!(
                    SchemaMismatch:
                    "cannot concat `{}` into a list of `{}`: {}",
                    s.dtype(),
                    dtype,
                    e
                )
            })?;
        }

        if s.len() != length {
            // only unit-length columns may differ from the expected length
            polars_ensure!(
                s.len() == 1,
                ShapeMismatch: "series length {} does not match expected length of {}",
                s.len(), length
            );
            if allow_broadcast {
                // broadcast JIT
                *s = s.new_from_index(0, length)
            }
            // else do nothing: caller handles unit-length columns itself
        }
    }
    Ok(())
}
79
80
pub trait ListNameSpaceImpl: AsList {
    /// In case the inner dtype [`DataType::String`], the individual items will be joined into a
    /// single string separated by `separator`.
    ///
    /// A unit-length `separator` uses the literal fast path; otherwise the
    /// separator is zipped row-wise. Errors for non-String inner dtypes.
    fn lst_join(
        &self,
        separator: &StringChunked,
        ignore_nulls: bool,
    ) -> PolarsResult<StringChunked> {
        let ca = self.as_list();
        match ca.inner_dtype() {
            DataType::String => match separator.len() {
                1 => match separator.get(0) {
                    Some(separator) => self.join_literal(separator, ignore_nulls),
                    // a single null separator yields an all-null result
                    _ => Ok(StringChunked::full_null(ca.name().clone(), ca.len())),
                },
                _ => self.join_many(separator, ignore_nulls),
            },
            dt => polars_bail!(op = "`lst.join`", got = dt, expected = "String"),
        }
    }

    /// Join each sublist's string values with a constant `separator`.
    ///
    /// A row becomes null when the outer row is null, or when the sublist
    /// contains nulls and `ignore_nulls` is false.
    fn join_literal(&self, separator: &str, ignore_nulls: bool) -> PolarsResult<StringChunked> {
        let ca = self.as_list();
        // used to amortize heap allocs
        let mut buf = String::with_capacity(128);
        let mut builder = StringChunkedBuilder::new(ca.name().clone(), ca.len());

        ca.for_each_amortized(|opt_s| {
            let opt_val = opt_s.and_then(|s| {
                // make sure that we don't write values of previous iteration
                buf.clear();
                let ca = s.as_ref().str().unwrap();

                if ca.null_count() != 0 && !ignore_nulls {
                    return None;
                }

                for arr in ca.downcast_iter() {
                    for val in arr.non_null_values_iter() {
                        buf.write_str(val).unwrap();
                        buf.write_str(separator).unwrap();
                    }
                }

                // last value should not have a separator, so slice that off
                // saturating sub because there might have been nothing written.
                Some(&buf[..buf.len().saturating_sub(separator.len())])
            });
            builder.append_option(opt_val)
        });
        Ok(builder.finish())
    }

    /// Join each sublist's string values with a row-wise `separator`.
    ///
    /// A null separator produces a null row; otherwise semantics match
    /// [`Self::join_literal`] per row.
    fn join_many(
        &self,
        separator: &StringChunked,
        ignore_nulls: bool,
    ) -> PolarsResult<StringChunked> {
        let ca = self.as_list();
        // used to amortize heap allocs
        let mut buf = String::with_capacity(128);
        let mut builder = StringChunkedBuilder::new(ca.name().clone(), ca.len());
        {
            ca.amortized_iter()
                .zip(separator)
                .for_each(|(opt_s, opt_sep)| match opt_sep {
                    Some(separator) => {
                        let opt_val = opt_s.and_then(|s| {
                            // make sure that we don't write values of previous iteration
                            buf.clear();
                            let ca = s.as_ref().str().unwrap();

                            if ca.null_count() != 0 && !ignore_nulls {
                                return None;
                            }

                            for arr in ca.downcast_iter() {
                                for val in arr.non_null_values_iter() {
                                    buf.write_str(val).unwrap();
                                    buf.write_str(separator).unwrap();
                                }
                            }

                            // last value should not have a separator, so slice that off
                            // saturating sub because there might have been nothing written.
                            Some(&buf[..buf.len().saturating_sub(separator.len())])
                        });
                        builder.append_option(opt_val)
                    },
                    _ => builder.append_null(),
                })
        }
        Ok(builder.finish())
    }

    /// Maximum of every sublist.
    fn lst_max(&self) -> PolarsResult<Series> {
        list_max_function(self.as_list())
    }

    /// `all()` over every (boolean) sublist.
    #[cfg(feature = "list_any_all")]
    fn lst_all(&self) -> PolarsResult<Series> {
        let ca = self.as_list();
        list_all(ca)
    }

    /// `any()` over every (boolean) sublist.
    #[cfg(feature = "list_any_all")]
    fn lst_any(&self) -> PolarsResult<Series> {
        let ca = self.as_list();
        list_any(ca)
    }

    /// Minimum of every sublist.
    fn lst_min(&self) -> PolarsResult<Series> {
        list_min_function(self.as_list())
    }

    /// Sum of every sublist.
    ///
    /// Uses the fast numeric kernel when the inner values are null-free,
    /// otherwise falls back to the null-aware implementation.
    fn lst_sum(&self) -> PolarsResult<Series> {
        let ca = self.as_list();

        if has_inner_nulls(ca) {
            return sum_with_nulls(ca, ca.inner_dtype());
        };

        match ca.inner_dtype() {
            // sum of booleans == count of set bits
            DataType::Boolean => Ok(count_boolean_bits(ca).into_series()),
            dt if dt.is_primitive_numeric() => Ok(sum_list_numerical(ca, dt)),
            dt => sum_with_nulls(ca, dt),
        }
    }

    /// Mean of every sublist; same fast-path/fallback split as [`Self::lst_sum`].
    fn lst_mean(&self) -> Series {
        let ca = self.as_list();

        if has_inner_nulls(ca) {
            return sum_mean::mean_with_nulls(ca);
        };

        match ca.inner_dtype() {
            dt if dt.is_primitive_numeric() => mean_list_numerical(ca, dt),
            _ => sum_mean::mean_with_nulls(ca),
        }
    }

    /// Median of every sublist.
    fn lst_median(&self) -> Series {
        let ca = self.as_list();
        dispersion::median_with_nulls(ca)
    }

    /// Standard deviation of every sublist with `ddof` delta degrees of freedom.
    fn lst_std(&self, ddof: u8) -> Series {
        let ca = self.as_list();
        dispersion::std_with_nulls(ca, ddof)
    }

    /// Variance of every sublist with `ddof` delta degrees of freedom.
    fn lst_var(&self, ddof: u8) -> PolarsResult<Series> {
        let ca = self.as_list();
        dispersion::var_with_nulls(ca, ddof)
    }

    /// Cast `out` back to `self`'s dtype if an operation changed it
    /// (e.g. logical types round-tripping through their physical repr).
    fn same_type(&self, out: ListChunked) -> ListChunked {
        let ca = self.as_list();
        let dtype = ca.dtype();
        if out.dtype() != dtype {
            out.cast(ca.dtype()).unwrap().list().unwrap().clone()
        } else {
            out
        }
    }

    /// Sort every sublist with the given `options`.
    fn lst_sort(&self, options: SortOptions) -> PolarsResult<ListChunked> {
        let ca = self.as_list();
        // SAFETY: `sort_with` doesn't change the dtype
        let out = unsafe { ca.try_apply_amortized_same_type(|s| s.as_ref().sort_with(options))? };
        Ok(self.same_type(out))
    }

    /// Reverse every sublist.
    #[must_use]
    fn lst_reverse(&self) -> ListChunked {
        let ca = self.as_list();
        // SAFETY: `reverse` doesn't change the dtype
        unsafe { ca.apply_amortized_same_type(|s| s.as_ref().reverse()) }
    }

    /// Number of unique values in every sublist.
    fn lst_n_unique(&self) -> PolarsResult<IdxCa> {
        let ca = self.as_list();
        ca.try_apply_amortized_generic(|s| {
            let opt_v = s.map(|s| s.as_ref().n_unique()).transpose()?;
            Ok(opt_v.map(|idx| idx as IdxSize))
        })
    }

    /// Unique values of every sublist (order not guaranteed).
    fn lst_unique(&self) -> PolarsResult<ListChunked> {
        let ca = self.as_list();
        // SAFETY: `unique` doesn't change the dtype
        let out = unsafe { ca.try_apply_amortized_same_type(|s| s.as_ref().unique())? };
        Ok(self.same_type(out))
    }

    /// Unique values of every sublist, preserving first-seen order.
    fn lst_unique_stable(&self) -> PolarsResult<ListChunked> {
        let ca = self.as_list();
        // SAFETY: `unique_stable` doesn't change the dtype
        let out = unsafe { ca.try_apply_amortized_same_type(|s| s.as_ref().unique_stable())? };
        Ok(self.same_type(out))
    }

    /// Index of the minimum of every sublist (null when undefined).
    fn lst_arg_min(&self) -> IdxCa {
        let ca = self.as_list();
        ca.apply_amortized_generic(|opt_s| {
            opt_s.and_then(|s| s.as_ref().arg_min().map(|idx| idx as IdxSize))
        })
    }

    /// Index of the maximum of every sublist (null when undefined).
    fn lst_arg_max(&self) -> IdxCa {
        let ca = self.as_list();
        ca.apply_amortized_generic(|opt_s| {
            opt_s.and_then(|s| s.as_ref().arg_max().map(|idx| idx as IdxSize))
        })
    }

    /// `diff` of every sublist with lag `n` and the given null behavior.
    #[cfg(feature = "diff")]
    fn lst_diff(&self, n: i64, null_behavior: NullBehavior) -> PolarsResult<ListChunked> {
        let ca = self.as_list();
        ca.try_apply_amortized(|s| diff(s.as_ref(), n, null_behavior))
    }

    /// Shift every sublist by the row-wise (or broadcast) `periods`.
    fn lst_shift(&self, periods: &Column) -> PolarsResult<ListChunked> {
        let ca = self.as_list();
        let periods_s = periods.cast(&DataType::Int64)?;
        let periods = periods_s.i64()?;

        polars_ensure!(
            ca.len() == periods.len() || ca.len() == 1 || periods.len() == 1,
            length_mismatch = "list.shift",
            ca.len(),
            periods.len()
        );

        // Broadcast `self`
        let mut ca = Cow::Borrowed(ca);
        if ca.len() == 1 && periods.len() != 1 {
            // Optimize: Don't broadcast and instead have a special path.
            ca = Cow::Owned(ca.new_from_index(0, periods.len()));
        }
        let ca = ca.as_ref();

        let out = match periods.len() {
            1 => {
                if let Some(periods) = periods.get(0) {
                    // SAFETY: `shift` doesn't change the dtype
                    unsafe { ca.apply_amortized_same_type(|s| s.as_ref().shift(periods)) }
                } else {
                    // null shift amount => all-null result
                    ListChunked::full_null_with_dtype(ca.name().clone(), ca.len(), ca.inner_dtype())
                }
            },
            _ => ca.zip_and_apply_amortized(periods, |opt_s, opt_periods| {
                match (opt_s, opt_periods) {
                    (Some(s), Some(periods)) => Some(s.as_ref().shift(periods)),
                    _ => None,
                }
            }),
        };
        Ok(self.same_type(out))
    }

    /// Slice every sublist: `offset` may be negative (from the end), taking
    /// at most `length` elements.
    fn lst_slice(&self, offset: i64, length: usize) -> ListChunked {
        let ca = self.as_list();
        // SAFETY: `slice` doesn't change the dtype
        unsafe { ca.apply_amortized_same_type(|s| s.as_ref().slice(offset, length)) }
    }

    /// Length of every sublist, computed directly from the offsets buffers.
    fn lst_lengths(&self) -> IdxCa {
        let ca = self.as_list();

        let ca_validity = ca.rechunk_validity();

        // all rows null: skip the offsets walk entirely
        if ca_validity.as_ref().is_some_and(|x| x.set_bits() == 0) {
            return IdxCa::full_null(ca.name().clone(), ca.len());
        }

        let mut lengths = Vec::with_capacity(ca.len());
        ca.downcast_iter().for_each(|arr| {
            let offsets = arr.offsets().as_slice();
            let mut last = offsets[0];
            for o in &offsets[1..] {
                // each sublist length is the delta between consecutive offsets
                lengths.push((*o - last) as IdxSize);
                last = *o;
            }
        });

        let arr = IdxArr::from_vec(lengths).with_validity(ca_validity);
        IdxCa::with_chunk(ca.name().clone(), arr)
    }

    /// Get the value by index in the sublists.
    /// So index `0` would return the first item of every sublist
    /// and index `-1` would return the last item of every sublist
    /// if an index is out of bounds, it will return a `None`.
    fn lst_get(&self, idx: i64, null_on_oob: bool) -> PolarsResult<Series> {
        let ca = self.as_list();
        // with `null_on_oob == false`, any out-of-bounds index is a hard error
        if !null_on_oob && ca.downcast_iter().any(|arr| index_is_oob(arr, idx)) {
            polars_bail!(ComputeError: "get index is out of bounds");
        }

        let chunks = ca
            .downcast_iter()
            .map(|arr| sublist_get(arr, idx))
            .collect::<Vec<_>>();

        let s = Series::try_from((ca.name().clone(), chunks)).unwrap();
        // SAFETY: every element in list has dtype equal to its inner type
        unsafe { s.from_physical_unchecked(ca.inner_dtype()) }
    }

    /// Take every `n`-th element of each sublist starting at `offset`;
    /// both arguments broadcast when unit length.
    #[cfg(feature = "list_gather")]
    fn lst_gather_every(&self, n: &IdxCa, offset: &IdxCa) -> PolarsResult<Series> {
        let list_ca = self.as_list();
        let out = match (n.len(), offset.len()) {
            (1, 1) => match (n.get(0), offset.get(0)) {
                (Some(n), Some(offset)) => unsafe {
                    // SAFETY: `gather_every` doesn't change the dtype
                    list_ca.try_apply_amortized_same_type(|s| {
                        s.as_ref().gather_every(n as usize, offset as usize)
                    })?
                },
                // a null scalar argument makes every row null
                _ => ListChunked::full_null_with_dtype(
                    list_ca.name().clone(),
                    list_ca.len(),
                    list_ca.inner_dtype(),
                ),
            },
            (1, len_offset) if len_offset == list_ca.len() => {
                if let Some(n) = n.get(0) {
                    list_ca.try_zip_and_apply_amortized(offset, |opt_s, opt_offset| {
                        match (opt_s, opt_offset) {
                            (Some(s), Some(offset)) => {
                                Ok(Some(s.as_ref().gather_every(n as usize, offset as usize)?))
                            },
                            _ => Ok(None),
                        }
                    })?
                } else {
                    ListChunked::full_null_with_dtype(
                        list_ca.name().clone(),
                        list_ca.len(),
                        list_ca.inner_dtype(),
                    )
                }
            },
            (len_n, 1) if len_n == list_ca.len() => {
                if let Some(offset) = offset.get(0) {
                    list_ca.try_zip_and_apply_amortized(n, |opt_s, opt_n| match (opt_s, opt_n) {
                        (Some(s), Some(n)) => {
                            Ok(Some(s.as_ref().gather_every(n as usize, offset as usize)?))
                        },
                        _ => Ok(None),
                    })?
                } else {
                    ListChunked::full_null_with_dtype(
                        list_ca.name().clone(),
                        list_ca.len(),
                        list_ca.inner_dtype(),
                    )
                }
            },
            (len_n, len_offset) if len_n == len_offset && len_n == list_ca.len() => list_ca
                .try_binary_zip_and_apply_amortized(
                    n,
                    offset,
                    |opt_s, opt_n, opt_offset| match (opt_s, opt_n, opt_offset) {
                        (Some(s), Some(n), Some(offset)) => {
                            Ok(Some(s.as_ref().gather_every(n as usize, offset as usize)?))
                        },
                        _ => Ok(None),
                    },
                )?,
            _ => {
                polars_bail!(ComputeError: "The lengths of `n` and `offset` should be 1 or equal to the length of list.")
            },
        };
        Ok(out.into_series())
    }

    /// Gather elements of every sublist by the (list of) integer indices in
    /// `idx`. Negative indices count from the end; `null_on_oob` controls
    /// whether out-of-bounds indices become null or error.
    #[cfg(feature = "list_gather")]
    fn lst_gather(&self, idx: &Series, null_on_oob: bool) -> PolarsResult<Series> {
        let list_ca = self.as_list();
        let idx_ca = idx.list()?;

        polars_ensure!(
            idx_ca.inner_dtype().is_integer(),
            ComputeError: "cannot use dtype `{}` as an index", idx_ca.inner_dtype()
        );

        // fast path for indices already known to be non-negative:
        // cast once to IDX_DTYPE and gather per row
        let index_typed_index = |idx: &Series| {
            let idx = idx.cast(&IDX_DTYPE).unwrap();
            {
                list_ca
                    .amortized_iter()
                    .map(|s| {
                        s.map(|s| {
                            let s = s.as_ref();
                            take_series(s, idx.clone(), null_on_oob)
                        })
                        .transpose()
                    })
                    .collect::<PolarsResult<ListChunked>>()
                    .map(|mut ca| {
                        ca.rename(list_ca.name().clone());
                        ca.into_series()
                    })
            }
        };

        match (list_ca.len(), idx_ca.len()) {
            // broadcast the single list row over all index rows
            (1, _) => {
                let mut out = if list_ca.has_nulls() {
                    ListChunked::full_null_with_dtype(
                        PlSmallStr::EMPTY,
                        idx.len(),
                        list_ca.inner_dtype(),
                    )
                } else {
                    let s = list_ca.explode(ExplodeOptions {
                        empty_as_null: true,
                        keep_nulls: true,
                    })?;
                    idx_ca
                        .into_iter()
                        .map(|opt_idx| {
                            opt_idx
                                .map(|idx| take_series(&s, idx, null_on_oob))
                                .transpose()
                        })
                        .collect::<PolarsResult<ListChunked>>()?
                };
                out.rename(list_ca.name().clone());
                Ok(out.into_series())
            },
            // broadcast the single index row over all list rows
            (_, 1) => {
                let idx_ca = idx_ca.explode(ExplodeOptions {
                    empty_as_null: true,
                    keep_nulls: true,
                })?;

                use DataType as D;
                match idx_ca.dtype() {
                    D::UInt32 | D::UInt64 => index_typed_index(&idx_ca),
                    dt if dt.is_signed_integer() => {
                        if let Some(min) = idx_ca.min::<i64>().unwrap() {
                            if min >= 0 {
                                // no negative indices: the fast path applies
                                index_typed_index(&idx_ca)
                            } else {
                                let mut out = {
                                    list_ca
                                        .amortized_iter()
                                        .map(|opt_s| {
                                            opt_s
                                                .map(|s| {
                                                    take_series(
                                                        s.as_ref(),
                                                        idx_ca.clone(),
                                                        null_on_oob,
                                                    )
                                                })
                                                .transpose()
                                        })
                                        .collect::<PolarsResult<ListChunked>>()?
                                };
                                out.rename(list_ca.name().clone());
                                Ok(out.into_series())
                            }
                        } else {
                            polars_bail!(ComputeError: "all indices are null");
                        }
                    },
                    dt => polars_bail!(ComputeError: "cannot use dtype `{dt}` as an index"),
                }
            },
            // equal lengths: zip row-wise
            (a, b) if a == b => {
                let mut out = {
                    list_ca
                        .amortized_iter()
                        .zip(idx_ca)
                        .map(|(opt_s, opt_idx)| {
                            {
                                match (opt_s, opt_idx) {
                                    (Some(s), Some(idx)) => {
                                        Some(take_series(s.as_ref(), idx, null_on_oob))
                                    },
                                    _ => None,
                                }
                            }
                            .transpose()
                        })
                        .collect::<PolarsResult<ListChunked>>()?
                };
                out.rename(list_ca.name().clone());
                Ok(out.into_series())
            },
            (a, b) => polars_bail!(length_mismatch = "list.gather", a, b),
        }
    }

    /// Drop null values from every sublist.
    #[cfg(feature = "list_drop_nulls")]
    fn lst_drop_nulls(&self) -> ListChunked {
        let list_ca = self.as_list();

        // SAFETY: `drop_nulls` doesn't change the dtype
        unsafe { list_ca.apply_amortized_same_type(|s| s.as_ref().drop_nulls()) }
    }

    /// Sample `n` values from every sublist; `n` broadcasts when unit length.
    #[cfg(feature = "list_sample")]
    fn lst_sample_n(
        &self,
        n: &Series,
        with_replacement: bool,
        shuffle: bool,
        seed: Option<u64>,
    ) -> PolarsResult<ListChunked> {
        use std::borrow::Cow;

        let ca = self.as_list();

        let n_s = n.strict_cast(&IDX_DTYPE)?;
        let n = n_s.idx()?;

        polars_ensure!(
            ca.len() == n.len() || ca.len() == 1 || n.len() == 1,
            length_mismatch = "list.sample(n)",
            ca.len(),
            n.len()
        );

        // Broadcast `self`
        let mut ca = Cow::Borrowed(ca);
        if ca.len() == 1 && n.len() != 1 {
            // Optimize: Don't broadcast and instead have a special path.
            ca = Cow::Owned(ca.new_from_index(0, n.len()));
        }
        let ca = ca.as_ref();

        let out = match n.len() {
            1 => {
                if let Some(n) = n.get(0) {
                    unsafe {
                        // SAFETY: `sample_n` doesn't change the dtype
                        ca.try_apply_amortized_same_type(|s| {
                            s.as_ref()
                                .sample_n(n as usize, with_replacement, shuffle, seed)
                        })
                    }
                } else {
                    // null sample size => all-null result
                    Ok(ListChunked::full_null_with_dtype(
                        ca.name().clone(),
                        ca.len(),
                        ca.inner_dtype(),
                    ))
                }
            },
            _ => ca.try_zip_and_apply_amortized(n, |opt_s, opt_n| match (opt_s, opt_n) {
                (Some(s), Some(n)) => s
                    .as_ref()
                    .sample_n(n as usize, with_replacement, shuffle, seed)
                    .map(Some),
                _ => Ok(None),
            }),
        };
        out.map(|ok| self.same_type(ok))
    }

    /// Sample a fraction of every sublist; `fraction` broadcasts when unit
    /// length. The per-row count is `floor(len * fraction)`.
    #[cfg(feature = "list_sample")]
    fn lst_sample_fraction(
        &self,
        fraction: &Series,
        with_replacement: bool,
        shuffle: bool,
        seed: Option<u64>,
    ) -> PolarsResult<ListChunked> {
        use std::borrow::Cow;

        let ca = self.as_list();

        let fraction_s = fraction.cast(&DataType::Float64)?;
        let fraction = fraction_s.f64()?;

        polars_ensure!(
            ca.len() == fraction.len() || ca.len() == 1 || fraction.len() == 1,
            length_mismatch = "list.sample(fraction)",
            ca.len(),
            fraction.len()
        );

        // Broadcast `self`
        let mut ca = Cow::Borrowed(ca);
        if ca.len() == 1 && fraction.len() != 1 {
            // Optimize: Don't broadcast and instead have a special path.
            ca = Cow::Owned(ca.new_from_index(0, fraction.len()));
        }
        let ca = ca.as_ref();

        let out = match fraction.len() {
            1 => {
                if let Some(fraction) = fraction.get(0) {
                    unsafe {
                        // SAFETY: `sample_n` doesn't change the dtype
                        ca.try_apply_amortized_same_type(|s| {
                            let n = (s.as_ref().len() as f64 * fraction) as usize;
                            s.as_ref().sample_n(n, with_replacement, shuffle, seed)
                        })
                    }
                } else {
                    Ok(ListChunked::full_null_with_dtype(
                        ca.name().clone(),
                        ca.len(),
                        ca.inner_dtype(),
                    ))
                }
            },
            _ => ca.try_zip_and_apply_amortized(fraction, |opt_s, opt_n| match (opt_s, opt_n) {
                (Some(s), Some(fraction)) => {
                    let n = (s.as_ref().len() as f64 * fraction) as usize;
                    s.as_ref()
                        .sample_n(n, with_replacement, shuffle, seed)
                        .map(Some)
                },
                _ => Ok(None),
            }),
        };
        out.map(|ok| self.same_type(ok))
    }

    /// Row-wise concatenation of `self` with the list (or scalar) columns in
    /// `other`, upcasting all inner dtypes to their common supertype.
    fn lst_concat(&self, other: &[Column]) -> PolarsResult<ListChunked> {
        let ca = self.as_list();
        let other_len = other.len();
        let length = ca.len();
        let mut other = other.to_vec();
        let mut inner_super_type = ca.inner_dtype().clone();

        // determine the common inner supertype across self and all rhs columns
        for s in &other {
            match s.dtype() {
                DataType::List(inner_type) => {
                    inner_super_type = try_get_supertype(&inner_super_type, inner_type)?;
                },
                dt => {
                    inner_super_type = try_get_supertype(&inner_super_type, dt)?;
                },
            }
        }

        // cast lhs
        let dtype = &DataType::List(Box::new(inner_super_type.clone()));
        let ca = ca.cast(dtype)?;
        let ca = ca.list().unwrap();

        // broadcasting path in case all unit length
        // this path will not expand the series, so saves memory
        let out = if other.iter().all(|s| s.len() == 1) && ca.len() != 1 {
            cast_rhs(&mut other, &inner_super_type, dtype, length, false)?;
            let to_append = other
                .iter()
                .filter_map(|s| {
                    let lst = s.list().unwrap();
                    // SAFETY: previous rhs_cast ensures the type is correct
                    unsafe {
                        lst.get_as_series(0)
                            .map(|s| s.from_physical_unchecked(&inner_super_type).unwrap())
                    }
                })
                .collect::<Vec<_>>();

            // there was a None, so all values will be None
            if to_append.len() != other_len {
                return Ok(ListChunked::full_null_with_dtype(
                    ca.name().clone(),
                    length,
                    &inner_super_type,
                ));
            }

            let vals_size_other = other
                .iter()
                .map(|s| s.list().unwrap().get_values_size())
                .sum::<usize>();

            let mut builder = get_list_builder(
                &inner_super_type,
                ca.get_values_size() + vals_size_other + 1,
                length,
                ca.name().clone(),
            );
            ca.into_iter().for_each(|opt_s| {
                let opt_s = opt_s.map(|mut s| {
                    for append in &to_append {
                        s.append(append).unwrap();
                    }
                    match inner_super_type {
                        // structs don't have chunks, so we must first rechunk the underlying series
                        #[cfg(feature = "dtype-struct")]
                        DataType::Struct(_) => s = s.rechunk(),
                        // nothing
                        _ => {},
                    }
                    s
                });
                builder.append_opt_series(opt_s.as_ref()).unwrap();
            });
            builder.finish()
        } else {
            // normal path which may contain same length list or unit length lists
            cast_rhs(&mut other, &inner_super_type, dtype, length, true)?;

            let vals_size_other = other
                .iter()
                .map(|s| s.list().unwrap().get_values_size())
                .sum::<usize>();
            let mut iters = Vec::with_capacity(other_len + 1);

            for s in other.iter_mut() {
                iters.push(s.list()?.amortized_iter())
            }
            let mut first_iter: Box<dyn PolarsIterator<Item = Option<Series>>> = ca.into_iter();
            let mut builder = get_list_builder(
                &inner_super_type,
                ca.get_values_size() + vals_size_other + 1,
                length,
                ca.name().clone(),
            );

            for _ in 0..ca.len() {
                let mut acc = match first_iter.next().unwrap() {
                    Some(s) => s,
                    None => {
                        builder.append_null();
                        // make sure that the iterators advance before we continue
                        for it in &mut iters {
                            it.next().unwrap();
                        }
                        continue;
                    },
                };

                // a null in any rhs row makes the whole output row null
                let mut has_nulls = false;
                for it in &mut iters {
                    match it.next().unwrap() {
                        Some(s) => {
                            if !has_nulls {
                                acc.append(s.as_ref())?;
                            }
                        },
                        None => {
                            has_nulls = true;
                        },
                    }
                }
                if has_nulls {
                    builder.append_null();
                    continue;
                }

                match inner_super_type {
                    // structs don't have chunks, so we must first rechunk the underlying series
                    #[cfg(feature = "dtype-struct")]
                    DataType::Struct(_) => acc = acc.rechunk(),
                    // nothing
                    _ => {},
                }
                builder.append_series(&acc).unwrap();
            }
            builder.finish()
        };
        Ok(out)
    }
}
850
851
// All methods have default implementations, so the blanket impl is empty.
impl ListNameSpaceImpl for ListChunked {}
852
853
#[cfg(feature = "list_gather")]
// Gather from `s` using (possibly signed / out-of-bounds) `idx`:
// indices are first normalized/validated by `cast_index`, then taken.
fn take_series(s: &Series, idx: Series, null_on_oob: bool) -> PolarsResult<Series> {
    let len = s.len();
    let idx = cast_index(idx, len, null_on_oob)?;
    let idx = idx.idx().unwrap();
    s.take(idx)
}
860
861
#[cfg(feature = "list_gather")]
/// Convert signed indices to `IdxSize`, resolving negatives against `len`.
///
/// Any index that cannot be resolved to a position in `0..len` (including
/// an original null) becomes null in the output.
fn cast_signed_index_ca<T: PolarsNumericType>(idx: &ChunkedArray<T>, len: usize) -> Series
where
    T::Native: Copy + PartialOrd + PartialEq + NumCast + Signed + Zero,
{
    let resolved: IdxCa = idx
        .iter()
        .map(|opt_i| match opt_i {
            // `negative_to_usize` maps e.g. -1 -> len - 1 and rejects OOB
            Some(i) => i.negative_to_usize(len).map(|pos| pos as IdxSize),
            None => None,
        })
        .collect();
    resolved.into_series()
}
871
872
#[cfg(feature = "list_gather")]
/// Convert unsigned indices to `IdxSize`, nulling any index `>= len`.
///
/// Original nulls stay null; in-bounds values are cast to `IdxSize`.
fn cast_unsigned_index_ca<T: PolarsNumericType>(idx: &ChunkedArray<T>, len: usize) -> Series
where
    T::Native: Copy + PartialOrd + ToPrimitive,
{
    let bounded: IdxCa = idx
        .iter()
        .map(|opt_i| {
            opt_i.and_then(|i| {
                let pos = i.to_usize().unwrap();
                // out-of-bounds positions become null
                (pos < len).then_some(pos as IdxSize)
            })
        })
        .collect();
    bounded.into_series()
}
891
892
#[cfg(feature = "list_gather")]
/// Normalize an index Series of any integer dtype to `IDX_DTYPE`.
///
/// Signed dtypes go through `cast_signed_index_ca` (negative indices resolve
/// from the end); unsigned dtypes either bounds-check per value
/// (`null_on_oob`) or cast directly. The `big_idx` feature decides which
/// unsigned width is already `IDX_DTYPE` and can pass through unchanged.
///
/// # Errors
/// `OutOfBounds` when bounds-checking introduced new nulls while
/// `null_on_oob` is false (i.e. some index was out of bounds).
fn cast_index(idx: Series, len: usize, null_on_oob: bool) -> PolarsResult<Series> {
    // record the incoming null count; any *additional* nulls after
    // conversion indicate an out-of-bounds index
    let idx_null_count = idx.null_count();
    use DataType::*;
    let out = match idx.dtype() {
        #[cfg(feature = "big_idx")]
        UInt32 => {
            if null_on_oob {
                let a = idx.u32().unwrap();
                cast_unsigned_index_ca(a, len)
            } else {
                idx.cast(&IDX_DTYPE).unwrap()
            }
        },
        #[cfg(feature = "big_idx")]
        UInt64 => {
            if null_on_oob {
                let a = idx.u64().unwrap();
                cast_unsigned_index_ca(a, len)
            } else {
                // already IDX_DTYPE under big_idx: pass through
                idx
            }
        },
        #[cfg(not(feature = "big_idx"))]
        UInt64 => {
            if null_on_oob {
                let a = idx.u64().unwrap();
                cast_unsigned_index_ca(a, len)
            } else {
                idx.cast(&IDX_DTYPE).unwrap()
            }
        },
        #[cfg(not(feature = "big_idx"))]
        UInt32 => {
            if null_on_oob {
                let a = idx.u32().unwrap();
                cast_unsigned_index_ca(a, len)
            } else {
                // already IDX_DTYPE without big_idx: pass through
                idx
            }
        },
        dt if dt.is_unsigned_integer() => idx.cast(&IDX_DTYPE).unwrap(),
        Int8 => {
            let a = idx.i8().unwrap();
            cast_signed_index_ca(a, len)
        },
        Int16 => {
            let a = idx.i16().unwrap();
            cast_signed_index_ca(a, len)
        },
        Int32 => {
            let a = idx.i32().unwrap();
            cast_signed_index_ca(a, len)
        },
        Int64 => {
            let a = idx.i64().unwrap();
            cast_signed_index_ca(a, len)
        },
        _ => {
            // callers ensure the dtype is integer beforehand
            unreachable!()
        },
    };
    polars_ensure!(
        out.null_count() == idx_null_count || null_on_oob,
        OutOfBounds: "gather indices are out of bounds"
    );
    Ok(out)
}
960
961
// TODO: implement the above for ArrayChunked as well?
962
963