CoCalc -- min

GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-expr/src/reduce/min_max.rs
⁶⁹⁴⁰ views
1
#![allow(unsafe_op_in_unsafe_fn)]
2
use std::borrow::Cow;
3
use std::marker::PhantomData;
4

5
use arrow::array::BooleanArray;
6
use arrow::bitmap::Bitmap;
7
use num_traits::Bounded;
8
use polars_core::with_match_physical_integer_polars_type;
9
#[cfg(feature = "propagate_nans")]
10
use polars_ops::prelude::nan_propagating_aggregate::ca_nan_agg;
11
use polars_utils::float::IsFloat;
12
use polars_utils::min_max::MinMax;
13

14
use super::*;
15

16
pub fn new_min_reduction(dtype: DataType, propagate_nans: bool) -> Box<dyn GroupedReduction> {
17
    use DataType::*;
18
    use VecMaskGroupedReduction as VMGR;
19
    match &dtype {
20
        Boolean => Box::new(BoolMinGroupedReduction::default()),
21
        #[cfg(feature = "propagate_nans")]
22
        Float32 if propagate_nans => {
23
            Box::new(VMGR::new(dtype, NumReducer::<NanMin<Float32Type>>::new()))
24
        },
25
        #[cfg(feature = "propagate_nans")]
26
        Float64 if propagate_nans => {
27
            Box::new(VMGR::new(dtype, NumReducer::<NanMin<Float64Type>>::new()))
28
        },
29
        Float32 => Box::new(VMGR::new(dtype, NumReducer::<Min<Float32Type>>::new())),
30
        Float64 => Box::new(VMGR::new(dtype, NumReducer::<Min<Float64Type>>::new())),
31
        Null => Box::new(NullGroupedReduction::default()),
32
        String | Binary => Box::new(VecGroupedReduction::new(dtype, BinaryMinReducer)),
33
        _ if dtype.is_integer() || dtype.is_temporal() || dtype.is_enum() => {
34
            with_match_physical_integer_polars_type!(dtype.to_physical(), |$T| {
35
                Box::new(VMGR::new(dtype, NumReducer::<Min<$T>>::new()))
36
            })
37
        },
38
        #[cfg(feature = "dtype-decimal")]
39
        Decimal(_, _) => Box::new(VMGR::new(dtype, NumReducer::<Min<Int128Type>>::new())),
40
        #[cfg(feature = "dtype-categorical")]
41
        Categorical(cats, map) => with_match_categorical_physical_type!(cats.physical(), |$C| {
42
            Box::new(VMGR::new(dtype.clone(), CatMinReducer::<$C>(map.clone(), PhantomData)))
43
        }),
44
        _ => unimplemented!(),
45
    }
46
}
47

48
pub fn new_max_reduction(dtype: DataType, propagate_nans: bool) -> Box<dyn GroupedReduction> {
49
    use DataType::*;
50
    use VecMaskGroupedReduction as VMGR;
51
    match &dtype {
52
        Boolean => Box::new(BoolMaxGroupedReduction::default()),
53
        #[cfg(feature = "propagate_nans")]
54
        Float32 if propagate_nans => {
55
            Box::new(VMGR::new(dtype, NumReducer::<NanMax<Float32Type>>::new()))
56
        },
57
        #[cfg(feature = "propagate_nans")]
58
        Float64 if propagate_nans => {
59
            Box::new(VMGR::new(dtype, NumReducer::<NanMax<Float64Type>>::new()))
60
        },
61
        Float32 => Box::new(VMGR::new(dtype, NumReducer::<Max<Float32Type>>::new())),
62
        Float64 => Box::new(VMGR::new(dtype, NumReducer::<Max<Float64Type>>::new())),
63
        Null => Box::new(NullGroupedReduction::default()),
64
        String | Binary => Box::new(VecGroupedReduction::new(dtype, BinaryMaxReducer)),
65
        _ if dtype.is_integer() || dtype.is_temporal() || dtype.is_enum() => {
66
            with_match_physical_integer_polars_type!(dtype.to_physical(), |$T| {
67
                Box::new(VMGR::new(dtype, NumReducer::<Max<$T>>::new()))
68
            })
69
        },
70
        #[cfg(feature = "dtype-decimal")]
71
        Decimal(_, _) => Box::new(VMGR::new(dtype, NumReducer::<Max<Int128Type>>::new())),
72
        #[cfg(feature = "dtype-categorical")]
73
        Categorical(cats, map) => with_match_categorical_physical_type!(cats.physical(), |$C| {
74
            Box::new(VMGR::new(dtype.clone(), CatMaxReducer::<$C>(map.clone(), PhantomData)))
75
        }),
76
        _ => unimplemented!(),
77
    }
78
}
79

80
/// Marker type selecting the NaN-ignoring minimum for `T`.
struct Min<T>(PhantomData<T>);
/// Marker type selecting the NaN-ignoring maximum for `T`.
struct Max<T>(PhantomData<T>);

/// Marker type selecting the NaN-propagating minimum for `T`.
#[cfg(feature = "propagate_nans")]
struct NanMin<T>(PhantomData<T>);
/// Marker type selecting the NaN-propagating maximum for `T`.
#[cfg(feature = "propagate_nans")]
struct NanMax<T>(PhantomData<T>);
89

90
impl<T> NumericReduction for Min<T>
91
where
92
    T: PolarsNumericType,
93
    ChunkedArray<T>: ChunkAgg<T::Native>,
94
{
95
    type Dtype = T;
96

97
    #[inline(always)]
98
    fn init() -> T::Native {
99
        if T::Native::is_float() {
100
            T::Native::nan_value()
101
        } else {
102
            T::Native::max_value()
103
        }
104
    }
105

106
    #[inline(always)]
107
    fn combine(a: T::Native, b: T::Native) -> T::Native {
108
        MinMax::min_ignore_nan(a, b)
109
    }
110

111
    #[inline(always)]
112
    fn reduce_ca(ca: &ChunkedArray<T>) -> Option<T::Native> {
113
        ChunkAgg::min(ca)
114
    }
115
}
116

117
impl<T> NumericReduction for Max<T>
118
where
119
    T: PolarsNumericType,
120
    ChunkedArray<T>: ChunkAgg<T::Native>,
121
{
122
    type Dtype = T;
123

124
    #[inline(always)]
125
    fn init() -> T::Native {
126
        if T::Native::is_float() {
127
            T::Native::nan_value()
128
        } else {
129
            T::Native::min_value()
130
        }
131
    }
132

133
    #[inline(always)]
134
    fn combine(a: T::Native, b: T::Native) -> T::Native {
135
        MinMax::max_ignore_nan(a, b)
136
    }
137

138
    #[inline(always)]
139
    fn reduce_ca(ca: &ChunkedArray<T>) -> Option<T::Native> {
140
        ChunkAgg::max(ca)
141
    }
142
}
143

144
/// NaN-propagating minimum over a float type.
#[cfg(feature = "propagate_nans")]
impl<T> NumericReduction for NanMin<T>
where
    T: PolarsFloatType,
{
    type Dtype = T;

    /// Starting accumulator: the type's `max_value()`.
    // NOTE(review): this is presumably the largest *finite* float rather than
    // +inf; confirm that a group containing only +inf still yields +inf.
    #[inline(always)]
    fn init() -> T::Native {
        T::Native::max_value()
    }

    /// Folds two values with the NaN-propagating minimum.
    #[inline(always)]
    fn combine(a: T::Native, b: T::Native) -> T::Native {
        <T::Native as MinMax>::min_propagate_nan(a, b)
    }

    /// Reduces a whole chunked array with the NaN-propagating minimum.
    #[inline(always)]
    fn reduce_ca(ca: &ChunkedArray<T>) -> Option<T::Native> {
        ca_nan_agg(ca, MinMax::min_propagate_nan)
    }
}
163

164
/// NaN-propagating maximum over a float type.
#[cfg(feature = "propagate_nans")]
impl<T> NumericReduction for NanMax<T>
where
    T: PolarsFloatType,
{
    type Dtype = T;

    /// Starting accumulator: the type's `min_value()`.
    // NOTE(review): this is presumably the most negative *finite* float rather
    // than -inf; confirm that a group containing only -inf still yields -inf.
    #[inline(always)]
    fn init() -> T::Native {
        T::Native::min_value()
    }

    /// Folds two values with the NaN-propagating maximum.
    #[inline(always)]
    fn combine(a: T::Native, b: T::Native) -> T::Native {
        <T::Native as MinMax>::max_propagate_nan(a, b)
    }

    /// Reduces a whole chunked array with the NaN-propagating maximum.
    #[inline(always)]
    fn reduce_ca(ca: &ChunkedArray<T>) -> Option<T::Native> {
        ca_nan_agg(ca, MinMax::max_propagate_nan)
    }
}
183

184
/// Stateless reducer keeping the lexicographically smallest byte string.
#[derive(Clone)]
struct BinaryMinReducer;

/// Stateless reducer keeping the lexicographically largest byte string.
#[derive(Clone)]
struct BinaryMaxReducer;
188

189
impl Reducer for BinaryMinReducer {
190
    type Dtype = BinaryType;
191
    type Value = Option<Vec<u8>>; // TODO: evaluate SmallVec<u8>.
192

193
    fn init(&self) -> Self::Value {
194
        None
195
    }
196

197
    #[inline(always)]
198
    fn cast_series<'a>(&self, s: &'a Series) -> Cow<'a, Series> {
199
        Cow::Owned(s.cast(&DataType::Binary).unwrap())
200
    }
201

202
    fn combine(&self, a: &mut Self::Value, b: &Self::Value) {
203
        self.reduce_one(a, b.as_deref(), 0)
204
    }
205

206
    fn reduce_one(&self, a: &mut Self::Value, b: Option<&[u8]>, _seq_id: u64) {
207
        match (a, b) {
208
            (_, None) => {},
209
            (l @ None, Some(r)) => *l = Some(r.to_owned()),
210
            (Some(l), Some(r)) => {
211
                if l.as_slice() > r {
212
                    l.clear();
213
                    l.extend_from_slice(r);
214
                }
215
            },
216
        }
217
    }
218

219
    fn reduce_ca(&self, v: &mut Self::Value, ca: &BinaryChunked, _seq_id: u64) {
220
        self.reduce_one(v, ca.min_binary(), 0)
221
    }
222

223
    fn finish(
224
        &self,
225
        v: Vec<Self::Value>,
226
        m: Option<Bitmap>,
227
        dtype: &DataType,
228
    ) -> PolarsResult<Series> {
229
        assert!(m.is_none()); // This should only be used with VecGroupedReduction.
230
        let ca: BinaryChunked = v.into_iter().collect_ca(PlSmallStr::EMPTY);
231
        ca.into_series().cast(dtype)
232
    }
233
}
234

235
impl Reducer for BinaryMaxReducer {
236
    type Dtype = BinaryType;
237
    type Value = Option<Vec<u8>>; // TODO: evaluate SmallVec<u8>.
238

239
    #[inline(always)]
240
    fn init(&self) -> Self::Value {
241
        None
242
    }
243

244
    #[inline(always)]
245
    fn cast_series<'a>(&self, s: &'a Series) -> Cow<'a, Series> {
246
        Cow::Owned(s.cast(&DataType::Binary).unwrap())
247
    }
248

249
    #[inline(always)]
250
    fn combine(&self, a: &mut Self::Value, b: &Self::Value) {
251
        self.reduce_one(a, b.as_deref(), 0)
252
    }
253

254
    #[inline(always)]
255
    fn reduce_one(&self, a: &mut Self::Value, b: Option<&[u8]>, _seq_id: u64) {
256
        match (a, b) {
257
            (_, None) => {},
258
            (l @ None, Some(r)) => *l = Some(r.to_owned()),
259
            (Some(l), Some(r)) => {
260
                if l.as_slice() < r {
261
                    l.clear();
262
                    l.extend_from_slice(r);
263
                }
264
            },
265
        }
266
    }
267

268
    #[inline(always)]
269
    fn reduce_ca(&self, v: &mut Self::Value, ca: &BinaryChunked, _seq_id: u64) {
270
        self.reduce_one(v, ca.max_binary(), 0)
271
    }
272

273
    #[inline(always)]
274
    fn finish(
275
        &self,
276
        v: Vec<Self::Value>,
277
        m: Option<Bitmap>,
278
        dtype: &DataType,
279
    ) -> PolarsResult<Series> {
280
        assert!(m.is_none()); // This should only be used with VecGroupedReduction.
281
        let ca: BinaryChunked = v.into_iter().collect_ca(PlSmallStr::EMPTY);
282
        ca.into_series().cast(dtype)
283
    }
284
}
285

286
#[derive(Default)]
287
pub struct BoolMinGroupedReduction {
288
    values: MutableBitmap,
289
    mask: MutableBitmap,
290
    evicted_values: BitmapBuilder,
291
    evicted_mask: BitmapBuilder,
292
}
293

294
impl GroupedReduction for BoolMinGroupedReduction {
295
    fn new_empty(&self) -> Box<dyn GroupedReduction> {
296
        Box::new(Self::default())
297
    }
298

299
    fn reserve(&mut self, additional: usize) {
300
        self.values.reserve(additional);
301
        self.mask.reserve(additional)
302
    }
303

304
    fn resize(&mut self, num_groups: IdxSize) {
305
        self.values.resize(num_groups as usize, true);
306
        self.mask.resize(num_groups as usize, false);
307
    }
308

309
    fn update_group(
310
        &mut self,
311
        values: &Column,
312
        group_idx: IdxSize,
313
        _seq_id: u64,
314
    ) -> PolarsResult<()> {
315
        // TODO: we should really implement a sum-as-other-type operation instead
316
        // of doing this materialized cast.
317
        assert!(values.dtype() == &DataType::Boolean);
318
        let values = values.as_materialized_series_maintain_scalar();
319
        let ca: &BooleanChunked = values.as_ref().as_ref();
320
        if !ca.all() {
321
            self.values.set(group_idx as usize, false);
322
        }
323
        if ca.len() != ca.null_count() {
324
            self.mask.set(group_idx as usize, true);
325
        }
326
        Ok(())
327
    }
328

329
    unsafe fn update_groups_while_evicting(
330
        &mut self,
331
        values: &Column,
332
        subset: &[IdxSize],
333
        group_idxs: &[EvictIdx],
334
        _seq_id: u64,
335
    ) -> PolarsResult<()> {
336
        assert!(values.dtype() == &DataType::Boolean);
337
        assert!(subset.len() == group_idxs.len());
338
        let values = values.as_materialized_series(); // @scalar-opt
339
        let ca: &BooleanChunked = values.as_ref().as_ref();
340
        let arr = ca.downcast_as_array();
341
        unsafe {
342
            // SAFETY: indices are in-bounds guaranteed by trait.
343
            for (i, g) in subset.iter().zip(group_idxs) {
344
                let ov = arr.get_unchecked(*i as usize);
345
                if g.should_evict() {
346
                    self.evicted_values.push(self.values.get_unchecked(g.idx()));
347
                    self.evicted_mask.push(self.mask.get_unchecked(g.idx()));
348
                    self.values.set_unchecked(g.idx(), ov.unwrap_or(true));
349
                    self.mask.set_unchecked(g.idx(), ov.is_some());
350
                } else {
351
                    self.values.and_pos_unchecked(g.idx(), ov.unwrap_or(true));
352
                    self.mask.or_pos_unchecked(g.idx(), ov.is_some());
353
                }
354
            }
355
        }
356
        Ok(())
357
    }
358

359
    unsafe fn combine_subset(
360
        &mut self,
361
        other: &dyn GroupedReduction,
362
        subset: &[IdxSize],
363
        group_idxs: &[IdxSize],
364
    ) -> PolarsResult<()> {
365
        let other = other.as_any().downcast_ref::<Self>().unwrap();
366
        assert!(subset.len() == group_idxs.len());
367
        unsafe {
368
            // SAFETY: indices are in-bounds guaranteed by trait.
369
            for (i, g) in subset.iter().zip(group_idxs) {
370
                self.values
371
                    .and_pos_unchecked(*g as usize, other.values.get_unchecked(*i as usize));
372
                self.mask
373
                    .or_pos_unchecked(*g as usize, other.mask.get_unchecked(*i as usize));
374
            }
375
        }
376
        Ok(())
377
    }
378

379
    fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {
380
        Box::new(Self {
381
            values: core::mem::take(&mut self.evicted_values).into_mut(),
382
            mask: core::mem::take(&mut self.evicted_mask).into_mut(),
383
            evicted_values: BitmapBuilder::new(),
384
            evicted_mask: BitmapBuilder::new(),
385
        })
386
    }
387

388
    fn finalize(&mut self) -> PolarsResult<Series> {
389
        let v = core::mem::take(&mut self.values);
390
        let m = core::mem::take(&mut self.mask);
391
        let arr = BooleanArray::from(v.freeze()).with_validity(Some(m.freeze()));
392
        Ok(Series::from_array(PlSmallStr::EMPTY, arr))
393
    }
394

395
    fn as_any(&self) -> &dyn Any {
396
        self
397
    }
398
}
399

400
#[derive(Default)]
401
pub struct BoolMaxGroupedReduction {
402
    values: MutableBitmap,
403
    mask: MutableBitmap,
404
    evicted_values: BitmapBuilder,
405
    evicted_mask: BitmapBuilder,
406
}
407

408
impl GroupedReduction for BoolMaxGroupedReduction {
409
    fn new_empty(&self) -> Box<dyn GroupedReduction> {
410
        Box::new(Self::default())
411
    }
412

413
    fn reserve(&mut self, additional: usize) {
414
        self.values.reserve(additional);
415
        self.mask.reserve(additional)
416
    }
417

418
    fn resize(&mut self, num_groups: IdxSize) {
419
        self.values.resize(num_groups as usize, false);
420
        self.mask.resize(num_groups as usize, false);
421
    }
422

423
    fn update_group(
424
        &mut self,
425
        values: &Column,
426
        group_idx: IdxSize,
427
        _seq_id: u64,
428
    ) -> PolarsResult<()> {
429
        // TODO: we should really implement a sum-as-other-type operation instead
430
        // of doing this materialized cast.
431
        assert!(values.dtype() == &DataType::Boolean);
432
        let values = values.as_materialized_series_maintain_scalar();
433
        let ca: &BooleanChunked = values.as_ref().as_ref();
434
        if ca.any() {
435
            self.values.set(group_idx as usize, true);
436
        }
437
        if ca.len() != ca.null_count() {
438
            self.mask.set(group_idx as usize, true);
439
        }
440
        Ok(())
441
    }
442

443
    unsafe fn update_groups_while_evicting(
444
        &mut self,
445
        values: &Column,
446
        subset: &[IdxSize],
447
        group_idxs: &[EvictIdx],
448
        _seq_id: u64,
449
    ) -> PolarsResult<()> {
450
        assert!(values.dtype() == &DataType::Boolean);
451
        assert!(subset.len() == group_idxs.len());
452
        let values = values.as_materialized_series(); // @scalar-opt
453
        let ca: &BooleanChunked = values.as_ref().as_ref();
454
        let arr = ca.downcast_as_array();
455
        unsafe {
456
            // SAFETY: indices are in-bounds guaranteed by trait.
457
            for (i, g) in subset.iter().zip(group_idxs) {
458
                let ov = arr.get_unchecked(*i as usize);
459
                if g.should_evict() {
460
                    self.evicted_values.push(self.values.get_unchecked(g.idx()));
461
                    self.evicted_mask.push(self.mask.get_unchecked(g.idx()));
462
                    self.values.set_unchecked(g.idx(), ov.unwrap_or(false));
463
                    self.mask.set_unchecked(g.idx(), ov.is_some());
464
                } else {
465
                    self.values.or_pos_unchecked(g.idx(), ov.unwrap_or(false));
466
                    self.mask.or_pos_unchecked(g.idx(), ov.is_some());
467
                }
468
            }
469
        }
470
        Ok(())
471
    }
472

473
    unsafe fn combine_subset(
474
        &mut self,
475
        other: &dyn GroupedReduction,
476
        subset: &[IdxSize],
477
        group_idxs: &[IdxSize],
478
    ) -> PolarsResult<()> {
479
        let other = other.as_any().downcast_ref::<Self>().unwrap();
480
        assert!(subset.len() == group_idxs.len());
481
        unsafe {
482
            // SAFETY: indices are in-bounds guaranteed by trait.
483
            for (i, g) in subset.iter().zip(group_idxs) {
484
                self.values
485
                    .or_pos_unchecked(*g as usize, other.values.get_unchecked(*i as usize));
486
                self.mask
487
                    .or_pos_unchecked(*g as usize, other.mask.get_unchecked(*i as usize));
488
            }
489
        }
490
        Ok(())
491
    }
492

493
    fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {
494
        Box::new(Self {
495
            values: core::mem::take(&mut self.evicted_values).into_mut(),
496
            mask: core::mem::take(&mut self.evicted_mask).into_mut(),
497
            evicted_values: BitmapBuilder::new(),
498
            evicted_mask: BitmapBuilder::new(),
499
        })
500
    }
501

502
    fn finalize(&mut self) -> PolarsResult<Series> {
503
        let v = core::mem::take(&mut self.values);
504
        let m = core::mem::take(&mut self.mask);
505
        let arr = BooleanArray::from(v.freeze()).with_validity(Some(m.freeze()));
506
        Ok(Series::from_array(PlSmallStr::EMPTY, arr))
507
    }
508

509
    fn as_any(&self) -> &dyn Any {
510
        self
511
    }
512
}
513

514
/// Reducer picking the category whose string is lexicographically smallest.
///
/// Holds the mapping used to resolve category ids to strings; `T` selects the
/// categorical physical type.
#[cfg(feature = "dtype-categorical")]
struct CatMinReducer<T>(Arc<CategoricalMapping>, PhantomData<T>);

// Implemented by hand so that cloning does not require `T: Clone`.
#[cfg(feature = "dtype-categorical")]
impl<T> Clone for CatMinReducer<T> {
    fn clone(&self) -> Self {
        let mapping = Arc::clone(&self.0);
        Self(mapping, PhantomData)
    }
}
523

524
/// Minimum over categorical values, ordered by the category strings.
#[cfg(feature = "dtype-categorical")]
impl<T: PolarsCategoricalType> Reducer for CatMinReducer<T> {
    type Dtype = T::PolarsPhysical;
    type Value = T::Native;

    fn init(&self) -> Self::Value {
        T::Native::max_value() // Ensures it's invalid, preferring the other value.
    }

    /// Reduces on the physical (category id) representation.
    #[inline(always)]
    fn cast_series<'a>(&self, s: &'a Series) -> Cow<'a, Series> {
        s.to_physical_repr()
    }

    /// Replaces `a` with `b` when `b` resolves to a string and `a` either does
    /// not resolve or resolves to a strictly larger string.
    fn combine(&self, a: &mut Self::Value, b: &Self::Value) {
        let mapping = &self.0;

        // An unresolvable candidate never wins.
        let Some(candidate) = mapping.cat_to_str(b.as_cat()) else {
            return;
        };

        let take_candidate = match mapping.cat_to_str(a.as_cat()) {
            Some(current) => candidate < current,
            // The accumulator holds no resolvable category yet.
            None => true,
        };
        if take_candidate {
            *a = *b;
        }
    }

    /// Folds a single optional category id into the accumulator, ignoring nulls.
    fn reduce_one(&self, a: &mut Self::Value, b: Option<Self::Value>, _seq_id: u64) {
        let Some(cat) = b else {
            return;
        };
        self.combine(a, &cat);
    }

    /// Folds every non-null category id of `ca` into the accumulator.
    fn reduce_ca(&self, v: &mut Self::Value, ca: &ChunkedArray<T::PolarsPhysical>, _seq_id: u64) {
        ca.iter().flatten().for_each(|cat| self.combine(v, &cat));
    }

    /// Wraps the per-group category ids (with validity `m`) back into a
    /// categorical series of type `dtype`.
    fn finish(
        &self,
        v: Vec<Self::Value>,
        m: Option<Bitmap>,
        dtype: &DataType,
    ) -> PolarsResult<Series> {
        let ids = ChunkedArray::from(PrimitiveArray::from_vec(v).with_validity(m));
        // SAFETY (assumed, confirm): valid ids originate from the input
        // categorical, and untouched `init` sentinels are expected to be
        // masked out by `m`.
        let categorical =
            unsafe { CategoricalChunked::<T>::from_cats_and_dtype_unchecked(ids, dtype.clone()) };
        Ok(categorical.into_series())
    }
}
580

581
/// Reducer picking the category whose string is lexicographically largest.
///
/// Holds the mapping used to resolve category ids to strings; `T` selects the
/// categorical physical type.
#[cfg(feature = "dtype-categorical")]
struct CatMaxReducer<T>(Arc<CategoricalMapping>, PhantomData<T>);

// Implemented by hand so that cloning does not require `T: Clone`.
#[cfg(feature = "dtype-categorical")]
impl<T> Clone for CatMaxReducer<T> {
    fn clone(&self) -> Self {
        let mapping = Arc::clone(&self.0);
        Self(mapping, PhantomData)
    }
}
590

591
/// Maximum over categorical values, ordered by the category strings.
#[cfg(feature = "dtype-categorical")]
impl<T: PolarsCategoricalType> Reducer for CatMaxReducer<T> {
    type Dtype = T::PolarsPhysical;
    type Value = T::Native;

    fn init(&self) -> Self::Value {
        T::Native::max_value() // Ensures it's invalid, preferring the other value.
    }

    /// Reduces on the physical (category id) representation.
    #[inline(always)]
    fn cast_series<'a>(&self, s: &'a Series) -> Cow<'a, Series> {
        s.to_physical_repr()
    }

    /// Replaces `a` with `b` when `b` resolves to a string and `a` either does
    /// not resolve or resolves to a strictly smaller string.
    fn combine(&self, a: &mut Self::Value, b: &Self::Value) {
        let mapping = &self.0;

        // An unresolvable candidate never wins.
        let Some(candidate) = mapping.cat_to_str(b.as_cat()) else {
            return;
        };

        let take_candidate = match mapping.cat_to_str(a.as_cat()) {
            Some(current) => candidate > current,
            // The accumulator holds no resolvable category yet.
            None => true,
        };
        if take_candidate {
            *a = *b;
        }
    }

    /// Folds a single optional category id into the accumulator, ignoring nulls.
    fn reduce_one(&self, a: &mut Self::Value, b: Option<Self::Value>, _seq_id: u64) {
        let Some(cat) = b else {
            return;
        };
        self.combine(a, &cat);
    }

    /// Folds every non-null category id of `ca` into the accumulator.
    fn reduce_ca(&self, v: &mut Self::Value, ca: &ChunkedArray<T::PolarsPhysical>, _seq_id: u64) {
        ca.iter().flatten().for_each(|cat| self.combine(v, &cat));
    }

    /// Wraps the per-group category ids (with validity `m`) back into a
    /// categorical series of type `dtype`.
    fn finish(
        &self,
        v: Vec<Self::Value>,
        m: Option<Bitmap>,
        dtype: &DataType,
    ) -> PolarsResult<Series> {
        let ids = ChunkedArray::from(PrimitiveArray::from_vec(v).with_validity(m));
        // SAFETY (assumed, confirm): valid ids originate from the input
        // categorical, and untouched `init` sentinels are expected to be
        // masked out by `m`.
        let categorical =
            unsafe { CategoricalChunked::<T>::from_cats_and_dtype_unchecked(ids, dtype.clone()) };
        Ok(categorical.into_series())
    }
}
647

648
/// Grouped min/max over a `Null` column: every group's result is null, so
/// only counts are tracked.
#[derive(Default)]
pub struct NullGroupedReduction {
    /// Number of groups, i.e. the length of the finalized series.
    length: usize,
    /// Number of evictions seen since the last `take_evictions`.
    num_evictions: usize,
}
653

654
impl GroupedReduction for NullGroupedReduction {
655
    fn new_empty(&self) -> Box<dyn GroupedReduction> {
656
        Box::new(Self::default())
657
    }
658

659
    fn reserve(&mut self, _additional: usize) {}
660

661
    fn resize(&mut self, num_groups: IdxSize) {
662
        self.length = num_groups as usize;
663
    }
664

665
    fn update_group(
666
        &mut self,
667
        values: &Column,
668
        _group_idx: IdxSize,
669
        _seq_id: u64,
670
    ) -> PolarsResult<()> {
671
        assert!(values.dtype() == &DataType::Null);
672

673
        // no-op
674
        Ok(())
675
    }
676

677
    unsafe fn update_groups_while_evicting(
678
        &mut self,
679
        values: &Column,
680
        subset: &[IdxSize],
681
        group_idxs: &[EvictIdx],
682
        _seq_id: u64,
683
    ) -> PolarsResult<()> {
684
        assert!(values.dtype() == &DataType::Null);
685
        assert!(subset.len() == group_idxs.len());
686

687
        for g in group_idxs {
688
            self.num_evictions += g.should_evict() as usize;
689
        }
690
        Ok(())
691
    }
692

693
    unsafe fn combine_subset(
694
        &mut self,
695
        _other: &dyn GroupedReduction,
696
        subset: &[IdxSize],
697
        group_idxs: &[IdxSize],
698
    ) -> PolarsResult<()> {
699
        assert!(subset.len() == group_idxs.len());
700

701
        // no-op
702
        Ok(())
703
    }
704

705
    fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {
706
        let out = Box::new(Self {
707
            length: self.num_evictions,
708
            num_evictions: 0,
709
        });
710
        self.num_evictions = 0;
711
        out
712
    }
713

714
    fn finalize(&mut self) -> PolarsResult<Series> {
715
        Ok(Series::full_null(
716
            PlSmallStr::EMPTY,
717
            self.length,
718
            &DataType::Null,
719
        ))
720
    }
721

722
    fn as_any(&self) -> &dyn Any {
723
        self
724
    }
725
}
726

727
Product

Resources

Company