Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-expr/src/reduce/count.rs
6940 views
1
#![allow(unsafe_op_in_unsafe_fn)]
2
use polars_core::error::constants::LENGTH_LIMIT_MSG;
3
4
use super::*;
5
6
/// State for a grouped count: one running counter per group.
pub struct CountReduce {
    /// Running count for each live group, indexed by group index.
    counts: Vec<u64>,
    /// Counts that were pushed out of `counts` when their group slot was
    /// evicted, in eviction order.
    evicted_counts: Vec<u64>,
    /// When `false`, null values are left out of the count.
    include_nulls: bool,
}
11
12
impl CountReduce {
13
pub fn new(include_nulls: bool) -> Self {
14
Self {
15
counts: Vec::new(),
16
evicted_counts: Vec::new(),
17
include_nulls,
18
}
19
}
20
}
21
22
impl GroupedReduction for CountReduce {
23
fn new_empty(&self) -> Box<dyn GroupedReduction> {
24
Box::new(Self::new(self.include_nulls))
25
}
26
27
fn reserve(&mut self, additional: usize) {
28
self.counts.reserve(additional);
29
}
30
31
fn resize(&mut self, num_groups: IdxSize) {
32
self.counts.resize(num_groups as usize, 0);
33
}
34
35
fn update_group(
36
&mut self,
37
values: &Column,
38
group_idx: IdxSize,
39
_seq_id: u64,
40
) -> PolarsResult<()> {
41
let mut count = values.len();
42
if !self.include_nulls {
43
count -= values.null_count();
44
}
45
self.counts[group_idx as usize] += count as u64;
46
Ok(())
47
}
48
49
unsafe fn update_groups_while_evicting(
50
&mut self,
51
values: &Column,
52
subset: &[IdxSize],
53
group_idxs: &[EvictIdx],
54
_seq_id: u64,
55
) -> PolarsResult<()> {
56
assert!(subset.len() == group_idxs.len());
57
let values = values.as_materialized_series(); // @scalar-opt
58
let chunks = values.chunks();
59
assert!(chunks.len() == 1);
60
let arr = &*chunks[0];
61
if arr.has_nulls() && !self.include_nulls {
62
let valid = arr.validity().unwrap();
63
for (i, g) in subset.iter().zip(group_idxs) {
64
let grp = self.counts.get_unchecked_mut(g.idx());
65
if g.should_evict() {
66
self.evicted_counts.push(*grp);
67
*grp = 0;
68
}
69
*grp += valid.get_bit_unchecked(*i as usize) as u64;
70
}
71
} else {
72
for (_, g) in subset.iter().zip(group_idxs) {
73
let grp = self.counts.get_unchecked_mut(g.idx());
74
if g.should_evict() {
75
self.evicted_counts.push(*grp);
76
*grp = 0;
77
}
78
*grp += 1;
79
}
80
}
81
Ok(())
82
}
83
84
unsafe fn combine_subset(
85
&mut self,
86
other: &dyn GroupedReduction,
87
subset: &[IdxSize],
88
group_idxs: &[IdxSize],
89
) -> PolarsResult<()> {
90
let other = other.as_any().downcast_ref::<Self>().unwrap();
91
assert!(subset.len() == group_idxs.len());
92
unsafe {
93
// SAFETY: indices are in-bounds guaranteed by trait.
94
for (i, g) in subset.iter().zip(group_idxs) {
95
*self.counts.get_unchecked_mut(*g as usize) +=
96
*other.counts.get_unchecked(*i as usize);
97
}
98
}
99
Ok(())
100
}
101
102
fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {
103
Box::new(Self {
104
counts: core::mem::take(&mut self.evicted_counts),
105
evicted_counts: Vec::new(),
106
include_nulls: self.include_nulls,
107
})
108
}
109
110
fn finalize(&mut self) -> PolarsResult<Series> {
111
let ca: IdxCa = self
112
.counts
113
.drain(..)
114
.map(|l| IdxSize::try_from(l).expect(LENGTH_LIMIT_MSG))
115
.collect_ca(PlSmallStr::EMPTY);
116
Ok(ca.into_series())
117
}
118
119
fn as_any(&self) -> &dyn Any {
120
self
121
}
122
}
123
124