Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-expr/src/reduce/mean.rs
8421 views
1
use std::marker::PhantomData;
2
3
use arrow::temporal_conversions::MICROSECONDS_IN_DAY;
4
use num_traits::{AsPrimitive, Zero};
5
use polars_core::with_match_physical_numeric_polars_type;
6
7
use super::*;
8
9
pub fn new_mean_reduction(dtype: DataType) -> PolarsResult<Box<dyn GroupedReduction>> {
10
// TODO: Move the error checks up and make this function infallible
11
use DataType::*;
12
use VecGroupedReduction as VGR;
13
Ok(match dtype {
14
Boolean => Box::new(VGR::new(dtype, BoolMeanReducer)),
15
_ if dtype.is_primitive_numeric() || dtype.is_temporal() => {
16
with_match_physical_numeric_polars_type!(dtype.to_physical(), |$T| {
17
Box::new(VGR::new(dtype, NumMeanReducer::<$T>(PhantomData)))
18
})
19
},
20
#[cfg(feature = "dtype-decimal")]
21
Decimal(_, _) => Box::new(VGR::new(dtype, NumMeanReducer::<Int128Type>(PhantomData))),
22
Null => Box::new(super::NullGroupedReduction::new(Scalar::null(
23
DataType::Null,
24
))),
25
_ => polars_bail!(InvalidOperation: "`mean` operation not supported for dtype `{dtype}`"),
26
})
27
}
28
29
fn finish_output(values: Vec<(f64, usize)>, dtype: &DataType) -> Series {
30
match dtype {
31
#[cfg(feature = "dtype-f16")]
32
DataType::Float16 => {
33
let ca: Float16Chunked = values
34
.into_iter()
35
.map(|(s, c)| (c != 0).then(|| (s / c as f64).as_()))
36
.collect_ca(PlSmallStr::EMPTY);
37
ca.into_series()
38
},
39
DataType::Float32 => {
40
let ca: Float32Chunked = values
41
.into_iter()
42
.map(|(s, c)| (c != 0).then(|| (s / c as f64) as f32))
43
.collect_ca(PlSmallStr::EMPTY);
44
ca.into_series()
45
},
46
dt if dt.is_primitive_numeric() => {
47
let ca: Float64Chunked = values
48
.into_iter()
49
.map(|(s, c)| (c != 0).then(|| s / c as f64))
50
.collect_ca(PlSmallStr::EMPTY);
51
ca.into_series()
52
},
53
#[cfg(feature = "dtype-decimal")]
54
DataType::Decimal(_prec, scale) => {
55
let inv_scale_factor = 1.0 / 10u128.pow(*scale as u32) as f64;
56
let ca: Float64Chunked = values
57
.into_iter()
58
.map(|(s, c)| (c != 0).then(|| s / c as f64 * inv_scale_factor))
59
.collect_ca(PlSmallStr::EMPTY);
60
ca.into_series()
61
},
62
#[cfg(feature = "dtype-datetime")]
63
DataType::Date => {
64
const US_IN_DAY: f64 = MICROSECONDS_IN_DAY as f64;
65
let ca: Int64Chunked = values
66
.into_iter()
67
.map(|(s, c)| (c != 0).then(|| (s * US_IN_DAY / c as f64) as i64))
68
.collect_ca(PlSmallStr::EMPTY);
69
ca.into_datetime(TimeUnit::Microseconds, None).into_series()
70
},
71
DataType::Datetime(_, _) | DataType::Duration(_) | DataType::Time => {
72
let ca: Int64Chunked = values
73
.into_iter()
74
.map(|(s, c)| (c != 0).then(|| (s / c as f64) as i64))
75
.collect_ca(PlSmallStr::EMPTY);
76
ca.into_series().cast(dtype).unwrap()
77
},
78
_ => unimplemented!(),
79
}
80
}
81
82
/// Zero-sized mean reducer, parameterised by the type `T` it operates on.
///
/// `T` is only carried as a marker; no value of `T` is ever stored.
struct NumMeanReducer<T>(PhantomData<T>);

// `Clone` is written by hand rather than derived: a derive would add a `T: Clone`
// bound, which is unnecessary for a marker-only field.
impl<T> Clone for NumMeanReducer<T> {
    fn clone(&self) -> Self {
        NumMeanReducer(PhantomData)
    }
}
88
89
impl<T> Reducer for NumMeanReducer<T>
90
where
91
T: PolarsNumericType,
92
ChunkedArray<T>: ChunkAgg<T::Native>,
93
{
94
type Dtype = T;
95
type Value = (f64, usize);
96
97
#[inline(always)]
98
fn init(&self) -> Self::Value {
99
(0.0, 0)
100
}
101
102
fn cast_series<'a>(&self, s: &'a Series) -> Cow<'a, Series> {
103
s.to_physical_repr()
104
}
105
106
#[inline(always)]
107
fn combine(&self, a: &mut Self::Value, b: &Self::Value) {
108
a.0 += b.0;
109
a.1 += b.1;
110
}
111
112
#[inline(always)]
113
fn reduce_one(&self, a: &mut Self::Value, b: Option<T::Native>, _seq_id: u64) {
114
a.0 += b.unwrap_or(T::Native::zero()).as_();
115
a.1 += b.is_some() as usize;
116
}
117
118
fn reduce_ca(&self, v: &mut Self::Value, ca: &ChunkedArray<Self::Dtype>, _seq_id: u64) {
119
v.0 += ChunkAgg::_sum_as_f64(ca);
120
v.1 += ca.len() - ca.null_count();
121
}
122
123
fn finish(
124
&self,
125
v: Vec<Self::Value>,
126
m: Option<Bitmap>,
127
dtype: &DataType,
128
) -> PolarsResult<Series> {
129
assert!(m.is_none());
130
Ok(finish_output(v, dtype))
131
}
132
}
133
134
#[derive(Clone)]
135
struct BoolMeanReducer;
136
137
impl Reducer for BoolMeanReducer {
138
type Dtype = BooleanType;
139
type Value = (usize, usize);
140
141
#[inline(always)]
142
fn init(&self) -> Self::Value {
143
(0, 0)
144
}
145
146
#[inline(always)]
147
fn combine(&self, a: &mut Self::Value, b: &Self::Value) {
148
a.0 += b.0;
149
a.1 += b.1;
150
}
151
152
#[inline(always)]
153
fn reduce_one(&self, a: &mut Self::Value, b: Option<bool>, _seq_id: u64) {
154
a.0 += b.unwrap_or(false) as usize;
155
a.1 += b.is_some() as usize;
156
}
157
158
fn reduce_ca(&self, v: &mut Self::Value, ca: &ChunkedArray<Self::Dtype>, _seq_id: u64) {
159
v.0 += ca.sum().unwrap_or(0) as usize;
160
v.1 += ca.len() - ca.null_count();
161
}
162
163
fn finish(
164
&self,
165
v: Vec<Self::Value>,
166
m: Option<Bitmap>,
167
dtype: &DataType,
168
) -> PolarsResult<Series> {
169
assert!(m.is_none());
170
assert!(dtype == &DataType::Boolean);
171
let ca: Float64Chunked = v
172
.into_iter()
173
.map(|(s, c)| (c != 0).then(|| s as f64 / c as f64))
174
.collect_ca(PlSmallStr::EMPTY);
175
Ok(ca.into_series())
176
}
177
}
178
179