Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-expr/src/reduce/var_std.rs
8420 views
1
use std::marker::PhantomData;
2
3
use num_traits::AsPrimitive;
4
use polars_compute::moment::VarState;
5
use polars_core::with_match_physical_numeric_polars_type;
6
7
use super::*;
8
9
pub fn new_var_std_reduction(
10
dtype: DataType,
11
is_std: bool,
12
ddof: u8,
13
) -> PolarsResult<Box<dyn GroupedReduction>> {
14
// TODO: Move the error checks up and make this function infallible
15
use DataType::*;
16
use VecGroupedReduction as VGR;
17
let op_name = if is_std { "std" } else { "var" };
18
Ok(match dtype {
19
Boolean => Box::new(VGR::new(dtype, BoolVarStdReducer { is_std, ddof })),
20
_ if dtype.is_primitive_numeric() => {
21
with_match_physical_numeric_polars_type!(dtype.to_physical(), |$T| {
22
Box::new(VGR::new(dtype, VarStdReducer::<$T> {
23
is_std,
24
ddof,
25
needs_cast: false,
26
_phantom: PhantomData,
27
}))
28
})
29
},
30
#[cfg(feature = "dtype-decimal")]
31
Decimal(_, _) => Box::new(VGR::new(
32
dtype,
33
VarStdReducer::<Float64Type> {
34
is_std,
35
ddof,
36
needs_cast: true,
37
_phantom: PhantomData,
38
},
39
)),
40
Duration(..) => todo!(),
41
Null => Box::new(super::NullGroupedReduction::new(Scalar::null(
42
DataType::Null,
43
))),
44
_ => {
45
polars_bail!(InvalidOperation: "`{op_name}` operation not supported for dtype `{dtype}`")
46
},
47
})
48
}
49
50
/// Reducer configuration for variance / standard deviation over values of the
/// Polars numeric type `T`.
struct VarStdReducer<T> {
    /// `true` to report the standard deviation, `false` for the variance.
    is_std: bool,
    /// Delta degrees of freedom handed to `VarState::finalize`.
    ddof: u8,
    /// When set, `cast_series` casts the input series to `Float64` first.
    needs_cast: bool,
    /// Ties the reducer to `T` without storing a value of that type.
    _phantom: PhantomData<T>,
}
56
57
impl<T> Clone for VarStdReducer<T> {
58
fn clone(&self) -> Self {
59
Self {
60
is_std: self.is_std,
61
ddof: self.ddof,
62
needs_cast: self.needs_cast,
63
_phantom: PhantomData,
64
}
65
}
66
}
67
68
impl<T: PolarsNumericType> Reducer for VarStdReducer<T> {
69
type Dtype = T;
70
type Value = VarState;
71
72
fn init(&self) -> Self::Value {
73
VarState::default()
74
}
75
76
fn cast_series<'a>(&self, s: &'a Series) -> Cow<'a, Series> {
77
if self.needs_cast {
78
Cow::Owned(s.cast(&DataType::Float64).unwrap())
79
} else {
80
Cow::Borrowed(s)
81
}
82
}
83
84
fn combine(&self, a: &mut Self::Value, b: &Self::Value) {
85
a.combine(b)
86
}
87
88
#[inline(always)]
89
fn reduce_one(&self, a: &mut Self::Value, b: Option<T::Native>, _seq_id: u64) {
90
if let Some(x) = b {
91
a.insert_one(x.as_());
92
}
93
}
94
95
fn reduce_ca(&self, v: &mut Self::Value, ca: &ChunkedArray<Self::Dtype>, _seq_id: u64) {
96
for arr in ca.downcast_iter() {
97
v.combine(&polars_compute::moment::var(arr))
98
}
99
}
100
101
fn finish(
102
&self,
103
v: Vec<Self::Value>,
104
m: Option<Bitmap>,
105
dtype: &DataType,
106
) -> PolarsResult<Series> {
107
assert!(m.is_none());
108
match dtype {
109
#[cfg(feature = "dtype-f16")]
110
DataType::Float16 => {
111
let ca: Float16Chunked = v
112
.into_iter()
113
.map(|s| {
114
let var = s.finalize(self.ddof);
115
let out = if self.is_std { var.map(f64::sqrt) } else { var };
116
out.map(|v| v.as_())
117
})
118
.collect_ca(PlSmallStr::EMPTY);
119
Ok(ca.into_series())
120
},
121
DataType::Float32 => {
122
let ca: Float32Chunked = v
123
.into_iter()
124
.map(|s| {
125
let var = s.finalize(self.ddof);
126
let out = if self.is_std { var.map(f64::sqrt) } else { var };
127
out.map(|v| v as f32)
128
})
129
.collect_ca(PlSmallStr::EMPTY);
130
Ok(ca.into_series())
131
},
132
_ => {
133
let ca: Float64Chunked = v
134
.into_iter()
135
.map(|s| {
136
let var = s.finalize(self.ddof);
137
if self.is_std { var.map(f64::sqrt) } else { var }
138
})
139
.collect_ca(PlSmallStr::EMPTY);
140
Ok(ca.into_series())
141
},
142
}
143
}
144
}
145
146
/// Reducer configuration for variance / standard deviation over boolean
/// columns.
#[derive(Clone)]
struct BoolVarStdReducer {
    /// `true` to report the standard deviation, `false` for the variance.
    is_std: bool,
    /// Delta degrees of freedom subtracted from the non-null count.
    ddof: u8,
}
151
152
impl Reducer for BoolVarStdReducer {
153
type Dtype = BooleanType;
154
type Value = (usize, usize);
155
156
fn init(&self) -> Self::Value {
157
(0, 0)
158
}
159
160
fn combine(&self, a: &mut Self::Value, b: &Self::Value) {
161
a.0 += b.0;
162
a.1 += b.1;
163
}
164
165
#[inline(always)]
166
fn reduce_one(&self, a: &mut Self::Value, b: Option<bool>, _seq_id: u64) {
167
a.0 += b.unwrap_or(false) as usize;
168
a.1 += b.is_some() as usize;
169
}
170
171
fn reduce_ca(&self, v: &mut Self::Value, ca: &ChunkedArray<Self::Dtype>, _seq_id: u64) {
172
v.0 += ca.sum().unwrap_or(0) as usize;
173
v.1 += ca.len() - ca.null_count();
174
}
175
176
fn finish(
177
&self,
178
v: Vec<Self::Value>,
179
m: Option<Bitmap>,
180
_dtype: &DataType,
181
) -> PolarsResult<Series> {
182
assert!(m.is_none());
183
let ca: Float64Chunked = v
184
.into_iter()
185
.map(|v| {
186
if v.1 <= self.ddof as usize {
187
return None;
188
}
189
190
let sum = v.0 as f64; // Both the sum and sum-of-squares, letting us simplify.
191
let n = v.1;
192
let var = sum * (1.0 - sum / n as f64) / ((n - self.ddof as usize) as f64);
193
if self.is_std {
194
Some(var.sqrt())
195
} else {
196
Some(var)
197
}
198
})
199
.collect_ca(PlSmallStr::EMPTY);
200
Ok(ca.into_series())
201
}
202
}
203
204