CoCalc -- var

GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-expr/src/reduce/var_std.rs
6940 views
1
use std::marker::PhantomData;
2

3
use num_traits::AsPrimitive;
4
use polars_compute::moment::VarState;
5
use polars_core::with_match_physical_numeric_polars_type;
6

7
use super::*;
8

9
pub fn new_var_std_reduction(dtype: DataType, is_std: bool, ddof: u8) -> Box<dyn GroupedReduction> {
10
    use DataType::*;
11
    use VecGroupedReduction as VGR;
12
    match dtype {
13
        Boolean => Box::new(VGR::new(dtype, BoolVarStdReducer { is_std, ddof })),
14
        _ if dtype.is_primitive_numeric() => {
15
            with_match_physical_numeric_polars_type!(dtype.to_physical(), |$T| {
16
                Box::new(VGR::new(dtype, VarStdReducer::<$T> {
17
                    is_std,
18
                    ddof,
19
                    needs_cast: false,
20
                    _phantom: PhantomData,
21
                }))
22
            })
23
        },
24
        #[cfg(feature = "dtype-decimal")]
25
        Decimal(_, _) => Box::new(VGR::new(
26
            dtype,
27
            VarStdReducer::<Float64Type> {
28
                is_std,
29
                ddof,
30
                needs_cast: true,
31
                _phantom: PhantomData,
32
            },
33
        )),
34
        Duration(..) => todo!(),
35
        _ => unimplemented!(),
36
    }
37
}
38

39
/// Settings for the variance / standard-deviation reducer over the polars
/// type `T`.
struct VarStdReducer<T> {
    /// When set, the finished variance is square-rooted.
    is_std: bool,
    /// Delta degrees of freedom handed to `VarState::finalize`.
    ddof: u8,
    /// When set, incoming series are cast to `Float64` before reducing.
    needs_cast: bool,
    /// Ties the reducer to `T` without storing a value of that type.
    _phantom: PhantomData<T>,
}

// Written by hand so that cloning places no `Clone` bound on `T`, which
// `#[derive(Clone)]` would add.
impl<T> Clone for VarStdReducer<T> {
    fn clone(&self) -> Self {
        // The three settings are `Copy`, so they can be bound straight out of
        // the reference; the marker is simply recreated.
        let &Self {
            is_std,
            ddof,
            needs_cast,
            ..
        } = self;
        Self {
            is_std,
            ddof,
            needs_cast,
            _phantom: PhantomData,
        }
    }
}
56

57
impl<T: PolarsNumericType> Reducer for VarStdReducer<T> {
58
    type Dtype = T;
59
    type Value = VarState;
60

61
    fn init(&self) -> Self::Value {
62
        VarState::default()
63
    }
64

65
    fn cast_series<'a>(&self, s: &'a Series) -> Cow<'a, Series> {
66
        if self.needs_cast {
67
            Cow::Owned(s.cast(&DataType::Float64).unwrap())
68
        } else {
69
            Cow::Borrowed(s)
70
        }
71
    }
72

73
    fn combine(&self, a: &mut Self::Value, b: &Self::Value) {
74
        a.combine(b)
75
    }
76

77
    #[inline(always)]
78
    fn reduce_one(&self, a: &mut Self::Value, b: Option<T::Native>, _seq_id: u64) {
79
        if let Some(x) = b {
80
            a.insert_one(x.as_());
81
        }
82
    }
83

84
    fn reduce_ca(&self, v: &mut Self::Value, ca: &ChunkedArray<Self::Dtype>, _seq_id: u64) {
85
        for arr in ca.downcast_iter() {
86
            v.combine(&polars_compute::moment::var(arr))
87
        }
88
    }
89

90
    fn finish(
91
        &self,
92
        v: Vec<Self::Value>,
93
        m: Option<Bitmap>,
94
        _dtype: &DataType,
95
    ) -> PolarsResult<Series> {
96
        assert!(m.is_none());
97
        let ca: Float64Chunked = v
98
            .into_iter()
99
            .map(|s| {
100
                let var = s.finalize(self.ddof);
101
                if self.is_std { var.map(f64::sqrt) } else { var }
102
            })
103
            .collect_ca(PlSmallStr::EMPTY);
104
        Ok(ca.into_series())
105
    }
106
}
107

108
#[derive(Clone)]
109
struct BoolVarStdReducer {
110
    is_std: bool,
111
    ddof: u8,
112
}
113

114
impl Reducer for BoolVarStdReducer {
115
    type Dtype = BooleanType;
116
    type Value = (usize, usize);
117

118
    fn init(&self) -> Self::Value {
119
        (0, 0)
120
    }
121

122
    fn combine(&self, a: &mut Self::Value, b: &Self::Value) {
123
        a.0 += b.0;
124
        a.1 += b.1;
125
    }
126

127
    #[inline(always)]
128
    fn reduce_one(&self, a: &mut Self::Value, b: Option<bool>, _seq_id: u64) {
129
        a.0 += b.unwrap_or(false) as usize;
130
        a.1 += b.is_some() as usize;
131
    }
132

133
    fn reduce_ca(&self, v: &mut Self::Value, ca: &ChunkedArray<Self::Dtype>, _seq_id: u64) {
134
        v.0 += ca.sum().unwrap_or(0) as usize;
135
        v.1 += ca.len() - ca.null_count();
136
    }
137

138
    fn finish(
139
        &self,
140
        v: Vec<Self::Value>,
141
        m: Option<Bitmap>,
142
        _dtype: &DataType,
143
    ) -> PolarsResult<Series> {
144
        assert!(m.is_none());
145
        let ca: Float64Chunked = v
146
            .into_iter()
147
            .map(|v| {
148
                if v.1 <= self.ddof as usize {
149
                    return None;
150
                }
151

152
                let sum = v.0 as f64; // Both the sum and sum-of-squares, letting us simplify.
153
                let n = v.1;
154
                let var = sum * (1.0 - sum / n as f64) / ((n - self.ddof as usize) as f64);
155
                if self.is_std {
156
                    Some(var.sqrt())
157
                } else {
158
                    Some(var)
159
                }
160
            })
161
            .collect_ca(PlSmallStr::EMPTY);
162
        Ok(ca.into_series())
163
    }
164
}
165

166
Product

Resources

Company