Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/legacy/kernels/take_agg/var.rs
6940 views
1
use super::*;
2
3
/// Numerical stable online variance aggregation.
4
///
5
/// See:
6
/// Welford, B. P. (1962). "Note on a method for calculating corrected sums of squares and products".
7
/// Technometrics. 4 (3): 419–420. doi:10.2307/1266577. JSTOR 1266577.
8
/// and:
9
/// Ling, Robert F. (1974). "Comparison of Several Algorithms for Computing Sample Means and Variances".
10
/// Journal of the American Statistical Association. 69 (348): 859–866. doi:10.2307/2286154. JSTOR 2286154.
11
pub fn online_variance<I>(
12
// iterator producing values
13
iter: I,
14
ddof: u8,
15
) -> Option<f64>
16
where
17
I: IntoIterator<Item = f64>,
18
{
19
let mut m2 = 0.0;
20
let mut mean = 0.0;
21
let mut count = 0u64;
22
23
for value in iter {
24
let new_count = count + 1;
25
let delta_1 = value - mean;
26
let new_mean = delta_1 / new_count as f64 + mean;
27
let delta_2 = value - new_mean;
28
let new_m2 = m2 + delta_1 * delta_2;
29
30
count += 1;
31
mean = new_mean;
32
m2 = new_m2;
33
}
34
35
if count <= ddof as u64 {
36
return None;
37
}
38
39
Some(m2 / (count as f64 - ddof as f64))
40
}
41
42
/// Take kernel for single chunk and an iterator as index.
43
/// # Safety
44
/// caller must ensure iterators indexes are in bounds
45
pub unsafe fn take_var_no_null_primitive_iter_unchecked<T, I>(
46
arr: &PrimitiveArray<T>,
47
indices: I,
48
ddof: u8,
49
) -> Option<f64>
50
where
51
T: NativeType + ToPrimitive,
52
I: IntoIterator<Item = usize>,
53
{
54
debug_assert!(arr.null_count() == 0);
55
let array_values = arr.values().as_slice();
56
let iter = unsafe {
57
indices.into_iter().map(|idx| {
58
let value = *array_values.get_unchecked(idx);
59
value.to_f64().unwrap_unchecked()
60
})
61
};
62
online_variance(iter, ddof)
63
}
64
65
/// Take kernel for single chunk and an iterator as index.
66
/// # Safety
67
/// caller must ensure iterators indexes are in bounds
68
pub unsafe fn take_var_nulls_primitive_iter_unchecked<T, I>(
69
arr: &PrimitiveArray<T>,
70
indices: I,
71
ddof: u8,
72
) -> Option<f64>
73
where
74
T: NativeType + ToPrimitive,
75
I: IntoIterator<Item = usize>,
76
{
77
debug_assert!(arr.null_count() > 0);
78
let array_values = arr.values().as_slice();
79
let validity = arr.validity().unwrap();
80
81
let iter = unsafe {
82
indices.into_iter().flat_map(|idx| {
83
if validity.get_bit_unchecked(idx) {
84
let value = *array_values.get_unchecked(idx);
85
value.to_f64()
86
} else {
87
None
88
}
89
})
90
};
91
online_variance(iter, ddof)
92
}
93
94