Path: blob/main/crates/polars-arrow/src/legacy/kernels/take_agg/var.rs
6940 views
use super::*;12/// Numerical stable online variance aggregation.3///4/// See:5/// Welford, B. P. (1962). "Note on a method for calculating corrected sums of squares and products".6/// Technometrics. 4 (3): 419–420. doi:10.2307/1266577. JSTOR 1266577.7/// and:8/// Ling, Robert F. (1974). "Comparison of Several Algorithms for Computing Sample Means and Variances".9/// Journal of the American Statistical Association. 69 (348): 859–866. doi:10.2307/2286154. JSTOR 2286154.10pub fn online_variance<I>(11// iterator producing values12iter: I,13ddof: u8,14) -> Option<f64>15where16I: IntoIterator<Item = f64>,17{18let mut m2 = 0.0;19let mut mean = 0.0;20let mut count = 0u64;2122for value in iter {23let new_count = count + 1;24let delta_1 = value - mean;25let new_mean = delta_1 / new_count as f64 + mean;26let delta_2 = value - new_mean;27let new_m2 = m2 + delta_1 * delta_2;2829count += 1;30mean = new_mean;31m2 = new_m2;32}3334if count <= ddof as u64 {35return None;36}3738Some(m2 / (count as f64 - ddof as f64))39}4041/// Take kernel for single chunk and an iterator as index.42/// # Safety43/// caller must ensure iterators indexes are in bounds44pub unsafe fn take_var_no_null_primitive_iter_unchecked<T, I>(45arr: &PrimitiveArray<T>,46indices: I,47ddof: u8,48) -> Option<f64>49where50T: NativeType + ToPrimitive,51I: IntoIterator<Item = usize>,52{53debug_assert!(arr.null_count() == 0);54let array_values = arr.values().as_slice();55let iter = unsafe {56indices.into_iter().map(|idx| {57let value = *array_values.get_unchecked(idx);58value.to_f64().unwrap_unchecked()59})60};61online_variance(iter, ddof)62}6364/// Take kernel for single chunk and an iterator as index.65/// # Safety66/// caller must ensure iterators indexes are in bounds67pub unsafe fn take_var_nulls_primitive_iter_unchecked<T, I>(68arr: &PrimitiveArray<T>,69indices: I,70ddof: u8,71) -> Option<f64>72where73T: NativeType + ToPrimitive,74I: IntoIterator<Item = usize>,75{76debug_assert!(arr.null_count() > 0);77let array_values = arr.values().as_slice();78let validity = arr.validity().unwrap();7980let iter = unsafe {81indices.into_iter().flat_map(|idx| {82if validity.get_bit_unchecked(idx) {83let value = *array_values.get_unchecked(idx);84value.to_f64()85} else {86None87}88})89};90online_variance(iter, ddof)91}929394