Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-compute/src/sum.rs
6939 views
1
use std::ops::Add;
2
#[cfg(feature = "simd")]
3
use std::simd::prelude::*;
4
5
use arrow::array::{Array, PrimitiveArray};
6
use arrow::bitmap::bitmask::BitMask;
7
use arrow::types::NativeType;
8
use num_traits::Zero;
9
10
macro_rules! wrapping_impl {
11
($trait_name:ident, $method:ident, $t:ty) => {
12
impl $trait_name for $t {
13
#[inline(always)]
14
fn wrapping_add(&self, v: &Self) -> Self {
15
<$t>::$method(*self, *v)
16
}
17
}
18
};
19
}
20
21
/// Performs addition that wraps around on overflow.
22
///
23
/// Differs from num::WrappingAdd in that this is also implemented for floats.
24
pub trait WrappingAdd: Sized {
25
/// Wrapping (modular) addition. Computes `self + other`, wrapping around at
26
/// the boundary of the type.
27
fn wrapping_add(&self, v: &Self) -> Self;
28
}
29
30
wrapping_impl!(WrappingAdd, wrapping_add, u8);
31
wrapping_impl!(WrappingAdd, wrapping_add, u16);
32
wrapping_impl!(WrappingAdd, wrapping_add, u32);
33
wrapping_impl!(WrappingAdd, wrapping_add, u64);
34
wrapping_impl!(WrappingAdd, wrapping_add, usize);
35
wrapping_impl!(WrappingAdd, wrapping_add, u128);
36
37
wrapping_impl!(WrappingAdd, wrapping_add, i8);
38
wrapping_impl!(WrappingAdd, wrapping_add, i16);
39
wrapping_impl!(WrappingAdd, wrapping_add, i32);
40
wrapping_impl!(WrappingAdd, wrapping_add, i64);
41
wrapping_impl!(WrappingAdd, wrapping_add, isize);
42
wrapping_impl!(WrappingAdd, wrapping_add, i128);
43
44
wrapping_impl!(WrappingAdd, add, f32);
45
wrapping_impl!(WrappingAdd, add, f64);
46
47
#[cfg(feature = "simd")]
48
const STRIPE: usize = 16;
49
50
fn wrapping_sum_with_mask_scalar<T: Zero + WrappingAdd + Copy>(vals: &[T], mask: &BitMask) -> T {
51
assert!(vals.len() == mask.len());
52
vals.iter()
53
.enumerate()
54
.map(|(i, x)| {
55
// No filter but rather select of 0 for cmov opt.
56
if mask.get(i) { *x } else { T::zero() }
57
})
58
.fold(T::zero(), |a, b| a.wrapping_add(&b))
59
}
60
61
#[cfg(not(feature = "simd"))]
62
impl<T> WrappingSum for T
63
where
64
T: NativeType + WrappingAdd + Zero,
65
{
66
fn wrapping_sum(vals: &[Self]) -> Self {
67
vals.iter()
68
.copied()
69
.fold(T::zero(), |a, b| a.wrapping_add(&b))
70
}
71
72
fn wrapping_sum_with_validity(vals: &[Self], mask: &BitMask) -> Self {
73
wrapping_sum_with_mask_scalar(vals, mask)
74
}
75
}
76
77
#[cfg(feature = "simd")]
78
impl<T> WrappingSum for T
79
where
80
T: NativeType + WrappingAdd + Zero + crate::SimdPrimitive,
81
{
82
fn wrapping_sum(vals: &[Self]) -> Self {
83
vals.iter()
84
.copied()
85
.fold(T::zero(), |a, b| a.wrapping_add(&b))
86
}
87
88
fn wrapping_sum_with_validity(vals: &[Self], mask: &BitMask) -> Self {
89
assert!(vals.len() == mask.len());
90
let remainder = vals.len() % STRIPE;
91
let (rest, main) = vals.split_at(remainder);
92
let (rest_mask, main_mask) = mask.split_at(remainder);
93
let zero: Simd<T, STRIPE> = Simd::default();
94
95
let vsum = main
96
.chunks_exact(STRIPE)
97
.enumerate()
98
.map(|(i, a)| {
99
let m: Mask<_, STRIPE> = main_mask.get_simd(i * STRIPE);
100
m.select(Simd::from_slice(a), zero)
101
})
102
.fold(zero, |a, b| {
103
let a = a.to_array();
104
let b = b.to_array();
105
Simd::from_array(std::array::from_fn(|i| a[i].wrapping_add(&b[i])))
106
});
107
108
let mainsum = vsum
109
.to_array()
110
.into_iter()
111
.fold(T::zero(), |a, b| a.wrapping_add(&b));
112
113
// TODO: faster remainder.
114
let restsum = wrapping_sum_with_mask_scalar(rest, &rest_mask);
115
mainsum.wrapping_add(&restsum)
116
}
117
}
118
119
#[cfg(feature = "simd")]
120
impl WrappingSum for u128 {
121
fn wrapping_sum(vals: &[Self]) -> Self {
122
vals.iter().copied().fold(0, |a, b| a.wrapping_add(b))
123
}
124
125
fn wrapping_sum_with_validity(vals: &[Self], mask: &BitMask) -> Self {
126
wrapping_sum_with_mask_scalar(vals, mask)
127
}
128
}
129
130
#[cfg(feature = "simd")]
131
impl WrappingSum for i128 {
132
fn wrapping_sum(vals: &[Self]) -> Self {
133
vals.iter().copied().fold(0, |a, b| a.wrapping_add(b))
134
}
135
136
fn wrapping_sum_with_validity(vals: &[Self], mask: &BitMask) -> Self {
137
wrapping_sum_with_mask_scalar(vals, mask)
138
}
139
}
140
141
pub trait WrappingSum: Sized {
142
fn wrapping_sum(vals: &[Self]) -> Self;
143
fn wrapping_sum_with_validity(vals: &[Self], mask: &BitMask) -> Self;
144
}
145
146
pub fn wrapping_sum_arr<T>(arr: &PrimitiveArray<T>) -> T
147
where
148
T: NativeType + WrappingSum,
149
{
150
let validity = arr.validity().filter(|_| arr.null_count() > 0);
151
if let Some(mask) = validity {
152
WrappingSum::wrapping_sum_with_validity(arr.values(), &BitMask::from_bitmap(mask))
153
} else {
154
WrappingSum::wrapping_sum(arr.values())
155
}
156
}
157
158