CoCalc -- sum

GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-ops/src/chunked_array/array/sum_mean.rs
6939 views
1
use arrow::array::{Array, PrimitiveArray};
2
use arrow::bitmap::Bitmap;
3
use arrow::legacy::utils::CustomIterTools;
4
use arrow::types::NativeType;
5
use num_traits::{NumCast, ToPrimitive};
6
use polars_core::prelude::*;
7

8
use crate::chunked_array::sum::sum_slice;
9

10
fn dispatch_sum<T, S>(arr: &dyn Array, width: usize, validity: Option<&Bitmap>) -> ArrayRef
11
where
12
    T: NativeType + ToPrimitive,
13
    S: NativeType + NumCast + std::iter::Sum,
14
{
15
    let values = arr.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
16
    let values = values.values().as_slice();
17

18
    let summed: Vec<_> = (0..values.len())
19
        .step_by(width)
20
        .map(|start| {
21
            let slice = unsafe { values.get_unchecked(start..start + width) };
22
            sum_slice::<T, S>(slice)
23
        })
24
        .collect_trusted();
25

26
    Box::new(PrimitiveArray::from_data_default(
27
        summed.into(),
28
        validity.cloned(),
29
    )) as ArrayRef
30
}
31

32
/// Sums each row of an array column (`ArrayChunked`) whose inner values are primitive numerics.
///
/// For every chunk, the flat inner value array is passed to `dispatch_sum` together with the
/// column width and that chunk's validity bitmap. The accumulator type depends on the inner
/// dtype: 8- and 16-bit integers (signed and unsigned) are summed as `i64`, every other
/// supported dtype is summed in its own type.
///
/// The resulting chunks are assembled into a `Series` carrying the name of `ca`.
///
/// # Panics
///
/// Panics through `unimplemented!()` for any inner dtype that has no arm below, and if
/// constructing the `Series` from the summed chunks fails.
pub(super) fn sum_array_numerical(ca: &ArrayChunked, inner_type: &DataType) -> Series {
    use DataType::*;

    let width = ca.width();
    let summed_chunks = ca
        .downcast_iter()
        .map(|chunk| {
            let inner = chunk.values().as_ref();
            // The outer validity is forwarded unchanged to the per-row result.
            let validity = chunk.validity();

            match inner_type {
                Int8 => dispatch_sum::<i8, i64>(inner, width, validity),
                Int16 => dispatch_sum::<i16, i64>(inner, width, validity),
                Int32 => dispatch_sum::<i32, i32>(inner, width, validity),
                Int64 => dispatch_sum::<i64, i64>(inner, width, validity),
                Int128 => dispatch_sum::<i128, i128>(inner, width, validity),
                UInt8 => dispatch_sum::<u8, i64>(inner, width, validity),
                UInt16 => dispatch_sum::<u16, i64>(inner, width, validity),
                UInt32 => dispatch_sum::<u32, u32>(inner, width, validity),
                UInt64 => dispatch_sum::<u64, u64>(inner, width, validity),
                Float32 => dispatch_sum::<f32, f32>(inner, width, validity),
                Float64 => dispatch_sum::<f64, f64>(inner, width, validity),
                _ => unimplemented!(),
            }
        })
        .collect::<Vec<_>>();

    Series::try_from((ca.name().clone(), summed_chunks)).unwrap()
}
59

60
pub(super) fn sum_with_nulls(ca: &ArrayChunked, inner_dtype: &DataType) -> PolarsResult<Series> {
61
    use DataType::*;
62
    // TODO: add fast path for smaller ints?
63
    let mut out = {
64
        match inner_dtype {
65
            Boolean => {
66
                let out: IdxCa = ca
67
                    .amortized_iter()
68
                    .map(|s| s.and_then(|s| s.as_ref().sum().ok()))
69
                    .collect();
70
                out.into_series()
71
            },
72
            UInt32 => {
73
                let out: UInt32Chunked = ca
74
                    .amortized_iter()
75
                    .map(|s| s.and_then(|s| s.as_ref().sum().ok()))
76
                    .collect();
77
                out.into_series()
78
            },
79
            UInt64 => {
80
                let out: UInt64Chunked = ca
81
                    .amortized_iter()
82
                    .map(|s| s.and_then(|s| s.as_ref().sum().ok()))
83
                    .collect();
84
                out.into_series()
85
            },
86
            Int32 => {
87
                let out: Int32Chunked = ca
88
                    .amortized_iter()
89
                    .map(|s| s.and_then(|s| s.as_ref().sum().ok()))
90
                    .collect();
91
                out.into_series()
92
            },
93
            Int64 => {
94
                let out: Int64Chunked = ca
95
                    .amortized_iter()
96
                    .map(|s| s.and_then(|s| s.as_ref().sum().ok()))
97
                    .collect();
98
                out.into_series()
99
            },
100
            #[cfg(feature = "dtype-i128")]
101
            Int128 => {
102
                let out: Int128Chunked = ca
103
                    .amortized_iter()
104
                    .map(|s| s.and_then(|s| s.as_ref().sum().ok()))
105
                    .collect();
106
                out.into_series()
107
            },
108
            Float32 => {
109
                let out: Float32Chunked = ca
110
                    .amortized_iter()
111
                    .map(|s| s.and_then(|s| s.as_ref().sum().ok()))
112
                    .collect();
113
                out.into_series()
114
            },
115
            Float64 => {
116
                let out: Float64Chunked = ca
117
                    .amortized_iter()
118
                    .map(|s| s.and_then(|s| s.as_ref().sum().ok()))
119
                    .collect();
120
                out.into_series()
121
            },
122
            _ => {
123
                polars_bail!(ComputeError: "summing array with dtype: {} not yet supported", ca.dtype())
124
            },
125
        }
126
    };
127
    out.rename(ca.name().clone());
128
    Ok(out)
129
}
130

131
Product

Resources

Company