CoCalc -- cum

GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-ops/src/series/ops/cum_agg.rs
6939 views
1
use std::ops::{AddAssign, Mul};
2

3
use arity::unary_elementwise_values;
4
use arrow::array::{Array, BooleanArray};
5
use arrow::bitmap::{Bitmap, BitmapBuilder};
6
use num_traits::{Bounded, One, Zero};
7
use polars_core::prelude::*;
8
use polars_core::series::IsSorted;
9
use polars_core::utils::{CustomIterTools, NoNull};
10
use polars_core::with_match_physical_numeric_polars_type;
11
use polars_utils::float::IsFloat;
12
use polars_utils::min_max::MinMax;
13

14
fn det_max<T>(state: &mut T, v: Option<T>) -> Option<Option<T>>
15
where
16
    T: Copy + MinMax,
17
{
18
    match v {
19
        Some(v) => {
20
            *state = MinMax::max_ignore_nan(*state, v);
21
            Some(Some(*state))
22
        },
23
        None => Some(None),
24
    }
25
}
26

27
fn det_min<T>(state: &mut T, v: Option<T>) -> Option<Option<T>>
28
where
29
    T: Copy + MinMax,
30
{
31
    match v {
32
        Some(v) => {
33
            *state = MinMax::min_ignore_nan(*state, v);
34
            Some(Some(*state))
35
        },
36
        None => Some(None),
37
    }
38
}
39

40
/// Scan step for a running sum.
///
/// A `Some` input is added onto `state` and the new total is emitted; a `None`
/// input is emitted as `None` and leaves `state` untouched. The outer `Option`
/// is always `Some`.
fn det_sum<T>(state: &mut T, v: Option<T>) -> Option<Option<T>>
where
    T: Copy + AddAssign,
{
    Some(v.map(|term| {
        *state += term;
        *state
    }))
}
52

53
/// Scan step for a running product.
///
/// A `Some` input is multiplied into `state` and the new product is emitted; a
/// `None` input is emitted as `None` and leaves `state` untouched. The outer
/// `Option` is always `Some`.
fn det_prod<T>(state: &mut T, v: Option<T>) -> Option<Option<T>>
where
    T: Copy + Mul<Output = T>,
{
    let emitted = v.map(|factor| {
        *state = *state * factor;
        *state
    });
    Some(emitted)
}
65

66
fn cum_scan_numeric<T, F>(
67
    ca: &ChunkedArray<T>,
68
    reverse: bool,
69
    init: T::Native,
70
    update: F,
71
) -> ChunkedArray<T>
72
where
73
    T: PolarsNumericType,
74
    ChunkedArray<T>: FromIterator<Option<T::Native>>,
75
    F: Fn(&mut T::Native, Option<T::Native>) -> Option<Option<T::Native>>,
76
{
77
    let out: ChunkedArray<T> = match reverse {
78
        false => ca.iter().scan(init, update).collect_trusted(),
79
        true => ca.iter().rev().scan(init, update).collect_reversed(),
80
    };
81
    out.with_name(ca.name().clone())
82
}
83

84
fn cum_max_numeric<T>(
85
    ca: &ChunkedArray<T>,
86
    reverse: bool,
87
    init: Option<T::Native>,
88
) -> ChunkedArray<T>
89
where
90
    T: PolarsNumericType,
91
    T::Native: MinMax + Bounded,
92
    ChunkedArray<T>: FromIterator<Option<T::Native>>,
93
{
94
    let init = init.unwrap_or(if T::Native::is_float() {
95
        T::Native::nan_value()
96
    } else {
97
        Bounded::min_value()
98
    });
99
    cum_scan_numeric(ca, reverse, init, det_max)
100
}
101

102
fn cum_min_numeric<T>(
103
    ca: &ChunkedArray<T>,
104
    reverse: bool,
105
    init: Option<T::Native>,
106
) -> ChunkedArray<T>
107
where
108
    T: PolarsNumericType,
109
    T::Native: MinMax + Bounded,
110
    ChunkedArray<T>: FromIterator<Option<T::Native>>,
111
{
112
    let init = init.unwrap_or(if T::Native::is_float() {
113
        T::Native::nan_value()
114
    } else {
115
        Bounded::max_value()
116
    });
117
    cum_scan_numeric(ca, reverse, init, det_min)
118
}
119

120
fn cum_max_bool(ca: &BooleanChunked, reverse: bool, init: Option<bool>) -> BooleanChunked {
121
    if ca.len() == ca.null_count() {
122
        return ca.clone();
123
    }
124

125
    if init == Some(true) {
126
        return unsafe {
127
            BooleanChunked::from_chunks(
128
                ca.name().clone(),
129
                ca.downcast_iter()
130
                    .map(|arr| {
131
                        arr.with_values(Bitmap::new_with_value(true, arr.len()))
132
                            .to_boxed()
133
                    })
134
                    .collect(),
135
            )
136
        };
137
    }
138

139
    let mut out;
140
    if !reverse {
141
        // TODO: efficient bitscan.
142
        let Some(first_true_idx) = ca.iter().position(|x| x == Some(true)) else {
143
            return ca.clone();
144
        };
145
        out = BitmapBuilder::with_capacity(ca.len());
146
        out.extend_constant(first_true_idx, false);
147
        out.extend_constant(ca.len() - first_true_idx, true);
148
    } else {
149
        // TODO: efficient bitscan.
150
        let Some(last_true_idx) = ca.iter().rposition(|x| x == Some(true)) else {
151
            return ca.clone();
152
        };
153
        out = BitmapBuilder::with_capacity(ca.len());
154
        out.extend_constant(last_true_idx + 1, true);
155
        out.extend_constant(ca.len() - 1 - last_true_idx, false);
156
    }
157

158
    let arr: BooleanArray = out.freeze().into();
159
    BooleanChunked::with_chunk_like(ca, arr.with_validity(ca.rechunk_validity()))
160
}
161

162
fn cum_min_bool(ca: &BooleanChunked, reverse: bool, init: Option<bool>) -> BooleanChunked {
163
    if ca.len() == ca.null_count() {
164
        return ca.clone();
165
    }
166

167
    if init == Some(false) {
168
        return unsafe {
169
            BooleanChunked::from_chunks(
170
                ca.name().clone(),
171
                ca.downcast_iter()
172
                    .map(|arr| {
173
                        arr.with_values(Bitmap::new_with_value(false, arr.len()))
174
                            .to_boxed()
175
                    })
176
                    .collect(),
177
            )
178
        };
179
    }
180

181
    let mut out;
182
    if !reverse {
183
        // TODO: efficient bitscan.
184
        let Some(first_false_idx) = ca.iter().position(|x| x == Some(false)) else {
185
            return ca.clone();
186
        };
187
        out = BitmapBuilder::with_capacity(ca.len());
188
        out.extend_constant(first_false_idx, true);
189
        out.extend_constant(ca.len() - first_false_idx, false);
190
    } else {
191
        // TODO: efficient bitscan.
192
        let Some(last_false_idx) = ca.iter().rposition(|x| x == Some(false)) else {
193
            return ca.clone();
194
        };
195
        out = BitmapBuilder::with_capacity(ca.len());
196
        out.extend_constant(last_false_idx + 1, false);
197
        out.extend_constant(ca.len() - 1 - last_false_idx, true);
198
    }
199

200
    let arr: BooleanArray = out.freeze().into();
201
    BooleanChunked::with_chunk_like(ca, arr.with_validity(ca.rechunk_validity()))
202
}
203

204
fn cum_sum_numeric<T>(
205
    ca: &ChunkedArray<T>,
206
    reverse: bool,
207
    init: Option<T::Native>,
208
) -> ChunkedArray<T>
209
where
210
    T: PolarsNumericType,
211
    ChunkedArray<T>: FromIterator<Option<T::Native>>,
212
{
213
    let init = init.unwrap_or(T::Native::zero());
214
    cum_scan_numeric(ca, reverse, init, det_sum)
215
}
216

217
fn cum_prod_numeric<T>(
218
    ca: &ChunkedArray<T>,
219
    reverse: bool,
220
    init: Option<T::Native>,
221
) -> ChunkedArray<T>
222
where
223
    T: PolarsNumericType,
224
    ChunkedArray<T>: FromIterator<Option<T::Native>>,
225
{
226
    let init = init.unwrap_or(T::Native::one());
227
    cum_scan_numeric(ca, reverse, init, det_prod)
228
}
229

230
pub fn cum_prod_with_init(
231
    s: &Series,
232
    reverse: bool,
233
    init: &AnyValue<'static>,
234
) -> PolarsResult<Series> {
235
    use DataType::*;
236
    let out = match s.dtype() {
237
        Boolean | Int8 | UInt8 | Int16 | UInt16 | Int32 | UInt32 => {
238
            let s = s.cast(&Int64)?;
239
            cum_prod_numeric(s.i64()?, reverse, init.extract()).into_series()
240
        },
241
        Int64 => cum_prod_numeric(s.i64()?, reverse, init.extract()).into_series(),
242
        UInt64 => cum_prod_numeric(s.u64()?, reverse, init.extract()).into_series(),
243
        #[cfg(feature = "dtype-i128")]
244
        Int128 => cum_prod_numeric(s.i128()?, reverse, init.extract()).into_series(),
245
        Float32 => cum_prod_numeric(s.f32()?, reverse, init.extract()).into_series(),
246
        Float64 => cum_prod_numeric(s.f64()?, reverse, init.extract()).into_series(),
247
        dt => polars_bail!(opq = cum_prod, dt),
248
    };
249
    Ok(out)
250
}
251

252
/// Get an array with the cumulative product computed at every element.
253
///
254
/// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16, Int32, UInt32}` the `Series` is
255
/// first cast to `Int64` to prevent overflow issues.
256
pub fn cum_prod(s: &Series, reverse: bool) -> PolarsResult<Series> {
257
    cum_prod_with_init(s, reverse, &AnyValue::Null)
258
}
259

260
pub fn cum_sum_with_init(
261
    s: &Series,
262
    reverse: bool,
263
    init: &AnyValue<'static>,
264
) -> PolarsResult<Series> {
265
    use DataType::*;
266
    let out = match s.dtype() {
267
        Boolean => {
268
            let s = s.cast(&UInt32)?;
269
            cum_sum_numeric(s.u32()?, reverse, init.extract()).into_series()
270
        },
271
        Int8 | UInt8 | Int16 | UInt16 => {
272
            let s = s.cast(&Int64)?;
273
            cum_sum_numeric(s.i64()?, reverse, init.extract()).into_series()
274
        },
275
        Int32 => cum_sum_numeric(s.i32()?, reverse, init.extract()).into_series(),
276
        UInt32 => cum_sum_numeric(s.u32()?, reverse, init.extract()).into_series(),
277
        Int64 => cum_sum_numeric(s.i64()?, reverse, init.extract()).into_series(),
278
        UInt64 => cum_sum_numeric(s.u64()?, reverse, init.extract()).into_series(),
279
        #[cfg(feature = "dtype-i128")]
280
        Int128 => cum_sum_numeric(s.i128()?, reverse, init.extract()).into_series(),
281
        Float32 => cum_sum_numeric(s.f32()?, reverse, init.extract()).into_series(),
282
        Float64 => cum_sum_numeric(s.f64()?, reverse, init.extract()).into_series(),
283
        #[cfg(feature = "dtype-decimal")]
284
        Decimal(precision, scale) => {
285
            let ca = s.decimal().unwrap().physical();
286
            cum_sum_numeric(ca, reverse, init.clone().to_physical().extract())
287
                .into_decimal_unchecked(*precision, scale.unwrap())
288
                .into_series()
289
        },
290
        #[cfg(feature = "dtype-duration")]
291
        Duration(tu) => {
292
            let s = s.to_physical_repr();
293
            let ca = s.i64()?;
294
            cum_sum_numeric(ca, reverse, init.extract()).cast(&Duration(*tu))?
295
        },
296
        dt => polars_bail!(opq = cum_sum, dt),
297
    };
298
    Ok(out)
299
}
300

301
/// Get an array with the cumulative sum computed at every element
302
///
303
/// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16}` the `Series` is
304
/// first cast to `Int64` to prevent overflow issues.
305
pub fn cum_sum(s: &Series, reverse: bool) -> PolarsResult<Series> {
306
    cum_sum_with_init(s, reverse, &AnyValue::Null)
307
}
308

309
pub fn cum_min_with_init(
310
    s: &Series,
311
    reverse: bool,
312
    init: &AnyValue<'static>,
313
) -> PolarsResult<Series> {
314
    match s.dtype() {
315
        DataType::Boolean => {
316
            Ok(cum_min_bool(s.bool()?, reverse, init.extract_bool()).into_series())
317
        },
318
        #[cfg(feature = "dtype-decimal")]
319
        DataType::Decimal(precision, scale) => {
320
            let ca = s.decimal().unwrap().physical();
321
            let out = cum_min_numeric(ca, reverse, init.clone().to_physical().extract())
322
                .into_decimal_unchecked(*precision, scale.unwrap())
323
                .into_series();
324
            Ok(out)
325
        },
326
        dt if dt.to_physical().is_primitive_numeric() => {
327
            let s = s.to_physical_repr();
328
            with_match_physical_numeric_polars_type!(s.dtype(), |$T| {
329
                let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
330
                let out = cum_min_numeric(ca, reverse, init.extract()).into_series();
331
                if dt.is_logical() {
332
                    out.cast(dt)
333
                } else {
334
                    Ok(out)
335
                }
336
            })
337
        },
338
        dt => polars_bail!(opq = cum_min, dt),
339
    }
340
}
341

342
/// Get an array with the cumulative min computed at every element.
343
pub fn cum_min(s: &Series, reverse: bool) -> PolarsResult<Series> {
344
    cum_min_with_init(s, reverse, &AnyValue::Null)
345
}
346

347
pub fn cum_max_with_init(
348
    s: &Series,
349
    reverse: bool,
350
    init: &AnyValue<'static>,
351
) -> PolarsResult<Series> {
352
    match s.dtype() {
353
        DataType::Boolean => {
354
            Ok(cum_max_bool(s.bool()?, reverse, init.extract_bool()).into_series())
355
        },
356
        #[cfg(feature = "dtype-decimal")]
357
        DataType::Decimal(precision, scale) => {
358
            let ca = s.decimal().unwrap().physical();
359
            let out = cum_max_numeric(ca, reverse, init.clone().to_physical().extract())
360
                .into_decimal_unchecked(*precision, scale.unwrap())
361
                .into_series();
362
            Ok(out)
363
        },
364
        dt if dt.to_physical().is_primitive_numeric() => {
365
            let s = s.to_physical_repr();
366
            with_match_physical_numeric_polars_type!(s.dtype(), |$T| {
367
                let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
368
                let out = cum_max_numeric(ca, reverse, init.extract()).into_series();
369
                if dt.is_logical() {
370
                    out.cast(dt)
371
                } else {
372
                    Ok(out)
373
                }
374
            })
375
        },
376
        dt => polars_bail!(opq = cum_max, dt),
377
    }
378
}
379

380
/// Get an array with the cumulative max computed at every element.
381
pub fn cum_max(s: &Series, reverse: bool) -> PolarsResult<Series> {
382
    cum_max_with_init(s, reverse, &AnyValue::Null)
383
}
384

385
pub fn cum_count(s: &Series, reverse: bool) -> PolarsResult<Series> {
386
    cum_count_with_init(s, reverse, 0)
387
}
388

389
pub fn cum_count_with_init(s: &Series, reverse: bool, init: IdxSize) -> PolarsResult<Series> {
390
    let mut out = if s.null_count() == 0 {
391
        // Fast paths for no nulls
392
        cum_count_no_nulls(s.name().clone(), s.len(), reverse, init)
393
    } else {
394
        let ca = s.is_not_null();
395
        let out: IdxCa = if reverse {
396
            let mut count = init + (s.len() - s.null_count()) as IdxSize;
397
            let mut prev = false;
398
            unary_elementwise_values(&ca, |v: bool| {
399
                if prev {
400
                    count -= 1;
401
                }
402
                prev = v;
403
                count
404
            })
405
        } else {
406
            let mut count = init;
407
            unary_elementwise_values(&ca, |v: bool| {
408
                if v {
409
                    count += 1;
410
                }
411
                count
412
            })
413
        };
414

415
        out.into()
416
    };
417

418
    out.set_sorted_flag([IsSorted::Ascending, IsSorted::Descending][reverse as usize]);
419

420
    Ok(out)
421
}
422

423
/// Cumulative count for an input of length `len` that contains no nulls.
///
/// Produces `init + 1, init + 2, ..., init + len`, or the same values in
/// descending order when `reverse` is set, as a series called `name`.
fn cum_count_no_nulls(name: PlSmallStr, len: usize, reverse: bool, init: IdxSize) -> Series {
    let first: IdxSize = 1;
    let past_last = len as IdxSize + 1;
    let counts: NoNull<IdxCa> = match reverse {
        true => (first..past_last).rev().map(|v| v + init).collect(),
        false => (first..past_last).map(|v| v + init).collect(),
    };
    let mut counts = counts.into_inner();
    counts.rename(name);
    counts.into_series()
}
435

436
Product

Resources

Company