Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-ops/src/series/ops/rle.rs
6939 views
1
use polars_core::prelude::*;
2
use polars_core::series::IsSorted;
3
4
/// Name given to the field holding each run's value in the struct produced by `rle`.
pub static RLE_VALUE_COLUMN_NAME: &str = "value";
5
/// Name given to the field holding each run's length in the struct produced by `rle`.
pub static RLE_LENGTH_COLUMN_NAME: &str = "len";
6
7
/// Get the run-Lengths of values.
8
pub fn rle_lengths(s: &Column, lengths: &mut Vec<IdxSize>) -> PolarsResult<()> {
9
lengths.clear();
10
if s.is_empty() {
11
return Ok(());
12
}
13
14
if let Some(sc) = s.as_scalar_column() {
15
lengths.push(sc.len() as IdxSize);
16
return Ok(());
17
}
18
19
let (s1, s2) = (s.slice(0, s.len() - 1), s.slice(1, s.len()));
20
let s_neq = s1
21
.as_materialized_series()
22
.not_equal_missing(s2.as_materialized_series())?;
23
let n_runs = s_neq.sum().unwrap() + 1;
24
25
lengths.reserve(n_runs as usize);
26
lengths.push(1);
27
28
assert!(!s_neq.has_nulls());
29
for arr in s_neq.downcast_iter() {
30
let mut values = arr.values().clone();
31
while !values.is_empty() {
32
// @NOTE: This `as IdxSize` is safe because it is less than or equal to the a ChunkedArray
33
// length.
34
*lengths.last_mut().unwrap() += values.take_leading_zeros() as IdxSize;
35
36
if !values.is_empty() {
37
lengths.push(1);
38
values.slice(1, values.len() - 1);
39
}
40
}
41
}
42
Ok(())
43
}
44
45
/// Get the lengths of runs of identical values.
46
pub fn rle(s: &Column) -> PolarsResult<Column> {
47
let mut lengths = Vec::new();
48
rle_lengths(s, &mut lengths)?;
49
50
let mut idxs = Vec::with_capacity(lengths.len());
51
if !lengths.is_empty() {
52
idxs.push(0);
53
for length in &lengths[..lengths.len() - 1] {
54
idxs.push(*idxs.last().unwrap() + length);
55
}
56
}
57
58
let vals = s
59
.take_slice(&idxs)
60
.unwrap()
61
.with_name(PlSmallStr::from_static(RLE_VALUE_COLUMN_NAME));
62
let outvals = vec![
63
Series::from_vec(PlSmallStr::from_static(RLE_LENGTH_COLUMN_NAME), lengths).into(),
64
vals,
65
];
66
Ok(StructChunked::from_columns(s.name().clone(), idxs.len(), &outvals)?.into_column())
67
}
68
69
/// Similar to `rle`, but maps values to run IDs.
70
pub fn rle_id(s: &Column) -> PolarsResult<Column> {
71
if s.is_empty() {
72
return Ok(Column::new_empty(s.name().clone(), &IDX_DTYPE));
73
}
74
75
let (s1, s2) = (s.slice(0, s.len() - 1), s.slice(1, s.len()));
76
let s_neq = s1
77
.as_materialized_series()
78
.not_equal_missing(s2.as_materialized_series())?;
79
80
let mut out = Vec::<IdxSize>::with_capacity(s.len());
81
let mut last = 0;
82
out.push(last); // Run numbers start at zero
83
assert_eq!(s_neq.null_count(), 0);
84
for a in s_neq.downcast_iter() {
85
for aa in a.values_iter() {
86
last += aa as IdxSize;
87
out.push(last);
88
}
89
}
90
Ok(IdxCa::from_vec(s.name().clone(), out)
91
.with_sorted_flag(IsSorted::Ascending)
92
.into_column())
93
}
94
95