Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-ops/src/series/ops/is_last_distinct.rs
6939 views
1
use std::hash::Hash;
2
3
use arrow::array::BooleanArray;
4
use arrow::bitmap::MutableBitmap;
5
use arrow::legacy::utils::CustomIterTools;
6
use polars_core::prelude::*;
7
use polars_core::utils::NoNull;
8
use polars_core::with_match_physical_numeric_polars_type;
9
use polars_utils::total_ord::{ToTotalOrd, TotalEq, TotalHash};
10
11
pub fn is_last_distinct(s: &Series) -> PolarsResult<BooleanChunked> {
12
// fast path.
13
if s.is_empty() {
14
return Ok(BooleanChunked::full_null(s.name().clone(), 0));
15
} else if s.len() == 1 {
16
return Ok(BooleanChunked::new(s.name().clone(), &[true]));
17
}
18
19
let s = s.to_physical_repr();
20
21
use DataType::*;
22
let out = match s.dtype() {
23
Boolean => {
24
let ca = s.bool().unwrap();
25
is_last_distinct_boolean(ca)
26
},
27
Binary => {
28
let ca = s.binary().unwrap();
29
is_last_distinct_bin(ca)
30
},
31
String => {
32
let s = s.cast(&Binary).unwrap();
33
return is_last_distinct(&s);
34
},
35
dt if dt.is_primitive_numeric() => {
36
with_match_physical_numeric_polars_type!(s.dtype(), |$T| {
37
let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
38
is_last_distinct_numeric(ca)
39
})
40
},
41
#[cfg(feature = "dtype-struct")]
42
Struct(_) => return is_last_distinct_struct(&s),
43
List(inner) => {
44
polars_ensure!(
45
!inner.is_nested(),
46
InvalidOperation: "`is_last_distinct` on list type is only allowed if the inner type is not nested."
47
);
48
let ca = s.list().unwrap();
49
return is_last_distinct_list(ca);
50
},
51
dt => polars_bail!(opq = is_last_distinct, dt),
52
};
53
Ok(out)
54
}
55
56
fn is_last_distinct_boolean(ca: &BooleanChunked) -> BooleanChunked {
57
let mut out = MutableBitmap::with_capacity(ca.len());
58
out.extend_constant(ca.len(), false);
59
60
if ca.null_count() == ca.len() {
61
out.set(ca.len() - 1, true);
62
}
63
// TODO supports fast path.
64
else {
65
let mut first_true_found = false;
66
let mut first_false_found = false;
67
let mut first_null_found = false;
68
let mut all_found = false;
69
let ca = ca.rechunk();
70
ca.downcast_as_array()
71
.iter()
72
.enumerate()
73
.rev()
74
.find_map(|(idx, val)| match val {
75
Some(true) if !first_true_found => {
76
first_true_found = true;
77
all_found &= first_true_found;
78
out.set(idx, true);
79
if all_found { Some(()) } else { None }
80
},
81
Some(false) if !first_false_found => {
82
first_false_found = true;
83
all_found &= first_false_found;
84
out.set(idx, true);
85
if all_found { Some(()) } else { None }
86
},
87
None if !first_null_found => {
88
first_null_found = true;
89
all_found &= first_null_found;
90
out.set(idx, true);
91
if all_found { Some(()) } else { None }
92
},
93
_ => None,
94
});
95
}
96
97
let arr = BooleanArray::new(ArrowDataType::Boolean, out.into(), None);
98
BooleanChunked::with_chunk(ca.name().clone(), arr)
99
}
100
101
fn is_last_distinct_bin(ca: &BinaryChunked) -> BooleanChunked {
102
let tmp = ca.rechunk();
103
let arr = tmp.downcast_as_array();
104
let mut unique = PlHashSet::new();
105
arr.iter()
106
.rev()
107
.map(|opt_v| unique.insert(opt_v))
108
.collect_reversed::<NoNull<BooleanChunked>>()
109
.into_inner()
110
.with_name(ca.name().clone())
111
}
112
113
fn is_last_distinct_numeric<T>(ca: &ChunkedArray<T>) -> BooleanChunked
114
where
115
T: PolarsNumericType,
116
T::Native: TotalHash + TotalEq + ToTotalOrd,
117
<T::Native as ToTotalOrd>::TotalOrdItem: Hash + Eq,
118
{
119
let tmp = ca.rechunk();
120
let arr = tmp.downcast_as_array();
121
let mut unique = PlHashSet::new();
122
arr.iter()
123
.rev()
124
.map(|opt_v| unique.insert(opt_v.to_total_ord()))
125
.collect_reversed::<NoNull<BooleanChunked>>()
126
.into_inner()
127
.with_name(ca.name().clone())
128
}
129
130
/// Marks the last row of every group of equal rows in a struct series.
///
/// Rows are grouped with `group_tuples`, and the last index of each group is
/// set to `true` in an otherwise all-`false` mask of the same length and name
/// as `s`.
///
/// # Errors
///
/// Propagates any error returned by `group_tuples`.
#[cfg(feature = "dtype-struct")]
fn is_last_distinct_struct(s: &Series) -> PolarsResult<BooleanChunked> {
    // NOTE(review): the two flags are presumably (multithreaded, sorted) — confirm.
    let groups = s.group_tuples(true, false)?;
    // SAFETY: all groups have at least a single member
    let last_of_each_group = unsafe { groups.take_group_lasts() };

    let len = s.len();
    let mut mask = MutableBitmap::with_capacity(len);
    mask.extend_constant(len, false);

    last_of_each_group.into_iter().for_each(|idx| {
        // Group tuples are always in bounds
        unsafe { mask.set_unchecked(idx as usize, true) }
    });

    let arr = BooleanArray::new(ArrowDataType::Boolean, mask.into(), None);
    Ok(BooleanChunked::with_chunk(s.name().clone(), arr))
}
146
147
fn is_last_distinct_list(ca: &ListChunked) -> PolarsResult<BooleanChunked> {
148
let groups = ca.group_tuples(true, false)?;
149
// SAFETY: all groups have at least a single member
150
let last = unsafe { groups.take_group_lasts() };
151
let mut out = MutableBitmap::with_capacity(ca.len());
152
out.extend_constant(ca.len(), false);
153
154
for idx in last {
155
// Group tuples are always in bounds
156
unsafe { out.set_unchecked(idx as usize, true) }
157
}
158
159
let arr = BooleanArray::new(ArrowDataType::Boolean, out.into(), None);
160
Ok(BooleanChunked::with_chunk(ca.name().clone(), arr))
161
}
162
163