Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-compute/src/unique/boolean.rs
6939 views
1
use arrow::array::{Array, BooleanArray};
2
use arrow::bitmap::BitmapBuilder;
3
use arrow::datatypes::ArrowDataType;
4
5
use super::{GenericUniqueKernel, RangedUniqueKernel};
6
7
#[derive(Default, Clone)]
8
pub struct BooleanUniqueKernelState {
9
seen: u32,
10
}
11
12
impl BooleanUniqueKernelState {
13
pub fn new() -> Self {
14
Self::default()
15
}
16
}
17
18
impl RangedUniqueKernel for BooleanUniqueKernelState {
19
type Array = BooleanArray;
20
21
fn has_seen_all(&self) -> bool {
22
self.seen == 0b111
23
}
24
25
fn append(&mut self, array: &Self::Array) {
26
if array.len() == 0 {
27
return;
28
}
29
30
let null_count = array.null_count();
31
self.seen |= u32::from(null_count > 0) << 2;
32
let num_trues = if null_count > 0 {
33
array
34
.values()
35
.num_intersections_with(array.validity().unwrap())
36
} else {
37
array.values().set_bits()
38
};
39
40
self.seen |= u32::from(num_trues != array.len() - null_count);
41
self.seen |= u32::from(num_trues != 0) << 1;
42
}
43
44
fn append_state(&mut self, other: &Self) {
45
self.seen |= other.seen;
46
}
47
48
fn finalize_unique(self) -> Self::Array {
49
let mut values = BitmapBuilder::with_capacity(self.seen.count_ones() as usize);
50
51
if self.seen & 0b001 != 0 {
52
values.push(false);
53
}
54
if self.seen & 0b010 != 0 {
55
values.push(true);
56
}
57
let validity = if self.seen & 0b100 != 0 {
58
let mut validity = BitmapBuilder::with_capacity(values.len() + 1);
59
validity.extend_constant(values.len(), true);
60
validity.push(false);
61
values.push(false);
62
Some(validity.freeze())
63
} else {
64
None
65
};
66
67
let values = values.freeze();
68
BooleanArray::new(ArrowDataType::Boolean, values, validity)
69
}
70
71
fn finalize_n_unique(&self) -> usize {
72
self.seen.count_ones() as usize
73
}
74
75
fn finalize_n_unique_non_null(&self) -> usize {
76
(self.seen & 0b011).count_ones() as usize
77
}
78
}
79
80
impl GenericUniqueKernel for BooleanArray {
81
fn unique(&self) -> Self {
82
let mut state = BooleanUniqueKernelState::new();
83
state.append(self);
84
state.finalize_unique()
85
}
86
87
fn n_unique(&self) -> usize {
88
let mut state = BooleanUniqueKernelState::new();
89
state.append(self);
90
state.finalize_n_unique()
91
}
92
93
fn n_unique_non_null(&self) -> usize {
94
let mut state = BooleanUniqueKernelState::new();
95
state.append(self);
96
state.finalize_n_unique_non_null()
97
}
98
}
99
100
#[cfg(test)]
101
mod tests {
102
use arrow::array::{BooleanArray, MutableBooleanArray, boolean_array};
103
use proptest::prelude::*;
104
105
use super::*;
106
107
#[test]
108
fn test_boolean_distinct_count() {
109
use arrow::bitmap::Bitmap;
110
use arrow::datatypes::ArrowDataType;
111
112
macro_rules! assert_bool_dc {
113
($values:expr, $validity:expr => $dc:expr) => {
114
let validity: Option<Bitmap> =
115
<Option<Vec<bool>>>::map($validity, |v| Bitmap::from_iter(v));
116
let arr =
117
BooleanArray::new(ArrowDataType::Boolean, Bitmap::from_iter($values), validity);
118
assert_eq!(arr.n_unique(), $dc);
119
};
120
}
121
122
assert_bool_dc!(vec![], None => 0);
123
assert_bool_dc!(vec![], Some(vec![]) => 0);
124
assert_bool_dc!(vec![true], None => 1);
125
assert_bool_dc!(vec![true], Some(vec![true]) => 1);
126
assert_bool_dc!(vec![true], Some(vec![false]) => 1);
127
assert_bool_dc!(vec![true, false], None => 2);
128
assert_bool_dc!(vec![true, false, false], None => 2);
129
assert_bool_dc!(vec![true, false, false], Some(vec![true, true, false]) => 3);
130
131
// Copied from https://github.com/pola-rs/polars/pull/16765#discussion_r1629426159
132
assert_bool_dc!(vec![true, true, true, true, true], Some(vec![true, false, true, false, false]) => 2);
133
assert_bool_dc!(vec![false, true, false, true, true], Some(vec![true, false, true, false, false]) => 2);
134
assert_bool_dc!(vec![true, false, true, false, true, true], Some(vec![true, true, false, true, false, false]) => 3);
135
}
136
137
proptest! {
138
#[test]
139
fn test_proptest(array in boolean_array(0..100)) {
140
let mut state = BooleanUniqueKernelState::new();
141
state.append(&array);
142
143
let mut has_none = false;
144
let mut has_false = false;
145
let mut has_true = false;
146
for v in array.iter() {
147
match v {
148
None => has_none |= true,
149
Some(false) => has_false |= true,
150
Some(true) => has_true |= true,
151
}
152
}
153
154
let mut unique = MutableBooleanArray::new();
155
if has_false {
156
unique.push(Some(false));
157
}
158
if has_true {
159
unique.push(Some(true));
160
}
161
if has_none {
162
unique.push(None);
163
}
164
let unique = unique.freeze();
165
166
assert_eq!(state.clone().finalize_unique(), unique);
167
assert_eq!(state.clone().finalize_n_unique(), unique.len());
168
assert_eq!(state.clone().finalize_n_unique_non_null(), unique.len() - usize::from(has_none));
169
}
170
}
171
}
172
173