Path: blob/main/crates/polars-compute/src/unique/boolean.rs
6939 views
use arrow::array::{Array, BooleanArray};1use arrow::bitmap::BitmapBuilder;2use arrow::datatypes::ArrowDataType;34use super::{GenericUniqueKernel, RangedUniqueKernel};56#[derive(Default, Clone)]7pub struct BooleanUniqueKernelState {8seen: u32,9}1011impl BooleanUniqueKernelState {12pub fn new() -> Self {13Self::default()14}15}1617impl RangedUniqueKernel for BooleanUniqueKernelState {18type Array = BooleanArray;1920fn has_seen_all(&self) -> bool {21self.seen == 0b11122}2324fn append(&mut self, array: &Self::Array) {25if array.len() == 0 {26return;27}2829let null_count = array.null_count();30self.seen |= u32::from(null_count > 0) << 2;31let num_trues = if null_count > 0 {32array33.values()34.num_intersections_with(array.validity().unwrap())35} else {36array.values().set_bits()37};3839self.seen |= u32::from(num_trues != array.len() - null_count);40self.seen |= u32::from(num_trues != 0) << 1;41}4243fn append_state(&mut self, other: &Self) {44self.seen |= other.seen;45}4647fn finalize_unique(self) -> Self::Array {48let mut values = BitmapBuilder::with_capacity(self.seen.count_ones() as usize);4950if self.seen & 0b001 != 0 {51values.push(false);52}53if self.seen & 0b010 != 0 {54values.push(true);55}56let validity = if self.seen & 0b100 != 0 {57let mut validity = BitmapBuilder::with_capacity(values.len() + 1);58validity.extend_constant(values.len(), true);59validity.push(false);60values.push(false);61Some(validity.freeze())62} else {63None64};6566let values = values.freeze();67BooleanArray::new(ArrowDataType::Boolean, values, validity)68}6970fn finalize_n_unique(&self) -> usize {71self.seen.count_ones() as usize72}7374fn finalize_n_unique_non_null(&self) -> usize {75(self.seen & 0b011).count_ones() as usize76}77}7879impl GenericUniqueKernel for BooleanArray {80fn unique(&self) -> Self {81let mut state = BooleanUniqueKernelState::new();82state.append(self);83state.finalize_unique()84}8586fn n_unique(&self) -> usize {87let mut state = BooleanUniqueKernelState::new();88state.append(self);89state.finalize_n_unique()90}9192fn n_unique_non_null(&self) -> usize {93let mut state = BooleanUniqueKernelState::new();94state.append(self);95state.finalize_n_unique_non_null()96}97}9899#[cfg(test)]100mod tests {101use arrow::array::{BooleanArray, MutableBooleanArray, boolean_array};102use proptest::prelude::*;103104use super::*;105106#[test]107fn test_boolean_distinct_count() {108use arrow::bitmap::Bitmap;109use arrow::datatypes::ArrowDataType;110111macro_rules! assert_bool_dc {112($values:expr, $validity:expr => $dc:expr) => {113let validity: Option<Bitmap> =114<Option<Vec<bool>>>::map($validity, |v| Bitmap::from_iter(v));115let arr =116BooleanArray::new(ArrowDataType::Boolean, Bitmap::from_iter($values), validity);117assert_eq!(arr.n_unique(), $dc);118};119}120121assert_bool_dc!(vec![], None => 0);122assert_bool_dc!(vec![], Some(vec![]) => 0);123assert_bool_dc!(vec![true], None => 1);124assert_bool_dc!(vec![true], Some(vec![true]) => 1);125assert_bool_dc!(vec![true], Some(vec![false]) => 1);126assert_bool_dc!(vec![true, false], None => 2);127assert_bool_dc!(vec![true, false, false], None => 2);128assert_bool_dc!(vec![true, false, false], Some(vec![true, true, false]) => 3);129130// Copied from https://github.com/pola-rs/polars/pull/16765#discussion_r1629426159131assert_bool_dc!(vec![true, true, true, true, true], Some(vec![true, false, true, false, false]) => 2);132assert_bool_dc!(vec![false, true, false, true, true], Some(vec![true, false, true, false, false]) => 2);133assert_bool_dc!(vec![true, false, true, false, true, true], Some(vec![true, true, false, true, false, false]) => 3);134}135136proptest! {137#[test]138fn test_proptest(array in boolean_array(0..100)) {139let mut state = BooleanUniqueKernelState::new();140state.append(&array);141142let mut has_none = false;143let mut has_false = false;144let mut has_true = false;145for v in array.iter() {146match v {147None => has_none |= true,148Some(false) => has_false |= true,149Some(true) => has_true |= true,150}151}152153let mut unique = MutableBooleanArray::new();154if has_false {155unique.push(Some(false));156}157if has_true {158unique.push(Some(true));159}160if has_none {161unique.push(None);162}163let unique = unique.freeze();164165assert_eq!(state.clone().finalize_unique(), unique);166assert_eq!(state.clone().finalize_n_unique(), unique.len());167assert_eq!(state.clone().finalize_n_unique_non_null(), unique.len() - usize::from(has_none));168}169}170}171172173