Path: blob/main/crates/polars-expr/src/reduce/any_all.rs
6940 views
use arrow::array::BooleanArray;1use arrow::bitmap::binary_assign_mut;23use super::*;45pub fn new_any_reduction(ignore_nulls: bool) -> Box<dyn GroupedReduction> {6if ignore_nulls {7Box::new(AnyIgnoreNullGroupedReduction::default())8} else {9Box::new(AnyKleeneNullGroupedReduction::default())10}11}1213pub fn new_all_reduction(ignore_nulls: bool) -> Box<dyn GroupedReduction> {14if ignore_nulls {15Box::new(AllIgnoreNullGroupedReduction::default())16} else {17Box::new(AllKleeneNullGroupedReduction::default())18}19}2021#[derive(Default)]22struct AnyIgnoreNullGroupedReduction {23values: MutableBitmap,24evicted_values: BitmapBuilder,25}2627impl GroupedReduction for AnyIgnoreNullGroupedReduction {28fn new_empty(&self) -> Box<dyn GroupedReduction> {29Box::new(Self::default())30}3132fn reserve(&mut self, additional: usize) {33self.values.reserve(additional);34}3536fn resize(&mut self, num_groups: IdxSize) {37self.values.resize(num_groups as usize, false);38}3940fn update_group(41&mut self,42values: &Column,43group_idx: IdxSize,44_seq_id: u64,45) -> PolarsResult<()> {46assert!(values.dtype() == &DataType::Boolean);47let values = values.as_materialized_series_maintain_scalar();48let ca: &BooleanChunked = values.as_ref().as_ref();49if ca.any() {50self.values.set(group_idx as usize, true);51}52Ok(())53}5455unsafe fn update_groups_while_evicting(56&mut self,57values: &Column,58subset: &[IdxSize],59group_idxs: &[EvictIdx],60_seq_id: u64,61) -> PolarsResult<()> {62assert!(values.dtype() == &DataType::Boolean);63assert!(subset.len() == group_idxs.len());64let values = values.as_materialized_series(); // @scalar-opt65let ca: &BooleanChunked = values.as_ref().as_ref();66let arr = ca.downcast_as_array();67unsafe {68// SAFETY: indices are in-bounds guaranteed by trait.69for (i, g) in subset.iter().zip(group_idxs) {70let ov = arr.get_unchecked(*i as usize);71if g.should_evict() {72self.evicted_values.push(self.values.get_unchecked(g.idx()));73self.values.set_unchecked(g.idx(), ov.unwrap_or(false));74} else {75self.values.or_pos_unchecked(g.idx(), ov.unwrap_or(false));76}77}78}79Ok(())80}8182unsafe fn combine_subset(83&mut self,84other: &dyn GroupedReduction,85subset: &[IdxSize],86group_idxs: &[IdxSize],87) -> PolarsResult<()> {88let other = other.as_any().downcast_ref::<Self>().unwrap();89assert!(subset.len() == group_idxs.len());90unsafe {91// SAFETY: indices are in-bounds guaranteed by trait.92for (i, g) in subset.iter().zip(group_idxs) {93self.values94.or_pos_unchecked(*g as usize, other.values.get_unchecked(*i as usize));95}96}97Ok(())98}99100fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {101Box::new(Self {102values: core::mem::take(&mut self.evicted_values).into_mut(),103evicted_values: BitmapBuilder::new(),104})105}106107fn finalize(&mut self) -> PolarsResult<Series> {108let v = core::mem::take(&mut self.values);109let arr = BooleanArray::from(v.freeze());110Ok(Series::from_array(PlSmallStr::EMPTY, arr))111}112113fn as_any(&self) -> &dyn Any {114self115}116}117118#[derive(Default)]119struct AllIgnoreNullGroupedReduction {120values: MutableBitmap,121evicted_values: BitmapBuilder,122}123124impl GroupedReduction for AllIgnoreNullGroupedReduction {125fn new_empty(&self) -> Box<dyn GroupedReduction> {126Box::new(Self::default())127}128129fn reserve(&mut self, additional: usize) {130self.values.reserve(additional);131}132133fn resize(&mut self, num_groups: IdxSize) {134self.values.resize(num_groups as usize, true);135}136137fn update_group(138&mut self,139values: &Column,140group_idx: IdxSize,141_seq_id: u64,142) -> PolarsResult<()> {143assert!(values.dtype() == &DataType::Boolean);144let values = values.as_materialized_series_maintain_scalar();145let ca: &BooleanChunked = values.as_ref().as_ref();146if !ca.all() {147self.values.set(group_idx as usize, false);148}149Ok(())150}151152unsafe fn update_groups_while_evicting(153&mut self,154values: &Column,155subset: &[IdxSize],156group_idxs: &[EvictIdx],157_seq_id: u64,158) -> PolarsResult<()> {159assert!(values.dtype() == &DataType::Boolean);160assert!(subset.len() == group_idxs.len());161let values = values.as_materialized_series(); // @scalar-opt162let ca: &BooleanChunked = values.as_ref().as_ref();163let arr = ca.downcast_as_array();164unsafe {165// SAFETY: indices are in-bounds guaranteed by trait.166for (i, g) in subset.iter().zip(group_idxs) {167let ov = arr.get_unchecked(*i as usize);168if g.should_evict() {169self.evicted_values.push(self.values.get_unchecked(g.idx()));170self.values.set_unchecked(g.idx(), ov.unwrap_or(true));171} else {172self.values.and_pos_unchecked(g.idx(), ov.unwrap_or(true));173}174}175}176Ok(())177}178179unsafe fn combine_subset(180&mut self,181other: &dyn GroupedReduction,182subset: &[IdxSize],183group_idxs: &[IdxSize],184) -> PolarsResult<()> {185let other = other.as_any().downcast_ref::<Self>().unwrap();186assert!(subset.len() == group_idxs.len());187unsafe {188// SAFETY: indices are in-bounds guaranteed by trait.189for (i, g) in subset.iter().zip(group_idxs) {190self.values191.and_pos_unchecked(*g as usize, other.values.get_unchecked(*i as usize));192}193}194Ok(())195}196197fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {198Box::new(Self {199values: core::mem::take(&mut self.evicted_values).into_mut(),200evicted_values: BitmapBuilder::new(),201})202}203204fn finalize(&mut self) -> PolarsResult<Series> {205let v = core::mem::take(&mut self.values);206let arr = BooleanArray::from(v.freeze());207Ok(Series::from_array(PlSmallStr::EMPTY, arr))208}209210fn as_any(&self) -> &dyn Any {211self212}213}214215#[derive(Default)]216struct AnyKleeneNullGroupedReduction {217seen_true: MutableBitmap,218seen_null: MutableBitmap,219evicted_values: BitmapBuilder,220evicted_mask: BitmapBuilder,221}222223impl GroupedReduction for AnyKleeneNullGroupedReduction {224fn new_empty(&self) -> Box<dyn GroupedReduction> {225Box::new(Self::default())226}227228fn reserve(&mut self, additional: usize) {229self.seen_true.reserve(additional);230self.seen_null.reserve(additional)231}232233fn resize(&mut self, num_groups: IdxSize) {234self.seen_true.resize(num_groups as usize, false);235self.seen_null.resize(num_groups as usize, false);236}237238fn update_group(239&mut self,240values: &Column,241group_idx: IdxSize,242_seq_id: u64,243) -> PolarsResult<()> {244assert!(values.dtype() == &DataType::Boolean);245let values = values.as_materialized_series_maintain_scalar();246let ca: &BooleanChunked = values.as_ref().as_ref();247if ca.any() {248self.seen_true.set(group_idx as usize, true);249}250if ca.len() != ca.null_count() {251self.seen_null.set(group_idx as usize, true);252}253Ok(())254}255256unsafe fn update_groups_while_evicting(257&mut self,258values: &Column,259subset: &[IdxSize],260group_idxs: &[EvictIdx],261_seq_id: u64,262) -> PolarsResult<()> {263assert!(values.dtype() == &DataType::Boolean);264assert!(subset.len() == group_idxs.len());265let values = values.as_materialized_series(); // @scalar-opt266let ca: &BooleanChunked = values.as_ref().as_ref();267let arr = ca.downcast_as_array();268unsafe {269// SAFETY: indices are in-bounds guaranteed by trait.270for (i, g) in subset.iter().zip(group_idxs) {271let ov = arr.get_unchecked(*i as usize);272if g.should_evict() {273self.evicted_values274.push(self.seen_true.get_unchecked(g.idx()));275self.evicted_mask276.push(self.seen_null.get_unchecked(g.idx()));277self.seen_true.set_unchecked(g.idx(), ov.unwrap_or(false));278self.seen_null.set_unchecked(g.idx(), ov.is_none());279} else {280self.seen_true281.or_pos_unchecked(g.idx(), ov.unwrap_or(false));282self.seen_null.or_pos_unchecked(g.idx(), ov.is_none());283}284}285}286Ok(())287}288289unsafe fn combine_subset(290&mut self,291other: &dyn GroupedReduction,292subset: &[IdxSize],293group_idxs: &[IdxSize],294) -> PolarsResult<()> {295let other = other.as_any().downcast_ref::<Self>().unwrap();296assert!(subset.len() == group_idxs.len());297unsafe {298// SAFETY: indices are in-bounds guaranteed by trait.299for (i, g) in subset.iter().zip(group_idxs) {300self.seen_true301.or_pos_unchecked(*g as usize, other.seen_true.get_unchecked(*i as usize));302self.seen_null303.or_pos_unchecked(*g as usize, other.seen_null.get_unchecked(*i as usize));304}305}306Ok(())307}308309fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {310Box::new(Self {311seen_true: core::mem::take(&mut self.evicted_values).into_mut(),312seen_null: core::mem::take(&mut self.evicted_mask).into_mut(),313evicted_values: BitmapBuilder::new(),314evicted_mask: BitmapBuilder::new(),315})316}317318fn finalize(&mut self) -> PolarsResult<Series> {319let seen_true = core::mem::take(&mut self.seen_true);320let mut mask = core::mem::take(&mut self.seen_null);321binary_assign_mut(&mut mask, &seen_true, |mi: u64, ti: u64| mi & !ti);322let arr = BooleanArray::from(seen_true.freeze())323.with_validity(Some(mask.freeze()))324.boxed();325Ok(unsafe {326Series::from_chunks_and_dtype_unchecked(327PlSmallStr::EMPTY,328vec![arr],329&DataType::Boolean,330)331})332}333334fn as_any(&self) -> &dyn Any {335self336}337}338339#[derive(Default)]340struct AllKleeneNullGroupedReduction {341seen_false: MutableBitmap,342seen_null: MutableBitmap,343evicted_values: BitmapBuilder,344evicted_mask: BitmapBuilder,345}346347impl GroupedReduction for AllKleeneNullGroupedReduction {348fn new_empty(&self) -> Box<dyn GroupedReduction> {349Box::new(Self::default())350}351352fn reserve(&mut self, additional: usize) {353self.seen_false.reserve(additional);354self.seen_null.reserve(additional)355}356357fn resize(&mut self, num_groups: IdxSize) {358self.seen_false.resize(num_groups as usize, false);359self.seen_null.resize(num_groups as usize, false);360}361362fn update_group(363&mut self,364values: &Column,365group_idx: IdxSize,366_seq_id: u64,367) -> PolarsResult<()> {368assert!(values.dtype() == &DataType::Boolean);369let values = values.as_materialized_series_maintain_scalar();370let ca: &BooleanChunked = values.as_ref().as_ref();371if !ca.all() {372self.seen_false.set(group_idx as usize, true);373}374if ca.len() != ca.null_count() {375self.seen_null.set(group_idx as usize, true);376}377Ok(())378}379380unsafe fn update_groups_while_evicting(381&mut self,382values: &Column,383subset: &[IdxSize],384group_idxs: &[EvictIdx],385_seq_id: u64,386) -> PolarsResult<()> {387assert!(values.dtype() == &DataType::Boolean);388assert!(subset.len() == group_idxs.len());389let values = values.as_materialized_series(); // @scalar-opt390let ca: &BooleanChunked = values.as_ref().as_ref();391let arr = ca.downcast_as_array();392unsafe {393// SAFETY: indices are in-bounds guaranteed by trait.394for (i, g) in subset.iter().zip(group_idxs) {395let ov = arr.get_unchecked(*i as usize);396if g.should_evict() {397self.evicted_values398.push(self.seen_false.get_unchecked(g.idx()));399self.evicted_mask400.push(self.seen_null.get_unchecked(g.idx()));401self.seen_false.set_unchecked(g.idx(), !ov.unwrap_or(true));402self.seen_null.set_unchecked(g.idx(), ov.is_none());403} else {404self.seen_false405.or_pos_unchecked(g.idx(), !ov.unwrap_or(true));406self.seen_null.or_pos_unchecked(g.idx(), ov.is_none());407}408}409}410Ok(())411}412413unsafe fn combine_subset(414&mut self,415other: &dyn GroupedReduction,416subset: &[IdxSize],417group_idxs: &[IdxSize],418) -> PolarsResult<()> {419let other = other.as_any().downcast_ref::<Self>().unwrap();420assert!(subset.len() == group_idxs.len());421unsafe {422// SAFETY: indices are in-bounds guaranteed by trait.423for (i, g) in subset.iter().zip(group_idxs) {424self.seen_false425.or_pos_unchecked(*g as usize, other.seen_false.get_unchecked(*i as usize));426self.seen_null427.or_pos_unchecked(*g as usize, other.seen_null.get_unchecked(*i as usize));428}429}430Ok(())431}432433fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {434Box::new(Self {435seen_false: core::mem::take(&mut self.evicted_values).into_mut(),436seen_null: core::mem::take(&mut self.evicted_mask).into_mut(),437evicted_values: BitmapBuilder::new(),438evicted_mask: BitmapBuilder::new(),439})440}441442fn finalize(&mut self) -> PolarsResult<Series> {443let seen_false = core::mem::take(&mut self.seen_false);444let mut mask = core::mem::take(&mut self.seen_null);445binary_assign_mut(&mut mask, &seen_false, |mi: u64, fi: u64| mi & !fi);446let arr = BooleanArray::from((!seen_false).freeze())447.with_validity(Some(mask.freeze()))448.boxed();449Ok(unsafe {450Series::from_chunks_and_dtype_unchecked(451PlSmallStr::EMPTY,452vec![arr],453&DataType::Boolean,454)455})456}457458fn as_any(&self) -> &dyn Any {459self460}461}462463464