Path: blob/main/crates/polars-expr/src/reduce/any_all.rs
8424 views
use arrow::array::BooleanArray;1use arrow::bitmap::binary_assign_mut;23use super::*;45pub fn new_any_reduction(ignore_nulls: bool) -> Box<dyn GroupedReduction> {6if ignore_nulls {7Box::new(AnyIgnoreNullGroupedReduction::default())8} else {9Box::new(AnyKleeneNullGroupedReduction::default())10}11}1213pub fn new_all_reduction(ignore_nulls: bool) -> Box<dyn GroupedReduction> {14if ignore_nulls {15Box::new(AllIgnoreNullGroupedReduction::default())16} else {17Box::new(AllKleeneNullGroupedReduction::default())18}19}2021#[derive(Default)]22struct AnyIgnoreNullGroupedReduction {23values: MutableBitmap,24evicted_values: BitmapBuilder,25}2627impl GroupedReduction for AnyIgnoreNullGroupedReduction {28fn new_empty(&self) -> Box<dyn GroupedReduction> {29Box::new(Self::default())30}3132fn reserve(&mut self, additional: usize) {33self.values.reserve(additional);34}3536fn resize(&mut self, num_groups: IdxSize) {37self.values.resize(num_groups as usize, false);38}3940fn update_group(41&mut self,42values: &[&Column],43group_idx: IdxSize,44_seq_id: u64,45) -> PolarsResult<()> {46let &[values] = values else { unreachable!() };47assert!(values.dtype() == &DataType::Boolean);48let values = values.as_materialized_series_maintain_scalar();49let ca: &BooleanChunked = values.as_ref().as_ref();50if ca.any() {51self.values.set(group_idx as usize, true);52}53Ok(())54}5556unsafe fn update_groups_while_evicting(57&mut self,58values: &[&Column],59subset: &[IdxSize],60group_idxs: &[EvictIdx],61_seq_id: u64,62) -> PolarsResult<()> {63let &[values] = values else { unreachable!() };64assert!(values.dtype() == &DataType::Boolean);65assert!(subset.len() == group_idxs.len());66let values = values.as_materialized_series(); // @scalar-opt67let ca: &BooleanChunked = values.as_ref().as_ref();68let arr = ca.downcast_as_array();69unsafe {70// SAFETY: indices are in-bounds guaranteed by trait.71for (i, g) in subset.iter().zip(group_idxs) {72let ov = arr.get_unchecked(*i as usize);73if g.should_evict() {74self.evicted_values.push(self.values.get_unchecked(g.idx()));75self.values.set_unchecked(g.idx(), ov.unwrap_or(false));76} else {77self.values.or_pos_unchecked(g.idx(), ov.unwrap_or(false));78}79}80}81Ok(())82}8384unsafe fn combine_subset(85&mut self,86other: &dyn GroupedReduction,87subset: &[IdxSize],88group_idxs: &[IdxSize],89) -> PolarsResult<()> {90let other = other.as_any().downcast_ref::<Self>().unwrap();91assert!(subset.len() == group_idxs.len());92unsafe {93// SAFETY: indices are in-bounds guaranteed by trait.94for (i, g) in subset.iter().zip(group_idxs) {95self.values96.or_pos_unchecked(*g as usize, other.values.get_unchecked(*i as usize));97}98}99Ok(())100}101102fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {103Box::new(Self {104values: core::mem::take(&mut self.evicted_values).into_mut(),105evicted_values: BitmapBuilder::new(),106})107}108109fn finalize(&mut self) -> PolarsResult<Series> {110let v = core::mem::take(&mut self.values);111let arr = BooleanArray::from(v.freeze());112Ok(Series::from_array(PlSmallStr::EMPTY, arr))113}114115fn as_any(&self) -> &dyn Any {116self117}118}119120#[derive(Default)]121struct AllIgnoreNullGroupedReduction {122values: MutableBitmap,123evicted_values: BitmapBuilder,124}125126impl GroupedReduction for AllIgnoreNullGroupedReduction {127fn new_empty(&self) -> Box<dyn GroupedReduction> {128Box::new(Self::default())129}130131fn reserve(&mut self, additional: usize) {132self.values.reserve(additional);133}134135fn resize(&mut self, num_groups: IdxSize) {136self.values.resize(num_groups as usize, true);137}138139fn update_group(140&mut self,141values: &[&Column],142group_idx: IdxSize,143_seq_id: u64,144) -> PolarsResult<()> {145let &[values] = values else { unreachable!() };146assert!(values.dtype() == &DataType::Boolean);147let values = values.as_materialized_series_maintain_scalar();148let ca: &BooleanChunked = values.as_ref().as_ref();149if !ca.all() {150self.values.set(group_idx as usize, false);151}152Ok(())153}154155unsafe fn update_groups_while_evicting(156&mut self,157values: &[&Column],158subset: &[IdxSize],159group_idxs: &[EvictIdx],160_seq_id: u64,161) -> PolarsResult<()> {162let &[values] = values else { unreachable!() };163assert!(values.dtype() == &DataType::Boolean);164assert!(subset.len() == group_idxs.len());165let values = values.as_materialized_series(); // @scalar-opt166let ca: &BooleanChunked = values.as_ref().as_ref();167let arr = ca.downcast_as_array();168unsafe {169// SAFETY: indices are in-bounds guaranteed by trait.170for (i, g) in subset.iter().zip(group_idxs) {171let ov = arr.get_unchecked(*i as usize);172if g.should_evict() {173self.evicted_values.push(self.values.get_unchecked(g.idx()));174self.values.set_unchecked(g.idx(), ov.unwrap_or(true));175} else {176self.values.and_pos_unchecked(g.idx(), ov.unwrap_or(true));177}178}179}180Ok(())181}182183unsafe fn combine_subset(184&mut self,185other: &dyn GroupedReduction,186subset: &[IdxSize],187group_idxs: &[IdxSize],188) -> PolarsResult<()> {189let other = other.as_any().downcast_ref::<Self>().unwrap();190assert!(subset.len() == group_idxs.len());191unsafe {192// SAFETY: indices are in-bounds guaranteed by trait.193for (i, g) in subset.iter().zip(group_idxs) {194self.values195.and_pos_unchecked(*g as usize, other.values.get_unchecked(*i as usize));196}197}198Ok(())199}200201fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {202Box::new(Self {203values: core::mem::take(&mut self.evicted_values).into_mut(),204evicted_values: BitmapBuilder::new(),205})206}207208fn finalize(&mut self) -> PolarsResult<Series> {209let v = core::mem::take(&mut self.values);210let arr = BooleanArray::from(v.freeze());211Ok(Series::from_array(PlSmallStr::EMPTY, arr))212}213214fn as_any(&self) -> &dyn Any {215self216}217}218219#[derive(Default)]220struct AnyKleeneNullGroupedReduction {221seen_true: MutableBitmap,222seen_null: MutableBitmap,223evicted_values: BitmapBuilder,224evicted_mask: BitmapBuilder,225}226227impl GroupedReduction for AnyKleeneNullGroupedReduction {228fn new_empty(&self) -> Box<dyn GroupedReduction> {229Box::new(Self::default())230}231232fn reserve(&mut self, additional: usize) {233self.seen_true.reserve(additional);234self.seen_null.reserve(additional)235}236237fn resize(&mut self, num_groups: IdxSize) {238self.seen_true.resize(num_groups as usize, false);239self.seen_null.resize(num_groups as usize, false);240}241242fn update_group(243&mut self,244values: &[&Column],245group_idx: IdxSize,246_seq_id: u64,247) -> PolarsResult<()> {248let &[values] = values else { unreachable!() };249assert!(values.dtype() == &DataType::Boolean);250let values = values.as_materialized_series_maintain_scalar();251let ca: &BooleanChunked = values.as_ref().as_ref();252if ca.any() {253self.seen_true.set(group_idx as usize, true);254}255if ca.has_nulls() {256self.seen_null.set(group_idx as usize, true);257}258Ok(())259}260261unsafe fn update_groups_while_evicting(262&mut self,263values: &[&Column],264subset: &[IdxSize],265group_idxs: &[EvictIdx],266_seq_id: u64,267) -> PolarsResult<()> {268let &[values] = values else { unreachable!() };269assert!(values.dtype() == &DataType::Boolean);270assert!(subset.len() == group_idxs.len());271let values = values.as_materialized_series(); // @scalar-opt272let ca: &BooleanChunked = values.as_ref().as_ref();273let arr = ca.downcast_as_array();274unsafe {275// SAFETY: indices are in-bounds guaranteed by trait.276for (i, g) in subset.iter().zip(group_idxs) {277let ov = arr.get_unchecked(*i as usize);278if g.should_evict() {279self.evicted_values280.push(self.seen_true.get_unchecked(g.idx()));281self.evicted_mask282.push(self.seen_null.get_unchecked(g.idx()));283self.seen_true.set_unchecked(g.idx(), ov.unwrap_or(false));284self.seen_null.set_unchecked(g.idx(), ov.is_none());285} else {286self.seen_true287.or_pos_unchecked(g.idx(), ov.unwrap_or(false));288self.seen_null.or_pos_unchecked(g.idx(), ov.is_none());289}290}291}292Ok(())293}294295unsafe fn combine_subset(296&mut self,297other: &dyn GroupedReduction,298subset: &[IdxSize],299group_idxs: &[IdxSize],300) -> PolarsResult<()> {301let other = other.as_any().downcast_ref::<Self>().unwrap();302assert!(subset.len() == group_idxs.len());303unsafe {304// SAFETY: indices are in-bounds guaranteed by trait.305for (i, g) in subset.iter().zip(group_idxs) {306self.seen_true307.or_pos_unchecked(*g as usize, other.seen_true.get_unchecked(*i as usize));308self.seen_null309.or_pos_unchecked(*g as usize, other.seen_null.get_unchecked(*i as usize));310}311}312Ok(())313}314315fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {316Box::new(Self {317seen_true: core::mem::take(&mut self.evicted_values).into_mut(),318seen_null: core::mem::take(&mut self.evicted_mask).into_mut(),319evicted_values: BitmapBuilder::new(),320evicted_mask: BitmapBuilder::new(),321})322}323324fn finalize(&mut self) -> PolarsResult<Series> {325let seen_true = core::mem::take(&mut self.seen_true);326let mut mask = core::mem::take(&mut self.seen_null);327binary_assign_mut(&mut mask, &seen_true, |mi: u64, ti: u64| ti | !mi);328let arr = BooleanArray::from(seen_true.freeze())329.with_validity(Some(mask.freeze()))330.boxed();331Ok(unsafe {332Series::from_chunks_and_dtype_unchecked(333PlSmallStr::EMPTY,334vec![arr],335&DataType::Boolean,336)337})338}339340fn as_any(&self) -> &dyn Any {341self342}343}344345#[derive(Default)]346struct AllKleeneNullGroupedReduction {347seen_false: MutableBitmap,348seen_null: MutableBitmap,349evicted_values: BitmapBuilder,350evicted_mask: BitmapBuilder,351}352353impl GroupedReduction for AllKleeneNullGroupedReduction {354fn new_empty(&self) -> Box<dyn GroupedReduction> {355Box::new(Self::default())356}357358fn reserve(&mut self, additional: usize) {359self.seen_false.reserve(additional);360self.seen_null.reserve(additional)361}362363fn resize(&mut self, num_groups: IdxSize) {364self.seen_false.resize(num_groups as usize, false);365self.seen_null.resize(num_groups as usize, false);366}367368fn update_group(369&mut self,370values: &[&Column],371group_idx: IdxSize,372_seq_id: u64,373) -> PolarsResult<()> {374let &[values] = values else { unreachable!() };375assert!(values.dtype() == &DataType::Boolean);376let values = values.as_materialized_series_maintain_scalar();377let ca: &BooleanChunked = values.as_ref().as_ref();378if !ca.all() {379self.seen_false.set(group_idx as usize, true);380}381if ca.has_nulls() {382self.seen_null.set(group_idx as usize, true);383}384Ok(())385}386387unsafe fn update_groups_while_evicting(388&mut self,389values: &[&Column],390subset: &[IdxSize],391group_idxs: &[EvictIdx],392_seq_id: u64,393) -> PolarsResult<()> {394let &[values] = values else { unreachable!() };395assert!(values.dtype() == &DataType::Boolean);396assert!(subset.len() == group_idxs.len());397let values = values.as_materialized_series(); // @scalar-opt398let ca: &BooleanChunked = values.as_ref().as_ref();399let arr = ca.downcast_as_array();400unsafe {401// SAFETY: indices are in-bounds guaranteed by trait.402for (i, g) in subset.iter().zip(group_idxs) {403let ov = arr.get_unchecked(*i as usize);404if g.should_evict() {405self.evicted_values406.push(self.seen_false.get_unchecked(g.idx()));407self.evicted_mask408.push(self.seen_null.get_unchecked(g.idx()));409self.seen_false.set_unchecked(g.idx(), !ov.unwrap_or(true));410self.seen_null.set_unchecked(g.idx(), ov.is_none());411} else {412self.seen_false413.or_pos_unchecked(g.idx(), !ov.unwrap_or(true));414self.seen_null.or_pos_unchecked(g.idx(), ov.is_none());415}416}417}418Ok(())419}420421unsafe fn combine_subset(422&mut self,423other: &dyn GroupedReduction,424subset: &[IdxSize],425group_idxs: &[IdxSize],426) -> PolarsResult<()> {427let other = other.as_any().downcast_ref::<Self>().unwrap();428assert!(subset.len() == group_idxs.len());429unsafe {430// SAFETY: indices are in-bounds guaranteed by trait.431for (i, g) in subset.iter().zip(group_idxs) {432self.seen_false433.or_pos_unchecked(*g as usize, other.seen_false.get_unchecked(*i as usize));434self.seen_null435.or_pos_unchecked(*g as usize, other.seen_null.get_unchecked(*i as usize));436}437}438Ok(())439}440441fn take_evictions(&mut self) -> Box<dyn GroupedReduction> {442Box::new(Self {443seen_false: core::mem::take(&mut self.evicted_values).into_mut(),444seen_null: core::mem::take(&mut self.evicted_mask).into_mut(),445evicted_values: BitmapBuilder::new(),446evicted_mask: BitmapBuilder::new(),447})448}449450fn finalize(&mut self) -> PolarsResult<Series> {451let seen_false = core::mem::take(&mut self.seen_false);452let mut mask = core::mem::take(&mut self.seen_null);453binary_assign_mut(&mut mask, &seen_false, |mi: u64, fi: u64| fi | !mi);454let arr = BooleanArray::from((!seen_false).freeze())455.with_validity(Some(mask.freeze()))456.boxed();457Ok(unsafe {458Series::from_chunks_and_dtype_unchecked(459PlSmallStr::EMPTY,460vec![arr],461&DataType::Boolean,462)463})464}465466fn as_any(&self) -> &dyn Any {467self468}469}470471472