use std::any::Any;
use arrow::bitmap::BitmapBuilder;
use polars_core::prelude::*;
#[cfg(feature = "dtype-categorical")]
use polars_core::with_match_categorical_physical_type;
use polars_core::with_match_physical_numeric_polars_type;
use polars_utils::IdxSize;
use polars_utils::hashing::HashPartitioner;
use crate::hash_keys::HashKeys;
mod binview;
mod row_encoded;
mod single_key;
/// Object-safe interface shared by the hash groupers defined in this module
/// (`row_encoded`, `single_key`, `binview`); `new_hash_grouper` hands out
/// implementations as `Box<dyn Grouper>`.
///
/// Implementors must be `Any + Send + Sync`.
pub trait Grouper: Any + Send + Sync {
/// Returns a freshly boxed grouper.
///
/// NOTE(review): presumably an empty grouper of the same concrete type as
/// `self` — confirm against the implementations.
fn new_empty(&self) -> Box<dyn Grouper>;
/// Reserves room for `additional` more entries.
///
/// NOTE(review): presumably counted in groups rather than raw keys — confirm.
fn reserve(&mut self, additional: usize);
/// Returns the number of groups, as an `IdxSize`.
fn num_groups(&self) -> IdxSize;
/// Inserts the keys from `keys` selected by `subset`.
///
/// NOTE(review): `subset` presumably holds row indices into `keys`, and
/// `group_idxs`, when `Some`, presumably receives the group index assigned to
/// each inserted key — confirm against the implementations.
///
/// # Safety
/// The safety contract is not visible in this chunk. Presumably every index
/// in `subset` must be in bounds for `keys`, and `keys` must be of the
/// variant this grouper expects — TODO confirm and document precisely.
unsafe fn insert_keys_subset(
&mut self,
keys: &HashKeys,
subset: &[IdxSize],
group_idxs: Option<&mut Vec<IdxSize>>,
);
/// Builds a `DataFrame` holding the keys stored in this grouper, using
/// `schema` for the output columns.
///
/// NOTE(review): per the name, row `i` presumably holds the key of group
/// `i` — confirm.
fn get_keys_in_group_order(&self, schema: &Schema) -> DataFrame;
/// Probes `keys` against the partitioned `groupers` and appends results to
/// `probe_matches`.
///
/// NOTE(review): presumably `partitioner` selects which grouper each key is
/// looked up in, `probe_matches` receives the indices of keys that were
/// found, and `invert` flips that to the keys that were *not* found —
/// confirm against the implementations.
///
/// # Safety
/// The safety contract is not visible in this chunk. Presumably all
/// `groupers` must share the concrete type of `self` — TODO confirm.
unsafe fn probe_partitioned_groupers(
&self,
groupers: &[Box<dyn Grouper>],
keys: &HashKeys,
partitioner: &HashPartitioner,
invert: bool,
probe_matches: &mut Vec<IdxSize>,
);
/// Tests `keys` for membership in the partitioned `groupers` and appends the
/// outcome to the `contains_key` bitmap builder.
///
/// NOTE(review): presumably one bit is pushed per key (set when the key is
/// found), with `invert` negating each bit — confirm against the
/// implementations.
///
/// # Safety
/// The safety contract is not visible in this chunk. Presumably all
/// `groupers` must share the concrete type of `self` — TODO confirm.
unsafe fn contains_key_partitioned_groupers(
&self,
groupers: &[Box<dyn Grouper>],
keys: &HashKeys,
partitioner: &HashPartitioner,
invert: bool,
contains_key: &mut BitmapBuilder,
);
/// Exposes the grouper as `&dyn Any`, which allows callers to downcast to
/// the concrete grouper type.
fn as_any(&self) -> &dyn Any;
}
/// Chooses a hash grouper implementation for the given key schema.
///
/// Selection rules, checked in this order:
/// * more than one key column: [`row_encoded::RowEncodedHashGrouper`];
/// * a single primitive-numeric or temporal key:
///   [`single_key::SingleKeyHashGrouper`] over the key's physical numeric type;
/// * a single decimal key (feature `dtype-decimal`): `SingleKeyHashGrouper`
///   over `Int128Type`;
/// * a single enum/categorical key (feature `dtype-categorical`):
///   `SingleKeyHashGrouper` over the categorical's physical type;
/// * a single string or binary key: [`binview::BinviewHashGrouper`];
/// * any other single key: `RowEncodedHashGrouper`.
///
/// # Panics
/// Panics if `key_schema` has no column at index 0 (the lookup is unwrapped),
/// or if `cat_physical()` yields nothing for an enum/categorical dtype.
pub fn new_hash_grouper(key_schema: Arc<Schema>) -> Box<dyn Grouper> {
    // Composite keys are always handled by the row-encoded grouper.
    if key_schema.len() > 1 {
        return Box::new(row_encoded::RowEncodedHashGrouper::new());
    }

    // Single key column: specialise on its dtype where a dedicated grouper exists.
    let (_, key_dtype) = key_schema.get_at_index(0).unwrap();
    match key_dtype {
        _ if key_dtype.is_primitive_numeric() || key_dtype.is_temporal() => {
            with_match_physical_numeric_polars_type!(key_dtype.to_physical(), |$T| {
                Box::new(single_key::SingleKeyHashGrouper::<$T>::new())
            })
        },
        #[cfg(feature = "dtype-decimal")]
        DataType::Decimal(..) => Box::new(single_key::SingleKeyHashGrouper::<Int128Type>::new()),
        #[cfg(feature = "dtype-categorical")]
        DataType::Enum(..) | DataType::Categorical(..) => {
            with_match_categorical_physical_type!(key_dtype.cat_physical().unwrap(), |$C| {
                Box::new(single_key::SingleKeyHashGrouper::<
                    <$C as PolarsCategoricalType>::PolarsPhysical,
                >::new())
            })
        },
        DataType::String | DataType::Binary => Box::new(binview::BinviewHashGrouper::new()),
        // No specialised grouper for this dtype; fall back to row encoding.
        _ => Box::new(row_encoded::RowEncodedHashGrouper::new()),
    }
}