Path: blob/main/crates/polars-expr/src/hot_groups/mod.rs
8409 views
use std::any::Any;12use polars_core::prelude::*;3use polars_utils::IdxSize;45use crate::EvictIdx;6use crate::hash_keys::HashKeys;78mod binview;9mod fixed_index_table;10mod row_encoded;11mod single_key;1213/// A HotGrouper maps keys to groups, such that duplicate keys map to the same14/// group. Unlike a Grouper it has a fixed size and will cause evictions rather15/// than growing.16pub trait HotGrouper: Any + Send + Sync {17/// Creates a new empty HotGrouper similar to this one, with the given size.18fn new_empty(&self, groups: usize) -> Box<dyn HotGrouper>;1920/// Returns the number of groups in this HotGrouper.21fn num_groups(&self) -> IdxSize;2223/// Inserts the given keys into this Grouper, extending groups_idxs with24/// the group index of keys[i].25fn insert_keys(26&mut self,27keys: &HashKeys,28hot_idxs: &mut Vec<IdxSize>,29hot_group_idxs: &mut Vec<EvictIdx>,30cold_idxs: &mut Vec<IdxSize>,31force_hot: bool,32);3334/// Get all the current hot keys, in group order.35fn keys(&self) -> HashKeys;3637/// Get the number of evicted keys stored.38fn num_evictions(&self) -> usize;3940/// Consume all the evicted keys from this HotGrouper.41fn take_evicted_keys(&mut self) -> HashKeys;4243fn as_any(&self) -> &dyn Any;44}4546pub fn new_hash_hot_grouper(key_schema: Arc<Schema>, num_groups: usize) -> Box<dyn HotGrouper> {47if key_schema.len() > 1 {48Box::new(row_encoded::RowEncodedHashHotGrouper::new(49key_schema, num_groups,50))51} else {52use single_key::SingleKeyHashHotGrouper as SK;53let dt = key_schema.get_at_index(0).unwrap().1.clone();54let ng = num_groups;55match dt {56#[cfg(feature = "dtype-u8")]57DataType::UInt8 => Box::new(SK::<UInt8Type>::new(dt, ng)),58#[cfg(feature = "dtype-u16")]59DataType::UInt16 => Box::new(SK::<UInt16Type>::new(dt, ng)),60DataType::UInt32 => Box::new(SK::<UInt32Type>::new(dt, ng)),61DataType::UInt64 => Box::new(SK::<UInt64Type>::new(dt, ng)),62#[cfg(feature = "dtype-u128")]63DataType::UInt128 => Box::new(SK::<UInt128Type>::new(dt, ng)),64#[cfg(feature = "dtype-i8")]65DataType::Int8 => Box::new(SK::<Int8Type>::new(dt, ng)),66#[cfg(feature = "dtype-i16")]67DataType::Int16 => Box::new(SK::<Int16Type>::new(dt, ng)),68DataType::Int32 => Box::new(SK::<Int32Type>::new(dt, ng)),69DataType::Int64 => Box::new(SK::<Int64Type>::new(dt, ng)),70#[cfg(feature = "dtype-i128")]71DataType::Int128 => Box::new(SK::<Int128Type>::new(dt, ng)),72#[cfg(feature = "dtype-f16")]73DataType::Float16 => Box::new(SK::<Float16Type>::new(dt, ng)),74DataType::Float32 => Box::new(SK::<Float32Type>::new(dt, ng)),75DataType::Float64 => Box::new(SK::<Float64Type>::new(dt, ng)),7677#[cfg(feature = "dtype-date")]78DataType::Date => Box::new(SK::<Int32Type>::new(dt, ng)),79#[cfg(feature = "dtype-datetime")]80DataType::Datetime(_, _) => Box::new(SK::<Int64Type>::new(dt, ng)),81#[cfg(feature = "dtype-duration")]82DataType::Duration(_) => Box::new(SK::<Int64Type>::new(dt, ng)),83#[cfg(feature = "dtype-time")]84DataType::Time => Box::new(SK::<Int64Type>::new(dt, ng)),8586#[cfg(feature = "dtype-decimal")]87DataType::Decimal(_, _) => Box::new(SK::<Int128Type>::new(dt, ng)),88#[cfg(feature = "dtype-categorical")]89dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => {90with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {91Box::new(SK::<<$C as PolarsCategoricalType>::PolarsPhysical>::new(dt.clone(), ng))92})93},9495DataType::String | DataType::Binary => {96Box::new(binview::BinviewHashHotGrouper::new(ng))97},9899_ => Box::new(row_encoded::RowEncodedHashHotGrouper::new(100key_schema, num_groups,101)),102}103}104}105106107