Path: blob/main/crates/polars-expr/src/hot_groups/mod.rs
6940 views
use std::any::Any;12use polars_core::prelude::*;3use polars_utils::IdxSize;45use crate::EvictIdx;6use crate::hash_keys::HashKeys;78mod binview;9mod fixed_index_table;10mod row_encoded;11mod single_key;1213/// A HotGrouper maps keys to groups, such that duplicate keys map to the same14/// group. Unlike a Grouper it has a fixed size and will cause evictions rather15/// than growing.16pub trait HotGrouper: Any + Send + Sync {17/// Creates a new empty HotGrouper similar to this one, with the given size.18fn new_empty(&self, groups: usize) -> Box<dyn HotGrouper>;1920/// Returns the number of groups in this HotGrouper.21fn num_groups(&self) -> IdxSize;2223/// Inserts the given keys into this Grouper, extending groups_idxs with24/// the group index of keys[i].25fn insert_keys(26&mut self,27keys: &HashKeys,28hot_idxs: &mut Vec<IdxSize>,29hot_group_idxs: &mut Vec<EvictIdx>,30cold_idxs: &mut Vec<IdxSize>,31force_hot: bool,32);3334/// Get all the current hot keys, in group order.35fn keys(&self) -> HashKeys;3637/// Get the number of evicted keys stored.38fn num_evictions(&self) -> usize;3940/// Consume all the evicted keys from this HotGrouper.41fn take_evicted_keys(&mut self) -> HashKeys;4243fn as_any(&self) -> &dyn Any;44}4546pub fn new_hash_hot_grouper(key_schema: Arc<Schema>, num_groups: usize) -> Box<dyn HotGrouper> {47if key_schema.len() > 1 {48Box::new(row_encoded::RowEncodedHashHotGrouper::new(49key_schema, num_groups,50))51} else {52use single_key::SingleKeyHashHotGrouper as SK;53let dt = key_schema.get_at_index(0).unwrap().1.clone();54let ng = num_groups;55match dt {56#[cfg(feature = "dtype-u8")]57DataType::UInt8 => Box::new(SK::<UInt8Type>::new(dt, ng)),58#[cfg(feature = "dtype-u16")]59DataType::UInt16 => Box::new(SK::<UInt16Type>::new(dt, ng)),60DataType::UInt32 => Box::new(SK::<UInt32Type>::new(dt, ng)),61DataType::UInt64 => Box::new(SK::<UInt64Type>::new(dt, ng)),62#[cfg(feature = "dtype-i8")]63DataType::Int8 => Box::new(SK::<Int8Type>::new(dt, ng)),64#[cfg(feature = "dtype-i16")]65DataType::Int16 => Box::new(SK::<Int16Type>::new(dt, ng)),66DataType::Int32 => Box::new(SK::<Int32Type>::new(dt, ng)),67DataType::Int64 => Box::new(SK::<Int64Type>::new(dt, ng)),68#[cfg(feature = "dtype-i128")]69DataType::Int128 => Box::new(SK::<Int128Type>::new(dt, ng)),70DataType::Float32 => Box::new(SK::<Float32Type>::new(dt, ng)),71DataType::Float64 => Box::new(SK::<Float64Type>::new(dt, ng)),7273#[cfg(feature = "dtype-date")]74DataType::Date => Box::new(SK::<Int32Type>::new(dt, ng)),75#[cfg(feature = "dtype-datetime")]76DataType::Datetime(_, _) => Box::new(SK::<Int64Type>::new(dt, ng)),77#[cfg(feature = "dtype-duration")]78DataType::Duration(_) => Box::new(SK::<Int64Type>::new(dt, ng)),79#[cfg(feature = "dtype-time")]80DataType::Time => Box::new(SK::<Int64Type>::new(dt, ng)),8182#[cfg(feature = "dtype-decimal")]83DataType::Decimal(_, _) => Box::new(SK::<Int128Type>::new(dt, ng)),84#[cfg(feature = "dtype-categorical")]85dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => {86with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {87Box::new(SK::<<$C as PolarsCategoricalType>::PolarsPhysical>::new(dt.clone(), ng))88})89},9091DataType::String | DataType::Binary => {92Box::new(binview::BinviewHashHotGrouper::new(ng))93},9495_ => Box::new(row_encoded::RowEncodedHashHotGrouper::new(96key_schema, num_groups,97)),98}99}100}101102103