Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-expr/src/hot_groups/mod.rs
8409 views
1
use std::any::Any;
2
3
use polars_core::prelude::*;
4
use polars_utils::IdxSize;
5
6
use crate::EvictIdx;
7
use crate::hash_keys::HashKeys;
8
9
mod binview;
10
mod fixed_index_table;
11
mod row_encoded;
12
mod single_key;
13
14
/// A HotGrouper maps keys to groups, such that duplicate keys map to the same
15
/// group. Unlike a Grouper it has a fixed size and will cause evictions rather
16
/// than growing.
17
pub trait HotGrouper: Any + Send + Sync {
18
/// Creates a new empty HotGrouper similar to this one, with the given size.
19
fn new_empty(&self, groups: usize) -> Box<dyn HotGrouper>;
20
21
/// Returns the number of groups in this HotGrouper.
22
fn num_groups(&self) -> IdxSize;
23
24
/// Inserts the given keys into this Grouper, extending groups_idxs with
25
/// the group index of keys[i].
26
fn insert_keys(
27
&mut self,
28
keys: &HashKeys,
29
hot_idxs: &mut Vec<IdxSize>,
30
hot_group_idxs: &mut Vec<EvictIdx>,
31
cold_idxs: &mut Vec<IdxSize>,
32
force_hot: bool,
33
);
34
35
/// Get all the current hot keys, in group order.
36
fn keys(&self) -> HashKeys;
37
38
/// Get the number of evicted keys stored.
39
fn num_evictions(&self) -> usize;
40
41
/// Consume all the evicted keys from this HotGrouper.
42
fn take_evicted_keys(&mut self) -> HashKeys;
43
44
fn as_any(&self) -> &dyn Any;
45
}
46
47
pub fn new_hash_hot_grouper(key_schema: Arc<Schema>, num_groups: usize) -> Box<dyn HotGrouper> {
48
if key_schema.len() > 1 {
49
Box::new(row_encoded::RowEncodedHashHotGrouper::new(
50
key_schema, num_groups,
51
))
52
} else {
53
use single_key::SingleKeyHashHotGrouper as SK;
54
let dt = key_schema.get_at_index(0).unwrap().1.clone();
55
let ng = num_groups;
56
match dt {
57
#[cfg(feature = "dtype-u8")]
58
DataType::UInt8 => Box::new(SK::<UInt8Type>::new(dt, ng)),
59
#[cfg(feature = "dtype-u16")]
60
DataType::UInt16 => Box::new(SK::<UInt16Type>::new(dt, ng)),
61
DataType::UInt32 => Box::new(SK::<UInt32Type>::new(dt, ng)),
62
DataType::UInt64 => Box::new(SK::<UInt64Type>::new(dt, ng)),
63
#[cfg(feature = "dtype-u128")]
64
DataType::UInt128 => Box::new(SK::<UInt128Type>::new(dt, ng)),
65
#[cfg(feature = "dtype-i8")]
66
DataType::Int8 => Box::new(SK::<Int8Type>::new(dt, ng)),
67
#[cfg(feature = "dtype-i16")]
68
DataType::Int16 => Box::new(SK::<Int16Type>::new(dt, ng)),
69
DataType::Int32 => Box::new(SK::<Int32Type>::new(dt, ng)),
70
DataType::Int64 => Box::new(SK::<Int64Type>::new(dt, ng)),
71
#[cfg(feature = "dtype-i128")]
72
DataType::Int128 => Box::new(SK::<Int128Type>::new(dt, ng)),
73
#[cfg(feature = "dtype-f16")]
74
DataType::Float16 => Box::new(SK::<Float16Type>::new(dt, ng)),
75
DataType::Float32 => Box::new(SK::<Float32Type>::new(dt, ng)),
76
DataType::Float64 => Box::new(SK::<Float64Type>::new(dt, ng)),
77
78
#[cfg(feature = "dtype-date")]
79
DataType::Date => Box::new(SK::<Int32Type>::new(dt, ng)),
80
#[cfg(feature = "dtype-datetime")]
81
DataType::Datetime(_, _) => Box::new(SK::<Int64Type>::new(dt, ng)),
82
#[cfg(feature = "dtype-duration")]
83
DataType::Duration(_) => Box::new(SK::<Int64Type>::new(dt, ng)),
84
#[cfg(feature = "dtype-time")]
85
DataType::Time => Box::new(SK::<Int64Type>::new(dt, ng)),
86
87
#[cfg(feature = "dtype-decimal")]
88
DataType::Decimal(_, _) => Box::new(SK::<Int128Type>::new(dt, ng)),
89
#[cfg(feature = "dtype-categorical")]
90
dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => {
91
with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
92
Box::new(SK::<<$C as PolarsCategoricalType>::PolarsPhysical>::new(dt.clone(), ng))
93
})
94
},
95
96
DataType::String | DataType::Binary => {
97
Box::new(binview::BinviewHashHotGrouper::new(ng))
98
},
99
100
_ => Box::new(row_encoded::RowEncodedHashHotGrouper::new(
101
key_schema, num_groups,
102
)),
103
}
104
}
105
}
106
107