Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-expr/src/hot_groups/mod.rs
6940 views
1
use std::any::Any;
2
3
use polars_core::prelude::*;
4
use polars_utils::IdxSize;
5
6
use crate::EvictIdx;
7
use crate::hash_keys::HashKeys;
8
9
mod binview;
10
mod fixed_index_table;
11
mod row_encoded;
12
mod single_key;
13
14
/// A HotGrouper maps keys to groups, such that duplicate keys map to the same
15
/// group. Unlike a Grouper it has a fixed size and will cause evictions rather
16
/// than growing.
17
pub trait HotGrouper: Any + Send + Sync {
18
/// Creates a new empty HotGrouper similar to this one, with the given size.
19
fn new_empty(&self, groups: usize) -> Box<dyn HotGrouper>;
20
21
/// Returns the number of groups in this HotGrouper.
22
fn num_groups(&self) -> IdxSize;
23
24
/// Inserts the given keys into this Grouper, extending groups_idxs with
25
/// the group index of keys[i].
26
fn insert_keys(
27
&mut self,
28
keys: &HashKeys,
29
hot_idxs: &mut Vec<IdxSize>,
30
hot_group_idxs: &mut Vec<EvictIdx>,
31
cold_idxs: &mut Vec<IdxSize>,
32
force_hot: bool,
33
);
34
35
/// Get all the current hot keys, in group order.
36
fn keys(&self) -> HashKeys;
37
38
/// Get the number of evicted keys stored.
39
fn num_evictions(&self) -> usize;
40
41
/// Consume all the evicted keys from this HotGrouper.
42
fn take_evicted_keys(&mut self) -> HashKeys;
43
44
fn as_any(&self) -> &dyn Any;
45
}
46
47
pub fn new_hash_hot_grouper(key_schema: Arc<Schema>, num_groups: usize) -> Box<dyn HotGrouper> {
48
if key_schema.len() > 1 {
49
Box::new(row_encoded::RowEncodedHashHotGrouper::new(
50
key_schema, num_groups,
51
))
52
} else {
53
use single_key::SingleKeyHashHotGrouper as SK;
54
let dt = key_schema.get_at_index(0).unwrap().1.clone();
55
let ng = num_groups;
56
match dt {
57
#[cfg(feature = "dtype-u8")]
58
DataType::UInt8 => Box::new(SK::<UInt8Type>::new(dt, ng)),
59
#[cfg(feature = "dtype-u16")]
60
DataType::UInt16 => Box::new(SK::<UInt16Type>::new(dt, ng)),
61
DataType::UInt32 => Box::new(SK::<UInt32Type>::new(dt, ng)),
62
DataType::UInt64 => Box::new(SK::<UInt64Type>::new(dt, ng)),
63
#[cfg(feature = "dtype-i8")]
64
DataType::Int8 => Box::new(SK::<Int8Type>::new(dt, ng)),
65
#[cfg(feature = "dtype-i16")]
66
DataType::Int16 => Box::new(SK::<Int16Type>::new(dt, ng)),
67
DataType::Int32 => Box::new(SK::<Int32Type>::new(dt, ng)),
68
DataType::Int64 => Box::new(SK::<Int64Type>::new(dt, ng)),
69
#[cfg(feature = "dtype-i128")]
70
DataType::Int128 => Box::new(SK::<Int128Type>::new(dt, ng)),
71
DataType::Float32 => Box::new(SK::<Float32Type>::new(dt, ng)),
72
DataType::Float64 => Box::new(SK::<Float64Type>::new(dt, ng)),
73
74
#[cfg(feature = "dtype-date")]
75
DataType::Date => Box::new(SK::<Int32Type>::new(dt, ng)),
76
#[cfg(feature = "dtype-datetime")]
77
DataType::Datetime(_, _) => Box::new(SK::<Int64Type>::new(dt, ng)),
78
#[cfg(feature = "dtype-duration")]
79
DataType::Duration(_) => Box::new(SK::<Int64Type>::new(dt, ng)),
80
#[cfg(feature = "dtype-time")]
81
DataType::Time => Box::new(SK::<Int64Type>::new(dt, ng)),
82
83
#[cfg(feature = "dtype-decimal")]
84
DataType::Decimal(_, _) => Box::new(SK::<Int128Type>::new(dt, ng)),
85
#[cfg(feature = "dtype-categorical")]
86
dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => {
87
with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
88
Box::new(SK::<<$C as PolarsCategoricalType>::PolarsPhysical>::new(dt.clone(), ng))
89
})
90
},
91
92
DataType::String | DataType::Binary => {
93
Box::new(binview::BinviewHashHotGrouper::new(ng))
94
},
95
96
_ => Box::new(row_encoded::RowEncodedHashHotGrouper::new(
97
key_schema, num_groups,
98
)),
99
}
100
}
101
}
102
103