// GitHub repository: pola-rs/polars
// Path: blob/main/crates/polars-expr/src/idx_table/mod.rs
1
use std::any::Any;
2
3
use polars_core::prelude::*;
4
use polars_utils::IdxSize;
5
6
use crate::hash_keys::HashKeys;
7
8
mod binview;
9
mod row_encoded;
10
mod single_key;
11
12
pub trait IdxTable: Any + Send + Sync {
13
/// Creates a new empty IdxTable similar to this one.
14
fn new_empty(&self) -> Box<dyn IdxTable>;
15
16
/// Reserves space for the given number additional keys.
17
fn reserve(&mut self, additional: usize);
18
19
/// Returns the number of unique keys in this IdxTable.
20
fn num_keys(&self) -> IdxSize;
21
22
/// Inserts the given keys into this IdxTable.
23
fn insert_keys(&mut self, keys: &HashKeys, track_unmatchable: bool);
24
25
/// Inserts a subset of the given keys into this IdxTable.
26
/// # Safety
27
/// The provided subset indices must be in-bounds.
28
unsafe fn insert_keys_subset(
29
&mut self,
30
keys: &HashKeys,
31
subset: &[IdxSize],
32
track_unmatchable: bool,
33
);
34
35
/// Probe the table, adding an entry to table_match and probe_match for each
36
/// match. Will stop processing new keys once limit matches have been
37
/// generated, returning the number of keys processed.
38
///
39
/// If mark_matches is true, matches are marked in the table as such.
40
///
41
/// If emit_unmatched is true, for keys that do not have a match we emit a
42
/// match with ChunkId::null() on the table match.
43
fn probe(
44
&self,
45
hash_keys: &HashKeys,
46
table_match: &mut Vec<IdxSize>,
47
probe_match: &mut Vec<IdxSize>,
48
mark_matches: bool,
49
emit_unmatched: bool,
50
limit: IdxSize,
51
) -> IdxSize;
52
53
/// The same as probe, except it will only apply to the specified subset of keys.
54
/// # Safety
55
/// The provided subset indices must be in-bounds.
56
#[allow(clippy::too_many_arguments)]
57
unsafe fn probe_subset(
58
&self,
59
hash_keys: &HashKeys,
60
subset: &[IdxSize],
61
table_match: &mut Vec<IdxSize>,
62
probe_match: &mut Vec<IdxSize>,
63
mark_matches: bool,
64
emit_unmatched: bool,
65
limit: IdxSize,
66
) -> IdxSize;
67
68
/// Get the ChunkIds for each key which was never marked during probing.
69
fn unmarked_keys(&self, out: &mut Vec<IdxSize>, offset: IdxSize, limit: IdxSize) -> IdxSize;
70
}
71
72
pub fn new_idx_table(key_schema: Arc<Schema>) -> Box<dyn IdxTable> {
73
if key_schema.len() > 1 {
74
Box::new(row_encoded::RowEncodedIdxTable::new())
75
} else {
76
use single_key::SingleKeyIdxTable as SKIT;
77
match key_schema.get_at_index(0).unwrap().1 {
78
#[cfg(feature = "dtype-u8")]
79
DataType::UInt8 => Box::new(SKIT::<UInt8Type>::new()),
80
#[cfg(feature = "dtype-u16")]
81
DataType::UInt16 => Box::new(SKIT::<UInt16Type>::new()),
82
DataType::UInt32 => Box::new(SKIT::<UInt32Type>::new()),
83
DataType::UInt64 => Box::new(SKIT::<UInt64Type>::new()),
84
#[cfg(feature = "dtype-i8")]
85
DataType::Int8 => Box::new(SKIT::<Int8Type>::new()),
86
#[cfg(feature = "dtype-i16")]
87
DataType::Int16 => Box::new(SKIT::<Int16Type>::new()),
88
DataType::Int32 => Box::new(SKIT::<Int32Type>::new()),
89
DataType::Int64 => Box::new(SKIT::<Int64Type>::new()),
90
#[cfg(feature = "dtype-i128")]
91
DataType::Int128 => Box::new(SKIT::<Int128Type>::new()),
92
DataType::Float32 => Box::new(SKIT::<Float32Type>::new()),
93
DataType::Float64 => Box::new(SKIT::<Float64Type>::new()),
94
95
#[cfg(feature = "dtype-date")]
96
DataType::Date => Box::new(SKIT::<Int32Type>::new()),
97
#[cfg(feature = "dtype-datetime")]
98
DataType::Datetime(_, _) => Box::new(SKIT::<Int64Type>::new()),
99
#[cfg(feature = "dtype-duration")]
100
DataType::Duration(_) => Box::new(SKIT::<Int64Type>::new()),
101
#[cfg(feature = "dtype-time")]
102
DataType::Time => Box::new(SKIT::<Int64Type>::new()),
103
104
#[cfg(feature = "dtype-decimal")]
105
DataType::Decimal(_, _) => Box::new(SKIT::<Int128Type>::new()),
106
#[cfg(feature = "dtype-categorical")]
107
dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => {
108
with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
109
Box::new(SKIT::<<$C as PolarsCategoricalType>::PolarsPhysical>::new())
110
})
111
},
112
113
DataType::String | DataType::Binary => Box::new(binview::BinviewKeyIdxTable::new()),
114
115
_ => Box::new(row_encoded::RowEncodedIdxTable::new()),
116
}
117
}
118
}
119
120